Files
2025-12-09 12:13:01 +01:00

132 lines
3.5 KiB
Python

import re
from dataclasses import dataclass
from functools import lru_cache
import pandas as pd
from bs4 import BeautifulSoup
from rich.console import Group, Text
from rich.markdown import Markdown
from edgar.core import sec_dot_gov
from edgar.httprequests import download_file
from edgar.richtools import df_to_rich_table, repr_rich
# Names exported by a star-import of this module.
__all__ = ["SecForms", "list_forms", "FUND_FORMS"]

# Form identifiers grouped under the FUND_FORMS name (exported via __all__).
FUND_FORMS = ["NPORT-P", "NPORT-EX"]
@lru_cache(maxsize=1)
def list_forms():
    """
    Download the SEC forms index and return it as a ``SecForms`` collection.

    Fetches pages 0-6 of ``https://www.sec.gov/forms``, reads the rows of the
    first ``<table>`` on each page and builds one record per row with the
    columns ``Form``, ``Description``, ``Url``, ``LastUpdated``, ``SECNumber``
    and ``Topics``. The label substrings ``Number:``, ``Description:``,
    ``Last Updated:``, ``SEC Number:`` and ``Topic(s):`` are removed from the
    cell text and the remainder is stripped of surrounding whitespace.

    Pages without a table and rows with fewer than five ``<td>`` cells are
    skipped instead of raising, and the resulting DataFrame always carries the
    six columns above, even when no rows were found.

    The result is memoised by ``lru_cache(maxsize=1)``, so repeated calls
    return the same ``SecForms`` object without downloading again.

    Returns:
        SecForms: wrapper around a DataFrame with one row per listed form.
    """
    columns = ["Form", "Description", "Url", "LastUpdated", "SECNumber", "Topics"]
    rows = []
    for page in range(7):
        forms_html = download_file(f'https://www.sec.gov/forms?page={page}')
        soup = BeautifulSoup(forms_html, features="lxml")
        data_table = soup.find("table")
        if data_table is None:
            # find() returns None when the page has no table (e.g. a page past
            # the end of the listing); there is nothing to read from it.
            continue
        # Fall back to the table itself when the markup has no explicit <tbody>.
        tbody = data_table.find("tbody")
        if tbody is None:
            tbody = data_table
        for tr in tbody.find_all('tr'):
            cells = tr.find_all('td')
            if len(cells) < 5:
                # Header rows (<th> only) or malformed rows carry no form data.
                continue
            # Look the link up once; it may be absent or lack an href.
            link = cells[1].find('a')
            href = link.get('href') if link is not None else None
            rows.append({"Form": cells[0].text.replace("Number:", "").strip(),
                         "Description": cells[1].text.replace("Description:", "").strip(),
                         "Url": f"{sec_dot_gov}{href}" if href else "",
                         "LastUpdated": cells[2].text.replace("Last Updated:", "").strip(),
                         "SECNumber": cells[3].text.replace("SEC Number:", "").strip(),
                         "Topics": cells[4].text.replace("Topic(s):", "").strip()
                         })
    # Passing columns keeps the schema stable when rows is empty.
    return SecForms(pd.DataFrame(rows, columns=columns))
@dataclass(frozen=True)
class SecForm:
    """
    Immutable record describing a single SEC form.

    Holds the form identifier, its description, a URL, the SEC number and the
    topics string. ``str()`` gives ``"Form <form>: <description>"``; the rich
    rendering adds a one-row table with the topics, SEC number and URL.
    """
    form: str
    description: str
    url: str
    sec_number: str
    topics: str

    def open(self) -> None:
        """Open ``url`` using the standard-library ``webbrowser`` module."""
        # Imported lazily so the module is only loaded when a form is opened.
        import webbrowser as browser
        browser.open(self.url)

    def __str__(self) -> str:
        """Return the one-line ``Form <form>: <description>`` label."""
        return "Form {}: {}".format(self.form, self.description)

    def __rich__(self):
        """Return a rich ``Group``: the label line plus a one-row details table."""
        title = Text(f"Form {self.form}: {self.description}")
        details = pd.DataFrame(
            [{"Topics": self.topics, "SEC Number": self.sec_number, "Url": self.url}]
        ).set_index("Topics")
        return Group(title, df_to_rich_table(details, index_name="Topics"))

    def __repr__(self):
        """Return the rich rendering converted to text by ``repr_rich``."""
        renderable = self.__rich__()
        return repr_rich(renderable)
class SecForms:
    """
    Collection of SEC forms backed by a pandas DataFrame.

    ``data`` is expected to have the columns ``Form``, ``Description``, ``Url``,
    ``SECNumber`` and ``Topics``, which are the columns read by the methods
    below. Individual forms are looked up by name with ``get_form`` or
    ``forms[name]``.
    """

    def __init__(self,
                 data: pd.DataFrame):
        self.data = data

    def get_form(self, form: str):
        """
        Return the ``SecForm`` whose ``Form`` column equals ``form``.

        Args:
            form: exact form name to look up.

        Returns:
            A ``SecForm`` when exactly one row matches; ``None`` when there is
            no match or the name is ambiguous (more than one row).
        """
        # A boolean mask is used instead of DataFrame.query() so the form name
        # is compared as a value and never spliced into an expression string
        # (names containing quotes used to raise or alter the expression).
        matches = self.data[self.data["Form"] == form]
        if len(matches) != 1:
            return None
        return SecForm(
            form=matches["Form"].item(),
            description=matches["Description"].item(),
            sec_number=matches["SECNumber"].item(),
            url=matches["Url"].item(),
            topics=matches["Topics"].item()
        )

    @classmethod
    def load(cls):
        """
        Build the collection from ``list_forms()``.

        Returns:
            An instance of ``cls`` whose ``data`` is the forms DataFrame.
        """
        forms = list_forms()
        # list_forms() already returns a SecForms; unwrap it so that ``data``
        # is the DataFrame itself rather than a nested SecForms.
        data = forms.data if isinstance(forms, SecForms) else forms
        return cls(data)

    def __getitem__(self, item):
        """Shorthand for ``get_form(item)``."""
        return self.get_form(item)

    def __len__(self):
        """Number of rows in the underlying DataFrame."""
        return len(self.data)

    def summary(self) -> pd.DataFrame:
        """Return the ``Form``, ``Description`` and ``Topics`` columns."""
        return self.data[['Form', 'Description', 'Topics']]

    def __rich__(self):
        """Return a rich ``Group``: a title line plus the summary table (up to 200 rows)."""
        return Group(
            Text("SEC Forms List"),
            df_to_rich_table(self.summary().set_index("Form"), index_name="Form", max_rows=200)
        )

    def __repr__(self):
        """Return the rich rendering converted to text by ``repr_rich``."""
        return repr_rich(self.__rich__())
def find_section(pattern, sections):
    """
    Locate the first entry of ``sections`` that matches ``pattern``.

    Each entry is tested with ``re.search`` using ``re.IGNORECASE``, in order.

    Args:
        pattern: regular expression to search for.
        sections: iterable of strings to scan.

    Returns:
        An ``(index, section)`` tuple for the first matching entry, or ``None``
        when no entry matches.
    """
    hits = (
        (position, candidate)
        for position, candidate in enumerate(sections)
        if re.search(pattern, candidate, re.IGNORECASE)
    )
    return next(hits, None)
@dataclass(frozen=True)
class FilingItem:
    """
    Immutable pairing of an item number with its text.

    ``str()`` produces a ``## <item_num>`` line followed by the text, and
    ``__rich__`` wraps that same string in a rich ``Markdown`` renderable.
    """
    item_num: str  # placed after "## " in __str__
    text: str  # placed on the line after the "## " line in __str__

    def __str__(self) -> str:
        # NOTE(review): whitespace inside this literal is part of the returned
        # string - confirm against the original file before reformatting it.
        return f"""
## {self.item_num}
{self.text}
"""

    def __rich__(self) -> Markdown:
        # Render the text produced by __str__ as Markdown.
        return Markdown(str(self))