# edgartools/venv/lib/python3.10/site-packages/edgar/forms.py

import re
from dataclasses import dataclass
from functools import lru_cache

import pandas as pd
from bs4 import BeautifulSoup
from rich.console import Group, Text
from rich.markdown import Markdown

from edgar.core import sec_dot_gov
from edgar.httprequests import download_file
from edgar.richtools import df_to_rich_table, repr_rich

# Public names exported by a star-import of this module.
__all__ = [
    'SecForms',
    'list_forms',
    'FUND_FORMS'
]

# Form type codes grouped under the "fund forms" name; nothing in this
# section of the file reads the list, so its consumers live elsewhere.
FUND_FORMS = ["NPORT-P", "NPORT-EX"]


@lru_cache(maxsize=1)
def list_forms():
    """Scrape the SEC forms index and return it as a ``SecForms`` collection.

    Downloads pages 0 through 6 of ``https://www.sec.gov/forms`` and reads
    every row in the body of the first HTML table on each page. The result
    is memoised with ``lru_cache``, so the pages are only downloaded on the
    first call.

    Pages that contain no table body, and rows with fewer than five cells,
    are skipped rather than raising, so a short or changed listing degrades
    to fewer rows instead of a crash.

    Returns:
        A ``SecForms`` wrapping a DataFrame with the columns ``Form``,
        ``Description``, ``Url``, ``LastUpdated``, ``SECNumber`` and
        ``Topics``. The columns are present even when no rows were found.
    """
    columns = ["Form", "Description", "Url", "LastUpdated", "SECNumber", "Topics"]
    rows = []
    # NOTE(review): the page count is fixed at 7 — confirm it still matches
    # the pagination of the SEC forms listing.
    for page in range(7):
        forms_html = download_file(f'https://www.sec.gov/forms?page={page}')
        soup = BeautifulSoup(forms_html, features="lxml")
        data_table = soup.find("table")
        tbody = data_table.find("tbody") if data_table is not None else None
        if tbody is None:
            # No listing on this page; nothing to collect.
            continue

        for tr in tbody.find_all('tr'):
            cells = tr.find_all('td')
            if len(cells) < 5:
                # Not a form row (e.g. a placeholder/colspan row).
                continue

            # Look the anchor up once; the description cell may have no link,
            # and a link may lack an href.
            link = cells[1].find('a')
            href = link.get('href') if link is not None else None

            # Each cell's text carries its own label prefix, which is removed.
            rows.append({"Form": cells[0].text.replace("Number:", "").strip(),
                         "Description": cells[1].text.replace("Description:", "").strip(),
                         "Url": f"{sec_dot_gov}{href}" if href else "",
                         "LastUpdated": cells[2].text.replace("Last Updated:", "").strip(),
                         "SECNumber": cells[3].text.replace("SEC Number:", "").strip(),
                         "Topics": cells[4].text.replace("Topic(s):", "").strip()
                         })

    # Passing the columns explicitly keeps the schema stable for an empty scrape.
    return SecForms(pd.DataFrame(rows, columns=columns))


@dataclass(frozen=True)
class SecForm:
    """Immutable record describing one SEC form.

    Holds the form identifier, its description, a URL, the SEC number and
    the topics text, all as strings.
    """
    form: str
    description: str
    url: str
    sec_number: str
    topics: str

    def __str__(self):
        return f"Form {self.form}: {self.description}"

    def __repr__(self):
        # The repr is the rich rendering converted to text by ``repr_rich``.
        renderable = self.__rich__()
        return repr_rich(renderable)

    def __rich__(self):
        """Build the rich renderable: a title line above a one-row table."""
        title = Text(f"Form {self.form}: {self.description}")
        record = {"Topics": self.topics, "SEC Number": self.sec_number, "Url": self.url}
        details = pd.DataFrame([record]).set_index("Topics")
        table = df_to_rich_table(details, index_name="Topics")
        return Group(title, table)

    def open(self):
        """Open this form's URL using the standard-library ``webbrowser`` module."""
        from webbrowser import open as open_in_browser
        open_in_browser(self.url)


class SecForms:
    """Collection of SEC forms backed by a pandas DataFrame.

    The methods here read the ``Form``, ``Description``, ``Url``,
    ``SECNumber`` and ``Topics`` columns of the frame.
    """

    def __init__(self,
                 data: pd.DataFrame):
        """Wrap ``data``.

        Args:
            data: The forms DataFrame. Another ``SecForms`` is also accepted,
                in which case its underlying frame is shared; this keeps
                ``SecForms(list_forms())`` from nesting one collection
                inside another.
        """
        if isinstance(data, SecForms):
            data = data.data
        self.data = data

    def get_form(self, form: str) -> "SecForm | None":
        """Return the ``SecForm`` whose ``Form`` column equals ``form``.

        Args:
            form: The form identifier to look up; it is compared as a string.

        Returns:
            A ``SecForm`` when exactly one row matches, otherwise ``None``
            (no match, or more than one match).
        """
        # A boolean mask instead of a string-built ``query`` expression:
        # identifiers containing quotes no longer break the lookup and no
        # caller-supplied text is evaluated as an expression.
        row = self.data[self.data["Form"] == str(form)]
        if len(row) != 1:
            return None
        return SecForm(
            form=row["Form"].item(),
            description=row["Description"].item(),
            sec_number=row["SECNumber"].item(),
            url=row["Url"].item(),
            topics=row["Topics"].item()
        )

    @classmethod
    def load(cls):
        """Return the forms produced by ``list_forms()`` as a new collection."""
        # ``list_forms`` already returns a SecForms; ``__init__`` unwraps it.
        return cls(list_forms())

    def __getitem__(self, item):
        return self.get_form(item)

    def __len__(self):
        return len(self.data)

    def summary(self) -> pd.DataFrame:
        """Return only the ``Form``, ``Description`` and ``Topics`` columns."""
        return self.data[['Form', 'Description', 'Topics']]

    def __rich__(self):
        return Group(
            Text("SEC Forms List"),
            df_to_rich_table(self.summary().set_index("Form"), index_name="Form", max_rows=200)
        )

    def __repr__(self):
        return repr_rich(self.__rich__())


def find_section(pattern, sections):
    """Locate the first section that matches ``pattern``, ignoring case.

    Args:
        pattern: Regular expression handed to ``re.search``.
        sections: Iterable of strings, searched in order.

    Returns:
        An ``(index, section)`` tuple for the first match, or ``None`` when
        no section matches.
    """
    hits = (
        (position, candidate)
        for position, candidate in enumerate(sections)
        if re.search(pattern, candidate, re.IGNORECASE)
    )
    # The generator is lazy, so the search stops at the first match.
    return next(hits, None)


@dataclass(frozen=True)
class FilingItem:
    """Immutable pair of an item number and that item's text."""
    item_num: str
    text: str

    def __str__(self):
        """Return the item as Markdown: a level-2 heading, then the text."""
        # Built without source indentation: lines that start with four or
        # more spaces are parsed by Markdown as an indented code block, which
        # would prevent the "##" line from rendering as a heading.
        return f"\n## {self.item_num}\n{self.text}\n"

    def __rich__(self):
        return Markdown(str(self))