Initial commit

2025-12-09 12:13:01 +01:00
commit 8e654ed209
13332 changed files with 2695056 additions and 0 deletions
--- a/venv/lib/python3.10/site-packages/edgar/attachments.py
+++ b/venv/lib/python3.10/site-packages/edgar/attachments.py
@@ -0,0 +1,950 @@
+import http.server
+import os
+import re
+import signal
+import socketserver
+import tempfile
+import time
+import webbrowser
+import zipfile
+from functools import lru_cache
+from pathlib import Path
+from threading import Thread
+from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
+
+if TYPE_CHECKING:
+    from edgar.company_reports import Report
+    from edgar.sgml.sgml_common import FilingSGML, SGMLDocument
+
+import textwrap
+
+from bs4 import BeautifulSoup
+from pydantic import BaseModel
+from rich import box
+from rich.columns import Columns
+from rich.console import Group
+from rich.panel import Panel
+from rich.table import Column, Table
+from rich.text import Text
+
+from edgar.core import binary_extensions, has_html_content, sec_dot_gov, text_extensions
+from edgar.files.html_documents import get_clean_html
+from edgar.files.markdown import to_markdown
+from edgar.httpclient import async_http_client
+from edgar.httprequests import download_file, download_file_async, get_with_retry
+from edgar.richtools import print_rich, print_xml, repr_rich, rich_to_text
+
+# Attachment descriptions that mark a data file as the filing's XBRL instance document
+xbrl_document_types = ['XBRL INSTANCE DOCUMENT', 'XBRL INSTANCE FILE', 'EXTRACTED XBRL INSTANCE DOCUMENT']
+
+# Public names exported by `from <this module> import *`
+__all__ = ['Attachment', 'Attachments', 'FilingHomepage', 'FilerInfo', 'AttachmentServer', 'sec_document_url', 'get_document_type']
+
+
+def sec_document_url(attachment_url: str) -> str:
+    """
+    Build the full url of an attachment from its path.
+
+    Every "ix?doc=/" or "ix.xhtml?doc=/" fragment is removed from the path and the
+    result is appended to `sec_dot_gov`.
+    """
+    ix_fragment = r"ix(\.xhtml)?\?doc=/"
+    document_path = re.sub(ix_fragment, "", attachment_url)
+    return f"{sec_dot_gov}{document_path}"
+
+def sequence_sort_key(x):
+    """
+    Sort key ordering attachments by their `sequence_number` string.
+
+    Numeric sequence numbers come first (compared as numbers), then non-numeric
+    ones (compared as text); blank sequence numbers sort last.
+    """
+    raw = x.sequence_number
+    if not raw.strip():
+        # Empty or whitespace-only: infinity ranks it after every other group
+        return (float('inf'), '')
+    try:
+        numeric = float(raw)
+    except ValueError:
+        # Not a number: ranked after the numeric group and compared as a string
+        return (1, raw)
+    return (0, numeric)
+
+
+# Mapping of SEC document types and file extensions to the Unicode symbol displayed for them
+FILE_TYPE_SYMBOLS: Dict[str, str] = {
+    # Main SEC filing documents
+    "10-K": "📄",     # Page for the main filing
+    "EX-21.1": "📎",  # Paperclip for exhibits
+    "EX-23.1": "📎",
+    "EX-31.1": "📎",
+    "EX-31.2": "📎",
+    "EX-32.1": "📎",
+    "EX-97.1": "📎",
+
+    # XBRL-related documents
+    "EX-101.SCH": "🔰",  # Beginner symbol for schema
+    "EX-101.CAL": "📊",  # Bar chart for calculations
+    "EX-101.DEF": "📚",  # Books for definitions
+    "EX-101.LAB": "📎",  # Paperclip for labels
+    "EX-101.PRE": "📈",  # Rising chart for presentation
+
+    # Common file types
+    "XML": "🔷",      # Blue diamond for XML files
+    "HTML": "🌍",     # Globe for HTML files
+    "GRAPHIC": "🎨",  # Palette for images
+    "EXCEL": "📊",    # Bar chart for Excel
+    "JSON": "📝",     # Memo for JSON
+    "ZIP": "📦",      # Package for ZIP
+    "CSS": "📃",      # Page for CSS
+    "JS": "📄",       # Page for JavaScript
+    ".css": "📃",     # Page for CSS extension
+    ".js": "📄",      # Page for JS extension
+    "PDF": "📕",      # Book for PDF
+    ".pdf": "📕",     # Book for PDF extension
+    "INFORMATION TABLE": "📊"  # Bar chart for tables
+}
+
+
+def get_extension(filename: str) -> str:
+    """Return the text from the last dot of `filename` onwards, or '' when it has no dot."""
+    _, dot, suffix = filename.rpartition('.')
+    # rpartition yields an empty separator when the filename contains no dot
+    return f"{dot}{suffix}" if dot else ''
+
+def get_document_type(filename: str, declared_document_type: str) -> str:
+    """
+    Sometimes the SEC gets the document type wrong. This function uses the extension to determine the document type
+
+    Args:
+        filename: The name of the file; its extension decides the returned type
+        declared_document_type: The document type declared in the filing
+
+    Returns:
+        The upper-cased extension (with "HTM"/"HTML" normalised to "HTML") when the declared
+        type is a generic file type and the filename has an extension. Otherwise the
+        declared type is returned unchanged.
+    """
+    # "XSLX" is the historical misspelling of "XLSX"; it is kept so existing inputs still match
+    generic_types = {"XML", "HTML", "PDF", "HTM", "JS", "CSS", "ZIP", "XLS", "XLSX", "XSLX", "JSON"}
+    if declared_document_type.upper() not in generic_types:
+        return declared_document_type
+
+    document_type = get_extension(filename)[1:].upper()
+    if not document_type:
+        # No usable extension: the declared type is the only information available
+        return declared_document_type
+    if document_type in ("HTM", "HTML"):
+        return "HTML"
+    return document_type
+
+def get_file_icon(file_type: str, sequence: str = None, filename: str = None) -> str:
+    """
+    Pick the Unicode symbol used to display a file of an SEC filing.
+
+    Args:
+        file_type: The type of the file from SEC filing
+        sequence: The sequence number of the file in the filing
+        filename: The name of the file, used to look up a symbol by extension
+
+    Returns:
+        "📜" (scroll) when sequence is "1", i.e. the main filing document.
+        The symbol registered for an "EX-101.*" type, "📋" for any other "EX-" type,
+        otherwise the symbol registered for the file extension or the file type.
+        "📄" is the fallback when nothing is registered.
+    """
+    if sequence == "1":
+        symbol = "📜"  # Scroll marks the main document
+    elif file_type.startswith("EX-101."):
+        # XBRL exhibits have dedicated symbols
+        symbol = FILE_TYPE_SYMBOLS.get(file_type, "📄")
+    elif file_type.startswith("EX-"):
+        symbol = "📋"  # Clipboard for every other exhibit
+    else:
+        # A symbol keyed by the file extension wins over one keyed by the file type
+        symbol = FILE_TYPE_SYMBOLS.get(get_extension(filename)) if filename else None
+
+    if not symbol:
+        symbol = FILE_TYPE_SYMBOLS.get(file_type, "📄")
+    # Single-character symbols get one trailing space as padding
+    if len(symbol) == 1:
+        symbol = f"{symbol} "
+    return symbol
+
+
+class FilerInfo(BaseModel):
+    """
+    The details of one filer as parsed from the filing home page
+    """
+    company_name: str
+    cik: str
+    identification: str
+    # One entry per "mailer" block found for the filer; may hold fewer than two entries
+    addresses: List[str]
+
+    def __rich__(self):
+        # Show at most the first two addresses; slicing avoids an IndexError when fewer exist
+        return Panel(
+            Columns([self.identification, Text("   "), *self.addresses[:2]]),
+            title=self.company_name
+        )
+
+    def __repr__(self):
+        return repr_rich(self.__rich__())
+
+
+class Attachment:
+    """
+    A class to represent an attachment in an SEC filing
+
+    The content comes from the SGML document when one was supplied, otherwise it is
+    downloaded from the attachment url each time `content` is read.
+    """
+
+    def __init__(self,
+                 sequence_number: str,
+                 description: str,
+                 document: str,
+                 ixbrl: bool,
+                 path: str,
+                 document_type: str,
+                 size: Optional[int],
+                 sgml_document: Optional['SGMLDocument'] = None,
+                 purpose: Optional[str] = None,
+                 filing_sgml: Optional['FilingSGML'] = None):
+        self.sequence_number = sequence_number
+        self.description = description
+        self.document = document
+        self.ixbrl = ixbrl
+        self.path = path
+        self.document_type = document_type
+        self.size = size
+        self.sgml_document: Optional['SGMLDocument'] = sgml_document
+        self.sgml = filing_sgml
+        self.purpose = purpose
+        # Allows tests to override content via property patching
+        self._content_override = None
+
+    @property
+    def content(self):
+        """
+        The content of the attachment.
+
+        Resolution order: a value assigned through the setter (test hook), an empty string
+        for synthetic "/test/" paths, the SGML document content, and finally a download of
+        `self.url`. Nothing is cached here, so each read may trigger a new download.
+        """
+        # If tests have overridden content using the property's setter, honor it
+        override = getattr(self, "_content_override", None)
+        if override is not None:
+            if isinstance(override, property) and override.fget is not None:
+                return override.fget(self)
+            try:
+                return override(self)  # callable override
+            except TypeError:
+                # Not callable with the instance: treat the override as the value itself
+                return override
+
+        # Avoid real network calls for synthetic test paths
+        if isinstance(self.path, str) and self.path.startswith("/test/"):
+            return ""
+
+        if self.sgml_document:
+            return self.sgml_document.content
+        return download_file(self.url)
+
+    @content.setter
+    def content(self, value):
+        # Enable tests to patch instance property via unittest.mock.patch.object
+        self._content_override = value
+
+    @content.deleter
+    def content(self):
+        self._content_override = None
+
+    @property
+    def url(self):
+        """The full url of the attachment, built from its path"""
+        return sec_document_url(self.path)
+
+    @property
+    def extension(self):
+        """The actual extension of the filing document
+         Usually one of .xml or .html or .pdf or .txt or .paper
+         """
+        return os.path.splitext(self.document)[1]
+
+    @property
+    def display_extension(self) -> str:
+        """This is the extension displayed in the html e.g. "es220296680_4-davis.html"
+        The actual extension would be "es220296680_4-davis.xml", that displays as html in the browser
+        """
+        return os.path.splitext(self.document)[1]
+
+    def validate_sequence_number(self, v):
+        """Return `v` if it is all digits or an empty string, otherwise raise ValueError"""
+        if not v.isdigit() and v != '':
+            raise ValueError('sequence_number must be digits or an empty string')
+        return v
+
+    def is_text(self) -> bool:
+        """Is this a text document"""
+        return self.extension in text_extensions
+
+    def is_xml(self):
+        """Is the extension one of .xsd, .xml or .xbrl (case-insensitive)"""
+        return self.extension.lower() in [".xsd", ".xml", ".xbrl"]
+
+    def is_html(self):
+        """Is the extension .htm or .html (case-insensitive)"""
+        return self.extension.lower() in [".htm", ".html"]
+
+    def is_binary(self) -> bool:
+        """Is this a binary document"""
+        return self.extension in binary_extensions
+
+    @property
+    def empty(self):
+        """Some older filings have no document url. So effectively this attachment is empty"""
+        return self.document is None or self.document.strip() == ''
+
+    def download(self, path: Optional[Union[str, Path]] = None) -> Optional[Union[str, bytes]]:
+        """
+        Download the file to a specified path.
+
+        If the path is not provided, return the downloaded content as text or bytes.
+        If the path is a directory, the file is saved with its original name in that directory.
+        Otherwise the file is saved with the given path name.
+
+        Returns:
+            The content when no path is given, else the path of the written file as a string.
+        """
+        if path is None:
+            return self.content
+
+        target = Path(path)
+        file_path = target / self.document if target.is_dir() else target
+
+        # Read the content once: each access of the property may hit the network
+        content = self.content
+        if isinstance(content, bytes):
+            file_path.write_bytes(content)
+        else:
+            # Explicit UTF-8, matching Attachments.download, instead of the locale default
+            file_path.write_text(content, encoding='utf-8')
+
+        return str(file_path)
+
+    def view(self):
+        """
+        Print the attachment to the console.
+
+        Reports are displayed through the filing summary; html and xml text documents
+        are printed; anything else is silently ignored.
+        """
+        # Check if this is a report
+        if self.is_report() and self.sgml:
+            report = self.sgml.filing_summary.reports.get_by_filename(self.document)
+            if report:
+                report.view()
+            return
+
+        if not self.is_text():
+            return
+
+        content = self.content
+        if self.is_html() or has_html_content(content):
+            from edgar import Document
+            print_rich(Document.parse(content))
+        elif self.is_xml():
+            print_xml(content)
+
+    def is_report(self):
+        """
+        Match the document name against the report pattern "R<digits>.htm".
+
+        Returns the `re.Match` object (truthy) or None, not a bool. The pattern is only
+        anchored at the start, so "R1.html" matches as well.
+        """
+        return re.match(r"R\d+\.htm", self.document)
+
+    def text(self):
+        """
+        Get the attachment as plain text.
+
+        Returns:
+            The report text for reports found in the filing summary, the rendered text for
+            html content, the raw content for other text documents, and None for non-text
+            documents.
+        """
+        # Check if this is a report
+        if self.is_report() and self.sgml:
+            report = self.sgml.filing_summary.reports.get_by_filename(self.document)
+            if report:
+                return report.text()
+
+        if not self.is_text():
+            return None
+
+        content = self.content
+        if self.is_html() or has_html_content(content):
+            from edgar import Document
+            return rich_to_text(Document.parse(content))
+        return content
+
+    def markdown(self, include_page_breaks: bool = False, start_page_number: int = 0) -> Optional[str]:
+        """
+        Convert the attachment to markdown format if it's HTML content.
+
+        Args:
+            include_page_breaks: If True, include page break delimiters in the markdown
+            start_page_number: Starting page number for page break markers (default: 0)
+
+        Returns:
+            None if the attachment is not HTML or cannot be converted.
+        """
+        if not self.is_html():
+            return None
+
+        content = self.content
+        if not content:
+            return None
+
+        # Check if content has HTML structure
+        if not has_html_content(content):
+            return None
+
+        # Use the same approach as Filing.markdown() but with page break support
+        clean_html = get_clean_html(content)
+        if clean_html:
+            return to_markdown(clean_html, include_page_breaks=include_page_breaks, start_page_number=start_page_number)
+
+        return None
+
+    def __rich__(self):
+        icon = get_file_icon(self.document_type, self.sequence_number, self.document)
+        # The document type of the main document (sequence "1") is highlighted
+        type_style = "bold deep_sky_blue1" if self.sequence_number == "1" else ""
+        text = Text.assemble((f"{self.sequence_number:<3} ", "dim italic"),
+                             " ",
+                             (self.document, "bold"),
+                             " ", (self.purpose or self.description, "grey54"),
+                             " ",
+                             (icon, ""),
+                             " ",
+                             (self.document_type, type_style)
+                             )
+        return Panel(text, box=box.ROUNDED, width=200, expand=False)
+
+    def __repr__(self):
+        return repr_rich(self.__rich__())
+
+    def __str__(self):
+        return repr_rich(self.__rich__())
+
+
+class Attachments:
+    """
+    A class to represent the attachments of an SEC filing
+
+    `documents` and `data_files` are kept separately; lookups, iteration and `len()`
+    work on the documents followed by the data files.
+    """
+
+    def __init__(self,
+                 document_files: List[Attachment],
+                 data_files: Optional[List[Attachment]],
+                 primary_documents: List[Attachment],
+                 sgml: Optional['FilingSGML'] = None):
+        self.documents = document_files
+        self.data_files = data_files
+        self._attachments = document_files + (data_files or [])
+        self.primary_documents = primary_documents
+        self.sgml = sgml
+        # Cursor used by __next__ when the container itself is advanced with next()
+        self.n = 0
+
+    def __getitem__(self, item: Union[int, str]):
+        """
+        Get the attachment by sequence number as set in the SEC filing SGML file,
+        or by document name when `item` is a non-numeric string.
+
+        Raises:
+            KeyError: if no attachment matches
+        """
+        if isinstance(item, int) or (isinstance(item, str) and item.isdigit()):
+            return self.get_by_sequence(item)
+        if isinstance(item, str):
+            for doc in self._attachments:
+                if doc.document == item:
+                    return doc
+        raise KeyError(f"Document not found: {item}")
+
+    def get_by_sequence(self, sequence: Union[str, int]):
+        """
+        Get the attachment by sequence number starting at 1
+        The sequence number is the exact sequence number in the filing
+
+        Raises:
+            KeyError: if no attachment has that sequence number
+        """
+        wanted = str(sequence)
+        for doc in self._attachments:
+            if doc.sequence_number == wanted:
+                return doc
+        raise KeyError(f"Document not found: {sequence}")
+
+    def get_by_index(self, index: int):
+        """
+        Get the attachment by its zero-based position in the combined list of
+        documents followed by data files
+        """
+        return self._attachments[index]
+
+    def get_report(self, filename: str) -> Optional['Report']:
+        """
+        Get a report by filename. Returns None when there is no SGML or no reports.
+        """
+        if self.sgml:
+            reports = self.sgml.filing_summary.reports
+            if reports:
+                return reports.get_by_filename(filename)
+        return None
+
+    @property
+    def primary_html_document(self) -> Optional[Attachment]:
+        """Get the primary html document on the filing"""
+        for doc in self.primary_documents:
+            if doc.display_extension in (".html", ".htm"):
+                return doc
+        # Most filings have html primary documents. Some don't.
+        # E.g. Form's 3,4,5 do when loaded directly from edgar but not when loaded from local files
+        # However, there are unusual filings with endings like ".fil" that require a return.
+        # So return the first one
+        if len(self.primary_documents) > 0:
+            return self.primary_documents[0]
+        return None
+
+    @property
+    def primary_xml_document(self) -> Optional[Attachment]:
+        """Get the primary xml document on the filing"""
+        for doc in self.primary_documents:
+            if doc.display_extension == ".xml":
+                return doc
+        return None
+
+    @property
+    def text_document(self):
+        """The last document described as "Complete submission text file", if any"""
+        for doc in reversed(self.documents):
+            if doc.description == "Complete submission text file":
+                return doc
+        return None
+
+    @property
+    def exhibits(self):
+        """
+        Get all the exhibits in the filing.
+        This is the primary document plus all the documents listed as EX-XX
+        """
+        primary = self.primary_html_document
+        # A filing without primary documents must not put None into the attachment list
+        primary_documents = [primary] if primary is not None else []
+        exhibits_documents = self.query("re.match('EX-', document_type)", False).documents
+        return Attachments(
+            document_files=primary_documents + exhibits_documents,
+            data_files=[],
+            primary_documents=primary_documents,
+            sgml=self.sgml)
+
+    @property
+    def graphics(self):
+        """The attachments whose document type is GRAPHIC"""
+        return self.query("document_type=='GRAPHIC'")
+
+    def query(self, query_str: str, include_data_files: bool = True):
+        """
+        Query attachments based on a simple query string.
+        Supports conditions on 'document', 'description', and 'document_type'.
+        Example query: "document.endswith('.htm') and 'RELEASE' in description and document_type in ['EX-99.1', 'EX-99', 'EX-99.01']"
+
+        The names `re` and `attachment` are also available inside the query.
+
+        Args:
+            query_str: A Python expression evaluated once per attachment
+            include_data_files: If False the result has no data files
+
+        Returns:
+            A new Attachments holding the attachments for which the expression is truthy.
+        """
+        # SECURITY: the query is evaluated with eval(); only pass trusted query strings.
+        # The attributes are bound as names instead of being substituted into the text,
+        # so string literals inside the query are never rewritten.
+        compiled_query = compile(query_str, "<attachments-query>", "eval")
+
+        def matches(attachment):
+            namespace = {
+                "re": re,
+                "attachment": attachment,
+                "document": attachment.document,
+                "description": attachment.description,
+                "document_type": attachment.document_type,
+            }
+            return eval(compiled_query, namespace)
+
+        new_documents = [attachment for attachment in self.documents if matches(attachment)]
+        if not include_data_files:
+            new_data_files = []
+        elif self.data_files:
+            new_data_files = [attachment for attachment in self.data_files if matches(attachment)]
+        else:
+            new_data_files = None
+
+        return Attachments(document_files=new_documents, data_files=new_data_files,
+                           primary_documents=self.primary_documents, sgml=self.sgml)
+
+    @staticmethod
+    async def _download_all_attachments(attachments: List[Attachment]):
+        """Download all attachments concurrently; results are in the order of `attachments`"""
+        import asyncio
+
+        async with async_http_client() as client:
+            return await asyncio.gather(
+                *[download_file_async(client, attachment.url, as_text=attachment.is_text()) for attachment in attachments])
+
+    def download(self, path: Union[str, Path], archive: bool = False):
+        """
+        Download all the attachments to a specified path.
+        Without archive the path must be an existing directory; each file is saved with
+        its original name in that directory.
+        If archive is True, the attachments are saved in a zip file written at path.
+        path: str or Path - The path to save the attachments
+        archive: bool (default False) - If True, save the attachments in a zip file
+
+        Raises:
+            ValueError: if archive is True and path is a directory, or archive is False
+                        and path is not a directory
+        """
+        if self.sgml:
+            self.sgml.download(path, archive)
+            return
+
+        # Ensure path is a Path object
+        path = Path(path)
+
+        # Validate the destination first so a bad path costs no network traffic
+        if archive:
+            if path.is_dir():
+                raise ValueError("Path must be a zip file name to create zipfile")
+        elif not path.is_dir():
+            raise ValueError("Path must be a directory")
+
+        import asyncio
+        try:
+            loop = asyncio.get_event_loop()
+        except RuntimeError:
+            # No current event loop (e.g. worker thread or newer Python): create one
+            loop = asyncio.new_event_loop()
+            asyncio.set_event_loop(loop)
+        downloaded_files = loop.run_until_complete(Attachments._download_all_attachments(self._attachments))
+
+        if archive:
+            with zipfile.ZipFile(path, 'w') as zipf:
+                for attachment, downloaded in zip(self._attachments, downloaded_files, strict=False):
+                    data = downloaded if isinstance(downloaded, bytes) else downloaded.encode('utf-8')
+                    zipf.writestr(attachment.document, data)
+        else:
+            for attachment, downloaded in zip(self._attachments, downloaded_files, strict=False):
+                file_path = path / attachment.document
+                if isinstance(downloaded, bytes):
+                    file_path.write_bytes(downloaded)
+                else:
+                    file_path.write_text(downloaded, encoding='utf-8')
+
+    def serve(self, port: int = 8000) -> Tuple[Thread, socketserver.TCPServer, str]:
+        """
+        Serve the attachment on a local server
+        The server can be stopped using CTRL-C
+        port: int (default 8000) - The port to serve the attachment
+
+        The call blocks until the server thread has stopped. The attachments are served
+        from a temporary directory that is removed before returning.
+        """
+        with tempfile.TemporaryDirectory() as temp_dir:
+            temp_path = Path(temp_dir)
+            self.download(temp_path)
+
+            class Handler(http.server.SimpleHTTPRequestHandler):
+                def __init__(self, *args, **kwargs):
+                    super().__init__(*args, directory=temp_dir, **kwargs)
+
+            primary_html = os.path.basename(self.primary_html_document.path)
+
+            url = f'http://localhost:{port}/{primary_html}'
+
+            httpd = socketserver.TCPServer(("", port), Handler)
+
+            def serve_forever():
+                with httpd:
+                    httpd.serve_forever()
+
+            thread = Thread(target=serve_forever)
+            thread.daemon = True
+            thread.start()
+
+            # Wait for the server to start
+            time.sleep(1)
+
+            def signal_handler(sig, frame):
+                httpd.shutdown()
+                thread.join()
+
+            previous_handler = signal.signal(signal.SIGINT, signal_handler)
+            try:
+                webbrowser.open(url)
+
+                # Keep the main thread alive to handle signals
+                while thread.is_alive():
+                    time.sleep(0.1)
+            finally:
+                # Restore the caller's handler, otherwise later CTRL-C presses are swallowed
+                signal.signal(signal.SIGINT, previous_handler)
+
+            return thread, httpd, url
+
+    def markdown(self, include_page_breaks: bool = False, start_page_number: int = 0) -> Dict[str, str]:
+        """
+        Convert all HTML attachments to markdown format.
+
+        Args:
+            include_page_breaks: If True, include page break delimiters in the markdown
+            start_page_number: Starting page number for page break markers (default: 0)
+
+        Returns:
+            A dictionary mapping attachment document names to their markdown content.
+            Only includes attachments that can be successfully converted to markdown.
+        """
+        markdown_attachments = {}
+
+        for attachment in self._attachments:
+            if attachment.is_html():
+                md_content = attachment.markdown(include_page_breaks=include_page_breaks, start_page_number=start_page_number)
+                if md_content:
+                    markdown_attachments[attachment.document] = md_content
+
+        return markdown_attachments
+
+    def __len__(self):
+        return len(self._attachments)
+
+    def __iter__(self):
+        # Hand out an independent iterator so nested or repeated loops do not share a cursor.
+        # The cursor used by __next__ is still reset, as before.
+        self.n = 0
+        return iter(self._attachments)
+
+    def __next__(self):
+        # Kept for callers that advance the container directly with next()
+        if self.n < len(self):
+            _attachment = self._attachments[self.n]
+            assert _attachment is not None
+
+            self.n += 1
+            return _attachment
+        raise StopIteration
+
+    def __rich__(self):
+        # Document files
+        document_table = Table(Column('Seq', header_style="dim"),
+                               Column('Document', header_style="dim"),
+                               Column('Description', header_style="dim", min_width=60),
+                               Column('Type', header_style="dim", min_width=16),
+                               title='Attachments',
+                               row_styles=["", "bold"],
+                               box=box.SIMPLE_HEAD)
+        all_attachments = sorted(self.documents + (self.data_files or []), key=sequence_sort_key)
+
+        highlight = "bold deep_sky_blue1"
+        for attachment in all_attachments:
+            # The main document (sequence "1") is highlighted in every column
+            is_primary = attachment.sequence_number == "1"
+            icon = get_file_icon(file_type=attachment.document_type,
+                                 sequence=attachment.sequence_number,
+                                 filename=attachment.document)
+            description = "\n".join(textwrap.wrap(attachment.purpose or attachment.description, 100))
+            type_text = Text.assemble((icon, ""), " ",
+                                      (attachment.document_type, highlight if is_primary else ""))
+            if is_primary:
+                document_table.add_row(Text(attachment.sequence_number, style=highlight),
+                                       Text(attachment.document, style=highlight),
+                                       Text(description, style=highlight),
+                                       type_text)
+            else:
+                document_table.add_row(attachment.sequence_number,
+                                       attachment.document,
+                                       description,
+                                       type_text)
+
+        return document_table
+
+    def __repr__(self):
+        return repr_rich(self.__rich__())
+
+    @classmethod
+    def load(cls, soup: BeautifulSoup):
+        """
+        Load the attachments from the SEC filing home page
+
+        The first table with class "tableFile" holds the documents, the second one,
+        when present, the data files.
+        """
+        tables = soup.find_all('table', class_='tableFile')
+
+        def parse_table(table, documents: bool):
+            primary_sequence = None
+            # The list of attachments which are primary. This is the first document in the filing
+            # Plus additional document with the same sequence number
+            primary_documents: List[Attachment] = []
+            attachments = []
+
+            for row in table.find_all('tr')[1:]:  # Skip header row
+                cols = row.find_all('td')
+                if len(cols) < 5:
+                    # Not a complete attachment row
+                    continue
+                sequence_number = cols[0].text.strip().replace('\xa0', '-')
+                description = cols[1].text.strip()
+                # The document text is the text of the document link.
+                document_text = cols[2].text.strip()
+                document = document_text.split(' ')[0].strip()
+                # Some older filings have no document link at all
+                link = cols[2].a
+                path = (link.get('href') or '').strip() if link is not None else ''
+                document_type = cols[3].text.strip()
+
+                try:
+                    size = int(cols[4].text.strip())
+                except ValueError:
+                    size = None
+
+                attachment = Attachment(
+                    sequence_number=sequence_number,
+                    description=description,
+                    document=document,
+                    ixbrl='iXBRL' in document_text,
+                    path=path,
+                    document_type=document_type,
+                    size=size
+                )
+                attachments.append(attachment)
+
+                # If this is the first document, set it as the primary document
+                if documents:
+                    if primary_sequence is None:
+                        primary_sequence = sequence_number
+                    if sequence_number == primary_sequence:
+                        primary_documents.append(attachment)
+            return attachments, primary_documents
+
+        if tables:
+            document_files, primary_documents = parse_table(tables[0], documents=True)
+        else:
+            document_files, primary_documents = [], []
+
+        if len(tables) > 1:
+            data_files, _ = parse_table(tables[1], documents=False)
+        else:
+            data_files = None
+
+        return cls(document_files, data_files, primary_documents)
+
+
+class AttachmentServer:
+    """
+    Serves the attachments of a filing from a temporary directory on a local HTTP server.
+
+    Creating the server downloads the attachments, binds the port and installs a SIGINT
+    handler; `start()` runs the server and blocks until it is stopped.
+    """
+
+    def __init__(self, attachments: Attachments, port: int = 8000):
+        self.attachments = attachments
+        self.port = port
+        self.thread = None
+        self.httpd = None
+        self.url = None
+        # Held on the instance so the directory lives until stop() removes it
+        self._temp_dir = None
+        self.setup()
+
+    def setup(self):
+        """Download the attachments and prepare, but do not start, the server thread"""
+        self._temp_dir = tempfile.TemporaryDirectory()
+        serve_dir = self._temp_dir.name
+        self.attachments.download(Path(serve_dir))
+
+        class Handler(http.server.SimpleHTTPRequestHandler):
+            def __init__(self, *args, **kwargs):
+                super().__init__(*args, directory=serve_dir, **kwargs)
+
+        primary_html = os.path.basename(self.attachments.primary_html_document.path)
+
+        self.url = f'http://localhost:{self.port}/{primary_html}'
+
+        self.httpd = socketserver.TCPServer(("", self.port), Handler)
+
+        def serve_forever():
+            with self.httpd:
+                self.httpd.serve_forever()
+
+        self.thread = Thread(target=serve_forever)
+        self.thread.daemon = True
+
+        signal.signal(signal.SIGINT, self.signal_handler)
+
+    def start(self):
+        """Start serving, open the primary document in the browser and block until stopped"""
+        self.thread.start()
+        webbrowser.open(self.url)
+
+        # Keep the main thread alive to handle signals
+        while self.thread.is_alive():
+            time.sleep(0.1)
+
+    def stop(self):
+        """Stop the server, release the port and remove the temporary directory"""
+        if self.thread is not None and self.thread.is_alive():
+            self.httpd.shutdown()
+            self.thread.join()
+        elif self.httpd is not None:
+            # shutdown() would block forever when serve_forever() never ran; just close the socket
+            self.httpd.server_close()
+
+        if self._temp_dir is not None:
+            self._temp_dir.cleanup()
+            self._temp_dir = None
+
+    def signal_handler(self, sig, frame):
+        self.stop()
+        # Ensure the program exits; SystemExit does not depend on the `site` builtin exit()
+        raise SystemExit(0)
+
+
+
+class FilingHomepage:
+    """
+    The home page of a filing: its url, the parsed html and the attachments listed on it.
+
+    Filers and filing dates are parsed from the html on first use and then cached on
+    the instance.
+    """
+
+    def __init__(self,
+                 url: str,
+                 soup: BeautifulSoup,
+                 attachments: Attachments):
+        self.attachments = attachments
+        self.url = url
+        self._soup = soup
+        # Per-instance caches. The dates are stored in a 1-tuple so that a parsed
+        # result of None can be told apart from "not parsed yet".
+        self._filers: Optional[List[FilerInfo]] = None
+        self._filing_dates: Optional[tuple] = None
+
+    def open(self):
+        """Open the filing home page in the browser"""
+        webbrowser.open(self.url)
+
+    @property
+    def documents(self):
+        return self.attachments.documents
+
+    @property
+    def datafiles(self):
+        return self.attachments.data_files
+
+    @property
+    def primary_html_document(self) -> Optional[Attachment]:
+        """Get the primary html document on the filing"""
+        return self.attachments.primary_html_document
+
+    @property
+    def primary_xml_document(self) -> Optional[Attachment]:
+        """Get the primary xml document on the filing"""
+        return self.attachments.primary_xml_document
+
+    @property
+    def primary_documents(self):
+        return self.attachments.primary_documents
+
+    @property
+    def text_document(self):
+        return self.attachments.text_document
+
+    @property
+    def xbrl_document(self):
+        """Find and return the xbrl document, or None when there is none."""
+        if self.datafiles is None:
+            return None
+        for datafile in reversed(self.datafiles):
+            if datafile.description in xbrl_document_types:
+                return datafile
+        return None
+
+    def get_filers(self):
+        """
+        Get the filers listed on the page as a list of FilerInfo.
+
+        The page is parsed once per instance; later calls return the same list.
+        """
+        cached = getattr(self, "_filers", None)
+        if cached is None:
+            cached = self._filers = self._parse_filers()
+        return cached
+
+    def _parse_filers(self) -> List[FilerInfo]:
+        # Build one FilerInfo per "filerDiv" block; missing parts become empty values
+        filer_infos = []
+        for filer_div in self._soup.find_all("div", id="filerDiv"):
+            company_name = ""
+            cik = ""
+            identification = ""
+
+            company_info_div = filer_div.find("div", class_="companyInfo")
+            if company_info_div is not None:
+                company_name_span = company_info_div.find("span", class_="companyName")
+                if company_name_span:
+                    full_text = company_name_span.text.strip()
+                    # Split the text into company name and CIK
+                    parts = full_text.split('CIK: ')
+                    company_name = parts[0].strip()
+                    cik = parts[1].split()[0] if len(parts) > 1 else ""
+
+                    # Clean up the company name
+                    company_name = re.sub("\n", "", company_name).replace("(Filer)", "").strip()
+
+                # Get the identification information
+                ident_info_div = company_info_div.find("p", class_="identInfo")
+                if ident_info_div is not None:
+                    # Replace <br> with newlines
+                    for br in ident_info_div.find_all("br"):
+                        br.replace_with("\n")
+                    identification = ident_info_div.text
+
+            # Get the mailing information, removing the whitespace that follows each newline
+            mailer_divs = filer_div.find_all("div", class_="mailer")
+            addresses = [re.sub(r'\n\s+', '\n', mailer_div.text.strip())
+                         for mailer_div in mailer_divs]
+
+            filer_infos.append(FilerInfo(company_name=company_name, cik=cik,
+                                         identification=identification, addresses=addresses))
+
+        return filer_infos
+
+    @property
+    def period_of_report(self) -> Optional[str]:
+        "Get the period of report, or None when the page has no filing dates"
+        dates = self.get_filing_dates()
+        if dates is None:
+            return None
+        return dates[2]
+
+    def get_filing_dates(self) -> Optional[Tuple[str, str, Optional[str]]]:
+        """
+        Get (filing date, accepted date, period of report) from the page.
+
+        Returns None when the page has no "formGrouping" block with both dates. The period
+        is None when the page does not show one. The result is cached on the instance.
+        """
+        cached = getattr(self, "_filing_dates", None)
+        if cached is None:
+            cached = self._filing_dates = (self._parse_filing_dates(),)
+        return cached[0]
+
+    def _parse_filing_dates(self) -> Optional[Tuple[str, str, Optional[str]]]:
+        # Find the form grouping divs
+        grouping_divs = self._soup.find_all("div", class_="formGrouping")
+        if len(grouping_divs) == 0:
+            return None
+        info_divs = grouping_divs[0].find_all("div", class_="info")
+        if len(info_divs) < 2:
+            # Both the filing date and the accepted date are required
+            return None
+        filing_date = info_divs[0].text.strip()
+        accepted_date = info_divs[1].text.strip()
+
+        if len(grouping_divs) > 1:
+            first_info_div = grouping_divs[1].find("div", class_="info")
+            if first_info_div:
+                return filing_date, accepted_date, first_info_div.text.strip()
+        return filing_date, accepted_date, None
+
+    @classmethod
+    def load(cls, url: str):
+        """Fetch the filing home page at `url` and parse its attachments"""
+        response = get_with_retry(url)
+        soup = BeautifulSoup(response.text, 'html.parser')
+        attachments = Attachments.load(soup)
+        return cls(url, soup, attachments)
+
+    def __repr__(self):
+        return repr_rich(self.__rich__())
+
+    def __rich__(self):
+        return Panel(
+            Group(
+                self.attachments,
+                Group(
+                    *[filer_info.__rich__() for filer_info in self.get_filers()]
+                )
+            ))