import itertools
import json
import pickle
import re
import webbrowser
from contextlib import nullcontext
from dataclasses import dataclass
from datetime import datetime
from functools import cached_property, lru_cache
from io import BytesIO
from os import PathLike
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union, cast
import httpx
import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.compute as pc
import pyarrow.csv as pa_csv
import pyarrow.parquet as pq
from rich import box
from rich.columns import Columns
from rich.console import Group
from rich.panel import Panel
from rich.status import Status
from rich.table import Table
from rich.text import Text
from edgar._markdown import text_to_markdown
from edgar._party import Address
from edgar.attachments import Attachment, Attachments, AttachmentServer, FilingHomepage
from edgar.core import (
DataPager,
IntString,
PagingState,
Quarters,
YearAndQuarter,
YearAndQuarters,
Years,
cache_except_none,
current_year_and_quarter,
filing_date_to_year_quarters,
is_probably_html,
is_start_of_quarter,
listify,
log,
parallel_thread_map,
quarters_in_year,
sec_edgar,
)
from edgar.dates import InvalidDateException
from edgar.files.html import Document
from edgar.files.html_documents import get_clean_html
from edgar.files.htmltools import html_sections
from edgar.files.markdown import to_markdown
from edgar.filtering import filter_by_accession_number, filter_by_cik, filter_by_date, filter_by_exchange, filter_by_form, filter_by_ticker
from edgar.formatting import accession_number_text, display_size
from edgar.headers import FilingDirectory, IndexHeaders
from edgar.httprequests import download_file, download_text, download_text_between_tags
from edgar.reference import describe_form
from edgar.reference.tickers import Exchange, find_ticker, find_ticker_safe
from edgar.richtools import Docs, print_rich, repr_rich, rich_to_text
from edgar.search import BM25Search, RegexSearch
from edgar.sgml import FilingHeader, FilingSGML, Reports, Statements
from edgar.storage import is_using_local_storage, local_filing_path
from edgar.xbrl import XBRL, XBRLFilingWithNoXbrlData
""" Contain functionality for working with SEC filing indexes and filings
The module contains the following functions
- `get_filings(year, quarter, index)`
"""
__all__ = [
'Filing',
'Filings',
'get_filings',
'FilingHeader',
'PagingState',
'Attachment',
'Attachments',
'FilingHomepage',
'available_quarters',
'get_by_accession_number',
'filing_date_to_year_quarters'
]
full_index_url = "https://www.sec.gov/Archives/edgar/full-index/{}/QTR{}/{}.{}"
daily_index_url = "https://www.sec.gov/Archives/edgar/daily-index/{}/QTR{}/{}.{}.idx"
filing_homepage_url_re = re.compile(f"{sec_edgar}/data/[0-9]{{1,}}/[0-9]{{10}}-[0-9]{{2}}-[0-9]{{4}}-index.html")
full_or_daily = ['daily', 'full']
index_types = ['form', 'company', 'xbrl']
file_types = ['gz', 'idx']
form_index = "form"
xbrl_index = "xbrl"
company_index = "company"
index_field_delimiter_re = re.compile(r" {2,}")
max_concurrent_http_connections = 10
accession_number_re = re.compile(r"\d{10}-\d{2}-\d{6}$")
xbrl_document_types = ['XBRL INSTANCE DOCUMENT', 'XBRL INSTANCE FILE', 'EXTRACTED XBRL INSTANCE DOCUMENT']
def is_valid_filing_date(filing_date: str) -> bool:
if ":" in filing_date:
# Check for only one colon
if filing_date.count(":") > 1:
return False
start_date, end_date = filing_date.split(":")
if start_date:
if not is_valid_date(start_date):
return False
if end_date:
if not is_valid_date(end_date):
return False
else:
if not is_valid_date(filing_date):
return False
return True
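# Illustrative examples of the date strings accepted above (a single date or a
# colon-separated range, either side of which may be left empty):
#   >>> is_valid_filing_date("2021-01-01")                          # True
#   >>> is_valid_filing_date("2021-01-01:2021-03-31")               # True
#   >>> is_valid_filing_date(":2021-03-31")                         # True (open start)
#   >>> is_valid_filing_date("2021/01/01")                          # False (wrong separator)
#   >>> is_valid_filing_date("2021-01-01:2021-02-01:2021-03-01")    # False (two colons)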
def is_valid_date(date_str: str, date_format: str = "%Y-%m-%d") -> bool:
pattern = r"^\d{4}-\d{2}-\d{2}$"
if not re.match(pattern, date_str):
return False
try:
datetime.strptime(date_str, date_format)
return True
except ValueError:
return False
def get_previous_quarter(year, quarter) -> Tuple[int, int]:
if not quarter:
_, quarter = current_year_and_quarter()
# Given a year and quarter return the previous quarter
if quarter == 1:
return year - 1, 4
else:
return year, quarter - 1
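# Quick sketch of the quarter arithmetic above:
#   >>> get_previous_quarter(2024, 1)   # (2023, 4) - wraps to Q4 of the prior year
#   >>> get_previous_quarter(2024, 3)   # (2024, 2)
# If quarter is falsy, the current quarter is substituted before stepping back.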
@lru_cache(maxsize=1)
def available_quarters() -> YearAndQuarters:
"""
Get the list of (year, quarter) tuples for which SEC EDGAR full index files are available, from 1994 Q3 to the current quarter
:return:
"""
current_year, current_quarter = current_year_and_quarter()
start_quarters = [(1994, 3), (1994, 4)]
in_between_quarters = list(itertools.product(range(1995, current_year), range(1, 5)))
end_quarters = list(itertools.product([current_year], range(1, current_quarter + 1)))
return start_quarters + in_between_quarters + end_quarters
def expand_quarters(year: Union[int, List[int]],
quarter: Optional[Union[int, List[int]]] = None) -> YearAndQuarters:
"""
Expand a year (or list of years) and an optional quarter (or list of quarters) into the full list of (year, quarter) tuples
:param year: The year or years
:param quarter: The quarter or quarters
:return:
"""
years = listify(year)
quarters = listify(quarter) if quarter else quarters_in_year
return [yq
for yq in itertools.product(years, quarters)
if yq in available_quarters()
]
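# Illustrative expansions (results are filtered to quarters EDGAR actually has,
# so future quarters drop out):
#   >>> expand_quarters(2021, 4)                # [(2021, 4)]
#   >>> expand_quarters([2020, 2021], [3, 4])
#   [(2020, 3), (2020, 4), (2021, 3), (2021, 4)]
#   >>> expand_quarters(2021)                   # all four quarters of 2021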
class FileSpecs:
"""
A specification for a fixed width file
"""
def __init__(self, specs: List[Tuple[str, Tuple[int, int], pa.lib.DataType]]):
self._spec_type = specs[0][0].title()
self.splits = list(zip(*specs, strict=False))[1]
self.schema = pa.schema(
[
pa.field(name, datatype)
for name, _, datatype in specs
]
)
def __str__(self):
return f"{self._spec_type} File Specs"
form_specs = FileSpecs(
[("form", (0, 12), pa.string()),
("company", (12, 74), pa.string()),
("cik", (74, 82), pa.int32()),
("filing_date", (85, 97), pa.string()),
("accession_number", (97, 141), pa.string())
]
)
company_specs = FileSpecs(
[("company", (0, 62), pa.string()),
("form", (62, 74), pa.string()),
("cik", (74, 82), pa.int32()),
("filing_date", (85, 97), pa.string()),
("accession_number", (97, 141), pa.string())
]
)
FORM_INDEX_FORM_COLUMN = 0
COMPANY_INDEX_FORM_COLUMN = -4
INDEX_COLUMN_NAMES = ['form', 'company', 'cik', 'filing_date', 'accession_number']
def read_fixed_width_index(index_text: str,
file_specs: FileSpecs) -> pa.Table:
"""
Read the index text as a fixed width file
:param index_text: The index text as downloaded from SEC Edgar
:param file_specs: The file specs containing the column definitions
:return:
"""
# Treat as a single array
lines = index_text.rstrip('\n').split('\n')
# Find where the data starts
data_start = 0
for index, line in enumerate(lines):
if line.startswith("-----"):
data_start = index + 1
break
data_lines = lines[data_start:]
array = pa.array(data_lines)
# Then split into separate arrays by file specs
arrays = [
pc.utf8_trim_whitespace(
pc.utf8_slice_codeunits(array, start=start, stop=stop))
for start, stop in file_specs.splits
]
# Change the CIK to int
arrays[2] = pa.compute.cast(arrays[2], pa.int32())
# Convert filing_date from string to date
# Some files have dates as %Y-%m-%d, others as %Y%m%d
date_format = '%Y-%m-%d' if len(arrays[3][0].as_py()) == 10 else '%Y%m%d'
arrays[3] = pc.cast(pc.strptime(arrays[3], date_format, 'us'), pa.date32())
# Get the accession number from the file directory_or_file
arrays[4] = pa.compute.utf8_slice_codeunits(
pa.compute.utf8_rtrim(arrays[4], characters=".txt"), start=-20)
return pa.Table.from_arrays(
arrays=arrays,
names=list(file_specs.schema.names),
)
def read_index_file(index_text: str,
form_column: int = FORM_INDEX_FORM_COLUMN,
filing_date_format:str="%Y-%m-%d") -> pa.Table:
"""
Read the index text using multiple spaces as delimiter
"""
# Split into lines and find the data start
lines = index_text.rstrip('\n').split('\n')
data_start = 0
for index, line in enumerate(lines):
if line.startswith("-----"):
data_start = index + 1
break
# Process data lines
data_lines = lines[data_start:]
# Handle empty lines
if not data_lines:
return _empty_filing_index()
# The form and company name can both contain spaces; the remaining fields cannot.
# It is assumed that the form will only contain runs of a single space (e.g. "1-A POS"),
# so splitting on runs of 2 or more spaces keeps form names intact.
rows = [re.split(index_field_delimiter_re, line.strip()) for line in data_lines if line.strip()]
# Form names are in a different column depending on the index type.
forms = pa.array([row[form_column] for row in rows])
# CIKs are always the third-to-last field
ciks = pa.array([int(row[-3]) for row in rows], type=pa.int32())
# Dates are always second-to-last field
dates = pc.strptime(pa.array([row[-2] for row in rows]), filing_date_format, 'us')
dates = pc.cast(dates, pa.date32())
# Accession numbers are in the file path
accession_numbers = pa.array([row[-1][-24:-4] for row in rows])
# Company names may have runs of more than one space so anything which hasn't already
# been extracted is concatenated to form the company name.
if form_column == 0:
companies = pa.array([" ".join(row[1:-3]) for row in rows])
else:
companies = pa.array([" ".join(row[0:form_column]) for row in rows])
return pa.Table.from_arrays(
[forms, companies, ciks, dates, accession_numbers],
names=INDEX_COLUMN_NAMES
)
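# Sketch of how a single index line is parsed (layout and values are illustrative,
# not an actual EDGAR record):
#   line = "10-K  EXAMPLE CORP  1234567  2023-02-03  edgar/data/1234567/0001234567-23-000123.txt"
#   re.split(index_field_delimiter_re, line) ->
#       ['10-K', 'EXAMPLE CORP', '1234567', '2023-02-03', 'edgar/data/.../0001234567-23-000123.txt']
#   form = row[0], cik = row[-3], filing_date = row[-2],
#   accession_number = row[-1][-24:-4] -> '0001234567-23-000123'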
def read_form_index_file(index_text: str) -> pa.Table:
"""Read the form index file"""
return read_index_file(index_text, form_column=FORM_INDEX_FORM_COLUMN)
def read_company_index_file(index_text: str) -> pa.Table:
"""Read the company index file"""
return read_index_file(index_text, form_column=COMPANY_INDEX_FORM_COLUMN)
def read_pipe_delimited_index(index_text: str) -> pa.Table:
"""
Read the index file as a pipe delimited index
:param index_text: The index text as read from SEC Edgar
:return: The index data as a pyarrow table
"""
index_table = pa_csv.read_csv(
BytesIO(index_text.encode()),
parse_options=pa_csv.ParseOptions(delimiter="|"),
read_options=pa_csv.ReadOptions(skip_rows=10,
column_names=['cik', 'company', 'form', 'filing_date', 'accession_number'])
)
index_table = index_table.set_column(
0,
"cik",
pa.compute.cast(index_table[0], pa.int32())
).set_column(4,
"accession_number",
pc.utf8_slice_codeunits(index_table[4], start=-24, stop=-4))
return index_table
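# Illustrative XBRL index record (pipe-delimited; values are made up). The final
# column is a file path, and slicing code units [-24:-4] strips the directory
# prefix and the '.txt' suffix, leaving the 20-character accession number:
#   1234567|EXAMPLE CORP|10-K|2023-02-03|edgar/data/1234567/0001234567-23-000123.txt
#   -> accession_number '0001234567-23-000123'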
def fetch_filing_index(year_and_quarter: YearAndQuarter,
index: str
):
year, quarter = year_and_quarter
url = full_index_url.format(year, quarter, index, "gz")
try:
index_table = fetch_filing_index_at_url(url, index)
return (year, quarter), index_table
except httpx.HTTPStatusError as e:
if is_start_of_quarter() and e.response.status_code == 403:
# Return an empty filing index
return (year, quarter), _empty_filing_index()
else:
raise
def fetch_daily_filing_index(date: str,
index: str = 'form'):
year, month, day = date.split("-")
quarter = (int(month) - 1) // 3 + 1
url = daily_index_url.format(year, quarter, index, date.replace("-", ""))
index_table = fetch_filing_index_at_url(url, index, filing_date_format='%Y%m%d')
return index_table
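# Example of the date handling above: for date "2023-07-14" the quarter is
# (7 - 1) // 3 + 1 == 3, so the daily index is fetched from .../daily-index/2023/QTR3/
# with the date rendered as "20230714".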
def fetch_filing_index_at_url(url: str,
index: str,
filing_date_format:str='%Y-%m-%d') -> Optional[pa.Table]:
index_text = download_text(url=url)
assert index_text is not None
if index == "xbrl":
index_table: pa.Table = read_pipe_delimited_index(str(index_text))
else:
# Read as a fixed width index file
form_column = FORM_INDEX_FORM_COLUMN if index == "form" else COMPANY_INDEX_FORM_COLUMN
index_table: pa.Table = read_index_file(index_text, form_column=form_column, filing_date_format=filing_date_format)
return index_table
def _empty_filing_index():
schema = pa.schema([
('form', pa.string()),
('company', pa.string()),
('cik', pa.int32()),
('filing_date', pa.date32()),
('accession_number', pa.string()),
])
# Create an empty table with the defined schema
return pa.Table.from_arrays([
pa.array([], type=pa.string()),
pa.array([], type=pa.string()),
pa.array([], type=pa.int32()),
pa.array([], type=pa.date32()),
pa.array([], type=pa.string()),
], schema=schema)
def get_filings_for_quarters(year_and_quarters: YearAndQuarters,
index="form") -> pa.Table:
"""
Get the filings for the quarters
:param year_and_quarters:
:param index: The index to use - "form", "company", or "xbrl"
:return: The filings as a pyarrow table
"""
if len(year_and_quarters) == 1:
_, final_index_table = fetch_filing_index(year_and_quarter=year_and_quarters[0],
index=index)
else:
quarters_and_indexes = parallel_thread_map(
lambda yq: fetch_filing_index(year_and_quarter=yq, index=index),
year_and_quarters
)
quarter_and_indexes_sorted = sorted(quarters_and_indexes, key=lambda d: d[0])
index_tables = [fd[1] for fd in quarter_and_indexes_sorted]
final_index_table: pa.Table = pa.concat_tables(index_tables)
return final_index_table
class Filings:
"""
A container for filings
"""
def __init__(self,
filing_index: pa.Table,
original_state: Optional[PagingState] = None):
self.data: pa.Table = filing_index
self.data_pager = DataPager(self.data)
# This keeps track of where the index should start in case this is just a page in the Filings
self._original_state = original_state or PagingState(0, len(self.data))
self._hash = None
@property
def docs(self):
return Docs(self)
def to_pandas(self, *columns) -> pd.DataFrame:
"""Return the filing index as a python dataframe"""
df = self.data.to_pandas()
return df.filter(columns) if len(columns) > 0 else df
def save_parquet(self, location: str):
"""Save the filing index as parquet"""
pq.write_table(self.data, location)
def save(self, location: str):
"""Save the filing index as parquet"""
self.save_parquet(location)
def download(self, data_directory: Optional[str] = None):
"""
Download the filings based on the accession numbers in this Filings object.
This is a convenience method that calls `download_filings` with this object
as the `filings` parameter.
Args:
data_directory: Directory to save the downloaded files. Defaults to the Edgar data directory.
"""
from edgar.storage import download_filings
download_filings(data_directory=data_directory,
overwrite_existing=True,
filings=self)
def get_filing_at(self, item: int, enrich: bool = True):
"""Get filing at index, optionally enriching with related entities"""
# Get the primary filing data
accession_no = self.data['accession_number'][item].as_py()
related_entities = []
if enrich:
# Use PyArrow to find all entities with same accession number
# Limit search to nearby entries for performance (+/- 10 positions)
start = max(0, item - 10)
end = min(len(self.data), item + 11)
# Slice the data and search efficiently
slice_data = self.data.slice(start, end - start)
mask = pc.equal(slice_data['accession_number'], accession_no)
for idx in range(len(mask)):
if mask[idx].as_py():
actual_idx = start + idx
if actual_idx != item: # Skip the primary filing
related_entities.append({
'cik': slice_data['cik'][idx].as_py(),
'company': slice_data['company'][idx].as_py()
})
# Create Filing with related entities
return Filing(
cik=self.data['cik'][item].as_py(),
company=self.data['company'][item].as_py(),
form=self.data['form'][item].as_py(),
filing_date=self.data['filing_date'][item].as_py(),
accession_no=accession_no,
related_entities=related_entities
)
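# Illustrative scenario for the enrichment above: if rows 41-43 of the index all
# share accession number 0001234567-23-000123 (a joint filing), then
# get_filing_at(42) returns the Filing for row 42 with the CIK/company pairs from
# rows 41 and 43 attached as related_entities. Co-filers more than 10 rows away
# are not picked up; that is the deliberate performance trade-off.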
@property
def date_range(self) -> Tuple[datetime, datetime]:
"""Return a tuple of the start and end dates in the filing index"""
min_max_dates: dict[str, datetime] = pc.min_max(self.data['filing_date']).as_py()
return min_max_dates['min'], min_max_dates['max']
@property
def start_date(self) -> Optional[str]:
"""Return the start date for the filings"""
return str(self.date_range[0]) if self.date_range[0] else None
@property
def end_date(self) -> Optional[str]:
"""Return the end date for the filings"""
return str(self.date_range[1]) if self.date_range[1] else None
def latest(self, n: int = 1):
"""Get the latest n filings"""
sort_indices = pc.sort_indices(self.data, sort_keys=[("filing_date", "descending")])
sort_indices_top = sort_indices[:min(n, len(sort_indices))]
latest_filing_index = pc.take(data=self.data, indices=sort_indices_top)
filings = Filings(latest_filing_index)
if len(filings) == 1:
return filings[0]
return filings
def filter(self, *,
form: Optional[Union[str, List[IntString]]] = None,
amendments: Optional[bool] = None,
filing_date: Optional[str] = None,
date: Optional[str] = None,
cik: Optional[Union[IntString, List[IntString]]] = None,
exchange: Optional[Union[str, List[str], Exchange, List[Exchange]]] = None,
ticker: Optional[Union[str, List[str]]] = None,
accession_number: Optional[Union[str, List[str]]] = None) -> 'Filings':
"""
Get some filings
>>> filings = get_filings()
Filter the filings
On a date
>>> filings.filter(date="2020-01-01")
Up to a date
>>> filings.filter(date=":2020-03-01")
From a date
>>> filings.filter(date="2020-01-01:")
Between dates
>>> filings.filter(date="2020-01-01:2020-03-01")
:param form: The form or list of forms to filter by
:param amendments: Whether to include amendments to the forms e.g. include "10-K/A" if filtering for "10-K"
:param filing_date: The filing date
:param date: An alias for the filing date
:param cik: The CIK or list of CIKs to filter by
:param exchange: The exchange or list of exchanges to filter by
:param ticker: The ticker or list of tickers to filter by
:param accession_number: The accession number or list of accession numbers to filter by
:return: The filtered filings
"""
filing_index = self.data
forms = form
if isinstance(forms, list):
forms = [str(f) for f in forms]
# Filter by form
if forms:
filing_index = filter_by_form(filing_index, form=forms, amendments=amendments)
elif amendments is not None:
# Get the unique values of the form as a pylist
forms = list(set([form.replace("/A", "") for form in pc.unique(filing_index['form']).to_pylist()]))
filing_index = filter_by_form(filing_index, form=forms, amendments=amendments)
# filing_date and date are aliases
filing_date = filing_date or date
if filing_date:
try:
filing_index = filter_by_date(filing_index, filing_date, 'filing_date')
except InvalidDateException as e:
log.error(e)
return Filings(_empty_filing_index())
# Filter by cik
if cik:
filing_index = filter_by_cik(filing_index, cik)
# Filter by exchange
if exchange:
filing_index = filter_by_exchange(filing_index, exchange)
if ticker:
filing_index = filter_by_ticker(filing_index, ticker)
# Filter by accession number
if accession_number:
filing_index = filter_by_accession_number(filing_index, accession_number=accession_number)
return Filings(filing_index)
def _head(self, n):
assert n > 0, "The number of filings to select - `n`, should be greater than 0"
return self.data.slice(0, min(n, len(self.data)))
def head(self, n: int):
"""Get the first n filings"""
selection = self._head(n)
return Filings(selection)
def _tail(self, n):
assert n > 0, "The number of filings to select - `n`, should be greater than 0"
return self.data.slice(max(0, len(self.data) - n), len(self.data))
def tail(self, n: int):
"""Get the last n filings"""
selection = self._tail(n)
return Filings(selection)
def _sample(self, n: int):
assert len(self) >= n > 0, \
"The number of filings to select - `n`, should be greater than 0 and less than the number of filings"
return self.data.take(np.random.choice(len(self), n, replace=False)).sort_by([("filing_date", "descending")])
def sample(self, n: int):
"""Get a random sample of n filings"""
selection = self._sample(n)
return Filings(selection)
@property
def empty(self) -> bool:
return len(self.data) == 0
def current(self):
"""Display the current page ... which is the default for this filings object"""
return self
def next(self):
"""Show the next page"""
data_page = self.data_pager.next()
if data_page is None:
log.warning("End of data .. use previous() \u2190 ")
return None
start_index, _ = self.data_pager._current_range
filings_state = PagingState(page_start=start_index, num_records=len(self))
return Filings(data_page, original_state=filings_state)
def previous(self):
"""
Show the previous page of the data
:return:
"""
data_page = self.data_pager.previous()
if data_page is None:
log.warning(" No previous data .. use next() \u2192 ")
return None
start_index, _ = self.data_pager._current_range
filings_state = PagingState(page_start=start_index, num_records=len(self))
return Filings(data_page, original_state=filings_state)
def _get_by_accession_number(self, accession_number: str):
mask = pc.equal(self.data['accession_number'], accession_number)
idx = mask.index(True).as_py()
if idx > -1:
return self.get_filing_at(idx)
def get(self, index_or_accession_number: IntString):
"""
First, get some filings
>>> filings = get_filings()
Get the Filing at that index location or that has the accession number
>>> filings.get(100)
>>> filings.get("0001721868-22-000010")
:param index_or_accession_number:
:return:
"""
if isinstance(index_or_accession_number, int) or index_or_accession_number.isdigit():
return self.get_filing_at(int(index_or_accession_number))
else:
accession_number = index_or_accession_number.strip()
mask = pc.equal(self.data['accession_number'], accession_number)
idx = mask.index(True).as_py()
if idx > -1:
return self.get_filing_at(idx)
if not accession_number_re.match(accession_number):
log.warning(
f"Invalid accession number [{accession_number}]"
"\n valid accession number [0000000000-00-000000]"
)
def find(self,
company_search_str: str):
from edgar.entity import find_company
# Search for the company
search_results = find_company(company_search_str)
return self.filter(cik=search_results.ciks)
def to_dict(self, max_rows: int = 1000) -> Dict[str, Any]:
"""Return the filings as a json string but only the first max_rows records"""
return cast(Dict[str, Any], self.to_pandas().head(max_rows).to_dict(orient="records"))
def __getitem__(self, item):
return self.get_filing_at(item)
def __len__(self):
return len(self.data)
def __iter__(self):
self.n = 0
return self
def __next__(self):
if self.n < len(self.data):
filing: Filing = self[self.n]
self.n += 1
return filing
else:
raise StopIteration
@property
def summary(self):
return (f"Showing {self.data_pager.page_size} of "
f"{self._original_state.num_records:,} filings")
def _page_index(self) -> range:
"""Create the range index to set on the page dataframe depending on where in the data we are
"""
if self._original_state:
return range(self._original_state.page_start,
self._original_state.page_start
+ min(self.data_pager.page_size, len(self.data))) # set the index to the size of the page
else:
return range(*self.data_pager._current_range)
def __eq__(self, other):
# Check if other is Filings or subclass of Filings
if not isinstance(other, self.__class__) and not issubclass(other.__class__, self.__class__):
return False
if len(self) != len(other):
return False
if self.start_date != other.start_date or self.end_date != other.end_date:
return False
# Handle empty tables
if len(self) == 0:
return True # Two empty tables with same dates are equal
# Compare just accession_number columns
return self.data['accession_number'].equals(other.data['accession_number'])
def __hash__(self):
if self._hash is None:
# Base hash components
hash_components = [self.__class__.__name__, len(self), self.start_date, self.end_date]
# Only add accession numbers if table is not empty
if len(self) > 0:
# Handle different table sizes appropriately
if len(self) == 1:
hash_components.append(self.data['accession_number'][0].as_py())
elif len(self) == 2:
hash_components.append(self.data['accession_number'][0].as_py())
hash_components.append(self.data['accession_number'][1].as_py())
else:
hash_components.append(self.data['accession_number'][0].as_py())
hash_components.append(self.data['accession_number'][len(self) // 2].as_py())
hash_components.append(self.data['accession_number'][len(self) - 1].as_py())
self._hash = hash(tuple(hash_components))
return self._hash
def __rich__(self) -> Panel:
# Create table with appropriate columns and styling
table = Table(
show_header=True,
header_style="bold",
show_edge=True,
expand=False,
padding=(0, 1),
box=box.SIMPLE,
row_styles=["", "bold"]
)
# Add columns with specific styling and alignment
table.add_column("#", style="dim", justify="right")
table.add_column("Form", width=10)
table.add_column("CIK", style="dim", width=10, justify="right")
table.add_column("Ticker", width=6, style="yellow")
table.add_column("Company", style="bold green", width=38, no_wrap=True)
table.add_column("Filing Date", width=11)
table.add_column("Accession Number", width=20)
table.add_column(" ", width=1, style="cyan dim") # Group indicator column
# Get current page from data pager
current_page = self.data_pager.current()
# Calculate start index for proper indexing
start_idx = self._original_state.page_start if self._original_state else self.data_pager.start_index
# Identify groups of consecutive filings with same accession number
groups = {}
accession_numbers = [current_page['accession_number'][i].as_py() for i in range(len(current_page))]
for i in range(len(accession_numbers)):
acc_no = accession_numbers[i]
# Check previous and next accession numbers
prev_acc = accession_numbers[i-1] if i > 0 else None
next_acc = accession_numbers[i+1] if i < len(accession_numbers)-1 else None
if acc_no != prev_acc and acc_no == next_acc:
groups[i] = '┌'  # Start of group
elif acc_no == prev_acc and acc_no == next_acc:
groups[i] = '│'  # Middle of group
elif acc_no == prev_acc and acc_no != next_acc:
groups[i] = '└'  # End of group
else:
groups[i] = ' '  # Standalone filing
# Iterate through rows in current page
for i in range(len(current_page)):
cik = current_page['cik'][i].as_py()
ticker = find_ticker(cik)
row = [
str(start_idx + i),
current_page['form'][i].as_py(),
str(cik),
ticker,
current_page['company'][i].as_py(),
str(current_page['filing_date'][i].as_py()),
accession_number_text(current_page['accession_number'][i].as_py()),
groups.get(i, ' ') # Add group indicator
]
table.add_row(*row)
# Show paging information only if there are multiple pages
elements = [table]
if self.data_pager.total_pages > 1:
total_filings = self._original_state.num_records
current_count = len(current_page)
start_num = start_idx + 1
end_num = start_idx + current_count
page_info = Text.assemble(
("Showing ", "dim"),
(f"{start_num:,}", "bold red"),
(" to ", "dim"),
(f"{end_num:,}", "bold red"),
(" of ", "dim"),
(f"{total_filings:,}", "bold"),
(" filings.", "dim"),
(" Page using ", "dim"),
("← prev()", "bold gray54"),
(" and ", "dim"),
("next() →", "bold gray54")
)
elements.extend([Text("\n"), page_info])
# Get the subtitle
start_date, end_date = self.date_range
subtitle = f"SEC Filings between {start_date:%Y-%m-%d} and {end_date:%Y-%m-%d}" if start_date else ""
return Panel(
Group(*elements),
title="SEC Filings",
subtitle=subtitle,
border_style="bold grey54",
expand=False
)
def __repr__(self):
return repr_rich(self.__rich__())
def sort_filings_by_priority(filing_table: pa.Table,
priority_forms: Optional[List[str]] = None) -> pa.Table:
"""
Sort a filings table by date (descending) and form priority.
Args:
filing_table: PyArrow table containing filings data
priority_forms: List of forms in priority order. Forms not in list will be sorted
alphabetically after priority forms. Defaults to common forms if None.
Returns:
PyArrow table sorted by date and form priority
"""
if priority_forms is None:
priority_forms = ['10-Q', '10-Q/A', '10-K', '10-K/A', '8-K', '8-K/A',
'6-K', '6-K/A', '13F-HR', '144', '4', 'D', 'SC 13D', 'SC 13G']
# Create form priority values
forms_array = filing_table['form']
priorities = []
for form_type in forms_array.to_pylist():
try:
priority = priority_forms.index(form_type)
except ValueError:
priority = len(priority_forms)
priorities.append(priority)
# Add priority column
with_priority = filing_table.append_column(
'form_priority',
pa.array(priorities, type=pa.int32())
)
# Sort by date (descending), priority (ascending), form name (ascending)
sorted_table = with_priority.sort_by([
("filing_date", "descending"),
("form_priority", "ascending"),
("form", "ascending")
])
# Remove temporary priority column
return sorted_table.drop(['form_priority'])
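# Sketch of the resulting order for four filings on the same date (indices into
# the default priority list shown in parentheses):
#   input forms:  ['8-K' (4), 'S-1' (14, not listed), '10-K' (2), '4' (10)]
#   sorted order: ['10-K', '8-K', '4', 'S-1']
# Dates still sort first (descending), so this ordering only applies within a day.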
def get_filings(year: Optional[Years] = None,
quarter: Optional[Quarters] = None,
form: Optional[Union[str, List[IntString]]] = None,
amendments: bool = True,
filing_date: Optional[str] = None,
index="form",
priority_sorted_forms: Optional[List[str]] = None) -> Optional[Filings]:
"""
Downloads the filing index for a given year or list of years, and a quarter or list of quarters.
So you can download for 2020, [2020,2021,2022] or range(2020, 2023)
Examples
>>> from edgar import get_filings
>>> filings_ = get_filings(2021) # Get filings for 2021
>>> filings_ = get_filings(2021, 4) # Get filings for 2021 Q4
>>> filings_ = get_filings(2021, [3,4]) # Get filings for 2021 Q3 and Q4
>>> filings_ = get_filings([2020, 2021]) # Get filings for 2020 and 2021
>>> filings_ = get_filings([2020, 2021], 4) # Get filings for Q4 of 2020 and 2021
>>> filings_ = get_filings(range(2010, 2021)) # Get filings between 2010 and 2021 - does not include 2021
>>> filings_ = get_filings(2021, 4, form="D") # Get filings for 2021 Q4 for form D
>>> filings_ = get_filings(2021, 4, filing_date="2021-10-01") # Get filings for 2021 Q4 on "2021-10-01"
>>> filings_ = get_filings(2021, 4, filing_date="2021-10-01:2021-10-10") # Get filings for 2021 Q4 between
# "2021-10-01" and "2021-10-10"
:param year: The year of the filing
:param quarter: The quarter of the filing
:param form: The form or forms as a string e.g. "10-K" or a List ["10-K", "8-K"]
:param amendments: If True will expand the list of forms to include amendments e.g. "10-K/A"
:param filing_date: The filing date to filter by in YYYY-MM-DD format
e.g. filing_date="2022-01-17" or filing_date="2022-01-17:2022-02-28"
:param index: The index type - "form" or "company" or "xbrl"
:param priority_sorted_forms: A list of forms to sort by priority. This presents these forms first for each day.
:return:
"""
# Check if defaults were used
defaults_used = (year is None and
quarter is None and
form is None and
amendments is True and
filing_date is None and
index == "form" and
priority_sorted_forms is None)
if filing_date:
if not is_valid_filing_date(filing_date):
log.warning("""Provide a valid filing date in the format YYYY-MM-DD or YYYY-MM-DD:YYYY-MM-DD""")
return None
year_and_quarters = filing_date_to_year_quarters(filing_date)
elif not year:
# If no year specified, take the current year and quarter. (We need the quarter later)
year, quarter = current_year_and_quarter()
# Expand quarters for the year to date so use expand_quarters(year, quarter=None)
year_and_quarters: YearAndQuarters = expand_quarters(year, quarter=None)
else:
year_and_quarters: YearAndQuarters = expand_quarters(year, quarter)
if len(year_and_quarters) == 0:
log.warning(f"""
Provide a year between 1994 and {datetime.now().year} and optionally a quarter (1-4) for which the SEC has filings.
e.g. filings = get_filings(2023) OR
filings = get_filings(2023, 1)
(You specified the year {year} and quarter {quarter})
""")
return None
filing_index = get_filings_for_quarters(year_and_quarters, index=index)
filings = Filings(filing_index)
if form or filing_date:
filings = filings.filter(form=form, amendments=amendments, filing_date=filing_date)
if not filings:
if defaults_used:
# Ensure at least some data is returned
previous_quarter = [get_previous_quarter(year, quarter)]
filing_index = get_filings_for_quarters(previous_quarter, index=index)
filings = Filings(filing_index)
sorted_filing_index = sort_filings_by_priority(filings.data, priority_sorted_forms)
return Filings(sorted_filing_index)
# Return an empty filings object
return Filings(_empty_filing_index())
# Sort the filings using the separate sort function
sorted_filing_index = sort_filings_by_priority(filings.data, priority_sorted_forms)
return Filings(sorted_filing_index)
@lru_cache(maxsize=8)
def _get_cached_filings(year: Optional[Years] = None,
quarter: Optional[Quarters] = None,
form: Optional[Union[str, List[IntString]]] = None,
amendments: bool = True,
filing_date: Optional[str] = None,
index="form") -> Union[Filings, None]:
# Get the filings but cache the result
return get_filings(year=year, quarter=quarter, form=form, amendments=amendments, filing_date=filing_date,
index=index)
def parse_filing_header(content):
data = {}
current_key = None
lines = content.split('\n')
for line in lines:
if line.endswith(':'):
current_key = line[:-1] # Remove the trailing colon
data[current_key] = {}
elif current_key and ':' in line:
key, value = map(str.strip, line.split(':', 1))
data[current_key][key] = value
return data
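# Illustrative header fragment and the resulting dict (content is made up; real
# EDGAR headers have many more sections):
#   FILER:
#   COMPANY CONFORMED NAME: EXAMPLE CORP
#   CENTRAL INDEX KEY: 0001234567
# parses to {'FILER': {'COMPANY CONFORMED NAME': 'EXAMPLE CORP',
#                      'CENTRAL INDEX KEY': '0001234567'}}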
def _create_address_table(business_address: Address, mailing_address: Address):
address_table = Table("Type", "Street1", "Street2", "City", "State", "Zipcode",
title="\U0001F4EC Addresses", box=box.SIMPLE)
if business_address:
address_table.add_row("\U0001F3E2 Business Address",
business_address.street1,
business_address.street2,
business_address.city,
business_address.state_or_country,
business_address.zipcode)
if mailing_address:
address_table.add_row("\U0001F4ED Mailing Address",
mailing_address.street1,
mailing_address.street2,
mailing_address.city,
mailing_address.state_or_country,
mailing_address.zipcode)
return address_table
class Filing:
"""
A single SEC filing. Allows you to access the documents and data for that filing
"""
def __init__(self,
cik: int,
company: str,
form: str,
filing_date: str,
accession_no: str,
related_entities: Optional[List[Dict]] = None):
self.cik = cik
self.company = company
self.form = form
self.filing_date = filing_date
self.accession_no = accession_no
self._filing_homepage = None
self._sgml = None
# New: Store related entities from index
self._related_entities = related_entities or []
@property
def docs(self):
return Docs(self)
@property
def accession_number(self):
return self.accession_no
@property
def all_ciks(self) -> List[int]:
"""Get all CIKs including related entities"""
# If we have related entities from the index, use those
if self._related_entities:
ciks = [self.cik]
ciks.extend(e['cik'] for e in self._related_entities)
return sorted(list(set(ciks)))
# Otherwise, check the header for all filers
try:
header = self.header
if header and header.filers and len(header.filers) > 1:
# Multiple filers in header
ciks = []
for filer in header.filers:
if filer.company_information and filer.company_information.cik:
# Convert CIK string to int, removing leading zeros
cik_int = int(filer.company_information.cik.lstrip('0'))
ciks.append(cik_int)
if ciks:
return sorted(list(set(ciks)))
except Exception as e:
# Log warning when header access fails
log.warning(
f"Could not access header for multi-entity detection in Filing "
f"(accession_no={self.accession_no}, cik={self.cik}): {str(e)}. "
f"This may occur if the accession number is invalid or the filing doesn't exist on EDGAR."
)
return [self.cik]
@property
def all_entities(self) -> List[Dict[str, Any]]:
"""Get all entity information"""
# If we have related entities from the index, use those
if self._related_entities:
entities = [{'cik': self.cik, 'company': self.company}]
entities.extend(self._related_entities)
return entities
# Otherwise, check the header for all filers
try:
header = self.header
if header and header.filers and len(header.filers) > 1:
# Multiple filers in header
entities = []
for filer in header.filers:
if filer.company_information and filer.company_information.cik:
# Convert CIK string to int, removing leading zeros
cik_int = int(filer.company_information.cik.lstrip('0'))
entities.append({
'cik': cik_int,
'company': filer.company_information.name or f'CIK {cik_int}'
})
if entities:
return entities
except Exception as e:
# Log warning when header access fails
log.warning(
f"Could not access header for entity information in Filing "
f"(accession_no={self.accession_no}, cik={self.cik}): {str(e)}. "
f"This may occur if the accession number is invalid or the filing doesn't exist on EDGAR."
)
return [{'cik': self.cik, 'company': self.company}]
@property
def is_multi_entity(self) -> bool:
"""Check if this filing has multiple entities"""
# First check if we have related entities from the index
if len(self._related_entities) > 0:
return True
# Otherwise, check the header for multiple filers
try:
header = self.header
if header and header.filers and len(header.filers) > 1:
return True
except Exception as e:
# Log warning when header access fails
log.warning(
f"Could not access header for multi-entity check in Filing "
f"(accession_no={self.accession_no}, cik={self.cik}): {str(e)}. "
f"This may occur if the accession number is invalid or the filing doesn't exist on EDGAR."
)
return False
@property
def document(self):
"""
:return: The primary display document on the filing, generally HTML but can be XHTML
"""
document = self.sgml().attachments.primary_html_document
# If the document is not in the SGML then we have to go to the homepage
if document:
if document.extension == '.paper':
# If the document is a paper filing, we return the scanned document if it exists
attachments = self.homepage.attachments
scanned_documents = attachments.query("document == 'scanned.pdf'")
if len(scanned_documents) > 0:
return scanned_documents.get_by_index(0)
return self.homepage.primary_html_document
return document
return self.homepage.primary_html_document
@property
def primary_documents(self):
"""
:return: a list of the primary documents on the filing, generally HTML or XHTML and optionally XML
"""
documents = self.sgml().attachments.primary_documents
if len(documents) == 0:
documents = self.homepage.primary_documents
return documents
@property
def period_of_report(self):
"""
Get the period of report for the filing
"""
return self.sgml().period_of_report
@property
def attachments(self):
# Return all the attachments on the filing
sgml_filing: FilingSGML = self.sgml()
return sgml_filing.attachments
@property
def exhibits(self):
# Return all the exhibits on the filing
return self.attachments.exhibits
@lru_cache(maxsize=4)
def html(self) -> Optional[str]:
"""Returns the html contents of the primary document if it is html"""
sgml = self.sgml()
html = sgml.html()
if not html:
document:Attachment = self.homepage.primary_html_document
if document.empty or document.is_binary():
return None
return self.homepage.primary_html_document.download()
if html.endswith("</PDF>"):
return None
if html.startswith("<?xml"):
if self.form in ['3','3/A', '4', '4/A', '5', '5/A']:
from edgar.ownership import Ownership
ownership:Ownership = self.obj()
html = ownership.to_html()
else:
html = self.homepage.primary_html_document.download()
if isinstance(html, bytes):
try:
return html.decode("utf-8")
except UnicodeDecodeError:
return None
if is_probably_html(html):
return html
else:
html = html.replace("<PAGE>", "")
return f"<html><body><div>{html}</div></body></html>"
@lru_cache(maxsize=4)
def xml(self) -> Optional[str]:
"""Returns the xml contents of the primary document if it is xml"""
sgml = self.sgml()
return sgml.xml()
@lru_cache(maxsize=4)
def text(self) -> str:
"""Convert the html of the main filing document to text"""
html_content = self.html()
if html_content and is_probably_html(html_content):
document = Document.parse(html_content)
return rich_to_text(document)
else:
text_extract_attachments = self.attachments.query("document_type == 'TEXT-EXTRACT'")
if len(text_extract_attachments) > 0 and text_extract_attachments.get_by_index(0) is not None:
text_extract_attachment = text_extract_attachments.get_by_index(0)
return text_extract_attachment.content
else:
return self._download_filing_text()
def _download_filing_text(self):
"""
Download the text of the filing directly from the primary text sources.
Either from the text url or the text extract attachment
"""
text_extract_attachments = self.attachments.query("document_type == 'TEXT-EXTRACT'")
if len(text_extract_attachments) > 0 and text_extract_attachments[0] is not None:
text_extract_attachment = text_extract_attachments[0]
assert text_extract_attachment is not None
return download_text_between_tags(text_extract_attachment.url, "TEXT")
else:
return download_text_between_tags(self.text_url, "TEXT")
def full_text_submission(self) -> str:
"""Return the complete text submission file"""
downloaded = download_file(self.text_url, as_text=True)
assert downloaded is not None
return str(downloaded)
def markdown(self, include_page_breaks: bool = False, start_page_number: int = 0) -> str:
"""
Return the markdown version of this filing html
Args:
include_page_breaks: If True, include page break delimiters in the markdown
start_page_number: Starting page number for page break markers (default: 0)
"""
html = self.html()
if html:
clean_html = get_clean_html(html)
if clean_html:
return to_markdown(clean_html, include_page_breaks=include_page_breaks, start_page_number=start_page_number)
text_content = self.text()
return text_to_markdown(text_content)
def view(self):
"""Preview this filing's primary document as markdown. This should display in the console"""
html_content = self.html()
if html_content and is_probably_html(html_content):
document = Document.parse(html_content)
print_rich(document)
else:
# Fallback to text content for forms without HTML (like UPLOAD forms)
text_content = self.text()
if text_content:
print(text_content)
def xbrl(self) -> Optional[XBRL]:
"""
Get the XBRL data for the filing, parsed into an XBRL object
:return: The parsed XBRL object, or None if the filing has no XBRL data
"""
try:
return XBRL.from_filing(self)
except XBRLFilingWithNoXbrlData:
return None
def serve(self, port: int = 8000) -> AttachmentServer:
"""Serve the filings on a local server
port: The port to serve the filings on
"""
return self.attachments.serve(port=port)
def save(self, directory_or_file: PathLike):
"""Save the filing to a directory path or a file using pickle.dump
If directory_or_file is a directory then the final file will be
'<directory>/<accession_number>.pkl'
Otherwise, save to the file passed in
"""
filing_path = Path(directory_or_file)
if filing_path.is_dir():
filing_path = filing_path / f"{self.accession_no}.pkl"
with filing_path.open("wb") as f:
pickle.dump(self, f)
@classmethod
def load(cls, path: PathLike):
"""Load a filing from a json file"""
path = Path(path)
with path.open("rb") as file:
return pickle.load(file)
@cached_property
def filing_directory(self) -> FilingDirectory:
return FilingDirectory.load(self.base_dir)
def _local_path(self) -> Path:
"""
Get the local path for the filing
"""
return local_filing_path(str(self.filing_date), self.accession_no)
@classmethod
def from_sgml(cls, source: Union[str, Path]):
"""
Read the filing from an SGML string or a path to an SGML file
"""
filing_sgml = FilingSGML.from_source(source)
filers = filing_sgml.header.filers
if filers and len(filers) > 0:
company = filers[0].company_information.name if filers[0].company_information else ""
else:
company = ""
filing = cls(cik=filing_sgml.cik,
accession_no=filing_sgml.accession_number,
form=filing_sgml.form,
company=company,
filing_date=filing_sgml.filing_date)
filing._sgml = filing_sgml
return filing
@classmethod
def from_sgml_text(cls, full_text_submission: str):
"""
Read the filing from the full text submission
"""
filing_sgml = FilingSGML.from_text(full_text_submission)
filers = filing_sgml.header.filers
if filers and len(filers) > 0:
company = filers[0].company_information.name if filers[0].company_information else ""
else:
company = ""
filing = cls(cik=filing_sgml.cik,
accession_no=filing_sgml.accession_number,
form=filing_sgml.form,
company=company,
filing_date=filing_sgml.filing_date)
filing._sgml = filing_sgml
return filing
def sgml(self) -> FilingSGML:
"""
Read the filing from the local storage path if it exists
"""
if self._sgml:
return self._sgml
if is_using_local_storage():
local_path = local_filing_path(str(self.filing_date), self.accession_no)
if local_path.exists():
self._sgml = FilingSGML.from_source(local_path)
if self._sgml is None:
self._sgml = FilingSGML.from_filing(self)
return self._sgml
@cached_property
def reports(self) -> Optional[Reports]:
"""
If the filing has report attachments then return the reports
"""
filing_summary = self.sgml().filing_summary
if filing_summary:
return filing_summary.reports
@cached_property
def statements(self) -> Optional[Statements]:
"""
Get the statements for a report
"""
if self.reports:
return self.reports.statements
@cached_property
def index_headers(self) -> IndexHeaders:
"""
Get the index headers for the filing. This is a listing of all the files in the filing directory
"""
index_headers_url = f"{self.base_dir}/{self.accession_no}-index-headers.html"
index_header_text = download_text(index_headers_url)
return IndexHeaders.load(index_header_text)
def to_dict(self) -> Dict[str, Union[str, int]]:
"""Return the filing as a Dict string"""
return {'accession_number': self.accession_number,
'cik': self.cik,
'company': self.company,
'form': self.form,
'filing_date': self.filing_date}
@classmethod
def from_dict(cls, data: Dict[str, Union[str, int]]):
"""Create a Filing from a dictionary.
The dict must have the keys cik, company, form, filing_date, accession_number
"""
assert all(key in data for key in ['cik', 'company', 'form', 'filing_date', 'accession_number']), \
"The dict must have the keys cik, company, form, filing_date, accession_number"
return cls(cik=int(data['cik']),
company=str(data['company']),
form=str(data['form']),
filing_date=str(data['filing_date']),
accession_no=str(data['accession_number']))
@classmethod
def from_json(cls, path: str):
"""Create a Filing from a JSON file"""
with open(path, 'r') as file:
data = json.load(file)
return cls.from_dict(data)
@cached_property
def header(self):
"""Get the header for the filing"""
_sgml = self.sgml()
return _sgml.header
def data_object(self):
""" Get this filing as the data object that it might be"""
from edgar import obj
return obj(self)
def obj(self):
"""Alias for data_object()"""
return self.data_object()
def open_homepage(self):
"""Open the homepage in the browser"""
webbrowser.open(self.homepage_url)
def open(self):
"""Open the main filing document"""
assert self.document is not None
# Use the homepage to determine the url since the SGML sometimes misses the primary HTML file
webbrowser.open(self.homepage.primary_html_document.url)
@lru_cache(maxsize=1)
def sections(self) -> List[str]:
html = self.html()
assert html is not None
return html_sections(html)
@cached_property
def __get_bm25_search_index(self):
return BM25Search(self.sections())
@cached_property
def __get_regex_search_index(self):
return RegexSearch(self.sections())
def search(self,
query: str,
regex=False):
"""Search for the query string in the filing HTML"""
if regex:
return self.__get_regex_search_index.search(query)
return self.__get_bm25_search_index.search(query)
@property
def filing_url(self) -> str:
return f"{self.base_dir}/{self.document.document}"
@property
def homepage_url(self) -> str:
return f"{sec_edgar}/data/{self.cik}/{self.accession_no}-index.html"
@property
def text_url(self) -> str:
return f"{self.base_dir}/{self.accession_no}.txt"
@property
def index_header_url(self) -> str:
return f"{self.base_dir}/index-headers.html"
@property
def base_dir(self) -> str:
return f"{sec_edgar}/data/{self.cik}/{self.accession_no.replace('-', '')}"
@property
def url(self) -> str:
return self.homepage_url
@property
def homepage(self):
"""
Get the homepage for the filing
:return: the FilingHomepage
"""
if not self._filing_homepage:
self._filing_homepage = FilingHomepage.load(self.homepage_url)
return self._filing_homepage
@property
def home(self):
"""Alias for homepage"""
return self.homepage
@lru_cache(maxsize=1)
def get_entity(self):
"""Get the company to which this filing belongs"""
"Get the company for cik. Cache for performance"
from edgar.entity import Company
return Company(self.cik)
@lru_cache(maxsize=1)
def as_company_filing(self):
"""Get this filing as a company filing. Company Filings have more information"""
company = self.get_entity()
if not company:
return None
filings = company.get_filings(accession_number=self.accession_no)
if filings and not filings.empty:
return filings[0]
return None
@lru_cache(maxsize=1)
def related_filings(self):
"""Get all the filings related to this one
There is no file number on this base Filing class, so first get the company,
then this filing as a company filing, and then the related filings by file number
"""
company = self.get_entity()
if not company:
return
filings = company.get_filings(accession_number=self.accession_no)
if not filings or filings.empty:
if is_using_local_storage():
# In this case the local storage is missing the filing so we have to download it
log.warning(f"Filing {self.accession_no} not found in local storage. Downloading from SEC ...")
from edgar.entity import download_entity_submissions_from_sec, parse_entity_submissions
submissions_json = download_entity_submissions_from_sec(self.cik)
c_from_sec = parse_entity_submissions(submissions_json)
filings = c_from_sec.get_filings(accession_number=self.accession_no)
if not filings or filings.empty:
# Shouldn't get here
return company.get_empty_filings()
else:
return company.get_empty_filings()
file_number = filings[0].file_number
return company.get_filings(file_number=file_number,
sort_by=[("filing_date", "ascending"), ("accession_number", "ascending")])
def __hash__(self):
return hash(self.accession_no)
def __eq__(self, other):
return isinstance(other, Filing) and self.accession_no == other.accession_no
def __ne__(self, other):
return not self == other
def summary(self) -> pd.DataFrame:
"""Return a summary of this filing as a dataframe"""
return pd.DataFrame([{"Accession Number": self.accession_no,
"Filing Date": self.filing_date,
"Company": self.company,
"CIK": self.cik}]).set_index("Accession Number")
def __str__(self):
"""
Return a string version of this filing e.g.
Filing(form='10-K', filing_date='2018-03-08', company='CARBO CERAMICS INC',
cik=1009672, accession_no='0001564590-18-004771')
:return:
"""
return (f"Filing(form='{self.form}', filing_date='{self.filing_date}', company='{self.company}', "
f"cik={self.cik}, accession_no='{self.accession_no}')")
def __rich__(self):
"""
Produce a table version of this filing e.g.
┌──────────────────────┬──────┬────────────┬────────────────────┬─────────┐
│ │ form │ filing_date│ company │ cik │
├──────────────────────┼──────┼────────────┼────────────────────┼─────────┤
│ 0001564590-18-004771 │ 10-K │ 2018-03-08 │ CARBO CERAMICS INC │ 1009672 │
└──────────────────────┴──────┴────────────┴────────────────────┴─────────┘
:return: a rich table version of this filing
"""
ticker = find_ticker_safe(self.cik)
ticker = f"{ticker}" if ticker else ""
# Check for multi-entity (without triggering header lookup)
has_related = hasattr(self, '_related_entities') and self._related_entities
# Build the title components
title_parts = [
(f"Form {self.form} ", "bold"),
(self.company, "bold green"),
" ",
(f"[{self.cik}] ", "dim"),
]
if ticker:
title_parts.append((ticker, "bold yellow"))
# Add multi-entity indicator if present
if has_related:
num_related = len(self._related_entities)
title_parts.extend([
" ",
(f"(+{num_related} {'entity' if num_related == 1 else 'entities'})", "cyan dim")
])
# The title of the panel
title = Text.assemble(*title_parts)
# The subtitle of the panel
form_description = describe_form(self.form, False)
subtitle = Text.assemble(
(form_description, "dim"),
"",
("filing.docs", "cyan dim"),
(" for usage guide", "dim")
)
attachments = self.attachments
# The filing information table
filing_info_table = Table("Accession Number", "Filing Date", "Period of Report", "Documents",
header_style="dim",
box=box.SIMPLE_HEAD)
filing_info_table.add_row(accession_number_text(self.accession_no),
Text(str(self.filing_date), "bold"),
Text(self.period_of_report or "-", "bold"),
f"{len(attachments)}")
# Build content elements
elements = [filing_info_table]
# Add entities table if multi-entity filing
if has_related:
# Add spacing and header
elements.append(Text()) # Empty line for spacing
elements.append(Text("All Entities:", style="bold dim"))
# Create entities table
entities_table = Table(
"CIK", "Company",
header_style="dim",
box=box.SIMPLE,
show_edge=False,
padding=(0, 1)
)
# Add primary entity
entities_table.add_row(
Text(str(self.cik), style="dim"),
Text(self.company, style="bold green")
)
# Add related entities
for entity in self._related_entities:
entities_table.add_row(
Text(str(entity.get('cik', '')), style="dim"),
Text(entity.get('company', ''), style="green")
)
elements.append(entities_table)
return Panel(
Group(*elements),
title=title,
subtitle=subtitle,
box=box.ROUNDED,
expand=False
)
def __repr__(self):
return repr_rich(self.__rich__())
# These are the columns on the table on the filing homepage
filing_file_cols = ['Seq', 'Description', 'Document', 'Type', 'Size', 'Url']
@dataclass(frozen=True)
class ClassContractSeries:
cik: str
url: str
@dataclass(frozen=True)
class ClassContract:
cik: str
name: str
ticker: str
status: str
@dataclass(frozen=True)
class FilerInfo:
company_name: str
identification: str
addresses: List[str]
def __rich__(self):
return Panel(
Columns([self.identification, Text(" "), self.addresses[0], self.addresses[1]]),
title=self.company_name
)
def __repr__(self):
return repr_rich(self.__rich__())
def summarize_files(data: pd.DataFrame) -> pd.DataFrame:
return (data
.filter(["Seq", "Document", "Description", "Size"])
.assign(Size=data.Size.apply(display_size))
.set_index("Seq")
)
@cache_except_none(maxsize=16)
def get_filing_by_accession(accession_number: str, year: int):
"""Cache-friendly version that takes year as parameter instead of using datetime.now()"""
assert re.match(r"\d{10}-\d{2}-\d{6}", accession_number)
# Static logic that doesn't depend on current time
for quarter in range(1, 5):
filings = _get_cached_filings(year=year, quarter=quarter)
if filings and (filing := filings.get(accession_number)):
return filing
return None
def get_by_accession_number_enriched(accession_number: str):
"""Get filing with all related entities populated using PyArrow"""
year = int("19" + accession_number[11:13]) if accession_number[11] == '9' else int("20" + accession_number[11:13])
# Find all entities with this accession number
all_entities = []
for quarter in range(1, 5):
filings = _get_cached_filings(year=year, quarter=quarter)
if filings:
# Use PyArrow filtering (same pattern as Filings.get())
mask = pc.equal(filings.data['accession_number'], accession_number)
# Convert mask to indices
indices = []
for i in range(len(mask)):
if mask[i].as_py():
indices.append(i)
if len(indices) > 0:
# Extract all matching entities efficiently
for idx in indices:
all_entities.append({
'cik': filings.data['cik'][idx].as_py(),
'company': filings.data['company'][idx].as_py(),
'form': filings.data['form'][idx].as_py(),
'filing_date': filings.data['filing_date'][idx].as_py()
})
break # Found matches, no need to check other quarters
if all_entities:
# Return first entity as primary, with others as related
primary = all_entities[0]
related = all_entities[1:] if len(all_entities) > 1 else []
# Create enriched Filing
filing = Filing(
cik=primary['cik'],
company=primary['company'],
form=primary['form'],
filing_date=primary['filing_date'],
accession_no=accession_number,
related_entities=related
)
return filing
# Fall back to current behavior if not found
return get_by_accession_number(accession_number)
def get_by_accession_number(accession_number: str, show_progress: bool = False):
"""Wrapper that handles progress display and current time logic"""
year = int("19" + accession_number[11:13]) if accession_number[11] == '9' else int("20" + accession_number[11:13])
with Status("[bold deep_sky_blue1]Searching...", spinner="dots2") if show_progress else nullcontext():
filing = get_filing_by_accession(accession_number, year)
if not filing and year == datetime.now().year:
from edgar.current_filings import get_current_filings
filings = get_current_filings()
filing = filings.get(accession_number)
return filing
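# Worked example of the year derivation above: for accession number
# "0001721868-22-000010" the characters at [11:13] are "22", and since the first
# of them is not '9' the filing year resolves to 2022; an accession with "-99-"
# in that position would resolve to 1999. Only the filing year is encoded, so all
# four quarters of that year may be searched.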
def form_with_amendments(*forms: str):
return list(forms) + [f"{f}/A" for f in forms]
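# Example of the helper above:
#   >>> form_with_amendments("10-K", "10-Q")
#   ['10-K', '10-Q', '10-K/A', '10-Q/A']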
barchart = '\U0001F4CA'
ticket = '\U0001F3AB'
page_facing_up = '\U0001F4C4'
classical_building = '\U0001F3DB'
def unicode_for_form(form: str) -> str:
"""
Returns a meaningful Unicode symbol based on SEC form type.
Args:
form (str): SEC form type identifier
Returns:
str: Unicode symbol representing the form type
Form type categories:
- Periodic Reports (10-K, 10-Q): 📊 (financial statements/data)
- Current Reports (8-K, 6-K): ⚡ (immediate/material events)
- Registration & Offerings:
- S-1, F-1: 🎯 (initial public offerings)
- S-3, F-3: 🔄 (follow-on offerings)
- Prospectuses (424B*): 📖 (offering documents)
- Insider Forms (3, 4, 5): 👥 (insider activity)
- Beneficial Ownership:
- SC 13D/G: 🏰 (significant ownership stakes)
- 13F-HR: 📈 (institutional holdings)
- Investment Company:
- N-CSR, N-Q: 💼 (investment portfolio reports)
- N-PX: 🗳️ (proxy voting record)
- Foreign Company Forms (20-F, 40-F): 🌐 (international)
- Municipal Advisor Forms (MA): ⚖️ (regulation/compliance)
- Communications (CORRESP/UPLOAD): 💬 (dialogue with SEC)
- Proxy Materials (DEF 14A): 📩 (shareholder voting)
- Default: 📄 (generic document)
"""
# Periodic financial reports
if form in ['10-K', '10-Q', '10-K/A', '10-Q/A']:
return '📊' # Chart for financial statements
# Current reports (material events)
elif form in ['8-K', '8-K/A', '6-K', '6-K/A']:
return '⚡'  # Lightning bolt for immediate/current events
# Initial registration statements
elif form.startswith(('S-1', 'F-1')) or form in ['S-1/A', 'F-1/A']:
return '🎯' # Target for initial offerings
# Shelf registration statements
elif form.startswith(('S-3', 'F-3')) or form in ['S-3/A', 'F-3/A']:
return '🔄' # Circular arrows for repeat/follow-on offerings
# Prospectuses
elif form.startswith('424B'):
return '📖' # Open book for offering documents
# Foreign issuer annual reports
elif form in ['20-F', '20-F/A', '40-F', '40-F/A']:
return '🌐' # Globe for international filings
# Insider trading forms
elif form in ['3', '4', '5', '3/A', '4/A', '5/A']:
return '👥' # People for insider/beneficial owner reports
# Significant beneficial ownership reports
elif form.startswith(('SC 13D', 'SC 13G')) or form in ['SC 13D/A', 'SC 13G/A']:
return '🏰' # Castle for large ownership stakes
# Institutional investment holdings
elif form in ['13F-HR', '13F-HR/A', '13F-NT', '13F-NT/A']:
return '📈' # Chart up for investment positions
# Investment company reports
elif form in ['N-CSR', 'N-CSR/A', 'N-Q', 'N-Q/A']:
return '💼' # Briefcase for investment portfolio
# Proxy voting records
elif form in ['N-PX', 'N-PX/A']:
return '🗳️' # Ballot box for voting records
# Municipal advisor forms
elif form in ['MA', 'MA/A', 'MA-I', 'MA-I/A']:
return '⚖️' # Scales for regulatory/compliance
# SEC correspondence
elif form in ['CORRESP', 'UPLOAD']:
return '💬' # Speech bubble for communications
# Proxy statements
elif form in ['DEF 14A', 'PRE 14A', 'DEFA14A', 'DEFC14A']:
return '📩' # Envelope for shareholder communications
# Default case - generic document
return '📄'