Files
2025-12-09 12:13:01 +01:00

455 lines
15 KiB
Python

"""
Filings-related classes for the Entity package.
This module contains classes related to SEC filings for entities, including
collections of filings and filing facts.
"""
from typing import List, Union
import pandas as pd
import pyarrow as pa
import pyarrow.compute as pc
from rich.box import SIMPLE
from rich.console import Group
from rich.panel import Panel
from rich.table import Table
from rich.text import Text
from edgar._filings import Filing, Filings, PagingState
from edgar.core import IntString, log
from edgar.formatting import accession_number_text, display_size
from edgar.reference.forms import describe_form
from edgar.richtools import Docs, df_to_rich_table, repr_rich
# Names exported by `from <this module> import *`.
__all__ = [
'EntityFiling',
'EntityFilings',
'EntityFacts',
'empty_company_filings'
]
class EntityFiling(Filing):
    """
    One SEC filing that belongs to a particular entity.

    Builds on the base ``Filing`` class and stores the additional per-filing
    metadata passed to the constructor (report date, acceptance time, file
    number, items, size, primary document details and XBRL flags).

    Attributes:
        items (str): Filing items taken from SEC metadata. For 8-K filings this
            lists the items that are included (e.g., "2.02,9.01").

            **Data Source**: The value is SEC filing metadata; it is not
            obtained by parsing the filing document.

            **Legacy SGML Limitation**: The SEC's historical metadata for
            legacy SGML filings (1999-2001) may be incorrect or incomplete.
            Metadata for modern XML filings (2005+) is accurate.

            **Workaround for Legacy Filings**: To extract items accurately from
            legacy SGML 8-K filings, parse the filing text directly with regex
            patterns. Example code is in GitHub Issue #462.
    """

    def __init__(self,
                 cik: int,
                 company: str,
                 form: str,
                 filing_date: str,
                 report_date: str,
                 acceptance_datetime: str,
                 accession_no: str,
                 file_number: str,
                 items: str,
                 size: int,
                 primary_document: str,
                 primary_doc_description: str,
                 is_xbrl: bool,
                 is_inline_xbrl: bool):
        """Initialise the base filing fields, then store the entity-specific metadata."""
        super().__init__(
            cik=cik,
            company=company,
            form=form,
            filing_date=filing_date,
            accession_no=accession_no,
        )
        # Dates and acceptance time
        self.report_date = report_date
        self.acceptance_datetime = acceptance_datetime
        # Filing identification
        self.file_number: str = file_number
        # The class docstring describes where `items` comes from and its limitations
        self.items: str = items
        self.size: int = size
        # Primary document details
        self.primary_document: str = primary_document
        self.primary_doc_description: str = primary_doc_description
        # XBRL flags
        self.is_xbrl: bool = is_xbrl
        self.is_inline_xbrl: bool = is_inline_xbrl

    def related_filings(self):
        """Return the entity's filings that share this filing's file number, sorted by filing date."""
        entity = self.get_entity()
        return entity.get_filings(file_number=self.file_number, sort_by="filing_date")

    def __str__(self):
        """Return a one-line ``Filing(...)`` summary of the identifying fields."""
        described = ", ".join((
            f"company='{self.company}'",
            f"cik={self.cik}",
            f"form='{self.form}'",
            f"filing_date='{self.filing_date}'",
            f"accession_no='{self.accession_no}'",
        ))
        return f"Filing({described})"
class EntityFilings(Filings):
    """
    Collection of SEC filings for an entity.

    Extends the base ``Filings`` class so that filtering, slicing, sampling and
    paging return ``EntityFilings`` (and indexing returns ``EntityFiling``),
    carrying the entity's CIK and company name along with the data.
    """

    def __init__(self,
                 data: pa.Table,
                 cik: int,
                 company_name: str,
                 original_state: PagingState = None):
        """
        Args:
            data: Arrow table holding the filing rows
            cik: CIK of the entity the filings belong to
            company_name: Name of the entity
            original_state: Paging state forwarded to the base class
        """
        super().__init__(data, original_state=original_state)
        self.cik = cik
        self.company_name = company_name

    def _entity_filings_from(self, data, original_state: PagingState = None):
        """Wrap ``data`` in a new EntityFilings for the same cik and company name."""
        return EntityFilings(data,
                             cik=self.cik,
                             company_name=self.company_name,
                             original_state=original_state)

    def _entity_page_from(self, data_page):
        """Wrap a page from the data pager, recording the pager's current start index."""
        start_index, _ = self.data_pager._current_range
        filings_state = PagingState(page_start=start_index, num_records=len(self))
        return self._entity_filings_from(data_page, original_state=filings_state)

    @property
    def docs(self):
        """A ``Docs`` object built from this collection."""
        return Docs(self)

    def __getitem__(self, item):
        """Return the filing at index ``item`` (same as ``get_filing_at``)."""
        return self.get_filing_at(item)

    @property
    def empty(self):
        """True when the underlying data has no rows."""
        return len(self.data) == 0

    def get_filing_at(self, item: int):
        """Get the filing at the specified index as an EntityFiling."""

        def value(column: str):
            # Read one cell of the requested row as a plain Python value
            return self.data[column][item].as_py()

        return EntityFiling(
            cik=self.cik,
            company=self.company_name,
            form=value('form'),
            filing_date=value('filing_date'),
            report_date=value('reportDate'),
            acceptance_datetime=value('acceptanceDateTime'),
            accession_no=value('accession_number'),
            file_number=value('fileNumber'),
            items=value('items'),
            size=value('size'),
            primary_document=value('primaryDocument'),
            primary_doc_description=value('primaryDocDescription'),
            is_xbrl=value('isXBRL'),
            is_inline_xbrl=value('isInlineXBRL')
        )

    def filter(self,
               form: Union[str, List[str]] = None,
               amendments: bool = None,
               filing_date: str = None,
               date: str = None,
               cik: Union[int, str, List[Union[int, str]]] = None,
               ticker: Union[str, List[str]] = None,
               accession_number: Union[str, List[str]] = None):
        """
        Filter the filings based on various criteria.

        All arguments are passed unchanged to the base class ``filter``.

        Args:
            form: Filter by form type
            amendments: Include amendments
            filing_date: Filter by filing date
            date: Alias for filing_date
            cik: Filter by CIK
            ticker: Filter by ticker
            accession_number: Filter by accession number

        Returns:
            Filtered EntityFilings
        """
        # The super filter returns Filings. We want EntityFilings
        res = super().filter(form=form,
                             amendments=amendments,
                             filing_date=filing_date,
                             date=date,
                             cik=cik,
                             ticker=ticker,
                             accession_number=accession_number)
        return self._entity_filings_from(res.data)

    def latest(self, n: int = 1):
        """
        Get the latest n filings by filing date.

        Args:
            n: Number of filings to return

        Returns:
            None when there are no filings; a single EntityFiling when exactly
            one filing is selected (e.g. n=1, or only one filing exists);
            otherwise an EntityFilings with the selected filings, newest first.
        """
        sort_indices = pc.sort_indices(self.data, sort_keys=[("filing_date", "descending")])
        sort_indices_top = sort_indices[:min(n, len(sort_indices))]
        latest_rows = pc.take(data=self.data, indices=sort_indices_top)
        filings = self._entity_filings_from(latest_rows)
        if filings.empty:
            return None
        if len(filings) == 1:
            return filings[0]
        return filings

    def head(self, n: int):
        """
        Get the first n filings.

        Args:
            n: Number of filings to return

        Returns:
            EntityFilings containing the first n filings
        """
        return self._entity_filings_from(self._head(n))

    def tail(self, n: int):
        """
        Get the last n filings.

        Args:
            n: Number of filings to return

        Returns:
            EntityFilings containing the last n filings
        """
        return self._entity_filings_from(self._tail(n))

    def sample(self, n: int):
        """
        Get a random sample of n filings.

        Args:
            n: Number of filings to sample

        Returns:
            EntityFilings containing n random filings
        """
        return self._entity_filings_from(self._sample(n))

    @staticmethod
    def summarize(data) -> pd.DataFrame:
        """
        Summarize filing data as a pandas DataFrame.

        Args:
            data: DataFrame of filings; its 'size' and 'isXBRL' columns are read

        Returns:
            DataFrame restricted to the form, filing date, accession number and
            XBRL columns, with filing_date renamed to "filed" and isXBRL
            renamed to "xbrl" (a check mark for '1'/1, otherwise empty).
        """
        return (data
                # NOTE: the formatted size is computed here, but 'size' is not
                # among the columns kept by the filter below.
                .assign(size=lambda df: df['size'].apply(display_size),
                        isXBRL=lambda df: df.isXBRL.map({'1': "\u2713", 1: "\u2713"}).fillna(""),
                        )
                .filter(["form", "filing_date", "accession_number", "isXBRL"])
                .rename(columns={"filing_date": "filed", "isXBRL": "xbrl"})
                )

    def next(self):
        """
        Show the next page of filings.

        Returns:
            EntityFilings with the next page of data, or None if at the end
        """
        data_page = self.data_pager.next()
        if data_page is None:
            log.warning("End of data .. use prev() \u2190 ")
            return None
        return self._entity_page_from(data_page)

    def previous(self):
        """
        Show the previous page of filings.

        Returns:
            EntityFilings with the previous page of data, or None if at the beginning
        """
        data_page = self.data_pager.previous()
        if data_page is None:
            log.warning(" No previous data .. use next() \u2192 ")
            return None
        return self._entity_page_from(data_page)

    def __repr__(self):
        return repr_rich(self.__rich__())

    def __rich__(self):
        """Render the current page of filings as a Rich panel with title, subtitle and paging info."""
        # Create table with appropriate columns and styling
        table = Table(
            show_header=True,
            header_style="bold",
            show_edge=True,
            expand=False,
            padding=(0, 1),
            box=SIMPLE,
            row_styles=["", "bold"]
        )
        # Add columns with specific styling and alignment
        table.add_column("#", style="dim", justify="right")
        table.add_column("Form", width=10, style="bold yellow")
        table.add_column("Description", width=60, style="bold blue")
        table.add_column("Filing Date", width=11)
        table.add_column("Accession Number", width=20)

        # Get current page from data pager
        current_page = self.data_pager.current()
        # Row numbers continue from where this page starts in the full collection
        start_idx = self._original_state.page_start if self._original_state else self.data_pager.start_index

        # Pull the displayed columns out once, then emit one table row per filing
        forms = current_page['form'].to_pylist()
        filing_dates = current_page['filing_date'].to_pylist()
        accession_numbers = current_page['accession_number'].to_pylist()
        for offset, (form, filed, accession) in enumerate(zip(forms, filing_dates, accession_numbers)):
            table.add_row(
                str(start_idx + offset),
                form,
                describe_form(form, prepend_form=False),
                str(filed),
                accession_number_text(accession)
            )

        # Show paging information only if there are multiple pages
        elements = [table]
        if self.data_pager.total_pages > 1:
            # _original_state may be missing (start_idx above allows for that too);
            # in that case use this collection's own length as the total.
            total_filings = self._original_state.num_records if self._original_state else len(self)
            current_count = len(current_page)
            start_num = start_idx + 1
            end_num = start_idx + current_count
            page_info = Text.assemble(
                ("Showing ", "dim"),
                (f"{start_num:,}", "bold red"),
                (" to ", "dim"),
                (f"{end_num:,}", "bold red"),
                (" of ", "dim"),
                (f"{total_filings:,}", "bold"),
                (" filings.", "dim"),
                (" Page using ", "dim"),
                ("← prev()", "bold gray54"),
                (" and ", "dim"),
                ("next() →", "bold gray54")
            )
            elements.extend([Text("\n"), page_info])

        # Get the title
        title = Text.assemble(
            ("Filings for ", "bold"),
            (f"{self.company_name}", "bold green"),
            (" [", "dim"),
            (f"{self.cik}", "bold yellow"),
            ("]", "dim")
        )

        # Get the subtitle
        start_date, end_date = self.date_range
        date_range_text = f"Company filings between {start_date:%Y-%m-%d} and {end_date:%Y-%m-%d}" if start_date else "Company filings"
        subtitle = Text.assemble(
            (date_range_text, "dim"),
            # Separator so the date range and the docs hint do not run together
            " • ",
            ("filings.docs", "cyan dim"),
            (" for usage guide", "dim")
        )
        return Panel(
            Group(*elements),
            title=title,
            subtitle=subtitle,
            border_style="bold grey54",
            expand=False
        )
class EntityFacts:
    """
    Structured facts about an entity taken from XBRL filings.

    Keeps the facts table and its accompanying metadata frame together with the
    entity's CIK and name.
    """

    def __init__(self,
                 cik: int,
                 name: str,
                 facts: pa.Table,
                 fact_meta: pd.DataFrame):
        """Store the entity identifiers, the facts table and the fact metadata."""
        self.cik: int = cik
        self.name: str = name
        self.facts: pa.Table = facts
        self.fact_meta: pd.DataFrame = fact_meta

    def __len__(self):
        """Number of rows in the facts table."""
        return len(self.facts)

    def num_facts(self) -> int:
        """Return the number of facts, counted as the rows of ``fact_meta``."""
        return len(self.fact_meta)

    def to_pandas(self) -> pd.DataFrame:
        """Return the facts table converted to a pandas DataFrame."""
        return self.facts.to_pandas()

    def __rich__(self):
        """Render the facts as a Rich panel titled with the name, CIK and fact count."""
        facts_table = df_to_rich_table(self.facts)
        heading = f"Company Facts({self.name} [{self.cik}] {len(self.facts):,} total facts)"
        return Panel(Group(facts_table), title=heading)

    def __repr__(self):
        rendered = self.__rich__()
        return repr_rich(rendered)
# Arrow schema for an entity's filings table: one field per column.
# NOTE: size, isXBRL and isInlineXBRL are declared as strings here.
schema = pa.schema([
    pa.field('accession_number', pa.string()),
    pa.field('filing_date', pa.date32()),
    pa.field('reportDate', pa.string()),
    pa.field('acceptanceDateTime', pa.timestamp('us')),  # microsecond timestamp, not a string
    pa.field('act', pa.string()),
    pa.field('form', pa.string()),
    pa.field('fileNumber', pa.string()),
    pa.field('items', pa.string()),
    pa.field('size', pa.string()),
    pa.field('isXBRL', pa.string()),
    pa.field('isInlineXBRL', pa.string()),
    pa.field('primaryDocument', pa.string()),
    pa.field('primaryDocDescription', pa.string()),
])
# The same schema object is published under both module-level names
COMPANY_FILINGS_SCHEMA = schema
def empty_company_filings(cik: IntString, company_name: str):
    """
    Create an empty filings container.

    The table has zero rows and the columns of COMPANY_FILINGS_SCHEMA.

    Args:
        cik: The CIK number
        company_name: The company name

    Returns:
        EntityFilings: An empty filings container
    """
    # Build the zero-row table from the schema itself so the column count never
    # has to be kept in sync by hand when the schema changes.
    table = COMPANY_FILINGS_SCHEMA.empty_table()
    return EntityFilings(table, cik=cik, company_name=company_name)
# For backward compatibility: the Company* names are aliases bound to the very
# same class objects as the Entity* names (they are not listed in __all__).
CompanyFiling = EntityFiling
CompanyFilings = EntityFilings
CompanyFacts = EntityFacts