455 lines
15 KiB
Python
455 lines
15 KiB
Python
"""
|
|
Filings-related classes for the Entity package.
|
|
|
|
This module contains classes related to SEC filings for entities, including
|
|
collections of filings and filing facts.
|
|
"""
|
|
from typing import List, Union
|
|
|
|
import pandas as pd
|
|
import pyarrow as pa
|
|
import pyarrow.compute as pc
|
|
from rich.box import SIMPLE
|
|
from rich.console import Group
|
|
from rich.panel import Panel
|
|
from rich.table import Table
|
|
from rich.text import Text
|
|
|
|
from edgar._filings import Filing, Filings, PagingState
|
|
from edgar.core import IntString, log
|
|
from edgar.formatting import accession_number_text, display_size
|
|
from edgar.reference.forms import describe_form
|
|
from edgar.richtools import Docs, df_to_rich_table, repr_rich
|
|
|
|
# Public API of this module: the names exported by `from ... import *`.
__all__ = [
    "EntityFiling",
    "EntityFilings",
    "EntityFacts",
    "empty_company_filings",
]
|
|
|
|
|
|
class EntityFiling(Filing):
    """
    A single SEC filing belonging to an entity.

    Builds on the base Filing class by also storing the per-filing metadata
    passed to the constructor: report date, acceptance datetime, file number,
    items, size, primary document details and the XBRL flags.

    Attributes:
        items (str): Filing items from SEC metadata. For 8-K filings, this
            indicates which items are included (e.g., "2.02,9.01").

            **Data Source**: This value comes from SEC filing metadata, not
            from parsing the filing document itself.

            **Legacy SGML Limitation**: For legacy SGML filings (1999-2001),
            the SEC's historical metadata may be incorrect or incomplete.
            Modern XML filings (2005+) have accurate metadata.

            **Workaround for Legacy Filings**: For accurate item extraction
            from legacy SGML 8-K filings, parse the filing text directly
            using regex patterns. See GitHub Issue #462 for example code.
    """

    def __init__(self,
                 cik: int,
                 company: str,
                 form: str,
                 filing_date: str,
                 report_date: str,
                 acceptance_datetime: str,
                 accession_no: str,
                 file_number: str,
                 items: str,
                 size: int,
                 primary_document: str,
                 primary_doc_description: str,
                 is_xbrl: bool,
                 is_inline_xbrl: bool):
        # The base Filing only needs the identifying fields.
        super().__init__(
            cik=cik,
            company=company,
            form=form,
            filing_date=filing_date,
            accession_no=accession_no,
        )

        # Dates reported alongside the filing.
        self.report_date = report_date
        self.acceptance_datetime = acceptance_datetime

        # Entity-specific metadata.
        self.file_number: str = file_number
        # `items` comes from SEC metadata; the class docstring covers its limitations.
        self.items: str = items
        self.size: int = size

        # Primary document details.
        self.primary_document: str = primary_document
        self.primary_doc_description: str = primary_doc_description

        # XBRL flags.
        self.is_xbrl: bool = is_xbrl
        self.is_inline_xbrl: bool = is_inline_xbrl

    def related_filings(self):
        """Return the entity's filings that share this filing's file number.

        The lookup is delegated to the entity's ``get_filings`` with
        ``sort_by="filing_date"``.
        """
        entity = self.get_entity()
        return entity.get_filings(file_number=self.file_number, sort_by="filing_date")

    def __str__(self):
        """Return a compact one-line description of the filing."""
        described_fields = ", ".join([
            f"company='{self.company}'",
            f"cik={self.cik}",
            f"form='{self.form}'",
            f"filing_date='{self.filing_date}'",
            f"accession_no='{self.accession_no}'",
        ])
        return f"Filing({described_fields})"
|
|
|
|
|
|
class EntityFilings(Filings):
    """
    Collection of SEC filings for an entity.

    Extends the base Filings class so that selection, filtering and paging
    return EntityFilings (carrying the entity's CIK and company name) and so
    that individual rows are materialized as EntityFiling objects.
    """

    def __init__(self,
                 data: pa.Table,
                 cik: int,
                 company_name: str,
                 original_state: PagingState = None):
        """
        Args:
            data: Arrow table holding one row per filing.
            cik: CIK of the entity the filings belong to.
            company_name: Name of the entity the filings belong to.
            original_state: Paging state forwarded to the base class; used
                when this object represents one page of a larger collection.
        """
        super().__init__(data, original_state=original_state)
        self.cik = cik
        self.company_name = company_name

    @property
    def docs(self):
        """Usage documentation wrapper for this object."""
        return Docs(self)

    def __getitem__(self, item):
        """Return the filing at index ``item`` (see ``get_filing_at``)."""
        return self.get_filing_at(item)

    @property
    def empty(self):
        """True when the underlying table has no rows."""
        return len(self.data) == 0

    def get_filing_at(self, item: int):
        """Get the filing at the specified index.

        Args:
            item: Row index into the underlying table.

        Returns:
            EntityFiling built from the row's columns plus this collection's
            CIK and company name.
        """
        def value(column: str):
            # Convert the Arrow scalar at (column, item) to a Python object.
            return self.data[column][item].as_py()

        return EntityFiling(
            cik=self.cik,
            company=self.company_name,
            form=value('form'),
            filing_date=value('filing_date'),
            report_date=value('reportDate'),
            acceptance_datetime=value('acceptanceDateTime'),
            accession_no=value('accession_number'),
            file_number=value('fileNumber'),
            items=value('items'),
            size=value('size'),
            primary_document=value('primaryDocument'),
            primary_doc_description=value('primaryDocDescription'),
            is_xbrl=value('isXBRL'),
            is_inline_xbrl=value('isInlineXBRL')
        )

    def filter(self,
               form: Union[str, List[str]] = None,
               amendments: bool = None,
               filing_date: str = None,
               date: str = None,
               cik: Union[int, str, List[Union[int, str]]] = None,
               ticker: Union[str, List[str]] = None,
               accession_number: Union[str, List[str]] = None):
        """
        Filter the filings based on various criteria.

        All arguments are forwarded unchanged to the base class filter.

        Args:
            form: Filter by form type
            amendments: Include amendments
            filing_date: Filter by filing date
            date: Alias for filing_date
            cik: Filter by CIK
            ticker: Filter by ticker
            accession_number: Filter by accession number

        Returns:
            Filtered EntityFilings
        """
        # The super filter returns Filings. We want EntityFilings, so the
        # filtered table is re-wrapped with this entity's identity.
        res = super().filter(form=form,
                             amendments=amendments,
                             filing_date=filing_date,
                             date=date,
                             cik=cik,
                             ticker=ticker,
                             accession_number=accession_number)
        return EntityFilings(data=res.data, cik=self.cik, company_name=self.company_name)

    def latest(self, n: int = 1):
        """
        Get the latest n filings, ordered by filing_date descending.

        Args:
            n: Number of filings to return

        Returns:
            None when no filings are selected, a single EntityFiling when
            exactly one filing is selected (including when n > 1 but only one
            filing exists), otherwise an EntityFilings with the selection.
        """
        sort_indices = pc.sort_indices(self.data, sort_keys=[("filing_date", "descending")])
        sort_indices_top = sort_indices[:min(n, len(sort_indices))]
        latest_filing_index = pc.take(data=self.data, indices=sort_indices_top)
        filings = EntityFilings(latest_filing_index,
                                cik=self.cik,
                                company_name=self.company_name)
        if filings.empty:
            return None
        if len(filings) == 1:
            return filings[0]
        return filings

    def head(self, n: int):
        """
        Get the first n filings.

        Args:
            n: Number of filings to return

        Returns:
            EntityFilings containing the first n filings
        """
        selection = self._head(n)
        return EntityFilings(data=selection, cik=self.cik, company_name=self.company_name)

    def tail(self, n: int):
        """
        Get the last n filings.

        Args:
            n: Number of filings to return

        Returns:
            EntityFilings containing the last n filings
        """
        selection = self._tail(n)
        return EntityFilings(data=selection, cik=self.cik, company_name=self.company_name)

    def sample(self, n: int):
        """
        Get a random sample of n filings.

        Args:
            n: Number of filings to sample

        Returns:
            EntityFilings containing n random filings
        """
        selection = self._sample(n)
        return EntityFilings(data=selection, cik=self.cik, company_name=self.company_name)

    @staticmethod
    def summarize(data) -> pd.DataFrame:
        """
        Summarize filing data as a pandas DataFrame.

        Args:
            data: DataFrame of filings; must have an ``isXBRL`` column.

        Returns:
            DataFrame restricted to form, filing date (renamed ``filed``),
            accession number and an ``xbrl`` column showing a check mark for
            rows whose isXBRL value is ``'1'`` or ``1`` and "" otherwise.
        """
        # Only the columns kept by the filter below are derived here; the
        # size column is not part of the summary, so it is not reformatted.
        return (data
                .assign(isXBRL=lambda df: df.isXBRL.map({'1': "\u2713", 1: "\u2713"}).fillna(""))
                .filter(["form", "filing_date", "accession_number", "isXBRL"])
                .rename(columns={"filing_date": "filed", "isXBRL": "xbrl"})
                )

    def next(self):
        """
        Show the next page of filings.

        Returns:
            EntityFilings with the next page of data, or None if at the end
        """
        data_page = self.data_pager.next()
        if data_page is None:
            log.warning("End of data .. use prev() \u2190 ")
            return None
        # Record where the new page starts so row numbers continue correctly.
        start_index, _ = self.data_pager._current_range
        filings_state = PagingState(page_start=start_index, num_records=len(self))
        return EntityFilings(data_page,
                             cik=self.cik,
                             company_name=self.company_name,
                             original_state=filings_state)

    def previous(self):
        """
        Show the previous page of filings.

        Returns:
            EntityFilings with the previous page of data, or None if at the beginning
        """
        data_page = self.data_pager.previous()
        if data_page is None:
            log.warning(" No previous data .. use next() \u2192 ")
            return None
        # Record where the new page starts so row numbers continue correctly.
        start_index, _ = self.data_pager._current_range
        filings_state = PagingState(page_start=start_index, num_records=len(self))
        return EntityFilings(data_page,
                             cik=self.cik,
                             company_name=self.company_name,
                             original_state=filings_state)

    def __repr__(self):
        return repr_rich(self.__rich__())

    def __rich__(self):
        """Render the current page of filings as a rich Panel."""
        # Create table with appropriate columns and styling
        table = Table(
            show_header=True,
            header_style="bold",
            show_edge=True,
            expand=False,
            padding=(0, 1),
            box=SIMPLE,
            row_styles=["", "bold"]
        )

        # Add columns with specific styling and alignment
        table.add_column("#", style="dim", justify="right")
        table.add_column("Form", width=10, style="bold yellow")
        table.add_column("Description", width=60, style="bold blue")
        table.add_column("Filing Date", width=11)
        table.add_column("Accession Number", width=20)

        # Get current page from data pager
        current_page = self.data_pager.current()

        # Calculate start index for proper indexing
        original_state = self._original_state
        start_idx = original_state.page_start if original_state else self.data_pager.start_index

        # Convert each displayed column once, then walk the rows in lockstep.
        forms = current_page['form'].to_pylist()
        filing_dates = current_page['filing_date'].to_pylist()
        accession_numbers = current_page['accession_number'].to_pylist()
        for offset, (form, filed, accession) in enumerate(zip(forms, filing_dates, accession_numbers)):
            table.add_row(
                str(start_idx + offset),
                form,
                describe_form(form, prepend_form=False),
                str(filed),
                accession_number_text(accession),
            )

        # Show paging information only if there are multiple pages
        elements = [table]

        if self.data_pager.total_pages > 1:
            # Same None guard as start_idx above. Without a paging state this
            # object is presumably not a page of a larger set, so fall back
            # to the size of its own table.
            total_filings = original_state.num_records if original_state else len(self.data)
            current_count = len(current_page)
            start_num = start_idx + 1
            end_num = start_idx + current_count

            page_info = Text.assemble(
                ("Showing ", "dim"),
                (f"{start_num:,}", "bold red"),
                (" to ", "dim"),
                (f"{end_num:,}", "bold red"),
                (" of ", "dim"),
                (f"{total_filings:,}", "bold"),
                (" filings.", "dim"),
                (" Page using ", "dim"),
                ("← prev()", "bold gray54"),
                (" and ", "dim"),
                ("next() →", "bold gray54")
            )

            elements.extend([Text("\n"), page_info])

        # Get the title
        title = Text.assemble(
            ("Filings for ", "bold"),
            (f"{self.company_name}", "bold green"),
            (" [", "dim"),
            (f"{self.cik}", "bold yellow"),
            ("]", "dim")
        )

        # Get the subtitle
        start_date, end_date = self.date_range
        date_range_text = f"Company filings between {start_date:%Y-%m-%d} and {end_date:%Y-%m-%d}" if start_date else "Company filings"
        subtitle = Text.assemble(
            (date_range_text, "dim"),
            " • ",
            ("filings.docs", "cyan dim"),
            (" for usage guide", "dim")
        )
        return Panel(
            Group(*elements),
            title=title,
            subtitle=subtitle,
            border_style="bold grey54",
            expand=False
        )
|
|
|
|
|
|
class EntityFacts:
    """
    Holds the structured XBRL facts for one entity.

    Stores the entity's CIK and name together with the facts table and the
    accompanying fact metadata frame.
    """

    def __init__(self,
                 cik: int,
                 name: str,
                 facts: pa.Table,
                 fact_meta: pd.DataFrame):
        # Identity of the entity the facts belong to.
        self.cik: int = cik
        self.name: str = name
        # The facts themselves and the metadata describing them.
        self.facts: pa.Table = facts
        self.fact_meta: pd.DataFrame = fact_meta

    def to_pandas(self) -> pd.DataFrame:
        """Return the facts table converted to a pandas DataFrame."""
        facts_frame = self.facts.to_pandas()
        return facts_frame

    def __len__(self):
        # Length is the number of rows in the facts table.
        return len(self.facts)

    def num_facts(self) -> int:
        """Return the number of rows in the fact metadata."""
        return len(self.fact_meta)

    def __rich__(self):
        """Render the facts table inside a titled rich Panel."""
        facts_table = df_to_rich_table(self.facts)
        panel_title = f"Company Facts({self.name} [{self.cik}] {len(self.facts):,} total facts)"
        return Panel(Group(facts_table), title=panel_title)

    def __repr__(self):
        rendered = self.__rich__()
        return repr_rich(rendered)
|
|
|
|
# Arrow schema for an entity's filings table. The module-level name `schema`
# is bound to the same object.
COMPANY_FILINGS_SCHEMA = schema = pa.schema([
    pa.field('accession_number', pa.string()),
    pa.field('filing_date', pa.date32()),
    pa.field('reportDate', pa.string()),
    # Stored as a microsecond-resolution timestamp rather than a string.
    pa.field('acceptanceDateTime', pa.timestamp('us')),
    pa.field('act', pa.string()),
    pa.field('form', pa.string()),
    pa.field('fileNumber', pa.string()),
    pa.field('items', pa.string()),
    pa.field('size', pa.string()),
    pa.field('isXBRL', pa.string()),
    pa.field('isInlineXBRL', pa.string()),
    pa.field('primaryDocument', pa.string()),
    pa.field('primaryDocDescription', pa.string()),
])
|
|
|
|
def empty_company_filings(cik:IntString, company_name:str):
    """
    Create an empty filings container.

    The table has zero rows and the columns of COMPANY_FILINGS_SCHEMA.

    Args:
        cik: The CIK number
        company_name: The company name

    Returns:
        EntityFilings: An empty filings container
    """
    # One empty column per schema field. The count is derived from the schema
    # so the two cannot drift apart when fields are added or removed.
    empty_columns = [[] for _ in COMPANY_FILINGS_SCHEMA.names]
    table = pa.Table.from_arrays(empty_columns, schema=COMPANY_FILINGS_SCHEMA)
    return EntityFilings(table, cik=cik, company_name=company_name)
|
|
|
|
|
|
# Backward-compatible aliases: the Company* names refer to the same classes
# as their Entity* counterparts.
CompanyFiling = EntityFiling
CompanyFilings = EntityFilings
CompanyFacts = EntityFacts
|
|
|