Files
2025-12-09 12:13:01 +01:00

341 lines
12 KiB
Python

"""
XBRL Statement Stitching - XBRLS Class
This module contains the XBRLS class which represents multiple XBRL filings
stitched together for multi-period analysis.
"""
import logging
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union

import pandas as pd

from edgar.xbrl.stitching.core import StatementStitcher, stitch_statements
from edgar.xbrl.stitching.query import StitchedFactQuery, StitchedFactsView

if TYPE_CHECKING:
    from edgar._filings import Filings
    from edgar.xbrl.statements import StitchedStatements
class XBRLS:
    """
    A class representing multiple XBRL filings stitched together.

    This provides a unified view of financial data across multiple time periods,
    automatically handling the complexities of statement stitching.
    """

    def __init__(self, xbrl_list: List[Any]):
        """
        Initialize an XBRLS instance with a list of XBRL objects.

        Args:
            xbrl_list: List of XBRL objects, should be from the same company
                and ordered from newest to oldest
        """
        # The list is stored as given (not copied)
        self.xbrl_list = xbrl_list

        # Entity info comes from the first (expected newest) XBRL; empty when
        # there are no filings
        self.entity_info = xbrl_list[0].entity_info if xbrl_list else {}

        # Cache of stitched statements, keyed by the get_statement() arguments
        self._statement_cache: Dict[str, Any] = {}

        # Lazily created by the `facts` property
        self._stitched_facts_view = None

    @classmethod
    def from_filings(cls, filings: Union['Filings', List[Any]], filter_amendments: bool = True) -> 'XBRLS':
        """
        Create an XBRLS object from a list of Filing objects or a Filings object containing multiple filings.

        Each filing should be the same form (e.g., 10-K, 10-Q) and from the same company.
        Filings whose XBRL cannot be built are skipped (a warning is logged).

        Args:
            filings: Filings object or list of Filing objects, should be from the same company
            filter_amendments: Whether to drop amended filings before stitching

        Returns:
            XBRLS object with stitched data
        """
        from edgar.xbrl.xbrl import XBRL

        if not filter_amendments:
            filtered_filings = filings
        elif hasattr(filings, 'filter'):
            filtered_filings = filings.filter(amendments=False)
        else:
            # A plain list has no .filter(); calling it used to raise AttributeError.
            # NOTE(review): assumes amendments are identified by a form type ending
            # in '/A' (e.g. '10-K/A') - confirm this matches Filings.filter.
            filtered_filings = [
                filing for filing in filings
                if not str(getattr(filing, 'form', '')).endswith('/A')
            ]

        # Defensive: treat a filter that yields nothing as "no filings"
        if filtered_filings is None:
            filtered_filings = []

        # Sort filings by date (newest first)
        sorted_filings = sorted(filtered_filings, key=lambda f: f.filing_date, reverse=True)

        # Create XBRL objects from filings
        log = logging.getLogger(__name__)
        xbrl_list = []
        for filing in sorted_filings:
            try:
                xbrl_list.append(XBRL.from_filing(filing))
            except Exception as exc:
                # Best-effort: one unparsable filing must not abort the whole
                # stitch, but the skip should be visible rather than silent.
                log.warning(
                    "Skipping filing dated %s: could not create XBRL (%s: %s)",
                    filing.filing_date, type(exc).__name__, exc,
                )

        return cls(xbrl_list)

    @classmethod
    def from_xbrl_objects(cls, xbrl_list: List[Any]) -> 'XBRLS':
        """
        Create an XBRLS object from a list of XBRL objects.

        Args:
            xbrl_list: List of XBRL objects, should be from the same company

        Returns:
            XBRLS object with stitched data
        """
        return cls(xbrl_list)

    @property
    def statements(self) -> 'StitchedStatements':
        """
        Get a user-friendly interface to access stitched financial statements.

        A new StitchedStatements wrapper is created on each access.

        Returns:
            StitchedStatements object
        """
        from edgar.xbrl.statements import StitchedStatements

        return StitchedStatements(self)

    @property
    def facts(self) -> 'StitchedFactsView':
        """
        Get a view over stitched facts from all XBRL filings.

        The view is created on first access and reused afterwards.

        Returns:
            StitchedFactsView for querying standardized, multi-period data
        """
        if self._stitched_facts_view is None:
            self._stitched_facts_view = StitchedFactsView(self)
        return self._stitched_facts_view

    def query(self,
              max_periods: int = 8,
              standardize: bool = True,
              statement_types: Optional[List[str]] = None,
              **kwargs) -> 'StitchedFactQuery':
        """
        Start a new query for stitched facts across all filings.

        Args:
            max_periods: Maximum periods to include in stitched data
            standardize: Whether to use standardized labels
            statement_types: List of statement types to include
            **kwargs: Additional options passed to StitchedFactQuery

        Returns:
            StitchedFactQuery for building complex queries
        """
        # The named parameters take precedence over same-named entries in kwargs
        kwargs.update({
            'max_periods': max_periods,
            'standardize': standardize,
            'statement_types': statement_types,
        })
        return self.facts.query(**kwargs)

    def get_statement(self, statement_type: str,
                      max_periods: int = 8,
                      standard: bool = True,
                      use_optimal_periods: bool = True,
                      include_dimensions: bool = False) -> Dict[str, Any]:
        """
        Get a stitched statement of the specified type.

        Results are cached per combination of arguments.

        Args:
            statement_type: Type of statement to stitch ('IncomeStatement', 'BalanceSheet', etc.)
            max_periods: Maximum number of periods to include
            standard: Whether to use standardized concept labels
            use_optimal_periods: Whether to use entity info to determine optimal periods
            include_dimensions: Whether to include dimensional segment data (default: False for stitching)

        Returns:
            Dictionary with stitched statement data
        """
        # Check cache first
        cache_key = f"{statement_type}_{max_periods}_{standard}_{use_optimal_periods}_{include_dimensions}"
        if cache_key in self._statement_cache:
            return self._statement_cache[cache_key]

        # Stitch the statement
        result = stitch_statements(
            self.xbrl_list,
            statement_type=statement_type,
            period_type=StatementStitcher.PeriodType.ALL_PERIODS,
            max_periods=max_periods,
            standard=standard,
            use_optimal_periods=use_optimal_periods,
            include_dimensions=include_dimensions,
        )

        # Cache the result
        self._statement_cache[cache_key] = result
        return result

    def render_statement(self, statement_type: str,
                         max_periods: int = 8,
                         standardize: bool = True,
                         use_optimal_periods: bool = True,
                         show_date_range: bool = False,
                         include_dimensions: bool = False):
        """
        Render a stitched statement in a rich table format.

        Args:
            statement_type: Type of statement to render ('BalanceSheet', 'IncomeStatement', etc.)
            max_periods: Maximum number of periods to include
            standardize: Whether to use standardized concept labels
            use_optimal_periods: Whether to use entity info to determine optimal periods
            show_date_range: Whether to show full date ranges for duration periods
            include_dimensions: Whether to include dimensional segment data (default: False for stitching)

        Returns:
            RichTable: A formatted table representation of the stitched statement
        """
        # Create a StitchedStatement object and use its render method
        from edgar.xbrl.statements import StitchedStatement

        statement = StitchedStatement(
            self, statement_type, max_periods, standardize, use_optimal_periods, include_dimensions
        )
        return statement.render(show_date_range=show_date_range)

    def to_dataframe(self, statement_type: str,
                     max_periods: int = 8,
                     standardize: bool = True) -> pd.DataFrame:
        """
        Convert a stitched statement to a pandas DataFrame.

        Args:
            statement_type: Type of statement to convert ('BalanceSheet', 'IncomeStatement', etc.)
            max_periods: Maximum number of periods to include
            standardize: Whether to use standardized concept labels

        Returns:
            DataFrame with periods as columns and concepts as index
        """
        # Create a StitchedStatement object and use its to_dataframe method
        from edgar.xbrl.statements import StitchedStatement

        statement = StitchedStatement(self, statement_type, max_periods, standardize)
        return statement.to_dataframe()

    def get_periods(self) -> List[Dict[str, str]]:
        """
        Get all available periods across all XBRL objects.

        Periods are de-duplicated by their dates; when several filings report
        the same period, the entry from the earliest filing in `xbrl_list` is kept.

        Returns:
            List of period information dictionaries, each containing:
            - 'type': 'instant' or 'duration'
            - 'key': period key (e.g., 'instant_2024-09-28', 'duration_2024-01-01_2024-09-28')
            - 'label': human-readable label
            For instant periods:
            - 'date': end date as 'YYYY-MM-DD'
            For duration periods:
            - 'start_date': start date as 'YYYY-MM-DD'
            - 'end_date': end date as 'YYYY-MM-DD'
            - 'days': duration in days
            - 'period_type': classification ('Annual', 'Quarterly', etc.)
        """
        unique_periods: Dict[str, Dict[str, str]] = {}

        for xbrl in self.xbrl_list:
            for period in xbrl.reporting_periods:
                # De-duplicate on the date(s): a single date for instants,
                # start/end pair for durations
                if period['type'] == 'instant':
                    key = period['date']
                else:
                    key = f"{period['start_date']}_{period['end_date']}"
                # First occurrence wins; dict preserves insertion order
                unique_periods.setdefault(key, period)

        return list(unique_periods.values())

    def get_period_end_dates(self) -> List[str]:
        """
        Get end dates for all available periods in YYYY-MM-DD format.

        This is a convenience method that extracts just the end dates from periods,
        handling both instant and duration periods correctly. Periods of any other
        type, or without an end date, are ignored.

        Returns:
            List of unique end dates as strings in YYYY-MM-DD format, sorted newest first
        """
        end_dates = set()
        for period in self.get_periods():
            period_type = period.get('type')
            if period_type == 'duration':
                end_date = period.get('end_date')
            elif period_type == 'instant':
                end_date = period.get('date')
            else:
                continue
            if end_date:
                end_dates.add(end_date)

        # The set already removes duplicates; sorting the strings in reverse
        # puts the newest date first
        return sorted(end_dates, reverse=True)

    def __str__(self) -> str:
        """
        String representation of the XBRLS object.

        Returns:
            String with the number of filings and unique periods
        """
        filing_count = len(self.xbrl_list)
        periods = self.get_periods()
        return f"XBRLS with {filing_count} filings covering {len(periods)} unique periods"

    def __rich__(self):
        """
        Rich representation for pretty console output.

        Returns:
            Rich Panel summarizing filings, periods and available statement types
        """
        from rich.panel import Panel
        from rich.text import Text

        # Get information about the XBRLS object
        filing_count = len(self.xbrl_list)
        periods = self.get_periods()

        # Create a panel with the information
        content = Text.from_markup("[bold]XBRLS Object[/bold]\n")
        content.append(f"Filings: {filing_count}\n")
        content.append(f"Unique Periods: {len(periods)}\n")

        # Collect the statement types available in any of the filings
        statement_types = set()
        for xbrl in self.xbrl_list:
            for stmt in xbrl.get_all_statements():
                if stmt['type']:
                    statement_types.add(stmt['type'])

        # Text.append() does not parse console markup, so headings are styled
        # with style="bold" instead of literal [bold] tags
        content.append("\nAvailable Statement Types:\n", style="bold")
        for stmt_type in sorted(statement_types):
            content.append(f"- {stmt_type}\n")

        # Show how to access statements
        content.append("\nExample Usage:\n", style="bold")
        content.append("xbrls.statements.income_statement()\n")
        content.append("xbrls.statements.balance_sheet()\n")
        content.append("xbrls.to_dataframe('IncomeStatement')\n")

        return Panel(content, title="XBRLS", expand=False)