""" Financial statement processing for XBRL data. This module provides functions for working with financial statements. """ from dataclasses import dataclass from typing import Any, Dict, List, Optional, Union import pandas as pd from rich import box from rich.table import Table from edgar.richtools import repr_rich from edgar.xbrl.exceptions import StatementNotFound @dataclass class StatementInfo: name: str concept: str title: str statement_to_concepts = { "IncomeStatement": StatementInfo(name="IncomeStatement", concept="us-gaap_IncomeStatementAbstract", title="Consolidated Statement of Income"), "BalanceSheet": StatementInfo(name="BalanceSheet", concept="us-gaap_StatementOfFinancialPositionAbstract", title="Consolidated Balance Sheets", ), "CashFlowStatement": StatementInfo(name="CashFlowStatement", concept="us-gaap_StatementOfCashFlowsAbstract", title="Consolidated Statement of Cash Flows"), "StatementOfEquity": StatementInfo(name="StatementOfEquity", concept="us-gaap_StatementOfStockholdersEquityAbstract", title="Consolidated Statement of Equity" ), "ComprehensiveIncome": StatementInfo(name="ComprehensiveIncome", concept="us-gaap_StatementOfIncomeAndComprehensiveIncomeAbstract", title="Consolidated Statement of Comprehensive Income" ), "CoverPage": StatementInfo(name="CoverPage", concept="dei_CoverAbstract", title="Cover Page" ), } class StatementValidationError(Exception): """Raised when statement validation fails.""" pass class Statement: """ A single financial statement extracted from XBRL data. This class provides convenient methods for rendering and manipulating a specific financial statement. It includes validation, normalization, and analysis capabilities. Features: - Statement structure validation - Error handling for missing/malformed data - Statement normalization across different companies - Common financial analysis methods - Ratio calculations and trend analysis """ # Required concepts for each statement type REQUIRED_CONCEPTS = { 'BalanceSheet': [ 'us-gaap_Assets', 'us-gaap_Liabilities', 'us-gaap_StockholdersEquity' ], 'IncomeStatement': [ 'us-gaap_Revenues', 'us-gaap_NetIncomeLoss' ], 'CashFlowStatement': [ 'us-gaap_CashAndCashEquivalentsPeriodIncreaseDecrease', 'us-gaap_CashAndCashEquivalentsAtCarryingValue' ] } def __init__(self, xbrl, role_or_type: str, canonical_type: Optional[str] = None, skip_concept_check: bool = False): """ Initialize with an XBRL object and statement identifier. Args: xbrl: XBRL object containing parsed data role_or_type: Role URI, statement type, or statement short name canonical_type: Optional canonical statement type (e.g., "BalanceSheet", "IncomeStatement") If provided, this type will be used for specialized processing logic skip_concept_check: If True, skip checking for required concepts (useful for testing) Raises: StatementValidationError: If statement validation fails """ self.xbrl = xbrl self.role_or_type = role_or_type self.canonical_type = canonical_type def is_segmented(self) -> bool: """ Check if the statement is a segmented statement. Returns: True if the statement is segmented, False otherwise """ return self.role_or_type.startswith("Segment") def render(self, period_filter: Optional[str] = None, period_view: Optional[str] = None, standard: bool = True, show_date_range: bool = False, include_dimensions: bool = True) -> Any: """ Render the statement as a formatted table. Args: period_filter: Optional period key to filter facts period_view: Optional name of a predefined period view standard: Whether to use standardized concept labels show_date_range: Whether to show full date ranges for duration periods include_dimensions: Whether to include dimensional segment data Returns: Rich Table containing the rendered statement """ # Use the canonical type for rendering if available, otherwise use the role rendering_type = self.canonical_type if self.canonical_type else self.role_or_type return self.xbrl.render_statement(rendering_type, period_filter=period_filter, period_view=period_view, standard=standard, show_date_range=show_date_range, include_dimensions=include_dimensions) def __rich__(self) -> Any: """ Rich console representation. Returns: Rich Table object if rich is available, else string representation """ if Table is None: return str(self) return self.render() def __repr__(self): return repr_rich(self.__rich__()) def __str__(self): """String representation using improved rendering with proper width.""" rendered_statement = self.render() return str(rendered_statement) # Delegates to RenderedStatement.__str__() @property def docs(self): """ Get comprehensive documentation for the Statement class. Returns a Docs object with detailed API documentation including usage patterns, examples, and guidance for working with financial statement data. The documentation is searchable using the .search() method. Returns: Docs: Documentation object with rich display and search capabilities Example: >>> statement.docs # Display full documentation >>> statement.docs.search("convert to dataframe") # Search for specific topics """ from edgar.richtools import Docs return Docs(self) @property def primary_concept(self): data = self.get_raw_data() return data[0]['all_names'][0] def to_dataframe(self, period_filter:str=None, period_view:str=None, standard:bool=True, include_dimensions:bool=True, include_unit:bool=False, include_point_in_time:bool=False, presentation:bool=False) -> Any: """Convert statement to pandas DataFrame. Args: period_filter: Optional period key to filter facts period_view: Optional name of a predefined period view standard: Whether to use standardized concept labels include_dimensions: Whether to include dimensional segment data include_unit: If True, add a 'unit' column with unit information (e.g., 'usd', 'shares', 'usdPerShare') include_point_in_time: If True, add a 'point_in_time' boolean column (True for 'instant', False for 'duration') presentation: If True, apply HTML-matching presentation logic (Issue #463) Cash Flow: outflows (balance='credit') shown as negative Income: apply preferred_sign transformations Default: False (raw instance values) Returns: DataFrame with raw values + metadata (balance, weight, preferred_sign) by default """ try: # Build DataFrame from raw data (Issue #463) df = self._build_dataframe_from_raw_data( period_filter=period_filter, period_view=period_view, standard=standard, include_dimensions=include_dimensions, include_unit=include_unit, include_point_in_time=include_point_in_time ) if df is None or isinstance(df, str) or df.empty: return df # Add metadata columns (balance, weight, preferred_sign) - Issue #463 df = self._add_metadata_columns(df) # Apply presentation transformation if requested (Issue #463) if presentation: df = self._apply_presentation(df) return df except ImportError: return "Pandas is required for DataFrame conversion" def _build_dataframe_from_raw_data( self, period_filter: Optional[str] = None, period_view: Optional[str] = None, standard: bool = True, include_dimensions: bool = True, include_unit: bool = False, include_point_in_time: bool = False ) -> pd.DataFrame: """ Build DataFrame directly from raw statement data (Issue #463). This bypasses the rendering pipeline to get raw instance values. """ from edgar.xbrl.core import get_unit_display_name from edgar.xbrl.core import is_point_in_time as get_is_point_in_time from edgar.xbrl.periods import determine_periods_to_display # Get raw statement data raw_data = self.get_raw_data(period_filter=period_filter) if not raw_data: return pd.DataFrame() # Determine which periods to display statement_type = self.canonical_type if self.canonical_type else self.role_or_type if period_view: # Use specified period view from edgar.xbrl.periods import get_period_views period_views = get_period_views(self.xbrl, statement_type) periods_to_display = period_views.get(period_view, []) if not periods_to_display: # Fallback to default periods_to_display = determine_periods_to_display(self.xbrl, statement_type) else: # Use default period selection periods_to_display = determine_periods_to_display(self.xbrl, statement_type) if not periods_to_display: return pd.DataFrame() # Build DataFrame rows df_rows = [] for item in raw_data: # Skip if filtering by dimensions if not include_dimensions and item.get('dimension'): continue # Build base row row = { 'concept': item.get('concept', ''), 'label': item.get('label', '') } # Add period values (raw from instance document) values_dict = item.get('values', {}) for period_key, period_label in periods_to_display: # Use end date as column name (more concise than full label) # Extract date from period_key (e.g., "duration_2016-09-25_2017-09-30" → "2017-09-30") if '_' in period_key: parts = period_key.split('_') if len(parts) >= 2: # Use end date for duration periods, or the date for instant periods column_name = parts[-1] if len(parts) > 2 else parts[1] else: column_name = period_label else: column_name = period_label # Use raw value from instance document row[column_name] = values_dict.get(period_key) # Add unit if requested if include_unit: units_dict = item.get('units', {}) # Get first available unit (should be same for all periods) unit_ref = None for period_key, _ in periods_to_display: if period_key in units_dict and units_dict[period_key] is not None: unit_ref = units_dict[period_key] break row['unit'] = get_unit_display_name(unit_ref) # Add point_in_time if requested if include_point_in_time: period_types_dict = item.get('period_types', {}) # Get first available period type period_type = None for period_key, _ in periods_to_display: if period_key in period_types_dict and period_types_dict[period_key] is not None: period_type = period_types_dict[period_key] break row['point_in_time'] = get_is_point_in_time(period_type) # Add structural columns row['level'] = item.get('level', 0) row['abstract'] = item.get('is_abstract', False) row['dimension'] = item.get('is_dimension', False) df_rows.append(row) return pd.DataFrame(df_rows) def _add_metadata_columns(self, df: pd.DataFrame) -> pd.DataFrame: """ Add metadata columns (balance, weight, preferred_sign) to DataFrame. Issue #463: Users need access to XBRL metadata to understand value transformations. Note: preferred_sign comes from statement's raw data (presentation linkbase), not from facts. It's period-specific in raw data, but we use a representative value (from first period) for the metadata column. """ if df.empty or 'concept' not in df.columns: return df # Get statement's raw data to access preferred_signs raw_data = self.get_raw_data() raw_data_by_concept = {item.get('concept'): item for item in raw_data} # Create metadata dictionaries to populate balance_map = {} weight_map = {} preferred_sign_map = {} # For each unique concept in the DataFrame for concept in df['concept'].unique(): if not concept: continue # Get balance and weight from facts (concept-level attributes) facts_df = self.xbrl.facts.query().by_concept(concept, exact=True).limit(1).to_dataframe() if not facts_df.empty: fact = facts_df.iloc[0] balance_map[concept] = fact.get('balance') weight_map[concept] = fact.get('weight') # Get preferred_sign from statement raw data (presentation linkbase) # preferred_sign is period-specific, so we take the first available value if concept in raw_data_by_concept: item = raw_data_by_concept[concept] preferred_signs = item.get('preferred_signs', {}) if preferred_signs: # Use first period's preferred_sign as representative value preferred_sign_map[concept] = next(iter(preferred_signs.values())) # Add metadata columns df['balance'] = df['concept'].map(balance_map) df['weight'] = df['concept'].map(weight_map) df['preferred_sign'] = df['concept'].map(preferred_sign_map) return df def _apply_presentation(self, df: pd.DataFrame) -> pd.DataFrame: """ Apply presentation logic to match SEC HTML display. Issue #463: Transform values to match how they appear in official SEC filings. Uses preferred_sign from presentation linkbase (not balance attribute). - preferred_sign = -1: negate for display (expenses, dividends, outflows) - preferred_sign = 1: show as-is - preferred_sign = None: no transformation """ if df.empty: return df result = df.copy() # Get period columns (exclude metadata and structural columns) metadata_cols = ['concept', 'label', 'balance', 'weight', 'preferred_sign', 'level', 'abstract', 'dimension', 'unit', 'point_in_time'] period_cols = [col for col in df.columns if col not in metadata_cols] # Get statement type statement_type = self.canonical_type if self.canonical_type else self.role_or_type # For Income Statement and Cash Flow Statement: Use preferred_sign if statement_type in ('IncomeStatement', 'CashFlowStatement'): if 'preferred_sign' in result.columns: for col in period_cols: if col in result.columns and pd.api.types.is_numeric_dtype(result[col]): # Apply preferred_sign where it's not None and not 0 mask = result['preferred_sign'].notna() & (result['preferred_sign'] != 0) result.loc[mask, col] = result.loc[mask, col] * result.loc[mask, 'preferred_sign'] # Balance Sheet: no transformation return result def _validate_statement(self, skip_concept_check: bool = False) -> None: """ Validate the statement structure and required concepts. Args: skip_concept_check: If True, skip checking for required concepts (useful for testing) """ data = self.get_raw_data() if not data: raise StatementValidationError(f"No data found for statement {self.role_or_type}") # Determine the statement type to validate against validate_type = self.canonical_type if self.canonical_type else self.role_or_type # Check for required concepts if this is a standard statement type if validate_type in self.REQUIRED_CONCEPTS and not skip_concept_check: missing_concepts = [] for concept in self.REQUIRED_CONCEPTS[validate_type]: if not any(concept in item.get('all_names', []) for item in data): missing_concepts.append(concept) if missing_concepts: raise StatementValidationError( f"Missing required concepts for {validate_type}: {', '.join(missing_concepts)}") def calculate_ratios(self) -> Dict[str, float]: """Calculate common financial ratios for this statement.""" ratios = {} data = self.get_raw_data() # Use canonical type if available, otherwise use role_or_type statement_type = self.canonical_type if self.canonical_type else self.role_or_type if statement_type == 'BalanceSheet': # Calculate balance sheet ratios ratios.update(self._calculate_balance_sheet_ratios(data)) elif statement_type == 'IncomeStatement': # Calculate income statement ratios ratios.update(self._calculate_income_statement_ratios(data)) return ratios def _calculate_balance_sheet_ratios(self, data: List[Dict[str, Any]]) -> Dict[str, float]: """Calculate balance sheet specific ratios.""" ratios = {} # Current ratio current_assets = self._get_concept_value(data, 'us-gaap_CurrentAssets') current_liabilities = self._get_concept_value(data, 'us-gaap_CurrentLiabilities') if current_assets and current_liabilities: ratios['current_ratio'] = current_assets / current_liabilities # Quick ratio inventory = self._get_concept_value(data, 'us-gaap_Inventory') if current_assets and current_liabilities and inventory: ratios['quick_ratio'] = (current_assets - inventory) / current_liabilities return ratios def _calculate_income_statement_ratios(self, data: List[Dict[str, Any]]) -> Dict[str, float]: """Calculate income statement specific ratios.""" ratios = {} # Gross margin revenue = self._get_concept_value(data, 'us-gaap_Revenues') gross_profit = self._get_concept_value(data, 'us-gaap_GrossProfit') if revenue and gross_profit: ratios['gross_margin'] = gross_profit / revenue # Net margin net_income = self._get_concept_value(data, 'us-gaap_NetIncomeLoss') if revenue and net_income: ratios['net_margin'] = net_income / revenue return ratios def _get_concept_value(self, data: List[Dict[str, Any]], concept: str) -> Optional[float]: """Get the value for a specific concept from statement data.""" for item in data: if concept in item.get('all_names', []): values = item.get('values', {}) if values: return float(next(iter(values.values()))) return None def analyze_trends(self, periods: int = 4) -> Dict[str, List[float]]: """Analyze trends in key metrics over time.""" trends = {} # Use canonical type if available, otherwise use role_or_type statement_type = self.canonical_type if self.canonical_type else self.role_or_type # Get data for multiple periods period_views = self.xbrl.get_period_views(statement_type) if not period_views: return trends periods_to_analyze = period_views[0].get('periods', [])[:periods] for period in periods_to_analyze: data = self.get_raw_data(period) if statement_type == 'BalanceSheet': self._analyze_balance_sheet_trends(data, trends, period) elif statement_type == 'IncomeStatement': self._analyze_income_statement_trends(data, trends, period) return trends def _analyze_balance_sheet_trends(self, data: List[Dict[str, Any]], trends: Dict[str, List[float]], period: str) -> None: """Analyze balance sheet trends.""" metrics = { 'total_assets': 'us-gaap_Assets', 'total_liabilities': 'us-gaap_Liabilities', 'equity': 'us-gaap_StockholdersEquity' } for metric_name, concept in metrics.items(): value = self._get_concept_value(data, concept) if value: if metric_name not in trends: trends[metric_name] = [] trends[metric_name].append(value) def _analyze_income_statement_trends(self, data: List[Dict[str, Any]], trends: Dict[str, List[float]], period: str) -> None: """Analyze income statement trends.""" metrics = { 'revenue': 'us-gaap_Revenues', 'gross_profit': 'us-gaap_GrossProfit', 'net_income': 'us-gaap_NetIncomeLoss' } for metric_name, concept in metrics.items(): value = self._get_concept_value(data, concept) if value: if metric_name not in trends: trends[metric_name] = [] trends[metric_name].append(value) def get_raw_data(self, period_filter: Optional[str] = None) -> List[Dict[str, Any]]: """ Get the raw statement data. Args: period_filter: Optional period key to filter facts Returns: List of line items with values Raises: StatementValidationError: If data retrieval fails """ # Use the canonical type if available, otherwise use the role statement_id = self.canonical_type if self.canonical_type else self.role_or_type data = self.xbrl.get_statement(statement_id, period_filter=period_filter) if data is None: raise StatementValidationError(f"Failed to retrieve data for statement {statement_id}") return data class Statements: """ High-level interface for working with XBRL financial statements. This class provides a user-friendly way to access and manipulate financial statements extracted from XBRL data. """ def __init__(self, xbrl): """ Initialize with an XBRL object. Args: xbrl: XBRL object containing parsed data """ self.xbrl = xbrl self.statements = xbrl.get_all_statements() # Create statement type lookup for quick access self.statement_by_type = {} for stmt in self.statements: if stmt['type']: if stmt['type'] not in self.statement_by_type: self.statement_by_type[stmt['type']] = [] self.statement_by_type[stmt['type']].append(stmt) @staticmethod def classify_statement(stmt: dict) -> str: """ Classify a statement into a category based on its type. Categories: - 'statement': Core financial statements (Income Statement, Balance Sheet, etc.) - 'note': Notes to financial statements - 'disclosure': Disclosure sections - 'document': Document sections (like CoverPage) - 'other': Everything else Args: stmt: Statement dictionary with 'type' and optional 'category' fields Returns: str: Category name ('statement', 'note', 'disclosure', 'document', or 'other') Example: >>> stmt = {'type': 'IncomeStatement', 'title': 'Income Statement'} >>> Statements.classify_statement(stmt) 'statement' >>> stmt = {'type': 'DebtDisclosure', 'title': 'Debt Disclosure'} >>> Statements.classify_statement(stmt) 'disclosure' """ # Use explicit category if provided category = stmt.get('category') if category: return category # Infer from type stmt_type = stmt.get('type', '') if not stmt_type: return 'other' if 'Note' in stmt_type: return 'note' elif 'Disclosure' in stmt_type: return 'disclosure' elif stmt_type == 'CoverPage': return 'document' elif stmt_type in ('BalanceSheet', 'IncomeStatement', 'CashFlowStatement', 'StatementOfEquity', 'ComprehensiveIncome') or 'Statement' in stmt_type: return 'statement' else: return 'other' def get_statements_by_category(self) -> dict: """ Get statements organized by category. Returns a dictionary with statements grouped into categories: - 'statement': Core financial statements - 'note': Notes to financial statements - 'disclosure': Disclosure sections - 'document': Document sections - 'other': Other sections Each statement in the lists includes an 'index' field for positional reference. Returns: dict: Dictionary with category keys, each containing a list of statement dicts Example: >>> categories = xbrl.statements.get_statements_by_category() >>> # Get all disclosures >>> disclosures = categories['disclosure'] >>> for disc in disclosures: ... print(f"{disc['index']}: {disc['title']}") >>> >>> # Get all notes >>> notes = categories['note'] >>> # Get core financial statements >>> statements = categories['statement'] """ categories = { 'statement': [], 'note': [], 'disclosure': [], 'document': [], 'other': [] } for index, stmt in enumerate(self.statements): category = self.classify_statement(stmt) stmt_with_index = dict(stmt) stmt_with_index['index'] = index categories[category].append(stmt_with_index) return categories def _handle_statement_error(self, e: Exception, statement_type: str) -> Optional[Statement]: """ Common error handler for statement resolution failures. Args: e: The exception that occurred statement_type: Type of statement that failed to resolve Returns: None (always, for consistency) """ from edgar.core import log if isinstance(e, StatementNotFound): # Custom exception already has detailed context log.warning(str(e)) else: # For other exceptions, extract context manually entity_name = getattr(self.xbrl, 'entity_name', 'Unknown') cik = getattr(self.xbrl, 'cik', 'Unknown') period_of_report = getattr(self.xbrl, 'period_of_report', 'Unknown') log.warning( f"Failed to resolve {statement_type.lower().replace('_', ' ')} for {entity_name} " f"(CIK: {cik}, Period: {period_of_report}): {type(e).__name__}: {str(e)}" ) return None def find_statement_by_primary_concept(self, statement_type: str, is_parenthetical: bool = False) -> Optional[str]: """ Find a statement by its primary concept. Args: statement_type: Statement type (e.g., 'BalanceSheet', 'IncomeStatement') is_parenthetical: Whether to look for a parenthetical statement (only applicable for BalanceSheet) Returns: Role URI for the matching statement, or None if not found """ if statement_type not in statement_to_concepts: return None # Get information about the statement's identifying concept concept_info = statement_to_concepts[statement_type] concept = concept_info.concept # Find all statements of the requested type matching_statements = self.statement_by_type.get(statement_type, []) if not matching_statements: return None # Parenthetical check is only relevant for BalanceSheet check_parenthetical = statement_type == 'BalanceSheet' # Try to find a statement containing the specific concept for stmt in matching_statements: role = stmt['role'] # Check for parenthetical in the role name if it's a BalanceSheet if check_parenthetical: role_lower = role.lower() is_role_parenthetical = 'parenthetical' in role_lower # Skip if parenthetical status doesn't match what we're looking for if is_parenthetical != is_role_parenthetical: continue # Examine the presentation tree for this role if role in self.xbrl.presentation_trees: tree = self.xbrl.presentation_trees[role] # Check if the identifying concept is in this tree normalized_concept = concept.replace(':', '_') for element_id in tree.all_nodes: # Check both original and normalized form if element_id == concept or element_id == normalized_concept: return role # If no exact concept match, fall back to the first statement of the type # that matches the parenthetical requirement for BalanceSheet if check_parenthetical: for stmt in matching_statements: role = stmt['role'] role_lower = role.lower() is_role_parenthetical = 'parenthetical' in role_lower if is_parenthetical == is_role_parenthetical: return role # If still no match, return the first statement return matching_statements[0]['role'] def __getitem__(self, item: Union[int, str]) -> Optional[Statement]: """ Get a statement by index, type, or role. Args: item: Integer index, string statement type, or role URI Returns: Statement instance for the requested statement """ if isinstance(item, int): if 0 <= item < len(self.statements): stmt = self.statements[item] # Get the canonical type if available canonical_type = None if stmt.get('type') in statement_to_concepts: canonical_type = stmt.get('type') return Statement(self.xbrl, stmt['role'], canonical_type=canonical_type) elif isinstance(item, str): # Check if it's a standard statement type with a specific concept marker if item in statement_to_concepts: # Get the statement role using the primary concept role = self.find_statement_by_primary_concept(item) if role: return Statement(self.xbrl, role, canonical_type=item) # If no concept match, fall back to the type return Statement(self.xbrl, item, canonical_type=item) # If it's a statement type with multiple statements, return the first one if item in self.statement_by_type and self.statement_by_type[item]: return Statement(self.xbrl, item, canonical_type=item) # Otherwise, try to use it directly as a role or statement name # Try to determine canonical type from the name canonical_type = None for std_type in statement_to_concepts.keys(): if std_type.lower() in item.lower(): canonical_type = std_type break return Statement(self.xbrl, item, canonical_type=canonical_type) def __rich__(self) -> Any: """ Rich console representation. Returns: Rich Table object if rich is available, else string representation """ if Table is None: return str(self) from rich.console import Group from rich.text import Text # Group statements by category using the extracted method statements_by_category = self.get_statements_by_category() # Create a table for each category that has statements tables = [] # Define styles and titles for each category category_styles = { 'statement': {'title': "Financial Statements", 'color': "green"}, 'note': {'title': "Notes to Financial Statements", 'color': "blue"}, 'disclosure': {'title': "Disclosures", 'color': "cyan"}, 'document': {'title': "Document Sections", 'color': "magenta"}, 'other': {'title': "Other Sections", 'color': "yellow"} } # Order of categories in the display category_order = ['statement', 'note', 'disclosure', 'document', 'other'] for category in category_order: stmts = statements_by_category[category] if not stmts: continue # Create a table for this category style = category_styles[category] # Create title with color title = Text(style['title']) title.stylize(f"bold {style['color']}") table = Table( title=title, box=box.SIMPLE, title_justify="left", highlight=True ) # Add columns table.add_column("#", style="dim", width=3) table.add_column("Name", style=style['color']) table.add_column("Type", style="italic") table.add_column("Parenthetical", width=14) # Sort statements by type and name for better organization # Handle None values to prevent TypeError when sorting sorted_stmts = sorted(stmts, key=lambda s: (s.get('type') or '', s.get('definition') or '')) # Add rows for stmt in sorted_stmts: # Check if this is a parenthetical statement is_parenthetical = False role_or_def = stmt.get('definition', '').lower() if 'parenthetical' in role_or_def: is_parenthetical = True # Format parenthetical indicator parenthetical_text = "✓" if is_parenthetical else "" table.add_row( str(stmt['index']), stmt.get('definition', 'Untitled'), stmt.get('type', '') or "", parenthetical_text, ) tables.append(table) # If no statements found in any category, show a message if not tables: return Text("No statements found") # Create a group containing all tables return Group(*tables) def __repr__(self): return repr_rich(self.__rich__()) def __str__(self): """String representation with statements organized by category.""" # Group statements by category statements_by_category = { 'statement': [], 'note': [], 'disclosure': [], 'document': [], 'other': [] } # The 'type' field will always exist, but 'category' may not for index, stmt in enumerate(self.statements): # Determine category based on either explicit category or infer from type category = stmt.get('category') if not category: # Fallback logic - infer category from type stmt_type = stmt.get('type', '') if stmt_type: if 'Note' in stmt_type: category = 'note' elif 'Disclosure' in stmt_type: category = 'disclosure' elif stmt_type == 'CoverPage': category = 'document' elif stmt_type in ('BalanceSheet', 'IncomeStatement', 'CashFlowStatement', 'StatementOfEquity', 'ComprehensiveIncome') or 'Statement' in stmt_type: category = 'statement' else: category = 'other' else: category = 'other' # Add to the appropriate category statements_by_category[category].append((index, stmt)) lines = ["Available Statements:"] # Define category titles and order category_titles = { 'statement': "Financial Statements:", 'note': "Notes to Financial Statements:", 'disclosure': "Disclosures:", 'document': "Document Sections:", 'other': "Other Sections:" } category_order = ['statement', 'note', 'disclosure', 'document', 'other'] for category in category_order: stmts = statements_by_category[category] if not stmts: continue lines.append("") lines.append(category_titles[category]) # Sort statements by type and name for better organization # Handle None values to prevent TypeError when sorting sorted_stmts = sorted(stmts, key=lambda s: (s[1].get('type') or '', s[1].get('definition') or '')) for index, stmt in sorted_stmts: # Indicate if parenthetical is_parenthetical = 'parenthetical' in stmt.get('definition', '').lower() parenthetical_text = " (Parenthetical)" if is_parenthetical else "" lines.append(f" {index}. {stmt.get('definition', 'Untitled')}{parenthetical_text}") if len(lines) == 1: # Only the header is present lines.append(" No statements found") return "\n".join(lines) def cover_page(self) -> Statement: """ Get the cover page statement. Returns: A cover page statement """ role = self.find_statement_by_primary_concept("CoverPage") if role: return Statement(self.xbrl, role, canonical_type="CoverPage") # Try using the xbrl.render_statement with parenthetical parameter if hasattr(self.xbrl, 'find_statement'): matching_statements, found_role, _ = self.xbrl.find_statement("CoverPage") if found_role: return Statement(self.xbrl, found_role, canonical_type="CoverPage") return self["CoverPage"] def balance_sheet(self, parenthetical: bool = False) -> Optional[Statement]: """ Get a balance sheet. Args: parenthetical: Whether to get the parenthetical balance sheet Returns: A balance sheet statement, or None if unable to resolve the statement """ try: role = self.find_statement_by_primary_concept("BalanceSheet", is_parenthetical=parenthetical) if role: return Statement(self.xbrl, role, canonical_type="BalanceSheet") # Try using the xbrl.render_statement with parenthetical parameter if hasattr(self.xbrl, 'find_statement'): matching_statements, found_role, _ = self.xbrl.find_statement("BalanceSheet", parenthetical) if found_role: return Statement(self.xbrl, found_role, canonical_type="BalanceSheet") return self["BalanceSheet"] except Exception as e: return self._handle_statement_error(e, "BalanceSheet") def income_statement(self, parenthetical: bool = False, skip_concept_check: bool = False) -> Optional[Statement]: """ Get an income statement. Args: parenthetical: Whether to get the parenthetical income statement skip_concept_check: If True, skip checking for required concepts (useful for testing) Returns: An income statement, or None if unable to resolve the statement Note: To control dimensional display, use the include_dimensions parameter when calling render() or to_dataframe() on the returned Statement object. """ try: # Try using the xbrl.find_statement with parenthetical parameter if hasattr(self.xbrl, 'find_statement'): matching_statements, found_role, _ = self.xbrl.find_statement("IncomeStatement", parenthetical) if found_role: return Statement(self.xbrl, found_role, canonical_type="IncomeStatement", skip_concept_check=skip_concept_check) return self["IncomeStatement"] except Exception as e: return self._handle_statement_error(e, "IncomeStatement") def cashflow_statement(self, parenthetical: bool = False) -> Optional[Statement]: """ Get a cash flow statement. Args: parenthetical: Whether to get the parenthetical cash flow statement Returns: The cash flow statement, or None if unable to resolve the statement """ try: # Try using the xbrl.find_statement with parenthetical parameter if hasattr(self.xbrl, 'find_statement'): matching_statements, found_role, _ = self.xbrl.find_statement("CashFlowStatement", parenthetical) if found_role: return Statement(self.xbrl, found_role, canonical_type="CashFlowStatement") return self["CashFlowStatement"] except Exception as e: return self._handle_statement_error(e, "CashFlowStatement") def statement_of_equity(self, parenthetical: bool = False) -> Optional[Statement]: """ Get a statement of equity. Args: parenthetical: Whether to get the parenthetical statement of equity Returns: The statement of equity, or None if unable to resolve the statement """ try: # Try using the xbrl.find_statement with parenthetical parameter if hasattr(self.xbrl, 'find_statement'): matching_statements, found_role, _ = self.xbrl.find_statement("StatementOfEquity", parenthetical) if found_role: return Statement(self.xbrl, found_role, canonical_type="StatementOfEquity") return self["StatementOfEquity"] except Exception as e: return self._handle_statement_error(e, "StatementOfEquity") def comprehensive_income(self, parenthetical: bool = False) -> Optional[Statement]: """ Get a statement of comprehensive income. Comprehensive income includes net income plus other comprehensive income items such as foreign currency translation adjustments, unrealized gains/losses on investments, and pension adjustments. Args: parenthetical: Whether to get the parenthetical comprehensive income statement Returns: The comprehensive income statement, or None if unable to resolve the statement """ try: # Try using the xbrl.find_statement with parenthetical parameter if hasattr(self.xbrl, 'find_statement'): matching_statements, found_role, _ = self.xbrl.find_statement("ComprehensiveIncome", parenthetical) if found_role: return Statement(self.xbrl, found_role, canonical_type="ComprehensiveIncome") return self["ComprehensiveIncome"] except Exception as e: return self._handle_statement_error(e, "ComprehensiveIncome") def get_period_views(self, statement_type: str) -> List[Dict[str, Any]]: """ Get available period views for a statement type. Args: statement_type: Type of statement to get period views for Returns: List of period view options """ return self.xbrl.get_period_views(statement_type) def get_by_category(self, category: str) -> List[Statement]: """ Get all statements of a specific category. Args: category: Category of statement to find ('statement', 'note', 'disclosure', 'document', or 'other') Returns: List of Statement objects matching the category """ result = [] # Find all statements with matching category for stmt in self.statements: if stmt.get('category') == category: result.append(Statement(self.xbrl, stmt['role'])) return result def notes(self) -> List[Statement]: """ Get all note sections. Returns: List of Statement objects for notes """ return self.get_by_category('note') def disclosures(self) -> List[Statement]: """ Get all disclosure sections. Returns: List of Statement objects for disclosures """ return self.get_by_category('disclosure') def to_dataframe(self, statement_type: str, period_view: Optional[str] = None, standard: bool = True, include_dimensions: bool = True) -> Optional[pd.DataFrame]: """ Convert a statement to a pandas DataFrame. Args: statement_type: Type of statement to convert period_view: Optional period view name standard: Whether to use standardized concept labels (default: True) include_dimensions: Whether to include dimensional segment data (default: True) Returns: pandas DataFrame containing the statement data """ statement = self[statement_type] return statement.render(period_view=period_view, standard=standard, include_dimensions=include_dimensions).to_dataframe() class StitchedStatement: """ A stitched financial statement across multiple time periods. This class provides convenient methods for rendering and manipulating a stitched financial statement from multiple filings. """ def __init__(self, xbrls, statement_type: str, max_periods: int = 8, standard: bool = True, use_optimal_periods: bool = True, include_dimensions: bool = False): """ Initialize with XBRLS object and statement parameters. Args: xbrls: XBRLS object containing stitched data statement_type: Type of statement ('BalanceSheet', 'IncomeStatement', etc.) max_periods: Maximum number of periods to include standard: Whether to use standardized concept labels use_optimal_periods: Whether to use entity info to determine optimal periods include_dimensions: Whether to include dimensional segment data (default: False for stitching) """ self.xbrls = xbrls self.statement_type = statement_type self.max_periods = max_periods self.standard = standard self.use_optimal_periods = use_optimal_periods self.include_dimensions = include_dimensions self.show_date_range = False # Default to not showing date ranges # Statement titles self.statement_titles = { 'BalanceSheet': 'CONSOLIDATED BALANCE SHEET', 'IncomeStatement': 'CONSOLIDATED INCOME STATEMENT', 'CashFlowStatement': 'CONSOLIDATED STATEMENT OF CASH FLOWS', 'StatementOfEquity': 'CONSOLIDATED STATEMENT OF STOCKHOLDERS\' EQUITY', 'ComprehensiveIncome': 'CONSOLIDATED STATEMENT OF COMPREHENSIVE INCOME' } self.title = self.statement_titles.get(statement_type, statement_type.upper()) # Cache statement data self._statement_data = None @property def periods(self): return [ period_id[-10:] for period_id, _ in self.statement_data['periods'] ] @property def statement_data(self): """Get the underlying statement data, loading it if necessary.""" if self._statement_data is None: self._statement_data = self.xbrls.get_statement( self.statement_type, self.max_periods, self.standard, self.use_optimal_periods, self.include_dimensions ) return self._statement_data def render(self, show_date_range: bool = False) -> Table: """ Render the stitched statement as a formatted table. Args: show_date_range: Whether to show full date ranges for duration periods Returns: Rich Table containing the rendered statement """ from edgar.xbrl.stitching import render_stitched_statement # Update the render_stitched_statement function call to pass the show_date_range parameter return render_stitched_statement( self.statement_data, statement_title=self.title, statement_type=self.statement_type, entity_info=self.xbrls.entity_info, show_date_range=show_date_range ) def to_dataframe(self) -> pd.DataFrame: """ Convert the stitched statement to a pandas DataFrame. Returns: pandas DataFrame with periods as columns and concepts as rows """ from edgar.xbrl.stitching import to_pandas return to_pandas(self.statement_data) def __rich__(self): """ Rich console representation. Returns: Rich Table object """ return self.render() def __repr__(self): return repr_rich(self.__rich__()) class StitchedStatements: """ User-friendly access to stitched financial statements across multiple time periods. This class provides a simplified API for accessing and rendering stitched financial statements from multiple filings, without requiring detailed knowledge of the underlying stitching process. """ def __init__(self, xbrls): """ Initialize with an XBRLS object. Args: xbrls: The XBRLS object to extract stitched statements from """ self.xbrls = xbrls def balance_sheet(self, max_periods: int = 8, standard: bool = True, use_optimal_periods: bool = True, show_date_range: bool = False) -> Optional[StitchedStatement]: """ Get a stitched balance sheet across multiple time periods. Args: max_periods: Maximum number of periods to include standard: Whether to use standardized concept labels use_optimal_periods: Whether to use entity info to determine optimal periods show_date_range: Whether to show full date ranges for duration periods Returns: StitchedStatement for the balance sheet """ statement = StitchedStatement(self.xbrls, 'BalanceSheet', max_periods, standard, use_optimal_periods) if show_date_range: statement.show_date_range = show_date_range return statement def income_statement(self, max_periods: int = 8, standard: bool = True, use_optimal_periods: bool = True, show_date_range: bool = False) -> Optional[StitchedStatement]: """ Get a stitched income statement across multiple time periods. Args: max_periods: Maximum number of periods to include standard: Whether to use standardized concept labels use_optimal_periods: Whether to use entity info to determine optimal periods show_date_range: Whether to show full date ranges for duration periods Returns: StitchedStatement for the income statement """ statement = StitchedStatement(self.xbrls, 'IncomeStatement', max_periods, standard, use_optimal_periods) if show_date_range: statement.show_date_range = show_date_range return statement def cashflow_statement(self, max_periods: int = 8, standard: bool = True, use_optimal_periods: bool = True, show_date_range: bool = False) -> Optional[StitchedStatement]: """ Get a stitched cash flow statement across multiple time periods. Args: max_periods: Maximum number of periods to include standard: Whether to use standardized concept labels use_optimal_periods: Whether to use entity info to determine optimal periods show_date_range: Whether to show full date ranges for duration periods Returns: StitchedStatement for the cash flow statement """ statement = StitchedStatement(self.xbrls, 'CashFlowStatement', max_periods, standard, use_optimal_periods) if show_date_range: statement.show_date_range = show_date_range return statement def statement_of_equity(self, max_periods: int = 8, standard: bool = True, use_optimal_periods: bool = True, show_date_range: bool = False) -> Optional[StitchedStatement]: """ Get a stitched statement of changes in equity across multiple time periods. Args: max_periods: Maximum number of periods to include standard: Whether to use standardized concept labels use_optimal_periods: Whether to use entity info to determine optimal periods show_date_range: Whether to show full date ranges for duration periods Returns: StitchedStatement for the statement of equity """ statement = StitchedStatement(self.xbrls, 'StatementOfEquity', max_periods, standard, use_optimal_periods) if show_date_range: statement.show_date_range = show_date_range return statement def comprehensive_income(self, max_periods: int = 8, standard: bool = True, use_optimal_periods: bool = True, show_date_range: bool = False) -> Optional[StitchedStatement]: """ Get a stitched statement of comprehensive income across multiple time periods. Args: max_periods: Maximum number of periods to include standard: Whether to use standardized concept labels use_optimal_periods: Whether to use entity info to determine optimal periods show_date_range: Whether to show full date ranges for duration periods Returns: StitchedStatement for the comprehensive income statement """ statement = StitchedStatement(self.xbrls, 'ComprehensiveIncome', max_periods, standard, use_optimal_periods) if show_date_range: statement.show_date_range = show_date_range return statement def __getitem__(self, statement_type: str) -> StitchedStatement: """ Get a statement by type using dictionary syntax. Args: statement_type: Type of statement ('BalanceSheet', 'IncomeStatement', etc.) Returns: StitchedStatement for the requested statement type """ return StitchedStatement(self.xbrls, statement_type, use_optimal_periods=True) def __rich__(self): """ Rich console representation. Returns: Rich Table object """ table = Table(title="Available Stitched Statements", box=box.SIMPLE) table.add_column("Statement Type") table.add_column("Periods") # Get information about available statements statement_types = set() for xbrl in self.xbrls.xbrl_list: statements = xbrl.get_all_statements() for stmt in statements: if stmt['type']: statement_types.add(stmt['type']) # Get periods periods = self.xbrls.get_periods() period_count = len(periods) # Add rows for each statement type for stmt_type in sorted(statement_types): table.add_row(stmt_type, str(period_count)) return table def __repr__(self): return repr_rich(self.__rich__()) def __str__(self) -> str: """ String representation listing available statements. Returns: String representation """ # Get information about available statements statement_types = set() for xbrl in self.xbrls.xbrl_list: statements = xbrl.get_all_statements() for stmt in statements: if stmt['type']: statement_types.add(stmt['type']) # Get information about periods periods = self.xbrls.get_periods() period_count = len(periods) # Format output output = [f"Stitched statements across {period_count} periods:"] for stmt_type in sorted(statement_types): output.append(f" - {stmt_type}") output.append("\nAvailable methods:") output.append(" - balance_sheet()") output.append(" - income_statement()") output.append(" - cash_flow_statement()") return "\n".join(output)