1733 lines
74 KiB
Python
1733 lines
74 KiB
Python
"""
|
|
XBRL Parser - Top-level integration module for XBRL parsing.
|
|
|
|
This module provides the XBRL class, which integrates all components of the XBRL parsing system:
|
|
- Instance Document Parser
|
|
- Presentation Linkbase Parser
|
|
- Calculation Linkbase Parser
|
|
- Definition Linkbase Parser
|
|
|
|
The XBRL class provides a unified interface for working with XBRL data,
|
|
organizing facts according to presentation hierarchies, validating calculations,
|
|
and handling dimensional qualifiers.
|
|
"""
|
|
import datetime
|
|
from pathlib import Path
|
|
from textwrap import dedent
|
|
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
|
|
|
|
if TYPE_CHECKING:
|
|
from edgar.xbrl.facts import FactQuery
|
|
from edgar.xbrl.models import Fact, Footnote
|
|
|
|
import pandas as pd
|
|
from rich import box
|
|
from rich.table import Column, Table
|
|
from rich.table import Table as RichTable
|
|
|
|
from edgar.attachments import Attachments
|
|
from edgar.core import log
|
|
from edgar.richtools import repr_rich, strip_ansi_text
|
|
from edgar.xbrl.core import STANDARD_LABEL
|
|
from edgar.xbrl.models import PresentationNode
|
|
from edgar.xbrl.parsers import XBRLParser
|
|
from edgar.xbrl.period_selector import select_periods
|
|
from edgar.xbrl.periods import get_period_views
|
|
from edgar.xbrl.rendering import RenderedStatement, generate_rich_representation, render_statement
|
|
from edgar.xbrl.statement_resolver import StatementResolver
|
|
from edgar.xbrl.statements import statement_to_concepts
|
|
|
|
|
|
class XBRLFilingWithNoXbrlData(Exception):
|
|
"""Exception raised when a filing does not contain XBRL data."""
|
|
|
|
def __init__(self, message: str):
|
|
super().__init__(message)
|
|
|
|
|
|
class XBRLAttachments:
|
|
"""
|
|
An adapter for the Attachments class that provides easy access to the XBRL documents.
|
|
"""
|
|
|
|
def __init__(self, attachments: Attachments):
|
|
self._documents = dict()
|
|
if attachments.data_files:
|
|
for attachment in attachments.data_files:
|
|
if attachment.document_type in ["XML", 'EX-101.INS'] and attachment.extension.endswith(
|
|
('.xml', '.XML')):
|
|
content = attachment.content
|
|
if '<xbrl' in content[:2000]:
|
|
self._documents['instance'] = attachment
|
|
elif attachment.document_type == 'EX-101.SCH':
|
|
self._documents['schema'] = attachment
|
|
elif attachment.document_type == 'EX-101.DEF':
|
|
self._documents['definition'] = attachment
|
|
elif attachment.document_type == 'EX-101.CAL':
|
|
self._documents['calculation'] = attachment
|
|
elif attachment.document_type == 'EX-101.LAB':
|
|
self._documents['label'] = attachment
|
|
elif attachment.document_type == 'EX-101.PRE':
|
|
self._documents['presentation'] = attachment
|
|
|
|
@property
|
|
def empty(self):
|
|
return not self._documents
|
|
|
|
@property
|
|
def has_instance_document(self):
|
|
return 'instance' in self._documents
|
|
|
|
@property
|
|
def instance_only(self):
|
|
return len(self._documents) == 1 and 'instance' in self._documents
|
|
|
|
def get(self, doc_type: str):
|
|
return self._documents.get(doc_type)
|
|
|
|
def __rich__(self):
|
|
table = Table(Column("Type"),
|
|
Column("Document"),
|
|
title="XBRL Documents",
|
|
box=box.SIMPLE)
|
|
for doc_type, attachment in self._documents.items():
|
|
table.add_row(doc_type, attachment.description)
|
|
return table
|
|
|
|
def __repr__(self):
|
|
return repr_rich(self)
|
|
|
|
|
|
class XBRL:
|
|
"""
|
|
Integrated XBRL parser that combines all linkbase parsers.
|
|
|
|
This is the top-level object that integrates all components of the XBRL parsing system,
|
|
providing access to facts organized according to presentation hierarchies and
|
|
allowing for dimensional analysis and calculation validation.
|
|
"""
|
|
|
|
def __init__(self):
|
|
# Use the parser component
|
|
self.parser = XBRLParser()
|
|
|
|
# Statement resolver for matching statements
|
|
self._statement_resolver = None
|
|
|
|
# Cached indices for fast statement lookup (for backward compatibility)
|
|
self._statement_indices = {}
|
|
self._statement_by_standard_name = {}
|
|
self._statement_by_primary_concept = {}
|
|
self._statement_by_role_uri = {}
|
|
self._statement_by_role_name = {}
|
|
self._all_statements_cached = None
|
|
|
|
def _is_dimension_display_statement(self, statement_type: str, role_definition: str) -> bool:
|
|
"""
|
|
Determine if a statement should display dimensioned line items.
|
|
Args:
|
|
statement_type: Type of statement ('BalanceSheet', 'IncomeStatement', etc.)
|
|
role_definition: The definition of the statement role
|
|
Returns:
|
|
bool: True if dimensions should be displayed, False otherwise
|
|
"""
|
|
# Look for keywords in role definition that suggest dimensional breakdowns
|
|
dimension_keywords = [
|
|
'segment', 'geography', 'geographic', 'region', 'product', 'business',
|
|
'by country', 'by region', 'by product', 'by segment', 'revenues by'
|
|
]
|
|
|
|
role_def_lower = role_definition.lower() if role_definition else ""
|
|
|
|
# For core financial statements, check if they contain segment information
|
|
if statement_type in ['BalanceSheet', 'IncomeStatement', 'CashFlowStatement',
|
|
'StatementOfEquity', 'ComprehensiveIncome']:
|
|
|
|
# Allow dimensional display if the role definition suggests segment/product breakdown
|
|
if any(keyword in role_def_lower for keyword in dimension_keywords):
|
|
return True
|
|
|
|
# For income statements specifically, check if there are segment-related dimensional facts
|
|
if statement_type == 'IncomeStatement':
|
|
# Check if there are facts with ProductOrServiceAxis dimensions
|
|
try:
|
|
# Look for revenue facts with ProductOrServiceAxis dimensions
|
|
revenue_concepts = [
|
|
'us-gaap:RevenueFromContractWithCustomerExcludingAssessedTax',
|
|
'us-gaap:Revenues',
|
|
'us-gaap:SalesRevenueNet'
|
|
]
|
|
|
|
for _fact_key, fact in self.parser.facts.items():
|
|
# Check if this is a revenue-related concept
|
|
concept_name = fact.element_id if hasattr(fact, 'element_id') else getattr(fact, 'concept',
|
|
str(fact))
|
|
if any(revenue_concept in concept_name for revenue_concept in revenue_concepts):
|
|
|
|
# Check if this fact has ProductOrServiceAxis dimension
|
|
context = self.parser.contexts.get(fact.context_ref)
|
|
if context and hasattr(context, 'dimensions') and context.dimensions:
|
|
for dim_name, _dim_value in context.dimensions.items():
|
|
if 'ProductOrServiceAxis' in dim_name:
|
|
return True
|
|
|
|
return False
|
|
except Exception:
|
|
# If any error occurs, default to False
|
|
return False
|
|
|
|
# For other core statements, skip dimensional display by default
|
|
return False
|
|
|
|
# For non-core statements, check if they contain dimensional breakdowns
|
|
return any(keyword in role_def_lower for keyword in dimension_keywords)
|
|
|
|
@property
|
|
def element_catalog(self):
|
|
return self.parser.element_catalog
|
|
|
|
@property
|
|
def contexts(self):
|
|
return self.parser.contexts
|
|
|
|
@property
|
|
def footnotes(self):
|
|
"""Access to XBRL footnotes."""
|
|
return self.parser.footnotes
|
|
|
|
@property
|
|
def _facts(self):
|
|
return self.parser.facts
|
|
|
|
@property
|
|
def units(self):
|
|
return self.parser.units
|
|
|
|
@property
|
|
def presentation_roles(self):
|
|
return self.parser.presentation_roles
|
|
|
|
@property
|
|
def presentation_trees(self):
|
|
return self.parser.presentation_trees
|
|
|
|
@property
|
|
def calculation_roles(self):
|
|
return self.parser.calculation_roles
|
|
|
|
@property
|
|
def calculation_trees(self):
|
|
return self.parser.calculation_trees
|
|
|
|
@property
|
|
def definition_roles(self):
|
|
return self.parser.definition_roles
|
|
|
|
@property
|
|
def tables(self):
|
|
return self.parser.tables
|
|
|
|
@property
|
|
def axes(self):
|
|
return self.parser.axes
|
|
|
|
@property
|
|
def domains(self):
|
|
return self.parser.domains
|
|
|
|
@property
|
|
def entity_info(self):
|
|
return self.parser.entity_info
|
|
|
|
@property
|
|
def reporting_periods(self):
|
|
return self.parser.reporting_periods
|
|
|
|
@property
|
|
def period_of_report(self) -> Optional[str]:
|
|
if 'document_period_end_date' in self.entity_info:
|
|
period = self.entity_info['document_period_end_date']
|
|
return period.strftime('%Y-%m-%d') if isinstance(period, datetime.date) else period
|
|
return None
|
|
|
|
@property
|
|
def entity_name(self):
|
|
return self.entity_info.get('entity_name')
|
|
|
|
@property
|
|
def document_type(self):
|
|
return self.entity_info.get('document_type')
|
|
|
|
@property
|
|
def context_period_map(self):
|
|
return self.parser.context_period_map
|
|
|
|
@classmethod
|
|
def from_directory(cls, directory_path: Union[str, Path]) -> 'XBRL':
|
|
"""
|
|
Parse all XBRL files in a directory.
|
|
Args:
|
|
directory_path: Path to directory containing XBRL files
|
|
Returns:
|
|
XBRL object with parsed data
|
|
"""
|
|
xbrl = cls()
|
|
xbrl.parser.parse_directory(directory_path)
|
|
|
|
# Try to create legacy instance as well for compatibility
|
|
directory = Path(directory_path)
|
|
for file_path in directory.glob("*"):
|
|
if file_path.is_file() and file_path.name.lower().endswith('.xml') and '<xbrl' in file_path.read_text()[
|
|
:2000]:
|
|
break
|
|
|
|
return xbrl
|
|
|
|
@classmethod
|
|
def from_files(cls, instance_file: Optional[Union[str, Path]] = None,
|
|
schema_file: Optional[Union[str, Path]] = None,
|
|
presentation_file: Optional[Union[str, Path]] = None,
|
|
calculation_file: Optional[Union[str, Path]] = None,
|
|
definition_file: Optional[Union[str, Path]] = None,
|
|
label_file: Optional[Union[str, Path]] = None) -> 'XBRL':
|
|
"""
|
|
Create an XBRL object from individual files.
|
|
Args:
|
|
instance_file: Path to instance document file
|
|
schema_file: Path to schema file
|
|
presentation_file: Path to presentation linkbase file
|
|
calculation_file: Path to calculation linkbase file
|
|
definition_file: Path to definition linkbase file
|
|
label_file: Path to label linkbase file
|
|
|
|
Returns:
|
|
XBRL object with parsed data
|
|
"""
|
|
xbrl = cls()
|
|
|
|
# Parse schema first
|
|
if schema_file:
|
|
xbrl.parser.parse_schema(schema_file)
|
|
|
|
# Parse linkbase files
|
|
if label_file:
|
|
xbrl.parser.parse_labels(label_file)
|
|
|
|
if presentation_file:
|
|
xbrl.parser.parse_presentation(presentation_file)
|
|
|
|
if calculation_file:
|
|
xbrl.parser.parse_calculation(calculation_file)
|
|
|
|
if definition_file:
|
|
xbrl.parser.parse_definition(definition_file)
|
|
|
|
# Parse instance last
|
|
if instance_file:
|
|
xbrl.parser.parse_instance(instance_file)
|
|
|
|
return xbrl
|
|
|
|
@classmethod
|
|
def from_filing(cls, filing) -> Optional['XBRL']:
|
|
"""
|
|
Create an XBRL object from a Filing object.
|
|
|
|
Args:
|
|
filing: Filing object with attachments containing XBRL files
|
|
|
|
Returns:
|
|
XBRL object with parsed data
|
|
"""
|
|
if filing.form.endswith("/A"):
|
|
log.warning(dedent(f"""
|
|
{filing}
|
|
is an amended filing and may not contain full XBRL data e.g. some statements might be missing.
|
|
Consider using the original filing instead if available with `get_filings(form="10-K", amendments=False)`
|
|
"""))
|
|
|
|
xbrl = cls()
|
|
|
|
xbrl_attachments = XBRLAttachments(filing.attachments)
|
|
|
|
if xbrl_attachments.empty:
|
|
log.warning(f"No XBRL attachments found in filing {filing}")
|
|
return None
|
|
|
|
if xbrl_attachments.get('schema'):
|
|
xbrl.parser.parse_schema_content(xbrl_attachments.get('schema').content)
|
|
|
|
if xbrl_attachments.get('label'):
|
|
xbrl.parser.parse_labels_content(xbrl_attachments.get('label').content)
|
|
|
|
if xbrl_attachments.get('presentation'):
|
|
xbrl.parser.parse_presentation_content(xbrl_attachments.get('presentation').content)
|
|
|
|
if xbrl_attachments.get('calculation'):
|
|
xbrl.parser.parse_calculation_content(xbrl_attachments.get('calculation').content)
|
|
|
|
if xbrl_attachments.get('definition'):
|
|
xbrl.parser.parse_definition_content(xbrl_attachments.get('definition').content)
|
|
|
|
if xbrl_attachments.get('instance'):
|
|
xbrl.parser.parse_instance_content(xbrl_attachments.get('instance').content)
|
|
|
|
return xbrl
|
|
|
|
@property
|
|
def statements(self):
|
|
from edgar.xbrl.statements import Statements
|
|
return Statements(self)
|
|
|
|
@property
|
|
def facts(self):
|
|
from edgar.xbrl.facts import FactsView
|
|
if not hasattr(self, '_facts_view'):
|
|
self._facts_view = FactsView(self)
|
|
return self._facts_view
|
|
|
|
@property
|
|
def current_period(self):
|
|
"""
|
|
Convenient access to current period financial data.
|
|
|
|
Provides simplified access to the most recent period's financial data
|
|
without comparative information. This addresses common use cases where
|
|
users only need the current period data.
|
|
|
|
Returns:
|
|
CurrentPeriodView: Interface for accessing current period data
|
|
|
|
Example:
|
|
>>> xbrl = filing.xbrl()
|
|
>>> current = xbrl.current_period
|
|
>>> balance_sheet = current.balance_sheet()
|
|
>>> income = current.income_statement(raw_concepts=True)
|
|
"""
|
|
from edgar.xbrl.current_period import CurrentPeriodView
|
|
if not hasattr(self, '_current_period_view'):
|
|
self._current_period_view = CurrentPeriodView(self)
|
|
return self._current_period_view
|
|
|
|
def query(self,
|
|
include_dimensions: bool = True,
|
|
include_contexts: bool = False,
|
|
include_element_info: bool = False) -> 'FactQuery':
|
|
"""
|
|
Start a new query for XBRL facts.
|
|
"""
|
|
fact_query = self.facts.query()
|
|
if not include_dimensions:
|
|
fact_query = fact_query.exclude_dimensions()
|
|
if not include_contexts:
|
|
fact_query = fact_query.exclude_contexts()
|
|
if not include_element_info:
|
|
fact_query = fact_query.exclude_element_info()
|
|
return fact_query
|
|
|
|
def get_all_statements(self) -> List[Dict[str, Any]]:
|
|
"""
|
|
Get all available financial statements.
|
|
|
|
Returns:
|
|
List of statement metadata (role, definition, element count)
|
|
"""
|
|
# Return cached result if available
|
|
if self._all_statements_cached is not None:
|
|
return self._all_statements_cached
|
|
|
|
statements = []
|
|
|
|
# Reset indices
|
|
self._statement_indices = {}
|
|
self._statement_by_standard_name = {}
|
|
self._statement_by_primary_concept = {}
|
|
self._statement_by_role_uri = {}
|
|
self._statement_by_role_name = {}
|
|
|
|
for role, tree in self.presentation_trees.items():
|
|
# Check if this role appears to be a financial statement
|
|
role_def = tree.definition.lower()
|
|
statement_type = None
|
|
primary_concept = next(iter(tree.all_nodes))
|
|
statement_category = None
|
|
|
|
# First try to match using statement_to_concepts (for backward compatibility)
|
|
for statement_alias, statement_info in statement_to_concepts.items():
|
|
if primary_concept == statement_info.concept:
|
|
if 'parenthetical' in role_def:
|
|
statement_type = f"{statement_alias}Parenthetical"
|
|
else:
|
|
statement_type = statement_alias
|
|
if 'BalanceSheet' not in statement_type:
|
|
break
|
|
|
|
# If we didn't find a match, try additional patterns for notes and disclosures
|
|
if not statement_type:
|
|
if 'us-gaap_NotesToFinancialStatementsAbstract' in primary_concept or 'note' in role_def:
|
|
statement_type = "Notes"
|
|
statement_category = "note"
|
|
elif 'us-gaap_DisclosuresAbstract' in primary_concept or 'disclosure' in role_def:
|
|
statement_type = "Disclosures"
|
|
statement_category = "disclosure"
|
|
elif 'us-gaap_AccountingPoliciesAbstract' in primary_concept or 'accounting policies' in role_def:
|
|
statement_type = "AccountingPolicies"
|
|
statement_category = "note"
|
|
elif 'us-gaap_SegmentDisclosureAbstract' in primary_concept or 'segment' in role_def:
|
|
statement_type = "SegmentDisclosure"
|
|
statement_category = "disclosure"
|
|
# Try to extract role name from URI
|
|
role_name = role.split('/')[-1] if '/' in role else role.split('#')[-1] if '#' in role else ''
|
|
|
|
# Create the statement metadata
|
|
statement = {
|
|
'role': role,
|
|
'definition': tree.definition,
|
|
'element_count': len(tree.all_nodes),
|
|
'type': statement_type,
|
|
'primary_concept': primary_concept,
|
|
'role_name': role_name,
|
|
'category': statement_category # This will be None for backward compatibility unless set above
|
|
}
|
|
|
|
statements.append(statement)
|
|
|
|
# Build lookup indices
|
|
# By role URI
|
|
self._statement_by_role_uri[role] = statement
|
|
|
|
# By role name (short name)
|
|
if role_name:
|
|
role_name_lower = role_name.lower()
|
|
if role_name_lower not in self._statement_by_role_name:
|
|
self._statement_by_role_name[role_name_lower] = []
|
|
self._statement_by_role_name[role_name_lower].append(statement)
|
|
|
|
# By standard name
|
|
if statement_type:
|
|
if statement_type not in self._statement_by_standard_name:
|
|
self._statement_by_standard_name[statement_type] = []
|
|
self._statement_by_standard_name[statement_type].append(statement)
|
|
|
|
# By primary concept
|
|
if primary_concept:
|
|
if primary_concept not in self._statement_by_primary_concept:
|
|
self._statement_by_primary_concept[primary_concept] = []
|
|
self._statement_by_primary_concept[primary_concept].append(statement)
|
|
|
|
# Also index by definition (without spaces, lowercase)
|
|
if statement['definition']:
|
|
def_key = statement['definition'].lower().replace(' ', '')
|
|
if def_key not in self._statement_indices:
|
|
self._statement_indices[def_key] = []
|
|
self._statement_indices[def_key].append(statement)
|
|
|
|
# Cache the result
|
|
self._all_statements_cached = statements
|
|
return statements
|
|
|
|
def get_statement_by_type(self, statement_type: str, include_dimensions: bool = True) -> Optional[Dict[str, Any]]:
|
|
"""
|
|
Get the first statement matching the given type.
|
|
|
|
Args:
|
|
statement_type: Type of statement ('BalanceSheet', 'IncomeStatement', 'Notes', etc.)
|
|
include_dimensions: Whether to include dimensional segment data (default: True)
|
|
|
|
Returns:
|
|
Statement data if found, None otherwise
|
|
"""
|
|
# Use find_statement instead of the flawed index-based lookup
|
|
matching_statements, found_role, actual_statement_type = self.find_statement(statement_type)
|
|
|
|
if not found_role or not matching_statements:
|
|
return None
|
|
|
|
# Get statement data using the found role
|
|
statement_data = self.get_statement(found_role, should_display_dimensions=include_dimensions)
|
|
|
|
if statement_data:
|
|
# Extract periods from the statement data
|
|
periods = {}
|
|
for item in statement_data:
|
|
for period_id, _value in item.get('values', {}).items():
|
|
if period_id not in periods:
|
|
# Get period label from reporting_periods
|
|
period_label = period_id
|
|
for period in self.reporting_periods:
|
|
if period['key'] == period_id:
|
|
period_label = period['label']
|
|
break
|
|
periods[period_id] = {'label': period_label}
|
|
|
|
return {
|
|
'role': found_role,
|
|
'definition': matching_statements[0]['definition'],
|
|
'statement_type': actual_statement_type,
|
|
'periods': periods,
|
|
'data': statement_data
|
|
}
|
|
|
|
return None
|
|
|
|
@classmethod
|
|
def stitch_statements(cls, xbrl_list: List['XBRL'],
|
|
statement_type: str = 'IncomeStatement',
|
|
period_type: str = 'RECENT_PERIODS',
|
|
max_periods: int = 3,
|
|
standard: bool = True) -> Dict[str, Any]:
|
|
"""
|
|
Stitch together statements from multiple XBRL objects.
|
|
|
|
Args:
|
|
xbrl_list: List of XBRL objects, should be from the same company and ordered by date
|
|
statement_type: Type of statement to stitch ('IncomeStatement', 'BalanceSheet', etc.)
|
|
period_type: Type of period view to generate
|
|
max_periods: Maximum number of periods to include (default: 3)
|
|
standard: Whether to use standardized concept labels (default: True)
|
|
|
|
Returns:
|
|
Stitched statement data
|
|
"""
|
|
from edgar.xbrl.stitching import stitch_statements as _stitch_statements
|
|
return _stitch_statements(xbrl_list, statement_type, period_type, max_periods, standard)
|
|
|
|
def render_stitched_statement(self, stitched_data: Dict[str, Any],
|
|
statement_title: str,
|
|
statement_type: str) -> 'RichTable':
|
|
"""
|
|
Render a stitched statement.
|
|
|
|
Args:
|
|
stitched_data: Stitched statement data
|
|
statement_title: Title of the statement
|
|
statement_type: Type of statement ('BalanceSheet', 'IncomeStatement', etc.)
|
|
|
|
Returns:
|
|
RichTable: A formatted table representation of the stitched statement
|
|
"""
|
|
from edgar.xbrl.stitching import render_stitched_statement as _render_stitched_statement
|
|
return _render_stitched_statement(stitched_data, statement_title, statement_type, self.entity_info)
|
|
|
|
def get_statement(self, role_or_type: str,
|
|
period_filter: Optional[str] = None,
|
|
should_display_dimensions: Optional[bool] = None) -> List[Dict[str, Any]]:
|
|
"""
|
|
Get a financial statement by role URI, statement type, or statement short name.
|
|
|
|
Args:
|
|
role_or_type: Can be one of:
|
|
- Extended link role URI (e.g. "http://apple.com/role/ConsolidatedStatementOfIncome")
|
|
- Statement type name (e.g. "BalanceSheet")
|
|
- Statement short name (e.g. "ConsolidatedStatementOfIncome")
|
|
period_filter: Optional period key to filter facts
|
|
should_display_dimensions: Whether to display dimensions for this statement.
|
|
If None, the method will determine based on statement type and role.
|
|
|
|
Returns:
|
|
List of line items with values
|
|
"""
|
|
# Use the centralized statement finder to get statement information
|
|
matching_statements, found_role, actual_statement_type = self.find_statement(role_or_type)
|
|
|
|
# If no matching statement found, return empty list
|
|
if not found_role or found_role not in self.presentation_trees:
|
|
return []
|
|
|
|
tree = self.presentation_trees[found_role]
|
|
|
|
# Find the root element
|
|
root_id = tree.root_element_id
|
|
|
|
# If should_display_dimensions wasn't provided, determine it from the statement type and role
|
|
if should_display_dimensions is None:
|
|
role_definition = ""
|
|
if matching_statements:
|
|
role_definition = matching_statements[0]['definition']
|
|
|
|
# Determine whether to display dimensions
|
|
should_display_dimensions = self._is_dimension_display_statement(actual_statement_type, role_definition)
|
|
|
|
# Generate line items recursively
|
|
line_items = []
|
|
self._generate_line_items(root_id, tree.all_nodes, line_items, period_filter, None, should_display_dimensions)
|
|
|
|
# Apply revenue deduplication for income statements to fix Issue #438
|
|
if actual_statement_type == 'IncomeStatement':
|
|
from edgar.xbrl.deduplication_strategy import RevenueDeduplicator
|
|
line_items = RevenueDeduplicator.deduplicate_statement_items(line_items)
|
|
|
|
return line_items
|
|
|
|
def _generate_line_items(self, element_id: str, nodes: Dict[str, PresentationNode],
|
|
result: List[Dict[str, Any]], period_filter: Optional[str] = None,
|
|
path: List[str] = None, should_display_dimensions: bool = False) -> None:
|
|
"""
|
|
Recursively generate line items for a statement.
|
|
|
|
Args:
|
|
element_id: Current element ID
|
|
nodes: Dictionary of presentation nodes
|
|
result: List to append line items to
|
|
period_filter: Optional period key to filter facts
|
|
path: Current path in hierarchy
|
|
should_display_dimensions: Whether to display dimensions for this statement
|
|
"""
|
|
if element_id not in nodes:
|
|
return
|
|
|
|
# Update path
|
|
if path is None:
|
|
path = []
|
|
|
|
current_path = path + [element_id]
|
|
|
|
# Get node information
|
|
node = nodes[element_id]
|
|
|
|
# Get label
|
|
label = node.display_label
|
|
|
|
# Get values and decimals across periods
|
|
values = {}
|
|
decimals = {} # Store decimals info for each period
|
|
units = {} # Store unit_ref for each period
|
|
period_types = {} # Store period_type ('instant' or 'duration') for each period
|
|
|
|
# Issue #463: Get balance and weight from element catalog and calculation trees
|
|
# (same approach as FactsView.get_facts())
|
|
balance = None # Debit/credit classification from XBRL schema
|
|
weight = None # Calculation weight from calculation linkbase
|
|
|
|
# Get balance from element catalog
|
|
element_id_normalized = element_id.replace(':', '_')
|
|
if element_id_normalized in self.element_catalog:
|
|
element = self.element_catalog[element_id_normalized]
|
|
balance = element.balance
|
|
if balance is None:
|
|
# Fallback to static US-GAAP mapping
|
|
from edgar.xbrl.parsers.concepts import get_balance_type
|
|
balance = get_balance_type(element_id)
|
|
|
|
# Get weight from calculation trees (Issue #463)
|
|
if hasattr(self, 'calculation_trees') and self.calculation_trees:
|
|
for calc_tree in self.calculation_trees.values():
|
|
if element_id_normalized in calc_tree.all_nodes:
|
|
calc_node = calc_tree.all_nodes[element_id_normalized]
|
|
weight = calc_node.weight
|
|
break # Use first weight found
|
|
|
|
# Calculate preferred_sign from preferred_label (for Issue #463)
|
|
# This determines display transformation: -1 = negate, 1 = as-is, None = not specified
|
|
preferred_sign_value = None
|
|
if node.preferred_label:
|
|
# Check if this is a negatedLabel (indicates value should be negated for display)
|
|
# Use pattern matching to support any XBRL namespace version (2003, 2009, future versions)
|
|
# Matches: 'negatedLabel', 'negatedTerseLabel', 'http://www.xbrl.org/YYYY/role/negated*Label', etc.
|
|
label_lower = node.preferred_label.lower()
|
|
is_negated = 'negated' in label_lower and (
|
|
label_lower.startswith('negated') or # Short form: 'negatedLabel'
|
|
'/role/negated' in label_lower # Full URI: 'http://www.xbrl.org/*/role/negated*'
|
|
)
|
|
preferred_sign_value = -1 if is_negated else 1
|
|
|
|
# Find facts for any of these concept names
|
|
all_relevant_facts = self._find_facts_for_element(node.element_name, period_filter)
|
|
|
|
# Group facts by period for better selection
|
|
facts_by_period = {}
|
|
|
|
# Process all found facts and group by period
|
|
for context_id, wrapped_fact in all_relevant_facts.items():
|
|
# Get period key for this context
|
|
period_key = self.context_period_map.get(context_id)
|
|
if not period_key:
|
|
continue # Skip if no period key found
|
|
|
|
# Initialize period entry if not exists
|
|
if period_key not in facts_by_period:
|
|
facts_by_period[period_key] = []
|
|
|
|
# Add this fact to the period
|
|
facts_by_period[period_key].append((context_id, wrapped_fact))
|
|
|
|
# should_display_dimensions is now passed as a parameter from the calling method
|
|
|
|
# Process facts by period, with different handling based on statement type
|
|
from collections import defaultdict
|
|
dimensioned_facts = defaultdict(list) # For dimensioned statement types
|
|
|
|
for period_key, period_facts in facts_by_period.items():
|
|
if should_display_dimensions:
|
|
# For statements that should display dimensions, group facts by dimension
|
|
for context_id, wrapped_fact in period_facts:
|
|
fact = wrapped_fact['fact']
|
|
dimension_info = wrapped_fact['dimension_info']
|
|
dimension_key = wrapped_fact['dimension_key']
|
|
|
|
if dimension_info:
|
|
# Use the dimension_key we already generated
|
|
dim_key_str = dimension_key
|
|
|
|
# Store dimensioned fact with the full dimension metadata
|
|
dimensioned_facts[dim_key_str].append((period_key, fact, dimension_info))
|
|
else:
|
|
# This is a non-dimensioned fact for this concept, use in the main item
|
|
if not values.get(period_key):
|
|
values[period_key] = fact.numeric_value if fact.numeric_value is not None else fact.value
|
|
|
|
# Store the decimals info for proper scaling
|
|
if fact.decimals is not None:
|
|
try:
|
|
if fact.decimals == 'INF':
|
|
decimals[period_key] = 0 # Infinite precision, no scaling
|
|
else:
|
|
decimals[period_key] = int(fact.decimals)
|
|
except (ValueError, TypeError):
|
|
decimals[period_key] = 0 # Default
|
|
|
|
# Store unit_ref for this period
|
|
units[period_key] = fact.unit_ref
|
|
|
|
# Store period_type from context
|
|
if context_id in self.contexts:
|
|
context = self.contexts[context_id]
|
|
if hasattr(context, 'period') and context.period:
|
|
pt = context.period.get('type') if isinstance(context.period, dict) else getattr(context.period, 'type', None)
|
|
period_types[period_key] = pt
|
|
|
|
else:
|
|
# For standard financial statements, prefer non-dimensioned facts
|
|
# If only one fact, use it
|
|
if len(period_facts) == 1:
|
|
context_id, wrapped_fact = period_facts[0]
|
|
fact = wrapped_fact['fact']
|
|
else:
|
|
# Multiple facts for same period - prioritize based on dimensions
|
|
# Sort facts by preference: no dimensions first, then by dimension count (fewer dimensions preferred)
|
|
sorted_facts = []
|
|
for ctx_id, wrapped_fact in period_facts:
|
|
dimension_count = len(wrapped_fact['dimension_info'])
|
|
sorted_facts.append((dimension_count, ctx_id, wrapped_fact))
|
|
|
|
# Sort by dimension count (no dimensions or fewer dimensions first)
|
|
sorted_facts.sort()
|
|
|
|
# Use the first fact (with fewest dimensions)
|
|
_, context_id, wrapped_fact = sorted_facts[0]
|
|
fact = wrapped_fact['fact']
|
|
|
|
# Store the value
|
|
values[period_key] = fact.numeric_value if fact.numeric_value is not None else fact.value
|
|
|
|
# Store the decimals info for proper scaling
|
|
if fact.decimals is not None:
|
|
try:
|
|
if fact.decimals == 'INF':
|
|
decimals[period_key] = 0 # Infinite precision, no scaling
|
|
else:
|
|
decimals[period_key] = int(fact.decimals)
|
|
except (ValueError, TypeError):
|
|
decimals[period_key] = 0 # Default if decimals can't be converted
|
|
|
|
# Store unit_ref for this period
|
|
units[period_key] = fact.unit_ref
|
|
|
|
# Store period_type from context
|
|
if context_id in self.contexts:
|
|
context = self.contexts[context_id]
|
|
if hasattr(context, 'period') and context.period:
|
|
pt = context.period.get('type') if isinstance(context.period, dict) else getattr(context.period, 'type', None)
|
|
period_types[period_key] = pt
|
|
|
|
# Create preferred_signs dict for all periods (same value for all periods of this concept)
|
|
preferred_signs = {}
|
|
if preferred_sign_value is not None:
|
|
for period_key in values.keys():
|
|
preferred_signs[period_key] = preferred_sign_value
|
|
|
|
# For dimensional statements with dimension data, handle the parent item specially
|
|
if should_display_dimensions and dimensioned_facts:
|
|
# Create parent line item with total values AND dimensional children
|
|
# This ensures users see both the total (e.g., Total Revenue = $25,500M)
|
|
# and the dimensional breakdown (e.g., Auto Revenue = $19,878M, Energy = $3,014M)
|
|
line_item = {
|
|
'concept': element_id,
|
|
'name': node.element_name,
|
|
'all_names': [node.element_name],
|
|
'label': label, # Keep original label, don't add colon
|
|
'values': values, # Show the total values
|
|
'decimals': decimals, # Include decimals for formatting
|
|
'units': units, # Include unit_ref for each period
|
|
'period_types': period_types, # Include period_type for each period
|
|
'preferred_signs': preferred_signs, # Include preferred_sign for display (Issue #463)
|
|
'balance': balance, # Include balance (debit/credit) for display (Issue #463)
|
|
'weight': weight, # Include calculation weight for metadata (Issue #463)
|
|
'level': node.depth,
|
|
'preferred_label': node.preferred_label,
|
|
'is_abstract': node.is_abstract, # Issue #450: Use node's actual abstract flag
|
|
'children': node.children,
|
|
'has_values': len(values) > 0, # True if we have total values
|
|
'has_dimension_children': True # Mark as having dimension children
|
|
}
|
|
else:
|
|
# Non-dimensional case: Create normal line item with values
|
|
line_item = {
|
|
'concept': element_id,
|
|
'name': node.element_name,
|
|
'all_names': [node.element_name],
|
|
'label': label,
|
|
'values': values,
|
|
'decimals': decimals, # Add decimals info for formatting
|
|
'units': units, # Include unit_ref for each period
|
|
'period_types': period_types, # Include period_type for each period
|
|
'preferred_signs': preferred_signs, # Include preferred_sign for display (Issue #463)
|
|
'balance': balance, # Include balance (debit/credit) for display (Issue #463)
|
|
'weight': weight, # Include calculation weight for metadata (Issue #463)
|
|
'level': node.depth,
|
|
'preferred_label': node.preferred_label,
|
|
'is_abstract': node.is_abstract,
|
|
'children': node.children,
|
|
'has_values': len(values) > 0 # Flag to indicate if we found values
|
|
}
|
|
|
|
# Add to result
|
|
result.append(line_item)
|
|
|
|
# For dimensional statements, add dimensioned facts as child line items
|
|
if should_display_dimensions and dimensioned_facts:
|
|
# Add each dimension as a child line item with increased depth
|
|
for dim_key, facts_list in dimensioned_facts.items():
|
|
dim_values = {}
|
|
dim_decimals = {}
|
|
dim_units = {} # Store unit_ref for each period
|
|
dim_period_types = {} # Store period_type for each period
|
|
dim_metadata = None # Store metadata from the first fact
|
|
|
|
# Collect values for each period
|
|
for fact_data in facts_list:
|
|
try:
|
|
# Unpack with consistent 3-part tuples from our updated code
|
|
period_key, fact, dimensions_info = fact_data
|
|
|
|
# Store the dimension metadata from the first fact
|
|
if dim_metadata is None:
|
|
dim_metadata = dimensions_info
|
|
|
|
# Extract value from fact
|
|
dim_values[period_key] = fact.numeric_value if fact.numeric_value is not None else fact.value
|
|
except (ValueError, TypeError, IndexError) as e:
|
|
# Try to handle older format (period_key, fact) tuple for backward compatibility
|
|
try:
|
|
if isinstance(fact_data, tuple) and len(fact_data) == 2:
|
|
period_key, fact = fact_data
|
|
dim_values[
|
|
period_key] = fact.numeric_value if fact.numeric_value is not None else fact.value
|
|
except Exception:
|
|
# Log the error and continue
|
|
log.warning(f"Error processing dimension fact data: {e}")
|
|
continue
|
|
|
|
# Store decimals
|
|
if fact.decimals is not None:
|
|
try:
|
|
if fact.decimals == 'INF':
|
|
dim_decimals[period_key] = 0
|
|
else:
|
|
dim_decimals[period_key] = int(fact.decimals)
|
|
except (ValueError, TypeError):
|
|
dim_decimals[period_key] = 0
|
|
|
|
# Store unit_ref for this period
|
|
dim_units[period_key] = fact.unit_ref
|
|
|
|
# Store period_type from context
|
|
context_id = fact.context_ref
|
|
if context_id in self.contexts:
|
|
context = self.contexts[context_id]
|
|
if hasattr(context, 'period') and context.period:
|
|
pt = context.period.get('type') if isinstance(context.period, dict) else getattr(context.period, 'type', None)
|
|
dim_period_types[period_key] = pt
|
|
|
|
# For better display, use the member label for dimension items,
|
|
# but make sure we don't add the parent concept name as well
|
|
|
|
# Default to the full dimension key (e.g., "Region: Americas")
|
|
display_label = dim_key
|
|
|
|
# Try various member label formats based on dimension structure
|
|
if dim_metadata:
|
|
if len(dim_metadata) == 1:
|
|
# For single dimensions, just use the member label (e.g., "Americas")
|
|
display_label = dim_metadata[0]['member_label']
|
|
else:
|
|
# For multiple dimensions, create a combined label with all member names
|
|
# (e.g., "Americas - iPhone")
|
|
member_labels = [info['member_label'] for info in dim_metadata if 'member_label' in info]
|
|
if member_labels:
|
|
display_label = " - ".join(member_labels)
|
|
|
|
# Create preferred_signs dict for dimensional line items (same value for all periods)
|
|
dim_preferred_signs = {}
|
|
if preferred_sign_value is not None:
|
|
for period_key in dim_values.keys():
|
|
dim_preferred_signs[period_key] = preferred_sign_value
|
|
|
|
# Create dimension line item
|
|
dim_line_item = {
|
|
'concept': element_id, # Use same concept
|
|
'name': node.element_name,
|
|
'all_names': [node.element_name],
|
|
'label': display_label, # Use optimized dimension label
|
|
'full_dimension_label': dim_key, # Keep full dimension notation for reference
|
|
'values': dim_values,
|
|
'decimals': dim_decimals,
|
|
'units': dim_units, # Include unit_ref for each period
|
|
'period_types': dim_period_types, # Include period_type for each period
|
|
'preferred_signs': dim_preferred_signs, # Include preferred_sign for display (Issue #463)
|
|
'level': node.depth + 1, # Increase depth by 1
|
|
'preferred_label': node.preferred_label,
|
|
'is_abstract': False,
|
|
'children': [],
|
|
'has_values': len(dim_values) > 0,
|
|
'is_dimension': True, # Mark as a dimension item
|
|
'dimension_metadata': dim_metadata # Store full dimension information
|
|
}
|
|
|
|
# Add to result
|
|
result.append(dim_line_item)
|
|
|
|
# Process children
|
|
for child_id in node.children:
|
|
self._generate_line_items(child_id, nodes, result, period_filter, current_path, should_display_dimensions)
|
|
|
|
def _find_facts_for_element(self, element_name: str, period_filter: Optional[str] = None,
|
|
dimensions: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
|
|
"""
|
|
Find facts for a specific element, optionally filtered by period and dimensions.
|
|
Args:
|
|
element_name: Element name to find facts for
|
|
period_filter: Optional period key to filter contexts
|
|
dimensions: Optional dictionary of dimension names to dimension values to filter by
|
|
Returns:
|
|
Dictionary of facts by context ID with dimension information attached
|
|
"""
|
|
if not element_name:
|
|
return {} # No element name provided
|
|
|
|
relevant_facts = {}
|
|
|
|
# Check each context
|
|
for context_id in self.contexts:
|
|
# Use parser's get_fact method which handles normalization internally
|
|
fact = self.parser.get_fact(element_name, context_id)
|
|
|
|
if fact:
|
|
# If period filter is specified, check if context matches period
|
|
if period_filter:
|
|
period_key = self.context_period_map.get(context_id)
|
|
if period_key != period_filter:
|
|
continue # Skip if period doesn't match
|
|
|
|
# If dimensions are specified, check if context has matching dimensions
|
|
if dimensions:
|
|
context = self.contexts.get(context_id)
|
|
if not context or not hasattr(context, 'dimensions'):
|
|
continue # Skip if context doesn't have dimensions
|
|
|
|
# Check if all specified dimensions match
|
|
matches_all_dimensions = True
|
|
for dim_name, dim_value in dimensions.items():
|
|
# Normalize dimension name if it contains a colon
|
|
normalized_dim_name = dim_name.replace(':', '_')
|
|
|
|
# Check if this dimension exists and matches the expected value
|
|
if normalized_dim_name not in context.dimensions or context.dimensions[
|
|
normalized_dim_name] != dim_value:
|
|
matches_all_dimensions = False
|
|
break
|
|
|
|
if not matches_all_dimensions:
|
|
continue # Skip if dimensions don't match
|
|
|
|
# Get the context and extract dimension information
|
|
context = self.contexts.get(context_id)
|
|
|
|
# Create a wrapper around the fact with dimension information
|
|
wrapped_fact = {
|
|
'fact': fact,
|
|
'dimension_info': [],
|
|
'dimension_key': ""
|
|
}
|
|
|
|
if context and hasattr(context, 'dimensions') and context.dimensions:
|
|
# Build rich dimension information with formatted labels
|
|
dimension_info = []
|
|
dim_keys = []
|
|
|
|
for dim_name, dim_value in sorted(context.dimensions.items()):
|
|
dim_value = dim_value.replace(":", "_")
|
|
# Initialize with technical names
|
|
dim_label = dim_name
|
|
mem_label = dim_value
|
|
|
|
# Get richer label information from element catalog
|
|
dim_element = None
|
|
mem_element = None
|
|
|
|
# Try to get human-readable dimension name
|
|
if dim_name in self.element_catalog:
|
|
dim_element = self.element_catalog[dim_name]
|
|
# Try different label roles in order of preference
|
|
for role in ['http://www.xbrl.org/2003/role/terseLabel',
|
|
'http://www.xbrl.org/2003/role/label',
|
|
'http://www.xbrl.org/2003/role/verboseLabel']:
|
|
if role in dim_element.labels:
|
|
dim_label = dim_element.labels[role]
|
|
break
|
|
|
|
# Try to get human-readable member name
|
|
if dim_value in self.element_catalog:
|
|
mem_element = self.element_catalog[dim_value]
|
|
# Try different label roles in order of preference
|
|
for role in ['http://www.xbrl.org/2003/role/terseLabel',
|
|
'http://www.xbrl.org/2003/role/label',
|
|
'http://www.xbrl.org/2003/role/verboseLabel']:
|
|
if role in mem_element.labels:
|
|
mem_label = mem_element.labels[role]
|
|
break
|
|
|
|
# Clean up labels (remove [Axis], [Member], etc.)
|
|
dim_label = dim_label.replace('[Axis]', '').replace('[Domain]', '').strip()
|
|
mem_label = mem_label.replace('[Member]', '').strip()
|
|
|
|
# Format key for display
|
|
format_key = f"{dim_label}: {mem_label}"
|
|
dim_keys.append(format_key)
|
|
|
|
# Store rich dimension information
|
|
dimension_info.append({
|
|
'dimension': dim_name,
|
|
'member': dim_value,
|
|
'dimension_label': dim_label,
|
|
'member_label': mem_label,
|
|
'format_key': format_key,
|
|
'dimension_element': dim_element,
|
|
'member_element': mem_element
|
|
})
|
|
|
|
# Store dimension information in the wrapper
|
|
wrapped_fact['dimension_info'] = dimension_info
|
|
wrapped_fact['dimension_key'] = ", ".join(sorted(dim_keys))
|
|
|
|
# If we get here, all filters passed
|
|
relevant_facts[context_id] = wrapped_fact
|
|
|
|
return relevant_facts
|
|
|
|
def get_period_views(self, statement_type: str) -> List[Dict[str, Any]]:
|
|
"""
|
|
Get available period views for a statement type.
|
|
Args:
|
|
statement_type: Type of statement to get period views for
|
|
|
|
Returns:
|
|
List of period view options with name, description, and period keys
|
|
"""
|
|
return get_period_views(self, statement_type)
|
|
|
|
def get_statements_by_category(self, category: str) -> List[Dict[str, Any]]:
|
|
"""
|
|
Get all statements matching a specific category.
|
|
Args:
|
|
category: Category of statements to find ('statement', 'note', 'disclosure', 'document', or 'other')
|
|
Returns:
|
|
List of statement metadata matching the category
|
|
"""
|
|
# Ensure indices are built
|
|
if not self._all_statements_cached:
|
|
self.get_all_statements()
|
|
|
|
result = []
|
|
|
|
# Find all statements with matching category
|
|
for stmt in self._all_statements_cached:
|
|
if stmt.get('category') == category:
|
|
result.append(stmt)
|
|
|
|
return result
|
|
|
|
def find_statement(self, statement_type: str, is_parenthetical: bool = False) -> Tuple[
|
|
List[Dict[str, Any]], Optional[str], str]:
|
|
"""
|
|
Find a statement by type, role, or name.
|
|
|
|
Args:
|
|
statement_type: Type of statement (e.g., "BalanceSheet") or role URI or statement name
|
|
is_parenthetical: Whether to look for a parenthetical statement
|
|
Returns:
|
|
Tuple of:
|
|
- List of matching statements
|
|
- Found role URI (or None if not found)
|
|
- Actual statement type (may be different from input if matched by role/name)
|
|
"""
|
|
# Initialize statement resolver if not already done
|
|
if self._statement_resolver is None:
|
|
self._statement_resolver = StatementResolver(self)
|
|
|
|
# Use the enhanced statement resolver
|
|
matching_statements, found_role, actual_statement_type, confidence = self._statement_resolver.find_statement(
|
|
statement_type, is_parenthetical
|
|
)
|
|
|
|
# For backward compatibility, ensure indices are built
|
|
if not self._all_statements_cached:
|
|
self.get_all_statements()
|
|
|
|
# If we couldn't find anything with the resolver, fall back to the old implementation
|
|
if not matching_statements:
|
|
# Original implementation (fallback)
|
|
matching_statements = []
|
|
found_role = None
|
|
actual_statement_type = statement_type
|
|
|
|
# Try to find the statement by standard name first
|
|
if statement_type in self._statement_by_standard_name:
|
|
matching_statements = self._statement_by_standard_name[statement_type]
|
|
if matching_statements:
|
|
found_role = matching_statements[0]['role']
|
|
|
|
# If not found by standard name, try by role URI
|
|
if not matching_statements and statement_type.startswith(
|
|
'http') and statement_type in self._statement_by_role_uri:
|
|
matching_statements = [self._statement_by_role_uri[statement_type]]
|
|
found_role = statement_type
|
|
|
|
# If not found, try by role name (case-insensitive)
|
|
if not matching_statements:
|
|
role_or_type_lower = statement_type.lower()
|
|
if role_or_type_lower in self._statement_by_role_name:
|
|
matching_statements = self._statement_by_role_name[role_or_type_lower]
|
|
if matching_statements:
|
|
found_role = matching_statements[0]['role']
|
|
|
|
# If still not found, try by definition
|
|
if not matching_statements:
|
|
def_key = statement_type.lower().replace(' ', '')
|
|
if def_key in self._statement_indices:
|
|
matching_statements = self._statement_indices[def_key]
|
|
if matching_statements:
|
|
found_role = matching_statements[0]['role']
|
|
|
|
# If still not found, try partial matching on role name
|
|
if not matching_statements:
|
|
for role_name, statements in self._statement_by_role_name.items():
|
|
if statement_type.lower() in role_name:
|
|
matching_statements = statements
|
|
found_role = statements[0]['role']
|
|
break
|
|
|
|
# Update actual statement type if we found a match
|
|
if matching_statements and matching_statements[0]['type']:
|
|
actual_statement_type = matching_statements[0]['type']
|
|
|
|
return matching_statements, found_role, actual_statement_type
|
|
|
|
def render_statement(self, statement_type: str = "BalanceSheet",
|
|
period_filter: Optional[str] = None,
|
|
period_view: Optional[str] = None,
|
|
standard: bool = True,
|
|
show_date_range: bool = False,
|
|
parenthetical: bool = False,
|
|
include_dimensions: bool = True) -> Optional[RenderedStatement]:
|
|
"""
|
|
Render a statement in a rich table format similar to how it would appear in an actual filing.
|
|
Args:
|
|
statement_type: Type of statement to render (e.g., "BalanceSheet", "IncomeStatement")
|
|
or a specific statement role/name (e.g., "CONSOLIDATEDBALANCESHEETS")
|
|
period_filter: Optional period key to filter by specific reporting period
|
|
period_view: Optional name of a predefined period view (e.g., "Quarterly: Current vs Previous")
|
|
standard: Whether to use standardized concept labels (default: True)
|
|
show_date_range: Whether to show full date ranges for duration periods (default: False)
|
|
parenthetical: Whether to look for a parenthetical statement (default: False)
|
|
include_dimensions: Whether to include dimensional segment data (default: True)
|
|
Returns:
|
|
RichTable: A formatted table representation of the statement
|
|
"""
|
|
# Find the statement using the unified statement finder with parenthetical support
|
|
matching_statements, found_role, actual_statement_type = self.find_statement(statement_type, parenthetical)
|
|
|
|
# Get statement definition from matching statements
|
|
role_definition = ""
|
|
if matching_statements:
|
|
role_definition = matching_statements[0]['definition']
|
|
|
|
# Determine if this statement should display dimensions
|
|
should_display_dimensions = include_dimensions and self._is_dimension_display_statement(actual_statement_type,
|
|
role_definition)
|
|
|
|
# Get the statement data with dimension display flag
|
|
statement_data = self.get_statement(statement_type, period_filter, should_display_dimensions)
|
|
if not statement_data:
|
|
return None
|
|
|
|
# Get the statement title
|
|
statement_info = statement_to_concepts.get(actual_statement_type)
|
|
if statement_info:
|
|
statement_title = statement_info.title
|
|
else:
|
|
# Try to get a nice title from the role definition
|
|
if role_definition:
|
|
statement_title = role_definition.split(' - ')[-1].strip()
|
|
else:
|
|
statement_title = statement_type
|
|
|
|
# Add "Parenthetical" to the title if appropriate
|
|
if parenthetical:
|
|
statement_title = f"{statement_title} (Parenthetical)"
|
|
|
|
# Get periods to display using unified period selection
|
|
periods_to_display = select_periods(
|
|
self, actual_statement_type, max_periods=4
|
|
)
|
|
|
|
# Render the statement
|
|
return render_statement(
|
|
statement_data,
|
|
periods_to_display,
|
|
statement_title,
|
|
actual_statement_type,
|
|
self.entity_info,
|
|
standard,
|
|
show_date_range,
|
|
show_comparisons=True,
|
|
xbrl_instance=self
|
|
)
|
|
|
|
def to_pandas(self, statement_role: Optional[str] = None, standard: bool = True) -> Dict[str, pd.DataFrame]:
|
|
"""
|
|
Convert XBRL data to pandas DataFrames.
|
|
Args:
|
|
statement_role: Optional role URI to convert only a specific statement
|
|
standard: Whether to use standardized concept labels (default: True)
|
|
Returns:
|
|
Dictionary of DataFrames for different aspects of the XBRL data
|
|
"""
|
|
|
|
dataframes = {}
|
|
|
|
# Convert contexts to DataFrame
|
|
context_data = []
|
|
for context_id, context in self.contexts.items():
|
|
ctx_dict = context.model_dump()
|
|
ctx_dict['context_id'] = context_id
|
|
|
|
# Extract entity info
|
|
if 'entity' in ctx_dict and ctx_dict['entity']:
|
|
ctx_dict['entity_identifier'] = ctx_dict['entity'].get('identifier')
|
|
ctx_dict['entity_scheme'] = ctx_dict['entity'].get('scheme')
|
|
|
|
# Extract period info
|
|
if 'period' in ctx_dict and ctx_dict['period']:
|
|
ctx_dict['period_type'] = ctx_dict['period'].get('type')
|
|
if ctx_dict['period_type'] == 'instant':
|
|
ctx_dict['period_instant'] = ctx_dict['period'].get('instant')
|
|
elif ctx_dict['period_type'] == 'duration':
|
|
ctx_dict['period_start'] = ctx_dict['period'].get('startDate')
|
|
ctx_dict['period_end'] = ctx_dict['period'].get('endDate')
|
|
|
|
# Extract dimensions
|
|
if 'dimensions' in ctx_dict and ctx_dict['dimensions']:
|
|
for dim_name, dim_value in ctx_dict['dimensions'].items():
|
|
dim_key = f"dim_{dim_name.replace(':', '_')}"
|
|
ctx_dict[dim_key] = dim_value
|
|
|
|
context_data.append(ctx_dict)
|
|
|
|
if context_data:
|
|
dataframes['contexts'] = pd.DataFrame(context_data)
|
|
|
|
# Convert facts to DataFrame
|
|
fact_data = []
|
|
for fact_key, fact in self._facts.items():
|
|
fact_dict = fact.model_dump()
|
|
fact_dict['fact_key'] = fact_key
|
|
|
|
# Try to get additional information
|
|
if fact.context_ref in self.contexts:
|
|
context = self.contexts[fact.context_ref]
|
|
|
|
# Add period information
|
|
if 'period' in context.model_dump() and context.period:
|
|
fact_dict['period_type'] = context.period.get('type')
|
|
if fact_dict['period_type'] == 'instant':
|
|
fact_dict['period_instant'] = context.period.get('instant')
|
|
elif fact_dict['period_type'] == 'duration':
|
|
fact_dict['period_start'] = context.period.get('startDate')
|
|
fact_dict['period_end'] = context.period.get('endDate')
|
|
|
|
# Add entity information
|
|
if 'entity' in context.model_dump() and context.entity:
|
|
fact_dict['entity_identifier'] = context.entity.get('identifier')
|
|
|
|
# Add dimensions
|
|
if 'dimensions' in context.model_dump() and context.dimensions:
|
|
for dim_name, dim_value in context.dimensions.items():
|
|
dim_key = f"dim_{dim_name.replace(':', '_')}"
|
|
fact_dict[dim_key] = dim_value
|
|
|
|
# Try to get element information
|
|
element_id = fact.element_id
|
|
if element_id in self.element_catalog:
|
|
element = self.element_catalog[element_id]
|
|
fact_dict['element_name'] = element.name
|
|
fact_dict['element_type'] = element.data_type
|
|
fact_dict['element_period_type'] = element.period_type
|
|
fact_dict['element_balance'] = element.balance
|
|
|
|
# Add label
|
|
label = None
|
|
if element.labels:
|
|
# Try standard label first
|
|
label = element.labels.get(STANDARD_LABEL)
|
|
if not label:
|
|
# Take first available label
|
|
label = next(iter(element.labels.values()), None)
|
|
fact_dict['element_label'] = label
|
|
|
|
fact_data.append(fact_dict)
|
|
|
|
if fact_data:
|
|
dataframes['facts'] = pd.DataFrame(fact_data)
|
|
|
|
# Convert entity info to DataFrame
|
|
if self.entity_info:
|
|
dataframes['entity_info'] = pd.DataFrame([self.entity_info])
|
|
|
|
# Convert specific statement if requested
|
|
if statement_role:
|
|
# Try direct role URI
|
|
statement_data = self.get_statement(statement_role)
|
|
|
|
# If not found, try by statement type
|
|
if not statement_data and not statement_role.startswith('http'):
|
|
# Find the role for this statement type
|
|
all_statements = self.get_all_statements()
|
|
matching_statements = [stmt for stmt in all_statements if stmt['type'] == statement_role]
|
|
|
|
if matching_statements:
|
|
role = matching_statements[0]['role']
|
|
statement_data = self.get_statement(role)
|
|
|
|
# Convert statement data to DataFrame if found
|
|
if statement_data:
|
|
# Apply standardization if requested
|
|
if standard:
|
|
# Get statement type for context
|
|
stmt_type = statement_role
|
|
if not stmt_type.startswith('http'):
|
|
stmt_type = statement_role
|
|
else:
|
|
# Try to determine statement type from role
|
|
all_statements = self.get_all_statements()
|
|
for stmt in all_statements:
|
|
if stmt['role'] == statement_role:
|
|
stmt_type = stmt['type']
|
|
break
|
|
|
|
# Add statement type to each item
|
|
for item in statement_data:
|
|
item['statement_type'] = stmt_type
|
|
|
|
# Apply standardization
|
|
from edgar.xbrl.standardization import ConceptMapper, initialize_default_mappings, standardize_statement
|
|
mapper = ConceptMapper(initialize_default_mappings(read_only=True))
|
|
statement_data = standardize_statement(statement_data, mapper)
|
|
|
|
# Create rows for the DataFrame
|
|
rows = []
|
|
|
|
# Add columns for all found periods
|
|
all_periods = set()
|
|
for item in statement_data:
|
|
for period in item.get('values', {}).keys():
|
|
all_periods.add(period)
|
|
|
|
# Sort periods (typically instant or duration_start_end format)
|
|
sorted_periods = sorted(all_periods)
|
|
|
|
for item in statement_data:
|
|
row = {
|
|
'concept': item['concept'],
|
|
'label': item['label'],
|
|
'level': item['level'],
|
|
'is_abstract': item['is_abstract'],
|
|
'has_values': item.get('has_values', False),
|
|
}
|
|
|
|
# Add original label if standardized
|
|
if 'original_label' in item:
|
|
row['original_label'] = item['original_label']
|
|
|
|
# Add period values
|
|
for period in sorted_periods:
|
|
value = item.get('values', {}).get(period)
|
|
row[period] = value
|
|
|
|
rows.append(row)
|
|
|
|
if rows:
|
|
dataframes['statement'] = pd.DataFrame(rows)
|
|
# Rename columns to remove duration/instant prefixes
|
|
dataframes['statement'].columns = [
|
|
col.replace('duration_', '').replace('instant_', '')
|
|
for col in dataframes['statement'].columns
|
|
]
|
|
|
|
return dataframes
|
|
|
|
def get_footnotes_for_fact(self, fact_id: str) -> List['Footnote']:
|
|
"""Get all footnotes associated with a specific fact ID.
|
|
Args:
|
|
fact_id: The ID of the fact to get footnotes for
|
|
Returns:
|
|
List of Footnote objects associated with the fact
|
|
"""
|
|
footnotes = []
|
|
|
|
# First check if any fact has this ID and get its footnote references
|
|
for fact in self.parser.facts.values():
|
|
if fact.fact_id == fact_id:
|
|
# Get the footnote objects for each footnote ID
|
|
for footnote_id in fact.footnotes:
|
|
if footnote_id in self.parser.footnotes:
|
|
footnotes.append(self.parser.footnotes[footnote_id])
|
|
break
|
|
|
|
return footnotes
|
|
|
|
def get_facts_with_footnotes(self) -> Dict[str, 'Fact']:
|
|
"""Get all facts that have associated footnotes.
|
|
Returns:
|
|
Dictionary of fact_key -> Fact for all facts with footnotes
|
|
"""
|
|
facts_with_footnotes = {}
|
|
for key, fact in self.parser.facts.items():
|
|
if fact.footnotes:
|
|
facts_with_footnotes[key] = fact
|
|
return facts_with_footnotes
|
|
|
|
def get_currency_for_fact(self, element_name: str, period_key: str) -> Optional[str]:
|
|
"""
|
|
Get currency for a specific fact/period on-demand with caching.
|
|
|
|
Args:
|
|
element_name: The XBRL element name
|
|
period_key: The period key to look up
|
|
|
|
Returns:
|
|
Currency measure string (e.g., 'iso4217:EUR') or None if not found
|
|
"""
|
|
# Create cache key
|
|
cache_key = f"{element_name}_{period_key}"
|
|
|
|
# Check cache first
|
|
if not hasattr(self, '_currency_cache'):
|
|
self._currency_cache = {}
|
|
|
|
if cache_key in self._currency_cache:
|
|
return self._currency_cache[cache_key]
|
|
|
|
# Find facts for this element and period
|
|
facts = self._find_facts_for_element(element_name, period_key)
|
|
|
|
# Look for the first fact with currency information
|
|
currency_measure = None
|
|
for _, wrapped_fact in facts.items():
|
|
fact = wrapped_fact['fact']
|
|
if hasattr(fact, 'unit_ref') and fact.unit_ref and fact.unit_ref in self.units:
|
|
unit_info = self.units[fact.unit_ref]
|
|
if 'measure' in unit_info:
|
|
currency_measure = unit_info['measure']
|
|
break
|
|
|
|
# Cache the result (including None values to avoid repeated lookups)
|
|
self._currency_cache[cache_key] = currency_measure
|
|
return currency_measure
|
|
|
|
def __rich__(self):
|
|
"""Rich representation for pretty printing in console."""
|
|
return generate_rich_representation(self)
|
|
|
|
def __repr__(self):
|
|
return repr_rich(self.__rich__())
|
|
|
|
|
|
def text(self, max_tokens: int = 2000) -> str:
|
|
"""
|
|
Get AI-optimized text representation of XBRL document.
|
|
|
|
Returns a compact Markdown-KV format optimized for LLM consumption,
|
|
including entity information, filing details, period coverage, available
|
|
statements, and common usage patterns.
|
|
|
|
This format uses 64.7% fewer tokens than the visual repr() format while
|
|
retaining all essential information.
|
|
|
|
Args:
|
|
max_tokens: Target token budget (currently not enforced, reserved for future use)
|
|
|
|
Returns:
|
|
Compact Markdown-KV text representation optimized for AI consumption
|
|
|
|
Example:
|
|
>>> xbrl = filing.xbrl()
|
|
>>> text = xbrl.text()
|
|
>>> print(text)
|
|
**Entity:** Apple Inc. (AAPL)
|
|
**CIK:** 0000320193
|
|
**Form:** 10-K
|
|
...
|
|
"""
|
|
lines = []
|
|
|
|
# Entity information
|
|
if self.entity_info:
|
|
entity_name = self.entity_info.get('entity_name', 'Unknown Entity')
|
|
ticker = self.entity_info.get('ticker', '')
|
|
cik = self.entity_info.get('identifier', '')
|
|
|
|
# Entity line with ticker if available
|
|
entity_line = f"**Entity:** {entity_name}"
|
|
if ticker:
|
|
entity_line += f" ({ticker})"
|
|
lines.append(entity_line)
|
|
|
|
if cik:
|
|
lines.append(f"**CIK:** {cik}")
|
|
|
|
# Filing details
|
|
doc_type = self.entity_info.get('document_type', '')
|
|
if doc_type:
|
|
lines.append(f"**Form:** {doc_type}")
|
|
|
|
fiscal_year = self.entity_info.get('fiscal_year', '')
|
|
fiscal_period = self.entity_info.get('fiscal_period', '')
|
|
period_end = self.entity_info.get('document_period_end_date', '')
|
|
|
|
if fiscal_period and fiscal_year:
|
|
period_display = f"Fiscal Year {fiscal_year}" if fiscal_period == 'FY' else f"{fiscal_period} {fiscal_year}"
|
|
if period_end:
|
|
period_display += f" (ended {period_end})"
|
|
lines.append(f"**Fiscal Period:** {period_display}")
|
|
|
|
# Data volume
|
|
lines.append(f"**Facts:** {len(self._facts):,}")
|
|
lines.append(f"**Contexts:** {len(self.contexts):,}")
|
|
|
|
# Period coverage
|
|
if self.reporting_periods:
|
|
lines.append("")
|
|
lines.append("**Available Data Coverage:**")
|
|
|
|
# Categorize periods
|
|
annual_periods = []
|
|
quarterly_periods = []
|
|
|
|
for period in self.reporting_periods[:10]:
|
|
label = period.get('label', '')
|
|
if not label:
|
|
continue
|
|
|
|
if 'Annual:' in label or 'FY' in label.upper():
|
|
# Extract fiscal year
|
|
import re
|
|
year_match = re.search(r'to .* (\d{4})', label)
|
|
if year_match:
|
|
annual_periods.append(f"FY {year_match.group(1)}")
|
|
else:
|
|
annual_periods.append(label)
|
|
elif 'Quarterly:' in label or any(q in label for q in ['Q1', 'Q2', 'Q3', 'Q4']):
|
|
clean_label = label.replace('Quarterly:', '').strip()
|
|
quarterly_periods.append(clean_label)
|
|
|
|
if annual_periods:
|
|
lines.append(f" Annual: {', '.join(annual_periods[:3])}")
|
|
if quarterly_periods:
|
|
lines.append(f" Quarterly: {', '.join(quarterly_periods[:2])}")
|
|
|
|
# Available statements
|
|
statements = self.get_all_statements()
|
|
if statements:
|
|
lines.append("")
|
|
lines.append("**Available Statements:**")
|
|
# Group by core vs other statements
|
|
core_statements = set()
|
|
other_statements = []
|
|
|
|
core_types = {'IncomeStatement', 'BalanceSheet', 'CashFlowStatement',
|
|
'StatementOfEquity', 'ComprehensiveIncome'}
|
|
|
|
for stmt in statements:
|
|
stmt_type = stmt.get('type', '')
|
|
if stmt_type in core_types:
|
|
core_statements.add(stmt_type)
|
|
elif stmt_type:
|
|
other_statements.append(stmt_type)
|
|
|
|
# Show core statements first (in consistent order)
|
|
if core_statements:
|
|
ordered_core = [s for s in ['IncomeStatement', 'ComprehensiveIncome', 'BalanceSheet',
|
|
'StatementOfEquity', 'CashFlowStatement'] if s in core_statements]
|
|
lines.append(f" Core: {', '.join(ordered_core)}")
|
|
if other_statements and len(other_statements) <= 5:
|
|
lines.append(f" Other: {', '.join(other_statements)}")
|
|
elif other_statements:
|
|
lines.append(f" Other: {len(other_statements)} additional statements")
|
|
|
|
# Common actions (compact version)
|
|
lines.append("")
|
|
lines.append("**Common Actions:**")
|
|
lines.append(" # List all available statements")
|
|
lines.append(" xbrl.statements")
|
|
lines.append("")
|
|
lines.append(" # View core financial statements")
|
|
lines.append(" stmt = xbrl.statements.income_statement()")
|
|
lines.append(" stmt = xbrl.statements.balance_sheet()")
|
|
lines.append(" stmt = xbrl.statements.cash_flow_statement()")
|
|
lines.append(" stmt = xbrl.statements.statement_of_equity()")
|
|
lines.append(" stmt = xbrl.statements.comprehensive_income()")
|
|
lines.append("")
|
|
lines.append(" # Get current period only (returns XBRL with filtered context)")
|
|
lines.append(" current = xbrl.current_period")
|
|
lines.append(" stmt = current.income_statement()")
|
|
lines.append("")
|
|
lines.append(" # Convert statement to DataFrame")
|
|
lines.append(" df = stmt.to_dataframe()")
|
|
lines.append("")
|
|
lines.append(" # Query specific facts")
|
|
lines.append(" revenue = xbrl.facts.query().by_concept('Revenue').to_dataframe()")
|
|
lines.append("")
|
|
lines.append("💡 Use xbrl.docs for comprehensive API guide")
|
|
|
|
return "\n".join(lines)
|
|
|
|
@property
|
|
def docs(self):
|
|
"""
|
|
Get comprehensive documentation for the XBRL class.
|
|
|
|
Returns a Docs object with detailed API documentation including usage patterns,
|
|
examples, and guidance for working with XBRL data. The documentation is searchable
|
|
using the .search() method.
|
|
|
|
Returns:
|
|
Docs: Documentation object with rich display and search capabilities
|
|
|
|
Example:
|
|
>>> xbrl.docs # Display full documentation
|
|
>>> xbrl.docs.search("extract revenue") # Search for specific topics
|
|
"""
|
|
from edgar.richtools import Docs
|
|
return Docs(self)
|