""" Statement Resolution for XBRL data. This module provides a robust system for identifying and matching XBRL financial statements, notes, and disclosures regardless of taxonomy variations and company-specific customizations. """ import re from dataclasses import dataclass, field from enum import Enum from typing import Any, Dict, List, Optional, Tuple from edgar.core import log from edgar.xbrl.exceptions import StatementNotFound from edgar.xbrl.statements import statement_to_concepts class StatementCategory(Enum): """Categories of XBRL presentation sections.""" FINANCIAL_STATEMENT = "statement" NOTE = "note" DISCLOSURE = "disclosure" DOCUMENT = "document" # For cover page, signatures, etc. OTHER = "other" @dataclass class ConceptPattern: """Pattern for matching statement concepts across different taxonomies.""" pattern: str weight: float = 1.0 @dataclass class StatementType: """Detailed information about a statement type for matching.""" name: str primary_concepts: List[str] category: StatementCategory = StatementCategory.FINANCIAL_STATEMENT # Default to financial statement alternative_concepts: List[str] = field(default_factory=list) concept_patterns: List[str] = field(default_factory=list) key_concepts: List[str] = field(default_factory=list) role_patterns: List[str] = field(default_factory=list) title: str = "" supports_parenthetical: bool = False weight_map: Dict[str, float] = field(default_factory=dict) def match_concept(self, concept_name: str) -> bool: """Check if a concept name matches this statement type's concepts.""" # Try exact primary concept match if concept_name in self.primary_concepts: return True # Try alternate concepts if concept_name in self.alternative_concepts: return True # Try matching against patterns for pattern in self.concept_patterns: if re.match(pattern, concept_name): return True return False def match_role(self, role_uri: str, role_name: str = "", role_def: str = "") -> bool: """Check if role information matches this statement type.""" name_lower = self.name.lower() # Check exact match in role parts if name_lower in role_uri.lower(): return True if role_name and name_lower in role_name.lower(): return True if role_def and name_lower in role_def.lower(): return True # Try pattern matching for pattern in self.role_patterns: if re.match(pattern, role_uri) or (role_name and re.match(pattern, role_name)): return True return False # Registry of statement types with matching information statement_registry = { "BalanceSheet": StatementType( name="BalanceSheet", category=StatementCategory.FINANCIAL_STATEMENT, primary_concepts=["us-gaap_StatementOfFinancialPositionAbstract"], alternative_concepts=[ "us-gaap_BalanceSheetAbstract", "ifrs-full_StatementOfFinancialPositionAbstract" # IFRS equivalent ], concept_patterns=[ r".*_StatementOfFinancialPositionAbstract$", r".*_BalanceSheetAbstract$", r".*_ConsolidatedBalanceSheetsAbstract$", r".*_CondensedConsolidatedBalanceSheetsUnauditedAbstract$" ], key_concepts=[ "us-gaap_Assets", "us-gaap_Liabilities", "us-gaap_StockholdersEquity", "ifrs-full_Assets", "ifrs-full_Liabilities", "ifrs-full_Equity" # IFRS equivalents ], role_patterns=[ r".*[Bb]alance[Ss]heet.*", r".*[Ss]tatement[Oo]f[Ff]inancial[Pp]osition.*", r".*StatementConsolidatedBalanceSheets.*" ], title="Consolidated Balance Sheets", supports_parenthetical=True, weight_map={"assets": 0.3, "liabilities": 0.3, "equity": 0.4} ), "IncomeStatement": StatementType( name="IncomeStatement", category=StatementCategory.FINANCIAL_STATEMENT, primary_concepts=["us-gaap_IncomeStatementAbstract"], alternative_concepts=[ "us-gaap_StatementOfIncomeAbstract", "ifrs-full_IncomeStatementAbstract" # IFRS equivalent ], concept_patterns=[ r".*_IncomeStatementAbstract$", r".*_StatementOfIncomeAbstract$", r".*_ConsolidatedStatementsOfIncomeAbstract$", r".*_CondensedConsolidatedStatementsOfIncomeUnauditedAbstract$" ], key_concepts=[ "us-gaap_Revenues", "us-gaap_NetIncomeLoss", "ifrs-full_Revenue", "ifrs-full_ProfitLoss" # IFRS equivalents ], role_patterns=[ r".*[Ii]ncome[Ss]tatement.*", r".*[Ss]tatement[Oo]f[Ii]ncome.*", r".*[Oo]perations.*", r".*StatementConsolidatedStatementsOfIncome.*" ], title="Consolidated Statement of Income", supports_parenthetical=True, weight_map={"revenues": 0.4, "netIncomeLoss": 0.6} ), "CashFlowStatement": StatementType( name="CashFlowStatement", category=StatementCategory.FINANCIAL_STATEMENT, primary_concepts=["us-gaap_StatementOfCashFlowsAbstract"], alternative_concepts=["ifrs-full_StatementOfCashFlowsAbstract"], # IFRS equivalent concept_patterns=[ r".*_StatementOfCashFlowsAbstract$", r".*_CashFlowsAbstract$", r".*_ConsolidatedStatementsOfCashFlowsAbstract$", r".*_CondensedConsolidatedStatementsOfCashFlowsUnauditedAbstract$" ], key_concepts=[ "us-gaap_NetCashProvidedByUsedInOperatingActivities", "us-gaap_CashAndCashEquivalentsPeriodIncreaseDecrease", "ifrs-full_CashFlowsFromUsedInOperatingActivities", # IFRS equivalents "ifrs-full_IncreaseDecreaseInCashAndCashEquivalents" ], role_patterns=[ r".*[Cc]ash[Ff]low.*", r".*[Ss]tatement[Oo]f[Cc]ash[Ff]lows.*", r".*StatementConsolidatedStatementsOfCashFlows.*" ], title="Consolidated Statement of Cash Flows", supports_parenthetical=False ), "StatementOfEquity": StatementType( name="StatementOfEquity", category=StatementCategory.FINANCIAL_STATEMENT, primary_concepts=["us-gaap_StatementOfStockholdersEquityAbstract"], alternative_concepts=[ "us-gaap_StatementOfShareholdersEquityAbstract", "us-gaap_StatementOfPartnersCapitalAbstract" ], concept_patterns=[ r".*_StatementOfStockholdersEquityAbstract$", r".*_StatementOfShareholdersEquityAbstract$", r".*_StatementOfChangesInEquityAbstract$", r".*_ConsolidatedStatementsOfShareholdersEquityAbstract$" ], key_concepts=["us-gaap_StockholdersEquity", "us-gaap_CommonStock", "us-gaap_RetainedEarnings"], role_patterns=[ r".*[Ee]quity.*", r".*[Ss]tockholders.*", r".*[Ss]hareholders.*", r".*[Cc]hanges[Ii]n[Ee]quity.*", r".*StatementConsolidatedStatementsOfStockholdersEquity.*" ], title="Consolidated Statement of Equity", supports_parenthetical=False ), "ComprehensiveIncome": StatementType( name="ComprehensiveIncome", category=StatementCategory.FINANCIAL_STATEMENT, primary_concepts=["us-gaap_StatementOfIncomeAndComprehensiveIncomeAbstract"], alternative_concepts=["us-gaap_StatementOfComprehensiveIncomeAbstract"], concept_patterns=[ r".*_ComprehensiveIncomeAbstract$", r".*_StatementOfComprehensiveIncomeAbstract$", r".*_ConsolidatedStatementsOfComprehensiveIncomeAbstract$" ], key_concepts=["us-gaap_ComprehensiveIncomeNetOfTax"], role_patterns=[ r".*[Cc]omprehensive[Ii]ncome.*", r".*[Oo]ther[Cc]omprehensive.*", r".*StatementConsolidatedStatementsOfComprehensiveIncome.*" ], title="Consolidated Statement of Comprehensive Income", supports_parenthetical=True ), "Notes": StatementType( name="Notes", category=StatementCategory.NOTE, primary_concepts=["us-gaap_NotesToFinancialStatementsAbstract"], alternative_concepts=[], concept_patterns=[ r".*_NotesToFinancialStatementsAbstract$", r".*_NotesAbstract$" ], key_concepts=[], role_patterns=[ r".*[Nn]otes[Tt]o[Ff]inancial[Ss]tatements.*", r".*[Nn]ote\s+\d+.*", r".*[Nn]otes.*" ], title="Notes to Financial Statements", supports_parenthetical=False ), "AccountingPolicies": StatementType( name="AccountingPolicies", category=StatementCategory.NOTE, primary_concepts=["us-gaap_AccountingPoliciesAbstract"], alternative_concepts=[], concept_patterns=[ r".*_AccountingPoliciesAbstract$", r".*_SignificantAccountingPoliciesAbstract$" ], key_concepts=["us-gaap_SignificantAccountingPoliciesTextBlock"], role_patterns=[ r".*[Aa]ccounting[Pp]olicies.*", r".*[Ss]ignificant[Aa]ccounting[Pp]olicies.*" ], title="Significant Accounting Policies", supports_parenthetical=False ), "Disclosures": StatementType( name="Disclosures", category=StatementCategory.DISCLOSURE, primary_concepts=["us-gaap_DisclosuresAbstract"], alternative_concepts=[], concept_patterns=[ r".*_DisclosuresAbstract$", r".*_DisclosureAbstract$" ], key_concepts=[], role_patterns=[ r".*[Dd]isclosure.*" ], title="Disclosures", supports_parenthetical=False ), "SegmentDisclosure": StatementType( name="SegmentDisclosure", category=StatementCategory.DISCLOSURE, primary_concepts=["us-gaap_SegmentDisclosureAbstract"], alternative_concepts=[], concept_patterns=[ r".*_SegmentDisclosureAbstract$", r".*_SegmentReportingDisclosureAbstract$" ], key_concepts=["us-gaap_SegmentReportingDisclosureTextBlock"], role_patterns=[ r".*[Ss]egment.*", r".*[Ss]egment[Rr]eporting.*", r".*[Ss]egment[Ii]nformation.*" ], title="Segment Information", supports_parenthetical=False ), "CoverPage": StatementType( name="CoverPage", category=StatementCategory.DOCUMENT, primary_concepts=["dei_CoverAbstract"], concept_patterns=[r".*_CoverAbstract$"], key_concepts=["dei_EntityRegistrantName", "dei_DocumentType"], role_patterns=[r".*[Cc]over.*"], title="Cover Page", supports_parenthetical=False ) } class StatementResolver: """ Resolves statement identifiers to actual XBRL statement roles. This class provides a multi-layered approach to statement matching, handling taxonomy variations and company-specific customizations. """ def __init__(self, xbrl): """ Initialize with an XBRL object. Args: xbrl: XBRL object containing parsed data """ self.xbrl = xbrl self._cache = {} # Build indices for faster lookups self._statement_by_role_uri = {} self._statement_by_role_name = {} self._statement_by_primary_concept = {} self._statement_by_type = {} self._statement_by_role_def = {} # Map legacy statement types to new registry self._legacy_to_registry = {} for legacy_type, info in statement_to_concepts.items(): if legacy_type in statement_registry: self._legacy_to_registry[legacy_type] = legacy_type continue # Try to find a match in the registry for reg_type, reg_info in statement_registry.items(): if info.concept in reg_info.primary_concepts or info.concept in reg_info.alternative_concepts: self._legacy_to_registry[legacy_type] = reg_type break # Initialize indices when instantiated self._initialize_indices() def _initialize_indices(self): """Build lookup indices for fast statement retrieval.""" # Get all statements statements = self.xbrl.get_all_statements() # Reset indices self._statement_by_role_uri = {} self._statement_by_role_name = {} self._statement_by_primary_concept = {} self._statement_by_type = {} self._statement_by_role_def = {} # Build indices for stmt in statements: role = stmt.get('role', '') role_name = stmt.get('role_name', '').lower() if stmt.get('role_name') else '' primary_concept = stmt.get('primary_concept', '') stmt_type = stmt.get('type', '') role_def = stmt.get('definition', '').lower() if stmt.get('definition') else '' # By role URI self._statement_by_role_uri[role] = stmt # By role name if role_name: if role_name not in self._statement_by_role_name: self._statement_by_role_name[role_name] = [] self._statement_by_role_name[role_name].append(stmt) # By primary concept if primary_concept: if primary_concept not in self._statement_by_primary_concept: self._statement_by_primary_concept[primary_concept] = [] self._statement_by_primary_concept[primary_concept].append(stmt) # By statement type if stmt_type: if stmt_type not in self._statement_by_type: self._statement_by_type[stmt_type] = [] self._statement_by_type[stmt_type].append(stmt) # By role definition (without spaces, lowercase) if role_def: def_key = role_def.replace(' ', '') if def_key not in self._statement_by_role_def: self._statement_by_role_def[def_key] = [] self._statement_by_role_def[def_key].append(stmt) def _match_by_primary_concept(self, statement_type: str, is_parenthetical: bool = False) -> Tuple[List[Dict[str, Any]], Optional[str], float]: """ Match statements using primary concept names. Args: statement_type: Statement type to match is_parenthetical: Whether to look for a parenthetical statement Returns: Tuple of (matching statements, found role, confidence score) """ # Convert legacy types to registry types if needed if statement_type in self._legacy_to_registry: registry_type = self._legacy_to_registry[statement_type] else: registry_type = statement_type # Check if this is a known statement type if registry_type not in statement_registry: return [], None, 0.0 # Get registry information registry_entry = statement_registry[registry_type] # Try to match by primary concepts matched_statements = [] for concept in registry_entry.primary_concepts + registry_entry.alternative_concepts: if concept in self._statement_by_primary_concept: for stmt in self._statement_by_primary_concept[concept]: # Handle parenthetical check if registry_entry.supports_parenthetical: role_def = stmt.get('definition', '').lower() is_role_parenthetical = 'parenthetical' in role_def # Skip if parenthetical status doesn't match if is_parenthetical != is_role_parenthetical: continue matched_statements.append(stmt) # If we found matching statements, return with high confidence if matched_statements: return matched_statements, matched_statements[0]['role'], 0.9 return [], None, 0.0 def _match_by_concept_pattern(self, statement_type: str, is_parenthetical: bool = False) -> Tuple[List[Dict[str, Any]], Optional[str], float]: """ Match statements using regex patterns on concept names to handle custom company namespaces. Args: statement_type: Statement type to match is_parenthetical: Whether to look for a parenthetical statement Returns: Tuple of (matching statements, found role, confidence score) """ # Convert legacy types to registry types if needed if statement_type in self._legacy_to_registry: registry_type = self._legacy_to_registry[statement_type] else: registry_type = statement_type # Check if this is a known statement type if registry_type not in statement_registry: return [], None, 0.0 # Get registry information registry_entry = statement_registry[registry_type] concept_patterns = registry_entry.concept_patterns if not concept_patterns: return [], None, 0.0 # Get all statements to check against patterns all_statements = self.xbrl.get_all_statements() # Check each statement's primary concept against our patterns matched_statements = [] for stmt in all_statements: primary_concept = stmt.get('primary_concept', '') # Skip if no primary concept if not primary_concept: continue # Check if this concept matches any of our patterns for pattern in concept_patterns: if re.match(pattern, primary_concept): # For parenthetical statements, check the role definition if registry_entry.supports_parenthetical: role_def = stmt.get('definition', '').lower() is_role_parenthetical = 'parenthetical' in role_def # Skip if parenthetical status doesn't match if is_parenthetical != is_role_parenthetical: continue matched_statements.append(stmt) break # Found a match, no need to check other patterns # If we found matching statements, return with high confidence if matched_statements: return matched_statements, matched_statements[0]['role'], 0.85 return [], None, 0.0 def _match_by_role_pattern(self, statement_type: str, is_parenthetical: bool = False) -> Tuple[List[Dict[str, Any]], Optional[str], float]: """ Match statements using role URI or role name patterns. Args: statement_type: Statement type to match is_parenthetical: Whether to look for a parenthetical statement Returns: Tuple of (matching statements, found role, confidence score) """ # Convert legacy types to registry types if needed if statement_type in self._legacy_to_registry: registry_type = self._legacy_to_registry[statement_type] else: registry_type = statement_type # Check if this is a known statement type if registry_type not in statement_registry: return [], None, 0.0 # Get registry information registry_entry = statement_registry[registry_type] role_patterns = registry_entry.role_patterns if not role_patterns: return [], None, 0.0 # Get all statements all_statements = self.xbrl.get_all_statements() # Check each statement's role and role name against our patterns matched_statements = [] for stmt in all_statements: role = stmt.get('role', '') role_name = stmt.get('role_name', '') # Check if role matches any pattern for pattern in role_patterns: if (re.search(pattern, role, re.IGNORECASE) or (role_name and re.search(pattern, role_name, re.IGNORECASE))): # For parenthetical statements, check the role definition if registry_entry.supports_parenthetical: role_def = stmt.get('definition', '').lower() is_role_parenthetical = 'parenthetical' in role_def # Skip if parenthetical status doesn't match if is_parenthetical != is_role_parenthetical: continue matched_statements.append(stmt) break # Found a match, no need to check other patterns # If we found matching statements, return with good confidence if matched_statements: return matched_statements, matched_statements[0]['role'], 0.75 return [], None, 0.0 def _match_by_content(self, statement_type: str) -> Tuple[List[Dict[str, Any]], Optional[str], float]: """ Match statements by analyzing their content against key concepts. Args: statement_type: Statement type to match Returns: Tuple of (matching statements, found role, confidence score) """ # Convert legacy types to registry types if needed if statement_type in self._legacy_to_registry: registry_type = self._legacy_to_registry[statement_type] else: registry_type = statement_type # Check if this is a known statement type if registry_type not in statement_registry: return [], None, 0.0 # Get registry information registry_entry = statement_registry[registry_type] key_concepts = registry_entry.key_concepts if not key_concepts: return [], None, 0.0 # Get all statements all_statements = self.xbrl.get_all_statements() # Score each statement based on presence of key concepts statement_scores = [] for stmt in all_statements: role = stmt.get('role', '') if role not in self.xbrl.presentation_trees: continue # Get concept nodes for this role tree = self.xbrl.presentation_trees[role] all_nodes = set(tree.all_nodes.keys()) # Count matching key concepts matches = 0 total_weight = 0.0 for concept in key_concepts: # Normalize concept name normalized = concept.replace(':', '_') if concept in all_nodes or normalized in all_nodes: matches += 1 # Add weighting if available concept_key = concept.split('_')[-1].lower() weight = registry_entry.weight_map.get(concept_key, 1.0) total_weight += weight # Calculate confidence score (weighted by presence of key concepts) if key_concepts: # Base confidence on percentage of key concepts found confidence = matches / len(key_concepts) # Apply weighting if available if total_weight > 0: confidence = min(total_weight / sum(registry_entry.weight_map.values()), 1.0) else: confidence = 0.0 if confidence > 0: statement_scores.append((stmt, confidence)) # Sort by confidence score statement_scores.sort(key=lambda x: x[1], reverse=True) # Return best match if above threshold if statement_scores and statement_scores[0][1] >= 0.4: best_match, confidence = statement_scores[0] return [best_match], best_match['role'], min(confidence + 0.2, 0.85) # Boost confidence but cap at 0.85 return [], None, 0.0 def _match_by_standard_name(self, statement_type: str) -> Tuple[List[Dict[str, Any]], Optional[str], float]: """ Match statements by standard statement type name. Args: statement_type: Statement type to match Returns: Tuple of (matching statements, found role, confidence score) """ # Check if we have statements of this type if statement_type in self._statement_by_type: statements = self._statement_by_type[statement_type] if statements: return statements, statements[0]['role'], 0.95 return [], None, 0.0 def _match_by_role_definition(self, statement_type: str) -> Tuple[List[Dict[str, Any]], Optional[str], float]: """ Match statements by role definition text. Args: statement_type: Statement type or definition text to match Returns: Tuple of (matching statements, found role, confidence score) """ # Clean statement type for matching clean_type = statement_type.lower().replace(' ', '') # Try exact match if clean_type in self._statement_by_role_def: statements = self._statement_by_role_def[clean_type] if statements: return statements, statements[0]['role'], 0.85 # Try partial match for def_key, statements in self._statement_by_role_def.items(): if clean_type in def_key: return statements, statements[0]['role'], 0.65 if def_key in clean_type: return statements, statements[0]['role'], 0.55 return [], None, 0.0 def _get_best_guess(self, statement_type: str) -> Tuple[List[Dict[str, Any]], Optional[str], float]: """ Make a best guess when all other methods fail. Args: statement_type: Statement type to guess Returns: Tuple of (matching statements, found role, confidence score) """ # Try partial matching on role names clean_type = statement_type.lower() for role_name, statements in self._statement_by_role_name.items(): if clean_type in role_name or role_name in clean_type: return statements, statements[0]['role'], 0.4 # If we have statements of any type, return the first one with very low confidence all_statements = self.xbrl.get_all_statements() if all_statements: # Try to find a primary financial statement for stmt_type in ['BalanceSheet', 'IncomeStatement', 'CashFlowStatement']: if stmt_type in self._statement_by_type: statements = self._statement_by_type[stmt_type] if statements: return statements, statements[0]['role'], 0.2 # Last resort: return first statement return [all_statements[0]], all_statements[0]['role'], 0.1 return [], None, 0.0 def find_statement(self, statement_type: str, is_parenthetical: bool = False, category_filter: Optional[StatementCategory] = None) -> Tuple[List[Dict[str, Any]], Optional[str], str, float]: """ Find a statement by type, with multi-layered fallback approach. Args: statement_type: Statement type or identifier is_parenthetical: Whether to look for parenthetical version category_filter: Optional filter to only match statements of a specific category Returns: Tuple of (matching_statements, found_role, canonical_statement_type, confidence_score) Note: For standard statement types like "BalanceSheet", "IncomeStatement", etc., the canonical_statement_type will be the input statement_type, allowing downstream code to still recognize and apply type-specific logic. """ # Check cache first category_key = str(category_filter.value) if category_filter else "None" cache_key = f"{statement_type}_{is_parenthetical}_{category_key}" if cache_key in self._cache: return self._cache[cache_key] # If this is a role URI we already know, return immediately if statement_type in self._statement_by_role_uri: stmt = self._statement_by_role_uri[statement_type] # Apply category filter if specified if category_filter: # Get category from statement or determine based on type stmt_category = None if 'category' in stmt and stmt['category']: stmt_category = stmt['category'] elif stmt['type'] in statement_registry: stmt_category = statement_registry[stmt['type']].category.value # Skip if category doesn't match if stmt_category != category_filter.value: result = ([], None, statement_type, 0.0) self._cache[cache_key] = result return result result = ([stmt], statement_type, stmt.get('type', statement_type), 1.0) self._cache[cache_key] = result return result # Check if this is a canonical statement type from the registry is_canonical_type = statement_type in statement_registry # Try standard name matching first (exact type match) match = self._match_by_standard_name(statement_type) if match[0] and match[2] > 0.9: # Very high confidence statements, role, conf = match # For canonical types, preserve the original statement_type canonical_type = statement_type if is_canonical_type else statements[0].get('type', statement_type) result = (statements, role, canonical_type, conf) self._cache[cache_key] = result return result # Try primary concept matching match = self._match_by_primary_concept(statement_type, is_parenthetical) if match[0] and match[2] > 0.8: # High confidence statements, role, conf = match # For canonical types, preserve the original statement_type canonical_type = statement_type if is_canonical_type else statements[0].get('type', statement_type) result = (statements, role, canonical_type, conf) self._cache[cache_key] = result return result # Try custom namespace matching match = self._match_by_concept_pattern(statement_type, is_parenthetical) if match[0] and match[2] > 0.8: # High confidence statements, role, conf = match # For canonical types, preserve the original statement_type canonical_type = statement_type if is_canonical_type else statements[0].get('type', statement_type) result = (statements, role, canonical_type, conf) self._cache[cache_key] = result return result # Try role pattern matching match = self._match_by_role_pattern(statement_type, is_parenthetical) if match[0] and match[2] > 0.7: # Good confidence statements, role, conf = match # For canonical types, preserve the original statement_type canonical_type = statement_type if is_canonical_type else statements[0].get('type', statement_type) result = (statements, role, canonical_type, conf) self._cache[cache_key] = result return result # Try content-based analysis match = self._match_by_content(statement_type) if match[0] and match[2] > 0.6: # Moderate confidence statements, role, conf = match # For canonical types, preserve the original statement_type canonical_type = statement_type if is_canonical_type else statements[0].get('type', statement_type) result = (statements, role, canonical_type, conf) self._cache[cache_key] = result return result # Try role definition matching match = self._match_by_role_definition(statement_type) if match[0] and match[2] > 0.5: # Lower confidence but still useful statements, role, conf = match # For canonical types, preserve the original statement_type canonical_type = statement_type if is_canonical_type else statements[0].get('type', statement_type) result = (statements, role, canonical_type, conf) self._cache[cache_key] = result return result # No good match found, return best guess with low confidence statements, role, conf = self._get_best_guess(statement_type) if conf < 0.4: # Get entity context for detailed error reporting entity_name = getattr(self.xbrl, 'entity_name', 'Unknown') cik = getattr(self.xbrl, 'cik', 'Unknown') period_of_report = getattr(self.xbrl, 'period_of_report', 'Unknown') if len(statements) == 0: raise StatementNotFound( statement_type=statement_type, confidence=conf, found_statements=[], entity_name=entity_name, cik=cik, period_of_report=period_of_report, reason="No statements available in XBRL data" ) elif conf < 0.3: found_statements = [s['definition'] for s in statements] raise StatementNotFound( statement_type=statement_type, confidence=conf, found_statements=found_statements, entity_name=entity_name, cik=cik, period_of_report=period_of_report, reason="Confidence threshold not met" ) else: log.warn( f"No good match found for statement type '{statement_type}'. The best guess has low confidence: {conf:.2f}") if statements: # For canonical types, preserve the original statement_type canonical_type = statement_type if is_canonical_type else statements[0].get('type', statement_type) result = (statements, role, canonical_type, conf) else: result = ([], None, statement_type, 0.0) self._cache[cache_key] = result return result