Files
edgartools/venv/lib/python3.10/site-packages/edgar/xbrl/statement_resolver.py
2025-12-09 12:13:01 +01:00

873 lines
34 KiB
Python

"""
Statement Resolution for XBRL data.
This module provides a robust system for identifying and matching XBRL financial statements,
notes, and disclosures regardless of taxonomy variations and company-specific customizations.
"""
import re
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Dict, List, Optional, Tuple
from edgar.core import log
from edgar.xbrl.exceptions import StatementNotFound
from edgar.xbrl.statements import statement_to_concepts
class StatementCategory(Enum):
    """Broad grouping assigned to a section of the XBRL presentation linkbase.

    Each member's value is the short lowercase string used when a category
    has to be compared or serialized as plain text.
    """

    # The primary financial statements (the default for StatementType).
    FINANCIAL_STATEMENT = "statement"
    # Notes to the financial statements.
    NOTE = "note"
    # Disclosure sections.
    DISCLOSURE = "disclosure"
    # For cover page, signatures, etc.
    DOCUMENT = "document"
    # Anything that fits none of the categories above.
    OTHER = "other"
@dataclass
class ConceptPattern:
    """A concept-matching pattern paired with a relative weight."""

    # Pattern text used to match statement concepts across taxonomies.
    pattern: str
    # Relative importance of a hit on this pattern; 1.0 when not specified.
    weight: float = 1.0
@dataclass
class StatementType:
    """Matching metadata for one kind of statement, note or disclosure."""

    # Identifier of the statement type; match_role() searches for it
    # (case-insensitively) inside the role URI, role name and role definition.
    name: str
    # Concept names that identify this statement type outright.
    primary_concepts: List[str]
    # Default to financial statement
    category: StatementCategory = StatementCategory.FINANCIAL_STATEMENT
    # Further exact concept names accepted as equivalent to the primary ones.
    alternative_concepts: List[str] = field(default_factory=list)
    # Regexes applied with re.match to a concept name in match_concept().
    concept_patterns: List[str] = field(default_factory=list)
    # Concepts whose presence in a statement hints at this type.
    key_concepts: List[str] = field(default_factory=list)
    # Regexes applied with re.match to the role URI / role name in match_role().
    role_patterns: List[str] = field(default_factory=list)
    # Human-readable title for the statement type.
    title: str = ""
    # Whether a parenthetical variant of this statement is distinguished.
    supports_parenthetical: bool = False
    # Optional weights keyed by concept name fragment.
    weight_map: Dict[str, float] = field(default_factory=dict)

    def match_concept(self, concept_name: str) -> bool:
        """Tell whether ``concept_name`` belongs to this statement type.

        Exact membership in the primary or alternative concept lists is tried
        first; otherwise the name is tested against every concept pattern.
        """
        if concept_name in self.primary_concepts:
            return True
        if concept_name in self.alternative_concepts:
            return True
        return any(re.match(candidate, concept_name) for candidate in self.concept_patterns)

    def match_role(self, role_uri: str, role_name: str = "", role_def: str = "") -> bool:
        """Tell whether the given role information points at this statement type.

        The type name is first searched for, case-insensitively, in the role
        URI, then in the role name and role definition when those are given.
        Failing that, each role pattern is matched against the role URI and
        (when given) the role name.
        """
        needle = self.name.lower()

        # Plain substring test on each piece of role text.
        if needle in role_uri.lower():
            return True
        for text in (role_name, role_def):
            if text and needle in text.lower():
                return True

        # Regex fallback; the role definition is not consulted here.
        for candidate in self.role_patterns:
            if re.match(candidate, role_uri):
                return True
            if role_name and re.match(candidate, role_name):
                return True
        return False
# Registry of statement types with matching information.
#
# Keys are the canonical statement type names; each value bundles everything
# the resolver can match on for that type:
#   * primary_concepts / alternative_concepts - exact primary concept names
#   * concept_patterns - regexes for primary concepts in custom namespaces
#   * key_concepts     - concepts searched for inside a statement's tree
#   * role_patterns    - regexes for role URIs / role names
#   * weight_map       - optional weights read by
#     StatementResolver._match_by_content, which looks them up by the
#     lower-cased part of the concept name after the last underscore
statement_registry = {
    # --- Primary financial statements -------------------------------------
    "BalanceSheet": StatementType(
        name="BalanceSheet",
        category=StatementCategory.FINANCIAL_STATEMENT,
        primary_concepts=["us-gaap_StatementOfFinancialPositionAbstract"],
        alternative_concepts=[
            "us-gaap_BalanceSheetAbstract",
            "ifrs-full_StatementOfFinancialPositionAbstract"  # IFRS equivalent
        ],
        concept_patterns=[
            r".*_StatementOfFinancialPositionAbstract$",
            r".*_BalanceSheetAbstract$",
            r".*_ConsolidatedBalanceSheetsAbstract$",
            r".*_CondensedConsolidatedBalanceSheetsUnauditedAbstract$"
        ],
        key_concepts=[
            "us-gaap_Assets", "us-gaap_Liabilities", "us-gaap_StockholdersEquity",
            "ifrs-full_Assets", "ifrs-full_Liabilities", "ifrs-full_Equity"  # IFRS equivalents
        ],
        role_patterns=[
            r".*[Bb]alance[Ss]heet.*",
            r".*[Ss]tatement[Oo]f[Ff]inancial[Pp]osition.*",
            r".*StatementConsolidatedBalanceSheets.*"
        ],
        title="Consolidated Balance Sheets",
        supports_parenthetical=True,
        weight_map={"assets": 0.3, "liabilities": 0.3, "equity": 0.4}
    ),
    "IncomeStatement": StatementType(
        name="IncomeStatement",
        category=StatementCategory.FINANCIAL_STATEMENT,
        primary_concepts=["us-gaap_IncomeStatementAbstract"],
        alternative_concepts=[
            "us-gaap_StatementOfIncomeAbstract",
            "ifrs-full_IncomeStatementAbstract"  # IFRS equivalent
        ],
        concept_patterns=[
            r".*_IncomeStatementAbstract$",
            r".*_StatementOfIncomeAbstract$",
            r".*_ConsolidatedStatementsOfIncomeAbstract$",
            r".*_CondensedConsolidatedStatementsOfIncomeUnauditedAbstract$"
        ],
        key_concepts=[
            "us-gaap_Revenues", "us-gaap_NetIncomeLoss",
            "ifrs-full_Revenue", "ifrs-full_ProfitLoss"  # IFRS equivalents
        ],
        role_patterns=[
            r".*[Ii]ncome[Ss]tatement.*",
            r".*[Ss]tatement[Oo]f[Ii]ncome.*",
            # Any role mentioning "operations" is treated as an income statement.
            r".*[Oo]perations.*",
            r".*StatementConsolidatedStatementsOfIncome.*"
        ],
        title="Consolidated Statement of Income",
        supports_parenthetical=True,
        weight_map={"revenues": 0.4, "netIncomeLoss": 0.6}
    ),
    "CashFlowStatement": StatementType(
        name="CashFlowStatement",
        category=StatementCategory.FINANCIAL_STATEMENT,
        primary_concepts=["us-gaap_StatementOfCashFlowsAbstract"],
        alternative_concepts=["ifrs-full_StatementOfCashFlowsAbstract"],  # IFRS equivalent
        concept_patterns=[
            r".*_StatementOfCashFlowsAbstract$",
            r".*_CashFlowsAbstract$",
            r".*_ConsolidatedStatementsOfCashFlowsAbstract$",
            r".*_CondensedConsolidatedStatementsOfCashFlowsUnauditedAbstract$"
        ],
        key_concepts=[
            "us-gaap_NetCashProvidedByUsedInOperatingActivities",
            "us-gaap_CashAndCashEquivalentsPeriodIncreaseDecrease",
            "ifrs-full_CashFlowsFromUsedInOperatingActivities",  # IFRS equivalents
            "ifrs-full_IncreaseDecreaseInCashAndCashEquivalents"
        ],
        role_patterns=[
            r".*[Cc]ash[Ff]low.*",
            r".*[Ss]tatement[Oo]f[Cc]ash[Ff]lows.*",
            r".*StatementConsolidatedStatementsOfCashFlows.*"
        ],
        title="Consolidated Statement of Cash Flows",
        supports_parenthetical=False
    ),
    "StatementOfEquity": StatementType(
        name="StatementOfEquity",
        category=StatementCategory.FINANCIAL_STATEMENT,
        primary_concepts=["us-gaap_StatementOfStockholdersEquityAbstract"],
        alternative_concepts=[
            "us-gaap_StatementOfShareholdersEquityAbstract",
            "us-gaap_StatementOfPartnersCapitalAbstract"
        ],
        concept_patterns=[
            r".*_StatementOfStockholdersEquityAbstract$",
            r".*_StatementOfShareholdersEquityAbstract$",
            r".*_StatementOfChangesInEquityAbstract$",
            r".*_ConsolidatedStatementsOfShareholdersEquityAbstract$"
        ],
        key_concepts=["us-gaap_StockholdersEquity", "us-gaap_CommonStock", "us-gaap_RetainedEarnings"],
        role_patterns=[
            r".*[Ee]quity.*",
            r".*[Ss]tockholders.*",
            r".*[Ss]hareholders.*",
            r".*[Cc]hanges[Ii]n[Ee]quity.*",
            r".*StatementConsolidatedStatementsOfStockholdersEquity.*"
        ],
        title="Consolidated Statement of Equity",
        supports_parenthetical=False
    ),
    "ComprehensiveIncome": StatementType(
        name="ComprehensiveIncome",
        category=StatementCategory.FINANCIAL_STATEMENT,
        primary_concepts=["us-gaap_StatementOfIncomeAndComprehensiveIncomeAbstract"],
        alternative_concepts=["us-gaap_StatementOfComprehensiveIncomeAbstract"],
        concept_patterns=[
            r".*_ComprehensiveIncomeAbstract$",
            r".*_StatementOfComprehensiveIncomeAbstract$",
            r".*_ConsolidatedStatementsOfComprehensiveIncomeAbstract$"
        ],
        key_concepts=["us-gaap_ComprehensiveIncomeNetOfTax"],
        role_patterns=[
            r".*[Cc]omprehensive[Ii]ncome.*",
            r".*[Oo]ther[Cc]omprehensive.*",
            r".*StatementConsolidatedStatementsOfComprehensiveIncome.*"
        ],
        title="Consolidated Statement of Comprehensive Income",
        supports_parenthetical=True
    ),
    # --- Notes --------------------------------------------------------------
    "Notes": StatementType(
        name="Notes",
        category=StatementCategory.NOTE,
        primary_concepts=["us-gaap_NotesToFinancialStatementsAbstract"],
        alternative_concepts=[],
        concept_patterns=[
            r".*_NotesToFinancialStatementsAbstract$",
            r".*_NotesAbstract$"
        ],
        key_concepts=[],
        role_patterns=[
            r".*[Nn]otes[Tt]o[Ff]inancial[Ss]tatements.*",
            r".*[Nn]ote\s+\d+.*",
            r".*[Nn]otes.*"
        ],
        title="Notes to Financial Statements",
        supports_parenthetical=False
    ),
    "AccountingPolicies": StatementType(
        name="AccountingPolicies",
        category=StatementCategory.NOTE,
        primary_concepts=["us-gaap_AccountingPoliciesAbstract"],
        alternative_concepts=[],
        concept_patterns=[
            r".*_AccountingPoliciesAbstract$",
            r".*_SignificantAccountingPoliciesAbstract$"
        ],
        key_concepts=["us-gaap_SignificantAccountingPoliciesTextBlock"],
        role_patterns=[
            r".*[Aa]ccounting[Pp]olicies.*",
            r".*[Ss]ignificant[Aa]ccounting[Pp]olicies.*"
        ],
        title="Significant Accounting Policies",
        supports_parenthetical=False
    ),
    # --- Disclosures ----------------------------------------------------------
    "Disclosures": StatementType(
        name="Disclosures",
        category=StatementCategory.DISCLOSURE,
        primary_concepts=["us-gaap_DisclosuresAbstract"],
        alternative_concepts=[],
        concept_patterns=[
            r".*_DisclosuresAbstract$",
            r".*_DisclosureAbstract$"
        ],
        key_concepts=[],
        role_patterns=[
            r".*[Dd]isclosure.*"
        ],
        title="Disclosures",
        supports_parenthetical=False
    ),
    "SegmentDisclosure": StatementType(
        name="SegmentDisclosure",
        category=StatementCategory.DISCLOSURE,
        primary_concepts=["us-gaap_SegmentDisclosureAbstract"],
        alternative_concepts=[],
        concept_patterns=[
            r".*_SegmentDisclosureAbstract$",
            r".*_SegmentReportingDisclosureAbstract$"
        ],
        key_concepts=["us-gaap_SegmentReportingDisclosureTextBlock"],
        role_patterns=[
            r".*[Ss]egment.*",
            r".*[Ss]egment[Rr]eporting.*",
            r".*[Ss]egment[Ii]nformation.*"
        ],
        title="Segment Information",
        supports_parenthetical=False
    ),
    # --- Document-level sections ------------------------------------------
    "CoverPage": StatementType(
        name="CoverPage",
        category=StatementCategory.DOCUMENT,
        primary_concepts=["dei_CoverAbstract"],
        concept_patterns=[r".*_CoverAbstract$"],
        key_concepts=["dei_EntityRegistrantName", "dei_DocumentType"],
        role_patterns=[r".*[Cc]over.*"],
        title="Cover Page",
        supports_parenthetical=False
    )
}
class StatementResolver:
"""
Resolves statement identifiers to actual XBRL statement roles.
This class provides a multi-layered approach to statement matching,
handling taxonomy variations and company-specific customizations.
"""
def __init__(self, xbrl):
    """
    Set up the resolver for one parsed XBRL document.

    Creates the result cache and the (initially empty) lookup indices,
    derives the legacy-type -> registry-type mapping from
    ``statement_to_concepts`` and finally fills the indices from the
    statements reported by ``xbrl``.

    Args:
        xbrl: XBRL object containing parsed data
    """
    self.xbrl = xbrl
    self._cache = {}

    # Lookup indices; populated by _initialize_indices() below.
    self._statement_by_role_uri = {}
    self._statement_by_role_name = {}
    self._statement_by_primary_concept = {}
    self._statement_by_type = {}
    self._statement_by_role_def = {}

    # Translate legacy statement type names into registry names.
    self._legacy_to_registry = {}
    for legacy_type, info in statement_to_concepts.items():
        if legacy_type in statement_registry:
            # Same name on both sides: identity mapping.
            target = legacy_type
        else:
            # Otherwise adopt the first registry entry that lists the
            # legacy concept as a primary or alternative concept.
            target = next(
                (
                    reg_type
                    for reg_type, reg_info in statement_registry.items()
                    if info.concept in reg_info.primary_concepts
                    or info.concept in reg_info.alternative_concepts
                ),
                None,
            )
        if target is not None:
            self._legacy_to_registry[legacy_type] = target

    self._initialize_indices()
def _initialize_indices(self):
"""Build lookup indices for fast statement retrieval."""
# Get all statements
statements = self.xbrl.get_all_statements()
# Reset indices
self._statement_by_role_uri = {}
self._statement_by_role_name = {}
self._statement_by_primary_concept = {}
self._statement_by_type = {}
self._statement_by_role_def = {}
# Build indices
for stmt in statements:
role = stmt.get('role', '')
role_name = stmt.get('role_name', '').lower() if stmt.get('role_name') else ''
primary_concept = stmt.get('primary_concept', '')
stmt_type = stmt.get('type', '')
role_def = stmt.get('definition', '').lower() if stmt.get('definition') else ''
# By role URI
self._statement_by_role_uri[role] = stmt
# By role name
if role_name:
if role_name not in self._statement_by_role_name:
self._statement_by_role_name[role_name] = []
self._statement_by_role_name[role_name].append(stmt)
# By primary concept
if primary_concept:
if primary_concept not in self._statement_by_primary_concept:
self._statement_by_primary_concept[primary_concept] = []
self._statement_by_primary_concept[primary_concept].append(stmt)
# By statement type
if stmt_type:
if stmt_type not in self._statement_by_type:
self._statement_by_type[stmt_type] = []
self._statement_by_type[stmt_type].append(stmt)
# By role definition (without spaces, lowercase)
if role_def:
def_key = role_def.replace(' ', '')
if def_key not in self._statement_by_role_def:
self._statement_by_role_def[def_key] = []
self._statement_by_role_def[def_key].append(stmt)
def _match_by_primary_concept(self, statement_type: str, is_parenthetical: bool = False) -> Tuple[List[Dict[str, Any]], Optional[str], float]:
    """
    Match statements using primary concept names.

    Looks up every primary and alternative concept of the registry entry in
    the primary-concept index. For statement types that support a
    parenthetical variant, only statements whose role definition agrees with
    ``is_parenthetical`` (i.e. does / does not contain "parenthetical") are
    kept.

    Args:
        statement_type: Statement type to match
        is_parenthetical: Whether to look for a parenthetical statement

    Returns:
        Tuple of (matching statements, found role, confidence score).
        The confidence is 0.9 on a match, otherwise ([], None, 0.0).
    """
    # Convert legacy types to registry types if needed
    registry_type = self._legacy_to_registry.get(statement_type, statement_type)

    # Unknown statement types cannot be matched by concept
    registry_entry = statement_registry.get(registry_type)
    if registry_entry is None:
        return [], None, 0.0

    matched_statements = []
    for concept in registry_entry.primary_concepts + registry_entry.alternative_concepts:
        for stmt in self._statement_by_primary_concept.get(concept, ()):
            if registry_entry.supports_parenthetical:
                # `or ''` guards against a 'definition' key that is present
                # but None (dict.get only applies its default to missing
                # keys); _initialize_indices tolerates the same shape.
                role_def = (stmt.get('definition') or '').lower()
                # Skip if parenthetical status doesn't match
                if is_parenthetical != ('parenthetical' in role_def):
                    continue
            matched_statements.append(stmt)

    # If we found matching statements, return with high confidence
    if matched_statements:
        return matched_statements, matched_statements[0]['role'], 0.9
    return [], None, 0.0
def _match_by_concept_pattern(self, statement_type: str, is_parenthetical: bool = False) -> Tuple[List[Dict[str, Any]], Optional[str], float]:
    """
    Match statements using regex patterns on concept names to handle custom company namespaces.

    Every statement's primary concept is tested (``re.match``) against the
    registry entry's concept patterns. For statement types that support a
    parenthetical variant, only statements whose role definition agrees with
    ``is_parenthetical`` are kept.

    Args:
        statement_type: Statement type to match
        is_parenthetical: Whether to look for a parenthetical statement

    Returns:
        Tuple of (matching statements, found role, confidence score).
        The confidence is 0.85 on a match, otherwise ([], None, 0.0).
    """
    # Convert legacy types to registry types if needed
    registry_type = self._legacy_to_registry.get(statement_type, statement_type)

    # Unknown statement types cannot be matched by pattern
    registry_entry = statement_registry.get(registry_type)
    if registry_entry is None:
        return [], None, 0.0

    concept_patterns = registry_entry.concept_patterns
    if not concept_patterns:
        return [], None, 0.0

    matched_statements = []
    for stmt in self.xbrl.get_all_statements():
        primary_concept = stmt.get('primary_concept', '')
        # Skip if no primary concept
        if not primary_concept:
            continue
        if not any(re.match(pattern, primary_concept) for pattern in concept_patterns):
            continue

        # The parenthetical status belongs to the statement, not to the
        # pattern that matched, so it is checked once per statement.
        if registry_entry.supports_parenthetical:
            # `or ''` guards against a 'definition' key holding None.
            role_def = (stmt.get('definition') or '').lower()
            if is_parenthetical != ('parenthetical' in role_def):
                continue
        matched_statements.append(stmt)

    # If we found matching statements, return with high confidence
    if matched_statements:
        return matched_statements, matched_statements[0]['role'], 0.85
    return [], None, 0.0
def _match_by_role_pattern(self, statement_type: str, is_parenthetical: bool = False) -> Tuple[List[Dict[str, Any]], Optional[str], float]:
    """
    Match statements using role URI or role name patterns.

    Each statement's role URI and role name are searched (``re.search``,
    case-insensitive) for the registry entry's role patterns. For statement
    types that support a parenthetical variant, only statements whose role
    definition agrees with ``is_parenthetical`` are kept.

    Args:
        statement_type: Statement type to match
        is_parenthetical: Whether to look for a parenthetical statement

    Returns:
        Tuple of (matching statements, found role, confidence score).
        The confidence is 0.75 on a match, otherwise ([], None, 0.0).
    """
    # Convert legacy types to registry types if needed
    registry_type = self._legacy_to_registry.get(statement_type, statement_type)

    # Unknown statement types cannot be matched by pattern
    registry_entry = statement_registry.get(registry_type)
    if registry_entry is None:
        return [], None, 0.0

    role_patterns = registry_entry.role_patterns
    if not role_patterns:
        return [], None, 0.0

    matched_statements = []
    for stmt in self.xbrl.get_all_statements():
        # `or ''` covers keys that are present but None: re.search would
        # raise TypeError on None, as would .lower() further down.
        role = stmt.get('role') or ''
        role_name = stmt.get('role_name') or ''

        hit = any(
            re.search(pattern, role, re.IGNORECASE)
            or (role_name and re.search(pattern, role_name, re.IGNORECASE))
            for pattern in role_patterns
        )
        if not hit:
            continue

        # The parenthetical status belongs to the statement, not to the
        # pattern that matched, so it is checked once per statement.
        if registry_entry.supports_parenthetical:
            role_def = (stmt.get('definition') or '').lower()
            if is_parenthetical != ('parenthetical' in role_def):
                continue
        matched_statements.append(stmt)

    # If we found matching statements, return with good confidence
    if matched_statements:
        return matched_statements, matched_statements[0]['role'], 0.75
    return [], None, 0.0
def _match_by_content(self, statement_type: str) -> Tuple[List[Dict[str, Any]], Optional[str], float]:
    """
    Match statements by analyzing their content against key concepts.

    Each statement that has a presentation tree is scored by which of the
    registry entry's key concepts occur among the tree's nodes:

    * If the entry defines a weight map with a positive total, the score is
      the summed weight of the concepts found divided by that total, capped
      at 1.0. A found concept without an entry in the map counts as 1.0.
    * Otherwise the score is the fraction of key concepts found.

    The best-scoring statement is returned if its score is at least 0.4.

    Args:
        statement_type: Statement type to match

    Returns:
        Tuple of (matching statements, found role, confidence score). On a
        match the list holds the single best statement and the confidence is
        ``min(score + 0.2, 0.85)``; otherwise ([], None, 0.0).
    """
    # Convert legacy types to registry types if needed
    registry_type = self._legacy_to_registry.get(statement_type, statement_type)

    # Unknown statement types cannot be matched by content
    registry_entry = statement_registry.get(registry_type)
    if registry_entry is None:
        return [], None, 0.0

    key_concepts = registry_entry.key_concepts
    if not key_concepts:
        return [], None, 0.0

    # Weights are looked up by lower-cased concept suffix, so normalize the
    # map's keys the same way; otherwise a mixed-case key such as
    # "netIncomeLoss" could never be hit.
    weights = {key.lower(): weight for key, weight in registry_entry.weight_map.items()}
    # Zero for statement types without a weight map; dividing by it
    # unguarded raised ZeroDivisionError as soon as one key concept matched.
    weight_total = sum(weights.values())

    statement_scores = []
    for stmt in self.xbrl.get_all_statements():
        role = stmt.get('role', '')
        if role not in self.xbrl.presentation_trees:
            continue

        # Concept nodes present in this role's presentation tree
        all_nodes = set(self.xbrl.presentation_trees[role].all_nodes.keys())

        matches = 0
        total_weight = 0.0
        for concept in key_concepts:
            # Accept both the "prefix_Name" and "prefix:Name" spellings
            if concept in all_nodes or concept.replace(':', '_') in all_nodes:
                matches += 1
                concept_key = concept.split('_')[-1].lower()
                total_weight += weights.get(concept_key, 1.0)

        if weight_total > 0 and total_weight > 0:
            confidence = min(total_weight / weight_total, 1.0)
        else:
            # No usable weighting: fall back to the share of concepts found
            confidence = matches / len(key_concepts)

        if confidence > 0:
            statement_scores.append((stmt, confidence))

    if not statement_scores:
        return [], None, 0.0

    # max() returns the first of equally scored statements, matching the
    # stable descending sort this replaces.
    best_match, confidence = max(statement_scores, key=lambda scored: scored[1])
    if confidence >= 0.4:
        # Boost confidence but cap at 0.85
        return [best_match], best_match['role'], min(confidence + 0.2, 0.85)
    return [], None, 0.0
def _match_by_standard_name(self, statement_type: str) -> Tuple[List[Dict[str, Any]], Optional[str], float]:
"""
Match statements by standard statement type name.
Args:
statement_type: Statement type to match
Returns:
Tuple of (matching statements, found role, confidence score)
"""
# Check if we have statements of this type
if statement_type in self._statement_by_type:
statements = self._statement_by_type[statement_type]
if statements:
return statements, statements[0]['role'], 0.95
return [], None, 0.0
def _match_by_role_definition(self, statement_type: str) -> Tuple[List[Dict[str, Any]], Optional[str], float]:
"""
Match statements by role definition text.
Args:
statement_type: Statement type or definition text to match
Returns:
Tuple of (matching statements, found role, confidence score)
"""
# Clean statement type for matching
clean_type = statement_type.lower().replace(' ', '')
# Try exact match
if clean_type in self._statement_by_role_def:
statements = self._statement_by_role_def[clean_type]
if statements:
return statements, statements[0]['role'], 0.85
# Try partial match
for def_key, statements in self._statement_by_role_def.items():
if clean_type in def_key:
return statements, statements[0]['role'], 0.65
if def_key in clean_type:
return statements, statements[0]['role'], 0.55
return [], None, 0.0
def _get_best_guess(self, statement_type: str) -> Tuple[List[Dict[str, Any]], Optional[str], float]:
"""
Make a best guess when all other methods fail.
Args:
statement_type: Statement type to guess
Returns:
Tuple of (matching statements, found role, confidence score)
"""
# Try partial matching on role names
clean_type = statement_type.lower()
for role_name, statements in self._statement_by_role_name.items():
if clean_type in role_name or role_name in clean_type:
return statements, statements[0]['role'], 0.4
# If we have statements of any type, return the first one with very low confidence
all_statements = self.xbrl.get_all_statements()
if all_statements:
# Try to find a primary financial statement
for stmt_type in ['BalanceSheet', 'IncomeStatement', 'CashFlowStatement']:
if stmt_type in self._statement_by_type:
statements = self._statement_by_type[stmt_type]
if statements:
return statements, statements[0]['role'], 0.2
# Last resort: return first statement
return [all_statements[0]], all_statements[0]['role'], 0.1
return [], None, 0.0
def find_statement(self, statement_type: str, is_parenthetical: bool = False,
                   category_filter: Optional[StatementCategory] = None) -> Tuple[List[Dict[str, Any]], Optional[str], str, float]:
    """
    Find a statement by type, with multi-layered fallback approach.

    Resolution order (the first strategy that clears its confidence
    threshold wins):

    1. known role URI (confidence 1.0)
    2. standard type name (> 0.9)
    3. primary concept (> 0.8)
    4. concept pattern (> 0.8)
    5. role pattern (> 0.7)
    6. content analysis (> 0.6)
    7. role definition text (> 0.5)
    8. best guess

    Successful results are cached per (statement_type, is_parenthetical,
    category_filter); raised errors are not cached.

    Args:
        statement_type: Statement type or identifier
        is_parenthetical: Whether to look for parenthetical version
        category_filter: Optional filter to only match statements of a
            specific category. It is only consulted when ``statement_type``
            is a known role URI.

    Returns:
        Tuple of (matching_statements, found_role, canonical_statement_type, confidence_score)

    Raises:
        StatementNotFound: If only a best guess is available and either no
            statements exist or its confidence is below 0.3.

    Note:
        For standard statement types like "BalanceSheet", "IncomeStatement", etc., the
        canonical_statement_type will be the input statement_type, allowing downstream
        code to still recognize and apply type-specific logic.
    """
    # Check cache first
    category_key = str(category_filter.value) if category_filter else "None"
    cache_key = f"{statement_type}_{is_parenthetical}_{category_key}"
    if cache_key in self._cache:
        return self._cache[cache_key]

    def remember(result):
        # Store the result under this call's cache key and hand it back.
        self._cache[cache_key] = result
        return result

    # If this is a role URI we already know, return immediately
    if statement_type in self._statement_by_role_uri:
        stmt = self._statement_by_role_uri[statement_type]
        if category_filter:
            # Category comes from the statement itself or, failing that,
            # from the registry entry for the statement's type.
            stmt_category = None
            if stmt.get('category'):
                stmt_category = stmt['category']
            elif stmt.get('type') in statement_registry:
                # .get(): a statement without a 'type' key must not raise
                # KeyError here; the result tuple below uses .get() too.
                stmt_category = statement_registry[stmt['type']].category.value
            # Skip if category doesn't match
            if stmt_category != category_filter.value:
                return remember(([], None, statement_type, 0.0))
        return remember(([stmt], statement_type, stmt.get('type', statement_type), 1.0))

    # Check if this is a canonical statement type from the registry
    is_canonical_type = statement_type in statement_registry

    def build_result(statements, role, conf):
        # For canonical types, preserve the original statement_type
        canonical_type = statement_type if is_canonical_type else statements[0].get('type', statement_type)
        return (statements, role, canonical_type, conf)

    # Matching strategies in priority order: (matcher, arguments, minimum
    # confidence that must be exceeded). Each matcher runs only if all the
    # ones before it failed.
    strategies = (
        (self._match_by_standard_name, (statement_type,), 0.9),  # exact type match
        (self._match_by_primary_concept, (statement_type, is_parenthetical), 0.8),
        (self._match_by_concept_pattern, (statement_type, is_parenthetical), 0.8),  # custom namespaces
        (self._match_by_role_pattern, (statement_type, is_parenthetical), 0.7),
        (self._match_by_content, (statement_type,), 0.6),
        (self._match_by_role_definition, (statement_type,), 0.5),
    )
    for matcher, args, threshold in strategies:
        statements, role, conf = matcher(*args)
        if statements and conf > threshold:
            return remember(build_result(statements, role, conf))

    # No good match found, return best guess with low confidence
    statements, role, conf = self._get_best_guess(statement_type)
    if conf < 0.4:
        # Get entity context for detailed error reporting
        entity_name = getattr(self.xbrl, 'entity_name', 'Unknown')
        cik = getattr(self.xbrl, 'cik', 'Unknown')
        period_of_report = getattr(self.xbrl, 'period_of_report', 'Unknown')

        if len(statements) == 0:
            raise StatementNotFound(
                statement_type=statement_type,
                confidence=conf,
                found_statements=[],
                entity_name=entity_name,
                cik=cik,
                period_of_report=period_of_report,
                reason="No statements available in XBRL data"
            )
        elif conf < 0.3:
            # .get(): a statement lacking 'definition' must not turn the
            # intended StatementNotFound into a KeyError.
            found_statements = [s.get('definition', '') for s in statements]
            raise StatementNotFound(
                statement_type=statement_type,
                confidence=conf,
                found_statements=found_statements,
                entity_name=entity_name,
                cik=cik,
                period_of_report=period_of_report,
                reason="Confidence threshold not met"
            )
        else:
            # NOTE(review): assumes `log` is a standard logging.Logger, for
            # which warn() is a deprecated alias of warning() - confirm.
            log.warning(
                f"No good match found for statement type '{statement_type}'. The best guess has low confidence: {conf:.2f}")

    if statements:
        result = build_result(statements, role, conf)
    else:
        result = ([], None, statement_type, 0.0)
    return remember(result)