732 lines
25 KiB
Python
732 lines
25 KiB
Python
"""
|
|
Statement Builder for reconstructing financial statements using canonical structures.
|
|
|
|
This module provides intelligent statement reconstruction using learned canonical
|
|
structures and virtual presentation trees.
|
|
"""
|
|
|
|
import logging
|
|
from collections import defaultdict
|
|
from dataclasses import dataclass, field
|
|
from datetime import date
|
|
from typing import Any, Dict, List, Optional, Set
|
|
|
|
from rich import box
|
|
from rich.columns import Columns
|
|
from rich.console import Group
|
|
from rich.padding import Padding
|
|
from rich.panel import Panel
|
|
from rich.table import Table
|
|
from rich.text import Text
|
|
|
|
from edgar.entity.mappings_loader import load_canonical_structures, load_virtual_trees
|
|
from edgar.entity.models import FinancialFact
|
|
from edgar.richtools import repr_rich
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
|
|
@dataclass
|
|
class StatementItem:
|
|
"""A single item in a reconstructed financial statement."""
|
|
concept: str
|
|
label: str
|
|
value: Optional[float]
|
|
depth: int
|
|
parent_concept: Optional[str]
|
|
children: List['StatementItem'] = field(default_factory=list)
|
|
|
|
# Metadata
|
|
is_abstract: bool = False
|
|
is_total: bool = False
|
|
section: Optional[str] = None
|
|
confidence: float = 1.0
|
|
source: str = 'fact' # 'fact', 'calculated', 'canonical', 'placeholder'
|
|
|
|
# Original fact if available
|
|
fact: Optional[FinancialFact] = None
|
|
|
|
def to_dict(self) -> Dict[str, Any]:
|
|
"""Convert to dictionary representation."""
|
|
return {
|
|
'concept': self.concept,
|
|
'label': self.label,
|
|
'value': self.value,
|
|
'depth': self.depth,
|
|
'is_abstract': self.is_abstract,
|
|
'is_total': self.is_total,
|
|
'section': self.section,
|
|
'confidence': self.confidence,
|
|
'source': self.source,
|
|
'children': [child.to_dict() for child in self.children]
|
|
}
|
|
|
|
def get_display_value(self) -> str:
|
|
"""Get formatted value for display."""
|
|
if self.value is not None:
|
|
if abs(self.value) >= 1_000_000_000:
|
|
return f"${self.value/1_000_000_000:.1f}B"
|
|
elif abs(self.value) >= 1_000_000:
|
|
return f"${self.value/1_000_000:.1f}M"
|
|
elif abs(self.value) >= 1_000:
|
|
return f"${self.value/1_000:.0f}K"
|
|
else:
|
|
return f"${self.value:.0f}"
|
|
elif self.is_abstract:
|
|
return ""
|
|
elif self.source == 'placeholder':
|
|
return "[Missing]"
|
|
else:
|
|
return "-"
|
|
|
|
def __rich__(self):
|
|
"""Create a rich representation of the statement item."""
|
|
from rich.tree import Tree
|
|
|
|
# Create the node label
|
|
if self.is_abstract:
|
|
label = Text(self.label, style="bold cyan")
|
|
elif self.is_total:
|
|
label = Text(self.label, style="bold yellow")
|
|
else:
|
|
style = "dim" if self.confidence < 0.8 else ""
|
|
confidence_marker = " ◦" if self.confidence < 0.8 else ""
|
|
label = Text(f"{self.label}{confidence_marker}", style=style)
|
|
|
|
# Add value if present
|
|
value_str = self.get_display_value()
|
|
if value_str and value_str != "-":
|
|
# Color code values
|
|
if value_str.startswith("$") and self.value and isinstance(self.value, (int, float)):
|
|
value_style = "red" if self.value < 0 else "green"
|
|
else:
|
|
value_style = ""
|
|
|
|
label_with_value = Text.assemble(
|
|
label,
|
|
" ",
|
|
(value_str, value_style)
|
|
)
|
|
else:
|
|
label_with_value = label
|
|
|
|
# Create tree with this item as root
|
|
tree = Tree(label_with_value)
|
|
|
|
# Add children
|
|
for child in self.children:
|
|
tree.add(child.__rich__())
|
|
|
|
return tree
|
|
|
|
def __repr__(self) -> str:
|
|
"""String representation using rich formatting."""
|
|
return repr_rich(self.__rich__())
|
|
|
|
|
|
@dataclass
|
|
class StructuredStatement:
|
|
"""A complete structured financial statement."""
|
|
statement_type: str
|
|
fiscal_year: Optional[int]
|
|
fiscal_period: Optional[str]
|
|
period_end: Optional[date]
|
|
|
|
items: List[StatementItem]
|
|
|
|
# Metadata
|
|
company_name: Optional[str] = None
|
|
cik: Optional[str] = None
|
|
canonical_coverage: float = 0.0
|
|
facts_used: int = 0
|
|
facts_total: int = 0
|
|
|
|
def to_dict(self) -> Dict[str, Any]:
|
|
"""Convert to dictionary representation."""
|
|
return {
|
|
'statement_type': self.statement_type,
|
|
'fiscal_year': self.fiscal_year,
|
|
'fiscal_period': self.fiscal_period,
|
|
'period_end': self.period_end.isoformat() if self.period_end else None,
|
|
'company_name': self.company_name,
|
|
'cik': self.cik,
|
|
'canonical_coverage': self.canonical_coverage,
|
|
'facts_used': self.facts_used,
|
|
'facts_total': self.facts_total,
|
|
'items': [item.to_dict() for item in self.items]
|
|
}
|
|
|
|
def get_hierarchical_display(self, max_depth: int = 3) -> str:
|
|
"""Get hierarchical text representation."""
|
|
lines = []
|
|
|
|
def add_item(item: StatementItem, indent: int = 0):
|
|
if indent > max_depth:
|
|
return
|
|
|
|
indent_str = " " * indent
|
|
value_str = item.get_display_value()
|
|
|
|
if item.is_abstract:
|
|
lines.append(f"{indent_str}{item.label}")
|
|
elif item.is_total:
|
|
lines.append(f"{indent_str}{item.label:<40} {value_str:>15}")
|
|
lines.append(f"{indent_str}{'-' * 55}")
|
|
else:
|
|
confidence_marker = "" if item.confidence > 0.8 else " *"
|
|
lines.append(f"{indent_str}{item.label:<40} {value_str:>15}{confidence_marker}")
|
|
|
|
for child in item.children:
|
|
add_item(child, indent + 1)
|
|
|
|
for item in self.items:
|
|
add_item(item)
|
|
|
|
return "\n".join(lines)
|
|
|
|
def __rich__(self):
|
|
"""Create a rich representation of the structured statement."""
|
|
# Statement type mapping for better display
|
|
statement_names = {
|
|
'IncomeStatement': 'Income Statement',
|
|
'BalanceSheet': 'Balance Sheet',
|
|
'CashFlow': 'Cash Flow Statement',
|
|
'StatementsOfComprehensiveIncome': 'Comprehensive Income',
|
|
'StatementsOfShareholdersEquity': 'Shareholders Equity'
|
|
}
|
|
|
|
# Title with company name and period
|
|
title_parts = []
|
|
if self.company_name:
|
|
title_parts.append((self.company_name, "bold green"))
|
|
else:
|
|
title_parts.append(("Financial Statement", "bold"))
|
|
|
|
title = Text.assemble(*title_parts)
|
|
|
|
# Subtitle with statement type and period
|
|
statement_display = statement_names.get(self.statement_type, self.statement_type)
|
|
if self.fiscal_period and self.fiscal_year:
|
|
subtitle = f"{statement_display} • {self.fiscal_period} {self.fiscal_year}"
|
|
elif self.period_end:
|
|
subtitle = f"{statement_display} • As of {self.period_end}"
|
|
else:
|
|
subtitle = statement_display
|
|
|
|
# Main statement table
|
|
stmt_table = Table(
|
|
box=box.SIMPLE,
|
|
show_header=False,
|
|
padding=(0, 1),
|
|
expand=True
|
|
)
|
|
stmt_table.add_column("Item", style="", ratio=3)
|
|
stmt_table.add_column("Value", justify="right", style="bold", ratio=1)
|
|
|
|
def add_item_to_table(item: StatementItem, depth: int = 0):
|
|
"""Add an item to the table with proper indentation."""
|
|
indent = " " * depth
|
|
|
|
if item.is_abstract:
|
|
# Abstract items are headers
|
|
stmt_table.add_row(
|
|
Text(f"{indent}{item.label}", style="bold cyan"),
|
|
""
|
|
)
|
|
elif item.is_total:
|
|
# Total items with underline
|
|
value_text = Text(item.get_display_value(), style="bold yellow")
|
|
stmt_table.add_row(
|
|
Text(f"{indent}{item.label}", style="bold"),
|
|
value_text
|
|
)
|
|
# Add a separator line after totals
|
|
if depth == 0:
|
|
stmt_table.add_row("", "")
|
|
stmt_table.add_row(
|
|
Text("─" * 40, style="dim"),
|
|
Text("─" * 15, style="dim")
|
|
)
|
|
else:
|
|
# Regular items
|
|
style = "dim" if item.confidence < 0.8 else ""
|
|
confidence_marker = " ◦" if item.confidence < 0.8 else ""
|
|
label_text = f"{indent}{item.label}{confidence_marker}"
|
|
|
|
# Color code positive/negative values
|
|
value_str = item.get_display_value()
|
|
if value_str and value_str.startswith("$"):
|
|
try:
|
|
# Extract numeric value for coloring
|
|
if item.value and isinstance(item.value, (int, float)):
|
|
if item.value < 0:
|
|
value_style = "red"
|
|
else:
|
|
value_style = "green"
|
|
else:
|
|
value_style = ""
|
|
except:
|
|
value_style = ""
|
|
else:
|
|
value_style = ""
|
|
|
|
stmt_table.add_row(
|
|
Text(label_text, style=style),
|
|
Text(value_str, style=value_style) if value_str else ""
|
|
)
|
|
|
|
# Add children recursively
|
|
for child in item.children:
|
|
if depth < 3: # Limit depth for display
|
|
add_item_to_table(child, depth + 1)
|
|
|
|
# Add all items to the table
|
|
for item in self.items:
|
|
add_item_to_table(item)
|
|
|
|
# Metadata summary
|
|
metadata = Table(box=box.SIMPLE, show_header=False, padding=(0, 1))
|
|
metadata.add_column("Metric", style="dim")
|
|
metadata.add_column("Value", style="bold")
|
|
|
|
metadata.add_row("Facts Used", f"{self.facts_used:,}")
|
|
if self.facts_total > 0:
|
|
metadata.add_row("Total Facts", f"{self.facts_total:,}")
|
|
|
|
if self.canonical_coverage > 0:
|
|
coverage_pct = self.canonical_coverage * 100
|
|
coverage_style = "green" if coverage_pct >= 50 else "yellow" if coverage_pct >= 25 else "red"
|
|
metadata.add_row(
|
|
"Canonical Coverage",
|
|
Text(f"{coverage_pct:.1f}%", style=coverage_style)
|
|
)
|
|
|
|
if self.cik:
|
|
metadata.add_row("CIK", self.cik)
|
|
|
|
# Data quality indicators
|
|
quality_notes = []
|
|
|
|
# Count items by confidence
|
|
low_confidence_count = sum(
|
|
1 for item in self._flatten_items()
|
|
if not item.is_abstract and item.confidence < 0.8
|
|
)
|
|
|
|
if low_confidence_count > 0:
|
|
quality_notes.append(
|
|
Text(f"◦ {low_confidence_count} items with lower confidence", style="dim yellow")
|
|
)
|
|
|
|
# Count calculated vs actual values
|
|
calculated_count = sum(
|
|
1 for item in self._flatten_items()
|
|
if item.source == 'calculated'
|
|
)
|
|
|
|
if calculated_count > 0:
|
|
quality_notes.append(
|
|
Text(f"◦ {calculated_count} calculated values", style="dim cyan")
|
|
)
|
|
|
|
# Combine metadata and quality notes
|
|
metadata_panel = Panel(
|
|
metadata,
|
|
title="📊 Statement Metadata",
|
|
border_style="bright_black"
|
|
)
|
|
|
|
# Create the main content group
|
|
content_parts = [
|
|
Padding("", (1, 0, 0, 0)),
|
|
stmt_table
|
|
]
|
|
|
|
# Add metadata in a column layout
|
|
if self.facts_used > 0:
|
|
bottom_content = [metadata_panel]
|
|
|
|
if quality_notes:
|
|
quality_panel = Panel(
|
|
Group(*quality_notes),
|
|
title="📝 Data Quality Notes",
|
|
border_style="bright_black"
|
|
)
|
|
bottom_content.append(quality_panel)
|
|
|
|
content_parts.append(Padding("", (1, 0)))
|
|
content_parts.append(Columns(bottom_content, equal=True, expand=True))
|
|
|
|
content = Group(*content_parts)
|
|
|
|
# Create the main panel
|
|
return Panel(
|
|
content,
|
|
title=title,
|
|
subtitle=subtitle,
|
|
border_style="blue",
|
|
expand=True
|
|
)
|
|
|
|
def _flatten_items(self) -> List[StatementItem]:
|
|
"""Flatten the hierarchical items into a flat list."""
|
|
flat_items = []
|
|
|
|
def flatten(item: StatementItem):
|
|
flat_items.append(item)
|
|
for child in item.children:
|
|
flatten(child)
|
|
|
|
for item in self.items:
|
|
flatten(item)
|
|
|
|
return flat_items
|
|
|
|
def __repr__(self) -> str:
|
|
"""String representation using rich formatting."""
|
|
return repr_rich(self.__rich__())
|
|
|
|
|
|
class StatementBuilder:
|
|
"""
|
|
Builds structured financial statements using canonical templates.
|
|
|
|
This class reconstructs complete financial statements by combining
|
|
actual facts with canonical structures, filling in missing concepts
|
|
and maintaining proper hierarchy.
|
|
"""
|
|
|
|
def __init__(self, cik: Optional[str] = None):
|
|
"""
|
|
Initialize the statement builder.
|
|
|
|
Args:
|
|
cik: Company CIK for context
|
|
"""
|
|
self.cik = cik
|
|
self.canonical_structures = load_canonical_structures()
|
|
self.virtual_trees = load_virtual_trees()
|
|
|
|
def build_statement(self,
|
|
facts: List[FinancialFact],
|
|
statement_type: str,
|
|
fiscal_year: Optional[int] = None,
|
|
fiscal_period: Optional[str] = None,
|
|
use_canonical: bool = True,
|
|
include_missing: bool = False) -> StructuredStatement:
|
|
"""
|
|
Build a structured financial statement from facts.
|
|
|
|
Args:
|
|
facts: List of financial facts
|
|
statement_type: Type of statement (BalanceSheet, IncomeStatement, etc.)
|
|
fiscal_year: Fiscal year to filter for
|
|
fiscal_period: Fiscal period (FY, Q1, Q2, Q3, Q4)
|
|
use_canonical: Whether to use canonical structure for organization
|
|
include_missing: Whether to include placeholder for missing concepts
|
|
|
|
Returns:
|
|
StructuredStatement with hierarchical organization
|
|
"""
|
|
# Filter facts for this statement and period
|
|
filtered_facts = self._filter_facts(facts, statement_type, fiscal_year, fiscal_period)
|
|
|
|
# Create fact lookup
|
|
fact_map = self._create_fact_map(filtered_facts)
|
|
|
|
# Get period end date
|
|
period_end = self._get_period_end(filtered_facts)
|
|
|
|
if use_canonical and statement_type in self.virtual_trees:
|
|
# Build using canonical structure
|
|
items = self._build_with_canonical(
|
|
fact_map,
|
|
self.virtual_trees[statement_type],
|
|
include_missing
|
|
)
|
|
|
|
# Add unmatched facts
|
|
unmatched = self._find_unmatched_facts(fact_map, self.virtual_trees[statement_type])
|
|
items.extend(self._create_items_from_facts(unmatched))
|
|
else:
|
|
# Build from facts only
|
|
items = self._build_from_facts(fact_map)
|
|
|
|
# Calculate metadata
|
|
facts_used = len(fact_map)
|
|
canonical_coverage = self._calculate_coverage(fact_map, statement_type) if use_canonical else 0.0
|
|
|
|
return StructuredStatement(
|
|
statement_type=statement_type,
|
|
fiscal_year=fiscal_year,
|
|
fiscal_period=fiscal_period,
|
|
period_end=period_end,
|
|
items=items,
|
|
cik=self.cik,
|
|
canonical_coverage=canonical_coverage,
|
|
facts_used=facts_used,
|
|
facts_total=len(facts)
|
|
)
|
|
|
|
def _filter_facts(self, facts: List[FinancialFact],
|
|
statement_type: str,
|
|
fiscal_year: Optional[int],
|
|
fiscal_period: Optional[str]) -> List[FinancialFact]:
|
|
"""Filter facts for the requested statement and period."""
|
|
filtered = []
|
|
|
|
for fact in facts:
|
|
# Check statement type
|
|
if fact.statement_type != statement_type:
|
|
continue
|
|
|
|
# Check fiscal year
|
|
if fiscal_year and fact.fiscal_year != fiscal_year:
|
|
continue
|
|
|
|
# Check fiscal period
|
|
if fiscal_period and fact.fiscal_period != fiscal_period:
|
|
continue
|
|
|
|
filtered.append(fact)
|
|
|
|
return filtered
|
|
|
|
def _create_fact_map(self, facts: List[FinancialFact]) -> Dict[str, FinancialFact]:
|
|
"""Create a map of concept to fact."""
|
|
fact_map = {}
|
|
|
|
for fact in facts:
|
|
# Extract clean concept name
|
|
concept = fact.concept
|
|
if ':' in concept:
|
|
concept = concept.split(':', 1)[1]
|
|
|
|
# Use most recent fact for duplicates
|
|
if concept not in fact_map or fact.filing_date > fact_map[concept].filing_date:
|
|
fact_map[concept] = fact
|
|
|
|
return fact_map
|
|
|
|
def _get_period_end(self, facts: List[FinancialFact]) -> Optional[date]:
|
|
"""Get the period end date from facts."""
|
|
for fact in facts:
|
|
if fact.period_end:
|
|
return fact.period_end
|
|
return None
|
|
|
|
def _build_with_canonical(self, fact_map: Dict[str, FinancialFact],
|
|
virtual_tree: Dict[str, Any],
|
|
include_missing: bool) -> List[StatementItem]:
|
|
"""Build statement using canonical structure."""
|
|
items = []
|
|
processed = set()
|
|
|
|
# Process root nodes
|
|
for root_concept in virtual_tree.get('roots', []):
|
|
item = self._build_canonical_item(
|
|
root_concept,
|
|
virtual_tree['nodes'],
|
|
fact_map,
|
|
processed,
|
|
include_missing,
|
|
depth=0
|
|
)
|
|
if item:
|
|
items.append(item)
|
|
|
|
return items
|
|
|
|
def _build_canonical_item(self, concept: str,
|
|
nodes: Dict[str, Any],
|
|
fact_map: Dict[str, FinancialFact],
|
|
processed: Set[str],
|
|
include_missing: bool,
|
|
depth: int = 0,
|
|
parent: Optional[str] = None) -> Optional[StatementItem]:
|
|
"""Build a single canonical item with children."""
|
|
if concept in processed:
|
|
return None
|
|
|
|
processed.add(concept)
|
|
|
|
# Get node info
|
|
node = nodes.get(concept, {})
|
|
|
|
# Check if we have a fact for this concept
|
|
fact = fact_map.get(concept)
|
|
|
|
# Determine if we should include this item
|
|
if not fact and not include_missing and not node.get('is_abstract'):
|
|
# Skip missing concrete concepts unless required
|
|
if node.get('occurrence_rate', 0) < 0.8: # Not a core concept
|
|
return None
|
|
|
|
# Create the item
|
|
item = StatementItem(
|
|
concept=concept,
|
|
label=fact.label if fact else node.get('label', concept),
|
|
value=fact.numeric_value if fact else None,
|
|
depth=depth,
|
|
parent_concept=parent,
|
|
is_abstract=node.get('is_abstract', False),
|
|
is_total=node.get('is_total', False),
|
|
section=node.get('section'),
|
|
confidence=node.get('occurrence_rate', 1.0) if not fact else 1.0,
|
|
source='fact' if fact else ('canonical' if not include_missing else 'placeholder'),
|
|
fact=fact
|
|
)
|
|
|
|
# Process children
|
|
for child_concept in node.get('children', []):
|
|
child_item = self._build_canonical_item(
|
|
child_concept,
|
|
nodes,
|
|
fact_map,
|
|
processed,
|
|
include_missing,
|
|
depth + 1,
|
|
concept
|
|
)
|
|
if child_item:
|
|
item.children.append(child_item)
|
|
|
|
# Try to calculate total if missing
|
|
if item.is_total and item.value is None and item.children:
|
|
calculated_value = self._calculate_total(item.children)
|
|
if calculated_value is not None:
|
|
item.value = calculated_value
|
|
item.source = 'calculated'
|
|
|
|
return item
|
|
|
|
def _calculate_total(self, children: List[StatementItem]) -> Optional[float]:
|
|
"""Calculate total from children values."""
|
|
total = 0
|
|
has_values = False
|
|
|
|
for child in children:
|
|
if not child.is_abstract and child.value is not None:
|
|
total += child.value
|
|
has_values = True
|
|
|
|
return total if has_values else None
|
|
|
|
def _find_unmatched_facts(self, fact_map: Dict[str, FinancialFact],
|
|
virtual_tree: Dict[str, Any]) -> Dict[str, FinancialFact]:
|
|
"""Find facts that don't match canonical concepts."""
|
|
canonical_concepts = set(virtual_tree.get('nodes', {}).keys())
|
|
unmatched = {}
|
|
|
|
for concept, fact in fact_map.items():
|
|
if concept not in canonical_concepts:
|
|
unmatched[concept] = fact
|
|
|
|
return unmatched
|
|
|
|
def _create_items_from_facts(self, facts: Dict[str, FinancialFact]) -> List[StatementItem]:
|
|
"""Create statement items from unmatched facts."""
|
|
items = []
|
|
|
|
for concept, fact in facts.items():
|
|
item = StatementItem(
|
|
concept=concept,
|
|
label=fact.label,
|
|
value=fact.numeric_value,
|
|
depth=1, # Default depth
|
|
parent_concept=None,
|
|
is_abstract=fact.is_abstract,
|
|
is_total=fact.is_total,
|
|
section=fact.section,
|
|
confidence=0.7, # Lower confidence for unmatched
|
|
source='fact',
|
|
fact=fact
|
|
)
|
|
items.append(item)
|
|
|
|
return items
|
|
|
|
def _build_from_facts(self, fact_map: Dict[str, FinancialFact]) -> List[StatementItem]:
|
|
"""Build statement directly from facts without canonical structure."""
|
|
# Group facts by parent
|
|
hierarchy = defaultdict(list)
|
|
roots = []
|
|
|
|
for concept, fact in fact_map.items():
|
|
if fact.parent_concept:
|
|
hierarchy[fact.parent_concept].append(concept)
|
|
else:
|
|
roots.append(concept)
|
|
|
|
# Build items recursively
|
|
items = []
|
|
for root_concept in roots:
|
|
item = self._build_fact_item(root_concept, fact_map, hierarchy)
|
|
if item:
|
|
items.append(item)
|
|
|
|
# Add orphaned facts
|
|
for concept, fact in fact_map.items():
|
|
if concept not in roots and not fact.parent_concept:
|
|
item = StatementItem(
|
|
concept=concept,
|
|
label=fact.label,
|
|
value=fact.numeric_value,
|
|
depth=0,
|
|
parent_concept=None,
|
|
is_abstract=fact.is_abstract,
|
|
is_total=fact.is_total,
|
|
section=fact.section,
|
|
confidence=1.0,
|
|
source='fact',
|
|
fact=fact
|
|
)
|
|
items.append(item)
|
|
|
|
return items
|
|
|
|
def _build_fact_item(self, concept: str,
|
|
fact_map: Dict[str, FinancialFact],
|
|
hierarchy: Dict[str, List[str]],
|
|
depth: int = 0) -> Optional[StatementItem]:
|
|
"""Build item from fact with children."""
|
|
if concept not in fact_map:
|
|
return None
|
|
|
|
fact = fact_map[concept]
|
|
|
|
item = StatementItem(
|
|
concept=concept,
|
|
label=fact.label,
|
|
value=fact.numeric_value,
|
|
depth=depth,
|
|
parent_concept=fact.parent_concept,
|
|
is_abstract=fact.is_abstract,
|
|
is_total=fact.is_total,
|
|
section=fact.section,
|
|
confidence=1.0,
|
|
source='fact',
|
|
fact=fact
|
|
)
|
|
|
|
# Add children
|
|
for child_concept in hierarchy.get(concept, []):
|
|
child_item = self._build_fact_item(child_concept, fact_map, hierarchy, depth + 1)
|
|
if child_item:
|
|
item.children.append(child_item)
|
|
|
|
return item
|
|
|
|
def _calculate_coverage(self, fact_map: Dict[str, FinancialFact],
|
|
statement_type: str) -> float:
|
|
"""Calculate canonical coverage percentage."""
|
|
if statement_type not in self.virtual_trees:
|
|
return 0.0
|
|
|
|
canonical_concepts = set(self.virtual_trees[statement_type].get('nodes', {}).keys())
|
|
if not canonical_concepts:
|
|
return 0.0
|
|
|
|
matched = len(set(fact_map.keys()) & canonical_concepts)
|
|
return matched / len(canonical_concepts)
|