""" Data models for the enhanced Entity Facts API. This module provides the unified data models for financial facts, optimized for both traditional analysis and AI consumption. """ from dataclasses import dataclass, field from datetime import date from enum import Enum from typing import Any, Dict, List, Literal, Optional, Union class DataQuality(Enum): """Data quality indicators for facts""" HIGH = "high" # Direct from XBRL, validated MEDIUM = "medium" # Derived or calculated LOW = "low" # Estimated or inferred @dataclass class FinancialFact: """ Unified fact representation optimized for both traditional analysis and AI consumption. This class represents a single financial fact with rich contextual information, quality indicators, and AI-ready metadata. """ # Core identification concept: str # Standardized concept (e.g., 'us-gaap:Revenue') taxonomy: str # Taxonomy namespace (us-gaap, ifrs, etc.) label: str # Human-readable label # Values with proper typing value: Union[float, int, str] # The actual value numeric_value: Optional[float] # Numeric representation for calculations unit: str # Unit of measure (USD, shares, etc.) scale: Optional[int] = None # Scale factor (thousands=1000, millions=1000000) # Temporal context period_start: Optional[date] = None period_end: date = None period_type: Literal['instant', 'duration'] = 'instant' fiscal_year: int = 0 fiscal_period: str = '' # FY, Q1, Q2, Q3, Q4 # Filing context filing_date: date = None form_type: str = '' # 10-K, 10-Q, 8-K, etc. accession: str = '' # SEC accession number # Quality and provenance data_quality: DataQuality = DataQuality.MEDIUM is_audited: bool = False is_restated: bool = False is_estimated: bool = False confidence_score: float = 0.8 # 0.0 to 1.0 # AI-ready context semantic_tags: List[str] = field(default_factory=list) # ['revenue', 'recurring', 'operating'] business_context: str = '' # "Product revenue from iPhone sales" calculation_context: Optional[str] = None # "Derived from segment data" # Optional XBRL specifics context_ref: Optional[str] = None dimensions: Dict[str, str] = field(default_factory=dict) statement_type: Optional[str] = None line_item_sequence: Optional[int] = None # Structural metadata (from learned mappings) depth: Optional[int] = None # Hierarchy depth in statement parent_concept: Optional[str] = None # Parent concept in hierarchy section: Optional[str] = None # Statement section (e.g., "Current Assets") is_abstract: bool = False # Abstract/header item is_total: bool = False # Total/sum item presentation_order: Optional[float] = None # Order in presentation def to_llm_context(self) -> Dict[str, Any]: """ Generate rich context for LLM consumption. Returns a dictionary with formatted values and contextual information optimized for language model understanding. """ # Format the value appropriately if self.numeric_value is not None: if self.unit.upper() in ['USD', 'EUR', 'GBP', 'JPY']: # Currency formatting formatted_value = f"{self.numeric_value:,.0f}" if self.scale: if self.scale == 1000: formatted_value += " thousand" elif self.scale == 1000000: formatted_value += " million" elif self.scale == 1000000000: formatted_value += " billion" else: formatted_value = f"{self.numeric_value:,.2f}" else: formatted_value = str(self.value) # Format the period if self.period_type == 'instant': period_desc = f"as of {self.period_end}" else: period_desc = f"for {self.fiscal_period} {self.fiscal_year}" if self.period_start and self.period_end: period_desc += f" ({self.period_start} to {self.period_end})" return { "concept": self.label, "value": formatted_value, "unit": self.unit, "period": period_desc, "context": self.business_context, "quality": self.data_quality.value, "confidence": self.confidence_score, "tags": self.semantic_tags, "source": f"{self.form_type} filed {self.filing_date}" if self.filing_date else "Unknown source", "is_audited": self.is_audited, "is_estimated": self.is_estimated, "dimensions": self.dimensions if self.dimensions else None } def get_display_period_key(self) -> str: """ Generate a display-friendly period key based on actual period dates. This method creates period keys like "Q1 2024" based on the actual period covered by the data, not the filing year. It uses the period_end date to determine the calendar year and quarter. Returns: A period key in format like "Q1 2024", "FY 2023", etc. """ if not self.period_end: # Fallback to fiscal year/period if no period_end return f"{self.fiscal_period} {self.fiscal_year}" # Extract calendar year from period_end calendar_year = self.period_end.year # For fiscal years, use "FY" prefix if self.fiscal_period == 'FY': return f"FY {calendar_year}" # For quarters, determine the calendar quarter from the end date if self.fiscal_period in ['Q1', 'Q2', 'Q3', 'Q4']: end_month = self.period_end.month # Map end month to calendar quarter if end_month in [1, 2, 3]: quarter = 'Q1' elif end_month in [4, 5, 6]: quarter = 'Q2' elif end_month in [7, 8, 9]: quarter = 'Q3' else: # 10, 11, 12 quarter = 'Q4' return f"{quarter} {calendar_year}" # For other periods, use the fiscal period with calendar year return f"{self.fiscal_period} {calendar_year}" def get_formatted_value(self) -> str: """ Format the numeric value for display, avoiding scientific notation. Returns: Formatted string representation of the value """ if self.numeric_value is None: return str(self.value) # For currency values if self.unit.upper() in ['USD', 'EUR', 'GBP', 'JPY', 'CAD', 'CHF']: # Round to nearest whole number for large values if abs(self.numeric_value) >= 1000: return f"{self.numeric_value:,.0f}" else: return f"{self.numeric_value:,.2f}" # For share counts elif self.unit.lower() in ['shares', 'share']: return f"{self.numeric_value:,.0f}" # For percentages and ratios elif self.unit.lower() in ['pure', 'percent', '%']: return f"{self.numeric_value:.2f}" # Default formatting else: if abs(self.numeric_value) >= 1000: return f"{self.numeric_value:,.0f}" else: return f"{self.numeric_value:,.2f}" def __repr__(self) -> str: """String representation focusing on key information""" value_str = f"{self.numeric_value:,.0f}" if self.numeric_value else str(self.value) return f"FinancialFact({self.concept}={value_str} {self.unit}, {self.fiscal_period} {self.fiscal_year})" @dataclass class ConceptMetadata: """ Metadata about a financial concept. This provides additional context about what a concept represents, how it's calculated, and how it relates to other concepts. """ concept: str # The concept identifier label: str # Primary display label definition: str # Detailed definition # Concept relationships parent_concepts: List[str] = field(default_factory=list) child_concepts: List[str] = field(default_factory=list) calculation_components: List[str] = field(default_factory=list) # Classification statement_type: Optional[str] = None # BalanceSheet, IncomeStatement, etc. is_monetary: bool = True is_duration: bool = True # True for flow concepts, False for stock concepts normal_balance: Optional[Literal['debit', 'credit']] = None # Usage guidance common_names: List[str] = field(default_factory=list) # Alternative labels usage_notes: str = '' # Special considerations typical_scale: Optional[int] = None # Common scale factor @dataclass class FactCollection: """ A collection of related facts, typically for a specific time period or statement. This is used internally to group facts for efficient processing and analysis. """ facts: List[FinancialFact] period_key: str # e.g., "2024-Q4", "2024-FY" statement_type: Optional[str] = None def get_fact(self, concept: str) -> Optional[FinancialFact]: """Get a specific fact by concept""" for fact in self.facts: if fact.concept == concept or fact.label == concept: return fact return None def to_dict(self) -> Dict[str, Any]: """Convert to dictionary keyed by concept""" return { fact.concept: { 'value': fact.numeric_value or fact.value, 'label': fact.label, 'unit': fact.unit } for fact in self.facts }