edgartools/venv/lib/python3.10/site-packages/edgar/entity/models.py

"""
Data models for the enhanced Entity Facts API.

This module provides the unified data models for financial facts,
optimized for both traditional analysis and AI consumption.
"""

from dataclasses import dataclass, field
from datetime import date
from enum import Enum
from typing import Any, Dict, List, Literal, Optional, Union


class DataQuality(Enum):
    """Quality levels that can be attached to a fact."""

    # Direct from XBRL, validated
    HIGH = "high"
    # Derived or calculated
    MEDIUM = "medium"
    # Estimated or inferred
    LOW = "low"


@dataclass
class FinancialFact:
    """
    Unified fact representation optimized for both traditional analysis and AI consumption.

    This class represents a single financial fact with rich contextual information,
    quality indicators, and AI-ready metadata.
    """

    # Unit codes (upper-cased) that are formatted as currency amounts.
    # Deliberately un-annotated so the dataclass machinery does not turn
    # these class-level lookup tables into instance fields.
    _CURRENCY_UNITS = frozenset({'USD', 'EUR', 'GBP', 'JPY', 'CAD', 'CHF'})

    # Words appended to a currency amount for the scale factors we can name.
    _SCALE_SUFFIXES = {
        1000: " thousand",
        1000000: " million",
        1000000000: " billion",
    }

    # Core identification
    concept: str                    # Standardized concept (e.g., 'us-gaap:Revenue')
    taxonomy: str                   # Taxonomy namespace (us-gaap, ifrs, etc.)
    label: str                      # Human-readable label

    # Values with proper typing
    value: Union[float, int, str]   # The actual value
    numeric_value: Optional[float]  # Numeric representation for calculations
    unit: str                       # Unit of measure (USD, shares, etc.)
    scale: Optional[int] = None     # Scale factor (thousands=1000, millions=1000000)

    # Temporal context
    period_start: Optional[date] = None
    period_end: Optional[date] = None       # None when the period end is unknown
    period_type: Literal['instant', 'duration'] = 'instant'
    fiscal_year: int = 0
    fiscal_period: str = ''         # FY, Q1, Q2, Q3, Q4

    # Filing context
    filing_date: Optional[date] = None      # None when the filing date is unknown
    form_type: str = ''             # 10-K, 10-Q, 8-K, etc.
    accession: str = ''             # SEC accession number

    # Quality and provenance
    data_quality: DataQuality = DataQuality.MEDIUM
    is_audited: bool = False
    is_restated: bool = False
    is_estimated: bool = False
    confidence_score: float = 0.8   # 0.0 to 1.0

    # AI-ready context
    semantic_tags: List[str] = field(default_factory=list)  # ['revenue', 'recurring', 'operating']
    business_context: str = ''      # "Product revenue from iPhone sales"
    calculation_context: Optional[str] = None  # "Derived from segment data"

    # Optional XBRL specifics
    context_ref: Optional[str] = None
    dimensions: Dict[str, str] = field(default_factory=dict)
    statement_type: Optional[str] = None
    line_item_sequence: Optional[int] = None

    # Structural metadata (from learned mappings)
    depth: Optional[int] = None            # Hierarchy depth in statement
    parent_concept: Optional[str] = None   # Parent concept in hierarchy
    section: Optional[str] = None          # Statement section (e.g., "Current Assets")
    is_abstract: bool = False              # Abstract/header item
    is_total: bool = False                 # Total/sum item
    presentation_order: Optional[float] = None  # Order in presentation

    def to_llm_context(self) -> Dict[str, Any]:
        """
        Generate rich context for LLM consumption.

        The value is rendered as text: currency units become a whole number
        with thousands separators, followed by " thousand", " million" or
        " billion" when ``scale`` is 1000, 1000000 or 1000000000; any other
        numeric value is rendered with two decimals; a fact without a
        ``numeric_value`` falls back to ``str(value)``.

        The period is described as "as of <period_end>" for instant facts,
        otherwise as "for <fiscal_period> <fiscal_year>", with the date range
        appended when both period dates are set.

        Returns:
            A dictionary with the keys concept, value, unit, period, context,
            quality, confidence, tags, source, is_audited, is_estimated and
            dimensions (None when the fact has no dimensions).
        """
        # Format the value appropriately
        if self.numeric_value is None:
            formatted_value = str(self.value)
        elif self.unit.upper() in self._CURRENCY_UNITS:
            formatted_value = f"{self.numeric_value:,.0f}"
            # Unknown or missing scale factors simply add no suffix
            formatted_value += self._SCALE_SUFFIXES.get(self.scale, "")
        else:
            formatted_value = f"{self.numeric_value:,.2f}"

        # Format the period
        if self.period_type == 'instant':
            period_desc = f"as of {self.period_end}"
        else:
            period_desc = f"for {self.fiscal_period} {self.fiscal_year}"
            if self.period_start and self.period_end:
                period_desc += f" ({self.period_start} to {self.period_end})"

        if self.filing_date:
            source = f"{self.form_type} filed {self.filing_date}"
        else:
            source = "Unknown source"

        return {
            "concept": self.label,
            "value": formatted_value,
            "unit": self.unit,
            "period": period_desc,
            "context": self.business_context,
            "quality": self.data_quality.value,
            "confidence": self.confidence_score,
            "tags": self.semantic_tags,
            "source": source,
            "is_audited": self.is_audited,
            "is_estimated": self.is_estimated,
            "dimensions": self.dimensions or None,
        }

    def get_display_period_key(self) -> str:
        """
        Generate a display-friendly period key based on actual period dates.

        The year always comes from ``period_end``. For quarterly facts the
        quarter label is the calendar quarter containing ``period_end``
        (months 1-3 -> Q1, 4-6 -> Q2, 7-9 -> Q3, 10-12 -> Q4), which may
        differ from the stored ``fiscal_period``.

        Returns:
            A period key in format like "Q1 2024", "FY 2023", etc. When
            ``period_end`` is not set, "<fiscal_period> <fiscal_year>".
        """
        if not self.period_end:
            # Fallback to fiscal year/period if no period_end
            return f"{self.fiscal_period} {self.fiscal_year}"

        calendar_year = self.period_end.year

        if self.fiscal_period in ('Q1', 'Q2', 'Q3', 'Q4'):
            # Integer-divide the zero-based month into blocks of three
            calendar_quarter = (self.period_end.month - 1) // 3 + 1
            return f"Q{calendar_quarter} {calendar_year}"

        # "FY" and any other period label are shown as-is with the calendar year
        return f"{self.fiscal_period} {calendar_year}"

    def get_formatted_value(self) -> str:
        """
        Format the numeric value for display, avoiding scientific notation.

        Returns:
            ``str(value)`` when there is no ``numeric_value``. Otherwise:
            two decimals without separators for 'pure'/'percent'/'%' units,
            a whole number with separators for share counts, and for every
            other unit (currencies included) a whole number with separators
            when the magnitude is at least 1000, else two decimals.
        """
        amount = self.numeric_value
        if amount is None:
            return str(self.value)

        unit_lower = self.unit.lower()

        # For percentages and ratios
        if unit_lower in ('pure', 'percent', '%'):
            return f"{amount:.2f}"

        # For share counts
        if unit_lower in ('shares', 'share'):
            return f"{amount:,.0f}"

        # Currency units and all remaining units share the same rule:
        # drop the decimals once the value is large enough not to need them.
        if abs(amount) >= 1000:
            return f"{amount:,.0f}"
        return f"{amount:,.2f}"

    def __repr__(self) -> str:
        """String representation focusing on key information"""
        # Compare against None explicitly: a numeric value of 0 is a real
        # value and must not fall back to the raw ``value``.
        if self.numeric_value is not None:
            value_str = f"{self.numeric_value:,.0f}"
        else:
            value_str = str(self.value)
        return f"FinancialFact({self.concept}={value_str} {self.unit}, {self.fiscal_period} {self.fiscal_year})"


@dataclass
class ConceptMetadata:
    """
    Descriptive metadata for a single financial concept.

    Holds the concept's labels and definition, its links to other concepts,
    its classification, and free-form usage guidance.
    """

    # --- Identity ---
    # The concept identifier
    concept: str
    # Primary display label
    label: str
    # Detailed definition
    definition: str

    # --- Relationships to other concepts (each instance gets its own lists) ---
    parent_concepts: List[str] = field(default_factory=list)
    child_concepts: List[str] = field(default_factory=list)
    calculation_components: List[str] = field(default_factory=list)

    # --- Classification ---
    # BalanceSheet, IncomeStatement, etc.
    statement_type: Optional[str] = None
    is_monetary: bool = True
    # True for flow concepts, False for stock concepts
    is_duration: bool = True
    normal_balance: Optional[Literal['debit', 'credit']] = None

    # --- Usage guidance ---
    # Alternative labels
    common_names: List[str] = field(default_factory=list)
    # Special considerations
    usage_notes: str = ''
    # Common scale factor
    typical_scale: Optional[int] = None


@dataclass
class FactCollection:
    """
    A collection of related facts, typically for a specific time period or statement.

    This is used internally to group facts for efficient processing and analysis.
    """
    facts: List[FinancialFact]
    period_key: str                 # e.g., "2024-Q4", "2024-FY"
    statement_type: Optional[str] = None

    def get_fact(self, concept: str) -> Optional[FinancialFact]:
        """
        Get a specific fact by concept.

        Args:
            concept: Text compared against each fact's ``concept`` and
                ``label``; a match on either one is enough.

        Returns:
            The first matching fact in list order, or None when nothing matches.
        """
        return next(
            (fact for fact in self.facts if concept in (fact.concept, fact.label)),
            None,
        )

    def to_dict(self) -> Dict[str, Any]:
        """
        Convert to dictionary keyed by concept.

        Each entry holds 'value', 'label' and 'unit'. 'value' is the fact's
        ``numeric_value`` whenever one is set (including zero) and the raw
        ``value`` otherwise. When several facts share a concept, the one
        appearing last in ``facts`` is kept.
        """
        return {
            fact.concept: {
                # Explicit None check: a numeric value of 0 is valid data and
                # must not be replaced by the raw (possibly string) value.
                'value': fact.numeric_value if fact.numeric_value is not None else fact.value,
                'label': fact.label,
                'unit': fact.unit,
            }
            for fact in self.facts
        }