"""
Financial Statement wrapper classes with rich display and concept-aware formatting.
This module provides Statement classes that wrap pandas DataFrames with:
- Intelligent formatting based on financial concept types
- Rich display for professional presentation
- Access to underlying data for calculations
- LLM-ready context generation
"""
from dataclasses import dataclass
from typing import Any, Dict, List, Optional
import pandas as pd
from rich.box import SIMPLE, SIMPLE_HEAVY
from rich.console import Group
from rich.padding import Padding
from rich.panel import Panel
from rich.table import Table
from rich.text import Text
from .terminal_styles import get_current_scheme
@dataclass
class ConceptFormatting:
    """
    Display rules applied to the values of one kind of financial concept.

    Attributes:
        decimal_places: Number of digits rendered after the decimal point.
        show_currency: Whether a leading ``$`` is rendered before the number.
        scale_display: Flag for abbreviated magnitudes (M, B suffixes).
        percentage: Whether the value is rendered with a trailing ``%``
            instead of as an amount.
    """

    # Digits shown after the decimal point.
    decimal_places: int = 2
    # Prefix the rendered number with a currency symbol.
    show_currency: bool = True
    # Show M, B suffixes
    scale_display: bool = True
    # Render as a percentage rather than an amount.
    percentage: bool = False
class FinancialStatement:
    """
    A wrapper around pandas DataFrame for financial statements with intelligent formatting.

    The wrapped frame is read as one row per line item (the index value is
    used as the concept label) and one column per reporting period.

    This class provides:
    - Concept-aware formatting (EPS to 2 decimals, revenue as whole currency amounts, etc.)
    - Rich display for professional presentation
    - Access to underlying numeric data
    - LLM context generation
    """

    # Display limits used by the rich console rendering.
    _MAX_DISPLAY_PERIODS = 6    # period columns shown before truncating
    _MAX_LABEL_WIDTH = 30       # matches max_width of the "Line Item" column
    _MAX_TEXT_CELL_WIDTH = 12   # non-numeric cell text is cut to this length

    # Formatting rules by concept pattern.
    # Patterns are tested in insertion order and the first hit wins, so more
    # specific patterns must stay ahead of generic ones such as 'income'.
    CONCEPT_FORMATS = {
        # Earnings per share - always show decimals
        'earningspershare': ConceptFormatting(decimal_places=2, show_currency=False, scale_display=False),
        'earnings per share': ConceptFormatting(decimal_places=2, show_currency=False, scale_display=False),
        'eps': ConceptFormatting(decimal_places=2, show_currency=False, scale_display=False),
        # Ratios and percentages
        'ratio': ConceptFormatting(decimal_places=2, show_currency=False, scale_display=False),
        'margin': ConceptFormatting(decimal_places=1, show_currency=False, scale_display=False, percentage=True),
        'percent': ConceptFormatting(decimal_places=1, show_currency=False, scale_display=False, percentage=True),
        # Per-share values
        'per share': ConceptFormatting(decimal_places=2, show_currency=False, scale_display=False),
        'pershare': ConceptFormatting(decimal_places=2, show_currency=False, scale_display=False),
        'book value': ConceptFormatting(decimal_places=2, show_currency=False, scale_display=False),
        'dividend': ConceptFormatting(decimal_places=2, show_currency=False, scale_display=False),
        # Share counts - show full numbers with commas
        'shares outstanding': ConceptFormatting(decimal_places=0, show_currency=False, scale_display=False),
        'common stock': ConceptFormatting(decimal_places=0, show_currency=False, scale_display=False),
        'weighted average': ConceptFormatting(decimal_places=0, show_currency=False, scale_display=False),
        # Large financial amounts - show full numbers with commas
        'revenue': ConceptFormatting(decimal_places=0, show_currency=True, scale_display=False),
        'income': ConceptFormatting(decimal_places=0, show_currency=True, scale_display=False),
        'assets': ConceptFormatting(decimal_places=0, show_currency=True, scale_display=False),
        'liabilities': ConceptFormatting(decimal_places=0, show_currency=True, scale_display=False),
    }

    def __init__(self,
                 data: pd.DataFrame,
                 statement_type: str,
                 entity_name: str = "",
                 period_lengths: Optional[List[str]] = None,
                 mixed_periods: bool = False):
        """
        Initialize financial statement.

        Args:
            data: DataFrame with financial data
            statement_type: Type of statement (IncomeStatement, BalanceSheet, etc.)
            entity_name: Company name
            period_lengths: List of period lengths in the data
            mixed_periods: Whether data contains mixed period lengths
        """
        self.data = data
        self.statement_type = statement_type
        self.entity_name = entity_name
        self.period_lengths = period_lengths or []
        self.mixed_periods = mixed_periods
        # Snapshot taken at construction time; later changes to ``data`` do
        # not affect what ``to_numeric`` returns.
        self._numeric_data = data.copy()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _is_missing(value: Any) -> bool:
        """Return True when ``value`` is a scalar that pandas treats as missing."""
        # The scalar guard keeps list-like cells from producing an array
        # whose truth value would be ambiguous.
        return bool(pd.api.types.is_scalar(value) and pd.isna(value))

    @staticmethod
    def _is_numeric(value: Any) -> bool:
        """
        Return True for integer or float scalars, including NumPy scalars.

        Booleans are deliberately excluded: ``bool`` is a subclass of ``int``
        and would otherwise be rendered as ``$1`` / ``$0``.
        """
        return bool(pd.api.types.is_integer(value) or pd.api.types.is_float(value))

    @staticmethod
    def _split_camel_case(label: str) -> str:
        """Insert a space before each capital that follows a lowercase letter or digit."""
        pieces = []
        previous = ''
        for char in label:
            if char.isupper() and (previous.islower() or previous.isdigit()):
                pieces.append(' ')
            pieces.append(char)
            previous = char
        return ''.join(pieces)

    @staticmethod
    def _contains_at_word_start(text: str, pattern: str) -> bool:
        """Return True if ``pattern`` occurs in ``text`` at the beginning of a word."""
        position = text.find(pattern)
        while position != -1:
            if position == 0 or not text[position - 1].isalnum():
                return True
            position = text.find(pattern, position + 1)
        return False

    def _statement_title(self) -> str:
        """Return the statement type with a space before 'Statement'."""
        return self.statement_type.replace('Statement', ' Statement')

    def _iter_rows(self, frame: Optional[pd.DataFrame] = None):
        """
        Yield ``(index_label, values)`` for every row, by position.

        Positional iteration keeps rows with duplicate index labels separate;
        label-based ``.loc`` lookups would return several rows at once.
        """
        frame = self.data if frame is None else frame
        if frame.shape[1] == 0:
            # itertuples() yields nothing for a frame without columns.
            for label in frame.index:
                yield label, ()
            return
        yield from zip(frame.index, frame.itertuples(index=False, name=None))

    def _format_cell(self, value: Any, concept_label: str) -> str:
        """Render one cell: '' for missing, formatted for numbers, ``str`` otherwise."""
        if self._is_missing(value):
            return ''
        if self._is_numeric(value):
            return self.format_value(value, concept_label)
        return str(value)

    def _formatted_frame(self) -> pd.DataFrame:
        """Build a string DataFrame with the same index/columns as ``self.data``."""
        if self.data.empty:
            return pd.DataFrame(index=self.data.index, columns=self.data.columns, dtype=object)
        rows = [
            [self._format_cell(value, str(label)) for value in values]
            for label, values in self._iter_rows()
        ]
        return pd.DataFrame(rows, index=self.data.index, columns=self.data.columns, dtype=object)

    # ------------------------------------------------------------------
    # Formatting
    # ------------------------------------------------------------------
    def get_concept_formatting(self, concept_label: str) -> ConceptFormatting:
        """
        Get formatting rules for a specific concept.

        A pattern from ``CONCEPT_FORMATS`` applies when it occurs in the label
        (case-insensitively) at the start of a word. CamelCase names are also
        tried with their words separated, so ``NetIncomeLoss`` still matches
        ``'income'``. Requiring a word start avoids accidental hits such as
        ``'ratio'`` inside "operations" or ``'eps'`` inside "deposits".

        Args:
            concept_label: Label of the financial concept

        Returns:
            ConceptFormatting rules for this concept
        """
        label = str(concept_label)
        candidates = (label.lower(), self._split_camel_case(label).lower())

        for pattern, formatting in self.CONCEPT_FORMATS.items():
            if any(self._contains_at_word_start(text, pattern) for text in candidates):
                return formatting

        # Default formatting for large amounts - show full numbers with commas
        return ConceptFormatting(decimal_places=0, show_currency=True, scale_display=False)

    def format_value(self, value: float, concept_label: str) -> str:
        """
        Format a single value based on its concept.

        Args:
            value: Numeric value to format
            concept_label: Label of the financial concept

        Returns:
            Formatted string representation; an empty string for missing values
        """
        if pd.isna(value):
            return ''

        formatting = self.get_concept_formatting(concept_label)
        number_spec = f",.{formatting.decimal_places}f"

        # Percentages are rendered without thousands separators.
        if formatting.percentage:
            return f"{value:.{formatting.decimal_places}f}%"

        # Always use full number formatting with commas - no scaling to preserve precision
        rendered = format(value, number_spec)
        return f"${rendered}" if formatting.show_currency else rendered

    # ------------------------------------------------------------------
    # Representations
    # ------------------------------------------------------------------
    def _repr_html_(self) -> str:
        """
        Rich HTML representation for Jupyter notebooks.

        Entity name, statement title, period lengths and table contents are
        HTML-escaped because they originate from external data.

        Returns:
            HTML string for rich display
        """
        from html import escape

        parts = [f"\n{escape(str(self.entity_name))} - {escape(self._statement_title())}\n"]

        # Add period warning if mixed
        if self.mixed_periods:
            periods = escape(', '.join(self.period_lengths))
            parts.append(
                "\n⚠️ Mixed Period Lengths: This statement contains periods of different lengths\n"
                f"({periods}). Consider filtering to comparable periods for accurate analysis.\n"
            )

        # Add the formatted table
        parts.append(
            self._formatted_frame().to_html(classes='financial-statement',
                                            table_id='fs-table',
                                            escape=True)
        )
        parts.append("\n")
        return ''.join(parts)

    def __str__(self) -> str:
        """
        String representation for console display.

        Returns:
            Formatted string representation
        """
        header = f"\n{self.entity_name} - {self._statement_title()}\n"
        # Underline as wide as the title line itself.
        header += "=" * len(header.strip()) + "\n"
        if self.mixed_periods:
            header += f"⚠️ Mixed period lengths: {', '.join(self.period_lengths)}\n\n"
        return header + str(self._formatted_frame())

    def __rich__(self):
        """Creates a rich representation for professional financial statement display."""
        colors = get_current_scheme()
        statement_title = self._statement_title()

        if self.data.empty:
            return Panel(
                Text("No data available", style=colors["empty_value"]),
                title=f"📊 {statement_title}",
                border_style=colors["panel_border"]
            )

        # Statement type icon mapping
        icon_map = {
            'IncomeStatement': '💰',
            'BalanceSheet': '⚖️',
            'CashFlow': '💵',
            'Statement': '📊'
        }
        icon = icon_map.get(self.statement_type, '📊')

        # Title with company name (when known) and statement type
        title_parts = [icon + " "]
        if self.entity_name:
            title_parts.extend([(self.entity_name, colors["company_name"]), " "])
        title_parts.append((statement_title, colors["statement_type"]))
        title = Text.assemble(*title_parts)

        # Create the main financial statement table
        statement_table = Table(box=SIMPLE, show_header=True, padding=(0, 1))
        statement_table.add_column("Line Item", style=colors["total_item"], no_wrap=True,
                                   max_width=self._MAX_LABEL_WIDTH)

        # Add period columns (limit to reasonable number for display)
        periods = list(self.data.columns)
        display_periods = periods[:self._MAX_DISPLAY_PERIODS]
        has_more_periods = len(periods) > self._MAX_DISPLAY_PERIODS
        for period in display_periods:
            statement_table.add_column(str(period), justify="right", max_width=15)

        # Add rows with formatted values
        ellipsis = "..."
        display_data = self.data.iloc[:, :self._MAX_DISPLAY_PERIODS]
        for label, values in self._iter_rows(display_data):
            concept_label = str(label)
            # Truncate long concept names so label plus ellipsis fits the column
            if len(concept_label) > self._MAX_LABEL_WIDTH:
                display_label = concept_label[:self._MAX_LABEL_WIDTH - len(ellipsis)] + ellipsis
            else:
                display_label = concept_label

            row_values = [display_label]
            for value in values:
                if self._is_missing(value):
                    row_values.append("-")
                elif self._is_numeric(value):
                    row_values.append(self.format_value(value, concept_label))
                else:
                    row_values.append(str(value)[:self._MAX_TEXT_CELL_WIDTH])
            statement_table.add_row(*row_values)

        # Create summary info panel
        info_table = Table(box=SIMPLE_HEAVY, show_header=False, padding=(0, 1))
        info_table.add_column("Metric", style=colors["low_confidence_item"])
        info_table.add_column("Value", style=colors["total_item"])
        info_table.add_row("Line Items", f"{len(self.data.index):,}")
        info_table.add_row("Periods", f"{len(self.data.columns):,}")
        if self.period_lengths:
            # dict.fromkeys de-duplicates while keeping first-seen order, so
            # the rendered text is stable between runs (a set is not).
            info_table.add_row("Period Types", ", ".join(dict.fromkeys(self.period_lengths)))

        info_panel = Panel(
            info_table,
            title="📋 Statement Info",
            border_style="bright_black"
        )

        # Create period warning if needed
        warning_panel = None
        if self.mixed_periods:
            warning_text = Text.assemble(
                "⚠️ Mixed period lengths detected: ",
                (", ".join(self.period_lengths), "yellow"),
                "\nConsider filtering to comparable periods for accurate analysis."
            )
            warning_panel = Panel(
                warning_text,
                title="🚨 Period Warning",
                border_style=colors.get("warning", "yellow")
            )

        # Subtitle with additional info
        subtitle_parts = [f"{len(self.data.index):,} line items"]
        if has_more_periods:
            subtitle_parts.append(f"showing first {len(display_periods)} of {len(periods)} periods")
        subtitle = " • ".join(subtitle_parts)

        # Main statement panel
        statement_panel = Panel(
            statement_table,
            title="📊 Financial Data",
            subtitle=subtitle,
            border_style="bright_black"
        )

        # Combine all panels
        content_renderables = [
            Padding("", (1, 0, 0, 0)),
            info_panel
        ]
        if warning_panel:
            content_renderables.append(warning_panel)
        content_renderables.append(statement_panel)

        return Panel(
            Group(*content_renderables),
            title=title,
            border_style=colors["panel_border"]
        )

    def __repr__(self):
        """String representation using rich formatting."""
        from edgar.richtools import repr_rich
        return repr_rich(self.__rich__())

    # ------------------------------------------------------------------
    # Data access
    # ------------------------------------------------------------------
    def to_numeric(self) -> pd.DataFrame:
        """
        Get the underlying numeric DataFrame for calculations.

        Returns:
            A fresh copy of the DataFrame captured at construction time
        """
        return self._numeric_data.copy()

    def _llm_value(self, value: Any, concept_label: str) -> Dict[str, Any]:
        """Build the ``raw_value`` / ``formatted_value`` entry for one cell."""
        if not pd.api.types.is_bool(value):
            try:
                return {
                    "raw_value": float(value),
                    "formatted_value": self.format_value(value, concept_label)
                }
            except (TypeError, ValueError):
                # Not convertible or not formattable as a number.
                pass
        # Non-numeric content is passed through instead of aborting the export.
        return {"raw_value": value, "formatted_value": str(value)}

    def to_llm_context(self) -> Dict[str, Any]:
        """
        Generate LLM-friendly context from the statement.

        Missing cells are omitted. Numeric cells carry a float ``raw_value``
        and a concept-formatted string; other cells carry the stored value
        and its ``str()`` form. Rows that share a label overwrite each other
        in ``line_items`` because the label is the dictionary key.

        Returns:
            Dictionary with structured financial data for LLM consumption
        """
        context = {
            "entity_name": self.entity_name,
            "statement_type": self.statement_type,
            "period_lengths": self.period_lengths,
            "mixed_periods": self.mixed_periods,
            "periods": list(self.data.columns),
            "line_items": {}
        }
        column_keys = [str(column) for column in self.data.columns]

        # Convert each line item to LLM-friendly format
        for label, values in self._iter_rows():
            concept_label = str(label)
            context["line_items"][concept_label] = {
                "label": concept_label,
                "values": {
                    key: self._llm_value(value, concept_label)
                    for key, value in zip(column_keys, values)
                    if not self._is_missing(value)
                },
                # Copy: the rule objects in CONCEPT_FORMATS are shared, so
                # handing out their live __dict__ would let callers mutate them.
                "formatting": dict(vars(self.get_concept_formatting(concept_label)))
            }
        return context

    def get_concept(self, concept_name: str) -> Optional[pd.Series]:
        """
        Get data for a specific concept across all periods.

        An exact index match is preferred; otherwise the first row whose
        label contains ``concept_name`` (case-insensitive) is returned.

        Args:
            concept_name: Name of the concept to retrieve

        Returns:
            Series with values across periods, or None if not found
        """
        # Try exact match first
        if concept_name in self.data.index:
            return self.data.loc[concept_name]

        # Try case-insensitive partial match; positional access returns a
        # single row even when that label is duplicated in the index.
        concept_lower = concept_name.lower()
        for position, label in enumerate(self.data.index):
            if concept_lower in str(label).lower():
                return self.data.iloc[position]
        return None

    def calculate_growth(self, concept_name: str, periods: int = 2) -> Optional[pd.Series]:
        """
        Calculate percentage growth for a concept.

        Args:
            concept_name: Name of the concept (resolved via ``get_concept``)
            periods: Shift passed to ``pct_change``; each value is compared
                with the one ``periods`` positions earlier in column order

        Returns:
            Series with growth rates in percent, or None if concept not found
        """
        concept_data = self.get_concept(concept_name)
        if concept_data is None:
            return None
        # pct_change yields fractions; scale to percent.
        return concept_data.pct_change(periods=periods) * 100

    @property
    def shape(self) -> tuple:
        """Get the shape of the underlying data."""
        return self.data.shape

    @property
    def columns(self) -> pd.Index:
        """Get the columns of the underlying data."""
        return self.data.columns

    @property
    def index(self) -> pd.Index:
        """Get the index of the underlying data."""
        return self.data.index

    @property
    def empty(self) -> bool:
        """Check if the underlying DataFrame is empty."""
        return self.data.empty

    def __len__(self) -> int:
        """Get the length of the underlying DataFrame."""
        return len(self.data)