"""
Rendering functions for XBRL data.
This module provides functions for formatting and displaying XBRL data.
"""
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
import pandas as pd
from rich import box
from rich.console import Group
from rich.panel import Panel
from rich.table import Table as RichTable
from rich.text import Text
from edgar.files.html import Document
from edgar.formatting import cik_text
from edgar.richtools import repr_rich, rich_to_text
from edgar.xbrl import standardization
from edgar.xbrl.core import determine_dominant_scale, format_date, format_value, parse_date
# Import color schemes from entity package
try:
    import os

    from edgar.entity.terminal_styles import get_color_scheme
except ImportError:
    # terminal_styles could not be imported: serve a minimal built-in scheme
    def get_xbrl_color_scheme():
        """Return the built-in fallback color scheme."""
        fallback_scheme = {
            "abstract_item": "bold",
            "total_item": "bold",
            "regular_item": "",
            "low_confidence_item": "dim",
            "positive_value": "",
            "negative_value": "",
            "total_value_prefix": "bold",
            "separator": "dim",
            "company_name": "bold",
            "statement_type": "bold",
            "panel_border": "white",
            "empty_value": "dim",
        }
        return fallback_scheme
else:
    def get_xbrl_color_scheme():
        """Get XBRL-specific color scheme with filing as default."""
        # The scheme name comes from the environment; XBRL output defaults to "filing"
        selected_scheme = os.environ.get("EDGAR_FINANCIALS_COLOR_SCHEME", "filing")
        return get_color_scheme(selected_scheme)
# Enhanced style configuration using XBRL color schemes
def get_xbrl_styles():
    """Build the nested style table used by the XBRL renderers.

    The active color scheme (see ``get_xbrl_color_scheme``) is regrouped into
    four sections: ``header``, ``value``, ``structure`` and ``comparison``.

    Returns:
        dict: Mapping of section name to a dict of style strings; the
        ``comparison`` section maps each direction to a symbol/color pair.
    """
    palette = get_xbrl_color_scheme()

    header_styles = {
        'company_name': palette['company_name'],
        'statement_title': palette['statement_type'],
        'top_level': palette['abstract_item'],  # Major sections like ASSETS, LIABILITIES
        'section': palette['total_item'],  # Subtotals like Current assets
        'subsection': palette['regular_item'],  # Regular line items
    }
    value_styles = {
        'positive': palette['positive_value'],
        'negative': palette['negative_value'],
        'total': palette['total_value_prefix'],
        'empty': palette['empty_value'],
    }
    structure_styles = {
        'separator': palette['separator'],
        'border': palette['panel_border'],
        'abstract': palette['abstract_item'],
        'total': palette['total_item'],
        'regular': palette['regular_item'],
        'low_confidence': palette['low_confidence_item'],
    }
    comparison_styles = {
        'increase': {'symbol': '▲', 'color': palette['positive_value']},
        'decrease': {'symbol': '▼', 'color': palette['negative_value']},
        'unchanged': {'symbol': '•', 'color': palette['separator']},
    }

    return {
        'header': header_styles,
        'value': value_styles,
        'structure': structure_styles,
        'comparison': comparison_styles,
    }
# Legacy fallback for existing code
# Snapshot of the style table taken once, when this module is imported.
# Later changes to the color scheme are not reflected here; call
# get_xbrl_styles() for a fresh table.
DEFAULT_STYLES = get_xbrl_styles()
# Configuration for comparative analysis
COMPARISON_CONFIG = {
    # Relative changes smaller than this are reported as 'unchanged'
    # by _calculate_comparison (0.01 == 1%).
    'threshold': 0.01,  # 1% change threshold
    'enabled_types': ['IncomeStatement', 'CashFlowStatement'],  # Statement types to show comparisons for
    'excluded_concepts': ['us-gaap_SharesOutstanding', 'us-gaap_CommonStockSharesOutstanding']  # Concepts to exclude
}
def _apply_style(text: str, style_config: dict) -> str:
"""Apply rich text styling based on configuration.
Args:
text: Text to style
style_config: Style configuration dictionary with 'style' and optional 'color' keys
Returns:
str: Styled text with rich markup
"""
style = style_config.get('style', 'none')
color = style_config.get('color', 'default')
case = style_config.get('case', 'none')
# Apply text case transformation
if case == 'upper':
text = text.upper()
elif case == 'title':
text = text.title()
# Build style tags
tags = []
if style == 'bold':
tags.append('bold')
if style == 'italic':
tags.append('italic')
if style == 'dim':
tags.append('dim')
if color != 'default':
tags.append(color)
# Apply styling
if tags:
return f"[{' '.join(tags)}]{text}[/{' '.join(tags)}]"
return text
def _calculate_comparison(current_value: Any, previous_value: Any) -> Optional[Tuple[float, str]]:
"""Calculate the percentage change between two values.
Args:
current_value: Current period value
previous_value: Previous period value
Returns:
Tuple of (percentage_change, comparison_symbol) or None if comparison not possible
"""
try:
if isinstance(current_value, str):
current_value = float(current_value.replace(',', ''))
if isinstance(previous_value, str):
previous_value = float(previous_value.replace(',', ''))
if previous_value == 0:
if current_value == 0:
return (0.0, 'unchanged')
return (float('inf'), 'increase' if current_value > 0 else 'decrease')
pct_change = (current_value - previous_value) / abs(previous_value)
if abs(pct_change) < COMPARISON_CONFIG['threshold']:
return (0.0, 'unchanged')
return (pct_change, 'increase' if pct_change > 0 else 'decrease')
except (ValueError, TypeError):
return None
@dataclass
class PeriodData:
    """Data about a single period for display in a statement.

    Attributes:
        key: The period key (e.g., "instant_2023-12-31")
        label: The formatted display label (e.g., "Dec 31, 2023")
        end_date: The end date in YYYY-MM-DD format
        start_date: The start date for duration periods
        is_duration: Whether this is a duration period
        quarter: Quarter identifier if applicable (Q1-Q4)
    """
    key: str
    label: str
    end_date: Optional[str] = None
    start_date: Optional[str] = None
    is_duration: bool = False
    quarter: Optional[str] = None
@dataclass
class StatementCell:
    """A single cell in a statement row.

    Attributes:
        value: The raw cell value
        style: Style attributes like color, bold, etc.
        comparison: Comparison info if applicable
        formatter: Callable turning ``value`` into its display string;
            the built-in ``str`` is used when none is supplied
    """
    value: Any
    style: Dict[str, str] = field(default_factory=dict)
    comparison: Optional[Dict[str, Any]] = None
    formatter: Callable[[Any], str] = str

    def get_formatted_value(self) -> str:
        """Return ``value`` passed through this cell's ``formatter``."""
        render = self.formatter
        return render(self.value)
@dataclass
class StatementRow:
    """A row in a financial statement.

    Attributes:
        label: Row label
        level: Indentation/hierarchy level
        cells: The row's cells, one per displayed period
        metadata: Additional info like concept name, type, etc.
        is_abstract: Whether the row is an abstract (header) row
        is_dimension: Whether the row is a dimensional breakdown row
        has_dimension_children: Whether dimensional rows hang off this row
    """
    label: str
    level: int
    cells: List[StatementCell] = field(default_factory=list)
    metadata: Dict[str, Any] = field(default_factory=dict)
    is_abstract: bool = False
    is_dimension: bool = False
    has_dimension_children: bool = False
@dataclass
class StatementHeader:
    """Header information for a financial statement.

    Attributes:
        columns: Period labels
        period_keys: Period keys for mapping to data
        periods: Detailed period information
        metadata: Info like date ranges, fiscal periods
    """
    columns: List[str] = field(default_factory=list)
    period_keys: List[str] = field(default_factory=list)
    periods: List[PeriodData] = field(default_factory=list)
    metadata: Dict[str, Any] = field(default_factory=dict)
@dataclass
class RenderedStatement:
    """Complete representation of a financial statement.

    This class provides an intermediate representation of statement data
    that can be used by different rendering backends (e.g. rich, web, etc).

    Attributes:
        title: Statement title
        header: Column/period information
        rows: Statement rows in display order
        metadata: Statement-level metadata like units, scales
        statement_type: Statement type identifier
        fiscal_period_indicator: Subtitle such as "Year Ended", if any
        units_note: Units note shown under the title (may contain rich markup)
    """
    title: str
    header: StatementHeader
    rows: List[StatementRow]
    metadata: Dict[str, Any] = field(default_factory=dict)  # Statement-level metadata like units, scales
    statement_type: str = ""
    fiscal_period_indicator: Optional[str] = None
    units_note: Optional[str] = None

    @property
    def periods(self):
        """The detailed period list held by the header."""
        return self.header.periods

    def __rich__(self) -> RichTable:
        """Render as a rich table with professional styling"""
        styles = get_xbrl_styles()
        header_styles = styles['header']
        structure_styles = styles['structure']
        value_styles = styles['value']

        def markup(text: str, style: str) -> str:
            # An empty style would produce "[]...[/]", which rich rejects
            # (a bare "[/]" with nothing to close), so emit the text as-is.
            return f"[{style}]{text}[/{style}]" if style else text

        def styled(text: str, style: Any, **text_kwargs: Any) -> Text:
            # Only pass a style through when the scheme actually defines one
            return Text(text, style=style, **text_kwargs) if style else Text(text, **text_kwargs)

        # Clean up title - remove internal terminology like "(Standardized)"
        clean_title = self.title.replace("(Standardized)", "").strip()

        # Title hierarchy: main title, fiscal period subtitle, units note
        title_parts = [markup(clean_title, header_styles['statement_title'])]
        if self.fiscal_period_indicator:
            title_parts.append(f"{self.fiscal_period_indicator}")
        if self.units_note:
            title_parts.append(markup(self.units_note, structure_styles['separator']))

        table = RichTable(title="\n".join(title_parts),
                          box=box.SIMPLE,
                          border_style=structure_styles['border'])

        # First column holds the row labels; one further column per period
        table.add_column("", justify="left")
        column_style = structure_styles['total']
        for column in self.header.columns:
            if column_style:
                table.add_column(Text(column, style=column_style))
            else:
                table.add_column(column)

        for row in self.rows:
            indent = " " * row.level

            # Pick the label text and style from the row's role
            if row.is_dimension:
                # Dimension items use the low-confidence style
                label_text = f"{indent}{row.label}"
                label_style = structure_styles['low_confidence']
            elif row.is_abstract:
                if row.level == 0:
                    # Top-level header - major sections like ASSETS, LIABILITIES
                    label_text = row.label.upper()
                    label_style = header_styles['top_level']
                elif row.level == 1:
                    # Section header - subtotals like Current assets
                    label_text = row.label
                    label_style = header_styles['section']
                else:
                    # Sub-section header - indented one level less than regular items
                    sub_indent = " " * (row.level - 1)
                    label_text = f"{sub_indent}{row.label}"
                    label_style = header_styles['subsection']
            elif row.has_dimension_children and row.cells:
                # Items with dimension children get the total style and a colon
                label_text = f"{indent}{row.label}:"
                label_style = structure_styles['total']
            else:
                # Regular line items - indented based on level
                label_text = f"{indent}{row.label}"
                label_style = header_styles['subsection']
            styled_label = styled(label_text, label_style)

            # Convert cells to their display representation with value-based styling
            cell_values = []
            for cell in row.cells:
                if cell.value is None or cell.value == "":
                    cell_values.append(Text("", justify="right"))
                    continue

                cell_str = str(cell.formatter(cell.value))
                if row.is_abstract or "Total" in row.label:
                    cell_style = value_styles['total']
                elif cell_str.startswith(('(', '-', '$(')):
                    # A leading parenthesis or minus sign marks a negative value
                    cell_style = value_styles['negative']
                else:
                    cell_style = value_styles['positive']
                cell_values.append(styled(cell_str, cell_style, justify="right"))

            table.add_row(styled_label, *cell_values)

        # Footer metadata becomes the table caption
        footer_parts = []
        company_name = self.metadata.get('company_name')
        form_type = self.metadata.get('form_type')
        period_end = self.metadata.get('period_end')
        fiscal_period = self.metadata.get('fiscal_period')
        if company_name:
            footer_parts.append(company_name)
        if form_type:
            footer_parts.append(f"Form {form_type}")
        if period_end:
            footer_parts.append(f"Period ending {period_end}")
        if fiscal_period:
            footer_parts.append(f"Fiscal {fiscal_period}")
        # Always add source
        footer_parts.append("Source: SEC XBRL")

        table.caption = markup(" • ".join(footer_parts), structure_styles['separator'])
        return table

    def __repr__(self):
        return repr_rich(self.__rich__())

    def __str__(self) -> str:
        """Convert to string with proper width to avoid truncation."""
        from edgar.richtools import rich_to_text
        return rich_to_text(self.__rich__(), width=150)

    def to_dataframe(self, include_unit: bool = False, include_point_in_time: bool = False) -> Any:
        """Convert to a pandas DataFrame

        Columns are, in order: ``concept``, ``label``, the optional ``unit``
        and ``point_in_time`` columns, one column per period, then ``level``,
        ``abstract`` and ``dimension``. Period columns are named by end date
        (plus quarter when known), falling back to the header's display label.

        Args:
            include_unit: If True, add a 'unit' column with unit information (e.g., 'usd', 'shares', 'usdPerShare')
            include_point_in_time: If True, add a 'point_in_time' boolean column (True for 'instant', False for 'duration')

        Returns:
            pd.DataFrame: DataFrame with statement data and optional unit/point-in-time columns.
            If an ImportError is raised while building it, a message string is
            returned instead (kept for backward compatibility).
        """
        try:
            from edgar.xbrl.core import get_unit_display_name
            from edgar.xbrl.core import is_point_in_time as get_is_point_in_time

            period_keys = self.header.period_keys
            display_columns = self.header.columns

            # Map period position -> DataFrame column name
            column_map = {}
            for i, period in enumerate(self.header.periods):
                if period.end_date:
                    if period.quarter:
                        column_map[i] = f"{period.end_date} ({period.quarter})"
                    else:
                        column_map[i] = period.end_date
                elif i < len(display_columns):
                    # Fallback to the display label
                    column_map[i] = display_columns[i]
                else:
                    # header.columns is shorter than header.periods: use the
                    # period's own label rather than raising IndexError
                    column_map[i] = period.label

            def first_known(per_period: Dict[str, Any]) -> Any:
                # First non-None entry following the header's period order
                return next(
                    (per_period[key] for key in period_keys
                     if key in per_period and per_period[key] is not None),
                    None,
                )

            df_rows = []
            for row in self.rows:
                df_row = {
                    'concept': row.metadata.get('concept', ''),
                    'label': row.label
                }

                if include_unit:
                    # All periods should have the same unit for a given concept
                    unit_ref = first_known(row.metadata.get('units', {}))
                    df_row['unit'] = get_unit_display_name(unit_ref)

                if include_point_in_time:
                    period_type = first_known(row.metadata.get('period_types', {}))
                    df_row['point_in_time'] = get_is_point_in_time(period_type)

                # Cells beyond the known periods have no column and are skipped
                for i, cell in enumerate(row.cells):
                    if i < len(self.header.periods):
                        df_row[column_map[i]] = cell.value

                df_row['level'] = row.level
                df_row['abstract'] = row.is_abstract
                df_row['dimension'] = row.is_dimension
                df_rows.append(df_row)

            return pd.DataFrame(df_rows)
        except ImportError:
            return "Pandas is required for DataFrame conversion"

    def to_markdown(self) -> str:
        """Convert to a markdown table representation

        Empty cells (value ``None`` or ``""``) are rendered as empty strings,
        matching the rich rendering; every other cell is formatted with the
        cell's formatter and converted to ``str``.
        """
        lines = [f"## {self.title}", ""]

        # Subtitle line: fiscal period indicator and/or units note
        subtitle_parts = []
        if self.fiscal_period_indicator:
            subtitle_parts.append(f"**{self.fiscal_period_indicator}**")
        if self.units_note:
            # Remove rich formatting tags from units note
            clean_units = self.units_note.replace('[italic]', '').replace('[/italic]', '')
            subtitle_parts.append(f"*{clean_units}*")
        if subtitle_parts:
            lines.append(" ".join(subtitle_parts))
            lines.append("")

        # Header and separator rows
        header = [""] + self.header.columns
        lines.append("| " + " | ".join(header) + " |")
        separator = ["---"] + ["---" for _ in self.header.columns]
        lines.append("| " + " | ".join(separator) + " |")

        for row in self.rows:
            indent = " " * row.level
            if row.is_abstract:
                label = f"**{indent}{row.label}**"
            elif row.is_dimension:
                label = f"*{indent}{row.label}*"
            else:
                label = f"{indent}{row.label}"

            cell_values = []
            for cell in row.cells:
                # Check the raw value first: formatting None with the default
                # str formatter would otherwise print the literal "None"
                if cell.value is None or cell.value == "":
                    cell_values.append("")
                    continue
                formatted = cell.formatter(cell.value)
                # str() also flattens rich Text objects to their plain text
                cell_values.append("" if formatted is None else str(formatted))

            lines.append("| " + " | ".join([label] + cell_values) + " |")

        return "\n".join(lines)
def _format_comparison(pct_change: float, comparison_type: str) -> str:
    """Format a comparison indicator with the appropriate symbol and color.

    The symbol and color come from ``DEFAULT_STYLES['comparison']``. For
    'increase' and 'decrease' the absolute change is appended as a
    percentage with one decimal place; 'unchanged' shows the symbol only.

    Args:
        pct_change: Percentage change value
        comparison_type: Type of comparison ('increase', 'decrease', or 'unchanged')

    Returns:
        str: Formatted comparison indicator, wrapped in rich markup when the
        scheme defines a color for this comparison type

    Raises:
        KeyError: If ``comparison_type`` is not a configured comparison type
    """
    indicator = DEFAULT_STYLES['comparison'][comparison_type]
    color = indicator['color']
    symbol = indicator['symbol']

    pct_text = "" if comparison_type == 'unchanged' else f" {abs(pct_change):.1%}"
    body = f"{symbol}{pct_text}"

    # Schemes may leave the color empty (the built-in fallback does for
    # positive/negative values); "[]...[/]" is not valid rich markup, so
    # return the indicator unstyled in that case.
    if not color:
        return body
    return f"[{color}]{body}[/{color}]"
# us-gaap concept identifiers for share-count facts.
# NOTE(review): not referenced in the visible part of this file — presumably
# used when deciding how to scale/format share values; confirm against callers.
share_concepts = [
    'us-gaap_CommonStockSharesOutstanding',
    'us-gaap_WeightedAverageNumberOfSharesOutstandingBasic',
    'us-gaap_WeightedAverageNumberOfSharesOutstandingDiluted',
    'us-gaap_WeightedAverageNumberOfDilutedSharesOutstanding',
    'us-gaap_CommonStockSharesIssued',
]
# us-gaap concept identifiers for per-share facts (EPS, dividends per share,
# par value, ...).
# NOTE(review): likewise not referenced in the visible part of this file —
# presumably exempts per-share values from monetary scaling; confirm.
eps_concepts = [
    'us-gaap_EarningsPerShareBasic',
    'us-gaap_EarningsPerShareDiluted',
    'us-gaap_EarningsPerShareBasicAndDiluted',
    'us-gaap_IncomeLossFromContinuingOperationsPerBasicShare',
    'us-gaap_IncomeLossFromContinuingOperationsPerDilutedShare',
    'us-gaap_IncomeLossFromDiscontinuedOperationsNetOfTaxPerBasicShare',
    'us-gaap_IncomeLossFromDiscontinuedOperationsNetOfTaxPerDilutedShare',
    'us-gaap_NetAssetValuePerShare',
    'us-gaap_BookValuePerShare',
    'us-gaap_CommonStockDividendsPerShareDeclared',
    'us-gaap_CommonStockDividendsPerShareCashPaid',
    'us-gaap_CommonStockParOrStatedValuePerShare',
]
def _is_html(text: str) -> bool:
"""
Simple check to determine if a string contains HTML content.
Args:
text: The string to check
Returns:
bool: True if the string appears to contain HTML, False otherwise
"""
html_tags = ['
', '
', '', '', '', '', '', '']
text_lower = text.lower()
return any(tag in text_lower for tag in html_tags)
def html_to_text(html: str) -> str:
    """
    Convert HTML to plain text.

    The fragment is wrapped in an ``<html>`` element when it does not already
    start with one, parsed with ``Document.parse`` and rendered to text at a
    width of 80 characters.

    Args:
        html: HTML content to convert

    Returns:
        str: Plain text representation of the HTML
    """
    # Wrap in html tag if not present (ignoring leading whitespace and case)
    if not html.lstrip().lower().startswith("<html"):
        html = f"<html>{html}</html>"
    document = Document.parse(html)
    return rich_to_text(str(document), width=80)
def _format_period_labels(
    periods_to_display: List[Tuple[str, str]],
    entity_info: Dict[str, Any],
    statement_type: str,
    show_date_range: bool = False
) -> Tuple[List[PeriodData], Optional[str]]:
    """
    Format period labels for display and determine fiscal period indicator.

    This function processes period keys and labels to create human-readable period labels
    for financial statements. When show_date_range=True, duration periods are displayed
    with both start and end dates (e.g., "Jan 1, 2023 - Mar 31, 2023"). When
    show_date_range=False (default), only the end date is shown (e.g., "Mar 31, 2023").

    The function handles various input formats:
    1. Period keys in standard format (instant_YYYY-MM-DD or duration_YYYY-MM-DD_YYYY-MM-DD)
    2. Original labels with full or abbreviated month names
    3. Special formatted labels with date range information

    For quarterly periods, quarter numbers (Q1-Q4) are added to provide additional context.

    Args:
        periods_to_display: List of period keys and original labels
        entity_info: Entity information dictionary (currently not read by this function)
        statement_type: Type of statement (BalanceSheet, IncomeStatement, etc.)
        show_date_range: Whether to show full date ranges for duration periods

    Returns:
        Tuple of (formatted_periods, fiscal_period_indicator)
        where formatted_periods is a list of PeriodData objects containing detailed period information
        and fiscal_period_indicator is None when periods_to_display is empty
    """
    formatted_periods = []
    fiscal_period_indicator = None
    # We get entity_info but don't currently use document_period_end_date
    # Uncomment if needed: doc_period_end_date = entity_info.get('document_period_end_date')
    # Analyze ALL periods to detect mixed period types (not just the first one)
    period_types = []
    is_balance_sheet = False
    if periods_to_display:
        # Check if this is a balance sheet (instant periods)
        # Only the FIRST period key decides between the instant and duration branches.
        first_period_key = periods_to_display[0][0]
        is_balance_sheet = first_period_key.startswith('instant_')
        if is_balance_sheet:
            # For Balance Sheet - simple "As of" indicator
            fiscal_period_indicator = "As of"
            # Include dates in the indicator if multiple periods
            if len(periods_to_display) > 1:
                try:
                    dates = []
                    for period_key, _ in periods_to_display:
                        if period_key.startswith('instant_'):
                            date_str = period_key.split('_')[1]
                            date_obj = parse_date(date_str)
                            dates.append(date_obj.strftime("%B %d, %Y"))
                    # Dates are spelled out only for exactly two instant periods
                    if len(dates) == 2:
                        fiscal_period_indicator = f"As of {dates[0]} and {dates[1]}"
                    else:
                        fiscal_period_indicator = "As of"
                except (ValueError, TypeError, IndexError):
                    fiscal_period_indicator = "As of"
        else:
            # For Income/Cash Flow - analyze duration periods to detect mixed types
            for period_key, _ in periods_to_display:
                if not period_key.startswith('instant_') and '_' in period_key:
                    try:
                        parts = period_key.split('_')
                        if len(parts) >= 3:
                            start_date = parse_date(parts[1])
                            end_date = parse_date(parts[2])
                            duration_days = (end_date - start_date).days
                            # Categorize by duration
                            if 85 <= duration_days <= 95:
                                period_types.append("quarterly")
                            elif 175 <= duration_days <= 190:
                                period_types.append("semi-annual")
                            elif 265 <= duration_days <= 285:
                                period_types.append("nine-month")
                            elif 355 <= duration_days <= 375:
                                period_types.append("annual")
                            else:
                                period_types.append("other")
                    except (ValueError, TypeError, IndexError):
                        period_types.append("other")
            # Generate fiscal period indicator based on detected types
            unique_types = list(set(period_types))
            if len(unique_types) == 1:
                # Single period type
                period_type = unique_types[0]
                if period_type == "quarterly":
                    fiscal_period_indicator = "Three Months Ended"
                elif period_type == "semi-annual":
                    fiscal_period_indicator = "Six Months Ended"
                elif period_type == "nine-month":
                    fiscal_period_indicator = "Nine Months Ended"
                elif period_type == "annual":
                    fiscal_period_indicator = "Year Ended"
                else:
                    fiscal_period_indicator = "Period Ended"
            elif "quarterly" in unique_types and "nine-month" in unique_types:
                # Mixed quarterly and YTD - common for Q3 reports
                fiscal_period_indicator = "Three and Nine Months Ended"
            elif "quarterly" in unique_types and "semi-annual" in unique_types:
                # Mixed quarterly and semi-annual - common for Q2 reports
                fiscal_period_indicator = "Three and Six Months Ended"
            elif "quarterly" in unique_types and "annual" in unique_types:
                # Mixed quarterly and annual - common for Q4/year-end reports
                fiscal_period_indicator = "Three Months and Year Ended"
            elif len(unique_types) > 1:
                # Other mixed types
                fiscal_period_indicator = "Multiple Periods Ended"
            else:
                # No duration period could be classified at all
                fiscal_period_indicator = "Period Ended"
    # Create formatted period columns
    for period_key, original_label in periods_to_display:
        # Extract start/end dates from duration periods for date range display
        start_date_obj = None
        end_date_obj = None
        start_date_str = None
        end_date_str = None
        is_duration = False
        duration_days = 0
        q_num = None
        # Parse dates from period key for duration periods
        if not period_key.startswith('instant_') and '_' in period_key and len(period_key.split('_')) >= 3:
            parts = period_key.split('_')
            try:
                start_date_str = parts[1]
                end_date_str = parts[2]
                start_date_obj = parse_date(start_date_str)
                end_date_obj = parse_date(end_date_str)
                is_duration = True
                duration_days = (end_date_obj - start_date_obj).days
                # Determine quarter number for quarterly periods
                # (80-100 days here; the indicator logic above uses the narrower 85-95 window)
                if 80 <= duration_days <= 100:  # Quarterly period
                    month = end_date_obj.month
                    # NOTE(review): a quarter ending in December is labelled Q1 here,
                    # not Q4 — confirm this is the intended fiscal-calendar heuristic.
                    if month <= 3 or month == 12:
                        q_num = "Q1"
                    elif month <= 6:
                        q_num = "Q2"
                    elif month <= 9:
                        q_num = "Q3"
                    else:
                        q_num = "Q4"
            except (ValueError, TypeError, IndexError):
                pass
        # For instant periods, extract the date
        elif period_key.startswith('instant_'):
            try:
                end_date_str = period_key.split('_')[1]
                end_date_obj = parse_date(end_date_str)
            except (ValueError, TypeError, IndexError):
                pass
        # Start with the original label or an empty string
        final_label = ""
        # First check for date range labels with "to" - prioritize this check
        # NOTE(review): this is a plain substring test, so labels such as
        # "October 31, 2023" also enter this branch; they then fail the ' to '
        # split below, leave final_label empty and skip Case 1/Case 2 entirely.
        if original_label and 'to' in original_label:
            # Handle date range labels like "Annual: September 25, 2022 to September 30, 2023"
            try:
                parts = original_label.split(' to ')
                if len(parts) > 1:
                    if show_date_range:
                        # Use the full date range that's already in the label
                        final_label = original_label
                    else:
                        # Extract just the end date when show_date_range is False
                        end_date_display_str = parts[1].strip()
                        try:
                            if not end_date_obj:  # If we don't already have end_date from period_key
                                end_date_obj = parse_date(end_date_display_str)
                            if end_date_obj:
                                end_date_str = end_date_obj.strftime('%Y-%m-%d')
                                final_label = format_date(end_date_obj)
                                # Add quarter info if available
                                if q_num and statement_type in ['IncomeStatement', 'CashFlowStatement']:
                                    final_label = f"{final_label} ({q_num})"
                                # Add YTD indicator for year-to-date periods
                                elif duration_days and statement_type in ['IncomeStatement', 'CashFlowStatement']:
                                    if 175 <= duration_days <= 190:  # ~6 months
                                        final_label = f"{final_label} (YTD)"
                                    elif 265 <= duration_days <= 285:  # ~9 months
                                        final_label = f"{final_label} (YTD)"
                        except (ValueError, TypeError):
                            # If we can't parse the end date, use the text after ' to ' as-is
                            final_label = end_date_display_str
                    # Try to parse start date if we're dealing with a duration
                    # NOTE(review): is_duration is only set after start_date_str has been
                    # assigned from the period key, so this branch appears unreachable.
                    if is_duration and not start_date_str and 'to' in original_label:
                        try:
                            start_date_display_str = parts[0].split(':')[-1].strip()
                            start_date_tmp = parse_date(start_date_display_str)
                            if start_date_tmp:
                                start_date_str = start_date_tmp.strftime('%Y-%m-%d')
                        except (ValueError, TypeError, IndexError):
                            pass
            except (ValueError, TypeError, IndexError):
                # If any parsing fails, leave label unchanged
                final_label = original_label
        # Case 1: If we still don't have a final label and have a date with commas, process it
        elif not final_label and original_label and ',' in original_label:
            for full_month, abbr in [
                ('January', 'Jan'), ('February', 'Feb'), ('March', 'Mar'),
                ('April', 'Apr'), ('May', 'May'), ('June', 'Jun'),
                ('July', 'Jul'), ('August', 'Aug'), ('September', 'Sep'),
                ('October', 'Oct'), ('November', 'Nov'), ('December', 'Dec')
            ]:
                if full_month in original_label:
                    try:
                        # Extract year from the original label
                        year = int(''.join(c for c in original_label.split(',')[1] if c.isdigit()))
                        # Extract day - find digits after the month
                        day_part = original_label.split(full_month)[1].strip()
                        day = int(''.join(c for c in day_part.split(',')[0] if c.isdigit()))
                        month_num = {'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4, 'May': 5, 'Jun': 6,
                                     'Jul': 7, 'Aug': 8, 'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12}[abbr]
                        try:
                            date_obj = datetime(year, month_num, day).date()
                            # If we don't already have an end date from the period key
                            if not end_date_obj:
                                end_date_obj = date_obj
                                end_date_str = date_obj.strftime('%Y-%m-%d')
                            final_label = format_date(date_obj)
                            # If showing date range and we have start_date for duration, use it
                            if show_date_range and is_duration and start_date_obj:
                                final_label = f"{format_date(start_date_obj)} - {final_label}"
                            break
                        except ValueError:
                            # Handle invalid dates by clamping the day to the month's last day
                            if day > 28:
                                if month_num == 2:  # February
                                    # Simplified leap-year rule (divisible by 4 only)
                                    day = 28 if year % 4 != 0 else 29
                                elif month_num in [4, 6, 9, 11]:  # 30-day months
                                    day = 30
                                else:  # 31-day months
                                    day = 31
                                try:
                                    date_obj = datetime(year, month_num, day).date()
                                    # If we don't already have an end date from the period key
                                    if not end_date_obj:
                                        end_date_obj = date_obj
                                        end_date_str = date_obj.strftime('%Y-%m-%d')
                                    final_label = format_date(date_obj)
                                    # If showing date range and we have start_date for duration, use it
                                    if show_date_range and is_duration and start_date_obj:
                                        final_label = f"{format_date(start_date_obj)} - {final_label}"
                                    break
                                except ValueError:
                                    pass
                    except (ValueError, IndexError):
                        pass
            # If we couldn't extract a date but label has abbreviated month, use the original
            if not final_label:
                for abbr in ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']:
                    if abbr in original_label:
                        final_label = original_label
                        break
                # If no month abbreviation was found, use original label
                if not final_label:
                    final_label = original_label
        # Case 2: Handle other special formatted period labels like those with colons
        elif original_label and len(original_label) > 4 and not final_label:
            # Both branches keep the original label unchanged
            if ':' in original_label:
                # Labels with prefix like "Annual:" but without "to"
                final_label = original_label
            else:
                # Any other labels we couldn't handle
                final_label = original_label
        # Case 3: Either use existing final_label (possibly from Case 1/2) or extract from period key
        # If final_label is set but we want date range for a duration period, check if we need to add the start date
        if final_label and show_date_range and is_duration and start_date_obj and end_date_obj:
            # Check if the final_label already includes a date range (contains a hyphen)
            if "-" not in final_label:
                # If it's not already a date range, it's likely just the end date
                # Try to detect if the label contains the formatted end date
                end_date_formatted = format_date(end_date_obj)
                if end_date_formatted in final_label:
                    # Replace the end date with the full range
                    full_range = f"{format_date(start_date_obj)} - {end_date_formatted}"
                    final_label = final_label.replace(end_date_formatted, full_range)
                else:
                    # If we can't detect the end date pattern, prepend the start date
                    final_label = f"{format_date(start_date_obj)} - {final_label}"
            # If we have quarter info, ensure it's present for income/cash flow statements
            if q_num and statement_type in ['IncomeStatement', 'CashFlowStatement'] and f"({q_num})" not in final_label:
                final_label = f"{final_label} ({q_num})"
            # Add YTD indicator for year-to-date periods if not already added
            elif duration_days and statement_type in ['IncomeStatement', 'CashFlowStatement'] and "(YTD)" not in final_label:
                if 175 <= duration_days <= 190:  # ~6 months
                    final_label = f"{final_label} (YTD)"
                elif 265 <= duration_days <= 285:  # ~9 months
                    final_label = f"{final_label} (YTD)"
        # If we don't have a final_label yet, process based on period key
        if not final_label:
            if period_key.startswith('instant_'):
                # For instant periods, just use the date
                if end_date_obj:
                    final_label = format_date(end_date_obj)
                else:
                    final_label = original_label
            elif is_duration:
                # For duration periods, format based on show_date_range
                if show_date_range and start_date_obj and end_date_obj:
                    final_label = f"{format_date(start_date_obj)} - {format_date(end_date_obj)}"
                    # Add quarter info if available
                    if q_num and statement_type in ['IncomeStatement', 'CashFlowStatement']:
                        final_label = f"{final_label} ({q_num})"
                    # Add YTD indicator for year-to-date periods
                    elif duration_days and statement_type in ['IncomeStatement', 'CashFlowStatement']:
                        if 175 <= duration_days <= 190:  # ~6 months
                            final_label = f"{final_label} (YTD)"
                        elif 265 <= duration_days <= 285:  # ~9 months
                            final_label = f"{final_label} (YTD)"
                elif end_date_obj:
                    final_label = format_date(end_date_obj)
                    # Add quarter info if available
                    if q_num and statement_type in ['IncomeStatement', 'CashFlowStatement']:
                        final_label = f"{final_label} ({q_num})"
                    # Add YTD indicator for year-to-date periods
                    elif duration_days and statement_type in ['IncomeStatement', 'CashFlowStatement']:
                        if 175 <= duration_days <= 190:  # ~6 months
                            final_label = f"{final_label} (YTD)"
                        elif 265 <= duration_days <= 285:  # ~9 months
                            final_label = f"{final_label} (YTD)"
                else:
                    final_label = original_label
            else:
                # Fall back to original label for anything else
                final_label = original_label
        # Create PeriodData object with all the information
        period_data = PeriodData(
            key=period_key,
            label=final_label,
            end_date=end_date_str,
            # start_date is only reported when the period key parsed as a duration
            start_date=start_date_str if is_duration else None,
            is_duration=is_duration,
            quarter=q_num
        )
        # Add the formatted period to the result
        formatted_periods.append(period_data)
    return formatted_periods, fiscal_period_indicator
def _create_units_note(
is_monetary_statement: bool,
dominant_scale: int,
shares_scale: Optional[int]
) -> str:
"""
Create the units note for the statement title.
Args:
is_monetary_statement: Whether the statement contains monetary values
dominant_scale: The dominant scale for monetary values
shares_scale: The scale for share values, if present
Returns:
str: Formatted units note or empty string
"""
if not is_monetary_statement:
return ""
monetary_scale_text = ""
if dominant_scale == -3:
monetary_scale_text = "thousands"
elif dominant_scale == -6:
monetary_scale_text = "millions"
elif dominant_scale == -9:
monetary_scale_text = "billions"
shares_scale_text = ""
if shares_scale is not None:
if shares_scale == -3:
shares_scale_text = "thousands"
elif shares_scale == -6:
shares_scale_text = "millions"
elif shares_scale == -9:
shares_scale_text = "billions"
elif shares_scale == 0:
shares_scale_text = "actual amounts"
else:
# For other negative scales (like -4, -5, -7, etc.)
# Use a more generic description based on the scale
scale_factor = 10 ** (-shares_scale)
if scale_factor >= 1000:
shares_scale_text = f"scaled by {scale_factor:,}"
# Construct appropriate units note
if monetary_scale_text and shares_scale_text and shares_scale != dominant_scale:
return f"[italic](In {monetary_scale_text}, except shares in {shares_scale_text} and per share data)[/italic]"
elif monetary_scale_text:
return f"[italic](In {monetary_scale_text}, except shares and per share data)[/italic]"
else:
return ""
def _format_value_for_display_as_string(
value: Any,
item: Dict[str, Any],
period_key: str,
is_monetary_statement: bool,
dominant_scale: int,
shares_scale: Optional[int],
comparison_info: Optional[Dict[str, Any]] = None,
xbrl_instance: Optional[Any] = None
) -> str:
"""
Format a value for display in a financial statement, returning a string.
Args:
value: The value to format
item: The statement line item containing the value
period_key: The period key for this value
is_monetary_statement: Whether this is a monetary statement
dominant_scale: The dominant scale for monetary values
shares_scale: The scale for share values, if present
comparison_info: Optional comparison information for showing trends
Returns:
str: Formatted value as a string
"""
# Fast path for empty values
if not value or value == "":
return ""
# Type check without multiple isinstance calls
value_type = type(value)
if value_type not in (int, float, str):
return ""
# Extract only needed metadata
concept = item.get('concept', '')
# Fast check for common share and EPS concepts
is_share_value = concept in share_concepts
is_eps_value = concept in eps_concepts
# Only perform expensive label operations if needed for monetary determination
is_monetary = is_monetary_statement
if is_eps_value or is_share_value:
is_monetary = False
elif not is_monetary:
# Skip label checks entirely if we already know it's not monetary
pass
else:
# Only check label for ratio-related items if we think it might be monetary
label = item.get('label', '').lower()
if any(keyword in label for keyword in ('ratio', 'percentage', 'per cent')):
is_monetary = False
# Get decimals with a default value to avoid conditional logic later
fact_decimals = 0
if period_key:
decimals_dict = item.get('decimals', {})
if decimals_dict:
fact_decimals = decimals_dict.get(period_key, 0) or 0
# Apply presentation logic for display (Issue #463)
# Matches SEC HTML filing display - uses preferred_sign from presentation linkbase
if value_type in (int, float) and period_key:
# Get statement context
statement_type = item.get('statement_type')
# For Income Statement and Cash Flow Statement: Use preferred_sign
# preferred_sign comes from preferredLabel in presentation linkbase
# -1 = negate for display (e.g., expenses, dividends, outflows)
# 1 = show as-is
# None = no transformation specified
if statement_type in ('IncomeStatement', 'CashFlowStatement'):
preferred_sign = item.get('preferred_signs', {}).get(period_key)
if preferred_sign is not None and preferred_sign != 0:
value = value * preferred_sign
# Balance Sheet: No transformation (use as-is)
# else: pass
# Format numeric values efficiently
if value_type in (int, float):
# Handle EPS values with decimal precision
if is_eps_value:
# EPS values should show 2-3 decimal places and not be scaled
if abs(value) >= 1000:
# For very large EPS values, use thousands separator
return f"{value:,.2f}"
elif abs(value) >= 10:
# For EPS values >= 10, use 2 decimal places
return f"{value:.2f}"
else:
# For typical EPS values < 10, use up to 3 decimal places but remove trailing zeros
formatted = f"{value:.3f}".rstrip('0').rstrip('.')
# Ensure at least 2 decimal places for EPS
if '.' not in formatted or len(formatted.split('.')[1]) < 2:
return f"{value:.2f}"
return formatted
# Handle share values with a specialized path
elif is_share_value:
if fact_decimals <= -3:
# Efficiently apply scaling
scale_factor = 10 ** (-fact_decimals)
scaled_value = value / scale_factor
return f"{scaled_value:,.0f}"
else:
# For smaller share values, no scaling needed
return f"{value:,.0f}"
else:
# Use cached format_value function for other values
# Get currency symbol for this period using on-demand resolution
currency_symbol = None
if is_monetary and period_key and xbrl_instance:
from edgar.xbrl.core import get_currency_symbol
# Get element name from item
element_name = item.get('name') or item.get('concept', '')
if element_name:
currency_measure = xbrl_instance.get_currency_for_fact(element_name, period_key)
if currency_measure:
currency_symbol = get_currency_symbol(currency_measure)
return format_value(value, is_monetary, dominant_scale, fact_decimals, currency_symbol)
else:
# String values - only check HTML if it might contain tags
if '<' in value and '>' in value and _is_html(value):
return html_to_text(value)
return value
def _format_value_for_display(
        value: Any,
        item: Dict[str, Any],
        period_key: str,
        is_monetary_statement: bool,
        dominant_scale: int,
        shares_scale: Optional[int],
        comparison_info: Optional[Dict[str, Any]] = None,
        xbrl_instance: Optional[Any] = None
) -> Text:
    """
    Rich Text variant of _format_value_for_display_as_string.

    Args:
        value: The value to format
        item: The statement line item containing the value
        period_key: The period key for this value
        is_monetary_statement: Whether this is a monetary statement
        dominant_scale: The dominant scale for monetary values
        shares_scale: The scale for share values, if present
        comparison_info: Optional comparison information for showing trends
        xbrl_instance: Optional XBRL instance forwarded to the string formatter

    Returns:
        Text: The formatted string wrapped in a right-justified Rich Text object
    """
    # All formatting decisions live in the string formatter; this only wraps the result
    return Text(
        _format_value_for_display_as_string(
            value,
            item,
            period_key,
            is_monetary_statement,
            dominant_scale,
            shares_scale,
            comparison_info,
            xbrl_instance,
        ),
        justify="right",
    )
def _filter_empty_string_periods(statement_data: List[Dict[str, Any]], periods_to_display: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
"""
Filter out periods that contain only empty strings in their values.
This addresses Issue #408 specifically - periods that have facts but only empty string values.
This is a lighter filter than the full data availability system, targeting the specific problem.
Args:
statement_data: Statement data with items and values
periods_to_display: List of period keys and labels
Returns:
Filtered list of periods that contain meaningful financial data
"""
if not statement_data or not periods_to_display:
return periods_to_display
filtered_periods = []
for period_key, period_label in periods_to_display:
has_meaningful_value = False
# Check all statement items for this period
for item in statement_data:
values = item.get('values', {})
value = values.get(period_key)
if value is not None:
# Convert to string and check if it's meaningful
str_value = str(value).strip()
# Check for actual content (not just empty strings)
if str_value and str_value.lower() not in ['', 'nan', 'none']:
# Try to parse as numeric - if successful, it's meaningful
try:
numeric_value = pd.to_numeric(str_value, errors='coerce')
if not pd.isna(numeric_value):
has_meaningful_value = True
break
except Exception:
# If not numeric but has content, still count as meaningful
if len(str_value) > 0:
has_meaningful_value = True
break
# Only include periods that have at least some meaningful values
if has_meaningful_value:
filtered_periods.append((period_key, period_label))
return filtered_periods
def render_statement(
        statement_data: List[Dict[str, Any]],
        periods_to_display: List[Tuple[str, str]],
        statement_title: str,
        statement_type: str,
        entity_info: Optional[Dict[str, Any]] = None,
        standard: bool = True,
        show_date_range: bool = False,
        show_comparisons: bool = True,
        xbrl_instance: Optional[Any] = None
) -> RenderedStatement:
    """
    Render a financial statement as a structured intermediate representation.

    The function filters empty periods, optionally standardizes labels, derives
    scale/units information and per-period metadata, optionally computes
    period-over-period comparisons, and finally builds one StatementRow per
    displayable item with one StatementCell per period.

    Side effects when ``standard`` is True:
        - every item in ``statement_data`` gets a 'statement_type' key set in place
        - if an XBRL instance with a ``facts_view`` is available, matching fact
          dictionaries have their 'label' replaced by the standardized label
          (the previous label is kept under 'original_label') and the facts view
          cache is cleared

    Args:
        statement_data: Statement data with items and values
        periods_to_display: List of period keys and labels
        statement_title: Title of the statement
        statement_type: Type of statement (BalanceSheet, IncomeStatement, etc.)
        entity_info: Entity information (optional)
        standard: Whether to use standardized concept labels (default: True)
        show_date_range: Whether to show full date ranges for duration periods (default: False)
        show_comparisons: Whether to show period-to-period comparisons (default: True)
        xbrl_instance: Optional XBRL object; used for the fact label update (falling back
            to entity_info['xbrl_instance']), for footer metadata, and passed on to the
            cell formatter

    Returns:
        RenderedStatement: A structured representation of the statement that can be rendered
                           in various formats
    """
    if entity_info is None:
        entity_info = {}

    # Filter out periods with only empty strings (Fix for Issue #408)
    # Apply to all major financial statement types that could have empty periods
    if statement_type in ['CashFlowStatement', 'IncomeStatement', 'BalanceSheet']:
        periods_to_display = _filter_empty_string_periods(statement_data, periods_to_display)

    # Apply standardization if requested
    if standard:
        # Create a concept mapper with default mappings
        mapper = standardization.ConceptMapper(standardization.initialize_default_mappings())

        # Add statement type to context for better mapping
        # (this mutates the caller's item dictionaries in place)
        for item in statement_data:
            item['statement_type'] = statement_type

        # Standardize the statement data
        statement_data = standardization.standardize_statement(statement_data, mapper)

        # Update facts with standardized labels if XBRL instance is available
        entity_xbrl_instance = entity_info.get('xbrl_instance')
        # Use passed xbrl_instance or fall back to entity info
        facts_xbrl_instance = xbrl_instance or entity_xbrl_instance
        if facts_xbrl_instance and hasattr(facts_xbrl_instance, 'facts_view'):
            facts_view = facts_xbrl_instance.facts_view
            facts = facts_view.get_facts()

            # Create a mapping of concept -> standardized label from statement data
            # (dimension items are left out of the mapping)
            standardization_map = {}
            for item in statement_data:
                if 'concept' in item and 'label' in item and 'original_label' in item:
                    if item.get('is_dimension', False):
                        continue
                    standardization_map[item['concept']] = {
                        'label': item['label'],
                        'original_label': item['original_label']
                    }

            # Update facts with standardized labels
            # Only facts whose current label equals the item's original label are touched
            for fact in facts:
                if 'concept' in fact and fact['concept'] in standardization_map:
                    mapping = standardization_map[fact['concept']]
                    if fact.get('label') == mapping.get('original_label'):
                        # Store original label if not already set
                        if 'original_label' not in fact:
                            fact['original_label'] = fact['label']
                        # Update with standardized label
                        fact['label'] = mapping['label']

            # Clear the cache to ensure it's rebuilt with updated facts
            facts_view.clear_cache()

        # Indicate that standardization is being used in the title
        statement_title = f"{statement_title} (Standardized)"

    # Determine if this is likely a monetary statement
    is_monetary_statement = statement_type in ['BalanceSheet', 'IncomeStatement', 'CashFlowStatement']

    # Format period headers, but keep original tuples for now (we'll use the fully parsed objects later)
    # These are now PeriodData objects but we'll continue with string period_keys for compatibility
    formatted_period_objects_initial, fiscal_period_indicator = _format_period_labels(
        periods_to_display, entity_info, statement_type, show_date_range
    )
    formatted_periods = [(p.key, p.label) for p in formatted_period_objects_initial]

    # Determine the dominant scale for monetary values in this statement
    dominant_scale = determine_dominant_scale(statement_data, periods_to_display)

    # Determine the scale used for share amounts if present
    shares_scale = None
    # Look for share-related concepts to determine their scaling from the decimals attribute
    # The first share concept with an integer decimals value <= 0 for any displayed period wins
    for item in statement_data:
        concept = item.get('concept', '')
        if concept in share_concepts:
            # Check decimals attribute to determine proper scaling
            for period_key, _ in periods_to_display:
                decimals = item.get('decimals', {}).get(period_key)
                if isinstance(decimals, int) and decimals <= 0:
                    # Use the decimals attribute to determine the scale
                    # For shares, decimals is typically negative
                    # -3 means thousands, -6 means millions, etc.
                    shares_scale = decimals
                    break
            if shares_scale is not None:
                break

    # Create the units note
    units_note = _create_units_note(is_monetary_statement, dominant_scale, shares_scale)

    # Extract period metadata for each period for better filtering
    # Period keys are split on '_': 'instant_<date>' uses part 1 as the date, and keys
    # with three or more parts use parts 1 and 2 as start and end dates
    period_metadatas = []
    for period_key, period_label in formatted_periods:
        # Try to extract dates from the period key
        if period_key.startswith('instant_'):
            try:
                date_str = period_key.split('_')[1]
                date = parse_date(date_str)
                period_metadatas.append({
                    'key': period_key,
                    'label': period_label,
                    'type': 'instant',
                    'date': date,
                    'end_date': date,  # Use same date as end_date for comparison
                    'has_metadata': True
                })
                continue
            except (ValueError, TypeError, IndexError):
                # Unparseable key: fall through to the 'unknown' entry below
                pass
        elif '_' in period_key and len(period_key.split('_')) >= 3:
            try:
                parts = period_key.split('_')
                start_date_str = parts[1]
                end_date_str = parts[2]
                start_date = parse_date(start_date_str)
                end_date = parse_date(end_date_str)
                duration_days = (end_date - start_date).days
                period_metadatas.append({
                    'key': period_key,
                    'label': period_label,
                    'type': 'duration',
                    'start_date': start_date,
                    'end_date': end_date,
                    'duration_days': duration_days,
                    'has_metadata': True
                })
                continue
            except (ValueError, TypeError, IndexError):
                # Unparseable key: fall through to the 'unknown' entry below
                pass

        # If we get here, we couldn't extract meaningful metadata
        period_metadatas.append({
            'key': period_key,
            'label': period_label,
            'type': 'unknown',
            'has_metadata': False
        })

    # Calculate data density and prepare comparison data
    period_value_counts = {period_key: 0 for period_key, _ in formatted_periods}
    period_item_counts = {period_key: 0 for period_key, _ in formatted_periods}

    # Prepare comparison data if enabled and appropriate for statement type
    # Result shape: {concept: {period_key: comparison}}
    comparison_data = {}
    if show_comparisons and statement_type in COMPARISON_CONFIG['enabled_types']:
        # Sort periods by date for proper comparison (most recent end date first);
        # periods without parsed metadata are left out
        sorted_periods = sorted(
            [m for m in period_metadatas if m['has_metadata']],
            key=lambda x: x['end_date'],
            reverse=True
        )

        # For each item, calculate comparisons between consecutive periods
        for item in statement_data:
            if item.get('is_abstract') or not item.get('has_values'):
                continue

            concept = item.get('concept')
            if not concept or concept in COMPARISON_CONFIG['excluded_concepts']:
                continue

            item_comparisons = {}
            # Each period is compared with the next one in the sorted (descending) list
            for i in range(len(sorted_periods) - 1):
                current_period = sorted_periods[i]
                prev_period = sorted_periods[i + 1]

                current_value = item['values'].get(current_period['key'])
                prev_value = item['values'].get(prev_period['key'])

                comparison = _calculate_comparison(current_value, prev_value)
                if comparison:
                    item_comparisons[current_period['key']] = comparison

            if item_comparisons:
                comparison_data[item['concept']] = item_comparisons

    # Count non-empty values for each period
    for item in statement_data:
        # Skip abstract items as they typically don't have values
        if item.get('is_abstract', False):
            continue

        # Skip items with brackets in labels (usually axis/dimension items)
        if any(bracket in item['label'] for bracket in ['[Axis]', '[Domain]', '[Member]', '[Line Items]', '[Table]', '[Abstract]']):
            continue

        for period_key, _ in formatted_periods:
            # Count this item for the period
            period_item_counts[period_key] += 1

            # Check if it has a value
            value = item['values'].get(period_key)
            # None, "" and 0 are all treated as "no value" by this check.
            # NOTE(review): an earlier comment here said 0 should count as a value for
            # financial statements, but the tuple below excludes it - confirm the intent.
            if value not in (None, "", 0):
                period_value_counts[period_key] += 1

    # Calculate percentage of non-empty values for each period
    # (stored on the metadata as a fraction between 0 and 1)
    for metadata in period_metadatas:
        period_key = metadata['key']
        count = period_item_counts.get(period_key, 0)
        if count > 0:
            data_density = period_value_counts.get(period_key, 0) / count
        else:
            data_density = 0

        metadata['data_density'] = data_density
        metadata['num_values'] = period_value_counts.get(period_key, 0)
        metadata['total_items'] = count

    # Get the PeriodData objects from _format_period_labels
    # NOTE(review): same arguments as the earlier _format_period_labels call above; this
    # second call looks redundant unless that helper has side effects - confirm.
    formatted_period_objects, fiscal_period_indicator = _format_period_labels(
        periods_to_display, entity_info, statement_type, show_date_range
    )

    # Create the RenderedStatement and its header
    header = StatementHeader(
        columns=[period.label for period in formatted_period_objects],
        period_keys=[period.key for period in formatted_period_objects],
        periods=formatted_period_objects,
        metadata={
            'dominant_scale': dominant_scale,
            'shares_scale': shares_scale,
            'is_monetary_statement': is_monetary_statement,
            'period_metadatas': period_metadatas
        }
    )

    # Extract footer information from XBRL and entity info
    # (hasattr on a None xbrl_instance is simply False, so no explicit None check is needed)
    footer_metadata = {}

    # Extract company name
    if hasattr(xbrl_instance, 'entity_name') and xbrl_instance.entity_name:
        footer_metadata['company_name'] = xbrl_instance.entity_name
    elif hasattr(xbrl_instance, 'company_name') and xbrl_instance.company_name:
        footer_metadata['company_name'] = xbrl_instance.company_name

    # Extract form type and periods
    if hasattr(xbrl_instance, 'form_type') and xbrl_instance.form_type:
        footer_metadata['form_type'] = xbrl_instance.form_type

    if hasattr(xbrl_instance, 'period_of_report') and xbrl_instance.period_of_report:
        footer_metadata['period_end'] = str(xbrl_instance.period_of_report)

    if entity_info and entity_info.get('fiscal_period'):
        footer_metadata['fiscal_period'] = entity_info.get('fiscal_period')

    rendered_statement = RenderedStatement(
        title=statement_title,
        header=header,
        rows=[],
        metadata={
            'standard': standard,
            'show_date_range': show_date_range,
            'entity_info': entity_info,
            'comparison_data': comparison_data,
            **footer_metadata  # Add footer metadata
        },
        statement_type=statement_type,
        fiscal_period_indicator=fiscal_period_indicator,
        units_note=units_note
    )

    # Issue #450: For Statement of Equity, track concept occurrences to determine beginning vs ending balances
    # concept_occurrence_count counts every item, including ones the row loop below skips
    concept_occurrence_count = {}
    if statement_type == 'StatementOfEquity':
        for item in statement_data:
            concept = item.get('concept', '')
            if concept:
                concept_occurrence_count[concept] = concept_occurrence_count.get(concept, 0) + 1

    # Running 1-based occurrence index per concept, advanced only for rows that are kept
    concept_current_index = {}

    # Detect if this statement has dimensional display (for Member filtering logic)
    has_dimensional_display = any(item.get('is_dimension', False) for item in statement_data)

    # Process and add rows
    for _index, item in enumerate(statement_data):
        # Skip rows with no values if they're abstract (headers without data)
        # But keep abstract items with children (section headers)
        has_children = len(item.get('children', [])) > 0 or item.get('has_dimension_children', False)
        if not item.get('has_values', False) and item.get('is_abstract') and not has_children:
            continue

        # Skip axis/dimension items (they contain brackets in their labels OR concept ends with these suffixes)
        # Issue #450: Also filter based on concept name to catch dimensional members without bracket labels
        concept = item.get('concept', '')
        if any(bracket in item['label'] for bracket in ['[Axis]', '[Domain]', '[Member]', '[Line Items]', '[Table]', '[Abstract]']):
            continue
        if any(concept.endswith(suffix) for suffix in ['Axis', 'Domain', 'Member', 'LineItems', 'Table']):
            # Issue #450: For Statement of Equity, Members are always structural (column headers), never data
            if statement_type == 'StatementOfEquity':
                continue
            # Issue #416: For dimensional displays, keep Members even without values (they're category headers)
            # For non-dimensional displays, only filter if no values
            if not has_dimensional_display and not item.get('has_values', False):
                continue

        # Track which occurrence of this concept we're on
        if concept:
            concept_current_index[concept] = concept_current_index.get(concept, 0) + 1

        # Remove [Abstract] from label if present
        label = item['label'].replace(' [Abstract]', '')
        level = item['level']

        # Issue #450: For Statement of Equity, add "Beginning balance" / "Ending balance"
        # to labels when concept appears multiple times (e.g., Total Stockholders' Equity)
        # Occurrences strictly between the first and the last keep their plain label
        if statement_type == 'StatementOfEquity' and concept:
            total_occurrences = concept_occurrence_count.get(concept, 1)
            current_occurrence = concept_current_index.get(concept, 1)

            if total_occurrences > 1:
                if current_occurrence == 1:
                    label = f"{label} - Beginning balance"
                elif current_occurrence == total_occurrences:
                    label = f"{label} - Ending balance"

        # Create the row with metadata
        row = StatementRow(
            label=label,
            level=level,
            cells=[],
            metadata={
                'concept': item.get('concept', ''),
                'has_values': item.get('has_values', False),
                'children': item.get('children', []),
                'dimension_metadata': item.get('dimension_metadata', {}),
                'units': item.get('units', {}),  # Pass through unit_ref for each period
                'period_types': item.get('period_types', {})  # Pass through period_type for each period
            },
            is_abstract=item.get('is_abstract', False),
            is_dimension=item.get('is_dimension', False),
            has_dimension_children=item.get('has_dimension_children', False)
        )

        # Add values for each period
        for period in formatted_period_objects:
            period_key = period.key
            value = item['values'].get(period_key, "")

            # Issue #450: For Statement of Equity with duration periods, match instant facts
            # at the appropriate date based on position in roll-forward structure
            if value == "" and period.end_date and statement_type == 'StatementOfEquity':
                # Determine if this is beginning balance (first occurrence) or ending balance (later occurrences)
                is_first_occurrence = concept_current_index.get(concept, 1) == 1

                if is_first_occurrence and hasattr(period, 'start_date') and period.start_date:
                    # Beginning balance: Try instant at day before start_date
                    from datetime import datetime, timedelta
                    try:
                        start_dt = datetime.strptime(period.start_date, '%Y-%m-%d')
                        beginning_date = (start_dt - timedelta(days=1)).strftime('%Y-%m-%d')
                        instant_key = f"instant_{beginning_date}"
                        value = item['values'].get(instant_key, "")
                    except (ValueError, AttributeError):
                        pass  # Fall through to try end_date

                # If still no value, try instant at end_date (ending balance)
                if value == "":
                    instant_key = f"instant_{period.end_date}"
                    value = item['values'].get(instant_key, "")

            # Get comparison info for this item and period if available
            # (this is the item's whole {period_key: comparison} mapping, not a single entry)
            comparison_info = None
            if show_comparisons and item.get('concept') in comparison_data:
                comparison_info = comparison_data[item['concept']]

            # Create a format function to use when rendering - use a proper closure to avoid variable capture issues
            # Clone item at the time of creating this function to prevent it from changing later
            # (dict(item) is a shallow copy; nested dictionaries are still shared)
            current_item = dict(item)
            current_period_key = period_key

            # item and pk are bound as defaults when the function is defined; the other
            # names are looked up when the formatter is called.
            # NOTE(review): comparison_info is reassigned on every iteration, so it is
            # late-bound here, unlike item and pk - confirm this is acceptable.
            def format_func(value, item=current_item, pk=current_period_key):
                return _format_value_for_display_as_string(
                    value, item, pk,
                    is_monetary_statement, dominant_scale, shares_scale,
                    comparison_info, xbrl_instance
                )

            # Create a cell and add it to the row
            cell = StatementCell(
                value=value,  # Store the plain value
                formatter=format_func,  # Set the format function to use when rendering
                style={},  # Style will be handled in renderer
                comparison=comparison_info
            )
            row.cells.append(cell)

        # Add the row to the statement
        rendered_statement.rows.append(row)

    return rendered_statement
def generate_rich_representation(xbrl) -> Union[str, 'Panel']:
    """
    Generate a clean, human-focused representation of the XBRL document.

    The panel contains, in order: a company header and filing facts (when
    ``xbrl.entity_info`` is non-empty), a summary of annual and quarterly
    reporting periods (when ``xbrl.reporting_periods`` is non-empty), and a
    "Common Actions" cheat sheet listing the core statement accessors that
    are actually available in this document.

    Args:
        xbrl: XBRL object

    Returns:
        Panel: A formatted panel focused on statement availability and usage
    """
    import re  # function-scope import: re is not among this module's top-level imports

    components = []

    # Header: Clean, crisp information hierarchy
    entity_info = xbrl.entity_info
    if entity_info:
        entity_name = entity_info.get('entity_name', 'Unknown Entity')
        ticker = entity_info.get('ticker', '')
        cik = entity_info.get('identifier', '')
        doc_type = entity_info.get('document_type', '')
        fiscal_year = entity_info.get('fiscal_year', '')
        fiscal_period = entity_info.get('fiscal_period', '')
        period_end = entity_info.get('document_period_end_date', '')

        # Company name with ticker (bold yellow) and CIK on same line
        company_line = Text()
        company_line.append(entity_name, style="bold cyan")
        if ticker:
            company_line.append(" (", style="bold cyan")
            company_line.append(ticker, style="bold yellow")
            company_line.append(")", style="bold cyan")
        if cik:
            # Format CIK with leading zeros dimmed
            company_line.append(" • CIK ", style="dim")
            company_line.append(cik_text(cik))
        components.append(company_line)
        components.append(Text(""))  # Spacing

        # Filing information - crisp, key-value style
        filing_table = RichTable.grid(padding=(0, 2))
        filing_table.add_column(style="bold", justify="right")
        filing_table.add_column(style="default")

        if doc_type:
            filing_table.add_row("Form:", doc_type)

        # Combine fiscal period and end date on one line (they're related!)
        if fiscal_period and fiscal_year:
            period_display = f"Fiscal Year {fiscal_year}" if fiscal_period == 'FY' else f"{fiscal_period} {fiscal_year}"
            if period_end:
                # Format date more readably; anything that is not YYYY-MM-DD is shown verbatim
                try:
                    ended_on = datetime.strptime(str(period_end), '%Y-%m-%d').strftime('%b %d, %Y')
                except ValueError:
                    ended_on = period_end
                period_display += f" (ended {ended_on})"
            filing_table.add_row("Fiscal Period:", period_display)

        # Data volume
        filing_table.add_row("Data:", f"{len(xbrl._facts):,} facts • {len(xbrl.contexts):,} contexts")

        components.append(filing_table)

    # Period coverage - cleaner, more scannable format
    reporting_periods = xbrl.reporting_periods
    if reporting_periods:
        components.append(Text(""))  # Spacing
        components.append(Text("Available Data Coverage:", style="bold"))

        # Pulls the trailing four-digit year out of an "Annual: ... to ... YYYY" label
        annual_year_pattern = re.compile(r'to .* (\d{4})')

        # Parse periods into annual and quarterly; labels matching neither are not displayed
        annual_periods = []
        quarterly_periods = []

        for period in reporting_periods[:10]:  # Only the first ten periods are considered
            label = period.get('label', '')
            if not label:
                continue

            # Categorize by label content
            if 'Annual:' in label:
                # Extract dates and format as FY YYYY, falling back to the raw label
                year_match = annual_year_pattern.search(label)
                annual_periods.append(f"FY {year_match.group(1)}" if year_match else label)
            elif 'FY' in label.upper():
                annual_periods.append(label)
            elif 'Quarterly:' in label or any(q in label for q in ('Q1', 'Q2', 'Q3', 'Q4')):
                # Remove "Quarterly:" prefix if present for cleaner display
                quarterly_periods.append(label.replace('Quarterly:', '').strip())

        # Display periods in organized way (at most three of each kind)
        if annual_periods:
            components.append(Text(f" Annual: {', '.join(annual_periods[:3])}", style="default"))
        if quarterly_periods:
            components.append(Text(f" Quarterly: {', '.join(quarterly_periods[:3])}", style="default"))

    statements = xbrl.get_all_statements()
    statement_types = {stmt['type'] for stmt in statements if stmt['type']}

    # Common Actions section - expanded and instructive
    components.append(Text(""))  # Spacing
    components.append(Text("Common Actions", style="bold"))
    components.append(Text("─" * 60, style="dim"))

    # Build actions list dynamically; each entry is (code, comment)
    actions = [
        ("# List all available statements", ""),
        ("xbrl.statements", ""),
        ("", ""),
        ("# Access statements by name or index", ""),
        ("stmt = xbrl.statements['CoverPage']", ""),
        ("stmt = xbrl.statements[6]", ""),
        ("", ""),
        ("# View core financial statements", ""),
    ]

    # Add available core statements dynamically
    core_statement_methods = {
        'IncomeStatement': 'income_statement()',
        'BalanceSheet': 'balance_sheet()',
        'CashFlowStatement': 'cash_flow_statement()',
        'StatementOfEquity': 'statement_of_equity()',
        'ComprehensiveIncome': 'comprehensive_income()'
    }
    actions.extend(
        (f"stmt = xbrl.statements.{method}", "")
        for stmt_type, method in core_statement_methods.items()
        if stmt_type in statement_types
    )

    # Continue with other actions
    actions.extend([
        ("", ""),
        ("# Get current period only", ""),
        ("current = xbrl.current_period", ""),
        ("stmt = current.income_statement()", ""),
        ("", ""),
        ("# Convert statement to DataFrame", ""),
        ("df = stmt.to_dataframe()", ""),
        ("", ""),
        ("# Query specific facts", ""),
        ("revenue = xbrl.facts.query().by_concept('Revenue').to_dataframe()", ""),
    ])

    for code, comment in actions:
        if not code and not comment:
            # Blank line for spacing
            components.append(Text(""))
        elif code.startswith("#"):
            # Comment line - bold
            components.append(Text(code, style="bold"))
        else:
            # Code line
            action_line = Text()
            action_line.append(f" {code}", style="cyan")
            if comment:
                action_line.append(f" {comment}", style="dim")
            components.append(action_line)

    # Add hint about comprehensive docs
    components.append(Text(""))
    components.append(Text("💡 Tip: Use xbrl.docs for comprehensive usage guide", style="dim italic"))

    return Panel(Group(*components), title="XBRL Document", border_style="blue")