Files
edgartools/venv/lib/python3.10/site-packages/edgar/entity/entity_facts.py
2025-12-09 12:13:01 +01:00

1733 lines
63 KiB
Python

"""
Enhanced EntityFacts class for AI-ready company facts analysis.
This module provides the main EntityFacts class with investment-focused
analytics and AI-ready interfaces.
"""
from collections import defaultdict
from datetime import date
from functools import lru_cache
from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional
if TYPE_CHECKING:
from edgar.entity.query import FactQuery
from edgar.enums import PeriodType
from typing import Union
import httpx
import orjson as json
import pandas as pd
from pandas.core.interchange.dataframe_protocol import DataFrame
from rich.box import SIMPLE, SIMPLE_HEAVY
from rich.columns import Columns
from rich.console import Group
from rich.padding import Padding
from rich.panel import Panel
from rich.table import Table
from rich.text import Text
from edgar.core import log
from edgar.entity.enhanced_statement import MultiPeriodStatement
from edgar.entity.models import FinancialFact
from edgar.httprequests import download_json
from edgar.storage import get_edgar_data_directory, is_using_local_storage
class NoCompanyFactsFound(Exception):
    """Exception raised when no company facts are found for a given CIK.

    The human-readable text is available both through ``str(exc)`` and the
    ``message`` attribute; the offending CIK is kept on ``cik``.
    """

    def __init__(self, cik: int):
        self.cik = cik
        self.message = f"No Company facts found for cik {cik}"
        # Hand the text to Exception so str(exc), logging and tracebacks
        # show it instead of an empty string.
        super().__init__(self.message)
def download_company_facts_from_sec(cik: int) -> Dict[str, Any]:
    """
    Fetch the company facts JSON for a CIK from the SEC XBRL API.

    Args:
        cik: The company CIK; it is zero-padded to ten digits in the URL

    Returns:
        The result of ``download_json`` for the company facts URL

    Raises:
        NoCompanyFactsFound: If the request fails with HTTP 404
        httpx.HTTPStatusError: For any other HTTP error status
    """
    company_facts_url = f"https://data.sec.gov/api/xbrl/companyfacts/CIK{cik:010}.json"
    try:
        facts_json = download_json(company_facts_url)
    except httpx.HTTPStatusError as err:
        # Only a 404 means "this company has no facts"; anything else propagates
        if err.response.status_code != 404:
            raise
        log.warning(f"No company facts found on url {company_facts_url}")
        raise NoCompanyFactsFound(cik=cik) from None
    return facts_json
def load_company_facts_from_local(cik: int) -> Optional[Dict[str, Any]]:
    """
    Load company facts from the local ``companyfacts`` data directory.

    Args:
        cik: The company CIK; it is zero-padded to ten digits in the file name

    Returns:
        The parsed JSON document, or None when the ``companyfacts``
        directory itself does not exist

    Raises:
        NoCompanyFactsFound: If the directory exists but has no file for this CIK
    """
    company_facts_dir = get_edgar_data_directory() / "companyfacts"
    if not company_facts_dir.exists():
        return None
    company_facts_file = company_facts_dir / f"CIK{cik:010}.json"
    if not company_facts_file.exists():
        raise NoCompanyFactsFound(cik=cik)
    # Pass raw bytes to the parser: read_text() would decode with the platform
    # locale encoding, which is not guaranteed to be UTF-8.
    return json.loads(company_facts_file.read_bytes())
@lru_cache(maxsize=32)
def get_company_facts(cik: int):
    """
    Return the parsed company facts for a CIK.

    The raw JSON comes from local storage when ``is_using_local_storage()``
    is true, otherwise from the SEC. Results are memoised per CIK by the
    surrounding ``lru_cache`` (up to 32 entries).

    Args:
        cik: The company CIK

    Returns:
        Whatever ``EntityFactsParser.parse_company_facts`` produces for the raw JSON

    Raises:
        NoCompanyFactsFound: If no facts are found for the given CIK
    """
    fetch = load_company_facts_from_local if is_using_local_storage() else download_company_facts_from_sec
    raw_facts = fetch(cik)
    # Imported here rather than at module level, as in the original code
    from edgar.entity.parser import EntityFactsParser
    return EntityFactsParser.parse_company_facts(raw_facts)
class EntityFacts:
"""
AI-ready company facts with investment-focused analytics.
This class provides a comprehensive interface for analyzing company financial data,
with support for both traditional DataFrame-based workflows and modern AI/LLM
consumption patterns.
"""
def __init__(self, cik: int, name: str, facts: List[FinancialFact]):
"""
Initialize EntityFacts with company information and facts.
Args:
cik: Company CIK number
name: Company name
facts: List of FinancialFact objects
"""
self.cik = cik
self.name = name
self._facts = facts
self._fact_index = self._build_indices()
self._cache = {}
def _build_indices(self) -> Dict[str, Dict]:
"""Build optimized indices for fast querying"""
indices = {
'by_concept': defaultdict(list),
'by_period': defaultdict(list),
'by_statement': defaultdict(list),
'by_form': defaultdict(list),
'by_fiscal_year': defaultdict(list),
'by_fiscal_period': defaultdict(list)
}
for fact in self._facts:
# Index by concept
indices['by_concept'][fact.concept].append(fact)
if fact.label:
indices['by_concept'][fact.label.lower()].append(fact)
# Index by period
period_key = f"{fact.fiscal_year}-{fact.fiscal_period}"
indices['by_period'][period_key].append(fact)
# Index by fiscal year and period
indices['by_fiscal_year'][fact.fiscal_year].append(fact)
indices['by_fiscal_period'][fact.fiscal_period].append(fact)
# Index by statement type
if fact.statement_type:
indices['by_statement'][fact.statement_type].append(fact)
# Index by form type
indices['by_form'][fact.form_type].append(fact)
return indices
def __len__(self) -> int:
"""Return the total number of facts"""
return len(self._facts)
def __iter__(self) -> Iterator[FinancialFact]:
"""Iterate over all facts"""
return iter(self._facts)
def get_all_facts(self) -> List[FinancialFact]:
"""
Get all facts for this entity.
Returns:
List of all FinancialFact objects
"""
return self._facts
def to_dataframe(self,
include_metadata: bool = False,
columns: Optional[List[str]] = None) -> pd.DataFrame:
"""
Export all facts to a pandas DataFrame for analysis.
This method provides direct access to all financial facts in a tabular format,
enabling custom analysis, filtering, and integration with data science workflows.
Args:
include_metadata: Include filing references and data quality metadata (default: False)
columns: Specific columns to include. If None, includes standard columns.
Returns:
DataFrame with one row per fact, sorted by concept and period_end
Example:
Basic export for exploration:
>>> facts = company.get_facts()
>>> df = facts.to_dataframe()
>>> print(df.head())
Export with metadata for audit trail:
>>> df_full = facts.to_dataframe(include_metadata=True)
Custom columns for specific analysis:
>>> df_slim = facts.to_dataframe(columns=['concept', 'fiscal_year', 'numeric_value'])
Filter and analyze:
>>> df = annual_facts.to_dataframe()
>>> revenue = df[df['concept'].str.contains('Revenue')]
>>> print(revenue[['fiscal_year', 'numeric_value']])
"""
# Build records from facts
records = []
for fact in self._facts:
record = {
'concept': fact.concept,
'label': fact.label,
'value': fact.value,
'numeric_value': fact.numeric_value,
'unit': fact.unit,
'period_type': fact.period_type,
'period_start': fact.period_start,
'period_end': fact.period_end,
'fiscal_year': fact.fiscal_year,
'fiscal_period': fact.fiscal_period
}
# Add metadata if requested
if include_metadata:
record.update({
'accession': fact.accession,
'filing_date': fact.filing_date,
'form_type': fact.form_type,
'statement_type': fact.statement_type,
'taxonomy': fact.taxonomy,
'scale': fact.scale,
'data_quality': fact.data_quality.value if fact.data_quality else None,
'is_audited': fact.is_audited,
'confidence_score': fact.confidence_score
})
records.append(record)
# Create DataFrame
df = pd.DataFrame(records)
# Filter to specific columns if requested
if columns is not None:
df = df[columns]
# Sort for consistency
if not df.empty:
sort_cols = []
if 'concept' in df.columns:
sort_cols.append('concept')
if 'period_end' in df.columns:
sort_cols.append('period_end')
if sort_cols:
df = df.sort_values(sort_cols).reset_index(drop=True)
return df
def filter_by_period_type(self, period_type: Union[str, 'PeriodType']) -> 'EntityFacts':
"""
Filter facts by period type and return a new EntityFacts instance.
Args:
period_type: Period type to filter by - either PeriodType enum or string
('annual', 'quarterly', 'monthly')
Returns:
New EntityFacts instance with filtered facts
Example:
>>> annual_facts = facts.filter_by_period_type('annual')
>>> quarterly_facts = facts.filter_by_period_type(PeriodType.QUARTERLY)
"""
# Use the query interface to filter facts
filtered_facts = self.query().by_period_type(period_type).execute()
# Create a new EntityFacts instance with the filtered facts
return EntityFacts(
cik=self.cik,
name=self.name,
facts=filtered_facts
)
def __rich__(self):
    """
    Creates a rich representation providing an at-a-glance view of company facts.

    The panel stacks a summary-statistics table above two rows of
    side-by-side panels: key financial metrics next to available statements,
    then recent filings next to data quality.

    Returns:
        A rich ``Panel`` titled with the company name
    """
    all_facts = self._facts
    total_facts = len(all_facts)

    # Title
    title = Text.assemble(
        "📊 ",
        (self.name, "bold green"),
        " Financial Facts"
    )

    # --- Summary statistics ---
    stats = Table(box=SIMPLE_HEAVY, show_header=False, padding=(0, 1))
    stats.add_column("Metric", style="dim")
    stats.add_column("Value", style="bold")

    # Filing date range, ignoring facts without a filing date
    dates = [f.filing_date for f in all_facts if f.filing_date]
    if dates:
        date_range = f"{min(dates).strftime('%Y-%m-%d')} to {max(dates).strftime('%Y-%m-%d')}"
    else:
        date_range = "No dates available"

    unique_concepts = len({f.concept for f in all_facts})

    # Fiscal years covered, ignoring facts without a fiscal year
    fiscal_years = {f.fiscal_year for f in all_facts if f.fiscal_year}
    if fiscal_years:
        year_range = f"{min(fiscal_years)} - {max(fiscal_years)}"
    else:
        year_range = "N/A"

    stats.add_row("CIK", str(self.cik))
    stats.add_row("Total Facts", f"{total_facts:,}")
    stats.add_row("Unique Concepts", f"{unique_concepts:,}")
    stats.add_row("Date Range", date_range)
    stats.add_row("Fiscal Years", year_range)
    stats_panel = Panel(
        stats,
        title="📈 Summary Statistics",
        border_style="bright_black"
    )

    # --- Key financial metrics ---
    metrics = Table(box=SIMPLE, show_header=True, padding=(0, 1))
    metrics.add_column("Metric", style="bold")
    metrics.add_column("Value", justify="right")
    metrics.add_column("Period")
    metrics.add_column("Quality", style="dim")

    # (display label, concept or label looked up through get_fact)
    key_metrics = [
        ('Revenue', 'Revenue'),
        ('Net Income', 'NetIncome'),
        ('Total Assets', 'Assets'),
        ('Total Liabilities', 'Liabilities'),
        ('Stockholders Equity', 'StockholdersEquity'),
        ('Operating Income', 'OperatingIncome'),
        ('Public Float', 'dei:EntityPublicFloat'),
        ('Shares Outstanding', 'dei:EntityCommonStockSharesOutstanding')
    ]
    has_metrics = False
    for label, concept in key_metrics:
        fact = self.get_fact(concept)
        if not fact:
            continue
        has_metrics = True
        # `is not None` so a genuine zero is formatted as a number rather
        # than falling through to the raw value.
        if fact.numeric_value is not None:
            # A fact without a unit is treated as a non-share amount
            if 'share' in (fact.unit or '').lower():
                value = f"{fact.numeric_value:,.0f}"
            else:
                value = f"${fact.numeric_value:,.0f}"
        else:
            value = str(fact.value)
        period = f"{fact.fiscal_period} {fact.fiscal_year}"
        quality = fact.data_quality.value if fact.data_quality else "N/A"
        metrics.add_row(label, value, period, quality)

    metrics_panel = Panel(
        metrics if has_metrics else Text("No key financial metrics available", style="dim"),
        title="💰 Key Financial Metrics",
        border_style="bright_black"
    )

    # --- Available statements ---
    statement_counts = defaultdict(int)
    for fact in all_facts:
        if fact.statement_type:
            statement_counts[fact.statement_type] += 1

    if statement_counts:
        statements = Table(box=SIMPLE, show_header=True, padding=(0, 1))
        statements.add_column("Statement Type", style="bold")
        statements.add_column("Fact Count", justify="right")
        for stmt_type, count in sorted(statement_counts.items()):
            statements.add_row(stmt_type, f"{count:,}")
        statements_content = statements
    else:
        statements_content = Text("No statement information available", style="dim")
    statements_panel = Panel(
        statements_content,
        title="📋 Available Statements",
        border_style="bright_black"
    )

    # --- Recent filings: fact count and latest filing date per form type ---
    filing_info = defaultdict(lambda: {'count': 0, 'date': None})
    for fact in all_facts:
        entry = filing_info[fact.form_type]
        entry['count'] += 1
        if fact.filing_date and (entry['date'] is None or fact.filing_date > entry['date']):
            entry['date'] = fact.filing_date

    filings = Table(box=SIMPLE, show_header=True, padding=(0, 1))
    filings.add_column("Form", style="bold")
    filings.add_column("Latest Filing")
    filings.add_column("Facts", justify="right")

    # Most recent first; forms with no dated fact sort last via date.min
    sorted_filings = sorted(
        filing_info.items(),
        key=lambda x: x[1]['date'] or date.min,
        reverse=True
    )[:5]  # Show top 5
    for form_type, info in sorted_filings:
        date_str = info['date'].strftime('%Y-%m-%d') if info['date'] else "N/A"
        filings.add_row(form_type, date_str, f"{info['count']:,}")
    filings_panel = Panel(
        filings,
        title="📄 Recent Filings",
        border_style="bright_black"
    )

    # --- Data quality ---
    quality_counts = defaultdict(int)
    for fact in all_facts:
        if fact.data_quality:
            quality_counts[fact.data_quality.value] += 1
    audited_count = sum(1 for f in all_facts if f.is_audited)

    quality = Table(box=SIMPLE, show_header=False, padding=(0, 1))
    quality.add_column("Metric", style="dim")
    quality.add_column("Value", style="bold")
    # Both divisions are safe: a non-zero count implies at least one fact
    for q_level, count in sorted(quality_counts.items()):
        percentage = (count / total_facts) * 100
        quality.add_row(f"{q_level} Quality", f"{count:,} ({percentage:.1f}%)")
    if audited_count > 0:
        audit_percentage = (audited_count / total_facts) * 100
        quality.add_row("Audited Facts", f"{audited_count:,} ({audit_percentage:.1f}%)")
    quality_panel = Panel(
        quality,
        title="✅ Data Quality",
        border_style="bright_black"
    )

    # Combine all sections
    content = Group(
        Padding("", (1, 0, 0, 0)),
        stats_panel,
        Columns([metrics_panel, statements_panel], equal=True, expand=True),
        Columns([filings_panel, quality_panel], equal=True, expand=True)
    )

    # Create the main panel
    return Panel(
        content,
        title=title,
        subtitle=f"SEC XBRL Facts • {total_facts:,} total facts",
        border_style="blue"
    )
def __repr__(self):
    """Render the rich panel for this entity to a string."""
    from edgar.richtools import repr_rich
    panel = self.__rich__()
    return repr_rich(panel)
# Core query interface
def query(self) -> 'FactQuery':
    """
    Create a query builder over this entity's facts and indices.

    Returns:
        FactQuery: A new query builder instance

    Example:
        >>> facts.query().by_concept('Revenue').latest(4).to_dataframe()
    """
    # Imported here rather than at module level, as in the original code
    from edgar.entity.query import FactQuery
    facts, fact_index = self._facts, self._fact_index
    return FactQuery(facts, fact_index)
# Convenience methods for common queries
def get_fact(self, concept: str, period: Optional[str] = None) -> Optional[FinancialFact]:
"""
Get a single fact by concept and optional period.
Args:
concept: Concept name or label
period: Optional period in format "YYYY-QN" or "YYYY-FY"
Returns:
The most recent matching fact, or None if not found
"""
# Try exact concept match first
facts = self._fact_index['by_concept'].get(concept, [])
# Try case-insensitive label match
if not facts:
facts = self._fact_index['by_concept'].get(concept.lower(), [])
if not facts:
return None
# Filter by period if specified
if period:
facts = [f for f in facts if f"{f.fiscal_year}-{f.fiscal_period}" == period]
# Return most recent
if facts:
return max(facts, key=lambda f: (f.filing_date, f.period_end))
return None
def time_series(self, concept: str, periods: int = 20) -> pd.DataFrame:
    """
    Return the most recently filed values of a concept as a DataFrame.

    Args:
        concept: Concept name or label
        periods: Number of periods to retrieve

    Returns:
        DataFrame with time series data: the first ``periods`` rows of the
        query result, ordered by filing date descending
    """
    from edgar.entity.query import FactQuery
    newest_first = (
        FactQuery(self._facts, self._fact_index)
        .by_concept(concept)
        .sort_by('filing_date', ascending=False)
    )
    frame = newest_first.to_dataframe('period_end', 'numeric_value', 'fiscal_period', 'fiscal_year')
    # The limit is applied to the finished frame, not inside the query
    return frame.head(periods)
# DEI (Document and Entity Information) helpers
def dei_facts(self, as_of: Optional[date] = None) -> pd.DataFrame:
    """
    Get Document and Entity Information (DEI) facts.

    Facts are selected with a non-exact concept match on 'dei:'.

    Args:
        as_of: Optional date for point-in-time view (gets latest instant
            facts if not specified)

    Returns:
        DataFrame with columns concept, label, value, raw_value, unit,
        period_end, filing_date and form_type, sorted by concept; an empty
        DataFrame when no facts match

    Example:
        # Get latest DEI facts
        dei = facts.dei_facts()
        # Get DEI facts as of specific date
        dei = facts.dei_facts(as_of=date(2024, 12, 31))
    """
    from edgar.entity.query import FactQuery
    dei_query = FactQuery(self._facts, self._fact_index).by_concept('dei:', exact=False)
    dei_query = dei_query.as_of(as_of) if as_of else dei_query.latest_instant()

    matched = dei_query.execute()
    if not matched:
        return pd.DataFrame()

    rows = [
        {
            'concept': fact.concept,
            'label': fact.label,
            'value': fact.get_formatted_value(),
            # Falls back to the raw value whenever numeric_value is falsy
            'raw_value': fact.numeric_value or fact.value,
            'unit': fact.unit,
            'period_end': fact.period_end,
            'filing_date': fact.filing_date,
            'form_type': fact.form_type
        }
        for fact in matched
    ]
    frame = pd.DataFrame(rows)
    if frame.empty:
        return frame
    # Sort by concept for consistent ordering
    return frame.sort_values('concept').reset_index(drop=True)
def entity_info(self) -> Dict[str, Any]:
"""
Get key entity information as a clean dictionary.
Returns:
Dictionary with entity name, shares outstanding, public float, etc.
Example:
info = facts.entity_info()
print(f"Company: {info.get('entity_name', 'Unknown')}")
print(f"Shares Outstanding: {info.get('shares_outstanding', 'N/A')}")
"""
dei_df = self.dei_facts()
info = {
'entity_name': self.name,
'cik': self.cik
}
if dei_df.empty:
return info
# Map common DEI concepts to friendly keys
concept_mapping = {
'dei:EntityCommonStockSharesOutstanding': 'shares_outstanding',
'dei:EntityPublicFloat': 'public_float',
'dei:TradingSymbol': 'trading_symbol',
'dei:EntityFilerCategory': 'filer_category',
'dei:EntityCurrentReportingStatus': 'reporting_status',
'dei:EntityWellKnownSeasonedIssuer': 'well_known_seasoned_issuer',
'dei:EntityVoluntaryFilers': 'voluntary_filer',
'dei:EntitySmallBusiness': 'small_business',
'dei:EntityEmergingGrowthCompany': 'emerging_growth_company',
'dei:EntityShellCompany': 'shell_company'
}
for _, row in dei_df.iterrows():
concept = row['concept']
if concept in concept_mapping:
key = concept_mapping[concept]
info[key] = row['value']
info[f'{key}_raw'] = row['raw_value']
info[f'{key}_as_of'] = row['period_end']
return info
# Standardized financial concept access methods (FEAT-411)
def get_revenue(self, period: Optional[str] = None, unit: Optional[str] = None) -> Optional[float]:
"""
Get standardized revenue value across all companies.
This method handles various revenue concept names (Revenue, Contract Revenue, Net Sales, etc.)
and provides consistent access regardless of company-specific naming conventions.
Args:
period: Optional period in format "YYYY-QN" or "YYYY-FY"
unit: Optional unit filter (defaults to USD if not specified)
Returns:
Revenue value as float, or None if not found
Example:
>>> revenue = facts.get_revenue()
>>> quarterly_revenue = facts.get_revenue(period="2024-Q1")
"""
return self._get_standardized_concept_value(
concept_variants=[
'RevenueFromContractWithCustomerExcludingAssessedTax',
'SalesRevenueNet',
'Revenues',
'Revenue',
'TotalRevenues',
'NetSales'
],
period=period,
unit=unit,
fallback_calculation=self._calculate_revenue_from_components,
strict_unit_match=True
)
def get_net_income(self, period: Optional[str] = None, unit: Optional[str] = None) -> Optional[float]:
"""
Get standardized net income value across all companies.
Handles various net income concept names and provides consistent access.
Args:
period: Optional period in format "YYYY-QN" or "YYYY-FY"
unit: Optional unit filter (defaults to USD if not specified)
Returns:
Net income value as float, or None if not found
Example:
>>> net_income = facts.get_net_income()
>>> annual_income = facts.get_net_income(period="2024-FY")
"""
return self._get_standardized_concept_value(
concept_variants=[
'NetIncomeLoss',
'ProfitLoss',
'NetIncome',
'NetEarnings',
'NetIncomeLossAttributableToParent'
],
period=period,
unit=unit
)
def get_total_assets(self, period: Optional[str] = None, unit: Optional[str] = None) -> Optional[float]:
"""
Get standardized total assets value across all companies.
Args:
period: Optional period in format "YYYY-QN" or "YYYY-FY"
unit: Optional unit filter (defaults to USD if not specified)
Returns:
Total assets value as float, or None if not found
Example:
>>> assets = facts.get_total_assets()
>>> q4_assets = facts.get_total_assets(period="2024-Q4")
"""
return self._get_standardized_concept_value(
concept_variants=[
'Assets',
'TotalAssets',
'AssetsCurrent' # Fallback for some filings
],
period=period,
unit=unit
)
def get_total_liabilities(self, period: Optional[str] = None, unit: Optional[str] = None) -> Optional[float]:
"""
Get standardized total liabilities value across all companies.
Args:
period: Optional period in format "YYYY-QN" or "YYYY-FY"
unit: Optional unit filter (defaults to USD if not specified)
Returns:
Total liabilities value as float, or None if not found
Example:
>>> liabilities = facts.get_total_liabilities()
"""
return self._get_standardized_concept_value(
concept_variants=[
'Liabilities',
'TotalLiabilities',
'LiabilitiesAndStockholdersEquity' # Some companies structure it this way
],
period=period,
unit=unit
)
def get_shareholders_equity(self, period: Optional[str] = None, unit: Optional[str] = None) -> Optional[float]:
"""
Get standardized shareholders equity value across all companies.
Args:
period: Optional period in format "YYYY-QN" or "YYYY-FY"
unit: Optional unit filter (defaults to USD if not specified)
Returns:
Shareholders equity value as float, or None if not found
Example:
>>> equity = facts.get_shareholders_equity()
"""
return self._get_standardized_concept_value(
concept_variants=[
'StockholdersEquity',
'ShareholdersEquity',
'TotalEquity',
'PartnersCapital', # For partnerships
'MembersEquity' # For LLCs
],
period=period,
unit=unit
)
def get_operating_income(self, period: Optional[str] = None, unit: Optional[str] = None) -> Optional[float]:
"""
Get standardized operating income value across all companies.
Args:
period: Optional period in format "YYYY-QN" or "YYYY-FY"
unit: Optional unit filter (defaults to USD if not specified)
Returns:
Operating income value as float, or None if not found
Example:
>>> op_income = facts.get_operating_income()
"""
return self._get_standardized_concept_value(
concept_variants=[
'OperatingIncomeLoss',
'OperatingIncome',
'IncomeLossFromOperations',
'OperatingProfit'
],
period=period,
unit=unit
)
def get_gross_profit(self, period: Optional[str] = None, unit: Optional[str] = None) -> Optional[float]:
"""
Get standardized gross profit value across all companies.
Args:
period: Optional period in format "YYYY-QN" or "YYYY-FY"
unit: Optional unit filter (defaults to USD if not specified)
Returns:
Gross profit value as float, or None if not found
Example:
>>> gross_profit = facts.get_gross_profit()
"""
return self._get_standardized_concept_value(
concept_variants=[
'GrossProfit',
'GrossMargin'
],
period=period,
unit=unit,
fallback_calculation=self._calculate_gross_profit_from_components
)
# Convenient properties for common DEI facts
@property
def shares_outstanding(self) -> Optional[float]:
"""
Get the most recent shares outstanding value.
Returns:
Number of shares outstanding as float, or None if not available
Example:
shares = facts.shares_outstanding
if shares:
print(f"Shares Outstanding: {shares:,.0f}")
"""
fact = self.get_fact('dei:EntityCommonStockSharesOutstanding')
return fact.numeric_value if fact else None
@property
def public_float(self) -> Optional[float]:
"""
Get the most recent public float value.
Returns:
Public float value as float, or None if not available
Example:
float_val = facts.public_float
if float_val:
print(f"Public Float: ${float_val:,.0f}")
"""
fact = self.get_fact('dei:EntityPublicFloat')
return fact.numeric_value if fact else None
@property
def shares_outstanding_fact(self) -> Optional[FinancialFact]:
"""
Get the most recent shares outstanding fact with full context.
Returns:
FinancialFact object with shares outstanding data, or None
Example:
fact = facts.shares_outstanding_fact
if fact:
print(f"Shares: {fact.get_formatted_value()} as of {fact.period_end}")
"""
return self.get_fact('dei:EntityCommonStockSharesOutstanding')
@property
def public_float_fact(self) -> Optional[FinancialFact]:
"""
Get the most recent public float fact with full context.
Returns:
FinancialFact object with public float data, or None
Example:
fact = facts.public_float_fact
if fact:
print(f"Float: {fact.get_formatted_value()} as of {fact.period_end}")
"""
return self.get_fact('dei:EntityPublicFloat')
# Financial statement helpers
def income_statement(self, periods: int = 4, period_length: Optional[int] = None, as_dataframe: bool = False,
                     annual: bool = True, concise_format: bool = False) -> Union[pd.DataFrame, MultiPeriodStatement]:
    """
    Get income statement facts for recent periods.

    Args:
        periods: Number of periods to retrieve
        period_length: Accepted for backward compatibility; this method does
            not currently use it (use ``annual`` to choose the period kind)
        as_dataframe: If True, return DataFrame; if False, return MultiPeriodStatement
        annual: If True, prefer annual (FY) periods over interim periods
        concise_format: If True, display values as $1.0B, if False display as $1,000,000,000

    Returns:
        MultiPeriodStatement, or the result of its ``to_dataframe()`` when
        ``as_dataframe`` is True

    Example:
        # Get hierarchical multi-period statement (default)
        stmt = facts.income_statement(periods=4, annual=True)
        # Get DataFrame for analysis
        df = facts.income_statement(periods=4, as_dataframe=True)
    """
    # Always build the enhanced multi-period statement
    from edgar.entity.enhanced_statement import EnhancedStatementBuilder
    enhanced_stmt = EnhancedStatementBuilder().build_multi_period_statement(
        facts=self._facts,
        statement_type='IncomeStatement',
        periods=periods,
        annual=annual
    )
    # The statement carries the entity identity and display preference itself
    enhanced_stmt.company_name = self.name
    enhanced_stmt.cik = str(self.cik)
    enhanced_stmt.concise_format = concise_format

    if as_dataframe:
        return enhanced_stmt.to_dataframe()
    return enhanced_stmt
def balance_sheet(self, periods: int = 4, as_of: Optional[date] = None, as_dataframe: bool = False,
                  annual: bool = True, concise_format: bool = False) -> Union[pd.DataFrame, MultiPeriodStatement]:
    """
    Get balance sheet facts for recent periods or as of a specific date.

    Args:
        periods: Number of periods to retrieve (ignored if as_of is specified)
        as_of: Optional date for point-in-time view; if specified, gets single snapshot
        as_dataframe: If True, return DataFrame; if False, return a statement object
        annual: If True, prefer annual (FY) periods over interim periods
            (ignored if as_of is specified)
        concise_format: If True, display values as $1.0B, if False display as
            $1,000,000,000 (only applied to the multi-period statement)

    Returns:
        Without ``as_of``: a MultiPeriodStatement, or the result of its
        ``to_dataframe()`` when ``as_dataframe`` is True.
        With ``as_of``: a FinancialStatement with a single "As of <date>"
        column, or a DataFrame with one row per fact when ``as_dataframe``
        is True. Both are empty when no facts match.

    Example:
        # Get hierarchical multi-period statement (default)
        stmt = facts.balance_sheet(periods=4, annual=True)
        # Get DataFrame for analysis
        df = facts.balance_sheet(periods=4, as_dataframe=True)
    """
    if not as_of:
        # Regular periods: build the enhanced multi-period statement
        from edgar.entity.enhanced_statement import EnhancedStatementBuilder
        enhanced_stmt = EnhancedStatementBuilder().build_multi_period_statement(
            facts=self._facts,
            statement_type='BalanceSheet',
            periods=periods,
            annual=annual
        )
        enhanced_stmt.company_name = self.name
        enhanced_stmt.cik = str(self.cik)
        enhanced_stmt.concise_format = concise_format
        if as_dataframe:
            return enhanced_stmt.to_dataframe()
        return enhanced_stmt

    # Point-in-time view: latest instant facts as of the specified date
    from edgar.entity.query import FactQuery
    query = FactQuery(self._facts, self._fact_index)
    query = query.by_statement_type('BalanceSheet')
    query = query.as_of(as_of).latest_instant()
    facts = query.execute()

    if not facts:
        if as_dataframe:
            return pd.DataFrame()
        from edgar.entity.statement import FinancialStatement
        return FinancialStatement(
            data=pd.DataFrame(),
            statement_type="BalanceSheet",
            entity_name=self.name,
            period_lengths=[],
            mixed_periods=False
        )

    # One row per fact
    df = pd.DataFrame([
        {
            'label': fact.label,
            'concept': fact.concept,
            'value': fact.get_formatted_value(),
            # Falls back to the raw value whenever numeric_value is falsy
            'raw_value': fact.numeric_value or fact.value,
            'unit': fact.unit,
            'period_end': fact.period_end,
            'filing_date': fact.filing_date,
            'form_type': fact.form_type
        }
        for fact in facts
    ])
    if as_dataframe:
        return df

    from edgar.entity.statement import FinancialStatement
    # Single-column statement: rows are labels, the column is the snapshot date
    period_label = f"As of {as_of}"
    pivot_data = pd.DataFrame({
        period_label: df.set_index('label')['raw_value']
    })
    return FinancialStatement(
        data=pivot_data,
        statement_type="BalanceSheet",
        entity_name=self.name,
        period_lengths=['instant'],
        mixed_periods=False
    )
def cash_flow(self, periods: int = 4, period_length: Optional[int] = None, as_dataframe: bool = False,
              annual: bool = True, concise_format: bool = False) -> Union[pd.DataFrame, MultiPeriodStatement]:
    """
    Get cash flow statement facts.

    Args:
        periods: Number of periods to retrieve
        period_length: Accepted for backward compatibility; this method does
            not currently use it (use ``annual`` to choose the period kind)
        as_dataframe: If True, return DataFrame; if False, return MultiPeriodStatement
        annual: If True, prefer annual (FY) periods over interim periods
        concise_format: If True, display values as $1.0B, if False display as $1,000,000,000

    Returns:
        MultiPeriodStatement, or the result of its ``to_dataframe()`` when
        ``as_dataframe`` is True

    Example:
        # Get hierarchical multi-period statement (default)
        stmt = facts.cash_flow(periods=4, annual=True)
        # Get DataFrame for analysis
        df = facts.cash_flow(periods=4, as_dataframe=True)
    """
    # Always build the enhanced multi-period statement
    from edgar.entity.enhanced_statement import EnhancedStatementBuilder
    enhanced_stmt = EnhancedStatementBuilder().build_multi_period_statement(
        facts=self._facts,
        statement_type='CashFlow',
        periods=periods,
        annual=annual
    )
    # The statement carries the entity identity and display preference itself
    enhanced_stmt.company_name = self.name
    enhanced_stmt.cik = str(self.cik)
    enhanced_stmt.concise_format = concise_format

    if as_dataframe:
        return enhanced_stmt.to_dataframe()
    return enhanced_stmt
# Investment analytics
def calculate_ratios(self) -> Dict[str, float]:
    """
    Calculate common financial ratios.

    Not implemented yet: the result currently holds a single "note" entry
    whose value is a message string, not a ratio.

    Returns:
        Dictionary of ratio names to values
    """
    # Placeholder until the Phase 3 implementation lands
    placeholder = dict(note="Ratio calculation will be implemented in Phase 3")
    return placeholder
def peer_comparison(self, peer_ciks: List[int],
                    metrics: Optional[List[str]] = None) -> pd.DataFrame:
    """
    Compare key metrics with peer companies.

    Not implemented yet: both arguments are ignored and the result is a
    one-row DataFrame with a single "note" column.

    Args:
        peer_ciks: List of peer company CIKs
        metrics: Optional list of specific metrics to compare

    Returns:
        DataFrame with comparative analysis
    """
    # Placeholder until the Phase 3 implementation lands
    note_column = ["Peer comparison will be implemented in Phase 3"]
    return pd.DataFrame({"note": note_column})
def detect_anomalies(self) -> List[Dict[str, Any]]:
    """
    Detect unusual patterns or potential red flags.

    Not implemented yet: the result is a single placeholder entry.

    Returns:
        List of detected anomalies with descriptions
    """
    # Placeholder until the Phase 3 implementation lands
    placeholder = {
        "type": "placeholder",
        "message": "Anomaly detection will be implemented in Phase 3"
    }
    return [placeholder]
# AI-ready methods
def to_llm_context(self,
                   focus_areas: Optional[List[str]] = None,
                   time_period: str = "recent") -> Dict[str, Any]:
    """
    Build a structured context dictionary for LLM analysis.

    Args:
        focus_areas: Areas to analyse in more depth. 'profitability',
            'growth' and 'liquidity' are recognised; any other entry is
            skipped.
        time_period: Time period label ('recent', '5Y', '10Y'); any other
            value is described as all available historical data.

    Returns:
        Dictionary with "company", "data_summary", "recent_filings",
        "key_metrics" and "time_period" entries, plus "focus_analysis"
        when focus_areas is non-empty.
    """
    # Lookup tables are scanned with == so matching mirrors a plain
    # if/elif comparison chain.
    period_descriptions = (
        ("recent", "Most recent reported period"),
        ("5Y", "Five year historical view"),
        ("10Y", "Ten year historical view"),
    )
    focus_analyzers = (
        ("profitability", "_analyze_profitability"),
        ("growth", "_analyze_growth"),
        ("liquidity", "_analyze_liquidity"),
    )

    company_info = {
        "name": self.name,
        "cik": self.cik,
        "total_facts": len(self._facts),
    }
    context = {
        "company": company_info,
        "data_summary": self._generate_data_summary(),
        "recent_filings": self._get_recent_filings_summary(),
        "key_metrics": self._extract_key_metrics(time_period),
    }

    # Describe the requested time period, defaulting to the full history
    context["time_period"] = next(
        (text for label, text in period_descriptions if time_period == label),
        "All available historical data",
    )

    if not focus_areas:
        return context

    # Run the analyzer for each recognised focus area
    focus_results = {}
    context["focus_analysis"] = focus_results
    for area in focus_areas:
        for known_area, analyzer_name in focus_analyzers:
            if area == known_area:
                focus_results[area] = getattr(self, analyzer_name)()
                break
    return context
def to_agent_tools(self) -> List[Dict[str, Any]]:
    """
    Export facts as tools for AI agents (MCP-compatible).

    Three tool definitions are produced: statement retrieval, trend
    analysis, and single-fact lookup. Each tool name embeds the company
    name lower-cased with spaces replaced by underscores.

    Returns:
        List of tool definitions for agent consumption
    """
    company = self.name
    # Identifier fragment shared by all three tool names
    slug = company.lower().replace(' ', '_')

    def _text_param(description: str, **extra: Any) -> Dict[str, Any]:
        # String-typed parameter; extra keys follow the description
        return {"type": "string", "description": description, **extra}

    def _count_param(description: str, default: int) -> Dict[str, Any]:
        # Integer-typed parameter with a default value
        return {"type": "integer", "description": description, "default": default}

    financials_tool = {
        "name": f"get_{slug}_financials",
        "description": f"Retrieve financial data for {company}",
        "parameters": {
            "statement": _text_param(
                "Financial statement type (income_statement, balance_sheet, cash_flow)",
                enum=["income_statement", "balance_sheet", "cash_flow"],
            ),
            "periods": _count_param("Number of periods to retrieve", 4),
        },
        "returns": "Financial data with context",
    }
    trends_tool = {
        "name": f"analyze_{slug}_trends",
        "description": f"Analyze financial trends for {company}",
        "parameters": {
            "metric": _text_param("Financial metric to analyze (e.g., Revenue, NetIncome)"),
            "periods": _count_param("Number of periods to analyze", 8),
        },
        "returns": "Trend analysis with insights",
    }
    fact_tool = {
        "name": f"get_{slug}_fact",
        "description": f"Get a specific financial fact for {company}",
        "parameters": {
            "concept": _text_param("The financial concept to retrieve (e.g., Revenue, Assets)"),
            "period": _text_param("Optional period (e.g., 2024-Q4, 2024-FY)", required=False),
        },
        "returns": "Fact value with full context",
    }
    return [financials_tool, trends_tool, fact_tool]
# Helper methods
def _generate_data_summary(self) -> Dict[str, Any]:
"""Generate a summary of available data"""
unique_concepts = len(set(f.concept for f in self._facts))
# Get date range
dates = [f.filing_date for f in self._facts if f.filing_date]
if dates:
min_date = min(dates)
max_date = max(dates)
date_range = f"{min_date} to {max_date}"
else:
date_range = "Unknown"
# Count by form type
form_counts = defaultdict(int)
for fact in self._facts:
form_counts[fact.form_type] += 1
return {
"total_facts": len(self._facts),
"unique_concepts": unique_concepts,
"date_range": date_range,
"form_types": dict(form_counts),
"fiscal_years": sorted(set(f.fiscal_year for f in self._facts if f.fiscal_year))
}
def _get_recent_filings_summary(self) -> List[Dict[str, Any]]:
"""Get summary of recent filings"""
# Group facts by filing
filings = defaultdict(list)
for fact in self._facts:
key = (fact.form_type, fact.filing_date, fact.accession)
filings[key].append(fact)
# Sort by filing date
recent_filings = sorted(filings.keys(), key=lambda x: x[1] or date.min, reverse=True)[:5]
summaries = []
for form_type, filing_date, accession in recent_filings:
summaries.append({
"form": form_type,
"date": str(filing_date) if filing_date else "Unknown",
"fact_count": len(filings[(form_type, filing_date, accession)])
})
return summaries
def _extract_key_metrics(self, time_period: str) -> Dict[str, Any]:
"""Extract key financial metrics for the specified time period"""
# Define key metrics to extract
key_concepts = [
'Revenue', 'NetIncome', 'Assets', 'Liabilities',
'StockholdersEquity', 'OperatingIncome', 'EarningsPerShare'
]
metrics = {}
for concept in key_concepts:
fact = self.get_fact(concept)
if fact:
metrics[concept] = {
"value": fact.numeric_value or fact.value,
"unit": fact.unit,
"period": f"{fact.fiscal_period} {fact.fiscal_year}",
"quality": fact.data_quality.value
}
return metrics
def _analyze_profitability(self) -> Dict[str, Any]:
"""Analyze profitability metrics"""
revenue = self.get_fact('Revenue')
net_income = self.get_fact('NetIncome')
analysis = {}
if revenue and net_income and revenue.numeric_value and net_income.numeric_value:
net_margin = (net_income.numeric_value / revenue.numeric_value) * 100
analysis["net_margin"] = {
"value": round(net_margin, 2),
"unit": "percent",
"interpretation": f"For every dollar of revenue, {self.name} generates ${net_margin / 100:.2f} in profit"
}
return analysis
def _analyze_growth(self) -> Dict[str, Any]:
"""Analyze growth trends"""
# Get revenue time series
revenue_series = self.time_series('Revenue', periods=8)
if len(revenue_series) >= 2:
# Calculate year-over-year growth
latest = revenue_series.iloc[0]['numeric_value']
prior = revenue_series.iloc[1]['numeric_value']
if prior and prior != 0:
growth_rate = ((latest - prior) / prior) * 100
return {
"revenue_growth_yoy": {
"value": round(growth_rate, 2),
"unit": "percent",
"period_comparison": f"{revenue_series.iloc[0]['fiscal_period']} vs {revenue_series.iloc[1]['fiscal_period']}"
}
}
return {"message": "Insufficient data for growth analysis"}
def _analyze_liquidity(self) -> Dict[str, Any]:
"""Analyze liquidity metrics"""
current_assets = self.get_fact('CurrentAssets')
current_liabilities = self.get_fact('CurrentLiabilities')
if current_assets and current_liabilities and current_assets.numeric_value and current_liabilities.numeric_value:
current_ratio = current_assets.numeric_value / current_liabilities.numeric_value
return {
"current_ratio": {
"value": round(current_ratio, 2),
"interpretation": f"{self.name} has ${current_ratio:.2f} in current assets for every $1 of current liabilities"
}
}
return {"message": "Insufficient data for liquidity analysis"}
# Helper methods for standardized concept access (FEAT-411)
def _get_standardized_concept_value(self,
                                    concept_variants: List[str],
                                    period: Optional[str] = None,
                                    unit: Optional[str] = None,
                                    fallback_calculation: Optional[callable] = None,
                                    return_detailed: bool = False,
                                    strict_unit_match: bool = False) -> Optional[float]:
    """
    Resolve a standardized concept to a value, trying several concept names.

    Each name in concept_variants is looked up as given and then with a
    'us-gaap:' prefix. The first fact that has a numeric value and
    normalizes successfully to the target unit wins. If none does, the
    optional fallback calculation is tried.

    Args:
        concept_variants: List of concept names to try in priority order
        period: Optional period filter passed to the fact lookup
        unit: Optional unit filter (defaults to USD)
        fallback_calculation: Optional callable invoked as
            ``fallback_calculation(period, target_unit)`` when no concept matches
        return_detailed: If True, return UnitResult instead of just value
        strict_unit_match: Forwarded to the unit normalizer

    Returns:
        Numeric value or None if not found (or UnitResult if return_detailed=True)
    """
    from edgar.entity.unit_handling import UnitNormalizer, UnitResult

    target_unit = unit or 'USD'

    # Pass 1: direct lookup, bare name first and namespaced name second
    for base_name in concept_variants:
        for candidate in (base_name, f'us-gaap:{base_name}'):
            fact = self.get_fact(candidate, period)
            if not fact or fact.numeric_value is None:
                continue

            normalized = UnitNormalizer.get_normalized_value(
                fact=fact,
                target_unit=target_unit,
                apply_scale=True,
                strict_unit_match=strict_unit_match
            )
            if not normalized.success:
                continue
            return normalized if return_detailed else normalized.value

    # Pass 2: derive the value from components when a fallback is supplied
    if fallback_calculation:
        try:
            computed = fallback_calculation(period, target_unit)
            if computed is not None:
                if not return_detailed:
                    return computed
                return UnitResult(
                    value=computed,
                    normalized_unit=UnitNormalizer.normalize_unit(target_unit),
                    original_unit=target_unit,
                    success=True,
                    error_reason="Calculated from components"
                )
        except Exception as e:
            # A failing fallback is reported only in detailed mode;
            # otherwise execution falls through to the not-found result.
            if return_detailed:
                return UnitResult(
                    value=None,
                    normalized_unit=None,
                    original_unit=target_unit or "",
                    success=False,
                    error_reason=f"Fallback calculation failed: {str(e)}"
                )

    # Nothing matched
    if not return_detailed:
        return None
    return UnitResult(
        value=None,
        normalized_unit=None,
        original_unit=target_unit or "",
        success=False,
        error_reason="No matching concept found",
        suggestions=["Try checking if company uses alternative concept names"]
    )
def _calculate_revenue_from_components(self, period: Optional[str] = None, unit: str = 'USD') -> Optional[float]:
    """
    Derive revenue as Gross Profit + Cost of Revenue.

    Used when explicit revenue is not available. 'CostOfRevenue' is
    preferred; alternative cost concepts are tried when it is missing.

    Args:
        period: Optional period filter passed to the fact lookup
        unit: Target unit for the strict normalization attempt

    Returns:
        The sum of the two normalized values, or None when a component is
        missing, lacks a numeric value, or cannot be normalized.
    """
    from edgar.entity.unit_handling import UnitNormalizer

    gp_fact = self.get_fact('GrossProfit', period)
    cost_fact = self.get_fact('CostOfRevenue', period)
    if not cost_fact:
        # Walk the alternative cost concepts until one yields a fact
        for alternative in ('CostOfGoodsAndServicesSold', 'CostOfGoodsSold', 'CostOfSales'):
            cost_fact = self.get_fact(alternative, period)
            if cost_fact:
                break

    if not (gp_fact and cost_fact):
        return None
    if gp_fact.numeric_value is None or cost_fact.numeric_value is None:
        return None

    # First attempt: both components strictly in the requested unit
    strict_gp = UnitNormalizer.get_normalized_value(gp_fact, target_unit=unit, apply_scale=True, strict_unit_match=True)
    strict_cost = UnitNormalizer.get_normalized_value(cost_fact, target_unit=unit, apply_scale=True, strict_unit_match=True)
    if strict_gp.success and strict_cost.success:
        return strict_gp.value + strict_cost.value

    # Second attempt: units that are compatible with each other,
    # normalized without a target unit or strict matching
    if not UnitNormalizer.are_compatible(gp_fact.unit, cost_fact.unit):
        return None
    loose_gp = UnitNormalizer.get_normalized_value(gp_fact, apply_scale=True, strict_unit_match=False)
    loose_cost = UnitNormalizer.get_normalized_value(cost_fact, apply_scale=True, strict_unit_match=False)
    if loose_gp.success and loose_cost.success:
        return loose_gp.value + loose_cost.value
    return None
def _calculate_gross_profit_from_components(self, period: Optional[str] = None, unit: str = 'USD') -> Optional[float]:
    """
    Derive gross profit as Revenue - Cost of Revenue.

    Used when explicit gross profit is not available. Revenue facts are
    looked up directly rather than through the standardized revenue path,
    which avoids infinite recursion.

    Args:
        period: Optional period filter passed to the fact lookup
        unit: Target unit for the first normalization attempt

    Returns:
        The difference of the two normalized values, or None when a
        component is missing, lacks a numeric value, or cannot be normalized.
    """
    from edgar.entity.unit_handling import UnitNormalizer

    def _first_available(concept_names):
        # Return the first truthy fact among the names (None if the list is empty)
        found = None
        for concept_name in concept_names:
            found = self.get_fact(concept_name, period)
            if found:
                break
        return found

    revenue_fact = _first_available(
        ['RevenueFromContractWithCustomerExcludingAssessedTax', 'SalesRevenueNet', 'Revenues', 'Revenue']
    )

    cost_fact = self.get_fact('CostOfRevenue', period)
    if not cost_fact:
        cost_fact = _first_available(['CostOfGoodsAndServicesSold', 'CostOfGoodsSold', 'CostOfSales'])

    if not (revenue_fact and cost_fact):
        return None
    if revenue_fact.numeric_value is None or cost_fact.numeric_value is None:
        return None

    # First attempt: normalize both components to the requested unit
    targeted_revenue = UnitNormalizer.get_normalized_value(revenue_fact, target_unit=unit, apply_scale=True)
    targeted_cost = UnitNormalizer.get_normalized_value(cost_fact, target_unit=unit, apply_scale=True)
    if targeted_revenue.success and targeted_cost.success:
        return targeted_revenue.value - targeted_cost.value

    # Second attempt: units that are compatible with each other,
    # normalized without a target unit
    if not UnitNormalizer.are_compatible(revenue_fact.unit, cost_fact.unit):
        return None
    untargeted_revenue = UnitNormalizer.get_normalized_value(revenue_fact, apply_scale=True)
    untargeted_cost = UnitNormalizer.get_normalized_value(cost_fact, apply_scale=True)
    if untargeted_revenue.success and untargeted_cost.success:
        return untargeted_revenue.value - untargeted_cost.value
    return None
def get_concept_mapping_info(self, concept_variants: List[str]) -> Dict[str, Any]:
    """
    Report which of the given concept names resolve to a fact.

    Useful for debugging standardized method behavior and understanding
    company-specific concept usage.

    Args:
        concept_variants: List of concept names to check

    Returns:
        Dictionary with 'available' and 'missing' name lists, plus
        'fact_details' (label, unit, period, value and filing date) for
        each available concept.

    Example:
        >>> info = facts.get_concept_mapping_info(['Revenue', 'Revenues', 'NetSales'])
        >>> print(f"Available concepts: {info['available']}")
    """
    available, missing, fact_details = [], [], {}

    for name in concept_variants:
        fact = self.get_fact(name)
        if not fact:
            missing.append(name)
            continue

        available.append(name)
        fact_details[name] = dict(
            label=fact.label,
            unit=fact.unit,
            latest_period=f"{fact.fiscal_period} {fact.fiscal_year}",
            latest_value=fact.numeric_value,
            filing_date=fact.filing_date,
        )

    return {
        'available': available,
        'missing': missing,
        'fact_details': fact_details,
    }
# Enhanced methods with detailed unit information (FEAT-411 Unit Handling)
def get_revenue_detailed(self, period: Optional[str] = None, unit: Optional[str] = None):
    """
    Get revenue with detailed unit information and error reporting.

    Tries the revenue concept names in priority order and falls back to
    the component-based revenue calculation when none of them resolves.

    Args:
        period: Optional period in format "YYYY-QN" or "YYYY-FY"
        unit: Optional unit filter (defaults to USD)

    Returns:
        UnitResult with value, unit info, and error details

    Example:
        >>> result = facts.get_revenue_detailed()
        >>> if result.success:
        ...     print(f"Revenue: ${result.value/1e9:.1f}B (unit: {result.normalized_unit})")
        ... else:
        ...     print(f"Error: {result.error_reason}")
        ...     for suggestion in result.suggestions:
        ...         print(f"  - {suggestion}")
    """
    # Priority-ordered concept names under which revenue may be reported
    revenue_concepts = [
        'RevenueFromContractWithCustomerExcludingAssessedTax',
        'SalesRevenueNet',
        'Revenues',
        'Revenue',
        'TotalRevenues',
        'NetSales',
    ]
    lookup_options = dict(
        concept_variants=revenue_concepts,
        period=period,
        unit=unit,
        fallback_calculation=self._calculate_revenue_from_components,
        return_detailed=True,
    )
    return self._get_standardized_concept_value(**lookup_options)
def get_net_income_detailed(self, period: Optional[str] = None, unit: Optional[str] = None):
    """
    Get net income with detailed unit information and error reporting.

    Tries the net income concept names in priority order; no fallback
    calculation is supplied.

    Args:
        period: Optional period in format "YYYY-QN" or "YYYY-FY"
        unit: Optional unit filter (defaults to USD)

    Returns:
        UnitResult with value, unit info, and error details
    """
    # Priority-ordered concept names under which net income may be reported
    net_income_concepts = [
        'NetIncomeLoss',
        'ProfitLoss',
        'NetIncome',
        'NetEarnings',
        'NetIncomeLossAttributableToParent',
    ]
    lookup_options = dict(
        concept_variants=net_income_concepts,
        period=period,
        unit=unit,
        return_detailed=True,
    )
    return self._get_standardized_concept_value(**lookup_options)
def check_unit_compatibility(self, concept1: str, concept2: str, period: Optional[str] = None) -> Dict[str, Any]:
    """
    Check whether two concepts have units that can be combined in calculations.

    Both facts are looked up first. If either is missing, the result names
    the first missing concept and no unit comparison is made.

    Args:
        concept1: First concept name
        concept2: Second concept name
        period: Optional period filter

    Returns:
        Dictionary with compatibility info and suggestions. The raw and
        normalized units of both facts are included only when both facts
        were found.

    Example:
        >>> compat = facts.check_unit_compatibility('Revenue', 'CostOfRevenue')
        >>> if compat['compatible']:
        ...     print("Units are compatible for calculations")
        ... else:
        ...     print(f"Unit issue: {compat['issue']}")
    """
    from edgar.entity.unit_handling import UnitNormalizer

    fact1 = self.get_fact(concept1, period)
    fact2 = self.get_fact(concept2, period)

    suggestions = []
    result = {
        'compatible': False,
        'concept1': concept1,
        'concept2': concept2,
        'fact1_found': fact1 is not None,
        'fact2_found': fact2 is not None,
        'issue': None,
        'suggestions': suggestions
    }

    # Stop at the first concept whose fact could not be found
    for concept_name, found_fact in ((concept1, fact1), (concept2, fact2)):
        if not found_fact:
            result['issue'] = f"Concept '{concept_name}' not found"
            suggestions.append(f"Check if {concept_name} exists for this company")
            return result

    # Both facts exist: record the verdict and the units involved
    compatible = UnitNormalizer.are_compatible(fact1.unit, fact2.unit)
    result['compatible'] = compatible
    result['fact1_unit'] = fact1.unit
    result['fact2_unit'] = fact2.unit
    result['fact1_normalized'] = UnitNormalizer.normalize_unit(fact1.unit)
    result['fact2_normalized'] = UnitNormalizer.normalize_unit(fact2.unit)

    if compatible:
        return result

    # Explain why the units cannot be combined
    result['issue'] = f"Incompatible units: {fact1.unit} vs {fact2.unit}"
    unit1_type = UnitNormalizer.get_unit_type(fact1.unit)
    unit2_type = UnitNormalizer.get_unit_type(fact2.unit)
    if unit1_type != unit2_type:
        suggestions.append(f"Unit type mismatch: {unit1_type.value} vs {unit2_type.value}")
    else:
        suggestions.append("Same unit type but different representations")
    return result