1733 lines
63 KiB
Python
1733 lines
63 KiB
Python
"""
|
|
Enhanced EntityFacts class for AI-ready company facts analysis.
|
|
|
|
This module provides the main EntityFacts class with investment-focused
|
|
analytics and AI-ready interfaces.
|
|
"""
|
|
|
|
from collections import defaultdict
|
|
from datetime import date
|
|
from functools import lru_cache
|
|
from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional
|
|
|
|
if TYPE_CHECKING:
|
|
from edgar.entity.query import FactQuery
|
|
from edgar.enums import PeriodType
|
|
|
|
from typing import Union
|
|
|
|
import httpx
|
|
import orjson as json
|
|
import pandas as pd
|
|
from pandas.core.interchange.dataframe_protocol import DataFrame
|
|
from rich.box import SIMPLE, SIMPLE_HEAVY
|
|
from rich.columns import Columns
|
|
from rich.console import Group
|
|
from rich.padding import Padding
|
|
from rich.panel import Panel
|
|
from rich.table import Table
|
|
from rich.text import Text
|
|
|
|
from edgar.core import log
|
|
from edgar.entity.enhanced_statement import MultiPeriodStatement
|
|
from edgar.entity.models import FinancialFact
|
|
from edgar.httprequests import download_json
|
|
from edgar.storage import get_edgar_data_directory, is_using_local_storage
|
|
|
|
|
|
class NoCompanyFactsFound(Exception):
    """
    Exception raised when no company facts are found for a given CIK.

    Attributes:
        cik: The CIK that was looked up.
        message: Human-readable description of the failure.
    """

    def __init__(self, cik: int):
        self.cik = cik
        self.message = f"""No Company facts found for cik {cik}"""
        # Hand the message to Exception so str(exc), logs and tracebacks are not empty.
        super().__init__(self.message)
|
|
|
|
|
|
def download_company_facts_from_sec(cik: int) -> Dict[str, Any]:
    """
    Fetch the XBRL company-facts JSON document for a CIK from data.sec.gov.

    Args:
        cik: The company CIK; zero-padded to 10 digits when building the URL.

    Returns:
        Whatever ``download_json`` returns for the company-facts URL.

    Raises:
        NoCompanyFactsFound: If the request fails with HTTP status 404.
        httpx.HTTPStatusError: For any other HTTP status error (re-raised unchanged).
    """
    url = f"https://data.sec.gov/api/xbrl/companyfacts/CIK{cik:010}.json"
    try:
        payload = download_json(url)
    except httpx.HTTPStatusError as err:
        # Only "not found" is translated into the domain exception.
        if err.response.status_code != 404:
            raise
        log.warning(f"No company facts found on url {url}")
        raise NoCompanyFactsFound(cik=cik) from None
    return payload
|
|
|
|
|
|
def load_company_facts_from_local(cik: int) -> Optional[Dict[str, Any]]:
    """
    Load company facts from the local ``companyfacts`` data directory.

    Args:
        cik: The company CIK; zero-padded to 10 digits to form the file name.

    Returns:
        The parsed JSON document, or None when the ``companyfacts`` directory
        itself does not exist.

    Raises:
        NoCompanyFactsFound: If the directory exists but has no file for this CIK.
    """
    company_facts_dir = get_edgar_data_directory() / "companyfacts"
    if not company_facts_dir.exists():
        return None

    company_facts_file = company_facts_dir / f"CIK{cik:010}.json"
    if not company_facts_file.exists():
        raise NoCompanyFactsFound(cik=cik)

    # Pass raw bytes to the JSON parser instead of read_text(): read_text() decodes
    # with the platform's locale encoding, which can corrupt or reject UTF-8 JSON
    # (e.g. on Windows). The parser decodes UTF-8 bytes itself.
    return json.loads(company_facts_file.read_bytes())
|
|
|
|
|
|
@lru_cache(maxsize=32)
def get_company_facts(cik: int):
    """
    Load and parse the company facts for a CIK.

    The raw JSON is read from local storage when ``is_using_local_storage()``
    is true, otherwise downloaded from the SEC. Results are memoized per
    ``cik`` by ``lru_cache`` (up to 32 entries).

    Args:
        cik: The company CIK

    Returns:
        The value returned by ``EntityFactsParser.parse_company_facts`` for the
        loaded JSON.

    Raises:
        NoCompanyFactsFound: If no facts are found for the given CIK

    NOTE(review): the local loader may return None (missing data directory);
    that None is forwarded to the parser as-is - confirm the parser handles it.
    """
    use_local = is_using_local_storage()
    loader = load_company_facts_from_local if use_local else download_company_facts_from_sec
    raw_facts = loader(cik)

    # Imported lazily, after loading, exactly as the file already does.
    from edgar.entity.parser import EntityFactsParser
    return EntityFactsParser.parse_company_facts(raw_facts)
|
|
|
|
|
|
class EntityFacts:
|
|
"""
|
|
AI-ready company facts with investment-focused analytics.
|
|
|
|
This class provides a comprehensive interface for analyzing company financial data,
|
|
with support for both traditional DataFrame-based workflows and modern AI/LLM
|
|
consumption patterns.
|
|
"""
|
|
|
|
def __init__(self, cik: int, name: str, facts: List[FinancialFact]):
    """
    Store the company identity and its facts, then build the lookup indices.

    Args:
        cik: Company CIK number
        name: Company name
        facts: List of FinancialFact objects (kept by reference, not copied)
    """
    self.cik, self.name = cik, name
    self._facts = facts
    # _build_indices() reads self._facts, so it must run after the assignment above.
    self._fact_index = self._build_indices()
    self._cache = {}
|
|
|
|
def _build_indices(self) -> Dict[str, Dict]:
    """
    Group ``self._facts`` into lookup tables used for querying.

    Returns:
        A dict of six ``defaultdict(list)`` indices:
        ``by_concept`` (keyed by concept and, when a label exists, by the
        lower-cased label), ``by_period`` (``"<fiscal_year>-<fiscal_period>"``),
        ``by_statement`` (only facts with a truthy statement type), ``by_form``,
        ``by_fiscal_year`` and ``by_fiscal_period``.
    """
    index_names = (
        'by_concept',
        'by_period',
        'by_statement',
        'by_form',
        'by_fiscal_year',
        'by_fiscal_period',
    )
    indices = {index_name: defaultdict(list) for index_name in index_names}

    concept_idx = indices['by_concept']
    period_idx = indices['by_period']
    statement_idx = indices['by_statement']
    form_idx = indices['by_form']
    year_idx = indices['by_fiscal_year']
    fiscal_period_idx = indices['by_fiscal_period']

    for item in self._facts:
        # Concept index doubles as a case-insensitive label index.
        concept_idx[item.concept].append(item)
        label = item.label
        if label:
            concept_idx[label.lower()].append(item)

        year, fiscal_period = item.fiscal_year, item.fiscal_period
        period_idx[f"{year}-{fiscal_period}"].append(item)
        year_idx[year].append(item)
        fiscal_period_idx[fiscal_period].append(item)

        statement = item.statement_type
        if statement:
            statement_idx[statement].append(item)

        form_idx[item.form_type].append(item)

    return indices
|
|
|
|
def __len__(self) -> int:
    """Number of facts held by this instance."""
    fact_count = len(self._facts)
    return fact_count
|
|
|
|
def __iter__(self) -> Iterator[FinancialFact]:
    """Return an iterator over the stored facts, in stored order."""
    stored_facts = self._facts
    return iter(stored_facts)
|
|
|
|
def get_all_facts(self) -> List[FinancialFact]:
    """
    Return every fact held by this entity.

    Returns:
        The internal list of FinancialFact objects (the same list object,
        not a copy).
    """
    every_fact = self._facts
    return every_fact
|
|
|
|
def to_dataframe(self,
                 include_metadata: bool = False,
                 columns: Optional[List[str]] = None) -> pd.DataFrame:
    """
    Export all facts to a pandas DataFrame for analysis.

    This method provides direct access to all financial facts in a tabular format,
    enabling custom analysis, filtering, and integration with data science workflows.

    Args:
        include_metadata: Include filing references and data quality metadata (default: False)
        columns: Specific columns to include. If None, includes standard columns.
            Metadata columns can only be selected when ``include_metadata`` is True.

    Returns:
        DataFrame with one row per fact, sorted by concept and period_end
        (whichever of the two are present). When there are no facts the frame
        is empty but still carries the expected columns.

    Raises:
        KeyError: If ``columns`` names a column that is not available.

    Example:
        Basic export for exploration:
        >>> facts = company.get_facts()
        >>> df = facts.to_dataframe()
        >>> print(df.head())

        Export with metadata for audit trail:
        >>> df_full = facts.to_dataframe(include_metadata=True)

        Custom columns for specific analysis:
        >>> df_slim = facts.to_dataframe(columns=['concept', 'fiscal_year', 'numeric_value'])

        Filter and analyze:
        >>> df = annual_facts.to_dataframe()
        >>> revenue = df[df['concept'].str.contains('Revenue')]
        >>> print(revenue[['fiscal_year', 'numeric_value']])
    """
    standard_columns = [
        'concept', 'label', 'value', 'numeric_value', 'unit',
        'period_type', 'period_start', 'period_end', 'fiscal_year', 'fiscal_period',
    ]
    metadata_columns = [
        'accession', 'filing_date', 'form_type', 'statement_type', 'taxonomy',
        'scale', 'data_quality', 'is_audited', 'confidence_score',
    ]

    records = []
    for fact in self._facts:
        record = {
            'concept': fact.concept,
            'label': fact.label,
            'value': fact.value,
            'numeric_value': fact.numeric_value,
            'unit': fact.unit,
            'period_type': fact.period_type,
            'period_start': fact.period_start,
            'period_end': fact.period_end,
            'fiscal_year': fact.fiscal_year,
            'fiscal_period': fact.fiscal_period,
        }
        if include_metadata:
            record.update({
                'accession': fact.accession,
                'filing_date': fact.filing_date,
                'form_type': fact.form_type,
                'statement_type': fact.statement_type,
                'taxonomy': fact.taxonomy,
                'scale': fact.scale,
                # data_quality is an enum-like object; export its plain value.
                'data_quality': fact.data_quality.value if fact.data_quality else None,
                'is_audited': fact.is_audited,
                'confidence_score': fact.confidence_score,
            })
        records.append(record)

    # Pass the schema explicitly so an empty fact list still yields the expected
    # columns; otherwise selecting `columns` below raises KeyError on an empty frame.
    available_columns = (standard_columns + metadata_columns) if include_metadata else standard_columns
    df = pd.DataFrame(records, columns=available_columns)

    if columns is not None:
        df = df[columns]

    # Sort for consistency, using only the sort keys that survived column selection.
    sort_cols = [name for name in ('concept', 'period_end') if name in df.columns]
    if sort_cols and not df.empty:
        df = df.sort_values(sort_cols).reset_index(drop=True)

    return df
|
|
|
|
def filter_by_period_type(self, period_type: Union[str, 'PeriodType']) -> 'EntityFacts':
    """
    Build a new EntityFacts containing only the facts of one period type.

    The filtering itself is delegated to the query interface
    (``self.query().by_period_type(...)``); this instance is not modified.

    Args:
        period_type: Period type to filter by - either PeriodType enum or string
            ('annual', 'quarterly', 'monthly')

    Returns:
        New EntityFacts instance (same cik and name) with the filtered facts

    Example:
        >>> annual_facts = facts.filter_by_period_type('annual')
        >>> quarterly_facts = facts.filter_by_period_type(PeriodType.QUARTERLY)
    """
    period_query = self.query().by_period_type(period_type)
    matching_facts = period_query.execute()
    return EntityFacts(cik=self.cik, name=self.name, facts=matching_facts)
|
|
|
|
def __rich__(self):
    """
    Creates a rich representation providing an at-a-glance view of company facts.

    The returned Panel contains five sections: summary statistics, key financial
    metrics (looked up through ``get_fact``), fact counts per statement type,
    the latest filing date per form type (top 5 by date), and a data quality
    breakdown.

    Returns:
        rich.panel.Panel: The composed panel.
    """
    facts = self._facts
    total_facts = len(facts)

    title = Text.assemble(
        "📊 ",
        (self.name, "bold green"),
        " Financial Facts"
    )

    # One pass over the facts gathers every tally the sections below need.
    statement_counts = defaultdict(int)
    quality_counts = defaultdict(int)
    filing_info = defaultdict(lambda: {'count': 0, 'date': None})
    filing_dates = []
    concepts = set()
    fiscal_years = set()
    audited_count = 0
    for fact in facts:
        concepts.add(fact.concept)
        if fact.fiscal_year:
            fiscal_years.add(fact.fiscal_year)
        if fact.statement_type:
            statement_counts[fact.statement_type] += 1
        if fact.data_quality:
            quality_counts[fact.data_quality.value] += 1
        if fact.is_audited:
            audited_count += 1

        form_entry = filing_info[fact.form_type]
        form_entry['count'] += 1
        if fact.filing_date:
            filing_dates.append(fact.filing_date)
            # Track the most recent filing date seen for this form type.
            if form_entry['date'] is None or fact.filing_date > form_entry['date']:
                form_entry['date'] = fact.filing_date

    # Summary Statistics Table
    stats = Table(box=SIMPLE_HEAVY, show_header=False, padding=(0, 1))
    stats.add_column("Metric", style="dim")
    stats.add_column("Value", style="bold")

    if filing_dates:
        date_range = f"{min(filing_dates).strftime('%Y-%m-%d')} to {max(filing_dates).strftime('%Y-%m-%d')}"
    else:
        date_range = "No dates available"

    year_range = f"{min(fiscal_years)} - {max(fiscal_years)}" if fiscal_years else "N/A"

    stats.add_row("CIK", str(self.cik))
    stats.add_row("Total Facts", f"{total_facts:,}")
    stats.add_row("Unique Concepts", f"{len(concepts):,}")
    stats.add_row("Date Range", date_range)
    stats.add_row("Fiscal Years", year_range)

    stats_panel = Panel(
        stats,
        title="📈 Summary Statistics",
        border_style="bright_black"
    )

    # Key Financial Metrics Table
    metrics = Table(box=SIMPLE, show_header=True, padding=(0, 1))
    metrics.add_column("Metric", style="bold")
    metrics.add_column("Value", justify="right")
    metrics.add_column("Period")
    metrics.add_column("Quality", style="dim")

    key_metrics = [
        ('Revenue', 'Revenue'),
        ('Net Income', 'NetIncome'),
        ('Total Assets', 'Assets'),
        ('Total Liabilities', 'Liabilities'),
        ('Stockholders Equity', 'StockholdersEquity'),
        ('Operating Income', 'OperatingIncome'),
        ('Public Float', 'dei:EntityPublicFloat'),
        ('Shares Outstanding', 'dei:EntityCommonStockSharesOutstanding')
    ]

    has_metrics = False
    for label, concept in key_metrics:
        fact = self.get_fact(concept)
        if not fact:
            continue
        has_metrics = True

        # `is not None` so that a genuine numeric value of 0 is still formatted as a number.
        if fact.numeric_value is not None:
            # Share counts are plain numbers; everything else is shown with a "$" prefix.
            # `or ''` guards against facts that carry no unit.
            if 'share' in (fact.unit or '').lower():
                value = f"{fact.numeric_value:,.0f}"
            else:
                value = f"${fact.numeric_value:,.0f}"
        else:
            value = str(fact.value)

        period = f"{fact.fiscal_period} {fact.fiscal_year}"
        quality = fact.data_quality.value if fact.data_quality else "N/A"
        metrics.add_row(label, value, period, quality)

    metrics_body = metrics if has_metrics else Text("No key financial metrics available", style="dim")
    metrics_panel = Panel(
        metrics_body,
        title="💰 Key Financial Metrics",
        border_style="bright_black"
    )

    # Available Statements
    if statement_counts:
        statements = Table(box=SIMPLE, show_header=True, padding=(0, 1))
        statements.add_column("Statement Type", style="bold")
        statements.add_column("Fact Count", justify="right")
        for stmt_type, count in sorted(statement_counts.items()):
            statements.add_row(stmt_type, f"{count:,}")
        statements_body = statements
    else:
        statements_body = Text("No statement information available", style="dim")
    statements_panel = Panel(
        statements_body,
        title="📋 Available Statements",
        border_style="bright_black"
    )

    # Recent Filings
    filings = Table(box=SIMPLE, show_header=True, padding=(0, 1))
    filings.add_column("Form", style="bold")
    filings.add_column("Latest Filing")
    filings.add_column("Facts", justify="right")

    # Most recent first; forms without any filing date sort last via date.min.
    sorted_filings = sorted(
        filing_info.items(),
        key=lambda item: item[1]['date'] or date.min,
        reverse=True
    )[:5]  # Show top 5

    for form_type, info in sorted_filings:
        date_str = info['date'].strftime('%Y-%m-%d') if info['date'] else "N/A"
        filings.add_row(form_type, date_str, f"{info['count']:,}")

    filings_panel = Panel(
        filings,
        title="📄 Recent Filings",
        border_style="bright_black"
    )

    # Data Quality Summary
    quality = Table(box=SIMPLE, show_header=False, padding=(0, 1))
    quality.add_column("Metric", style="dim")
    quality.add_column("Value", style="bold")

    # Both counters are only non-zero when there is at least one fact,
    # so the divisions below cannot divide by zero.
    for q_level, count in sorted(quality_counts.items()):
        percentage = (count / total_facts) * 100
        quality.add_row(f"{q_level} Quality", f"{count:,} ({percentage:.1f}%)")

    if audited_count > 0:
        audit_percentage = (audited_count / total_facts) * 100
        quality.add_row("Audited Facts", f"{audited_count:,} ({audit_percentage:.1f}%)")

    quality_panel = Panel(
        quality,
        title="✅ Data Quality",
        border_style="bright_black"
    )

    # Combine all sections
    content = Group(
        Padding("", (1, 0, 0, 0)),
        stats_panel,
        Columns([metrics_panel, statements_panel], equal=True, expand=True),
        Columns([filings_panel, quality_panel], equal=True, expand=True)
    )

    # Create the main panel
    return Panel(
        content,
        title=title,
        subtitle=f"SEC XBRL Facts • {total_facts:,} total facts",
        border_style="blue"
    )
|
|
|
|
def __repr__(self):
    """Return the string produced by ``repr_rich`` for the ``__rich__`` panel."""
    from edgar.richtools import repr_rich
    panel = self.__rich__()
    return repr_rich(panel)
|
|
|
|
# Core query interface
|
|
def query(self) -> 'FactQuery':
    """
    Create a query builder over this entity's facts and indices.

    Returns:
        FactQuery: A new query builder instance

    Example:
        >>> facts.query().by_concept('Revenue').latest(4).to_dataframe()
    """
    # Imported lazily inside the method, as the file does for all FactQuery uses.
    from edgar.entity.query import FactQuery
    builder = FactQuery(self._facts, self._fact_index)
    return builder
|
|
|
|
# Convenience methods for common queries
|
|
def get_fact(self, concept: str, period: Optional[str] = None) -> Optional[FinancialFact]:
    """
    Get a single fact by concept and optional period.

    The concept index is probed with the name exactly as given first, then
    with its lower-cased form (labels are indexed lower-cased).

    Args:
        concept: Concept name or label
        period: Optional period in format "YYYY-QN" or "YYYY-FY"

    Returns:
        The matching fact with the latest (filing_date, period_end), or None if
        not found. Facts with a missing date rank below facts that have one.
    """
    by_concept = self._fact_index['by_concept']

    # .get() rather than [] so probing the defaultdict index never inserts empty keys.
    candidates = by_concept.get(concept, [])
    if not candidates:
        candidates = by_concept.get(concept.lower(), [])

    if period:
        candidates = [f for f in candidates if f"{f.fiscal_year}-{f.fiscal_period}" == period]

    if not candidates:
        return None

    def recency(fact):
        # Each date is preceded by an "is present" flag so that a None date is
        # never ordered against a real date (which would raise TypeError);
        # tuple comparison is decided by the flag in that case.
        return (
            fact.filing_date is not None, fact.filing_date,
            fact.period_end is not None, fact.period_end,
        )

    return max(candidates, key=recency)
|
|
|
|
def time_series(self, concept: str, periods: int = 20) -> pd.DataFrame:
    """
    Return the most recently filed observations of one concept as a DataFrame.

    Args:
        concept: Concept name or label
        periods: Maximum number of rows to return

    Returns:
        DataFrame built from a query sorted by filing_date (newest first),
        requested with the fields period_end, numeric_value, fiscal_period and
        fiscal_year, truncated to the first ``periods`` rows.
    """
    from edgar.entity.query import FactQuery

    concept_query = FactQuery(self._facts, self._fact_index).by_concept(concept)
    newest_first = concept_query.sort_by('filing_date', ascending=False)
    frame = newest_first.to_dataframe('period_end', 'numeric_value', 'fiscal_period', 'fiscal_year')
    return frame.head(periods)
|
|
|
|
# DEI (Document and Entity Information) helpers
|
|
def dei_facts(self, as_of: Optional[date] = None) -> pd.DataFrame:
    """
    Get Document and Entity Information (DEI) facts.

    DEI facts contain company metadata like entity name, trading symbol,
    fiscal year-end, shares outstanding, public float, etc.

    Args:
        as_of: Optional date for point-in-time view (gets latest if not specified)

    Returns:
        DataFrame with columns concept, label, value (formatted), raw_value,
        unit, period_end, filing_date and form_type, sorted by concept.
        An empty, column-less DataFrame when no DEI facts match.

    Example:
        # Get latest DEI facts
        dei = facts.dei_facts()

        # Get DEI facts as of specific date
        dei = facts.dei_facts(as_of=date(2024, 12, 31))
    """
    from edgar.entity.query import FactQuery

    # Non-exact match on the 'dei:' taxonomy prefix.
    query = FactQuery(self._facts, self._fact_index).by_concept('dei:', exact=False)
    query = query.as_of(as_of) if as_of else query.latest_instant()

    facts = query.execute()
    if not facts:
        return pd.DataFrame()

    records = [
        {
            'concept': fact.concept,
            'label': fact.label,
            'value': fact.get_formatted_value(),
            # `is not None` keeps a legitimate numeric 0 instead of falling back to fact.value.
            'raw_value': fact.numeric_value if fact.numeric_value is not None else fact.value,
            'unit': fact.unit,
            'period_end': fact.period_end,
            'filing_date': fact.filing_date,
            'form_type': fact.form_type
        }
        for fact in facts
    ]

    # Sort by concept for consistent ordering (records is non-empty here).
    return pd.DataFrame(records).sort_values('concept').reset_index(drop=True)
|
|
|
|
def entity_info(self) -> Dict[str, Any]:
    """
    Summarize key entity information as a plain dictionary.

    Always contains ``entity_name`` and ``cik``. For every recognized DEI
    concept returned by ``dei_facts()``, three keys are added:
    ``<key>`` (formatted value), ``<key>_raw`` (raw value) and
    ``<key>_as_of`` (period end).

    Returns:
        Dictionary with entity name, shares outstanding, public float, etc.

    Example:
        info = facts.entity_info()
        print(f"Company: {info.get('entity_name', 'Unknown')}")
        print(f"Shares Outstanding: {info.get('shares_outstanding', 'N/A')}")
    """
    dei_df = self.dei_facts()
    info = {'entity_name': self.name, 'cik': self.cik}

    # DEI concept -> friendly key used in the result.
    friendly_names = {
        'dei:EntityCommonStockSharesOutstanding': 'shares_outstanding',
        'dei:EntityPublicFloat': 'public_float',
        'dei:TradingSymbol': 'trading_symbol',
        'dei:EntityFilerCategory': 'filer_category',
        'dei:EntityCurrentReportingStatus': 'reporting_status',
        'dei:EntityWellKnownSeasonedIssuer': 'well_known_seasoned_issuer',
        'dei:EntityVoluntaryFilers': 'voluntary_filer',
        'dei:EntitySmallBusiness': 'small_business',
        'dei:EntityEmergingGrowthCompany': 'emerging_growth_company',
        'dei:EntityShellCompany': 'shell_company'
    }

    if not dei_df.empty:
        for _, row in dei_df.iterrows():
            friendly = friendly_names.get(row['concept'])
            if friendly is None:
                continue
            info[friendly] = row['value']
            info[f'{friendly}_raw'] = row['raw_value']
            info[f'{friendly}_as_of'] = row['period_end']

    return info
|
|
|
|
# Standardized financial concept access methods (FEAT-411)
|
|
def get_revenue(self, period: Optional[str] = None, unit: Optional[str] = None) -> Optional[float]:
    """
    Look up revenue under any of several common concept names.

    Delegates to ``_get_standardized_concept_value`` with the revenue concept
    variants below, strict unit matching, and
    ``_calculate_revenue_from_components`` as the fallback calculation.

    Args:
        period: Optional period in format "YYYY-QN" or "YYYY-FY"
        unit: Optional unit filter, forwarded unchanged
            (NOTE(review): documented upstream as defaulting to USD - the
            default is applied by the helper, not here; confirm)

    Returns:
        Revenue value as float, or None if not found

    Example:
        >>> revenue = facts.get_revenue()
        >>> quarterly_revenue = facts.get_revenue(period="2024-Q1")
    """
    revenue_concepts = [
        'RevenueFromContractWithCustomerExcludingAssessedTax',
        'SalesRevenueNet',
        'Revenues',
        'Revenue',
        'TotalRevenues',
        'NetSales'
    ]
    return self._get_standardized_concept_value(
        concept_variants=revenue_concepts,
        period=period,
        unit=unit,
        fallback_calculation=self._calculate_revenue_from_components,
        strict_unit_match=True
    )
|
|
|
|
def get_net_income(self, period: Optional[str] = None, unit: Optional[str] = None) -> Optional[float]:
    """
    Look up net income under any of several common concept names.

    Delegates to ``_get_standardized_concept_value`` with the concept variants
    listed below.

    Args:
        period: Optional period in format "YYYY-QN" or "YYYY-FY"
        unit: Optional unit filter, forwarded unchanged
            (NOTE(review): documented upstream as defaulting to USD - confirm in the helper)

    Returns:
        Net income value as float, or None if not found

    Example:
        >>> net_income = facts.get_net_income()
        >>> annual_income = facts.get_net_income(period="2024-FY")
    """
    net_income_concepts = [
        'NetIncomeLoss',
        'ProfitLoss',
        'NetIncome',
        'NetEarnings',
        'NetIncomeLossAttributableToParent'
    ]
    return self._get_standardized_concept_value(
        concept_variants=net_income_concepts,
        period=period,
        unit=unit
    )
|
|
|
|
def get_total_assets(self, period: Optional[str] = None, unit: Optional[str] = None) -> Optional[float]:
    """
    Get standardized total assets value across all companies.

    Delegates to ``_get_standardized_concept_value`` with the total-assets
    concept variants. ``AssetsCurrent`` is deliberately not used as a fallback:
    current assets are only a part of total assets, so returning them here
    would silently report a different quantity. None is returned instead.

    Args:
        period: Optional period in format "YYYY-QN" or "YYYY-FY"
        unit: Optional unit filter, forwarded unchanged
            (NOTE(review): documented upstream as defaulting to USD - confirm in the helper)

    Returns:
        Total assets value as float, or None if not found

    Example:
        >>> assets = facts.get_total_assets()
        >>> q4_assets = facts.get_total_assets(period="2024-Q4")
    """
    return self._get_standardized_concept_value(
        concept_variants=[
            'Assets',
            'TotalAssets'
        ],
        period=period,
        unit=unit
    )
|
|
|
|
def get_total_liabilities(self, period: Optional[str] = None, unit: Optional[str] = None) -> Optional[float]:
    """
    Get standardized total liabilities value across all companies.

    Delegates to ``_get_standardized_concept_value`` with the total-liabilities
    concept variants. ``LiabilitiesAndStockholdersEquity`` is deliberately not
    used as a fallback: that concept is liabilities plus equity (it equals
    total assets on a balanced sheet), so returning it here would overstate
    liabilities. None is returned instead.

    Args:
        period: Optional period in format "YYYY-QN" or "YYYY-FY"
        unit: Optional unit filter, forwarded unchanged
            (NOTE(review): documented upstream as defaulting to USD - confirm in the helper)

    Returns:
        Total liabilities value as float, or None if not found

    Example:
        >>> liabilities = facts.get_total_liabilities()
    """
    return self._get_standardized_concept_value(
        concept_variants=[
            'Liabilities',
            'TotalLiabilities'
        ],
        period=period,
        unit=unit
    )
|
|
|
|
def get_shareholders_equity(self, period: Optional[str] = None, unit: Optional[str] = None) -> Optional[float]:
    """
    Look up shareholders' equity under any of several common concept names.

    Delegates to ``_get_standardized_concept_value`` with the concept variants
    listed below.

    Args:
        period: Optional period in format "YYYY-QN" or "YYYY-FY"
        unit: Optional unit filter, forwarded unchanged
            (NOTE(review): documented upstream as defaulting to USD - confirm in the helper)

    Returns:
        Shareholders equity value as float, or None if not found

    Example:
        >>> equity = facts.get_shareholders_equity()
    """
    equity_concepts = [
        'StockholdersEquity',
        'ShareholdersEquity',
        'TotalEquity',
        'PartnersCapital',  # For partnerships
        'MembersEquity'  # For LLCs
    ]
    return self._get_standardized_concept_value(
        concept_variants=equity_concepts,
        period=period,
        unit=unit
    )
|
|
|
|
def get_operating_income(self, period: Optional[str] = None, unit: Optional[str] = None) -> Optional[float]:
    """
    Look up operating income under any of several common concept names.

    Delegates to ``_get_standardized_concept_value`` with the concept variants
    listed below.

    Args:
        period: Optional period in format "YYYY-QN" or "YYYY-FY"
        unit: Optional unit filter, forwarded unchanged
            (NOTE(review): documented upstream as defaulting to USD - confirm in the helper)

    Returns:
        Operating income value as float, or None if not found

    Example:
        >>> op_income = facts.get_operating_income()
    """
    operating_income_concepts = [
        'OperatingIncomeLoss',
        'OperatingIncome',
        'IncomeLossFromOperations',
        'OperatingProfit'
    ]
    return self._get_standardized_concept_value(
        concept_variants=operating_income_concepts,
        period=period,
        unit=unit
    )
|
|
|
|
def get_gross_profit(self, period: Optional[str] = None, unit: Optional[str] = None) -> Optional[float]:
    """
    Look up gross profit under any of several common concept names.

    Delegates to ``_get_standardized_concept_value`` with the concept variants
    below and ``_calculate_gross_profit_from_components`` as the fallback
    calculation.

    Args:
        period: Optional period in format "YYYY-QN" or "YYYY-FY"
        unit: Optional unit filter, forwarded unchanged
            (NOTE(review): documented upstream as defaulting to USD - confirm in the helper)

    Returns:
        Gross profit value as float, or None if not found

    Example:
        >>> gross_profit = facts.get_gross_profit()
    """
    gross_profit_concepts = ['GrossProfit', 'GrossMargin']
    return self._get_standardized_concept_value(
        concept_variants=gross_profit_concepts,
        period=period,
        unit=unit,
        fallback_calculation=self._calculate_gross_profit_from_components
    )
|
|
|
|
# Convenient properties for common DEI facts
|
|
@property
def shares_outstanding(self) -> Optional[float]:
    """
    Numeric value of the ``dei:EntityCommonStockSharesOutstanding`` fact
    returned by ``get_fact``.

    Returns:
        Number of shares outstanding as float, or None if not available

    Example:
        shares = facts.shares_outstanding
        if shares:
            print(f"Shares Outstanding: {shares:,.0f}")
    """
    shares_fact = self.get_fact('dei:EntityCommonStockSharesOutstanding')
    if shares_fact:
        return shares_fact.numeric_value
    return None
|
|
|
|
@property
def public_float(self) -> Optional[float]:
    """
    Numeric value of the ``dei:EntityPublicFloat`` fact returned by ``get_fact``.

    Returns:
        Public float value as float, or None if not available

    Example:
        float_val = facts.public_float
        if float_val:
            print(f"Public Float: ${float_val:,.0f}")
    """
    float_fact = self.get_fact('dei:EntityPublicFloat')
    if float_fact:
        return float_fact.numeric_value
    return None
|
|
|
|
@property
def shares_outstanding_fact(self) -> Optional[FinancialFact]:
    """
    The ``dei:EntityCommonStockSharesOutstanding`` fact itself, as returned by
    ``get_fact``, for callers that need its full context.

    Returns:
        FinancialFact object with shares outstanding data, or None

    Example:
        fact = facts.shares_outstanding_fact
        if fact:
            print(f"Shares: {fact.get_formatted_value()} as of {fact.period_end}")
    """
    shares_fact = self.get_fact('dei:EntityCommonStockSharesOutstanding')
    return shares_fact
|
|
|
|
@property
def public_float_fact(self) -> Optional[FinancialFact]:
    """
    The ``dei:EntityPublicFloat`` fact itself, as returned by ``get_fact``,
    for callers that need its full context.

    Returns:
        FinancialFact object with public float data, or None

    Example:
        fact = facts.public_float_fact
        if fact:
            print(f"Float: {fact.get_formatted_value()} as of {fact.period_end}")
    """
    float_fact = self.get_fact('dei:EntityPublicFloat')
    return float_fact
|
|
|
|
# Financial statement helpers
|
|
def income_statement(self, periods: int = 4, period_length: Optional[int] = None, as_dataframe: bool = False,
                     annual: bool = True, concise_format: bool = False) -> Union[pd.DataFrame, MultiPeriodStatement]:
    """
    Get income statement facts for recent periods.

    Args:
        periods: Number of periods to retrieve
        period_length: Accepted for backward compatibility; this method does
            not currently apply it (it is not forwarded to the statement builder)
        as_dataframe: If True, return DataFrame; if False, return MultiPeriodStatement
        annual: If True, prefer annual (FY) periods over interim periods
        concise_format: If True, display values as $1.0B, if False display as $1,000,000,000

    Returns:
        MultiPeriodStatement or DataFrame with income statement data

    Example:
        # Get hierarchical multi-period statement (default)
        stmt = facts.income_statement(periods=4, annual=True)
        print(stmt)  # Rich display with hierarchy

        # Get with concise format
        stmt = facts.income_statement(periods=4, concise_format=True)

        # Get DataFrame for analysis
        df = facts.income_statement(periods=4, as_dataframe=True)

        # Convert statement to DataFrame later
        stmt = facts.income_statement(periods=4)
        df = stmt.to_dataframe()
    """
    # Always build the enhanced multi-period statement
    from edgar.entity.enhanced_statement import EnhancedStatementBuilder

    builder = EnhancedStatementBuilder()
    enhanced_stmt = builder.build_multi_period_statement(
        facts=self._facts,
        statement_type='IncomeStatement',
        periods=periods,
        annual=annual
    )
    # Attach display metadata to the built statement.
    enhanced_stmt.company_name = self.name
    enhanced_stmt.cik = str(self.cik)
    enhanced_stmt.concise_format = concise_format

    if as_dataframe:
        return enhanced_stmt.to_dataframe()
    return enhanced_stmt
|
|
|
|
def balance_sheet(self, periods: int = 4, as_of: Optional[date] = None, as_dataframe: bool = False,
                  annual: bool = True, concise_format: bool = False) -> Union[pd.DataFrame, MultiPeriodStatement]:
    """
    Get balance sheet facts for recent periods or as of a specific date.

    Without ``as_of`` an enhanced multi-period statement is built. With
    ``as_of`` a single point-in-time snapshot is produced from the latest
    instant facts as of that date; in that mode ``periods``, ``annual`` and
    ``concise_format`` are not used, and the non-DataFrame result is a
    ``FinancialStatement`` rather than a ``MultiPeriodStatement``.

    Args:
        periods: Number of periods to retrieve (ignored if as_of is specified)
        as_of: Optional date for point-in-time view; if specified, gets single snapshot
        as_dataframe: If True, return DataFrame; if False, return a statement object
        annual: If True, prefer annual (FY) periods over interim periods
        concise_format: If True, display values as $1.0B, if False display as $1,000,000,000

    Returns:
        MultiPeriodStatement or DataFrame with balance sheet data
        (FinancialStatement or DataFrame in the ``as_of`` case)

    Example:
        # Get hierarchical multi-period statement (default)
        stmt = facts.balance_sheet(periods=4, annual=True)
        print(stmt)  # Rich display with hierarchy

        # Get DataFrame for analysis
        df = facts.balance_sheet(periods=4, as_dataframe=True)

        # Convert statement to DataFrame later
        stmt = facts.balance_sheet(periods=4)
        df = stmt.to_dataframe()
    """
    if not as_of:
        # Regular multi-period view: always build the enhanced statement.
        from edgar.entity.enhanced_statement import EnhancedStatementBuilder

        builder = EnhancedStatementBuilder()
        enhanced_stmt = builder.build_multi_period_statement(
            facts=self._facts,
            statement_type='BalanceSheet',
            periods=periods,
            annual=annual
        )
        enhanced_stmt.company_name = self.name
        enhanced_stmt.cik = str(self.cik)
        enhanced_stmt.concise_format = concise_format

        if as_dataframe:
            return enhanced_stmt.to_dataframe()
        return enhanced_stmt

    # Point-in-time view - latest instant balance sheet facts as of the given date.
    from edgar.entity.query import FactQuery

    query = FactQuery(self._facts, self._fact_index).by_statement_type('BalanceSheet')
    facts = query.as_of(as_of).latest_instant().execute()

    if not facts:
        if as_dataframe:
            return pd.DataFrame()
        from edgar.entity.statement import FinancialStatement
        return FinancialStatement(
            data=pd.DataFrame(),
            statement_type="BalanceSheet",
            entity_name=self.name,
            period_lengths=[],
            mixed_periods=False
        )

    records = [
        {
            'label': fact.label,
            'concept': fact.concept,
            'value': fact.get_formatted_value(),
            # `is not None` keeps a legitimate numeric 0 instead of falling back to fact.value.
            'raw_value': fact.numeric_value if fact.numeric_value is not None else fact.value,
            'unit': fact.unit,
            'period_end': fact.period_end,
            'filing_date': fact.filing_date,
            'form_type': fact.form_type
        }
        for fact in facts
    ]
    df = pd.DataFrame(records)

    if as_dataframe:
        return df

    from edgar.entity.statement import FinancialStatement

    # Single-column statement: rows are labels, the one column is the snapshot date.
    period_label = f"As of {as_of}"
    pivot_data = pd.DataFrame({
        period_label: df.set_index('label')['raw_value']
    })
    return FinancialStatement(
        data=pivot_data,
        statement_type="BalanceSheet",
        entity_name=self.name,
        period_lengths=['instant'],
        mixed_periods=False
    )
|
|
|
|
def cash_flow(self, periods: int = 4, period_length: Optional[int] = None, as_dataframe: bool = False,
              annual: bool = True, concise_format: bool = False) -> Union[DataFrame, MultiPeriodStatement]:
    """
    Build the multi-period cash flow statement for this entity.

    Args:
        periods: Number of periods to retrieve
        period_length: Period length in months (3=quarterly, 12=annual).
            Accepted by the signature, but this method does not pass it on
            to the statement builder.
        as_dataframe: If True, return DataFrame; if False, return MultiPeriodStatement
        annual: If True, prefer annual (FY) periods over interim periods
        concise_format: If True, display values as $1.0B, if False display as $1,000,000,000

    Returns:
        MultiPeriodStatement or DataFrame with cash flow data

    Example:
        # Hierarchical multi-period statement (default)
        stmt = facts.cash_flow(periods=4, annual=True)
        print(stmt)

        # DataFrame for analysis
        df = facts.cash_flow(periods=4, as_dataframe=True)

        # Convert an existing statement later
        df = facts.cash_flow(periods=4).to_dataframe()
    """
    from edgar.entity.enhanced_statement import EnhancedStatementBuilder

    # The enhanced statement is always built first; the DataFrame is derived from it
    statement = EnhancedStatementBuilder().build_multi_period_statement(
        facts=self._facts,
        statement_type='CashFlow',
        periods=periods,
        annual=annual
    )

    # Attach entity identity and the display preference to the statement
    statement.company_name = self.name
    statement.cik = str(self.cik)
    statement.concise_format = concise_format

    return statement.to_dataframe() if as_dataframe else statement
|
|
|
|
# Investment analytics
|
|
def calculate_ratios(self) -> Dict[str, float]:
    """
    Calculate common financial ratios.

    Not implemented yet (planned for Phase 3). Until then the returned
    dictionary holds a single "note" entry with an explanatory message
    rather than any ratio values.

    Returns:
        Dictionary of ratio names to values
    """
    placeholder = dict(note="Ratio calculation will be implemented in Phase 3")
    return placeholder
|
|
|
|
def peer_comparison(self, peer_ciks: List[int],
                    metrics: Optional[List[str]] = None) -> pd.DataFrame:
    """
    Compare key metrics with peer companies.

    Not implemented yet (planned for Phase 3). Both arguments are currently
    ignored and a one-row DataFrame with a single "note" column is returned.

    Args:
        peer_ciks: List of peer company CIKs
        metrics: Optional list of specific metrics to compare

    Returns:
        DataFrame with comparative analysis
    """
    placeholder_rows = [{"note": "Peer comparison will be implemented in Phase 3"}]
    return pd.DataFrame(placeholder_rows)
|
|
|
|
def detect_anomalies(self) -> List[Dict[str, Any]]:
    """
    Detect unusual patterns or potential red flags.

    Not implemented yet (planned for Phase 3). The returned list currently
    contains one entry of type "placeholder" carrying an explanatory message.

    Returns:
        List of detected anomalies with descriptions
    """
    placeholder = dict(
        type="placeholder",
        message="Anomaly detection will be implemented in Phase 3",
    )
    return [placeholder]
|
|
|
|
# AI-ready methods
|
|
def to_llm_context(self,
                   focus_areas: Optional[List[str]] = None,
                   time_period: str = "recent") -> Dict[str, Any]:
    """
    Generate comprehensive context for LLM analysis.

    The context always contains "company", "data_summary", "recent_filings",
    "key_metrics" and a human readable "time_period" description. A
    "focus_analysis" section is added only when focus areas are given.

    Args:
        focus_areas: Specific areas to emphasize. Recognized values are
            'profitability', 'growth' and 'liquidity'; any other value is skipped.
        time_period: Time period to analyze ('recent', '5Y', '10Y', 'all').
            Values other than 'recent', '5Y' and '10Y' are described as
            all available historical data.

    Returns:
        Dictionary with structured context for LLM consumption
    """
    period_descriptions = {
        "recent": "Most recent reported period",
        "5Y": "Five year historical view",
        "10Y": "Ten year historical view",
    }
    # Focus area -> name of the method that produces its analysis
    focus_analyzers = {
        "profitability": "_analyze_profitability",
        "growth": "_analyze_growth",
        "liquidity": "_analyze_liquidity",
    }

    company_info = {
        "name": self.name,
        "cik": self.cik,
        "total_facts": len(self._facts)
    }
    context = {
        "company": company_info,
        "data_summary": self._generate_data_summary(),
        "recent_filings": self._get_recent_filings_summary(),
        "key_metrics": self._extract_key_metrics(time_period)
    }

    context["time_period"] = period_descriptions.get(
        time_period, "All available historical data"
    )

    if focus_areas:
        focus_analysis = {}
        context["focus_analysis"] = focus_analysis
        for area in focus_areas:
            analyzer_name = focus_analyzers.get(area)
            if analyzer_name is not None:
                focus_analysis[area] = getattr(self, analyzer_name)()

    return context
|
|
|
|
def to_agent_tools(self) -> List[Dict[str, Any]]:
    """
    Export facts as tools for AI agents (MCP-compatible).

    Three tool definitions are produced: one for retrieving financial
    statements, one for trend analysis and one for fetching a single fact.
    Each tool name embeds the company name, lower-cased with spaces
    replaced by underscores.

    Returns:
        List of tool definitions for agent consumption
    """
    company = self.name
    slug = company.lower().replace(' ', '_')

    financials_tool = {
        "name": f"get_{slug}_financials",
        "description": f"Retrieve financial data for {company}",
        "parameters": {
            "statement": {
                "type": "string",
                "description": "Financial statement type (income_statement, balance_sheet, cash_flow)",
                "enum": ["income_statement", "balance_sheet", "cash_flow"]
            },
            "periods": {
                "type": "integer",
                "description": "Number of periods to retrieve",
                "default": 4
            }
        },
        "returns": "Financial data with context"
    }

    trends_tool = {
        "name": f"analyze_{slug}_trends",
        "description": f"Analyze financial trends for {company}",
        "parameters": {
            "metric": {
                "type": "string",
                "description": "Financial metric to analyze (e.g., Revenue, NetIncome)"
            },
            "periods": {
                "type": "integer",
                "description": "Number of periods to analyze",
                "default": 8
            }
        },
        "returns": "Trend analysis with insights"
    }

    fact_tool = {
        "name": f"get_{slug}_fact",
        "description": f"Get a specific financial fact for {company}",
        "parameters": {
            "concept": {
                "type": "string",
                "description": "The financial concept to retrieve (e.g., Revenue, Assets)"
            },
            "period": {
                "type": "string",
                "description": "Optional period (e.g., 2024-Q4, 2024-FY)",
                "required": False
            }
        },
        "returns": "Fact value with full context"
    }

    return [financials_tool, trends_tool, fact_tool]
|
|
|
|
# Helper methods
|
|
def _generate_data_summary(self) -> Dict[str, Any]:
    """
    Summarize the facts held by this object.

    Returns:
        Dictionary with the total fact count, the number of distinct
        concepts, the filing date range ("Unknown" when no fact has a
        filing date), the fact count per form type and the sorted list
        of fiscal years present.
    """
    facts = self._facts

    concepts = set()
    fiscal_years = set()
    per_form_type = defaultdict(int)
    for fact in facts:
        concepts.add(fact.concept)
        per_form_type[fact.form_type] += 1
        if fact.fiscal_year:
            fiscal_years.add(fact.fiscal_year)

    # Only facts that carry a filing date take part in the date range
    filing_dates = [fact.filing_date for fact in facts if fact.filing_date]
    if filing_dates:
        date_range = f"{min(filing_dates)} to {max(filing_dates)}"
    else:
        date_range = "Unknown"

    return {
        "total_facts": len(facts),
        "unique_concepts": len(concepts),
        "date_range": date_range,
        "form_types": dict(per_form_type),
        "fiscal_years": sorted(fiscal_years)
    }
|
|
|
|
def _get_recent_filings_summary(self) -> List[Dict[str, Any]]:
    """
    Summarize the five most recent filings found among the facts.

    Facts are grouped by (form type, filing date, accession). Filings
    without a filing date sort as the earliest possible date.

    Returns:
        Up to five dictionaries, newest filing first, each with the form
        type, the filing date as text ("Unknown" when absent) and the
        number of facts in that filing.
    """
    facts_by_filing = defaultdict(list)
    for fact in self._facts:
        filing_key = (fact.form_type, fact.filing_date, fact.accession)
        facts_by_filing[filing_key].append(fact)

    def filing_date_of(filing_key):
        # Second tuple element is the filing date; date.min stands in for a missing one
        return filing_key[1] or date.min

    newest_first = sorted(facts_by_filing, key=filing_date_of, reverse=True)

    return [
        {
            "form": form_type,
            "date": str(filing_date) if filing_date else "Unknown",
            "fact_count": len(facts_by_filing[(form_type, filing_date, accession)])
        }
        for form_type, filing_date, accession in newest_first[:5]
    ]
|
|
|
|
def _extract_key_metrics(self, time_period: str) -> Dict[str, Any]:
    """
    Extract key financial metrics.

    Looks up a fixed list of headline concepts through ``get_fact`` and
    records value, unit, fiscal period and data quality for each concept
    that is found. Concepts without a fact are left out of the result.

    Args:
        time_period: Requested time period. It is not used by the lookup:
            ``get_fact`` is called with the concept name only.

    Returns:
        Dictionary mapping concept name to a dictionary with the keys
        "value", "unit", "period" and "quality".
    """
    key_concepts = [
        'Revenue', 'NetIncome', 'Assets', 'Liabilities',
        'StockholdersEquity', 'OperatingIncome', 'EarningsPerShare'
    ]

    metrics = {}
    for concept in key_concepts:
        fact = self.get_fact(concept)
        if not fact:
            continue

        # Use the raw value only when there is no numeric value at all.
        # A plain `or` would also throw away a legitimate numeric 0.
        numeric_value = fact.numeric_value
        metrics[concept] = {
            "value": numeric_value if numeric_value is not None else fact.value,
            "unit": fact.unit,
            "period": f"{fact.fiscal_period} {fact.fiscal_year}",
            "quality": fact.data_quality.value
        }

    return metrics
|
|
|
|
def _analyze_profitability(self) -> Dict[str, Any]:
    """
    Analyze profitability metrics.

    Computes the net margin (net income as a percentage of revenue) from
    the 'Revenue' and 'NetIncome' facts returned by ``get_fact``.

    Returns:
        Dictionary with a "net_margin" entry (value rounded to two decimals,
        unit and a one-sentence interpretation), or an empty dictionary when
        either fact is missing, net income has no numeric value, or revenue
        is missing or zero.
    """
    revenue = self.get_fact('Revenue')
    net_income = self.get_fact('NetIncome')

    analysis = {}
    if not revenue or not net_income:
        return analysis

    revenue_value = revenue.numeric_value
    net_income_value = net_income.numeric_value

    # Revenue is the denominator, so it must be present and non-zero.
    # Net income only has to be present: 0 is a valid break-even result
    # and must not be mistaken for missing data.
    if not revenue_value or net_income_value is None:
        return analysis

    net_margin = (net_income_value / revenue_value) * 100
    analysis["net_margin"] = {
        "value": round(net_margin, 2),
        "unit": "percent",
        "interpretation": f"For every dollar of revenue, {self.name} generates ${net_margin / 100:.2f} in profit"
    }

    return analysis
|
|
|
|
def _analyze_growth(self) -> Dict[str, Any]:
    """
    Analyze growth trends.

    Compares the first two rows of the 'Revenue' time series (requested
    with ``periods=8``) and reports the percentage change of the first
    row relative to the second.

    Returns:
        Dictionary with a "revenue_growth_yoy" entry (value rounded to two
        decimals, unit and the two fiscal periods compared), or a
        dictionary with a "message" entry when fewer than two rows exist,
        either value is missing (None/NaN), or the prior value is zero.
    """
    revenue_series = self.time_series('Revenue', periods=8)

    if len(revenue_series) >= 2:
        latest_row = revenue_series.iloc[0]
        prior_row = revenue_series.iloc[1]
        latest = latest_row['numeric_value']
        prior = prior_row['numeric_value']

        # NaN is truthy, so a plain truthiness test would let a missing
        # value through and yield a NaN growth rate; pd.notna rejects both
        # None and NaN. A zero prior value is excluded to avoid dividing by zero.
        if pd.notna(latest) and pd.notna(prior) and prior != 0:
            growth_rate = ((latest - prior) / prior) * 100
            return {
                "revenue_growth_yoy": {
                    "value": round(growth_rate, 2),
                    "unit": "percent",
                    "period_comparison": f"{latest_row['fiscal_period']} vs {prior_row['fiscal_period']}"
                }
            }

    return {"message": "Insufficient data for growth analysis"}
|
|
|
|
def _analyze_liquidity(self) -> Dict[str, Any]:
    """
    Analyze liquidity metrics.

    Computes the current ratio (current assets divided by current
    liabilities) from the 'CurrentAssets' and 'CurrentLiabilities' facts.

    Returns:
        Dictionary with a "current_ratio" entry (value rounded to two
        decimals plus an interpretation), or a dictionary with a "message"
        entry when either fact is missing or either numeric value is
        missing or zero.
    """
    insufficient = {"message": "Insufficient data for liquidity analysis"}

    assets = self.get_fact('CurrentAssets')
    liabilities = self.get_fact('CurrentLiabilities')
    if not (assets and liabilities):
        return insufficient
    if not (assets.numeric_value and liabilities.numeric_value):
        return insufficient

    current_ratio = assets.numeric_value / liabilities.numeric_value
    ratio_details = {
        "value": round(current_ratio, 2),
        "interpretation": f"{self.name} has ${current_ratio:.2f} in current assets for every $1 of current liabilities"
    }
    return {"current_ratio": ratio_details}
|
|
|
|
# Helper methods for standardized concept access (FEAT-411)
|
|
def _get_standardized_concept_value(self,
                                    concept_variants: List[str],
                                    period: Optional[str] = None,
                                    unit: Optional[str] = None,
                                    fallback_calculation: Optional[callable] = None,
                                    return_detailed: bool = False,
                                    strict_unit_match: bool = False) -> Optional[float]:
    """
    Resolve a standardized concept to a value, with unit normalization.

    Each concept variant is tried in order, first as given and then with a
    ``us-gaap:`` prefix. The first fact with a numeric value that normalizes
    successfully to the target unit wins. If none does, the optional
    fallback calculation is called with ``(period, target_unit)``.

    Args:
        concept_variants: List of concept names to try in priority order
        period: Optional period filter
        unit: Optional unit filter (defaults to USD)
        fallback_calculation: Optional function to calculate value from components
        return_detailed: If True, return UnitResult instead of just value
        strict_unit_match: If True, require exact unit match. If False, allow compatible units.

    Returns:
        Numeric value or None if not found. With return_detailed=True a
        UnitResult is returned in every case, including failures. An
        exception raised by the fallback calculation is not propagated: it
        is reported in the UnitResult when return_detailed=True, otherwise
        None is returned.
    """
    from edgar.entity.unit_handling import UnitNormalizer, UnitResult

    target_unit = unit or 'USD'

    # Flatten the lookup order: every variant, bare first and then namespaced
    candidate_names = (
        name
        for concept in concept_variants
        for name in (concept, f'us-gaap:{concept}')
    )
    for name in candidate_names:
        fact = self.get_fact(name, period)
        if not fact or fact.numeric_value is None:
            continue

        normalized = UnitNormalizer.get_normalized_value(
            fact=fact,
            target_unit=target_unit,
            apply_scale=True,
            strict_unit_match=strict_unit_match
        )
        if normalized.success:
            return normalized if return_detailed else normalized.value

    if fallback_calculation:
        try:
            computed = fallback_calculation(period, target_unit)
            if computed is not None:
                if not return_detailed:
                    return computed
                return UnitResult(
                    value=computed,
                    normalized_unit=UnitNormalizer.normalize_unit(target_unit),
                    original_unit=target_unit,
                    success=True,
                    error_reason="Calculated from components"
                )
        except Exception as e:
            # Best effort: a failing fallback is reported only in detailed mode,
            # otherwise execution falls through to the "not found" result below
            if return_detailed:
                return UnitResult(
                    value=None,
                    normalized_unit=None,
                    original_unit=target_unit or "",
                    success=False,
                    error_reason=f"Fallback calculation failed: {str(e)}"
                )

    # Nothing matched and no fallback value was produced
    if not return_detailed:
        return None

    return UnitResult(
        value=None,
        normalized_unit=None,
        original_unit=target_unit or "",
        success=False,
        error_reason="No matching concept found",
        suggestions=["Try checking if company uses alternative concept names"]
    )
|
|
|
|
def _calculate_revenue_from_components(self, period: Optional[str] = None, unit: str = 'USD') -> Optional[float]:
    """
    Derive revenue as Gross Profit + Cost of Revenue.

    Used when no explicit revenue fact is available. Cost of revenue is
    taken from the first of 'CostOfRevenue', 'CostOfGoodsAndServicesSold',
    'CostOfGoodsSold' and 'CostOfSales' for which a fact exists.

    Args:
        period: Optional period filter passed to ``get_fact``
        unit: Target unit for the strict normalization attempt

    Returns:
        The summed value, or None when a component is missing, has no
        numeric value, or the units cannot be reconciled.
    """
    from edgar.entity.unit_handling import UnitNormalizer

    gross_profit = self.get_fact('GrossProfit', period)

    cost_concepts = ('CostOfRevenue', 'CostOfGoodsAndServicesSold', 'CostOfGoodsSold', 'CostOfSales')
    cost_of_revenue = next(
        (found for found in (self.get_fact(name, period) for name in cost_concepts) if found),
        None
    )

    if not (gross_profit and cost_of_revenue):
        return None
    if gross_profit.numeric_value is None or cost_of_revenue.numeric_value is None:
        return None

    # First attempt: both components must normalize strictly to the requested unit
    strict_profit = UnitNormalizer.get_normalized_value(gross_profit, target_unit=unit, apply_scale=True, strict_unit_match=True)
    strict_cost = UnitNormalizer.get_normalized_value(cost_of_revenue, target_unit=unit, apply_scale=True, strict_unit_match=True)
    if strict_profit.success and strict_cost.success:
        return strict_profit.value + strict_cost.value

    # Second attempt: the two facts only need units compatible with each other
    if not UnitNormalizer.are_compatible(gross_profit.unit, cost_of_revenue.unit):
        return None

    loose_profit = UnitNormalizer.get_normalized_value(gross_profit, apply_scale=True, strict_unit_match=False)
    loose_cost = UnitNormalizer.get_normalized_value(cost_of_revenue, apply_scale=True, strict_unit_match=False)
    if loose_profit.success and loose_cost.success:
        return loose_profit.value + loose_cost.value

    return None
|
|
|
|
def _calculate_gross_profit_from_components(self, period: Optional[str] = None, unit: str = 'USD') -> Optional[float]:
    """
    Derive gross profit as Revenue - Cost of Revenue.

    Used when no explicit gross profit fact is available. Revenue and cost
    facts are looked up directly with ``get_fact`` rather than through the
    standardized getter, to avoid infinite recursion.

    Args:
        period: Optional period filter passed to ``get_fact``
        unit: Target unit for the first normalization attempt

    Returns:
        The difference, or None when a component is missing, has no
        numeric value, or the units cannot be reconciled.
    """
    from edgar.entity.unit_handling import UnitNormalizer

    def first_available(concept_names):
        # First truthy fact among the names, looked up in order; None if there is none
        lookups = (self.get_fact(name, period) for name in concept_names)
        return next((found for found in lookups if found), None)

    revenue = first_available(
        ['RevenueFromContractWithCustomerExcludingAssessedTax', 'SalesRevenueNet', 'Revenues', 'Revenue']
    )
    cost_of_revenue = first_available(
        ['CostOfRevenue', 'CostOfGoodsAndServicesSold', 'CostOfGoodsSold', 'CostOfSales']
    )

    if not (revenue and cost_of_revenue):
        return None
    if revenue.numeric_value is None or cost_of_revenue.numeric_value is None:
        return None

    # First attempt: normalize both components to the requested unit
    revenue_in_unit = UnitNormalizer.get_normalized_value(revenue, target_unit=unit, apply_scale=True)
    cost_in_unit = UnitNormalizer.get_normalized_value(cost_of_revenue, target_unit=unit, apply_scale=True)
    if revenue_in_unit.success and cost_in_unit.success:
        return revenue_in_unit.value - cost_in_unit.value

    # Second attempt: the two facts only need units compatible with each other
    if not UnitNormalizer.are_compatible(revenue.unit, cost_of_revenue.unit):
        return None

    revenue_default = UnitNormalizer.get_normalized_value(revenue, apply_scale=True)
    cost_default = UnitNormalizer.get_normalized_value(cost_of_revenue, apply_scale=True)
    if revenue_default.success and cost_default.success:
        return revenue_default.value - cost_default.value

    return None
|
|
|
|
def get_concept_mapping_info(self, concept_variants: List[str]) -> Dict[str, Any]:
    """
    Report which of the given concept variants exist for this company.

    Helpful when debugging the standardized getters and when exploring
    which concept names a particular company actually reports.

    Args:
        concept_variants: List of concept names to check

    Returns:
        Dictionary with three keys: 'available' (concepts for which a fact
        was found), 'missing' (concepts without one) and 'fact_details'
        (label, unit, latest period, latest value and filing date for each
        available concept).

    Example:
        >>> info = facts.get_concept_mapping_info(['Revenue', 'Revenues', 'NetSales'])
        >>> print(f"Available concepts: {info['available']}")
    """
    available = []
    missing = []
    fact_details = {}

    for concept in concept_variants:
        fact = self.get_fact(concept)
        if not fact:
            missing.append(concept)
            continue

        available.append(concept)
        fact_details[concept] = {
            'label': fact.label,
            'unit': fact.unit,
            'latest_period': f"{fact.fiscal_period} {fact.fiscal_year}",
            'latest_value': fact.numeric_value,
            'filing_date': fact.filing_date
        }

    return {
        'available': available,
        'missing': missing,
        'fact_details': fact_details
    }
|
|
|
|
# Enhanced methods with detailed unit information (FEAT-411 Unit Handling)
|
|
def get_revenue_detailed(self, period: Optional[str] = None, unit: Optional[str] = None):
    """
    Get revenue together with unit information and error details.

    Delegates to the standardized concept lookup with a prioritized list
    of revenue concept names, using the component-based revenue
    calculation as fallback and requesting the detailed result.

    Args:
        period: Optional period in format "YYYY-QN" or "YYYY-FY"
        unit: Optional unit filter (defaults to USD)

    Returns:
        UnitResult with value, unit info, and error details

    Example:
        >>> result = facts.get_revenue_detailed()
        >>> if result.success:
        ...     print(f"Revenue: ${result.value/1e9:.1f}B (unit: {result.normalized_unit})")
        ... else:
        ...     print(f"Error: {result.error_reason}")
        ...     for suggestion in result.suggestions:
        ...         print(f"  - {suggestion}")
    """
    # Priority order: earlier names are tried first
    revenue_concepts = [
        'RevenueFromContractWithCustomerExcludingAssessedTax',
        'SalesRevenueNet',
        'Revenues',
        'Revenue',
        'TotalRevenues',
        'NetSales'
    ]
    return self._get_standardized_concept_value(
        concept_variants=revenue_concepts,
        period=period,
        unit=unit,
        fallback_calculation=self._calculate_revenue_from_components,
        return_detailed=True
    )
|
|
|
|
def get_net_income_detailed(self, period: Optional[str] = None, unit: Optional[str] = None):
    """
    Get net income together with unit information and error details.

    Delegates to the standardized concept lookup with a prioritized list
    of net income concept names and requests the detailed result. No
    fallback calculation is supplied.

    Args:
        period: Optional period in format "YYYY-QN" or "YYYY-FY"
        unit: Optional unit filter (defaults to USD)

    Returns:
        UnitResult with value, unit info, and error details
    """
    # Priority order: earlier names are tried first
    net_income_concepts = [
        'NetIncomeLoss',
        'ProfitLoss',
        'NetIncome',
        'NetEarnings',
        'NetIncomeLossAttributableToParent'
    ]
    return self._get_standardized_concept_value(
        concept_variants=net_income_concepts,
        period=period,
        unit=unit,
        return_detailed=True
    )
|
|
|
|
def check_unit_compatibility(self, concept1: str, concept2: str, period: Optional[str] = None) -> Dict[str, Any]:
    """
    Check whether two concepts have units that can be combined in a calculation.

    Args:
        concept1: First concept name
        concept2: Second concept name
        period: Optional period filter

    Returns:
        Dictionary with compatibility info and suggestions. It always holds
        'compatible', 'concept1', 'concept2', 'fact1_found', 'fact2_found',
        'issue' and 'suggestions'. The raw and normalized units of both
        facts are added only when both facts were found. If the first
        concept is missing, the second one is not reported on.

    Example:
        >>> compat = facts.check_unit_compatibility('Revenue', 'CostOfRevenue')
        >>> if compat['compatible']:
        ...     print("Units are compatible for calculations")
        ... else:
        ...     print(f"Unit issue: {compat['issue']}")
    """
    from edgar.entity.unit_handling import UnitNormalizer

    fact1 = self.get_fact(concept1, period)
    fact2 = self.get_fact(concept2, period)

    result = {
        'compatible': False,
        'concept1': concept1,
        'concept2': concept2,
        'fact1_found': fact1 is not None,
        'fact2_found': fact2 is not None,
        'issue': None,
        'suggestions': []
    }

    # Stop at the first concept without a fact
    for concept, fact in ((concept1, fact1), (concept2, fact2)):
        if not fact:
            result['issue'] = f"Concept '{concept}' not found"
            result['suggestions'].append(f"Check if {concept} exists for this company")
            return result

    compatible = UnitNormalizer.are_compatible(fact1.unit, fact2.unit)
    result.update({
        'compatible': compatible,
        'fact1_unit': fact1.unit,
        'fact2_unit': fact2.unit,
        'fact1_normalized': UnitNormalizer.normalize_unit(fact1.unit),
        'fact2_normalized': UnitNormalizer.normalize_unit(fact2.unit)
    })
    if compatible:
        return result

    # Explain the mismatch: different unit types, or same type in different notation
    result['issue'] = f"Incompatible units: {fact1.unit} vs {fact2.unit}"
    unit1_type = UnitNormalizer.get_unit_type(fact1.unit)
    unit2_type = UnitNormalizer.get_unit_type(fact2.unit)
    if unit1_type != unit2_type:
        suggestion = f"Unit type mismatch: {unit1_type.value} vs {unit2_type.value}"
    else:
        suggestion = "Same unit type but different representations"
    result['suggestions'].append(suggestion)

    return result
|