Initial commit

This commit is contained in:
kdusek
2025-12-09 12:13:01 +01:00
commit 8e654ed209
13332 changed files with 2695056 additions and 0 deletions

View File

@@ -0,0 +1,403 @@
from typing import Any, Dict, Optional, Union
import pandas as pd
from edgar.core import log
from edgar.richtools import repr_rich
from edgar.xbrl import XBRL, XBRLS, Statement
from edgar.xbrl.xbrl import XBRLFilingWithNoXbrlData
class Financials:
def __init__(self, xb: Optional[XBRL]):
self.xb: XBRL = xb
@classmethod
def extract(cls, filing) -> Optional["Financials"]:
try:
xb = XBRL.from_filing(filing)
return Financials(xb)
except XBRLFilingWithNoXbrlData as e:
# Handle the case where the filing does not have XBRL data
log.warning(f"Filing {filing} does not contain XBRL data: {e}")
return None
def balance_sheet(self):
if self.xb is None:
return None
return self.xb.statements.balance_sheet()
def income_statement(self):
if self.xb is None:
return None
return self.xb.statements.income_statement()
def cashflow_statement(self):
if self.xb is None:
return None
return self.xb.statements.cashflow_statement()
def statement_of_equity(self):
if self.xb is None:
return None
return self.xb.statements.statement_of_equity()
def comprehensive_income(self):
if self.xb is None:
return None
return self.xb.statements.comprehensive_income()
def cover(self):
if self.xb is None:
return None
return self.xb.statements.cover_page()
# Standardized Financial Data Accessor Methods
# These methods provide easy access to common financial metrics
# using standardized labels across different companies
def _get_standardized_concept_value(self, statement_type: str, concept_patterns: list,
period_offset: int = 0) -> Optional[Union[int, float]]:
"""
Helper method to extract standardized concept values from financial statements.
Args:
statement_type: Type of statement ('income', 'balance', 'cashflow')
concept_patterns: List of label patterns to search for (case-insensitive)
period_offset: Which period to get (0=most recent, 1=previous, etc.)
Returns:
The concept value if found, None otherwise
"""
if self.xb is None:
return None
try:
# Get the appropriate statement
if statement_type == 'income':
statement = self.income_statement()
elif statement_type == 'balance':
statement = self.balance_sheet()
elif statement_type == 'cashflow':
statement = self.cashflow_statement()
else:
return None
if statement is None:
return None
# Render with standardization enabled
rendered = statement.render(standard=True)
df = rendered.to_dataframe()
if df.empty:
return None
# Find the concept using pattern matching
for pattern in concept_patterns:
matches = df[df['label'].str.contains(pattern, case=False, na=False)]
if not matches.empty:
# Get available period columns (excluding metadata columns)
period_columns = [col for col in df.columns if col not in ['concept', 'label', 'level', 'abstract', 'dimension']]
if len(period_columns) > period_offset:
period_col = period_columns[period_offset]
value = matches.iloc[0][period_col]
# Convert to numeric if possible
if pd.notna(value) and value != '':
try:
return float(value) if '.' in str(value) else int(value)
except (ValueError, TypeError):
return value
return value
return None
except Exception as e:
log.debug(f"Error getting standardized concept value: {e}")
return None
def get_revenue(self, period_offset: int = 0) -> Optional[Union[int, float]]:
"""
Get revenue from the income statement using standardized labels.
Args:
period_offset: Which period to get (0=most recent, 1=previous, etc.)
Returns:
Revenue value if found, None otherwise
Example:
>>> company = Company('AAPL')
>>> financials = company.get_financials()
>>> revenue = financials.get_revenue() # Most recent revenue
>>> prev_revenue = financials.get_revenue(1) # Previous period revenue
"""
patterns = [
r'Revenue$', # Exact match for "Revenue"
r'^Revenue', # Starts with "Revenue"
r'Contract Revenue', # Common standardized label
r'Sales Revenue', # Alternative form
r'Total Revenue', # Comprehensive revenue
r'Net Revenue' # Net form
]
return self._get_standardized_concept_value('income', patterns, period_offset)
def get_net_income(self, period_offset: int = 0) -> Optional[Union[int, float]]:
"""
Get net income from the income statement using standardized labels.
Args:
period_offset: Which period to get (0=most recent, 1=previous, etc.)
Returns:
Net income value if found, None otherwise
Example:
>>> company = Company('AAPL')
>>> financials = company.get_financials()
>>> net_income = financials.get_net_income()
"""
patterns = [
r'Net Income$', # Exact match
r'^Net Income', # Starts with
r'Net Income.*Common', # Net income attributable to common
r'Net Income.*Shareholders', # Net income attributable to shareholders
r'Profit.*Loss', # International variations
r'Net Earnings' # Alternative terminology
]
return self._get_standardized_concept_value('income', patterns, period_offset)
def get_total_assets(self, period_offset: int = 0) -> Optional[Union[int, float]]:
"""
Get total assets from the balance sheet using standardized labels.
Args:
period_offset: Which period to get (0=most recent, 1=previous, etc.)
Returns:
Total assets value if found, None otherwise
Example:
>>> company = Company('AAPL')
>>> financials = company.get_financials()
>>> total_assets = financials.get_total_assets()
"""
patterns = [
r'Total Assets$', # Exact match
r'^Total Assets', # Starts with
r'Assets$' # Just "Assets"
]
return self._get_standardized_concept_value('balance', patterns, period_offset)
def get_total_liabilities(self, period_offset: int = 0) -> Optional[Union[int, float]]:
"""
Get total liabilities from the balance sheet using standardized labels.
Args:
period_offset: Which period to get (0=most recent, 1=previous, etc.)
Returns:
Total liabilities value if found, None otherwise
"""
patterns = [
r'Total Liabilities$',
r'^Total Liabilities',
r'Liabilities$'
]
return self._get_standardized_concept_value('balance', patterns, period_offset)
def get_stockholders_equity(self, period_offset: int = 0) -> Optional[Union[int, float]]:
"""
Get stockholders' equity from the balance sheet using standardized labels.
Args:
period_offset: Which period to get (0=most recent, 1=previous, etc.)
Returns:
Stockholders' equity value if found, None otherwise
"""
patterns = [
r'Total.*Stockholders.*Equity',
r'Stockholders.*Equity$',
r'Shareholders.*Equity',
r'Total.*Equity$',
r'^Equity$'
]
return self._get_standardized_concept_value('balance', patterns, period_offset)
def get_operating_cash_flow(self, period_offset: int = 0) -> Optional[Union[int, float]]:
"""
Get operating cash flow from the cash flow statement using standardized labels.
Args:
period_offset: Which period to get (0=most recent, 1=previous, etc.)
Returns:
Operating cash flow value if found, None otherwise
"""
patterns = [
r'Net Cash.*Operations',
r'Operating.*Cash.*Flow',
r'Cash.*Operations',
r'Net Cash.*Operating'
]
return self._get_standardized_concept_value('cashflow', patterns, period_offset)
def get_free_cash_flow(self, period_offset: int = 0) -> Optional[Union[int, float]]:
"""
Calculate free cash flow (Operating Cash Flow - Capital Expenditures).
Args:
period_offset: Which period to get (0=most recent, 1=previous, etc.)
Returns:
Free cash flow if both components found, None otherwise
"""
operating_cf = self.get_operating_cash_flow(period_offset)
capex = self.get_capital_expenditures(period_offset)
if operating_cf is not None and capex is not None:
# CapEx is usually negative, so we subtract it (making FCF = OCF - |CapEx|)
return operating_cf - abs(capex)
return None
def get_capital_expenditures(self, period_offset: int = 0) -> Optional[Union[int, float]]:
"""
Get capital expenditures from the cash flow statement using standardized labels.
Args:
period_offset: Which period to get (0=most recent, 1=previous, etc.)
Returns:
Capital expenditures value if found, None otherwise
"""
patterns = [
r'Capital Expenditures',
r'Property.*Plant.*Equipment',
r'Payments.*Property',
r'Acquisitions.*Property',
r'Capex'
]
return self._get_standardized_concept_value('cashflow', patterns, period_offset)
def get_current_assets(self, period_offset: int = 0) -> Optional[Union[int, float]]:
"""
Get current assets from the balance sheet using standardized labels.
Args:
period_offset: Which period to get (0=most recent, 1=previous, etc.)
Returns:
Current assets value if found, None otherwise
"""
patterns = [
r'Total Current Assets',
r'^Current Assets',
r'Assets Current'
]
return self._get_standardized_concept_value('balance', patterns, period_offset)
def get_current_liabilities(self, period_offset: int = 0) -> Optional[Union[int, float]]:
"""
Get current liabilities from the balance sheet using standardized labels.
Args:
period_offset: Which period to get (0=most recent, 1=previous, etc.)
Returns:
Current liabilities value if found, None otherwise
"""
patterns = [
r'Total Current Liabilities',
r'^Current Liabilities',
r'Liabilities Current'
]
return self._get_standardized_concept_value('balance', patterns, period_offset)
def get_financial_metrics(self) -> Dict[str, Any]:
"""
Get a dictionary of common financial metrics using standardized labels.
Returns:
Dictionary containing available financial metrics
Example:
>>> company = Company('AAPL')
>>> financials = company.get_financials()
>>> metrics = financials.get_financial_metrics()
>>> print(f"Revenue: ${metrics.get('revenue', 'N/A'):,}")
"""
metrics = {}
# Income Statement Metrics
metrics['revenue'] = self.get_revenue()
metrics['net_income'] = self.get_net_income()
# Balance Sheet Metrics
metrics['total_assets'] = self.get_total_assets()
metrics['total_liabilities'] = self.get_total_liabilities()
metrics['stockholders_equity'] = self.get_stockholders_equity()
metrics['current_assets'] = self.get_current_assets()
metrics['current_liabilities'] = self.get_current_liabilities()
# Cash Flow Metrics
metrics['operating_cash_flow'] = self.get_operating_cash_flow()
metrics['capital_expenditures'] = self.get_capital_expenditures()
metrics['free_cash_flow'] = self.get_free_cash_flow()
# Calculate basic ratios if we have the data
if metrics['current_assets'] and metrics['current_liabilities']:
try:
metrics['current_ratio'] = metrics['current_assets'] / metrics['current_liabilities']
except (TypeError, ZeroDivisionError):
metrics['current_ratio'] = None
else:
metrics['current_ratio'] = None
if metrics['total_liabilities'] and metrics['total_assets']:
try:
metrics['debt_to_assets'] = metrics['total_liabilities'] / metrics['total_assets']
except (TypeError, ZeroDivisionError):
metrics['debt_to_assets'] = None
else:
metrics['debt_to_assets'] = None
return metrics
def __rich__(self):
if self.xb is None:
return "No XBRL data available"
return self.xb.__rich__()
def __repr__(self):
return repr_rich(self.__rich__())
class MultiFinancials:
"""
Merges the financial statements from multiple periods into a single financials.
"""
def __init__(self, xbs: XBRLS):
self.xbs = xbs
@classmethod
def extract(cls, filings) -> "MultiFinancials":
return cls(XBRLS.from_filings(filings))
def balance_sheet(self) -> Optional[Statement]:
return self.xbs.statements.balance_sheet()
def income_statement(self) -> Optional[Statement]:
return self.xbs.statements.income_statement()
def cashflow_statement(self) -> Optional[Statement]:
return self.xbs.statements.cashflow_statement()
def __rich__(self):
return self.xbs.__rich__()
def __repr__(self):
return repr_rich(self.__rich__())