1514 lines
55 KiB
Python
1514 lines
55 KiB
Python
"""
|
|
Facts module for querying XBRL facts.
|
|
|
|
This module provides a powerful interface for querying XBRL facts based on various
|
|
attributes including concept, value, dimension, dates, statement, and more.
|
|
It enables convenient retrieval of facts as pandas DataFrames for analysis.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
from decimal import Decimal
|
|
from functools import lru_cache
|
|
from textwrap import dedent
|
|
from typing import Any, Callable, Dict, List, Optional, Set, Union
|
|
|
|
import pandas as pd
|
|
from rich import box
|
|
from rich.console import Group
|
|
from rich.markdown import Markdown
|
|
from rich.panel import Panel
|
|
from rich.table import Column, Table
|
|
from rich.text import Text
|
|
|
|
from edgar.richtools import repr_rich
|
|
from edgar.xbrl.core import STANDARD_LABEL, parse_date
|
|
from edgar.xbrl.models import select_display_label
|
|
|
|
|
|
class FactQuery:
    """
    A query builder for XBRL facts that enables filtering by various attributes.

    This class provides a fluent interface for building queries against XBRL facts,
    allowing filtering by concept, value, period, dimensions, and other attributes.

    Every ``by_*`` method appends a predicate over a fact dictionary and returns
    ``self``; nothing is evaluated until :meth:`execute` or :meth:`to_dataframe`
    is called. The query never modifies the fact dictionaries or the list it
    obtains from the facts view.
    """

    # Label-like fields consulted by by_label()
    _LABEL_FIELDS = ('label', 'element_label', 'original_label')
    # Text fields consulted by by_text(), in lookup order
    _TEXT_FIELDS = ('concept', 'label', 'element_label', 'element_name', 'original_label')

    def __init__(self, facts_view: FactsView):
        """
        Initialize a new fact query.

        Args:
            facts_view: The FactsView instance to query against
        """
        self._facts_view = facts_view
        self._filters: List[Callable[[Dict[str, Any]], bool]] = []
        self._transformations: List[Callable[[Any], Any]] = []
        self._aggregations: List[Dict[str, str]] = []
        self._include_dimensions = True
        self._include_contexts = True
        self._include_element_info = True
        self._sort_by = None
        self._sort_ascending = True
        self._limit = None
        self._statement_type = None

    def by_concept(self, pattern: str, exact: bool = False) -> FactQuery:
        """
        Filter facts by concept name.

        Every underscore in ``pattern`` is replaced by a colon before matching,
        so ``us-gaap_Assets`` and ``us-gaap:Assets`` are treated the same.

        Args:
            pattern: Pattern to match against concept names
            exact: If True, require exact match; otherwise, use regex pattern matching
                (case-insensitive search)

        Returns:
            Self for method chaining
        """
        pattern = pattern.replace('_', ':')  # Normalize underscores to colons for concept names
        if exact:
            self._filters.append(lambda f: f['concept'] == pattern)
        else:
            regex = re.compile(pattern, re.IGNORECASE)
            self._filters.append(lambda f: bool(regex.search(f['concept'])))
        return self

    def by_label(self, pattern: str, exact: bool = False) -> FactQuery:
        """
        Filter facts by element label.

        This method searches across different label fields, including both the standardized label
        (if standardization was applied) and the original label. This ensures you can query by either
        the standardized label or the original company-specific label.

        Args:
            pattern: Pattern to match against element labels
            exact: If True, require exact match; otherwise, use regex pattern matching

        Returns:
            Self for method chaining
        """
        fields = self._LABEL_FIELDS
        if exact:
            def label_filter(f):
                return any(field in f and f[field] == pattern for field in fields)
        else:
            regex = re.compile(pattern, re.IGNORECASE)

            def label_filter(f):
                # Missing and None labels are skipped rather than matched as the text "None"
                return any(
                    f.get(field) is not None and bool(regex.search(str(f[field])))
                    for field in fields
                )

        self._filters.append(label_filter)
        return self

    def by_value(self, value_filter: Union[Callable, str, int, float, list, tuple]) -> FactQuery:
        """
        Filter facts by value.

        The comparison is always made against the fact's ``numeric_value``;
        facts without a numeric value never match.

        Args:
            value_filter: Can be:
                - A callable predicate that takes a value and returns bool
                - A specific value to match exactly
                - A tuple or list of (min, max) for range filtering (inclusive)

        Returns:
            Self for method chaining
        """
        if callable(value_filter):
            predicate = value_filter
        elif isinstance(value_filter, (list, tuple)) and len(value_filter) == 2:
            min_val, max_val = value_filter

            def predicate(v):
                return min_val <= v <= max_val
        else:
            def predicate(v):
                return v == value_filter

        def numeric_value_filter(f):
            numeric = f.get('numeric_value')
            return numeric is not None and predicate(numeric)

        self._filters.append(numeric_value_filter)
        return self

    def by_period_type(self, period_type: str) -> FactQuery:
        """
        Filter facts by period type ('instant' or 'duration').

        Args:
            period_type: Period type to filter by

        Returns:
            Self for method chaining
        """
        self._filters.append(lambda f: 'period_type' in f and f['period_type'] == period_type)
        return self

    def by_period_key(self, period_key: str) -> FactQuery:
        """
        Filter facts by a specific period key.

        Args:
            period_key: Period key to filter by (e.g., "instant_2023-12-31")

        Returns:
            Self for method chaining
        """
        self._filters.append(lambda f: 'period_key' in f and f['period_key'] == period_key)
        return self

    def by_period_keys(self, period_keys: List[str]) -> FactQuery:
        """
        Filter facts by a list of period keys.

        Args:
            period_keys: List of period keys to filter by

        Returns:
            Self for method chaining
        """
        # Snapshot into a set once so each fact costs a hash lookup, not a list scan
        wanted = frozenset(period_keys)
        self._filters.append(lambda f: 'period_key' in f and f['period_key'] in wanted)
        return self

    def by_instant_date(self, date_str: str, exact: bool = True) -> FactQuery:
        """
        Filter facts by instant date.

        Args:
            date_str: Date string in YYYY-MM-DD format
            exact: If True, require exact match; if False, match facts with date less than or equal to date_str

        Returns:
            Self for method chaining
        """
        if exact:
            self._filters.append(lambda f: 'period_instant' in f and f['period_instant'] == date_str)
        else:
            date_obj = parse_date(date_str)
            # A fact whose instant is present but None cannot be compared; exclude it
            self._filters.append(
                lambda f: f.get('period_instant') is not None
                and parse_date(f['period_instant']) <= date_obj
            )
        return self

    def by_date_range(self, start_date: Optional[str] = None,
                      end_date: Optional[str] = None) -> FactQuery:
        """
        Filter duration facts by date range.

        A fact matches when its ``period_start`` is on or after ``start_date``
        (if given) and its ``period_end`` is on or before ``end_date`` (if given).
        Facts lacking the required period field are excluded. When neither bound
        is supplied no filter is added.

        Args:
            start_date: Optional start date string in YYYY-MM-DD format
            end_date: Optional end date string in YYYY-MM-DD format

        Returns:
            Self for method chaining
        """
        if not start_date and not end_date:
            return self

        start_obj = parse_date(start_date) if start_date else None
        end_obj = parse_date(end_date) if end_date else None

        def date_range_filter(f):
            if start_date:
                period_start = f.get('period_start')
                if period_start is None or not parse_date(period_start) >= start_obj:
                    return False
            if end_date:
                period_end = f.get('period_end')
                if period_end is None or not parse_date(period_end) <= end_obj:
                    return False
            return True

        self._filters.append(date_range_filter)
        return self

    def by_dimension(self, dimension: Optional[str], value: Optional[str] = None) -> FactQuery:
        """
        Filter facts by dimension with flexible matching.

        This method provides intelligent matching for dimension names and values, handling
        common XBRL formatting variations including:
        - Namespace prefixes (us-gaap:, srt:, etc.)
        - Underscore vs colon separators
        - Partial dimension names

        Args:
            dimension: Dimension name (supports multiple formats), or None to filter for facts with no dimensions
            value: Optional dimension value to filter by (supports multiple formats)

        Returns:
            Self for method chaining

        Examples:
            # These are all equivalent:
            .by_dimension("srt_ProductOrServiceAxis", "us-gaap:ServiceMember")
            .by_dimension("srt:ProductOrServiceAxis", "us-gaap_ServiceMember")
            .by_dimension("ProductOrServiceAxis", "ServiceMember")
        """
        if dimension is None:
            # Filter for facts with no dimensions
            self._filters.append(lambda f: not any(key.startswith('dim_') for key in f))
            return self

        # Normalize the input dimension to match stored format
        exact_key = f'dim_{self._normalize_dimension_key(dimension)}'

        if value is not None:
            # Normalize the value as well
            normalized_value = self._normalize_dimension_value(value)

            def dimension_filter_with_value(f):
                # Try exact match first
                if exact_key in f and f[exact_key] == normalized_value:
                    return True

                # Fall back to flexible matching on both the key and the value
                for dim_key, dim_value in f.items():
                    if not dim_key.startswith('dim_'):
                        continue
                    if (self._dimension_key_matches(dim_key, dimension)
                            and self._dimension_value_matches(dim_value, value)):
                        return True
                return False

            self._filters.append(dimension_filter_with_value)
        else:
            # Filter for facts that have this dimension (any value)
            def dimension_filter_exists(f):
                if exact_key in f:
                    return True
                return any(
                    dim_key.startswith('dim_') and self._dimension_key_matches(dim_key, dimension)
                    for dim_key in f
                )

            self._filters.append(dimension_filter_exists)

        return self

    def _normalize_dimension_key(self, dimension: str) -> str:
        """Normalize dimension key to the format used internally (underscores)."""
        # Replace colons with underscores (us-gaap:Axis -> us-gaap_Axis)
        return dimension.replace(':', '_')

    def _normalize_dimension_value(self, value: str) -> str:
        """Normalize dimension value to the format used internally."""
        # Replace underscores with colons for values (us-gaap_Member -> us-gaap:Member)
        return value.replace('_', ':')

    def _dimension_key_matches(self, stored_key: str, query_key: str) -> bool:
        """
        Check if a stored dimension key matches a query key with flexible matching.

        Args:
            stored_key: The dimension key as stored (e.g., 'dim_us-gaap_ProductAxis')
            query_key: The dimension key from the query (e.g., 'ProductAxis' or 'us-gaap:ProductAxis')

        Returns:
            True if the keys match
        """
        # Remove 'dim_' prefix from stored key
        stored_clean = stored_key[4:] if stored_key.startswith('dim_') else stored_key

        # Normalize both keys so ':' and '-' compare equal to '_'
        stored_normalized = stored_clean.replace(':', '_').replace('-', '_')
        query_normalized = query_key.replace(':', '_').replace('-', '_')

        if stored_normalized == query_normalized:
            return True

        # Partial match: compare only the part after the last underscore
        # (the query might be just the local name without namespace)
        if '_' in stored_normalized:
            stored_local = stored_normalized.rsplit('_', 1)[-1]
            query_local = query_normalized.rsplit('_', 1)[-1]
            return stored_local == query_local

        return False

    def _dimension_value_matches(self, stored_value: str, query_value: str) -> bool:
        """
        Check if a stored dimension value matches a query value with flexible matching.

        Args:
            stored_value: The dimension value as stored (e.g., 'us-gaap:ServiceMember')
            query_value: The dimension value from query (e.g., 'ServiceMember' or 'us-gaap_ServiceMember')

        Returns:
            True if the values match
        """
        if not stored_value or not query_value:
            return stored_value == query_value

        # Normalize both values (handle colon/underscore variations)
        stored_normalized = stored_value.replace('_', ':').replace('-', '_')
        query_normalized = query_value.replace('_', ':').replace('-', '_')

        if stored_normalized == query_normalized:
            return True

        # Partial match: compare only the part after the last colon
        # (the query might be just the local name without namespace)
        if ':' in stored_normalized:
            stored_local = stored_normalized.rsplit(':', 1)[-1]
            query_local = query_normalized.rsplit(':', 1)[-1]
            return stored_local == query_local

        return False

    def by_statement_type(self, statement_type: str) -> FactQuery:
        """
        Filter facts by statement type.

        Args:
            statement_type: Statement type ('BalanceSheet', 'IncomeStatement', etc.)

        Returns:
            Self for method chaining
        """
        self._filters.append(lambda f: 'statement_type' in f and f['statement_type'] == statement_type)
        return self

    def by_fiscal_period(self, fiscal_period: str) -> FactQuery:
        """
        Filter facts by fiscal period (FY, Q1, Q2, Q3, Q4).

        Args:
            fiscal_period: Fiscal period identifier

        Returns:
            Self for method chaining
        """
        self._filters.append(lambda f: 'fiscal_period' in f and f['fiscal_period'] == fiscal_period)
        return self

    def by_fiscal_year(self, fiscal_year: Union[int, str]) -> FactQuery:
        """
        Filter facts by fiscal year.

        Both sides are compared as strings, so ``2023`` and ``"2023"`` are equivalent.

        Args:
            fiscal_year: Fiscal year to filter by

        Returns:
            Self for method chaining
        """
        wanted = str(fiscal_year)
        self._filters.append(lambda f: 'fiscal_year' in f and str(f['fiscal_year']) == wanted)
        return self

    def by_unit(self, unit: str) -> FactQuery:
        """
        Filter facts by unit reference.

        Args:
            unit: Unit reference to filter by

        Returns:
            Self for method chaining
        """
        self._filters.append(lambda f: 'unit_ref' in f and f['unit_ref'] == unit)
        return self

    def by_custom(self, filter_func: Callable) -> FactQuery:
        """
        Add a custom filter function.

        Args:
            filter_func: Custom filter function that takes a fact dict and returns bool

        Returns:
            Self for method chaining
        """
        self._filters.append(filter_func)
        return self

    def by_text(self, pattern: str) -> FactQuery:
        """
        Search across concept names, labels, and element names for a pattern.

        This is a flexible search that looks for the pattern in all text fields, including
        both standardized labels and original labels when standardization has been applied.

        Args:
            pattern: Pattern to search for in various text fields (case-insensitive regex)

        Returns:
            Self for method chaining
        """
        regex = re.compile(pattern, re.IGNORECASE)
        fields = self._TEXT_FIELDS

        def text_filter(f):
            return any(
                f.get(field) is not None and bool(regex.search(str(f[field])))
                for field in fields
            )

        self._filters.append(text_filter)
        return self

    def exclude_dimensions(self) -> FactQuery:
        """
        Exclude dimension columns from results.

        Returns:
            Self for method chaining
        """
        self._include_dimensions = False
        return self

    def exclude_contexts(self) -> FactQuery:
        """
        Exclude context information from results.

        Returns:
            Self for method chaining
        """
        self._include_contexts = False
        return self

    def exclude_element_info(self) -> FactQuery:
        """
        Exclude element catalog information from results.

        Returns:
            Self for method chaining
        """
        self._include_element_info = False
        return self

    def sort_by(self, column: str, ascending: bool = True) -> FactQuery:
        """
        Set sorting for results.

        Args:
            column: Column name to sort by
            ascending: Sort order (True for ascending, False for descending)

        Returns:
            Self for method chaining
        """
        self._sort_by = column
        self._sort_ascending = ascending
        return self

    def limit(self, n: int) -> FactQuery:
        """
        Limit the number of results.

        Args:
            n: Maximum number of results to return

        Returns:
            Self for method chaining
        """
        self._limit = n
        return self

    def from_statement(self, statement_type: str) -> FactQuery:
        """
        Filter facts to only those from a specific statement.

        Unlike :meth:`by_statement_type`, this also records the statement type
        on the query.

        Args:
            statement_type: Type of statement (e.g., 'BalanceSheet', 'IncomeStatement')

        Returns:
            Self for method chaining
        """
        self._statement_type = statement_type
        self._filters.append(lambda f: f.get('statement_type') == statement_type)
        return self

    def transform(self, transform_fn: Callable[[Any], Any]) -> FactQuery:
        """
        Transform fact values using a custom function.

        The function is applied to each matching fact's ``value`` field at
        execution time; facts whose value is missing or None are left alone.

        Args:
            transform_fn: Function to transform values

        Returns:
            Self for method chaining
        """
        self._transformations.append(transform_fn)
        return self

    def scale(self, scale_factor: int) -> FactQuery:
        """
        Scale numeric values by a factor.

        Only values that are ``int``, ``float`` or ``Decimal`` are divided by
        the factor; anything else passes through unchanged.

        Args:
            scale_factor: The scaling factor (e.g., 1000 for thousands)

        Returns:
            Self for method chaining
        """

        def scale_transform(value):
            if isinstance(value, (int, float, Decimal)):
                return value / scale_factor
            return value

        return self.transform(scale_transform)

    def aggregate(self, dimension: str, func: str = 'sum') -> FactQuery:
        """
        Aggregate values by a dimension.

        Args:
            dimension: The dimension to aggregate by
            func: Aggregation function ('sum' or 'average')

        Returns:
            Self for method chaining
        """
        self._aggregations.append({
            'dimension': dimension,
            'function': func
        })
        return self

    def execute(self) -> List[Dict[str, Any]]:
        """
        Execute the query and return matching facts.

        Filters, transformations, aggregations, sorting and the limit are applied
        in that order. The list and dictionaries owned by the facts view are never
        modified: transformed facts are returned as copies.

        Returns:
            List of fact dictionaries, or (when aggregations were requested) a list
            of ``{'dimension', 'value', 'values'}`` dictionaries

        Raises:
            ValueError: If an aggregation uses a function other than 'sum' or 'average'
        """
        filters = self._filters
        # Always build a fresh list so later sorting cannot reorder the view's cached list
        results = [f for f in self._facts_view.get_facts() if all(flt(f) for flt in filters)]

        # Apply transformations to shallow copies so repeated executions start from the
        # original values instead of compounding on previously transformed facts
        if self._transformations:
            results = [dict(f) for f in results]
            for transform_fn in self._transformations:
                for fact in results:
                    if fact.get('value') is not None:
                        fact['value'] = transform_fn(fact['value'])

        # Apply aggregations
        if self._aggregations:
            aggregated_results = {}
            for agg in self._aggregations:
                dimension = agg['dimension']
                func = agg['function']
                if func not in ('sum', 'average'):
                    raise ValueError(
                        f"Unsupported aggregation function {func!r}; expected 'sum' or 'average'"
                    )

                # Group fact values by the member of the requested dimension
                groups = {}
                for fact in results:
                    dim_value = fact.get(f'dim_{dimension}')
                    if dim_value and fact.get('value') is not None:
                        groups.setdefault(dim_value, []).append(fact['value'])

                for dim_value, values in groups.items():
                    total = sum(values)
                    agg_value = total if func == 'sum' else total / len(values)

                    key = (dimension, dim_value)
                    if key not in aggregated_results:
                        aggregated_results[key] = {'dimension': dimension, 'value': dim_value, 'values': {}}
                    aggregated_results[key]['values'][func] = agg_value

            results = list(aggregated_results.values())

        # Apply sorting if specified (only when the first result carries the sort column)
        if results and self._sort_by and self._sort_by in results[0]:
            results.sort(key=lambda f: f.get(self._sort_by, ''),
                         reverse=not self._sort_ascending)

        # Apply limit if specified
        if self._limit is not None:
            results = results[:self._limit]

        return results

    def to_dataframe(self, *columns) -> pd.DataFrame:
        """
        Execute the query and return results as a DataFrame.

        The query is re-executed on every call, so filters added after an earlier
        call are always honoured.

        Args:
            *columns: Names of columns to include; unknown names are ignored.
                When omitted, all available columns are returned.

        Returns:
            pandas DataFrame with query results (empty when nothing matches)
        """
        results = self.execute()
        if not results:
            return pd.DataFrame()

        df = pd.DataFrame(results)

        # Filter columns based on inclusion flags
        if not self._include_dimensions:
            df = df.loc[:, [col for col in df.columns if not col.startswith('dim_')]]

        if not self._include_contexts:
            context_cols = ['context_ref', 'entity_identifier', 'entity_scheme',
                            'period_type']
            df = df.loc[:, [col for col in df.columns if col not in context_cols]]

        if not self._include_element_info:
            element_cols = ['element_id', 'element_name', 'element_type', 'element_period_type',
                            'element_balance', 'element_label']
            df = df.loc[:, [col for col in df.columns if col not in element_cols]]

        # Drop empty columns
        df = df.dropna(axis=1, how='all')

        # Filter columns if specified
        if columns:
            df = df[[col for col in columns if col in df.columns]]

        # skip these columns
        # Note: period_key is now included for time series analysis (Issue #464)
        skip_columns = ['fact_key', 'original_label']

        if 'statement_role' in df.columns:
            # Replace the statement_role URI with its last path segment as statement_name
            statement_name = df['statement_role'].fillna('').apply(
                lambda s: s.split('/')[-1] if s else None
            )
            df = df.assign(statement_name=statement_name).drop(columns=['statement_role'])

        # order columns
        first_columns = [col for col in
                         ['concept', 'label', 'balance', 'preferred_sign', 'weight', 'value', 'numeric_value',
                          'period_key', 'period_start', 'period_end', 'period_instant',
                          'decimals', 'statement_type', 'statement_name']
                         if col in df.columns]
        ordered = first_columns + [col for col in df.columns
                                   if col not in first_columns
                                   and col not in skip_columns]

        return df[ordered]

    def __rich__(self):
        """Build the rich panel showing usage help and a preview table of the results."""
        title = Text.assemble("Facts Query")
        subtitle = Text.assemble((self._facts_view.entity_name, "bold deep_sky_blue1"),
                                 " - ",
                                 self._facts_view.document_type
                                 )
        df = self.to_dataframe().fillna('')
        columns = df.columns.tolist()
        description = Markdown(
            dedent(f"""
            Use *to_dataframe(columns)* to get a DataFrame of the results.

            e.g. `query.to_dataframe('concept', 'value', 'period_end')`

            Available columns:
            '{', '.join(columns)}'
            """)
        )

        display_columns = [col for col in ['concept', 'label', 'value', 'period_start', 'period_end']
                           if col in columns]
        # Size the concept column to its longest entry
        max_width = int(df['concept'].apply(len).max()) if 'concept' in df.columns else 20
        # Build headers from the columns actually displayed so headers and cells stay aligned
        # even when 'concept' is not part of the results
        rich_columns = [Column(col, width=max_width) if col == 'concept' else col
                        for col in display_columns]
        df = df[display_columns]
        table = Table(*rich_columns, show_header=True, header_style="bold", box=box.SIMPLE)
        for row in df.itertuples(index=False):
            table.add_row(*[str(cell) for cell in row])

        return Panel(Group(description, table), title=title, subtitle=subtitle, box=box.ROUNDED)

    def __repr__(self):
        return repr_rich(self.__rich__())
|
|
|
|
|
|
class FactsView:
|
|
"""
|
|
A view over all facts in an XBRL instance, providing methods to query and analyze facts.
|
|
"""
|
|
|
|
def __init__(self, xbrl):
|
|
"""
|
|
Initialize the FactsView with an XBRL instance.
|
|
|
|
Args:
|
|
xbrl: XBRL instance containing facts, contexts, and elements
|
|
"""
|
|
self.xbrl = xbrl
|
|
self._facts_cache = None
|
|
self._facts_df_cache = None
|
|
|
|
def __len__(self):
|
|
return len(self.get_facts())
|
|
|
|
@property
|
|
def entity_name(self):
|
|
return self.xbrl.entity_name
|
|
|
|
@property
|
|
def document_type(self):
|
|
return self.xbrl.document_type
|
|
|
|
def get_facts(self) -> List[Dict[str, Any]]:
    """
    Get all facts with enriched context and element information.

    The first call builds one dictionary per entry in ``self.xbrl._facts`` and
    stores the list in ``self._facts_cache``; later calls return that same list
    object, so callers should treat it (and the dictionaries in it) as read-only.

    Each dictionary always carries ``fact_key``, ``concept``, ``context_ref``,
    ``value``, ``unit_ref``, ``decimals`` and ``numeric_value``. Depending on what
    is available for the fact it may also carry period fields (``period_type``,
    ``period_instant`` or ``period_start``/``period_end``, ``period_key``,
    ``fiscal_period``, ``fiscal_year``), entity fields (``entity_identifier``,
    ``entity_scheme``), one ``dim_<name>`` entry per dimension, and element
    fields (``label``, ``original_label``, ``balance``, ``preferred_sign``,
    ``statement_type``, ``statement_role``, ``weight``).

    Returns:
        List of enriched fact dictionaries
    """
    # NOTE(review): statement nesting below was reconstructed from a flattened listing —
    # confirm block boundaries (period_key lookup, weight assignment) against the repository.

    # Return cached facts if available
    if self._facts_cache is not None:
        return self._facts_cache

    # Prepare a mapping of roles to statement types for faster lookup
    # This avoids repeated calls to get_all_statements() for each fact
    role_to_statement_type = {}
    statements = self.xbrl.get_all_statements()
    for stmt in statements:
        # Statements missing either a role or a type are left out of the mapping
        if stmt['role'] and stmt['type']:
            role_to_statement_type[stmt['role']] = (stmt['type'], stmt['role'])

    # Prepare a mapping of period keys to fiscal info for faster lookup
    period_to_fiscal_info = {}
    for period in self.xbrl.reporting_periods:
        if 'key' in period:
            # Only the fiscal fields actually present on the period are copied
            fiscal_info = {}
            if 'fiscal_period' in period:
                fiscal_info['fiscal_period'] = period['fiscal_period']
            if 'fiscal_year' in period:
                fiscal_info['fiscal_year'] = period['fiscal_year']
            period_to_fiscal_info[period['key']] = fiscal_info

    # Build enriched facts from raw facts, contexts, and elements
    enriched_facts = []

    for fact_key, fact in self.xbrl._facts.items():
        # Create a dict with only necessary fields instead of full model_dump
        fact_dict = {
            'fact_key': fact_key,
            'concept': fact.element_id,
            'context_ref': fact.context_ref,
            'value': fact.value,
            'unit_ref': fact.unit_ref,
            'decimals': fact.decimals,
            'numeric_value': fact.numeric_value
        }

        # Split element name from context for better concept display
        # Don't override if element_id already has a namespace prefix with colon
        if "_" in fact_key and ":" not in fact_dict['concept']:
            # Use the part of the fact key before its first underscore as the concept
            parts = fact_key.split("_", 1)
            if len(parts) == 2:
                fact_dict['concept'] = parts[0]

        # Add context information
        if fact.context_ref in self.xbrl.contexts:
            context = self.xbrl.contexts[fact.context_ref]

            # Add period information - extract only what we need
            if context.period:
                # Handle both object and dict representations of period
                # (Model objects are converted to dicts in some contexts)
                if hasattr(context.period, 'type'):
                    # Object access
                    period_type = context.period.type
                    fact_dict['period_type'] = period_type
                    if period_type == 'instant':
                        fact_dict['period_instant'] = context.period.instant
                    elif period_type == 'duration':
                        fact_dict['period_start'] = context.period.startDate
                        fact_dict['period_end'] = context.period.endDate
                elif isinstance(context.period, dict):
                    # Dict access
                    period_type = context.period.get('type')
                    fact_dict['period_type'] = period_type
                    if period_type == 'instant':
                        fact_dict['period_instant'] = context.period.get('instant')
                    elif period_type == 'duration':
                        fact_dict['period_start'] = context.period.get('startDate')
                        fact_dict['period_end'] = context.period.get('endDate')

            # Add entity information - extract only what we need
            if context.entity:
                # Handle both object and dict representations of entity
                if hasattr(context.entity, 'identifier'):
                    # Object access
                    fact_dict['entity_identifier'] = context.entity.identifier
                    fact_dict['entity_scheme'] = context.entity.scheme
                elif isinstance(context.entity, dict):
                    # Dict access
                    fact_dict['entity_identifier'] = context.entity.get('identifier')
                    fact_dict['entity_scheme'] = context.entity.get('scheme')

            # Add dimensions - handle both object and dict representation
            # Each dimension becomes a 'dim_<name>' key with ':' in the name replaced by '_'
            if hasattr(context, 'dimensions') and context.dimensions:
                # Check if dimensions is a dict or an attribute
                if isinstance(context.dimensions, dict):
                    for dim_name, dim_value in context.dimensions.items():
                        dim_key = f"dim_{dim_name.replace(':', '_')}"
                        fact_dict[dim_key] = dim_value
                elif hasattr(context.dimensions, 'items'):
                    # Handle case where dimensions has items() method but isn't a dict
                    for dim_name, dim_value in context.dimensions.items():
                        dim_key = f"dim_{dim_name.replace(':', '_')}"
                        fact_dict[dim_key] = dim_value

            # Get period key from context_period_map if available
            period_key = self.xbrl.context_period_map.get(fact.context_ref)
            if period_key:
                fact_dict['period_key'] = period_key
                # Add fiscal info if available
                if period_key in period_to_fiscal_info:
                    fact_dict.update(period_to_fiscal_info[period_key])

        # Add element information and statement type
        # Normalize element_id to match catalog keys (replace ':' with '_')
        element_id = fact.element_id.replace(':', '_')
        if element_id in self.xbrl.element_catalog:
            element = self.xbrl.element_catalog[element_id]

            # First look up preferred_label from presentation trees
            # to ensure label consistency between rendering and facts
            preferred_label = None
            for _role, tree in self.xbrl.presentation_trees.items():
                if element_id in tree.all_nodes:
                    # Get presentation node to find preferred_label
                    pres_node = tree.all_nodes[element_id]
                    if pres_node.preferred_label:
                        preferred_label = pres_node.preferred_label
                        break  # Use the first preferred_label found

            # Add label using the same selection logic as display_label
            # but including the preferred_label we found above
            label = select_display_label(
                labels=element.labels,
                standard_label=element.labels.get(STANDARD_LABEL),
                preferred_label=preferred_label,  # May be None, which is handled by select_display_label
                element_id=element_id,
                element_name=element.name
            )

            fact_dict['label'] = label
            # Store original label (will be used for standardization comparison)
            fact_dict['original_label'] = label

            # Add balance from element catalog (Issue #463)
            # Balance indicates accounting classification (debit/credit)
            # Try element catalog first, then fall back to static US-GAAP mapping
            balance = element.balance
            if balance is None:
                # Import here to avoid circular dependencies
                from edgar.xbrl.parsers.concepts import get_balance_type
                # Try to get balance from static mapping using the original concept ID
                balance = get_balance_type(fact.element_id)
            fact_dict['balance'] = balance  # "debit", "credit", or None

            # Add preferred_sign from presentation linkbase (Issue #463)
            # Convert preferredLabel to a numeric sign multiplier for display
            # -1 means "negate for display", 1 means "use as-is", None means "not specified"
            if preferred_label:
                # Common preferredLabel values that indicate negation
                # (both the short names and the full role URIs are accepted)
                negation_labels = [
                    'negatedLabel',
                    'http://www.xbrl.org/2003/role/negatedLabel',
                    'negatedTerseLabel',
                    'http://www.xbrl.org/2003/role/negatedTerseLabel',
                    'negatedPeriodStartLabel',
                    'http://www.xbrl.org/2003/role/negatedPeriodStartLabel',
                    'negatedPeriodEndLabel',
                    'http://www.xbrl.org/2003/role/negatedPeriodEndLabel'
                ]
                fact_dict['preferred_sign'] = -1 if preferred_label in negation_labels else 1
            else:
                fact_dict['preferred_sign'] = None

            # Determine statement type by checking presentation trees using our precomputed mapping
            # The first tree that contains the element and maps to a statement wins
            for role, tree in self.xbrl.presentation_trees.items():
                if element_id in tree.all_nodes and role in role_to_statement_type:
                    statement_type, statement_role = role_to_statement_type[role]
                    fact_dict['statement_type'] = statement_type
                    fact_dict['statement_role'] = statement_role
                    break

            # Add weight from calculation tree (Issue #463)
            # Weight indicates calculation role (1.0 = add, -1.0 = subtract)
            # Note: Weight is role-specific, use primary statement role when available
            # statement_type is None here when no presentation tree matched above
            statement_type = fact_dict.get('statement_type')
            fact_dict['weight'] = self._get_primary_weight(element_id, statement_type)

        enriched_facts.append(fact_dict)

    # Cache the enriched facts
    self._facts_cache = enriched_facts
    return self._facts_cache
|
|
|
|
def query(self) -> FactQuery:
    """
    Create a query builder bound to this facts view.

    Returns:
        FactQuery: A fresh builder constructed with this view as its source
    """
    builder = FactQuery(self)
    return builder
|
|
|
|
def to_dataframe(self) -> pd.DataFrame:
    """
    Return every fact as a single DataFrame, building it at most once.

    The frame is built from ``get_facts()`` on first use and stored in
    ``_facts_df_cache``; later calls hand back that same stored object.

    Returns:
        pandas DataFrame containing all facts
    """
    cached = self._facts_df_cache
    if cached is None:
        # First request: materialise the frame and remember it.
        cached = pd.DataFrame(self.get_facts())
        self._facts_df_cache = cached
    return cached
|
|
|
|
def get_statement_facts(self, statement_type: str) -> pd.DataFrame:
    """
    Return the facts that belong to one statement.

    Args:
        statement_type: Type of statement ('BalanceSheet', 'IncomeStatement', etc.)

    Returns:
        pandas DataFrame produced by a query filtered with ``by_statement_type``
    """
    filtered = self.query().by_statement_type(statement_type)
    return filtered.to_dataframe()
|
|
|
|
def get_facts_by_concept(self, concept_pattern: str, exact: bool = False) -> pd.DataFrame:
    """
    Return the facts whose concept matches a pattern.

    Args:
        concept_pattern: Pattern to match against concept names
        exact: Forwarded to ``by_concept``; True requests exact matching,
            False requests pattern matching

    Returns:
        pandas DataFrame with matching facts
    """
    filtered = self.query().by_concept(concept_pattern, exact)
    return filtered.to_dataframe()
|
|
|
|
def search_facts(self, text_pattern: str) -> pd.DataFrame:
    """
    Run a free-text search over the facts.

    The matching itself is performed by the query builder's ``by_text``
    filter; this method only wires the pattern through and materialises
    the result.

    Args:
        text_pattern: Text pattern to search for

    Returns:
        pandas DataFrame with matching facts
    """
    filtered = self.query().by_text(text_pattern)
    return filtered.to_dataframe()
|
|
|
|
def get_facts_with_dimensions(self) -> pd.DataFrame:
    """
    Return only the facts that carry at least one dimensional qualifier.

    A fact counts as dimensional when any of its keys starts with ``dim_``.

    Returns:
        pandas DataFrame with dimensionally-qualified facts
    """
    def has_dimension(fact) -> bool:
        # True as soon as one 'dim_*' key is seen.
        for name in fact.keys():
            if name.startswith('dim_'):
                return True
        return False

    return self.query().by_custom(has_dimension).to_dataframe()
|
|
|
|
def get_facts_by_period(self, period_key: str) -> pd.DataFrame:
    """
    Return the facts reported for a single period.

    Args:
        period_key: Period key from reporting_periods

    Returns:
        pandas DataFrame produced by a query filtered with ``by_period_key``
    """
    filtered = self.query().by_period_key(period_key)
    return filtered.to_dataframe()
|
|
|
|
def get_facts_by_period_view(self, statement_type: str, period_view_name: str) -> pd.DataFrame:
    """
    Return the facts covered by a named period view (e.g., "Annual Comparison").

    The view is looked up by its ``'name'`` among the views reported by
    ``xbrl.get_period_views(statement_type)``; its ``'period_keys'`` then
    drive the period filter.

    Args:
        statement_type: Type of statement ('BalanceSheet', 'IncomeStatement', etc.)
        period_view_name: Name of the period view as defined in get_period_views

    Returns:
        pandas DataFrame with facts for the specified period view, or an
        empty DataFrame when no view with that name exists
    """
    # Locate the first view carrying the requested name.
    selected = None
    for candidate in self.xbrl.get_period_views(statement_type):
        if candidate['name'] == period_view_name:
            selected = candidate
            break

    if not selected:
        return pd.DataFrame()

    keys = selected['period_keys']

    builder = self.query()
    # The statement filter is skipped for a falsy statement type.
    if statement_type:
        builder = builder.by_statement_type(statement_type)

    return builder.by_period_keys(keys).to_dataframe()
|
|
|
|
def get_facts_by_fiscal_period(self, fiscal_year: Union[int, str],
                               fiscal_period: str) -> pd.DataFrame:
    """
    Return the facts for one fiscal year / fiscal period combination.

    Args:
        fiscal_year: Fiscal year
        fiscal_period: Fiscal period ('FY', 'Q1', 'Q2', 'Q3', 'Q4')

    Returns:
        pandas DataFrame with facts for the specified fiscal period
    """
    # Year filter first, then period filter, then materialise.
    by_year = self.query().by_fiscal_year(fiscal_year)
    by_period = by_year.by_fiscal_period(fiscal_period)
    return by_period.to_dataframe()
|
|
|
|
def summarize(self) -> Dict[str, Any]:
    """
    Generate a summary of facts in the XBRL instance.

    Returns:
        Dictionary with fact summary statistics:
            - 'total_facts': number of facts
            - 'by_type': count per 'element_type' ('unknown' when the key is absent)
            - 'by_statement': count per 'statement_type' ('unknown' when absent)
            - 'by_period_type': count per 'period_type' ('unknown' when absent)
            - 'dimensions': sorted dimension names (fact keys minus the 'dim_' prefix)
            - 'periods': sorted distinct 'period_key' values
    """
    facts = self.get_facts()
    prefix = 'dim_'

    by_type: Dict[Any, int] = {}
    by_statement: Dict[Any, int] = {}
    by_period_type: Dict[Any, int] = {}
    dimensions: Set[str] = set()
    periods = set()

    # Each tally is paired with the fact field it counts.
    tallies = (
        (by_type, 'element_type'),
        (by_statement, 'statement_type'),
        (by_period_type, 'period_type'),
    )

    # Single pass over the facts collects every statistic.
    for fact in facts:
        for counts, field in tallies:
            value = fact.get(field, 'unknown')
            counts[value] = counts.get(value, 0) + 1

        for key in fact:
            if key.startswith(prefix):
                # Strip only the leading prefix; str.replace would also remove
                # any later 'dim_' inside the dimension name itself.
                dimensions.add(key[len(prefix):])

        if 'period_key' in fact:
            periods.add(fact['period_key'])

    return {
        'total_facts': len(facts),
        'by_type': by_type,
        'by_statement': by_statement,
        'by_period_type': by_period_type,
        'dimensions': sorted(dimensions),
        'periods': sorted(periods),
    }
|
|
|
|
def get_unique_concepts(self) -> List[str]:
    """
    List the distinct concept names present in the facts.

    Facts without a 'concept' key are ignored.

    Returns:
        Sorted list of unique concept names
    """
    seen = set()
    for fact in self.get_facts():
        if 'concept' in fact:
            seen.add(fact.get('concept'))

    names = list(seen)
    names.sort()
    return names
|
|
|
|
def get_unique_dimensions(self) -> Dict[str, Set[str]]:
    """
    Get unique dimensions and their values.

    Dimension names are the fact keys that start with 'dim_', with that
    leading prefix removed, so ``f"dim_{name}"`` maps back to the original key.

    Returns:
        Dictionary mapping dimension names to sets of the values seen for them
    """
    prefix = 'dim_'
    dimensions: Dict[str, Set[str]] = {}

    for fact in self.get_facts():
        for key, value in fact.items():
            if key.startswith(prefix):
                # Strip only the leading prefix; str.replace would also remove
                # any later 'dim_' inside the dimension name itself.
                dimensions.setdefault(key[len(prefix):], set()).add(value)

    return dimensions
|
|
|
|
def get_available_period_views(self, statement_type: str) -> List[Dict[str, Any]]:
    """
    List the period views usable with get_facts_by_period_view.

    Each view returned by ``xbrl.get_period_views`` is annotated in place
    with a ``'facts_count'`` entry: the number of facts matching both the
    statement type and the view's period keys, or 0 when the view has no
    period keys.

    Args:
        statement_type: Type of statement ('BalanceSheet', 'IncomeStatement', etc.)

    Returns:
        The list of period view dictionaries, each with 'facts_count' added
    """
    views = self.xbrl.get_period_views(statement_type)

    for entry in views:
        keys = entry.get('period_keys', [])
        if not keys:
            # Nothing to query for a view without periods.
            entry['facts_count'] = 0
            continue

        matched = (
            self.query()
            .by_statement_type(statement_type)
            .by_period_keys(keys)
            .execute()
        )
        entry['facts_count'] = len(matched)

    return views
|
|
|
|
def pivot_by_period(self, concept_pattern: Optional[str] = None,
                    statement_type: Optional[str] = None) -> pd.DataFrame:
    """
    Create a pivoted view of facts by period.

    Args:
        concept_pattern: Optional concept pattern to filter by
        statement_type: Optional statement type to filter by

    Returns:
        pandas DataFrame with one row per concept (and label, when a
        'label' column is available) and one column per period key.
        An empty DataFrame is returned when no facts match; the unpivoted
        query result is returned when the 'period_key', 'concept' or
        'numeric_value' column is missing.
    """
    query = self.query()

    if concept_pattern:
        query = query.by_concept(concept_pattern)

    if statement_type:
        query = query.by_statement_type(statement_type)

    df = query.to_dataframe()

    if df.empty:
        return pd.DataFrame()

    # Pivoting needs these three columns; without them hand back the raw frame.
    if not {'period_key', 'concept', 'numeric_value'}.issubset(df.columns):
        return df

    # 'label' is a nice-to-have index level: include it only when present so
    # a label-less result no longer fails with KeyError.
    index_cols = ['concept', 'label'] if 'label' in df.columns else ['concept']

    pivot = df.pivot_table(
        values='numeric_value',
        index=index_cols,
        columns='period_key',
        aggfunc='first'  # Take first occurrence for each concept-period combo
    )

    # Reset index to make the index levels regular columns
    return pivot.reset_index()
|
|
|
|
def pivot_by_dimension(self, dimension: str,
                       concept_pattern: Optional[str] = None,
                       period_key: Optional[str] = None) -> pd.DataFrame:
    """
    Create a pivoted view of facts by dimension values.

    Args:
        dimension: Dimension to pivot by (without the 'dim_' column prefix)
        concept_pattern: Optional concept pattern to filter by
        period_key: Optional period key to filter by

    Returns:
        pandas DataFrame with one row per concept (and label, when a
        'label' column is available) and one column per dimension value.
        An empty DataFrame is returned when no facts match; the unpivoted
        query result is returned when the dimension, 'concept' or
        'numeric_value' column is missing.
    """
    query = self.query()

    # Apply filters if provided
    if concept_pattern:
        query = query.by_concept(concept_pattern)

    if period_key:
        query = query.by_custom(lambda f: 'period_key' in f and f['period_key'] == period_key)

    # Ensure we only get facts with this dimension
    query = query.by_dimension(dimension)

    df = query.to_dataframe()

    if df.empty:
        return pd.DataFrame()

    dim_col = f"dim_{dimension}"

    # Pivoting needs these three columns; without them hand back the raw frame.
    if not {dim_col, 'concept', 'numeric_value'}.issubset(df.columns):
        return df

    # 'label' is a nice-to-have index level: include it only when present so
    # a label-less result no longer fails with KeyError.
    index_cols = ['concept', 'label'] if 'label' in df.columns else ['concept']

    pivot = df.pivot_table(
        values='numeric_value',
        index=index_cols,
        columns=dim_col,
        aggfunc='first'  # Take first occurrence for each concept-dimension combo
    )

    # Reset index to make the index levels regular columns
    return pivot.reset_index()
|
|
|
|
def time_series(self, concept: str, exact: bool = True) -> pd.DataFrame:
    """
    Create a time series view for a specific concept.

    Each fact is dated by 'period_instant' when its 'period_type' is
    'instant' and by 'period_end' when it is 'duration'; facts with any
    other period type, or without the corresponding date, are dropped.
    The frame returned by the query is not modified.

    Args:
        concept: Concept name to create time series for
        exact: If True, require exact concept match; otherwise, use pattern matching

    Returns:
        pandas DataFrame sorted by 'date'. Columns are 'date' followed by
        whichever of 'numeric_value', 'unit_ref', 'label', 'fiscal_period'
        and 'fiscal_year' exist, then every 'dim_*' column. An empty
        DataFrame is returned when no facts match.
    """
    df = self.query().by_concept(concept, exact).to_dataframe()

    if df.empty:
        return pd.DataFrame()

    def column_or_missing(name: str) -> pd.Series:
        # A column absent from the frame behaves as all-missing.
        if name in df.columns:
            return df[name]
        return pd.Series([None] * len(df), index=df.index, dtype=object)

    period_type = column_or_missing('period_type')

    # Vectorised equivalent of: instant -> period_instant,
    # duration -> period_end, anything else -> missing.
    end_dates = column_or_missing('period_end').where(period_type == 'duration')
    dates = column_or_missing('period_instant').where(period_type == 'instant', end_dates)

    # assign() builds new frames, so neither the query result nor a dropna
    # slice is written to.
    dated = df.assign(date=dates).dropna(subset=['date'])
    dated = dated.assign(date=pd.to_datetime(dated['date'])).sort_values('date')

    # Keep only the columns that actually exist instead of assuming them.
    preferred = ['date', 'numeric_value', 'unit_ref', 'label', 'fiscal_period', 'fiscal_year']
    columns = [col for col in preferred if col in dated.columns]

    # Add any dimension columns that exist
    columns.extend(col for col in dated.columns if col.startswith('dim_'))

    return dated[columns]
|
|
|
|
def facts_history(self, concept: str, date_col: str = 'period_end',
                  include_dimensions: bool = True) -> pd.DataFrame:
    """
    Get the history of a concept across time, optionally including dimensions.

    Args:
        concept: Concept name to track (matched exactly)
        date_col: Date column to use for time series ('period_end', 'period_instant')
        include_dimensions: Whether to include dimensional breakdowns

    Returns:
        pandas DataFrame with time series data. When include_dimensions is
        True and the facts have 'dim_*' columns, the result is a pivot
        indexed by date_col with one column per combination of dimension
        values (joined with '-'). Otherwise it is a flat frame sorted by
        date_col. An empty DataFrame is returned when no facts match or
        when the facts have no date_col column at all (for example the
        default 'period_end' on a concept that only has instant facts).
    """
    df = self.query().by_concept(concept, True).to_dataframe()

    # No facts, or none of them carries the requested date column.
    if df.empty or date_col not in df.columns:
        return pd.DataFrame()

    # Keep only dated rows; assign() returns a new frame so the dropna
    # result is never written to in place.
    df = df.dropna(subset=[date_col])
    df = df.assign(**{date_col: pd.to_datetime(df[date_col])})

    if include_dimensions:
        dimension_cols = [col for col in df.columns if col.startswith('dim_')]

        if dimension_cols:
            if df.empty:
                # Nothing left to pivot once undated rows are gone.
                return pd.DataFrame()

            # One combined key per row, e.g. "SegmentA-US".
            dimension_keys = [
                '-'.join(str(value) for value in row)
                for row in df[dimension_cols].itertuples(index=False, name=None)
            ]
            df = df.assign(dimension_key=dimension_keys)

            # Pivot to show time series by dimension
            pivot = df.pivot_table(
                values='numeric_value',
                index=[date_col],
                columns=['dimension_key'],
                aggfunc='first'
            )

            return pivot.sort_index()

    # Simple time series without dimensions: select the columns that exist,
    # de-duplicated in case date_col is itself one of the optional columns.
    wanted = ['concept', 'label', date_col, 'numeric_value', 'unit_ref',
              'fiscal_period', 'fiscal_year']
    columns = [col for col in dict.fromkeys(wanted) if col in df.columns]

    return df.sort_values(date_col)[columns]
|
|
|
|
def _get_primary_weight(self, element_id: str, statement_type: Optional[str]) -> Optional[float]:
    """
    Look up the calculation weight of an element, preferring its own statement.

    Calculation trees whose role URI contains a keyword associated with the
    statement type are searched first ('income' for IncomeStatement,
    'balance' or 'position' for BalanceSheet, 'cash' for CashFlowStatement).
    If none of those holds the element, the first tree of any role that
    does is used.

    Args:
        element_id: Normalized element ID (e.g., 'us_gaap_Revenue')
        statement_type: Statement type ('IncomeStatement', 'BalanceSheet', etc.)

    Returns:
        The matching node's weight, or None when the XBRL object has no
        calculation trees or no tree contains the element
    """
    if not hasattr(self.xbrl, 'calculation_trees'):
        return None

    # Role-URI substrings that identify each primary statement.
    role_keywords = {
        "IncomeStatement": ("income",),
        "BalanceSheet": ("balance", "position"),
        "CashFlowStatement": ("cash",),
    }

    # First pass: only roles that look like the requested statement.
    if statement_type:
        keywords = role_keywords.get(statement_type, ())
        for role_uri, tree in self.xbrl.calculation_trees.items():
            lowered = role_uri.lower()
            if not any(word in lowered for word in keywords):
                continue
            candidate = tree.all_nodes.get(element_id)
            if candidate:
                return candidate.weight

    # Second pass: first tree of any role that contains the element.
    for tree in self.xbrl.calculation_trees.values():
        candidate = tree.all_nodes.get(element_id)
        if candidate:
            return candidate.weight

    return None
|
|
|
|
def clear_cache(self) -> None:
    """Drop both the cached fact list and the cached facts DataFrame."""
    self._facts_cache, self._facts_df_cache = None, None
|
|
|
|
def __str__(self):
    """Return 'Facts for ' followed by the formatted XBRL object."""
    return "Facts for {}".format(self.xbrl)
|
|
|
|
@property
def _title_text(self):
    """Styled title: 'XBRL Facts for <entity name> - <document type>'."""
    # (text, rich style) pairs, in display order.
    segments = [
        ("XBRL Facts for ", "bold white"),
        (self.xbrl.entity_name, "bold deep_sky_blue1"),
        (" - ", "bold magenta"),
        (self.xbrl.document_type, "bold white"),
    ]
    return Text.assemble(*segments)
|
|
|
|
|
|
def add_facts_view(xbrl):
    """
    Build a FactsView for an XBRL object and attach it as ``xbrl.facts_view``.

    Args:
        xbrl: XBRL instance

    Returns:
        The newly created FactsView instance
    """
    xbrl.facts_view = view = FactsView(xbrl)
    return view
|