Files
edgartools/venv/lib/python3.10/site-packages/edgar/xbrl/models.py
2025-12-09 12:13:01 +01:00

314 lines
9.9 KiB
Python

"""
Data models for XBRL parsing.
This module defines the core data structures used throughout the XBRL parser.
"""
import logging
from typing import Any, Dict, List, Optional, Union

from pydantic import BaseModel, Field
# Constants for label roles.
# Each value is a label-role URI under http://www.xbrl.org/2003/role/.
# select_display_label() looks up TERSE_LABEL and STANDARD_LABEL as keys of
# its ``labels`` dictionary; the remaining roles are not referenced by the
# code visible in this part of the file.
STANDARD_LABEL = "http://www.xbrl.org/2003/role/label"  # the plain "label" role
TERSE_LABEL = "http://www.xbrl.org/2003/role/terseLabel"  # "terseLabel" role
PERIOD_START_LABEL = "http://www.xbrl.org/2003/role/periodStartLabel"  # "periodStartLabel" role
PERIOD_END_LABEL = "http://www.xbrl.org/2003/role/periodEndLabel"  # "periodEndLabel" role
TOTAL_LABEL = "http://www.xbrl.org/2003/role/totalLabel"  # "totalLabel" role
def select_display_label(
    labels: Dict[str, str],
    preferred_label: Optional[str] = None,
    standard_label: Optional[str] = None,
    element_id: Optional[str] = None,
    element_name: Optional[str] = None
) -> str:
    """
    Select the most appropriate label for display, following a consistent priority order.

    Includes standardization mapping to provide consistent labels across companies.

    The raw label is chosen by the first rule that matches:
        1. ``labels[preferred_label]`` when a preferred role is given and present
        2. the terse label (``TERSE_LABEL``) from ``labels``
        3. ``standard_label`` when it is non-empty
        4. the standard label (``STANDARD_LABEL``) from ``labels``
        5. the first value found in ``labels``
        6. ``element_name`` when it is non-empty
        7. ``element_id``, or an empty string when that is missing too

    When both ``element_id`` and the chosen label are non-empty, the
    standardization mapping store is consulted and a truthy result of
    ``get_standard_concept(element_id)`` is returned instead of the chosen
    label. Standardization is best-effort: if it is unavailable or fails,
    the chosen label is returned unchanged.

    Args:
        labels: Dictionary of available labels, keyed by label role
            (may be empty or None)
        preferred_label: Role of the preferred label (if specified in presentation linkbase)
        standard_label: The standard label content (if available)
        element_id: Element ID (fallback, and the key used for standardization)
        element_name: Element name (alternative fallback)

    Returns:
        The selected label according to priority rules, with standardization applied if available
    """
    # First, select the best available label using the priority order above.
    selected_label = None
    # 1. Use preferred label if specified and available
    if preferred_label and labels and preferred_label in labels:
        selected_label = labels[preferred_label]
    # 2. Use terse label if available (more user-friendly)
    elif labels and TERSE_LABEL in labels:
        selected_label = labels[TERSE_LABEL]
    # 3. Fall back to standard label
    elif standard_label:
        selected_label = standard_label
    # 4. Try STANDARD_LABEL directly from labels dict
    elif labels and STANDARD_LABEL in labels:
        selected_label = labels[STANDARD_LABEL]
    # 5. Take any available label
    elif labels:
        selected_label = next(iter(labels.values()), "")
    # 6. Use element name if available
    elif element_name:
        selected_label = element_name
    # 7. Last resort: element ID
    else:
        selected_label = element_id or ""

    # Apply standardization if we have an element_id (concept)
    if element_id and selected_label:
        try:
            # Imported lazily so this module still works when the
            # standardization package cannot be imported.
            from edgar.xbrl.standardization.core import initialize_default_mappings

            # The mapping store is built on first use and memoized on the
            # function object so later calls reuse it.
            if not hasattr(select_display_label, '_mapping_store'):
                select_display_label._mapping_store = initialize_default_mappings(read_only=True)

            # Try to get standardized concept
            standardized_label = select_display_label._mapping_store.get_standard_concept(element_id)
            if standardized_label:
                return standardized_label
        except ImportError:
            # Standardization not available, continue with selected label
            pass
        except Exception:
            # Any other failure is unexpected. Stay best-effort (never raise
            # to the caller) but leave a trace instead of hiding the error.
            logging.getLogger(__name__).debug(
                "Label standardization failed for %r; using unstandardized label",
                element_id,
                exc_info=True,
            )
    return selected_label
class ElementCatalog:
    """
    A catalog of XBRL elements with their properties.

    This is the base data structure for element metadata as described in the design document.

    Attributes:
        name: The name of the element (e.g., "us-gaap_NetIncome")
        data_type: The data type of the element (e.g., "monetary", "string", etc.)
        period_type: The period type of the element (e.g., "instant", "duration")
        balance: The balance type of the element (e.g., "debit", "credit", or None)
        abstract: Whether the element is abstract (True/False)
        labels: A dictionary of labels for the element, keyed by role URI
    """

    def __init__(self,
                 name: str,
                 data_type: str,
                 period_type: str,
                 balance: Optional[str] = None,
                 abstract: bool = False,
                 labels: Optional[Dict[str, str]] = None
                 ):
        """
        Store the element properties on the instance.

        When ``labels`` is None a new empty dict is created for this instance;
        a dict passed by the caller is stored as-is (not copied).
        """
        self.name = name
        self.data_type = data_type
        self.period_type = period_type
        self.balance = balance
        self.abstract = abstract
        self.labels = labels if labels is not None else {}

    def __str__(self) -> str:
        """Return the element name."""
        return self.name

    def __repr__(self) -> str:
        """Return a constructor-style representation listing every attribute."""
        return (
            f"{type(self).__name__}(name={self.name!r}, data_type={self.data_type!r}, "
            f"period_type={self.period_type!r}, balance={self.balance!r}, "
            f"abstract={self.abstract!r}, labels={self.labels!r})"
        )
class Context(BaseModel):
    """
    Entity, period and dimension information attached to an XBRL context.

    This corresponds to the Context Registry in the design document.
    """
    context_id: str
    entity: Dict[str, Any] = Field(default_factory=dict)
    period: Dict[str, Any] = Field(default_factory=dict)
    dimensions: Dict[str, str] = Field(default_factory=dict)

    @property
    def period_string(self) -> str:
        """Describe the period in words, based on the ``'type'`` entry of ``period``."""
        period_type = self.period.get('type')
        if period_type == 'instant':
            return f"As of {self.period.get('instant')}"
        if period_type == 'duration':
            return f"From {self.period.get('startDate')} to {self.period.get('endDate')}"
        # Any other (or missing) period type is reported as unbounded.
        return "Forever"
class Fact(BaseModel):
    """
    An XBRL fact with value and references to context, unit, and element.

    This corresponds to the Fact Database in the design document.

    The instance_id field is used to differentiate between duplicate facts
    that share the same element_id and context_ref. When a fact has no
    duplicates, instance_id will be None.

    The fact_id field preserves the original id attribute from the XML element,
    enabling linkage with footnotes.
    """
    element_id: str  # identifier of the element this fact reports
    context_ref: str  # reference to a context (presumably a Context.context_id -- confirm)
    value: str  # the fact value, kept as a string
    unit_ref: Optional[str] = None  # reference to a unit; None when not set
    decimals: Optional[Union[int, str]] = None  # int or "INF"
    numeric_value: Optional[float] = None  # presumably `value` parsed to a float by the parser -- confirm
    footnotes: List[str] = Field(default_factory=list)  # presumably ids of linked footnotes -- confirm
    instance_id: Optional[int] = None  # only set for duplicate facts (see class docstring)
    fact_id: Optional[str] = None  # Original id attribute from the XML
class Footnote(BaseModel):
    """
    Represents an XBRL footnote with its text content and related facts.

    Footnotes are linked to facts via footnoteArc elements that connect
    fact IDs to footnote IDs using xlink:from and xlink:to attributes.
    """
    footnote_id: str  # identifier of the footnote
    text: str  # text content of the footnote
    lang: Optional[str] = "en-US"  # language tag; defaults to "en-US"
    role: Optional[str] = None  # footnote role, if any
    related_fact_ids: List[str] = Field(default_factory=list)  # ids of facts this footnote is linked to
class PresentationNode(BaseModel):
    """
    One node of a presentation hierarchy.

    This corresponds to the Presentation Node in the design document.
    """
    element_id: str
    parent: Optional[str] = None
    children: List[str] = Field(default_factory=list)
    order: float = 0.0
    preferred_label: Optional[str] = None
    depth: int = 0
    # Extra details copied over from the element catalog
    element_name: Optional[str] = None
    standard_label: Optional[str] = None
    is_abstract: bool = False
    labels: Dict[str, str] = Field(default_factory=dict)

    @property
    def display_label(self) -> str:
        """
        Label to show for this node, as chosen by ``select_display_label``.

        The node's ``labels``, ``preferred_label``, ``standard_label`` and
        ``element_id`` are forwarded; the priority order and any
        standardization are decided by that function.

        NOTE(review): ``element_name`` is not forwarded, so the element-name
        fallback of ``select_display_label`` is never used here -- confirm
        this is intentional.
        """
        return select_display_label(
            self.labels,
            preferred_label=self.preferred_label,
            standard_label=self.standard_label,
            element_id=self.element_id,
        )
class PresentationTree(BaseModel):
    """
    A presentation tree for a specific role.

    This corresponds to the Presentation Hierarchy in the design document.
    """
    role_uri: str  # URI of the role this tree belongs to
    definition: str  # definition text for the role
    root_element_id: str  # element id of the tree's root
    all_nodes: Dict[str, PresentationNode] = Field(default_factory=dict)  # nodes of the tree (presumably keyed by element id -- confirm)
    order: int = 0  # ordering value for the tree; defaults to 0
class CalculationNode(BaseModel):
    """
    A node in the calculation hierarchy.

    This corresponds to the Calculation Node in the design document.
    """
    element_id: str  # identifier of the element at this node
    children: List[str] = Field(default_factory=list)  # child entries (presumably element ids -- confirm)
    parent: Optional[str] = None  # parent entry (presumably an element id); None when not set
    weight: float = 1.0  # presumably the calculation weight applied when summing into the parent -- confirm
    order: float = 0.0  # ordering value; presumably position among siblings -- confirm
    # Information linked from schema
    balance_type: Optional[str] = None  # "debit", "credit", or None
    period_type: Optional[str] = None  # "instant" or "duration"
class CalculationTree(BaseModel):
    """
    A calculation tree for a specific role.

    This corresponds to the Calculation Network in the design document.
    """
    role_uri: str  # URI of the role this tree belongs to
    definition: str  # definition text for the role
    root_element_id: str  # element id of the tree's root
    all_nodes: Dict[str, CalculationNode] = Field(default_factory=dict)  # nodes of the tree (presumably keyed by element id -- confirm)
class Axis(BaseModel):
    """
    A dimensional axis (dimension) in XBRL.

    This corresponds to the Axis (Dimension) in the design document.
    """
    element_id: str  # identifier of the axis element
    label: str  # label text for the axis
    domain_id: Optional[str] = None  # presumably the element id of the axis's domain -- confirm
    default_member_id: Optional[str] = None  # presumably the element id of the default member -- confirm
    is_typed_dimension: bool = False  # flag for a typed dimension; defaults to False
    typed_domain_ref: str = ""  # typed-domain reference; empty string by default
class Domain(BaseModel):
    """
    A domain in an XBRL dimensional structure.

    This corresponds to the Domain in the design document.
    """
    element_id: str  # identifier of the domain element
    label: str  # label text for the domain
    members: List[str] = Field(default_factory=list)  # List of domain member element IDs
    parent: Optional[str] = None  # Parent domain element ID
class Table(BaseModel):
    """
    A dimensional table (hypercube) in XBRL.

    This corresponds to the Table (Hypercube) in the design document.
    """
    element_id: str  # identifier of the table element
    label: str  # label text for the table
    role_uri: str  # URI of the role the table is defined in
    axes: List[str] = Field(default_factory=list)  # List of axis element IDs
    line_items: List[str] = Field(default_factory=list)  # List of line item element IDs
    closed: bool = False  # presumably mirrors the hypercube's "closed" setting -- confirm
    context_element: str = "segment"  # defaults to "segment"; presumably the context element carrying the dimensions -- confirm
class XBRLProcessingError(Exception):
    """Error raised when something goes wrong while processing XBRL."""