Files
2025-12-09 12:13:01 +01:00

236 lines
9.4 KiB
Python

"""
Definition parser for XBRL documents.
This module handles parsing of XBRL definition linkbases and building
dimensional structures like tables, axes, and domains.
"""
from pathlib import Path
from typing import Any, Dict, List, Union
from edgar.xbrl.core import NAMESPACES, STANDARD_LABEL, extract_element_id
from edgar.xbrl.models import Axis, Domain, ElementCatalog, Table, XBRLProcessingError
from .base import BaseParser
class DefinitionParser(BaseParser):
    """Parser for XBRL definition linkbases.

    Walks every ``definitionLink`` extended link, records its role, and turns
    the dimensional arcs it contains (``hypercube-dimension``,
    ``dimension-domain``, ``domain-member`` and ``all``) into ``Table``,
    ``Axis`` and ``Domain`` objects. Results are written into the dictionaries
    supplied to the constructor rather than returned.
    """

    def __init__(self, definition_roles: Dict[str, Dict[str, Any]],
                 tables: Dict[str, List[Table]],
                 axes: Dict[str, Axis],
                 domains: Dict[str, Domain],
                 element_catalog: Dict[str, ElementCatalog]):
        """
        Initialize definition parser with data structure references.

        The dictionaries are stored by reference and mutated in place while
        parsing, so the caller observes the results through its own objects.

        Args:
            definition_roles: Reference to definition roles dictionary
                (keyed by role URI).
            tables: Reference to tables dictionary (keyed by role URI).
            axes: Reference to axes dictionary (keyed by axis element ID).
            domains: Reference to domains dictionary (keyed by domain element ID).
            element_catalog: Reference to element catalog dictionary; only
                read here, to resolve display labels.
        """
        super().__init__()

        # Store references to data structures
        self.definition_roles = definition_roles
        self.tables = tables
        self.axes = axes
        self.domains = domains
        self.element_catalog = element_catalog

    def parse_definition(self, file_path: Union[str, Path]) -> None:
        """Parse definition linkbase file and build dimensional structures.

        Args:
            file_path: Location of the definition linkbase file.

        Raises:
            XBRLProcessingError: If the file cannot be read or parsed; the
                underlying exception is chained as the cause.
        """
        try:
            content = Path(file_path).read_text()
            self.parse_definition_content(content)
        except Exception as e:
            raise XBRLProcessingError(f"Error parsing definition file {file_path}: {str(e)}") from e

    def parse_definition_content(self, content: str) -> None:
        """Parse definition linkbase content and build dimensional structures.

        For each ``definitionLink`` with a role, the role is registered in
        ``self.definition_roles`` and the link's arcs are resolved to
        element-level relationships, which are then handed to
        ``_process_dimensional_relationships``.

        Args:
            content: Text of a definition linkbase document.

        Raises:
            XBRLProcessingError: If anything goes wrong while parsing; the
                underlying exception is chained as the cause.
        """
        try:
            # _safe_parse_xml is not defined in this class (expected from BaseParser).
            root = self._safe_parse_xml(content)

            # Extract definition links
            definition_links = root.findall('.//{http://www.xbrl.org/2003/linkbase}definitionLink')

            for link in definition_links:
                role = link.get('{http://www.w3.org/1999/xlink}role')
                if not role:
                    continue

                # Store role information; the last path segment of the URI is
                # the role ID (the whole URI when it contains no '/').
                role_id = role.rsplit('/', 1)[-1]
                role_def = role_id.replace('_', ' ')

                self.definition_roles[role] = {
                    'roleUri': role,
                    'definition': role_def,
                    'roleId': role_id
                }

                relationships = self._extract_relationships(link)

                # Process dimensional structures from relationships
                self._process_dimensional_relationships(role, relationships)

        except Exception as e:
            raise XBRLProcessingError(f"Error parsing definition content: {str(e)}") from e

    @staticmethod
    def _index_labelled_elements(link: Any) -> Dict[str, Any]:
        """Map each xlink:label under *link* to the first descendant carrying it.

        Building this once per link replaces a full XPath scan per arc
        endpoint, and avoids interpolating label text into an XPath
        expression. The first element in document order wins, matching what a
        ``find`` on the label would return.
        """
        label_attr = f'{{{NAMESPACES["xlink"]}}}label'
        index: Dict[str, Any] = {}
        for element in link.findall('.//*'):
            label = element.get(label_attr)
            if label is not None:
                index.setdefault(label, element)
        return index

    def _extract_relationships(self, link: Any) -> List[Dict[str, Any]]:
        """Resolve the definition arcs of one extended link to element IDs.

        Arcs missing ``from``/``to``/``arcrole``, arcs whose endpoints cannot
        be matched to a labelled element, and endpoints without an ``href``
        are skipped silently.

        Returns:
            A list of dicts with the keys ``from_element``, ``to_element``,
            ``order`` and ``arcrole``.
        """
        label_index = self._index_labelled_elements(link)
        relationships: List[Dict[str, Any]] = []

        # Extract arcs
        for arc in link.findall('.//{http://www.xbrl.org/2003/linkbase}definitionArc'):
            from_ref = arc.get('{http://www.w3.org/1999/xlink}from')
            to_ref = arc.get('{http://www.w3.org/1999/xlink}to')
            # _parse_order_attribute is not defined in this class (expected from BaseParser).
            order = self._parse_order_attribute(arc)

            # Get the arcrole - this is important for identifying dimensional relationships
            arcrole = arc.get('{http://www.w3.org/1999/xlink}arcrole')
            if not from_ref or not to_ref or not arcrole:
                continue

            # Find locators for from/to references
            from_loc = label_index.get(from_ref)
            to_loc = label_index.get(to_ref)
            if from_loc is None or to_loc is None:
                continue

            from_href = from_loc.get('{http://www.w3.org/1999/xlink}href')
            to_href = to_loc.get('{http://www.w3.org/1999/xlink}href')
            if not from_href or not to_href:
                continue

            # Add relationship with arcrole
            relationships.append({
                'from_element': extract_element_id(from_href),
                'to_element': extract_element_id(to_href),
                'order': order,
                'arcrole': arcrole
            })

        return relationships

    def _process_dimensional_relationships(self, role: str, relationships: List[Dict[str, Any]]) -> None:
        """
        Process dimensional relationships to build tables, axes, and domains.

        Updates ``self.axes``, ``self.domains`` and ``self.tables`` in place.
        Existing axes and domains are kept; a domain's ``members`` list and the
        table list for ``role`` are overwritten with what this link declares.

        Args:
            role: Extended link role URI
            relationships: List of dimensional relationships, each a dict with
                ``from_element``, ``to_element``, ``order`` and ``arcrole``
        """
        # XBRL Dimensions arcrole URIs
        HYPERCUBE_DIMENSION = "http://xbrl.org/int/dim/arcrole/hypercube-dimension"
        DIMENSION_DOMAIN = "http://xbrl.org/int/dim/arcrole/dimension-domain"
        DOMAIN_MEMBER = "http://xbrl.org/int/dim/arcrole/domain-member"
        ALL = "http://xbrl.org/int/dim/arcrole/all"

        # Group relationships by arcrole
        grouped_rels: Dict[str, List[Dict[str, Any]]] = {}
        for rel in relationships:
            grouped_rels.setdefault(rel['arcrole'], []).append(rel)

        # Process hypercube-dimension relationships to identify tables and axes
        hypercube_axes: Dict[str, List[str]] = {}  # Map of hypercubes to their axes
        for rel in grouped_rels.get(HYPERCUBE_DIMENSION, []):
            table_id = rel['from_element']
            axis_id = rel['to_element']
            hypercube_axes.setdefault(table_id, []).append(axis_id)

            # Create the axis only if it is not already known
            if axis_id not in self.axes:
                self.axes[axis_id] = Axis(
                    element_id=axis_id,
                    label=self._get_element_label(axis_id)
                )

        # Process dimension-domain relationships to link axes to domains
        for rel in grouped_rels.get(DIMENSION_DOMAIN, []):
            axis_id = rel['from_element']
            domain_id = rel['to_element']

            # Link domain to axis
            if axis_id in self.axes:
                self.axes[axis_id].domain_id = domain_id

            self._ensure_domain(domain_id)

        # Process domain-member relationships to build domain hierarchies
        domain_members: Dict[str, List[str]] = {}  # parent element -> child members
        for rel in grouped_rels.get(DOMAIN_MEMBER, []):
            domain_id = rel['from_element']
            domain_members.setdefault(domain_id, []).append(rel['to_element'])

            # Also create the domain if it doesn't exist
            self._ensure_domain(domain_id)

        # Update domains with their members
        for domain_id, members in domain_members.items():
            if domain_id in self.domains:
                self.domains[domain_id].members = members

        # Process 'all' relationships to identify line items and build hypercubes (tables)
        tables_by_role: List[Table] = []
        for rel in grouped_rels.get(ALL, []):
            source_id = rel['from_element']
            target_id = rel['to_element']

            # XBRL Dimensions defines the 'all' arc as primary item -> hypercube,
            # so the table is normally the arc target. The reversed orientation
            # is still accepted so input that worked before keeps working.
            if target_id in hypercube_axes:
                table_id, line_items_id = target_id, source_id
            elif source_id in hypercube_axes:
                table_id, line_items_id = source_id, target_id
            else:
                # Only process if this table has axes defined
                continue

            tables_by_role.append(Table(
                element_id=table_id,
                label=self._get_element_label(table_id),
                role_uri=role,
                axes=hypercube_axes[table_id],
                line_items=[line_items_id],
                closed=False  # Default; the arc's closed attribute is not read here
            ))

        # Add tables to collection
        if tables_by_role:
            self.tables[role] = tables_by_role

    def _ensure_domain(self, domain_id: str) -> None:
        """Register a ``Domain`` for *domain_id* unless one is already stored."""
        if domain_id not in self.domains:
            self.domains[domain_id] = Domain(
                element_id=domain_id,
                label=self._get_element_label(domain_id)
            )

    def _get_element_label(self, element_id: str) -> str:
        """Get the label for an element, falling back to the element ID if not found.

        Args:
            element_id: Element ID to look up in the element catalog.

        Returns:
            The element's standard label when the catalog has a non-empty one,
            otherwise ``element_id`` itself.
        """
        entry = self.element_catalog.get(element_id)
        if entry is not None and entry.labels:
            # Use standard label if available
            standard_label = entry.labels.get(STANDARD_LABEL)
            if standard_label:
                return standard_label
        return element_id  # Fallback to element ID