""" Definition parser for XBRL documents. This module handles parsing of XBRL definition linkbases and building dimensional structures like tables, axes, and domains. """ from pathlib import Path from typing import Any, Dict, List, Union from edgar.xbrl.core import NAMESPACES, STANDARD_LABEL, extract_element_id from edgar.xbrl.models import Axis, Domain, ElementCatalog, Table, XBRLProcessingError from .base import BaseParser class DefinitionParser(BaseParser): """Parser for XBRL definition linkbases.""" def __init__(self, definition_roles: Dict[str, Dict[str, Any]], tables: Dict[str, List[Table]], axes: Dict[str, Axis], domains: Dict[str, Domain], element_catalog: Dict[str, ElementCatalog]): """ Initialize definition parser with data structure references. Args: definition_roles: Reference to definition roles dictionary tables: Reference to tables dictionary axes: Reference to axes dictionary domains: Reference to domains dictionary element_catalog: Reference to element catalog dictionary """ super().__init__() # Store references to data structures self.definition_roles = definition_roles self.tables = tables self.axes = axes self.domains = domains self.element_catalog = element_catalog def parse_definition(self, file_path: Union[str, Path]) -> None: """Parse definition linkbase file and build dimensional structures.""" try: content = Path(file_path).read_text() self.parse_definition_content(content) except Exception as e: raise XBRLProcessingError(f"Error parsing definition file {file_path}: {str(e)}") from e def parse_definition_content(self, content: str) -> None: """Parse definition linkbase content and build dimensional structures.""" try: root = self._safe_parse_xml(content) # Extract definition links definition_links = root.findall('.//{http://www.xbrl.org/2003/linkbase}definitionLink') for link in definition_links: role = link.get('{http://www.w3.org/1999/xlink}role') if not role: continue # Store role information role_id = role.split('/')[-1] if '/' in role else role role_def = role_id.replace('_', ' ') self.definition_roles[role] = { 'roleUri': role, 'definition': role_def, 'roleId': role_id } # Extract arcs arcs = link.findall('.//{http://www.xbrl.org/2003/linkbase}definitionArc') # Create relationships list relationships = [] for arc in arcs: from_ref = arc.get('{http://www.w3.org/1999/xlink}from') to_ref = arc.get('{http://www.w3.org/1999/xlink}to') order = self._parse_order_attribute(arc) # Get the arcrole - this is important for identifying dimensional relationships arcrole = arc.get('{http://www.w3.org/1999/xlink}arcrole') if not from_ref or not to_ref or not arcrole: continue # Find locators for from/to references from_loc = link.find(f'.//*[@{{{NAMESPACES["xlink"]}}}label="{from_ref}"]') to_loc = link.find(f'.//*[@{{{NAMESPACES["xlink"]}}}label="{to_ref}"]') if from_loc is None or to_loc is None: continue from_href = from_loc.get('{http://www.w3.org/1999/xlink}href') to_href = to_loc.get('{http://www.w3.org/1999/xlink}href') if not from_href or not to_href: continue # Extract element IDs from_element = extract_element_id(from_href) to_element = extract_element_id(to_href) # Add relationship with arcrole relationships.append({ 'from_element': from_element, 'to_element': to_element, 'order': order, 'arcrole': arcrole }) # Process dimensional structures from relationships self._process_dimensional_relationships(role, relationships) except Exception as e: raise XBRLProcessingError(f"Error parsing definition content: {str(e)}") from e def _process_dimensional_relationships(self, role: str, relationships: List[Dict[str, Any]]) -> None: """ Process dimensional relationships to build tables, axes, and domains. Args: role: Extended link role URI relationships: List of dimensional relationships """ # XBRL Dimensions arcrole URIs HYPERCUBE_DIMENSION = "http://xbrl.org/int/dim/arcrole/hypercube-dimension" DIMENSION_DOMAIN = "http://xbrl.org/int/dim/arcrole/dimension-domain" DOMAIN_MEMBER = "http://xbrl.org/int/dim/arcrole/domain-member" ALL = "http://xbrl.org/int/dim/arcrole/all" # Group relationships by arcrole grouped_rels = {} for rel in relationships: arcrole = rel['arcrole'] if arcrole not in grouped_rels: grouped_rels[arcrole] = [] grouped_rels[arcrole].append(rel) # Process hypercube-dimension relationships to identify tables and axes hypercube_axes = {} # Map of hypercubes to their axes if HYPERCUBE_DIMENSION in grouped_rels: for rel in grouped_rels[HYPERCUBE_DIMENSION]: table_id = rel['from_element'] axis_id = rel['to_element'] if table_id not in hypercube_axes: hypercube_axes[table_id] = [] hypercube_axes[table_id].append(axis_id) # Create or update axis if axis_id not in self.axes: self.axes[axis_id] = Axis( element_id=axis_id, label=self._get_element_label(axis_id) ) # Process dimension-domain relationships to link axes to domains if DIMENSION_DOMAIN in grouped_rels: for rel in grouped_rels[DIMENSION_DOMAIN]: axis_id = rel['from_element'] domain_id = rel['to_element'] # Link domain to axis if axis_id in self.axes: self.axes[axis_id].domain_id = domain_id # Create or update domain if domain_id not in self.domains: self.domains[domain_id] = Domain( element_id=domain_id, label=self._get_element_label(domain_id) ) # Process domain-member relationships to build domain hierarchies if DOMAIN_MEMBER in grouped_rels: # Group by parent (domain) element domain_members = {} for rel in grouped_rels[DOMAIN_MEMBER]: domain_id = rel['from_element'] member_id = rel['to_element'] if domain_id not in domain_members: domain_members[domain_id] = [] domain_members[domain_id].append(member_id) # Also create the domain if it doesn't exist if domain_id not in self.domains: self.domains[domain_id] = Domain( element_id=domain_id, label=self._get_element_label(domain_id) ) # Update domains with their members for domain_id, members in domain_members.items(): if domain_id in self.domains: self.domains[domain_id].members = members # Process 'all' relationships to identify line items and build hypercubes (tables) if ALL in grouped_rels: tables_by_role = [] for rel in grouped_rels[ALL]: line_items_id = rel['to_element'] table_id = rel['from_element'] # Only process if this table has axes defined if table_id in hypercube_axes: table = Table( element_id=table_id, label=self._get_element_label(table_id), role_uri=role, axes=hypercube_axes[table_id], line_items=[line_items_id], closed=False # Default ) tables_by_role.append(table) # Add tables to collection if tables_by_role: self.tables[role] = tables_by_role def _get_element_label(self, element_id: str) -> str: """Get the label for an element, falling back to the element ID if not found.""" if element_id in self.element_catalog and self.element_catalog[element_id].labels: # Use standard label if available standard_label = self.element_catalog[element_id].labels.get(STANDARD_LABEL) if standard_label: return standard_label return element_id # Fallback to element ID