Files
edgartools/venv/lib/python3.10/site-packages/edgar/xbrl/stitching/presentation.py
2025-12-09 12:13:01 +01:00

257 lines
10 KiB
Python

"""
XBRL Presentation Tree - Virtual presentation tree for multi-period statements
This module creates a virtual presentation tree that preserves hierarchical
relationships while applying semantic ordering within sibling groups.
"""
from dataclasses import dataclass
from typing import Any, Dict, List, Optional
@dataclass
class PresentationNode:
"""Represents a node in the virtual presentation tree"""
concept: str
label: str
level: int
metadata: Dict[str, Any]
semantic_order: float = 999.0
original_index: int = 999
def __post_init__(self):
self.children: List[PresentationNode] = []
self.parent: Optional[PresentationNode] = None
def add_child(self, child: 'PresentationNode'):
"""Add a child node and set parent relationship"""
child.parent = self
self.children.append(child)
def sort_children(self):
"""Sort children using semantic ordering while preserving hierarchy"""
# Sort direct children by semantic order, then by original index as tiebreaker
self.children.sort(key=lambda x: (x.semantic_order, x.original_index))
# Recursively sort grandchildren
for child in self.children:
child.sort_children()
def flatten_to_list(self) -> List['PresentationNode']:
"""Flatten tree to ordered list while preserving hierarchy"""
result = [self]
for child in self.children:
result.extend(child.flatten_to_list())
return result
class VirtualPresentationTree:
"""Builds and manages virtual presentation tree for stitched statements"""
def __init__(self, ordering_manager=None):
self.ordering_manager = ordering_manager
self.root_nodes: List[PresentationNode] = []
self.all_nodes: Dict[str, PresentationNode] = {}
def build_tree(self, concept_metadata: Dict, concept_ordering: Dict,
original_statement_order: List[str] = None) -> List[PresentationNode]:
"""
Build presentation tree from concept metadata and ordering.
Args:
concept_metadata: Metadata for each concept including level
concept_ordering: Semantic ordering positions
original_statement_order: Original order of concepts for context
Returns:
Flattened list of nodes in correct presentation order
"""
# Step 1: Create nodes for all concepts
self._create_nodes(concept_metadata, concept_ordering, original_statement_order)
# Step 2: Build parent-child relationships based on levels and context
self._build_hierarchy(original_statement_order or [])
# Step 3: Apply semantic ordering within sibling groups
self._apply_semantic_ordering()
# Step 4: Flatten tree to linear list
return self._flatten_tree()
def _create_nodes(self, concept_metadata: Dict, concept_ordering: Dict,
original_statement_order: List[str] = None):
"""Create nodes for all concepts"""
self.all_nodes = {}
for i, (concept, metadata) in enumerate(concept_metadata.items()):
label = metadata.get('latest_label', concept)
level = metadata.get('level', 0)
semantic_order = concept_ordering.get(concept, concept_ordering.get(label, 999.0))
# Track original index for maintaining some original order context
original_index = i
if original_statement_order:
try:
original_index = original_statement_order.index(concept)
except ValueError:
try:
original_index = original_statement_order.index(label)
except ValueError:
original_index = i + 1000 # Place unknown concepts later
node = PresentationNode(
concept=concept,
label=label,
level=level,
metadata=metadata,
semantic_order=semantic_order,
original_index=original_index
)
self.all_nodes[concept] = node
def _build_hierarchy(self, original_order: List[str]):
"""Build parent-child relationships based on level progression and context"""
# Sort nodes by their original order to maintain context for hierarchy detection
nodes_in_order = []
# First, try to use original order if available
if original_order:
# Map concepts in original order
concept_to_node = {node.concept: node for node in self.all_nodes.values()}
label_to_node = {node.label: node for node in self.all_nodes.values()}
for item in original_order:
if item in concept_to_node:
nodes_in_order.append(concept_to_node[item])
elif item in label_to_node:
nodes_in_order.append(label_to_node[item])
# Add any remaining nodes not in original order
remaining_nodes = [node for node in self.all_nodes.values()
if node not in nodes_in_order]
remaining_nodes.sort(key=lambda x: x.original_index)
nodes_in_order.extend(remaining_nodes)
else:
# Fall back to sorting by original index
nodes_in_order = sorted(self.all_nodes.values(),
key=lambda x: x.original_index)
# Build hierarchy using a parent stack approach
parent_stack = [] # Stack of potential parents at each level
for node in nodes_in_order:
current_level = node.level
# Pop parents that are at the same level or deeper
# We're looking for a parent at a level less than current
while parent_stack and parent_stack[-1].level >= current_level:
parent_stack.pop()
if parent_stack:
# Check if potential parent and child belong to compatible sections
parent = parent_stack[-1]
# Prevent cross-section hierarchies for critical sections like per_share
should_be_child = self._should_be_hierarchical_child(parent, node)
if should_be_child:
# Valid parent-child relationship
parent.add_child(node)
else:
# Different sections - make this a root node instead
self.root_nodes.append(node)
else:
# No parent - this is a root node
self.root_nodes.append(node)
# This node could be a parent for subsequent nodes
parent_stack.append(node)
def _apply_semantic_ordering(self):
"""Apply semantic ordering within sibling groups"""
# Sort root nodes by semantic order first, then original index
self.root_nodes.sort(key=lambda x: (x.semantic_order, x.original_index))
# Sort children within each parent recursively
for root in self.root_nodes:
root.sort_children()
def _flatten_tree(self) -> List[PresentationNode]:
"""Flatten tree to linear list preserving hierarchy"""
result = []
for root in self.root_nodes:
result.extend(root.flatten_to_list())
return result
def _should_be_hierarchical_child(self, parent: PresentationNode, child: PresentationNode) -> bool:
"""
Determine if child should be hierarchically under parent based on semantic ordering.
Prevents cross-section hierarchies that would break template section groupings.
"""
# Get semantic ordering positions
parent_order = parent.semantic_order
child_order = child.semantic_order
# If both have very specific semantic orders from templates (not defaults),
# check if they're in similar ranges (same section)
if parent_order < 900 and child_order < 900:
# Both are template-positioned, check if they're in similar sections
# Allow parent-child within 200 points (roughly same section)
section_gap = abs(parent_order - child_order)
if section_gap > 200:
return False
# Special case: Per-share items (900+) should never be children of early items
if child_order >= 900 and parent_order < 800:
return False
# Special case: Non-operating items (500-599) should not be children of operating items
if 500 <= child_order < 600 and parent_order < 500:
return False
# Special case: Revenue items should not be parents of per-share items
if parent_order < 100 and child_order >= 900:
return False
# Check for semantic incompatibility based on labels
child_label = child.label.lower()
parent_label = parent.label.lower()
# Per-share items should not be children of non-per-share items
if any(term in child_label for term in ['earnings per share', 'shares outstanding']):
if not any(term in parent_label for term in ['earnings', 'shares', 'per share']):
return False
# Interest expense items should not be children of non-interest items
if 'interest expense' in child_label:
if 'interest' not in parent_label and 'nonoperating' not in parent_label:
return False
# Otherwise, allow hierarchical relationship
return True
def debug_tree(self) -> str:
"""Generate a debug representation of the tree"""
lines = []
def _add_node_lines(node: PresentationNode, depth: int = 0):
indent = " " * depth
lines.append(f"{indent}├─ {node.label} (level={node.level}, "
f"semantic={node.semantic_order:.1f}, orig={node.original_index})")
for child in node.children:
_add_node_lines(child, depth + 1)
lines.append("Virtual Presentation Tree:")
for root in self.root_nodes:
_add_node_lines(root)
return "\n".join(lines)