# edgartools/venv/lib/python3.10/site-packages/edgar/xbrl/parsers/presentation.py

"""
Presentation parser for XBRL documents.

This module handles parsing of XBRL presentation linkbases and building
presentation trees for financial statement structure.
"""

from pathlib import Path
from typing import Any, Dict, List, Optional, Union

from lxml import etree as ET

from edgar.xbrl.core import extract_element_id
from edgar.xbrl.models import ElementCatalog, PresentationNode, PresentationTree, XBRLProcessingError

from .base import BaseParser


class PresentationParser(BaseParser):
    """Parser for XBRL presentation linkbases."""

    def __init__(self, presentation_roles: Dict[str, Dict[str, Any]],
                 presentation_trees: Dict[str, PresentationTree],
                 element_catalog: Dict[str, ElementCatalog]):
        """
        Initialize presentation parser with data structure references.

        Args:
            presentation_roles: Reference to presentation roles dictionary
            presentation_trees: Reference to presentation trees dictionary
            element_catalog: Reference to element catalog dictionary
        """
        super().__init__()

        # Store references to data structures
        self.presentation_roles = presentation_roles
        self.presentation_trees = presentation_trees
        self.element_catalog = element_catalog

    def parse_presentation(self, file_path: Union[str, Path]) -> None:
        """
        Parse a presentation linkbase file and build presentation trees.

        Args:
            file_path: Location of the presentation linkbase file.

        Raises:
            XBRLProcessingError: If the file cannot be read or decoded, or if
                parsing its content fails. The original exception is chained.
        """
        try:
            # Decode explicitly as UTF-8 instead of the platform's locale
            # encoding: parse_presentation_content() re-encodes the text as
            # UTF-8 before handing it to the XML parser, so decoding with the
            # same codec passes the file's bytes through unchanged on every
            # platform.
            content = Path(file_path).read_text(encoding='utf-8')
            self.parse_presentation_content(content)
        except Exception as e:
            raise XBRLProcessingError(f"Error parsing presentation file {file_path}: {str(e)}") from e

    def parse_presentation_content(self, content: str) -> None:
        """
        Parse presentation linkbase XML text and build a tree for each role.

        Every ``link:presentationLink`` that carries an ``xlink:role`` has its
        role recorded in ``self.presentation_roles``. Its arcs are resolved
        through the link's locators into relationship dictionaries, and any
        non-empty relationship list is passed to ``_build_presentation_tree``.

        Args:
            content: Presentation linkbase XML as text.

        Raises:
            XBRLProcessingError: If any step fails. The original exception is
                chained.
        """
        try:
            ns = {
                'link': 'http://www.xbrl.org/2003/linkbase',
                'xlink': 'http://www.w3.org/1999/xlink'
            }

            # Fully-qualified (Clark notation) names of the xlink attributes read below
            attr_role = '{http://www.w3.org/1999/xlink}role'
            attr_from = '{http://www.w3.org/1999/xlink}from'
            attr_to = '{http://www.w3.org/1999/xlink}to'
            attr_label = '{http://www.w3.org/1999/xlink}label'
            attr_href = '{http://www.w3.org/1999/xlink}href'

            # The parser is created with recover=True
            xml_parser = ET.XMLParser(remove_blank_text=True, recover=True)
            document = ET.XML(content.encode('utf-8'), xml_parser)

            for ext_link in document.xpath('//link:presentationLink', namespaces=ns):
                role = ext_link.get(attr_role)
                if not role:
                    continue

                # Role ID is the last path segment of the URI (the whole
                # string when it contains no '/')
                role_id = role.rsplit('/', 1)[-1]
                self.presentation_roles[role] = {
                    'roleUri': role,
                    'definition': role_id.replace('_', ' '),
                    'roleId': role_id
                }

                # Locator label -> href, built once per link so each arc is a
                # dictionary lookup
                href_by_label = {
                    locator.get(attr_label): locator.get(attr_href)
                    for locator in ext_link.xpath('.//link:loc', namespaces=ns)
                    if locator.get(attr_label)
                }

                relationships = []
                for arc in ext_link.xpath('.//link:presentationArc', namespaces=ns):
                    source_label = arc.get(attr_from)
                    target_label = arc.get(attr_to)
                    if not (source_label and target_label):
                        continue

                    # Skip arcs whose endpoints have no usable locator href
                    source_href = href_by_label.get(source_label)
                    target_href = href_by_label.get(target_label)
                    if not (source_href and target_href):
                        continue

                    order = self._parse_order_attribute(arc)
                    preferred_label = arc.get('preferredLabel')

                    relationships.append({
                        'from_element': extract_element_id(source_href),
                        'to_element': extract_element_id(target_href),
                        'order': order,
                        'preferred_label': preferred_label
                    })

                # Links that yielded no relationships get no tree
                if relationships:
                    self._build_presentation_tree(role, relationships)

        except Exception as e:
            raise XBRLProcessingError(f"Error parsing presentation content: {str(e)}") from e

    def _build_presentation_tree(self, role: str, relationships: List[Dict[str, Any]]) -> None:
        """
        Build a presentation tree from relationships and store it under the role.

        Root elements are those that appear as a 'from_element' but never as a
        'to_element'. Roots are processed in the order they first appear in
        ``relationships``, and the first of them becomes the tree's
        ``root_element_id``, so the result is the same on every run (iterating
        a set of strings is not order-stable across interpreter runs).

        If no root element exists, nothing is stored for the role.

        Args:
            role: Extended link role URI; must already be present in
                ``self.presentation_roles``
            relationships: List of relationships (from_element, to_element, order, preferred_label)
        """
        # Group relationships by source element (insertion-ordered) and
        # collect every element that is the target of some relationship
        from_map: Dict[str, List[Dict[str, Any]]] = {}
        child_ids = set()

        for rel in relationships:
            from_map.setdefault(rel['from_element'], []).append(rel)
            child_ids.add(rel['to_element'])

        # Roots in first-appearance order (appear as 'from' but never as 'to')
        root_elements = [element_id for element_id in from_map if element_id not in child_ids]

        if not root_elements:
            return  # No root elements found

        # Create presentation tree
        tree = PresentationTree(
            role_uri=role,
            definition=self.presentation_roles[role]['definition'],
            root_element_id=root_elements[0],
            all_nodes={}
        )

        # Build the subtree under each root into the tree's shared node map
        for root_id in root_elements:
            self._build_presentation_subtree(root_id, None, 0, from_map, tree.all_nodes)

        # Add tree to collection
        self.presentation_trees[role] = tree

    def _build_presentation_subtree(self, element_id: str, parent_id: Optional[str], depth: int,
                                 from_map: Dict[str, List[Dict[str, Any]]],
                                 all_nodes: Dict[str, PresentationNode],
                                 _ancestors: Optional[frozenset] = None) -> None:
        """
        Recursively build a presentation subtree.

        A node is created for ``element_id`` and stored in ``all_nodes`` under
        that ID (replacing any node already stored for the same ID). Its
        children are then built in ascending 'order'. A relationship whose
        target already lies on the path from the root to this node is skipped,
        so cyclic relationship data cannot cause unbounded recursion.

        Args:
            element_id: Current element ID
            parent_id: Parent element ID
            depth: Current depth in tree
            from_map: Map of relationships by source element
            all_nodes: Dictionary to store all nodes
            _ancestors: Internal. Element IDs on the path from the root down
                to the parent of this node; callers outside the recursion
                should leave it unset.
        """
        # Element IDs on the path from the root down to and including this node
        path = (_ancestors or frozenset()) | {element_id}

        # Create node
        node = PresentationNode(
            element_id=element_id,
            parent=parent_id,
            children=[],
            depth=depth
        )

        # Add element information if available
        if element_id in self.element_catalog:
            elem_info = self.element_catalog[element_id]
            node.element_name = elem_info.name
            node.standard_label = elem_info.labels.get('http://www.xbrl.org/2003/role/label', elem_info.name)

            # Use enhanced abstract detection (Issue #450 fix)
            # The element catalog may not have correct abstract info for standard taxonomy concepts
            from edgar.xbrl.abstract_detection import is_abstract_concept
            node.is_abstract = is_abstract_concept(
                concept_name=elem_info.name,
                schema_abstract=elem_info.abstract,
                has_children=False,  # Placeholder; this method does not recompute the flag later
                has_values=False     # Placeholder; facts are not available to this method
            )

            node.labels = elem_info.labels

        # Add to collection
        all_nodes[element_id] = node

        # Process children, sorted by order
        for rel in sorted(from_map.get(element_id, ()), key=lambda r: r['order']):
            child_id = rel['to_element']

            # Following an arc back to an element already on the current path
            # would recurse forever; drop that arc
            if child_id in path:
                continue

            # Add child to parent's children list
            node.children.append(child_id)

            # Recursively build child subtree
            self._build_presentation_subtree(
                child_id, element_id, depth + 1, from_map, all_nodes, path
            )

            # The recursive call has just stored the child's node; attach the
            # arc-level attributes to it
            child_node = all_nodes[child_id]
            preferred_label = rel['preferred_label']
            if preferred_label:
                child_node.preferred_label = preferred_label
            child_node.order = rel['order']