Initial commit

kdusek
2025-12-09 12:13:01 +01:00
commit 8e654ed209
13332 changed files with 2695056 additions and 0 deletions

View File

@@ -0,0 +1,27 @@
"""
XBRL Parser Components.
This package provides specialized parser components for different aspects
of XBRL document processing. Each parser handles a specific responsibility
in the XBRL parsing workflow.
"""
from .base import BaseParser
from .calculation import CalculationParser
from .coordinator import XBRLParser
from .definition import DefinitionParser
from .instance import InstanceParser
from .labels import LabelsParser
from .presentation import PresentationParser
from .schema import SchemaParser
__all__ = [
'BaseParser',
'XBRLParser',
'SchemaParser',
'LabelsParser',
'PresentationParser',
'CalculationParser',
'DefinitionParser',
'InstanceParser',
]
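A minimal usage sketch (illustrative only; the import path is an assumption, since this commit view does not show file paths):

# Hypothetical import path - adjust to wherever this package actually lives
from edgar.xbrl.parsers import XBRLParser

parser = XBRLParser()
parser.parse_directory("filings/example-10k")  # directory of .xsd/.xml files (assumed layout)
print(f"Parsed {len(parser.facts)} facts across {len(parser.contexts)} contexts")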

View File

@@ -0,0 +1,148 @@
"""
Base parser functionality for XBRL parsing components.
This module provides common utilities and base functionality shared across
all XBRL parser components.
"""
from typing import Any, Dict, Optional, Tuple, Union
from lxml import etree as ET
from edgar.core import log
from edgar.xbrl.core import NAMESPACES
class BaseParser:
"""Base class for XBRL parser components with common functionality."""
def __init__(self):
"""Initialize base parser with common data structures."""
# Common namespaces and utilities available to all parsers
self.namespaces = NAMESPACES
    def _safe_parse_xml(self, content: Union[str, bytes]) -> ET.Element:
"""
Safely parse XML content with lxml, handling encoding declarations properly.
Args:
content: XML content as string or bytes
Returns:
parsed XML root element
"""
parser = ET.XMLParser(remove_blank_text=True, recover=True)
# Convert to bytes for safer parsing if needed
if isinstance(content, str):
content_bytes = content.encode('utf-8')
else:
content_bytes = content
# Parse with lxml
return ET.XML(content_bytes, parser)
def _parse_order_attribute(self, arc) -> float:
"""Parse order attribute from arc, checking both order and xlink:order."""
        # Check xlink:order first (some documents qualify it, though the
        # XBRL 2.1 spec defines order as an unprefixed attribute)
        order_value = arc.get('{http://www.w3.org/1999/xlink}order')
        if order_value is None:
            # Fall back to the standard unprefixed order attribute
            order_value = arc.get('order')
# Debug logging to understand what's in the XBRL document
if order_value is not None:
log.debug(f"Found order attribute: {order_value}")
else:
# Log all attributes to see what's actually there
all_attrs = dict(arc.attrib) if hasattr(arc, 'attrib') else {}
log.debug(f"No order attribute found. Available attributes: {all_attrs}")
try:
return float(order_value) if order_value is not None else 0.0
except (ValueError, TypeError):
return 0.0
def _extract_role_info(self, role_element) -> Dict[str, Any]:
"""
Extract role information from a role element.
Args:
role_element: XML element containing role definition
Returns:
Dictionary with role information
"""
role_info = {}
# Get role URI
role_uri = role_element.get('roleURI', '')
role_info['uri'] = role_uri
# Extract role definition/label
definition_elem = role_element.find('.//{http://www.xbrl.org/2003/linkbase}definition')
if definition_elem is not None:
role_info['definition'] = definition_elem.text or ''
else:
# Fallback: create definition from role URI
role_info['definition'] = role_uri.split('/')[-1].replace('_', ' ') if role_uri else ''
return role_info
    def _get_element_namespace_and_name(self, element_id: str) -> Tuple[str, str]:
"""
Extract namespace and local name from an element ID.
Args:
element_id: Element identifier (may include namespace prefix)
Returns:
Tuple of (namespace, local_name)
"""
if ':' in element_id:
prefix, local_name = element_id.split(':', 1)
# Map common prefixes to namespaces
namespace_map = {
'us-gaap': 'http://fasb.org/us-gaap/2024',
'dei': 'http://xbrl.sec.gov/dei/2024',
'invest': 'http://xbrl.sec.gov/invest/2013-01-31',
'country': 'http://xbrl.sec.gov/country/2023',
'currency': 'http://xbrl.sec.gov/currency/2023',
'exch': 'http://xbrl.sec.gov/exch/2023',
'naics': 'http://xbrl.sec.gov/naics/2023',
'sic': 'http://xbrl.sec.gov/sic/2023',
'stpr': 'http://xbrl.sec.gov/stpr/2023',
}
namespace = namespace_map.get(prefix, f'http://unknown.namespace/{prefix}')
return namespace, local_name
else:
return '', element_id
def _normalize_element_id(self, element_id: str) -> str:
"""
Normalize element ID to a consistent format.
Args:
element_id: Original element identifier
Returns:
Normalized element identifier
"""
if ':' in element_id:
prefix, name = element_id.split(':', 1)
return f"{prefix}_{name}"
return element_id
    def _log_parsing_progress(self, component: str, count: int, total: Optional[int] = None):
"""
Log parsing progress for debugging.
Args:
component: Name of component being parsed
count: Number of items processed
total: Total number of items (optional)
"""
if total:
log.debug(f"Parsed {count}/{total} {component}")
else:
log.debug(f"Parsed {count} {component}")

View File

@@ -0,0 +1,223 @@
"""
Calculation parser for XBRL documents.
This module handles parsing of XBRL calculation linkbases and building
calculation trees with weights for validation.
"""
from pathlib import Path
from typing import Any, Dict, List, Optional, Union
from edgar.xbrl.core import NAMESPACES, extract_element_id
from edgar.xbrl.models import CalculationNode, CalculationTree, ElementCatalog, Fact, XBRLProcessingError
from .base import BaseParser
class CalculationParser(BaseParser):
"""Parser for XBRL calculation linkbases."""
def __init__(self, calculation_roles: Dict[str, Dict[str, Any]],
calculation_trees: Dict[str, CalculationTree],
element_catalog: Dict[str, ElementCatalog],
facts: Dict[str, Fact]):
"""
Initialize calculation parser with data structure references.
Args:
calculation_roles: Reference to calculation roles dictionary
calculation_trees: Reference to calculation trees dictionary
element_catalog: Reference to element catalog dictionary
facts: Reference to facts dictionary
"""
super().__init__()
# Store references to data structures
self.calculation_roles = calculation_roles
self.calculation_trees = calculation_trees
self.element_catalog = element_catalog
self.facts = facts
def parse_calculation(self, file_path: Union[str, Path]) -> None:
"""Parse calculation linkbase file and build calculation trees."""
try:
content = Path(file_path).read_text()
self.parse_calculation_content(content)
except Exception as e:
raise XBRLProcessingError(f"Error parsing calculation file {file_path}: {str(e)}") from e
def parse_calculation_content(self, content: str) -> None:
"""Parse calculation linkbase content and build calculation trees."""
try:
# Use safe XML parsing method
root = self._safe_parse_xml(content)
# Extract calculation links
calculation_links = root.findall('.//{http://www.xbrl.org/2003/linkbase}calculationLink')
for link in calculation_links:
role = link.get('{http://www.w3.org/1999/xlink}role')
if not role:
continue
# Store role information
role_id = role.split('/')[-1] if '/' in role else role
role_def = role_id.replace('_', ' ')
self.calculation_roles[role] = {
'roleUri': role,
'definition': role_def,
'roleId': role_id
}
# Extract arcs
arcs = link.findall('.//{http://www.xbrl.org/2003/linkbase}calculationArc')
# Create relationships list
relationships = []
for arc in arcs:
from_ref = arc.get('{http://www.w3.org/1999/xlink}from')
to_ref = arc.get('{http://www.w3.org/1999/xlink}to')
order = self._parse_order_attribute(arc)
weight = float(arc.get('weight', '1.0'))
if not from_ref or not to_ref:
continue
# Find locators for from/to references
from_loc = link.find(f'.//*[@{{{NAMESPACES["xlink"]}}}label="{from_ref}"]')
to_loc = link.find(f'.//*[@{{{NAMESPACES["xlink"]}}}label="{to_ref}"]')
if from_loc is None or to_loc is None:
continue
from_href = from_loc.get('{http://www.w3.org/1999/xlink}href')
to_href = to_loc.get('{http://www.w3.org/1999/xlink}href')
if not from_href or not to_href:
continue
# Extract element IDs
from_element = extract_element_id(from_href)
to_element = extract_element_id(to_href)
# Add relationship
relationships.append({
'from_element': from_element,
'to_element': to_element,
'order': order,
'weight': weight
})
# Build calculation tree for this role
if relationships:
self._build_calculation_tree(role, relationships)
except Exception as e:
raise XBRLProcessingError(f"Error parsing calculation content: {str(e)}") from e
def _build_calculation_tree(self, role: str, relationships: List[Dict[str, Any]]) -> None:
"""
Build a calculation tree from relationships.
Args:
role: Extended link role URI
relationships: List of relationships (from_element, to_element, order, weight)
"""
# Group relationships by source element
from_map = {}
to_map = {}
for rel in relationships:
from_element = rel['from_element']
to_element = rel['to_element']
if from_element not in from_map:
from_map[from_element] = []
from_map[from_element].append(rel)
if to_element not in to_map:
to_map[to_element] = []
to_map[to_element].append(rel)
# Find root elements (appear as 'from' but not as 'to')
root_elements = set(from_map.keys()) - set(to_map.keys())
if not root_elements:
return # No root elements found
# Create calculation tree
tree = CalculationTree(
role_uri=role,
definition=self.calculation_roles[role]['definition'],
root_element_id=next(iter(root_elements)),
all_nodes={}
)
# Build tree recursively
for root_id in root_elements:
self._build_calculation_subtree(root_id, None, from_map, tree.all_nodes)
# Add tree to collection
self.calculation_trees[role] = tree
def _build_calculation_subtree(self, element_id: str, parent_id: Optional[str],
from_map: Dict[str, List[Dict[str, Any]]],
all_nodes: Dict[str, CalculationNode]) -> None:
"""
Recursively build a calculation subtree.
Args:
element_id: Current element ID
parent_id: Parent element ID
from_map: Map of relationships by source element
all_nodes: Dictionary to store all nodes
"""
# Create node
node = CalculationNode(
element_id=element_id,
parent=parent_id,
children=[]
)
# Add element information if available
elem_info = None
if element_id in self.element_catalog:
elem_info = self.element_catalog[element_id]
else:
            # Try the alternative element ID format (swap only the prefix separator)
            alt_element_id = element_id.replace(':', '_', 1) if ':' in element_id else element_id.replace('_', ':', 1)
if alt_element_id in self.element_catalog:
elem_info = self.element_catalog[alt_element_id]
if elem_info:
node.balance_type = elem_info.balance
node.period_type = elem_info.period_type
# Add to collection
all_nodes[element_id] = node
# Process children
if element_id in from_map:
# Sort children by order
children = sorted(from_map[element_id], key=lambda r: r['order'])
for rel in children:
child_id = rel['to_element']
# Add child to parent's children list
node.children.append(child_id)
# Set weight
weight = rel['weight']
# Recursively build child subtree
self._build_calculation_subtree(
child_id, element_id, from_map, all_nodes
)
# Update weight and order after child is built
if child_id in all_nodes:
all_nodes[child_id].weight = weight
all_nodes[child_id].order = rel['order']
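The weights and orders stored on each node are what a downstream validator would consume. A minimal sketch of such a check (illustrative; get_value is a hypothetical callback that resolves a node's numeric fact value for one context):

def check_calculation(tree, node_id, get_value):
    """Compare a parent's value to the weighted sum of its children."""
    node = tree.all_nodes[node_id]
    if not node.children:
        return None  # leaf node, nothing to verify
    total = sum(tree.all_nodes[child_id].weight * get_value(child_id)
                for child_id in node.children)
    return abs(get_value(node_id) - total) < 0.5  # tolerance for rounding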

View File

@@ -0,0 +1,382 @@
"""
Shared XBRL concept definitions for balance types and deprecated normalization lists.
This module contains balance type mappings for common US-GAAP concepts to support
the balance column in DataFrame exports without parsing full taxonomy schemas.
DEPRECATED: Static normalization concept lists (CONSISTENT_POSITIVE_CONCEPTS,
LEGITIMATE_NEGATIVE_CONCEPTS) are kept for historical reference but no longer used.
Testing confirmed that SEC XBRL instance data is already consistent across companies.
See Issue #463 analysis for details.
"""
# =============================================================================
# DEPRECATED CONCEPT LISTS (No longer used as of Issue #463)
# =============================================================================
# These lists were created to work around perceived inconsistencies in XBRL data.
# Testing revealed that raw SEC instance data is ALREADY consistent across companies.
#
# Historical context:
# - Issues #290, #334, #451 reported negative values for expenses
# - Root cause: EdgarTools was misusing calculation weights for display logic
# - These lists fixed symptoms but not the actual problem
# - Issue #463 removed calculation weight application during parsing
# - Result: Raw values preserved as-is (matching SEC CompanyFacts API)
#
# Kept for historical reference and potential future use cases.
# =============================================================================
CONSISTENT_POSITIVE_CONCEPTS = {
# Research and Development Expenses
'us-gaap_ResearchAndDevelopmentExpense',
'us_gaap_ResearchAndDevelopmentExpense',
'ResearchAndDevelopmentExpense',
# Selling, General & Administrative Expenses
'us-gaap_SellingGeneralAndAdministrativeExpense',
'us_gaap_SellingGeneralAndAdministrativeExpense',
'SellingGeneralAndAdministrativeExpense',
# General and Administrative Expenses (separate from SG&A)
'us-gaap_GeneralAndAdministrativeExpense',
'us_gaap_GeneralAndAdministrativeExpense',
'GeneralAndAdministrativeExpense',
# Selling Expenses
'us-gaap_SellingExpense',
'us_gaap_SellingExpense',
'SellingExpense',
# Marketing and Advertising Expenses
'us-gaap_SellingAndMarketingExpense',
'us_gaap_SellingAndMarketingExpense',
'SellingAndMarketingExpense',
'us-gaap_MarketingExpense',
'us_gaap_MarketingExpense',
'MarketingExpense',
'us-gaap_AdvertisingExpense',
'us_gaap_AdvertisingExpense',
'AdvertisingExpense',
# Share-based Compensation Expenses
'us-gaap_AllocatedShareBasedCompensationExpense',
'us_gaap_AllocatedShareBasedCompensationExpense',
'AllocatedShareBasedCompensationExpense',
'us-gaap_ShareBasedCompensationArrangementByShareBasedPaymentAwardExpenseRecognized',
'us_gaap_ShareBasedCompensationArrangementByShareBasedPaymentAwardExpenseRecognized',
'ShareBasedCompensationArrangementByShareBasedPaymentAwardExpenseRecognized',
# Operating Expenses (general)
'us-gaap_OperatingExpenses',
'us_gaap_OperatingExpenses',
'OperatingExpenses',
# Professional Services Expenses
'us-gaap_ProfessionalServiceFees',
'us_gaap_ProfessionalServiceFees',
'ProfessionalServiceFees',
# Compensation and Benefits
'us-gaap_LaborAndRelatedExpense',
'us_gaap_LaborAndRelatedExpense',
'LaborAndRelatedExpense',
'us-gaap_EmployeeBenefitsExpense',
'us_gaap_EmployeeBenefitsExpense',
'EmployeeBenefitsExpense',
# Cost of Revenue and Cost of Goods/Services Sold (Issue #290, #451)
'us-gaap_CostOfRevenue',
'us_gaap_CostOfRevenue',
'CostOfRevenue',
'us-gaap_CostOfGoodsAndServicesSold',
'us_gaap_CostOfGoodsAndServicesSold',
'CostOfGoodsAndServicesSold',
'us-gaap_CostOfGoodsSold',
'us_gaap_CostOfGoodsSold',
'CostOfGoodsSold',
'us-gaap_CostOfServices',
'us_gaap_CostOfServices',
'CostOfServices',
# Income Tax Expense (Issue #451)
'us-gaap_IncomeTaxExpenseBenefit',
'us_gaap_IncomeTaxExpenseBenefit',
'IncomeTaxExpenseBenefit',
'us-gaap_IncomeTaxRecoveryExpense',
'us_gaap_IncomeTaxRecoveryExpense',
'IncomeTaxRecoveryExpense',
# Cash Flow Statement - Financing Activities (cash outflows)
# These represent uses of cash that should always be positive
'us-gaap_PaymentsForRepurchaseOfCommonStock',
'us_gaap_PaymentsForRepurchaseOfCommonStock',
'PaymentsForRepurchaseOfCommonStock',
'us-gaap_PaymentsOfDividends',
'us_gaap_PaymentsOfDividends',
'PaymentsOfDividends',
'us-gaap_PaymentsOfDividendsCommonStock',
'us_gaap_PaymentsOfDividendsCommonStock',
'PaymentsOfDividendsCommonStock',
'us-gaap_PaymentsOfDividendsPreferredStockAndPreferenceStock',
'us_gaap_PaymentsOfDividendsPreferredStockAndPreferenceStock',
'PaymentsOfDividendsPreferredStockAndPreferenceStock'
}
# DEPRECATED: Concepts that can legitimately be negative
# This list is no longer used but kept for historical reference.
LEGITIMATE_NEGATIVE_CONCEPTS = {
# Interest expense/income that can be net negative
'us-gaap_InterestIncomeExpenseNet',
'us_gaap_InterestIncomeExpenseNet',
'InterestIncomeExpenseNet',
# Foreign exchange gains/losses
'us-gaap_ForeignCurrencyTransactionGainLossBeforeTax',
'us_gaap_ForeignCurrencyTransactionGainLossBeforeTax',
'ForeignCurrencyTransactionGainLossBeforeTax',
# Restructuring reversals/credits
'us-gaap_RestructuringChargesAndReversals',
'us_gaap_RestructuringChargesAndReversals',
'RestructuringChargesAndReversals'
}
# US-GAAP Balance Type Mappings (Issue #463)
#
# This mapping provides balance types for common US-GAAP concepts to support
# the balance column in DataFrame exports without requiring full taxonomy parsing.
#
# Balance types:
# - "debit": Assets, Expenses (increase with debits, decrease with credits)
# - "credit": Liabilities, Equity, Revenue (increase with credits, decrease with debits)
#
# TODO: Eventually replace with full US-GAAP taxonomy parser that follows schema imports
#
US_GAAP_BALANCE_TYPES = {
# ============================================================================
# ASSETS (Balance: debit)
# ============================================================================
# Current Assets
'us-gaap:Cash': 'debit',
'Cash': 'debit', # Short form
'us-gaap:CashAndCashEquivalentsAtCarryingValue': 'debit',
'CashAndCashEquivalentsAtCarryingValue': 'debit', # Short form
'us-gaap:CashEquivalentsAtCarryingValue': 'debit',
'us-gaap:RestrictedCashAndCashEquivalents': 'debit',
'us-gaap:MarketableSecurities': 'debit',
'us-gaap:AvailableForSaleSecuritiesDebtSecurities': 'debit',
'us-gaap:ShortTermInvestments': 'debit',
'us-gaap:AccountsReceivableNetCurrent': 'debit',
'us-gaap:AccountsReceivableGrossCurrent': 'debit',
'us-gaap:Inventory': 'debit',
'us-gaap:InventoryNet': 'debit',
'us-gaap:PrepaidExpenseAndOtherAssetsCurrent': 'debit',
'us-gaap:DeferredTaxAssetsNetCurrent': 'debit',
'us-gaap:OtherAssetsCurrent': 'debit',
'us-gaap:AssetsCurrent': 'debit',
# Non-Current Assets
'us-gaap:PropertyPlantAndEquipmentNet': 'debit',
'us-gaap:PropertyPlantAndEquipmentGross': 'debit',
'us-gaap:Land': 'debit',
'us-gaap:BuildingsAndImprovementsGross': 'debit',
'us-gaap:MachineryAndEquipmentGross': 'debit',
'us-gaap:Goodwill': 'debit',
'us-gaap:IntangibleAssetsNetExcludingGoodwill': 'debit',
'us-gaap:IntangibleAssetsGrossExcludingGoodwill': 'debit',
'us-gaap:LongTermInvestments': 'debit',
'us-gaap:DeferredTaxAssetsNetNoncurrent': 'debit',
'us-gaap:OtherAssetsNoncurrent': 'debit',
'us-gaap:AssetsNoncurrent': 'debit',
'us-gaap:Assets': 'debit',
'Assets': 'debit', # Short form
# ============================================================================
# LIABILITIES (Balance: credit)
# ============================================================================
# Current Liabilities
'us-gaap:AccountsPayableCurrent': 'credit',
'us-gaap:AccruedLiabilitiesCurrent': 'credit',
'us-gaap:DeferredRevenueCurrent': 'credit',
'us-gaap:ContractWithCustomerLiabilityCurrent': 'credit',
'us-gaap:ShortTermBorrowings': 'credit',
'us-gaap:LongTermDebtCurrent': 'credit',
'us-gaap:CommercialPaper': 'credit',
'us-gaap:AccruedIncomeTaxesCurrent': 'credit',
'us-gaap:DividendsPayableCurrent': 'credit',
'us-gaap:OtherLiabilitiesCurrent': 'credit',
'us-gaap:LiabilitiesCurrent': 'credit',
# Non-Current Liabilities
'us-gaap:LongTermDebtNoncurrent': 'credit',
'us-gaap:LongTermDebtAndCapitalLeaseObligations': 'credit',
'us-gaap:DeferredRevenueNoncurrent': 'credit',
'us-gaap:DeferredTaxLiabilitiesNoncurrent': 'credit',
'us-gaap:PensionAndOtherPostretirementDefinedBenefitPlansLiabilitiesNoncurrent': 'credit',
'us-gaap:OtherLiabilitiesNoncurrent': 'credit',
'us-gaap:LiabilitiesNoncurrent': 'credit',
'us-gaap:Liabilities': 'credit',
# ============================================================================
# EQUITY (Balance: credit)
# ============================================================================
'us-gaap:CommonStockValue': 'credit',
'us-gaap:CommonStockSharesIssued': 'credit',
'us-gaap:CommonStockSharesOutstanding': 'credit',
'us-gaap:PreferredStockValue': 'credit',
'us-gaap:AdditionalPaidInCapital': 'credit',
'us-gaap:AdditionalPaidInCapitalCommonStock': 'credit',
'us-gaap:RetainedEarningsAccumulatedDeficit': 'credit',
'us-gaap:TreasuryStockValue': 'debit', # Contra-equity (debit balance)
'us-gaap:AccumulatedOtherComprehensiveIncomeLossNetOfTax': 'credit',
'us-gaap:StockholdersEquity': 'credit',
'us-gaap:StockholdersEquityIncludingPortionAttributableToNoncontrollingInterest': 'credit',
'us-gaap:LiabilitiesAndStockholdersEquity': 'credit',
# ============================================================================
# REVENUE (Balance: credit)
# ============================================================================
'us-gaap:Revenues': 'credit',
'Revenues': 'credit', # Short form
'Revenue': 'credit', # Short form (singular)
'us-gaap:RevenueFromContractWithCustomerExcludingAssessedTax': 'credit',
'RevenueFromContractWithCustomerExcludingAssessedTax': 'credit', # Short form
'us-gaap:RevenueFromContractWithCustomerIncludingAssessedTax': 'credit',
'RevenueFromContractWithCustomerIncludingAssessedTax': 'credit', # Short form
'us-gaap:SalesRevenueNet': 'credit',
'us-gaap:SalesRevenueGoodsNet': 'credit',
'us-gaap:SalesRevenueServicesNet': 'credit',
'us-gaap:InterestAndDividendIncomeOperating': 'credit',
'us-gaap:InterestIncomeOther': 'credit',
'us-gaap:InvestmentIncomeInterest': 'credit',
'us-gaap:GainLossOnSaleOfPropertyPlantEquipment': 'credit',
'us-gaap:GainLossOnInvestments': 'credit',
'us-gaap:OtherNonoperatingIncomeExpense': 'credit',
# ============================================================================
# EXPENSES & COSTS (Balance: debit)
# ============================================================================
# Cost of Revenue
'us-gaap:CostOfRevenue': 'debit',
'us-gaap:CostOfGoodsAndServicesSold': 'debit',
'us-gaap:CostOfGoodsSold': 'debit',
'us-gaap:CostOfServices': 'debit',
# Operating Expenses
'us-gaap:ResearchAndDevelopmentExpense': 'debit',
'us-gaap:SellingGeneralAndAdministrativeExpense': 'debit',
'us-gaap:GeneralAndAdministrativeExpense': 'debit',
'us-gaap:SellingExpense': 'debit',
'us-gaap:SellingAndMarketingExpense': 'debit',
'us-gaap:MarketingExpense': 'debit',
'us-gaap:AdvertisingExpense': 'debit',
'us-gaap:DepreciationDepletionAndAmortization': 'debit',
'us-gaap:Depreciation': 'debit',
'us-gaap:AmortizationOfIntangibleAssets': 'debit',
'us-gaap:RestructuringCharges': 'debit',
'us-gaap:AssetImpairmentCharges': 'debit',
'us-gaap:ShareBasedCompensation': 'debit',
# Other Expenses
'us-gaap:InterestExpense': 'debit',
'us-gaap:InterestExpenseDebt': 'debit',
'us-gaap:IncomeTaxExpenseBenefit': 'debit',
'us-gaap:ProvisionForDoubtfulAccounts': 'debit',
# ============================================================================
# INCOME & TOTALS (Balance: credit)
# ============================================================================
'us-gaap:GrossProfit': 'credit',
'us-gaap:OperatingIncomeLoss': 'credit',
'us-gaap:IncomeLossFromContinuingOperationsBeforeIncomeTaxesExtraordinaryItemsNoncontrollingInterest': 'credit',
'us-gaap:IncomeLossFromContinuingOperations': 'credit',
'us-gaap:NetIncomeLoss': 'credit',
'us-gaap:NetIncomeLossAvailableToCommonStockholdersBasic': 'credit',
'us-gaap:NetIncomeLossAvailableToCommonStockholdersDiluted': 'credit',
'us-gaap:ComprehensiveIncomeNetOfTax': 'credit',
# ============================================================================
# CASH FLOW STATEMENT
# ============================================================================
# Operating Activities
'us-gaap:NetCashProvidedByUsedInOperatingActivities': 'debit',
'us-gaap:DepreciationAndAmortization': 'debit',
'us-gaap:ShareBasedCompensationArrangementByShareBasedPaymentAwardExpenseRecognized': 'debit',
'us-gaap:DeferredIncomeTaxExpenseBenefit': 'debit',
# Investing Activities
'us-gaap:NetCashProvidedByUsedInInvestingActivities': 'debit',
'us-gaap:PaymentsToAcquirePropertyPlantAndEquipment': 'credit', # Cash outflow
'us-gaap:PaymentsToAcquireBusinessesNetOfCashAcquired': 'credit', # Cash outflow
'us-gaap:PaymentsToAcquireMarketableSecurities': 'credit', # Cash outflow
'us-gaap:ProceedsFromSaleOfPropertyPlantAndEquipment': 'debit', # Cash inflow
'us-gaap:ProceedsFromSaleOfAvailableForSaleSecuritiesDebt': 'debit', # Cash inflow
# Financing Activities
'us-gaap:NetCashProvidedByUsedInFinancingActivities': 'debit',
'us-gaap:ProceedsFromIssuanceOfCommonStock': 'debit', # Cash inflow
'us-gaap:ProceedsFromIssuanceOfLongTermDebt': 'debit', # Cash inflow
'us-gaap:RepaymentsOfLongTermDebt': 'credit', # Cash outflow
'us-gaap:PaymentsOfDividends': 'credit', # Cash outflow
'us-gaap:PaymentsOfDividendsCommonStock': 'credit', # Cash outflow
'us-gaap:PaymentsForRepurchaseOfCommonStock': 'credit', # Cash outflow
}
def get_balance_type(concept: str) -> Optional[str]:
"""
Get the balance type for a concept.
Looks up the balance type from the static US-GAAP mapping, handling
both colon and underscore namespace separators.
Args:
concept: The concept name (e.g., 'us-gaap:Revenue' or 'us-gaap_Revenue' or 'us_gaap_Revenue')
Returns:
Balance type ('debit', 'credit', or None if not found)
Example:
>>> get_balance_type('us-gaap:Cash')
'debit'
>>> get_balance_type('us-gaap_Revenue')
'credit'
>>> get_balance_type('us_gaap_Revenue')
'credit'
        >>> get_balance_type('UnknownConcept') is None
        True
"""
# Try direct lookup first (standard form)
if concept in US_GAAP_BALANCE_TYPES:
return US_GAAP_BALANCE_TYPES[concept]
# Normalize to standard form: us-gaap:LocalName
# Handle common namespace prefix variations
normalized = concept
# Replace known namespace patterns
# us_gaap_Cash -> us-gaap:Cash
# us-gaap_Cash -> us-gaap:Cash
if 'us_gaap' in normalized:
normalized = normalized.replace('us_gaap_', 'us-gaap:')
normalized = normalized.replace('us_gaap:', 'us-gaap:')
elif 'us-gaap' in normalized:
normalized = normalized.replace('us-gaap_', 'us-gaap:')
# Try normalized form
if normalized in US_GAAP_BALANCE_TYPES:
return US_GAAP_BALANCE_TYPES[normalized]
# Try converting all underscores to colons (simple fallback)
concept_all_colons = concept.replace('_', ':')
if concept_all_colons in US_GAAP_BALANCE_TYPES:
return US_GAAP_BALANCE_TYPES[concept_all_colons]
return None
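A usage sketch (illustrative) showing the separator normalization in action:

for concept in ['us-gaap:Cash', 'us_gaap_Revenues', 'us-gaap_InterestExpense', 'MadeUpConcept']:
    print(concept, '->', get_balance_type(concept))
# us-gaap:Cash -> debit
# us_gaap_Revenues -> credit
# us-gaap_InterestExpense -> debit
# MadeUpConcept -> None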

View File

@@ -0,0 +1,291 @@
"""
XBRL Parser Coordinator.
This module provides the main XBRLParser class that coordinates parsing
workflow across all specialized parser components while maintaining
API compatibility with the original monolithic parser.
"""
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union
from edgar.core import log
from edgar.xbrl.models import (
Axis,
CalculationTree,
Context,
Domain,
ElementCatalog,
Fact,
PresentationTree,
Table,
XBRLProcessingError,
)
from .calculation import CalculationParser
from .definition import DefinitionParser
from .instance import InstanceParser
from .labels import LabelsParser
from .presentation import PresentationParser
from .schema import SchemaParser
class XBRLParser:
"""
Coordinated XBRL parser that delegates to specialized component parsers.
This class maintains full API compatibility with the original monolithic
XBRLParser while providing improved maintainability through component separation.
"""
def __init__(self):
"""Initialize the coordinated XBRL parser with all data structures."""
# Core data structures
self.element_catalog: Dict[str, ElementCatalog] = {}
self.contexts: Dict[str, Context] = {}
self.facts: Dict[str, Fact] = {}
self.units: Dict[str, Any] = {}
self.footnotes: Dict[str, Any] = {}
# Presentation structures
self.presentation_roles: Dict[str, Dict[str, Any]] = {}
self.presentation_trees: Dict[str, PresentationTree] = {}
# Calculation structures
self.calculation_roles: Dict[str, Dict[str, Any]] = {}
self.calculation_trees: Dict[str, CalculationTree] = {}
# Definition (dimensional) structures
self.definition_roles: Dict[str, Dict[str, Any]] = {}
self.tables: Dict[str, List[Table]] = {}
self.axes: Dict[str, Axis] = {}
self.domains: Dict[str, Domain] = {}
# Entity information
self.entity_info: Dict[str, Any] = {}
self.dei_facts: Dict[str, Fact] = {}
# Reporting periods
self.reporting_periods: List[Dict[str, Any]] = []
# Mapping of context IDs to period identifiers for easy lookup
self.context_period_map: Dict[str, str] = {}
# Initialize component parsers
self._init_parsers()
def _init_parsers(self):
"""Initialize all component parsers with shared data structures."""
# Create component parsers with references to shared data structures
self.schema_parser = SchemaParser(
element_catalog=self.element_catalog
)
self.labels_parser = LabelsParser(
element_catalog=self.element_catalog
)
self.presentation_parser = PresentationParser(
presentation_roles=self.presentation_roles,
presentation_trees=self.presentation_trees,
element_catalog=self.element_catalog
)
self.calculation_parser = CalculationParser(
calculation_roles=self.calculation_roles,
calculation_trees=self.calculation_trees,
element_catalog=self.element_catalog,
facts=self.facts
)
self.definition_parser = DefinitionParser(
definition_roles=self.definition_roles,
tables=self.tables,
axes=self.axes,
domains=self.domains,
element_catalog=self.element_catalog
)
self.instance_parser = InstanceParser(
contexts=self.contexts,
facts=self.facts,
units=self.units,
footnotes=self.footnotes,
calculation_trees=self.calculation_trees,
entity_info=self.entity_info,
reporting_periods=self.reporting_periods,
context_period_map=self.context_period_map
)
# Set up cross-references for embedded linkbase processing
self.schema_parser.set_linkbase_parsers(
labels_parser=self.labels_parser,
presentation_parser=self.presentation_parser,
calculation_parser=self.calculation_parser,
definition_parser=self.definition_parser
)
def _create_normalized_fact_key(self, element_id: str, context_ref: str, instance_id: Optional[int] = None) -> str:
"""
Create a normalized fact key using underscore format.
Args:
element_id: The element ID
context_ref: The context reference
instance_id: Optional instance ID for duplicate facts
Returns:
Normalized key in format: element_id_context_ref[_instance_id]
"""
return self.instance_parser._create_normalized_fact_key(element_id, context_ref, instance_id)
def get_facts_by_key(self, element_id: str, context_ref: str) -> List[Fact]:
"""Get all facts matching the given element ID and context reference.
This method handles both single facts and duplicate facts using the hybrid storage approach.
For single facts, it returns a list with one fact. For duplicates, it returns all instances.
Args:
element_id: The element ID to look up
context_ref: The context reference
Returns:
List of matching facts
"""
# Create base key for lookup
base_key = self._create_normalized_fact_key(element_id, context_ref)
# Check if single fact exists
if base_key in self.facts:
return [self.facts[base_key]]
# Check for duplicate facts (with instance IDs)
matching_facts = []
instance_id = 0
while True:
instance_key = self._create_normalized_fact_key(element_id, context_ref, instance_id)
if instance_key in self.facts:
matching_facts.append(self.facts[instance_key])
instance_id += 1
else:
break
return matching_facts
def get_fact(self, element_id: str, context_ref: str) -> Optional[Fact]:
"""Get a single fact by element ID and context reference.
Returns the first fact if multiple instances exist.
Args:
element_id: The element ID to look up
context_ref: The context reference
Returns:
The fact if found, None otherwise
"""
facts = self.get_facts_by_key(element_id, context_ref)
return facts[0] if facts else None
def parse_directory(self, directory_path: Union[str, Path]) -> None:
"""
Parse all XBRL files in a directory.
Args:
directory_path: Path to directory containing XBRL files
"""
try:
directory = Path(directory_path)
if not directory.is_dir():
raise XBRLProcessingError(f"Directory not found: {directory_path}")
log.debug(f"Parsing XBRL directory: {directory}")
# Parse schema files first to build element catalog
schema_files = list(directory.glob('*.xsd'))
for schema_file in schema_files:
log.debug(f"Parsing schema: {schema_file}")
self.schema_parser.parse_schema(schema_file)
# Parse linkbase files
linkbase_patterns = [
('*_lab.xml', self.labels_parser.parse_labels),
('*_pre.xml', self.presentation_parser.parse_presentation),
('*_cal.xml', self.calculation_parser.parse_calculation),
('*_def.xml', self.definition_parser.parse_definition),
]
for pattern, parser_method in linkbase_patterns:
linkbase_files = list(directory.glob(pattern))
for linkbase_file in linkbase_files:
log.debug(f"Parsing linkbase: {linkbase_file}")
parser_method(linkbase_file)
# Parse instance files last (they depend on schemas and linkbases)
instance_files = list(directory.glob('*.xml'))
# Filter out linkbase files
instance_files = [f for f in instance_files if not any(
f.name.endswith(suffix) for suffix in ['_lab.xml', '_pre.xml', '_cal.xml', '_def.xml']
)]
for instance_file in instance_files:
log.debug(f"Parsing instance: {instance_file}")
self.instance_parser.parse_instance(instance_file)
log.info(f"Successfully parsed XBRL directory with {len(self.facts)} facts")
except Exception as e:
raise XBRLProcessingError(f"Error parsing directory {directory_path}: {str(e)}") from e
# Delegate methods to component parsers for API compatibility
def parse_schema(self, file_path: Union[str, Path]) -> None:
"""Parse schema file and extract element information."""
return self.schema_parser.parse_schema(file_path)
def parse_schema_content(self, content: str) -> None:
"""Parse schema content and extract element information."""
return self.schema_parser.parse_schema_content(content)
def parse_labels(self, file_path: Union[str, Path]) -> None:
"""Parse label linkbase file and extract label information."""
return self.labels_parser.parse_labels(file_path)
def parse_labels_content(self, content: str) -> None:
"""Parse label linkbase content and extract label information."""
return self.labels_parser.parse_labels_content(content)
def parse_presentation(self, file_path: Union[str, Path]) -> None:
"""Parse presentation linkbase file and build presentation trees."""
return self.presentation_parser.parse_presentation(file_path)
def parse_presentation_content(self, content: str) -> None:
"""Parse presentation linkbase content and build presentation trees."""
return self.presentation_parser.parse_presentation_content(content)
def parse_calculation(self, file_path: Union[str, Path]) -> None:
"""Parse calculation linkbase file and build calculation trees."""
return self.calculation_parser.parse_calculation(file_path)
def parse_calculation_content(self, content: str) -> None:
"""Parse calculation linkbase content and build calculation trees."""
return self.calculation_parser.parse_calculation_content(content)
def parse_definition(self, file_path: Union[str, Path]) -> None:
"""Parse definition linkbase file and build dimensional structures."""
return self.definition_parser.parse_definition(file_path)
def parse_definition_content(self, content: str) -> None:
"""Parse definition linkbase content and build dimensional structures."""
return self.definition_parser.parse_definition_content(content)
def parse_instance(self, file_path: Union[str, Path]) -> None:
"""Parse instance document file and extract contexts, facts, and units."""
return self.instance_parser.parse_instance(file_path)
def parse_instance_content(self, content: str) -> None:
"""Parse instance document content and extract contexts, facts, and units."""
return self.instance_parser.parse_instance_content(content)
    def count_facts(self, content: str) -> Tuple[int, int]:
        """Count unique facts and total fact instances in the instance document."""
return self.instance_parser.count_facts(content)
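An end-to-end sketch of the delegating API (illustrative; the directory path and context ID are assumptions):

parser = XBRLParser()
parser.parse_directory("filings/example-10k")  # schemas, linkbases, then instances

# Both separator forms work, since keys are normalized to underscores internally
fact = parser.get_fact('us-gaap:Revenues', 'c-20')
if fact is not None:
    print(fact.value, fact.unit_ref)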

View File

@@ -0,0 +1,235 @@
"""
Definition parser for XBRL documents.
This module handles parsing of XBRL definition linkbases and building
dimensional structures like tables, axes, and domains.
"""
from pathlib import Path
from typing import Any, Dict, List, Union
from edgar.xbrl.core import NAMESPACES, STANDARD_LABEL, extract_element_id
from edgar.xbrl.models import Axis, Domain, ElementCatalog, Table, XBRLProcessingError
from .base import BaseParser
class DefinitionParser(BaseParser):
"""Parser for XBRL definition linkbases."""
def __init__(self, definition_roles: Dict[str, Dict[str, Any]],
tables: Dict[str, List[Table]],
axes: Dict[str, Axis],
domains: Dict[str, Domain],
element_catalog: Dict[str, ElementCatalog]):
"""
Initialize definition parser with data structure references.
Args:
definition_roles: Reference to definition roles dictionary
tables: Reference to tables dictionary
axes: Reference to axes dictionary
domains: Reference to domains dictionary
element_catalog: Reference to element catalog dictionary
"""
super().__init__()
# Store references to data structures
self.definition_roles = definition_roles
self.tables = tables
self.axes = axes
self.domains = domains
self.element_catalog = element_catalog
def parse_definition(self, file_path: Union[str, Path]) -> None:
"""Parse definition linkbase file and build dimensional structures."""
try:
content = Path(file_path).read_text()
self.parse_definition_content(content)
except Exception as e:
raise XBRLProcessingError(f"Error parsing definition file {file_path}: {str(e)}") from e
def parse_definition_content(self, content: str) -> None:
"""Parse definition linkbase content and build dimensional structures."""
try:
root = self._safe_parse_xml(content)
# Extract definition links
definition_links = root.findall('.//{http://www.xbrl.org/2003/linkbase}definitionLink')
for link in definition_links:
role = link.get('{http://www.w3.org/1999/xlink}role')
if not role:
continue
# Store role information
role_id = role.split('/')[-1] if '/' in role else role
role_def = role_id.replace('_', ' ')
self.definition_roles[role] = {
'roleUri': role,
'definition': role_def,
'roleId': role_id
}
# Extract arcs
arcs = link.findall('.//{http://www.xbrl.org/2003/linkbase}definitionArc')
# Create relationships list
relationships = []
for arc in arcs:
from_ref = arc.get('{http://www.w3.org/1999/xlink}from')
to_ref = arc.get('{http://www.w3.org/1999/xlink}to')
order = self._parse_order_attribute(arc)
# Get the arcrole - this is important for identifying dimensional relationships
arcrole = arc.get('{http://www.w3.org/1999/xlink}arcrole')
if not from_ref or not to_ref or not arcrole:
continue
# Find locators for from/to references
from_loc = link.find(f'.//*[@{{{NAMESPACES["xlink"]}}}label="{from_ref}"]')
to_loc = link.find(f'.//*[@{{{NAMESPACES["xlink"]}}}label="{to_ref}"]')
if from_loc is None or to_loc is None:
continue
from_href = from_loc.get('{http://www.w3.org/1999/xlink}href')
to_href = to_loc.get('{http://www.w3.org/1999/xlink}href')
if not from_href or not to_href:
continue
# Extract element IDs
from_element = extract_element_id(from_href)
to_element = extract_element_id(to_href)
# Add relationship with arcrole
relationships.append({
'from_element': from_element,
'to_element': to_element,
'order': order,
'arcrole': arcrole
})
# Process dimensional structures from relationships
self._process_dimensional_relationships(role, relationships)
except Exception as e:
raise XBRLProcessingError(f"Error parsing definition content: {str(e)}") from e
def _process_dimensional_relationships(self, role: str, relationships: List[Dict[str, Any]]) -> None:
"""
Process dimensional relationships to build tables, axes, and domains.
Args:
role: Extended link role URI
relationships: List of dimensional relationships
"""
# XBRL Dimensions arcrole URIs
HYPERCUBE_DIMENSION = "http://xbrl.org/int/dim/arcrole/hypercube-dimension"
DIMENSION_DOMAIN = "http://xbrl.org/int/dim/arcrole/dimension-domain"
DOMAIN_MEMBER = "http://xbrl.org/int/dim/arcrole/domain-member"
ALL = "http://xbrl.org/int/dim/arcrole/all"
# Group relationships by arcrole
grouped_rels = {}
for rel in relationships:
arcrole = rel['arcrole']
if arcrole not in grouped_rels:
grouped_rels[arcrole] = []
grouped_rels[arcrole].append(rel)
# Process hypercube-dimension relationships to identify tables and axes
hypercube_axes = {} # Map of hypercubes to their axes
if HYPERCUBE_DIMENSION in grouped_rels:
for rel in grouped_rels[HYPERCUBE_DIMENSION]:
table_id = rel['from_element']
axis_id = rel['to_element']
if table_id not in hypercube_axes:
hypercube_axes[table_id] = []
hypercube_axes[table_id].append(axis_id)
# Create or update axis
if axis_id not in self.axes:
self.axes[axis_id] = Axis(
element_id=axis_id,
label=self._get_element_label(axis_id)
)
# Process dimension-domain relationships to link axes to domains
if DIMENSION_DOMAIN in grouped_rels:
for rel in grouped_rels[DIMENSION_DOMAIN]:
axis_id = rel['from_element']
domain_id = rel['to_element']
# Link domain to axis
if axis_id in self.axes:
self.axes[axis_id].domain_id = domain_id
# Create or update domain
if domain_id not in self.domains:
self.domains[domain_id] = Domain(
element_id=domain_id,
label=self._get_element_label(domain_id)
)
# Process domain-member relationships to build domain hierarchies
if DOMAIN_MEMBER in grouped_rels:
# Group by parent (domain) element
domain_members = {}
for rel in grouped_rels[DOMAIN_MEMBER]:
domain_id = rel['from_element']
member_id = rel['to_element']
if domain_id not in domain_members:
domain_members[domain_id] = []
domain_members[domain_id].append(member_id)
# Also create the domain if it doesn't exist
if domain_id not in self.domains:
self.domains[domain_id] = Domain(
element_id=domain_id,
label=self._get_element_label(domain_id)
)
# Update domains with their members
for domain_id, members in domain_members.items():
if domain_id in self.domains:
self.domains[domain_id].members = members
# Process 'all' relationships to identify line items and build hypercubes (tables)
if ALL in grouped_rels:
tables_by_role = []
for rel in grouped_rels[ALL]:
                # The 'all' arc runs from the line-items (primary item)
                # element to the hypercube (table) element
                line_items_id = rel['from_element']
                table_id = rel['to_element']
# Only process if this table has axes defined
if table_id in hypercube_axes:
table = Table(
element_id=table_id,
label=self._get_element_label(table_id),
role_uri=role,
axes=hypercube_axes[table_id],
line_items=[line_items_id],
closed=False # Default
)
tables_by_role.append(table)
# Add tables to collection
if tables_by_role:
self.tables[role] = tables_by_role
def _get_element_label(self, element_id: str) -> str:
"""Get the label for an element, falling back to the element ID if not found."""
if element_id in self.element_catalog and self.element_catalog[element_id].labels:
# Use standard label if available
standard_label = self.element_catalog[element_id].labels.get(STANDARD_LABEL)
if standard_label:
return standard_label
return element_id # Fallback to element ID
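A minimal definition-linkbase fragment of the kind this parser consumes (illustrative; the element names and hrefs are made up, and it assumes extract_element_id('ex.xsd#ex_StatementTable') yields 'ex_StatementTable'):

SAMPLE_DEF = """<linkbase xmlns="http://www.xbrl.org/2003/linkbase"
                          xmlns:xlink="http://www.w3.org/1999/xlink">
  <definitionLink xlink:role="http://example.com/role/Disclosure">
    <loc xlink:label="tbl" xlink:href="ex.xsd#ex_StatementTable"/>
    <loc xlink:label="ax" xlink:href="ex.xsd#ex_ProductAxis"/>
    <loc xlink:label="dom" xlink:href="ex.xsd#ex_ProductDomain"/>
    <loc xlink:label="li" xlink:href="ex.xsd#ex_LineItems"/>
    <definitionArc xlink:from="tbl" xlink:to="ax"
        xlink:arcrole="http://xbrl.org/int/dim/arcrole/hypercube-dimension"/>
    <definitionArc xlink:from="ax" xlink:to="dom"
        xlink:arcrole="http://xbrl.org/int/dim/arcrole/dimension-domain"/>
    <definitionArc xlink:from="li" xlink:to="tbl"
        xlink:arcrole="http://xbrl.org/int/dim/arcrole/all"/>
  </definitionLink>
</linkbase>"""

dp = DefinitionParser({}, {}, {}, {}, {})
dp.parse_definition_content(SAMPLE_DEF)
# dp.axes now maps 'ex_ProductAxis' to an Axis whose domain_id is 'ex_ProductDomain',
# and dp.tables holds one Table with axes=['ex_ProductAxis'] under the role URI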

View File

@@ -0,0 +1,768 @@
"""
Instance parser for XBRL documents.
This module handles parsing of XBRL instance documents including facts, contexts,
units, footnotes, and entity information extraction.
"""
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union
from lxml import etree as ET
from edgar.core import log
from edgar.xbrl.core import NAMESPACES, classify_duration
from edgar.xbrl.models import Context, Fact, XBRLProcessingError
from .base import BaseParser
class InstanceParser(BaseParser):
"""Parser for XBRL instance documents."""
def __init__(self, contexts: Dict[str, Context], facts: Dict[str, Fact],
units: Dict[str, Any], footnotes: Dict[str, Any],
calculation_trees: Dict[str, Any], entity_info: Dict[str, Any],
reporting_periods: List[Dict[str, Any]], context_period_map: Dict[str, str]):
"""
Initialize instance parser with data structure references.
Args:
contexts: Reference to contexts dictionary
facts: Reference to facts dictionary
units: Reference to units dictionary
footnotes: Reference to footnotes dictionary
calculation_trees: Reference to calculation trees dictionary
entity_info: Reference to entity info dictionary
reporting_periods: Reference to reporting periods list
context_period_map: Reference to context period map
"""
super().__init__()
# Store references to data structures
self.contexts = contexts
self.facts = facts
self.units = units
self.footnotes = footnotes
self.calculation_trees = calculation_trees
self.entity_info = entity_info
self.reporting_periods = reporting_periods
self.context_period_map = context_period_map
# DEI facts extracted during entity info processing
self.dei_facts: Dict[str, Fact] = {}
    def _create_normalized_fact_key(self, element_id: str, context_ref: str, instance_id: Optional[int] = None) -> str:
"""
Create a normalized fact key using underscore format.
Args:
element_id: The element ID
context_ref: The context reference
instance_id: Optional instance ID for duplicate facts
Returns:
Normalized key in format: element_id_context_ref[_instance_id]
"""
normalized_element_id = element_id
if ':' in element_id:
prefix, name = element_id.split(':', 1)
normalized_element_id = f"{prefix}_{name}"
if instance_id is not None:
return f"{normalized_element_id}_{context_ref}_{instance_id}"
return f"{normalized_element_id}_{context_ref}"
def parse_instance(self, file_path: Union[str, Path]) -> None:
"""Parse instance document file and extract contexts, facts, and units."""
try:
content = Path(file_path).read_text()
self.parse_instance_content(content)
except Exception as e:
raise XBRLProcessingError(f"Error parsing instance file {file_path}: {str(e)}") from e
def parse_instance_content(self, content: str) -> None:
"""Parse instance document content and extract contexts, facts, and units."""
try:
# Use lxml's optimized parser with smart string handling and recovery mode
parser = ET.XMLParser(remove_blank_text=True, recover=True, huge_tree=True)
# Convert to bytes for faster parsing if not already
if isinstance(content, str):
content_bytes = content.encode('utf-8')
else:
content_bytes = content
# Parse content with optimized settings
root = ET.XML(content_bytes, parser)
# Extract data in optimal order (contexts first, then units, then facts)
# This ensures dependencies are resolved before they're needed
self._extract_contexts(root)
self._extract_units(root)
self._extract_facts(root)
self._extract_footnotes(root)
# Post-processing steps after all raw data is extracted
self._extract_entity_info()
self._build_reporting_periods()
except Exception as e:
raise XBRLProcessingError(f"Error parsing instance content: {str(e)}") from e
    def count_facts(self, content: str) -> Tuple[int, int]:
        """Count the number of facts in the instance document.
This function counts both unique facts and total fact instances in the XBRL document.
Returns:
tuple: (unique_facts_count, total_fact_instances)
"""
# Use lxml's optimized parser with smart string handling and recovery mode
parser = ET.XMLParser(remove_blank_text=True, recover=True, huge_tree=True)
# Convert to bytes for faster parsing if not already
if isinstance(content, str):
content_bytes = content.encode('utf-8')
else:
content_bytes = content
# Parse content with optimized settings
root = ET.XML(content_bytes, parser)
# Fast path to identify non-fact elements to skip
skip_tag_endings = {'}context', '}unit', '}schemaRef'}
# Track both total instances and unique facts
total_fact_instances = 0 # Total number of fact references in the document
unique_facts = set() # Set of unique element_id + context_ref combinations
create_key = self._create_normalized_fact_key
# Define counting function
def count_element(element):
"""Process a single element as a potential fact."""
nonlocal total_fact_instances
# Skip known non-fact elements
tag = element.tag
for ending in skip_tag_endings:
if tag.endswith(ending):
return
# Get context reference - key check to identify facts
context_ref = element.get('contextRef')
if context_ref is None:
return
# Extract element namespace and name - optimized split
if '}' in tag:
namespace, element_name = tag.split('}', 1)
namespace = namespace[1:] # Faster than strip('{')
else:
element_name = tag
namespace = None
            # Get namespace prefix - guard against a missing namespace
            prefix = None
            if namespace:
                for std_prefix, std_uri_base in NAMESPACES.items():
                    if namespace.startswith(std_uri_base):
                        prefix = std_prefix
                        break
                if not prefix:
                    # Try to extract a prefix-like token from the namespace URI
                    parts = namespace.split('/')
                    prefix = parts[-1] if parts else ''
            # Construct element ID
            element_id = f"{prefix}:{element_name}" if prefix else element_name
# Create a normalized key using underscore format for consistency
normalized_key = create_key(element_id, context_ref)
# Track unique facts
unique_facts.add(normalized_key)
# Increment total instances count
total_fact_instances += 1
# Optimize traversal using lxml's iterchildren and iterdescendants if available
if hasattr(root, 'iterchildren'):
# Use lxml's optimized traversal methods
for child in root.iterchildren():
count_element(child)
# Process nested elements with optimized iteration
for descendant in child.iterdescendants():
count_element(descendant)
else:
# Fallback for ElementTree
for child in root:
count_element(child)
for descendant in child.findall('.//*'):
count_element(descendant)
# Return tuple of counts (unique_facts_count, total_fact_instances)
return len(unique_facts), total_fact_instances
def _extract_contexts(self, root: ET.Element) -> None:
"""Extract contexts from instance document."""
try:
# Find all context elements
for context_elem in root.findall('.//{http://www.xbrl.org/2003/instance}context'):
context_id = context_elem.get('id')
if not context_id:
continue
# Create context object
context = Context(context_id=context_id)
# Extract entity information
entity_elem = context_elem.find('.//{http://www.xbrl.org/2003/instance}entity')
if entity_elem is not None:
# Get identifier
identifier_elem = entity_elem.find('.//{http://www.xbrl.org/2003/instance}identifier')
if identifier_elem is not None:
scheme = identifier_elem.get('scheme', '')
identifier = identifier_elem.text
context.entity = {
'scheme': scheme,
'identifier': identifier
}
# Get segment dimensions if present
segment_elem = entity_elem.find('.//{http://www.xbrl.org/2003/instance}segment')
if segment_elem is not None:
# Extract explicit dimensions
for dim_elem in segment_elem.findall('.//{http://xbrl.org/2006/xbrldi}explicitMember'):
dimension = dim_elem.get('dimension')
value = dim_elem.text
if dimension and value:
context.dimensions[dimension] = value
# Extract typed dimensions
for dim_elem in segment_elem.findall('.//{http://xbrl.org/2006/xbrldi}typedMember'):
dimension = dim_elem.get('dimension')
if dimension:
# The typed dimension value is the text content of the first child element
for child in dim_elem:
# Extract the text content, which contains the actual typed member value
if child.text and child.text.strip():
context.dimensions[dimension] = child.text.strip()
else:
# Fallback to tag if no text content
context.dimensions[dimension] = child.tag
break
# Extract period information
period_elem = context_elem.find('.//{http://www.xbrl.org/2003/instance}period')
if period_elem is not None:
# Check for instant period
instant_elem = period_elem.find('.//{http://www.xbrl.org/2003/instance}instant')
if instant_elem is not None and instant_elem.text:
context.period = {
'type': 'instant',
'instant': instant_elem.text
}
# Check for duration period
start_elem = period_elem.find('.//{http://www.xbrl.org/2003/instance}startDate')
end_elem = period_elem.find('.//{http://www.xbrl.org/2003/instance}endDate')
if start_elem is not None and end_elem is not None and start_elem.text and end_elem.text:
context.period = {
'type': 'duration',
'startDate': start_elem.text,
'endDate': end_elem.text
}
# Check for forever period
forever_elem = period_elem.find('.//{http://www.xbrl.org/2003/instance}forever')
if forever_elem is not None:
context.period = {
'type': 'forever'
}
# Add context to registry
self.contexts[context_id] = context
except Exception as e:
raise XBRLProcessingError(f"Error extracting contexts: {str(e)}") from e
def _extract_units(self, root: ET.Element) -> None:
"""Extract units from instance document."""
try:
# Find all unit elements
for unit_elem in root.findall('.//{http://www.xbrl.org/2003/instance}unit'):
unit_id = unit_elem.get('id')
if not unit_id:
continue
# Check for measure
measure_elem = unit_elem.find('.//{http://www.xbrl.org/2003/instance}measure')
if measure_elem is not None and measure_elem.text:
self.units[unit_id] = {
'type': 'simple',
'measure': measure_elem.text
}
continue
# Check for divide
divide_elem = unit_elem.find('.//{http://www.xbrl.org/2003/instance}divide')
if divide_elem is not None:
# Get numerator
numerator_elem = divide_elem.find('.//{http://www.xbrl.org/2003/instance}unitNumerator')
denominator_elem = divide_elem.find('.//{http://www.xbrl.org/2003/instance}unitDenominator')
if numerator_elem is not None and denominator_elem is not None:
# Get measures
numerator_measures = [elem.text for elem in numerator_elem.findall('.//{http://www.xbrl.org/2003/instance}measure') if elem.text]
denominator_measures = [elem.text for elem in denominator_elem.findall('.//{http://www.xbrl.org/2003/instance}measure') if elem.text]
self.units[unit_id] = {
'type': 'divide',
'numerator': numerator_measures,
'denominator': denominator_measures
}
except Exception as e:
raise XBRLProcessingError(f"Error extracting units: {str(e)}") from e
def _extract_facts(self, root: ET.Element) -> None:
"""Extract facts from instance document."""
try:
# Get direct access to nsmap if using lxml (much faster than regex extraction)
if hasattr(root, 'nsmap'):
# Leverage lxml's native nsmap functionality
prefix_map = {uri: prefix for prefix, uri in root.nsmap.items() if prefix is not None}
else:
# Fallback for ElementTree - precompile regex patterns for namespace extraction
xmlns_pattern = '{http://www.w3.org/2000/xmlns/}'
prefix_map = {}
# Extract namespace declarations from root
for attr_name, attr_value in root.attrib.items():
if attr_name.startswith(xmlns_pattern) or attr_name.startswith('xmlns:'):
# Extract the prefix more efficiently
if attr_name.startswith(xmlns_pattern):
prefix = attr_name[len(xmlns_pattern):]
else:
prefix = attr_name.split(':', 1)[1]
prefix_map[attr_value] = prefix
# Initialize counters and tracking
fact_count = 0
facts_dict = {}
base_keys = {}
# Fast path to identify non-fact elements to skip - compile as set for O(1) lookup
skip_tag_endings = {
'schemaRef',
'roleRef',
'arcroleRef',
'linkbaseRef',
'context',
'unit'
}
def process_element(element):
"""Process a single element as a potential fact."""
nonlocal fact_count
# Skip annotation nodes and other non element nodes
if not ET.iselement(element):
return
            # Skip comments and processing instructions: their .tag is a
            # callable rather than a string, so they can never be facts
            if callable(element.tag):
                return
tag = element.tag
for ending in skip_tag_endings:
if tag.endswith(ending):
return
# Get context reference - key check to identify facts
context_ref = element.get('contextRef')
if not context_ref:
return
# Get fact ID if present (for footnote linkage)
fact_id = element.get('id')
# Extract element namespace and name - optimized split
if '}' in tag:
namespace, element_name = tag.split('}', 1)
namespace = namespace[1:] # Faster than strip('{')
# Try to extract prefix from the namespace
prefix = prefix_map.get(namespace)
if not prefix:
parts = namespace.split('/')
prefix = parts[-1] if parts else ''
else:
element_name = tag
prefix = ''
# Construct element ID with optimized string concatenation
element_id = f"{prefix}:{element_name}" if prefix else element_name
# Get unit reference
unit_ref = element.get('unitRef')
# Get value - optimize string handling
value = element.text
if not value or not value.strip():
# Only check children if text is empty - use direct iteration for speed
for sub_elem in element:
sub_text = sub_elem.text
if sub_text and sub_text.strip():
value = sub_text
break
# Optimize string handling - inline conditional
value = value.strip() if value else ""
# Get decimals attribute - direct access
decimals = element.get('decimals')
# Optimize numeric conversion with faster try/except
numeric_value = None
if value:
try:
numeric_value = float(value)
except (ValueError, TypeError):
pass
# Create base key for duplicate detection
base_key = self._create_normalized_fact_key(element_id, context_ref)
# Handle duplicates
instance_id = None
if base_key in base_keys:
# This is a duplicate - convert existing fact to use instance_id if needed
if base_key in facts_dict:
existing_fact = facts_dict[base_key]
# Move existing fact to new key with instance_id=0
del facts_dict[base_key]
existing_fact.instance_id = 0
facts_dict[self._create_normalized_fact_key(element_id, context_ref, 0)] = existing_fact
# Add new fact with next instance_id
instance_id = len(base_keys[base_key])
base_keys[base_key].append(True)
else:
# First instance of this fact
base_keys[base_key] = [True]
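            # Hybrid storage, illustrated: a lone fact lives at its base key,
            # e.g. 'us-gaap_Revenues_c-1'; on the first duplicate the original
            # moves to 'us-gaap_Revenues_c-1_0' and the newcomer is stored
            # under 'us-gaap_Revenues_c-1_1'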
# Create fact object
fact = Fact(
element_id=element_id,
context_ref=context_ref,
value=value,
unit_ref=unit_ref,
decimals=decimals,
numeric_value=numeric_value,
instance_id=instance_id,
fact_id=fact_id
)
# Store fact with appropriate key
key = self._create_normalized_fact_key(element_id, context_ref, instance_id)
facts_dict[key] = fact
fact_count += 1
# Use lxml's optimized traversal methods when available
if hasattr(root, 'iterchildren'):
for child in root.iterchildren():
for child in root.iterchildren():
process_element(child)
# Process nested elements with optimized iteration
for descendant in child.iterdescendants():
process_element(descendant)
else:
# Fallback for ElementTree
for child in root:
process_element(child)
for descendant in child.findall('.//*'):
process_element(descendant)
# Update instance facts
self.facts.update(facts_dict)
log.debug(f"Extracted {fact_count} facts ({len(base_keys)} unique fact identifiers)")
except Exception as e:
raise XBRLProcessingError(f"Error extracting facts: {str(e)}") from e
def _extract_footnotes(self, root: ET.Element) -> None:
"""Extract footnotes from instance document.
Footnotes in XBRL are linked to facts via footnoteLink elements that contain:
1. footnote elements with the actual text content
2. footnoteArc elements that connect fact IDs to footnote IDs
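A minimal sketch of the expected markup (per the XBRL 2.1 linkbase
vocabulary; identifiers illustrative):
<link:footnoteLink>
<link:footnote xlink:label="fn1" xml:lang="en-US">Inventories are stated at cost.</link:footnote>
<link:footnoteArc xlink:from="fact1" xlink:to="fn1"/>
</link:footnoteLink>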
"""
try:
from edgar.xbrl.models import Footnote
# Find all footnoteLink elements
for footnote_link in root.findall('.//{http://www.xbrl.org/2003/linkbase}footnoteLink'):
# First, extract all footnote definitions
for footnote_elem in footnote_link.findall('{http://www.xbrl.org/2003/linkbase}footnote'):
# Try both 'id' and 'xlink:label' attributes
footnote_id = footnote_elem.get('id') or footnote_elem.get('{http://www.w3.org/1999/xlink}label')
if not footnote_id:
continue
# Get footnote attributes
lang = footnote_elem.get('{http://www.w3.org/XML/1998/namespace}lang', 'en-US')
role = footnote_elem.get('{http://www.w3.org/1999/xlink}role')
# Extract text content, handling XHTML formatting
footnote_text = ""
# Check for XHTML content
xhtml_divs = footnote_elem.findall('.//{http://www.w3.org/1999/xhtml}div')
if xhtml_divs:
# Join the text of each XHTML div, separating divs with a space
footnote_text = " ".join("".join(div.itertext()).strip() for div in xhtml_divs)
else:
# Fall back to direct text content
footnote_text = "".join(footnote_elem.itertext()).strip()
# Create Footnote object
footnote = Footnote(
footnote_id=footnote_id,
text=footnote_text,
lang=lang,
role=role,
related_fact_ids=[]
)
self.footnotes[footnote_id] = footnote
# Second, process footnoteArc elements to link facts to footnotes
for arc_elem in footnote_link.findall('{http://www.xbrl.org/2003/linkbase}footnoteArc'):
fact_id = arc_elem.get('{http://www.w3.org/1999/xlink}from')
footnote_id = arc_elem.get('{http://www.w3.org/1999/xlink}to')
if fact_id and footnote_id:
# Add fact ID to footnote's related facts
if footnote_id in self.footnotes:
self.footnotes[footnote_id].related_fact_ids.append(fact_id)
else:
log.warning(f"Footnote arc references undefined footnote: {footnote_id}")
# Also update the fact's footnotes list if we can find it
# This requires finding the fact by its fact_id
for fact in self.facts.values():
if fact.fact_id == fact_id:
if footnote_id not in fact.footnotes:
fact.footnotes.append(footnote_id)
break
log.debug(f"Extracted {len(self.footnotes)} footnotes")
except Exception as e:
# Log the error but don't fail - footnotes are optional
log.warning(f"Error extracting footnotes: {str(e)}")
def _extract_entity_info(self) -> None:
"""Extract entity information from contexts and DEI facts."""
try:
# Extract CIK/identifier from first context
identifier = None
if self.contexts:
first = next(iter(self.contexts.values()))
ident = first.entity.get('identifier')
if ident and ident.isdigit():
identifier = ident.lstrip('0')
# Collect all DEI facts into a dict: concept -> Fact
self.dei_facts: Dict[str, Fact] = {}
for fact in self.facts.values():
eid = fact.element_id
if eid.startswith('dei:'):
concept = eid.split(':', 1)[1]
elif eid.startswith('dei_'):
concept = eid.split('_', 1)[1]
else:
continue
self.dei_facts[concept] = fact
# Helper: get the first available DEI fact value
def get_dei(*names):
for n in names:
f = self.dei_facts.get(n)
if f:
return f.value
return None
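# Usage: get_dei('DocumentFiscalYearFocus', 'FiscalYearFocus') returns the
# value of the first listed DEI concept present in the filing, or None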
# Build entity_info preserving existing keys
self.entity_info.update({
'entity_name': get_dei('EntityRegistrantName'),
'ticker': get_dei('TradingSymbol'),
'identifier': identifier,
'document_type': get_dei('DocumentType'),
'reporting_end_date': None,
'document_period_end_date': get_dei('DocumentPeriodEndDate'),
'fiscal_year': get_dei('DocumentFiscalYearFocus', 'FiscalYearFocus', 'FiscalYear'),
'fiscal_period': get_dei('DocumentFiscalPeriodFocus', 'FiscalPeriodFocus'),
'fiscal_year_end_month': None,
'fiscal_year_end_day': None,
'annual_report': False,
'quarterly_report': False,
'amendment': False,
})
# Determine reporting_end_date from contexts
for ctx in self.contexts.values():
period = getattr(ctx, 'period', {})
if period.get('type') == 'instant':
ds = period.get('instant')
if ds:
try:
dt_obj = datetime.strptime(ds, '%Y-%m-%d').date()
curr = self.entity_info['reporting_end_date']
if curr is None or dt_obj > curr:
self.entity_info['reporting_end_date'] = dt_obj
except Exception:
pass
# Parse fiscal year end date into month/day
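# DEI reports this as an xbrli gMonthDay value, e.g. '--12-31' for a
# December 31 fiscal year end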
fye = get_dei('CurrentFiscalYearEndDate','FiscalYearEnd')
if fye:
try:
s = fye
if s.startswith('--'):
s = s[2:]
if '-' in s:
m, d = s.split('-', 1)
if m.isdigit() and d.isdigit():
self.entity_info['fiscal_year_end_month'] = int(m)
self.entity_info['fiscal_year_end_day'] = int(d)
except Exception:
pass
# Flags based on document_type
dt_val = self.entity_info['document_type'] or ''
self.entity_info['annual_report'] = (dt_val == '10-K')
self.entity_info['quarterly_report'] = (dt_val == '10-Q')
self.entity_info['amendment'] = ('/A' in dt_val)
log.debug(f"Entity info: {self.entity_info}")
except Exception as e:
log.warning(f"Warning: Error extracting entity info: {str(e)}")
def _build_reporting_periods(self) -> None:
"""Build reporting periods from contexts."""
try:
# Clear existing periods
self.reporting_periods.clear()
self.context_period_map.clear()
# Collect unique periods from contexts
instant_periods = {}
duration_periods = {}
for context_id, context in self.contexts.items():
# Check the period attribute directly rather than serializing the model
period = getattr(context, 'period', None)
if period and 'type' in period:
period_type = period.get('type')
if period_type == 'instant':
date_str = context.period.get('instant')
if date_str:
if date_str not in instant_periods:
instant_periods[date_str] = []
# Add context ID to this period
instant_periods[date_str].append(context_id)
# Map context to period key
period_key = f"instant_{date_str}"
self.context_period_map[context_id] = period_key
elif period_type == 'duration':
start_date = context.period.get('startDate')
end_date = context.period.get('endDate')
if start_date and end_date:
duration_key = f"{start_date}_{end_date}"
if duration_key not in duration_periods:
duration_periods[duration_key] = []
# Add context ID to this period
duration_periods[duration_key].append(context_id)
# Map context to period key
period_key = f"duration_{start_date}_{end_date}"
self.context_period_map[context_id] = period_key
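# e.g. a context covering 2023-01-01 through 2023-12-31 maps to the key
# 'duration_2023-01-01_2023-12-31' (dates illustrative)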
# Process instant periods
for date_str, context_ids in instant_periods.items():
try:
date_obj = datetime.strptime(date_str, '%Y-%m-%d').date()
formatted_date = date_obj.strftime('%B %d, %Y')
period = {
'type': 'instant',
'date': date_str,
'date_obj': date_obj,
'label': formatted_date,
'context_ids': context_ids,
'key': f"instant_{date_str}"
}
self.reporting_periods.append(period)
except (ValueError, TypeError):
# Skip invalid dates
continue
# Process duration periods
for period_key, context_ids in duration_periods.items():
start_date, end_date = period_key.split('_')
try:
start_obj = datetime.strptime(start_date, '%Y-%m-%d').date()
end_obj = datetime.strptime(end_date, '%Y-%m-%d').date()
formatted_start = start_obj.strftime('%B %d, %Y')
formatted_end = end_obj.strftime('%B %d, %Y')
# Calculate duration in days
days = (end_obj - start_obj).days
# Determine period type based on duration
period_description = classify_duration(days)
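# classify_duration buckets the day count into a human-readable label
# (e.g. a span of roughly 90 days reads as quarterly, roughly 365 as annual)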
period = {
'type': 'duration',
'start_date': start_date,
'end_date': end_date,
'start_obj': start_obj,
'end_obj': end_obj,
'days': days,
'period_type': period_description,
'label': f"{period_description}: {formatted_start} to {formatted_end}",
'context_ids': context_ids,
'key': f"duration_{start_date}_{end_date}"
}
self.reporting_periods.append(period)
except (ValueError, TypeError):
# Skip invalid dates
continue
# Sort periods by date (most recent first)
self.reporting_periods.sort(key=lambda p: p['date_obj'] if p['type'] == 'instant' else p['end_obj'], reverse=True)
# Debug logging to verify periods were extracted
if self.reporting_periods:
log.debug(f"Found {len(self.reporting_periods)} reporting periods.")
log.debug(f"First period: {self.reporting_periods[0]['label']}")
else:
log.debug("No reporting periods found!")
# Debug context period map
log.debug(f"Context period map has {len(self.context_period_map)} entries.")
except Exception as e:
# Log the error but don't fail
log.warning(f"Error building reporting periods: {str(e)}")
self.reporting_periods.clear()

View File

@@ -0,0 +1,149 @@
"""
Labels parser for XBRL documents.
This module handles parsing of XBRL label linkbases and extracting
element labels for display purposes.
"""
from pathlib import Path
from typing import Dict, Union
from lxml import etree as ET
from edgar.xbrl.core import STANDARD_LABEL, extract_element_id
from edgar.xbrl.models import ElementCatalog, XBRLProcessingError
from .base import BaseParser
class LabelsParser(BaseParser):
"""Parser for XBRL label linkbases."""
def __init__(self, element_catalog: Dict[str, ElementCatalog]):
"""
Initialize labels parser with data structure references.
Args:
element_catalog: Reference to element catalog dictionary
"""
super().__init__()
# Store references to data structures
self.element_catalog = element_catalog
def parse_labels(self, file_path: Union[str, Path]) -> None:
"""Parse label linkbase file and extract label information."""
try:
content = Path(file_path).read_text()
self.parse_labels_content(content)
except Exception as e:
raise XBRLProcessingError(f"Error parsing label file {file_path}: {str(e)}") from e
def parse_labels_content(self, content: str) -> None:
"""Parse label linkbase content and extract label information."""
try:
# Optimize: Register namespaces for faster XPath lookups
nsmap = {
'link': 'http://www.xbrl.org/2003/linkbase',
'xlink': 'http://www.w3.org/1999/xlink',
'xml': 'http://www.w3.org/XML/1998/namespace'
}
# Optimize: Use lxml parser with smart string handling
parser = ET.XMLParser(remove_blank_text=True, recover=True)
root = ET.XML(content.encode('utf-8'), parser)
# Optimize: Use specific XPath expressions with namespaces for faster lookups
# This is much faster than using findall with '//' in element tree
label_arcs = root.xpath('//link:labelArc', namespaces=nsmap)
labels = root.xpath('//link:label', namespaces=nsmap)
# Optimize: Pre-allocate dictionary with expected size
label_lookup = {}
# Optimize: Cache attribute lookups
xlink_label = '{http://www.w3.org/1999/xlink}label'
xlink_role = '{http://www.w3.org/1999/xlink}role'
xml_lang = '{http://www.w3.org/XML/1998/namespace}lang'
default_role = 'http://www.xbrl.org/2003/role/label'
# Optimize: Process labels in a single pass with direct attribute access
for label in labels:
label_id = label.get(xlink_label)
if not label_id:
continue
# Get text first - skip labels with no text content
text = label.text
if not text:
continue
# Get attributes - direct lookup is faster than method calls
role = label.get(xlink_role, default_role)
lang = label.get(xml_lang, 'en-US')
# Create nested dictionaries only when needed
if label_id not in label_lookup:
label_lookup[label_id] = {}
if lang not in label_lookup[label_id]:
label_lookup[label_id][lang] = {}
label_lookup[label_id][lang][role] = text
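# Resulting shape, with illustrative identifiers:
# {'lab_Revenues': {'en-US': {'http://www.xbrl.org/2003/role/label': 'Revenues'}}}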
# Optimize: Cache attribute lookups for arcs
xlink_from = '{http://www.w3.org/1999/xlink}from'
xlink_to = '{http://www.w3.org/1999/xlink}to'
xlink_href = '{http://www.w3.org/1999/xlink}href'
# Optimize: Create a lookup table for locators by label for faster access
loc_by_label = {}
for loc in root.xpath('//link:loc', namespaces=nsmap):
loc_label = loc.get(xlink_label)
if loc_label:
loc_by_label[loc_label] = loc.get(xlink_href)
# Connect labels to elements using arcs - with optimized lookups
for arc in label_arcs:
from_ref = arc.get(xlink_from)
to_ref = arc.get(xlink_to)
if not from_ref or not to_ref or to_ref not in label_lookup:
continue
# Use cached locator lookup instead of expensive XPath
href = loc_by_label.get(from_ref)
if not href:
continue
# Extract element ID from href
element_id = extract_element_id(href)
# Attach labels for this element - only en-US labels are handled here
if 'en-US' in label_lookup[to_ref]:
element_labels = label_lookup[to_ref]['en-US']
# Optimize: Update catalog with minimal overhead
catalog_entry = self.element_catalog.get(element_id)
if catalog_entry:
catalog_entry.labels.update(element_labels)
else:
# Create placeholder in catalog
self.element_catalog[element_id] = ElementCatalog(
name=element_id,
data_type="",
period_type="duration",
labels=element_labels
)
except Exception as e:
raise XBRLProcessingError(f"Error parsing label content: {str(e)}") from e
def get_element_label(self, element_id: str) -> str:
"""Get the label for an element, falling back to the element ID if not found."""
if element_id in self.element_catalog and self.element_catalog[element_id].labels:
# Use standard label if available
standard_label = self.element_catalog[element_id].labels.get(STANDARD_LABEL)
if standard_label:
return standard_label
return element_id # Fallback to element ID

View File

@@ -0,0 +1,249 @@
"""
Presentation parser for XBRL documents.
This module handles parsing of XBRL presentation linkbases and building
presentation trees for financial statement structure.
"""
from pathlib import Path
from typing import Any, Dict, List, Optional, Union
from lxml import etree as ET
from edgar.xbrl.core import extract_element_id
from edgar.xbrl.models import ElementCatalog, PresentationNode, PresentationTree, XBRLProcessingError
from .base import BaseParser
class PresentationParser(BaseParser):
"""Parser for XBRL presentation linkbases."""
def __init__(self, presentation_roles: Dict[str, Dict[str, Any]],
presentation_trees: Dict[str, PresentationTree],
element_catalog: Dict[str, ElementCatalog]):
"""
Initialize presentation parser with data structure references.
Args:
presentation_roles: Reference to presentation roles dictionary
presentation_trees: Reference to presentation trees dictionary
element_catalog: Reference to element catalog dictionary
"""
super().__init__()
# Store references to data structures
self.presentation_roles = presentation_roles
self.presentation_trees = presentation_trees
self.element_catalog = element_catalog
def parse_presentation(self, file_path: Union[str, Path]) -> None:
"""Parse presentation linkbase file and build presentation trees."""
try:
content = Path(file_path).read_text()
self.parse_presentation_content(content)
except Exception as e:
raise XBRLProcessingError(f"Error parsing presentation file {file_path}: {str(e)}") from e
def parse_presentation_content(self, content: str) -> None:
"""Parse presentation linkbase content and build presentation trees."""
try:
# Optimize: Register namespaces for faster XPath lookups
nsmap = {
'link': 'http://www.xbrl.org/2003/linkbase',
'xlink': 'http://www.w3.org/1999/xlink'
}
# Optimize: Use lxml parser with smart string handling
parser = ET.XMLParser(remove_blank_text=True, recover=True)
root = ET.XML(content.encode('utf-8'), parser)
# Optimize: Use XPath with namespaces for faster extraction
presentation_links = root.xpath('//link:presentationLink', namespaces=nsmap)
# Optimize: Cache attribute paths
xlink_role = '{http://www.w3.org/1999/xlink}role'
xlink_from = '{http://www.w3.org/1999/xlink}from'
xlink_to = '{http://www.w3.org/1999/xlink}to'
xlink_label = '{http://www.w3.org/1999/xlink}label'
xlink_href = '{http://www.w3.org/1999/xlink}href'
for link in presentation_links:
role = link.get(xlink_role)
if not role:
continue
# Store role information
role_id = role.split('/')[-1] if '/' in role else role
role_def = role_id.replace('_', ' ')
self.presentation_roles[role] = {
'roleUri': role,
'definition': role_def,
'roleId': role_id
}
# Optimize: Pre-build locator map to avoid repeated XPath lookups
loc_map = {}
for loc in link.xpath('.//link:loc', namespaces=nsmap):
label = loc.get(xlink_label)
if label:
loc_map[label] = loc.get(xlink_href)
# Optimize: Extract arcs using direct xpath with context
arcs = link.xpath('.//link:presentationArc', namespaces=nsmap)
# Collect relationships for this presentation link
relationships = []
relationships_append = relationships.append # Local function reference for speed
# Process arcs with optimized locator lookups
for arc in arcs:
from_ref = arc.get(xlink_from)
to_ref = arc.get(xlink_to)
if not from_ref or not to_ref:
continue
# Optimize: Use cached locator references instead of expensive XPath lookups
from_href = loc_map.get(from_ref)
to_href = loc_map.get(to_ref)
if not from_href or not to_href:
continue
# Parse order attribute correctly
order = self._parse_order_attribute(arc)
preferred_label = arc.get('preferredLabel')
# Extract element IDs from hrefs
from_element = extract_element_id(from_href)
to_element = extract_element_id(to_href)
# Add relationship using local function reference
relationships_append({
'from_element': from_element,
'to_element': to_element,
'order': order,
'preferred_label': preferred_label
})
# Build presentation tree for this role if we have relationships
if relationships:
self._build_presentation_tree(role, relationships)
except Exception as e:
raise XBRLProcessingError(f"Error parsing presentation content: {str(e)}") from e
def _build_presentation_tree(self, role: str, relationships: List[Dict[str, Any]]) -> None:
"""
Build a presentation tree from relationships.
Args:
role: Extended link role URI
relationships: List of relationships (from_element, to_element, order, preferred_label)
"""
# Group relationships by source element
from_map = {}
to_map = {}
for rel in relationships:
from_element = rel['from_element']
to_element = rel['to_element']
if from_element not in from_map:
from_map[from_element] = []
from_map[from_element].append(rel)
if to_element not in to_map:
to_map[to_element] = []
to_map[to_element].append(rel)
# Find root elements (appear as 'from' but not as 'to')
root_elements = set(from_map.keys()) - set(to_map.keys())
if not root_elements:
return # No root elements found
# Create presentation tree
tree = PresentationTree(
role_uri=role,
definition=self.presentation_roles[role]['definition'],
root_element_id=next(iter(root_elements)),
all_nodes={}
)
# Build tree recursively
for root_id in root_elements:
self._build_presentation_subtree(root_id, None, 0, from_map, tree.all_nodes)
# Add tree to collection
self.presentation_trees[role] = tree
def _build_presentation_subtree(self, element_id: str, parent_id: Optional[str], depth: int,
from_map: Dict[str, List[Dict[str, Any]]],
all_nodes: Dict[str, PresentationNode]) -> None:
"""
Recursively build a presentation subtree.
Args:
element_id: Current element ID
parent_id: Parent element ID
depth: Current depth in tree
from_map: Map of relationships by source element
all_nodes: Dictionary to store all nodes
"""
# Create node
node = PresentationNode(
element_id=element_id,
parent=parent_id,
children=[],
depth=depth
)
# Add element information if available
if element_id in self.element_catalog:
elem_info = self.element_catalog[element_id]
node.element_name = elem_info.name
node.standard_label = elem_info.labels.get('http://www.xbrl.org/2003/role/label', elem_info.name)
# Use enhanced abstract detection (Issue #450 fix)
# The element catalog may not have correct abstract info for standard taxonomy concepts
from edgar.xbrl.abstract_detection import is_abstract_concept
node.is_abstract = is_abstract_concept(
concept_name=elem_info.name,
schema_abstract=elem_info.abstract,
has_children=False, # Will be updated after children are processed
has_values=False # Will be determined later when facts are loaded
)
node.labels = elem_info.labels
# Add to collection
all_nodes[element_id] = node
# Process children
if element_id in from_map:
# Sort children by order
children = sorted(from_map[element_id], key=lambda r: r['order'])
for rel in children:
child_id = rel['to_element']
# Add child to parent's children list
node.children.append(child_id)
# Set preferred label
preferred_label = rel['preferred_label']
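# Standard preferredLabel roles such as .../role/periodStartLabel,
# .../role/periodEndLabel and .../role/totalLabel tell renderers which
# label variant to display at this position in the tree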
# Recursively build child subtree
self._build_presentation_subtree(
child_id, element_id, depth + 1, from_map, all_nodes
)
# Update preferred label and order after child is built
if child_id in all_nodes:
if preferred_label:
all_nodes[child_id].preferred_label = preferred_label
all_nodes[child_id].order = rel['order']

View File

@@ -0,0 +1,210 @@
"""
Schema parser for XBRL documents.
This module handles parsing of XBRL taxonomy schemas and element catalog
creation with element definitions and properties.
"""
from pathlib import Path
from typing import Dict, Union
from lxml import etree as ET
from edgar.core import log
from edgar.xbrl.models import ElementCatalog, XBRLProcessingError
from .base import BaseParser
class SchemaParser(BaseParser):
"""Parser for XBRL taxonomy schemas."""
def __init__(self, element_catalog: Dict[str, ElementCatalog]):
"""
Initialize schema parser with data structure references.
Args:
element_catalog: Reference to element catalog dictionary
"""
super().__init__()
# Store references to data structures
self.element_catalog = element_catalog
# Will be set by coordinator when needed
self.parse_labels_content = None
self.parse_presentation_content = None
self.parse_calculation_content = None
self.parse_definition_content = None
def set_linkbase_parsers(self, labels_parser, presentation_parser, calculation_parser, definition_parser):
"""
Set references to other parsers for embedded linkbase processing.
Args:
labels_parser: LabelsParser instance
presentation_parser: PresentationParser instance
calculation_parser: CalculationParser instance
definition_parser: DefinitionParser instance
"""
self.parse_labels_content = labels_parser.parse_labels_content
self.parse_presentation_content = presentation_parser.parse_presentation_content
self.parse_calculation_content = calculation_parser.parse_calculation_content
self.parse_definition_content = definition_parser.parse_definition_content
def parse_schema(self, file_path: Union[str, Path]) -> None:
"""Parse schema file and extract element information."""
try:
content = Path(file_path).read_text()
self.parse_schema_content(content)
except Exception as e:
raise XBRLProcessingError(f"Error parsing schema file {file_path}: {str(e)}") from e
def parse_schema_content(self, content: str) -> None:
"""Parse schema content and extract element information."""
try:
# Use the safe XML parsing helper
root = self._safe_parse_xml(content)
# Extract element declarations
for element in root.findall('.//{http://www.w3.org/2001/XMLSchema}element'):
element_id = element.get('id') or element.get('name')
if not element_id:
continue
# Extract element properties
data_type = element.get('type', '')
# Check for balance and period type
# First check as attributes on the element (modern XBRL style)
balance_type = element.get('{http://www.xbrl.org/2003/instance}balance')
period_type = element.get('{http://www.xbrl.org/2003/instance}periodType')
abstract = element.get('abstract', 'false').lower() == 'true'
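# A modern-style declaration looks roughly like (illustrative):
# <xsd:element name="Revenues" type="xbrli:monetaryItemType"
#     xbrli:periodType="duration" xbrli:balance="credit" abstract="false"/>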
# If not found as attributes, look in nested annotations (legacy style)
if not balance_type or not period_type:
annotation = element.find('.//{http://www.w3.org/2001/XMLSchema}annotation')
if annotation is not None:
for appinfo in annotation.findall('.//{http://www.w3.org/2001/XMLSchema}appinfo'):
if not balance_type:
balance_element = appinfo.find('.//{http://www.xbrl.org/2003/instance}balance')
if balance_element is not None:
balance_type = balance_element.text
if not period_type:
period_element = appinfo.find('.//{http://www.xbrl.org/2003/instance}periodType')
if period_element is not None:
period_type = period_element.text
# Create element catalog entry
self.element_catalog[element_id] = ElementCatalog(
name=element_id,
data_type=data_type,
period_type=period_type or "duration", # Default to duration
balance=balance_type,
abstract=abstract,
labels={}
)
# Extract embedded linkbases if present
embedded_linkbases = self._extract_embedded_linkbases(content)
# If embedded linkbases were found, parse them
if embedded_linkbases and 'linkbases' in embedded_linkbases:
if 'label' in embedded_linkbases['linkbases'] and self.parse_labels_content:
label_content = embedded_linkbases['linkbases']['label']
self.parse_labels_content(label_content)
if 'presentation' in embedded_linkbases['linkbases'] and self.parse_presentation_content:
presentation_content = embedded_linkbases['linkbases']['presentation']
self.parse_presentation_content(presentation_content)
if 'calculation' in embedded_linkbases['linkbases'] and self.parse_calculation_content:
calculation_content = embedded_linkbases['linkbases']['calculation']
self.parse_calculation_content(calculation_content)
if 'definition' in embedded_linkbases['linkbases'] and self.parse_definition_content:
definition_content = embedded_linkbases['linkbases']['definition']
self.parse_definition_content(definition_content)
except Exception as e:
raise XBRLProcessingError(f"Error parsing schema content: {str(e)}") from e
def _extract_embedded_linkbases(self, schema_content: str) -> Dict[str, Dict[str, str]]:
"""
Extract embedded linkbases and role types from the schema file.
Args:
schema_content: XML content of the schema file
Returns:
Dictionary containing embedded linkbases and role type information
"""
embedded_data = {
'linkbases': {},
'role_types': {}
}
try:
# Use the safe XML parsing helper
root = self._safe_parse_xml(schema_content)
# Create namespace map for use with XPath
nsmap = {
'xsd': 'http://www.w3.org/2001/XMLSchema',
'link': 'http://www.xbrl.org/2003/linkbase'
}
# Find all appinfo elements using optimized XPath
for appinfo in root.xpath('.//xsd:appinfo', namespaces=nsmap):
# Extract role types
for role_type in appinfo.xpath('./link:roleType', namespaces=nsmap):
role_uri = role_type.get('roleURI')
role_id = role_type.get('id')
# Use optimized XPath to find definition
definition = role_type.find('./link:definition', nsmap)
definition_text = definition.text if definition is not None else ""
# Use optimized XPath to find usedOn elements
used_on = [elem.text for elem in role_type.xpath('./link:usedOn', namespaces=nsmap) if elem.text]
if role_uri:
embedded_data['role_types'][role_uri] = {
'id': role_id,
'definition': definition_text,
'used_on': used_on
}
# Find the linkbase element with optimized XPath
linkbase = appinfo.find('./link:linkbase', nsmap)
if linkbase is not None:
# Extract the entire linkbase element as a string - with proper encoding
linkbase_string = ET.tostring(linkbase, encoding='unicode', method='xml')
# Extract each type of linkbase with optimized XPath
for linkbase_type in ['presentation', 'label', 'calculation', 'definition']:
# Use direct child XPath for better performance
xpath_expr = f'./link:{linkbase_type}Link'
linkbase_elements = linkbase.xpath(xpath_expr, namespaces=nsmap)
if linkbase_elements:
# Convert all linkbase elements of this type to strings
linkbase_strings = [
ET.tostring(elem, encoding='unicode', method='xml')
for elem in linkbase_elements
]
# Join multiple linkbase elements efficiently
linkbase_header = linkbase_string.split('>', 1)[0] + '>'
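# The opening <link:linkbase ...> tag carries the namespace declarations
# that the extracted child links rely on, so it is reused as the wrapper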
embedded_data['linkbases'][linkbase_type] = (
f"{linkbase_header}\n" +
'\n'.join(linkbase_strings) +
"\n</link:linkbase>"
)
return embedded_data
except Exception as e:
# Log the error but don't fail - just return empty embedded data
log.warning(f"Warning: Error extracting embedded linkbases: {str(e)}")
return embedded_data