edgartools/venv/lib/python3.10/site-packages/edgar/xbrl/stitching/ordering.py

"""
XBRL Statement Ordering - Intelligent Ordering for Multi-Period Statements

This module provides consistent ordering for financial statements across multiple periods
by combining template-based, reference-based, and semantic positioning strategies.
"""

import re
from enum import Enum
from typing import Dict, List, Optional, Tuple

# Prefer rapidfuzz when it is installed; otherwise expose a tiny class with the
# same ``fuzz.ratio`` entry point so the rest of this module can call
# ``fuzz.ratio(a, b)`` without caring which implementation is in use.
try:
    from rapidfuzz import fuzz
except ImportError:
    # Fallback to difflib if rapidfuzz is not available
    from difflib import SequenceMatcher

    class fuzz:
        """Minimal stand-in providing only the ``ratio`` function used in this module."""

        @staticmethod
        def ratio(s1: str, s2: str) -> float:
            """Return the similarity of ``s1`` and ``s2`` scaled to 0-100."""
            # SequenceMatcher.ratio() yields a value in [0, 1]; callers in this
            # module divide the result by 100, so scale it up here.
            return SequenceMatcher(None, s1, s2).ratio() * 100


class StatementType(str, Enum):
    """Supported statement types for ordering.

    Members also subclass ``str``, so a member compares equal to its plain
    string value (for example ``StatementType.BALANCE_SHEET == "BalanceSheet"``).
    """
    # Ordering templates in this file exist only for the first two types.
    INCOME_STATEMENT = "IncomeStatement"
    BALANCE_SHEET = "BalanceSheet"
    CASH_FLOW = "CashFlowStatement"
    EQUITY = "StatementOfEquity"


class FinancialStatementTemplates:
    """Canonical ordering templates for financial statements based on XBRL concepts"""

    # Canonical income statement layout.
    # Each entry is a tuple of (base_position, section_name, entries). An item
    # matched against entries[i] is positioned at base_position + i (see
    # get_template_position), so the order of entries inside a section is the
    # display order within that section.
    INCOME_STATEMENT_TEMPLATE = [
        # Revenue Section (0-99)
        (0, "revenue_section", [
            # Product/Service Revenue Components
            "us-gaap:SalesRevenueGoodsNet",
            "us-gaap:ProductSales",
            "us-gaap:SalesRevenueServicesNet",
            "us-gaap:SubscriptionRevenue",
            # Contract Revenue
            "us-gaap:RevenueFromContractWithCustomerExcludingAssessedTax",
            "us-gaap:RevenueFromContractWithCustomerIncludingAssessedTax",
            # Total Revenue
            "us-gaap:Revenue",
            "us-gaap:Revenues",
            "us-gaap:SalesRevenueNet",
            "us-gaap:OperatingRevenue"
        ]),

        # Cost Section (100-199)
        (100, "cost_section", [
            "us-gaap:CostOfRevenueAbstract",  # Abstract
            "us-gaap:CostOfRevenue",  # Total
            "us-gaap:CostOfGoodsSold",
            "us-gaap:CostOfGoodsAndServicesSold",
            "us-gaap:CostOfSales",
            "us-gaap:DirectOperatingCosts",
            "us-gaap:CostsAndExpenses"
        ]),

        # Gross Profit (200-299)
        (200, "gross_profit", [
            "us-gaap:GrossProfit"
        ]),

        # Operating Expenses (300-399)
        (300, "operating_expenses", [
            # R&D Expenses
            "us-gaap:ResearchAndDevelopmentCosts",
            "us-gaap:ResearchAndDevelopmentExpense",
            # SG&A Expenses
            "us-gaap:SellingGeneralAndAdministrativeExpense",
            "us-gaap:GeneralAndAdministrativeExpense",
            "us-gaap:AdministrativeExpense",
            "us-gaap:SellingAndMarketingExpense",
            "us-gaap:SellingExpense",
            "us-gaap:MarketingExpense",
            "us-gaap:AdvertisingExpense",
            # Total Operating Expenses
            "us-gaap:NoninterestExpense",
            "us-gaap:OperatingCostsAndExpenses",
            "us-gaap:OperatingExpenses"
        ]),

        # Operating Income (400-499)
        (400, "operating_income", [
            "us-gaap:OperatingIncomeLoss",
            "us-gaap:OperatingIncome",
            "us-gaap:IncomeLossFromContinuingOperationsBeforeInterestAndTaxes"
        ]),

        # Non-Operating (500-599)
        (500, "non_operating", [
            "us-gaap:InterestIncomeExpenseNet",
            "us-gaap:InterestAndDebtExpense",
            "us-gaap:InterestExpense",
            "us-gaap:InterestExpenseNonoperating",  # ADBE uses this for non-operating interest expense
            "us-gaap:InterestIncome",
            "us-gaap:InvestmentIncomeInterest",  # NVIDIA uses this variant
            "us-gaap:OtherNonoperatingIncomeExpense",
            "us-gaap:NonoperatingIncomeExpense",
            "orcl:NonoperatingIncomeExpenseIncludingEliminationOfNetIncomeLossAttributableToNoncontrollingInterests"
        ]),

        # Pre-Tax Income (600-699)
        (600, "pretax_income", [
            "us-gaap:IncomeLossBeforeIncomeTaxes",
            "us-gaap:IncomeLossFromContinuingOperationsBeforeIncomeTaxes",
            "us-gaap:IncomeLossFromContinuingOperationsBeforeIncomeTaxesExtraordinaryItemsNoncontrollingInterest",
            "orcl:IncomeLossFromContinuingOperationsIncludingNoncontrollingInterestBeforeIncomeTaxesExtraordinaryItems"
        ]),

        # Tax (700-799)
        (700, "tax", [
            "us-gaap:IncomeTaxesPaidNet",
            "us-gaap:IncomeTaxExpenseBenefit"
        ]),

        # Net Income (800-899)
        (800, "net_income", [
            "us-gaap:IncomeLossFromContinuingOperationsIncludingPortionAttributableToNoncontrollingInterest",
            "us-gaap:IncomeLossFromContinuingOperations",
            "us-gaap:NetIncome",
            "us-gaap:NetIncomeLoss",
            "us-gaap:ProfitLoss",
            "us-gaap:NetIncomeLossAttributableToNonredeemableNoncontrollingInterest",
            "us-gaap:NetIncomeLossAttributableToNoncontrollingInterest"
        ]),

        # Per Share Data (900-999)
        (900, "per_share", [
            "us-gaap:EarningsPerShareAbstract",
            "us-gaap:EarningsPerShareBasic",
            "us-gaap:EarningsPerShareDiluted",
            "us-gaap:WeightedAverageNumberOfSharesOutstandingAbstract",
            "us-gaap:WeightedAverageNumberOfSharesOutstandingBasic",
            "us-gaap:WeightedAverageNumberOfDilutedSharesOutstanding"
        ])
    ]

    # Canonical balance sheet layout, using the same
    # (base_position, section_name, entries) tuple shape as the income
    # statement template. The entries here are plain display labels (no
    # namespace prefix), so they are compared through _labels_match rather
    # than being skipped as XBRL concept IDs.
    BALANCE_SHEET_TEMPLATE = [
        # Current Assets (0-199)
        (0, "current_assets", [
            "Cash and Cash Equivalents",
            "Cash",
            "Short-term Investments",
            "Marketable Securities",
            "Accounts Receivable",
            "Trade Receivables",
            "Inventory",
            "Prepaid Expenses",
            "Other Current Assets",
            "Total Current Assets"
        ]),

        # Non-Current Assets (200-399)
        (200, "noncurrent_assets", [
            "Property, Plant and Equipment",
            "Property and Equipment",
            "Long-term Investments",
            "Goodwill",
            "Intangible Assets",
            "Other Non-current Assets",
            "Total Non-current Assets",
            "Total Assets"
        ]),

        # Current Liabilities (400-599)
        (400, "current_liabilities", [
            "Accounts Payable",
            "Trade Payables",
            "Accrued Liabilities",
            "Accrued Expenses",
            "Short-term Debt",
            "Current Portion of Long-term Debt",
            "Other Current Liabilities",
            "Total Current Liabilities"
        ]),

        # Non-Current Liabilities (600-799)
        (600, "noncurrent_liabilities", [
            "Long-term Debt",
            "Deferred Revenue",
            "Deferred Tax Liabilities",
            "Other Non-current Liabilities",
            "Total Non-current Liabilities",
            "Total Liabilities"
        ]),

        # Equity (800-999)
        (800, "equity", [
            "Common Stock",
            "Additional Paid-in Capital",
            "Retained Earnings",
            "Accumulated Other Comprehensive Income",
            "Treasury Stock",
            "Total Stockholders' Equity",
            "Total Shareholders' Equity",
            "Total Equity"
        ])
    ]

    def get_template_position(self, item_concept: str, item_label: str, statement_type: str) -> Optional[float]:
        """
        Get template position for an item, prioritizing concept-based matching over label matching.

        Args:
            item_concept: The XBRL concept (e.g., "us-gaap:Revenue")
            item_label: The display label (e.g., "Contract Revenue")
            statement_type: Type of statement ("IncomeStatement", "BalanceSheet", etc.)

        Returns:
            Float position in template, or None if no match found
        """
        # Handle different statement type formats
        if statement_type == "IncomeStatement":
            template_name = "INCOME_STATEMENT_TEMPLATE"
        elif statement_type == "BalanceSheet":
            template_name = "BALANCE_SHEET_TEMPLATE"
        else:
            template_name = f"{statement_type.upper()}_TEMPLATE"

        template = getattr(self, template_name, None)
        if not template:
            return None

        # Strategy 1: Direct concept matching (highest priority)
        if item_concept:
            normalized_concept = self._normalize_xbrl_concept(item_concept)
            for base_pos, _section_name, template_concepts in template:
                for i, template_concept in enumerate(template_concepts):
                    template_normalized = self._normalize_xbrl_concept(template_concept)
                    if normalized_concept == template_normalized:
                        return float(base_pos + i)

        # Strategy 2: Label-based matching as fallback (for compatibility)
        if item_label:
            for base_pos, _section_name, template_concepts in template:
                for i, template_concept in enumerate(template_concepts):
                    if self._labels_match(item_label, template_concept):
                        return float(base_pos + i)

        return None

    def _normalize_xbrl_concept(self, concept: str) -> str:
        """
        Normalize XBRL concept for matching.

        Handles variations in concept format:
        - "us-gaap:Revenue" vs "us-gaap_Revenue"
        - Case sensitivity
        - Namespace prefixes
        """
        if not concept:
            return ""

        # Normalize separators (: vs _)
        normalized = concept.lower()
        normalized = normalized.replace(':', '_')

        # Handle common namespace variations
        # us-gaap, usgaap, gaap all should match
        if normalized.startswith('us-gaap_') or normalized.startswith('usgaap_'):
            normalized = 'us-gaap_' + normalized.split('_', 1)[1]
        elif normalized.startswith('gaap_'):
            normalized = 'us-gaap_' + normalized.split('_', 1)[1]

        return normalized

    def _labels_match(self, label1: str, label2: str) -> bool:
        """Check if two labels represent the same financial item (fallback for non-concept matching)"""
        if not label1 or not label2:
            return False

        # For XBRL concepts in templates, don't try to match against labels
        if ':' in label2 or '_gaap_' in label2.lower():
            return False

        # Use existing normalization logic for label matching
        norm1 = self._normalize_concept(label1)
        norm2 = self._normalize_concept(label2)

        # Exact match
        if norm1 == norm2:
            return True

        # Fuzzy matching for similar concepts
        similarity = fuzz.ratio(norm1, norm2) / 100.0
        return similarity > 0.7

    def _concepts_match(self, concept1: str, concept2: str) -> bool:
        """Check if two concepts represent the same financial item"""
        # Normalize for comparison
        norm1 = self._normalize_concept(concept1)
        norm2 = self._normalize_concept(concept2)

        # Exact match
        if norm1 == norm2:
            return True

        # Fuzzy matching for similar concepts
        similarity = fuzz.ratio(norm1, norm2) / 100.0
        return similarity > 0.7  # Lowered threshold for better matching

    def _normalize_concept(self, concept: str) -> str:
        """Normalize concept for comparison"""
        if not concept:
            return ""

        # Remove common variations
        normalized = concept.lower()
        normalized = re.sub(r'\s+', ' ', normalized)  # Normalize whitespace
        normalized = re.sub(r'[,\.]', '', normalized)  # Remove punctuation
        normalized = re.sub(r'\(.*?\)', '', normalized)  # Remove parenthetical
        normalized = re.sub(r'\bexpense\b', '', normalized)  # Remove 'expense' suffix
        normalized = re.sub(r'\bincome\b', '', normalized)  # Remove 'income' suffix for matching
        return normalized.strip()


class ReferenceOrderingStrategy:
    """Derive item positions from the row order of a reference statement"""

    def establish_reference_order(self, statements: List[Dict]) -> Dict[str, float]:
        """
        Map concepts and labels of the reference statement to their row index.

        The first statement in ``statements`` is the reference (callers pass
        statements newest first). Rows without a concept are ignored; for the
        others both the concept and, when present, the label are keyed to the
        row index as a float.
        """
        if not statements:
            return {}

        newest = statements[0]
        positions: Dict[str, float] = {}

        for row_index, row in enumerate(newest.get('data', [])):
            concept_id = row.get('concept')
            if not concept_id:
                continue

            rank = float(row_index)
            positions[concept_id] = rank

            # Register the label too, so lookups work with either key
            display_label = row.get('label')
            if display_label:
                positions[display_label] = rank

        return positions


class SemanticPositioning:
    """Position concepts based on financial statement semantics"""

    def __init__(self, statement_type: str):
        """
        Remember the statement type and precompute its section defaults.

        Args:
            statement_type: Statement type identifier. The section rules in
                this class recognize "IncomeStatement" and "BalanceSheet".
        """
        self.statement_type = statement_type
        # Fallback positions, used when a section has no existing members yet
        self.section_defaults = self._get_section_defaults()

    def _get_section_defaults(self) -> Dict[str, float]:
        """Default positions for each section when no other guidance available"""
        if self.statement_type == "IncomeStatement":
            return {
                "revenue": 50.0,
                "cost": 150.0,
                "gross_profit": 250.0,
                "expense": 350.0,
                "operating_income": 450.0,
                "non_operating": 550.0,
                "pretax_income": 650.0,
                "tax": 750.0,
                "net_income": 850.0,
                "per_share": 950.0
            }
        elif self.statement_type == "BalanceSheet":
            return {
                "current_assets": 100.0,
                "noncurrent_assets": 300.0,
                "current_liabilities": 500.0,
                "noncurrent_liabilities": 700.0,
                "equity": 900.0
            }
        return {}

    def infer_position(self, concept: str, existing_order: Dict[str, float]) -> float:
        """Infer semantic position for a new concept"""

        # Rule-based positioning
        section = self._classify_concept_section(concept)
        if section:
            return self._position_in_section(concept, section, existing_order)

        # Parent-child relationship positioning
        parent = self._find_parent_concept(concept, existing_order)
        if parent:
            return existing_order[parent] + 0.1  # Just after parent

        # Similarity-based positioning
        similar_concept = self._find_most_similar_concept(concept, existing_order)
        if similar_concept:
            return existing_order[similar_concept] + 0.1

        # Default to end
        return 999.0

    def _classify_concept_section(self, concept: str) -> Optional[str]:
        """Classify concept into financial statement section"""
        if not concept:
            return None

        concept_lower = concept.lower()

        if self.statement_type == "IncomeStatement":
            # Revenue indicators
            if any(term in concept_lower for term in ['revenue', 'sales']) and not any(term in concept_lower for term in ['cost', 'expense']):
                return "revenue"
            # Cost indicators
            elif any(term in concept_lower for term in ['cost of', 'cogs']):
                return "cost"
            # Gross profit
            elif 'gross profit' in concept_lower or 'gross margin' in concept_lower:
                return "gross_profit"
            # Operating expenses
            elif any(term in concept_lower for term in ['r&d', 'research', 'selling', 'administrative', 'marketing']) or ('expense' in concept_lower and 'tax' not in concept_lower):
                return "expense"
            # Operating income
            elif 'operating income' in concept_lower or 'operating profit' in concept_lower:
                return "operating_income"
            # Non-operating
            elif any(term in concept_lower for term in ['interest', 'other income', 'nonoperating']):
                return "non_operating"
            # Pre-tax income
            elif 'before tax' in concept_lower or 'pretax' in concept_lower:
                return "pretax_income"
            # Tax
            elif 'tax' in concept_lower and 'expense' in concept_lower:
                return "tax"
            # Net income
            elif 'net income' in concept_lower or 'net earnings' in concept_lower:
                return "net_income"
            # Per share
            elif any(term in concept_lower for term in ['per share', 'earnings per', 'shares outstanding']):
                return "per_share"

        elif self.statement_type == "BalanceSheet":
            if any(term in concept_lower for term in ['cash', 'receivable', 'inventory', 'prepaid']) or ('current' in concept_lower and 'asset' in concept_lower):
                return "current_assets"
            elif any(term in concept_lower for term in ['property', 'equipment', 'goodwill', 'intangible']) or ('asset' in concept_lower and 'current' not in concept_lower):
                return "noncurrent_assets"
            elif any(term in concept_lower for term in ['payable', 'accrued']) or ('current' in concept_lower and 'liabilit' in concept_lower):
                return "current_liabilities"
            elif 'debt' in concept_lower or ('liabilit' in concept_lower and 'current' not in concept_lower):
                return "noncurrent_liabilities"
            elif any(term in concept_lower for term in ['equity', 'stock', 'retained earnings', 'capital']):
                return "equity"

        return None

    def _position_in_section(self, concept: str, section: str, existing_order: Dict[str, float]) -> float:
        """Position concept within its identified section"""
        section_concepts = [
            (label, pos) for label, pos in existing_order.items()
            if self._classify_concept_section(label) == section
        ]

        if not section_concepts:
            # Section doesn't exist yet - use template defaults
            return self.section_defaults.get(section, 999.0)

        # Find best position within section
        section_concepts.sort(key=lambda x: x[1])  # Sort by position

        # Simple strategy: place at end of section
        last_pos = section_concepts[-1][1]
        return last_pos + 0.1

    def _find_parent_concept(self, concept: str, existing_order: Dict[str, float]) -> Optional[str]:
        """Find parent concept in hierarchy"""
        if not concept:
            return None

        # Look for hierarchical relationships
        # e.g., "Software Revenue" -> "Revenue"
        concept_words = set(concept.lower().split())

        candidates = []
        for existing_concept in existing_order.keys():
            if not existing_concept:
                continue

            existing_words = set(existing_concept.lower().split())

            # Check if existing concept is a parent (subset of words)
            # Also check for common patterns like "expense" being a parent of "X expense"
            if (existing_words.issubset(concept_words) and len(existing_words) < len(concept_words)) or \
               (existing_concept.lower() in concept.lower() and existing_concept.lower() != concept.lower()):
                candidates.append((existing_concept, len(existing_words)))

        if candidates:
            # Return the most specific parent (most words in common)
            return max(candidates, key=lambda x: x[1])[0]

        return None

    def _find_most_similar_concept(self, concept: str, existing_order: Dict[str, float]) -> Optional[str]:
        """Find most similar existing concept"""
        if not concept:
            return None

        best_match = None
        best_similarity = 0.0

        for existing_concept in existing_order.keys():
            if not existing_concept:
                continue

            similarity = fuzz.ratio(concept.lower(), existing_concept.lower()) / 100.0
            if similarity > best_similarity and similarity > 0.5:  # Minimum threshold
                best_similarity = similarity
                best_match = existing_concept

        return best_match


class StatementOrderingManager:
    """Manages consistent ordering across multi-period statements"""

    def __init__(self, statement_type: str):
        """
        Create the helper objects for each ordering strategy.

        Args:
            statement_type: Statement type identifier (e.g. "IncomeStatement",
                "BalanceSheet"); it is stored and also handed to the semantic
                positioning helper.
        """
        self.statement_type = statement_type
        # One helper per strategy used by determine_ordering
        self.templates = FinancialStatementTemplates()
        self.reference_strategy = ReferenceOrderingStrategy()
        self.semantic_positioning = SemanticPositioning(statement_type)

    def determine_ordering(self, statements: List[Dict]) -> Dict[str, float]:
        """
        Determine unified ordering for all concepts across statements.

        Returns:
            Dict mapping concept -> sort_key (float for interpolation)
        """
        if not statements:
            return {}

        all_concepts = self._extract_all_concepts(statements)

        # Strategy 1: Template-based ordering (highest priority)
        template_positioned = self._apply_template_ordering(all_concepts, statements)

        # Strategy 2: Reference statement ordering for non-template items
        reference_positioned = self._apply_reference_ordering(
            all_concepts, statements, template_positioned
        )

        # Strategy 3: Semantic positioning for orphan concepts
        semantic_positioned = self._apply_semantic_positioning(
            all_concepts, template_positioned, reference_positioned
        )

        # Strategy 4: Section-aware consolidation to maintain template groupings
        final_ordering = self._consolidate_section_ordering(
            semantic_positioned, template_positioned, statements
        )

        return final_ordering

    def _extract_all_concepts(self, statements: List[Dict]) -> set:
        """Extract all unique concepts from statements"""
        all_concepts = set()

        for statement in statements:
            for item in statement.get('data', []):
                concept = item.get('concept')
                label = item.get('label')
                if concept:
                    all_concepts.add(concept)
                if label:
                    all_concepts.add(label)

        return all_concepts

    def _apply_template_ordering(self, concepts: set, statements: List[Dict]) -> Dict[str, float]:
        """Apply template-based ordering for known concepts using concept-first matching"""
        template_order = {}

        # Build a mapping of concepts/labels to their actual XBRL concepts for better matching
        concept_to_xbrl = {}
        label_to_xbrl = {}

        for statement in statements:
            for item in statement.get('data', []):
                concept = item.get('concept')
                label = item.get('label')

                if concept and label:
                    concept_to_xbrl[concept] = concept
                    label_to_xbrl[label] = concept
                elif concept:
                    concept_to_xbrl[concept] = concept

        # Apply template ordering with concept priority
        for concept_or_label in concepts:
            # Determine if this is a concept or label
            is_concept = concept_or_label in concept_to_xbrl
            is_label = concept_or_label in label_to_xbrl

            # Get the actual XBRL concept and label for this item
            if is_concept:
                xbrl_concept = concept_or_label
                # Try to find the corresponding label
                corresponding_label = None
                for stmt in statements:
                    for item in stmt.get('data', []):
                        if item.get('concept') == concept_or_label:
                            corresponding_label = item.get('label')
                            break
                    if corresponding_label:
                        break
            elif is_label:
                xbrl_concept = label_to_xbrl.get(concept_or_label)
                corresponding_label = concept_or_label
            else:
                # Neither concept nor label found in mappings
                xbrl_concept = None
                corresponding_label = concept_or_label

            # Try concept-based matching first, then label-based
            template_pos = self.templates.get_template_position(
                item_concept=xbrl_concept,
                item_label=corresponding_label,
                statement_type=self.statement_type
            )

            if template_pos is not None:
                template_order[concept_or_label] = template_pos

                # IMPORTANT: If we found a template position for a concept,
                # also apply it to the corresponding label (and vice versa)
                # This ensures consistent ordering regardless of whether the
                # stitcher uses concept or label as the key
                if is_concept and corresponding_label and corresponding_label in concepts:
                    template_order[corresponding_label] = template_pos
                elif is_label and xbrl_concept and xbrl_concept in concepts:
                    template_order[xbrl_concept] = template_pos

        return template_order

    def _apply_reference_ordering(self, concepts: set, statements: List[Dict],
                                 template_positioned: Dict[str, float]) -> Dict[str, float]:
        """Apply reference statement ordering for remaining concepts"""
        reference_order = self.reference_strategy.establish_reference_order(statements)

        combined_order = template_positioned.copy()

        for concept in concepts:
            if concept not in combined_order and concept in reference_order:
                combined_order[concept] = reference_order[concept]

        return combined_order

    def _apply_semantic_positioning(self, concepts: set, template_positioned: Dict[str, float],
                                   reference_positioned: Dict[str, float]) -> Dict[str, float]:
        """Apply semantic positioning for orphan concepts"""
        final_order = reference_positioned.copy()

        # Position remaining concepts using semantic rules
        for concept in concepts:
            if concept not in final_order:
                semantic_pos = self.semantic_positioning.infer_position(concept, final_order)
                final_order[concept] = semantic_pos

        return final_order

    def _consolidate_section_ordering(self, semantic_positioned: Dict[str, float],
                                     template_positioned: Dict[str, float],
                                     statements: List[Dict]) -> Dict[str, float]:
        """
        Consolidate ordering to maintain template section groupings.

        This prevents reference ordering from breaking up logical template sections
        like per-share data (EPS + Shares Outstanding).
        """
        # Identify template sections and their concepts
        template_sections = self._identify_template_sections(template_positioned)

        # Separate template-positioned from non-template items
        template_items = {}
        non_template_items = {}

        for concept, position in semantic_positioned.items():
            if concept in template_positioned:
                template_items[concept] = position
            else:
                non_template_items[concept] = position

        # Re-organize to ensure section integrity
        final_ordering = {}

        # Process template sections in order
        for section_name, section_concepts in template_sections.items():
            # Find all template items (concepts and labels) that belong to this section
            section_template_items = []

            for concept in section_concepts:
                if concept in template_items:
                    section_template_items.append(concept)

            # Also find labels that correspond to concepts in this section
            # by checking if any template_items have the same template position
            section_template_positions = set()
            for concept in section_concepts:
                if concept in template_positioned:
                    section_template_positions.add(template_positioned[concept])

            # Find labels that have the same template positions as section concepts
            for item, pos in template_items.items():
                if pos in section_template_positions and item not in section_template_items:
                    section_template_items.append(item)

            if section_template_items:
                # Use the template base position for this section to ensure strong grouping
                section_base_pos = self._get_section_base_position(section_name)

                # For critical sections like per_share, use an even stronger override
                if section_name == "per_share":
                    # Force per-share items to be at the very end, regardless of hierarchy
                    section_base_pos = 950.0

                # Ensure all items in this section stay grouped together
                for i, item in enumerate(sorted(section_template_items,
                                               key=lambda x: template_items.get(x, 999.0))):
                    final_ordering[item] = section_base_pos + i * 0.1

        # Add non-template items, adjusting positions to avoid breaking template sections
        section_ranges = self._get_section_ranges(final_ordering, template_sections)

        for concept, position in non_template_items.items():
            # Find appropriate insertion point that doesn't break template sections
            adjusted_position = self._find_insertion_point(position, section_ranges)
            final_ordering[concept] = adjusted_position

        return final_ordering

    def _get_section_base_position(self, section_name: str) -> float:
        """Get the base position for a template section"""
        if self.statement_type == "IncomeStatement":
            template = self.templates.INCOME_STATEMENT_TEMPLATE
        elif self.statement_type == "BalanceSheet":
            template = self.templates.BALANCE_SHEET_TEMPLATE
        else:
            return 999.0

        for base_pos, name, _concepts in template:
            if name == section_name:
                return float(base_pos)

        return 999.0

    def _identify_template_sections(self, template_positioned: Dict[str, float]) -> Dict[str, List[str]]:
        """Identify which concepts belong to which template sections"""
        sections = {}

        # Get the template for this statement type
        if self.statement_type == "IncomeStatement":
            template = self.templates.INCOME_STATEMENT_TEMPLATE
        elif self.statement_type == "BalanceSheet":
            template = self.templates.BALANCE_SHEET_TEMPLATE
        else:
            return {}

        # Build mapping of concepts to sections
        for _base_pos, section_name, template_concepts in template:
            section_concepts = []

            for concept in template_positioned.keys():
                # Check if this concept matches any template concept in this section
                for template_concept in template_concepts:
                    if self._concept_matches_template(concept, template_concept):
                        section_concepts.append(concept)
                        break

            if section_concepts:
                sections[section_name] = section_concepts

        return sections

    def _concept_matches_template(self, concept: str, template_concept: str) -> bool:
        """Check if a concept matches a template concept"""
        # For XBRL concepts, do direct comparison
        if ':' in template_concept or '_gaap_' in template_concept.lower():
            return self._normalize_xbrl_concept(concept) == self._normalize_xbrl_concept(template_concept)

        # For labels, use fuzzy matching
        return self._labels_match(concept, template_concept)

    def _get_section_ranges(self, final_ordering: Dict[str, float],
                           template_sections: Dict[str, List[str]]) -> List[Tuple[float, float, str]]:
        """Get the position ranges occupied by each template section"""
        ranges = []

        for section_name, concepts in template_sections.items():
            section_positions = [final_ordering[c] for c in concepts if c in final_ordering]

            if section_positions:
                min_pos = min(section_positions)
                max_pos = max(section_positions)
                ranges.append((min_pos, max_pos, section_name))

        return sorted(ranges)

    def _find_insertion_point(self, desired_position: float,
                             section_ranges: List[Tuple[float, float, str]]) -> float:
        """Find appropriate insertion point that doesn't break template sections"""

        # Check if desired position conflicts with any template section
        for min_pos, max_pos, section_name in section_ranges:
            if min_pos <= desired_position <= max_pos:
                # Position conflicts with a template section
                # Place it just before the section (unless it should logically be after)

                # Special handling for per-share section
                if section_name == "per_share" and desired_position < min_pos:
                    # Items that should come before per-share data
                    return min_pos - 1.0
                else:
                    # Place after the section
                    return max_pos + 1.0

        # No conflicts, use desired position
        return desired_position

    def _normalize_xbrl_concept(self, concept: str) -> str:
        """Normalize an XBRL concept ID by delegating to the templates helper.

        Thin wrapper so methods of this class can call the normalizer on
        ``self``; the result is whatever
        ``self.templates._normalize_xbrl_concept`` returns for ``concept``.
        """
        return self.templates._normalize_xbrl_concept(concept)

    def _labels_match(self, label1: str, label2: str) -> bool:
        """Compare two labels by delegating to the templates helper.

        Thin wrapper so methods of this class can call the matcher on
        ``self``; the result is whatever ``self.templates._labels_match``
        returns for the same arguments.
        """
        return self.templates._labels_match(label1, label2)