Files
2025-12-09 12:13:01 +01:00

834 lines
33 KiB
Python

"""
XBRL Statement Ordering - Intelligent Ordering for Multi-Period Statements
This module provides consistent ordering for financial statements across multiple periods
by combining template-based, reference-based, and semantic positioning strategies.
"""
import re
from enum import Enum
from typing import Dict, List, Optional, Tuple
try:
    from rapidfuzz import fuzz
except ImportError:
    # rapidfuzz is optional; without it, provide the same ``fuzz.ratio``
    # entry point on top of the standard library's difflib.
    from difflib import SequenceMatcher

    class fuzz:
        """Stand-in for ``rapidfuzz.fuzz`` exposing only ``ratio``."""

        @staticmethod
        def ratio(s1: str, s2: str) -> float:
            """Return the similarity of *s1* and *s2* scaled to 0-100."""
            matcher = SequenceMatcher(None, s1, s2)
            # SequenceMatcher reports 0-1; scale to the 0-100 range used by callers.
            return matcher.ratio() * 100
class StatementType(str, Enum):
    """Supported statement types for ordering.

    Members also subclass ``str``, so each one compares equal to its plain
    string value (e.g. ``StatementType.BALANCE_SHEET == "BalanceSheet"``).
    """
    # Values match the statement-type strings compared against elsewhere in
    # this module ("IncomeStatement", "BalanceSheet").
    INCOME_STATEMENT = "IncomeStatement"
    BALANCE_SHEET = "BalanceSheet"
    CASH_FLOW = "CashFlowStatement"
    EQUITY = "StatementOfEquity"
class FinancialStatementTemplates:
    """Canonical ordering templates for financial statements based on XBRL concepts.

    Each template is a list of ``(base_position, section_name, entries)``
    tuples. An entry's template position is ``base_position + index`` of the
    entry inside its section. The income statement template lists XBRL
    concept IDs; the balance sheet template lists display labels.
    """

    # Similarity (0-1 scale) that two normalized labels must exceed to be
    # treated as the same item by the fuzzy matchers below.
    _FUZZY_MATCH_THRESHOLD = 0.7

    INCOME_STATEMENT_TEMPLATE = [
        # Revenue Section (0-99)
        (0, "revenue_section", [
            # Product/Service Revenue Components
            "us-gaap:SalesRevenueGoodsNet",
            "us-gaap:ProductSales",
            "us-gaap:SalesRevenueServicesNet",
            "us-gaap:SubscriptionRevenue",
            # Contract Revenue
            "us-gaap:RevenueFromContractWithCustomerExcludingAssessedTax",
            "us-gaap:RevenueFromContractWithCustomerIncludingAssessedTax",
            # Total Revenue
            "us-gaap:Revenue",
            "us-gaap:Revenues",
            "us-gaap:SalesRevenueNet",
            "us-gaap:OperatingRevenue"
        ]),
        # Cost Section (100-199)
        (100, "cost_section", [
            "us-gaap:CostOfRevenueAbstract",  # Abstract
            "us-gaap:CostOfRevenue",  # Total
            "us-gaap:CostOfGoodsSold",
            "us-gaap:CostOfGoodsAndServicesSold",
            "us-gaap:CostOfSales",
            "us-gaap:DirectOperatingCosts",
            "us-gaap:CostsAndExpenses"
        ]),
        # Gross Profit (200-299)
        (200, "gross_profit", [
            "us-gaap:GrossProfit"
        ]),
        # Operating Expenses (300-399)
        (300, "operating_expenses", [
            # R&D Expenses
            "us-gaap:ResearchAndDevelopmentCosts",
            "us-gaap:ResearchAndDevelopmentExpense",
            # SG&A Expenses
            "us-gaap:SellingGeneralAndAdministrativeExpense",
            "us-gaap:GeneralAndAdministrativeExpense",
            "us-gaap:AdministrativeExpense",
            "us-gaap:SellingAndMarketingExpense",
            "us-gaap:SellingExpense",
            "us-gaap:MarketingExpense",
            "us-gaap:AdvertisingExpense",
            # Total Operating Expenses
            "us-gaap:NoninterestExpense",
            "us-gaap:OperatingCostsAndExpenses",
            "us-gaap:OperatingExpenses"
        ]),
        # Operating Income (400-499)
        (400, "operating_income", [
            "us-gaap:OperatingIncomeLoss",
            "us-gaap:OperatingIncome",
            "us-gaap:IncomeLossFromContinuingOperationsBeforeInterestAndTaxes"
        ]),
        # Non-Operating (500-599)
        (500, "non_operating", [
            "us-gaap:InterestIncomeExpenseNet",
            "us-gaap:InterestAndDebtExpense",
            "us-gaap:InterestExpense",
            "us-gaap:InterestExpenseNonoperating",  # ADBE uses this for non-operating interest expense
            "us-gaap:InterestIncome",
            "us-gaap:InvestmentIncomeInterest",  # NVIDIA uses this variant
            "us-gaap:OtherNonoperatingIncomeExpense",
            "us-gaap:NonoperatingIncomeExpense",
            "orcl:NonoperatingIncomeExpenseIncludingEliminationOfNetIncomeLossAttributableToNoncontrollingInterests"
        ]),
        # Pre-Tax Income (600-699)
        (600, "pretax_income", [
            "us-gaap:IncomeLossBeforeIncomeTaxes",
            "us-gaap:IncomeLossFromContinuingOperationsBeforeIncomeTaxes",
            "us-gaap:IncomeLossFromContinuingOperationsBeforeIncomeTaxesExtraordinaryItemsNoncontrollingInterest",
            "orcl:IncomeLossFromContinuingOperationsIncludingNoncontrollingInterestBeforeIncomeTaxesExtraordinaryItems"
        ]),
        # Tax (700-799)
        (700, "tax", [
            "us-gaap:IncomeTaxesPaidNet",
            "us-gaap:IncomeTaxExpenseBenefit"
        ]),
        # Net Income (800-899)
        (800, "net_income", [
            "us-gaap:IncomeLossFromContinuingOperationsIncludingPortionAttributableToNoncontrollingInterest",
            "us-gaap:IncomeLossFromContinuingOperations",
            "us-gaap:NetIncome",
            "us-gaap:NetIncomeLoss",
            "us-gaap:ProfitLoss",
            "us-gaap:NetIncomeLossAttributableToNonredeemableNoncontrollingInterest",
            "us-gaap:NetIncomeLossAttributableToNoncontrollingInterest"
        ]),
        # Per Share Data (900-999)
        (900, "per_share", [
            "us-gaap:EarningsPerShareAbstract",
            "us-gaap:EarningsPerShareBasic",
            "us-gaap:EarningsPerShareDiluted",
            "us-gaap:WeightedAverageNumberOfSharesOutstandingAbstract",
            "us-gaap:WeightedAverageNumberOfSharesOutstandingBasic",
            "us-gaap:WeightedAverageNumberOfDilutedSharesOutstanding"
        ])
    ]

    BALANCE_SHEET_TEMPLATE = [
        # Current Assets (0-199)
        (0, "current_assets", [
            "Cash and Cash Equivalents",
            "Cash",
            "Short-term Investments",
            "Marketable Securities",
            "Accounts Receivable",
            "Trade Receivables",
            "Inventory",
            "Prepaid Expenses",
            "Other Current Assets",
            "Total Current Assets"
        ]),
        # Non-Current Assets (200-399)
        (200, "noncurrent_assets", [
            "Property, Plant and Equipment",
            "Property and Equipment",
            "Long-term Investments",
            "Goodwill",
            "Intangible Assets",
            "Other Non-current Assets",
            "Total Non-current Assets",
            "Total Assets"
        ]),
        # Current Liabilities (400-599)
        (400, "current_liabilities", [
            "Accounts Payable",
            "Trade Payables",
            "Accrued Liabilities",
            "Accrued Expenses",
            "Short-term Debt",
            "Current Portion of Long-term Debt",
            "Other Current Liabilities",
            "Total Current Liabilities"
        ]),
        # Non-Current Liabilities (600-799)
        (600, "noncurrent_liabilities", [
            "Long-term Debt",
            "Deferred Revenue",
            "Deferred Tax Liabilities",
            "Other Non-current Liabilities",
            "Total Non-current Liabilities",
            "Total Liabilities"
        ]),
        # Equity (800-999)
        (800, "equity", [
            "Common Stock",
            "Additional Paid-in Capital",
            "Retained Earnings",
            "Accumulated Other Comprehensive Income",
            "Treasury Stock",
            "Total Stockholders' Equity",
            "Total Shareholders' Equity",
            "Total Equity"
        ])
    ]

    def get_template_position(self, item_concept: str, item_label: str, statement_type: str) -> Optional[float]:
        """
        Get template position for an item, prioritizing concept-based matching over label matching.

        Args:
            item_concept: The XBRL concept (e.g., "us-gaap:Revenue")
            item_label: The display label (e.g., "Contract Revenue")
            statement_type: Type of statement ("IncomeStatement", "BalanceSheet", etc.)

        Returns:
            Float position in template, or None if no match found (including
            when no template exists for ``statement_type``).
        """
        if not statement_type:
            # No statement type means no template to consult.
            return None

        # Handle different statement type formats
        if statement_type == "IncomeStatement":
            template_name = "INCOME_STATEMENT_TEMPLATE"
        elif statement_type == "BalanceSheet":
            template_name = "BALANCE_SHEET_TEMPLATE"
        else:
            template_name = f"{statement_type.upper()}_TEMPLATE"
        template = getattr(self, template_name, None)
        if not template:
            return None

        # Strategy 1: Direct concept matching (highest priority)
        if item_concept:
            normalized_concept = self._normalize_xbrl_concept(item_concept)
            for base_pos, _section_name, template_concepts in template:
                for i, template_concept in enumerate(template_concepts):
                    if normalized_concept == self._normalize_xbrl_concept(template_concept):
                        return float(base_pos + i)

        # Strategy 2: Label-based matching as fallback (for compatibility)
        if item_label:
            return self._best_label_position(item_label, template)
        return None

    def _best_label_position(self, item_label: str, template: list) -> Optional[float]:
        """Return the position of the template label that best matches *item_label*.

        An exact match on normalized text always wins. Otherwise the entry
        with the highest fuzzy score above the threshold is used, so a label
        is not captured by an earlier, merely similar entry (for example
        "Other Current Liabilities" by "Other Current Assets").
        """
        normalized_label = self._normalize_concept(item_label)

        # Template entries that are XBRL concept IDs never take part in label
        # matching (same rule as _labels_match).
        label_entries = [
            (float(base_pos + i), self._normalize_concept(entry))
            for base_pos, _section_name, entries in template
            for i, entry in enumerate(entries)
            if entry and ':' not in entry and '_gaap_' not in entry.lower()
        ]

        for position, normalized_entry in label_entries:
            if normalized_entry == normalized_label:
                return position

        best_position = None
        best_score = self._FUZZY_MATCH_THRESHOLD
        for position, normalized_entry in label_entries:
            score = fuzz.ratio(normalized_label, normalized_entry) / 100.0
            # Strict '>' keeps the earliest entry when scores tie.
            if score > best_score:
                best_score = score
                best_position = position
        return best_position

    def _normalize_xbrl_concept(self, concept: str) -> str:
        """
        Normalize XBRL concept for matching.

        Handles variations in concept format:
        - "us-gaap:Revenue" vs "us-gaap_Revenue"
        - Case sensitivity
        - Namespace prefixes ("usgaap" and "gaap" are rewritten to "us-gaap")

        Returns an empty string for empty input.
        """
        if not concept:
            return ""
        # Normalize case and separators (: vs _)
        normalized = concept.lower().replace(':', '_')
        # us-gaap, usgaap, gaap all should match
        if normalized.startswith(('us-gaap_', 'usgaap_', 'gaap_')):
            normalized = 'us-gaap_' + normalized.split('_', 1)[1]
        return normalized

    def _labels_match(self, label1: str, label2: str) -> bool:
        """Check if two labels represent the same financial item (fallback for non-concept matching).

        Returns False when either label is empty or when *label2* looks like
        an XBRL concept ID rather than a display label.
        """
        if not label1 or not label2:
            return False
        # For XBRL concepts in templates, don't try to match against labels
        if ':' in label2 or '_gaap_' in label2.lower():
            return False
        return self._concepts_match(label1, label2)

    def _concepts_match(self, concept1: str, concept2: str) -> bool:
        """Check if two concepts represent the same financial item.

        Both inputs are normalized with _normalize_concept; they match when
        the normalized texts are equal or their fuzzy similarity exceeds the
        class threshold.
        """
        norm1 = self._normalize_concept(concept1)
        norm2 = self._normalize_concept(concept2)
        if norm1 == norm2:
            return True
        similarity = fuzz.ratio(norm1, norm2) / 100.0
        return similarity > self._FUZZY_MATCH_THRESHOLD

    def _normalize_concept(self, concept: str) -> str:
        """Normalize a label/concept string for comparison.

        Lower-cases, drops commas/periods and parenthetical text, removes the
        standalone words "expense" and "income", and collapses whitespace.
        Returns an empty string for empty input.
        """
        if not concept:
            return ""
        normalized = concept.lower()
        normalized = re.sub(r'\s+', ' ', normalized)  # Normalize whitespace
        normalized = re.sub(r'[,\.]', '', normalized)  # Remove punctuation
        normalized = re.sub(r'\(.*?\)', '', normalized)  # Remove parenthetical
        normalized = re.sub(r'\bexpense\b', '', normalized)  # Remove 'expense' suffix
        normalized = re.sub(r'\bincome\b', '', normalized)  # Remove 'income' suffix for matching
        # The removals above can leave runs of spaces; collapse them again so
        # "Interest Expense, Net" and "Interest Net" normalize identically.
        normalized = re.sub(r'\s+', ' ', normalized)
        return normalized.strip()
class ReferenceOrderingStrategy:
    """Derive an ordering from a single reference statement."""

    def establish_reference_order(self, statements: List[Dict]) -> Dict[str, float]:
        """Map each concept and label of the first statement to its row index.

        The first statement in *statements* is used as the reference (the
        original comment states that statements arrive newest first). Each
        row's index is stored under both its ``concept`` and its ``label``
        when present; a key that appears again on a later row keeps the
        later index. Returns an empty dict when *statements* is empty.
        """
        if not statements:
            return {}

        newest = statements[0]
        positions: Dict[str, float] = {}
        for index, row in enumerate(newest.get('data', [])):
            rank = float(index)
            # Index by concept ID first, then by label, so either can be looked up.
            for key in (row.get('concept'), row.get('label')):
                if key:
                    positions[key] = rank
        return positions
class SemanticPositioning:
    """Position concepts based on financial statement semantics.

    Classification is keyword based and works on lower-cased text.
    """

    # Spellings that mark an item as non-operating / non-current. They are
    # checked explicitly because the plain substrings "operating income" and
    # "current" also occur inside "non-operating income" and "non-current".
    _NONOPERATING_MARKERS = ('nonoperating', 'non-operating', 'non operating')
    _NONCURRENT_MARKERS = ('noncurrent', 'non-current', 'non current')

    def __init__(self, statement_type: str):
        """Store the statement type and precompute its section defaults."""
        self.statement_type = statement_type
        self.section_defaults = self._get_section_defaults()

    def _get_section_defaults(self) -> Dict[str, float]:
        """Default positions for each section when no other guidance available.

        Returns an empty dict for statement types other than
        "IncomeStatement" and "BalanceSheet".
        """
        if self.statement_type == "IncomeStatement":
            return {
                "revenue": 50.0,
                "cost": 150.0,
                "gross_profit": 250.0,
                "expense": 350.0,
                "operating_income": 450.0,
                "non_operating": 550.0,
                "pretax_income": 650.0,
                "tax": 750.0,
                "net_income": 850.0,
                "per_share": 950.0
            }
        elif self.statement_type == "BalanceSheet":
            return {
                "current_assets": 100.0,
                "noncurrent_assets": 300.0,
                "current_liabilities": 500.0,
                "noncurrent_liabilities": 700.0,
                "equity": 900.0
            }
        return {}

    def infer_position(self, concept: str, existing_order: Dict[str, float]) -> float:
        """Infer semantic position for a new concept.

        Tries, in order: keyword section classification, a parent concept
        already in *existing_order*, the most similar existing concept.
        Falls back to 999.0 when none applies.
        """
        # Rule-based positioning
        section = self._classify_concept_section(concept)
        if section:
            return self._position_in_section(concept, section, existing_order)

        # Parent-child relationship positioning
        parent = self._find_parent_concept(concept, existing_order)
        if parent:
            return existing_order[parent] + 0.1  # Just after parent

        # Similarity-based positioning
        similar_concept = self._find_most_similar_concept(concept, existing_order)
        if similar_concept:
            return existing_order[similar_concept] + 0.1

        # Default to end
        return 999.0

    @staticmethod
    def _has_any(text: str, terms) -> bool:
        """Return True when any of *terms* occurs as a substring of *text*."""
        return any(term in text for term in terms)

    def _classify_concept_section(self, concept: str) -> Optional[str]:
        """Classify concept into financial statement section.

        Returns the section key used by ``section_defaults``, or None when
        the text matches no rule or the statement type has no rules.
        """
        if not concept:
            return None
        concept_lower = concept.lower()
        if self.statement_type == "IncomeStatement":
            return self._classify_income_statement(concept_lower)
        if self.statement_type == "BalanceSheet":
            return self._classify_balance_sheet(concept_lower)
        return None

    def _classify_income_statement(self, text: str) -> Optional[str]:
        """Apply the income statement keyword rules to lower-cased *text*."""
        has_any = self._has_any
        nonoperating = has_any(text, self._NONOPERATING_MARKERS)

        # Revenue indicators
        if has_any(text, ('revenue', 'sales')) and not has_any(text, ('cost', 'expense')):
            return "revenue"
        # Cost indicators
        if has_any(text, ('cost of', 'cogs')):
            return "cost"
        # Gross profit
        if 'gross profit' in text or 'gross margin' in text:
            return "gross_profit"
        # Operating expenses
        if has_any(text, ('r&d', 'research', 'selling', 'administrative', 'marketing')) or (
                'expense' in text and 'tax' not in text):
            return "expense"
        # Operating income ("non-operating income" contains the same words, so exclude it)
        if ('operating income' in text or 'operating profit' in text) and not nonoperating:
            return "operating_income"
        # Non-operating
        if nonoperating or has_any(text, ('interest', 'other income')):
            return "non_operating"
        # Pre-tax income
        if 'before tax' in text or 'pretax' in text:
            return "pretax_income"
        # Tax
        if 'tax' in text and 'expense' in text:
            return "tax"
        # Per share: checked before net income so "net income per share"
        # is grouped with the per-share data.
        if has_any(text, ('per share', 'earnings per', 'shares outstanding')):
            return "per_share"
        # Net income
        if 'net income' in text or 'net earnings' in text:
            return "net_income"
        return None

    def _classify_balance_sheet(self, text: str) -> Optional[str]:
        """Apply the balance sheet keyword rules to lower-cased *text*."""
        has_any = self._has_any
        # "current" only counts when it is not part of "non-current"/"noncurrent".
        is_current = 'current' in text and not has_any(text, self._NONCURRENT_MARKERS)

        if has_any(text, ('cash', 'receivable', 'inventory', 'prepaid')) or (
                is_current and 'asset' in text):
            return "current_assets"
        if has_any(text, ('property', 'equipment', 'goodwill', 'intangible')) or (
                'asset' in text and not is_current):
            return "noncurrent_assets"
        if has_any(text, ('payable', 'accrued')) or (is_current and 'liabilit' in text):
            return "current_liabilities"
        if 'debt' in text or ('liabilit' in text and not is_current):
            return "noncurrent_liabilities"
        if has_any(text, ('equity', 'stock', 'retained earnings', 'capital')):
            return "equity"
        return None

    def _position_in_section(self, concept: str, section: str, existing_order: Dict[str, float]) -> float:
        """Position concept within its identified section.

        Places the concept 0.1 after the last already-ordered key that
        classifies into the same section; when the section has no such key,
        returns the section default (999.0 if the section has none).
        """
        section_positions = [
            pos for label, pos in existing_order.items()
            if self._classify_concept_section(label) == section
        ]
        if not section_positions:
            # Section doesn't exist yet - use template defaults
            return self.section_defaults.get(section, 999.0)
        # Simple strategy: place at end of section
        return max(section_positions) + 0.1

    def _find_parent_concept(self, concept: str, existing_order: Dict[str, float]) -> Optional[str]:
        """Find parent concept in hierarchy.

        An existing key is a candidate parent when its words are a proper
        subset of the concept's words (e.g. "Revenue" for "Software Revenue")
        or when it is a proper substring of the concept. The candidate with
        the most words wins; ties go to the first one encountered.
        """
        if not concept:
            return None
        concept_lower = concept.lower()
        concept_words = set(concept_lower.split())
        candidates = []
        for existing_concept in existing_order:
            if not existing_concept:
                continue
            existing_lower = existing_concept.lower()
            existing_words = set(existing_lower.split())
            is_word_subset = existing_words < concept_words
            is_substring = existing_lower in concept_lower and existing_lower != concept_lower
            if is_word_subset or is_substring:
                candidates.append((existing_concept, len(existing_words)))
        if candidates:
            # Return the most specific parent (most words in common)
            return max(candidates, key=lambda x: x[1])[0]
        return None

    def _find_most_similar_concept(self, concept: str, existing_order: Dict[str, float]) -> Optional[str]:
        """Find most similar existing concept.

        Returns the key of *existing_order* with the highest case-insensitive
        fuzzy similarity to *concept*, provided it exceeds 0.5; otherwise None.
        """
        if not concept:
            return None
        concept_lower = concept.lower()
        best_match = None
        best_similarity = 0.0
        for existing_concept in existing_order:
            if not existing_concept:
                continue
            similarity = fuzz.ratio(concept_lower, existing_concept.lower()) / 100.0
            if similarity > best_similarity and similarity > 0.5:  # Minimum threshold
                best_similarity = similarity
                best_match = existing_concept
        return best_match
class StatementOrderingManager:
    """Manages consistent ordering across multi-period statements"""

    def __init__(self, statement_type: str):
        """Create the template, reference and semantic helpers for *statement_type*."""
        self.statement_type = statement_type
        self.templates = FinancialStatementTemplates()
        self.reference_strategy = ReferenceOrderingStrategy()
        self.semantic_positioning = SemanticPositioning(statement_type)

    def determine_ordering(self, statements: List[Dict]) -> Dict[str, float]:
        """
        Determine unified ordering for all concepts across statements.

        Args:
            statements: Statement dicts; each is read through its ``'data'``
                list of items carrying ``'concept'`` and/or ``'label'``.

        Returns:
            Dict mapping concept -> sort_key (float for interpolation).
            Empty when *statements* is empty.
        """
        if not statements:
            return {}
        all_concepts = self._extract_all_concepts(statements)

        # Strategy 1: Template-based ordering (highest priority)
        template_positioned = self._apply_template_ordering(all_concepts, statements)
        # Strategy 2: Reference statement ordering for non-template items
        reference_positioned = self._apply_reference_ordering(
            all_concepts, statements, template_positioned
        )
        # Strategy 3: Semantic positioning for orphan concepts
        semantic_positioned = self._apply_semantic_positioning(
            all_concepts, template_positioned, reference_positioned
        )
        # Strategy 4: Section-aware consolidation to maintain template groupings
        return self._consolidate_section_ordering(
            semantic_positioned, template_positioned, statements
        )

    def _extract_all_concepts(self, statements: List[Dict]) -> set:
        """Extract all unique concepts and labels (non-empty only) from statements."""
        all_concepts = set()
        for statement in statements:
            for item in statement.get('data', []):
                for key in (item.get('concept'), item.get('label')):
                    if key:
                        all_concepts.add(key)
        return all_concepts

    def _apply_template_ordering(self, concepts: set, statements: List[Dict]) -> Dict[str, float]:
        """Apply template-based ordering for known concepts using concept-first matching.

        Returns a dict holding only the keys of *concepts* for which the
        templates report a position. A concept and its label share the same
        position so the result works whichever one a caller uses as key.
        """
        # One pass over the statements builds every lookup needed below.
        known_concepts = set()
        label_to_xbrl = {}      # label -> concept it was last seen with
        concept_to_label = {}   # concept -> label of its first row per statement, first non-empty wins
        for statement in statements:
            seen_in_statement = set()
            for item in statement.get('data', []):
                concept = item.get('concept')
                if not concept:
                    continue
                label = item.get('label')
                known_concepts.add(concept)
                if label:
                    label_to_xbrl[label] = concept
                if concept not in seen_in_statement:
                    seen_in_statement.add(concept)
                    if not concept_to_label.get(concept):
                        concept_to_label[concept] = label

        template_order: Dict[str, float] = {}
        # Sorted iteration keeps the result independent of set/hash ordering.
        for concept_or_label in sorted(concepts, key=str):
            is_concept = concept_or_label in known_concepts
            is_label = concept_or_label in label_to_xbrl

            if is_concept:
                xbrl_concept = concept_or_label
                corresponding_label = concept_to_label.get(concept_or_label)
            elif is_label:
                xbrl_concept = label_to_xbrl.get(concept_or_label)
                corresponding_label = concept_or_label
            else:
                # Neither concept nor label found in mappings
                xbrl_concept = None
                corresponding_label = concept_or_label

            # Try concept-based matching first, then label-based
            template_pos = self.templates.get_template_position(
                item_concept=xbrl_concept,
                item_label=corresponding_label,
                statement_type=self.statement_type
            )
            if template_pos is None:
                continue

            template_order[concept_or_label] = template_pos
            # IMPORTANT: also apply the position to the counterpart key so the
            # ordering is the same whether concept or label is used as key.
            if is_concept and corresponding_label and corresponding_label in concepts:
                template_order[corresponding_label] = template_pos
            elif is_label and xbrl_concept and xbrl_concept in concepts:
                template_order[xbrl_concept] = template_pos
        return template_order

    def _apply_reference_ordering(self, concepts: set, statements: List[Dict],
                                  template_positioned: Dict[str, float]) -> Dict[str, float]:
        """Apply reference statement ordering for remaining concepts.

        Returns a copy of *template_positioned* extended with the reference
        position of every key of *concepts* that has one and is not already
        template positioned.
        """
        reference_order = self.reference_strategy.establish_reference_order(statements)
        combined_order = template_positioned.copy()
        for concept in sorted(concepts, key=str):
            if concept not in combined_order and concept in reference_order:
                combined_order[concept] = reference_order[concept]
        return combined_order

    def _apply_semantic_positioning(self, concepts: set, template_positioned: Dict[str, float],
                                    reference_positioned: Dict[str, float]) -> Dict[str, float]:
        """Apply semantic positioning for orphan concepts.

        *template_positioned* is accepted for signature compatibility but not
        read here. Each inferred position is added before the next concept is
        processed, so concepts are visited in sorted order to keep the result
        deterministic.
        """
        final_order = reference_positioned.copy()
        for concept in sorted(concepts, key=str):
            if concept not in final_order:
                final_order[concept] = self.semantic_positioning.infer_position(concept, final_order)
        return final_order

    def _consolidate_section_ordering(self, semantic_positioned: Dict[str, float],
                                      template_positioned: Dict[str, float],
                                      statements: List[Dict]) -> Dict[str, float]:
        """
        Consolidate ordering to maintain template section groupings.

        This prevents reference ordering from breaking up logical template sections
        like per-share data (EPS + Shares Outstanding).

        *statements* is accepted for signature compatibility but not read here.
        """
        # Identify template sections and their concepts
        template_sections = self._identify_template_sections(template_positioned)

        # Separate template-positioned from non-template items
        template_items = {}
        non_template_items = {}
        for concept, position in semantic_positioned.items():
            if concept in template_positioned:
                template_items[concept] = position
            else:
                non_template_items[concept] = position

        final_ordering = {}
        # Process template sections in order
        for section_name, section_concepts in template_sections.items():
            section_template_items = [c for c in section_concepts if c in template_items]
            already_listed = set(section_template_items)

            # Keys sharing a template position with a section member (typically
            # the label paired with a matched concept) belong to the section too.
            section_template_positions = {
                template_positioned[c] for c in section_concepts if c in template_positioned
            }
            for item, pos in template_items.items():
                if pos in section_template_positions and item not in already_listed:
                    section_template_items.append(item)
                    already_listed.add(item)

            if not section_template_items:
                continue

            if section_name == "per_share":
                # Force per-share items to be at the very end, regardless of hierarchy
                section_base_pos = 950.0
            else:
                section_base_pos = self._get_section_base_position(section_name)

            # Ensure all items in this section stay grouped together
            ordered_items = sorted(section_template_items,
                                   key=lambda x: template_items.get(x, 999.0))
            for i, item in enumerate(ordered_items):
                final_ordering[item] = section_base_pos + i * 0.1

        # A template-positioned key that no section claimed keeps its template
        # position instead of disappearing from the result.
        for item, pos in template_items.items():
            final_ordering.setdefault(item, pos)

        # Add non-template items, adjusting positions to avoid breaking template sections
        section_ranges = self._get_section_ranges(final_ordering, template_sections)
        for concept, position in non_template_items.items():
            final_ordering[concept] = self._find_insertion_point(position, section_ranges)
        return final_ordering

    def _get_template(self) -> Optional[list]:
        """Return the template for this statement type, or None if there is none."""
        if self.statement_type == "IncomeStatement":
            return self.templates.INCOME_STATEMENT_TEMPLATE
        if self.statement_type == "BalanceSheet":
            return self.templates.BALANCE_SHEET_TEMPLATE
        return None

    def _get_section_base_position(self, section_name: str) -> float:
        """Get the base position for a template section (999.0 when unknown)."""
        template = self._get_template()
        if template:
            for base_pos, name, _concepts in template:
                if name == section_name:
                    return float(base_pos)
        return 999.0

    def _identify_template_sections(self, template_positioned: Dict[str, float]) -> Dict[str, List[str]]:
        """Identify which concepts belong to which template sections.

        A key joins a section only when it matches one of the section's
        template entries AND its template position lies inside the section's
        slot range. The second condition keeps a fuzzily matched label (for
        example "Total Current Assets" vs "Total Current Liabilities") from
        being claimed by more than one section.
        """
        template = self._get_template()
        if not template:
            return {}

        sections = {}
        for base_pos, section_name, template_concepts in template:
            slot_end = base_pos + len(template_concepts)
            section_concepts = [
                concept for concept, position in template_positioned.items()
                if base_pos <= position < slot_end
                and any(self._concept_matches_template(concept, template_concept)
                        for template_concept in template_concepts)
            ]
            if section_concepts:
                sections[section_name] = section_concepts
        return sections

    def _concept_matches_template(self, concept: str, template_concept: str) -> bool:
        """Check if a concept matches a template concept"""
        # For XBRL concepts, do direct comparison
        if ':' in template_concept or '_gaap_' in template_concept.lower():
            return self._normalize_xbrl_concept(concept) == self._normalize_xbrl_concept(template_concept)
        # For labels, use fuzzy matching
        return self._labels_match(concept, template_concept)

    def _get_section_ranges(self, final_ordering: Dict[str, float],
                            template_sections: Dict[str, List[str]]) -> List[Tuple[float, float, str]]:
        """Get the position ranges occupied by each template section.

        Returns ``(min_pos, max_pos, section_name)`` tuples in ascending order.
        """
        ranges = []
        for section_name, concepts in template_sections.items():
            section_positions = [final_ordering[c] for c in concepts if c in final_ordering]
            if section_positions:
                ranges.append((min(section_positions), max(section_positions), section_name))
        return sorted(ranges)

    def _find_insertion_point(self, desired_position: float,
                              section_ranges: List[Tuple[float, float, str]]) -> float:
        """Find appropriate insertion point that doesn't break template sections.

        A position falling inside a section's ``[min_pos, max_pos]`` span is
        moved to ``max_pos + 1.0`` (just after that section); any other
        position is returned unchanged. Only the first colliding range is
        considered.
        """
        for min_pos, max_pos, _section_name in section_ranges:
            if min_pos <= desired_position <= max_pos:
                # Place after the section
                return max_pos + 1.0
        # No conflicts, use desired position
        return desired_position

    def _normalize_xbrl_concept(self, concept: str) -> str:
        """Delegate to templates class for concept normalization"""
        return self.templates._normalize_xbrl_concept(concept)

    def _labels_match(self, label1: str, label2: str) -> bool:
        """Delegate to templates class for label matching"""
        return self.templates._labels_match(label1, label2)