Initial commit

kdusek
2025-12-09 12:13:01 +01:00
commit 8e654ed209
13332 changed files with 2695056 additions and 0 deletions

View File

@@ -0,0 +1,44 @@
# XBRL2 Standardization
This package provides functionality for standardizing XBRL concepts across different company filings.
## Overview
The standardization module maps company-specific XBRL concepts to standardized concept names,
enabling consistent presentation of financial statements regardless of the filing entity.
This is particularly useful for:
- Comparing financial data across different companies
- Building standardized reports and visualizations
- Creating consistent financial datasets for analysis
## Components
- `StandardConcept`: An enumeration of standard financial statement concepts
- `MappingStore`: Storage for mappings between company-specific and standard concepts
- `ConceptMapper`: Maps company-specific concepts to standard concepts using various techniques
- `standardize_statement`: Function to standardize a statement's labels
## Usage
```python
from edgar.xbrl.standardization import StandardConcept, initialize_default_mappings, ConceptMapper, standardize_statement
# Get the default mappings
store = initialize_default_mappings()
# Create a mapper
mapper = ConceptMapper(store)
# Standardize a statement
standardized_data = standardize_statement(statement_data, mapper)
```
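Mappings can also be extended at runtime. A minimal sketch (`acme_TotalRevenues` is a hypothetical company-specific concept ID used purely for illustration):
```python
# Load the default mappings without writing changes back to the packaged JSON file
store = initialize_default_mappings(read_only=True)

# Register a hypothetical company-specific concept under a standard label
store.add("acme_TotalRevenues", "Revenue")

# The concept now resolves to the standard label
print(store.get_standard_concept("acme_TotalRevenues"))  # -> "Revenue"
```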
## Concept Mappings
The standardized concept mappings are stored in the `concept_mappings.json` file included
in this package. This file maps standard concept names to lists of company-specific concept IDs.
The file is automatically loaded when initializing the `MappingStore` and can be extended
with new mappings as needed.
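The file uses a flat structure in which each standard label maps to the list of concept IDs it covers. An abridged excerpt:
```json
{
  "Revenue": [
    "us-gaap_Revenue",
    "us-gaap_Revenues",
    "us-gaap_SalesRevenueNet"
  ],
  "Net Income": [
    "us-gaap_NetIncome",
    "us-gaap_NetIncomeLoss"
  ]
}
```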

View File

@@ -0,0 +1,17 @@
"""
XBRL concept standardization package.
This package provides functionality to map company-specific XBRL concepts
to standardized concept names, enabling consistent presentation of financial
statements regardless of the filing entity.
"""
from edgar.xbrl.standardization.core import ConceptMapper, MappingStore, StandardConcept, initialize_default_mappings, standardize_statement
__all__ = [
'StandardConcept',
'MappingStore',
'ConceptMapper',
'standardize_statement',
'initialize_default_mappings'
]

View File

@@ -0,0 +1,21 @@
{
"concept_mappings": {
"Sales and Service Revenue": [
"brka_SalesAndServiceRevenue"
]
},
"hierarchy_rules": {
"Revenue": {
"components": [
"Sales and Service Revenue",
"Operating Lease Revenue"
],
"description": "Total revenue comprises sales/service revenue and operating lease income for holding company"
}
},
"business_context": {
"entity_type": "holding_company",
"industry": "diversified_conglomerate",
"description": "Berkshire Hathaway operates diverse businesses including insurance, utilities, railroads, and manufacturing"
}
}

View File

@@ -0,0 +1,64 @@
{
"entity_info": {
"name": "Microsoft Corporation",
"cik": "0000789019",
"ticker": "MSFT",
"description": "Microsoft-specific concept mappings for unique business terminology"
},
"concept_mappings": {
"_comment_msft_revenue": "Microsoft uses specific revenue categorization that differs from standard tech companies",
"Product Revenue": [
"msft_ProductRevenue",
"msft_WindowsCommercialRevenue",
"msft_WindowsConsumerRevenue",
"msft_OfficeCommercialRevenue"
],
"Service Revenue": [
"msft_ServiceRevenue",
"msft_CloudServicesRevenue",
"msft_ConsultingServicesRevenue"
],
"Subscription Revenue": [
"msft_Office365CommercialRevenue",
"msft_Office365ConsumerRevenue",
"msft_DynamicsRevenue"
],
"Platform Revenue": [
"msft_AzureRevenue",
"msft_XboxContentAndServicesRevenue"
],
"_comment_msft_expenses": "Microsoft has unique expense categorizations for sales and marketing vs G&A",
"Sales and Marketing Expense": [
"msft_SalesAndMarketingExpense",
"msft_AdvertisingAndPromotionExpense"
],
"Technical Support Expense": [
"msft_TechnicalSupportExpense",
"msft_CustomerSupportExpense"
]
},
"hierarchy_rules": {
"_comment": "Rules for handling Microsoft-specific hierarchical relationships",
"revenue_hierarchy": {
"parent": "Revenue",
"children": ["Product Revenue", "Service Revenue", "Subscription Revenue", "Platform Revenue"],
"calculation_rule": "sum"
},
"expense_hierarchy": {
"parent": "Operating Expenses",
"children": ["Sales and Marketing Expense", "Technical Support Expense"],
"calculation_rule": "sum"
}
}
}

View File

@@ -0,0 +1,54 @@
{
"metadata": {
"entity_identifier": "tsla",
"company_name": "Tesla, Inc.",
"cik": "1318605",
"priority": "high",
"created_date": "2024-06-25",
"last_updated": "2024-06-25",
"description": "Tesla-specific concept mappings to handle automotive, energy, and service revenue streams"
},
"concept_mappings": {
"Automotive Revenue": [
"tsla_AutomotiveRevenue",
"tsla_AutomotiveSales",
"tsla_VehicleRevenue"
],
"Automotive Leasing Revenue": [
"tsla_AutomotiveLeasing",
"tsla_AutomotiveLeasingRevenue",
"tsla_VehicleLeasingRevenue"
],
"Energy Revenue": [
"tsla_EnergyGenerationAndStorageRevenue",
"tsla_EnergyRevenue",
"tsla_SolarRevenue",
"tsla_EnergyStorageRevenue"
],
"Service Revenue": [
"tsla_ServicesAndOtherRevenue",
"tsla_ServiceRevenue",
"tsla_SuperchargerRevenue"
]
},
"hierarchy_rules": {
"Revenue": {
"children": [
"Automotive Revenue",
"Energy Revenue",
"Service Revenue"
]
},
"Automotive Revenue": {
"children": [
"Automotive Leasing Revenue"
]
}
},
"business_context": {
"primary_revenue_streams": ["automotive", "energy", "services"],
"revenue_model": "product_and_service",
"key_metrics": ["vehicle_deliveries", "energy_deployments"],
"industry": "automotive_technology"
}
}

View File

@@ -0,0 +1,353 @@
{
"_comment_revenue_hierarchy": "REVENUE HIERARCHY FIX: Separated total revenue from component revenue types to prevent duplicate labels. Contract and product revenue are components that should have distinct labels from total revenue.",
"Revenue": [
"us-gaap_Revenue",
"us-gaap_Revenues",
"us-gaap_SalesRevenueNet",
"us-gaap_OperatingRevenue"
],
"Contract Revenue": [
"us-gaap_RevenueFromContractWithCustomerExcludingAssessedTax",
"us-gaap_RevenueFromContractWithCustomerIncludingAssessedTax"
],
"Product Revenue": [
"us-gaap_SalesRevenueGoodsNet",
"us-gaap_ProductSales"
],
"Operating Lease Revenue": [
"us-gaap_OperatingLeaseLeaseIncome"
],
"_comment_cost_of_revenue_hierarchy": "COST OF REVENUE HIERARCHY FIX: Separated different cost types to prevent duplicate labels. Different business models (manufacturing, service, mixed) use different cost concepts that should have distinct labels for clarity.",
"Cost of Revenue": [
"us-gaap_CostOfRevenueAbstract"
],
"Total Cost of Revenue": [
"us-gaap_CostOfRevenue"
],
"Cost of Goods Sold": [
"us-gaap_CostOfGoodsSold"
],
"Cost of Goods and Services Sold": [
"us-gaap_CostOfGoodsAndServicesSold"
],
"Cost of Sales": [
"us-gaap_CostOfSales"
],
"Cost of Goods and Services Excluding Depreciation": [
"us-gaap_CostOfGoodsAndServiceExcludingDepreciationDepletionAndAmortization"
],
"Direct Operating Costs": [
"us-gaap_DirectOperatingCosts"
],
"Costs and Expenses": [
"us-gaap_CostsAndExpenses"
],
"Gross Profit": [
"us-gaap_GrossProfit"
],
"Operating Expenses": [
"us-gaap_NoninterestExpense",
"us-gaap_OperatingCostsAndExpenses",
"us-gaap_OperatingExpenses"
],
"Research and Development Expense": [
"us-gaap_ResearchAndDevelopmentCosts",
"us-gaap_ResearchAndDevelopmentExpense"
],
"_comment_sga_hierarchy": "SG&A HIERARCHY FIX: Separated total SG&A from components to prevent duplicate labels. Previously all three concepts below mapped to 'Selling, General and Administrative Expense' causing confusion when companies report both total and components.",
"Selling, General and Administrative Expense": [
"us-gaap_SellingGeneralAndAdministrativeExpense"
],
"General and Administrative Expense": [
"us-gaap_GeneralAndAdministrativeExpense",
"us-gaap_AdministrativeExpense"
],
"Selling Expense": [
"us-gaap_SellingAndMarketingExpense",
"us-gaap_SellingExpense"
],
"Marketing Expense": [
"us-gaap_MarketingExpense",
"us-gaap_AdvertisingExpense"
],
"Operating Income": [
"us-gaap_OperatingIncomeLoss",
"us-gaap_OperatingIncome",
"us-gaap_IncomeLossFromContinuingOperationsBeforeInterestAndTaxes"
],
"Nonoperating Income/Expense": [
"orcl_NonoperatingIncomeExpenseIncludingEliminationOfNetIncomeLossAttributableToNoncontrollingInterests",
"us-gaap_NonoperatingIncomeExpense"
],
"Interest Expense": [
"us-gaap_InterestAndDebtExpense",
"us-gaap_InterestExpense",
"us-gaap_InterestIncomeExpenseNet"
],
"Interest Expense (operating)": [
"us-gaap_InterestExpenseOperating"
],
"Interest Expense (non-operating)": [
"us-gaap_InterestExpenseNonoperating"
],
"_comment_income_before_tax_hierarchy": "INCOME BEFORE TAX HIERARCHY FIX: Separated total income before tax from component types to prevent duplicate labels. Continuing operations and extraordinary items are components that should have distinct labels.",
"Income Before Tax": [
"us-gaap_IncomeLossBeforeIncomeTaxes"
],
"Income Before Tax from Continuing Operations": [
"us-gaap_IncomeLossFromContinuingOperationsBeforeIncomeTaxes",
"us-gaap_IncomeLossFromContinuingOperationsBeforeIncomeTaxesExtraordinaryItemsNoncontrollingInterest",
"orcl_IncomeLossFromContinuingOperationsIncludingNoncontrollingInterestBeforeIncomeTaxesExtraordinaryItems"
],
"Income Tax Expense": [
"us-gaap_IncomeTaxesPaidNet",
"us-gaap_IncomeTaxExpenseBenefit"
],
"_comment_net_income_hierarchy": "NET INCOME HIERARCHY FIX: Separated total net income from component income types to prevent duplicate labels. Continuing operations income and profit/loss are components that should have distinct labels from total net income.",
"Net Income": [
"us-gaap_NetIncome",
"us-gaap_NetIncomeLoss"
],
"Net Income from Continuing Operations": [
"us-gaap_IncomeLossFromContinuingOperationsIncludingPortionAttributableToNoncontrollingInterest",
"us-gaap_IncomeLossFromContinuingOperations"
],
"Profit or Loss": [
"us-gaap_ProfitLoss"
],
"Net Income Attributable to Noncontrolling Interest": [
"us-gaap_NetIncomeLossAttributableToNonredeemableNoncontrollingInterest",
"us-gaap_NetIncomeLossAttributableToNoncontrollingInterest"
],
"Basic Net Income Available to Common Shareholders": [
"us-gaap_NetIncomeLossAvailableToCommonStockholdersBasic"
],
"Diluted Net Income Available to Common Shareholders": [
"us-gaap_NetIncomeLossAvailableToCommonStockholdersDiluted"
],
"Accumulated Other Comprehensive Income/Loss": [
"us-gaap_AccumulatedOtherComprehensiveIncomeLossNetOfTax"
],
"Earnings Per Share": [
"us-gaap_EarningsPerShareAbstract"
],
"Earnings Per Share (Basic)": [
"us-gaap_EarningsPerShareBasic"
],
"Earnings Per Share (Diluted)": [
"us-gaap_EarningsPerShareDiluted"
],
"Shares Outstanding": [
"us-gaap_WeightedAverageNumberOfSharesOutstandingAbstract"
],
"Shares Outstanding (Basic)": [
"us-gaap_WeightedAverageNumberOfSharesOutstandingBasic"
],
"Shares Outstanding (Diluted)": [
"us-gaap_WeightedAverageNumberOfDilutedSharesOutstanding"
],
"Cash and Cash Equivalents": [
"us-gaap_CashEquivalentsAtCarryingValue",
"us-gaap_Cash",
"us-gaap_CashAndCashEquivalentsAtCarryingValue",
"us-gaap_CashCashEquivalentsAndShortTermInvestments"
],
"Accounts Receivable": [
"us-gaap_AccountsReceivableNet",
"us-gaap_ReceivablesNetCurrent",
"us-gaap_AccountsReceivableNetCurrent",
"us-gaap_AccountsReceivableGross"
],
"Inventory": [
"us-gaap_InventoryGross",
"us-gaap_InventoryFinishedGoods",
"us-gaap_InventoryNet"
],
"Prepaid Expenses": [
"us-gaap_PrepaidExpenseAndOtherAssetsCurrent",
"us-gaap_PrepaidExpenseCurrent"
],
"Current Marketable Securities": [
"us-gaap_AvailableForSaleSecuritiesDebtSecuritiesCurrent",
"us-gaap_MarketableSecuritiesCurrent"
],
"Non Current Marketable Securities": [
"us-gaap_MarketableSecuritiesNoncurrent"
],
"Total Current Assets": [
"us-gaap_AssetsCurrent"
],
"Total Non Current Assets": [
"us-gaap_AssetsNoncurrent"
],
"Property, Plant and Equipment": [
"us-gaap_PropertyPlantAndEquipmentGross",
"us-gaap_PropertyPlantAndEquipmentNet",
"us-gaap_FixedAssets"
],
"Goodwill": [
"us-gaap_Goodwill"
],
"Intangible Assets": [
"us-gaap_IntangibleAssetsNetIncludingGoodwill",
"us-gaap_IntangibleAssetsNetExcludingGoodwill",
"us-gaap_FiniteLivedIntangibleAssetsNet"
],
"Total Assets": [
"us-gaap_Assets",
"us-gaap_AssetsTotal"
],
"Long-Term Investments": [
"us-gaap_LongTermInvestments"
],
"Accounts Payable": [
"us-gaap_AccountsPayableCurrent",
"us-gaap_AccountsPayableTradeCurrent"
],
"Accrued Liabilities": [
"us-gaap_OtherAccruedLiabilitiesCurrent",
"us-gaap_AccruedLiabilitiesCurrent",
"us-gaap_EmployeeRelatedLiabilitiesCurrent"
],
"Short Term Debt": [
"us-gaap_DebtCurrent",
"us-gaap_ShortTermBorrowings",
"us-gaap_LongTermDebtCurrent"
],
"Total Current Liabilities": [
"us-gaap_LiabilitiesCurrent"
],
"Total Non Current Liabilities": [
"us-gaap_LiabilitiesNoncurrent"
],
"Long Term Debt": [
"us-gaap_LongTermDebtAndCapitalLeaseObligations",
"us-gaap_LongTermDebt",
"us-gaap_LongTermBorrowings",
"us-gaap_LongTermDebtNoncurrent"
],
"Notes Payable, Current": [
"us-gaap_NotesPayableCurrent"
],
"Notes Payable, Non Current": [
"us-gaap_LongTermNotesAndLoans"
],
"Deferred Revenue": [
"us-gaap_DeferredRevenueNoncurrent",
"us-gaap_DeferredRevenueCurrent",
"us-gaap_DeferredRevenue"
],
"Total Liabilities": [
"us-gaap_LiabilitiesTotal",
"us-gaap_Liabilities"
],
"Common Stock Shares Outstanding": [
"us-gaap_CommonStockSharesOutstanding"
],
"Common Stock Shares Issued": [
"us-gaap_CommonStockSharesIssued"
],
"Common Stock": [
"us-gaap_CommonStocksIncludingAdditionalPaidInCapital",
"us-gaap_StockholdersEquityCommonStock",
"us-gaap_CommonStockValue"
],
"Preferred Stock": [
"us-gaap_PreferredStockValue"
],
"Treasury Stock Common Value": [
"us-gaap_TreasuryStockCommonValue",
"us-gaap_TreasuryStockValue"
],
"Retained Earnings": [
"us-gaap_RetainedEarnings",
"us-gaap_RetainedEarningsAccumulatedDeficit"
],
"Minority Interest": [
"us-gaap_MinorityInterest",
"us-gaap_NoncontrollingInterest"
],
"Total Stockholders' Equity": [
"us-gaap_EquityAttributableToParent",
"us-gaap_StockholdersEquity",
"us-gaap_StockholdersEquityIncludingPortionAttributableToNoncontrollingInterest",
"us-gaap_StockholdersEquityAttributableToParent"
],
"Total Liabilities and Stockholders' Equity": [
"us-gaap_LiabilitiesAndStockholdersEquity"
],
"Net Cash from Operating Activities": [
"us-gaap_NetCashProvidedByUsedInOperatingActivities",
"us-gaap_NetCashProvidedByUsedInOperatingActivitiesContinuingOperations"
],
"Net Cash from Investing Activities": [
"us-gaap_NetCashProvidedByUsedInInvestingActivities",
"us-gaap_NetCashProvidedByUsedInInvestingActivitiesContinuingOperations"
],
"Net Cash from Financing Activities": [
"us-gaap_NetCashProvidedByUsedInFinancingActivitiesContinuingOperations",
"us-gaap_NetCashProvidedByUsedInFinancingActivities"
],
"Net Change in Cash": [
"us-gaap_IncreaseDecreaseInCashAndCashEquivalents",
"us-gaap_CashAndCashEquivalentsPeriodIncreaseDecrease",
"us-gaap_CashCashEquivalentsRestrictedCashAndRestrictedCashEquivalentsPeriodIncreaseDecreaseIncludingExchangeRateEffect"
],
"Payments for Property, Plant and Equipment": [
"us-gaap_PaymentsToAcquirePropertyPlantAndEquipment"
],
"Payments of Dividends": [
"us-gaap_PaymentsOfDividends"
],
"Tax Withholding for Share-Based Compensation": [
"us-gaap_PaymentsRelatedToTaxWithholdingForShareBasedCompensation"
],
"Payments to Acquire Businesses": [
"us-gaap_PaymentsToAcquireBusinessesNetOfCashAcquired"
],
"Proceeds from Issuance of Common Stock": [
"us-gaap_ProceedsFromIssuanceOfCommonStock"
],
"Proceeds from Issuance of Long-Term Debt": [
"us-gaap_ProceedsFromIssuanceOfLongTermDebt"
],
"Proceeds from Maturities, Prepayments and Calls of Securities": [
"us-gaap_ProceedsFromMaturitiesPrepaymentsAndCallsOfAvailableForSaleSecurities"
],
"Proceeds from Sale and Maturity of Other Investments": [
"us-gaap_ProceedsFromSaleAndMaturityOfOtherInvestments"
],
"Proceeds from Sale of Debt Securities, ": [
"us-gaap_ProceedsFromSaleOfAvailableForSaleSecuritiesDebt"
],
"Proceeds from (Repayments of) Commercial Paper": [
"us-gaap_ProceedsFromRepaymentsOfCommercialPaper"
],
"Other Assets": [
"us-gaap_OtherAssets"
],
"Other Current Assets": [
"us-gaap_OtherAssetsCurrent"
],
"Other Non Current Assets": [
"us-gaap_OtherAssetsNoncurrent"
],
"Deferred Tax Assets": [
"us-gaap_DeferredIncomeTaxAssetsNet"
],
"Other Liabilities": [
"us-gaap_OtherLiabilities"
],
"Other Current Liabilities": [
"us-gaap_OtherLiabilitiesCurrent"
],
"Other Non Current Liabilities": [
"us-gaap_OtherLiabilitiesNoncurrent"
],
"Depreciation and Amortization": [
"us-gaap_AmortizationOfIntangibleAssets",
"us-gaap_Depreciation",
"us-gaap_DepreciationAndAmortization"
]
}

View File

@@ -0,0 +1,817 @@
"""
Module for standardizing XBRL concepts across different company filings.
This module provides functionality to map company-specific XBRL concepts
to standardized concept names, enabling consistent presentation of financial
statements regardless of the filing entity.
"""
import json
import os
from difflib import SequenceMatcher
from enum import Enum
from typing import Any, Dict, List, Optional, Set, Tuple
import pandas as pd
class StandardConcept(str, Enum):
"""
Standardized concept names for financial statements.
The enum value (string) is the display label used for presentation.
These labels should match keys in concept_mappings.json.
"""
# Balance Sheet - Assets
CASH_AND_EQUIVALENTS = "Cash and Cash Equivalents"
ACCOUNTS_RECEIVABLE = "Accounts Receivable"
INVENTORY = "Inventory"
PREPAID_EXPENSES = "Prepaid Expenses"
TOTAL_CURRENT_ASSETS = "Total Current Assets"
PROPERTY_PLANT_EQUIPMENT = "Property, Plant and Equipment"
GOODWILL = "Goodwill"
INTANGIBLE_ASSETS = "Intangible Assets"
TOTAL_ASSETS = "Total Assets"
# Balance Sheet - Liabilities
ACCOUNTS_PAYABLE = "Accounts Payable"
ACCRUED_LIABILITIES = "Accrued Liabilities"
SHORT_TERM_DEBT = "Short Term Debt"
TOTAL_CURRENT_LIABILITIES = "Total Current Liabilities"
LONG_TERM_DEBT = "Long Term Debt"
DEFERRED_REVENUE = "Deferred Revenue"
TOTAL_LIABILITIES = "Total Liabilities"
# Balance Sheet - Equity
COMMON_STOCK = "Common Stock"
RETAINED_EARNINGS = "Retained Earnings"
TOTAL_EQUITY = "Total Stockholders' Equity"
# Income Statement - Revenue Hierarchy
REVENUE = "Revenue"
CONTRACT_REVENUE = "Contract Revenue"
PRODUCT_REVENUE = "Product Revenue"
SERVICE_REVENUE = "Service Revenue"
SUBSCRIPTION_REVENUE = "Subscription Revenue"
LEASING_REVENUE = "Leasing Revenue"
# Industry-Specific Revenue Concepts
AUTOMOTIVE_REVENUE = "Automotive Revenue"
AUTOMOTIVE_LEASING_REVENUE = "Automotive Leasing Revenue"
ENERGY_REVENUE = "Energy Revenue"
SOFTWARE_REVENUE = "Software Revenue"
HARDWARE_REVENUE = "Hardware Revenue"
PLATFORM_REVENUE = "Platform Revenue"
# Income Statement - Expenses
COST_OF_REVENUE = "Cost of Revenue"
COST_OF_GOODS_SOLD = "Cost of Goods Sold"
COST_OF_GOODS_AND_SERVICES_SOLD = "Cost of Goods and Services Sold"
COST_OF_SALES = "Cost of Sales"
COSTS_AND_EXPENSES = "Costs and Expenses"
DIRECT_OPERATING_COSTS = "Direct Operating Costs"
GROSS_PROFIT = "Gross Profit"
OPERATING_EXPENSES = "Operating Expenses"
RESEARCH_AND_DEVELOPMENT = "Research and Development Expense"
# Enhanced Expense Hierarchy
SELLING_GENERAL_ADMIN = "Selling, General and Administrative Expense"
SELLING_EXPENSE = "Selling Expense"
GENERAL_ADMIN_EXPENSE = "General and Administrative Expense"
MARKETING_EXPENSE = "Marketing Expense"
SALES_EXPENSE = "Sales Expense"
# Other Income Statement
OPERATING_INCOME = "Operating Income"
INTEREST_EXPENSE = "Interest Expense"
INCOME_BEFORE_TAX = "Income Before Tax"
INCOME_BEFORE_TAX_CONTINUING_OPS = "Income Before Tax from Continuing Operations"
INCOME_TAX_EXPENSE = "Income Tax Expense"
NET_INCOME = "Net Income"
NET_INCOME_CONTINUING_OPS = "Net Income from Continuing Operations"
NET_INCOME_NONCONTROLLING = "Net Income Attributable to Noncontrolling Interest"
PROFIT_OR_LOSS = "Profit or Loss"
# Cash Flow Statement
CASH_FROM_OPERATIONS = "Net Cash from Operating Activities"
CASH_FROM_INVESTING = "Net Cash from Investing Activities"
CASH_FROM_FINANCING = "Net Cash from Financing Activities"
NET_CHANGE_IN_CASH = "Net Change in Cash"
@classmethod
def get_from_label(cls, label: str) -> Optional['StandardConcept']:
"""
Get a StandardConcept enum by its label value.
Args:
label: The label string to look up
Returns:
The corresponding StandardConcept or None if not found
"""
for concept in cls:
if concept.value == label:
return concept
return None
@classmethod
def get_all_values(cls) -> Set[str]:
"""
Get all label values defined in the enum.
Returns:
Set of all label strings
"""
return {concept.value for concept in cls}
class MappingStore:
"""
Storage for mappings between company-specific concepts and standard concepts.
Attributes:
source (str): Path to the JSON file storing the mappings
mappings (Dict[str, Set[str]]): Dictionary mapping standard concepts to sets of company concepts
company_mappings (Dict[str, Dict]): Company-specific mappings loaded from company_mappings/
merged_mappings (Dict[str, List[Tuple]]): Merged mappings with priority scoring
"""
def __init__(self, source: Optional[str] = None, validate_with_enum: bool = False, read_only: bool = False):
"""
Initialize the mapping store.
Args:
source: Path to the JSON file storing the mappings. If None, uses default location.
validate_with_enum: Whether to validate JSON keys against StandardConcept enum
read_only: If True, never save changes back to the file (used in testing)
"""
self.read_only = read_only
if source is None:
# Try a few different ways to locate the file, handling both development
# and installed package scenarios
self.source = None
# Default to a file in the same directory as this module (development mode)
module_dir = os.path.dirname(os.path.abspath(__file__))
potential_path = os.path.join(module_dir, "concept_mappings.json")
if os.path.exists(potential_path):
self.source = potential_path
# If not found, try to load from package data (installed package)
if self.source is None:
try:
import importlib.resources as pkg_resources
try:
# For Python 3.9+
with pkg_resources.files('edgar.xbrl.standardization').joinpath('concept_mappings.json').open('r') as f:
# Just read the file to see if it exists, we'll load it properly later
f.read(1)
self.source = potential_path # Use the same path as before
except (ImportError, FileNotFoundError, AttributeError):
# Fallback for older Python versions
try:
import pkg_resources as legacy_resources
if legacy_resources.resource_exists('edgar.xbrl.standardization', 'concept_mappings.json'):
self.source = potential_path # Use the same path as before
except (ImportError, FileNotFoundError):
pass
except ImportError:
pass
# If we still haven't found the file, use the default path anyway
# (it will fail gracefully in _load_mappings)
if self.source is None:
self.source = potential_path
else:
self.source = source
self.mappings = self._load_mappings()
# Load company-specific mappings (always enabled)
self.company_mappings = self._load_all_company_mappings()
self.merged_mappings = self._create_merged_mappings()
self.hierarchy_rules = self._load_hierarchy_rules()
# Validate the loaded mappings against StandardConcept enum
if validate_with_enum:
self.validate_against_enum()
def validate_against_enum(self) -> Tuple[bool, List[str]]:
"""
Validate that all keys in the mappings exist in StandardConcept enum.
Returns:
Tuple of (is_valid, list_of_missing_keys)
"""
standard_values = StandardConcept.get_all_values()
json_keys = set(self.mappings.keys())
# Find keys in JSON that aren't in enum
missing_in_enum = json_keys - standard_values
# Find enum values not in JSON (just for information)
missing_in_json = standard_values - json_keys
import logging
logger = logging.getLogger(__name__)
if missing_in_enum:
logger.warning("Found %d keys in concept_mappings.json that don't exist in StandardConcept enum: %s", len(missing_in_enum), sorted(missing_in_enum))
if missing_in_json:
logger.info("Found %d StandardConcept values without mappings in concept_mappings.json: %s", len(missing_in_json), sorted(missing_in_json))
return len(missing_in_enum) == 0, list(missing_in_enum)
def to_dataframe(self) -> pd.DataFrame:
"""
Convert mappings to a pandas DataFrame for analysis and visualization.
Returns:
DataFrame with columns for standard_concept and company_concept
"""
try:
import pandas as pd
except ImportError:
raise ImportError("pandas is required for to_dataframe() but is not installed") from None
rows = []
for standard_concept, company_concepts in self.mappings.items():
for company_concept in company_concepts:
rows.append({
'standard_concept': standard_concept,
'company_concept': company_concept
})
return pd.DataFrame(rows)
def _load_all_company_mappings(self) -> Dict[str, Dict]:
"""Load all company-specific mapping files from company_mappings/ directory."""
mappings = {}
company_dir = os.path.join(os.path.dirname(self.source or __file__), "company_mappings")
if os.path.exists(company_dir):
for file in os.listdir(company_dir):
if file.endswith("_mappings.json"):
entity_id = file.replace("_mappings.json", "")
try:
with open(os.path.join(company_dir, file), 'r') as f:
company_data = json.load(f)
mappings[entity_id] = company_data
except (FileNotFoundError, json.JSONDecodeError) as e:
import logging
logger = logging.getLogger(__name__)
logger.warning("Failed to load %s: %s", file, e)
return mappings
def _create_merged_mappings(self) -> Dict[str, List[Tuple[str, str, int]]]:
"""Create merged mappings with priority scoring.
Priority levels:
1. Core mappings (lowest)
2. Company mappings (higher)
3. Company-specific matches (highest when company detected)
Returns:
Dict mapping standard concepts to list of (company_concept, source, priority) tuples
"""
merged = {}
# Add core mappings (priority 1 - lowest)
for std_concept, company_concepts in self.mappings.items():
merged[std_concept] = []
for concept in company_concepts:
merged[std_concept].append((concept, "core", 1))
# Add company mappings (priority 2 - higher)
for entity_id, company_data in self.company_mappings.items():
concept_mappings = company_data.get("concept_mappings", {})
priority_level = 2
for std_concept, company_concepts in concept_mappings.items():
if std_concept not in merged:
merged[std_concept] = []
for concept in company_concepts:
merged[std_concept].append((concept, entity_id, priority_level))
return merged
def _load_hierarchy_rules(self) -> Dict[str, Dict]:
"""Load hierarchy rules from company mappings."""
all_rules = {}
# Add company hierarchy rules
for _entity_id, company_data in self.company_mappings.items():
hierarchy_rules = company_data.get("hierarchy_rules", {})
all_rules.update(hierarchy_rules)
return all_rules
def _detect_entity_from_concept(self, concept: str) -> Optional[str]:
"""Detect entity identifier from concept name prefix."""
if '_' in concept:
prefix = concept.split('_')[0].lower()
# Check if this prefix corresponds to a known company
if prefix in self.company_mappings:
return prefix
return None
def _load_mappings(self) -> Dict[str, Set[str]]:
"""
Load mappings from the JSON file.
Returns:
Dictionary mapping standard concepts to sets of company concepts
"""
data = None
# First try direct file access
try:
with open(self.source, 'r') as f:
data = json.load(f)
except (FileNotFoundError, IOError, PermissionError):
# If direct file access fails, try package resources
try:
try:
# Modern importlib.resources approach (Python 3.9+)
import importlib.resources as pkg_resources
try:
# For Python 3.9+
with pkg_resources.files('edgar.xbrl.standardization').joinpath('concept_mappings.json').open('r') as f:
data = json.load(f)
except (ImportError, FileNotFoundError, AttributeError):
# Fallback to legacy pkg_resources
import pkg_resources as legacy_resources
resource_string = legacy_resources.resource_string('edgar.xbrl.standardization', 'concept_mappings.json')
data = json.loads(resource_string)
except ImportError:
pass
except Exception:
# If all attempts fail, log a warning
import logging
logger = logging.getLogger(__name__)
logger.warning("Could not load concept_mappings.json. Standardization will be limited.")
# If we have data, process it based on its structure
if data:
# Check if the structure is flat or nested
if any(isinstance(value, dict) for value in data.values()):
# Nested structure by statement type
flattened = {}
for _statement_type, concepts in data.items():
for standard_concept, company_concepts in concepts.items():
flattened[standard_concept] = set(company_concepts)
return flattened
else:
# Flat structure
return {k: set(v) for k, v in data.items()}
# If all methods fail, return empty mappings
# The initialize_default_mappings function will create a file if needed
return {}
def _save_mappings(self) -> None:
"""Save mappings to the JSON file, unless in read_only mode."""
# Skip saving if in read_only mode
if self.read_only:
return
# Ensure directory exists
directory = os.path.dirname(self.source)
if directory and not os.path.exists(directory):
os.makedirs(directory, exist_ok=True)
# Convert sets to lists for JSON serialization
serializable_mappings = {k: list(v) for k, v in self.mappings.items()}
with open(self.source, 'w') as f:
json.dump(serializable_mappings, f, indent=2)
def add(self, company_concept: str, standard_concept: str) -> None:
"""
Add a mapping from a company concept to a standard concept.
Args:
company_concept: The company-specific concept
standard_concept: The standard concept
"""
if standard_concept not in self.mappings:
self.mappings[standard_concept] = set()
self.mappings[standard_concept].add(company_concept)
self._save_mappings()
def get_standard_concept(self, company_concept: str, context: Optional[Dict] = None) -> Optional[str]:
"""
Get the standard concept for a given company concept with priority-based resolution.
Args:
company_concept: The company-specific concept
context: Optional context information (not used in current implementation)
Returns:
The standard concept or None if not found
"""
# Use merged mappings with priority-based resolution
if self.merged_mappings:
# Detect company from concept prefix (e.g., 'tsla_AutomotiveRevenue' -> 'tsla')
detected_entity = self._detect_entity_from_concept(company_concept)
# Search through merged mappings with priority
candidates = []
for std_concept, mapping_list in self.merged_mappings.items():
for concept, source, priority in mapping_list:
if concept == company_concept:
# Boost priority if it matches detected entity
effective_priority = priority
if detected_entity and source == detected_entity:
effective_priority = 4 # Highest priority for exact company match
candidates.append((std_concept, effective_priority, source))
# Return highest priority match
if candidates:
best_match = max(candidates, key=lambda x: x[1])
import logging
logger = logging.getLogger(__name__)
logger.debug("Mapping applied: %s -> %s (source: %s, priority: %s)", company_concept, best_match[0], best_match[2], best_match[1])
return best_match[0]
# Fallback to core mappings
for standard_concept, company_concepts in self.mappings.items():
if company_concept in company_concepts:
return standard_concept
return None
def get_company_concepts(self, standard_concept: str) -> Set[str]:
"""
Get all company concepts mapped to a standard concept.
Args:
standard_concept: The standard concept
Returns:
Set of company concepts mapped to the standard concept
"""
return self.mappings.get(standard_concept, set())
class ConceptMapper:
"""
Maps company-specific concepts to standard concepts using various techniques.
Attributes:
mapping_store (MappingStore): Storage for concept mappings
pending_mappings (Dict): Low-confidence mappings pending review
_cache (Dict): In-memory cache of mapped concepts
"""
def __init__(self, mapping_store: MappingStore):
"""
Initialize the concept mapper.
Args:
mapping_store: Storage for concept mappings
"""
self.mapping_store = mapping_store
self.pending_mappings = {}
# Cache for faster lookups of previously mapped concepts
self._cache = {}
# Precompute lowercased standard concept values for faster comparison
self._std_concept_values = [(concept, concept.value.lower()) for concept in StandardConcept]
# Statement-specific keyword sets for faster contextual matching
self._bs_keywords = {'assets', 'liabilities', 'equity', 'cash', 'debt', 'inventory', 'receivable', 'payable'}
self._is_keywords = {'revenue', 'sales', 'income', 'expense', 'profit', 'loss', 'tax', 'earnings'}
self._cf_keywords = {'cash', 'operating', 'investing', 'financing', 'activities'}
def map_concept(self, company_concept: str, label: str, context: Dict[str, Any]) -> Optional[str]:
"""
Map a company concept to a standard concept.
Args:
company_concept: The company-specific concept
label: The label for the concept
context: Additional context information (statement type, calculation relationships, etc.)
Returns:
The standard concept or None if no mapping found
"""
# Use cache for faster lookups
cache_key = (company_concept, context.get('statement_type', ''))
if cache_key in self._cache:
return self._cache[cache_key]
# Check if we already have a mapping in the store
standard_concept = self.mapping_store.get_standard_concept(company_concept)
if standard_concept:
self._cache[cache_key] = standard_concept
return standard_concept
# Cache negative results too to avoid repeated inference
self._cache[cache_key] = None
return None
def _infer_mapping(self, company_concept: str, label: str, context: Dict[str, Any]) -> Tuple[Optional[str], float]:
"""
Infer a mapping between a company concept and a standard concept.
Args:
company_concept: The company-specific concept
label: The label for the concept
context: Additional context information
Returns:
Tuple of (standard_concept, confidence)
"""
# Fast path for common patterns
label_lower = label.lower()
# Quick matching for common concepts without full sequence matching
if "total assets" in label_lower:
return StandardConcept.TOTAL_ASSETS.value, 0.95
elif "revenue" in label_lower and len(label_lower) < 30: # Only match short labels to avoid false positives
return StandardConcept.REVENUE.value, 0.9
elif "net income" in label_lower and "parent" not in label_lower:
return StandardConcept.NET_INCOME.value, 0.9
# Faster direct match checking with precomputed lowercase values
for std_concept, std_value_lower in self._std_concept_values:
if std_value_lower == label_lower:
return std_concept.value, 1.0 # Perfect match
# Fall back to sequence matching for similarity
best_match = None
best_score = 0
# Only compute similarity if some relevant keywords are present to reduce workload
statement_type = context.get("statement_type", "")
# Statement type based filtering to reduce unnecessary comparisons
limited_concepts = []
if statement_type == "BalanceSheet":
if any(kw in label_lower for kw in self._bs_keywords):
# Filter to balance sheet concepts only
limited_concepts = [c for c, v in self._std_concept_values
if any(kw in v for kw in self._bs_keywords)]
elif statement_type == "IncomeStatement":
if any(kw in label_lower for kw in self._is_keywords):
# Filter to income statement concepts only
limited_concepts = [c for c, v in self._std_concept_values
if any(kw in v for kw in self._is_keywords)]
elif statement_type == "CashFlowStatement":
if any(kw in label_lower for kw in self._cf_keywords):
# Filter to cash flow concepts only
limited_concepts = [c for c, v in self._std_concept_values
if any(kw in v for kw in self._cf_keywords)]
# Use limited concepts if available, otherwise use all
concepts_to_check = limited_concepts if limited_concepts else [c for c, _ in self._std_concept_values]
# Calculate similarities for candidate concepts
for std_concept in concepts_to_check:
# Calculate similarity between labels
similarity = SequenceMatcher(None, label_lower, std_concept.value.lower()).ratio()
# Check if this is the best match so far
if similarity > best_score:
best_score = similarity
best_match = std_concept.value
# Apply specific contextual rules based on statement type
if statement_type == "BalanceSheet":
if "assets" in label_lower and "total" in label_lower:
if best_match == StandardConcept.TOTAL_ASSETS.value:
best_score = min(1.0, best_score + 0.2)
elif "liabilities" in label_lower and "total" in label_lower:
if best_match == StandardConcept.TOTAL_LIABILITIES.value:
best_score = min(1.0, best_score + 0.2)
elif "equity" in label_lower and ("total" in label_lower or "stockholders" in label_lower):
if best_match == StandardConcept.TOTAL_EQUITY.value:
best_score = min(1.0, best_score + 0.2)
elif statement_type == "IncomeStatement":
if any(term in label_lower for term in ["revenue", "sales"]):
if best_match == StandardConcept.REVENUE.value:
best_score = min(1.0, best_score + 0.2)
elif "net income" in label_lower:
if best_match == StandardConcept.NET_INCOME.value:
best_score = min(1.0, best_score + 0.2)
# Promote to 0.5 confidence if score close enough to help match
# more items that are almost at threshold
if 0.45 <= best_score < 0.5:
best_score = 0.5
# If confidence is too low, return None
if best_score < 0.5:
return None, 0.0
return best_match, best_score
def learn_mappings(self, filings: List[Dict[str, Any]]) -> None:
"""
Learn mappings from a list of filings.
Args:
filings: List of dicts with XBRL data
"""
# Pre-filter to only process unmapped concepts
mapped_concepts = set()
for _std_concept, company_concepts in self.mapping_store.mappings.items():
mapped_concepts.update(company_concepts)
# Process only unmapped filings
unmapped_filings = [f for f in filings if f.get("concept") not in mapped_concepts]
# Create a batch of mappings to add
mappings_to_add = {}
for filing in unmapped_filings:
concept = filing["concept"]
label = filing["label"]
context = {
"statement_type": filing.get("statement_type", ""),
"calculation_parent": filing.get("calculation_parent", ""),
"position": filing.get("position", "")
}
# Infer mapping and confidence
standard_concept, confidence = self._infer_mapping(concept, label, context)
# Handle based on confidence
if standard_concept and confidence >= 0.9:
if standard_concept not in mappings_to_add:
mappings_to_add[standard_concept] = set()
# Record the statement type with the concept so the cache update below uses the right key
mappings_to_add[standard_concept].add((concept, context["statement_type"]))
elif standard_concept and confidence >= 0.5:
if standard_concept not in self.pending_mappings:
self.pending_mappings[standard_concept] = []
self.pending_mappings[standard_concept].append((concept, confidence, label))
# Batch add all mappings at once
for std_concept, concepts in mappings_to_add.items():
for concept, stmt_type in concepts:
self.mapping_store.add(concept, std_concept)
# Update cache using the statement type recorded for this concept
cache_key = (concept, stmt_type)
self._cache[cache_key] = std_concept
def save_pending_mappings(self, destination: str) -> None:
"""
Save pending mappings to a file.
Args:
destination: Path to save the pending mappings
"""
# Convert to serializable format
serializable_mappings = {}
for std_concept, mappings in self.pending_mappings.items():
serializable_mappings[std_concept] = [
{"concept": c, "confidence": conf, "label": lbl}
for c, conf, lbl in mappings
]
with open(destination, 'w') as f:
json.dump(serializable_mappings, f, indent=2)
def standardize_statement(statement_data: List[Dict[str, Any]], mapper: ConceptMapper) -> List[Dict[str, Any]]:
"""
Standardize labels in a statement using the concept mapper.
Args:
statement_data: List of statement line items
mapper: ConceptMapper instance
Returns:
Statement data with standardized labels where possible
"""
# Pre-filter to identify which items need standardization
# This avoids unnecessary copying and processing
items_to_standardize = []
statement_type = statement_data[0].get("statement_type", "") if statement_data else ""
# First pass - identify which items need standardization and prepare context
for i, item in enumerate(statement_data):
# Skip abstract elements and dimensions as they don't need standardization
if item.get("is_abstract", False) or item.get("is_dimension", False):
continue
concept = item.get("concept", "")
if not concept:
continue
label = item.get("label", "")
if not label:
continue
# Build minimal context once, reuse for multiple calls
context = {
"statement_type": item.get("statement_type", "") or statement_type,
"level": item.get("level", 0),
"is_total": "total" in label.lower() or item.get("is_total", False)
}
items_to_standardize.append((i, concept, label, context))
# If no items need standardization, return early with unchanged data
if not items_to_standardize:
return statement_data
# Second pass - create result list with standardized items
result = []
# Track which indices need standardization for faster lookup
standardize_indices = {i for i, _, _, _ in items_to_standardize}
# Process all items
for i, item in enumerate(statement_data):
if i not in standardize_indices:
# Items that don't need standardization are used as-is
result.append(item)
continue
# Get the prepared data for this item
_, concept, label, context = next((x for x in items_to_standardize if x[0] == i), (None, None, None, None))
# Try to map the concept
standard_label = mapper.map_concept(concept, label, context)
# If we found a mapping, create a modified copy
if standard_label:
# Create a shallow copy only when needed
standardized_item = item.copy()
standardized_item["label"] = standard_label
standardized_item["original_label"] = label
result.append(standardized_item)
else:
# No mapping found, use original item
result.append(item)
return result
def create_default_mappings_file(file_path: str) -> None:
"""
Create the initial concept_mappings.json file with default mappings.
This can be called during package installation or initialization.
Args:
file_path: Path where to create the file
"""
# Ensure directory exists
directory = os.path.dirname(file_path)
if directory and not os.path.exists(directory):
os.makedirs(directory, exist_ok=True)
# The file already exists, don't overwrite it
if os.path.exists(file_path):
return
# Create a minimal set of mappings to get started
minimal_mappings = {
StandardConcept.REVENUE.value: [
"us-gaap_Revenue",
"us-gaap_SalesRevenueNet",
"us-gaap_Revenues"
],
StandardConcept.NET_INCOME.value: [
"us-gaap_NetIncome",
"us-gaap_NetIncomeLoss",
"us-gaap_ProfitLoss"
],
StandardConcept.TOTAL_ASSETS.value: [
"us-gaap_Assets",
"us-gaap_AssetsTotal"
]
}
# Write the file
with open(file_path, 'w') as f:
json.dump(minimal_mappings, f, indent=2)
# Initialize MappingStore - only loads from JSON
def initialize_default_mappings(read_only: bool = False) -> MappingStore:
"""
Initialize a MappingStore with mappings from the concept_mappings.json file.
Args:
read_only: If True, prevent writing changes back to the file (used in testing)
Returns:
MappingStore initialized with mappings from JSON file
"""
store = MappingStore(read_only=read_only)
# If JSON file doesn't exist, create it with minimal default mappings
# Only do this in non-read_only mode to avoid test-initiated file creation
if not read_only and not os.path.exists(store.source):
create_default_mappings_file(store.source)
return store
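
A minimal end-to-end sketch of how these pieces fit together, assuming the packaged `concept_mappings.json` is available; the single line item below is illustrative data rather than output from a real filing:
```python
from edgar.xbrl.standardization import initialize_default_mappings, ConceptMapper, standardize_statement

store = initialize_default_mappings(read_only=True)
mapper = ConceptMapper(store)

# One illustrative income statement row; real statement data carries more fields
statement_data = [
    {"concept": "us-gaap_NetIncomeLoss", "label": "Net income", "statement_type": "IncomeStatement"}
]

standardized = standardize_statement(statement_data, mapper)
print(standardized[0]["label"])           # "Net Income"
print(standardized[0]["original_label"])  # "Net income"
```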