Files
edgartools/venv/lib/python3.10/site-packages/edgar/entity/mappings_loader.py
2025-12-09 12:13:01 +01:00

138 lines
3.7 KiB
Python

"""
Loader for learned statement mappings and canonical structures.
This module handles loading and caching of learned mappings from the
structural learning process.
"""
import json
import logging
from functools import lru_cache
from pathlib import Path
from typing import Any, Dict, Optional
log = logging.getLogger(__name__)
@lru_cache(maxsize=1)
def load_learned_mappings() -> Dict[str, Dict[str, Any]]:
    """
    Load learned statement mappings from package data.

    Reads ``data/statement_mappings_v1.json`` located next to this module and
    returns the object stored under its ``'mappings'`` key.

    The result is memoized by ``lru_cache``: after the first call, later calls
    return the cached object without touching the file again. Every caller
    therefore receives the same dict instance, so treat it as read-only. The
    empty-dict fallback is cached too.

    Returns:
        Dictionary of concept -> mapping info. Empty if the file is missing,
        has no ``'mappings'`` key, or cannot be read or parsed.
    """
    try:
        # Get the data file path
        mappings_file = Path(__file__).parent / 'data' / 'statement_mappings_v1.json'
        if not mappings_file.exists():
            log.warning("Learned mappings file not found: %s", mappings_file)
            return {}

        # JSON is UTF-8 by specification; passing the encoding explicitly keeps
        # decoding independent of the platform's locale default.
        with open(mappings_file, 'r', encoding='utf-8') as f:
            data = json.load(f)

        mappings = data.get('mappings', {})
        metadata = data.get('metadata', {})
        log.info(
            "Loaded %d learned concept mappings (version: %s)",
            len(mappings),
            metadata.get('version', 'unknown'),
        )
        return mappings
    except Exception as e:
        # Deliberately best-effort: any read/parse/shape problem degrades to
        # "no learned mappings" instead of propagating to the caller.
        log.error("Error loading learned mappings: %s", e)
        return {}
@lru_cache(maxsize=1)
def load_canonical_structures() -> Dict[str, Any]:
    """
    Load canonical statement structures.

    Reads ``data/learned_mappings.json`` located next to this module and
    returns the parsed JSON document as-is.

    The result is memoized by ``lru_cache``: after the first call, later calls
    return the cached object without touching the file again. Every caller
    therefore receives the same instance, so treat it as read-only. The
    empty-dict fallback is cached too.

    Returns:
        Dictionary of statement_type -> canonical structure. Empty if the
        file is missing or cannot be read or parsed.
    """
    try:
        # NOTE(review): the file is named 'learned_mappings.json' although this
        # function loads canonical structures -- confirm this is the intended file.
        structures_file = Path(__file__).parent / 'data' / 'learned_mappings.json'
        if not structures_file.exists():
            log.warning("Canonical structures file not found: %s", structures_file)
            return {}

        # JSON is UTF-8 by specification; passing the encoding explicitly keeps
        # decoding independent of the platform's locale default.
        with open(structures_file, 'r', encoding='utf-8') as f:
            structures = json.load(f)

        log.info("Loaded canonical structures for %d statement types", len(structures))
        return structures
    except Exception as e:
        # Deliberately best-effort: any read/parse problem degrades to
        # "no canonical structures" instead of propagating to the caller.
        log.error("Error loading canonical structures: %s", e)
        return {}
@lru_cache(maxsize=1)
def load_virtual_trees() -> Dict[str, Any]:
    """
    Load virtual presentation trees.

    Reads ``data/virtual_trees.json`` located next to this module and returns
    the parsed JSON document as-is.

    The result is memoized by ``lru_cache``: after the first call, later calls
    return the cached object without touching the file again. Every caller
    therefore receives the same instance, so treat it as read-only. The
    empty-dict fallback is cached too.

    Returns:
        Dictionary of statement_type -> virtual tree. Empty if the file is
        missing or cannot be read or parsed.
    """
    try:
        trees_file = Path(__file__).parent / 'data' / 'virtual_trees.json'
        if not trees_file.exists():
            log.warning("Virtual trees file not found: %s", trees_file)
            return {}

        # JSON is UTF-8 by specification; passing the encoding explicitly keeps
        # decoding independent of the platform's locale default.
        with open(trees_file, 'r', encoding='utf-8') as f:
            trees = json.load(f)

        log.info("Loaded virtual trees for %d statement types", len(trees))
        return trees
    except Exception as e:
        # Deliberately best-effort: any read/parse problem degrades to
        # "no virtual trees" instead of propagating to the caller.
        log.error("Error loading virtual trees: %s", e)
        return {}
def get_concept_mapping(concept: str) -> Optional[Dict[str, Any]]:
    """
    Look up the learned mapping entry for a single concept.

    Args:
        concept: Concept name (without namespace)

    Returns:
        The mapping info stored for ``concept`` in the learned mappings,
        or None when the concept has no entry.
    """
    # load_learned_mappings() supplies the concept -> info table; a plain
    # .get() yields None for unknown concepts.
    return load_learned_mappings().get(concept)
def get_statement_concepts(statement_type: str,
                           min_confidence: float = 0.5) -> Dict[str, Dict[str, Any]]:
    """
    Select the learned concepts that belong to one statement type.

    Args:
        statement_type: Type of statement (BalanceSheet, IncomeStatement, etc.)
        min_confidence: Minimum confidence threshold (inclusive)

    Returns:
        Dictionary of concept -> mapping info, restricted to entries whose
        'statement_type' equals ``statement_type`` and whose 'confidence'
        (treated as 0 when absent) is at least ``min_confidence``.
    """
    learned = load_learned_mappings()
    # The statement-type test comes first so the confidence comparison is
    # only evaluated for entries of the requested statement type.
    return {
        name: details
        for name, details in learned.items()
        if details.get('statement_type') == statement_type
        and details.get('confidence', 0) >= min_confidence
    }