Initial commit

kdusek committed 2025-12-09 12:13:01 +01:00
commit 8e654ed209
13332 changed files with 2695056 additions and 0 deletions


@@ -0,0 +1,45 @@
from edgar import Company
from collections import defaultdict

# Get Apple facts
aapl = Company("AAPL")
facts = aapl.facts
raw_facts = facts._facts

print("Analyzing period durations for FY facts:\n")

# Group facts by (fiscal_year, fiscal_period, period_end)
fact_groups = defaultdict(list)
for fact in raw_facts:
    if fact.statement_type == 'IncomeStatement' and fact.fiscal_period == 'FY':
        if fact.fiscal_year and fact.fiscal_year >= 2019 and fact.fiscal_year <= 2021:
            if 'RevenueFromContract' in str(fact.concept) and 'Liability' not in str(fact.concept):
                key = (fact.fiscal_year, fact.fiscal_period, fact.period_end)
                fact_groups[key].append(fact)

# Analyze each group
for key in sorted(fact_groups.keys()):
    year, period, end_date = key
    facts_in_group = fact_groups[key]
    if len(facts_in_group) > 1:
        print(f"\nFY {year} ending {end_date}: {len(facts_in_group)} facts")
        for fact in facts_in_group:
            duration = None
            if fact.period_start and fact.period_end:
                duration = (fact.period_end - fact.period_start).days
            period_type = "Annual" if duration and duration > 300 else "Quarterly" if duration else "Unknown"
            print(f"  ${fact.value:,.0f} - Duration: {duration} days ({period_type})")
            print(f"    Period: {fact.period_start} to {fact.period_end}")
            print(f"    Filed: {fact.filing_date}")
            if hasattr(fact, 'form'):
                print(f"    Form: {fact.form}")
            if hasattr(fact, 'accession'):
                print(f"    Accession: {fact.accession}")

print("\n\nSummary:")
print("The issue: Both annual and quarterly revenue are marked as 'FY'")
print("Solution: Use period duration to distinguish:")
print("  - Annual: period_start to period_end > 300 days")
print("  - Quarterly: period_start to period_end < 100 days")


@@ -0,0 +1,57 @@
from edgar import Company
from collections import defaultdict

# Get Apple facts
aapl = Company("AAPL")
facts = aapl.facts
raw_facts = facts._facts

# Check all FY income statement facts for 2019-2024
print("Checking FY facts and their period_end dates:\n")
print("fiscal_year | fiscal_period | period_end | period_end.year | Match?")
print("-" * 70)

fy_facts = defaultdict(list)
for fact in raw_facts:
    if fact.statement_type == 'IncomeStatement' and fact.fiscal_period == 'FY':
        if fact.fiscal_year and fact.fiscal_year >= 2019:
            fy_facts[fact.fiscal_year].append(fact)

# Show all FY entries grouped by fiscal_year
for year in sorted(fy_facts.keys(), reverse=True):
    facts_for_year = fy_facts[year]
    # Get unique period_end dates for this fiscal year
    unique_ends = set()
    for fact in facts_for_year:
        if fact.period_end:
            unique_ends.add(fact.period_end)
    print(f"\nFY {year} has {len(unique_ends)} unique period_end dates:")
    for end_date in sorted(unique_ends):
        if end_date:
            match = "✓" if end_date.year == year else "✗"
            print(f"  {year:4d} | FY | {end_date} | {end_date.year} | {match}")

# Now check if we have the correct matches
print("\n\nChecking if we have correct year matches:")
correct_matches = defaultdict(set)
for fact in raw_facts:
    if fact.statement_type == 'IncomeStatement' and fact.fiscal_period == 'FY':
        if fact.period_end and fact.fiscal_year:
            if fact.period_end.year == fact.fiscal_year:
                correct_matches[fact.fiscal_year].add(fact.period_end)

print("\nFiscal years with matching period_end.year:")
for year in sorted(correct_matches.keys(), reverse=True)[:6]:
    for end_date in correct_matches[year]:
        print(f"  FY {year} -> {end_date}")

# Check revenue values for correct matches
print("\n\nRevenue values for CORRECT year matches:")
for fact in raw_facts:
    if fact.statement_type == 'IncomeStatement' and fact.fiscal_period == 'FY':
        if fact.period_end and fact.fiscal_year:
            if fact.period_end.year == fact.fiscal_year:
                if 'RevenueFromContract' in str(fact.concept) and 'Liability' not in str(fact.concept):
                    if fact.fiscal_year >= 2019 and fact.fiscal_year <= 2024:
                        print(f"  FY {fact.fiscal_year} (ends {fact.period_end}): ${fact.value:,.0f}")


@@ -0,0 +1,172 @@
#!/usr/bin/env python3
"""
Check which renderer is actually being used in the MSFT table.
"""
import sys
sys.path.insert(0, '/Users/dwight/PycharmProjects/edgartools')

from edgar.documents.parser import HTMLParser
from edgar.documents.config import ParserConfig
from edgar.documents.table_nodes import TableNode


def check_renderer_usage():
    print("🔍 CHECKING WHICH RENDERER IS ACTUALLY BEING USED")
    print("=" * 60)
    try:
        # Parse with default config
        with open('/Users/dwight/PycharmProjects/edgartools/data/html/MSFT.10-K.html', 'r') as f:
            html_content = f.read()

        # Check what the default config actually has
        config = ParserConfig()
        print(f"Default ParserConfig.fast_table_rendering: {config.fast_table_rendering}")

        parser = HTMLParser(config)
        document = parser.parse(html_content)

        # Find target table
        target_table = None

        def find_target(node):
            nonlocal target_table
            if isinstance(node, TableNode):
                try:
                    if "Weighted average outstanding shares" in node.text():
                        target_table = node
                        return
                except Exception:
                    pass
            if hasattr(node, 'children'):
                for child in node.children:
                    find_target(child)

        find_target(document.root)
        if not target_table:
            print("❌ Target table not found")
            return

        print("✅ Found target table")
        print(f"Table has _config: {'✓' if hasattr(target_table, '_config') else '✗'}")
        if hasattr(target_table, '_config'):
            print(f"Table config fast_table_rendering: {target_table._config.fast_table_rendering}")

        # Test the decision logic in TableNode.text()
        print(f"\n🔍 TRACING TableNode.text() DECISION LOGIC:")

        # Check if cache exists
        has_cache = hasattr(target_table, '_text_cache') and target_table._text_cache is not None
        print(f"Has cached text: {has_cache}")
        if has_cache:
            print("❗ Using cached result - clearing cache to test renderer...")
            target_table._text_cache = None

        # Check the config decision
        config_obj = getattr(target_table, '_config', None)
        should_use_fast = config_obj and getattr(config_obj, 'fast_table_rendering', False)
        print(f"Config object exists: {'✓' if config_obj else '✗'}")
        print(f"Should use fast rendering: {'✓' if should_use_fast else '✗'}")

        # Test both renderers directly
        print(f"\n🧪 TESTING BOTH RENDERERS DIRECTLY:")

        # Test Rich renderer
        try:
            print("Rich renderer test:")
            rich_table = target_table.render(width=195)
            from edgar.richtools import rich_to_text
            rich_text = rich_to_text(rich_table)
            rich_has_pipes = '|' in rich_text
            print(f"  Rich output has pipes: {'✓' if rich_has_pipes else '✗'}")
            print(f"  Rich output length: {len(rich_text)} chars")
            print(f"  Rich preview: {rich_text[:80]}...")
        except Exception as e:
            print(f"  Rich renderer error: {e}")

        # Test Fast renderer
        try:
            print("Fast renderer test:")
            fast_text = target_table._fast_text_rendering()
            fast_has_pipes = '|' in fast_text
            print(f"  Fast output has pipes: {'✓' if fast_has_pipes else '✗'}")
            print(f"  Fast output length: {len(fast_text)} chars")
            print(f"  Fast preview: {fast_text[:80]}...")
        except Exception as e:
            print(f"  Fast renderer error: {e}")

        # Test current text() method
        print("Current text() method:")
        current_text = target_table.text()
        current_has_pipes = '|' in current_text
        print(f"  Current output has pipes: {'✓' if current_has_pipes else '✗'}")
        print(f"  Current output length: {len(current_text)} chars")
        print(f"  Current preview: {current_text[:80]}...")

        # Determine which renderer is actually being used
        if current_has_pipes and len(current_text) < 2000:
            print(f"\n🎯 CONCLUSION: Currently using FAST RENDERER ✅")
        elif not current_has_pipes and len(current_text) > 1500:
            print(f"\n🎯 CONCLUSION: Currently using RICH RENDERER ❌")
        else:
            print(f"\n🤔 CONCLUSION: Unclear which renderer is being used")
    except Exception as e:
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()


def test_explicit_configurations():
    """Test with explicit fast and rich configurations."""
    print(f"\n🧪 TESTING EXPLICIT CONFIGURATIONS")
    print("=" * 60)
    configs = [
        ("Explicit Fast", ParserConfig(fast_table_rendering=True)),
        ("Explicit Rich", ParserConfig(fast_table_rendering=False)),
    ]
    try:
        with open('/Users/dwight/PycharmProjects/edgartools/data/html/MSFT.10-K.html', 'r') as f:
            html_content = f.read()

        for config_name, config in configs:
            print(f"\n🔧 {config_name} (fast_table_rendering={config.fast_table_rendering}):")
            parser = HTMLParser(config)
            document = parser.parse(html_content)

            # Find table
            target_table = None

            def find_target(node):
                nonlocal target_table
                if isinstance(node, TableNode):
                    try:
                        if "Weighted average outstanding shares" in node.text():
                            target_table = node
                            return
                    except Exception:
                        pass
                if hasattr(node, 'children'):
                    for child in node.children:
                        find_target(child)

            find_target(document.root)
            if target_table:
                table_text = target_table.text()
                has_pipes = '|' in table_text
                print(f"  Output has pipes: {'✓' if has_pipes else '✗'}")
                print(f"  Output length: {len(table_text)} chars")
                print(f"  Preview: {table_text[:60]}...")
            else:
                print("  ❌ Table not found")
    except Exception as e:
        print(f"❌ Error: {e}")


if __name__ == "__main__":
    check_renderer_usage()
    test_explicit_configurations()


@@ -0,0 +1,46 @@
from edgar import Company
from collections import defaultdict
import json

# Get Apple facts
aapl = Company("AAPL")
facts = aapl.facts

# Get raw facts data - access the internal facts list
raw_facts = facts._facts

# Look for Revenue facts in 2019 and 2020
revenue_facts = []
for fact in raw_facts:
    if fact.concept and 'Revenue' in fact.concept:
        if fact.fiscal_year in [2019, 2020]:
            revenue_facts.append({
                'concept': fact.concept,
                'value': fact.value,
                'fy': fact.fiscal_year,
                'fp': fact.fiscal_period,
                'period_end': str(fact.period_end) if fact.period_end else None,
                'period_duration': getattr(fact, 'period_duration', None),
                'statement': fact.statement_type,
                'filing_date': str(fact.filing_date) if fact.filing_date else None
            })

print("Revenue facts for 2019-2020:")
print(json.dumps(revenue_facts, indent=2, default=str))

# Group by fiscal year and period
by_year_period = defaultdict(list)
for fact in revenue_facts:
    key = f"{fact['fy']}-{fact['fp']}"
    by_year_period[key].append(fact)

print("\n\nGrouped by fiscal year and period:")
for key in sorted(by_year_period.keys()):
    print(f"\n{key}:")
    for fact in by_year_period[key]:
        print(f"  {fact['concept']}: ${fact['value']:,} (duration: {fact['period_duration']} days)")

# Now check what the income statement method returns
print("\n\nIncome statement for 2019-2020 (annual=True):")
income = facts.income_statement(annual=True, periods=6)
print(income)


@@ -0,0 +1,89 @@
from edgar import Company
from collections import defaultdict

# Get Apple facts
aapl = Company("AAPL")
facts = aapl.facts

# Get raw facts data - access the internal facts list
raw_facts = facts._facts

# Look for all facts in the Income Statement for 2019-2020
income_facts = defaultdict(lambda: defaultdict(list))
for fact in raw_facts:
    if fact.statement_type == 'IncomeStatement':
        if fact.fiscal_year in [2019, 2020]:
            key = f"{fact.fiscal_year}-{fact.fiscal_period}"
            income_facts[fact.concept][key].append({
                'value': fact.value,
                'period_end': fact.period_end,
                'filing_date': fact.filing_date
            })

# Find Revenue/Revenues concepts
revenue_concepts = []
for concept in income_facts.keys():
    if 'Revenue' in concept and 'Contract' not in concept:
        revenue_concepts.append(concept)

print("Revenue concepts found:", revenue_concepts)
print("\nRevenue values by year-period:")
for concept in revenue_concepts:
    print(f"\n{concept}:")
    for period in sorted(income_facts[concept].keys()):
        facts_list = income_facts[concept][period]
        for f in facts_list:
            print(f"  {period}: ${f['value']:,}")

# Check what periods are actually marked as FY
print("\n\nAll FY periods in Income Statement:")
fy_periods = set()
for fact in raw_facts:
    if fact.statement_type == 'IncomeStatement' and fact.fiscal_period == 'FY':
        fy_periods.add((fact.fiscal_year, fact.fiscal_period, fact.period_end))

for year, period, end_date in sorted(fy_periods):
    print(f"  {year} {period} (ends {end_date})")

# Now check what exact facts are selected for 2019 and 2020
print("\n\nChecking what's selected for income statement:")
from edgar.entity.enhanced_statement import EnhancedStatementBuilder
builder = EnhancedStatementBuilder()
stmt_facts = [f for f in raw_facts if f.statement_type == 'IncomeStatement']

# Build period info like the builder does
period_info = {}
period_facts_map = defaultdict(list)
for fact in stmt_facts:
    period_key = (fact.fiscal_year, fact.fiscal_period)
    period_label = f"{fact.fiscal_period} {fact.fiscal_year}"
    period_facts_map[period_label].append(fact)
    if period_key not in period_info:
        period_info[period_key] = {
            'label': period_label,
            'end_date': fact.period_end,
            'is_annual': fact.fiscal_period == 'FY',
            'filing_date': fact.filing_date,
            'fiscal_year': fact.fiscal_year,
            'fiscal_period': fact.fiscal_period
        }

# Get annual periods
annual_periods = [(pk, info) for pk, info in period_info.items() if info['is_annual']]
annual_periods.sort(key=lambda x: x[0][0] if x[0][0] else 0, reverse=True)

print("\nAnnual periods found (sorted newest first):")
for (year, period), info in annual_periods[:10]:
    print(f"  {info['label']} - ends {info['end_date']}")

# Check if there are any revenue facts for FY 2019 and FY 2020
print("\n\nRevenue facts for FY periods:")
for fact in raw_facts:
    if fact.statement_type == 'IncomeStatement' and fact.fiscal_period == 'FY':
        if fact.fiscal_year in [2019, 2020] and 'Revenue' in str(fact.concept):
            print(f"  {fact.fiscal_year} {fact.fiscal_period}: {fact.concept} = ${fact.value:,}")


@@ -0,0 +1,37 @@
from edgar import Company
from collections import defaultdict

# Get Apple facts
aapl = Company("AAPL")
facts = aapl.facts
raw_facts = facts._facts

# Check how period_info is built
stmt_facts = [f for f in raw_facts if f.statement_type == 'IncomeStatement']

# Track all unique combinations
all_combos = set()
period_end_by_key = defaultdict(set)
for fact in stmt_facts:
    if fact.fiscal_period == 'FY' and fact.fiscal_year and fact.fiscal_year >= 2019:
        period_key = (fact.fiscal_year, fact.fiscal_period)
        all_combos.add((fact.fiscal_year, fact.fiscal_period, fact.period_end))
        period_end_by_key[period_key].add(fact.period_end)

print("Period keys and their different period_end dates:")
for key in sorted(period_end_by_key.keys(), reverse=True):
    year, period = key
    if year >= 2019 and year <= 2024:
        ends = period_end_by_key[key]
        print(f"\n({year}, '{period}'): {len(ends)} different period_ends")
        for end in sorted(ends):
            match = "✓" if end and end.year == year else "✗"
            print(f"  {end} {match}")

# The problem: the period_info dict only keeps ONE entry per key
print("\n\nProblem: The current code builds period_info as a dict,")
print("so it only keeps ONE fact per (fiscal_year, fiscal_period) key!")
print("We lose all the other period_end variations when we do:")
print("    if period_key not in period_info:")
print("        period_info[period_key] = {...}  # Only the first one is kept!")


@@ -0,0 +1,83 @@
from edgar import Company
from edgar.entity.enhanced_statement import EnhancedStatementBuilder
from collections import defaultdict

# Get Apple facts
aapl = Company("AAPL")
facts = aapl.facts
raw_facts = facts._facts

# Build statement manually to debug
builder = EnhancedStatementBuilder()
stmt_facts = [f for f in raw_facts if f.statement_type == 'IncomeStatement']

# Build period info with the new key structure
period_info = {}
period_facts = defaultdict(list)
for fact in stmt_facts:
    period_key = (fact.fiscal_year, fact.fiscal_period, fact.period_end)
    if period_key not in period_info:
        period_info[period_key] = {
            'label': f"{fact.fiscal_period} {fact.fiscal_year}",
            'end_date': fact.period_end,
            'is_annual': fact.fiscal_period == 'FY',
            'filing_date': fact.filing_date,
            'fiscal_year': fact.fiscal_year,
            'fiscal_period': fact.fiscal_period
        }
    period_facts[period_key].append(fact)

# Apply the annual filtering logic
period_list = [(pk, info) for pk, info in period_info.items()]
true_annual_periods = []
for pk, info in period_list:
    if not info['is_annual']:
        continue
    fiscal_year = pk[0]
    period_end_date = pk[2]
    # Check if fiscal_year matches period_end.year
    if not (period_end_date and period_end_date.year == fiscal_year):
        continue
    # Check duration
    period_fact_list = period_facts.get(pk, [])
    if period_fact_list:
        sample_fact = period_fact_list[0]
        if sample_fact.period_start and sample_fact.period_end:
            duration = (sample_fact.period_end - sample_fact.period_start).days
            if duration > 300:
                true_annual_periods.append((pk, info))
                # Find revenue for this period
                for fact in period_fact_list:
                    if 'RevenueFromContract' in str(fact.concept) and 'Liability' not in str(fact.concept):
                        print(f"Selected: FY {fiscal_year} ends {period_end_date}: ${fact.value:,.0f} (duration: {duration} days)")
                        break

print(f"\nTotal true annual periods found: {len(true_annual_periods)}")

# Check what's in the final selection: keep the latest period_end per fiscal year
annual_by_year = {}
for pk, info in true_annual_periods:
    fiscal_year = pk[0]
    period_end_date = pk[2]
    if fiscal_year not in annual_by_year or period_end_date > annual_by_year[fiscal_year][0][2]:
        annual_by_year[fiscal_year] = (pk, info)

sorted_periods = sorted(annual_by_year.items(), key=lambda x: x[0], reverse=True)
selected = [pair for year, pair in sorted_periods[:6]]

print(f"\nFinal selected periods:")
for (year, period, end), info in selected:
    print(f"  FY {year} ends {end}")
    # Find revenue for this period
    for fact in period_facts[(year, period, end)]:
        if 'RevenueFromContract' in str(fact.concept) and 'Liability' not in str(fact.concept):
            duration = (fact.period_end - fact.period_start).days if fact.period_start else None
            print(f"    Revenue: ${fact.value:,.0f} (duration: {duration} days)")
            break


@@ -0,0 +1,33 @@
from edgar import Company

# Get Apple facts and display the income statement
aapl = Company("AAPL")
facts = aapl.facts

print("Testing with annual=True, periods=6:")
income = facts.income_statement(annual=True, periods=6)

# Get the internal data
items = income.items

# Find the Total Revenue item
for item in items:
    if "Revenue" in item.label and "Total" in item.label:
        print(f"\n{item.label}:")
        print(f"  Values: {item.values}")
        print(f"  Periods: {income.periods}")
        # Show what values we have
        for period, value in zip(income.periods, item.values):
            if value:
                print(f"  {period}: {value}")

# Let's also check what raw facts we have
print("\n\nChecking raw facts for FY 2019 and FY 2020:")
raw_facts = facts._facts
for fact in raw_facts:
    if fact.statement_type == 'IncomeStatement' and fact.fiscal_period == 'FY':
        if fact.fiscal_year in [2019, 2020]:
            if 'RevenueFromContract' in str(fact.concept) and 'Liability' not in str(fact.concept):
                match = "✓" if fact.period_end and fact.period_end.year == fact.fiscal_year else "✗"
                print(f"  FY {fact.fiscal_year} ends {fact.period_end}: ${fact.value:,.0f} {match}")


@@ -0,0 +1,71 @@
from edgar import Company
from edgar.entity.enhanced_statement import EnhancedStatementBuilder
from collections import defaultdict

# Get Apple facts
aapl = Company("AAPL")
facts = aapl.facts
raw_facts = facts._facts

# Build statement manually to debug
builder = EnhancedStatementBuilder()
stmt_facts = [f for f in raw_facts if f.statement_type == 'IncomeStatement']

# Build period info
period_info = {}
period_facts_map = defaultdict(list)
for fact in stmt_facts:
    period_key = (fact.fiscal_year, fact.fiscal_period)
    period_label = f"{fact.fiscal_period} {fact.fiscal_year}"
    period_facts_map[period_label].append(fact)
    if period_key not in period_info:
        period_info[period_key] = {
            'label': period_label,
            'end_date': fact.period_end,
            'is_annual': fact.fiscal_period == 'FY',
            'filing_date': fact.filing_date,
            'fiscal_year': fact.fiscal_year,
            'fiscal_period': fact.fiscal_period
        }

# Create list of periods
period_list = [(pk, info) for pk, info in period_info.items()]

# Filter for annual
annual_periods = [(pk, info) for pk, info in period_list if info['is_annual']]
print(f"Total annual periods before sort: {len(annual_periods)}")

# Sort by end_date
annual_periods.sort(key=lambda x: x[1]['end_date'], reverse=True)

print("\nFirst 10 annual periods after sorting by end_date:")
for i, ((year, period), info) in enumerate(annual_periods[:10]):
    print(f"  {i}: FY {year} - ends {info['end_date']}")

# Deduplicate by fiscal year
seen_years = set()
unique_annual_periods = []
for pk, info in annual_periods:
    fiscal_year = pk[0]
    if fiscal_year not in seen_years:
        seen_years.add(fiscal_year)
        unique_annual_periods.append((pk, info))
        print(f"  Keeping: FY {fiscal_year} ending {info['end_date']}")

print(f"\nUnique annual periods: {len(unique_annual_periods)}")
print("\nFirst 6 unique periods:")
for (year, period), info in unique_annual_periods[:6]:
    print(f"  FY {year} - ends {info['end_date']}")

# Check what revenue value we have for those periods
print("\nRevenue values for selected periods:")
for (year, fp), info in unique_annual_periods[:6]:
    period_label = info['label']
    # Find revenue fact for this period
    for fact in period_facts_map[period_label]:
        if 'RevenueFromContract' in str(fact.concept) and 'Liability' not in str(fact.concept):
            print(f"  {period_label}: {fact.concept} = ${fact.value:,}")
            break


@@ -0,0 +1,71 @@
from edgar import Company
from edgar.entity.enhanced_statement import EnhancedStatementBuilder
from collections import defaultdict

# Get Apple facts
aapl = Company("AAPL")
facts = aapl.facts
raw_facts = facts._facts

# Build statement manually to debug
builder = EnhancedStatementBuilder()
stmt_facts = [f for f in raw_facts if f.statement_type == 'IncomeStatement']

# Build period info
period_info = {}
period_facts_map = defaultdict(list)
for fact in stmt_facts:
    period_key = (fact.fiscal_year, fact.fiscal_period)
    period_label = f"{fact.fiscal_period} {fact.fiscal_year}"
    period_facts_map[period_label].append(fact)
    if period_key not in period_info:
        period_info[period_key] = {
            'label': period_label,
            'end_date': fact.period_end,
            'is_annual': fact.fiscal_period == 'FY',
            'filing_date': fact.filing_date,
            'fiscal_year': fact.fiscal_year,
            'fiscal_period': fact.fiscal_period
        }

# Apply the fix logic
period_list = [(pk, info) for pk, info in period_info.items()]
annual_periods = [(pk, info) for pk, info in period_list if info['is_annual']]
print(f"Total annual periods: {len(annual_periods)}")

# Apply the matching logic
correct_annual_periods = {}
for pk, info in annual_periods:
    fiscal_year = pk[0]
    if info['end_date'] and info['end_date'].year == fiscal_year:
        if fiscal_year not in correct_annual_periods or \
           info['end_date'] > correct_annual_periods[fiscal_year][1]['end_date']:
            correct_annual_periods[fiscal_year] = (pk, info)
            print(f"  Selected FY {fiscal_year}: ends {info['end_date']}")

print(f"\nCorrect annual periods found: {len(correct_annual_periods)}")

# Sort and select
sorted_periods = sorted(correct_annual_periods.items(), key=lambda x: x[0], reverse=True)
selected_period_info = [pair for year, pair in sorted_periods[:6]]

print(f"\nSelected {len(selected_period_info)} periods:")
for (year, period), info in selected_period_info:
    print(f"  {info['label']}")

# Check what revenue facts we have for these periods
print("\nRevenue facts for selected periods:")
for (year, fp), info in selected_period_info:
    period_label = info['label']
    revenue_found = False
    for fact in period_facts_map[period_label]:
        if 'RevenueFromContract' in str(fact.concept) and 'Liability' not in str(fact.concept):
            print(f"  {period_label}: ${fact.value:,.0f}")
            revenue_found = True
            break
    if not revenue_found:
        print(f"  {period_label}: No revenue found")


@@ -0,0 +1,262 @@
#!/usr/bin/env python3
"""
Debug script to investigate table parsing/rendering issues in MSFT 10-K.
Focus on the "Weighted average outstanding shares of common stock (B)" table.
"""
import sys
sys.path.insert(0, '/Users/dwight/PycharmProjects/edgartools')

from edgar.documents.parser import HTMLParser
from edgar.documents.config import ParserConfig
from edgar.documents.table_nodes import TableNode
from bs4 import BeautifulSoup


def find_table_in_html():
    """Find and examine the table HTML structure around the target text."""
    print("🔍 EXAMINING TABLE HTML STRUCTURE")
    print("=" * 50)
    try:
        # Read the MSFT file
        with open('/Users/dwight/PycharmProjects/edgartools/data/html/MSFT.10-K.html', 'r') as f:
            html_content = f.read()
        print(f"File size: {len(html_content)} characters")

        # Find the table containing our target text
        soup = BeautifulSoup(html_content, 'html.parser')

        # Search for the specific text (string= replaces the deprecated text= argument)
        target_elements = soup.find_all(string=lambda text: text and "Weighted average outstanding shares of common stock" in text)
        print(f"\nFound {len(target_elements)} elements with target text")

        for i, element in enumerate(target_elements):
            print(f"\n📍 Element {i+1}:")
            print(f"  Text: {element.strip()[:80]}...")

            # Find the containing table
            parent = element.parent
            while parent and parent.name != 'table':
                parent = parent.parent

            if parent and parent.name == 'table':
                print("  Found containing table!")
                # Analyze the table structure
                rows = parent.find_all('tr')
                print(f"  Table has {len(rows)} rows")
                # Look at first few rows
                for j, row in enumerate(rows[:5]):
                    cells = row.find_all(['td', 'th'])
                    print(f"    Row {j+1}: {len(cells)} cells")
                    for k, cell in enumerate(cells[:3]):  # First 3 cells
                        cell_text = cell.get_text().strip()[:30].replace('\n', ' ')
                        print(f"      Cell {k+1}: '{cell_text}...'")
                return parent
            else:
                print("  No containing table found")
        return None
    except Exception as e:
        print(f"❌ Error examining HTML: {e}")
        import traceback
        traceback.print_exc()
        return None


def test_parser_on_msft():
    """Test the document parser on the MSFT file."""
    print("\n🚀 TESTING DOCUMENT PARSER")
    print("=" * 50)
    try:
        # Read the MSFT file
        with open('/Users/dwight/PycharmProjects/edgartools/data/html/MSFT.10-K.html', 'r') as f:
            html_content = f.read()

        # Parse with different configurations
        configs_to_test = [
            ("Default", ParserConfig()),
            ("Performance", ParserConfig.for_performance()),
            ("Accuracy", ParserConfig.for_accuracy()),
        ]
        for config_name, config in configs_to_test:
            print(f"\n🧪 Testing with {config_name} config...")
            parser = HTMLParser(config)
            document = parser.parse(html_content)
            print("  Document parsed successfully")
            print(f"  Root children: {len(document.root.children)}")

            # Find tables with our target text
            matching_tables = []

            def find_target_tables(node):
                if isinstance(node, TableNode):
                    table_text = node.text()
                    if "Weighted average outstanding shares of common stock" in table_text:
                        matching_tables.append(node)
                for child in node.children:
                    find_target_tables(child)

            find_target_tables(document.root)
            print(f"  Found {len(matching_tables)} table(s) with target text")

            for i, table in enumerate(matching_tables):
                print(f"\n  📋 Table {i+1}:")
                print(f"    Headers: {len(table.headers)} row(s)")
                print(f"    Data rows: {len(table.rows)}")
                print(f"    Table type: {table.table_type}")

                # Show table structure
                if table.headers:
                    print("    Header structure:")
                    for j, header_row in enumerate(table.headers):
                        print(f"      Row {j+1}: {len(header_row)} cells")
                        for k, cell in enumerate(header_row[:3]):
                            cell_text = cell.text().strip()[:20].replace('\n', ' ')
                            print(f"        Cell {k+1}: '{cell_text}...'")

                print("    First few data rows:")
                for j, row in enumerate(table.rows[:3]):
                    print(f"      Row {j+1}: {len(row.cells)} cells")
                    for k, cell in enumerate(row.cells[:3]):
                        cell_text = cell.text().strip()[:20].replace('\n', ' ')
                        print(f"        Cell {k+1}: '{cell_text}...'")

                # Get the text output
                table_text = table.text()
                print(f"\n    Text output ({len(table_text)} chars):")
                print("    " + "-" * 40)
                # Show first few lines
                lines = table_text.split('\n')
                for line_num, line in enumerate(lines[:10]):
                    print(f"    {line_num+1:2d}: {line}")
                if len(lines) > 10:
                    print(f"    ... ({len(lines)-10} more lines)")
                print("    " + "-" * 40)

                # Check for issues
                issues = []
                if len(table_text.strip()) == 0:
                    issues.append("Empty text output")
                if "Weighted average outstanding shares" not in table_text:
                    issues.append("Missing target text in output")
                if table_text.count('|') < 5:  # Should have multiple columns
                    issues.append("Possibly missing column separators")
                if len(lines) < 3:
                    issues.append("Very few output lines")

                if issues:
                    print(f"    ⚠️ Issues detected: {', '.join(issues)}")
                    return table  # Return problematic table for further analysis
                else:
                    print("    ✅ Table appears to render correctly")
        return None
    except Exception as e:
        print(f"❌ Parser test failed: {e}")
        import traceback
        traceback.print_exc()
        return None


def analyze_table_structure(table):
    """Deep analysis of a problematic table."""
    print("\n🔬 DEEP TABLE ANALYSIS")
    print("=" * 50)
    if not table:
        print("No table to analyze")
        return

    print(f"Table type: {table.table_type}")
    print(f"Caption: {table.caption}")
    print(f"Summary: {table.summary}")

    # Analyze headers
    print(f"\n📋 HEADERS ({len(table.headers)} rows):")
    for i, header_row in enumerate(table.headers):
        print(f"  Row {i+1} ({len(header_row)} cells):")
        for j, cell in enumerate(header_row):
            print(f"    Cell {j+1}: colspan={cell.colspan}, rowspan={cell.rowspan}")
            print(f"      text='{cell.text()[:40]}...'")
            print(f"      is_header={cell.is_header}")

    # Analyze data rows
    print(f"\n📊 DATA ROWS ({len(table.rows)} rows):")
    for i, row in enumerate(table.rows[:5]):  # First 5 rows
        print(f"  Row {i+1} ({len(row.cells)} cells):")
        for j, cell in enumerate(row.cells):
            print(f"    Cell {j+1}: colspan={cell.colspan}, rowspan={cell.rowspan}")
            print(f"      text='{cell.text()[:40]}...'")
            print(f"      is_numeric={cell.is_numeric}")
    if len(table.rows) > 5:
        print(f"  ... and {len(table.rows)-5} more rows")

    # Test different rendering approaches
    print(f"\n🖼️ TESTING DIFFERENT RENDERERS:")

    # Rich renderer
    try:
        rich_table = table.render(width=120)
        from edgar.richtools import rich_to_text
        rich_text = rich_to_text(rich_table)
        print(f"  Rich renderer: {len(rich_text)} chars")
        print(f"    Preview: {rich_text[:100]}...")
    except Exception as e:
        print(f"  Rich renderer failed: {e}")

    # Fast renderer
    try:
        fast_text = table._fast_text_rendering()
        print(f"  Fast renderer: {len(fast_text)} chars")
        print(f"    Preview: {fast_text[:100]}...")
    except Exception as e:
        print(f"  Fast renderer failed: {e}")

    # Compare outputs
    try:
        current_text = table.text()
        print(f"  Current text() method: {len(current_text)} chars")
        if "Weighted average outstanding shares" in current_text:
            print("    ✅ Contains target text")
        else:
            print("    ❌ Missing target text")
    except Exception as e:
        print(f"  Current text() method failed: {e}")


if __name__ == "__main__":
    print("🎯 DEBUGGING MSFT TABLE PARSING ISSUE")
    print("Target: 'Weighted average outstanding shares of common stock (B)' table")
    print()

    # Step 1: Examine HTML structure
    table_element = find_table_in_html()

    # Step 2: Test parser with different configurations
    problematic_table = test_parser_on_msft()

    # Step 3: Deep analysis if issues found
    if problematic_table:
        analyze_table_structure(problematic_table)
        print(f"\n🎯 CONCLUSION:")
        print("A problematic table was identified. Check the analysis above")
        print("for specific issues with parsing or rendering.")
    else:
        print(f"\n✅ CONCLUSION:")
        print("No obvious parsing issues were detected. The table appears to")
        print("be parsing and rendering correctly with the current parser.")
        print("If there are still issues, they may be subtle formatting problems.")


@@ -0,0 +1,159 @@
#!/usr/bin/env python3
"""
Debug why Rich table rendering is still producing poor structure even with headers detected.
"""
import sys
sys.path.insert(0, '/Users/dwight/PycharmProjects/edgartools')

from edgar.documents.parser import HTMLParser
from edgar.documents.config import ParserConfig
from edgar.documents.table_nodes import TableNode


def debug_rich_rendering_issue():
    print("🔍 DEBUGGING RICH RENDERING WITH DETECTED HEADERS")
    print("=" * 60)
    try:
        with open('/Users/dwight/PycharmProjects/edgartools/data/html/MSFT.10-K.html', 'r') as f:
            html_content = f.read()

        config = ParserConfig()
        parser = HTMLParser(config)
        document = parser.parse(html_content)

        # Find target table
        target_table = None

        def find_target(node):
            nonlocal target_table
            if isinstance(node, TableNode):
                try:
                    if "Weighted average outstanding shares" in node.text():
                        target_table = node
                        return
                except Exception:
                    pass
            if hasattr(node, 'children'):
                for child in node.children:
                    find_target(child)

        find_target(document.root)
        if not target_table:
            print("❌ Target table not found")
            return None

        print("✅ Found target table")
        print(f"Headers: {len(target_table.headers)}")
        print(f"Data rows: {len(target_table.rows)}")

        # Examine the table structure in detail
        print(f"\n🔍 DETAILED TABLE STRUCTURE ANALYSIS:")

        # Check headers
        if target_table.headers:
            for i, header_row in enumerate(target_table.headers):
                print(f"\nHeader row {i+1}: {len(header_row)} cells")
                for j, cell in enumerate(header_row[:8]):  # First 8 cells
                    print(f"  Cell {j+1}: '{cell.text()}' (colspan={cell.colspan}, rowspan={cell.rowspan})")

        # Check data row structure
        print(f"\n📊 DATA ROW ANALYSIS:")
        for i, row in enumerate(target_table.rows[:5]):  # First 5 data rows
            content_cells = [j for j, cell in enumerate(row.cells) if cell.text().strip()]
            print(f"Row {i+1}: {len(row.cells)} total cells, content in positions {content_cells}")
            # Show first few cells with content
            for j in content_cells[:3]:
                if j < len(row.cells):
                    cell = row.cells[j]
                    print(f"  Cell {j+1}: '{cell.text()[:30]}...' (align={cell.align})")

        # Check table dimensions
        max_cols = max(len(row.cells) for row in target_table.rows) if target_table.rows else 0
        header_cols = len(target_table.headers[0]) if target_table.headers else 0
        print(f"\n📏 TABLE DIMENSIONS:")
        print(f"  Header columns: {header_cols}")
        print(f"  Max data columns: {max_cols}")
        print(f"  Dimension mismatch: {'YES' if header_cols != max_cols else 'NO'}")

        # Count empty vs content cells
        total_cells = sum(len(row.cells) for row in target_table.rows)
        empty_cells = sum(1 for row in target_table.rows for cell in row.cells if not cell.text().strip())
        print(f"  Total data cells: {total_cells}")
        print(f"  Empty data cells: {empty_cells} ({empty_cells/total_cells*100:.1f}%)")

        # Test Rich table creation manually
        print(f"\n🎨 TESTING RICH TABLE CREATION:")
        try:
            rich_table = target_table.render(width=120)
            print("✅ Rich table created successfully")
            print(f"Rich table type: {type(rich_table)}")
            # Check Rich table properties
            if hasattr(rich_table, 'columns'):
                print(f"Rich columns: {len(rich_table.columns)}")
            if hasattr(rich_table, 'rows'):
                print(f"Rich rows: {len(rich_table.rows)}")
        except Exception as e:
            print(f"❌ Rich table creation failed: {e}")
            import traceback
            traceback.print_exc()
            return None

        # Test text conversion
        print(f"\n📝 TESTING TEXT CONVERSION:")
        try:
            from edgar.richtools import rich_to_text
            rich_text = rich_to_text(rich_table)
            lines = rich_text.split('\n')
            print(f"Text output: {len(lines)} lines, {len(rich_text)} chars")

            # Analyze line types
            empty_lines = sum(1 for line in lines if not line.strip())
            border_lines = sum(1 for line in lines if any(c in line for c in '┌┐└┘├┤│─'))
            content_lines = sum(1 for line in lines if line.strip() and not all(c in '┌┐└┘├┤│─ ' for c in line))
            print(f"  Empty lines: {empty_lines}")
            print(f"  Border lines: {border_lines}")
            print(f"  Content lines: {content_lines}")

            # Show actual structure
            print(f"\nFirst 10 lines of output:")
            for i, line in enumerate(lines[:10]):
                line_type = "EMPTY" if not line.strip() else "BORDER" if any(c in line for c in '┌┐└┘├┤│─') else "CONTENT"
                print(f"  {i+1:2d} [{line_type:7}]: {line[:60]}{'...' if len(line) > 60 else ''}")

            # The problem might be that Rich is creating a table but with poor formatting.
            # Let's see if we can identify the issue.
            if border_lines < 3:
                print(f"\n❌ DIAGNOSIS: Very few border lines - Rich table structure is poor")
                print("This suggests the table has structural issues that prevent proper rendering.")
                print("Possible causes:")
                print("1. Column count mismatch between headers and data")
                print("2. Too many empty cells causing poor layout")
                print("3. Cell spanning issues")
                print("4. Table too wide for rendering width")
            else:
                print(f"\n✅ Rich table structure appears normal")
        except Exception as e:
            print(f"❌ Text conversion failed: {e}")
            return None

        return target_table
    except Exception as e:
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()
        return None


if __name__ == "__main__":
    debug_rich_rendering_issue()
    print(f"\n🎯 NEXT STEPS:")
    print("Based on the analysis above, we can identify specific issues preventing")
    print("proper Rich table rendering and address them systematically.")


@@ -0,0 +1,61 @@
from edgar import Company
from edgar.entity.enhanced_statement import EnhancedStatementBuilder
from collections import defaultdict

# Get Apple facts
aapl = Company("AAPL")
facts = aapl.facts

# Build the income statement
builder = EnhancedStatementBuilder()
stmt = builder.build_multi_period_statement(
    facts=facts._facts,
    statement_type='IncomeStatement',
    periods=6,
    annual=True
)
print(f"Selected periods: {stmt.periods}")
print("\nChecking Revenue item values:")

# Find the revenue item
for item in stmt.items:
    if item.label and 'Revenue' in item.label and 'Total' in item.label:
        print(f"\n{item.label}:")
        for period, value in zip(stmt.periods, item.values):
            print(f"  {period}: {value}")
        # Check what concept this maps to
        if hasattr(item, 'concept'):
            print(f"  Concept: {item.concept}")

# Now let's check what facts are in period_facts_by_label
print("\n\nChecking what facts are in the FY 2020 period:")

# Recreate what the builder does
raw_facts = facts._facts
stmt_facts = [f for f in raw_facts if f.statement_type == 'IncomeStatement']

# Build period_facts with the new key structure
period_facts = defaultdict(list)
for fact in stmt_facts:
    period_key = (fact.fiscal_year, fact.fiscal_period, fact.period_end)
    period_facts[period_key].append(fact)

# Look for FY 2020 periods
for key in period_facts.keys():
    if key[0] == 2020 and key[1] == 'FY':
        if key[2] and key[2].year == 2020:  # Correct match
            print(f"\nKey: {key}")
            # Check revenue facts in this period
            for fact in period_facts[key]:
                if 'RevenueFromContract' in str(fact.concept) and 'Liability' not in str(fact.concept):
                    duration = None
                    if fact.period_start:
                        duration = (fact.period_end - fact.period_start).days
                    print(f"  Revenue: ${fact.value:,.0f} (duration: {duration})")

# The issue might be in how period_facts_by_label is built
print("\n\nChecking period_facts_by_label mapping:")
# This is what happens in the builder after selection: it remaps from
# period_key to label, but multiple keys can have the same label!
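
# --- Hedged sketch (not in the original script): if two period keys share the
# label "FY 2020", the label remap silently merges them. One assumed fix is to
# fold period_end into the label; `unique_label` is a hypothetical helper.
def unique_label(fiscal_period, fiscal_year, period_end):
    """Build a label that stays unique when one fiscal year has several period_ends."""
    return f"{fiscal_period} {fiscal_year} (ended {period_end})"

# e.g. unique_label('FY', 2020, date(2020, 9, 26)) -> 'FY 2020 (ended 2020-09-26)'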


@@ -0,0 +1,190 @@
#!/usr/bin/env python3
"""
Debug the table structure to understand why we're getting so many empty columns.
"""
import sys
sys.path.insert(0, '/Users/dwight/PycharmProjects/edgartools')

from edgar.documents.parser import HTMLParser
from edgar.documents.config import ParserConfig
from edgar.documents.table_nodes import TableNode


def analyze_table_structure():
    print("🔍 ANALYZING TABLE STRUCTURE")
    print("=" * 50)
    try:
        with open('/Users/dwight/PycharmProjects/edgartools/data/html/MSFT.10-K.html', 'r') as f:
            html_content = f.read()

        config = ParserConfig(fast_table_rendering=True)
        parser = HTMLParser(config)
        document = parser.parse(html_content)

        # Find target table
        target_table = None

        def find_target(node):
            nonlocal target_table
            if isinstance(node, TableNode):
                try:
                    if "Weighted average outstanding shares" in node.text():
                        target_table = node
                        return
                except Exception:
                    pass
            if hasattr(node, 'children'):
                for child in node.children:
                    find_target(child)

        find_target(document.root)
        if not target_table:
            print("❌ Target table not found")
            return None

        print("✅ Found target table")

        # Analyze the structure
        print(f"\nTable structure:")
        print(f"  Headers: {len(target_table.headers)} rows")
        print(f"  Data rows: {len(target_table.rows)}")

        # Analyze header structure
        print(f"\n📋 HEADER ANALYSIS:")
        for i, header_row in enumerate(target_table.headers):
            print(f"  Header row {i+1}: {len(header_row)} cells")
            for j, cell in enumerate(header_row[:10]):  # First 10 cells
                text = cell.text().strip()
                display_text = text[:20] if text else "[EMPTY]"
                print(f"    Cell {j+1}: '{display_text}' (colspan={cell.colspan})")

        # Analyze data rows
        print(f"\n📊 DATA ROW ANALYSIS:")
        for i, row in enumerate(target_table.rows[:5]):  # First 5 rows
            print(f"  Row {i+1}: {len(row.cells)} cells")
            for j, cell in enumerate(row.cells[:10]):  # First 10 cells
                text = cell.text().strip()
                display_text = text[:20] if text else "[EMPTY]"
                print(f"    Cell {j+1}: '{display_text}' (colspan={cell.colspan})")

        # Count empty vs filled cells
        total_cells = 0
        empty_cells = 0
        for header_row in target_table.headers:
            for cell in header_row:
                total_cells += 1
                if not cell.text().strip():
                    empty_cells += 1
        for row in target_table.rows:
            for cell in row.cells:
                total_cells += 1
                if not cell.text().strip():
                    empty_cells += 1

        print(f"\n📊 CELL STATISTICS:")
        print(f"  Total cells: {total_cells}")
        print(f"  Empty cells: {empty_cells}")
        print(f"  Filled cells: {total_cells - empty_cells}")
        print(f"  Empty percentage: {empty_cells/total_cells*100:.1f}%")

        # Check the maximum number of non-empty cells in any row
        max_meaningful_cols = 0
        for row in target_table.rows:
            meaningful_cols = sum(1 for c in row.cells if c.text().strip())
            max_meaningful_cols = max(max_meaningful_cols, meaningful_cols)
        print(f"  Maximum meaningful columns in any row: {max_meaningful_cols}")

        return target_table
    except Exception as e:
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()
        return None


def test_column_filtering():
    """Test filtering out empty columns."""
    print(f"\n🔧 TESTING COLUMN FILTERING")
    print("=" * 50)
    target_table = analyze_table_structure()
    if not target_table:
        return None

    # Analyze which columns actually have content
    if not target_table.rows:
        print("No data rows to analyze")
        return None

    max_cols = max(len(row.cells) for row in target_table.rows)
    print(f"Maximum columns: {max_cols}")

    # Check each column for meaningful content
    meaningful_columns = []
    for col_idx in range(max_cols):
        has_content = False
        # Check headers
        for header_row in target_table.headers:
            if col_idx < len(header_row) and header_row[col_idx].text().strip():
                has_content = True
                break
        # Check data rows
        if not has_content:
            for row in target_table.rows:
                if col_idx < len(row.cells) and row.cells[col_idx].text().strip():
                    has_content = True
                    break
        if has_content:
            meaningful_columns.append(col_idx)

    print(f"Meaningful columns: {meaningful_columns} ({len(meaningful_columns)} total)")

    # Test rendering with only meaningful columns
    print(f"\n📊 FILTERED TABLE PREVIEW:")
    # Show first data row with only meaningful columns
    if target_table.rows:
        first_row = target_table.rows[0]
        filtered_cells = []
        for col_idx in meaningful_columns:
            if col_idx < len(first_row.cells):
                cell_text = first_row.cells[col_idx].text().strip()
                filtered_cells.append(cell_text if cell_text else "[EMPTY]")
            else:
                filtered_cells.append("[MISSING]")
        print("First row filtered:", " | ".join(filtered_cells))

    return meaningful_columns


if __name__ == "__main__":
    print("🎯 DEBUGGING TABLE STRUCTURE ISSUE")
    print("Focus: Understanding why we get so many empty columns")
    print()
    meaningful_cols = test_column_filtering()
    if meaningful_cols:
        print(f"\n🎯 FINDINGS:")
        print("The table has many empty spacing columns.")
        print(f"Only {len(meaningful_cols)} out of many columns have actual content.")
        print("The FastTableRenderer should filter out empty columns.")
        print(f"\n🔧 SOLUTION:")
        print("Update FastTableRenderer to:")
        print("1. Identify columns with meaningful content")
        print("2. Filter out purely empty/spacing columns")
        print("3. Only render the meaningful columns")
    else:
        print("❌ Could not analyze column structure")


@@ -0,0 +1,225 @@
#!/usr/bin/env python3
"""
Debug why tables are losing their structure during parsing.
"""
import sys
sys.path.insert(0, '/Users/dwight/PycharmProjects/edgartools')

from edgar.documents.parser import HTMLParser
from edgar.documents.config import ParserConfig
from edgar.documents.table_nodes import TableNode
from bs4 import BeautifulSoup


def examine_raw_html_table():
    """Examine the raw HTML structure of the problematic table."""
    print("🔍 EXAMINING RAW HTML TABLE STRUCTURE")
    print("=" * 55)
    try:
        with open('/Users/dwight/PycharmProjects/edgartools/data/html/MSFT.10-K.html', 'r') as f:
            html_content = f.read()

        # Find the table HTML
        soup = BeautifulSoup(html_content, 'html.parser')

        # Look for the table containing our target text
        target_elements = soup.find_all(string=lambda text: text and "Weighted average outstanding shares" in text)
        if not target_elements:
            print("❌ Target text not found in HTML")
            return None

        target_element = target_elements[0]

        # Find the containing table
        table_element = target_element
        while table_element and table_element.name != 'table':
            table_element = table_element.parent

        if not table_element:
            print("❌ No containing table found")
            return None

        print("✅ Found containing HTML table")

        # Analyze the HTML table structure
        rows = table_element.find_all('tr')
        print(f"HTML table has {len(rows)} rows")

        # Look for thead/tbody structure
        thead = table_element.find('thead')
        tbody = table_element.find('tbody')
        print(f"Has <thead>: {'✓' if thead else '✗'}")
        print(f"Has <tbody>: {'✓' if tbody else '✗'}")

        # Analyze first few rows
        print(f"\nFirst few rows analysis:")
        for i, row in enumerate(rows[:10]):
            cells = row.find_all(['td', 'th'])
            cell_info = []
            for cell in cells[:5]:  # First 5 cells
                text = cell.get_text().strip()[:20]
                tag = cell.name
                colspan = cell.get('colspan', '1')
                cell_info.append(f"{tag}({colspan}):'{text}'")
            print(f"  Row {i+1}: {len(cells)} cells - {', '.join(cell_info)}")
            if len(cells) > 5:
                print(f"    ... and {len(cells)-5} more cells")

        # Check if there are any TH (header) cells
        th_cells = table_element.find_all('th')
        print(f"\nTotal <th> header cells: {len(th_cells)}")

        # Look for potential header patterns
        header_candidates = []
        for i, row in enumerate(rows[:5]):  # Check first 5 rows for headers
            cells = row.find_all(['td', 'th'])
            row_text = ' '.join(cell.get_text().strip() for cell in cells).strip()
            if any(keyword in row_text.lower() for keyword in ['year', 'ended', '2025', '2024', '2023']):
                header_candidates.append(i)
                print(f"  Potential header row {i+1}: {row_text[:80]}...")

        return table_element
    except Exception as e:
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()
        return None


def debug_table_parsing_pipeline():
    """Debug how the table gets processed through the parsing pipeline."""
    print(f"\n🔧 DEBUGGING TABLE PARSING PIPELINE")
    print("=" * 55)
    try:
        with open('/Users/dwight/PycharmProjects/edgartools/data/html/MSFT.10-K.html', 'r') as f:
            html_content = f.read()

        config = ParserConfig(fast_table_rendering=False)
        parser = HTMLParser(config)
        document = parser.parse(html_content)

        # Find target table
        target_table = None

        def find_target(node):
            nonlocal target_table
            if isinstance(node, TableNode):
                try:
                    if "Weighted average outstanding shares" in node.text():
                        target_table = node
                        return
                except Exception:
                    pass
            if hasattr(node, 'children'):
                for child in node.children:
                    find_target(child)

        find_target(document.root)
        if not target_table:
            print("❌ Target table not found in parsed document")
            return None

        print("✅ Found target table in parsed document")

        # Analyze how the table was parsed
        print(f"\nParsed table analysis:")
        print(f"  Table type: {target_table.table_type}")
        print(f"  Has headers: {'✓' if target_table.headers else '✗'}")
        print(f"  Header rows: {len(target_table.headers)}")
        print(f"  Data rows: {len(target_table.rows)}")
        print(f"  Caption: {target_table.caption}")

        # Check if headers were detected
        if target_table.headers:
            print(f"\n  Header structure:")
            for i, header_row in enumerate(target_table.headers):
                header_texts = [cell.text().strip()[:20] for cell in header_row]
                print(f"    Header row {i+1}: {header_texts}")
        else:
            print(f"\n  ❌ NO HEADERS DETECTED - This is likely the problem!")
            print("  The parser failed to identify header rows in the HTML table.")

            # Check if any of the first few data rows look like headers
            print(f"\n  First few data rows (might be misclassified headers):")
            for i, row in enumerate(target_table.rows[:5]):
                row_texts = [cell.text().strip()[:20] for cell in row.cells[:5]]
                print(f"    Data row {i+1}: {row_texts}")
                # Check if this row looks like a header
                row_text = ' '.join(cell.text().strip() for cell in row.cells)
                if any(keyword in row_text.lower() for keyword in ['year', 'ended', '2025', '2024', '2023', 'millions']):
                    print("      ⚠️ This looks like it should be a header row!")

        # Test manual header detection
        print(f"\n🔍 MANUAL HEADER DETECTION TEST:")
        potential_headers = []
        for i, row in enumerate(target_table.rows[:5]):
            row_text = ' '.join(cell.text().strip() for cell in row.cells).strip()
            # Score this row as a potential header
            header_score = 0

            # Check for typical header keywords
            header_keywords = ['millions', 'year ended', 'june 30', '2025', '2024', '2023']
            for keyword in header_keywords:
                if keyword in row_text.lower():
                    header_score += 1

            # Check for mostly empty cells (common in header spacing rows)
            empty_cells = sum(1 for cell in row.cells if not cell.text().strip())
            if empty_cells / len(row.cells) > 0.7:  # More than 70% empty
                header_score -= 1

            # Check for meaningful content vs pure spacing
            meaningful_cells = sum(1 for cell in row.cells if len(cell.text().strip()) > 2)
            if meaningful_cells >= 2:  # At least 2 cells with meaningful content
                header_score += 1

            potential_headers.append((i, row, header_score, row_text))
            print(f"  Row {i+1}: score={header_score}, text='{row_text[:60]}...'")

        # Find the best header candidate
        best_header = max(potential_headers, key=lambda x: x[2])
        if best_header[2] > 0:
            print(f"\n  ✅ Best header candidate: Row {best_header[0]+1} (score={best_header[2]})")
            print(f"  Text: {best_header[3]}")
        else:
            print(f"\n  ❌ No good header candidates found")

        return target_table
    except Exception as e:
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()
        return None


if __name__ == "__main__":
    print("🎯 DEBUGGING TABLE STRUCTURE PARSING")
    print("Focus: Why tables lose structure during parsing")
    print()

    # Step 1: Examine raw HTML
    html_table = examine_raw_html_table()

    # Step 2: Debug the parsing pipeline
    parsed_table = debug_table_parsing_pipeline()

    print(f"\n🎯 DIAGNOSIS:")
    if html_table and parsed_table:
        print("The table exists in HTML and is being parsed into a TableNode.")
        print("The issue is likely in header detection - the parser isn't")
        print("properly identifying which rows should be headers vs data.")
        print(f"\n🔧 SOLUTION:")
        print("1. Improve header detection logic in table parsing")
        print("2. Look for rows with year indicators (2025, 2024, 2023) as headers")
        print("3. Handle tables without explicit <th> tags better")
        print("4. Keep Rich rendering as default for beautiful output")
    else:
        print("Basic table parsing is failing - need to investigate further.")


@@ -0,0 +1,209 @@
"""
Check specific edge cases in our solution
"""
from edgar import Company
def check_instant_facts():
"""Check how we handle instant facts (balance sheet items)"""
print("\n1. INSTANT FACTS (Balance Sheet Items)")
print("-" * 50)
aapl = Company("AAPL")
facts = aapl.facts._facts
# Look for balance sheet instant facts
instant_count = 0
duration_count = 0
for fact in facts:
if fact.statement_type == 'BalanceSheet' and fact.fiscal_period == 'FY':
if fact.fiscal_year == 2023:
if fact.period_start:
duration_count += 1
else:
instant_count += 1
print(f" Balance Sheet FY 2023 facts:")
print(f" - With duration (period_start exists): {duration_count}")
print(f" - Instant (no period_start): {instant_count}")
print(f" ✓ Our solution handles instant facts correctly (no duration check)")
def check_fiscal_year_boundaries():
"""Check companies with different fiscal year ends"""
print("\n2. FISCAL YEAR BOUNDARY ISSUES")
print("-" * 50)
# Microsoft has June year-end
msft = Company("MSFT")
facts = msft.facts._facts
print(" Microsoft (June year-end):")
for fact in facts:
if fact.statement_type == 'IncomeStatement' and fact.fiscal_period == 'FY':
if fact.fiscal_year == 2023 and 'Revenue' in str(fact.concept):
if fact.period_start and fact.period_end:
duration = (fact.period_end - fact.period_start).days
if duration > 300:
print(f" FY 2023: {fact.period_start} to {fact.period_end}")
print(f" Period end year: {fact.period_end.year}")
print(f" Fiscal year: {fact.fiscal_year}")
match = "" if fact.period_end.year == fact.fiscal_year else ""
print(f" Year match: {match}")
break
# Walmart has January year-end
print("\n Walmart (January year-end):")
wmt = Company("WMT")
facts = wmt.facts._facts
for fact in facts:
if fact.statement_type == 'IncomeStatement' and fact.fiscal_period == 'FY':
if fact.fiscal_year == 2023 and 'Revenue' in str(fact.concept):
if fact.period_start and fact.period_end:
duration = (fact.period_end - fact.period_start).days
if duration > 300:
print(f" FY 2023: {fact.period_start} to {fact.period_end}")
print(f" Period end year: {fact.period_end.year}")
print(f" Fiscal year: {fact.fiscal_year}")
match = "" if fact.period_end.year == fact.fiscal_year else ""
print(f" Year match: {match}")
break
def check_duration_edge_cases():
"""Check edge cases around our 300-day threshold"""
print("\n3. DURATION EDGE CASES")
print("-" * 50)
# Collect all annual durations across companies
test_tickers = ['AAPL', 'MSFT', 'WMT', 'JNJ', 'TSLA']
all_durations = []
for ticker in test_tickers:
try:
company = Company(ticker)
facts = company.facts._facts
for fact in facts:
if fact.statement_type == 'IncomeStatement' and fact.fiscal_period == 'FY':
if fact.fiscal_year >= 2020 and 'Revenue' in str(fact.concept):
if fact.period_start and fact.period_end:
duration = (fact.period_end - fact.period_start).days
if duration > 200: # Collect all potentially annual
all_durations.append((ticker, duration))
except:
pass
# Analyze distribution
from collections import Counter
duration_counts = Counter([d for _, d in all_durations])
print(" Duration distribution for FY Revenue facts:")
for duration in sorted(set([d for _, d in all_durations])):
count = duration_counts[duration]
if duration < 300:
status = "❌ Would be filtered out"
elif duration > 400:
status = "⚠️ Unusually long"
else:
status = "✓ Accepted as annual"
print(f" {duration} days: {count} facts - {status}")
# Check if any annual facts are < 300 days
short_annuals = [d for _, d in all_durations if d >= 250 and d < 300]
if short_annuals:
print(f"\n ⚠️ WARNING: Found {len(short_annuals)} facts between 250-300 days")
print(f" These might be annual but would be filtered out")
def check_leap_year_impact():
"""Check if leap years affect our logic"""
print("\n4. LEAP YEAR IMPACT")
print("-" * 50)
# 2020 was a leap year
aapl = Company("AAPL")
facts = aapl.facts._facts
leap_year_durations = []
regular_year_durations = []
for fact in facts:
if fact.statement_type == 'IncomeStatement' and fact.fiscal_period == 'FY':
if 'Revenue' in str(fact.concept):
if fact.period_start and fact.period_end:
duration = (fact.period_end - fact.period_start).days
if duration > 300:
if fact.fiscal_year == 2020:
leap_year_durations.append(duration)
elif fact.fiscal_year in [2019, 2021]:
regular_year_durations.append(duration)
if leap_year_durations and regular_year_durations:
print(f" Leap year (2020) durations: {set(leap_year_durations)}")
print(f" Regular year durations: {set(regular_year_durations)}")
print(f" ✓ Difference is minimal, 300-day threshold handles both")
def check_amended_filings():
"""Check how amended filings affect our logic"""
print("\n5. AMENDED FILINGS")
print("-" * 50)
# Look for duplicate facts from amendments
aapl = Company("AAPL")
facts = aapl.facts._facts
# Track facts by fiscal year and duration
from collections import defaultdict
facts_by_year_duration = defaultdict(list)
for fact in facts:
if fact.statement_type == 'IncomeStatement' and fact.fiscal_period == 'FY':
if fact.fiscal_year == 2023 and 'Revenue' in str(fact.concept):
if fact.period_start and fact.period_end:
duration = (fact.period_end - fact.period_start).days
if duration > 300:
key = (fact.fiscal_year, duration, fact.period_end)
facts_by_year_duration[key].append({
'value': fact.value,
'filing_date': fact.filing_date,
'accession': fact.accession if hasattr(fact, 'accession') else None
})
# Check for duplicates
for key, facts_list in facts_by_year_duration.items():
if len(facts_list) > 1:
year, duration, end_date = key
print(f" Found {len(facts_list)} facts for FY {year} ({duration} days, ends {end_date}):")
for f in facts_list:
print(f" Value: ${f['value']:,.0f}, Filed: {f['filing_date']}")
print(" ⚠️ Multiple facts for same period - might need to pick latest filing")
# Run all checks
if __name__ == "__main__":
print("=" * 60)
print("EDGE CASE ANALYSIS FOR DURATION-BASED SOLUTION")
print("=" * 60)
check_instant_facts()
check_fiscal_year_boundaries()
check_duration_edge_cases()
check_leap_year_impact()
check_amended_filings()
print("\n" + "=" * 60)
print("SUMMARY OF FINDINGS")
print("=" * 60)
print("\n✓ STRENGTHS:")
print(" 1. 300-day threshold works well for standard annual periods (363-365 days)")
print(" 2. Instant facts (balance sheet) handled correctly")
print(" 3. Leap years don't cause issues")
print("\n⚠️ POTENTIAL ISSUES:")
print(" 1. Fiscal year boundary: Some companies' FY doesn't match calendar year")
print(" - WMT FY 2023 ends in Jan 2023 (year mismatch)")
print(" 2. Amended filings might create duplicates")
print(" 3. No handling for multi-year aggregates (>400 days)")
print("\nRECOMMENDED IMPROVEMENTS:")
print(" 1. For fiscal year matching, be more flexible:")
print(" - Allow FY to match period_end.year OR period_end.year + 1")
print(" 2. When duplicates exist, prefer latest filing_date")
print(" 3. Add upper bound check (duration < 400) to exclude multi-year")

View File

@@ -0,0 +1,170 @@
#!/usr/bin/env python3
"""
Test that the table parsing issue is actually fixed with proper config propagation.
"""
import sys
sys.path.insert(0, '/Users/dwight/PycharmProjects/edgartools')
from edgar.documents.parser import HTMLParser
from edgar.documents.config import ParserConfig
from edgar.documents.table_nodes import TableNode
def test_msft_table_with_proper_config():
"""Test MSFT table with proper config propagation."""
print("🧪 TESTING MSFT TABLE WITH PROPER CONFIG")
print("=" * 60)
try:
# Parse the document with explicit config
with open('/Users/dwight/PycharmProjects/edgartools/data/html/MSFT.10-K.html', 'r') as f:
html_content = f.read()
# Test with explicit fast rendering config
config = ParserConfig(fast_table_rendering=True)
parser = HTMLParser(config)
document = parser.parse(html_content)
print(f"Config fast_table_rendering: {config.fast_table_rendering}")
# Find the target table
target_table = None
def find_target(node):
nonlocal target_table
if isinstance(node, TableNode):
try:
if "Weighted average outstanding shares" in node.text():
target_table = node
return
except:
pass
if hasattr(node, 'children'):
for child in node.children:
find_target(child)
find_target(document.root)
if not target_table:
print("❌ Target table not found")
return False
print("✅ Found target table!")
# Ensure config is set on the table
target_table._config = config
# Test the output
table_text = target_table.text()
print(f"\nTable output ({len(table_text)} characters):")
print("-" * 40)
print(table_text)
print("-" * 40)
# Check for proper formatting
lines = table_text.split('\n')
pipe_lines = [line for line in lines if '|' in line and line.strip()]
print(f"\nFormatting analysis:")
print(f" Total lines: {len(lines)}")
print(f" Lines with pipes: {len(pipe_lines)}")
print(f" Contains target text: {'' if 'Weighted average outstanding shares' in table_text else ''}")
if len(pipe_lines) > 5 and 'Weighted average outstanding shares' in table_text:
print("✅ TABLE IS PROPERLY FORMATTED!")
return True
else:
print("❌ Table formatting issues persist")
return False
except Exception as e:
print(f"❌ Error: {e}")
import traceback
traceback.print_exc()
return False
def verify_config_propagation():
"""Verify that table nodes receive the config during parsing."""
print(f"\n🔧 VERIFYING CONFIG PROPAGATION")
print("=" * 60)
# We need to check if the HTMLParser properly sets config on table nodes
# This might require modifications to ensure config propagation
print("Checking if TableNodes receive config during parsing...")
# Create a simple test HTML
simple_html = """
<html>
<body>
<table>
<tr><td>Header 1</td><td>Header 2</td></tr>
<tr><td>Data 1</td><td>Data 2</td></tr>
</table>
</body>
</html>
"""
config = ParserConfig(fast_table_rendering=True)
parser = HTMLParser(config)
document = parser.parse(simple_html)
# Find table and check config
table_found = False
config_working = False
def check_table_config(node):
# Track results via nonlocal flags: the recursive calls below discard
# return values, so returning a bool from the TableNode branch would be lost.
nonlocal table_found, config_working
if isinstance(node, TableNode):
table_found = True
has_config = hasattr(node, '_config')
config_matches = has_config and node._config.fast_table_rendering
print(f" Table found: ✅")
print(f" Has _config attribute: {'✅' if has_config else '❌'}")
print(f" Config fast_table_rendering: {'✅' if config_matches else '❌'}")
if not has_config:
print(" 🔧 Setting config manually...")
node._config = config
test_text = node.text()
print(f" Manual config test: {'✅' if '|' in test_text else '❌'}")
print(f" Test output preview: {test_text[:50]}...")
if has_config and config_matches:
config_working = True
return
if hasattr(node, 'children'):
for child in node.children:
check_table_config(child)
check_table_config(document.root)
if not table_found:
print(" ❌ No table found in simple test")
return False
return config_working
if __name__ == "__main__":
print("🎯 FINAL TEST: MSFT TABLE PARSING FIX")
print()
# Test config propagation
config_ok = verify_config_propagation()
# Test MSFT table
table_ok = test_msft_table_with_proper_config()
print(f"\n🏁 FINAL RESULTS:")
print(f" Config propagation: {'' if config_ok else ''}")
print(f" MSFT table formatting: {'' if table_ok else ''}")
if table_ok:
print(f"\n🎉 SUCCESS!")
print("The MSFT table parsing issue has been resolved!")
print("Tables now render with proper pipe formatting.")
else:
print(f"\n🔧 NEEDS WORK:")
if not config_ok:
print("- Config propagation to TableNodes needs to be implemented")
if not table_ok:
print("- Table formatting still has issues")
print("\nRecommended fix: Ensure HTMLParser sets _config on all TableNode instances")

View File

@@ -0,0 +1,196 @@
#!/usr/bin/env python3
"""
Test the improved header detection logic.
"""
import sys
sys.path.insert(0, '/Users/dwight/PycharmProjects/edgartools')
from edgar.documents.parser import HTMLParser
from edgar.documents.config import ParserConfig
from edgar.documents.table_nodes import TableNode
def test_header_detection_improvement():
print("🔧 TESTING IMPROVED HEADER DETECTION")
print("=" * 50)
try:
with open('/Users/dwight/PycharmProjects/edgartools/data/html/MSFT.10-K.html', 'r') as f:
html_content = f.read()
# Use default config (Rich rendering)
config = ParserConfig()
parser = HTMLParser(config)
document = parser.parse(html_content)
# Find target table
target_table = None
def find_target(node):
nonlocal target_table
if isinstance(node, TableNode):
try:
if "Weighted average outstanding shares" in node.text():
target_table = node
return
except:
pass
if hasattr(node, 'children'):
for child in node.children:
find_target(child)
find_target(document.root)
if not target_table:
print("❌ Target table not found")
return False
print("✅ Found target table")
# Check the results
print(f"\nImproved parsing results:")
print(f" Headers detected: {len(target_table.headers)} rows")
print(f" Data rows: {len(target_table.rows)}")
if target_table.headers:
print(f"\n📋 DETECTED HEADERS:")
for i, header_row in enumerate(target_table.headers):
header_texts = [cell.text().strip() for cell in header_row if cell.text().strip()]
print(f" Header row {i+1}: {header_texts}")
else:
print(f"\n❌ Still no headers detected")
return False
# Test Rich rendering with proper headers
print(f"\n🎨 TESTING RICH RENDERING:")
rich_table = target_table.render(width=120)
from edgar.richtools import rich_to_text
rich_text = rich_to_text(rich_table)
# Check if Rich now produces structured output
lines = rich_text.split('\n')
structured_lines = [line for line in lines if any(c in line for c in '┌┐└┘├┤│─')]
print(f" Rich output length: {len(rich_text)} chars")
print(f" Total lines: {len(lines)}")
print(f" Structured lines: {len(structured_lines)}")
if len(structured_lines) > 5:
print(f" ✅ Rich output is now properly structured!")
# Show a sample of the structured output
print(f"\n📊 RICH TABLE SAMPLE:")
for i, line in enumerate(lines[:10]):
if line.strip():
print(f" {line}")
return True
else:
print(f" ❌ Rich output still lacks proper structure")
print(f" Sample lines:")
for i, line in enumerate(lines[:5]):
print(f" {i+1}: {line[:60]}{'...' if len(line) > 60 else ''}")
return False
except Exception as e:
print(f"❌ Error: {e}")
import traceback
traceback.print_exc()
return False
def compare_before_after():
"""Compare table quality across all tables after the fix."""
print(f"\n📊 COMPARING TABLE QUALITY ACROSS ALL TABLES")
print("=" * 50)
try:
with open('/Users/dwight/PycharmProjects/edgartools/data/html/MSFT.10-K.html', 'r') as f:
html_content = f.read()
config = ParserConfig()
parser = HTMLParser(config)
document = parser.parse(html_content)
# Collect all tables
all_tables = []
def collect_tables(node):
if isinstance(node, TableNode):
all_tables.append(node)
if hasattr(node, 'children'):
for child in node.children:
collect_tables(child)
collect_tables(document.root)
print(f"Found {len(all_tables)} total tables")
# Analyze table quality
good_tables = 0
tables_with_headers = 0
from edgar.richtools import rich_to_text
for i, table in enumerate(all_tables):
try:
# Count tables with headers
if table.headers:
tables_with_headers += 1
# Test Rich rendering quality
rich_table = table.render(width=120)
rich_text = rich_to_text(rich_table)
lines = rich_text.split('\n')
structured_lines = [line for line in lines if any(c in line for c in '┌┐└┘├┤│─')]
if len(structured_lines) > 3:
good_tables += 1
except Exception as e:
pass # Skip problematic tables
print(f"\nTable quality summary:")
print(f" Tables with headers: {tables_with_headers}/{len(all_tables)} ({tables_with_headers/len(all_tables)*100:.1f}%)")
print(f" Well-structured tables: {good_tables}/{len(all_tables)} ({good_tables/len(all_tables)*100:.1f}%)")
if tables_with_headers > 0:
print(f" ✅ Header detection is working!")
else:
print(f" ❌ Header detection still needs work")
if good_tables > 0:
print(f" ✅ Some tables now render with proper structure!")
else:
print(f" ❌ Rich rendering still needs improvement")
return tables_with_headers > 0 and good_tables > 0
except Exception as e:
print(f"❌ Error: {e}")
return False
if __name__ == "__main__":
print("🎯 TESTING IMPROVED TABLE PARSING")
print("Focus: Better header detection for Rich table rendering")
print()
# Test specific target table
target_success = test_header_detection_improvement()
# Test overall improvement
overall_success = compare_before_after()
print(f"\n🏁 FINAL RESULTS:")
print(f" Target table fixed: {'' if target_success else ''}")
print(f" Overall improvement: {'' if overall_success else ''}")
if target_success and overall_success:
print(f"\n🎉 SUCCESS!")
print("The table parsing issue has been resolved!")
print("Tables now render with beautiful Rich formatting!")
elif target_success:
print(f"\n🎯 PARTIAL SUCCESS!")
print("The target table is fixed, but more work needed on other tables.")
else:
print(f"\n🔧 MORE WORK NEEDED")
print("Header detection improvements aren't sufficient yet.")

View File

@@ -0,0 +1,194 @@
#!/usr/bin/env python3
"""
Test the improved FastTableRenderer with column filtering.
"""
import sys
sys.path.insert(0, '/Users/dwight/PycharmProjects/edgartools')
from edgar.documents.parser import HTMLParser
from edgar.documents.config import ParserConfig
from edgar.documents.table_nodes import TableNode
def test_improved_rendering():
print("🧪 TESTING IMPROVED FAST TABLE RENDERER")
print("=" * 55)
try:
with open('/Users/dwight/PycharmProjects/edgartools/data/html/MSFT.10-K.html', 'r') as f:
html_content = f.read()
config = ParserConfig(fast_table_rendering=True)
parser = HTMLParser(config)
document = parser.parse(html_content)
# Find target table
target_table = None
def find_target(node):
nonlocal target_table
if isinstance(node, TableNode):
try:
if "Weighted average outstanding shares" in node.text():
target_table = node
return
except:
pass
if hasattr(node, 'children'):
for child in node.children:
find_target(child)
find_target(document.root)
if not target_table:
print("❌ Target table not found")
return False
print("✅ Found target table")
# Clear cache to get fresh rendering
if hasattr(target_table, '_text_cache'):
target_table._text_cache = None
# Get new table text
table_text = target_table.text()
print(f"\nImproved table output ({len(table_text)} characters):")
print("-" * 60)
print(table_text)
print("-" * 60)
# Analyze the improvement
lines = [line for line in table_text.split('\n') if line.strip()]
pipe_lines = [line for line in lines if '|' in line]
if pipe_lines:
# Count columns in the first content line
first_content_line = pipe_lines[0]
column_count = first_content_line.count('|') - 1 # Subtract 1 for border
print(f"\nTable structure analysis:")
print(f" Total lines: {len(lines)}")
print(f" Lines with pipes: {len(pipe_lines)}")
print(f" Columns: {column_count}")
# Check if it looks reasonable (should be ~4 columns: Description, 2025, 2024, 2023)
if 3 <= column_count <= 6:
print(f" ✅ Column count looks reasonable ({column_count} columns)")
else:
print(f" ⚠️ Column count still seems high ({column_count} columns)")
# Check for specific improvements
improvements = []
issues = []
if "Weighted average outstanding shares" in table_text:
improvements.append("Contains target text")
else:
issues.append("Missing target text")
if "|" in table_text:
improvements.append("Has pipe separators")
else:
issues.append("No pipe separators")
# Count empty columns (sequences of | | | with only spaces between)
import re
empty_column_pattern = r'\|\s*\|\s*\|'
empty_sequences = len(re.findall(empty_column_pattern, table_text))
if empty_sequences < 5: # Much fewer than before
improvements.append("Reduced empty columns")
else:
issues.append("Still many empty columns")
if len(table_text) < 2000: # Should be more compact
improvements.append("More compact output")
else:
issues.append("Still verbose output")
print(f"\nQuality assessment:")
if improvements:
print(" ✅ Improvements:")
for improvement in improvements:
print(f" - {improvement}")
if issues:
print(" ⚠️ Remaining issues:")
for issue in issues:
print(f" - {issue}")
# Show sample of first few lines for readability
print(f"\nFirst few lines preview:")
for i, line in enumerate(pipe_lines[:5]):
print(f" {i+1}: {line}")
return len(issues) == 0
except Exception as e:
print(f"❌ Error: {e}")
import traceback
traceback.print_exc()
return False
def compare_with_rich():
"""Compare the improved fast renderer with Rich renderer."""
print(f"\n🔄 COMPARING WITH RICH RENDERER")
print("=" * 55)
try:
with open('/Users/dwight/PycharmProjects/edgartools/data/html/MSFT.10-K.html', 'r') as f:
html_content = f.read()
# Test both renderers
configs = [
("Fast Renderer", ParserConfig(fast_table_rendering=True)),
("Rich Renderer", ParserConfig(fast_table_rendering=False)),
]
for config_name, config in configs:
print(f"\n🔧 {config_name}:")
parser = HTMLParser(config)
document = parser.parse(html_content)
# Find target table
target_table = None
def find_target(node):
nonlocal target_table
if isinstance(node, TableNode):
try:
if "Weighted average outstanding shares" in node.text():
target_table = node
return
except:
pass
if hasattr(node, 'children'):
for child in node.children:
find_target(child)
find_target(document.root)
if target_table:
table_text = target_table.text()
lines = table_text.split('\n')
pipe_lines = [line for line in lines if '|' in line and line.strip()]
print(f" Length: {len(table_text)} chars")
print(f" Lines: {len(lines)}")
print(f" Pipe lines: {len(pipe_lines)}")
print(f" Contains target: {'' if 'Weighted average outstanding shares' in table_text else ''}")
print(f" First line: {lines[0][:60]}..." if lines else " No lines")
else:
print(" ❌ Table not found")
except Exception as e:
print(f"❌ Comparison failed: {e}")
if __name__ == "__main__":
success = test_improved_rendering()
compare_with_rich()
if success:
print(f"\n🎉 SUCCESS!")
print("The improved FastTableRenderer is working well!")
else:
print(f"\n🔧 NEEDS MORE WORK")
print("The renderer still needs improvements.")

View File

@@ -0,0 +1,134 @@
"""
Test our duration-based solution across different companies to identify edge cases
"""
from edgar import Company
from collections import defaultdict
import sys
def analyze_company_periods(ticker, company_name):
"""Analyze period durations for a company"""
print(f"\n{'='*60}")
print(f"Analyzing {company_name} ({ticker})")
print('='*60)
try:
company = Company(ticker)
facts = company.facts
raw_facts = facts._facts
# Find FY facts with different durations
fy_facts_by_duration = defaultdict(list)
for fact in raw_facts:
if fact.statement_type == 'IncomeStatement' and fact.fiscal_period == 'FY':
if fact.fiscal_year and fact.fiscal_year >= 2019:
# Check for revenue facts
if 'Revenue' in str(fact.concept):
duration = None
if fact.period_start and fact.period_end:
duration = (fact.period_end - fact.period_start).days
duration_bucket = "No duration"
if duration:
if duration < 100:
duration_bucket = f"Quarterly (~{duration} days)"
elif duration > 300 and duration < 400:
duration_bucket = f"Annual (~{duration} days)"
elif duration > 180 and duration < 200:
duration_bucket = f"Semi-annual (~{duration} days)"
elif duration > 700:
duration_bucket = f"Multi-year (~{duration} days)"
else:
duration_bucket = f"Other ({duration} days)"
fy_facts_by_duration[duration_bucket].append({
'year': fact.fiscal_year,
'value': fact.value,
'duration': duration,
'period_end': fact.period_end
})
# Report findings
for bucket in sorted(fy_facts_by_duration.keys()):
facts_list = fy_facts_by_duration[bucket]
print(f"\n{bucket}: {len(facts_list)} facts")
# Show a few examples
for fact in facts_list[:3]:
print(f" FY {fact['year']}: ${fact['value']:,.0f}")
return fy_facts_by_duration
except Exception as e:
print(f" Error: {e}")
return None
# Test various types of companies
test_companies = [
('AAPL', 'Apple - Tech Giant'),
('MSFT', 'Microsoft - Different fiscal year end'),
('WMT', 'Walmart - Retail with Jan year end'),
('BAC', 'Bank of America - Financial institution'),
('JNJ', 'Johnson & Johnson - Healthcare'),
('TSLA', 'Tesla - Newer company'),
('AMZN', 'Amazon - E-commerce'),
('XOM', 'Exxon - Energy sector'),
]
# Analyze each company
results = {}
for ticker, name in test_companies:
result = analyze_company_periods(ticker, name)
if result:
results[ticker] = result
# Summary of potential issues
print("\n" + "="*60)
print("POTENTIAL ISSUES WITH OUR SOLUTION")
print("="*60)
print("\n1. DURATION THRESHOLD (>300 days):")
print(" Our fix assumes annual = >300 days")
print(" Potential issues:")
# Check for edge cases around 300 days
for ticker in results:
for bucket in results[ticker]:
if "Other" in bucket or "Semi-annual" in bucket:
print(f" - {ticker} has unusual duration: {bucket}")
print("\n2. NO DURATION DATA:")
print(" Some facts might not have period_start")
for ticker in results:
if "No duration" in results[ticker]:
count = len(results[ticker]["No duration"])
print(f" - {ticker}: {count} facts without duration")
print("\n3. FISCAL YEAR VARIATIONS:")
print(" Companies have different fiscal year ends:")
fiscal_year_ends = {
'AAPL': 'September',
'MSFT': 'June',
'WMT': 'January',
'BAC': 'December',
'JNJ': 'December',
'TSLA': 'December',
'AMZN': 'December',
'XOM': 'December'
}
for ticker, month in fiscal_year_ends.items():
print(f" - {ticker}: Fiscal year ends in {month}")
print("\n4. MULTI-YEAR FACTS:")
print(" Some companies might report multi-year aggregates")
for ticker in results:
# Bucket keys look like "Multi-year (~730 days)", so match by prefix
# rather than an exact "Multi-year" key lookup.
for bucket in results[ticker]:
if bucket.startswith("Multi-year"):
count = len(results[ticker][bucket])
print(f" - {ticker}: {count} multi-year facts found")
print("\nRECOMMENDATIONS:")
print("1. The 300-day threshold works for most companies")
print("2. Consider 350-380 days as 'normal' annual range")
print("3. Handle edge cases:")
print(" - No duration: Could check fiscal_period or use other heuristics")
print(" - Multi-year: Filter out (duration > 400)")
print(" - Semi-annual: Rare but should be filtered for annual=True")

View File

@@ -0,0 +1,145 @@
#!/usr/bin/env python3
"""
Test specific header detection logic on the target table rows.
"""
import sys
sys.path.insert(0, '/Users/dwight/PycharmProjects/edgartools')
import re
from edgar.documents.parser import HTMLParser
from edgar.documents.config import ParserConfig
from edgar.documents.table_nodes import TableNode
def test_header_detection_logic():
print("🔍 TESTING SPECIFIC HEADER DETECTION LOGIC")
print("=" * 50)
try:
with open('/Users/dwight/PycharmProjects/edgartools/data/html/MSFT.10-K.html', 'r') as f:
html_content = f.read()
# Parse document
config = ParserConfig()
parser = HTMLParser(config)
document = parser.parse(html_content)
# Find target table
target_table = None
def find_target(node):
nonlocal target_table
if isinstance(node, TableNode):
try:
if "Weighted average outstanding shares" in node.text():
target_table = node
return
except:
pass
if hasattr(node, 'children'):
for child in node.children:
find_target(child)
find_target(document.root)
if not target_table:
print("❌ Target table not found")
return
print("✅ Found target table")
print(f"Current status: {len(target_table.headers)} headers, {len(target_table.rows)} data rows")
# Test our header detection logic on each of the first few rows
print(f"\n🔧 TESTING HEADER DETECTION ON FIRST 7 ROWS:")
for i, row in enumerate(target_table.rows[:7]):
print(f"\n--- ROW {i+1} ---")
# Get the row text
row_text = ' '.join(cell.text().strip() for cell in row.cells)
print(f"Row text: '{row_text}'")
# Test each part of our header detection logic
score = 0
reasons = []
# 1. Check for year patterns in the combined text
year_pattern = r'\b(19\d{2}|20\d{2})\b'
years_found = re.findall(year_pattern, row_text)
if len(years_found) >= 2:
if 'total' not in row_text.lower()[:20]:
score += 3
reasons.append(f"Multiple years found: {years_found}")
# 2. Enhanced year detection - check individual cells
year_cells = 0
date_phrases = 0
cell_contents = []
for cell in row.cells:
cell_text = cell.text().strip()
cell_contents.append(f"'{cell_text}'")
if cell_text:
# Check for individual years
if re.match(r'^\s*(19\d{2}|20\d{2})\s*$', cell_text):
year_cells += 1
# Check for date phrases
elif 'june 30' in cell_text.lower() or 'december 31' in cell_text.lower():
date_phrases += 1
print(f"Cell contents: {cell_contents[:5]}{'...' if len(cell_contents) > 5 else ''}")
print(f"Year cells: {year_cells}, Date phrases: {date_phrases}")
if year_cells >= 2 or (year_cells >= 1 and date_phrases >= 1):
if 'total' not in row_text.lower()[:20]:
score += 4
reasons.append(f"Enhanced year detection: {year_cells} year cells, {date_phrases} date phrases")
# 3. Check for financial header patterns
row_text_lower = row_text.lower()
financial_patterns = [
r'year\s+ended\s+(june|december|march|september)',
r'(three|six|nine|twelve)\s+months?\s+ended',
r'\(in\s+(millions|thousands|billions)\)',
r'fiscal\s+year\s+ended'
]
for pattern in financial_patterns:
if re.search(pattern, row_text_lower):
score += 2
reasons.append(f"Financial pattern: {pattern}")
# 4. Check for period indicators
period_keywords = ['quarter', 'q1', 'q2', 'q3', 'q4', 'month',
'january', 'february', 'march', 'april', 'may', 'june',
'july', 'august', 'september', 'october', 'november', 'december',
'ended', 'three months', 'six months', 'nine months']
matching_keywords = [kw for kw in period_keywords if kw in row_text_lower]
if matching_keywords:
score += 1
reasons.append(f"Period keywords: {matching_keywords}")
print(f"HEADER SCORE: {score}")
if reasons:
print(f"Reasons: {', '.join(reasons)}")
# Determine if this should be considered a header
should_be_header = score >= 3
print(f"SHOULD BE HEADER: {'YES' if should_be_header else 'NO'}")
if should_be_header and i == 4: # Row 5 (index 4) is our expected header
print("🎯 This matches our expected header row!")
elif should_be_header:
print("⚠️ This would be detected as a header but wasn't expected")
elif i == 4:
print("❌ This should be the header row but isn't being detected!")
return target_table
except Exception as e:
print(f"❌ Error: {e}")
import traceback
traceback.print_exc()
return None
if __name__ == "__main__":
test_header_detection_logic()

View File

@@ -0,0 +1,98 @@
"""
Verify the fiscal year pattern across companies
"""
from edgar import Company
def check_fiscal_year_pattern(ticker, name):
"""Check the relationship between fiscal_year and period_end.year"""
print(f"\n{name} ({ticker}):")
print("-" * 40)
try:
company = Company(ticker)
facts = company.facts._facts
# Collect FY facts with revenue
fy_facts = []
for fact in facts:
if fact.statement_type == 'IncomeStatement' and fact.fiscal_period == 'FY':
if fact.fiscal_year and fact.fiscal_year >= 2019 and fact.fiscal_year <= 2024:
if 'Revenue' in str(fact.concept):
if fact.period_start and fact.period_end:
duration = (fact.period_end - fact.period_start).days
if duration > 300 and duration < 400: # Annual only
fy_facts.append({
'fiscal_year': fact.fiscal_year,
'period_end': fact.period_end,
'period_end_year': fact.period_end.year,
'difference': fact.fiscal_year - fact.period_end.year
})
# Deduplicate and sort
unique_facts = {}
for f in fy_facts:
key = (f['fiscal_year'], f['period_end'])
unique_facts[key] = f
# Analyze the pattern
differences = set()
for f in unique_facts.values():
differences.add(f['difference'])
print(f" Fiscal Year vs Period End Year differences: {sorted(differences)}")
# Show examples
print("\n Examples:")
for f in sorted(unique_facts.values(), key=lambda x: x['fiscal_year'], reverse=True)[:5]:
print(f" FY {f['fiscal_year']} → ends {f['period_end']} (diff: {f['difference']} years)")
# What's the consistent pattern?
if len(differences) == 1:
diff = list(differences)[0]
print(f"\n ✓ Consistent pattern: fiscal_year = period_end.year + {diff}")
else:
print(f"\n ⚠️ Multiple patterns found: {differences}")
return differences
except Exception as e:
print(f" Error: {e}")
return set()
# Test various companies
companies = [
('AAPL', 'Apple (Sept year-end)'),
('MSFT', 'Microsoft (June year-end)'),
('WMT', 'Walmart (Jan year-end)'),
('AMZN', 'Amazon (Dec year-end)'),
('JNJ', 'J&J (Dec year-end)'),
('TSLA', 'Tesla (Dec year-end)'),
]
all_differences = set()
for ticker, name in companies:
diffs = check_fiscal_year_pattern(ticker, name)
all_differences.update(diffs)
print("\n" + "="*60)
print("CONCLUSION")
print("="*60)
if len(all_differences) == 1:
diff = list(all_differences)[0]
print(f"\n✓ ALL companies show the same pattern:")
print(f" fiscal_year = period_end.year + {diff}")
print("\nThis appears to be how the SEC Facts API structures the data!")
print("The 'fiscal_year' field indicates when the data was filed/reported,")
print("not the actual year of the fiscal period.")
else:
print(f"\n⚠️ Different companies show different patterns: {all_differences}")
print("The most common pattern seems to be a 2-year difference.")
print("\nIMPLICATION FOR OUR FIX:")
print("We should NOT require fiscal_year == period_end.year")
print("Instead, we should:")
print("1. Use duration (>300 days) as the primary filter")
print("2. Match facts where fiscal_year is within 0-3 years of period_end.year")
print("3. Deduplicate by keeping the latest period_end for each actual year")

View File

@@ -0,0 +1,99 @@
"""
Entity module for the EdgarTools library.
This module provides the Entity, Company, Fund, and related classes
for working with SEC filers.
"""
# Import for backward compatibility
from edgar.entity.constants import COMPANY_FORMS
from edgar.entity.core import (
Company,
Entity,
SecFiler,
get_company,
get_entity,
public_companies,
)
from edgar.entity.utils import has_company_filings, normalize_cik
from edgar.entity.data import Address, CompanyData, EntityData
from edgar.entity.entity_facts import (
EntityFacts,
NoCompanyFactsFound,
get_company_facts,
)
from edgar.entity.filings import EntityFiling, EntityFilings
from edgar.entity.search import CompanySearchIndex, CompanySearchResults, find_company
from edgar.entity.submissions import (
create_company_from_file,
create_entity_from_file,
create_entity_from_submissions_json,
download_entity_submissions_from_sec,
get_entity_submissions,
)
from edgar.entity.tickers import (
find_cik,
find_ticker,
get_cik_lookup_data,
get_company_tickers,
get_icon_from_ticker,
get_ticker_to_cik_lookup,
)
# Import from the funds package instead of entity.funds
from edgar.funds import FundData, FundSeries
# Aliases for backward compatibility
CompanyFiling = EntityFiling
CompanyFilings = EntityFilings
__all__ = [
# Core classes
'SecFiler',
'Entity',
'Company',
'FundSeries',
# Data classes
'EntityData',
'CompanyData',
'FundData',
'Address',
# Filing classes
'EntityFiling',
'EntityFilings',
'EntityFacts',
# Factory functions
'get_entity',
'get_company',
'public_companies',
# Search functions
'find_company',
'CompanySearchResults',
'CompanySearchIndex',
# Ticker functions
'get_icon_from_ticker',
'get_company_tickers',
'get_ticker_to_cik_lookup',
'get_cik_lookup_data',
'find_cik',
'find_ticker',
# Submission functions
'get_entity_submissions',
'download_entity_submissions_from_sec',
'create_entity_from_submissions_json',
'create_entity_from_file',
'create_company_from_file',
# Fact functions
'get_company_facts',
# Exceptions
'NoCompanyFactsFound',
# Constants and utilities
'COMPANY_FORMS',
'has_company_filings',
'normalize_cik',
# Backwards compatibility
'CompanyFiling',
'CompanyFilings',
]
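# Usage sketch (illustrative; relies only on the names exported above):
#
#     from edgar.entity import Company, find_company
#     aapl = Company("AAPL")                  # look up by ticker
#     filings = aapl.get_filings(form="10-K")
#     results = find_company("Tesla")         # company name search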

View File

@@ -0,0 +1,80 @@
"""
Constants for entity classification and form types.
This module contains constants used throughout the entity package for
determining entity types and form classifications.
"""
# Performance optimization: use set for O(1) lookups
COMPANY_FORMS = {
# Registration statements
"S-1", "S-3", "S-4", "S-8", "S-11",
# Foreign issuers registration forms
"F-1", "F-3", "F-4", "F-6", "F-7", "F-8", "F-9", "F-10", "F-80",
# Foreign form amendments and effectiveness
"F-6EF", "F-6 POS", "F-3ASR", "F-4MEF", "F-10EF", "F-3D", "F-3MEF",
# Exchange Act registration
"10-12B", "10-12G",
# Periodic reports
"10-K", "10-Q", "10-K/A", "10-Q/A",
"20-F", "40-F", # Foreign issuers
"11-K", # Employee benefit plans
# Current reports
"8-K", "6-K",
# Proxy materials
"DEF 14A", "PRE 14A", "DEFA14A", "DEFM14A",
# Other corporate filings
"424B1", "424B2", "424B3", "424B4", "424B5",
"ARS", "NT 10-K", "NT 10-Q",
"SC 13D", "SC 13G", "SC TO-I", "SC TO-T",
"SD", "PX14A6G",
# Specialized corporate filings
"N-CSR", "N-Q", "N-MFP", "N-CEN",
"X-17A-5", "17-H",
"TA-1", "TA-2",
"ATS-N",
# Corporate disclosures
"EFFECT", "FWP", "425", "CB",
"POS AM", "CORRESP", "UPLOAD"
}
# Fund-specific form types
FUND_FORMS = {
# Investment company registration
"N-1A", "N-2", "N-3", "N-4", "N-5", "N-6",
# Investment company periodic reports
"N-CSR", "N-Q", "N-CEN", "N-MFP",
# Investment adviser forms
"ADV", "ADV-E", "ADV-H", "ADV-NR", "ADV-W",
# Private fund forms
"PF", "CPO-PQR", "CTA-PR",
# Municipal advisor forms
"MA", "MA-I", "MA-NR", "MA-W",
# Investment company shareholder reports
"N-30B-2", "N-30D", "485APOS", "485BPOS",
# Variable insurance products
"N-3/A", "N-4/A", "N-6/A",
# Closed-end funds
"N-2/A", "N-5/A",
# Business development companies
"N-6F", "N-54A", "N-54C",
# Exchange-traded funds
"N-1A/A",
# Portfolio holdings
"NPORT-P", "NPORT-EX", "N-PORT", "N-PORT/A"
}
# Individual/insider forms
INDIVIDUAL_FORMS = {
# Ownership reports
"3", "4", "5", "3/A", "4/A", "5/A",
# Beneficial ownership
"SC 13D", "SC 13G", "SC 13D/A", "SC 13G/A",
# Tender offer schedules
"SC TO-I", "SC TO-C", "SC TO-T",
# Investment adviser representatives
"ADV-E", "DRS"
}
# All known form types for validation
ALL_FORM_TYPES = COMPANY_FORMS | FUND_FORMS | INDIVIDUAL_FORMS
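# A hypothetical helper showing how these sets are meant to be consumed; each
# membership test is O(1), which is why sets were chosen over lists above.
def classify_form(form: str) -> str:
    """Roughly classify a form type. The sets overlap (e.g. N-CSR, SC 13D),
    so the order of checks below is a judgment call, not a rule."""
    if form in INDIVIDUAL_FORMS:
        return "individual"
    if form in FUND_FORMS:
        return "fund"
    if form in COMPANY_FORMS:
        return "company"
    return "unknown"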

View File

@@ -0,0 +1,923 @@
"""
Core entity classes for working with SEC filings.
This module provides the main classes for interacting with SEC entities,
including companies, funds, and individuals.
"""
from abc import ABC, abstractmethod
from functools import cached_property
from typing import TYPE_CHECKING, Iterable, List, Optional, Tuple, TypeVar, Union
if TYPE_CHECKING:
import pyarrow
from edgar.entity.enhanced_statement import StructuredStatement
from edgar.entity.filings import EntityFilings
from edgar.enums import FormType, PeriodType
from rich import box
from rich.columns import Columns
from rich.console import Group
from rich.padding import Padding
from rich.panel import Panel
from rich.table import Table
from rich.text import Text
from edgar._filings import Filings
from edgar.company_reports import TenK, TenQ
from edgar.entity.data import Address, CompanyData, EntityData
from edgar.entity.entity_facts import EntityFacts, NoCompanyFactsFound, get_company_facts
from edgar.entity.tickers import get_icon_from_ticker
from edgar.financials import Financials
from edgar.formatting import datefmt, reverse_name
from edgar.reference.tickers import find_cik
from edgar.richtools import Docs, repr_rich
# Import constants and utilities from separate modules
from edgar.entity.constants import COMPANY_FORMS
from edgar.entity.utils import has_company_filings, normalize_cik
# Type variables for better type annotations
T = TypeVar('T')
__all__ = [
'SecFiler',
'Entity',
'Company',
'EntityData',
'CompanyData',
'get_entity',
'get_company',
'NoCompanyFactsFound',
'has_company_filings',
'COMPANY_FORMS',
]
class SecFiler(ABC):
"""
Abstract base class for all SEC filing entities.
This is the root of the entity hierarchy and defines the common interface
that all entity types must implement.
"""
@abstractmethod
def get_filings(self, **kwargs) -> Filings:
"""Get filings for this entity."""
pass
@abstractmethod
def get_facts(self) -> Optional[EntityFacts]:
"""Get structured facts about this entity."""
pass
@property
@abstractmethod
def cik(self) -> int:
"""Get the CIK number for this entity."""
pass
@property
@abstractmethod
def data(self) -> 'EntityData':
"""Get detailed data for this entity."""
pass
class Entity(SecFiler):
"""
Represents any entity that files with the SEC.
This is the base concrete implementation that can be used directly
or specialized for specific entity types.
"""
def __init__(self, cik_or_identifier: Union[str, int]):
# If it's a ticker, convert to CIK first
if isinstance(cik_or_identifier, str) and not cik_or_identifier.isdigit():
cik = find_cik(cik_or_identifier)
if cik is None:
self._cik = -999999999
else:
self._cik = cik
else:
self._cik = normalize_cik(cik_or_identifier)
self._data = None
@property
def cik(self) -> int:
"""Get the CIK number for this entity."""
return self._cik
@property
def name(self):
"""Get the name of the company."""
if hasattr(self.data, 'name'):
return self.data.name
return None
@cached_property
def display_name(self) -> str:
"""Reverse the name if it is a company"""
if self.is_company:
return self.name
return reverse_name(self.name)
@cached_property
def data(self) -> 'EntityData':
"""Get detailed data for this entity."""
if self._data is None:
# Import locally to avoid circular imports
from edgar.entity.submissions import get_entity_submissions
# get_entity_submissions returns the EntityData directly
entity_data = get_entity_submissions(self.cik)
if entity_data:
self._data = entity_data
self._data._not_found = False
else:
# Instead of raising an error, create a default EntityData
#log.warning(f"Could not find entity data for CIK {self.cik}, using placeholder data")
from edgar.entity.data import create_default_entity_data
self._data = create_default_entity_data(self.cik)
self._data._not_found = True
return self._data
def mailing_address(self) -> Optional[Address]:
"""Get the mailing address of the entity."""
if hasattr(self.data, 'mailing_address') and self.data.mailing_address:
return self.data.mailing_address
def business_address(self) -> Optional[Address]:
"""Get the business address of the entity."""
if hasattr(self.data, 'business_address') and self.data.business_address:
return self.data.business_address
@property
def not_found(self) -> bool:
"""
Check if the entity data was not found.
Returns:
True if the entity data could not be found, False otherwise
"""
if not hasattr(self, '_data') or self._data is None:
# We haven't loaded the data yet, so we don't know if it's not found
# Loading the data will set the not_found flag
_ = self.data
return getattr(self._data, '_not_found', False)
@property
def is_company(self) -> bool:
"""
Check if this entity is a company.
Returns:
True if the entity is a company, False otherwise
"""
return self.data.is_company
@property
def is_individual(self) -> bool:
"""
Check if this entity is an individual.
Returns:
True if the entity is an individual, False otherwise
"""
return not self.is_company
def get_filings(self,
*,
year: Union[int, List[int]] = None,
quarter: Union[int, List[int]] = None,
form: Union[str, 'FormType', List[Union[str, 'FormType']]] = None,
accession_number: Union[str, List] = None,
file_number: Union[str, List] = None,
filing_date: Union[str, Tuple[str, str]] = None,
date: Union[str, Tuple[str, str]] = None,
amendments: bool = True,
is_xbrl: bool = None,
is_inline_xbrl: bool = None,
sort_by: Union[str, List[Tuple[str, str]]] = None,
trigger_full_load: bool = True) -> 'EntityFilings':
"""
Get the entity's filings and optionally filter by multiple criteria.
This method has a special behavior for loading filings. When first called,
it only loads the most recent filings. If trigger_full_load=True, it will
automatically fetch all historical filings from the SEC (potentially making
multiple API calls) as needed.
Args:
year: The year or list of years to filter by (e.g. 2023, [2022, 2023])
quarter: The quarter or list of quarters to filter by (1-4, e.g. 4, [3, 4])
form: The form type (e.g. FormType.ANNUAL_REPORT, '10-K', or ['10-Q', '10-K'])
accession_number: The accession number that identifies a filing
file_number: The file number e.g. 001-39504
filing_date: Filter by filing date (YYYY-MM-DD or range)
date: Alias for filing_date
amendments: Whether to include amendments (default: True)
is_xbrl: Whether the filing is XBRL
is_inline_xbrl: Whether the filing is Inline XBRL
sort_by: Sort criteria
trigger_full_load: Whether to load all historical filings if not already loaded
Returns:
Filtered filings matching the criteria
"""
# Simply delegate to the EntityData implementation
# This preserves the lazy-loading behavior while keeping the API clean
return self.data.get_filings(
year=year,
quarter=quarter,
form=form,
accession_number=accession_number,
file_number=file_number,
filing_date=filing_date or date,
amendments=amendments,
is_xbrl=is_xbrl,
is_inline_xbrl=is_inline_xbrl,
sort_by=sort_by,
trigger_full_load=trigger_full_load
)
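# Example (illustrative values; any of these filters can be combined):
#
#     entity = Entity("AAPL")
#     tenks = entity.get_filings(form="10-K", year=[2022, 2023])
#     latest = tenks.latest()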
def get_facts(self, period_type: Optional[Union[str, 'PeriodType']] = None) -> Optional[EntityFacts]:
"""
Get structured facts about this entity.
Args:
period_type: Optional filter by period type. Can be PeriodType enum
or string ('annual', 'quarterly', 'monthly').
Returns:
EntityFacts object, optionally filtered by period type
"""
try:
facts = get_company_facts(self.cik)
if facts and period_type:
# Apply period type filtering to the facts
return facts.filter_by_period_type(period_type)
return facts
except NoCompanyFactsFound:
return None
def get_structured_statement(self,
statement_type: str,
fiscal_year: Optional[int] = None,
fiscal_period: Optional[str] = None,
use_canonical: bool = True,
include_missing: bool = False) -> Optional['StructuredStatement']:
"""
Get a hierarchically structured financial statement.
This method uses learned canonical structures to build complete financial
statements with proper hierarchy and relationships, filling in missing
concepts when requested.
Args:
statement_type: Type of statement ('BalanceSheet', 'IncomeStatement', 'CashFlow')
fiscal_year: Fiscal year to retrieve (defaults to latest)
fiscal_period: Fiscal period ('FY', 'Q1', 'Q2', 'Q3', 'Q4')
use_canonical: Use canonical structure for organization (recommended)
include_missing: Include placeholders for missing canonical concepts
Returns:
StructuredStatement with hierarchical organization or None if no data
Example:
>>> company = Company('AAPL')
>>> stmt = company.get_structured_statement('IncomeStatement', 2024, 'Q4')
>>> print(stmt.get_hierarchical_display())
"""
from edgar.entity.statement_builder import StatementBuilder
facts_data = self.get_facts()
if not facts_data:
return None
# Get all facts
all_facts = facts_data.get_all_facts()
if not all_facts:
return None
# Build the statement
builder = StatementBuilder(cik=str(self.cik))
structured_stmt = builder.build_statement(
facts=all_facts,
statement_type=statement_type,
fiscal_year=fiscal_year,
fiscal_period=fiscal_period,
use_canonical=use_canonical,
include_missing=include_missing
)
# Add company metadata
structured_stmt.company_name = self.name
return structured_stmt
def latest(self, form: str, n=1):
"""Get the latest filing(s) for a given form."""
return self.get_filings(form=form, trigger_full_load=False).latest(n)
def __str__(self):
if hasattr(self, 'data'):
return f"Entity({self.data.name} [{self.cik}])"
return f"Entity(CIK={self.cik})"
def __rich__(self):
return self.data.__rich__()
def __repr__(self):
return repr_rich(self.__rich__())
def __bool__(self):
"""
Allow truthiness check for entities.
Returns False if the entity doesn't exist (has a sentinel CIK value or not_found is True).
This enables code patterns like: `if company: do_something()`
"""
# Check for sentinel CIK value (-999999999) or not_found flag
return self.cik != -999999999 and not self.not_found
class Company(Entity):
"""
Represents a public company that files with the SEC.
Provides company-specific functionality like financial statements,
ticker lookup, etc.
"""
def __init__(self, cik_or_ticker: Union[str, int]):
super().__init__(cik_or_ticker)
@property
def data(self) -> 'EntityData': # We'll return the base type to simplify
"""Get detailed data for this company."""
# For simplicity, return the base EntityData
# Type checkers will still see this as a CompanyData due to the annotation
return super().data
@property
def tickers(self):
"""Get all ticker symbols for this company."""
if hasattr(self.data, 'tickers'):
return self.data.tickers
return []
def get_ticker(self) -> Optional[str]:
"""Get the primary ticker symbol for this company."""
if self.data and self.data.tickers and len(self.data.tickers) > 0:
return self.data.tickers[0]
return None
def get_exchanges(self):
"""Get all exchanges for this company."""
if hasattr(self.data, 'exchanges'):
return self.data.exchanges
return []
def get_financials(self) -> Optional[Financials]:
"""Get financial statements for this company."""
tenk_filing = self.latest_tenk
if tenk_filing is not None:
return tenk_filing.financials
return None
def get_quarterly_financials(self) -> Optional[Financials]:
"""Get quarterly financial statements for this company."""
tenq_filing = self.latest_tenq
if tenq_filing is not None:
return tenq_filing.financials
return None
@property
def fiscal_year_end(self):
"""Get the fiscal year end date for this company."""
if hasattr(self.data, 'fiscal_year_end'):
return self.data.fiscal_year_end
return None
@property
def sic(self):
"""Get the SIC code for this company."""
if hasattr(self.data, 'sic'):
return self.data.sic
return None
@property
def industry(self):
"""Get the industry description for this company."""
if hasattr(self.data, 'sic_description'):
return self.data.sic_description
return None
@property
def latest_tenk(self) -> Optional[TenK]:
"""Get the latest 10-K filing for this company."""
latest_10k = self.get_filings(form='10-K', trigger_full_load=False).latest()
if latest_10k is not None:
return latest_10k.obj()
return None
@property
def latest_tenq(self) -> Optional[TenQ]:
"""Get the latest 10-Q filing for this company."""
latest_10q = self.get_filings(form='10-Q', trigger_full_load=False).latest()
if latest_10q is not None:
return latest_10q.obj()
return None
def get_icon(self):
"""Get the icon for this company's primary ticker, if it has one."""
ticker = self.get_ticker()
if ticker:
return get_icon_from_ticker(ticker)
return None
# Enhanced financial data properties and methods
@property
def facts(self) -> Optional[EntityFacts]:
"""Get enhanced structured facts about this company."""
return self.get_facts()
@property
def docs(self):
"""Access comprehensive Company API documentation."""
return Docs(self)
@property
def public_float(self) -> Optional[float]:
"""Get the public float value for this company."""
facts = self.facts
if facts:
return facts.public_float
return None
@property
def shares_outstanding(self) -> Optional[float]:
"""Get the shares outstanding for this company."""
facts = self.facts
if facts:
return facts.shares_outstanding
return None
def income_statement(self, periods: int = 4, annual: bool = True, as_dataframe: bool = False, concise_format: bool = False):
"""
Get income statement data for this company.
Args:
periods: Number of periods to retrieve
annual: If True, prefer annual periods; if False, get quarterly
as_dataframe: If True, return DataFrame; if False, return MultiPeriodStatement
concise_format: If True, display values as $1.0B, if False display as $1,000,000,000
Returns:
MultiPeriodStatement or DataFrame with income statement data, or None if not available
"""
facts = self.facts
if facts:
try:
return facts.income_statement(periods=periods, annual=annual, as_dataframe=as_dataframe, concise_format=concise_format)
except Exception as e:
from edgar.core import log
log.debug(f"Error getting income statement for {self.name}: {e}")
return None
def balance_sheet(self, periods: int = 4, annual: bool = True, as_dataframe: bool = False, concise_format: bool = False):
"""
Get balance sheet data for this company.
Args:
periods: Number of periods to retrieve
annual: If True, prefer annual periods; if False, get quarterly
as_dataframe: If True, return DataFrame; if False, return MultiPeriodStatement
concise_format: If True, display values as $1.0B, if False display as $1,000,000,000
Returns:
MultiPeriodStatement or DataFrame with balance sheet data, or None if not available
"""
facts = self.facts
if facts:
try:
return facts.balance_sheet(periods=periods, annual=annual, as_dataframe=as_dataframe, concise_format=concise_format)
except Exception as e:
from edgar.core import log
log.debug(f"Error getting balance sheet for {self.name}: {e}")
return None
def cash_flow(self, periods: int = 4, annual: bool = True, as_dataframe: bool = False, concise_format: bool = False):
"""
Get cash flow statement data for this company.
Args:
periods: Number of periods to retrieve
annual: If True, prefer annual periods; if False, get quarterly
as_dataframe: If True, return DataFrame; if False, return MultiPeriodStatement
concise_format: If True, display values as $1.0B, if False display as $1,000,000,000
Returns:
MultiPeriodStatement or DataFrame with cash flow data, or None if not available
"""
facts = self.facts
if facts:
try:
return facts.cash_flow(periods=periods, annual=annual, as_dataframe=as_dataframe, concise_format=concise_format)
except Exception as e:
from edgar.core import log
log.debug(f"Error getting cash flow for {self.name}: {e}")
return None
def __str__(self):
ticker = self.get_ticker()
ticker_str = f" - {ticker}" if ticker else ""
if hasattr(self, 'data'):
return f"Company({self.data.name} [{self.cik}]{ticker_str})"
return f"Company(CIK={self.cik}{ticker_str})"
def __repr__(self):
# Delegate to the rich representation for consistency with the old implementation
return repr_rich(self.__rich__())
def text(self, max_tokens: int = 2000) -> str:
"""
Get AI-optimized plain text representation.
Uses Markdown-KV format (60.7% accuracy, 25% fewer tokens than JSON) optimized
for LLM consumption. For terminal display, use print(company) instead.
Research basis: improvingagents.com/blog/best-input-data-format-for-llms
Args:
max_tokens: Approximate token limit using 4 chars/token heuristic (default: 2000)
Returns:
Markdown-formatted key-value representation optimized for LLMs
Example:
>>> from edgar import Company
>>> company = Company("AAPL")
>>> text = company.text()
>>> print(text)
**Company:** Apple Inc.
**CIK:** 0000320193
**Ticker:** AAPL
**Exchange:** NASDAQ
...
"""
lines = []
# Basic identification
lines.append(f"**Company:** {self.data.name}")
lines.append(f"**CIK:** {str(self.cik).zfill(10)}")
# Ticker and exchange
ticker = self.get_ticker()
if ticker:
lines.append(f"**Ticker:** {ticker}")
if hasattr(self.data, 'exchanges') and self.data.exchanges:
exchanges_str = ", ".join(self.data.exchanges) if isinstance(self.data.exchanges, (list, tuple)) else str(self.data.exchanges)
lines.append(f"**Exchange:** {exchanges_str}")
# Industry classification
if hasattr(self.data, 'sic') and self.data.sic:
sic_desc = getattr(self.data, 'sic_description', '')
if sic_desc:
lines.append(f"**Industry:** {sic_desc} (SIC {self.data.sic})")
else:
lines.append(f"**SIC Code:** {self.data.sic}")
# Entity type
if hasattr(self.data, 'entity_type') and self.data.entity_type:
lines.append(f"**Entity Type:** {self.data.entity_type.title()}")
# Category
if hasattr(self.data, 'category') and self.data.category:
lines.append(f"**Category:** {self.data.category}")
# Fiscal year end
if hasattr(self.data, 'fiscal_year_end') and self.data.fiscal_year_end:
lines.append(f"**Fiscal Year End:** {self._format_fiscal_year_date(self.data.fiscal_year_end)}")
# Business address
if hasattr(self.data, 'business_address') and self.data.business_address:
addr = self.data.business_address
lines.append("")
lines.append("**Business Address:**")
if hasattr(addr, 'street1') and addr.street1:
lines.append(f"{addr.street1}")
if hasattr(addr, 'street2') and addr.street2:
lines.append(f"{addr.street2}")
if hasattr(addr, 'city') and hasattr(addr, 'state_or_country') and addr.city and addr.state_or_country:
zip_code = f" {addr.zip_code}" if hasattr(addr, 'zip_code') and addr.zip_code else ""
lines.append(f"{addr.city}, {addr.state_or_country}{zip_code}")
# Contact information
if hasattr(self.data, 'phone') and self.data.phone:
lines.append(f"**Phone:** {self.data.phone}")
# Mailing address (if different from business address)
if hasattr(self.data, 'mailing_address') and self.data.mailing_address:
mail_addr = self.data.mailing_address
if hasattr(self.data, 'business_address'):
# Only include if different
business_addr = self.data.business_address
if (not hasattr(business_addr, 'street1') or
mail_addr.street1 != business_addr.street1):
lines.append("")
lines.append("**Mailing Address:**")
if hasattr(mail_addr, 'street1') and mail_addr.street1:
lines.append(f"{mail_addr.street1}")
if hasattr(mail_addr, 'city') and hasattr(mail_addr, 'state_or_country'):
zip_code = f" {mail_addr.zip_code}" if hasattr(mail_addr, 'zip_code') and mail_addr.zip_code else ""
lines.append(f"{mail_addr.city}, {mail_addr.state_or_country}{zip_code}")
text = "\n".join(lines)
# Token limiting (4 chars/token heuristic)
max_chars = max_tokens * 4
if len(text) > max_chars:
text = text[:max_chars] + "\n\n[Truncated for token limit]"
return text
def __rich__(self):
"""Creates a rich representation of the company with detailed information."""
# The title of the panel
ticker = self.get_ticker()
if self.data.is_company:
entity_title = Text.assemble("🏢 ",
(self.data.name, "bold green"),
" ",
(ticker if ticker else "", "bold yellow")
)
else:
entity_title = Text.assemble("👤", (self.data.name, "bold green"))
# Primary Information Table
main_info = Table(box=box.SIMPLE_HEAVY, show_header=False, padding=(0, 1))
main_info.add_column("Row", style="") # Single column for the entire row
row_parts = []
row_parts.extend([Text("CIK", style="grey60"), Text(str(self.cik), style="bold deep_sky_blue3")])
if hasattr(self.data, 'entity_type') and self.data.entity_type:
if self.data.is_individual:
row_parts.extend([Text("Type", style="grey60"),
Text("Individual", style="bold yellow")])
else:
row_parts.extend([Text("Type", style="grey60"),
Text(self.data.entity_type.title(), style="bold yellow"),
Text(self._get_operating_type_emoticon(self.data.entity_type), style="bold yellow")])
main_info.add_row(*row_parts)
# Detailed Information Table
details = Table(box=box.SIMPLE, show_header=True, padding=(0, 1))
details.add_column("Category")
details.add_column("Industry")
details.add_column("Fiscal Year End")
details.add_row(
getattr(self.data, 'category', '-') or "-",
f"{getattr(self.data, 'sic', '')}: {getattr(self.data, 'sic_description', '')}" if hasattr(self.data, 'sic') and self.data.sic else "-",
self._format_fiscal_year_date(getattr(self.data, 'fiscal_year_end', '')) if hasattr(self.data, 'fiscal_year_end') and self.data.fiscal_year_end else "-"
)
# Combine main_info and details in a single panel
if self.data.is_company:
basic_info_renderables = [main_info, details]
else:
basic_info_renderables = [main_info]
basic_info_panel = Panel(
Group(*basic_info_renderables),
title="📋 Entity",
border_style="grey50"
)
# Trading Information
if hasattr(self.data, 'tickers') and hasattr(self.data, 'exchanges') and self.data.tickers and self.data.exchanges:
trading_info = Table(box=box.SIMPLE, show_header=True, padding=(0, 1))
trading_info.add_column("Exchange")
trading_info.add_column("Symbol", style="bold yellow")
for exchange, ticker in zip(self.data.exchanges, self.data.tickers, strict=False):
trading_info.add_row(exchange, ticker)
trading_panel = Panel(
trading_info,
title="📈 Exchanges",
border_style="grey50"
)
else:
trading_panel = Panel(
Text("No trading information available", style="grey58"),
title="📈 Trading Information",
border_style="grey50"
)
# Contact Information
contact_info = Table(box=box.SIMPLE, show_header=False, padding=(0, 1))
contact_info.add_column("Label", style="bold grey70")
contact_info.add_column("Value")
has_contact_info = any([
hasattr(self.data, 'phone') and self.data.phone,
hasattr(self.data, 'website') and self.data.website,
hasattr(self.data, 'investor_website') and self.data.investor_website
])
if hasattr(self.data, 'website') and self.data.website:
contact_info.add_row("Website", self.data.website)
if hasattr(self.data, 'investor_website') and self.data.investor_website:
contact_info.add_row("Investor Relations", self.data.investor_website)
if hasattr(self.data, 'phone') and self.data.phone:
contact_info.add_row("Phone", self.data.phone)
# Three-column layout for addresses and contact info
contact_renderables = []
if hasattr(self.data, 'business_address') and not self.data.business_address.empty:
contact_renderables.append(Panel(
Text(str(self.data.business_address)),
title="🏢 Business Address",
border_style="grey50"
))
if hasattr(self.data, 'mailing_address') and not self.data.mailing_address.empty:
contact_renderables.append(Panel(
Text(str(self.data.mailing_address)),
title="📫 Mailing Address",
border_style="grey50"
))
if has_contact_info:
contact_renderables.append(Panel(
contact_info,
title="📞 Contact Information",
border_style="grey50"
))
# Former Names Table (if any exist)
former_names_panel = None
if hasattr(self.data, 'former_names') and self.data.former_names:
former_names_table = Table(box=box.SIMPLE, show_header=False, padding=(0, 1))
former_names_table.add_column("Previous Company Names")
former_names_table.add_column("") # Empty column for better spacing
for former_name in self.data.former_names:
from_date = datefmt(former_name['from'], '%B %Y')
to_date = datefmt(former_name['to'], '%B %Y')
former_names_table.add_row(Text(former_name['name'], style="italic"), f"{from_date} to {to_date}")
former_names_panel = Panel(
former_names_table,
title="📜 Former Names",
border_style="grey50"
)
# Combine all sections using Group
if self.data.is_company:
content_renderables = [Padding("", (1, 0, 0, 0)), basic_info_panel, trading_panel]
if len(contact_renderables):
contact_and_addresses = Columns(contact_renderables, equal=True, expand=True)
content_renderables.append(contact_and_addresses)
if former_names_panel:
content_renderables.append(former_names_panel)
else:
content_renderables = [Padding("", (1, 0, 0, 0)), basic_info_panel]
if len(contact_renderables):
contact_and_addresses = Columns(contact_renderables, equal=True, expand=True)
content_renderables.append(contact_and_addresses)
content = Group(*content_renderables)
# Create the main panel
return Panel(
content,
title=entity_title,
subtitle=Text.assemble(
("SEC Entity Data", "dim"),
"",
("company.docs", "cyan dim"),
(" for usage guide", "dim")
),
border_style="grey50"
)
@staticmethod
def _get_operating_type_emoticon(entity_type: str) -> str:
"""
Generate a meaningful single-width symbol based on the SEC entity type.
All symbols are chosen to be single-width to work well with rich borders.
Args:
entity_type (str): The SEC entity type (case-insensitive)
Returns:
str: A single-width symbol representing the entity type
"""
symbols = {
"operating": "●", # Circle for active operations
"subsidiary": "→", # Arrow showing connection to parent
"inactive": "×", # Cross for inactive
"holding company": "■", # Square for solid corporate structure
"investment company": "$", # Dollar for investment focus
"investment trust": "$", # Dollar for investment focus
"shell": "□", # Empty square for shell
"development stage": "▲", # Triangle for growth/development
"financial services": "¢", # Cent sign for financial services
"reit": "⌂", # House symbol
"spv": "◆", # Diamond for special purpose
"joint venture": "∞" # Infinity for partnership
}
# Clean input: convert to lowercase and strip whitespace
cleaned_type = entity_type.lower().strip()
# Handle some common variations
if "investment" in cleaned_type:
return symbols["investment company"]
if "real estate" in cleaned_type or "reit" in cleaned_type:
return symbols["reit"]
# Return default question mark if type not found
return symbols.get(cleaned_type, "?")
@staticmethod
def _format_fiscal_year_date(date_str):
"""Format fiscal year end date in a human-readable format."""
if not date_str:
return "-"
# Dictionary of months
months = {
"01": "Jan", "02": "Feb", "03": "Mar",
"04": "Apr", "05": "May", "06": "Jun",
"07": "Jul", "08": "Aug", "09": "Sep",
"10": "Oct", "11": "Nov", "12": "Dec"
}
# Extract month and day
month = date_str[:2]
if month not in months:
return date_str
try:
day = str(int(date_str[2:])) # Remove leading zero
return f"{months[month]} {day}"
except (ValueError, IndexError):
return date_str
# Factory functions for backward compatibility
def get_entity(cik_or_identifier: Union[str, int]) -> Entity:
"""
Get any SEC filing entity by CIK or identifier.
Args:
cik_or_identifier: CIK number (as int or str) or other identifier
Returns:
Entity instance
"""
return Entity(cik_or_identifier)
def get_company(cik_or_ticker: Union[str, int]) -> Company:
"""
Get a public company by CIK or ticker.
Args:
cik_or_ticker: CIK number or ticker symbol
Returns:
Company instance
"""
return Company(cik_or_ticker)
def public_companies() -> Iterable[Company]:
"""
Iterator over all known public companies.
Returns:
Iterable of Company objects
"""
from edgar.reference.tickers import get_cik_tickers
df = get_cik_tickers()
for _, row in df.iterrows():
c = Company(row.cik)
yield c
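# Usage sketch (illustrative):
#   entity = get_entity(320193)          # by CIK
#   company = get_company("AAPL")        # by ticker or CIK
#   for company in public_companies():   # iterate all known public companies
#       ...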

View File

@@ -0,0 +1,854 @@
"""
Data classes for the Entity package.
This module contains classes for working with entity data, including
addresses, facts, and other structured data from SEC filings.
"""
import re
from functools import cached_property
from typing import Any, Dict, List, Optional, Tuple, Union
import pyarrow as pa
import pyarrow.compute as pc
from edgar.core import listify, log
from edgar.dates import InvalidDateException
from edgar.entity.filings import EntityFilings
from edgar.filtering import filter_by_date, filter_by_form, filter_by_year_quarter
from edgar.formatting import reverse_name
from edgar.storage import is_using_local_storage
# Module-level import cache for lazy imports
_IMPORT_CACHE = {}
def lazy_import(module_path):
"""
Lazily import a module or attribute and cache the result to avoid repeated imports.
Args:
module_path: String path to the module or attribute
Returns:
The imported module or attribute
"""
if module_path not in _IMPORT_CACHE:
parts = module_path.split('.')
if len(parts) == 1:
# Simple module import
_IMPORT_CACHE[module_path] = __import__(module_path)
else:
# Import from module (potentially nested)
module_name = '.'.join(parts[:-1])
attr_name = parts[-1]
module = __import__(module_name, fromlist=[attr_name])
_IMPORT_CACHE[module_path] = getattr(module, attr_name)
return _IMPORT_CACHE[module_path]
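# Usage sketch (illustrative): repeated calls are served from _IMPORT_CACHE.
#   pa_mod = lazy_import('pyarrow')            # plain module import
#   Panel = lazy_import('rich.panel.Panel')    # attribute import from a nested module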
__all__ = [
'Address',
'EntityData',
'CompanyData',
'preprocess_company',
'parse_entity_submissions',
'extract_company_filings_table',
'create_company_filings',
'create_default_entity_data'
]
def extract_company_filings_table(filings_json: Dict[str, Any]) -> pa.Table:
"""
Extract company filings from the json response.
Args:
filings_json: The JSON data containing filings
Returns:
A PyArrow Table containing the filings data
"""
# Import this here to avoid circular imports
from edgar.core import parse_acceptance_datetime
# Handle case of no data
if not filings_json.get('accessionNumber'):
# Create an empty table with the right schema
schema = pa.schema([
('accession_number', pa.string()),
('filing_date', pa.date32()),
('reportDate', pa.string()),
('acceptanceDateTime', pa.timestamp('us')),
('act', pa.string()),
('form', pa.string()),
('fileNumber', pa.string()),
('items', pa.string()),
('size', pa.string()),
('isXBRL', pa.string()),
('isInlineXBRL', pa.string()),
('primaryDocument', pa.string()),
('primaryDocDescription', pa.string())
])
return pa.Table.from_arrays([[] for _ in range(13)], schema=schema)
else:
# Convert acceptanceDateTime string to datetime
acceptance_datetimes = [
parse_acceptance_datetime(dt) for dt in filings_json['acceptanceDateTime']
]
fields = {
'accession_number': filings_json['accessionNumber'],
'filing_date': pc.cast(pc.strptime(pa.array(filings_json['filingDate']), '%Y-%m-%d', 'us'), pa.date32()),
'reportDate': filings_json['reportDate'],
'acceptanceDateTime': acceptance_datetimes,
'act': filings_json['act'],
'form': filings_json['form'],
'fileNumber': filings_json['fileNumber'],
'items': filings_json['items'],
'size': filings_json['size'],
'isXBRL': filings_json['isXBRL'],
'isInlineXBRL': filings_json['isInlineXBRL'],
'primaryDocument': filings_json['primaryDocument'],
'primaryDocDescription': filings_json['primaryDocDescription']
}
# Create table using dictionary
return pa.Table.from_arrays(
arrays=[pa.array(v) if k not in ['filing_date', 'acceptanceDateTime']
else v for k, v in fields.items()],
names=list(fields.keys())
)
def create_company_filings(filings_json: Dict[str, Any], cik: int, company_name: str) -> EntityFilings:
"""
Extract company filings from the json response.
Args:
filings_json: The JSON data containing filings
cik: The company CIK
company_name: The company name
Returns:
An EntityFilings object containing the filings
"""
recent_filings = extract_company_filings_table(filings_json['recent'])
return EntityFilings(recent_filings, cik=cik, company_name=company_name)
def parse_entity_submissions(cjson: Dict[str, Any]) -> 'CompanyData':
"""
Parse entity submissions from the SEC API.
Args:
cjson: The JSON data from the SEC submissions API
Returns:
A CompanyData object representing the entity
"""
mailing_addr = cjson['addresses']['mailing']
business_addr = cjson['addresses']['business']
cik = cjson['cik']
company_name = cjson["name"]
former_names = cjson.get('formerNames', [])
for former_name in former_names:
former_name['from'] = former_name['from'][:10] if former_name['from'] else former_name['from']
former_name['to'] = former_name['to'][:10] if former_name['to'] else former_name['to']
return CompanyData(
cik=int(cik),
name=company_name,
tickers=cjson['tickers'],
exchanges=cjson['exchanges'],
sic=cjson['sic'],
sic_description=cjson['sicDescription'],
category=cjson['category'].replace("<br>", " | ") if cjson['category'] else None,
fiscal_year_end=cjson['fiscalYearEnd'],
entity_type=cjson['entityType'],
phone=cjson['phone'],
flags=cjson['flags'],
mailing_address=Address(
street1=mailing_addr['street1'],
street2=mailing_addr['street2'],
city=mailing_addr['city'],
state_or_country_desc=mailing_addr['stateOrCountryDescription'],
state_or_country=mailing_addr['stateOrCountry'],
zipcode=mailing_addr['zipCode'],
),
business_address=Address(
street1=business_addr['street1'],
street2=business_addr['street2'],
city=business_addr['city'],
state_or_country_desc=business_addr['stateOrCountryDescription'],
state_or_country=business_addr['stateOrCountry'],
zipcode=business_addr['zipCode'],
),
filings=create_company_filings(cjson['filings'], cik=cik, company_name=company_name),
insider_transaction_for_owner_exists=bool(cjson['insiderTransactionForOwnerExists']),
insider_transaction_for_issuer_exists=bool(cjson['insiderTransactionForIssuerExists']),
ein=cjson['ein'],
description=cjson['description'],
website=cjson['website'],
investor_website=cjson['investorWebsite'],
state_of_incorporation=cjson['stateOfIncorporation'],
state_of_incorporation_description=cjson['stateOfIncorporationDescription'],
former_names=former_names,
files=cjson['filings']['files']
)
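# Usage sketch (illustrative; assumes network access to the SEC submissions API):
#   from edgar.httprequests import download_json
#   cjson = download_json("https://data.sec.gov/submissions/CIK0000320193.json")
#   company_data = parse_entity_submissions(cjson)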
class Address:
"""
Represents a physical address.
This class is optimized for memory usage and performance.
"""
__slots__ = ('street1', 'street2', 'city', 'state_or_country', 'zipcode', 'state_or_country_desc', '_str_cache')
def __init__(self,
street1: str,
street2: Optional[str],
city: str,
state_or_country: str,
zipcode: str,
state_or_country_desc: str
):
"""
Initialize an Address object.
Args:
street1: First line of street address
street2: Second line of street address (optional)
city: City name
state_or_country: State or country code
zipcode: Postal/ZIP code
state_or_country_desc: Human-readable state or country name
"""
# Store empty strings instead of None to avoid type checks later
self.street1: str = street1 or ""
self.street2: Optional[str] = street2 or ""
self.city: str = city or ""
self.state_or_country: str = state_or_country or ""
self.zipcode: str = zipcode or ""
self.state_or_country_desc: str = state_or_country_desc or ""
self._str_cache = None
@property
def empty(self) -> bool:
"""Check if the address is empty. Optimized to avoid multiple attribute checks when possible."""
# Short-circuit on common empty case
if not self.street1:
if not self.city and not self.zipcode:
return True
# Full check
return not (self.street1 or self.street2 or self.city or self.state_or_country or self.zipcode)
def __str__(self):
"""
Generate a formatted string representation of the address.
Caches result for repeated calls.
"""
if self._str_cache is not None:
return self._str_cache
if not self.street1:
self._str_cache = ""
return ""
# Build string only once and cache it
parts = []
parts.append(self.street1)
if self.street2:
parts.append(self.street2)
parts.append(f"{self.city}, {self.state_or_country_desc} {self.zipcode}")
self._str_cache = "\n".join(parts)
return self._str_cache
def __repr__(self):
"""Generate a string representation suitable for debugging."""
# Simplified representation that avoids unnecessary string operations
return f'Address(street1="{self.street1}", street2="{self.street2}", city="{self.city}", zipcode="{self.zipcode}")'
def to_json(self) -> Dict[str, str]:
"""Convert the address to a JSON-serializable dict."""
# Direct dictionary creation is faster than multiple assignments
return {
'street1': self.street1,
'street2': self.street2,
'city': self.city,
'state_or_country': self.state_or_country,
'zipcode': self.zipcode,
'state_or_country_desc': self.state_or_country_desc
}
class EntityData:
"""
Container for entity data loaded from SEC submissions API.
This class provides access to entity metadata and filings.
"""
def __init__(self,
cik: int,
name: str,
tickers: List[str],
exchanges: List[str],
sic: str,
sic_description: str,
ein: str,
entity_type: str,
fiscal_year_end: str,
filings: EntityFilings,
business_address: Address,
mailing_address: Address,
state_of_incorporation: str,
**kwargs):
"""
Initialize a new EntityData instance.
Args:
cik: The CIK number
name: The entity name
sic: The Standard Industrial Classification code
ein: The Employer Identification Number
fiscal_year_end: The fiscal year end date
tickers: List of ticker symbols
exchanges: List of exchanges
entity_type: The entity type
filings: The entity's filings
business_address: The business address
mailing_address: The mailing address
state_of_incorporation: The state of incorporation
**kwargs: Additional attributes
"""
self.cik: int = cik
self.name: str = name
self.sic = sic
self.sic_description: str = sic_description
self.ein: str = ein
self.fiscal_year_end: str = fiscal_year_end
self.tickers: List[str] = tickers
self.exchanges: List[str] = exchanges
self.filings: EntityFilings = filings
self.entity_type = entity_type
self.business_address: Address = business_address
self.mailing_address: Address = mailing_address
self.state_of_incorporation: str = state_of_incorporation
# Store all other attributes
for key, value in kwargs.items():
setattr(self, key, value)
# Initialize lazy loading flag
self._loaded_all_filings: bool = False
self._files = kwargs.get('files', [])
def _load_older_filings(self):
"""
Load older filings that were not included in the initial data.
This method implements the lazy loading behavior of filings.
When first creating an entity, only the most recent filings are loaded
to keep API response times fast. When more filings are needed, this
method will load additional filings from the SEC.
"""
# If we have no files to load, we're done
if not self._files:
return
# Import locally to avoid circular imports using the lazy import cache
download_json = lazy_import('edgar.httprequests.download_json')
# Load additional filings from the SEC
filing_tables = [self.filings.data]
for file in self._files:
submissions = download_json("https://data.sec.gov/submissions/" + file['name'])
filing_table = extract_company_filings_table(submissions)
filing_tables.append(filing_table)
# Combine all filing tables
combined_tables = pa.concat_tables(filing_tables)
# Update filings
EntityFilings = lazy_import('edgar.entity.filings.EntityFilings')
self.filings = EntityFilings(combined_tables, cik=self.cik, company_name=self.name)
def get_filings(self,
year: Union[int, List[int]] = None,
quarter: Union[int, List[int]] = None,
form: Union[str, List] = None,
accession_number: Union[str, List] = None,
file_number: Union[str, List] = None,
filing_date: Union[str, Tuple[str, str]] = None,
date: Union[str, Tuple[str, str]] = None,
amendments: bool = True,
is_xbrl: bool = None,
is_inline_xbrl: bool = None,
sort_by: Union[str, List[Tuple[str, str]]] = None,
trigger_full_load: bool = True
) -> EntityFilings:
"""
Get entity filings with lazy loading behavior.
Args:
year: Filter by year(s) (e.g. 2023, [2022, 2023])
quarter: Filter by quarter(s) (1-4, e.g. 4, [3, 4])
form: Filter by form type(s)
accession_number: Filter by accession number(s)
file_number: Filter by file number(s)
filing_date: Filter by filing date (YYYY-MM-DD or range)
date: Alias for filing_date
amendments: Whether to include amendments (default: True)
is_xbrl: Filter by XBRL status
is_inline_xbrl: Filter by inline XBRL status
sort_by: Sort criteria
trigger_full_load: Whether to load all historical filings if not already loaded
Returns:
Filtered filings
"""
# Lazy loading behavior
if not self._loaded_all_filings and not is_using_local_storage() and trigger_full_load:
self._load_older_filings()
self._loaded_all_filings = True
# Get filings data
company_filings = self.filings.data
# Filter by year/quarter first (most selective)
if year is not None:
company_filings = filter_by_year_quarter(company_filings, year, quarter)
# Filter by accession number
if accession_number:
company_filings = company_filings.filter(
pc.is_in(company_filings['accession_number'], pa.array(listify(accession_number))))
if len(company_filings) >= 1:
# We found the filing(s)
return EntityFilings(company_filings, cik=self.cik, company_name=self.name)
# Filter by form (with amendments support)
if form:
company_filings = filter_by_form(company_filings, form, amendments)
# Filter by file number
if file_number:
company_filings = company_filings.filter(
pc.is_in(company_filings['fileNumber'], pa.array(listify(file_number))))
# Filter by XBRL status
if is_xbrl is not None:
company_filings = company_filings.filter(pc.equal(company_filings['isXBRL'], int(is_xbrl)))
# Filter by inline XBRL status
if is_inline_xbrl is not None:
company_filings = company_filings.filter(pc.equal(company_filings['isInlineXBRL'], int(is_inline_xbrl)))
# Filter by filing date
filing_date = filing_date or date
if filing_date:
try:
company_filings = filter_by_date(company_filings, filing_date, 'filing_date')
except InvalidDateException as e:
log.error(e)
return None
# Sort filings
if sort_by:
company_filings = company_filings.sort_by(sort_by)
# Return filtered filings
return EntityFilings(company_filings, cik=self.cik, company_name=self.name)
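# Usage sketch (illustrative):
#   tenks = entity.get_filings(form="10-K", filing_date="2020-01-01:")   # form + open-ended date range
#   q3_2023 = entity.get_filings(year=2023, quarter=3)                   # filter by year and quarter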
@property
def is_company(self) -> bool:
"""Determine if this entity is a company."""
return not self.is_individual
@cached_property
def is_individual(self) -> bool:
"""
Determine if this entity is an individual.
Tricky logic to detect if a company is an individual or a company.
Companies have an ein, individuals do not. Oddly Warren Buffet has an EIN but not a state of incorporation
There may be other edge cases.
If you have a ticker or exchange you are a company.
"""
# Import locally using the lazy import cache
has_company_filings = lazy_import('edgar.entity.core.has_company_filings')
if len(self.tickers) > 0 or len(self.exchanges) > 0:
return False
elif hasattr(self, 'state_of_incorporation') and self.state_of_incorporation is not None and self.state_of_incorporation != '':
if self.cik == 1033331: # Reed Hastings exception
return True
return False
elif hasattr(self, 'entity_type') and self.entity_type not in ['', 'other']:
return False
elif has_company_filings(self.filings.data['form']):
if self.cik == 315090: # The Warren Buffett exception
return True
return False
elif not hasattr(self, 'ein') or self.ein is None or self.ein == "000000000":
return True
else:
return False
def __str__(self):
return f"EntityData({self.name} [{self.cik}])"
def __repr__(self):
repr_rich = lazy_import('edgar.richtools.repr_rich')
return repr_rich(self.__rich__())
def __rich__(self):
"""Creates a rich representation of the entity with clear information hierarchy."""
# Use lazy imports for rich components
box = lazy_import('rich.box')
Group = lazy_import('rich.console.Group')
Columns = lazy_import('rich.columns.Columns')
Padding = lazy_import('rich.padding.Padding')
Panel = lazy_import('rich.panel.Panel')
Table = lazy_import('rich.table.Table')
Text = lazy_import('rich.text.Text')
find_ticker = lazy_import('edgar.reference.tickers.find_ticker')
zip_longest = lazy_import('itertools.zip_longest')
datefmt = lazy_import('edgar.formatting.datefmt')
# Primary entity identification section
if self.is_company:
ticker = find_ticker(self.cik)
ticker = f"{ticker}" if ticker else ""
# The title of the panel
entity_title = Text.assemble("🏢 ",
(self.display_name, "bold green"),
" ",
(f"[{self.cik}] ", "dim"),
(ticker, "bold yellow")
)
else:
entity_title = Text.assemble("👤", (self.display_name, "bold green"))
# Primary Information Table
main_info = Table(box=box.SIMPLE_HEAVY, show_header=False, padding=(0, 1))
main_info.add_column("Row", style="") # Single column for the entire row
row_parts = []
row_parts.extend([Text("CIK", style="grey60"), Text(str(self.cik), style="bold deep_sky_blue3")])
if hasattr(self, 'entity_type') and self.entity_type:
if self.is_individual:
row_parts.extend([Text("Type", style="grey60"),
Text("Individual", style="bold yellow")])
else:
row_parts.extend([Text("Type", style="grey60"),
Text(self.entity_type.title(), style="bold yellow"),
Text(self._get_operating_type_emoticon(self.entity_type), style="bold yellow")])
main_info.add_row(*row_parts)
# Detailed Information Table
details = Table(box=box.SIMPLE, show_header=True, padding=(0, 1))
details.add_column("Category")
details.add_column("Industry")
details.add_column("Fiscal Year End")
details.add_row(
getattr(self, 'category', '-') or "-",
f"{getattr(self, 'sic', '')}: {getattr(self, 'sic_description', '')}" if hasattr(self, 'sic') and self.sic else "-",
self._format_fiscal_year_date(getattr(self, 'fiscal_year_end', '')) if hasattr(self, 'fiscal_year_end') and self.fiscal_year_end else "-"
)
# Combine main_info and details in a single panel
if self.is_company:
basic_info_renderables = [main_info, details]
else:
basic_info_renderables = [main_info]
basic_info_panel = Panel(
Group(*basic_info_renderables),
title="📋 Entity",
border_style="grey50"
)
# Trading Information
if self.tickers and self.exchanges:
trading_info = Table(box=box.SIMPLE, show_header=True, padding=(0, 1))
trading_info.add_column("Exchange")
trading_info.add_column("Symbol", style="bold yellow")
for exchange, ticker in zip_longest(self.exchanges, self.tickers, fillvalue="-"):
trading_info.add_row(exchange, ticker)
trading_panel = Panel(
trading_info,
title="📈 Exchanges",
border_style="grey50"
)
else:
trading_panel = Panel(
Text("No trading information available", style="grey58"),
title="📈 Trading Information",
border_style="grey50"
)
# Contact Information
contact_info = Table(box=box.SIMPLE, show_header=False, padding=(0, 1))
contact_info.add_column("Label", style="bold grey70")
contact_info.add_column("Value")
has_contact_info = any([
hasattr(self, 'phone') and self.phone,
hasattr(self, 'website') and self.website,
hasattr(self, 'investor_website') and self.investor_website
])
if hasattr(self, 'website') and self.website:
contact_info.add_row("Website", self.website)
if hasattr(self, 'investor_website') and self.investor_website:
contact_info.add_row("Investor Relations", self.investor_website)
if hasattr(self, 'phone') and self.phone:
contact_info.add_row("Phone", self.phone)
# Three-column layout for addresses and contact info
contact_renderables = []
if hasattr(self, 'business_address') and not self.business_address.empty:
contact_renderables.append(Panel(
Text(str(self.business_address)),
title="🏢 Business Address",
border_style="grey50"
))
if hasattr(self, 'mailing_address') and not self.mailing_address.empty:
contact_renderables.append(Panel(
Text(str(self.mailing_address)),
title="📫 Mailing Address",
border_style="grey50"
))
if has_contact_info:
contact_renderables.append(Panel(
contact_info,
title="📞 Contact Information",
border_style="grey50"
))
# Former Names Table (if any exist)
former_names_panel = None
if hasattr(self, 'former_names') and self.former_names:
former_names_table = Table(box=box.SIMPLE, show_header=False, padding=(0, 1))
former_names_table.add_column("Previous Company Names")
former_names_table.add_column("") # Empty column for better spacing
for former_name in self.former_names:
from_date = datefmt(former_name['from'], '%B %Y')
to_date = datefmt(former_name['to'], '%B %Y')
former_names_table.add_row(Text(former_name['name'], style="italic"), f"{from_date} to {to_date}")
former_names_panel = Panel(
former_names_table,
title="📜 Former Names",
border_style="grey50"
)
# Combine all sections using Group
if self.is_company:
content_renderables = [Padding("", (1, 0, 0, 0)), basic_info_panel, trading_panel]
if len(contact_renderables):
contact_and_addresses = Columns(contact_renderables, equal=True, expand=True)
content_renderables.append(contact_and_addresses)
if former_names_panel:
content_renderables.append(former_names_panel)
else:
content_renderables = [Padding("", (1, 0, 0, 0)), basic_info_panel]
if len(contact_renderables):
contact_and_addresses = Columns(contact_renderables, equal=True, expand=True)
content_renderables.append(contact_and_addresses)
content = Group(*content_renderables)
# Create the main panel
return Panel(
content,
title=entity_title,
subtitle="SEC Entity Data",
border_style="grey50"
)
@property
def display_name(self) -> str:
"""Reverse the name if it is a company"""
if self.is_company:
return self.name
return reverse_name(self.name)
@staticmethod
def _get_operating_type_emoticon(entity_type: str) -> str:
"""
Generate a meaningful single-width symbol based on the SEC entity type.
All symbols are chosen to be single-width to work well with rich borders.
Args:
entity_type (str): The SEC entity type (case-insensitive)
Returns:
str: A single-width symbol representing the entity type
"""
symbols = {
"operating": "●", # Circle for active operations
"subsidiary": "→", # Arrow showing connection to parent
"inactive": "×", # Cross for inactive
"holding company": "■", # Square for solid corporate structure
"investment company": "$", # Dollar for investment focus
"investment trust": "$", # Dollar for investment focus
"shell": "□", # Empty square for shell
"development stage": "▲", # Triangle for growth/development
"financial services": "¢", # Cent sign for financial services
"reit": "⌂", # House symbol
"spv": "◆", # Diamond for special purpose
"joint venture": "∞" # Infinity for partnership
}
# Clean input: convert to lowercase and strip whitespace
cleaned_type = entity_type.lower().strip()
# Handle some common variations
if "investment" in cleaned_type:
return symbols["investment company"]
if "real estate" in cleaned_type or "reit" in cleaned_type:
return symbols["reit"]
# Return default question mark if type not found
return symbols.get(cleaned_type, "?")
@staticmethod
def _format_fiscal_year_date(date_str):
"""Format fiscal year end date in a human-readable format."""
if not date_str:
return "-"
# Dictionary of months
months = {
"01": "Jan", "02": "Feb", "03": "Mar",
"04": "Apr", "05": "May", "06": "Jun",
"07": "Jul", "08": "Aug", "09": "Sep",
"10": "Oct", "11": "Nov", "12": "Dec"
}
# Extract month and day
month = date_str[:2]
if month not in months:
return date_str
try:
day = str(int(date_str[2:])) # Remove leading zero
return f"{months[month]} {day}"
except (ValueError, IndexError):
return date_str
class CompanyData(EntityData):
"""
Specialized container for company data loaded from SEC submissions API.
This is a specialized version of EntityData specifically for companies.
It adds company-specific methods and properties.
"""
def __init__(self, **kwargs):
"""Construct a new CompanyData object."""
super().__init__(**kwargs)
@property
def industry(self) -> str:
"""Get the industry description for this company."""
return getattr(self, 'sic_description', '')
def get_ticker(self) -> Optional[str]:
"""Get the primary ticker for this company."""
if self.tickers and len(self.tickers) > 0:
return self.tickers[0]
return None
def __str__(self):
ticker = self.get_ticker()
ticker_str = f" - {ticker}" if ticker else ""
return f"CompanyData({self.name} [{self.cik}]{ticker_str})"
# Compile regex patterns for better performance
_COMPANY_TYPES_PATTERN = re.compile(r"(L\.?L\.?C\.?|Inc\.?|Ltd\.?|L\.?P\.?|/[A-Za-z]{2,3}/?| CORP(ORATION)?|PLC| AG)$",
re.IGNORECASE)
_PUNCTUATION_PATTERN = re.compile(r"\.|,")
def preprocess_company(company: str) -> str:
"""preprocess the company name for storing in the search index"""
comp = _COMPANY_TYPES_PATTERN.sub("", company.lower())
comp = _PUNCTUATION_PATTERN.sub("", comp)
return comp.strip()
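# e.g. preprocess_company("Apple Inc.") -> "apple"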
def create_default_entity_data(cik: int) -> 'EntityData':
"""
Create a default EntityData instance for when entity data cannot be found.
Args:
cik: The CIK number to use for the entity
Returns:
A minimal EntityData instance with default values
"""
# Create a minimal EntityData with blank/empty values
empty_address = Address(
street1="",
street2="",
city="",
state_or_country="",
zipcode="",
state_or_country_desc=""
)
# Import using lazy import cache
empty_company_filings = lazy_import('edgar.entity.filings.empty_company_filings')
# Use the CIK as the name since we don't know the real name
name = f"Entity {cik}"
# Create a minimal entity data
return EntityData(
cik=cik,
name=name,
tickers=[],
exchanges=[],
filings=empty_company_filings(cik, name),
business_address=empty_address,
mailing_address=empty_address,
category="",
sic=None,
sic_description="",
fiscal_year_end="",
entity_type="",
phone="",
flags="",
insider_transaction_for_owner_exists=False,
insider_transaction_for_issuer_exists=False,
ein="",
description="",
website="",
investor_website="",
state_of_incorporation="",
state_of_incorporation_description="",
former_names=[],
files=[]
)

File diff suppressed because it is too large

View File

@@ -0,0 +1,65 @@
#!/usr/bin/env python3
"""
Process the learned canonical structures into a simplified mappings file
optimized for the Facts API.
"""
import json
def process_mappings():
"""Convert canonical structures to simple concept->statement mappings."""
# Load canonical structures
with open('learned_mappings.json', 'r') as f:
canonical = json.load(f)
# Create simplified mappings
mappings = {}
metadata = {
'version': '1.0.0',
'generated': '2025-08-13',
'companies_analyzed': 133,
'source': 'structural_learning_production_run'
}
# Process each statement type
for statement_type, concepts in canonical.items():
for concept_data in concepts:
concept = concept_data['concept']
# Only include high-confidence mappings
if concept_data['occurrence_rate'] >= 0.3: # 30% threshold
mappings[concept] = {
'statement_type': statement_type,
'confidence': concept_data['occurrence_rate'],
'label': concept_data['label'],
'parent': concept_data.get('parent'),
'is_abstract': concept_data.get('is_abstract', False),
'is_total': concept_data.get('is_total', False),
'section': concept_data.get('section'),
'avg_depth': concept_data.get('avg_depth', 0)
}
# Save processed mappings
output = {
'metadata': metadata,
'mappings': mappings
}
with open('statement_mappings_v1.json', 'w') as f:
json.dump(output, f, indent=2)
print(f"Processed {len(mappings)} concept mappings")
print("Statement distribution:")
stmt_counts = {}
for concept, data in mappings.items():
stmt = data['statement_type']
stmt_counts[stmt] = stmt_counts.get(stmt, 0) + 1
for stmt, count in sorted(stmt_counts.items()):
print(f" {stmt}: {count}")
if __name__ == "__main__":
process_mappings()

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,557 @@
# EntityFiling Class Documentation
## Overview
The `EntityFiling` class extends the base `Filing` class with additional entity-specific metadata and functionality. When you access filings through a `Company` object, you get `EntityFiling` instances that include enriched information from the SEC's company submissions API.
**Key Differences from Base Filing:**
- Additional metadata (items, acceptance datetime, file number, etc.)
- `related_filings()` method to find filings by file number
- XBRL format indicators (is_xbrl, is_inline_xbrl)
- Report date separate from filing date
- Access to entity context
## Getting EntityFilings
### From Company
```python
from edgar import Company
# Get company
company = Company("AAPL")
# Get filings - returns EntityFiling instances
filings = company.get_filings(form="10-K")
filing = filings.latest()
# filing is now an EntityFiling, not base Filing
print(type(filing)) # <class 'edgar.entity.filings.EntityFiling'>
```
### Automatic Enhancement
When you call `company.get_filings()`, the filings are automatically EntityFiling instances with additional metadata.
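A quick check (illustrative snippet):

```python
from edgar import Company

filing = Company("AAPL").get_filings(form="10-Q").latest()
print(filing.report_date, filing.file_number, filing.is_xbrl)
```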
## Common Actions
Quick reference for the most frequently used EntityFiling methods:
### Access Filing Content
```python
# Get HTML content
html = filing.html()
# Get plain text
text = filing.text()
# Get markdown formatted content
markdown = filing.markdown()
```
### Get Structured Data
```python
# Get form-specific object (10-K, 10-Q, 8-K, etc.)
report = filing.obj()
# Get XBRL financial data
xbrl = filing.xbrl()
```
### Entity-Specific Features
```python
# Find related filings (amendments, etc.)
related = filing.related_filings()
# Check XBRL availability
if filing.is_xbrl:
xbrl = filing.xbrl()
# Access entity-specific metadata
print(filing.report_date) # Period end date
print(filing.items) # 8-K items
print(filing.file_number) # SEC file number
```
### View in Browser
```python
# Open filing in web browser
filing.open()
```
### Get Attachments
```python
# Access all filing attachments
attachments = filing.attachments
```
## EntityFiling-Specific Attributes
### Additional Metadata
| Attribute | Type | Description |
|-----------|------|-------------|
| `report_date` | str | Period end date for the report (YYYY-MM-DD) |
| `acceptance_datetime` | str | SEC acceptance timestamp |
| `file_number` | str | SEC file number for tracking related filings |
| `items` | str | 8-K items (e.g., "2.02,9.01") |
| `size` | int | Filing size in bytes |
| `primary_document` | str | Primary document filename |
| `primary_doc_description` | str | Description of primary document |
| `is_xbrl` | bool | Whether filing has XBRL data |
| `is_inline_xbrl` | bool | Whether filing uses inline XBRL |
### Accessing Additional Metadata
```python
filing = company.get_filings(form="10-K").latest()
# Entity-specific attributes
print(f"Report Date: {filing.report_date}")
print(f"Accepted: {filing.acceptance_datetime}")
print(f"File Number: {filing.file_number}")
print(f"Has XBRL: {filing.is_xbrl}")
print(f"Inline XBRL: {filing.is_inline_xbrl}")
print(f"Size: {filing.size:,} bytes")
```
## Working with 8-K Items
The `items` attribute is especially useful for 8-K current reports, which can cover multiple topics.
### Understanding 8-K Items
8-K items indicate what events or information the filing reports:
- **2.02** - Results of Operations and Financial Condition
- **5.02** - Departure/Election of Directors or Officers
- **8.01** - Other Events
- **9.01** - Financial Statements and Exhibits
```python
# Get 8-K filings
filings_8k = company.get_filings(form="8-K")
# Filter by items
for filing in filings_8k:
if filing.items and "2.02" in filing.items:
print(f"Earnings 8-K: {filing.filing_date}")
print(f" Items: {filing.items}")
```
### Important Note on Legacy Filings
**Data Source Limitation**: The `items` value comes from SEC metadata, not from parsing the filing document.
**For Legacy SGML Filings (1999-2001)**: The SEC's historical metadata may be incorrect or incomplete. Modern XML filings (2005+) have accurate metadata.
**Workaround**: For accurate item extraction from legacy SGML 8-K filings, parse the filing text directly:
```python
# For legacy filings, parse the document
filing_text = filing.text()
# Use regex to find items (adjust pattern as needed)
import re
items_pattern = r'Item\s+(\d+\.\d+)'
found_items = re.findall(items_pattern, filing_text, re.IGNORECASE)
```
## Related Filings
### Finding Related Filings by File Number
Use the `file_number` to find amendments, related documents, or filings from the same series:
```python
# Get original filing
filing = company.get_filings(form="10-K").latest()
# Find all related filings (amendments, etc.)
related = filing.related_filings()
print(f"Original filing: {filing.accession_no}")
print(f"Related filings: {len(related)}")
for f in related:
print(f" {f.form} - {f.filing_date}")
```
### Use Cases for Related Filings
**1. Find Amendments:**
```python
# Get original 10-K
filing_10k = company.get_filings(form="10-K").latest()
# Find any amendments
related = filing_10k.related_filings()
amendments = related.filter(form="10-K/A")
if len(amendments) > 0:
print("Filing was amended:")
for amendment in amendments:
print(f" {amendment.filing_date}: {amendment.accession_no}")
```
**2. Track Filing Series:**
```python
# Get S-1 registration
s1 = company.get_filings(form="S-1").latest()
# Find all related S-1 amendments
series = s1.related_filings()
print(f"Registration series: {len(series)} filings")
```
## XBRL Indicators
The `is_xbrl` and `is_inline_xbrl` attributes help determine if structured financial data is available.
### Checking XBRL Availability
```python
filing = company.get_filings(form="10-K").latest()
if filing.is_xbrl:
print("Filing has XBRL data")
if filing.is_inline_xbrl:
print(" Uses inline XBRL format")
xbrl = filing.xbrl() # Parse XBRL data
else:
print(" Uses traditional XBRL format")
else:
print("No XBRL data available")
```
### Filtering by XBRL
```python
# Get only filings with XBRL data
filings = company.get_filings(form="10-Q")
xbrl_filings = [f for f in filings if f.is_xbrl]
print(f"{len(xbrl_filings)} of {len(filings)} have XBRL")
# Check inline XBRL adoption
inline_count = sum(1 for f in xbrl_filings if f.is_inline_xbrl)
print(f"{inline_count} use inline XBRL format")
```
## Report Date vs Filing Date
EntityFiling provides both `report_date` and `filing_date`:
- **`report_date`**: Period end date (what the filing reports on)
- **`filing_date`**: When the filing was submitted to SEC
```python
filing = company.get_filings(form="10-Q").latest()
print(f"Period Ended: {filing.report_date}")
print(f"Filed On: {filing.filing_date}")
# Calculate filing lag
from datetime import datetime
report_dt = datetime.strptime(filing.report_date, '%Y-%m-%d')
filing_dt = datetime.strptime(filing.filing_date, '%Y-%m-%d')
lag_days = (filing_dt - report_dt).days
print(f"Filing lag: {lag_days} days")
```
## Common Workflows
### Analyzing 8-K Patterns
```python
# Get all 8-K filings
filings_8k = company.get_filings(form="8-K")
# Categorize by item
from collections import Counter
item_counts = Counter()
for filing in filings_8k:
if filing.items:
for item in filing.items.split(','):
item_counts[item.strip()] += 1
# Show most common 8-K topics
print("Most common 8-K items:")
for item, count in item_counts.most_common(5):
print(f" Item {item}: {count} filings")
```
### Track Amendment Activity
```python
# Get all 10-K filings including amendments
all_10k = company.get_filings(form=["10-K", "10-K/A"])
# Group by year
from collections import defaultdict
by_year = defaultdict(list)
for filing in all_10k:
year = filing.report_date[:4]
by_year[year].append(filing)
# Check which years had amendments
for year in sorted(by_year.keys(), reverse=True):
filings = by_year[year]
has_amendment = any('/A' in f.form for f in filings)
status = "amended" if has_amendment else "original"
print(f"{year}: {len(filings)} filing(s) - {status}")
```
### Find Earnings Announcements
```python
# Find 8-K filings with earnings (Item 2.02)
earnings_8k = []
for filing in company.get_filings(form="8-K"):
if filing.items and "2.02" in filing.items:
earnings_8k.append(filing)
print(f"Found {len(earnings_8k)} earnings 8-K filings")
# Show filing timeline
for filing in earnings_8k[-5:]: # Last 5
print(f"{filing.report_date}: {filing.filing_date}")
```
### Check XBRL Adoption Timeline
```python
# Track when company started using XBRL
filings = company.get_filings(form="10-K")
for filing in filings:
xbrl_status = "inline XBRL" if filing.is_inline_xbrl else "XBRL" if filing.is_xbrl else "no XBRL"
print(f"{filing.filing_date}: {xbrl_status}")
```
## Integration with Base Filing Features
EntityFiling inherits all methods from the base Filing class:
```python
filing = company.get_filings(form="10-K").latest()
# All base Filing methods work
html = filing.html()
text = filing.text()
markdown = filing.markdown()
xbrl = filing.xbrl()
filing.open()
# PLUS entity-specific features
related = filing.related_filings()
print(f"8-K items: {filing.items}")
print(f"Has XBRL: {filing.is_xbrl}")
```
## Comparison: EntityFiling vs Base Filing
### When You Get Each Type
**EntityFiling** - From Company context:
```python
company = Company("AAPL")
filing = company.get_filings(form="10-K").latest()
# Type: EntityFiling (with extra metadata)
```
**Base Filing** - From general search:
```python
from edgar import get_filings
filings = get_filings(2024, 3, form="10-K")
filing = filings[0]
# Type: Filing (base class)
```
### Feature Comparison
| Feature | Base Filing | EntityFiling |
|---------|-------------|--------------|
| Basic metadata | ✅ | ✅ |
| Content access (html, text) | ✅ | ✅ |
| XBRL parsing | ✅ | ✅ |
| Report date | ❌ | ✅ |
| Acceptance datetime | ❌ | ✅ |
| File number | ❌ | ✅ |
| 8-K items | ❌ | ✅ |
| XBRL indicators | ❌ | ✅ |
| related_filings() | ❌ | ✅ |
## Best Practices
### 1. Use EntityFiling for Company Analysis
When working with a specific company, always access filings through the Company object to get EntityFiling benefits:
```python
# Good - get EntityFiling with metadata
company = Company("AAPL")
filing = company.get_filings(form="10-K").latest()
# Less ideal - get base Filing without metadata
filings = get_filings(2024, 3, form="10-K").filter(ticker="AAPL")
filing = filings[0]
```
### 2. Check XBRL Availability Before Parsing
```python
filing = company.get_filings(form="10-K").latest()
if filing.is_xbrl:
xbrl = filing.xbrl()
statements = xbrl.statements
else:
print("No structured financial data available")
```
### 3. Handle Missing Items Gracefully
```python
# Items may be None or empty string
if filing.items:
items_list = filing.items.split(',')
else:
items_list = []
```
### 4. Use Related Filings to Track Changes
```python
# Find if filing was amended
filing = company.get_filings(form="10-K").latest()
related = filing.related_filings()
amendments = [f for f in related if '/A' in f.form]
if amendments:
print(f"This filing has {len(amendments)} amendment(s)")
latest_amendment = amendments[-1]
print(f"Most recent: {latest_amendment.filing_date}")
```
## Error Handling
### Missing Attributes
Not all filings have all attributes populated:
```python
filing = company.get_filings(form="8-K").latest()
# Some filings may not have items
items = filing.items if filing.items else "Not specified"
# File number should always be present for EntityFiling
if filing.file_number:
print(f"File number: {filing.file_number}")
```
### XBRL Parsing Failures
Even if `is_xbrl` is True, parsing can fail:
```python
if filing.is_xbrl:
try:
xbrl = filing.xbrl()
statements = xbrl.statements
except Exception as e:
print(f"XBRL parsing failed: {e}")
# Fall back to text parsing
text = filing.text()
```
## Performance Considerations
### Efficient Filtering
Use EntityFiling metadata to filter before expensive operations:
```python
# Filter by XBRL availability first
filings = company.get_filings(form="10-Q")
xbrl_filings = [f for f in filings if f.is_xbrl]
# Then parse only those with XBRL
for filing in xbrl_filings:
xbrl = filing.xbrl()
# Process XBRL data...
```
### Batch Operations
When processing many filings, check size first:
```python
filings = company.get_filings()
# Process smaller filings first
sorted_filings = sorted(filings, key=lambda f: f.size)
for filing in sorted_filings[:10]: # Process 10 smallest
html = filing.html()
# Process content...
```
## Troubleshooting
### "EntityFiling has no attribute 'X'"
You're trying to use EntityFiling-specific features on a base Filing object:
```python
# Problem: Base filing doesn't have entity attributes
filings = get_filings(2024, 3)
filing = filings[0]
# filing.report_date # AttributeError!
# Solution: Get from company for EntityFiling
company = Company(filing.cik)
entity_filing = company.get_filings(
accession_number=filing.accession_no
)[0]
# entity_filing.report_date # Works!
```
### Related Filings Returns Empty
The file number might not link to other filings:
```python
related = filing.related_filings()
if len(related) == 0:
print("No related filings found")
# This is normal for standalone filings
else:
print(f"Found {len(related)} related filing(s)")
```
### Items Not Showing for 8-K
Check if it's a legacy filing:
```python
filing = company.get_filings(form="8-K")[0]
if not filing.items:
# Check filing year
filing_year = int(filing.filing_date[:4])
if filing_year < 2005:
print("Legacy SGML filing - items may be missing from metadata")
print("Parse filing text for accurate item identification")
else:
print("Modern filing with no items specified")
```
This comprehensive guide covers the unique features and workflows available when working with EntityFiling objects in edgartools.

View File

@@ -0,0 +1,671 @@
# EntityFilings Class Documentation
## Overview
The `EntityFilings` class extends the base `Filings` class with entity-specific functionality. When you access filings through a `Company` object, you get an `EntityFilings` collection that maintains entity context (CIK and company name) and returns `EntityFiling` instances with enriched metadata.
**Key Differences from Base Filings:**
- Maintains entity context (CIK, company name)
- Returns `EntityFiling` instances (not base `Filing`)
- All filtering/selection methods preserve `EntityFilings` type
- Additional metadata from SEC company submissions API
- Direct access to entity-specific features
## Getting EntityFilings
### From Company
```python
from edgar import Company
# Get company
company = Company("AAPL")
# Get filings - returns EntityFilings collection
filings = company.get_filings()
# filings is EntityFilings, not base Filings
print(type(filings)) # <class 'edgar.entity.filings.EntityFilings'>
# Each filing in the collection is EntityFiling
filing = filings[0]
print(type(filing)) # <class 'edgar.entity.filings.EntityFiling'>
```
### With Form Filters
```python
# Get specific form types
filings_10k = company.get_filings(form="10-K")
filings_8k = company.get_filings(form="8-K")
filings_multi = company.get_filings(form=["10-K", "10-Q"])
```
## Common Actions
Quick reference for the most frequently used EntityFilings methods:
### Get Individual Filings
```python
# Get most recent filing
latest = filings.latest()
# Get multiple recent filings
latest_5 = filings.latest(5)
# Get filing by index
filing = filings[0]
filing = filings.get_filing_at(5)
```
### Filter the Collection
```python
# Filter by form type
annual_reports = filings.filter(form="10-K")
# Filter by date
recent = filings.filter(filing_date="2024-01-01:")
# Exclude amendments
originals_only = filings.filter(amendments=False)
# Combined filters
filtered = filings.filter(
form=["10-K", "10-Q"],
filing_date="2023-01-01:2023-12-31",
amendments=False
)
```
### Navigate Pages
```python
# For large collections (multiple pages)
next_page = filings.next()
prev_page = filings.previous()
```
### Convert to DataFrame
```python
# Export to pandas
df = filings.to_pandas()
# Select specific columns
df = filings.to_pandas('form', 'filing_date', 'accession_number')
```
### Select Subsets
```python
# Get first/last n filings
first_10 = filings.head(10)
last_10 = filings.tail(10)
# Random sample
sample = filings.sample(20)
```
## EntityFilings-Specific Features
### Entity Context
EntityFilings maintains the entity context throughout operations:
```python
filings = company.get_filings()
# Access entity information
print(filings.cik) # Company CIK
print(filings.company_name) # Company name
# Context preserved through operations
filtered = filings.filter(form="10-K")
print(filtered.cik) # Same CIK
print(filtered.company_name) # Same company name
```
### Returns EntityFiling Instances
All methods that return individual filings return `EntityFiling` (not base `Filing`):
```python
# Get latest returns EntityFiling
filing = filings.latest()
print(type(filing)) # EntityFiling
# Indexing returns EntityFiling
filing = filings[0]
print(type(filing)) # EntityFiling
# Access EntityFiling-specific attributes
print(filing.report_date) # Period end date
print(filing.items) # 8-K items
print(filing.is_xbrl) # XBRL indicator
```
### Type Preservation
All collection methods preserve the `EntityFilings` type:
```python
# filter() returns EntityFilings
filtered = filings.filter(form="10-K")
print(type(filtered)) # EntityFilings
# head() returns EntityFilings
first_10 = filings.head(10)
print(type(first_10)) # EntityFilings
# latest(n) with n>1 returns EntityFilings
latest_5 = filings.latest(5)
print(type(latest_5)) # EntityFilings
```
## Core Methods
### latest(n=1)
Get the most recent filing(s):
```python
# Get single latest filing (returns EntityFiling)
latest = filings.latest()
print(f"Most recent: {latest.form} on {latest.filing_date}")
# Get multiple latest filings (returns EntityFilings)
latest_5 = filings.latest(5)
for filing in latest_5:
print(f"{filing.form}: {filing.filing_date}")
```
### filter()
Filter filings by various criteria:
```python
# Filter by form type
filings_10k = filings.filter(form="10-K")
filings_8k = filings.filter(form="8-K")
filings_annual = filings.filter(form=["10-K", "10-K/A"])
# Filter by date
recent = filings.filter(filing_date="2024-01-01:")
date_range = filings.filter(filing_date="2023-01-01:2023-12-31")
specific_date = filings.filter(filing_date="2024-03-15")
# Exclude amendments
no_amendments = filings.filter(amendments=False)
# Filter by accession number
specific = filings.filter(accession_number="0000320193-24-000123")
# Combined filters
filtered = filings.filter(
form="10-Q",
filing_date="2024-01-01:",
amendments=False
)
```
**Note**: Unlike base `Filings.filter()`, `EntityFilings.filter()` doesn't support `cik` or `ticker` parameters since the collection is already scoped to a single entity.
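For example (illustrative):

```python
filings = company.get_filings()          # already scoped to one entity
by_form = filings.filter(form="10-K")    # supported
# filings.filter(ticker="AAPL")          # not supported on EntityFilings
```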
### head(n) / tail(n)
Get first or last n filings:
```python
# Get first 10 filings
first_10 = filings.head(10)
# Get last 10 filings
last_10 = filings.tail(10)
# Both return EntityFilings collections
print(type(first_10)) # EntityFilings
print(type(last_10)) # EntityFilings
```
### sample(n)
Get random sample of filings:
```python
# Get random sample of 20 filings
sample = filings.sample(20)
# Returns EntityFilings collection
print(type(sample)) # EntityFilings
```
### Access by Index
```python
# Direct indexing
first_filing = filings[0]
tenth_filing = filings[9]
# Explicit method
filing = filings.get_filing_at(5)
# All return EntityFiling instances
```
## Pagination
For large filing collections, EntityFilings supports pagination:
### next() / previous()
```python
# Display shows page info if multiple pages
print(filings)
# Shows: "Showing 1 to 50 of 250 filings. Page using ← prev() and next() →"
# Navigate to next page
next_page = filings.next()
# Navigate to previous page
prev_page = filings.previous()
# Both return EntityFilings with new page of data
```
### Page Navigation Example
```python
# Start with first page
current_page = company.get_filings()
print(current_page)
# Move through pages
page_2 = current_page.next()
page_3 = page_2.next()
# Go back
page_2_again = page_3.previous()
# At end of pages
last_page = current_page
while True:
next_page = last_page.next()
if next_page is None:
break
last_page = next_page
```
## Data Conversion & Export
### to_pandas()
Convert to pandas DataFrame:
```python
# All columns
df = filings.to_pandas()
# Specific columns
df = filings.to_pandas('form', 'filing_date', 'accession_number')
# Shows entity-specific columns:
# form, filing_date, reportDate, acceptanceDateTime, fileNumber,
# items, size, primaryDocument, isXBRL, isInlineXBRL, etc.
```
### to_dict()
Convert to dictionary:
```python
# Convert to dict
data = filings.to_dict()
# Limit rows
data = filings.to_dict(max_rows=100)
```
### save() / save_parquet()
Save to Parquet file:
```python
# Save as Parquet
filings.save_parquet("company_filings.parquet")
# Alternative
filings.save("company_filings.parquet")
```
## Common Workflows
### Get Most Recent Annual Report
```python
company = Company("AAPL")
# Get all 10-K filings
filings_10k = company.get_filings(form="10-K")
# Get most recent
latest_10k = filings_10k.latest()
print(f"Latest 10-K: {latest_10k.filing_date}")
print(f"Period: {latest_10k.report_date}")
# Access XBRL if available
if latest_10k.is_xbrl:
xbrl = latest_10k.xbrl()
```
### Analyze Quarterly Reports
```python
# Get all 10-Q filings
filings_10q = company.get_filings(form="10-Q")
# Get last 4 quarters
last_4_quarters = filings_10q.latest(4)
# Analyze each quarter
for filing in last_4_quarters:
print(f"Quarter ending {filing.report_date}:")
print(f" Filed: {filing.filing_date}")
print(f" XBRL: {filing.is_xbrl}")
```
### Find 8-K Earnings Announcements
```python
# Get all 8-K filings
filings_8k = company.get_filings(form="8-K")
# Filter for earnings-related items
earnings_filings = []
for filing in filings_8k:
if filing.items and "2.02" in filing.items:
earnings_filings.append(filing)
print(f"Found {len(earnings_filings)} earnings 8-Ks")
# Show recent earnings dates
for filing in earnings_filings[:5]:
print(f"{filing.filing_date}: Items {filing.items}")
```
### Track Amendment Activity
```python
# Get all 10-K filings including amendments
all_10k = company.get_filings(form=["10-K", "10-K/A"])
# Separate originals from amendments
originals = all_10k.filter(amendments=False)
amendments = all_10k.filter(form="10-K/A")
print(f"Original 10-Ks: {len(originals)}")
print(f"Amended 10-Ks: {len(amendments)}")
# Show amendment details
for amendment in amendments:
print(f"{amendment.filing_date}: {amendment.accession_no}")
```
### Export Filings to DataFrame
```python
# Get recent filings
filings = company.get_filings(form=["10-K", "10-Q"])
# Filter to recent year
recent = filings.filter(filing_date="2024-01-01:")
# Convert to DataFrame
df = recent.to_pandas()
# Analyze
print(f"Total filings: {len(df)}")
print(f"Forms: {df['form'].value_counts()}")
print(f"XBRL filings: {df['isXBRL'].sum()}")
# Export
df.to_csv("aapl_recent_filings.csv", index=False)
```
### Compare XBRL Adoption
```python
import pandas as pd

# Get all annual reports
filings_10k = company.get_filings(form="10-K")
# Convert to DataFrame
df = filings_10k.to_pandas()
# Group by year
df['year'] = pd.to_datetime(df['filing_date']).dt.year
# Check XBRL adoption by year
xbrl_by_year = df.groupby('year').agg({
'isXBRL': 'sum',
'isInlineXBRL': 'sum',
'form': 'count'
}).rename(columns={'form': 'total'})
print(xbrl_by_year)
```
## Display & Representation
### Rich Display
EntityFilings displays as a rich table with pagination info:
```python
print(filings)
```
Shows:
- Table of filings with: #, Form, Description, Filing Date, Accession Number
- Pagination info (if multiple pages): "Showing 1 to 50 of 250 filings"
- Panel title: "Filings for [Company Name] [CIK]"
- Panel subtitle: Date range of filings
### Properties
```python
# Check if empty
if filings.empty:
print("No filings found")
# Get date range
start, end = filings.date_range
print(f"Filings from {start} to {end}")
# Get summary
print(filings.summary)
```
## Comparison: EntityFilings vs Base Filings
### When You Get Each Type
**EntityFilings** - From Company context:
```python
company = Company("AAPL")
filings = company.get_filings()
# Type: EntityFilings (with entity context)
```
**Base Filings** - From general search:
```python
from edgar import get_filings
filings = get_filings(2024, 1, form="10-K")
# Type: Filings (base class)
```
### Feature Comparison
| Feature | Base Filings | EntityFilings |
|---------|-------------|---------------|
| Filter by form | ✅ | ✅ |
| Filter by date | ✅ | ✅ |
| Filter by CIK/ticker | ✅ | ❌ (already scoped to entity) |
| Returns EntityFiling | ❌ | ✅ |
| Entity context (CIK, name) | ❌ | ✅ |
| Type preserved in operations | Filings | EntityFilings |
| From Company.get_filings() | ❌ | ✅ |
## Best Practices
### 1. Use EntityFilings for Company Analysis
When working with a specific company, always use `Company.get_filings()`:
```python
# Good - get EntityFilings with context
company = Company("AAPL")
filings = company.get_filings(form="10-K")
# Less ideal - get base Filings, requires filtering
from edgar import get_filings
all_filings = get_filings(2024, 1, form="10-K")
apple_filings = all_filings.filter(ticker="AAPL")
```
### 2. Check Empty Collections
```python
filings = company.get_filings(form="RARE-FORM")
if filings.empty:
print("No filings found")
else:
latest = filings.latest()
```
### 3. Use latest() for Single Most Recent
```python
# Get single filing
filing = filings.latest()
# Not this (gets collection of 1)
filings_one = filings.head(1)
filing = filings_one[0]
```
### 4. Preserve Type Through Operations
```python
# All these return EntityFilings
filtered = filings.filter(form="10-K")
recent = filtered.filter(filing_date="2024-01-01:")
sample = recent.sample(10)
# All maintain entity context
print(sample.cik) # Still accessible
print(sample.company_name) # Still accessible
```
## Error Handling
### Empty Collections
```python
filings = company.get_filings(form="NONEXISTENT")
if filings.empty:
print("No filings found")
else:
# Safe to access
latest = filings.latest()
```
### Pagination at Boundaries
```python
# At end of pages
last_page = filings
while True:
next_page = last_page.next()
if next_page is None:
print("Reached end of filings")
break
last_page = next_page
```
### Invalid Index
```python
# Check length first
if len(filings) > 5:
filing = filings[5]
else:
print("Collection has fewer than 6 filings")
```
## Performance Considerations
### Efficient Filtering
Filter early to reduce data size:
```python
# Good: filter first, then process
recent_10k = company.get_filings(form="10-K", filing_date="2023-01-01:")
for filing in recent_10k:
process(filing)
# Less efficient: get all, then filter in Python
all_filings = company.get_filings()
for filing in all_filings:
if filing.form == "10-K" and filing.filing_date >= "2023-01-01":
process(filing)
```
### Use Pagination
For very large collections, use pagination:
```python
# Process page by page
current_page = company.get_filings()
while current_page:
# Process current page
for filing in current_page:
process(filing)
# Move to next page
current_page = current_page.next()
```
### DataFrame Conversion
Only convert to pandas when needed:
```python
# Good: operate on EntityFilings directly
filings_10k = filings.filter(form="10-K")
latest = filings_10k.latest()
# Less efficient: convert to DataFrame first
df = filings.to_pandas()
df_10k = df[df['form'] == '10-K']
# Now you've lost EntityFiling functionality
```
## Integration with Company
EntityFilings is the primary interface between Company and Filing objects:
```python
company = Company("AAPL")
# Company.get_filings() returns EntityFilings
filings = company.get_filings()
# EntityFilings contains EntityFiling instances
filing = filings[0]
# EntityFiling knows its entity
entity = filing.get_entity()
# entity is the same Company object
```
This creates a seamless workflow for entity-focused analysis while maintaining proper type separation and functionality at each level.

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,454 @@
"""
Filings-related classes for the Entity package.
This module contains classes related to SEC filings for entities, including
collections of filings and filing facts.
"""
from typing import List, Union
import pandas as pd
import pyarrow as pa
import pyarrow.compute as pc
from rich.box import SIMPLE
from rich.console import Group
from rich.panel import Panel
from rich.table import Table
from rich.text import Text
from edgar._filings import Filing, Filings, PagingState
from edgar.core import IntString, log
from edgar.formatting import accession_number_text, display_size
from edgar.reference.forms import describe_form
from edgar.richtools import Docs, df_to_rich_table, repr_rich
__all__ = [
'EntityFiling',
'EntityFilings',
'EntityFacts',
'empty_company_filings'
]
class EntityFiling(Filing):
"""
Represents a single SEC filing for an entity.
This extends the base Filing class with additional information
and methods specific to SEC entities.
Attributes:
items (str): Filing items from SEC metadata. For 8-K filings, this indicates
which items are included (e.g., "2.02,9.01").
**Data Source**: This value comes from SEC filing metadata, not from parsing
the filing document itself.
**Legacy SGML Limitation**: For legacy SGML filings (1999-2001), the SEC's
historical metadata may be incorrect or incomplete. Modern XML filings (2005+)
have accurate metadata.
**Workaround for Legacy Filings**: For accurate item extraction from legacy
SGML 8-K filings, parse the filing text directly using regex patterns.
See GitHub Issue #462 for example code.
"""
def __init__(self,
cik: int,
company: str,
form: str,
filing_date: str,
report_date: str,
acceptance_datetime: str,
accession_no: str,
file_number: str,
items: str,
size: int,
primary_document: str,
primary_doc_description: str,
is_xbrl: bool,
is_inline_xbrl: bool):
super().__init__(cik=cik, company=company, form=form, filing_date=filing_date, accession_no=accession_no)
self.report_date = report_date
self.acceptance_datetime = acceptance_datetime
self.file_number: str = file_number
self.items: str = items # See class docstring for important notes on data source and limitations
self.size: int = size
self.primary_document: str = primary_document
self.primary_doc_description: str = primary_doc_description
self.is_xbrl: bool = is_xbrl
self.is_inline_xbrl: bool = is_inline_xbrl
def related_filings(self):
"""Get all the filings related to this one by file number."""
return self.get_entity().get_filings(file_number=self.file_number, sort_by="filing_date")
def __str__(self):
return (f"Filing(company='{self.company}', cik={self.cik}, form='{self.form}', "
f"filing_date='{self.filing_date}', accession_no='{self.accession_no}')"
)
class EntityFilings(Filings):
"""
Collection of SEC filings for an entity.
This extends the base Filings class with additional methods and properties
specific to entity filings.
"""
def __init__(self,
data: pa.Table,
cik: int,
company_name: str,
original_state: PagingState = None):
super().__init__(data, original_state=original_state)
self.cik = cik
self.company_name = company_name
@property
def docs(self):
return Docs(self)
def __getitem__(self, item):
return self.get_filing_at(item)
@property
def empty(self):
return len(self.data) == 0
def get_filing_at(self, item: int):
"""Get the filing at the specified index."""
return EntityFiling(
cik=self.cik,
company=self.company_name,
form=self.data['form'][item].as_py(),
filing_date=self.data['filing_date'][item].as_py(),
report_date=self.data['reportDate'][item].as_py(),
acceptance_datetime=self.data['acceptanceDateTime'][item].as_py(),
accession_no=self.data['accession_number'][item].as_py(),
file_number=self.data['fileNumber'][item].as_py(),
items=self.data['items'][item].as_py(),
size=self.data['size'][item].as_py(),
primary_document=self.data['primaryDocument'][item].as_py(),
primary_doc_description=self.data['primaryDocDescription'][item].as_py(),
is_xbrl=self.data['isXBRL'][item].as_py(),
is_inline_xbrl=self.data['isInlineXBRL'][item].as_py()
)
def filter(self,
form: Union[str, List[str]] = None,
amendments: bool = None,
filing_date: str = None,
date: str = None,
cik: Union[int, str, List[Union[int, str]]] = None,
ticker: Union[str, List[str]] = None,
accession_number: Union[str, List[str]] = None):
"""
Filter the filings based on various criteria.
Args:
form: Filter by form type
amendments: Include amendments
filing_date: Filter by filing date
date: Alias for filing_date
cik: Filter by CIK
ticker: Filter by ticker
accession_number: Filter by accession number
Returns:
Filtered EntityFilings
"""
# The super filter returns Filings. We want EntityFilings
res = super().filter(form=form,
amendments=amendments,
filing_date=filing_date,
date=date,
cik=cik,
ticker=ticker,
accession_number=accession_number)
return EntityFilings(data=res.data, cik=self.cik, company_name=self.company_name)
def latest(self, n: int = 1):
"""
Get the latest n filings.
Args:
n: Number of filings to return
Returns:
            Latest filing(s): a single EntityFiling if there is one result, an EntityFilings collection for multiple, or None if the collection is empty
"""
sort_indices = pc.sort_indices(self.data, sort_keys=[("filing_date", "descending")])
sort_indices_top = sort_indices[:min(n, len(sort_indices))]
latest_filing_index = pc.take(data=self.data, indices=sort_indices_top)
filings = EntityFilings(latest_filing_index,
cik=self.cik,
company_name=self.company_name)
if filings.empty:
return None
if len(filings) == 1:
return filings[0]
else:
return filings
def head(self, n: int):
"""
Get the first n filings.
Args:
n: Number of filings to return
Returns:
EntityFilings containing the first n filings
"""
selection = self._head(n)
return EntityFilings(data=selection, cik=self.cik, company_name=self.company_name)
def tail(self, n: int):
"""
Get the last n filings.
Args:
n: Number of filings to return
Returns:
EntityFilings containing the last n filings
"""
selection = self._tail(n)
return EntityFilings(data=selection, cik=self.cik, company_name=self.company_name)
def sample(self, n: int):
"""
Get a random sample of n filings.
Args:
n: Number of filings to sample
Returns:
EntityFilings containing n random filings
"""
selection = self._sample(n)
return EntityFilings(data=selection, cik=self.cik, company_name=self.company_name)
@staticmethod
def summarize(data) -> pd.DataFrame:
"""
Summarize filing data as a pandas DataFrame.
Args:
data: Filing data to summarize
Returns:
DataFrame with summarized data
"""
return (data
.assign(size=lambda df: df['size'].apply(display_size),
isXBRL=lambda df: df.isXBRL.map({'1': "\u2713", 1: "\u2713"}).fillna(""),
)
.filter(["form", "filing_date", "accession_number", "isXBRL"])
.rename(columns={"filing_date": "filed", "isXBRL": "xbrl"})
)
def next(self):
"""
Show the next page of filings.
Returns:
EntityFilings with the next page of data, or None if at the end
"""
data_page = self.data_pager.next()
if data_page is None:
log.warning("End of data .. use prev() \u2190 ")
return None
start_index, _ = self.data_pager._current_range
filings_state = PagingState(page_start=start_index, num_records=len(self))
return EntityFilings(data_page,
cik=self.cik,
company_name=self.company_name,
original_state=filings_state)
def previous(self):
"""
Show the previous page of filings.
Returns:
EntityFilings with the previous page of data, or None if at the beginning
"""
data_page = self.data_pager.previous()
if data_page is None:
log.warning(" No previous data .. use next() \u2192 ")
return None
start_index, _ = self.data_pager._current_range
filings_state = PagingState(page_start=start_index, num_records=len(self))
return EntityFilings(data_page,
cik=self.cik,
company_name=self.company_name,
original_state=filings_state)
def __repr__(self):
return repr_rich(self.__rich__())
def __rich__(self):
# Create table with appropriate columns and styling
table = Table(
show_header=True,
header_style="bold",
show_edge=True,
expand=False,
padding=(0, 1),
box=SIMPLE,
row_styles=["", "bold"]
)
# Add columns with specific styling and alignment
table.add_column("#", style="dim", justify="right")
table.add_column("Form", width=10, style="bold yellow")
table.add_column("Description", width=60, style="bold blue"),
table.add_column("Filing Date", width=11)
table.add_column("Accession Number", width=20)
# Get current page from data pager
current_page = self.data_pager.current()
# Calculate start index for proper indexing
start_idx = self._original_state.page_start if self._original_state else self.data_pager.start_index
# Iterate through rows in current page
for i in range(len(current_page)):
form = current_page['form'][i].as_py()
description = describe_form(current_page['form'][i].as_py(), prepend_form=False)
row = [
str(start_idx + i),
form,
description,
str(current_page['filing_date'][i].as_py()),
accession_number_text(current_page['accession_number'][i].as_py())
]
table.add_row(*row)
# Show paging information only if there are multiple pages
elements = [table]
if self.data_pager.total_pages > 1:
total_filings = self._original_state.num_records
current_count = len(current_page)
start_num = start_idx + 1
end_num = start_idx + current_count
page_info = Text.assemble(
("Showing ", "dim"),
(f"{start_num:,}", "bold red"),
(" to ", "dim"),
(f"{end_num:,}", "bold red"),
(" of ", "dim"),
(f"{total_filings:,}", "bold"),
(" filings.", "dim"),
(" Page using ", "dim"),
("← prev()", "bold gray54"),
(" and ", "dim"),
("next() →", "bold gray54")
)
elements.extend([Text("\n"), page_info])
# Get the title
title = Text.assemble(
("Filings for ", "bold"),
(f"{self.company_name}", "bold green"),
(" [", "dim"),
(f"{self.cik}", "bold yellow"),
("]", "dim")
)
# Get the subtitle
start_date, end_date = self.date_range
date_range_text = f"Company filings between {start_date:%Y-%m-%d} and {end_date:%Y-%m-%d}" if start_date else "Company filings"
subtitle = Text.assemble(
(date_range_text, "dim"),
"",
("filings.docs", "cyan dim"),
(" for usage guide", "dim")
)
return Panel(
Group(*elements),
title=title,
subtitle=subtitle,
border_style="bold grey54",
expand=False
)
class EntityFacts:
"""
Contains structured facts data about an entity from XBRL filings.
"""
def __init__(self,
cik: int,
name: str,
facts: pa.Table,
fact_meta: pd.DataFrame):
self.cik: int = cik
self.name: str = name
self.facts: pa.Table = facts
self.fact_meta: pd.DataFrame = fact_meta
def to_pandas(self) -> pd.DataFrame:
"""Convert facts to a pandas DataFrame."""
return self.facts.to_pandas()
def __len__(self):
return len(self.facts)
def num_facts(self) -> int:
"""Get the number of facts."""
return len(self.fact_meta)
def __rich__(self):
return Panel(
Group(
df_to_rich_table(self.facts)
), title=f"Company Facts({self.name} [{self.cik}] {len(self.facts):,} total facts)"
)
def __repr__(self):
return repr_rich(self.__rich__())
COMPANY_FILINGS_SCHEMA = pa.schema([
('accession_number', pa.string()),
('filing_date', pa.date32()),
('reportDate', pa.string()),
('acceptanceDateTime', pa.timestamp('us')), # Changed to timestamp
('act', pa.string()),
('form', pa.string()),
('fileNumber', pa.string()),
('items', pa.string()),
('size', pa.string()),
('isXBRL', pa.string()),
('isInlineXBRL', pa.string()),
('primaryDocument', pa.string()),
('primaryDocDescription', pa.string())
])
def empty_company_filings(cik:IntString, company_name:str):
"""
Create an empty filings container.
Args:
cik: The CIK number
company_name: The company name
Returns:
EntityFilings: An empty filings container
"""
table = pa.Table.from_arrays([[] for _ in range(13)], schema=COMPANY_FILINGS_SCHEMA)
return EntityFilings(table, cik=cik, company_name=company_name)
# For backward compatibility
CompanyFiling = EntityFiling
CompanyFilings = EntityFilings
CompanyFacts = EntityFacts

View File

@@ -0,0 +1,137 @@
"""
Loader for learned statement mappings and canonical structures.
This module handles loading and caching of learned mappings from the
structural learning process.
"""
import json
import logging
from functools import lru_cache
from pathlib import Path
from typing import Any, Dict, Optional
log = logging.getLogger(__name__)
@lru_cache(maxsize=1)
def load_learned_mappings() -> Dict[str, Dict[str, Any]]:
"""
Load learned statement mappings from package data.
Returns:
Dictionary of concept -> mapping info
"""
try:
# Get the data file path
data_dir = Path(__file__).parent / 'data'
mappings_file = data_dir / 'statement_mappings_v1.json'
if not mappings_file.exists():
log.warning("Learned mappings file not found: %s", mappings_file)
return {}
with open(mappings_file, 'r') as f:
data = json.load(f)
mappings = data.get('mappings', {})
metadata = data.get('metadata', {})
log.info("Loaded %d learned concept mappings (version: %s)", len(mappings), metadata.get('version', 'unknown'))
return mappings
except Exception as e:
log.error("Error loading learned mappings: %s", e)
return {}
@lru_cache(maxsize=1)
def load_canonical_structures() -> Dict[str, Any]:
"""
Load canonical statement structures.
Returns:
Dictionary of statement_type -> canonical structure
"""
try:
data_dir = Path(__file__).parent / 'data'
structures_file = data_dir / 'learned_mappings.json'
if not structures_file.exists():
log.warning("Canonical structures file not found: %s", structures_file)
return {}
with open(structures_file, 'r') as f:
structures = json.load(f)
log.info("Loaded canonical structures for %d statement types", len(structures))
return structures
except Exception as e:
log.error("Error loading canonical structures: %s", e)
return {}
@lru_cache(maxsize=1)
def load_virtual_trees() -> Dict[str, Any]:
"""
Load virtual presentation trees.
Returns:
Dictionary of statement_type -> virtual tree
"""
try:
data_dir = Path(__file__).parent / 'data'
trees_file = data_dir / 'virtual_trees.json'
if not trees_file.exists():
log.warning("Virtual trees file not found: %s", trees_file)
return {}
with open(trees_file, 'r') as f:
trees = json.load(f)
log.info("Loaded virtual trees for %d statement types", len(trees))
return trees
except Exception as e:
log.error("Error loading virtual trees: %s", e)
return {}
def get_concept_mapping(concept: str) -> Optional[Dict[str, Any]]:
"""
Get mapping information for a specific concept.
Args:
concept: Concept name (without namespace)
Returns:
Mapping info dict or None if not found
"""
mappings = load_learned_mappings()
return mappings.get(concept)
def get_statement_concepts(statement_type: str,
min_confidence: float = 0.5) -> Dict[str, Dict[str, Any]]:
"""
Get all concepts for a specific statement type.
Args:
statement_type: Type of statement (BalanceSheet, IncomeStatement, etc.)
min_confidence: Minimum confidence threshold
Returns:
Dictionary of concept -> mapping info
"""
mappings = load_learned_mappings()
result = {}
for concept, info in mappings.items():
if (info.get('statement_type') == statement_type and
info.get('confidence', 0) >= min_confidence):
result[concept] = info
return result
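# Illustrative usage (assumes the bundled JSON data files are present):
#   info = get_concept_mapping("Revenues")
#   income_concepts = get_statement_concepts("IncomeStatement", min_confidence=0.7)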

View File

@@ -0,0 +1,262 @@
"""
Data models for the enhanced Entity Facts API.
This module provides the unified data models for financial facts,
optimized for both traditional analysis and AI consumption.
"""
from dataclasses import dataclass, field
from datetime import date
from enum import Enum
from typing import Any, Dict, List, Literal, Optional, Union
class DataQuality(Enum):
"""Data quality indicators for facts"""
HIGH = "high" # Direct from XBRL, validated
MEDIUM = "medium" # Derived or calculated
LOW = "low" # Estimated or inferred
@dataclass
class FinancialFact:
"""
Unified fact representation optimized for both traditional analysis and AI consumption.
This class represents a single financial fact with rich contextual information,
quality indicators, and AI-ready metadata.
"""
# Core identification
concept: str # Standardized concept (e.g., 'us-gaap:Revenue')
taxonomy: str # Taxonomy namespace (us-gaap, ifrs, etc.)
label: str # Human-readable label
# Values with proper typing
value: Union[float, int, str] # The actual value
numeric_value: Optional[float] # Numeric representation for calculations
unit: str # Unit of measure (USD, shares, etc.)
scale: Optional[int] = None # Scale factor (thousands=1000, millions=1000000)
# Temporal context
period_start: Optional[date] = None
    period_end: Optional[date] = None
period_type: Literal['instant', 'duration'] = 'instant'
fiscal_year: int = 0
fiscal_period: str = '' # FY, Q1, Q2, Q3, Q4
# Filing context
    filing_date: Optional[date] = None
form_type: str = '' # 10-K, 10-Q, 8-K, etc.
accession: str = '' # SEC accession number
# Quality and provenance
data_quality: DataQuality = DataQuality.MEDIUM
is_audited: bool = False
is_restated: bool = False
is_estimated: bool = False
confidence_score: float = 0.8 # 0.0 to 1.0
# AI-ready context
semantic_tags: List[str] = field(default_factory=list) # ['revenue', 'recurring', 'operating']
business_context: str = '' # "Product revenue from iPhone sales"
calculation_context: Optional[str] = None # "Derived from segment data"
# Optional XBRL specifics
context_ref: Optional[str] = None
dimensions: Dict[str, str] = field(default_factory=dict)
statement_type: Optional[str] = None
line_item_sequence: Optional[int] = None
# Structural metadata (from learned mappings)
depth: Optional[int] = None # Hierarchy depth in statement
parent_concept: Optional[str] = None # Parent concept in hierarchy
section: Optional[str] = None # Statement section (e.g., "Current Assets")
is_abstract: bool = False # Abstract/header item
is_total: bool = False # Total/sum item
presentation_order: Optional[float] = None # Order in presentation
def to_llm_context(self) -> Dict[str, Any]:
"""
Generate rich context for LLM consumption.
Returns a dictionary with formatted values and contextual information
optimized for language model understanding.
"""
# Format the value appropriately
if self.numeric_value is not None:
if self.unit.upper() in ['USD', 'EUR', 'GBP', 'JPY']:
# Currency formatting
formatted_value = f"{self.numeric_value:,.0f}"
if self.scale:
if self.scale == 1000:
formatted_value += " thousand"
elif self.scale == 1000000:
formatted_value += " million"
elif self.scale == 1000000000:
formatted_value += " billion"
else:
formatted_value = f"{self.numeric_value:,.2f}"
else:
formatted_value = str(self.value)
# Format the period
if self.period_type == 'instant':
period_desc = f"as of {self.period_end}"
else:
period_desc = f"for {self.fiscal_period} {self.fiscal_year}"
if self.period_start and self.period_end:
period_desc += f" ({self.period_start} to {self.period_end})"
return {
"concept": self.label,
"value": formatted_value,
"unit": self.unit,
"period": period_desc,
"context": self.business_context,
"quality": self.data_quality.value,
"confidence": self.confidence_score,
"tags": self.semantic_tags,
"source": f"{self.form_type} filed {self.filing_date}" if self.filing_date else "Unknown source",
"is_audited": self.is_audited,
"is_estimated": self.is_estimated,
"dimensions": self.dimensions if self.dimensions else None
}
def get_display_period_key(self) -> str:
"""
Generate a display-friendly period key based on actual period dates.
This method creates period keys like "Q1 2024" based on the actual period
covered by the data, not the filing year. It uses the period_end date to
determine the calendar year and quarter.
Returns:
A period key in format like "Q1 2024", "FY 2023", etc.
"""
if not self.period_end:
# Fallback to fiscal year/period if no period_end
return f"{self.fiscal_period} {self.fiscal_year}"
# Extract calendar year from period_end
calendar_year = self.period_end.year
# For fiscal years, use "FY" prefix
if self.fiscal_period == 'FY':
return f"FY {calendar_year}"
# For quarters, determine the calendar quarter from the end date
if self.fiscal_period in ['Q1', 'Q2', 'Q3', 'Q4']:
end_month = self.period_end.month
# Map end month to calendar quarter
if end_month in [1, 2, 3]:
quarter = 'Q1'
elif end_month in [4, 5, 6]:
quarter = 'Q2'
elif end_month in [7, 8, 9]:
quarter = 'Q3'
else: # 10, 11, 12
quarter = 'Q4'
return f"{quarter} {calendar_year}"
# For other periods, use the fiscal period with calendar year
return f"{self.fiscal_period} {calendar_year}"
def get_formatted_value(self) -> str:
"""
Format the numeric value for display, avoiding scientific notation.
Returns:
Formatted string representation of the value
"""
if self.numeric_value is None:
return str(self.value)
# For currency values
if self.unit.upper() in ['USD', 'EUR', 'GBP', 'JPY', 'CAD', 'CHF']:
# Round to nearest whole number for large values
if abs(self.numeric_value) >= 1000:
return f"{self.numeric_value:,.0f}"
else:
return f"{self.numeric_value:,.2f}"
# For share counts
elif self.unit.lower() in ['shares', 'share']:
return f"{self.numeric_value:,.0f}"
# For percentages and ratios
elif self.unit.lower() in ['pure', 'percent', '%']:
return f"{self.numeric_value:.2f}"
# Default formatting
else:
if abs(self.numeric_value) >= 1000:
return f"{self.numeric_value:,.0f}"
else:
return f"{self.numeric_value:,.2f}"
def __repr__(self) -> str:
"""String representation focusing on key information"""
        value_str = f"{self.numeric_value:,.0f}" if self.numeric_value is not None else str(self.value)
return f"FinancialFact({self.concept}={value_str} {self.unit}, {self.fiscal_period} {self.fiscal_year})"
@dataclass
class ConceptMetadata:
"""
Metadata about a financial concept.
This provides additional context about what a concept represents,
how it's calculated, and how it relates to other concepts.
"""
concept: str # The concept identifier
label: str # Primary display label
definition: str # Detailed definition
# Concept relationships
parent_concepts: List[str] = field(default_factory=list)
child_concepts: List[str] = field(default_factory=list)
calculation_components: List[str] = field(default_factory=list)
# Classification
statement_type: Optional[str] = None # BalanceSheet, IncomeStatement, etc.
is_monetary: bool = True
is_duration: bool = True # True for flow concepts, False for stock concepts
normal_balance: Optional[Literal['debit', 'credit']] = None
# Usage guidance
common_names: List[str] = field(default_factory=list) # Alternative labels
usage_notes: str = '' # Special considerations
typical_scale: Optional[int] = None # Common scale factor
@dataclass
class FactCollection:
"""
A collection of related facts, typically for a specific time period or statement.
This is used internally to group facts for efficient processing and analysis.
"""
facts: List[FinancialFact]
period_key: str # e.g., "2024-Q4", "2024-FY"
statement_type: Optional[str] = None
def get_fact(self, concept: str) -> Optional[FinancialFact]:
"""Get a specific fact by concept"""
for fact in self.facts:
if fact.concept == concept or fact.label == concept:
return fact
return None
def to_dict(self) -> Dict[str, Any]:
"""Convert to dictionary keyed by concept"""
return {
fact.concept: {
'value': fact.numeric_value or fact.value,
'label': fact.label,
'unit': fact.unit
}
for fact in self.facts
}
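# Illustrative usage:
#   collection = FactCollection(facts=[some_fact], period_key="2024-FY")
#   collection.get_fact("us-gaap:Revenue")   # -> the matching FinancialFact, or None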

View File

@@ -0,0 +1,382 @@
"""
Parser for converting SEC API data to the new Entity Facts format.
This module handles the conversion of raw SEC company facts JSON data
into the new unified FinancialFact model.
"""
import logging
from datetime import date, datetime
from typing import Any, Dict, List, Optional
from edgar.entity.entity_facts import EntityFacts
from edgar.entity.mappings_loader import load_learned_mappings
from edgar.entity.models import DataQuality, FinancialFact
log = logging.getLogger(__name__)
class EntityFactsParser:
"""
Parser for converting SEC company facts to EntityFacts.
This class handles the transformation of raw SEC API data into
the new unified fact model with proper typing and AI-ready metadata.
"""
# Concept mapping for common financial statement items
STATEMENT_MAPPING = {
# Income Statement
'Revenue': 'IncomeStatement',
'Revenues': 'IncomeStatement', # Fix for Issue #438 - ensure us-gaap:Revenues maps properly
'RevenueFromContractWithCustomerExcludingAssessedTax': 'IncomeStatement',
'SalesRevenueNet': 'IncomeStatement',
'CostOfRevenue': 'IncomeStatement',
'GrossProfit': 'IncomeStatement',
'OperatingExpenses': 'IncomeStatement',
'OperatingIncomeLoss': 'IncomeStatement',
'NetIncomeLoss': 'IncomeStatement',
'EarningsPerShareDiluted': 'IncomeStatement',
# Balance Sheet
'Assets': 'BalanceSheet',
'AssetsCurrent': 'BalanceSheet',
'CurrentAssets': 'BalanceSheet',
'AssetsNoncurrent': 'BalanceSheet',
'Liabilities': 'BalanceSheet',
'LiabilitiesCurrent': 'BalanceSheet',
'CurrentLiabilities': 'BalanceSheet',
'LiabilitiesNoncurrent': 'BalanceSheet',
'StockholdersEquity': 'BalanceSheet',
'CashAndCashEquivalentsAtCarryingValue': 'BalanceSheet',
# Cash Flow
'NetCashProvidedByUsedInOperatingActivities': 'CashFlow',
'NetCashProvidedByUsedInInvestingActivities': 'CashFlow',
'NetCashProvidedByUsedInFinancingActivities': 'CashFlow',
'CashAndCashEquivalentsPeriodIncreaseDecrease': 'CashFlow'
}
# Semantic tags for concepts
SEMANTIC_TAGS = {
'Revenue': ['revenue', 'sales', 'operating'],
'NetIncomeLoss': ['profit', 'earnings', 'bottom_line'],
'Assets': ['assets', 'resources', 'balance_sheet'],
'CashAndCashEquivalentsAtCarryingValue': ['cash', 'liquidity', 'current_assets']
}
@classmethod
def parse_company_facts(cls, json_data: Dict[str, Any]) -> Optional[EntityFacts]:
"""
Parse SEC company facts JSON into EntityFacts.
Args:
json_data: Raw JSON from SEC API
Returns:
EntityFacts object or None if parsing fails
"""
try:
cik = int(json_data.get('cik', 0))
entity_name = json_data.get('entityName', 'Unknown')
facts = []
# Process facts from different taxonomies
facts_data = json_data.get('facts', {})
for taxonomy, taxonomy_facts in facts_data.items():
for concept, concept_data in taxonomy_facts.items():
# Process units for this concept
units = concept_data.get('units', {})
label = concept_data.get('label', concept)
description = concept_data.get('description', '')
for unit, unit_facts in units.items():
for fact_data in unit_facts:
fact = cls._parse_single_fact(
concept=concept,
taxonomy=taxonomy,
label=label,
description=description,
unit=unit,
fact_data=fact_data
)
if fact:
facts.append(fact)
if not facts:
log.warning("No facts found for CIK %s", cik)
return None
return EntityFacts(cik=cik, name=entity_name, facts=facts)
except Exception as e:
log.error("Error parsing company facts: %s", e)
return None
@classmethod
def _parse_single_fact(cls,
concept: str,
taxonomy: str,
label: str,
description: str,
unit: str,
fact_data: Dict[str, Any]) -> Optional[FinancialFact]:
"""
Parse a single fact from SEC data.
Args:
concept: Concept identifier
taxonomy: Taxonomy namespace
label: Human-readable label
description: Concept description
unit: Unit of measure
fact_data: Raw fact data
Returns:
FinancialFact or None if parsing fails
"""
# Extract core values
value = fact_data.get('val')
if value is None:
return None
# Parse dates
period_end = cls._parse_date(fact_data.get('end'))
period_start = cls._parse_date(fact_data.get('start'))
filing_date = cls._parse_date(fact_data.get('filed'))
# Determine period type
if period_start:
period_type = 'duration'
else:
period_type = 'instant'
# Parse fiscal period info
fiscal_year = cls._parse_fiscal_year(fact_data.get('fy'))
fiscal_period = fact_data.get('fp', '')
# Determine numeric value
numeric_value = None
if isinstance(value, (int, float)):
numeric_value = float(value)
elif isinstance(value, str) and value.replace('-', '').replace('.', '').isdigit():
try:
numeric_value = float(value)
except ValueError:
pass
# Determine statement type
statement_type = cls._determine_statement_type(concept)
# Get semantic tags
semantic_tags = cls._get_semantic_tags(concept)
# Get structural metadata from learned mappings
structural_info = cls._get_structural_info(concept)
# Determine data quality
data_quality = cls._assess_data_quality(fact_data, fiscal_period)
# Create business context
business_context = cls._generate_business_context(label, description, unit)
# Clean unit representation
clean_unit = cls._clean_unit(unit)
# Determine scale
scale = cls._determine_scale(unit)
return FinancialFact(
concept=f"{taxonomy}:{concept}",
taxonomy=taxonomy,
label=label,
value=value,
numeric_value=numeric_value,
unit=clean_unit,
scale=scale,
period_start=period_start,
period_end=period_end,
period_type=period_type,
fiscal_year=fiscal_year,
fiscal_period=fiscal_period,
filing_date=filing_date,
form_type=fact_data.get('form', ''),
accession=fact_data.get('accn', ''),
data_quality=data_quality,
is_audited=fiscal_period == 'FY', # Annual reports are typically audited
is_restated=False, # Would need additional logic to detect
is_estimated=False, # Would need additional logic to detect
confidence_score=0.9 if data_quality == DataQuality.HIGH else 0.7,
semantic_tags=semantic_tags,
business_context=business_context,
statement_type=statement_type,
# Add structural metadata
depth=structural_info.get('depth'),
parent_concept=structural_info.get('parent'),
section=structural_info.get('section'),
is_abstract=structural_info.get('is_abstract', False),
is_total=structural_info.get('is_total', False),
presentation_order=structural_info.get('avg_depth')
)
@staticmethod
def _parse_date(date_str: Optional[str]) -> Optional[date]:
"""Parse date string to date object"""
if not date_str:
return None
try:
# Try common date formats
for fmt in ['%Y-%m-%d', '%Y%m%d', '%m/%d/%Y']:
try:
return datetime.strptime(date_str, fmt).date()
except ValueError:
continue
# If all formats fail, try to parse as ISO format
return datetime.fromisoformat(date_str).date()
except Exception:
return None
@staticmethod
def _parse_fiscal_year(fy_value: Any) -> int:
"""Parse fiscal year value"""
if not fy_value:
return 0
try:
return int(fy_value)
except (ValueError, TypeError):
return 0
@classmethod
def _determine_statement_type(cls, concept: str) -> Optional[str]:
"""
Determine which financial statement a concept belongs to.
First checks static mappings, then falls back to learned mappings
with confidence threshold.
"""
# Remove namespace if present
if ':' in concept:
concept = concept.split(':')[-1]
# Check static mappings first (highest confidence)
if concept in cls.STATEMENT_MAPPING:
return cls.STATEMENT_MAPPING[concept]
# Check learned mappings
try:
learned_mappings = load_learned_mappings()
if concept in learned_mappings:
mapping = learned_mappings[concept]
# Only use high-confidence learned mappings
if mapping.get('confidence', 0) >= 0.5: # 50% threshold
return mapping['statement_type']
except Exception as e:
log.debug("Error loading learned mappings: %s", e)
return None
@classmethod
def _get_semantic_tags(cls, concept: str) -> List[str]:
"""Get semantic tags for a concept"""
# Remove namespace if present
if ':' in concept:
concept = concept.split(':')[-1]
return cls.SEMANTIC_TAGS.get(concept, [])
@classmethod
def _get_structural_info(cls, concept: str) -> Dict[str, Any]:
"""
Get structural metadata for a concept from learned mappings.
Returns dict with depth, parent, section, is_abstract, is_total
"""
# Remove namespace if present
if ':' in concept:
concept = concept.split(':')[-1]
try:
learned_mappings = load_learned_mappings()
if concept in learned_mappings:
mapping = learned_mappings[concept]
return {
'depth': int(mapping.get('avg_depth', 0)) if mapping.get('avg_depth') else None,
'parent': mapping.get('parent'),
'section': mapping.get('section'),
'is_abstract': mapping.get('is_abstract', False),
'is_total': mapping.get('is_total', False)
}
except Exception as e:
log.debug("Error getting structural info: %s", e)
return {}
@staticmethod
def _assess_data_quality(fact_data: Dict[str, Any], fiscal_period: str) -> DataQuality:
"""Assess the quality of a fact"""
# Annual data is typically higher quality
if fiscal_period == 'FY':
return DataQuality.HIGH
# Quarterly data
if fiscal_period in ['Q1', 'Q2', 'Q3', 'Q4']:
return DataQuality.HIGH
# Other data
return DataQuality.MEDIUM
@staticmethod
def _generate_business_context(label: str, description: str, unit: str) -> str:
"""Generate business context for a fact"""
# Handle null/None values
if not label:
label = ""
if not description:
description = ""
# Return description if it's longer and more informative than label
if description and len(description) > len(label):
return description
# Generate context based on label and unit
if label and 'Revenue' in label:
return "Total revenue generated from operations"
elif label and 'Income' in label:
return "Net earnings after all expenses and taxes"
elif label and 'Assets' in label:
return "Total resources owned by the company"
# Return label if available, otherwise empty string
return label if label else ""
@staticmethod
def _clean_unit(unit: str) -> str:
"""Clean and standardize unit representation"""
if not unit:
return ""
unit_mapping = {
'USD': 'USD',
'usd': 'USD',
'pure': 'number',
'shares': 'shares',
'USD/shares': 'USD per share'
}
return unit_mapping.get(unit, unit)
@staticmethod
def _determine_scale(unit: str) -> Optional[int]:
"""Determine scale factor from unit"""
# SEC data is typically already scaled
# This would need more sophisticated logic based on the actual data
return None
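# Illustrative usage (json_data is the raw JSON returned by the SEC company facts API):
#   entity_facts = EntityFactsParser.parse_company_facts(json_data)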

File diff suppressed because it is too large

View File

@@ -0,0 +1,116 @@
"""
Search functionality for SEC entities.
This module provides functions and classes for searching for SEC entities.
"""
from functools import lru_cache
from typing import Any, Dict, List
import pandas as pd
from rich import box
from rich.table import Column, Table
from edgar.entity import Company
from edgar.entity.tickers import get_company_tickers
from edgar.richtools import repr_rich
from edgar.search.datasearch import FastSearch, company_ticker_preprocess, company_ticker_score
__all__ = [
'find_company',
'CompanySearchResults',
'CompanySearchIndex'
]
class CompanySearchResults:
"""
Results from a company search.
"""
def __init__(self, query: str,
search_results: List[Dict[str, Any]]):
self.query: str = query
self.results: pd.DataFrame = pd.DataFrame(search_results, columns=['cik', 'ticker', 'company', 'score'])
@property
def tickers(self):
return self.results.ticker.tolist()
@property
def ciks(self):
return self.results.cik.tolist()
@property
def empty(self):
return self.results.empty
def __len__(self):
return len(self.results)
    def __getitem__(self, item):
        if 0 <= item < len(self):
            row = self.results.iloc[item]
            cik: int = int(row.cik)
            return Company(cik)
        raise IndexError(f"Index {item} is out of range for {len(self)} search results")
def __rich__(self):
table = Table(Column(""),
Column("Ticker", justify="left"),
Column("Name", justify="left"),
Column("Score", justify="left"),
title=f"Search results for '{self.query}'",
box=box.SIMPLE)
for index, row in enumerate(self.results.itertuples()):
table.add_row(str(index), row.ticker.rjust(6), row.company, f"{int(row.score)}%")
return table
def __repr__(self):
return repr_rich(self.__rich__())
class CompanySearchIndex(FastSearch):
"""
Search index for companies.
"""
def __init__(self):
data = get_company_tickers(as_dataframe=False)
super().__init__(data, ['company', 'ticker'],
preprocess_func=company_ticker_preprocess,
score_func=company_ticker_score)
def search(self, query: str, top_n: int = 10, threshold: float = 60) -> CompanySearchResults:
results = super().search(query, top_n, threshold)
return CompanySearchResults(query=query, search_results=results)
def __len__(self):
return len(self.data)
def __hash__(self):
# Combine column names and last 10 values in the 'company' column to create a hash
column_names = tuple(self.data[0].keys())
last_10_companies = tuple(entry['company'] for entry in self.data[-10:])
return hash((column_names, last_10_companies))
def __eq__(self, other):
if not isinstance(other, CompanySearchIndex):
return False
return (self.data[-10:], tuple(self.data[0].keys())) == (other.data[-10:], tuple(other.data[0].keys()))
@lru_cache(maxsize=1)
def _get_company_search_index():
"""Get the company search index."""
return CompanySearchIndex()
@lru_cache(maxsize=16)
def find_company(company: str, top_n: int = 10):
"""
Find a company by name.
Args:
company: The company name or ticker to search for
top_n: The maximum number of results to return
Returns:
CompanySearchResults: The search results
"""
return _get_company_search_index().search(company, top_n=top_n)
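# Illustrative usage:
#   results = find_company("Apple")
#   if not results.empty:
#       company = results[0]   # a Company built from the top-scoring match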

View File

@@ -0,0 +1,495 @@
"""
Financial Statement wrapper classes with rich display and concept-aware formatting.
This module provides Statement classes that wrap pandas DataFrames with:
- Intelligent formatting based on financial concept types
- Rich display for professional presentation
- Access to underlying data for calculations
- LLM-ready context generation
"""
from dataclasses import dataclass
from typing import Any, Dict, List, Optional
import pandas as pd
from rich.box import SIMPLE, SIMPLE_HEAVY
from rich.console import Group
from rich.padding import Padding
from rich.panel import Panel
from rich.table import Table
from rich.text import Text
from .terminal_styles import get_current_scheme
@dataclass
class ConceptFormatting:
"""Formatting rules for specific financial concepts"""
decimal_places: int = 2
show_currency: bool = True
scale_display: bool = True # Show M, B suffixes
percentage: bool = False
class FinancialStatement:
"""
A wrapper around pandas DataFrame for financial statements with intelligent formatting.
This class provides:
- Concept-aware formatting (EPS to 2 decimals, revenue in millions, etc.)
- Rich display for professional presentation
- Access to underlying numeric data
- LLM context generation
"""
# Formatting rules by concept pattern
CONCEPT_FORMATS = {
# Earnings per share - always show decimals
'earningspershare': ConceptFormatting(decimal_places=2, show_currency=False, scale_display=False),
'earnings per share': ConceptFormatting(decimal_places=2, show_currency=False, scale_display=False),
'eps': ConceptFormatting(decimal_places=2, show_currency=False, scale_display=False),
# Ratios and percentages
'ratio': ConceptFormatting(decimal_places=2, show_currency=False, scale_display=False),
'margin': ConceptFormatting(decimal_places=1, show_currency=False, scale_display=False, percentage=True),
'percent': ConceptFormatting(decimal_places=1, show_currency=False, scale_display=False, percentage=True),
# Per-share values
'per share': ConceptFormatting(decimal_places=2, show_currency=False, scale_display=False),
'pershare': ConceptFormatting(decimal_places=2, show_currency=False, scale_display=False),
'book value': ConceptFormatting(decimal_places=2, show_currency=False, scale_display=False),
'dividend': ConceptFormatting(decimal_places=2, show_currency=False, scale_display=False),
# Share counts - show full numbers with commas
'shares outstanding': ConceptFormatting(decimal_places=0, show_currency=False, scale_display=False),
'common stock': ConceptFormatting(decimal_places=0, show_currency=False, scale_display=False),
'weighted average': ConceptFormatting(decimal_places=0, show_currency=False, scale_display=False),
# Large financial amounts - show full numbers with commas
'revenue': ConceptFormatting(decimal_places=0, show_currency=True, scale_display=False),
'income': ConceptFormatting(decimal_places=0, show_currency=True, scale_display=False),
'assets': ConceptFormatting(decimal_places=0, show_currency=True, scale_display=False),
'liabilities': ConceptFormatting(decimal_places=0, show_currency=True, scale_display=False),
}
def __init__(self,
data: pd.DataFrame,
statement_type: str,
entity_name: str = "",
period_lengths: Optional[List[str]] = None,
mixed_periods: bool = False):
"""
Initialize financial statement.
Args:
data: DataFrame with financial data
statement_type: Type of statement (IncomeStatement, BalanceSheet, etc.)
entity_name: Company name
period_lengths: List of period lengths in the data
mixed_periods: Whether data contains mixed period lengths
"""
self.data = data
self.statement_type = statement_type
self.entity_name = entity_name
self.period_lengths = period_lengths or []
self.mixed_periods = mixed_periods
# Store original numeric data
self._numeric_data = data.copy()
def get_concept_formatting(self, concept_label: str) -> ConceptFormatting:
"""
Get formatting rules for a specific concept.
Args:
concept_label: Label of the financial concept
Returns:
ConceptFormatting rules for this concept
"""
label_lower = concept_label.lower()
# Check for exact matches first
for pattern, formatting in self.CONCEPT_FORMATS.items():
if pattern in label_lower:
return formatting
# Default formatting for large amounts - show full numbers with commas
return ConceptFormatting(decimal_places=0, show_currency=True, scale_display=False)
def format_value(self, value: float, concept_label: str) -> str:
"""
Format a single value based on its concept.
Args:
value: Numeric value to format
concept_label: Label of the financial concept
Returns:
Formatted string representation
"""
if pd.isna(value):
return ''
formatting = self.get_concept_formatting(concept_label)
# Handle percentage formatting
if formatting.percentage:
return f"{value:.{formatting.decimal_places}f}%"
# Always use full number formatting with commas - no scaling to preserve precision
if formatting.show_currency:
return f"${value:,.{formatting.decimal_places}f}"
else:
return f"{value:,.{formatting.decimal_places}f}"
def _repr_html_(self) -> str:
"""
Rich HTML representation for Jupyter notebooks.
Returns:
HTML string for rich display
"""
# Create a formatted copy as string DataFrame
formatted_data = pd.DataFrame(index=self.data.index, columns=self.data.columns, dtype=str)
# Apply formatting to each cell
for index in self.data.index:
concept_label = str(index)
for column in self.data.columns:
value = self.data.loc[index, column]
if pd.notna(value) and isinstance(value, (int, float)):
formatted_data.loc[index, column] = self.format_value(value, concept_label)
else:
formatted_data.loc[index, column] = str(value) if pd.notna(value) else ''
# Create HTML with styling
html = f"""
<div style="font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;">
<h3 style="color: #2c3e50; margin-bottom: 10px;">
{self.entity_name} - {self.statement_type.replace('Statement', ' Statement')}
</h3>
"""
# Add period warning if mixed
if self.mixed_periods:
html += """
<div style="background-color: #fff3cd; border: 1px solid #ffeaa7;
padding: 8px; margin-bottom: 10px; border-radius: 4px;">
<strong>⚠️ Mixed Period Lengths:</strong> This statement contains periods of different lengths
({periods}). Consider filtering to comparable periods for accurate analysis.
</div>
""".format(periods=', '.join(self.period_lengths))
# Add the formatted table
html += formatted_data.to_html(classes='financial-statement',
table_id='fs-table',
escape=False)
# Add CSS styling
html += """
<style>
.financial-statement {
border-collapse: collapse;
width: 100%;
font-size: 12px;
margin-top: 10px;
}
.financial-statement th {
background-color: #34495e;
color: white;
padding: 8px;
text-align: right;
font-weight: bold;
}
.financial-statement td {
padding: 6px 8px;
text-align: right;
border-bottom: 1px solid #ecf0f1;
}
.financial-statement tr:hover {
background-color: #f8f9fa;
}
.financial-statement tr:nth-child(even) {
background-color: #fdfdfd;
}
.financial-statement td:first-child {
text-align: left;
font-weight: 500;
}
</style>
</div>
"""
return html
def __str__(self) -> str:
"""
String representation for console display.
Returns:
Formatted string representation
"""
# Create formatted version as string DataFrame
formatted_data = pd.DataFrame(index=self.data.index, columns=self.data.columns, dtype=str)
# Apply formatting to each cell
for index in self.data.index:
concept_label = str(index)
for column in self.data.columns:
value = self.data.loc[index, column]
if pd.notna(value) and isinstance(value, (int, float)):
formatted_data.loc[index, column] = self.format_value(value, concept_label)
else:
formatted_data.loc[index, column] = str(value) if pd.notna(value) else ''
header = f"\n{self.entity_name} - {self.statement_type.replace('Statement', ' Statement')}\n"
header += "=" * len(header.strip()) + "\n"
if self.mixed_periods:
header += f"⚠️ Mixed period lengths: {', '.join(self.period_lengths)}\n\n"
return header + str(formatted_data)
def __rich__(self):
"""Creates a rich representation for professional financial statement display."""
colors = get_current_scheme()
if self.data.empty:
return Panel(
Text("No data available", style=colors["empty_value"]),
title=f"📊 {self.statement_type.replace('Statement', ' Statement')}",
border_style=colors["panel_border"]
)
# Statement type icon mapping
icon_map = {
'IncomeStatement': '💰',
'BalanceSheet': '⚖️',
'CashFlow': '💵',
'Statement': '📊'
}
icon = icon_map.get(self.statement_type, '📊')
# Title with company name and statement type
if self.entity_name:
title = Text.assemble(
icon + " ",
(self.entity_name, colors["company_name"]),
" ",
(self.statement_type.replace('Statement', ' Statement'), colors["statement_type"])
)
else:
title = Text.assemble(
icon + " ",
(self.statement_type.replace('Statement', ' Statement'), colors["statement_type"])
)
# Create the main financial statement table
statement_table = Table(box=SIMPLE, show_header=True, padding=(0, 1))
statement_table.add_column("Line Item", style=colors["total_item"], no_wrap=True, max_width=30)
# Add period columns (limit to reasonable number for display)
periods = list(self.data.columns)
display_periods = periods[:6] # Show max 6 periods for readability
has_more_periods = len(periods) > 6
for period in display_periods:
statement_table.add_column(str(period), justify="right", max_width=15)
# Add rows with formatted values
for index in self.data.index:
concept_label = str(index)
# Truncate long concept names
display_label = concept_label[:28] + "..." if len(concept_label) > 30 else concept_label
row_values = [display_label]
for period in display_periods:
value = self.data.loc[index, period]
if pd.notna(value) and isinstance(value, (int, float)):
formatted_value = self.format_value(value, concept_label)
row_values.append(formatted_value)
else:
row_values.append("-" if pd.isna(value) else str(value)[:12])
statement_table.add_row(*row_values)
# Create summary info panel
info_table = Table(box=SIMPLE_HEAVY, show_header=False, padding=(0, 1))
info_table.add_column("Metric", style=colors["low_confidence_item"])
info_table.add_column("Value", style=colors["total_item"])
info_table.add_row("Line Items", f"{len(self.data.index):,}")
info_table.add_row("Periods", f"{len(self.data.columns):,}")
if self.period_lengths:
info_table.add_row("Period Types", ", ".join(set(self.period_lengths)))
info_panel = Panel(
info_table,
title="📋 Statement Info",
border_style="bright_black"
)
# Create period warning if needed
warning_panel = None
if self.mixed_periods:
warning_text = Text.assemble(
"⚠️ Mixed period lengths detected: ",
(", ".join(self.period_lengths), "yellow"),
"\nConsider filtering to comparable periods for accurate analysis."
)
warning_panel = Panel(
warning_text,
title="🚨 Period Warning",
border_style=colors.get("warning", "yellow")
)
# Subtitle with additional info
subtitle_parts = [f"{len(self.data.index):,} line items"]
if has_more_periods:
subtitle_parts.append(f"showing first {len(display_periods)} of {len(periods)} periods")
subtitle = "".join(subtitle_parts)
# Main statement panel
statement_panel = Panel(
statement_table,
title="📊 Financial Data",
subtitle=subtitle,
border_style="bright_black"
)
# Combine all panels
content_renderables = [
Padding("", (1, 0, 0, 0)),
info_panel
]
if warning_panel:
content_renderables.append(warning_panel)
content_renderables.append(statement_panel)
content = Group(*content_renderables)
return Panel(
content,
title=title,
border_style=colors["panel_border"]
)
def __repr__(self):
"""String representation using rich formatting."""
from edgar.richtools import repr_rich
return repr_rich(self.__rich__())
def to_numeric(self) -> pd.DataFrame:
"""
Get the underlying numeric DataFrame for calculations.
Returns:
DataFrame with original numeric values
"""
return self._numeric_data.copy()
def to_llm_context(self) -> Dict[str, Any]:
"""
Generate LLM-friendly context from the statement.
Returns:
Dictionary with structured financial data for LLM consumption
"""
context = {
"entity_name": self.entity_name,
"statement_type": self.statement_type,
"period_lengths": self.period_lengths,
"mixed_periods": self.mixed_periods,
"periods": list(self.data.columns),
"line_items": {}
}
# Convert each line item to LLM-friendly format
for index in self.data.index:
concept_label = str(index)
line_item = {
"label": concept_label,
"values": {},
"formatting": self.get_concept_formatting(concept_label).__dict__
}
for column in self.data.columns:
value = self.data.loc[index, column]
if pd.notna(value):
line_item["values"][str(column)] = {
"raw_value": float(value),
"formatted_value": self.format_value(value, concept_label)
}
context["line_items"][concept_label] = line_item
return context
def get_concept(self, concept_name: str) -> Optional[pd.Series]:
"""
Get data for a specific concept across all periods.
Args:
concept_name: Name of the concept to retrieve
Returns:
Series with values across periods, or None if not found
"""
# Try exact match first
if concept_name in self.data.index:
return self.data.loc[concept_name]
# Try case-insensitive partial match
concept_lower = concept_name.lower()
for index in self.data.index:
if concept_lower in str(index).lower():
return self.data.loc[index]
return None
def calculate_growth(self, concept_name: str, periods: int = 2) -> Optional[pd.Series]:
"""
Calculate period-over-period growth for a concept.
Args:
concept_name: Name of the concept
periods: Number of periods to calculate growth over
Returns:
Series with growth rates, or None if concept not found
"""
concept_data = self.get_concept(concept_name)
if concept_data is None:
return None
# Calculate percentage change
return concept_data.pct_change(periods=periods) * 100
@property
def shape(self) -> tuple:
"""Get the shape of the underlying data."""
return self.data.shape
@property
def columns(self) -> pd.Index:
"""Get the columns of the underlying data."""
return self.data.columns
@property
def index(self) -> pd.Index:
"""Get the index of the underlying data."""
return self.data.index
@property
def empty(self) -> bool:
"""Check if the underlying DataFrame is empty."""
return self.data.empty
def __len__(self) -> int:
"""Get the length of the underlying DataFrame."""
return len(self.data)
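# Illustrative usage (df is a pandas DataFrame of line items x periods):
#   stmt = FinancialStatement(df, "IncomeStatement", entity_name="Apple Inc.")
#   stmt.format_value(1234567.0, "Revenue")   # -> "$1,234,567"
#   stmt.get_concept("Revenue")               # -> Series across periods, or None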

View File

@@ -0,0 +1,731 @@
"""
Statement Builder for reconstructing financial statements using canonical structures.
This module provides intelligent statement reconstruction using learned canonical
structures and virtual presentation trees.
"""
import logging
from collections import defaultdict
from dataclasses import dataclass, field
from datetime import date
from typing import Any, Dict, List, Optional, Set
from rich import box
from rich.columns import Columns
from rich.console import Group
from rich.padding import Padding
from rich.panel import Panel
from rich.table import Table
from rich.text import Text
from edgar.entity.mappings_loader import load_canonical_structures, load_virtual_trees
from edgar.entity.models import FinancialFact
from edgar.richtools import repr_rich
log = logging.getLogger(__name__)
@dataclass
class StatementItem:
"""A single item in a reconstructed financial statement."""
concept: str
label: str
value: Optional[float]
depth: int
parent_concept: Optional[str]
children: List['StatementItem'] = field(default_factory=list)
# Metadata
is_abstract: bool = False
is_total: bool = False
section: Optional[str] = None
confidence: float = 1.0
source: str = 'fact' # 'fact', 'calculated', 'canonical', 'placeholder'
# Original fact if available
fact: Optional[FinancialFact] = None
def to_dict(self) -> Dict[str, Any]:
"""Convert to dictionary representation."""
return {
'concept': self.concept,
'label': self.label,
'value': self.value,
'depth': self.depth,
'is_abstract': self.is_abstract,
'is_total': self.is_total,
'section': self.section,
'confidence': self.confidence,
'source': self.source,
'children': [child.to_dict() for child in self.children]
}
def get_display_value(self) -> str:
"""Get formatted value for display."""
if self.value is not None:
if abs(self.value) >= 1_000_000_000:
return f"${self.value/1_000_000_000:.1f}B"
elif abs(self.value) >= 1_000_000:
return f"${self.value/1_000_000:.1f}M"
elif abs(self.value) >= 1_000:
return f"${self.value/1_000:.0f}K"
else:
return f"${self.value:.0f}"
elif self.is_abstract:
return ""
elif self.source == 'placeholder':
return "[Missing]"
else:
return "-"
def __rich__(self):
"""Create a rich representation of the statement item."""
from rich.tree import Tree
# Create the node label
if self.is_abstract:
label = Text(self.label, style="bold cyan")
elif self.is_total:
label = Text(self.label, style="bold yellow")
else:
style = "dim" if self.confidence < 0.8 else ""
            confidence_marker = " *" if self.confidence < 0.8 else ""
label = Text(f"{self.label}{confidence_marker}", style=style)
# Add value if present
value_str = self.get_display_value()
if value_str and value_str != "-":
# Color code values
if value_str.startswith("$") and self.value and isinstance(self.value, (int, float)):
value_style = "red" if self.value < 0 else "green"
else:
value_style = ""
label_with_value = Text.assemble(
label,
" ",
(value_str, value_style)
)
else:
label_with_value = label
# Create tree with this item as root
tree = Tree(label_with_value)
# Add children
for child in self.children:
tree.add(child.__rich__())
return tree
def __repr__(self) -> str:
"""String representation using rich formatting."""
return repr_rich(self.__rich__())
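# Illustrative example of the display formatting above:
#   StatementItem(concept="us-gaap:Revenues", label="Revenue", value=2_500_000_000.0,
#                 depth=0, parent_concept=None).get_display_value()   # -> "$2.5B"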
@dataclass
class StructuredStatement:
"""A complete structured financial statement."""
statement_type: str
fiscal_year: Optional[int]
fiscal_period: Optional[str]
period_end: Optional[date]
items: List[StatementItem]
# Metadata
company_name: Optional[str] = None
cik: Optional[str] = None
canonical_coverage: float = 0.0
facts_used: int = 0
facts_total: int = 0
def to_dict(self) -> Dict[str, Any]:
"""Convert to dictionary representation."""
return {
'statement_type': self.statement_type,
'fiscal_year': self.fiscal_year,
'fiscal_period': self.fiscal_period,
'period_end': self.period_end.isoformat() if self.period_end else None,
'company_name': self.company_name,
'cik': self.cik,
'canonical_coverage': self.canonical_coverage,
'facts_used': self.facts_used,
'facts_total': self.facts_total,
'items': [item.to_dict() for item in self.items]
}
def get_hierarchical_display(self, max_depth: int = 3) -> str:
"""Get hierarchical text representation."""
lines = []
def add_item(item: StatementItem, indent: int = 0):
if indent > max_depth:
return
indent_str = " " * indent
value_str = item.get_display_value()
if item.is_abstract:
lines.append(f"{indent_str}{item.label}")
elif item.is_total:
lines.append(f"{indent_str}{item.label:<40} {value_str:>15}")
lines.append(f"{indent_str}{'-' * 55}")
else:
confidence_marker = "" if item.confidence > 0.8 else " *"
lines.append(f"{indent_str}{item.label:<40} {value_str:>15}{confidence_marker}")
for child in item.children:
add_item(child, indent + 1)
for item in self.items:
add_item(item)
return "\n".join(lines)
def __rich__(self):
"""Create a rich representation of the structured statement."""
# Statement type mapping for better display
statement_names = {
'IncomeStatement': 'Income Statement',
'BalanceSheet': 'Balance Sheet',
'CashFlow': 'Cash Flow Statement',
'StatementsOfComprehensiveIncome': 'Comprehensive Income',
'StatementsOfShareholdersEquity': 'Shareholders Equity'
}
# Title with company name and period
title_parts = []
if self.company_name:
title_parts.append((self.company_name, "bold green"))
else:
title_parts.append(("Financial Statement", "bold"))
title = Text.assemble(*title_parts)
# Subtitle with statement type and period
statement_display = statement_names.get(self.statement_type, self.statement_type)
if self.fiscal_period and self.fiscal_year:
subtitle = f"{statement_display}{self.fiscal_period} {self.fiscal_year}"
elif self.period_end:
subtitle = f"{statement_display} • As of {self.period_end}"
else:
subtitle = statement_display
# Main statement table
stmt_table = Table(
box=box.SIMPLE,
show_header=False,
padding=(0, 1),
expand=True
)
stmt_table.add_column("Item", style="", ratio=3)
stmt_table.add_column("Value", justify="right", style="bold", ratio=1)
def add_item_to_table(item: StatementItem, depth: int = 0):
"""Add an item to the table with proper indentation."""
indent = " " * depth
if item.is_abstract:
# Abstract items are headers
stmt_table.add_row(
Text(f"{indent}{item.label}", style="bold cyan"),
""
)
elif item.is_total:
# Total items with underline
value_text = Text(item.get_display_value(), style="bold yellow")
stmt_table.add_row(
Text(f"{indent}{item.label}", style="bold"),
value_text
)
# Add a separator line after totals
if depth == 0:
stmt_table.add_row("", "")
stmt_table.add_row(
Text("" * 40, style="dim"),
Text("" * 15, style="dim")
)
else:
# Regular items
style = "dim" if item.confidence < 0.8 else ""
                confidence_marker = " *" if item.confidence < 0.8 else ""
label_text = f"{indent}{item.label}{confidence_marker}"
# Color code positive/negative values
value_str = item.get_display_value()
if value_str and value_str.startswith("$"):
try:
# Extract numeric value for coloring
if item.value and isinstance(item.value, (int, float)):
if item.value < 0:
value_style = "red"
else:
value_style = "green"
else:
value_style = ""
except Exception:
value_style = ""
else:
value_style = ""
stmt_table.add_row(
Text(label_text, style=style),
Text(value_str, style=value_style) if value_str else ""
)
# Add children recursively
for child in item.children:
if depth < 3: # Limit depth for display
add_item_to_table(child, depth + 1)
# Add all items to the table
for item in self.items:
add_item_to_table(item)
# Metadata summary
metadata = Table(box=box.SIMPLE, show_header=False, padding=(0, 1))
metadata.add_column("Metric", style="dim")
metadata.add_column("Value", style="bold")
metadata.add_row("Facts Used", f"{self.facts_used:,}")
if self.facts_total > 0:
metadata.add_row("Total Facts", f"{self.facts_total:,}")
if self.canonical_coverage > 0:
coverage_pct = self.canonical_coverage * 100
coverage_style = "green" if coverage_pct >= 50 else "yellow" if coverage_pct >= 25 else "red"
metadata.add_row(
"Canonical Coverage",
Text(f"{coverage_pct:.1f}%", style=coverage_style)
)
if self.cik:
metadata.add_row("CIK", self.cik)
# Data quality indicators
quality_notes = []
# Count items by confidence
low_confidence_count = sum(
1 for item in self._flatten_items()
if not item.is_abstract and item.confidence < 0.8
)
if low_confidence_count > 0:
quality_notes.append(
Text(f"{low_confidence_count} items with lower confidence", style="dim yellow")
)
# Count calculated vs actual values
calculated_count = sum(
1 for item in self._flatten_items()
if item.source == 'calculated'
)
if calculated_count > 0:
quality_notes.append(
Text(f"{calculated_count} calculated values", style="dim cyan")
)
# Combine metadata and quality notes
metadata_panel = Panel(
metadata,
title="📊 Statement Metadata",
border_style="bright_black"
)
# Create the main content group
content_parts = [
Padding("", (1, 0, 0, 0)),
stmt_table
]
# Add metadata in a column layout
if self.facts_used > 0:
bottom_content = [metadata_panel]
if quality_notes:
quality_panel = Panel(
Group(*quality_notes),
title="📝 Data Quality Notes",
border_style="bright_black"
)
bottom_content.append(quality_panel)
content_parts.append(Padding("", (1, 0)))
content_parts.append(Columns(bottom_content, equal=True, expand=True))
content = Group(*content_parts)
# Create the main panel
return Panel(
content,
title=title,
subtitle=subtitle,
border_style="blue",
expand=True
)
def _flatten_items(self) -> List[StatementItem]:
"""Flatten the hierarchical items into a flat list."""
flat_items = []
def flatten(item: StatementItem):
flat_items.append(item)
for child in item.children:
flatten(child)
for item in self.items:
flatten(item)
return flat_items
def __repr__(self) -> str:
"""String representation using rich formatting."""
return repr_rich(self.__rich__())
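# Serialization sketch (illustrative, not part of the original module): a
# StructuredStatement round-trips to JSON via to_dict(); period_end is
# ISO-formatted and nested items are serialized recursively. `statement`
# is assumed to have been produced by the StatementBuilder defined below.
#
#     import json
#     payload = json.dumps(statement.to_dict(), indent=2)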
class StatementBuilder:
"""
Builds structured financial statements using canonical templates.
This class reconstructs complete financial statements by combining
actual facts with canonical structures, filling in missing concepts
and maintaining proper hierarchy.
"""
def __init__(self, cik: Optional[str] = None):
"""
Initialize the statement builder.
Args:
cik: Company CIK for context
"""
self.cik = cik
self.canonical_structures = load_canonical_structures()
self.virtual_trees = load_virtual_trees()
def build_statement(self,
facts: List[FinancialFact],
statement_type: str,
fiscal_year: Optional[int] = None,
fiscal_period: Optional[str] = None,
use_canonical: bool = True,
include_missing: bool = False) -> StructuredStatement:
"""
Build a structured financial statement from facts.
Args:
facts: List of financial facts
statement_type: Type of statement (BalanceSheet, IncomeStatement, etc.)
fiscal_year: Fiscal year to filter for
fiscal_period: Fiscal period (FY, Q1, Q2, Q3, Q4)
use_canonical: Whether to use canonical structure for organization
include_missing: Whether to include placeholder for missing concepts
Returns:
StructuredStatement with hierarchical organization
"""
# Filter facts for this statement and period
filtered_facts = self._filter_facts(facts, statement_type, fiscal_year, fiscal_period)
# Create fact lookup
fact_map = self._create_fact_map(filtered_facts)
# Get period end date
period_end = self._get_period_end(filtered_facts)
if use_canonical and statement_type in self.virtual_trees:
# Build using canonical structure
items = self._build_with_canonical(
fact_map,
self.virtual_trees[statement_type],
include_missing
)
# Add unmatched facts
unmatched = self._find_unmatched_facts(fact_map, self.virtual_trees[statement_type])
items.extend(self._create_items_from_facts(unmatched))
else:
# Build from facts only
items = self._build_from_facts(fact_map)
# Calculate metadata
facts_used = len(fact_map)
canonical_coverage = self._calculate_coverage(fact_map, statement_type) if use_canonical else 0.0
return StructuredStatement(
statement_type=statement_type,
fiscal_year=fiscal_year,
fiscal_period=fiscal_period,
period_end=period_end,
items=items,
cik=self.cik,
canonical_coverage=canonical_coverage,
facts_used=facts_used,
facts_total=len(facts)
)
def _filter_facts(self, facts: List[FinancialFact],
statement_type: str,
fiscal_year: Optional[int],
fiscal_period: Optional[str]) -> List[FinancialFact]:
"""Filter facts for the requested statement and period."""
filtered = []
for fact in facts:
# Check statement type
if fact.statement_type != statement_type:
continue
# Check fiscal year
if fiscal_year and fact.fiscal_year != fiscal_year:
continue
# Check fiscal period
if fiscal_period and fact.fiscal_period != fiscal_period:
continue
filtered.append(fact)
return filtered
def _create_fact_map(self, facts: List[FinancialFact]) -> Dict[str, FinancialFact]:
"""Create a map of concept to fact."""
fact_map = {}
for fact in facts:
# Extract clean concept name
concept = fact.concept
if ':' in concept:
concept = concept.split(':', 1)[1]
# Use most recent fact for duplicates
if concept not in fact_map or fact.filing_date > fact_map[concept].filing_date:
fact_map[concept] = fact
return fact_map
def _get_period_end(self, facts: List[FinancialFact]) -> Optional[date]:
"""Get the period end date from facts."""
for fact in facts:
if fact.period_end:
return fact.period_end
return None
def _build_with_canonical(self, fact_map: Dict[str, FinancialFact],
virtual_tree: Dict[str, Any],
include_missing: bool) -> List[StatementItem]:
"""Build statement using canonical structure."""
items = []
processed = set()
# Process root nodes
for root_concept in virtual_tree.get('roots', []):
item = self._build_canonical_item(
root_concept,
virtual_tree['nodes'],
fact_map,
processed,
include_missing,
depth=0
)
if item:
items.append(item)
return items
def _build_canonical_item(self, concept: str,
nodes: Dict[str, Any],
fact_map: Dict[str, FinancialFact],
processed: Set[str],
include_missing: bool,
depth: int = 0,
parent: Optional[str] = None) -> Optional[StatementItem]:
"""Build a single canonical item with children."""
if concept in processed:
return None
processed.add(concept)
# Get node info
node = nodes.get(concept, {})
# Check if we have a fact for this concept
fact = fact_map.get(concept)
# Determine if we should include this item
if not fact and not include_missing and not node.get('is_abstract'):
# Skip missing concrete concepts unless required
if node.get('occurrence_rate', 0) < 0.8: # Not a core concept
return None
# Create the item
item = StatementItem(
concept=concept,
label=fact.label if fact else node.get('label', concept),
value=fact.numeric_value if fact else None,
depth=depth,
parent_concept=parent,
is_abstract=node.get('is_abstract', False),
is_total=node.get('is_total', False),
section=node.get('section'),
confidence=node.get('occurrence_rate', 1.0) if not fact else 1.0,
source='fact' if fact else ('canonical' if not include_missing else 'placeholder'),
fact=fact
)
# Process children
for child_concept in node.get('children', []):
child_item = self._build_canonical_item(
child_concept,
nodes,
fact_map,
processed,
include_missing,
depth + 1,
concept
)
if child_item:
item.children.append(child_item)
# Try to calculate total if missing
if item.is_total and item.value is None and item.children:
calculated_value = self._calculate_total(item.children)
if calculated_value is not None:
item.value = calculated_value
item.source = 'calculated'
return item
def _calculate_total(self, children: List[StatementItem]) -> Optional[float]:
"""Calculate total from children values."""
total = 0
has_values = False
for child in children:
if not child.is_abstract and child.value is not None:
total += child.value
has_values = True
return total if has_values else None
def _find_unmatched_facts(self, fact_map: Dict[str, FinancialFact],
virtual_tree: Dict[str, Any]) -> Dict[str, FinancialFact]:
"""Find facts that don't match canonical concepts."""
canonical_concepts = set(virtual_tree.get('nodes', {}).keys())
unmatched = {}
for concept, fact in fact_map.items():
if concept not in canonical_concepts:
unmatched[concept] = fact
return unmatched
def _create_items_from_facts(self, facts: Dict[str, FinancialFact]) -> List[StatementItem]:
"""Create statement items from unmatched facts."""
items = []
for concept, fact in facts.items():
item = StatementItem(
concept=concept,
label=fact.label,
value=fact.numeric_value,
depth=1, # Default depth
parent_concept=None,
is_abstract=fact.is_abstract,
is_total=fact.is_total,
section=fact.section,
confidence=0.7, # Lower confidence for unmatched
source='fact',
fact=fact
)
items.append(item)
return items
def _build_from_facts(self, fact_map: Dict[str, FinancialFact]) -> List[StatementItem]:
"""Build statement directly from facts without canonical structure."""
# Group facts by parent
hierarchy = defaultdict(list)
roots = []
for concept, fact in fact_map.items():
if fact.parent_concept:
hierarchy[fact.parent_concept].append(concept)
else:
roots.append(concept)
# Build items recursively
items = []
for root_concept in roots:
item = self._build_fact_item(root_concept, fact_map, hierarchy)
if item:
items.append(item)
# Add orphaned facts: children whose parent concept is absent from the
# fact map would otherwise never be reached from a root
for concept, fact in fact_map.items():
if fact.parent_concept and fact.parent_concept not in fact_map:
item = StatementItem(
concept=concept,
label=fact.label,
value=fact.numeric_value,
depth=0,
parent_concept=None,
is_abstract=fact.is_abstract,
is_total=fact.is_total,
section=fact.section,
confidence=1.0,
source='fact',
fact=fact
)
items.append(item)
return items
def _build_fact_item(self, concept: str,
fact_map: Dict[str, FinancialFact],
hierarchy: Dict[str, List[str]],
depth: int = 0) -> Optional[StatementItem]:
"""Build item from fact with children."""
if concept not in fact_map:
return None
fact = fact_map[concept]
item = StatementItem(
concept=concept,
label=fact.label,
value=fact.numeric_value,
depth=depth,
parent_concept=fact.parent_concept,
is_abstract=fact.is_abstract,
is_total=fact.is_total,
section=fact.section,
confidence=1.0,
source='fact',
fact=fact
)
# Add children
for child_concept in hierarchy.get(concept, []):
child_item = self._build_fact_item(child_concept, fact_map, hierarchy, depth + 1)
if child_item:
item.children.append(child_item)
return item
def _calculate_coverage(self, fact_map: Dict[str, FinancialFact],
statement_type: str) -> float:
"""Calculate canonical coverage percentage."""
if statement_type not in self.virtual_trees:
return 0.0
canonical_concepts = set(self.virtual_trees[statement_type].get('nodes', {}).keys())
if not canonical_concepts:
return 0.0
matched = len(set(fact_map.keys()) & canonical_concepts)
return matched / len(canonical_concepts)
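# Usage sketch (illustrative only): drive StatementBuilder end to end. The
# empty fact list is a stand-in; real callers would pass FinancialFact
# objects sourced from an entity's parsed XBRL data.
if __name__ == "__main__":
    sample_facts: List[FinancialFact] = []  # assumed to be populated elsewhere
    builder = StatementBuilder(cik="0000320193")
    stmt = builder.build_statement(
        facts=sample_facts,
        statement_type="IncomeStatement",
        fiscal_year=2023,
        fiscal_period="FY",
    )
    print(stmt.get_hierarchical_display(max_depth=2))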
View File

@@ -0,0 +1,216 @@
"""
Functions for retrieving entity submission data from the SEC.
"""
import json
from typing import Any, Dict, Optional
import httpx
from edgar.core import log
from edgar.entity.data import parse_entity_submissions
from edgar.httprequests import download_json
from edgar.storage import get_edgar_data_directory, is_using_local_storage
__all__ = [
'get_entity_submissions',
'download_entity_submissions_from_sec',
'load_company_submissions_from_local',
'create_entity_from_submissions_json',
'create_entity_from_file',
'create_company_from_file'
]
def load_company_submissions_from_local(cik: int) -> Optional[Dict[str, Any]]:
"""
Load company submissions from local data.
If the cached file is corrupted or empty, it will be re-downloaded automatically.
"""
submissions_dir = get_edgar_data_directory() / "submissions"
if not submissions_dir.exists():
return None
submissions_file = submissions_dir / f"CIK{cik:010}.json"
# If file doesn't exist, download it
if not submissions_file.exists():
submissions_json = download_entity_submissions_from_sec(cik)
if submissions_json:
with open(submissions_file, "w", encoding='utf-8') as f:
json.dump(submissions_json, f)
return submissions_json
# File exists, try to parse it
try:
return json.loads(submissions_file.read_text())
except (json.JSONDecodeError, UnicodeDecodeError) as e:
# File is corrupted, log warning and re-download
log.warning(f"Corrupted submissions cache file for CIK {cik}: {e}. Re-downloading...")
try:
submissions_json = download_entity_submissions_from_sec(cik)
if submissions_json:
# Write the fresh data to cache
with open(submissions_file, "w", encoding='utf-8') as f:
json.dump(submissions_json, f)
return submissions_json
else:
# If download failed, remove the corrupted file
submissions_file.unlink(missing_ok=True)
return None
except Exception as download_error:
log.error(f"Failed to re-download submissions for CIK {cik}: {download_error}")
# Remove the corrupted file so it can be retried later
submissions_file.unlink(missing_ok=True)
return None
def download_entity_submissions_from_sec(cik: int) -> Optional[Dict[str, Any]]:
"""
Download the entity submissions JSON for a given CIK from the SEC.
Note: This function no longer uses @lru_cache (removed in Issue #471 fix) to allow
HttpxThrottleCache to control freshness. The HTTP cache now has a 30-second TTL
for submissions, providing a balance between freshness and performance.
Args:
cik: The company CIK
Returns:
Optional[Dict[str, Any]]: The entity submissions JSON data, or None if not found
"""
try:
submission_json = download_json(f"https://data.sec.gov/submissions/CIK{cik:010}.json")
except httpx.HTTPStatusError as e:
# Handle the case where the cik is invalid and not found on Edgar
if e.response.status_code == 404:
return None
else:
raise
return submission_json
def get_entity_submissions(cik: int) -> Optional[Any]:
"""
Get the entity data from the SEC submissions endpoint.
Note: This function no longer uses @lru_cache (removed in Issue #471 fix) to allow
HttpxThrottleCache to control freshness with a 30-second TTL.
Args:
cik: The company CIK
Returns:
Optional[EntityData]: The entity data, or None if not found
"""
# Check the environment var EDGAR_USE_LOCAL_DATA
if is_using_local_storage():
submissions_json = load_company_submissions_from_local(cik)
if not submissions_json:
submissions_json = download_entity_submissions_from_sec(cik)
else:
submissions_json = download_entity_submissions_from_sec(cik)
if submissions_json:
return parse_entity_submissions(submissions_json)
def create_entity_from_submissions_json(
submissions_json: Dict[str, Any],
entity_type: str = 'auto'
) -> Any:
"""
Create an Entity object from a submissions JSON dictionary.
This is particularly useful for testing, as it allows creating
Entity objects from local JSON files or mock data, without
making any API calls.
Args:
submissions_json: The submissions JSON dictionary (either from a file or API)
entity_type: The type of entity to create ('company', 'fund', or 'auto' to detect)
Returns:
An Entity, Company, or Fund object, depending on the entity_type parameter.
If entity_type is 'auto', it tries to detect the entity type from the data.
"""
# Import locally to avoid circular imports
from edgar.entity.core import Company, Entity
from edgar.entity.data import parse_entity_submissions
from edgar.funds import FundCompany
# First, parse the submissions JSON to get the entity data
entity_data = parse_entity_submissions(submissions_json)
# Create the appropriate entity object based on the entity_type parameter
if entity_type == 'auto':
# Try to detect the entity type - if it has tickers or exchanges, it's likely a company
if entity_data.tickers or (hasattr(entity_data, 'exchanges') and entity_data.exchanges):
entity_type = 'company'
# More detection logic could be added here
else:
# Default to generic entity if we can't detect the type
entity_type = 'entity'
# Create and return the appropriate entity type
if entity_type.lower() == 'company':
entity = Company(entity_data.cik)
elif entity_type.lower() == 'fund':
entity = FundCompany(entity_data.cik)
else:
entity = Entity(entity_data.cik)
# Set the data directly to avoid making API calls
entity._data = entity_data
entity._data._not_found = False
# Mark the entity as having already loaded all filings to prevent fetching more
entity._data._loaded_all_filings = True
return entity
def create_entity_from_file(
file_path: str,
entity_type: str = 'auto'
) -> Any:
"""
Create an Entity object from a local submissions JSON file.
This is a convenience function that loads a JSON file and creates
an Entity object from it, without making any API calls.
Args:
file_path: Path to a submissions JSON file
entity_type: The type of entity to create ('company', 'fund', or 'auto' to detect)
Returns:
An Entity, Company, or Fund object, depending on the entity_type parameter.
"""
import json
from pathlib import Path
# Load the JSON file
try:
with open(Path(file_path).expanduser(), 'r') as f:
submissions_json = json.load(f)
except (FileNotFoundError, json.JSONDecodeError) as e:
log.error(f"Error loading submissions JSON file: {e}")
return None
# Create the entity from the loaded JSON
return create_entity_from_submissions_json(submissions_json, entity_type)
def create_company_from_file(file_path: str) -> Any:
"""
Create a Company object from a local submissions JSON file.
This is a convenience function specifically for creating companies,
which is the most common use case.
Args:
file_path: Path to a submissions JSON file
Returns:
A Company object
"""
return create_entity_from_file(file_path, entity_type='company')
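# Usage sketch (illustrative only; the path below is a placeholder): build a
# Company offline from a cached submissions file, with no API calls.
if __name__ == "__main__":
    company = create_company_from_file("~/.edgar/submissions/CIK0000320193.json")
    if company is not None:
        print(company)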
View File

@@ -0,0 +1,138 @@
"""
Terminal-friendly color schemes for financial statement display.
Provides better contrast and readability in various terminal environments.
"""
import os
from typing import Dict
# Default scheme - the current implementation
DEFAULT_SCHEME = {
"abstract_item": "bold cyan",
"total_item": "bold",
"regular_item": "",
"low_confidence_item": "dim",
"positive_value": "green",
"negative_value": "red",
"total_value_prefix": "bold yellow",
"separator": "dim",
"company_name": "bold green",
"statement_type": "bold blue",
"panel_border": "blue",
"empty_value": "dim",
}
# High contrast scheme - better for terminals with poor dim text support
HIGH_CONTRAST_SCHEME = {
"abstract_item": "bold bright_cyan",
"total_item": "bold bright_white",
"regular_item": "white",
"low_confidence_item": "bright_black", # Usually renders as gray
"positive_value": "bright_green",
"negative_value": "bright_red",
"total_value_prefix": "bold bright_yellow",
"separator": "bright_black",
"company_name": "bold bright_green",
"statement_type": "bold bright_blue",
"panel_border": "bright_blue",
"empty_value": "bright_black",
}
# Professional scheme - emphasizes important items without dim text
PROFESSIONAL_SCHEME = {
"abstract_item": "bold blue",
"total_item": "bold bright_white",
"regular_item": "",
"low_confidence_item": "italic", # Use italic instead of dim
"positive_value": "green",
"negative_value": "red",
"total_value_prefix": "bold",
"separator": "blue",
"company_name": "bold bright_white",
"statement_type": "bold blue",
"panel_border": "white",
"empty_value": "bright_black",
}
# Minimal scheme - focuses on structure over color
MINIMAL_SCHEME = {
"abstract_item": "bold",
"total_item": "bold bright_white",
"regular_item": "",
"low_confidence_item": "italic",
"positive_value": "",
"negative_value": "red", # Keep red for negative values
"total_value_prefix": "bold",
"separator": "white",
"company_name": "bold",
"statement_type": "bold",
"panel_border": "white",
"empty_value": "bright_black",
}
# Color-blind friendly scheme
ACCESSIBLE_SCHEME = {
"abstract_item": "bold blue",
"total_item": "bold bright_white underline", # Use underline for emphasis
"regular_item": "",
"low_confidence_item": "italic",
"positive_value": "blue", # Avoid green/red
"negative_value": "magenta", # Avoid green/red
"total_value_prefix": "bold underline",
"separator": "white",
"company_name": "bold bright_white",
"statement_type": "bold blue",
"panel_border": "white",
"empty_value": "bright_black",
}
# SEC filing style - mimics actual printed filings
FILING_SCHEME = {
"abstract_item": "bold", # Major sections (ASSETS, LIABILITIES) - just bold
"total_item": "bold", # Subtotals - bold only
"regular_item": "", # Regular items - no styling
"low_confidence_item": "dim", # Low confidence items - dimmed
"positive_value": "", # Positive values - no color (like printed filings)
"negative_value": "", # Negative values - no color (parentheses show negative)
"total_value_prefix": "bold", # Total values - bold only
"separator": "dim", # Table separators - dimmed
"company_name": "bold", # Company name - just bold
"statement_type": "bold", # Statement title - just bold
"panel_border": "white", # Panel borders - white
"empty_value": "dim", # Empty values - dimmed
}
# Available schemes
SCHEMES: Dict[str, Dict[str, str]] = {
"default": DEFAULT_SCHEME,
"high_contrast": HIGH_CONTRAST_SCHEME,
"professional": PROFESSIONAL_SCHEME,
"minimal": MINIMAL_SCHEME,
"accessible": ACCESSIBLE_SCHEME,
"filing": FILING_SCHEME,
}
def get_color_scheme(scheme_name: str = "professional") -> Dict[str, str]:
"""
Get a color scheme by name.
Args:
scheme_name: Name of the scheme (default, high_contrast, professional, minimal, accessible, filing)
Returns:
Dictionary of style mappings
"""
return SCHEMES.get(scheme_name, PROFESSIONAL_SCHEME)
# Environment variable support
def get_current_scheme() -> Dict[str, str]:
"""
Get the current color scheme based on environment variable or default.
Environment variable: EDGAR_FINANCIALS_COLOR_SCHEME
Values: default, high_contrast, professional, minimal, accessible, filing
"""
scheme_name = os.environ.get("EDGAR_FINANCIALS_COLOR_SCHEME", "professional")
return get_color_scheme(scheme_name)
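# Usage sketch (illustrative): resolve the active scheme, honoring the
# EDGAR_FINANCIALS_COLOR_SCHEME environment variable when it is set.
if __name__ == "__main__":
    scheme = get_current_scheme()
    print(scheme["negative_value"])  # e.g. 'red' under the professional scheme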
View File

@@ -0,0 +1,56 @@
"""
Ticker-related functionality for the entity package.
This module re-exports ticker-related functions from edgar.reference.tickers.
"""
# Local implementations below supplement the functions re-exported from edgar.reference.tickers
from functools import lru_cache
import pandas as pd
from edgar.httprequests import download_text
from edgar.reference.tickers import find_cik, find_ticker, get_company_tickers, get_icon_from_ticker
@lru_cache(maxsize=1)
def get_ticker_to_cik_lookup():
"""
Create a dictionary that maps from ticker symbol to CIK.
"""
df = get_company_tickers()
ticker_to_cik = {}
for _, row in df.iterrows():
ticker_to_cik[row['ticker']] = row['cik']
return ticker_to_cik
def _parse_cik_lookup_data(content):
"""Parse CIK lookup data from content."""
return [
{
# for companies with : in the name
'name': ":".join(line.split(':')[:-2]),
'cik': int(line.split(':')[-2])
} for line in content.split("\n") if line != '']
@lru_cache(maxsize=1)
def get_cik_lookup_data() -> pd.DataFrame:
"""
Get a DataFrame of company/entity names and their CIKs, parsed from the
SEC's cik-lookup-data.txt file, whose lines look like:
DECADE CAPITAL MANAGEMENT LLC:0001426822:
DECADE COMPANIES INCOME PROPERTIES:0000775840:
"""
content = download_text("https://www.sec.gov/Archives/edgar/cik-lookup-data.txt")
cik_lookup_df = pd.DataFrame(_parse_cik_lookup_data(content))
return cik_lookup_df
__all__ = [
'get_icon_from_ticker',
'get_company_tickers',
'get_ticker_to_cik_lookup',
'get_cik_lookup_data',
'find_cik',
'find_ticker'
]
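# Usage sketch (illustrative): map a ticker to its CIK. The first call
# fetches the SEC company-tickers table; results are cached afterwards.
if __name__ == "__main__":
    lookup = get_ticker_to_cik_lookup()
    print(lookup.get("AAPL"))  # e.g. 320193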
View File

@@ -0,0 +1,17 @@
from edgar import Company
def income_statement(ticker: str, annual: bool = True, periods: int = 4):
company = Company(ticker)
if company:
return company.income_statement(annual=annual, periods=periods)
def balance_sheet(ticker: str, annual: bool = True, periods: int = 4):
company = Company(ticker)
if company:
return company.balance_sheet(annual=annual, periods=periods)
def cash_flow_statement(ticker: str, annual: bool = True, periods: int = 4):
company = Company(ticker)
if company:
return company.cash_flow_statement(annual=annual, periods=periods)
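# Usage sketch (illustrative): fetch four annual income statements by ticker.
if __name__ == "__main__":
    print(income_statement("AAPL", annual=True, periods=4))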
View File

@@ -0,0 +1,419 @@
"""
Unit handling and normalization for financial facts.
This module provides comprehensive unit normalization and conversion capabilities
to address unit inconsistencies across different companies' SEC filings.
Key features:
- Currency unit normalization (USD, EUR, GBP, etc.)
- Share-based unit standardization
- Scale-aware unit matching
- Unit compatibility checking
- Error reporting with unit mismatch details
Usage:
from edgar.entity.unit_handling import UnitNormalizer, UnitResult
# Normalize a unit
normalized = UnitNormalizer.normalize_unit("US DOLLAR") # Returns "USD"
# Check unit compatibility
compatible = UnitNormalizer.are_compatible("USD", "DOLLARS") # Returns True
# Get unit with error details
result = UnitNormalizer.get_normalized_value(fact, target_unit="USD")
"""
import logging
from dataclasses import dataclass
from enum import Enum
from typing import Dict, List, Optional
from edgar.entity.models import FinancialFact
logger = logging.getLogger(__name__)
class UnitType(Enum):
"""Types of financial units."""
CURRENCY = "currency"
SHARES = "shares"
RATIO = "ratio"
BUSINESS = "business"
TIME = "time"
AREA = "area"
OTHER = "other"
@dataclass
class UnitResult:
"""Result of unit normalization with error details."""
value: Optional[float]
normalized_unit: Optional[str]
original_unit: str
success: bool
error_reason: Optional[str] = None
scale_applied: Optional[int] = None
unit_type: Optional[UnitType] = None
suggestions: Optional[List[str]] = None
def __post_init__(self):
if self.suggestions is None:
self.suggestions = []
class UnitNormalizer:
"""Comprehensive unit normalization for financial facts."""
# Currency unit mappings
CURRENCY_MAPPINGS = {
'USD': ['USD', 'US DOLLAR', 'DOLLARS', 'usd', 'US$', 'DOLLAR'],
'EUR': ['EUR', 'EURO', 'EUROS', 'eur', '€', 'EUROPEAN UNION EURO'],
'GBP': ['GBP', 'POUND', 'POUNDS', 'gbp', '£', 'BRITISH POUND', 'POUND STERLING'],
'JPY': ['JPY', 'YEN', 'yen', 'jpy', '¥', 'JAPANESE YEN'],
'CAD': ['CAD', 'CANADIAN DOLLAR', 'CANADIAN DOLLARS', 'cad'],
'CHF': ['CHF', 'SWISS FRANC', 'SWISS FRANCS', 'chf'],
'AUD': ['AUD', 'AUSTRALIAN DOLLAR', 'AUSTRALIAN DOLLARS', 'aud'],
'CNY': ['CNY', 'YUAN', 'CHINESE YUAN', 'cny', '¥'],  # note: '¥' also appears under JPY; the reverse map resolves it to the later entry (CNY)
}
# Share unit mappings
SHARE_MAPPINGS = {
'shares': ['shares', 'share', 'SHARES', 'SHARE', 'STOCK', 'EQUITY'],
'shares_unit': ['shares_unit', 'share_unit', 'SHARES_UNIT'],
'partnership_unit': ['USD/PartnershipUnit', 'PartnershipUnit', 'partnership_unit']
}
# Ratio/dimensionless unit mappings
RATIO_MAPPINGS = {
'pure': ['pure', 'number', 'ratio', 'percent', '%', 'PURE', 'NUMBER'],
'basis_points': ['bp', 'bps', 'basis_points', 'BASIS_POINTS']
}
# Per-share combinations
PER_SHARE_MAPPINGS = {
'USD_per_share': ['USD/shares', 'USD per share', 'USD/share', 'usd/shares'],
'USD_per_share_unit': ['USD/shares_unit', 'USD per share unit', 'USD/share_unit']
}
# Business/operational unit mappings
BUSINESS_MAPPINGS = {
'customer': ['Customer', 'customer', 'CUSTOMER'],
'store': ['Store', 'store', 'STORE'],
'entity': ['Entity', 'entity', 'ENTITY'],
'segment': ['Segment', 'segment', 'SEGMENT', 'reportable_segment'],
'instrument': ['instrument', 'INSTRUMENT', 'financial_instrument'],
'contract': ['USD/Contract', 'contract', 'CONTRACT'],
'investment': ['USD/Investment', 'investment', 'INVESTMENT']
}
# Time-based unit mappings
TIME_MAPPINGS = {
'years': ['Year', 'years', 'YEAR', 'YEARS'],
'months': ['Month', 'months', 'MONTH', 'MONTHS'],
'days': ['Day', 'days', 'DAY', 'DAYS']
}
# Area unit mappings
AREA_MAPPINGS = {
'sqft': ['sqft', 'square_feet', 'SQFT', 'sq_ft'],
'sqm': ['sqm', 'square_meters', 'SQMETER', 'sq_m']
}
# Comprehensive mapping combining all categories
ALL_MAPPINGS = {
**CURRENCY_MAPPINGS,
**SHARE_MAPPINGS,
**RATIO_MAPPINGS,
**PER_SHARE_MAPPINGS,
**BUSINESS_MAPPINGS,
**TIME_MAPPINGS,
**AREA_MAPPINGS
}
# Reverse mapping for faster lookups
_REVERSE_MAPPING = None
@classmethod
def _build_reverse_mapping(cls) -> Dict[str, str]:
"""Build reverse mapping from variant to normalized unit."""
if cls._REVERSE_MAPPING is not None:
return cls._REVERSE_MAPPING
reverse_map = {}
for normalized_unit, variants in cls.ALL_MAPPINGS.items():
for variant in variants:
reverse_map[variant.upper()] = normalized_unit
cls._REVERSE_MAPPING = reverse_map
return reverse_map
@classmethod
def normalize_unit(cls, unit: str) -> str:
"""
Normalize a unit string to its canonical form.
Args:
unit: Raw unit string from SEC filing
Returns:
Normalized unit string
Example:
>>> UnitNormalizer.normalize_unit("US DOLLAR")
'USD'
>>> UnitNormalizer.normalize_unit("shares_unit")
'shares_unit'
"""
if not unit:
return ""
reverse_map = cls._build_reverse_mapping()
normalized = reverse_map.get(unit.upper())
return normalized if normalized else unit
@classmethod
def get_unit_type(cls, unit: str) -> UnitType:
"""
Determine the type of a unit.
Args:
unit: Unit string (normalized or raw)
Returns:
UnitType enum value
"""
normalized = cls.normalize_unit(unit)
if normalized in cls.CURRENCY_MAPPINGS:
return UnitType.CURRENCY
elif normalized in cls.PER_SHARE_MAPPINGS:
# Per-share units are a special currency-like type (amount per share)
return UnitType.CURRENCY # Treat per-share as currency-derived
elif normalized in cls.SHARE_MAPPINGS:
return UnitType.SHARES
elif normalized in cls.RATIO_MAPPINGS:
return UnitType.RATIO
elif normalized in cls.BUSINESS_MAPPINGS:
return UnitType.BUSINESS
elif normalized in cls.TIME_MAPPINGS:
return UnitType.TIME
elif normalized in cls.AREA_MAPPINGS:
return UnitType.AREA
else:
return UnitType.OTHER
@classmethod
def are_compatible(cls, unit1: str, unit2: str) -> bool:
"""
Check if two units are compatible for calculations.
Args:
unit1: First unit
unit2: Second unit
Returns:
True if units are compatible
"""
norm1 = cls.normalize_unit(unit1)
norm2 = cls.normalize_unit(unit2)
# Exact match
if norm1 == norm2:
return True
# Same unit type
type1 = cls.get_unit_type(norm1)
type2 = cls.get_unit_type(norm2)
if type1 == type2:
# Special cases for compatible unit types
if type1 == UnitType.CURRENCY:
# Regular currencies are compatible, but per-share must match exactly
if norm1 in cls.PER_SHARE_MAPPINGS or norm2 in cls.PER_SHARE_MAPPINGS:
# Per-share units must match exactly (USD_per_share != USD_per_share_unit)
return norm1 == norm2
return True # Regular currencies could be converted
elif type1 == UnitType.SHARES:
# shares and shares_unit are compatible for some calculations
return norm1 in ['shares', 'shares_unit'] and norm2 in ['shares', 'shares_unit']
return False
@classmethod
def get_normalized_value(
cls,
fact: FinancialFact,
target_unit: Optional[str] = None,
apply_scale: bool = True,
strict_unit_match: bool = False
) -> UnitResult:
"""
Get a normalized value from a financial fact with detailed error reporting.
Args:
fact: FinancialFact to normalize
target_unit: Desired unit (if None, just normalize existing unit)
apply_scale: Whether to apply scale factor
strict_unit_match: If True, require exact unit match. If False, allow compatible units.
Returns:
UnitResult with value and metadata
"""
if fact.numeric_value is None:
return UnitResult(
value=None,
normalized_unit=None,
original_unit=fact.unit,
success=False,
error_reason="No numeric value available"
)
original_unit = fact.unit or ""
normalized_unit = cls.normalize_unit(original_unit)
unit_type = cls.get_unit_type(normalized_unit)
# Apply scale factor if requested
value = fact.numeric_value
scale_applied = None
if apply_scale and fact.scale:
value *= fact.scale
scale_applied = fact.scale
# If no target unit specified, return normalized value
if target_unit is None:
return UnitResult(
value=value,
normalized_unit=normalized_unit,
original_unit=original_unit,
success=True,
scale_applied=scale_applied,
unit_type=unit_type
)
# Check compatibility with target unit
target_normalized = cls.normalize_unit(target_unit)
if normalized_unit == target_normalized:
# Exact match
return UnitResult(
value=value,
normalized_unit=target_normalized,
original_unit=original_unit,
success=True,
scale_applied=scale_applied,
unit_type=unit_type
)
elif not strict_unit_match and cls.are_compatible(normalized_unit, target_normalized):
# Compatible units - could potentially convert (only if not in strict mode)
suggestions = []
if cls.get_unit_type(normalized_unit) == UnitType.CURRENCY:
suggestions.append(f"Consider currency conversion from {normalized_unit} to {target_normalized}")
return UnitResult(
value=value,
normalized_unit=normalized_unit, # Keep original, mark as compatible
original_unit=original_unit,
success=True,
scale_applied=scale_applied,
unit_type=unit_type,
suggestions=suggestions
)
else:
# Incompatible units
suggestions = cls._get_unit_suggestions(normalized_unit, target_normalized)
return UnitResult(
value=None,
normalized_unit=normalized_unit,
original_unit=original_unit,
success=False,
error_reason=f"Unit mismatch: {normalized_unit} is not compatible with {target_normalized}",
unit_type=unit_type,
suggestions=suggestions
)
@classmethod
def _get_unit_suggestions(cls, actual_unit: str, target_unit: str) -> List[str]:
"""Generate helpful suggestions for unit mismatches."""
suggestions = []
actual_type = cls.get_unit_type(actual_unit)
target_type = cls.get_unit_type(target_unit)
if actual_type != target_type:
suggestions.append(f"Unit type mismatch: {actual_unit} is {actual_type.value}, "
f"but {target_unit} is {target_type.value}")
# Specific suggestions based on unit types
if target_type == UnitType.CURRENCY and actual_type != UnitType.CURRENCY:
suggestions.append("Consider using a financial amount concept instead of a ratio/count")
elif target_type == UnitType.SHARES and actual_type != UnitType.SHARES:
suggestions.append("Consider using a share-based concept instead of a monetary amount")
# Alternative units in the same category
if actual_type == target_type:
if actual_type == UnitType.CURRENCY:
suggestions.append("Use currency conversion or specify the correct currency unit")
elif actual_type == UnitType.SHARES:
suggestions.append("Try using 'shares' instead of 'shares_unit' or vice versa")
return suggestions
def apply_scale_factor(value: float, scale: Optional[int]) -> float:
"""
Apply scale factor to a value.
Args:
value: Numeric value
scale: Scale factor (e.g., 1000 for thousands)
Returns:
Scaled value
"""
if scale and scale != 1:
return value * scale
return value
def format_unit_error(unit_result: UnitResult) -> str:
"""
Format a unit error message for user display.
Args:
unit_result: UnitResult with error details
Returns:
Formatted error message
"""
if unit_result.success:
return "No error"
message = f"Unit handling error: {unit_result.error_reason}"
if unit_result.suggestions:
message += "\n Suggestions:\n"
for suggestion in unit_result.suggestions:
message += f" - {suggestion}\n"
message += f" Original unit: '{unit_result.original_unit}'"
if unit_result.normalized_unit != unit_result.original_unit:
message += f" Normalized to: '{unit_result.normalized_unit}'"
return message
# Legacy support - maintain compatibility with existing code
def normalize_unit_legacy(unit: str) -> str:
"""Legacy unit normalization for backward compatibility."""
return UnitNormalizer.normalize_unit(unit)
def are_units_compatible_legacy(unit1: str, unit2: str) -> bool:
"""Legacy unit compatibility check for backward compatibility."""
return UnitNormalizer.are_compatible(unit1, unit2)
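# Usage sketch (illustrative): pure lookups, no I/O involved.
if __name__ == "__main__":
    print(UnitNormalizer.normalize_unit("US DOLLAR"))       # 'USD'
    print(UnitNormalizer.are_compatible("USD", "DOLLARS"))  # True
    print(UnitNormalizer.are_compatible("USD", "shares"))   # False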
View File

@@ -0,0 +1,132 @@
"""
Utility functions for entity processing.
This module contains utility functions used throughout the entity package
for data processing, normalization, and validation.
"""
from typing import TYPE_CHECKING, Union
if TYPE_CHECKING:
import pyarrow
from edgar.entity.constants import COMPANY_FORMS
def has_company_filings(filings_form_array: 'pyarrow.ChunkedArray', max_filings: int = 50) -> bool:
"""
Efficiently check if any form in the PyArrow ChunkedArray matches company-only forms.
Limited to checking the first max_filings entries for performance.
Args:
filings_form_array: PyArrow ChunkedArray containing form values
max_filings: Maximum number of filings to check
Returns:
True if any form matches a company form, False otherwise
"""
# Early exit for empty arrays
if filings_form_array.null_count == filings_form_array.length():
return False
# Handle case with fewer than max_filings
total_filings = filings_form_array.length()
filings_to_check = min(total_filings, max_filings)
# Track how many we've checked so far
checked_count = 0
# Process chunks in the ChunkedArray until we hit our limit
for chunk in filings_form_array.chunks:
chunk_size = len(chunk)
# If this chunk would exceed our limit, slice it
if checked_count + chunk_size > filings_to_check:
# Only check remaining forms needed to reach filings_to_check
remaining = filings_to_check - checked_count
sliced_chunk = chunk.slice(0, remaining)
# Iterate over the sliced values, skipping nulls
for val in sliced_chunk.to_pylist():
if val is not None and val in COMPANY_FORMS:
return True
else:
# Process full chunk safely
for val in chunk.to_pylist():
if val is not None and val in COMPANY_FORMS:
return True
# Update count of checked filings, capped at the limit
checked_count = min(checked_count + chunk_size, filings_to_check)
# Stop if we've checked enough
if checked_count >= filings_to_check:
break
return False
def normalize_cik(cik_or_identifier: Union[str, int]) -> int:
"""
Normalize a CIK to an integer by removing leading zeros.
Args:
cik_or_identifier: CIK as string or integer
Returns:
Normalized CIK as integer
Raises:
ValueError: If the identifier cannot be converted to a valid CIK
"""
if isinstance(cik_or_identifier, int):
return cik_or_identifier
if isinstance(cik_or_identifier, str):
# Remove leading zeros and convert to int
try:
return int(cik_or_identifier.lstrip('0') or '0')
except ValueError:
raise ValueError(f"Invalid CIK format: {cik_or_identifier}")
raise ValueError(f"CIK must be string or integer, got {type(cik_or_identifier)}")
def validate_cik(cik: int) -> bool:
"""
Validate that a CIK is within the expected range.
Args:
cik: CIK to validate
Returns:
True if CIK is valid, False otherwise
"""
# CIKs are typically 1-10 digits, with valid range roughly 1 to 2,000,000,000
return isinstance(cik, int) and 1 <= cik <= 2_000_000_000
def format_cik(cik: Union[str, int], zero_pad: int = 10) -> str:
"""
Format a CIK with zero padding for display or API calls.
Args:
cik: CIK to format
zero_pad: Number of digits to pad to (default 10)
Returns:
Zero-padded CIK string
Example:
>>> format_cik(320193)
'0000320193'
>>> format_cik('320193', zero_pad=6)
'320193'
"""
normalized_cik = normalize_cik(cik)
return str(normalized_cik).zfill(zero_pad)
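# Usage sketch (illustrative): round-trip a CIK through normalization,
# validation, and zero-padded formatting.
if __name__ == "__main__":
    cik = normalize_cik("0000320193")
    assert validate_cik(cik)
    print(format_cik(cik))  # '0000320193'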