Initial commit

2025-12-09 12:13:01 +01:00
commit 8e654ed209
13332 changed files with 2695056 additions and 0 deletions
--- a/venv/lib/python3.10/site-packages/edgar/xbrl/parsers/instance.py
+++ b/venv/lib/python3.10/site-packages/edgar/xbrl/parsers/instance.py
@@ -0,0 +1,768 @@
+"""
+Instance parser for XBRL documents.
+
+This module handles parsing of XBRL instance documents including facts, contexts,
+units, footnotes, and entity information extraction.
+"""
+
+from datetime import datetime
+from pathlib import Path
+from typing import Any, Dict, List, Union
+
+from lxml import etree as ET
+
+from edgar.core import log
+from edgar.xbrl.core import NAMESPACES, classify_duration
+from edgar.xbrl.models import Context, Fact, XBRLProcessingError
+
+from .base import BaseParser
+
+
+class InstanceParser(BaseParser):
+    """Parser for XBRL instance documents."""
+
+    def __init__(self, contexts: Dict[str, Context], facts: Dict[str, Fact],
+                 units: Dict[str, Any], footnotes: Dict[str, Any],
+                 calculation_trees: Dict[str, Any], entity_info: Dict[str, Any],
+                 reporting_periods: List[Dict[str, Any]], context_period_map: Dict[str, str]):
+        """
+        Initialize instance parser with data structure references.
+
+        Args:
+            contexts: Reference to contexts dictionary
+            facts: Reference to facts dictionary
+            units: Reference to units dictionary
+            footnotes: Reference to footnotes dictionary
+            calculation_trees: Reference to calculation trees dictionary
+            entity_info: Reference to entity info dictionary
+            reporting_periods: Reference to reporting periods list
+            context_period_map: Reference to context period map
+        """
+        super().__init__()
+
+        # Store references to data structures
+        self.contexts = contexts
+        self.facts = facts
+        self.units = units
+        self.footnotes = footnotes
+        self.calculation_trees = calculation_trees
+        self.entity_info = entity_info
+        self.reporting_periods = reporting_periods
+        self.context_period_map = context_period_map
+
+        # DEI facts extracted during entity info processing
+        self.dei_facts: Dict[str, Fact] = {}
+
+    def _create_normalized_fact_key(self, element_id: str, context_ref: str, instance_id: int = None) -> str:
+        """
+        Create a normalized fact key using underscore format.
+
+        Args:
+            element_id: The element ID
+            context_ref: The context reference
+            instance_id: Optional instance ID for duplicate facts
+
+        Returns:
+            Normalized key in format: element_id_context_ref[_instance_id]
+        """
+        normalized_element_id = element_id
+        if ':' in element_id:
+            prefix, name = element_id.split(':', 1)
+            normalized_element_id = f"{prefix}_{name}"
+        if instance_id is not None:
+            return f"{normalized_element_id}_{context_ref}_{instance_id}"
+        return f"{normalized_element_id}_{context_ref}"
+
+    def parse_instance(self, file_path: Union[str, Path]) -> None:
+        """Parse instance document file and extract contexts, facts, and units."""
+        try:
+            content = Path(file_path).read_text()
+            self.parse_instance_content(content)
+        except Exception as e:
+            raise XBRLProcessingError(f"Error parsing instance file {file_path}: {str(e)}") from e
+
+    def parse_instance_content(self, content: str) -> None:
+        """Parse instance document content and extract contexts, facts, and units."""
+        try:
+            # Use lxml's optimized parser with smart string handling and recovery mode
+            parser = ET.XMLParser(remove_blank_text=True, recover=True, huge_tree=True)
+
+            # Convert to bytes for faster parsing if not already
+            if isinstance(content, str):
+                content_bytes = content.encode('utf-8')
+            else:
+                content_bytes = content
+
+            # Parse content with optimized settings
+            root = ET.XML(content_bytes, parser)
+
+            # Extract data in optimal order (contexts first, then units, then facts)
+            # This ensures dependencies are resolved before they're needed
+            self._extract_contexts(root)
+            self._extract_units(root)
+            self._extract_facts(root)
+            self._extract_footnotes(root)
+
+            # Post-processing steps after all raw data is extracted
+            self._extract_entity_info()
+            self._build_reporting_periods()
+
+        except Exception as e:
+            raise XBRLProcessingError(f"Error parsing instance content: {str(e)}") from e
+
+    def count_facts(self, content: str) -> tuple:
+        """Count the number of facts in the instance document
+        This function counts both unique facts and total fact instances in the XBRL document.
+
+        Returns:
+            tuple: (unique_facts_count, total_fact_instances)
+        """
+
+        # Use lxml's optimized parser with smart string handling and recovery mode
+        parser = ET.XMLParser(remove_blank_text=True, recover=True, huge_tree=True)
+
+        # Convert to bytes for faster parsing if not already
+        if isinstance(content, str):
+            content_bytes = content.encode('utf-8')
+        else:
+            content_bytes = content
+
+        # Parse content with optimized settings
+        root = ET.XML(content_bytes, parser)
+
+        # Fast path to identify non-fact elements to skip
+        skip_tag_endings = {'}context', '}unit', '}schemaRef'}
+
+        # Track both total instances and unique facts
+        total_fact_instances = 0  # Total number of fact references in the document
+        unique_facts = set()      # Set of unique element_id + context_ref combinations
+        create_key = self._create_normalized_fact_key
+
+        # Define counting function
+        def count_element(element):
+            """Process a single element as a potential fact."""
+            nonlocal total_fact_instances
+
+            # Skip known non-fact elements
+            tag = element.tag
+            for ending in skip_tag_endings:
+                if tag.endswith(ending):
+                    return
+
+            # Get context reference - key check to identify facts
+            context_ref = element.get('contextRef')
+            if context_ref is None:
+                return
+
+            # Extract element namespace and name - optimized split
+            if '}' in tag:
+                namespace, element_name = tag.split('}', 1)
+                namespace = namespace[1:]  # Faster than strip('{')
+            else:
+                element_name = tag
+                namespace = None
+
+            # Get namespace prefix - cached for performance
+            prefix = None
+            for std_prefix, std_uri_base in NAMESPACES.items():
+                if namespace.startswith(std_uri_base):
+                    prefix = std_prefix
+                    break
+
+            if not prefix and namespace:
+                # Try to extract prefix from the namespace
+                parts = namespace.split('/')
+                prefix = parts[-1] if parts else ''
+
+            # Construct element ID with optimized string concatenation
+            if prefix:
+                element_id = f"{prefix}:{element_name}" if prefix else element_name
+            else:
+                element_id = element_name
+
+            # Create a normalized key using underscore format for consistency
+            normalized_key = create_key(element_id, context_ref)
+
+            # Track unique facts
+            unique_facts.add(normalized_key)
+
+            # Increment total instances count
+            total_fact_instances += 1
+
+        # Optimize traversal using lxml's iterchildren and iterdescendants if available
+        if hasattr(root, 'iterchildren'):
+            # Use lxml's optimized traversal methods
+            for child in root.iterchildren():
+                count_element(child)
+                # Process nested elements with optimized iteration
+                for descendant in child.iterdescendants():
+                    count_element(descendant)
+        else:
+            # Fallback for ElementTree
+            for child in root:
+                count_element(child)
+                for descendant in child.findall('.//*'):
+                    count_element(descendant)
+
+        # Return tuple of counts (unique_facts_count, total_fact_instances)
+        return len(unique_facts), total_fact_instances
+
+    def _extract_contexts(self, root: ET.Element) -> None:
+        """Extract contexts from instance document."""
+        try:
+            # Find all context elements
+            for context_elem in root.findall('.//{http://www.xbrl.org/2003/instance}context'):
+                context_id = context_elem.get('id')
+                if not context_id:
+                    continue
+
+                # Create context object
+                context = Context(context_id=context_id)
+
+                # Extract entity information
+                entity_elem = context_elem.find('.//{http://www.xbrl.org/2003/instance}entity')
+                if entity_elem is not None:
+                    # Get identifier
+                    identifier_elem = entity_elem.find('.//{http://www.xbrl.org/2003/instance}identifier')
+                    if identifier_elem is not None:
+                        scheme = identifier_elem.get('scheme', '')
+                        identifier = identifier_elem.text
+                        context.entity = {
+                            'scheme': scheme,
+                            'identifier': identifier
+                        }
+
+                    # Get segment dimensions if present
+                    segment_elem = entity_elem.find('.//{http://www.xbrl.org/2003/instance}segment')
+                    if segment_elem is not None:
+                        # Extract explicit dimensions
+                        for dim_elem in segment_elem.findall('.//{http://xbrl.org/2006/xbrldi}explicitMember'):
+                            dimension = dim_elem.get('dimension')
+                            value = dim_elem.text
+                            if dimension and value:
+                                context.dimensions[dimension] = value
+
+                        # Extract typed dimensions
+                        for dim_elem in segment_elem.findall('.//{http://xbrl.org/2006/xbrldi}typedMember'):
+                            dimension = dim_elem.get('dimension')
+                            if dimension:
+                                # The typed dimension value is the text content of the first child element
+                                for child in dim_elem:
+                                    # Extract the text content, which contains the actual typed member value
+                                    if child.text and child.text.strip():
+                                        context.dimensions[dimension] = child.text.strip()
+                                    else:
+                                        # Fallback to tag if no text content
+                                        context.dimensions[dimension] = child.tag
+                                    break
+
+                # Extract period information
+                period_elem = context_elem.find('.//{http://www.xbrl.org/2003/instance}period')
+                if period_elem is not None:
+                    # Check for instant period
+                    instant_elem = period_elem.find('.//{http://www.xbrl.org/2003/instance}instant')
+                    if instant_elem is not None and instant_elem.text:
+                        context.period = {
+                            'type': 'instant',
+                            'instant': instant_elem.text
+                        }
+
+                    # Check for duration period
+                    start_elem = period_elem.find('.//{http://www.xbrl.org/2003/instance}startDate')
+                    end_elem = period_elem.find('.//{http://www.xbrl.org/2003/instance}endDate')
+                    if start_elem is not None and end_elem is not None and start_elem.text and end_elem.text:
+                        context.period = {
+                            'type': 'duration',
+                            'startDate': start_elem.text,
+                            'endDate': end_elem.text
+                        }
+
+                    # Check for forever period
+                    forever_elem = period_elem.find('.//{http://www.xbrl.org/2003/instance}forever')
+                    if forever_elem is not None:
+                        context.period = {
+                            'type': 'forever'
+                        }
+
+                # Add context to registry
+                self.contexts[context_id] = context
+
+        except Exception as e:
+            raise XBRLProcessingError(f"Error extracting contexts: {str(e)}") from e
+
+    def _extract_units(self, root: ET.Element) -> None:
+        """Extract units from instance document."""
+        try:
+            # Find all unit elements
+            for unit_elem in root.findall('.//{http://www.xbrl.org/2003/instance}unit'):
+                unit_id = unit_elem.get('id')
+                if not unit_id:
+                    continue
+
+                # Check for measure
+                measure_elem = unit_elem.find('.//{http://www.xbrl.org/2003/instance}measure')
+                if measure_elem is not None and measure_elem.text:
+                    self.units[unit_id] = {
+                        'type': 'simple',
+                        'measure': measure_elem.text
+                    }
+                    continue
+
+                # Check for divide
+                divide_elem = unit_elem.find('.//{http://www.xbrl.org/2003/instance}divide')
+                if divide_elem is not None:
+                    # Get numerator
+                    numerator_elem = divide_elem.find('.//{http://www.xbrl.org/2003/instance}unitNumerator')
+                    denominator_elem = divide_elem.find('.//{http://www.xbrl.org/2003/instance}unitDenominator')
+
+                    if numerator_elem is not None and denominator_elem is not None:
+                        # Get measures
+                        numerator_measures = [elem.text for elem in numerator_elem.findall('.//{http://www.xbrl.org/2003/instance}measure') if elem.text]
+                        denominator_measures = [elem.text for elem in denominator_elem.findall('.//{http://www.xbrl.org/2003/instance}measure') if elem.text]
+
+                        self.units[unit_id] = {
+                            'type': 'divide',
+                            'numerator': numerator_measures,
+                            'denominator': denominator_measures
+                        }
+
+        except Exception as e:
+            raise XBRLProcessingError(f"Error extracting units: {str(e)}") from e
+
+    def _extract_facts(self, root: ET.Element) -> None:
+        """Extract facts from instance document."""
+        try:
+            # Get direct access to nsmap if using lxml (much faster than regex extraction)
+            if hasattr(root, 'nsmap'):
+                # Leverage lxml's native nsmap functionality
+                prefix_map = {uri: prefix for prefix, uri in root.nsmap.items() if prefix is not None}
+            else:
+                # Fallback for ElementTree - precompile regex patterns for namespace extraction
+                xmlns_pattern = '{http://www.w3.org/2000/xmlns/}'
+                prefix_map = {}
+
+                # Extract namespace declarations from root
+                for attr_name, attr_value in root.attrib.items():
+                    if attr_name.startswith(xmlns_pattern) or attr_name.startswith('xmlns:'):
+                        # Extract the prefix more efficiently
+                        if attr_name.startswith(xmlns_pattern):
+                            prefix = attr_name[len(xmlns_pattern):]
+                        else:
+                            prefix = attr_name.split(':', 1)[1]
+                        prefix_map[attr_value] = prefix
+
+            # Initialize counters and tracking
+            fact_count = 0
+            facts_dict = {}
+            base_keys = {}
+
+            # Fast path to identify non-fact elements to skip - compile as set for O(1) lookup
+            skip_tag_endings = {
+                'schemaRef',
+                'roleRef',
+                'arcroleRef',
+                'linkbaseRef',
+                'context',
+                'unit'
+            }
+
+            def process_element(element):
+                """Process a single element as a potential fact."""
+                nonlocal fact_count
+
+                # Skip annotation nodes and other non element nodes
+                if not ET.iselement(element):
+                    return
+                # Skip known non-fact elements - faster check with set membership
+                # If the tag is not a string, try calling () to get the string value (in rare cases)
+                if callable(element.tag):
+                    if isinstance(element, ET._Comment):
+                        return
+                    if not element.values():
+                        return
+                tag = element.tag
+                for ending in skip_tag_endings:
+                    if tag.endswith(ending):
+                        return
+
+                # Get context reference - key check to identify facts
+                context_ref = element.get('contextRef')
+                if not context_ref:
+                    return
+
+                # Get fact ID if present (for footnote linkage)
+                fact_id = element.get('id')
+
+                # Extract element namespace and name - optimized split
+                if '}' in tag:
+                    namespace, element_name = tag.split('}', 1)
+                    namespace = namespace[1:]  # Faster than strip('{')
+
+                    # Try to extract prefix from the namespace
+                    prefix = prefix_map.get(namespace)
+                    if not prefix:
+                        parts = namespace.split('/')
+                        prefix = parts[-1] if parts else ''
+                else:
+                    element_name = tag
+                    prefix = ''
+
+                # Construct element ID with optimized string concatenation
+                element_id = f"{prefix}:{element_name}" if prefix else element_name
+
+                # Get unit reference
+                unit_ref = element.get('unitRef')
+
+                # Get value - optimize string handling
+                value = element.text
+                if not value or not value.strip():
+                    # Only check children if text is empty - use direct iteration for speed
+                    for sub_elem in element:
+                        sub_text = sub_elem.text
+                        if sub_text and sub_text.strip():
+                            value = sub_text
+                            break
+
+                # Optimize string handling - inline conditional
+                value = value.strip() if value else ""
+
+                # Get decimals attribute - direct access
+                decimals = element.get('decimals')
+
+                # Optimize numeric conversion with faster try/except
+                numeric_value = None
+                if value:
+                    try:
+                        numeric_value = float(value)
+                    except (ValueError, TypeError):
+                        pass
+
+                # Create base key for duplicate detection
+                base_key = self._create_normalized_fact_key(element_id, context_ref)
+
+                # Handle duplicates
+                instance_id = None
+                if base_key in base_keys:
+                    # This is a duplicate - convert existing fact to use instance_id if needed
+                    if base_key in facts_dict:
+                        existing_fact = facts_dict[base_key]
+                        # Move existing fact to new key with instance_id=0
+                        del facts_dict[base_key]
+                        existing_fact.instance_id = 0
+                        facts_dict[self._create_normalized_fact_key(element_id, context_ref, 0)] = existing_fact
+                    # Add new fact with next instance_id
+                    instance_id = len(base_keys[base_key])
+                    base_keys[base_key].append(True)
+                else:
+                    # First instance of this fact
+                    base_keys[base_key] = [True]
+
+                # Create fact object
+                fact = Fact(
+                    element_id=element_id,
+                    context_ref=context_ref,
+                    value=value,
+                    unit_ref=unit_ref,
+                    decimals=decimals,
+                    numeric_value=numeric_value,
+                    instance_id=instance_id,
+                    fact_id=fact_id
+                )
+
+                # Store fact with appropriate key
+                key = self._create_normalized_fact_key(element_id, context_ref, instance_id)
+                facts_dict[key] = fact
+                fact_count += 1
+
+            # Use lxml's optimized traversal methods
+            if hasattr(root, 'iterchildren'):
+                # Use lxml's optimized traversal methods
+                for child in root.iterchildren():
+                    process_element(child)
+                    # Process nested elements with optimized iteration
+                    for descendant in child.iterdescendants():
+                        process_element(descendant)
+            else:
+                # Fallback for ElementTree
+                for child in root:
+                    process_element(child)
+                    for descendant in child.findall('.//*'):
+                        process_element(descendant)
+
+            # Update instance facts
+            self.facts.update(facts_dict)
+
+            log.debug(f"Extracted {fact_count} facts ({len(base_keys)} unique fact identifiers)")
+
+        except Exception as e:
+            raise XBRLProcessingError(f"Error extracting facts: {str(e)}") from e
+
+    def _extract_footnotes(self, root: ET.Element) -> None:
+        """Extract footnotes from instance document.
+
+        Footnotes in XBRL are linked to facts via footnoteLink elements that contain:
+        1. footnote elements with the actual text content
+        2. footnoteArc elements that connect fact IDs to footnote IDs
+        """
+        try:
+            from edgar.xbrl.models import Footnote
+
+            # Find all footnoteLink elements
+            for footnote_link in root.findall('.//{http://www.xbrl.org/2003/linkbase}footnoteLink'):
+                # First, extract all footnote definitions
+                for footnote_elem in footnote_link.findall('{http://www.xbrl.org/2003/linkbase}footnote'):
+                    # Try both 'id' and 'xlink:label' attributes
+                    footnote_id = footnote_elem.get('id') or footnote_elem.get('{http://www.w3.org/1999/xlink}label')
+                    if not footnote_id:
+                        continue
+
+                    # Get footnote attributes
+                    lang = footnote_elem.get('{http://www.w3.org/XML/1998/namespace}lang', 'en-US')
+                    role = footnote_elem.get('{http://www.w3.org/1999/xlink}role')
+
+                    # Extract text content, handling XHTML formatting
+                    footnote_text = ""
+                    # Check for XHTML content
+                    xhtml_divs = footnote_elem.findall('.//{http://www.w3.org/1999/xhtml}div')
+                    if xhtml_divs:
+                        # Concatenate all text within XHTML elements
+                        for div in xhtml_divs:
+                            footnote_text += "".join(div.itertext()).strip()
+                    else:
+                        # Fall back to direct text content
+                        footnote_text = "".join(footnote_elem.itertext()).strip()
+
+                    # Create Footnote object
+                    footnote = Footnote(
+                        footnote_id=footnote_id,
+                        text=footnote_text,
+                        lang=lang,
+                        role=role,
+                        related_fact_ids=[]
+                    )
+                    self.footnotes[footnote_id] = footnote
+
+                # Second, process footnoteArc elements to link facts to footnotes
+                for arc_elem in footnote_link.findall('{http://www.xbrl.org/2003/linkbase}footnoteArc'):
+                    fact_id = arc_elem.get('{http://www.w3.org/1999/xlink}from')
+                    footnote_id = arc_elem.get('{http://www.w3.org/1999/xlink}to')
+
+                    if fact_id and footnote_id:
+                        # Add fact ID to footnote's related facts
+                        if footnote_id in self.footnotes:
+                            self.footnotes[footnote_id].related_fact_ids.append(fact_id)
+                        else:
+                            log.warning(f"Footnote arc references undefined footnote: {footnote_id}")
+
+                        # Also update the fact's footnotes list if we can find it
+                        # This requires finding the fact by its fact_id
+                        for fact in self.facts.values():
+                            if fact.fact_id == fact_id:
+                                if footnote_id not in fact.footnotes:
+                                    fact.footnotes.append(footnote_id)
+                                break
+
+            log.debug(f"Extracted {len(self.footnotes)} footnotes")
+
+        except Exception as e:
+            # Log the error but don't fail - footnotes are optional
+            log.warning(f"Error extracting footnotes: {str(e)}")
+
+    def _extract_entity_info(self) -> None:
+        """Extract entity information from contexts and DEI facts."""
+        try:
+            # Extract CIK/identifier from first context
+            identifier = None
+            if self.contexts:
+                first = next(iter(self.contexts.values()))
+                ident = first.entity.get('identifier')
+                if ident and ident.isdigit():
+                    identifier = ident.lstrip('0')
+
+            # Collect all DEI facts into a dict: concept -> Fact
+            self.dei_facts: Dict[str, Fact] = {}
+            for fact in self.facts.values():
+                eid = fact.element_id
+                if eid.startswith('dei:'):
+                    concept = eid.split(':', 1)[1]
+                elif eid.startswith('dei_'):
+                    concept = eid.split('_', 1)[1]
+                else:
+                    continue
+                self.dei_facts[concept] = fact
+
+            # Helper: get the first available DEI fact value
+            def get_dei(*names):
+                for n in names:
+                    f = self.dei_facts.get(n)
+                    if f:
+                        return f.value
+                return None
+
+            # Build entity_info preserving existing keys
+            self.entity_info.update({
+                'entity_name':             get_dei('EntityRegistrantName'),
+                'ticker':                  get_dei('TradingSymbol'),
+                'identifier':              identifier,
+                'document_type':           get_dei('DocumentType'),
+                'reporting_end_date':      None,
+                'document_period_end_date':get_dei('DocumentPeriodEndDate'),
+                'fiscal_year':             get_dei('DocumentFiscalYearFocus','FiscalYearFocus','FiscalYear'),
+                'fiscal_period':           get_dei('DocumentFiscalPeriodFocus','FiscalPeriodFocus'),
+                'fiscal_year_end_month':   None,
+                'fiscal_year_end_day':     None,
+                'annual_report':           False,
+                'quarterly_report':        False,
+                'amendment':               False,
+            })
+
+            # Determine reporting_end_date from contexts
+            for ctx in self.contexts.values():
+                period = getattr(ctx, 'period', {})
+                if period.get('type') == 'instant':
+                    ds = period.get('instant')
+                    if ds:
+                        try:
+                            dt_obj = datetime.strptime(ds, '%Y-%m-%d').date()
+                            curr = self.entity_info['reporting_end_date']
+                            if curr is None or dt_obj > curr:
+                                self.entity_info['reporting_end_date'] = dt_obj
+                        except Exception:
+                            pass
+
+            # Parse fiscal year end date into month/day
+            fye = get_dei('CurrentFiscalYearEndDate','FiscalYearEnd')
+            if fye:
+                try:
+                    s = fye
+                    if s.startswith('--'):
+                        s = s[2:]
+                    if '-' in s:
+                        m, d = s.split('-', 1)
+                        if m.isdigit() and d.isdigit():
+                            self.entity_info['fiscal_year_end_month'] = int(m)
+                            self.entity_info['fiscal_year_end_day'] = int(d)
+                except Exception:
+                    pass
+
+            # Flags based on document_type
+            dt_val = self.entity_info['document_type'] or ''
+            self.entity_info['annual_report']    = (dt_val == '10-K')
+            self.entity_info['quarterly_report'] = (dt_val == '10-Q')
+            self.entity_info['amendment']        = ('/A' in dt_val)
+
+            log.debug(f"Entity info: {self.entity_info}")
+        except Exception as e:
+            log.warning(f"Warning: Error extracting entity info: {str(e)}")
+
+    def _build_reporting_periods(self) -> None:
+        """Build reporting periods from contexts."""
+        try:
+            # Clear existing periods
+            self.reporting_periods.clear()
+            self.context_period_map.clear()
+
+            # Collect unique periods from contexts
+            instant_periods = {}
+            duration_periods = {}
+
+            for context_id, context in self.contexts.items():
+                if 'period' in context.model_dump() and 'type' in context.period:
+                    period_type = context.period.get('type')
+
+                    if period_type == 'instant':
+                        date_str = context.period.get('instant')
+                        if date_str:
+                            if date_str not in instant_periods:
+                                instant_periods[date_str] = []
+
+                            # Add context ID to this period
+                            instant_periods[date_str].append(context_id)
+
+                            # Map context to period key
+                            period_key = f"instant_{date_str}"
+                            self.context_period_map[context_id] = period_key
+
+                    elif period_type == 'duration':
+                        start_date = context.period.get('startDate')
+                        end_date = context.period.get('endDate')
+                        if start_date and end_date:
+                            duration_key = f"{start_date}_{end_date}"
+                            if duration_key not in duration_periods:
+                                duration_periods[duration_key] = []
+
+                            # Add context ID to this period
+                            duration_periods[duration_key].append(context_id)
+
+                            # Map context to period key
+                            period_key = f"duration_{start_date}_{end_date}"
+                            self.context_period_map[context_id] = period_key
+
+            # Process instant periods
+            for date_str, context_ids in instant_periods.items():
+                try:
+                    date_obj = datetime.strptime(date_str, '%Y-%m-%d').date()
+                    formatted_date = date_obj.strftime('%B %d, %Y')
+
+                    period = {
+                        'type': 'instant',
+                        'date': date_str,
+                        'date_obj': date_obj,
+                        'label': formatted_date,
+                        'context_ids': context_ids,
+                        'key': f"instant_{date_str}"
+                    }
+                    self.reporting_periods.append(period)
+                except (ValueError, TypeError):
+                    # Skip invalid dates
+                    continue
+
+            # Process duration periods
+            for period_key, context_ids in duration_periods.items():
+                start_date, end_date = period_key.split('_')
+                try:
+                    start_obj = datetime.strptime(start_date, '%Y-%m-%d').date()
+                    end_obj = datetime.strptime(end_date, '%Y-%m-%d').date()
+                    formatted_start = start_obj.strftime('%B %d, %Y')
+                    formatted_end = end_obj.strftime('%B %d, %Y')
+
+                    # Calculate duration in days
+                    days = (end_obj - start_obj).days
+
+                    # Determine period type based on duration
+                    period_description = classify_duration(days)
+
+                    period = {
+                        'type': 'duration',
+                        'start_date': start_date,
+                        'end_date': end_date,
+                        'start_obj': start_obj,
+                        'end_obj': end_obj,
+                        'days': days,
+                        'period_type': period_description,
+                        'label': f"{period_description}: {formatted_start} to {formatted_end}",
+                        'context_ids': context_ids,
+                        'key': f"duration_{start_date}_{end_date}"
+                    }
+                    self.reporting_periods.append(period)
+                except (ValueError, TypeError):
+                    # Skip invalid dates
+                    continue
+
+            # Sort periods by date (most recent first)
+            self.reporting_periods.sort(key=lambda p: p['date_obj'] if p['type'] == 'instant' else p['end_obj'], reverse=True)
+
+            # Debug printout to verify periods are extracted
+            if len(self.reporting_periods) > 0:
+                log.debug(f"Found {len(self.reporting_periods)} reporting periods.")
+                log.debug(f"First period: {self.reporting_periods[0]['label']}")
+            else:
+                log.debug("Warning: No reporting periods found!")
+
+            # Debug context period map
+            log.debug(f"Context period map has {len(self.context_period_map)} entries.")
+
+        except Exception as e:
+            # Log error but don't fail
+            log.debug(f"Warning: Error building reporting periods: {str(e)}")
+            self.reporting_periods.clear()