edgartools/venv/lib/python3.10/site-packages/edgar/xbrl/stitching/periods.py

"""
XBRL Statement Stitching - Period Optimization (Refactored)

This module provides functionality to determine optimal periods for stitching
statements across multiple XBRL filings, handling period selection and
fiscal period matching.

Refactored to use a clean class-based architecture for better maintainability,
testability, and extensibility.
"""

import logging
from dataclasses import dataclass
from datetime import date
from typing import Any, Dict, List, Optional, Tuple

from edgar.xbrl.core import format_date, parse_date
from edgar.xbrl.xbrl import XBRL

logger = logging.getLogger(__name__)


@dataclass
class PeriodSelectionConfig:
    """Thresholds and defaults that drive period selection in this module.

    Every ``*_range`` field is a ``(min_days, max_days)`` pair that
    ``FiscalPeriodClassifier`` tests inclusively against a period's
    ``duration_days`` (end date minus start date, in days). Every
    ``target_*_days`` field is the duration used to rank the periods that
    fall inside the corresponding range: the smaller the absolute distance
    to the target, the earlier the period is ranked.
    """

    # Inclusive day-count windows used to recognise each kind of duration period.
    # annual_duration_range    -> full fiscal year (fiscal_period 'FY')
    # quarterly_duration_range -> a single quarter
    # q2_ytd_range / q3_ytd_range -> year-to-date periods reported in Q2 / Q3
    # q4_annual_range          -> the "year-to-date" window applied when fiscal_period is 'Q4'
    annual_duration_range: Tuple[int, int] = (350, 380)
    quarterly_duration_range: Tuple[int, int] = (80, 100)
    q2_ytd_range: Tuple[int, int] = (175, 190)
    q3_ytd_range: Tuple[int, int] = (260, 285)
    q4_annual_range: Tuple[int, int] = (350, 380)

    # Ideal durations (days) used to order candidates inside a window.
    # target_annual_days is also the ranking target for the 'Q4' year-to-date window.
    target_annual_days: int = 365
    target_quarterly_days: int = 90
    target_q2_ytd_days: int = 180
    target_q3_ytd_days: int = 270

    # Behavior flags
    # NOTE(review): require_exact_matches and allow_fallback_when_no_doc_date are
    # not read by any class in this module as shown; confirm whether other code
    # consults them before relying on them to change behavior.
    require_exact_matches: bool = True
    allow_fallback_when_no_doc_date: bool = True
    # Period limit PeriodOptimizer.determine_optimal_periods applies when the
    # caller does not supply one.
    max_periods_default: int = 8


class PeriodMatcher:
    """Locate reporting periods by exact date or by duration window.

    Periods are plain dicts. Instant periods are read through their
    ``'date'`` key, duration periods through ``'start_date'`` /
    ``'end_date'`` and, when present, ``'duration_days'``. Date values are
    handed to ``parse_date`` before being compared.

    A period that cannot be interpreted (missing key, unparseable value) is
    logged at WARNING level and skipped, so one malformed entry never aborts
    the scan of the remaining periods.
    """

    def __init__(self, config: PeriodSelectionConfig):
        """Store the shared selection configuration.

        Args:
            config: Configuration object; kept on ``self.config``.
        """
        self.config = config

    def _first_period_on(self, periods: List[Dict], date_key: str, target_date: date,
                         failure_message: str) -> Optional[Dict]:
        """Return the first period whose ``date_key`` value parses to ``target_date``.

        Args:
            periods: Candidate period dicts, scanned in order.
            date_key: Key holding the date to compare (``'date'`` or ``'end_date'``).
            target_date: Date that must be matched exactly.
            failure_message: ``logging`` format string with two ``%s`` slots
                (raw value, error) used when a period cannot be parsed.

        Returns:
            The matching period dict (the original object, not a copy), or
            ``None`` when no period matches.
        """
        for period in periods:
            try:
                parsed = parse_date(period[date_key])
            except (KeyError, ValueError, TypeError) as e:
                # KeyError is handled like an unparseable value: the log call
                # below already reads the key defensively with .get().
                logger.warning(failure_message, period.get(date_key), e)
                continue
            if parsed == target_date:
                return period
        return None

    def find_exact_instant_match(self, periods: List[Dict], target_date: date) -> Optional[Dict]:
        """Find the instant period whose date is exactly ``target_date``.

        Args:
            periods: Instant period dicts carrying a ``'date'`` entry.
            target_date: Date to match.

        Returns:
            The first matching period, or ``None`` if there is none.
            Periods with a missing or unparseable ``'date'`` are skipped
            with a warning.
        """
        return self._first_period_on(
            periods, 'date', target_date, "Failed to parse period date '%s': %s"
        )

    def find_exact_duration_match(self, periods: List[Dict], target_date: date) -> Optional[Dict]:
        """Find the duration period that ends exactly on ``target_date``.

        Args:
            periods: Duration period dicts carrying an ``'end_date'`` entry.
            target_date: End date to match.

        Returns:
            The first matching period, or ``None`` if there is none.
            Periods with a missing or unparseable ``'end_date'`` are skipped
            with a warning.
        """
        return self._first_period_on(
            periods, 'end_date', target_date, "Failed to parse period end date '%s': %s"
        )

    def filter_by_duration_range(self, periods: List[Dict], min_days: int, max_days: int, target_days: int) -> List[Dict]:
        """Keep periods whose length lies in ``[min_days, max_days]``, best fit first.

        A period's length is its ``'duration_days'`` value when that is
        present and not ``None``; otherwise it is computed from
        ``'start_date'`` and ``'end_date'`` and stored on a copy of the
        period, leaving the caller's dict untouched.

        Args:
            periods: Duration period dicts.
            min_days: Inclusive lower bound on the duration.
            max_days: Inclusive upper bound on the duration.
            target_days: Ideal duration used for ranking.

        Returns:
            A new list ordered by ``abs(duration_days - target_days)``.
            Periods whose duration cannot be determined are skipped with a
            warning.
        """
        in_range = []

        for period in periods:
            duration_days = period.get('duration_days')
            if duration_days is None:
                try:
                    start_date = parse_date(period['start_date'])
                    end_date = parse_date(period['end_date'])
                    duration_days = (end_date - start_date).days
                except (KeyError, ValueError, TypeError) as e:
                    logger.warning("Failed to calculate duration for period: %s", e)
                    continue
                # Copy before annotating so the input dict is never mutated.
                period = period.copy()
                period['duration_days'] = duration_days

            if min_days <= duration_days <= max_days:
                in_range.append(period)

        # list.sort is stable: equally good fits keep their input order.
        in_range.sort(key=lambda candidate: abs(candidate['duration_days'] - target_days))
        return in_range


class FiscalPeriodClassifier:
    """Sort duration periods into annual, quarterly and year-to-date buckets.

    Classification relies only on each period's ``'duration_days'`` entry
    (treated as 0 when the key is absent) and on the day-count windows and
    targets held by the configuration object.
    """

    def __init__(self, config: PeriodSelectionConfig):
        self.config = config

    def _rank_by_duration(self, periods: List[Dict], bounds: Tuple[int, int],
                          target_days: int) -> List[Dict]:
        """Return the periods inside ``bounds`` (inclusive), closest to ``target_days`` first."""
        shortest, longest = bounds
        matches = [
            candidate for candidate in periods
            if shortest <= candidate.get('duration_days', 0) <= longest
        ]
        matches.sort(key=lambda candidate: abs(candidate.get('duration_days', 0) - target_days))
        return matches

    def classify_annual_periods(self, periods: List[Dict]) -> List[Dict]:
        """Return periods within the configured annual window, best fit first."""
        settings = self.config
        return self._rank_by_duration(
            periods, settings.annual_duration_range, settings.target_annual_days
        )

    def classify_quarterly_periods(self, periods: List[Dict]) -> List[Dict]:
        """Return periods within the configured quarterly window, best fit first."""
        settings = self.config
        return self._rank_by_duration(
            periods, settings.quarterly_duration_range, settings.target_quarterly_days
        )

    def classify_ytd_periods(self, periods: List[Dict], fiscal_period: str) -> List[Dict]:
        """Return year-to-date periods appropriate for ``fiscal_period``.

        Only 'Q2', 'Q3' and 'Q4' have a year-to-date window; any other value
        yields an empty list. 'Q4' uses the Q4/annual window and the annual
        target duration.
        """
        if fiscal_period not in ('Q2', 'Q3', 'Q4'):
            return []

        settings = self.config
        # fiscal period -> (inclusive day window, ranking target)
        ytd_profiles = {
            'Q2': (settings.q2_ytd_range, settings.target_q2_ytd_days),
            'Q3': (settings.q3_ytd_range, settings.target_q3_ytd_days),
            'Q4': (settings.q4_annual_range, settings.target_annual_days),
        }
        bounds, target_days = ytd_profiles[fiscal_period]
        return self._rank_by_duration(periods, bounds, target_days)

    def get_expected_durations(self, fiscal_period: str) -> Dict[str, Tuple[int, int]]:
        """Map duration kinds to the day windows expected for ``fiscal_period``.

        'FY' yields ``{'annual': ...}``; 'Q1'..'Q4' yield ``{'quarterly': ...}``
        plus a ``'ytd'`` entry for 'Q2', 'Q3' and 'Q4'; anything else yields
        an empty dict.
        """
        settings = self.config

        if fiscal_period == 'FY':
            return {'annual': settings.annual_duration_range}

        if fiscal_period not in ('Q1', 'Q2', 'Q3', 'Q4'):
            return {}

        expected = {'quarterly': settings.quarterly_duration_range}
        # Q1 has no year-to-date window distinct from the quarter itself.
        ytd_attribute = {
            'Q2': 'q2_ytd_range',
            'Q3': 'q3_ytd_range',
            'Q4': 'q4_annual_range',
        }.get(fiscal_period)
        if ytd_attribute is not None:
            expected['ytd'] = getattr(settings, ytd_attribute)
        return expected


class StatementTypeSelector:
    """Pick the reporting periods of a single XBRL filing for one statement type.

    Balance sheets use instant periods; income and cash-flow statements use
    duration periods. When the filing's document period end date is known,
    only periods ending exactly on that date are accepted and nothing is
    returned otherwise. Only when that date is unknown does selection fall
    back to the most recent suitable period.
    """

    def __init__(self, matcher: PeriodMatcher, classifier: FiscalPeriodClassifier):
        """Store the collaborators used for matching and classification.

        Args:
            matcher: Used for exact instant-date matching.
            classifier: Used to bucket duration periods by length.
        """
        self.matcher = matcher
        self.classifier = classifier

    def select_balance_sheet_periods(self, xbrl: XBRL, doc_period_end_date: Optional[date]) -> List[Dict]:
        """Select the instant period to use for a balance sheet.

        Args:
            xbrl: Filing whose ``reporting_periods`` are inspected.
            doc_period_end_date: Document period end date, or ``None`` when unknown.

        Returns:
            A list holding at most one period: the instant period dated
            exactly ``doc_period_end_date``; an empty list when that date is
            known but unmatched; or, when the date is unknown, the instant
            period with the greatest ``'date'`` value.
        """
        instant_periods = [p for p in xbrl.reporting_periods if p['type'] == 'instant']
        if not instant_periods:
            return []

        if doc_period_end_date:
            exact_match = self.matcher.find_exact_instant_match(instant_periods, doc_period_end_date)
            if exact_match:
                return [exact_match]
            # No exact match found - don't use fallback to prevent fiscal year boundary issues
            logger.info("No exact instant period match found for %s", doc_period_end_date)
            return []

        # Raw 'date' values are compared as stored. max() returns the first
        # of several equally late periods, which is the same element a stable
        # descending sort would place at index 0.
        return [max(instant_periods, key=lambda p: p['date'])]

    def select_income_statement_periods(self, xbrl: XBRL, doc_period_end_date: Optional[date],
                                      fiscal_period: Optional[str]) -> List[Dict]:
        """Select duration periods for an income statement (see ``_select_duration_periods``)."""
        return self._select_duration_periods(xbrl, doc_period_end_date, fiscal_period)

    def select_cash_flow_periods(self, xbrl: XBRL, doc_period_end_date: Optional[date],
                               fiscal_period: Optional[str]) -> List[Dict]:
        """Select duration periods for a cash flow statement (see ``_select_duration_periods``)."""
        return self._select_duration_periods(xbrl, doc_period_end_date, fiscal_period)

    def _select_duration_periods(self, xbrl: XBRL, doc_period_end_date: Optional[date],
                               fiscal_period: Optional[str]) -> List[Dict]:
        """Shared selection logic for statements built from duration periods.

        Every duration period is copied and given a ``'duration_days'``
        entry (end date minus start date). Periods whose dates cannot be
        parsed are skipped with a warning.

        Args:
            xbrl: Filing whose ``reporting_periods`` are inspected.
            doc_period_end_date: Document period end date, or ``None`` when unknown.
            fiscal_period: 'FY', 'Q1'..'Q4', or ``None`` when unknown.

        Returns:
            The selected period copies; empty when the filing has no usable
            duration period, or when ``doc_period_end_date`` is known but no
            period ends on it.
        """
        duration_periods = [p for p in xbrl.reporting_periods if p['type'] == 'duration']
        if not duration_periods:
            return []

        # Parse each period once and keep the parsed end date next to the
        # enriched copy so the exact-match step does not have to re-parse it.
        parsed_periods = []
        for period in duration_periods:
            try:
                start_date = parse_date(period['start_date'])
                end_date = parse_date(period['end_date'])
                duration_days = (end_date - start_date).days
            except (ValueError, TypeError) as e:
                logger.warning("Failed to parse period dates: %s", e)
                continue
            enriched = period.copy()
            enriched['duration_days'] = duration_days
            parsed_periods.append((end_date, enriched))

        if not parsed_periods:
            return []

        if doc_period_end_date:
            matching_periods = [
                enriched for end_date, enriched in parsed_periods
                if end_date == doc_period_end_date
            ]
            if matching_periods:
                return self._select_appropriate_durations(matching_periods, fiscal_period)
            # No exact match found - don't use fallback
            logger.info("No exact duration period match found for %s", doc_period_end_date)
            return []

        # No document_period_end_date - use fallback logic
        return self._select_fallback_periods(
            [enriched for _, enriched in parsed_periods], fiscal_period
        )

    def _select_appropriate_durations(self, periods: List[Dict], fiscal_period: Optional[str]) -> List[Dict]:
        """Choose by length among periods that already share the required end date.

        Args:
            periods: Duration periods carrying ``'duration_days'``.
            fiscal_period: 'FY' selects the best annual period; any other
                value selects the best quarterly period followed by the best
                year-to-date period for that quarter, when available.

        Returns:
            Zero, one or two periods ("best" = closest to the configured
            target duration, as ranked by the classifier).
        """
        if fiscal_period == 'FY':
            annual_periods = self.classifier.classify_annual_periods(periods)
            return annual_periods[:1]

        selected_periods = []
        quarterly_periods = self.classifier.classify_quarterly_periods(periods)
        if quarterly_periods:
            selected_periods.append(quarterly_periods[0])

        ytd_periods = self.classifier.classify_ytd_periods(periods, fiscal_period)
        if ytd_periods:
            selected_periods.append(ytd_periods[0])

        return selected_periods

    def _select_fallback_periods(self, periods: List[Dict], fiscal_period: Optional[str]) -> List[Dict]:
        """Select periods when no document period end date is available.

        The input list is never reordered or otherwise modified.

        Args:
            periods: Duration periods carrying ``'duration_days'`` and ``'end_date'``.
            fiscal_period: 'FY', 'Q1'..'Q4', or ``None`` when unknown.

        Returns:
            For 'FY': the annual-length period with the greatest
            ``'end_date'``, or, if none has annual length, the period with
            the greatest ``'end_date'`` overall. For any other value: the
            latest quarterly-length period followed by the latest
            year-to-date period, each only if present. Empty when
            ``periods`` is empty.
        """
        if not periods:
            return []

        def latest(candidates: List[Dict]) -> Dict:
            # Raw 'end_date' values are compared as stored. On ties max()
            # keeps the earliest-listed candidate, matching a stable
            # descending sort followed by [0].
            return max(candidates, key=lambda p: p['end_date'])

        if fiscal_period == 'FY':
            annual_periods = self.classifier.classify_annual_periods(periods)
            if annual_periods:
                return [latest(annual_periods)]
            # No annual-length period: fall back to the most recent one.
            return [latest(periods)]

        selected_periods = []

        quarterly_periods = self.classifier.classify_quarterly_periods(periods)
        if quarterly_periods:
            selected_periods.append(latest(quarterly_periods))

        ytd_periods = self.classifier.classify_ytd_periods(periods, fiscal_period)
        if ytd_periods:
            selected_periods.append(latest(ytd_periods))

        # NOTE(review): unlike the 'FY' branch there is no last-resort
        # "most recent period" pick here, so this can be empty. Kept as is
        # because changing it would alter which periods get stitched.
        return selected_periods


class PeriodMetadataEnricher:
    """Builds the flat metadata record describing one selected period"""

    def enrich_period_metadata(self, period: Dict, xbrl_index: int, entity_info: Dict,
                              doc_period_end_date: Optional[date], fiscal_period: str,
                              fiscal_year: str) -> Dict[str, Any]:
        """Combine a period with its filing context into one metadata dict.

        The result always carries the filing context and the period's key,
        label and type. Instant periods additionally get ``date``; all other
        periods get ``start_date``, ``end_date`` and ``duration_days`` (the
        period's own value when present, otherwise end minus start in days).
        ``display_date`` is the formatted instant date or end date.
        """
        metadata: Dict[str, Any] = dict(
            xbrl_index=xbrl_index,
            period_key=period['key'],
            period_label=period['label'],
            period_type=period['type'],
            entity_info=entity_info,
            doc_period_end_date=doc_period_end_date,
            fiscal_period=fiscal_period,
            fiscal_year=fiscal_year,
        )

        if metadata['period_type'] == 'instant':
            as_of = parse_date(period['date'])
            metadata.update(date=as_of, display_date=format_date(as_of))
            return metadata

        # Anything that is not an instant is handled as a duration.
        starts = parse_date(period['start_date'])
        ends = parse_date(period['end_date'])
        computed_days = (ends - starts).days
        metadata.update(
            start_date=starts,
            end_date=ends,
            duration_days=period.get('duration_days', computed_days),
            display_date=format_date(ends),
        )
        return metadata


class PeriodDeduplicator:
    """Removes repeated periods, orders them newest first and caps their number"""

    def deduplicate_periods(self, periods: List[Dict], statement_type: str) -> List[Dict]:
        """Drop periods that repeat an earlier period's type and date.

        Two periods are duplicates when their ``'period_type'`` is equal and
        they share the same ``'date'`` (instant) or ``'end_date'`` (any other
        type). The first occurrence in input order is kept. Duration periods
        of different lengths that end on the same date therefore count as
        duplicates. ``statement_type`` is accepted but not consulted.
        """
        def anchor_date(entry: Dict):
            return entry['date'] if entry['period_type'] == 'instant' else entry['end_date']

        kept: List[Dict] = []
        for candidate in periods:
            already_present = any(
                candidate['period_type'] == earlier['period_type']
                and anchor_date(candidate) == anchor_date(earlier)
                for earlier in kept
            )
            if already_present:
                continue
            kept.append(candidate)
        return kept

    def sort_periods_chronologically(self, periods: List[Dict], statement_type: str) -> List[Dict]:
        """Return a new list ordered from most recent to oldest.

        'BalanceSheet' orders by ``'date'``; every other statement type
        orders by ``'end_date'``. Entries with equal dates keep their
        relative input order.
        """
        sort_field = 'date' if statement_type == 'BalanceSheet' else 'end_date'
        newest_first = list(periods)
        newest_first.sort(key=lambda entry: entry[sort_field], reverse=True)
        return newest_first

    def limit_periods(self, periods: List[Dict], max_periods: int) -> List[Dict]:
        """Return at most the first ``max_periods`` entries.

        When the list is already short enough the very same list object is
        returned; otherwise a truncated copy is returned.
        """
        if len(periods) <= max_periods:
            return periods
        return periods[:max_periods]


class PeriodOptimizer:
    """Orchestrates period selection across a list of XBRL filings.

    The pipeline is: select candidate periods per filing, turn each into a
    metadata record, then order newest first, drop duplicates and cap the
    count.
    """

    def __init__(self, config: Optional[PeriodSelectionConfig] = None):
        """Wire up the collaborating helpers.

        Args:
            config: Selection thresholds; a default ``PeriodSelectionConfig``
                is created when omitted.
        """
        self.config = config or PeriodSelectionConfig()
        self.matcher = PeriodMatcher(self.config)
        self.classifier = FiscalPeriodClassifier(self.config)
        self.selector = StatementTypeSelector(self.matcher, self.classifier)
        self.enricher = PeriodMetadataEnricher()
        self.deduplicator = PeriodDeduplicator()

    def determine_optimal_periods(self, xbrl_list: List[XBRL], statement_type: str,
                                 max_periods: Optional[int] = None) -> List[Dict[str, Any]]:
        """Run the whole selection pipeline.

        Args:
            xbrl_list: Filings to inspect; ``None`` entries are skipped.
            statement_type: 'BalanceSheet', 'IncomeStatement',
                'CashFlowStatement' or another type (handled like an income
                statement).
            max_periods: Maximum number of periods to return. ``None`` (and,
                because the check is truthiness-based, 0) selects
                ``config.max_periods_default``.

        Returns:
            Period metadata dicts, most recent first, without duplicates.
        """
        max_periods = max_periods or self.config.max_periods_default

        all_periods = self._extract_all_periods(xbrl_list, statement_type)
        enriched_periods = self._enrich_with_metadata(all_periods)
        return self._deduplicate_and_limit(enriched_periods, max_periods, statement_type)

    def _extract_all_periods(self, xbrl_list: List[XBRL], statement_type: str) -> List[Dict[str, Any]]:
        """Collect the selected periods of every filing together with filing context.

        Returns:
            One dict per selected period with the keys ``period``,
            ``xbrl_index`` (position of the filing in ``xbrl_list``),
            ``entity_info``, ``doc_period_end_date``, ``fiscal_period`` and
            ``fiscal_year``.
        """
        all_periods = []

        for i, xbrl in enumerate(xbrl_list):
            # Skip None XBRLs (pre-XBRL era filings before 2009)
            if xbrl is None:
                continue

            # Skip XBRLs with no reporting periods
            if not xbrl.reporting_periods:
                continue

            entity_info = xbrl.entity_info or {}
            doc_period_end_date = self._parse_document_period_end_date(entity_info)
            fiscal_period = entity_info.get('fiscal_period')
            fiscal_year = entity_info.get('fiscal_year')

            selected_periods = self._select_periods_for_statement_type(
                xbrl, statement_type, doc_period_end_date, fiscal_period
            )

            all_periods.extend(
                {
                    'period': period,
                    'xbrl_index': i,
                    'entity_info': entity_info,
                    'doc_period_end_date': doc_period_end_date,
                    'fiscal_period': fiscal_period,
                    'fiscal_year': fiscal_year,
                }
                for period in selected_periods
            )

        return all_periods

    def _parse_document_period_end_date(self, entity_info: Dict) -> Optional[date]:
        """Read ``document_period_end_date`` from ``entity_info`` as a plain date.

        Args:
            entity_info: Filing-level entity information.

        Returns:
            The date, or ``None`` when the entry is absent, ``None``, an
            empty string, or cannot be parsed (the last case is logged at
            WARNING level).
        """
        # Local import: this module only imports `date` at file level.
        from datetime import datetime

        raw_value = entity_info.get('document_period_end_date')
        if raw_value is None or raw_value == '':
            # A present-but-empty entry means "unknown"; parsing str(None)
            # would only produce a misleading warning.
            return None

        if isinstance(raw_value, datetime):
            # datetime is a subclass of date but never compares equal to a
            # plain date, so it would silently defeat exact end-date matching.
            # Assumes parse_date returns plain dates, as this module's
            # annotations indicate.
            return raw_value.date()
        if isinstance(raw_value, date):
            return raw_value

        try:
            return parse_date(str(raw_value))
        except (ValueError, TypeError) as e:
            logger.warning("Failed to parse document_period_end_date: %s", e)
            return None

    def _select_periods_for_statement_type(self, xbrl: XBRL, statement_type: str,
                                         doc_period_end_date: Optional[date],
                                         fiscal_period: Optional[str]) -> List[Dict]:
        """Dispatch to the selector method matching ``statement_type``.

        'BalanceSheet' uses instant-period selection and 'CashFlowStatement'
        the cash-flow selection; 'IncomeStatement' and every unrecognised
        type use the income-statement selection.
        """
        if statement_type == 'BalanceSheet':
            return self.selector.select_balance_sheet_periods(xbrl, doc_period_end_date)
        if statement_type == 'CashFlowStatement':
            return self.selector.select_cash_flow_periods(xbrl, doc_period_end_date, fiscal_period)
        # IncomeStatement, and the default for any other statement type.
        return self.selector.select_income_statement_periods(xbrl, doc_period_end_date, fiscal_period)

    def _enrich_with_metadata(self, all_periods: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Convert each period-with-context dict into a flat metadata record."""
        return [
            self.enricher.enrich_period_metadata(
                context['period'],
                context['xbrl_index'],
                context['entity_info'],
                context['doc_period_end_date'],
                context['fiscal_period'],
                context['fiscal_year'],
            )
            for context in all_periods
        ]

    def _deduplicate_and_limit(self, periods: List[Dict[str, Any]], max_periods: int,
                              statement_type: str) -> List[Dict[str, Any]]:
        """Order newest first, remove duplicates, then keep at most ``max_periods``.

        Sorting happens before deduplication, so among duplicates the one
        that sorts first is the one retained.
        """
        sorted_periods = self.deduplicator.sort_periods_chronologically(periods, statement_type)
        deduplicated_periods = self.deduplicator.deduplicate_periods(sorted_periods, statement_type)
        return self.deduplicator.limit_periods(deduplicated_periods, max_periods)


# Main function that maintains the original API
def determine_optimal_periods(xbrl_list: List[XBRL], statement_type: str, max_periods: int = 8) -> List[Dict[str, Any]]:
    """
    Choose the periods to show in a stitched statement built from several XBRL filings.

    Function-style entry point: it builds a ``PeriodOptimizer`` with the
    default configuration and delegates the whole selection to it.

    Args:
        xbrl_list: XBRL objects to analyze; ``None`` entries are skipped, and
            each returned period records the list position of its filing
        statement_type: Type of statement ('BalanceSheet', 'IncomeStatement', etc.)
        max_periods: Upper bound on the number of periods returned (default is 8)

    Returns:
        List of period metadata dictionaries, most recent period first
    """
    return PeriodOptimizer().determine_optimal_periods(
        xbrl_list,
        statement_type,
        max_periods=max_periods,
    )