Initial commit

2025-12-09 12:13:01 +01:00
commit 8e654ed209
13332 changed files with 2695056 additions and 0 deletions
--- a/venv/lib/python3.10/site-packages/edgar/xbrl/analysis/fraud.py
+++ b/venv/lib/python3.10/site-packages/edgar/xbrl/analysis/fraud.py
@@ -0,0 +1,125 @@
+"""Financial fraud detection module.
+
+This module provides tools for detecting potential financial fraud and anomalies:
+- Benford's Law Analysis for digit distribution anomalies
+- Altman Z-Score for bankruptcy risk
+- Beneish M-Score for earnings manipulation
+- Piotroski F-Score for financial strength
+"""
+
+import math
+from collections import Counter
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional
+
+from ..standardization import StandardConcept
+from .metrics import AltmanZScore, BeneishMScore, PiotroskiFScore
+
+
+@dataclass
+class BenfordResult:
+    """Results from Benford's Law analysis."""
+    observed_dist: Dict[int, float]  # Observed digit distribution
+    expected_dist: Dict[int, float]  # Expected Benford distribution
+    chi_square: float  # Chi-square statistic
+    p_value: float    # P-value for goodness of fit
+    anomalous: bool   # Whether distribution is significantly different
+
+    def __repr__(self) -> str:
+        return f"{'Anomalous' if self.anomalous else 'Normal'} (p={self.p_value:.3f})"
+
+class FraudDetector:
+    """Detect potential financial fraud using multiple methods."""
+
+    def __init__(self, xbrl):
+        """Initialize with an XBRL instance."""
+        self.xbrl = xbrl
+        self.altman = AltmanZScore(xbrl)
+        self.beneish = BeneishMScore(xbrl)
+        self.piotroski = PiotroskiFScore(xbrl)
+
+    def analyze_digit_distribution(self, values: List[float], significance: float = 0.05) -> Optional[BenfordResult]:
+        """Analyze digit distribution using Benford's Law.
+
+        Args:
+            values: List of numeric values to analyze
+            significance: P-value threshold for anomaly detection
+
+        Returns:
+            BenfordResult with analysis results, or None if insufficient data
+        """
+        if len(values) < 10:  # Need reasonable sample size
+            return None
+
+        # Get first digits
+        first_digits = [int(str(abs(float(v))).lstrip('0')[0]) for v in values if v != 0]
+        if not first_digits:
+            return None
+
+        # Calculate observed distribution
+        digit_counts = Counter(first_digits)
+        total = len(first_digits)
+        observed_dist = {d: digit_counts.get(d, 0) / total for d in range(1, 10)}
+
+        # Calculate expected Benford distribution
+        expected_dist = {d: math.log10(1 + 1/d) for d in range(1, 10)}
+
+        # Perform chi-square test
+        chi_square = 0
+        for d in range(1, 10):
+            expected = expected_dist[d] * total
+            observed = digit_counts.get(d, 0)
+            chi_square += (observed - expected) ** 2 / expected
+
+        # Get p-value (8 degrees of freedom for digits 1-9)
+        from scipy.stats import chi2
+        p_value = 1 - chi2.cdf(chi_square, 8)
+
+        return BenfordResult(
+            observed_dist=observed_dist,
+            expected_dist=expected_dist,
+            chi_square=chi_square,
+            p_value=p_value,
+            anomalous=p_value < significance
+        )
+
+    def analyze_all(self) -> Dict[str, Any]:
+        """Run all fraud detection analyses.
+
+        Returns:
+            Dict containing:
+            - altman_z: Altman Z-Score results
+            - beneish_m: Beneish M-Score results
+            - piotroski_f: Piotroski F-Score results
+            - benford: Benford's Law analysis results
+        """
+        # Get financial values for Benford analysis
+        values = []
+        for concept in [
+            StandardConcept.TOTAL_ASSETS,
+            StandardConcept.TOTAL_LIABILITIES,
+            StandardConcept.TOTAL_EQUITY,
+            StandardConcept.REVENUE,
+            StandardConcept.NET_INCOME,
+            StandardConcept.OPERATING_INCOME,
+            StandardConcept.OPERATING_CASH_FLOW
+        ]:
+            if hasattr(self.xbrl.statements, 'balance_sheet'):
+                bs_value = self.altman._get_value(concept)
+                if bs_value:
+                    values.append(bs_value)
+            if hasattr(self.xbrl.statements, 'income_statement'):
+                is_value = self.altman._get_value(concept, "IncomeStatement")
+                if is_value:
+                    values.append(is_value)
+            if hasattr(self.xbrl.statements, 'cash_flow'):
+                cf_value = self.altman._get_value(concept, "CashFlow")
+                if cf_value:
+                    values.append(cf_value)
+
+        return {
+            'altman_z': self.altman.calculate(),
+            'beneish_m': self.beneish.calculate(),
+            'piotroski_f': self.piotroski.calculate(),
+            'benford': self.analyze_digit_distribution(values)
+        }