Initial commit

This commit is contained in:
kdusek
2025-12-09 12:13:01 +01:00
commit 8e654ed209
13332 changed files with 2695056 additions and 0 deletions

View File

@@ -0,0 +1,125 @@
"""Financial fraud detection module.
This module provides tools for detecting potential financial fraud and anomalies:
- Benford's Law Analysis for digit distribution anomalies
- Altman Z-Score for bankruptcy risk
- Beneish M-Score for earnings manipulation
- Piotroski F-Score for financial strength
"""
import math
from collections import Counter
from dataclasses import dataclass
from typing import Any, Dict, List, Optional
from ..standardization import StandardConcept
from .metrics import AltmanZScore, BeneishMScore, PiotroskiFScore
@dataclass
class BenfordResult:
    """Outcome of a Benford's Law goodness-of-fit test on leading digits.

    The custom ``__repr__`` collapses the result to a one-line verdict
    ("Anomalous" or "Normal") followed by the p-value to three decimals.
    """

    # Digit -> observed distribution value.
    observed_dist: Dict[int, float]
    # Digit -> distribution value expected under Benford's Law.
    expected_dist: Dict[int, float]
    # Chi-square statistic comparing observed against expected.
    chi_square: float
    # P-value of the goodness-of-fit test.
    p_value: float
    # True when the distribution is significantly different from Benford's.
    anomalous: bool

    def __repr__(self) -> str:
        """Return the verdict label and p-value, e.g. ``Normal (p=0.421)``."""
        verdict = 'Anomalous' if self.anomalous else 'Normal'
        return f"{verdict} (p={self.p_value:.3f})"
class FraudDetector:
    """Detect potential financial fraud using multiple methods.

    Wraps the Altman Z-Score, Beneish M-Score and Piotroski F-Score
    calculators and adds a Benford's Law test on leading digits.
    """

    # Fewest usable (finite, non-zero) values the Benford test will accept.
    MIN_BENFORD_SAMPLE = 10

    def __init__(self, xbrl):
        """Initialize with an XBRL instance.

        Args:
            xbrl: XBRL object passed to each score calculator. ``analyze_all``
                also reads its ``statements`` attribute.
        """
        self.xbrl = xbrl
        self.altman = AltmanZScore(xbrl)
        self.beneish = BeneishMScore(xbrl)
        self.piotroski = PiotroskiFScore(xbrl)

    @staticmethod
    def _leading_digit(value: float) -> Optional[int]:
        """Return the first significant digit (1-9) of ``value``, or None.

        None is returned for zero, NaN and infinities, which have no
        leading digit.
        """
        magnitude = abs(float(value))
        if magnitude == 0 or not math.isfinite(magnitude):
            return None
        # repr() yields forms such as '0.05', '123.0' or '1e-05'. The first
        # 1-9 character is always the leading significant digit because the
        # mantissa of a finite non-zero float precedes any exponent and
        # contains a non-zero digit. Indexing the string after stripping
        # zeros (the previous approach) hit '.' for every value below 1.
        return next(int(ch) for ch in repr(magnitude) if ch in '123456789')

    def analyze_digit_distribution(self, values: List[float], significance: float = 0.05) -> Optional["BenfordResult"]:
        """Analyze digit distribution using Benford's Law.

        Zero, NaN and infinite entries are ignored because they have no
        leading digit.

        Args:
            values: List of numeric values to analyze
            significance: P-value threshold for anomaly detection

        Returns:
            BenfordResult with analysis results, or None if fewer than
            ``MIN_BENFORD_SAMPLE`` values are supplied or usable
        """
        # Cheap early exit before touching any element.
        if len(values) < self.MIN_BENFORD_SAMPLE:
            return None

        first_digits = [
            digit
            for digit in map(self._leading_digit, values)
            if digit is not None
        ]
        # Re-check after filtering: the test is only meaningful on the digits
        # that are actually counted.
        if len(first_digits) < self.MIN_BENFORD_SAMPLE:
            return None

        # Observed share of each leading digit.
        digit_counts = Counter(first_digits)
        total = len(first_digits)
        observed_dist = {d: digit_counts[d] / total for d in range(1, 10)}

        # Benford's Law: P(d) = log10(1 + 1/d).
        expected_dist = {d: math.log10(1 + 1 / d) for d in range(1, 10)}

        # Pearson chi-square statistic on counts (not proportions).
        chi_square = sum(
            (digit_counts[d] - expected_dist[d] * total) ** 2
            / (expected_dist[d] * total)
            for d in range(1, 10)
        )

        # Imported lazily so the module can be loaded without scipy.
        from scipy.stats import chi2

        # Nine digit categories give 8 degrees of freedom. sf() is the upper
        # tail computed directly, which keeps precision where 1 - cdf() would
        # round to zero.
        p_value = float(chi2.sf(chi_square, len(expected_dist) - 1))

        return BenfordResult(
            observed_dist=observed_dist,
            expected_dist=expected_dist,
            chi_square=chi_square,
            p_value=p_value,
            anomalous=p_value < significance
        )

    def analyze_all(self) -> Dict[str, Any]:
        """Run all fraud detection analyses.

        Returns:
            Dict containing:
            - altman_z: Altman Z-Score results
            - beneish_m: Beneish M-Score results
            - piotroski_f: Piotroski F-Score results
            - benford: Benford's Law analysis results (None if too few
              values were collected)
        """
        concepts = [
            StandardConcept.TOTAL_ASSETS,
            StandardConcept.TOTAL_LIABILITIES,
            StandardConcept.TOTAL_EQUITY,
            StandardConcept.REVENUE,
            StandardConcept.NET_INCOME,
            StandardConcept.OPERATING_INCOME,
            StandardConcept.OPERATING_CASH_FLOW
        ]

        # (attribute on xbrl.statements, extra positional args for _get_value).
        # The first lookup uses _get_value's default statement.
        # NOTE(review): presumably the balance sheet - confirm in .metrics.
        statement_lookups = [
            ('balance_sheet', ()),
            ('income_statement', ("IncomeStatement",)),
            ('cash_flow', ("CashFlow",)),
        ]
        statements = self.xbrl.statements
        # Statement availability does not depend on the concept, so it is
        # resolved once rather than once per concept.
        available = [
            extra_args
            for attr, extra_args in statement_lookups
            if hasattr(statements, attr)
        ]

        # Collect financial values for the Benford analysis. Falsy results
        # (None or 0) are skipped.
        values = []
        for concept in concepts:
            for extra_args in available:
                value = self.altman._get_value(concept, *extra_args)
                if value:
                    values.append(value)

        return {
            'altman_z': self.altman.calculate(),
            'beneish_m': self.beneish.calculate(),
            'piotroski_f': self.piotroski.calculate(),
            'benford': self.analyze_digit_distribution(values)
        }