126 lines
4.5 KiB
Python
126 lines
4.5 KiB
Python
"""Financial fraud detection module.
|
|
|
|
This module provides tools for detecting potential financial fraud and anomalies:
|
|
- Benford's Law Analysis for digit distribution anomalies
|
|
- Altman Z-Score for bankruptcy risk
|
|
- Beneish M-Score for earnings manipulation
|
|
- Piotroski F-Score for financial strength
|
|
"""
|
|
|
|
import math
|
|
from collections import Counter
|
|
from dataclasses import dataclass
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
from ..standardization import StandardConcept
|
|
from .metrics import AltmanZScore, BeneishMScore, PiotroskiFScore
|
|
|
|
|
|
@dataclass
class BenfordResult:
    """Outcome of a Benford's Law digit-distribution test.

    The fields are stored exactly as supplied. ``repr()`` is overridden to
    give a short one-line verdict instead of the dataclass default.
    """

    # Observed share of values for each digit key.
    observed_dist: Dict[int, float]
    # Share Benford's Law predicts for each digit key.
    expected_dist: Dict[int, float]
    # Chi-square goodness-of-fit statistic.
    chi_square: float
    # P-value of the goodness-of-fit test.
    p_value: float
    # True when the distribution is significantly different from expected.
    anomalous: bool

    def __repr__(self) -> str:
        """Return the verdict label followed by the p-value to three decimals."""
        verdict = 'Anomalous' if self.anomalous else 'Normal'
        return f"{verdict} (p={self.p_value:.3f})"
|
|
|
|
class FraudDetector:
    """Detect potential financial fraud using multiple methods.

    Bundles the Altman Z-Score, Beneish M-Score and Piotroski F-Score
    calculators with a Benford's Law leading-digit test.
    """

    # Smallest number of usable (finite, non-zero) values worth testing.
    _MIN_SAMPLE_SIZE = 10

    def __init__(self, xbrl):
        """Initialize with an XBRL instance.

        Args:
            xbrl: XBRL instance passed to each score calculator.
                ``analyze_all`` also reads its ``statements`` attribute.
        """
        self.xbrl = xbrl
        self.altman = AltmanZScore(xbrl)
        self.beneish = BeneishMScore(xbrl)
        self.piotroski = PiotroskiFScore(xbrl)

    @staticmethod
    def _leading_digit(value: float) -> Optional[int]:
        """Return the first significant digit (1-9) of ``value``, or None if it has none."""
        magnitude = abs(float(value))
        if magnitude == 0 or not math.isfinite(magnitude):
            return None
        # Scientific notation always starts with the first significant digit,
        # so fractions such as 0.05 work (str() would yield a leading "0.").
        # 17 significant digits stop e.g. 9.9999999 being rounded up to "1.0e+01".
        return int(format(magnitude, ".16e")[0])

    def analyze_digit_distribution(
        self,
        values: List[float],
        significance: float = 0.05,
    ) -> Optional[BenfordResult]:
        """Analyze digit distribution using Benford's Law.

        The leading-digit frequencies of ``values`` are compared with the
        Benford frequencies ``log10(1 + 1/d)`` using a chi-square
        goodness-of-fit test with 8 degrees of freedom (digits 1-9).
        Zero and non-finite values have no leading digit and are ignored.

        Args:
            values: List of numeric values to analyze.
            significance: P-value threshold for anomaly detection.

        Returns:
            BenfordResult with analysis results, or None if insufficient data
            (fewer than 10 values given, or fewer than 10 usable values left
            after dropping zeros and non-finite entries).
        """
        if len(values) < self._MIN_SAMPLE_SIZE:
            return None

        digits = [
            digit
            for digit in map(self._leading_digit, values)
            if digit is not None
        ]
        total = len(digits)
        # Apply the threshold to what is actually tested, not the raw input:
        # a list padded with zeros must not pass as a reasonable sample.
        if total < self._MIN_SAMPLE_SIZE:
            return None

        digit_counts = Counter(digits)
        observed_dist = {d: digit_counts[d] / total for d in range(1, 10)}
        expected_dist = {d: math.log10(1 + 1 / d) for d in range(1, 10)}

        # Pearson chi-square statistic on raw counts.
        chi_square = sum(
            (digit_counts[d] - share * total) ** 2 / (share * total)
            for d, share in expected_dist.items()
        )

        # Imported here so SciPy is only required when the test actually runs.
        from scipy.stats import chi2

        # sf() is the survival function (1 - cdf) computed directly, which
        # keeps precision for very small p-values. float() turns the NumPy
        # scalar into the builtin type the result dataclass is annotated with.
        p_value = float(chi2.sf(chi_square, 8))

        return BenfordResult(
            observed_dist=observed_dist,
            expected_dist=expected_dist,
            chi_square=chi_square,
            p_value=p_value,
            anomalous=bool(p_value < significance),
        )

    def analyze_all(self) -> Dict[str, Any]:
        """Run all fraud detection analyses.

        Values for the Benford test are gathered by asking the Altman
        calculator's ``_get_value`` for each headline concept, once per
        statement attribute present on ``self.xbrl.statements``. Falsy
        results (``None`` or zero) are dropped.

        Returns:
            Dict containing:
            - altman_z: Altman Z-Score results
            - beneish_m: Beneish M-Score results
            - piotroski_f: Piotroski F-Score results
            - benford: Benford's Law analysis results (None if too few
              values were found)
        """
        concepts = (
            StandardConcept.TOTAL_ASSETS,
            StandardConcept.TOTAL_LIABILITIES,
            StandardConcept.TOTAL_EQUITY,
            StandardConcept.REVENUE,
            StandardConcept.NET_INCOME,
            StandardConcept.OPERATING_INCOME,
            StandardConcept.OPERATING_CASH_FLOW,
        )

        statements = self.xbrl.statements
        # Extra positional arguments for _get_value, one entry per statement
        # that is present. The balance-sheet lookup passes no statement name.
        # NOTE(review): presumably _get_value defaults to the balance sheet;
        # confirm against AltmanZScore._get_value.
        lookups = [
            extra_args
            for attr, extra_args in (
                ('balance_sheet', ()),
                ('income_statement', ("IncomeStatement",)),
                ('cash_flow', ("CashFlow",)),
            )
            if hasattr(statements, attr)
        ]

        values = []
        for concept in concepts:
            for extra_args in lookups:
                value = self.altman._get_value(concept, *extra_args)
                if value:  # skips both missing (None) and zero values
                    values.append(value)

        return {
            'altman_z': self.altman.calculate(),
            'beneish_m': self.beneish.calculate(),
            'piotroski_f': self.piotroski.calculate(),
            'benford': self.analyze_digit_distribution(values),
        }
|