Files
edgartools/chart_generator.py
2025-12-09 12:13:01 +01:00

419 lines
19 KiB
Python
Executable File

#!/usr/bin/env python3
import os
import sys
import subprocess

# Refuse to run outside a virtual environment. The check runs before any
# third-party import so that a missing dependency surfaces as the activation
# hint below rather than as an ImportError.
# Inside a venv sys.prefix differs from sys.base_prefix even when the
# interpreter is invoked directly without sourcing the activate script, so
# either signal is accepted.
if not (os.environ.get('VIRTUAL_ENV') or sys.prefix != sys.base_prefix):
    print("Virtual environment is not activated.")
    print("To activate: . venv/bin/activate")
    print("Then run: python chart_generator.py <TICKER>")
    # sys.exit instead of the `exit` helper: the latter is injected by the
    # site module for interactive sessions and is not meant for programs.
    sys.exit(1)

import re

import matplotlib.pyplot as plt
import pandas as pd
from bs4 import BeautifulSoup
from edgar import Company, set_identity, set_local_storage_path, use_local_storage, XBRL

# Set your identity (required by SEC)
set_identity("your.email@example.com")

# Enable local storage for caching filings
LOCAL_STORAGE_PATH = os.path.abspath("./edgar_cache")
os.makedirs(LOCAL_STORAGE_PATH, exist_ok=True)
use_local_storage(LOCAL_STORAGE_PATH)
def show_image(image_path):
    """Open *image_path* with the first installed image viewer.

    The candidate viewers are tried in list order; a viewer is used only when
    its executable is found on PATH. The call blocks until the viewer
    process exits. If no viewer could be launched, a notice is printed and
    the function returns without raising.

    Args:
        image_path: Path of the image file handed to the viewer.
    """
    # Function-scope import keeps this block self-contained.
    import shutil

    viewers = ['eog', 'feh', 'gthumb', 'gwenview', 'shotwell', 'display']  # Common Linux image viewers
    for viewer in viewers:
        # shutil.which avoids spawning the external `which` binary, which may
        # itself be missing and would then raise FileNotFoundError.
        viewer_path = shutil.which(viewer)
        if viewer_path is None:
            continue
        try:
            subprocess.run([viewer_path, image_path])
        except (OSError, ValueError) as e:
            # Launch failed (e.g. not executable, malformed argument);
            # fall through to the next candidate.
            print(f"Failed to open with {viewer}: {e}")
            continue
        print(f"Displayed chart with {viewer}")
        return
    print("No suitable image viewer found. Chart saved but not displayed.")
def parse_20f_html(html_content, year):
    """Heuristically extract revenue, gross profit and net income from HTML.

    The markup is reduced to lower-cased plain text, and for each of the
    labels "net revenue", "gross profit" and "net income" the first large
    number that follows the label on the same line is taken as its value.
    This is a text heuristic, not a structured parse; results should be
    treated as best-effort.

    Args:
        html_content: HTML markup to search.
        year: Accepted for interface compatibility; the parser does not use it.

    Returns:
        A tuple ``(rev, gp, ni)`` of floats. An entry is ``None`` when its
        label is not followed by a qualifying number.
    """
    # A "large number" is either comma-grouped (1,234 / 1,234,567.8) or a run
    # of at least four digits (123456). The first alternative is what lets
    # conventionally formatted figures match: requiring four leading digits
    # alone skips "1,234,567" and latches onto a later number instead.
    amount = r'(\d{1,3}(?:,\d{3})+(?:\.\d+)?|\d{4,}(?:,\d{3})*(?:\.\d+)?)'

    soup = BeautifulSoup(html_content, 'html.parser')
    text = soup.get_text().lower()

    def first_amount(label):
        """Return the first large number after *label* as a float, or None."""
        match = re.search(label + r'.*?' + amount, text)
        if match is None:
            return None
        return float(match.group(1).replace(',', ''))

    rev = first_amount('net revenue')
    gp = first_amount('gross profit')
    ni = first_amount('net income')
    return rev, gp, ni
def extract_number(text):
    """Parse a number out of free-form text.

    Every character other than digits, ``,``, ``.``, ``-``, ``(`` and ``)``
    is discarded. If both an opening and a closing parenthesis remain, the
    value is treated as negative (accounting notation). Commas are removed
    as thousands separators before conversion.

    Args:
        text: String to parse, or ``None``.

    Returns:
        The parsed value as a float, or ``None`` when *text* is ``None`` or
        what remains after cleaning is not a valid number.
    """
    if text is None:
        return None
    cleaned = re.sub(r'[^\d,.\-\(\)]', '', text)
    if '(' in cleaned and ')' in cleaned:
        cleaned = '-' + cleaned.replace('(', '').replace(')', '')
    cleaned = cleaned.replace(',', '')
    try:
        return float(cleaned)
    except ValueError:
        # Only conversion failures mean "no number"; anything else
        # (e.g. KeyboardInterrupt) must propagate.
        return None
# XBRL element ids tried, in order of preference, when looking for revenue.
_REVENUE_ELEMENTS = (
    'us-gaap:RevenueFromContractWithCustomerExcludingAssessedTax',
    'us-gaap:Revenues',
    'us-gaap:SalesRevenueNet',
)
# Gross-profit element ids; only the local name after the prefix is matched.
_GROSS_PROFIT_ELEMENTS = ('us-gaap:GrossProfit', 'us-gaap:GrossMargin')


def _max_value(facts):
    """Return the largest numeric entry of ``facts['value']``, or None if there is none."""
    if facts.empty:
        return None
    value = pd.to_numeric(facts['value'], errors='coerce').max()
    return None if pd.isna(value) else value


def _extract_metrics(df):
    """Return ``(period_end, revenue, gross_profit, net_income)`` for one facts table.

    *df* must provide the columns ``element_id``, ``period_end`` and
    ``value``. The latest ``period_end`` among the revenue facts defines the
    reporting period, and all three metrics are read from facts of that
    period only. Every entry is None when no revenue facts exist; individual
    metrics are None when they cannot be found.
    """
    element_ids = df['element_id']

    revenues = pd.DataFrame()
    for elem in _REVENUE_ELEMENTS:
        candidate = df[element_ids == elem]
        if not candidate.empty:
            revenues = candidate
            print(f" Found revenue element: {elem}")
            break
    if revenues.empty:
        return None, None, None, None

    period_end = revenues['period_end'].max()
    if pd.isna(period_end):
        return None, None, None, None
    in_period = df['period_end'] == period_end

    # Restrict revenue to the reporting period as well; taking the maximum
    # over every period in the filing would pair one period's revenue with
    # another period's profit.
    rev_val = _max_value(revenues[revenues['period_end'] == period_end])

    gp_val = None
    for elem in _GROSS_PROFIT_ELEMENTS:
        local_name = elem.split(':')[1]
        candidate = df[element_ids.str.contains(local_name, na=False, regex=False) & in_period]
        print(f" Checking {elem} for period {period_end}: {candidate.shape}")
        if not candidate.empty:
            print(f" Found gross profit element: {elem}")
            # max(), like the other metrics: several facts can share a period
            # end, and summing them would count the same profit repeatedly.
            # NOTE(review): confirm max() selects the intended fact when the
            # table holds dimensional or year-to-date duplicates.
            gp_val = _max_value(candidate)
            break

    # If no direct GP found, try calculating from Revenue - COGS
    if gp_val is None and rev_val is not None:
        cogs_val = _max_value(
            df[(element_ids == 'us-gaap:CostOfGoodsAndServicesSold') & in_period]
        )
        if cogs_val is not None:
            gp_val = rev_val - cogs_val
            print(f" Calculated Gross Profit from Revenue - COGS: {gp_val}")

    ni_val = _max_value(df[(element_ids == 'us-gaap:NetIncomeLoss') & in_period])
    return period_end, rev_val, gp_val, ni_val


def _quarter_key(period_end):
    """Map a period end date to a label such as ``2024-Q2``."""
    return pd.to_datetime(period_end).to_period('Q').strftime('%Y-Q%q')


def _year_key(period_end):
    """Map a period end date to its calendar year as an int."""
    return int(pd.to_datetime(period_end).year)


def _collect_metrics(filings, period_key, html_fallback=False):
    """Gather revenue, gross profit and net income from each filing's XBRL facts.

    Args:
        filings: Iterable of filings to process.
        period_key: Callable turning a period end date into the dict key.
        html_fallback: When true, filings without revenue facts are run
            through ``parse_20f_html`` and the result is logged.

    Returns:
        Three dicts ``(revenue, gross_profit, net_income)`` keyed by period.
        A filing that raises is reported and skipped.
    """
    rev_by_period, gp_by_period, ni_by_period = {}, {}, {}
    for filing in filings:
        print(f"Processing filing {filing.accession_number} from {filing.filing_date}")
        try:
            xbrl = XBRL.from_filing(filing)
            if not xbrl:
                print(" No XBRL found")
                continue
            df = xbrl.to_pandas()['facts']

            # Diagnostic overview of the elements this filing offers.
            print(f"df columns: {list(df.columns)}")
            unique_ids = df['element_id'].unique()
            print(f" Available elements: {sorted(e for e in unique_ids if any(word in e.lower() for word in ('profit', 'revenue', 'income')))}")
            print(f" Gross elements: {sorted(e for e in unique_ids if 'Gross' in e)}")
            print(f" Cost elements: {sorted(e for e in unique_ids if 'Cost' in e)}")

            period_end, rev_val, gp_val, ni_val = _extract_metrics(df)
            if period_end is None:
                if html_fallback:
                    # NOTE(review): without revenue facts there is no period
                    # key, so the parsed figures are only logged, never
                    # charted. Confirm whether they should be stored.
                    # Assume year from filing_date (filing in next year).
                    year = pd.to_datetime(filing.filing_date).year - 1
                    parsed_rev, parsed_gp, parsed_ni = parse_20f_html(filing.text(), year)
                    print(f" Parsed 20-F: Rev {parsed_rev}, GP {parsed_gp}, NI {parsed_ni} for {year}")
                continue

            key = period_key(period_end)
            print(f" Revenues found: True, Gross Profit: {gp_val is not None}, Net Income: {ni_val is not None}")
            print(f" Revenue for {key}: {rev_val}")
            if gp_val is not None:
                print(f" Gross Profit for {key}: {gp_val}")
            else:
                print(f" Gross Profit not found for {key}")
            if ni_val is not None:
                print(f" Net Income for {key}: {ni_val}")

            for store, value in (
                (rev_by_period, rev_val),
                (gp_by_period, gp_val),
                (ni_by_period, ni_val),
            ):
                if value is not None:
                    store[key] = value
        except Exception as e:
            # Per-filing boundary: one bad filing must not abort the run.
            print(f"Error processing filing {filing.accession_number}: {e}")
    return rev_by_period, gp_by_period, ni_by_period


def _format_billions(val):
    """Format a value given in billions as ``$x.xB``, or ``$xM`` below one billion."""
    if abs(val) >= 1:
        return f'${val:.1f}B'
    return f'${val*1000:.0f}M'


def _plot_metrics(periods, rev_dict, gp_dict, ni_dict, title, xlabel, chart_file):
    """Draw grouped metric bars with margin lines and save the figure to *chart_file*.

    Periods missing from a dict are plotted as zero. The figure is closed
    after saving, and the target directory is created if necessary.
    """
    # dtype=float keeps an empty dict from producing an object-typed Series.
    rev = pd.Series(rev_dict, dtype=float).reindex(periods).fillna(0)
    gp = pd.Series(gp_dict, dtype=float).reindex(periods).fillna(0)
    ni = pd.Series(ni_dict, dtype=float).reindex(periods).fillna(0)

    # Mask zero revenue so margins become 0 instead of +/-inf, which
    # fillna() alone would leave in place.
    revenue_or_nan = rev.where(rev != 0)
    gross_margin = (gp / revenue_or_nan * 100).fillna(0)
    net_margin = (ni / revenue_or_nan * 100).fillna(0)

    fig, ax1 = plt.subplots(figsize=(12, 8))
    # Bars for Revenue, Gross Profit, Net Income
    x = range(len(periods))
    width = 0.25
    bar_groups = [
        ax1.bar([i - width for i in x], rev.values / 1e9, width, label='Revenue', color='blue'),
        ax1.bar(x, gp.values / 1e9, width, label='Gross Profit', color='orange'),
        ax1.bar([i + width for i in x], ni.values / 1e9, width, label='Net Income', color='green'),
    ]

    # Add value labels on bars; negative bars are labelled beneath the bar
    # so the text does not overlap it.
    label_offset = rev.max() / 1e9 * 0.01
    for bars in bar_groups:
        for bar in bars:
            height = bar.get_height()
            negative = height < 0
            ax1.text(
                bar.get_x() + bar.get_width() / 2.,
                height - label_offset if negative else height + label_offset,
                _format_billions(height),
                ha='center',
                va='top' if negative else 'bottom',
                fontsize=8,
            )

    ax1.set_xlabel(xlabel)
    ax1.set_ylabel('Billions USD')
    ax1.set_title(title)
    ax1.set_xticks(x)
    ax1.set_xticklabels(periods)
    ax1.legend(loc='upper left')
    ax1.grid(axis='y')

    # Secondary Y-axis for margins
    ax2 = ax1.twinx()
    ax2.plot(x, gross_margin.values, label='Gross Margin %', color='red', marker='o', linestyle='-')
    ax2.plot(x, net_margin.values, label='Net Margin %', color='purple', marker='s', linestyle='-')
    ax2.set_ylabel('Profit Margin (%)')
    ax2.legend(loc='upper right')

    fig.tight_layout()
    # savefig does not create missing directories.
    os.makedirs(os.path.dirname(chart_file) or '.', exist_ok=True)
    fig.savefig(chart_file)
    # Release the figure; otherwise each chart stays alive in pyplot.
    plt.close(fig)


def generate_charts(ticker):
    """Build quarterly and yearly financial charts for *ticker*.

    Reads the company's latest 20 10-Q and 5 10-K filings, extracts revenue,
    gross profit and net income from their XBRL facts, and writes
    ``charts/<ticker>_chart.png`` (quarterly) and
    ``charts/<ticker>_yearly_chart.png`` (yearly). Each chart is then opened
    with ``show_image``. Progress and diagnostics are printed to stdout.

    Returns None in all cases; returns early with a message when the company
    or its filings cannot be found, or when no quarterly revenue or net
    income could be extracted.

    Args:
        ticker: Ticker symbol of the company.
    """
    print(f"Generating charts for {ticker}...")
    company = Company(ticker)
    if company.not_found:
        print(f"Company {ticker} not found.")
        return

    # Get last 20 10-Q filings for quarterly data
    filings_10q = company.get_filings(form="10-Q", amendments=False).head(20)
    # Get last 5 10-K filings for yearly data
    filings_10k = company.get_filings(form="10-K", amendments=False).head(5)
    if not filings_10q:
        print("No 10-Q filings found.")
        return
    if not filings_10k:
        print("No 10-K filings found.")
        return

    rev_dict_quarterly, gp_dict_quarterly, ni_dict_quarterly = _collect_metrics(
        filings_10q, _quarter_key, html_fallback=True
    )
    rev_dict_yearly, gp_dict_yearly, ni_dict_yearly = _collect_metrics(
        filings_10k, _year_key
    )

    if not rev_dict_quarterly or not ni_dict_quarterly:
        print("Insufficient quarterly financial data across filings.")
        return
    print(f"Quarterly Rev dict: {rev_dict_quarterly}")
    print(f"Quarterly GP dict: {gp_dict_quarterly}")
    print(f"Quarterly NI dict: {ni_dict_quarterly}")

    # Get last 20 quarters. Keys look like "2024-Q2" (fixed width), so plain
    # string order is chronological order.
    quarters = sorted(
        set(rev_dict_quarterly) | set(gp_dict_quarterly) | set(ni_dict_quarterly)
    )[-20:]
    chart_file = f"charts/{ticker}_chart.png"
    _plot_metrics(
        quarters, rev_dict_quarterly, gp_dict_quarterly, ni_dict_quarterly,
        f'{company.name} - Financial Metrics (Last 20 Quarters)', 'Quarter', chart_file,
    )
    print(f"Quarterly chart saved to {chart_file}")
    show_image(chart_file)

    # Yearly chart
    if rev_dict_yearly or ni_dict_yearly:
        print(f"Yearly Rev dict: {rev_dict_yearly}")
        print(f"Yearly GP dict: {gp_dict_yearly}")
        print(f"Yearly NI dict: {ni_dict_yearly}")
        # Get last 5 years. Year keys are ints so the window can be padded
        # arithmetically when fewer than five years have data.
        all_years = sorted(set(rev_dict_yearly) | set(gp_dict_yearly) | set(ni_dict_yearly))
        if len(all_years) < 5:
            last_year = all_years[-1]
            years = list(range(last_year - 4, last_year + 1))
        else:
            years = all_years[-5:]
        chart_file_yearly = f"charts/{ticker}_yearly_chart.png"
        _plot_metrics(
            years, rev_dict_yearly, gp_dict_yearly, ni_dict_yearly,
            f'{company.name} - Financial Metrics (Last 5 Years)', 'Fiscal Year', chart_file_yearly,
        )
        print(f"Yearly chart saved to {chart_file_yearly}")
        show_image(chart_file_yearly)
if __name__ == "__main__":
    # Take the ticker from the first command-line argument; prompt for it
    # when none was given. Either way it is upper-cased before use.
    cli_args = sys.argv[1:]
    ticker = cli_args[0].upper() if cli_args else input("Enter ticker: ").strip().upper()
    generate_charts(ticker)