Initial commit
This commit is contained in:
419
chart_generator.py
Executable file
419
chart_generator.py
Executable file
@@ -0,0 +1,419 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
# Abort early, with activation instructions, when no virtual environment is
# active (detected through the VIRTUAL_ENV environment variable).
if not os.environ.get('VIRTUAL_ENV'):
    print("Virtual environment is not activated.")
    print("To activate: . venv/bin/activate")
    print("Then run: python chart_generator.py <TICKER>")
    # sys.exit is always available; the bare exit() helper is installed by the
    # site module and is absent when Python runs with -S or in frozen builds.
    sys.exit(1)
|
||||
|
||||
import pandas as pd
|
||||
from edgar import Company, set_identity, set_local_storage_path, use_local_storage, XBRL
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
|
||||
# The SEC requires callers to identify themselves; replace the placeholder
# address below with a real contact before running.
set_identity("your.email@example.com")

# Keep downloaded filings in ./edgar_cache (resolved against the current
# working directory) and create that directory on first run.
LOCAL_STORAGE_PATH = os.path.abspath("./edgar_cache")
os.makedirs(LOCAL_STORAGE_PATH, exist_ok=True)
use_local_storage(LOCAL_STORAGE_PATH)
|
||||
|
||||
def show_image(image_path):
    """Open *image_path* in the first desktop image viewer that works.

    Candidate viewers are tried in a fixed order. The call blocks until the
    launched viewer exits. A viewer that cannot be started, or that exits with
    a non-zero status, is reported and the next candidate is tried. When no
    candidate succeeds a notice is printed; nothing is raised.

    Args:
        image_path: Path of the image file to display.

    Returns:
        None.
    """
    # Local import: only this function needs shutil.
    import shutil

    viewers = ['eog', 'feh', 'gthumb', 'gwenview', 'shotwell', 'display']  # Common Linux image viewers
    for viewer in viewers:
        # shutil.which replaces spawning the external `which` program, which
        # is itself not guaranteed to be installed.
        if shutil.which(viewer) is None:
            continue
        try:
            completed = subprocess.run([viewer, image_path])
        except (OSError, subprocess.SubprocessError) as e:
            print(f"Failed to open with {viewer}: {e}")
            continue
        if completed.returncode != 0:
            # e.g. no display available: do not claim the chart was shown.
            print(f"Failed to open with {viewer}: exit status {completed.returncode}")
            continue
        print(f"Displayed chart with {viewer}")
        return
    print("No suitable image viewer found. Chart saved but not displayed.")
|
||||
|
||||
def parse_20f_html(html_content, year):
    """Scrape revenue, gross profit and net income from a filing's HTML text.

    The markup is reduced to lower-cased plain text. For each of the labels
    "net revenue", "gross profit" and "net income", the first large number
    that follows the label on the same line is taken as its value. This is a
    heuristic: the first qualifying number may be something other than the
    intended figure (for example a four-digit year).

    Args:
        html_content: HTML (or plain text) of the filing.
        year: Accepted for interface compatibility; not used by the parser.

    Returns:
        A ``(revenue, gross_profit, net_income)`` tuple of floats, with
        ``None`` for every metric whose label/number pair was not found.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    text = soup.get_text().lower()

    # A "large number" is either comma-grouped (1,234 / 12,345,678) or a bare
    # run of four or more digits, optionally followed by decimals. The earlier
    # pattern required four consecutive digits before the first comma, so
    # ordinary comma-grouped figures such as 1,234,567 were never matched.
    number = r'((?:\d+(?:,\d{3})+|\d{4,})(?:\.\d+)?)'

    def find_amount(label):
        # Lazy gap: take the first qualifying number after the label.
        match = re.search(label + r'.*?' + number, text)
        if match is None:
            return None
        return float(match.group(1).replace(',', ''))

    rev = find_amount('net revenue')
    gp = find_amount('gross profit')
    ni = find_amount('net income')
    return rev, gp, ni
|
||||
|
||||
def extract_number(text):
    """Parse the number embedded in *text*.

    Everything except digits, commas, dots, minus signs and parentheses is
    discarded. A value wrapped in parentheses is treated as negative
    (accounting notation) and commas are removed as thousands separators.

    Args:
        text: String containing a number, e.g. ``"$1,234.50"`` or ``"(1,000)"``.

    Returns:
        The parsed value as a float, or ``None`` when what remains is not a
        valid number (including when nothing remains).
    """
    cleaned = re.sub(r'[^\d,.\-\(\)]', '', text)
    if '(' in cleaned and ')' in cleaned:
        cleaned = '-' + cleaned.replace('(', '').replace(')', '')
    cleaned = cleaned.replace(',', '')
    try:
        return float(cleaned)
    except ValueError:
        # Only a malformed number is expected here; a bare except would also
        # swallow KeyboardInterrupt and SystemExit.
        return None
|
||||
|
||||
# XBRL concepts tried, in priority order, when looking for revenue.
_REVENUE_ELEMENTS = (
    'us-gaap:RevenueFromContractWithCustomerExcludingAssessedTax',
    'us-gaap:Revenues',
    'us-gaap:SalesRevenueNet',
)
# Gross profit concepts; matched by substring on the local name (see below).
_GROSS_PROFIT_ELEMENTS = ('us-gaap:GrossProfit', 'us-gaap:GrossMargin')
_COGS_ELEMENT = 'us-gaap:CostOfGoodsAndServicesSold'
_NET_INCOME_ELEMENT = 'us-gaap:NetIncomeLoss'


def _quarter_key(period_end):
    """Return a label such as ``2023-Q2`` for the quarter containing *period_end*."""
    return pd.to_datetime(period_end).to_period('Q').strftime('%Y-Q%q')


def _year_key(period_end):
    """Return the calendar year of *period_end* as a string, e.g. ``"2023"``."""
    return str(pd.to_datetime(period_end).year)


def _max_value(facts):
    """Return the largest numeric entry of the ``value`` column of *facts*."""
    return pd.to_numeric(facts['value'], errors='coerce').max()


def _load_facts(filing):
    """Return the XBRL facts frame of *filing*, or None when it has no XBRL."""
    xbrl = XBRL.from_filing(filing)
    if not xbrl:
        print(" No XBRL found")
        return None
    return xbrl.to_pandas()['facts']


def _extract_filing_metrics(df, make_key):
    """Pull revenue, gross profit and net income out of one filing's facts.

    Args:
        df: Facts frame; the ``element_id``, ``period_end`` and ``value``
            columns are read.
        make_key: Callable turning the reporting period end into the
            dictionary key / axis label (quarter or year).

    Returns:
        ``(key, revenue, gross_profit, net_income)``. ``key`` is None when no
        revenue fact exists; each metric is None when it was not found.
    """
    print(f"df columns: {list(df.columns)}")
    element_ids = df['element_id'].unique()
    print(f" Available elements: {sorted([e for e in element_ids if 'profit' in e.lower() or 'revenue' in e.lower() or 'income' in e.lower()])}")
    print(f" Gross elements: {sorted([e for e in element_ids if 'Gross' in e])}")
    print(f" Cost elements: {sorted([e for e in element_ids if 'Cost' in e])}")

    # Revenue: the first concept in priority order that has any fact.
    revenues = pd.DataFrame()
    for elem in _REVENUE_ELEMENTS:
        temp = df[df['element_id'] == elem]
        if not temp.empty:
            revenues = temp
            print(f" Found revenue element: {elem}")
            break

    rev_val = gp_val = ni_val = period_end = key = None
    if not revenues.empty:
        period_end = revenues['period_end'].max()
        key = make_key(period_end)
        # NOTE(review): this is the largest revenue fact in the filing across
        # all periods; in a 10-Q that may be a year-to-date figure - confirm.
        rev_val = _max_value(revenues)
        print(f" Revenue for {key}: {rev_val}")

    # Gross profit: substring match on the concept's local name, restricted to
    # the revenue period when one is known.
    gross_profits = pd.DataFrame()
    for elem in _GROSS_PROFIT_ELEMENTS:
        mask = df['element_id'].str.contains(elem.split(':')[1], na=False, regex=False)
        if period_end:
            temp = df[mask & (df['period_end'] == period_end)]
            print(f" Checking {elem} for period {period_end}: {temp.shape}")
        else:
            temp = df[mask]
            print(f" Checking {elem}: {temp.shape}")
        if not temp.empty:
            gross_profits = temp
            print(f" Found gross profit element: {elem}")
            break

    # No reported gross profit: derive it as revenue minus cost of goods sold.
    if gross_profits.empty and period_end and pd.notna(rev_val) and rev_val:
        cogs = df[(df['element_id'] == _COGS_ELEMENT) & (df['period_end'] == period_end)]
        if not cogs.empty:
            cogs_val = _max_value(cogs)
            # NaN is truthy, so test for it explicitly before subtracting.
            if pd.notna(cogs_val) and cogs_val:
                gp_val = rev_val - cogs_val
                print(f" Calculated Gross Profit from Revenue - COGS: {gp_val}")

    ni_mask = df['element_id'] == _NET_INCOME_ELEMENT
    if period_end:
        ni_mask = ni_mask & (df['period_end'] == period_end)
    net_incomes = df[ni_mask]

    gp_found = (not gross_profits.empty) or gp_val is not None
    print(f" Revenues found: {not revenues.empty}, Gross Profit: {gp_found}, Net Income: {not net_incomes.empty}")

    if not gross_profits.empty:
        # Largest fact, like revenue, COGS and net income. Summing would add
        # together every fact sharing this period end.
        gp_val = _max_value(gross_profits)
    if gp_val is not None:
        print(f" Gross Profit for {key}: {gp_val}")
    else:
        print(f" Gross Profit not found for {key}")

    if not net_incomes.empty:
        ni_val = _max_value(net_incomes)
        print(f" Net Income for {key}: {ni_val}")

    return key, rev_val, gp_val, ni_val


def _record(key, values, targets):
    """Store each non-None value under *key* in its matching target dict."""
    if not key:
        return
    for value, target in zip(values, targets):
        if value is not None:
            target[key] = value


def _format_billions(val):
    """Format *val* (billions of USD) as ``$1.2B``, or ``$345M`` below one billion."""
    if abs(val) >= 1:
        return f'${val:.1f}B'
    return f'${val*1000:.0f}M'


def _margin_percent(numerator, revenue):
    """Return numerator / revenue in percent, with 0 wherever revenue is zero."""
    # Mask zero revenue so the division gives NaN (then 0) rather than +/-inf.
    return (numerator / revenue.where(revenue != 0) * 100).fillna(0)


def _plot_metrics(labels, rev, gp, ni, title, xlabel, chart_file):
    """Draw grouped bars plus margin lines for one period axis and save the PNG.

    Args:
        labels: Axis labels (quarters or years), in plotting order.
        rev, gp, ni: Series of revenue, gross profit and net income in USD,
            indexed by *labels* with no missing values.
        title: Chart title.
        xlabel: Label of the x axis.
        chart_file: Destination path; its directory is created when missing.
    """
    gross_margin = _margin_percent(gp, rev)
    net_margin = _margin_percent(ni, rev)

    fig, ax1 = plt.subplots(figsize=(12, 8))

    # Bars for Revenue, Gross Profit, Net Income (in billions)
    x = range(len(labels))
    width = 0.25
    bar_groups = [
        ax1.bar([i - width for i in x], rev.values / 1e9, width, label='Revenue', color='blue'),
        ax1.bar(x, gp.values / 1e9, width, label='Gross Profit', color='orange'),
        ax1.bar([i + width for i in x], ni.values / 1e9, width, label='Net Income', color='green'),
    ]

    # Value labels sit slightly above each bar; the gap scales with revenue.
    label_offset = rev.max() / 1e9 * 0.01
    for bars in bar_groups:
        for bar in bars:
            height = bar.get_height()
            ax1.text(bar.get_x() + bar.get_width() / 2., height + label_offset,
                     _format_billions(height), ha='center', va='bottom', fontsize=8)

    ax1.set_xlabel(xlabel)
    ax1.set_ylabel('Billions USD')
    ax1.set_title(title)
    ax1.set_xticks(x)
    ax1.set_xticklabels(labels)
    ax1.legend(loc='upper left')
    ax1.grid(axis='y')

    # Secondary Y-axis for margins
    ax2 = ax1.twinx()
    ax2.plot(x, gross_margin.values, label='Gross Margin %', color='red', marker='o', linestyle='-')
    ax2.plot(x, net_margin.values, label='Net Margin %', color='purple', marker='s', linestyle='-')
    ax2.set_ylabel('Profit Margin (%)')
    ax2.legend(loc='upper right')

    plt.tight_layout()
    # savefig does not create missing directories.
    os.makedirs(os.path.dirname(chart_file) or '.', exist_ok=True)
    fig.savefig(chart_file)
    # Release the figure; it is viewed from the saved file, not from pyplot.
    plt.close(fig)


def generate_charts(ticker):
    """Build quarterly and yearly financial charts for *ticker*.

    Reads the company's latest 10-Q filings (up to 20) and 10-K filings (up to
    5), extracts revenue, gross profit and net income from their XBRL facts,
    and writes ``charts/<ticker>_chart.png`` (quarterly) and
    ``charts/<ticker>_yearly_chart.png`` (yearly). Each chart is then opened
    with ``show_image``. Progress and diagnostics are printed to stdout.

    A filing that fails to process is reported and skipped. The function
    returns early, after printing a message, when the company or its filings
    are not found or when no quarterly revenue or net income could be read.

    Args:
        ticker: Ticker symbol passed to the company lookup and used in the
            output file names.

    Returns:
        None.
    """
    print(f"Generating charts for {ticker}...")

    company = Company(ticker)
    if company.not_found:
        print(f"Company {ticker} not found.")
        return

    # Get last 20 10-Q filings for quarterly data
    filings_10q = company.get_filings(form="10-Q", amendments=False).head(20)
    # Get last 5 10-K filings for yearly data
    filings_10k = company.get_filings(form="10-K", amendments=False).head(5)

    if not filings_10q:
        print("No 10-Q filings found.")
        return
    if not filings_10k:
        print("No 10-K filings found.")
        return

    # Collect data from each filing
    rev_dict_quarterly = {}
    gp_dict_quarterly = {}
    ni_dict_quarterly = {}
    rev_dict_yearly = {}
    gp_dict_yearly = {}
    ni_dict_yearly = {}

    for filing in filings_10q:
        print(f"Processing filing {filing.accession_number} from {filing.filing_date}")
        try:
            df = _load_facts(filing)
            if df is None:
                continue
            quarter_key, rev_val, gp_val, ni_val = _extract_filing_metrics(df, _quarter_key)

            if ni_val is None:
                # No net income in XBRL: fall back to scraping the filing text
                # (Parse 20-F HTML). Scraped figures only fill gaps; they never
                # replace a value already read from XBRL.
                html_content = filing.text()
                # Assume year from filing_date
                year = pd.to_datetime(filing.filing_date).year - 1  # Filing in next year
                text_rev, text_gp, text_ni = parse_20f_html(html_content, year)
                print(f" Parsed 20-F: Rev {text_rev}, GP {text_gp}, NI {text_ni} for {year}")
                if rev_val is None:
                    rev_val = text_rev
                if gp_val is None:
                    gp_val = text_gp
                ni_val = text_ni

            _record(quarter_key, (rev_val, gp_val, ni_val),
                    (rev_dict_quarterly, gp_dict_quarterly, ni_dict_quarterly))
        except Exception as e:
            # Best effort per filing: report the problem and move on.
            print(f"Error processing filing {filing.accession_number}: {e}")
            continue

    # Process 10-K filings for yearly data
    for filing in filings_10k:
        print(f"Processing filing {filing.accession_number} from {filing.filing_date}")
        try:
            df = _load_facts(filing)
            if df is None:
                continue
            year_key, rev_val, gp_val, ni_val = _extract_filing_metrics(df, _year_key)
            _record(year_key, (rev_val, gp_val, ni_val),
                    (rev_dict_yearly, gp_dict_yearly, ni_dict_yearly))
        except Exception as e:
            print(f"Error processing filing {filing.accession_number}: {e}")
            continue

    if not rev_dict_quarterly or not ni_dict_quarterly:
        print("Insufficient quarterly financial data across filings.")
        return

    print(f"Quarterly Rev dict: {rev_dict_quarterly}")
    print(f"Quarterly GP dict: {gp_dict_quarterly}")
    print(f"Quarterly NI dict: {ni_dict_quarterly}")

    # Convert to Series (explicit dtype keeps an empty dict numeric)
    rev_data = pd.Series(rev_dict_quarterly, dtype=float)
    gp_data = pd.Series(gp_dict_quarterly, dtype=float)
    ni_data = pd.Series(ni_dict_quarterly, dtype=float)

    # Get last 20 quarters, in chronological order
    all_quarters = sorted(set(rev_data.index) | set(gp_data.index) | set(ni_data.index),
                          key=lambda q: pd.Period(q, 'Q'))
    quarters = all_quarters[-20:]
    rev_data = rev_data.reindex(quarters).fillna(0)
    gp_data = gp_data.reindex(quarters).fillna(0)
    ni_data = ni_data.reindex(quarters).fillna(0)

    chart_file = f"charts/{ticker}_chart.png"
    _plot_metrics(quarters, rev_data, gp_data, ni_data,
                  f'{company.name} - Financial Metrics (Last 20 Quarters)',
                  'Quarter', chart_file)
    print(f"Quarterly chart saved to {chart_file}")
    show_image(chart_file)

    # Yearly chart
    if rev_dict_yearly or ni_dict_yearly:
        print(f"Yearly Rev dict: {rev_dict_yearly}")
        print(f"Yearly GP dict: {gp_dict_yearly}")
        print(f"Yearly NI dict: {ni_dict_yearly}")

        rev_data_yearly = pd.Series(rev_dict_yearly, dtype=float)
        gp_data_yearly = pd.Series(gp_dict_yearly, dtype=float)
        ni_data_yearly = pd.Series(ni_dict_yearly, dtype=float)

        # Get last 5 years. Keys are strings such as "2023", so do the
        # arithmetic on ints and convert back before reindexing.
        all_years = sorted(set(rev_data_yearly.index) | set(gp_data_yearly.index) | set(ni_data_yearly.index),
                           key=int)
        if len(all_years) < 5:
            # Pad with earlier (empty) years so the axis always spans five
            # consecutive years ending at the latest one found.
            latest_year = int(all_years[-1])
            years = [str(y) for y in range(latest_year - 4, latest_year + 1)]
        else:
            years = all_years[-5:]
        rev_data_yearly = rev_data_yearly.reindex(years).fillna(0)
        gp_data_yearly = gp_data_yearly.reindex(years).fillna(0)
        ni_data_yearly = ni_data_yearly.reindex(years).fillna(0)

        chart_file_yearly = f"charts/{ticker}_yearly_chart.png"
        _plot_metrics(years, rev_data_yearly, gp_data_yearly, ni_data_yearly,
                      f'{company.name} - Financial Metrics (Last 5 Years)',
                      'Fiscal Year', chart_file_yearly)
        print(f"Yearly chart saved to {chart_file_yearly}")
        show_image(chart_file_yearly)
|
||||
|
||||
if __name__ == "__main__":
    # Take the ticker from the first command-line argument, or prompt for it.
    if len(sys.argv) > 1:
        ticker = sys.argv[1]
    else:
        ticker = input("Enter ticker: ")
    ticker = ticker.strip().upper()

    if not ticker:
        # A blank ticker would otherwise go straight to the company lookup.
        print("No ticker provided.")
        sys.exit(1)

    generate_charts(ticker)
|
||||
Reference in New Issue
Block a user