#!/usr/bin/env python3
"""Chart revenue, gross profit, net income and margins from SEC filings.

For a ticker, the script reads the most recent 10-Q and 10-K filings through
the ``edgar`` package, pulls the metrics out of each filing's XBRL facts and
writes two PNG charts (quarterly and yearly) into the ``charts`` directory,
then tries to open each chart in a local image viewer.
"""
import os
import re
import shutil
import subprocess
import sys

# Check if virtual environment is activated.  This runs before any
# third-party import so that a missing venv produces the hint below instead
# of an ImportError.
if not os.environ.get('VIRTUAL_ENV'):
    print("Virtual environment is not activated.")
    print("To activate: . venv/bin/activate")
    print("Then run: python chart_generator.py ")
    sys.exit(1)

import matplotlib.pyplot as plt  # noqa: E402
import pandas as pd  # noqa: E402
from bs4 import BeautifulSoup  # noqa: E402
from edgar import (  # noqa: E402
    XBRL,
    Company,
    set_identity,
    set_local_storage_path,
    use_local_storage,
)

# Set your identity (required by SEC)
set_identity("your.email@example.com")

# Enable local storage for caching filings
LOCAL_STORAGE_PATH = os.path.abspath("./edgar_cache")
os.makedirs(LOCAL_STORAGE_PATH, exist_ok=True)
use_local_storage(LOCAL_STORAGE_PATH)

# Common Linux image viewers, tried in this order.
_IMAGE_VIEWERS = ('eog', 'feh', 'gthumb', 'gwenview', 'shotwell', 'display')

# XBRL element ids, in priority order where there are several candidates.
_REVENUE_ELEMENTS = (
    'us-gaap:RevenueFromContractWithCustomerExcludingAssessedTax',
    'us-gaap:Revenues',
    'us-gaap:SalesRevenueNet',
)
_GROSS_PROFIT_ELEMENTS = ('us-gaap:GrossProfit', 'us-gaap:GrossMargin')
_COGS_ELEMENT = 'us-gaap:CostOfGoodsAndServicesSold'
_NET_INCOME_ELEMENT = 'us-gaap:NetIncomeLoss'

_CHART_DIR = "charts"
_MAX_QUARTERS = 20
_MAX_YEARS = 5

# A "large number": either comma-grouped digits (1,234,567) or a run of at
# least four digits (1234567), optionally followed by a decimal part.
_AMOUNT_PATTERN = r'((?:\d+(?:,\d{3})+|\d{4,})(?:\.\d+)?)'


def show_image(image_path):
    """Open *image_path* with the first installed viewer from a fixed list.

    The call blocks until the viewer process exits.  If a viewer cannot be
    launched the next one is tried; if none works a message is printed and
    the function returns normally.
    """
    for viewer in _IMAGE_VIEWERS:
        # shutil.which avoids depending on an external `which` binary.
        if shutil.which(viewer) is None:
            continue
        try:
            subprocess.run([viewer, image_path])
        except (OSError, subprocess.SubprocessError) as e:
            print(f"Failed to open with {viewer}: {e}")
            continue
        print(f"Displayed chart with {viewer}")
        return
    print("No suitable image viewer found. Chart saved but not displayed.")


def _find_amount(text, label):
    """Return the first large number following *label* in *text*, or None.

    The search does not cross line breaks ('.' does not match a newline).
    """
    match = re.search(re.escape(label) + r'.*?' + _AMOUNT_PATTERN, text)
    if match is None:
        return None
    return float(match.group(1).replace(',', ''))


def parse_20f_html(html_content, year):
    """Heuristically pull revenue, gross profit and net income out of HTML.

    Args:
        html_content: Filing markup; it is reduced to lower-cased plain text.
        year: Unused; kept so existing callers keep working.

    Returns:
        A ``(revenue, gross_profit, net_income)`` tuple of floats, with None
        for any metric whose label is not followed by a large number.
    """
    text = BeautifulSoup(html_content, 'html.parser').get_text().lower()
    return (
        _find_amount(text, 'net revenue'),
        _find_amount(text, 'gross profit'),
        _find_amount(text, 'net income'),
    )


def extract_number(text):
    """Convert a formatted figure such as ``"(1,234.5)"`` to a float.

    Everything except digits, commas, dots, minus signs and parentheses is
    dropped; a value containing both parentheses is treated as negative.

    Returns:
        The parsed float, or None when the remaining text is not a number.
    """
    cleaned = re.sub(r'[^\d,.\-\(\)]', '', text)
    if '(' in cleaned and ')' in cleaned:
        cleaned = '-' + cleaned.replace('(', '').replace(')', '')
    cleaned = cleaned.replace(',', '')
    try:
        return float(cleaned)
    except ValueError:
        return None


def _quarter_key(period_end):
    """Format a period end date as a quarter label such as ``2023-Q3``."""
    return pd.to_datetime(period_end).to_period('Q').strftime('%Y-Q%q')


def _year_key(period_end):
    """Format a period end date as its calendar year, as a string."""
    return str(pd.to_datetime(period_end).year)


def _max_value(rows):
    """Return the largest numeric entry of the ``value`` column of *rows*."""
    return pd.to_numeric(rows['value'], errors='coerce').max()


def _log_candidate_elements(df):
    """Print the element ids that look relevant, as a debugging aid."""
    element_ids = df['element_id'].unique()
    keywords = ('profit', 'revenue', 'income')
    related = sorted(
        e for e in element_ids if any(k in e.lower() for k in keywords)
    )
    gross = sorted(e for e in element_ids if 'Gross' in e)
    cost = sorted(e for e in element_ids if 'Cost' in e)
    print(f"df columns: {list(df.columns)}")
    print(f" Available elements: {related}")
    print(f" Gross elements: {gross}")
    print(f" Cost elements: {cost}")


def _extract_filing_metrics(filing, period_key, html_fallback=False):
    """Extract one filing's revenue, gross profit and net income.

    Args:
        filing: Filing object accepted by ``XBRL.from_filing``.
        period_key: Callable turning the filing's period end into the label
            under which the values are stored (quarter or year).
        html_fallback: When true and no net income fact exists for the
            period, fill the still-missing metrics from the filing text.

    Returns:
        ``(key, revenue, gross_profit, net_income)`` where any element may be
        None, or None when the filing has no XBRL.
    """
    xbrl = XBRL.from_filing(filing)
    if not xbrl:
        print(" No XBRL found")
        return None

    df = xbrl.to_pandas()['facts']
    _log_candidate_elements(df)

    # Revenue: the first tag in priority order that has any facts.
    revenues = pd.DataFrame()
    for elem in _REVENUE_ELEMENTS:
        rows = df[df['element_id'] == elem]
        if not rows.empty:
            revenues = rows
            print(f" Found revenue element: {elem}")
            break

    key = period_end = rev_val = gp_val = ni_val = None
    if not revenues.empty:
        period_end = revenues['period_end'].max()
        key = period_key(period_end)
        # NOTE(review): the maximum is taken over every revenue fact in the
        # filing, not only those ending at period_end -- presumably this can
        # pick a year-to-date or prior-period figure; confirm against the
        # facts schema before relying on exact values.
        rev_val = _max_value(revenues)
        print(f" Revenue for {key}: {rev_val}")

    # Remaining metrics are restricted to the revenue period when known.
    in_period = (df['period_end'] == period_end) if period_end else None

    # Gross profit: substring match on the local tag name.
    gross_profits = pd.DataFrame()
    for elem in _GROSS_PROFIT_ELEMENTS:
        mask = df['element_id'].str.contains(
            elem.split(':')[1], na=False, regex=False
        )
        if in_period is not None:
            mask = mask & in_period
        rows = df[mask]
        if in_period is not None:
            print(f" Checking {elem} for period {period_end}: {rows.shape}")
        else:
            print(f" Checking {elem}: {rows.shape}")
        if not rows.empty:
            gross_profits = rows
            print(f" Found gross profit element: {elem}")
            break

    if not gross_profits.empty:
        # max() rather than sum(): several facts can share a period end and
        # adding them up double-counts; this also matches how revenue and
        # net income are aggregated.
        gp_val = _max_value(gross_profits)
    elif period_end and rev_val:
        # If no direct GP found, try calculating from Revenue - COGS.
        cogs = df[(df['element_id'] == _COGS_ELEMENT) & in_period]
        if not cogs.empty:
            cogs_val = _max_value(cogs)
            if cogs_val:
                gp_val = rev_val - cogs_val
                print(
                    " Calculated Gross Profit from Revenue - COGS: "
                    f"{gp_val}"
                )

    ni_mask = df['element_id'] == _NET_INCOME_ELEMENT
    if in_period is not None:
        ni_mask = ni_mask & in_period
    net_incomes = df[ni_mask]

    print(
        f" Revenues found: {not revenues.empty}, "
        f"Gross Profit: {gp_val is not None}, "
        f"Net Income: {not net_incomes.empty}"
    )
    if gp_val is None:
        print(f" Gross Profit not found for {key}")
    else:
        print(f" Gross Profit for {key}: {gp_val}")

    if not net_incomes.empty:
        ni_val = _max_value(net_incomes)
        print(f" Net Income for {key}: {ni_val}")
    elif html_fallback and key:
        # Without a key nothing could be stored, so the text is not fetched.
        html_content = filing.text()
        # Assume the reported year is the one before the filing date.
        year = pd.to_datetime(filing.filing_date).year - 1
        parsed_rev, parsed_gp, parsed_ni = parse_20f_html(html_content, year)
        print(
            f" Parsed 20-F: Rev {parsed_rev}, GP {parsed_gp}, "
            f"NI {parsed_ni} for {year}"
        )
        # Only fill the gaps; never replace values already read from XBRL.
        if rev_val is None:
            rev_val = parsed_rev
        if gp_val is None:
            gp_val = parsed_gp
        ni_val = parsed_ni

    return key, rev_val, gp_val, ni_val


def _collect_metrics(filings, period_key, html_fallback=False):
    """Build ``{period label: value}`` dicts for revenue, GP and net income.

    A filing that raises is reported and skipped so one bad filing does not
    abort the whole run.
    """
    rev_by_period, gp_by_period, ni_by_period = {}, {}, {}
    for filing in filings:
        print(
            f"Processing filing {filing.accession_number} "
            f"from {filing.filing_date}"
        )
        try:
            metrics = _extract_filing_metrics(
                filing, period_key, html_fallback
            )
        except Exception as e:  # deliberate best-effort boundary
            print(f"Error processing filing {filing.accession_number}: {e}")
            continue
        if metrics is None:
            continue
        key, rev_val, gp_val, ni_val = metrics
        if not key:
            continue
        if rev_val is not None:
            rev_by_period[key] = rev_val
        if gp_val is not None:
            gp_by_period[key] = gp_val
        if ni_val is not None:
            ni_by_period[key] = ni_val
    return rev_by_period, gp_by_period, ni_by_period


def _aligned_series(values_by_period, labels):
    """Return a float Series indexed by *labels*, with 0 for missing ones."""
    # An explicit dtype keeps an empty dict from producing an object Series.
    series = pd.Series(values_by_period, dtype='float64')
    return series.reindex(labels).fillna(0)


def _margin_pct(numerator, revenue):
    """Return numerator / revenue in percent, 0 where revenue is zero."""
    # Masking zero revenue avoids inf, which fillna would leave in place.
    return (numerator / revenue.where(revenue != 0) * 100).fillna(0)


def _format_billions(val):
    """Label a value given in billions as ``$x.xB`` or ``$xM``."""
    if abs(val) >= 1:
        return f'${val:.1f}B'
    return f'${val*1000:.0f}M'


def _yearly_axis(years):
    """Return up to the latest five year labels, left-padded to five.

    *years* holds year strings; padding labels are the years immediately
    before the earliest one so the chart always shows five slots.
    """
    recent = sorted(years, key=int)[-_MAX_YEARS:]
    if not recent:
        return []
    earliest = int(recent[0])
    missing = _MAX_YEARS - len(recent)
    padding = [str(earliest - back) for back in range(missing, 0, -1)]
    return padding + recent


def _plot_metrics(labels, rev, gp, ni, title, xlabel, chart_file):
    """Draw the bar/line chart for one set of periods and save it.

    Bars show revenue, gross profit and net income in billions; a secondary
    axis shows gross and net margin in percent.  The target directory is
    created when missing and the figure is closed after saving.
    """
    gross_margin = _margin_pct(gp, rev)
    net_margin = _margin_pct(ni, rev)

    fig, ax1 = plt.subplots(figsize=(12, 8))
    x = list(range(len(labels)))
    width = 0.25
    bar_groups = [
        ax1.bar([i - width for i in x], rev.values / 1e9, width,
                label='Revenue', color='blue'),
        ax1.bar(x, gp.values / 1e9, width,
                label='Gross Profit', color='orange'),
        ax1.bar([i + width for i in x], ni.values / 1e9, width,
                label='Net Income', color='green'),
    ]

    # Value labels sit slightly above each bar, scaled to the revenue range.
    label_offset = max(rev.values / 1e9) * 0.01
    for bars in bar_groups:
        for bar in bars:
            height = bar.get_height()
            ax1.text(bar.get_x() + bar.get_width() / 2.,
                     height + label_offset,
                     _format_billions(height),
                     ha='center', va='bottom', fontsize=8)

    ax1.set_xlabel(xlabel)
    ax1.set_ylabel('Billions USD')
    ax1.set_title(title)
    ax1.set_xticks(x)
    ax1.set_xticklabels(labels)
    ax1.legend(loc='upper left')
    ax1.grid(axis='y')

    # Secondary Y-axis for margins
    ax2 = ax1.twinx()
    ax2.plot(x, gross_margin.values, label='Gross Margin %',
             color='red', marker='o', linestyle='-')
    ax2.plot(x, net_margin.values, label='Net Margin %',
             color='purple', marker='s', linestyle='-')
    ax2.set_ylabel('Profit Margin (%)')
    ax2.legend(loc='upper right')

    fig.tight_layout()
    os.makedirs(os.path.dirname(chart_file) or '.', exist_ok=True)
    fig.savefig(chart_file)
    plt.close(fig)


def generate_charts(ticker):
    """Create and display quarterly and yearly metric charts for *ticker*.

    Reads up to 20 10-Q and 5 10-K filings, saves
    ``charts/<ticker>_chart.png`` and ``charts/<ticker>_yearly_chart.png``
    and passes each to ``show_image``.  Returns None; when the company or
    the required filings/data are missing a message is printed and nothing
    is drawn.
    """
    print(f"Generating charts for {ticker}...")
    company = Company(ticker)
    if company.not_found:
        print(f"Company {ticker} not found.")
        return

    # Get last 20 10-Q filings for quarterly data
    filings_10q = company.get_filings(
        form="10-Q", amendments=False
    ).head(_MAX_QUARTERS)
    # Get last 5 10-K filings for yearly data
    filings_10k = company.get_filings(
        form="10-K", amendments=False
    ).head(_MAX_YEARS)
    if not filings_10q:
        print("No 10-Q filings found.")
        return
    if not filings_10k:
        print("No 10-K filings found.")
        return

    rev_dict_quarterly, gp_dict_quarterly, ni_dict_quarterly = (
        _collect_metrics(filings_10q, _quarter_key, html_fallback=True)
    )
    rev_dict_yearly, gp_dict_yearly, ni_dict_yearly = (
        _collect_metrics(filings_10k, _year_key)
    )

    if not rev_dict_quarterly or not ni_dict_quarterly:
        print("Insufficient quarterly financial data across filings.")
        return

    print(f"Quarterly Rev dict: {rev_dict_quarterly}")
    print(f"Quarterly GP dict: {gp_dict_quarterly}")
    print(f"Quarterly NI dict: {ni_dict_quarterly}")

    # Get last 20 quarters, in chronological order
    all_quarters = sorted(
        set(rev_dict_quarterly) | set(gp_dict_quarterly)
        | set(ni_dict_quarterly),
        key=lambda label: pd.Period(label, 'Q'),
    )
    quarters = all_quarters[-_MAX_QUARTERS:]

    chart_file = os.path.join(_CHART_DIR, f"{ticker}_chart.png")
    _plot_metrics(
        quarters,
        _aligned_series(rev_dict_quarterly, quarters),
        _aligned_series(gp_dict_quarterly, quarters),
        _aligned_series(ni_dict_quarterly, quarters),
        title=f'{company.name} - Financial Metrics (Last 20 Quarters)',
        xlabel='Quarter',
        chart_file=chart_file,
    )
    print(f"Quarterly chart saved to {chart_file}")
    show_image(chart_file)

    # Yearly chart
    if rev_dict_yearly or ni_dict_yearly:
        print(f"Yearly Rev dict: {rev_dict_yearly}")
        print(f"Yearly GP dict: {gp_dict_yearly}")
        print(f"Yearly NI dict: {ni_dict_yearly}")

        years = _yearly_axis(
            set(rev_dict_yearly) | set(gp_dict_yearly) | set(ni_dict_yearly)
        )
        chart_file_yearly = os.path.join(
            _CHART_DIR, f"{ticker}_yearly_chart.png"
        )
        _plot_metrics(
            years,
            _aligned_series(rev_dict_yearly, years),
            _aligned_series(gp_dict_yearly, years),
            _aligned_series(ni_dict_yearly, years),
            title=f'{company.name} - Financial Metrics (Last 5 Years)',
            xlabel='Fiscal Year',
            chart_file=chart_file_yearly,
        )
        print(f"Yearly chart saved to {chart_file_yearly}")
        show_image(chart_file_yearly)


def main():
    """Read the ticker from argv or stdin and generate its charts."""
    if len(sys.argv) > 1:
        ticker = sys.argv[1].strip().upper()
    else:
        ticker = input("Enter ticker: ").strip().upper()
    if not ticker:
        print("No ticker provided.")
        sys.exit(1)
    generate_charts(ticker)


if __name__ == "__main__":
    main()