"""Print a summary of a locally saved SEC Form 4 HTML filing.

The script reads ``DUGGAN_SEC FORM 4.html`` from the current working
directory and prints, when present: the reporting person, the issuer, the
rows of the "Table I" transaction table, and the footnotes.
"""

import re

from bs4 import BeautifulSoup
import pandas as pd

# Read the local HTML file
with open('DUGGAN_SEC FORM 4.html', 'r', encoding='utf-8') as f:
    html_content = f.read()

# Parse the HTML
soup = BeautifulSoup(html_content, 'html.parser')

# Extract basic info.
# The first <td> (in document order) whose text contains the name is used;
# with nested tables that may be an enclosing cell rather than the innermost.
reporting_person = soup.find(
    lambda tag: tag.name == 'td' and 'DUGGAN ROBERT W' in tag.get_text()
)
if reporting_person is not None:
    print("Reporting Person:", reporting_person.get_text(strip=True))

# The issuer is taken from the first link whose href carries this CIK.
issuer = soup.find('a', href=re.compile(r'CIK=0001599298'))
if issuer is not None:
    print("Issuer:", issuer.get_text(strip=True))

# Find Table I: the first table containing a <th> whose string matches.
# NOTE(review): the pattern is an unanchored search, so it also matches
# "Table II"; this relies on Table I appearing first in the document.
table_i_pattern = re.compile('Table I')
table_i = next(
    (
        table
        for table in soup.find_all('table')
        # `string=` is the current name of the deprecated `text=` argument.
        if table.find('th', string=table_i_pattern)
    ),
    None,
)

if table_i is not None:
    tbody = table_i.find('tbody')
    if tbody is not None:
        transactions = []
        for row in tbody.find_all('tr'):
            cols = row.find_all('td')
            # Only rows with at least 11 cells are treated as transactions.
            if len(cols) < 11:
                continue
            # Cells 2 and 4 are intentionally not read.
            # NOTE(review): presumably the deemed execution date and the
            # "V" flag of Form 4 -- confirm against the filing layout.
            transactions.append({
                'title': cols[0].get_text(strip=True),
                'transaction_date': cols[1].get_text(strip=True),
                'code': cols[3].get_text(strip=True),
                'amount': cols[5].get_text(strip=True).replace(',', ''),
                'a_or_d': cols[6].get_text(strip=True),
                'price': cols[7].get_text(strip=True).replace('$', '').strip(),
                'beneficially_owned': cols[8].get_text(strip=True).replace(',', ''),
                'ownership_form': cols[9].get_text(strip=True),
                # The length guard above guarantees index 10 exists.
                'nature': cols[10].get_text(strip=True),
            })

        print("\nNon-Derivative Securities Transactions:")
        for tx in transactions:
            print(f"Title: {tx['title']}")
            print(f"Date: {tx['transaction_date']}, Code: {tx['code']}, Amount: {tx['amount']}, A/D: {tx['a_or_d']}, Price: ${tx['price']}")
            print(f"Beneficially Owned: {tx['beneficially_owned']}, Form: {tx['ownership_form']}")
            if tx['nature']:
                print(f"Nature: {tx['nature']}")
            print("-" * 50)

# Extract footnotes (numbered from 1 in document order)
footnotes = soup.find_all('td', class_='FootnoteData')
if footnotes:
    print("\nFootnotes:")
    for i, footnote in enumerate(footnotes, 1):
        print(f"{i}. {footnote.get_text(strip=True)}")

print("\nInvestigation complete.")