Files
edgartools/parse_duggan_filing.py
2025-12-09 12:13:01 +01:00

65 lines
2.4 KiB
Python

import pandas as pd
from bs4 import BeautifulSoup
import re
# Parse a locally saved SEC Form 4 HTML page and print a plain-text summary:
# the reporting person, the issuer, every row of "Table I" (non-derivative
# securities transactions) and the footnotes.
#
# The script runs top to bottom at module level and reports only via print().

# Inputs that identify this particular filing.
FILING_PATH = 'DUGGAN_SEC FORM 4.html'
REPORTING_PERSON_NAME = 'DUGGAN ROBERT W'
ISSUER_LINK_PATTERN = re.compile(r'CIK=0001599298')

# The trailing \b keeps the pattern from also matching a "Table II" header.
TABLE_I_HEADER_PATTERN = re.compile(r'Table I\b')

# A row needs at least this many <td> cells, since the highest index read
# below is 10.
MIN_TABLE_I_COLUMNS = 11

# Read the local HTML file
with open(FILING_PATH, 'r', encoding='utf-8') as f:
    html_content = f.read()

# Parse the HTML
soup = BeautifulSoup(html_content, 'html.parser')

# Extract basic info
# NOTE(review): this takes the FIRST <td> in document order whose text contains
# the name; if the page nests tables, that may be an enclosing cell with extra
# text around the name - confirm against the saved HTML.
reporting_person = soup.find(
    lambda tag: tag.name == 'td' and REPORTING_PERSON_NAME in tag.get_text()
)
if reporting_person:
    print("Reporting Person:", reporting_person.get_text(strip=True))

issuer = soup.find('a', href=ISSUER_LINK_PATTERN)
if issuer:
    print("Issuer:", issuer.get_text(strip=True))

# Find Table I: the first <table> that contains a matching <th>.
# `string=` is the current name of the argument formerly spelled `text=`.
table_i = None
for table in soup.find_all('table'):
    if table.find('th', string=TABLE_I_HEADER_PATTERN):
        table_i = table
        break

# Always bound, even when Table I or its <tbody> is missing.
transactions = []
tbody = table_i.find('tbody') if table_i is not None else None

if tbody:
    rows = tbody.find_all('tr')
    for row in rows:
        cols = row.find_all('td')
        if len(cols) < MIN_TABLE_I_COLUMNS:
            # Too few cells to be a transaction row; skip it.
            continue
        # Cells 2 and 4 are intentionally not read (presumably the deemed
        # execution date and the "V" flag - TODO confirm against the form).
        transactions.append({
            'title': cols[0].get_text(strip=True),
            'transaction_date': cols[1].get_text(strip=True),
            'code': cols[3].get_text(strip=True),
            # Thousands separators are dropped; the value stays a string.
            'amount': cols[5].get_text(strip=True).replace(',', ''),
            'a_or_d': cols[6].get_text(strip=True),
            # The "$" is removed here and re-added when printing.
            'price': cols[7].get_text(strip=True).replace('$', '').strip(),
            'beneficially_owned': cols[8].get_text(strip=True).replace(',', ''),
            'ownership_form': cols[9].get_text(strip=True),
            # The column-count guard above guarantees index 10 exists.
            'nature': cols[10].get_text(strip=True),
        })

    print("\nNon-Derivative Securities Transactions:")
    for tx in transactions:
        print(f"Title: {tx['title']}")
        print(f"Date: {tx['transaction_date']}, Code: {tx['code']}, Amount: {tx['amount']}, A/D: {tx['a_or_d']}, Price: ${tx['price']}")
        print(f"Beneficially Owned: {tx['beneficially_owned']}, Form: {tx['ownership_form']}")
        if tx['nature']:
            print(f"Nature: {tx['nature']}")
        print("-" * 50)

# Extract footnotes
footnotes = soup.find_all('td', class_='FootnoteData')
if footnotes:
    print("\nFootnotes:")
    for i, footnote in enumerate(footnotes, 1):
        print(f"{i}. {footnote.get_text(strip=True)}")

print("\nInvestigation complete.")