# edgartools/parse_duggan_filing.py

import pandas as pd
from bs4 import BeautifulSoup
import re

# Load the saved Form 4 page from the working directory and build the parse
# tree that the later sections of this script query.
FILING_PATH = 'DUGGAN_SEC FORM 4.html'

with open(FILING_PATH, mode='r', encoding='utf-8') as filing_file:
    html_content = filing_file.read()

# Parse with the 'html.parser' backend.
soup = BeautifulSoup(html_content, 'html.parser')

# Basic filing info: the reporting person's cell and the issuer's link.
REPORTING_PERSON_NAME = 'DUGGAN ROBERT W'
ISSUER_LINK_PATTERN = re.compile(r'CIK=0001599298')


def _is_reporting_person_cell(tag):
    """Return True for a <td> whose text contains the reporting person's name."""
    return tag.name == 'td' and REPORTING_PERSON_NAME in tag.get_text()


# First matching <td> in document order; nothing is printed when none matches.
reporting_person = soup.find(_is_reporting_person_cell)
if reporting_person:
    print("Reporting Person:", reporting_person.get_text(strip=True))

# First anchor whose href contains the hard-coded CIK query fragment.
issuer = soup.find('a', href=ISSUER_LINK_PATTERN)
if issuer:
    print("Issuer:", issuer.get_text(strip=True))

# Find Table I: the first <table> that has a <th> whose string matches the
# header pattern.
# - `string=` is the supported spelling of the deprecated `text=` keyword.
# - The trailing \b stops the pattern from also matching a "Table II" header.
TABLE_I_HEADER = re.compile(r'Table I\b')

table_i = None
for table in soup.find_all('table'):
    if table.find('th', string=TABLE_I_HEADER):
        table_i = table
        break

if table_i:
    # html.parser does not synthesize a <tbody>; when the markup omits it,
    # read the rows straight from the table instead of silently skipping it.
    row_container = table_i.find('tbody') or table_i

    transactions = []
    for row in row_container.find_all('tr'):
        cols = row.find_all('td')
        # Only rows with at least 11 <td> cells are treated as data rows;
        # header rows built from <th> cells are skipped by this check.
        if len(cols) < 11:
            continue
        # Cells 2 and 4 are intentionally not read.
        # NOTE(review): presumably the deemed-execution-date and "V" columns
        # of the Form 4 layout -- confirm against the source document.
        transactions.append({
            'title': cols[0].get_text(strip=True),
            'transaction_date': cols[1].get_text(strip=True),
            'code': cols[3].get_text(strip=True),
            # Thousands separators are removed; the value stays a string.
            'amount': cols[5].get_text(strip=True).replace(',', ''),
            'a_or_d': cols[6].get_text(strip=True),
            # The dollar sign is removed here and re-added when printing.
            'price': cols[7].get_text(strip=True).replace('$', '').strip(),
            'beneficially_owned': cols[8].get_text(strip=True).replace(',', ''),
            'ownership_form': cols[9].get_text(strip=True),
            # The length check above guarantees index 10 exists.
            'nature': cols[10].get_text(strip=True),
        })

    print("\nNon-Derivative Securities Transactions:")
    for tx in transactions:
        print(f"Title: {tx['title']}")
        print(f"Date: {tx['transaction_date']}, Code: {tx['code']}, Amount: {tx['amount']}, A/D: {tx['a_or_d']}, Price: ${tx['price']}")
        print(f"Beneficially Owned: {tx['beneficially_owned']}, Form: {tx['ownership_form']}")
        # The nature line is printed only when the cell is non-empty.
        if tx['nature']:
            print(f"Nature: {tx['nature']}")
        print("-" * 50)

# Footnotes: every <td> carrying the 'FootnoteData' CSS class, printed as a
# list numbered from 1 in document order. The section is omitted when no such
# cell exists.
footnotes = soup.find_all('td', class_='FootnoteData')
if footnotes:
    print("\nFootnotes:")
    footnote_texts = [cell.get_text(strip=True) for cell in footnotes]
    for number, text in enumerate(footnote_texts, start=1):
        print(f"{number}. {text}")

# Final marker, printed unconditionally.
print("\nInvestigation complete.")