# Files
# 2025-12-09 12:13:01 +01:00
#
# 1031 lines
# 44 KiB
# Python

import re
from dataclasses import dataclass
from datetime import datetime
from typing import Any, Dict, List, Optional
from rich import box
from rich.columns import Columns
from rich.console import Group
from rich.panel import Panel
from rich.table import Column, Table
from rich.text import Text
from edgar._party import Address, get_addresses_as_columns
from edgar.core import log
from edgar.formatting import datefmt, reverse_name
from edgar.reference import describe_form, states
from edgar.richtools import repr_rich
# Titles used for panels and sections; each begins with an emoji code point.

# Address sections
mailing_address_title = "\U0001F4EC Mailing Address"
business_address_title = "\U0001F4EC Business Address"

# Company and filing sections
company_title = "\U0001F3E2 Company Information"
filing_information_title = "\U0001F4D1 Filing Information"
filing_title = "\U0001F4D1 Filing"

# Parties to the filing
reporting_owner_title = "\U0001F468 Reporting Owner"
issuer_title = "\U0001F4B5 Issuer"
# Names exported by ``from <module> import *``
__all__ = [
    'FilingMetadata',
    'CompanyInformation',
    'FilingInformation',
    'FormerCompany',
    'Filer',
    'Owner',
    'ReportingOwner',
    'SubjectCompany',
    'Issuer',
    'FilingHeader',
]
def collect_repeated_tags(text: str, tag_name: str) -> list[str]:
    """
    Collect the values that follow every unclosed ``<tag_name>`` tag in ``text``.

    Example:
        <ITEMS>06b
        <ITEMS>3C
        Returns: ['06b', '3C']

    Args:
        text: The text to search.
        tag_name: The literal tag name, without the angle brackets.

    Returns:
        The stripped values in order of appearance. A value runs from the tag up to the
        end of the line or the next ``<``, whichever comes first. Tags that are
        immediately followed by a newline or ``<`` contribute nothing.
    """
    # Escape the tag name so regex metacharacters in it (e.g. '.') are matched literally
    pattern = re.compile(f"<{re.escape(tag_name)}>([^\n<]+)")
    return [match.group(1).strip() for match in pattern.finditer(text)]
def preprocess_old_headers(text: str) -> str:
    """
    Rewrite an old tag-based SEC header into the tab-indented layout.

    Three passes are applied in order:
      1. A block of the form ``<TAG>`` newline, content, newline ``</TAG>`` is replaced by its
         content with every content line prefixed by one tab; the tag name itself is dropped.
      2. Each remaining opening tag is removed together with the rest of its line.
      3. Any opening or closing tag still present is removed.

    Args:
        text: The raw header text.

    Returns:
        The rewritten header text.
    """

    def _indent_block(found):
        # Keep only what was between the tags, one tab deeper
        inner_lines = found.group(2).strip().split('\n')
        return '\n'.join(f'\t{inner_line}' for inner_line in inner_lines)

    # Pass 1: fully enclosed blocks -> tab-indented content
    converted = re.sub(r'<([\w-]+)>\n(.*?)\n</\1>', _indent_block, text, flags=re.DOTALL)
    # Pass 2: standalone opening tags and whatever follows them on the line
    converted = re.sub(r'<[^/>]+>.*$', '', converted, flags=re.MULTILINE)
    # Pass 3: leftover start/end tags (tag names may contain hyphens)
    return re.sub(r'</?[\w-]+>', '', converted)
class FilingMetadata:
    """
    Thin wrapper around the flat key/value metadata of a filing header.

    Values are stored exactly as given. Reads through :meth:`get` (or ``obj[key]``) reformat
    string values that look like compact dates; all other values are returned untouched.
    """

    # Compact date shapes recognised by ``get``: 14 digits (YYYYMMDDHHMMSS) and
    # 8 digits (YYYYMMDD), in both cases starting with 19 or 20
    _COMPACT_DATETIME = re.compile(r"^(20|19)\d{12}$")
    _COMPACT_DATE = re.compile(r"^(20|19)\d{6}$")

    def __init__(self, metadata: Dict[str, Any]):
        # The dictionary is kept by reference, so ``update`` mutates the caller's dict
        self.metadata = metadata

    def get(self, key: str):
        """
        Return the value stored under ``key``, or ``None`` when the key is absent.

        String values made of 14 or 8 digits starting with 19/20 are passed through ``datefmt``
        with the formats ``%Y-%m-%d %H:%M:%S`` and ``%Y-%m-%d`` respectively. Non-string
        values (for example lists) are returned unchanged instead of being pattern matched.
        """
        value = self.metadata.get(key)
        # Only strings can hold a compact date; matching anything else would raise TypeError
        if value and isinstance(value, str):
            if self._COMPACT_DATETIME.match(value):  # YYYYMMDDHHMMSS
                value = datefmt(value, "%Y-%m-%d %H:%M:%S")
            elif self._COMPACT_DATE.match(value):  # YYYYMMDD
                value = datefmt(value, "%Y-%m-%d")
        return value

    def update(self, property: str, value: str):
        """Set ``property`` to ``value`` in the underlying metadata dictionary."""
        self.metadata[property] = value

    @property
    def num_documents(self):
        """The ``PUBLIC DOCUMENT COUNT`` as an int, or ``None`` if missing or not a digit string."""
        count = self.metadata.get("PUBLIC DOCUMENT COUNT")
        if isinstance(count, str) and count.isdigit():
            return int(count)
        return None

    def __getitem__(self, key: str):
        """Same as :meth:`get`; a missing key yields ``None`` rather than raising ``KeyError``."""
        return self.get(key)

    def __rich__(self):
        """Build a two-column rich table of the metadata, with a few well-known keys listed first."""
        # Ordered keys to be displayed first
        ordered_keys = ["ACCESSION NUMBER", "FILED AS OF DATE", "ACCEPTANCE-DATETIME", "CONFORMED SUBMISSION TYPE"]
        table = Table("", "", row_styles=["bold", ""], show_header=False, box=box.ROUNDED)
        # Every other key follows in the dictionary's own order
        remaining_keys = [key for key in self.metadata if key not in ordered_keys]
        for key in ordered_keys + remaining_keys:
            value = self.get(key)
            if value is not None:
                # Table cells must be renderables, so non-string values are shown via str()
                table.add_row(f"{key}:", value if isinstance(value, str) else str(value))
        return table
@dataclass(frozen=True)
class CompanyInformation:
    """Immutable record of the company details found in a filing header."""
    name: str
    cik: str
    sic: str
    irs_number: str
    state_of_incorporation: str
    fiscal_year_end: str

    def __rich__(self):
        """Render the company as a one-row rich table."""
        company_column = Column("Company", style="bold deep_sky_blue1")
        summary = Table(company_column, "Industry", "Incorporated", "Year End", box=box.ROUNDED)
        # Look the state code up in ``states``; fall back to the raw value when it is not listed
        incorporated_in = states.get(self.state_of_incorporation, self.state_of_incorporation)
        summary.add_row(f"{self.name} [{self.cik}]", self.sic, incorporated_in, self.fiscal_year_end)
        return summary

    def __repr__(self):
        rendered = self.__rich__()
        return repr_rich(rendered)
@dataclass(frozen=True)
class FilingInformation:
    """Immutable record of the filing values (form, file number, act, film number) of a party."""
    form: str
    file_number: str
    sec_act: str
    film_number: str

    def __rich__(self):
        """Render the filing values as a one-row rich table."""
        headings = ("File Number", "SEC Act", "Film #", "Form")
        values = (self.file_number, self.sec_act, self.film_number, self.form)
        filing_table = Table(*headings, box=box.ROUNDED)
        filing_table.add_row(*values)
        return filing_table

    def __repr__(self):
        rendered = self.__rich__()
        return repr_rich(rendered)
@dataclass(frozen=True)
class FormerCompany:
    """Immutable record of a former company name and the date the name was changed."""
    # The former name
    name: str
    # The date of the name change, kept as the string taken from the header
    date_of_change: str
@dataclass(frozen=True)
class Filer:
    """Immutable record of a filer: company details, filing values, addresses and former names."""
    company_information: CompanyInformation
    filing_information: FilingInformation
    business_address: Address
    mailing_address: Address
    former_company_names: Optional[List[FormerCompany]] = None

    def __str__(self):
        company = self.company_information
        return f"{company.name} [{company.cik}]"

    def __rich__(self):
        """Render the filer as a rich panel titled "Filer"."""
        sections = [self.company_information]

        # Addresses are shown when at least one of them is set
        has_address = self.business_address or self.mailing_address
        if has_address:
            address_columns = get_addresses_as_columns(business_address=self.business_address,
                                                       mailing_address=self.mailing_address)
            sections.append(address_columns)

        # Former names, one row per name
        if self.former_company_names:
            names_table = Table("Former Name", "Changed", box=box.ROUNDED)
            for former in self.former_company_names:
                names_table.add_row(former.name, datefmt(former.date_of_change, '%b %d, %Y'))
            sections.append(names_table)

        return Panel(Group(*sections), title="Filer")

    def __repr__(self):
        rendered = self.__rich__()
        return repr_rich(rendered)
@dataclass(frozen=True)
class Owner:
    """Immutable record identifying an owner by name and CIK."""
    # The owner's name
    name: str
    # The owner's CIK, kept as a string
    cik: str
@dataclass(frozen=True)
class ReportingOwner:
    """Immutable record of a reporting owner: the owner, company details, filing values, addresses."""
    owner: Owner
    company_information: CompanyInformation
    filing_information: FilingInformation
    business_address: Address
    mailing_address: Address

    def __rich__(self):
        """Render the reporting owner as a rich panel."""
        top_row = []

        # Owner name and CIK
        if self.owner:
            owner_table = Table(Column("Owner", style="bold deep_sky_blue1"), "CIK", box=box.ROUNDED)
            owner_table.add_row(self.owner.name, self.owner.cik)
            top_row.append(owner_table)

        # Filing values of the reporting owner (the form is not shown here)
        filing = self.filing_information
        if filing:
            filing_table = Table("File Number", "SEC Act", "Film #", box=box.ROUNDED)
            filing_table.add_row(filing.file_number, filing.sec_act, filing.film_number)
            top_row.append(filing_table)

        sections = [Columns(top_row)]

        # Addresses are shown when at least one of them is set
        if self.business_address or self.mailing_address:
            address_columns = get_addresses_as_columns(business_address=self.business_address,
                                                       mailing_address=self.mailing_address)
            sections.append(address_columns)

        return Panel(Group(*sections), title=reporting_owner_title)

    def __repr__(self):
        rendered = self.__rich__()
        return repr_rich(rendered)
@dataclass(frozen=True)
class SubjectCompany:
    """Immutable record of a subject company: company details, filing values, addresses, former names."""
    company_information: CompanyInformation
    filing_information: FilingInformation
    business_address: Address
    mailing_address: Address
    former_company_names: Optional[List[FormerCompany]] = None

    def __rich__(self):
        """Render the subject company as a rich panel titled "Subject Company"."""
        sections = [self.company_information]

        # Addresses are shown when at least one of them is not None
        no_addresses = self.business_address is None and self.mailing_address is None
        if not no_addresses:
            sections.append(get_addresses_as_columns(business_address=self.business_address,
                                                     mailing_address=self.mailing_address))

        # Former names and filing values share one row of columns
        side_by_side = []
        if self.former_company_names:
            names_table = Table("Former Name", "Changed", box=box.ROUNDED)
            for former in self.former_company_names:
                names_table.add_row(former.name, datefmt(former.date_of_change, '%b %d, %Y'))
            side_by_side.append(names_table)
        if self.filing_information:
            side_by_side.append(self.filing_information)
        # The row is only added when it has something in it
        if side_by_side:
            sections.append(Columns(side_by_side))

        return Panel(Group(*sections), title="Subject Company")

    def __repr__(self):
        rendered = self.__rich__()
        return repr_rich(rendered)
@dataclass(frozen=True)
class Issuer:
    """Immutable record of the issuer: company details, addresses and former names."""
    company_information: CompanyInformation
    business_address: Address
    mailing_address: Address
    former_company_names: Optional[List[FormerCompany]] = None

    def __rich__(self):
        """Render the issuer as a rich panel."""
        company_column = Column("Company", style="bold deep_sky_blue1")
        summary = Table(company_column, "CIK", "SIC", "Fiscal Year End", box=box.ROUNDED)
        company = self.company_information
        summary.add_row(company.name, company.cik, company.sic, company.fiscal_year_end)

        # Everything shown inside the issuer panel
        sections = [summary]

        # Addresses are shown when at least one of them is set
        if self.business_address or self.mailing_address:
            address_columns = get_addresses_as_columns(business_address=self.business_address,
                                                       mailing_address=self.mailing_address)
            sections.append(address_columns)

        return Panel(Group(*sections), title=issuer_title)

    def __repr__(self):
        rendered = self.__rich__()
        return repr_rich(rendered)
class FilingHeader:
    """
    Contains the parsed representation of the SEC-HEADER text at the top of the full submission text
    <SEC-HEADER>
    </SEC-HEADER>

    Instances are usually created with :meth:`parse_from_sgml_text` (from the header text) or
    :meth:`parse_submission_format_header` (from an already parsed SUBMISSION dictionary).
    """

    def __init__(self,
                 text: str,
                 filing_metadata: Dict[str, str],
                 filers: Optional[List[Filer]] = None,
                 reporting_owners: Optional[List[ReportingOwner]] = None,
                 issuer: Optional[Issuer] = None,
                 subject_companies: Optional[List[SubjectCompany]] = None):
        """
        Args:
            text: The header text.
            filing_metadata: Flat key/value metadata; wrapped in a ``FilingMetadata``.
            filers: The filers named in the header, if any.
            reporting_owners: The reporting owners named in the header, if any.
            issuer: The issuer named in the header, if any.
            subject_companies: The subject companies named in the header, if any.
        """
        self.text: str = text
        self.filing_metadata: FilingMetadata = FilingMetadata(filing_metadata)
        # The party lists are stored as given, so each of them may be None
        self.filers: Optional[List[Filer]] = filers
        self.reporting_owners: Optional[List[ReportingOwner]] = reporting_owners
        self.issuer: Optional[Issuer] = issuer
        self.subject_companies: Optional[List[SubjectCompany]] = subject_companies

    def is_empty(self):
        """True when the header carries no metadata at all."""
        return not self.filing_metadata.metadata

    @property
    def accession_number(self):
        return self.filing_metadata.get("ACCESSION NUMBER")

    @property
    def cik(self):
        """The CIK as an int, taken from the metadata or else from the first filer's company."""
        cik = self.filing_metadata.get("CIK")
        if cik:
            return int(cik)
        # Get from the filers
        if self.filers:
            company = self.filers[0].company_information
            if company and company.cik:
                return int(company.cik)
        return cik

    @property
    def form(self):
        return self.filing_metadata.get("CONFORMED SUBMISSION TYPE")

    @property
    def period_of_report(self):
        return self.filing_metadata.get("CONFORMED PERIOD OF REPORT")

    @property
    def filing_date(self):
        return self.filing_metadata.get("FILED AS OF DATE")

    @property
    def date_as_of_change(self):
        return self.filing_metadata.get("DATE AS OF CHANGE")

    @property
    def document_count(self):
        """The ``PUBLIC DOCUMENT COUNT`` as an int, or ``None`` if missing or not numeric."""
        return self.filing_metadata.num_documents

    @property
    def acceptance_datetime(self):
        """The ``ACCEPTANCE-DATETIME`` as a ``datetime``, or ``None`` when it is not set."""
        acceptance = self.filing_metadata.get("ACCEPTANCE-DATETIME")
        if acceptance:
            return datetime.strptime(acceptance, "%Y-%m-%d %H:%M:%S")
        return None

    @property
    def file_numbers(self):
        """
        Return the file numbers associated with this filing

        The numbers of the filers, reporting owners and subject companies are collected in that
        order and de-duplicated keeping the first occurrence. Parties without filing information
        and empty file numbers are skipped.
        """
        numbers = []
        for parties in (self.filers, self.reporting_owners, self.subject_companies):
            for party in parties or []:
                # The parsers may leave filing_information as None
                filing_information = getattr(party, 'filing_information', None)
                if filing_information is not None and filing_information.file_number:
                    numbers.append(filing_information.file_number)
        # dict.fromkeys removes duplicates while keeping a stable order
        return list(dict.fromkeys(numbers))

    # ------------------------------------------------------------------
    # Helpers for the SUBMISSION format (hyphenated keys)
    # ------------------------------------------------------------------
    @staticmethod
    def _company_from_submission(company_data: Dict[str, Any]) -> CompanyInformation:
        """Build a CompanyInformation from a COMPANY-DATA record of the SUBMISSION format."""
        return CompanyInformation(
            name=company_data.get('CONFORMED-NAME'),
            cik=company_data.get('CIK'),
            # Prefer the hyphenated keys used throughout this format; the space-separated
            # spellings are still accepted as a fallback
            sic=company_data.get('STANDARD-INDUSTRIAL-CLASSIFICATION',
                                 company_data.get('STANDARD INDUSTRIAL CLASSIFICATION')),
            irs_number=company_data.get('IRS-NUMBER', company_data.get('IRS NUMBER')),
            state_of_incorporation=company_data.get('STATE-OF-INCORPORATION'),
            fiscal_year_end=company_data.get('FISCAL-YEAR-END')
        )

    @staticmethod
    def _filing_from_submission(filing_values: Dict[str, Any]) -> FilingInformation:
        """Build a FilingInformation from a FILING-VALUES record of the SUBMISSION format."""
        return FilingInformation(
            form=filing_values.get('FORM-TYPE'),
            file_number=filing_values.get('FILE-NUMBER'),
            sec_act=filing_values.get('ACT'),
            film_number=filing_values.get('FILM-NUMBER')
        )

    @staticmethod
    def _address_from_dict(section: Dict[str, Any], key: str) -> Optional[Address]:
        """Build an address with ``Address.from_dict`` when ``key`` is present in ``section``."""
        return Address.from_dict(section.get(key, {})) if key in section else None

    @staticmethod
    def _address_from_submission(record: Optional[Dict[str, Any]]) -> Optional[Address]:
        """Build an address from a SUBMISSION address record; None when the record is missing or empty."""
        if not record:
            return None
        return Address(
            street1=record.get('STREET1'),
            street2=record.get('STREET2'),
            city=record.get('CITY'),
            state_or_country=record.get('STATE'),
            zipcode=record.get('ZIP')
        )

    @staticmethod
    def _former_companies_from_submission(records) -> List[FormerCompany]:
        """Build the former company names from the FORMER-COMPANY records of the SUBMISSION format."""
        return [FormerCompany(name=record.get('FORMER-CONFORMED-NAME'),
                              date_of_change=record.get('DATE-CHANGED'))
                for record in records]

    @classmethod
    def parse_submission_format_header(cls, parsed_data: Dict[str, Any]):
        """
        Parse SUBMISSION format into same data structure

        Args:
            parsed_data: The parsed SUBMISSION dictionary. ``FILER``, ``REPORTING-OWNER`` and
                ``SUBJECT-COMPANY`` are read as lists of records; ``ISSUER`` may be a single
                record or a list, in which case its first record is used.

        Returns:
            A ``FilingHeader`` built from the records.
        """
        # Transform SUBMISSION format into expected structure
        metadata = {
            "ACCESSION NUMBER": parsed_data.get("ACCESSION-NUMBER"),
            "CONFORMED SUBMISSION TYPE": parsed_data.get("TYPE"),
            "FILED AS OF DATE": parsed_data.get("FILING-DATE"),
            "DATE AS OF CHANGE": parsed_data.get("DATE-OF-FILING-DATE-CHANGE"),
            "EFFECTIVE DATE": parsed_data.get("EFFECTIVENESS-DATE"),
        }

        # Handle FILER section
        filers = []
        for filer_data in parsed_data.get('FILER', []):
            filers.append(Filer(
                company_information=cls._company_from_submission(filer_data.get('COMPANY-DATA', {})),
                filing_information=cls._filing_from_submission(filer_data.get('FILING-VALUES', {})),
                business_address=cls._address_from_dict(filer_data, 'BUSINESS-ADDRESS'),
                mailing_address=cls._address_from_dict(filer_data, 'MAIL-ADDRESS')
            ))

        # Handle REPORTING-OWNER section
        reporting_owners = []
        for reporting_owner_data in parsed_data.get('REPORTING-OWNER', []):
            owner_record = reporting_owner_data.get('OWNER-DATA', {})
            reporting_owners.append(ReportingOwner(
                owner=Owner(name=owner_record.get('CONFORMED-NAME'), cik=owner_record.get('CIK')),
                company_information=cls._company_from_submission(reporting_owner_data.get('COMPANY-DATA', {})),
                filing_information=cls._filing_from_submission(reporting_owner_data.get('FILING-VALUES', {})),
                business_address=cls._address_from_submission(reporting_owner_data.get('BUSINESS-ADDRESS')),
                mailing_address=cls._address_from_submission(reporting_owner_data.get('MAIL-ADDRESS'))
            ))

        # Handle ISSUER section
        issuer_record = parsed_data.get('ISSUER')
        if isinstance(issuer_record, list):
            # Tolerate a list of records; only one issuer is expected
            issuer_record = issuer_record[0] if issuer_record else None
        issuer = None
        if issuer_record:
            issuer = Issuer(
                company_information=cls._company_from_submission(issuer_record.get('COMPANY-DATA', {})),
                business_address=cls._address_from_dict(issuer_record, 'BUSINESS-ADDRESS'),
                mailing_address=cls._address_from_dict(issuer_record, 'MAIL-ADDRESS'),
                former_company_names=cls._former_companies_from_submission(
                    issuer_record.get('FORMER-COMPANY', []))
            )

        # Handle SUBJECT-COMPANY section
        subject_companies = []
        for subject_company_data in parsed_data.get('SUBJECT-COMPANY', []):
            subject_companies.append(SubjectCompany(
                company_information=cls._company_from_submission(subject_company_data.get('COMPANY-DATA', {})),
                filing_information=cls._filing_from_submission(subject_company_data.get('FILING-VALUES', {})),
                business_address=cls._address_from_submission(subject_company_data.get('BUSINESS-ADDRESS')),
                mailing_address=cls._address_from_submission(subject_company_data.get('MAIL-ADDRESS')),
                former_company_names=cls._former_companies_from_submission(
                    subject_company_data.get('FORMER-COMPANY', []))
            ))

        return cls(
            # NOTE(review): this is a literal placeholder, not the header text; the raw text
            # is not available to this method
            text='header_text',
            filing_metadata=metadata,
            filers=filers,
            reporting_owners=reporting_owners,
            issuer=issuer,
            subject_companies=subject_companies
        )

    @staticmethod
    def _is_valid_sgml_tag(line: str) -> bool:
        """
        Check if line contains a valid SGML header tag (not HTML/XBRL content).
        SGML header tags are uppercase with no namespace prefixes.
        HTML/XBRL tags often have lowercase letters or namespace prefixes like 'ix:'.

        Args:
            line: The line to check

        Returns:
            bool: True if line contains a valid SGML tag, False otherwise
        """
        stripped = line.strip()
        if not stripped.startswith('<'):
            return False
        # Find the end of the tag
        tag_end = stripped.find('>')
        if tag_end == -1:
            return False
        # Extract tag name (without the < >)
        tag = stripped[1:tag_end]
        # Uppercase letters, digits and hyphens only. This single check also rejects closing
        # tags ('/'), namespace prefixes (':'), attributes (' '), lowercase and empty names
        return re.fullmatch(r'[A-Z0-9\-]+', tag) is not None

    # ------------------------------------------------------------------
    # Helpers for the SEC-HEADER text format (space separated keys)
    # ------------------------------------------------------------------
    @staticmethod
    def _store_value(data: Dict[str, Any], key: str, value: str) -> None:
        """Store ``value`` under ``key``; a repeated key accumulates its values in a list."""
        if key not in data:
            data[key] = value
        elif isinstance(data[key], list):
            data[key].append(value)
        else:
            data[key] = [data[key], value]

    @staticmethod
    def _company_from_sgml(record: Optional[Dict[str, Any]]) -> Optional[CompanyInformation]:
        """Build a CompanyInformation from a "COMPANY DATA" record; None when there is no record."""
        if record is None:
            return None
        return CompanyInformation(
            name=record.get('COMPANY CONFORMED NAME'),
            cik=record.get('CENTRAL INDEX KEY'),
            sic=record.get('STANDARD INDUSTRIAL CLASSIFICATION'),
            irs_number=record.get('IRS NUMBER'),
            state_of_incorporation=record.get('STATE OF INCORPORATION'),
            fiscal_year_end=record.get('FISCAL YEAR END')
        )

    @staticmethod
    def _filing_from_sgml(record: Optional[Dict[str, Any]]) -> Optional[FilingInformation]:
        """Build a FilingInformation from a "FILING VALUES" record; None when there is no record."""
        if record is None:
            return None
        return FilingInformation(
            form=record.get('FORM TYPE'),
            sec_act=record.get('SEC ACT'),
            file_number=record.get('SEC FILE NUMBER'),
            film_number=record.get('FILM NUMBER')
        )

    @staticmethod
    def _address_from_sgml(record: Optional[Dict[str, Any]]) -> Optional[Address]:
        """Build an Address from a "BUSINESS ADDRESS"/"MAIL ADDRESS" record; None when there is no record."""
        if record is None:
            return None
        return Address(
            street1=record.get('STREET 1'),
            street2=record.get('STREET 2'),
            city=record.get('CITY'),
            state_or_country=record.get('STATE'),
            zipcode=record.get('ZIP'),
        )

    @staticmethod
    def _former_companies_from_sgml(records) -> Optional[List[FormerCompany]]:
        """Build the former company names from the "FORMER COMPANY" records; None when there are none."""
        if records is None:
            return None
        return [FormerCompany(date_of_change=record.get('DATE OF NAME CHANGE'),
                              name=record.get('FORMER CONFORMED NAME'))
                for record in records]

    @classmethod
    def parse_from_sgml_text(cls, header_text: str, preprocess=False):
        """
        Parse the SEC-HEADER text at the top of the submission text

        Nesting is read from the leading tabs of each line: a top-level line ending in ':' opens
        a section (e.g. "FILER:"), an indented line followed by a deeper line opens a subsection
        (e.g. "COMPANY DATA:"), and "KEY: value" lines fill the current subsection.
        "<KEY>VALUE" lines and "KEY: value" lines seen before any section become metadata.

        Args:
            header_text: The header text.
            preprocess: When True the text is first passed through ``preprocess_old_headers``.

        Returns:
            A ``FilingHeader`` with the metadata and the parties found in the text.
        """
        data: Dict[str, Any] = {}
        current_header = None
        current_subheader = None

        # Preprocess the text to handle a different format from the 1990's
        if preprocess:
            header_text = preprocess_old_headers(header_text)

        # Collapse every run of newlines to one, so that the look-ahead below always sees
        # the next line that has content
        header_text = re.sub(r'\n{2,}', '\n', header_text)

        def tab_depth(text_line: str) -> int:
            # Number of leading tabs
            return len(text_line) - len(text_line.lstrip('\t'))

        # Read the lines in the content. This starts with <ACCEPTANCE-DATETIME>20230606213204
        lines = header_text.split('\n')
        for index, line in enumerate(lines):
            if not line:
                continue

            # Keep track of the nesting level
            nesting_level = tab_depth(line)
            # Nested increases
            nesting_will_increase = index < len(lines) - 1 and nesting_level < tab_depth(lines[index + 1])
            # The line ends with a ':' meaning nested content follows e.g. "REPORTING-OWNER:"
            line_ends_with_colon = line.rstrip('\t').endswith(':')
            is_header = (nesting_level == 0 and line_ends_with_colon) or nesting_will_increase

            if is_header:
                # Nested line means a subheader e.g. "OWNER DATA:"
                if line.startswith('\t'):
                    current_subheader = line.strip().split(':')[0]
                    record = data[current_header][-1]
                    if current_subheader == "FORMER COMPANY":  # Special case. This is a list of companies
                        record.setdefault(current_subheader, []).append({})
                    else:
                        record[current_subheader] = {}  # Expect only one record per key
                # Top level header
                else:
                    current_header = line.strip().split(':')[0]
                    if current_header not in data:
                        data[current_header] = []
                    if isinstance(data[current_header], list):
                        data[current_header].append({})
            elif line.strip().startswith("<"):
                # Only process valid SGML header tags, skip HTML/XBRL content
                if not cls._is_valid_sgml_tag(line):
                    continue
                # The line looks like this <KEY>VALUE
                # Split on the first '>' only, the value may contain more of them. Leading
                # whitespace is dropped first so that the key never keeps the '<'
                key, _, value = line.lstrip().partition('>')
                cls._store_value(data, key[1:], value)
            elif ':' in line:
                # KEY: value, where the value may itself contain ':'
                key, _, value = line.strip().partition(':')
                value = value.strip()
                if not current_header:
                    cls._store_value(data, key, value)
                elif not current_subheader:
                    continue
                else:
                    try:
                        target = data[current_header][-1][current_subheader]
                        if current_subheader == "FORMER COMPANY":
                            # Values go to the most recent former company record
                            target = target[-1]
                        target[key.strip()] = value
                    except KeyError:
                        # Some filings from the 2000's have an issue with malformed headers
                        log.warning("Subheader '%s' not found in header '%s'", current_subheader, current_header)

        # The filer
        filers = []
        for filer_values in data.get('FILER', data.get('FILED BY', {})):
            filers.append(Filer(
                # Empty records are treated like missing ones for the filer
                company_information=cls._company_from_sgml(filer_values.get('COMPANY DATA') or None),
                filing_information=cls._filing_from_sgml(filer_values.get('FILING VALUES') or None),
                business_address=cls._address_from_sgml(filer_values.get('BUSINESS ADDRESS')),
                mailing_address=cls._address_from_sgml(filer_values.get('MAIL ADDRESS')),
                former_company_names=cls._former_companies_from_sgml(filer_values.get('FORMER COMPANY'))
            ))

        # Reporting Owner
        reporting_owners = []
        for reporting_owner_values in data.get('REPORTING-OWNER', []):
            if not reporting_owner_values:
                # Nothing was parsed for this section, so there is no owner to add
                continue
            owner, name, cik = None, None, None
            if "OWNER DATA" in reporting_owner_values:
                name = reporting_owner_values['OWNER DATA'].get('COMPANY CONFORMED NAME')
                cik = reporting_owner_values['OWNER DATA'].get('CENTRAL INDEX KEY')
            elif 'COMPANY DATA' in reporting_owner_values:
                name = reporting_owner_values['COMPANY DATA'].get('COMPANY CONFORMED NAME')
                cik = reporting_owner_values['COMPANY DATA'].get('CENTRAL INDEX KEY')
            if cik:
                # NOTE(review): creating an Entity presumably looks the CIK up - confirm its cost
                from edgar.entity import Entity
                entity: Entity = Entity(cik)
                # Names of owners that are not companies are passed through reverse_name
                if entity and not entity.data.is_company:
                    name = reverse_name(name)
                owner = Owner(name=name, cik=cik)

            # Filing information is only kept when it has a file number
            filing_values = reporting_owner_values.get('FILING VALUES')
            has_file_number = filing_values is not None and filing_values.get('SEC FILE NUMBER')

            reporting_owners.append(ReportingOwner(
                owner=owner,
                company_information=cls._company_from_sgml(reporting_owner_values.get('COMPANY DATA')),
                filing_information=cls._filing_from_sgml(filing_values) if has_file_number else None,
                business_address=cls._address_from_sgml(reporting_owner_values.get('BUSINESS ADDRESS')),
                mailing_address=cls._address_from_sgml(reporting_owner_values.get('MAIL ADDRESS'))
            ))

        # Issuer
        issuer_data = data.get('ISSUER')
        # This will be a list but we only expect one record
        issuer_record = issuer_data[0] if issuer_data else None
        issuer = Issuer(
            company_information=cls._company_from_sgml(issuer_record.get('COMPANY DATA')),
            business_address=cls._address_from_sgml(issuer_record.get('BUSINESS ADDRESS')),
            mailing_address=cls._address_from_sgml(issuer_record.get('MAIL ADDRESS')),
            former_company_names=cls._former_companies_from_sgml(issuer_record.get('FORMER COMPANY'))
        ) if issuer_record else None

        # Subject companies
        subject_companies = []
        for subject_company_values in data.get('SUBJECT COMPANY', []):
            subject_companies.append(SubjectCompany(
                company_information=cls._company_from_sgml(subject_company_values.get('COMPANY DATA')),
                filing_information=cls._filing_from_sgml(subject_company_values.get('FILING VALUES')),
                business_address=cls._address_from_sgml(subject_company_values.get('BUSINESS ADDRESS')),
                mailing_address=cls._address_from_sgml(subject_company_values.get('MAIL ADDRESS')),
                former_company_names=cls._former_companies_from_sgml(subject_company_values.get('FORMER COMPANY'))
            ))

        # Convert all lists to strings
        for key, value in data.items():
            if isinstance(value, list) and all(isinstance(item, str) for item in value):
                data[key] = ', '.join(value)

        # Create a dict of the values in data that are not nested dicts
        filing_metadata = {key: value
                           for key, value in data.items()
                           if isinstance(value, str) and value}

        # The header text contains <ACCEPTANCE-DATETIME>20230612172243. Replace with the formatted date
        header_text = re.sub(r'<ACCEPTANCE-DATETIME>(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})',
                             r'ACCEPTANCE-DATETIME: \1-\2-\3 \4:\5:\6', header_text)
        # Remove empty lines from header_text
        header_text = '\n'.join([line for line in header_text.split('\n') if line.strip()])

        # Create the Header object
        return cls(
            text=header_text,
            filing_metadata=filing_metadata,
            filers=filers,
            reporting_owners=reporting_owners,
            issuer=issuer,
            subject_companies=subject_companies
        )

    def __rich__(self):
        """Render the metadata followed by every party of the filing inside one rich panel."""
        # Filing Metadata
        metadata_table = self.filing_metadata.__rich__()
        # Keep a list of renderables for rich
        renderables = [metadata_table]
        # The party lists default to None, which is rendered like an empty list
        # SUBJECT COMPANY
        for subject_company in self.subject_companies or []:
            renderables.append(subject_company.__rich__())
        # FILER
        for filer in self.filers or []:
            renderables.append(filer.__rich__())
        # REPORTING OWNER
        for reporting_owner in self.reporting_owners or []:
            renderables.append(reporting_owner.__rich__())
        # ISSUER
        if self.issuer:
            renderables.append(self.issuer.__rich__())
        return Panel(
            Group(
                *renderables
            ),
            title=Text(describe_form(self.form), style="bold"),
            subtitle=Text(f"Form {self.form}")
        )

    def __repr__(self):
        return repr_rich(self.__rich__())