Initial commit

kdusek committed 2025-12-09 12:13:01 +01:00
commit 8e654ed209
13332 changed files with 2695056 additions and 0 deletions

View File

@@ -0,0 +1,40 @@
"""
13F Holdings Report Parser
Parses SEC Form 13F-HR (Quarterly Holdings Report) filings from institutional investment managers.
Supports both XML format (2013+) and TXT format (2012 and earlier).
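Example (hedged usage sketch — assumes `filing` is an edgar Filing whose form is one of
the THIRTEENF_FORMS, e.g. a Berkshire Hathaway 13F-HR obtained elsewhere):
>>> from edgar.thirteenf import ThirteenF
>>> report = ThirteenF(filing)              # doctest: +SKIP
>>> report.management_company_name          # doctest: +SKIP
'Berkshire Hathaway Inc'
>>> holdings = report.infotable             # doctest: +SKIP  (pandas DataFrame of holdings)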
"""
from edgar.thirteenf.models import (
ThirteenF,
THIRTEENF_FORMS,
FilingManager,
OtherManager,
CoverPage,
SummaryPage,
Signature,
PrimaryDocument13F,
format_date,
)
# For backward compatibility, also export parser functions
from edgar.thirteenf.parsers import (
parse_primary_document_xml,
parse_infotable_xml,
parse_infotable_txt,
)
__all__ = [
'ThirteenF',
'THIRTEENF_FORMS',
'FilingManager',
'OtherManager',
'CoverPage',
'SummaryPage',
'Signature',
'PrimaryDocument13F',
'format_date',
'parse_primary_document_xml',
'parse_infotable_xml',
'parse_infotable_txt',
]

View File

@@ -0,0 +1,210 @@
"""Portfolio manager lookup functionality for 13F filings."""
import json
from functools import lru_cache
from pathlib import Path
__all__ = [
'lookup_portfolio_managers',
'is_filing_signer_likely_portfolio_manager',
]
def lookup_portfolio_managers(company_name: str, cik: int = None, include_approximate: bool = False) -> list[dict]:
"""
Lookup portfolio managers for a given company.
This uses a curated database of well-known fund managers loaded from an external JSON file.
The data is compiled from public sources and may not be complete or current.
Args:
company_name: Company name to search for
cik: Optional CIK for more accurate matching
include_approximate: If True, includes non-active managers
Returns:
list[dict]: List of portfolio manager information
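Example (illustrative; results depend on the bundled JSON database — the hardcoded
fallback only covers a few well-known firms):
>>> lookup_portfolio_managers("Berkshire Hathaway Inc")   # doctest: +SKIP
[{'name': 'Warren Buffett', 'title': 'Chairman & CEO', 'status': 'active', ...}]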
"""
try:
db = _load_portfolio_manager_db()
# Try CIK-based search first (more accurate)
if cik:
managers = _search_manager_database_by_cik(db, cik, include_approximate)
if managers:
return managers
# Fallback to name-based search
return _search_manager_database(db, company_name, include_approximate)
except Exception as e:
# Fallback to empty list if database loading fails
import warnings
warnings.warn(f"Could not load portfolio manager database: {e}")
return []
@lru_cache(maxsize=1)
def _load_portfolio_manager_db() -> dict:
"""
Load the portfolio manager database from external JSON file.
Returns:
dict: The loaded database, or empty dict if file not found
"""
# Try to load from external JSON file
data_file = Path(__file__).parent.parent / 'reference' / 'data' / 'portfolio_managers.json'
if data_file.exists():
try:
with open(data_file, 'r', encoding='utf-8') as f:
return json.load(f)
except (json.JSONDecodeError, IOError) as e:
import warnings
warnings.warn(f"Could not parse portfolio manager database: {e}")
return {}
else:
# Fallback to basic hardcoded database for backwards compatibility
return {
"metadata": {
"version": "fallback",
"description": "Minimal fallback database",
"total_companies": 3,
"last_updated": "2024-12-01"
},
"managers": {
"berkshire_hathaway": {
"company_name": "Berkshire Hathaway Inc",
"match_patterns": ["berkshire hathaway", "brk", "berkshire"],
"managers": [
{
"name": "Warren Buffett",
"title": "Chairman & CEO",
"status": "active",
"confidence": "high",
"last_verified": "2024-12-01"
}
]
}
}
}
def _search_manager_database(db: dict, company_name: str, include_approximate: bool = False) -> list[dict]:
"""
Search the manager database for a company.
Args:
db: The loaded database dictionary
company_name: Company name to search for
include_approximate: Whether to include non-active managers
Returns:
list[dict]: List of matching managers
"""
if not db or 'managers' not in db:
return []
managers_data = db['managers']
normalized_name = company_name.lower()
# Search through all companies
for company_key, company_data in managers_data.items():
# Check match patterns
match_patterns = company_data.get('match_patterns', [company_key])
for pattern in match_patterns:
if pattern.lower() in normalized_name:
managers = company_data.get('managers', [])
if include_approximate:
return managers
else:
# Only return active managers unless requested otherwise
return [m for m in managers if m.get('status') == 'active']
# No matches found
return []
def _search_manager_database_by_cik(db: dict, cik: int, include_approximate: bool = False) -> list[dict]:
"""
Search the manager database by CIK (more accurate than name matching).
Args:
db: The loaded database dictionary
cik: The CIK to search for
include_approximate: Whether to include non-active managers
Returns:
list[dict]: List of matching managers
"""
if not db or 'managers' not in db:
return []
managers_data = db['managers']
# Search through all companies for CIK match
for _company_key, company_data in managers_data.items():
company_cik = company_data.get('cik')
if company_cik == cik:
managers = company_data.get('managers', [])
if include_approximate:
return managers
else:
# Only return active managers unless requested otherwise
return [m for m in managers if m.get('status') == 'active']
# No CIK matches found
return []
def is_filing_signer_likely_portfolio_manager(filing_signer_title: str) -> bool:
"""
Determine if the filing signer is likely to be a portfolio manager.
This uses heuristics based on the signer's title to assess whether they
might be involved in investment decisions rather than just administrative functions.
Args:
filing_signer_title: The title of the person who signed the filing
Returns:
bool: True if signer appears to be investment-focused, False if administrative
Example:
>>> is_filing_signer_likely_portfolio_manager("Chief Financial Officer")
False
>>> is_filing_signer_likely_portfolio_manager("Portfolio Manager")
True
"""
if not filing_signer_title:
return False
title = filing_signer_title.upper()
# Investment-focused titles
investment_titles = [
'PORTFOLIO MANAGER', 'FUND MANAGER', 'INVESTMENT MANAGER',
'CHIEF INVESTMENT OFFICER', 'CIO', 'MANAGING DIRECTOR',
'CHAIRMAN', 'CEO', 'PRESIDENT', 'FOUNDER'
]
# Administrative titles
admin_titles = [
'CFO', 'CCO', 'COMPLIANCE', 'SECRETARY', 'TREASURER',
'VICE PRESIDENT', 'VP', 'ASSISTANT', 'COUNSEL'
]
# Check for investment titles first
for inv_title in investment_titles:
if inv_title in title:
return True
# Check for administrative titles
for admin_title in admin_titles:
if admin_title in title:
return False
# If unclear, err on the side of caution
return False

View File

@@ -0,0 +1,484 @@
from dataclasses import dataclass
from datetime import datetime
from decimal import Decimal
from functools import lru_cache
from typing import List, Union
import pyarrow.compute as pc
from edgar._party import Address
__all__ = [
'FilingManager',
'OtherManager',
'CoverPage',
'SummaryPage',
'Signature',
'PrimaryDocument13F',
'ThirteenF',
'THIRTEENF_FORMS',
'format_date',
]
THIRTEENF_FORMS = ['13F-HR', "13F-HR/A", "13F-NT", "13F-NT/A", "13F-CTR", "13F-CTR/A"]
def format_date(date: Union[str, datetime]) -> str:
if isinstance(date, str):
return date
return date.strftime("%Y-%m-%d")
@dataclass(frozen=True)
class FilingManager:
name: str
address: Address
@dataclass(frozen=True)
class OtherManager:
cik: str
name: str
file_number: str
@dataclass(frozen=True)
class CoverPage:
report_calendar_or_quarter: str
report_type: str
filing_manager: FilingManager
other_managers: List[OtherManager]
@dataclass(frozen=True)
class SummaryPage:
other_included_managers_count: int
total_value: Decimal
total_holdings: int
@dataclass(frozen=True)
class Signature:
name: str
title: str
phone: str
signature: str
city: str
state_or_country: str
date: str
@dataclass(frozen=True)
class PrimaryDocument13F:
report_period: datetime
cover_page: CoverPage
summary_page: SummaryPage
signature: Signature
additional_information: str
class ThirteenF:
"""
A 13F-HR is a quarterly report filed by institutional investment managers with over $100 million in qualifying
assets under management. It is filed with the Securities and Exchange Commission (SEC) and discloses the
firm's equity holdings as of the end of the quarter. The report is due within 45 days of quarter end and is
a public document available on the SEC's website.
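Example (hedged usage sketch; `filing` is assumed to be an edgar Filing with form in THIRTEENF_FORMS):
>>> report = ThirteenF(filing)                     # doctest: +SKIP
>>> report.total_value, report.total_holdings      # doctest: +SKIP
>>> report.get_manager_info_summary()              # doctest: +SKIP
>>> previous = report.previous_holding_report()    # doctest: +SKIP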
"""
def __init__(self, filing, use_latest_period_of_report=False):
from edgar.thirteenf.parsers.primary_xml import parse_primary_document_xml
assert filing.form in THIRTEENF_FORMS, f"Form {filing.form} is not a valid 13F form"
# The filing passed in might not cover the latest period. If requested, use the related filing
# filed on the same date as the current filing that has the latest period of report.
self._related_filings = filing.related_filings().filter(filing_date=filing.filing_date, form=filing.form)
self._actual_filing = filing # The filing passed in
if use_latest_period_of_report:
# Use the last related filing.
# It should also be the one that has the CONFORMED_PERIOD_OF_REPORT closest to filing_date
self.filing = self._related_filings[-1]
else:
# Use the exact filing that was passed in
self.filing = self._actual_filing
# Parse primary document if XML is available (2013+ filings)
# For older TXT-only filings (2012 and earlier), primary_form_information will be None
primary_xml = self.filing.xml()
self.primary_form_information = parse_primary_document_xml(primary_xml) if primary_xml else None
def has_infotable(self):
return self.filing.form in ['13F-HR', "13F-HR/A"]
@property
def form(self):
return self.filing.form
@property
@lru_cache(maxsize=1)
def infotable_xml(self):
"""Returns XML content if available (2013+ filings)"""
if self.has_infotable():
result = self._get_infotable_from_attachment()
if result and result[0] and result[1] == 'xml' and "informationTable" in result[0]:
return result[0]
return None
def _get_infotable_from_attachment(self):
"""
Use the filing homepage to get the infotable file.
Returns a tuple of (content, format) where format is 'xml' or 'txt'.
"""
if self.has_infotable():
# Try XML format first (2013+)
query = "document_type=='INFORMATION TABLE' and document.lower().endswith('.xml')"
attachments = self.filing.attachments.query(query)
if len(attachments) > 0:
return (attachments.get_by_index(0).download(), 'xml')
# Fall back to TXT format (2012 and earlier)
# The primary document itself contains the table in TXT format
# Try various description patterns first
query = "description=='FORM 13F' or description=='INFORMATION TABLE'"
attachments = self.filing.attachments.query(query)
if len(attachments) > 0:
# Filter for .txt files only
txt_attachments = [att for att in attachments if att.document.lower().endswith('.txt')]
if txt_attachments:
return (txt_attachments[0].download(), 'txt')
# Final fallback: For older filings, descriptions may be unreliable
# Look for sequence number 1 with .txt extension
try:
att = self.filing.attachments.get_by_sequence(1)
if att and att.document.lower().endswith('.txt'):
return (att.download(), 'txt')
except (KeyError, AttributeError):
pass
return (None, None)
@property
@lru_cache(maxsize=1)
def infotable_txt(self):
"""Returns TXT content if available (pre-2013 filings)"""
if self.has_infotable():
result = self._get_infotable_from_attachment()
if result and result[0] and result[1] == 'txt':
return result[0]
# Fallback: Some filings have the information table embedded in the main HTML
# instead of as a separate attachment. Try to extract it from the main HTML.
if not result or not result[0]:
html = self.filing.html()
if html and "Form 13F Information Table" in html:
return html
return None
@property
@lru_cache(maxsize=1)
def infotable_html(self):
"""Returns the HTML information table attachment if available"""
if self.has_infotable():
query = "document_type=='INFORMATION TABLE' and document.lower().endswith('.html')"
attachments = self.filing.attachments.query(query)
if len(attachments) > 0:
return attachments[0].download()
return None
@property
@lru_cache(maxsize=1)
def infotable(self):
"""
Returns the information table as a pandas DataFrame.
Supports both XML format (2013+) and TXT format (2012 and earlier).
"""
from edgar.thirteenf.parsers.infotable_xml import parse_infotable_xml
from edgar.thirteenf.parsers.infotable_txt import parse_infotable_txt
if self.has_infotable():
# Try XML format first
if self.infotable_xml:
return parse_infotable_xml(self.infotable_xml)
# Fall back to TXT format
elif self.infotable_txt:
return parse_infotable_txt(self.infotable_txt)
return None
@property
def accession_number(self):
return self.filing.accession_no
@property
def total_value(self):
"""Total value of holdings in thousands of dollars"""
if self.primary_form_information:
return self.primary_form_information.summary_page.total_value
# For TXT-only filings, calculate from infotable
infotable = self.infotable
if infotable is not None and len(infotable) > 0:
return Decimal(int(infotable['Value'].sum()))
return None
@property
def total_holdings(self):
"""Total number of holdings"""
if self.primary_form_information:
return self.primary_form_information.summary_page.total_holdings
# For TXT-only filings, count from infotable
infotable = self.infotable
if infotable is not None:
return len(infotable)
return None
@property
def report_period(self):
"""Report period end date"""
if self.primary_form_information:
return format_date(self.primary_form_information.report_period)
# For TXT-only filings, use CONFORMED_PERIOD_OF_REPORT from filing header
if hasattr(self.filing, 'period_of_report') and self.filing.period_of_report:
return format_date(self.filing.period_of_report)
return None
@property
def filing_date(self):
return format_date(self.filing.filing_date)
@property
def investment_manager(self):
# This is really the firm e.g. Spark Growth Management Partners II, LLC
if self.primary_form_information:
return self.primary_form_information.cover_page.filing_manager
return None
@property
def signer(self):
# This is the person who signed the filing. Could be the Reporting Manager but could be someone else
# like the CFO
if self.primary_form_information:
return self.primary_form_information.signature.name
return None
# Enhanced manager name properties for better clarity
@property
def management_company_name(self) -> str:
"""
The legal name of the investment management company that filed the 13F.
This is the institutional entity (e.g., "Berkshire Hathaway Inc", "Vanguard Group Inc")
that is legally responsible for managing the assets, not an individual person's name.
Returns:
str: The legal name of the management company, or company name from filing if not available
Example:
>>> thirteen_f.management_company_name
'Berkshire Hathaway Inc'
"""
if self.investment_manager:
return self.investment_manager.name
# For TXT-only filings, use company name from filing
return self.filing.company
@property
def filing_signer_name(self) -> str:
"""
The name of the individual who signed the 13F filing.
This is typically an administrative officer (CFO, CCO, Compliance Officer, etc.)
rather than the famous portfolio manager. For example, Berkshire Hathaway's 13F
is signed by "Marc D. Hamburg" (SVP), not Warren Buffett.
Returns:
str: The name of the person who signed the filing
Example:
>>> thirteen_f.filing_signer_name
'Marc D. Hamburg'
"""
return self.signer
@property
def filing_signer_title(self) -> str:
"""
The business title of the individual who signed the 13F filing.
Common titles include: CFO, CCO, Senior Vice President, Chief Compliance Officer,
Secretary, Treasurer, etc. This helps distinguish administrative signers from
portfolio managers.
Returns:
str: The business title of the filing signer, or None if not available
Example:
>>> thirteen_f.filing_signer_title
'Senior Vice President'
"""
if self.primary_form_information:
return self.primary_form_information.signature.title
return None
@property
def manager_name(self) -> str:
"""
DEPRECATED: Use management_company_name instead.
Returns the management company name for backwards compatibility.
This property name was misleading as it suggested an individual manager's name.
Returns:
str: The management company name
Warning:
This property is deprecated and may be removed in future versions.
Use management_company_name for the company name, or see get_portfolio_managers()
if you need information about individual portfolio managers.
"""
import warnings
warnings.warn(
"manager_name is deprecated and misleading. Use management_company_name for the "
"company name, or get_portfolio_managers() for individual manager information.",
DeprecationWarning,
stacklevel=2
)
return self.management_company_name
def get_portfolio_managers(self, include_approximate: bool = False) -> list[dict]:
"""
Get information about the actual portfolio managers for this fund.
Note: 13F filings do not contain individual portfolio manager names.
This method provides a curated mapping for well-known funds based on
public information. Results may not be current or complete.
Args:
include_approximate (bool): If True, includes approximate/historical
manager information even if not current
Returns:
list[dict]: List of portfolio manager information with keys:
'name', 'title', 'status', 'source', 'last_updated'
Example:
>>> thirteen_f.get_portfolio_managers()
[
{
'name': 'Warren Buffett',
'title': 'Chairman & CEO',
'status': 'active',
'source': 'public_records',
'last_updated': '2024-01-01'
}
]
"""
from edgar.thirteenf.manager_lookup import lookup_portfolio_managers
return lookup_portfolio_managers(
self.management_company_name,
getattr(self.filing, 'cik', None),
include_approximate=include_approximate
)
def _lookup_portfolio_managers(self, company_name: str, include_approximate: bool = False) -> list[dict]:
"""
Private method for testing - looks up portfolio managers by company name.
Args:
company_name: Name of the management company
include_approximate: Whether to include approximate/historical data
Returns:
list[dict]: List of portfolio manager information
"""
from edgar.thirteenf.manager_lookup import lookup_portfolio_managers
return lookup_portfolio_managers(company_name, cik=None, include_approximate=include_approximate)
def get_manager_info_summary(self) -> dict:
"""
Get a comprehensive summary of all available manager information.
This provides a clear breakdown of what information is available from the 13F
filing versus external sources, helping users understand the data limitations.
Returns:
dict: Summary with keys 'from_13f_filing', 'external_sources', 'limitations'
Example:
>>> thirteen_f.get_manager_info_summary()
{
'from_13f_filing': {
'management_company': 'Berkshire Hathaway Inc',
'filing_signer': 'Marc D. Hamburg',
'signer_title': 'Senior Vice President'
},
'external_sources': {
'portfolio_managers': [
{'name': 'Warren Buffett', 'title': 'Chairman & CEO', 'status': 'active'}
]
},
'limitations': [
'13F filings do not contain individual portfolio manager names',
'External manager data may not be current or complete',
'Filing signer is typically an administrative officer, not the portfolio manager'
]
}
"""
portfolio_managers = self.get_portfolio_managers()
return {
'from_13f_filing': {
'management_company': self.management_company_name,
'filing_signer': self.filing_signer_name,
'signer_title': self.filing_signer_title,
'form': self.form,
'period_of_report': str(self.report_period)
},
'external_sources': {
'portfolio_managers': portfolio_managers,
'manager_count': len(portfolio_managers)
},
'limitations': [
'13F filings do not contain individual portfolio manager names',
'External manager data may not be current or complete',
'Filing signer is typically an administrative officer, not the portfolio manager',
'Portfolio manager information is sourced from public records and may be outdated'
]
}
def is_filing_signer_likely_portfolio_manager(self) -> bool:
"""
Determine if the filing signer is likely to be a portfolio manager.
This uses heuristics based on the signer's title to assess whether they
might be involved in investment decisions rather than just administrative functions.
Returns:
bool: True if signer appears to be investment-focused, False if administrative
Example:
>>> thirteen_f.is_filing_signer_likely_portfolio_manager()
False # For administrative titles like CFO, CCO, etc.
"""
from edgar.thirteenf.manager_lookup import is_filing_signer_likely_portfolio_manager
return is_filing_signer_likely_portfolio_manager(self.filing_signer_title)
@lru_cache(maxsize=8)
def previous_holding_report(self):
if len(self._related_filings) == 1:
return None
# Look in the related filings data for the row with this accession number
idx = pc.equal(self._related_filings.data['accession_number'], self.accession_number).index(True).as_py()
if idx == 0:
return None
previous_filing = self._related_filings[idx - 1]
return ThirteenF(previous_filing, use_latest_period_of_report=False)
def __rich__(self):
from edgar.thirteenf.rendering import render_rich
return render_rich(self)
def __repr__(self):
from edgar.richtools import repr_rich
return repr_rich(self.__rich__())
# For backward compatibility, expose parse methods as static methods
ThirteenF.parse_primary_document_xml = staticmethod(lambda xml: __import__('edgar.thirteenf.parsers.primary_xml', fromlist=['parse_primary_document_xml']).parse_primary_document_xml(xml))
ThirteenF.parse_infotable_xml = staticmethod(lambda xml: __import__('edgar.thirteenf.parsers.infotable_xml', fromlist=['parse_infotable_xml']).parse_infotable_xml(xml))
ThirteenF.parse_infotable_txt = staticmethod(lambda txt: __import__('edgar.thirteenf.parsers.infotable_txt', fromlist=['parse_infotable_txt']).parse_infotable_txt(txt))

View File

@@ -0,0 +1,11 @@
"""13F filing parsers for different document formats."""
from .primary_xml import parse_primary_document_xml
from .infotable_xml import parse_infotable_xml
from .infotable_txt import parse_infotable_txt
__all__ = [
'parse_primary_document_xml',
'parse_infotable_xml',
'parse_infotable_txt',
]

View File

@@ -0,0 +1,119 @@
"""TXT format information table parsers with automatic format detection.
Supports two TXT formats from 2012 filings:
- Format 1 (Multiline): Company names can span multiple lines
- Format 2 (Columnar): All data on single line with <S> and <C> tags
"""
import re
import pandas as pd
from .format_multiline import parse_multiline_format
from .format_columnar import parse_columnar_format
__all__ = ['parse_infotable_txt']
def parse_infotable_txt(infotable_txt: str) -> pd.DataFrame:
"""
Parse TXT format information table, auto-detecting format.
Supports:
- Format 1 (Multiline): Berkshire-style with multi-line company names
- Format 2 (Columnar): JANA-style with all data on single line
Args:
infotable_txt: TXT content containing the information table
Returns:
pd.DataFrame: Holdings data with same structure as XML parser
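Example (hedged sketch; `txt` is the raw text of a pre-2013 information table attachment):
>>> holdings = parse_infotable_txt(txt)                                # doctest: +SKIP
>>> holdings[['Issuer', 'Cusip', 'Value', 'SharesPrnAmount']].head()   # doctest: +SKIP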
"""
if _is_columnar_format(infotable_txt):
return parse_columnar_format(infotable_txt)
else:
return parse_multiline_format(infotable_txt)
def _is_columnar_format(infotable_txt: str) -> bool:
"""
Detect if this is columnar format by looking for <S> tags in data rows.
Columnar format has <S> at the start of each data row, followed by data.
Multiline format only has <S> and <C> in the header row.
Args:
infotable_txt: TXT content to analyze
Returns:
bool: True if columnar format, False if multiline format
"""
# Find the Form 13F Information Table section (case-insensitive)
match = re.search(r'FORM\s+13F\s+INFORMATION\s+TABLE', infotable_txt, re.IGNORECASE)
if not match:
return False
# Extract tables (case-insensitive)
# Note: Search from beginning since <TABLE> tag may come before the header text
table_pattern = r'<TABLE>(.*?)</TABLE>'
tables = re.findall(table_pattern, infotable_txt, re.DOTALL | re.IGNORECASE)
if len(tables) == 0:
return False
# Determine which table to check
# If 2+ tables: check second table (first holdings table, after managers table)
# If 1 table: check that single table
if len(tables) >= 2:
holdings_table = tables[1]
else:
holdings_table = tables[0]
lines = holdings_table.split('\n')
# Count data rows with <S> tags that also have CUSIPs
# In columnar format, data rows start with <S> and have CUSIP on same line
# In multiline format, only header has <S>, and CUSIP is on second line of company
data_rows_with_s_and_cusip = 0
data_rows_checked = 0
for line in lines:
line = line.strip()
line_upper = line.upper()
# Skip empty lines, CAPTION, and header rows (case-insensitive)
if not line or '<CAPTION>' in line_upper:
continue
# Skip if this looks like a header (has <S> but no digits)
if '<S>' in line_upper and not re.search(r'\d', line):
continue
# Check if this line has both <S> tag and a CUSIP (9 chars with digit, with or without spaces)
cusip_match = re.search(r'\b([A-Za-z0-9]{9})\b', line)
has_valid_cusip = cusip_match and any(c.isdigit() for c in cusip_match.group(1))
# Also check for spaced CUSIPs
if not has_valid_cusip:
spaced_matches = re.finditer(r'\b([A-Za-z0-9 ]{9,15})\b', line)
for match in spaced_matches:
cleaned = match.group(1).replace(' ', '')
if len(cleaned) == 9 and any(c.isdigit() for c in cleaned):
has_valid_cusip = True
break
if '<S>' in line_upper and has_valid_cusip:
data_rows_with_s_and_cusip += 1
data_rows_checked += 1
elif has_valid_cusip:
# Has CUSIP but no <S> - multiline format
data_rows_checked += 1
# If we've checked 3 data rows, that's enough to decide
if data_rows_checked >= 3:
break
# If most data rows with CUSIPs also have <S> tags, it's columnar format
if data_rows_checked > 0 and data_rows_with_s_and_cusip >= data_rows_checked * 0.5:
return True
return False

View File

@@ -0,0 +1,286 @@
"""Parser for columnar TXT format (Format 2) used in some 2012 filings.
This format has <S> and <C> tags for each field, with all data on a single line.
Example:
<S> <C> <C> <C> <C>
AETNA INC NEW COM 00817Y108 92,760 2,342,435 SH SOLE 2,238,895 103,540 0
"""
import re
import pandas as pd
from edgar.reference import cusip_ticker_mapping
__all__ = ['parse_columnar_format']
def parse_columnar_format(infotable_txt: str) -> pd.DataFrame:
"""
Parse columnar TXT format (Format 2) information table.
This parser handles the format where all data is on a single line with
<S> and <C> tags marking column boundaries.
Args:
infotable_txt: TXT content containing the information table
Returns:
pd.DataFrame: Holdings data with same structure as XML parser
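Example (hedged illustration): for a data line like the AETNA example in the module
docstring, the resulting row would contain roughly Issuer='AETNA INC NEW', Class='COM',
Cusip='00817Y108', Value=92760, SharesPrnAmount=2342435, Type='Shares',
InvestmentDiscretion='SOLE', SoleVoting=2238895, SharedVoting=103540, NonVoting=0.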
"""
# Find the Form 13F Information Table section (case-insensitive)
match = re.search(r'FORM\s+13F\s+INFORMATION\s+TABLE', infotable_txt, re.IGNORECASE)
if not match:
return pd.DataFrame()
# Extract all table content between <TABLE> and </TABLE> tags (case-insensitive)
# Note: Search from beginning since <TABLE> tag may come before the header text
table_pattern = r'<TABLE>(.*?)</TABLE>'
tables = re.findall(table_pattern, infotable_txt, re.DOTALL | re.IGNORECASE)
if len(tables) == 0:
return pd.DataFrame()
# Determine which tables to process:
# - If 2+ tables: Skip first table (usually managers list), process rest
# - If 1 table: Check if it has holdings data (CUSIPs with <S> tags), if so process it
if len(tables) >= 2:
holdings_tables = tables[1:] # Skip first table (managers)
elif len(tables) == 1:
# Check if the single table has holdings data (contains CUSIPs with <S> tags)
# Look for lines that have both <S> tag and valid CUSIP (with or without spaces)
potential_lines = [line for line in tables[0].split('\n') if '<S>' in line.upper()]
has_data = False
for line in potential_lines[:10]: # Check first 10 <S> lines
# Try non-spaced CUSIPs first
cusip_match = re.search(r'\b([A-Za-z0-9]{9})\b', line)
if cusip_match and any(c.isdigit() for c in cusip_match.group(1)):
has_data = True
break
# Try spaced CUSIPs
spaced_matches = re.finditer(r'\b([A-Za-z0-9 ]{9,15})\b', line)
for match in spaced_matches:
cleaned = match.group(1).replace(' ', '')
if len(cleaned) == 9 and any(c.isdigit() for c in cleaned):
has_data = True
break
if has_data:
break
if has_data:
holdings_tables = tables # Process the single table
else:
return pd.DataFrame() # No holdings data
else:
return pd.DataFrame()
parsed_rows = []
for holdings_table in holdings_tables:
# Skip if this is the totals table (very short, < 200 chars)
if len(holdings_table.strip()) < 200:
continue
lines = holdings_table.split('\n')
for line in lines:
line = line.strip()
# Skip empty lines, CAPTION lines, header rows (case-insensitive)
line_upper = line.upper()
if not line or '<CAPTION>' in line_upper:
continue
# Skip header rows with just tags (case-insensitive)
# Header rows have <S> but no valid CUSIPs (9 chars with at least one digit, with or without spaces)
if line_upper.startswith('<S>'):
# Check for normal 9-char CUSIP
has_cusip = False
cusip_check = re.search(r'\b([A-Za-z0-9]{9})\b', line)
if cusip_check and any(c.isdigit() for c in cusip_check.group(1)):
has_cusip = True
# If not found, check for spaced CUSIP
if not has_cusip:
spaced_check = re.finditer(r'\b([A-Za-z0-9 ]{9,15})\b', line)
for match in spaced_check:
cleaned = match.group(1).replace(' ', '')
if len(cleaned) == 9 and any(c.isdigit() for c in cleaned):
has_cusip = True
break
if not has_cusip:
continue
if line.startswith(('Total', 'Title', 'NAME OF ISSUER', 'of', 'Market Value')):
continue
# Look for data rows with <S> tag and a CUSIP (case-insensitive)
if '<S>' not in line_upper:
continue
# CUSIP is a reliable anchor - it's always 9 alphanumeric characters (case-insensitive)
# Must contain at least one digit to avoid matching company names or words like "SPONSORED"
# Some filings have spaces in CUSIPs: "00724F 10 1" should be "00724F101"
# Find ALL potential CUSIP sequences (with or without spaces), then pick the first valid one
# First try without spaces (faster path)
cusip_match = None
cusip = None
all_cusip_matches = re.finditer(r'\b([A-Za-z0-9]{9})\b', line)
for match in all_cusip_matches:
if any(c.isdigit() for c in match.group(1)):
cusip_match = match
cusip = match.group(1)
break
# If not found, try matching with spaces and cleaning
if not cusip_match:
# Match sequences of 9-15 chars that might contain spaces
spaced_matches = re.finditer(r'\b([A-Za-z0-9 ]{9,15})\b', line)
for match in spaced_matches:
cleaned = match.group(1).replace(' ', '')
# Check if cleaned version is exactly 9 chars and has a digit
if len(cleaned) == 9 and any(c.isdigit() for c in cleaned):
cusip_match = match
cusip = cleaned # Use cleaned version
break
if not cusip_match:
continue
# Remove SGML tags and split by whitespace
# Replace <S> and <C> with spaces to help with splitting
cleaned_line = line.replace('<S>', ' ').replace('<C>', ' ')
parts = cleaned_line.split()
# Filter out empty parts
parts = [p for p in parts if p.strip()]
if len(parts) < 10: # Need at least issuer, class, cusip, value, shares, type, discretion, sole, shared, none
continue
try:
# Find CUSIP position in parts
# cusip already set above (either from direct match or cleaned from spaced match)
# Try to find it in parts - it might be spaced or not spaced
cusip_idx = None
cusip_span = 1 # How many elements the CUSIP occupies in parts
# First try to find cleaned CUSIP as a single element
if cusip in parts:
cusip_idx = parts.index(cusip)
else:
# Try to find the original spaced version as a single element
original_cusip = cusip_match.group(1)
if original_cusip in parts:
cusip_idx = parts.index(original_cusip)
else:
# For spaced CUSIPs split across multiple parts (e.g., "00724F 10 1" -> ["00724F", "10", "1"])
# Look for a sequence of parts that, when joined, matches the cleaned CUSIP
for i in range(len(parts) - 2): # Need at least 3 parts for a split CUSIP
# Try joining 2-4 consecutive parts
for span in range(2, 5):
if i + span > len(parts):
break
joined = ''.join(parts[i:i+span])
if joined == cusip:
cusip_idx = i
cusip_span = span
break
if cusip_idx is not None:
break
if cusip_idx is None:
continue
# Before CUSIP: Issuer name and class
# Everything before CUSIP minus the last word (which is the class)
before_cusip = parts[:cusip_idx]
if len(before_cusip) < 2:
continue
# Last part before CUSIP is the class, rest is issuer name
title_class = before_cusip[-1]
issuer_name = ' '.join(before_cusip[:-1])
# After CUSIP: value, shares, type (SH/PRN), discretion, sole, shared, none
# Skip cusip_span elements for spaced CUSIPs (e.g., ["00724F", "10", "1"])
after_cusip = parts[cusip_idx + cusip_span:]
if len(after_cusip) < 7:
continue
# Parse fields after CUSIP
# Expected order: VALUE SHARES TYPE DISCRETION ... SOLE SHARED NONE
value_str = after_cusip[0].replace(',', '').replace('$', '')
shares_str = after_cusip[1].replace(',', '')
value = int(value_str) if value_str and value_str != '-' else 0
shares = int(shares_str) if shares_str and shares_str != '-' else 0
# Type (SH/PRN) is typically at index 2
share_type = after_cusip[2] if len(after_cusip) > 2 else 'SH'
if share_type == 'SH':
share_type_full = 'Shares'
elif share_type == 'PRN':
share_type_full = 'Principal'
else:
share_type_full = 'Shares'
# Find investment discretion (typically "SOLE", "SHARED", "DEFINED", or compound like "SHARED-DEFINED")
# It's the first non-numeric field after type
discretion_idx = 3
investment_discretion = ''
for i in range(3, len(after_cusip) - 3): # Last 3 are voting columns
part = after_cusip[i]
if part and part not in ['-'] and not part.replace(',', '').isdigit():
investment_discretion = part
discretion_idx = i
break
# Voting columns are the last 3 fields
if len(after_cusip) >= 3:
none_voting_str = after_cusip[-1].replace(',', '')
shared_voting_str = after_cusip[-2].replace(',', '')
sole_voting_str = after_cusip[-3].replace(',', '')
non_voting = int(none_voting_str) if none_voting_str and none_voting_str != '-' else 0
shared_voting = int(shared_voting_str) if shared_voting_str and shared_voting_str != '-' else 0
sole_voting = int(sole_voting_str) if sole_voting_str and sole_voting_str != '-' else 0
else:
sole_voting = 0
shared_voting = 0
non_voting = 0
# Create row dict
row_dict = {
'Issuer': issuer_name,
'Class': title_class,
'Cusip': cusip,
'Value': value,
'SharesPrnAmount': shares,
'Type': share_type_full,
'PutCall': '',
'InvestmentDiscretion': investment_discretion,
'SoleVoting': sole_voting,
'SharedVoting': shared_voting,
'NonVoting': non_voting
}
parsed_rows.append(row_dict)
except (ValueError, IndexError) as e:
# Skip rows that don't parse correctly
continue
# Create DataFrame
if not parsed_rows:
return pd.DataFrame()
table = pd.DataFrame(parsed_rows)
# Add ticker symbols using CUSIP mapping
cusip_mapping = cusip_ticker_mapping(allow_duplicate_cusips=False)
table['Ticker'] = table.Cusip.map(cusip_mapping.Ticker)
return table

View File

@@ -0,0 +1,273 @@
"""Parser for multiline TXT format (Format 1) used in some 2012 filings.
This format has company names that can span multiple lines, with the CUSIP
appearing on the same line as the continuation of the company name.
Example:
AMERICAN
EXPRESS CO COM 025816109 110999 1952142 Shared-Defined...
"""
import re
import pandas as pd
from edgar.reference import cusip_ticker_mapping
__all__ = ['parse_multiline_format']
def parse_multiline_format(infotable_txt: str) -> pd.DataFrame:
"""
Parse multiline TXT format (Format 1) information table.
This parser handles the format where company names can span multiple lines,
with the CUSIP appearing on the line that contains the continuation.
Args:
infotable_txt: TXT content containing the information table
Returns:
pd.DataFrame: Holdings data with same structure as XML parser
"""
# Find the Form 13F Information Table section (case-insensitive)
match = re.search(r'FORM\s+13F\s+INFORMATION\s+TABLE', infotable_txt, re.IGNORECASE)
if not match:
return pd.DataFrame()
# Extract all table content between <TABLE> and </TABLE> tags (case-insensitive)
# Note: Search from beginning since <TABLE> tag may come before the header text
table_pattern = r'<TABLE>(.*?)</TABLE>'
tables = re.findall(table_pattern, infotable_txt, re.DOTALL | re.IGNORECASE)
if len(tables) == 0:
return pd.DataFrame()
# Determine which tables to process:
# - If 2+ tables: Skip first table (usually managers list), process rest
# - If 1 table: Check if it has holdings data (CUSIPs), if so process it
if len(tables) >= 2:
holdings_tables = tables[1:] # Skip first table (managers)
elif len(tables) == 1:
# Check if the single table has holdings data (contains CUSIPs with digits)
# Look for 9-char alphanumeric sequences (with or without spaces) that contain at least one digit
potential_cusips = re.findall(r'\b([A-Za-z0-9]{9})\b', tables[0])
# Also check for spaced CUSIPs
spaced_cusips = re.findall(r'\b([A-Za-z0-9 ]{9,15})\b', tables[0])
spaced_cusips_cleaned = [c.replace(' ', '') for c in spaced_cusips if len(c.replace(' ', '')) == 9]
has_valid_cusips = (
any(any(c.isdigit() for c in cusip) for cusip in potential_cusips) or
any(any(c.isdigit() for c in cusip) for cusip in spaced_cusips_cleaned)
)
if has_valid_cusips:
holdings_tables = tables # Process the single table
else:
return pd.DataFrame() # No holdings data
else:
return pd.DataFrame()
parsed_rows = []
for holdings_table in holdings_tables:
# Skip if this is the totals table (very short, < 200 chars)
if len(holdings_table.strip()) < 200:
continue
# Reset pending issuer parts for each table
pending_issuer_parts = []
lines = holdings_table.split('\n')
for line in lines:
orig_line = line
line = line.strip()
# Skip empty lines, CAPTION lines, header rows (case-insensitive)
line_upper = line.upper()
if not line or '<CAPTION>' in line_upper or '<S>' in line_upper or '<C>' in line_upper:
continue
# Skip separator lines (made of dashes and spaces)
if all(c in '- ' for c in line):
continue
# Skip header/title rows
if line.startswith(('Total', 'Title', 'Name of Issuer', 'of', 'Market Value')):
continue
# Skip column header rows (contain keywords like COLUMN, VOTING AUTHORITY, SHRS OR PRN, etc.)
if any(keyword in line_upper for keyword in ['COLUMN 1', 'COLUMN 2', 'VOTING AUTHORITY', 'SHRS OR', 'NAME OF ISSUER', 'FORM 13F', 'INFORMATION TABLE']):
continue
# Try to parse as a data row
# CUSIP is a reliable anchor - it's always 9 alphanumeric characters (case-insensitive)
# Must contain at least one digit to avoid matching company names like "Berkshire" or "SPONSORED"
# Some filings have spaces in CUSIPs: "00724F 10 1" should be "00724F101"
# Find ALL potential CUSIP sequences (with or without spaces), then pick the first valid one
# First try without spaces (faster path)
cusip_match = None
cusip = None
all_cusip_matches = re.finditer(r'\b([A-Za-z0-9]{9})\b', line)
for match in all_cusip_matches:
if any(c.isdigit() for c in match.group(1)):
cusip_match = match
cusip = match.group(1)
break
# If not found, try matching with spaces and cleaning
if not cusip_match:
# Match sequences of 9-15 chars that might contain spaces
spaced_matches = re.finditer(r'\b([A-Za-z0-9 ]{9,15})\b', line)
for match in spaced_matches:
cleaned = match.group(1).replace(' ', '')
# Check if cleaned version is exactly 9 chars and has a digit
if len(cleaned) == 9 and any(c.isdigit() for c in cleaned):
cusip_match = match
cusip = cleaned # Use cleaned version
break
if cusip_match:
# This line contains a CUSIP, so it has the main data
# cusip already set above (either from direct match or cleaned from spaced match)
cusip_pos = cusip_match.start()
# Everything before CUSIP is issuer name + class
before_cusip = line[:cusip_pos].strip()
# Everything after CUSIP is the numeric data
# Use match.end() to handle spaced CUSIPs correctly (e.g., "00724F 10 1")
after_cusip = line[cusip_match.end():].strip()
# Split before_cusip into issuer parts
# Combine with any pending issuer parts from previous line
before_parts = before_cusip.split()
# If we have pending parts, this completes a multi-line company name
if pending_issuer_parts:
before_parts = pending_issuer_parts + before_parts
pending_issuer_parts = []
if len(before_parts) < 2:
# Not enough data, skip
continue
# Extract class and issuer name
# Common patterns:
# - "COMPANY NAME COM" → class="COM", issuer="COMPANY NAME"
# - "COMPANY NAME SPONSORED ADR" → class="SPONSORED ADR", issuer="COMPANY NAME"
# - "COMPANY NAME CL A" → class="CL A", issuer="COMPANY NAME"
if len(before_parts) >= 3 and before_parts[-2] == 'SPONSORED' and before_parts[-1] == 'ADR':
title_class = 'SPONSORED ADR'
issuer_parts = before_parts[:-2]
elif len(before_parts) >= 3 and before_parts[-2] == 'CL':
title_class = 'CL ' + before_parts[-1]
issuer_parts = before_parts[:-2]
elif len(before_parts) >= 5 and ' '.join(before_parts[-4:]).startswith('LIB CAP COM'):
# "LIBERTY MEDIA CORPORATION LIB CAP COM A"
title_class = ' '.join(before_parts[-4:])
issuer_parts = before_parts[:-4]
elif len(before_parts) >= 2:
# Default: last word/token is the class
title_class = before_parts[-1]
issuer_parts = before_parts[:-1]
else:
# Only one part - skip this row
continue
issuer_name = ' '.join(issuer_parts)
# Skip if issuer name is empty
if not issuer_name:
continue
# Parse the numeric data after CUSIP
# Flexible format handling since empty columns may not appear
# Expected order: VALUE SHARES [TYPE] [DISCRETION] [MANAGERS] [SOLE] [SHARED] [NONE]
data_parts = after_cusip.split()
if len(data_parts) < 2: # At minimum need value and shares
continue
try:
# Value and Shares are always the first two fields
value_str = data_parts[0].replace(',', '').replace('$', '')
shares_str = data_parts[1].replace(',', '')
value = int(value_str) if value_str and value_str != '-' else 0
shares = float(shares_str) if shares_str and shares_str != '-' else 0
# Parse voting columns from the end (look for numeric values)
# Work backwards from end to find up to 3 numeric voting columns
voting_values = []
for i in range(len(data_parts) - 1, 1, -1): # Start from end, skip first 2 (value/shares)
part = data_parts[i].replace(',', '').replace('.', '')
if part.replace('-', '').isdigit():
# This is a numeric value (could be voting)
val_str = data_parts[i].replace(',', '')
try:
voting_values.insert(0, float(val_str) if val_str != '-' else 0)
if len(voting_values) == 3:
break
except ValueError:
break
else:
# Non-numeric, stop looking for voting columns
break
# Assign voting values (may have 0-3 values)
sole_voting = int(voting_values[0]) if len(voting_values) >= 1 else 0
shared_voting = int(voting_values[1]) if len(voting_values) >= 2 else 0
non_voting = int(voting_values[2]) if len(voting_values) >= 3 else 0
# Find investment discretion by looking for non-numeric field after position 2
# It's typically "Shared-Defined", "SOLE", "Defined", etc.
# Skip position 2 which might be TYPE (SH/PRN)
investment_discretion = ''
num_voting_at_end = len(voting_values)
for i in range(2, len(data_parts) - num_voting_at_end):
part = data_parts[i]
# Investment discretion contains letters and is not a known type marker
if part and part not in ['-', 'SH', 'PRN'] and not part.replace(',', '').replace('.', '').isdigit():
investment_discretion = part
break
# Create row dict
row_dict = {
'Issuer': issuer_name,
'Class': title_class,
'Cusip': cusip,
'Value': value,
'SharesPrnAmount': shares,
'Type': 'Shares',
'PutCall': '',
'InvestmentDiscretion': investment_discretion,
'SoleVoting': sole_voting,
'SharedVoting': shared_voting,
'NonVoting': non_voting
}
parsed_rows.append(row_dict)
except (ValueError, IndexError):
# Skip rows that don't parse correctly
continue
else:
# No CUSIP on this line - might be first part of a multi-line company name
# Store it for the next line
if line and not line.startswith(('Total', 'Title')):
pending_issuer_parts = line.split()
# Create DataFrame
if not parsed_rows:
return pd.DataFrame()
table = pd.DataFrame(parsed_rows)
# Add ticker symbols using CUSIP mapping
cusip_mapping = cusip_ticker_mapping(allow_duplicate_cusips=False)
table['Ticker'] = table.Cusip.map(cusip_mapping.Ticker)
return table

View File

@@ -0,0 +1,56 @@
"""Parser for 13F information table XML format."""
import pandas as pd
from edgar.reference import cusip_ticker_mapping
from edgar.xmltools import child_text, find_element
__all__ = ['parse_infotable_xml']
def parse_infotable_xml(infotable_xml: str) -> pd.DataFrame:
"""
Parse the infotable xml and return a pandas DataFrame
Args:
infotable_xml: XML content of the information table
Returns:
pd.DataFrame: Holdings data with columns matching the XML structure
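Example (hedged sketch; `xml` is the downloaded information-table XML):
>>> df = parse_infotable_xml(xml)   # doctest: +SKIP
>>> list(df.columns)                # doctest: +SKIP
['Issuer', 'Class', 'Cusip', 'Value', 'SharesPrnAmount', 'Type', 'PutCall',
'InvestmentDiscretion', 'SoleVoting', 'SharedVoting', 'NonVoting', 'Ticker']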
"""
root = find_element(infotable_xml, "informationTable")
rows = []
shares_or_principal = {"SH": "Shares", "PRN": "Principal"}
for info_tag in root.find_all("infoTable"):
info_table = dict()
info_table['Issuer'] = child_text(info_tag, "nameOfIssuer")
info_table['Class'] = child_text(info_tag, "titleOfClass")
info_table['Cusip'] = child_text(info_tag, "cusip")
info_table['Value'] = int(child_text(info_tag, "value"))
# Shares or principal
shares_tag = info_tag.find("shrsOrPrnAmt")
info_table['SharesPrnAmount'] = child_text(shares_tag, "sshPrnamt")
# Shares (SH) or principal (PRN) type
ssh_prnamt_type = child_text(shares_tag, "sshPrnamtType")
info_table['Type'] = shares_or_principal.get(ssh_prnamt_type)
info_table["PutCall"] = child_text(info_tag, "putCall") or ""
info_table['InvestmentDiscretion'] = child_text(info_tag, "investmentDiscretion")
# Voting authority
voting_auth_tag = info_tag.find("votingAuthority")
info_table['SoleVoting'] = int(float(child_text(voting_auth_tag, "Sole")))
info_table['SharedVoting'] = int(float(child_text(voting_auth_tag, "Shared")))
info_table['NonVoting'] = int(float(child_text(voting_auth_tag, "None")))
rows.append(info_table)
table = pd.DataFrame(rows)
# Add the ticker symbol
cusip_mapping = cusip_ticker_mapping(allow_duplicate_cusips=False)
table['Ticker'] = table.Cusip.map(cusip_mapping.Ticker)
return table

View File

@@ -0,0 +1,118 @@
"""Parser for 13F primary document XML format."""
from datetime import datetime
from decimal import Decimal
from functools import lru_cache
from edgar._party import Address
from edgar.thirteenf.models import (
FilingManager,
OtherManager,
CoverPage,
SummaryPage,
Signature,
PrimaryDocument13F
)
from edgar.xmltools import child_text, find_element
__all__ = ['parse_primary_document_xml']
@lru_cache(maxsize=8)
def parse_primary_document_xml(primary_document_xml: str):
"""
Parse the primary 13F XML document.
Args:
primary_document_xml: XML content of the primary document
Returns:
PrimaryDocument13F: Parsed primary document data
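Example (hedged sketch; `xml` is the primary document XML returned by filing.xml()):
>>> doc = parse_primary_document_xml(xml)    # doctest: +SKIP
>>> doc.cover_page.filing_manager.name       # doctest: +SKIP
>>> doc.summary_page.total_value             # doctest: +SKIP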
"""
root = find_element(primary_document_xml, "edgarSubmission")
# Header data
header_data = root.find("headerData")
filer_info = header_data.find("filerInfo")
report_period = datetime.strptime(child_text(filer_info, "periodOfReport"), "%m-%d-%Y")
# Form Data
form_data = root.find("formData")
cover_page_el = form_data.find("coverPage")
report_calendar_or_quarter = child_text(form_data, "reportCalendarOrQuarter")
report_type = child_text(cover_page_el, "reportType")
# Filing Manager
filing_manager_el = cover_page_el.find("filingManager")
# Address
address_el = filing_manager_el.find("address")
address = Address(
street1=child_text(address_el, "street1"),
street2=child_text(address_el, "street2"),
city=child_text(address_el, "city"),
state_or_country=child_text(address_el, "stateOrCountry"),
zipcode=child_text(address_el, "zipCode")
)
filing_manager = FilingManager(name=child_text(filing_manager_el, "name"), address=address)
# Other managers
other_manager_info_el = cover_page_el.find("otherManagersInfo")
other_managers = [
OtherManager(
cik=child_text(other_manager_el, "cik"),
name=child_text(other_manager_el, "name"),
file_number=child_text(other_manager_el, "form13FFileNumber")
)
for other_manager_el in other_manager_info_el.find_all("otherManager")
] if other_manager_info_el else []
# Summary Page
summary_page_el = form_data.find("summaryPage")
if summary_page_el:
other_included_managers_count = child_text(summary_page_el,
"otherIncludedManagersCount")
if other_included_managers_count:
other_included_managers_count = int(other_included_managers_count)
total_holdings = child_text(summary_page_el, "tableEntryTotal")
if total_holdings:
total_holdings = int(total_holdings)
total_value = child_text(summary_page_el, "tableValueTotal")
if total_value:
total_value = Decimal(total_value)
else:
other_included_managers_count = 0
total_holdings = 0
total_value = 0
# Signature Block
signature_block_el = form_data.find("signatureBlock")
signature = Signature(
name=child_text(signature_block_el, "name"),
title=child_text(signature_block_el, "title"),
phone=child_text(signature_block_el, "phone"),
city=child_text(signature_block_el, "city"),
signature=child_text(signature_block_el, "signature"),
state_or_country=child_text(signature_block_el, "stateOrCountry"),
date=child_text(signature_block_el, "signatureDate")
)
parsed_primary_doc = PrimaryDocument13F(
report_period=report_period,
cover_page=CoverPage(
filing_manager=filing_manager,
report_calendar_or_quarter=report_calendar_or_quarter,
report_type=report_type,
other_managers=other_managers
),
signature=signature,
summary_page=SummaryPage(
other_included_managers_count=other_included_managers_count or 0,
total_holdings=total_holdings or 0,
total_value=total_value or 0
),
additional_information=child_text(cover_page_el, "additionalInformation")
)
return parsed_primary_doc

View File

@@ -0,0 +1,90 @@
"""Rich rendering for 13F holdings reports."""
from rich import box
from rich.console import Group
from rich.panel import Panel
from rich.table import Column, Table
__all__ = ['render_rich', 'infotable_summary']
def infotable_summary(thirteen_f):
"""
Create a summary DataFrame of the information table for display.
Args:
thirteen_f: ThirteenF instance
Returns:
pd.DataFrame or None: Summary of holdings sorted by value
"""
if thirteen_f.has_infotable():
infotable = thirteen_f.infotable
if infotable is not None and len(infotable) > 0:
return (infotable
.filter(['Issuer', 'Class', 'Cusip', 'Ticker', 'Value', 'SharesPrnAmount', 'Type', 'PutCall',
'SoleVoting', 'SharedVoting', 'NonVoting'])
.rename(columns={'SharesPrnAmount': 'Shares'})
.assign(Value=lambda df: df.Value,
Type=lambda df: df.Type.fillna('-'),
Ticker=lambda df: df.Ticker.fillna(''))
.sort_values(['Value'], ascending=False)
)
return None
def render_rich(thirteen_f):
"""
Create Rich Panel display for a 13F filing.
Args:
thirteen_f: ThirteenF instance
Returns:
Panel: Rich Panel containing filing summary and holdings table
"""
title = f"{thirteen_f.form} Holding Report for {thirteen_f.filing.company} for period {thirteen_f.report_period}"
summary = Table(
"Report Period",
Column("Investment Manager", style="bold deep_sky_blue1"),
"Signed By",
"Holdings",
"Value",
"Accession Number",
"Filed",
box=box.SIMPLE)
summary.add_row(
thirteen_f.report_period,
thirteen_f.management_company_name,
thirteen_f.signer or "-",
str(thirteen_f.total_holdings or "-"),
f"${thirteen_f.total_value:,.0f}" if thirteen_f.total_value else "-",
thirteen_f.filing.accession_no,
thirteen_f.filing_date
)
content = [summary]
# info table
infotable_summary_df = infotable_summary(thirteen_f)
if infotable_summary_df is not None:
table = Table("", "Issuer", "Class", "Cusip", "Ticker", "Value", "Type", "Shares", "Put/Call",
row_styles=["bold", ""],
box=box.SIMPLE)
for index, row in enumerate(infotable_summary_df.itertuples()):
table.add_row(str(index),
row.Issuer,
row.Class,
row.Cusip,
row.Ticker,
f"${row.Value:,.0f}",
row.Type,
f"{int(row.Shares):,.0f}",
row.PutCall
)
content.append(table)
return Panel(
Group(*content), title=title, subtitle=title
)