Initial commit

This commit is contained in:
kdusek
2025-12-09 12:13:01 +01:00
commit 8e654ed209
13332 changed files with 2695056 additions and 0 deletions

View File

@@ -0,0 +1,130 @@
"""
Investment funds package for EdgarTools.
This package provides comprehensive tools for working with investment funds,
fund classes, series information, and portfolio holdings from SEC filings.
The primary classes follow the domain model design:
- FundCompany: Represents the legal entity that manages funds (e.g., "Vanguard")
- FundSeries: Represents a specific investment product/strategy (e.g., "Vanguard 500 Index Fund")
- FundClass: Represents a specific share class with its own ticker (e.g., "Vanguard 500 Index Admiral Shares")
Key functions:
- find_fund(): Smart factory that returns the appropriate entity based on any identifier
- get_fund_company(): Get a fund company by CIK
- get_fund_series(): Get a fund series by series ID
- get_fund_class(): Get a fund class by ticker or class ID
This package provides a more organized, intuitive API for working with fund entities:
- core.py: Defines the domain entities and access functions
- data.py: Provides data access functions and implementations
- reports.py: Handles fund reports like N-PORT filings
"""
# Keep backward compatibility for now
# Note: We don't import from reports and thirteenf modules directly here
# to avoid circular imports. These will be imported directly by clients.
from functools import lru_cache
from edgar.funds.core import (
Fund,
FundClass,
FundCompany,
FundSeries,
find_fund,
get_fund_class,
get_fund_company,
get_fund_series,
)
from edgar.funds.data import FundData, get_fund_information, is_fund_ticker, parse_fund_data, resolve_fund_identifier
from edgar.funds.reports import NPORT_FORMS, CurrentMetric, FundReport, get_fund_portfolio_from_filing
# Backward compatibility function for code that relies on the old API
def get_fund_with_filings(identifier: str):
    """
    Look up a fund class or series together with its filings (legacy API).

    Kept so that code written against the legacy funds.py module keeps
    working. New code should use:
    - Fund.get_filings() to get filings for a fund
    - find_fund() factory function to create fund objects

    Args:
        identifier: Fund identifier (class ID, series ID, or CIK)

    Returns:
        The result of ``direct_get_fund_with_filings`` when it is truthy;
        otherwise a minimal placeholder object exposing ``id``, ``name``
        and ``fund_cik``.
    """
    import logging

    from edgar.funds.data import direct_get_fund_with_filings

    class MinimalFundInfo:
        """Last-resort stand-in carrying the attributes legacy callers read."""

        def __init__(self, identifier):
            self.id = "C000000"
            self.name = f"Unknown Fund {identifier}"
            self.fund_cik = 0

    fund = None
    if identifier:
        try:
            fund = direct_get_fund_with_filings(identifier)
        except Exception as e:
            # Best effort: any lookup failure falls through to the placeholder
            logging.warning("Error in get_fund_with_filings: %s", e)

    if fund:
        return fund
    return MinimalFundInfo(identifier or "Unknown")
# Define FundSeriesAndContracts for backward compatibility
class FundSeriesAndContracts:
    """
    Legacy container for fund series and class (contract) data.

    Retained only for backward compatibility with the legacy funds.py module.
    The series/class information is held in ``self.data``; when nothing is
    supplied an empty DataFrame is used.

    New code should use the Fund, FundClass, and FundSeries classes from
    edgar.funds.core, which provide a more robust object model.
    """

    def __init__(self, data=None):
        # pandas is imported lazily, only when an instance is created
        import pandas as pd

        if data is None:
            data = pd.DataFrame()
        self.data = data
# Public API of the edgar.funds package: the names exported by
# `from edgar.funds import *`. Every entry is either imported above or
# defined in this module.
__all__ = [
# Primary user-facing class
'Fund',
# Domain entity classes
'FundCompany',
'FundSeries',
'FundClass',
# Access functions
'find_fund',
'get_fund_company',
'get_fund_series',
'get_fund_class',
# Data classes
'FundData',
'resolve_fund_identifier',
# Functions now implemented directly in the package
'get_fund_information',
'is_fund_ticker',
'parse_fund_data',
# Portfolio and report functionality
'FundReport',
'CurrentMetric',
'NPORT_FORMS',
'get_fund_portfolio_from_filing',
# Legacy compatibility
'get_fund_with_filings',
'FundSeriesAndContracts',
]

View File

@@ -0,0 +1,582 @@
"""
Core classes for working with investment funds.
This module provides the main classes used to interact with investment funds:
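- Fund: Unified wrapper around whichever fund entity an identifier resolves to
- FundCompany: Represents the company (SEC filer) that manages fund series
- FundClass: Represents a specific share class of a fund
- FundSeries: Represents a fund series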
"""
import logging
from typing import TYPE_CHECKING, List, Optional, Union, Dict, Any
from rich import box
from rich.console import Group
from rich.panel import Panel
from rich.table import Table
from edgar.entity.core import Entity
from edgar.richtools import repr_rich
if TYPE_CHECKING:
from edgar._filings import Filings
from edgar.entity.data import EntityData
# Module-level logger, named after this module
log = logging.getLogger(__name__)
# Names exported by `from edgar.funds.core import *`
__all__ = ['Fund', 'FundCompany', 'FundClass', 'FundSeries', 'get_fund_company', 'get_fund_class', 'get_fund_series', 'find_fund']
class FundCompany(Entity):
    """
    An investment fund company that files with the SEC.

    Extends ``Entity`` with fund-specific state: an optional display name
    override and the list of fund series that belong to the company.
    """

    def __init__(self,
                 cik_or_identifier: Union[str, int],
                 fund_name: Optional[str] = None,
                 all_series: Optional[List['FundSeries']] = None):
        # Imported here rather than at module level to avoid a circular import
        from edgar.funds.data import resolve_fund_identifier

        # Fund-specific identifiers are resolved before being handed to Entity
        resolved_identifier = resolve_fund_identifier(cik_or_identifier)
        super().__init__(resolved_identifier)

        self._name = fund_name
        self.all_series: Optional[List['FundSeries']] = all_series or []
        self._cached_portfolio = None

    @property
    def name(self):
        """Company name: the explicit ``fund_name`` if given, else the Entity name."""
        return self._name or super().name

    def list_series(self) -> List['FundSeries']:
        """
        Return the fund series associated with this company.

        Returns:
            List of FundSeries instances (the ``all_series`` list itself)
        """
        return self.all_series

    @property
    def data(self) -> 'EntityData':
        """Detailed entity data for this fund company."""
        entity_data = super().data

        # Fund-specific data is returned as-is ...
        if hasattr(entity_data, 'is_fund') and entity_data.is_fund:
            return entity_data

        # ... and so is anything else for now; a conversion to
        # fund-specific data could be added here in the future.
        return entity_data

    def __str__(self):
        return f"{self.name} [{self.cik}]"

    def __rich__(self):
        """Rich rendering; currently identical to the Entity rendering."""
        return super().__rich__()

    def __repr__(self):
        return repr_rich(self.__rich__())
class FundClass:
    """
    Represents a specific share class of an investment fund.

    Fund classes typically have their own ticker symbols and fee structures,
    but belong to the same underlying fund. Each class belongs to a specific
    fund series, which may be unknown (``None``) when the class is created
    on its own.
    """

    def __init__(self, class_id: str, name: Optional[str] = None,
                 ticker: Optional[str] = None, series: Optional['FundSeries'] = None):
        """
        Args:
            class_id: Class/contract ID (e.g., 'C000012345')
            name: Display name of the share class
            ticker: Ticker symbol, if the class has one
            series: The parent FundSeries object, if known
        """
        self.class_id = class_id
        self.name = name
        self.ticker = ticker
        self.series = series  # Parent FundSeries object (not just its ID); may be None

    def _series_id_label(self) -> str:
        """Series ID for display, or "Unknown" when there is no series or no ID."""
        return getattr(self.series, 'series_id', None) or "Unknown"

    def __str__(self):
        ticker_str = f" - {self.ticker}" if self.ticker else ""
        return f"FundClass({self.name} [{self.class_id}]{ticker_str})"

    def get_classes(self) -> List['FundClass']:
        """
        Get all share classes in the same series as this class.

        Returns:
            The classes of the parent series as reported by
            ``get_fund_object``; falls back to ``[self]`` when the series is
            unknown or cannot be looked up.
        """
        if self.series and self.series.series_id:
            # Imported lazily to avoid a circular import with edgar.funds.data
            from edgar.funds.data import get_fund_object
            full_series = get_fund_object(self.series.series_id)
            if full_series and hasattr(full_series, 'get_classes'):
                return full_series.get_classes()
        return [self]  # fallback

    def __repr__(self):
        return self.__str__()

    def __rich__(self):
        """Creates a rich representation of the fund class."""
        table = Table(
            title=None,
            box=box.ROUNDED,
            show_header=True
        )
        table.add_column("Fund", style="bold")
        table.add_column("Class ID", style="bold")
        table.add_column("Series ID", style="bold cyan")
        table.add_column("Ticker", style="bold yellow")

        table.add_row(
            self.name,
            self.class_id,
            # `series` is optional, so it must not be dereferenced blindly
            self._series_id_label(),
            self.ticker or ""
        )

        return Panel(
            table,
            title=f"🏦 {self.name}",
            subtitle="Fund Class"
        )
class FundSeries:
    """A fund series: one investment product holding one or more share classes."""

    def __init__(self, series_id: str, name: str,
                 fund_classes: Optional[List['FundClass']] = None,
                 fund_company: Optional['FundCompany'] = None):
        self.series_id = series_id
        self.name = name
        self.fund_classes: List['FundClass'] = fund_classes or []
        self.fund_company: Optional['FundCompany'] = fund_company

    def get_classes(self) -> List['FundClass']:
        """
        Return the share classes of this series.

        Returns:
            List of FundClass instances belonging to this specific series
        """
        return self.fund_classes

    def get_filings(self, **kwargs) -> 'Filings':
        """
        Return filings for this series by delegating to its fund company.

        Args:
            **kwargs: Filtering parameters passed through to the company's
                get_filings

        Returns:
            Whatever the fund company's get_filings returns
        """
        company = self.fund_company
        return company.get_filings(**kwargs)

    def __str__(self):
        return f"FundSeries({self.name} [{self.series_id}])"

    def __repr__(self):
        return repr_rich(self.__rich__())

    def __rich__(self):
        """Rich rendering: a panel listing every share class of the series."""
        share_classes = Table(box=box.SIMPLE, show_header=True, padding=(0, 1))
        share_classes.add_column("Class ID")
        share_classes.add_column("Class Name")
        share_classes.add_column("Ticker", style="bold yellow")

        for share_class in self.get_classes():
            ticker_cell = share_class.ticker or "-"
            share_classes.add_row(share_class.class_id, share_class.name, ticker_cell)

        inner = Panel(
            share_classes,
            title="📊 Share Classes",
            border_style="grey50"
        )

        return Panel(
            Group(inner),
            title=f"🏦 {self.name} [{self.series_id}]",
            subtitle="Fund Series"
        )
def find_fund(identifier: str) -> Union[FundCompany, FundSeries, FundClass]:
    """
    Smart factory that finds and returns the most appropriate fund entity.

    This function takes any type of fund identifier and returns the most specific
    entity that matches it. For a series ID, it returns a FundSeries. For a class ID
    or ticker, it returns a FundClass. For a company CIK, it returns a FundCompany.

    String identifiers are stripped and upper-cased before being passed to the
    series/class lookups, because series and class IDs are recognised here
    case-insensitively while the downstream lookup only matches upper case.

    Args:
        identifier: Fund ticker (e.g., 'VFINX'), Series ID (e.g., 'S000001234'),
            Class ID (e.g., 'C000012345'), or CIK number

    Returns:
        The most specific fund entity that matches the identifier:
        - FundClass for tickers and class IDs
        - FundSeries for series IDs
        - FundCompany for company CIKs
        The series/class getters may return None when nothing is found.
    """
    if isinstance(identifier, str):
        normalized = identifier.strip().upper()

        # Series ID (S000XXXXX) or Class ID (C000XXXXX)
        if normalized[1:].isdigit():
            if normalized.startswith('S'):
                return get_fund_series(normalized)
            if normalized.startswith('C'):
                return get_fund_class(normalized)

        # Ticker symbol: only meaningful for strings, so non-string
        # identifiers (e.g. an integer CIK) skip this check
        if is_fund_class_ticker(identifier):
            return get_fund_class(normalized)

    # Default to returning a FundCompany
    return get_fund_company(identifier)
# === Specialized Getter Functions ===
def get_fund_company(cik_or_identifier: Union[str, int]) -> FundCompany:
    """
    Build the FundCompany for a CIK or other identifier.

    Args:
        cik_or_identifier: CIK number or other identifier

    Returns:
        A newly constructed FundCompany instance
    """
    company = FundCompany(cik_or_identifier)
    return company
def get_fund_series(series_id: str) -> Optional[FundSeries]:
    """
    Get a fund series by its Series ID.

    This is a thin wrapper around ``get_fund_object``: its result is returned
    unchanged and no validation is performed here.

    Args:
        series_id: Series ID (e.g., 'S000001234')

    Returns:
        The object ``get_fund_object`` returns for ``series_id``. That is
        None when the lookup finds nothing, so callers must handle None.
        No ValueError is raised by this function.
    """
    # Imported lazily to avoid a circular import with edgar.funds.data
    from edgar.funds.data import get_fund_object
    fund_series: Optional[FundSeries] = get_fund_object(series_id)
    return fund_series
def get_fund_class(class_id_or_ticker: str) -> Optional[FundClass]:
    """
    Get a fund class by its Class ID or ticker.

    This is a thin wrapper around ``get_fund_object``: its result is returned
    unchanged and no validation is performed here.

    Args:
        class_id_or_ticker: Class ID (e.g., 'C000012345') or ticker symbol (e.g., 'VFINX')

    Returns:
        The object ``get_fund_object`` returns for the identifier. That is
        None when the lookup finds nothing, so callers must handle None.
        No ValueError is raised by this function.
    """
    # Imported lazily to avoid a circular import with edgar.funds.data
    from edgar.funds.data import get_fund_object
    fund_class: Optional[FundClass] = get_fund_object(class_id_or_ticker)
    return fund_class
# === Helper Functions ===
def is_fund_class_ticker(identifier: str) -> bool:
    """
    Report whether ``identifier`` is a fund class ticker.

    Delegates entirely to ``edgar.funds.data.is_fund_ticker``.

    Args:
        identifier: The identifier to check

    Returns:
        The result of ``is_fund_ticker(identifier)``
    """
    # Lazy import: edgar.funds.data imports this module at load time
    from edgar.funds.data import is_fund_ticker as _ticker_check
    return _ticker_check(identifier)
class Fund:
    """
    Unified wrapper for fund entities that provides a consistent interface
    regardless of the identifier type (ticker, series ID, class ID, or CIK).

    This class serves as a user-friendly entry point to the fund domain model.
    It resolves the identifier with ``find_fund`` and exposes whatever part of
    the company / series / share-class hierarchy could be determined. Parts
    that could not be determined are ``None``.

    Examples:
    ```python
    # Create a Fund object from any identifier
    fund = Fund("VFINX")  # From ticker
    fund = Fund("S000002277")  # From series ID
    fund = Fund("0000102909")  # From CIK

    # Access the hierarchy
    print(fund.name)  # Name of the entity
    print(fund.company.name)  # Name of the fund company
    print(fund.series.name)  # Name of the fund series
    print(fund.share_class.ticker)  # Ticker of the share class
    ```
    """

    # Prefix of the synthetic series IDs used for ETF company tickers
    _ETF_PREFIX = "ETF_"

    def __init__(self, identifier: Union[str, int]):
        """
        Initialize a Fund object from any identifier.

        Args:
            identifier: Any fund identifier (ticker, series ID, class ID, or CIK)
        """
        self._original_identifier = str(identifier)
        self._target_series_id = None  # Specific series, if determinable from a ticker

        # Hierarchy defaults. They are set up front so that every property
        # works even when the identifier resolves to nothing recognisable.
        self._class = None
        self._series = None
        self._company = None

        # Handle ticker resolution to series
        if isinstance(identifier, str) and self._is_fund_ticker(identifier):
            from edgar.funds.series_resolution import TickerSeriesResolver
            self._target_series_id = TickerSeriesResolver.get_primary_series(identifier) or None

        # Use find_fund to get the appropriate entity
        self._entity = find_fund(identifier)

        # Set up references to the full hierarchy
        if isinstance(self._entity, FundClass):
            self._class = self._entity
            self._series = self._class.series
            self._company = self._series.fund_company if self._series else None
        elif isinstance(self._entity, FundSeries):
            self._series = self._entity
            self._company = self._series.fund_company
        elif isinstance(self._entity, FundCompany):
            self._company = self._entity
        else:
            log.debug("Fund identifier %s did not resolve to a known fund entity",
                      self._original_identifier)

    def _is_fund_ticker(self, identifier: str) -> bool:
        """Check whether the ticker resolver knows at least one series for the identifier."""
        from edgar.funds.series_resolution import TickerSeriesResolver
        series_list = TickerSeriesResolver.resolve_ticker_to_series(identifier)
        return len(series_list) > 0

    @property
    def company(self) -> Optional['FundCompany']:
        """Get the fund company (may be None if not resolved)"""
        return self._company

    @property
    def series(self) -> Optional['FundSeries']:
        """Get the fund series (may be None if only company was identified)"""
        return self._series

    @property
    def share_class(self) -> Optional['FundClass']:
        """Get the share class (may be None if only series or company was identified)"""
        return self._class

    @property
    def name(self) -> str:
        """Get the name of the fund entity ("" when nothing was resolved)"""
        if self._entity is None:
            return ""
        return self._entity.name

    @property
    def identifier(self) -> str:
        """Get the primary identifier of the fund entity ("" when nothing was resolved)"""
        if isinstance(self._entity, FundClass):
            return self._entity.class_id
        elif isinstance(self._entity, FundSeries):
            return self._entity.series_id
        elif isinstance(self._entity, FundCompany):
            return str(self._entity.cik)
        return ""

    @property
    def ticker(self) -> Optional[str]:
        """Get the ticker symbol (only available for share classes)"""
        if self._class:
            return self._class.ticker
        return None

    def get_filings(self, series_only: bool = False, **kwargs) -> 'Filings':
        """
        Get filings for this fund entity.

        Delegates to the first of entity, series, company that offers a
        ``get_filings`` method.

        Args:
            series_only: Reserved for filtering to the target series. It is
                accepted for API compatibility but not applied yet, because
                matching a filing to a series would require parsing each
                filing. It is deliberately NOT forwarded downstream, since
                it is an option of this method rather than a filing filter.
            **kwargs: Filtering parameters passed to get_filings

        Returns:
            Filings object with filtered filings
        """
        filings = None
        if hasattr(self._entity, 'get_filings'):
            filings = self._entity.get_filings(**kwargs)
        elif self._series and hasattr(self._series, 'get_filings'):
            filings = self._series.get_filings(**kwargs)
        elif self._company and hasattr(self._company, 'get_filings'):
            filings = self._company.get_filings(**kwargs)

        # Only substitute a placeholder when nothing was returned at all;
        # a legitimately empty result is passed through untouched.
        if filings is None:
            from edgar._filings import Filings
            return Filings([])

        return filings

    def get_series(self) -> Optional['FundSeries']:
        """
        Get the specific series for the original ticker if determinable.

        Returns:
            FundSeries if we can determine a specific series, otherwise the
            series resolved at construction time (which may be None)
        """
        target = self._target_series_id
        if target:
            if target.startswith(self._ETF_PREFIX):
                # Synthetic ETF series IDs carry the company CIK after the prefix
                cik = target[len(self._ETF_PREFIX):]
                try:
                    from edgar.funds.series_resolution import TickerSeriesResolver
                    series_list = TickerSeriesResolver.resolve_ticker_to_series(self._original_identifier)
                    if series_list:
                        series_info = series_list[0]  # Get the ETF series info
                        etf_company = FundCompany(cik_or_identifier=int(cik), fund_name=series_info.series_name)
                        return FundSeries(
                            series_id=target,
                            name=series_info.series_name or f"ETF Series for {self._original_identifier}",
                            fund_company=etf_company
                        )
                except Exception as e:
                    # Best effort: fall back to the series known at construction
                    log.debug(f"Failed to create ETF series for {target}: {e}")
            else:
                # Regular mutual fund series - try to get by ID
                try:
                    return get_fund_series(target)
                except Exception as e:
                    log.debug(f"Failed to get fund series {target}: {e}")

        # Fallback to current series if available
        return self._series

    def get_resolution_diagnostics(self) -> Dict[str, Any]:
        """
        Get detailed information about how this Fund was resolved.

        Returns:
            A dict with at least 'status', 'method', 'original_identifier'
            and 'message'; 'series_id', 'cik' and 'suggestion' are included
            where applicable.
        """
        target = self._target_series_id
        if target:
            if target.startswith(self._ETF_PREFIX):
                cik = target[len(self._ETF_PREFIX):]
                return {
                    'status': 'success',
                    'method': 'etf_company_fallback',
                    'series_id': target,
                    'cik': int(cik),
                    'original_identifier': self._original_identifier,
                    'message': f"'{self._original_identifier}' resolved as ETF company ticker"
                }
            return {
                'status': 'success',
                'method': 'mutual_fund_lookup',
                'series_id': target,
                'original_identifier': self._original_identifier,
                'message': f"'{self._original_identifier}' resolved as mutual fund ticker"
            }

        # Check if it's a company ticker (ETF) that we didn't resolve
        from edgar.reference.tickers import find_cik
        cik = find_cik(self._original_identifier)
        if cik:
            return {
                'status': 'partial_success',
                'method': 'company_lookup_unresolved',
                'cik': cik,
                'original_identifier': self._original_identifier,
                'message': f"'{self._original_identifier}' found as company ticker but series resolution failed",
                'suggestion': f"Try using CIK {cik} directly: Fund({cik})"
            }

        return {
            'status': 'failed',
            'method': 'no_resolution',
            'original_identifier': self._original_identifier,
            'message': f"'{self._original_identifier}' not found in SEC ticker databases",
            'suggestion': "Verify ticker spelling or try with CIK/series ID directly"
        }

    def list_series(self) -> List['FundSeries']:
        """
        List fund series associated with this fund.

        When a fund company is known, its ``list_series()`` result is
        returned. Otherwise the resolved series alone is returned, or an
        empty list when there is none.

        Returns:
            List of FundSeries instances
        """
        if self._company and hasattr(self._company, 'list_series'):
            return self._company.list_series()
        if self._series:
            return [self._series]
        return []

    def list_classes(self) -> List['FundClass']:
        """
        List share classes associated with this fund.

        When a series is known, all classes of that series are returned.
        Otherwise the resolved share class alone is returned, or an empty
        list when there is none.

        Returns:
            List of FundClass instances
        """
        if self._series and hasattr(self._series, 'get_classes'):
            return self._series.get_classes()
        if self._class:
            return [self._class]
        return []

    def __str__(self) -> str:
        return str(self._entity)

    def __repr__(self) -> str:
        return repr(self._entity)

    def __rich__(self):
        """Creates a rich representation of the fund"""
        if hasattr(self._entity, '__rich__'):
            return self._entity.__rich__()
        return str(self)

View File

@@ -0,0 +1,804 @@
"""
Data structures and functions for working with fund data.
This module provides the FundData class and related functions for
accessing and manipulating fund data.
"""
import logging
import re
from functools import lru_cache
from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
if TYPE_CHECKING:
from bs4 import Tag
import pandas as pd
import pyarrow as pa
from bs4 import BeautifulSoup
from edgar._filings import Filings
from edgar.datatools import drop_duplicates_pyarrow
from edgar.entity.data import EntityData
from edgar.funds.core import FundClass, FundCompany, FundSeries
from edgar.httprequests import download_text
log = logging.getLogger(__name__)
#
# Direct implementations of fund-related functionality.
# These replace the legacy module dependencies.
#
# URL constants for fund searches
# Series search endpoint; callers append a further query such as "&CIK=..." or "&ticker=..."
fund_series_search_url = "https://www.sec.gov/cgi-bin/series?company="
# Browse-EDGAR lookup; the "{}" placeholder is the value of the CIK query parameter
fund_class_or_series_search_url = "https://www.sec.gov/cgi-bin/browse-edgar?CIK={}"
# Same Browse-EDGAR lookup with the "scd=series" query parameter added
fund_series_direct_url = "https://www.sec.gov/cgi-bin/browse-edgar?CIK={}&scd=series"
class _FundDTO:
    """
    Plain data holder for fund information returned by the direct implementations.

    Internal to this module and not part of the public API; use the Fund
    class from edgar.funds.core instead.
    """

    def __init__(self, company_cik, company_name, name, series, ticker,
                 class_contract_id, class_contract_name):
        # Owning company
        self.company_cik = company_cik
        self.company_name = company_name
        # Fund / series
        self.name = name
        self.series = series
        # Share class (contract)
        self.ticker = ticker
        self.class_contract_id = class_contract_id
        self.class_contract_name = class_contract_name

    def __str__(self):
        return "{} - {} [{}]".format(self.name, self.ticker, self.class_contract_id)
# Parse SGML fund data (directly implemented)
def parse_fund_data(series_sgml_data: str) -> pd.DataFrame:
    """
    Parse the SGML text containing fund series and class information.

    One row is produced per CLASS-CONTRACT inside each SERIES block, so a
    series with several share classes yields several rows that share the
    series-level columns. A series without any (closed) CLASS-CONTRACT block
    yields a single row with empty class columns.

    Args:
        series_sgml_data: SGML text with SERIES-AND-CLASSES-CONTRACTS-DATA

    Returns:
        DataFrame with the columns Fund, Ticker, SeriesID, ContractID,
        Class and CIK (empty when no SERIES block is found)

    Example SGML data:
        <SERIES-AND-CLASSES-CONTRACTS-DATA>
        <EXISTING-SERIES-AND-CLASSES-CONTRACTS>
        <SERIES>
        <OWNER-CIK>0001090372
        <SERIES-ID>S000071967
        <SERIES-NAME>Jacob Forward ETF
        <CLASS-CONTRACT>
        <CLASS-CONTRACT-ID>C000227599
        <CLASS-CONTRACT-NAME>Jacob Forward ETF
        <CLASS-CONTRACT-TICKER-SYMBOL>JFWD
        </CLASS-CONTRACT>
        </SERIES>
        </EXISTING-SERIES-AND-CLASSES-CONTRACTS>
        </SERIES-AND-CLASSES-CONTRACTS-DATA>
    """
    series_re = re.compile(r'<SERIES>(.*?)</SERIES>', re.DOTALL)
    class_re = re.compile(r'<CLASS-CONTRACT>(.*?)</CLASS-CONTRACT>', re.DOTALL)
    # "<TAG>value" pairs; the value runs up to the next tag
    data_re = re.compile(r'<([^>]+)>([^<]*)')

    class_columns = ["CLASS-CONTRACT-ID", "CLASS-CONTRACT-NAME", "CLASS-CONTRACT-TICKER-SYMBOL"]
    columns = ["OWNER-CIK", "SERIES-ID", "SERIES-NAME"] + class_columns

    def tag_values(text: str) -> Dict[str, str]:
        return {tag: value.strip() for tag, value in data_re.findall(text)}

    rows = []
    for block in series_re.findall(series_sgml_data or ""):
        class_blocks = class_re.findall(block)
        if class_blocks:
            # Series-level tags are read with the class sections removed so
            # that class values can never leak into them
            series_fields = tag_values(class_re.sub('', block))
            class_dicts = [tag_values(class_block) for class_block in class_blocks]
        else:
            # No closed CLASS-CONTRACT section: treat the whole block as one
            # flat record (also covers series without any class)
            series_fields = tag_values(block)
            class_dicts = [series_fields]

        for class_fields in class_dicts:
            row = dict(series_fields)
            row.update({column: class_fields.get(column, "") for column in class_columns})
            rows.append(row)

    # Keep only the relevant columns, then rename them for consistency
    df = pd.DataFrame(rows, columns=columns)
    return (df.rename(columns={
        "OWNER-CIK": "CIK",
        "SERIES-ID": "SeriesID",
        "SERIES-NAME": "Fund",
        "CLASS-CONTRACT-ID": "ContractID",
        "CLASS-CONTRACT-NAME": "Class",
        "CLASS-CONTRACT-TICKER-SYMBOL": "Ticker"
    })
            .filter(["Fund", "Ticker", "SeriesID", "ContractID", "Class", "CIK"])
            )
# Direct implementation of FundCompanyInfo
class _FundCompanyInfo:
    """
    Internal helper class representing the fund company.

    This is parsed from the results page when we get the fund class or series.
    Not part of the public API - use the Fund class from edgar.funds.core instead.
    """

    def __init__(self,
                 name: str,
                 cik: str,
                 ident_info: Dict[str, str],
                 addresses: List[str],
                 filings: 'Filings'):
        self.name: str = name
        self.cik: str = cik
        # "key: value" pairs scraped from the identifying-information paragraph
        self.ident_info: Dict[str, str] = ident_info
        self.addresses: List[str] = addresses
        self.filings = filings

    @property
    def state(self):
        """Value of the "State location" entry, or None when absent."""
        return self.ident_info.get("State location", None)

    @property
    def state_of_incorporation(self):
        """Value of the "State of Inc." entry, or None when absent."""
        return self.ident_info.get("State of Inc.", None)

    def id_and_name(self, contract_or_series: str) -> Optional[Tuple[str, str]]:
        """
        Split an identifying-information entry into its ID and description.

        Computed on every call: the work is a single regex match, and caching
        on an instance method would keep instances alive in the cache.

        Args:
            contract_or_series: Key to read from ``ident_info``

        Returns:
            ``(id, description)``; None when the entry is missing or empty.
            Both parts are "" when the entry does not start with a C/S ID,
            and the description is "" when nothing follows the ID.
        """
        class_contract_str = self.ident_info.get(contract_or_series, None)
        if not class_contract_str:
            return None
        match = re.match(r'([CS]\d+)(?:\s(.*))?', class_contract_str)
        if not match:
            return "", ""
        return match.group(1), match.group(2) or ""

    @staticmethod
    def _parse_ident_info(ident_text: str) -> Dict[str, str]:
        """Turn "key: value" entries separated by "|" or newlines into a dict."""
        ident_info_dict = {}
        for line in ident_text.replace("|", "\n").strip().split("\n"):
            if ":" in line:
                # Split on the first colon only: values may contain colons
                key, value = line.split(":", 1)
                ident_info_dict[key.strip()] = value.strip().replace("\xa0", " ")
        return ident_info_dict

    @classmethod
    def from_html(cls, company_info_html: Union[str, 'Tag']):
        """
        Build the company info from a company results page.

        Args:
            company_info_html: HTML of the results page

        Returns:
            A populated instance, or None when the expected page structure
            (content div, company info div, company name) is not present
        """
        soup = BeautifulSoup(company_info_html, features="html.parser")

        # Parse the fund company info
        content_div = soup.find("div", {"id": "contentDiv"})
        if content_div is None:
            # Should not reach here, but this is precautionary
            log.warning("Did not find div with id 'contentDiv'")
            return None

        company_info_div = content_div.find("div", class_="companyInfo")
        company_name_tag = (company_info_div.find('span', class_='companyName')
                            if company_info_div is not None else None)
        if company_name_tag is None or company_name_tag.a is None:
            log.warning("Did not find the company name in div 'companyInfo'")
            return None
        company_name = company_name_tag.text.split('CIK')[0].strip()
        cik = company_name_tag.a.text.split(' ')[0]

        # Extract the identifying information; <br> tags become line breaks
        # so that each entry ends up on its own line
        for tag in company_info_div.find_all('br'):
            tag.replace_with('\n')
        ident_info = company_info_div.find('p', class_='identInfo')
        ident_info_dict = cls._parse_ident_info(ident_info.get_text()) if ident_info is not None else {}

        # Addresses
        mailer_divs = content_div.find_all("div", class_="mailer")
        addresses = [re.sub(r'\n\s+', '\n', mailer_div.text.strip())
                     for mailer_div in mailer_divs]

        filing_index = cls._extract_filings(soup, company_name, cik)
        filings = Filings(filing_index=filing_index)

        return cls(name=company_name,
                   cik=cik,
                   filings=filings,
                   ident_info=ident_info_dict,
                   addresses=addresses)

    @classmethod
    def _extract_filings(cls, soup, company_name: str, cik: str):
        """
        Read the filings table of a results page into a pyarrow table.

        Args:
            soup: Parsed results page
            company_name: Value for the 'company' column of every row
            cik: Value for the 'cik' column of every row (converted to int)

        Returns:
            pyarrow Table with the columns form, company, cik, filing_date
            and accession_number; it has zero rows when the page has no
            filings table or no usable rows
        """
        from datetime import datetime

        import pyarrow as pa

        forms, accession_nos, filing_dates = [], [], []

        filings_table = soup.find("table", class_="tableFile2")
        # Skip the header row; a page without the table contributes no rows
        rows = filings_table.find_all("tr")[1:] if filings_table is not None else []
        for row in rows:
            cells = row.find_all("td")
            link = cells[1].find("a") if len(cells) > 3 else None
            if link is None:
                # Not a filing row (too few cells or no document link)
                continue
            forms.append(cells[0].text)
            # The accession number is the last path segment of the index link
            accession_nos.append(link.attrs["href"].split("/")[-1].replace("-index.htm", ""))
            filing_dates.append(datetime.strptime(cells[3].text.strip(), '%Y-%m-%d').date())

        schema = pa.schema([
            ('form', pa.string()),
            ('company', pa.string()),
            ('cik', pa.int32()),
            ('filing_date', pa.date32()),
            ('accession_number', pa.string()),
        ])

        filing_index = pa.Table.from_arrays(arrays=[
            pa.array(forms, type=pa.string()),
            pa.array([company_name] * len(forms), type=pa.string()),
            pa.array([int(cik)] * len(forms), type=pa.int32()),
            pa.array(filing_dates, type=pa.date32()),
            pa.array(accession_nos, type=pa.string()),
        ], schema=schema)

        return filing_index
# Direct implementation of FundClassOrSeries and subclasses
class _FundClassOrSeries:
    """
    Internal base class for fund classes and series.

    Wraps a company info object and exposes the ID and name found under one
    key of its identifying information.

    Not part of the public API - use the FundClass and FundSeries classes
    from edgar.funds.core instead.
    """

    def __init__(self, company_info: '_FundCompanyInfo', contract_or_series: str):
        """
        Args:
            company_info: Company info this class/series was parsed from
            contract_or_series: Key in ``company_info.ident_info`` holding
                the "<ID> <name>" entry for this object
        """
        self.fund = company_info
        self._contract_or_series = contract_or_series

    @property
    def fund_cik(self):
        return self.fund.cik

    @property
    def fund_name(self):
        return self.fund.name

    def _id_and_name(self) -> Optional[Tuple[str, str]]:
        """
        Split the identifying-information entry into ``(id, name)``.

        Computed on every call: the work is a single regex match, and caching
        on an instance method would keep instances alive in the cache.

        Returns:
            None when the entry is missing or empty; ("", "") when it does
            not start with a C/S ID; otherwise the ID and the text after it
            ("" when nothing follows).
        """
        class_contract_str = self.fund.ident_info.get(self._contract_or_series, None)
        if not class_contract_str:
            return None
        match = re.match(r'([CS]\d+)(?:\s(.*))?', class_contract_str)
        if not match:
            return "", ""
        return match.group(1), match.group(2) or ""

    @property
    def id(self):
        """Class or series ID, or None when the entry is missing."""
        id_and_name = self._id_and_name()
        if id_and_name:
            return id_and_name[0]
        return None

    @property
    def name(self):
        """Class or series name, or None when the entry is missing."""
        id_and_name = self._id_and_name()
        if id_and_name:
            return id_and_name[1]
        return None

    @property
    def description(self):
        return f"{self.fund_name} {self.id} {self.name}"

    @property
    def filings(self):
        return self.fund.filings
class _FundClass(_FundClassOrSeries):
    """
    Internal fund class (contract) record, read from the "Class/Contract" entry.

    Not part of the public API - use the FundClass class from edgar.funds.core instead.
    """

    def __init__(self, company_info: '_FundCompanyInfo'):
        super().__init__(company_info, "Class/Contract")

    @property
    def ticker(self):
        """Value of the "Ticker Symbol" entry, or None when absent."""
        ident_info = self.fund.ident_info
        return ident_info.get("Ticker Symbol", None)

    @property
    def description(self):
        """Company name, class ID, class name and ticker (blank if none), space separated."""
        ticker_text = self.ticker or ''
        return f"{self.fund_name} {self.id} {self.name} {ticker_text}"
class _FundSeries(_FundClassOrSeries):
    """
    Internal fund series record, read from the "Series" entry.

    Not part of the public API - use the FundSeries class from edgar.funds.core instead.
    """

    def __init__(self, company_info: '_FundCompanyInfo'):
        series_key = "Series"
        super().__init__(company_info, series_key)
# Direct implementation of get_fund_with_filings
def direct_get_fund_with_filings(contract_or_series_id: str):
    """
    Get fund class or series information including filings from the SEC website.

    The first results page provides the company information and the first
    batch of filings. Further pages are fetched until a page contributes no
    filings, and duplicates are then dropped by accession number.

    Args:
        contract_or_series_id: Series ID (S...) or Class ID (C...)

    Returns:
        _FundClass (for C... IDs) or _FundSeries (for S... IDs), or None when
        the ID is malformed, nothing matches, the page cannot be parsed or
        any error occurs while retrieving the data
    """
    # URL template to search for a fund by class or series ID
    search_url_template = "https://www.sec.gov/cgi-bin/browse-edgar?CIK={}"
    page_size = 100

    # The whole identifier must be "C" or "S" followed by digits
    if not isinstance(contract_or_series_id, str) or not re.fullmatch(r"[CS]\d+", contract_or_series_id):
        return None

    base_url = search_url_template.format(contract_or_series_id)

    try:
        fund_text = download_text(f"{base_url}&start=0&count={page_size}")

        if "No matching" in fund_text:
            return None

        # Company Info
        company_info = _FundCompanyInfo.from_html(fund_text)
        if company_info is None:
            return None

        # Get the remaining filings. The first page covered offsets
        # 0..page_size-1, so the next page starts at page_size.
        start = page_size
        filing_index = company_info.filings.data
        while True:
            next_page = f"{base_url}&start={start}&count={page_size}"
            fund_text = download_text(next_page)
            soup = BeautifulSoup(fund_text, features="html.parser")
            filing_index_on_page = _FundCompanyInfo._extract_filings(soup, company_info.name, company_info.cik)
            if len(filing_index_on_page) == 0:
                break
            filing_index = pa.concat_tables([filing_index, filing_index_on_page])
            start += page_size

        # Drop duplicate filings by accession number
        filing_index = drop_duplicates_pyarrow(filing_index, column_name='accession_number')
        company_info.filings = Filings(filing_index=filing_index)

        if contract_or_series_id.startswith('C'):
            return _FundClass(company_info)
        return _FundSeries(company_info)
    except Exception as e:
        # Best effort: callers treat None as "not found"
        log.warning("Error retrieving fund information for %s: %s", contract_or_series_id, e)
        return None
@lru_cache(maxsize=16)
def get_fund_object(identifier: str) -> Optional[Union[FundCompany, FundSeries, FundClass]]:
    """
    Get a Fund related object by its identifier.

    Results are memoised per identifier by the surrounding ``lru_cache``.

    Args:
        identifier: A CIK, a series id (e.g. 'S000001234') or class id or Fund ticker (e.g. 'VFINX')

    Returns:
        A FundCompany or FundSeries or FundClass, or None if the identifier is
        not recognised or the SEC page does not contain the expected fund table
    """
    if re.match(r'^[CS]\d+$', identifier):
        identifier_type = 'Series' if identifier.startswith('S') else 'Class'
        fund_search_url = fund_series_search_url + f"&CIK={identifier}"
    elif re.match(r"^[A-Z]{4}X$", identifier):
        identifier_type = 'Class'
        fund_search_url = fund_series_search_url + f"&ticker={identifier}"
    elif re.match(r"^0\d{9}$", identifier):
        identifier_type = 'FundCompany'
        fund_search_url = fund_series_search_url + f"&CIK={identifier}"
    else:
        log.warning("Invalid fund identifier %s", identifier)
        return None

    # Download the fund page
    fund_text = download_text(fund_search_url)
    soup = BeautifulSoup(fund_text, "html.parser")
    if 'To retrieve filings, click on the CIK' not in soup.text:
        return None

    tables = soup.find_all("table")
    # The fund table is the 6th table on the page
    if len(tables) < 6:
        log.warning("Expected fund table not found for %s", identifier)
        return None
    fund_table = tables[5]

    all_series = []
    fund_company: Optional[FundCompany] = None
    current_series: Optional[FundSeries] = None
    current_class: Optional[FundClass] = None

    for tr in fund_table.find_all('tr')[4:]:  # Skip the first 4 rows as they contain headers
        row_data = [td.get_text().strip() for td in tr.find_all('td') if td.get_text().strip()]
        if not row_data:
            continue

        if re.match(r'^0\d{9}$', row_data[0]):
            fund_company = FundCompany(cik_or_identifier=row_data[0], fund_name=row_data[1], all_series=all_series)
        elif re.match(r'^S\d+$', row_data[0]):
            current_series = FundSeries(series_id=row_data[0], name=row_data[1], fund_company=fund_company)
            # A series row may appear before any company row; don't dereference None
            if fund_company is not None:
                fund_company.all_series.append(current_series)
        elif re.match(r'^C\d+$', row_data[0]):
            class_id, class_name = row_data[0], row_data[1]
            ticker = row_data[2] if len(row_data) > 2 else None
            current_class = FundClass(class_id=class_id, name=class_name, ticker=ticker)
            current_class.series = current_series
            # A class row may appear before any series row; don't dereference None
            if current_series is not None:
                current_series.fund_classes.append(current_class)

    # For series/class lookups the most recently parsed series/class row is returned
    if identifier_type == "FundCompany":
        return fund_company
    elif identifier_type == "Series":
        return current_series
    elif identifier_type == "Class":
        return current_class
    return None
def is_fund_ticker(identifier: str) -> bool:
    """
    Tell whether an identifier has the shape of a fund ticker.

    A fund ticker here is four uppercase ASCII letters followed by ``X``.

    Args:
        identifier: The identifier to check

    Returns:
        True if it's a fund ticker, False otherwise (including for empty
        or non-string input)
    """
    # Guard clause: anything falsy or not a string cannot be a ticker
    if not identifier or not isinstance(identifier, str):
        return False
    return re.match(r"^[A-Z]{4}X$", identifier) is not None
class FundData(EntityData):
    """
    Data container specialised for fund entities.

    All keyword arguments are forwarded to ``EntityData``; the fund-specific
    ones (``series_id``, ``class_ids``, ``fund_classes``) are additionally
    stored on the instance.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        # Fund-specific values are optional; missing collections default to empty lists
        series_id = kwargs.get('series_id')
        class_ids = kwargs.get('class_ids', [])
        fund_classes = kwargs.get('fund_classes', [])

        self.series_id = series_id
        self.class_ids = class_ids
        self._fund_classes = fund_classes

    @property
    def is_fund(self) -> bool:
        """Always True: this container only describes funds."""
        return True
def resolve_fund_identifier(identifier):
    """
    Convert fund tickers, series IDs or class IDs to a CIK.

    Lookups are best effort: any error is logged and the original identifier
    is returned unchanged.

    Args:
        identifier: Fund ticker, Series ID, Class ID, or CIK

    Returns:
        CIK as integer or original identifier if conversion not possible
    """
    if not isinstance(identifier, str):
        return identifier

    # Series IDs (S000XXXXX) and Class IDs (C000XXXXX) are resolved the same way
    id_kind = {'S': 'series ID', 'C': 'class ID'}.get(identifier[:1])
    if id_kind and identifier[1:].isdigit():
        try:
            # Try our direct implementation
            fund_info = direct_get_fund_with_filings(identifier)
            if fund_info and hasattr(fund_info, 'fund_cik'):
                return int(fund_info.fund_cik)
        except Exception as e:
            log.warning("Error resolving %s %s: %s", id_kind, identifier, e)

    # Handle fund ticker
    if is_fund_ticker(identifier):
        try:
            # Look the ticker up; previously the lookup call was missing and the
            # raw string was inspected instead, so tickers were never resolved.
            # NOTE(review): confirm the returned object exposes `company_cik`.
            fund_info = get_fund_object(identifier)
            if fund_info and hasattr(fund_info, 'company_cik'):
                return int(fund_info.company_cik)
        except Exception as e:
            log.warning("Error resolving fund ticker %s: %s", identifier, e)

    return identifier
def get_fund_information(header):
    """
    Build the fund series/contract information found in a filing header.

    Two strategies are tried in order: parsing the
    SERIES-AND-CLASSES-CONTRACTS-DATA section, then a looser regex scan of
    the whole header text. An empty container is returned when both fail.

    Args:
        header: Filing header (an object with a ``text`` attribute)

    Returns:
        Fund series and contract information
    """
    # Imported lazily to avoid circular imports
    from edgar.funds import FundSeriesAndContracts

    if not header or not hasattr(header, 'text'):
        return FundSeriesAndContracts()

    # Strategy 1: the dedicated series/classes section of the header
    try:
        contracts_section = re.search(
            r'<SERIES-AND-CLASSES-CONTRACTS-DATA>(.*?)</SERIES-AND-CLASSES-CONTRACTS-DATA>',
            header.text,
            re.DOTALL
        )
        if contracts_section:
            return FundSeriesAndContracts(parse_fund_data(contracts_section.group(1)))
    except Exception as e:
        log.debug("Error parsing fund information directly: %s", e)

    # Strategy 2: pull individual values out of the raw header text
    try:
        if header and hasattr(header, 'text'):
            raw_text = str(header.text)
            series_ids = re.findall(r'SERIES-ID[^>]*>([^<]+)', raw_text)
            contract_ids = re.findall(r'CONTRACT-ID[^>]*>([^<]+)', raw_text)
            names = re.findall(r'FILER[^>]*>.*?COMPANY-DATA[^>]*>.*?CONFORMED-NAME[^>]*>([^<]+)', raw_text)
            tickers = re.findall(r'TICKER-SYMBOL[^>]*>([^<]+)', raw_text)

            if series_ids or contract_ids:
                # Every row shares the first filer name and first ticker found
                fund_name = names[0] if names else None
                ticker = tickers[0] if tickers else None

                # Pair series and contract IDs positionally, padding the shorter list with None
                row_count = max(len(series_ids), len(contract_ids))
                padded_series = series_ids + [None] * (row_count - len(series_ids))
                padded_contracts = contract_ids + [None] * (row_count - len(contract_ids))

                rows = [
                    {
                        'SeriesID': series_id,
                        'ContractID': contract_id,
                        'Fund': fund_name,
                        'Ticker': ticker,
                        'Class': f"Class {contract_id[-1].upper()}" if contract_id else None
                    }
                    for series_id, contract_id in zip(padded_series, padded_contracts)
                ]
                if rows:
                    return FundSeriesAndContracts(pd.DataFrame(rows))
    except Exception as e:
        log.warning("Error in fallback get_fund_information: %s", e)

    # Nothing could be extracted
    return FundSeriesAndContracts()
def _find_linked_id(cells, id_pattern):
    """Return ``(identifier, cell)`` for the first link whose text matches ``id_pattern``, else ``(None, None)``."""
    for cell in cells:
        for link in cell.find_all('a', href=True):
            found = re.search(id_pattern, link.text)
            if found:
                return found.group(0), cell
    return None, None


def parse_series_and_classes_from_html(html_content: str, cik:str) -> List[Dict]:
    """
    Parse series and class information from the SEC series listing HTML page.

    This parses HTML content from the URL https://www.sec.gov/cgi-bin/browse-edgar?CIK=XXXX&scd=series
    which contains a structured listing of all series and classes for a fund company.

    Args:
        html_content: HTML content from the SEC webpage
        cik: CIK of the fund company (used for log messages only)

    Returns:
        List of dictionaries, one per series, each with the keys ``series_id``,
        ``series_name`` and ``classes`` (a list of dictionaries with ``class_id``,
        ``class_name`` and ``ticker``). If parsing fails part-way, the series
        collected so far are returned.
    """
    import re

    from bs4 import BeautifulSoup

    soup = BeautifulSoup(html_content, 'html.parser')
    series_data = []

    log.debug("Parsing series HTML content for fund %s", cik)

    try:
        # Use the first table that has rows and mentions series or class/contract information
        table = None
        for candidate in soup.find_all('table'):
            if candidate.find('tr') and re.search(r'Series|Class/Contract', str(candidate)):
                table = candidate
                break

        if not table:
            log.warning("No suitable table found in series HTML content")
            return []

        current_series = None
        rows = table.find_all('tr')
        log.debug("Found %d rows in the table", len(rows))

        # Process all rows since the table structure might vary
        for row in rows:
            cells = row.find_all('td')
            if len(cells) < 3:
                continue

            # A series row is marked by an S###### ID inside a link
            series_id, series_cell = _find_linked_id(cells, r'S\d{6,}')

            if series_id:
                # The name is taken from the first other linked cell whose text is not the ID
                series_name = None
                for cell in cells:
                    if cell != series_cell and cell.find('a'):
                        link_text = cell.find('a').text.strip()
                        if link_text and series_id not in link_text:
                            series_name = link_text
                            break

                # If we couldn't find a name, use a default
                if not series_name:
                    series_name = f"Series {series_id}"

                current_series = {
                    'series_id': series_id,
                    'series_name': series_name,
                    'classes': []
                }
                series_data.append(current_series)
                log.debug("Found series: %s - %s", series_id, series_name)

            # Class rows (C###### IDs) follow the series they belong to
            elif current_series:
                class_id, _ = _find_linked_id(cells, r'C\d{6,}')
                if not class_id:
                    continue

                # The text of the cell after the one holding the ID carries the class details
                details = None
                for cell_idx, cell in enumerate(cells):
                    if class_id in str(cell) and cell_idx + 1 < len(cells):
                        details = cells[cell_idx + 1].text.strip()
                        break

                class_name = None
                class_ticker = ""
                if details:
                    # The name is expected on the second line and the ticker on the third.
                    # A missing line used to raise and abort parsing of every remaining
                    # row; now the default name / empty ticker is used instead.
                    parts = details.split("\n")
                    if len(parts) > 1:
                        class_name = parts[1]
                    if len(parts) > 2:
                        class_ticker = parts[2].strip()

                # If we couldn't find a name, use a default
                if not class_name:
                    class_name = f"Class {class_id}"

                current_series['classes'].append({
                    'class_id': class_id,
                    'class_name': class_name,
                    'ticker': class_ticker
                })
                log.debug("Found class: %s - %s (%s)", class_id, class_name, class_ticker)

        log.debug("Found %d series with classes", len(series_data))

    except Exception as e:
        # Best effort: report the problem and return whatever was parsed so far
        log.warning("Error parsing series HTML: %s", e)
        import traceback
        log.debug(traceback.format_exc())

    return series_data
def get_series_and_classes_from_sec(cik: Union[str, int]) -> List[Dict]:
    """
    Download the SEC series listing page for a fund company and parse it.

    Args:
        cik: The CIK of the fund company

    Returns:
        List of dictionaries containing parsed series and class information;
        empty when the page reports no match or is not a series listing
    """
    # The URL template takes the CIK left-padded with zeros to 10 characters
    padded_cik = str(cik).zfill(10)
    html_content = download_text(fund_series_direct_url.format(padded_cik))

    # Only parse pages that look like a series listing
    is_series_listing = 'No matching' not in html_content and 'series for cik' in html_content.lower()
    if is_series_listing:
        return parse_series_and_classes_from_html(html_content, cik)

    log.debug("No series information found for CIK %s", cik)
    return []

View File

@@ -0,0 +1,82 @@
"""
Examples of using the new fund entity API.
This module demonstrates how to use the improved fund entity API
to work with fund companies, series, and classes.
"""
from edgar.funds import (
find_fund,
get_fund_class,
get_fund_company,
get_fund_series,
)
def demonstrate_find_fund():
    """Call the smart finder once for each supported kind of identifier."""
    identifiers = (
        "0001048636",  # fund company CIK - T. Rowe Price
        "S000005029",  # series ID - Kinetics Internet Fund
        "C000013712",  # class ID - Kinetics Internet Fund Advisor Class C
        "KINCX",  # class ticker - Kinetics Internet Fund Advisor Class C
    )
    for identifier in identifiers:
        find_fund(identifier)
def demonstrate_specialized_getters():
    """Call each specialized getter with an identifier of the matching kind."""
    lookups = (
        (get_fund_company, "0001048636"),  # T. Rowe Price
        (get_fund_series, "S000005029"),  # Kinetics Internet Fund
        (get_fund_class, "C000013712"),  # Kinetics Internet Fund Advisor Class C
        (get_fund_class, "KINCX"),  # by ticker - should be the same class as above
    )
    for getter, identifier in lookups:
        getter(identifier)
def demonstrate_entity_navigation():
    """Walk from a fund class up to its series and company, then back down."""
    # Entry point: a share class looked up by ticker
    share_class = get_fund_class("KINCX")

    # Upwards: class -> series -> company
    parent_series = share_class.series
    fund_company = share_class.series.fund_company

    # Downwards: the company's series (first three only)
    for _series in fund_company.all_series[:3]:
        pass

    # Downwards: the classes of the series, when the class has one
    if parent_series:
        for _share_class in parent_series.get_classes():
            pass
def main():
    """Run every demonstration in order."""
    demonstrations = (
        demonstrate_find_fund,
        demonstrate_specialized_getters,
        demonstrate_entity_navigation,
    )
    for demonstration in demonstrations:
        demonstration()


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,26 @@
"""
Fund data models.
This package contains all the data models used for fund reporting,
separated by functional area for better maintainability.
"""
# Import all derivative models for convenience
from edgar.funds.models.derivatives import (
DerivativeInfo,
ForwardDerivative,
FutureDerivative,
OptionDerivative,
SwapDerivative,
SwaptionDerivative,
)
# Names exported by `from edgar.funds.models import *`: the derivative model
# classes imported above from edgar.funds.models.derivatives.
__all__ = [
    # Derivative models
    'DerivativeInfo',
    'ForwardDerivative',
    'SwapDerivative',
    'FutureDerivative',
    'SwaptionDerivative',
    'OptionDerivative',
]

View File

@@ -0,0 +1,740 @@
"""
Derivative instrument models for fund portfolio reporting.
This module contains all the data models for different types of derivative
instruments found in fund portfolios, including forwards, swaps, futures,
options, and swaptions.
"""
from decimal import Decimal
from typing import Optional, Union
from pydantic import BaseModel
from edgar.xmltools import child_text, optional_decimal
def optional_decimal_attr(element, attr_name):
    """
    Parse an optional decimal attribute from an XML element.

    Args:
        element: Element exposing an ``attrs`` mapping, or None
        attr_name: Name of the attribute to read

    Returns:
        The attribute as a Decimal, or None when the element is None, the
        attribute is missing, empty or "N/A", or its value is not a valid number
    """
    if element is None:
        return None
    attr_value = element.attrs.get(attr_name)
    if not attr_value or attr_value == "N/A":
        return None
    try:
        return Decimal(attr_value)
    except (ArithmeticError, ValueError, TypeError):
        # Decimal() reports an unparseable string with decimal.InvalidOperation,
        # which is an ArithmeticError rather than a ValueError
        return None
class ForwardDerivative(BaseModel):
    """
    Forward contract parsed from a ``fwdDeriv`` XML element.

    Holds the counterparty, the currencies and amounts sold and purchased,
    the settlement date and unrealized appreciation, plus the optional
    ``derivAddlInfo`` details.
    """
    counterparty_name: Optional[str]
    counterparty_lei: Optional[str]
    currency_sold: Optional[str]
    amount_sold: Optional[Decimal]
    currency_purchased: Optional[str]
    amount_purchased: Optional[Decimal]
    settlement_date: Optional[str]
    unrealized_appreciation: Optional[Decimal]

    # Additional info from derivAddlInfo (when nested)
    deriv_addl_name: Optional[str]
    deriv_addl_lei: Optional[str]
    deriv_addl_title: Optional[str]
    deriv_addl_cusip: Optional[str]
    deriv_addl_identifier: Optional[str]
    deriv_addl_identifier_type: Optional[str]
    deriv_addl_balance: Optional[Decimal]
    deriv_addl_units: Optional[str]
    deriv_addl_currency: Optional[str]
    deriv_addl_value_usd: Optional[Decimal]
    deriv_addl_pct_val: Optional[Decimal]
    deriv_addl_asset_cat: Optional[str]
    deriv_addl_issuer_cat: Optional[str]
    deriv_addl_inv_country: Optional[str]

    @classmethod
    def from_xml(cls, tag):
        """
        Build a ForwardDerivative from a ``fwdDeriv`` element.

        Args:
            tag: The XML element to parse

        Returns:
            A ForwardDerivative, or None when ``tag`` is falsy or is not a
            ``fwdDeriv`` element
        """
        if not tag or tag.name != "fwdDeriv":
            return None

        counterparties = tag.find("counterparties")
        if counterparties:
            counterparty_name = child_text(counterparties, "counterpartyName")
            counterparty_lei = child_text(counterparties, "counterpartyLei")
        else:
            counterparty_name = counterparty_lei = None

        # Every derivAddlInfo field stays None unless the element is present
        # (it appears when the forward is nested in an option)
        addl = dict.fromkeys((
            "deriv_addl_name",
            "deriv_addl_lei",
            "deriv_addl_title",
            "deriv_addl_cusip",
            "deriv_addl_identifier",
            "deriv_addl_identifier_type",
            "deriv_addl_balance",
            "deriv_addl_units",
            "deriv_addl_currency",
            "deriv_addl_value_usd",
            "deriv_addl_pct_val",
            "deriv_addl_asset_cat",
            "deriv_addl_issuer_cat",
            "deriv_addl_inv_country",
        ))
        info = tag.find("derivAddlInfo")
        if info:
            addl.update(
                deriv_addl_name=child_text(info, "name"),
                deriv_addl_lei=child_text(info, "lei"),
                deriv_addl_title=child_text(info, "title"),
                deriv_addl_cusip=child_text(info, "cusip"),
                deriv_addl_balance=optional_decimal(info, "balance"),
                deriv_addl_units=child_text(info, "units"),
                deriv_addl_currency=child_text(info, "curCd"),
                deriv_addl_value_usd=optional_decimal(info, "valUSD"),
                deriv_addl_pct_val=optional_decimal(info, "pctVal"),
                deriv_addl_asset_cat=child_text(info, "assetCat"),
                deriv_addl_inv_country=child_text(info, "invCountry"),
            )

            # The issuer category is an attribute of issuerConditional
            issuer_cond = info.find("issuerConditional")
            if issuer_cond:
                addl["deriv_addl_issuer_cat"] = issuer_cond.attrs.get("issuerCat")

            # Only the "other" identifier is read here
            identifiers = info.find("identifiers")
            if identifiers:
                other_tag = identifiers.find("other")
                if other_tag:
                    addl["deriv_addl_identifier"] = other_tag.attrs.get("value")
                    addl["deriv_addl_identifier_type"] = other_tag.attrs.get("otherDesc")

        return cls(
            counterparty_name=counterparty_name,
            counterparty_lei=counterparty_lei,
            currency_sold=child_text(tag, "curSold"),
            amount_sold=optional_decimal(tag, "amtCurSold"),
            currency_purchased=child_text(tag, "curPur"),
            amount_purchased=optional_decimal(tag, "amtCurPur"),
            settlement_date=child_text(tag, "settlementDt"),
            unrealized_appreciation=optional_decimal(tag, "unrealizedAppr"),
            **addl
        )
class SwapDerivative(BaseModel):
    """
    Swap parsed from a ``swapDeriv`` XML element.

    Besides the basic swap data it carries the optional ``derivAddlInfo``
    details, the reference instrument, and one set of fixed / floating / other
    fields for each direction (``*_receive`` and ``*_pay``).
    """
    # Basic derivative info
    counterparty_name: Optional[str]
    counterparty_lei: Optional[str]
    notional_amount: Optional[Decimal]
    currency: Optional[str]
    unrealized_appreciation: Optional[Decimal]
    termination_date: Optional[str]
    # Upfront payment/receipt info
    upfront_payment: Optional[Decimal]
    payment_currency: Optional[str]
    upfront_receipt: Optional[Decimal]
    receipt_currency: Optional[str]
    reference_entity_name: Optional[str]
    reference_entity_title: Optional[str]
    reference_entity_cusip: Optional[str]
    reference_entity_isin: Optional[str]
    reference_entity_ticker: Optional[str]
    swap_flag: Optional[str]

    # Additional info from derivAddlInfo (when nested)
    deriv_addl_name: Optional[str]
    deriv_addl_lei: Optional[str]
    deriv_addl_title: Optional[str]
    deriv_addl_cusip: Optional[str]
    deriv_addl_identifier: Optional[str]
    deriv_addl_identifier_type: Optional[str]
    deriv_addl_balance: Optional[Decimal]
    deriv_addl_units: Optional[str]
    deriv_addl_desc_units: Optional[str]
    deriv_addl_currency: Optional[str]
    deriv_addl_value_usd: Optional[Decimal]
    deriv_addl_pct_val: Optional[Decimal]
    deriv_addl_asset_cat: Optional[str]
    deriv_addl_issuer_cat: Optional[str]
    deriv_addl_inv_country: Optional[str]

    # DIRECTIONAL RECEIVE LEG (what we receive)
    fixed_rate_receive: Optional[Decimal]
    fixed_amount_receive: Optional[Decimal]
    fixed_currency_receive: Optional[str]
    floating_index_receive: Optional[str]
    floating_spread_receive: Optional[Decimal]
    floating_amount_receive: Optional[Decimal]
    floating_currency_receive: Optional[str]
    floating_tenor_receive: Optional[str]
    floating_tenor_unit_receive: Optional[str]
    floating_reset_date_tenor_receive: Optional[str]
    floating_reset_date_unit_receive: Optional[str]
    other_description_receive: Optional[str]
    other_type_receive: Optional[str]  # fixedOrFloating attribute

    # DIRECTIONAL PAYMENT LEG (what we pay)
    fixed_rate_pay: Optional[Decimal]
    fixed_amount_pay: Optional[Decimal]
    fixed_currency_pay: Optional[str]
    floating_index_pay: Optional[str]
    floating_spread_pay: Optional[Decimal]
    floating_amount_pay: Optional[Decimal]
    floating_currency_pay: Optional[str]
    floating_tenor_pay: Optional[str]
    floating_tenor_unit_pay: Optional[str]
    floating_reset_date_tenor_pay: Optional[str]
    floating_reset_date_unit_pay: Optional[str]
    other_description_pay: Optional[str]
    other_type_pay: Optional[str]  # fixedOrFloating attribute

    @staticmethod
    def _parse_deriv_addl_info(tag):
        """Return the ``deriv_addl_*`` fields read from ``derivAddlInfo`` (all None when absent)."""
        addl = dict.fromkeys((
            "deriv_addl_name",
            "deriv_addl_lei",
            "deriv_addl_title",
            "deriv_addl_cusip",
            "deriv_addl_identifier",
            "deriv_addl_identifier_type",
            "deriv_addl_balance",
            "deriv_addl_units",
            "deriv_addl_desc_units",
            "deriv_addl_currency",
            "deriv_addl_value_usd",
            "deriv_addl_pct_val",
            "deriv_addl_asset_cat",
            "deriv_addl_issuer_cat",
            "deriv_addl_inv_country",
        ))
        info = tag.find("derivAddlInfo")
        if not info:
            return addl

        addl.update(
            deriv_addl_name=child_text(info, "name"),
            deriv_addl_lei=child_text(info, "lei"),
            deriv_addl_title=child_text(info, "title"),
            deriv_addl_cusip=child_text(info, "cusip"),
            deriv_addl_balance=optional_decimal(info, "balance"),
            deriv_addl_units=child_text(info, "units"),
            deriv_addl_desc_units=child_text(info, "descOthUnits"),
            deriv_addl_currency=child_text(info, "curCd"),
            deriv_addl_value_usd=optional_decimal(info, "valUSD"),
            deriv_addl_pct_val=optional_decimal(info, "pctVal"),
            deriv_addl_asset_cat=child_text(info, "assetCat"),
            deriv_addl_inv_country=child_text(info, "invCountry"),
        )

        # The issuer category is an attribute of issuerConditional
        issuer_cond = info.find("issuerConditional")
        if issuer_cond:
            addl["deriv_addl_issuer_cat"] = issuer_cond.attrs.get("issuerCat")

        # Only the "other" identifier is read here
        identifiers = info.find("identifiers")
        if identifiers:
            other_tag = identifiers.find("other")
            if other_tag:
                addl["deriv_addl_identifier"] = other_tag.attrs.get("value")
                addl["deriv_addl_identifier_type"] = other_tag.attrs.get("otherDesc")
        return addl

    @staticmethod
    def _parse_reference_entity(tag):
        """Return the ``reference_entity_*`` fields read from ``descRefInstrmnt/otherRefInst``."""
        reference = dict.fromkeys((
            "reference_entity_name",
            "reference_entity_title",
            "reference_entity_cusip",
            "reference_entity_isin",
            "reference_entity_ticker",
        ))
        desc_ref = tag.find("descRefInstrmnt")
        other_ref = desc_ref.find("otherRefInst") if desc_ref else None
        if not other_ref:
            return reference

        reference["reference_entity_name"] = child_text(other_ref, "issuerName")
        reference["reference_entity_title"] = child_text(other_ref, "issueTitle")

        identifiers = other_ref.find("identifiers")
        if identifiers:
            # Each identifier element carries its value in a "value" attribute
            for id_name in ("cusip", "isin", "ticker"):
                id_tag = identifiers.find(id_name)
                if id_tag:
                    reference[f"reference_entity_{id_name}"] = id_tag.attrs.get("value")
        return reference

    @staticmethod
    def _parse_leg(tag, fixed_name, floating_name, other_name):
        """Return one directional leg's fields, keyed without the ``_receive`` / ``_pay`` suffix."""
        leg = dict.fromkeys((
            "fixed_rate",
            "fixed_amount",
            "fixed_currency",
            "floating_index",
            "floating_spread",
            "floating_amount",
            "floating_currency",
            "floating_tenor",
            "floating_tenor_unit",
            "floating_reset_date_tenor",
            "floating_reset_date_unit",
            "other_description",
            "other_type",
        ))
        fixed_desc = tag.find(fixed_name)
        floating_desc = tag.find(floating_name)
        other_desc = tag.find(other_name)

        # Fixed leg
        if fixed_desc:
            leg["fixed_rate"] = optional_decimal_attr(fixed_desc, "fixedRt")
            leg["fixed_amount"] = optional_decimal_attr(fixed_desc, "amount")
            leg["fixed_currency"] = fixed_desc.attrs.get("curCd")

        # Floating leg
        if floating_desc:
            leg["floating_index"] = floating_desc.attrs.get("floatingRtIndex")
            leg["floating_spread"] = optional_decimal_attr(floating_desc, "floatingRtSpread")
            leg["floating_amount"] = optional_decimal_attr(floating_desc, "pmntAmt")
            leg["floating_currency"] = floating_desc.attrs.get("curCd")

            # Rate reset tenors: only the first rtResetTenor is read
            rate_reset_tenors = floating_desc.find("rtResetTenors")
            rate_reset_tenor = rate_reset_tenors.find("rtResetTenor") if rate_reset_tenors else None
            if rate_reset_tenor:
                leg["floating_tenor"] = rate_reset_tenor.attrs.get("rateTenor")
                leg["floating_tenor_unit"] = rate_reset_tenor.attrs.get("rateTenorUnit")
                leg["floating_reset_date_tenor"] = rate_reset_tenor.attrs.get("resetDt")
                leg["floating_reset_date_unit"] = rate_reset_tenor.attrs.get("resetDtUnit")

        # Other leg: the element text is the description only when the type is
        # "Other"; otherwise the type itself doubles as the description
        if other_desc:
            other_type = other_desc.attrs.get("fixedOrFloating")
            leg["other_type"] = other_type
            leg["other_description"] = other_desc.text if other_type == "Other" else other_type
        return leg

    @classmethod
    def from_xml(cls, tag):
        """
        Build a SwapDerivative from a ``swapDeriv`` element.

        Args:
            tag: The XML element to parse

        Returns:
            A SwapDerivative, or None when ``tag`` is falsy or is not a
            ``swapDeriv`` element
        """
        if not tag or tag.name != "swapDeriv":
            return None

        # Basic counterparty info
        counterparties = tag.find("counterparties")
        fields = {
            "counterparty_name": child_text(counterparties, "counterpartyName") if counterparties else None,
            "counterparty_lei": child_text(counterparties, "counterpartyLei") if counterparties else None,
        }

        # derivAddlInfo (when nested in swaptions) and reference instrument info (for CDS)
        fields.update(cls._parse_deriv_addl_info(tag))
        fields.update(cls._parse_reference_entity(tag))

        # Directional legs: what is received and what is paid
        receive_leg = cls._parse_leg(tag, "fixedRecDesc", "floatingRecDesc", "otherRecDesc")
        pay_leg = cls._parse_leg(tag, "fixedPmntDesc", "floatingPmntDesc", "otherPmntDesc")
        fields.update({f"{key}_receive": value for key, value in receive_leg.items()})
        fields.update({f"{key}_pay": value for key, value in pay_leg.items()})

        return cls(
            notional_amount=optional_decimal(tag, "notionalAmt"),
            currency=child_text(tag, "curCd"),
            unrealized_appreciation=optional_decimal(tag, "unrealizedAppr"),
            termination_date=child_text(tag, "terminationDt"),
            # Upfront payment/receipt info
            upfront_payment=optional_decimal(tag, "upfrontPmnt"),
            payment_currency=child_text(tag, "pmntCurCd"),
            upfront_receipt=optional_decimal(tag, "upfrontRcpt"),
            receipt_currency=child_text(tag, "rcptCurCd"),
            swap_flag=child_text(tag, "swapFlag"),
            **fields
        )
class FutureDerivative(BaseModel):
    """
    Futures contract parsed from a ``futrDeriv`` XML element.

    Holds the counterparty, payoff profile, expiration, notional amount and
    unrealized appreciation, plus the reference instrument and its identifiers.
    """
    counterparty_name: Optional[str]
    counterparty_lei: Optional[str]
    payoff_profile: Optional[str]
    expiration_date: Optional[str]
    notional_amount: Optional[Decimal]
    currency: Optional[str]
    unrealized_appreciation: Optional[Decimal]
    reference_entity_name: Optional[str]
    reference_entity_title: Optional[str]

    # Identifiers
    reference_entity_cusip: Optional[str]
    reference_entity_isin: Optional[str]
    reference_entity_ticker: Optional[str]
    reference_entity_other_id: Optional[str]
    reference_entity_other_id_type: Optional[str]

    @classmethod
    def from_xml(cls, tag):
        """
        Build a FutureDerivative from a ``futrDeriv`` element.

        Args:
            tag: The XML element to parse

        Returns:
            A FutureDerivative, or None when ``tag`` is falsy or is not a
            ``futrDeriv`` element
        """
        if not tag or tag.name != "futrDeriv":
            return None

        counterparties = tag.find("counterparties")
        if counterparties:
            counterparty_name = child_text(counterparties, "counterpartyName")
            counterparty_lei = child_text(counterparties, "counterpartyLei")
        else:
            counterparty_name = counterparty_lei = None

        # Reference instrument fields stay None unless descRefInstrmnt/otherRefInst is present
        reference = dict.fromkeys((
            "reference_entity_name",
            "reference_entity_title",
            "reference_entity_cusip",
            "reference_entity_isin",
            "reference_entity_ticker",
            "reference_entity_other_id",
            "reference_entity_other_id_type",
        ))
        desc_ref = tag.find("descRefInstrmnt")
        other_ref = desc_ref.find("otherRefInst") if desc_ref else None
        if other_ref:
            reference["reference_entity_name"] = child_text(other_ref, "issuerName")
            reference["reference_entity_title"] = child_text(other_ref, "issueTitle")

            identifiers = other_ref.find("identifiers")
            if identifiers:
                # Standard identifiers carry their value in a "value" attribute
                for id_name in ("cusip", "isin", "ticker"):
                    id_tag = identifiers.find(id_name)
                    if id_tag:
                        reference[f"reference_entity_{id_name}"] = id_tag.attrs.get("value")

                # The "other" identifier additionally carries a description of its type
                other_tag = identifiers.find("other")
                if other_tag:
                    reference["reference_entity_other_id"] = other_tag.attrs.get("value")
                    reference["reference_entity_other_id_type"] = other_tag.attrs.get("otherDesc")

        return cls(
            counterparty_name=counterparty_name,
            counterparty_lei=counterparty_lei,
            payoff_profile=child_text(tag, "payOffProf"),
            expiration_date=child_text(tag, "expDate"),
            notional_amount=optional_decimal(tag, "notionalAmt"),
            currency=child_text(tag, "curCd"),
            unrealized_appreciation=optional_decimal(tag, "unrealizedAppr"),
            **reference
        )
class SwaptionDerivative(BaseModel):
    """Swaption derivative (SWO) - an option whose underlying is a swap."""

    counterparty_name: Optional[str]
    counterparty_lei: Optional[str]
    put_or_call: Optional[str]
    written_or_purchased: Optional[str]
    share_number: Optional[Decimal]
    exercise_price: Optional[Decimal]
    exercise_price_currency: Optional[str]
    expiration_date: Optional[str]
    delta: Optional[Union[Decimal, str]]  # Can be numeric or 'XXXX'
    unrealized_appreciation: Optional[Decimal]

    # The underlying swap
    nested_swap: Optional['SwapDerivative']

    @classmethod
    def from_xml(cls, tag):
        """
        Build a swaption from an ``optionSwaptionWarrantDeriv`` XML element.

        Args:
            tag: Parsed XML element exposing ``name`` and ``find()``

        Returns:
            A populated SwaptionDerivative, or None when ``tag`` is missing or
            is a different element
        """
        if not (tag and tag.name == "optionSwaptionWarrantDeriv"):
            return None

        counterparties = tag.find("counterparties")
        if counterparties:
            counterparty_name = child_text(counterparties, "counterpartyName")
            counterparty_lei = child_text(counterparties, "counterpartyLei")
        else:
            counterparty_name = counterparty_lei = None

        # Walk descRefInstrmnt > nestedDerivInfo > swapDeriv, stopping at the
        # first level that is absent
        node = tag
        for child_name in ("descRefInstrmnt", "nestedDerivInfo", "swapDeriv"):
            node = node.find(child_name)
            if not node:
                break
        underlying_swap = SwapDerivative.from_xml(node) if node else None

        return cls(
            counterparty_name=counterparty_name,
            counterparty_lei=counterparty_lei,
            put_or_call=child_text(tag, "putOrCall"),
            written_or_purchased=child_text(tag, "writtenOrPur"),
            share_number=optional_decimal(tag, "shareNo"),
            exercise_price=optional_decimal(tag, "exercisePrice"),
            exercise_price_currency=child_text(tag, "exercisePriceCurCd"),
            expiration_date=child_text(tag, "expDt"),
            delta=child_text(tag, "delta"),
            unrealized_appreciation=optional_decimal(tag, "unrealizedAppr"),
            nested_swap=underlying_swap,
        )
class OptionDerivative(BaseModel):
    """Option derivative (OPT) - can have nested forward, future, or other derivatives"""

    counterparty_name: Optional[str]
    counterparty_lei: Optional[str]
    put_or_call: Optional[str]
    written_or_purchased: Optional[str]
    share_number: Optional[Decimal]
    exercise_price: Optional[Decimal]
    exercise_price_currency: Optional[str]
    expiration_date: Optional[str]
    delta: Optional[Union[Decimal, str]]  # Can be numeric or 'XXXX'
    unrealized_appreciation: Optional[Decimal]

    # Reference entity (for options on individual securities)
    reference_entity_name: Optional[str]
    reference_entity_title: Optional[str]
    reference_entity_cusip: Optional[str]
    reference_entity_isin: Optional[str]
    reference_entity_ticker: Optional[str]
    reference_entity_other_id: Optional[str]
    reference_entity_other_id_type: Optional[str]

    # Index reference (for options on indices like S&P 500)
    index_name: Optional[str]
    index_identifier: Optional[str]

    # For options with nested derivatives
    nested_forward: Optional['ForwardDerivative']
    nested_future: Optional['FutureDerivative']
    nested_swap: Optional['SwapDerivative']

    @classmethod
    def from_xml(cls, tag):
        """
        Build an option from an ``optionSwaptionWarrantDeriv`` XML element.

        The reference instrument (``descRefInstrmnt``) is read in one of two ways:
        when it contains ``nestedDerivInfo`` the nested forward/future/swap is
        parsed; otherwise the index (``indexBasketInfo``) and/or security
        (``otherRefInst``) reference is parsed.

        Args:
            tag: Parsed XML element exposing ``name``, ``find()`` and ``attrs``

        Returns:
            A populated OptionDerivative, or None when ``tag`` is missing or is
            a different element
        """
        if not (tag and tag.name == "optionSwaptionWarrantDeriv"):
            return None

        counterparties = tag.find("counterparties")
        counterparty_name = child_text(counterparties, "counterpartyName") if counterparties else None
        counterparty_lei = child_text(counterparties, "counterpartyLei") if counterparties else None

        # Every optional field is bound up front. The nested future/swap used
        # to be bound only inside the `if desc_ref:` branch, so an element
        # without <descRefInstrmnt> raised UnboundLocalError at the cls() call.
        ref_entity_name = None
        ref_entity_title = None
        ref_entity_cusip = None
        ref_entity_isin = None
        ref_entity_ticker = None
        ref_entity_other_id = None
        ref_entity_other_id_type = None
        index_name = None
        index_identifier = None
        nested_forward = None
        nested_future = None
        nested_swap = None

        desc_ref = tag.find("descRefInstrmnt")
        if desc_ref:
            # Check for nested derivative info (e.g., option on forward, future, swap)
            nested_deriv_info = desc_ref.find("nestedDerivInfo")
            if nested_deriv_info:
                # Parse any type of nested derivative
                fwd_tag = nested_deriv_info.find("fwdDeriv")
                if fwd_tag:
                    nested_forward = ForwardDerivative.from_xml(fwd_tag)

                fut_tag = nested_deriv_info.find("futrDeriv")
                if fut_tag:
                    nested_future = FutureDerivative.from_xml(fut_tag)

                swap_tag = nested_deriv_info.find("swapDeriv")
                if swap_tag:
                    nested_swap = SwapDerivative.from_xml(swap_tag)
            else:
                # Regular option - parse reference instrument
                # Check for index reference first
                index_basket = desc_ref.find("indexBasketInfo")
                if index_basket:
                    index_name = child_text(index_basket, "indexName")
                    index_identifier = child_text(index_basket, "indexIdentifier")

                # Then check for other reference instrument
                other_ref = desc_ref.find("otherRefInst")
                if other_ref:
                    ref_entity_name = child_text(other_ref, "issuerName")
                    ref_entity_title = child_text(other_ref, "issueTitle")

                    identifiers = other_ref.find("identifiers")
                    if identifiers:
                        # Identifier values are carried in the "value" attribute
                        cusip_tag = identifiers.find("cusip")
                        if cusip_tag:
                            ref_entity_cusip = cusip_tag.attrs.get("value")

                        isin_tag = identifiers.find("isin")
                        if isin_tag:
                            ref_entity_isin = isin_tag.attrs.get("value")

                        ticker_tag = identifiers.find("ticker")
                        if ticker_tag:
                            ref_entity_ticker = ticker_tag.attrs.get("value")

                        other_tag = identifiers.find("other")
                        if other_tag:
                            ref_entity_other_id = other_tag.attrs.get("value")
                            ref_entity_other_id_type = other_tag.attrs.get("otherDesc")

        return cls(
            counterparty_name=counterparty_name,
            counterparty_lei=counterparty_lei,
            put_or_call=child_text(tag, "putOrCall"),
            written_or_purchased=child_text(tag, "writtenOrPur"),
            share_number=optional_decimal(tag, "shareNo"),
            exercise_price=optional_decimal(tag, "exercisePrice"),
            exercise_price_currency=child_text(tag, "exercisePriceCurCd"),
            expiration_date=child_text(tag, "expDt"),
            delta=child_text(tag, "delta"),
            unrealized_appreciation=optional_decimal(tag, "unrealizedAppr"),
            reference_entity_name=ref_entity_name,
            reference_entity_title=ref_entity_title,
            reference_entity_cusip=ref_entity_cusip,
            reference_entity_isin=ref_entity_isin,
            reference_entity_ticker=ref_entity_ticker,
            reference_entity_other_id=ref_entity_other_id,
            reference_entity_other_id_type=ref_entity_other_id_type,
            index_name=index_name,
            index_identifier=index_identifier,
            nested_forward=nested_forward,
            nested_future=nested_future,
            nested_swap=nested_swap,
        )
class DerivativeInfo(BaseModel):
    """Container for the single derivative described by a ``derivativeInfo`` element."""

    derivative_category: Optional[str]  # FWD, SWP, FUT, OPT, SWO, WAR
    forward_derivative: Optional[ForwardDerivative]
    swap_derivative: Optional[SwapDerivative]
    future_derivative: Optional[FutureDerivative]
    option_derivative: Optional[OptionDerivative]
    swaption_derivative: Optional[SwaptionDerivative]

    @classmethod
    def from_xml(cls, tag):
        """
        Build a DerivativeInfo from a ``derivativeInfo`` XML element.

        Args:
            tag: Parsed XML element exposing ``name``, ``find()`` and ``attrs``

        Returns:
            A populated DerivativeInfo, or None when ``tag`` is missing or is a
            different element
        """
        if not (tag and tag.name == "derivativeInfo"):
            return None

        # Direct children only, so derivatives nested inside an option are not
        # mistaken for the top-level one
        fwd_tag = tag.find("fwdDeriv", recursive=False)
        swap_tag = tag.find("swapDeriv", recursive=False)
        future_tag = tag.find("futrDeriv", recursive=False)
        option_tag = tag.find("optionSwaptionWarrantDeriv", recursive=False)

        # The category comes from the first element present, in this priority order
        primary = next(
            (candidate for candidate in (fwd_tag, swap_tag, future_tag, option_tag) if candidate),
            None,
        )
        category = primary.attrs.get("derivCat") if primary else None

        option = None
        swaption = None
        if option_tag and primary is option_tag:
            # The same element carries swaptions (SWO) and plain options/warrants
            if category == "SWO":
                swaption = SwaptionDerivative.from_xml(option_tag)
            else:
                option = OptionDerivative.from_xml(option_tag)

        forward = ForwardDerivative.from_xml(fwd_tag) if fwd_tag else None
        swap = SwapDerivative.from_xml(swap_tag) if swap_tag else None
        future = FutureDerivative.from_xml(future_tag) if future_tag else None

        return cls(
            derivative_category=category,
            forward_derivative=forward,
            swap_derivative=swap,
            future_derivative=future,
            option_derivative=option,
            swaption_derivative=swaption,
        )

View File

@@ -0,0 +1,565 @@
import logging
import urllib.parse
from dataclasses import dataclass
from functools import lru_cache
from io import StringIO
from typing import Dict, List, Optional, Set, Tuple, Union
import pandas as pd
from bs4 import BeautifulSoup
from edgar.httprequests import download_text
# Base URL against which relative links scraped from SEC pages are resolved
SEC_BASE_URL = "https://www.sec.gov"
# Module-level logger, named after this module
log = logging.getLogger(__name__)
# Data classes for our normalized data model
@dataclass
class FundCompanyRecord:
    """One fund company (registrant) row of the normalized reference data."""

    # Required identification fields
    cik: str
    name: str
    entity_org_type: str
    file_number: str

    # Optional mailing address; each part defaults to None when not supplied
    address_1: Optional[str] = None
    address_2: Optional[str] = None
    city: Optional[str] = None
    state: Optional[str] = None
    zip_code: Optional[str] = None
@dataclass
class FundSeriesRecord:
    """One fund series row of the normalized reference data."""

    series_id: str
    name: str
    # CIK of the company this series belongs to
    cik: str
@dataclass
class FundClassRecord:
    """One share-class row of the normalized reference data."""

    class_id: str
    name: str
    # None when the class has no ticker
    ticker: Optional[str]
    # ID of the series this class belongs to
    series_id: str
class FundReferenceData:
    """
    A memory-efficient container for fund reference data that provides fast lookups
    while minimizing data duplication.

    Internally, this class normalizes the data into separate tables for companies,
    series, and classes, with relationships maintained through IDs.

    Lookups are accelerated through indices on common lookup patterns
    like ticker, CIK, series ID, and class ID. CIKs are stored zero-padded to
    10 characters; ticker lookups ignore case and surrounding whitespace.
    """

    def __init__(self, data: Optional[pd.DataFrame] = None):
        """
        Initialize with a DataFrame of fund data.

        The DataFrame should have columns similar to the SEC fund data file:
        - 'Reporting File Number', 'CIK Number', 'Entity Name', 'Entity Org Type',
        - 'Series ID', 'Series Name', 'Class ID', 'Class Name', 'Class Ticker', etc.

        Args:
            data: DataFrame containing fund reference data; when None the
                container starts empty
        """
        # Normalized tables keyed by their own identifier
        self._companies: Dict[str, FundCompanyRecord] = {}
        self._series: Dict[str, FundSeriesRecord] = {}
        self._classes: Dict[str, FundClassRecord] = {}

        # Indexes for fast lookups
        self._ticker_to_class: Dict[str, str] = {}  # normalized ticker -> class_id
        self._series_by_company: Dict[str, Set[str]] = {}  # cik -> set of series_ids
        self._classes_by_series: Dict[str, Set[str]] = {}  # series_id -> set of class_ids

        if data is not None:
            self._load_data(data)

    @staticmethod
    def _ticker_key(ticker):
        """Return the key used by the ticker index: trimmed and upper-cased for strings."""
        return ticker.strip().upper() if isinstance(ticker, str) else ticker

    def _load_data(self, data: pd.DataFrame):
        """
        Load and normalize data from a DataFrame into the internal data structures.

        The input frame is not modified.

        Args:
            data: DataFrame containing fund reference data

        Raises:
            KeyError: If a required column is missing after renaming
        """
        # SEC column names -> internal names
        col_map = {
            'CIK Number': 'cik',
            'Entity Name': 'company_name',
            'Entity Org Type': 'entity_org_type',
            'Reporting File Number': 'file_number',
            'Series ID': 'series_id',
            'Series Name': 'series_name',
            'Class ID': 'class_id',
            'Class Name': 'class_name',
            'Class Ticker': 'ticker',
            'Address_1': 'address_1',
            'Address_2': 'address_2',
            'City': 'city',
            'State': 'state',
            'Zip Code': 'zip_code'
        }

        # Only rename columns that exist and whose target name is not already taken.
        # rename() returns a new frame and nothing below mutates `df`, so no
        # defensive copy of the caller's data is needed.
        rename_dict = {k: v for k, v in col_map.items() if k in data.columns and v not in data.columns}
        df = data.rename(columns=rename_dict) if rename_dict else data

        # Process companies (distinct CIKs); missing values become '' here
        company_cols = ['cik', 'company_name', 'entity_org_type', 'file_number',
                        'address_1', 'address_2', 'city', 'state', 'zip_code']
        company_df = df.drop_duplicates(subset=['cik'])[company_cols].fillna('')

        # itertuples(name=None) yields plain tuples in column order and avoids
        # building a Series per row
        for (raw_cik, name, org_type, file_number,
             address_1, address_2, city, state, zip_code) in company_df.itertuples(index=False, name=None):
            cik = str(raw_cik).zfill(10)  # Ensure CIK is properly formatted
            self._companies[cik] = FundCompanyRecord(
                cik=cik,
                name=name,
                entity_org_type=org_type,
                file_number=file_number,
                # Empty address parts are stored as None
                address_1=address_1 or None,
                address_2=address_2 or None,
                city=city or None,
                state=state or None,
                zip_code=zip_code or None
            )
            # Initialize empty set for series in this company
            self._series_by_company[cik] = set()

        # Process series (distinct series IDs)
        series_df = df.dropna(subset=['series_id']).drop_duplicates(subset=['series_id'])[
            ['series_id', 'series_name', 'cik']
        ]
        for series_id, series_name, raw_cik in series_df.itertuples(index=False, name=None):
            cik = str(raw_cik).zfill(10)

            # Skip if parent company doesn't exist
            if cik not in self._companies:
                continue

            self._series[series_id] = FundSeriesRecord(
                series_id=series_id,
                name=series_name,
                cik=cik
            )
            self._series_by_company[cik].add(series_id)
            # Initialize empty set for classes in this series
            self._classes_by_series[series_id] = set()

        # Process classes (distinct class IDs)
        class_df = df.dropna(subset=['class_id']).drop_duplicates(subset=['class_id'])[
            ['class_id', 'class_name', 'ticker', 'series_id']
        ]
        for class_id, class_name, raw_ticker, series_id in class_df.itertuples(index=False, name=None):
            # Skip if parent series doesn't exist
            if series_id not in self._series:
                continue

            # Handle potentially missing ticker
            ticker = raw_ticker if pd.notna(raw_ticker) else None

            self._classes[class_id] = FundClassRecord(
                class_id=class_id,
                name=class_name,
                ticker=ticker,
                series_id=series_id
            )
            self._classes_by_series[series_id].add(class_id)

            # The record keeps the ticker as given; only the index key is normalized
            ticker_key = self._ticker_key(ticker)
            if ticker_key:
                self._ticker_to_class[ticker_key] = class_id

    @property
    def companies_count(self) -> int:
        """Get the total number of fund companies."""
        return len(self._companies)

    @property
    def series_count(self) -> int:
        """Get the total number of fund series."""
        return len(self._series)

    @property
    def classes_count(self) -> int:
        """Get the total number of fund classes."""
        return len(self._classes)

    def get_company(self, cik: str) -> Optional[FundCompanyRecord]:
        """
        Get company information by CIK.

        Args:
            cik: Company CIK; converted to a string and zero-padded to 10 characters

        Returns:
            FundCompanyRecord or None if not found
        """
        return self._companies.get(str(cik).zfill(10))

    def get_series(self, series_id: str) -> Optional[FundSeriesRecord]:
        """
        Get series information by series ID.

        Args:
            series_id: Series ID (exact match)

        Returns:
            FundSeriesRecord or None if not found
        """
        return self._series.get(series_id)

    def get_class(self, class_id: str) -> Optional[FundClassRecord]:
        """
        Get class information by class ID.

        Args:
            class_id: Class ID (exact match)

        Returns:
            FundClassRecord or None if not found
        """
        return self._classes.get(class_id)

    def get_class_by_ticker(self, ticker: str) -> Optional[FundClassRecord]:
        """
        Get class information by ticker symbol.

        Args:
            ticker: Ticker symbol; matched ignoring case and surrounding whitespace

        Returns:
            FundClassRecord or None if not found
        """
        ticker_key = self._ticker_key(ticker)
        if not ticker_key:
            return None
        class_id = self._ticker_to_class.get(ticker_key)
        if class_id:
            return self._classes.get(class_id)
        return None

    def get_series_for_company(self, cik: str) -> List[FundSeriesRecord]:
        """
        Get all series for a company.

        Args:
            cik: Company CIK

        Returns:
            List of FundSeriesRecord objects (empty if the company is unknown)
        """
        series_ids = self._series_by_company.get(str(cik).zfill(10), set())
        return [self._series[s_id] for s_id in series_ids if s_id in self._series]

    def get_classes_for_series(self, series_id: str) -> List[FundClassRecord]:
        """
        Get all classes for a series.

        Args:
            series_id: Series ID

        Returns:
            List of FundClassRecord objects (empty if the series is unknown)
        """
        class_ids = self._classes_by_series.get(series_id, set())
        return [self._classes[c_id] for c_id in class_ids if c_id in self._classes]

    def find_by_name(self, name_fragment: str, search_type: str = 'company') -> List[Union[FundCompanyRecord, FundSeriesRecord, FundClassRecord]]:
        """
        Find entities containing the name fragment.

        Args:
            name_fragment: Case-insensitive fragment to search for
            search_type: Type of entity to search ('company', 'series', or 'class')

        Returns:
            List of matching records

        Raises:
            ValueError: If search_type is not one of the supported values
        """
        name_fragment = name_fragment.lower()

        if search_type == 'company':
            records = self._companies
        elif search_type == 'series':
            records = self._series
        elif search_type == 'class':
            records = self._classes
        else:
            raise ValueError(f"Invalid search_type: {search_type}")

        # Series/class names come straight from the source data and may be a
        # non-string missing value (NaN); such records can never match
        return [record for record in records.values()
                if isinstance(record.name, str) and name_fragment in record.name.lower()]

    def get_company_for_series(self, series_id: str) -> Optional[FundCompanyRecord]:
        """
        Get the parent company for a series.

        Args:
            series_id: Series ID

        Returns:
            FundCompanyRecord or None if not found
        """
        series = self._series.get(series_id)
        if series:
            return self._companies.get(series.cik)
        return None

    def get_series_for_class(self, class_id: str) -> Optional[FundSeriesRecord]:
        """
        Get the parent series for a class.

        Args:
            class_id: Class ID

        Returns:
            FundSeriesRecord or None if not found
        """
        class_record = self._classes.get(class_id)
        if class_record:
            return self._series.get(class_record.series_id)
        return None

    def get_company_for_class(self, class_id: str) -> Optional[FundCompanyRecord]:
        """
        Get the parent company for a class (traversing through series).

        Args:
            class_id: Class ID

        Returns:
            FundCompanyRecord or None if not found
        """
        series = self.get_series_for_class(class_id)
        if series:
            return self._companies.get(series.cik)
        return None

    def _hierarchy_for_class(self, class_record: FundClassRecord) -> Tuple[Optional[FundCompanyRecord], Optional[FundSeriesRecord], Optional[FundClassRecord]]:
        """Return the (company, series, class) triple for an already-resolved class record."""
        series = self.get_series(class_record.series_id)
        company = self.get_company(series.cik) if series else None
        return company, series, class_record

    def get_hierarchical_info(self, identifier: str) -> Tuple[Optional[FundCompanyRecord], Optional[FundSeriesRecord], Optional[FundClassRecord]]:
        """
        Get the complete hierarchy for an identifier (CIK, series ID, class ID, or ticker).

        Interpretations are tried in that order and the first one that resolves
        wins. Series IDs, class IDs and tickers are matched ignoring case, and a
        non-string identifier (for example an integer CIK) is converted to text.

        Args:
            identifier: Any identifier (CIK, series ID, class ID, or ticker)

        Returns:
            Tuple of (company, series, class) records, with None for levels not applicable
        """
        if identifier is None:
            return None, None, None

        ident = str(identifier).strip()
        ident_upper = ident.upper()

        # Check if it's a CIK (digits, possibly with leading zeros)
        if ident.isdigit() or ident.startswith('0'):
            company = self.get_company(ident)
            if company:
                return company, None, None

        # Check if it's a series ID (starts with S). The exact spelling is tried
        # first, then the upper-cased one, so a lower-case ID resolves as well.
        if ident_upper.startswith('S'):
            series = self.get_series(ident) or self.get_series(ident_upper)
            if series:
                return self.get_company(series.cik), series, None

        # Check if it's a class ID (starts with C)
        if ident_upper.startswith('C'):
            class_record = self.get_class(ident) or self.get_class(ident_upper)
            if class_record:
                return self._hierarchy_for_class(class_record)

        # Check if it's a ticker
        class_record = self.get_class_by_ticker(ident)
        if class_record:
            return self._hierarchy_for_class(class_record)

        # Nothing found
        return None, None, None

    def to_dataframe(self) -> pd.DataFrame:
        """
        Convert the normalized data back to a flat DataFrame.

        One row is produced per class whose series and company are both known;
        companies or series without any class do not appear.

        Returns:
            DataFrame containing all fund data
        """
        records = []

        for class_record in self._classes.values():
            series_record = self._series.get(class_record.series_id)
            if not series_record:
                continue

            company_record = self._companies.get(series_record.cik)
            if not company_record:
                continue

            records.append({
                'cik': company_record.cik,
                'company_name': company_record.name,
                'entity_org_type': company_record.entity_org_type,
                'file_number': company_record.file_number,
                'series_id': series_record.series_id,
                'series_name': series_record.name,
                'class_id': class_record.class_id,
                'class_name': class_record.name,
                'ticker': class_record.ticker,
                'address_1': company_record.address_1,
                'address_2': company_record.address_2,
                'city': company_record.city,
                'state': company_record.state,
                'zip_code': company_record.zip_code
            })

        return pd.DataFrame(records)
def _find_latest_fund_data_url():
    """Find the URL of the latest fund data CSV file from the SEC website.

    The listing looks like this:

    | File                         | Format | Size |
    |------------------------------|--------|------|
    |[2024](link) Updated 6/5/24   | XML    | 1.2 MB|
    |[2024](link) Updated 6/5/24   | CSV    | 1.2 MB|
    |[2023](link) Updated 6/5/24   | XML    | 1.2 MB|
    |[2023](link) Updated 6/5/24   | CSV    | 1.2 MB|

    Returns:
        Absolute URL of the first CSV link found, scanning tables and rows in page order

    Raises:
        ValueError: If no table on the page yields a CSV link
    """
    list_url = "https://www.sec.gov/about/opendatasetsshtmlinvestment_company"
    page = BeautifulSoup(download_text(list_url), 'html.parser')

    required_headers = {'File', 'Format', 'Size'}
    for table in page.find_all('table'):
        headers = [th.get_text(strip=True) for th in table.find_all('th')]
        # Only a table carrying all three expected headers is a candidate
        if not required_headers.issubset(headers):
            continue

        format_col = headers.index('Format')
        file_col = headers.index('File')
        needed_cells = max(format_col, file_col) + 1

        for row in table.find_all('tr'):
            cells = row.find_all('td')
            # Header rows have no <td> cells and short rows lack the columns we need
            if len(cells) < needed_cells:
                continue
            if 'CSV' not in cells[format_col].get_text(strip=True):
                continue

            link_tag = cells[file_col].find('a')
            if not link_tag or 'href' not in link_tag.attrs:
                continue

            # Links on the page may be relative; resolve against the SEC base URL
            return urllib.parse.urljoin(SEC_BASE_URL, link_tag['href'])
        # No CSV link in this table: fall through to the next table, if any

    raise ValueError("No fund data CSV file found on the SEC website.")
@lru_cache(maxsize=1)
def get_bulk_fund_data() -> pd.DataFrame:
    """
    Download the latest Investment Company fund data file from the SEC website.

    The result is cached, so the download happens once per process and every
    caller receives the same DataFrame object.

    Expected columns of the SEC file:
        ['Reporting File Number', 'CIK Number', 'Entity Name', 'Entity Org Type',
         'Series ID', 'Series Name', 'Class ID', 'Class Name', 'Class Ticker',
         'Address_1', 'Address_2', 'City', 'State', 'Zip Code']

    Returns:
        pd.DataFrame: The fund data parsed from the CSV file
    """
    latest_csv_url = _find_latest_fund_data_url()
    csv_text = download_text(latest_csv_url)
    return pd.read_csv(StringIO(csv_text))
@lru_cache(maxsize=1)
def get_fund_reference_data() -> FundReferenceData:
    """
    Build the normalized reference data for all funds, series, and classes.

    The result is cached, so repeated calls return the same object.

    Returns:
        FundReferenceData: An object providing efficient lookups for fund entities
    """
    return FundReferenceData(get_bulk_fund_data())
if __name__ == "__main__":
    # Manual smoke test: download the reference data and walk one well-known
    # fund up and down the company / series / class hierarchy.
    try:
        fund_ref_data = get_fund_reference_data()

        # Look up a well-known fund class by ticker
        admiral_class = fund_ref_data.get_class_by_ticker('VFIAX')
        if admiral_class:
            # Get parent series
            parent_series = fund_ref_data.get_series_for_class(admiral_class.class_id)
            if parent_series:
                # Get all classes in the series
                series_classes = fund_ref_data.get_classes_for_series(parent_series.series_id)

                # Get parent company
                parent_company = fund_ref_data.get_company_for_series(parent_series.series_id)
                if parent_company:
                    # Get all series for the company
                    company_series = fund_ref_data.get_series_for_company(parent_company.cik)
    except Exception:
        # Report the failure with its traceback instead of hiding it; the
        # script still exits normally, as before
        log.exception("Fund reference data smoke test failed")

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,111 @@
"""
Series resolution service for ETF/Fund ticker-to-series mapping.
This module provides services for resolving ticker symbols to series IDs,
addressing GitHub issue #417.
"""
import logging
from dataclasses import dataclass
from functools import lru_cache
from typing import List, Optional
from edgar.core import log
__all__ = ['SeriesInfo', 'TickerSeriesResolver']
@dataclass
class SeriesInfo:
    """Series (and optionally share-class) details resolved for a ticker."""

    series_id: str
    # May be None when the data source does not provide a series name
    series_name: Optional[str]
    ticker: str

    # Share-class details, when known
    class_id: Optional[str] = None
    class_name: Optional[str] = None
class TickerSeriesResolver:
    """Resolves ticker symbols to fund series IDs, caching the results."""

    @staticmethod
    @lru_cache(maxsize=1000)
    def resolve_ticker_to_series(ticker: str) -> List[SeriesInfo]:
        """
        Resolve a ticker to every series associated with it.

        Mutual fund ticker data is consulted first. When it has no match, the
        company ticker data is used as a fallback and a single synthetic entry
        (IDs prefixed ``ETF_``) is returned. Any error is logged and yields an
        empty list. Results, including empty ones, are cached per ticker string.

        Args:
            ticker: Ticker symbol; compared case-insensitively

        Returns:
            List of SeriesInfo, empty when nothing matched
        """
        if not ticker:
            return []

        try:
            # Imported lazily, inside the call
            from edgar.reference.tickers import get_mutual_fund_tickers

            wanted = ticker.upper()
            fund_tickers = get_mutual_fund_tickers()
            fund_rows = fund_tickers[fund_tickers['ticker'].str.upper() == wanted]

            # Series name is not available in the ticker data
            resolved = [
                SeriesInfo(
                    series_id=row['seriesId'],
                    series_name=None,
                    ticker=row['ticker'],
                    class_id=row['classId']
                )
                for _, row in fund_rows.iterrows()
            ]
            if resolved:
                return resolved

            # Fallback to company ticker data for ETFs
            log.debug(f"Ticker {ticker} not found in mutual fund data, trying company data...")
            from edgar.reference.tickers import find_cik, get_company_tickers

            cik = find_cik(ticker)
            if cik:
                # Found as company ticker - likely an ETF
                companies = get_company_tickers()
                same_ticker = companies['ticker'].str.upper() == ticker.upper()
                same_cik = companies['cik'] == cik
                company_rows = companies[same_ticker & same_cik]

                if len(company_rows) > 0:
                    first_match = company_rows.iloc[0]
                    # Synthetic series/class IDs derived from the CIK
                    synthetic = SeriesInfo(
                        series_id=f"ETF_{cik}",
                        series_name=first_match['company'],
                        ticker=first_match['ticker'],
                        class_id=f"ETF_CLASS_{cik}"
                    )
                    log.debug(f"Resolved {ticker} as ETF company with CIK {cik}")
                    return [synthetic]

            log.debug(f"Ticker {ticker} not found in either mutual fund or company data")
            return []

        except Exception as e:
            log.warning(f"Error resolving ticker {ticker} to series: {e}")
            return []

    @staticmethod
    def get_primary_series(ticker: str) -> Optional[str]:
        """
        Get the primary series ID for a ticker.

        The first resolved series is treated as primary, whether one or several
        series matched.

        Returns:
            The series ID, or None when the ticker resolves to nothing
        """
        candidates = TickerSeriesResolver.resolve_ticker_to_series(ticker)
        return candidates[0].series_id if candidates else None

    @staticmethod
    def has_multiple_series(ticker: str) -> bool:
        """Check if a ticker maps to more than one series."""
        return len(TickerSeriesResolver.resolve_ticker_to_series(ticker)) > 1

View File

@@ -0,0 +1,106 @@
"""
13F filing module for investment funds.
This module provides classes and functions for working with 13F filings
that report investment fund portfolio holdings.
"""
import logging
import pandas as pd
# Form type names for 13F filings, including their amended ("/A") variants
THIRTEENF_FORMS = ['13F-HR', "13F-HR/A", "13F-NT", "13F-NT/A", "13F-CTR", "13F-CTR/A"]
# Module-level logger, named after this module
log = logging.getLogger(__name__)
# We'll define these functions without directly importing them at the module level
# to avoid circular imports
def get_ThirteenF():
    """Return the ``edgar.thirteenf.ThirteenF`` class, importing it at call time to avoid circular imports."""
    from edgar.thirteenf import ThirteenF as _thirteenf_class
    return _thirteenf_class
# Create property-like functions that provide lazy loading
def ThirteenF():
    """
    Get the ThirteenF class, dynamically importing it to avoid circular imports.

    Note that calling this function returns the class itself, not an instance.
    """
    thirteenf_class = get_ThirteenF()
    return thirteenf_class
def get_thirteenf_portfolio(filing) -> pd.DataFrame:
    """
    Extract portfolio holdings from a 13F filing.

    This is best-effort: any failure is logged and an empty DataFrame is
    returned instead of raising.

    Args:
        filing: The 13F filing to extract data from

    Returns:
        DataFrame containing portfolio holdings. When a 'value_usd' column is
        present, a 'pct_value' column is added and rows are sorted by value,
        largest first.
    """
    try:
        report = get_ThirteenF()(filing, use_latest_period_of_report=True)

        if not report.has_infotable():
            log.info("Filing %s does not have an information table", filing.accession_no)
            return pd.DataFrame()

        raw_table = report.infotable
        if raw_table is None:
            log.warning("Could not extract information table from filing %s", filing.accession_no)
            return pd.DataFrame()

        holdings = pd.DataFrame(raw_table)
        if holdings.empty:
            return holdings

        # Update column names for consistency
        if 'nameOfIssuer' in holdings.columns:
            holdings = holdings.rename(columns={
                'nameOfIssuer': 'name',
                'titleOfClass': 'title',
                'cusip': 'cusip',
                'value': 'value_usd',
                'sshPrnamt': 'shares',
                'sshPrnamtType': 'share_type',
                'investmentDiscretion': 'investment_discretion',
                'votingAuthority': 'voting_authority'
            })

        # Add ticker mapping if possible; on any failure the column is still
        # created, filled with None
        try:
            from edgar.reference import cusip_ticker_mapping
            cusip_lookup = cusip_ticker_mapping(allow_duplicate_cusips=False)
            holdings['ticker'] = holdings['cusip'].map(cusip_lookup.Ticker)
        except Exception as e:
            log.warning("Error adding ticker mappings: %s", e)
            holdings['ticker'] = None

        if 'value_usd' not in holdings.columns:
            return holdings

        # Each holding's share of the portfolio, as a percentage
        portfolio_total = holdings['value_usd'].sum()
        if portfolio_total > 0:
            holdings['pct_value'] = holdings['value_usd'] / portfolio_total * 100
        else:
            holdings['pct_value'] = 0

        # Sort by value
        return holdings.sort_values('value_usd', ascending=False).reset_index(drop=True)

    except Exception as e:
        log.warning("Error extracting holdings from 13F filing: %s", e)

    # Return empty DataFrame if extraction failed
    return pd.DataFrame()
# Functions for export
__all__ = [
'ThirteenF',
'THIRTEENF_FORMS',
'get_thirteenf_portfolio',
]

View File

@@ -0,0 +1,110 @@
"""
Ticker resolution service for ETF/Fund holdings.
This module provides services for resolving ticker symbols from various identifiers
like CUSIP, ISIN, and company names, addressing GitHub issue #418.
"""
import logging
from dataclasses import dataclass
from functools import lru_cache
from typing import Optional
from edgar.core import log
from edgar.reference.tickers import get_ticker_from_cusip
__all__ = ['TickerResolutionResult', 'TickerResolutionService']
@dataclass
class TickerResolutionResult:
    """Outcome of one ticker resolution attempt."""

    ticker: Optional[str]
    method: str  # 'direct', 'cusip', 'failed'
    confidence: float  # 0.0 to 1.0
    error_message: Optional[str] = None

    @property
    def success(self) -> bool:
        """True when a ticker was resolved with a confidence above zero."""
        if self.ticker is None:
            return False
        return self.confidence > 0.0
class TickerResolutionService:
    """Centralized service for resolving tickers from various identifiers"""

    # Confidence assigned to a result, keyed by the method that produced it
    CONFIDENCE_SCORES = {
        'direct': 1.0,    # Direct from NPORT-P
        'cusip': 0.85,    # High confidence - official identifier
        'isin': 0.75,     # Good confidence - international identifier
        'name': 0.5,      # Lower confidence - fuzzy matching
        'failed': 0.0     # No resolution
    }

    @staticmethod
    @lru_cache(maxsize=1000)
    def resolve_ticker(ticker: Optional[str] = None,
                       cusip: Optional[str] = None,
                       isin: Optional[str] = None,
                       company_name: Optional[str] = None) -> TickerResolutionResult:
        """
        Main resolution entry point

        A non-blank ``ticker`` wins outright; otherwise a CUSIP lookup is tried.
        ``isin`` and ``company_name`` are accepted but not used yet. Results
        are cached per combination of arguments.

        Args:
            ticker: Direct ticker from NPORT-P
            cusip: CUSIP identifier
            isin: ISIN identifier (future use)
            company_name: Company name (future use)

        Returns:
            TickerResolutionResult with ticker and metadata
        """
        scores = TickerResolutionService.CONFIDENCE_SCORES

        # 1. Direct ticker resolution
        direct = ticker.strip() if ticker else ""
        if direct:
            return TickerResolutionResult(
                ticker=direct.upper(),
                method='direct',
                confidence=scores['direct']
            )

        # 2. CUSIP-based resolution
        if cusip:
            from_cusip = TickerResolutionService._resolve_via_cusip(cusip)
            if from_cusip:
                return TickerResolutionResult(
                    ticker=from_cusip,
                    method='cusip',
                    confidence=scores['cusip']
                )

        # 3./4. Future: ISIN-based and name-based resolution would slot in here

        return TickerResolutionResult(
            ticker=None,
            method='failed',
            confidence=0.0,
            error_message='No resolution methods succeeded'
        )

    @staticmethod
    def _resolve_via_cusip(cusip: str) -> Optional[str]:
        """
        Resolve ticker using CUSIP mapping

        Returns:
            The upper-cased ticker, or None when the CUSIP is empty, shorter
            than 8 characters after trimming, unmapped, or the lookup raised
            (in which case a warning is logged)
        """
        try:
            if not cusip:
                return None
            trimmed = cusip.strip()
            if len(trimmed) < 8:
                return None

            # Rebind `cusip` so the warning below reports the normalized value
            cusip = trimmed.upper()
            mapped = get_ticker_from_cusip(cusip)
            if mapped:
                return mapped.upper()
        except Exception as e:
            log.warning(f"CUSIP ticker resolution failed for {cusip}: {e}")

        return None