Initial commit
This commit is contained in:
130
venv/lib/python3.10/site-packages/edgar/funds/__init__.py
Normal file
130
venv/lib/python3.10/site-packages/edgar/funds/__init__.py
Normal file
@@ -0,0 +1,130 @@
|
||||
"""
|
||||
Investment funds package for EdgarTools.
|
||||
|
||||
This package provides comprehensive tools for working with investment funds,
|
||||
fund classes, series information, and portfolio holdings from SEC filings.
|
||||
|
||||
The primary classes follow the domain model design:
|
||||
- FundCompany: Represents the legal entity that manages funds (e.g., "Vanguard")
|
||||
- FundSeries: Represents a specific investment product/strategy (e.g., "Vanguard 500 Index Fund")
|
||||
- FundClass: Represents a specific share class with its own ticker (e.g., "Vanguard 500 Index Admiral Shares")
|
||||
|
||||
Key functions:
|
||||
- find_fund(): Smart factory that returns the appropriate entity based on any identifier
|
||||
- get_fund_company(): Get a fund company by CIK
|
||||
- get_fund_series(): Get a fund series by series ID
|
||||
- get_fund_class(): Get a fund class by ticker or class ID
|
||||
|
||||
This package provides a more organized, intuitive API for working with fund entities:
|
||||
- core.py: Defines the domain entities and access functions
|
||||
- data.py: Provides data access functions and implementations
|
||||
- reports.py: Handles fund reports like N-PORT filings
|
||||
"""
|
||||
|
||||
# Keep backward compatibility for now
|
||||
# Note: The thirteenf module is not imported here, to avoid circular imports;
|
||||
# clients should import it directly. (The reports module is imported below.)
|
||||
from functools import lru_cache
|
||||
|
||||
from edgar.funds.core import (
|
||||
Fund,
|
||||
FundClass,
|
||||
FundCompany,
|
||||
FundSeries,
|
||||
find_fund,
|
||||
get_fund_class,
|
||||
get_fund_company,
|
||||
get_fund_series,
|
||||
)
|
||||
from edgar.funds.data import FundData, get_fund_information, is_fund_ticker, parse_fund_data, resolve_fund_identifier
|
||||
from edgar.funds.reports import NPORT_FORMS, CurrentMetric, FundReport, get_fund_portfolio_from_filing
|
||||
|
||||
|
||||
# Backward compatibility function for code that relies on the old API
|
||||
def get_fund_with_filings(identifier: str):
    """
    Get fund with filings for backward compatibility.

    This function is maintained for backward compatibility with the
    legacy funds.py module. New code should use:

    - Fund.get_filings() to get filings for a fund
    - find_fund() or the Fund class to create fund objects

    The lookup is best-effort: any exception raised by the underlying
    lookup is logged as a warning and a placeholder object is returned
    instead of propagating the error.

    Args:
        identifier: Fund identifier (class ID, series ID, or CIK)

    Returns:
        The truthy result of ``direct_get_fund_with_filings(identifier)``
        when there is one; otherwise a minimal placeholder object exposing
        ``id``, ``name`` and ``fund_cik``.
    """
    import logging

    if identifier:
        # Imported lazily, and only when there is something to look up.
        from edgar.funds.data import direct_get_fund_with_filings

        try:
            result = direct_get_fund_with_filings(identifier)
        except Exception as e:
            # Use the module's named logger rather than the root logger so
            # the message can be filtered/configured per package.
            logging.getLogger(__name__).warning("Error in get_fund_with_filings: %s", e)
        else:
            if result:
                return result

    # Create a minimal object with the expected interface as a last resort
    class MinimalFundInfo:
        def __init__(self, identifier):
            self.id = "C000000"
            self.name = f"Unknown Fund {identifier}"
            self.fund_cik = 0

    return MinimalFundInfo(identifier or "Unknown")
|
||||
|
||||
# Define FundSeriesAndContracts for backward compatibility
|
||||
class FundSeriesAndContracts:
    """
    Legacy container for fund series and class data.

    Kept only so that code written against the legacy funds.py module keeps
    working. It holds whatever object is passed as ``data``; when nothing is
    passed it holds an empty pandas DataFrame.

    New code should use the Fund, FundClass, and FundSeries classes from
    edgar.funds.core instead.
    """

    def __init__(self, data=None):
        # pandas is imported on construction, regardless of whether a
        # default DataFrame ends up being needed.
        import pandas as pd

        if data is None:
            data = pd.DataFrame()
        self.data = data
|
||||
|
||||
# Public names re-exported by the edgar.funds package.
__all__ = [
    # Primary user-facing class
    "Fund",

    # Domain entity classes
    "FundCompany",
    "FundSeries",
    "FundClass",

    # Access functions
    "find_fund",
    "get_fund_company",
    "get_fund_series",
    "get_fund_class",

    # Data classes
    "FundData",
    "resolve_fund_identifier",

    # Functions now implemented directly in the package
    "get_fund_information",
    "is_fund_ticker",
    "parse_fund_data",

    # Portfolio and report functionality
    "FundReport",
    "CurrentMetric",
    "NPORT_FORMS",
    "get_fund_portfolio_from_filing",

    # Legacy compatibility
    "get_fund_with_filings",
    "FundSeriesAndContracts",
]
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
582
venv/lib/python3.10/site-packages/edgar/funds/core.py
Normal file
582
venv/lib/python3.10/site-packages/edgar/funds/core.py
Normal file
@@ -0,0 +1,582 @@
|
||||
"""
|
||||
Core classes for working with investment funds.
|
||||
|
||||
This module provides the main classes used to interact with investment funds:
|
||||
- Fund: Represents an investment fund entity
|
||||
- FundClass: Represents a specific share class of a fund
|
||||
- FundSeries: Represents a fund series
|
||||
"""
|
||||
import logging
|
||||
from typing import TYPE_CHECKING, List, Optional, Union, Dict, Any
|
||||
|
||||
from rich import box
|
||||
from rich.console import Group
|
||||
from rich.panel import Panel
|
||||
from rich.table import Table
|
||||
|
||||
from edgar.entity.core import Entity
|
||||
from edgar.richtools import repr_rich
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from edgar._filings import Filings
|
||||
from edgar.entity.data import EntityData
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
__all__ = ['Fund', 'FundCompany', 'FundClass', 'FundSeries', 'get_fund_company', 'get_fund_class', 'get_fund_series', 'find_fund']
|
||||
|
||||
|
||||
class FundCompany(Entity):
    """
    The SEC-filing entity that sits at the top of the fund hierarchy.

    Extends Entity with an optional display-name override and the list of
    FundSeries objects attached to the company.
    """

    def __init__(self,
                 cik_or_identifier: Union[str, int],
                 fund_name: str = None,
                 all_series: Optional[List['FundSeries']] = None):
        """
        Args:
            cik_or_identifier: Value handed to resolve_fund_identifier(); the
                result is what the Entity base class is initialised with.
            fund_name: Optional name that takes precedence over the name
                provided by the Entity base class.
            all_series: Series belonging to this company; a falsy value is
                stored as an empty list.
        """
        # Deferred import: edgar.funds.data imports this module at load time.
        from edgar.funds.data import resolve_fund_identifier

        resolved = resolve_fund_identifier(cik_or_identifier)
        super().__init__(resolved)

        self._name = fund_name
        self.all_series: Optional[List['FundSeries']] = all_series or []
        self._cached_portfolio = None

    @property
    def name(self):
        """Name of the company: the explicit override if truthy, else Entity's name."""
        if self._name:
            return self._name
        return super().name

    def list_series(self) -> List['FundSeries']:
        """
        Return the series attached to this company.

        Returns:
            The list stored in ``all_series`` (the same list object, not a copy)
        """
        return self.all_series

    @property
    def data(self) -> 'EntityData':
        """Return the Entity-level data for this company."""
        entity_data = super().data

        if not (hasattr(entity_data, 'is_fund') and entity_data.is_fund):
            # No conversion to fund-specific data exists yet, so the base
            # data is returned unchanged in this case as well.
            pass

        return entity_data

    def __str__(self):
        return "{} [{}]".format(self.name, self.cik)

    def __rich__(self):
        """Rich rendering; delegates entirely to the Entity implementation."""
        return super().__rich__()

    def __repr__(self):
        return repr_rich(self.__rich__())
|
||||
|
||||
class FundClass:
    """
    Represents a specific class of an investment fund.

    Fund classes typically have their own ticker symbols and fee structures,
    but belong to the same underlying fund. Each class belongs to a specific
    fund series.
    """

    def __init__(self, class_id: str, name: Optional[str] = None,
                 ticker: Optional[str] = None, series: Optional['FundSeries'] = None):
        """
        Args:
            class_id: Class/contract identifier (e.g. 'C000012345')
            name: Display name of the share class, if known
            ticker: Ticker symbol of the share class, if known
            series: The FundSeries object this class belongs to, if known
        """
        self.class_id = class_id
        self.name = name
        self.ticker = ticker
        self.series = series  # The parent FundSeries object (not just its ID); may be None

    def __str__(self):
        ticker_str = f" - {self.ticker}" if self.ticker else ""
        return f"FundClass({self.name} [{self.class_id}]{ticker_str})"

    def get_classes(self) -> List['FundClass']:
        """
        Get all share classes in the same series as this class.

        Looks the parent series up again via get_fund_object() and returns
        its classes; when there is no parent series (or the lookup does not
        yield an object with ``get_classes``) the result is ``[self]``.
        """
        if self.series and self.series.series_id:
            from edgar.funds.data import get_fund_object
            full_series = get_fund_object(self.series.series_id)
            if full_series and hasattr(full_series, 'get_classes'):
                return full_series.get_classes()
        return [self]  # fallback

    def __repr__(self):
        return self.__str__()

    def __rich__(self):
        """Creates a rich representation of the fund class."""
        table = Table(
            title=None,
            box=box.ROUNDED,
            show_header=True
        )

        table.add_column("Fund", style="bold")
        table.add_column("Class ID", style="bold")
        table.add_column("Series ID", style="bold cyan")
        table.add_column("Ticker", style="bold yellow")

        # ``series`` is optional, so guard the attribute access instead of
        # failing with AttributeError when rendering a class without a series.
        series_id = self.series.series_id if self.series else None

        table.add_row(
            self.name,
            self.class_id,
            series_id or "Unknown",
            self.ticker or ""
        )

        return Panel(
            table,
            title=f"🏦 {self.name}",
            subtitle="Fund Class"
        )
|
||||
|
||||
class FundSeries:
    """A fund series: a named product that groups a set of share classes."""

    def __init__(self, series_id: str, name: str,
                 fund_classes: Optional[List[FundClass]] = None,
                 fund_company: Optional[FundCompany] = None):
        """
        Args:
            series_id: Series identifier (e.g. 'S000001234')
            name: Display name of the series
            fund_classes: Share classes in this series; a falsy value is
                stored as an empty list
            fund_company: The company this series belongs to, if known
        """
        self.series_id = series_id
        self.name = name
        self.fund_classes: List[FundClass] = fund_classes or []
        self.fund_company: Optional[FundCompany] = fund_company

    def get_classes(self) -> List[FundClass]:
        """
        Return the share classes of this series.

        Returns:
            The list stored in ``fund_classes`` (the same list object, not a copy)
        """
        return self.fund_classes

    def get_filings(self, **kwargs) -> 'Filings':
        """
        Fetch filings through the owning fund company.

        The call is forwarded unconditionally to ``fund_company.get_filings``,
        so ``fund_company`` must be set for this to work.

        Args:
            **kwargs: Passed through unchanged to the company's get_filings

        Returns:
            Whatever the company's get_filings returns
        """
        company = self.fund_company
        return company.get_filings(**kwargs)

    def __str__(self):
        return "FundSeries({} [{}])".format(self.name, self.series_id)

    def __repr__(self):
        return repr_rich(self.__rich__())

    def __rich__(self):
        """Render the series as a panel containing a table of its share classes."""
        share_table = Table(box=box.SIMPLE, show_header=True, padding=(0, 1))
        share_table.add_column("Class ID")
        share_table.add_column("Class Name")
        share_table.add_column("Ticker", style="bold yellow")

        # One row per share class; a missing ticker is shown as a dash.
        for share_class in self.get_classes():
            ticker_cell = share_class.ticker or "-"
            share_table.add_row(share_class.class_id, share_class.name, ticker_cell)

        inner_panel = Panel(
            share_table,
            title="📊 Share Classes",
            border_style="grey50"
        )

        return Panel(
            Group(inner_panel),
            title=f"🏦 {self.name} [{self.series_id}]",
            subtitle="Fund Series"
        )
|
||||
|
||||
def find_fund(identifier: str) -> Union[FundCompany, FundSeries, FundClass]:
    """
    Dispatch an arbitrary fund identifier to the matching getter.

    The checks run in this order:

    1. A string starting with 'S' (case-insensitive) followed only by digits
       is treated as a series ID and passed to get_fund_series().
    2. A string starting with 'C' (case-insensitive) followed only by digits
       is treated as a class ID and passed to get_fund_class().
    3. Anything is_fund_class_ticker() accepts is passed to get_fund_class().
    4. Everything else is passed to get_fund_company().

    Args:
        identifier: Fund ticker (e.g., 'VFINX'), Series ID (e.g., 'S000001234'),
                    Class ID (e.g., 'C000012345'), or CIK number

    Returns:
        Whatever the selected getter returns:
        - get_fund_class() result for tickers and class IDs
        - get_fund_series() result for series IDs
        - get_fund_company() result otherwise
    """
    # The ID patterns only apply to strings whose tail is purely numeric.
    if isinstance(identifier, str) and identifier[1:].isdigit():
        upper_id = identifier.upper()
        if upper_id.startswith('S'):
            return get_fund_series(identifier)
        if upper_id.startswith('C'):
            return get_fund_class(identifier)

    if is_fund_class_ticker(identifier):
        return get_fund_class(identifier)

    # Nothing more specific matched: treat it as a company identifier.
    return get_fund_company(identifier)
|
||||
|
||||
|
||||
# === Specialized Getter Functions ===
|
||||
|
||||
def get_fund_company(cik_or_identifier: Union[str, int]) -> FundCompany:
    """
    Build a FundCompany for the given CIK or identifier.

    Args:
        cik_or_identifier: CIK number or other identifier, passed straight
            to the FundCompany constructor

    Returns:
        A new FundCompany instance
    """
    company = FundCompany(cik_or_identifier)
    return company
|
||||
|
||||
|
||||
def get_fund_series(series_id: str) -> FundSeries:
    """
    Look up a fund series by its Series ID.

    This is a thin wrapper: the result of ``get_fund_object(series_id)`` is
    returned unchanged and no validation is performed here.

    Args:
        series_id: Series ID (e.g., 'S000001234')

    Returns:
        The object returned by get_fund_object for this ID.
        NOTE(review): the lookup result is treated as optional, so callers
        should presumably be prepared for None - confirm against
        get_fund_object.
    """
    # Deferred import: edgar.funds.data imports this module at load time.
    from edgar.funds.data import get_fund_object

    return get_fund_object(series_id)
|
||||
|
||||
|
||||
def get_fund_class(class_id_or_ticker: str) -> FundClass:
    """
    Look up a fund class by its Class ID or ticker.

    This is a thin wrapper: the result of
    ``get_fund_object(class_id_or_ticker)`` is returned unchanged and no
    validation is performed here.

    Args:
        class_id_or_ticker: Class ID (e.g., 'C000012345') or ticker symbol (e.g., 'VFINX')

    Returns:
        The object returned by get_fund_object for this identifier.
        NOTE(review): what get_fund_object yields for an unknown identifier
        is not visible here - confirm before relying on a non-None result.
    """
    # Deferred import: edgar.funds.data imports this module at load time.
    from edgar.funds.data import get_fund_object

    return get_fund_object(class_id_or_ticker)
|
||||
|
||||
|
||||
# === Helper Functions ===
|
||||
|
||||
def is_fund_class_ticker(identifier: str) -> bool:
    """
    Report whether the identifier is a fund class ticker.

    Delegates to edgar.funds.data.is_fund_ticker.

    Args:
        identifier: The identifier to check

    Returns:
        The result of is_fund_ticker(identifier)
    """
    # Deferred import: edgar.funds.data imports this module at load time.
    from edgar.funds.data import is_fund_ticker as _ticker_check

    verdict = _ticker_check(identifier)
    return verdict
|
||||
|
||||
|
||||
class Fund:
    """
    Unified wrapper for fund entities that provides a consistent interface
    regardless of the identifier type (ticker, series ID, class ID, or CIK).

    This class serves as a user-friendly entry point to the fund domain model.
    It internally resolves the appropriate entity type and provides access to
    the full hierarchy.

    Examples:
    ```python
    # Create a Fund object from any identifier
    fund = Fund("VFINX")       # From ticker
    fund = Fund("S000002277")  # From series ID
    fund = Fund("0000102909")  # From CIK

    # Access the hierarchy
    print(fund.name)                # Name of the entity
    print(fund.company.name)        # Name of the fund company
    print(fund.series.name)         # Name of the fund series
    print(fund.share_class.ticker)  # Ticker of the share class
    ```
    """

    def __init__(self, identifier: Union[str, int]):
        """
        Initialize a Fund object from any identifier.

        Args:
            identifier: Any fund identifier (ticker, series ID, class ID, or CIK)
        """
        self._original_identifier = str(identifier)
        self._target_series_id = None  # Specific series, when a ticker resolves to one

        # Handle ticker resolution to series
        if isinstance(identifier, str) and self._is_fund_ticker(identifier):
            from edgar.funds.series_resolution import TickerSeriesResolver
            target_series_id = TickerSeriesResolver.get_primary_series(identifier)
            if target_series_id:
                self._target_series_id = target_series_id

        # Use existing find_fund to get the appropriate entity
        self._entity = find_fund(identifier)

        # Default the whole hierarchy to None first so that the properties and
        # list methods stay usable even when find_fund() returns something
        # that is not one of the three entity types (e.g. None).
        self._class = None
        self._series = None
        self._company = None

        # Set up references to the full hierarchy
        if isinstance(self._entity, FundClass):
            self._class = self._entity
            self._series = self._class.series
            self._company = self._series.fund_company if self._series else None
        elif isinstance(self._entity, FundSeries):
            self._series = self._entity
            self._company = self._series.fund_company
        elif isinstance(self._entity, FundCompany):
            self._company = self._entity

    def _is_fund_ticker(self, identifier: str) -> bool:
        """Check if an identifier resolves to at least one series via TickerSeriesResolver"""
        from edgar.funds.series_resolution import TickerSeriesResolver
        series_list = TickerSeriesResolver.resolve_ticker_to_series(identifier)
        return len(series_list) > 0

    @property
    def company(self) -> Optional[FundCompany]:
        """Get the fund company (may be None if not resolved)"""
        return self._company

    @property
    def series(self) -> Optional[FundSeries]:
        """Get the fund series (may be None if only company was identified)"""
        return self._series

    @property
    def share_class(self) -> Optional[FundClass]:
        """Get the share class (may be None if only series or company was identified)"""
        return self._class

    @property
    def name(self) -> str:
        """Get the name of the fund entity"""
        return self._entity.name

    @property
    def identifier(self) -> str:
        """
        Get the primary identifier of the fund entity.

        Returns the class ID, series ID or stringified CIK depending on the
        resolved entity type, and an empty string for anything else.
        """
        if isinstance(self._entity, FundClass):
            return self._entity.class_id
        elif isinstance(self._entity, FundSeries):
            return self._entity.series_id
        elif isinstance(self._entity, FundCompany):
            return str(self._entity.cik)
        return ""

    @property
    def ticker(self) -> Optional[str]:
        """Get the ticker symbol (only available for share classes)"""
        if self._class:
            return self._class.ticker
        return None

    def get_filings(self, series_only: bool = False, **kwargs) -> 'Filings':
        """
        Get filings for this fund entity.

        Delegates to the first of entity, series, company that has a
        ``get_filings`` method; ``series_only`` is forwarded to it as a
        keyword argument together with ``**kwargs``.

        Args:
            series_only: Forwarded to the underlying get_filings call.
                No additional per-series filtering is applied here.
            **kwargs: Filtering parameters passed to get_filings

        Returns:
            The filings from the delegate, or an empty Filings object when
            no delegate exists or its result is falsy
        """
        # Get base filings
        filings = None
        if hasattr(self._entity, 'get_filings'):
            filings = self._entity.get_filings(series_only=series_only, **kwargs)
        elif self._series and hasattr(self._series, 'get_filings'):
            filings = self._series.get_filings(series_only=series_only, **kwargs)
        elif self._company and hasattr(self._company, 'get_filings'):
            filings = self._company.get_filings(series_only=series_only, **kwargs)

        if not filings:
            from edgar._filings import Filings
            return Filings([])

        # NOTE: filtering N-PORT filings down to the target series would
        # require parsing each filing to determine the series match; it is
        # not implemented, so the filings are returned unfiltered.
        return filings

    def get_series(self) -> Optional[FundSeries]:
        """
        Get the specific series for the original ticker if determinable.

        Lookup failures are logged at debug level and fall back to the
        series resolved at construction time.

        Returns:
            FundSeries if we can determine a specific series, otherwise the
            series resolved in __init__ (which may be None)
        """
        if self._target_series_id:
            # Handle ETF synthetic series IDs
            if self._target_series_id.startswith("ETF_"):
                # Extract CIK from ETF series ID
                cik = self._target_series_id.replace("ETF_", "")
                try:
                    # Create ETF-specific series
                    from edgar.funds.series_resolution import TickerSeriesResolver
                    series_list = TickerSeriesResolver.resolve_ticker_to_series(self._original_identifier)
                    if series_list:
                        series_info = series_list[0]  # Get the ETF series info

                        # Create FundSeries for ETF
                        etf_company = FundCompany(cik_or_identifier=int(cik), fund_name=series_info.series_name)
                        return FundSeries(
                            series_id=self._target_series_id,
                            name=series_info.series_name or f"ETF Series for {self._original_identifier}",
                            fund_company=etf_company
                        )
                except Exception as e:
                    log.debug("Failed to create ETF series for %s: %s", self._target_series_id, e)
            else:
                # Regular mutual fund series - try to get by ID
                try:
                    return get_fund_series(self._target_series_id)
                except Exception as e:
                    log.debug("Failed to get fund series %s: %s", self._target_series_id, e)

        # Fallback to current series if available
        return self._series

    def get_resolution_diagnostics(self) -> Dict[str, Any]:
        """
        Get detailed information about how this Fund was resolved.

        Returns:
            A dict with at least 'status', 'method', 'original_identifier'
            and 'message'; 'series_id', 'cik' and 'suggestion' are included
            where they apply.
        """
        if self._target_series_id:
            if self._target_series_id.startswith("ETF_"):
                cik = self._target_series_id.replace("ETF_", "")
                return {
                    'status': 'success',
                    'method': 'etf_company_fallback',
                    'series_id': self._target_series_id,
                    'cik': int(cik),
                    'original_identifier': self._original_identifier,
                    'message': f"'{self._original_identifier}' resolved as ETF company ticker"
                }
            else:
                return {
                    'status': 'success',
                    'method': 'mutual_fund_lookup',
                    'series_id': self._target_series_id,
                    'original_identifier': self._original_identifier,
                    'message': f"'{self._original_identifier}' resolved as mutual fund ticker"
                }

        # Check if it's a company ticker (ETF) that we didn't resolve
        from edgar.reference.tickers import find_cik
        cik = find_cik(self._original_identifier)

        if cik:
            return {
                'status': 'partial_success',
                'method': 'company_lookup_unresolved',
                'cik': cik,
                'original_identifier': self._original_identifier,
                'message': f"'{self._original_identifier}' found as company ticker but series resolution failed",
                'suggestion': f"Try using CIK {cik} directly: Fund({cik})"
            }

        return {
            'status': 'failed',
            'method': 'no_resolution',
            'original_identifier': self._original_identifier,
            'message': f"'{self._original_identifier}' not found in SEC ticker databases",
            'suggestion': "Verify ticker spelling or try with CIK/series ID directly"
        }

    def list_series(self) -> List[FundSeries]:
        """
        List all fund series associated with this fund.

        When a company is known, returns the company's series list.
        Otherwise, when a series is known, returns a list with just that
        series. Otherwise returns an empty list.

        Returns:
            List of FundSeries instances
        """
        if self._company and hasattr(self._company, 'list_series'):
            return self._company.list_series()

        if self._series:
            return [self._series]

        return []

    def list_classes(self) -> List[FundClass]:
        """
        List all share classes associated with this fund.

        When a series is known, returns all classes in the series.
        Otherwise, when a share class is known, returns a list with just
        that class. Otherwise returns an empty list.

        Returns:
            List of FundClass instances
        """
        if self._series and hasattr(self._series, 'get_classes'):
            return self._series.get_classes()

        if self._class:
            return [self._class]

        return []

    def __str__(self) -> str:
        return str(self._entity)

    def __repr__(self) -> str:
        return repr(self._entity)

    def __rich__(self):
        """Creates a rich representation of the fund"""
        if hasattr(self._entity, '__rich__'):
            return self._entity.__rich__()
        return str(self)
|
||||
804
venv/lib/python3.10/site-packages/edgar/funds/data.py
Normal file
804
venv/lib/python3.10/site-packages/edgar/funds/data.py
Normal file
@@ -0,0 +1,804 @@
|
||||
"""
|
||||
Data structures and functions for working with fund data.
|
||||
|
||||
This module provides the FundData class and related functions for
|
||||
accessing and manipulating fund data.
|
||||
"""
|
||||
import logging
|
||||
import re
|
||||
from functools import lru_cache
|
||||
from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from bs4 import Tag
|
||||
|
||||
import pandas as pd
|
||||
import pyarrow as pa
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from edgar._filings import Filings
|
||||
from edgar.datatools import drop_duplicates_pyarrow
|
||||
from edgar.entity.data import EntityData
|
||||
from edgar.funds.core import FundClass, FundCompany, FundSeries
|
||||
from edgar.httprequests import download_text
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
#
|
||||
# Direct implementations to replace legacy module dependencies
|
||||
#
|
||||
|
||||
# Direct implementations of fund-related functionality
|
||||
# These replace the legacy module dependencies
|
||||
|
||||
# URL constants for fund searches
|
||||
fund_series_search_url = "https://www.sec.gov/cgi-bin/series?company="
|
||||
fund_class_or_series_search_url = "https://www.sec.gov/cgi-bin/browse-edgar?CIK={}"
|
||||
fund_series_direct_url = "https://www.sec.gov/cgi-bin/browse-edgar?CIK={}&scd=series"
|
||||
|
||||
class _FundDTO:
    """
    Plain record of fund fields handed back by the direct implementations.

    Internal to this module and not part of the public API; use the Fund
    class from edgar.funds.core instead.
    """

    def __init__(self, company_cik, company_name, name, series, ticker,
                 class_contract_id, class_contract_name):
        # Company-level fields
        self.company_cik, self.company_name = company_cik, company_name
        # Fund / series-level fields
        self.name, self.series, self.ticker = name, series, ticker
        # Class/contract-level fields
        self.class_contract_id, self.class_contract_name = class_contract_id, class_contract_name

    def __str__(self):
        return "{} - {} [{}]".format(self.name, self.ticker, self.class_contract_id)
|
||||
|
||||
# Parse SGML fund data (directly implemented)
|
||||
def parse_fund_data(series_sgml_data: str) -> pd.DataFrame:
    """
    Parse the SGML text containing fund series and class information.

    One row is produced per CLASS-CONTRACT block, so a series with several
    share classes yields several rows that repeat the series-level fields.
    A series without any (closed) CLASS-CONTRACT block yields a single row
    whose class fields are taken from the series block as a whole, falling
    back to empty strings.

    Args:
        series_sgml_data: SGML text with SERIES-AND-CLASSES-CONTRACTS-DATA

    Returns:
        DataFrame with the columns
        Fund, Ticker, SeriesID, ContractID, Class, CIK (in that order)

    Example SGML data:
    <SERIES-AND-CLASSES-CONTRACTS-DATA>
    <EXISTING-SERIES-AND-CLASSES-CONTRACTS>
    <SERIES>
    <OWNER-CIK>0001090372
    <SERIES-ID>S000071967
    <SERIES-NAME>Jacob Forward ETF
    <CLASS-CONTRACT>
    <CLASS-CONTRACT-ID>C000227599
    <CLASS-CONTRACT-NAME>Jacob Forward ETF
    <CLASS-CONTRACT-TICKER-SYMBOL>JFWD
    </CLASS-CONTRACT>
    </SERIES>
    </EXISTING-SERIES-AND-CLASSES-CONTRACTS>
    </SERIES-AND-CLASSES-CONTRACTS-DATA>
    """
    # Regular expressions to match each relevant tag
    series_re = re.compile(r'<SERIES>(.*?)</SERIES>', re.DOTALL)
    class_re = re.compile(r'<CLASS-CONTRACT>(.*?)</CLASS-CONTRACT>', re.DOTALL)
    data_re = re.compile(r'<([^>]+)>([^<]*)')

    class_columns = [
        "CLASS-CONTRACT-ID", "CLASS-CONTRACT-NAME", "CLASS-CONTRACT-TICKER-SYMBOL"
    ]
    columns = ["OWNER-CIK", "SERIES-ID", "SERIES-NAME"] + class_columns

    def tag_values(text):
        """Map each tag found in *text* to its stripped value (last occurrence wins)."""
        return {tag: value.strip() for tag, value in data_re.findall(text)}

    rows = []
    for block in series_re.findall(series_sgml_data):
        class_blocks = class_re.findall(block)
        if class_blocks:
            # Series-level tags are whatever remains once the class blocks
            # are cut out, so class values can never overwrite each other.
            series_fields = tag_values(class_re.sub('', block))
        else:
            # No closed CLASS-CONTRACT block: treat the whole series block as
            # the single source for both series and class fields.
            series_fields = tag_values(block)
            class_blocks = [block]

        for class_block in class_blocks:
            class_fields = tag_values(class_block)
            row_data = dict(series_fields)
            for column in class_columns:
                row_data[column] = class_fields.get(column, "")
            rows.append(row_data)

    # Create DataFrame restricted to the relevant columns
    df = pd.DataFrame(rows, columns=columns)

    # Rename columns for consistency
    return (df.rename(columns={
        "OWNER-CIK": "CIK",
        "SERIES-ID": "SeriesID",
        "SERIES-NAME": "Fund",
        "CLASS-CONTRACT-ID": "ContractID",
        "CLASS-CONTRACT-NAME": "Class",
        "CLASS-CONTRACT-TICKER-SYMBOL": "Ticker"
    })
            .filter(["Fund", "Ticker", "SeriesID", "ContractID", "Class", "CIK"])
            )
|
||||
|
||||
# Direct implementation of FundCompanyInfo
|
||||
class _FundCompanyInfo:
|
||||
"""
|
||||
Internal helper class representing the fund company.
|
||||
This is parsed from the results page when we get the fund class or series.
|
||||
|
||||
Not part of the public API - use the Fund class from edgar.funds.core instead.
|
||||
"""
|
||||
def __init__(self,
             name: str,
             cik: str,
             ident_info: Dict[str, str],
             addresses: List[str],
             filings: Filings):
    """
    Store the parsed company fields as attributes.

    Args:
        name: Company name
        cik: Company CIK as text
        ident_info: Key/value pairs of identifying information
        addresses: Address blocks as text
        filings: Filings associated with the company
    """
    # Identity
    self.cik: str = cik
    self.name: str = name
    # Descriptive details
    self.addresses: List[str] = addresses
    self.ident_info: Dict[str, str] = ident_info
    # Associated filings
    self.filings = filings
|
||||
|
||||
@property
def state(self):
    """Value stored under "State location" in ident_info, or None when absent."""
    return self.ident_info.get("State location")
|
||||
|
||||
@property
def state_of_incorporation(self):
    """Value stored under "State of Inc." in ident_info, or None when absent."""
    return self.ident_info.get("State of Inc.")
|
||||
|
||||
def id_and_name(self, contract_or_series: str) -> Optional[Tuple[str, str]]:
    """
    Split an ident_info entry of the form "<ID> <description>" into its parts.

    Not cached: the work is a dict lookup plus one regex match, while caching
    on an instance method would keep the instance referenced by the cache,
    require it to be hashable, and could hand back a stale result after
    ident_info changes.

    Args:
        contract_or_series: Key to look up in ident_info

    Returns:
        None when the key is missing or its value is empty; otherwise a
        tuple ``(id, description)`` where ``id`` is a 'C' or 'S' followed by
        digits. Either element is an empty string when it could not be
        extracted.
    """
    raw_value = self.ident_info.get(contract_or_series, None)
    if not raw_value:
        return None

    match = re.match(r'([CS]\d+)(?:\s(.*))?', raw_value)
    if not match:
        return "", ""

    # The description group is optional; normalise a missing one to "".
    return match.group(1), match.group(2) or ""
|
||||
|
||||
@classmethod
def from_html(cls, company_info_html: Union[str, 'Tag']):
    """
    Build an instance from the HTML of a company results page.

    Args:
        company_info_html: The page HTML (or a parsed tag) to read from

    Returns:
        A new instance, or None when the page has no div with
        id 'contentDiv'
    """
    soup = BeautifulSoup(company_info_html, features="html.parser")

    # Parse the fund company info
    content_div = soup.find("div", {"id": "contentDiv"})

    if content_div is None:
        # Should not reach here, but this is precautionary
        log.warning("Did not find div with id 'contentDiv'")
        return None

    ident_info_dict = {}
    company_info_div = content_div.find("div", class_="companyInfo")
    company_name_tag = company_info_div.find('span', class_='companyName')
    company_name = company_name_tag.text.split('CIK')[0].strip()

    cik = company_name_tag.a.text.split(' ')[0]

    # Extract the identifying information
    for tag in company_info_div.find_all('br'):
        tag.replace_with('\n')
    ident_info = company_info_div.find('p', class_='identInfo')
    ident_line = ident_info.get_text().replace("|", "\n").strip()
    for line in ident_line.split("\n"):
        # Split on the first colon only: a value may itself contain colons,
        # which would make a two-name unpacking of split(":") raise.
        key, separator, value = line.partition(":")
        if separator:
            ident_info_dict[key.strip()] = value.strip().replace("\xa0", " ")

    # Addresses
    mailer_divs = content_div.find_all("div", class_="mailer")
    addresses = [re.sub(r'\n\s+', '\n', mailer_div.text.strip())
                 for mailer_div in mailer_divs]

    filing_index = cls._extract_filings(soup, company_name, cik)
    filings = Filings(filing_index=filing_index)

    return cls(name=company_name,
               cik=cik,
               filings=filings,
               ident_info=ident_info_dict,
               addresses=addresses)
|
||||
|
||||
@classmethod
|
||||
def _extract_filings(cls, soup, company_name: str, cik: str):
|
||||
from datetime import datetime
|
||||
|
||||
import pyarrow as pa
|
||||
|
||||
filings_table = soup.find("table", class_="tableFile2")
|
||||
rows = filings_table.find_all("tr")[1:]
|
||||
|
||||
forms, accession_nos, filing_dates = [], [], []
|
||||
for row in rows:
|
||||
cells = row.find_all("td")
|
||||
form = cells[0].text
|
||||
forms.append(form)
|
||||
|
||||
# Get the link href from cell[1]
|
||||
link = cells[1].find("a")
|
||||
href = link.attrs["href"]
|
||||
accession_no = href.split("/")[-1].replace("-index.htm", "")
|
||||
accession_nos.append(accession_no)
|
||||
|
||||
# Get the filing_date
|
||||
filing_date = datetime.strptime(cells[3].text, '%Y-%m-%d')
|
||||
filing_dates.append(filing_date)
|
||||
|
||||
schema = pa.schema([
|
||||
('form', pa.string()),
|
||||
('company', pa.string()),
|
||||
('cik', pa.int32()),
|
||||
('filing_date', pa.date32()),
|
||||
('accession_number', pa.string()),
|
||||
])
|
||||
|
||||
# Create an empty table with the defined schema
|
||||
filing_index = pa.Table.from_arrays(arrays=[
|
||||
pa.array(forms, type=pa.string()),
|
||||
pa.array([company_name] * len(forms), type=pa.string()),
|
||||
pa.array([int(cik)] * len(forms), type=pa.int32()),
|
||||
pa.array(filing_dates, type=pa.date32()),
|
||||
pa.array(accession_nos, type=pa.string()),
|
||||
], schema=schema)
|
||||
|
||||
return filing_index
|
||||
|
||||
|
||||
# Direct implementation of FundClassOrSeries and subclasses
|
||||
class _FundClassOrSeries:
    """
    Internal base class for fund classes and series.

    Wraps a fund company info object together with the ``ident_info`` key
    ("Class/Contract" or "Series") under which the ID and name are stored.

    Not part of the public API - use the FundClass and FundSeries classes
    from edgar.funds.core instead.
    """

    def __init__(self, company_info: '_FundCompanyInfo', contract_or_series: str):
        self.fund = company_info
        self._contract_or_series = contract_or_series

    @property
    def fund_cik(self):
        """CIK of the owning fund company."""
        return self.fund.cik

    @property
    def fund_name(self):
        """Name of the owning fund company."""
        return self.fund.name

    def _id_and_name(self) -> Optional[Tuple[str, str]]:
        """
        Split the ident-info entry for this class/series into (id, name).

        Returns:
            None when the entry is missing or empty; otherwise the C.../S...
            identifier and the text after it ("" for whichever part cannot be parsed).
        """
        # NOTE: not wrapped in lru_cache. A cache on a bound method keeps instances
        # alive and would return stale values if ident_info changes; the parse is cheap.
        raw_value = self.fund.ident_info.get(self._contract_or_series, None)
        if not raw_value:
            return None
        match = re.match(r'([CS]\d+)(?:\s(.*))?', raw_value)
        if not match:
            return "", ""
        return match.group(1), match.group(2) or ""

    @property
    def id(self):
        """The class or series identifier, or None when not available."""
        id_and_name = self._id_and_name()
        return id_and_name[0] if id_and_name else None

    @property
    def name(self):
        """The class or series name, or None when not available."""
        id_and_name = self._id_and_name()
        return id_and_name[1] if id_and_name else None

    @property
    def description(self):
        """Fund name, ID and name joined by single spaces."""
        return f"{self.fund_name} {self.id} {self.name}"

    @property
    def filings(self):
        """Filings of the owning fund company."""
        return self.fund.filings
|
||||
|
||||
|
||||
class _FundClass(_FundClassOrSeries):
    """
    Internal representation of a fund class (contract).

    Reads its ID and name from the "Class/Contract" ident-info entry.

    Not part of the public API - use the FundClass class from edgar.funds.core instead.
    """

    def __init__(self, company_info: '_FundCompanyInfo'):
        super().__init__(company_info, contract_or_series="Class/Contract")

    @property
    def ticker(self):
        """The "Ticker Symbol" ident-info entry, or None when absent."""
        ident_info = self.fund.ident_info
        return ident_info.get("Ticker Symbol")

    @property
    def description(self):
        """Fund name, ID, name and ticker (empty when unknown) joined by spaces."""
        pieces = (self.fund_name, self.id, self.name, self.ticker or '')
        return "{} {} {} {}".format(*pieces)
|
||||
|
||||
|
||||
class _FundSeries(_FundClassOrSeries):
    """
    Internal representation of a fund series.

    Reads its ID and name from the "Series" ident-info entry.

    Not part of the public API - use the FundSeries class from edgar.funds.core instead.
    """

    def __init__(self, company_info: '_FundCompanyInfo'):
        super().__init__(company_info, contract_or_series="Series")
|
||||
|
||||
|
||||
# Direct implementation of get_fund_with_filings
|
||||
def direct_get_fund_with_filings(contract_or_series_id: str):
    """
    Get fund class or series information including filings from the SEC website.

    The first results page supplies the company information and the first
    batch of filings; further pages are downloaded until one yields no filings.

    Args:
        contract_or_series_id: Series ID (S...) or Class ID (C...)

    Returns:
        FundClass or FundSeries object, or None if the ID is malformed, nothing
        matches, or retrieval/parsing fails (failures are logged as warnings)
    """
    # URL template to search for a fund by class or series ID
    fund_class_or_series_search_url = "https://www.sec.gov/cgi-bin/browse-edgar?CIK={}"
    page_size = 100

    # The whole identifier must be "C" or "S" followed by digits
    if not isinstance(contract_or_series_id, str) or not re.fullmatch(r"[CS]\d+", contract_or_series_id):
        return None

    base_url = fund_class_or_series_search_url.format(contract_or_series_id)

    try:
        # Start at 0 and download the first page
        fund_text = download_text(f"{base_url}&start=0&count={page_size}")
        if "No matching" in fund_text:
            return None

        # Company Info
        company_info = _FundCompanyInfo.from_html(fund_text)
        if company_info is None:
            return None

        # Get the remaining filings. The first request covered offsets
        # 0..page_size-1, so continue at page_size; resuming one later would skip
        # a row. Any overlap between pages is removed by the de-duplication below.
        filing_index = company_info.filings.data
        start = page_size
        while True:
            fund_text = download_text(f"{base_url}&start={start}&count={page_size}")
            soup = BeautifulSoup(fund_text, features="html.parser")
            filing_index_on_page = _FundCompanyInfo._extract_filings(soup, company_info.name, company_info.cik)
            if len(filing_index_on_page) == 0:
                break
            filing_index = pa.concat_tables([filing_index, filing_index_on_page])
            start += page_size

        # Drop duplicate filings by accession number
        filing_index = drop_duplicates_pyarrow(filing_index, column_name='accession_number')
        company_info.filings = Filings(filing_index=filing_index)

        if contract_or_series_id.startswith('C'):
            return _FundClass(company_info)
        return _FundSeries(company_info)
    except Exception as e:
        # Best effort by design: any download or parsing problem yields None
        log.warning("Error retrieving fund information for %s: %s", contract_or_series_id, e)
        return None
|
||||
|
||||
@lru_cache(maxsize=16)
def get_fund_object(identifier: str) -> Optional[Union[FundCompany, FundSeries, FundClass]]:
    """
    Get a Fund related object by its identifier.

    The identifier decides both the search URL and the kind of object returned.
    The search results table is then walked row by row, building the company,
    its series and their classes.

    Args:
        identifier: A CIK, a series id (e.g. 'S000001234') or class id or Fund ticker (e.g. 'VFINX')

    Returns:
        A FundCompany or FundSeries or FundClass, or None when the identifier is
        not recognised or the results page lacks the expected fund table
    """
    searched_by_ticker = False
    if re.match(r'^[CS]\d+$', identifier):
        identifier_type = 'Series' if identifier.startswith('S') else 'Class'
        fund_search_url = fund_series_search_url + f"&CIK={identifier}"
    elif re.match(r"^[A-Z]{4}X$", identifier):
        identifier_type = 'Class'
        searched_by_ticker = True
        fund_search_url = fund_series_search_url + f"&ticker={identifier}"
    elif re.match(r"^0\d{9}$", identifier):
        identifier_type = 'FundCompany'
        fund_search_url = fund_series_search_url + f"&CIK={identifier}"
    else:
        log.warning("Invalid fund identifier %s", identifier)
        return None

    # Download the fund page
    fund_text = download_text(fund_search_url)

    soup = BeautifulSoup(fund_text, "html.parser")
    if 'To retrieve filings, click on the CIK' not in soup.text:
        return None

    tables = soup.find_all("table")

    # The fund table is the 6th table on the page
    if len(tables) < 6:
        log.warning("Expected fund table not found for %s", identifier)
        return None

    fund_table = tables[5]

    all_series = []
    fund_company: Optional[FundCompany] = None
    current_series: Optional[FundSeries] = None
    current_class: Optional[FundClass] = None
    # The series/class whose ID (or ticker) equals the requested identifier
    matched_series: Optional[FundSeries] = None
    matched_class: Optional[FundClass] = None

    for tr in fund_table.find_all('tr')[4:]:  # Skip the first 4 rows as they contain headers
        cell_texts = (td.get_text().strip() for td in tr.find_all('td'))
        row_data = [text for text in cell_texts if text]

        # Every row of interest has at least an identifier and a name
        if len(row_data) < 2:
            continue

        row_id, row_name = row_data[0], row_data[1]
        if re.match(r'^0\d{9}$', row_id):
            fund_company = FundCompany(cik_or_identifier=row_id, fund_name=row_name, all_series=all_series)
        elif re.match(r'^S\d+$', row_id):
            current_series = FundSeries(series_id=row_id, name=row_name, fund_company=fund_company)
            if fund_company is not None:
                fund_company.all_series.append(current_series)
            if identifier_type == 'Series' and row_id == identifier:
                matched_series = current_series
        elif re.match(r'^C\d+$', row_id):
            ticker = row_data[2] if len(row_data) > 2 else None
            current_class = FundClass(class_id=row_id, name=row_name, ticker=ticker)
            current_class.series = current_series
            if current_series is not None:
                current_series.fund_classes.append(current_class)
            if identifier_type == 'Class' and (ticker if searched_by_ticker else row_id) == identifier:
                matched_class = current_class

    if identifier_type == "FundCompany":
        return fund_company
    if identifier_type == "Series":
        # Prefer the exact match; otherwise fall back to the last series on the page
        return matched_series if matched_series is not None else current_series
    # identifier_type == "Class"
    return matched_class if matched_class is not None else current_class
|
||||
|
||||
|
||||
def is_fund_ticker(identifier: str) -> bool:
    """
    Tell whether an identifier looks like a fund ticker.

    A fund ticker here is four uppercase letters followed by "X".

    Args:
        identifier: The identifier to check

    Returns:
        True if it's a fund ticker, False otherwise (including for empty or
        non-string input)
    """
    # Reject empty values and anything that is not a string up front
    if not identifier or not isinstance(identifier, str):
        return False
    return re.match(r"^[A-Z]{4}X$", identifier) is not None
|
||||
|
||||
|
||||
class FundData(EntityData):
    """
    Data container specialised for fund entities.

    On top of what EntityData stores, it keeps the values passed as the
    ``series_id``, ``class_ids`` and ``fund_classes`` keyword arguments.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        option = kwargs.get
        self.series_id = option('series_id', None)
        # Both collections default to a new empty list when not supplied
        self.class_ids = option('class_ids', [])
        self._fund_classes = option('fund_classes', [])

    @property
    def is_fund(self) -> bool:
        """Always True for this container."""
        return True
|
||||
|
||||
|
||||
def resolve_fund_identifier(identifier):
    """
    Convert fund tickers, series IDs or class IDs to a CIK.

    Resolution is best effort: lookup failures are logged as warnings and the
    identifier is returned unchanged.

    Args:
        identifier: Fund ticker, Series ID, Class ID, or CIK

    Returns:
        CIK as integer or original identifier if conversion not possible
    """
    if not isinstance(identifier, str):
        return identifier

    # Handle Series ID (S000XXXXX) and Class ID (C000XXXXX): same lookup for both
    id_kind = {'S': "series ID", 'C': "class ID"}.get(identifier[:1])
    if id_kind is not None and identifier[1:].isdigit():
        try:
            fund_info = direct_get_fund_with_filings(identifier)
            if fund_info and hasattr(fund_info, 'fund_cik'):
                return int(fund_info.fund_cik)
        except Exception as e:
            log.warning("Error resolving %s %s: %s", id_kind, identifier, e)

    # Handle fund ticker
    if is_fund_ticker(identifier):
        try:
            # The previous code assigned the ticker string itself to fund_info
            # (the lookup call was missing), so this branch could never resolve.
            fund_info = get_fund_object(identifier)
            company_cik = getattr(fund_info, 'company_cik', None)
            if company_cik is None:
                # get_fund_object links class -> series -> fund_company.
                # NOTE(review): assumes the company object exposes its CIK as `.cik` - confirm.
                series = getattr(fund_info, 'series', None)
                fund_company = getattr(series, 'fund_company', None)
                company_cik = getattr(fund_company, 'cik', None)
            if company_cik is not None:
                return int(company_cik)
        except Exception as e:
            log.warning("Error resolving fund ticker %s: %s", identifier, e)

    return identifier
|
||||
|
||||
|
||||
def _fallback_fund_rows(header_text: str) -> List[Dict[str, Optional[str]]]:
    """Scrape series/contract rows out of raw header text using regexes only."""
    from itertools import zip_longest

    def _values(pattern: str, flags: int = 0) -> List[str]:
        # Captures run up to the next "<", so they usually end with a newline:
        # strip them and discard captures that were whitespace only.
        stripped = (value.strip() for value in re.findall(pattern, header_text, flags))
        return [value for value in stripped if value]

    series_ids = _values(r'SERIES-ID[^>]*>([^<]+)')
    contract_ids = _values(r'CONTRACT-ID[^>]*>([^<]+)')
    # The FILER / COMPANY-DATA / CONFORMED-NAME tags sit on separate lines, so "."
    # must be allowed to cross newlines for this pattern to match at all.
    names = _values(r'FILER[^>]*>.*?COMPANY-DATA[^>]*>.*?CONFORMED-NAME[^>]*>([^<]+)', re.DOTALL)
    tickers = _values(r'TICKER-SYMBOL[^>]*>([^<]+)')

    fund_name = names[0] if names else None
    ticker = tickers[0] if tickers else None

    # Pair series and contract IDs positionally; the shorter list is padded with None
    return [
        {
            'SeriesID': series_id,
            'ContractID': contract_id,
            'Fund': fund_name,
            'Ticker': ticker,
            'Class': f"Class {contract_id[-1].upper()}" if contract_id else None,
        }
        for series_id, contract_id in zip_longest(series_ids, contract_ids)
    ]


def get_fund_information(header):
    """
    Extract fund information from a filing header.

    The <SERIES-AND-CLASSES-CONTRACTS-DATA> section of the header text is parsed
    first. When that section is absent or parsing fails, individual IDs are
    scraped from the header text with regexes instead.

    Args:
        header: Filing header; must expose a ``text`` attribute

    Returns:
        Fund series and contract information (an empty container when nothing
        could be extracted)
    """
    # Import FundSeriesAndContracts here to avoid circular imports
    from edgar.funds import FundSeriesAndContracts

    if not header or not hasattr(header, 'text'):
        return FundSeriesAndContracts()

    try:
        # Try our direct implementation first
        header_text = header.text
        series_and_classes_contracts_text = re.search(
            r'<SERIES-AND-CLASSES-CONTRACTS-DATA>(.*?)</SERIES-AND-CLASSES-CONTRACTS-DATA>',
            header_text,
            re.DOTALL
        )

        if series_and_classes_contracts_text:
            # Use our directly implemented parse_fund_data
            df = parse_fund_data(series_and_classes_contracts_text.group(1))
            return FundSeriesAndContracts(df)

    except Exception as e:
        log.debug("Error parsing fund information directly: %s", e)

    # Fallback implementation - extract fund information from header directly using regex
    try:
        rows = _fallback_fund_rows(str(header.text))
        if rows:
            return FundSeriesAndContracts(pd.DataFrame(rows))

    except Exception as e:
        log.warning("Error in fallback get_fund_information: %s", e)

    # Return an empty container if everything else fails
    return FundSeriesAndContracts()
|
||||
|
||||
|
||||
def _find_linked_id(cells, id_pattern: str):
    """Return (id, cell) for the first cell holding an href link whose text matches id_pattern."""
    for cell in cells:
        for link in cell.find_all('a', href=True):
            found = re.search(id_pattern, link.text)
            if found:
                return found.group(0), cell
    return None, None


def parse_series_and_classes_from_html(html_content: str, cik: str) -> List[Dict]:
    """
    Parse series and class information from the SEC series listing HTML page.

    This parses HTML content from the URL https://www.sec.gov/cgi-bin/browse-edgar?CIK=XXXX&scd=series
    which contains a structured listing of all series and classes for a fund company.

    Args:
        html_content: HTML content from the SEC webpage
        cik: CIK of the fund company (used for log messages only)

    Returns:
        List of dictionaries with keys 'series_id', 'series_name' and 'classes';
        each class is a dictionary with 'class_id', 'class_name' and 'ticker'.
        On a parsing error the entries collected so far are returned.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    series_data = []

    log.debug("Parsing series HTML content for fund %s", cik)

    try:
        # Use the first table that has rows and mentions series or class/contract
        table = next(
            (candidate for candidate in soup.find_all('table')
             if candidate.find('tr') and re.search(r'Series|Class/Contract', str(candidate))),
            None,
        )
        if table is None:
            log.warning("No suitable table found in series HTML content")
            return []

        current_series = None
        rows = table.find_all('tr')
        log.debug("Found %d rows in the table", len(rows))

        # Process all rows since the table structure might vary
        for row in rows:
            cells = row.find_all('td')
            if len(cells) < 3:
                continue

            # A series row is marked by a linked S###### ID
            series_id, series_cell = _find_linked_id(cells, r'S\d{6,}')
            if series_id:
                # The name is the first link text in another cell that does not repeat the ID
                series_name = None
                for cell in cells:
                    if cell != series_cell:
                        link = cell.find('a')
                        link_text = link.text.strip() if link is not None else ""
                        if link_text and series_id not in link_text:
                            series_name = link_text
                            break

                # If we couldn't find a name, use a default
                if not series_name:
                    series_name = f"Series {series_id}"

                current_series = {
                    'series_id': series_id,
                    'series_name': series_name,
                    'classes': []
                }
                series_data.append(current_series)
                log.debug("Found series: %s - %s", series_id, series_name)

            # Class rows (linked C###### ID) follow the series they belong to
            elif current_series is not None:
                class_id, _ = _find_linked_id(cells, r'C\d{6,}')
                if not class_id:
                    continue

                class_name = None
                class_ticker = ""

                # The cell after the first one containing the ID holds newline-separated
                # text whose second line is the class name and third line the ticker.
                for cell_idx, cell in enumerate(cells[:-1]):
                    if class_id in str(cell):
                        parts = cells[cell_idx + 1].text.strip().split("\n")
                        if len(parts) > 1:
                            class_name = parts[1]
                        if len(parts) > 2:
                            class_ticker = parts[2].strip()
                        break

                # If we couldn't find a name, use a default instead of failing the
                # whole parse on a missing cell or a cell without a second line
                if not class_name:
                    class_name = f"Class {class_id}"

                current_series['classes'].append({
                    'class_id': class_id,
                    'class_name': class_name,
                    'ticker': class_ticker
                })
                log.debug("Found class: %s - %s (%s)", class_id, class_name, class_ticker)

        log.debug("Found %d series with classes", len(series_data))

    except Exception as e:
        # Best effort: keep what was parsed before the failure
        log.warning("Error parsing series HTML: %s", e)
        log.debug("Traceback for series HTML parsing error", exc_info=True)

    return series_data
|
||||
|
||||
|
||||
def get_series_and_classes_from_sec(cik: Union[str, int]) -> List[Dict]:
    """
    Download the SEC series listing page for a fund company and parse it.

    The listing page gives a comprehensive view of all series and classes
    belonging to the company.

    Args:
        cik: The CIK of the fund company

    Returns:
        List of dictionaries containing parsed series and class information;
        an empty list when the page holds no series information
    """
    # The URL template takes the CIK left-padded with zeros to 10 characters
    listing_url = fund_series_direct_url.format(str(cik).zfill(10))
    page = download_text(listing_url)

    # A usable page carries no "No matching" notice and mentions "series for cik"
    has_series_listing = 'No matching' not in page and 'series for cik' in page.lower()
    if has_series_listing:
        return parse_series_and_classes_from_html(page, cik)

    log.debug("No series information found for CIK %s", cik)
    return []
|
||||
82
venv/lib/python3.10/site-packages/edgar/funds/examples.py
Normal file
82
venv/lib/python3.10/site-packages/edgar/funds/examples.py
Normal file
@@ -0,0 +1,82 @@
|
||||
"""
|
||||
Examples of using the new fund entity API.
|
||||
|
||||
This module demonstrates how to use the improved fund entity API
|
||||
to work with fund companies, series, and classes.
|
||||
"""
|
||||
|
||||
from edgar.funds import (
|
||||
find_fund,
|
||||
get_fund_class,
|
||||
get_fund_company,
|
||||
get_fund_series,
|
||||
)
|
||||
|
||||
|
||||
def demonstrate_find_fund():
    """Show find_fund() being called with each supported kind of identifier."""
    sample_identifiers = (
        "0001048636",  # fund company CIK - T. Rowe Price
        "S000005029",  # series ID - Kinetics Internet Fund
        "C000013712",  # class ID - Kinetics Internet Fund Advisor Class C
        "KINCX",       # class ticker - Kinetics Internet Fund Advisor Class C
    )
    for identifier in sample_identifiers:
        find_fund(identifier)
|
||||
|
||||
|
||||
def demonstrate_specialized_getters():
    """Show the getter dedicated to each entity type."""
    lookups = (
        (get_fund_company, "0001048636"),  # T. Rowe Price
        (get_fund_series, "S000005029"),   # Kinetics Internet Fund
        (get_fund_class, "C000013712"),    # Kinetics Internet Fund Advisor Class C
        (get_fund_class, "KINCX"),         # Same class as above, by ticker
    )
    for getter, identifier in lookups:
        getter(identifier)
|
||||
|
||||
|
||||
def demonstrate_entity_navigation():
    """
    Demonstrate navigation between related entities.

    Starts from a fund class, walks up to its series and fund company, then
    iterates the company's series and the series' classes. Stops early when
    the class or its series cannot be resolved.
    """
    # Start with a fund class
    fund_class = get_fund_class("KINCX")
    if fund_class is None:
        return

    # Navigate to its series; without one there is nothing further to walk
    series = fund_class.series
    if not series:
        return

    # Navigate to the fund company
    company = series.fund_company

    # Get all series for the company
    if company is not None:
        for _s in company.all_series[:3]:  # Show first 3
            pass

    # Get all classes for a series
    for _c in series.get_classes():
        pass
|
||||
|
||||
|
||||
def main():
    """Run every demonstration, in order."""
    demonstrations = (
        demonstrate_find_fund,
        demonstrate_specialized_getters,
        demonstrate_entity_navigation,
    )
    for demonstration in demonstrations:
        demonstration()


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,26 @@
|
||||
"""
|
||||
Fund data models.
|
||||
|
||||
This package contains all the data models used for fund reporting,
|
||||
separated by functional area for better maintainability.
|
||||
"""
|
||||
|
||||
# Import all derivative models for convenience
|
||||
from edgar.funds.models.derivatives import (
|
||||
DerivativeInfo,
|
||||
ForwardDerivative,
|
||||
FutureDerivative,
|
||||
OptionDerivative,
|
||||
SwapDerivative,
|
||||
SwaptionDerivative,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
# Derivative models
|
||||
'DerivativeInfo',
|
||||
'ForwardDerivative',
|
||||
'SwapDerivative',
|
||||
'FutureDerivative',
|
||||
'SwaptionDerivative',
|
||||
'OptionDerivative',
|
||||
]
|
||||
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,740 @@
|
||||
"""
|
||||
Derivative instrument models for fund portfolio reporting.
|
||||
|
||||
This module contains all the data models for different types of derivative
|
||||
instruments found in fund portfolios, including forwards, swaps, futures,
|
||||
options, and swaptions.
|
||||
"""
|
||||
from decimal import Decimal
|
||||
from typing import Optional, Union
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from edgar.xmltools import child_text, optional_decimal
|
||||
|
||||
|
||||
def optional_decimal_attr(element, attr_name):
    """
    Parse an optional decimal attribute from an XML element.

    Args:
        element: XML element exposing an ``attrs`` mapping, or None
        attr_name: Name of the attribute to read

    Returns:
        The attribute as a Decimal, or None when the element is None, the
        attribute is missing, empty or "N/A", or its value is not a valid number
    """
    # Decimal() signals malformed strings with InvalidOperation, which is an
    # ArithmeticError rather than a ValueError, so it must be caught explicitly.
    from decimal import InvalidOperation

    if element is None:
        return None

    attr_value = element.attrs.get(attr_name)
    if not attr_value or attr_value == "N/A":
        return None

    try:
        return Decimal(attr_value)
    except (InvalidOperation, ValueError, TypeError):
        return None
|
||||
|
||||
|
||||
class ForwardDerivative(BaseModel):
    """Forward contract details read from a ``fwdDeriv`` XML element."""

    counterparty_name: Optional[str]
    counterparty_lei: Optional[str]
    currency_sold: Optional[str]
    amount_sold: Optional[Decimal]
    currency_purchased: Optional[str]
    amount_purchased: Optional[Decimal]
    settlement_date: Optional[str]
    unrealized_appreciation: Optional[Decimal]

    # Additional info from derivAddlInfo (when nested)
    deriv_addl_name: Optional[str]
    deriv_addl_lei: Optional[str]
    deriv_addl_title: Optional[str]
    deriv_addl_cusip: Optional[str]
    deriv_addl_identifier: Optional[str]
    deriv_addl_identifier_type: Optional[str]
    deriv_addl_balance: Optional[Decimal]
    deriv_addl_units: Optional[str]
    deriv_addl_currency: Optional[str]
    deriv_addl_value_usd: Optional[Decimal]
    deriv_addl_pct_val: Optional[Decimal]
    deriv_addl_asset_cat: Optional[str]
    deriv_addl_issuer_cat: Optional[str]
    deriv_addl_inv_country: Optional[str]

    @classmethod
    def from_xml(cls, tag):
        """
        Build a ForwardDerivative from a ``fwdDeriv`` element.

        Returns None when ``tag`` is falsy or is not a ``fwdDeriv`` element.
        Every ``deriv_addl_*`` field stays None unless the element contains a
        ``derivAddlInfo`` child providing it.
        """
        if not tag or tag.name != "fwdDeriv":
            return None

        counterparties = tag.find("counterparties")
        if counterparties:
            counterparty_name = child_text(counterparties, "counterpartyName")
            counterparty_lei = child_text(counterparties, "counterpartyLei")
        else:
            counterparty_name = counterparty_lei = None

        # All derivAddlInfo-backed fields default to None
        addl = dict.fromkeys((
            "deriv_addl_name",
            "deriv_addl_lei",
            "deriv_addl_title",
            "deriv_addl_cusip",
            "deriv_addl_identifier",
            "deriv_addl_identifier_type",
            "deriv_addl_balance",
            "deriv_addl_units",
            "deriv_addl_currency",
            "deriv_addl_value_usd",
            "deriv_addl_pct_val",
            "deriv_addl_asset_cat",
            "deriv_addl_issuer_cat",
            "deriv_addl_inv_country",
        ))

        # Check for derivAddlInfo (when nested in options)
        deriv_addl_info = tag.find("derivAddlInfo")
        if deriv_addl_info:
            addl.update(
                deriv_addl_name=child_text(deriv_addl_info, "name"),
                deriv_addl_lei=child_text(deriv_addl_info, "lei"),
                deriv_addl_title=child_text(deriv_addl_info, "title"),
                deriv_addl_cusip=child_text(deriv_addl_info, "cusip"),
                deriv_addl_balance=optional_decimal(deriv_addl_info, "balance"),
                deriv_addl_units=child_text(deriv_addl_info, "units"),
                deriv_addl_currency=child_text(deriv_addl_info, "curCd"),
                deriv_addl_value_usd=optional_decimal(deriv_addl_info, "valUSD"),
                deriv_addl_pct_val=optional_decimal(deriv_addl_info, "pctVal"),
                deriv_addl_asset_cat=child_text(deriv_addl_info, "assetCat"),
                deriv_addl_inv_country=child_text(deriv_addl_info, "invCountry"),
            )

            # The issuer category is an attribute of issuerConditional
            issuer_cond = deriv_addl_info.find("issuerConditional")
            if issuer_cond:
                addl["deriv_addl_issuer_cat"] = issuer_cond.attrs.get("issuerCat")

            # The identifier and its description are attributes of identifiers/other
            identifiers = deriv_addl_info.find("identifiers")
            if identifiers:
                other_tag = identifiers.find("other")
                if other_tag:
                    addl["deriv_addl_identifier"] = other_tag.attrs.get("value")
                    addl["deriv_addl_identifier_type"] = other_tag.attrs.get("otherDesc")

        return cls(
            counterparty_name=counterparty_name,
            counterparty_lei=counterparty_lei,
            currency_sold=child_text(tag, "curSold"),
            amount_sold=optional_decimal(tag, "amtCurSold"),
            currency_purchased=child_text(tag, "curPur"),
            amount_purchased=optional_decimal(tag, "amtCurPur"),
            settlement_date=child_text(tag, "settlementDt"),
            unrealized_appreciation=optional_decimal(tag, "unrealizedAppr"),
            **addl,
        )
|
||||
|
||||
|
||||
class SwapDerivative(BaseModel):
    """Swap derivative parsed from a ``swapDeriv`` XML element.

    Besides the basic counterparty/notional data, the model carries the
    optional reference instrument, the optional ``derivAddlInfo`` details
    (present when the swap is nested inside another derivative) and one set
    of fixed/floating/other fields per direction: ``*_receive`` and ``*_pay``.
    """

    # Basic derivative info
    counterparty_name: Optional[str]
    counterparty_lei: Optional[str]
    notional_amount: Optional[Decimal]
    currency: Optional[str]
    unrealized_appreciation: Optional[Decimal]
    termination_date: Optional[str]
    # Upfront payment/receipt info (declared once; the original listed these twice)
    upfront_payment: Optional[Decimal]
    payment_currency: Optional[str]
    upfront_receipt: Optional[Decimal]
    receipt_currency: Optional[str]
    reference_entity_name: Optional[str]
    reference_entity_title: Optional[str]
    reference_entity_cusip: Optional[str]
    reference_entity_isin: Optional[str]
    reference_entity_ticker: Optional[str]
    swap_flag: Optional[str]

    # Additional info from derivAddlInfo (when nested)
    deriv_addl_name: Optional[str]
    deriv_addl_lei: Optional[str]
    deriv_addl_title: Optional[str]
    deriv_addl_cusip: Optional[str]
    deriv_addl_identifier: Optional[str]
    deriv_addl_identifier_type: Optional[str]
    deriv_addl_balance: Optional[Decimal]
    deriv_addl_units: Optional[str]
    deriv_addl_desc_units: Optional[str]
    deriv_addl_currency: Optional[str]
    deriv_addl_value_usd: Optional[Decimal]
    deriv_addl_pct_val: Optional[Decimal]
    deriv_addl_asset_cat: Optional[str]
    deriv_addl_issuer_cat: Optional[str]
    deriv_addl_inv_country: Optional[str]

    # DIRECTIONAL RECEIVE LEG (what we receive)
    fixed_rate_receive: Optional[Decimal]
    fixed_amount_receive: Optional[Decimal]
    fixed_currency_receive: Optional[str]
    floating_index_receive: Optional[str]
    floating_spread_receive: Optional[Decimal]
    floating_amount_receive: Optional[Decimal]
    floating_currency_receive: Optional[str]
    floating_tenor_receive: Optional[str]
    floating_tenor_unit_receive: Optional[str]
    floating_reset_date_tenor_receive: Optional[str]
    floating_reset_date_unit_receive: Optional[str]
    other_description_receive: Optional[str]
    other_type_receive: Optional[str]  # fixedOrFloating attribute

    # DIRECTIONAL PAYMENT LEG (what we pay)
    fixed_rate_pay: Optional[Decimal]
    fixed_amount_pay: Optional[Decimal]
    fixed_currency_pay: Optional[str]
    floating_index_pay: Optional[str]
    floating_spread_pay: Optional[Decimal]
    floating_amount_pay: Optional[Decimal]
    floating_currency_pay: Optional[str]
    floating_tenor_pay: Optional[str]
    floating_tenor_unit_pay: Optional[str]
    floating_reset_date_tenor_pay: Optional[str]
    floating_reset_date_unit_pay: Optional[str]
    other_description_pay: Optional[str]
    other_type_pay: Optional[str]  # fixedOrFloating attribute

    @staticmethod
    def _parse_deriv_addl_info(tag):
        """Return the ``deriv_addl_*`` field values read from ``derivAddlInfo``.

        Every key is present in the result; values stay ``None`` when the
        element (or the relevant child) is missing.
        """
        fields = dict.fromkeys(
            (
                "deriv_addl_name",
                "deriv_addl_lei",
                "deriv_addl_title",
                "deriv_addl_cusip",
                "deriv_addl_identifier",
                "deriv_addl_identifier_type",
                "deriv_addl_balance",
                "deriv_addl_units",
                "deriv_addl_desc_units",
                "deriv_addl_currency",
                "deriv_addl_value_usd",
                "deriv_addl_pct_val",
                "deriv_addl_asset_cat",
                "deriv_addl_issuer_cat",
                "deriv_addl_inv_country",
            )
        )

        addl = tag.find("derivAddlInfo")
        if not addl:
            return fields

        fields.update(
            deriv_addl_name=child_text(addl, "name"),
            deriv_addl_lei=child_text(addl, "lei"),
            deriv_addl_title=child_text(addl, "title"),
            deriv_addl_cusip=child_text(addl, "cusip"),
            deriv_addl_balance=optional_decimal(addl, "balance"),
            deriv_addl_units=child_text(addl, "units"),
            deriv_addl_desc_units=child_text(addl, "descOthUnits"),
            deriv_addl_currency=child_text(addl, "curCd"),
            deriv_addl_value_usd=optional_decimal(addl, "valUSD"),
            deriv_addl_pct_val=optional_decimal(addl, "pctVal"),
            deriv_addl_asset_cat=child_text(addl, "assetCat"),
            deriv_addl_inv_country=child_text(addl, "invCountry"),
        )

        # The issuer category is an attribute of <issuerConditional>.
        issuer_cond = addl.find("issuerConditional")
        if issuer_cond:
            fields["deriv_addl_issuer_cat"] = issuer_cond.attrs.get("issuerCat")

        # Only the <other> identifier is captured for derivAddlInfo.
        identifiers = addl.find("identifiers")
        other_tag = identifiers.find("other") if identifiers else None
        if other_tag:
            fields["deriv_addl_identifier"] = other_tag.attrs.get("value")
            fields["deriv_addl_identifier_type"] = other_tag.attrs.get("otherDesc")

        return fields

    @staticmethod
    def _parse_reference_entity(tag):
        """Return the ``reference_entity_*`` values from ``descRefInstrmnt/otherRefInst``."""
        fields = dict.fromkeys(
            (
                "reference_entity_name",
                "reference_entity_title",
                "reference_entity_cusip",
                "reference_entity_isin",
                "reference_entity_ticker",
            )
        )

        desc_ref = tag.find("descRefInstrmnt")
        other_ref = desc_ref.find("otherRefInst") if desc_ref else None
        if not other_ref:
            return fields

        fields["reference_entity_name"] = child_text(other_ref, "issuerName")
        fields["reference_entity_title"] = child_text(other_ref, "issueTitle")

        identifiers = other_ref.find("identifiers")
        if identifiers:
            # Each identifier element stores its value in a "value" attribute.
            for kind in ("cusip", "isin", "ticker"):
                id_tag = identifiers.find(kind)
                if id_tag:
                    fields[f"reference_entity_{kind}"] = id_tag.attrs.get("value")

        return fields

    @staticmethod
    def _parse_leg(tag, xml_direction, suffix):
        """Return the fixed/floating/other fields for one direction of the swap.

        Args:
            tag: The ``swapDeriv`` element.
            xml_direction: ``"Rec"`` or ``"Pmnt"``; selects the
                ``fixed{dir}Desc`` / ``floating{dir}Desc`` / ``other{dir}Desc`` elements.
            suffix: ``"receive"`` or ``"pay"``; appended to every returned key.
        """
        leg = dict.fromkeys(
            (
                "fixed_rate",
                "fixed_amount",
                "fixed_currency",
                "floating_index",
                "floating_spread",
                "floating_amount",
                "floating_currency",
                "floating_tenor",
                "floating_tenor_unit",
                "floating_reset_date_tenor",
                "floating_reset_date_unit",
                "other_description",
                "other_type",
            )
        )

        fixed = tag.find(f"fixed{xml_direction}Desc")
        if fixed:
            leg["fixed_rate"] = optional_decimal_attr(fixed, "fixedRt")
            leg["fixed_amount"] = optional_decimal_attr(fixed, "amount")
            leg["fixed_currency"] = fixed.attrs.get("curCd")

        floating = tag.find(f"floating{xml_direction}Desc")
        if floating:
            leg["floating_index"] = floating.attrs.get("floatingRtIndex")
            leg["floating_spread"] = optional_decimal_attr(floating, "floatingRtSpread")
            leg["floating_amount"] = optional_decimal_attr(floating, "pmntAmt")
            leg["floating_currency"] = floating.attrs.get("curCd")

            # Only the first rtResetTenor under rtResetTenors is read.
            reset_tenors = floating.find("rtResetTenors")
            reset_tenor = reset_tenors.find("rtResetTenor") if reset_tenors else None
            if reset_tenor:
                leg["floating_tenor"] = reset_tenor.attrs.get("rateTenor")
                leg["floating_tenor_unit"] = reset_tenor.attrs.get("rateTenorUnit")
                leg["floating_reset_date_tenor"] = reset_tenor.attrs.get("resetDt")
                leg["floating_reset_date_unit"] = reset_tenor.attrs.get("resetDtUnit")

        other = tag.find(f"other{xml_direction}Desc")
        if other:
            other_type = other.attrs.get("fixedOrFloating")
            leg["other_type"] = other_type
            # For "Other" the element text is the description; otherwise the
            # fixedOrFloating value itself is used as the description.
            leg["other_description"] = other.text if other_type == "Other" else other_type

        return {f"{name}_{suffix}": value for name, value in leg.items()}

    @classmethod
    def from_xml(cls, tag):
        """Build a ``SwapDerivative`` from a ``swapDeriv`` element.

        Returns:
            The parsed model, or ``None`` when ``tag`` is falsy or is not a
            ``swapDeriv`` element.
        """
        if not (tag and tag.name == "swapDeriv"):
            return None

        counterparties = tag.find("counterparties")

        return cls(
            # Basic info
            counterparty_name=child_text(counterparties, "counterpartyName") if counterparties else None,
            counterparty_lei=child_text(counterparties, "counterpartyLei") if counterparties else None,
            notional_amount=optional_decimal(tag, "notionalAmt"),
            currency=child_text(tag, "curCd"),
            unrealized_appreciation=optional_decimal(tag, "unrealizedAppr"),
            termination_date=child_text(tag, "terminationDt"),
            # Upfront payment/receipt info
            upfront_payment=optional_decimal(tag, "upfrontPmnt"),
            payment_currency=child_text(tag, "pmntCurCd"),
            upfront_receipt=optional_decimal(tag, "upfrontRcpt"),
            receipt_currency=child_text(tag, "rcptCurCd"),
            swap_flag=child_text(tag, "swapFlag"),
            # Reference instrument (for CDS), derivAddlInfo and both legs
            **cls._parse_reference_entity(tag),
            **cls._parse_deriv_addl_info(tag),
            **cls._parse_leg(tag, "Rec", "receive"),
            **cls._parse_leg(tag, "Pmnt", "pay"),
        )
|
||||
|
||||
|
||||
class FutureDerivative(BaseModel):
    """Futures contract parsed from a ``futrDeriv`` XML element."""

    counterparty_name: Optional[str]
    counterparty_lei: Optional[str]
    payoff_profile: Optional[str]
    expiration_date: Optional[str]
    notional_amount: Optional[Decimal]
    currency: Optional[str]
    unrealized_appreciation: Optional[Decimal]
    reference_entity_name: Optional[str]
    reference_entity_title: Optional[str]
    # Identifiers
    reference_entity_cusip: Optional[str]
    reference_entity_isin: Optional[str]
    reference_entity_ticker: Optional[str]
    reference_entity_other_id: Optional[str]
    reference_entity_other_id_type: Optional[str]

    @classmethod
    def from_xml(cls, tag):
        """Build a ``FutureDerivative`` from ``tag``.

        Returns ``None`` when ``tag`` is falsy or is not a ``futrDeriv`` element.
        """
        if not tag or tag.name != "futrDeriv":
            return None

        # Counterparty details live under <counterparties>.
        cp_name = cp_lei = None
        counterparties = tag.find("counterparties")
        if counterparties:
            cp_name = child_text(counterparties, "counterpartyName")
            cp_lei = child_text(counterparties, "counterpartyLei")

        # Reference instrument values default to None when absent.
        reference = {
            "reference_entity_name": None,
            "reference_entity_title": None,
            "reference_entity_cusip": None,
            "reference_entity_isin": None,
            "reference_entity_ticker": None,
            "reference_entity_other_id": None,
            "reference_entity_other_id_type": None,
        }

        desc_ref = tag.find("descRefInstrmnt")
        other_ref = desc_ref.find("otherRefInst") if desc_ref else None
        if other_ref:
            reference["reference_entity_name"] = child_text(other_ref, "issuerName")
            reference["reference_entity_title"] = child_text(other_ref, "issueTitle")

            id_block = other_ref.find("identifiers")
            if id_block:
                # cusip / isin / ticker each carry their value in a "value" attribute.
                for kind in ("cusip", "isin", "ticker"):
                    id_tag = id_block.find(kind)
                    if id_tag:
                        reference[f"reference_entity_{kind}"] = id_tag.attrs.get("value")

                # <other> additionally describes the identifier type.
                misc_tag = id_block.find("other")
                if misc_tag:
                    reference["reference_entity_other_id"] = misc_tag.attrs.get("value")
                    reference["reference_entity_other_id_type"] = misc_tag.attrs.get("otherDesc")

        return cls(
            counterparty_name=cp_name,
            counterparty_lei=cp_lei,
            payoff_profile=child_text(tag, "payOffProf"),
            expiration_date=child_text(tag, "expDate"),
            notional_amount=optional_decimal(tag, "notionalAmt"),
            currency=child_text(tag, "curCd"),
            unrealized_appreciation=optional_decimal(tag, "unrealizedAppr"),
            **reference,
        )
|
||||
|
||||
|
||||
class SwaptionDerivative(BaseModel):
    """Swaption derivative (SWO) - option on a swap"""

    counterparty_name: Optional[str]
    counterparty_lei: Optional[str]
    put_or_call: Optional[str]
    written_or_purchased: Optional[str]
    share_number: Optional[Decimal]
    exercise_price: Optional[Decimal]
    exercise_price_currency: Optional[str]
    expiration_date: Optional[str]
    delta: Optional[Union[Decimal, str]]  # Can be numeric or 'XXXX'
    unrealized_appreciation: Optional[Decimal]
    # The underlying swap
    nested_swap: Optional['SwapDerivative']

    @classmethod
    def from_xml(cls, tag):
        """Build a ``SwaptionDerivative`` from an ``optionSwaptionWarrantDeriv`` element.

        Returns ``None`` when ``tag`` is falsy or has a different element name.
        """
        if not tag or tag.name != "optionSwaptionWarrantDeriv":
            return None

        cp_name = cp_lei = None
        counterparties = tag.find("counterparties")
        if counterparties:
            cp_name = child_text(counterparties, "counterpartyName")
            cp_lei = child_text(counterparties, "counterpartyLei")

        # The underlying swap sits at descRefInstrmnt > nestedDerivInfo > swapDeriv;
        # any missing level leaves the nested swap as None.
        desc_ref = tag.find("descRefInstrmnt")
        nested_info = desc_ref.find("nestedDerivInfo") if desc_ref else None
        swap_tag = nested_info.find("swapDeriv") if nested_info else None
        underlying_swap = SwapDerivative.from_xml(swap_tag) if swap_tag else None

        return cls(
            counterparty_name=cp_name,
            counterparty_lei=cp_lei,
            put_or_call=child_text(tag, "putOrCall"),
            written_or_purchased=child_text(tag, "writtenOrPur"),
            share_number=optional_decimal(tag, "shareNo"),
            exercise_price=optional_decimal(tag, "exercisePrice"),
            exercise_price_currency=child_text(tag, "exercisePriceCurCd"),
            expiration_date=child_text(tag, "expDt"),
            delta=child_text(tag, "delta"),
            unrealized_appreciation=optional_decimal(tag, "unrealizedAppr"),
            nested_swap=underlying_swap,
        )
|
||||
|
||||
|
||||
class OptionDerivative(BaseModel):
    """Option derivative (OPT) - can have nested forward, future, or other derivatives"""

    counterparty_name: Optional[str]
    counterparty_lei: Optional[str]
    put_or_call: Optional[str]
    written_or_purchased: Optional[str]
    share_number: Optional[Decimal]
    exercise_price: Optional[Decimal]
    exercise_price_currency: Optional[str]
    expiration_date: Optional[str]
    delta: Optional[Union[Decimal, str]]  # Can be numeric or 'XXXX'
    unrealized_appreciation: Optional[Decimal]
    # Reference entity (for options on individual securities)
    reference_entity_name: Optional[str]
    reference_entity_title: Optional[str]
    reference_entity_cusip: Optional[str]
    reference_entity_isin: Optional[str]
    reference_entity_ticker: Optional[str]
    reference_entity_other_id: Optional[str]
    reference_entity_other_id_type: Optional[str]
    # Index reference (for options on indices like S&P 500)
    index_name: Optional[str]
    index_identifier: Optional[str]
    # For options with nested derivatives
    nested_forward: Optional['ForwardDerivative']
    nested_future: Optional['FutureDerivative']
    nested_swap: Optional['SwapDerivative']

    @classmethod
    def from_xml(cls, tag):
        """Build an ``OptionDerivative`` from an ``optionSwaptionWarrantDeriv`` element.

        When ``descRefInstrmnt`` contains ``nestedDerivInfo`` the nested
        forward/future/swap is parsed; otherwise the index and/or reference
        instrument details are read.

        Returns:
            The parsed model, or ``None`` when ``tag`` is falsy or has a
            different element name.
        """
        if not (tag and tag.name == "optionSwaptionWarrantDeriv"):
            return None

        counterparties = tag.find("counterparties")
        counterparty_name = child_text(counterparties, "counterpartyName") if counterparties else None
        counterparty_lei = child_text(counterparties, "counterpartyLei") if counterparties else None

        # Get reference instrument info
        ref_entity_name = None
        ref_entity_title = None
        ref_entity_cusip = None
        ref_entity_isin = None
        ref_entity_ticker = None
        ref_entity_other_id = None
        ref_entity_other_id_type = None
        index_name = None
        index_identifier = None

        # All nested derivatives are initialised up front. Previously the
        # future/swap variables were only bound inside ``if desc_ref:``, so an
        # option without <descRefInstrmnt> raised UnboundLocalError below.
        nested_forward = None
        nested_future = None
        nested_swap = None

        desc_ref = tag.find("descRefInstrmnt")
        if desc_ref:
            # Check for nested derivative info (e.g., option on forward, future, swap)
            nested_deriv_info = desc_ref.find("nestedDerivInfo")
            if nested_deriv_info:
                # Parse any type of nested derivative
                fwd_tag = nested_deriv_info.find("fwdDeriv")
                if fwd_tag:
                    nested_forward = ForwardDerivative.from_xml(fwd_tag)

                fut_tag = nested_deriv_info.find("futrDeriv")
                if fut_tag:
                    nested_future = FutureDerivative.from_xml(fut_tag)

                swap_tag = nested_deriv_info.find("swapDeriv")
                if swap_tag:
                    nested_swap = SwapDerivative.from_xml(swap_tag)
            else:
                # Regular option - parse reference instrument
                # Check for index reference first
                index_basket = desc_ref.find("indexBasketInfo")
                if index_basket:
                    index_name = child_text(index_basket, "indexName")
                    index_identifier = child_text(index_basket, "indexIdentifier")

                # Then check for other reference instrument
                other_ref = desc_ref.find("otherRefInst")
                if other_ref:
                    ref_entity_name = child_text(other_ref, "issuerName")
                    ref_entity_title = child_text(other_ref, "issueTitle")
                    identifiers = other_ref.find("identifiers")
                    if identifiers:
                        cusip_tag = identifiers.find("cusip")
                        if cusip_tag:
                            ref_entity_cusip = cusip_tag.attrs.get("value")
                        isin_tag = identifiers.find("isin")
                        if isin_tag:
                            ref_entity_isin = isin_tag.attrs.get("value")
                        ticker_tag = identifiers.find("ticker")
                        if ticker_tag:
                            ref_entity_ticker = ticker_tag.attrs.get("value")

                        other_tag = identifiers.find("other")
                        if other_tag:
                            ref_entity_other_id = other_tag.attrs.get("value")
                            ref_entity_other_id_type = other_tag.attrs.get("otherDesc")

        return cls(
            counterparty_name=counterparty_name,
            counterparty_lei=counterparty_lei,
            put_or_call=child_text(tag, "putOrCall"),
            written_or_purchased=child_text(tag, "writtenOrPur"),
            share_number=optional_decimal(tag, "shareNo"),
            exercise_price=optional_decimal(tag, "exercisePrice"),
            exercise_price_currency=child_text(tag, "exercisePriceCurCd"),
            expiration_date=child_text(tag, "expDt"),
            delta=child_text(tag, "delta"),
            unrealized_appreciation=optional_decimal(tag, "unrealizedAppr"),
            reference_entity_name=ref_entity_name,
            reference_entity_title=ref_entity_title,
            reference_entity_cusip=ref_entity_cusip,
            reference_entity_isin=ref_entity_isin,
            reference_entity_ticker=ref_entity_ticker,
            reference_entity_other_id=ref_entity_other_id,
            reference_entity_other_id_type=ref_entity_other_id_type,
            index_name=index_name,
            index_identifier=index_identifier,
            nested_forward=nested_forward,
            nested_future=nested_future,
            nested_swap=nested_swap,
        )
|
||||
|
||||
|
||||
class DerivativeInfo(BaseModel):
    """Container for the single derivative held under a ``derivativeInfo`` element."""

    derivative_category: Optional[str]  # FWD, SWP, FUT, OPT, SWO, WAR
    forward_derivative: Optional[ForwardDerivative]
    swap_derivative: Optional[SwapDerivative]
    future_derivative: Optional[FutureDerivative]
    option_derivative: Optional[OptionDerivative]
    swaption_derivative: Optional[SwaptionDerivative]

    @classmethod
    def from_xml(cls, tag):
        """Build a ``DerivativeInfo`` from ``tag``.

        Returns ``None`` when ``tag`` is falsy or is not a ``derivativeInfo`` element.
        """
        if not tag or tag.name != "derivativeInfo":
            return None

        # Look at direct children only so nested derivatives are not picked up.
        forward_el = tag.find("fwdDeriv", recursive=False)
        swap_el = tag.find("swapDeriv", recursive=False)
        future_el = tag.find("futrDeriv", recursive=False)
        option_el = tag.find("optionSwaptionWarrantDeriv", recursive=False)

        category = None
        option_model = None
        swaption_model = None

        # The category is taken from the first child present, in this order.
        primary = next(
            (el for el in (forward_el, swap_el, future_el, option_el) if el),
            None,
        )
        if primary is not None:
            category = primary.attrs.get("derivCat")
            if primary is option_el:
                # SWO is a swaption; everything else (OPT/WAR) is a regular option.
                if category == "SWO":
                    swaption_model = SwaptionDerivative.from_xml(option_el)
                else:
                    option_model = OptionDerivative.from_xml(option_el)

        forward_model = ForwardDerivative.from_xml(forward_el) if forward_el else None
        swap_model = SwapDerivative.from_xml(swap_el) if swap_el else None
        future_model = FutureDerivative.from_xml(future_el) if future_el else None

        return cls(
            derivative_category=category,
            forward_derivative=forward_model,
            swap_derivative=swap_model,
            future_derivative=future_model,
            option_derivative=option_model,
            swaption_derivative=swaption_model,
        )
|
||||
565
venv/lib/python3.10/site-packages/edgar/funds/reference.py
Normal file
565
venv/lib/python3.10/site-packages/edgar/funds/reference.py
Normal file
@@ -0,0 +1,565 @@
|
||||
import logging
|
||||
import urllib.parse
|
||||
from dataclasses import dataclass
|
||||
from functools import lru_cache
|
||||
from io import StringIO
|
||||
from typing import Dict, List, Optional, Set, Tuple, Union
|
||||
|
||||
import pandas as pd
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from edgar.httprequests import download_text
|
||||
|
||||
# Base URL for resolving relative links
|
||||
SEC_BASE_URL = "https://www.sec.gov"
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
# Data classes for our normalized data model
|
||||
@dataclass
class FundCompanyRecord:
    """One fund company row of the normalized reference data, keyed by CIK."""

    # Required identification fields
    cik: str
    name: str
    entity_org_type: str
    file_number: str
    # Optional mailing address parts; None when not supplied
    address_1: Optional[str] = None
    address_2: Optional[str] = None
    city: Optional[str] = None
    state: Optional[str] = None
    zip_code: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
class FundSeriesRecord:
    """One fund series row, linked to its parent company through ``cik``."""

    series_id: str
    name: str
    cik: str  # Parent company CIK
|
||||
|
||||
|
||||
@dataclass
class FundClassRecord:
    """One share-class row, linked to its parent series through ``series_id``."""

    class_id: str
    name: str
    ticker: Optional[str]  # No default: callers pass None explicitly when absent
    series_id: str  # Parent series ID
|
||||
|
||||
|
||||
class FundReferenceData:
|
||||
"""
|
||||
A memory-efficient container for fund reference data that provides fast lookups
|
||||
while minimizing data duplication.
|
||||
|
||||
Internally, this class normalizes the data into separate tables for companies,
|
||||
series, and classes, with relationships maintained through IDs.
|
||||
|
||||
Lookups are accelerated through indices on common lookup patterns
|
||||
like ticker, CIK, series ID, and class ID.
|
||||
"""
|
||||
|
||||
def __init__(self, data: pd.DataFrame = None):
|
||||
"""
|
||||
Initialize with a DataFrame of fund data.
|
||||
|
||||
The DataFrame should have columns similar to the SEC fund data file:
|
||||
- 'Reporting File Number', 'CIK Number', 'Entity Name', 'Entity Org Type',
|
||||
- 'Series ID', 'Series Name', 'Class ID', 'Class Name', 'Class Ticker', etc.
|
||||
|
||||
Args:
|
||||
data: DataFrame containing fund reference data
|
||||
"""
|
||||
# Initialize empty containers
|
||||
self._companies: Dict[str, FundCompanyRecord] = {}
|
||||
self._series: Dict[str, FundSeriesRecord] = {}
|
||||
self._classes: Dict[str, FundClassRecord] = {}
|
||||
|
||||
# Indexes for fast lookups
|
||||
self._ticker_to_class: Dict[str, str] = {} # ticker -> class_id
|
||||
self._series_by_company: Dict[str, Set[str]] = {} # cik -> set of series_ids
|
||||
self._classes_by_series: Dict[str, Set[str]] = {} # series_id -> set of class_ids
|
||||
|
||||
# Load data if provided
|
||||
if data is not None:
|
||||
self._load_data(data)
|
||||
|
||||
def _load_data(self, data: pd.DataFrame):
    """
    Load and normalize data from a DataFrame into the internal data structures.

    Companies, series and classes are de-duplicated on their IDs and stored
    in separate tables; the ticker, series-by-company and classes-by-series
    indexes are filled along the way. Optional address columns that are
    absent from ``data`` are treated as empty instead of raising ``KeyError``.

    Args:
        data: DataFrame containing fund reference data
    """
    # Standardize column names if needed
    col_map = {
        'CIK Number': 'cik',
        'Entity Name': 'company_name',
        'Entity Org Type': 'entity_org_type',
        'Reporting File Number': 'file_number',
        'Series ID': 'series_id',
        'Series Name': 'series_name',
        'Class ID': 'class_id',
        'Class Name': 'class_name',
        'Class Ticker': 'ticker',
        'Address_1': 'address_1',
        'Address_2': 'address_2',
        'City': 'city',
        'State': 'state',
        'Zip Code': 'zip_code'
    }

    # Rename columns if they don't match our expected names
    df = data.copy()
    rename_dict = {k: v for k, v in col_map.items() if k in df.columns and v not in df.columns}
    if rename_dict:
        df = df.rename(columns=rename_dict)

    # The address fields are optional on FundCompanyRecord, so a missing
    # column is filled with None rather than failing the column selection.
    address_cols = ['address_1', 'address_2', 'city', 'state', 'zip_code']
    for col in address_cols:
        if col not in df.columns:
            df[col] = None

    # Process companies (distinct CIKs)
    company_cols = ['cik', 'company_name', 'entity_org_type', 'file_number', *address_cols]
    company_df = df.drop_duplicates(subset=['cik'])[company_cols].fillna('')

    for (raw_cik, company_name, entity_org_type, file_number,
         address_1, address_2, city, state, zip_code) in company_df.itertuples(index=False, name=None):
        cik = str(raw_cik).zfill(10)  # Ensure CIK is properly formatted
        self._companies[cik] = FundCompanyRecord(
            cik=cik,
            name=company_name,
            entity_org_type=entity_org_type,
            file_number=file_number,
            # Empty strings produced by fillna('') are stored as None
            address_1=address_1 or None,
            address_2=address_2 or None,
            city=city or None,
            state=state or None,
            zip_code=zip_code or None
        )
        # Initialize empty set for series in this company
        self._series_by_company[cik] = set()

    # Process series (distinct series IDs)
    series_df = df.dropna(subset=['series_id']).drop_duplicates(subset=['series_id'])[
        ['series_id', 'series_name', 'cik']
    ]

    for series_id, series_name, raw_cik in series_df.itertuples(index=False, name=None):
        cik = str(raw_cik).zfill(10)

        # Skip if parent company doesn't exist
        if cik not in self._companies:
            continue

        self._series[series_id] = FundSeriesRecord(
            series_id=series_id,
            name=series_name,
            cik=cik
        )

        # Add to company's series set
        self._series_by_company[cik].add(series_id)

        # Initialize empty set for classes in this series
        self._classes_by_series[series_id] = set()

    # Process classes (distinct class IDs)
    class_df = df.dropna(subset=['class_id']).drop_duplicates(subset=['class_id'])[
        ['class_id', 'class_name', 'ticker', 'series_id']
    ]

    for class_id, class_name, raw_ticker, series_id in class_df.itertuples(index=False, name=None):
        # Skip if parent series doesn't exist
        if series_id not in self._series:
            continue

        # Handle potentially missing ticker
        ticker = raw_ticker if pd.notna(raw_ticker) else None

        self._classes[class_id] = FundClassRecord(
            class_id=class_id,
            name=class_name,
            ticker=ticker,
            series_id=series_id
        )

        # Add to series' classes set
        self._classes_by_series[series_id].add(class_id)

        # Add ticker to lookup index if available
        if ticker:
            self._ticker_to_class[ticker] = class_id
|
||||
|
||||
@property
def companies_count(self) -> int:
    """Return how many fund companies are held in the company index."""
    company_index = self._companies
    return len(company_index)
|
||||
|
||||
@property
def series_count(self) -> int:
    """Return how many fund series are held in the series index."""
    series_index = self._series
    return len(series_index)
|
||||
|
||||
@property
def classes_count(self) -> int:
    """Return how many fund classes are held in the class index."""
    class_index = self._classes
    return len(class_index)
|
||||
|
||||
def get_company(self, cik: str) -> Optional["FundCompanyRecord"]:
    """
    Look up a fund company by its CIK.

    Args:
        cik: Company CIK. It is converted to a string and left-padded with
            zeros to 10 characters before the lookup, so '36' and
            '0000000036' address the same record.

    Returns:
        The matching FundCompanyRecord, or None if the CIK is not indexed.
    """
    padded_cik = str(cik).zfill(10)
    return self._companies.get(padded_cik)
|
||||
|
||||
def get_series(self, series_id: str) -> Optional["FundSeriesRecord"]:
    """
    Look up a fund series by its series ID.

    Args:
        series_id: Series ID, used as-is as the lookup key

    Returns:
        The matching FundSeriesRecord, or None if the ID is not indexed.
    """
    series_record = self._series.get(series_id)
    return series_record
|
||||
|
||||
def get_class(self, class_id: str) -> Optional["FundClassRecord"]:
    """
    Look up a fund share class by its class ID.

    Args:
        class_id: Class ID, used as-is as the lookup key

    Returns:
        The matching FundClassRecord, or None if the ID is not indexed.
    """
    class_record = self._classes.get(class_id)
    return class_record
|
||||
|
||||
def get_class_by_ticker(self, ticker: str) -> Optional["FundClassRecord"]:
    """
    Look up a fund share class by ticker symbol.

    The ticker is first looked up exactly as given. If that misses, the
    lookup is retried with surrounding whitespace removed and the symbol
    upper-cased, so inputs such as ' vfiax ' still resolve.

    Args:
        ticker: Ticker symbol

    Returns:
        The matching FundClassRecord, or None if the ticker is empty or
        not indexed.
    """
    if not ticker:
        return None

    class_id = self._ticker_to_class.get(ticker)

    # Exact match always wins; normalization is only a fallback so that
    # existing lookups keep resolving to the same record.
    if not class_id and isinstance(ticker, str):
        normalized = ticker.strip().upper()
        if normalized != ticker:
            class_id = self._ticker_to_class.get(normalized)

    if not class_id:
        return None
    return self._classes.get(class_id)
|
||||
|
||||
def get_series_for_company(self, cik: str) -> List["FundSeriesRecord"]:
    """
    Collect every series registered under a company.

    Args:
        cik: Company CIK; zero-padded to 10 characters before the lookup

    Returns:
        List of FundSeriesRecord objects (empty when the company is unknown).
        Series IDs that have no record in the series index are skipped.
    """
    padded_cik = str(cik).zfill(10)
    found = []
    for series_id in self._series_by_company.get(padded_cik, set()):
        if series_id in self._series:
            found.append(self._series[series_id])
    return found
|
||||
|
||||
def get_classes_for_series(self, series_id: str) -> List["FundClassRecord"]:
    """
    Collect every share class registered under a series.

    Args:
        series_id: Series ID

    Returns:
        List of FundClassRecord objects (empty when the series is unknown).
        Class IDs that have no record in the class index are skipped.
    """
    found = []
    for class_id in self._classes_by_series.get(series_id, set()):
        if class_id in self._classes:
            found.append(self._classes[class_id])
    return found
|
||||
|
||||
def find_by_name(self, name_fragment: str, search_type: str = 'company') -> List[Union["FundCompanyRecord", "FundSeriesRecord", "FundClassRecord"]]:
    """
    Find entities whose name contains the given fragment.

    Args:
        name_fragment: Case-insensitive fragment to search for
        search_type: Type of entity to search ('company', 'series', or 'class')

    Returns:
        List of matching records. Records whose name is not a string
        (for example a missing value) never match.

    Raises:
        ValueError: If search_type is not one of the supported values.
    """
    needle = name_fragment.lower()

    if search_type == 'company':
        candidates = self._companies.values()
    elif search_type == 'series':
        candidates = self._series.values()
    elif search_type == 'class':
        candidates = self._classes.values()
    else:
        raise ValueError(f"Invalid search_type: {search_type}")

    # Names may be missing (None/NaN) in the underlying data; calling
    # .lower() on those would raise and abort the whole search.
    return [record for record in candidates
            if isinstance(record.name, str) and needle in record.name.lower()]
|
||||
|
||||
def get_company_for_series(self, series_id: str) -> Optional["FundCompanyRecord"]:
    """
    Resolve the company that owns a series.

    Args:
        series_id: Series ID

    Returns:
        The parent FundCompanyRecord, or None if the series (or its
        company) is not indexed.
    """
    series_record = self._series.get(series_id)
    if not series_record:
        return None
    return self._companies.get(series_record.cik)
|
||||
|
||||
def get_series_for_class(self, class_id: str) -> Optional["FundSeriesRecord"]:
    """
    Resolve the series that a share class belongs to.

    Args:
        class_id: Class ID

    Returns:
        The parent FundSeriesRecord, or None if the class (or its series)
        is not indexed.
    """
    class_record = self._classes.get(class_id)
    if not class_record:
        return None
    return self._series.get(class_record.series_id)
|
||||
|
||||
def get_company_for_class(self, class_id: str) -> Optional["FundCompanyRecord"]:
    """
    Resolve the company that ultimately owns a share class.

    The class is first mapped to its series via get_series_for_class, and
    the series' CIK is then used to find the company.

    Args:
        class_id: Class ID

    Returns:
        The owning FundCompanyRecord, or None if any link in the chain
        is missing.
    """
    parent_series = self.get_series_for_class(class_id)
    if not parent_series:
        return None
    return self._companies.get(parent_series.cik)
|
||||
|
||||
def get_hierarchical_info(self, identifier: str) -> Tuple[Optional["FundCompanyRecord"], Optional["FundSeriesRecord"], Optional["FundClassRecord"]]:
    """
    Get the complete hierarchy for an identifier (CIK, series ID, class ID, or ticker).

    The identifier is tried, in order, as a CIK, a series ID (prefix 'S'),
    a class ID (prefix 'C') and finally a ticker. Every lookup is attempted
    with the identifier exactly as given first; if that misses, a
    whitespace-stripped, upper-cased form is tried, so 's000002277' or
    'vfiax' resolve as well. Integer CIKs are accepted.

    Args:
        identifier: Any identifier (CIK, series ID, class ID, or ticker)

    Returns:
        Tuple of (company, series, class) records, with None for levels not
        applicable. (None, None, None) when nothing matches.
    """
    def _with_parents(class_record):
        # Walk upwards from a class to its series and company.
        series = self.get_series(class_record.series_id)
        company = self.get_company(series.cik) if series else None
        return company, series, class_record

    # A CIK is naturally numeric; treat ints like their string form
    # (bool is excluded because it is an int subclass).
    if isinstance(identifier, int) and not isinstance(identifier, bool):
        identifier = str(identifier)

    if not isinstance(identifier, str):
        # Unknown identifier type: only the ticker index can be consulted.
        class_record = self.get_class_by_ticker(identifier)
        return _with_parents(class_record) if class_record else (None, None, None)

    normalized = identifier.strip().upper()

    # CIK: all digits, or written with leading zeros
    if identifier.isdigit() or identifier.startswith('0'):
        company = self.get_company(identifier)
        if company:
            return company, None, None

    # Series ID (starts with S). A miss falls through because tickers may
    # start with 'S' too.
    if normalized.startswith('S'):
        series = self.get_series(identifier) or self.get_series(normalized)
        if series:
            return self.get_company(series.cik), series, None

    # Class ID (starts with C). A miss falls through for the same reason.
    if normalized.startswith('C'):
        class_record = self.get_class(identifier) or self.get_class(normalized)
        if class_record:
            return _with_parents(class_record)

    # Ticker
    class_record = self.get_class_by_ticker(identifier) or self.get_class_by_ticker(normalized)
    if class_record:
        return _with_parents(class_record)

    # Nothing found
    return None, None, None
|
||||
|
||||
def to_dataframe(self) -> pd.DataFrame:
    """
    Convert the normalized data back to a flat DataFrame.

    One row is produced per class whose parent series and parent company
    are both present in the indexes; classes with a missing parent are
    skipped.

    Returns:
        DataFrame containing all fund data. The column set is always the
        same, even when there are no rows.
    """
    # Declared up front so an empty result still carries the full schema
    # instead of a column-less frame.
    columns = [
        'cik', 'company_name', 'entity_org_type', 'file_number',
        'series_id', 'series_name',
        'class_id', 'class_name', 'ticker',
        'address_1', 'address_2', 'city', 'state', 'zip_code',
    ]

    rows = []
    for class_record in self._classes.values():
        series_record = self._series.get(class_record.series_id)
        if not series_record:
            continue

        company_record = self._companies.get(series_record.cik)
        if not company_record:
            continue

        rows.append({
            'cik': company_record.cik,
            'company_name': company_record.name,
            'entity_org_type': company_record.entity_org_type,
            'file_number': company_record.file_number,
            'series_id': series_record.series_id,
            'series_name': series_record.name,
            'class_id': class_record.class_id,
            'class_name': class_record.name,
            'ticker': class_record.ticker,
            'address_1': company_record.address_1,
            'address_2': company_record.address_2,
            'city': company_record.city,
            'state': company_record.state,
            'zip_code': company_record.zip_code,
        })

    return pd.DataFrame(rows, columns=columns)
|
||||
|
||||
|
||||
def _find_latest_fund_data_url():
    """Find the URL of the latest fund data CSV file from the SEC website.

    The listing looks like this:

    | File                         | Format | Size |
    |------------------------------|--------|------|
    |[2024](link) Updated 6/5/24   | XML    | 1.2 MB|
    |[2024](link) Updated 6/5/24   | CSV    | 1.2 MB|
    |[2023](link) Updated 6/5/24   | XML    | 1.2 MB|
    |[2023](link) Updated 6/5/24   | CSV    | 1.2 MB|

    The first row (in document order) whose Format cell contains 'CSV' and
    whose File cell holds a link wins.

    Returns:
        Absolute URL of the CSV file.

    Raises:
        ValueError: If no table with File/Format/Size headers yields a CSV link.
    """
    list_url = "https://www.sec.gov/about/opendatasetsshtmlinvestment_company"
    page = BeautifulSoup(download_text(list_url), 'html.parser')

    for table in page.find_all('table'):
        # Only tables with a header row containing 'File', 'Format', 'Size' qualify
        header_labels = [th.get_text(strip=True) for th in table.find_all('th')]
        if not all(label in header_labels for label in ('File', 'Format', 'Size')):
            continue

        format_col = header_labels.index('Format')
        file_col = header_labels.index('File')
        highest_col = max(format_col, file_col)

        for table_row in table.find_all('tr'):
            cells = table_row.find_all('td')
            # Header rows and short rows have too few <td> cells to inspect
            if len(cells) <= highest_col:
                continue
            if 'CSV' not in cells[format_col].get_text(strip=True):
                continue

            anchor = cells[file_col].find('a')
            if anchor and 'href' in anchor.attrs:
                # The href is joined against the SEC base URL to make it absolute
                return urllib.parse.urljoin(SEC_BASE_URL, anchor['href'])
        # No CSV link in this table; keep looking in later tables just in case.

    raise ValueError("No fund data CSV file found on the SEC website.")
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
def get_bulk_fund_data() -> pd.DataFrame:
    """
    Download the latest Investment Company data file from the SEC website.

    The CSV is expected to carry these columns:
    ['Reporting File Number', 'CIK Number', 'Entity Name', 'Entity Org Type',
     'Series ID', 'Series Name', 'Class ID', 'Class Name', 'Class Ticker',
     'Address_1', 'Address_2', 'City', 'State', 'Zip Code']

    The result is memoized, so the download happens once per process and
    every caller receives the same DataFrame object.

    Returns:
        pd.DataFrame: The parsed CSV, one row per line of the SEC file.
    """
    latest_csv_url = _find_latest_fund_data_url()
    csv_text = download_text(latest_csv_url)
    return pd.read_csv(StringIO(csv_text))
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
def get_fund_reference_data() -> "FundReferenceData":
    """
    Build the normalized reference data object for all funds, series, and classes.

    The bulk fund data is fetched through get_bulk_fund_data and wrapped in
    a FundReferenceData; the wrapper is memoized so it is built only once.

    Returns:
        FundReferenceData: An object providing efficient lookups for fund entities
    """
    return FundReferenceData(get_bulk_fund_data())
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: load the reference data and walk the hierarchy
    # class -> series -> company for one well-known ticker. Nothing is
    # printed on success; a failure is logged with its traceback instead of
    # being silently discarded.
    import logging

    try:
        # Get the fund reference data
        fund_ref_data = get_fund_reference_data()

        # Look up a well-known fund
        vfinx_class = fund_ref_data.get_class_by_ticker('VFIAX')
        if vfinx_class:
            # Get parent series
            vfinx_series = fund_ref_data.get_series_for_class(vfinx_class.class_id)
            if vfinx_series:
                # Get all classes in the series
                series_classes = fund_ref_data.get_classes_for_series(vfinx_series.series_id)

                # Get parent company
                vanguard = fund_ref_data.get_company_for_series(vfinx_series.series_id)
                if vanguard:
                    # Get all series for the company
                    company_series = fund_ref_data.get_series_for_company(vanguard.cik)

    except Exception:
        logging.getLogger(__name__).exception("Fund reference data smoke test failed")
|
||||
1479
venv/lib/python3.10/site-packages/edgar/funds/reports.py
Normal file
1479
venv/lib/python3.10/site-packages/edgar/funds/reports.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,111 @@
|
||||
"""
|
||||
Series resolution service for ETF/Fund ticker-to-series mapping.
|
||||
|
||||
This module provides services for resolving ticker symbols to series IDs,
|
||||
addressing GitHub issue #417.
|
||||
"""
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from functools import lru_cache
|
||||
from typing import List, Optional
|
||||
|
||||
from edgar.core import log
|
||||
|
||||
__all__ = ['SeriesInfo', 'TickerSeriesResolver']
|
||||
|
||||
|
||||
@dataclass
class SeriesInfo:
    """One ticker-to-series match: the series plus the optional share class behind it."""

    # Identifier of the series the ticker resolved to
    series_id: str
    # Human-readable series name; None when the data source does not carry one
    series_name: Optional[str]
    # Ticker symbol as found in the data source
    ticker: str
    # Share class identifier and name, when known
    class_id: Optional[str] = None
    class_name: Optional[str] = None
|
||||
|
||||
|
||||
class TickerSeriesResolver:
    """Handles ticker to series ID resolution with caching.

    Successful lookups (including a genuine "not found") are cached per
    ticker. Lookups that fail with an exception are NOT cached, so a
    transient failure does not hide a ticker for the rest of the process.
    """

    @staticmethod
    @lru_cache(maxsize=1000)
    def _lookup_series(ticker: str) -> List["SeriesInfo"]:
        """Perform the actual lookup; raises on failure so errors are never cached."""
        wanted = ticker.upper()

        # First try mutual fund data (original behavior)
        from edgar.reference.tickers import get_mutual_fund_tickers
        mf_data = get_mutual_fund_tickers()
        matches = mf_data[mf_data['ticker'].str.upper() == wanted]

        series_list = [
            SeriesInfo(
                series_id=row['seriesId'],
                series_name=None,  # Not available in the ticker data
                ticker=row['ticker'],
                class_id=row['classId']
            )
            for _, row in matches.iterrows()
        ]
        if series_list:
            return series_list

        # Fallback to company ticker data for ETFs
        log.debug(f"Ticker {ticker} not found in mutual fund data, trying company data...")

        from edgar.reference.tickers import find_cik, get_company_tickers
        cik = find_cik(ticker)

        if cik:
            # Found as company ticker - likely an ETF
            company_data = get_company_tickers()
            company_matches = company_data[
                (company_data['ticker'].str.upper() == wanted) &
                (company_data['cik'] == cik)
            ]

            if len(company_matches) > 0:
                company_match = company_matches.iloc[0]
                # Create synthetic series info for ETF
                etf_series = SeriesInfo(
                    series_id=f"ETF_{cik}",  # Synthetic series ID for ETFs
                    series_name=company_match['company'],  # Company name as series name
                    ticker=company_match['ticker'],
                    class_id=f"ETF_CLASS_{cik}"  # Synthetic class ID
                )
                log.debug(f"Resolved {ticker} as ETF company with CIK {cik}")
                return [etf_series]

        log.debug(f"Ticker {ticker} not found in either mutual fund or company data")
        return []

    @staticmethod
    def resolve_ticker_to_series(ticker: str) -> List["SeriesInfo"]:
        """Resolve ticker to all associated series with ETF fallback.

        Args:
            ticker: Ticker symbol; matched case-insensitively.

        Returns:
            A new list of SeriesInfo on every call (callers may mutate it
            freely). Empty when the ticker is empty, unknown, or the lookup
            failed; failures are logged as warnings.
        """
        if not ticker:
            return []

        try:
            # Copy so that the cached list can never be altered by a caller
            return list(TickerSeriesResolver._lookup_series(ticker))
        except Exception as e:
            log.warning(f"Error resolving ticker {ticker} to series: {e}")
            return []

    @staticmethod
    def get_primary_series(ticker: str) -> Optional[str]:
        """Get the primary/most relevant series for a ticker.

        Returns:
            The series ID of the first match, or None when nothing matched.
            (With several matches the first one is used; this could be
            enhanced with better logic.)
        """
        series_list = TickerSeriesResolver.resolve_ticker_to_series(ticker)
        return series_list[0].series_id if series_list else None

    @staticmethod
    def has_multiple_series(ticker: str) -> bool:
        """Check if a ticker maps to multiple series."""
        return len(TickerSeriesResolver.resolve_ticker_to_series(ticker)) > 1


# The public method used to be the lru_cache wrapper itself; keep its cache
# management hooks available under the same name for existing callers.
TickerSeriesResolver.resolve_ticker_to_series.cache_clear = TickerSeriesResolver._lookup_series.cache_clear
TickerSeriesResolver.resolve_ticker_to_series.cache_info = TickerSeriesResolver._lookup_series.cache_info
|
||||
106
venv/lib/python3.10/site-packages/edgar/funds/thirteenf.py
Normal file
106
venv/lib/python3.10/site-packages/edgar/funds/thirteenf.py
Normal file
@@ -0,0 +1,106 @@
|
||||
"""
|
||||
13F filing module for investment funds.
|
||||
|
||||
This module provides classes and functions for working with 13F filings
|
||||
that report investment fund portfolio holdings.
|
||||
"""
|
||||
import logging
|
||||
|
||||
import pandas as pd
|
||||
|
||||
# Define constants
|
||||
THIRTEENF_FORMS = ['13F-HR', "13F-HR/A", "13F-NT", "13F-NT/A", "13F-CTR", "13F-CTR/A"]
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
# We'll define these functions without directly importing them at the module level
|
||||
# to avoid circular imports
|
||||
|
||||
def get_ThirteenF():
    """Return the ThirteenF class from edgar.thirteenf.

    The import is done at call time rather than at module import time to
    avoid circular imports.
    """
    from edgar.thirteenf import ThirteenF as thirteenf_cls
    return thirteenf_cls
|
||||
|
||||
# Create property-like functions that provide lazy loading
|
||||
def ThirteenF():
    """Return the ThirteenF class (not an instance), loaded lazily via get_ThirteenF."""
    thirteenf_cls = get_ThirteenF()
    return thirteenf_cls
|
||||
|
||||
def get_thirteenf_portfolio(filing) -> pd.DataFrame:
    """
    Extract portfolio holdings from a 13F filing.

    Args:
        filing: The 13F filing to extract data from

    Returns:
        DataFrame containing portfolio holdings. When the information table
        uses the raw 13F field names they are renamed (name, title,
        value_usd, shares, ...). A 'ticker' column is added (None when the
        CUSIP mapping fails). If a 'value_usd' column is present, a
        'pct_value' column is added and rows are sorted by value,
        descending. An empty DataFrame is returned when the filing has no
        information table or extraction fails.
    """
    try:
        # Create a ThirteenF from the filing
        thirteenf = get_ThirteenF()(filing, use_latest_period_of_report=True)

        # Check if the filing has an information table
        if not thirteenf.has_infotable():
            log.info("Filing %s does not have an information table", filing.accession_no)
            return pd.DataFrame()

        # Extract the information table
        infotable = thirteenf.infotable
        if infotable is None:
            log.warning("Could not extract information table from filing %s", filing.accession_no)
            return pd.DataFrame()

        df = pd.DataFrame(infotable)
        if df.empty:
            return df

        # Update column names for consistency
        if 'nameOfIssuer' in df.columns:
            df = df.rename(columns={
                'nameOfIssuer': 'name',
                'titleOfClass': 'title',
                'value': 'value_usd',
                'sshPrnamt': 'shares',
                'sshPrnamtType': 'share_type',
                'investmentDiscretion': 'investment_discretion',
                'votingAuthority': 'voting_authority'
            })

        # Add ticker mapping if possible; this is best-effort and must not
        # prevent the holdings themselves from being returned.
        try:
            from edgar.reference import cusip_ticker_mapping
            cusip_map = cusip_ticker_mapping(allow_duplicate_cusips=False)
            df['ticker'] = df['cusip'].map(cusip_map.Ticker)
        except Exception as e:
            log.warning("Error adding ticker mappings: %s", e)
            df['ticker'] = None

        # Value-based enrichment only applies when the value column exists.
        # Sorting on a missing column would raise and discard every holding.
        if 'value_usd' in df.columns:
            total_value = df['value_usd'].sum()
            if total_value > 0:
                df['pct_value'] = df['value_usd'] / total_value * 100
            else:
                df['pct_value'] = 0

            # Sort by value
            df = df.sort_values('value_usd', ascending=False).reset_index(drop=True)

        return df

    except Exception as e:
        log.warning("Error extracting holdings from 13F filing: %s", e)

    # Return empty DataFrame if extraction failed
    return pd.DataFrame()
|
||||
|
||||
# Functions for export
|
||||
__all__ = [
|
||||
'ThirteenF',
|
||||
'THIRTEENF_FORMS',
|
||||
'get_thirteenf_portfolio',
|
||||
]
|
||||
@@ -0,0 +1,110 @@
|
||||
"""
|
||||
Ticker resolution service for ETF/Fund holdings.
|
||||
|
||||
This module provides services for resolving ticker symbols from various identifiers
|
||||
like CUSIP, ISIN, and company names, addressing GitHub issue #418.
|
||||
"""
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from functools import lru_cache
|
||||
from typing import Optional
|
||||
|
||||
from edgar.core import log
|
||||
from edgar.reference.tickers import get_ticker_from_cusip
|
||||
|
||||
__all__ = ['TickerResolutionResult', 'TickerResolutionService']
|
||||
|
||||
|
||||
@dataclass
class TickerResolutionResult:
    """Outcome of a single ticker resolution attempt."""

    # Resolved ticker symbol, or None when resolution failed
    ticker: Optional[str]
    # How the ticker was obtained: 'direct', 'cusip', 'failed'
    method: str
    # Confidence in the result, 0.0 to 1.0
    confidence: float
    # Optional explanation when no ticker could be resolved
    error_message: Optional[str] = None

    @property
    def success(self) -> bool:
        """True when a ticker is present and its confidence is above zero."""
        if self.ticker is None:
            return False
        return self.confidence > 0.0
|
||||
|
||||
|
||||
class TickerResolutionService:
    """Single entry point for turning holding identifiers into ticker symbols."""

    # Confidence assigned to a result, keyed by the resolution method used
    CONFIDENCE_SCORES = {
        'direct': 1.0,   # Direct from NPORT-P
        'cusip': 0.85,   # High confidence - official identifier
        'isin': 0.75,    # Good confidence - international identifier
        'name': 0.5,     # Lower confidence - fuzzy matching
        'failed': 0.0    # No resolution
    }

    @staticmethod
    @lru_cache(maxsize=1000)
    def resolve_ticker(ticker: Optional[str] = None,
                       cusip: Optional[str] = None,
                       isin: Optional[str] = None,
                       company_name: Optional[str] = None) -> "TickerResolutionResult":
        """
        Resolve a ticker from whichever identifiers are available.

        Methods are tried in order of confidence: a directly supplied ticker
        first, then the CUSIP mapping. Results are memoized per argument
        combination.

        Args:
            ticker: Direct ticker from NPORT-P
            cusip: CUSIP identifier
            isin: ISIN identifier (future use; currently ignored)
            company_name: Company name (future use; currently ignored)

        Returns:
            TickerResolutionResult with ticker and metadata; method is
            'failed' and ticker is None when nothing could be resolved.
        """
        scores = TickerResolutionService.CONFIDENCE_SCORES

        # 1. Direct ticker resolution
        direct = ticker.strip() if ticker else ""
        if direct:
            return TickerResolutionResult(
                ticker=direct.upper(),
                method='direct',
                confidence=scores['direct']
            )

        # 2. CUSIP-based resolution
        if cusip:
            from_cusip = TickerResolutionService._resolve_via_cusip(cusip)
            if from_cusip:
                return TickerResolutionResult(
                    ticker=from_cusip,
                    method='cusip',
                    confidence=scores['cusip']
                )

        # 3./4. Future: ISIN-based and name-based resolution would go here.

        return TickerResolutionResult(
            ticker=None,
            method='failed',
            confidence=0.0,
            error_message='No resolution methods succeeded'
        )

    @staticmethod
    def _resolve_via_cusip(cusip: str) -> Optional[str]:
        """Map a CUSIP to an upper-cased ticker, or None if that is not possible.

        CUSIPs shorter than 8 characters (after stripping whitespace) are
        rejected without a lookup. Any exception during the lookup is logged
        as a warning and treated as "no ticker".
        """
        try:
            if not cusip:
                return None
            if len(cusip.strip()) < 8:
                return None

            cusip = cusip.strip().upper()
            found = get_ticker_from_cusip(cusip)
            if found:
                return found.upper()

        except Exception as e:
            log.warning(f"CUSIP ticker resolution failed for {cusip}: {e}")

        return None
|
||||
Reference in New Issue
Block a user