Initial commit

This commit is contained in:
kdusek
2025-12-09 12:13:01 +01:00
commit 8e654ed209
13332 changed files with 2695056 additions and 0 deletions

View File

@@ -0,0 +1,109 @@
from edgar.reference.company_subsets import (
# Classes and Enums
CompanySubset,
MarketCapTier,
PopularityTier,
# Core Functions
get_all_companies,
get_companies_by_exchanges,
get_popular_companies,
# Industry and State Filtering (Comprehensive Mode)
get_companies_by_industry,
get_companies_by_state,
# Sampling and Filtering
get_random_sample,
get_stratified_sample,
get_top_companies_by_metric,
filter_companies,
exclude_companies,
# Set Operations
combine_company_sets,
intersect_company_sets,
# Convenience Functions - General
get_faang_companies,
get_tech_giants,
get_dow_jones_sample,
# Convenience Functions - Industry Specific
get_pharmaceutical_companies,
get_biotechnology_companies,
get_software_companies,
get_semiconductor_companies,
get_banking_companies,
get_investment_companies,
get_insurance_companies,
get_real_estate_companies,
get_oil_gas_companies,
get_retail_companies,
)
from edgar.reference.company_dataset import (
get_company_dataset,
build_company_dataset_parquet,
build_company_dataset_duckdb,
is_individual_from_json,
to_duckdb,
)
from edgar.reference.forms import describe_form
from edgar.reference.tickers import cusip_ticker_mapping, get_icon_from_ticker, get_ticker_from_cusip
# Lookup table: two-letter US state abbreviation -> full state name (50 states).
states = dict(
    AL="Alabama",
    AK="Alaska",
    AZ="Arizona",
    AR="Arkansas",
    CA="California",
    CO="Colorado",
    CT="Connecticut",
    DE="Delaware",
    FL="Florida",
    GA="Georgia",
    HI="Hawaii",
    ID="Idaho",
    IL="Illinois",
    IN="Indiana",
    IA="Iowa",
    KS="Kansas",
    KY="Kentucky",
    LA="Louisiana",
    ME="Maine",
    MD="Maryland",
    MA="Massachusetts",
    MI="Michigan",
    MN="Minnesota",
    MS="Mississippi",
    MO="Missouri",
    MT="Montana",
    NE="Nebraska",
    NV="Nevada",
    NH="New Hampshire",
    NJ="New Jersey",
    NM="New Mexico",
    NY="New York",
    NC="North Carolina",
    ND="North Dakota",
    OH="Ohio",
    OK="Oklahoma",
    OR="Oregon",
    PA="Pennsylvania",
    RI="Rhode Island",
    SC="South Carolina",
    SD="South Dakota",
    TN="Tennessee",
    TX="Texas",
    UT="Utah",
    VT="Vermont",
    VA="Virginia",
    WA="Washington",
    WV="West Virginia",
    WI="Wisconsin",
    WY="Wyoming",
)

View File

@@ -0,0 +1,111 @@
# Expansions of the SEC/EDGAR acronyms used throughout the package.
ACRONYMS = dict(
    CCC="CIK Confirmation Code",
    CIK="Central Index Key",
    EDGAR="Electronic Data Gathering, Analysis, and Retrieval",
    SEC="Securities and Exchange Commission",
)
# Investment category code -> human-readable description.
INVESTMENT_CATEGORIES = {
    "ABS": "Asset-backed securities",
    "ACMO": "Agency collateralized mortgage obligations",
    # NOTE(review): the key "ACMBS" does not abbreviate this description
    # ("Agency debentures and agency strips"); two entries may have been merged
    # by mistake. Left unchanged - confirm against the SEC code list.
    "ACMBS": "Agency debentures and agency strips",
    "AMBS": "Agency mortgage-backed securities",
    # Stray leading space removed so the description can be displayed/compared as-is.
    "UST": "U.S. Treasuries (including strips)",
    "N/A": "Not applicable",
}
# "US-XX" subdivision code -> upper-case area name.
# Built from (abbreviation, name) pairs so the "US-" prefix is written only once.
ISO_STATES_AND_OUTLYING_AREAS = {
    f"US-{abbreviation}": area_name
    for abbreviation, area_name in (
        ("AL", "ALABAMA"),
        ("AK", "ALASKA"),
        ("AZ", "ARIZONA"),
        ("AR", "ARKANSAS"),
        ("CA", "CALIFORNIA"),
        ("CO", "COLORADO"),
        ("CT", "CONNECTICUT"),
        ("DE", "DELAWARE"),
        ("DC", "DISTRICT OF COLUMBIA"),
    )
}
# ISO 3166-1 alpha-2 country code -> upper-case country name.
ISO_COUNTRY_CODES = {
    "AF": "AFGHANISTAN",  # stray leading space removed
    "AX": "ALAND ISLANDS",
    "AL": "ALBANIA",
    "DZ": "ALGERIA",
    "AS": "AMERICAN SAMOA",
    "AD": "ANDORRA",
    "AO": "ANGOLA",
    "AI": "ANGUILLA",
    "AQ": "ANTARCTICA",
    "AG": "ANTIGUA AND BARBUDA",
    "AR": "ARGENTINA",
    "AM": "ARMENIA",
    "AW": "ARUBA",
    "AU": "AUSTRALIA",
    "AT": "AUSTRIA",
    "AZ": "AZERBAIJAN",
    "BS": "BAHAMAS",
    "BH": "BAHRAIN",
    "BD": "BANGLADESH",
    "BB": "BARBADOS",
    "BY": "BELARUS",
    "BE": "BELGIUM",
    "BZ": "BELIZE",
    "BJ": "BENIN",
    "BM": "BERMUDA",
    "BT": "BHUTAN",
    "BO": "BOLIVIA (PLURINATIONAL STATE OF)",
    "BQ": "BONAIRE, SINT EUSTATIUS AND SABA",
    "BA": "BOSNIA AND HERZEGOVINA",
    "BW": "BOTSWANA",
    "BV": "BOUVET ISLAND",
    "BR": "BRAZIL",
    "IO": "BRITISH INDIAN OCEAN TERRITORY",
    "BN": "BRUNEI DARUSSALAM",
    "BG": "BULGARIA",
    "BF": "BURKINA FASO",
    "BI": "BURUNDI",
    "CV": "CABO VERDE",
    "KH": "CAMBODIA",
    "CM": "CAMEROON",
    "CA": "CANADA",
    "KY": "CAYMAN ISLANDS",
    "CF": "CENTRAL AFRICAN REPUBLIC",
    "TD": "CHAD",
    "CL": "CHILE",
    "CN": "CHINA",
    "CX": "CHRISTMAS ISLAND",
    "CC": "COCOS (KEELING) ISLANDS",
    "CO": "COLOMBIA",
    "KM": "COMOROS",
    "CG": "CONGO",
    # "CD" used to be mapped to "COOK ISLANDS"; in ISO 3166-1 CD is the
    # Democratic Republic of the Congo and the Cook Islands are "CK".
    "CD": "CONGO (DEMOCRATIC REPUBLIC OF THE)",
    "CK": "COOK ISLANDS",
    "CR": "COSTA RICA",
    "CI": "COTE D'IVOIRE",
    "HR": "CROATIA",
    "CU": "CUBA",
    "CW": "CURACAO",
    "CY": "CYPRUS",
    "CZ": "CZECHIA",
    "DK": "DENMARK",
    "DJ": "DJIBOUTI",
    "DM": "DOMINICA",
    "DO": "DOMINICAN REPUBLIC",
    "EC": "ECUADOR",
    "EG": "EGYPT",
    "SV": "EL SALVADOR",
    "GQ": "EQUATORIAL GUINEA",
    "ER": "ERITREA",
    "EE": "ESTONIA",
    "ET": "ETHIOPIA",
    "FK": "FALKLAND ISLANDS (MALVINAS)",
    "FO": "FAROE ISLANDS",
    "FJ": "FIJI",
    "FI": "FINLAND",
    "FR": "FRANCE",
    "GF": "FRENCH GUIANA",
    "PF": "FRENCH POLYNESIA",
    "TF": "FRENCH SOUTHERN TERRITORIES",
    "GA": "GABON",
    "GM": "GAMBIA",
}

View File

@@ -0,0 +1,606 @@
"""
Company Dataset Builder for EdgarTools
Builds high-performance company datasets from SEC submissions data with two output formats:
1. PyArrow Parquet (5-20 MB) - Fast filtering with PyArrow compute API
2. DuckDB (287 MB) - Optional SQL interface for power users
Performance:
- Build time: ~30 seconds (optimized with orjson + company filtering)
- Records: ~562,413 companies (40% individual filers filtered)
- Query speed: <1ms (DuckDB) or <100ms (Parquet)
Example:
>>> from edgar.reference import get_company_dataset
>>> import pyarrow.compute as pc
>>>
>>> # Load dataset (builds on first use)
>>> companies = get_company_dataset()
>>>
>>> # Filter pharmaceutical companies
>>> pharma = companies.filter(pc.field('sic').between(2834, 2836))
>>> print(f"Found {len(pharma)} pharma companies")
"""
from pathlib import Path
from typing import Optional, Union
import logging
import pyarrow as pa
import pyarrow.parquet as pq
from tqdm import tqdm
from edgar.core import get_edgar_data_directory, log
# JSON backend selection: orjson when it is installed, the standard library
# otherwise. JSON_PARSER records which backend was picked (it is logged by the
# dataset builders).
try:
    import orjson
except ImportError:
    import json

    def load_json(path: Path) -> dict:
        """Parse the JSON document at *path* with the standard-library parser."""
        with path.open('r', encoding='utf-8') as handle:
            return json.load(handle)

    JSON_PARSER = "json (stdlib)"
else:
    def load_json(path: Path) -> dict:
        """Parse the JSON document at *path* with orjson (the faster backend)."""
        raw = path.read_bytes()
        return orjson.loads(raw)

    JSON_PARSER = "orjson"
# Arrow schema of the company dataset; one row per filer kept by the builders.
COMPANY_SCHEMA = pa.schema([
    # Stored as text so leading zeros survive.
    pa.field('cik', pa.string()),
    pa.field('name', pa.string()),
    # Null when the filer has no SIC code.
    pa.field('sic', pa.int32()),
    pa.field('sic_description', pa.string()),
    # Pipe-delimited lists, e.g. "AAPL|APPLE" and "Nasdaq|NYSE".
    pa.field('tickers', pa.string()),
    pa.field('exchanges', pa.string()),
    pa.field('state_of_incorporation', pa.string()),
    pa.field('state_of_incorporation_description', pa.string()),
    # MMDD format.
    pa.field('fiscal_year_end', pa.string()),
    pa.field('entity_type', pa.string()),
    pa.field('ein', pa.string()),
])
# Form types whose presence in the recent filings marks the filer as a company.
_COMPANY_FORMS = frozenset({'10-K', '10-Q', '8-K', '10-K/A', '10-Q/A', '20-F', 'S-1'})

# CIKs (zero-padded to 10 digits) of individuals whose submissions nevertheless
# carry a state of incorporation. Currently only Reed Hastings.
_INDIVIDUAL_CIKS_WITH_STATE = frozenset({'0001033331'})


def is_individual_from_json(data: dict) -> bool:
    """
    Determine if an entity is an individual filer rather than a company.

    The checks run in this order and the first match decides:

    1. Any ticker or exchange            -> company
    2. A state of incorporation          -> company, except for the known
       individuals listed in ``_INDIVIDUAL_CIKS_WITH_STATE``
    3. An entity type other than 'other' -> company
    4. Any company form (10-K, 10-Q, 8-K, ...) among the recent filings -> company
    5. Otherwise                         -> individual

    Missing keys and JSON ``null`` values are treated as "no information", so a
    sparse record never raises. The CIK is compared after zero-padding to ten
    digits, so '1033331', 1033331 and '0001033331' are all recognised.

    Args:
        data: Parsed JSON submission data

    Returns:
        True if individual filer, False if company

    Example:
        >>> is_individual_from_json({'cik': '0001318605', 'tickers': ['TSLA']})
        False
        >>> is_individual_from_json({'cik': '0001078519', 'name': 'JOHN DOE'})
        True
    """
    # Has ticker or exchange -> company
    if data.get('tickers') or data.get('exchanges'):
        return False

    # Has state of incorporation -> company, unless it is a known exception
    if data.get('stateOfIncorporation'):
        cik = data.get('cik')
        return cik is not None and str(cik).zfill(10) in _INDIVIDUAL_CIKS_WITH_STATE

    # Has a meaningful entity type -> company
    entity_type = data.get('entityType')
    if entity_type and entity_type != 'other':
        return False

    # Files company forms -> company. `or` guards against explicit nulls at any level.
    recent = (data.get('filings') or {}).get('recent') or {}
    forms = recent.get('form') or ()
    return not any(form in _COMPANY_FORMS for form in forms)
def _join_nonempty(values) -> Optional[str]:
    """Pipe-join the truthy entries of *values*; None when nothing remains."""
    joined = '|'.join(filter(None, values or ()))
    return joined or None


def _company_record(data: dict) -> dict:
    """Flatten one parsed submissions document into a row shaped like COMPANY_SCHEMA."""
    sic = data.get('sic')
    return {
        'cik': data.get('cik'),
        'name': data.get('name'),
        # Missing or empty-string SIC becomes null instead of failing int().
        'sic': int(sic) if sic else None,
        'sic_description': data.get('sicDescription'),
        'tickers': _join_nonempty(data.get('tickers')),
        'exchanges': _join_nonempty(data.get('exchanges')),
        'state_of_incorporation': data.get('stateOfIncorporation'),
        'state_of_incorporation_description': data.get('stateOfIncorporationDescription'),
        'fiscal_year_end': data.get('fiscalYearEnd'),
        'entity_type': data.get('entityType'),
        'ein': data.get('ein'),
    }


def _collect_company_records(
    submissions_dir: Path,
    filter_individuals: bool,
    show_progress: bool,
    target: str
) -> list:
    """
    Scan CIK*.json files in *submissions_dir* and return one record per company.

    Shared by the Parquet and DuckDB builders. Files that cannot be read or
    parsed are counted, logged at debug level and skipped, so a few bad files
    do not abort the whole build.

    Args:
        submissions_dir: Directory containing CIK*.json files
        filter_individuals: Skip filers for which is_individual_from_json() is True
        show_progress: Show a tqdm progress bar
        target: What is being built; only used in the opening log line

    Returns:
        List of row dicts (see _company_record)

    Raises:
        FileNotFoundError: If the directory is missing or has no CIK*.json files
    """
    if not submissions_dir.exists():
        raise FileNotFoundError(
            f"Submissions directory not found: {submissions_dir}\n\n"
            "Please download submissions data first:\n"
            " from edgar.storage import download_submissions\n"
            " download_submissions()\n"
        )

    json_files = list(submissions_dir.glob("CIK*.json"))
    if not json_files:
        raise FileNotFoundError(
            f"No submission files found in: {submissions_dir}\n"
            "Expected CIK*.json files"
        )

    log.info(f"Building {target} from {len(json_files):,} submission files")
    log.info(f"Using JSON parser: {JSON_PARSER}")

    companies = []
    errors = 0
    individuals_skipped = 0

    for json_file in tqdm(json_files, desc="Processing submissions", disable=not show_progress):
        try:
            data = load_json(json_file)
            if filter_individuals and is_individual_from_json(data):
                individuals_skipped += 1
                continue
            companies.append(_company_record(data))
        except Exception as e:
            # Deliberate best-effort: one unreadable/malformed file must not stop the build.
            errors += 1
            log.debug(f"Error processing {json_file.name}: {e}")

    log.info(f"Processed {len(json_files):,} files:")
    log.info(f" - Companies: {len(companies):,}")
    if filter_individuals:
        log.info(f" - Individuals skipped: {individuals_skipped:,}")
    if errors > 0:
        log.warning(f" - Errors: {errors:,}")

    return companies


def build_company_dataset_parquet(
    submissions_dir: Path,
    output_path: Path,
    filter_individuals: bool = True,
    show_progress: bool = True
) -> pa.Table:
    """
    Build PyArrow Parquet dataset from submissions directory (companies only).

    Processes every CIK*.json file in the submissions directory, optionally
    drops individual filers, and writes the result as a zstd-compressed
    Parquet file (parent directories are created as needed).

    Args:
        submissions_dir: Directory containing CIK*.json files
        output_path: Where to save the .pq file
        filter_individuals: Skip individual filers (default: True)
        show_progress: Show progress bar (default: True)

    Returns:
        PyArrow Table with company data (schema: COMPANY_SCHEMA)

    Raises:
        FileNotFoundError: If submissions_dir doesn't exist or holds no CIK*.json files

    Example:
        >>> from pathlib import Path
        >>> submissions_dir = Path.home() / '.edgar' / 'submissions'
        >>> output_path = Path.home() / '.edgar' / 'companies.pq'
        >>> table = build_company_dataset_parquet(submissions_dir, output_path)
        >>> print(f"Built dataset: {len(table):,} companies")
    """
    companies = _collect_company_records(
        submissions_dir, filter_individuals, show_progress, target="company dataset"
    )

    table = pa.Table.from_pylist(companies, schema=COMPANY_SCHEMA)

    output_path.parent.mkdir(parents=True, exist_ok=True)
    pq.write_table(
        table,
        output_path,
        compression='zstd',
        compression_level=9,
        use_dictionary=True
    )

    file_size_mb = output_path.stat().st_size / (1024 * 1024)
    log.info(f"Saved Parquet file: {output_path} ({file_size_mb:.1f} MB)")
    return table


def build_company_dataset_duckdb(
    submissions_dir: Path,
    output_path: Path,
    filter_individuals: bool = True,
    create_indexes: bool = True,
    show_progress: bool = True
) -> None:
    """
    Build DuckDB database from submissions directory (companies only).

    Creates a 'companies' table, optional indexes on cik/sic/name, and a
    one-row 'metadata' table with summary counts. Existing 'companies' and
    'metadata' tables in the target file are replaced, so the function can be
    re-run against the same output path.

    Args:
        submissions_dir: Directory containing CIK*.json files
        output_path: Where to save the .duckdb file
        filter_individuals: Skip individual filers (default: True)
        create_indexes: Create indexes on cik, sic, name (default: True)
        show_progress: Show progress bar (default: True)

    Raises:
        FileNotFoundError: If submissions_dir doesn't exist or holds no CIK*.json files
        ImportError: If duckdb package not installed

    Example:
        >>> from pathlib import Path
        >>> submissions_dir = Path.home() / '.edgar' / 'submissions'
        >>> output_path = Path.home() / '.edgar' / 'companies.duckdb'
        >>> build_company_dataset_duckdb(submissions_dir, output_path)
    """
    try:
        import duckdb
    except ImportError as err:
        raise ImportError(
            "DuckDB export requires duckdb package.\n"
            "Install with: pip install duckdb"
        ) from err

    companies = _collect_company_records(
        submissions_dir, filter_individuals, show_progress, target="DuckDB database"
    )

    import pandas as pd

    # Explicit columns keep the frame well-formed even when no company was collected.
    df = pd.DataFrame(companies, columns=COMPANY_SCHEMA.names)

    output_path.parent.mkdir(parents=True, exist_ok=True)
    con = duckdb.connect(str(output_path))
    try:
        # Drop leftovers from a previous build so CREATE TABLE cannot collide.
        con.execute("DROP TABLE IF EXISTS metadata")
        con.execute("DROP TABLE IF EXISTS companies")

        # `df` is resolved by DuckDB from the local Python variable of that name.
        con.execute("CREATE TABLE companies AS SELECT * FROM df")

        if create_indexes:
            log.info("Creating indexes...")
            con.execute("CREATE INDEX idx_cik ON companies(cik)")
            con.execute("CREATE INDEX idx_sic ON companies(sic)")
            con.execute("CREATE INDEX idx_name ON companies(name)")

        # COUNT(column) counts non-NULL rows. The previous
        # COUNT(DISTINCT CASE ... THEN 1 END) could only ever yield 0 or 1.
        con.execute("""
            CREATE TABLE metadata AS
            SELECT
                CURRENT_TIMESTAMP as created_at,
                COUNT(*) as total_companies,
                COUNT(DISTINCT sic) as unique_sic_codes,
                COUNT(tickers) as companies_with_tickers,
                COUNT(exchanges) as companies_with_exchanges
            FROM companies
        """)
    finally:
        # Always release the file lock, even when a statement fails.
        con.close()

    file_size_mb = output_path.stat().st_size / (1024 * 1024)
    log.info(f"Saved DuckDB database: {output_path} ({file_size_mb:.1f} MB)")
def load_company_dataset_parquet(parquet_path: Path) -> pa.Table:
    """
    Read a previously built company dataset from a Parquet file.

    Thin wrapper over pyarrow.parquet.read_table() that adds an existence
    check and a debug log line.

    Args:
        parquet_path: Path to .pq file

    Returns:
        PyArrow Table with company data

    Raises:
        FileNotFoundError: If parquet_path does not exist

    Example:
        >>> from pathlib import Path
        >>> path = Path.home() / '.edgar' / 'companies.pq'
        >>> companies = load_company_dataset_parquet(path)
        >>> print(f"Loaded {len(companies):,} companies")
    """
    if parquet_path.exists():
        companies = pq.read_table(parquet_path)
        log.debug(f"Loaded {len(companies):,} companies from {parquet_path}")
        return companies
    raise FileNotFoundError(f"Parquet file not found: {parquet_path}")
def to_duckdb(
    parquet_path: Path,
    duckdb_path: Path,
    create_indexes: bool = True
) -> None:
    """
    Convert Parquet dataset to DuckDB database.

    Loads the Parquet file, copies it into a 'companies' table, optionally
    indexes cik/sic/name, and writes a one-row 'metadata' table with summary
    counts. Existing 'companies' and 'metadata' tables in the target file are
    replaced, so the export can be repeated against the same path.

    Args:
        parquet_path: Path to source .pq file
        duckdb_path: Path to output .duckdb file
        create_indexes: Create indexes on key columns (default: True)

    Raises:
        ImportError: If duckdb package not installed
        FileNotFoundError: If parquet_path does not exist

    Example:
        >>> from pathlib import Path
        >>> parquet_path = Path.home() / '.edgar' / 'companies.pq'
        >>> duckdb_path = Path.home() / '.edgar' / 'companies.duckdb'
        >>> to_duckdb(parquet_path, duckdb_path)
        >>>
        >>> import duckdb
        >>> con = duckdb.connect(str(duckdb_path))
        >>> result = con.execute(
        ...     "SELECT * FROM companies WHERE sic = 2834"
        ... ).fetchdf()
    """
    try:
        import duckdb
    except ImportError as err:
        raise ImportError(
            "DuckDB export requires duckdb package.\n"
            "Install with: pip install duckdb"
        ) from err

    if not parquet_path.exists():
        raise FileNotFoundError(f"Parquet file not found: {parquet_path}")

    log.info(f"Converting Parquet to DuckDB: {parquet_path} -> {duckdb_path}")

    # Read Parquet file and convert to pandas
    df = pq.read_table(parquet_path).to_pandas()

    duckdb_path.parent.mkdir(parents=True, exist_ok=True)
    con = duckdb.connect(str(duckdb_path))
    try:
        # Drop leftovers from a previous export so CREATE TABLE cannot collide.
        con.execute("DROP TABLE IF EXISTS metadata")
        con.execute("DROP TABLE IF EXISTS companies")

        # `df` is resolved by DuckDB from the local Python variable of that name.
        con.execute("CREATE TABLE companies AS SELECT * FROM df")

        if create_indexes:
            log.info("Creating indexes...")
            con.execute("CREATE INDEX idx_cik ON companies(cik)")
            con.execute("CREATE INDEX idx_sic ON companies(sic)")
            con.execute("CREATE INDEX idx_name ON companies(name)")

        # COUNT(column) counts non-NULL rows. The previous
        # COUNT(DISTINCT CASE ... THEN 1 END) could only ever yield 0 or 1.
        con.execute("""
            CREATE TABLE metadata AS
            SELECT
                CURRENT_TIMESTAMP as created_at,
                COUNT(*) as total_companies,
                COUNT(DISTINCT sic) as unique_sic_codes,
                COUNT(tickers) as companies_with_tickers,
                COUNT(exchanges) as companies_with_exchanges
            FROM companies
        """)
    finally:
        # Always release the file lock, even when a statement fails.
        con.close()

    file_size_mb = duckdb_path.stat().st_size / (1024 * 1024)
    log.info(f"Exported to DuckDB: {duckdb_path} ({file_size_mb:.1f} MB)")
# Process-wide cache; the loaded table is stored under the key 'companies'.
_CACHE = {}


def get_company_dataset(rebuild: bool = False) -> pa.Table:
    """
    Return the company dataset, loading or building it as required.

    Lookup order when ``rebuild`` is False:

    1. the in-memory cache (``_CACHE['companies']``)
    2. the Parquet file ``companies.pq`` in the Edgar data directory
    3. a fresh build from the ``submissions`` directory, saved to that file

    With ``rebuild=True`` steps 1 and 2 are skipped. Whichever step produces
    the table, it is stored in the in-memory cache before being returned.

    Args:
        rebuild: Force rebuild even if cache exists (default: False)

    Returns:
        PyArrow Table with company data

    Raises:
        FileNotFoundError: If a build is needed and the submissions directory
            is missing or holds fewer than 100,000 CIK*.json files

    Example:
        >>> from edgar.reference import get_company_dataset
        >>> import pyarrow.compute as pc
        >>> companies = get_company_dataset()
        >>> pharma = companies.filter(pc.field('sic').between(2834, 2836))
        >>> # Force rebuild with latest data
        >>> companies = get_company_dataset(rebuild=True)
    """
    # 1. In-memory cache
    if not rebuild:
        try:
            return _CACHE['companies']
        except KeyError:
            pass

    # 2. On-disk Parquet cache
    cache_path = get_edgar_data_directory() / 'companies.pq'
    if not rebuild and cache_path.exists():
        log.info(f"Loading company dataset from cache: {cache_path}")
        loaded = load_company_dataset_parquet(cache_path)
        _CACHE['companies'] = loaded
        return loaded

    # 3. Build from the raw submissions
    log.info("Building company dataset from submissions (this may take ~30 seconds)...")
    submissions_dir = get_edgar_data_directory() / 'submissions'

    # A directory with fewer than 100,000 files is treated as an incomplete download.
    have_submissions = (
        submissions_dir.exists()
        and len(list(submissions_dir.glob('CIK*.json'))) >= 100000
    )
    if not have_submissions:
        raise FileNotFoundError(
            f"Submissions directory not found or incomplete: {submissions_dir}\n\n"
            "Please download submissions data first:\n"
            " from edgar.storage import download_submissions\n"
            " download_submissions()\n\n"
            "This is a one-time download (~500 MB compressed)."
        )

    built = build_company_dataset_parquet(
        submissions_dir,
        cache_path,
        filter_individuals=True
    )
    log.info(f"✅ Built dataset: {len(built):,} companies, cached at {cache_path}")
    _CACHE['companies'] = built
    return built

View File

@@ -0,0 +1,991 @@
"""
Company subset selection utilities for analysis and learning tasks.
This module provides flexible ways to create subsets of companies from SEC reference data
for educational, research, and analysis purposes. It offers exchange-based selection,
popularity-based filtering, sampling capabilities, and composition utilities.
Key features:
- Exchange-based selection (NYSE, NASDAQ, OTC, CBOE)
- Popularity-based selection (popular stocks, market cap tiers)
- Sampling capabilities (random, stratified, top N)
- Filtering and combination utilities
- Consistent DataFrame output format
All functions return a standardized DataFrame with columns: ['cik', 'ticker', 'name', 'exchange']
"""
from enum import Enum
from functools import lru_cache
from typing import Callable, List, Optional, Union
import pandas as pd
from edgar.core import log
from edgar.reference.tickers import get_company_ticker_name_exchange, popular_us_stocks
# Public API of this module: the names exported by `from ... import *`.
# Kept as a plain literal list so static tooling can read it.
__all__ = [
    # Classes and enums
    'CompanySubset',
    'MarketCapTier',
    'PopularityTier',
    # Core loaders
    'get_all_companies',
    'get_companies_by_exchanges',
    'get_popular_companies',
    # Industry and state filtering (comprehensive mode)
    'get_companies_by_industry',
    'get_companies_by_state',
    # Sampling and filtering
    'get_random_sample',
    'get_stratified_sample',
    'get_top_companies_by_metric',
    'filter_companies',
    'exclude_companies',
    # Set operations
    'combine_company_sets',
    'intersect_company_sets',
    # Convenience functions - general
    'get_faang_companies',
    'get_tech_giants',
    'get_dow_jones_sample',
    # Convenience functions - industry specific
    'get_pharmaceutical_companies',
    'get_biotechnology_companies',
    'get_software_companies',
    'get_semiconductor_companies',
    'get_banking_companies',
    'get_investment_companies',
    'get_insurance_companies',
    'get_real_estate_companies',
    'get_oil_gas_companies',
    'get_retail_companies',
]
class MarketCapTier(Enum):
    """Size buckets for classifying companies by market capitalisation.

    The dollar ranges in the comments are the usual rules of thumb; nothing in
    this enum enforces them.
    """

    # Usually above $10B
    LARGE_CAP = "large_cap"
    # Usually $2B - $10B
    MID_CAP = "mid_cap"
    # Usually $300M - $2B
    SMALL_CAP = "small_cap"
    # Usually below $300M
    MICRO_CAP = "micro_cap"
class PopularityTier(Enum):
    """Recognition buckets used to select from the popular-stocks list."""

    # Top 10 most valuable companies
    MEGA_CAP = "mega_cap"
    # Popular stocks list
    POPULAR = "popular"
    # Well-known companies
    MAINSTREAM = "mainstream"
    # Smaller but notable companies
    EMERGING = "emerging"
class CompanySubset:
"""
Fluent interface for building company subsets with chainable operations.
Example:
# Get 50 random NYSE companies excluding financial sector
companies = (CompanySubset()
.from_exchange('NYSE')
.exclude_tickers(['JPM', 'GS', 'C'])
.sample(50)
.get())
# Get pharmaceutical companies with comprehensive metadata
pharma = (CompanySubset(use_comprehensive=True)
.from_industry(sic_range=(2834, 2836))
.sample(100)
.get())
"""
def __init__(self, companies: Optional[pd.DataFrame] = None, use_comprehensive: bool = False):
"""
Initialize with optional starting dataset.
Args:
companies: Optional DataFrame to start with. If None, loads from get_all_companies()
use_comprehensive: If True and companies is None, load comprehensive dataset
with rich metadata (SIC, state, entity type, etc.)
"""
if companies is not None:
self._companies = companies
else:
self._companies = get_all_companies(use_comprehensive=use_comprehensive)
self._use_comprehensive = use_comprehensive
def from_exchange(self, exchanges: Union[str, List[str]]) -> 'CompanySubset':
"""Filter companies by exchange(s)."""
self._companies = get_companies_by_exchanges(exchanges)
return self
def from_popular(self, tier: Optional[PopularityTier] = None) -> 'CompanySubset':
"""Filter to popular companies."""
self._companies = get_popular_companies(tier)
return self
def from_industry(
self,
sic: Optional[Union[int, List[int]]] = None,
sic_range: Optional[tuple[int, int]] = None,
sic_description_contains: Optional[str] = None
) -> 'CompanySubset':
"""
Filter companies by industry (SIC code).
Automatically enables comprehensive mode to access industry metadata.
Args:
sic: Single SIC code or list of SIC codes to match exactly
sic_range: Tuple of (min_sic, max_sic) for range filtering
sic_description_contains: String to search within SIC description
Returns:
CompanySubset with industry filter applied
Example:
>>> # Pharmaceutical companies
>>> pharma = CompanySubset().from_industry(sic=2834)
>>> # Biotech sector
>>> biotech = CompanySubset().from_industry(sic_range=(2833, 2836))
"""
self._companies = get_companies_by_industry(
sic=sic,
sic_range=sic_range,
sic_description_contains=sic_description_contains
)
self._use_comprehensive = True
return self
def from_state(self, states: Union[str, List[str]]) -> 'CompanySubset':
"""
Filter companies by state of incorporation.
Automatically enables comprehensive mode to access state metadata.
Args:
states: Single state code or list of state codes (e.g., 'DE', 'CA')
Returns:
CompanySubset with state filter applied
Example:
>>> # Delaware corporations
>>> de_corps = CompanySubset().from_state('DE')
>>> # Delaware or Nevada corporations
>>> de_nv = CompanySubset().from_state(['DE', 'NV'])
"""
self._companies = get_companies_by_state(states)
self._use_comprehensive = True
return self
def filter_by(self, condition: Callable[[pd.DataFrame], pd.DataFrame]) -> 'CompanySubset':
"""Apply custom filter function."""
self._companies = condition(self._companies)
return self
def exclude_tickers(self, tickers: List[str]) -> 'CompanySubset':
"""Exclude specific tickers."""
self._companies = exclude_companies(self._companies, tickers)
return self
def include_tickers(self, tickers: List[str]) -> 'CompanySubset':
"""Include only specific tickers."""
self._companies = filter_companies(self._companies, ticker_list=tickers)
return self
def sample(self, n: int, random_state: Optional[int] = None) -> 'CompanySubset':
"""Take random sample of n companies."""
self._companies = get_random_sample(self._companies, n, random_state)
return self
def top(self, n: int, by: str = 'name') -> 'CompanySubset':
"""Take top n companies by specified column."""
self._companies = get_top_companies_by_metric(self._companies, n, by)
return self
def combine_with(self, other: 'CompanySubset') -> 'CompanySubset':
"""Combine with another subset (union)."""
self._companies = combine_company_sets([self._companies, other.get()])
return self
def intersect_with(self, other: 'CompanySubset') -> 'CompanySubset':
"""Intersect with another subset."""
self._companies = intersect_company_sets([self._companies, other.get()])
return self
def get(self) -> pd.DataFrame:
"""Get the final DataFrame."""
return self._companies.copy()
def __len__(self) -> int:
"""Return number of companies in subset."""
return len(self._companies)
def __repr__(self) -> str:
"""String representation showing count and sample."""
count = len(self._companies)
if count == 0:
return "CompanySubset(empty)"
sample_size = min(3, count)
sample_tickers = self._companies['ticker'].head(sample_size).tolist()
sample_str = ', '.join(sample_tickers)
if count > sample_size:
sample_str += f", ... +{count - sample_size} more"
return f"CompanySubset({count} companies: {sample_str})"
def _get_comprehensive_companies() -> pd.DataFrame:
    """
    Load the comprehensive company dataset as a pandas DataFrame.

    The data comes from edgar.reference.company_dataset.get_company_dataset().
    Its pipe-delimited ``tickers`` / ``exchanges`` columns are reduced to their
    first entry and exposed as ``ticker`` / ``exchange``.

    Returns:
        DataFrame with columns, in this order:
        ['cik', 'ticker', 'name', 'exchange', 'sic', 'sic_description',
         'state_of_incorporation', 'state_of_incorporation_description',
         'fiscal_year_end', 'entity_type', 'ein']

    Note:
        Any failure is logged and answered with an EMPTY DataFrame that has
        the same columns; no exception escapes this function.
    """
    extended_columns = [
        'cik', 'ticker', 'name', 'exchange',
        'sic', 'sic_description',
        'state_of_incorporation', 'state_of_incorporation_description',
        'fiscal_year_end', 'entity_type', 'ein'
    ]

    def first_entry(piped):
        """Return the first item of a pipe-delimited value, or None if there is none."""
        if piped is None or pd.isna(piped):
            return None
        head = str(piped).split('|')[0]
        return head if head else None

    try:
        from edgar.reference.company_dataset import get_company_dataset

        frame = get_company_dataset().to_pandas()

        # pop() hands back the source column and removes it from the frame in one step.
        frame['ticker'] = frame.pop('tickers').apply(first_entry)
        frame['exchange'] = frame.pop('exchanges').apply(first_entry)

        return frame[extended_columns]
    except Exception as e:
        log.error(f"Error fetching comprehensive company data: {e}")
        return pd.DataFrame(columns=extended_columns)
_STANDARD_COMPANY_COLUMNS = ['cik', 'ticker', 'name', 'exchange']
_COMPREHENSIVE_COMPANY_COLUMNS = _STANDARD_COMPANY_COLUMNS + [
    'sic', 'sic_description',
    'state_of_incorporation', 'state_of_incorporation_description',
    'fiscal_year_end', 'entity_type', 'ein',
]


@lru_cache(maxsize=2)
def _load_all_companies(use_comprehensive: bool) -> pd.DataFrame:
    """
    Cached loader behind get_all_companies(); raises instead of returning an empty result.

    lru_cache does not store a call that raised, so failing loudly here is what
    keeps a transient failure from being cached for the rest of the process.
    """
    if use_comprehensive:
        df = _get_comprehensive_companies()
        if df.empty:
            # The helper reports failure as an empty frame; turn it back into an error.
            raise LookupError("comprehensive company dataset is unavailable or empty")
        return df

    df = get_company_ticker_name_exchange().copy()
    # Reorder columns to match our standard format
    return df[_STANDARD_COMPANY_COLUMNS]


def get_all_companies(use_comprehensive: bool = False) -> pd.DataFrame:
    """
    Get all companies from SEC reference data in standardized format.

    Successful loads are cached (one entry per mode) and the SAME DataFrame
    object is returned on later calls - treat it as read-only. Failed loads
    are not cached, so the next call tries again.

    Args:
        use_comprehensive: If True, load the comprehensive dataset with rich
            metadata (SIC, state, entity type, etc.).
            If False (default), load the ticker-only dataset.

    Returns:
        DataFrame with columns ['cik', 'ticker', 'name', 'exchange']
        If use_comprehensive=True, also includes:
        ['sic', 'sic_description', 'state_of_incorporation',
         'state_of_incorporation_description', 'fiscal_year_end',
         'entity_type', 'ein']
        On failure an empty DataFrame with the same columns is returned and
        the error is logged.

    Example:
        >>> companies = get_all_companies()
        >>> all_companies = get_all_companies(use_comprehensive=True)
        >>> 'sic' in all_companies.columns  # True
    """
    # Normalise to one positional bool so f(), f(False) and
    # f(use_comprehensive=False) share a single cache entry.
    comprehensive = bool(use_comprehensive)
    try:
        return _load_all_companies(comprehensive)
    except Exception as e:
        log.error(f"Error fetching company data: {e}")
        columns = _COMPREHENSIVE_COMPANY_COLUMNS if comprehensive else _STANDARD_COMPANY_COLUMNS
        return pd.DataFrame(columns=columns)


# get_all_companies used to be decorated with lru_cache directly; keep the
# cache-management helpers available under the same names.
get_all_companies.cache_clear = _load_all_companies.cache_clear
get_all_companies.cache_info = _load_all_companies.cache_info
def get_companies_by_exchanges(exchanges: Union[str, List[str]]) -> pd.DataFrame:
    """
    Return the companies whose 'exchange' value is one of the requested exchanges.

    Args:
        exchanges: A single exchange name or a list of names
            ('NYSE', 'Nasdaq', 'OTC', 'CBOE'). Matching is exact
            (case-sensitive), as performed by ``Series.isin``.

    Returns:
        Rows of the ticker-only company list on those exchanges, re-indexed
        from 0. If anything fails, the error is logged and an empty frame
        with columns ['cik', 'ticker', 'name', 'exchange'] is returned.

    Example:
        >>> nyse_companies = get_companies_by_exchanges('NYSE')
        >>> major_exchanges = get_companies_by_exchanges(['NYSE', 'Nasdaq'])
    """
    # A bare string is treated as a one-element list.
    exchanges = [exchanges] if isinstance(exchanges, str) else exchanges

    try:
        universe = get_all_companies()
        listed_here = universe['exchange'].isin(exchanges)
        return universe[listed_here].reset_index(drop=True)
    except Exception as e:
        log.error(f"Error filtering companies by exchanges {exchanges}: {e}")
        return pd.DataFrame(columns=['cik', 'ticker', 'name', 'exchange'])
def get_popular_companies(tier: Optional[PopularityTier] = None) -> pd.DataFrame:
    """
    Get popular companies, optionally limited to a popularity tier.

    The popular-stocks list is joined with the ticker-only company list to
    attach an exchange to each company. Tier selection then takes the first
    N rows in the order of the popular-stocks list.

    Args:
        tier: Popularity tier. MEGA_CAP keeps the first 10 rows, POPULAR the
            first 50, MAINSTREAM the first 100. EMERGING or None returns
            every popular company.

    Returns:
        DataFrame with columns ['cik', 'ticker', 'name', 'exchange'],
        re-indexed from 0, with one row per row of the popular-stocks list.
        Companies with no exchange match get 'Unknown'. On any error the
        problem is logged and an empty frame with those columns is returned.

    Example:
        >>> mega_cap = get_popular_companies(PopularityTier.MEGA_CAP)
        >>> all_popular = get_popular_companies()
    """
    try:
        # The popular list is indexed by CIK; reset_index turns it into a column.
        popular_df = popular_us_stocks().reset_index()
        popular_df = popular_df.rename(
            columns={'Cik': 'cik', 'Ticker': 'ticker', 'Company': 'name'}
        )

        # The ticker dataset may hold several rows per CIK (e.g. multiple
        # share classes). Merging against it as-is would duplicate popular
        # companies and make the head(N) tier cuts count those duplicates,
        # so keep one exchange per CIK (the first one listed).
        exchange_lookup = (
            get_all_companies()[['cik', 'exchange']]
            .drop_duplicates(subset='cik')
        )

        result = popular_df.merge(exchange_lookup, on='cik', how='left')

        # Companies missing from the ticker dataset have no exchange.
        result['exchange'] = result['exchange'].fillna('Unknown')

        # Tier cuts rely on the row order of the popular-stocks list.
        if tier == PopularityTier.MEGA_CAP:
            result = result.head(10)
        elif tier == PopularityTier.POPULAR:
            result = result.head(50)
        elif tier == PopularityTier.MAINSTREAM:
            result = result.head(100)
        # EMERGING or None: keep everything.

        return result[['cik', 'ticker', 'name', 'exchange']].reset_index(drop=True)
    except Exception as e:
        log.error(f"Error fetching popular companies: {e}")
        return pd.DataFrame(columns=['cik', 'ticker', 'name', 'exchange'])
def get_random_sample(
    companies: Optional[pd.DataFrame] = None,
    n: int = 100,
    random_state: Optional[int] = None
) -> pd.DataFrame:
    """
    Get a random sample of companies.

    Args:
        companies: DataFrame to sample from (if None, uses all companies).
        n: Number of companies to sample. Values larger than the frame are
            capped at its length; negative values are treated as 0.
        random_state: Random seed for reproducibility.

    Returns:
        DataFrame with the sampled rows, re-indexed from 0. An empty input
        is returned as an (empty) copy. If sampling itself fails, the error
        is logged and the first rows of the input are returned instead.

    Example:
        >>> random_100 = get_random_sample(n=100, random_state=42)
        >>> nasdaq_sample = get_random_sample(get_companies_by_exchanges('Nasdaq'), n=50)
    """
    if companies is None:
        companies = get_all_companies()

    if len(companies) == 0:
        return companies.copy()

    # Clamp to [0, len(companies)]. Without the lower bound a negative n makes
    # DataFrame.sample raise, and the head() fallback below would then return
    # almost the entire frame (head with a negative count drops only the tail).
    sample_size = max(0, min(n, len(companies)))

    try:
        return companies.sample(n=sample_size, random_state=random_state).reset_index(drop=True)
    except Exception as e:
        log.error(f"Error sampling companies: {e}")
        return companies.head(sample_size).reset_index(drop=True)
def get_stratified_sample(
    companies: Optional[pd.DataFrame] = None,
    n: int = 100,
    stratify_by: str = 'exchange',
    random_state: Optional[int] = None
) -> pd.DataFrame:
    """
    Get a stratified sample of companies that roughly keeps the proportions
    of the values in one column.

    Every category except the last one listed by ``value_counts`` receives
    ``int(n * proportion)`` rows, but at least 1. The last category receives
    whatever is left of ``n`` (never less than 0). Each category's share is
    capped at the number of rows it actually has. If the combined sample is
    larger than ``n`` (possible because of the at-least-1 rule), it is
    trimmed by a further random draw.

    Args:
        companies: DataFrame to sample from (if None, uses all companies).
        n: Total number of companies to sample.
        stratify_by: Column to stratify by (default: 'exchange').
        random_state: Random seed for reproducibility; the same seed is used
            for every category.

    Returns:
        DataFrame with the stratified sample. Falls back to a plain random
        sample when the input is empty, the column is missing, or an error
        occurs. When no category yields any rows, an empty frame with the
        input's columns is returned.

    Example:
        >>> # Sample maintaining exchange proportions
        >>> stratified = get_stratified_sample(n=200, stratify_by='exchange')
    """
    if companies is None:
        companies = get_all_companies()

    if len(companies) == 0 or stratify_by not in companies.columns:
        return get_random_sample(companies, n, random_state)

    try:
        proportions = companies[stratify_by].value_counts(normalize=True)

        samples = []
        remaining_n = n

        for category, prop in proportions.items():
            category_companies = companies[companies[stratify_by] == category]

            if category == proportions.index[-1]:
                # Last category absorbs the remainder. The at-least-1 rule
                # for earlier categories can overspend n, so never go below 0.
                category_n = max(0, remaining_n)
            else:
                category_n = max(1, int(n * prop))
                remaining_n -= category_n

            # Cannot draw more rows than the category holds.
            category_n = min(category_n, len(category_companies))
            if category_n > 0:
                samples.append(
                    get_random_sample(category_companies, category_n, random_state)
                )

        if not samples:
            # Nothing drawn (e.g. n <= 0 with a single category, or a column
            # with no non-null values): keep the caller's schema.
            return companies.head(0).reset_index(drop=True)

        result = pd.concat(samples, ignore_index=True)

        # The at-least-1 rule may have produced more than n rows.
        if len(result) > n:
            result = get_random_sample(result, n, random_state)

        return result
    except Exception as e:
        log.error(f"Error creating stratified sample: {e}")
        return get_random_sample(companies, n, random_state)
def get_top_companies_by_metric(
    companies: Optional[pd.DataFrame] = None,
    n: int = 100,
    metric: str = 'name',
    ascending: bool = True
) -> pd.DataFrame:
    """
    Return the first N companies after sorting by one column.

    Args:
        companies: DataFrame to select from (if None, uses all companies).
        n: Number of rows to return.
        metric: Column to sort by (default: 'name', i.e. alphabetical).
        ascending: True for ascending order, False for descending.

    Returns:
        The first ``n`` rows of the sorted frame, re-indexed from 0.
        When the frame is empty, the column does not exist, or sorting
        fails (logged), the first ``n`` rows of the unsorted input are
        returned as a copy with their original index.

    Example:
        >>> # Top 50 companies alphabetically by name
        >>> top_alpha = get_top_companies_by_metric(n=50, metric='name')
        >>> # Top 100 popular companies by ticker (reverse alphabetical)
        >>> top_tickers = get_top_companies_by_metric(
        ...     get_popular_companies(), n=100, metric='ticker', ascending=False)
    """
    if companies is None:
        companies = get_all_companies()

    can_sort = len(companies) > 0 and metric in companies.columns
    if can_sort:
        try:
            ranked = companies.sort_values(by=metric, ascending=ascending)
            return ranked.head(n).reset_index(drop=True)
        except Exception as e:
            log.error(f"Error sorting companies by {metric}: {e}")

    # Unsortable input or failed sort: hand back the leading rows untouched.
    return companies.head(n).copy()
def filter_companies(
    companies: pd.DataFrame,
    ticker_list: Optional[List[str]] = None,
    name_contains: Optional[str] = None,
    cik_list: Optional[List[int]] = None,
    custom_filter: Optional[Callable[[pd.DataFrame], pd.DataFrame]] = None
) -> pd.DataFrame:
    """
    Keep only the companies that satisfy every supplied criterion.

    Criteria are applied in this order: tickers, name, CIKs, custom filter.
    The input frame is never modified.

    Args:
        companies: DataFrame to filter.
        ticker_list: Tickers to keep (compared case-insensitively).
        name_contains: Text the company name must contain (case-insensitive;
            handed to ``Series.str.contains``, which treats it as a regular
            expression by default). Missing names never match.
        cik_list: CIKs to keep.
        custom_filter: Function that takes the frame filtered so far and
            returns a DataFrame.

    Returns:
        Filtered DataFrame, re-indexed from 0. If a step raises, the error
        is logged and the frame as filtered up to that point is returned
        with its index left as it was.

    Example:
        >>> faang = filter_companies(
        ...     companies, ticker_list=['AAPL', 'AMZN', 'NFLX', 'GOOGL', 'META'])
        >>> inc_companies = filter_companies(companies, name_contains='Inc')
    """
    result = companies.copy()

    try:
        if ticker_list is not None:
            wanted_tickers = [symbol.upper() for symbol in ticker_list]
            ticker_match = result['ticker'].str.upper().isin(wanted_tickers)
            result = result[ticker_match]

        if name_contains is not None:
            name_match = result['name'].str.contains(name_contains, case=False, na=False)
            result = result[name_match]

        if cik_list is not None:
            cik_match = result['cik'].isin(cik_list)
            result = result[cik_match]

        if custom_filter is not None:
            result = custom_filter(result)

        return result.reset_index(drop=True)
    except Exception as e:
        log.error(f"Error filtering companies: {e}")
        return result
def exclude_companies(
    companies: pd.DataFrame,
    ticker_list: Optional[List[str]] = None,
    name_contains: Optional[str] = None,
    cik_list: Optional[List[int]] = None
) -> pd.DataFrame:
    """
    Drop the companies that match any supplied criterion.

    Criteria are applied in this order: tickers, name, CIKs. The input
    frame is never modified.

    Args:
        companies: DataFrame to filter.
        ticker_list: Tickers to remove (compared case-insensitively).
        name_contains: Remove companies whose name contains this text
            (case-insensitive; handed to ``Series.str.contains``, which
            treats it as a regular expression by default). Missing names
            are kept.
        cik_list: CIKs to remove.

    Returns:
        DataFrame without the matching companies, re-indexed from 0. If a
        step raises, the error is logged and the frame as reduced up to
        that point is returned with its index left as it was.

    Example:
        >>> non_financial = exclude_companies(
        ...     companies, ticker_list=['JPM', 'GS', 'C', 'BAC'])
        >>> non_corp = exclude_companies(companies, name_contains='Corp')
    """
    result = companies.copy()

    try:
        if ticker_list is not None:
            banned_tickers = [symbol.upper() for symbol in ticker_list]
            ticker_hit = result['ticker'].str.upper().isin(banned_tickers)
            result = result[~ticker_hit]

        if name_contains is not None:
            name_hit = result['name'].str.contains(name_contains, case=False, na=False)
            result = result[~name_hit]

        if cik_list is not None:
            cik_hit = result['cik'].isin(cik_list)
            result = result[~cik_hit]

        return result.reset_index(drop=True)
    except Exception as e:
        log.error(f"Error excluding companies: {e}")
        return result
def combine_company_sets(company_sets: List[pd.DataFrame]) -> pd.DataFrame:
    """
    Union of several company DataFrames, de-duplicated by CIK.

    Args:
        company_sets: Company DataFrames to merge.

    Returns:
        The frames stacked in the given order with only the first row kept
        for each 'cik', re-indexed from 0. An empty list yields an empty
        frame with columns ['cik', 'ticker', 'name', 'exchange']. If
        combining fails, the error is logged and a copy of the first frame
        is returned.

    Example:
        >>> nyse = get_companies_by_exchanges('NYSE')
        >>> popular = get_popular_companies()
        >>> combined = combine_company_sets([nyse, popular])
    """
    if not company_sets:
        return pd.DataFrame(columns=['cik', 'ticker', 'name', 'exchange'])

    try:
        stacked = pd.concat(company_sets, ignore_index=True)
        # CIK is the primary key: keep the first occurrence of each.
        return stacked.drop_duplicates(subset=['cik']).reset_index(drop=True)
    except Exception as e:
        log.error(f"Error combining company sets: {e}")
        # company_sets is known to be non-empty here (guarded above).
        return company_sets[0].copy()
def intersect_company_sets(company_sets: List[pd.DataFrame]) -> pd.DataFrame:
    """
    Intersection of several company DataFrames, matched by CIK.

    Args:
        company_sets: Company DataFrames to intersect.

    Returns:
        The rows of the first frame whose 'cik' appears in every other
        frame, re-indexed from 0. An empty list yields an empty frame with
        columns ['cik', 'ticker', 'name', 'exchange']; a single frame is
        returned as a copy. If intersecting fails, the error is logged and
        a copy of the first frame is returned.

    Example:
        >>> nyse = get_companies_by_exchanges('NYSE')
        >>> popular = get_popular_companies()
        >>> nyse_popular = intersect_company_sets([nyse, popular])
    """
    if not company_sets:
        return pd.DataFrame(columns=['cik', 'ticker', 'name', 'exchange'])

    first = company_sets[0]
    if len(company_sets) == 1:
        return first.copy()

    try:
        # Narrow the CIKs of the first frame by every subsequent frame.
        shared_ciks = set(first['cik'])
        for other in company_sets[1:]:
            shared_ciks &= set(other['cik'])

        in_every_set = first['cik'].isin(shared_ciks)
        return first[in_every_set].reset_index(drop=True)
    except Exception as e:
        log.error(f"Error intersecting company sets: {e}")
        return first.copy()
def get_companies_by_industry(
    sic: Optional[Union[int, List[int]]] = None,
    sic_range: Optional[tuple[int, int]] = None,
    sic_description_contains: Optional[str] = None
) -> pd.DataFrame:
    """
    Get companies by industry classification using SIC (Standard Industrial
    Classification) codes.

    Always works on the comprehensive company dataset
    (``get_all_companies(use_comprehensive=True)``). Every criterion that is
    supplied must match; with no criteria, all companies are returned.

    Args:
        sic: Single SIC code or a list-like of SIC codes to match exactly.
            Any scalar (including numpy integers) is accepted as a single code.
        sic_range: Tuple of (min_sic, max_sic) for range filtering (inclusive).
        sic_description_contains: Text to search for in the SIC description
            (case-insensitive; missing descriptions never match).

    Returns:
        DataFrame of matching companies with the comprehensive columns,
        re-indexed from 0. On error the problem is logged and an empty frame
        with the same columns is returned.

    Example:
        >>> # Pharmaceutical companies (SIC 2834)
        >>> pharma = get_companies_by_industry(sic=2834)
        >>> # Biotech range (SIC 2833-2836)
        >>> biotech = get_companies_by_industry(sic_range=(2833, 2836))
        >>> # All companies with "software" in industry description
        >>> software = get_companies_by_industry(sic_description_contains='software')
        >>> # Multiple specific SIC codes
        >>> healthcare = get_companies_by_industry(sic=[2834, 2835, 2836])

    Note:
        SIC Code Ranges:
        - 0100-0999: Agriculture, Forestry, Fishing
        - 1000-1499: Mining
        - 1500-1799: Construction
        - 2000-3999: Manufacturing
        - 4000-4999: Transportation, Communications, Utilities
        - 5000-5199: Wholesale Trade
        - 5200-5999: Retail Trade
        - 6000-6799: Finance, Insurance, Real Estate
        - 7000-8999: Services
        - 9100-9729: Public Administration
    """
    # Industry filtering needs the SIC columns, so use the comprehensive data.
    companies = get_all_companies(use_comprehensive=True)

    # No up-front copy of the (large, cached) frame: boolean indexing and
    # reset_index below each build a new DataFrame, and nothing here mutates
    # `companies` in place.
    result = companies

    try:
        if sic is not None:
            # Wrap any scalar (int, numpy integer, ...) so isin() always
            # receives a list-like; isin() rejects bare scalars.
            sic_codes = list(sic) if pd.api.types.is_list_like(sic) else [sic]
            result = result[result['sic'].isin(sic_codes)]

        if sic_range is not None:
            min_sic, max_sic = sic_range
            result = result[
                (result['sic'] >= min_sic) &
                (result['sic'] <= max_sic)
            ]

        if sic_description_contains is not None:
            result = result[
                result['sic_description'].str.contains(
                    sic_description_contains,
                    case=False,
                    na=False
                )
            ]

        return result.reset_index(drop=True)
    except Exception as e:
        log.error(f"Error filtering companies by industry: {e}")
        return pd.DataFrame(columns=companies.columns)
def get_companies_by_state(
    states: Union[str, List[str]],
    include_description: bool = True
) -> pd.DataFrame:
    """
    Get companies by state of incorporation.

    Always works on the comprehensive company dataset
    (``get_all_companies(use_comprehensive=True)``).

    Args:
        states: Single state code or list of state codes
            (e.g., 'DE', 'CA', ['DE', 'NV']). Compared case-insensitively.
        include_description: If True (default), the
            'state_of_incorporation_description' column is kept in the
            output; if False, it is dropped.

    Returns:
        DataFrame with companies incorporated in the specified state(s),
        re-indexed from 0. On error the problem is logged and an empty
        frame with the same columns is returned.

    Example:
        >>> # Delaware corporations
        >>> de_corps = get_companies_by_state('DE')
        >>> # Delaware and Nevada corporations
        >>> de_nv = get_companies_by_state(['DE', 'NV'])
        >>> # California corporations, without the description column
        >>> ca_corps = get_companies_by_state('CA', include_description=False)

    Note:
        Common states of incorporation:
        - DE: Delaware (most common for public companies)
        - NV: Nevada (popular for tax benefits)
        - CA: California
        - NY: New York
        - TX: Texas
    """
    description_column = 'state_of_incorporation_description'

    if isinstance(states, str):
        states = [states]

    # State filtering needs the incorporation columns: comprehensive data.
    companies = get_all_companies(use_comprehensive=True)

    try:
        # Normalize both sides to uppercase for a case-insensitive match.
        states_upper = [s.upper() for s in states]
        in_states = companies['state_of_incorporation'].str.upper().isin(states_upper)
        result = companies[in_states].reset_index(drop=True)

        if not include_description:
            # Honour the documented flag; tolerate the column being absent.
            result = result.drop(columns=[description_column], errors='ignore')

        return result
    except Exception as e:
        log.error(f"Error filtering companies by state {states}: {e}")
        # Keep the empty fallback's schema consistent with the flag.
        fallback_columns = [
            col for col in companies.columns
            if include_description or col != description_column
        ]
        return pd.DataFrame(columns=fallback_columns)
# Convenience functions for common use cases
def get_faang_companies() -> pd.DataFrame:
    """
    Get FAANG companies (Facebook/Meta, Apple, Amazon, Netflix, Google).

    Selects the tickers META, AAPL, AMZN, NFLX and GOOGL from the
    ticker-only company list.
    """
    faang_tickers = ['META', 'AAPL', 'AMZN', 'NFLX', 'GOOGL']
    universe = get_all_companies()
    return filter_companies(universe, ticker_list=faang_tickers)
def get_tech_giants() -> pd.DataFrame:
    """
    Get major technology companies.

    Selects a fixed list of 13 tickers from the ticker-only company list.
    """
    universe = get_all_companies()
    return filter_companies(
        universe,
        ticker_list=[
            'AAPL', 'MSFT', 'GOOGL', 'AMZN', 'META', 'TSLA', 'NVDA',
            'NFLX', 'ADBE', 'CRM', 'ORCL', 'INTC', 'CSCO',
        ],
    )
def get_dow_jones_sample() -> pd.DataFrame:
    """
    Get a sample of Dow Jones Industrial Average companies.

    Selects a fixed, hard-coded list of 30 tickers from the ticker-only
    company list; the list is not refreshed from any index provider.
    """
    universe = get_all_companies()
    return filter_companies(
        universe,
        ticker_list=[
            'AAPL', 'MSFT', 'UNH', 'GS', 'HD', 'CAT', 'MCD', 'V', 'AXP', 'BA',
            'TRV', 'JPM', 'IBM', 'JNJ', 'WMT', 'CVX', 'NKE', 'MRK', 'KO', 'DIS',
            'MMM', 'DOW', 'CSCO', 'VZ', 'INTC', 'WBA', 'CRM', 'HON', 'AMGN', 'PG',
        ],
    )
# Industry-specific convenience functions (require comprehensive dataset)
def get_pharmaceutical_companies() -> pd.DataFrame:
    """
    Get pharmaceutical preparation companies (SIC 2834).

    Thin wrapper around ``get_companies_by_industry`` for the
    pharmaceutical preparations industry (prescription drugs, biologics,
    vaccines).
    """
    pharma_sic = 2834
    return get_companies_by_industry(sic=pharma_sic)
def get_biotechnology_companies() -> pd.DataFrame:
    """
    Get biotechnology companies (SIC 2833-2836, inclusive).

    Thin wrapper around ``get_companies_by_industry`` covering biotech and
    related pharmaceutical industries.
    """
    biotech_sic_range = (2833, 2836)
    return get_companies_by_industry(sic_range=biotech_sic_range)
def get_software_companies() -> pd.DataFrame:
    """
    Get software and computer programming companies (SIC 7371-7379, inclusive).

    Thin wrapper around ``get_companies_by_industry`` covering software
    publishing, programming, and related services.
    """
    software_sic_range = (7371, 7379)
    return get_companies_by_industry(sic_range=software_sic_range)
def get_semiconductor_companies() -> pd.DataFrame:
    """
    Get semiconductor and electronic component companies (SIC 3674).

    Thin wrapper around ``get_companies_by_industry`` for manufacturers of
    semiconductors and related devices.
    """
    semiconductor_sic = 3674
    return get_companies_by_industry(sic=semiconductor_sic)
def get_banking_companies() -> pd.DataFrame:
    """
    Get commercial banking companies (SIC 6020-6029, inclusive).

    Thin wrapper around ``get_companies_by_industry`` covering national
    and state commercial banks.
    """
    banking_sic_range = (6020, 6029)
    return get_companies_by_industry(sic_range=banking_sic_range)
def get_investment_companies() -> pd.DataFrame:
    """
    Get investment companies and funds (SIC 6200-6299, inclusive).

    Thin wrapper around ``get_companies_by_industry`` covering securities
    brokers, dealers, investment advisors, and funds.
    """
    investment_sic_range = (6200, 6299)
    return get_companies_by_industry(sic_range=investment_sic_range)
def get_insurance_companies() -> pd.DataFrame:
    """
    Get insurance companies (SIC 6300-6399, inclusive).

    Thin wrapper around ``get_companies_by_industry`` covering life,
    health, property, and casualty insurers.
    """
    insurance_sic_range = (6300, 6399)
    return get_companies_by_industry(sic_range=insurance_sic_range)
def get_real_estate_companies() -> pd.DataFrame:
    """
    Get real estate companies (SIC 6500-6599, inclusive).

    Thin wrapper around ``get_companies_by_industry`` covering REITs, real
    estate operators, and developers.
    """
    real_estate_sic_range = (6500, 6599)
    return get_companies_by_industry(sic_range=real_estate_sic_range)
def get_oil_gas_companies() -> pd.DataFrame:
    """
    Get oil and gas extraction companies (SIC 1300-1399, inclusive).

    Thin wrapper around ``get_companies_by_industry`` covering crude
    petroleum, natural gas, and oil/gas field services.
    """
    oil_gas_sic_range = (1300, 1399)
    return get_companies_by_industry(sic_range=oil_gas_sic_range)
def get_retail_companies() -> pd.DataFrame:
    """
    Get retail trade companies (SIC 5200-5999, inclusive).

    Thin wrapper around ``get_companies_by_industry`` covering general
    merchandise, apparel, food, and other retail stores.
    """
    retail_sic_range = (5200, 5999)
    return get_companies_by_industry(sic_range=retail_sic_range)

View File

@@ -0,0 +1,300 @@
# Portfolio Manager Database - Manual Maintenance Guide
This guide explains how to manually add, update, and maintain portfolio manager information in the EdgarTools database.
## File Location
**Database File**: `edgar/data/portfolio_managers.json` (path relative to the repository root)
## Database Structure
The JSON file has two main sections:
### 1. Metadata Section
```json
{
"metadata": {
"version": "2024.12.01",
"description": "Curated database of portfolio managers for major 13F filing institutions",
"total_companies": 15,
"total_managers": 25,
"last_updated": "2024-12-01",
"sources": ["company_websites", "sec_filings", "press_releases", "public_records"]
}
}
```
**Update when adding managers:**
- Increment `total_companies` when adding new companies
- Increment `total_managers` when adding new individual managers
- Update `last_updated` to current date
### 2. Managers Section
Each company entry follows this structure:
```json
{
"managers": {
"company_key": {
"company_name": "Full Legal Company Name",
"aum_billions": 123,
"match_patterns": ["pattern1", "pattern2", "pattern3"],
"website": "https://www.company.com",
"managers": [
{
"name": "Manager Full Name",
"title": "Official Title",
"status": "active|retired|deceased|former",
"confidence": "high|medium|low",
"sources": ["source1", "source2"],
"start_date": "YYYY-MM-DD",
"end_date": "YYYY-MM-DD",
"last_verified": "YYYY-MM-DD",
"note": "Additional context or details"
}
]
}
}
}
```
## Adding New Companies
### Step 1: Choose Company Key
Use lowercase, underscore-separated format:
- ✅ Good: `berkshire_hathaway`, `goldman_sachs`, `two_sigma`
- ❌ Bad: `Berkshire-Hathaway`, `goldmanSachs`, `TwoSigma`
### Step 2: Research Company Information
Gather the following data:
**Required:**
- Full legal company name (from SEC filings)
- Current AUM in billions (approximate is fine)
- Company website URL
- Portfolio manager names and titles
**Recommended Sources:**
1. Company website "Leadership" or "Team" pages
2. Latest 10-K filing (Part III, Item 10 - Directors, Executive Officers and Corporate Governance)
3. Latest DEF 14A proxy statement
4. Recent press releases
5. Financial news articles
### Step 3: Add Company Entry
```json
{
"new_company": {
"company_name": "New Company Inc",
"aum_billions": 50,
"match_patterns": ["new company", "newco", "nc inc"],
"website": "https://www.newcompany.com",
"managers": []
}
}
```
**Match Patterns Tips:**
- Include common variations of company name
- Include stock ticker symbols if applicable
- Include abbreviations commonly used
- All patterns should be lowercase
### Step 4: Add Manager Information
```json
{
"managers": [
{
"name": "Jane Smith",
"title": "Chief Investment Officer",
"status": "active",
"confidence": "high",
"sources": ["company_website", "sec_filing_2024"],
"start_date": "2020-01-01",
"last_verified": "2024-12-01",
"note": "Former Goldman Sachs managing director"
}
]
}
```
## Manager Status Definitions
- **active**: Currently in active management role
- **retired**: Retired but may retain advisory role
- **deceased**: Deceased (include year in status like "deceased_2023")
- **former**: No longer with the organization
## Confidence Levels
- **high**: Verified from multiple official sources (company website + SEC filing)
- **medium**: Verified from single official source
- **low**: Approximate or historical information
## Common Sources
**Primary (High Confidence):**
- `company_website` - Official leadership pages
- `sec_filings` - 10-K, DEF 14A proxy statements
- `annual_report_2024` - Latest annual report
**Secondary (Medium Confidence):**
- `press_releases` - Official company announcements
- `financial_press` - WSJ, FT, Bloomberg articles
- `industry_publications` - Trade publications
**Tertiary (Low Confidence):**
- `linkedin_profile` - Professional profiles
- `wikipedia` - Publicly edited sources
- `interview_transcript` - Media interviews
## Example: Adding a New Manager
Let's add a new company "Example Capital Management":
```json
{
"example_capital": {
"company_name": "Example Capital Management LLC",
"aum_billions": 25,
"match_patterns": ["example capital", "example", "ecm"],
"website": "https://www.examplecapital.com",
"managers": [
{
"name": "John Doe",
"title": "Founder & Chief Investment Officer",
"status": "active",
"confidence": "high",
"sources": ["company_website", "sec_filing_2024"],
"start_date": "2015-01-01",
"last_verified": "2024-12-01",
"note": "Former hedge fund analyst at Two Sigma"
},
{
"name": "Sarah Wilson",
"title": "Portfolio Manager",
"status": "active",
"confidence": "medium",
"sources": ["company_website"],
"start_date": "2018-06-01",
"last_verified": "2024-12-01",
"note": "Specializes in technology sector investments"
}
]
}
}
```
## Data Validation Checklist
Before adding entries, verify:
- [ ] Company key is lowercase with underscores
- [ ] Company name matches legal entity in SEC filings
- [ ] AUM is reasonable (check recent 13F filings)
- [ ] Match patterns are comprehensive and lowercase
- [ ] Manager names are spelled correctly (double-check sources)
- [ ] Status is appropriate (active/retired/deceased/former)
- [ ] Confidence level matches quality of sources
- [ ] Dates are in YYYY-MM-DD format
- [ ] Sources are specific and verifiable
- [ ] Notes provide helpful context
## Updating Existing Entries
### Manager Status Changes
When a manager retires, is promoted, or leaves:
```json
{
"name": "John Smith",
"title": "Former CEO",
"status": "retired",
"end_date": "2024-06-30",
"note": "Retired June 2024, remains on board of directors"
}
```
### Adding New Managers to Existing Companies
Simply add to the managers array:
```json
{
"managers": [
// ... existing managers ...
{
"name": "New Manager Name",
"title": "Chief Investment Officer",
"status": "active",
// ... complete manager entry
}
]
}
```
## Testing Your Changes
After making changes, test the functionality:
```python
import edgar
# Test with a company you added/modified
company = edgar.Company("COMPANY_TICKER")
filing = company.get_filings(form="13F-HR").head(1)[0]
thirteen_f = filing.obj()
# Check if your managers are returned
managers = thirteen_f.get_portfolio_managers()
print(f"Found managers: {managers}")
# Test manager info summary
summary = thirteen_f.get_manager_info_summary()
print(f"Manager count: {summary['external_sources']['manager_count']}")
```
## Common Mistakes to Avoid
1. **Inconsistent naming**: Use exact legal names from SEC filings
2. **Missing match patterns**: Add common abbreviations and variations
3. **Outdated information**: Always verify against recent sources
4. **Low confidence data**: Avoid unverified Wikipedia or blog sources
5. **JSON syntax errors**: Use a JSON validator before saving
6. **Forgetting metadata**: Update total counts and last_updated date
## Priority Companies to Add
Focus on top 13F filers by AUM:
1. **Immediate Priority (AUM > $100B):**
- Already added: BlackRock, Vanguard, Fidelity, State Street
- Still needed: T. Rowe Price, Capital Group, Invesco
2. **High Priority (AUM $50-100B):**
- Already added: AQR, Citadel, Two Sigma, Renaissance
- Still needed: Millennium, D.E. Shaw, Baupost Group
3. **Medium Priority (AUM $20-50B):**
- Already added: Elliott, Pershing Square, Icahn
- Still needed: Third Point, ValueAct, Jana Partners
This systematic approach will provide coverage for the majority of institutional investment assets tracked in 13F filings.
---
## Enhancement Planning
**Current Status**: As of January 2025, this database covers 21 companies with verified CIKs (53.8% by count, 63.5% by AUM).
**Enhancement Roadmap**: See `docs-internal/features/FEAT-021-portfolio-manager-enhancement-followup.md` for:
- Systematic expansion plans to reach 85% AUM coverage
- Quarterly maintenance automation
- International firm integration strategy
- Historical manager tracking capabilities
**Priority Targets for Next Expansion**:
1. **Vanguard Group** ($8.1T AUM) - Research filing patterns
2. **Capital Group Companies** ($2.8T AUM) - American Funds family
3. **T. Rowe Price Group** ($1.6T AUM) - Major active manager
4. **Wellington Management** ($1.3T AUM) - Institutional specialist
For enhancement requests or database improvements, see the follow-up planning document and contribute via GitHub issues.

View File

@@ -0,0 +1,41 @@
import sys
from functools import lru_cache
import pandas as pd
import pyarrow.parquet as pq
# Dynamic import based on Python version
if sys.version_info >= (3, 9):
from importlib import resources
else:
import importlib_resources as resources
__all__ = ['read_parquet_from_package', 'read_pyarrow_from_package', 'read_csv_from_package']
@lru_cache(maxsize=1)
def read_parquet_from_package(parquet_filename: str):
    """
    Load a parquet file bundled in ``edgar.reference.data`` as a pandas DataFrame.

    The most recent result is memoised (``maxsize=1``): calling again with
    the same filename returns the same DataFrame object without re-reading
    the file, so copy it before mutating.

    Args:
        parquet_filename: File name of the parquet resource inside the package.

    Returns:
        The DataFrame produced by ``pandas.read_parquet``.
    """
    data_package = 'edgar.reference.data'
    # resources.path yields a real filesystem path for the packaged resource.
    with resources.path(data_package, parquet_filename) as resolved_path:
        return pd.read_parquet(resolved_path)
def read_pyarrow_from_package(parquet_filename: str):
    """
    Load a parquet file bundled in ``edgar.reference.data`` as a pyarrow Table.

    Args:
        parquet_filename: File name of the parquet resource inside the package.

    Returns:
        The table produced by ``pyarrow.parquet.read_table``.
    """
    data_package = 'edgar.reference.data'
    # resources.path yields a real filesystem path for the packaged resource.
    with resources.path(data_package, parquet_filename) as resolved_path:
        return pq.read_table(resolved_path)
def read_csv_from_package(csv_filename: str, **pandas_kwargs):
    """
    Load a CSV file bundled in ``edgar.reference.data`` as a pandas DataFrame.

    Args:
        csv_filename: File name of the CSV resource inside the package.
        **pandas_kwargs: Extra keyword arguments forwarded unchanged to
            ``pandas.read_csv``.

    Returns:
        The DataFrame produced by ``pandas.read_csv``.
    """
    data_package = 'edgar.reference.data'
    # resources.path yields a real filesystem path for the packaged resource.
    with resources.path(data_package, csv_filename) as resolved_path:
        return pd.read_csv(resolved_path, **pandas_kwargs)

View File

@@ -0,0 +1,37 @@
Exhibit No.,Description,Form Types Involved,Regex
1,Underwriting Agreement,"S-1, S-3, F-1, F-3, S-8, S-11, etc.",^EX-1\b
2,"Plan of Acquisition, Reorganization, Arrangement, Liquidation or Succession","Commonly used across various forms including S-4, S-1, S-11, 10-K, etc.",^EX-2\b
3,Articles of Incorporation and Bylaws,"S-1, S-3, F-1, F-3, S-8, S-11, etc.",^EX-3(\.\d+)?\b
4,"Instruments Defining the Rights of Security Holders, including Indentures",Required across various form types,^EX-4\b
5,Opinion regarding Legality,Typically required across all form types,^EX-5\b
6,Reserved,N/A,^EX-6\b
7,Correspondence from Independent Accountants,Limited use (specific forms only),^EX-7\b
8,Opinion re Tax Matters,"S-11, F-1, F-3, S-3, S-8",^EX-8\b
9,Voting Trust Agreements,Mostly required in S-4 and other specific forms,^EX-9\b
10,Material Contracts,Required widely across forms for significant contracts,^EX-10(\.\d+)?\b
11,Statement re Computation of Per Share Earnings,Commonly required where applicable,^EX-11\b
12,Statements re Computation of Ratios,Required in forms where ratios are relevant,^EX-12\b
13,Annual Report to Security Holders,Typically part of 10-K or annual disclosures,^EX-13\b
14,Code of Ethics,Required disclosure for most forms,^EX-14\b
15,Letter re Unaudited Interim Financial Information,Used in specific situations across various forms,^EX-15\b
16,Letter re Change in Certifying Accountant,Used primarily in 10-K and 10-Q,^EX-16\b
17,Correspondence on Departure of Director,"Occasionally required, depending on the circumstances",^EX-17\b
18,Letter re Change in Accounting Principles,Used when significant changes in accounting principles occur,^EX-18\b
19,Report Furnished to Security Holders,Often part of 10-Q or similar reports,^EX-19\b
20,Other Documents or Statements to Security Holders,"As applicable, varies by form and content required",^EX-20\b
21,Subsidiaries of the Registrant,Required across various forms depending on the structure of the registrant,^EX-21\b
22,Published Report Regarding Matters Submitted to Vote of Security Holders,As applicable to the voting matters,^EX-22\b
23,Consents of Experts and Counsel,Required across various forms when expert consents are necessary,^EX-23(\.\d+)?\b
24,Power of Attorney,"As required, often associated with filings involving multiple signatories",^EX-24\b
25,Statement of Eligibility of Trustee,Required in filings involving indentures under the Trust Indenture Act,^EX-25\b
26,Invitation for Competitive Bids,Required in specific cases involving competitive bids,^EX-26\b
27-30,Reserved,N/A,^EX-(27|28|29|30)\b
31,Rule 13a-14(a)/15d-14(a) Certifications,Common certification required across various forms,^EX-31(\.\d+)?\b
32,Section 1350 Certifications,Required under specific legal stipulations,^EX-32\b
33-34,Assessment and Attestation Reports regarding Compliance,Specific to asset-backed securities,^EX-(33|34)\b
35-36,Servicer Compliance Statement and Depositor Certification,Specific to asset-backed securities,^EX-(35|36)\b
95,Mine Safety Disclosure Exhibit,Specific to registrants involved in mining operations,^EX-95\b
99,Additional Exhibits,As required by specific circumstances or regulatory demands,^EX-99(\.\d+)?\b
100-101,XBRL-Related Documents and Interactive Data File,Required for electronic data submission,^EX-(100|101)\b
102-103,Asset Data File and Asset Related Documents,Specific to asset-backed securities filings,^EX-(102|103)\b
104-106,Reserved/Static Pool PDF,N/A or specific to asset-backed securities,^EX-(104|105|106)\b
1 Exhibit No. Description Form Types Involved Regex
2 1 Underwriting Agreement S-1, S-3, F-1, F-3, S-8, S-11, etc. ^EX-1\b
3 2 Plan of Acquisition, Reorganization, Arrangement, Liquidation or Succession Commonly used across various forms including S-4, S-1, S-11, 10-K, etc. ^EX-2\b
4 3 Articles of Incorporation and Bylaws S-1, S-3, F-1, F-3, S-8, S-11, etc. ^EX-3(\.\d+)?\b
5 4 Instruments Defining the Rights of Security Holders, including Indentures Required across various form types ^EX-4\b
6 5 Opinion regarding Legality Typically required across all form types ^EX-5\b
7 6 Reserved N/A ^EX-6\b
8 7 Correspondence from Independent Accountants Limited use (specific forms only) ^EX-7\b
9 8 Opinion re Tax Matters S-11, F-1, F-3, S-3, S-8 ^EX-8\b
10 9 Voting Trust Agreements Mostly required in S-4 and other specific forms ^EX-9\b
11 10 Material Contracts Required widely across forms for significant contracts ^EX-10(\.\d+)?\b
12 11 Statement re Computation of Per Share Earnings Commonly required where applicable ^EX-11\b
13 12 Statements re Computation of Ratios Required in forms where ratios are relevant ^EX-12\b
14 13 Annual Report to Security Holders Typically part of 10-K or annual disclosures ^EX-13\b
15 14 Code of Ethics Required disclosure for most forms ^EX-14\b
16 15 Letter re Unaudited Interim Financial Information Used in specific situations across various forms ^EX-15\b
17 16 Letter re Change in Certifying Accountant Used primarily in 10-K and 10-Q ^EX-16\b
18 17 Correspondence on Departure of Director Occasionally required, depending on the circumstances ^EX-17\b
19 18 Letter re Change in Accounting Principles Used when significant changes in accounting principles occur ^EX-18\b
20 19 Report Furnished to Security Holders Often part of 10-Q or similar reports ^EX-19\b
21 20 Other Documents or Statements to Security Holders As applicable, varies by form and content required ^EX-20\b
22 21 Subsidiaries of the Registrant Required across various forms depending on the structure of the registrant ^EX-21\b
23 22 Published Report Regarding Matters Submitted to Vote of Security Holders As applicable to the voting matters ^EX-22\b
24 23 Consents of Experts and Counsel Required across various forms when expert consents are necessary ^EX-23(\.\d+)?\b
25 24 Power of Attorney As required, often associated with filings involving multiple signatories ^EX-24\b
26 25 Statement of Eligibility of Trustee Required in filings involving indentures under the Trust Indenture Act ^EX-25\b
27 26 Invitation for Competitive Bids Required in specific cases involving competitive bids ^EX-26\b
28 27-30 Reserved N/A `^EX-(27
29 31 Rule 13a-14(a)/15d-14(a) Certifications Common certification required across various forms ^EX-31(\.\d+)?\b
30 32 Section 1350 Certifications Required under specific legal stipulations ^EX-32\b
31 33-34 Assessment and Attestation Reports regarding Compliance Specific to asset-backed securities `^EX-(33
32 35-36 Servicer Compliance Statement and Depositor Certification Specific to asset-backed securities `^EX-(35
33 95 Mine Safety Disclosure Exhibit Specific to registrants involved in mining operations ^EX-95\b
34 99 Additional Exhibits As required by specific circumstances or regulatory demands ^EX-99(\.\d+)?\b
35 100-101 XBRL-Related Documents and Interactive Data File Required for electronic data submission `^EX-(100
36 102-103 Asset Data File and Asset Related Documents Specific to asset-backed securities filings `^EX-(102
37 104-106 Reserved/Static Pool PDF N/A or specific to asset-backed securities `^EX-(104

View File

@@ -0,0 +1,86 @@
Ticker,Company,Cik
AAPL,Apple Inc.,320193
MSFT,Microsoft Corporation,789019
AMZN,"Amazon.com, Inc.",1018724
NVDA,NVIDIA Corporation,1045810
TSLA,"Tesla, Inc.",1318605
GOOGL,Alphabet Inc. Class A,1652044
META,"Meta Platforms, Inc.",1326801
AMD,"Advanced Micro Devices, Inc.",2488
NFLX,"Netflix, Inc.",1065280
BRK.B,Berkshire Hathaway Inc.,1067983
V,Visa Inc.,1403161
JNJ,Johnson & Johnson,200406
PG,Procter & Gamble Co.,80424
JPM,JPMorgan Chase & Co.,19617
UNH,UnitedHealth Group Incorporated,731766
DIS,The Walt Disney Company,1744489
HD,"Home Depot, Inc.",354950
XOM,Exxon Mobil Corporation,34088
KO,Coca-Cola Company,21344
PEP,"PepsiCo, Inc.",77476
PFE,Pfizer Inc.,78003
MA,Mastercard Incorporated,1141391
ADBE,Adobe Inc.,796343
CRM,"Salesforce, Inc.",1108524
INTC,Intel Corporation,50863
CSCO,"Cisco Systems, Inc.",858877
NKE,"Nike, Inc.",320187
T,AT&T Inc.,732717
CMCSA,Comcast Corporation,1166691
VZ,Verizon Communications Inc.,732712
CVX,Chevron Corporation,93410
ABBV,AbbVie Inc.,1551152
MRK,"Merck & Co., Inc.",310158
BMY,Bristol-Myers Squibb Company,14272
WMT,Walmart Inc.,104169
MCD,McDonald's Corporation,63908
SBUX,Starbucks Corporation,829224
GS,"Goldman Sachs Group, Inc.",886982
MS,Morgan Stanley,895421
AXP,American Express Company,4962
C,Citigroup Inc.,831001
BA,Boeing Company,12927
DAL,"Delta Air Lines, Inc.",27904
LUV,Southwest Airlines Co.,92380
MAR,"Marriott International, Inc.",1048286
HLT,Hilton Worldwide Holdings Inc.,1585689
BKNG,Booking Holdings Inc.,1075531
PYPL,"PayPal Holdings, Inc.",1633917
SQ,"Square, Inc.",1512673
ZM,"Zoom Video Communications, Inc.",1585521
SNOW,Snowflake Inc.,1640147
UBER,"Uber Technologies, Inc.",1543151
LYFT,"Lyft, Inc.",1759509
ROKU,"Roku, Inc.",1428439
SPOT,Spotify Technology S.A.,1639920
SHOP,Shopify Inc.,1594805
EBAY,eBay Inc.,1065088
TWTR,"Twitter, Inc.",1418091
SNAP,Snap Inc.,1564408
PINS,"Pinterest, Inc.",1506293
PLTR,Palantir Technologies Inc.,1321655
ZI,ZoomInfo Technologies Inc.,1794515
DOCU,"DocuSign, Inc.",1261333
TWLO,Twilio Inc.,1447669
CRWD,"CrowdStrike Holdings, Inc.",1535527
NET,"Cloudflare, Inc.",1477333
DDOG,"Datadog, Inc.",1561550
MDB,"MongoDB, Inc.",1441816
ZS,"Zscaler, Inc.",1713683
OKTA,"Okta, Inc.",1660134
DBX,"Dropbox, Inc.",1467623
SMAR,Smartsheet Inc.,1366561
ASAN,"Asana, Inc.",1477720
RNG,"RingCentral, Inc.",1384905
PTON,"Peloton Interactive, Inc.",1639825
TTD,"The Trade Desk, Inc.",1671933
HUBS,"HubSpot, Inc.",1404655
COUP,Coupa Software Incorporated,1385867
AYX,"Alteryx, Inc.",1689923
SPLK,Splunk Inc.,1353283
NEWR,"New Relic, Inc.",1448056
DT,"Dynatrace, Inc.",1773383
NOW,"ServiceNow, Inc.",1373715
WDAY,"Workday, Inc.",1327811
ADSK,"Autodesk, Inc.",769397
1 Ticker Company Cik
2 AAPL Apple Inc. 320193
3 MSFT Microsoft Corporation 789019
4 AMZN Amazon.com, Inc. 1018724
5 NVDA NVIDIA Corporation 1045810
6 TSLA Tesla, Inc. 1318605
7 GOOGL Alphabet Inc. Class A 1652044
8 META Meta Platforms, Inc. 1326801
9 AMD Advanced Micro Devices, Inc. 2488
10 NFLX Netflix, Inc. 1065280
11 BRK.B Berkshire Hathaway Inc. 1067983
12 V Visa Inc. 1403161
13 JNJ Johnson & Johnson 200406
14 PG Procter & Gamble Co. 80424
15 JPM JPMorgan Chase & Co. 19617
16 UNH UnitedHealth Group Incorporated 731766
17 DIS The Walt Disney Company 1744489
18 HD Home Depot, Inc. 354950
19 XOM Exxon Mobil Corporation 34088
20 KO Coca-Cola Company 21344
21 PEP PepsiCo, Inc. 77476
22 PFE Pfizer Inc. 78003
23 MA Mastercard Incorporated 1141391
24 ADBE Adobe Inc. 796343
25 CRM Salesforce, Inc. 1108524
26 INTC Intel Corporation 50863
27 CSCO Cisco Systems, Inc. 858877
28 NKE Nike, Inc. 320187
29 T AT&T Inc. 732717
30 CMCSA Comcast Corporation 1166691
31 VZ Verizon Communications Inc. 732712
32 CVX Chevron Corporation 93410
33 ABBV AbbVie Inc. 1551152
34 MRK Merck & Co., Inc. 310158
35 BMY Bristol-Myers Squibb Company 14272
36 WMT Walmart Inc. 104169
37 MCD McDonald's Corporation 63908
38 SBUX Starbucks Corporation 829224
39 GS Goldman Sachs Group, Inc. 886982
40 MS Morgan Stanley 895421
41 AXP American Express Company 4962
42 C Citigroup Inc. 831001
43 BA Boeing Company 12927
44 DAL Delta Air Lines, Inc. 27904
45 LUV Southwest Airlines Co. 92380
46 MAR Marriott International, Inc. 1048286
47 HLT Hilton Worldwide Holdings Inc. 1585689
48 BKNG Booking Holdings Inc. 1075531
49 PYPL PayPal Holdings, Inc. 1633917
50 SQ Square, Inc. 1512673
51 ZM Zoom Video Communications, Inc. 1585521
52 SNOW Snowflake Inc. 1640147
53 UBER Uber Technologies, Inc. 1543151
54 LYFT Lyft, Inc. 1759509
55 ROKU Roku, Inc. 1428439
56 SPOT Spotify Technology S.A. 1639920
57 SHOP Shopify Inc. 1594805
58 EBAY eBay Inc. 1065088
59 TWTR Twitter, Inc. 1418091
60 SNAP Snap Inc. 1564408
61 PINS Pinterest, Inc. 1506293
62 PLTR Palantir Technologies Inc. 1321655
63 ZI ZoomInfo Technologies Inc. 1794515
64 DOCU DocuSign, Inc. 1261333
65 TWLO Twilio Inc. 1447669
66 CRWD CrowdStrike Holdings, Inc. 1535527
67 NET Cloudflare, Inc. 1477333
68 DDOG Datadog, Inc. 1561550
69 MDB MongoDB, Inc. 1441816
70 ZS Zscaler, Inc. 1713683
71 OKTA Okta, Inc. 1660134
72 DBX Dropbox, Inc. 1467623
73 SMAR Smartsheet Inc. 1366561
74 ASAN Asana, Inc. 1477720
75 RNG RingCentral, Inc. 1384905
76 PTON Peloton Interactive, Inc. 1639825
77 TTD The Trade Desk, Inc. 1671933
78 HUBS HubSpot, Inc. 1404655
79 COUP Coupa Software Incorporated 1385867
80 AYX Alteryx, Inc. 1689923
81 SPLK Splunk Inc. 1353283
82 NEWR New Relic, Inc. 1448056
83 DT Dynatrace, Inc. 1773383
84 NOW ServiceNow, Inc. 1373715
85 WDAY Workday, Inc. 1327811
86 ADSK Autodesk, Inc. 769397

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,312 @@
Form,Description
1-A POS,Reg A Offering Amendment
1-A-W,Reg A Offering Withdrawal
1-E,Notification filing for small business investment companies
1-E AD,Sales material for small business investment companies
1-K,Annual report for Regulation A issuers
1-SA,Semiannual report for Regulation A issuers
1-U,Current report for Regulation A issuers
1-Z,Exit report for terminated Regulation A offerings
1-Z-W,Withdrawal of Regulation A exit report
2-E,Report of securities sales
10-12B,Registration of a class of securities
10-12G,Registration of a class of securities
10-D,Periodic distribution reports for asset-backed securities
10-K,Annual report for public companies
10-KT,Transition report with change in fiscal year
10-Q,Quarterly report for public companies
10-QT,Quarterly transition report with change in fiscal year
11-K,Annual report for employee stock plans
11-KT,Transition report for employee stock plans
13F-CTR,Confidential treatment request by institutional managers
13F-HR,Initial quarterly holdings report by institutional managers
13F-NT,Initial quarterly notice by institutional managers
13H,Registration for large traders
144,Notice of proposed sale
15-12G,Securities registration termination
15-15D,Suspension of reporting obligations
15F-12B,Foreign private issuer equity securities termination
15F-12G,Securities registration termination by foreign private issuer
15F-15D,Foreign private issuer reporting suspension
17HACON,Confidential annual broker-dealer report
17HQCON,Confidential quarterly broker-dealer report
18-12B,Securities registration by foreign governments
18-12G,Securities registration by foreign governments
18-K,Annual report for foreign governments
20-F,Annual report for foreign companies
20FR12B,Foreign private issuer securities registration
20FR12G,Foreign private issuer securities registration
24F-2NT,Rule 24F-2 notice for investment companies
25,Securities delisting
25-NSE,Notice of matured/redeemed/retired securities by exchanges
3,Initial statement of beneficial ownership
305B2,Application for new trustee
4,Statement of changes in beneficial ownership
40-6B,Application by employees' securities company
40-17F1,Custody report for management investment companies
40-17F2,Custody report for management investment companies
40-17G,Fidelity bond filing for investment companies
40-17GCS,Claims and settlements under investment company fidelity bond
40-24B2,Sales literature filing for investment companies
40-33,Investment company shareholder derivative actions
40-8B25,Investment company report or document
40-8F-2,Application for deregistration by investment companies
40-APP,Applications under Investment Company/Advisers Acts
40-F,Annual report (Canadian)
40FR12B,Securities registration by certain Canadian issuers
40FR12G,Securities registration by certain Canadian issuers
40-OIP,Applications under Investment Company/Advisers Acts reviewed by insurance office
424A,Prospectus outlining the details of securities offered by a company
424B1,Initial primary offering
424B2,Primary offering prospectus
424B3,Prospectus supplement
424B4,Prospectus supplement with pricing
424B5,Supplement to primary offering
424B7,Prospectus with material changes
424B8,Final prospectus changes
424H,Preliminary prospectus
425,Prospectus in business combination transactions
424I,Prospectus filed under Rule 424(i)(1)
485APOS,Post-effective amendment
485BPOS,Post-effective amendment
485BXT,Amendment to designate new effective date
486APOS,Post-effective amendment
486BPOS,Post-effective amendment
486BXT,Amendment to designate new effective date
487,Pre-effective pricing amendment under Rule 487
497,Fund prospectus
497AD,Rule 482 ads filed under Rule 497
497H2,Filings under Rule 497(h)(2)
497J,Certification of no changes to prospectus
497K,Summary fund prospectus
497VPI,Variable contracts summary prospectus
497VPSUB,Substitution-related supplement for variable contracts
497VPU,Updated summary prospectus for variable contracts
5,Annual statement of beneficial ownership changes
6-K,Foreign issuer current report
8-A12B,Registration of securities
8-A12G,Registration of securities
8-K,Current report
8-K12B,Successor issuer registration
8-K12G3,Successor issuer registration
8-K15D5,Successor issuer reporting
ABS-15G,Asset-backed securities report
ABS-EE,Electronic exhibits for asset-backed securities offerings
ANNLRPT,Annual development bank report
APP WD,Withdrawal of exemptive relief application
ARS,Annual report to security holders
ATS-N,Initial Alternative Trading System (ATS) notice
ATS-N/CA,Correcting amendment to ATS notice
ATS-N/MA,Material amendment to ATS notice
ATS-N/OFA,Order display and fair access amendment to ATS notice
ATS-N/UA,Updating amendment to ATS notice
ATS-N-C,Notice of ATS cessation
ATS-N-W,Withdrawal of ATS notice
AW,Withdrawal of Securities Act registration amendment
AW WD,Withdrawal request for registration amendment withdrawal
BULK,Bulk submission
C,Offering statement
C-W,Withdrawal of offering statement
C/A-W,Withdrawal of offering statement amendment
C-U,Progress update
C-U-W,Withdrawal of progress update
C-AR,Annual report
C-AR-W,Withdrawal of annual report
C-AR/A-W,Withdrawal of annual report amendment
C-TR,Termination of reporting
C-TR-W,Withdrawal of termination of reporting
CB,Notice for certain foreign issuer transactions
CERT,Exchange certification of listing approval
CFPORTAL,Funding portal registration application
CFPORTAL-W,Withdrawal of funding portal registration
CORRESP,Correspondence with the SEC
D,Notice of exempt Regulation D offering
DEF 14A,Definitive proxy statement
DEF 14C,Definitive information statement
DEFA14A,Additional definitive proxy materials
DEFA14C,Additional definitive information statement materials
DEFC14A,Definitive proxy statement - contested solicitation
DEFC14C,Definitive information statement - contested solicitation
DEFM14A,Definitive proxy statement for merger/acquisition
DEFM14C,Definitive information statement for merger/acquisition
DEFN14A,Definitive proxy statement by non-management
DEFR14A,Revised definitive proxy materials
DEFR14C,Revised definitive information statement materials
DEL AM,Delaying amendment for registration statement
DFAN14A,Additional proxy materials by non-management
DFRN14A,Revised proxy statement by non-management
DOS,Draft offering statement under Regulation A
DOSLTR,Draft offering statement letter
DRS,Draft registration statement
DRSLTR,Draft registration statement letter
DSTRBRPT,Distribution report for development bank
F-1,Securities registration by foreign private issuers
F-10,Securities registration by certain Canadian issuers
F-10EF,Auto-effective registration by certain Canadian issuers
F-10POS,Amendment to F-10EF registration
F-1MEF,Additional securities registered to prior F-1
F-3,Foreign private securities registration
F-3ASR,Foreign private securities registration
F-3D,Foreign private securities registration
F-3DPOS,Amendment to F-3D registration
F-3MEF,Additional securities registered to prior F-3
F-4,Business combination for foreign issuers
F-4MEF,Additional securities registered to prior F-4
F-6 POS,Amendment to F-6EF registration
F-6,Depositary receipts by foreign private issuers
F-6EF,Depositary receipts by foreign private issuers
F-7 POS,Amended F-7 registration
F-7,Canadian rights offerings
F-8 POS,Amendment to F-8 registration
F-8,Canadian business combination
F-80,Canadian business combination
F-80POS,Amendment to F-80 registration
F-N,Appointment of agent for service by foreign institutions
FWP,Filing of free writing prospectuses
F-X,Appointment of agent for service by foreign issuers
IRANNOTICE,Notice of Iran/Syria disclosures in periodic reports
MA,Municipal advisor registration
MA-I,Natural persons engaged in municipal advisory activities
MA-W,Withdrawal from municipal advisor registration
MODULE,Module submission
N-14 8C,Initial registration statement by closed-end funds for business combinations
N-14,Initial registration statement by open-end funds for business combinations
N-14MEF,Additional securities registered by closed-end funds
N-18F1,Election of terms for future filings
N-1A,Initial registration statement for open-end funds
N-2,Closed-end fund registration
N-2ASR,Closed-end fund automatic registration
N-2 POSASR,Amendment to N-2ASR registration
N-23C-2,Notice of closed-end fund's intention to call or redeem securities
N-23C3A,Closed-end fund periodic repurchase offer notice
N-23C3B,Filing under Rule 23c-3(c) by closed-end funds
N-23C3C,Filings under Rule 23c-3(b) and (c) by closed-end funds
N-27D-1,Accounting report for segregated trust accounts
N-2MEF,Additional securities registered to prior N-2
N-3,Initial registration for separate accounts of management companies
N-30B-2,Periodic reports (other than annual/semi-annual) by management companies
N-30D,Annual and semi-annual reports by management companies
N-4,Initial registration for separate accounts of unit trusts
N-5,Registration statement for small business investment companies
N-54A,Election filing by business development companies
N-54C,Withdrawal filing by business development companies
N-6,Registration statement for separate accounts of unit trusts
N-6F,Notice by business development companies electing to be subject to Sections 55-65
N-8A,Initial notification of registration
N-8B-2,Initial registration statement for unit investment trusts
N-8B-3,Initial registration statement for periodic payment plans
N-8B-4,Initial registration statement for face-amount certificate companies
N-8F,Application for deregistration
N-CEN,Annual report for registered investment companies
N-CR,Current report for money market funds
N-CSR,Certified annual shareholder report
N-CSRS,Certified semi-annual shareholder report
N-MFP2/A,Monthly portfolio holdings for money market funds
N-MFP3,Monthly portfolio holdings for money market funds
NPORT-EX,Portfolio holdings exhibit to Form N-PORT
NPORT-NP,Non-public monthly portfolio investments report
NPORT-P,Public monthly portfolio investments report
N-PX,Annual proxy voting record report
N-PX CTR,Confidential treatment request for Form N-PX
N-RN,Current report for registered funds and BDCs
NRSRO-UPD,Registration update by credit rating agencies
NRSRO-CE,Annual certification by credit rating agencies
NRSRO-FR,Annual reports for statistical rating organizations
NRSRO-WCLS,Withdrawal from credit rating class for nationally recognized statistical rating organizations
NRSRO-WREG,Withdrawal from registration as a nationally recognized statistical rating organization
NT 10-K,Late filing of 10-K
NT 10-D,Late filing of 10-D
NT 10-Q,Late filing of 10-Q
NT 11-K,Late filing of 11-K
NT 15D2,Late filing of special report
NT 20-F,Late filing of Form 20-F
NT-NCEN,Late filing of Form N-CEN
NT-NCSR,Late filing of Form N-CSR
N-VP,Notice document for certain variable contracts
N-VPFS,Financial statements for certain variable contracts
POS 8C,Post-effective amendment for closed-end funds
POS AM,Post-effective amendment to a registration statement
POS AMI,Post-effective amendment for investment company filings
POSASR,Post-effective amendment to automatic shelf registration
POS EX,Post-effective amendment adding exhibits
POS462B,Post-effective amendment filed
POS462C,Post-effective amendment filed
PRE 14A,Preliminary proxy statement
PRE 14C,Preliminary information statement
PREC14A,Preliminary proxy statement for contested solicitations
PREC14C,Preliminary information statement for contested solicitations
PREM14A,Preliminary merger proxy statement
PREM14C,Preliminary merger information statement
PREN14A,Preliminary proxy statement filed by non-management
PRER14A,Preliminary revised proxy materials
PRER14C,Preliminary revised information statements
PRRN14A,Revised preliminary proxy statement non-management
PX14A6G,Exempt solicitation
PX14A6N,Exempt solicitation for roll-up transaction
QRTLYRPT,Development banks quarterly report
RW,Registration withdrawal
RW WD,Withdrawal of registration withdrawal
S-1,Securities registration
S-11,Real estate securities registration
S-11MEF,Registration statement for prior Form S-11
S-1MEF,Registration statement for prior Form S-1
S-20,Standardized options registration
S-3,Simplified securities registration
S-3ASR,Automatic shelf registration
S-3D,Dividend reinvestment plans automatic securities registration
S-3DPOS,Post-effective amendment to Form S-3D
S-3MEF,Registration statement filed relating to prior Form S-3
S-4 POS,Post-effective amendment to Form S-4
S-4,Business acquisitions registration
S-4EF,Bank/S&L loan registration
S-4MEF,Registration statement filed relating to prior Form S-4
S-6,Initial registration statement for unit investment trusts
S-8,Employee securities registration
S-8 POS,Post-effective amendment to Form S-8
S-B,Foreign governments securities registration
S-BMEF,Registration statement filed relating to prior Form S-B
SBSE,Security-based swap dealer registration
SBSE-A,Abbreviated application for SEC-registered swap entities also registered with CFTC
SBSE-BD,Application for broker-dealer security-based swap dealers/major participants
SBSE-C,Certifications for security-based swap dealer/major participant registration
SBSE-W,Request to withdraw registration as security-based swap dealer/major participant
SBSE-DISPUTE NOTICE,Notice of valuation dispute by a security-based swap entity
SBSE-CCO-RPT,Annual compliance report for security-based swap dealers
SC 13D,Ownership for control disclosure
SCHEDULE 13D,Disclosure of beneficial ownership over 5% (XML)
SC 13E1,Issuer statement for going private transactions
SC 13E3,Schedule for going private transactions
SC 13G,Beneficial ownership
SCHEDULE 13G,Beneficial ownership by passive investors/institutions
SC 14D9,Solicitation/recommendation statement for third-party tender offers
SC 14F1,Statement for changes to majority of directors
SC 14N,Information by nominating shareholders
SC 14N-S,Solicitation relating to Rule 14a-11 nominating groups
SC TO-C,Written communication relating to tender offers
SC TO-I,Tender offer by issuer
SC TO-T,Tender offer by third party
SC13E4F,Foreign issuer tender
SC14D1F,3rd party tender offer by foreign issuer
SC14D9C,Subject company communication relating to third-party tender offer
SC14D9F,Solicitation/recommendation statement by foreign issuers for third-party tender offers
SD,Specialized disclosure report on conflict minerals or resource extraction payments
SDR,Registration for security-based swap data repositories
SDR-CCO,Compliance and financial reports for security-based swap data repositories
SDR-W,Withdrawal from registration as security-based swap data repository
SF-1,Asset-backed securities registration
SF-1MEF,Registration statement filed relating to prior Form SF-1
SF-3,Asset-backed securities shelf offerings
SF-3MEF,Registration statement filed relating to prior Form SF-3
SH-ER,Weekly entries report by institutional investment managers
SH-NT,Weekly notice report by institutional investment managers
SP 15D2,Special financial report
SPDSCL,Specialized disclosure filing
SUPPL,Supplemental material filed by foreign private issuers
T-3,Initial application for trust indenture qualification
T-6,Application for foreign entity to act as institutional trustee
TA-1,Initial application for transfer agent registration
TA-2,Annual report by registered transfer agents
TA-W,Notice of withdrawal from transfer agent registration
UPLOAD,Submission of documents
UNDER,Initial undertaking to file reports
X-17A-5,Reports required of brokers and dealers
1 Form Description
2 1-A POS Reg A Offering Amendment
3 1-A-W Reg A Offering Withdrawal
4 1-E Notification filing for small business investment companies
5 1-E AD Sales material for small business investment companies
6 1-K Annual report for Regulation A issuers
7 1-SA Semiannual report for Regulation A issuers
8 1-U Current report for Regulation A issuers
9 1-Z Exit report for terminated Regulation A offerings
10 1-Z-W Withdrawal of Regulation A exit report
11 2-E Report of securities sales
12 10-12B Registration of a class of securities
13 10-12G Registration of a class of securities
14 10-D Periodic distribution reports for asset-backed securities
15 10-K Annual report for public companies
16 10-KT Transition report with change in fiscal year
17 10-Q Quarterly report for public companies
18 10-QT Quarterly transition report with change in fiscal year
19 11-K Annual report for employee stock plans
20 11-KT Transition report for employee stock plans
21 13F-CTR Confidential treatment request by institutional managers
22 13F-HR Initial quarterly holdings report by institutional managers
23 13F-NT Initial quarterly notice by institutional managers
24 13H Registration for large traders
25 144 Notice of proposed sale
26 15-12G Securities registration termination
27 15-15D Suspension of reporting obligations
28 15F-12B Foreign private issuer equity securities termination
29 15F-12G Securities registration termination by foreign private issuer
30 15F-15D Foreign private issuer reporting suspension
31 17HACON Confidential annual broker-dealer report
32 17HQCON Confidential quarterly broker-dealer report
33 18-12B Securities registration by foreign governments
34 18-12G Securities registration by foreign governments
35 18-K Annual report for foreign governments
36 20-F Annual report for foreign companies
37 20FR12B Foreign private issuer securities registration
38 20FR12G Foreign private issuer securities registration
39 24F-2NT Rule 24F-2 notice for investment companies
40 25 Securities delisting
41 25-NSE Notice of matured/redeemed/retired securities by exchanges
42 3 Initial statement of beneficial ownership
43 305B2 Application for new trustee
44 4 Statement of changes in beneficial ownership
45 40-6B Application by employees' securities company
46 40-17F1 Custody report for management investment companies
47 40-17F2 Custody report for management investment companies
48 40-17G Fidelity bond filing for investment companies
49 40-17GCS Claims and settlements under investment company fidelity bond
50 40-24B2 Sales literature filing for investment companies
51 40-33 Investment company shareholder derivative actions
52 40-8B25 Investment company report or document
53 40-8F-2 Application for deregistration by investment companies
54 40-APP Applications under Investment Company/Advisers Acts
55 40-F Annual report (Canadian)
56 40FR12B Securities registration by certain Canadian issuers
57 40FR12G Securities registration by certain Canadian issuers
58 40-OIP Applications under Investment Company/Advisers Acts reviewed by insurance office
59 424A Prospectus outlining the details of securities offered by a company
60 424B1 Initial primary offering
61 424B2 Primary offering prospectus
62 424B3 Prospectus supplement
63 424B4 Prospectus supplement with pricing
64 424B5 Supplement to primary offering
65 424B7 Prospectus with material changes
66 424B8 Final prospectus changes
67 424H Preliminary prospectus
68 425 Prospectus in business combination transactions
69 424I Prospectus filed under Rule 424(i)(1)
70 485APOS Post effective amendment
71 485BPOS Post effective amendment
72 485BXT Amendment to designate new effective date
73 486APOS Post-effective amendment
74 486BPOS Post-effective amendment
75 486BXT Amendment to designate new effective date
76 487 Pre-effective pricing amendment under Rule 487
77 497 Fund prospectus
78 497AD Rule 482 ads filed under Rule 497
79 497H2 Filings under Rule 497(h)(2)
80 497J Certification of no changes to prospectus
81 497K Summary fund prospectus
82 497VPI Variable contracts summary prospectus
83 497VPSUB Substitution-related supplement for variable contracts
84 497VPU Updated summary prospectus for variable contracts
85 5 Annual statement of beneficial ownership changes
86 6-K Foreign issuer current report
87 8-A12B Registration of securities
88 8-A12G Registration of securities
89 8-K Current report
90 8-K12B Successor issuer registration
91 8-K12G3 Successor issuer registration
92 8-K15D5 Successor issuer reporting
93 ABS-15G Asset-backed securities report
94 ABS-EE Electronic exhibits for asset-backed securities offerings
95 ANNLRPT Annual development bank report
96 APP WD Withdrawal of exemptive relief application
97 ARS Annual report to security holders
98 ATS-N Initial Alternative Trading System (ATS) notice
99 ATS-N/CA Correcting amendment to ATS notice
100 ATS-N/MA Material amendment to ATS notice
101 ATS-N/OFA Order display and fair access amendment to ATS notice
102 ATS-N/UA Updating amendment to ATS notice
103 ATS-N-C Notice of ATS cessation
104 ATS-N-W Withdrawal of ATS notice
105 AW Withdrawal of Securities Act registration amendment
106 AW WD Withdrawal request for registration amendment withdrawal
107 BULK Bulk submission
108 C Offering statement
109 C-W Withdrawal of offering statement
110 C/A-W Withdrawal of offering statement amendment
111 C-U Progress update
112 C-U-W Withdrawal of progress update
113 C-AR Annual report
114 C-AR-W Withdrawal of annual report
115 C-AR/A-W Withdrawal of annual report amendment
116 C-TR Termination of reporting
117 C-TR-W Withdrawal of termination of reporting
118 CB Notice for certain foreign issuer transactions
119 CERT Exchange certification of listing approval
120 CFPORTAL Funding portal registration application
121 CFPORTAL-W Withdrawal of funding portal registration
122 CORRESP Correspondence with the SEC
123 D Notice of exempt Regulation D offering
124 DEF 14A Definitive proxy statement
125 DEF 14C Definitive information statement
126 DEFA14A Additional definitive proxy materials
127 DEFA14C Additional definitive information statement materials
128 DEFC14A Definitive proxy statement - contested solicitation
129 DEFC14C Definitive information statement - contested solicitation
130 DEFM14A Definitive proxy statement for merger/acquisition
131 DEFM14C Definitive information statement for merger/acquisition
132 DEFN14A Definitive proxy statement by non-management
133 DEFR14A Revised definitive proxy materials
134 DEFR14C Revised definitive information statement materials
135 DEL AM Delaying amendment for registration statement
136 DFAN14A Additional proxy materials by non-management
137 DFRN14A Revised proxy statement by non-management
138 DOS Draft offering statement under Regulation A
139 DOSLTR Draft offering statement letter
140 DRS Draft registration statement
141 DRSLTR Draft registration statement letter
142 DSTRBRPT Distribution report for development bank
143 F-1 Securities registration by foreign private issuers
144 F-10 Securities registration by certain Canadian issuers
145 F-10EF Auto-effective registration by certain Canadian issuers
146 F-10POS Amendment to F-10EF registration
147 F-1MEF Additional securities registered to prior F-1
148 F-3 Foreign private securities registration
149 F-3ASR Foreign private securities registration
150 F-3D Foreign private securities registration
151 F-3DPOS Amendment to F-3D registration
152 F-3MEF Additional securities registered to prior F-3
153 F-4 Business combination for foreign issuers
154 F-4MEF Additional securities registered to prior F-4
155 F-6 POS Amendment to F-6EF registration
156 F-6 Depositary receipts by foreign private issuers
157 F-6EF Depositary receipts by foreign private issuers
158 F-7 POS Amended F-7 registration
159 F-7 Canadian rights offerings
160 F-8 POS Amendment to F-8 registration
161 F-8 Canadian business combination
162 F-80 Canadian business combination
163 F-80POS Amendment to F-80 registration
164 F-N Appointment of agent for service by foreign institutions
165 FWP Filing of free writing prospectuses
166 F-X Appointment of agent for service by foreign issuers
167 IRANNOTICE Notice of Iran/Syria disclosures in periodic reports
168 MA Municipal advisor registration
169 MA-I Natural persons engaged in municipal advisory activities
170 MA-W Withdrawal from municipal advisor registration
171 MODULE Module submission
172 N-14 8C Initial registration statement by closed-end funds for business combinations
173 N-14 Initial registration statement by open-end funds for business combinations
174 N-14MEF Additional securities registered by closed-end funds
175 N-18F1 Election of terms for future filings
176 N-1A Initial registration statement for open-end funds
177 N-2 Closed-end fund registration
178 N-2ASR Closed-end fund automatic registration
179 N-2 POSASR Amendment to N-2ASR registration
180 N-23C-2 Notice of closed-end fund's intention to call or redeem securities
181 N-23C3A Closed-end fund periodic repurchase offer notice
182 N-23C3B Filing under Rule 23c-3(c) by closed-end funds
183 N-23C3C Filings under Rule 23c-3(b) and (c) by closed-end funds
184 N-27D-1 Accounting report for segregated trust accounts
185 N-2MEF Additional securities registered to prior N-2
186 N-3 Initial registration for separate accounts of management companies
187 N-30B-2 Periodic reports (other than annual/semi-annual) by management companies
188 N-30D Annual and semi-annual reports by management companies
189 N-4 Initial registration for separate accounts of unit trusts
190 N-5 Registration statement for small business investment companies
191 N-54A Election filing by business development companies
192 N-54C Withdrawal filing by business development companies
193 N-6 Registration statement for separate accounts of unit trusts
194 N-6F Notice by business development companies electing to be subject to Sections 55-65
195 N-8A Initial notification of registration
196 N-8B-2 Initial registration statement for unit investment trusts
197 N-8B-3 Initial registration statement for periodic payment plans
198 N-8B-4 Initial registration statement for face-amount certificate companies
199 N-8F Application for deregistration
200 N-CEN Annual report for registered investment companies
201 N-CR Current report for money market funds
202 N-CSR Certified annual shareholder report
203 N-CSRS Certified semi-annual shareholder report
204 N-MFP2/A Monthly portfolio holdings for money market funds
205 N-MFP3 Monthly portfolio holdings for money market funds
206 NPORT-EX Portfolio holdings exhibit to Form N-PORT
207 NPORT-NP Non-public monthly portfolio investments report
208 NPORT-P Public monthly portfolio investments report
209 N-PX Annual proxy voting record report
210 N-PX CTR Confidential treatment request for Form N-PX
211 N-RN Current report for registered funds and BDCs
212 NRSRO-UPD Registration update by credit rating agencies
213 NRSRO-CE Annual certification by credit rating agencies
214 NRSRO-FR Annual reports for statistical rating organizations
215 NRSRO-WCLS Withdrawal from credit rating class for nationally recognized statistical rating organizations
216 NRSRO-WREG Withdrawal from registration as a nationally recognized statistical rating organization
217 NT 10-K Late filing of 10-K
218 NT 10-D Late filing of 10-D
219 NT 10-Q Late filing of 10-Q
220 NT 11-K Late filing of 11-K
221 NT 15D2 Late filing of special report
222 NT 20-F Late filing of Form 20-F
223 NT-NCEN Late filing of Form N-CEN
224 NT-NCSR Late filing of Form N-CSR
225 N-VP Notice document for certain variable contracts
226 N-VPFS Financial statements for certain variable contracts
227 POS 8C Post-effective amendment for closed-end funds
228 POS AM Post-effective amendment to a registration statement
229 POS AMI Post-effective amendment for investment company filings
230 POSASR Post-effective amendment to automatic shelf registration
231 POS EX Post-effective amendment adding exhibits
232 POS462B Post-effective amendment filed under Rule 462(b)
233 POS462C Post-effective amendment filed under Rule 462(c)
234 PRE 14A Preliminary proxy statement
235 PRE 14C Preliminary information statement
236 PREC14A Preliminary proxy statement for contested solicitations
237 PREC14C Preliminary information statement for contested solicitations
238 PREM14A Preliminary merger proxy statement
239 PREM14C Preliminary merger information statement
240 PREN14A Preliminary proxy statement filed by non-management
241 PRER14A Preliminary revised proxy materials
242 PRER14C Preliminary revised information statements
243 PRRN14A Revised preliminary proxy statement non-management
244 PX14A6G Exempt solicitation
245 PX14A6N Exempt solicitation for roll-up transaction
246 QRTLYRPT Development banks quarterly report
247 RW Registration withdrawal
248 RW WD Withdrawal of registration withdrawal
249 S-1 Securities registration
250 S-11 Real estate securities registration
251 S-11MEF Registration statement for prior Form S-11
252 S-1MEF Registration statement for prior Form S-1
253 S-20 Standardized options registration
254 S-3 Simplified securities registration
255 S-3ASR Automatic shelf registration
256 S-3D Dividend reinvestment plans automatic securities registration
257 S-3DPOS Post-effective amendment to Form S-3D
258 S-3MEF Registration statement filed relating to prior Form S-3
259 S-4 POS Post-effective amendment to Form S-4
260 S-4 Business acquisitions registration
261 S-4EF Bank/S&L loan registration
262 S-4MEF Registration statement filed relating to prior Form S-4
263 S-6 Initial registration statement for unit investment trusts
264 S-8 Employee securities registration
265 S-8 POS Post-effective amendment to Form S-8
266 S-B Foreign governments securities registration
267 S-BMEF Registration statement filed relating to prior Form S-B
268 SBSE Security-based swap dealer registration
269 SBSE-A Abbreviated application for SEC-registered swap entities also registered with CFTC
270 SBSE-BD Application for broker-dealer security-based swap dealers/major participants
271 SBSE-C Certifications for security-based swap dealer/major participant registration
272 SBSE-W Request to withdraw registration as security-based swap dealer/major participant
273 SBSE-DISPUTE NOTICE Notice of valuation dispute by a security-based swap entity
274 SBSE-CCO-RPT Annual compliance report for security-based swap dealers
275 SC 13D Ownership for control disclosure
276 SCHEDULE 13D Disclosure of beneficial ownership over 5% (XML)
277 SC 13E1 Issuer statement for going private transactions
278 SC 13E3 Schedule for going private transactions
279 SC 13G Beneficial ownership
280 SCHEDULE 13G Beneficial ownership by passive investors/institutions
281 SC 14D9 Solicitation/recommendation statement for third-party tender offers
282 SC 14F1 Statement for changes to majority of directors
283 SC 14N Information by nominating shareholders
284 SC 14N-S Solicitation relating to Rule 14a-11 nominating groups
285 SC TO-C Written communication relating to tender offers
286 SC TO-I Tender offer by issuer
287 SC TO-T Tender offer by third party
288 SC13E4F Foreign issuer tender
289 SC14D1F 3rd party tender offer by foreign issuer
290 SC14D9C Subject company communication relating to third-party tender offer
291 SC14D9F Solicitation/recommendation statement by foreign issuers for third-party tender offers
292 SD Specialized disclosure report on conflict minerals or resource extraction payments
293 SDR Registration for security-based swap data repositories
294 SDR-CCO Compliance and financial reports for security-based swap data repositories
295 SDR-W Withdrawal from registration as security-based swap data repository
296 SF-1 Asset-backed securities registration
297 SF-1MEF Registration statement filed relating to prior Form SF-1
298 SF-3 Asset-backed securities shelf offerings
299 SF-3MEF Registration statement filed relating to prior Form SF-3
300 SH-ER Weekly entries report by institutional investment managers
301 SH-NT Weekly notice report by institutional investment managers
302 SP 15D2 Special financial report
303 SPDSCL Specialized disclosure filing
304 SUPPL Supplemental material filed by foreign private issuers
305 T-3 Initial application for trust indenture qualification
306 T-6 Application for foreign entity to act as institutional trustee
307 TA-1 Initial application for transfer agent registration
308 TA-2 Annual report by registered transfer agents
309 TA-W Notice of withdrawal from transfer agent registration
310 UPLOAD Submission of documents
311 UNDER Initial undertaking to file reports
312 X-17A-5 Reports required of brokers and dealers

View File

@@ -0,0 +1,7 @@
from edgar.httprequests import download_file
# Base URL and path segment for SEC DERA data sets.
# NOTE(review): neither constant is used by the download below, which points at a
# different ('/files/dera/data/...') path - confirm which location is intended.
dera_data_url = 'https://www.sec.gov/dera/data'
financial_statement_datasets='financial-statement-data-sets'
if __name__ == '__main__':
    # Manual run: download the 2024 Q1 financial statement data set archive.
    download_file('https://www.sec.gov/files/dera/data/financial-statement-data-sets/2024q1.zip')

View File

@@ -0,0 +1,33 @@
from functools import lru_cache
from edgar.reference.data.common import read_csv_from_package
# Form description table, loaded once when the module is imported.
# describe_form() reads its `Form` and `Description` columns.
sec_form_data = read_csv_from_package('secforms.csv')
@lru_cache(maxsize=64)
def describe_form(form: str,
                  prepend_form: bool = True) -> str:
    """
    Get the description of a form from the form descriptions file.

    The lookup is case-insensitive. A trailing "/A" marks an amendment: it is
    removed before the lookup and reported as " Amendment" in the prefixed result.

    Args:
        form: The form type, e.g. "10-K" or "10-K/A".
        prepend_form: If True, prefix the description with
            "Form <form>[ Amendment]: "; if False, return the bare description.

    Returns:
        The description (optionally prefixed), or "Form <form>" when the form
        is not present in the descriptions table.
    """
    # Normalise case BEFORE looking for the amendment marker, so lower-case
    # input such as "10-k/a" is recognised as an amendment as well.
    form = form.upper()
    is_amendment = form.endswith("/A")
    if is_amendment:
        form = form[:-2]

    matches = sec_form_data.loc[sec_form_data.Form == form]
    if len(matches) == 0:
        return f"Form {form}"

    description = matches.Description.iloc[0]
    if not prepend_form:
        return description
    return f"Form {form}{' Amendment' if is_amendment else ''}: {description}"
# Form types this module groups under the name PROSPECTUSES.
PROSPECTUSES = [
    # S-series forms
    "S-1", "S-3", "S-4", "S-8", "S-11",
    # F-series forms
    "F-1", "F-3", "F-4", "F-6", "F-10",
    # 424B filings
    "424B1", "424B2", "424B3", "424B4", "424B5", "424B7", "424B8",
    # 485BPOS / 486BPOS / 497
    "485BPOS", "486BPOS", "497",
    # N-series forms
    "N-2", "N-14",
    # Post-effective amendments
    "POS AM", "POSASR", "POS EX",
    # Remaining entries
    "10", "20-F", "8-A",
    "SF-1", "SF-3",
]

View File

@@ -0,0 +1,475 @@
import json
import os
import re
from enum import Enum
from functools import lru_cache
from io import StringIO
from typing import Any, Dict, List, Optional, Union
import pandas as pd
import pyarrow as pa
from httpx import HTTPStatusError
from edgar.core import get_edgar_data_directory, listify, log
from edgar.httprequests import download_file, download_json
from edgar.reference.data.common import read_csv_from_package, read_parquet_from_package
# Public names exported by this module. Each name is listed exactly once
# (the original list repeated 'get_cik_tickers' and 'get_company_tickers').
__all__ = [
    'cusip_ticker_mapping', 'get_ticker_from_cusip', 'get_company_tickers', 'get_icon_from_ticker', 'find_cik',
    'get_cik_tickers', 'get_company_ticker_name_exchange', 'get_companies_by_exchange', 'popular_us_stocks',
    'get_mutual_fund_tickers', 'find_mutual_fund_cik', 'list_all_tickers', 'find_ticker', 'find_ticker_safe',
    'get_cik_ticker_lookup', 'get_company_cik_lookup', 'get_cik_tickers_from_ticker_txt',
    'ticker_txt_url', 'company_tickers_json_url', 'mutual_fund_tickers_url', 'company_tickers_exchange_url',
    'Exchange',
]
# SEC endpoints for ticker reference data.
# Text file read by get_cik_tickers_from_ticker_txt() as tab-separated (ticker, cik) rows.
ticker_txt_url = "https://www.sec.gov/include/ticker.txt"
# JSON read by get_company_tickers(); each entry carries 'cik_str', 'ticker' and 'title'.
company_tickers_json_url = "https://www.sec.gov/files/company_tickers.json"
# JSON of mutual fund tickers.
mutual_fund_tickers_url = "https://www.sec.gov/files/company_tickers_mf.json"
# JSON of company tickers including the exchange.
company_tickers_exchange_url = "https://www.sec.gov/files/company_tickers_exchange.json"
@lru_cache(maxsize=1)
def cusip_ticker_mapping(allow_duplicate_cusips: bool = True) -> pd.DataFrame:
    """
    Load the Cusip to Ticker mapping from the packaged ``ct.pq`` parquet file.

    The returned DataFrame is indexed by ``Cusip``. The same Cusip can appear on
    several rows; pass ``allow_duplicate_cusips=False`` to keep only the first
    row for each Cusip. (Per the original author's note, the first occurrence is
    the one most likely to map to a ticker that is linked to a CIK.)

    Args:
        allow_duplicate_cusips (bool): If False, drop all but the first row per Cusip.

    Returns:
        pd.DataFrame: The mapping, indexed by Cusip.
    """
    mapping = read_parquet_from_package('ct.pq').set_index('Cusip')
    if allow_duplicate_cusips:
        return mapping
    first_occurrence = ~mapping.index.duplicated(keep='first')
    return mapping[first_occurrence]
def load_tickers_from_local() -> Optional[Dict[str, Any]]:
    """
    Read the company tickers JSON from the local Edgar data directory.

    The file is expected under ``<edgar data directory>/reference`` and is named
    after the last path segment of ``company_tickers_json_url``.

    Returns:
        The parsed JSON content, or None when the reference directory or the
        file does not exist.
    """
    reference_dir = get_edgar_data_directory() / "reference"
    tickers_path = reference_dir / os.path.basename(company_tickers_json_url)
    if reference_dir.exists() and tickers_path.exists():
        return json.loads(tickers_path.read_text())
    return None
@lru_cache(maxsize=1)
def get_company_tickers(
        as_dataframe: bool = True,
        clean_name: bool = True,
        clean_suffix: bool = False
) -> Union[pd.DataFrame, pa.Table]:
    """
    Fetch the SEC company tickers JSON and turn it into a table.

    When the ``EDGAR_USE_LOCAL_DATA`` environment variable is set, the local copy
    is tried first and the download is used only if that copy is missing or empty.

    Args:
        as_dataframe (bool): If True, returns pandas DataFrame; if False, returns pyarrow Table
        clean_name (bool): If True, passes each company name through clean_company_name
        clean_suffix (bool): If True, passes each company name through clean_company_suffix

    Returns:
        Union[pd.DataFrame, pa.Table]: Table with columns ``cik``, ``ticker`` and ``company``

    Raises:
        Exception: Any error raised while loading or processing is logged and re-raised.
    """
    table_schema = pa.schema([
        ('cik', pa.int64()),
        ('ticker', pa.string()),
        ('company', pa.string())
    ])
    try:
        # Local data is opt-in; anything falsy from it falls through to the download.
        tickers_json = load_tickers_from_local() if os.getenv("EDGAR_USE_LOCAL_DATA") else None
        if not tickers_json:
            tickers_json = download_json(company_tickers_json_url)

        # One (cik, ticker, company) record per JSON entry.
        records = []
        for entry in tickers_json.values():
            name = entry['title']
            if clean_name:
                name = clean_company_name(name)
            if clean_suffix:
                name = clean_company_suffix(name)
            records.append((int(entry['cik_str']), entry['ticker'], name))

        ciks = [record[0] for record in records]
        tickers = [record[1] for record in records]
        companies = [record[2] for record in records]

        if as_dataframe:
            return pd.DataFrame({
                'cik': ciks,
                'ticker': tickers,
                'company': companies
            })

        columns = [
            pa.array(ciks, type=pa.int64()),
            pa.array(tickers, type=pa.string()),
            pa.array(companies, type=pa.string()),
        ]
        return pa.Table.from_arrays(columns, schema=table_schema)
    except Exception as e:
        log.error(f"Error fetching company tickers from [{company_tickers_json_url}]: {str(e)}")
        raise
def load_cik_tickers_from_local() -> Optional[str]:
    """
    Read the ticker text file from the local Edgar data directory.

    The file is expected under ``<edgar data directory>/reference`` and is named
    after the last path segment of ``ticker_txt_url``.

    Returns:
        The file content as text, or None when the reference directory or the
        file does not exist.
    """
    reference_dir = get_edgar_data_directory() / "reference"
    ticker_txt_path = reference_dir / os.path.basename(ticker_txt_url)
    if reference_dir.exists() and ticker_txt_path.exists():
        return ticker_txt_path.read_text()
    return None
def get_cik_tickers_from_ticker_txt():
    """
    Get CIK and ticker data from the ticker.txt file.

    When the ``EDGAR_USE_LOCAL_DATA`` environment variable is set, the local copy
    is tried first and the download is used only if that copy is missing or empty.

    Returns:
        A DataFrame with columns ``ticker`` (upper-cased) and ``cik`` with
        incomplete rows dropped, or None if loading or parsing fails (the
        error is logged).
    """
    try:
        ticker_txt = load_cik_tickers_from_local() if os.getenv("EDGAR_USE_LOCAL_DATA") else None
        if not ticker_txt:
            ticker_txt = download_file(ticker_txt_url, as_text=True)

        # The file is tab-separated with no header row.
        frame = pd.read_csv(StringIO(ticker_txt),
                            sep='\t',
                            header=None,
                            names=['ticker', 'cik'])
        frame = frame.dropna()
        frame['ticker'] = frame['ticker'].str.upper()
        return frame
    except Exception as e:
        log.error(f"Error fetching company tickers from [{ticker_txt_url}]: {str(e)}")
        return None
@lru_cache(maxsize=1)
def get_cik_tickers():
    """
    Merge unique (ticker, cik) records from the ticker.txt and company tickers JSON sources.

    Returns:
        The single available source when only one could be loaded, otherwise
        both sources concatenated (text source first) with duplicate
        (ticker, cik) pairs removed.

    Raises:
        Exception: If neither source could be loaded.
    """
    txt_data = get_cik_tickers_from_ticker_txt()
    try:
        json_data = get_company_tickers(clean_name=False, clean_suffix=False)[['ticker', 'cik']]
    except Exception:
        json_data = None

    available = [frame for frame in (txt_data, json_data) if frame is not None]
    if not available:
        raise Exception("Both data sources are unavailable")
    if len(available) == 1:
        return available[0]

    combined = pd.concat(available, ignore_index=True)
    return combined.drop_duplicates(subset=['ticker', 'cik'])
@lru_cache(maxsize=None)
def list_all_tickers():
    """Return the ``ticker`` column of the merged CIK/ticker data as a list."""
    ticker_column = get_cik_tickers()['ticker']
    return ticker_column.tolist()
@lru_cache(maxsize=None)
def get_company_cik_lookup():
    """
    Build a ticker -> CIK dictionary from the merged CIK/ticker data.

    Every ticker is mapped to its CIK. In addition, the part of the ticker
    before the first '-' is mapped to the same CIK unless that key is already
    present, so an existing entry is never overwritten by a derived one.
    """
    pairs = get_cik_tickers()
    ticker_to_cik = {}
    for symbol, cik in zip(pairs['ticker'], pairs['cik'], strict=False):
        ticker_to_cik[symbol] = cik
        # Register the base symbol only if nothing claimed it yet.
        ticker_to_cik.setdefault(symbol.split('-')[0], cik)
    return ticker_to_cik
@lru_cache(maxsize=None)
def get_cik_ticker_lookup():
    """Create a mapping of CIK to base ticker symbols.

    Each ticker is reduced to the part before the first '-'. For CIKs with
    multiple tickers, the shortest base ticker wins; on equal length the one
    seen first is kept.
    """
    shortest_by_cik = {}
    for symbol, cik in get_company_cik_lookup().items():
        candidate = symbol.split('-')[0]
        current = shortest_by_cik.get(cik)
        if current is None or len(candidate) < len(current):
            shortest_by_cik[cik] = candidate
    return shortest_by_cik
@lru_cache(maxsize=128)
def find_ticker(cik: Union[int, str]) -> str:
    """Find the ticker symbol for a given CIK.

    Leading zeros are stripped before the CIK is converted to an integer, so a
    value that is empty or all zeros cannot be converted and yields "".

    Args:
        cik: Central Index Key (CIK) as integer or string

    Returns:
        str: Ticker symbol or empty string if not found or if the CIK is not a valid number
    """
    try:
        normalized_cik = int(str(cik).lstrip('0'))
        ticker_by_cik = get_cik_ticker_lookup()
        return ticker_by_cik.get(normalized_cik, "")
    except (ValueError, TypeError):
        return ""
def find_ticker_safe(cik: Union[int, str]) -> Optional[str]:
    """Find the ticker symbol for a given CIK without making network calls.

    The lookup is attempted only when the caches of get_cik_ticker_lookup,
    get_company_cik_lookup and get_cik_tickers each already hold an entry.
    Intended for callers that must stay fast and must not block on I/O, such as
    rich display methods.

    Args:
        cik: Central Index Key (CIK) as integer or string

    Returns:
        Optional[str]: Ticker symbol, empty string if the CIK has no ticker, or
        None if the data is not cached yet or any error occurs
    """
    try:
        required_caches = (get_cik_ticker_lookup, get_company_cik_lookup, get_cik_tickers)
        if not all(cached.cache_info().currsize > 0 for cached in required_caches):
            # Something is not cached yet; answering would need a network call.
            return None
        normalized_cik = int(str(cik).lstrip('0'))
        # All inputs are cached, so this resolves from memory.
        return get_cik_ticker_lookup().get(normalized_cik, "")
    except Exception:
        # Deliberate catch-all: this function must never raise.
        return None
@lru_cache(maxsize=None)
def get_company_ticker_name_exchange():
    """
    Return a DataFrame with columns [cik name ticker exchange]

    The data is downloaded from ``company_tickers_exchange_url``; the column
    names are taken from the payload's ``fields`` entry and the rows from its
    ``data`` entry. The result is cached for the life of the process.
    """
    # Use the module-level constant rather than repeating the URL literal.
    data = download_json(company_tickers_exchange_url)
    return pd.DataFrame(data['data'], columns=data['fields'])
def get_companies_by_exchange(exchange: Union[List[str], str]):
    """
    Get companies listed on one or more exchanges.

    :param exchange: An exchange name like 'Nasdaq' or 'NYSE', or a list of such names.
                     Names are compared case-insensitively.
    :return: DataFrame with companies listed on the given exchange(s)
             with columns [cik name ticker exchange] and a fresh 0-based index
    """
    companies = get_company_ticker_name_exchange()
    wanted = [name.lower() for name in listify(exchange)]
    on_exchange = companies['exchange'].str.lower().isin(wanted)
    return companies[on_exchange].reset_index(drop=True)
@lru_cache(maxsize=None)
def get_mutual_fund_tickers():
    """
    Get mutual fund tickers.

    The data is downloaded from ``mutual_fund_tickers_url`` and cached for the
    life of the process.

    This returns a dataframe with columns
    cik seriesId classId ticker
    """
    # Use the module-level constant rather than repeating the URL literal.
    data = download_json(mutual_fund_tickers_url)
    return pd.DataFrame(data['data'], columns=['cik', 'seriesId', 'classId', 'ticker'])
@lru_cache(maxsize=None)
def get_mutual_fund_lookup():
    """Build a ticker -> CIK dictionary from the mutual fund tickers table."""
    funds = get_mutual_fund_tickers()
    return {ticker: cik for ticker, cik in zip(funds['ticker'], funds['cik'], strict=False)}
def find_mutual_fund_cik(ticker):
    """
    Find the CIK for a given mutual fund or ETF ticker.

    :param ticker: String, the ticker symbol to look up (upper-cased before the lookup)
    :return: The CIK stored for the given ticker, or None if not found
    """
    fund_lookup = get_mutual_fund_lookup()
    symbol = ticker.upper()
    return fund_lookup.get(symbol)
def find_company_cik(ticker):
    """
    Find the CIK for a given company ticker.

    :param ticker: String, the ticker symbol to look up. It is upper-cased and
                   any '.' is replaced with '-' before the lookup.
    :return: The CIK stored for the given ticker, or None if not found
    """
    ticker_to_cik = get_company_cik_lookup()
    normalized = ticker.upper().replace('.', '-')
    return ticker_to_cik.get(normalized)
def find_company_ticker(cik: Union[int, str]) -> Union[str, List[str], None]:
    """
    Find the ticker(s) for a given CIK.

    :param cik (int or str): The CIK to look up; leading zeros are ignored
    :return Union[str, List[str], None]: A single ticker string if exactly one ticker is found,
                                         a list of ticker strings otherwise,
                                         or None if the CIK is not a valid number, the data
                                         lacks the 'cik'/'ticker' columns, or no row matches.
    """
    try:
        cik = int(str(cik).lstrip('0'))
    except (ValueError, TypeError):
        return None

    cik_tickers = get_cik_tickers()
    if not {'cik', 'ticker'}.issubset(cik_tickers.columns):
        return None

    matching_rows = cik_tickers[cik_tickers['cik'] == cik]
    ticker_series = matching_rows['ticker']
    if ticker_series.empty:
        return None

    # Drop entries that are literally None before deciding on the return shape.
    tickers = [symbol for symbol in ticker_series.to_numpy() if symbol is not None]
    return tickers[0] if len(tickers) == 1 else tickers
def find_cik(ticker):
    """
    Find the CIK for a given ticker, checking both company and mutual fund/ETF data.

    Company data is consulted first; mutual fund/ETF data is used only when the
    company lookup finds nothing.

    :param ticker: String, the ticker symbol to look up
    :return: The CIK for the given ticker, or None if not found
    """
    company_cik = find_company_cik(ticker)
    return company_cik if company_cik is not None else find_mutual_fund_cik(ticker)
@lru_cache(maxsize=128)
def get_ticker_from_cusip(cusip: str):
    """
    Get the ticker symbol for a given Cusip.

    The Cusip is looked up by label in the Cusip-indexed mapping. If the lookup
    result has exactly one element, that element is returned; if it has more
    than one, the ``Ticker`` of the first row is returned.

    NOTE(review): a Cusip that is not in the mapping is not handled here - the
    label lookup is expected to raise KeyError rather than return None.
    """
    matches = cusip_ticker_mapping().loc[cusip]
    match_count = len(matches)
    if match_count == 1:
        return matches.iloc[0]
    if match_count > 1:
        return matches.iloc[0].Ticker
    return None
# A trailing tag: a slash or backslash, one or more uppercase letters, and an
# optional closing slash or backslash at the very end of the name (e.g. "/DE/").
_TRAILING_TAG_RE = re.compile(r'[/\\][A-Z]+[/\\]?$')


def clean_company_name(name: str) -> str:
    """
    Remove a trailing slash-delimited uppercase tag from a company name.

    Args:
        name: The raw company name, e.g. "ALPHABET INC /DE/".

    Returns:
        The name without the trailing tag, with surrounding whitespace stripped.
    """
    without_tag = _TRAILING_TAG_RE.sub('', name)
    return without_tag.strip()
def clean_company_suffix(name: str) -> str:
    """
    Remove common suffixes from the company name, taking care of special cases.

    Steps, in order:
      1. Trailing slashes are removed.
      2. Every "& CO" (optionally followed by "."), in any letter case, is removed.
      3. One trailing suffix - Inc, CO, CORP, PLC, LTD, LIMITED (each optionally
         followed by ".") or "L.P." - is removed, in any letter case.

    Args:
        name: The company name to clean.

    Returns:
        The cleaned name with surrounding whitespace stripped.
    """
    # Remove trailing slashes
    name = name.rstrip('/')
    # Handle cases like "JPMORGAN CHASE & CO" or "ELI LILLY & Co"
    name = re.sub(r'\s*&\s*CO\b\.?', '', name, flags=re.IGNORECASE).strip()
    # "L.P." ends in a non-word character, so it cannot be followed by \b at the
    # end of the string; it gets its own alternative without the trailing \b,
    # otherwise names ending in "L.P." would never be cleaned.
    name = re.sub(
        r'(?:\b(?:Inc\.?|CO|CORP|PLC|LTD|LIMITED)\b\.?|\bL\.P\.)$',
        '',
        name,
        flags=re.IGNORECASE,
    ).strip()
    return name
def get_ticker_icon_url(ticker: str) -> str:
    """
    Build the URL of the PNG icon for the given ticker.

    The ticker is upper-cased and used as the file name under the
    nvstly/icons ``ticker_icons`` folder on GitHub.
    """
    symbol = ticker.upper()
    return f"https://raw.githubusercontent.com/nvstly/icons/main/ticker_icons/{symbol}.png"
@lru_cache(maxsize=4)
def get_icon_from_ticker(ticker: str) -> Optional[bytes]:
    """
    Download an icon for a given ticker as a PNG image, if available.

    WARNING: This function uses the nvstly/icons repository on GitHub to fetch the icons.
    The icons are not guaranteed to be available for all tickers.

    Args:
        ticker: The ticker symbol; must be a string of alphabetic characters only.

    Returns:
        The downloaded icon, or None when the server answers 404 for it.

    Raises:
        ValueError: If the ticker is not a string or contains non-alphabetic characters.
        HTTPStatusError: For any HTTP error status other than 404.
    """
    if not isinstance(ticker, str):
        raise ValueError("The ticker must be a valid string.")
    if not ticker.isalpha():
        raise ValueError("The ticker must only contain alphabetic characters.")
    try:
        # Build the URL through the shared helper so the icon location is
        # defined in exactly one place.
        return download_file(get_ticker_icon_url(ticker), as_text=False)
    except HTTPStatusError as e:
        # If the status code is 404, the icon is not available
        if e.response.status_code == 404:
            return None
        raise
def popular_us_stocks():
    """Load the packaged ``popular_us_stocks.csv`` table, indexed by its integer ``Cik`` column."""
    stocks = read_csv_from_package('popular_us_stocks.csv', dtype={'Cik': int})
    return stocks.set_index('Cik')
class Exchange(Enum):
    """Exchange names; each member's value equals its name."""

    Nasdaq = "Nasdaq"
    NYSE = "NYSE"
    OTC = "OTC"
    CBOE = "CBOE"

    def __str__(self) -> str:
        # Render a member as its plain value ("NYSE"), not "Exchange.NYSE".
        exchange_name = self.value
        return exchange_name