Initial commit

This commit is contained in:
kdusek
2025-12-09 12:13:01 +01:00
commit 8e654ed209
13332 changed files with 2695056 additions and 0 deletions

View File

@@ -0,0 +1,41 @@
import sys
from functools import lru_cache
import pandas as pd
import pyarrow.parquet as pq
# Dynamic import based on Python version
if sys.version_info >= (3, 9):
from importlib import resources
else:
import importlib_resources as resources
__all__ = ['read_parquet_from_package', 'read_pyarrow_from_package', 'read_csv_from_package']
@lru_cache(maxsize=1)
def read_parquet_from_package(parquet_filename: str):
package_name = 'edgar.reference.data'
with resources.path(package_name, parquet_filename) as parquet_path:
df = pd.read_parquet(parquet_path)
return df
def read_pyarrow_from_package(parquet_filename: str):
package_name = 'edgar.reference.data'
with resources.path(package_name, parquet_filename) as parquet_path:
# Read a pyarrow table from a parquet file
table = pq.read_table(parquet_path)
return table
def read_csv_from_package(csv_filename: str, **pandas_kwargs):
package_name = 'edgar.reference.data'
with resources.path(package_name, csv_filename) as csv_path:
df = pd.read_csv(csv_path, **pandas_kwargs)
return df