42 lines
1.1 KiB
Python
42 lines
1.1 KiB
Python
import sys
|
|
from functools import lru_cache
|
|
|
|
import pandas as pd
|
|
import pyarrow.parquet as pq
|
|
|
|
# Dynamic import based on Python version
|
|
if sys.version_info >= (3, 9):
|
|
from importlib import resources
|
|
else:
|
|
import importlib_resources as resources
|
|
|
|
# Names exported by `from <this module> import *`: the package-data reader functions.
__all__ = ['read_parquet_from_package', 'read_pyarrow_from_package', 'read_csv_from_package']
|
|
|
|
|
|
@lru_cache(maxsize=1)
def read_parquet_from_package(parquet_filename: str) -> pd.DataFrame:
    """Load a parquet file bundled in ``edgar.reference.data`` as a DataFrame.

    The result for the most recently requested filename is memoized by
    ``lru_cache(maxsize=1)``: repeated calls with the same filename return
    the very same DataFrame object, so callers should copy it before
    mutating it in place.

    Args:
        parquet_filename: Name of the parquet resource inside the package.

    Returns:
        The DataFrame produced by ``pandas.read_parquet``.
    """
    package_name = 'edgar.reference.data'
    # files()/as_file() replaces resources.path(), which emits a
    # DeprecationWarning on Python 3.11 and 3.12. Both functions exist in
    # importlib.resources (3.9+) and in the importlib_resources backport.
    resource = resources.files(package_name).joinpath(parquet_filename)
    with resources.as_file(resource) as parquet_path:
        # Read while the context is open: as_file() may hand back a temporary
        # copy (e.g. for a zipped package) that is removed on exit.
        return pd.read_parquet(parquet_path)
|
|
|
|
|
|
def read_pyarrow_from_package(parquet_filename: str):
    """Load a parquet file bundled in ``edgar.reference.data`` as a pyarrow table.

    Args:
        parquet_filename: Name of the parquet resource inside the package.

    Returns:
        The table returned by ``pyarrow.parquet.read_table``.
    """
    package_name = 'edgar.reference.data'
    # files()/as_file() replaces resources.path(), which emits a
    # DeprecationWarning on Python 3.11 and 3.12. Both functions exist in
    # importlib.resources (3.9+) and in the importlib_resources backport.
    resource = resources.files(package_name).joinpath(parquet_filename)
    with resources.as_file(resource) as parquet_path:
        # Read while the context is open: as_file() may hand back a temporary
        # copy (e.g. for a zipped package) that is removed on exit.
        return pq.read_table(parquet_path)
|
|
|
|
|
|
def read_csv_from_package(csv_filename: str, **pandas_kwargs) -> pd.DataFrame:
    """Load a CSV file bundled in ``edgar.reference.data`` as a DataFrame.

    Args:
        csv_filename: Name of the CSV resource inside the package.
        **pandas_kwargs: Extra keyword arguments forwarded unchanged to
            ``pandas.read_csv`` (for example ``usecols`` or ``dtype``).

    Returns:
        The DataFrame produced by ``pandas.read_csv``.
    """
    package_name = 'edgar.reference.data'
    # files()/as_file() replaces resources.path(), which emits a
    # DeprecationWarning on Python 3.11 and 3.12. Both functions exist in
    # importlib.resources (3.9+) and in the importlib_resources backport.
    resource = resources.files(package_name).joinpath(csv_filename)
    with resources.as_file(resource) as csv_path:
        # Read while the context is open: as_file() may hand back a temporary
        # copy (e.g. for a zipped package) that is removed on exit.
        return pd.read_csv(csv_path, **pandas_kwargs)
|