Initial commit

This commit is contained in:
kdusek
2025-12-09 12:13:01 +01:00
commit 8e654ed209
13332 changed files with 2695056 additions and 0 deletions

View File

@@ -0,0 +1,155 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# distutils: language = c++
from libc.stdint cimport *
from libcpp cimport bool as c_bool, nullptr
from libcpp.functional cimport function
from libcpp.memory cimport (shared_ptr, unique_ptr, make_shared,
static_pointer_cast, dynamic_pointer_cast)
from libcpp.optional cimport nullopt, optional
from libcpp.string cimport string as c_string
from libcpp.utility cimport move, pair
from libcpp.vector cimport vector
from libcpp.unordered_map cimport unordered_map
from libcpp.unordered_set cimport unordered_set
from cpython cimport PyObject
from cpython.datetime cimport PyDateTime_DateTime
cimport cpython
# Declares std::string_view to Cython under the name cpp_string_view,
# exposing only the constructors and accessors listed below.
cdef extern from "<string_view>" namespace "std" nogil:
# Needed until https://github.com/cython/cython/issues/6651 is fixed
cdef cppclass cpp_string_view "std::string_view":
# NOTE(review): the constructors are spelled `string_view` while the
# Cython-side class name is `cpp_string_view` -- confirm this is intended.
string_view()
string_view(const char*)
string_view(c_string&)
size_t size()
bint empty()
const char* data()
# Small C++ helpers in namespace arrow::py. The triple-quoted text is C++
# source carried inside this extern block; the two `cdef` lines after it
# declare those helpers to Cython.
cdef extern from * namespace "arrow::py" nogil:
"""
#include <memory>
#include <string>
#include <string_view>
#include <utility>
namespace arrow {
namespace py {
template <typename T>
std::shared_ptr<T> to_shared(std::unique_ptr<T>& t) {
return std::move(t);
}
template <typename T>
std::shared_ptr<T> to_shared(std::unique_ptr<T>&& t) {
return std::move(t);
}
// Needed until https://github.com/cython/cython/issues/6651 is fixed
inline std::string to_string(std::string_view s) {
return std::string(s);
}
} // namespace py
} // namespace arrow
"""
# to_shared: both C++ overloads return std::move(t), so the unique_ptr
# argument is moved into the returned shared_ptr.
cdef shared_ptr[T] to_shared" arrow::py::to_shared"[T](unique_ptr[T])
# to_string: builds a std::string from the given string_view.
cdef c_string to_string(cpp_string_view s)
# Pulls in arrow/python/platform.h without declaring any names from it.
cdef extern from "arrow/python/platform.h":
pass
# CPython C-API functions declared here on raw PyObject* pointers.
cdef extern from "<Python.h>":
void Py_XDECREF(PyObject* o)
Py_ssize_t Py_REFCNT(PyObject* o)
# Declarations for arrow::Status (as CStatus), a few of its static factory
# functions, and arrow::StatusDetail (as CStatusDetail).
cdef extern from "arrow/api.h" namespace "arrow" nogil:
# We can later add more of the common status factory methods as needed
cdef CStatus CStatus_OK "arrow::Status::OK"()
# NOTE(review): declared with no arguments here, unlike the two factories
# below that take a message string.
cdef CStatus CStatus_Invalid "arrow::Status::Invalid"()
cdef CStatus CStatus_NotImplemented \
"arrow::Status::NotImplemented"(const c_string& msg)
cdef CStatus CStatus_UnknownError \
"arrow::Status::UnknownError"(const c_string& msg)
# Status object: string/message accessors, an optional detail object,
# and one boolean predicate per status category.
cdef cppclass CStatus "arrow::Status":
CStatus()
c_string ToString()
c_string message()
shared_ptr[CStatusDetail] detail()
c_bool ok()
c_bool IsIOError()
c_bool IsOutOfMemory()
c_bool IsInvalid()
c_bool IsKeyError()
c_bool IsNotImplemented()
c_bool IsTypeError()
c_bool IsCapacityError()
c_bool IsIndexError()
c_bool IsSerializationError()
c_bool IsCancelled()
void Warn()
# Only the string rendering of a status detail is exposed.
cdef cppclass CStatusDetail "arrow::StatusDetail":
c_string ToString()
# Declaration of the arrow::Result<T> template as CResult[T].
cdef extern from "arrow/result.h" namespace "arrow" nogil:
cdef cppclass CResult "arrow::Result"[T]:
# Constructible empty, from a status, or from a value of type T.
CResult()
CResult(CStatus)
CResult(T)
c_bool ok()
CStatus status()
# Value() takes a T* out-parameter and returns a CStatus;
# operator* returns the T directly.
CStatus Value(T*)
T operator*()
# Declaration of the arrow::Future<T> template as CFuture[T]; only the
# default constructor is exposed.
cdef extern from "arrow/util/future.h" namespace "arrow" nogil:
cdef cppclass CFuture "arrow::Future"[T]:
CFuture()
# Declares arrow::py::BindFuture, which takes a CFuture[T], a Python
# callback object and a trailing variadic argument (explained below).
cdef extern from "arrow/python/async.h" namespace "arrow::py" nogil:
# BindFuture's third argument is really a C++ callable with
# the signature `object(T*)`, but Cython does not allow declaring that.
# We use an ellipsis as a workaround.
# Another possibility is to type-erase the argument by making it
# `object(void*)`, but it would lose compile-time C++ type safety.
void BindFuture[T](CFuture[T], object cb, ...)
# Helpers from arrow/python/common.h in namespace arrow::py.
cdef extern from "arrow/python/common.h" namespace "arrow::py" nogil:
# Takes a CResult[T] and returns a T. Declared `except *`, so Cython
# checks for a pending Python exception after each call.
T GetResultValue[T](CResult[T]) except *
# Returns a function[F] built from a raw `unbound` pointer, a Python
# `bound` object and variadic extra arguments.
cdef function[F] BindFunction[F](void* unbound, object bound, ...)
# Convert a raw PyObject* into a Cython-managed Python object.
# The cast adds one reference and the Py_DECREF removes one, so the raw
# reference count of `o` is unchanged once `result` holds it.
# NOTE(review): presumably this hands the caller's existing reference on `o`
# over to the returned object -- confirm against callers.
cdef inline object PyObject_to_object(PyObject* o):
# Cast to "object" increments reference count
cdef object result = <object> o
cpython.Py_DECREF(result)
return result

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,118 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# distutils: language = c++
from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport *
# Declarations from arrow/acero/options.h: the JoinType enum and the
# per-node options classes, each declared as deriving from CExecNodeOptions.
cdef extern from "arrow/acero/options.h" namespace "arrow::acero" nogil:
# Join kinds; each Cython name maps to the same-named JoinType enumerator.
cdef enum CJoinType "arrow::acero::JoinType":
CJoinType_LEFT_SEMI "arrow::acero::JoinType::LEFT_SEMI"
CJoinType_RIGHT_SEMI "arrow::acero::JoinType::RIGHT_SEMI"
CJoinType_LEFT_ANTI "arrow::acero::JoinType::LEFT_ANTI"
CJoinType_RIGHT_ANTI "arrow::acero::JoinType::RIGHT_ANTI"
CJoinType_INNER "arrow::acero::JoinType::INNER"
CJoinType_LEFT_OUTER "arrow::acero::JoinType::LEFT_OUTER"
CJoinType_RIGHT_OUTER "arrow::acero::JoinType::RIGHT_OUTER"
CJoinType_FULL_OUTER "arrow::acero::JoinType::FULL_OUTER"
# Base class for all the options classes declared below.
cdef cppclass CExecNodeOptions "arrow::acero::ExecNodeOptions":
pass
cdef cppclass CSourceNodeOptions "arrow::acero::SourceNodeOptions"(CExecNodeOptions):
pass
# Built from a table, optionally with a maximum batch size.
cdef cppclass CTableSourceNodeOptions "arrow::acero::TableSourceNodeOptions"(CExecNodeOptions):
CTableSourceNodeOptions(shared_ptr[CTable] table)
CTableSourceNodeOptions(shared_ptr[CTable] table, int64_t max_batch_size)
cdef cppclass CSinkNodeOptions "arrow::acero::SinkNodeOptions"(CExecNodeOptions):
pass
# Built from a single filter expression.
cdef cppclass CFilterNodeOptions "arrow::acero::FilterNodeOptions"(CExecNodeOptions):
CFilterNodeOptions(CExpression)
# Built from a list of expressions, optionally with matching output names.
cdef cppclass CProjectNodeOptions "arrow::acero::ProjectNodeOptions"(CExecNodeOptions):
CProjectNodeOptions(vector[CExpression] expressions)
CProjectNodeOptions(vector[CExpression] expressions,
vector[c_string] names)
cdef cppclass CAggregateNodeOptions "arrow::acero::AggregateNodeOptions"(CExecNodeOptions):
CAggregateNodeOptions(vector[CAggregate] aggregates, vector[CFieldRef] names)
cdef cppclass COrderByNodeOptions "arrow::acero::OrderByNodeOptions"(CExecNodeOptions):
COrderByNodeOptions(COrdering ordering)
# Three constructor overloads: keys only; keys plus filter and output
# suffixes; and the full form that also lists left/right output fields.
cdef cppclass CHashJoinNodeOptions "arrow::acero::HashJoinNodeOptions"(CExecNodeOptions):
CHashJoinNodeOptions(CJoinType, vector[CFieldRef] in_left_keys,
vector[CFieldRef] in_right_keys)
CHashJoinNodeOptions(CJoinType, vector[CFieldRef] in_left_keys,
vector[CFieldRef] in_right_keys,
CExpression filter,
c_string output_suffix_for_left,
c_string output_suffix_for_right)
CHashJoinNodeOptions(CJoinType join_type,
vector[CFieldRef] left_keys,
vector[CFieldRef] right_keys,
vector[CFieldRef] left_output,
vector[CFieldRef] right_output,
CExpression filter,
c_string output_suffix_for_left,
c_string output_suffix_for_right)
# Nested C++ struct AsofJoinNodeOptions::Keys: one `on` key plus a list of
# `by` keys.
cdef struct CAsofJoinKeys "arrow::acero::AsofJoinNodeOptions::Keys":
CFieldRef on_key
vector[CFieldRef] by_key
# Built from a list of key structs and an integer tolerance.
cdef cppclass CAsofJoinNodeOptions "arrow::acero::AsofJoinNodeOptions"(CExecNodeOptions):
CAsofJoinNodeOptions(vector[CAsofJoinKeys] keys, int64_t tolerance)
# Declarations from arrow/acero/exec_plan.h: Declaration, ExecNode and the
# DeclarationTo* functions.
cdef extern from "arrow/acero/exec_plan.h" namespace "arrow::acero" nogil:
cdef cppclass CDeclaration "arrow::acero::Declaration":
# An input can be constructed from an ExecNode pointer or from another
# declaration.
cppclass Input:
Input(CExecNode*)
Input(CDeclaration)
c_string label
vector[Input] inputs
CDeclaration()
CDeclaration(c_string factory_name, CExecNodeOptions options)
CDeclaration(c_string factory_name, vector[Input] inputs, shared_ptr[CExecNodeOptions] options)
# Builds a single declaration from a list of declarations.
@staticmethod
CDeclaration Sequence(vector[CDeclaration] decls)
cdef cppclass CExecNode "arrow::acero::ExecNode":
const vector[CExecNode*]& inputs() const
const shared_ptr[CSchema]& output_schema() const
# Functions taking a CDeclaration and returning a CResult holding a table,
# a record batch reader, or a string.
CResult[shared_ptr[CTable]] DeclarationToTable(
CDeclaration declaration, c_bool use_threads
)
CResult[shared_ptr[CTable]] DeclarationToTable(
CDeclaration declaration, c_bool use_threads,
CMemoryPool* memory_pool, CFunctionRegistry* function_registry
)
CResult[unique_ptr[CRecordBatchReader]] DeclarationToReader(
CDeclaration declaration, c_bool use_threads
)
CResult[c_string] DeclarationToString(const CDeclaration& declaration)

View File

@@ -0,0 +1,109 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# distutils: language = c++
from pyarrow.includes.libarrow cimport *
# Declarations from arrow/gpu/cuda_api.h (namespace arrow::cuda): device
# manager, context, IPC handle, buffers, reader/writer and free functions.
cdef extern from "arrow/gpu/cuda_api.h" namespace "arrow::cuda" nogil:
# Instance() is a static accessor returning a CCudaDeviceManager pointer
# wrapped in a CResult.
cdef cppclass CCudaDeviceManager" arrow::cuda::CudaDeviceManager":
@staticmethod
CResult[CCudaDeviceManager*] Instance()
CResult[shared_ptr[CCudaContext]] GetContext(int gpu_number)
CResult[shared_ptr[CCudaContext]] GetSharedContext(int gpu_number,
void* handle)
CStatus AllocateHost(int device_number, int64_t nbytes,
shared_ptr[CCudaHostBuffer]* buffer)
int num_devices() const
# Context: allocation, views over existing data, IPC buffer opening and
# accessors for the device and memory manager.
cdef cppclass CCudaContext" arrow::cuda::CudaContext":
CResult[shared_ptr[CCudaBuffer]] Allocate(int64_t nbytes)
CResult[shared_ptr[CCudaBuffer]] View(uint8_t* data, int64_t nbytes)
CResult[shared_ptr[CCudaBuffer]] OpenIpcBuffer(
const CCudaIpcMemHandle& ipc_handle)
CStatus Synchronize()
int64_t bytes_allocated() const
const void* handle() const
int device_number() const
CResult[uintptr_t] GetDeviceAddress(uintptr_t addr)
shared_ptr[CDevice] device() const
shared_ptr[CMemoryManager] memory_manager() const
# IPC memory handle: built from an opaque pointer, serializable to a CBuffer.
cdef cppclass CCudaIpcMemHandle" arrow::cuda::CudaIpcMemHandle":
@staticmethod
CResult[shared_ptr[CCudaIpcMemHandle]] FromBuffer(
const void* opaque_handle)
CResult[shared_ptr[CBuffer]] Serialize(CMemoryPool* pool) const
# Buffer declared as deriving from CBuffer, with copy methods that take a
# position, a data pointer and a byte count.
cdef cppclass CCudaBuffer" arrow::cuda::CudaBuffer"(CBuffer):
CCudaBuffer(uint8_t* data, int64_t size,
const shared_ptr[CCudaContext]& context,
c_bool own_data=false, c_bool is_ipc=false)
CCudaBuffer(const shared_ptr[CCudaBuffer]& parent,
const int64_t offset, const int64_t size)
@staticmethod
CResult[shared_ptr[CCudaBuffer]] FromBuffer(shared_ptr[CBuffer] buf)
CStatus CopyToHost(const int64_t position, const int64_t nbytes,
void* out) const
CStatus CopyFromHost(const int64_t position, const void* data,
int64_t nbytes)
CStatus CopyFromDevice(const int64_t position, const void* data,
int64_t nbytes)
CStatus CopyFromAnotherDevice(const shared_ptr[CCudaContext]& src_ctx,
const int64_t position, const void* data,
int64_t nbytes)
CResult[shared_ptr[CCudaIpcMemHandle]] ExportForIpc()
shared_ptr[CCudaContext] context() const
# Host buffer type, declared as deriving from CMutableBuffer with no members.
cdef cppclass \
CCudaHostBuffer" arrow::cuda::CudaHostBuffer"(CMutableBuffer):
pass
# Reader declared as deriving from CBufferReader; Read has two overloads
# (into a caller-supplied pointer, or returning a new buffer).
cdef cppclass \
CCudaBufferReader" arrow::cuda::CudaBufferReader"(CBufferReader):
CCudaBufferReader(const shared_ptr[CBuffer]& buffer)
CResult[int64_t] Read(int64_t nbytes, void* buffer)
CResult[shared_ptr[CBuffer]] Read(int64_t nbytes)
# Writer declared as deriving from WritableFile, with buffer-size controls.
cdef cppclass \
CCudaBufferWriter" arrow::cuda::CudaBufferWriter"(WritableFile):
CCudaBufferWriter(const shared_ptr[CCudaBuffer]& buffer)
CStatus Close()
CStatus Write(const void* data, int64_t nbytes)
CStatus WriteAt(int64_t position, const void* data, int64_t nbytes)
CStatus SetBufferSize(const int64_t buffer_size)
int64_t buffer_size()
int64_t num_bytes_buffered() const
CResult[shared_ptr[CCudaHostBuffer]] AllocateCudaHostBuffer(
int device_number, const int64_t size)
# Cuda prefix is added to avoid picking up arrow::cuda functions
# from arrow namespace.
CResult[shared_ptr[CCudaBuffer]] \
CudaSerializeRecordBatch" arrow::cuda::SerializeRecordBatch"\
(const CRecordBatch& batch,
CCudaContext* ctx)
CResult[shared_ptr[CRecordBatch]] \
CudaReadRecordBatch" arrow::cuda::ReadRecordBatch"\
(const shared_ptr[CSchema]& schema,
CDictionaryMemo* dictionary_memo,
const shared_ptr[CCudaBuffer]& buffer,
CMemoryPool* pool)

View File

@@ -0,0 +1,423 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# distutils: language = c++
from libcpp.unordered_map cimport unordered_map
from libcpp cimport bool as c_bool
from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport *
from pyarrow.includes.libarrow_acero cimport *
from pyarrow.includes.libarrow_fs cimport *
# Declares arrow::dataset::internal::Initialize() from arrow/dataset/plan.h;
# it takes no arguments and returns nothing.
cdef extern from "arrow/dataset/plan.h" namespace "arrow::dataset::internal" nogil:
cdef void Initialize()
# Function-type aliases for writer-finish callbacks. The "internal" form
# takes a CFileWriter* and returns a CStatus; the other form takes a Python
# dict plus a CFileWriter* and returns nothing.
ctypedef CStatus cb_writer_finish_internal(CFileWriter*)
ctypedef void cb_writer_finish(dict, CFileWriter*)
# Declarations from arrow/dataset/api.h (namespace arrow::dataset): scanning,
# fragments, datasets and their factories, file formats and write options,
# and partitioning.
cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil:
# Each Cython name maps to one k-prefixed ExistingDataBehavior enumerator.
cdef enum ExistingDataBehavior" arrow::dataset::ExistingDataBehavior":
ExistingDataBehavior_DELETE_MATCHING" \
arrow::dataset::ExistingDataBehavior::kDeleteMatchingPartitions"
ExistingDataBehavior_OVERWRITE_OR_IGNORE" \
arrow::dataset::ExistingDataBehavior::kOverwriteOrIgnore"
ExistingDataBehavior_ERROR" \
arrow::dataset::ExistingDataBehavior::kError"
# Scan configuration fields exposed to Cython.
cdef cppclass CScanOptions "arrow::dataset::ScanOptions":
shared_ptr[CSchema] dataset_schema
shared_ptr[CSchema] projected_schema
c_bool use_threads
c_bool cache_metadata
CExpression filter
# Options for a scan node; declared as deriving from CExecNodeOptions.
cdef cppclass CScanNodeOptions "arrow::dataset::ScanNodeOptions"(CExecNodeOptions):
CScanNodeOptions(shared_ptr[CDataset] dataset, shared_ptr[CScanOptions] scan_options, bint require_sequenced_output, bint implicit_ordering)
shared_ptr[CScanOptions] scan_options
# Base class for the format-specific scan options declared further below.
cdef cppclass CFragmentScanOptions "arrow::dataset::FragmentScanOptions":
c_string type_name() const
ctypedef CIterator[shared_ptr[CScanTask]] CScanTaskIterator \
"arrow::dataset::ScanTaskIterator"
cdef cppclass CScanTask" arrow::dataset::ScanTask":
CResult[CRecordBatchIterator] Execute()
# Fragment: schema reading, scanning, and partition-expression access.
cdef cppclass CFragment "arrow::dataset::Fragment":
CResult[shared_ptr[CSchema]] ReadPhysicalSchema()
CResult[CScanTaskIterator] Scan(shared_ptr[CScanOptions] options)
c_bool splittable() const
c_string type_name() const
const CExpression& partition_expression() const
ctypedef vector[shared_ptr[CFragment]] CFragmentVector \
"arrow::dataset::FragmentVector"
ctypedef CIterator[shared_ptr[CFragment]] CFragmentIterator \
"arrow::dataset::FragmentIterator"
# Fragment built from record batches plus a partition expression.
cdef cppclass CInMemoryFragment "arrow::dataset::InMemoryFragment"(
CFragment):
CInMemoryFragment(vector[shared_ptr[CRecordBatch]] record_batches,
CExpression partition_expression)
# Pairs a record batch with a fragment.
cdef cppclass CTaggedRecordBatch "arrow::dataset::TaggedRecordBatch":
shared_ptr[CRecordBatch] record_batch
shared_ptr[CFragment] fragment
ctypedef CIterator[CTaggedRecordBatch] CTaggedRecordBatchIterator \
"arrow::dataset::TaggedRecordBatchIterator"
# Scanner: constructible over a dataset or a single fragment; exposes
# table, batch, row-count and reader style outputs.
cdef cppclass CScanner "arrow::dataset::Scanner":
CScanner(shared_ptr[CDataset], shared_ptr[CScanOptions])
CScanner(shared_ptr[CFragment], shared_ptr[CScanOptions])
CResult[CScanTaskIterator] Scan()
CResult[CTaggedRecordBatchIterator] ScanBatches()
CResult[shared_ptr[CTable]] ToTable()
CResult[shared_ptr[CTable]] TakeRows(const CArray& indices)
CResult[shared_ptr[CTable]] Head(int64_t num_rows)
CResult[int64_t] CountRows()
CResult[CFragmentIterator] GetFragments()
CResult[shared_ptr[CRecordBatchReader]] ToRecordBatchReader()
const shared_ptr[CScanOptions]& options()
# Builder whose setters each return a CStatus; Finish() returns the scanner.
cdef cppclass CScannerBuilder "arrow::dataset::ScannerBuilder":
CScannerBuilder(shared_ptr[CDataset],
shared_ptr[CScanOptions] scan_options)
CScannerBuilder(shared_ptr[CSchema], shared_ptr[CFragment],
shared_ptr[CScanOptions] scan_options)
@staticmethod
shared_ptr[CScannerBuilder] FromRecordBatchReader(
shared_ptr[CRecordBatchReader] reader)
# ProjectColumns is the C++ `Project` overload taking only column names;
# the plain `Project` below takes expressions plus names.
CStatus ProjectColumns "Project"(const vector[c_string]& columns)
CStatus Project(vector[CExpression]& exprs, vector[c_string]& columns)
CStatus Filter(CExpression filter)
CStatus UseThreads(c_bool use_threads)
CStatus CacheMetadata(c_bool cache_metadata)
CStatus Pool(CMemoryPool* pool)
CStatus BatchSize(int64_t batch_size)
CStatus BatchReadahead(int32_t batch_readahead)
CStatus FragmentReadahead(int32_t fragment_readahead)
CStatus FragmentScanOptions(
shared_ptr[CFragmentScanOptions] fragment_scan_options)
CResult[shared_ptr[CScanOptions]] GetScanOptions()
CResult[shared_ptr[CScanner]] Finish()
shared_ptr[CSchema] schema() const
ctypedef vector[shared_ptr[CDataset]] CDatasetVector \
"arrow::dataset::DatasetVector"
# Dataset base class: schema, fragment enumeration (optionally with a
# predicate), schema replacement and scan creation.
cdef cppclass CDataset "arrow::dataset::Dataset":
const shared_ptr[CSchema] & schema()
CResult[CFragmentIterator] GetFragments()
CResult[CFragmentIterator] GetFragments(CExpression predicate)
const CExpression & partition_expression()
c_string type_name()
CResult[shared_ptr[CDataset]] ReplaceSchema(shared_ptr[CSchema])
CResult[shared_ptr[CScannerBuilder]] NewScan()
# Dataset constructible from a record batch reader or a table.
cdef cppclass CInMemoryDataset "arrow::dataset::InMemoryDataset"(
CDataset):
CInMemoryDataset(shared_ptr[CRecordBatchReader])
CInMemoryDataset(shared_ptr[CTable])
# Dataset made from a schema and a vector of child datasets.
cdef cppclass CUnionDataset "arrow::dataset::UnionDataset"(
CDataset):
@staticmethod
CResult[shared_ptr[CUnionDataset]] Make(shared_ptr[CSchema] schema,
CDatasetVector children)
const CDatasetVector& children() const
cdef cppclass CInspectOptions "arrow::dataset::InspectOptions":
int fragments
CField.CMergeOptions field_merge_options
cdef cppclass CFinishOptions "arrow::dataset::FinishOptions":
shared_ptr[CSchema] schema
CInspectOptions inspect_options
c_bool validate_fragments
# Factory: schema inspection plus Finish. FinishWithSchema is the C++
# `Finish` overload that takes an explicit schema.
cdef cppclass CDatasetFactory "arrow::dataset::DatasetFactory":
CResult[vector[shared_ptr[CSchema]]] InspectSchemas(CInspectOptions)
CResult[shared_ptr[CSchema]] Inspect(CInspectOptions)
CResult[shared_ptr[CDataset]] FinishWithSchema "Finish"(
const shared_ptr[CSchema]& schema)
CResult[shared_ptr[CDataset]] Finish()
const CExpression& root_partition()
CStatus SetRootPartition(CExpression partition)
cdef cppclass CUnionDatasetFactory "arrow::dataset::UnionDatasetFactory":
@staticmethod
CResult[shared_ptr[CDatasetFactory]] Make(
vector[shared_ptr[CDatasetFactory]] factories)
# File source: accessors for path, filesystem, buffer and size, plus Open().
cdef cppclass CFileSource "arrow::dataset::FileSource":
const c_string& path() const
const shared_ptr[CFileSystem]& filesystem() const
const shared_ptr[CBuffer]& buffer() const
const int64_t size() const
CResult[shared_ptr[CRandomAccessFile]] Open() const
# HACK: Cython can't handle all the overloads so don't declare them.
# This means invalid construction of CFileSource won't be caught in
# the C++ generation phase (though it will still be caught when
# the generated C++ is compiled).
CFileSource(...)
# Base class for the format-specific write options declared below.
cdef cppclass CFileWriteOptions \
"arrow::dataset::FileWriteOptions":
const shared_ptr[CFileFormat]& format() const
c_string type_name() const
cdef cppclass CFileWriter \
"arrow::dataset::FileWriter":
const shared_ptr[CFileFormat]& format() const
const shared_ptr[CSchema]& schema() const
const shared_ptr[CFileWriteOptions]& options() const
const CFileLocator& destination() const
CResult[int64_t] GetBytesWritten()
# Base class for the file formats declared below.
cdef cppclass CFileFormat "arrow::dataset::FileFormat":
shared_ptr[CFragmentScanOptions] default_fragment_scan_options
c_string type_name() const
CResult[shared_ptr[CSchema]] Inspect(const CFileSource&) const
CResult[shared_ptr[CFileFragment]] MakeFragment(
CFileSource source,
CExpression partition_expression,
shared_ptr[CSchema] physical_schema)
shared_ptr[CFileWriteOptions] DefaultWriteOptions()
cdef cppclass CFileFragment "arrow::dataset::FileFragment"(
CFragment):
const CFileSource& source() const
const shared_ptr[CFileFormat]& format() const
# Write options. The two writer_*_finish hooks are `function` objects
# typed with the cb_writer_finish_internal signature.
cdef cppclass CFileSystemDatasetWriteOptions \
"arrow::dataset::FileSystemDatasetWriteOptions":
shared_ptr[CFileWriteOptions] file_write_options
shared_ptr[CFileSystem] filesystem
c_string base_dir
shared_ptr[CPartitioning] partitioning
c_bool preserve_order
int max_partitions
c_string basename_template
function[cb_writer_finish_internal] writer_pre_finish
function[cb_writer_finish_internal] writer_post_finish
ExistingDataBehavior existing_data_behavior
c_bool create_dir
uint32_t max_open_files
uint64_t max_rows_per_file
uint64_t min_rows_per_group
uint64_t max_rows_per_group
# Filesystem-backed dataset with static Make and Write entry points.
cdef cppclass CFileSystemDataset \
"arrow::dataset::FileSystemDataset"(CDataset):
@staticmethod
CResult[shared_ptr[CDataset]] Make(
shared_ptr[CSchema] schema,
CExpression source_partition,
shared_ptr[CFileFormat] format,
shared_ptr[CFileSystem] filesystem,
vector[shared_ptr[CFileFragment]] fragments)
@staticmethod
CStatus Write(
const CFileSystemDatasetWriteOptions& write_options,
shared_ptr[CScanner] scanner)
c_string type()
vector[c_string] files()
const shared_ptr[CFileFormat]& format() const
const shared_ptr[CFileSystem]& filesystem() const
const shared_ptr[CPartitioning]& partitioning() const
# Format-specific subclasses (IPC, ORC, CSV, JSON) of the file-format,
# write-options and fragment-scan-options base classes.
cdef cppclass CIpcFileWriteOptions \
"arrow::dataset::IpcFileWriteOptions"(CFileWriteOptions):
shared_ptr[CIpcWriteOptions] options
cdef cppclass CIpcFileFormat "arrow::dataset::IpcFileFormat"(
CFileFormat):
pass
cdef cppclass COrcFileFormat "arrow::dataset::OrcFileFormat"(
CFileFormat):
pass
cdef cppclass CCsvFileWriteOptions \
"arrow::dataset::CsvFileWriteOptions"(CFileWriteOptions):
shared_ptr[CCSVWriteOptions] write_options
CMemoryPool* pool
cdef cppclass CCsvFileFormat "arrow::dataset::CsvFileFormat"(
CFileFormat):
CCSVParseOptions parse_options
cdef cppclass CCsvFragmentScanOptions \
"arrow::dataset::CsvFragmentScanOptions"(CFragmentScanOptions):
CCSVConvertOptions convert_options
CCSVReadOptions read_options
function[StreamWrapFunc] stream_transform_func
cdef cppclass CJsonFileFormat "arrow::dataset::JsonFileFormat"(CFileFormat):
pass
cdef cppclass CJsonFragmentScanOptions "arrow::dataset::JsonFragmentScanOptions"(CFragmentScanOptions):
CJSONParseOptions parse_options
CJSONReadOptions read_options
# Result type of CPartitioning.Format: a directory part and a filename part.
cdef struct CPartitionPathFormat "arrow::dataset::PartitionPathFormat":
c_string directory
c_string filename
# Partitioning base class: Parse takes a path and returns an expression;
# Format takes an expression and returns a CPartitionPathFormat.
cdef cppclass CPartitioning "arrow::dataset::Partitioning":
c_string type_name() const
CResult[CExpression] Parse(const c_string & path) const
CResult[CPartitionPathFormat] Format(const CExpression & expr) const
const shared_ptr[CSchema] & schema()
c_bool Equals(const CPartitioning& other) const
# SegmentEncoding is declared as a cppclass with operator==, followed by
# its None and Uri values declared as constants of that type.
cdef cppclass CSegmentEncoding" arrow::dataset::SegmentEncoding":
bint operator==(CSegmentEncoding)
CSegmentEncoding CSegmentEncoding_None\
" arrow::dataset::SegmentEncoding::None"
CSegmentEncoding CSegmentEncoding_Uri\
" arrow::dataset::SegmentEncoding::Uri"
# Option structs for partitionings and partitioning factories.
cdef cppclass CKeyValuePartitioningOptions \
"arrow::dataset::KeyValuePartitioningOptions":
CSegmentEncoding segment_encoding
cdef cppclass CHivePartitioningOptions \
"arrow::dataset::HivePartitioningOptions":
CSegmentEncoding segment_encoding
c_string null_fallback
cdef cppclass CPartitioningFactoryOptions \
"arrow::dataset::PartitioningFactoryOptions":
c_bool infer_dictionary
shared_ptr[CSchema] schema
CSegmentEncoding segment_encoding
cdef cppclass CHivePartitioningFactoryOptions \
"arrow::dataset::HivePartitioningFactoryOptions":
c_bool infer_dictionary
c_string null_fallback
shared_ptr[CSchema] schema
CSegmentEncoding segment_encoding
cdef cppclass CPartitioningFactory "arrow::dataset::PartitioningFactory":
c_string type_name() const
# Concrete partitionings. Each is built from a schema plus dictionaries;
# the directory, hive and filename variants also expose a static
# MakeFactory.
cdef cppclass CKeyValuePartitioning \
"arrow::dataset::KeyValuePartitioning"(CPartitioning):
CKeyValuePartitioning(shared_ptr[CSchema] schema,
vector[shared_ptr[CArray]] dictionaries,
CKeyValuePartitioningOptions options)
vector[shared_ptr[CArray]] dictionaries() const
CSegmentEncoding segment_encoding()
cdef cppclass CDirectoryPartitioning \
"arrow::dataset::DirectoryPartitioning"(CPartitioning):
CDirectoryPartitioning(shared_ptr[CSchema] schema,
vector[shared_ptr[CArray]] dictionaries)
@staticmethod
shared_ptr[CPartitioningFactory] MakeFactory(
vector[c_string] field_names, CPartitioningFactoryOptions)
vector[shared_ptr[CArray]] dictionaries() const
cdef cppclass CHivePartitioning \
"arrow::dataset::HivePartitioning"(CPartitioning):
CHivePartitioning(shared_ptr[CSchema] schema,
vector[shared_ptr[CArray]] dictionaries,
CHivePartitioningOptions options)
@staticmethod
shared_ptr[CPartitioningFactory] MakeFactory(
CHivePartitioningFactoryOptions)
vector[shared_ptr[CArray]] dictionaries() const
c_string null_fallback() const
cdef cppclass CFilenamePartitioning \
"arrow::dataset::FilenamePartitioning"(CPartitioning):
CFilenamePartitioning(shared_ptr[CSchema] schema,
vector[shared_ptr[CArray]] dictionaries)
@staticmethod
shared_ptr[CPartitioningFactory] MakeFactory(
vector[c_string] field_names, CPartitioningFactoryOptions)
vector[shared_ptr[CArray]] dictionaries() const
# Holder that can be constructed from, or assigned, either a partitioning
# or a partitioning factory, with an accessor for each.
cdef cppclass CPartitioningOrFactory \
"arrow::dataset::PartitioningOrFactory":
CPartitioningOrFactory(shared_ptr[CPartitioning])
CPartitioningOrFactory(shared_ptr[CPartitioningFactory])
CPartitioningOrFactory & operator = (shared_ptr[CPartitioning])
CPartitioningOrFactory & operator = (
shared_ptr[CPartitioningFactory])
shared_ptr[CPartitioning] partitioning() const
shared_ptr[CPartitioningFactory] factory() const
cdef cppclass CFileSystemFactoryOptions \
"arrow::dataset::FileSystemFactoryOptions":
CPartitioningOrFactory partitioning
c_string partition_base_dir
c_bool exclude_invalid_files
vector[c_string] selector_ignore_prefixes
# The three MakeFrom* names all bind to the overloaded C++ `Make`; they
# differ in the second argument (paths, a file selector, or file infos).
cdef cppclass CFileSystemDatasetFactory \
"arrow::dataset::FileSystemDatasetFactory"(
CDatasetFactory):
@staticmethod
CResult[shared_ptr[CDatasetFactory]] MakeFromPaths "Make"(
shared_ptr[CFileSystem] filesystem,
vector[c_string] paths,
shared_ptr[CFileFormat] format,
CFileSystemFactoryOptions options
)
@staticmethod
CResult[shared_ptr[CDatasetFactory]] MakeFromSelector "Make"(
shared_ptr[CFileSystem] filesystem,
CFileSelector,
shared_ptr[CFileFormat] format,
CFileSystemFactoryOptions options
)
@staticmethod
CResult[shared_ptr[CDatasetFactory]] MakeFromFileInfos "Make"(
shared_ptr[CFileSystem] filesystem,
vector[CFileInfo] files,
shared_ptr[CFileFormat] format,
CFileSystemFactoryOptions options
)

View File

@@ -0,0 +1,107 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# distutils: language = c++
from pyarrow.includes.libarrow_dataset cimport *
from pyarrow.includes.libparquet_encryption cimport *
from pyarrow._parquet cimport *
# Encryption and decryption config structs for Parquet datasets. Both hold a
# crypto factory and a KMS connection config; they differ in the third field
# (encryption vs. decryption configuration).
cdef extern from "arrow/dataset/parquet_encryption_config.h" namespace "arrow::dataset" nogil:
cdef cppclass CParquetEncryptionConfig "arrow::dataset::ParquetEncryptionConfig":
shared_ptr[CCryptoFactory] crypto_factory
shared_ptr[CKmsConnectionConfig] kms_connection_config
shared_ptr[CEncryptionConfiguration] encryption_config
cdef cppclass CParquetDecryptionConfig "arrow::dataset::ParquetDecryptionConfig":
shared_ptr[CCryptoFactory] crypto_factory
shared_ptr[CKmsConnectionConfig] kms_connection_config
shared_ptr[CDecryptionConfiguration] decryption_config
# Parquet-specific dataset declarations from arrow/dataset/api.h: writer,
# write options, fragment, file format, scan options and dataset factory.
cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil:
# Writer declared as deriving from CFileWriter; exposes the wrapped
# parquet FileWriter.
cdef cppclass CParquetFileWriter \
"arrow::dataset::ParquetFileWriter"(CFileWriter):
const shared_ptr[FileWriter]& parquet_writer() const
cdef cppclass CParquetFileWriteOptions \
"arrow::dataset::ParquetFileWriteOptions"(CFileWriteOptions):
shared_ptr[WriterProperties] writer_properties
shared_ptr[ArrowWriterProperties] arrow_writer_properties
shared_ptr[CParquetEncryptionConfig] parquet_encryption_config
# Fragment with row-group access. SubsetWithFilter and SubsetWithIds both
# bind to the overloaded C++ `Subset` (predicate vs. row-group ids).
cdef cppclass CParquetFileFragment "arrow::dataset::ParquetFileFragment"(
CFileFragment):
const vector[int]& row_groups() const
shared_ptr[CFileMetaData] metadata() const
CResult[vector[shared_ptr[CFragment]]] SplitByRowGroup(
CExpression predicate)
CResult[shared_ptr[CFragment]] SubsetWithFilter "Subset"(
CExpression predicate)
CResult[shared_ptr[CFragment]] SubsetWithIds "Subset"(
vector[int] row_group_ids)
CStatus EnsureCompleteMetadata()
# Nested C++ type ParquetFileFormat::ReaderOptions.
cdef cppclass CParquetFileFormatReaderOptions \
"arrow::dataset::ParquetFileFormat::ReaderOptions":
unordered_set[c_string] dict_columns
TimeUnit coerce_int96_timestamp_unit
Type binary_type
Type list_type
# File format; this MakeFragment overload additionally takes row groups.
cdef cppclass CParquetFileFormat "arrow::dataset::ParquetFileFormat"(
CFileFormat):
CParquetFileFormatReaderOptions reader_options
CResult[shared_ptr[CFileFragment]] MakeFragment(
CFileSource source,
CExpression partition_expression,
shared_ptr[CSchema] physical_schema,
vector[int] row_groups)
cdef cppclass CParquetFragmentScanOptions \
"arrow::dataset::ParquetFragmentScanOptions"(CFragmentScanOptions):
shared_ptr[CReaderProperties] reader_properties
shared_ptr[ArrowReaderProperties] arrow_reader_properties
shared_ptr[CParquetDecryptionConfig] parquet_decryption_config
cdef cppclass CParquetFactoryOptions \
"arrow::dataset::ParquetFactoryOptions":
CPartitioningOrFactory partitioning
c_string partition_base_dir
c_bool validate_column_chunk_paths
# Both MakeFromMetaData* names bind to the overloaded C++ `Make`: one takes
# a metadata path string, the other a CFileSource plus a base path.
cdef cppclass CParquetDatasetFactory \
"arrow::dataset::ParquetDatasetFactory"(CDatasetFactory):
@staticmethod
CResult[shared_ptr[CDatasetFactory]] MakeFromMetaDataPath "Make"(
const c_string& metadata_path,
shared_ptr[CFileSystem] filesystem,
shared_ptr[CParquetFileFormat] format,
CParquetFactoryOptions options
)
@staticmethod
CResult[shared_ptr[CDatasetFactory]] MakeFromMetaDataSource "Make"(
const CFileSource& metadata_path,
const c_string& base_path,
shared_ptr[CFileSystem] filesystem,
shared_ptr[CParquetFileFormat] format,
CParquetFactoryOptions options
)

View File

@@ -0,0 +1,50 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# distutils: language = c++
from pyarrow.includes.libarrow cimport (CCompressionType, CStatus, CTable,
COutputStream, CResult, shared_ptr,
vector, CRandomAccessFile, CSchema,
c_string, CIpcReadOptions)
# Feather declarations taken from the Arrow IPC API header.
# NOTE(review): the block names namespace "arrow::ipc", yet each C name below
# is written fully qualified with a leading space; presumably the space keeps
# Cython from prepending the namespace -- confirm.
cdef extern from "arrow/ipc/api.h" namespace "arrow::ipc" nogil:
# Integer constants for the two Feather format versions.
int kFeatherV1Version" arrow::ipc::feather::kFeatherV1Version"
int kFeatherV2Version" arrow::ipc::feather::kFeatherV2Version"
# Binding for arrow::ipc::feather::WriteProperties; passed by value to
# WriteFeather below.
cdef cppclass CFeatherProperties" arrow::ipc::feather::WriteProperties":
int version
int chunksize
CCompressionType compression
int compression_level
# Binding for the free function arrow::ipc::feather::WriteTable.
CStatus WriteFeather" arrow::ipc::feather::WriteTable" \
(const CTable& table, COutputStream* out,
CFeatherProperties properties)
# Binding for arrow::ipc::feather::Reader.  Read is declared three times:
# with no selection, with a vector of int indices, and with a vector of
# string names; each writes its result through the `out` pointer.
cdef cppclass CFeatherReader" arrow::ipc::feather::Reader":
@staticmethod
CResult[shared_ptr[CFeatherReader]] Open(
const shared_ptr[CRandomAccessFile]& file,
const CIpcReadOptions& options)
int version()
shared_ptr[CSchema] schema()
CStatus Read(shared_ptr[CTable]* out)
CStatus Read(const vector[int] indices, shared_ptr[CTable]* out)
CStatus Read(const vector[c_string] names, shared_ptr[CTable]* out)

View File

@@ -0,0 +1,621 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# distutils: language = c++
from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport *
from pyarrow.includes.libarrow_python cimport CTimePoint
from libcpp.map cimport multimap
# Declarations taken from the Arrow Flight C++ API header.
# NOTE(review): the block names namespace "arrow", yet every C name below is
# written fully qualified with a leading space (" arrow::flight::...");
# presumably the space keeps Cython from prepending the namespace -- confirm.
cdef extern from "arrow/flight/api.h" namespace "arrow" nogil:
# C string constant bound to TracingServerMiddleware::kMiddlewareName.
cdef char* CTracingServerMiddlewareName\
" arrow::flight::TracingServerMiddleware::kMiddlewareName"
# Binding for arrow::flight::ActionType: two string members, equality, and a
# SerializeToString / static Deserialize pair.
cdef cppclass CActionType" arrow::flight::ActionType":
c_string type
c_string description
bint operator==(CActionType)
CResult[c_string] SerializeToString()
@staticmethod
CResult[CActionType] Deserialize(const c_string& serialized)
# Binding for arrow::flight::Action: a string `type`, a buffer `body`,
# equality, ToString, and a SerializeToString / static Deserialize pair.
cdef cppclass CAction" arrow::flight::Action":
c_string type
shared_ptr[CBuffer] body
bint operator==(CAction)
CResult[c_string] SerializeToString()
c_string ToString()
@staticmethod
CResult[CAction] Deserialize(const c_string& serialized)
# Binding for arrow::flight::Result (named CFlightResult on the Cython side).
# Declares default and copy-style constructors and a buffer `body`.
cdef cppclass CFlightResult" arrow::flight::Result":
CFlightResult()
CFlightResult(CFlightResult)
shared_ptr[CBuffer] body
bint operator==(CFlightResult)
CResult[c_string] SerializeToString()
c_string ToString()
@staticmethod
CResult[CFlightResult] Deserialize(const c_string& serialized)
# Binding for arrow::flight::BasicAuth: username/password string members,
# three constructors (default, from CBuffer, from another CBasicAuth), and
# the usual equality / serialization declarations.
cdef cppclass CBasicAuth" arrow::flight::BasicAuth":
CBasicAuth()
CBasicAuth(CBuffer)
CBasicAuth(CBasicAuth)
c_string username
c_string password
bint operator==(CBasicAuth)
CResult[c_string] SerializeToString()
c_string ToString()
@staticmethod
CResult[CBasicAuth] Deserialize(const c_string& serialized)
# Binding for arrow::flight::ResultStream; Next() yields a
# CResult-wrapped unique_ptr to a CFlightResult.
cdef cppclass CResultStream" arrow::flight::ResultStream":
CResult[unique_ptr[CFlightResult]] Next()
# FlightDescriptor::DescriptorType is declared as an opaque cppclass that
# only supports ==, followed by three named values of that type
# (UNKNOWN, PATH, CMD).
cdef cppclass CDescriptorType \
" arrow::flight::FlightDescriptor::DescriptorType":
bint operator==(CDescriptorType)
CDescriptorType CDescriptorTypeUnknown\
" arrow::flight::FlightDescriptor::UNKNOWN"
CDescriptorType CDescriptorTypePath\
" arrow::flight::FlightDescriptor::PATH"
CDescriptorType CDescriptorTypeCmd\
" arrow::flight::FlightDescriptor::CMD"
# Binding for arrow::flight::FlightDescriptor: a descriptor type plus a
# `cmd` string and a `path` vector of strings.
cdef cppclass CFlightDescriptor" arrow::flight::FlightDescriptor":
CDescriptorType type
c_string cmd
vector[c_string] path
bint operator==(CFlightDescriptor)
CResult[c_string] SerializeToString()
c_string ToString()
@staticmethod
CResult[CFlightDescriptor] Deserialize(const c_string& serialized)
# Binding for arrow::flight::Ticket: a single string member `ticket` plus
# equality and serialization declarations.
cdef cppclass CTicket" arrow::flight::Ticket":
CTicket()
c_string ticket
bint operator==(CTicket)
CResult[c_string] SerializeToString()
c_string ToString()
@staticmethod
CResult[CTicket] Deserialize(const c_string& serialized)
# Binding for arrow::flight::Criteria: a single string member `expression`
# plus equality and serialization declarations (no ToString is declared).
cdef cppclass CCriteria" arrow::flight::Criteria":
CCriteria()
c_string expression
bint operator==(CCriteria)
CResult[c_string] SerializeToString()
@staticmethod
CResult[CCriteria] Deserialize(const c_string& serialized)
# Binding for arrow::flight::Location.  Instances are obtained through the
# static factories below (Parse from a URI string, or ForGrpc* from
# host/port or a path); each returns a CResult.
cdef cppclass CLocation" arrow::flight::Location":
CLocation()
c_string ToString()
c_bool Equals(const CLocation& other)
@staticmethod
CResult[CLocation] Parse(const c_string& uri_string)
@staticmethod
CResult[CLocation] ForGrpcTcp(const c_string& host, int port)
@staticmethod
CResult[CLocation] ForGrpcTls(const c_string& host, int port)
@staticmethod
CResult[CLocation] ForGrpcUnix(const c_string& path)
# Binding for arrow::flight::FlightEndpoint: a ticket, a vector of
# locations, an optional expiration time point, and an app_metadata string.
cdef cppclass CFlightEndpoint" arrow::flight::FlightEndpoint":
CFlightEndpoint()
CTicket ticket
vector[CLocation] locations
optional[CTimePoint] expiration_time
c_string app_metadata
bint operator==(CFlightEndpoint)
CResult[c_string] SerializeToString()
c_string ToString()
@staticmethod
CResult[CFlightEndpoint] Deserialize(const c_string& serialized)
# Binding for arrow::flight::FlightInfo.  Only a copy-style constructor is
# declared here; state is read through accessor methods rather than fields.
# Unlike most classes in this block, Deserialize returns a unique_ptr.
cdef cppclass CFlightInfo" arrow::flight::FlightInfo":
CFlightInfo(CFlightInfo info)
int64_t total_records()
int64_t total_bytes()
c_bool ordered()
c_string app_metadata()
CResult[shared_ptr[CSchema]] GetSchema(CDictionaryMemo* memo)
CFlightDescriptor& descriptor()
const vector[CFlightEndpoint]& endpoints()
CResult[c_string] SerializeToString()
c_string ToString()
bint operator==(CFlightInfo)
@staticmethod
CResult[unique_ptr[CFlightInfo]] Deserialize(
const c_string& serialized)
# Binding for arrow::flight::SchemaResult: GetSchema takes a dictionary memo
# pointer and returns a CResult-wrapped schema.
cdef cppclass CSchemaResult" arrow::flight::SchemaResult":
CSchemaResult()
CSchemaResult(CSchemaResult result)
CResult[shared_ptr[CSchema]] GetSchema(CDictionaryMemo* memo)
bint operator==(CSchemaResult)
CResult[c_string] SerializeToString()
c_string ToString()
@staticmethod
CResult[CSchemaResult] Deserialize(const c_string& serialized)
# Binding for arrow::flight::FlightListing; Next() yields a CResult-wrapped
# unique_ptr to a CFlightInfo.
cdef cppclass CFlightListing" arrow::flight::FlightListing":
CResult[unique_ptr[CFlightInfo]] Next()
# Binding for arrow::flight::SimpleFlightListing, constructed from a vector
# of CFlightInfo.  No base class is declared for it here.
cdef cppclass CSimpleFlightListing" arrow::flight::SimpleFlightListing":
# The rvalue-reference constructor is left commented out because, per the
# note below, it does not work with Cython >= 3; the const-reference
# overload is declared instead.
# This doesn't work with Cython >= 3
# CSimpleFlightListing(vector[CFlightInfo]&& info)
CSimpleFlightListing(const vector[CFlightInfo]& info)
# Binding for arrow::flight::FlightPayload: descriptor and app_metadata
# buffers plus an IPC payload.
cdef cppclass CFlightPayload" arrow::flight::FlightPayload":
shared_ptr[CBuffer] descriptor
shared_ptr[CBuffer] app_metadata
CIpcPayload ipc_message
# Binding for arrow::flight::FlightDataStream: exposes a schema and a Next()
# that yields CFlightPayload values wrapped in CResult.
cdef cppclass CFlightDataStream" arrow::flight::FlightDataStream":
shared_ptr[CSchema] schema()
CResult[CFlightPayload] Next()
# Binding for arrow::flight::FlightStreamChunk: a record batch pointer and
# an app_metadata buffer pointer.
cdef cppclass CFlightStreamChunk" arrow::flight::FlightStreamChunk":
CFlightStreamChunk()
shared_ptr[CRecordBatch] data
shared_ptr[CBuffer] app_metadata
# Binding for arrow::flight::MetadataRecordBatchReader: Next() yields
# CFlightStreamChunk values; ToTable() returns a whole table.
cdef cppclass CMetadataRecordBatchReader \
" arrow::flight::MetadataRecordBatchReader":
CResult[shared_ptr[CSchema]] GetSchema()
CResult[CFlightStreamChunk] Next()
CResult[shared_ptr[CTable]] ToTable()
CIpcReadStats stats() const
# Free function arrow::flight::MakeRecordBatchReader: takes a shared
# CMetadataRecordBatchReader and returns a CRecordBatchReader in a CResult.
CResult[shared_ptr[CRecordBatchReader]] MakeRecordBatchReader\
" arrow::flight::MakeRecordBatchReader"(
shared_ptr[CMetadataRecordBatchReader])
# Binding for arrow::flight::MetadataRecordBatchWriter, declared as a
# subclass of CRecordBatchWriter.  Adds Begin (schema + IPC write options)
# and two metadata-carrying write methods.
cdef cppclass CMetadataRecordBatchWriter \
" arrow::flight::MetadataRecordBatchWriter"(CRecordBatchWriter):
CStatus Begin(shared_ptr[CSchema] schema,
const CIpcWriteOptions& options)
CStatus WriteMetadata(shared_ptr[CBuffer] app_metadata)
CStatus WriteWithMetadata(const CRecordBatch& batch,
shared_ptr[CBuffer] app_metadata)
# Binding for arrow::flight::FlightStreamReader, declared as a subclass of
# CMetadataRecordBatchReader.
cdef cppclass CFlightStreamReader \
" arrow::flight::FlightStreamReader"(CMetadataRecordBatchReader):
void Cancel()
# Maps to the C++ name "ToTable"; the separate Cython name selects the
# overload that takes a stop token.
CResult[shared_ptr[CTable]] ToTableWithStopToken" ToTable"\
(const CStopToken& stop_token)
# Binding for arrow::flight::FlightMessageReader (subclass of
# CMetadataRecordBatchReader); adds access to the flight descriptor.
cdef cppclass CFlightMessageReader \
" arrow::flight::FlightMessageReader"(CMetadataRecordBatchReader):
CFlightDescriptor& descriptor()
# Binding for arrow::flight::FlightMessageWriter (subclass of
# CMetadataRecordBatchWriter); no additional members are declared.
cdef cppclass CFlightMessageWriter \
" arrow::flight::FlightMessageWriter"(CMetadataRecordBatchWriter):
pass
# Binding for arrow::flight::FlightStreamWriter (subclass of
# CMetadataRecordBatchWriter); adds DoneWriting().
cdef cppclass CFlightStreamWriter \
" arrow::flight::FlightStreamWriter"(CMetadataRecordBatchWriter):
CStatus DoneWriting()
# Binding for arrow::flight::RecordBatchStream (subclass of
# CFlightDataStream), constructed from a record batch reader and IPC write
# options.
cdef cppclass CRecordBatchStream \
" arrow::flight::RecordBatchStream"(CFlightDataStream):
CRecordBatchStream(shared_ptr[CRecordBatchReader]& reader,
const CIpcWriteOptions& options)
# Binding for arrow::flight::FlightMetadataReader; ReadMetadata writes a
# buffer pointer through `out`.
cdef cppclass CFlightMetadataReader" arrow::flight::FlightMetadataReader":
CStatus ReadMetadata(shared_ptr[CBuffer]* out)
# Binding for arrow::flight::FlightMetadataWriter; WriteMetadata takes the
# message as a const buffer reference.
cdef cppclass CFlightMetadataWriter" arrow::flight::FlightMetadataWriter":
CStatus WriteMetadata(const CBuffer& message)
# Server- and client-side authentication helper bindings.  The readers take
# a pointer to a string token and the senders take a string reference; both
# return CStatus.
cdef cppclass CServerAuthReader" arrow::flight::ServerAuthReader":
CStatus Read(c_string* token)
cdef cppclass CServerAuthSender" arrow::flight::ServerAuthSender":
CStatus Write(c_string& token)
cdef cppclass CClientAuthReader" arrow::flight::ClientAuthReader":
CStatus Read(c_string* token)
cdef cppclass CClientAuthSender" arrow::flight::ClientAuthSender":
CStatus Write(c_string& token)
# The two handler classes are declared without members; they serve as base
# types for the PyServerAuthHandler / PyClientAuthHandler declarations
# further down in this file.
cdef cppclass CServerAuthHandler" arrow::flight::ServerAuthHandler":
pass
cdef cppclass CClientAuthHandler" arrow::flight::ClientAuthHandler":
pass
# Binding for arrow::flight::ServerCallContext: peer information, a
# cancellation query, header/trailer insertion, and middleware lookup by
# string key (returns a raw CServerMiddleware pointer).
cdef cppclass CServerCallContext" arrow::flight::ServerCallContext":
c_string& peer_identity()
c_string& peer()
c_bool is_cancelled()
void AddHeader(const c_string& key, const c_string& value)
void AddTrailer(const c_string& key, const c_string& value)
CServerMiddleware* GetMiddleware(const c_string& key)
# Binding for arrow::flight::TimeoutDuration: constructed from a double and
# read back via count().  The time unit is not stated in this declaration.
cdef cppclass CTimeoutDuration" arrow::flight::TimeoutDuration":
CTimeoutDuration(double)
double count()
# Binding for arrow::flight::FlightCallOptions: per-call timeout, IPC
# read/write options, a vector of (key, value) string header pairs, and a
# stop token.
cdef cppclass CFlightCallOptions" arrow::flight::FlightCallOptions":
CFlightCallOptions()
CTimeoutDuration timeout
CIpcWriteOptions write_options
CIpcReadOptions read_options
vector[pair[c_string, c_string]] headers
CStopToken stop_token
# Binding for arrow::flight::CertKeyPair: two string members, presumably
# PEM-encoded certificate and key text (inferred from the names -- confirm).
cdef cppclass CCertKeyPair" arrow::flight::CertKeyPair":
CCertKeyPair()
c_string pem_cert
c_string pem_key
# arrow::flight::FlightMethod is declared as an opaque cppclass that only
# supports ==, followed by one named value of that type per FlightMethod
# enumerator.
cdef cppclass CFlightMethod" arrow::flight::FlightMethod":
bint operator==(CFlightMethod)
CFlightMethod CFlightMethodInvalid\
" arrow::flight::FlightMethod::Invalid"
CFlightMethod CFlightMethodHandshake\
" arrow::flight::FlightMethod::Handshake"
CFlightMethod CFlightMethodListFlights\
" arrow::flight::FlightMethod::ListFlights"
CFlightMethod CFlightMethodGetFlightInfo\
" arrow::flight::FlightMethod::GetFlightInfo"
CFlightMethod CFlightMethodGetSchema\
" arrow::flight::FlightMethod::GetSchema"
CFlightMethod CFlightMethodDoGet\
" arrow::flight::FlightMethod::DoGet"
CFlightMethod CFlightMethodDoPut\
" arrow::flight::FlightMethod::DoPut"
CFlightMethod CFlightMethodDoAction\
" arrow::flight::FlightMethod::DoAction"
CFlightMethod CFlightMethodListActions\
" arrow::flight::FlightMethod::ListActions"
CFlightMethod CFlightMethodDoExchange\
" arrow::flight::FlightMethod::DoExchange"
# Binding for arrow::flight::CallInfo; only the `method` member is declared.
cdef cppclass CCallInfo" arrow::flight::CallInfo":
CFlightMethod method
# arrow::flight::CallHeaders is declared as a multimap from string_view to
# string_view.
ctypedef multimap[cpp_string_view, cpp_string_view] CCallHeaders\
" arrow::flight::CallHeaders"
# Binding for arrow::flight::AddCallHeaders; AddHeader takes a key and a
# value string.
cdef cppclass CAddCallHeaders" arrow::flight::AddCallHeaders":
void AddHeader(const c_string& key, const c_string& value)
# Middleware base-type bindings.  Only CServerMiddleware declares a member
# (name()); the factory and client classes are declared empty and are used
# as base/element types by other declarations in this file.
cdef cppclass CServerMiddleware" arrow::flight::ServerMiddleware":
c_string name()
cdef cppclass CServerMiddlewareFactory\
" arrow::flight::ServerMiddlewareFactory":
pass
cdef cppclass CClientMiddleware" arrow::flight::ClientMiddleware":
pass
cdef cppclass CClientMiddlewareFactory\
" arrow::flight::ClientMiddlewareFactory":
pass
# Binding for arrow::flight::TracingServerMiddleware::TraceKey: a default
# constructor plus two string members, `key` and `value`.
# Declared with `cdef` like every other cppclass in this extern block;
# `cpdef` carries no meaning for an extern C++ class declaration.
cdef cppclass CTracingServerMiddlewareTraceKey\
        " arrow::flight::TracingServerMiddleware::TraceKey":
    CTracingServerMiddlewareTraceKey()
    c_string key
    c_string value
# Binding for arrow::flight::TracingServerMiddleware (subclass of
# CServerMiddleware); GetTraceContext returns a vector of trace key/value
# entries.
cdef cppclass CTracingServerMiddleware\
" arrow::flight::TracingServerMiddleware"(CServerMiddleware):
vector[CTracingServerMiddlewareTraceKey] GetTraceContext()
# Free function arrow::flight::MakeTracingServerMiddlewareFactory: takes no
# arguments and returns a shared CServerMiddlewareFactory.
cdef shared_ptr[CServerMiddlewareFactory] \
MakeTracingServerMiddlewareFactory\
" arrow::flight::MakeTracingServerMiddlewareFactory"()
# Binding for arrow::flight::FlightServerOptions, constructed from a
# location.  `auth_handler` is a unique_ptr, so assigning it transfers
# ownership; `middleware` is a vector of (name, factory) pairs.
cdef cppclass CFlightServerOptions" arrow::flight::FlightServerOptions":
CFlightServerOptions(const CLocation& location)
CLocation location
unique_ptr[CServerAuthHandler] auth_handler
vector[CCertKeyPair] tls_certificates
c_bool verify_client
c_string root_certificates
vector[pair[c_string, shared_ptr[CServerMiddlewareFactory]]] middleware
# Binding for arrow::flight::FlightClientOptions.  No constructor is
# declared; instances are obtained via the static Defaults().
# `generic_options` pairs a string key with an int-or-string variant.
cdef cppclass CFlightClientOptions" arrow::flight::FlightClientOptions":
c_string tls_root_certs
c_string cert_chain
c_string private_key
c_string override_hostname
vector[shared_ptr[CClientMiddlewareFactory]] middleware
int64_t write_size_limit_bytes
vector[pair[c_string, CIntStringVariant]] generic_options
c_bool disable_server_verification
@staticmethod
CFlightClientOptions Defaults()
# Result types of CFlightClient.DoPut and DoExchange.  Each bundles a stream
# writer with a reader: a metadata reader for DoPut, a stream reader for
# DoExchange.  Both members are unique_ptrs.
cdef cppclass CDoPutResult" arrow::flight::FlightClient::DoPutResult":
unique_ptr[CFlightStreamWriter] writer
unique_ptr[CFlightMetadataReader] reader
cdef cppclass CDoExchangeResult" arrow::flight::FlightClient::DoExchangeResult":
unique_ptr[CFlightStreamWriter] writer
unique_ptr[CFlightStreamReader] reader
# Binding for arrow::flight::FlightClient.  A client is created with the
# static Connect(); every RPC method takes a CFlightCallOptions reference as
# its first argument and reports failure through CStatus or CResult.
cdef cppclass CFlightClient" arrow::flight::FlightClient":
@staticmethod
CResult[unique_ptr[CFlightClient]] Connect(const CLocation& location,
const CFlightClientOptions& options)
c_bool supports_async()
CStatus CheckAsyncSupport()
# Authenticate takes the auth handler as a unique_ptr (ownership is passed
# to the callee).
CStatus Authenticate(CFlightCallOptions& options,
unique_ptr[CClientAuthHandler] auth_handler)
CResult[pair[c_string, c_string]] AuthenticateBasicToken(
CFlightCallOptions& options,
const c_string& username,
const c_string& password)
CResult[unique_ptr[CResultStream]] DoAction(CFlightCallOptions& options, CAction& action)
CResult[vector[CActionType]] ListActions(CFlightCallOptions& options)
CResult[unique_ptr[CFlightListing]] ListFlights(CFlightCallOptions& options, CCriteria criteria)
CResult[unique_ptr[CFlightInfo]] GetFlightInfo(CFlightCallOptions& options,
CFlightDescriptor& descriptor)
# Asynchronous variant: returns a CFuture of CFlightInfo instead of a
# CResult-wrapped unique_ptr.
CFuture[CFlightInfo] GetFlightInfoAsync(CFlightCallOptions& options,
CFlightDescriptor& descriptor)
CResult[unique_ptr[CSchemaResult]] GetSchema(CFlightCallOptions& options,
CFlightDescriptor& descriptor)
CResult[unique_ptr[CFlightStreamReader]] DoGet(CFlightCallOptions& options, CTicket& ticket)
CResult[CDoPutResult] DoPut(CFlightCallOptions& options,
CFlightDescriptor& descriptor,
shared_ptr[CSchema]& schema)
CResult[CDoExchangeResult] DoExchange(CFlightCallOptions& options,
CFlightDescriptor& descriptor)
CStatus Close()
# arrow::flight::FlightStatusCode is declared as an opaque cppclass that
# only supports ==, followed by one named value per status-code enumerator.
cdef cppclass CFlightStatusCode" arrow::flight::FlightStatusCode":
bint operator==(CFlightStatusCode)
CFlightStatusCode CFlightStatusInternal \
" arrow::flight::FlightStatusCode::Internal"
CFlightStatusCode CFlightStatusTimedOut \
" arrow::flight::FlightStatusCode::TimedOut"
CFlightStatusCode CFlightStatusCancelled \
" arrow::flight::FlightStatusCode::Cancelled"
CFlightStatusCode CFlightStatusUnauthenticated \
" arrow::flight::FlightStatusCode::Unauthenticated"
CFlightStatusCode CFlightStatusUnauthorized \
" arrow::flight::FlightStatusCode::Unauthorized"
CFlightStatusCode CFlightStatusUnavailable \
" arrow::flight::FlightStatusCode::Unavailable"
CFlightStatusCode CFlightStatusFailed \
" arrow::flight::FlightStatusCode::Failed"
# Binding for arrow::flight::FlightStatusDetail.  The static UnwrapStatus
# takes a CStatus and returns a shared_ptr to the detail object.
# NOTE(review): presumably the pointer is null when the status carries no
# such detail -- confirm against the C++ header.
cdef cppclass FlightStatusDetail" arrow::flight::FlightStatusDetail":
CFlightStatusCode code()
c_string extra_info()
@staticmethod
shared_ptr[FlightStatusDetail] UnwrapStatus(const CStatus& status)
# Binding for arrow::flight::FlightWriteSizeStatusDetail: exposes the
# `limit` and `actual` values as int64 and its own UnwrapStatus.
cdef cppclass FlightWriteSizeStatusDetail\
" arrow::flight::FlightWriteSizeStatusDetail":
int64_t limit()
int64_t actual()
@staticmethod
shared_ptr[FlightWriteSizeStatusDetail] UnwrapStatus(
const CStatus& status)
# Two overloads of arrow::flight::MakeFlightError: one taking a status code
# and message, the other additionally taking an extra_info string.  Both
# return a CStatus.
cdef CStatus MakeFlightError" arrow::flight::MakeFlightError" \
(CFlightStatusCode code, const c_string& message)
cdef CStatus MakeFlightError" arrow::flight::MakeFlightError" \
(CFlightStatusCode code,
const c_string& message,
const c_string& extra_info)
# Callbacks for implementing Flight servers
# Use typedef to emulate syntax for std::function<void(..)>
# Every signature below returns CStatus and takes a Python `object` as its
# first parameter; results are handed back through trailing pointer
# parameters where one is present.  These typedefs are the template
# arguments of the `function[...]` members declared in the Python-side
# vtable classes of this file.
ctypedef CStatus cb_list_flights(object, const CServerCallContext&,
const CCriteria*,
unique_ptr[CFlightListing]*)
ctypedef CStatus cb_get_flight_info(object, const CServerCallContext&,
const CFlightDescriptor&,
unique_ptr[CFlightInfo]*)
ctypedef CStatus cb_get_schema(object, const CServerCallContext&,
const CFlightDescriptor&,
unique_ptr[CSchemaResult]*)
# do_put and do_exchange receive their reader/writer as unique_ptr values
# rather than through output pointers.
ctypedef CStatus cb_do_put(object, const CServerCallContext&,
unique_ptr[CFlightMessageReader],
unique_ptr[CFlightMetadataWriter])
ctypedef CStatus cb_do_get(object, const CServerCallContext&,
const CTicket&,
unique_ptr[CFlightDataStream]*)
ctypedef CStatus cb_do_exchange(object, const CServerCallContext&,
unique_ptr[CFlightMessageReader],
unique_ptr[CFlightMessageWriter])
ctypedef CStatus cb_do_action(object, const CServerCallContext&,
const CAction&,
unique_ptr[CResultStream]*)
ctypedef CStatus cb_list_actions(object, const CServerCallContext&,
vector[CActionType]*)
# Iteration callbacks for Python-backed result and data streams.
ctypedef CStatus cb_result_next(object, unique_ptr[CFlightResult]*)
ctypedef CStatus cb_data_stream_next(object, CFlightPayload*)
# Authentication callbacks (server side, then client side).
ctypedef CStatus cb_server_authenticate(object, CServerAuthSender*,
CServerAuthReader*)
ctypedef CStatus cb_is_valid(object, const c_string&, c_string*)
ctypedef CStatus cb_client_authenticate(object, CClientAuthSender*,
CClientAuthReader*)
ctypedef CStatus cb_get_token(object, c_string*)
# Middleware callbacks.
ctypedef CStatus cb_middleware_sending_headers(object, CAddCallHeaders*)
ctypedef CStatus cb_middleware_call_completed(object, const CStatus&)
ctypedef CStatus cb_client_middleware_received_headers(
object, const CCallHeaders&)
ctypedef CStatus cb_server_middleware_start_call(
object,
const CCallInfo&,
const CCallHeaders&,
shared_ptr[CServerMiddleware]*)
ctypedef CStatus cb_client_middleware_start_call(
object,
const CCallInfo&,
unique_ptr[CClientMiddleware]*)
# Declarations from the Python-side Flight helper header
# (namespace arrow::py::flight).
cdef extern from "arrow/python/flight.h" namespace "arrow::py::flight" nogil:
# C string constant bound to arrow::py::flight::kPyServerMiddlewareName.
cdef char* CPyServerMiddlewareName\
" arrow::py::flight::kPyServerMiddlewareName"
# Vtable of std::function callbacks, one per Flight server RPC; passed by
# value to the PyFlightServer constructor.
cdef cppclass PyFlightServerVtable:
PyFlightServerVtable()
function[cb_list_flights] list_flights
function[cb_get_flight_info] get_flight_info
function[cb_get_schema] get_schema
function[cb_do_put] do_put
function[cb_do_get] do_get
function[cb_do_exchange] do_exchange
function[cb_do_action] do_action
function[cb_list_actions] list_actions
# Vtable of callbacks for a server auth handler.
cdef cppclass PyServerAuthHandlerVtable:
PyServerAuthHandlerVtable()
function[cb_server_authenticate] authenticate
function[cb_is_valid] is_valid
# Vtable of callbacks for a client auth handler.
cdef cppclass PyClientAuthHandlerVtable:
PyClientAuthHandlerVtable()
function[cb_client_authenticate] authenticate
function[cb_get_token] get_token
# Binding for the C++ PyFlightServer class, constructed from a Python
# object and a vtable of callbacks.
cdef cppclass PyFlightServer:
PyFlightServer(object server, PyFlightServerVtable vtable)
CStatus Init(CFlightServerOptions& options)
int port()
# Declared `except *`, so Cython checks for a pending Python exception
# after this call returns.
CStatus ServeWithSignals() except *
CStatus Shutdown()
CStatus Wait()
# Python-backed auth handlers, each a subclass of the corresponding Flight
# auth handler and constructed from a Python object plus its vtable.
cdef cppclass PyServerAuthHandler\
" arrow::py::flight::PyServerAuthHandler"(CServerAuthHandler):
PyServerAuthHandler(object handler, PyServerAuthHandlerVtable vtable)
cdef cppclass PyClientAuthHandler\
" arrow::py::flight::PyClientAuthHandler"(CClientAuthHandler):
PyClientAuthHandler(object handler, PyClientAuthHandlerVtable vtable)
# Result stream (subclass of CResultStream) built from a Python object and
# a cb_result_next callback.
cdef cppclass CPyFlightResultStream\
" arrow::py::flight::PyFlightResultStream"(CResultStream):
CPyFlightResultStream(object generator,
function[cb_result_next] callback)
# Data stream (subclass of CFlightDataStream) that pairs a Python object
# with an existing CFlightDataStream passed as a unique_ptr.
# NOTE(review): presumably the Python object is held to keep the data source
# alive while the stream is in use -- confirm in flight.h.
cdef cppclass CPyFlightDataStream\
" arrow::py::flight::PyFlightDataStream"(CFlightDataStream):
CPyFlightDataStream(object data_source,
unique_ptr[CFlightDataStream] stream)
# Data stream (subclass of CFlightDataStream) built from a Python object, a
# schema, a cb_data_stream_next callback and IPC write options.
cdef cppclass CPyGeneratorFlightDataStream\
" arrow::py::flight::PyGeneratorFlightDataStream"\
(CFlightDataStream):
CPyGeneratorFlightDataStream(object generator,
shared_ptr[CSchema] schema,
function[cb_data_stream_next] callback,
const CIpcWriteOptions& options)
# Callback vtables for Python-backed middleware.  The client vtable has one
# more entry than the server vtable: received_headers.
cdef cppclass PyServerMiddlewareVtable\
" arrow::py::flight::PyServerMiddleware::Vtable":
PyServerMiddlewareVtable()
function[cb_middleware_sending_headers] sending_headers
function[cb_middleware_call_completed] call_completed
cdef cppclass PyClientMiddlewareVtable\
" arrow::py::flight::PyClientMiddleware::Vtable":
PyClientMiddlewareVtable()
function[cb_middleware_sending_headers] sending_headers
function[cb_client_middleware_received_headers] received_headers
function[cb_middleware_call_completed] call_completed
# Python-backed server middleware (subclass of CServerMiddleware), built
# from a Python object and its vtable.  py_object() returns an untyped
# pointer.
# NOTE(review): presumably that pointer is the wrapped Python object;
# confirm ownership/refcount expectations in flight.h.
cdef cppclass CPyServerMiddleware\
" arrow::py::flight::PyServerMiddleware"(CServerMiddleware):
CPyServerMiddleware(object middleware, PyServerMiddlewareVtable vtable)
void* py_object()
# Python-backed server middleware factory, built from a Python object and a
# start_call callback.
cdef cppclass CPyServerMiddlewareFactory\
" arrow::py::flight::PyServerMiddlewareFactory"\
(CServerMiddlewareFactory):
CPyServerMiddlewareFactory(
object factory,
function[cb_server_middleware_start_call] start_call)
# Python-backed client middleware (subclass of CClientMiddleware).
cdef cppclass CPyClientMiddleware\
" arrow::py::flight::PyClientMiddleware"(CClientMiddleware):
CPyClientMiddleware(object middleware, PyClientMiddlewareVtable vtable)
# Python-backed client middleware factory, built from a Python object and a
# start_call callback.
cdef cppclass CPyClientMiddlewareFactory\
" arrow::py::flight::PyClientMiddlewareFactory"\
(CClientMiddlewareFactory):
CPyClientMiddlewareFactory(
object factory,
function[cb_client_middleware_start_call] start_call)
# Helper arrow::py::flight::CreateFlightInfo: takes the pieces of a flight
# info (schema, descriptor, endpoints, counts, ordered flag, app metadata)
# and writes the resulting CFlightInfo through `out`; returns a CStatus.
cdef CStatus CreateFlightInfo" arrow::py::flight::CreateFlightInfo"(
shared_ptr[CSchema] schema,
CFlightDescriptor& descriptor,
vector[CFlightEndpoint] endpoints,
int64_t total_records,
int64_t total_bytes,
c_bool ordered,
const c_string& app_metadata,
unique_ptr[CFlightInfo]* out)
# Helper arrow::py::flight::CreateSchemaResult: takes a schema and writes
# the resulting CSchemaResult through `out`; returns a CStatus.
cdef CStatus CreateSchemaResult" arrow::py::flight::CreateSchemaResult"(
shared_ptr[CSchema] schema,
unique_ptr[CSchemaResult]* out)
# Binding for the concrete instantiation std::variant<int, std::string>,
# with constructors from nothing, an int, or a string.  Used as the value
# type of CFlightClientOptions.generic_options.
cdef extern from "<variant>" namespace "std" nogil:
cdef cppclass CIntStringVariant" std::variant<int, std::string>":
CIntStringVariant()
CIntStringVariant(int)
CIntStringVariant(c_string)

View File

@@ -0,0 +1,364 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# distutils: language = c++
from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport *
from pyarrow.includes.libarrow_python cimport CTimePoint
# Declarations from the Arrow filesystem API header (namespace arrow::fs).
cdef extern from "arrow/filesystem/api.h" namespace "arrow::fs" nogil:
# Binding for the enum arrow::fs::FileType and its four enumerators.
ctypedef enum CFileType "arrow::fs::FileType":
CFileType_NotFound "arrow::fs::FileType::NotFound"
CFileType_Unknown "arrow::fs::FileType::Unknown"
CFileType_File "arrow::fs::FileType::File"
CFileType_Directory "arrow::fs::FileType::Directory"
# Binding for arrow::fs::FileInfo: getter/setter pairs for type, path, size
# and mtime, plus read-only base_name() and extension().
# The copy constructor and assignment operator are each declared twice
# (by value and by const reference).
cdef cppclass CFileInfo "arrow::fs::FileInfo":
CFileInfo()
CFileInfo(CFileInfo)
CFileInfo& operator=(CFileInfo)
CFileInfo(const CFileInfo&)
CFileInfo& operator=(const CFileInfo&)
CFileType type()
void set_type(CFileType type)
c_string path()
void set_path(const c_string& path)
c_string base_name()
int64_t size()
void set_size(int64_t size)
c_string extension()
CTimePoint mtime()
void set_mtime(CTimePoint mtime)
# Binding for arrow::fs::FileSelector: a base directory plus two boolean
# flags.
cdef cppclass CFileSelector "arrow::fs::FileSelector":
CFileSelector()
c_string base_dir
c_bool allow_not_found
c_bool recursive
# Binding for arrow::fs::FileLocator: a (filesystem, path) pair; used as the
# element type of the vectors passed to CCopyFiles.
cdef cppclass CFileLocator "arrow::fs::FileLocator":
shared_ptr[CFileSystem] filesystem
c_string path
# Binding for the C++ class arrow::fs::FileSystem.  It is the declared base
# of the S3, Hadoop, SubTree and Mock filesystem bindings in this file.
# Fallible operations return CStatus or CResult.
cdef cppclass CFileSystem "arrow::fs::FileSystem":
shared_ptr[CFileSystem] shared_from_this()
c_string type_name() const
CResult[c_string] NormalizePath(c_string path)
CResult[c_string] MakeUri(c_string path)
# GetFileInfo is declared three times: for a single path, a vector of
# paths, and a selector.
CResult[CFileInfo] GetFileInfo(const c_string& path)
CResult[vector[CFileInfo]] GetFileInfo(
const vector[c_string]& paths)
CResult[vector[CFileInfo]] GetFileInfo(const CFileSelector& select)
CStatus CreateDir(const c_string& path, c_bool recursive)
CStatus DeleteDir(const c_string& path)
CStatus DeleteDirContents(const c_string& path, c_bool missing_dir_ok)
CStatus DeleteRootDirContents()
CStatus DeleteFile(const c_string& path)
CStatus DeleteFiles(const vector[c_string]& paths)
CStatus Move(const c_string& src, const c_string& dest)
CStatus CopyFile(const c_string& src, const c_string& dest)
CResult[shared_ptr[CInputStream]] OpenInputStream(
const c_string& path)
CResult[shared_ptr[CRandomAccessFile]] OpenInputFile(
const c_string& path)
# The two output-stream openers additionally take key/value metadata.
CResult[shared_ptr[COutputStream]] OpenOutputStream(
const c_string& path, const shared_ptr[const CKeyValueMetadata]&)
CResult[shared_ptr[COutputStream]] OpenAppendStream(
const c_string& path, const shared_ptr[const CKeyValueMetadata]&)
c_bool Equals(const CFileSystem& other)
c_bool Equals(shared_ptr[CFileSystem] other)
# Free functions that build a filesystem from a URI string.
# CFileSystemFromUri is declared twice: with and without an `out_path`
# output pointer.  CFileSystemFromUriOrPath binds the separate C++ function
# FileSystemFromUriOrPath.
CResult[shared_ptr[CFileSystem]] CFileSystemFromUri \
"arrow::fs::FileSystemFromUri"(const c_string& uri)
CResult[shared_ptr[CFileSystem]] CFileSystemFromUri \
"arrow::fs::FileSystemFromUri"(const c_string& uri, c_string* out_path)
CResult[shared_ptr[CFileSystem]] CFileSystemFromUriOrPath \
"arrow::fs::FileSystemFromUriOrPath"(const c_string& uri,
c_string* out_path)
# Binding for arrow::fs::FileSystemGlobalOptions: two string paths for TLS
# CA material (a file path and a directory path).
cdef cppclass CFileSystemGlobalOptions \
"arrow::fs::FileSystemGlobalOptions":
c_string tls_ca_file_path
c_string tls_ca_dir_path
# Binding for the free function arrow::fs::Initialize, which takes the
# global options above.
CStatus CFileSystemsInitialize "arrow::fs::Initialize" \
(const CFileSystemGlobalOptions& options)
# Binding for arrow::fs::SubTreeFileSystem (subclass of CFileSystem),
# constructed from a base path and a base filesystem; both are readable
# back through the accessors.
cdef cppclass CSubTreeFileSystem \
"arrow::fs::SubTreeFileSystem"(CFileSystem):
CSubTreeFileSystem(const c_string& base_path,
shared_ptr[CFileSystem] base_fs)
c_string base_path()
shared_ptr[CFileSystem] base_fs()
# Binding for the enum arrow::fs::S3LogLevel and its seven enumerators.
ctypedef enum CS3LogLevel "arrow::fs::S3LogLevel":
CS3LogLevel_Off "arrow::fs::S3LogLevel::Off"
CS3LogLevel_Fatal "arrow::fs::S3LogLevel::Fatal"
CS3LogLevel_Error "arrow::fs::S3LogLevel::Error"
CS3LogLevel_Warn "arrow::fs::S3LogLevel::Warn"
CS3LogLevel_Info "arrow::fs::S3LogLevel::Info"
CS3LogLevel_Debug "arrow::fs::S3LogLevel::Debug"
CS3LogLevel_Trace "arrow::fs::S3LogLevel::Trace"
# Binding for the struct arrow::fs::S3GlobalOptions; this is the options
# type accepted by CInitializeS3.
cdef struct CS3GlobalOptions "arrow::fs::S3GlobalOptions":
CS3LogLevel log_level
int num_event_loop_threads
# Binding for arrow::fs::S3ProxyOptions: scheme, host, port and credentials.
cdef cppclass CS3ProxyOptions "arrow::fs::S3ProxyOptions":
c_string scheme
c_string host
int port
c_string username
c_string password
c_bool Equals(const CS3ProxyOptions& other)
# Bound to the C++ static method "FromUri"; the Cython name
# FromUriString reflects that this declaration takes the URI as a string.
@staticmethod
CResult[CS3ProxyOptions] FromUriString "FromUri"(
const c_string& uri_string)
# Binding for the enum arrow::fs::S3CredentialsKind and its five
# enumerators; this is the type of CS3Options.credentials_kind.
ctypedef enum CS3CredentialsKind "arrow::fs::S3CredentialsKind":
CS3CredentialsKind_Anonymous "arrow::fs::S3CredentialsKind::Anonymous"
CS3CredentialsKind_Default "arrow::fs::S3CredentialsKind::Default"
CS3CredentialsKind_Explicit "arrow::fs::S3CredentialsKind::Explicit"
CS3CredentialsKind_Role "arrow::fs::S3CredentialsKind::Role"
CS3CredentialsKind_WebIdentity \
"arrow::fs::S3CredentialsKind::WebIdentity"
# Binding for arrow::fs::S3RetryStrategy.  Only two static factories are
# declared; each takes a maximum attempt count and returns a shared_ptr.
cdef cppclass CS3RetryStrategy "arrow::fs::S3RetryStrategy":
@staticmethod
shared_ptr[CS3RetryStrategy] GetAwsDefaultRetryStrategy(int64_t max_attempts)
@staticmethod
shared_ptr[CS3RetryStrategy] GetAwsStandardRetryStrategy(int64_t max_attempts)
# Binding for arrow::fs::S3Options.  No constructor is declared; instances
# come from the static factories at the end of the class (Defaults,
# Anonymous, FromAccessKey, FromAssumeRole).
cdef cppclass CS3Options "arrow::fs::S3Options":
c_string region
# Timeouts are declared as doubles; the unit is not stated here
# (presumably seconds -- confirm against the C++ header).
double connect_timeout
double request_timeout
c_string endpoint_override
c_string scheme
c_bool background_writes
c_bool allow_delayed_open
c_bool allow_bucket_creation
c_bool allow_bucket_deletion
c_bool check_directory_existence_before_creation
c_bool force_virtual_addressing
c_string tls_ca_file_path
shared_ptr[const CKeyValueMetadata] default_metadata
# Role-related fields; these mirror the parameters of FromAssumeRole.
c_string role_arn
c_string session_name
c_string external_id
int load_frequency
CS3ProxyOptions proxy_options
CS3CredentialsKind credentials_kind
shared_ptr[CS3RetryStrategy] retry_strategy
void ConfigureDefaultCredentials()
void ConfigureAccessKey(const c_string& access_key,
const c_string& secret_key,
const c_string& session_token)
# Credentials are exposed through getters rather than data members.
c_string GetAccessKey()
c_string GetSecretKey()
c_string GetSessionToken()
c_bool Equals(const CS3Options& other)
@staticmethod
CS3Options Defaults()
@staticmethod
CS3Options Anonymous()
@staticmethod
CS3Options FromAccessKey(const c_string& access_key,
const c_string& secret_key,
const c_string& session_token)
@staticmethod
CS3Options FromAssumeRole(const c_string& role_arn,
const c_string& session_name,
const c_string& external_id,
const int load_frequency)
# Binding for arrow::fs::S3FileSystem (subclass of CFileSystem), created via
# the static Make() from a CS3Options.
cdef cppclass CS3FileSystem "arrow::fs::S3FileSystem"(CFileSystem):
@staticmethod
CResult[shared_ptr[CS3FileSystem]] Make(const CS3Options& options)
CS3Options options()
c_string region()
# Free functions for S3 subsystem lifecycle: explicit initialize/finalize
# plus "Ensure..." variants, all returning CStatus.
cdef CStatus CInitializeS3 "arrow::fs::InitializeS3"(
const CS3GlobalOptions& options)
cdef CStatus CEnsureS3Initialized "arrow::fs::EnsureS3Initialized"()
cdef CStatus CFinalizeS3 "arrow::fs::FinalizeS3"()
cdef CStatus CEnsureS3Finalized "arrow::fs::EnsureS3Finalized"()
# Takes a bucket name and returns a string in a CResult.  Declared without
# an explicit C name, so the Cython name is also the C++ name within the
# block's namespace.
cdef CResult[c_string] ResolveS3BucketRegion(const c_string& bucket)
# Binding for arrow::fs::GcsCredentials: read-only accessors for the
# anonymous flag, expiration time point, access token and target service
# account.
cdef cppclass CGcsCredentials "arrow::fs::GcsCredentials":
c_bool anonymous()
CTimePoint expiration()
c_string access_token()
c_string target_service_account()
# Binding for arrow::fs::GcsOptions.  No constructor is declared; instances
# come from the static factories at the end of the class.
cdef cppclass CGcsOptions "arrow::fs::GcsOptions":
    CGcsCredentials credentials
    c_string endpoint_override
    c_string scheme
    c_string default_bucket_location
    # Optional settings: empty when not configured.
    optional[c_string] project_id
    # Unit taken from the member name (seconds).
    optional[double] retry_limit_seconds
    shared_ptr[const CKeyValueMetadata] default_metadata
    # Compares against another CGcsOptions.  The parameter type must be the
    # GCS options type, matching the pattern of CS3Options.Equals and
    # CAzureOptions.Equals which each take their own type.
    c_bool Equals(const CGcsOptions& other)

    @staticmethod
    CGcsOptions Defaults()

    @staticmethod
    CGcsOptions Anonymous()

    @staticmethod
    CGcsOptions FromAccessToken(const c_string& access_token,
                                CTimePoint expiration)

    @staticmethod
    CGcsOptions FromImpersonatedServiceAccount(const CGcsCredentials& base_credentials,
                                               c_string& target_service_account)
# Binding for arrow::fs::GcsFileSystem, created via the static Make() from a
# CGcsOptions.
# NOTE(review): unlike CS3FileSystem and CHadoopFileSystem in this file, no
# CFileSystem base is declared here -- confirm whether that is intentional.
cdef cppclass CGcsFileSystem "arrow::fs::GcsFileSystem":
@staticmethod
CResult[shared_ptr[CGcsFileSystem]] Make(const CGcsOptions& options)
CGcsOptions options()
# Binding for arrow::fs::AzureOptions: account name plus authority/scheme
# strings for the blob and dfs endpoints.  Credentials are set through the
# Configure*Credential methods, each of which returns a CStatus.
cdef cppclass CAzureOptions "arrow::fs::AzureOptions":
c_string account_name
c_string blob_storage_authority
c_string dfs_storage_authority
c_string blob_storage_scheme
c_string dfs_storage_scheme
c_bool Equals(const CAzureOptions& other)
CStatus ConfigureDefaultCredential()
CStatus ConfigureAccountKeyCredential(c_string account_key)
CStatus ConfigureSASCredential(c_string sas_token)
CStatus ConfigureManagedIdentityCredential(c_string client_id)
CStatus ConfigureClientSecretCredential(c_string tenant_id,
c_string client_id,
c_string client_secret)
# Binding for arrow::fs::AzureFileSystem, created via the static Make() from
# a CAzureOptions.
# NOTE(review): unlike CS3FileSystem and CHadoopFileSystem in this file, no
# CFileSystem base is declared here -- confirm whether that is intentional.
cdef cppclass CAzureFileSystem "arrow::fs::AzureFileSystem":
@staticmethod
CResult[shared_ptr[CAzureFileSystem]] Make(const CAzureOptions& options)
CAzureOptions options()
# Binding for arrow::fs::HdfsOptions: a connection config plus buffer size,
# replication and block size, with Configure* setters for each aspect.
cdef cppclass CHdfsOptions "arrow::fs::HdfsOptions":
HdfsConnectionConfig connection_config
int32_t buffer_size
int16_t replication
int64_t default_block_size
# Bound to the C++ static method "FromUri"; the Cython name
# FromUriString reflects that this declaration takes the URI as a string.
@staticmethod
CResult[CHdfsOptions] FromUriString "FromUri"(
const c_string& uri_string)
void ConfigureEndPoint(c_string host, int port)
void ConfigureDriver(c_bool use_hdfs3)
void ConfigureReplication(int16_t replication)
void ConfigureUser(c_string user_name)
void ConfigureBufferSize(int32_t buffer_size)
void ConfigureBlockSize(int64_t default_block_size)
void ConfigureKerberosTicketCachePath(c_string path)
void ConfigureExtraConf(c_string key, c_string value)
# Binding for arrow::fs::HadoopFileSystem (subclass of CFileSystem), created
# via the static Make() from a CHdfsOptions.
cdef cppclass CHadoopFileSystem "arrow::fs::HadoopFileSystem"(CFileSystem):
@staticmethod
CResult[shared_ptr[CHadoopFileSystem]] Make(
const CHdfsOptions& options)
CHdfsOptions options()
# Binding for arrow::fs::internal::MockFileSystem (subclass of CFileSystem),
# constructed from a time point passed as `current_time`.
cdef cppclass CMockFileSystem "arrow::fs::internal::MockFileSystem"(
CFileSystem):
CMockFileSystem(CTimePoint current_time)
# Binding for the arrow::fs::CopyFiles overload that takes two vectors of
# CFileLocator (sources and destinations).
CStatus CCopyFiles "arrow::fs::CopyFiles"(
    const vector[CFileLocator]& sources,
    const vector[CFileLocator]& destinations,
    const CIOContext& io_context,
    int64_t chunk_size,
    c_bool use_threads)
# Binding for the arrow::fs::CopyFiles overload that takes a source
# filesystem plus CFileSelector and a destination filesystem plus base
# directory. It shares its C++ name with CCopyFiles; only the Cython-side
# name differs.
CStatus CCopyFilesWithSelector "arrow::fs::CopyFiles"(
    const shared_ptr[CFileSystem]& source_fs,
    const CFileSelector& source_sel,
    const shared_ptr[CFileSystem]& destination_fs,
    const c_string& destination_base_dir,
    const CIOContext& io_context,
    int64_t chunk_size,
    c_bool use_threads)
# Callbacks for implementing Python filesystems
# Use typedef to emulate syntax for std::function<void(..)>
#
# Every function type below takes a Python object as its first parameter.
# NOTE(review): the trailing pointer parameters look like out-parameters
# filled in by the callback -- confirm against arrow/python/filesystem.h.

# Type name and equality
ctypedef void CallbackGetTypeName(object, c_string*)
ctypedef c_bool CallbackEquals(object, const CFileSystem&)

# File info lookup: by single path, by list of paths, by selector
ctypedef void CallbackGetFileInfo(object, const c_string&, CFileInfo*)
ctypedef void CallbackGetFileInfoVector(
    object, const vector[c_string]&, vector[CFileInfo]*)
ctypedef void CallbackGetFileInfoSelector(
    object, const CFileSelector&, vector[CFileInfo]*)

# Directory and file manipulation
ctypedef void CallbackCreateDir(object, const c_string&, c_bool)
ctypedef void CallbackDeleteDir(object, const c_string&)
ctypedef void CallbackDeleteDirContents(object, const c_string&, c_bool)
ctypedef void CallbackDeleteRootDirContents(object)
ctypedef void CallbackDeleteFile(object, const c_string&)
ctypedef void CallbackMove(object, const c_string&, const c_string&)
ctypedef void CallbackCopyFile(object, const c_string&, const c_string&)

# Opening streams and files
ctypedef void CallbackOpenInputStream(
    object, const c_string&, shared_ptr[CInputStream]*)
ctypedef void CallbackOpenInputFile(
    object, const c_string&, shared_ptr[CRandomAccessFile]*)
ctypedef void CallbackOpenOutputStream(
    object, const c_string&, const shared_ptr[const CKeyValueMetadata]&,
    shared_ptr[COutputStream]*)

# Path normalization
ctypedef void CallbackNormalizePath(object, const c_string&, c_string*)
cdef extern from "arrow/python/filesystem.h" namespace "arrow::py::fs" nogil:

    # Table of function[...] slots, one per Callback* function type.
    cdef cppclass CPyFileSystemVtable "arrow::py::fs::PyFileSystemVtable":
        PyFileSystemVtable()

        function[CallbackGetTypeName] get_type_name
        function[CallbackEquals] equals

        function[CallbackGetFileInfo] get_file_info
        function[CallbackGetFileInfoVector] get_file_info_vector
        function[CallbackGetFileInfoSelector] get_file_info_selector

        function[CallbackCreateDir] create_dir
        function[CallbackDeleteDir] delete_dir
        function[CallbackDeleteDirContents] delete_dir_contents
        function[CallbackDeleteRootDirContents] delete_root_dir_contents
        function[CallbackDeleteFile] delete_file
        function[CallbackMove] move
        function[CallbackCopyFile] copy_file

        function[CallbackOpenInputStream] open_input_stream
        function[CallbackOpenInputFile] open_input_file
        # open_output_stream and open_append_stream share one callback type.
        function[CallbackOpenOutputStream] open_output_stream
        function[CallbackOpenOutputStream] open_append_stream

        function[CallbackNormalizePath] normalize_path

    cdef cppclass CPyFileSystem "arrow::py::fs::PyFileSystem":
        # Static factory taking a Python handler object and a vtable
        # (passed by value); returns a shared_ptr to the filesystem.
        @staticmethod
        shared_ptr[CPyFileSystem] Make(
            object handler, CPyFileSystemVtable vtable)

        # Returns the handler as a raw PyObject*.
        PyObject* handler()

View File

@@ -0,0 +1,296 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# distutils: language = c++
from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport *
# Function type PyInvalidRowCallback: takes a Python object and a
# const CCSVInvalidRow&, and returns a CInvalidRowResult.
ctypedef CInvalidRowResult PyInvalidRowCallback(
    object, const CCSVInvalidRow&)
cdef extern from "arrow/python/csv.h" namespace "arrow::py::csv":

    # Takes a function[PyInvalidRowCallback] together with a Python handler
    # object and returns a function[CInvalidRowHandler].
    function[CInvalidRowHandler] MakeInvalidRowHandler(
        function[PyInvalidRowCallback],
        object handler)
cdef extern from "arrow/python/api.h" namespace "arrow::py":

    # Requires GIL
    # Takes two Python objects (obj, mask) and a pandas_null_sentinels flag;
    # the CDataType comes back wrapped in a CResult.
    CResult[shared_ptr[CDataType]] InferArrowType(
        object obj,
        object mask,
        c_bool pandas_null_sentinels)
cdef extern from "arrow/python/api.h" namespace "arrow::py::internal":

    # Returns a Python object.
    object NewMonthDayNanoTupleType()

    # Array -> PyObject* wrapped in a CResult.
    CResult[PyObject*] MonthDayNanoIntervalArrayToPyList(
        const CMonthDayNanoIntervalArray& array)

    # Scalar -> PyObject* wrapped in a CResult.
    CResult[PyObject*] MonthDayNanoIntervalScalarToPyObject(
        const CMonthDayNanoIntervalScalar& scalar)
cdef extern from "arrow/python/arrow_to_pandas.h" \
        namespace "arrow::py::MapConversionType":

    # The enumerators are resolved inside the namespace given above, while
    # the enum type itself carries an explicit fully-qualified cname.
    cdef enum MapConversionType "arrow::py::MapConversionType":
        DEFAULT
        LOSSY
        STRICT_
cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil:
    # Declarations from arrow/python/api.h in namespace arrow::py.
    # The whole block is declared nogil; see the TODO below.

    shared_ptr[CDataType] GetPrimitiveType(Type type)

    object PyFloat_FromHalf(uint16_t value)

    # Options struct passed to ConvertPySequence.
    cdef cppclass PyConversionOptions:
        PyConversionOptions()

        shared_ptr[CDataType] type
        int64_t size
        CMemoryPool* pool
        c_bool from_pandas
        c_bool ignore_timezone
        c_bool strict

    # TODO Some functions below are not actually "nogil"

    CResult[shared_ptr[CChunkedArray]] ConvertPySequence(
        object obj, object mask, const PyConversionOptions& options,
        CMemoryPool* pool)

    CResult[shared_ptr[CArray]] Arange(int64_t start, int64_t stop,
                                       int64_t step, CMemoryPool* pool)

    CResult[shared_ptr[CDataType]] NumPyDtypeToArrow(object dtype)

    # Two overloads of NdarrayToArrow: without and with CCastOptions.
    CStatus NdarrayToArrow(CMemoryPool* pool, object ao, object mo,
                           c_bool from_pandas,
                           const shared_ptr[CDataType]& type,
                           shared_ptr[CChunkedArray]* out)

    CStatus NdarrayToArrow(CMemoryPool* pool, object ao, object mo,
                           c_bool from_pandas,
                           const shared_ptr[CDataType]& type,
                           const CCastOptions& cast_options,
                           shared_ptr[CChunkedArray]* out)

    CStatus NdarrayToTensor(CMemoryPool* pool, object ao,
                            const vector[c_string]& dim_names,
                            shared_ptr[CTensor]* out)

    CStatus TensorToNdarray(const shared_ptr[CTensor]& tensor, object base,
                            PyObject** out)

    # Sparse tensor -> ndarray conversions; results are written through
    # the PyObject** parameters.
    CStatus SparseCOOTensorToNdarray(
        const shared_ptr[CSparseCOOTensor]& sparse_tensor, object base,
        PyObject** out_data, PyObject** out_coords)

    CStatus SparseCSRMatrixToNdarray(
        const shared_ptr[CSparseCSRMatrix]& sparse_tensor, object base,
        PyObject** out_data, PyObject** out_indptr, PyObject** out_indices)

    CStatus SparseCSCMatrixToNdarray(
        const shared_ptr[CSparseCSCMatrix]& sparse_tensor, object base,
        PyObject** out_data, PyObject** out_indptr, PyObject** out_indices)

    CStatus SparseCSFTensorToNdarray(
        const shared_ptr[CSparseCSFTensor]& sparse_tensor, object base,
        PyObject** out_data, PyObject** out_indptr, PyObject** out_indices)

    # ndarray(s) -> sparse tensor conversions.
    CStatus NdarraysToSparseCOOTensor(CMemoryPool* pool, object data_ao,
                                      object coords_ao,
                                      const vector[int64_t]& shape,
                                      const vector[c_string]& dim_names,
                                      shared_ptr[CSparseCOOTensor]* out)

    CStatus NdarraysToSparseCSRMatrix(CMemoryPool* pool, object data_ao,
                                      object indptr_ao, object indices_ao,
                                      const vector[int64_t]& shape,
                                      const vector[c_string]& dim_names,
                                      shared_ptr[CSparseCSRMatrix]* out)

    CStatus NdarraysToSparseCSCMatrix(CMemoryPool* pool, object data_ao,
                                      object indptr_ao, object indices_ao,
                                      const vector[int64_t]& shape,
                                      const vector[c_string]& dim_names,
                                      shared_ptr[CSparseCSCMatrix]* out)

    CStatus NdarraysToSparseCSFTensor(CMemoryPool* pool, object data_ao,
                                      object indptr_ao, object indices_ao,
                                      const vector[int64_t]& shape,
                                      const vector[int64_t]& axis_order,
                                      const vector[c_string]& dim_names,
                                      shared_ptr[CSparseCSFTensor]* out)

    # Dense tensor -> sparse tensor conversions.
    CStatus TensorToSparseCOOTensor(shared_ptr[CTensor],
                                    shared_ptr[CSparseCOOTensor]* out)

    CStatus TensorToSparseCSRMatrix(shared_ptr[CTensor],
                                    shared_ptr[CSparseCSRMatrix]* out)

    CStatus TensorToSparseCSCMatrix(shared_ptr[CTensor],
                                    shared_ptr[CSparseCSCMatrix]* out)

    CStatus TensorToSparseCSFTensor(shared_ptr[CTensor],
                                    shared_ptr[CSparseCSFTensor]* out)

    # Arrow -> pandas conversions, all driven by a PandasOptions.
    CStatus ConvertArrayToPandas(const PandasOptions& options,
                                 shared_ptr[CArray] arr,
                                 object py_ref, PyObject** out)

    CStatus ConvertChunkedArrayToPandas(const PandasOptions& options,
                                        shared_ptr[CChunkedArray] arr,
                                        object py_ref, PyObject** out)

    CStatus ConvertTableToPandas(const PandasOptions& options,
                                 shared_ptr[CTable] table,
                                 PyObject** out)

    # Memory pool accessors. Each declaration ends on its own line: a
    # trailing backslash after the closing parenthesis would join the next
    # declaration onto this one.
    void c_set_default_memory_pool \
        " arrow::py::set_default_memory_pool"(CMemoryPool* pool)

    CMemoryPool* c_get_memory_pool \
        " arrow::py::get_memory_pool"()

    cdef cppclass PyBuffer(CBuffer):
        @staticmethod
        CResult[shared_ptr[CBuffer]] FromPyObject(object obj)

    cdef cppclass PyForeignBuffer(CBuffer):
        @staticmethod
        CStatus Make(const uint8_t* data, int64_t size, object base,
                     shared_ptr[CBuffer]* out)

    # File-like wrappers, each constructed from a Python object.
    cdef cppclass PyReadableFile(CRandomAccessFile):
        PyReadableFile(object fo)

    cdef cppclass PyOutputStream(COutputStream):
        PyOutputStream(object fo)

    # Options struct consumed by the Convert*ToPandas functions above.
    cdef cppclass PandasOptions:
        CMemoryPool* pool
        c_bool strings_to_categorical
        c_bool zero_copy_only
        c_bool integer_object_nulls
        c_bool date_as_object
        c_bool timestamp_as_object
        c_bool use_threads
        c_bool coerce_temporal_nanoseconds
        c_bool ignore_timezone
        c_bool deduplicate_objects
        c_bool safe_cast
        c_bool split_blocks
        c_bool self_destruct
        MapConversionType maps_as_pydicts
        c_bool decode_dictionaries
        unordered_set[c_string] categorical_columns
        unordered_set[c_string] extension_columns
        c_bool to_numpy
cdef extern from "arrow/python/api.h" namespace "arrow::py::internal" nogil:

    # Opaque on the Cython side: no members are declared.
    cdef cppclass CTimePoint "arrow::py::internal::TimePoint":
        pass

    # Conversions to and from CTimePoint.
    CTimePoint PyDateTime_to_TimePoint(PyDateTime_DateTime* pydatetime)
    int64_t TimePoint_to_ns(CTimePoint val)
    CTimePoint TimePoint_from_s(double val)
    CTimePoint TimePoint_from_ns(int64_t val)

    # tzinfo <-> string conversions; both report through a CResult.
    CResult[c_string] TzinfoToString(PyObject* pytzinfo)
    CResult[PyObject*] StringToTzinfo(c_string)
cdef extern from "arrow/python/numpy_init.h" namespace "arrow::py":

    # A return value of -1 is treated by Cython as a raised exception.
    int arrow_init_numpy() except -1
cdef extern from "arrow/python/pyarrow.h" namespace "arrow::py":

    # A return value of -1 is treated by Cython as a raised exception.
    int import_pyarrow() except -1
cdef extern from "arrow/python/common.h" namespace "arrow::py":

    # Predicate on a CStatus.
    c_bool IsPyError(const CStatus& status)

    # Declared `except *`, so Cython checks for a pending Python exception
    # after every call.
    void RestorePyError(const CStatus& status) except *
cdef extern from "arrow/python/common.h" namespace "arrow::py" nogil:

    # Templated subclass of shared_ptr[T].
    cdef cppclass SharedPtrNoGIL[T](shared_ptr[T]):
        # This looks like the only way to satisfy both Cython 2 and Cython 3
        SharedPtrNoGIL& operator=(...)

    # Templated subclass of unique_ptr[T, DELETER]; DELETER is an optional
    # template argument.
    cdef cppclass UniquePtrNoGIL[T, DELETER=*](unique_ptr[T, DELETER]):
        UniquePtrNoGIL& operator=(...)
cdef extern from "arrow/python/inference.h" namespace "arrow::py":

    # Three predicates, each taking one Python object and returning c_bool.
    c_bool IsPyBool(object o)
    c_bool IsPyInt(object o)
    c_bool IsPyFloat(object o)
cdef extern from "arrow/python/ipc.h" namespace "arrow::py":

    cdef cppclass CPyRecordBatchReader \
            " arrow::py::PyRecordBatchReader"(CRecordBatchReader):
        # Static factory taking a schema and a Python object; the reader
        # is returned as a base-class CRecordBatchReader inside a CResult.
        @staticmethod
        CResult[shared_ptr[CRecordBatchReader]] Make(
            shared_ptr[CSchema], object)
cdef extern from "arrow/python/ipc.h" namespace "arrow::py" nogil:

    cdef cppclass CCastingRecordBatchReader \
            " arrow::py::CastingRecordBatchReader"(CRecordBatchReader):
        # Static factory taking an existing reader and a schema; the
        # result is a base-class CRecordBatchReader inside a CResult.
        @staticmethod
        CResult[shared_ptr[CRecordBatchReader]] Make(
            shared_ptr[CRecordBatchReader], shared_ptr[CSchema])
cdef extern from "arrow/python/extension_type.h" namespace "arrow::py":

    cdef cppclass CPyExtensionType \
            " arrow::py::PyExtensionType"(CExtensionType):
        # Static factory taking a storage type, an extension name and a
        # Python object `typ`; the new type is written to `out`.
        @staticmethod
        CStatus FromClass(
            const shared_ptr[CDataType] storage_type,
            const c_string extension_name,
            object typ,
            shared_ptr[CExtensionType]* out)

        # Static factory taking a storage type and a Python object `inst`;
        # the new type is written to `out`.
        @staticmethod
        CStatus FromInstance(
            shared_ptr[CDataType] storage_type,
            object inst,
            shared_ptr[CExtensionType]* out)

        object GetInstance()
        CStatus SetInstance(object)

    # Free functions in namespace arrow::py.
    c_string PyExtensionName()
    CStatus RegisterPyExtensionType(shared_ptr[CDataType])
    CStatus UnregisterPyExtensionType(c_string type_name)
cdef extern from "arrow/python/benchmark.h" \
        namespace "arrow::py::benchmark":

    # Declared `except *`, so Cython checks for a pending Python exception
    # after every call.
    void Benchmark_PandasObjectIsNull(object lst) except *
cdef extern from "arrow/python/gdb.h" namespace "arrow::gdb" nogil:

    # Cython name GdbTestSession bound to the C++ function
    # arrow::gdb::TestSession.
    void GdbTestSession "arrow::gdb::TestSession"()
cdef extern from "arrow/python/helpers.h" \
        namespace "arrow::py::internal":

    # Takes no arguments and returns a c_bool.
    c_bool IsThreadingEnabled()
cdef extern from "arrow/python/config.h" namespace "arrow::py":

    # Only the build_type member is exposed to Cython.
    cdef cppclass CBuildInfo "arrow::py::BuildInfo":
        c_string build_type

    # Free function returning a const reference to a CBuildInfo.
    const CBuildInfo& GetBuildInfo "arrow::py::GetBuildInfo"()

View File

@@ -0,0 +1,100 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# distutils: language = c++
from libcpp.vector cimport vector as std_vector
from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport *
from pyarrow.includes.libarrow_acero cimport *
# Function type CNamedTableProvider: takes a vector of strings and a
# schema (both by const reference) and returns a CDeclaration wrapped in
# a CResult.
ctypedef CResult[CDeclaration] CNamedTableProvider(
    const std_vector[c_string]&, const CSchema&)
cdef extern from "arrow/engine/substrait/options.h" \
        namespace "arrow::engine" nogil:

    # Each enumerator carries an explicit, fully-qualified C++ name.
    cdef enum ConversionStrictness "arrow::engine::ConversionStrictness":
        EXACT_ROUNDTRIP \
            "arrow::engine::ConversionStrictness::EXACT_ROUNDTRIP"
        PRESERVE_STRUCTURE \
            "arrow::engine::ConversionStrictness::PRESERVE_STRUCTURE"
        BEST_EFFORT \
            "arrow::engine::ConversionStrictness::BEST_EFFORT"

    cdef cppclass CConversionOptions "arrow::engine::ConversionOptions":
        CConversionOptions()

        ConversionStrictness strictness
        # Holds a callable of the CNamedTableProvider function type.
        function[CNamedTableProvider] named_table_provider
        c_bool allow_arrow_extensions
cdef extern from "arrow/engine/substrait/extension_set.h" \
        namespace "arrow::engine" nogil:

    # Pair of string views: uri and name.
    cdef struct CSubstraitId "arrow::engine::Id":
        cpp_string_view uri
        cpp_string_view name

    # Pairs a CSubstraitId with a data type.
    cdef struct CExtensionSetTypeRecord \
            "arrow::engine::ExtensionSet::TypeRecord":
        CSubstraitId id
        shared_ptr[CDataType] type

    cdef cppclass CExtensionSet "arrow::engine::ExtensionSet":
        CExtensionSet()

        # Returns a reference to a map from uint32_t to string view.
        unordered_map[uint32_t, cpp_string_view]& uris()

        # Data type -> uint32_t and uint32_t -> type record; both report
        # through a CResult.
        CResult[uint32_t] EncodeType(const CDataType&)
        CResult[CExtensionSetTypeRecord] DecodeType(uint32_t)

    cdef cppclass ExtensionIdRegistry:
        std_vector[c_string] GetSupportedSubstraitFunctions()

    # Free function returning a pointer to an ExtensionIdRegistry.
    ExtensionIdRegistry* default_extension_id_registry()
cdef extern from "arrow/engine/substrait/relation.h" \
        namespace "arrow::engine" nogil:

    # An expression together with a name.
    cdef cppclass CNamedExpression "arrow::engine::NamedExpression":
        CExpression expression
        c_string name

    # A vector of named expressions together with a schema.
    cdef cppclass CBoundExpressions "arrow::engine::BoundExpressions":
        std_vector[CNamedExpression] named_expressions
        shared_ptr[CSchema] schema
cdef extern from "arrow/engine/substrait/serde.h" \
        namespace "arrow::engine" nogil:

    # Bound expressions <-> buffer.
    CResult[shared_ptr[CBuffer]] SerializeExpressions(
        const CBoundExpressions& bound_expressions,
        const CConversionOptions& conversion_options)

    CResult[CBoundExpressions] DeserializeExpressions(
        const CBuffer& serialized_expressions)

    # Schema <-> buffer. Serialization takes the extension set as a
    # non-const pointer, deserialization as a const reference.
    CResult[shared_ptr[CBuffer]] SerializeSchema(
        const CSchema &schema,
        CExtensionSet* extension_set,
        const CConversionOptions& conversion_options)

    CResult[shared_ptr[CSchema]] DeserializeSchema(
        const CBuffer& serialized_schema,
        const CExtensionSet& extension_set,
        const CConversionOptions& conversion_options)
cdef extern from "arrow/engine/substrait/util.h" \
        namespace "arrow::engine" nogil:

    # Takes a serialized plan buffer plus registries and options; the
    # result is a record batch reader wrapped in a CResult.
    CResult[shared_ptr[CRecordBatchReader]] ExecuteSerializedPlan(
        const CBuffer& substrait_buffer,
        const ExtensionIdRegistry* registry,
        CFunctionRegistry* func_registry,
        const CConversionOptions& conversion_options,
        c_bool use_threads)

    # Takes a JSON string and returns a buffer wrapped in a CResult.
    CResult[shared_ptr[CBuffer]] SerializeJsonPlan(
        const c_string& substrait_json)

View File

@@ -0,0 +1,298 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# distutils: language = c++
from libcpp.string cimport string as c_string
from libcpp.unordered_set cimport unordered_set as c_unordered_set
from libc.stdint cimport int64_t, int32_t, uint8_t, uintptr_t
from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport *
cdef extern from "gandiva/node.h" namespace "gandiva" nogil:

    cdef cppclass CNode " gandiva::Node":
        c_string ToString()
        shared_ptr[CDataType] return_type()

    cdef cppclass CGandivaExpression " gandiva::Expression":
        c_string ToString()
        shared_ptr[CNode] root()
        shared_ptr[CField] result()

    # Vector aliases bound to the gandiva typedef names.
    ctypedef vector[shared_ptr[CNode]] CNodeVector " gandiva::NodeVector"
    ctypedef vector[shared_ptr[CGandivaExpression]] \
        CExpressionVector " gandiva::ExpressionVector"
cdef extern from "gandiva/selection_vector.h" namespace "gandiva" nogil:

    cdef cppclass CSelectionVector " gandiva::SelectionVector":
        shared_ptr[CArray] ToArray()

    # Line continuations sit outside the cname string literals so the
    # C++ names are not split across lines.
    enum CSelectionVector_Mode " gandiva::SelectionVector::Mode":
        CSelectionVector_Mode_NONE \
            " gandiva::SelectionVector::Mode::MODE_NONE"
        CSelectionVector_Mode_UINT16 \
            " gandiva::SelectionVector::Mode::MODE_UINT16"
        CSelectionVector_Mode_UINT32 \
            " gandiva::SelectionVector::Mode::MODE_UINT32"
        CSelectionVector_Mode_UINT64 \
            " gandiva::SelectionVector::Mode::MODE_UINT64"

    # Flat Cython names for SelectionVector::MakeInt16/32/64; all three
    # share one parameter list and write the result to `selection_vector`.
    cdef CStatus SelectionVector_MakeInt16 \
        "gandiva::SelectionVector::MakeInt16"(
            int64_t max_slots, CMemoryPool* pool,
            shared_ptr[CSelectionVector]* selection_vector)

    cdef CStatus SelectionVector_MakeInt32 \
        "gandiva::SelectionVector::MakeInt32"(
            int64_t max_slots, CMemoryPool* pool,
            shared_ptr[CSelectionVector]* selection_vector)

    cdef CStatus SelectionVector_MakeInt64 \
        "gandiva::SelectionVector::MakeInt64"(
            int64_t max_slots, CMemoryPool* pool,
            shared_ptr[CSelectionVector]* selection_vector)
cdef inline CSelectionVector_Mode _ensure_selection_mode(str name) except *:
    # Translate a selection-mode name into its CSelectionVector_Mode
    # constant. Matching is case-insensitive because the name is
    # upper-cased first. Any name other than NONE / UINT16 / UINT32 /
    # UINT64 raises ValueError.
    normalized = name.upper()
    if normalized == 'NONE':
        return CSelectionVector_Mode_NONE
    if normalized == 'UINT16':
        return CSelectionVector_Mode_UINT16
    if normalized == 'UINT32':
        return CSelectionVector_Mode_UINT32
    if normalized == 'UINT64':
        return CSelectionVector_Mode_UINT64
    raise ValueError(f'Invalid value for Selection Mode: {name!r}')
cdef inline str _selection_mode_name(CSelectionVector_Mode ctype):
    # Inverse of the name -> mode lookup: return the upper-case name for a
    # CSelectionVector_Mode constant. A value matching none of the four
    # known constants raises RuntimeError.
    if ctype == CSelectionVector_Mode_NONE:
        return 'NONE'
    if ctype == CSelectionVector_Mode_UINT16:
        return 'UINT16'
    if ctype == CSelectionVector_Mode_UINT32:
        return 'UINT32'
    if ctype == CSelectionVector_Mode_UINT64:
        return 'UINT64'
    raise RuntimeError('Unexpected CSelectionVector_Mode value')
cdef extern from "gandiva/condition.h" namespace "gandiva" nogil:

    # Declares the same three accessors as CGandivaExpression.
    cdef cppclass CCondition " gandiva::Condition":
        c_string ToString()
        shared_ptr[CNode] root()
        shared_ptr[CField] result()
cdef extern from "gandiva/arrow.h" namespace "gandiva" nogil:

    # Vector alias bound to the gandiva typedef name.
    ctypedef vector[shared_ptr[CArray]] CArrayVector " gandiva::ArrayVector"
cdef extern from "gandiva/tree_expr_builder.h" namespace "gandiva" nogil:
    # Flat Cython names for static members of gandiva::TreeExprBuilder.
    # Each declaration binds a distinct Cython identifier to the C++ name
    # given in the string literal.

    # -- Literals: the fixed-width and floating-point variants all bind
    # -- to the overloaded MakeLiteral.
    cdef shared_ptr[CNode] TreeExprBuilder_MakeBoolLiteral \
        "gandiva::TreeExprBuilder::MakeLiteral"(c_bool value)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeUInt8Literal \
        "gandiva::TreeExprBuilder::MakeLiteral"(uint8_t value)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeUInt16Literal \
        "gandiva::TreeExprBuilder::MakeLiteral"(uint16_t value)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeUInt32Literal \
        "gandiva::TreeExprBuilder::MakeLiteral"(uint32_t value)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeUInt64Literal \
        "gandiva::TreeExprBuilder::MakeLiteral"(uint64_t value)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeInt8Literal \
        "gandiva::TreeExprBuilder::MakeLiteral"(int8_t value)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeInt16Literal \
        "gandiva::TreeExprBuilder::MakeLiteral"(int16_t value)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeInt32Literal \
        "gandiva::TreeExprBuilder::MakeLiteral"(int32_t value)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeInt64Literal \
        "gandiva::TreeExprBuilder::MakeLiteral"(int64_t value)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeFloatLiteral \
        "gandiva::TreeExprBuilder::MakeLiteral"(float value)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeDoubleLiteral \
        "gandiva::TreeExprBuilder::MakeLiteral"(double value)

    # String and binary literals have dedicated C++ entry points.
    cdef shared_ptr[CNode] TreeExprBuilder_MakeStringLiteral \
        "gandiva::TreeExprBuilder::MakeStringLiteral"(const c_string& value)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeBinaryLiteral \
        "gandiva::TreeExprBuilder::MakeBinaryLiteral"(const c_string& value)

    # -- Expression and node construction.
    cdef shared_ptr[CGandivaExpression] TreeExprBuilder_MakeExpression \
        "gandiva::TreeExprBuilder::MakeExpression"(
            shared_ptr[CNode] root_node, shared_ptr[CField] result_field)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeFunction \
        "gandiva::TreeExprBuilder::MakeFunction"(
            const c_string& name, const CNodeVector& children,
            shared_ptr[CDataType] return_type)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeField \
        "gandiva::TreeExprBuilder::MakeField"(shared_ptr[CField] field)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeIf \
        "gandiva::TreeExprBuilder::MakeIf"(
            shared_ptr[CNode] condition, shared_ptr[CNode] this_node,
            shared_ptr[CNode] else_node, shared_ptr[CDataType] return_type)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeAnd \
        "gandiva::TreeExprBuilder::MakeAnd"(const CNodeVector& children)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeOr \
        "gandiva::TreeExprBuilder::MakeOr"(const CNodeVector& children)

    cdef shared_ptr[CCondition] TreeExprBuilder_MakeCondition \
        "gandiva::TreeExprBuilder::MakeCondition"(
            shared_ptr[CNode] condition)

    # -- IN-expressions: a node plus an unordered set of candidate values.
    cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionInt32 \
        "gandiva::TreeExprBuilder::MakeInExpressionInt32"(
            shared_ptr[CNode] node, const c_unordered_set[int32_t]& values)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionInt64 \
        "gandiva::TreeExprBuilder::MakeInExpressionInt64"(
            shared_ptr[CNode] node, const c_unordered_set[int64_t]& values)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionTime32 \
        "gandiva::TreeExprBuilder::MakeInExpressionTime32"(
            shared_ptr[CNode] node, const c_unordered_set[int32_t]& values)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionTime64 \
        "gandiva::TreeExprBuilder::MakeInExpressionTime64"(
            shared_ptr[CNode] node, const c_unordered_set[int64_t]& values)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionDate32 \
        "gandiva::TreeExprBuilder::MakeInExpressionDate32"(
            shared_ptr[CNode] node, const c_unordered_set[int32_t]& values)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionDate64 \
        "gandiva::TreeExprBuilder::MakeInExpressionDate64"(
            shared_ptr[CNode] node, const c_unordered_set[int64_t]& values)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionTimeStamp \
        "gandiva::TreeExprBuilder::MakeInExpressionTimeStamp"(
            shared_ptr[CNode] node, const c_unordered_set[int64_t]& values)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionString \
        "gandiva::TreeExprBuilder::MakeInExpressionString"(
            shared_ptr[CNode] node, const c_unordered_set[c_string]& values)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionBinary \
        "gandiva::TreeExprBuilder::MakeInExpressionBinary"(
            shared_ptr[CNode] node, const c_unordered_set[c_string]& values)
cdef extern from "gandiva/projector.h" namespace "gandiva" nogil:

    cdef cppclass CProjector " gandiva::Projector":
        # Two Evaluate overloads: without and with a selection vector.
        CStatus Evaluate(
            const CRecordBatch& batch, CMemoryPool* pool,
            const CArrayVector* output)

        CStatus Evaluate(
            const CRecordBatch& batch,
            const CSelectionVector* selection,
            CMemoryPool* pool,
            const CArrayVector* output)

        c_string DumpIR()

    # Two bindings of the static Projector::Make under one Cython name.
    # The second additionally takes a selection mode and a configuration.
    # Both write the projector to the trailing pointer argument.
    cdef CStatus Projector_Make \
        "gandiva::Projector::Make"(
            shared_ptr[CSchema] schema, const CExpressionVector& children,
            shared_ptr[CProjector]* projector)

    cdef CStatus Projector_Make \
        "gandiva::Projector::Make"(
            shared_ptr[CSchema] schema, const CExpressionVector& children,
            CSelectionVector_Mode mode,
            shared_ptr[CConfiguration] configuration,
            shared_ptr[CProjector]* projector)
cdef extern from "gandiva/filter.h" namespace "gandiva" nogil:

    cdef cppclass CFilter " gandiva::Filter":
        # Takes a record batch and a shared_ptr to a selection vector
        # named out_selection.
        CStatus Evaluate(
            const CRecordBatch& batch,
            shared_ptr[CSelectionVector] out_selection)

        c_string DumpIR()

    # Flat Cython name for the static Filter::Make; the filter is written
    # to the trailing pointer argument.
    cdef CStatus Filter_Make \
        "gandiva::Filter::Make"(
            shared_ptr[CSchema] schema, shared_ptr[CCondition] condition,
            shared_ptr[CConfiguration] configuration,
            shared_ptr[CFilter]* filter)
cdef extern from "gandiva/function_signature.h" namespace "gandiva" nogil:

    cdef cppclass CFunctionSignature " gandiva::FunctionSignature":
        # Constructor: base name, parameter types, return type.
        CFunctionSignature(
            const c_string& base_name,
            vector[shared_ptr[CDataType]] param_types,
            shared_ptr[CDataType] ret_type)

        # Accessors, all declared const.
        shared_ptr[CDataType] ret_type() const
        const c_string& base_name() const
        vector[shared_ptr[CDataType]] param_types() const
        c_string ToString() const
cdef extern from "gandiva/expression_registry.h" \
        namespace "gandiva" nogil:

    # Returns a vector of shared pointers to CFunctionSignature.
    cdef vector[shared_ptr[CFunctionSignature]] \
        GetRegisteredFunctionSignatures()
cdef extern from "gandiva/configuration.h" namespace "gandiva" nogil:

    cdef cppclass CConfiguration " gandiva::Configuration":
        # Default constructor and a two-flag constructor.
        CConfiguration()
        CConfiguration(bint optimize, bint dump_ir)

        void set_optimize(bint optimize)
        void set_dump_ir(bint dump_ir)

    cdef cppclass CConfigurationBuilder \
            " gandiva::ConfigurationBuilder":
        # Static member returning a shared_ptr to a CConfiguration.
        @staticmethod
        shared_ptr[CConfiguration] DefaultConfiguration()

        CConfigurationBuilder()

        shared_ptr[CConfiguration] build()

View File

@@ -0,0 +1,632 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# distutils: language = c++
from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport (Type, CChunkedArray, CScalar, CSchema,
CStatus, CTable, CMemoryPool, CBuffer,
CKeyValueMetadata, CRandomAccessFile,
COutputStream, CCacheOptions,
TimeUnit, CRecordBatchReader)
cdef extern from "parquet/api/schema.h" namespace "parquet::schema" nogil:

    # Node and its two subclasses are opaque on the Cython side: no
    # members are declared.
    cdef cppclass Node:
        pass

    cdef cppclass GroupNode(Node):
        pass

    cdef cppclass PrimitiveNode(Node):
        pass

    # Exposes a string form and a vector-of-strings form.
    cdef cppclass ColumnPath:
        c_string ToDotString()
        vector[c_string] ToDotVector()
cdef extern from "parquet/api/schema.h" namespace "parquet" nogil:
enum ParquetType " parquet::Type::type":
    # Cython names for the enumerators of parquet::Type.
    ParquetType_BOOLEAN " parquet::Type::BOOLEAN"
    ParquetType_INT32 " parquet::Type::INT32"
    ParquetType_INT64 " parquet::Type::INT64"
    ParquetType_INT96 " parquet::Type::INT96"
    ParquetType_FLOAT " parquet::Type::FLOAT"
    ParquetType_DOUBLE " parquet::Type::DOUBLE"
    ParquetType_BYTE_ARRAY " parquet::Type::BYTE_ARRAY"
    ParquetType_FIXED_LEN_BYTE_ARRAY " parquet::Type::FIXED_LEN_BYTE_ARRAY"
enum ParquetLogicalTypeId " parquet::LogicalType::Type::type":
    # Cython names for the enumerators of parquet::LogicalType::Type.
    ParquetLogicalType_UNDEFINED " parquet::LogicalType::Type::UNDEFINED"
    ParquetLogicalType_STRING " parquet::LogicalType::Type::STRING"
    ParquetLogicalType_MAP " parquet::LogicalType::Type::MAP"
    ParquetLogicalType_LIST " parquet::LogicalType::Type::LIST"
    ParquetLogicalType_ENUM " parquet::LogicalType::Type::ENUM"
    ParquetLogicalType_DECIMAL " parquet::LogicalType::Type::DECIMAL"
    ParquetLogicalType_DATE " parquet::LogicalType::Type::DATE"
    ParquetLogicalType_TIME " parquet::LogicalType::Type::TIME"
    ParquetLogicalType_TIMESTAMP " parquet::LogicalType::Type::TIMESTAMP"
    ParquetLogicalType_INT " parquet::LogicalType::Type::INT"
    ParquetLogicalType_FLOAT16 " parquet::LogicalType::Type::FLOAT16"
    ParquetLogicalType_JSON " parquet::LogicalType::Type::JSON"
    ParquetLogicalType_BSON " parquet::LogicalType::Type::BSON"
    ParquetLogicalType_UUID " parquet::LogicalType::Type::UUID"
    ParquetLogicalType_GEOMETRY " parquet::LogicalType::Type::GEOMETRY"
    ParquetLogicalType_GEOGRAPHY " parquet::LogicalType::Type::GEOGRAPHY"
    ParquetLogicalType_NONE " parquet::LogicalType::Type::NONE"
enum ParquetTimeUnit " parquet::LogicalType::TimeUnit::unit":
    # Cython names for the enumerators of parquet::LogicalType::TimeUnit.
    ParquetTimeUnit_UNKNOWN " parquet::LogicalType::TimeUnit::UNKNOWN"
    ParquetTimeUnit_MILLIS " parquet::LogicalType::TimeUnit::MILLIS"
    ParquetTimeUnit_MICROS " parquet::LogicalType::TimeUnit::MICROS"
    ParquetTimeUnit_NANOS " parquet::LogicalType::TimeUnit::NANOS"
enum ParquetEdgeInterpolationAlgorithm \
        " parquet::LogicalType::EdgeInterpolationAlgorithm":
    # Cython names for the enumerators of
    # parquet::LogicalType::EdgeInterpolationAlgorithm.
    ParquetEdgeInterpolationAlgorithm_UNKNOWN \
        " parquet::LogicalType::EdgeInterpolationAlgorithm::UNKNOWN"
    ParquetEdgeInterpolationAlgorithm_SPHERICAL \
        " parquet::LogicalType::EdgeInterpolationAlgorithm::SPHERICAL"
    ParquetEdgeInterpolationAlgorithm_VINCENTY \
        " parquet::LogicalType::EdgeInterpolationAlgorithm::VINCENTY"
    ParquetEdgeInterpolationAlgorithm_THOMAS \
        " parquet::LogicalType::EdgeInterpolationAlgorithm::THOMAS"
    ParquetEdgeInterpolationAlgorithm_ANDOYER \
        " parquet::LogicalType::EdgeInterpolationAlgorithm::ANDOYER"
    ParquetEdgeInterpolationAlgorithm_KARNEY \
        " parquet::LogicalType::EdgeInterpolationAlgorithm::KARNEY"
enum ParquetConvertedType " parquet::ConvertedType::type":
    # Cython names for the enumerators of parquet::ConvertedType.
    ParquetConvertedType_NONE " parquet::ConvertedType::NONE"
    ParquetConvertedType_UTF8 " parquet::ConvertedType::UTF8"
    ParquetConvertedType_MAP " parquet::ConvertedType::MAP"
    ParquetConvertedType_MAP_KEY_VALUE \
        " parquet::ConvertedType::MAP_KEY_VALUE"
    ParquetConvertedType_LIST " parquet::ConvertedType::LIST"
    ParquetConvertedType_ENUM " parquet::ConvertedType::ENUM"
    ParquetConvertedType_DECIMAL " parquet::ConvertedType::DECIMAL"
    ParquetConvertedType_DATE " parquet::ConvertedType::DATE"
    ParquetConvertedType_TIME_MILLIS " parquet::ConvertedType::TIME_MILLIS"
    ParquetConvertedType_TIME_MICROS " parquet::ConvertedType::TIME_MICROS"
    ParquetConvertedType_TIMESTAMP_MILLIS \
        " parquet::ConvertedType::TIMESTAMP_MILLIS"
    ParquetConvertedType_TIMESTAMP_MICROS \
        " parquet::ConvertedType::TIMESTAMP_MICROS"
    ParquetConvertedType_UINT_8 " parquet::ConvertedType::UINT_8"
    ParquetConvertedType_UINT_16 " parquet::ConvertedType::UINT_16"
    ParquetConvertedType_UINT_32 " parquet::ConvertedType::UINT_32"
    ParquetConvertedType_UINT_64 " parquet::ConvertedType::UINT_64"
    ParquetConvertedType_INT_8 " parquet::ConvertedType::INT_8"
    ParquetConvertedType_INT_16 " parquet::ConvertedType::INT_16"
    ParquetConvertedType_INT_32 " parquet::ConvertedType::INT_32"
    ParquetConvertedType_INT_64 " parquet::ConvertedType::INT_64"
    ParquetConvertedType_JSON " parquet::ConvertedType::JSON"
    ParquetConvertedType_BSON " parquet::ConvertedType::BSON"
    ParquetConvertedType_INTERVAL " parquet::ConvertedType::INTERVAL"
enum ParquetRepetition" parquet::Repetition::type":
    # Cython names for the enumerators of parquet::Repetition.
    # The enumerator cnames must spell the enclosing scope exactly as the
    # enum type's own cname does (parquet::Repetition): C++ identifiers
    # are case-sensitive, so an all-caps scope name would not resolve.
    ParquetRepetition_REQUIRED" parquet::Repetition::REQUIRED"
    ParquetRepetition_OPTIONAL" parquet::Repetition::OPTIONAL"
    ParquetRepetition_REPEATED" parquet::Repetition::REPEATED"
enum ParquetEncoding " parquet::Encoding::type":
    # Cython names for the enumerators of parquet::Encoding.
    ParquetEncoding_PLAIN " parquet::Encoding::PLAIN"
    ParquetEncoding_PLAIN_DICTIONARY " parquet::Encoding::PLAIN_DICTIONARY"
    ParquetEncoding_RLE " parquet::Encoding::RLE"
    ParquetEncoding_BIT_PACKED " parquet::Encoding::BIT_PACKED"
    ParquetEncoding_DELTA_BINARY_PACKED \
        " parquet::Encoding::DELTA_BINARY_PACKED"
    ParquetEncoding_DELTA_LENGTH_BYTE_ARRAY \
        " parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY"
    ParquetEncoding_DELTA_BYTE_ARRAY " parquet::Encoding::DELTA_BYTE_ARRAY"
    ParquetEncoding_RLE_DICTIONARY " parquet::Encoding::RLE_DICTIONARY"
    ParquetEncoding_BYTE_STREAM_SPLIT \
        " parquet::Encoding::BYTE_STREAM_SPLIT"
# Cython-side view of the C++ enum parquet::Compression::type.
# Only the codecs listed here are reachable from Cython code that cimports
# this declaration.
enum ParquetCompression" parquet::Compression::type":
ParquetCompression_UNCOMPRESSED" parquet::Compression::UNCOMPRESSED"
ParquetCompression_SNAPPY" parquet::Compression::SNAPPY"
ParquetCompression_GZIP" parquet::Compression::GZIP"
ParquetCompression_LZO" parquet::Compression::LZO"
ParquetCompression_BROTLI" parquet::Compression::BROTLI"
ParquetCompression_LZ4" parquet::Compression::LZ4"
ParquetCompression_ZSTD" parquet::Compression::ZSTD"
# Cython-side view of the C++ enum parquet::ParquetVersion::type.
# The short Cython names V1 / V2_4 / V2_6 map to the C++ members
# PARQUET_1_0 / PARQUET_2_4 / PARQUET_2_6 respectively.
enum ParquetVersion" parquet::ParquetVersion::type":
ParquetVersion_V1" parquet::ParquetVersion::PARQUET_1_0"
ParquetVersion_V2_4" parquet::ParquetVersion::PARQUET_2_4"
ParquetVersion_V2_6" parquet::ParquetVersion::PARQUET_2_6"
# Cython-side view of the C++ enum parquet::SortOrder::type
# (SIGNED, UNSIGNED, UNKNOWN).
enum ParquetSortOrder" parquet::SortOrder::type":
ParquetSortOrder_SIGNED" parquet::SortOrder::SIGNED"
ParquetSortOrder_UNSIGNED" parquet::SortOrder::UNSIGNED"
ParquetSortOrder_UNKNOWN" parquet::SortOrder::UNKNOWN"
# Base binding for parquet::LogicalType. It exposes two string renderings
# (ToString, ToJSON) and the type-id accessor; the more specific logical
# types declared after it list this class as their base.
cdef cppclass CParquetLogicalType" parquet::LogicalType":
c_string ToString() const
c_string ToJSON() const
ParquetLogicalTypeId type() const
# Binding for parquet::DecimalLogicalType (derived from CParquetLogicalType);
# adds the precision and scale accessors, both returning int32_t.
cdef cppclass CParquetDecimalType \
" parquet::DecimalLogicalType"(CParquetLogicalType):
int32_t precision() const
int32_t scale() const
# Binding for parquet::IntLogicalType (derived from CParquetLogicalType);
# adds the bit-width and signedness accessors.
cdef cppclass CParquetIntType \
" parquet::IntLogicalType"(CParquetLogicalType):
int bit_width() const
c_bool is_signed() const
# Binding for parquet::TimeLogicalType (derived from CParquetLogicalType);
# same two accessors as the timestamp binding: UTC-adjustment flag and unit.
cdef cppclass CParquetTimeType \
" parquet::TimeLogicalType"(CParquetLogicalType):
c_bool is_adjusted_to_utc() const
ParquetTimeUnit time_unit() const
# Binding for parquet::TimestampLogicalType (derived from
# CParquetLogicalType); exposes the UTC-adjustment flag and the time unit.
cdef cppclass CParquetTimestampType \
" parquet::TimestampLogicalType"(CParquetLogicalType):
c_bool is_adjusted_to_utc() const
ParquetTimeUnit time_unit() const
# Binding for parquet::GeometryLogicalType (derived from
# CParquetLogicalType); only the crs() accessor, returned as a string by
# value, is exposed.
cdef cppclass CParquetGeometryType \
" parquet::GeometryLogicalType"(CParquetLogicalType):
c_string crs() const
# Binding for parquet::GeographyLogicalType (derived from
# CParquetLogicalType); exposes crs() like the geometry binding plus the
# edge-interpolation algorithm accessor.
cdef cppclass CParquetGeographyType \
" parquet::GeographyLogicalType"(CParquetLogicalType):
c_string crs() const
ParquetEdgeInterpolationAlgorithm algorithm() const
# Binding for parquet::ColumnDescriptor: read-only accessors describing one
# column (path, definition/repetition levels, physical/logical/converted
# type, name and the type length/precision/scale).
# logical_type() and name() are declared as returning const references, so
# callers must not keep them beyond the descriptor's lifetime.
cdef cppclass ColumnDescriptor" parquet::ColumnDescriptor":
c_bool Equals(const ColumnDescriptor& other)
shared_ptr[ColumnPath] path()
int16_t max_definition_level()
int16_t max_repetition_level()
ParquetType physical_type()
const shared_ptr[const CParquetLogicalType]& logical_type()
ParquetConvertedType converted_type()
const c_string& name()
int type_length()
int type_precision()
int type_scale()
# Binding for the schema descriptor. No cname string is given, so the C++
# name is the Cython name resolved in the namespace of the enclosing extern
# block (that block's header lies outside this excerpt).
# Column(i) and group() return raw pointers; schema() returns a shared_ptr.
cdef cppclass SchemaDescriptor:
const ColumnDescriptor* Column(int i)
shared_ptr[Node] schema()
GroupNode* group()
c_bool Equals(const SchemaDescriptor& other)
c_string ToString()
int num_columns()
# Free function: takes a physical type and a value string (both by value)
# and returns a string by value.
cdef c_string FormatStatValue(ParquetType parquet_type, c_string val)
# Cython-side view of the C++ enum parquet::ParquetCipher::type with its two
# members AES_GCM_V1 and AES_GCM_CTR_V1.
enum ParquetCipher" parquet::ParquetCipher::type":
ParquetCipher_AES_GCM_V1" parquet::ParquetCipher::AES_GCM_V1"
ParquetCipher_AES_GCM_CTR_V1" parquet::ParquetCipher::AES_GCM_CTR_V1"
# Plain struct with two string fields and one flag; used as the `aad` member
# of EncryptionAlgorithm.
struct AadMetadata:
c_string aad_prefix
c_string aad_file_unique
c_bool supply_aad_prefix
# Plain struct pairing a ParquetCipher value with its AadMetadata.
struct EncryptionAlgorithm:
ParquetCipher algorithm
AadMetadata aad
# Specific array<> types needed for GeoStatistics
# Each std::array instantiation is declared as its own cppclass with a fixed
# cname ("std::array<double, 4>", "std::array<bool, 4>"). Only default
# construction (`except +` converts a C++ exception into a Python one) and
# element access through operator[] are exposed.
cdef extern from "<array>" namespace "std" nogil:
cdef cppclass double_array4 "std::array<double, 4>":
double_array4() except +
double& operator[](size_t)
cdef cppclass bool_array4 "std::array<bool, 4>":
bool_array4() except +
c_bool& operator[](size_t)
# Binding for parquet::geospatial::GeoStatistics. The extern block names the
# "parquet" namespace, but the class carries a fully qualified cname, so the
# nested `geospatial` namespace is spelled out there.
# The bound/dimension accessors return the 4-element std::array bindings by
# value; geometry_types() returns an optional vector of int32_t.
cdef extern from "parquet/geospatial/statistics.h" namespace "parquet" nogil:
cdef cppclass CParquetGeoStatistics" parquet::geospatial::GeoStatistics":
c_bool is_valid() const
double_array4 lower_bound() const
double_array4 upper_bound() const
bool_array4 dimension_valid() const
bool_array4 dimension_empty() const
optional[vector[int32_t]] geometry_types() const
c_string ToString() const
# Declarations taken from "parquet/api/reader.h" (C++ namespace `parquet`).
# The whole block is `nogil`, so these declarations may be used without
# holding the GIL. Classes with a quoted string after their Cython name are
# bound to that exact C++ name; classes without one use the Cython name
# inside the `parquet` namespace.
cdef extern from "parquet/api/reader.h" namespace "parquet" nogil:
    # Column reader types are declared opaque (`pass`): no members are
    # exposed, only the type names and their relation to ColumnReader.
    cdef cppclass ColumnReader:
        pass

    cdef cppclass BoolReader(ColumnReader):
        pass

    cdef cppclass Int32Reader(ColumnReader):
        pass

    cdef cppclass Int64Reader(ColumnReader):
        pass

    cdef cppclass Int96Reader(ColumnReader):
        pass

    cdef cppclass FloatReader(ColumnReader):
        pass

    cdef cppclass DoubleReader(ColumnReader):
        pass

    cdef cppclass ByteArrayReader(ColumnReader):
        pass

    cdef cppclass RowGroupReader:
        pass

    # Encoded statistics: min/max are accessor methods, the counters and
    # has_* flags are declared as plain data members.
    cdef cppclass CEncodedStatistics" parquet::EncodedStatistics":
        const c_string& max() const
        const c_string& min() const
        int64_t null_count
        int64_t distinct_count
        bint has_min
        bint has_max
        bint has_null_count
        bint has_distinct_count

    # Value types returned by the byte-array / FLBA statistics below.
    cdef cppclass ParquetByteArray" parquet::ByteArray":
        uint32_t len
        const uint8_t* ptr

    cdef cppclass ParquetFLBA" parquet::FLBA":
        const uint8_t* ptr

    # Untyped statistics interface; the typed subclasses add min()/max()
    # with the matching C value type.
    cdef cppclass CStatistics" parquet::Statistics":
        int64_t null_count() const
        int64_t distinct_count() const
        int64_t num_values() const
        bint HasMinMax()
        bint HasNullCount()
        bint HasDistinctCount()
        c_bool Equals(const CStatistics&) const
        void Reset()
        c_string EncodeMin()
        c_string EncodeMax()
        CEncodedStatistics Encode()
        void SetComparator()
        ParquetType physical_type() const
        const ColumnDescriptor* descr() const

    cdef cppclass CBoolStatistics" parquet::BoolStatistics"(CStatistics):
        c_bool min()
        c_bool max()

    cdef cppclass CInt32Statistics" parquet::Int32Statistics"(CStatistics):
        int32_t min()
        int32_t max()

    cdef cppclass CInt64Statistics" parquet::Int64Statistics"(CStatistics):
        int64_t min()
        int64_t max()

    cdef cppclass CFloatStatistics" parquet::FloatStatistics"(CStatistics):
        float min()
        float max()

    cdef cppclass CDoubleStatistics" parquet::DoubleStatistics"(CStatistics):
        double min()
        double max()

    cdef cppclass CByteArrayStatistics \
            " parquet::ByteArrayStatistics"(CStatistics):
        ParquetByteArray min()
        ParquetByteArray max()

    cdef cppclass CFLBAStatistics" parquet::FLBAStatistics"(CStatistics):
        ParquetFLBA min()
        ParquetFLBA max()

    cdef cppclass CColumnCryptoMetaData" parquet::ColumnCryptoMetaData":
        shared_ptr[ColumnPath] path_in_schema() const
        c_bool encrypted_with_footer_key() const
        const c_string& key_metadata() const

    # Offset/length pair wrapped in optional[] by the Get*IndexLocation
    # accessors of CColumnChunkMetaData.
    cdef cppclass ParquetIndexLocation" parquet::IndexLocation":
        int64_t offset
        int32_t length

    cdef cppclass CColumnChunkMetaData" parquet::ColumnChunkMetaData":
        int64_t file_offset() const
        const c_string& file_path() const
        c_bool is_metadata_set() const
        ParquetType type() const
        int64_t num_values() const
        shared_ptr[ColumnPath] path_in_schema() const
        bint is_stats_set() const
        shared_ptr[CStatistics] statistics() const
        c_bool is_geo_stats_set() const
        shared_ptr[CParquetGeoStatistics] geo_statistics() const
        ParquetCompression compression() const
        const vector[ParquetEncoding]& encodings() const
        c_bool Equals(const CColumnChunkMetaData&) const
        # Predicate: declared as c_bool to match the C++ `bool` return type
        # (like is_metadata_set / is_geo_stats_set), not as an integer.
        c_bool has_dictionary_page() const
        int64_t dictionary_page_offset() const
        int64_t data_page_offset() const
        int64_t index_page_offset() const
        int64_t total_compressed_size() const
        int64_t total_uncompressed_size() const
        unique_ptr[CColumnCryptoMetaData] crypto_metadata() const
        optional[ParquetIndexLocation] GetColumnIndexLocation() const
        optional[ParquetIndexLocation] GetOffsetIndexLocation() const
        shared_ptr[const CKeyValueMetadata] key_value_metadata() const

    struct CSortingColumn" parquet::SortingColumn":
        int column_idx
        c_bool descending
        c_bool nulls_first

    # ColumnChunk(i) hands ownership of the chunk metadata to the caller
    # through a unique_ptr.
    cdef cppclass CRowGroupMetaData" parquet::RowGroupMetaData":
        c_bool Equals(const CRowGroupMetaData&) const
        int num_columns() const
        int64_t num_rows() const
        int64_t total_byte_size() const
        vector[CSortingColumn] sorting_columns() const
        unique_ptr[CColumnChunkMetaData] ColumnChunk(int i) const

    # AppendRowGroups is the only member declared `except +`, i.e. the only
    # one whose C++ exceptions are translated into Python exceptions.
    cdef cppclass CFileMetaData" parquet::FileMetaData":
        c_bool Equals(const CFileMetaData&) const
        uint32_t size()
        int num_columns()
        int64_t num_rows()
        int num_row_groups()
        ParquetVersion version()
        const c_string created_by()
        int num_schema_elements()
        void set_file_path(const c_string& path)
        void AppendRowGroups(const CFileMetaData& other) except +
        unique_ptr[CRowGroupMetaData] RowGroup(int i)
        const SchemaDescriptor* schema()
        shared_ptr[const CKeyValueMetadata] key_value_metadata() const
        void WriteTo(COutputStream* dst) const
        inline c_bool is_encryption_algorithm_set() const
        inline EncryptionAlgorithm encryption_algorithm() const
        inline const c_string& footer_signing_key_metadata() const

    # Static C++ factory exposed as a free Cython function via its cname.
    # metadata_len is passed by pointer.
    cdef shared_ptr[CFileMetaData] CFileMetaData_Make \
        " parquet::FileMetaData::Make"(const void* serialized_metadata,
                                       uint32_t* metadata_len)

    # file_decryption_properties is declared twice: the one-argument form is
    # the setter, the zero-argument const form is the getter.
    cdef cppclass CReaderProperties" parquet::ReaderProperties":
        c_bool is_buffered_stream_enabled() const
        void enable_buffered_stream()
        void disable_buffered_stream()
        void set_buffer_size(int64_t buf_size)
        int64_t buffer_size() const
        void set_thrift_string_size_limit(int32_t size)
        int32_t thrift_string_size_limit() const
        void set_thrift_container_size_limit(int32_t size)
        int32_t thrift_container_size_limit() const
        void file_decryption_properties(shared_ptr[CFileDecryptionProperties]
                                        decryption)
        shared_ptr[CFileDecryptionProperties] file_decryption_properties() \
            const
        c_bool page_checksum_verification() const
        void set_page_checksum_verification(c_bool check_crc)

    # Free function returning a CReaderProperties by value.
    CReaderProperties default_reader_properties()

    cdef cppclass ArrowReaderProperties:
        ArrowReaderProperties()
        void set_binary_type(Type binary_type)
        Type binary_type()
        void set_list_type(Type list_type)
        Type list_type()
        void set_read_dictionary(int column_index, c_bool read_dict)
        c_bool read_dictionary(int column_index)
        void set_batch_size(int64_t batch_size)
        int64_t batch_size()
        void set_pre_buffer(c_bool pre_buffer)
        c_bool pre_buffer() const
        void set_cache_options(CCacheOptions options)
        CCacheOptions cache_options() const
        void set_coerce_int96_timestamp_unit(TimeUnit unit)
        TimeUnit coerce_int96_timestamp_unit() const
        void set_arrow_extensions_enabled(c_bool extensions_enabled)
        c_bool get_arrow_extensions_enabled() const

    # Free function returning an ArrowReaderProperties by value.
    ArrowReaderProperties default_arrow_reader_properties()

    cdef cppclass ParquetFileReader:
        shared_ptr[CFileMetaData] metadata()
# Writer-side configuration types declared from "parquet/api/writer.h"
# (C++ namespace `parquet`, usable without the GIL).
cdef extern from "parquet/api/writer.h" namespace "parquet" nogil:
# Plain option fields; passed by value to
# WriterProperties.Builder.content_defined_chunking_options below.
cdef cppclass CdcOptions:
int64_t min_chunk_size
int64_t max_chunk_size
int norm_level
cdef cppclass WriterProperties:
# Nested builder: every setter is declared as returning Builder*, and
# build() returns the finished WriterProperties as a shared_ptr.
# compression, compression_level, encoding, enable_dictionary and
# enable_statistics each have a second overload taking a column path.
cppclass Builder:
Builder* data_page_version(ParquetDataPageVersion version)
Builder* version(ParquetVersion version)
Builder* compression(ParquetCompression codec)
Builder* compression(const c_string& path,
ParquetCompression codec)
Builder* compression_level(int compression_level)
Builder* compression_level(const c_string& path,
int compression_level)
Builder* encryption(
shared_ptr[CFileEncryptionProperties]
file_encryption_properties)
Builder* disable_dictionary()
Builder* enable_dictionary()
Builder* enable_dictionary(const c_string& path)
Builder* set_sorting_columns(vector[CSortingColumn] sorting_columns)
Builder* disable_statistics()
Builder* enable_statistics()
Builder* enable_statistics(const c_string& path)
Builder* enable_store_decimal_as_integer()
Builder* disable_store_decimal_as_integer()
Builder* data_pagesize(int64_t size)
Builder* encoding(ParquetEncoding encoding)
Builder* encoding(const c_string& path,
ParquetEncoding encoding)
Builder* max_row_group_length(int64_t size)
Builder* write_batch_size(int64_t batch_size)
Builder* dictionary_pagesize_limit(int64_t dictionary_pagesize_limit)
Builder* enable_write_page_index()
Builder* disable_write_page_index()
Builder* enable_page_checksum()
Builder* disable_page_checksum()
Builder* enable_content_defined_chunking()
Builder* disable_content_defined_chunking()
Builder* content_defined_chunking_options(CdcOptions options)
shared_ptr[WriterProperties] build()
cdef cppclass ArrowWriterProperties:
# Nested builder with an explicitly declared default constructor; same
# Builder*-returning setter convention as WriterProperties.Builder.
cppclass Builder:
Builder()
Builder* disable_deprecated_int96_timestamps()
Builder* enable_deprecated_int96_timestamps()
Builder* coerce_timestamps(TimeUnit unit)
Builder* allow_truncated_timestamps()
Builder* disallow_truncated_timestamps()
Builder* store_schema()
Builder* enable_compliant_nested_types()
Builder* disable_compliant_nested_types()
Builder* set_engine_version(ArrowWriterEngineVersion version)
shared_ptr[ArrowWriterProperties] build()
# NOTE(review): presumably a member of ArrowWriterProperties itself rather
# than of its Builder; the nesting cannot be confirmed from this flattened
# text.
c_bool support_deprecated_int96_timestamps()
# Arrow-facing Parquet reader declared from "parquet/arrow/reader.h"
# (C++ namespace `parquet::arrow`, usable without the GIL).
# Most operations report failure through their CStatus / CResult return
# value; no method here is declared `except +`.
cdef extern from "parquet/arrow/reader.h" namespace "parquet::arrow" nogil:
# Read methods write their result through an output pointer (`out`) and
# come in overloads with and without an explicit list of column indices.
cdef cppclass FileReader:
FileReader(CMemoryPool* pool, unique_ptr[ParquetFileReader] reader)
CStatus GetSchema(shared_ptr[CSchema]* out)
CStatus ReadColumn(int i, shared_ptr[CChunkedArray]* out)
CStatus ReadSchemaField(int i, shared_ptr[CChunkedArray]* out)
int num_row_groups()
CStatus ReadRowGroup(int i, shared_ptr[CTable]* out)
CStatus ReadRowGroup(int i, const vector[int]& column_indices,
shared_ptr[CTable]* out)
CStatus ReadRowGroups(const vector[int]& row_groups,
shared_ptr[CTable]* out)
CStatus ReadRowGroups(const vector[int]& row_groups,
const vector[int]& column_indices,
shared_ptr[CTable]* out)
CResult[unique_ptr[CRecordBatchReader]] GetRecordBatchReader(const vector[int]& row_group_indices,
const vector[int]& column_indices)
CResult[unique_ptr[CRecordBatchReader]] GetRecordBatchReader(const vector[int]& row_group_indices)
CStatus ReadTable(shared_ptr[CTable]* out)
CStatus ReadTable(const vector[int]& column_indices,
shared_ptr[CTable]* out)
CStatus ScanContents(vector[int] columns, int32_t column_batch_size,
int64_t* num_rows)
const ParquetFileReader* parquet_reader()
void set_use_threads(c_bool use_threads)
void set_batch_size(int64_t batch_size)
# Builder: Open() takes the file, reader properties and (shared) file
# metadata; memory_pool()/properties() return the builder pointer; Build()
# writes the resulting FileReader into `out`.
cdef cppclass FileReaderBuilder:
FileReaderBuilder()
CStatus Open(const shared_ptr[CRandomAccessFile]& file,
const CReaderProperties& properties,
const shared_ptr[CFileMetaData]& metadata)
ParquetFileReader* raw_reader()
FileReaderBuilder* memory_pool(CMemoryPool*)
FileReaderBuilder* properties(const ArrowReaderProperties&)
CStatus Build(unique_ptr[FileReader]* out)
# Free function: fills `out` with a CSchema built from a Parquet schema
# descriptor, reader properties and key/value metadata.
CStatus FromParquetSchema(
const SchemaDescriptor* parquet_schema,
const ArrowReaderProperties& properties,
const shared_ptr[const CKeyValueMetadata]& key_value_metadata,
shared_ptr[CSchema]* out)
# Free function: writes two scalars (min, max) obtained from a CStatistics
# object through the given output pointers.
CStatus StatisticsAsScalars(const CStatistics& Statistics,
shared_ptr[CScalar]* min,
shared_ptr[CScalar]* max)
# Free function from "parquet/arrow/schema.h" (namespace `parquet::arrow`):
# takes an Arrow schema pointer plus both writer-property objects and writes
# the resulting SchemaDescriptor into `out`; failure is reported via CStatus.
cdef extern from "parquet/arrow/schema.h" namespace "parquet::arrow" nogil:
CStatus ToParquetSchema(
const CSchema* arrow_schema,
const WriterProperties& properties,
const ArrowWriterProperties& arrow_properties,
shared_ptr[SchemaDescriptor]* out)
# Enum-like values declared from "parquet/properties.h" (namespace `parquet`).
cdef extern from "parquet/properties.h" namespace "parquet" nogil:
# The Cython names V1/V2 carry fully qualified cnames that place the
# enumerators inside parquet::ArrowWriterProperties.
cdef enum ArrowWriterEngineVersion:
V1 "parquet::ArrowWriterProperties::V1",
V2 "parquet::ArrowWriterProperties::V2"
# ParquetDataPageVersion is declared as an opaque class rather than a Cython
# enum; its two values are exposed as extern constants of that type.
cdef cppclass ParquetDataPageVersion:
pass
cdef ParquetDataPageVersion ParquetDataPageVersion_V1 \
" parquet::ParquetDataPageVersion::V1"
cdef ParquetDataPageVersion ParquetDataPageVersion_V2 \
" parquet::ParquetDataPageVersion::V2"
# Arrow-facing Parquet writer declared from "parquet/arrow/writer.h"
# (C++ namespace `parquet::arrow`, usable without the GIL).
cdef extern from "parquet/arrow/writer.h" namespace "parquet::arrow" nogil:
cdef cppclass FileWriter:
# Static factory: returns the writer wrapped in a CResult instead of
# filling an output pointer.
@staticmethod
CResult[unique_ptr[FileWriter]] Open(const CSchema& schema, CMemoryPool* pool,
const shared_ptr[COutputStream]& sink,
const shared_ptr[WriterProperties]& properties,
const shared_ptr[ArrowWriterProperties]& arrow_properties)
CStatus WriteTable(const CTable& table, int64_t chunk_size)
CStatus NewRowGroup()
CStatus Close()
CStatus AddKeyValueMetadata(const shared_ptr[const CKeyValueMetadata]& key_value_metadata)
const shared_ptr[CFileMetaData] metadata() const
# Free function: takes file metadata by const reference and a sink stream
# pointer; failure is reported via CStatus.
CStatus WriteMetaDataFile(
const CFileMetaData& file_metadata,
const COutputStream* sink)
# Opaque handles for the file-level encryption/decryption property classes
# from "parquet/encryption/encryption.h". No members are exposed (`pass`);
# the types are only passed around, typically inside shared_ptr[...].
cdef extern from "parquet/encryption/encryption.h" namespace "parquet" nogil:
cdef cppclass CFileDecryptionProperties\
" parquet::FileDecryptionProperties":
pass
cdef cppclass CFileEncryptionProperties\
" parquet::FileEncryptionProperties":
pass

View File

@@ -0,0 +1,132 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# distutils: language = c++
from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport CSecureString
from pyarrow._parquet cimport (ParquetCipher,
CFileEncryptionProperties,
CFileDecryptionProperties,
ParquetCipher_AES_GCM_V1,
ParquetCipher_AES_GCM_CTR_V1)
# KMS client interfaces declared from "parquet/encryption/kms_client.h"
# (C++ namespace `parquet::encryption`, usable without the GIL).
cdef extern from "parquet/encryption/kms_client.h" \
namespace "parquet::encryption" nogil:
# WrapKey / UnwrapKey are declared `except +`, so a C++ exception thrown by
# either call is translated into a Python exception by Cython.
cdef cppclass CKmsClient" parquet::encryption::KmsClient":
c_string WrapKey(const CSecureString& key,
const c_string& master_key_identifier) except +
CSecureString UnwrapKey(const c_string& wrapped_key,
const c_string& master_key_identifier) except +
# Holder for an access-token string: constructed from a value, replaceable
# through Refresh(), readable through value().
cdef cppclass CKeyAccessToken" parquet::encryption::KeyAccessToken":
CKeyAccessToken(const c_string value)
void Refresh(const c_string& new_value)
const c_string& value() const
# Connection settings exposed as plain data members; the access token is
# held through a shared_ptr.
cdef cppclass CKmsConnectionConfig \
" parquet::encryption::KmsConnectionConfig":
CKmsConnectionConfig()
c_string kms_instance_id
c_string kms_instance_url
shared_ptr[CKeyAccessToken] refreshable_key_access_token
unordered_map[c_string, c_string] custom_kms_conf
# Callbacks for implementing Python kms clients
# Use typedef to emulate syntax for std::function<void(..)>
# Each callback receives a Python object first and writes its result through
# the trailing output pointer instead of returning it.
ctypedef void CallbackWrapKey(
object, const CSecureString&, const c_string&, c_string*)
ctypedef void CallbackUnwrapKey(
object, const c_string&, const c_string&, CSecureString*)
# KMS client factory interface declared from
# "parquet/encryption/kms_client_factory.h" (namespace `parquet::encryption`).
cdef extern from "parquet/encryption/kms_client_factory.h" \
namespace "parquet::encryption" nogil:
# CreateKmsClient returns the client through a shared_ptr; `except +`
# translates C++ exceptions into Python exceptions.
cdef cppclass CKmsClientFactory" parquet::encryption::KmsClientFactory":
shared_ptr[CKmsClient] CreateKmsClient(
const CKmsConnectionConfig& kms_connection_config) except +
# Callbacks for implementing Python kms client factories
# Use typedef to emulate syntax for std::function<void(..)>
# The callback receives a Python object and the connection config, and
# writes the created client through the trailing shared_ptr output pointer.
ctypedef void CallbackCreateKmsClient(
object,
const CKmsConnectionConfig&, shared_ptr[CKmsClient]*)
# Encryption/decryption configuration and the crypto factory, declared from
# "parquet/encryption/crypto_factory.h" (namespace `parquet::encryption`).
cdef extern from "parquet/encryption/crypto_factory.h" \
namespace "parquet::encryption" nogil:
# Constructed from a footer key; the remaining settings are plain data
# members.
cdef cppclass CEncryptionConfiguration\
" parquet::encryption::EncryptionConfiguration":
CEncryptionConfiguration(const c_string& footer_key) except +
c_string footer_key
c_string column_keys
c_bool uniform_encryption
ParquetCipher encryption_algorithm
c_bool plaintext_footer
c_bool double_wrapping
double cache_lifetime_seconds
c_bool internal_key_material
int32_t data_key_length_bits
# Default-constructible; only the cache lifetime is exposed.
cdef cppclass CDecryptionConfiguration\
" parquet::encryption::DecryptionConfiguration":
CDecryptionConfiguration() except +
double cache_lifetime_seconds
# The two Get*Properties methods use `except +*` rather than plain
# `except +`: besides translating C++ exceptions, Cython also accounts for a
# Python exception that may already be set (presumably raised by a
# Python-implemented KMS client; confirm against the callers).
cdef cppclass CCryptoFactory" parquet::encryption::CryptoFactory":
void RegisterKmsClientFactory(
shared_ptr[CKmsClientFactory] kms_client_factory) except +
shared_ptr[CFileEncryptionProperties] GetFileEncryptionProperties(
const CKmsConnectionConfig& kms_connection_config,
const CEncryptionConfiguration& encryption_config) except +*
shared_ptr[CFileDecryptionProperties] GetFileDecryptionProperties(
const CKmsConnectionConfig& kms_connection_config,
const CDecryptionConfiguration& decryption_config) except +*
void RemoveCacheEntriesForToken(const c_string& access_token) except +
void RemoveCacheEntriesForAllTokens() except +
# Python-bridging helpers declared from "arrow/python/parquet_encryption.h"
# (namespace `arrow::py::parquet::encryption`). They connect the KMS
# interfaces to Python handler objects through vtables of std::function
# callbacks.
cdef extern from "arrow/python/parquet_encryption.h" \
namespace "arrow::py::parquet::encryption" nogil:
# Vtable holding the wrap/unwrap callbacks as std::function members.
cdef cppclass CPyKmsClientVtable \
" arrow::py::parquet::encryption::PyKmsClientVtable":
CPyKmsClientVtable()
function[CallbackWrapKey] wrap_key
function[CallbackUnwrapKey] unwrap_key
# CKmsClient subclass constructed from a Python handler object and a vtable.
cdef cppclass CPyKmsClient\
" arrow::py::parquet::encryption::PyKmsClient"(CKmsClient):
CPyKmsClient(object handler, CPyKmsClientVtable vtable)
# Vtable holding the single client-creation callback.
cdef cppclass CPyKmsClientFactoryVtable\
" arrow::py::parquet::encryption::PyKmsClientFactoryVtable":
CPyKmsClientFactoryVtable()
function[CallbackCreateKmsClient] create_kms_client
# CKmsClientFactory subclass constructed from a Python handler object and a
# vtable.
cdef cppclass CPyKmsClientFactory\
" arrow::py::parquet::encryption::PyKmsClientFactory"(
CKmsClientFactory):
CPyKmsClientFactory(object handler, CPyKmsClientFactoryVtable vtable)
# CCryptoFactory subclass whose Safe* methods take the same arguments as the
# base Get*Properties methods but return the result wrapped in CResult and
# carry no `except` clause.
cdef cppclass CPyCryptoFactory\
" arrow::py::parquet::encryption::PyCryptoFactory"(CCryptoFactory):
CResult[shared_ptr[CFileEncryptionProperties]] \
SafeGetFileEncryptionProperties(
const CKmsConnectionConfig& kms_connection_config,
const CEncryptionConfiguration& encryption_config)
CResult[shared_ptr[CFileDecryptionProperties]] \
SafeGetFileDecryptionProperties(
const CKmsConnectionConfig& kms_connection_config,
const CDecryptionConfiguration& decryption_config)