Initial commit
This commit is contained in:
155
venv/lib/python3.10/site-packages/pyarrow/includes/common.pxd
Normal file
155
venv/lib/python3.10/site-packages/pyarrow/includes/common.pxd
Normal file
@@ -0,0 +1,155 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
# distutils: language = c++
|
||||
|
||||
from libc.stdint cimport *
|
||||
|
||||
from libcpp cimport bool as c_bool, nullptr
|
||||
from libcpp.functional cimport function
|
||||
from libcpp.memory cimport (shared_ptr, unique_ptr, make_shared,
|
||||
static_pointer_cast, dynamic_pointer_cast)
|
||||
from libcpp.optional cimport nullopt, optional
|
||||
from libcpp.string cimport string as c_string
|
||||
from libcpp.utility cimport move, pair
|
||||
from libcpp.vector cimport vector
|
||||
from libcpp.unordered_map cimport unordered_map
|
||||
from libcpp.unordered_set cimport unordered_set
|
||||
|
||||
from cpython cimport PyObject
|
||||
from cpython.datetime cimport PyDateTime_DateTime
|
||||
cimport cpython
|
||||
|
||||
|
||||
# Minimal Cython view of C++ std::string_view, exposed under the local name
# cpp_string_view: three constructors plus size(), empty() and data().
cdef extern from "<string_view>" namespace "std" nogil:
|
||||
# Needed until https://github.com/cython/cython/issues/6651 is fixed
|
||||
cdef cppclass cpp_string_view "std::string_view":
|
||||
string_view()
|
||||
string_view(const char*)
|
||||
string_view(c_string&)
|
||||
size_t size()
|
||||
bint empty()
|
||||
const char* data()
|
||||
|
||||
|
||||
# Verbatim C++ helpers defined in namespace arrow::py, followed by their
# Cython declarations:
#   - to_shared(): two overloads (lvalue and rvalue unique_ptr<T>) that
#     return a shared_ptr<T> built by std::move-ing the unique_ptr.
#   - to_string(): builds a std::string from a std::string_view.
cdef extern from * namespace "arrow::py" nogil:
|
||||
"""
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
|
||||
namespace arrow {
|
||||
namespace py {
|
||||
|
||||
template <typename T>
|
||||
std::shared_ptr<T> to_shared(std::unique_ptr<T>& t) {
|
||||
return std::move(t);
|
||||
}
|
||||
template <typename T>
|
||||
std::shared_ptr<T> to_shared(std::unique_ptr<T>&& t) {
|
||||
return std::move(t);
|
||||
}
|
||||
|
||||
// Needed until https://github.com/cython/cython/issues/6651 is fixed
|
||||
inline std::string to_string(std::string_view s) {
|
||||
return std::string(s);
|
||||
}
|
||||
|
||||
} // namespace py
|
||||
} // namespace arrow
|
||||
"""
|
||||
cdef shared_ptr[T] to_shared" arrow::py::to_shared"[T](unique_ptr[T])
|
||||
cdef c_string to_string(cpp_string_view s)
|
||||
|
||||
# Declares no names; the block only references the header
# "arrow/python/platform.h".
# NOTE(review): presumably present so the header is included in the
# generated C++ -- confirm.
cdef extern from "arrow/python/platform.h":
|
||||
pass
|
||||
|
||||
# Two CPython C-API names taken directly from <Python.h>, both operating on
# a raw PyObject*: Py_XDECREF and Py_REFCNT.
cdef extern from "<Python.h>":
|
||||
void Py_XDECREF(PyObject* o)
|
||||
Py_ssize_t Py_REFCNT(PyObject* o)
|
||||
|
||||
# Bindings for arrow::Status (as CStatus) and arrow::StatusDetail (as
# CStatusDetail) from "arrow/api.h".
# The CStatus_* functions map to the static factories arrow::Status::OK,
# ::Invalid, ::NotImplemented and ::UnknownError; the last two take a message.
# CStatus itself exposes string/message/detail accessors, ok(), a set of
# Is<Kind>() predicates and Warn().
cdef extern from "arrow/api.h" namespace "arrow" nogil:
|
||||
# We can later add more of the common status factory methods as needed
|
||||
cdef CStatus CStatus_OK "arrow::Status::OK"()
|
||||
|
||||
cdef CStatus CStatus_Invalid "arrow::Status::Invalid"()
|
||||
cdef CStatus CStatus_NotImplemented \
|
||||
"arrow::Status::NotImplemented"(const c_string& msg)
|
||||
cdef CStatus CStatus_UnknownError \
|
||||
"arrow::Status::UnknownError"(const c_string& msg)
|
||||
|
||||
cdef cppclass CStatus "arrow::Status":
|
||||
CStatus()
|
||||
|
||||
c_string ToString()
|
||||
c_string message()
|
||||
shared_ptr[CStatusDetail] detail()
|
||||
|
||||
c_bool ok()
|
||||
c_bool IsIOError()
|
||||
c_bool IsOutOfMemory()
|
||||
c_bool IsInvalid()
|
||||
c_bool IsKeyError()
|
||||
c_bool IsNotImplemented()
|
||||
c_bool IsTypeError()
|
||||
c_bool IsCapacityError()
|
||||
c_bool IsIndexError()
|
||||
c_bool IsSerializationError()
|
||||
c_bool IsCancelled()
|
||||
|
||||
void Warn()
|
||||
|
||||
cdef cppclass CStatusDetail "arrow::StatusDetail":
|
||||
c_string ToString()
|
||||
|
||||
|
||||
# Binding for the templated arrow::Result<T> (as CResult[T]): constructible
# from nothing, a CStatus or a T; exposes ok(), status(), Value(T*) and
# operator*.
cdef extern from "arrow/result.h" namespace "arrow" nogil:
|
||||
cdef cppclass CResult "arrow::Result"[T]:
|
||||
CResult()
|
||||
CResult(CStatus)
|
||||
CResult(T)
|
||||
c_bool ok()
|
||||
CStatus status()
|
||||
CStatus Value(T*)
|
||||
T operator*()
|
||||
|
||||
|
||||
# Binding for the templated arrow::Future<T> (as CFuture[T]); only the
# default constructor is declared here.
cdef extern from "arrow/util/future.h" namespace "arrow" nogil:
|
||||
cdef cppclass CFuture "arrow::Future"[T]:
|
||||
CFuture()
|
||||
|
||||
|
||||
# Declares arrow::py::BindFuture, which takes a CFuture[T], a Python
# callback object and a variadic tail standing in for a C++ callable
# (see the workaround explanation below).
cdef extern from "arrow/python/async.h" namespace "arrow::py" nogil:
|
||||
# BindFuture's third argument is really a C++ callable with
|
||||
# the signature `object(T*)`, but Cython does not allow declaring that.
|
||||
# We use an ellipsis as a workaround.
|
||||
# Another possibility is to type-erase the argument by making it
|
||||
# `object(void*)`, but it would lose compile-time C++ type safety.
|
||||
void BindFuture[T](CFuture[T], object cb, ...)
|
||||
|
||||
|
||||
# Helpers from "arrow/python/common.h" (namespace arrow::py):
#   - GetResultValue[T]: takes a CResult[T] and returns a T; declared
#     `except *`, so Cython checks for a Python exception after each call.
#   - BindFunction[F]: returns a function[F] from an unbound pointer, a
#     Python object and a variadic tail.
cdef extern from "arrow/python/common.h" namespace "arrow::py" nogil:
|
||||
T GetResultValue[T](CResult[T]) except *
|
||||
cdef function[F] BindFunction[F](void* unbound, object bound, ...)
|
||||
|
||||
|
||||
# Convert a raw PyObject* into a Cython `object` and return it.
# The cast adds one reference and the following Py_DECREF removes one, so
# this function leaves the object's reference count unchanged on net.
# NOTE(review): presumably the caller passes a pointer whose reference it
# already owns and wants transferred to the result -- confirm against callers.
cdef inline object PyObject_to_object(PyObject* o):
|
||||
# Cast to "object" increments reference count
|
||||
cdef object result = <object> o
|
||||
cpython.Py_DECREF(result)
|
||||
return result
|
||||
3270
venv/lib/python3.10/site-packages/pyarrow/includes/libarrow.pxd
Normal file
3270
venv/lib/python3.10/site-packages/pyarrow/includes/libarrow.pxd
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,118 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
# distutils: language = c++
|
||||
|
||||
from pyarrow.includes.common cimport *
|
||||
from pyarrow.includes.libarrow cimport *
|
||||
|
||||
|
||||
# Bindings for "arrow/acero/options.h" (namespace arrow::acero): the JoinType
# enum, the ExecNodeOptions base class and the node-option classes declared
# as deriving from it, plus the AsofJoinNodeOptions::Keys struct.
cdef extern from "arrow/acero/options.h" namespace "arrow::acero" nogil:
|
||||
# Eight join kinds, each mapped to the matching arrow::acero::JoinType value.
cdef enum CJoinType "arrow::acero::JoinType":
|
||||
CJoinType_LEFT_SEMI "arrow::acero::JoinType::LEFT_SEMI"
|
||||
CJoinType_RIGHT_SEMI "arrow::acero::JoinType::RIGHT_SEMI"
|
||||
CJoinType_LEFT_ANTI "arrow::acero::JoinType::LEFT_ANTI"
|
||||
CJoinType_RIGHT_ANTI "arrow::acero::JoinType::RIGHT_ANTI"
|
||||
CJoinType_INNER "arrow::acero::JoinType::INNER"
|
||||
CJoinType_LEFT_OUTER "arrow::acero::JoinType::LEFT_OUTER"
|
||||
CJoinType_RIGHT_OUTER "arrow::acero::JoinType::RIGHT_OUTER"
|
||||
CJoinType_FULL_OUTER "arrow::acero::JoinType::FULL_OUTER"
|
||||
|
||||
# Base class; no members are declared on it here.
cdef cppclass CExecNodeOptions "arrow::acero::ExecNodeOptions":
|
||||
pass
|
||||
|
||||
cdef cppclass CSourceNodeOptions "arrow::acero::SourceNodeOptions"(CExecNodeOptions):
|
||||
pass
|
||||
|
||||
# Constructible from a table, optionally with a max_batch_size.
cdef cppclass CTableSourceNodeOptions "arrow::acero::TableSourceNodeOptions"(CExecNodeOptions):
|
||||
CTableSourceNodeOptions(shared_ptr[CTable] table)
|
||||
CTableSourceNodeOptions(shared_ptr[CTable] table, int64_t max_batch_size)
|
||||
|
||||
cdef cppclass CSinkNodeOptions "arrow::acero::SinkNodeOptions"(CExecNodeOptions):
|
||||
pass
|
||||
|
||||
# Constructible from a single CExpression.
cdef cppclass CFilterNodeOptions "arrow::acero::FilterNodeOptions"(CExecNodeOptions):
|
||||
CFilterNodeOptions(CExpression)
|
||||
|
||||
# Constructible from expressions, optionally with a parallel list of names.
cdef cppclass CProjectNodeOptions "arrow::acero::ProjectNodeOptions"(CExecNodeOptions):
|
||||
CProjectNodeOptions(vector[CExpression] expressions)
|
||||
CProjectNodeOptions(vector[CExpression] expressions,
|
||||
vector[c_string] names)
|
||||
|
||||
cdef cppclass CAggregateNodeOptions "arrow::acero::AggregateNodeOptions"(CExecNodeOptions):
|
||||
CAggregateNodeOptions(vector[CAggregate] aggregates, vector[CFieldRef] names)
|
||||
|
||||
cdef cppclass COrderByNodeOptions "arrow::acero::OrderByNodeOptions"(CExecNodeOptions):
|
||||
COrderByNodeOptions(COrdering ordering)
|
||||
|
||||
# Three constructor overloads: keys only; keys + filter + output suffixes;
# keys + explicit left/right output fields + filter + output suffixes.
cdef cppclass CHashJoinNodeOptions "arrow::acero::HashJoinNodeOptions"(CExecNodeOptions):
|
||||
CHashJoinNodeOptions(CJoinType, vector[CFieldRef] in_left_keys,
|
||||
vector[CFieldRef] in_right_keys)
|
||||
CHashJoinNodeOptions(CJoinType, vector[CFieldRef] in_left_keys,
|
||||
vector[CFieldRef] in_right_keys,
|
||||
CExpression filter,
|
||||
c_string output_suffix_for_left,
|
||||
c_string output_suffix_for_right)
|
||||
CHashJoinNodeOptions(CJoinType join_type,
|
||||
vector[CFieldRef] left_keys,
|
||||
vector[CFieldRef] right_keys,
|
||||
vector[CFieldRef] left_output,
|
||||
vector[CFieldRef] right_output,
|
||||
CExpression filter,
|
||||
c_string output_suffix_for_left,
|
||||
c_string output_suffix_for_right)
|
||||
|
||||
# Key description for an as-of join: one on_key plus a list of by_key fields.
cdef struct CAsofJoinKeys "arrow::acero::AsofJoinNodeOptions::Keys":
|
||||
CFieldRef on_key
|
||||
vector[CFieldRef] by_key
|
||||
|
||||
cdef cppclass CAsofJoinNodeOptions "arrow::acero::AsofJoinNodeOptions"(CExecNodeOptions):
|
||||
CAsofJoinNodeOptions(vector[CAsofJoinKeys] keys, int64_t tolerance)
|
||||
|
||||
|
||||
# Bindings for "arrow/acero/exec_plan.h" (namespace arrow::acero):
# Declaration (with its nested Input type), ExecNode, and the
# DeclarationToTable / DeclarationToReader / DeclarationToString functions,
# all of which return a CResult.
cdef extern from "arrow/acero/exec_plan.h" namespace "arrow::acero" nogil:
|
||||
# A declaration carries a label and a vector of Input; Input is constructible
# from either a CExecNode* or another CDeclaration.
cdef cppclass CDeclaration "arrow::acero::Declaration":
|
||||
cppclass Input:
|
||||
Input(CExecNode*)
|
||||
Input(CDeclaration)
|
||||
|
||||
c_string label
|
||||
vector[Input] inputs
|
||||
|
||||
CDeclaration()
|
||||
CDeclaration(c_string factory_name, CExecNodeOptions options)
|
||||
CDeclaration(c_string factory_name, vector[Input] inputs, shared_ptr[CExecNodeOptions] options)
|
||||
|
||||
@staticmethod
|
||||
CDeclaration Sequence(vector[CDeclaration] decls)
|
||||
|
||||
# Only the inputs() and output_schema() accessors are declared.
cdef cppclass CExecNode "arrow::acero::ExecNode":
|
||||
const vector[CExecNode*]& inputs() const
|
||||
const shared_ptr[CSchema]& output_schema() const
|
||||
|
||||
# Two DeclarationToTable overloads: the second additionally takes a memory
# pool and a function registry.
CResult[shared_ptr[CTable]] DeclarationToTable(
|
||||
CDeclaration declaration, c_bool use_threads
|
||||
)
|
||||
CResult[shared_ptr[CTable]] DeclarationToTable(
|
||||
CDeclaration declaration, c_bool use_threads,
|
||||
CMemoryPool* memory_pool, CFunctionRegistry* function_registry
|
||||
)
|
||||
CResult[unique_ptr[CRecordBatchReader]] DeclarationToReader(
|
||||
CDeclaration declaration, c_bool use_threads
|
||||
)
|
||||
|
||||
CResult[c_string] DeclarationToString(const CDeclaration& declaration)
|
||||
@@ -0,0 +1,109 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
# distutils: language = c++
|
||||
|
||||
from pyarrow.includes.libarrow cimport *
|
||||
|
||||
# Bindings for "arrow/gpu/cuda_api.h" (namespace arrow::cuda): device
# manager, context, IPC memory handle, device/host buffers, buffer
# reader/writer, plus three free functions at the end.
# The C names are written with a leading space inside the quotes throughout.
cdef extern from "arrow/gpu/cuda_api.h" namespace "arrow::cuda" nogil:
|
||||
|
||||
# Instance() is static and returns a pointer wrapped in CResult; contexts are
# obtained per gpu_number.
cdef cppclass CCudaDeviceManager" arrow::cuda::CudaDeviceManager":
|
||||
@staticmethod
|
||||
CResult[CCudaDeviceManager*] Instance()
|
||||
CResult[shared_ptr[CCudaContext]] GetContext(int gpu_number)
|
||||
CResult[shared_ptr[CCudaContext]] GetSharedContext(int gpu_number,
|
||||
void* handle)
|
||||
CStatus AllocateHost(int device_number, int64_t nbytes,
|
||||
shared_ptr[CCudaHostBuffer]* buffer)
|
||||
int num_devices() const
|
||||
|
||||
# Allocation / view / IPC-open methods return CCudaBuffer shared pointers
# wrapped in CResult.
cdef cppclass CCudaContext" arrow::cuda::CudaContext":
|
||||
CResult[shared_ptr[CCudaBuffer]] Allocate(int64_t nbytes)
|
||||
CResult[shared_ptr[CCudaBuffer]] View(uint8_t* data, int64_t nbytes)
|
||||
CResult[shared_ptr[CCudaBuffer]] OpenIpcBuffer(
|
||||
const CCudaIpcMemHandle& ipc_handle)
|
||||
CStatus Synchronize()
|
||||
int64_t bytes_allocated() const
|
||||
const void* handle() const
|
||||
int device_number() const
|
||||
CResult[uintptr_t] GetDeviceAddress(uintptr_t addr)
|
||||
shared_ptr[CDevice] device() const
|
||||
shared_ptr[CMemoryManager] memory_manager() const
|
||||
|
||||
# Built from an opaque handle pointer (static FromBuffer); Serialize() yields
# a CBuffer.
cdef cppclass CCudaIpcMemHandle" arrow::cuda::CudaIpcMemHandle":
|
||||
@staticmethod
|
||||
CResult[shared_ptr[CCudaIpcMemHandle]] FromBuffer(
|
||||
const void* opaque_handle)
|
||||
CResult[shared_ptr[CBuffer]] Serialize(CMemoryPool* pool) const
|
||||
|
||||
# Declared as deriving from CBuffer. Constructible from raw data + context,
# or as an (offset, size) slice of a parent CCudaBuffer.
cdef cppclass CCudaBuffer" arrow::cuda::CudaBuffer"(CBuffer):
|
||||
CCudaBuffer(uint8_t* data, int64_t size,
|
||||
const shared_ptr[CCudaContext]& context,
|
||||
c_bool own_data=false, c_bool is_ipc=false)
|
||||
CCudaBuffer(const shared_ptr[CCudaBuffer]& parent,
|
||||
const int64_t offset, const int64_t size)
|
||||
|
||||
@staticmethod
|
||||
CResult[shared_ptr[CCudaBuffer]] FromBuffer(shared_ptr[CBuffer] buf)
|
||||
|
||||
CStatus CopyToHost(const int64_t position, const int64_t nbytes,
|
||||
void* out) const
|
||||
CStatus CopyFromHost(const int64_t position, const void* data,
|
||||
int64_t nbytes)
|
||||
CStatus CopyFromDevice(const int64_t position, const void* data,
|
||||
int64_t nbytes)
|
||||
CStatus CopyFromAnotherDevice(const shared_ptr[CCudaContext]& src_ctx,
|
||||
const int64_t position, const void* data,
|
||||
int64_t nbytes)
|
||||
CResult[shared_ptr[CCudaIpcMemHandle]] ExportForIpc()
|
||||
shared_ptr[CCudaContext] context() const
|
||||
|
||||
cdef cppclass \
|
||||
CCudaHostBuffer" arrow::cuda::CudaHostBuffer"(CMutableBuffer):
|
||||
pass
|
||||
|
||||
cdef cppclass \
|
||||
CCudaBufferReader" arrow::cuda::CudaBufferReader"(CBufferReader):
|
||||
CCudaBufferReader(const shared_ptr[CBuffer]& buffer)
|
||||
CResult[int64_t] Read(int64_t nbytes, void* buffer)
|
||||
CResult[shared_ptr[CBuffer]] Read(int64_t nbytes)
|
||||
|
||||
cdef cppclass \
|
||||
CCudaBufferWriter" arrow::cuda::CudaBufferWriter"(WritableFile):
|
||||
CCudaBufferWriter(const shared_ptr[CCudaBuffer]& buffer)
|
||||
CStatus Close()
|
||||
CStatus Write(const void* data, int64_t nbytes)
|
||||
CStatus WriteAt(int64_t position, const void* data, int64_t nbytes)
|
||||
CStatus SetBufferSize(const int64_t buffer_size)
|
||||
int64_t buffer_size()
|
||||
int64_t num_bytes_buffered() const
|
||||
|
||||
CResult[shared_ptr[CCudaHostBuffer]] AllocateCudaHostBuffer(
|
||||
int device_number, const int64_t size)
|
||||
|
||||
# Cuda prefix is added to avoid picking up arrow::cuda functions
|
||||
# from arrow namespace.
|
||||
CResult[shared_ptr[CCudaBuffer]] \
|
||||
CudaSerializeRecordBatch" arrow::cuda::SerializeRecordBatch"\
|
||||
(const CRecordBatch& batch,
|
||||
CCudaContext* ctx)
|
||||
CResult[shared_ptr[CRecordBatch]] \
|
||||
CudaReadRecordBatch" arrow::cuda::ReadRecordBatch"\
|
||||
(const shared_ptr[CSchema]& schema,
|
||||
CDictionaryMemo* dictionary_memo,
|
||||
const shared_ptr[CCudaBuffer]& buffer,
|
||||
CMemoryPool* pool)
|
||||
@@ -0,0 +1,423 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
# distutils: language = c++
|
||||
|
||||
from libcpp.unordered_map cimport unordered_map
|
||||
from libcpp cimport bool as c_bool
|
||||
|
||||
from pyarrow.includes.common cimport *
|
||||
from pyarrow.includes.libarrow cimport *
|
||||
from pyarrow.includes.libarrow_acero cimport *
|
||||
from pyarrow.includes.libarrow_fs cimport *
|
||||
|
||||
|
||||
# Declares the no-argument Initialize() function from "arrow/dataset/plan.h"
# (namespace arrow::dataset::internal).
# NOTE(review): what it initializes is not visible here -- see the C++ header.
cdef extern from "arrow/dataset/plan.h" namespace "arrow::dataset::internal" nogil:
|
||||
|
||||
cdef void Initialize()
|
||||
|
||||
|
||||
# Function types for writer-finish callbacks, both taking a CFileWriter*:
#   - cb_writer_finish_internal returns a CStatus;
#   - cb_writer_finish additionally takes a Python dict and returns nothing.
ctypedef CStatus cb_writer_finish_internal(CFileWriter*)
|
||||
ctypedef void cb_writer_finish(dict, CFileWriter*)
|
||||
|
||||
# The declarations that follow bind names from "arrow/dataset/api.h" in
# namespace arrow::dataset; the extern block is marked nogil.
cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil:
|
||||
|
||||
# Cython names for the three values of arrow::dataset::ExistingDataBehavior:
# kDeleteMatchingPartitions, kOverwriteOrIgnore and kError.
cdef enum ExistingDataBehavior" arrow::dataset::ExistingDataBehavior":
|
||||
ExistingDataBehavior_DELETE_MATCHING" \
|
||||
arrow::dataset::ExistingDataBehavior::kDeleteMatchingPartitions"
|
||||
ExistingDataBehavior_OVERWRITE_OR_IGNORE" \
|
||||
arrow::dataset::ExistingDataBehavior::kOverwriteOrIgnore"
|
||||
ExistingDataBehavior_ERROR" \
|
||||
arrow::dataset::ExistingDataBehavior::kError"
|
||||
|
||||
# Fields of arrow::dataset::ScanOptions exposed to Cython: the dataset and
# projected schemas, the use_threads and cache_metadata flags, and the
# filter expression.
cdef cppclass CScanOptions "arrow::dataset::ScanOptions":
|
||||
shared_ptr[CSchema] dataset_schema
|
||||
shared_ptr[CSchema] projected_schema
|
||||
c_bool use_threads
|
||||
c_bool cache_metadata
|
||||
CExpression filter
|
||||
|
||||
# Exec-node options (declared as deriving from CExecNodeOptions) built from
# a dataset, its scan options and two boolean flags; scan_options is also
# exposed as a field.
cdef cppclass CScanNodeOptions "arrow::dataset::ScanNodeOptions"(CExecNodeOptions):
|
||||
CScanNodeOptions(shared_ptr[CDataset] dataset, shared_ptr[CScanOptions] scan_options, bint require_sequenced_output, bint implicit_ordering)
|
||||
|
||||
shared_ptr[CScanOptions] scan_options
|
||||
|
||||
# Binding for arrow::dataset::FragmentScanOptions; only type_name() is
# declared.
cdef cppclass CFragmentScanOptions "arrow::dataset::FragmentScanOptions":
|
||||
c_string type_name() const
|
||||
|
||||
# Alias for an iterator over shared CScanTask pointers, bound to the C++
# name arrow::dataset::ScanTaskIterator.
ctypedef CIterator[shared_ptr[CScanTask]] CScanTaskIterator \
|
||||
"arrow::dataset::ScanTaskIterator"
|
||||
|
||||
# Binding for arrow::dataset::ScanTask; Execute() returns a record-batch
# iterator wrapped in CResult.
cdef cppclass CScanTask" arrow::dataset::ScanTask":
|
||||
CResult[CRecordBatchIterator] Execute()
|
||||
|
||||
# Binding for arrow::dataset::Fragment: physical-schema read, Scan() with
# scan options, and the splittable / type_name / partition_expression
# accessors.
cdef cppclass CFragment "arrow::dataset::Fragment":
|
||||
CResult[shared_ptr[CSchema]] ReadPhysicalSchema()
|
||||
CResult[CScanTaskIterator] Scan(shared_ptr[CScanOptions] options)
|
||||
c_bool splittable() const
|
||||
c_string type_name() const
|
||||
const CExpression& partition_expression() const
|
||||
|
||||
# Alias for a vector of shared CFragment pointers, bound to the C++ name
# arrow::dataset::FragmentVector.
ctypedef vector[shared_ptr[CFragment]] CFragmentVector \
|
||||
"arrow::dataset::FragmentVector"
|
||||
|
||||
# Alias for an iterator over shared CFragment pointers, bound to the C++
# name arrow::dataset::FragmentIterator.
ctypedef CIterator[shared_ptr[CFragment]] CFragmentIterator \
|
||||
"arrow::dataset::FragmentIterator"
|
||||
|
||||
# Fragment subclass (declared as deriving from CFragment) constructed from
# a vector of record batches and a partition expression.
cdef cppclass CInMemoryFragment "arrow::dataset::InMemoryFragment"(
|
||||
CFragment):
|
||||
CInMemoryFragment(vector[shared_ptr[CRecordBatch]] record_batches,
|
||||
CExpression partition_expression)
|
||||
|
||||
# Pairs a record batch with a fragment (two shared_ptr fields).
cdef cppclass CTaggedRecordBatch "arrow::dataset::TaggedRecordBatch":
|
||||
shared_ptr[CRecordBatch] record_batch
|
||||
shared_ptr[CFragment] fragment
|
||||
|
||||
# Alias for an iterator over CTaggedRecordBatch values, bound to the C++
# name arrow::dataset::TaggedRecordBatchIterator.
ctypedef CIterator[CTaggedRecordBatch] CTaggedRecordBatchIterator \
|
||||
"arrow::dataset::TaggedRecordBatchIterator"
|
||||
|
||||
# Binding for arrow::dataset::Scanner. Constructible from a dataset or a
# fragment together with scan options. Every operation except options()
# returns its value wrapped in CResult.
cdef cppclass CScanner "arrow::dataset::Scanner":
|
||||
CScanner(shared_ptr[CDataset], shared_ptr[CScanOptions])
|
||||
CScanner(shared_ptr[CFragment], shared_ptr[CScanOptions])
|
||||
CResult[CScanTaskIterator] Scan()
|
||||
CResult[CTaggedRecordBatchIterator] ScanBatches()
|
||||
CResult[shared_ptr[CTable]] ToTable()
|
||||
CResult[shared_ptr[CTable]] TakeRows(const CArray& indices)
|
||||
CResult[shared_ptr[CTable]] Head(int64_t num_rows)
|
||||
CResult[int64_t] CountRows()
|
||||
CResult[CFragmentIterator] GetFragments()
|
||||
CResult[shared_ptr[CRecordBatchReader]] ToRecordBatchReader()
|
||||
const shared_ptr[CScanOptions]& options()
|
||||
|
||||
# Binding for arrow::dataset::ScannerBuilder.
# The configuration methods each return a CStatus; GetScanOptions() and
# Finish() return CResult-wrapped shared pointers.
# ProjectColumns is a Cython-side name for the C++ overload
# Project(vector<string>), declared separately from the two-argument Project.
cdef cppclass CScannerBuilder "arrow::dataset::ScannerBuilder":
|
||||
CScannerBuilder(shared_ptr[CDataset],
|
||||
shared_ptr[CScanOptions] scan_options)
|
||||
CScannerBuilder(shared_ptr[CSchema], shared_ptr[CFragment],
|
||||
shared_ptr[CScanOptions] scan_options)
|
||||
|
||||
@staticmethod
|
||||
shared_ptr[CScannerBuilder] FromRecordBatchReader(
|
||||
shared_ptr[CRecordBatchReader] reader)
|
||||
CStatus ProjectColumns "Project"(const vector[c_string]& columns)
|
||||
CStatus Project(vector[CExpression]& exprs, vector[c_string]& columns)
|
||||
CStatus Filter(CExpression filter)
|
||||
CStatus UseThreads(c_bool use_threads)
|
||||
CStatus CacheMetadata(c_bool cache_metadata)
|
||||
CStatus Pool(CMemoryPool* pool)
|
||||
CStatus BatchSize(int64_t batch_size)
|
||||
CStatus BatchReadahead(int32_t batch_readahead)
|
||||
CStatus FragmentReadahead(int32_t fragment_readahead)
|
||||
CStatus FragmentScanOptions(
|
||||
shared_ptr[CFragmentScanOptions] fragment_scan_options)
|
||||
CResult[shared_ptr[CScanOptions]] GetScanOptions()
|
||||
CResult[shared_ptr[CScanner]] Finish()
|
||||
shared_ptr[CSchema] schema() const
|
||||
|
||||
# Alias for a vector of shared CDataset pointers, bound to the C++ name
# arrow::dataset::DatasetVector.
ctypedef vector[shared_ptr[CDataset]] CDatasetVector \
|
||||
"arrow::dataset::DatasetVector"
|
||||
|
||||
# Binding for arrow::dataset::Dataset: schema / partition-expression /
# type-name accessors, fragment enumeration (optionally with a predicate),
# ReplaceSchema() and NewScan().
cdef cppclass CDataset "arrow::dataset::Dataset":
|
||||
const shared_ptr[CSchema] & schema()
|
||||
CResult[CFragmentIterator] GetFragments()
|
||||
CResult[CFragmentIterator] GetFragments(CExpression predicate)
|
||||
const CExpression & partition_expression()
|
||||
c_string type_name()
|
||||
|
||||
CResult[shared_ptr[CDataset]] ReplaceSchema(shared_ptr[CSchema])
|
||||
|
||||
CResult[shared_ptr[CScannerBuilder]] NewScan()
|
||||
|
||||
# Dataset subclass (declared as deriving from CDataset) constructible from
# either a record-batch reader or a table.
cdef cppclass CInMemoryDataset "arrow::dataset::InMemoryDataset"(
|
||||
CDataset):
|
||||
CInMemoryDataset(shared_ptr[CRecordBatchReader])
|
||||
CInMemoryDataset(shared_ptr[CTable])
|
||||
|
||||
# Dataset subclass created through the static Make(schema, children)
# factory; children() returns the child dataset vector.
cdef cppclass CUnionDataset "arrow::dataset::UnionDataset"(
|
||||
CDataset):
|
||||
@staticmethod
|
||||
CResult[shared_ptr[CUnionDataset]] Make(shared_ptr[CSchema] schema,
|
||||
CDatasetVector children)
|
||||
|
||||
const CDatasetVector& children() const
|
||||
|
||||
# Fields of arrow::dataset::InspectOptions: an integer `fragments` setting
# and the field merge options.
cdef cppclass CInspectOptions "arrow::dataset::InspectOptions":
|
||||
int fragments
|
||||
CField.CMergeOptions field_merge_options
|
||||
|
||||
# Fields of arrow::dataset::FinishOptions: a schema, nested inspect options
# and the validate_fragments flag.
cdef cppclass CFinishOptions "arrow::dataset::FinishOptions":
|
||||
shared_ptr[CSchema] schema
|
||||
CInspectOptions inspect_options
|
||||
c_bool validate_fragments
|
||||
|
||||
# Binding for arrow::dataset::DatasetFactory: schema inspection, dataset
# creation, and root-partition get/set.
# FinishWithSchema is a Cython-side name for the C++ overload Finish(schema),
# declared separately from the zero-argument Finish().
cdef cppclass CDatasetFactory "arrow::dataset::DatasetFactory":
|
||||
CResult[vector[shared_ptr[CSchema]]] InspectSchemas(CInspectOptions)
|
||||
CResult[shared_ptr[CSchema]] Inspect(CInspectOptions)
|
||||
CResult[shared_ptr[CDataset]] FinishWithSchema "Finish"(
|
||||
const shared_ptr[CSchema]& schema)
|
||||
CResult[shared_ptr[CDataset]] Finish()
|
||||
const CExpression& root_partition()
|
||||
CStatus SetRootPartition(CExpression partition)
|
||||
|
||||
# Static Make() builds a dataset factory from a vector of child factories.
cdef cppclass CUnionDatasetFactory "arrow::dataset::UnionDatasetFactory":
|
||||
@staticmethod
|
||||
CResult[shared_ptr[CDatasetFactory]] Make(
|
||||
vector[shared_ptr[CDatasetFactory]] factories)
|
||||
|
||||
# Binding for arrow::dataset::FileSource: path / filesystem / buffer / size
# accessors and Open().
# The constructor is declared variadic on purpose (see the HACK note below),
# so argument types are not checked by Cython.
cdef cppclass CFileSource "arrow::dataset::FileSource":
|
||||
const c_string& path() const
|
||||
const shared_ptr[CFileSystem]& filesystem() const
|
||||
const shared_ptr[CBuffer]& buffer() const
|
||||
const int64_t size() const
|
||||
CResult[shared_ptr[CRandomAccessFile]] Open() const
|
||||
# HACK: Cython can't handle all the overloads so don't declare them.
|
||||
# This means invalid construction of CFileSource won't be caught in
|
||||
# the C++ generation phase (though it will still be caught when
|
||||
# the generated C++ is compiled).
|
||||
CFileSource(...)
|
||||
|
||||
# Binding for arrow::dataset::FileWriteOptions: exposes the owning file
# format and a type name.
cdef cppclass CFileWriteOptions \
|
||||
"arrow::dataset::FileWriteOptions":
|
||||
const shared_ptr[CFileFormat]& format() const
|
||||
c_string type_name() const
|
||||
|
||||
# Binding for arrow::dataset::FileWriter: format / schema / options /
# destination accessors plus GetBytesWritten().
cdef cppclass CFileWriter \
|
||||
"arrow::dataset::FileWriter":
|
||||
const shared_ptr[CFileFormat]& format() const
|
||||
const shared_ptr[CSchema]& schema() const
|
||||
const shared_ptr[CFileWriteOptions]& options() const
|
||||
const CFileLocator& destination() const
|
||||
CResult[int64_t] GetBytesWritten()
|
||||
|
||||
# Binding for arrow::dataset::FileFormat: a default_fragment_scan_options
# field, type_name(), schema inspection of a file source, fragment
# construction and DefaultWriteOptions().
cdef cppclass CFileFormat "arrow::dataset::FileFormat":
|
||||
shared_ptr[CFragmentScanOptions] default_fragment_scan_options
|
||||
c_string type_name() const
|
||||
CResult[shared_ptr[CSchema]] Inspect(const CFileSource&) const
|
||||
CResult[shared_ptr[CFileFragment]] MakeFragment(
|
||||
CFileSource source,
|
||||
CExpression partition_expression,
|
||||
shared_ptr[CSchema] physical_schema)
|
||||
shared_ptr[CFileWriteOptions] DefaultWriteOptions()
|
||||
|
||||
# Fragment subclass (declared as deriving from CFragment) exposing its file
# source and file format.
cdef cppclass CFileFragment "arrow::dataset::FileFragment"(
|
||||
CFragment):
|
||||
const CFileSource& source() const
|
||||
const shared_ptr[CFileFormat]& format() const
|
||||
|
||||
# Plain option fields of arrow::dataset::FileSystemDatasetWriteOptions.
# writer_pre_finish / writer_post_finish hold callbacks of type
# cb_writer_finish_internal; existing_data_behavior uses the
# ExistingDataBehavior enum.
cdef cppclass CFileSystemDatasetWriteOptions \
|
||||
"arrow::dataset::FileSystemDatasetWriteOptions":
|
||||
shared_ptr[CFileWriteOptions] file_write_options
|
||||
shared_ptr[CFileSystem] filesystem
|
||||
c_string base_dir
|
||||
shared_ptr[CPartitioning] partitioning
|
||||
c_bool preserve_order
|
||||
int max_partitions
|
||||
c_string basename_template
|
||||
function[cb_writer_finish_internal] writer_pre_finish
|
||||
function[cb_writer_finish_internal] writer_post_finish
|
||||
ExistingDataBehavior existing_data_behavior
|
||||
c_bool create_dir
|
||||
uint32_t max_open_files
|
||||
uint64_t max_rows_per_file
|
||||
uint64_t min_rows_per_group
|
||||
uint64_t max_rows_per_group
|
||||
|
||||
# Dataset subclass (declared as deriving from CDataset) with two static
# entry points: Make() builds a dataset from schema, partition expression,
# format, filesystem and fragments; Write() takes write options and a
# scanner and returns a CStatus.
# Instance accessors expose type, files, format, filesystem and partitioning.
cdef cppclass CFileSystemDataset \
|
||||
"arrow::dataset::FileSystemDataset"(CDataset):
|
||||
@staticmethod
|
||||
CResult[shared_ptr[CDataset]] Make(
|
||||
shared_ptr[CSchema] schema,
|
||||
CExpression source_partition,
|
||||
shared_ptr[CFileFormat] format,
|
||||
shared_ptr[CFileSystem] filesystem,
|
||||
vector[shared_ptr[CFileFragment]] fragments)
|
||||
|
||||
@staticmethod
|
||||
CStatus Write(
|
||||
const CFileSystemDatasetWriteOptions& write_options,
|
||||
shared_ptr[CScanner] scanner)
|
||||
|
||||
c_string type()
|
||||
vector[c_string] files()
|
||||
const shared_ptr[CFileFormat]& format() const
|
||||
const shared_ptr[CFileSystem]& filesystem() const
|
||||
const shared_ptr[CPartitioning]& partitioning() const
|
||||
|
||||
# Write options for the IPC format (declared as deriving from
# CFileWriteOptions); carries a shared CIpcWriteOptions.
cdef cppclass CIpcFileWriteOptions \
|
||||
"arrow::dataset::IpcFileWriteOptions"(CFileWriteOptions):
|
||||
shared_ptr[CIpcWriteOptions] options
|
||||
|
||||
# File format subclass (declared as deriving from CFileFormat); no extra
# members are declared.
cdef cppclass CIpcFileFormat "arrow::dataset::IpcFileFormat"(
|
||||
CFileFormat):
|
||||
pass
|
||||
|
||||
# File format subclass (declared as deriving from CFileFormat); no extra
# members are declared.
cdef cppclass COrcFileFormat "arrow::dataset::OrcFileFormat"(
|
||||
CFileFormat):
|
||||
pass
|
||||
|
||||
# Write options for the CSV format (declared as deriving from
# CFileWriteOptions): shared CSV write options plus a memory pool pointer.
cdef cppclass CCsvFileWriteOptions \
|
||||
"arrow::dataset::CsvFileWriteOptions"(CFileWriteOptions):
|
||||
shared_ptr[CCSVWriteOptions] write_options
|
||||
CMemoryPool* pool
|
||||
|
||||
# CSV file format (declared as deriving from CFileFormat); exposes its
# parse_options field.
cdef cppclass CCsvFileFormat "arrow::dataset::CsvFileFormat"(
|
||||
CFileFormat):
|
||||
CCSVParseOptions parse_options
|
||||
|
||||
# CSV-specific fragment scan options (declared as deriving from
# CFragmentScanOptions): convert options, read options and a
# stream_transform_func callback of type StreamWrapFunc.
cdef cppclass CCsvFragmentScanOptions \
|
||||
"arrow::dataset::CsvFragmentScanOptions"(CFragmentScanOptions):
|
||||
CCSVConvertOptions convert_options
|
||||
CCSVReadOptions read_options
|
||||
function[StreamWrapFunc] stream_transform_func
|
||||
|
||||
# File format subclass (declared as deriving from CFileFormat); no extra
# members are declared.
cdef cppclass CJsonFileFormat "arrow::dataset::JsonFileFormat"(CFileFormat):
|
||||
pass
|
||||
|
||||
# JSON-specific fragment scan options (declared as deriving from
# CFragmentScanOptions): parse options and read options.
cdef cppclass CJsonFragmentScanOptions "arrow::dataset::JsonFragmentScanOptions"(CFragmentScanOptions):
|
||||
CJSONParseOptions parse_options
|
||||
CJSONReadOptions read_options
|
||||
|
||||
# Struct with two string fields: directory and filename. It is the value
# type returned by CPartitioning.Format().
cdef struct CPartitionPathFormat "arrow::dataset::PartitionPathFormat":
|
||||
c_string directory
|
||||
c_string filename
|
||||
|
||||
# Binding for arrow::dataset::Partitioning: Parse() maps a path string to an
# expression, Format() maps an expression to a CPartitionPathFormat; also
# exposes type_name(), schema() and Equals().
cdef cppclass CPartitioning "arrow::dataset::Partitioning":
|
||||
c_string type_name() const
|
||||
CResult[CExpression] Parse(const c_string & path) const
|
||||
CResult[CPartitionPathFormat] Format(const CExpression & expr) const
|
||||
const shared_ptr[CSchema] & schema()
|
||||
c_bool Equals(const CPartitioning& other) const
|
||||
|
||||
# arrow::dataset::SegmentEncoding is declared as a class with operator==
# rather than as a Cython enum. Its two values, None and Uri, are declared
# after it as CSegmentEncoding-typed names with explicit C names.
cdef cppclass CSegmentEncoding" arrow::dataset::SegmentEncoding":
|
||||
bint operator==(CSegmentEncoding)
|
||||
|
||||
CSegmentEncoding CSegmentEncoding_None\
|
||||
" arrow::dataset::SegmentEncoding::None"
|
||||
CSegmentEncoding CSegmentEncoding_Uri\
|
||||
" arrow::dataset::SegmentEncoding::Uri"
|
||||
|
||||
# Options for key-value partitioning; only segment_encoding is declared.
cdef cppclass CKeyValuePartitioningOptions \
|
||||
"arrow::dataset::KeyValuePartitioningOptions":
|
||||
CSegmentEncoding segment_encoding
|
||||
|
||||
# Options for hive partitioning: segment_encoding plus a null_fallback
# string.
cdef cppclass CHivePartitioningOptions \
|
||||
"arrow::dataset::HivePartitioningOptions":
|
||||
CSegmentEncoding segment_encoding
|
||||
c_string null_fallback
|
||||
|
||||
# Options for partitioning factories: infer_dictionary flag, a schema and
# segment_encoding.
cdef cppclass CPartitioningFactoryOptions \
|
||||
"arrow::dataset::PartitioningFactoryOptions":
|
||||
c_bool infer_dictionary
|
||||
shared_ptr[CSchema] schema
|
||||
CSegmentEncoding segment_encoding
|
||||
|
||||
# Options for the hive partitioning factory: the same three fields as
# CPartitioningFactoryOptions plus null_fallback. No base class is declared
# here.
cdef cppclass CHivePartitioningFactoryOptions \
|
||||
"arrow::dataset::HivePartitioningFactoryOptions":
|
||||
c_bool infer_dictionary
|
||||
c_string null_fallback
|
||||
shared_ptr[CSchema] schema
|
||||
CSegmentEncoding segment_encoding
|
||||
|
||||
# Binding for arrow::dataset::PartitioningFactory; only type_name() is
# declared.
cdef cppclass CPartitioningFactory "arrow::dataset::PartitioningFactory":
|
||||
c_string type_name() const
|
||||
|
||||
# Partitioning subclass (declared as deriving from CPartitioning) built from
# a schema, dictionaries and key-value options; exposes dictionaries() and
# segment_encoding().
cdef cppclass CKeyValuePartitioning \
|
||||
"arrow::dataset::KeyValuePartitioning"(CPartitioning):
|
||||
CKeyValuePartitioning(shared_ptr[CSchema] schema,
|
||||
vector[shared_ptr[CArray]] dictionaries,
|
||||
CKeyValuePartitioningOptions options)
|
||||
|
||||
vector[shared_ptr[CArray]] dictionaries() const
|
||||
CSegmentEncoding segment_encoding()
|
||||
|
||||
# Partitioning subclass declared here as deriving directly from
# CPartitioning. Built from a schema and dictionaries; the static
# MakeFactory() takes field names and factory options.
cdef cppclass CDirectoryPartitioning \
|
||||
"arrow::dataset::DirectoryPartitioning"(CPartitioning):
|
||||
CDirectoryPartitioning(shared_ptr[CSchema] schema,
|
||||
vector[shared_ptr[CArray]] dictionaries)
|
||||
|
||||
@staticmethod
|
||||
shared_ptr[CPartitioningFactory] MakeFactory(
|
||||
vector[c_string] field_names, CPartitioningFactoryOptions)
|
||||
|
||||
vector[shared_ptr[CArray]] dictionaries() const
|
||||
|
||||
# Partitioning subclass declared here as deriving directly from
# CPartitioning. Built from a schema, dictionaries and hive options; the
# static MakeFactory() takes only hive factory options (no field names).
cdef cppclass CHivePartitioning \
|
||||
"arrow::dataset::HivePartitioning"(CPartitioning):
|
||||
CHivePartitioning(shared_ptr[CSchema] schema,
|
||||
vector[shared_ptr[CArray]] dictionaries,
|
||||
CHivePartitioningOptions options)
|
||||
|
||||
@staticmethod
|
||||
shared_ptr[CPartitioningFactory] MakeFactory(
|
||||
CHivePartitioningFactoryOptions)
|
||||
|
||||
vector[shared_ptr[CArray]] dictionaries() const
|
||||
c_string null_fallback() const
|
||||
|
||||
# Partitioning subclass with the same declared surface as
# CDirectoryPartitioning: a (schema, dictionaries) constructor, a static
# MakeFactory(field_names, options) and dictionaries().
cdef cppclass CFilenamePartitioning \
|
||||
"arrow::dataset::FilenamePartitioning"(CPartitioning):
|
||||
CFilenamePartitioning(shared_ptr[CSchema] schema,
|
||||
vector[shared_ptr[CArray]] dictionaries)
|
||||
|
||||
@staticmethod
|
||||
shared_ptr[CPartitioningFactory] MakeFactory(
|
||||
vector[c_string] field_names, CPartitioningFactoryOptions)
|
||||
|
||||
vector[shared_ptr[CArray]] dictionaries() const
|
||||
|
||||
# Cython view of the C++ class arrow::dataset::PartitioningOrFactory.
# It can be constructed from, or assigned, either a CPartitioning or a
# CPartitioningFactory shared pointer; both accessors are declared.
cdef cppclass CPartitioningOrFactory \
        "arrow::dataset::PartitioningOrFactory":
    CPartitioningOrFactory(shared_ptr[CPartitioning])
    CPartitioningOrFactory(shared_ptr[CPartitioningFactory])
    CPartitioningOrFactory & operator = (shared_ptr[CPartitioning])
    CPartitioningOrFactory & operator = (
        shared_ptr[CPartitioningFactory])
    shared_ptr[CPartitioning] partitioning() const
    shared_ptr[CPartitioningFactory] factory() const
|
||||
|
||||
# Cython view of the C++ type arrow::dataset::FileSystemFactoryOptions.
# Only the data members listed here are accessible from Cython.
cdef cppclass CFileSystemFactoryOptions \
        "arrow::dataset::FileSystemFactoryOptions":
    CPartitioningOrFactory partitioning
    c_string partition_base_dir
    c_bool exclude_invalid_files
    vector[c_string] selector_ignore_prefixes
|
||||
|
||||
# Cython view of the C++ class arrow::dataset::FileSystemDatasetFactory,
# declared as a subclass of CDatasetFactory.
#
# The three static methods below all bind to the same C++ name "Make";
# each Cython name corresponds to one argument list (paths, a file
# selector, or a vector of file infos).
cdef cppclass CFileSystemDatasetFactory \
        "arrow::dataset::FileSystemDatasetFactory"(
            CDatasetFactory):
    @staticmethod
    CResult[shared_ptr[CDatasetFactory]] MakeFromPaths "Make"(
        shared_ptr[CFileSystem] filesystem,
        vector[c_string] paths,
        shared_ptr[CFileFormat] format,
        CFileSystemFactoryOptions options
    )

    @staticmethod
    CResult[shared_ptr[CDatasetFactory]] MakeFromSelector "Make"(
        shared_ptr[CFileSystem] filesystem,
        CFileSelector,
        shared_ptr[CFileFormat] format,
        CFileSystemFactoryOptions options
    )

    @staticmethod
    CResult[shared_ptr[CDatasetFactory]] MakeFromFileInfos "Make"(
        shared_ptr[CFileSystem] filesystem,
        vector[CFileInfo] files,
        shared_ptr[CFileFormat] format,
        CFileSystemFactoryOptions options
    )
|
||||
@@ -0,0 +1,107 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
# distutils: language = c++
|
||||
|
||||
from pyarrow.includes.libarrow_dataset cimport *
|
||||
from pyarrow.includes.libparquet_encryption cimport *
|
||||
|
||||
from pyarrow._parquet cimport *
|
||||
|
||||
|
||||
cdef extern from "arrow/dataset/parquet_encryption_config.h" namespace "arrow::dataset" nogil:
|
||||
# Cython view of the C++ type arrow::dataset::ParquetEncryptionConfig.
# All three members are shared pointers to types declared elsewhere.
cdef cppclass CParquetEncryptionConfig "arrow::dataset::ParquetEncryptionConfig":
    shared_ptr[CCryptoFactory] crypto_factory
    shared_ptr[CKmsConnectionConfig] kms_connection_config
    shared_ptr[CEncryptionConfiguration] encryption_config
|
||||
|
||||
# Cython view of the C++ type arrow::dataset::ParquetDecryptionConfig.
# Mirrors CParquetEncryptionConfig, with a decryption configuration as
# the third member.
cdef cppclass CParquetDecryptionConfig "arrow::dataset::ParquetDecryptionConfig":
    shared_ptr[CCryptoFactory] crypto_factory
    shared_ptr[CKmsConnectionConfig] kms_connection_config
    shared_ptr[CDecryptionConfiguration] decryption_config
|
||||
|
||||
|
||||
cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil:
|
||||
|
||||
# Cython view of the C++ class arrow::dataset::ParquetFileWriter,
# declared as a subclass of CFileWriter.
cdef cppclass CParquetFileWriter \
        "arrow::dataset::ParquetFileWriter"(CFileWriter):
    const shared_ptr[FileWriter]& parquet_writer() const
|
||||
|
||||
# Cython view of the C++ class arrow::dataset::ParquetFileWriteOptions,
# declared as a subclass of CFileWriteOptions.
cdef cppclass CParquetFileWriteOptions \
        "arrow::dataset::ParquetFileWriteOptions"(CFileWriteOptions):
    shared_ptr[WriterProperties] writer_properties
    shared_ptr[ArrowWriterProperties] arrow_writer_properties
    shared_ptr[CParquetEncryptionConfig] parquet_encryption_config
|
||||
|
||||
# Cython view of the C++ class arrow::dataset::ParquetFileFragment,
# declared as a subclass of CFileFragment.
#
# SubsetWithFilter and SubsetWithIds both bind to the C++ name "Subset";
# one takes an expression, the other a vector of row-group ids.
cdef cppclass CParquetFileFragment "arrow::dataset::ParquetFileFragment"(
        CFileFragment):
    const vector[int]& row_groups() const
    shared_ptr[CFileMetaData] metadata() const
    CResult[vector[shared_ptr[CFragment]]] SplitByRowGroup(
        CExpression predicate)
    CResult[shared_ptr[CFragment]] SubsetWithFilter "Subset"(
        CExpression predicate)
    CResult[shared_ptr[CFragment]] SubsetWithIds "Subset"(
        vector[int] row_group_ids)
    CStatus EnsureCompleteMetadata()
|
||||
|
||||
# Cython view of the nested C++ type
# arrow::dataset::ParquetFileFormat::ReaderOptions.
cdef cppclass CParquetFileFormatReaderOptions \
        "arrow::dataset::ParquetFileFormat::ReaderOptions":
    unordered_set[c_string] dict_columns
    TimeUnit coerce_int96_timestamp_unit
    Type binary_type
    Type list_type
|
||||
|
||||
# Cython view of the C++ class arrow::dataset::ParquetFileFormat,
# declared as a subclass of CFileFormat.
cdef cppclass CParquetFileFormat "arrow::dataset::ParquetFileFormat"(
        CFileFormat):
    CParquetFileFormatReaderOptions reader_options
    CResult[shared_ptr[CFileFragment]] MakeFragment(
        CFileSource source,
        CExpression partition_expression,
        shared_ptr[CSchema] physical_schema,
        vector[int] row_groups)
|
||||
|
||||
# Cython view of the C++ class arrow::dataset::ParquetFragmentScanOptions,
# declared as a subclass of CFragmentScanOptions.
cdef cppclass CParquetFragmentScanOptions \
        "arrow::dataset::ParquetFragmentScanOptions"(CFragmentScanOptions):
    shared_ptr[CReaderProperties] reader_properties
    shared_ptr[ArrowReaderProperties] arrow_reader_properties
    shared_ptr[CParquetDecryptionConfig] parquet_decryption_config
|
||||
|
||||
# Cython view of the C++ type arrow::dataset::ParquetFactoryOptions.
cdef cppclass CParquetFactoryOptions \
        "arrow::dataset::ParquetFactoryOptions":
    CPartitioningOrFactory partitioning
    c_string partition_base_dir
    c_bool validate_column_chunk_paths
|
||||
|
||||
# Cython view of the C++ class arrow::dataset::ParquetDatasetFactory,
# declared as a subclass of CDatasetFactory.
#
# Both static methods bind to the C++ name "Make": one takes a metadata
# path string, the other a CFileSource plus a base path.
cdef cppclass CParquetDatasetFactory \
        "arrow::dataset::ParquetDatasetFactory"(CDatasetFactory):
    @staticmethod
    CResult[shared_ptr[CDatasetFactory]] MakeFromMetaDataPath "Make"(
        const c_string& metadata_path,
        shared_ptr[CFileSystem] filesystem,
        shared_ptr[CParquetFileFormat] format,
        CParquetFactoryOptions options
    )

    # NOTE(review): the first parameter is a CFileSource although it is
    # still named `metadata_path`.
    @staticmethod
    CResult[shared_ptr[CDatasetFactory]] MakeFromMetaDataSource "Make"(
        const CFileSource& metadata_path,
        const c_string& base_path,
        shared_ptr[CFileSystem] filesystem,
        shared_ptr[CParquetFileFormat] format,
        CParquetFactoryOptions options
    )
|
||||
@@ -0,0 +1,50 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
# distutils: language = c++
|
||||
|
||||
from pyarrow.includes.libarrow cimport (CCompressionType, CStatus, CTable,
|
||||
COutputStream, CResult, shared_ptr,
|
||||
vector, CRandomAccessFile, CSchema,
|
||||
c_string, CIpcReadOptions)
|
||||
|
||||
|
||||
cdef extern from "arrow/ipc/api.h" namespace "arrow::ipc" nogil:
|
||||
# Integer constants bound to arrow::ipc::feather::kFeatherV1Version and
# arrow::ipc::feather::kFeatherV2Version.
int kFeatherV1Version" arrow::ipc::feather::kFeatherV1Version"
int kFeatherV2Version" arrow::ipc::feather::kFeatherV2Version"
|
||||
|
||||
# Cython view of the C++ type arrow::ipc::feather::WriteProperties.
cdef cppclass CFeatherProperties" arrow::ipc::feather::WriteProperties":
    int version
    int chunksize
    CCompressionType compression
    int compression_level
|
||||
|
||||
# Free function bound to arrow::ipc::feather::WriteTable; exposed to
# Cython under the name WriteFeather.
CStatus WriteFeather" arrow::ipc::feather::WriteTable" \
    (const CTable& table, COutputStream* out,
     CFeatherProperties properties)
|
||||
|
||||
# Cython view of the C++ class arrow::ipc::feather::Reader.
cdef cppclass CFeatherReader" arrow::ipc::feather::Reader":
    @staticmethod
    CResult[shared_ptr[CFeatherReader]] Open(
        const shared_ptr[CRandomAccessFile]& file,
        const CIpcReadOptions& options)
    int version()
    shared_ptr[CSchema] schema()

    # Three overloads of Read: no selection, by integer indices, by names.
    CStatus Read(shared_ptr[CTable]* out)
    CStatus Read(const vector[int] indices, shared_ptr[CTable]* out)
    CStatus Read(const vector[c_string] names, shared_ptr[CTable]* out)
|
||||
@@ -0,0 +1,621 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
# distutils: language = c++
|
||||
|
||||
from pyarrow.includes.common cimport *
|
||||
from pyarrow.includes.libarrow cimport *
|
||||
from pyarrow.includes.libarrow_python cimport CTimePoint
|
||||
|
||||
from libcpp.map cimport multimap
|
||||
|
||||
|
||||
cdef extern from "arrow/flight/api.h" namespace "arrow" nogil:
|
||||
# C string constant bound to
# arrow::flight::TracingServerMiddleware::kMiddlewareName.
cdef char* CTracingServerMiddlewareName\
    " arrow::flight::TracingServerMiddleware::kMiddlewareName"
|
||||
|
||||
# Cython view of the C++ type arrow::flight::ActionType.
cdef cppclass CActionType" arrow::flight::ActionType":
    c_string type
    c_string description
    bint operator==(CActionType)
    CResult[c_string] SerializeToString()

    @staticmethod
    CResult[CActionType] Deserialize(const c_string& serialized)
|
||||
|
||||
# Cython view of the C++ type arrow::flight::Action.
cdef cppclass CAction" arrow::flight::Action":
    c_string type
    shared_ptr[CBuffer] body
    bint operator==(CAction)
    CResult[c_string] SerializeToString()
    c_string ToString()

    @staticmethod
    CResult[CAction] Deserialize(const c_string& serialized)
|
||||
|
||||
# Cython view of the C++ type arrow::flight::Result (named CFlightResult
# on the Cython side).
cdef cppclass CFlightResult" arrow::flight::Result":
    CFlightResult()
    CFlightResult(CFlightResult)
    shared_ptr[CBuffer] body
    bint operator==(CFlightResult)
    CResult[c_string] SerializeToString()
    c_string ToString()

    @staticmethod
    CResult[CFlightResult] Deserialize(const c_string& serialized)
|
||||
|
||||
# Cython view of the C++ type arrow::flight::BasicAuth.
cdef cppclass CBasicAuth" arrow::flight::BasicAuth":
    CBasicAuth()
    CBasicAuth(CBuffer)
    CBasicAuth(CBasicAuth)
    c_string username
    c_string password
    bint operator==(CBasicAuth)
    CResult[c_string] SerializeToString()
    c_string ToString()

    @staticmethod
    CResult[CBasicAuth] Deserialize(const c_string& serialized)
|
||||
|
||||
# Cython view of the C++ class arrow::flight::ResultStream.
cdef cppclass CResultStream" arrow::flight::ResultStream":
    CResult[unique_ptr[CFlightResult]] Next()
|
||||
|
||||
# Cython view of arrow::flight::FlightDescriptor::DescriptorType.
# It is declared as an opaque class with only operator==.
cdef cppclass CDescriptorType \
        " arrow::flight::FlightDescriptor::DescriptorType":
    bint operator==(CDescriptorType)

# Values of CDescriptorType, each bound to a member of
# arrow::flight::FlightDescriptor.
CDescriptorType CDescriptorTypeUnknown\
    " arrow::flight::FlightDescriptor::UNKNOWN"
CDescriptorType CDescriptorTypePath\
    " arrow::flight::FlightDescriptor::PATH"
CDescriptorType CDescriptorTypeCmd\
    " arrow::flight::FlightDescriptor::CMD"
|
||||
|
||||
# Cython view of the C++ type arrow::flight::FlightDescriptor.
cdef cppclass CFlightDescriptor" arrow::flight::FlightDescriptor":
    CDescriptorType type
    c_string cmd
    vector[c_string] path
    bint operator==(CFlightDescriptor)
    CResult[c_string] SerializeToString()
    c_string ToString()

    @staticmethod
    CResult[CFlightDescriptor] Deserialize(const c_string& serialized)
|
||||
|
||||
# Cython view of the C++ type arrow::flight::Ticket.
cdef cppclass CTicket" arrow::flight::Ticket":
    CTicket()
    c_string ticket
    bint operator==(CTicket)
    CResult[c_string] SerializeToString()
    c_string ToString()

    @staticmethod
    CResult[CTicket] Deserialize(const c_string& serialized)
|
||||
|
||||
# Cython view of the C++ type arrow::flight::Criteria.
cdef cppclass CCriteria" arrow::flight::Criteria":
    CCriteria()
    c_string expression
    bint operator==(CCriteria)
    CResult[c_string] SerializeToString()

    @staticmethod
    CResult[CCriteria] Deserialize(const c_string& serialized)
|
||||
|
||||
# Cython view of the C++ class arrow::flight::Location.
# Instances are obtained through the static factories below; each
# returns a CResult wrapping the new CLocation.
cdef cppclass CLocation" arrow::flight::Location":
    CLocation()
    c_string ToString()
    c_bool Equals(const CLocation& other)

    @staticmethod
    CResult[CLocation] Parse(const c_string& uri_string)

    @staticmethod
    CResult[CLocation] ForGrpcTcp(const c_string& host, int port)

    @staticmethod
    CResult[CLocation] ForGrpcTls(const c_string& host, int port)

    @staticmethod
    CResult[CLocation] ForGrpcUnix(const c_string& path)
|
||||
|
||||
# Cython view of the C++ type arrow::flight::FlightEndpoint.
cdef cppclass CFlightEndpoint" arrow::flight::FlightEndpoint":
    CFlightEndpoint()

    CTicket ticket
    vector[CLocation] locations
    # Optional: may hold no value (std::optional on the C++ side).
    optional[CTimePoint] expiration_time
    c_string app_metadata

    bint operator==(CFlightEndpoint)
    CResult[c_string] SerializeToString()
    c_string ToString()

    @staticmethod
    CResult[CFlightEndpoint] Deserialize(const c_string& serialized)
|
||||
|
||||
# Cython view of the C++ class arrow::flight::FlightInfo.
# Unlike the value types declared nearby, Deserialize returns the new
# object behind a unique_ptr.
cdef cppclass CFlightInfo" arrow::flight::FlightInfo":
    CFlightInfo(CFlightInfo info)
    int64_t total_records()
    int64_t total_bytes()
    c_bool ordered()
    c_string app_metadata()
    CResult[shared_ptr[CSchema]] GetSchema(CDictionaryMemo* memo)
    CFlightDescriptor& descriptor()
    const vector[CFlightEndpoint]& endpoints()
    CResult[c_string] SerializeToString()
    c_string ToString()
    bint operator==(CFlightInfo)

    @staticmethod
    CResult[unique_ptr[CFlightInfo]] Deserialize(
        const c_string& serialized)
|
||||
|
||||
# Cython view of the C++ class arrow::flight::SchemaResult.
cdef cppclass CSchemaResult" arrow::flight::SchemaResult":
    CSchemaResult()
    CSchemaResult(CSchemaResult result)
    CResult[shared_ptr[CSchema]] GetSchema(CDictionaryMemo* memo)
    bint operator==(CSchemaResult)
    CResult[c_string] SerializeToString()
    c_string ToString()

    @staticmethod
    CResult[CSchemaResult] Deserialize(const c_string& serialized)
|
||||
|
||||
# Cython view of the C++ class arrow::flight::FlightListing.
cdef cppclass CFlightListing" arrow::flight::FlightListing":
    CResult[unique_ptr[CFlightInfo]] Next()
|
||||
|
||||
# Cython view of the C++ class arrow::flight::SimpleFlightListing.
cdef cppclass CSimpleFlightListing" arrow::flight::SimpleFlightListing":
    # The rvalue-reference constructor is kept commented out because it
    # doesn't work with Cython >= 3; the const-reference overload is
    # declared instead.
    # CSimpleFlightListing(vector[CFlightInfo]&& info)
    CSimpleFlightListing(const vector[CFlightInfo]& info)
|
||||
|
||||
# Cython view of the C++ type arrow::flight::FlightPayload.
cdef cppclass CFlightPayload" arrow::flight::FlightPayload":
    shared_ptr[CBuffer] descriptor
    shared_ptr[CBuffer] app_metadata
    CIpcPayload ipc_message
|
||||
|
||||
# Cython view of the C++ class arrow::flight::FlightDataStream.
cdef cppclass CFlightDataStream" arrow::flight::FlightDataStream":
    shared_ptr[CSchema] schema()
    CResult[CFlightPayload] Next()
|
||||
|
||||
# Cython view of the C++ type arrow::flight::FlightStreamChunk: a record
# batch pointer paired with an application-metadata buffer pointer.
cdef cppclass CFlightStreamChunk" arrow::flight::FlightStreamChunk":
    CFlightStreamChunk()
    shared_ptr[CRecordBatch] data
    shared_ptr[CBuffer] app_metadata
|
||||
|
||||
# Cython view of the C++ class arrow::flight::MetadataRecordBatchReader.
cdef cppclass CMetadataRecordBatchReader \
        " arrow::flight::MetadataRecordBatchReader":
    CResult[shared_ptr[CSchema]] GetSchema()
    CResult[CFlightStreamChunk] Next()
    CResult[shared_ptr[CTable]] ToTable()
    CIpcReadStats stats() const
|
||||
|
||||
# Free function bound to arrow::flight::MakeRecordBatchReader: takes a
# shared CMetadataRecordBatchReader and returns a CRecordBatchReader
# wrapped in a CResult.
CResult[shared_ptr[CRecordBatchReader]] MakeRecordBatchReader\
    " arrow::flight::MakeRecordBatchReader"(
        shared_ptr[CMetadataRecordBatchReader])
|
||||
|
||||
# Cython view of the C++ class arrow::flight::MetadataRecordBatchWriter,
# declared as a subclass of CRecordBatchWriter.
cdef cppclass CMetadataRecordBatchWriter \
        " arrow::flight::MetadataRecordBatchWriter"(CRecordBatchWriter):
    CStatus Begin(shared_ptr[CSchema] schema,
                  const CIpcWriteOptions& options)
    CStatus WriteMetadata(shared_ptr[CBuffer] app_metadata)
    CStatus WriteWithMetadata(const CRecordBatch& batch,
                              shared_ptr[CBuffer] app_metadata)
|
||||
|
||||
# Cython view of the C++ class arrow::flight::FlightStreamReader,
# declared as a subclass of CMetadataRecordBatchReader.
cdef cppclass CFlightStreamReader \
        " arrow::flight::FlightStreamReader"(CMetadataRecordBatchReader):
    void Cancel()
    # Binds to the C++ name "ToTable" with a stop-token argument; the
    # distinct Cython name keeps it apart from the inherited ToTable().
    CResult[shared_ptr[CTable]] ToTableWithStopToken" ToTable"\
        (const CStopToken& stop_token)
|
||||
|
||||
# Cython view of the C++ class arrow::flight::FlightMessageReader,
# declared as a subclass of CMetadataRecordBatchReader.
cdef cppclass CFlightMessageReader \
        " arrow::flight::FlightMessageReader"(CMetadataRecordBatchReader):
    CFlightDescriptor& descriptor()
|
||||
|
||||
# Cython view of the C++ class arrow::flight::FlightMessageWriter,
# declared as a subclass of CMetadataRecordBatchWriter. No additional
# members are declared here.
cdef cppclass CFlightMessageWriter \
        " arrow::flight::FlightMessageWriter"(CMetadataRecordBatchWriter):
    pass
|
||||
|
||||
# Cython view of the C++ class arrow::flight::FlightStreamWriter,
# declared as a subclass of CMetadataRecordBatchWriter.
cdef cppclass CFlightStreamWriter \
        " arrow::flight::FlightStreamWriter"(CMetadataRecordBatchWriter):
    CStatus DoneWriting()
|
||||
|
||||
# Cython view of the C++ class arrow::flight::RecordBatchStream,
# declared as a subclass of CFlightDataStream.
cdef cppclass CRecordBatchStream \
        " arrow::flight::RecordBatchStream"(CFlightDataStream):
    CRecordBatchStream(shared_ptr[CRecordBatchReader]& reader,
                       const CIpcWriteOptions& options)
|
||||
|
||||
# Cython view of the C++ class arrow::flight::FlightMetadataReader.
# ReadMetadata writes its result through the `out` pointer.
cdef cppclass CFlightMetadataReader" arrow::flight::FlightMetadataReader":
    CStatus ReadMetadata(shared_ptr[CBuffer]* out)

# Cython view of the C++ class arrow::flight::FlightMetadataWriter.
cdef cppclass CFlightMetadataWriter" arrow::flight::FlightMetadataWriter":
    CStatus WriteMetadata(const CBuffer& message)
|
||||
|
||||
# Reader/sender pairs used by the authentication callbacks, for the
# server side and the client side. Read writes the token through a
# pointer; Write takes the token by (non-const) reference.
cdef cppclass CServerAuthReader" arrow::flight::ServerAuthReader":
    CStatus Read(c_string* token)

cdef cppclass CServerAuthSender" arrow::flight::ServerAuthSender":
    CStatus Write(c_string& token)

cdef cppclass CClientAuthReader" arrow::flight::ClientAuthReader":
    CStatus Read(c_string* token)

cdef cppclass CClientAuthSender" arrow::flight::ClientAuthSender":
    CStatus Write(c_string& token)
|
||||
|
||||
# Opaque declarations of arrow::flight::ServerAuthHandler and
# arrow::flight::ClientAuthHandler; no members are exposed to Cython.
cdef cppclass CServerAuthHandler" arrow::flight::ServerAuthHandler":
    pass

cdef cppclass CClientAuthHandler" arrow::flight::ClientAuthHandler":
    pass
|
||||
|
||||
# Cython view of the C++ class arrow::flight::ServerCallContext.
cdef cppclass CServerCallContext" arrow::flight::ServerCallContext":
    c_string& peer_identity()
    c_string& peer()
    c_bool is_cancelled()
    void AddHeader(const c_string& key, const c_string& value)
    void AddTrailer(const c_string& key, const c_string& value)
    # Returns a raw pointer to a middleware looked up by string key.
    CServerMiddleware* GetMiddleware(const c_string& key)
|
||||
|
||||
# Cython view of the C++ type arrow::flight::TimeoutDuration; it is
# constructed from a double and read back with count().
# NOTE(review): the time unit is not visible here — confirm against the
# Arrow Flight headers.
cdef cppclass CTimeoutDuration" arrow::flight::TimeoutDuration":
    CTimeoutDuration(double)
    double count()
|
||||
|
||||
# Cython view of the C++ type arrow::flight::FlightCallOptions.
cdef cppclass CFlightCallOptions" arrow::flight::FlightCallOptions":
    CFlightCallOptions()
    CTimeoutDuration timeout
    CIpcWriteOptions write_options
    CIpcReadOptions read_options
    # Headers are stored as (key, value) string pairs.
    vector[pair[c_string, c_string]] headers
    CStopToken stop_token
|
||||
|
||||
# Cython view of the C++ type arrow::flight::CertKeyPair.
cdef cppclass CCertKeyPair" arrow::flight::CertKeyPair":
    CCertKeyPair()
    c_string pem_cert
    c_string pem_key
|
||||
|
||||
# Cython view of arrow::flight::FlightMethod.
# It is declared as an opaque class with only operator==.
cdef cppclass CFlightMethod" arrow::flight::FlightMethod":
    bint operator==(CFlightMethod)

# Values of CFlightMethod, each bound to the like-named member of
# arrow::flight::FlightMethod.
CFlightMethod CFlightMethodInvalid\
    " arrow::flight::FlightMethod::Invalid"
CFlightMethod CFlightMethodHandshake\
    " arrow::flight::FlightMethod::Handshake"
CFlightMethod CFlightMethodListFlights\
    " arrow::flight::FlightMethod::ListFlights"
CFlightMethod CFlightMethodGetFlightInfo\
    " arrow::flight::FlightMethod::GetFlightInfo"
CFlightMethod CFlightMethodGetSchema\
    " arrow::flight::FlightMethod::GetSchema"
CFlightMethod CFlightMethodDoGet\
    " arrow::flight::FlightMethod::DoGet"
CFlightMethod CFlightMethodDoPut\
    " arrow::flight::FlightMethod::DoPut"
CFlightMethod CFlightMethodDoAction\
    " arrow::flight::FlightMethod::DoAction"
CFlightMethod CFlightMethodListActions\
    " arrow::flight::FlightMethod::ListActions"
CFlightMethod CFlightMethodDoExchange\
    " arrow::flight::FlightMethod::DoExchange"
|
||||
|
||||
# Cython view of the C++ type arrow::flight::CallInfo; only the `method`
# member is exposed.
cdef cppclass CCallInfo" arrow::flight::CallInfo":
    CFlightMethod method
|
||||
|
||||
# arrow::flight::CallHeaders, declared to Cython as a multimap from
# string_view to string_view.
ctypedef multimap[cpp_string_view, cpp_string_view] CCallHeaders\
    " arrow::flight::CallHeaders"
|
||||
|
||||
# Cython view of the C++ class arrow::flight::AddCallHeaders.
cdef cppclass CAddCallHeaders" arrow::flight::AddCallHeaders":
    void AddHeader(const c_string& key, const c_string& value)
|
||||
|
||||
# Middleware base types from arrow::flight. Only CServerMiddleware
# exposes a member (name()); the other three are opaque.
cdef cppclass CServerMiddleware" arrow::flight::ServerMiddleware":
    c_string name()

cdef cppclass CServerMiddlewareFactory\
        " arrow::flight::ServerMiddlewareFactory":
    pass

cdef cppclass CClientMiddleware" arrow::flight::ClientMiddleware":
    pass

cdef cppclass CClientMiddlewareFactory\
        " arrow::flight::ClientMiddlewareFactory":
    pass
|
||||
|
||||
# Cython view of the nested C++ type
# arrow::flight::TracingServerMiddleware::TraceKey: a key/value pair of
# strings.
cdef cppclass CTracingServerMiddlewareTraceKey\
        " arrow::flight::TracingServerMiddleware::TraceKey":
    CTracingServerMiddlewareTraceKey()
    c_string key
    c_string value
|
||||
|
||||
# Cython view of the C++ class arrow::flight::TracingServerMiddleware,
# declared as a subclass of CServerMiddleware.
cdef cppclass CTracingServerMiddleware\
        " arrow::flight::TracingServerMiddleware"(CServerMiddleware):
    vector[CTracingServerMiddlewareTraceKey] GetTraceContext()
|
||||
|
||||
# Free function bound to
# arrow::flight::MakeTracingServerMiddlewareFactory; takes no arguments
# and returns a shared CServerMiddlewareFactory.
cdef shared_ptr[CServerMiddlewareFactory] \
    MakeTracingServerMiddlewareFactory\
    " arrow::flight::MakeTracingServerMiddlewareFactory"()
|
||||
|
||||
# Cython view of the C++ type arrow::flight::FlightServerOptions.
# The only declared constructor requires a CLocation.
cdef cppclass CFlightServerOptions" arrow::flight::FlightServerOptions":
    CFlightServerOptions(const CLocation& location)
    CLocation location
    unique_ptr[CServerAuthHandler] auth_handler
    vector[CCertKeyPair] tls_certificates
    c_bool verify_client
    c_string root_certificates
    # Each entry pairs a string key with a shared middleware factory.
    vector[pair[c_string, shared_ptr[CServerMiddlewareFactory]]] middleware
|
||||
|
||||
# Cython view of the C++ type arrow::flight::FlightClientOptions.
# No constructor is declared; the static Defaults() returns an instance
# by value.
cdef cppclass CFlightClientOptions" arrow::flight::FlightClientOptions":
    c_string tls_root_certs
    c_string cert_chain
    c_string private_key
    c_string override_hostname
    vector[shared_ptr[CClientMiddlewareFactory]] middleware
    int64_t write_size_limit_bytes
    # Each entry pairs an option name with a CIntStringVariant value.
    vector[pair[c_string, CIntStringVariant]] generic_options
    c_bool disable_server_verification

    @staticmethod
    CFlightClientOptions Defaults()
|
||||
|
||||
# Result structs nested in arrow::flight::FlightClient. Both own a
# stream writer; DoPutResult pairs it with a metadata reader and
# DoExchangeResult with a stream reader.
cdef cppclass CDoPutResult" arrow::flight::FlightClient::DoPutResult":
    unique_ptr[CFlightStreamWriter] writer
    unique_ptr[CFlightMetadataReader] reader

cdef cppclass CDoExchangeResult" arrow::flight::FlightClient::DoExchangeResult":
    unique_ptr[CFlightStreamWriter] writer
    unique_ptr[CFlightStreamReader] reader
|
||||
|
||||
# Cython view of the C++ class arrow::flight::FlightClient.
# Instances are created with the static Connect(); every RPC method
# takes a CFlightCallOptions reference as its first argument.
cdef cppclass CFlightClient" arrow::flight::FlightClient":
    @staticmethod
    CResult[unique_ptr[CFlightClient]] Connect(const CLocation& location,
                                               const CFlightClientOptions& options)

    c_bool supports_async()
    CStatus CheckAsyncSupport()

    CStatus Authenticate(CFlightCallOptions& options,
                         unique_ptr[CClientAuthHandler] auth_handler)

    # Returns a pair of strings on success.
    CResult[pair[c_string, c_string]] AuthenticateBasicToken(
        CFlightCallOptions& options,
        const c_string& username,
        const c_string& password)

    CResult[unique_ptr[CResultStream]] DoAction(CFlightCallOptions& options,
                                                CAction& action)
    CResult[vector[CActionType]] ListActions(CFlightCallOptions& options)

    CResult[unique_ptr[CFlightListing]] ListFlights(CFlightCallOptions& options,
                                                    CCriteria criteria)
    CResult[unique_ptr[CFlightInfo]] GetFlightInfo(CFlightCallOptions& options,
                                                   CFlightDescriptor& descriptor)
    # Future-returning variant of GetFlightInfo.
    CFuture[CFlightInfo] GetFlightInfoAsync(CFlightCallOptions& options,
                                            CFlightDescriptor& descriptor)
    CResult[unique_ptr[CSchemaResult]] GetSchema(CFlightCallOptions& options,
                                                 CFlightDescriptor& descriptor)
    CResult[unique_ptr[CFlightStreamReader]] DoGet(CFlightCallOptions& options,
                                                   CTicket& ticket)
    CResult[CDoPutResult] DoPut(CFlightCallOptions& options,
                                CFlightDescriptor& descriptor,
                                shared_ptr[CSchema]& schema)
    CResult[CDoExchangeResult] DoExchange(CFlightCallOptions& options,
                                          CFlightDescriptor& descriptor)
    CStatus Close()
|
||||
|
||||
# Cython view of arrow::flight::FlightStatusCode.
# It is declared as an opaque class with only operator==.
cdef cppclass CFlightStatusCode" arrow::flight::FlightStatusCode":
    bint operator==(CFlightStatusCode)

# Values of CFlightStatusCode, each bound to the like-named member of
# arrow::flight::FlightStatusCode.
CFlightStatusCode CFlightStatusInternal \
    " arrow::flight::FlightStatusCode::Internal"
CFlightStatusCode CFlightStatusTimedOut \
    " arrow::flight::FlightStatusCode::TimedOut"
CFlightStatusCode CFlightStatusCancelled \
    " arrow::flight::FlightStatusCode::Cancelled"
CFlightStatusCode CFlightStatusUnauthenticated \
    " arrow::flight::FlightStatusCode::Unauthenticated"
CFlightStatusCode CFlightStatusUnauthorized \
    " arrow::flight::FlightStatusCode::Unauthorized"
CFlightStatusCode CFlightStatusUnavailable \
    " arrow::flight::FlightStatusCode::Unavailable"
CFlightStatusCode CFlightStatusFailed \
    " arrow::flight::FlightStatusCode::Failed"
|
||||
|
||||
# Cython view of the C++ class arrow::flight::FlightStatusDetail.
cdef cppclass FlightStatusDetail" arrow::flight::FlightStatusDetail":
    CFlightStatusCode code()
    c_string extra_info()

    # Static: takes a CStatus and returns a shared FlightStatusDetail.
    # NOTE(review): presumably null when the status carries no Flight
    # detail — confirm against the Arrow Flight headers.
    @staticmethod
    shared_ptr[FlightStatusDetail] UnwrapStatus(const CStatus& status)
|
||||
|
||||
# Cython view of the C++ class
# arrow::flight::FlightWriteSizeStatusDetail.
cdef cppclass FlightWriteSizeStatusDetail\
        " arrow::flight::FlightWriteSizeStatusDetail":
    int64_t limit()
    int64_t actual()

    # Static: takes a CStatus and returns a shared
    # FlightWriteSizeStatusDetail.
    @staticmethod
    shared_ptr[FlightWriteSizeStatusDetail] UnwrapStatus(
        const CStatus& status)
|
||||
|
||||
# Two overloads of arrow::flight::MakeFlightError: with and without the
# trailing `extra_info` string. Both return a CStatus.
cdef CStatus MakeFlightError" arrow::flight::MakeFlightError" \
    (CFlightStatusCode code, const c_string& message)

cdef CStatus MakeFlightError" arrow::flight::MakeFlightError" \
    (CFlightStatusCode code,
     const c_string& message,
     const c_string& extra_info)
|
||||
|
||||
# Callbacks for implementing Flight servers
# Each ctypedef names a function signature so that it can be used as
# the template argument of function[...] (std::function). Every
# callback returns a CStatus and receives a Python object as its first
# argument.
ctypedef CStatus cb_list_flights(object, const CServerCallContext&,
                                 const CCriteria*,
                                 unique_ptr[CFlightListing]*)
ctypedef CStatus cb_get_flight_info(object, const CServerCallContext&,
                                    const CFlightDescriptor&,
                                    unique_ptr[CFlightInfo]*)
ctypedef CStatus cb_get_schema(object, const CServerCallContext&,
                               const CFlightDescriptor&,
                               unique_ptr[CSchemaResult]*)
ctypedef CStatus cb_do_put(object, const CServerCallContext&,
                           unique_ptr[CFlightMessageReader],
                           unique_ptr[CFlightMetadataWriter])
ctypedef CStatus cb_do_get(object, const CServerCallContext&,
                           const CTicket&,
                           unique_ptr[CFlightDataStream]*)
ctypedef CStatus cb_do_exchange(object, const CServerCallContext&,
                                unique_ptr[CFlightMessageReader],
                                unique_ptr[CFlightMessageWriter])
ctypedef CStatus cb_do_action(object, const CServerCallContext&,
                              const CAction&,
                              unique_ptr[CResultStream]*)
ctypedef CStatus cb_list_actions(object, const CServerCallContext&,
                                 vector[CActionType]*)
ctypedef CStatus cb_result_next(object, unique_ptr[CFlightResult]*)
ctypedef CStatus cb_data_stream_next(object, CFlightPayload*)
ctypedef CStatus cb_server_authenticate(object, CServerAuthSender*,
                                        CServerAuthReader*)
ctypedef CStatus cb_is_valid(object, const c_string&, c_string*)
ctypedef CStatus cb_client_authenticate(object, CClientAuthSender*,
                                        CClientAuthReader*)
ctypedef CStatus cb_get_token(object, c_string*)

# Middleware callbacks.
ctypedef CStatus cb_middleware_sending_headers(object, CAddCallHeaders*)
ctypedef CStatus cb_middleware_call_completed(object, const CStatus&)
ctypedef CStatus cb_client_middleware_received_headers(
    object, const CCallHeaders&)
ctypedef CStatus cb_server_middleware_start_call(
    object,
    const CCallInfo&,
    const CCallHeaders&,
    shared_ptr[CServerMiddleware]*)
ctypedef CStatus cb_client_middleware_start_call(
    object,
    const CCallInfo&,
    unique_ptr[CClientMiddleware]*)
|
||||
|
||||
cdef extern from "arrow/python/flight.h" namespace "arrow::py::flight" nogil:
|
||||
# C string constant bound to arrow::py::flight::kPyServerMiddlewareName.
cdef char* CPyServerMiddlewareName\
    " arrow::py::flight::kPyServerMiddlewareName"
|
||||
|
||||
# Table of std::function callbacks, one per server RPC, passed to
# PyFlightServer. No explicit C++ name is given, so the class resolves
# inside the enclosing extern namespace.
cdef cppclass PyFlightServerVtable:
    PyFlightServerVtable()
    function[cb_list_flights] list_flights
    function[cb_get_flight_info] get_flight_info
    function[cb_get_schema] get_schema
    function[cb_do_put] do_put
    function[cb_do_get] do_get
    function[cb_do_exchange] do_exchange
    function[cb_do_action] do_action
    function[cb_list_actions] list_actions
|
||||
|
||||
# Callback tables for the Python-backed auth handlers. Each holds an
# `authenticate` callback plus one side-specific callback (`is_valid`
# for the server, `get_token` for the client).
cdef cppclass PyServerAuthHandlerVtable:
    PyServerAuthHandlerVtable()
    function[cb_server_authenticate] authenticate
    function[cb_is_valid] is_valid

cdef cppclass PyClientAuthHandlerVtable:
    PyClientAuthHandlerVtable()
    function[cb_client_authenticate] authenticate
    function[cb_get_token] get_token
|
||||
|
||||
# Python-backed Flight server: constructed from a Python object and a
# PyFlightServerVtable of callbacks.
cdef cppclass PyFlightServer:
    PyFlightServer(object server, PyFlightServerVtable vtable)

    CStatus Init(CFlightServerOptions& options)
    int port()
    # `except *`: Cython checks for a pending Python exception after
    # every call to this method.
    CStatus ServeWithSignals() except *
    CStatus Shutdown()
    CStatus Wait()
|
||||
|
||||
# Python-backed auth handlers, declared as subclasses of the opaque
# CServerAuthHandler / CClientAuthHandler. Each is constructed from a
# Python object and the matching callback table.
cdef cppclass PyServerAuthHandler\
        " arrow::py::flight::PyServerAuthHandler"(CServerAuthHandler):
    PyServerAuthHandler(object handler, PyServerAuthHandlerVtable vtable)

cdef cppclass PyClientAuthHandler\
        " arrow::py::flight::PyClientAuthHandler"(CClientAuthHandler):
    PyClientAuthHandler(object handler, PyClientAuthHandlerVtable vtable)
|
||||
|
||||
# Cython view of arrow::py::flight::PyFlightResultStream, declared as a
# subclass of CResultStream. Constructed from a Python object and a
# cb_result_next callback.
cdef cppclass CPyFlightResultStream\
        " arrow::py::flight::PyFlightResultStream"(CResultStream):
    CPyFlightResultStream(object generator,
                          function[cb_result_next] callback)
|
||||
|
||||
# Cython view of arrow::py::flight::PyFlightDataStream, declared as a
# subclass of CFlightDataStream. Constructed from a Python object and
# an owned CFlightDataStream.
cdef cppclass CPyFlightDataStream\
        " arrow::py::flight::PyFlightDataStream"(CFlightDataStream):
    CPyFlightDataStream(object data_source,
                        unique_ptr[CFlightDataStream] stream)
|
||||
|
||||
# Cython view of arrow::py::flight::PyGeneratorFlightDataStream,
# declared as a subclass of CFlightDataStream. Constructed from a
# Python object, a schema, a cb_data_stream_next callback and IPC
# write options.
cdef cppclass CPyGeneratorFlightDataStream\
        " arrow::py::flight::PyGeneratorFlightDataStream"\
        (CFlightDataStream):
    CPyGeneratorFlightDataStream(object generator,
                                 shared_ptr[CSchema] schema,
                                 function[cb_data_stream_next] callback,
                                 const CIpcWriteOptions& options)
|
||||
|
||||
# Binding for the nested C++ type PyServerMiddleware::Vtable: the
# callback slots for a server middleware instance.
cdef cppclass PyServerMiddlewareVtable \
        " arrow::py::flight::PyServerMiddleware::Vtable":
    PyServerMiddlewareVtable()

    function[cb_middleware_sending_headers] sending_headers
    function[cb_middleware_call_completed] call_completed
|
||||
|
||||
# Binding for the nested C++ type PyClientMiddleware::Vtable: the
# callback slots for a client middleware instance.
cdef cppclass PyClientMiddlewareVtable \
        " arrow::py::flight::PyClientMiddleware::Vtable":
    PyClientMiddlewareVtable()

    function[cb_middleware_sending_headers] sending_headers
    function[cb_client_middleware_received_headers] received_headers
    function[cb_middleware_call_completed] call_completed
|
||||
|
||||
# CServerMiddleware subclass built from a Python middleware object plus
# its callback vtable.
cdef cppclass CPyServerMiddleware \
        " arrow::py::flight::PyServerMiddleware"(CServerMiddleware):
    CPyServerMiddleware(object middleware, PyServerMiddlewareVtable vtable)

    # Untyped pointer accessor.
    # NOTE(review): presumably the wrapped Python object -- confirm.
    void* py_object()
|
||||
|
||||
# CServerMiddlewareFactory subclass built from a Python factory object
# and the `start_call` callback.
cdef cppclass CPyServerMiddlewareFactory \
        " arrow::py::flight::PyServerMiddlewareFactory" \
        (CServerMiddlewareFactory):
    CPyServerMiddlewareFactory(
        object factory, function[cb_server_middleware_start_call] start_call)
|
||||
|
||||
# CClientMiddleware subclass built from a Python middleware object plus
# its callback vtable.
cdef cppclass CPyClientMiddleware \
        " arrow::py::flight::PyClientMiddleware"(CClientMiddleware):
    CPyClientMiddleware(object middleware, PyClientMiddlewareVtable vtable)
|
||||
|
||||
# CClientMiddlewareFactory subclass built from a Python factory object
# and the `start_call` callback.
cdef cppclass CPyClientMiddlewareFactory \
        " arrow::py::flight::PyClientMiddlewareFactory" \
        (CClientMiddlewareFactory):
    CPyClientMiddlewareFactory(
        object factory, function[cb_client_middleware_start_call] start_call)
|
||||
|
||||
# Builds a CFlightInfo from its parts. The result is written through
# `out`; the returned CStatus reports success or failure.
cdef CStatus CreateFlightInfo" arrow::py::flight::CreateFlightInfo"(
    shared_ptr[CSchema] schema, CFlightDescriptor& descriptor,
    vector[CFlightEndpoint] endpoints,
    int64_t total_records, int64_t total_bytes,
    c_bool ordered, const c_string& app_metadata,
    unique_ptr[CFlightInfo]* out)
|
||||
|
||||
# Builds a CSchemaResult from a schema. The result is written through
# `out`; the returned CStatus reports success or failure.
cdef CStatus CreateSchemaResult" arrow::py::flight::CreateSchemaResult"(
    shared_ptr[CSchema] schema, unique_ptr[CSchemaResult]* out)
|
||||
|
||||
|
||||
# Concrete instantiation of std::variant<int, std::string>, exposed under
# a single Cython name with one constructor per alternative.
cdef extern from "<variant>" namespace "std" nogil:
    cdef cppclass CIntStringVariant" std::variant<int, std::string>":
        CIntStringVariant()           # default-constructed
        CIntStringVariant(int)        # holds the int alternative
        CIntStringVariant(c_string)   # holds the string alternative
|
||||
@@ -0,0 +1,364 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
# distutils: language = c++
|
||||
|
||||
from pyarrow.includes.common cimport *
|
||||
from pyarrow.includes.libarrow cimport *
|
||||
from pyarrow.includes.libarrow_python cimport CTimePoint
|
||||
|
||||
cdef extern from "arrow/filesystem/api.h" namespace "arrow::fs" nogil:
|
||||
|
||||
# Mirror of arrow::fs::FileType. Each Cython enumerator is mapped to the
# fully qualified C++ enumerator it stands for.
ctypedef enum CFileType "arrow::fs::FileType":
    CFileType_NotFound "arrow::fs::FileType::NotFound"
    CFileType_Unknown "arrow::fs::FileType::Unknown"
    CFileType_File "arrow::fs::FileType::File"
    CFileType_Directory "arrow::fs::FileType::Directory"
|
||||
|
||||
# Binding for arrow::fs::FileInfo: describes one filesystem entry.
cdef cppclass CFileInfo "arrow::fs::FileInfo":
    # Construction and assignment, declared both by value and by
    # const reference.
    CFileInfo()
    CFileInfo(CFileInfo)
    CFileInfo& operator=(CFileInfo)
    CFileInfo(const CFileInfo&)
    CFileInfo& operator=(const CFileInfo&)

    # Entry type.
    CFileType type()
    void set_type(CFileType type)

    # Path and the components derived from it.
    c_string path()
    void set_path(const c_string& path)
    c_string base_name()

    # Size.
    int64_t size()
    void set_size(int64_t size)

    c_string extension()

    # Modification time.
    CTimePoint mtime()
    void set_mtime(CTimePoint mtime)
|
||||
|
||||
# Binding for arrow::fs::FileSelector: selection criteria passed to the
# selector overload of CFileSystem.GetFileInfo.
cdef cppclass CFileSelector "arrow::fs::FileSelector":
    CFileSelector()

    c_string base_dir
    c_bool allow_not_found
    c_bool recursive
|
||||
|
||||
# Binding for arrow::fs::FileLocator: a (filesystem, path) pair, used by
# CCopyFiles to name sources and destinations.
cdef cppclass CFileLocator "arrow::fs::FileLocator":
    shared_ptr[CFileSystem] filesystem
    c_string path
|
||||
|
||||
# Binding for the abstract arrow::fs::FileSystem interface. Most
# operations return CStatus or CResult[...] rather than raising.
cdef cppclass CFileSystem "arrow::fs::FileSystem":
    shared_ptr[CFileSystem] shared_from_this()
    c_string type_name() const

    # Path helpers.
    CResult[c_string] NormalizePath(c_string path)
    CResult[c_string] MakeUri(c_string path)

    # Metadata lookup: single path, list of paths, or selector.
    CResult[CFileInfo] GetFileInfo(const c_string& path)
    CResult[vector[CFileInfo]] GetFileInfo(const vector[c_string]& paths)
    CResult[vector[CFileInfo]] GetFileInfo(const CFileSelector& select)

    # Directory operations.
    CStatus CreateDir(const c_string& path, c_bool recursive)
    CStatus DeleteDir(const c_string& path)
    CStatus DeleteDirContents(const c_string& path, c_bool missing_dir_ok)
    CStatus DeleteRootDirContents()

    # File operations.
    CStatus DeleteFile(const c_string& path)
    CStatus DeleteFiles(const vector[c_string]& paths)
    CStatus Move(const c_string& src, const c_string& dest)
    CStatus CopyFile(const c_string& src, const c_string& dest)

    # Opening streams and files.
    CResult[shared_ptr[CInputStream]] OpenInputStream(const c_string& path)
    CResult[shared_ptr[CRandomAccessFile]] OpenInputFile(
        const c_string& path)
    CResult[shared_ptr[COutputStream]] OpenOutputStream(
        const c_string& path, const shared_ptr[const CKeyValueMetadata]&)
    CResult[shared_ptr[COutputStream]] OpenAppendStream(
        const c_string& path, const shared_ptr[const CKeyValueMetadata]&)

    # Equality, by reference or by shared pointer.
    c_bool Equals(const CFileSystem& other)
    c_bool Equals(shared_ptr[CFileSystem] other)
|
||||
|
||||
# Filesystem factories. The two CFileSystemFromUri declarations are
# overloads of the same C++ function; the `out_path` variants also
# write a path string through the pointer.
CResult[shared_ptr[CFileSystem]] CFileSystemFromUri \
    "arrow::fs::FileSystemFromUri"(const c_string& uri)

CResult[shared_ptr[CFileSystem]] CFileSystemFromUri \
    "arrow::fs::FileSystemFromUri"(
        const c_string& uri, c_string* out_path)

CResult[shared_ptr[CFileSystem]] CFileSystemFromUriOrPath \
    "arrow::fs::FileSystemFromUriOrPath"(
        const c_string& uri, c_string* out_path)
|
||||
|
||||
# Binding for arrow::fs::FileSystemGlobalOptions: the TLS CA file and
# directory paths passed to CFileSystemsInitialize.
cdef cppclass CFileSystemGlobalOptions "arrow::fs::FileSystemGlobalOptions":
    c_string tls_ca_file_path
    c_string tls_ca_dir_path
|
||||
|
||||
# Binding for arrow::fs::Initialize, taking the global filesystem options.
CStatus CFileSystemsInitialize "arrow::fs::Initialize"(
    const CFileSystemGlobalOptions& options)
|
||||
|
||||
# Binding for arrow::fs::SubTreeFileSystem: a CFileSystem constructed
# from a base path and a base filesystem, both readable back via
# accessors.
cdef cppclass CSubTreeFileSystem "arrow::fs::SubTreeFileSystem"(CFileSystem):
    CSubTreeFileSystem(
        const c_string& base_path, shared_ptr[CFileSystem] base_fs)

    c_string base_path()
    shared_ptr[CFileSystem] base_fs()
|
||||
|
||||
# Mirror of arrow::fs::S3LogLevel, used by CS3GlobalOptions.log_level.
ctypedef enum CS3LogLevel "arrow::fs::S3LogLevel":
    CS3LogLevel_Off "arrow::fs::S3LogLevel::Off"
    CS3LogLevel_Fatal "arrow::fs::S3LogLevel::Fatal"
    CS3LogLevel_Error "arrow::fs::S3LogLevel::Error"
    CS3LogLevel_Warn "arrow::fs::S3LogLevel::Warn"
    CS3LogLevel_Info "arrow::fs::S3LogLevel::Info"
    CS3LogLevel_Debug "arrow::fs::S3LogLevel::Debug"
    CS3LogLevel_Trace "arrow::fs::S3LogLevel::Trace"
|
||||
|
||||
# Binding for arrow::fs::S3GlobalOptions, the argument of CInitializeS3.
cdef struct CS3GlobalOptions "arrow::fs::S3GlobalOptions":
    CS3LogLevel log_level
    int num_event_loop_threads
|
||||
|
||||
# Binding for arrow::fs::S3ProxyOptions: proxy endpoint and credentials.
cdef cppclass CS3ProxyOptions "arrow::fs::S3ProxyOptions":
    c_string scheme
    c_string host
    int port
    c_string username
    c_string password

    c_bool Equals(const CS3ProxyOptions& other)

    # Exposed to Cython as FromUriString; the C++ name is FromUri.
    @staticmethod
    CResult[CS3ProxyOptions] FromUriString "FromUri"(
        const c_string& uri_string)
|
||||
|
||||
# Mirror of arrow::fs::S3CredentialsKind, the type of
# CS3Options.credentials_kind.
ctypedef enum CS3CredentialsKind "arrow::fs::S3CredentialsKind":
    CS3CredentialsKind_Anonymous "arrow::fs::S3CredentialsKind::Anonymous"
    CS3CredentialsKind_Default "arrow::fs::S3CredentialsKind::Default"
    CS3CredentialsKind_Explicit "arrow::fs::S3CredentialsKind::Explicit"
    CS3CredentialsKind_Role "arrow::fs::S3CredentialsKind::Role"
    CS3CredentialsKind_WebIdentity \
        "arrow::fs::S3CredentialsKind::WebIdentity"
|
||||
|
||||
# Binding for arrow::fs::S3RetryStrategy. Only the two static factories
# are exposed; each takes the maximum number of attempts.
cdef cppclass CS3RetryStrategy "arrow::fs::S3RetryStrategy":
    @staticmethod
    shared_ptr[CS3RetryStrategy] GetAwsDefaultRetryStrategy(
        int64_t max_attempts)

    @staticmethod
    shared_ptr[CS3RetryStrategy] GetAwsStandardRetryStrategy(
        int64_t max_attempts)
|
||||
|
||||
# Binding for arrow::fs::S3Options: connection, behaviour and credential
# settings consumed by CS3FileSystem.Make.
cdef cppclass CS3Options "arrow::fs::S3Options":
    # Endpoint and timeouts.
    c_string region
    double connect_timeout
    double request_timeout
    c_string endpoint_override
    c_string scheme

    # Behaviour switches.
    c_bool background_writes
    c_bool allow_delayed_open
    c_bool allow_bucket_creation
    c_bool allow_bucket_deletion
    c_bool check_directory_existence_before_creation
    c_bool force_virtual_addressing

    c_string tls_ca_file_path
    shared_ptr[const CKeyValueMetadata] default_metadata

    # Assume-role settings (same names as FromAssumeRole's parameters).
    c_string role_arn
    c_string session_name
    c_string external_id
    int load_frequency

    CS3ProxyOptions proxy_options
    CS3CredentialsKind credentials_kind
    shared_ptr[CS3RetryStrategy] retry_strategy

    # Credential configuration and read-back.
    void ConfigureDefaultCredentials()
    void ConfigureAccessKey(
        const c_string& access_key, const c_string& secret_key,
        const c_string& session_token)
    c_string GetAccessKey()
    c_string GetSecretKey()
    c_string GetSessionToken()

    c_bool Equals(const CS3Options& other)

    # Static factories.
    @staticmethod
    CS3Options Defaults()

    @staticmethod
    CS3Options Anonymous()

    @staticmethod
    CS3Options FromAccessKey(
        const c_string& access_key, const c_string& secret_key,
        const c_string& session_token)

    @staticmethod
    CS3Options FromAssumeRole(
        const c_string& role_arn, const c_string& session_name,
        const c_string& external_id, const int load_frequency)
|
||||
|
||||
# Binding for arrow::fs::S3FileSystem, a CFileSystem created through the
# static Make factory.
cdef cppclass CS3FileSystem "arrow::fs::S3FileSystem"(CFileSystem):
    @staticmethod
    CResult[shared_ptr[CS3FileSystem]] Make(const CS3Options& options)

    CS3Options options()
    c_string region()
|
||||
|
||||
# S3 subsystem initialisation and finalisation entry points.
cdef CStatus CInitializeS3 "arrow::fs::InitializeS3"(
    const CS3GlobalOptions& options)
cdef CStatus CEnsureS3Initialized "arrow::fs::EnsureS3Initialized"()
cdef CStatus CFinalizeS3 "arrow::fs::FinalizeS3"()
cdef CStatus CEnsureS3Finalized "arrow::fs::EnsureS3Finalized"()

# Declared without an explicit C name, so it resolves through the
# enclosing extern block's namespace.
cdef CResult[c_string] ResolveS3BucketRegion(const c_string& bucket)
|
||||
|
||||
# Binding for arrow::fs::GcsCredentials: read-only accessors only.
cdef cppclass CGcsCredentials "arrow::fs::GcsCredentials":
    c_bool anonymous()
    CTimePoint expiration()
    c_string access_token()
    c_string target_service_account()
|
||||
|
||||
# Binding for arrow::fs::GcsOptions: settings consumed by
# CGcsFileSystem.Make.
cdef cppclass CGcsOptions "arrow::fs::GcsOptions":
    CGcsCredentials credentials
    c_string endpoint_override
    c_string scheme
    c_string default_bucket_location
    optional[c_string] project_id
    optional[double] retry_limit_seconds
    shared_ptr[const CKeyValueMetadata] default_metadata

    # Compares against another GcsOptions. This was previously declared
    # as taking `const CS3Options&`, a copy-paste slip from the S3
    # binding; every other options class here compares with its own type.
    c_bool Equals(const CGcsOptions& other)

    # Static factories.
    @staticmethod
    CGcsOptions Defaults()

    @staticmethod
    CGcsOptions Anonymous()

    @staticmethod
    CGcsOptions FromAccessToken(
        const c_string& access_token, CTimePoint expiration)

    # `target_service_account` is taken by const reference, matching the
    # other string parameters in this file, so callers may pass
    # temporaries as well as named variables.
    @staticmethod
    CGcsOptions FromImpersonatedServiceAccount(
        const CGcsCredentials& base_credentials,
        const c_string& target_service_account)
|
||||
|
||||
# Binding for arrow::fs::GcsFileSystem, created through the static Make
# factory. No base class is declared on the Cython side.
cdef cppclass CGcsFileSystem "arrow::fs::GcsFileSystem":
    @staticmethod
    CResult[shared_ptr[CGcsFileSystem]] Make(const CGcsOptions& options)

    CGcsOptions options()
|
||||
|
||||
# Binding for arrow::fs::AzureOptions: account and endpoint settings
# plus the credential-configuration methods.
cdef cppclass CAzureOptions "arrow::fs::AzureOptions":
    c_string account_name
    c_string blob_storage_authority
    c_string dfs_storage_authority
    c_string blob_storage_scheme
    c_string dfs_storage_scheme

    c_bool Equals(const CAzureOptions& other)

    # Each Configure* call selects one credential mechanism and reports
    # the outcome as a CStatus.
    CStatus ConfigureDefaultCredential()
    CStatus ConfigureAccountKeyCredential(c_string account_key)
    CStatus ConfigureSASCredential(c_string sas_token)
    CStatus ConfigureManagedIdentityCredential(c_string client_id)
    CStatus ConfigureClientSecretCredential(
        c_string tenant_id, c_string client_id, c_string client_secret)
|
||||
|
||||
# Binding for arrow::fs::AzureFileSystem, created through the static Make
# factory. No base class is declared on the Cython side.
cdef cppclass CAzureFileSystem "arrow::fs::AzureFileSystem":
    @staticmethod
    CResult[shared_ptr[CAzureFileSystem]] Make(const CAzureOptions& options)

    CAzureOptions options()
|
||||
|
||||
# Binding for arrow::fs::HdfsOptions: settings consumed by
# CHadoopFileSystem.Make.
cdef cppclass CHdfsOptions "arrow::fs::HdfsOptions":
    HdfsConnectionConfig connection_config
    int32_t buffer_size
    int16_t replication
    int64_t default_block_size

    # Exposed to Cython as FromUriString; the C++ name is FromUri.
    @staticmethod
    CResult[CHdfsOptions] FromUriString "FromUri"(
        const c_string& uri_string)

    # Setters; none of them returns a status.
    void ConfigureEndPoint(c_string host, int port)
    void ConfigureDriver(c_bool use_hdfs3)
    void ConfigureReplication(int16_t replication)
    void ConfigureUser(c_string user_name)
    void ConfigureBufferSize(int32_t buffer_size)
    void ConfigureBlockSize(int64_t default_block_size)
    void ConfigureKerberosTicketCachePath(c_string path)
    void ConfigureExtraConf(c_string key, c_string value)
|
||||
|
||||
# Binding for arrow::fs::HadoopFileSystem, a CFileSystem created through
# the static Make factory.
cdef cppclass CHadoopFileSystem "arrow::fs::HadoopFileSystem"(CFileSystem):
    @staticmethod
    CResult[shared_ptr[CHadoopFileSystem]] Make(const CHdfsOptions& options)

    CHdfsOptions options()
|
||||
|
||||
# Binding for arrow::fs::internal::MockFileSystem, a CFileSystem
# constructed from a time point.
cdef cppclass CMockFileSystem \
        "arrow::fs::internal::MockFileSystem"(CFileSystem):
    CMockFileSystem(CTimePoint current_time)
|
||||
|
||||
# Two Cython names for the two overloads of arrow::fs::CopyFiles.

# Overload taking explicit source and destination locator lists.
CStatus CCopyFiles "arrow::fs::CopyFiles"(
    const vector[CFileLocator]& sources,
    const vector[CFileLocator]& destinations,
    const CIOContext& io_context,
    int64_t chunk_size,
    c_bool use_threads)

# Overload taking a selector on the source filesystem and a base
# directory on the destination filesystem.
CStatus CCopyFilesWithSelector "arrow::fs::CopyFiles"(
    const shared_ptr[CFileSystem]& source_fs,
    const CFileSelector& source_sel,
    const shared_ptr[CFileSystem]& destination_fs,
    const c_string& destination_base_dir,
    const CIOContext& io_context,
    int64_t chunk_size,
    c_bool use_threads)
|
||||
|
||||
|
||||
# Callbacks for implementing Python filesystems
# Use typedef to emulate syntax for std::function<void(..)>
#
# Every callback takes the Python handler object first. Results are
# delivered through a trailing out-pointer, except for CallbackEquals,
# which returns its answer directly.

# Identity and comparison.
ctypedef void CallbackGetTypeName(object, c_string*)
ctypedef c_bool CallbackEquals(object, const CFileSystem&)

# Metadata lookup: single path, list of paths, selector.
ctypedef void CallbackGetFileInfo(object, const c_string&, CFileInfo*)
ctypedef void CallbackGetFileInfoVector(
    object, const vector[c_string]&, vector[CFileInfo]*)
ctypedef void CallbackGetFileInfoSelector(
    object, const CFileSelector&, vector[CFileInfo]*)

# Directory and file mutation.
ctypedef void CallbackCreateDir(object, const c_string&, c_bool)
ctypedef void CallbackDeleteDir(object, const c_string&)
ctypedef void CallbackDeleteDirContents(object, const c_string&, c_bool)
ctypedef void CallbackDeleteRootDirContents(object)
ctypedef void CallbackDeleteFile(object, const c_string&)
ctypedef void CallbackMove(object, const c_string&, const c_string&)
ctypedef void CallbackCopyFile(object, const c_string&, const c_string&)

# Opening streams and files.
ctypedef void CallbackOpenInputStream(
    object, const c_string&, shared_ptr[CInputStream]*)
ctypedef void CallbackOpenInputFile(
    object, const c_string&, shared_ptr[CRandomAccessFile]*)
ctypedef void CallbackOpenOutputStream(
    object, const c_string&, const shared_ptr[const CKeyValueMetadata]&,
    shared_ptr[COutputStream]*)

ctypedef void CallbackNormalizePath(object, const c_string&, c_string*)
|
||||
|
||||
cdef extern from "arrow/python/filesystem.h" namespace "arrow::py::fs" nogil:
|
||||
|
||||
# Binding for arrow::py::fs::PyFileSystemVtable: the std::function slots
# that CPyFileSystem dispatches to.
cdef cppclass CPyFileSystemVtable "arrow::py::fs::PyFileSystemVtable":
    # Default constructor. Cython identifies a cppclass constructor by
    # the Cython-side class name, so it must be spelled
    # CPyFileSystemVtable rather than the C++ name PyFileSystemVtable,
    # which would declare an ordinary member of that name instead.
    CPyFileSystemVtable()

    # Identity and comparison.
    function[CallbackGetTypeName] get_type_name
    function[CallbackEquals] equals

    # Metadata lookup.
    function[CallbackGetFileInfo] get_file_info
    function[CallbackGetFileInfoVector] get_file_info_vector
    function[CallbackGetFileInfoSelector] get_file_info_selector

    # Directory and file mutation.
    function[CallbackCreateDir] create_dir
    function[CallbackDeleteDir] delete_dir
    function[CallbackDeleteDirContents] delete_dir_contents
    function[CallbackDeleteRootDirContents] delete_root_dir_contents
    function[CallbackDeleteFile] delete_file
    function[CallbackMove] move
    function[CallbackCopyFile] copy_file

    # Opening streams and files. open_append_stream shares the
    # CallbackOpenOutputStream signature.
    function[CallbackOpenInputStream] open_input_stream
    function[CallbackOpenInputFile] open_input_file
    function[CallbackOpenOutputStream] open_output_stream
    function[CallbackOpenOutputStream] open_append_stream

    function[CallbackNormalizePath] normalize_path
|
||||
|
||||
# Binding for arrow::py::fs::PyFileSystem, built from a Python handler
# object and its callback vtable.
cdef cppclass CPyFileSystem "arrow::py::fs::PyFileSystem":
    @staticmethod
    shared_ptr[CPyFileSystem] Make(
        object handler, CPyFileSystemVtable vtable)

    # Raw PyObject* accessor for the handler.
    # NOTE(review): reference ownership is not visible here -- confirm
    # against arrow/python/filesystem.h before changing refcount handling.
    PyObject* handler()
|
||||
@@ -0,0 +1,296 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
# distutils: language = c++
|
||||
|
||||
from pyarrow.includes.common cimport *
|
||||
from pyarrow.includes.libarrow cimport *
|
||||
|
||||
|
||||
# Signature of the Python-side invalid-row callback: takes the handler
# object and the offending row, returns a CInvalidRowResult.
ctypedef CInvalidRowResult PyInvalidRowCallback(
    object, const CCSVInvalidRow&)
|
||||
|
||||
|
||||
cdef extern from "arrow/python/csv.h" namespace "arrow::py::csv":
    # Wraps a PyInvalidRowCallback and a Python handler object into a
    # std::function of type CInvalidRowHandler.
    function[CInvalidRowHandler] MakeInvalidRowHandler(
        function[PyInvalidRowCallback], object handler)
|
||||
|
||||
|
||||
# This extern block is not marked `nogil`: the function takes Python
# objects.
cdef extern from "arrow/python/api.h" namespace "arrow::py":
    # Requires GIL
    CResult[shared_ptr[CDataType]] InferArrowType(
        object obj,
        object mask,
        c_bool pandas_null_sentinels)
|
||||
|
||||
|
||||
# Helpers for the month/day/nano interval type.
cdef extern from "arrow/python/api.h" namespace "arrow::py::internal":
    object NewMonthDayNanoTupleType()

    # Array-level and scalar-level conversions; each yields a PyObject*
    # wrapped in CResult.
    CResult[PyObject*] MonthDayNanoIntervalArrayToPyList(
        const CMonthDayNanoIntervalArray& array)
    CResult[PyObject*] MonthDayNanoIntervalScalarToPyObject(
        const CMonthDayNanoIntervalScalar& scalar)
|
||||
|
||||
|
||||
# NOTE(review): the extern namespace names the enum itself, presumably so
# that the bare enumerators below resolve to
# arrow::py::MapConversionType::DEFAULT etc. -- confirm before changing.
cdef extern from "arrow/python/arrow_to_pandas.h" \
        namespace "arrow::py::MapConversionType":
    cdef enum MapConversionType "arrow::py::MapConversionType":
        DEFAULT,
        LOSSY,
        STRICT_
|
||||
|
||||
|
||||
cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil:
|
||||
# Returns the data type instance for a `Type` enum value.
shared_ptr[CDataType] GetPrimitiveType(Type type)

# Builds a Python object from a half-precision value passed as uint16_t.
object PyFloat_FromHalf(uint16_t value)
|
||||
|
||||
# Options struct passed to ConvertPySequence.
cdef cppclass PyConversionOptions:
    PyConversionOptions()

    shared_ptr[CDataType] type
    int64_t size
    CMemoryPool* pool
    c_bool from_pandas
    c_bool ignore_timezone
    c_bool strict
|
||||
|
||||
# TODO Some functions below are not actually "nogil"

# Python sequence -> chunked array.
CResult[shared_ptr[CChunkedArray]] ConvertPySequence(
    object obj, object mask,
    const PyConversionOptions& options, CMemoryPool* pool)

# Array built from start/stop/step.
CResult[shared_ptr[CArray]] Arange(
    int64_t start, int64_t stop, int64_t step, CMemoryPool* pool)

# NumPy dtype object -> Arrow data type.
CResult[shared_ptr[CDataType]] NumPyDtypeToArrow(object dtype)

# ndarray -> chunked array, written through `out`. The second overload
# additionally takes cast options.
CStatus NdarrayToArrow(
    CMemoryPool* pool, object ao, object mo,
    c_bool from_pandas,
    const shared_ptr[CDataType]& type,
    shared_ptr[CChunkedArray]* out)

CStatus NdarrayToArrow(
    CMemoryPool* pool, object ao, object mo,
    c_bool from_pandas,
    const shared_ptr[CDataType]& type,
    const CCastOptions& cast_options,
    shared_ptr[CChunkedArray]* out)
|
||||
|
||||
# Dense tensor <-> ndarray. Results are written through the trailing
# out-pointers.
CStatus NdarrayToTensor(
    CMemoryPool* pool, object ao,
    const vector[c_string]& dim_names,
    shared_ptr[CTensor]* out)

CStatus TensorToNdarray(
    const shared_ptr[CTensor]& tensor, object base, PyObject** out)

# Sparse tensor -> ndarrays, one out-pointer per component array.
CStatus SparseCOOTensorToNdarray(
    const shared_ptr[CSparseCOOTensor]& sparse_tensor, object base,
    PyObject** out_data, PyObject** out_coords)

CStatus SparseCSRMatrixToNdarray(
    const shared_ptr[CSparseCSRMatrix]& sparse_tensor, object base,
    PyObject** out_data, PyObject** out_indptr, PyObject** out_indices)

CStatus SparseCSCMatrixToNdarray(
    const shared_ptr[CSparseCSCMatrix]& sparse_tensor, object base,
    PyObject** out_data, PyObject** out_indptr, PyObject** out_indices)

CStatus SparseCSFTensorToNdarray(
    const shared_ptr[CSparseCSFTensor]& sparse_tensor, object base,
    PyObject** out_data, PyObject** out_indptr, PyObject** out_indices)
|
||||
|
||||
# ndarrays -> sparse tensor, one function per sparse format. The result
# is written through `out`.
CStatus NdarraysToSparseCOOTensor(
    CMemoryPool* pool, object data_ao, object coords_ao,
    const vector[int64_t]& shape,
    const vector[c_string]& dim_names,
    shared_ptr[CSparseCOOTensor]* out)

CStatus NdarraysToSparseCSRMatrix(
    CMemoryPool* pool, object data_ao,
    object indptr_ao, object indices_ao,
    const vector[int64_t]& shape,
    const vector[c_string]& dim_names,
    shared_ptr[CSparseCSRMatrix]* out)

CStatus NdarraysToSparseCSCMatrix(
    CMemoryPool* pool, object data_ao,
    object indptr_ao, object indices_ao,
    const vector[int64_t]& shape,
    const vector[c_string]& dim_names,
    shared_ptr[CSparseCSCMatrix]* out)

# The CSF variant additionally takes an axis order.
CStatus NdarraysToSparseCSFTensor(
    CMemoryPool* pool, object data_ao,
    object indptr_ao, object indices_ao,
    const vector[int64_t]& shape,
    const vector[int64_t]& axis_order,
    const vector[c_string]& dim_names,
    shared_ptr[CSparseCSFTensor]* out)
|
||||
|
||||
# Dense tensor -> sparse tensor, one function per sparse format. The
# result is written through `out`.
CStatus TensorToSparseCOOTensor(
    shared_ptr[CTensor], shared_ptr[CSparseCOOTensor]* out)

CStatus TensorToSparseCSRMatrix(
    shared_ptr[CTensor], shared_ptr[CSparseCSRMatrix]* out)

CStatus TensorToSparseCSCMatrix(
    shared_ptr[CTensor], shared_ptr[CSparseCSCMatrix]* out)

CStatus TensorToSparseCSFTensor(
    shared_ptr[CTensor], shared_ptr[CSparseCSFTensor]* out)
|
||||
|
||||
# Arrow -> pandas conversions driven by PandasOptions. The resulting
# Python object is written through `out`.
CStatus ConvertArrayToPandas(
    const PandasOptions& options, shared_ptr[CArray] arr,
    object py_ref, PyObject** out)

CStatus ConvertChunkedArrayToPandas(
    const PandasOptions& options, shared_ptr[CChunkedArray] arr,
    object py_ref, PyObject** out)

# The table variant takes no `py_ref`.
CStatus ConvertTableToPandas(
    const PandasOptions& options, shared_ptr[CTable] table,
    PyObject** out)
|
||||
|
||||
# Setter and getter for the memory pool used by the arrow::py layer.
# A stray trailing line continuation after the setter's parameter list
# has been removed: it only worked because the following line was blank.
void c_set_default_memory_pool \
    " arrow::py::set_default_memory_pool"(CMemoryPool* pool)

CMemoryPool* c_get_memory_pool \
    " arrow::py::get_memory_pool"()
|
||||
|
||||
# CBuffer subclass created from a Python object through a static factory.
cdef cppclass PyBuffer(CBuffer):
    @staticmethod
    CResult[shared_ptr[CBuffer]] FromPyObject(object obj)
|
||||
|
||||
# CBuffer subclass created from a raw data pointer, a size and a Python
# `base` object. The buffer is written through `out`.
# NOTE(review): presumably `base` keeps the memory alive -- confirm.
cdef cppclass PyForeignBuffer(CBuffer):
    @staticmethod
    CStatus Make(
        const uint8_t* data, int64_t size, object base,
        shared_ptr[CBuffer]* out)
|
||||
|
||||
# CRandomAccessFile implementation constructed from a Python object.
cdef cppclass PyReadableFile(CRandomAccessFile):
    PyReadableFile(object fo)
|
||||
|
||||
# COutputStream implementation constructed from a Python object.
cdef cppclass PyOutputStream(COutputStream):
    PyOutputStream(object fo)
|
||||
|
||||
# Options struct consumed by the Convert*ToPandas functions.
cdef cppclass PandasOptions:
    CMemoryPool* pool

    # Boolean switches.
    c_bool strings_to_categorical
    c_bool zero_copy_only
    c_bool integer_object_nulls
    c_bool date_as_object
    c_bool timestamp_as_object
    c_bool use_threads
    c_bool coerce_temporal_nanoseconds
    c_bool ignore_timezone
    c_bool deduplicate_objects
    c_bool safe_cast
    c_bool split_blocks
    c_bool self_destruct

    # Map handling (see MapConversionType) and dictionary decoding.
    MapConversionType maps_as_pydicts
    c_bool decode_dictionaries

    # Column-name sets.
    unordered_set[c_string] categorical_columns
    unordered_set[c_string] extension_columns

    c_bool to_numpy
|
||||
|
||||
|
||||
cdef extern from "arrow/python/api.h" namespace "arrow::py::internal" nogil:
    # Opaque time-point type: no members are exposed to Cython.
    cdef cppclass CTimePoint "arrow::py::internal::TimePoint":
        pass

    # Conversions to and from CTimePoint.
    CTimePoint PyDateTime_to_TimePoint(PyDateTime_DateTime* pydatetime)
    int64_t TimePoint_to_ns(CTimePoint val)
    CTimePoint TimePoint_from_s(double val)
    CTimePoint TimePoint_from_ns(int64_t val)

    # tzinfo object <-> string.
    CResult[c_string] TzinfoToString(PyObject* pytzinfo)
    CResult[PyObject*] StringToTzinfo(c_string)
|
||||
|
||||
|
||||
cdef extern from "arrow/python/numpy_init.h" namespace "arrow::py":
    # `except -1`: a return value of -1 signals a raised Python exception.
    int arrow_init_numpy() except -1
|
||||
|
||||
|
||||
cdef extern from "arrow/python/pyarrow.h" namespace "arrow::py":
    # `except -1`: a return value of -1 signals a raised Python exception.
    int import_pyarrow() except -1
|
||||
|
||||
|
||||
# Helpers for Python errors carried in a CStatus.
cdef extern from "arrow/python/common.h" namespace "arrow::py":
    c_bool IsPyError(const CStatus& status)
    # `except *`: Cython checks for a pending Python exception afterwards.
    void RestorePyError(const CStatus& status) except *
|
||||
|
||||
|
||||
# Smart-pointer subclasses declared in arrow/python/common.h.
cdef extern from "arrow/python/common.h" namespace "arrow::py" nogil:
    cdef cppclass SharedPtrNoGIL[T](shared_ptr[T]):
        # This looks like the only way to satisfy both Cython 2 and Cython 3
        SharedPtrNoGIL& operator=(...)

    cdef cppclass UniquePtrNoGIL[T, DELETER=*](unique_ptr[T, DELETER]):
        # Same variadic assignment declaration as SharedPtrNoGIL.
        UniquePtrNoGIL& operator=(...)
|
||||
|
||||
|
||||
# Type predicates on Python objects.
cdef extern from "arrow/python/inference.h" namespace "arrow::py":
    c_bool IsPyBool(object o)
    c_bool IsPyInt(object o)
    c_bool IsPyFloat(object o)
|
||||
|
||||
|
||||
cdef extern from "arrow/python/ipc.h" namespace "arrow::py":
    # Record batch reader created from a schema and a Python object.
    cdef cppclass CPyRecordBatchReader \
            " arrow::py::PyRecordBatchReader"(CRecordBatchReader):
        @staticmethod
        CResult[shared_ptr[CRecordBatchReader]] Make(
            shared_ptr[CSchema], object)
|
||||
|
||||
|
||||
cdef extern from "arrow/python/ipc.h" namespace "arrow::py" nogil:
    # Record batch reader created from another reader and a schema.
    cdef cppclass CCastingRecordBatchReader \
            " arrow::py::CastingRecordBatchReader"(CRecordBatchReader):
        @staticmethod
        CResult[shared_ptr[CRecordBatchReader]] Make(
            shared_ptr[CRecordBatchReader], shared_ptr[CSchema])
|
||||
|
||||
|
||||
cdef extern from "arrow/python/extension_type.h" namespace "arrow::py":
    # Extension type backed by a Python class or instance.
    cdef cppclass CPyExtensionType \
            " arrow::py::PyExtensionType"(CExtensionType):
        # Factories; the new type is written through `out`.
        @staticmethod
        CStatus FromClass(
            const shared_ptr[CDataType] storage_type,
            const c_string extension_name,
            object typ,
            shared_ptr[CExtensionType]* out)

        @staticmethod
        CStatus FromInstance(
            shared_ptr[CDataType] storage_type,
            object inst,
            shared_ptr[CExtensionType]* out)

        # Access to the associated Python instance.
        object GetInstance()
        CStatus SetInstance(object)

    # Namespace-level helpers for the extension type registry.
    c_string PyExtensionName()
    CStatus RegisterPyExtensionType(shared_ptr[CDataType])
    CStatus UnregisterPyExtensionType(c_string type_name)
|
||||
|
||||
|
||||
cdef extern from "arrow/python/benchmark.h" \
        namespace "arrow::py::benchmark":
    # `except *`: Cython checks for a pending Python exception afterwards.
    void Benchmark_PandasObjectIsNull(object lst) except *
|
||||
|
||||
|
||||
cdef extern from "arrow/python/gdb.h" namespace "arrow::gdb" nogil:
    # Cython name GdbTestSession maps to arrow::gdb::TestSession.
    void GdbTestSession "arrow::gdb::TestSession"()
|
||||
|
||||
cdef extern from "arrow/python/helpers.h" namespace "arrow::py::internal":
    # Boolean query; takes no arguments.
    c_bool IsThreadingEnabled()
|
||||
|
||||
cdef extern from "arrow/python/config.h" namespace "arrow::py":
    # Build information; only `build_type` is exposed.
    cdef cppclass CBuildInfo "arrow::py::BuildInfo":
        c_string build_type

    # Accessor returning a const reference to the build information.
    const CBuildInfo& GetBuildInfo "arrow::py::GetBuildInfo"()
|
||||
@@ -0,0 +1,100 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
# distutils: language = c++
|
||||
|
||||
from libcpp.vector cimport vector as std_vector
|
||||
|
||||
from pyarrow.includes.common cimport *
|
||||
from pyarrow.includes.libarrow cimport *
|
||||
from pyarrow.includes.libarrow_acero cimport *
|
||||
|
||||
# Signature of a named-table provider: given a list of names and a
# schema, returns a CDeclaration wrapped in CResult.
ctypedef CResult[CDeclaration] CNamedTableProvider(
    const std_vector[c_string]&, const CSchema&)
|
||||
|
||||
cdef extern from "arrow/engine/substrait/options.h" \
        namespace "arrow::engine" nogil:
    # Mirror of arrow::engine::ConversionStrictness.
    cdef enum ConversionStrictness "arrow::engine::ConversionStrictness":
        EXACT_ROUNDTRIP \
            "arrow::engine::ConversionStrictness::EXACT_ROUNDTRIP"
        PRESERVE_STRUCTURE \
            "arrow::engine::ConversionStrictness::PRESERVE_STRUCTURE"
        BEST_EFFORT \
            "arrow::engine::ConversionStrictness::BEST_EFFORT"

    # Options passed to the Substrait serialize/deserialize/execute calls.
    cdef cppclass CConversionOptions "arrow::engine::ConversionOptions":
        CConversionOptions()

        ConversionStrictness strictness
        function[CNamedTableProvider] named_table_provider
        c_bool allow_arrow_extensions
|
||||
|
||||
cdef extern from "arrow/engine/substrait/extension_set.h" \
        namespace "arrow::engine" nogil:

    # Identifier made of two string views: a URI and a name.
    cdef struct CSubstraitId "arrow::engine::Id":
        cpp_string_view uri
        cpp_string_view name

    # Result record of CExtensionSet.DecodeType: an id and a data type.
    cdef struct CExtensionSetTypeRecord \
            "arrow::engine::ExtensionSet::TypeRecord":
        CSubstraitId id
        shared_ptr[CDataType] type

    # Binding for arrow::engine::ExtensionSet.
    cdef cppclass CExtensionSet "arrow::engine::ExtensionSet":
        CExtensionSet()

        unordered_map[uint32_t, cpp_string_view]& uris()
        CResult[uint32_t] EncodeType(const CDataType&)
        CResult[CExtensionSetTypeRecord] DecodeType(uint32_t)

    cdef cppclass ExtensionIdRegistry:
        std_vector[c_string] GetSupportedSubstraitFunctions()

    # Namespace-level accessor returning a registry pointer.
    ExtensionIdRegistry* default_extension_id_registry()
|
||||
|
||||
cdef extern from "arrow/engine/substrait/relation.h" \
        namespace "arrow::engine" nogil:

    # An expression paired with a name.
    cdef cppclass CNamedExpression "arrow::engine::NamedExpression":
        CExpression expression
        c_string name

    # A list of named expressions together with a schema.
    cdef cppclass CBoundExpressions "arrow::engine::BoundExpressions":
        std_vector[CNamedExpression] named_expressions
        shared_ptr[CSchema] schema
|
||||
|
||||
# Substrait serialization entry points, in serialize/deserialize pairs.
cdef extern from "arrow/engine/substrait/serde.h" \
        namespace "arrow::engine" nogil:

    # Expressions <-> buffer.
    CResult[shared_ptr[CBuffer]] SerializeExpressions(
        const CBoundExpressions& bound_expressions,
        const CConversionOptions& conversion_options)

    CResult[CBoundExpressions] DeserializeExpressions(
        const CBuffer& serialized_expressions)

    # Schema <-> buffer. Serializing takes the extension set by pointer,
    # deserializing by const reference.
    CResult[shared_ptr[CBuffer]] SerializeSchema(
        const CSchema &schema,
        CExtensionSet* extension_set,
        const CConversionOptions& conversion_options)

    CResult[shared_ptr[CSchema]] DeserializeSchema(
        const CBuffer& serialized_schema,
        const CExtensionSet& extension_set,
        const CConversionOptions& conversion_options)
|
||||
|
||||
|
||||
# Declarations taken from arrow/engine/substrait/util.h
# (C++ namespace arrow::engine); everything here is declared nogil.
cdef extern from "arrow/engine/substrait/util.h" \
        namespace "arrow::engine" nogil:

    # Takes a serialized plan buffer and yields a record batch reader
    # wrapped in CResult.
    CResult[shared_ptr[CRecordBatchReader]] ExecuteSerializedPlan(
        const CBuffer& substrait_buffer,
        const ExtensionIdRegistry* registry,
        CFunctionRegistry* func_registry,
        const CConversionOptions& conversion_options,
        c_bool use_threads)

    # Takes a JSON string and yields a buffer wrapped in CResult.
    CResult[shared_ptr[CBuffer]] SerializeJsonPlan(
        const c_string& substrait_json)
|
||||
@@ -0,0 +1,298 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
# distutils: language = c++
|
||||
|
||||
from libcpp.string cimport string as c_string
|
||||
from libcpp.unordered_set cimport unordered_set as c_unordered_set
|
||||
from libc.stdint cimport int64_t, int32_t, uint8_t, uintptr_t
|
||||
|
||||
from pyarrow.includes.common cimport *
|
||||
from pyarrow.includes.libarrow cimport *
|
||||
|
||||
# Declarations taken from gandiva/node.h (C++ namespace gandiva);
# everything here is declared nogil.
cdef extern from "gandiva/node.h" namespace "gandiva" nogil:

    # gandiva::Node
    cdef cppclass CNode" gandiva::Node":
        c_string ToString()
        shared_ptr[CDataType] return_type()

    # gandiva::Expression
    cdef cppclass CGandivaExpression" gandiva::Expression":
        c_string ToString()
        shared_ptr[CNode] root()
        shared_ptr[CField] result()

    # Vector aliases for shared pointers to the two classes above.
    ctypedef vector[shared_ptr[CNode]] CNodeVector" gandiva::NodeVector"
    ctypedef vector[shared_ptr[CGandivaExpression]] \
        CExpressionVector" gandiva::ExpressionVector"
|
||||
|
||||
# Declarations taken from gandiva/selection_vector.h (C++ namespace gandiva);
# everything here is declared nogil.
cdef extern from "gandiva/selection_vector.h" namespace "gandiva" nogil:

    cdef cppclass CSelectionVector" gandiva::SelectionVector":
        shared_ptr[CArray] ToArray()

    # Cython-side names for the gandiva::SelectionVector::Mode enumerators.
    enum CSelectionVector_Mode" gandiva::SelectionVector::Mode":
        CSelectionVector_Mode_NONE" gandiva::SelectionVector::Mode::MODE_NONE"
        CSelectionVector_Mode_UINT16" gandiva::SelectionVector::Mode::MODE_UINT16"
        CSelectionVector_Mode_UINT32" gandiva::SelectionVector::Mode::MODE_UINT32"
        CSelectionVector_Mode_UINT64" gandiva::SelectionVector::Mode::MODE_UINT64"

    # Factory functions, one per index width. Each returns a CStatus and
    # hands the new vector back through the `selection_vector` out-pointer.
    cdef CStatus SelectionVector_MakeInt16 \
        "gandiva::SelectionVector::MakeInt16"(
            int64_t max_slots,
            CMemoryPool* pool,
            shared_ptr[CSelectionVector]* selection_vector)

    cdef CStatus SelectionVector_MakeInt32 \
        "gandiva::SelectionVector::MakeInt32"(
            int64_t max_slots,
            CMemoryPool* pool,
            shared_ptr[CSelectionVector]* selection_vector)

    cdef CStatus SelectionVector_MakeInt64 \
        "gandiva::SelectionVector::MakeInt64"(
            int64_t max_slots,
            CMemoryPool* pool,
            shared_ptr[CSelectionVector]* selection_vector)
|
||||
|
||||
cdef inline CSelectionVector_Mode _ensure_selection_mode(str name) except *:
    # Translate a selection-mode name into its CSelectionVector_Mode value.
    # The name is upper-cased before comparison, so matching ignores case.
    # Any name other than NONE / UINT16 / UINT32 / UINT64 raises ValueError,
    # quoting the name exactly as the caller supplied it.
    mode_key = name.upper()
    if mode_key == 'NONE':
        return CSelectionVector_Mode_NONE
    if mode_key == 'UINT16':
        return CSelectionVector_Mode_UINT16
    if mode_key == 'UINT32':
        return CSelectionVector_Mode_UINT32
    if mode_key == 'UINT64':
        return CSelectionVector_Mode_UINT64
    raise ValueError(f'Invalid value for Selection Mode: {name!r}')
|
||||
|
||||
cdef inline str _selection_mode_name(CSelectionVector_Mode ctype):
    # Inverse of _ensure_selection_mode: map a CSelectionVector_Mode value
    # to its upper-case name. A value matching none of the four known
    # enumerators raises RuntimeError.
    if ctype == CSelectionVector_Mode_NONE:
        return 'NONE'
    if ctype == CSelectionVector_Mode_UINT16:
        return 'UINT16'
    if ctype == CSelectionVector_Mode_UINT32:
        return 'UINT32'
    if ctype == CSelectionVector_Mode_UINT64:
        return 'UINT64'
    raise RuntimeError('Unexpected CSelectionVector_Mode value')
|
||||
|
||||
# Declarations taken from gandiva/condition.h (C++ namespace gandiva);
# everything here is declared nogil.
cdef extern from "gandiva/condition.h" namespace "gandiva" nogil:

    # gandiva::Condition
    cdef cppclass CCondition" gandiva::Condition":
        c_string ToString()
        shared_ptr[CNode] root()
        shared_ptr[CField] result()
|
||||
|
||||
# Declarations taken from gandiva/arrow.h (C++ namespace gandiva);
# everything here is declared nogil.
cdef extern from "gandiva/arrow.h" namespace "gandiva" nogil:

    # Vector of shared pointers to arrays, bound to gandiva::ArrayVector.
    ctypedef vector[shared_ptr[CArray]] CArrayVector" gandiva::ArrayVector"
|
||||
|
||||
|
||||
# Declarations taken from gandiva/tree_expr_builder.h (C++ namespace gandiva);
# everything here is declared nogil.
#
# Each Cython name below is bound, through its quoted cname, to a member of
# gandiva::TreeExprBuilder. The typed *Literal helpers for bool, integers,
# float and double all share the single cname MakeLiteral and differ only
# in the C argument type.
cdef extern from "gandiva/tree_expr_builder.h" namespace "gandiva" nogil:

    # ---- literals: boolean ----
    cdef shared_ptr[CNode] TreeExprBuilder_MakeBoolLiteral \
        "gandiva::TreeExprBuilder::MakeLiteral"(c_bool value)

    # ---- literals: unsigned integers ----
    cdef shared_ptr[CNode] TreeExprBuilder_MakeUInt8Literal \
        "gandiva::TreeExprBuilder::MakeLiteral"(uint8_t value)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeUInt16Literal \
        "gandiva::TreeExprBuilder::MakeLiteral"(uint16_t value)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeUInt32Literal \
        "gandiva::TreeExprBuilder::MakeLiteral"(uint32_t value)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeUInt64Literal \
        "gandiva::TreeExprBuilder::MakeLiteral"(uint64_t value)

    # ---- literals: signed integers ----
    cdef shared_ptr[CNode] TreeExprBuilder_MakeInt8Literal \
        "gandiva::TreeExprBuilder::MakeLiteral"(int8_t value)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeInt16Literal \
        "gandiva::TreeExprBuilder::MakeLiteral"(int16_t value)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeInt32Literal \
        "gandiva::TreeExprBuilder::MakeLiteral"(int32_t value)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeInt64Literal \
        "gandiva::TreeExprBuilder::MakeLiteral"(int64_t value)

    # ---- literals: floating point ----
    cdef shared_ptr[CNode] TreeExprBuilder_MakeFloatLiteral \
        "gandiva::TreeExprBuilder::MakeLiteral"(float value)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeDoubleLiteral \
        "gandiva::TreeExprBuilder::MakeLiteral"(double value)

    # ---- literals: string / binary (dedicated C++ entry points) ----
    cdef shared_ptr[CNode] TreeExprBuilder_MakeStringLiteral \
        "gandiva::TreeExprBuilder::MakeStringLiteral"(const c_string& value)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeBinaryLiteral \
        "gandiva::TreeExprBuilder::MakeBinaryLiteral"(const c_string& value)

    # ---- tree construction ----
    cdef shared_ptr[CGandivaExpression] TreeExprBuilder_MakeExpression \
        "gandiva::TreeExprBuilder::MakeExpression"(
            shared_ptr[CNode] root_node,
            shared_ptr[CField] result_field)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeFunction \
        "gandiva::TreeExprBuilder::MakeFunction"(
            const c_string& name,
            const CNodeVector& children,
            shared_ptr[CDataType] return_type)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeField \
        "gandiva::TreeExprBuilder::MakeField"(shared_ptr[CField] field)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeIf \
        "gandiva::TreeExprBuilder::MakeIf"(
            shared_ptr[CNode] condition,
            shared_ptr[CNode] this_node,
            shared_ptr[CNode] else_node,
            shared_ptr[CDataType] return_type)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeAnd \
        "gandiva::TreeExprBuilder::MakeAnd"(const CNodeVector& children)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeOr \
        "gandiva::TreeExprBuilder::MakeOr"(const CNodeVector& children)

    cdef shared_ptr[CCondition] TreeExprBuilder_MakeCondition \
        "gandiva::TreeExprBuilder::MakeCondition"(
            shared_ptr[CNode] condition)

    # ---- IN expressions: a node plus an unordered set of candidate values ----
    cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionInt32 \
        "gandiva::TreeExprBuilder::MakeInExpressionInt32"(
            shared_ptr[CNode] node,
            const c_unordered_set[int32_t]& values)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionInt64 \
        "gandiva::TreeExprBuilder::MakeInExpressionInt64"(
            shared_ptr[CNode] node,
            const c_unordered_set[int64_t]& values)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionTime32 \
        "gandiva::TreeExprBuilder::MakeInExpressionTime32"(
            shared_ptr[CNode] node,
            const c_unordered_set[int32_t]& values)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionTime64 \
        "gandiva::TreeExprBuilder::MakeInExpressionTime64"(
            shared_ptr[CNode] node,
            const c_unordered_set[int64_t]& values)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionDate32 \
        "gandiva::TreeExprBuilder::MakeInExpressionDate32"(
            shared_ptr[CNode] node,
            const c_unordered_set[int32_t]& values)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionDate64 \
        "gandiva::TreeExprBuilder::MakeInExpressionDate64"(
            shared_ptr[CNode] node,
            const c_unordered_set[int64_t]& values)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionTimeStamp \
        "gandiva::TreeExprBuilder::MakeInExpressionTimeStamp"(
            shared_ptr[CNode] node,
            const c_unordered_set[int64_t]& values)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionString \
        "gandiva::TreeExprBuilder::MakeInExpressionString"(
            shared_ptr[CNode] node,
            const c_unordered_set[c_string]& values)

    cdef shared_ptr[CNode] TreeExprBuilder_MakeInExpressionBinary \
        "gandiva::TreeExprBuilder::MakeInExpressionBinary"(
            shared_ptr[CNode] node,
            const c_unordered_set[c_string]& values)
|
||||
|
||||
# Declarations taken from gandiva/projector.h (C++ namespace gandiva);
# everything here is declared nogil.
cdef extern from "gandiva/projector.h" namespace "gandiva" nogil:

    cdef cppclass CProjector" gandiva::Projector":
        # Two Evaluate overloads: without and with a selection vector.
        CStatus Evaluate(
            const CRecordBatch& batch,
            CMemoryPool* pool,
            const CArrayVector* output)

        CStatus Evaluate(
            const CRecordBatch& batch,
            const CSelectionVector* selection,
            CMemoryPool* pool,
            const CArrayVector* output)

        c_string DumpIR()

    # Two overloads of gandiva::Projector::Make under one Cython name.
    # Both return a CStatus and hand the projector back through the
    # `projector` out-pointer.
    cdef CStatus Projector_Make \
        "gandiva::Projector::Make"(
            shared_ptr[CSchema] schema,
            const CExpressionVector& children,
            shared_ptr[CProjector]* projector)

    cdef CStatus Projector_Make \
        "gandiva::Projector::Make"(
            shared_ptr[CSchema] schema,
            const CExpressionVector& children,
            CSelectionVector_Mode mode,
            shared_ptr[CConfiguration] configuration,
            shared_ptr[CProjector]* projector)
|
||||
|
||||
# Declarations taken from gandiva/filter.h (C++ namespace gandiva);
# everything here is declared nogil.
cdef extern from "gandiva/filter.h" namespace "gandiva" nogil:

    cdef cppclass CFilter" gandiva::Filter":
        CStatus Evaluate(
            const CRecordBatch& batch,
            shared_ptr[CSelectionVector] out_selection)

        c_string DumpIR()

    # Returns a CStatus and hands the filter back through the `filter`
    # out-pointer.
    cdef CStatus Filter_Make \
        "gandiva::Filter::Make"(
            shared_ptr[CSchema] schema,
            shared_ptr[CCondition] condition,
            shared_ptr[CConfiguration] configuration,
            shared_ptr[CFilter]* filter)
|
||||
|
||||
# Declarations taken from gandiva/function_signature.h (C++ namespace
# gandiva); everything here is declared nogil.
cdef extern from "gandiva/function_signature.h" namespace "gandiva" nogil:

    cdef cppclass CFunctionSignature" gandiva::FunctionSignature":
        # Constructor: base name, parameter types, return type.
        CFunctionSignature(const c_string& base_name,
                           vector[shared_ptr[CDataType]] param_types,
                           shared_ptr[CDataType] ret_type)

        # Const accessors.
        shared_ptr[CDataType] ret_type() const
        const c_string& base_name() const
        vector[shared_ptr[CDataType]] param_types() const
        c_string ToString() const
|
||||
|
||||
# Declarations taken from gandiva/expression_registry.h (C++ namespace
# gandiva); everything here is declared nogil.
cdef extern from "gandiva/expression_registry.h" namespace "gandiva" nogil:

    # Returns a vector of shared pointers to function signatures.
    cdef vector[shared_ptr[CFunctionSignature]] \
        GetRegisteredFunctionSignatures()
|
||||
|
||||
# Declarations taken from gandiva/configuration.h (C++ namespace gandiva);
# everything here is declared nogil.
cdef extern from "gandiva/configuration.h" namespace "gandiva" nogil:

    cdef cppclass CConfiguration" gandiva::Configuration":
        # Constructors: default, and one taking both flags.
        CConfiguration()
        CConfiguration(bint optimize, bint dump_ir)

        # Setters for the same two flags.
        void set_optimize(bint optimize)
        void set_dump_ir(bint dump_ir)

    cdef cppclass CConfigurationBuilder \
            " gandiva::ConfigurationBuilder":
        @staticmethod
        shared_ptr[CConfiguration] DefaultConfiguration()

        CConfigurationBuilder()

        shared_ptr[CConfiguration] build()
|
||||
@@ -0,0 +1,632 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
# distutils: language = c++
|
||||
|
||||
from pyarrow.includes.common cimport *
|
||||
from pyarrow.includes.libarrow cimport (Type, CChunkedArray, CScalar, CSchema,
|
||||
CStatus, CTable, CMemoryPool, CBuffer,
|
||||
CKeyValueMetadata, CRandomAccessFile,
|
||||
COutputStream, CCacheOptions,
|
||||
TimeUnit, CRecordBatchReader)
|
||||
|
||||
|
||||
# Declarations taken from parquet/api/schema.h, C++ namespace
# parquet::schema; everything here is declared nogil.
cdef extern from "parquet/api/schema.h" namespace "parquet::schema" nogil:

    # Node and its two subclasses are declared without members.
    cdef cppclass Node:
        pass

    cdef cppclass GroupNode(Node):
        pass

    cdef cppclass PrimitiveNode(Node):
        pass

    # Exposes the path both as a single string and as a vector of strings.
    cdef cppclass ColumnPath:
        c_string ToDotString()
        vector[c_string] ToDotVector()
|
||||
|
||||
|
||||
# Declarations taken from parquet/api/schema.h, C++ namespace parquet;
# everything here is declared nogil.
#
# Each Cython enumerator is bound to a C++ enumerator through the quoted
# cname that follows it.
cdef extern from "parquet/api/schema.h" namespace "parquet" nogil:

    # ---- physical types ----
    enum ParquetType" parquet::Type::type":
        ParquetType_BOOLEAN" parquet::Type::BOOLEAN"
        ParquetType_INT32" parquet::Type::INT32"
        ParquetType_INT64" parquet::Type::INT64"
        ParquetType_INT96" parquet::Type::INT96"
        ParquetType_FLOAT" parquet::Type::FLOAT"
        ParquetType_DOUBLE" parquet::Type::DOUBLE"
        ParquetType_BYTE_ARRAY" parquet::Type::BYTE_ARRAY"
        ParquetType_FIXED_LEN_BYTE_ARRAY" parquet::Type::FIXED_LEN_BYTE_ARRAY"

    # ---- logical types ----
    enum ParquetLogicalTypeId" parquet::LogicalType::Type::type":
        ParquetLogicalType_UNDEFINED" parquet::LogicalType::Type::UNDEFINED"
        ParquetLogicalType_STRING" parquet::LogicalType::Type::STRING"
        ParquetLogicalType_MAP" parquet::LogicalType::Type::MAP"
        ParquetLogicalType_LIST" parquet::LogicalType::Type::LIST"
        ParquetLogicalType_ENUM" parquet::LogicalType::Type::ENUM"
        ParquetLogicalType_DECIMAL" parquet::LogicalType::Type::DECIMAL"
        ParquetLogicalType_DATE" parquet::LogicalType::Type::DATE"
        ParquetLogicalType_TIME" parquet::LogicalType::Type::TIME"
        ParquetLogicalType_TIMESTAMP" parquet::LogicalType::Type::TIMESTAMP"
        ParquetLogicalType_INT" parquet::LogicalType::Type::INT"
        ParquetLogicalType_FLOAT16" parquet::LogicalType::Type::FLOAT16"
        ParquetLogicalType_JSON" parquet::LogicalType::Type::JSON"
        ParquetLogicalType_BSON" parquet::LogicalType::Type::BSON"
        ParquetLogicalType_UUID" parquet::LogicalType::Type::UUID"
        ParquetLogicalType_GEOMETRY" parquet::LogicalType::Type::GEOMETRY"
        ParquetLogicalType_GEOGRAPHY" parquet::LogicalType::Type::GEOGRAPHY"
        ParquetLogicalType_NONE" parquet::LogicalType::Type::NONE"

    enum ParquetTimeUnit" parquet::LogicalType::TimeUnit::unit":
        ParquetTimeUnit_UNKNOWN" parquet::LogicalType::TimeUnit::UNKNOWN"
        ParquetTimeUnit_MILLIS" parquet::LogicalType::TimeUnit::MILLIS"
        ParquetTimeUnit_MICROS" parquet::LogicalType::TimeUnit::MICROS"
        ParquetTimeUnit_NANOS" parquet::LogicalType::TimeUnit::NANOS"

    enum ParquetEdgeInterpolationAlgorithm" parquet::LogicalType::EdgeInterpolationAlgorithm":
        ParquetEdgeInterpolationAlgorithm_UNKNOWN" parquet::LogicalType::EdgeInterpolationAlgorithm::UNKNOWN"
        ParquetEdgeInterpolationAlgorithm_SPHERICAL" parquet::LogicalType::EdgeInterpolationAlgorithm::SPHERICAL"
        ParquetEdgeInterpolationAlgorithm_VINCENTY" parquet::LogicalType::EdgeInterpolationAlgorithm::VINCENTY"
        ParquetEdgeInterpolationAlgorithm_THOMAS" parquet::LogicalType::EdgeInterpolationAlgorithm::THOMAS"
        ParquetEdgeInterpolationAlgorithm_ANDOYER" parquet::LogicalType::EdgeInterpolationAlgorithm::ANDOYER"
        ParquetEdgeInterpolationAlgorithm_KARNEY" parquet::LogicalType::EdgeInterpolationAlgorithm::KARNEY"

    # ---- converted types ----
    enum ParquetConvertedType" parquet::ConvertedType::type":
        ParquetConvertedType_NONE" parquet::ConvertedType::NONE"
        ParquetConvertedType_UTF8" parquet::ConvertedType::UTF8"
        ParquetConvertedType_MAP" parquet::ConvertedType::MAP"
        ParquetConvertedType_MAP_KEY_VALUE \
            " parquet::ConvertedType::MAP_KEY_VALUE"
        ParquetConvertedType_LIST" parquet::ConvertedType::LIST"
        ParquetConvertedType_ENUM" parquet::ConvertedType::ENUM"
        ParquetConvertedType_DECIMAL" parquet::ConvertedType::DECIMAL"
        ParquetConvertedType_DATE" parquet::ConvertedType::DATE"
        ParquetConvertedType_TIME_MILLIS" parquet::ConvertedType::TIME_MILLIS"
        ParquetConvertedType_TIME_MICROS" parquet::ConvertedType::TIME_MICROS"
        ParquetConvertedType_TIMESTAMP_MILLIS \
            " parquet::ConvertedType::TIMESTAMP_MILLIS"
        ParquetConvertedType_TIMESTAMP_MICROS \
            " parquet::ConvertedType::TIMESTAMP_MICROS"
        ParquetConvertedType_UINT_8" parquet::ConvertedType::UINT_8"
        ParquetConvertedType_UINT_16" parquet::ConvertedType::UINT_16"
        ParquetConvertedType_UINT_32" parquet::ConvertedType::UINT_32"
        ParquetConvertedType_UINT_64" parquet::ConvertedType::UINT_64"
        ParquetConvertedType_INT_8" parquet::ConvertedType::INT_8"
        ParquetConvertedType_INT_16" parquet::ConvertedType::INT_16"
        ParquetConvertedType_INT_32" parquet::ConvertedType::INT_32"
        ParquetConvertedType_INT_64" parquet::ConvertedType::INT_64"
        ParquetConvertedType_JSON" parquet::ConvertedType::JSON"
        ParquetConvertedType_BSON" parquet::ConvertedType::BSON"
        ParquetConvertedType_INTERVAL" parquet::ConvertedType::INTERVAL"

    # ---- repetition ----
    # The enumerator cnames use the same `parquet::Repetition` scope as the
    # enum's own type cname. The earlier upper-case `parquet::REPETITION`
    # spelling did not match that scope and would only have surfaced as a
    # C++ compile error once one of these enumerators was referenced.
    enum ParquetRepetition" parquet::Repetition::type":
        ParquetRepetition_REQUIRED" parquet::Repetition::REQUIRED"
        ParquetRepetition_OPTIONAL" parquet::Repetition::OPTIONAL"
        ParquetRepetition_REPEATED" parquet::Repetition::REPEATED"

    # ---- encodings / compression / format version / sort order ----
    enum ParquetEncoding" parquet::Encoding::type":
        ParquetEncoding_PLAIN" parquet::Encoding::PLAIN"
        ParquetEncoding_PLAIN_DICTIONARY" parquet::Encoding::PLAIN_DICTIONARY"
        ParquetEncoding_RLE" parquet::Encoding::RLE"
        ParquetEncoding_BIT_PACKED" parquet::Encoding::BIT_PACKED"
        ParquetEncoding_DELTA_BINARY_PACKED \
            " parquet::Encoding::DELTA_BINARY_PACKED"
        ParquetEncoding_DELTA_LENGTH_BYTE_ARRAY \
            " parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY"
        ParquetEncoding_DELTA_BYTE_ARRAY" parquet::Encoding::DELTA_BYTE_ARRAY"
        ParquetEncoding_RLE_DICTIONARY" parquet::Encoding::RLE_DICTIONARY"
        ParquetEncoding_BYTE_STREAM_SPLIT \
            " parquet::Encoding::BYTE_STREAM_SPLIT"

    enum ParquetCompression" parquet::Compression::type":
        ParquetCompression_UNCOMPRESSED" parquet::Compression::UNCOMPRESSED"
        ParquetCompression_SNAPPY" parquet::Compression::SNAPPY"
        ParquetCompression_GZIP" parquet::Compression::GZIP"
        ParquetCompression_LZO" parquet::Compression::LZO"
        ParquetCompression_BROTLI" parquet::Compression::BROTLI"
        ParquetCompression_LZ4" parquet::Compression::LZ4"
        ParquetCompression_ZSTD" parquet::Compression::ZSTD"

    enum ParquetVersion" parquet::ParquetVersion::type":
        ParquetVersion_V1" parquet::ParquetVersion::PARQUET_1_0"
        ParquetVersion_V2_4" parquet::ParquetVersion::PARQUET_2_4"
        ParquetVersion_V2_6" parquet::ParquetVersion::PARQUET_2_6"

    enum ParquetSortOrder" parquet::SortOrder::type":
        ParquetSortOrder_SIGNED" parquet::SortOrder::SIGNED"
        ParquetSortOrder_UNSIGNED" parquet::SortOrder::UNSIGNED"
        ParquetSortOrder_UNKNOWN" parquet::SortOrder::UNKNOWN"

    # ---- logical type class hierarchy ----
    # Base class; the subclasses below add type-specific accessors.
    cdef cppclass CParquetLogicalType" parquet::LogicalType":
        c_string ToString() const
        c_string ToJSON() const
        ParquetLogicalTypeId type() const

    cdef cppclass CParquetDecimalType \
            " parquet::DecimalLogicalType"(CParquetLogicalType):
        int32_t precision() const
        int32_t scale() const

    cdef cppclass CParquetIntType \
            " parquet::IntLogicalType"(CParquetLogicalType):
        int bit_width() const
        c_bool is_signed() const

    cdef cppclass CParquetTimeType \
            " parquet::TimeLogicalType"(CParquetLogicalType):
        c_bool is_adjusted_to_utc() const
        ParquetTimeUnit time_unit() const

    cdef cppclass CParquetTimestampType \
            " parquet::TimestampLogicalType"(CParquetLogicalType):
        c_bool is_adjusted_to_utc() const
        ParquetTimeUnit time_unit() const

    cdef cppclass CParquetGeometryType \
            " parquet::GeometryLogicalType"(CParquetLogicalType):
        c_string crs() const

    cdef cppclass CParquetGeographyType \
            " parquet::GeographyLogicalType"(CParquetLogicalType):
        c_string crs() const
        ParquetEdgeInterpolationAlgorithm algorithm() const

    # ---- column / schema descriptors ----
    cdef cppclass ColumnDescriptor" parquet::ColumnDescriptor":
        c_bool Equals(const ColumnDescriptor& other)

        shared_ptr[ColumnPath] path()
        int16_t max_definition_level()
        int16_t max_repetition_level()

        ParquetType physical_type()
        const shared_ptr[const CParquetLogicalType]& logical_type()
        ParquetConvertedType converted_type()
        const c_string& name()
        int type_length()
        int type_precision()
        int type_scale()

    cdef cppclass SchemaDescriptor:
        const ColumnDescriptor* Column(int i)
        shared_ptr[Node] schema()
        GroupNode* group()
        c_bool Equals(const SchemaDescriptor& other)
        c_string ToString()
        int num_columns()

    cdef c_string FormatStatValue(ParquetType parquet_type, c_string val)

    # ---- encryption metadata ----
    enum ParquetCipher" parquet::ParquetCipher::type":
        ParquetCipher_AES_GCM_V1" parquet::ParquetCipher::AES_GCM_V1"
        ParquetCipher_AES_GCM_CTR_V1" parquet::ParquetCipher::AES_GCM_CTR_V1"

    struct AadMetadata:
        c_string aad_prefix
        c_string aad_file_unique
        c_bool supply_aad_prefix

    struct EncryptionAlgorithm:
        ParquetCipher algorithm
        AadMetadata aad
|
||||
|
||||
|
||||
# Specific array<> types needed for GeoStatistics
|
||||
# Declarations from the C++ standard header <array> (namespace std), nogil.
# Two fixed instantiations of std::array are exposed under plain names.
cdef extern from "<array>" namespace "std" nogil:

    # std::array<double, 4>
    cdef cppclass double_array4 "std::array<double, 4>":
        double_array4() except +
        double& operator[](size_t)

    # std::array<bool, 4>
    cdef cppclass bool_array4 "std::array<bool, 4>":
        bool_array4() except +
        c_bool& operator[](size_t)
|
||||
|
||||
|
||||
# Declarations taken from parquet/geospatial/statistics.h (C++ namespace
# parquet); everything here is declared nogil.
cdef extern from "parquet/geospatial/statistics.h" namespace "parquet" nogil:

    cdef cppclass CParquetGeoStatistics" parquet::geospatial::GeoStatistics":
        c_bool is_valid() const

        # Bounds and per-dimension flags come back as 4-element arrays.
        double_array4 lower_bound() const
        double_array4 upper_bound() const
        bool_array4 dimension_valid() const
        bool_array4 dimension_empty() const

        # Wrapped in optional, so the value may be absent.
        optional[vector[int32_t]] geometry_types() const

        c_string ToString() const
|
||||
|
||||
|
||||
cdef extern from "parquet/api/reader.h" namespace "parquet" nogil:
|
||||
cdef cppclass ColumnReader:
|
||||
pass
|
||||
|
||||
cdef cppclass BoolReader(ColumnReader):
|
||||
pass
|
||||
|
||||
cdef cppclass Int32Reader(ColumnReader):
|
||||
pass
|
||||
|
||||
cdef cppclass Int64Reader(ColumnReader):
|
||||
pass
|
||||
|
||||
cdef cppclass Int96Reader(ColumnReader):
|
||||
pass
|
||||
|
||||
cdef cppclass FloatReader(ColumnReader):
|
||||
pass
|
||||
|
||||
cdef cppclass DoubleReader(ColumnReader):
|
||||
pass
|
||||
|
||||
cdef cppclass ByteArrayReader(ColumnReader):
|
||||
pass
|
||||
|
||||
cdef cppclass RowGroupReader:
|
||||
pass
|
||||
|
||||
cdef cppclass CEncodedStatistics" parquet::EncodedStatistics":
|
||||
const c_string& max() const
|
||||
const c_string& min() const
|
||||
int64_t null_count
|
||||
int64_t distinct_count
|
||||
bint has_min
|
||||
bint has_max
|
||||
bint has_null_count
|
||||
bint has_distinct_count
|
||||
|
||||
cdef cppclass ParquetByteArray" parquet::ByteArray":
|
||||
uint32_t len
|
||||
const uint8_t* ptr
|
||||
|
||||
cdef cppclass ParquetFLBA" parquet::FLBA":
|
||||
const uint8_t* ptr
|
||||
|
||||
cdef cppclass CStatistics" parquet::Statistics":
|
||||
int64_t null_count() const
|
||||
int64_t distinct_count() const
|
||||
int64_t num_values() const
|
||||
bint HasMinMax()
|
||||
bint HasNullCount()
|
||||
bint HasDistinctCount()
|
||||
c_bool Equals(const CStatistics&) const
|
||||
void Reset()
|
||||
c_string EncodeMin()
|
||||
c_string EncodeMax()
|
||||
CEncodedStatistics Encode()
|
||||
void SetComparator()
|
||||
ParquetType physical_type() const
|
||||
const ColumnDescriptor* descr() const
|
||||
|
||||
cdef cppclass CBoolStatistics" parquet::BoolStatistics"(CStatistics):
|
||||
c_bool min()
|
||||
c_bool max()
|
||||
|
||||
cdef cppclass CInt32Statistics" parquet::Int32Statistics"(CStatistics):
|
||||
int32_t min()
|
||||
int32_t max()
|
||||
|
||||
cdef cppclass CInt64Statistics" parquet::Int64Statistics"(CStatistics):
|
||||
int64_t min()
|
||||
int64_t max()
|
||||
|
||||
cdef cppclass CFloatStatistics" parquet::FloatStatistics"(CStatistics):
|
||||
float min()
|
||||
float max()
|
||||
|
||||
cdef cppclass CDoubleStatistics" parquet::DoubleStatistics"(CStatistics):
|
||||
double min()
|
||||
double max()
|
||||
|
||||
cdef cppclass CByteArrayStatistics \
|
||||
" parquet::ByteArrayStatistics"(CStatistics):
|
||||
ParquetByteArray min()
|
||||
ParquetByteArray max()
|
||||
|
||||
cdef cppclass CFLBAStatistics" parquet::FLBAStatistics"(CStatistics):
|
||||
ParquetFLBA min()
|
||||
ParquetFLBA max()
|
||||
|
||||
cdef cppclass CColumnCryptoMetaData" parquet::ColumnCryptoMetaData":
|
||||
shared_ptr[ColumnPath] path_in_schema() const
|
||||
c_bool encrypted_with_footer_key() const
|
||||
const c_string& key_metadata() const
|
||||
|
||||
cdef cppclass ParquetIndexLocation" parquet::IndexLocation":
|
||||
int64_t offset
|
||||
int32_t length
|
||||
|
||||
cdef cppclass CColumnChunkMetaData" parquet::ColumnChunkMetaData":
|
||||
int64_t file_offset() const
|
||||
const c_string& file_path() const
|
||||
|
||||
c_bool is_metadata_set() const
|
||||
ParquetType type() const
|
||||
int64_t num_values() const
|
||||
shared_ptr[ColumnPath] path_in_schema() const
|
||||
bint is_stats_set() const
|
||||
shared_ptr[CStatistics] statistics() const
|
||||
c_bool is_geo_stats_set() const
|
||||
shared_ptr[CParquetGeoStatistics] geo_statistics() const
|
||||
ParquetCompression compression() const
|
||||
const vector[ParquetEncoding]& encodings() const
|
||||
c_bool Equals(const CColumnChunkMetaData&) const
|
||||
|
||||
int64_t has_dictionary_page() const
|
||||
int64_t dictionary_page_offset() const
|
||||
int64_t data_page_offset() const
|
||||
int64_t index_page_offset() const
|
||||
int64_t total_compressed_size() const
|
||||
int64_t total_uncompressed_size() const
|
||||
unique_ptr[CColumnCryptoMetaData] crypto_metadata() const
|
||||
optional[ParquetIndexLocation] GetColumnIndexLocation() const
|
||||
optional[ParquetIndexLocation] GetOffsetIndexLocation() const
|
||||
shared_ptr[const CKeyValueMetadata] key_value_metadata() const
|
||||
|
||||
struct CSortingColumn" parquet::SortingColumn":
|
||||
int column_idx
|
||||
c_bool descending
|
||||
c_bool nulls_first
|
||||
|
||||
cdef cppclass CRowGroupMetaData" parquet::RowGroupMetaData":
|
||||
c_bool Equals(const CRowGroupMetaData&) const
|
||||
int num_columns() const
|
||||
int64_t num_rows() const
|
||||
int64_t total_byte_size() const
|
||||
vector[CSortingColumn] sorting_columns() const
|
||||
unique_ptr[CColumnChunkMetaData] ColumnChunk(int i) const
|
||||
|
||||
cdef cppclass CFileMetaData" parquet::FileMetaData":
|
||||
c_bool Equals(const CFileMetaData&) const
|
||||
uint32_t size()
|
||||
int num_columns()
|
||||
int64_t num_rows()
|
||||
int num_row_groups()
|
||||
ParquetVersion version()
|
||||
const c_string created_by()
|
||||
int num_schema_elements()
|
||||
|
||||
void set_file_path(const c_string& path)
|
||||
void AppendRowGroups(const CFileMetaData& other) except +
|
||||
|
||||
unique_ptr[CRowGroupMetaData] RowGroup(int i)
|
||||
const SchemaDescriptor* schema()
|
||||
shared_ptr[const CKeyValueMetadata] key_value_metadata() const
|
||||
void WriteTo(COutputStream* dst) const
|
||||
|
||||
inline c_bool is_encryption_algorithm_set() const
|
||||
inline EncryptionAlgorithm encryption_algorithm() const
|
||||
inline const c_string& footer_signing_key_metadata() const
|
||||
|
||||
cdef shared_ptr[CFileMetaData] CFileMetaData_Make \
|
||||
" parquet::FileMetaData::Make"(const void* serialized_metadata,
|
||||
uint32_t* metadata_len)
|
||||
|
||||
cdef cppclass CReaderProperties" parquet::ReaderProperties":
|
||||
c_bool is_buffered_stream_enabled() const
|
||||
void enable_buffered_stream()
|
||||
void disable_buffered_stream()
|
||||
|
||||
void set_buffer_size(int64_t buf_size)
|
||||
int64_t buffer_size() const
|
||||
|
||||
void set_thrift_string_size_limit(int32_t size)
|
||||
int32_t thrift_string_size_limit() const
|
||||
|
||||
void set_thrift_container_size_limit(int32_t size)
|
||||
int32_t thrift_container_size_limit() const
|
||||
|
||||
void file_decryption_properties(shared_ptr[CFileDecryptionProperties]
|
||||
decryption)
|
||||
shared_ptr[CFileDecryptionProperties] file_decryption_properties() \
|
||||
const
|
||||
|
||||
c_bool page_checksum_verification() const
|
||||
void set_page_checksum_verification(c_bool check_crc)
|
||||
|
||||
CReaderProperties default_reader_properties()
|
||||
|
||||
# Declaration of the C++ class ArrowReaderProperties (no alternate C name
# given, so the Cython and C++ names coincide). Exposes a default
# constructor and setter/getter pairs for: binary type, list type,
# per-column read_dictionary flag, batch size, pre-buffering, cache options,
# INT96 timestamp coercion unit, and the arrow-extensions-enabled flag.
cdef cppclass ArrowReaderProperties:
|
||||
ArrowReaderProperties()
|
||||
void set_binary_type(Type binary_type)
|
||||
Type binary_type()
|
||||
void set_list_type(Type list_type)
|
||||
Type list_type()
|
||||
# read_dictionary is keyed by an int column index.
void set_read_dictionary(int column_index, c_bool read_dict)
|
||||
c_bool read_dictionary(int column_index)
|
||||
void set_batch_size(int64_t batch_size)
|
||||
int64_t batch_size()
|
||||
void set_pre_buffer(c_bool pre_buffer)
|
||||
c_bool pre_buffer() const
|
||||
void set_cache_options(CCacheOptions options)
|
||||
CCacheOptions cache_options() const
|
||||
void set_coerce_int96_timestamp_unit(TimeUnit unit)
|
||||
TimeUnit coerce_int96_timestamp_unit() const
|
||||
# Note the getter here is prefixed with get_, unlike the other accessors.
void set_arrow_extensions_enabled(c_bool extensions_enabled)
|
||||
c_bool get_arrow_extensions_enabled() const
|
||||
|
||||
# Free function taking no arguments and returning an ArrowReaderProperties
# by value.
ArrowReaderProperties default_arrow_reader_properties()
|
||||
|
||||
# Minimal view of the C++ class ParquetFileReader: only metadata() is
# declared here, returning shared_ptr[CFileMetaData].
cdef cppclass ParquetFileReader:
|
||||
shared_ptr[CFileMetaData] metadata()
|
||||
|
||||
|
||||
# Declarations taken from "parquet/api/writer.h" in C++ namespace "parquet";
# everything in this extern block is declared nogil.
cdef extern from "parquet/api/writer.h" namespace "parquet" nogil:
|
||||
# Plain struct-like class with three public fields: minimum and maximum
# chunk size (int64_t) and norm_level (int). It is the argument type of
# WriterProperties.Builder.content_defined_chunking_options below.
cdef cppclass CdcOptions:
|
||||
int64_t min_chunk_size
|
||||
int64_t max_chunk_size
|
||||
int norm_level
|
||||
|
||||
# WriterProperties is only declared through its nested Builder class.
# Every Builder method except build() returns Builder*; build() returns
# shared_ptr[WriterProperties].
cdef cppclass WriterProperties:
|
||||
cppclass Builder:
|
||||
Builder* data_page_version(ParquetDataPageVersion version)
|
||||
Builder* version(ParquetVersion version)
|
||||
# compression / compression_level / encoding / enable_dictionary /
# enable_statistics are each overloaded: one form without a path and
# one taking a `const c_string& path` first argument.
Builder* compression(ParquetCompression codec)
|
||||
Builder* compression(const c_string& path,
|
||||
ParquetCompression codec)
|
||||
Builder* compression_level(int compression_level)
|
||||
Builder* compression_level(const c_string& path,
|
||||
int compression_level)
|
||||
Builder* encryption(
|
||||
shared_ptr[CFileEncryptionProperties]
|
||||
file_encryption_properties)
|
||||
Builder* disable_dictionary()
|
||||
Builder* enable_dictionary()
|
||||
Builder* enable_dictionary(const c_string& path)
|
||||
Builder* set_sorting_columns(vector[CSortingColumn] sorting_columns)
|
||||
Builder* disable_statistics()
|
||||
Builder* enable_statistics()
|
||||
Builder* enable_statistics(const c_string& path)
|
||||
Builder* enable_store_decimal_as_integer()
|
||||
Builder* disable_store_decimal_as_integer()
|
||||
Builder* data_pagesize(int64_t size)
|
||||
Builder* encoding(ParquetEncoding encoding)
|
||||
Builder* encoding(const c_string& path,
|
||||
ParquetEncoding encoding)
|
||||
Builder* max_row_group_length(int64_t size)
|
||||
Builder* write_batch_size(int64_t batch_size)
|
||||
Builder* dictionary_pagesize_limit(int64_t dictionary_pagesize_limit)
|
||||
Builder* enable_write_page_index()
|
||||
Builder* disable_write_page_index()
|
||||
Builder* enable_page_checksum()
|
||||
Builder* disable_page_checksum()
|
||||
Builder* enable_content_defined_chunking()
|
||||
Builder* disable_content_defined_chunking()
|
||||
Builder* content_defined_chunking_options(CdcOptions options)
|
||||
shared_ptr[WriterProperties] build()
|
||||
|
||||
# ArrowWriterProperties: nested Builder (with an explicit default
# constructor) plus one method, support_deprecated_int96_timestamps().
# NOTE(review): with indentation lost it cannot be seen from here whether
# support_deprecated_int96_timestamps belongs to Builder or to
# ArrowWriterProperties itself; presumably the latter, since it follows
# build() - confirm against the original file.
cdef cppclass ArrowWriterProperties:
|
||||
cppclass Builder:
|
||||
Builder()
|
||||
Builder* disable_deprecated_int96_timestamps()
|
||||
Builder* enable_deprecated_int96_timestamps()
|
||||
Builder* coerce_timestamps(TimeUnit unit)
|
||||
Builder* allow_truncated_timestamps()
|
||||
Builder* disallow_truncated_timestamps()
|
||||
Builder* store_schema()
|
||||
Builder* enable_compliant_nested_types()
|
||||
Builder* disable_compliant_nested_types()
|
||||
Builder* set_engine_version(ArrowWriterEngineVersion version)
|
||||
shared_ptr[ArrowWriterProperties] build()
|
||||
c_bool support_deprecated_int96_timestamps()
|
||||
|
||||
# Declarations taken from "parquet/arrow/reader.h" in C++ namespace
# "parquet::arrow"; everything in this extern block is declared nogil.
cdef extern from "parquet/arrow/reader.h" namespace "parquet::arrow" nogil:
|
||||
# FileReader: most read methods return CStatus and write their result
# through a trailing out-pointer; GetRecordBatchReader instead returns a
# CResult wrapping a unique_ptr.
cdef cppclass FileReader:
|
||||
FileReader(CMemoryPool* pool, unique_ptr[ParquetFileReader] reader)
|
||||
|
||||
CStatus GetSchema(shared_ptr[CSchema]* out)
|
||||
|
||||
# Single-column / single-field reads, selected by int index.
CStatus ReadColumn(int i, shared_ptr[CChunkedArray]* out)
|
||||
CStatus ReadSchemaField(int i, shared_ptr[CChunkedArray]* out)
|
||||
|
||||
# Row-group reads; the second overload adds a column_indices selection.
int num_row_groups()
|
||||
CStatus ReadRowGroup(int i, shared_ptr[CTable]* out)
|
||||
CStatus ReadRowGroup(int i, const vector[int]& column_indices,
|
||||
shared_ptr[CTable]* out)
|
||||
|
||||
# Multi-row-group reads, with and without a column_indices selection.
CStatus ReadRowGroups(const vector[int]& row_groups,
|
||||
shared_ptr[CTable]* out)
|
||||
CStatus ReadRowGroups(const vector[int]& row_groups,
|
||||
|
||||
const vector[int]& column_indices,
|
||||
shared_ptr[CTable]* out)
|
||||
|
||||
# Record-batch reader factories, with and without a column_indices
# selection.
CResult[unique_ptr[CRecordBatchReader]] GetRecordBatchReader(const vector[int]& row_group_indices,
|
||||
const vector[int]& column_indices)
|
||||
CResult[unique_ptr[CRecordBatchReader]] GetRecordBatchReader(const vector[int]& row_group_indices)
|
||||
|
||||
# Whole-file reads, with and without a column_indices selection.
CStatus ReadTable(shared_ptr[CTable]* out)
|
||||
CStatus ReadTable(const vector[int]& column_indices,
|
||||
shared_ptr[CTable]* out)
|
||||
|
||||
# Writes a row count through the int64_t* num_rows out-pointer.
CStatus ScanContents(vector[int] columns, int32_t column_batch_size,
|
||||
int64_t* num_rows)
|
||||
|
||||
# Returns a const pointer to a ParquetFileReader.
const ParquetFileReader* parquet_reader()
|
||||
|
||||
void set_use_threads(c_bool use_threads)
|
||||
|
||||
void set_batch_size(int64_t batch_size)
|
||||
|
||||
# FileReaderBuilder: Open() takes the file, reader properties and
# metadata; memory_pool()/properties() return FileReaderBuilder*; Build()
# writes a unique_ptr[FileReader] through its out-pointer.
cdef cppclass FileReaderBuilder:
|
||||
FileReaderBuilder()
|
||||
CStatus Open(const shared_ptr[CRandomAccessFile]& file,
|
||||
const CReaderProperties& properties,
|
||||
const shared_ptr[CFileMetaData]& metadata)
|
||||
|
||||
ParquetFileReader* raw_reader()
|
||||
FileReaderBuilder* memory_pool(CMemoryPool*)
|
||||
FileReaderBuilder* properties(const ArrowReaderProperties&)
|
||||
CStatus Build(unique_ptr[FileReader]* out)
|
||||
|
||||
# Function declared to take a Parquet SchemaDescriptor, reader properties
# and key/value metadata, writing a shared_ptr[CSchema] through `out`.
CStatus FromParquetSchema(
|
||||
const SchemaDescriptor* parquet_schema,
|
||||
const ArrowReaderProperties& properties,
|
||||
const shared_ptr[const CKeyValueMetadata]& key_value_metadata,
|
||||
shared_ptr[CSchema]* out)
|
||||
|
||||
# Function declared to take a CStatistics reference and two
# shared_ptr[CScalar]* out-pointers named min and max.
CStatus StatisticsAsScalars(const CStatistics& Statistics,
|
||||
shared_ptr[CScalar]* min,
|
||||
shared_ptr[CScalar]* max)
|
||||
|
||||
# Declarations taken from "parquet/arrow/schema.h" in C++ namespace
# "parquet::arrow"; declared nogil.
cdef extern from "parquet/arrow/schema.h" namespace "parquet::arrow" nogil:
|
||||
|
||||
# Function declared to take an Arrow CSchema pointer plus Parquet and Arrow
# writer properties, writing a shared_ptr[SchemaDescriptor] through `out`.
CStatus ToParquetSchema(
|
||||
const CSchema* arrow_schema,
|
||||
const WriterProperties& properties,
|
||||
const ArrowWriterProperties& arrow_properties,
|
||||
shared_ptr[SchemaDescriptor]* out)
|
||||
|
||||
|
||||
# Declarations taken from "parquet/properties.h" in C++ namespace "parquet";
# declared nogil.
cdef extern from "parquet/properties.h" namespace "parquet" nogil:
|
||||
# Enum whose two members map to the C++ names
# parquet::ArrowWriterProperties::V1 and ::V2 given in the strings.
cdef enum ArrowWriterEngineVersion:
|
||||
V1 "parquet::ArrowWriterProperties::V1",
|
||||
V2 "parquet::ArrowWriterProperties::V2"
|
||||
|
||||
# ParquetDataPageVersion is declared as an opaque cppclass (empty body);
# its two values are exposed as the separate constants below.
cdef cppclass ParquetDataPageVersion:
|
||||
pass
|
||||
|
||||
# Constants of type ParquetDataPageVersion bound to the C++ names
# parquet::ParquetDataPageVersion::V1 and ::V2.
cdef ParquetDataPageVersion ParquetDataPageVersion_V1 \
|
||||
" parquet::ParquetDataPageVersion::V1"
|
||||
cdef ParquetDataPageVersion ParquetDataPageVersion_V2 \
|
||||
" parquet::ParquetDataPageVersion::V2"
|
||||
|
||||
# Declarations taken from "parquet/arrow/writer.h" in C++ namespace
# "parquet::arrow"; declared nogil.
cdef extern from "parquet/arrow/writer.h" namespace "parquet::arrow" nogil:
|
||||
# FileWriter: created through the static Open() factory, which returns a
# CResult wrapping unique_ptr[FileWriter]; the write/close methods return
# CStatus.
cdef cppclass FileWriter:
|
||||
|
||||
@staticmethod
|
||||
CResult[unique_ptr[FileWriter]] Open(const CSchema& schema, CMemoryPool* pool,
|
||||
const shared_ptr[COutputStream]& sink,
|
||||
const shared_ptr[WriterProperties]& properties,
|
||||
const shared_ptr[ArrowWriterProperties]& arrow_properties)
|
||||
|
||||
CStatus WriteTable(const CTable& table, int64_t chunk_size)
|
||||
CStatus NewRowGroup()
|
||||
CStatus Close()
|
||||
CStatus AddKeyValueMetadata(const shared_ptr[const CKeyValueMetadata]& key_value_metadata)
|
||||
|
||||
const shared_ptr[CFileMetaData] metadata() const
|
||||
|
||||
# Function declared to take file metadata by const reference and an output
# stream pointer, returning CStatus.
CStatus WriteMetaDataFile(
|
||||
const CFileMetaData& file_metadata,
|
||||
const COutputStream* sink)
|
||||
|
||||
# Declarations taken from "parquet/encryption/encryption.h" in C++ namespace
# "parquet"; declared nogil. Both classes are opaque here (empty bodies).
cdef extern from "parquet/encryption/encryption.h" namespace "parquet" nogil:
|
||||
# Opaque handle for parquet::FileDecryptionProperties.
cdef cppclass CFileDecryptionProperties\
|
||||
" parquet::FileDecryptionProperties":
|
||||
pass
|
||||
|
||||
# Opaque handle for parquet::FileEncryptionProperties.
cdef cppclass CFileEncryptionProperties\
|
||||
" parquet::FileEncryptionProperties":
|
||||
pass
|
||||
@@ -0,0 +1,132 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
# distutils: language = c++
|
||||
|
||||
from pyarrow.includes.common cimport *
|
||||
from pyarrow.includes.libarrow cimport CSecureString
|
||||
from pyarrow._parquet cimport (ParquetCipher,
|
||||
CFileEncryptionProperties,
|
||||
CFileDecryptionProperties,
|
||||
ParquetCipher_AES_GCM_V1,
|
||||
ParquetCipher_AES_GCM_CTR_V1)
|
||||
|
||||
|
||||
# Declarations taken from "parquet/encryption/kms_client.h" in C++ namespace
# "parquet::encryption"; declared nogil.
cdef extern from "parquet/encryption/kms_client.h" \
|
||||
namespace "parquet::encryption" nogil:
|
||||
# KMS client interface: WrapKey takes a CSecureString key and returns a
# c_string; UnwrapKey takes a c_string and returns a CSecureString. Both
# also take a master key identifier and are declared `except +`, so C++
# exceptions are translated into Python exceptions by Cython.
cdef cppclass CKmsClient" parquet::encryption::KmsClient":
|
||||
c_string WrapKey(const CSecureString& key,
|
||||
const c_string& master_key_identifier) except +
|
||||
CSecureString UnwrapKey(const c_string& wrapped_key,
|
||||
const c_string& master_key_identifier) except +
|
||||
|
||||
# Access token holder: constructed from a string, replaceable via
# Refresh(), readable via value().
cdef cppclass CKeyAccessToken" parquet::encryption::KeyAccessToken":
|
||||
CKeyAccessToken(const c_string value)
|
||||
void Refresh(const c_string& new_value)
|
||||
const c_string& value() const
|
||||
|
||||
# Connection configuration with public fields: KMS instance id and URL,
# a shared CKeyAccessToken, and a string-to-string map of custom settings.
cdef cppclass CKmsConnectionConfig \
|
||||
" parquet::encryption::KmsConnectionConfig":
|
||||
CKmsConnectionConfig()
|
||||
c_string kms_instance_id
|
||||
c_string kms_instance_url
|
||||
shared_ptr[CKeyAccessToken] refreshable_key_access_token
|
||||
unordered_map[c_string, c_string] custom_kms_conf
|
||||
|
||||
# Callbacks for implementing Python KMS clients.
|
||||
# Use typedef to emulate syntax for std::function<void(..)>.
# Each callback takes a Python object first and writes its result through
# the trailing pointer argument (c_string* for wrap, CSecureString* for
# unwrap).
|
||||
ctypedef void CallbackWrapKey(
|
||||
object, const CSecureString&, const c_string&, c_string*)
|
||||
ctypedef void CallbackUnwrapKey(
|
||||
object, const c_string&, const c_string&, CSecureString*)
|
||||
|
||||
# Declarations taken from "parquet/encryption/kms_client_factory.h" in C++
# namespace "parquet::encryption"; declared nogil.
cdef extern from "parquet/encryption/kms_client_factory.h" \
|
||||
namespace "parquet::encryption" nogil:
|
||||
# Factory interface: CreateKmsClient takes a connection config and returns
# shared_ptr[CKmsClient]; declared `except +` so C++ exceptions are
# translated into Python exceptions by Cython.
cdef cppclass CKmsClientFactory" parquet::encryption::KmsClientFactory":
|
||||
shared_ptr[CKmsClient] CreateKmsClient(
|
||||
const CKmsConnectionConfig& kms_connection_config) except +
|
||||
|
||||
# Callbacks for implementing Python KMS client factories.
|
||||
# Use typedef to emulate syntax for std::function<void(..)>.
# The callback takes a Python object and a connection config, and writes
# the created client through the trailing shared_ptr[CKmsClient]* argument.
|
||||
ctypedef void CallbackCreateKmsClient(
|
||||
object,
|
||||
const CKmsConnectionConfig&, shared_ptr[CKmsClient]*)
|
||||
|
||||
# Declarations taken from "parquet/encryption/crypto_factory.h" in C++
# namespace "parquet::encryption"; declared nogil.
cdef extern from "parquet/encryption/crypto_factory.h" \
|
||||
namespace "parquet::encryption" nogil:
|
||||
# Encryption settings: constructed from a footer key string; the remaining
# members are public fields (keys, boolean switches, cipher selection,
# cache lifetime in seconds, data key length in bits).
cdef cppclass CEncryptionConfiguration\
|
||||
" parquet::encryption::EncryptionConfiguration":
|
||||
CEncryptionConfiguration(const c_string& footer_key) except +
|
||||
c_string footer_key
|
||||
c_string column_keys
|
||||
c_bool uniform_encryption
|
||||
ParquetCipher encryption_algorithm
|
||||
c_bool plaintext_footer
|
||||
c_bool double_wrapping
|
||||
double cache_lifetime_seconds
|
||||
c_bool internal_key_material
|
||||
int32_t data_key_length_bits
|
||||
|
||||
# Decryption settings: default-constructible, one public field.
cdef cppclass CDecryptionConfiguration\
|
||||
" parquet::encryption::DecryptionConfiguration":
|
||||
CDecryptionConfiguration() except +
|
||||
double cache_lifetime_seconds
|
||||
|
||||
# Crypto factory: registers a KMS client factory, produces file
# encryption/decryption properties from a connection config plus the
# matching configuration object, and exposes two cache-clearing methods.
# The two Get* methods are declared `except +*` while the others use
# `except +`.
cdef cppclass CCryptoFactory" parquet::encryption::CryptoFactory":
|
||||
void RegisterKmsClientFactory(
|
||||
shared_ptr[CKmsClientFactory] kms_client_factory) except +
|
||||
shared_ptr[CFileEncryptionProperties] GetFileEncryptionProperties(
|
||||
const CKmsConnectionConfig& kms_connection_config,
|
||||
const CEncryptionConfiguration& encryption_config) except +*
|
||||
shared_ptr[CFileDecryptionProperties] GetFileDecryptionProperties(
|
||||
const CKmsConnectionConfig& kms_connection_config,
|
||||
const CDecryptionConfiguration& decryption_config) except +*
|
||||
void RemoveCacheEntriesForToken(const c_string& access_token) except +
|
||||
void RemoveCacheEntriesForAllTokens() except +
|
||||
|
||||
# Declarations taken from "arrow/python/parquet_encryption.h" in C++
# namespace "arrow::py::parquet::encryption"; declared nogil.
cdef extern from "arrow/python/parquet_encryption.h" \
|
||||
namespace "arrow::py::parquet::encryption" nogil:
|
||||
# Vtable holding the two std::function-style callbacks (wrap_key,
# unwrap_key) used by CPyKmsClient.
cdef cppclass CPyKmsClientVtable \
|
||||
" arrow::py::parquet::encryption::PyKmsClientVtable":
|
||||
CPyKmsClientVtable()
|
||||
function[CallbackWrapKey] wrap_key
|
||||
function[CallbackUnwrapKey] unwrap_key
|
||||
|
||||
# CKmsClient subclass constructed from a Python handler object and a
# CPyKmsClientVtable.
cdef cppclass CPyKmsClient\
|
||||
" arrow::py::parquet::encryption::PyKmsClient"(CKmsClient):
|
||||
CPyKmsClient(object handler, CPyKmsClientVtable vtable)
|
||||
|
||||
# Vtable holding the single create_kms_client callback used by
# CPyKmsClientFactory.
cdef cppclass CPyKmsClientFactoryVtable\
|
||||
" arrow::py::parquet::encryption::PyKmsClientFactoryVtable":
|
||||
CPyKmsClientFactoryVtable()
|
||||
function[CallbackCreateKmsClient] create_kms_client
|
||||
|
||||
# CKmsClientFactory subclass constructed from a Python handler object and
# a CPyKmsClientFactoryVtable.
cdef cppclass CPyKmsClientFactory\
|
||||
" arrow::py::parquet::encryption::PyKmsClientFactory"(
|
||||
CKmsClientFactory):
|
||||
CPyKmsClientFactory(object handler, CPyKmsClientFactoryVtable vtable)
|
||||
|
||||
# CCryptoFactory subclass adding SafeGet* variants that return a CResult
# wrapping the shared_ptr, with the same parameters as the base-class
# GetFile{En,De}cryptionProperties and no `except` clause.
cdef cppclass CPyCryptoFactory\
|
||||
" arrow::py::parquet::encryption::PyCryptoFactory"(CCryptoFactory):
|
||||
CResult[shared_ptr[CFileEncryptionProperties]] \
|
||||
SafeGetFileEncryptionProperties(
|
||||
const CKmsConnectionConfig& kms_connection_config,
|
||||
const CEncryptionConfiguration& encryption_config)
|
||||
CResult[shared_ptr[CFileDecryptionProperties]] \
|
||||
SafeGetFileDecryptionProperties(
|
||||
const CKmsConnectionConfig& kms_connection_config,
|
||||
const CDecryptionConfiguration& decryption_config)
|
||||
Reference in New Issue
Block a user