Initial commit

kdusek
2025-12-09 12:13:01 +01:00
commit 8e654ed209
13332 changed files with 2695056 additions and 0 deletions

@@ -0,0 +1,460 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
/// \file abi.h Arrow C Data Interface
///
/// The Arrow C Data interface defines a very small, stable set
/// of C definitions which can be easily copied into any project's
/// source code and vendored to be used for columnar data interchange
/// in the Arrow format. For non-C/C++ languages and runtimes,
/// it should be almost as easy to translate the C definitions into
/// the corresponding C FFI declarations.
///
/// Applications and libraries can therefore work with Arrow memory
/// without necessarily using the Arrow libraries or reinventing
/// the wheel. Developers can choose between tight integration
/// with the Arrow software project or minimal integration with
/// the Arrow format only.
#pragma once
#include <stdint.h>
// Spec and documentation: https://arrow.apache.org/docs/format/CDataInterface.html
#ifdef __cplusplus
extern "C" {
#endif
#ifndef ARROW_C_DATA_INTERFACE
# define ARROW_C_DATA_INTERFACE
# define ARROW_FLAG_DICTIONARY_ORDERED 1
# define ARROW_FLAG_NULLABLE 2
# define ARROW_FLAG_MAP_KEYS_SORTED 4
struct ArrowSchema {
// Array type description
const char* format;
const char* name;
const char* metadata;
int64_t flags;
int64_t n_children;
struct ArrowSchema** children;
struct ArrowSchema* dictionary;
// Release callback
void (*release)(struct ArrowSchema*);
// Opaque producer-specific data
void* private_data;
};
struct ArrowArray {
// Array data description
int64_t length;
int64_t null_count;
int64_t offset;
int64_t n_buffers;
int64_t n_children;
const void** buffers;
struct ArrowArray** children;
struct ArrowArray* dictionary;
// Release callback
void (*release)(struct ArrowArray*);
// Opaque producer-specific data
void* private_data;
};
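// Illustrative sketch (editorial, not part of the interface): a minimal
// producer exporting a non-nullable int32 array through the two structs
// above. The names `release_exported_*` and `export_int32_*`, and the
// static storage, are assumptions made for this example only.
//
//   static void release_exported_schema(struct ArrowSchema* schema) {
//     schema->release = NULL;  // nothing heap-allocated to free in this sketch
//   }
//
//   static void release_exported_array(struct ArrowArray* array) {
//     array->release = NULL;   // nothing heap-allocated to free in this sketch
//   }
//
//   static void export_int32_schema(struct ArrowSchema* out) {
//     out->format = "i";       // "i" is the format string for int32
//     out->name = "";
//     out->metadata = NULL;
//     out->flags = 0;          // not nullable in this sketch
//     out->n_children = 0;
//     out->children = NULL;
//     out->dictionary = NULL;
//     out->release = release_exported_schema;
//     out->private_data = NULL;
//   }
//
//   static int32_t g_values[3] = {1, 2, 3};
//   static const void* g_buffers[2] = {NULL, g_values};  // validity, data
//
//   static void export_int32_array(struct ArrowArray* out) {
//     out->length = 3;
//     out->null_count = 0;
//     out->offset = 0;
//     out->n_buffers = 2;      // validity bitmap (may be NULL) + values
//     out->n_children = 0;
//     out->buffers = g_buffers;
//     out->children = NULL;
//     out->dictionary = NULL;
//     out->release = release_exported_array;
//     out->private_data = NULL;
//   }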
# define ARROW_STATISTICS_KEY_AVERAGE_BYTE_WIDTH_EXACT "ARROW:average_byte_width:exact"
# define ARROW_STATISTICS_KEY_AVERAGE_BYTE_WIDTH_APPROXIMATE \
"ARROW:average_byte_width:approximate"
# define ARROW_STATISTICS_KEY_DISTINCT_COUNT_EXACT "ARROW:distinct_count:exact"
# define ARROW_STATISTICS_KEY_DISTINCT_COUNT_APPROXIMATE \
"ARROW:distinct_count:approximate"
# define ARROW_STATISTICS_KEY_MAX_BYTE_WIDTH_EXACT "ARROW:max_byte_width:exact"
# define ARROW_STATISTICS_KEY_MAX_BYTE_WIDTH_APPROXIMATE \
"ARROW:max_byte_width:approximate"
# define ARROW_STATISTICS_KEY_MAX_VALUE_EXACT "ARROW:max_value:exact"
# define ARROW_STATISTICS_KEY_MAX_VALUE_APPROXIMATE "ARROW:max_value:approximate"
# define ARROW_STATISTICS_KEY_MIN_VALUE_EXACT "ARROW:min_value:exact"
# define ARROW_STATISTICS_KEY_MIN_VALUE_APPROXIMATE "ARROW:min_value:approximate"
# define ARROW_STATISTICS_KEY_NULL_COUNT_EXACT "ARROW:null_count:exact"
# define ARROW_STATISTICS_KEY_NULL_COUNT_APPROXIMATE "ARROW:null_count:approximate"
# define ARROW_STATISTICS_KEY_ROW_COUNT_EXACT "ARROW:row_count:exact"
# define ARROW_STATISTICS_KEY_ROW_COUNT_APPROXIMATE "ARROW:row_count:approximate"
#endif // ARROW_C_DATA_INTERFACE
#ifndef ARROW_C_DEVICE_DATA_INTERFACE
# define ARROW_C_DEVICE_DATA_INTERFACE
// Spec and Documentation: https://arrow.apache.org/docs/format/CDeviceDataInterface.html
// DeviceType for the allocated memory
typedef int32_t ArrowDeviceType;
// CPU device, same as using ArrowArray directly
# define ARROW_DEVICE_CPU 1
// CUDA GPU Device
# define ARROW_DEVICE_CUDA 2
// Pinned CUDA CPU memory allocated by cudaMallocHost
# define ARROW_DEVICE_CUDA_HOST 3
// OpenCL Device
# define ARROW_DEVICE_OPENCL 4
// Vulkan buffer for next-gen graphics
# define ARROW_DEVICE_VULKAN 7
// Metal for Apple GPU
# define ARROW_DEVICE_METAL 8
// Verilog simulator buffer
# define ARROW_DEVICE_VPI 9
// ROCm for AMD GPUs
# define ARROW_DEVICE_ROCM 10
// Pinned ROCm CPU memory allocated by hipMallocHost
# define ARROW_DEVICE_ROCM_HOST 11
// Reserved for extension
# define ARROW_DEVICE_EXT_DEV 12
// CUDA managed/unified memory allocated by cudaMallocManaged
# define ARROW_DEVICE_CUDA_MANAGED 13
// Unified shared memory allocated on a oneAPI non-partitioned device
# define ARROW_DEVICE_ONEAPI 14
// GPU support for next-gen WebGPU standard
# define ARROW_DEVICE_WEBGPU 15
// Qualcomm Hexagon DSP
# define ARROW_DEVICE_HEXAGON 16
struct ArrowDeviceArray {
// The allocated array.
//
// The buffers in the array (along with the buffers of any
// children) are what is allocated on the device.
struct ArrowArray array;
// The device id to identify a specific device
int64_t device_id;
// The type of device which can access this memory.
ArrowDeviceType device_type;
// An event-like object to synchronize on if needed.
void* sync_event;
// Reserved bytes for future expansion.
int64_t reserved[3];
};
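// Illustrative sketch (editorial): wrapping an already-exported CPU
// ArrowArray in an ArrowDeviceArray. For CPU data no synchronization is
// needed, so sync_event is NULL, and the reserved bytes should be zeroed.
// `wrap_cpu_array` is a name assumed for this example.
//
//   #include <string.h>
//
//   static void wrap_cpu_array(struct ArrowArray* cpu_array,
//                              struct ArrowDeviceArray* out) {
//     out->array = *cpu_array;      // ownership moves into the device array
//     cpu_array->release = NULL;    // mark the source as moved/released
//     out->device_id = -1;          // no meaningful id for the CPU device here
//     out->device_type = ARROW_DEVICE_CPU;
//     out->sync_event = NULL;       // CPU memory needs no sync event
//     memset(out->reserved, 0, sizeof(out->reserved));
//   }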
#endif // ARROW_C_DEVICE_DATA_INTERFACE
#ifndef ARROW_C_STREAM_INTERFACE
# define ARROW_C_STREAM_INTERFACE
struct ArrowArrayStream {
// Callback to get the stream type
// (will be the same for all arrays in the stream).
//
// Return value: 0 if successful, an `errno`-compatible error code otherwise.
//
// If successful, the ArrowSchema must be released independently from the stream.
int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out);
// Callback to get the next array
// (if no error and the array is released, the stream has ended)
//
// Return value: 0 if successful, an `errno`-compatible error code otherwise.
//
// If successful, the ArrowArray must be released independently from the stream.
int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out);
// Callback to get optional detailed error information.
// This must only be called if the last stream operation failed
// with a non-0 return code.
//
// Return value: pointer to a null-terminated character array describing
// the last error, or NULL if no description is available.
//
// The returned pointer is only valid until the next operation on this stream
// (including release).
const char* (*get_last_error)(struct ArrowArrayStream*);
// Release callback: release the stream's own resources.
// Note that arrays returned by `get_next` must be individually released.
void (*release)(struct ArrowArrayStream*);
// Opaque producer-specific data
void* private_data;
};
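// Illustrative consumer sketch (editorial): draining an ArrowArrayStream.
// The end of the stream is signaled by get_next succeeding while leaving
// the output array released (release == NULL). `consume_stream` is a name
// assumed for this example.
//
//   static int consume_stream(struct ArrowArrayStream* stream) {
//     struct ArrowSchema schema;
//     int code = stream->get_schema(stream, &schema);
//     if (code != 0) return code;
//     schema.release(&schema);  // this sketch does not inspect the schema
//     for (;;) {
//       struct ArrowArray array;
//       code = stream->get_next(stream, &array);
//       if (code != 0) {
//         const char* msg = stream->get_last_error(stream);
//         (void)msg;  // ... log msg here if non-NULL ...
//         break;
//       }
//       if (array.release == NULL) break;  // end of stream
//       // ... process the array ...
//       array.release(&array);
//     }
//     stream->release(stream);
//     return code;
//   }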
#endif // ARROW_C_STREAM_INTERFACE
#ifndef ARROW_C_DEVICE_STREAM_INTERFACE
# define ARROW_C_DEVICE_STREAM_INTERFACE
// Equivalent to ArrowArrayStream, but for ArrowDeviceArrays.
//
// This stream is intended to provide a stream of data on a single
// device. If a producer wants data to be produced on multiple devices,
// multiple streams should be provided, one per device.
struct ArrowDeviceArrayStream {
// The device that this stream produces data on.
ArrowDeviceType device_type;
// Callback to get the stream schema
// (will be the same for all arrays in the stream).
//
// Return value: 0 if successful, an `errno`-compatible error code otherwise.
//
// If successful, the ArrowSchema must be released independently from the stream.
// The schema should be accessible via CPU memory.
int (*get_schema)(struct ArrowDeviceArrayStream* self, struct ArrowSchema* out);
// Callback to get the next array
// (if no error and the array is released, the stream has ended)
//
// Return value: 0 if successful, an `errno`-compatible error code otherwise.
//
// If successful, the ArrowDeviceArray must be released independently from the stream.
int (*get_next)(struct ArrowDeviceArrayStream* self, struct ArrowDeviceArray* out);
// Callback to get optional detailed error information.
// This must only be called if the last stream operation failed
// with a non-0 return code.
//
// Return value: pointer to a null-terminated character array describing
// the last error, or NULL if no description is available.
//
// The returned pointer is only valid until the next operation on this stream
// (including release).
const char* (*get_last_error)(struct ArrowDeviceArrayStream* self);
// Release callback: release the stream's own resources.
// Note that arrays returned by `get_next` must be individually released.
void (*release)(struct ArrowDeviceArrayStream* self);
// Opaque producer-specific data
void* private_data;
};
#endif // ARROW_C_DEVICE_STREAM_INTERFACE
#ifndef ARROW_C_ASYNC_STREAM_INTERFACE
# define ARROW_C_ASYNC_STREAM_INTERFACE
// EXPERIMENTAL: ArrowAsyncTask represents available data from a producer that was passed
// to an invocation of `on_next_task` on the ArrowAsyncDeviceStreamHandler.
//
// The reason for this Task approach instead of the Async interface returning
// the Array directly is to allow for more complex thread handling and reducing
// context switching and data transfers between CPU cores (e.g. from one L1/L2
// cache to another) if desired.
//
// For example, the `on_next_task` callback can be called when data is ready, while
// the producer puts potential "decoding" logic in the `ArrowAsyncTask` object. This
// allows for the producer to manage the I/O on one thread which calls `on_next_task`
// and the consumer can determine when the decoding (producer logic in the `extract_data`
// callback of the task) occurs and on which thread, to avoid a CPU core transfer
// (data staying in the L2 cache).
struct ArrowAsyncTask {
// This callback should populate the ArrowDeviceArray associated with this task.
// The order of ArrowAsyncTasks provided by the producer enables a consumer to
// ensure the order of data to process.
//
// This function is expected to be synchronous, but should not perform any blocking
// I/O. Ideally it should be as cheap as possible so as to not tie up the consumer
// thread unnecessarily.
//
// Returns: 0 if successful, errno-compatible error otherwise.
//
// If a non-0 value is returned then it should be followed by a call to `on_error`
// on the appropriate ArrowAsyncDeviceStreamHandler. This is because it's highly
// likely that whatever is calling this function may be entirely disconnected from
// the current control flow. Indicating an error here with a non-zero return allows
// the current flow to be aware of the error occurring, while still allowing any
// logging or error handling to still be centralized in the `on_error` callback of
// the original Async handler.
//
// Rather than a release callback, any required cleanup should be performed as part
// of the invocation of `extract_data`. Ownership of the Array is passed to the consumer
// calling this, and so it must be released separately.
//
// It is only valid to call this method exactly once.
int (*extract_data)(struct ArrowAsyncTask* self, struct ArrowDeviceArray* out);
// opaque task-specific data
void* private_data;
};
// EXPERIMENTAL: ArrowAsyncProducer represents a 1-to-1 relationship between an async
// producer and consumer. This object allows the consumer to perform backpressure and flow
// control on the asynchronous stream processing. This object must be owned by the
// producer who creates it, and thus is responsible for cleaning it up.
struct ArrowAsyncProducer {
// The device type that this stream produces data on.
ArrowDeviceType device_type;
// A consumer must call this function to start receiving on_next_task calls.
//
// It *must* be valid to call this synchronously from within `on_next_task` or
// `on_schema`, but this function *must not* immediately call `on_next_task` so as
// to avoid recursion and reentrant callbacks.
//
// After cancel has been called, additional calls to this function must be NOPs,
// but are still allowed. While not cancelled, calling this function must register the
// given number of additional arrays/batches to be produced with the producer.
// The producer should call `on_next_task` at most the registered number of
// times before propagating backpressure.
//
// Any error encountered by calling request must be propagated by calling the `on_error`
// callback of the ArrowAsyncDeviceStreamHandler.
//
// While not cancelled, any subsequent calls to `on_next_task`, `on_error` or
// `release` should be scheduled by the producer to be called later.
//
// It is invalid for a consumer to call this with a value of n <= 0; producers
// should error if given such a value.
void (*request)(struct ArrowAsyncProducer* self, int64_t n);
// This cancel callback signals a producer that it must eventually stop making calls
// to on_next_task. It must be idempotent and thread-safe. After calling cancel once,
// subsequent calls must be NOPs. This must not call any consumer-side handlers other
// than `on_error`.
//
// It is not required that calling cancel affect the producer immediately, only that it
// must eventually stop calling on_next_task and subsequently call release on the
// async handler. As such, a consumer must be prepared to receive one or more calls to
// `on_next_task` even after calling cancel if there are still requested arrays pending.
//
// Successful cancellation should *not* result in the producer calling
// `on_error`; it should finish out any remaining tasks and eventually call
// `release`.
//
// Any error encountered during handling a call to cancel must be reported via the
// on_error callback on the async stream handler.
void (*cancel)(struct ArrowAsyncProducer* self);
// Any additional metadata tied to a specific stream of data. This must either be NULL
// or a valid pointer to metadata which is encoded in the same way schema metadata
// would be. Non-null metadata must be valid for the lifetime of this object. As an
// example a producer could use this to provide the total number of rows and/or batches
// in the stream if known.
const char* additional_metadata;
// producer-specific opaque data.
void* private_data;
};
// EXPERIMENTAL: Similar to ArrowDeviceArrayStream, except designed for an asynchronous
// style of interaction. While ArrowDeviceArrayStream provides producer
// defined callbacks, this is intended to be created by the consumer instead.
// The consumer passes this handler to the producer, which in turn uses the
// callbacks to inform the consumer of events in the stream.
struct ArrowAsyncDeviceStreamHandler {
// Handler for receiving a schema. The passed in stream_schema must be
// released or moved by the handler (producer is giving ownership of the schema to
// the handler, but not ownership of the top level object itself).
//
// With the exception of an error occurring (on_error), this must be the first
// callback function called by a producer and must be called exactly
// once. As such, the producer should provide a valid ArrowAsyncProducer instance
// so the consumer can control the flow. See the documentation on ArrowAsyncProducer
// for how it works. The ArrowAsyncProducer is owned by the producer who calls this
// function and thus the producer is responsible for cleaning it up when calling
// the release callback of this handler.
//
// If there is any additional metadata tied to this stream, it will be provided as
// a non-null value for the `additional_metadata` field of the ArrowAsyncProducer
// which will be valid at least until the release callback is called.
//
// Return value: 0 if successful, `errno`-compatible error otherwise
//
// A producer that receives a non-zero return here should stop producing and eventually
// call release instead.
int (*on_schema)(struct ArrowAsyncDeviceStreamHandler* self,
struct ArrowSchema* stream_schema);
// Handler for receiving data. This is called when data is available providing an
// ArrowAsyncTask struct to signify it. The producer indicates the end of the stream
// by passing NULL as the value for the task rather than a valid pointer to a task.
// The task object is only valid for the lifetime of this function call; if a consumer
// wants to utilize it after this function returns, it must copy or move the contents
// of it to a new ArrowAsyncTask object.
//
// The `request` callback of a provided ArrowAsyncProducer must be called in order
// to start receiving calls to this handler.
//
// The metadata argument can be null or can be used by a producer
// to pass arbitrary extra information to the consumer (such as total number
// of rows, context info, or otherwise). The data should be passed using the same
// encoding as the metadata within the ArrowSchema struct itself (defined in
// the spec at
// https://arrow.apache.org/docs/format/CDataInterface.html#c.ArrowSchema.metadata)
//
// If metadata is non-null then it only needs to exist for the lifetime of this call;
// a consumer who wants it to live after that must copy it to ensure lifetime.
//
// A producer *must not* call this concurrently from multiple different threads.
//
// A consumer must be prepared to receive one or more calls to this callback even
// after calling cancel on the corresponding ArrowAsyncProducer, as cancel does not
// guarantee it happens immediately.
//
// Return value: 0 if successful, `errno`-compatible error otherwise.
//
// If the consumer returns a non-zero value from this method, that indicates to the
// producer that it should stop propagating data as an error occurred. After receiving
// such a return, the only interaction with this object is for the producer to call
// the `release` callback.
int (*on_next_task)(struct ArrowAsyncDeviceStreamHandler* self,
struct ArrowAsyncTask* task, const char* metadata);
// Handler for encountering an error. The producer should call release after
// this returns to clean up any resources. The `code` passed in can be any error
// code that a producer wants, but should be errno-compatible for consistency.
//
// If the message or metadata are non-null, they will only last as long as this
// function call. The consumer would need to perform a copy of the data if it is
// necessary for them to live past the lifetime of this call.
//
// Error metadata should be encoded as with metadata in ArrowSchema, defined in
// the spec at
// https://arrow.apache.org/docs/format/CDataInterface.html#c.ArrowSchema.metadata
//
// It is valid for this to be called by a producer with or without a preceding call
// to ArrowAsyncProducer.request.
//
// This callback must not call any methods of an ArrowAsyncProducer object.
void (*on_error)(struct ArrowAsyncDeviceStreamHandler* self, int code,
const char* message, const char* metadata);
// Release callback to release any resources for the handler. Should always be
// called by a producer when it is done utilizing a handler. No callbacks should
// be called after this is called.
//
// It is valid for the release callback to be called by a producer with or without
// a preceding call to ArrowAsyncProducer.request.
//
// The release callback must not call any methods of an ArrowAsyncProducer object.
void (*release)(struct ArrowAsyncDeviceStreamHandler* self);
// MUST be populated by the producer BEFORE calling any callbacks other than release.
// This provides the connection between a handler and its producer, and must exist until
// the release callback is called.
struct ArrowAsyncProducer* producer;
// Opaque handler-specific data
void* private_data;
};
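// Illustrative consumer sketch (editorial): a minimal async handshake. The
// producer calls on_schema first; the consumer starts the flow by calling
// request on the producer (valid to do synchronously from on_schema), then
// extracts each task's data inside on_next_task. The `_cb` callback names
// are assumptions for this example.
//
//   static int on_schema_cb(struct ArrowAsyncDeviceStreamHandler* self,
//                           struct ArrowSchema* stream_schema) {
//     stream_schema->release(stream_schema);  // this sketch ignores the schema
//     self->producer->request(self->producer, 1);  // ask for the first batch
//     return 0;
//   }
//
//   static int on_next_task_cb(struct ArrowAsyncDeviceStreamHandler* self,
//                              struct ArrowAsyncTask* task,
//                              const char* metadata) {
//     if (task == NULL) return 0;  // end of stream; wait for release
//     struct ArrowDeviceArray out;
//     int code = task->extract_data(task, &out);
//     if (code != 0) return code;
//     // ... process, then release the contained array ...
//     out.array.release(&out.array);
//     self->producer->request(self->producer, 1);  // keep data flowing
//     return 0;
//   }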
#endif // ARROW_C_ASYNC_STREAM_INTERFACE
#ifdef __cplusplus
}
#endif

@@ -0,0 +1,489 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <functional>
#include <memory>
#include <string>
#include "arrow/c/abi.h"
#include "arrow/device.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type_fwd.h"
#include "arrow/util/async_generator_fwd.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
/// \defgroup c-data-interface Functions for working with the C data interface.
///
/// @{
/// \brief Export C++ DataType using the C data interface format.
///
/// The root type is considered to have empty name and metadata.
/// If you want the root type to have a name and/or metadata, pass
/// a Field instead.
///
/// \param[in] type DataType object to export
/// \param[out] out C struct where to export the datatype
ARROW_EXPORT
Status ExportType(const DataType& type, struct ArrowSchema* out);
/// \brief Export C++ Field using the C data interface format.
///
/// \param[in] field Field object to export
/// \param[out] out C struct where to export the field
ARROW_EXPORT
Status ExportField(const Field& field, struct ArrowSchema* out);
/// \brief Export C++ Schema using the C data interface format.
///
/// \param[in] schema Schema object to export
/// \param[out] out C struct where to export the schema
ARROW_EXPORT
Status ExportSchema(const Schema& schema, struct ArrowSchema* out);
/// \brief Export C++ Array using the C data interface format.
///
/// The resulting ArrowArray struct keeps the array data and buffers alive
/// until its release callback is called by the consumer.
///
/// \param[in] array Array object to export
/// \param[out] out C struct where to export the array
/// \param[out] out_schema optional C struct where to export the array type
ARROW_EXPORT
Status ExportArray(const Array& array, struct ArrowArray* out,
struct ArrowSchema* out_schema = NULLPTR);
/// \brief Export C++ RecordBatch using the C data interface format.
///
/// The record batch is exported as if it were a struct array.
/// The resulting ArrowArray struct keeps the record batch data and buffers alive
/// until its release callback is called by the consumer.
///
/// \param[in] batch Record batch to export
/// \param[out] out C struct where to export the record batch
/// \param[out] out_schema optional C struct where to export the record batch schema
ARROW_EXPORT
Status ExportRecordBatch(const RecordBatch& batch, struct ArrowArray* out,
struct ArrowSchema* out_schema = NULLPTR);
/// \brief Import C++ DataType from the C data interface.
///
/// The given ArrowSchema struct is released (as per the C data interface
/// specification), even if this function fails.
///
/// \param[in,out] schema C data interface struct representing the data type
/// \return Imported type object
ARROW_EXPORT
Result<std::shared_ptr<DataType>> ImportType(struct ArrowSchema* schema);
/// \brief Import C++ Field from the C data interface.
///
/// The given ArrowSchema struct is released (as per the C data interface
/// specification), even if this function fails.
///
/// \param[in,out] schema C data interface struct representing the field
/// \return Imported field object
ARROW_EXPORT
Result<std::shared_ptr<Field>> ImportField(struct ArrowSchema* schema);
/// \brief Import C++ Schema from the C data interface.
///
/// The given ArrowSchema struct is released (as per the C data interface
/// specification), even if this function fails.
///
/// \param[in,out] schema C data interface struct representing the schema
/// \return Imported schema object
ARROW_EXPORT
Result<std::shared_ptr<Schema>> ImportSchema(struct ArrowSchema* schema);
/// \brief Import C++ array from the C data interface.
///
/// The ArrowArray struct has its contents moved (as per the C data interface
/// specification) to a private object held alive by the resulting array.
///
/// \param[in,out] array C data interface struct holding the array data
/// \param[in] type type of the imported array
/// \return Imported array object
ARROW_EXPORT
Result<std::shared_ptr<Array>> ImportArray(struct ArrowArray* array,
std::shared_ptr<DataType> type);
/// \brief Import C++ array and its type from the C data interface.
///
/// The ArrowArray struct has its contents moved (as per the C data interface
/// specification) to a private object held alive by the resulting array.
/// The ArrowSchema struct is released, even if this function fails.
///
/// \param[in,out] array C data interface struct holding the array data
/// \param[in,out] type C data interface struct holding the array type
/// \return Imported array object
ARROW_EXPORT
Result<std::shared_ptr<Array>> ImportArray(struct ArrowArray* array,
struct ArrowSchema* type);
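// Illustrative roundtrip sketch (editorial, assuming only the declarations
// above): export an arrow::Array through the C data interface and import it
// back. Both C structs are consumed by the import, even on failure, so no
// extra cleanup is needed here. `Roundtrip` is a name assumed for this
// example.
//
//   arrow::Result<std::shared_ptr<arrow::Array>> Roundtrip(
//       const std::shared_ptr<arrow::Array>& array) {
//     struct ArrowArray c_array;
//     struct ArrowSchema c_schema;
//     ARROW_RETURN_NOT_OK(arrow::ExportArray(*array, &c_array, &c_schema));
//     return arrow::ImportArray(&c_array, &c_schema);
//   }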
/// \brief Import C++ record batch from the C data interface.
///
/// The ArrowArray struct has its contents moved (as per the C data interface
/// specification) to a private object held alive by the resulting record batch.
///
/// \param[in,out] array C data interface struct holding the record batch data
/// \param[in] schema schema of the imported record batch
/// \return Imported record batch object
ARROW_EXPORT
Result<std::shared_ptr<RecordBatch>> ImportRecordBatch(struct ArrowArray* array,
std::shared_ptr<Schema> schema);
/// \brief Import C++ record batch and its schema from the C data interface.
///
/// The type represented by the ArrowSchema struct must be a struct type array.
/// The ArrowArray struct has its contents moved (as per the C data interface
/// specification) to a private object held alive by the resulting record batch.
/// The ArrowSchema struct is released, even if this function fails.
///
/// \param[in,out] array C data interface struct holding the record batch data
/// \param[in,out] schema C data interface struct holding the record batch schema
/// \return Imported record batch object
ARROW_EXPORT
Result<std::shared_ptr<RecordBatch>> ImportRecordBatch(struct ArrowArray* array,
struct ArrowSchema* schema);
/// @}
/// \defgroup c-data-device-interface Functions for working with the C data device
/// interface.
///
/// @{
/// \brief EXPERIMENTAL: Export C++ Array as an ArrowDeviceArray.
///
/// The resulting ArrowDeviceArray struct keeps the array data and buffers alive
/// until its release callback is called by the consumer. All buffers in
/// the provided array MUST have the same device_type, otherwise an error
/// will be returned.
///
/// If sync is non-null, get_event will be called on it in order to
/// potentially provide an event for consumers to synchronize on.
///
/// \param[in] array Array object to export
/// \param[in] sync shared_ptr to object derived from Device::SyncEvent or null
/// \param[out] out C struct to export the array to
/// \param[out] out_schema optional C struct to export the array type to
ARROW_EXPORT
Status ExportDeviceArray(const Array& array, std::shared_ptr<Device::SyncEvent> sync,
struct ArrowDeviceArray* out,
struct ArrowSchema* out_schema = NULLPTR);
/// \brief EXPERIMENTAL: Export C++ RecordBatch as an ArrowDeviceArray.
///
/// The record batch is exported as if it were a struct array.
/// The resulting ArrowDeviceArray struct keeps the record batch data and buffers alive
/// until its release callback is called by the consumer.
///
/// All buffers of all columns in the record batch must have the same device_type
/// otherwise an error will be returned. If columns are on different devices,
/// they should be exported using different ArrowDeviceArray instances.
///
/// If sync is non-null, get_event will be called on it in order to
/// potentially provide an event for consumers to synchronize on.
///
/// \param[in] batch Record batch to export
/// \param[in] sync shared_ptr to object derived from Device::SyncEvent or null
/// \param[out] out C struct where to export the record batch
/// \param[out] out_schema optional C struct where to export the record batch schema
ARROW_EXPORT
Status ExportDeviceRecordBatch(const RecordBatch& batch,
std::shared_ptr<Device::SyncEvent> sync,
struct ArrowDeviceArray* out,
struct ArrowSchema* out_schema = NULLPTR);
using DeviceMemoryMapper =
std::function<Result<std::shared_ptr<MemoryManager>>(ArrowDeviceType, int64_t)>;
ARROW_EXPORT
Result<std::shared_ptr<MemoryManager>> DefaultDeviceMemoryMapper(
ArrowDeviceType device_type, int64_t device_id);
/// \brief EXPERIMENTAL: Import C++ device array from the C data interface.
///
/// The ArrowArray struct has its contents moved (as per the C data interface
/// specification) to a private object held alive by the resulting array. The
/// buffers of the Array are located on the device indicated by the device_type.
///
/// \param[in,out] array C data interface struct holding the array data
/// \param[in] type type of the imported array
/// \param[in] mapper A function to map device + id to memory manager. If not
/// specified, defaults to map "cpu" to the built-in default memory manager.
/// \return Imported array object
ARROW_EXPORT
Result<std::shared_ptr<Array>> ImportDeviceArray(
struct ArrowDeviceArray* array, std::shared_ptr<DataType> type,
const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper);
/// \brief EXPERIMENTAL: Import C++ device array and its type from the C data interface.
///
/// The ArrowArray struct has its contents moved (as per the C data interface
/// specification) to a private object held alive by the resulting array.
/// The ArrowSchema struct is released, even if this function fails. The
/// buffers of the Array are located on the device indicated by the device_type.
///
/// \param[in,out] array C data interface struct holding the array data
/// \param[in,out] type C data interface struct holding the array type
/// \param[in] mapper A function to map device + id to memory manager. If not
/// specified, defaults to map "cpu" to the built-in default memory manager.
/// \return Imported array object
ARROW_EXPORT
Result<std::shared_ptr<Array>> ImportDeviceArray(
struct ArrowDeviceArray* array, struct ArrowSchema* type,
const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper);
/// \brief EXPERIMENTAL: Import C++ record batch with buffers on a device from the C data
/// interface.
///
/// The ArrowArray struct has its contents moved (as per the C data interface
/// specification) to a private object held alive by the resulting record batch.
/// The buffers of all columns of the record batch are located on the device
/// indicated by the device type.
///
/// \param[in,out] array C data interface struct holding the record batch data
/// \param[in] schema schema of the imported record batch
/// \param[in] mapper A function to map device + id to memory manager. If not
/// specified, defaults to map "cpu" to the built-in default memory manager.
/// \return Imported record batch object
ARROW_EXPORT
Result<std::shared_ptr<RecordBatch>> ImportDeviceRecordBatch(
struct ArrowDeviceArray* array, std::shared_ptr<Schema> schema,
const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper);
/// \brief EXPERIMENTAL: Import C++ record batch with buffers on a device and its schema
/// from the C data interface.
///
/// The type represented by the ArrowSchema struct must be a struct type array.
/// The ArrowArray struct has its contents moved (as per the C data interface
/// specification) to a private object held alive by the resulting record batch.
/// The ArrowSchema struct is released, even if this function fails. The buffers
/// of all columns of the record batch are located on the device indicated by the
/// device type.
///
/// \param[in,out] array C data interface struct holding the record batch data
/// \param[in,out] schema C data interface struct holding the record batch schema
/// \param[in] mapper A function to map device + id to memory manager. If not
/// specified, defaults to map "cpu" to the built-in default memory manager.
/// \return Imported record batch object
ARROW_EXPORT
Result<std::shared_ptr<RecordBatch>> ImportDeviceRecordBatch(
struct ArrowDeviceArray* array, struct ArrowSchema* schema,
const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper);
/// @}
/// \defgroup c-stream-interface Functions for working with the C stream interface.
///
/// @{
/// \brief Export C++ RecordBatchReader using the C stream interface.
///
/// The resulting ArrowArrayStream struct keeps the record batch reader alive
/// until its release callback is called by the consumer.
///
/// \param[in] reader RecordBatchReader object to export
/// \param[out] out C struct where to export the stream
ARROW_EXPORT
Status ExportRecordBatchReader(std::shared_ptr<RecordBatchReader> reader,
struct ArrowArrayStream* out);
/// \brief Export C++ ChunkedArray using the C data interface format.
///
/// The resulting ArrowArrayStream struct keeps the chunked array data and buffers alive
/// until its release callback is called by the consumer.
///
/// \param[in] chunked_array ChunkedArray object to export
/// \param[out] out C struct where to export the stream
ARROW_EXPORT
Status ExportChunkedArray(std::shared_ptr<ChunkedArray> chunked_array,
struct ArrowArrayStream* out);
/// \brief Export C++ RecordBatchReader using the C device stream interface
///
/// The resulting ArrowDeviceArrayStream struct keeps the record batch reader
/// alive until its release callback is called by the consumer. The device
/// type is determined by calling device_type() on the RecordBatchReader.
///
/// \param[in] reader RecordBatchReader object to export
/// \param[out] out C struct to export the stream to
ARROW_EXPORT
Status ExportDeviceRecordBatchReader(std::shared_ptr<RecordBatchReader> reader,
struct ArrowDeviceArrayStream* out);
/// \brief Export C++ ChunkedArray using the C device data interface format.
///
/// The resulting ArrowDeviceArrayStream keeps the chunked array data and buffers
/// alive until its release callback is called by the consumer.
///
/// \param[in] chunked_array ChunkedArray object to export
/// \param[in] device_type the device type the data is located on
/// \param[out] out C struct to export the stream to
ARROW_EXPORT
Status ExportDeviceChunkedArray(std::shared_ptr<ChunkedArray> chunked_array,
DeviceAllocationType device_type,
struct ArrowDeviceArrayStream* out);
/// \brief Import C++ RecordBatchReader from the C stream interface.
///
/// The ArrowArrayStream struct has its contents moved to a private object
/// held alive by the resulting record batch reader.
///
/// \param[in,out] stream C stream interface struct
/// \return Imported RecordBatchReader object
ARROW_EXPORT
Result<std::shared_ptr<RecordBatchReader>> ImportRecordBatchReader(
struct ArrowArrayStream* stream);
/// \brief Import C++ ChunkedArray from the C stream interface
///
/// The ArrowArrayStream struct has its contents moved to a private object,
/// is consumed in its entirety, and released before returning all chunks
/// as a ChunkedArray.
///
/// \param[in,out] stream C stream interface struct
/// \return Imported ChunkedArray object
ARROW_EXPORT
Result<std::shared_ptr<ChunkedArray>> ImportChunkedArray(struct ArrowArrayStream* stream);
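// Illustrative sketch (editorial): moving a RecordBatchReader across the C
// stream interface and reading the batches back. Error handling is elided;
// `PassThroughStream` is a name assumed for this example.
//
//   arrow::Status PassThroughStream(
//       std::shared_ptr<arrow::RecordBatchReader> reader) {
//     struct ArrowArrayStream c_stream;
//     ARROW_RETURN_NOT_OK(
//         arrow::ExportRecordBatchReader(std::move(reader), &c_stream));
//     ARROW_ASSIGN_OR_RAISE(auto imported,
//                           arrow::ImportRecordBatchReader(&c_stream));
//     std::shared_ptr<arrow::RecordBatch> batch;
//     while (true) {
//       ARROW_RETURN_NOT_OK(imported->ReadNext(&batch));
//       if (batch == nullptr) break;  // end of stream
//       // ... use the batch ...
//     }
//     return arrow::Status::OK();
//   }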
/// \brief Import C++ RecordBatchReader from the C device stream interface
///
/// The ArrowDeviceArrayStream struct has its contents moved to a private object
/// held alive by the resulting record batch reader.
///
/// \note If there was a required sync event, sync events are accessible by individual
/// buffers of columns. We are not yet bubbling the sync events from the buffers up to
/// the `GetSyncEvent` method of an imported RecordBatch. This will be added in a future
/// update.
///
/// \param[in,out] stream C device stream interface struct
/// \param[in] mapper mapping from device type and ID to memory manager
/// \return Imported RecordBatchReader object
ARROW_EXPORT
Result<std::shared_ptr<RecordBatchReader>> ImportDeviceRecordBatchReader(
struct ArrowDeviceArrayStream* stream,
const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper);
/// \brief Import C++ ChunkedArray from the C device stream interface
///
/// The ArrowDeviceArrayStream struct has its contents moved to a private object,
/// is consumed in its entirety, and released before returning all chunks as a
/// ChunkedArray.
///
/// \note Any chunks that require synchronization for their device memory will have
/// the SyncEvent objects available by checking the individual buffers of each chunk.
/// These SyncEvents should be checked before accessing the data in those buffers.
///
/// \param[in,out] stream C device stream interface struct
/// \param[in] mapper mapping from device type and ID to memory manager
/// \return Imported ChunkedArray object
ARROW_EXPORT
Result<std::shared_ptr<ChunkedArray>> ImportDeviceChunkedArray(
struct ArrowDeviceArrayStream* stream,
const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper);
/// @}
/// \defgroup c-async-stream-interface Functions for working with the async C data
/// interface.
///
/// @{
/// \brief EXPERIMENTAL: AsyncErrorDetail is a StatusDetail that contains an error code
/// and message from an asynchronous operation.
class AsyncErrorDetail : public StatusDetail {
public:
AsyncErrorDetail(int code, std::string message, std::string metadata)
: code_(code), message_(std::move(message)), metadata_(std::move(metadata)) {}
const char* type_id() const override { return "AsyncErrorDetail"; }
// ToString just returns the error message that was returned with the error
std::string ToString() const override { return message_; }
// code is an errno-compatible error code
int code() const { return code_; }
// returns any metadata that was returned with the error, likely in a
// key-value format similar to ArrowSchema metadata
const std::string& ErrorMetadataString() const { return metadata_; }
std::shared_ptr<KeyValueMetadata> ErrorMetadata() const;
private:
int code_{0};
std::string message_;
std::string metadata_;
};
struct AsyncRecordBatchGenerator {
std::shared_ptr<Schema> schema;
DeviceAllocationType device_type;
AsyncGenerator<RecordBatchWithMetadata> generator;
};
namespace internal {
class Executor;
}
/// \brief EXPERIMENTAL: Create an AsyncRecordBatchGenerator and populate a
/// corresponding handler to pass to a producer
///
/// The ArrowAsyncDeviceStreamHandler struct is intended to have its callbacks populated
/// and then be passed to a producer to call the appropriate callbacks when data is ready.
/// This inverts the traditional flow of control, and so we construct a corresponding
/// AsyncRecordBatchGenerator to provide an interface for the consumer to retrieve data as
/// it is pushed to the handler.
///
/// \param[in,out] handler C struct to be populated
/// \param[in] executor the executor to use for waiting and populating record batches
/// \param[in] queue_size initial number of record batches to request for queueing
/// \param[in] mapper mapping from device type and ID to memory manager
/// \return Future that resolves to either an error or AsyncRecordBatchGenerator once a
/// schema is available or an error is received.
ARROW_EXPORT
Future<AsyncRecordBatchGenerator> CreateAsyncDeviceStreamHandler(
struct ArrowAsyncDeviceStreamHandler* handler, internal::Executor* executor,
uint64_t queue_size = 5, DeviceMemoryMapper mapper = DefaultDeviceMemoryMapper);
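// Illustrative sketch (editorial): wiring the handler to a producer. The
// handler must stay alive until its release callback runs; `StartProducer`
// is a hypothetical producer entry point assumed for this example.
//
//   arrow::Future<arrow::AsyncRecordBatchGenerator> ConsumeAsync(
//       arrow::internal::Executor* executor) {
//     static struct ArrowAsyncDeviceStreamHandler handler;  // outlives the call
//     auto fut = arrow::CreateAsyncDeviceStreamHandler(&handler, executor);
//     StartProducer(&handler);  // hypothetical: producer begins invoking callbacks
//     return fut;
//   }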
/// \brief EXPERIMENTAL: Export an AsyncGenerator of record batches using a provided
/// handler
///
/// This function calls the callbacks on the consumer-provided async handler as record
/// batches become available from the AsyncGenerator which is provided. It will first call
/// on_schema using the provided schema, and then serially visit each record batch from
/// the generator, calling the on_next_task callback. If an error occurs, on_error will be
/// called appropriately.
///
/// \param[in] schema the schema of the stream being exported
/// \param[in] generator a generator that asynchronously produces record batches
/// \param[in] device_type the device type that the record batches will be located on
/// \param[in] handler the handler whose callbacks to utilize as data is available
/// \return Future that will resolve once the generator is exhausted or an error occurs
ARROW_EXPORT
Future<> ExportAsyncRecordBatchReader(
std::shared_ptr<Schema> schema,
AsyncGenerator<std::shared_ptr<RecordBatch>> generator,
DeviceAllocationType device_type, struct ArrowAsyncDeviceStreamHandler* handler);
/// @}
} // namespace arrow

@@ -0,0 +1,57 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "arrow/array/array_base.h"
#include "arrow/c/dlpack_abi.h"
namespace arrow::dlpack {
/// \brief Export Arrow array as DLPack tensor.
///
/// A DLManagedTensor is produced as defined by the DLPack protocol,
/// see https://dmlc.github.io/dlpack/latest/.
///
/// Data types for which the protocol is supported are
/// integer and floating-point data types.
///
/// The DLPack protocol only supports arrays with one contiguous
/// memory region, which means Arrow arrays with validity buffers
/// are not supported.
///
/// \param[in] arr Arrow array
/// \return DLManagedTensor struct
ARROW_EXPORT
Result<DLManagedTensor*> ExportArray(const std::shared_ptr<Array>& arr);
ARROW_EXPORT
Result<DLManagedTensor*> ExportTensor(const std::shared_ptr<Tensor>& t);
/// \brief Get the DLDevice, whose enumerator specifies the type
/// of device the data is stored on, together with the device
/// index (0 by default for CPU).
///
/// \param[in] arr Arrow array
/// \return DLDevice struct
ARROW_EXPORT
Result<DLDevice> ExportDevice(const std::shared_ptr<Array>& arr);
ARROW_EXPORT
Result<DLDevice> ExportDevice(const std::shared_ptr<Tensor>& t);
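// Illustrative sketch (editorial): exporting an Arrow array via DLPack. The
// consumer of the DLManagedTensor is responsible for calling its deleter;
// this sketch calls it immediately for brevity. `ShareViaDLPack` is a name
// assumed for this example, and ARROW_ASSIGN_OR_RAISE comes from
// arrow/result.h.
//
//   arrow::Status ShareViaDLPack(const std::shared_ptr<arrow::Array>& arr) {
//     ARROW_ASSIGN_OR_RAISE(DLManagedTensor* tensor,
//                           arrow::dlpack::ExportArray(arr));
//     // ... hand `tensor` to a DLPack-aware consumer ...
//     tensor->deleter(tensor);  // normally the consumer calls this when done
//     return arrow::Status::OK();
//   }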
} // namespace arrow::dlpack

@@ -0,0 +1,321 @@
// Taken from:
// https://github.com/dmlc/dlpack/blob/ca4d00ad3e2e0f410eeab3264d21b8a39397f362/include/dlpack/dlpack.h
/*!
* Copyright (c) 2017 by Contributors
* \file dlpack.h
* \brief The common header of DLPack.
*/
#ifndef DLPACK_DLPACK_H_
#define DLPACK_DLPACK_H_
/**
* \brief Compatibility with C++
*/
#ifdef __cplusplus
# define DLPACK_EXTERN_C extern "C"
#else
# define DLPACK_EXTERN_C
#endif
/*! \brief The current major version of dlpack */
#define DLPACK_MAJOR_VERSION 1
/*! \brief The current minor version of dlpack */
#define DLPACK_MINOR_VERSION 0
/*! \brief DLPACK_DLL prefix for windows */
#ifdef _WIN32
# ifdef DLPACK_EXPORTS
# define DLPACK_DLL __declspec(dllexport)
# else
# define DLPACK_DLL __declspec(dllimport)
# endif
#else
# define DLPACK_DLL
#endif
#include <stddef.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
/*!
* \brief The DLPack version.
*
* A change in major version indicates that we have changed the
* data layout of the ABI - DLManagedTensorVersioned.
*
* A change in minor version indicates that we have added new
* code, such as a new device type, but the ABI is kept the same.
*
* If an obtained DLPack tensor has a major version that disagrees
* with the version number specified in this header file
* (i.e. major != DLPACK_MAJOR_VERSION), the consumer must call the deleter
* (and it is safe to do so). It is not safe to access any other fields
* as the memory layout will have changed.
*
* In the case of a minor version mismatch, the tensor can be safely used as
* long as the consumer knows how to interpret all fields. Minor version
* updates indicate the addition of enumeration values.
*/
typedef struct {
/*! \brief DLPack major version. */
uint32_t major;
/*! \brief DLPack minor version. */
uint32_t minor;
} DLPackVersion;
/*!
* \brief The device type in DLDevice.
*/
#ifdef __cplusplus
typedef enum : int32_t {
#else
typedef enum {
#endif
/*! \brief CPU device */
kDLCPU = 1,
/*! \brief CUDA GPU device */
kDLCUDA = 2,
/*!
* \brief Pinned CUDA CPU memory allocated by cudaMallocHost
*/
kDLCUDAHost = 3,
/*! \brief OpenCL devices. */
kDLOpenCL = 4,
/*! \brief Vulkan buffer for next generation graphics. */
kDLVulkan = 7,
/*! \brief Metal for Apple GPU. */
kDLMetal = 8,
/*! \brief Verilog simulator buffer */
kDLVPI = 9,
/*! \brief ROCm for AMD GPUs */
kDLROCM = 10,
/*!
* \brief Pinned ROCm CPU memory allocated by hipMallocHost
*/
kDLROCMHost = 11,
/*!
* \brief Reserved extension device type,
* used to quickly test extension devices.
* The semantics can differ depending on the implementation.
*/
kDLExtDev = 12,
/*!
* \brief CUDA managed/unified memory allocated by cudaMallocManaged
*/
kDLCUDAManaged = 13,
/*!
* \brief Unified shared memory allocated on a oneAPI non-partitioned
* device. Call to oneAPI runtime is required to determine the device
* type, the USM allocation type and the sycl context it is bound to.
*
*/
kDLOneAPI = 14,
/*! \brief GPU support for next generation WebGPU standard. */
kDLWebGPU = 15,
/*! \brief Qualcomm Hexagon DSP */
kDLHexagon = 16,
} DLDeviceType;
/*!
* \brief A Device for Tensor and operator.
*/
typedef struct {
/*! \brief The device type used in the device. */
DLDeviceType device_type;
/*!
* \brief The device index.
* For vanilla CPU memory, pinned memory, or managed memory, this is set to 0.
*/
int32_t device_id;
} DLDevice;
/*!
* \brief The type code options DLDataType.
*/
typedef enum {
/*! \brief signed integer */
kDLInt = 0U,
/*! \brief unsigned integer */
kDLUInt = 1U,
/*! \brief IEEE floating point */
kDLFloat = 2U,
/*!
* \brief Opaque handle type, reserved for testing purposes.
* Frameworks need to agree on the handle data type for the exchange to be well-defined.
*/
kDLOpaqueHandle = 3U,
/*! \brief bfloat16 */
kDLBfloat = 4U,
/*!
* \brief complex number
* (C/C++/Python layout: compact struct per complex number)
*/
kDLComplex = 5U,
/*! \brief boolean */
kDLBool = 6U,
} DLDataTypeCode;
/*!
* \brief The data type the tensor can hold. The data type is assumed to follow the
* native endian-ness. An explicit error message should be raised when attempting to
* export an array with non-native endianness
*
* Examples
* - float: type_code = 2, bits = 32, lanes = 1
* - float4(vectorized 4 float): type_code = 2, bits = 32, lanes = 4
* - int8: type_code = 0, bits = 8, lanes = 1
* - std::complex<float>: type_code = 5, bits = 64, lanes = 1
* - bool: type_code = 6, bits = 8, lanes = 1 (as per common array library convention,
* the underlying storage size of bool is 8 bits)
*/
typedef struct {
/*!
* \brief Type code of base types.
* We keep it uint8_t instead of DLDataTypeCode for minimal memory
* footprint, but the value should be one of DLDataTypeCode enum values.
* */
uint8_t code;
/*!
* \brief Number of bits, common choices are 8, 16, 32.
*/
uint8_t bits;
/*! \brief Number of lanes in the type, used for vector types. */
uint16_t lanes;
} DLDataType;
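/*
 * Illustrative sketch (editorial): the example encodings listed above,
 * written out as DLDataType initializers (code, bits, lanes).
 *
 *   static const DLDataType kFloat32   = {kDLFloat,   32, 1};  // float
 *   static const DLDataType kFloat32x4 = {kDLFloat,   32, 4};  // float4
 *   static const DLDataType kInt8      = {kDLInt,      8, 1};  // int8
 *   static const DLDataType kComplex64 = {kDLComplex, 64, 1};  // complex<float>
 *   static const DLDataType kBool8     = {kDLBool,     8, 1};  // bool (8-bit storage)
 */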
/*!
* \brief Plain C Tensor object that does not manage memory.
*/
typedef struct {
/*!
* \brief The data pointer points to the allocated data. This will be CUDA
* device pointer or cl_mem handle in OpenCL. It may be opaque on some device
* types. This pointer is always aligned to 256 bytes as in CUDA. The
* `byte_offset` field should be used to point to the beginning of the data.
*
* Note that as of Nov 2021, multiple libraries (CuPy, PyTorch, TensorFlow,
* TVM, perhaps others) do not adhere to this 256 byte alignment requirement
* on CPU/CUDA/ROCm, and always use `byte_offset=0`. This must be fixed
* (after which this note will be updated); at the moment it is recommended
* to not rely on the data pointer being correctly aligned.
*
* For a given DLTensor, the size of memory required to store the contents of
* data is calculated as follows:
*
* \code{.c}
* static inline size_t GetDataSize(const DLTensor* t) {
* size_t size = 1;
* for (int32_t i = 0; i < t->ndim; ++i) {
* size *= t->shape[i];
* }
* size *= (t->dtype.bits * t->dtype.lanes + 7) / 8;
* return size;
* }
* \endcode
*/
void* data;
/*! \brief The device of the tensor */
DLDevice device;
/*! \brief Number of dimensions */
int32_t ndim;
/*! \brief The data type of the pointer */
DLDataType dtype;
/*! \brief The shape of the tensor */
int64_t* shape;
/*!
* \brief strides of the tensor (in number of elements, not bytes)
* can be NULL, indicating the tensor is compact and row-major.
*/
int64_t* strides;
/*! \brief The offset in bytes from the data pointer to the beginning of the data */
uint64_t byte_offset;
} DLTensor;
/*!
* \brief C Tensor object that manages the memory of a DLTensor. This data structure is
* intended to facilitate the borrowing of DLTensor by another framework. It is
* not meant to transfer the tensor. When the borrowing framework doesn't need
* the tensor, it should call the deleter to notify the host that the resource
* is no longer needed.
*
* \note This data structure is used as Legacy DLManagedTensor
* in DLPack exchange and is deprecated after DLPack v0.8.
* Use DLManagedTensorVersioned instead.
* This data structure may get renamed or deleted in future versions.
*
* \sa DLManagedTensorVersioned
*/
typedef struct DLManagedTensor {
/*! \brief DLTensor which is being memory managed */
DLTensor dl_tensor;
/*! \brief The context of the original host framework in which this
 * DLManagedTensor is used. It can also be NULL.
*/
void* manager_ctx;
/*!
* \brief Destructor - this should be called
* to destruct the manager_ctx which backs the DLManagedTensor. It can be
* NULL if there is no way for the caller to provide a reasonable destructor.
* The destructor deletes the argument self as well.
*/
void (*deleter)(struct DLManagedTensor* self);
} DLManagedTensor;
// Bit masks used in the DLManagedTensorVersioned
/*! \brief bit mask to indicate that the tensor is read only. */
#define DLPACK_FLAG_BITMASK_READ_ONLY (1UL << 0UL)
/*!
* \brief A versioned, managed C Tensor object that manages the memory of a DLTensor.
*
* This data structure is intended to facilitate the borrowing of DLTensor by
* another framework. It is not meant to transfer the tensor. When the borrowing
* framework doesn't need the tensor, it should call the deleter to notify the
* host that the resource is no longer needed.
*
* \note This is the current standard DLPack exchange data structure.
*/
struct DLManagedTensorVersioned {
/*!
* \brief The API and ABI version of the current managed Tensor
*/
DLPackVersion version;
/*!
* \brief The context of the original host framework.
*
* Stores the framework context in which this DLManagedTensorVersioned
* is used. It can also be NULL.
*/
void* manager_ctx;
/*!
* \brief Destructor.
*
* This should be called to destruct manager_ctx which holds the
* DLManagedTensorVersioned. It can be NULL if there is no way for the caller to provide
* a reasonable destructor. The destructor deletes the argument self as well.
*/
void (*deleter)(struct DLManagedTensorVersioned* self);
/*!
* \brief Additional bitmask flags information about the tensor.
*
* By default the flags should be set to 0.
*
* \note Future ABI changes should keep everything up to and including this
* field stable, to ensure that the deleter can be called correctly.
*
* \sa DLPACK_FLAG_BITMASK_READ_ONLY
*/
uint64_t flags;
/*! \brief DLTensor which is being memory managed */
DLTensor dl_tensor;
};
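/*
 * Illustrative consumer sketch (editorial): the borrowing protocol for the
 * versioned structure, following the version rules documented above.
 * `borrow_tensor` is a name assumed for this example.
 *
 *   static int borrow_tensor(struct DLManagedTensorVersioned* t) {
 *     if (t->version.major != DLPACK_MAJOR_VERSION) {
 *       if (t->deleter) t->deleter(t);  // safe even on a major-version mismatch
 *       return -1;                      // other fields must not be accessed
 *     }
 *     int read_only = (t->flags & DLPACK_FLAG_BITMASK_READ_ONLY) != 0;
 *     (void)read_only;
 *     // ... use t->dl_tensor, respecting read_only ...
 *     if (t->deleter) t->deleter(t);    // notify the producer we are done
 *     return 0;
 *   }
 */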
#ifdef __cplusplus
} // DLPACK_EXTERN_C
#endif
#endif // DLPACK_DLPACK_H_

@@ -0,0 +1,178 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "arrow/c/abi.h"
#define ARROW_C_ASSERT(condition, msg) \
do { \
if (!(condition)) { \
fprintf(stderr, "%s:%d:: %s", __FILE__, __LINE__, (msg)); \
abort(); \
} \
} while (0)
#ifdef __cplusplus
extern "C" {
#endif
/// Query whether the C schema is released
inline int ArrowSchemaIsReleased(const struct ArrowSchema* schema) {
return schema->release == NULL;
}
/// Mark the C schema released (for use in release callbacks)
inline void ArrowSchemaMarkReleased(struct ArrowSchema* schema) {
schema->release = NULL;
}
/// Move the C schema from `src` to `dest`
///
/// Note `dest` must *not* point to a valid schema already, otherwise there
/// will be a memory leak.
inline void ArrowSchemaMove(struct ArrowSchema* src, struct ArrowSchema* dest) {
assert(dest != src);
assert(!ArrowSchemaIsReleased(src));
memcpy(dest, src, sizeof(struct ArrowSchema));
ArrowSchemaMarkReleased(src);
}
/// Release the C schema, if necessary, by calling its release callback
inline void ArrowSchemaRelease(struct ArrowSchema* schema) {
if (!ArrowSchemaIsReleased(schema)) {
schema->release(schema);
ARROW_C_ASSERT(ArrowSchemaIsReleased(schema),
"ArrowSchemaRelease did not cleanup release callback");
}
}
/// Query whether the C array is released
inline int ArrowArrayIsReleased(const struct ArrowArray* array) {
return array->release == NULL;
}
inline int ArrowDeviceArrayIsReleased(const struct ArrowDeviceArray* array) {
return ArrowArrayIsReleased(&array->array);
}
/// Mark the C array released (for use in release callbacks)
inline void ArrowArrayMarkReleased(struct ArrowArray* array) { array->release = NULL; }
inline void ArrowDeviceArrayMarkReleased(struct ArrowDeviceArray* array) {
ArrowArrayMarkReleased(&array->array);
}
/// Move the C array from `src` to `dest`
///
/// Note `dest` must *not* point to a valid array already, otherwise there
/// will be a memory leak.
inline void ArrowArrayMove(struct ArrowArray* src, struct ArrowArray* dest) {
assert(dest != src);
assert(!ArrowArrayIsReleased(src));
memcpy(dest, src, sizeof(struct ArrowArray));
ArrowArrayMarkReleased(src);
}
inline void ArrowDeviceArrayMove(struct ArrowDeviceArray* src,
struct ArrowDeviceArray* dest) {
assert(dest != src);
assert(!ArrowDeviceArrayIsReleased(src));
memcpy(dest, src, sizeof(struct ArrowDeviceArray));
ArrowDeviceArrayMarkReleased(src);
}
/// Release the C array, if necessary, by calling its release callback
inline void ArrowArrayRelease(struct ArrowArray* array) {
if (!ArrowArrayIsReleased(array)) {
array->release(array);
ARROW_C_ASSERT(ArrowArrayIsReleased(array),
"ArrowArrayRelease did not cleanup release callback");
}
}
inline void ArrowDeviceArrayRelease(struct ArrowDeviceArray* array) {
if (!ArrowDeviceArrayIsReleased(array)) {
array->array.release(&array->array);
ARROW_C_ASSERT(ArrowDeviceArrayIsReleased(array),
"ArrowDeviceArrayRelease did not cleanup release callback");
}
}
/// Query whether the C array stream is released
inline int ArrowArrayStreamIsReleased(const struct ArrowArrayStream* stream) {
return stream->release == NULL;
}
inline int ArrowDeviceArrayStreamIsReleased(const struct ArrowDeviceArrayStream* stream) {
return stream->release == NULL;
}
/// Mark the C array stream released (for use in release callbacks)
inline void ArrowArrayStreamMarkReleased(struct ArrowArrayStream* stream) {
stream->release = NULL;
}
inline void ArrowDeviceArrayStreamMarkReleased(struct ArrowDeviceArrayStream* stream) {
stream->release = NULL;
}
/// Move the C array stream from `src` to `dest`
///
/// Note `dest` must *not* point to a valid stream already, otherwise there
/// will be a memory leak.
inline void ArrowArrayStreamMove(struct ArrowArrayStream* src,
struct ArrowArrayStream* dest) {
assert(dest != src);
assert(!ArrowArrayStreamIsReleased(src));
memcpy(dest, src, sizeof(struct ArrowArrayStream));
ArrowArrayStreamMarkReleased(src);
}
inline void ArrowDeviceArrayStreamMove(struct ArrowDeviceArrayStream* src,
struct ArrowDeviceArrayStream* dest) {
assert(dest != src);
assert(!ArrowDeviceArrayStreamIsReleased(src));
memcpy(dest, src, sizeof(struct ArrowDeviceArrayStream));
ArrowDeviceArrayStreamMarkReleased(src);
}
/// Release the C array stream, if necessary, by calling its release callback
inline void ArrowArrayStreamRelease(struct ArrowArrayStream* stream) {
if (!ArrowArrayStreamIsReleased(stream)) {
stream->release(stream);
ARROW_C_ASSERT(ArrowArrayStreamIsReleased(stream),
"ArrowArrayStreamRelease did not cleanup release callback");
}
}
inline void ArrowDeviceArrayStreamRelease(struct ArrowDeviceArrayStream* stream) {
if (!ArrowDeviceArrayStreamIsReleased(stream)) {
stream->release(stream);
ARROW_C_ASSERT(ArrowDeviceArrayStreamIsReleased(stream),
"ArrowDeviceArrayStreamRelease did not cleanup release callback");
}
}
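// Illustrative sketch (editorial): taking ownership of a stream with the
// helpers above. After ArrowArrayStreamMove the source is marked released,
// so only the destination needs to be released later. `take_stream` is a
// name assumed for this example.
//
//   void take_stream(struct ArrowArrayStream* src,
//                    struct ArrowArrayStream* owned) {
//     ArrowArrayStreamMove(src, owned);  // `owned` must not hold a live stream
//     // ... use owned->get_schema / owned->get_next ...
//     ArrowArrayStreamRelease(owned);
//   }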
#ifdef __cplusplus
}
#endif