Initial commit

This commit is contained in:
kdusek
2025-12-09 12:13:01 +01:00
commit 8e654ed209
13332 changed files with 2695056 additions and 0 deletions

View File

@@ -0,0 +1,60 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "arrow/extension_type.h"
namespace arrow::extension {
/// \brief Bool8 is an alternate representation for boolean
/// arrays using 8 bits instead of 1 bit per value. The underlying
/// storage type is int8.
class ARROW_EXPORT Bool8Array : public ExtensionArray {
public:
using ExtensionArray::ExtensionArray;
};
/// \brief Bool8 is an alternate representation for boolean
/// arrays using 8 bits instead of 1 bit per value. The underlying
/// storage type is int8.
class ARROW_EXPORT Bool8Type : public ExtensionType {
public:
/// \brief Construct a Bool8Type.
Bool8Type() : ExtensionType(int8()) {}
std::string extension_name() const override { return "arrow.bool8"; }
std::string ToString(bool show_metadata = false) const override;
bool ExtensionEquals(const ExtensionType& other) const override;
std::string Serialize() const override;
Result<std::shared_ptr<DataType>> Deserialize(
std::shared_ptr<DataType> storage_type,
const std::string& serialized_data) const override;
/// Create a Bool8Array from ArrayData
std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const override;
static Result<std::shared_ptr<DataType>> Make();
};
/// \brief Return a Bool8Type instance.
ARROW_EXPORT std::shared_ptr<DataType> bool8();
} // namespace arrow::extension

View File

@@ -0,0 +1,130 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "arrow/extension_type.h"
namespace arrow {
namespace extension {
class ARROW_EXPORT FixedShapeTensorArray : public ExtensionArray {
public:
using ExtensionArray::ExtensionArray;
/// \brief Create a FixedShapeTensorArray from a Tensor
///
/// This method will create a FixedShapeTensorArray from a Tensor, taking its first
/// dimension as the number of elements in the resulting array and the remaining
/// dimensions as the shape of the individual tensors. If Tensor provides strides,
/// they will be used to determine dimension permutation. Otherwise, row-major layout
/// (i.e. no permutation) will be assumed.
///
/// \param[in] tensor The Tensor to convert to a FixedShapeTensorArray
static Result<std::shared_ptr<FixedShapeTensorArray>> FromTensor(
const std::shared_ptr<Tensor>& tensor);
/// \brief Create a Tensor from FixedShapeTensorArray
///
/// This method will create a Tensor from a FixedShapeTensorArray, setting its first
/// dimension as length equal to the FixedShapeTensorArray's length and the remaining
/// dimensions as the FixedShapeTensorType's shape. Shape and dim_names will be
/// permuted according to permutation stored in the FixedShapeTensorType metadata.
const Result<std::shared_ptr<Tensor>> ToTensor() const;
};
/// \brief Concrete type class for constant-size Tensor data.
/// This is a canonical arrow extension type.
/// See: https://arrow.apache.org/docs/format/CanonicalExtensions.html
class ARROW_EXPORT FixedShapeTensorType : public ExtensionType {
public:
FixedShapeTensorType(const std::shared_ptr<DataType>& value_type, const int32_t& size,
const std::vector<int64_t>& shape,
const std::vector<int64_t>& permutation = {},
const std::vector<std::string>& dim_names = {})
: ExtensionType(fixed_size_list(value_type, size)),
value_type_(value_type),
shape_(shape),
permutation_(permutation),
dim_names_(dim_names) {}
std::string extension_name() const override { return "arrow.fixed_shape_tensor"; }
std::string ToString(bool show_metadata = false) const override;
/// Number of dimensions of tensor elements
size_t ndim() const { return shape_.size(); }
/// Shape of tensor elements
const std::vector<int64_t>& shape() const { return shape_; }
/// Value type of tensor elements
const std::shared_ptr<DataType>& value_type() const { return value_type_; }
/// Strides of tensor elements. Strides state offset in bytes between adjacent
/// elements along each dimension. In case permutation is non-empty strides are
/// computed from permuted tensor element's shape.
const std::vector<int64_t>& strides();
/// Permutation mapping from logical to physical memory layout of tensor elements
const std::vector<int64_t>& permutation() const { return permutation_; }
/// Dimension names of tensor elements. Dimensions are ordered physically.
const std::vector<std::string>& dim_names() const { return dim_names_; }
bool ExtensionEquals(const ExtensionType& other) const override;
std::string Serialize() const override;
Result<std::shared_ptr<DataType>> Deserialize(
std::shared_ptr<DataType> storage_type,
const std::string& serialized_data) const override;
/// Create a FixedShapeTensorArray from ArrayData
std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const override;
/// \brief Create a Tensor from an ExtensionScalar from a FixedShapeTensorArray
///
/// This method will return a Tensor from ExtensionScalar with strides
/// derived from shape and permutation of FixedShapeTensorType. Shape and
/// dim_names will be permuted according to permutation stored in the
/// FixedShapeTensorType metadata.
static Result<std::shared_ptr<Tensor>> MakeTensor(
const std::shared_ptr<ExtensionScalar>& scalar);
/// \brief Create a FixedShapeTensorType instance
static Result<std::shared_ptr<DataType>> Make(
const std::shared_ptr<DataType>& value_type, const std::vector<int64_t>& shape,
const std::vector<int64_t>& permutation = {},
const std::vector<std::string>& dim_names = {});
private:
std::shared_ptr<DataType> storage_type_;
std::shared_ptr<DataType> value_type_;
std::vector<int64_t> shape_;
std::vector<int64_t> strides_;
std::vector<int64_t> permutation_;
std::vector<std::string> dim_names_;
};
/// \brief Return a FixedShapeTensorType instance.
ARROW_EXPORT std::shared_ptr<DataType> fixed_shape_tensor(
const std::shared_ptr<DataType>& storage_type, const std::vector<int64_t>& shape,
const std::vector<int64_t>& permutation = {},
const std::vector<std::string>& dim_names = {});
} // namespace extension
} // namespace arrow

View File

@@ -0,0 +1,57 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <stdexcept>
#include <string>
#include "arrow/extension_type.h"
#include "arrow/result.h"
#include "arrow/type_fwd.h"
#include "arrow/util/visibility.h"
namespace arrow::extension {
/// \brief Concrete type class for variable-size JSON data, utf8-encoded.
class ARROW_EXPORT JsonExtensionType : public ExtensionType {
public:
explicit JsonExtensionType(const std::shared_ptr<DataType>& storage_type)
: ExtensionType(storage_type) {}
std::string extension_name() const override { return "arrow.json"; }
bool ExtensionEquals(const ExtensionType& other) const override;
Result<std::shared_ptr<DataType>> Deserialize(
std::shared_ptr<DataType> storage_type,
const std::string& serialized_data) const override;
std::string Serialize() const override;
std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const override;
static Result<std::shared_ptr<DataType>> Make(std::shared_ptr<DataType> storage_type);
static bool IsSupportedStorageType(Type::type type_id);
};
/// \brief Return a JsonExtensionType instance.
ARROW_EXPORT std::shared_ptr<DataType> json(
std::shared_ptr<DataType> storage_type = utf8());
} // namespace arrow::extension

View File

@@ -0,0 +1,71 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "arrow/extension_type.h"
#include "arrow/type.h"
namespace arrow::extension {
/// \brief Opaque is a placeholder for a type from an external (usually
/// non-Arrow) system that could not be interpreted.
class ARROW_EXPORT OpaqueType : public ExtensionType {
public:
/// \brief Construct an OpaqueType.
///
/// \param[in] storage_type The underlying storage type. Should be
/// arrow::null if there is no data.
/// \param[in] type_name The name of the type in the external system.
/// \param[in] vendor_name The name of the external system.
explicit OpaqueType(std::shared_ptr<DataType> storage_type, std::string type_name,
std::string vendor_name)
: ExtensionType(std::move(storage_type)),
type_name_(std::move(type_name)),
vendor_name_(std::move(vendor_name)) {}
std::string extension_name() const override { return "arrow.opaque"; }
std::string ToString(bool show_metadata) const override;
bool ExtensionEquals(const ExtensionType& other) const override;
std::string Serialize() const override;
Result<std::shared_ptr<DataType>> Deserialize(
std::shared_ptr<DataType> storage_type,
const std::string& serialized_data) const override;
/// Create an OpaqueArray from ArrayData
std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const override;
std::string_view type_name() const { return type_name_; }
std::string_view vendor_name() const { return vendor_name_; }
private:
std::string type_name_;
std::string vendor_name_;
};
/// \brief Opaque is a wrapper for (usually binary) data from an external
/// (often non-Arrow) system that could not be interpreted.
class ARROW_EXPORT OpaqueArray : public ExtensionArray {
public:
using ExtensionArray::ExtensionArray;
};
/// \brief Return an OpaqueType instance.
ARROW_EXPORT std::shared_ptr<DataType> opaque(std::shared_ptr<DataType> storage_type,
std::string type_name,
std::string vendor_name);
} // namespace arrow::extension

View File

@@ -0,0 +1,63 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "arrow/extension_type.h"
namespace arrow::extension {
/// \brief UuidArray stores array of UUIDs. Underlying storage type is
/// FixedSizeBinary(16).
class ARROW_EXPORT UuidArray : public ExtensionArray {
public:
using ExtensionArray::ExtensionArray;
};
/// \brief UuidType is a canonical arrow extension type for UUIDs.
/// UUIDs are stored as FixedSizeBinary(16) with big-endian notation and this
/// does not interpret the bytes in any way. Specific UUID version is not
/// required or guaranteed.
class ARROW_EXPORT UuidType : public ExtensionType {
public:
/// \brief Construct a UuidType.
UuidType() : ExtensionType(fixed_size_binary(16)) {}
std::string extension_name() const override { return "arrow.uuid"; }
std::string ToString(bool show_metadata = false) const override;
bool ExtensionEquals(const ExtensionType& other) const override;
/// Create a UuidArray from ArrayData
std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const override;
Result<std::shared_ptr<DataType>> Deserialize(
std::shared_ptr<DataType> storage_type,
const std::string& serialized) const override;
std::string Serialize() const override { return ""; }
/// \brief Create a UuidType instance
static Result<std::shared_ptr<DataType>> Make() { return std::make_shared<UuidType>(); }
static bool IsSupportedStorageType(const std::shared_ptr<DataType>& storage_type);
};
/// \brief Return a UuidType instance.
ARROW_EXPORT std::shared_ptr<DataType> uuid();
} // namespace arrow::extension