Initial commit
This commit is contained in:
@@ -0,0 +1,60 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "arrow/extension_type.h"
|
||||
|
||||
namespace arrow::extension {
|
||||
|
||||
/// \brief Bool8 is an alternate representation for boolean
|
||||
/// arrays using 8 bits instead of 1 bit per value. The underlying
|
||||
/// storage type is int8.
|
||||
class ARROW_EXPORT Bool8Array : public ExtensionArray {
|
||||
public:
|
||||
using ExtensionArray::ExtensionArray;
|
||||
};
|
||||
|
||||
/// \brief Bool8 is an alternate representation for boolean
|
||||
/// arrays using 8 bits instead of 1 bit per value. The underlying
|
||||
/// storage type is int8.
|
||||
class ARROW_EXPORT Bool8Type : public ExtensionType {
|
||||
public:
|
||||
/// \brief Construct a Bool8Type.
|
||||
Bool8Type() : ExtensionType(int8()) {}
|
||||
|
||||
std::string extension_name() const override { return "arrow.bool8"; }
|
||||
std::string ToString(bool show_metadata = false) const override;
|
||||
|
||||
bool ExtensionEquals(const ExtensionType& other) const override;
|
||||
|
||||
std::string Serialize() const override;
|
||||
|
||||
Result<std::shared_ptr<DataType>> Deserialize(
|
||||
std::shared_ptr<DataType> storage_type,
|
||||
const std::string& serialized_data) const override;
|
||||
|
||||
/// Create a Bool8Array from ArrayData
|
||||
std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const override;
|
||||
|
||||
static Result<std::shared_ptr<DataType>> Make();
|
||||
};
|
||||
|
||||
/// \brief Return a Bool8Type instance.
|
||||
ARROW_EXPORT std::shared_ptr<DataType> bool8();
|
||||
|
||||
} // namespace arrow::extension
|
||||
@@ -0,0 +1,130 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "arrow/extension_type.h"
|
||||
|
||||
namespace arrow {
|
||||
namespace extension {
|
||||
|
||||
/// \brief Array class for the FixedShapeTensor extension type.
///
/// All constructors are inherited from ExtensionArray.
class ARROW_EXPORT FixedShapeTensorArray : public ExtensionArray {
 public:
  using ExtensionArray::ExtensionArray;

  /// \brief Create a FixedShapeTensorArray from a Tensor
  ///
  /// The first dimension of the Tensor becomes the number of elements in the
  /// resulting array; the remaining dimensions become the shape of the
  /// individual tensors. If the Tensor provides strides, they are used to
  /// determine the dimension permutation. Otherwise, row-major layout
  /// (i.e. no permutation) is assumed.
  ///
  /// \param[in] tensor The Tensor to convert to a FixedShapeTensorArray
  static Result<std::shared_ptr<FixedShapeTensorArray>> FromTensor(
      const std::shared_ptr<Tensor>& tensor);

  /// \brief Create a Tensor from FixedShapeTensorArray
  ///
  /// The first dimension of the resulting Tensor has length equal to this
  /// array's length; the remaining dimensions are the FixedShapeTensorType's
  /// shape. Shape and dim_names are permuted according to the permutation
  /// stored in the FixedShapeTensorType metadata.
  ///
  /// NOTE(review): the by-value return is const-qualified, which prevents
  /// callers from moving out of the Result. Dropping the qualifier would also
  /// require updating the out-of-line definition.
  const Result<std::shared_ptr<Tensor>> ToTensor() const;
};
|
||||
|
||||
/// \brief Concrete type class for constant-size Tensor data.
|
||||
/// This is a canonical arrow extension type.
|
||||
/// See: https://arrow.apache.org/docs/format/CanonicalExtensions.html
|
||||
class ARROW_EXPORT FixedShapeTensorType : public ExtensionType {
|
||||
public:
|
||||
FixedShapeTensorType(const std::shared_ptr<DataType>& value_type, const int32_t& size,
|
||||
const std::vector<int64_t>& shape,
|
||||
const std::vector<int64_t>& permutation = {},
|
||||
const std::vector<std::string>& dim_names = {})
|
||||
: ExtensionType(fixed_size_list(value_type, size)),
|
||||
value_type_(value_type),
|
||||
shape_(shape),
|
||||
permutation_(permutation),
|
||||
dim_names_(dim_names) {}
|
||||
|
||||
std::string extension_name() const override { return "arrow.fixed_shape_tensor"; }
|
||||
std::string ToString(bool show_metadata = false) const override;
|
||||
|
||||
/// Number of dimensions of tensor elements
|
||||
size_t ndim() const { return shape_.size(); }
|
||||
|
||||
/// Shape of tensor elements
|
||||
const std::vector<int64_t>& shape() const { return shape_; }
|
||||
|
||||
/// Value type of tensor elements
|
||||
const std::shared_ptr<DataType>& value_type() const { return value_type_; }
|
||||
|
||||
/// Strides of tensor elements. Strides state offset in bytes between adjacent
|
||||
/// elements along each dimension. In case permutation is non-empty strides are
|
||||
/// computed from permuted tensor element's shape.
|
||||
const std::vector<int64_t>& strides();
|
||||
|
||||
/// Permutation mapping from logical to physical memory layout of tensor elements
|
||||
const std::vector<int64_t>& permutation() const { return permutation_; }
|
||||
|
||||
/// Dimension names of tensor elements. Dimensions are ordered physically.
|
||||
const std::vector<std::string>& dim_names() const { return dim_names_; }
|
||||
|
||||
bool ExtensionEquals(const ExtensionType& other) const override;
|
||||
|
||||
std::string Serialize() const override;
|
||||
|
||||
Result<std::shared_ptr<DataType>> Deserialize(
|
||||
std::shared_ptr<DataType> storage_type,
|
||||
const std::string& serialized_data) const override;
|
||||
|
||||
/// Create a FixedShapeTensorArray from ArrayData
|
||||
std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const override;
|
||||
|
||||
/// \brief Create a Tensor from an ExtensionScalar from a FixedShapeTensorArray
|
||||
///
|
||||
/// This method will return a Tensor from ExtensionScalar with strides
|
||||
/// derived from shape and permutation of FixedShapeTensorType. Shape and
|
||||
/// dim_names will be permuted according to permutation stored in the
|
||||
/// FixedShapeTensorType metadata.
|
||||
static Result<std::shared_ptr<Tensor>> MakeTensor(
|
||||
const std::shared_ptr<ExtensionScalar>& scalar);
|
||||
|
||||
/// \brief Create a FixedShapeTensorType instance
|
||||
static Result<std::shared_ptr<DataType>> Make(
|
||||
const std::shared_ptr<DataType>& value_type, const std::vector<int64_t>& shape,
|
||||
const std::vector<int64_t>& permutation = {},
|
||||
const std::vector<std::string>& dim_names = {});
|
||||
|
||||
private:
|
||||
std::shared_ptr<DataType> storage_type_;
|
||||
std::shared_ptr<DataType> value_type_;
|
||||
std::vector<int64_t> shape_;
|
||||
std::vector<int64_t> strides_;
|
||||
std::vector<int64_t> permutation_;
|
||||
std::vector<std::string> dim_names_;
|
||||
};
|
||||
|
||||
/// \brief Return a FixedShapeTensorType instance.
|
||||
ARROW_EXPORT std::shared_ptr<DataType> fixed_shape_tensor(
|
||||
const std::shared_ptr<DataType>& storage_type, const std::vector<int64_t>& shape,
|
||||
const std::vector<int64_t>& permutation = {},
|
||||
const std::vector<std::string>& dim_names = {});
|
||||
|
||||
} // namespace extension
|
||||
} // namespace arrow
|
||||
@@ -0,0 +1,57 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
#include "arrow/extension_type.h"
|
||||
#include "arrow/result.h"
|
||||
#include "arrow/type_fwd.h"
|
||||
#include "arrow/util/visibility.h"
|
||||
|
||||
namespace arrow::extension {
|
||||
|
||||
/// \brief Concrete type class for variable-size JSON data, utf8-encoded.
|
||||
class ARROW_EXPORT JsonExtensionType : public ExtensionType {
|
||||
public:
|
||||
explicit JsonExtensionType(const std::shared_ptr<DataType>& storage_type)
|
||||
: ExtensionType(storage_type) {}
|
||||
|
||||
std::string extension_name() const override { return "arrow.json"; }
|
||||
|
||||
bool ExtensionEquals(const ExtensionType& other) const override;
|
||||
|
||||
Result<std::shared_ptr<DataType>> Deserialize(
|
||||
std::shared_ptr<DataType> storage_type,
|
||||
const std::string& serialized_data) const override;
|
||||
|
||||
std::string Serialize() const override;
|
||||
|
||||
std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const override;
|
||||
|
||||
static Result<std::shared_ptr<DataType>> Make(std::shared_ptr<DataType> storage_type);
|
||||
|
||||
static bool IsSupportedStorageType(Type::type type_id);
|
||||
};
|
||||
|
||||
/// \brief Return a JsonExtensionType instance.
|
||||
ARROW_EXPORT std::shared_ptr<DataType> json(
|
||||
std::shared_ptr<DataType> storage_type = utf8());
|
||||
|
||||
} // namespace arrow::extension
|
||||
@@ -0,0 +1,71 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "arrow/extension_type.h"
|
||||
#include "arrow/type.h"
|
||||
|
||||
namespace arrow::extension {
|
||||
|
||||
/// \brief Opaque is a placeholder for a type from an external (usually
|
||||
/// non-Arrow) system that could not be interpreted.
|
||||
class ARROW_EXPORT OpaqueType : public ExtensionType {
|
||||
public:
|
||||
/// \brief Construct an OpaqueType.
|
||||
///
|
||||
/// \param[in] storage_type The underlying storage type. Should be
|
||||
/// arrow::null if there is no data.
|
||||
/// \param[in] type_name The name of the type in the external system.
|
||||
/// \param[in] vendor_name The name of the external system.
|
||||
explicit OpaqueType(std::shared_ptr<DataType> storage_type, std::string type_name,
|
||||
std::string vendor_name)
|
||||
: ExtensionType(std::move(storage_type)),
|
||||
type_name_(std::move(type_name)),
|
||||
vendor_name_(std::move(vendor_name)) {}
|
||||
|
||||
std::string extension_name() const override { return "arrow.opaque"; }
|
||||
std::string ToString(bool show_metadata) const override;
|
||||
bool ExtensionEquals(const ExtensionType& other) const override;
|
||||
std::string Serialize() const override;
|
||||
Result<std::shared_ptr<DataType>> Deserialize(
|
||||
std::shared_ptr<DataType> storage_type,
|
||||
const std::string& serialized_data) const override;
|
||||
/// Create an OpaqueArray from ArrayData
|
||||
std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const override;
|
||||
|
||||
std::string_view type_name() const { return type_name_; }
|
||||
std::string_view vendor_name() const { return vendor_name_; }
|
||||
|
||||
private:
|
||||
std::string type_name_;
|
||||
std::string vendor_name_;
|
||||
};
|
||||
|
||||
/// \brief Opaque is a wrapper for (usually binary) data from an external
|
||||
/// (often non-Arrow) system that could not be interpreted.
|
||||
class ARROW_EXPORT OpaqueArray : public ExtensionArray {
|
||||
public:
|
||||
using ExtensionArray::ExtensionArray;
|
||||
};
|
||||
|
||||
/// \brief Return an OpaqueType instance.
|
||||
ARROW_EXPORT std::shared_ptr<DataType> opaque(std::shared_ptr<DataType> storage_type,
|
||||
std::string type_name,
|
||||
std::string vendor_name);
|
||||
|
||||
} // namespace arrow::extension
|
||||
@@ -0,0 +1,63 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "arrow/extension_type.h"
|
||||
|
||||
namespace arrow::extension {
|
||||
|
||||
/// \brief UuidArray stores array of UUIDs. Underlying storage type is
|
||||
/// FixedSizeBinary(16).
|
||||
class ARROW_EXPORT UuidArray : public ExtensionArray {
|
||||
public:
|
||||
using ExtensionArray::ExtensionArray;
|
||||
};
|
||||
|
||||
/// \brief UuidType is a canonical arrow extension type for UUIDs.
|
||||
/// UUIDs are stored as FixedSizeBinary(16) with big-endian notation and this
|
||||
/// does not interpret the bytes in any way. Specific UUID version is not
|
||||
/// required or guaranteed.
|
||||
class ARROW_EXPORT UuidType : public ExtensionType {
|
||||
public:
|
||||
/// \brief Construct a UuidType.
|
||||
UuidType() : ExtensionType(fixed_size_binary(16)) {}
|
||||
|
||||
std::string extension_name() const override { return "arrow.uuid"; }
|
||||
std::string ToString(bool show_metadata = false) const override;
|
||||
|
||||
bool ExtensionEquals(const ExtensionType& other) const override;
|
||||
|
||||
/// Create a UuidArray from ArrayData
|
||||
std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const override;
|
||||
|
||||
Result<std::shared_ptr<DataType>> Deserialize(
|
||||
std::shared_ptr<DataType> storage_type,
|
||||
const std::string& serialized) const override;
|
||||
|
||||
std::string Serialize() const override { return ""; }
|
||||
|
||||
/// \brief Create a UuidType instance
|
||||
static Result<std::shared_ptr<DataType>> Make() { return std::make_shared<UuidType>(); }
|
||||
|
||||
static bool IsSupportedStorageType(const std::shared_ptr<DataType>& storage_type);
|
||||
};
|
||||
|
||||
/// \brief Return a UuidType instance.
|
||||
ARROW_EXPORT std::shared_ptr<DataType> uuid();
|
||||
|
||||
} // namespace arrow::extension
|
||||
Reference in New Issue
Block a user