rok commented on code in PR #8510: URL: https://github.com/apache/arrow/pull/8510#discussion_r1124320912
########## cpp/src/arrow/extension/fixed_shape_tensor.h: ########## @@ -0,0 +1,117 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <numeric> +#include <sstream> + +#include "arrow/extension_type.h" + +namespace arrow { +namespace extension { + +const std::shared_ptr<DataType> GetStorageType( + const std::shared_ptr<DataType>& value_type, const std::vector<int64_t>& shape); + +const std::vector<int64_t> ComputeStrides(const std::shared_ptr<DataType>& value_type, + const std::vector<int64_t>& shape, + const std::vector<int64_t>& permutation); + +class ARROW_EXPORT FixedShapeTensorArray : public ExtensionArray { + public: + using ExtensionArray::ExtensionArray; +}; + +/// \brief Concrete type class for constant-size Tensor data. 
+class ARROW_EXPORT FixedShapeTensorType : public ExtensionType { + public: + FixedShapeTensorType(const std::shared_ptr<DataType>& value_type, + const std::vector<int64_t>& shape, + const std::vector<int64_t>& permutation = {}, + const std::vector<std::string>& dim_names = {}) + : ExtensionType(GetStorageType(value_type, shape)), + value_type_(value_type), + shape_(shape), + strides_(ComputeStrides(value_type, shape, permutation)), + permutation_(permutation), + dim_names_(dim_names) {} + + std::string extension_name() const override { return "arrow.fixed_shape_tensor"; } + + /// Number of dimensions of tensor elements + size_t ndim() { return shape_.size(); } + + /// Shape of tensor elements + const std::vector<int64_t>& shape() const { return shape_; } + + /// Strides of tensor elements. Strides state offset in bytes between adjacent + /// elements along each dimension. + const std::vector<int64_t>& strides() const { return strides_; } + + /// Permutation mapping from logical to physical memory layout of tensor elements + const std::vector<int64_t>& permutation() const { return permutation_; } + + /// Dimension names of tensor elements. Dimensions are ordered logically. + const std::vector<std::string>& dim_names() const { return dim_names_; } + + bool ExtensionEquals(const ExtensionType& other) const override; + + std::string Serialize() const override; + + Result<std::shared_ptr<DataType>> Deserialize( + std::shared_ptr<DataType> storage_type, + const std::string& serialized_data) const override; + + /// Create a FixedShapeTensorArray from ArrayData + std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const override; + + /// \brief Create a FixedShapeTensorArray from a Tensor + /// + /// This function will create a FixedShapeTensorArray from a Tensor, taking its + /// first dimension as the "element dimension" and the remaining dimensions as the + /// "tensor dimensions". 
The tensor dimensions must match the FixedShapeTensorType's + /// element shape. This function assumes that the tensor's memory layout is + /// row-major. + /// + /// \param[in] tensor The Tensor to convert to a FixedShapeTensorArray + Result<std::shared_ptr<Array>> MakeArray(std::shared_ptr<Tensor> tensor) const; + + /// \brief Create a Tensor from FixedShapeTensorArray + /// + /// This function will create a Tensor from a FixedShapeTensorArray, setting its + /// first dimension as length equal to the FixedShapeTensorArray's length and the + /// remaining dimensions as the FixedShapeTensorType's element shape. + /// + /// \param[in] arr The FixedShapeTensorArray to convert to a Tensor + Result<std::shared_ptr<Tensor>> ToTensor(std::shared_ptr<Array> arr) const; + + private: + std::shared_ptr<DataType> storage_type_; + std::shared_ptr<DataType> value_type_; + std::vector<int64_t> shape_; + std::vector<int64_t> strides_; + std::vector<int64_t> permutation_; + std::vector<std::string> dim_names_; +}; + +/// \brief Return a FixedShapeTensorType instance. +ARROW_EXPORT std::shared_ptr<FixedShapeTensorType> fixed_shape_tensor( Review Comment: Changed. It causes a lot of extra casting in tests, but I prefer consistency. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
