wesm commented on a change in pull request #7539:
URL: https://github.com/apache/arrow/pull/7539#discussion_r447030010
##
File path: cpp/src/arrow/tensor/csf_converter.cc
##
@@ -57,73 +57,86 @@ inline void IncrementIndex(std::vector& coord,
const std::vector
-class SparseCSFTensorConverter {
- public:
- using NumericTensorType = NumericTensor;
- using value_type = typename NumericTensorType::value_type;
+class SparseCSFTensorConverter : private SparseTensorConverterMixin {
+ using SparseTensorConverterMixin::AssignIndex;
+ using SparseTensorConverterMixin::IsNonZero;
- SparseCSFTensorConverter(const NumericTensorType& tensor,
+ public:
+ SparseCSFTensorConverter(const Tensor& tensor,
const std::shared_ptr& index_value_type,
MemoryPool* pool)
: tensor_(tensor), index_value_type_(index_value_type), pool_(pool) {}
- template
Status Convert() {
-using c_index_value_type = typename IndexValueType::c_type;
-
RETURN_NOT_OK(CheckMaximumValue(std::numeric_limits::max()));
+RETURN_NOT_OK(CheckSparseIndexMaximumValue(index_value_type_,
tensor_.shape()));
+
+const int index_elsize =
+checked_cast(*index_value_type_).bit_width() /
CHAR_BIT;
+const int value_elsize =
+checked_cast(*tensor_.type()).bit_width() /
CHAR_BIT;
Review comment:
Could be helper function, maybe we can put an `internal::GetByteWidth`
function in `arrow/type.h` that does this?
##
File path: cpp/src/arrow/tensor/csx_converter.cc
##
@@ -0,0 +1,245 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/tensor/converter.h"
+
+#include
+#include
+#include
+#include
+
+#include "arrow/buffer.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/visitor_inline.h"
+
+namespace arrow {
+
+class MemoryPool;
+
+namespace internal {
+namespace {
+
+// --
+// SparseTensorConverter for SparseCSRIndex
+
+class SparseCSXMatrixConverter : private SparseTensorConverterMixin {
+ using SparseTensorConverterMixin::AssignIndex;
+ using SparseTensorConverterMixin::IsNonZero;
+
+ public:
+ SparseCSXMatrixConverter(SparseMatrixCompressedAxis axis, const Tensor&
tensor,
+ const std::shared_ptr& index_value_type,
+ MemoryPool* pool)
+ : axis_(axis), tensor_(tensor), index_value_type_(index_value_type),
pool_(pool) {}
+
+ Status Convert() {
+RETURN_NOT_OK(CheckSparseIndexMaximumValue(index_value_type_,
tensor_.shape()));
+
+const int index_elsize =
+checked_cast(*index_value_type_).bit_width() /
CHAR_BIT;
Review comment:
Can you factor this "get byte width" operation into a helper function?
##
File path: cpp/src/arrow/tensor/csf_converter.cc
##
@@ -148,84 +161,130 @@ class SparseCSFTensorConverter {
return Status::OK();
}
-#define CALL_TYPE_SPECIFIC_CONVERT(TYPE_CLASS) \
- case TYPE_CLASS##Type::type_id: \
-return Convert();
-
- Status Convert() {
-switch (index_value_type_->id()) {
- ARROW_GENERATE_FOR_ALL_INTEGER_TYPES(CALL_TYPE_SPECIFIC_CONVERT);
- // LCOV_EXCL_START: The following invalid causes program failure.
- default:
-return Status::TypeError("Unsupported SparseTensor index value type");
-// LCOV_EXCL_STOP
-}
- }
-
-#undef CALL_TYPE_SPECIFIC_CONVERT
-
std::shared_ptr sparse_index;
std::shared_ptr data;
private:
- const NumericTensorType& tensor_;
+ const Tensor& tensor_;
const std::shared_ptr& index_value_type_;
MemoryPool* pool_;
+};
- template
- inline Status CheckMaximumValue(const c_value_type type_max) const {
-auto max_dimension =
-*std::max_element(tensor_.shape().begin(), tensor_.shape().end());
-if (static_cast(type_max) < max_dimension) {
- // LCOV_EXCL_START: The following invalid causes program failure.
- return Status::Invalid("The bit width of the index value type is too
small");
- // LCOV_EXCL_STOP
-}
-return Status::OK();
+class TensorBuilderFromSparseCSFTens