[GitHub] [arrow] wesm commented on a change in pull request #7539: ARROW-9156: [C++] Reducing the code size of the tensor module

2020-06-29 Thread GitBox


wesm commented on a change in pull request #7539:
URL: https://github.com/apache/arrow/pull/7539#discussion_r447264419



##
File path: cpp/src/arrow/tensor/csf_converter.cc
##
@@ -57,73 +57,86 @@ inline void IncrementIndex(std::vector& coord, 
const std::vector
-class SparseCSFTensorConverter {
- public:
-  using NumericTensorType = NumericTensor;
-  using value_type = typename NumericTensorType::value_type;
+class SparseCSFTensorConverter : private SparseTensorConverterMixin {
+  using SparseTensorConverterMixin::AssignIndex;
+  using SparseTensorConverterMixin::IsNonZero;
 
-  SparseCSFTensorConverter(const NumericTensorType& tensor,
+ public:
+  SparseCSFTensorConverter(const Tensor& tensor,
const std::shared_ptr& index_value_type,
MemoryPool* pool)
   : tensor_(tensor), index_value_type_(index_value_type), pool_(pool) {}
 
-  template 
   Status Convert() {
-using c_index_value_type = typename IndexValueType::c_type;
-
RETURN_NOT_OK(CheckMaximumValue(std::numeric_limits::max()));
+RETURN_NOT_OK(CheckSparseIndexMaximumValue(index_value_type_, 
tensor_.shape()));
+
+const int index_elsize =
+checked_cast(*index_value_type_).bit_width() / 
CHAR_BIT;
+const int value_elsize =
+checked_cast(*tensor_.type()).bit_width() / 
CHAR_BIT;

Review comment:
   As long as the signature of the function is `int(const DataType&)` it 
sounds good to me. I don't think it should be a non-internal API because it 
does not need to do error-handling. If it's a non-static member of 
FixedWidthType, then a `checked_cast` is still necessary which seems against 
the spirit of making the code simpler





This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [arrow] wesm commented on a change in pull request #7539: ARROW-9156: [C++] Reducing the code size of the tensor module

2020-06-29 Thread GitBox


wesm commented on a change in pull request #7539:
URL: https://github.com/apache/arrow/pull/7539#discussion_r447030010



##
File path: cpp/src/arrow/tensor/csf_converter.cc
##
@@ -57,73 +57,86 @@ inline void IncrementIndex(std::vector& coord, 
const std::vector
-class SparseCSFTensorConverter {
- public:
-  using NumericTensorType = NumericTensor;
-  using value_type = typename NumericTensorType::value_type;
+class SparseCSFTensorConverter : private SparseTensorConverterMixin {
+  using SparseTensorConverterMixin::AssignIndex;
+  using SparseTensorConverterMixin::IsNonZero;
 
-  SparseCSFTensorConverter(const NumericTensorType& tensor,
+ public:
+  SparseCSFTensorConverter(const Tensor& tensor,
const std::shared_ptr& index_value_type,
MemoryPool* pool)
   : tensor_(tensor), index_value_type_(index_value_type), pool_(pool) {}
 
-  template 
   Status Convert() {
-using c_index_value_type = typename IndexValueType::c_type;
-
RETURN_NOT_OK(CheckMaximumValue(std::numeric_limits::max()));
+RETURN_NOT_OK(CheckSparseIndexMaximumValue(index_value_type_, 
tensor_.shape()));
+
+const int index_elsize =
+checked_cast(*index_value_type_).bit_width() / 
CHAR_BIT;
+const int value_elsize =
+checked_cast(*tensor_.type()).bit_width() / 
CHAR_BIT;

Review comment:
   Could be helper function, maybe we can put an `internal::GetByteWidth` 
function in `arrow/type.h` that does this? 

##
File path: cpp/src/arrow/tensor/csx_converter.cc
##
@@ -0,0 +1,245 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/tensor/converter.h"
+
+#include 
+#include 
+#include 
+#include 
+
+#include "arrow/buffer.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/visitor_inline.h"
+
+namespace arrow {
+
+class MemoryPool;
+
+namespace internal {
+namespace {
+
+// --
+// SparseTensorConverter for SparseCSRIndex
+
+class SparseCSXMatrixConverter : private SparseTensorConverterMixin {
+  using SparseTensorConverterMixin::AssignIndex;
+  using SparseTensorConverterMixin::IsNonZero;
+
+ public:
+  SparseCSXMatrixConverter(SparseMatrixCompressedAxis axis, const Tensor& 
tensor,
+   const std::shared_ptr& index_value_type,
+   MemoryPool* pool)
+  : axis_(axis), tensor_(tensor), index_value_type_(index_value_type), 
pool_(pool) {}
+
+  Status Convert() {
+RETURN_NOT_OK(CheckSparseIndexMaximumValue(index_value_type_, 
tensor_.shape()));
+
+const int index_elsize =
+checked_cast(*index_value_type_).bit_width() / 
CHAR_BIT;

Review comment:
   Can you factor this "get byte width" operation into a helper function? 

##
File path: cpp/src/arrow/tensor/csf_converter.cc
##
@@ -148,84 +161,130 @@ class SparseCSFTensorConverter {
 return Status::OK();
   }
 
-#define CALL_TYPE_SPECIFIC_CONVERT(TYPE_CLASS) \
-  case TYPE_CLASS##Type::type_id:  \
-return Convert();
-
-  Status Convert() {
-switch (index_value_type_->id()) {
-  ARROW_GENERATE_FOR_ALL_INTEGER_TYPES(CALL_TYPE_SPECIFIC_CONVERT);
-  // LCOV_EXCL_START: The following invalid causes program failure.
-  default:
-return Status::TypeError("Unsupported SparseTensor index value type");
-// LCOV_EXCL_STOP
-}
-  }
-
-#undef CALL_TYPE_SPECIFIC_CONVERT
-
   std::shared_ptr sparse_index;
   std::shared_ptr data;
 
  private:
-  const NumericTensorType& tensor_;
+  const Tensor& tensor_;
   const std::shared_ptr& index_value_type_;
   MemoryPool* pool_;
+};
 
-  template 
-  inline Status CheckMaximumValue(const c_value_type type_max) const {
-auto max_dimension =
-*std::max_element(tensor_.shape().begin(), tensor_.shape().end());
-if (static_cast(type_max) < max_dimension) {
-  // LCOV_EXCL_START: The following invalid causes program failure.
-  return Status::Invalid("The bit width of the index value type is too 
small");
-  // LCOV_EXCL_STOP
-}
-return Status::OK();
+class TensorBuilderFromSparseCSFTens