pitrou commented on code in PR #47586:
URL: https://github.com/apache/arrow/pull/47586#discussion_r2426367687
##########
cpp/src/arrow/sparse_tensor.cc:
##########
@@ -337,25 +340,68 @@ void CheckSparseCSXIndexValidity(const
std::shared_ptr<DataType>& indptr_type,
namespace {
-inline Status CheckSparseCSFIndexValidity(const std::shared_ptr<DataType>&
indptr_type,
- const std::shared_ptr<DataType>&
indices_type,
- const int64_t num_indptrs,
- const int64_t num_indices,
- const int64_t axis_order_size) {
+inline Status CheckSparseCSFIndexValidity(
+ const std::vector<std::shared_ptr<Tensor>>& indptr,
+ const std::vector<std::shared_ptr<Tensor>>& indices,
+ const std::vector<int64_t>& axis_order) {
+ auto indptr_type = indptr.front()->type();
+ auto indices_type = indices.front()->type();
+
if (!is_integer(indptr_type->id())) {
return Status::TypeError("Type of SparseCSFIndex indptr must be integer");
}
if (!is_integer(indices_type->id())) {
return Status::TypeError("Type of SparseCSFIndex indices must be integer");
}
- if (num_indptrs + 1 != num_indices) {
+ if (indptr.size() + 1 != indices.size()) {
return Status::Invalid(
"Length of indices must be equal to length of indptrs + 1 for
SparseCSFIndex.");
}
- if (axis_order_size != num_indices) {
+ if (axis_order.size() != indices.size()) {
return Status::Invalid(
"Length of indices must be equal to number of dimensions for
SparseCSFIndex.");
}
+
+ for (int64_t i = 1; i < static_cast<int64_t>(indptr.size()); i++) {
+ if (!indptr_type->Equals(indptr[i]->type())) {
+ return Status::Invalid("All index pointers must have the same data
type");
Review Comment:
Make it `TypeError`?
##########
cpp/src/arrow/tensor/csx_converter.cc:
##########
@@ -24,40 +24,47 @@
#include "arrow/buffer.h"
#include "arrow/status.h"
+#include "arrow/tensor.h"
#include "arrow/type.h"
#include "arrow/util/checked_cast.h"
-#include "arrow/visit_type_inline.h"
+#include "arrow/util/logging_internal.h"
+#include "arrow/util/sparse_tensor_util.h"
namespace arrow {
class MemoryPool;
namespace internal {
+
namespace {
// ----------------------------------------------------------------------
// SparseTensorConverter for SparseCSRIndex
-class SparseCSXMatrixConverter : private SparseTensorConverterMixin {
- using SparseTensorConverterMixin::AssignIndex;
- using SparseTensorConverterMixin::IsNonZero;
-
+class SparseCSXMatrixConverter {
public:
SparseCSXMatrixConverter(SparseMatrixCompressedAxis axis, const Tensor&
tensor,
const std::shared_ptr<DataType>& index_value_type,
MemoryPool* pool)
: axis_(axis), tensor_(tensor), index_value_type_(index_value_type),
pool_(pool) {}
- Status Convert() {
+ // Note: The same type is considered for both indices and indptr during
+ // tensor-to-CSX-tensor conversion.
Review Comment:
But then the compiler may still generate `Convert` specializations that will
never get called. Can we instead pass a single type to `Convert`?
##########
cpp/src/arrow/tensor/csx_converter.cc:
##########
@@ -150,8 +143,9 @@ Status
MakeSparseCSXMatrixFromTensor(SparseMatrixCompressedAxis axis,
std::shared_ptr<SparseIndex>*
out_sparse_index,
std::shared_ptr<Buffer>* out_data) {
SparseCSXMatrixConverter converter(axis, tensor, index_value_type, pool);
- RETURN_NOT_OK(converter.Convert());
-
+ ConverterVisitor visitor(converter);
+ ARROW_RETURN_NOT_OK(
+ util::VisitCSXType(*tensor.type(), *index_value_type, *index_value_type,
visitor));
Review Comment:
We can use `VisitTypeInline` (or `VisitType`) directly here instead.
##########
cpp/src/arrow/tensor/converter_internal.h:
##########
@@ -17,72 +17,65 @@
#pragma once
-#include "arrow/tensor/converter.h"
-
-#define DISPATCH(ACTION, index_elsize, value_elsize, ...) \
- switch (index_elsize) { \
- case 1: \
- switch (value_elsize) { \
- case 1: \
- ACTION(uint8_t, uint8_t, __VA_ARGS__); \
- break; \
- case 2: \
- ACTION(uint8_t, uint16_t, __VA_ARGS__); \
- break; \
- case 4: \
- ACTION(uint8_t, uint32_t, __VA_ARGS__); \
- break; \
- case 8: \
- ACTION(uint8_t, uint64_t, __VA_ARGS__); \
- break; \
- } \
- break; \
- case 2: \
- switch (value_elsize) { \
- case 1: \
- ACTION(uint16_t, uint8_t, __VA_ARGS__); \
- break; \
- case 2: \
- ACTION(uint16_t, uint16_t, __VA_ARGS__); \
- break; \
- case 4: \
- ACTION(uint16_t, uint32_t, __VA_ARGS__); \
- break; \
- case 8: \
- ACTION(uint16_t, uint64_t, __VA_ARGS__); \
- break; \
- } \
- break; \
- case 4: \
- switch (value_elsize) { \
- case 1: \
- ACTION(uint32_t, uint8_t, __VA_ARGS__); \
- break; \
- case 2: \
- ACTION(uint32_t, uint16_t, __VA_ARGS__); \
- break; \
- case 4: \
- ACTION(uint32_t, uint32_t, __VA_ARGS__); \
- break; \
- case 8: \
- ACTION(uint32_t, uint64_t, __VA_ARGS__); \
- break; \
- } \
- break; \
- case 8: \
- switch (value_elsize) { \
- case 1: \
- ACTION(int64_t, uint8_t, __VA_ARGS__); \
- break; \
- case 2: \
- ACTION(int64_t, uint16_t, __VA_ARGS__); \
- break; \
- case 4: \
- ACTION(int64_t, uint32_t, __VA_ARGS__); \
- break; \
- case 8: \
- ACTION(int64_t, uint64_t, __VA_ARGS__); \
- break; \
- } \
- break; \
+#include "arrow/sparse_tensor.h" // IWYU pragma: export
+
+#include <memory>
+#include <utility>
+
+namespace arrow {
+
+namespace internal {
Review Comment:
Nit: with C++17 this can be worded more concisely
```suggestion
namespace arrow::internal {
```
##########
cpp/src/arrow/sparse_tensor.cc:
##########
@@ -475,4 +523,292 @@ Result<std::shared_ptr<Tensor>>
SparseTensor::ToTensor(MemoryPool* pool) const {
}
}
+namespace {
+
+struct SparseTensorValidatorBase {
+ SparseTensorValidatorBase(const Tensor& tensor, const SparseTensor&
sparse_tensor)
+ : tensor(tensor), sparse_tensor(sparse_tensor) {}
+
+ template <typename ValueType>
+ Status ValidateValue(typename ValueType::c_type sparse_tensor_value,
+ typename ValueType::c_type tensor_value) {
+ if (!internal::is_not_zero<ValueType>(sparse_tensor_value)) {
+ return Status::Invalid("Sparse tensor values must be non-zero");
+ } else if (sparse_tensor_value != tensor_value) {
+ if constexpr (is_floating_type<ValueType>::value) {
+ if (!std::isnan(tensor_value) || !std::isnan(sparse_tensor_value)) {
+ return Status::Invalid(
+ "Inconsistent values between sparse tensor and dense tensor");
+ }
+ } else {
+ return Status::Invalid(
+ "Inconsistent values between sparse tensor and dense tensor");
+ }
+ }
+ return Status::OK();
+ }
+
+ const Tensor& tensor;
+ const SparseTensor& sparse_tensor;
+};
+
+struct SparseCOOValidator : public SparseTensorValidatorBase {
+ using SparseTensorValidatorBase::SparseTensorValidatorBase;
+
+ Status Validate() {
+ auto sparse_coo_index =
+
internal::checked_pointer_cast<SparseCOOIndex>(sparse_tensor.sparse_index());
+ auto indices = sparse_coo_index->indices();
+ RETURN_NOT_OK(CheckSparseCOOIndexValidity(indices->type(),
indices->shape(),
+ indices->strides()));
+ // Validate Values
+ return util::VisitCOOTensorType(*sparse_tensor.type(), *indices->type(),
*this);
+ }
+
+ template <typename ValueType, typename IndexType>
+ Status operator()(const ValueType& value_type, const IndexType& index_type) {
+ return ValidateSparseCooTensorValues(value_type, index_type);
+ }
+
+ template <typename ValueType, typename IndexType>
+ Status ValidateSparseCooTensorValues(const ValueType&, const IndexType&) {
+ using IndexCType = typename IndexType::c_type;
+ using ValueCType = typename ValueType::c_type;
+
+ auto sparse_coo_index =
+
internal::checked_pointer_cast<SparseCOOIndex>(sparse_tensor.sparse_index());
+ auto sparse_coo_values_buffer = sparse_tensor.data();
+
+ const auto& indices = sparse_coo_index->indices();
+ const auto* indices_data =
sparse_coo_index->indices()->data()->data_as<IndexCType>();
+ const auto* sparse_coo_values =
sparse_coo_values_buffer->data_as<ValueCType>();
+
+ ARROW_ASSIGN_OR_RAISE(auto non_zero_count, tensor.CountNonZero());
+
+ if (indices->shape()[0] != non_zero_count) {
+ return Status::Invalid("Mismatch between non-zero count in sparse tensor
(",
+ indices->shape()[0], ") and dense tensor (",
non_zero_count,
+ ")");
+ } else if (indices->shape()[1] !=
static_cast<int64_t>(tensor.shape().size())) {
+ return Status::Invalid("Mismatch between coordinate dimension in sparse
tensor (",
+ indices->shape()[1], ") and tensor shape (",
+ tensor.shape().size(), ")");
+ }
+
+ auto coord_size = indices->shape()[1];
+ std::vector<int64_t> coord(coord_size);
+ for (int64_t i = 0; i < indices->shape()[0]; i++) {
+ for (int64_t j = 0; j < coord_size; j++) {
+ coord[j] = static_cast<int64_t>(indices_data[i * coord_size + j]);
+ }
+ ARROW_RETURN_NOT_OK(
+ ValidateValue<ValueType>(sparse_coo_values[i],
tensor.Value<ValueType>(coord)));
+ }
+
+ return Status::OK();
+ }
+};
+
+template <typename SparseCSXIndex>
+struct SparseCSXValidator : public SparseTensorValidatorBase {
+ SparseCSXValidator(const Tensor& tensor, const SparseTensor& sparse_tensor)
+ : SparseTensorValidatorBase(tensor, sparse_tensor) {
+ sparse_csx_index =
+
internal::checked_pointer_cast<SparseCSXIndex>(sparse_tensor.sparse_index());
+ }
+
+ Status Validate() {
+ auto indptr = sparse_csx_index->indptr();
+ auto indices = sparse_csx_index->indices();
+ ARROW_RETURN_NOT_OK(
+ internal::ValidateSparseCSXIndex(indptr->type(), indices->type(),
indptr->shape(),
+ indices->shape(),
sparse_csx_index->kTypeName));
+ return util::VisitCSXType(*sparse_tensor.type(), *indices->type(),
*indptr->type(),
+ *this);
+ }
+
+ template <typename ValueType, typename IndexType, typename IndexPointerType>
+ Status operator()(const ValueType& value_type, const IndexType& index_type,
+ const IndexPointerType& index_pointer_type) {
+ return ValidateSparseCSXTensorValues(value_type, index_type,
index_pointer_type);
+ }
+
+ template <typename ValueType, typename IndexType, typename IndexPointerType>
+ Status ValidateSparseCSXTensorValues(const ValueType&, const IndexType&,
+ const IndexPointerType&) {
+ using ValueCType = typename ValueType::c_type;
+ using IndexCType = typename IndexType::c_type;
+ using IndexPointerCType = typename IndexPointerType::c_type;
+ auto axis = sparse_csx_index->kCompressedAxis;
+
+ auto& indptr = sparse_csx_index->indptr();
+ auto& indices = sparse_csx_index->indices();
+ auto indptr_data = indptr->data()->template data_as<IndexPointerCType>();
+ auto indices_data = indices->data()->template data_as<IndexCType>();
+ auto sparse_csx_values = sparse_tensor.data()->template
data_as<ValueCType>();
+
+ ARROW_ASSIGN_OR_RAISE(auto non_zero_count, tensor.CountNonZero());
+ if (indices->shape()[0] != non_zero_count) {
+ return Status::Invalid("Mismatch between non-zero count in sparse tensor
(",
+ indices->shape()[0], ") and dense tensor (",
non_zero_count,
+ ")");
+ }
+
+ for (int64_t i = 0; i < indptr->size() - 1; ++i) {
+ const auto start = static_cast<int64_t>(indptr_data[i]);
+ const auto stop = static_cast<int64_t>(indptr_data[i + 1]);
+ std::vector<int64_t> coord(2);
+ for (int64_t j = start; j < stop; ++j) {
+ switch (axis) {
+ case internal::SparseMatrixCompressedAxis::ROW:
+ coord[0] = i;
+ coord[1] = static_cast<int64_t>(indices_data[j]);
+ break;
+ case internal::SparseMatrixCompressedAxis::COLUMN:
+ coord[0] = static_cast<int64_t>(indices_data[j]);
+ coord[1] = i;
+ break;
+ }
+ ARROW_RETURN_NOT_OK(ValidateValue<ValueType>(sparse_csx_values[j],
+
tensor.Value<ValueType>(coord)));
+ }
+ }
+ return Status::OK();
+ }
+
+ std::shared_ptr<SparseCSXIndex> sparse_csx_index;
+};
+
+struct SparseCSFValidator : public SparseTensorValidatorBase {
+ SparseCSFValidator(const Tensor& tensor, const SparseTensor& sparse_tensor)
+ : SparseTensorValidatorBase(tensor, sparse_tensor) {
+ sparse_csf_index =
+
internal::checked_pointer_cast<SparseCSFIndex>(sparse_tensor.sparse_index());
+ }
+
+ Status Validate() {
+ const auto& indptr = sparse_csf_index->indptr();
+ const auto& indices = sparse_csf_index->indices();
+ const auto& axis_order = sparse_csf_index->axis_order();
+
+ RETURN_NOT_OK(CheckSparseCSFIndexValidity(indptr, indices, axis_order));
+ return util::VisitCSXType(*sparse_tensor.type(), *indices.front()->type(),
+ *indptr.front()->type(), *this);
+ }
+
+ template <typename ValueType, typename IndexType, typename IndexPointerType>
+ Status operator()(const ValueType& value_type, const IndexType& index_type,
+ const IndexPointerType& index_pointer_type) {
+ return ValidateSparseTensorCSFValues(value_type, index_type,
index_pointer_type);
+ }
+
+ template <typename ValueType, typename IndexType, typename IndexPointerType>
+ Status ValidateSparseTensorCSFValues(const ValueType&, const IndexType&,
+ const IndexPointerType&) {
+ const auto& indices = sparse_csf_index->indices();
+
+ ARROW_ASSIGN_OR_RAISE(auto non_zero_count, tensor.CountNonZero());
+ if (indices.back()->size() != non_zero_count) {
+ return Status::Invalid("Mismatch between non-zero count in sparse tensor
(",
+ indices.back()->size(), ") and dense tensor (",
+ non_zero_count, ")");
+ } else if (indices.size() != tensor.shape().size()) {
+ return Status::Invalid("Mismatch between coordinate dimension in sparse
tensor (",
+ indices.size(), ") and tensor shape (",
+ tensor.shape().size(), ")");
+ } else {
+ return CheckValues<ValueType, IndexType, IndexPointerType>(
+ 0, 0, 0, sparse_csf_index->indptr()[0]->size() - 1);
+ }
+ }
+
+ template <typename ValueType, typename IndexType, typename IndexPointerType>
+ Status CheckValues(const int64_t dim, const int64_t dim_offset, const
int64_t start,
+ const int64_t stop) {
+ using ValueCType = typename ValueType::c_type;
+ using IndexCType = typename IndexType::c_type;
+ using IndexPointerCType = typename IndexPointerType::c_type;
+
+ const auto& indices = sparse_csf_index->indices();
+ const auto& indptr = sparse_csf_index->indptr();
+ const auto& axis_order = sparse_csf_index->axis_order();
+ const auto* values = sparse_tensor.data()->data_as<ValueCType>();
+ auto ndim = indices.size();
+ auto strides = tensor.strides();
+
+ const auto& cur_indices = indices[dim];
+ const auto* indices_data = cur_indices->data()->data_as<IndexCType>() +
start;
+
+ if (dim == static_cast<int64_t>(ndim) - 1) {
+ for (auto i = start; i < stop; ++i) {
+ auto index = static_cast<int64_t>(*indices_data);
+ const int64_t offset = dim_offset + index * strides[axis_order[dim]];
+
+ auto sparse_value = values[i];
+ auto tensor_value =
+ *reinterpret_cast<const ValueCType*>(tensor.raw_data() + offset);
+ ARROW_RETURN_NOT_OK(ValidateValue<ValueType>(sparse_value,
tensor_value));
+ ++indices_data;
+ }
+ } else {
+ const auto& cur_indptr = indptr[dim];
+ const auto* indptr_data =
cur_indptr->data()->data_as<IndexPointerCType>() + start;
+
+ for (int64_t i = start; i < stop; ++i) {
+ const int64_t index = *indices_data;
+ int64_t offset = dim_offset + index * strides[axis_order[dim]];
+ auto next_start = static_cast<int64_t>(*indptr_data);
+ auto next_stop = static_cast<int64_t>(*(indptr_data + 1));
+
+ ARROW_RETURN_NOT_OK((CheckValues<ValueType, IndexType,
IndexPointerType>(
+ dim + 1, offset, next_start, next_stop)));
+
+ ++indices_data;
+ ++indptr_data;
+ }
+ }
+ return Status::OK();
+ }
+
+ std::shared_ptr<SparseCSFIndex> sparse_csf_index;
+};
+
+} // namespace
+
+Status SparseTensor::Validate(const Tensor& tensor) const {
+ if (!is_tensor_supported(type_->id())) {
+ return Status::NotImplemented("SparseTensor values only support numeric
types");
+ } else if (!tensor.type()->Equals(type_)) {
+ return Status::Invalid("SparseTensor value types do not match");
Review Comment:
Make this `TypeError`?
##########
cpp/src/arrow/sparse_tensor.cc:
##########
@@ -475,4 +523,292 @@ Result<std::shared_ptr<Tensor>>
SparseTensor::ToTensor(MemoryPool* pool) const {
}
}
+namespace {
+
+struct SparseTensorValidatorBase {
+ SparseTensorValidatorBase(const Tensor& tensor, const SparseTensor&
sparse_tensor)
+ : tensor(tensor), sparse_tensor(sparse_tensor) {}
+
+ template <typename ValueType>
+ Status ValidateValue(typename ValueType::c_type sparse_tensor_value,
+ typename ValueType::c_type tensor_value) {
+ if (!internal::is_not_zero<ValueType>(sparse_tensor_value)) {
+ return Status::Invalid("Sparse tensor values must be non-zero");
+ } else if (sparse_tensor_value != tensor_value) {
+ if constexpr (is_floating_type<ValueType>::value) {
+ if (!std::isnan(tensor_value) || !std::isnan(sparse_tensor_value)) {
+ return Status::Invalid(
+ "Inconsistent values between sparse tensor and dense tensor");
+ }
+ } else {
+ return Status::Invalid(
+ "Inconsistent values between sparse tensor and dense tensor");
+ }
+ }
+ return Status::OK();
+ }
+
+ const Tensor& tensor;
+ const SparseTensor& sparse_tensor;
+};
+
+struct SparseCOOValidator : public SparseTensorValidatorBase {
+ using SparseTensorValidatorBase::SparseTensorValidatorBase;
+
+ Status Validate() {
+ auto sparse_coo_index =
+
internal::checked_pointer_cast<SparseCOOIndex>(sparse_tensor.sparse_index());
+ auto indices = sparse_coo_index->indices();
+ RETURN_NOT_OK(CheckSparseCOOIndexValidity(indices->type(),
indices->shape(),
+ indices->strides()));
+ // Validate Values
+ return util::VisitCOOTensorType(*sparse_tensor.type(), *indices->type(),
*this);
+ }
+
+ template <typename ValueType, typename IndexType>
+ Status operator()(const ValueType& value_type, const IndexType& index_type) {
+ return ValidateSparseCooTensorValues(value_type, index_type);
+ }
+
+ template <typename ValueType, typename IndexType>
+ Status ValidateSparseCooTensorValues(const ValueType&, const IndexType&) {
+ using IndexCType = typename IndexType::c_type;
+ using ValueCType = typename ValueType::c_type;
+
+ auto sparse_coo_index =
+
internal::checked_pointer_cast<SparseCOOIndex>(sparse_tensor.sparse_index());
+ auto sparse_coo_values_buffer = sparse_tensor.data();
+
+ const auto& indices = sparse_coo_index->indices();
+ const auto* indices_data =
sparse_coo_index->indices()->data()->data_as<IndexCType>();
+ const auto* sparse_coo_values =
sparse_coo_values_buffer->data_as<ValueCType>();
+
+ ARROW_ASSIGN_OR_RAISE(auto non_zero_count, tensor.CountNonZero());
+
+ if (indices->shape()[0] != non_zero_count) {
+ return Status::Invalid("Mismatch between non-zero count in sparse tensor
(",
+ indices->shape()[0], ") and dense tensor (",
non_zero_count,
+ ")");
+ } else if (indices->shape()[1] !=
static_cast<int64_t>(tensor.shape().size())) {
+ return Status::Invalid("Mismatch between coordinate dimension in sparse
tensor (",
+ indices->shape()[1], ") and tensor shape (",
+ tensor.shape().size(), ")");
+ }
+
+ auto coord_size = indices->shape()[1];
+ std::vector<int64_t> coord(coord_size);
+ for (int64_t i = 0; i < indices->shape()[0]; i++) {
+ for (int64_t j = 0; j < coord_size; j++) {
+ coord[j] = static_cast<int64_t>(indices_data[i * coord_size + j]);
+ }
+ ARROW_RETURN_NOT_OK(
+ ValidateValue<ValueType>(sparse_coo_values[i],
tensor.Value<ValueType>(coord)));
+ }
+
+ return Status::OK();
+ }
+};
+
+template <typename SparseCSXIndex>
+struct SparseCSXValidator : public SparseTensorValidatorBase {
+ SparseCSXValidator(const Tensor& tensor, const SparseTensor& sparse_tensor)
+ : SparseTensorValidatorBase(tensor, sparse_tensor) {
+ sparse_csx_index =
+
internal::checked_pointer_cast<SparseCSXIndex>(sparse_tensor.sparse_index());
+ }
+
+ Status Validate() {
+ auto indptr = sparse_csx_index->indptr();
+ auto indices = sparse_csx_index->indices();
+ ARROW_RETURN_NOT_OK(
+ internal::ValidateSparseCSXIndex(indptr->type(), indices->type(),
indptr->shape(),
+ indices->shape(),
sparse_csx_index->kTypeName));
+ return util::VisitCSXType(*sparse_tensor.type(), *indices->type(),
*indptr->type(),
+ *this);
+ }
+
+ template <typename ValueType, typename IndexType, typename IndexPointerType>
+ Status operator()(const ValueType& value_type, const IndexType& index_type,
+ const IndexPointerType& index_pointer_type) {
+ return ValidateSparseCSXTensorValues(value_type, index_type,
index_pointer_type);
+ }
+
+ template <typename ValueType, typename IndexType, typename IndexPointerType>
+ Status ValidateSparseCSXTensorValues(const ValueType&, const IndexType&,
+ const IndexPointerType&) {
+ using ValueCType = typename ValueType::c_type;
+ using IndexCType = typename IndexType::c_type;
+ using IndexPointerCType = typename IndexPointerType::c_type;
+ auto axis = sparse_csx_index->kCompressedAxis;
+
+ auto& indptr = sparse_csx_index->indptr();
+ auto& indices = sparse_csx_index->indices();
+ auto indptr_data = indptr->data()->template data_as<IndexPointerCType>();
+ auto indices_data = indices->data()->template data_as<IndexCType>();
+ auto sparse_csx_values = sparse_tensor.data()->template
data_as<ValueCType>();
Review Comment:
Mark all these `const` for clarity?
##########
cpp/src/arrow/sparse_tensor.cc:
##########
@@ -475,4 +523,292 @@ Result<std::shared_ptr<Tensor>>
SparseTensor::ToTensor(MemoryPool* pool) const {
}
}
+namespace {
+
+struct SparseTensorValidatorBase {
+ SparseTensorValidatorBase(const Tensor& tensor, const SparseTensor&
sparse_tensor)
+ : tensor(tensor), sparse_tensor(sparse_tensor) {}
+
+ template <typename ValueType>
+ Status ValidateValue(typename ValueType::c_type sparse_tensor_value,
+ typename ValueType::c_type tensor_value) {
+ if (!internal::is_not_zero<ValueType>(sparse_tensor_value)) {
+ return Status::Invalid("Sparse tensor values must be non-zero");
+ } else if (sparse_tensor_value != tensor_value) {
+ if constexpr (is_floating_type<ValueType>::value) {
+ if (!std::isnan(tensor_value) || !std::isnan(sparse_tensor_value)) {
+ return Status::Invalid(
+ "Inconsistent values between sparse tensor and dense tensor");
+ }
+ } else {
+ return Status::Invalid(
+ "Inconsistent values between sparse tensor and dense tensor");
+ }
+ }
+ return Status::OK();
+ }
+
+ const Tensor& tensor;
+ const SparseTensor& sparse_tensor;
+};
+
+struct SparseCOOValidator : public SparseTensorValidatorBase {
+ using SparseTensorValidatorBase::SparseTensorValidatorBase;
+
+ Status Validate() {
+ auto sparse_coo_index =
+
internal::checked_pointer_cast<SparseCOOIndex>(sparse_tensor.sparse_index());
+ auto indices = sparse_coo_index->indices();
+ RETURN_NOT_OK(CheckSparseCOOIndexValidity(indices->type(),
indices->shape(),
+ indices->strides()));
+ // Validate Values
+ return util::VisitCOOTensorType(*sparse_tensor.type(), *indices->type(),
*this);
+ }
+
+ template <typename ValueType, typename IndexType>
+ Status operator()(const ValueType& value_type, const IndexType& index_type) {
+ return ValidateSparseCooTensorValues(value_type, index_type);
+ }
+
+ template <typename ValueType, typename IndexType>
+ Status ValidateSparseCooTensorValues(const ValueType&, const IndexType&) {
+ using IndexCType = typename IndexType::c_type;
+ using ValueCType = typename ValueType::c_type;
+
+ auto sparse_coo_index =
+
internal::checked_pointer_cast<SparseCOOIndex>(sparse_tensor.sparse_index());
+ auto sparse_coo_values_buffer = sparse_tensor.data();
+
+ const auto& indices = sparse_coo_index->indices();
+ const auto* indices_data =
sparse_coo_index->indices()->data()->data_as<IndexCType>();
+ const auto* sparse_coo_values =
sparse_coo_values_buffer->data_as<ValueCType>();
+
+ ARROW_ASSIGN_OR_RAISE(auto non_zero_count, tensor.CountNonZero());
+
+ if (indices->shape()[0] != non_zero_count) {
+ return Status::Invalid("Mismatch between non-zero count in sparse tensor
(",
+ indices->shape()[0], ") and dense tensor (",
non_zero_count,
+ ")");
+ } else if (indices->shape()[1] !=
static_cast<int64_t>(tensor.shape().size())) {
+ return Status::Invalid("Mismatch between coordinate dimension in sparse
tensor (",
+ indices->shape()[1], ") and tensor shape (",
+ tensor.shape().size(), ")");
+ }
+
+ auto coord_size = indices->shape()[1];
+ std::vector<int64_t> coord(coord_size);
+ for (int64_t i = 0; i < indices->shape()[0]; i++) {
+ for (int64_t j = 0; j < coord_size; j++) {
+ coord[j] = static_cast<int64_t>(indices_data[i * coord_size + j]);
+ }
+ ARROW_RETURN_NOT_OK(
+ ValidateValue<ValueType>(sparse_coo_values[i],
tensor.Value<ValueType>(coord)));
+ }
+
+ return Status::OK();
+ }
+};
+
+template <typename SparseCSXIndex>
+struct SparseCSXValidator : public SparseTensorValidatorBase {
+ SparseCSXValidator(const Tensor& tensor, const SparseTensor& sparse_tensor)
+ : SparseTensorValidatorBase(tensor, sparse_tensor) {
+ sparse_csx_index =
+
internal::checked_pointer_cast<SparseCSXIndex>(sparse_tensor.sparse_index());
+ }
+
+ Status Validate() {
+ auto indptr = sparse_csx_index->indptr();
+ auto indices = sparse_csx_index->indices();
+ ARROW_RETURN_NOT_OK(
+ internal::ValidateSparseCSXIndex(indptr->type(), indices->type(),
indptr->shape(),
+ indices->shape(),
sparse_csx_index->kTypeName));
+ return util::VisitCSXType(*sparse_tensor.type(), *indices->type(),
*indptr->type(),
+ *this);
+ }
+
+ template <typename ValueType, typename IndexType, typename IndexPointerType>
+ Status operator()(const ValueType& value_type, const IndexType& index_type,
+ const IndexPointerType& index_pointer_type) {
+ return ValidateSparseCSXTensorValues(value_type, index_type,
index_pointer_type);
+ }
+
+ template <typename ValueType, typename IndexType, typename IndexPointerType>
+ Status ValidateSparseCSXTensorValues(const ValueType&, const IndexType&,
+ const IndexPointerType&) {
+ using ValueCType = typename ValueType::c_type;
+ using IndexCType = typename IndexType::c_type;
+ using IndexPointerCType = typename IndexPointerType::c_type;
+ auto axis = sparse_csx_index->kCompressedAxis;
+
+ auto& indptr = sparse_csx_index->indptr();
+ auto& indices = sparse_csx_index->indices();
+ auto indptr_data = indptr->data()->template data_as<IndexPointerCType>();
+ auto indices_data = indices->data()->template data_as<IndexCType>();
+ auto sparse_csx_values = sparse_tensor.data()->template
data_as<ValueCType>();
+
+ ARROW_ASSIGN_OR_RAISE(auto non_zero_count, tensor.CountNonZero());
+ if (indices->shape()[0] != non_zero_count) {
+ return Status::Invalid("Mismatch between non-zero count in sparse tensor
(",
+ indices->shape()[0], ") and dense tensor (",
non_zero_count,
+ ")");
+ }
+
+ for (int64_t i = 0; i < indptr->size() - 1; ++i) {
+ const auto start = static_cast<int64_t>(indptr_data[i]);
+ const auto stop = static_cast<int64_t>(indptr_data[i + 1]);
+ std::vector<int64_t> coord(2);
+ for (int64_t j = start; j < stop; ++j) {
+ switch (axis) {
+ case internal::SparseMatrixCompressedAxis::ROW:
+ coord[0] = i;
+ coord[1] = static_cast<int64_t>(indices_data[j]);
+ break;
+ case internal::SparseMatrixCompressedAxis::COLUMN:
+ coord[0] = static_cast<int64_t>(indices_data[j]);
+ coord[1] = i;
+ break;
+ }
+ ARROW_RETURN_NOT_OK(ValidateValue<ValueType>(sparse_csx_values[j],
+
tensor.Value<ValueType>(coord)));
+ }
+ }
+ return Status::OK();
+ }
+
+ std::shared_ptr<SparseCSXIndex> sparse_csx_index;
+};
+
+struct SparseCSFValidator : public SparseTensorValidatorBase {
+ SparseCSFValidator(const Tensor& tensor, const SparseTensor& sparse_tensor)
+ : SparseTensorValidatorBase(tensor, sparse_tensor) {
+ sparse_csf_index =
+
internal::checked_pointer_cast<SparseCSFIndex>(sparse_tensor.sparse_index());
+ }
+
+ Status Validate() {
+ const auto& indptr = sparse_csf_index->indptr();
+ const auto& indices = sparse_csf_index->indices();
+ const auto& axis_order = sparse_csf_index->axis_order();
+
+ RETURN_NOT_OK(CheckSparseCSFIndexValidity(indptr, indices, axis_order));
+ return util::VisitCSXType(*sparse_tensor.type(), *indices.front()->type(),
+ *indptr.front()->type(), *this);
+ }
+
+ template <typename ValueType, typename IndexType, typename IndexPointerType>
+ Status operator()(const ValueType& value_type, const IndexType& index_type,
+ const IndexPointerType& index_pointer_type) {
+ return ValidateSparseTensorCSFValues(value_type, index_type,
index_pointer_type);
+ }
+
+ template <typename ValueType, typename IndexType, typename IndexPointerType>
+ Status ValidateSparseTensorCSFValues(const ValueType&, const IndexType&,
+ const IndexPointerType&) {
+ const auto& indices = sparse_csf_index->indices();
+
+ ARROW_ASSIGN_OR_RAISE(auto non_zero_count, tensor.CountNonZero());
+ if (indices.back()->size() != non_zero_count) {
+ return Status::Invalid("Mismatch between non-zero count in sparse tensor
(",
+ indices.back()->size(), ") and dense tensor (",
+ non_zero_count, ")");
+ } else if (indices.size() != tensor.shape().size()) {
+ return Status::Invalid("Mismatch between coordinate dimension in sparse
tensor (",
+ indices.size(), ") and tensor shape (",
+ tensor.shape().size(), ")");
+ } else {
+ return CheckValues<ValueType, IndexType, IndexPointerType>(
+ 0, 0, 0, sparse_csf_index->indptr()[0]->size() - 1);
+ }
+ }
+
+ template <typename ValueType, typename IndexType, typename IndexPointerType>
+ Status CheckValues(const int64_t dim, const int64_t dim_offset, const
int64_t start,
+ const int64_t stop) {
+ using ValueCType = typename ValueType::c_type;
+ using IndexCType = typename IndexType::c_type;
+ using IndexPointerCType = typename IndexPointerType::c_type;
+
+ const auto& indices = sparse_csf_index->indices();
+ const auto& indptr = sparse_csf_index->indptr();
+ const auto& axis_order = sparse_csf_index->axis_order();
+ const auto* values = sparse_tensor.data()->data_as<ValueCType>();
+ auto ndim = indices.size();
+ auto strides = tensor.strides();
+
+ const auto& cur_indices = indices[dim];
+ const auto* indices_data = cur_indices->data()->data_as<IndexCType>() +
start;
+
+ if (dim == static_cast<int64_t>(ndim) - 1) {
+ for (auto i = start; i < stop; ++i) {
+ auto index = static_cast<int64_t>(*indices_data);
+ const int64_t offset = dim_offset + index * strides[axis_order[dim]];
+
+ auto sparse_value = values[i];
+ auto tensor_value =
+ *reinterpret_cast<const ValueCType*>(tensor.raw_data() + offset);
+ ARROW_RETURN_NOT_OK(ValidateValue<ValueType>(sparse_value,
tensor_value));
+ ++indices_data;
+ }
+ } else {
+ const auto& cur_indptr = indptr[dim];
+ const auto* indptr_data =
cur_indptr->data()->data_as<IndexPointerCType>() + start;
+
+ for (int64_t i = start; i < stop; ++i) {
+ const int64_t index = *indices_data;
+ int64_t offset = dim_offset + index * strides[axis_order[dim]];
+ auto next_start = static_cast<int64_t>(*indptr_data);
+ auto next_stop = static_cast<int64_t>(*(indptr_data + 1));
+
+ ARROW_RETURN_NOT_OK((CheckValues<ValueType, IndexType,
IndexPointerType>(
+ dim + 1, offset, next_start, next_stop)));
+
+ ++indices_data;
+ ++indptr_data;
+ }
+ }
+ return Status::OK();
+ }
+
+ std::shared_ptr<SparseCSFIndex> sparse_csf_index;
+};
+
+} // namespace
+
+Status SparseTensor::Validate(const Tensor& tensor) const {
+ if (!is_tensor_supported(type_->id())) {
+ return Status::NotImplemented("SparseTensor values only support numeric
types");
+ } else if (!tensor.type()->Equals(type_)) {
+ return Status::Invalid("SparseTensor value types do not match");
+ } else if (tensor.shape() != shape_) {
+ return Status::Invalid("SparseTensor shape do not match");
+ } else if (tensor.dim_names() != dim_names_) {
+ return Status::Invalid("SparseTensor dim_names do not match");
Review Comment:
```suggestion
return Status::Invalid("SparseTensor dim_names does not match Tensor");
```
##########
cpp/src/arrow/util/sparse_tensor_util.h:
##########
Review Comment:
Are we sure we want to expose these APIs publicly? I would suggest putting
these into `arrow/tensor/util_internal.h` instead.
##########
cpp/src/arrow/sparse_tensor.cc:
##########
@@ -337,25 +340,68 @@ void CheckSparseCSXIndexValidity(const
std::shared_ptr<DataType>& indptr_type,
namespace {
-inline Status CheckSparseCSFIndexValidity(const std::shared_ptr<DataType>&
indptr_type,
- const std::shared_ptr<DataType>&
indices_type,
- const int64_t num_indptrs,
- const int64_t num_indices,
- const int64_t axis_order_size) {
+inline Status CheckSparseCSFIndexValidity(
+ const std::vector<std::shared_ptr<Tensor>>& indptr,
+ const std::vector<std::shared_ptr<Tensor>>& indices,
+ const std::vector<int64_t>& axis_order) {
+ auto indptr_type = indptr.front()->type();
+ auto indices_type = indices.front()->type();
+
if (!is_integer(indptr_type->id())) {
return Status::TypeError("Type of SparseCSFIndex indptr must be integer");
}
if (!is_integer(indices_type->id())) {
return Status::TypeError("Type of SparseCSFIndex indices must be integer");
}
- if (num_indptrs + 1 != num_indices) {
+ if (indptr.size() + 1 != indices.size()) {
return Status::Invalid(
"Length of indices must be equal to length of indptrs + 1 for
SparseCSFIndex.");
}
- if (axis_order_size != num_indices) {
+ if (axis_order.size() != indices.size()) {
return Status::Invalid(
"Length of indices must be equal to number of dimensions for
SparseCSFIndex.");
}
+
+ for (int64_t i = 1; i < static_cast<int64_t>(indptr.size()); i++) {
+ if (!indptr_type->Equals(indptr[i]->type())) {
+ return Status::Invalid("All index pointers must have the same data
type");
+ }
+ }
+
+ for (int64_t i = 1; i < static_cast<int64_t>(indices.size()); i++) {
+ if (!indices_type->Equals(indices[i]->type())) {
+ return Status::Invalid("All indices must have the same data type");
Review Comment:
Similar: make this a `TypeError` too?
##########
cpp/src/arrow/tensor/csf_converter.cc:
##########
@@ -57,85 +59,78 @@ inline void IncrementIndex(std::vector<int64_t>& coord,
const std::vector<int64_
// ----------------------------------------------------------------------
// SparseTensorConverter for SparseCSFIndex
-class SparseCSFTensorConverter : private SparseTensorConverterMixin {
- using SparseTensorConverterMixin::AssignIndex;
- using SparseTensorConverterMixin::IsNonZero;
-
+class SparseCSFTensorConverter {
public:
SparseCSFTensorConverter(const Tensor& tensor,
const std::shared_ptr<DataType>& index_value_type,
MemoryPool* pool)
: tensor_(tensor), index_value_type_(index_value_type), pool_(pool) {}
- Status Convert() {
+ // Note: The same type is considered for both indices and indptr during
+ // tensor-to-CSF-tensor conversion.
Review Comment:
Same comment as for CSX: we should trim down on code generation by taking a
single type argument here.
##########
cpp/src/arrow/util/sparse_tensor_util.h:
##########
@@ -0,0 +1,95 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "arrow/sparse_tensor.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/visit_type_inline.h"
+
+namespace arrow::util {
+
+namespace detail {
+
+struct ValueVisitor {
+ template <typename ValueType, typename Function, typename... Args>
+ enable_if_number<ValueType, Status> Visit(const ValueType& value_type,
+ Function&& function, Args&&...
args) {
+ return function(value_type, std::forward<Args>(args)...);
+ }
+
+ template <typename... Args>
+ Status Visit(const DataType& value_type, Args&&... args) {
+ return Status::TypeError("Invalid value type: ", value_type.name(),
+ ". Expected a number.");
+ }
+};
+
+struct IndexVisitor {
+ template <typename IndexType, typename Function, typename... Args>
+ enable_if_integer<IndexType, Status> Visit(const IndexType& index_type,
+ Function&& function,
+ const DataType& value_type,
Args&&... args) {
+ ValueVisitor visitor;
+ return VisitTypeInline(value_type, &visitor,
std::forward<Function>(function),
+ index_type, std::forward<Args>(args)...);
+ }
+
+ template <typename... Args>
+ Status Visit(const DataType& index_type, Args&&...) {
+ return Status::TypeError("Invalid index pointer type: ", index_type.name(),
+ ". Expected integer.");
+ }
+};
+
+struct IndexPointerVisitor {
+ template <typename IndexPointerType, typename Function>
+ enable_if_integer<IndexPointerType, Status> Visit(
+ const IndexPointerType& index_pointer_type, Function&& function,
+ const DataType& index_type, const DataType& value_type) {
+ IndexVisitor visitor;
+ return VisitTypeInline(index_type, &visitor,
std::forward<Function>(function),
+ value_type, index_pointer_type);
+ }
+
+ template <typename... Args>
+ Status Visit(const DataType& index_pointer_type, Args&&...) {
+ return Status::TypeError("Invalid index pointer type: ",
index_pointer_type.name(),
+ ". Expected integer.");
+ }
+};
+
+} // namespace detail
+
+template <typename Function>
+inline Status VisitCSXType(const DataType& value_type, const DataType&
index_type,
+ const DataType& indptr_type, Function&& function) {
+ detail::IndexPointerVisitor visitor;
+ return VisitTypeInline(indptr_type, &visitor,
std::forward<Function>(function),
+ index_type, value_type);
+}
+
+template <typename Function>
+inline Status VisitCOOTensorType(const DataType& value_type, const DataType&
index_type,
Review Comment:
This is not COO-specific; let's call it `VisitTensorType`?
##########
cpp/src/arrow/sparse_tensor.cc:
##########
@@ -475,4 +523,292 @@ Result<std::shared_ptr<Tensor>>
SparseTensor::ToTensor(MemoryPool* pool) const {
}
}
+namespace {
+
+struct SparseTensorValidatorBase {
+ SparseTensorValidatorBase(const Tensor& tensor, const SparseTensor&
sparse_tensor)
+ : tensor(tensor), sparse_tensor(sparse_tensor) {}
+
+ template <typename ValueType>
+ Status ValidateValue(typename ValueType::c_type sparse_tensor_value,
+ typename ValueType::c_type tensor_value) {
+ if (!internal::is_not_zero<ValueType>(sparse_tensor_value)) {
+ return Status::Invalid("Sparse tensor values must be non-zero");
+ } else if (sparse_tensor_value != tensor_value) {
+ if constexpr (is_floating_type<ValueType>::value) {
+ if (!std::isnan(tensor_value) || !std::isnan(sparse_tensor_value)) {
+ return Status::Invalid(
+ "Inconsistent values between sparse tensor and dense tensor");
+ }
+ } else {
+ return Status::Invalid(
+ "Inconsistent values between sparse tensor and dense tensor");
+ }
+ }
+ return Status::OK();
+ }
+
+ const Tensor& tensor;
+ const SparseTensor& sparse_tensor;
+};
+
+struct SparseCOOValidator : public SparseTensorValidatorBase {
+ using SparseTensorValidatorBase::SparseTensorValidatorBase;
+
+ Status Validate() {
+ auto sparse_coo_index =
+
internal::checked_pointer_cast<SparseCOOIndex>(sparse_tensor.sparse_index());
+ auto indices = sparse_coo_index->indices();
+ RETURN_NOT_OK(CheckSparseCOOIndexValidity(indices->type(),
indices->shape(),
+ indices->strides()));
+ // Validate Values
+ return util::VisitCOOTensorType(*sparse_tensor.type(), *indices->type(),
*this);
+ }
+
+ template <typename ValueType, typename IndexType>
+ Status operator()(const ValueType& value_type, const IndexType& index_type) {
+ return ValidateSparseCooTensorValues(value_type, index_type);
+ }
+
+ template <typename ValueType, typename IndexType>
+ Status ValidateSparseCooTensorValues(const ValueType&, const IndexType&) {
+ using IndexCType = typename IndexType::c_type;
+ using ValueCType = typename ValueType::c_type;
+
+ auto sparse_coo_index =
+
internal::checked_pointer_cast<SparseCOOIndex>(sparse_tensor.sparse_index());
+ auto sparse_coo_values_buffer = sparse_tensor.data();
+
+ const auto& indices = sparse_coo_index->indices();
+ const auto* indices_data =
sparse_coo_index->indices()->data()->data_as<IndexCType>();
+ const auto* sparse_coo_values =
sparse_coo_values_buffer->data_as<ValueCType>();
+
+ ARROW_ASSIGN_OR_RAISE(auto non_zero_count, tensor.CountNonZero());
+
+ if (indices->shape()[0] != non_zero_count) {
+ return Status::Invalid("Mismatch between non-zero count in sparse tensor
(",
+ indices->shape()[0], ") and dense tensor (",
non_zero_count,
+ ")");
+ } else if (indices->shape()[1] !=
static_cast<int64_t>(tensor.shape().size())) {
+ return Status::Invalid("Mismatch between coordinate dimension in sparse
tensor (",
+ indices->shape()[1], ") and tensor shape (",
+ tensor.shape().size(), ")");
+ }
+
+ auto coord_size = indices->shape()[1];
+ std::vector<int64_t> coord(coord_size);
+ for (int64_t i = 0; i < indices->shape()[0]; i++) {
+ for (int64_t j = 0; j < coord_size; j++) {
+ coord[j] = static_cast<int64_t>(indices_data[i * coord_size + j]);
+ }
+ ARROW_RETURN_NOT_OK(
+ ValidateValue<ValueType>(sparse_coo_values[i],
tensor.Value<ValueType>(coord)));
+ }
+
+ return Status::OK();
+ }
+};
+
+template <typename SparseCSXIndex>
+struct SparseCSXValidator : public SparseTensorValidatorBase {
+ SparseCSXValidator(const Tensor& tensor, const SparseTensor& sparse_tensor)
+ : SparseTensorValidatorBase(tensor, sparse_tensor) {
+ sparse_csx_index =
+
internal::checked_pointer_cast<SparseCSXIndex>(sparse_tensor.sparse_index());
+ }
+
+ Status Validate() {
+ auto indptr = sparse_csx_index->indptr();
+ auto indices = sparse_csx_index->indices();
+ ARROW_RETURN_NOT_OK(
+ internal::ValidateSparseCSXIndex(indptr->type(), indices->type(),
indptr->shape(),
+ indices->shape(),
sparse_csx_index->kTypeName));
+ return util::VisitCSXType(*sparse_tensor.type(), *indices->type(),
*indptr->type(),
+ *this);
+ }
+
+ template <typename ValueType, typename IndexType, typename IndexPointerType>
+ Status operator()(const ValueType& value_type, const IndexType& index_type,
+ const IndexPointerType& index_pointer_type) {
Review Comment:
As discussed elsewhere, can you also remove the `IndexPointerType`
parametrization here and below?
##########
cpp/src/arrow/util/sparse_tensor_util.h:
##########
@@ -0,0 +1,95 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "arrow/sparse_tensor.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/visit_type_inline.h"
+
+namespace arrow::util {
+
+namespace detail {
+
+struct ValueVisitor {
+ template <typename ValueType, typename Function, typename... Args>
+ enable_if_number<ValueType, Status> Visit(const ValueType& value_type,
+ Function&& function, Args&&...
args) {
+ return function(value_type, std::forward<Args>(args)...);
+ }
+
+ template <typename... Args>
+ Status Visit(const DataType& value_type, Args&&... args) {
+ return Status::TypeError("Invalid value type: ", value_type.name(),
+ ". Expected a number.");
+ }
+};
+
+struct IndexVisitor {
+ template <typename IndexType, typename Function, typename... Args>
+ enable_if_integer<IndexType, Status> Visit(const IndexType& index_type,
+ Function&& function,
+ const DataType& value_type,
Args&&... args) {
+ ValueVisitor visitor;
+ return VisitTypeInline(value_type, &visitor,
std::forward<Function>(function),
+ index_type, std::forward<Args>(args)...);
+ }
+
+ template <typename... Args>
+ Status Visit(const DataType& index_type, Args&&...) {
+ return Status::TypeError("Invalid index pointer type: ", index_type.name(),
+ ". Expected integer.");
+ }
+};
+
+struct IndexPointerVisitor {
+ template <typename IndexPointerType, typename Function>
+ enable_if_integer<IndexPointerType, Status> Visit(
+ const IndexPointerType& index_pointer_type, Function&& function,
+ const DataType& index_type, const DataType& value_type) {
+ IndexVisitor visitor;
+ return VisitTypeInline(index_type, &visitor,
std::forward<Function>(function),
+ value_type, index_pointer_type);
+ }
+
+ template <typename... Args>
+ Status Visit(const DataType& index_pointer_type, Args&&...) {
+ return Status::TypeError("Invalid index pointer type: ",
index_pointer_type.name(),
+ ". Expected integer.");
+ }
+};
+
+} // namespace detail
+
+template <typename Function>
+inline Status VisitCSXType(const DataType& value_type, const DataType&
index_type,
+ const DataType& indptr_type, Function&& function) {
Review Comment:
The 3-type variant should not be needed as per previous comments.
##########
cpp/src/arrow/sparse_tensor.cc:
##########
@@ -475,4 +523,292 @@ Result<std::shared_ptr<Tensor>>
SparseTensor::ToTensor(MemoryPool* pool) const {
}
}
+namespace {
+
+struct SparseTensorValidatorBase {
+ SparseTensorValidatorBase(const Tensor& tensor, const SparseTensor&
sparse_tensor)
+ : tensor(tensor), sparse_tensor(sparse_tensor) {}
+
+ template <typename ValueType>
+ Status ValidateValue(typename ValueType::c_type sparse_tensor_value,
+ typename ValueType::c_type tensor_value) {
+ if (!internal::is_not_zero<ValueType>(sparse_tensor_value)) {
+ return Status::Invalid("Sparse tensor values must be non-zero");
+ } else if (sparse_tensor_value != tensor_value) {
+ if constexpr (is_floating_type<ValueType>::value) {
+ if (!std::isnan(tensor_value) || !std::isnan(sparse_tensor_value)) {
+ return Status::Invalid(
+ "Inconsistent values between sparse tensor and dense tensor");
+ }
+ } else {
+ return Status::Invalid(
+ "Inconsistent values between sparse tensor and dense tensor");
+ }
+ }
+ return Status::OK();
+ }
+
+ const Tensor& tensor;
+ const SparseTensor& sparse_tensor;
+};
+
+struct SparseCOOValidator : public SparseTensorValidatorBase {
+ using SparseTensorValidatorBase::SparseTensorValidatorBase;
+
+ Status Validate() {
+ auto sparse_coo_index =
+
internal::checked_pointer_cast<SparseCOOIndex>(sparse_tensor.sparse_index());
+ auto indices = sparse_coo_index->indices();
+ RETURN_NOT_OK(CheckSparseCOOIndexValidity(indices->type(),
indices->shape(),
+ indices->strides()));
+ // Validate Values
+ return util::VisitCOOTensorType(*sparse_tensor.type(), *indices->type(),
*this);
+ }
+
+ template <typename ValueType, typename IndexType>
+ Status operator()(const ValueType& value_type, const IndexType& index_type) {
+ return ValidateSparseCooTensorValues(value_type, index_type);
+ }
+
+ template <typename ValueType, typename IndexType>
+ Status ValidateSparseCooTensorValues(const ValueType&, const IndexType&) {
+ using IndexCType = typename IndexType::c_type;
+ using ValueCType = typename ValueType::c_type;
+
+ auto sparse_coo_index =
+
internal::checked_pointer_cast<SparseCOOIndex>(sparse_tensor.sparse_index());
+ auto sparse_coo_values_buffer = sparse_tensor.data();
+
+ const auto& indices = sparse_coo_index->indices();
+ const auto* indices_data =
sparse_coo_index->indices()->data()->data_as<IndexCType>();
+ const auto* sparse_coo_values =
sparse_coo_values_buffer->data_as<ValueCType>();
+
+ ARROW_ASSIGN_OR_RAISE(auto non_zero_count, tensor.CountNonZero());
+
+ if (indices->shape()[0] != non_zero_count) {
+ return Status::Invalid("Mismatch between non-zero count in sparse tensor
(",
+ indices->shape()[0], ") and dense tensor (",
non_zero_count,
+ ")");
+ } else if (indices->shape()[1] !=
static_cast<int64_t>(tensor.shape().size())) {
+ return Status::Invalid("Mismatch between coordinate dimension in sparse
tensor (",
+ indices->shape()[1], ") and tensor shape (",
+ tensor.shape().size(), ")");
+ }
+
+ auto coord_size = indices->shape()[1];
+ std::vector<int64_t> coord(coord_size);
+ for (int64_t i = 0; i < indices->shape()[0]; i++) {
+ for (int64_t j = 0; j < coord_size; j++) {
+ coord[j] = static_cast<int64_t>(indices_data[i * coord_size + j]);
+ }
+ ARROW_RETURN_NOT_OK(
+ ValidateValue<ValueType>(sparse_coo_values[i],
tensor.Value<ValueType>(coord)));
+ }
+
+ return Status::OK();
+ }
+};
+
+template <typename SparseCSXIndex>
+struct SparseCSXValidator : public SparseTensorValidatorBase {
+ SparseCSXValidator(const Tensor& tensor, const SparseTensor& sparse_tensor)
+ : SparseTensorValidatorBase(tensor, sparse_tensor) {
+ sparse_csx_index =
+
internal::checked_pointer_cast<SparseCSXIndex>(sparse_tensor.sparse_index());
+ }
+
+ Status Validate() {
+ auto indptr = sparse_csx_index->indptr();
+ auto indices = sparse_csx_index->indices();
+ ARROW_RETURN_NOT_OK(
+ internal::ValidateSparseCSXIndex(indptr->type(), indices->type(),
indptr->shape(),
+ indices->shape(),
sparse_csx_index->kTypeName));
+ return util::VisitCSXType(*sparse_tensor.type(), *indices->type(),
*indptr->type(),
+ *this);
+ }
+
+ template <typename ValueType, typename IndexType, typename IndexPointerType>
+ Status operator()(const ValueType& value_type, const IndexType& index_type,
+ const IndexPointerType& index_pointer_type) {
+ return ValidateSparseCSXTensorValues(value_type, index_type,
index_pointer_type);
+ }
+
+ template <typename ValueType, typename IndexType, typename IndexPointerType>
+ Status ValidateSparseCSXTensorValues(const ValueType&, const IndexType&,
+ const IndexPointerType&) {
+ using ValueCType = typename ValueType::c_type;
+ using IndexCType = typename IndexType::c_type;
+ using IndexPointerCType = typename IndexPointerType::c_type;
+ auto axis = sparse_csx_index->kCompressedAxis;
+
+ auto& indptr = sparse_csx_index->indptr();
+ auto& indices = sparse_csx_index->indices();
+ auto indptr_data = indptr->data()->template data_as<IndexPointerCType>();
+ auto indices_data = indices->data()->template data_as<IndexCType>();
+ auto sparse_csx_values = sparse_tensor.data()->template
data_as<ValueCType>();
+
+ ARROW_ASSIGN_OR_RAISE(auto non_zero_count, tensor.CountNonZero());
+ if (indices->shape()[0] != non_zero_count) {
+ return Status::Invalid("Mismatch between non-zero count in sparse tensor
(",
+ indices->shape()[0], ") and dense tensor (",
non_zero_count,
+ ")");
+ }
+
+ for (int64_t i = 0; i < indptr->size() - 1; ++i) {
+ const auto start = static_cast<int64_t>(indptr_data[i]);
+ const auto stop = static_cast<int64_t>(indptr_data[i + 1]);
+ std::vector<int64_t> coord(2);
+ for (int64_t j = start; j < stop; ++j) {
+ switch (axis) {
+ case internal::SparseMatrixCompressedAxis::ROW:
+ coord[0] = i;
+ coord[1] = static_cast<int64_t>(indices_data[j]);
+ break;
+ case internal::SparseMatrixCompressedAxis::COLUMN:
+ coord[0] = static_cast<int64_t>(indices_data[j]);
+ coord[1] = i;
+ break;
+ }
+ ARROW_RETURN_NOT_OK(ValidateValue<ValueType>(sparse_csx_values[j],
+
tensor.Value<ValueType>(coord)));
+ }
+ }
+ return Status::OK();
+ }
+
+ std::shared_ptr<SparseCSXIndex> sparse_csx_index;
+};
+
+struct SparseCSFValidator : public SparseTensorValidatorBase {
+ SparseCSFValidator(const Tensor& tensor, const SparseTensor& sparse_tensor)
+ : SparseTensorValidatorBase(tensor, sparse_tensor) {
+ sparse_csf_index =
+
internal::checked_pointer_cast<SparseCSFIndex>(sparse_tensor.sparse_index());
+ }
+
+ Status Validate() {
+ const auto& indptr = sparse_csf_index->indptr();
+ const auto& indices = sparse_csf_index->indices();
+ const auto& axis_order = sparse_csf_index->axis_order();
+
+ RETURN_NOT_OK(CheckSparseCSFIndexValidity(indptr, indices, axis_order));
+ return util::VisitCSXType(*sparse_tensor.type(), *indices.front()->type(),
+ *indptr.front()->type(), *this);
+ }
+
+ template <typename ValueType, typename IndexType, typename IndexPointerType>
+ Status operator()(const ValueType& value_type, const IndexType& index_type,
+ const IndexPointerType& index_pointer_type) {
+ return ValidateSparseTensorCSFValues(value_type, index_type,
index_pointer_type);
+ }
+
+ template <typename ValueType, typename IndexType, typename IndexPointerType>
+ Status ValidateSparseTensorCSFValues(const ValueType&, const IndexType&,
+ const IndexPointerType&) {
+ const auto& indices = sparse_csf_index->indices();
+
+ ARROW_ASSIGN_OR_RAISE(auto non_zero_count, tensor.CountNonZero());
+ if (indices.back()->size() != non_zero_count) {
+ return Status::Invalid("Mismatch between non-zero count in sparse tensor
(",
+ indices.back()->size(), ") and dense tensor (",
+ non_zero_count, ")");
+ } else if (indices.size() != tensor.shape().size()) {
+ return Status::Invalid("Mismatch between coordinate dimension in sparse
tensor (",
+ indices.size(), ") and tensor shape (",
+ tensor.shape().size(), ")");
+ } else {
+ return CheckValues<ValueType, IndexType, IndexPointerType>(
+ 0, 0, 0, sparse_csf_index->indptr()[0]->size() - 1);
+ }
+ }
+
+ template <typename ValueType, typename IndexType, typename IndexPointerType>
+ Status CheckValues(const int64_t dim, const int64_t dim_offset, const
int64_t start,
+ const int64_t stop) {
+ using ValueCType = typename ValueType::c_type;
+ using IndexCType = typename IndexType::c_type;
+ using IndexPointerCType = typename IndexPointerType::c_type;
+
+ const auto& indices = sparse_csf_index->indices();
+ const auto& indptr = sparse_csf_index->indptr();
+ const auto& axis_order = sparse_csf_index->axis_order();
+ const auto* values = sparse_tensor.data()->data_as<ValueCType>();
+ auto ndim = indices.size();
+ auto strides = tensor.strides();
+
+ const auto& cur_indices = indices[dim];
+ const auto* indices_data = cur_indices->data()->data_as<IndexCType>() +
start;
+
+ if (dim == static_cast<int64_t>(ndim) - 1) {
+ for (auto i = start; i < stop; ++i) {
+ auto index = static_cast<int64_t>(*indices_data);
+ const int64_t offset = dim_offset + index * strides[axis_order[dim]];
+
+ auto sparse_value = values[i];
+ auto tensor_value =
+ *reinterpret_cast<const ValueCType*>(tensor.raw_data() + offset);
+ ARROW_RETURN_NOT_OK(ValidateValue<ValueType>(sparse_value,
tensor_value));
+ ++indices_data;
+ }
+ } else {
+ const auto& cur_indptr = indptr[dim];
+ const auto* indptr_data =
cur_indptr->data()->data_as<IndexPointerCType>() + start;
+
+ for (int64_t i = start; i < stop; ++i) {
+ const int64_t index = *indices_data;
+ int64_t offset = dim_offset + index * strides[axis_order[dim]];
+ auto next_start = static_cast<int64_t>(*indptr_data);
+ auto next_stop = static_cast<int64_t>(*(indptr_data + 1));
+
+ ARROW_RETURN_NOT_OK((CheckValues<ValueType, IndexType,
IndexPointerType>(
+ dim + 1, offset, next_start, next_stop)));
+
+ ++indices_data;
+ ++indptr_data;
+ }
+ }
+ return Status::OK();
+ }
+
+ std::shared_ptr<SparseCSFIndex> sparse_csf_index;
+};
+
+} // namespace
+
+Status SparseTensor::Validate(const Tensor& tensor) const {
+ if (!is_tensor_supported(type_->id())) {
+ return Status::NotImplemented("SparseTensor values only support numeric
types");
+ } else if (!tensor.type()->Equals(type_)) {
+ return Status::Invalid("SparseTensor value types do not match");
Review Comment:
```suggestion
return Status::Invalid("SparseTensor value type does not match Tensor
value type");
```
##########
cpp/src/arrow/sparse_tensor.cc:
##########
@@ -475,4 +523,292 @@ Result<std::shared_ptr<Tensor>>
SparseTensor::ToTensor(MemoryPool* pool) const {
}
}
+namespace {
+
+struct SparseTensorValidatorBase {
+ SparseTensorValidatorBase(const Tensor& tensor, const SparseTensor&
sparse_tensor)
+ : tensor(tensor), sparse_tensor(sparse_tensor) {}
+
+ template <typename ValueType>
+ Status ValidateValue(typename ValueType::c_type sparse_tensor_value,
+ typename ValueType::c_type tensor_value) {
+ if (!internal::is_not_zero<ValueType>(sparse_tensor_value)) {
+ return Status::Invalid("Sparse tensor values must be non-zero");
+ } else if (sparse_tensor_value != tensor_value) {
+ if constexpr (is_floating_type<ValueType>::value) {
+ if (!std::isnan(tensor_value) || !std::isnan(sparse_tensor_value)) {
+ return Status::Invalid(
+ "Inconsistent values between sparse tensor and dense tensor");
+ }
+ } else {
+ return Status::Invalid(
+ "Inconsistent values between sparse tensor and dense tensor");
+ }
+ }
+ return Status::OK();
+ }
+
+ const Tensor& tensor;
+ const SparseTensor& sparse_tensor;
+};
+
+struct SparseCOOValidator : public SparseTensorValidatorBase {
+ using SparseTensorValidatorBase::SparseTensorValidatorBase;
+
+ Status Validate() {
+ auto sparse_coo_index =
+
internal::checked_pointer_cast<SparseCOOIndex>(sparse_tensor.sparse_index());
+ auto indices = sparse_coo_index->indices();
+ RETURN_NOT_OK(CheckSparseCOOIndexValidity(indices->type(),
indices->shape(),
+ indices->strides()));
+ // Validate Values
+ return util::VisitCOOTensorType(*sparse_tensor.type(), *indices->type(),
*this);
+ }
+
+ template <typename ValueType, typename IndexType>
+ Status operator()(const ValueType& value_type, const IndexType& index_type) {
+ return ValidateSparseCooTensorValues(value_type, index_type);
+ }
+
+ template <typename ValueType, typename IndexType>
+ Status ValidateSparseCooTensorValues(const ValueType&, const IndexType&) {
+ using IndexCType = typename IndexType::c_type;
+ using ValueCType = typename ValueType::c_type;
+
+ auto sparse_coo_index =
+
internal::checked_pointer_cast<SparseCOOIndex>(sparse_tensor.sparse_index());
+ auto sparse_coo_values_buffer = sparse_tensor.data();
+
+ const auto& indices = sparse_coo_index->indices();
+ const auto* indices_data =
sparse_coo_index->indices()->data()->data_as<IndexCType>();
+ const auto* sparse_coo_values =
sparse_coo_values_buffer->data_as<ValueCType>();
+
+ ARROW_ASSIGN_OR_RAISE(auto non_zero_count, tensor.CountNonZero());
+
+ if (indices->shape()[0] != non_zero_count) {
+ return Status::Invalid("Mismatch between non-zero count in sparse tensor
(",
+ indices->shape()[0], ") and dense tensor (",
non_zero_count,
+ ")");
+ } else if (indices->shape()[1] !=
static_cast<int64_t>(tensor.shape().size())) {
+ return Status::Invalid("Mismatch between coordinate dimension in sparse
tensor (",
+ indices->shape()[1], ") and tensor shape (",
+ tensor.shape().size(), ")");
+ }
+
+ auto coord_size = indices->shape()[1];
+ std::vector<int64_t> coord(coord_size);
+ for (int64_t i = 0; i < indices->shape()[0]; i++) {
+ for (int64_t j = 0; j < coord_size; j++) {
+ coord[j] = static_cast<int64_t>(indices_data[i * coord_size + j]);
+ }
+ ARROW_RETURN_NOT_OK(
+ ValidateValue<ValueType>(sparse_coo_values[i],
tensor.Value<ValueType>(coord)));
+ }
+
+ return Status::OK();
+ }
+};
+
+template <typename SparseCSXIndex>
+struct SparseCSXValidator : public SparseTensorValidatorBase {
+ SparseCSXValidator(const Tensor& tensor, const SparseTensor& sparse_tensor)
+ : SparseTensorValidatorBase(tensor, sparse_tensor) {
+ sparse_csx_index =
+
internal::checked_pointer_cast<SparseCSXIndex>(sparse_tensor.sparse_index());
+ }
+
+ Status Validate() {
+ auto indptr = sparse_csx_index->indptr();
+ auto indices = sparse_csx_index->indices();
+ ARROW_RETURN_NOT_OK(
+ internal::ValidateSparseCSXIndex(indptr->type(), indices->type(),
indptr->shape(),
+ indices->shape(),
sparse_csx_index->kTypeName));
+ return util::VisitCSXType(*sparse_tensor.type(), *indices->type(),
*indptr->type(),
+ *this);
+ }
+
+ template <typename ValueType, typename IndexType, typename IndexPointerType>
+ Status operator()(const ValueType& value_type, const IndexType& index_type,
+ const IndexPointerType& index_pointer_type) {
+ return ValidateSparseCSXTensorValues(value_type, index_type,
index_pointer_type);
+ }
+
+ template <typename ValueType, typename IndexType, typename IndexPointerType>
+ Status ValidateSparseCSXTensorValues(const ValueType&, const IndexType&,
+ const IndexPointerType&) {
+ using ValueCType = typename ValueType::c_type;
+ using IndexCType = typename IndexType::c_type;
+ using IndexPointerCType = typename IndexPointerType::c_type;
+ auto axis = sparse_csx_index->kCompressedAxis;
+
+ auto& indptr = sparse_csx_index->indptr();
+ auto& indices = sparse_csx_index->indices();
+ auto indptr_data = indptr->data()->template data_as<IndexPointerCType>();
+ auto indices_data = indices->data()->template data_as<IndexCType>();
+ auto sparse_csx_values = sparse_tensor.data()->template
data_as<ValueCType>();
+
+ ARROW_ASSIGN_OR_RAISE(auto non_zero_count, tensor.CountNonZero());
+ if (indices->shape()[0] != non_zero_count) {
+ return Status::Invalid("Mismatch between non-zero count in sparse tensor
(",
+ indices->shape()[0], ") and dense tensor (",
non_zero_count,
+ ")");
+ }
+
+ for (int64_t i = 0; i < indptr->size() - 1; ++i) {
+ const auto start = static_cast<int64_t>(indptr_data[i]);
+ const auto stop = static_cast<int64_t>(indptr_data[i + 1]);
+ std::vector<int64_t> coord(2);
+ for (int64_t j = start; j < stop; ++j) {
+ switch (axis) {
+ case internal::SparseMatrixCompressedAxis::ROW:
+ coord[0] = i;
+ coord[1] = static_cast<int64_t>(indices_data[j]);
+ break;
+ case internal::SparseMatrixCompressedAxis::COLUMN:
+ coord[0] = static_cast<int64_t>(indices_data[j]);
+ coord[1] = i;
+ break;
+ }
+ ARROW_RETURN_NOT_OK(ValidateValue<ValueType>(sparse_csx_values[j],
+
tensor.Value<ValueType>(coord)));
+ }
+ }
+ return Status::OK();
+ }
+
+ std::shared_ptr<SparseCSXIndex> sparse_csx_index;
+};
+
+struct SparseCSFValidator : public SparseTensorValidatorBase {
+ SparseCSFValidator(const Tensor& tensor, const SparseTensor& sparse_tensor)
+ : SparseTensorValidatorBase(tensor, sparse_tensor) {
+ sparse_csf_index =
+
internal::checked_pointer_cast<SparseCSFIndex>(sparse_tensor.sparse_index());
+ }
+
+ Status Validate() {
+ const auto& indptr = sparse_csf_index->indptr();
+ const auto& indices = sparse_csf_index->indices();
+ const auto& axis_order = sparse_csf_index->axis_order();
+
+ RETURN_NOT_OK(CheckSparseCSFIndexValidity(indptr, indices, axis_order));
+ return util::VisitCSXType(*sparse_tensor.type(), *indices.front()->type(),
+ *indptr.front()->type(), *this);
+ }
+
+ template <typename ValueType, typename IndexType, typename IndexPointerType>
+ Status operator()(const ValueType& value_type, const IndexType& index_type,
+ const IndexPointerType& index_pointer_type) {
+ return ValidateSparseTensorCSFValues(value_type, index_type,
index_pointer_type);
+ }
+
+ template <typename ValueType, typename IndexType, typename IndexPointerType>
+ Status ValidateSparseTensorCSFValues(const ValueType&, const IndexType&,
+ const IndexPointerType&) {
+ const auto& indices = sparse_csf_index->indices();
+
+ ARROW_ASSIGN_OR_RAISE(auto non_zero_count, tensor.CountNonZero());
+ if (indices.back()->size() != non_zero_count) {
+ return Status::Invalid("Mismatch between non-zero count in sparse tensor
(",
+ indices.back()->size(), ") and dense tensor (",
+ non_zero_count, ")");
+ } else if (indices.size() != tensor.shape().size()) {
+ return Status::Invalid("Mismatch between coordinate dimension in sparse
tensor (",
+ indices.size(), ") and tensor shape (",
+ tensor.shape().size(), ")");
+ } else {
+ return CheckValues<ValueType, IndexType, IndexPointerType>(
+ 0, 0, 0, sparse_csf_index->indptr()[0]->size() - 1);
+ }
+ }
+
+ /// \brief Recursively walk the CSF index and compare every stored sparse
+ /// value with the dense tensor element it addresses.
+ ///
+ /// \param dim level of the CSF index being visited (position in axis_order)
+ /// \param dim_offset offset into tensor.raw_data() accumulated from the
+ /// levels above
+ /// \param start first position in indices[dim] to visit
+ /// \param stop one past the last position in indices[dim] to visit
+ /// \return Status::OK() when every visited value passes ValidateValue,
+ /// otherwise the first non-OK Status encountered
+ template <typename ValueType, typename IndexType, typename IndexPointerType>
+ Status CheckValues(const int64_t dim, const int64_t dim_offset, const int64_t start,
+                    const int64_t stop) {
+   using ValueCType = typename ValueType::c_type;
+   using IndexCType = typename IndexType::c_type;
+   using IndexPointerCType = typename IndexPointerType::c_type;
+
+   const auto& indices = sparse_csf_index->indices();
+   const auto& axis_order = sparse_csf_index->axis_order();
+   // Bind by reference: this function recurses once per non-leaf entry, so
+   // copying the strides vector on every call would be wasted work.
+   const auto& strides = tensor.strides();
+
+   const auto* index_values = indices[dim]->data()->data_as<IndexCType>();
+
+   // NOTE(review): the coordinates read below are not range-checked against
+   // the dense tensor's shape before being used as an offset -- confirm that
+   // an earlier validation step guarantees they are in bounds.
+   if (dim == static_cast<int64_t>(indices.size()) - 1) {
+     // Last level: position i in the indices is also position i in the values.
+     const auto* values = sparse_tensor.data()->data_as<ValueCType>();
+     for (int64_t i = start; i < stop; ++i) {
+       const auto index = static_cast<int64_t>(index_values[i]);
+       const int64_t offset = dim_offset + index * strides[axis_order[dim]];
+
+       auto sparse_value = values[i];
+       auto tensor_value =
+           *reinterpret_cast<const ValueCType*>(tensor.raw_data() + offset);
+       ARROW_RETURN_NOT_OK(ValidateValue<ValueType>(sparse_value, tensor_value));
+     }
+     return Status::OK();
+   }
+
+   // Inner level: entries i and i + 1 of this level's indptr give the
+   // [start, stop) range to visit at the next level.
+   const auto* indptr_values =
+       sparse_csf_index->indptr()[dim]->data()->data_as<IndexPointerCType>();
+   for (int64_t i = start; i < stop; ++i) {
+     const int64_t index = index_values[i];
+     const int64_t offset = dim_offset + index * strides[axis_order[dim]];
+     const auto next_start = static_cast<int64_t>(indptr_values[i]);
+     const auto next_stop = static_cast<int64_t>(indptr_values[i + 1]);
+
+     ARROW_RETURN_NOT_OK((CheckValues<ValueType, IndexType, IndexPointerType>(
+         dim + 1, offset, next_start, next_stop)));
+   }
+   return Status::OK();
+ }
+
+ std::shared_ptr<SparseCSFIndex> sparse_csf_index;
+};
+
+} // namespace
+
+Status SparseTensor::Validate(const Tensor& tensor) const {
+ if (!is_tensor_supported(type_->id())) {
+ return Status::NotImplemented("SparseTensor values only support numeric
types");
+ } else if (!tensor.type()->Equals(type_)) {
+ return Status::Invalid("SparseTensor value types do not match");
+ } else if (tensor.shape() != shape_) {
+ return Status::Invalid("SparseTensor shape do not match");
Review Comment:
```suggestion
return Status::Invalid("SparseTensor shape does not match Tensor");
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]