This is an automated email from the ASF dual-hosted git repository.
rok pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new e37c5163f5 GH-49104: [C++] Fix Segfault in SparseCSFIndex::Equals with
mismatched dimensions (#49105)
e37c5163f5 is described below
commit e37c5163f53ffbe2faf0b37a0ff18f1f58cb3676
Author: Ali Mahmood Rana <[email protected]>
AuthorDate: Thu Feb 5 00:00:56 2026 +0500
GH-49104: [C++] Fix Segfault in SparseCSFIndex::Equals with mismatched
dimensions (#49105)
### Rationale for This Change
The `SparseCSFIndex::Equals` method can crash when comparing two sparse
indices that have a different number of dimensions. The method iterates over
the `indices()` and `indptr()` vectors of the current object and accesses the
corresponding elements in the `other` object without first verifying that both
objects have matching vector sizes. This can lead to out-of-bounds access and a
segmentation fault when the dimension counts differ.
### What Changes Are Included in This PR?
This change rewrites `SparseCSFIndex::Equals` to compare `axis_order()` first
and then compare the `indices()` and `indptr()` vectors element-wise with
`std::ranges::equal`, which treats ranges of different lengths as unequal. If
the dimensions do not match, the method now safely returns `false` instead of
attempting invalid memory access.
### Are These Changes Tested?
Yes. A new typed test, `TestEqualityMismatchedDimensions`, reproduces the crash
scenario by comparing a 2D and a 3D CSF index in both directions, ensuring the
method behaves safely and deterministically with mismatched dimension counts.
### Are There Any User-Facing Changes?
No. This change improves internal safety and robustness without altering
public APIs; the only behavioral difference is that comparing indices with
mismatched dimensions now returns `false` instead of crashing.
* GitHub Issue: #49104
Lead-authored-by: Alirana2829 <[email protected]>
Co-authored-by: Ali Mahmood Rana
<[email protected]>
Co-authored-by: Rok Mihevc <[email protected]>
Signed-off-by: Rok Mihevc <[email protected]>
---
cpp/src/arrow/sparse_tensor.cc | 11 ++++-------
cpp/src/arrow/sparse_tensor_test.cc | 24 +++++++++++++++++++++++-
2 files changed, 27 insertions(+), 8 deletions(-)
diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc
index b84070b3d2..477fa2f765 100644
--- a/cpp/src/arrow/sparse_tensor.cc
+++ b/cpp/src/arrow/sparse_tensor.cc
@@ -405,13 +405,10 @@ SparseCSFIndex::SparseCSFIndex(const
std::vector<std::shared_ptr<Tensor>>& indpt
std::string SparseCSFIndex::ToString() const { return
std::string("SparseCSFIndex"); }
bool SparseCSFIndex::Equals(const SparseCSFIndex& other) const {
- for (int64_t i = 0; i < static_cast<int64_t>(indices().size()); ++i) {
- if (!indices()[i]->Equals(*other.indices()[i])) return false;
- }
- for (int64_t i = 0; i < static_cast<int64_t>(indptr().size()); ++i) {
- if (!indptr()[i]->Equals(*other.indptr()[i])) return false;
- }
- return axis_order() == other.axis_order();
+ auto eq = [](const auto& a, const auto& b) { return a->Equals(*b); };
+ return axis_order() == other.axis_order() &&
+ std::ranges::equal(indices(), other.indices(), eq) &&
+ std::ranges::equal(indptr(), other.indptr(), eq);
}
// ----------------------------------------------------------------------
diff --git a/cpp/src/arrow/sparse_tensor_test.cc
b/cpp/src/arrow/sparse_tensor_test.cc
index c9c28a11b1..434f4a1723 100644
--- a/cpp/src/arrow/sparse_tensor_test.cc
+++ b/cpp/src/arrow/sparse_tensor_test.cc
@@ -1641,10 +1641,32 @@ TYPED_TEST_P(TestSparseCSFTensorForIndexValueType,
TestNonAscendingShape) {
ASSERT_TRUE(st->Equals(*sparse_tensor));
}
+TYPED_TEST_P(TestSparseCSFTensorForIndexValueType,
TestEqualityMismatchedDimensions) {
+ using IndexValueType = TypeParam;
+ using c_index_value_type = typename IndexValueType::c_type;
+
+ // 2D vs 3D - comparing indices with different dimensionality
+ // 2D CSF: ndim=2, so indptr.size()=1, indices.size()=2
+ std::vector<int64_t> axis_order_2D = {0, 1};
+ std::vector<std::vector<c_index_value_type>> indptr_2D = {{0, 1}};
+ std::vector<std::vector<c_index_value_type>> indices_2D = {{0}, {0}};
+ auto si_2D = this->MakeSparseCSFIndex(axis_order_2D, indptr_2D, indices_2D);
+
+ // 3D CSF: ndim=3, so indptr.size()=2, indices.size()=3
+ std::vector<int64_t> axis_order_3D = {0, 1, 2};
+ std::vector<std::vector<c_index_value_type>> indptr_3D = {{0, 1}, {0, 1}};
+ std::vector<std::vector<c_index_value_type>> indices_3D = {{0}, {0}, {0}};
+ auto si_3D = this->MakeSparseCSFIndex(axis_order_3D, indptr_3D, indices_3D);
+
+ ASSERT_FALSE(si_2D->Equals(*si_3D));
+ ASSERT_FALSE(si_3D->Equals(*si_2D));
+ ASSERT_TRUE(si_2D->Equals(*si_2D));
+}
+
REGISTER_TYPED_TEST_SUITE_P(TestSparseCSFTensorForIndexValueType,
TestCreateSparseTensor,
TestTensorToSparseTensor, TestSparseTensorToTensor,
TestAlternativeAxisOrder, TestNonAscendingShape,
- TestRoundTrip);
+ TestRoundTrip, TestEqualityMismatchedDimensions);
INSTANTIATE_TYPED_TEST_SUITE_P(TestInt8, TestSparseCSFTensorForIndexValueType,
Int8Type);
INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt8, TestSparseCSFTensorForIndexValueType,