This is an automated email from the ASF dual-hosted git repository.

rok pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new e37c5163f5 GH-49104: [C++] Fix Segfault in SparseCSFIndex::Equals with 
mismatched dimensions (#49105)
e37c5163f5 is described below

commit e37c5163f53ffbe2faf0b37a0ff18f1f58cb3676
Author: Ali Mahmood Rana <[email protected]>
AuthorDate: Thu Feb 5 00:00:56 2026 +0500

    GH-49104: [C++] Fix Segfault in SparseCSFIndex::Equals with mismatched 
dimensions (#49105)
    
    ### Rationale for This Change
    
    The `SparseCSFIndex::Equals` method can crash when comparing two sparse 
indices that have a different number of dimensions. The method iterates over 
the `indices()` and `indptr()` vectors of the current object and accesses the 
corresponding elements in the `other` object without first verifying that both 
objects have matching vector sizes. This can lead to out-of-bounds access and a 
segmentation fault when the dimension counts differ.
    
    ### What Changes Are Included in This PR?
    
    This change rewrites `SparseCSFIndex::Equals` to compare the `indices()` and 
`indptr()` vectors with `std::ranges::equal`, which also compares the sizes of 
the two vectors instead of indexing into `other` unchecked. If the dimensions 
do not match, the method now safely returns `false` instead of attempting 
invalid memory access.
    
    ### Are These Changes Tested?
    
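    Yes. A new regression test, `TestEqualityMismatchedDimensions`, reproduces 
the crash scenario by comparing a 2D and a 3D `SparseCSFIndex` in both 
directions and asserting that `Equals` returns `false`, ensuring the method 
behaves safely and deterministically with mismatched dimension counts.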
    
    ### Are There Any User-Facing Changes?
    
    No. This change improves internal safety and robustness without altering 
public APIs or observable user behavior.
    
    * GitHub Issue: #49104
    
    Lead-authored-by: Alirana2829 <[email protected]>
    Co-authored-by: Ali Mahmood Rana 
<[email protected]>
    Co-authored-by: Rok Mihevc <[email protected]>
    Signed-off-by: Rok Mihevc <[email protected]>
---
 cpp/src/arrow/sparse_tensor.cc      | 11 ++++-------
 cpp/src/arrow/sparse_tensor_test.cc | 24 +++++++++++++++++++++++-
 2 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc
index b84070b3d2..477fa2f765 100644
--- a/cpp/src/arrow/sparse_tensor.cc
+++ b/cpp/src/arrow/sparse_tensor.cc
@@ -405,13 +405,10 @@ SparseCSFIndex::SparseCSFIndex(const 
std::vector<std::shared_ptr<Tensor>>& indpt
 std::string SparseCSFIndex::ToString() const { return 
std::string("SparseCSFIndex"); }
 
 bool SparseCSFIndex::Equals(const SparseCSFIndex& other) const {
-  for (int64_t i = 0; i < static_cast<int64_t>(indices().size()); ++i) {
-    if (!indices()[i]->Equals(*other.indices()[i])) return false;
-  }
-  for (int64_t i = 0; i < static_cast<int64_t>(indptr().size()); ++i) {
-    if (!indptr()[i]->Equals(*other.indptr()[i])) return false;
-  }
-  return axis_order() == other.axis_order();
+  auto eq = [](const auto& a, const auto& b) { return a->Equals(*b); };
+  return axis_order() == other.axis_order() &&
+         std::ranges::equal(indices(), other.indices(), eq) &&
+         std::ranges::equal(indptr(), other.indptr(), eq);
 }
 
 // ----------------------------------------------------------------------
diff --git a/cpp/src/arrow/sparse_tensor_test.cc 
b/cpp/src/arrow/sparse_tensor_test.cc
index c9c28a11b1..434f4a1723 100644
--- a/cpp/src/arrow/sparse_tensor_test.cc
+++ b/cpp/src/arrow/sparse_tensor_test.cc
@@ -1641,10 +1641,32 @@ TYPED_TEST_P(TestSparseCSFTensorForIndexValueType, 
TestNonAscendingShape) {
   ASSERT_TRUE(st->Equals(*sparse_tensor));
 }
 
+TYPED_TEST_P(TestSparseCSFTensorForIndexValueType, 
TestEqualityMismatchedDimensions) {
+  using IndexValueType = TypeParam;
+  using c_index_value_type = typename IndexValueType::c_type;
+
+  // 2D vs 3D - comparing indices with different dimensionality
+  // 2D CSF: ndim=2, so indptr.size()=1, indices.size()=2
+  std::vector<int64_t> axis_order_2D = {0, 1};
+  std::vector<std::vector<c_index_value_type>> indptr_2D = {{0, 1}};
+  std::vector<std::vector<c_index_value_type>> indices_2D = {{0}, {0}};
+  auto si_2D = this->MakeSparseCSFIndex(axis_order_2D, indptr_2D, indices_2D);
+
+  // 3D CSF: ndim=3, so indptr.size()=2, indices.size()=3
+  std::vector<int64_t> axis_order_3D = {0, 1, 2};
+  std::vector<std::vector<c_index_value_type>> indptr_3D = {{0, 1}, {0, 1}};
+  std::vector<std::vector<c_index_value_type>> indices_3D = {{0}, {0}, {0}};
+  auto si_3D = this->MakeSparseCSFIndex(axis_order_3D, indptr_3D, indices_3D);
+
+  ASSERT_FALSE(si_2D->Equals(*si_3D));
+  ASSERT_FALSE(si_3D->Equals(*si_2D));
+  ASSERT_TRUE(si_2D->Equals(*si_2D));
+}
+
 REGISTER_TYPED_TEST_SUITE_P(TestSparseCSFTensorForIndexValueType, 
TestCreateSparseTensor,
                             TestTensorToSparseTensor, TestSparseTensorToTensor,
                             TestAlternativeAxisOrder, TestNonAscendingShape,
-                            TestRoundTrip);
+                            TestRoundTrip, TestEqualityMismatchedDimensions);
 
 INSTANTIATE_TYPED_TEST_SUITE_P(TestInt8, TestSparseCSFTensorForIndexValueType, 
Int8Type);
 INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt8, TestSparseCSFTensorForIndexValueType,

Reply via email to