This is an automated email from the ASF dual-hosted git repository.

felipecrv pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 45b176716c GH-43414: [C++][Compute] Fix invalid memory access when 
resizing var-length buffer in row table (#43415)
45b176716c is described below

commit 45b176716cc667384577a2a1218c6da454854109
Author: Rossi Sun <[email protected]>
AuthorDate: Sat Aug 3 09:23:06 2024 +0800

    GH-43414: [C++][Compute] Fix invalid memory access when resizing var-length 
buffer in row table (#43415)
    
    
    
    ### Rationale for this change
    
    As #43414 explained. The UT in PR reproduces this issue well (may need ASAN 
to detect).
    
    ### What changes are included in this PR?
    
    Check if `is_fixed_length` before treating the second buffer as offset.
    
    ### Are these changes tested?
    
    UT included.
    
    ### Are there any user-facing changes?
    
    None.
    
    * GitHub Issue: #43414
    
    Lead-authored-by: Ruoxi Sun <[email protected]>
    Co-authored-by: Felipe Oliveira Carvalho <[email protected]>
    Signed-off-by: Felipe Oliveira Carvalho <[email protected]>
---
 cpp/src/arrow/compute/row/row_internal.cc |  8 ++++--
 cpp/src/arrow/compute/row/row_internal.h  |  7 +++++
 cpp/src/arrow/compute/row/row_test.cc     | 47 ++++++++++++++++++-------------
 3 files changed, 41 insertions(+), 21 deletions(-)

diff --git a/cpp/src/arrow/compute/row/row_internal.cc 
b/cpp/src/arrow/compute/row/row_internal.cc
index 2365ef5632..746ed950ff 100644
--- a/cpp/src/arrow/compute/row/row_internal.cc
+++ b/cpp/src/arrow/compute/row/row_internal.cc
@@ -293,8 +293,10 @@ Status RowTableImpl::ResizeFixedLengthBuffers(int64_t 
num_extra_rows) {
 }
 
 Status RowTableImpl::ResizeOptionalVaryingLengthBuffer(int64_t 
num_extra_bytes) {
+  DCHECK(!metadata_.is_fixed_length);
+
   int64_t num_bytes = offsets()[num_rows_];
-  if (bytes_capacity_ >= num_bytes + num_extra_bytes || 
metadata_.is_fixed_length) {
+  if (bytes_capacity_ >= num_bytes + num_extra_bytes) {
     return Status::OK();
   }
 
@@ -397,7 +399,9 @@ Status RowTableImpl::AppendSelectionFrom(const 
RowTableImpl& from,
 Status RowTableImpl::AppendEmpty(uint32_t num_rows_to_append,
                                  uint32_t num_extra_bytes_to_append) {
   RETURN_NOT_OK(ResizeFixedLengthBuffers(num_rows_to_append));
-  RETURN_NOT_OK(ResizeOptionalVaryingLengthBuffer(num_extra_bytes_to_append));
+  if (!metadata_.is_fixed_length) {
+    
RETURN_NOT_OK(ResizeOptionalVaryingLengthBuffer(num_extra_bytes_to_append));
+  }
   num_rows_ += num_rows_to_append;
   if (metadata_.row_alignment > 1 || metadata_.string_alignment > 1) {
     memset(rows_->mutable_data(), 0, bytes_capacity_);
diff --git a/cpp/src/arrow/compute/row/row_internal.h 
b/cpp/src/arrow/compute/row/row_internal.h
index 80409f93d2..93818fb14d 100644
--- a/cpp/src/arrow/compute/row/row_internal.h
+++ b/cpp/src/arrow/compute/row/row_internal.h
@@ -220,7 +220,14 @@ class ARROW_EXPORT RowTableImpl {
   }
 
  private:
+  /// \brief Resize the fixed length buffers to store `num_extra_rows` more 
rows. The
+  /// fixed length buffers are buffers_[0] for null masks, buffers_[1] for row 
data if the
+  /// row is fixed length, or for row offsets otherwise.
   Status ResizeFixedLengthBuffers(int64_t num_extra_rows);
+
+  /// \brief Resize the optional varying length buffer to store 
`num_extra_bytes` more
+  /// bytes.
+  /// \pre !metadata_.is_fixed_length
   Status ResizeOptionalVaryingLengthBuffer(int64_t num_extra_bytes);
 
   // Helper functions to determine the number of bytes needed for each
diff --git a/cpp/src/arrow/compute/row/row_test.cc 
b/cpp/src/arrow/compute/row/row_test.cc
index 679ad519a9..75f981fb12 100644
--- a/cpp/src/arrow/compute/row/row_test.cc
+++ b/cpp/src/arrow/compute/row/row_test.cc
@@ -69,9 +69,14 @@ TEST(RowTableMemoryConsumption, Encode) {
   constexpr int64_t num_rows_max = 8192;
   constexpr int64_t padding_for_vectors = 64;
 
-  ASSERT_OK_AND_ASSIGN(
-      auto fixed_length_column,
-      
::arrow::gen::Constant(std::make_shared<UInt32Scalar>(0))->Generate(num_rows_max));
+  std::vector<std::shared_ptr<Array>> fixed_length_columns;
+  for (const auto& dt : {int8(), uint16(), int32(), uint64(), 
fixed_size_binary(16),
+                         fixed_size_binary(32)}) {
+    ASSERT_OK_AND_ASSIGN(auto fixed_length_column,
+                         ::arrow::gen::Random(dt)->Generate(num_rows_max));
+    fixed_length_columns.push_back(std::move(fixed_length_column));
+  }
+
   ASSERT_OK_AND_ASSIGN(auto var_length_column,
                        
::arrow::gen::Constant(std::make_shared<BinaryScalar>("X"))
                            ->Generate(num_rows_max));
@@ -81,22 +86,26 @@ TEST(RowTableMemoryConsumption, Encode) {
     {
       SCOPED_TRACE("encoding fixed length column of " + 
std::to_string(num_rows) +
                    " rows");
-      ASSERT_OK_AND_ASSIGN(auto row_table,
-                           MakeRowTableFromColumn(fixed_length_column, 
num_rows,
-                                                  uint32()->byte_width(), 0));
-      ASSERT_NE(row_table.data(0), NULLPTR);
-      ASSERT_NE(row_table.data(1), NULLPTR);
-      ASSERT_EQ(row_table.data(2), NULLPTR);
-
-      int64_t actual_null_mask_size =
-          num_rows * row_table.metadata().null_masks_bytes_per_row;
-      ASSERT_LE(actual_null_mask_size, row_table.buffer_size(0) - 
padding_for_vectors);
-      ASSERT_GT(actual_null_mask_size * 2,
-                row_table.buffer_size(0) - padding_for_vectors);
-
-      int64_t actual_rows_size = num_rows * uint32()->byte_width();
-      ASSERT_LE(actual_rows_size, row_table.buffer_size(1) - 
padding_for_vectors);
-      ASSERT_GT(actual_rows_size * 2, row_table.buffer_size(1) - 
padding_for_vectors);
+      for (const auto& col : fixed_length_columns) {
+        const auto& dt = col->type();
+        SCOPED_TRACE("encoding fixed length column of type " + dt->ToString());
+        ASSERT_OK_AND_ASSIGN(auto row_table,
+                             MakeRowTableFromColumn(col, num_rows, 
dt->byte_width(),
+                                                    /*string_alignment=*/0));
+        ASSERT_NE(row_table.data(0), NULLPTR);
+        ASSERT_NE(row_table.data(1), NULLPTR);
+        ASSERT_EQ(row_table.data(2), NULLPTR);
+
+        int64_t actual_null_mask_size =
+            num_rows * row_table.metadata().null_masks_bytes_per_row;
+        ASSERT_LE(actual_null_mask_size, row_table.buffer_size(0) - 
padding_for_vectors);
+        ASSERT_GT(actual_null_mask_size * 2,
+                  row_table.buffer_size(0) - padding_for_vectors);
+
+        int64_t actual_rows_size = num_rows * dt->byte_width();
+        ASSERT_LE(actual_rows_size, row_table.buffer_size(1) - 
padding_for_vectors);
+        ASSERT_GT(actual_rows_size * 2, row_table.buffer_size(1) - 
padding_for_vectors);
+      }
     }
 
     // Var length column.

Reply via email to