This is an automated email from the ASF dual-hosted git repository.
felipecrv pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 45b176716c GH-43414: [C++][Compute] Fix invalid memory access when
resizing var-length buffer in row table (#43415)
45b176716c is described below
commit 45b176716cc667384577a2a1218c6da454854109
Author: Rossi Sun <[email protected]>
AuthorDate: Sat Aug 3 09:23:06 2024 +0800
GH-43414: [C++][Compute] Fix invalid memory access when resizing var-length
buffer in row table (#43415)
### Rationale for this change
As #43414 explained. The UT in PR reproduces this issue well (may need ASAN
to detect).
### What changes are included in this PR?
Check if `is_fixed_length` before treating the second buffer as offset.
### Are these changes tested?
UT included.
### Are there any user-facing changes?
None.
* GitHub Issue: #43414
Lead-authored-by: Ruoxi Sun <[email protected]>
Co-authored-by: Felipe Oliveira Carvalho <[email protected]>
Signed-off-by: Felipe Oliveira Carvalho <[email protected]>
---
cpp/src/arrow/compute/row/row_internal.cc | 8 ++++--
cpp/src/arrow/compute/row/row_internal.h | 7 +++++
cpp/src/arrow/compute/row/row_test.cc | 47 ++++++++++++++++++-------------
3 files changed, 41 insertions(+), 21 deletions(-)
diff --git a/cpp/src/arrow/compute/row/row_internal.cc
b/cpp/src/arrow/compute/row/row_internal.cc
index 2365ef5632..746ed950ff 100644
--- a/cpp/src/arrow/compute/row/row_internal.cc
+++ b/cpp/src/arrow/compute/row/row_internal.cc
@@ -293,8 +293,10 @@ Status RowTableImpl::ResizeFixedLengthBuffers(int64_t
num_extra_rows) {
}
Status RowTableImpl::ResizeOptionalVaryingLengthBuffer(int64_t
num_extra_bytes) {
+ DCHECK(!metadata_.is_fixed_length);
+
int64_t num_bytes = offsets()[num_rows_];
- if (bytes_capacity_ >= num_bytes + num_extra_bytes ||
metadata_.is_fixed_length) {
+ if (bytes_capacity_ >= num_bytes + num_extra_bytes) {
return Status::OK();
}
@@ -397,7 +399,9 @@ Status RowTableImpl::AppendSelectionFrom(const
RowTableImpl& from,
Status RowTableImpl::AppendEmpty(uint32_t num_rows_to_append,
uint32_t num_extra_bytes_to_append) {
RETURN_NOT_OK(ResizeFixedLengthBuffers(num_rows_to_append));
- RETURN_NOT_OK(ResizeOptionalVaryingLengthBuffer(num_extra_bytes_to_append));
+ if (!metadata_.is_fixed_length) {
+
RETURN_NOT_OK(ResizeOptionalVaryingLengthBuffer(num_extra_bytes_to_append));
+ }
num_rows_ += num_rows_to_append;
if (metadata_.row_alignment > 1 || metadata_.string_alignment > 1) {
memset(rows_->mutable_data(), 0, bytes_capacity_);
diff --git a/cpp/src/arrow/compute/row/row_internal.h
b/cpp/src/arrow/compute/row/row_internal.h
index 80409f93d2..93818fb14d 100644
--- a/cpp/src/arrow/compute/row/row_internal.h
+++ b/cpp/src/arrow/compute/row/row_internal.h
@@ -220,7 +220,14 @@ class ARROW_EXPORT RowTableImpl {
}
private:
+ /// \brief Resize the fixed length buffers to store `num_extra_rows` more
rows. The
+ /// fixed length buffers are buffers_[0] for null masks, buffers_[1] for row
data if the
+ /// row is fixed length, or for row offsets otherwise.
Status ResizeFixedLengthBuffers(int64_t num_extra_rows);
+
+ /// \brief Resize the optional varying length buffer to store
`num_extra_bytes` more
+ /// bytes.
+ /// \pre !metadata_.is_fixed_length
Status ResizeOptionalVaryingLengthBuffer(int64_t num_extra_bytes);
// Helper functions to determine the number of bytes needed for each
diff --git a/cpp/src/arrow/compute/row/row_test.cc
b/cpp/src/arrow/compute/row/row_test.cc
index 679ad519a9..75f981fb12 100644
--- a/cpp/src/arrow/compute/row/row_test.cc
+++ b/cpp/src/arrow/compute/row/row_test.cc
@@ -69,9 +69,14 @@ TEST(RowTableMemoryConsumption, Encode) {
constexpr int64_t num_rows_max = 8192;
constexpr int64_t padding_for_vectors = 64;
- ASSERT_OK_AND_ASSIGN(
- auto fixed_length_column,
-
::arrow::gen::Constant(std::make_shared<UInt32Scalar>(0))->Generate(num_rows_max));
+ std::vector<std::shared_ptr<Array>> fixed_length_columns;
+ for (const auto& dt : {int8(), uint16(), int32(), uint64(),
fixed_size_binary(16),
+ fixed_size_binary(32)}) {
+ ASSERT_OK_AND_ASSIGN(auto fixed_length_column,
+ ::arrow::gen::Random(dt)->Generate(num_rows_max));
+ fixed_length_columns.push_back(std::move(fixed_length_column));
+ }
+
ASSERT_OK_AND_ASSIGN(auto var_length_column,
::arrow::gen::Constant(std::make_shared<BinaryScalar>("X"))
->Generate(num_rows_max));
@@ -81,22 +86,26 @@ TEST(RowTableMemoryConsumption, Encode) {
{
SCOPED_TRACE("encoding fixed length column of " +
std::to_string(num_rows) +
" rows");
- ASSERT_OK_AND_ASSIGN(auto row_table,
- MakeRowTableFromColumn(fixed_length_column,
num_rows,
- uint32()->byte_width(), 0));
- ASSERT_NE(row_table.data(0), NULLPTR);
- ASSERT_NE(row_table.data(1), NULLPTR);
- ASSERT_EQ(row_table.data(2), NULLPTR);
-
- int64_t actual_null_mask_size =
- num_rows * row_table.metadata().null_masks_bytes_per_row;
- ASSERT_LE(actual_null_mask_size, row_table.buffer_size(0) -
padding_for_vectors);
- ASSERT_GT(actual_null_mask_size * 2,
- row_table.buffer_size(0) - padding_for_vectors);
-
- int64_t actual_rows_size = num_rows * uint32()->byte_width();
- ASSERT_LE(actual_rows_size, row_table.buffer_size(1) -
padding_for_vectors);
- ASSERT_GT(actual_rows_size * 2, row_table.buffer_size(1) -
padding_for_vectors);
+ for (const auto& col : fixed_length_columns) {
+ const auto& dt = col->type();
+ SCOPED_TRACE("encoding fixed length column of type " + dt->ToString());
+ ASSERT_OK_AND_ASSIGN(auto row_table,
+ MakeRowTableFromColumn(col, num_rows,
dt->byte_width(),
+ /*string_alignment=*/0));
+ ASSERT_NE(row_table.data(0), NULLPTR);
+ ASSERT_NE(row_table.data(1), NULLPTR);
+ ASSERT_EQ(row_table.data(2), NULLPTR);
+
+ int64_t actual_null_mask_size =
+ num_rows * row_table.metadata().null_masks_bytes_per_row;
+ ASSERT_LE(actual_null_mask_size, row_table.buffer_size(0) -
padding_for_vectors);
+ ASSERT_GT(actual_null_mask_size * 2,
+ row_table.buffer_size(0) - padding_for_vectors);
+
+ int64_t actual_rows_size = num_rows * dt->byte_width();
+ ASSERT_LE(actual_rows_size, row_table.buffer_size(1) -
padding_for_vectors);
+ ASSERT_GT(actual_rows_size * 2, row_table.buffer_size(1) -
padding_for_vectors);
+ }
}
// Var length column.