This is an automated email from the ASF dual-hosted git repository.

marong pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 7431a9c5aa [GLUTEN-9163][VL][FOLLOWUP] Fix segfault triggered by 
fixed-width inputs (#9766)
7431a9c5aa is described below

commit 7431a9c5aa8eac7049fc33ad4af66cd91bd5da2f
Author: Rong Ma <[email protected]>
AuthorDate: Thu May 29 07:00:08 2025 +0100

    [GLUTEN-9163][VL][FOLLOWUP] Fix segfault triggered by fixed-width inputs 
(#9766)
---
 cpp/velox/shuffle/VeloxSortShuffleWriter.cc |   2 +-
 cpp/velox/tests/VeloxShuffleWriterTest.cc   | 136 +++++++++++++++-------------
 2 files changed, 72 insertions(+), 66 deletions(-)

diff --git a/cpp/velox/shuffle/VeloxSortShuffleWriter.cc 
b/cpp/velox/shuffle/VeloxSortShuffleWriter.cc
index 3bf39683a7..83dc41fdb9 100644
--- a/cpp/velox/shuffle/VeloxSortShuffleWriter.cc
+++ b/cpp/velox/shuffle/VeloxSortShuffleWriter.cc
@@ -160,7 +160,7 @@ arrow::Status VeloxSortShuffleWriter::insert(const 
facebook::velox::RowVectorPtr
   facebook::velox::row::CompactRow row(vector);
 
   if (fixedRowSize_.has_value()) {
-    rowSize_.resize(inputRows, fixedRowSize_.value() + sizeof(RowSizeType));
+    rowSize_.resize(inputRows, fixedRowSize_.value());
   } else {
     rowSize_.resize(inputRows);
     rowSizePrefixSum_.resize(inputRows + 1);
diff --git a/cpp/velox/tests/VeloxShuffleWriterTest.cc 
b/cpp/velox/tests/VeloxShuffleWriterTest.cc
index 53ef82a021..1024dafcfb 100644
--- a/cpp/velox/tests/VeloxShuffleWriterTest.cc
+++ b/cpp/velox/tests/VeloxShuffleWriterTest.cc
@@ -290,7 +290,7 @@ class VeloxShuffleWriterTest : public 
::testing::TestWithParam<ShuffleTestParams
   std::shared_ptr<arrow::io::ReadableFile> file_;
 };
 
-class SinglePartitioningShuffleWriter : public VeloxShuffleWriterTest {
+class SinglePartitioningShuffleWriterTest : public VeloxShuffleWriterTest {
  protected:
   std::shared_ptr<VeloxShuffleWriter> createShuffleWriter(uint32_t) override {
     auto* arrowPool = getDefaultMemoryManager()->getArrowMemoryPool();
@@ -311,7 +311,7 @@ class SinglePartitioningShuffleWriter : public 
VeloxShuffleWriterTest {
   }
 };
 
-class HashPartitioningShuffleWriter : public VeloxShuffleWriterTest {
+class HashPartitioningShuffleWriterTest : public VeloxShuffleWriterTest {
  protected:
   void SetUp() override {
     VeloxShuffleWriterTest::SetUp();
@@ -351,7 +351,7 @@ class HashPartitioningShuffleWriter : public 
VeloxShuffleWriterTest {
   facebook::velox::RowVectorPtr hashInputVector2_;
 };
 
-class RangePartitioningShuffleWriter : public VeloxShuffleWriterTest {
+class RangePartitioningShuffleWriterTest : public VeloxShuffleWriterTest {
  protected:
   void SetUp() override {
     VeloxShuffleWriterTest::SetUp();
@@ -394,7 +394,7 @@ class RangePartitioningShuffleWriter : public 
VeloxShuffleWriterTest {
   std::shared_ptr<ColumnarBatch> compositeBatch2_;
 };
 
-class RoundRobinPartitioningShuffleWriter : public VeloxShuffleWriterTest {
+class RoundRobinPartitioningShuffleWriterTest : public VeloxShuffleWriterTest {
  protected:
   std::shared_ptr<VeloxShuffleWriter> createShuffleWriter(uint32_t 
numPartitions) override {
     auto* arrowPool = getDefaultMemoryManager()->getArrowMemoryPool();
@@ -420,7 +420,7 @@ class RoundRobinPartitioningShuffleWriter : public 
VeloxShuffleWriterTest {
   }
 };
 
-TEST_P(SinglePartitioningShuffleWriter, single) {
+TEST_P(SinglePartitioningShuffleWriterTest, single) {
   if (GetParam().shuffleWriterType != ShuffleWriterType::kHashShuffle) {
     return;
   }
@@ -476,57 +476,63 @@ TEST_P(SinglePartitioningShuffleWriter, single) {
   }
 }
 
-TEST_P(HashPartitioningShuffleWriter, hashPart1Vector) {
+TEST_P(HashPartitioningShuffleWriterTest, hashPart1Vector) {
   ASSERT_NOT_OK(initShuffleWriterOptions());
-  auto shuffleWriter = createShuffleWriter(2);
-  auto vector = makeRowVector({
-      makeFlatVector<int32_t>({1, 2, 1, 2}),
-      makeNullableFlatVector<int8_t>({1, 2, 3, std::nullopt}),
-      makeFlatVector<int64_t>({1, 2, 3, 4}),
-      makeFlatVector<StringView>({"nn", "re", "fr", "juiu"}),
-      makeFlatVector<int64_t>({232, 34567235, 1212, 4567}, DECIMAL(12, 4)),
-      makeFlatVector<int128_t>({232, 34567235, 1212, 4567}, DECIMAL(20, 4)),
-      makeFlatVector<int32_t>(
-          4, [](vector_size_t row) { return row % 2; }, nullEvery(5), DATE()),
-      makeFlatVector<Timestamp>(
-          4,
-          [](vector_size_t row) {
-            return Timestamp{row % 2, 0};
-          },
-          nullEvery(5)),
-  });
-
-  auto rowType = facebook::velox::asRowType(vector->type());
-  auto children = rowType->children();
-  auto names = rowType->names();
-  children.erase(children.begin());
-  names.erase(names.begin());
-  auto dataType = facebook::velox::ROW(std::move(names), std::move(children));
-
-  auto firstBlock = makeRowVector({
-      makeNullableFlatVector<int8_t>({2, std::nullopt}),
-      makeFlatVector<int64_t>({2, 4}),
-      makeFlatVector<StringView>({"re", "juiu"}),
-      makeFlatVector<int64_t>({34567235, 4567}, DECIMAL(12, 4)),
-      makeFlatVector<int128_t>({34567235, 4567}, DECIMAL(20, 4)),
-      makeFlatVector<int32_t>({1, 1}, DATE()),
-      makeFlatVector<Timestamp>({Timestamp(1, 0), Timestamp(1, 0)}),
-  });
-
-  auto secondBlock = makeRowVector({
-      makeNullableFlatVector<int8_t>({1, 3}),
-      makeFlatVector<int64_t>({1, 3}),
-      makeFlatVector<StringView>({"nn", "fr"}),
-      makeFlatVector<int64_t>({232, 1212}, DECIMAL(12, 4)),
-      makeFlatVector<int128_t>({232, 1212}, DECIMAL(20, 4)),
-      makeNullableFlatVector<int32_t>({std::nullopt, 0}, DATE()),
-      makeNullableFlatVector<Timestamp>({std::nullopt, Timestamp(0, 0)}),
-  });
-
-  testShuffleRoundTrip(*shuffleWriter, {vector}, 2, {{firstBlock}, 
{secondBlock}});
+
+  // Fixed-length input.
+  {
+    auto shuffleWriter = createShuffleWriter(2);
+
+    std::vector<VectorPtr> data = {
+        makeNullableFlatVector<int8_t>({1, 2, 3, std::nullopt}),
+        makeNullableFlatVector<int16_t>({1, 2, 3, 4}),
+        makeFlatVector<int64_t>({1, 2, 3, 4}),
+        makeFlatVector<int64_t>({232, 34567235, 1212, 4567}, DECIMAL(12, 4)),
+        makeFlatVector<int128_t>({232, 34567235, 1212, 4567}, DECIMAL(20, 4)),
+        makeFlatVector<int32_t>(
+            4, [](vector_size_t row) { return row % 2; }, nullEvery(5), 
DATE()),
+        makeFlatVector<Timestamp>(
+            4,
+            [](vector_size_t row) {
+              return Timestamp{row % 2, 0};
+            },
+            nullEvery(5))};
+
+    const auto vector = makeRowVector(data);
+
+    const auto blocksPid0 = takeRows({vector}, {{1, 3}});
+    const auto blocksPid1 = takeRows({vector}, {{0, 2}});
+
+    // Add partition id as the first column.
+    data.insert(data.begin(), makeFlatVector<int32_t>({1, 2, 1, 2}));
+    const auto input = makeRowVector(data);
+
+    testShuffleRoundTrip(*shuffleWriter, {input}, 2, {blocksPid0, blocksPid1});
+  }
+
+  // Variable-length input.
+  {
+    auto shuffleWriter = createShuffleWriter(2);
+
+    std::vector<VectorPtr> data = {
+        makeFlatVector<StringView>({"nn", "", "fr", "juiu"}),
+        makeNullableFlatVector<int8_t>({1, 2, 3, std::nullopt}),
+        makeNullableFlatVector<StringView>({std::nullopt, "de", "10 I'm not 
inline string", "de"})};
+
+    const auto vector = makeRowVector(data);
+
+    const auto blocksPid0 = takeRows({vector}, {{0, 1, 3}});
+    const auto blocksPid1 = takeRows({vector}, {{2}});
+
+    // Add partition id as the first column.
+    data.insert(data.begin(), makeFlatVector<int32_t>({2, 2, 1, 2}));
+    const auto input = makeRowVector(data);
+
+    testShuffleRoundTrip(*shuffleWriter, {input}, 2, {blocksPid0, blocksPid1});
+  }
 }
 
-TEST_P(HashPartitioningShuffleWriter, hashPart1VectorComplexType) {
+TEST_P(HashPartitioningShuffleWriterTest, hashPart1VectorComplexType) {
   ASSERT_NOT_OK(initShuffleWriterOptions());
   auto shuffleWriter = createShuffleWriter(2);
   auto children = childrenComplex_;
@@ -538,7 +544,7 @@ TEST_P(HashPartitioningShuffleWriter, 
hashPart1VectorComplexType) {
   testShuffleRoundTrip(*shuffleWriter, {vector}, 2, {firstBlock, secondBlock});
 }
 
-TEST_P(HashPartitioningShuffleWriter, hashPart3Vectors) {
+TEST_P(HashPartitioningShuffleWriterTest, hashPart3Vectors) {
   ASSERT_NOT_OK(initShuffleWriterOptions());
   auto shuffleWriter = createShuffleWriter(2);
 
@@ -549,7 +555,7 @@ TEST_P(HashPartitioningShuffleWriter, hashPart3Vectors) {
       *shuffleWriter, {hashInputVector1_, hashInputVector2_, 
hashInputVector1_}, 2, {blockPid2, blockPid1});
 }
 
-TEST_P(HashPartitioningShuffleWriter, hashLargeVectors) {
+TEST_P(HashPartitioningShuffleWriterTest, hashLargeVectors) {
   const int32_t expectedMaxBatchSize = 8;
   ASSERT_NOT_OK(initShuffleWriterOptions());
   auto shuffleWriter = createShuffleWriter(2);
@@ -567,7 +573,7 @@ TEST_P(HashPartitioningShuffleWriter, hashLargeVectors) {
   testShuffleRoundTrip(*shuffleWriter, {hashInputVector2_, hashInputVector1_}, 
2, {blockPid2, blockPid1});
 }
 
-TEST_P(RangePartitioningShuffleWriter, range) {
+TEST_P(RangePartitioningShuffleWriterTest, range) {
   ASSERT_NOT_OK(initShuffleWriterOptions());
   auto shuffleWriter = createShuffleWriter(2);
 
@@ -578,7 +584,7 @@ TEST_P(RangePartitioningShuffleWriter, range) {
       *shuffleWriter, {compositeBatch1_, compositeBatch2_, compositeBatch1_}, 
2, {blockPid1, blockPid2});
 }
 
-TEST_P(RoundRobinPartitioningShuffleWriter, roundRobin) {
+TEST_P(RoundRobinPartitioningShuffleWriterTest, roundRobin) {
   ASSERT_NOT_OK(initShuffleWriterOptions());
   auto shuffleWriter = createShuffleWriter(2);
 
@@ -588,7 +594,7 @@ TEST_P(RoundRobinPartitioningShuffleWriter, roundRobin) {
   testShuffleRoundTrip(*shuffleWriter, {inputVector1_, inputVector2_, 
inputVector1_}, 2, {blockPid1, blockPid2});
 }
 
-TEST_P(RoundRobinPartitioningShuffleWriter, preAllocForceRealloc) {
+TEST_P(RoundRobinPartitioningShuffleWriterTest, preAllocForceRealloc) {
   if (GetParam().shuffleWriterType != ShuffleWriterType::kHashShuffle) {
     return;
   }
@@ -646,7 +652,7 @@ TEST_P(RoundRobinPartitioningShuffleWriter, 
preAllocForceRealloc) {
   ASSERT_NOT_OK(shuffleWriter->stop());
 }
 
-TEST_P(RoundRobinPartitioningShuffleWriter, preAllocForceReuse) {
+TEST_P(RoundRobinPartitioningShuffleWriterTest, preAllocForceReuse) {
   if (GetParam().shuffleWriterType != ShuffleWriterType::kHashShuffle) {
     return;
   }
@@ -681,7 +687,7 @@ TEST_P(RoundRobinPartitioningShuffleWriter, 
preAllocForceReuse) {
   ASSERT_NOT_OK(shuffleWriter->stop());
 }
 
-TEST_P(RoundRobinPartitioningShuffleWriter, spillVerifyResult) {
+TEST_P(RoundRobinPartitioningShuffleWriterTest, spillVerifyResult) {
   if (GetParam().shuffleWriterType != ShuffleWriterType::kHashShuffle) {
     return;
   }
@@ -721,7 +727,7 @@ TEST_P(RoundRobinPartitioningShuffleWriter, 
spillVerifyResult) {
   shuffleWriteReadMultiBlocks(*shuffleWriter, 2, {blockPid1, blockPid2});
 }
 
-TEST_P(RoundRobinPartitioningShuffleWriter, sortMaxRows) {
+TEST_P(RoundRobinPartitioningShuffleWriterTest, sortMaxRows) {
   if (GetParam().shuffleWriterType != ShuffleWriterType::kSortShuffle) {
     return;
   }
@@ -738,22 +744,22 @@ TEST_P(RoundRobinPartitioningShuffleWriter, sortMaxRows) {
 
 INSTANTIATE_TEST_SUITE_P(
     SinglePartitioningShuffleWriterGroup,
-    SinglePartitioningShuffleWriter,
+    SinglePartitioningShuffleWriterTest,
     ::testing::ValuesIn(getTestParams()));
 
 INSTANTIATE_TEST_SUITE_P(
     RoundRobinPartitioningShuffleWriterGroup,
-    RoundRobinPartitioningShuffleWriter,
+    RoundRobinPartitioningShuffleWriterTest,
     ::testing::ValuesIn(getTestParams()));
 
 INSTANTIATE_TEST_SUITE_P(
     HashPartitioningShuffleWriterGroup,
-    HashPartitioningShuffleWriter,
+    HashPartitioningShuffleWriterTest,
     ::testing::ValuesIn(getTestParams()));
 
 INSTANTIATE_TEST_SUITE_P(
     RangePartitioningShuffleWriterGroup,
-    RangePartitioningShuffleWriter,
+    RangePartitioningShuffleWriterTest,
     ::testing::ValuesIn(getTestParams()));
 
 } // namespace gluten


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to