This is an automated email from the ASF dual-hosted git repository.

felixybw pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 8ade7a9cb [VL] Use slice instead of resize in ensureFlattened (#5523)
8ade7a9cb is described below

commit 8ade7a9cb50910021a2c0cb6977aaaeb9a17b5ce
Author: Rong Ma <[email protected]>
AuthorDate: Fri Apr 26 01:42:02 2024 +0800

    [VL] Use slice instead of resize in ensureFlattened (#5523)
    
    The children of the input RowVector can reference the same BaseVector
    before and after flattening. For example, `select explode(a), a from ...`,
    where `a` is of map type, outputs 3 columns: col_0: a.mapKeys,
    col_1: a.mapValues, and col_2: a. In this case, col_0/col_1 reference the
    same BaseVectors as the mapKeys/mapValues in col_2, so resizing col_0/col_1
    in place also modifies the vectors that col_2 depends on and causes an
    unexpected failure when accessing col_2. Taking a slice and assigning it to
    the child avoids mutating the shared vector.
---
 cpp/velox/memory/VeloxColumnarBatch.cc    |  2 +-
 cpp/velox/tests/CMakeLists.txt            | 44 +++++++++++---------
 cpp/velox/tests/VeloxColumnarBatchTest.cc | 67 +++++++++++++++++++++++++++++++
 3 files changed, 93 insertions(+), 20 deletions(-)

diff --git a/cpp/velox/memory/VeloxColumnarBatch.cc 
b/cpp/velox/memory/VeloxColumnarBatch.cc
index da004340c..83428707b 100644
--- a/cpp/velox/memory/VeloxColumnarBatch.cc
+++ b/cpp/velox/memory/VeloxColumnarBatch.cc
@@ -57,7 +57,7 @@ void VeloxColumnarBatch::ensureFlattened() {
     }
     // In case of output from Limit, RowVector size can be smaller than its 
children size.
     if (child->size() > rowVector_->size()) {
-      child->resize(rowVector_->size());
+      child = child->slice(0, rowVector_->size());
     }
   }
   flattened_ = true;
diff --git a/cpp/velox/tests/CMakeLists.txt b/cpp/velox/tests/CMakeLists.txt
index dff210b7f..58482fe15 100644
--- a/cpp/velox/tests/CMakeLists.txt
+++ b/cpp/velox/tests/CMakeLists.txt
@@ -17,13 +17,13 @@ function(add_velox_test TEST_EXEC)
   set(options)
   set(one_value_args)
   set(multi_value_args
-    SOURCES
-  )
+      SOURCES
+      )
   cmake_parse_arguments(ARG
-  "${options}"
-  "${one_value_args}"
-  "${multi_value_args}"
-  ${ARGN})
+      "${options}"
+      "${one_value_args}"
+      "${multi_value_args}"
+      ${ARGN})
 
   if(ARG_SOURCES)
     set(SOURCES ${ARG_SOURCES})
@@ -39,19 +39,25 @@ endfunction()
 add_velox_test(velox_shuffle_writer_test SOURCES VeloxShuffleWriterTest.cc)
 # TODO: ORC is not well supported.
 # add_velox_test(orc_test SOURCES OrcTest.cc)
-add_velox_test(velox_operators_test SOURCES VeloxColumnarToRowTest.cc 
VeloxRowToColumnarTest.cc VeloxColumnarBatchSerializerTest.cc)
 add_velox_test(
-  velox_plan_conversion_test
-  SOURCES
-  Substrait2VeloxPlanConversionTest.cc
-  Substrait2VeloxPlanValidatorTest.cc
-  Substrait2VeloxValuesNodeConversionTest.cc
-  SubstraitExtensionCollectorTest.cc
-  VeloxSubstraitRoundTripTest.cc
-  VeloxSubstraitSignatureTest.cc
-  VeloxToSubstraitTypeTest.cc
-  FunctionTest.cc
-  JsonToProtoConverter.cc
-  FilePathGenerator.cc)
+    velox_operators_test
+    SOURCES
+    VeloxColumnarToRowTest.cc
+    VeloxRowToColumnarTest.cc
+    VeloxColumnarBatchSerializerTest.cc
+    VeloxColumnarBatchTest.cc)
+add_velox_test(
+    velox_plan_conversion_test
+    SOURCES
+    Substrait2VeloxPlanConversionTest.cc
+    Substrait2VeloxPlanValidatorTest.cc
+    Substrait2VeloxValuesNodeConversionTest.cc
+    SubstraitExtensionCollectorTest.cc
+    VeloxSubstraitRoundTripTest.cc
+    VeloxSubstraitSignatureTest.cc
+    VeloxToSubstraitTypeTest.cc
+    FunctionTest.cc
+    JsonToProtoConverter.cc
+    FilePathGenerator.cc)
 add_velox_test(spark_functions_test SOURCES SparkFunctionTest.cc)
 add_velox_test(execution_ctx_test SOURCES RuntimeTest.cc)
diff --git a/cpp/velox/tests/VeloxColumnarBatchTest.cc 
b/cpp/velox/tests/VeloxColumnarBatchTest.cc
new file mode 100644
index 000000000..559f9f047
--- /dev/null
+++ b/cpp/velox/tests/VeloxColumnarBatchTest.cc
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "memory/VeloxColumnarBatch.h"
+#include "velox/vector/arrow/Bridge.h"
+#include "velox/vector/tests/utils/VectorTestBase.h"
+
+using namespace facebook::velox;
+
+namespace gluten {
+class VeloxColumnarBatchTest : public ::testing::Test, public 
test::VectorTestBase {
+ protected:
+  // Velox requires the mem manager to be instanced.
+  static void SetUpTestCase() {
+    memory::MemoryManager::testingSetInstance({});
+  }
+
+  std::shared_ptr<memory::MemoryPool> veloxPool_ = 
defaultLeafVeloxMemoryPool();
+};
+
+TEST_F(VeloxColumnarBatchTest, flattenTruncatedVector) {
+  vector_size_t inputSize = 1'00;
+  vector_size_t childSize = 1'000;
+  auto mapVector = makeMapVector<int32_t, int64_t>(
+      childSize, [](auto row) { return 1; }, [](auto row) { return row; }, 
[](auto row) { return row; });
+  auto mapKeys = mapVector->mapKeys();
+  auto mapValues = mapVector->mapValues();
+
+  // First, make a row vector with the mapKeys and mapValues as children.
+  // Make the row vector size less than the children size.
+  auto input = std::make_shared<RowVector>(
+      veloxPool_.get(),
+      ROW({INTEGER(), BIGINT(), MAP(INTEGER(), BIGINT())}),
+      nullptr,
+      inputSize,
+      std::vector<VectorPtr>{mapKeys, mapValues});
+
+  auto batch = std::make_shared<VeloxColumnarBatch>(input);
+  ASSERT_NO_THROW(batch->getFlattenedRowVector());
+
+  // Allocate a dummy indices and wrap the original mapVector with it as a 
dictionary, to force it get decoded in
+  // flattenVector.
+  auto indices = allocateIndices(childSize, veloxPool_.get());
+  auto* rawIndices = indices->asMutable<vector_size_t>();
+  for (vector_size_t i = 0; i < childSize; i++) {
+    rawIndices[i] = i;
+  }
+  auto encodedMapVector = BaseVector::wrapInDictionary(nullptr, indices, 
inputSize, mapVector);
+  auto inputOfMap = makeRowVector({encodedMapVector});
+  auto batchOfMap = std::make_shared<VeloxColumnarBatch>(inputOfMap);
+  ASSERT_NO_THROW(batchOfMap->getFlattenedRowVector());
+}
+} // namespace gluten


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to