This is an automated email from the ASF dual-hosted git repository.
felixybw pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 8ade7a9cb [VL] Use slice instead of resize in ensureFlattened (#5523)
8ade7a9cb is described below
commit 8ade7a9cb50910021a2c0cb6977aaaeb9a17b5ce
Author: Rong Ma <[email protected]>
AuthorDate: Fri Apr 26 01:42:02 2024 +0800
[VL] Use slice instead of resize in ensureFlattened (#5523)
The children of the input RowVector can reference the same BaseVector before
and after flattening. E.g. select explode(a), a from ..., where a is of map
type, will output 3 columns: col_0: a.mapKeys, col_1: a.mapValues and col_2: a.
In this case, col_0/col_1 reference the same BaseVectors as the
mapKeys/mapValues in col_2. Resizing col_0/col_1 will cause an unexpected
failure when accessing col_2.
---
cpp/velox/memory/VeloxColumnarBatch.cc | 2 +-
cpp/velox/tests/CMakeLists.txt | 44 +++++++++++---------
cpp/velox/tests/VeloxColumnarBatchTest.cc | 67 +++++++++++++++++++++++++++++++
3 files changed, 93 insertions(+), 20 deletions(-)
diff --git a/cpp/velox/memory/VeloxColumnarBatch.cc
b/cpp/velox/memory/VeloxColumnarBatch.cc
index da004340c..83428707b 100644
--- a/cpp/velox/memory/VeloxColumnarBatch.cc
+++ b/cpp/velox/memory/VeloxColumnarBatch.cc
@@ -57,7 +57,7 @@ void VeloxColumnarBatch::ensureFlattened() {
}
// In case of output from Limit, RowVector size can be smaller than its
children size.
if (child->size() > rowVector_->size()) {
- child->resize(rowVector_->size());
+ child = child->slice(0, rowVector_->size());
}
}
flattened_ = true;
diff --git a/cpp/velox/tests/CMakeLists.txt b/cpp/velox/tests/CMakeLists.txt
index dff210b7f..58482fe15 100644
--- a/cpp/velox/tests/CMakeLists.txt
+++ b/cpp/velox/tests/CMakeLists.txt
@@ -17,13 +17,13 @@ function(add_velox_test TEST_EXEC)
set(options)
set(one_value_args)
set(multi_value_args
- SOURCES
- )
+ SOURCES
+ )
cmake_parse_arguments(ARG
- "${options}"
- "${one_value_args}"
- "${multi_value_args}"
- ${ARGN})
+ "${options}"
+ "${one_value_args}"
+ "${multi_value_args}"
+ ${ARGN})
if(ARG_SOURCES)
set(SOURCES ${ARG_SOURCES})
@@ -39,19 +39,25 @@ endfunction()
add_velox_test(velox_shuffle_writer_test SOURCES VeloxShuffleWriterTest.cc)
# TODO: ORC is not well supported.
# add_velox_test(orc_test SOURCES OrcTest.cc)
-add_velox_test(velox_operators_test SOURCES VeloxColumnarToRowTest.cc
VeloxRowToColumnarTest.cc VeloxColumnarBatchSerializerTest.cc)
add_velox_test(
- velox_plan_conversion_test
- SOURCES
- Substrait2VeloxPlanConversionTest.cc
- Substrait2VeloxPlanValidatorTest.cc
- Substrait2VeloxValuesNodeConversionTest.cc
- SubstraitExtensionCollectorTest.cc
- VeloxSubstraitRoundTripTest.cc
- VeloxSubstraitSignatureTest.cc
- VeloxToSubstraitTypeTest.cc
- FunctionTest.cc
- JsonToProtoConverter.cc
- FilePathGenerator.cc)
+ velox_operators_test
+ SOURCES
+ VeloxColumnarToRowTest.cc
+ VeloxRowToColumnarTest.cc
+ VeloxColumnarBatchSerializerTest.cc
+ VeloxColumnarBatchTest.cc)
+add_velox_test(
+ velox_plan_conversion_test
+ SOURCES
+ Substrait2VeloxPlanConversionTest.cc
+ Substrait2VeloxPlanValidatorTest.cc
+ Substrait2VeloxValuesNodeConversionTest.cc
+ SubstraitExtensionCollectorTest.cc
+ VeloxSubstraitRoundTripTest.cc
+ VeloxSubstraitSignatureTest.cc
+ VeloxToSubstraitTypeTest.cc
+ FunctionTest.cc
+ JsonToProtoConverter.cc
+ FilePathGenerator.cc)
add_velox_test(spark_functions_test SOURCES SparkFunctionTest.cc)
add_velox_test(execution_ctx_test SOURCES RuntimeTest.cc)
diff --git a/cpp/velox/tests/VeloxColumnarBatchTest.cc
b/cpp/velox/tests/VeloxColumnarBatchTest.cc
new file mode 100644
index 000000000..559f9f047
--- /dev/null
+++ b/cpp/velox/tests/VeloxColumnarBatchTest.cc
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "memory/VeloxColumnarBatch.h"
+#include "velox/vector/arrow/Bridge.h"
+#include "velox/vector/tests/utils/VectorTestBase.h"
+
+using namespace facebook::velox;
+
+namespace gluten {
+class VeloxColumnarBatchTest : public ::testing::Test, public
test::VectorTestBase {
+ protected:
+ // Velox requires the mem manager to be instanced.
+ static void SetUpTestCase() {
+ memory::MemoryManager::testingSetInstance({});
+ }
+
+ std::shared_ptr<memory::MemoryPool> veloxPool_ =
defaultLeafVeloxMemoryPool();
+};
+
+TEST_F(VeloxColumnarBatchTest, flattenTruncatedVector) {
+ vector_size_t inputSize = 1'00;
+ vector_size_t childSize = 1'000;
+ auto mapVector = makeMapVector<int32_t, int64_t>(
+ childSize, [](auto row) { return 1; }, [](auto row) { return row; },
[](auto row) { return row; });
+ auto mapKeys = mapVector->mapKeys();
+ auto mapValues = mapVector->mapValues();
+
+ // First, make a row vector with the mapKeys and mapValues as children.
+ // Make the row vector size less than the children size.
+ auto input = std::make_shared<RowVector>(
+ veloxPool_.get(),
+ ROW({INTEGER(), BIGINT(), MAP(INTEGER(), BIGINT())}),
+ nullptr,
+ inputSize,
+ std::vector<VectorPtr>{mapKeys, mapValues});
+
+ auto batch = std::make_shared<VeloxColumnarBatch>(input);
+ ASSERT_NO_THROW(batch->getFlattenedRowVector());
+
+ // Allocate a dummy indices and wrap the original mapVector with it as a
dictionary, to force it get decoded in
+ // flattenVector.
+ auto indices = allocateIndices(childSize, veloxPool_.get());
+ auto* rawIndices = indices->asMutable<vector_size_t>();
+ for (vector_size_t i = 0; i < childSize; i++) {
+ rawIndices[i] = i;
+ }
+ auto encodedMapVector = BaseVector::wrapInDictionary(nullptr, indices,
inputSize, mapVector);
+ auto inputOfMap = makeRowVector({encodedMapVector});
+ auto batchOfMap = std::make_shared<VeloxColumnarBatch>(inputOfMap);
+ ASSERT_NO_THROW(batchOfMap->getFlattenedRowVector());
+}
+} // namespace gluten
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]