This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch dev-1.0.0 in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
commit 7fdd24889cc2ec83d7a163ef21d4cc46a3988162 Author: HappenLee <[email protected]> AuthorDate: Fri Mar 11 16:54:37 2022 +0800 [improvement](vectorized) Support function tuple is null (#8442) --- be/src/exprs/expr.h | 1 - be/src/vec/CMakeLists.txt | 1 + be/src/vec/columns/column.h | 3 + be/src/vec/columns/column_nullable.cpp | 5 ++ be/src/vec/columns/column_nullable.h | 3 + be/src/vec/columns/column_vector.cpp | 3 +- be/src/vec/exec/join/vhash_join_node.cpp | 7 +- be/src/vec/exprs/vexpr.cpp | 5 ++ be/src/vec/exprs/vtuple_is_null_predicate.cpp | 86 ++++++++++++++++++++++ be/src/vec/exprs/vtuple_is_null_predicate.h | 48 ++++++++++++ .../doris/analysis/TupleIsNullPredicate.java | 27 ++++--- .../apache/doris/planner/SingleNodePlanner.java | 5 +- 12 files changed, 175 insertions(+), 19 deletions(-) diff --git a/be/src/exprs/expr.h b/be/src/exprs/expr.h index fdb4755..f8b4aa2 100644 --- a/be/src/exprs/expr.h +++ b/be/src/exprs/expr.h @@ -50,7 +50,6 @@ class RuntimeState; class TColumnValue; class TExpr; class TExprNode; -class SetVar; class TupleIsNullPredicate; class VectorizedRowBatch; class Literal; diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt index 8365714..6bea9a6 100644 --- a/be/src/vec/CMakeLists.txt +++ b/be/src/vec/CMakeLists.txt @@ -99,6 +99,7 @@ set(VEC_FILES exprs/vexpr_context.cpp exprs/vliteral.cpp exprs/vin_predicate.cpp + exprs/vtuple_is_null_predicate.cpp exprs/vslot_ref.cpp exprs/vcast_expr.cpp exprs/vcase_expr.cpp diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h index cb04a1c..fdfd85b 100644 --- a/be/src/vec/columns/column.h +++ b/be/src/vec/columns/column.h @@ -162,6 +162,9 @@ public: /// Appends a batch elements from other column with the same type /// indices_begin + indices_end represent the row indices of column src + /// Warning: + /// if *indices == -1 means the row is null, only use in outer join, do not use in any other place + /// insert -1 in null map to hint the null is produced by outer join virtual void insert_indices_from(const IColumn& src, const int* indices_begin, const int* indices_end) = 0; /// Appends data located in specified memory chunk if it is possible (throws an exception if it cannot be implemented). diff --git a/be/src/vec/columns/column_nullable.cpp b/be/src/vec/columns/column_nullable.cpp index bae8f9a..9877903 100644 --- a/be/src/vec/columns/column_nullable.cpp +++ b/be/src/vec/columns/column_nullable.cpp @@ -112,6 +112,11 @@ StringRef ColumnNullable::serialize_value_into_arena(size_t n, Arena& arena, return StringRef(nested_ref.data - s, nested_ref.size + s); } + void ColumnNullable::insert_join_null_data() { + get_nested_column().insert_default(); + get_null_map_data().push_back(-1); + } + const char* ColumnNullable::deserialize_and_insert_from_arena(const char* pos) { UInt8 val = *reinterpret_cast<const UInt8*>(pos); pos += sizeof(val); diff --git a/be/src/vec/columns/column_nullable.h b/be/src/vec/columns/column_nullable.h index 21c67a5..030ca13 100644 --- a/be/src/vec/columns/column_nullable.h +++ b/be/src/vec/columns/column_nullable.h @@ -80,6 +80,9 @@ public: /// Will insert null value if pos=nullptr void insert_data(const char* pos, size_t length) override; + /// -1 in null map means null is generated by join, only use in tuple is null + void insert_join_null_data(); + StringRef serialize_value_into_arena(size_t n, Arena& arena, char const*& begin) const override; const char* deserialize_and_insert_from_arena(const char* pos) override; void insert_range_from(const IColumn& src, size_t start, size_t length) override; diff --git a/be/src/vec/columns/column_vector.cpp b/be/src/vec/columns/column_vector.cpp index 132d359..3188a93 100644 --- a/be/src/vec/columns/column_vector.cpp +++ b/be/src/vec/columns/column_vector.cpp @@ -231,7 +231,8 @@ void ColumnVector<T>::insert_indices_from(const IColumn& src, const int* indices // Now Uint8 use to identify null and non null // 1. nullable column : offset == -1 means is null at the here, set true here // 2. real data column : offset == -1 what at is meaningless - data[origin_size + i] = (offset == -1) ? T{1} : src_vec.get_element(offset); + // 3. -1 only use in outer join to hint the null is produced by outer join + data[origin_size + i] = (offset == -1) ? UInt8(-1) : src_vec.get_element(offset); } else { data[origin_size + i] = (offset == -1) ? T{0} : src_vec.get_element(offset); } diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp index e4f8058..0b27e15 100644 --- a/be/src/vec/exec/join/vhash_join_node.cpp +++ b/be/src/vec/exec/join/vhash_join_node.cpp @@ -291,7 +291,7 @@ struct ProcessHashTableProbe { if constexpr (probe_all) { if (_build_block_offsets[j] == -1) { DCHECK(mcol[i + right_col_idx]->is_nullable()); - assert_cast<ColumnNullable *>(mcol[i + right_col_idx].get())->insert_data(nullptr, 0); + assert_cast<ColumnNullable *>(mcol[i + right_col_idx].get())->insert_join_null_data(); } else { auto& column = *_build_blocks[_build_block_offsets[j]].get_by_position(i).column; mcol[i + right_col_idx]->insert_from(column, _build_block_rows[j]); @@ -383,7 +383,7 @@ struct ProcessHashTableProbe { JoinOpType::value == TJoinOp::FULL_OUTER_JOIN) { for (size_t j = 0; j < right_col_len; ++j) { DCHECK(mcol[j + right_col_idx]->is_nullable()); - assert_cast<ColumnNullable *>(mcol[j + right_col_idx].get())->insert_data(nullptr, 0); + assert_cast<ColumnNullable *>(mcol[j + right_col_idx].get())->insert_join_null_data(); } } else { for (size_t j = 0; j < right_col_len; ++j) { @@ -553,12 +553,11 @@ struct ProcessHashTableProbe { // right outer join / full join need insert data of left table if constexpr (JoinOpType::value == TJoinOp::LEFT_OUTER_JOIN || - JoinOpType::value == TJoinOp::FULL_OUTER_JOIN || JoinOpType::value == TJoinOp::RIGHT_OUTER_JOIN || JoinOpType::value == TJoinOp::FULL_OUTER_JOIN) { for (int i = 0; i < right_col_idx; ++i) { for (int j = 0; j < block_size; ++j) { - assert_cast<ColumnNullable *>(mcol[i].get())->insert_data(nullptr, 0); + assert_cast<ColumnNullable *>(mcol[i].get())->insert_join_null_data(); } } } diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp index ff072d0..066808c 100644 --- a/be/src/vec/exprs/vexpr.cpp +++ b/be/src/vec/exprs/vexpr.cpp @@ -28,6 +28,7 @@ #include "vec/exprs/vcompound_pred.h" #include "vec/exprs/vectorized_fn_call.h" #include "vec/exprs/vin_predicate.h" +#include "vec/exprs/vtuple_is_null_predicate.h" #include "vec/exprs/vliteral.h" #include "vec/exprs/vslot_ref.h" #include "vec/exprs/vinfo_func.h" @@ -130,6 +131,10 @@ Status VExpr::create_expr(doris::ObjectPool* pool, const doris::TExprNode& texpr *expr = pool->add(new VInfoFunc(texpr_node)); break; } + case TExprNodeType::TUPLE_IS_NULL_PRED: { + *expr = pool->add(new VTupleIsNullPredicate(texpr_node)); + break; + } default: return Status::InternalError( fmt::format("Unknown expr node type: {}", texpr_node.node_type)); diff --git a/be/src/vec/exprs/vtuple_is_null_predicate.cpp b/be/src/vec/exprs/vtuple_is_null_predicate.cpp new file mode 100644 index 0000000..7900918 --- /dev/null +++ b/be/src/vec/exprs/vtuple_is_null_predicate.cpp @@ -0,0 +1,86 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "vec/exprs/vtuple_is_null_predicate.h" + +#include <string_view> + +#include "exprs/create_predicate_function.h" + +#include "vec/core/field.h" +#include "vec/data_types/data_type_factory.hpp" +#include "vec/data_types/data_type_nullable.h" +#include "vec/functions/simple_function_factory.h" + +namespace doris::vectorized { + +VTupleIsNullPredicate::VTupleIsNullPredicate(const TExprNode& node) + : VExpr(node), + _expr_name(function_name), + _tuple_ids(node.tuple_is_null_pred.tuple_ids.begin(), + node.tuple_is_null_pred.tuple_ids.end()) {} + +Status VTupleIsNullPredicate::prepare(RuntimeState* state, const RowDescriptor& desc, + VExprContext* context) { + RETURN_IF_ERROR(VExpr::prepare(state, desc, context)); + DCHECK_EQ(0, _children.size()); + DCHECK_GT(_tuple_ids.size(), 0); + + _column_to_check.reserve(_tuple_ids.size()); + // Resolve tuple ids to tuple indexes. + for (auto tuple_id : _tuple_ids) { + uint32_t loc = 0; + for (auto& tuple_desc : desc.tuple_descriptors()) { + if (tuple_desc->id() == tuple_id) { + _column_to_check.emplace_back(loc); + break; + } + loc += tuple_desc->slots().size(); + } + } + + return Status::OK(); +} + +Status VTupleIsNullPredicate::execute(VExprContext* context, Block* block, int* result_column_id) { + // TODO: not execute const expr again, but use the const column in function context + // call function + size_t num_columns_without_result = block->columns(); + auto target_rows = block->rows(); + auto ans = ColumnVector<UInt8>::create(target_rows, 1); + auto* __restrict ans_map = ans->get_data().data(); + + for (auto col_id : _column_to_check) { + auto* __restrict null_map = reinterpret_cast<const ColumnNullable&>( + *block->get_by_position(col_id).column).get_null_map_column().get_data().data(); + + for (int i = 0; i < target_rows; ++i) { + ans_map[i] &= null_map[i] == uint8_t(-1); + } + } + + // prepare a column to save result + block->insert({std::move(ans), _data_type, _expr_name}); + *result_column_id = num_columns_without_result; + return Status::OK(); +} + +const std::string& VTupleIsNullPredicate::expr_name() const { + return _expr_name; +} + +} // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/exprs/vtuple_is_null_predicate.h b/be/src/vec/exprs/vtuple_is_null_predicate.h new file mode 100644 index 0000000..7854253 --- /dev/null +++ b/be/src/vec/exprs/vtuple_is_null_predicate.h @@ -0,0 +1,48 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "vec/exprs/vexpr.h" + +namespace doris::vectorized { +class VTupleIsNullPredicate final : public VExpr { +public: + explicit VTupleIsNullPredicate(const TExprNode& node); + ~VTupleIsNullPredicate() override = default; + doris::Status execute(VExprContext* context, doris::vectorized::Block* block, + int* result_column_id) override; + doris::Status prepare(doris::RuntimeState* state, const doris::RowDescriptor& desc, + VExprContext* context) override; + + VExpr* clone(doris::ObjectPool* pool) const override { + return pool->add(new VTupleIsNullPredicate(*this)); + } + + [[nodiscard]] bool is_constant() const override { return false; } + + [[nodiscard]] const std::string& expr_name() const override; + +private: + std::string _expr_name; + std::vector<TupleId> _tuple_ids; + std::vector<uint32_t> _column_to_check; + +private: + static const constexpr char* function_name = "tuple_is_null"; +}; +} // namespace doris::vectorized diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/TupleIsNullPredicate.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/TupleIsNullPredicate.java index c2b23b5..a0e637a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/TupleIsNullPredicate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/TupleIsNullPredicate.java @@ -46,7 +46,7 @@ public class TupleIsNullPredicate extends Predicate { protected TupleIsNullPredicate(TupleIsNullPredicate other) { super(other); tupleIds.addAll(other.tupleIds); - } + } @Override protected void analyzeImpl(Analyzer analyzer) throws AnalysisException { @@ -54,11 +54,13 @@ public class TupleIsNullPredicate extends Predicate { } @Override - protected boolean isConstantImpl() { return false; } + protected boolean isConstantImpl() { + return false; + } @Override public boolean isBoundByTupleIds(List<TupleId> tids) { - for (TupleId tid: tids) { + for (TupleId tid : tids) { if (tupleIds.contains(tid)) return true; } return false; @@ -93,29 +95,29 @@ public class TupleIsNullPredicate extends Predicate { } TupleIsNullPredicate other = (TupleIsNullPredicate) o; return other.tupleIds.containsAll(tupleIds) - && tupleIds.containsAll(other.tupleIds); + && tupleIds.containsAll(other.tupleIds); } /** * Makes each input expr nullable, if necessary, by wrapping it as follows: * IF(TupleIsNull(tids), NULL, expr) - * + * <p> * The given tids must be materialized. The given inputExprs are expected to be bound * by tids once fully substituted against base tables. However, inputExprs may not yet * be fully substituted at this point. - * + * <p> * Returns a new list with the nullable exprs. */ public static List<Expr> wrapExprs(List<Expr> inputExprs, - List<TupleId> tids, Analyzer analyzer) throws UserException { + List<TupleId> tids, Analyzer analyzer) throws UserException { // Assert that all tids are materialized. - for (TupleId tid: tids) { + for (TupleId tid : tids) { TupleDescriptor tupleDesc = analyzer.getTupleDesc(tid); Preconditions.checkState(tupleDesc.getIsMaterialized()); } // Perform the wrapping. List<Expr> result = Lists.newArrayListWithCapacity(inputExprs.size()); - for (Expr e: inputExprs) { + for (Expr e : inputExprs) { result.add(wrapExpr(e, tids, analyzer)); } return result; @@ -166,7 +168,7 @@ public class TupleIsNullPredicate extends Predicate { * Recursive function that replaces all 'IF(TupleIsNull(), NULL, e)' exprs in * 'expr' with e and returns the modified expr. */ - public static Expr unwrapExpr(Expr expr) { + public static Expr unwrapExpr(Expr expr) { if (expr instanceof FunctionCallExpr) { FunctionCallExpr fnCallExpr = (FunctionCallExpr) expr; List<Expr> params = fnCallExpr.getParams().exprs(); @@ -181,4 +183,9 @@ public class TupleIsNullPredicate extends Predicate { } return expr; } + + @Override + public boolean isNullable() { + return false; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java index b3aaf2a..1f5e9da 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java @@ -60,7 +60,6 @@ import org.apache.doris.common.FeConstants; import org.apache.doris.common.Pair; import org.apache.doris.common.Reference; import org.apache.doris.common.UserException; -import org.apache.doris.common.util.VectorizedUtil; import com.google.common.base.Preconditions; import com.google.common.base.Predicate; @@ -1367,8 +1366,8 @@ public class SingleNodePlanner { // inline view's plan. ExprSubstitutionMap outputSmap = ExprSubstitutionMap.compose( inlineViewRef.getSmap(), rootNode.getOutputSmap(), analyzer); - // Vec exec engine not need the function of TupleIsNull, So here just skip wrap it - if (analyzer.isOuterJoined(inlineViewRef.getId()) && !VectorizedUtil.isVectorized()) { + + if (analyzer.isOuterJoined(inlineViewRef.getId())) { rootNode.setWithoutTupleIsNullOutputSmap(outputSmap); // Exprs against non-matched rows of an outer join should always return NULL. // Make the rhs exprs of the output smap nullable, if necessary. This expr wrapping --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
