This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch dev-1.0.0
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git

commit 7fdd24889cc2ec83d7a163ef21d4cc46a3988162
Author: HappenLee <[email protected]>
AuthorDate: Fri Mar 11 16:54:37 2022 +0800

    [improvement](vectorized) Support function tuple is null (#8442)
---
 be/src/exprs/expr.h                                |  1 -
 be/src/vec/CMakeLists.txt                          |  1 +
 be/src/vec/columns/column.h                        |  3 +
 be/src/vec/columns/column_nullable.cpp             |  5 ++
 be/src/vec/columns/column_nullable.h               |  3 +
 be/src/vec/columns/column_vector.cpp               |  3 +-
 be/src/vec/exec/join/vhash_join_node.cpp           |  7 +-
 be/src/vec/exprs/vexpr.cpp                         |  5 ++
 be/src/vec/exprs/vtuple_is_null_predicate.cpp      | 86 ++++++++++++++++++++++
 be/src/vec/exprs/vtuple_is_null_predicate.h        | 48 ++++++++++++
 .../doris/analysis/TupleIsNullPredicate.java       | 27 ++++---
 .../apache/doris/planner/SingleNodePlanner.java    |  5 +-
 12 files changed, 175 insertions(+), 19 deletions(-)

diff --git a/be/src/exprs/expr.h b/be/src/exprs/expr.h
index fdb4755..f8b4aa2 100644
--- a/be/src/exprs/expr.h
+++ b/be/src/exprs/expr.h
@@ -50,7 +50,6 @@ class RuntimeState;
 class TColumnValue;
 class TExpr;
 class TExprNode;
-class SetVar;
 class TupleIsNullPredicate;
 class VectorizedRowBatch;
 class Literal;
diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt
index 8365714..6bea9a6 100644
--- a/be/src/vec/CMakeLists.txt
+++ b/be/src/vec/CMakeLists.txt
@@ -99,6 +99,7 @@ set(VEC_FILES
   exprs/vexpr_context.cpp
   exprs/vliteral.cpp
   exprs/vin_predicate.cpp
+  exprs/vtuple_is_null_predicate.cpp
   exprs/vslot_ref.cpp
   exprs/vcast_expr.cpp
   exprs/vcase_expr.cpp
diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h
index cb04a1c..fdfd85b 100644
--- a/be/src/vec/columns/column.h
+++ b/be/src/vec/columns/column.h
@@ -162,6 +162,9 @@ public:
  
     /// Appends a batch elements from other column with the same type
     /// indices_begin + indices_end represent the row indices of column src
+    /// Warning:
+    ///       If *indices == -1 the row is null. This is only used by outer join; do not use it anywhere else.
+    ///       In that case -1 is inserted into the null map to mark the null as produced by outer join.
     virtual void insert_indices_from(const IColumn& src, const int* 
indices_begin, const int* indices_end) = 0;
 
     /// Appends data located in specified memory chunk if it is possible 
(throws an exception if it cannot be implemented).
diff --git a/be/src/vec/columns/column_nullable.cpp 
b/be/src/vec/columns/column_nullable.cpp
index bae8f9a..9877903 100644
--- a/be/src/vec/columns/column_nullable.cpp
+++ b/be/src/vec/columns/column_nullable.cpp
@@ -112,6 +112,11 @@ StringRef 
ColumnNullable::serialize_value_into_arena(size_t n, Arena& arena,
     return StringRef(nested_ref.data - s, nested_ref.size + s);
 }
 
+/// Appends one null row that was generated by a join: the nested column gets
+/// a default value and the null map gets the sentinel UInt8(-1), which marks
+/// the null as join-generated so the tuple-is-null predicate can recognise it.
+void ColumnNullable::insert_join_null_data() {
+    get_nested_column().insert_default();
+    // Spell out the conversion: the sentinel is deliberately stored as UInt8(-1).
+    get_null_map_data().push_back(UInt8(-1));
+}
+
 const char* ColumnNullable::deserialize_and_insert_from_arena(const char* pos) 
{
     UInt8 val = *reinterpret_cast<const UInt8*>(pos);
     pos += sizeof(val);
diff --git a/be/src/vec/columns/column_nullable.h 
b/be/src/vec/columns/column_nullable.h
index 21c67a5..030ca13 100644
--- a/be/src/vec/columns/column_nullable.h
+++ b/be/src/vec/columns/column_nullable.h
@@ -80,6 +80,9 @@ public:
 
     /// Will insert null value if pos=nullptr
     void insert_data(const char* pos, size_t length) override;
+    /// Inserts a null whose null-map entry is -1, meaning the null was generated by a join; only used for tuple-is-null.
+    void insert_join_null_data();
+
     StringRef serialize_value_into_arena(size_t n, Arena& arena, char const*& 
begin) const override;
     const char* deserialize_and_insert_from_arena(const char* pos) override;
     void insert_range_from(const IColumn& src, size_t start, size_t length) 
override;
diff --git a/be/src/vec/columns/column_vector.cpp 
b/be/src/vec/columns/column_vector.cpp
index 132d359..3188a93 100644
--- a/be/src/vec/columns/column_vector.cpp
+++ b/be/src/vec/columns/column_vector.cpp
@@ -231,7 +231,8 @@ void ColumnVector<T>::insert_indices_from(const IColumn& 
src, const int* indices
             // Now Uint8 use to identify null and non null
             // 1. nullable column : offset == -1 means is null at the here, 
set true here
             // 2. real data column : offset == -1 what at is meaningless
-            data[origin_size + i] = (offset == -1) ? T{1} : 
src_vec.get_element(offset);
+            // 3. -1 is only used by outer join, to mark the null as produced by outer join
+            data[origin_size + i] = (offset == -1) ? UInt8(-1) : 
src_vec.get_element(offset);
         } else {
             data[origin_size + i] = (offset == -1) ? T{0} : 
src_vec.get_element(offset);
         }
diff --git a/be/src/vec/exec/join/vhash_join_node.cpp 
b/be/src/vec/exec/join/vhash_join_node.cpp
index e4f8058..0b27e15 100644
--- a/be/src/vec/exec/join/vhash_join_node.cpp
+++ b/be/src/vec/exec/join/vhash_join_node.cpp
@@ -291,7 +291,7 @@ struct ProcessHashTableProbe {
                         if constexpr (probe_all) {
                             if (_build_block_offsets[j] == -1) {
                                 DCHECK(mcol[i + right_col_idx]->is_nullable());
-                                assert_cast<ColumnNullable *>(mcol[i + 
right_col_idx].get())->insert_data(nullptr, 0);
+                                assert_cast<ColumnNullable *>(mcol[i + 
right_col_idx].get())->insert_join_null_data();
                             } else {
                                 auto& column = 
*_build_blocks[_build_block_offsets[j]].get_by_position(i).column;
                                 mcol[i + right_col_idx]->insert_from(column, 
_build_block_rows[j]);
@@ -383,7 +383,7 @@ struct ProcessHashTableProbe {
                               JoinOpType::value == TJoinOp::FULL_OUTER_JOIN) {
                     for (size_t j = 0; j < right_col_len; ++j) {
                         DCHECK(mcol[j + right_col_idx]->is_nullable());
-                        assert_cast<ColumnNullable *>(mcol[j + 
right_col_idx].get())->insert_data(nullptr, 0);
+                        assert_cast<ColumnNullable *>(mcol[j + 
right_col_idx].get())->insert_join_null_data();
                     }
                 } else {
                     for (size_t j = 0; j < right_col_len; ++j) {
@@ -553,12 +553,11 @@ struct ProcessHashTableProbe {
 
         // right outer join / full join need insert data of left table
         if constexpr (JoinOpType::value == TJoinOp::LEFT_OUTER_JOIN ||
-                      JoinOpType::value == TJoinOp::FULL_OUTER_JOIN ||
                       JoinOpType::value == TJoinOp::RIGHT_OUTER_JOIN ||
                       JoinOpType::value == TJoinOp::FULL_OUTER_JOIN) {
             for (int i = 0; i < right_col_idx; ++i) {
                 for (int j = 0; j < block_size; ++j) {
-                    assert_cast<ColumnNullable 
*>(mcol[i].get())->insert_data(nullptr, 0);
+                    assert_cast<ColumnNullable 
*>(mcol[i].get())->insert_join_null_data();
                 }
             }
         }
diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp
index ff072d0..066808c 100644
--- a/be/src/vec/exprs/vexpr.cpp
+++ b/be/src/vec/exprs/vexpr.cpp
@@ -28,6 +28,7 @@
 #include "vec/exprs/vcompound_pred.h"
 #include "vec/exprs/vectorized_fn_call.h"
 #include "vec/exprs/vin_predicate.h"
+#include "vec/exprs/vtuple_is_null_predicate.h"
 #include "vec/exprs/vliteral.h"
 #include "vec/exprs/vslot_ref.h"
 #include "vec/exprs/vinfo_func.h"
@@ -130,6 +131,10 @@ Status VExpr::create_expr(doris::ObjectPool* pool, const 
doris::TExprNode& texpr
         *expr = pool->add(new VInfoFunc(texpr_node));
         break;
     }
+    case TExprNodeType::TUPLE_IS_NULL_PRED: {
+        *expr = pool->add(new VTupleIsNullPredicate(texpr_node));
+        break;
+    }
     default:
         return Status::InternalError(
                 fmt::format("Unknown expr node type: {}", 
texpr_node.node_type));
diff --git a/be/src/vec/exprs/vtuple_is_null_predicate.cpp 
b/be/src/vec/exprs/vtuple_is_null_predicate.cpp
new file mode 100644
index 0000000..7900918
--- /dev/null
+++ b/be/src/vec/exprs/vtuple_is_null_predicate.cpp
@@ -0,0 +1,86 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exprs/vtuple_is_null_predicate.h"
+
+#include <string_view>
+
+#include "exprs/create_predicate_function.h"
+
+#include "vec/core/field.h"
+#include "vec/data_types/data_type_factory.hpp"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+
+// Builds the predicate from `node`: the expression name is the fixed
+// `function_name` label and the tuple ids to test are copied out of
+// `node.tuple_is_null_pred`.
+VTupleIsNullPredicate::VTupleIsNullPredicate(const TExprNode& node)
+        : VExpr(node), _expr_name(function_name) {
+    const auto& ids = node.tuple_is_null_pred.tuple_ids;
+    _tuple_ids.assign(ids.begin(), ids.end());
+}
+
+// Prepares the base expression, then maps every tuple id of this predicate to
+// the column offset of that tuple's first slot within `desc` (the running sum
+// of the slot counts of the tuples that precede it). The offsets are stored in
+// `_column_to_check`. A tuple id that does not occur in `desc` contributes no
+// entry.
+Status VTupleIsNullPredicate::prepare(RuntimeState* state, const RowDescriptor& desc,
+                                      VExprContext* context) {
+    RETURN_IF_ERROR(VExpr::prepare(state, desc, context));
+    DCHECK_EQ(0, _children.size());
+    DCHECK_GT(_tuple_ids.size(), 0);
+
+    _column_to_check.reserve(_tuple_ids.size());
+    const auto& tuples = desc.tuple_descriptors();
+    for (const auto wanted_id : _tuple_ids) {
+        // Walk the tuples in order, accumulating slot counts until the
+        // requested tuple is reached.
+        uint32_t first_slot_column = 0;
+        auto it = tuples.begin();
+        while (it != tuples.end() && (*it)->id() != wanted_id) {
+            first_slot_column += (*it)->slots().size();
+            ++it;
+        }
+        if (it != tuples.end()) {
+            _column_to_check.emplace_back(first_slot_column);
+        }
+    }
+
+    return Status::OK();
+}
+
+// Evaluates the predicate over `block` and appends the result as a new UInt8
+// column named `_expr_name`. Row i of the result is 1 only if every column
+// listed in `_column_to_check` has the null-map entry UInt8(-1) at row i (the
+// marker for a null generated by outer join); otherwise it is 0. The position
+// of the appended column is written to `*result_column_id`. `context` is not
+// used. Always returns Status::OK().
+Status VTupleIsNullPredicate::execute(VExprContext* context, Block* block,
+                                      int* result_column_id) {
+    const size_t num_columns_without_result = block->columns();
+    const size_t target_rows = block->rows();
+    // Start from all ones and AND in the verdict of each checked column.
+    auto ans = ColumnVector<UInt8>::create(target_rows, 1);
+    auto* __restrict ans_map = ans->get_data().data();
+
+    for (auto col_id : _column_to_check) {
+        const auto& column = *block->get_by_position(col_id).column;
+        DCHECK(column.is_nullable());
+        // A base-to-derived downcast must use static_cast; reinterpret_cast
+        // merely reinterprets the address and is not a valid class downcast.
+        const auto* __restrict null_map =
+                static_cast<const ColumnNullable&>(column).get_null_map_column().get_data().data();
+
+        // Unsigned index: avoids comparing a signed int with the row count.
+        for (size_t i = 0; i < target_rows; ++i) {
+            ans_map[i] &= null_map[i] == uint8_t(-1);
+        }
+    }
+
+    // Append the result column; its index is the column count before insertion.
+    block->insert({std::move(ans), _data_type, _expr_name});
+    *result_column_id = static_cast<int>(num_columns_without_result);
+    return Status::OK();
+}
+
+// Returns the expression name stored at construction (the `function_name` label).
+const std::string& VTupleIsNullPredicate::expr_name() const {
+    return _expr_name;
+}
+
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/exprs/vtuple_is_null_predicate.h 
b/be/src/vec/exprs/vtuple_is_null_predicate.h
new file mode 100644
index 0000000..7854253
--- /dev/null
+++ b/be/src/vec/exprs/vtuple_is_null_predicate.h
@@ -0,0 +1,48 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "vec/exprs/vexpr.h"
+
+namespace doris::vectorized {
+/// Vectorized expression node for the "tuple_is_null" predicate.
+/// It is built from a TExprNode, holds the tuple ids to test and the block
+/// column offsets (`_column_to_check`) that `execute` inspects, and is never
+/// reported as constant.
+class VTupleIsNullPredicate final : public VExpr {
+public:
+    explicit VTupleIsNullPredicate(const TExprNode& node);
+    ~VTupleIsNullPredicate() override = default;
+
+    /// Prepares the expression against the row descriptor `desc`.
+    doris::Status prepare(doris::RuntimeState* state, const doris::RowDescriptor& desc,
+                          VExprContext* context) override;
+
+    /// Evaluates the predicate on `block`; the index of the result column is
+    /// written to `*result_column_id`.
+    doris::Status execute(VExprContext* context, doris::vectorized::Block* block,
+                          int* result_column_id) override;
+
+    /// Copy-constructs a new predicate from this one and hands it to `pool`.
+    VExpr* clone(doris::ObjectPool* pool) const override {
+        auto* copy = new VTupleIsNullPredicate(*this);
+        return pool->add(copy);
+    }
+
+    /// This predicate is never treated as a constant expression.
+    [[nodiscard]] bool is_constant() const override { return false; }
+
+    [[nodiscard]] const std::string& expr_name() const override;
+
+private:
+    /// Label used as the expression name.
+    static constexpr const char* function_name = "tuple_is_null";
+
+    std::string _expr_name;
+    /// Tuple ids taken from the TExprNode.
+    std::vector<TupleId> _tuple_ids;
+    /// Block column offsets whose null maps are inspected by `execute`.
+    std::vector<uint32_t> _column_to_check;
+};
+} // namespace doris::vectorized
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/TupleIsNullPredicate.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/TupleIsNullPredicate.java
index c2b23b5..a0e637a 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/TupleIsNullPredicate.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/TupleIsNullPredicate.java
@@ -46,7 +46,7 @@ public class TupleIsNullPredicate extends Predicate {
     protected TupleIsNullPredicate(TupleIsNullPredicate other) {
         super(other);
         tupleIds.addAll(other.tupleIds);
-   }
+    }
 
     @Override
     protected void analyzeImpl(Analyzer analyzer) throws AnalysisException {
@@ -54,11 +54,13 @@ public class TupleIsNullPredicate extends Predicate {
     }
 
     @Override
-    protected boolean isConstantImpl() { return false; }
+    protected boolean isConstantImpl() {
+        return false;
+    }
 
     @Override
     public boolean isBoundByTupleIds(List<TupleId> tids) {
-        for (TupleId tid: tids) {
+        for (TupleId tid : tids) {
             if (tupleIds.contains(tid)) return true;
         }
         return false;
@@ -93,29 +95,29 @@ public class TupleIsNullPredicate extends Predicate {
         }
         TupleIsNullPredicate other = (TupleIsNullPredicate) o;
         return other.tupleIds.containsAll(tupleIds)
-            && tupleIds.containsAll(other.tupleIds);
+                && tupleIds.containsAll(other.tupleIds);
     }
 
     /**
      * Makes each input expr nullable, if necessary, by wrapping it as follows:
      * IF(TupleIsNull(tids), NULL, expr)
-     *
+     * <p>
      * The given tids must be materialized. The given inputExprs are expected 
to be bound
      * by tids once fully substituted against base tables. However, inputExprs 
may not yet
      * be fully substituted at this point.
-     *
+     * <p>
      * Returns a new list with the nullable exprs.
      */
     public static List<Expr> wrapExprs(List<Expr> inputExprs,
-        List<TupleId> tids, Analyzer analyzer) throws UserException {
+                                       List<TupleId> tids, Analyzer analyzer) 
throws UserException {
         // Assert that all tids are materialized.
-        for (TupleId tid: tids) {
+        for (TupleId tid : tids) {
             TupleDescriptor tupleDesc = analyzer.getTupleDesc(tid);
             Preconditions.checkState(tupleDesc.getIsMaterialized());
         }
         // Perform the wrapping.
         List<Expr> result = Lists.newArrayListWithCapacity(inputExprs.size());
-        for (Expr e: inputExprs) {
+        for (Expr e : inputExprs) {
             result.add(wrapExpr(e, tids, analyzer));
         }
         return result;
@@ -166,7 +168,7 @@ public class TupleIsNullPredicate extends Predicate {
      * Recursive function that replaces all 'IF(TupleIsNull(), NULL, e)' exprs 
in
      * 'expr' with e and returns the modified expr.
      */
-    public static Expr unwrapExpr(Expr expr)  {
+    public static Expr unwrapExpr(Expr expr) {
         if (expr instanceof FunctionCallExpr) {
             FunctionCallExpr fnCallExpr = (FunctionCallExpr) expr;
             List<Expr> params = fnCallExpr.getParams().exprs();
@@ -181,4 +183,9 @@ public class TupleIsNullPredicate extends Predicate {
         }
         return expr;
     }
+
+    /**
+     * This predicate always reports itself as non-nullable.
+     *
+     * @return {@code false}, unconditionally
+     */
+    @Override
+    public boolean isNullable() {
+        return false;
+    }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java 
b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
index b3aaf2a..1f5e9da 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
@@ -60,7 +60,6 @@ import org.apache.doris.common.FeConstants;
 import org.apache.doris.common.Pair;
 import org.apache.doris.common.Reference;
 import org.apache.doris.common.UserException;
-import org.apache.doris.common.util.VectorizedUtil;
 
 import com.google.common.base.Preconditions;
 import com.google.common.base.Predicate;
@@ -1367,8 +1366,8 @@ public class SingleNodePlanner {
         // inline view's plan.
         ExprSubstitutionMap outputSmap = ExprSubstitutionMap.compose(
                 inlineViewRef.getSmap(), rootNode.getOutputSmap(), analyzer);
-        // Vec exec engine not need the function of TupleIsNull, So here just 
skip wrap it
-        if (analyzer.isOuterJoined(inlineViewRef.getId()) && 
!VectorizedUtil.isVectorized()) {
+
+        if (analyzer.isOuterJoined(inlineViewRef.getId())) {
             rootNode.setWithoutTupleIsNullOutputSmap(outputSmap);
             // Exprs against non-matched rows of an outer join should always 
return NULL.
             // Make the rhs exprs of the output smap nullable, if necessary. 
This expr wrapping

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to