This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new be124523f4 [enhancement](profile) add profile to show column predicates (#13862)
be124523f4 is described below

commit be124523f4f80b423c495c2b36dddb5c46be4d0b
Author: Pxl <[email protected]>
AuthorDate: Wed Nov 2 09:07:26 2022 +0800

    [enhancement](profile) add profile to show column predicates (#13862)
---
 be/src/olap/bloom_filter_predicate.h             |  6 +++
 be/src/olap/column_predicate.h                   | 51 ++++++++++++++++++++++++
 be/src/olap/comparison_predicate.h               |  6 +++
 be/src/olap/in_list_predicate.h                  |  6 +++
 be/src/olap/iterators.h                          |  2 +
 be/src/olap/like_column_predicate.h              |  5 +++
 be/src/olap/null_predicate.h                     |  5 +++
 be/src/olap/reader.h                             |  2 +
 be/src/olap/rowset/beta_rowset_reader.h          |  7 ++++
 be/src/olap/rowset/rowset_reader.h               |  2 +
 be/src/olap/rowset/segment_v2/segment_iterator.h | 31 ++++++++++++++
 be/src/vec/exec/scan/new_olap_scan_node.cpp      |  6 +--
 be/src/vec/exec/scan/new_olap_scanner.cpp        | 13 +++---
 be/src/vec/exec/scan/new_olap_scanner.h          |  6 ++-
 be/src/vec/olap/block_reader.h                   |  4 ++
 be/src/vec/olap/vcollect_iterator.h              | 23 +++++++++++
 be/src/vec/olap/vgeneric_iterators.cpp           | 14 +++++++
 17 files changed, 180 insertions(+), 9 deletions(-)

diff --git a/be/src/olap/bloom_filter_predicate.h b/be/src/olap/bloom_filter_predicate.h
index 8c2af710fa..3712833a8d 100644
--- a/be/src/olap/bloom_filter_predicate.h
+++ b/be/src/olap/bloom_filter_predicate.h
@@ -20,6 +20,7 @@
 #include "exprs/bloomfilter_predicate.h"
 #include "exprs/runtime_filter.h"
 #include "olap/column_predicate.h"
+#include "runtime/primitive_type.h"
 #include "vec/columns/column_dictionary.h"
 #include "vec/columns/column_nullable.h"
 #include "vec/columns/column_vector.h"
@@ -119,6 +120,11 @@ private:
         return new_size;
     }
 
+    std::string _debug_string() override {
+        std::string info = "BloomFilterColumnPredicate(" + type_to_string(T) + ")";
+        return info;
+    }
+
     std::shared_ptr<BloomFilterFuncBase> _filter;
     SpecificFilter* _specific_filter; // owned by _filter
     mutable uint64_t _evaluated_rows = 1;
diff --git a/be/src/olap/column_predicate.h b/be/src/olap/column_predicate.h
index e20683c205..350d945f7c 100644
--- a/be/src/olap/column_predicate.h
+++ b/be/src/olap/column_predicate.h
@@ -47,6 +47,50 @@ enum class PredicateType {
     BF = 11, // BloomFilter
 };
 
+inline std::string type_to_string(PredicateType type) {
+    switch (type) {
+    case PredicateType::UNKNOWN:
+        return "UNKNOWN";
+
+    case PredicateType::EQ:
+        return "EQ";
+
+    case PredicateType::NE:
+        return "NE";
+
+    case PredicateType::LT:
+        return "LT";
+
+    case PredicateType::LE:
+        return "LE";
+
+    case PredicateType::GT:
+        return "GT";
+
+    case PredicateType::GE:
+        return "GE";
+
+    case PredicateType::IN_LIST:
+        return "IN_LIST";
+
+    case PredicateType::NOT_IN_LIST:
+        return "NOT_IN_LIST";
+
+    case PredicateType::IS_NULL:
+        return "IS_NULL";
+
+    case PredicateType::IS_NOT_NULL:
+        return "IS_NOT_NULL";
+
+    case PredicateType::BF:
+        return "BF";
+    default:
+        return "";
+    };
+
+    return "";
+}
+
 struct PredicateTypeTraits {
     static constexpr bool is_range(PredicateType type) {
         return (type == PredicateType::LT || type == PredicateType::LE ||
@@ -121,7 +165,14 @@ public:
     }
     uint32_t column_id() const { return _column_id; }
 
+    virtual std::string debug_string() {
+        return _debug_string() + ", column_id=" + std::to_string(_column_id) +
+               ", opposite=" + (_opposite ? "true" : "false");
+    }
+
 protected:
+    virtual std::string _debug_string() = 0;
+
     uint32_t _column_id;
     // TODO: the value is only in delete condition, better be template value
     bool _opposite;
diff --git a/be/src/olap/comparison_predicate.h b/be/src/olap/comparison_predicate.h
index 914b1989d3..f52a2082e7 100644
--- a/be/src/olap/comparison_predicate.h
+++ b/be/src/olap/comparison_predicate.h
@@ -601,6 +601,12 @@ private:
         }
     }
 
+    std::string _debug_string() override {
+        std::string info =
+                "ComparisonPredicateBase(" + type_to_string(Type) + ", " + type_to_string(PT) + ")";
+        return info;
+    }
+
     T _value;
     static constexpr PrimitiveType EvalType = (Type == TYPE_CHAR ? TYPE_STRING : Type);
 };
diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h
index 33e440a5e6..32b0dc2fb0 100644
--- a/be/src/olap/in_list_predicate.h
+++ b/be/src/olap/in_list_predicate.h
@@ -536,6 +536,12 @@ private:
         }
     }
 
+    std::string _debug_string() override {
+        std::string info =
+                "InListPredicateBase(" + type_to_string(Type) + ", " + type_to_string(PT) + ")";
+        return info;
+    }
+
     phmap::flat_hash_set<T> _values;
     mutable std::vector<vectorized::UInt8> _value_in_dict_flags;
     T _min_value;
diff --git a/be/src/olap/iterators.h b/be/src/olap/iterators.h
index 4f12118c2c..15777226b8 100644
--- a/be/src/olap/iterators.h
+++ b/be/src/olap/iterators.h
@@ -136,6 +136,8 @@ public:
     // Return the data id such as segment id, used for keep the insert order when do
     // merge sort in priority queue
     virtual uint64_t data_id() const { return 0; }
+
+    virtual bool update_profile(RuntimeProfile* profile) { return false; }
 };
 
 } // namespace doris
diff --git a/be/src/olap/like_column_predicate.h b/be/src/olap/like_column_predicate.h
index 0bb53c8119..85f30b5ab4 100644
--- a/be/src/olap/like_column_predicate.h
+++ b/be/src/olap/like_column_predicate.h
@@ -144,6 +144,11 @@ private:
         }
     }
 
+    std::string _debug_string() override {
+        std::string info = "LikeColumnPredicate";
+        return info;
+    }
+
     std::string _origin;
     // life time controlled by scan node
     doris_udf::FunctionContext* _fn_ctx;
diff --git a/be/src/olap/null_predicate.h b/be/src/olap/null_predicate.h
index 7279f01259..7155036bce 100644
--- a/be/src/olap/null_predicate.h
+++ b/be/src/olap/null_predicate.h
@@ -81,6 +81,11 @@ public:
     void evaluate_vec(const vectorized::IColumn& column, uint16_t size, bool* flags) const override;
 
 private:
+    std::string _debug_string() override {
+        std::string info = "NullPredicate(" + std::string(_is_null ? "is_null" : "not_null") + ")";
+        return info;
+    }
+
     bool _is_null; //true for null, false for not null
 };
 
diff --git a/be/src/olap/reader.h b/be/src/olap/reader.h
index 7c5dc32fcf..e3f501e063 100644
--- a/be/src/olap/reader.h
+++ b/be/src/olap/reader.h
@@ -147,6 +147,8 @@ public:
     const OlapReaderStatistics& stats() const { return _stats; }
     OlapReaderStatistics* mutable_stats() { return &_stats; }
 
+    virtual bool update_profile(RuntimeProfile* profile) { return false; }
+
 protected:
     friend class CollectIterator;
     friend class vectorized::VCollectIterator;
diff --git a/be/src/olap/rowset/beta_rowset_reader.h b/be/src/olap/rowset/beta_rowset_reader.h
index 5424722c16..0b0ef05fe0 100644
--- a/be/src/olap/rowset/beta_rowset_reader.h
+++ b/be/src/olap/rowset/beta_rowset_reader.h
@@ -65,6 +65,13 @@ public:
 
     Status get_segment_num_rows(std::vector<uint32_t>* segment_num_rows) override;
 
+    bool update_profile(RuntimeProfile* profile) override {
+        if (_iterator != nullptr) {
+            return _iterator->update_profile(profile);
+        }
+        return false;
+    }
+
 private:
     bool _should_push_down_value_predicates() const;
 
diff --git a/be/src/olap/rowset/rowset_reader.h b/be/src/olap/rowset/rowset_reader.h
index eecf594254..f9a1acc0d8 100644
--- a/be/src/olap/rowset/rowset_reader.h
+++ b/be/src/olap/rowset/rowset_reader.h
@@ -74,6 +74,8 @@ public:
     virtual Status get_segment_num_rows(std::vector<uint32_t>* segment_num_rows) {
         return Status::NotSupported("to be implemented");
     }
+
+    virtual bool update_profile(RuntimeProfile* profile) = 0;
 };
 
 } // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h
index 03d0c1d2fb..5a7a35773d 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.h
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.h
@@ -65,6 +65,37 @@ public:
     bool is_lazy_materialization_read() const override { return _lazy_materialization_read; }
     uint64_t data_id() const override { return _segment->id(); }
 
+    bool update_profile(RuntimeProfile* profile) override {
+        if (_short_cir_eval_predicate.empty() && _pre_eval_block_predicate.empty()) {
+            if (_col_predicates.empty()) {
+                return false;
+            }
+
+            std::string info;
+            for (auto pred : _col_predicates) {
+                info += "\n" + pred->debug_string();
+            }
+            profile->add_info_string("ColumnPredicates", info);
+        } else {
+            if (!_short_cir_eval_predicate.empty()) {
+                std::string info;
+                for (auto pred : _short_cir_eval_predicate) {
+                    info += "\n" + pred->debug_string();
+                }
+                profile->add_info_string("Short Circuit ColumnPredicates", info);
+            }
+            if (!_pre_eval_block_predicate.empty()) {
+                std::string info;
+                for (auto pred : _pre_eval_block_predicate) {
+                    info += "\n" + pred->debug_string();
+                }
+                profile->add_info_string("Pre Evaluate Block ColumnPredicates", info);
+            }
+        }
+
+        return true;
+    }
+
 private:
     Status _init(bool is_vec = false);
 
diff --git a/be/src/vec/exec/scan/new_olap_scan_node.cpp b/be/src/vec/exec/scan/new_olap_scan_node.cpp
index 5542a71b34..a4253fe1cc 100644
--- a/be/src/vec/exec/scan/new_olap_scan_node.cpp
+++ b/be/src/vec/exec/scan/new_olap_scan_node.cpp
@@ -310,9 +310,9 @@ Status NewOlapScanNode::_init_scanners(std::list<VScanner*>* scanners) {
                 scanner_ranges.push_back((*ranges)[i].get());
             }
 
-            NewOlapScanner* scanner = new NewOlapScanner(_state, this, _limit_per_scanner,
-                                                         _olap_scan_node.is_preaggregation,
-                                                         _need_agg_finalize, *scan_range);
+            NewOlapScanner* scanner = new NewOlapScanner(
+                    _state, this, _limit_per_scanner, _olap_scan_node.is_preaggregation,
+                    _need_agg_finalize, *scan_range, _scanner_profile.get());
             // add scanner to pool before doing prepare.
             // so that scanner can be automatically deconstructed if prepare failed.
             _scanner_pool.add(scanner);
diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp b/be/src/vec/exec/scan/new_olap_scanner.cpp
index ec33346b04..ed82933178 100644
--- a/be/src/vec/exec/scan/new_olap_scanner.cpp
+++ b/be/src/vec/exec/scan/new_olap_scanner.cpp
@@ -25,11 +25,12 @@ namespace doris::vectorized {
 
 NewOlapScanner::NewOlapScanner(RuntimeState* state, NewOlapScanNode* parent, int64_t limit,
                                bool aggregation, bool need_agg_finalize,
-                               const TPaloScanRange& scan_range)
+                               const TPaloScanRange& scan_range, RuntimeProfile* profile)
         : VScanner(state, static_cast<VScanNode*>(parent), limit),
           _aggregation(aggregation),
           _need_agg_finalize(need_agg_finalize),
-          _version(-1) {
+          _version(-1),
+          _profile(profile) {
     _tablet_schema = std::make_shared<TabletSchema>();
 }
 
@@ -121,6 +122,7 @@ Status NewOlapScanner::open(RuntimeState* state) {
            << ", backend=" << BackendOptions::get_localhost();
         return Status::InternalError(ss.str());
     }
+
     return Status::OK();
 }
 
@@ -299,6 +301,9 @@ Status NewOlapScanner::_init_return_columns(bool need_seq_col) {
 }
 
 Status NewOlapScanner::_get_block_impl(RuntimeState* state, Block* block, bool* eof) {
+    if (!_profile_updated) {
+        _profile_updated = _tablet_reader->update_profile(_profile);
+    }
     // Read one block from block reader
     // ATTN: Here we need to let the _get_block_impl method guarantee the semantics of the interface,
     // that is, eof can be set to true only when the returned block is empty.
@@ -342,9 +347,7 @@ void NewOlapScanner::_update_realtime_counters() {
 }
 
 void NewOlapScanner::_update_counters_before_close() {
-    if (!_state->enable_profile()) return;
-
-    if (_has_updated_counter) {
+    if (!_state->enable_profile() || _has_updated_counter) {
         return;
     }
     _has_updated_counter = true;
diff --git a/be/src/vec/exec/scan/new_olap_scanner.h b/be/src/vec/exec/scan/new_olap_scanner.h
index 6b07438bf1..899a0878e1 100644
--- a/be/src/vec/exec/scan/new_olap_scanner.h
+++ b/be/src/vec/exec/scan/new_olap_scanner.h
@@ -21,6 +21,7 @@
 #include "exprs/bloomfilter_predicate.h"
 #include "exprs/function_filter.h"
 #include "olap/reader.h"
+#include "util/runtime_profile.h"
 #include "vec/exec/scan/vscanner.h"
 
 namespace doris {
@@ -34,7 +35,8 @@ class NewOlapScanNode;
 class NewOlapScanner : public VScanner {
 public:
     NewOlapScanner(RuntimeState* state, NewOlapScanNode* parent, int64_t limit, bool aggregation,
-                   bool need_agg_finalize, const TPaloScanRange& scan_range);
+                   bool need_agg_finalize, const TPaloScanRange& scan_range,
+                   RuntimeProfile* profile);
 
     Status open(RuntimeState* state) override;
 
@@ -81,6 +83,8 @@ private:
     // ========= profiles ==========
     int64_t _compressed_bytes_read = 0;
     int64_t _raw_rows_read = 0;
+    RuntimeProfile* _profile;
+    bool _profile_updated = false;
 };
 } // namespace vectorized
 } // namespace doris
diff --git a/be/src/vec/olap/block_reader.h b/be/src/vec/olap/block_reader.h
index 682caed065..356a5a1ab4 100644
--- a/be/src/vec/olap/block_reader.h
+++ b/be/src/vec/olap/block_reader.h
@@ -47,6 +47,10 @@ public:
 
     std::vector<RowLocation> current_block_row_locations() { return _block_row_locations; }
 
+    bool update_profile(RuntimeProfile* profile) override {
+        return _vcollect_iter.update_profile(profile);
+    }
+
     ColumnPredicate* _parse_to_predicate(const FunctionFilter& function_filter) override;
 
 private:
diff --git a/be/src/vec/olap/vcollect_iterator.h b/be/src/vec/olap/vcollect_iterator.h
index 69ef3da70a..815752f28c 100644
--- a/be/src/vec/olap/vcollect_iterator.h
+++ b/be/src/vec/olap/vcollect_iterator.h
@@ -64,6 +64,13 @@ public:
         return _inner_iter->current_block_row_locations(block_row_locations);
     }
 
+    bool update_profile(RuntimeProfile* profile) {
+        if (_inner_iter != nullptr) {
+            return _inner_iter->update_profile(profile);
+        }
+        return false;
+    }
+
 private:
     // This interface is the actual implementation of the new version of iterator.
     // It currently contains two implementations, one is Level0Iterator,
@@ -101,6 +108,8 @@ private:
 
         virtual Status current_block_row_locations(std::vector<RowLocation>* row_location) = 0;
 
+        virtual bool update_profile(RuntimeProfile* profile) = 0;
+
     protected:
         const TabletSchema& _schema;
         IteratorRowRef _ref;
@@ -148,6 +157,13 @@ private:
 
         Status current_block_row_locations(std::vector<RowLocation>* block_row_locations) override;
 
+        bool update_profile(RuntimeProfile* profile) override {
+            if (_rs_reader != nullptr) {
+                return _rs_reader->update_profile(profile);
+            }
+            return false;
+        }
+
     private:
         Status _refresh_current_row();
         Status _next_by_ref(IteratorRowRef* ref);
@@ -219,6 +235,13 @@ private:
 
         ~Level1Iterator() override;
 
+        bool update_profile(RuntimeProfile* profile) override {
+            if (_cur_child != nullptr) {
+                return _cur_child->update_profile(profile);
+            }
+            return false;
+        }
+
     private:
         Status _merge_next(IteratorRowRef* ref);
 
diff --git a/be/src/vec/olap/vgeneric_iterators.cpp b/be/src/vec/olap/vgeneric_iterators.cpp
index e64668a838..bc74ab0862 100644
--- a/be/src/vec/olap/vgeneric_iterators.cpp
+++ b/be/src/vec/olap/vgeneric_iterators.cpp
@@ -453,6 +453,13 @@ public:
         return Status::OK();
     }
 
+    bool update_profile(RuntimeProfile* profile) override {
+        if (!_origin_iters.empty()) {
+            return (*_origin_iters.begin())->update_profile(profile);
+        }
+        return false;
+    }
+
 private:
     int _get_size(Block* block) { return block->rows(); }
     int _get_size(BlockView* block_view) { return block_view->size(); }
@@ -590,6 +597,13 @@ public:
 
     Status current_block_row_locations(std::vector<RowLocation>* locations) override;
 
+    bool update_profile(RuntimeProfile* profile) override {
+        if (_cur_iter != nullptr) {
+            return _cur_iter->update_profile(profile);
+        }
+        return false;
+    }
+
 private:
     const Schema* _schema = nullptr;
     RowwiseIterator* _cur_iter = nullptr;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to