This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new be124523f4 [enhancement](profile) add profile to show column
predicates (#13862)
be124523f4 is described below
commit be124523f4f80b423c495c2b36dddb5c46be4d0b
Author: Pxl <[email protected]>
AuthorDate: Wed Nov 2 09:07:26 2022 +0800
[enhancement](profile) add profile to show column predicates (#13862)
---
be/src/olap/bloom_filter_predicate.h | 6 +++
be/src/olap/column_predicate.h | 51 ++++++++++++++++++++++++
be/src/olap/comparison_predicate.h | 6 +++
be/src/olap/in_list_predicate.h | 6 +++
be/src/olap/iterators.h | 2 +
be/src/olap/like_column_predicate.h | 5 +++
be/src/olap/null_predicate.h | 5 +++
be/src/olap/reader.h | 2 +
be/src/olap/rowset/beta_rowset_reader.h | 7 ++++
be/src/olap/rowset/rowset_reader.h | 2 +
be/src/olap/rowset/segment_v2/segment_iterator.h | 31 ++++++++++++++
be/src/vec/exec/scan/new_olap_scan_node.cpp | 6 +--
be/src/vec/exec/scan/new_olap_scanner.cpp | 13 +++---
be/src/vec/exec/scan/new_olap_scanner.h | 6 ++-
be/src/vec/olap/block_reader.h | 4 ++
be/src/vec/olap/vcollect_iterator.h | 23 +++++++++++
be/src/vec/olap/vgeneric_iterators.cpp | 14 +++++++
17 files changed, 180 insertions(+), 9 deletions(-)
diff --git a/be/src/olap/bloom_filter_predicate.h
b/be/src/olap/bloom_filter_predicate.h
index 8c2af710fa..3712833a8d 100644
--- a/be/src/olap/bloom_filter_predicate.h
+++ b/be/src/olap/bloom_filter_predicate.h
@@ -20,6 +20,7 @@
#include "exprs/bloomfilter_predicate.h"
#include "exprs/runtime_filter.h"
#include "olap/column_predicate.h"
+#include "runtime/primitive_type.h"
#include "vec/columns/column_dictionary.h"
#include "vec/columns/column_nullable.h"
#include "vec/columns/column_vector.h"
@@ -119,6 +120,11 @@ private:
return new_size;
}
+    // Predicate-specific part of debug_string(): the class name followed by
+    // the name of T as produced by type_to_string(T).
+    // NOTE(review): T is presumably the PrimitiveType template parameter of
+    // this class; its declaration is outside this hunk -- confirm.
+    std::string _debug_string() override {
+        return "BloomFilterColumnPredicate(" + type_to_string(T) + ")";
+    }
+
std::shared_ptr<BloomFilterFuncBase> _filter;
SpecificFilter* _specific_filter; // owned by _filter
mutable uint64_t _evaluated_rows = 1;
diff --git a/be/src/olap/column_predicate.h b/be/src/olap/column_predicate.h
index e20683c205..350d945f7c 100644
--- a/be/src/olap/column_predicate.h
+++ b/be/src/olap/column_predicate.h
@@ -47,6 +47,50 @@ enum class PredicateType {
BF = 11, // BloomFilter
};
+// Returns the enumerator name of a PredicateType (for example "EQ" or
+// "IN_LIST") for use in debug and profile strings. A value that matches none
+// of the enumerators handled below yields an empty string.
+inline std::string type_to_string(PredicateType type) {
+    // Deliberately no `default:` label, so the compiler's switch-coverage
+    // warning can flag an enumerator that is added later without a name here.
+    switch (type) {
+    case PredicateType::UNKNOWN:
+        return "UNKNOWN";
+    case PredicateType::EQ:
+        return "EQ";
+    case PredicateType::NE:
+        return "NE";
+    case PredicateType::LT:
+        return "LT";
+    case PredicateType::LE:
+        return "LE";
+    case PredicateType::GT:
+        return "GT";
+    case PredicateType::GE:
+        return "GE";
+    case PredicateType::IN_LIST:
+        return "IN_LIST";
+    case PredicateType::NOT_IN_LIST:
+        return "NOT_IN_LIST";
+    case PredicateType::IS_NULL:
+        return "IS_NULL";
+    case PredicateType::IS_NOT_NULL:
+        return "IS_NOT_NULL";
+    case PredicateType::BF:
+        return "BF";
+    }
+
+    // Reached only for a value outside the enumerators handled above.
+    return "";
+}
+
struct PredicateTypeTraits {
static constexpr bool is_range(PredicateType type) {
return (type == PredicateType::LT || type == PredicateType::LE ||
@@ -121,7 +165,14 @@ public:
}
uint32_t column_id() const { return _column_id; }
+    // Full description of this predicate: the subclass-specific text from
+    // _debug_string(), followed by the column id and the opposite flag.
+    virtual std::string debug_string() {
+        std::string desc = _debug_string();
+        desc += ", column_id=";
+        desc += std::to_string(_column_id);
+        desc += ", opposite=";
+        desc += _opposite ? "true" : "false";
+        return desc;
+    }
+
protected:
+ virtual std::string _debug_string() = 0; // Subclass hook: the predicate-specific text that debug_string() prefixes to ", column_id=..., opposite=...".
+
uint32_t _column_id;
// TODO: the value is only in delete condition, better be template value
bool _opposite;
diff --git a/be/src/olap/comparison_predicate.h
b/be/src/olap/comparison_predicate.h
index 914b1989d3..f52a2082e7 100644
--- a/be/src/olap/comparison_predicate.h
+++ b/be/src/olap/comparison_predicate.h
@@ -601,6 +601,12 @@ private:
}
}
+    // Predicate-specific part of debug_string(): the class name with the
+    // names of Type and PT, each rendered through type_to_string().
+    // NOTE(review): Type and PT are presumably this class's template
+    // parameters (Type is compared against TYPE_CHAR just below) -- confirm.
+    std::string _debug_string() override {
+        return "ComparisonPredicateBase(" + type_to_string(Type) + ", " +
+               type_to_string(PT) + ")";
+    }
+
T _value;
static constexpr PrimitiveType EvalType = (Type == TYPE_CHAR ? TYPE_STRING
: Type);
};
diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h
index 33e440a5e6..32b0dc2fb0 100644
--- a/be/src/olap/in_list_predicate.h
+++ b/be/src/olap/in_list_predicate.h
@@ -536,6 +536,12 @@ private:
}
}
+    // Predicate-specific part of debug_string(): the class name with the
+    // names of Type and PT, each rendered through type_to_string().
+    // NOTE(review): Type and PT are presumably this class's template
+    // parameters; their declarations are outside this hunk -- confirm.
+    std::string _debug_string() override {
+        return "InListPredicateBase(" + type_to_string(Type) + ", " +
+               type_to_string(PT) + ")";
+    }
+
phmap::flat_hash_set<T> _values;
mutable std::vector<vectorized::UInt8> _value_in_dict_flags;
T _min_value;
diff --git a/be/src/olap/iterators.h b/be/src/olap/iterators.h
index 4f12118c2c..15777226b8 100644
--- a/be/src/olap/iterators.h
+++ b/be/src/olap/iterators.h
@@ -136,6 +136,8 @@ public:
// Return the data id such as segment id, used for keep the insert order
when do
// merge sort in priority queue
virtual uint64_t data_id() const { return 0; }
+
+    // Hook that lets an iterator add its own details to `profile`.
+    // This base implementation adds nothing and reports false.
+    virtual bool update_profile(RuntimeProfile* profile) {
+        return false;
+    }
};
} // namespace doris
diff --git a/be/src/olap/like_column_predicate.h
b/be/src/olap/like_column_predicate.h
index 0bb53c8119..85f30b5ab4 100644
--- a/be/src/olap/like_column_predicate.h
+++ b/be/src/olap/like_column_predicate.h
@@ -144,6 +144,11 @@ private:
}
}
+    // Predicate-specific part of debug_string(): just the class name.
+    std::string _debug_string() override {
+        return "LikeColumnPredicate";
+    }
+
std::string _origin;
// life time controlled by scan node
doris_udf::FunctionContext* _fn_ctx;
diff --git a/be/src/olap/null_predicate.h b/be/src/olap/null_predicate.h
index 7279f01259..7155036bce 100644
--- a/be/src/olap/null_predicate.h
+++ b/be/src/olap/null_predicate.h
@@ -81,6 +81,11 @@ public:
void evaluate_vec(const vectorized::IColumn& column, uint16_t size, bool*
flags) const override;
private:
+    // Predicate-specific part of debug_string(): the class name plus
+    // "is_null" or "not_null" according to _is_null.
+    std::string _debug_string() override {
+        const char* kind = _is_null ? "is_null" : "not_null";
+        return "NullPredicate(" + std::string(kind) + ")";
+    }
+
bool _is_null; //true for null, false for not null
};
diff --git a/be/src/olap/reader.h b/be/src/olap/reader.h
index 7c5dc32fcf..e3f501e063 100644
--- a/be/src/olap/reader.h
+++ b/be/src/olap/reader.h
@@ -147,6 +147,8 @@ public:
const OlapReaderStatistics& stats() const { return _stats; }
OlapReaderStatistics* mutable_stats() { return &_stats; }
+    // Hook that lets a reader add its own details to `profile`.
+    // This base implementation adds nothing and reports false.
+    virtual bool update_profile(RuntimeProfile* profile) {
+        return false;
+    }
+
protected:
friend class CollectIterator;
friend class vectorized::VCollectIterator;
diff --git a/be/src/olap/rowset/beta_rowset_reader.h
b/be/src/olap/rowset/beta_rowset_reader.h
index 5424722c16..0b0ef05fe0 100644
--- a/be/src/olap/rowset/beta_rowset_reader.h
+++ b/be/src/olap/rowset/beta_rowset_reader.h
@@ -65,6 +65,13 @@ public:
Status get_segment_num_rows(std::vector<uint32_t>* segment_num_rows)
override;
+    // Delegates to the underlying iterator; reports false while no iterator
+    // is set.
+    bool update_profile(RuntimeProfile* profile) override {
+        if (_iterator == nullptr) {
+            return false;
+        }
+        return _iterator->update_profile(profile);
+    }
+
private:
bool _should_push_down_value_predicates() const;
diff --git a/be/src/olap/rowset/rowset_reader.h
b/be/src/olap/rowset/rowset_reader.h
index eecf594254..f9a1acc0d8 100644
--- a/be/src/olap/rowset/rowset_reader.h
+++ b/be/src/olap/rowset/rowset_reader.h
@@ -74,6 +74,8 @@ public:
virtual Status get_segment_num_rows(std::vector<uint32_t>*
segment_num_rows) {
return Status::NotSupported("to be implemented");
}
+
+ virtual bool update_profile(RuntimeProfile* profile) = 0; // Pure virtual: every concrete rowset reader must provide it (BetaRowsetReader forwards to its iterator).
};
} // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h
b/be/src/olap/rowset/segment_v2/segment_iterator.h
index 03d0c1d2fb..5a7a35773d 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.h
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.h
@@ -65,6 +65,37 @@ public:
bool is_lazy_materialization_read() const override { return
_lazy_materialization_read; }
uint64_t data_id() const override { return _segment->id(); }
+    // Writes the predicates held by this iterator into `profile` as info
+    // strings, one predicate description per line. Returns false when all
+    // three predicate lists are empty (nothing to report), true otherwise.
+    bool update_profile(RuntimeProfile* profile) override {
+        // Concatenates debug_string() of every predicate, each preceded by '\n'.
+        auto describe = [](const auto& predicates) {
+            std::string info;
+            for (const auto& pred : predicates) {
+                info += "\n" + pred->debug_string();
+            }
+            return info;
+        };
+
+        // With neither evaluation group populated, report the plain column
+        // predicate list instead (if it has any entries).
+        if (_short_cir_eval_predicate.empty() && _pre_eval_block_predicate.empty()) {
+            if (_col_predicates.empty()) {
+                return false;
+            }
+            profile->add_info_string("ColumnPredicates", describe(_col_predicates));
+            return true;
+        }
+
+        if (!_short_cir_eval_predicate.empty()) {
+            profile->add_info_string("Short Circuit ColumnPredicates",
+                                     describe(_short_cir_eval_predicate));
+        }
+        if (!_pre_eval_block_predicate.empty()) {
+            profile->add_info_string("Pre Evaluate Block ColumnPredicates",
+                                     describe(_pre_eval_block_predicate));
+        }
+        return true;
+    }
+
private:
Status _init(bool is_vec = false);
diff --git a/be/src/vec/exec/scan/new_olap_scan_node.cpp
b/be/src/vec/exec/scan/new_olap_scan_node.cpp
index 5542a71b34..a4253fe1cc 100644
--- a/be/src/vec/exec/scan/new_olap_scan_node.cpp
+++ b/be/src/vec/exec/scan/new_olap_scan_node.cpp
@@ -310,9 +310,9 @@ Status
NewOlapScanNode::_init_scanners(std::list<VScanner*>* scanners) {
scanner_ranges.push_back((*ranges)[i].get());
}
- NewOlapScanner* scanner = new NewOlapScanner(_state, this,
_limit_per_scanner,
-
_olap_scan_node.is_preaggregation,
- _need_agg_finalize,
*scan_range);
+ NewOlapScanner* scanner = new NewOlapScanner(
+ _state, this, _limit_per_scanner,
_olap_scan_node.is_preaggregation,
+ _need_agg_finalize, *scan_range, _scanner_profile.get());
// add scanner to pool before doing prepare.
// so that scanner can be automatically deconstructed if prepare
failed.
_scanner_pool.add(scanner);
diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp
b/be/src/vec/exec/scan/new_olap_scanner.cpp
index ec33346b04..ed82933178 100644
--- a/be/src/vec/exec/scan/new_olap_scanner.cpp
+++ b/be/src/vec/exec/scan/new_olap_scanner.cpp
@@ -25,11 +25,12 @@ namespace doris::vectorized {
NewOlapScanner::NewOlapScanner(RuntimeState* state, NewOlapScanNode* parent,
int64_t limit,
bool aggregation, bool need_agg_finalize,
- const TPaloScanRange& scan_range)
+ const TPaloScanRange& scan_range,
RuntimeProfile* profile)
: VScanner(state, static_cast<VScanNode*>(parent), limit),
_aggregation(aggregation),
_need_agg_finalize(need_agg_finalize),
- _version(-1) {
+ _version(-1),
+ _profile(profile) {
_tablet_schema = std::make_shared<TabletSchema>();
}
@@ -121,6 +122,7 @@ Status NewOlapScanner::open(RuntimeState* state) {
<< ", backend=" << BackendOptions::get_localhost();
return Status::InternalError(ss.str());
}
+
return Status::OK();
}
@@ -299,6 +301,9 @@ Status NewOlapScanner::_init_return_columns(bool
need_seq_col) {
}
Status NewOlapScanner::_get_block_impl(RuntimeState* state, Block* block,
bool* eof) {
+ if (!_profile_updated) {
+ _profile_updated = _tablet_reader->update_profile(_profile);
+ }
// Read one block from block reader
// ATTN: Here we need to let the _get_block_impl method guarantee the
semantics of the interface,
// that is, eof can be set to true only when the returned block is empty.
@@ -342,9 +347,7 @@ void NewOlapScanner::_update_realtime_counters() {
}
void NewOlapScanner::_update_counters_before_close() {
- if (!_state->enable_profile()) return;
-
- if (_has_updated_counter) {
+ if (!_state->enable_profile() || _has_updated_counter) {
return;
}
_has_updated_counter = true;
diff --git a/be/src/vec/exec/scan/new_olap_scanner.h
b/be/src/vec/exec/scan/new_olap_scanner.h
index 6b07438bf1..899a0878e1 100644
--- a/be/src/vec/exec/scan/new_olap_scanner.h
+++ b/be/src/vec/exec/scan/new_olap_scanner.h
@@ -21,6 +21,7 @@
#include "exprs/bloomfilter_predicate.h"
#include "exprs/function_filter.h"
#include "olap/reader.h"
+#include "util/runtime_profile.h"
#include "vec/exec/scan/vscanner.h"
namespace doris {
@@ -34,7 +35,8 @@ class NewOlapScanNode;
class NewOlapScanner : public VScanner {
public:
NewOlapScanner(RuntimeState* state, NewOlapScanNode* parent, int64_t
limit, bool aggregation,
- bool need_agg_finalize, const TPaloScanRange& scan_range);
+ bool need_agg_finalize, const TPaloScanRange& scan_range,
+ RuntimeProfile* profile);
Status open(RuntimeState* state) override;
@@ -81,6 +83,8 @@ private:
// ========= profiles ==========
int64_t _compressed_bytes_read = 0;
int64_t _raw_rows_read = 0;
+ RuntimeProfile* _profile; // Profile pointer received by the constructor; passed to _tablet_reader->update_profile() in _get_block_impl().
+ bool _profile_updated = false; // Holds the result of update_profile(); once it is true, _get_block_impl() stops calling update_profile().
};
} // namespace vectorized
} // namespace doris
diff --git a/be/src/vec/olap/block_reader.h b/be/src/vec/olap/block_reader.h
index 682caed065..356a5a1ab4 100644
--- a/be/src/vec/olap/block_reader.h
+++ b/be/src/vec/olap/block_reader.h
@@ -47,6 +47,10 @@ public:
std::vector<RowLocation> current_block_row_locations() { return
_block_row_locations; }
+    // Forwards the request to the collect iterator and relays its answer.
+    bool update_profile(RuntimeProfile* profile) override {
+        const bool reported = _vcollect_iter.update_profile(profile);
+        return reported;
+    }
+
ColumnPredicate* _parse_to_predicate(const FunctionFilter&
function_filter) override;
private:
diff --git a/be/src/vec/olap/vcollect_iterator.h
b/be/src/vec/olap/vcollect_iterator.h
index 69ef3da70a..815752f28c 100644
--- a/be/src/vec/olap/vcollect_iterator.h
+++ b/be/src/vec/olap/vcollect_iterator.h
@@ -64,6 +64,13 @@ public:
return _inner_iter->current_block_row_locations(block_row_locations);
}
+    // Delegates to the inner level iterator; reports false while none is set.
+    bool update_profile(RuntimeProfile* profile) {
+        if (_inner_iter == nullptr) {
+            return false;
+        }
+        return _inner_iter->update_profile(profile);
+    }
+
private:
// This interface is the actual implementation of the new version of
iterator.
// It currently contains two implementations, one is Level0Iterator,
@@ -101,6 +108,8 @@ private:
virtual Status current_block_row_locations(std::vector<RowLocation>*
row_location) = 0;
+ virtual bool update_profile(RuntimeProfile* profile) = 0; // Pure virtual: each concrete level iterator supplies it; the public update_profile() reaches it through _inner_iter.
+
protected:
const TabletSchema& _schema;
IteratorRowRef _ref;
@@ -148,6 +157,13 @@ private:
Status current_block_row_locations(std::vector<RowLocation>*
block_row_locations) override;
+    // Delegates to the rowset reader; reports false while no reader is set.
+    bool update_profile(RuntimeProfile* profile) override {
+        if (_rs_reader == nullptr) {
+            return false;
+        }
+        return _rs_reader->update_profile(profile);
+    }
+
private:
Status _refresh_current_row();
Status _next_by_ref(IteratorRowRef* ref);
@@ -219,6 +235,13 @@ private:
~Level1Iterator() override;
+    // Delegates to the current child iterator; reports false while there is
+    // no current child.
+    bool update_profile(RuntimeProfile* profile) override {
+        if (_cur_child == nullptr) {
+            return false;
+        }
+        return _cur_child->update_profile(profile);
+    }
+
private:
Status _merge_next(IteratorRowRef* ref);
diff --git a/be/src/vec/olap/vgeneric_iterators.cpp
b/be/src/vec/olap/vgeneric_iterators.cpp
index e64668a838..bc74ab0862 100644
--- a/be/src/vec/olap/vgeneric_iterators.cpp
+++ b/be/src/vec/olap/vgeneric_iterators.cpp
@@ -453,6 +453,13 @@ public:
return Status::OK();
}
+    // Asks only the first origin iterator to update the profile; reports
+    // false when there are no origin iterators.
+    bool update_profile(RuntimeProfile* profile) override {
+        if (_origin_iters.empty()) {
+            return false;
+        }
+        auto first = _origin_iters.begin();
+        return (*first)->update_profile(profile);
+    }
+
private:
int _get_size(Block* block) { return block->rows(); }
int _get_size(BlockView* block_view) { return block_view->size(); }
@@ -590,6 +597,13 @@ public:
Status current_block_row_locations(std::vector<RowLocation>* locations)
override;
+    // Delegates to the iterator currently being read; reports false while
+    // none is set.
+    bool update_profile(RuntimeProfile* profile) override {
+        if (_cur_iter == nullptr) {
+            return false;
+        }
+        return _cur_iter->update_profile(profile);
+    }
+
private:
const Schema* _schema = nullptr;
RowwiseIterator* _cur_iter = nullptr;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]