xiaokang commented on code in PR #28494:
URL: https://github.com/apache/doris/pull/28494#discussion_r1429322807


##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -2570,5 +2573,64 @@ void 
SegmentIterator::_calculate_pred_in_remaining_conjunct_root(
     }
 }
 
+bool SegmentIterator::_need_read_pk_data(ColumnId cid, 
vectorized::MutableColumnPtr& column,
+                                         size_t nrows_read) {
+    if (_opts.tablet_schema->keys_type() != KeysType::DUP_KEYS) {

Review Comment:
   Is this OK for MOW (merge-on-write) tables?



##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -2570,5 +2573,64 @@ void 
SegmentIterator::_calculate_pred_in_remaining_conjunct_root(
     }
 }
 
+bool SegmentIterator::_need_read_pk_data(ColumnId cid, 
vectorized::MutableColumnPtr& column,
+                                         size_t nrows_read) {
+    if (_opts.tablet_schema->keys_type() != KeysType::DUP_KEYS) {
+        return false;
+    }
+
+    if (_opts.push_down_agg_type_opt != TPushAggOp::COUNT_ON_INDEX) {
+        return false;
+    }
+
+    if (!_opts.tablet_schema->column(cid).is_key()) {
+        return false;
+    }
+
+    // Whether the primary key index is an exact query
+    {
+        auto get_max_datetime_string = [](FieldType filed_type) {
+            std::string res;
+            if (filed_type == FieldType::OLAP_FIELD_TYPE_DATETIMEV2) {
+                auto max = 
type_limit<PrimitiveTypeTraits<TYPE_DATETIMEV2>::CppType>::max();
+                res.resize(30);
+                max.to_string(res.data());
+            } else if (filed_type == FieldType::OLAP_FIELD_TYPE_INT) {
+                auto max = 
type_limit<PrimitiveTypeTraits<TYPE_INT>::CppType>::max();
+                res = boost::lexical_cast<std::string>(max);
+            }
+            return res;
+        };
+
+        // It is not an exact query if the following two conditions are met:
+        // 1. lower is null
+        // 2. upper is not null and is the maximum value
+        for (auto& key_range : _opts.key_ranges) {
+            auto lower_tuple = key_range.lower_key->to_tuple();
+            auto upper_tuple = key_range.upper_key->to_tuple();
+            for (size_t cid = 0; cid < lower_tuple.size(); cid++) {
+                if (lower_tuple.is_null(cid) && !upper_tuple.is_null(cid)) {
+                    const auto* field = 
key_range.upper_key->column_schema(cid);
+                    auto pk_max = get_max_datetime_string(field->type());
+                    const auto& pk_value = upper_tuple.get_value(cid);
+                    if (pk_max != pk_value) {
+                        return false;
+                    }
+                }
+            }
+        }
+    }
+
+    if (column->is_nullable()) {

Review Comment:
   Do you mean to use default values for the data that is not read?



##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -2570,5 +2573,64 @@ void 
SegmentIterator::_calculate_pred_in_remaining_conjunct_root(
     }
 }
 
+bool SegmentIterator::_need_read_pk_data(ColumnId cid, 
vectorized::MutableColumnPtr& column,
+                                         size_t nrows_read) {
+    if (_opts.tablet_schema->keys_type() != KeysType::DUP_KEYS) {
+        return false;
+    }
+
+    if (_opts.push_down_agg_type_opt != TPushAggOp::COUNT_ON_INDEX) {
+        return false;
+    }
+
+    if (!_opts.tablet_schema->column(cid).is_key()) {

Review Comment:
   If the table key is k1, k2, k3 and the column for cid is k2, is this right?



##########
be/src/olap/rowset/segment_v2/segment_iterator.cpp:
##########
@@ -2570,5 +2573,64 @@ void 
SegmentIterator::_calculate_pred_in_remaining_conjunct_root(
     }
 }
 
+bool SegmentIterator::_need_read_pk_data(ColumnId cid, 
vectorized::MutableColumnPtr& column,
+                                         size_t nrows_read) {
+    if (_opts.tablet_schema->keys_type() != KeysType::DUP_KEYS) {
+        return false;
+    }
+
+    if (_opts.push_down_agg_type_opt != TPushAggOp::COUNT_ON_INDEX) {
+        return false;
+    }
+
+    if (!_opts.tablet_schema->column(cid).is_key()) {
+        return false;
+    }
+
+    // Whether the primary key index is an exact query
+    {
+        auto get_max_datetime_string = [](FieldType filed_type) {
+            std::string res;
+            if (filed_type == FieldType::OLAP_FIELD_TYPE_DATETIMEV2) {
+                auto max = 
type_limit<PrimitiveTypeTraits<TYPE_DATETIMEV2>::CppType>::max();
+                res.resize(30);
+                max.to_string(res.data());
+            } else if (filed_type == FieldType::OLAP_FIELD_TYPE_INT) {
+                auto max = 
type_limit<PrimitiveTypeTraits<TYPE_INT>::CppType>::max();
+                res = boost::lexical_cast<std::string>(max);
+            }
+            return res;
+        };
+
+        // It is not an exact query if the following two conditions are met:
+        // 1. lower is null
+        // 2. upper is not null and is the maximum value
+        for (auto& key_range : _opts.key_ranges) {
+            auto lower_tuple = key_range.lower_key->to_tuple();
+            auto upper_tuple = key_range.upper_key->to_tuple();
+            for (size_t cid = 0; cid < lower_tuple.size(); cid++) {
+                if (lower_tuple.is_null(cid) && !upper_tuple.is_null(cid)) {
+                    const auto* field = 
key_range.upper_key->column_schema(cid);
+                    auto pk_max = get_max_datetime_string(field->type());
+                    const auto& pk_value = upper_tuple.get_value(cid);
+                    if (pk_max != pk_value) {

Review Comment:
   This condition is different from what the comment above describes.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to