github-actions[bot] commented on code in PR #26220:
URL: https://github.com/apache/doris/pull/26220#discussion_r1379798672


##########
be/src/olap/rowset/segment_v2/segment.h:
##########
@@ -125,6 +138,34 @@ class Segment : public 
std::enable_shared_from_this<Segment> {
 
     int64_t meta_mem_usage() const { return _meta_mem_usage; }
 
+    // Get the inner file column's data type
+    // ignore_chidren set to false will treat field as variant
+    // when it contains children with field paths
+    std::shared_ptr<const vectorized::IDataType> get_data_type_of(const Field& 
filed,

Review Comment:
   warning: function 'get_data_type_of' should be marked [[nodiscard]] 
[modernize-use-nodiscard]
   
   ```suggestion
       [[nodiscard]] std::shared_ptr<const vectorized::IDataType> 
get_data_type_of(const Field& filed,
   ```
   



##########
be/src/olap/tablet_schema.h:
##########
@@ -79,6 +81,7 @@ class TabletColumn {
     bool is_bf_column() const { return _is_bf_column; }
     bool has_bitmap_index() const { return _has_bitmap_index; }
     bool is_array_type() const { return _type == 
FieldType::OLAP_FIELD_TYPE_ARRAY; }
+    bool is_jsonb_type() const { return _type == 
FieldType::OLAP_FIELD_TYPE_JSONB; }

Review Comment:
   warning: function 'is_jsonb_type' should be marked [[nodiscard]] 
[modernize-use-nodiscard]
   
   ```suggestion
       [[nodiscard]] bool is_jsonb_type() const { return _type == 
FieldType::OLAP_FIELD_TYPE_JSONB; }
   ```
   



##########
be/src/olap/rowset/segment_v2/segment.h:
##########
@@ -125,6 +138,34 @@
 
     int64_t meta_mem_usage() const { return _meta_mem_usage; }
 
+    // Get the inner file column's data type
+    // ignore_chidren set to false will treat field as variant
+    // when it contains children with field paths
+    std::shared_ptr<const vectorized::IDataType> get_data_type_of(const Field& 
filed,
+                                                                  bool 
ignore_children) const;
+
+    // Check is schema read type equals storage column type
+    bool same_with_storage_type(int32_t cid, const Schema& schema, bool 
ignore_children) const;

Review Comment:
   warning: function 'same_with_storage_type' should be marked [[nodiscard]] 
[modernize-use-nodiscard]
   
   ```suggestion
       [[nodiscard]] bool same_with_storage_type(int32_t cid, const Schema& 
schema, bool ignore_children) const;
   ```
   



##########
be/src/olap/rowset/segment_v2/segment_iterator.h:
##########
@@ -223,16 +226,46 @@ class SegmentIterator : public RowwiseIterator {
                                                  uint16_t* sel_rowid_idx, 
size_t select_size,
                                                  vectorized::MutableColumns* 
mutable_columns);
 
+    Status copy_column_data_by_selector(vectorized::IColumn* input_col_ptr,
+                                        vectorized::MutableColumnPtr& 
output_col,
+                                        uint16_t* sel_rowid_idx, uint16_t 
select_size,
+                                        size_t batch_size);
+
     template <class Container>
     [[nodiscard]] Status _output_column_by_sel_idx(vectorized::Block* block,
                                                    const Container& column_ids,
                                                    uint16_t* sel_rowid_idx, 
uint16_t select_size) {
         SCOPED_RAW_TIMER(&_opts.stats->output_col_ns);
         for (auto cid : column_ids) {
             int block_cid = _schema_block_id_map[cid];
-            
RETURN_IF_ERROR(block->copy_column_data_to_block(_current_return_columns[cid].get(),
-                                                             sel_rowid_idx, 
select_size, block_cid,
+            // Only the additional deleted filter condition need to 
materialize column be at the end of the block
+            // We should not to materialize the column of query engine do not 
need. So here just return OK.
+            // Eg:
+            //      `delete from table where a = 10;`
+            //      `select b from table;`
+            // a column only effective in segment iterator, the block from 
query engine only contain the b column.
+            // so the `block_cid >= data.size()` is true
+            if (block_cid >= block->columns()) {
+                continue;
+            }
+            vectorized::DataTypePtr storage_type =
+                    _segment->get_data_type_of(*_schema->column(cid), false);
+            if (storage_type && 
!storage_type->equals(*block->get_by_position(block_cid).type)) {
+                // Do additional cast
+                vectorized::MutableColumnPtr tmp = 
storage_type->create_column();

Review Comment:
   warning: variable 'tmp' is not initialized [cppcoreguidelines-init-variables]
   
   ```suggestion
                   vectorized::MutableColumnPtr tmp = 0 = 
storage_type->create_column();
   ```
   



##########
be/src/olap/tablet_schema.h:
##########
@@ -203,16 +207,15 @@
 
         return 0;
     }
-    TabletIndex(const TabletIndex& other) {
-        _index_id = other._index_id;
-        _index_name = other._index_name;
-        _index_type = other._index_type;
-        _col_unique_ids = other._col_unique_ids;
-        _properties = other._properties;
-    }
+
+    const std::string& get_escaped_index_suffix_path() const { return 
_escaped_index_suffix_path; }

Review Comment:
   warning: function 'get_escaped_index_suffix_path' should be marked 
[[nodiscard]] [modernize-use-nodiscard]
   
   ```suggestion
       [[nodiscard]] const std::string& get_escaped_index_suffix_path() const { 
return _escaped_index_suffix_path; }
   ```
   



##########
be/src/olap/tablet_schema.h:
##########
@@ -282,10 +287,12 @@
     segment_v2::CompressionTypePB compression_type() const { return 
_compression_type; }
 
     const std::vector<TabletIndex>& indexes() const { return _indexes; }
-    std::vector<const TabletIndex*> get_indexes_for_column(int32_t 
col_unique_id) const;
-    bool has_inverted_index(int32_t col_unique_id) const;
-    bool has_inverted_index_with_index_id(int32_t index_id) const;
-    const TabletIndex* get_inverted_index(int32_t col_unique_id) const;
+    std::vector<const TabletIndex*> get_indexes_for_column(const TabletColumn& 
col) const;
+    bool has_inverted_index(const TabletColumn& col) const;
+    bool has_inverted_index_with_index_id(int32_t index_id, const std::string& 
suffix_path) const;
+    const TabletIndex* get_inverted_index(const TabletColumn& col) const;
+    const TabletIndex* get_inverted_index(int32_t col_unique_id,

Review Comment:
   warning: function 'get_inverted_index' should be marked [[nodiscard]] 
[modernize-use-nodiscard]
   
   ```suggestion
       [[nodiscard]] const TabletIndex* get_inverted_index(int32_t 
col_unique_id,
   ```
   



##########
be/src/vec/columns/column.h:
##########
@@ -654,6 +660,18 @@
         return 0;
     }
 
+    /// Returns ratio of values in column, that are equal to default value of 
column.
+    /// Checks only @sample_ratio ratio of rows.
+    virtual double get_ratio_of_default_rows(double sample_ratio = 1.0) const {

Review Comment:
   warning: function 'get_ratio_of_default_rows' should be marked [[nodiscard]] 
[modernize-use-nodiscard]
   
   ```suggestion
       [[nodiscard]] virtual double get_ratio_of_default_rows(double 
sample_ratio = 1.0) const {
   ```
   



##########
be/src/vec/columns/column.h:
##########
@@ -603,6 +607,8 @@
 
     virtual bool is_hll() const { return false; }
 
+    virtual bool is_variant() const { return false; }

Review Comment:
   warning: function 'is_variant' should be marked [[nodiscard]] 
[modernize-use-nodiscard]
   
   ```suggestion
       [[nodiscard]] virtual bool is_variant() const { return false; }
   ```
   



##########
be/src/udf/udf.h:
##########
@@ -79,6 +79,22 @@ class FunctionContext {
         return _check_overflow_for_decimal = check_overflow_for_decimal;
     }
 
+    void set_string_as_jsonb_string(bool string_as_jsonb_string) {
+        _string_as_jsonb_string = string_as_jsonb_string;
+    }
+
+    void set_jsonb_string_as_string(bool jsonb_string_as_string) {
+        _jsonb_string_as_string = jsonb_string_as_string;
+    }
+
+    // Cast flag, when enable string_as_jsonb_string, string casting to jsonb 
will not parse string
+    // instead just insert a string literal
+    bool string_as_jsonb_string() const { return _string_as_jsonb_string; }

Review Comment:
   warning: function 'string_as_jsonb_string' should be marked [[nodiscard]] 
[modernize-use-nodiscard]
   
   ```suggestion
       [[nodiscard]] bool string_as_jsonb_string() const { return 
_string_as_jsonb_string; }
   ```
   



##########
be/src/vec/columns/column_dictionary.h:
##########
@@ -114,14 +114,11 @@ class ColumnDictionary final : public COWHelper<IColumn, 
ColumnDictionary<T>> {
 
     void reserve(size_t n) override { _codes.reserve(n); }
 
-    [[noreturn]] TypeIndex get_data_type() const override {
-        LOG(FATAL) << "ColumnDictionary get_data_type not implemeted";
-    }
-
     const char* get_family_name() const override { return "ColumnDictionary"; }
 
-    [[noreturn]] MutableColumnPtr clone_resized(size_t size) const override {
-        LOG(FATAL) << "clone_resized not supported in ColumnDictionary";
+    MutableColumnPtr clone_resized(size_t size) const override {

Review Comment:
   warning: function 'clone_resized' should be marked [[nodiscard]] 
[modernize-use-nodiscard]
   
   ```suggestion
       [[nodiscard]] MutableColumnPtr clone_resized(size_t size) const override 
{
   ```
   



##########
be/src/vec/columns/column_impl.h:
##########
@@ -86,4 +86,33 @@
     }
 }
 
+template <typename Derived>
+double IColumn::get_ratio_of_default_rows_impl(double sample_ratio) const {
+    if (sample_ratio <= 0.0 || sample_ratio > 1.0) {
+        LOG(FATAL) << "Value of 'sample_ratio' must be in interval (0.0; 1.0], 
but got: "
+                   << sample_ratio;
+    }
+    static constexpr auto max_number_of_rows_for_full_search = 1000;
+    size_t num_rows = size();
+    size_t num_sampled_rows = std::min(static_cast<size_t>(num_rows * 
sample_ratio), num_rows);
+    size_t num_checked_rows = 0;
+    size_t res = 0;
+    if (num_sampled_rows == num_rows || num_rows <= 
max_number_of_rows_for_full_search) {
+        for (size_t i = 0; i < num_rows; ++i)
+            res += static_cast<const Derived&>(*this).is_default_at(i);

Review Comment:
   warning: statement should be inside braces 
[readability-braces-around-statements]
   
   ```suggestion
           for (size_t i = 0; i < num_rows; ++i) {
               res += static_cast<const Derived&>(*this).is_default_at(i);
   }
   ```
   



##########
be/src/olap/tablet_schema.h:
##########
@@ -282,10 +287,12 @@
     segment_v2::CompressionTypePB compression_type() const { return 
_compression_type; }
 
     const std::vector<TabletIndex>& indexes() const { return _indexes; }
-    std::vector<const TabletIndex*> get_indexes_for_column(int32_t 
col_unique_id) const;
-    bool has_inverted_index(int32_t col_unique_id) const;
-    bool has_inverted_index_with_index_id(int32_t index_id) const;
-    const TabletIndex* get_inverted_index(int32_t col_unique_id) const;
+    std::vector<const TabletIndex*> get_indexes_for_column(const TabletColumn& 
col) const;

Review Comment:
   warning: function 'get_indexes_for_column' should be marked [[nodiscard]] 
[modernize-use-nodiscard]
   
   ```suggestion
       [[nodiscard]] std::vector<const TabletIndex*> 
get_indexes_for_column(const TabletColumn& col) const;
   ```
   



##########
be/src/vec/columns/column_nothing.h:
##########
@@ -0,0 +1,46 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// 
https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/ColumnNothing.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/columns/column_dummy.h"
+
+namespace doris::vectorized {
+
+class ColumnNothing final : public COWHelper<IColumnDummy, ColumnNothing> {
+private:
+    friend class COWHelper<IColumnDummy, ColumnNothing>;
+
+    ColumnNothing(size_t s_) { s = s_; }
+
+    ColumnNothing(const ColumnNothing&) = default;
+
+public:
+    const char* get_family_name() const override { return "Nothing"; }

Review Comment:
   warning: function 'get_family_name' should be marked [[nodiscard]] 
[modernize-use-nodiscard]
   
   ```suggestion
       [[nodiscard]] const char* get_family_name() const override { return 
"Nothing"; }
   ```
   



##########
be/src/vec/columns/column_nothing.h:
##########
@@ -0,0 +1,46 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// 
https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/ColumnNothing.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/columns/column_dummy.h"
+
+namespace doris::vectorized {
+
+class ColumnNothing final : public COWHelper<IColumnDummy, ColumnNothing> {
+private:
+    friend class COWHelper<IColumnDummy, ColumnNothing>;
+
+    ColumnNothing(size_t s_) { s = s_; }
+
+    ColumnNothing(const ColumnNothing&) = default;
+
+public:
+    const char* get_family_name() const override { return "Nothing"; }
+    MutableColumnPtr clone_dummy(size_t s_) const override { return 
ColumnNothing::create(s_); }

Review Comment:
   warning: function 'clone_dummy' should be marked [[nodiscard]] 
[modernize-use-nodiscard]
   
   ```suggestion
       [[nodiscard]] MutableColumnPtr clone_dummy(size_t s_) const override { 
return ColumnNothing::create(s_); }
   ```
   



##########
be/src/vec/columns/column_nothing.h:
##########
@@ -0,0 +1,46 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// 
https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/ColumnNothing.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/columns/column_dummy.h"
+
+namespace doris::vectorized {
+
+class ColumnNothing final : public COWHelper<IColumnDummy, ColumnNothing> {
+private:
+    friend class COWHelper<IColumnDummy, ColumnNothing>;
+
+    ColumnNothing(size_t s_) { s = s_; }
+
+    ColumnNothing(const ColumnNothing&) = default;
+
+public:
+    const char* get_family_name() const override { return "Nothing"; }
+    MutableColumnPtr clone_dummy(size_t s_) const override { return 
ColumnNothing::create(s_); }
+
+    bool can_be_inside_nullable() const override { return true; }

Review Comment:
   warning: function 'can_be_inside_nullable' should be marked [[nodiscard]] 
[modernize-use-nodiscard]
   
   ```suggestion
       [[nodiscard]] bool can_be_inside_nullable() const override { return 
true; }
   ```
   



##########
be/src/olap/rowset/segment_v2/segment_iterator.h:
##########
@@ -223,16 +226,46 @@
                                                  uint16_t* sel_rowid_idx, 
size_t select_size,
                                                  vectorized::MutableColumns* 
mutable_columns);
 
+    Status copy_column_data_by_selector(vectorized::IColumn* input_col_ptr,
+                                        vectorized::MutableColumnPtr& 
output_col,
+                                        uint16_t* sel_rowid_idx, uint16_t 
select_size,
+                                        size_t batch_size);
+
     template <class Container>
     [[nodiscard]] Status _output_column_by_sel_idx(vectorized::Block* block,
                                                    const Container& column_ids,
                                                    uint16_t* sel_rowid_idx, 
uint16_t select_size) {
         SCOPED_RAW_TIMER(&_opts.stats->output_col_ns);
         for (auto cid : column_ids) {
             int block_cid = _schema_block_id_map[cid];
-            
RETURN_IF_ERROR(block->copy_column_data_to_block(_current_return_columns[cid].get(),
-                                                             sel_rowid_idx, 
select_size, block_cid,
+            // Only the additional deleted filter condition need to 
materialize column be at the end of the block
+            // We should not to materialize the column of query engine do not 
need. So here just return OK.
+            // Eg:
+            //      `delete from table where a = 10;`
+            //      `select b from table;`
+            // a column only effective in segment iterator, the block from 
query engine only contain the b column.
+            // so the `block_cid >= data.size()` is true
+            if (block_cid >= block->columns()) {
+                continue;
+            }
+            vectorized::DataTypePtr storage_type =
+                    _segment->get_data_type_of(*_schema->column(cid), false);
+            if (storage_type && 
!storage_type->equals(*block->get_by_position(block_cid).type)) {
+                // Do additional cast
+                vectorized::MutableColumnPtr tmp = 
storage_type->create_column();
+                
RETURN_IF_ERROR(copy_column_data_by_selector(_current_return_columns[cid].get(),
+                                                             tmp, 
sel_rowid_idx, select_size,
                                                              
_opts.block_row_max));
+                RETURN_IF_ERROR(vectorized::schema_util::cast_column(
+                        {tmp->get_ptr(), storage_type, ""}, 
block->get_by_position(block_cid).type,
+                        &block->get_by_position(block_cid).column));
+            } else {
+                vectorized::MutableColumnPtr output_column =

Review Comment:
   warning: variable 'output_column' is not initialized 
[cppcoreguidelines-init-variables]
   
   ```suggestion
                   vectorized::MutableColumnPtr output_column = 0 =
   ```
   



##########
be/src/olap/tablet_schema.h:
##########
@@ -282,10 +287,12 @@
     segment_v2::CompressionTypePB compression_type() const { return 
_compression_type; }
 
     const std::vector<TabletIndex>& indexes() const { return _indexes; }
-    std::vector<const TabletIndex*> get_indexes_for_column(int32_t 
col_unique_id) const;
-    bool has_inverted_index(int32_t col_unique_id) const;
-    bool has_inverted_index_with_index_id(int32_t index_id) const;
-    const TabletIndex* get_inverted_index(int32_t col_unique_id) const;
+    std::vector<const TabletIndex*> get_indexes_for_column(const TabletColumn& 
col) const;
+    bool has_inverted_index(const TabletColumn& col) const;
+    bool has_inverted_index_with_index_id(int32_t index_id, const std::string& 
suffix_path) const;
+    const TabletIndex* get_inverted_index(const TabletColumn& col) const;

Review Comment:
   warning: function 'get_inverted_index' should be marked [[nodiscard]] 
[modernize-use-nodiscard]
   
   ```suggestion
       [[nodiscard]] const TabletIndex* get_inverted_index(const TabletColumn& 
col) const;
   ```
   



##########
be/src/vec/columns/column.h:
##########
@@ -144,15 +144,19 @@ class IColumn : public COW<IColumn> {
         return nullptr;
     }
 
+    /// Some columns may require finalization before using of other operations.
+    virtual void finalize() {}
+
+    MutablePtr clone_finalized() const {

Review Comment:
   warning: function 'clone_finalized' should be marked [[nodiscard]] 
[modernize-use-nodiscard]
   
   ```suggestion
       [[nodiscard]] MutablePtr clone_finalized() const {
   ```
   



##########
be/src/olap/tablet_schema.h:
##########
@@ -282,10 +287,12 @@
     segment_v2::CompressionTypePB compression_type() const { return 
_compression_type; }
 
     const std::vector<TabletIndex>& indexes() const { return _indexes; }
-    std::vector<const TabletIndex*> get_indexes_for_column(int32_t 
col_unique_id) const;
-    bool has_inverted_index(int32_t col_unique_id) const;
-    bool has_inverted_index_with_index_id(int32_t index_id) const;
-    const TabletIndex* get_inverted_index(int32_t col_unique_id) const;
+    std::vector<const TabletIndex*> get_indexes_for_column(const TabletColumn& 
col) const;
+    bool has_inverted_index(const TabletColumn& col) const;

Review Comment:
   warning: function 'has_inverted_index' should be marked [[nodiscard]] 
[modernize-use-nodiscard]
   
   ```suggestion
       [[nodiscard]] bool has_inverted_index(const TabletColumn& col) const;
   ```
   



##########
be/src/udf/udf.h:
##########
@@ -79,6 +79,22 @@
         return _check_overflow_for_decimal = check_overflow_for_decimal;
     }
 
+    void set_string_as_jsonb_string(bool string_as_jsonb_string) {
+        _string_as_jsonb_string = string_as_jsonb_string;
+    }
+
+    void set_jsonb_string_as_string(bool jsonb_string_as_string) {
+        _jsonb_string_as_string = jsonb_string_as_string;
+    }
+
+    // Cast flag, when enable string_as_jsonb_string, string casting to jsonb 
will not parse string
+    // instead just insert a string literal
+    bool string_as_jsonb_string() const { return _string_as_jsonb_string; }
+
+    // Cast flag, when enable jsonb_string_as_string, jsonb string casting to 
string will not parse string
+    // instead just insert a string literal
+    bool jsonb_string_as_string() const { return _jsonb_string_as_string; }

Review Comment:
   warning: function 'jsonb_string_as_string' should be marked [[nodiscard]] 
[modernize-use-nodiscard]
   
   ```suggestion
       [[nodiscard]] bool jsonb_string_as_string() const { return 
_jsonb_string_as_string; }
   ```
   



##########
be/src/olap/tablet_schema.h:
##########
@@ -282,10 +287,12 @@
     segment_v2::CompressionTypePB compression_type() const { return 
_compression_type; }
 
     const std::vector<TabletIndex>& indexes() const { return _indexes; }
-    std::vector<const TabletIndex*> get_indexes_for_column(int32_t 
col_unique_id) const;
-    bool has_inverted_index(int32_t col_unique_id) const;
-    bool has_inverted_index_with_index_id(int32_t index_id) const;
-    const TabletIndex* get_inverted_index(int32_t col_unique_id) const;
+    std::vector<const TabletIndex*> get_indexes_for_column(const TabletColumn& 
col) const;
+    bool has_inverted_index(const TabletColumn& col) const;
+    bool has_inverted_index_with_index_id(int32_t index_id, const std::string& 
suffix_path) const;

Review Comment:
   warning: function 'has_inverted_index_with_index_id' should be marked 
[[nodiscard]] [modernize-use-nodiscard]
   
   ```suggestion
       [[nodiscard]] bool has_inverted_index_with_index_id(int32_t index_id, 
const std::string& suffix_path) const;
   ```
   



##########
be/src/vec/columns/column.h:
##########
@@ -654,6 +660,18 @@
         return 0;
     }
 
+    /// Returns ratio of values in column, that are equal to default value of 
column.
+    /// Checks only @sample_ratio ratio of rows.
+    virtual double get_ratio_of_default_rows(double sample_ratio = 1.0) const {
+        LOG(FATAL) << fmt::format("get_ratio_of_default_rows of column {} are 
not implemented.",
+                                  get_name());
+        return 0.0;
+    }
+
+    /// Template is to devirtualize calls to 'isDefaultAt' method.
+    template <typename Derived>
+    double get_ratio_of_default_rows_impl(double sample_ratio) const;

Review Comment:
   warning: function 'get_ratio_of_default_rows_impl' should be marked 
[[nodiscard]] [modernize-use-nodiscard]
   
   ```suggestion
       [[nodiscard]] double get_ratio_of_default_rows_impl(double sample_ratio) 
const;
   ```
   



##########
be/src/vec/columns/column_const.h:
##########
@@ -130,6 +128,8 @@ class ColumnConst final : public COWHelper<IColumn, 
ColumnConst> {
 
     void pop_back(size_t n) override { s -= n; }
 
+    bool can_be_inside_nullable() const override { return true; }

Review Comment:
   warning: function 'can_be_inside_nullable' should be marked [[nodiscard]] 
[modernize-use-nodiscard]
   
   ```suggestion
       [[nodiscard]] bool can_be_inside_nullable() const override { return 
true; }
   ```
   



##########
be/src/vec/columns/column_impl.h:
##########
@@ -86,4 +86,33 @@ void 
IColumn::get_indices_of_non_default_rows_impl(IColumn::Offsets64& indices,
     }
 }
 
+template <typename Derived>
+double IColumn::get_ratio_of_default_rows_impl(double sample_ratio) const {
+    if (sample_ratio <= 0.0 || sample_ratio > 1.0) {
+        LOG(FATAL) << "Value of 'sample_ratio' must be in interval (0.0; 1.0], 
but got: "
+                   << sample_ratio;
+    }
+    static constexpr auto max_number_of_rows_for_full_search = 1000;
+    size_t num_rows = size();
+    size_t num_sampled_rows = std::min(static_cast<size_t>(num_rows * 
sample_ratio), num_rows);

Review Comment:
   warning: variable 'num_sampled_rows' is not initialized 
[cppcoreguidelines-init-variables]
   
   ```suggestion
       size_t num_sampled_rows = 0 = std::min(static_cast<size_t>(num_rows * 
sample_ratio), num_rows);
   ```
   



##########
be/src/vec/columns/column_array.h:
##########
@@ -256,6 +255,8 @@ class ColumnArray final : public COWHelper<IColumn, 
ColumnArray> {
 
     ColumnPtr index(const IColumn& indexes, size_t limit) const override;
 
+    double get_ratio_of_default_rows(double sample_ratio) const override;

Review Comment:
   warning: function 'get_ratio_of_default_rows' should be marked [[nodiscard]] 
[modernize-use-nodiscard]
   
   ```suggestion
       [[nodiscard]] double get_ratio_of_default_rows(double sample_ratio) 
const override;
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to