github-actions[bot] commented on code in PR #38216:
URL: https://github.com/apache/doris/pull/38216#discussion_r1686444046


##########
be/src/olap/rowset/segment_v2/segment_writer.cpp:
##########
@@ -721,110 +784,136 @@ Status 
SegmentWriter::append_block_with_partial_content(const vectorized::Block*
     return Status::OK();
 }
 
-Status SegmentWriter::fill_missing_columns(vectorized::MutableColumns& 
mutable_full_columns,
-                                           const std::vector<bool>& 
use_default_or_null_flag,
-                                           bool has_default_or_nullable,
-                                           const size_t& segment_start_pos,
-                                           const vectorized::Block* block) {
+void SegmentWriter::calc_indicator_maps(uint32_t row_pos, uint32_t num_rows,
+                                        const IndicatorMapsVertical& 
indicator_maps_vertical) {
+    _indicator_maps = std::make_shared<std::map<uint32_t, 
std::vector<uint32_t>>>();
+    for (auto [cid, indicator_map] : indicator_maps_vertical) {
+        for (uint32_t pos = row_pos; pos < row_pos + num_rows; pos++) {
+            if (indicator_map != nullptr && indicator_map[pos] != 0) {
+                (*_indicator_maps)[pos].emplace_back(cid);
+            }
+        }
+    }
+}
+
+// Consider a merge-on-write unique table with colums [k1, k2, v1, v1, v2, v3, 
v4, v5] where k1, k2 are key columns
+// and v1, v2, v3, v4, v5 are value columns. The table has the following data:
+//  k1|k2|v1|v2|v3|v4|v5
+//  1 |1 |1 |1 |1 |1 |1
+//  2 |2 |2 |2 |2 |2 |2
+//  3 |3 |3 |3 |3 |3 |3
+//  4 |4 |4 |4 |4 |4 |4
+//  5 |5 |5 |5 |5 |5 |5
+// The user inserts the following data for partial update. Charactor `?` means 
that the cell is filled
+// with indicator value(currently we use null as indicator value).
+// row_num   k1|k2|v1|v2|v3
+// 1         1 |1 |10|10|10
+// 2         2 |2 |? |20|20
+// 3         3 |3 |30|30|?
+// 4         4 |4 |40|40|40
+// 5         5 |5 |50|? |50
+// Here, full_columns = [k1, k2, v1, v2, v3, v4, v5]
+//       old_full_read_columns = [v4, v5], the old values from the previous 
rows will be read into these columns.
+//       old_point_read_columns = [v1, v2, v3], the old values from the 
previous rows will be read into these columns if the correspoding columns in 
the input block has cell with indicator value.
+// Becase the column is immutable, filled_including_value_columns will store 
the data merged from
+// the original input block and old_point_read_columns. After the insertion, 
the data in the table will be:
+//  k1|k2|v1|v2|v3|v4|v5
+//  1 |1 |10|10|10|1 |1
+//  2 |2 |2 |20|20|2 |2
+//  3 |3 |30|30|3 |3 |3
+//  4 |4 |40|40|40|4 |4
+//  5 |5 |50|5 |50|5 |5
+Status SegmentWriter::fill_missing_columns(

Review Comment:
   warning: function 'fill_missing_columns' has cognitive complexity of 69 
(threshold 50) [readability-function-cognitive-complexity]
   ```cpp
   Status SegmentWriter::fill_missing_columns(
                         ^
   ```
   <details>
   <summary>Additional context</summary>
   
   **be/src/olap/rowset/segment_v2/segment_writer.cpp:831:** +1, including 
nesting penalty of 0, nesting level increased to 1
   ```cpp
       if (config::is_cloud_mode()) {
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment_writer.cpp:850:** +1, including 
nesting penalty of 0, nesting level increased to 1
   ```cpp
       if (is_unique_key_replace_if_not_null) {
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment_writer.cpp:851:** +2, including 
nesting penalty of 1, nesting level increased to 2
   ```cpp
           RETURN_IF_ERROR(_tablet->read_columns_by_plan(
           ^
   ```
   **be/src/common/status.h:618:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment_writer.cpp:851:** +3, including 
nesting penalty of 2, nesting level increased to 3
   ```cpp
           RETURN_IF_ERROR(_tablet->read_columns_by_plan(
           ^
   ```
   **be/src/common/status.h:620:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment_writer.cpp:855:** +1, nesting level 
increased to 1
   ```cpp
       } else {
         ^
   ```
   **be/src/olap/rowset/segment_v2/segment_writer.cpp:856:** +2, including 
nesting penalty of 1, nesting level increased to 2
   ```cpp
           RETURN_IF_ERROR(_tablet->read_columns_by_plan(_tablet_schema, 
_rsid_to_rowset, read_plan,
           ^
   ```
   **be/src/common/status.h:618:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment_writer.cpp:856:** +3, including 
nesting penalty of 2, nesting level increased to 3
   ```cpp
           RETURN_IF_ERROR(_tablet->read_columns_by_plan(_tablet_schema, 
_rsid_to_rowset, read_plan,
           ^
   ```
   **be/src/common/status.h:620:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment_writer.cpp:866:** +1, including 
nesting penalty of 0, nesting level increased to 1
   ```cpp
       if (const vectorized::ColumnWithTypeAndName* delete_sign_column =
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment_writer.cpp:874:** +1, including 
nesting penalty of 0, nesting level increased to 1
   ```cpp
       if (has_default_or_nullable || delete_sign_column_data != nullptr) {
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment_writer.cpp:874:** +1
   ```cpp
       if (has_default_or_nullable || delete_sign_column_data != nullptr) {
                                   ^
   ```
   **be/src/olap/rowset/segment_v2/segment_writer.cpp:875:** +2, including 
nesting penalty of 1, nesting level increased to 2
   ```cpp
           for (auto i = 0; i < cids_full_read.size(); ++i) {
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment_writer.cpp:877:** +3, including 
nesting penalty of 2, nesting level increased to 3
   ```cpp
               if (column.has_default_value()) {
               ^
   ```
   **be/src/olap/rowset/segment_v2/segment_writer.cpp:882:** +4, including 
nesting penalty of 3, nesting level increased to 4
   ```cpp
                   
RETURN_IF_ERROR(old_full_read_block.get_by_position(i).type->from_string(
                   ^
   ```
   **be/src/common/status.h:618:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
       do {                                \
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment_writer.cpp:882:** +5, including 
nesting penalty of 4, nesting level increased to 5
   ```cpp
                   
RETURN_IF_ERROR(old_full_read_block.get_by_position(i).type->from_string(
                   ^
   ```
   **be/src/common/status.h:620:** expanded from macro 'RETURN_IF_ERROR'
   ```cpp
           if (UNLIKELY(!_status_.ok())) { \
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment_writer.cpp:889:** +1, including 
nesting penalty of 0, nesting level increased to 1
   ```cpp
       for (auto idx = 0; idx < use_default_or_null_flag.size(); idx++) {
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment_writer.cpp:897:** +2, including 
nesting penalty of 1, nesting level increased to 2
   ```cpp
           if (use_default_or_null_flag[idx] || (delete_sign_column_data != 
nullptr &&
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment_writer.cpp:897:** +1
   ```cpp
           if (use_default_or_null_flag[idx] || (delete_sign_column_data != 
nullptr &&
                                             ^
   ```
   **be/src/olap/rowset/segment_v2/segment_writer.cpp:897:** +1
   ```cpp
           if (use_default_or_null_flag[idx] || (delete_sign_column_data != 
nullptr &&
                                                                                
    ^
   ```
   **be/src/olap/rowset/segment_v2/segment_writer.cpp:899:** +3, including 
nesting penalty of 2, nesting level increased to 3
   ```cpp
               for (auto i = 0; i < cids_full_read.size(); ++i) {
               ^
   ```
   **be/src/olap/rowset/segment_v2/segment_writer.cpp:904:** +4, including 
nesting penalty of 3, nesting level increased to 4
   ```cpp
                   if (tablet_column.has_default_value()) {
                   ^
   ```
   **be/src/olap/rowset/segment_v2/segment_writer.cpp:906:** +1, nesting level 
increased to 4
   ```cpp
                   } else if (tablet_column.is_nullable()) {
                          ^
   ```
   **be/src/olap/rowset/segment_v2/segment_writer.cpp:910:** +1, nesting level 
increased to 4
   ```cpp
                   } else if (_tablet_schema->auto_increment_column() == 
tablet_column.name()) {
                          ^
   ```
   **be/src/olap/rowset/segment_v2/segment_writer.cpp:911:** nesting level 
increased to 5
   ```cpp
                       const auto& column = *DORIS_TRY(
                                             ^
   ```
   **be/src/common/status.h:700:** expanded from macro 'DORIS_TRY'
   ```cpp
       ({                                           \
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment_writer.cpp:911:** +6, including 
nesting penalty of 5, nesting level increased to 6
   ```cpp
                       const auto& column = *DORIS_TRY(
                                             ^
   ```
   **be/src/common/status.h:703:** expanded from macro 'DORIS_TRY'
   ```cpp
           if (!res.has_value()) [[unlikely]] {     \
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment_writer.cpp:921:** +1, nesting level 
increased to 4
   ```cpp
                   } else {
                     ^
   ```
   **be/src/olap/rowset/segment_v2/segment_writer.cpp:933:** +2, including 
nesting penalty of 1, nesting level increased to 2
   ```cpp
           for (auto i = 0; i < cids_full_read.size(); ++i) {
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment_writer.cpp:935:** +3, including 
nesting penalty of 2, nesting level increased to 3
   ```cpp
               if (full_block->get_by_position(cid).name != 
BeConsts::ROW_STORE_COL) {
               ^
   ```
   **be/src/olap/rowset/segment_v2/segment_writer.cpp:942:** +2, including 
nesting penalty of 1, nesting level increased to 2
   ```cpp
           if (is_unique_key_replace_if_not_null) {
           ^
   ```
   **be/src/olap/rowset/segment_v2/segment_writer.cpp:943:** +3, including 
nesting penalty of 2, nesting level increased to 3
   ```cpp
               for (size_t i = 0; i < cids_point_read.size(); i++) {
               ^
   ```
   **be/src/olap/rowset/segment_v2/segment_writer.cpp:945:** +4, including 
nesting penalty of 3, nesting level increased to 4
   ```cpp
                   if (parital_update_cols_read_index[cid].contains(idx + 
segment_start_pos)) {
                   ^
   ```
   **be/src/olap/rowset/segment_v2/segment_writer.cpp:951:** +1, nesting level 
increased to 4
   ```cpp
                   } else {
                     ^
   ```
   **be/src/olap/rowset/segment_v2/segment_writer.cpp:959:** +1, including 
nesting penalty of 0, nesting level increased to 1
   ```cpp
       if (is_unique_key_replace_if_not_null) {
       ^
   ```
   **be/src/olap/rowset/segment_v2/segment_writer.cpp:960:** +2, including 
nesting penalty of 1, nesting level increased to 2
   ```cpp
           for (size_t i = 0; i < cids_point_read.size(); i++) {
           ^
   ```
   
   </details>
   



##########
be/src/olap/rowset/segment_v2/segment_writer.cpp:
##########
@@ -721,110 +784,136 @@
     return Status::OK();
 }
 
-Status SegmentWriter::fill_missing_columns(vectorized::MutableColumns& 
mutable_full_columns,
-                                           const std::vector<bool>& 
use_default_or_null_flag,
-                                           bool has_default_or_nullable,
-                                           const size_t& segment_start_pos,
-                                           const vectorized::Block* block) {
+void SegmentWriter::calc_indicator_maps(uint32_t row_pos, uint32_t num_rows,
+                                        const IndicatorMapsVertical& 
indicator_maps_vertical) {
+    _indicator_maps = std::make_shared<std::map<uint32_t, 
std::vector<uint32_t>>>();
+    for (auto [cid, indicator_map] : indicator_maps_vertical) {
+        for (uint32_t pos = row_pos; pos < row_pos + num_rows; pos++) {
+            if (indicator_map != nullptr && indicator_map[pos] != 0) {
+                (*_indicator_maps)[pos].emplace_back(cid);
+            }
+        }
+    }
+}
+
+// Consider a merge-on-write unique table with colums [k1, k2, v1, v1, v2, v3, 
v4, v5] where k1, k2 are key columns
+// and v1, v2, v3, v4, v5 are value columns. The table has the following data:
+//  k1|k2|v1|v2|v3|v4|v5
+//  1 |1 |1 |1 |1 |1 |1
+//  2 |2 |2 |2 |2 |2 |2
+//  3 |3 |3 |3 |3 |3 |3
+//  4 |4 |4 |4 |4 |4 |4
+//  5 |5 |5 |5 |5 |5 |5
+// The user inserts the following data for partial update. Charactor `?` means 
that the cell is filled
+// with indicator value(currently we use null as indicator value).
+// row_num   k1|k2|v1|v2|v3
+// 1         1 |1 |10|10|10
+// 2         2 |2 |? |20|20
+// 3         3 |3 |30|30|?
+// 4         4 |4 |40|40|40
+// 5         5 |5 |50|? |50
+// Here, full_columns = [k1, k2, v1, v2, v3, v4, v5]
+//       old_full_read_columns = [v4, v5], the old values from the previous 
rows will be read into these columns.
+//       old_point_read_columns = [v1, v2, v3], the old values from the 
previous rows will be read into these columns if the correspoding columns in 
the input block has cell with indicator value.
+// Becase the column is immutable, filled_including_value_columns will store 
the data merged from
+// the original input block and old_point_read_columns. After the insertion, 
the data in the table will be:
+//  k1|k2|v1|v2|v3|v4|v5
+//  1 |1 |10|10|10|1 |1
+//  2 |2 |2 |20|20|2 |2
+//  3 |3 |30|30|3 |3 |3
+//  4 |4 |40|40|40|4 |4
+//  5 |5 |50|5 |50|5 |5
+Status SegmentWriter::fill_missing_columns(

Review Comment:
   warning: function 'fill_missing_columns' exceeds recommended size/complexity 
thresholds [readability-function-size]
   ```cpp
   Status SegmentWriter::fill_missing_columns(
                         ^
   ```
   <details>
   <summary>Additional context</summary>
   
   **be/src/olap/rowset/segment_v2/segment_writer.cpp:825:** 136 lines 
including whitespace and comments (threshold 80)
   ```cpp
   Status SegmentWriter::fill_missing_columns(
                         ^
   ```
   
   </details>
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to