github-actions[bot] commented on code in PR #38216:
URL: https://github.com/apache/doris/pull/38216#discussion_r1686444046
##########
be/src/olap/rowset/segment_v2/segment_writer.cpp:
##########
@@ -721,110 +784,136 @@ Status
SegmentWriter::append_block_with_partial_content(const vectorized::Block*
return Status::OK();
}
-Status SegmentWriter::fill_missing_columns(vectorized::MutableColumns&
mutable_full_columns,
- const std::vector<bool>&
use_default_or_null_flag,
- bool has_default_or_nullable,
- const size_t& segment_start_pos,
- const vectorized::Block* block) {
+void SegmentWriter::calc_indicator_maps(uint32_t row_pos, uint32_t num_rows,
+ const IndicatorMapsVertical&
indicator_maps_vertical) {
+ _indicator_maps = std::make_shared<std::map<uint32_t,
std::vector<uint32_t>>>();
+ for (auto [cid, indicator_map] : indicator_maps_vertical) {
+ for (uint32_t pos = row_pos; pos < row_pos + num_rows; pos++) {
+ if (indicator_map != nullptr && indicator_map[pos] != 0) {
+ (*_indicator_maps)[pos].emplace_back(cid);
+ }
+ }
+ }
+}
+
+// Consider a merge-on-write unique table with colums [k1, k2, v1, v1, v2, v3,
v4, v5] where k1, k2 are key columns
+// and v1, v2, v3, v4, v5 are value columns. The table has the following data:
+// k1|k2|v1|v2|v3|v4|v5
+// 1 |1 |1 |1 |1 |1 |1
+// 2 |2 |2 |2 |2 |2 |2
+// 3 |3 |3 |3 |3 |3 |3
+// 4 |4 |4 |4 |4 |4 |4
+// 5 |5 |5 |5 |5 |5 |5
+// The user inserts the following data for partial update. Charactor `?` means
that the cell is filled
+// with indicator value(currently we use null as indicator value).
+// row_num k1|k2|v1|v2|v3
+// 1 1 |1 |10|10|10
+// 2 2 |2 |? |20|20
+// 3 3 |3 |30|30|?
+// 4 4 |4 |40|40|40
+// 5 5 |5 |50|? |50
+// Here, full_columns = [k1, k2, v1, v2, v3, v4, v5]
+// old_full_read_columns = [v4, v5], the old values from the previous
rows will be read into these columns.
+// old_point_read_columns = [v1, v2, v3], the old values from the
previous rows will be read into these columns if the correspoding columns in
the input block has cell with indicator value.
+// Becase the column is immutable, filled_including_value_columns will store
the data merged from
+// the original input block and old_point_read_columns. After the insertion,
the data in the table will be:
+// k1|k2|v1|v2|v3|v4|v5
+// 1 |1 |10|10|10|1 |1
+// 2 |2 |2 |20|20|2 |2
+// 3 |3 |30|30|3 |3 |3
+// 4 |4 |40|40|40|4 |4
+// 5 |5 |50|5 |50|5 |5
+Status SegmentWriter::fill_missing_columns(
Review Comment:
warning: function 'fill_missing_columns' has cognitive complexity of 69
(threshold 50) [readability-function-cognitive-complexity]
```cpp
Status SegmentWriter::fill_missing_columns(
^
```
<details>
<summary>Additional context</summary>
**be/src/olap/rowset/segment_v2/segment_writer.cpp:831:** +1, including
nesting penalty of 0, nesting level increased to 1
```cpp
if (config::is_cloud_mode()) {
^
```
**be/src/olap/rowset/segment_v2/segment_writer.cpp:850:** +1, including
nesting penalty of 0, nesting level increased to 1
```cpp
if (is_unique_key_replace_if_not_null) {
^
```
**be/src/olap/rowset/segment_v2/segment_writer.cpp:851:** +2, including
nesting penalty of 1, nesting level increased to 2
```cpp
RETURN_IF_ERROR(_tablet->read_columns_by_plan(
^
```
**be/src/common/status.h:618:** expanded from macro 'RETURN_IF_ERROR'
```cpp
do { \
^
```
**be/src/olap/rowset/segment_v2/segment_writer.cpp:851:** +3, including
nesting penalty of 2, nesting level increased to 3
```cpp
RETURN_IF_ERROR(_tablet->read_columns_by_plan(
^
```
**be/src/common/status.h:620:** expanded from macro 'RETURN_IF_ERROR'
```cpp
if (UNLIKELY(!_status_.ok())) { \
^
```
**be/src/olap/rowset/segment_v2/segment_writer.cpp:855:** +1, nesting level
increased to 1
```cpp
} else {
^
```
**be/src/olap/rowset/segment_v2/segment_writer.cpp:856:** +2, including
nesting penalty of 1, nesting level increased to 2
```cpp
RETURN_IF_ERROR(_tablet->read_columns_by_plan(_tablet_schema,
_rsid_to_rowset, read_plan,
^
```
**be/src/common/status.h:618:** expanded from macro 'RETURN_IF_ERROR'
```cpp
do { \
^
```
**be/src/olap/rowset/segment_v2/segment_writer.cpp:856:** +3, including
nesting penalty of 2, nesting level increased to 3
```cpp
RETURN_IF_ERROR(_tablet->read_columns_by_plan(_tablet_schema,
_rsid_to_rowset, read_plan,
^
```
**be/src/common/status.h:620:** expanded from macro 'RETURN_IF_ERROR'
```cpp
if (UNLIKELY(!_status_.ok())) { \
^
```
**be/src/olap/rowset/segment_v2/segment_writer.cpp:866:** +1, including
nesting penalty of 0, nesting level increased to 1
```cpp
if (const vectorized::ColumnWithTypeAndName* delete_sign_column =
^
```
**be/src/olap/rowset/segment_v2/segment_writer.cpp:874:** +1, including
nesting penalty of 0, nesting level increased to 1
```cpp
if (has_default_or_nullable || delete_sign_column_data != nullptr) {
^
```
**be/src/olap/rowset/segment_v2/segment_writer.cpp:874:** +1
```cpp
if (has_default_or_nullable || delete_sign_column_data != nullptr) {
^
```
**be/src/olap/rowset/segment_v2/segment_writer.cpp:875:** +2, including
nesting penalty of 1, nesting level increased to 2
```cpp
for (auto i = 0; i < cids_full_read.size(); ++i) {
^
```
**be/src/olap/rowset/segment_v2/segment_writer.cpp:877:** +3, including
nesting penalty of 2, nesting level increased to 3
```cpp
if (column.has_default_value()) {
^
```
**be/src/olap/rowset/segment_v2/segment_writer.cpp:882:** +4, including
nesting penalty of 3, nesting level increased to 4
```cpp
RETURN_IF_ERROR(old_full_read_block.get_by_position(i).type->from_string(
^
```
**be/src/common/status.h:618:** expanded from macro 'RETURN_IF_ERROR'
```cpp
do { \
^
```
**be/src/olap/rowset/segment_v2/segment_writer.cpp:882:** +5, including
nesting penalty of 4, nesting level increased to 5
```cpp
RETURN_IF_ERROR(old_full_read_block.get_by_position(i).type->from_string(
^
```
**be/src/common/status.h:620:** expanded from macro 'RETURN_IF_ERROR'
```cpp
if (UNLIKELY(!_status_.ok())) { \
^
```
**be/src/olap/rowset/segment_v2/segment_writer.cpp:889:** +1, including
nesting penalty of 0, nesting level increased to 1
```cpp
for (auto idx = 0; idx < use_default_or_null_flag.size(); idx++) {
^
```
**be/src/olap/rowset/segment_v2/segment_writer.cpp:897:** +2, including
nesting penalty of 1, nesting level increased to 2
```cpp
if (use_default_or_null_flag[idx] || (delete_sign_column_data !=
nullptr &&
^
```
**be/src/olap/rowset/segment_v2/segment_writer.cpp:897:** +1
```cpp
if (use_default_or_null_flag[idx] || (delete_sign_column_data !=
nullptr &&
^
```
**be/src/olap/rowset/segment_v2/segment_writer.cpp:897:** +1
```cpp
if (use_default_or_null_flag[idx] || (delete_sign_column_data !=
nullptr &&
^
```
**be/src/olap/rowset/segment_v2/segment_writer.cpp:899:** +3, including
nesting penalty of 2, nesting level increased to 3
```cpp
for (auto i = 0; i < cids_full_read.size(); ++i) {
^
```
**be/src/olap/rowset/segment_v2/segment_writer.cpp:904:** +4, including
nesting penalty of 3, nesting level increased to 4
```cpp
if (tablet_column.has_default_value()) {
^
```
**be/src/olap/rowset/segment_v2/segment_writer.cpp:906:** +1, nesting level
increased to 4
```cpp
} else if (tablet_column.is_nullable()) {
^
```
**be/src/olap/rowset/segment_v2/segment_writer.cpp:910:** +1, nesting level
increased to 4
```cpp
} else if (_tablet_schema->auto_increment_column() ==
tablet_column.name()) {
^
```
**be/src/olap/rowset/segment_v2/segment_writer.cpp:911:** nesting level
increased to 5
```cpp
const auto& column = *DORIS_TRY(
^
```
**be/src/common/status.h:700:** expanded from macro 'DORIS_TRY'
```cpp
({ \
^
```
**be/src/olap/rowset/segment_v2/segment_writer.cpp:911:** +6, including
nesting penalty of 5, nesting level increased to 6
```cpp
const auto& column = *DORIS_TRY(
^
```
**be/src/common/status.h:703:** expanded from macro 'DORIS_TRY'
```cpp
if (!res.has_value()) [[unlikely]] { \
^
```
**be/src/olap/rowset/segment_v2/segment_writer.cpp:921:** +1, nesting level
increased to 4
```cpp
} else {
^
```
**be/src/olap/rowset/segment_v2/segment_writer.cpp:933:** +2, including
nesting penalty of 1, nesting level increased to 2
```cpp
for (auto i = 0; i < cids_full_read.size(); ++i) {
^
```
**be/src/olap/rowset/segment_v2/segment_writer.cpp:935:** +3, including
nesting penalty of 2, nesting level increased to 3
```cpp
if (full_block->get_by_position(cid).name !=
BeConsts::ROW_STORE_COL) {
^
```
**be/src/olap/rowset/segment_v2/segment_writer.cpp:942:** +2, including
nesting penalty of 1, nesting level increased to 2
```cpp
if (is_unique_key_replace_if_not_null) {
^
```
**be/src/olap/rowset/segment_v2/segment_writer.cpp:943:** +3, including
nesting penalty of 2, nesting level increased to 3
```cpp
for (size_t i = 0; i < cids_point_read.size(); i++) {
^
```
**be/src/olap/rowset/segment_v2/segment_writer.cpp:945:** +4, including
nesting penalty of 3, nesting level increased to 4
```cpp
if (parital_update_cols_read_index[cid].contains(idx +
segment_start_pos)) {
^
```
**be/src/olap/rowset/segment_v2/segment_writer.cpp:951:** +1, nesting level
increased to 4
```cpp
} else {
^
```
**be/src/olap/rowset/segment_v2/segment_writer.cpp:959:** +1, including
nesting penalty of 0, nesting level increased to 1
```cpp
if (is_unique_key_replace_if_not_null) {
^
```
**be/src/olap/rowset/segment_v2/segment_writer.cpp:960:** +2, including
nesting penalty of 1, nesting level increased to 2
```cpp
for (size_t i = 0; i < cids_point_read.size(); i++) {
^
```
</details>
##########
be/src/olap/rowset/segment_v2/segment_writer.cpp:
##########
@@ -721,110 +784,136 @@
return Status::OK();
}
-Status SegmentWriter::fill_missing_columns(vectorized::MutableColumns&
mutable_full_columns,
- const std::vector<bool>&
use_default_or_null_flag,
- bool has_default_or_nullable,
- const size_t& segment_start_pos,
- const vectorized::Block* block) {
+void SegmentWriter::calc_indicator_maps(uint32_t row_pos, uint32_t num_rows,
+ const IndicatorMapsVertical&
indicator_maps_vertical) {
+ _indicator_maps = std::make_shared<std::map<uint32_t,
std::vector<uint32_t>>>();
+ for (auto [cid, indicator_map] : indicator_maps_vertical) {
+ for (uint32_t pos = row_pos; pos < row_pos + num_rows; pos++) {
+ if (indicator_map != nullptr && indicator_map[pos] != 0) {
+ (*_indicator_maps)[pos].emplace_back(cid);
+ }
+ }
+ }
+}
+
+// Consider a merge-on-write unique table with colums [k1, k2, v1, v1, v2, v3,
v4, v5] where k1, k2 are key columns
+// and v1, v2, v3, v4, v5 are value columns. The table has the following data:
+// k1|k2|v1|v2|v3|v4|v5
+// 1 |1 |1 |1 |1 |1 |1
+// 2 |2 |2 |2 |2 |2 |2
+// 3 |3 |3 |3 |3 |3 |3
+// 4 |4 |4 |4 |4 |4 |4
+// 5 |5 |5 |5 |5 |5 |5
+// The user inserts the following data for partial update. Charactor `?` means
that the cell is filled
+// with indicator value(currently we use null as indicator value).
+// row_num k1|k2|v1|v2|v3
+// 1 1 |1 |10|10|10
+// 2 2 |2 |? |20|20
+// 3 3 |3 |30|30|?
+// 4 4 |4 |40|40|40
+// 5 5 |5 |50|? |50
+// Here, full_columns = [k1, k2, v1, v2, v3, v4, v5]
+// old_full_read_columns = [v4, v5], the old values from the previous
rows will be read into these columns.
+// old_point_read_columns = [v1, v2, v3], the old values from the
previous rows will be read into these columns if the correspoding columns in
the input block has cell with indicator value.
+// Becase the column is immutable, filled_including_value_columns will store
the data merged from
+// the original input block and old_point_read_columns. After the insertion,
the data in the table will be:
+// k1|k2|v1|v2|v3|v4|v5
+// 1 |1 |10|10|10|1 |1
+// 2 |2 |2 |20|20|2 |2
+// 3 |3 |30|30|3 |3 |3
+// 4 |4 |40|40|40|4 |4
+// 5 |5 |50|5 |50|5 |5
+Status SegmentWriter::fill_missing_columns(
Review Comment:
warning: function 'fill_missing_columns' exceeds recommended size/complexity
thresholds [readability-function-size]
```cpp
Status SegmentWriter::fill_missing_columns(
^
```
<details>
<summary>Additional context</summary>
**be/src/olap/rowset/segment_v2/segment_writer.cpp:825:** 136 lines
including whitespace and comments (threshold 80)
```cpp
Status SegmentWriter::fill_missing_columns(
^
```
</details>
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]