This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 3e4fac3f08c75cc8da881b70c6a37356cbb9825a Author: Pxl <[email protected]> AuthorDate: Mon Aug 28 14:40:51 2023 +0800 [Bug](materialized-view) fix core dump on create materialized view when diffrent mv column have same reference base column (#23425) * Remove redundant predicates on scan node update fix core dump on create materialized view when diffrent mv column have same reference base column Revert "update" This reverts commit d9ef8dca123b281dc8f1c936ae5130267dff2964. Revert "Remove redundant predicates on scan node" This reverts commit f24931758163f59bfc47ee10509634ca97358676. * update * fix * update * update --- be/src/common/status.h | 10 ++-- be/src/exec/tablet_info.cpp | 7 ++- .../rowset/segment_v2/inverted_index_reader.cpp | 3 +- be/src/olap/schema_change.cpp | 64 +++++++++++----------- be/src/olap/schema_change.h | 4 +- be/src/vec/columns/column_nullable.h | 2 +- be/src/vec/olap/olap_data_convertor.cpp | 6 ++ .../ssb/multiple_no_where/multiple_no_where.out | 4 ++ .../ssb/multiple_no_where/multiple_no_where.groovy | 8 +++ .../partial_update/test_partial_update.groovy | 2 +- .../test_partial_update_strict_mode.groovy | 4 +- 11 files changed, 65 insertions(+), 49 deletions(-) diff --git a/be/src/common/status.h b/be/src/common/status.h index 9264b3ae84..5950bc1663 100644 --- a/be/src/common/status.h +++ b/be/src/common/status.h @@ -276,7 +276,7 @@ E(INVERTED_INDEX_BUILD_WAITTING, -6008); // clang-format off // whether to capture stacktrace -inline bool capture_stacktrace(int code) { +constexpr bool capture_stacktrace(int code) { return code != ErrorCode::OK && code != ErrorCode::END_OF_FILE && code != ErrorCode::MEM_LIMIT_EXCEEDED @@ -376,10 +376,10 @@ public: static Status OK() { return Status(); } -#define ERROR_CTOR(name, code) \ - template <typename... Args> \ - static Status name(std::string_view msg, Args&&... args) { \ - return Error<ErrorCode::code, false>(msg, std::forward<Args>(args)...); \ +#define ERROR_CTOR(name, code) \ + template <typename... Args> \ + static Status name(std::string_view msg, Args&&... args) { \ + return Error<ErrorCode::code, true>(msg, std::forward<Args>(args)...); \ } ERROR_CTOR(PublishTimeout, PUBLISH_TIMEOUT) diff --git a/be/src/exec/tablet_info.cpp b/be/src/exec/tablet_info.cpp index 3413436c72..f000e4d206 100644 --- a/be/src/exec/tablet_info.cpp +++ b/be/src/exec/tablet_info.cpp @@ -36,6 +36,7 @@ #include "runtime/types.h" #include "util/hash_util.hpp" #include "util/string_parser.hpp" +#include "util/string_util.h" #include "vec/common/string_ref.h" #include "vec/exprs/vexpr.h" #include "vec/runtime/vdatetime_value.h" @@ -127,7 +128,7 @@ Status OlapTableSchemaParam::init(const TOlapTableSchemaParam& tschema) { for (auto& t_slot_desc : tschema.slot_descs) { auto slot_desc = _obj_pool.add(new SlotDescriptor(t_slot_desc)); _tuple_desc->add_slot(slot_desc); - slots_map.emplace(slot_desc->col_name(), slot_desc); + slots_map.emplace(to_lower(slot_desc->col_name()), slot_desc); } for (auto& t_index : tschema.indexes) { @@ -138,7 +139,7 @@ Status OlapTableSchemaParam::init(const TOlapTableSchemaParam& tschema) { if (_is_partial_update && _partial_update_input_columns.count(col) == 0) { continue; } - auto it = slots_map.find(col); + auto it = slots_map.find(to_lower(col)); if (it == std::end(slots_map)) { return Status::InternalError("unknown index column, column={}", col); } @@ -155,7 +156,7 @@ Status OlapTableSchemaParam::init(const TOlapTableSchemaParam& tschema) { for (auto& tindex_desc : t_index.indexes_desc) { std::vector<int32_t> column_unique_ids(tindex_desc.columns.size()); for (size_t i = 0; i < tindex_desc.columns.size(); i++) { - auto it = slots_map.find(tindex_desc.columns[i]); + auto it = slots_map.find(to_lower(tindex_desc.columns[i])); if (it != std::end(slots_map)) { column_unique_ids[i] = it->second->col_unique_id(); } diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp index 50b9196b7d..bb6b1de780 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp @@ -213,9 +213,8 @@ Status InvertedIndexReader::read_null_bitmap(InvertedIndexQueryCacheHandle* cach if (owned_dir) { FINALLY_FINALIZE_INPUT(dir); } - LOG(WARNING) << "Inverted index read null bitmap error occurred: " << e.what(); return Status::Error<doris::ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>( - "Inverted index read null bitmap error occurred"); + "Inverted index read null bitmap error occurred, reason={}", e.what()); } return Status::OK(); diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp index 7dea290606..48cb9367ae 100644 --- a/be/src/olap/schema_change.cpp +++ b/be/src/olap/schema_change.cpp @@ -283,11 +283,8 @@ Status BlockChanger::change_block(vectorized::Block* ref_block, const int column_size = new_block->columns(); // swap ref_block[key] and new_block[value] - std::map<int, int> swap_idx_map; - + std::list<std::pair<int, int>> swap_idx_list; for (int idx = 0; idx < column_size; idx++) { - int ref_idx = _schema_mapping[idx].ref_column; - if (_schema_mapping[idx].expr != nullptr) { vectorized::VExprContextSPtr ctx; RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(*_schema_mapping[idx].expr, ctx)); @@ -303,14 +300,11 @@ Status BlockChanger::change_block(vectorized::Block* ref_block, "{} size invalid, expect={}, real={}", new_block->get_by_position(idx).name, row_size, ref_block->get_by_position(result_column_id).column->size()); } - - if (_type != ROLLUP) { - RETURN_IF_ERROR( - _check_cast_valid(ref_block->get_by_position(ref_idx).column, - ref_block->get_by_position(result_column_id).column)); - } - swap_idx_map[result_column_id] = idx; - } else if (ref_idx < 0) { + RETURN_IF_ERROR(_check_cast_valid(ref_block->get_by_position(idx).column, + ref_block->get_by_position(result_column_id).column, + _type)); + swap_idx_list.push_back({result_column_id, idx}); + } else if (_schema_mapping[idx].ref_column < 0) { if (_type != ROLLUP) { // new column, write default value auto value = _schema_mapping[idx].default_value; @@ -330,24 +324,24 @@ Status BlockChanger::change_block(vectorized::Block* ref_block, } } else { // same type, just swap column - swap_idx_map[ref_idx] = idx; + swap_idx_list.push_back({_schema_mapping[idx].ref_column, idx}); } } - for (auto it : swap_idx_map) { - auto& ref_col = ref_block->get_by_position(it.first); - auto& new_col = new_block->get_by_position(it.second); + for (auto it : swap_idx_list) { + auto& ref_col = ref_block->get_by_position(it.first).column; + auto& new_col = new_block->get_by_position(it.second).column; - bool ref_col_nullable = ref_col.column->is_nullable(); - bool new_col_nullable = new_col.column->is_nullable(); + bool ref_col_nullable = ref_col->is_nullable(); + bool new_col_nullable = new_col->is_nullable(); if (ref_col_nullable != new_col_nullable) { // not nullable to nullable if (new_col_nullable) { - auto* new_nullable_col = assert_cast<vectorized::ColumnNullable*>( - new_col.column->assume_mutable().get()); + auto* new_nullable_col = + assert_cast<vectorized::ColumnNullable*>(new_col->assume_mutable().get()); - new_nullable_col->swap_nested_column(ref_col.column); + new_nullable_col->change_nested_column(ref_col); new_nullable_col->get_null_map_data().resize_fill(new_nullable_col->size()); } else { // nullable to not nullable: @@ -355,14 +349,14 @@ Status BlockChanger::change_block(vectorized::Block* ref_block, // then do schema change `alter table test modify column c_phone int not null`, // the cast expr of schema change is `CastExpr(CAST String to Nullable(Int32))`, // so need to handle nullable to not nullable here - auto* ref_nullable_col = assert_cast<vectorized::ColumnNullable*>( - ref_col.column->assume_mutable().get()); + auto* ref_nullable_col = + assert_cast<vectorized::ColumnNullable*>(ref_col->assume_mutable().get()); - ref_nullable_col->swap_nested_column(new_col.column); + new_col = ref_nullable_col->get_nested_column_ptr(); } } else { - new_block->get_by_position(it.second).column.swap( - ref_block->get_by_position(it.first).column); + new_block->get_by_position(it.second).column = + ref_block->get_by_position(it.first).column; } } return Status::OK(); @@ -370,7 +364,16 @@ Status BlockChanger::change_block(vectorized::Block* ref_block, // This check is to prevent schema-change from causing data loss Status BlockChanger::_check_cast_valid(vectorized::ColumnPtr ref_column, - vectorized::ColumnPtr new_column) const { + vectorized::ColumnPtr new_column, + AlterTabletType type) const { + if (ref_column->size() != new_column->size()) { + return Status::InternalError( + "column size is changed, ref_column_size={}, new_column_size={}", + ref_column->size(), new_column->size()); + } + if (type == ROLLUP) { + return Status::OK(); + } if (ref_column->is_nullable() != new_column->is_nullable()) { if (ref_column->is_nullable()) { auto* ref_null_map = @@ -487,10 +490,7 @@ Status VSchemaChangeDirectly::_inner_process(RowsetReaderSharedPtr rowset_reader RETURN_IF_ERROR(rowset_writer->add_block(new_block.get())); } while (true); - if (!rowset_writer->flush()) { - return Status::Error<ALTER_STATUS_ERR>("rowset_writer flush failed"); - } - + RETURN_IF_ERROR(rowset_writer->flush()); return Status::OK(); } @@ -593,7 +593,6 @@ Status VSchemaChangeWithSorting::_internal_sorting( SegmentsOverlapPB segments_overlap, RowsetSharedPtr* rowset) { uint64_t merged_rows = 0; MultiBlockMerger merger(new_tablet); - std::unique_ptr<RowsetWriter> rowset_writer; RowsetWriterContext context; context.version = version; @@ -630,7 +629,6 @@ Status VSchemaChangeWithSorting::_external_sorting(vector<RowsetSharedPtr>& src_ RETURN_IF_ERROR(Merger::vmerge_rowsets(new_tablet, ReaderType::READER_ALTER_TABLE, new_tablet->tablet_schema(), rs_readers, rowset_writer, &stats)); - _add_merged_rows(stats.merged_rows); _add_filtered_rows(stats.filtered_rows); return Status::OK(); diff --git a/be/src/olap/schema_change.h b/be/src/olap/schema_change.h index f9db4c488a..2c5075b9ce 100644 --- a/be/src/olap/schema_change.h +++ b/be/src/olap/schema_change.h @@ -83,8 +83,8 @@ public: bool has_where() const { return _where_expr != nullptr; } private: - Status _check_cast_valid(vectorized::ColumnPtr ref_column, - vectorized::ColumnPtr new_column) const; + Status _check_cast_valid(vectorized::ColumnPtr ref_column, vectorized::ColumnPtr new_column, + AlterTabletType type) const; // @brief column-mapping specification of new schema SchemaMapping _schema_mapping; diff --git a/be/src/vec/columns/column_nullable.h b/be/src/vec/columns/column_nullable.h index c3ee24600e..261eea0f61 100644 --- a/be/src/vec/columns/column_nullable.h +++ b/be/src/vec/columns/column_nullable.h @@ -277,7 +277,7 @@ public: bool only_null() const override { return nested_column->is_dummy(); } // used in schema change - void swap_nested_column(ColumnPtr& other) { ((ColumnPtr&)nested_column).swap(other); } + void change_nested_column(ColumnPtr& other) { ((ColumnPtr&)nested_column) = other; } /// Return the column that represents values. IColumn& get_nested_column() { return *nested_column; } diff --git a/be/src/vec/olap/olap_data_convertor.cpp b/be/src/vec/olap/olap_data_convertor.cpp index a4965e8421..b28bdc76f8 100644 --- a/be/src/vec/olap/olap_data_convertor.cpp +++ b/be/src/vec/olap/olap_data_convertor.cpp @@ -22,6 +22,8 @@ // IWYU pragma: no_include <opentelemetry/common/threadlocal.h> #include "common/compiler_util.h" // IWYU pragma: keep #include "common/config.h" +#include "common/exception.h" +#include "common/status.h" #include "olap/hll.h" #include "olap/olap_common.h" #include "olap/tablet_schema.h" @@ -189,6 +191,10 @@ void OlapBlockDataConvertor::set_source_content(const vectorized::Block* block, block->columns() == _convertors.size()); size_t cid = 0; for (const auto& typed_column : *block) { + if (typed_column.column->size() != block->rows()) { + throw Exception(ErrorCode::INTERNAL_ERROR, "input invalid block, block={}", + block->dump_structure()); + } _convertors[cid]->set_source_column(typed_column, row_pos, num_rows); ++cid; } diff --git a/regression-test/data/mv_p0/ssb/multiple_no_where/multiple_no_where.out b/regression-test/data/mv_p0/ssb/multiple_no_where/multiple_no_where.out index 00e05ab400..ceec2c1235 100644 --- a/regression-test/data/mv_p0/ssb/multiple_no_where/multiple_no_where.out +++ b/regression-test/data/mv_p0/ssb/multiple_no_where/multiple_no_where.out @@ -22,3 +22,7 @@ ASIA ASIA 1992 1 0 nation 0 1993 nation 0 +-- !select_temp_2 -- +1 4 1 1 +2 8 2 2 + diff --git a/regression-test/suites/mv_p0/ssb/multiple_no_where/multiple_no_where.groovy b/regression-test/suites/mv_p0/ssb/multiple_no_where/multiple_no_where.groovy index b92b37c5e1..e671f11823 100644 --- a/regression-test/suites/mv_p0/ssb/multiple_no_where/multiple_no_where.groovy +++ b/regression-test/suites/mv_p0/ssb/multiple_no_where/multiple_no_where.groovy @@ -120,6 +120,8 @@ suite ("multiple_no_where") { FROM lineorder_flat GROUP BY YEAR, C_NATION,C_REGION,S_REGION,P_MFGR;""") + createMV ("""create materialized view temp_2 as SELECT lo_orderkey, sum(lo_extendedprice),max(lo_extendedprice), min(lo_extendedprice) from lineorder_flat group by lo_orderkey;""") + sql """INSERT INTO lineorder_flat (LO_ORDERDATE, LO_ORDERKEY, LO_LINENUMBER, LO_CUSTKEY, LO_PARTKEY, LO_SUPPKEY, LO_ORDERPRIORITY, LO_SHIPPRIORITY, LO_QUANTITY, LO_EXTENDEDPRICE, LO_ORDTOTALPRICE, LO_DISCOUNT, LO_REVENUE, LO_SUPPLYCOST, LO_TAX, LO_COMMITDATE, LO_SHIPMODE,C_NAME,C_ADDRESS,C_CITY,C_NATION,C_REGION,C_PHONE,C_MKTSEGMENT,S_NAME,S_ADDRESS,S_CITY,S_NATION,S_REGION,S_PHONE,P_NAME,P_MFGR,P_CATEGORY,P_BRAND,P_COLOR,P_TYPE,P_SIZE,P_CONTAINER) VALUES (19930101 , 2 , 2 , 2 , 2 , [...] sql """INSERT INTO lineorder_flat (LO_ORDERDATE, LO_ORDERKEY, LO_LINENUMBER, LO_CUSTKEY, LO_PARTKEY, LO_SUPPKEY, LO_ORDERPRIORITY, LO_SHIPPRIORITY, LO_QUANTITY, LO_EXTENDEDPRICE, LO_ORDTOTALPRICE, LO_DISCOUNT, LO_REVENUE, LO_SUPPLYCOST, LO_TAX, LO_COMMITDATE, LO_SHIPMODE, C_NAME, C_ADDRESS, C_CITY, C_NATION, C_REGION, C_PHONE, C_MKTSEGMENT, S_NAME, S_ADDRESS, S_CITY, S_NATION, S_REGION, S_PHONE, P_NAME, P_MFGR, P_CATEGORY, P_BRAND, P_COLOR,P_TYPE,P_SIZE,P_CONTAINER) VALUES (19930101 [...] @@ -223,4 +225,10 @@ suite ("multiple_no_where") { AND P_MFGR IN ('MFGR#1', 'MFGR#2') GROUP BY YEAR, C_NATION ORDER BY YEAR ASC, C_NATION ASC;""" + + explain { + sql("""SELECT lo_orderkey, sum(lo_extendedprice),max(lo_extendedprice), min(lo_extendedprice) from lineorder_flat group by lo_orderkey order by lo_orderkey;""") + contains "(temp_2)" + } + qt_select_temp_2 """SELECT lo_orderkey, sum(lo_extendedprice),max(lo_extendedprice), min(lo_extendedprice) from lineorder_flat group by lo_orderkey order by lo_orderkey;""" } diff --git a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update.groovy b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update.groovy index b8052cf525..6af0e86948 100644 --- a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update.groovy +++ b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update.groovy @@ -166,7 +166,7 @@ suite("test_primary_key_partial_update", "p0") { assertTrue(exception == null) def json = parseJson(result) assertEquals("Fail", json.Status) - assertEquals("[INTERNAL_ERROR]too many filtered rows", json.Message) + assertTrue(json.Message.contains("[INTERNAL_ERROR]too many filtered rows")) assertEquals(3, json.NumberTotalRows) assertEquals(1, json.NumberLoadedRows) assertEquals(2, json.NumberFilteredRows) diff --git a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_strict_mode.groovy b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_strict_mode.groovy index 9262d7116f..256cf018d5 100644 --- a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_strict_mode.groovy +++ b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_strict_mode.groovy @@ -108,7 +108,7 @@ suite("test_partial_update_strict_mode", "p0") { assertTrue(exception == null) def json = parseJson(result) assertEquals("Fail", json.Status) - assertEquals("[INTERNAL_ERROR]too many filtered rows", json.Message) + assertTrue(json.Message.contains("[INTERNAL_ERROR]too many filtered rows")) assertEquals(3, json.NumberTotalRows) assertEquals(1, json.NumberLoadedRows) assertEquals(2, json.NumberFilteredRows) @@ -157,7 +157,7 @@ suite("test_partial_update_strict_mode", "p0") { assertTrue(exception == null) def json = parseJson(result) assertEquals("Fail", json.Status) - assertEquals("[INTERNAL_ERROR]too many filtered rows", json.Message) + assertTrue(json.Message.contains("[INTERNAL_ERROR]too many filtered rows")) assertEquals(3, json.NumberTotalRows) assertEquals(1, json.NumberLoadedRows) assertEquals(2, json.NumberFilteredRows) --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
