[doris] 15/50: [Bug](materialized-view) fix core dump on create materialized view when diffrent mv column have same reference base column (#23425)

kxiao Wed, 30 Aug 2023 09:56:01 -0700

This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


commit 3e4fac3f08c75cc8da881b70c6a37356cbb9825a
Author: Pxl <[email protected]>
AuthorDate: Mon Aug 28 14:40:51 2023 +0800

    [Bug](materialized-view) fix core dump on create materialized view when 
diffrent mv column have same reference base column (#23425)
    
    * Remove redundant predicates on scan node
    
    update
    
    fix core dump on create materialized view when diffrent mv column have same 
reference base column
    
    Revert "update"
    
    This reverts commit d9ef8dca123b281dc8f1c936ae5130267dff2964.
    
    Revert "Remove redundant predicates on scan node"
    
    This reverts commit f24931758163f59bfc47ee10509634ca97358676.
    
    * update
    
    * fix
    
    * update
    
    * update
---
 be/src/common/status.h                             | 10 ++--
 be/src/exec/tablet_info.cpp                        |  7 ++-
 .../rowset/segment_v2/inverted_index_reader.cpp    |  3 +-
 be/src/olap/schema_change.cpp                      | 64 +++++++++++-----------
 be/src/olap/schema_change.h                        |  4 +-
 be/src/vec/columns/column_nullable.h               |  2 +-
 be/src/vec/olap/olap_data_convertor.cpp            |  6 ++
 .../ssb/multiple_no_where/multiple_no_where.out    |  4 ++
 .../ssb/multiple_no_where/multiple_no_where.groovy |  8 +++
 .../partial_update/test_partial_update.groovy      |  2 +-
 .../test_partial_update_strict_mode.groovy         |  4 +-
 11 files changed, 65 insertions(+), 49 deletions(-)

diff --git a/be/src/common/status.h b/be/src/common/status.h
index 9264b3ae84..5950bc1663 100644
--- a/be/src/common/status.h
+++ b/be/src/common/status.h
@@ -276,7 +276,7 @@ E(INVERTED_INDEX_BUILD_WAITTING, -6008);
 
 // clang-format off
 // whether to capture stacktrace
-inline bool capture_stacktrace(int code) {
+constexpr bool capture_stacktrace(int code) {
     return code != ErrorCode::OK
         && code != ErrorCode::END_OF_FILE
         && code != ErrorCode::MEM_LIMIT_EXCEEDED
@@ -376,10 +376,10 @@ public:
 
     static Status OK() { return Status(); }
 
-#define ERROR_CTOR(name, code)                                                 
 \
-    template <typename... Args>                                                
 \
-    static Status name(std::string_view msg, Args&&... args) {                 
 \
-        return Error<ErrorCode::code, false>(msg, 
std::forward<Args>(args)...); \
+#define ERROR_CTOR(name, code)                                                 
\
+    template <typename... Args>                                                
\
+    static Status name(std::string_view msg, Args&&... args) {                 
\
+        return Error<ErrorCode::code, true>(msg, std::forward<Args>(args)...); 
\
     }
 
     ERROR_CTOR(PublishTimeout, PUBLISH_TIMEOUT)
diff --git a/be/src/exec/tablet_info.cpp b/be/src/exec/tablet_info.cpp
index 3413436c72..f000e4d206 100644
--- a/be/src/exec/tablet_info.cpp
+++ b/be/src/exec/tablet_info.cpp
@@ -36,6 +36,7 @@
 #include "runtime/types.h"
 #include "util/hash_util.hpp"
 #include "util/string_parser.hpp"
+#include "util/string_util.h"
 #include "vec/common/string_ref.h"
 #include "vec/exprs/vexpr.h"
 #include "vec/runtime/vdatetime_value.h"
@@ -127,7 +128,7 @@ Status OlapTableSchemaParam::init(const 
TOlapTableSchemaParam& tschema) {
     for (auto& t_slot_desc : tschema.slot_descs) {
         auto slot_desc = _obj_pool.add(new SlotDescriptor(t_slot_desc));
         _tuple_desc->add_slot(slot_desc);
-        slots_map.emplace(slot_desc->col_name(), slot_desc);
+        slots_map.emplace(to_lower(slot_desc->col_name()), slot_desc);
     }
 
     for (auto& t_index : tschema.indexes) {
@@ -138,7 +139,7 @@ Status OlapTableSchemaParam::init(const 
TOlapTableSchemaParam& tschema) {
             if (_is_partial_update && _partial_update_input_columns.count(col) 
== 0) {
                 continue;
             }
-            auto it = slots_map.find(col);
+            auto it = slots_map.find(to_lower(col));
             if (it == std::end(slots_map)) {
                 return Status::InternalError("unknown index column, 
column={}", col);
             }
@@ -155,7 +156,7 @@ Status OlapTableSchemaParam::init(const 
TOlapTableSchemaParam& tschema) {
             for (auto& tindex_desc : t_index.indexes_desc) {
                 std::vector<int32_t> 
column_unique_ids(tindex_desc.columns.size());
                 for (size_t i = 0; i < tindex_desc.columns.size(); i++) {
-                    auto it = slots_map.find(tindex_desc.columns[i]);
+                    auto it = slots_map.find(to_lower(tindex_desc.columns[i]));
                     if (it != std::end(slots_map)) {
                         column_unique_ids[i] = it->second->col_unique_id();
                     }
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp 
b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
index 50b9196b7d..bb6b1de780 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
@@ -213,9 +213,8 @@ Status 
InvertedIndexReader::read_null_bitmap(InvertedIndexQueryCacheHandle* cach
         if (owned_dir) {
             FINALLY_FINALIZE_INPUT(dir);
         }
-        LOG(WARNING) << "Inverted index read null bitmap error occurred: " << 
e.what();
         return Status::Error<doris::ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
-                "Inverted index read null bitmap error occurred");
+                "Inverted index read null bitmap error occurred, reason={}", 
e.what());
     }
 
     return Status::OK();
diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp
index 7dea290606..48cb9367ae 100644
--- a/be/src/olap/schema_change.cpp
+++ b/be/src/olap/schema_change.cpp
@@ -283,11 +283,8 @@ Status BlockChanger::change_block(vectorized::Block* 
ref_block,
     const int column_size = new_block->columns();
 
     // swap ref_block[key] and new_block[value]
-    std::map<int, int> swap_idx_map;
-
+    std::list<std::pair<int, int>> swap_idx_list;
     for (int idx = 0; idx < column_size; idx++) {
-        int ref_idx = _schema_mapping[idx].ref_column;
-
         if (_schema_mapping[idx].expr != nullptr) {
             vectorized::VExprContextSPtr ctx;
             
RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(*_schema_mapping[idx].expr, 
ctx));
@@ -303,14 +300,11 @@ Status BlockChanger::change_block(vectorized::Block* 
ref_block,
                         "{} size invalid, expect={}, real={}", 
new_block->get_by_position(idx).name,
                         row_size, 
ref_block->get_by_position(result_column_id).column->size());
             }
-
-            if (_type != ROLLUP) {
-                RETURN_IF_ERROR(
-                        
_check_cast_valid(ref_block->get_by_position(ref_idx).column,
-                                          
ref_block->get_by_position(result_column_id).column));
-            }
-            swap_idx_map[result_column_id] = idx;
-        } else if (ref_idx < 0) {
+            
RETURN_IF_ERROR(_check_cast_valid(ref_block->get_by_position(idx).column,
+                                              
ref_block->get_by_position(result_column_id).column,
+                                              _type));
+            swap_idx_list.push_back({result_column_id, idx});
+        } else if (_schema_mapping[idx].ref_column < 0) {
             if (_type != ROLLUP) {
                 // new column, write default value
                 auto value = _schema_mapping[idx].default_value;
@@ -330,24 +324,24 @@ Status BlockChanger::change_block(vectorized::Block* 
ref_block,
             }
         } else {
             // same type, just swap column
-            swap_idx_map[ref_idx] = idx;
+            swap_idx_list.push_back({_schema_mapping[idx].ref_column, idx});
         }
     }
 
-    for (auto it : swap_idx_map) {
-        auto& ref_col = ref_block->get_by_position(it.first);
-        auto& new_col = new_block->get_by_position(it.second);
+    for (auto it : swap_idx_list) {
+        auto& ref_col = ref_block->get_by_position(it.first).column;
+        auto& new_col = new_block->get_by_position(it.second).column;
 
-        bool ref_col_nullable = ref_col.column->is_nullable();
-        bool new_col_nullable = new_col.column->is_nullable();
+        bool ref_col_nullable = ref_col->is_nullable();
+        bool new_col_nullable = new_col->is_nullable();
 
         if (ref_col_nullable != new_col_nullable) {
             // not nullable to nullable
             if (new_col_nullable) {
-                auto* new_nullable_col = 
assert_cast<vectorized::ColumnNullable*>(
-                        new_col.column->assume_mutable().get());
+                auto* new_nullable_col =
+                        
assert_cast<vectorized::ColumnNullable*>(new_col->assume_mutable().get());
 
-                new_nullable_col->swap_nested_column(ref_col.column);
+                new_nullable_col->change_nested_column(ref_col);
                 
new_nullable_col->get_null_map_data().resize_fill(new_nullable_col->size());
             } else {
                 // nullable to not nullable:
@@ -355,14 +349,14 @@ Status BlockChanger::change_block(vectorized::Block* 
ref_block,
                 // then do schema change `alter table test modify column 
c_phone int not null`,
                 // the cast expr of schema change is `CastExpr(CAST String to 
Nullable(Int32))`,
                 // so need to handle nullable to not nullable here
-                auto* ref_nullable_col = 
assert_cast<vectorized::ColumnNullable*>(
-                        ref_col.column->assume_mutable().get());
+                auto* ref_nullable_col =
+                        
assert_cast<vectorized::ColumnNullable*>(ref_col->assume_mutable().get());
 
-                ref_nullable_col->swap_nested_column(new_col.column);
+                new_col = ref_nullable_col->get_nested_column_ptr();
             }
         } else {
-            new_block->get_by_position(it.second).column.swap(
-                    ref_block->get_by_position(it.first).column);
+            new_block->get_by_position(it.second).column =
+                    ref_block->get_by_position(it.first).column;
         }
     }
     return Status::OK();
@@ -370,7 +364,16 @@ Status BlockChanger::change_block(vectorized::Block* 
ref_block,
 
 // This check is to prevent schema-change from causing data loss
 Status BlockChanger::_check_cast_valid(vectorized::ColumnPtr ref_column,
-                                       vectorized::ColumnPtr new_column) const 
{
+                                       vectorized::ColumnPtr new_column,
+                                       AlterTabletType type) const {
+    if (ref_column->size() != new_column->size()) {
+        return Status::InternalError(
+                "column size is changed, ref_column_size={}, 
new_column_size={}",
+                ref_column->size(), new_column->size());
+    }
+    if (type == ROLLUP) {
+        return Status::OK();
+    }
     if (ref_column->is_nullable() != new_column->is_nullable()) {
         if (ref_column->is_nullable()) {
             auto* ref_null_map =
@@ -487,10 +490,7 @@ Status 
VSchemaChangeDirectly::_inner_process(RowsetReaderSharedPtr rowset_reader
         RETURN_IF_ERROR(rowset_writer->add_block(new_block.get()));
     } while (true);
 
-    if (!rowset_writer->flush()) {
-        return Status::Error<ALTER_STATUS_ERR>("rowset_writer flush failed");
-    }
-
+    RETURN_IF_ERROR(rowset_writer->flush());
     return Status::OK();
 }
 
@@ -593,7 +593,6 @@ Status VSchemaChangeWithSorting::_internal_sorting(
         SegmentsOverlapPB segments_overlap, RowsetSharedPtr* rowset) {
     uint64_t merged_rows = 0;
     MultiBlockMerger merger(new_tablet);
-
     std::unique_ptr<RowsetWriter> rowset_writer;
     RowsetWriterContext context;
     context.version = version;
@@ -630,7 +629,6 @@ Status 
VSchemaChangeWithSorting::_external_sorting(vector<RowsetSharedPtr>& src_
     RETURN_IF_ERROR(Merger::vmerge_rowsets(new_tablet, 
ReaderType::READER_ALTER_TABLE,
                                            new_tablet->tablet_schema(), 
rs_readers, rowset_writer,
                                            &stats));
-
     _add_merged_rows(stats.merged_rows);
     _add_filtered_rows(stats.filtered_rows);
     return Status::OK();
diff --git a/be/src/olap/schema_change.h b/be/src/olap/schema_change.h
index f9db4c488a..2c5075b9ce 100644
--- a/be/src/olap/schema_change.h
+++ b/be/src/olap/schema_change.h
@@ -83,8 +83,8 @@ public:
     bool has_where() const { return _where_expr != nullptr; }
 
 private:
-    Status _check_cast_valid(vectorized::ColumnPtr ref_column,
-                             vectorized::ColumnPtr new_column) const;
+    Status _check_cast_valid(vectorized::ColumnPtr ref_column, 
vectorized::ColumnPtr new_column,
+                             AlterTabletType type) const;
 
     // @brief column-mapping specification of new schema
     SchemaMapping _schema_mapping;
diff --git a/be/src/vec/columns/column_nullable.h 
b/be/src/vec/columns/column_nullable.h
index c3ee24600e..261eea0f61 100644
--- a/be/src/vec/columns/column_nullable.h
+++ b/be/src/vec/columns/column_nullable.h
@@ -277,7 +277,7 @@ public:
     bool only_null() const override { return nested_column->is_dummy(); }
 
     // used in schema change
-    void swap_nested_column(ColumnPtr& other) { 
((ColumnPtr&)nested_column).swap(other); }
+    void change_nested_column(ColumnPtr& other) { ((ColumnPtr&)nested_column) 
= other; }
 
     /// Return the column that represents values.
     IColumn& get_nested_column() { return *nested_column; }
diff --git a/be/src/vec/olap/olap_data_convertor.cpp 
b/be/src/vec/olap/olap_data_convertor.cpp
index a4965e8421..b28bdc76f8 100644
--- a/be/src/vec/olap/olap_data_convertor.cpp
+++ b/be/src/vec/olap/olap_data_convertor.cpp
@@ -22,6 +22,8 @@
 // IWYU pragma: no_include <opentelemetry/common/threadlocal.h>
 #include "common/compiler_util.h" // IWYU pragma: keep
 #include "common/config.h"
+#include "common/exception.h"
+#include "common/status.h"
 #include "olap/hll.h"
 #include "olap/olap_common.h"
 #include "olap/tablet_schema.h"
@@ -189,6 +191,10 @@ void OlapBlockDataConvertor::set_source_content(const 
vectorized::Block* block,
            block->columns() == _convertors.size());
     size_t cid = 0;
     for (const auto& typed_column : *block) {
+        if (typed_column.column->size() != block->rows()) {
+            throw Exception(ErrorCode::INTERNAL_ERROR, "input invalid block, 
block={}",
+                            block->dump_structure());
+        }
         _convertors[cid]->set_source_column(typed_column, row_pos, num_rows);
         ++cid;
     }
diff --git 
a/regression-test/data/mv_p0/ssb/multiple_no_where/multiple_no_where.out 
b/regression-test/data/mv_p0/ssb/multiple_no_where/multiple_no_where.out
index 00e05ab400..ceec2c1235 100644
--- a/regression-test/data/mv_p0/ssb/multiple_no_where/multiple_no_where.out
+++ b/regression-test/data/mv_p0/ssb/multiple_no_where/multiple_no_where.out
@@ -22,3 +22,7 @@ ASIA  ASIA    1992    1
 0      nation  0
 1993   nation  0
 
+-- !select_temp_2 --
+1      4       1       1
+2      8       2       2
+
diff --git 
a/regression-test/suites/mv_p0/ssb/multiple_no_where/multiple_no_where.groovy 
b/regression-test/suites/mv_p0/ssb/multiple_no_where/multiple_no_where.groovy
index b92b37c5e1..e671f11823 100644
--- 
a/regression-test/suites/mv_p0/ssb/multiple_no_where/multiple_no_where.groovy
+++ 
b/regression-test/suites/mv_p0/ssb/multiple_no_where/multiple_no_where.groovy
@@ -120,6 +120,8 @@ suite ("multiple_no_where") {
                 FROM lineorder_flat
                 GROUP BY YEAR, C_NATION,C_REGION,S_REGION,P_MFGR;""")
 
+    createMV ("""create materialized view temp_2 as SELECT lo_orderkey, 
sum(lo_extendedprice),max(lo_extendedprice), min(lo_extendedprice)  from  
lineorder_flat  group by lo_orderkey;""")
+
     sql """INSERT INTO lineorder_flat (LO_ORDERDATE, LO_ORDERKEY, 
LO_LINENUMBER, LO_CUSTKEY, LO_PARTKEY, LO_SUPPKEY, LO_ORDERPRIORITY, 
LO_SHIPPRIORITY, LO_QUANTITY, LO_EXTENDEDPRICE, LO_ORDTOTALPRICE, LO_DISCOUNT, 
LO_REVENUE, LO_SUPPLYCOST, LO_TAX, LO_COMMITDATE, 
LO_SHIPMODE,C_NAME,C_ADDRESS,C_CITY,C_NATION,C_REGION,C_PHONE,C_MKTSEGMENT,S_NAME,S_ADDRESS,S_CITY,S_NATION,S_REGION,S_PHONE,P_NAME,P_MFGR,P_CATEGORY,P_BRAND,P_COLOR,P_TYPE,P_SIZE,P_CONTAINER)
 VALUES (19930101 , 2 , 2 , 2 , 2 ,  [...]
 
     sql """INSERT INTO lineorder_flat (LO_ORDERDATE, LO_ORDERKEY, 
LO_LINENUMBER, LO_CUSTKEY, LO_PARTKEY, LO_SUPPKEY, LO_ORDERPRIORITY, 
LO_SHIPPRIORITY, LO_QUANTITY, LO_EXTENDEDPRICE, LO_ORDTOTALPRICE, LO_DISCOUNT, 
LO_REVENUE, LO_SUPPLYCOST, LO_TAX, LO_COMMITDATE, LO_SHIPMODE, C_NAME, 
C_ADDRESS, C_CITY, C_NATION, C_REGION, C_PHONE, C_MKTSEGMENT, S_NAME, 
S_ADDRESS, S_CITY, S_NATION, S_REGION, S_PHONE, P_NAME, P_MFGR, P_CATEGORY, 
P_BRAND, P_COLOR,P_TYPE,P_SIZE,P_CONTAINER) VALUES (19930101  [...]
@@ -223,4 +225,10 @@ suite ("multiple_no_where") {
                 AND P_MFGR IN ('MFGR#1', 'MFGR#2')
                 GROUP BY YEAR, C_NATION
                 ORDER BY YEAR ASC, C_NATION ASC;"""
+
+    explain {
+        sql("""SELECT lo_orderkey, 
sum(lo_extendedprice),max(lo_extendedprice), min(lo_extendedprice)  from  
lineorder_flat  group by lo_orderkey order by lo_orderkey;""")
+        contains "(temp_2)"
+    }
+    qt_select_temp_2 """SELECT lo_orderkey, 
sum(lo_extendedprice),max(lo_extendedprice), min(lo_extendedprice)  from  
lineorder_flat  group by lo_orderkey order by lo_orderkey;"""
 }
diff --git 
a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update.groovy
 
b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update.groovy
index b8052cf525..6af0e86948 100644
--- 
a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update.groovy
+++ 
b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update.groovy
@@ -166,7 +166,7 @@ suite("test_primary_key_partial_update", "p0") {
             assertTrue(exception == null)
             def json = parseJson(result)
             assertEquals("Fail", json.Status)
-            assertEquals("[INTERNAL_ERROR]too many filtered rows", 
json.Message)
+            assertTrue(json.Message.contains("[INTERNAL_ERROR]too many 
filtered rows"))
             assertEquals(3, json.NumberTotalRows)
             assertEquals(1, json.NumberLoadedRows)
             assertEquals(2, json.NumberFilteredRows)
diff --git 
a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_strict_mode.groovy
 
b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_strict_mode.groovy
index 9262d7116f..256cf018d5 100644
--- 
a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_strict_mode.groovy
+++ 
b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_strict_mode.groovy
@@ -108,7 +108,7 @@ suite("test_partial_update_strict_mode", "p0") {
             assertTrue(exception == null)
             def json = parseJson(result)
             assertEquals("Fail", json.Status)
-            assertEquals("[INTERNAL_ERROR]too many filtered rows", 
json.Message)
+            assertTrue(json.Message.contains("[INTERNAL_ERROR]too many 
filtered rows"))
             assertEquals(3, json.NumberTotalRows)
             assertEquals(1, json.NumberLoadedRows)
             assertEquals(2, json.NumberFilteredRows)
@@ -157,7 +157,7 @@ suite("test_partial_update_strict_mode", "p0") {
             assertTrue(exception == null)
             def json = parseJson(result)
             assertEquals("Fail", json.Status)
-            assertEquals("[INTERNAL_ERROR]too many filtered rows", 
json.Message)
+            assertTrue(json.Message.contains("[INTERNAL_ERROR]too many 
filtered rows"))
             assertEquals(3, json.NumberTotalRows)
             assertEquals(1, json.NumberLoadedRows)
             assertEquals(2, json.NumberFilteredRows)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[doris] 15/50: [Bug](materialized-view) fix core dump on create materialized view when diffrent mv column have same reference base column (#23425)

Reply via email to