This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 0a888a09cb6 [fix](schema-change) fix the bug of alter column nullable 
when double writing (#41737) (#42656)
0a888a09cb6 is described below

commit 0a888a09cb62cd8bfa7a9acc200ac60010a2bec5
Author: Luwei <[email protected]>
AuthorDate: Tue Oct 29 22:34:28 2024 +0800

    [fix](schema-change) fix the bug of alter column nullable when double 
writing (#41737) (#42656)
    
    pick master #41737
    
    ## problem
    
    CREATE TABLE t (
        `k1` VARCHAR(30) NOT NULL,
        `v1` INT NOT NULL
    )
    
    alter table t modify column `v1` INT NULL
    
    insert into value ('1', 2), ('1', 3);
    
    core dump
    
    ## reason
    
    Schema change leads to double writing, during double writing, the two
    schemas and slots are as follows
    
    ```
    old tablet schema
    k1 varchar not null
    v1 int not null
    ```
    
    ```
    new tablet scheam
    k1 varchar not null
    v1 int null
    ```
    
    ```
    slot
    k1 varchar not null
    v1 int not null
    v1 int null
    ```
    
    During the double writing process, when selecting slots through the
    schema, only the column names and types were compared, without comparing
    the nullable attributes, which led to the selection of the wrong slot.
    Since the slot determines the nullable attribute of the block, the
    nullable attribute of the columns in the block is different from that of
    the columns in the schema, resulting in a core dump.
---
 be/src/exec/tablet_info.cpp                        | 32 ++++++----
 .../schema_change_p0/test_alter_uniq_null.groovy   | 71 ++++++++++++++++++++++
 2 files changed, 92 insertions(+), 11 deletions(-)

diff --git a/be/src/exec/tablet_info.cpp b/be/src/exec/tablet_info.cpp
index 44846ded868..a544e59c5b9 100644
--- a/be/src/exec/tablet_info.cpp
+++ b/be/src/exec/tablet_info.cpp
@@ -29,6 +29,7 @@
 #include <cstdint>
 #include <memory>
 #include <ostream>
+#include <string>
 #include <tuple>
 
 #include "common/exception.h"
@@ -137,7 +138,8 @@ Status OlapTableSchemaParam::init(const 
POlapTableSchemaParam& pschema) {
     for (const auto& col : pschema.partial_update_input_columns()) {
         _partial_update_input_columns.insert(col);
     }
-    std::unordered_map<std::pair<std::string, FieldType>, SlotDescriptor*> 
slots_map;
+    std::unordered_map<std::string, SlotDescriptor*> slots_map;
+
     _tuple_desc = _obj_pool.add(new TupleDescriptor(pschema.tuple_desc()));
 
     for (const auto& p_slot_desc : pschema.slot_descs()) {
@@ -145,8 +147,10 @@ Status OlapTableSchemaParam::init(const 
POlapTableSchemaParam& pschema) {
         _tuple_desc->add_slot(slot_desc);
         string data_type;
         EnumToString(TPrimitiveType, to_thrift(slot_desc->col_type()), 
data_type);
-        slots_map.emplace(std::make_pair(to_lower(slot_desc->col_name()),
-                                         
TabletColumn::get_field_type_by_string(data_type)),
+        std::string is_null_str = slot_desc->is_nullable() ? "true" : "false";
+        std::string data_type_str =
+                
std::to_string(int64_t(TabletColumn::get_field_type_by_string(data_type)));
+        slots_map.emplace(to_lower(slot_desc->col_name()) + "+" + 
data_type_str + is_null_str,
                           slot_desc);
     }
 
@@ -157,9 +161,11 @@ Status OlapTableSchemaParam::init(const 
POlapTableSchemaParam& pschema) {
         for (const auto& pcolumn_desc : p_index.columns_desc()) {
             if (!_is_partial_update ||
                 _partial_update_input_columns.contains(pcolumn_desc.name())) {
-                auto it = slots_map.find(std::make_pair(
-                        to_lower(pcolumn_desc.name()),
-                        
TabletColumn::get_field_type_by_string(pcolumn_desc.type())));
+                std::string is_null_str = pcolumn_desc.is_nullable() ? "true" 
: "false";
+                std::string data_type_str = std::to_string(
+                        
int64_t(TabletColumn::get_field_type_by_string(pcolumn_desc.type())));
+                auto it = slots_map.find(to_lower(pcolumn_desc.name()) + "+" + 
data_type_str +
+                                         is_null_str);
                 if (it == std::end(slots_map)) {
                     return Status::InternalError("unknown index column, 
column={}, type={}",
                                                  pcolumn_desc.name(), 
pcolumn_desc.type());
@@ -206,12 +212,14 @@ Status OlapTableSchemaParam::init(const 
TOlapTableSchemaParam& tschema) {
     for (const auto& tcolumn : tschema.partial_update_input_columns) {
         _partial_update_input_columns.insert(tcolumn);
     }
-    std::unordered_map<std::pair<std::string, PrimitiveType>, SlotDescriptor*> 
slots_map;
+    std::unordered_map<std::string, SlotDescriptor*> slots_map;
     _tuple_desc = _obj_pool.add(new TupleDescriptor(tschema.tuple_desc));
     for (const auto& t_slot_desc : tschema.slot_descs) {
         auto* slot_desc = _obj_pool.add(new SlotDescriptor(t_slot_desc));
         _tuple_desc->add_slot(slot_desc);
-        slots_map.emplace(std::make_pair(to_lower(slot_desc->col_name()), 
slot_desc->col_type()),
+        std::string is_null_str = slot_desc->is_nullable() ? "true" : "false";
+        std::string data_type_str = 
std::to_string(int64_t(slot_desc->col_type()));
+        slots_map.emplace(to_lower(slot_desc->col_name()) + "+" + 
data_type_str + is_null_str,
                           slot_desc);
     }
 
@@ -223,9 +231,11 @@ Status OlapTableSchemaParam::init(const 
TOlapTableSchemaParam& tschema) {
         for (const auto& tcolumn_desc : t_index.columns_desc) {
             if (!_is_partial_update ||
                 
_partial_update_input_columns.contains(tcolumn_desc.column_name)) {
-                auto it = slots_map.find(
-                        std::make_pair(to_lower(tcolumn_desc.column_name),
-                                       
thrift_to_type(tcolumn_desc.column_type.type)));
+                std::string is_null_str = tcolumn_desc.is_allow_null ? "true" 
: "false";
+                std::string data_type_str =
+                        
std::to_string(int64_t(thrift_to_type(tcolumn_desc.column_type.type)));
+                auto it = slots_map.find(to_lower(tcolumn_desc.column_name) + 
"+" + data_type_str +
+                                         is_null_str);
                 if (it == slots_map.end()) {
                     return Status::InternalError("unknown index column, 
column={}, type={}",
                                                  tcolumn_desc.column_name,
diff --git 
a/regression-test/suites/schema_change_p0/test_alter_uniq_null.groovy 
b/regression-test/suites/schema_change_p0/test_alter_uniq_null.groovy
new file mode 100644
index 00000000000..fcedf4c2fa4
--- /dev/null
+++ b/regression-test/suites/schema_change_p0/test_alter_uniq_null.groovy
@@ -0,0 +1,71 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// The cases is copied from https://github.com/trinodb/trino/tree/master
+// /testing/trino-product-tests/src/main/resources/sql-tests/testcases
+// and modified by Doris.
+
+suite("test_alter_uniq_null") {
+    def tableName = "test_alter_uniq_null_tbl"
+
+    def getJobState = { tableName1 ->
+        def jobStateResult = sql """ SHOW ALTER TABLE COLUMN WHERE 
IndexName='${tableName1}' ORDER BY createtime DESC LIMIT 1 """
+        println jobStateResult
+        return jobStateResult[0][9]
+    }
+
+    sql """ DROP TABLE IF EXISTS ${tableName} """
+
+    sql """
+        CREATE TABLE ${tableName} (
+        `k1` VARCHAR(30) NOT NULL,
+        `k2` VARCHAR(24) NOT NULL,
+        `v1` VARCHAR(6) NULL,
+        `v2` INT NOT NULL
+        ) ENGINE=OLAP
+        UNIQUE KEY(`k1`, `k2`)
+        COMMENT 'OLAP'
+        DISTRIBUTED BY HASH(`k2`) BUCKETS 1
+        PROPERTIES (
+        "file_cache_ttl_seconds" = "0",
+        "light_schema_change" = "true",
+        "group_commit_interval_ms" = "10000",
+        "group_commit_data_bytes" = "134217728",
+        "enable_unique_key_merge_on_write" = "false",
+        "replication_num" = "1"
+        );
+    """
+
+    sql """alter table ${tableName} modify column `v2` INT NULL"""
+    sleep(10)
+    max_try_num = 1000
+    while (max_try_num--) {
+        String res = getJobState(tableName)
+        if (res == "FINISHED" || res == "CANCELLED") {
+            assertEquals("FINISHED", res)
+            break
+        } else {
+          int val = 100000 + max_try_num
+          sql """ insert into ${tableName} values ("${val}", "client", "3", 
100), ("${val}", "client", "4", 200)"""
+          sleep(10)
+          if (max_try_num < 1) {
+              println "test timeout," + "state:" + res
+              assertEquals("FINISHED",res)
+          }
+        }
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to