[doris] branch master updated: [fix](struct-type) fix struct subtype support (#17081)

xuyang Mon, 27 Feb 2023 19:37:23 -0800

This is an automated email from the ASF dual-hosted git repository.

xuyang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git



The following commit(s) were added to refs/heads/master by this push:
     new 4d8b310de0 [fix](struct-type) fix struct subtype support (#17081)
4d8b310de0 is described below

commit 4d8b310de0990ff69b6e76859f44c50b1bc227ce
Author: camby <[email protected]>
AuthorDate: Tue Feb 28 11:37:07 2023 +0800

    [fix](struct-type) fix struct subtype support (#17081)
    
    1. Make sure all sub-types supported by STRUCT work correctly;
    2. Remove the unused variable `_need_validate_data`;
    3. Lazily initialize the min/max decimal values to support validation of nested DecimalV2 columns;
    
    Co-authored-by: cambyzju <[email protected]>
---
 be/src/vec/sink/vtablet_sink.cpp                   |  74 ++++++++++---------
 be/src/vec/sink/vtablet_sink.h                     |  10 ++-
 .../org/apache/doris/analysis/StructLiteral.java   |   2 +-
 .../data/insert_p0/test_struct_insert.out          | Bin 0 -> 444 bytes
 .../suites/insert_p0/test_struct_insert.groovy     |  81 +++++++++++++++++++++
 5 files changed, 129 insertions(+), 38 deletions(-)

diff --git a/be/src/vec/sink/vtablet_sink.cpp b/be/src/vec/sink/vtablet_sink.cpp
index c535392a31..bd35929f85 100644
--- a/be/src/vec/sink/vtablet_sink.cpp
+++ b/be/src/vec/sink/vtablet_sink.cpp
@@ -37,6 +37,7 @@
 #include "util/time.h"
 #include "util/uid_util.h"
 #include "vec/columns/column_array.h"
+#include "vec/columns/column_struct.h"
 #include "vec/core/block.h"
 #include "vec/exprs/vexpr.h"
 #include "vec/exprs/vexpr_context.h"
@@ -846,36 +847,6 @@ Status VOlapTableSink::prepare(RuntimeState* state) {
 
     _output_row_desc = _pool->add(new RowDescriptor(_output_tuple_desc, 
false));
 
-    _max_decimalv2_val.resize(_output_tuple_desc->slots().size());
-    _min_decimalv2_val.resize(_output_tuple_desc->slots().size());
-    // check if need validate batch
-    for (int i = 0; i < _output_tuple_desc->slots().size(); ++i) {
-        auto slot = _output_tuple_desc->slots()[i];
-        switch (slot->type().type) {
-        // For DECIMAL32,DECIMAL64,DECIMAL128, we have done precision and 
scale conversion so just
-        // skip data validation here.
-        case TYPE_DECIMALV2:
-            _max_decimalv2_val[i].to_max_decimal(slot->type().precision, 
slot->type().scale);
-            _min_decimalv2_val[i].to_min_decimal(slot->type().precision, 
slot->type().scale);
-            _need_validate_data = true;
-            break;
-        case TYPE_CHAR:
-        case TYPE_VARCHAR:
-        case TYPE_DATE:
-        case TYPE_DATETIME:
-        case TYPE_DATEV2:
-        case TYPE_DATETIMEV2:
-        case TYPE_HLL:
-        case TYPE_OBJECT:
-        case TYPE_STRING:
-        case TYPE_ARRAY:
-            _need_validate_data = true;
-            break;
-        default:
-            break;
-        }
-    }
-
     // add all counter
     _input_rows_counter = ADD_COUNTER(_profile, "RowsRead", TUnit::UNIT);
     _output_rows_counter = ADD_COUNTER(_profile, "RowsReturned", TUnit::UNIT);
@@ -1272,6 +1243,32 @@ Status VOlapTableSink::close(RuntimeState* state, Status 
exec_status) {
     return status;
 }
 
+template <bool is_min>
+DecimalV2Value VOlapTableSink::_get_decimalv2_min_or_max(const TypeDescriptor& 
type) {
+    std::map<std::pair<int, int>, DecimalV2Value>* pmap = nullptr;
+    if constexpr (is_min) {
+        pmap = &_min_decimalv2_val;
+    } else {
+        pmap = &_max_decimalv2_val;
+    }
+
+    // found
+    auto iter = pmap->find({type.precision, type.scale});
+    if (iter != pmap->end()) {
+        return iter->second;
+    }
+
+    // save min or max DecimalV2Value for next time
+    DecimalV2Value value;
+    if constexpr (is_min) {
+        value.to_min_decimal(type.precision, type.scale);
+    } else {
+        value.to_max_decimal(type.precision, type.scale);
+    }
+    pmap->emplace(std::pair<int, int> {type.precision, type.scale}, value);
+    return value;
+}
+
 Status VOlapTableSink::_validate_column(RuntimeState* state, const 
TypeDescriptor& type,
                                         bool is_nullable, 
vectorized::ColumnPtr column,
                                         size_t slot_index, Bitmap* 
filter_bitmap,
@@ -1387,8 +1384,9 @@ Status VOlapTableSink::_validate_column(RuntimeState* 
state, const TypeDescripto
                         invalid = true;
                     }
                 }
-                if (dec_val > _max_decimalv2_val[slot_index] ||
-                    dec_val < _min_decimalv2_val[slot_index]) {
+
+                if (dec_val > _get_decimalv2_min_or_max<false>(type) ||
+                    dec_val < _get_decimalv2_min_or_max<true>(type)) {
                     fmt::format_to(error_msg, "{}", "decimal value is not 
valid for definition");
                     fmt::format_to(error_msg, ", value={}", 
dec_val.to_string());
                     fmt::format_to(error_msg, ", precision={}, scale={}; ", 
type.precision,
@@ -1425,7 +1423,17 @@ Status VOlapTableSink::_validate_column(RuntimeState* 
state, const TypeDescripto
         }
         break;
     }
-    // TODO(xy): add struct type validate
+    case TYPE_STRUCT: {
+        const auto column_struct =
+                assert_cast<const 
vectorized::ColumnStruct*>(real_column_ptr.get());
+        DCHECK(type.children.size() == column_struct->tuple_size());
+        for (size_t sc = 0; sc < column_struct->tuple_size(); ++sc) {
+            RETURN_IF_ERROR(_validate_column(state, type.children[sc], 
type.contains_nulls[sc],
+                                             
column_struct->get_column_ptr(sc), slot_index,
+                                             filter_bitmap, stop_processing, 
error_prefix));
+        }
+        break;
+    }
     default:
         break;
     }
diff --git a/be/src/vec/sink/vtablet_sink.h b/be/src/vec/sink/vtablet_sink.h
index cb48057ec5..5200c29f37 100644
--- a/be/src/vec/sink/vtablet_sink.h
+++ b/be/src/vec/sink/vtablet_sink.h
@@ -427,6 +427,10 @@ private:
     // set stop_processing if we want to stop the whole process now.
     Status _validate_data(RuntimeState* state, vectorized::Block* block, 
Bitmap* filter_bitmap,
                           int* filtered_rows, bool* stop_processing);
+
+    template <bool is_min>
+    DecimalV2Value _get_decimalv2_min_or_max(const TypeDescriptor& type);
+
     Status _validate_column(RuntimeState* state, const TypeDescriptor& type, 
bool is_nullable,
                             vectorized::ColumnPtr column, size_t slot_index, 
Bitmap* filter_bitmap,
                             bool* stop_processing, fmt::memory_buffer& 
error_prefix,
@@ -455,8 +459,6 @@ private:
     TupleDescriptor* _output_tuple_desc = nullptr;
     RowDescriptor* _output_row_desc = nullptr;
 
-    bool _need_validate_data = false;
-
     // number of senders used to insert into OlapTable, if we only support 
single node insert,
     // all data from select should collectted and then send to OlapTable.
     // To support multiple senders, we maintain a channel for each sender.
@@ -486,8 +488,8 @@ private:
     scoped_refptr<Thread> _sender_thread;
     std::unique_ptr<ThreadPoolToken> _send_batch_thread_pool_token;
 
-    std::vector<DecimalV2Value> _max_decimalv2_val;
-    std::vector<DecimalV2Value> _min_decimalv2_val;
+    std::map<std::pair<int, int>, DecimalV2Value> _max_decimalv2_val;
+    std::map<std::pair<int, int>, DecimalV2Value> _min_decimalv2_val;
 
     // Stats for this
     int64_t _validate_data_ns = 0;
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/StructLiteral.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/StructLiteral.java
index 95362a37ab..a06fe9e53e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/StructLiteral.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/StructLiteral.java
@@ -43,7 +43,7 @@ public class StructLiteral extends LiteralExpr {
         type = new StructType();
         children = new ArrayList<>();
         for (LiteralExpr expr : exprs) {
-            if (!type.supportSubType(expr.getType())) {
+            if (!expr.getType().isNull() && 
!type.supportSubType(expr.getType())) {
                 throw new AnalysisException("Invalid element type in STRUCT.");
             }
             ((StructType) type).addField(new StructField(expr.getType()));
diff --git a/regression-test/data/insert_p0/test_struct_insert.out 
b/regression-test/data/insert_p0/test_struct_insert.out
new file mode 100644
index 0000000000..b0bf805c04
Binary files /dev/null and 
b/regression-test/data/insert_p0/test_struct_insert.out differ
diff --git a/regression-test/suites/insert_p0/test_struct_insert.groovy 
b/regression-test/suites/insert_p0/test_struct_insert.groovy
new file mode 100644
index 0000000000..8696e0552a
--- /dev/null
+++ b/regression-test/suites/insert_p0/test_struct_insert.groovy
@@ -0,0 +1,81 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_struct_insert") {
+    // define a sql table
+    def testTable = "tbl_test_struct_insert"
+
+    def create_test_table = {testTablex ->
+        def result1 = sql """
+            CREATE TABLE IF NOT EXISTS ${testTable} (
+              `k1` INT(11) NULL,
+              `k2` 
STRUCT<f1:BOOLEAN,f2:TINYINT,f3:SMALLINT,f4:INT,f5:INT,f6:BIGINT,f7:LARGEINT> 
NULL,
+              `k3` STRUCT<f1:FLOAT,f2:DOUBLE,f3:DECIMAL(3,3)> NULL,
+              `k4` STRUCT<f1:DATE,f2:DATETIME,f3:DATEV2,f4:DATETIMEV2> NULL,
+              `k5` STRUCT<f1:CHAR(10),f2:VARCHAR(10),f3:STRING> NOT NULL
+            )
+            DUPLICATE KEY(`k1`)
+            DISTRIBUTED BY HASH(`k1`) BUCKETS 1
+            PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1"
+            )
+            """
+
+        // DDL/DML returns 1 row and 1 column; the only value is the updated row 
count
+        assertTrue(result1.size() == 1)
+        assertTrue(result1[0].size() == 1)
+        assertTrue(result1[0][0] == 0, "Create table should update 0 rows")
+    }
+
+    sql "ADMIN SET FRONTEND CONFIG ('enable_struct_type' = 'true')"
+
+    sql "DROP TABLE IF EXISTS ${testTable}"
+    create_test_table.call(testTable)
+
+    sql "set enable_insert_strict = true"
+
+    // invalid cases
+    test {
+        // k5 is not nullable, can not insert null
+        sql "insert into ${testTable} values (111,null,null,null,null)"
+        exception "Insert has filtered data"
+    }
+    test {
+        // size of char type in struct is 10, can not insert string with 
length more than 10 
+        sql "insert into ${testTable} values 
(112,null,null,null,{'1234567890123',null,null})"
+        exception "Insert has filtered data"
+    }
+    test {
+        // size of varchar type in struct is 10, can not insert string with 
length more than 10 
+        sql "insert into ${testTable} values 
(113,null,null,null,{null,'12345678901234',null})"
+        exception "Insert has filtered data"
+    }
+    test {
+        // input decimal is invalid
+        sql "insert into ${testTable} values 
(114,null,{null,null,1234.1234},null,{null,'',null})"
+        exception "Insert has filtered data"
+    }
+
+    // normal cases include nullable and nullable nested fields
+    sql "INSERT INTO ${testTable} VALUES(1, 
{1,11,111,1111,11111,11111,111111},null,null,{'','',''})"
+    sql "INSERT INTO ${testTable} VALUES(2, 
{null,null,null,null,null,null,null},{2.1,2.22,2.333},null,{null,null,null})"
+    sql "INSERT INTO ${testTable} VALUES(3, 
null,{null,null,null},{'2023-02-23','2023-02-23 
00:10:19','2023-02-23','2023-02-23 00:10:19'},{'','',''})"
+    sql "INSERT INTO ${testTable} VALUES(4, 
null,null,{null,null,null,null},{'abc','def','hij'})"
+
+    // select the table and check whether the data is correct
+    qt_select "select * from ${testTable} order by k1"
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[doris] branch master updated: [fix](struct-type) fix struct subtype support (#17081)

Reply via email to