This is an automated email from the ASF dual-hosted git repository.
xuyang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 4d8b310de0 [fix](struct-type) fix struct subtype support (#17081)
4d8b310de0 is described below
commit 4d8b310de0990ff69b6e76859f44c50b1bc227ce
Author: camby <[email protected]>
AuthorDate: Tue Feb 28 11:37:07 2023 +0800
[fix](struct-type) fix struct subtype support (#17081)
1. Make sure all subtypes that STRUCT supports work correctly;
2. Remove the unused variable `_need_validate_data`;
3. Lazily initialize the min/max decimal values to support validation of nested DecimalV2 columns;
Co-authored-by: cambyzju <[email protected]>
---
be/src/vec/sink/vtablet_sink.cpp | 74 ++++++++++---------
be/src/vec/sink/vtablet_sink.h | 10 ++-
.../org/apache/doris/analysis/StructLiteral.java | 2 +-
.../data/insert_p0/test_struct_insert.out | Bin 0 -> 444 bytes
.../suites/insert_p0/test_struct_insert.groovy | 81 +++++++++++++++++++++
5 files changed, 129 insertions(+), 38 deletions(-)
diff --git a/be/src/vec/sink/vtablet_sink.cpp b/be/src/vec/sink/vtablet_sink.cpp
index c535392a31..bd35929f85 100644
--- a/be/src/vec/sink/vtablet_sink.cpp
+++ b/be/src/vec/sink/vtablet_sink.cpp
@@ -37,6 +37,7 @@
#include "util/time.h"
#include "util/uid_util.h"
#include "vec/columns/column_array.h"
+#include "vec/columns/column_struct.h"
#include "vec/core/block.h"
#include "vec/exprs/vexpr.h"
#include "vec/exprs/vexpr_context.h"
@@ -846,36 +847,6 @@ Status VOlapTableSink::prepare(RuntimeState* state) {
_output_row_desc = _pool->add(new RowDescriptor(_output_tuple_desc,
false));
- _max_decimalv2_val.resize(_output_tuple_desc->slots().size());
- _min_decimalv2_val.resize(_output_tuple_desc->slots().size());
- // check if need validate batch
- for (int i = 0; i < _output_tuple_desc->slots().size(); ++i) {
- auto slot = _output_tuple_desc->slots()[i];
- switch (slot->type().type) {
- // For DECIMAL32,DECIMAL64,DECIMAL128, we have done precision and
scale conversion so just
- // skip data validation here.
- case TYPE_DECIMALV2:
- _max_decimalv2_val[i].to_max_decimal(slot->type().precision,
slot->type().scale);
- _min_decimalv2_val[i].to_min_decimal(slot->type().precision,
slot->type().scale);
- _need_validate_data = true;
- break;
- case TYPE_CHAR:
- case TYPE_VARCHAR:
- case TYPE_DATE:
- case TYPE_DATETIME:
- case TYPE_DATEV2:
- case TYPE_DATETIMEV2:
- case TYPE_HLL:
- case TYPE_OBJECT:
- case TYPE_STRING:
- case TYPE_ARRAY:
- _need_validate_data = true;
- break;
- default:
- break;
- }
- }
-
// add all counter
_input_rows_counter = ADD_COUNTER(_profile, "RowsRead", TUnit::UNIT);
_output_rows_counter = ADD_COUNTER(_profile, "RowsReturned", TUnit::UNIT);
@@ -1272,6 +1243,32 @@ Status VOlapTableSink::close(RuntimeState* state, Status
exec_status) {
return status;
}
+template <bool is_min>
+DecimalV2Value VOlapTableSink::_get_decimalv2_min_or_max(const TypeDescriptor&
type) {
+ std::map<std::pair<int, int>, DecimalV2Value>* pmap = nullptr;
+ if constexpr (is_min) {
+ pmap = &_min_decimalv2_val;
+ } else {
+ pmap = &_max_decimalv2_val;
+ }
+
+ // found
+ auto iter = pmap->find({type.precision, type.scale});
+ if (iter != pmap->end()) {
+ return iter->second;
+ }
+
+ // save min or max DecimalV2Value for next time
+ DecimalV2Value value;
+ if constexpr (is_min) {
+ value.to_min_decimal(type.precision, type.scale);
+ } else {
+ value.to_max_decimal(type.precision, type.scale);
+ }
+ pmap->emplace(std::pair<int, int> {type.precision, type.scale}, value);
+ return value;
+}
+
Status VOlapTableSink::_validate_column(RuntimeState* state, const
TypeDescriptor& type,
bool is_nullable,
vectorized::ColumnPtr column,
size_t slot_index, Bitmap*
filter_bitmap,
@@ -1387,8 +1384,9 @@ Status VOlapTableSink::_validate_column(RuntimeState*
state, const TypeDescripto
invalid = true;
}
}
- if (dec_val > _max_decimalv2_val[slot_index] ||
- dec_val < _min_decimalv2_val[slot_index]) {
+
+ if (dec_val > _get_decimalv2_min_or_max<false>(type) ||
+ dec_val < _get_decimalv2_min_or_max<true>(type)) {
fmt::format_to(error_msg, "{}", "decimal value is not
valid for definition");
fmt::format_to(error_msg, ", value={}",
dec_val.to_string());
fmt::format_to(error_msg, ", precision={}, scale={}; ",
type.precision,
@@ -1425,7 +1423,17 @@ Status VOlapTableSink::_validate_column(RuntimeState*
state, const TypeDescripto
}
break;
}
- // TODO(xy): add struct type validate
+ case TYPE_STRUCT: {
+ const auto column_struct =
+ assert_cast<const
vectorized::ColumnStruct*>(real_column_ptr.get());
+ DCHECK(type.children.size() == column_struct->tuple_size());
+ for (size_t sc = 0; sc < column_struct->tuple_size(); ++sc) {
+ RETURN_IF_ERROR(_validate_column(state, type.children[sc],
type.contains_nulls[sc],
+
column_struct->get_column_ptr(sc), slot_index,
+ filter_bitmap, stop_processing,
error_prefix));
+ }
+ break;
+ }
default:
break;
}
diff --git a/be/src/vec/sink/vtablet_sink.h b/be/src/vec/sink/vtablet_sink.h
index cb48057ec5..5200c29f37 100644
--- a/be/src/vec/sink/vtablet_sink.h
+++ b/be/src/vec/sink/vtablet_sink.h
@@ -427,6 +427,10 @@ private:
// set stop_processing if we want to stop the whole process now.
Status _validate_data(RuntimeState* state, vectorized::Block* block,
Bitmap* filter_bitmap,
int* filtered_rows, bool* stop_processing);
+
+ template <bool is_min>
+ DecimalV2Value _get_decimalv2_min_or_max(const TypeDescriptor& type);
+
Status _validate_column(RuntimeState* state, const TypeDescriptor& type,
bool is_nullable,
vectorized::ColumnPtr column, size_t slot_index,
Bitmap* filter_bitmap,
bool* stop_processing, fmt::memory_buffer&
error_prefix,
@@ -455,8 +459,6 @@ private:
TupleDescriptor* _output_tuple_desc = nullptr;
RowDescriptor* _output_row_desc = nullptr;
- bool _need_validate_data = false;
-
// number of senders used to insert into OlapTable, if we only support
single node insert,
// all data from select should collectted and then send to OlapTable.
// To support multiple senders, we maintain a channel for each sender.
@@ -486,8 +488,8 @@ private:
scoped_refptr<Thread> _sender_thread;
std::unique_ptr<ThreadPoolToken> _send_batch_thread_pool_token;
- std::vector<DecimalV2Value> _max_decimalv2_val;
- std::vector<DecimalV2Value> _min_decimalv2_val;
+ std::map<std::pair<int, int>, DecimalV2Value> _max_decimalv2_val;
+ std::map<std::pair<int, int>, DecimalV2Value> _min_decimalv2_val;
// Stats for this
int64_t _validate_data_ns = 0;
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/StructLiteral.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/StructLiteral.java
index 95362a37ab..a06fe9e53e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/StructLiteral.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/StructLiteral.java
@@ -43,7 +43,7 @@ public class StructLiteral extends LiteralExpr {
type = new StructType();
children = new ArrayList<>();
for (LiteralExpr expr : exprs) {
- if (!type.supportSubType(expr.getType())) {
+ if (!expr.getType().isNull() &&
!type.supportSubType(expr.getType())) {
throw new AnalysisException("Invalid element type in STRUCT.");
}
((StructType) type).addField(new StructField(expr.getType()));
diff --git a/regression-test/data/insert_p0/test_struct_insert.out
b/regression-test/data/insert_p0/test_struct_insert.out
new file mode 100644
index 0000000000..b0bf805c04
Binary files /dev/null and
b/regression-test/data/insert_p0/test_struct_insert.out differ
diff --git a/regression-test/suites/insert_p0/test_struct_insert.groovy
b/regression-test/suites/insert_p0/test_struct_insert.groovy
new file mode 100644
index 0000000000..8696e0552a
--- /dev/null
+++ b/regression-test/suites/insert_p0/test_struct_insert.groovy
@@ -0,0 +1,81 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_struct_insert") {
+ // define a sql table
+ def testTable = "tbl_test_struct_insert"
+
+ def create_test_table = {testTablex ->
+ def result1 = sql """
+ CREATE TABLE IF NOT EXISTS ${testTable} (
+ `k1` INT(11) NULL,
+ `k2`
STRUCT<f1:BOOLEAN,f2:TINYINT,f3:SMALLINT,f4:INT,f5:INT,f6:BIGINT,f7:LARGEINT>
NULL,
+ `k3` STRUCT<f1:FLOAT,f2:DOUBLE,f3:DECIMAL(3,3)> NULL,
+ `k4` STRUCT<f1:DATE,f2:DATETIME,f3:DATEV2,f4:DATETIMEV2> NULL,
+ `k5` STRUCT<f1:CHAR(10),f2:VARCHAR(10),f3:STRING> NOT NULL
+ )
+ DUPLICATE KEY(`k1`)
+ DISTRIBUTED BY HASH(`k1`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ )
+ """
+
+ // DDL/DML returns 1 row and 1 column; the only value is the updated row count
+ assertTrue(result1.size() == 1)
+ assertTrue(result1[0].size() == 1)
+ assertTrue(result1[0][0] == 0, "Create table should update 0 rows")
+ }
+
+ sql "ADMIN SET FRONTEND CONFIG ('enable_struct_type' = 'true')"
+
+ sql "DROP TABLE IF EXISTS ${testTable}"
+ create_test_table.call(testTable)
+
+ sql "set enable_insert_strict = true"
+
+ // invalid cases
+ test {
+ // k5 is not nullable, can not insert null
+ sql "insert into ${testTable} values (111,null,null,null,null)"
+ exception "Insert has filtered data"
+ }
+ test {
+ // size of char type in struct is 10, can not insert string with
length more than 10
+ sql "insert into ${testTable} values
(112,null,null,null,{'1234567890123',null,null})"
+ exception "Insert has filtered data"
+ }
+ test {
+ // size of varchar type in struct is 10, can not insert string with
length more than 10
+ sql "insert into ${testTable} values
(113,null,null,null,{null,'12345678901234',null})"
+ exception "Insert has filtered data"
+ }
+ test {
+ // input decimal is invalid
+ sql "insert into ${testTable} values
(114,null,{null,null,1234.1234},null,{null,'',null})"
+ exception "Insert has filtered data"
+ }
+
+ // normal cases include nullable and nullable nested fields
+ sql "INSERT INTO ${testTable} VALUES(1,
{1,11,111,1111,11111,11111,111111},null,null,{'','',''})"
+ sql "INSERT INTO ${testTable} VALUES(2,
{null,null,null,null,null,null,null},{2.1,2.22,2.333},null,{null,null,null})"
+ sql "INSERT INTO ${testTable} VALUES(3,
null,{null,null,null},{'2023-02-23','2023-02-23
00:10:19','2023-02-23','2023-02-23 00:10:19'},{'','',''})"
+ sql "INSERT INTO ${testTable} VALUES(4,
null,null,{null,null,null,null},{'abc','def','hij'})"
+
+ // select the table and check whether the data is correct
+ qt_select "select * from ${testTable} order by k1"
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]