This is an automated email from the ASF dual-hosted git repository.
xuyang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 8b70bfdc31 [Feature](map-type) Support stream load and fix some bugs
for map type (#16776)
8b70bfdc31 is described below
commit 8b70bfdc31141332cbb72008083a423abcb7e741
Author: amory <[email protected]>
AuthorDate: Sun Feb 19 15:11:54 2023 +0800
[Feature](map-type) Support stream load and fix some bugs for map type
(#16776)
1. support stream load with json and csv formats for map
2. fix the olap convertor when compaction runs on a map column that contains nulls
3. support select into outfile for map
4. add some regression tests
---
be/src/olap/rowset/segment_v2/column_writer.cpp | 13 +-
be/src/olap/rowset/segment_v2/column_writer.h | 2 +-
be/src/vec/data_types/data_type_factory.cpp | 5 +-
be/src/vec/data_types/data_type_factory.hpp | 3 +-
be/src/vec/data_types/data_type_map.cpp | 171 +++++++++++++++------
be/src/vec/exprs/vexpr.cpp | 9 ++
be/src/vec/functions/function_cast.h | 12 ++
be/src/vec/olap/olap_data_convertor.h | 1 -
be/src/vec/runtime/vfile_result_writer.cpp | 4 +
be/src/vec/sink/vmysql_result_writer.cpp | 6 +
regression-test/data/export/test_map_export.out | 8 +
regression-test/data/load/insert/test_map_dml.out | 8 +
.../data/load_p0/stream_load/test_map.csv | 15 ++
.../stream_load/test_map_load_and_function.out | 37 +++++
regression-test/data/map_p0/test_map_dml.out | 11 ++
.../data/query_p0/show/test_map_show_create.out | 4 +
.../suites/export/test_map_export.groovy | 131 ++++++++++++++++
.../suites/load/insert/test_map_dml.groovy | 114 ++++++++++++++
.../stream_load/test_map_load_and_function.groovy | 74 +++++++++
.../query_p0/show/test_map_show_create.groovy | 69 +++++++++
20 files changed, 643 insertions(+), 54 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp
b/be/src/olap/rowset/segment_v2/column_writer.cpp
index 583becb2b0..a35c840eb5 100644
--- a/be/src/olap/rowset/segment_v2/column_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/column_writer.cpp
@@ -1000,7 +1000,15 @@ Status MapColumnWriter::finish() {
return Status::OK();
}
-// todo. make keys and values write
+Status MapColumnWriter::append_nullable(const uint8_t* null_map, const
uint8_t** ptr,
+ size_t num_rows) {
+ if (is_nullable()) {
+ RETURN_IF_ERROR(_null_writer->append_data(&null_map, num_rows));
+ }
+ RETURN_IF_ERROR(append_data(ptr, num_rows));
+ return Status::OK();
+}
+
Status MapColumnWriter::append_data(const uint8_t** ptr, size_t num_rows) {
auto kv_ptr = reinterpret_cast<const uint64_t*>(*ptr);
for (size_t i = 0; i < 2; ++i) {
@@ -1008,9 +1016,6 @@ Status MapColumnWriter::append_data(const uint8_t** ptr,
size_t num_rows) {
const uint8_t* val_ptr = (const uint8_t*)data;
RETURN_IF_ERROR(_kv_writers[i]->append_data(&val_ptr, num_rows));
}
- if (is_nullable()) {
- return write_null_column(num_rows, false);
- }
return Status::OK();
}
diff --git a/be/src/olap/rowset/segment_v2/column_writer.h
b/be/src/olap/rowset/segment_v2/column_writer.h
index 7d140324dd..036c4d3baf 100644
--- a/be/src/olap/rowset/segment_v2/column_writer.h
+++ b/be/src/olap/rowset/segment_v2/column_writer.h
@@ -382,7 +382,7 @@ public:
Status init() override;
Status append_data(const uint8_t** ptr, size_t num_rows) override;
-
+ Status append_nullable(const uint8_t* null_map, const uint8_t** ptr,
size_t num_rows) override;
uint64_t estimate_buffer_size() override;
Status finish() override;
diff --git a/be/src/vec/data_types/data_type_factory.cpp
b/be/src/vec/data_types/data_type_factory.cpp
index 65510dd9f2..91fc51187b 100644
--- a/be/src/vec/data_types/data_type_factory.cpp
+++ b/be/src/vec/data_types/data_type_factory.cpp
@@ -169,9 +169,10 @@ DataTypePtr DataTypeFactory::create_data_type(const
TypeDescriptor& col_desc, bo
break;
case TYPE_MAP:
DCHECK(col_desc.children.size() == 2);
+ // todo. (Amory) Support Map contains_nulls in FE MapType.java Later PR
nested = std::make_shared<vectorized::DataTypeMap>(
- create_data_type(col_desc.children[0],
col_desc.contains_nulls[0]),
- create_data_type(col_desc.children[1],
col_desc.contains_nulls[1]));
+ create_data_type(col_desc.children[0], true),
+ create_data_type(col_desc.children[1], true));
break;
case TYPE_STRUCT: {
DCHECK(col_desc.children.size() >= 1);
diff --git a/be/src/vec/data_types/data_type_factory.hpp
b/be/src/vec/data_types/data_type_factory.hpp
index 879418a326..b2623b06da 100644
--- a/be/src/vec/data_types/data_type_factory.hpp
+++ b/be/src/vec/data_types/data_type_factory.hpp
@@ -113,7 +113,8 @@ public:
return entity.second;
}
}
- if (type_ptr->get_type_id() == TypeIndex::Struct) {
+ if (type_ptr->get_type_id() == TypeIndex::Struct ||
+ type_ptr->get_type_id() == TypeIndex::Map) {
DataTypeFactory::instance().register_data_type(type_ptr->get_name(), type_ptr);
for (const auto& entity : _invert_data_type_map) {
if (entity.first->equals(*type_ptr)) {
diff --git a/be/src/vec/data_types/data_type_map.cpp
b/be/src/vec/data_types/data_type_map.cpp
index c40e0362c5..daf683c3ab 100644
--- a/be/src/vec/data_types/data_type_map.cpp
+++ b/be/src/vec/data_types/data_type_map.cpp
@@ -20,14 +20,16 @@
#include "gen_cpp/data.pb.h"
#include "vec/columns/column_array.h"
#include "vec/columns/column_map.h"
+#include "vec/columns/column_nullable.h"
#include "vec/common/assert_cast.h"
-#include "vec/data_types/data_type_factory.hpp"
+#include "vec/data_types/data_type_array.h"
+#include "vec/data_types/data_type_nullable.h"
namespace doris::vectorized {
DataTypeMap::DataTypeMap(const DataTypePtr& keys_, const DataTypePtr& values_)
{
- key_type = keys_;
- value_type = values_;
+ key_type = make_nullable(keys_);
+ value_type = make_nullable(values_);
keys = std::make_shared<DataTypeArray>(key_type);
values = std::make_shared<DataTypeArray>(value_type);
@@ -53,7 +55,7 @@ std::string DataTypeMap::to_string(const IColumn& column,
size_t row_num) const
ss << ", ";
}
if (nested_keys_column.is_null_at(i)) {
- ss << "NULL";
+ ss << "null";
} else if
(WhichDataType(remove_nullable(key_type)).is_string_or_fixed_string()) {
ss << "'" << key_type->to_string(nested_keys_column, i) << "'";
} else {
@@ -61,7 +63,7 @@ std::string DataTypeMap::to_string(const IColumn& column,
size_t row_num) const
}
ss << ":";
if (nested_values_column.is_null_at(i)) {
- ss << "NULL";
+ ss << "null";
} else if
(WhichDataType(remove_nullable(value_type)).is_string_or_fixed_string()) {
ss << "'" << value_type->to_string(nested_values_column, i) << "'";
} else {
@@ -78,6 +80,85 @@ void DataTypeMap::to_string(const class
doris::vectorized::IColumn& column, size
ostr.write(ss.c_str(), strlen(ss.c_str()));
}
+bool next_slot_from_string(ReadBuffer& rb, StringRef& output, bool& has_quota)
{
+ StringRef element(rb.position(), 0);
+ has_quota = false;
+ if (rb.eof()) {
+ return false;
+ }
+
+ // ltrim
+ while (!rb.eof() && isspace(*rb.position())) {
+ ++rb.position();
+ element.data = rb.position();
+ }
+
+ // parse string
+ if (*rb.position() == '"' || *rb.position() == '\'') {
+ const char str_sep = *rb.position();
+ size_t str_len = 1;
+ // search until next '"' or '\''
+ while (str_len < rb.count() && *(rb.position() + str_len) != str_sep) {
+ ++str_len;
+ }
+ // invalid string
+ if (str_len >= rb.count()) {
+ rb.position() = rb.end();
+ return false;
+ }
+ has_quota = true;
+ rb.position() += str_len + 1;
+ element.size += str_len + 1;
+ }
+
+ // parse array element until map separator ':' or ',' or end '}'
+ while (!rb.eof() && (*rb.position() != ':') && (*rb.position() != ',') &&
+ (rb.count() != 1 || *rb.position() != '}')) {
+ if (has_quota && !isspace(*rb.position())) {
+ return false;
+ }
+ ++rb.position();
+ ++element.size;
+ }
+ // invalid array element
+ if (rb.eof()) {
+ return false;
+ }
+ // adjust read buffer position to first char of next array element
+ ++rb.position();
+
+ // rtrim
+ while (element.size > 0 && isspace(element.data[element.size - 1])) {
+ --element.size;
+ }
+
+ // trim '"' and '\'' for string
+ if (element.size >= 2 && (element.data[0] == '"' || element.data[0] ==
'\'') &&
+ element.data[0] == element.data[element.size - 1]) {
+ ++element.data;
+ element.size -= 2;
+ }
+ output = element;
+ return true;
+}
+
+bool is_empty_null_element(StringRef element, IColumn* nested_column, bool
has_quota) {
+ auto& nested_null_col = reinterpret_cast<ColumnNullable&>(*nested_column);
+ // handle empty element
+ if (element.size == 0) {
+ nested_null_col.get_nested_column().insert_default();
+ nested_null_col.get_null_map_data().push_back(0);
+ return true;
+ }
+
+ // handle null element
+ if (!has_quota && element.size == 4 && strncmp(element.data, "null", 4) ==
0) {
+ nested_null_col.get_nested_column().insert_default();
+ nested_null_col.get_null_map_data().push_back(1);
+ return true;
+ }
+ return false;
+}
Status DataTypeMap::from_string(ReadBuffer& rb, IColumn* column) const {
DCHECK(!rb.eof());
auto* map_column = assert_cast<ColumnMap*>(column);
@@ -91,57 +172,57 @@ Status DataTypeMap::from_string(ReadBuffer& rb, IColumn*
column) const {
*(rb.end() - 1));
}
- std::stringstream keyCharset;
- std::stringstream valCharset;
-
if (rb.count() == 2) {
// empty map {} , need to make empty array to add offset
- keyCharset << "[]";
- valCharset << "[]";
+ map_column->insert_default();
} else {
- // {"aaa": 1, "bbb": 20}, need to handle key and value to make key
column arr and value arr
+ // {"aaa": 1, "bbb": 20}, need to handle key slot and value slot to
make key column arr and value arr
// skip "{"
++rb.position();
- keyCharset << "[";
- valCharset << "[";
+ auto& keys_arr =
reinterpret_cast<ColumnArray&>(map_column->get_keys());
+ ColumnArray::Offsets64& key_off = keys_arr.get_offsets();
+ auto& values_arr =
reinterpret_cast<ColumnArray&>(map_column->get_values());
+ ColumnArray::Offsets64& val_off = values_arr.get_offsets();
+
+ IColumn& nested_key_column = keys_arr.get_data();
+ DCHECK(nested_key_column.is_nullable());
+ IColumn& nested_val_column = values_arr.get_data();
+ DCHECK(nested_val_column.is_nullable());
+
+ size_t element_num = 0;
while (!rb.eof()) {
- size_t kv_len = 0;
- auto start = rb.position();
- while (!rb.eof() && *start != ',' && *start != '}') {
- kv_len++;
- start++;
+ StringRef key_element(rb.position(), rb.count());
+ bool has_quota = false;
+ if (!next_slot_from_string(rb, key_element, has_quota)) {
+ return Status::InvalidArgument("Cannot read map key from text
'{}'",
+ key_element.to_string());
}
- if (kv_len >= rb.count()) {
- return Status::InvalidArgument("Invalid Length");
+ if (!is_empty_null_element(key_element, &nested_key_column,
has_quota)) {
+ ReadBuffer krb(const_cast<char*>(key_element.data),
key_element.size);
+ if (auto st = key_type->from_string(krb, &nested_key_column);
!st.ok()) {
+ map_column->pop_back(element_num);
+ return st;
+ }
}
- size_t k_len = 0;
- auto k_rb = rb.position();
- while (kv_len > 0 && *k_rb != ':') {
- k_len++;
- k_rb++;
+ has_quota = false;
+ StringRef value_element(rb.position(), rb.count());
+ if (!next_slot_from_string(rb, value_element, has_quota)) {
+ return Status::InvalidArgument("Cannot read map value from
text '{}'",
+ value_element.to_string());
}
- ReadBuffer key_rb(rb.position(), k_len);
- ReadBuffer val_rb(k_rb + 1, kv_len - k_len - 1);
-
- // handle key
- keyCharset << key_rb.to_string();
- keyCharset << ",";
-
- // handle value
- valCharset << val_rb.to_string();
- valCharset << ",";
-
- rb.position() += kv_len + 1;
+ if (!is_empty_null_element(value_element, &nested_val_column,
has_quota)) {
+ ReadBuffer vrb(const_cast<char*>(value_element.data),
value_element.size);
+ if (auto st = value_type->from_string(vrb,
&nested_val_column); !st.ok()) {
+ map_column->pop_back(element_num);
+ return st;
+ }
+ }
+ ++element_num;
}
- keyCharset << ']';
- valCharset << ']';
+ key_off.push_back(key_off.back() + element_num);
+ val_off.push_back(val_off.back() + element_num);
}
-
- ReadBuffer kb(keyCharset.str().data(), keyCharset.str().length());
- ReadBuffer vb(valCharset.str().data(), valCharset.str().length());
- keys->from_string(kb, &map_column->get_keys());
- values->from_string(vb, &map_column->get_values());
return Status::OK();
}
@@ -199,4 +280,4 @@ const char* DataTypeMap::deserialize(const char* buf,
IColumn* column, int data_
data_version);
}
-} // namespace doris::vectorized
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp
index 69ccf0f5bd..f90993884d 100644
--- a/be/src/vec/exprs/vexpr.cpp
+++ b/be/src/vec/exprs/vexpr.cpp
@@ -372,6 +372,15 @@ FunctionContext::TypeDesc
VExpr::column_type_to_type_desc(const TypeDescriptor&
out.children.push_back(VExpr::column_type_to_type_desc(t));
}
break;
+ case TYPE_MAP:
+ CHECK(type.children.size() == 2);
+ // only support map key is scalar
+ CHECK(!type.children[0].is_complex_type());
+ out.type = FunctionContext::TYPE_MAP;
+ for (const auto& t : type.children) {
+ out.children.push_back(VExpr::column_type_to_type_desc(t));
+ }
+ break;
case TYPE_STRING:
out.type = FunctionContext::TYPE_STRING;
out.len = type.len;
diff --git a/be/src/vec/functions/function_cast.h
b/be/src/vec/functions/function_cast.h
index c8dc4282df..7ac3316fdb 100644
--- a/be/src/vec/functions/function_cast.h
+++ b/be/src/vec/functions/function_cast.h
@@ -1541,6 +1541,16 @@ private:
return &ConvertImplGenericToJsonb::execute;
}
}
+
+ WrapperType create_map_wrapper(const DataTypePtr& from_type, const
DataTypeMap& to_type) const {
+ switch (from_type->get_type_id()) {
+ case TypeIndex::String:
+ return &ConvertImplGenericFromString<ColumnString>::execute;
+ default:
+ return create_unsupport_wrapper(from_type->get_name(),
to_type.get_name());
+ }
+ }
+
// check struct value type and get to_type value
// TODO: need handle another type to cast struct
WrapperType create_struct_wrapper(const DataTypePtr& from_type,
@@ -1727,6 +1737,8 @@ private:
static_cast<const
DataTypeArray&>(*to_type));
case TypeIndex::Struct:
return create_struct_wrapper(from_type, static_cast<const
DataTypeStruct&>(*to_type));
+ case TypeIndex::Map:
+ return create_map_wrapper(from_type, static_cast<const
DataTypeMap&>(*to_type));
default:
break;
}
diff --git a/be/src/vec/olap/olap_data_convertor.h
b/be/src/vec/olap/olap_data_convertor.h
index 0efd5c1bd8..b23ba5ee5f 100644
--- a/be/src/vec/olap/olap_data_convertor.h
+++ b/be/src/vec/olap/olap_data_convertor.h
@@ -413,7 +413,6 @@ private:
Status convert_to_olap() override;
const void* get_data() const override { return _results.data(); };
-
const void* get_data_at(size_t offset) const override {
LOG(FATAL) << "now not support get_data_at for
OlapColumnDataConvertorMap";
};
diff --git a/be/src/vec/runtime/vfile_result_writer.cpp
b/be/src/vec/runtime/vfile_result_writer.cpp
index 2782008b34..16d48bde08 100644
--- a/be/src/vec/runtime/vfile_result_writer.cpp
+++ b/be/src/vec/runtime/vfile_result_writer.cpp
@@ -342,6 +342,10 @@ Status VFileResultWriter::_write_csv_file(const Block&
block) {
_plain_text_outstream << col.type->to_string(*col.column,
i);
break;
}
+ case TYPE_MAP: {
+ _plain_text_outstream << col.type->to_string(*col.column,
i);
+ break;
+ }
default: {
// not supported type, like BITMAP, just export null
_plain_text_outstream << NULL_IN_CSV;
diff --git a/be/src/vec/sink/vmysql_result_writer.cpp
b/be/src/vec/sink/vmysql_result_writer.cpp
index f74c3f94a4..bade808912 100644
--- a/be/src/vec/sink/vmysql_result_writer.cpp
+++ b/be/src/vec/sink/vmysql_result_writer.cpp
@@ -195,6 +195,12 @@ Status
VMysqlResultWriter<is_binary_format>::_add_one_column(
return Status::InternalError("pack mysql buffer failed.");
}
+ if constexpr (is_nullable) {
+ if (column_ptr->is_null_at(i)) {
+ buf_ret = rows_buffer[i].push_null();
+ continue;
+ }
+ }
rows_buffer[i].open_dynamic_mode();
std::string cell_str = map_type.to_string(*column, i);
buf_ret = rows_buffer[i].push_string(cell_str.c_str(),
strlen(cell_str.c_str()));
diff --git a/regression-test/data/export/test_map_export.out
b/regression-test/data/export/test_map_export.out
new file mode 100644
index 0000000000..0e0cfc3f7c
--- /dev/null
+++ b/regression-test/data/export/test_map_export.out
@@ -0,0 +1,8 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !select --
+1 \N
+2 {}
+3 {' 33,amory ':2, ' bet ':20, ' cler ':26}
+4 {'k3':23, null:20, 'k4':null}
+5 {null:null}
+
diff --git a/regression-test/data/load/insert/test_map_dml.out
b/regression-test/data/load/insert/test_map_dml.out
new file mode 100644
index 0000000000..446b0a9c59
--- /dev/null
+++ b/regression-test/data/load/insert/test_map_dml.out
@@ -0,0 +1,8 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !select --
+1 {' amory ':6, 'happy':38}
+6 {'amory':6, 'is':38, 'cl':0}
+
+-- !select --
+100 {1:'1', 2:'2', 3:'3'} {32767:'32767', 32768:'32768', 32769:'32769'}
[65534, 65535, 65536] {2022-07-13:1} {2022-07-13 12:30:00:'2022-07-13
12:30:00'} {0.33:33, 0.67:67}
+
diff --git a/regression-test/data/load_p0/stream_load/test_map.csv
b/regression-test/data/load_p0/stream_load/test_map.csv
new file mode 100644
index 0000000000..cbace425cc
--- /dev/null
+++ b/regression-test/data/load_p0/stream_load/test_map.csv
@@ -0,0 +1,15 @@
+1 \N
+2 {" 11amory ":23, "beat":20, " clever ": 66}
+3 {"k1": 31, "k2": 300}
+4 {}
+5 \N
+6 {"k1":41, "k2": 400}
+7 {" 33,amory ":2, " bet ":20, " cler ": 26}
+8 {}
+9 {' 1,amy ':2, " k2 ":90, " k7 ": 33}
+10 {}
+11 {"k1': 4, "k2": 400}
+12 {"k3":23, null: 20, "k4": null}
+13 {"null":1}
+15 {:2, "k2":}
+16 {null:null}
diff --git
a/regression-test/data/load_p0/stream_load/test_map_load_and_function.out
b/regression-test/data/load_p0/stream_load/test_map_load_and_function.out
new file mode 100644
index 0000000000..1b7eb1f5a3
--- /dev/null
+++ b/regression-test/data/load_p0/stream_load/test_map_load_and_function.out
@@ -0,0 +1,37 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !select --
+1 \N
+2 {' 11amory ':23, 'beat':20, ' clever ':66}
+3 {'k1':31, 'k2':300}
+4 {}
+5 \N
+6 {'k1':41, 'k2':400}
+7 {' 33,amory ':2, ' bet ':20, ' cler ':26}
+8 {}
+9 {' 1,amy ':2, ' k2 ':90, ' k7 ':33}
+10 {}
+11 \N
+12 {'k3':23, null:20, 'k4':null}
+13 {'null':1}
+15 {'':2, 'k2':0}
+16 {null:null}
+
+-- !select --
+\N
+\N
+300
+\N
+\N
+400
+\N
+\N
+\N
+\N
+\N
+\N
+\N
+\N
+130
+0
+\N
+
diff --git a/regression-test/data/map_p0/test_map_dml.out
b/regression-test/data/map_p0/test_map_dml.out
new file mode 100644
index 0000000000..531ce9ac2e
--- /dev/null
+++ b/regression-test/data/map_p0/test_map_dml.out
@@ -0,0 +1,11 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !select --
+1 {' amory ':6, 'happy':38}
+6 {'amory':6, 'is':38, 'cl':0}
+
+-- !test --
+100 {1:'1', 2:'2', 3:'3'} {32767:'32767', 32768:'32768', 32769:'32769'}
[65534, 65535, 65536] {2022-07-13:1} {2022-07-13 12:30:00:'2022-07-13
12:30:00'} {0.33:33, 0.67:67}
+
+-- !select --
+100 {1:'1', 2:'2', 3:'3'} {32767:'32767', 32768:'32768', 32769:'32769'}
[65534, 65535, 65536] {2022-07-13:1} {2022-07-13 12:30:00:'2022-07-13
12:30:00'} {0.33:33, 0.67:67}
+
diff --git a/regression-test/data/query_p0/show/test_map_show_create.out
b/regression-test/data/query_p0/show/test_map_show_create.out
new file mode 100644
index 0000000000..78b706ca7d
--- /dev/null
+++ b/regression-test/data/query_p0/show/test_map_show_create.out
@@ -0,0 +1,4 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !select --
+test_map_show_create CREATE TABLE `test_map_show_create` (\n `k1` int(11)
NULL,\n `k2` MAP<smallint(6),text> NULL,\n `k3` MAP<int(11),text> NULL,\n
`k4` MAP<date,int(11)> NULL,\n `k5` MAP<datetime,text> NULL,\n `k6`
MAP<float,int(11)> NULL\n) ENGINE=OLAP\nDUPLICATE KEY(`k1`)\nCOMMENT
'OLAP'\nDISTRIBUTED BY HASH(`k1`) BUCKETS 1\nPROPERTIES
(\n"replication_allocation" = "tag.location.default: 1",\n"in_memory" =
"false",\n"storage_format" = "V2",\n"light_schema_change" = "true",\n"dis [...]
+
diff --git a/regression-test/suites/export/test_map_export.groovy
b/regression-test/suites/export/test_map_export.groovy
new file mode 100644
index 0000000000..9084a0a85d
--- /dev/null
+++ b/regression-test/suites/export/test_map_export.groovy
@@ -0,0 +1,131 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.codehaus.groovy.runtime.IOGroovyMethods
+
+import java.nio.charset.StandardCharsets
+import java.nio.file.Files
+import java.nio.file.Paths
+
+suite("test_map_export", "export") {
+ // check whether the FE config 'enable_outfile_to_local' is true
+ StringBuilder strBuilder = new StringBuilder()
+ strBuilder.append("curl --location-trusted -u " + context.config.jdbcUser
+ ":" + context.config.jdbcPassword)
+ strBuilder.append(" http://" + context.config.feHttpAddress +
"/rest/v1/config/fe")
+
+ String command = strBuilder.toString()
+ def process = command.toString().execute()
+ def code = process.waitFor()
+ def err = IOGroovyMethods.getText(new BufferedReader(new
InputStreamReader(process.getErrorStream())));
+ def out = process.getText()
+ logger.info("Request FE Config: code=" + code + ", out=" + out + ", err="
+ err)
+ assertEquals(code, 0)
+ def response = parseJson(out.trim())
+ assertEquals(response.code, 0)
+ assertEquals(response.msg, "success")
+ def configJson = response.data.rows
+ boolean enableOutfileToLocal = false
+ for (Object conf: configJson) {
+ assert conf instanceof Map
+ if (((Map<String, String>) conf).get("Name").toLowerCase() ==
"enable_outfile_to_local") {
+ enableOutfileToLocal = ((Map<String, String>)
conf).get("Value").toLowerCase() == "true"
+ }
+ }
+ if (!enableOutfileToLocal) {
+ logger.warn("Please set enable_outfile_to_local to true to run
test_outfile")
+ return
+ }
+
+ // define the table
+ def testTable = "tbl_test_map"
+
+ sql "DROP TABLE IF EXISTS ${testTable}"
+ sql "ADMIN SET FRONTEND CONFIG ('enable_map_type' = 'true')"
+
+ sql """
+ CREATE TABLE IF NOT EXISTS ${testTable} (
+ id INT,
+ m Map<STRING, INT>
+ )
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 10
+ PROPERTIES("replication_num" = "1");
+ """
+
+ // make data
+ sql """ INSERT INTO ${testTable} VALUES (1, NULL); """
+ sql """ INSERT INTO ${testTable} VALUES (2, {}); """
+ sql """ INSERT INTO ${testTable} VALUES (3, {" 33,amory ":2, " bet ":20,
" cler ": 26}); """
+ sql """ INSERT INTO ${testTable} VALUES (4, {"k3":23, null: 20, "k4":
null}); """
+ sql """ INSERT INTO ${testTable} VALUES (5, {null:null}); """
+
+ // check result
+ qt_select """ SELECT * FROM ${testTable} ORDER BY id; """
+
+ def outFilePath = """${context.file.parent}/tmp"""
+ logger.info("test_map_export the outFilePath=" + outFilePath)
+ // map select into outfile
+ try {
+ File path = new File(outFilePath)
+ if (!path.exists()) {
+ assert path.mkdirs()
+ } else {
+ throw new IllegalStateException("""${outFilePath} already exists!
""")
+ }
+ sql """
+ SELECT * FROM ${testTable} ORDER BY id INTO OUTFILE
"file://${outFilePath}/";
+ """
+ File[] files = path.listFiles()
+ assert files.length == 1
+
+ List<String> outLines =
Files.readAllLines(Paths.get(files[0].getAbsolutePath()),
StandardCharsets.UTF_8)
+ assert outLines.size() == 5
+ for (int r = 0; r < outLines.size(); r++) {
+ String[] outLine = outLines.get(r).split("\t")
+ assert outLine.size() == 2
+ // check NULL
+ if (outLine[0] == 1) {
+ assert outLine[1] == "\\N"
+ }
+ // check empty
+ if (outLine[0] == 2) {
+ assert outLine[1] == "{}"
+ }
+ // check key contains ','
+ if (outLine[0] == 3) {
+ assert outLine[1] == "{\" 33,amory \":2, \" bet \":20, \"
cler \": 26}"
+ }
+ // check key val NULL
+ if (outLine[0] == 4) {
+ assert outLine[1] == "{\"k3\":23, null: 20, \"k4\": null}"
+ }
+ // check key val empty
+ if (outLine[0] == 5) {
+ assert outLine[1] == "{null:null}"
+ }
+ }
+ } finally {
+ try_sql("DROP TABLE IF EXISTS ${testTable}")
+ File path = new File(outFilePath)
+ if (path.exists()) {
+ for (File f : path.listFiles()) {
+ f.delete();
+ }
+ path.delete();
+ }
+ }
+}
diff --git a/regression-test/suites/load/insert/test_map_dml.groovy
b/regression-test/suites/load/insert/test_map_dml.groovy
new file mode 100644
index 0000000000..438bd0b496
--- /dev/null
+++ b/regression-test/suites/load/insert/test_map_dml.groovy
@@ -0,0 +1,114 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_map_dml", "load") {
+ // define a sql table
+ def testTable = "tbl_test_map_string_int"
+ def testTable01 = "tbl_test_map_normal"
+
+ sql "ADMIN SET FRONTEND CONFIG ('enable_map_type' = 'true')"
+
+ def create_test_table = {testTablex ->
+ def result1 = sql """
+ CREATE TABLE IF NOT EXISTS ${testTable} (
+ `k1` INT(11) NULL COMMENT "",
+ `k2` Map<STRING, INT> NULL COMMENT ""
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`k1`)
+ DISTRIBUTED BY HASH(`k1`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "storage_format" = "V2"
+ )
+ """
+
+ // DDL/DML return 1 row and 3 column, the only value is update row
count
+ assertTrue(result1.size() == 1)
+ assertTrue(result1[0].size() == 1)
+ assertTrue(result1[0][0] == 0, "Create table should update 0 rows")
+
+ // insert 1 row to check whether the table is ok
+ def result2 = sql "INSERT INTO ${testTable} VALUES (6, {'amory': 6,
'is': 38, 'cl': 0})"
+ assertTrue(result2.size() == 1)
+ assertTrue(result2[0].size() == 1)
+ assertTrue(result2[0][0] == 1, "Insert should update 1 rows")
+ }
+
+ def create_test_table01 = {testTablez ->
+ def result1 = sql """
+ CREATE TABLE IF NOT EXISTS ${testTable01} (
+ `k1` int(11) NULL,
+ `k2` Map<smallint(6), string> NULL,
+ `k3` Map<int(11), string> NULL,
+ `k4` array<bigint(20)> NULL,
+ `k5` Map<date, int> NULL,
+ `k6` Map<datetime, string> NULL,
+ `k7` Map<float, int> NULL
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`k1`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`k1`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "in_memory" = "false",
+ "storage_format" = "V2",
+ "disable_auto_compaction" = "false"
+ )
+ """
+
+ // DDL/DML return 1 row and 3 column, the only value is update row
count
+ assertTrue(result1.size() == 1)
+ assertTrue(result1[0].size() == 1)
+ assertTrue(result1[0][0] == 0, "Create table should update 0 rows")
+
+
+ def result2 = sql """ INSERT INTO ${testTable01} VALUES (100, {1: '1',
2: '2', 3:'3'}, {32767: '32767', 32768: '32768', 32769: '32769'},
+ [65534, 65535, 65536], {'2022-07-13': 1}, {'2022-07-13
12:30:00': '2022-07-13 12:30:00'},
+ {0.33: 33, 0.67: 67})
+ """
+
+ assertTrue(result2.size() == 1)
+ assertTrue(result2[0].size() == 1)
+ assertTrue(result2[0][0] == 1, "Insert should update 1 rows")
+ }
+
+
+ // case1: string_int for map
+ try {
+ def res = sql "DROP TABLE IF EXISTS ${testTable}"
+ create_test_table.call(testTable)
+ sql "INSERT INTO ${testTable} VALUES (1, {' amory ': 6, 'happy': 38})"
+
+ // select the table and check whether the data is correct
+ qt_select "SELECT * FROM ${testTable} ORDER BY k1"
+
+ } finally {
+ try_sql("DROP TABLE IF EXISTS ${testTable}")
+ }
+
+ // case2: normal key val type for map
+ try {
+ def res = sql "DROP TABLE IF EXISTS ${testTable01}"
+
+ create_test_table01.call(testTable)
+ // select the table and check whether the data is correct
+ qt_select "SELECT * FROM ${testTable01} ORDER BY k1"
+
+ } finally {
+ try_sql("DROP TABLE IF EXISTS ${testTable01}")
+ }
+}
diff --git
a/regression-test/suites/load_p0/stream_load/test_map_load_and_function.groovy
b/regression-test/suites/load_p0/stream_load/test_map_load_and_function.groovy
new file mode 100644
index 0000000000..d796c08b2e
--- /dev/null
+++
b/regression-test/suites/load_p0/stream_load/test_map_load_and_function.groovy
@@ -0,0 +1,74 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.codehaus.groovy.runtime.IOGroovyMethods
+
+import java.nio.charset.StandardCharsets
+import java.nio.file.Files
+import java.nio.file.Paths
+
+suite("test_map_load_and_function", "p0") {
+ // define a sql table
+ def testTable = "tbl_test_map"
+ def dataFile = "test_map.csv"
+
+ sql "DROP TABLE IF EXISTS ${testTable}"
+ sql "ADMIN SET FRONTEND CONFIG ('enable_map_type' = 'true')"
+
+ sql """
+ CREATE TABLE IF NOT EXISTS ${testTable} (
+ id INT,
+ m Map<STRING, INT>
+ )
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 10
+ PROPERTIES("replication_num" = "1");
+ """
+
+ // load the map data from csv file
+ streamLoad {
+ table testTable
+
+ file dataFile // import csv file
+ time 10000 // limit inflight 10s
+
+ // if declared a check callback, the default check condition will
ignore.
+ // So you must check all condition
+ check { result, exception, startTime, endTime ->
+ if (exception != null) {
+ throw exception
+ }
+ log.info("Stream load result: ${result}".toString())
+ def json = parseJson(result)
+ assertEquals("success", json.Status.toLowerCase())
+ assertEquals("OK", json.Message)
+ assertEquals(15, json.NumberTotalRows)
+ assertTrue(json.LoadBytes > 0)
+
+ }
+ }
+
+ // check result
+ qt_select "SELECT * FROM ${testTable} ORDER BY id"
+
+ // insert into valid json rows
+ sql """INSERT INTO ${testTable} VALUES(12, NULL)"""
+ sql """INSERT INTO ${testTable} VALUES(13, {"k1":100, "k2": 130})"""
+
+ // map element_at
+ qt_select "SELECT m['k2'] FROM ${testTable} ORDER BY id"
+}
diff --git a/regression-test/suites/query_p0/show/test_map_show_create.groovy
b/regression-test/suites/query_p0/show/test_map_show_create.groovy
new file mode 100644
index 0000000000..463cb2babc
--- /dev/null
+++ b/regression-test/suites/query_p0/show/test_map_show_create.groovy
@@ -0,0 +1,69 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_map_show_create", "query") {
+ // define a sql table
+ def testTable = "test_map_show_create"
+
+ def create_test_table = {testTablex ->
+ def result1 = sql """
+ CREATE TABLE IF NOT EXISTS ${testTable} (
+ `k1` INT(11) NULL,
+ `k2` MAP<SMALLINT(6), STRING> NULL,
+ `k3` MAP<INT(11), STRING> NULL,
+ `k4` MAP<DATE, INT> NULL,
+ `k5` MAP<DATETIME, STRING> NULL,
+ `k6` Map<FLOAT, INT> NULL
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`k1`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`k1`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "in_memory" = "false",
+ "storage_format" = "V2",
+ "disable_auto_compaction" = "false"
+ )
+ """
+
+ // DDL/DML return 1 row and 3 column, the only value is update row
count
+ assertTrue(result1.size() == 1)
+ assertTrue(result1[0].size() == 1)
+ assertTrue(result1[0][0] == 0, "Create table should update 0 rows")
+
+ // insert 1 row to check whether the table is ok
+ def result2 = sql """ INSERT INTO ${testTable} VALUES (100, {1: '1',
2: '2', 3:'3'},
+ {32767: '32767', 32768: '32768', 32769: '32769'},
{'2022-07-13': 1},
+ {'2022-07-13 12:30:00': '2022-07-13 12:30:00'}, {0.33:
33, 0.67: 67})
+ """
+ assertTrue(result2.size() == 1)
+ assertTrue(result2[0].size() == 1)
+ assertTrue(result2[0][0] == 1, "Insert should update 1 rows")
+ }
+
+ try {
+ sql "DROP TABLE IF EXISTS ${testTable}"
+ sql "ADMIN SET FRONTEND CONFIG ('enable_map_type' = 'true')"
+
+ create_test_table.call(testTable)
+
+ qt_select "SHOW CREATE TABLE ${testTable}"
+ } finally {
+ try_sql("DROP TABLE IF EXISTS ${testTable}")
+ }
+
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]