This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new e6a722613d6 [fix](cases)fix shrink columns (#25936)
e6a722613d6 is described below
commit e6a722613d63dc4123a8a97a88b9de37f7e38ea3
Author: amory <[email protected]>
AuthorDate: Thu Oct 26 13:18:51 2023 +0800
[fix](cases)fix shrink columns (#25936)
---
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 40 ++++++++++++++-------
be/src/olap/rowset/segment_v2/segment_iterator.h | 1 +
be/src/vec/columns/column.h | 2 ++
be/src/vec/columns/column_map.cpp | 21 +++++++++++
be/src/vec/columns/column_map.h | 4 ++-
be/src/vec/columns/column_nullable.h | 2 ++
be/src/vec/columns/column_struct.cpp | 15 ++++++++
be/src/vec/columns/column_struct.h | 4 +++
regression-test/data/export/test_struct_export.out | Bin 469 -> 442 bytes
.../data/insert_p0/test_struct_insert.out | Bin 444 -> 417 bytes
.../data/load_p0/stream_load/map_char_test.csv | 4 +++
.../data/load_p0/stream_load/test_stream_load.out | 6 ++++
.../aggregate/aggregate_group_by_metric_type.out | Bin 170 -> 154 bytes
.../struct_functions/test_struct_functions.out | Bin 1376 -> 1294 bytes
.../load_p0/stream_load/test_stream_load.groovy | 40 ++++++++++++++++++++-
15 files changed, 124 insertions(+), 15 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index bd2b9df914c..ce04b05d4b4 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -1498,24 +1498,38 @@ bool
SegmentIterator::_can_evaluated_by_vectorized(ColumnPredicate* predicate) {
}
}
+bool SegmentIterator::_has_char_type(const Field& column_desc) { // true when column_desc is CHAR or nests CHAR in an ARRAY/MAP/STRUCT sub-field
+ switch (column_desc.type()) {
+ case FieldType::OLAP_FIELD_TYPE_CHAR:
+ return true;
+ case FieldType::OLAP_FIELD_TYPE_ARRAY: // only sub-field 0 is inspected; recursion covers ARRAY<ARRAY<CHAR>>
+ return _has_char_type(*column_desc.get_sub_field(0)); // NOTE(review): dereferenced unchecked; the do/while this replaces tested for nullptr - confirm it is never null
+ case FieldType::OLAP_FIELD_TYPE_MAP: // sub-fields 0 and 1 are both inspected (presumably key and value - confirm)
+ return _has_char_type(*column_desc.get_sub_field(0)) ||
+ _has_char_type(*column_desc.get_sub_field(1));
+ case FieldType::OLAP_FIELD_TYPE_STRUCT: // one member sub-field containing CHAR is enough
+ for (int idx = 0; idx < column_desc.get_sub_field_count(); ++idx) {
+ if (_has_char_type(*column_desc.get_sub_field(idx))) {
+ return true;
+ }
+ }
+ return false;
+ default: // every other field type is reported as not containing CHAR
+ return false;
+ }
+}
+
void SegmentIterator::_vec_init_char_column_id() {
for (size_t i = 0; i < _schema->num_column_ids(); i++) {
auto cid = _schema->column_id(i);
- auto column_desc = _schema->column(cid);
+ const Field* column_desc = _schema->column(cid);
- do {
- if (column_desc->type() == FieldType::OLAP_FIELD_TYPE_CHAR) {
- _char_type_idx.emplace_back(i);
- if (i != 0) {
- _char_type_idx_no_0.emplace_back(i);
- }
- break;
- } else if (column_desc->type() !=
FieldType::OLAP_FIELD_TYPE_ARRAY) {
- break;
+ if (_has_char_type(*column_desc)) { // CHAR directly, or nested inside ARRAY/MAP/STRUCT
+ _char_type_idx.emplace_back(i); // stores the schema position i, not the column id cid
+ if (i != 0) { // position 0 is kept out of the "_no_0" list
+ _char_type_idx_no_0.emplace_back(i);
}
- // for Array<Char> or Array<Array<Char>>
- column_desc = column_desc->get_sub_field(0);
- } while (column_desc != nullptr);
+ }
}
}
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h
b/be/src/olap/rowset/segment_v2/segment_iterator.h
index d53ad9d62b0..33d3a3f5f9c 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.h
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.h
@@ -201,6 +201,7 @@ private:
// CHAR type in storage layer padding the 0 in length. But query engine
need ignore the padding 0.
// so segment iterator need to shrink char column before output it. only
use in vec query engine.
void _vec_init_char_column_id();
+ bool _has_char_type(const Field& column_desc); // whether column_desc is CHAR or nests a CHAR sub-field (ARRAY/MAP/STRUCT)
uint32_t segment_id() const { return _segment->id(); }
uint32_t num_rows() const { return _segment->num_rows(); }
diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h
index df4c66fa14c..139a23f7935 100644
--- a/be/src/vec/columns/column.h
+++ b/be/src/vec/columns/column.h
@@ -670,6 +670,8 @@ public:
virtual bool is_column_map() const { return false; }
+ virtual bool is_column_struct() const { return false; } // default answer; ColumnStruct overrides this to return true
+
/// If the only value column can contain is NULL.
virtual bool only_null() const { return false; }
diff --git a/be/src/vec/columns/column_map.cpp
b/be/src/vec/columns/column_map.cpp
index f10890b0093..9a550097af9 100644
--- a/be/src/vec/columns/column_map.cpp
+++ b/be/src/vec/columns/column_map.cpp
@@ -442,6 +442,27 @@ void ColumnMap::replicate(const uint32_t* indexs, size_t
target_size, IColumn& c
->replicate(indexs, target_size,
res.values_column->assume_mutable_ref());
}
+MutableColumnPtr ColumnMap::get_shrinked_column() { // builds a new ColumnMap from shrunk keys/values and the existing offsets
+ MutableColumns new_columns(2); // slot 0 holds the keys column, slot 1 the values column
+
+ if (keys_column->is_column_string() || keys_column->is_column_array() ||
+ keys_column->is_column_map() || keys_column->is_column_struct()) { // only string/array/map/struct columns are asked to shrink
+ new_columns[0] = keys_column->get_shrinked_column();
+ } else {
+ new_columns[0] = keys_column->get_ptr(); // any other column kind is reused via get_ptr() without shrinking
+ }
+
+ if (values_column->is_column_string() || values_column->is_column_array()
||
+ values_column->is_column_map() || values_column->is_column_struct()) { // same rule as for the keys
+ new_columns[1] = values_column->get_shrinked_column();
+ } else {
+ new_columns[1] = values_column->get_ptr();
+ }
+
+ return ColumnMap::create(new_columns[0]->assume_mutable(),
new_columns[1]->assume_mutable(),
+ offsets_column->assume_mutable()); // offsets_column is passed along as-is
+}
+
void ColumnMap::reserve(size_t n) {
get_offsets().reserve(n);
keys_column->reserve(n);
diff --git a/be/src/vec/columns/column_map.h b/be/src/vec/columns/column_map.h
index 0c09f0a9dae..f8bc8a4812d 100644
--- a/be/src/vec/columns/column_map.h
+++ b/be/src/vec/columns/column_map.h
@@ -36,6 +36,8 @@
#include "vec/columns/column.h"
#include "vec/columns/column_array.h"
#include "vec/columns/column_impl.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/columns/column_struct.h"
#include "vec/columns/column_vector.h"
#include "vec/common/assert_cast.h"
#include "vec/common/cow.h"
@@ -110,7 +112,7 @@ public:
const char* deserialize_and_insert_from_arena(const char* pos) override;
void update_hash_with_value(size_t n, SipHash& hash) const override;
-
+ MutableColumnPtr get_shrinked_column() override; // shrinks the nested key/value columns; defined in column_map.cpp
ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const
override;
size_t filter(const Filter& filter) override;
ColumnPtr permute(const Permutation& perm, size_t limit) const override;
diff --git a/be/src/vec/columns/column_nullable.h
b/be/src/vec/columns/column_nullable.h
index 465e7dc2d96..4350bead62d 100644
--- a/be/src/vec/columns/column_nullable.h
+++ b/be/src/vec/columns/column_nullable.h
@@ -262,6 +262,8 @@ public:
bool is_column_decimal() const override { return
get_nested_column().is_column_decimal(); }
bool is_column_string() const override { return
get_nested_column().is_column_string(); }
bool is_column_array() const override { return
get_nested_column().is_column_array(); }
+ bool is_column_map() const override { return
get_nested_column().is_column_map(); } // answers for the nested column
+ bool is_column_struct() const override { return
get_nested_column().is_column_struct(); } // answers for the nested column
bool is_fixed_and_contiguous() const override { return false; }
bool values_have_fixed_size() const override { return
nested_column->values_have_fixed_size(); }
diff --git a/be/src/vec/columns/column_struct.cpp
b/be/src/vec/columns/column_struct.cpp
index 88391370398..a2d1eae5709 100644
--- a/be/src/vec/columns/column_struct.cpp
+++ b/be/src/vec/columns/column_struct.cpp
@@ -293,6 +293,21 @@ void ColumnStruct::replicate(const uint32_t* indexs,
size_t target_size, IColumn
}
}
+MutableColumnPtr ColumnStruct::get_shrinked_column() { // builds a new ColumnStruct, shrinking element columns where applicable
+ const size_t tuple_size = columns.size();
+ MutableColumns new_columns(tuple_size); // one slot per struct element, in the same order
+
+ for (size_t i = 0; i < tuple_size; ++i) {
+ if (columns[i]->is_column_string() || columns[i]->is_column_array() ||
+ columns[i]->is_column_map() || columns[i]->is_column_struct()) { // only string/array/map/struct elements are asked to shrink
+ new_columns[i] = columns[i]->get_shrinked_column();
+ } else {
+ new_columns[i] = columns[i]->get_ptr(); // any other element kind is reused via get_ptr() without shrinking
+ }
+ }
+ return ColumnStruct::create(std::move(new_columns));
+}
+
MutableColumns ColumnStruct::scatter(ColumnIndex num_columns, const Selector&
selector) const {
const size_t tuple_size = columns.size();
std::vector<MutableColumns> scattered_tuple_elements(tuple_size);
diff --git a/be/src/vec/columns/column_struct.h
b/be/src/vec/columns/column_struct.h
index 9d219c3c989..db9633f6032 100644
--- a/be/src/vec/columns/column_struct.h
+++ b/be/src/vec/columns/column_struct.h
@@ -82,6 +82,7 @@ public:
}
std::string get_name() const override;
+ bool is_column_struct() const override { return true; } // marks this column as a struct column
const char* get_family_name() const override { return "Struct"; }
bool can_be_inside_nullable() const override { return true; }
MutableColumnPtr clone_empty() const override;
@@ -160,6 +161,9 @@ public:
LOG(FATAL) << "compare_at not implemented";
}
void get_extremes(Field& min, Field& max) const override;
+
+ MutableColumnPtr get_shrinked_column() override; // shrinks element columns; defined in column_struct.cpp
+
void reserve(size_t n) override;
void resize(size_t n) override;
size_t byte_size() const override;
diff --git a/regression-test/data/export/test_struct_export.out
b/regression-test/data/export/test_struct_export.out
index 0dc07fa7cb6..21141b417d3 100644
Binary files a/regression-test/data/export/test_struct_export.out and
b/regression-test/data/export/test_struct_export.out differ
diff --git a/regression-test/data/insert_p0/test_struct_insert.out
b/regression-test/data/insert_p0/test_struct_insert.out
index 2c202a651e7..7bd960b0e3d 100644
Binary files a/regression-test/data/insert_p0/test_struct_insert.out and
b/regression-test/data/insert_p0/test_struct_insert.out differ
diff --git a/regression-test/data/load_p0/stream_load/map_char_test.csv
b/regression-test/data/load_p0/stream_load/map_char_test.csv
new file mode 100644
index 00000000000..4b545d1ea36
--- /dev/null
+++ b/regression-test/data/load_p0/stream_load/map_char_test.csv
@@ -0,0 +1,4 @@
+1 {1:"1", 2:"22", 3:"333", 4:"4444", 5:"55555", 6:"666666", 7:"7777777"}
+3 {1:"11", 2:"22", 3:"33", 4:"44", 5:"55", 6:"66", 7:"77"}
+2 {1:"1", 2:"2", 3:"3", 4:"4", 5:"5", 6:"6", 7:"7"}
+4 {1:"111", 2:"22", 3:"333", 4:"444", 5:"55", 6:"66", 7:"777"}
diff --git a/regression-test/data/load_p0/stream_load/test_stream_load.out
b/regression-test/data/load_p0/stream_load/test_stream_load.out
index 8816fce94b8..505b3110cf2 100644
--- a/regression-test/data/load_p0/stream_load/test_stream_load.out
+++ b/regression-test/data/load_p0/stream_load/test_stream_load.out
@@ -6,6 +6,12 @@
-- !sql1 --
2019 9 9 9 7.700 a 2019-09-09
1970-01-01T08:33:39 k7 9.0 9.0
+-- !map11 --
+1 {1:"1", 2:"22", 3:"333", 4:"4444", 5:"55555", 6:"666666", 7:"7777777"}
+2 {1:"1", 2:"2", 3:"3", 4:"4", 5:"5", 6:"6", 7:"7"}
+3 {1:"11", 2:"22", 3:"33", 4:"44", 5:"55", 6:"66", 7:"77"}
+4 {1:"111", 2:"22", 3:"333", 4:"444", 5:"55", 6:"66", 7:"777"}
+
-- !all11 --
2500
diff --git
a/regression-test/data/query_p0/aggregate/aggregate_group_by_metric_type.out
b/regression-test/data/query_p0/aggregate/aggregate_group_by_metric_type.out
index 32cdb5a79e9..d37e5e62bb0 100644
Binary files
a/regression-test/data/query_p0/aggregate/aggregate_group_by_metric_type.out
and
b/regression-test/data/query_p0/aggregate/aggregate_group_by_metric_type.out
differ
diff --git
a/regression-test/data/query_p0/sql_functions/struct_functions/test_struct_functions.out
b/regression-test/data/query_p0/sql_functions/struct_functions/test_struct_functions.out
index ffce5c3e196..fb744aec56d 100644
Binary files
a/regression-test/data/query_p0/sql_functions/struct_functions/test_struct_functions.out
and
b/regression-test/data/query_p0/sql_functions/struct_functions/test_struct_functions.out
differ
diff --git a/regression-test/suites/load_p0/stream_load/test_stream_load.groovy
b/regression-test/suites/load_p0/stream_load/test_stream_load.groovy
index d9cf9c72938..0884330f2d3 100644
--- a/regression-test/suites/load_p0/stream_load/test_stream_load.groovy
+++ b/regression-test/suites/load_p0/stream_load/test_stream_load.groovy
@@ -191,6 +191,8 @@ suite("test_stream_load", "p0") {
def tableName7 = "test_unique_key_with_delete"
def tableName8 = "test_array"
def tableName10 = "test_struct"
+ def tableName11 = "test_map"
+
sql """ DROP TABLE IF EXISTS ${tableName3} """
sql """ DROP TABLE IF EXISTS ${tableName4} """
sql """ DROP TABLE IF EXISTS ${tableName5} """
@@ -198,6 +200,7 @@ suite("test_stream_load", "p0") {
sql """ DROP TABLE IF EXISTS ${tableName7} """
sql """ DROP TABLE IF EXISTS ${tableName8} """
sql """ DROP TABLE IF EXISTS ${tableName10} """
+ sql """ DROP TABLE IF EXISTS ${tableName11} """
sql """
CREATE TABLE IF NOT EXISTS ${tableName3} (
`k1` int(11) NULL,
@@ -281,7 +284,7 @@ suite("test_stream_load", "p0") {
`k4` ARRAY<BIGINT> NULL COMMENT "",
`k5` ARRAY<CHAR> NULL COMMENT "",
`k6` ARRAY<VARCHAR(20)> NULL COMMENT "",
- `k7` ARRAY<DATE> NULL COMMENT "",
+ `k7` ARRAY<DATE> NULL COMMENT "",
`k8` ARRAY<DATETIME> NULL COMMENT "",
`k9` ARRAY<FLOAT> NULL COMMENT "",
`k10` ARRAY<DOUBLE> NULL COMMENT "",
@@ -316,6 +319,41 @@ suite("test_stream_load", "p0") {
);
"""
+ sql """
+ CREATE TABLE IF NOT EXISTS ${tableName11} (
+ `k1` int(11) NULL,
+ `k2` map<int, char(7)> NULL
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`k1`)
+ DISTRIBUTED BY HASH(`k1`) BUCKETS 3
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """ // test table whose map value type is the fixed-length char(7)
+
+ // load map<int, char(7)> values whose lengths range from 1 up to the declared 7 characters
+ streamLoad {
+ table "${tableName11}"
+
+ set 'column_separator', '\t'
+
+ file 'map_char_test.csv' // 4 tab-separated rows: an int key followed by a map literal
+ time 10000 // limit inflight 10s
+
+ check { result, exception, startTime, endTime ->
+ if (exception != null) {
+ throw exception
+ }
+ log.info("Stream load result: ${result}".toString())
+ def json = parseJson(result)
+ assertEquals("success", json.Status.toLowerCase())
+ assertEquals(4, json.NumberTotalRows) // map_char_test.csv contains 4 rows
+ assertEquals(0, json.NumberFilteredRows) // no row may be rejected
+ }
+ }
+ sql "sync"
+ order_qt_map11 "SELECT * FROM ${tableName11} order by k1" // expected rows: the "-- !map11 --" section of test_stream_load.out
+
+
// load all columns
streamLoad {
table "${tableName3}"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]