This is an automated email from the ASF dual-hosted git repository.
mrhhsg pushed a commit to branch nested_column_prune
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/nested_column_prune by this
push:
new 7e7ad8ad500 [fix] Adapt to the pruned data type
7e7ad8ad500 is described below
commit 7e7ad8ad5005aaf543c0649bb82b3a68a3d83e63
Author: Hu Shenggang <[email protected]>
AuthorDate: Fri Oct 24 10:04:09 2025 +0800
[fix] Adapt to the pruned data type
---
be/src/olap/rowset/segment_v2/column_reader.cpp | 106 ++++++++++++++++-----
be/src/olap/rowset/segment_v2/column_reader.h | 42 +++++++-
be/src/olap/rowset/segment_v2/segment.cpp | 1 +
be/src/olap/tablet_schema.cpp | 16 ++++
be/src/olap/tablet_schema.h | 10 ++
be/src/vec/exec/scan/olap_scanner.cpp | 7 +-
.../olap/rowset/segment_v2/column_reader_test.cpp | 11 +--
.../complex_types/test_pruned_columns.out | 24 +++++
.../complex_types/test_pruned_columns.groovy | 61 ++++++++++++
9 files changed, 245 insertions(+), 33 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp
b/be/src/olap/rowset/segment_v2/column_reader.cpp
index 48915887a79..fff3c6c3f42 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@@ -938,30 +938,16 @@ Result<TColumnAccessPaths>
ColumnIterator::_get_sub_access_paths(
}
if (name_path.path[0] != _column_name) {
- if (typeid_cast<ArrayFileColumnIterator*>(this) != nullptr) {
- if (name_path.path[0] != "*") {
- return ResultError(Status::InternalError(
- R"(Invalid access path for array column: expected
name "{}", got "{}")",
- _column_name, name_path.path[0]));
- }
- } else if (typeid_cast<MapFileColumnIterator*>(this) != nullptr) {
- if (name_path.path[0] != "KEYS" && name_path.path[0] !=
"VALUES" &&
- name_path.path[0] != "*") {
- return ResultError(Status::InternalError(
- R"(Invalid access path for map column: expected
name "{}", got "{}")",
- _column_name, name_path.path[0]));
- }
- } else {
- return ResultError(Status::InternalError(
- R"(Invalid access path for column: expected name "{}",
got "{}")",
- _column_name, name_path.path[0]));
- }
+ return ResultError(Status::InternalError(
+ R"(Invalid access path for column: expected name "{}", got
"{}")", _column_name,
+ name_path.path[0]));
}
name_path.path.erase(name_path.path.begin());
if (!name_path.path.empty()) {
++it;
} else {
+ set_need_to_read();
it = sub_access_paths.name_access_paths.erase(it);
}
}
@@ -1092,6 +1078,17 @@ Status MapFileColumnIterator::read_by_rowids(const
rowid_t* rowids, const size_t
return Status::OK();
}
+void MapFileColumnIterator::set_need_to_read() { // requests NEED_TO_READ for the map itself and for both child iterators
+ set_reading_flag(ReadingFlag::NEED_TO_READ);
+ _key_iterator->set_need_to_read(); // propagate to the key side
+ _val_iterator->set_need_to_read(); // propagate to the value side
+}
+
+void MapFileColumnIterator::remove_pruned_sub_iterators() { // removes nothing at map level; only forwards the request to key and value
+ _key_iterator->remove_pruned_sub_iterators();
+ _val_iterator->remove_pruned_sub_iterators();
+}
+
Status MapFileColumnIterator::set_access_paths(const TColumnAccessPaths&
all_access_paths,
const TColumnAccessPaths&
predicate_access_paths) {
if (all_access_paths.name_access_paths.empty()) {
@@ -1099,7 +1096,7 @@ Status MapFileColumnIterator::set_access_paths(const
TColumnAccessPaths& all_acc
}
if (!predicate_access_paths.name_access_paths.empty()) {
- _reading_flag = ReadingFlag::READING_FOR_PREDICATE;
+ set_reading_flag(ReadingFlag::READING_FOR_PREDICATE);
LOG(INFO) << "Map column iterator set sub-column " << _column_name
<< " to READING_FOR_PREDICATE";
}
@@ -1116,31 +1113,40 @@ Status MapFileColumnIterator::set_access_paths(const
TColumnAccessPaths& all_acc
TColumnAccessPaths key_predicate_access_paths;
TColumnAccessPaths val_predicate_access_paths;
- for (const auto& paths : sub_all_access_paths.name_access_paths) {
+ for (auto paths : sub_all_access_paths.name_access_paths) {
if (paths.path[0] == "*") {
+ paths.path[0] = _key_iterator->column_name();
key_all_access_paths.name_access_paths.push_back(paths);
+ paths.path[0] = _val_iterator->column_name();
val_all_access_paths.name_access_paths.push_back(paths);
} else if (paths.path[0] == "KEYS") {
+ paths.path[0] = _key_iterator->column_name();
key_all_access_paths.name_access_paths.push_back(paths);
} else if (paths.path[0] == "VALUES") {
+ paths.path[0] = _val_iterator->column_name();
val_all_access_paths.name_access_paths.push_back(paths);
}
}
const auto need_read_keys =
!key_all_access_paths.name_access_paths.empty();
const auto need_read_values =
!val_all_access_paths.name_access_paths.empty();
- for (const auto& paths : sub_predicate_access_paths.name_access_paths) {
+ for (auto paths : sub_predicate_access_paths.name_access_paths) {
if (paths.path[0] == "*") {
+ paths.path[0] = _key_iterator->column_name();
key_predicate_access_paths.name_access_paths.push_back(paths);
+ paths.path[0] = _val_iterator->column_name();
val_predicate_access_paths.name_access_paths.push_back(paths);
} else if (paths.path[0] == "KEYS") {
+ paths.path[0] = _key_iterator->column_name();
key_predicate_access_paths.name_access_paths.push_back(paths);
} else if (paths.path[0] == "VALUES") {
+ paths.path[0] = _val_iterator->column_name();
val_predicate_access_paths.name_access_paths.push_back(paths);
}
}
if (need_read_keys) {
+ _key_iterator->set_reading_flag(ReadingFlag::NEED_TO_READ);
RETURN_IF_ERROR(
_key_iterator->set_access_paths(key_all_access_paths,
key_predicate_access_paths));
} else {
@@ -1149,6 +1155,7 @@ Status MapFileColumnIterator::set_access_paths(const
TColumnAccessPaths& all_acc
}
if (need_read_values) {
+ _val_iterator->set_reading_flag(ReadingFlag::NEED_TO_READ);
RETURN_IF_ERROR(
_val_iterator->set_access_paths(val_all_access_paths,
val_predicate_access_paths));
} else {
@@ -1259,6 +1266,27 @@ Status StructFileColumnIterator::read_by_rowids(const
rowid_t* rowids, const siz
return Status::OK();
}
+void StructFileColumnIterator::set_need_to_read() { // requests NEED_TO_READ for the struct and for every sub-column iterator
+ set_reading_flag(ReadingFlag::NEED_TO_READ);
+ for (auto& sub_iterator : _sub_column_iterators) {
+ sub_iterator->set_need_to_read(); // each child applies its own set_need_to_read()
+ }
+}
+
+void StructFileColumnIterator::remove_pruned_sub_iterators() { // erases children flagged SKIP_READING and recurses into the ones that stay
+ for (auto it = _sub_column_iterators.begin(); it !=
_sub_column_iterators.end();) { // no increment in the header: each branch below advances `it` itself
+ auto& sub_iterator = *it;
+ if (sub_iterator->reading_flag() == ReadingFlag::SKIP_READING) {
+ DLOG(INFO) << "Struct column iterator remove pruned sub-column "
+ << sub_iterator->column_name();
+ it = _sub_column_iterators.erase(it); // continue from the iterator returned by erase()
+ } else {
+ sub_iterator->remove_pruned_sub_iterators(); // a kept child may still hold pruned children of its own
+ ++it;
+ }
+ }
+}
+
Status StructFileColumnIterator::set_access_paths(
const TColumnAccessPaths& all_access_paths,
const TColumnAccessPaths& predicate_access_paths) {
@@ -1267,7 +1295,7 @@ Status StructFileColumnIterator::set_access_paths(
}
if (!predicate_access_paths.name_access_paths.empty()) {
- _reading_flag = ReadingFlag::READING_FOR_PREDICATE;
+ set_reading_flag(ReadingFlag::READING_FOR_PREDICATE);
LOG(INFO) << "Struct column iterator set sub-column " << _column_name
<< " to READING_FOR_PREDICATE";
}
@@ -1278,7 +1306,7 @@ Status StructFileColumnIterator::set_access_paths(
const auto no_predicate_sub_column =
sub_predicate_access_paths.name_access_paths.empty();
for (auto& sub_iterator : _sub_column_iterators) {
- const auto& name = sub_iterator->column_name();
+ const auto name = sub_iterator->column_name();
bool need_to_read = no_sub_column_to_skip;
TColumnAccessPaths sub_all_access_paths_of_this;
if (!need_to_read) {
@@ -1291,10 +1319,13 @@ Status StructFileColumnIterator::set_access_paths(
}
if (!need_to_read) {
+ set_reading_flag(ReadingFlag::SKIP_READING);
sub_iterator->set_reading_flag(ReadingFlag::SKIP_READING);
LOG(INFO) << "Struct column iterator set sub-column " << name << "
to SKIP_READING";
continue;
}
+ set_reading_flag(ReadingFlag::NEED_TO_READ);
+ sub_iterator->set_reading_flag(ReadingFlag::NEED_TO_READ);
TColumnAccessPaths sub_predicate_access_paths_of_this;
@@ -1305,6 +1336,7 @@ Status StructFileColumnIterator::set_access_paths(
}
}
}
+
RETURN_IF_ERROR(sub_iterator->set_access_paths(sub_all_access_paths_of_this,
sub_predicate_access_paths_of_this));
}
@@ -1488,6 +1520,15 @@ Status ArrayFileColumnIterator::read_by_rowids(const
rowid_t* rowids, const size
return Status::OK();
}
+void ArrayFileColumnIterator::set_need_to_read() { // requests NEED_TO_READ for the array and for its item iterator
+ set_reading_flag(ReadingFlag::NEED_TO_READ);
+ _item_iterator->set_need_to_read(); // propagate to the element side
+}
+
+void ArrayFileColumnIterator::remove_pruned_sub_iterators() { // removes nothing at array level; only forwards the request to the item iterator
+ _item_iterator->remove_pruned_sub_iterators();
+}
+
Status ArrayFileColumnIterator::set_access_paths(const TColumnAccessPaths&
all_access_paths,
const TColumnAccessPaths&
predicate_access_paths) {
if (all_access_paths.name_access_paths.empty()) {
@@ -1495,7 +1536,7 @@ Status ArrayFileColumnIterator::set_access_paths(const
TColumnAccessPaths& all_a
}
if (!predicate_access_paths.name_access_paths.empty()) {
- _reading_flag = ReadingFlag::READING_FOR_PREDICATE;
+ set_reading_flag(ReadingFlag::READING_FOR_PREDICATE);
LOG(INFO) << "Array column iterator set sub-column " << _column_name
<< " to READING_FOR_PREDICATE";
}
@@ -1506,7 +1547,24 @@ Status ArrayFileColumnIterator::set_access_paths(const
TColumnAccessPaths& all_a
const auto no_sub_column_to_skip =
sub_all_access_paths.name_access_paths.empty();
const auto no_predicate_sub_column =
sub_predicate_access_paths.name_access_paths.empty();
+ if (!no_sub_column_to_skip) {
+ for (auto& path : sub_all_access_paths.name_access_paths) {
+ if (path.path[0] == "*") {
+ path.path[0] = _item_iterator->column_name();
+ }
+ }
+ }
+
+ if (!no_predicate_sub_column) {
+ for (auto& path : sub_predicate_access_paths.name_access_paths) {
+ if (path.path[0] == "*") {
+ path.path[0] = _item_iterator->column_name();
+ }
+ }
+ }
+
if (!no_sub_column_to_skip || !no_predicate_sub_column) {
+ _item_iterator->set_reading_flag(ReadingFlag::NEED_TO_READ);
RETURN_IF_ERROR(
_item_iterator->set_access_paths(sub_all_access_paths,
sub_predicate_access_paths));
}
diff --git a/be/src/olap/rowset/segment_v2/column_reader.h
b/be/src/olap/rowset/segment_v2/column_reader.h
index d4ae7f86ea0..0abd06f937e 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.h
+++ b/be/src/olap/rowset/segment_v2/column_reader.h
@@ -378,8 +378,35 @@ public:
const std::string& column_name() const { return _column_name; }
- enum class ReadingFlag { NORMAL_READING, READING_FOR_PREDICATE,
SKIP_READING };
- void set_reading_flag(ReadingFlag flag) { _reading_flag = flag; }
+ // Since there may be multiple paths with conflicts or overlaps,
+ // we need to define several reading flags:
+ //
+ // NORMAL_READING — Default value, indicating that the column should be
read.
+ // SKIP_READING — The column should not be read.
+ // NEED_TO_READ — The column must be read.
+ // READING_FOR_PREDICATE — The column is required for predicate evaluation.
+ //
+ // For example, suppose there are two paths:
+ // - Path 1 specifies that column A needs to be read, so it is marked as
NEED_TO_READ.
+ // - Path 2 specifies that the column should not be read, but since it is
already marked as NEED_TO_READ,
+ // it should not be changed to SKIP_READING.
+ enum class ReadingFlag : int { // enumerator order is significant: set_reading_flag() only moves to a numerically larger value
+ NORMAL_READING,
+ SKIP_READING,
+ NEED_TO_READ,
+ READING_FOR_PREDICATE
+ };
+ void set_reading_flag(ReadingFlag flag) { // only ever raises the stored flag; a request with an equal or lower value is ignored
+ if (static_cast<int>(flag) > static_cast<int>(_reading_flag)) {
+ _reading_flag = flag;
+ }
+ }
+
+ ReadingFlag reading_flag() const { return _reading_flag; } // read-only accessor for the current flag
+
+ virtual void set_need_to_read() {
set_reading_flag(ReadingFlag::NEED_TO_READ); } // base version flags only this iterator; the map/struct/array overrides in this commit also flag their children
+
+ virtual void remove_pruned_sub_iterators() {}; // default is a no-op; the struct override in this commit erases SKIP_READING children
protected:
Result<TColumnAccessPaths> _get_sub_access_paths(const TColumnAccessPaths&
access_paths);
@@ -526,6 +553,10 @@ public:
Status set_access_paths(const TColumnAccessPaths& all_access_paths,
const TColumnAccessPaths& predicate_access_paths)
override;
+ void set_need_to_read() override;
+
+ void remove_pruned_sub_iterators() override;
+
private:
std::shared_ptr<ColumnReader> _map_reader = nullptr;
ColumnIteratorUPtr _null_iterator;
@@ -558,6 +589,10 @@ public:
Status set_access_paths(const TColumnAccessPaths& all_access_paths,
const TColumnAccessPaths& predicate_access_paths)
override;
+ void set_need_to_read() override;
+
+ void remove_pruned_sub_iterators() override;
+
private:
std::shared_ptr<ColumnReader> _struct_reader = nullptr;
ColumnIteratorUPtr _null_iterator;
@@ -588,6 +623,9 @@ public:
Status set_access_paths(const TColumnAccessPaths& all_access_paths,
const TColumnAccessPaths& predicate_access_paths)
override;
+ void set_need_to_read() override;
+
+ void remove_pruned_sub_iterators() override;
private:
std::shared_ptr<ColumnReader> _array_reader = nullptr;
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp
b/be/src/olap/rowset/segment_v2/segment.cpp
index 1c9f40a1c38..26df6e69c2e 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -789,6 +789,7 @@ Status Segment::new_column_iterator(const TabletColumn&
tablet_column,
<< ", predicate_access_paths=" << ss2.str();
return st;
}
+ (*iter)->remove_pruned_sub_iterators();
}
}
diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp
index e86ab940f56..f9b4c440d9f 100644
--- a/be/src/olap/tablet_schema.cpp
+++ b/be/src/olap/tablet_schema.cpp
@@ -1705,6 +1705,11 @@ vectorized::Block TabletSchema::create_block(
tablet_columns_need_convert_null->find(cid) !=
tablet_columns_need_convert_null->end());
auto data_type =
vectorized::DataTypeFactory::instance().create_data_type(col, is_nullable);
+ if (col.type() == FieldType::OLAP_FIELD_TYPE_STRUCT) {
+ if (_pruned_columns_data_type.contains(col.unique_id())) {
+ data_type = _pruned_columns_data_type.at(col.unique_id());
+ }
+ }
if (_vir_col_idx_to_unique_id.contains(cid)) {
block.insert({vectorized::ColumnNothing::create(0), data_type,
col.name()});
VLOG_DEBUG << fmt::format(
@@ -1724,7 +1729,13 @@ vectorized::Block TabletSchema::create_block(bool
ignore_dropped_col) const {
if (ignore_dropped_col && is_dropped_column(*col)) {
continue;
}
+
auto data_type =
vectorized::DataTypeFactory::instance().create_data_type(*col);
+ if (col->type() == FieldType::OLAP_FIELD_TYPE_STRUCT) {
+ if (_pruned_columns_data_type.contains(col->unique_id())) {
+ data_type = _pruned_columns_data_type.at(col->unique_id());
+ }
+ }
block.insert({data_type->create_column(), data_type, col->name()});
}
return block;
@@ -1735,6 +1746,11 @@ vectorized::Block
TabletSchema::create_block_by_cids(const std::vector<uint32_t>
for (const auto& cid : cids) {
const auto& col = *_cols[cid];
auto data_type =
vectorized::DataTypeFactory::instance().create_data_type(col);
+ if (col.type() == FieldType::OLAP_FIELD_TYPE_STRUCT) {
+ if (_pruned_columns_data_type.contains(col.unique_id())) {
+ data_type = _pruned_columns_data_type.at(col.unique_id());
+ }
+ }
block.insert({data_type->create_column(), data_type, col.name()});
}
return block;
diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h
index ca4971866cb..cbe5850e07e 100644
--- a/be/src/olap/tablet_schema.h
+++ b/be/src/olap/tablet_schema.h
@@ -42,6 +42,7 @@
#include "runtime/define_primitive_type.h"
#include "runtime/descriptors.h"
#include "runtime/memory/lru_cache_policy.h"
+#include "udf/udf.h"
#include "util/debug_points.h"
#include "util/string_parser.hpp"
#include "util/string_util.h"
@@ -672,6 +673,14 @@ public:
return 0;
}
+ void add_pruned_columns_data_type(int32_t col_unique_id,
vectorized::DataTypePtr data_type) { // keyed by column unique id; assigning through operator[] replaces any entry already stored for that id
+ _pruned_columns_data_type[col_unique_id] = std::move(data_type);
+ }
+
+ void clear_pruned_columns_data_type() { _pruned_columns_data_type.clear();
} // drops every registered pruned data type
+
+ bool has_pruned_columns() const { return
!_pruned_columns_data_type.empty(); } // true while at least one pruned data type is registered
+
private:
friend bool operator==(const TabletSchema& a, const TabletSchema& b);
friend bool operator!=(const TabletSchema& a, const TabletSchema& b);
@@ -742,6 +751,7 @@ private:
bool _enable_variant_flatten_nested = false;
std::map<size_t, int32_t> _vir_col_idx_to_unique_id;
+ std::map<int32_t, vectorized::DataTypePtr> _pruned_columns_data_type; // column unique id -> data type that the create_block* functions use instead of the factory-built type for STRUCT columns
// value: extracted path set and sparse path set
std::unordered_map<int32_t, PathsSetInfo> _path_set_info_map;
diff --git a/be/src/vec/exec/scan/olap_scanner.cpp
b/be/src/vec/exec/scan/olap_scanner.cpp
index b736ecce044..ab4e69e2294 100644
--- a/be/src/vec/exec/scan/olap_scanner.cpp
+++ b/be/src/vec/exec/scan/olap_scanner.cpp
@@ -258,7 +258,7 @@ Status OlapScanner::prepare() {
// Add newly created tablet schema to schema cache if it does not have
virtual columns.
if (cached_schema == nullptr && !schema_key.empty() &&
- tablet_schema->num_virtual_columns() == 0) {
+ tablet_schema->num_virtual_columns() == 0 &&
!tablet_schema->has_pruned_columns()) {
SchemaCache::instance()->insert_schema(schema_key, tablet_schema);
}
@@ -552,6 +552,11 @@ Status OlapScanner::_init_return_columns() {
{column.unique_id(), slot->predicate_access_paths()});
}
+ if (slot->type()->get_primitive_type() == PrimitiveType::TYPE_STRUCT &&
+ !slot->all_access_paths().name_access_paths.empty()) {
+ tablet_schema->add_pruned_columns_data_type(column.unique_id(),
slot->type());
+ }
+
_return_columns.push_back(index);
if (slot->is_nullable() &&
!tablet_schema->column(index).is_nullable()) {
_tablet_columns_convert_to_null_set.emplace(index);
diff --git a/be/test/olap/rowset/segment_v2/column_reader_test.cpp
b/be/test/olap/rowset/segment_v2/column_reader_test.cpp
index 3523413d9b4..495a0dcaf4e 100644
--- a/be/test/olap/rowset/segment_v2/column_reader_test.cpp
+++ b/be/test/olap/rowset/segment_v2/column_reader_test.cpp
@@ -114,7 +114,7 @@ TEST_F(ColumnReaderTest, StructAccessPaths) {
ASSERT_EQ(iterator->_sub_column_iterators[0]->_reading_flag,
ColumnIterator::ReadingFlag::READING_FOR_PREDICATE);
ASSERT_EQ(iterator->_sub_column_iterators[1]->_reading_flag,
- ColumnIterator::ReadingFlag::NORMAL_READING);
+ ColumnIterator::ReadingFlag::NEED_TO_READ);
}
TEST_F(ColumnReaderTest, MultiAccessPaths) {
@@ -202,20 +202,19 @@ TEST_F(ColumnReaderTest, MultiAccessPaths) {
auto st = iterator->set_access_paths(all_access_paths,
predicate_access_paths);
ASSERT_TRUE(st.ok()) << "failed to set access paths: " << st.to_string();
- ASSERT_EQ(iterator->_reading_flag,
ColumnIterator::ReadingFlag::NORMAL_READING);
+ ASSERT_EQ(iterator->_reading_flag,
ColumnIterator::ReadingFlag::NEED_TO_READ);
ASSERT_EQ(iterator->_sub_column_iterators[0]->_reading_flag,
ColumnIterator::ReadingFlag::SKIP_READING);
ASSERT_EQ(iterator->_sub_column_iterators[1]->_reading_flag,
- ColumnIterator::ReadingFlag::NORMAL_READING);
+ ColumnIterator::ReadingFlag::NEED_TO_READ);
auto* array_iter =
static_cast<ArrayFileColumnIterator*>(iterator->_sub_column_iterators[1].get());
- ASSERT_EQ(array_iter->_item_iterator->_reading_flag,
- ColumnIterator::ReadingFlag::NORMAL_READING);
+ ASSERT_EQ(array_iter->_item_iterator->_reading_flag,
ColumnIterator::ReadingFlag::NEED_TO_READ);
auto* map_iter =
static_cast<MapFileColumnIterator*>(array_iter->_item_iterator.get());
- ASSERT_EQ(map_iter->_key_iterator->_reading_flag,
ColumnIterator::ReadingFlag::NORMAL_READING);
+ ASSERT_EQ(map_iter->_key_iterator->_reading_flag,
ColumnIterator::ReadingFlag::NEED_TO_READ);
ASSERT_EQ(map_iter->_val_iterator->_reading_flag,
ColumnIterator::ReadingFlag::SKIP_READING);
}
} // namespace doris::segment_v2
\ No newline at end of file
diff --git
a/regression-test/data/datatype_p0/complex_types/test_pruned_columns.out
b/regression-test/data/datatype_p0/complex_types/test_pruned_columns.out
new file mode 100644
index 00000000000..74f336d27e5
--- /dev/null
+++ b/regression-test/data/datatype_p0/complex_types/test_pruned_columns.out
@@ -0,0 +1,24 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !sql --
+1 {"city":"beijing", "data":[{1:{"a":10, "b":20}, 2:{"a":30, "b":40}}]}
+2 {"city":"shanghai", "data":[{2:{"a":50, "b":40}, 1:{"a":70, "b":80}}]}
+
+-- !sql1 --
+1 [10]
+
+-- !sql2 --
+1 beijing
+2 shanghai
+
+-- !sql3 --
+1 [{1:{"a":10, "b":20}, 2:{"a":30, "b":40}}]
+2 [{2:{"a":50, "b":40}, 1:{"a":70, "b":80}}]
+
+-- !sql4 --
+1 [{1:{"a":10, "b":20}, 2:{"a":30, "b":40}}]
+2 [{2:{"a":50, "b":40}, 1:{"a":70, "b":80}}]
+
+-- !sql5 --
+1 beijing
+2 shanghai
+
diff --git
a/regression-test/suites/datatype_p0/complex_types/test_pruned_columns.groovy
b/regression-test/suites/datatype_p0/complex_types/test_pruned_columns.groovy
new file mode 100644
index 00000000000..ad032fab8f2
--- /dev/null
+++
b/regression-test/suites/datatype_p0/complex_types/test_pruned_columns.groovy
@@ -0,0 +1,61 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pruned_columns") { // regression suite: recreates tbl_test_pruned_columns, inserts two rows with a nested struct column, then runs six qt_ queries that read the whole struct or single fields of it, with and without a predicate on a nested field
+ sql """DROP TABLE IF EXISTS `tbl_test_pruned_columns`"""
+ sql """
+ CREATE TABLE `tbl_test_pruned_columns` (
+ `id` int NULL,
+ `s` struct<city:text,data:array<map<int,struct<a:int,b:double>>>>
NULL
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ DISTRIBUTED BY RANDOM BUCKETS AUTO
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+
+ sql """
+ insert into `tbl_test_pruned_columns` values
+ (1, named_struct('city', 'beijing', 'data', array(map(1,
named_struct('a', 10, 'b', 20.0), 2, named_struct('a', 30, 'b', 40))))),
+ (2, named_struct('city', 'shanghai', 'data', array(map(2,
named_struct('a', 50, 'b', 40.0), 1, named_struct('a', 70, 'b', 80)))));
+ """
+
+ qt_sql """
+ select * from `tbl_test_pruned_columns` order by 1;
+ """
+
+ qt_sql1 """
+ select b.id, array_map(x -> struct_element(map_values(x)[1], 'a'),
struct_element(s, 'data')) from `tbl_test_pruned_columns` t join (select 1 id)
b on t.id = b.id order by 1;
+ """
+
+ qt_sql2 """
+ select id, struct_element(s, 'city') from `tbl_test_pruned_columns`
order by 1;
+ """
+
+ qt_sql3 """
+ select id, struct_element(s, 'data') from `tbl_test_pruned_columns`
order by 1;
+ """
+
+ qt_sql4 """
+ select id, struct_element(s, 'data') from `tbl_test_pruned_columns`
where struct_element(struct_element(s, 'data')[1][2], 'b') = 40 order by 1;
+ """
+
+ qt_sql5 """
+ select id, struct_element(s, 'city') from `tbl_test_pruned_columns`
where struct_element(struct_element(s, 'data')[1][2], 'b') = 40 order by 1;
+ """
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]