This is an automated email from the ASF dual-hosted git repository.
gabriellee pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new a1653f8d67c [feature](array) Support group by Array Column (#52452)
a1653f8d67c is described below
commit a1653f8d67ce90e423bcc964dcc5888e062214d0
Author: Gabriel <[email protected]>
AuthorDate: Mon Jun 30 19:36:07 2025 +0800
[feature](array) Support group by Array Column (#52452)
---
.../aggregate_function_uniq.cpp | 4 +
.../aggregate_functions/aggregate_function_uniq.h | 27 +++-
be/src/vec/columns/column_array.cpp | 152 +++++++++++++++++++++
be/src/vec/columns/column_array.h | 9 +-
be/src/vec/columns/column_nullable.h | 7 +-
be/src/vec/common/hash_table/hash_key_type.h | 5 +-
be/src/vec/common/hash_table/hash_map_context.h | 1 +
be/src/vec/core/sort_block.h | 15 +-
be/test/vec/columns/column_array_test.cpp | 8 +-
.../main/java/org/apache/doris/catalog/Type.java | 4 +
.../org/apache/doris/analysis/AggregateInfo.java | 2 +-
.../org/apache/doris/analysis/GroupByClause.java | 2 +-
.../java/org/apache/doris/analysis/SelectStmt.java | 3 +
.../glue/translator/PhysicalPlanTranslator.java | 4 +
.../nereids/rules/analysis/CheckAfterRewrite.java | 9 +-
.../commands/CreateMaterializedViewCommand.java | 5 +-
.../org/apache/doris/nereids/types/DataType.java | 4 +
.../org/apache/doris/planner/OlapScanNode.java | 3 +
.../data/correctness_p0/test_array_order_by.out | Bin 845 -> 3891 bytes
.../correctness_p0/test_array_string_order_by.out | Bin 0 -> 5109 bytes
.../correctness_p0/test_array_order_by.groovy | 37 ++++-
.../test_array_string_order_by.groovy | 62 +++++++++
.../one_level_nestedtypes_with_s3data.groovy | 21 ---
.../test_create_mv_complex_type.groovy | 18 ---
.../suites/nereids_p0/join/test_join_on.groovy | 4 -
.../aggregate_group_by_metric_type.groovy | 18 ---
26 files changed, 329 insertions(+), 95 deletions(-)
diff --git a/be/src/vec/aggregate_functions/aggregate_function_uniq.cpp b/be/src/vec/aggregate_functions/aggregate_function_uniq.cpp
index 3477415138a..ee9a67c3a12 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_uniq.cpp
+++ b/be/src/vec/aggregate_functions/aggregate_function_uniq.cpp
@@ -70,6 +70,10 @@ AggregateFunctionPtr create_aggregate_function_uniq(const
std::string& name,
return creator_without_type::create<
AggregateFunctionUniq<TYPE_STRING,
Data<TYPE_STRING>>>(argument_types,
result_is_nullable);
+ case TYPE_ARRAY:
+ return creator_without_type::create<
+ AggregateFunctionUniq<TYPE_ARRAY,
Data<TYPE_ARRAY>>>(argument_types,
+
result_is_nullable);
default:
break;
}
diff --git a/be/src/vec/aggregate_functions/aggregate_function_uniq.h b/be/src/vec/aggregate_functions/aggregate_function_uniq.h
index eb0a828e536..b68a202b965 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_uniq.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_uniq.h
@@ -58,19 +58,24 @@ class ColumnDecimal;
template <PrimitiveType T>
struct AggregateFunctionUniqExactData {
static constexpr bool is_string_key = is_string_type(T);
- using Key =
- std::conditional_t<is_string_key, UInt128,
+ using Key = std::conditional_t<
+ is_string_key, UInt128,
+ std::conditional_t<T == TYPE_ARRAY, UInt64,
std::conditional_t<T == TYPE_BOOLEAN, UInt8,
- typename
PrimitiveTypeTraits<T>::CppNativeType>>;
+ typename
PrimitiveTypeTraits<T>::CppNativeType>>>;
using Hash = HashCRC32<Key>;
using Set = flat_hash_set<Key, Hash>;
- // TODO: replace SipHash with xxhash to speed up
static UInt128 ALWAYS_INLINE get_key(const StringRef& value) {
auto hash_value = XXH_INLINE_XXH128(value.data, value.size, 0);
return UInt128 {hash_value.high64, hash_value.low64};
}
+ static UInt64 ALWAYS_INLINE get_key(const IColumn& column, size_t row_num)
{
+ UInt64 hash_value = 0;
+ column.update_xxHash_with_value(row_num, row_num + 1, hash_value,
nullptr);
+ return hash_value;
+ }
Set set;
@@ -90,6 +95,8 @@ struct OneAdder {
if constexpr (is_string_type(T)) {
StringRef value = column.get_data_at(row_num);
data.set.insert(Data::get_key(value));
+ } else if constexpr (T == TYPE_ARRAY) {
+ data.set.insert(Data::get_key(column, row_num));
} else {
data.set.insert(assert_cast<const typename
PrimitiveTypeTraits<T>::ColumnType&,
TypeCheckOnRelease::DISABLE>(column)
@@ -105,8 +112,10 @@ template <PrimitiveType T, typename Data>
class AggregateFunctionUniq final
: public IAggregateFunctionDataHelper<Data, AggregateFunctionUniq<T,
Data>> {
public:
- using KeyType = std::conditional_t<is_string_type(T), UInt128,
- typename
PrimitiveTypeTraits<T>::ColumnItemType>;
+ using KeyType =
+ std::conditional_t<is_string_type(T), UInt128,
+ std::conditional_t<T == TYPE_ARRAY, UInt64,
+ typename
PrimitiveTypeTraits<T>::ColumnItemType>>;
AggregateFunctionUniq(const DataTypes& argument_types_)
: IAggregateFunctionDataHelper<Data, AggregateFunctionUniq<T,
Data>>(argument_types_) {}
@@ -130,6 +139,12 @@ public:
keys_container[i] = Data::get_key(value);
}
return keys_container.data();
+ } else if constexpr (T == TYPE_ARRAY) {
+ keys_container.resize(batch_size);
+ for (size_t i = 0; i != batch_size; ++i) {
+ keys_container[i] = Data::get_key(column, i);
+ }
+ return keys_container.data();
} else {
return assert_cast<const typename
PrimitiveTypeTraits<T>::ColumnType&>(column)
.get_data()
diff --git a/be/src/vec/columns/column_array.cpp b/be/src/vec/columns/column_array.cpp
index f8c5a7f8531..abd3fc7fb06 100644
--- a/be/src/vec/columns/column_array.cpp
+++ b/be/src/vec/columns/column_array.cpp
@@ -27,6 +27,8 @@
#include "common/status.h"
#include "runtime/primitive_type.h"
+#include "util/simd/bits.h"
+#include "util/simd/vstring_function.h"
#include "vec/columns/column_const.h"
#include "vec/columns/column_decimal.h"
#include "vec/columns/column_nullable.h"
@@ -37,6 +39,7 @@
#include "vec/common/memcpy_small.h"
#include "vec/common/typeid_cast.h"
#include "vec/common/unaligned.h"
+#include "vec/core/sort_block.h"
#include "vec/data_types/data_type.h"
class SipHash;
@@ -221,6 +224,12 @@ void ColumnArray::get_permutation(bool reverse, size_t
limit, int nan_direction_
}
}
+void ColumnArray::sort_column(const ColumnSorter* sorter, EqualFlags& flags,
+ IColumn::Permutation& perms, EqualRange& range,
+ bool last_column) const {
+ sorter->sort_column(static_cast<const ColumnArray&>(*this), flags, perms,
range, last_column);
+}
+
int ColumnArray::compare_at(size_t n, size_t m, const IColumn& rhs_, int
nan_direction_hint) const {
// since column type is complex, we can't use this function
const auto& rhs = assert_cast<const ColumnArray&,
TypeCheckOnRelease::DISABLE>(rhs_);
@@ -241,6 +250,149 @@ int ColumnArray::compare_at(size_t n, size_t m, const
IColumn& rhs_, int nan_dir
return lhs_size < rhs_size ? -1 : (lhs_size == rhs_size ? 0 : 1);
}
+size_t ColumnArray::get_max_row_byte_size() const {
+ DCHECK(!data->is_variable_length() || data->is_column_string() ||
data->is_column_string64());
+ size_t max_size = 0;
+ size_t num_rows = size();
+ for (size_t i = 0; i < num_rows; ++i) {
+ max_size = std::max(max_size,
+ size_at(i) * data->get_max_row_byte_size() +
+ (data->is_variable_length() ?
sizeof(size_t) * size_at(i) : 0));
+ }
+
+ return sizeof(size_t) + max_size;
+}
+
+void ColumnArray::serialize_vec_with_null_map(StringRef* keys, size_t num_rows,
+ const UInt8* null_map) const {
+ DCHECK(null_map != nullptr);
+ DCHECK(!data->is_variable_length() || data->is_column_string() ||
data->is_column_string64());
+
+ const bool has_null = simd::contain_byte(null_map, num_rows, 1);
+
+ const auto* nested_col = data->get_ptr().get();
+ if (data->is_nullable()) {
+ nested_col = assert_cast<const ColumnNullable*>(data->get_ptr().get())
+ ->get_nested_column_ptr()
+ .get();
+ }
+ auto serialize_impl = [&](size_t i, char* __restrict dest) {
+ size_t array_size = size_at(i);
+ size_t offset = offset_at(i);
+
+ memcpy(dest, &array_size, sizeof(array_size));
+ dest += sizeof(array_size);
+ keys[i].size += sizeof(array_size);
+ for (size_t j = 0; j < array_size; ++j) {
+ if (data->is_nullable()) {
+ auto flag = assert_cast<const
ColumnNullable*>(data->get_ptr().get())
+ ->get_null_map_data()[offset + j];
+ memcpy(dest, &flag, sizeof(flag));
+ dest += sizeof(flag);
+ keys[i].size += sizeof(flag);
+ if (flag) {
+ continue;
+ }
+ }
+ const auto& it = nested_col->get_data_at(offset + j);
+ if (nested_col->is_variable_length()) {
+ memcpy(dest, &it.size, sizeof(it.size));
+ dest += sizeof(it.size);
+ keys[i].size += sizeof(it.size);
+ }
+ memcpy(dest, it.data, it.size);
+ dest += it.size;
+ keys[i].size += it.size;
+ }
+ };
+ if (has_null) {
+ for (size_t i = 0; i < num_rows; ++i) {
+ char* __restrict dest = const_cast<char*>(keys[i].data +
keys[i].size);
+ // serialize null first
+ memcpy(dest, null_map + i, sizeof(uint8_t));
+ dest += sizeof(uint8_t);
+ keys[i].size += sizeof(uint8_t);
+ if (null_map[i] == 0) {
+ serialize_impl(i, dest);
+ }
+ }
+ } else {
+ // All rows are not null, serialize null & value
+ for (size_t i = 0; i < num_rows; ++i) {
+ char* __restrict dest = const_cast<char*>(keys[i].data +
keys[i].size);
+ // serialize null first
+ memcpy(dest, null_map + i, sizeof(uint8_t));
+ dest += sizeof(uint8_t);
+ keys[i].size += sizeof(uint8_t);
+ serialize_impl(i, dest);
+ }
+ }
+}
+
+void ColumnArray::deserialize_vec_with_null_map(StringRef* keys, const size_t
num_rows,
+ const uint8_t* null_map) {
+ DCHECK(!data->is_variable_length() || data->is_column_string() ||
data->is_column_string64());
+ auto item_sz =
remove_nullable(get_data().get_ptr())->get_max_row_byte_size();
+ for (size_t i = 0; i != num_rows; ++i) {
+ const auto* original_ptr = keys[i].data;
+ const auto* pos = keys[i].data;
+ if (null_map[i] == 0) {
+ size_t array_size = unaligned_load<size_t>(pos);
+ pos += sizeof(size_t);
+ for (size_t j = 0; j < array_size; j++) {
+ auto null_flag = unaligned_load<uint8_t>(pos);
+ pos += sizeof(uint8_t);
+ if (null_flag) {
+ DCHECK(data->is_nullable()) << data->get_name();
+ get_data().insert_default();
+ } else {
+ size_t it_sz = item_sz;
+ if (data->is_variable_length()) {
+ it_sz = unaligned_load<size_t>(pos);
+ pos += sizeof(it_sz);
+ }
+ get_data().insert_data(pos, it_sz);
+ pos += it_sz;
+ }
+ }
+ get_offsets().push_back(get_offsets().back() + array_size);
+ keys[i].data = pos;
+ } else {
+ insert_default();
+ }
+ keys[i].size -= (keys[i].data - original_ptr);
+ }
+}
+
+void ColumnArray::deserialize_vec(StringRef* keys, const size_t num_rows) {
+ auto item_sz =
remove_nullable(get_data().get_ptr())->get_max_row_byte_size();
+ for (size_t i = 0; i != num_rows; ++i) {
+ const auto* original_ptr = keys[i].data;
+ const auto* pos = keys[i].data;
+ size_t array_size = unaligned_load<size_t>(pos);
+ pos += sizeof(size_t);
+ for (size_t j = 0; j < array_size; j++) {
+ auto null_flag = unaligned_load<uint8_t>(pos);
+ pos += sizeof(uint8_t);
+ if (null_flag) {
+ DCHECK(data->is_nullable()) << data->get_name();
+ get_data().insert_default();
+ } else {
+ size_t it_sz = item_sz;
+ if (data->is_variable_length()) {
+ it_sz = unaligned_load<size_t>(pos);
+ pos += sizeof(it_sz);
+ }
+ get_data().insert_data(pos, it_sz);
+ pos += it_sz;
+ }
+ }
+ get_offsets().push_back(get_offsets().back() + array_size);
+ keys[i].data = pos;
+ keys[i].size -= (keys[i].data - original_ptr);
+ }
+}
+
const char* ColumnArray::deserialize_and_insert_from_arena(const char* pos) {
size_t array_size = unaligned_load<size_t>(pos);
pos += sizeof(array_size);
diff --git a/be/src/vec/columns/column_array.h b/be/src/vec/columns/column_array.h
index 3d80304d2dd..33d36ec8fe8 100644
--- a/be/src/vec/columns/column_array.h
+++ b/be/src/vec/columns/column_array.h
@@ -168,7 +168,14 @@ public:
void insert_many_from(const IColumn& src, size_t position, size_t length)
override;
void get_permutation(bool reverse, size_t limit, int nan_direction_hint,
IColumn::Permutation& res) const override;
-
+ void sort_column(const ColumnSorter* sorter, EqualFlags& flags,
IColumn::Permutation& perms,
+ EqualRange& range, bool last_column) const override;
+ void deserialize_vec(StringRef* keys, const size_t num_rows) override;
+ size_t get_max_row_byte_size() const override;
+ void serialize_vec_with_null_map(StringRef* keys, size_t num_rows,
+ const uint8_t* null_map) const override;
+ void deserialize_vec_with_null_map(StringRef* keys, const size_t num_rows,
+ const uint8_t* null_map) override;
/** More efficient methods of manipulation */
IColumn& get_data() { return *data; }
const IColumn& get_data() const { return *data; }
diff --git a/be/src/vec/columns/column_nullable.h b/be/src/vec/columns/column_nullable.h
index b6a806943e4..87eb7ce6ad7 100644
--- a/be/src/vec/columns/column_nullable.h
+++ b/be/src/vec/columns/column_nullable.h
@@ -145,9 +145,10 @@ public:
void sanity_check() const override {
if (nested_column->size() != get_null_map_data().size()) {
- throw doris::Exception(ErrorCode::INTERNAL_ERROR,
- "Size of nested column {} is not equal to
size of null map {}",
- nested_column->size(),
get_null_map_data().size());
+ throw doris::Exception(
+ ErrorCode::INTERNAL_ERROR,
+ "Size of nested column {} with size {} is not equal to
size of null map {}",
+ nested_column->get_name(), nested_column->size(),
get_null_map_data().size());
}
nested_column->sanity_check();
}
diff --git a/be/src/vec/common/hash_table/hash_key_type.h b/be/src/vec/common/hash_table/hash_key_type.h
index 6cc7afbb3ba..481023af3a5 100644
--- a/be/src/vec/common/hash_table/hash_key_type.h
+++ b/be/src/vec/common/hash_table/hash_key_type.h
@@ -96,11 +96,14 @@ inline HashKeyType get_hash_key_type(const
std::vector<vectorized::DataTypePtr>&
if (data_types.empty()) {
return HashKeyType::without_key;
}
+ if (data_types[0]->get_primitive_type() == TYPE_ARRAY) {
+ return HashKeyType::serialized;
+ }
auto t = remove_nullable(data_types[0]);
// serialized cannot be used in the case of single column, because the
join operator will have some processing of column nullable, resulting in
incorrect serialized results.
if (!t->have_maximum_size_of_value()) {
- if (is_string_type(t->get_primitive_type())) {
+ if (is_string_type(t->get_primitive_type()) || t->get_primitive_type()
== TYPE_ARRAY) {
return HashKeyType::string_key;
}
throw Exception(ErrorCode::INTERNAL_ERROR, "meet invalid type,
type={}", t->get_name());
diff --git a/be/src/vec/common/hash_table/hash_map_context.h b/be/src/vec/common/hash_table/hash_map_context.h
index 470b5c309ca..96384e8531a 100644
--- a/be/src/vec/common/hash_table/hash_map_context.h
+++ b/be/src/vec/common/hash_table/hash_map_context.h
@@ -21,6 +21,7 @@
#include <utility>
#include "common/compiler_util.h"
+#include "vec/columns/column_array.h"
#include "vec/columns/column_nullable.h"
#include "vec/common/arena.h"
#include "vec/common/assert_cast.h"
diff --git a/be/src/vec/core/sort_block.h b/be/src/vec/core/sort_block.h
index b282b30e57f..eb5cd07a3b2 100644
--- a/be/src/vec/core/sort_block.h
+++ b/be/src/vec/core/sort_block.h
@@ -32,6 +32,7 @@
#include "common/compiler_util.h" // IWYU pragma: keep
#include "util/simd/bits.h"
#include "vec/columns/column.h"
+#include "vec/columns/column_array.h"
#include "vec/columns/column_nullable.h"
#include "vec/columns/column_string.h"
#include "vec/common/memcmp_small.h"
@@ -40,14 +41,12 @@
#include "vec/core/sort_description.h"
#include "vec/core/types.h"
-namespace doris {
-namespace vectorized {
+namespace doris::vectorized {
template <PrimitiveType T>
class ColumnDecimal;
template <PrimitiveType T>
class ColumnVector;
-} // namespace vectorized
-} // namespace doris
+} // namespace doris::vectorized
namespace doris::vectorized {
@@ -235,6 +234,11 @@ public:
}
}
+ void sort_column(const ColumnArray& column, EqualFlags& flags,
IColumn::Permutation& perms,
+ EqualRange& range, bool last_column) const {
+ _sort_by_default(column, flags, perms, range, last_column);
+ }
+
void sort_column(const ColumnString64& column, EqualFlags& flags,
IColumn::Permutation& perms,
EqualRange& range, bool last_column) const {
if (!_should_inline_value(perms)) {
@@ -361,7 +365,8 @@ private:
int new_limit = _limit;
auto comparator = [&](const size_t a, const size_t b) {
if constexpr (!std::is_same_v<ColumnType, ColumnString> &&
- !std::is_same_v<ColumnType, ColumnString64>) {
+ !std::is_same_v<ColumnType, ColumnString64> &&
+ !std::is_same_v<ColumnType, ColumnArray>) {
auto value_a = column.get_data()[a];
auto value_b = column.get_data()[b];
return value_a > value_b ? 1 : (value_a < value_b ? -1 : 0);
diff --git a/be/test/vec/columns/column_array_test.cpp b/be/test/vec/columns/column_array_test.cpp
index 10eec203c5a..801b9df92f5 100644
--- a/be/test/vec/columns/column_array_test.cpp
+++ b/be/test/vec/columns/column_array_test.cpp
@@ -472,10 +472,10 @@ TEST_F(ColumnArrayTest, GetRatioOfDefaultRowsTest) {
assert_get_ratio_of_default_rows(array_columns, serdes);
}
-TEST_F(ColumnArrayTest, SerDeVecTest) {
- // get_max_row_byte_size is not support in column_array
- EXPECT_ANY_THROW(ser_deser_vec(array_columns, array_types));
-}
+//TEST_F(ColumnArrayTest, SerDeVecTest) {
+// // get_max_row_byte_size is not support in column_array
+// EXPECT_ANY_THROW(ser_deser_vec(array_columns, array_types));
+//}
TEST_F(ColumnArrayTest, serDeserializeWithArenaImpl) {
ser_deserialize_with_arena_impl(array_columns, array_types);
diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java b/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java
index 9b17cbe1856..4740467377b 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java
@@ -582,6 +582,10 @@ public abstract class Type {
return isObjectStored() || isComplexType() || isJsonbType() ||
isVariantType();
}
+ public boolean isArrayTypeNestedBaseType() {
+ return isArrayType() && !((ArrayType)
this).getItemType().isOnlyMetricType();
+ }
+
public static final String OnlyObjectTypeErrorMsg =
"Doris hll, bitmap column must use with specific function, and
don't"
+ " support filter, group by or order by. please run 'help
hll' or 'help bitmap'"
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AggregateInfo.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AggregateInfo.java
index 25851f4b020..6f6c5933472 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AggregateInfo.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AggregateInfo.java
@@ -162,7 +162,7 @@ public final class AggregateInfo extends AggregateInfoBase {
private static void validateGroupingExprs(List<Expr> groupingExprs) throws
AnalysisException {
for (Expr expr : groupingExprs) {
- if (expr.getType().isOnlyMetricType()) {
+ if (expr.getType().isOnlyMetricType() &&
!expr.getType().isArrayTypeNestedBaseType()) {
throw new AnalysisException(Type.OnlyMetricTypeErrorMsg);
}
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/GroupByClause.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/GroupByClause.java
index 9b505594467..f2f691b86b3 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/GroupByClause.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/GroupByClause.java
@@ -204,7 +204,7 @@ public class GroupByClause implements ParseNode {
+ groupingExpr.toSql());
}
- if (groupingExpr.type.isOnlyMetricType()) {
+ if (groupingExpr.type.isOnlyMetricType() &&
!groupingExpr.type.isArrayTypeNestedBaseType()) {
throw new AnalysisException(Type.OnlyMetricTypeErrorMsg);
}
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java
index c64004aab71..ff0cbd51123 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java
@@ -859,6 +859,9 @@ public class SelectStmt extends QueryStmt implements
NotFallbackInParser {
|| getLimit() >
ConnectContext.get().getSessionVariable().topnOptLimitThreshold) {
return false;
}
+ if (getOrderByElements().stream().anyMatch(e ->
e.getExpr().getType().isArrayType())) {
+ return false;
+ }
// Check order by exprs are all slot refs
// Rethink? implement more generic to support all exprs
if (LOG.isDebugEnabled()) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
index 72629cd2dc9..80e787bb8a2 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
@@ -3032,6 +3032,10 @@ public class PhysicalPlanTranslator extends
DefaultPlanVisitor<PlanFragment, Pla
return false;
}
+ if (sortNode.getSortInfo().getOrderingExprs().stream().anyMatch(e ->
e.getType().isArrayType())) {
+ return false;
+ }
+
// Ensure all isAscOrder is same, ande length != 0. Can't be z-order.
if (sortNode.getSortInfo().getIsAscOrder().stream().distinct().count()
!= 1 || olapTable.isZOrderSort()) {
return false;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CheckAfterRewrite.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CheckAfterRewrite.java
index d8b0ce0e23f..3534043de15 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CheckAfterRewrite.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CheckAfterRewrite.java
@@ -151,7 +151,8 @@ public class CheckAfterRewrite extends
OneAnalysisRuleFactory {
if (plan instanceof LogicalAggregate) {
LogicalAggregate<?> agg = (LogicalAggregate<?>) plan;
for (Expression groupBy : agg.getGroupByExpressions()) {
- if (groupBy.getDataType().isOnlyMetricType()) {
+ if (groupBy.getDataType().isOnlyMetricType()
+ && !groupBy.getDataType().isArrayTypeNestedBaseType())
{
throw new AnalysisException(Type.OnlyMetricTypeErrorMsg);
}
}
@@ -178,11 +179,13 @@ public class CheckAfterRewrite extends
OneAnalysisRuleFactory {
}
WindowExpression windowExpression = (WindowExpression)
((Alias) a).child();
if (windowExpression.getOrderKeys().stream().anyMatch((
- orderKey ->
orderKey.getDataType().isOnlyMetricType()))) {
+ orderKey -> orderKey.getDataType().isOnlyMetricType()
+ && !orderKey.getDataType().isArrayType()))) {
throw new AnalysisException(Type.OnlyMetricTypeErrorMsg);
}
if (windowExpression.getPartitionKeys().stream().anyMatch((
- partitionKey ->
partitionKey.getDataType().isOnlyMetricType()))) {
+ partitionKey ->
partitionKey.getDataType().isOnlyMetricType()
+ &&
!partitionKey.getDataType().isArrayTypeNestedBaseType()))) {
throw new AnalysisException(Type.OnlyMetricTypeErrorMsg);
}
});
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateMaterializedViewCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateMaterializedViewCommand.java
index 088e42561a0..86caacc8aca 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateMaterializedViewCommand.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateMaterializedViewCommand.java
@@ -341,7 +341,8 @@ public class CreateMaterializedViewCommand extends Command
implements ForwardWit
int groupByExprCount = aggregate.getGroupByExpressions().size();
context.groupByExprs = Maps.newHashMap();
for (int i = 0; i < groupByExprCount; ++i) {
- if (outputs.get(i).getDataType().isOnlyMetricType()) {
+ if (outputs.get(i).getDataType().isOnlyMetricType()
+ &&
!outputs.get(i).getDataType().isArrayTypeNestedBaseType()) {
throw new AnalysisException(Type.OnlyMetricTypeErrorMsg);
}
context.groupByExprs.put(outputs.get(i).getExprId(),
outputs.get(i));
@@ -359,7 +360,7 @@ public class CreateMaterializedViewCommand extends Command
implements ForwardWit
checkNoNondeterministicFunction(sort);
if (sort.getOrderKeys().stream().anyMatch((
orderKey -> orderKey.getExpr().getDataType()
- .isOnlyMetricType()))) {
+ .isOnlyMetricType() &&
!orderKey.getExpr().getDataType().isArrayType()))) {
throw new AnalysisException(Type.OnlyMetricTypeErrorMsg);
}
context.orderByExprs = (List<NamedExpression>)
sort.getExpressions();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java
index 30aa80d54e8..e31b4e8456c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DataType.java
@@ -635,6 +635,10 @@ public abstract class DataType {
return isObjectType() || isComplexType() || isJsonType() ||
isVariantType();
}
+ public boolean isArrayTypeNestedBaseType() {
+ return isArrayType() && !((ArrayType)
this).getItemType().isOnlyMetricType();
+ }
+
public boolean isObjectType() {
return isHllType() || isBitmapType() || isQuantileStateType();
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java
index 33aae2afc31..b6648019a7d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java
@@ -1282,6 +1282,9 @@ public class OlapScanNode extends ScanNode {
|| sortNode.getLimit() >
ConnectContext.get().getSessionVariable().topnOptLimitThreshold) {
return false;
}
+ if (sortNode.getSortInfo().getOrderingExprs().stream().anyMatch(e ->
e.getType().isArrayType())) {
+ return false;
+ }
// Ensure all isAscOrder is same, ande length != 0.
// Can't be zorder.
diff --git a/regression-test/data/correctness_p0/test_array_order_by.out b/regression-test/data/correctness_p0/test_array_order_by.out
index 137d4698955..7f5675cea0f 100644
Binary files a/regression-test/data/correctness_p0/test_array_order_by.out and b/regression-test/data/correctness_p0/test_array_order_by.out differ
diff --git a/regression-test/data/correctness_p0/test_array_string_order_by.out b/regression-test/data/correctness_p0/test_array_string_order_by.out
new file mode 100644
index 00000000000..6431060a640
Binary files /dev/null and b/regression-test/data/correctness_p0/test_array_string_order_by.out differ
diff --git a/regression-test/suites/correctness_p0/test_array_order_by.groovy b/regression-test/suites/correctness_p0/test_array_order_by.groovy
index b104ec3e78f..62bf33c6686 100644
--- a/regression-test/suites/correctness_p0/test_array_order_by.groovy
+++ b/regression-test/suites/correctness_p0/test_array_order_by.groovy
@@ -29,11 +29,34 @@ suite("test_array_order_by") {
DUPLICATE KEY(typ_id)
DISTRIBUTED BY HASH(typ_id) BUCKETS 10
PROPERTIES ("replication_allocation" =
"tag.location.default: 1");"""
- sql """insert into test_array_order_by values(1,'name1',NULL),
(1,'name2',[1,2,3,4,5]), (1,'name3',[-1,2,-2]), (1,'name4',[6]),
(1,'name2',[1,2,3,4,5]), (1,'name2',[1,2,3]);"""
- qt_select1 """ select * from test_array_order_by order by arr ASC; """
- qt_select2 """ select * from test_array_order_by order by arr DESC; """
- qt_select3 """ select * from test_array_order_by order by name,arr ASC;
"""
- qt_select4 """ select * from test_array_order_by order by name,arr DESC;
"""
- qt_select5 """ select * from test_array_order_by order by typ_id,arr ASC;
"""
- qt_select6 """ select * from test_array_order_by order by typ_id,arr DESC;
"""
+ sql """insert into test_array_order_by values(1,'name1',NULL),
(1,'name2',[1,2,3,4,5]), (1,'name3',[-1,2,-2]), (1,'name4',[6]),
(1,'name2',[1,2,3,4,5]), (1,'name2',[1,2,3]), (1,'name2',[1,NULL,3]),
(1,'name2',[1,0,3]);"""
+ qt_select1 """ select * from test_array_order_by order by arr ASC LIMIT
10; """
+ qt_select2 """ select * from test_array_order_by order by arr DESC LIMIT
10; """
+ qt_select3 """ select * from test_array_order_by order by name,arr ASC
LIMIT 10; """
+ qt_select4 """ select * from test_array_order_by order by name,arr DESC
LIMIT 10; """
+ qt_select5 """ select * from test_array_order_by order by typ_id,arr ASC
LIMIT 10; """
+ qt_select6 """ select * from test_array_order_by order by typ_id,arr DESC
LIMIT 10; """
+
+ qt_select7 """ select arr, count(typ_id) from test_array_order_by group by
arr order by arr; """
+ qt_select8 """ select arr,name, count(typ_id) from test_array_order_by
group by name,arr order by name,arr; """
+ qt_select9 """ select arr,name, count(typ_id) from test_array_order_by
group by arr,name order by name,arr; """
+ qt_select10 """ select multi_distinct_count(arr) from
test_array_order_by; """
+ qt_select11 """ select arr,name, sum(typ_id) over(partition by arr,name
order by arr,name rows between unbounded preceding and current row) from
test_array_order_by order by arr """
+ qt_select12 """ select arr,name, sum(typ_id) over(partition by name, arr
order by name,arr rows between unbounded preceding and current row) from
test_array_order_by order by arr """
+ qt_select13 """ select arr, sum(typ_id) over(partition by arr order by arr
rows between unbounded preceding and current row) from test_array_order_by
order by arr """
+
+ sql """ set force_sort_algorithm=topn; """
+ qt_select1 """ select * from test_array_order_by order by arr ASC LIMIT
10; """
+ qt_select2 """ select * from test_array_order_by order by arr DESC LIMIT
10; """
+ qt_select3 """ select * from test_array_order_by order by name,arr ASC
LIMIT 10; """
+ qt_select4 """ select * from test_array_order_by order by name,arr DESC
LIMIT 10; """
+ qt_select5 """ select * from test_array_order_by order by typ_id,arr ASC
LIMIT 10; """
+ qt_select6 """ select * from test_array_order_by order by typ_id,arr DESC
LIMIT 10; """
+ sql """ set force_sort_algorithm=heap; """
+ qt_select1 """ select * from test_array_order_by order by arr ASC LIMIT
10; """
+ qt_select2 """ select * from test_array_order_by order by arr DESC LIMIT
10; """
+ qt_select3 """ select * from test_array_order_by order by name,arr ASC
LIMIT 10; """
+ qt_select4 """ select * from test_array_order_by order by name,arr DESC
LIMIT 10; """
+ qt_select5 """ select * from test_array_order_by order by typ_id,arr ASC
LIMIT 10; """
+ qt_select6 """ select * from test_array_order_by order by typ_id,arr DESC
LIMIT 10; """
}
diff --git a/regression-test/suites/correctness_p0/test_array_string_order_by.groovy b/regression-test/suites/correctness_p0/test_array_string_order_by.groovy
new file mode 100644
index 00000000000..24f7e4483ad
--- /dev/null
+++ b/regression-test/suites/correctness_p0/test_array_string_order_by.groovy
@@ -0,0 +1,62 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// The cases are copied from https://github.com/trinodb/trino/tree/master
+// /testing/trino-product-tests/src/main/resources/sql-tests/testcases/aggregate
+// and modified by Doris.
+
+suite("test_array_string_order_by") {
+ sql """drop table if exists test_array_string_order_by;"""
+ sql """CREATE TABLE test_array_string_order_by(
+ typ_id BIGINT NOT NULL COMMENT "ID",
+ name VARCHAR(20) NULL COMMENT "名称",
+ arr ARRAY<text> NULL COMMENT "数组"
+ )
+ DUPLICATE KEY(typ_id)
+ DISTRIBUTED BY HASH(typ_id) BUCKETS 10
+ PROPERTIES ("replication_allocation" =
"tag.location.default: 1");"""
+ sql """insert into test_array_string_order_by values(1,'name1',NULL),
(1,'name2',['a','b','c','d','e']), (1,'name3',['b','d','c']), (1,'name4',['a',
'a' , 'd']), (1,'name2',['a','b','c','d','e']), (1,'name2',['a', 'e' , 'd']),
(1,'name2',['a', 'b' , 'd']), (1,'name2',['a', NULL , 'd']);"""
+ qt_select1 """ select * from test_array_string_order_by order by arr ASC
LIMIT 10; """
+ qt_select2 """ select * from test_array_string_order_by order by arr DESC
LIMIT 10; """
+ qt_select3 """ select * from test_array_string_order_by order by name,arr
ASC LIMIT 10; """
+ qt_select4 """ select * from test_array_string_order_by order by name,arr
DESC LIMIT 10; """
+ qt_select5 """ select * from test_array_string_order_by order by
typ_id,arr ASC LIMIT 10; """
+ qt_select6 """ select * from test_array_string_order_by order by
typ_id,arr DESC LIMIT 10; """
+
+ qt_select7 """ select arr, count(typ_id) from test_array_string_order_by
group by arr order by arr; """
+ qt_select8 """ select arr,name, count(typ_id) from
test_array_string_order_by group by name,arr order by name,arr; """
+ qt_select9 """ select arr,name, count(typ_id) from
test_array_string_order_by group by arr,name order by name,arr; """
+ qt_select10 """ select multi_distinct_count(arr) from
test_array_string_order_by; """
+ qt_select11 """ select arr,name, sum(typ_id) over(partition by arr,name
order by arr,name rows between unbounded preceding and current row) from
test_array_string_order_by order by arr """
+ qt_select12 """ select arr,name, sum(typ_id) over(partition by name, arr
order by name,arr rows between unbounded preceding and current row) from
test_array_string_order_by order by arr """
+ qt_select13 """ select arr, sum(typ_id) over(partition by arr order by arr
rows between unbounded preceding and current row) from
test_array_string_order_by order by arr """
+
+ sql """ set force_sort_algorithm=topn; """
+ qt_select1 """ select * from test_array_string_order_by order by arr ASC
LIMIT 10; """
+ qt_select2 """ select * from test_array_string_order_by order by arr DESC
LIMIT 10; """
+ qt_select3 """ select * from test_array_string_order_by order by name,arr
ASC LIMIT 10; """
+ qt_select4 """ select * from test_array_string_order_by order by name,arr
DESC LIMIT 10; """
+ qt_select5 """ select * from test_array_string_order_by order by
typ_id,arr ASC LIMIT 10; """
+ qt_select6 """ select * from test_array_string_order_by order by
typ_id,arr DESC LIMIT 10; """
+ sql """ set force_sort_algorithm=heap; """
+ qt_select1 """ select * from test_array_string_order_by order by arr ASC
LIMIT 10; """
+ qt_select2 """ select * from test_array_string_order_by order by arr DESC
LIMIT 10; """
+ qt_select3 """ select * from test_array_string_order_by order by name,arr
ASC LIMIT 10; """
+ qt_select4 """ select * from test_array_string_order_by order by name,arr
DESC LIMIT 10; """
+ qt_select5 """ select * from test_array_string_order_by order by
typ_id,arr ASC LIMIT 10; """
+ qt_select6 """ select * from test_array_string_order_by order by
typ_id,arr DESC LIMIT 10; """
+}
diff --git a/regression-test/suites/datatype_p0/nested_types/base_cases/one_level_nestedtypes_with_s3data.groovy b/regression-test/suites/datatype_p0/nested_types/base_cases/one_level_nestedtypes_with_s3data.groovy
index c56215c2f9c..cc73f9ffebb 100644
--- a/regression-test/suites/datatype_p0/nested_types/base_cases/one_level_nestedtypes_with_s3data.groovy
+++ b/regression-test/suites/datatype_p0/nested_types/base_cases/one_level_nestedtypes_with_s3data.groovy
@@ -35,17 +35,6 @@ suite("one_level_nestedtypes_with_s3data") {
"c_double", "c_decimal", "c_decimalv3", "c_date",
"c_datetime", "c_datev2", "c_datetimev2",
"c_char", "c_varchar", "c_string"]
- def groupby_or_orderby_exception = {is_groupby, table_name, col_name ->
- test {
- if (is_groupby) {
- sql "select ${col_name} from ${table_name} group by
${col_name};"
- } else {
- sql "select ${col_name} from ${table_name} order by
${col_name};"
- }
- exception("errCode = 2, detailMessage = Doris hll, bitmap, array,
map, struct, jsonb, variant column must use with specific function, and don't
support filter, group by or order by")
- }
- }
-
def groupby_or_orderby_element_at = {is_groupby, table_name, agg_expr ->
if (is_groupby) {
order_qt_sql "select ${agg_expr} from ${table_name} where k1 IS
NOT NULL group by ${agg_expr};"
@@ -177,10 +166,6 @@ suite("one_level_nestedtypes_with_s3data") {
for (String col : colNameArr) {
order_qt_select_arr "select ${col}[1], ${col}[-1] from
${table_names[0]} where k1 IS NOT NULL AND ${col}[1]<${col}[-1] order by k1
limit 10;"
}
- // select * from table where groupby|orderby column will meet exception
- for (String col : colNameArr) {
- groupby_or_orderby_exception(true, table_names[0], col)
- }
// select * from table where groupby|orderby element_at(column)
for (String col : colNameArr) {
String agg_expr = "${col}[1]"
@@ -218,10 +203,6 @@ suite("one_level_nestedtypes_with_s3data") {
for (String col : colNameArr) {
order_qt_select_map "select ${col}[map_keys(${col})[1]],
${col}[map_keys(${col})[-1]] from ${table_names[1]} where
${col}[map_keys(${col})[1]]<${col}[map_keys(${col})[-1]] AND k1 IS NOT NULL
order by k1 limit 10;"
}
- // select * from table where groupby|orderby column will meet exception
- for (String col : colNameArr) {
- groupby_or_orderby_exception(true, table_names[1], col)
- }
// select * from table where groupby|orderby element_at(column)
for (String col : colNameArr) {
String agg_expr = "${col}[map_keys(${col})[1]]"
@@ -271,8 +252,6 @@ suite("one_level_nestedtypes_with_s3data") {
// select * from table where groupby|orderby column will meet exception
- groupby_or_orderby_exception(true, table_names[2], colNameArr[0])
-
// select * from table where groupby|orderby element_at(column)
String agg_expr = "struct_element(${colNameArr[0]}, 1)"
groupby_or_orderby_element_at(true, table_names[2], agg_expr)
diff --git a/regression-test/suites/mv_p0/test_create_mv_complex_type/test_create_mv_complex_type.groovy b/regression-test/suites/mv_p0/test_create_mv_complex_type/test_create_mv_complex_type.groovy
index 2ce17abcc0b..4c03019a263 100644
--- a/regression-test/suites/mv_p0/test_create_mv_complex_type/test_create_mv_complex_type.groovy
+++ b/regression-test/suites/mv_p0/test_create_mv_complex_type/test_create_mv_complex_type.groovy
@@ -125,15 +125,6 @@ suite ("create_mv_complex_type") {
}
assertFalse(success)
- success = false
- try {
- sql """create materialized view mv as select c_bigint, c_int, c_array,
count(c_bigint) from base_table group by c_bigint, c_int, c_array;"""
- success = true
- } catch (Exception e) {
- assertTrue(e.getMessage().contains("don't support filter, group by"),
e.getMessage())
- }
- assertFalse(success)
-
success = false
try {
sql """create materialized view mv as select c_bigint, c_int, c_map,
count(c_bigint) from base_table group by c_bigint, c_int, c_map;"""
@@ -163,15 +154,6 @@ suite ("create_mv_complex_type") {
}
assertFalse(success)
- success = false
- try {
- sql """create materialized view mv as select c_bigint, c_int, c_array
from base_table order by c_bigint, c_int, c_array;"""
- success = true
- } catch (Exception e) {
- assertTrue(e.getMessage().contains("don't support filter, group by"),
e.getMessage())
- }
- assertFalse(success)
-
success = false
try {
sql """create materialized view mv as select c_bigint, c_int, c_map
from base_table order by c_bigint, c_int, c_map;"""
diff --git a/regression-test/suites/nereids_p0/join/test_join_on.groovy b/regression-test/suites/nereids_p0/join/test_join_on.groovy
index ebd43a544cf..d7253ebcce2 100644
--- a/regression-test/suites/nereids_p0/join/test_join_on.groovy
+++ b/regression-test/suites/nereids_p0/join/test_join_on.groovy
@@ -37,10 +37,6 @@ suite("test_join_on", "nereids_p0") {
sql """insert into join_on values (1, [1, 2], hll_hash(1),
bitmap_from_string('1, 3, 5, 7, 9, 11, 13, 99, 19910811, 20150402')); """
sql """insert into join_on values (2, [2, 3], hll_hash(2),
bitmap_from_string('2, 4, 6, 8, 10, 12, 14, 100, 19910812, 20150403')); """
qt_sql """ select * from join_on order by k1; """
- test {
- sql """ select * from join_on as j1 inner join join_on as j2 on
j1.d_array = j2.d_array; """
- exception "errCode = 2"
- }
test {
sql """ select * from join_on as j1 inner join join_on as j2 on
j1.hll_col = j2.hll_col; """
exception "errCode = 2"
diff --git a/regression-test/suites/query_p0/aggregate/aggregate_group_by_metric_type.groovy b/regression-test/suites/query_p0/aggregate/aggregate_group_by_metric_type.groovy
index e480663de8e..951df516147 100644
--- a/regression-test/suites/query_p0/aggregate/aggregate_group_by_metric_type.groovy
+++ b/regression-test/suites/query_p0/aggregate/aggregate_group_by_metric_type.groovy
@@ -65,24 +65,6 @@ suite("aggregate_group_by_metric_type") {
}
sql "DROP TABLE test_group_by_hll_and_bitmap"
- sql "DROP TABLE IF EXISTS test_group_by_array"
- sql """
- CREATE TABLE IF NOT EXISTS test_group_by_array (id int, c_array
array<int>) ENGINE=OLAP DUPLICATE KEY(`id`)
- DISTRIBUTED BY HASH(`id`) BUCKETS 1 properties("replication_num" =
"1");
- """
- sql "insert into test_group_by_array values(1, [1,2,3])"
-
- test {
- sql "select distinct c_array from test_group_by_array"
- exception "${error_msg}"
- }
- test {
- sql "select c_array,count(*) from test_group_by_array group by c_array"
- exception "${error_msg}"
- }
-
- sql "DROP TABLE test_group_by_array"
-
sql "DROP TABLE IF EXISTS test_group_by_struct"
sql """
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]