This is an automated email from the ASF dual-hosted git repository.
englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 5286b901c17 [feat](nereids)Compressed materialize for aggregate and
sort (#42408)
5286b901c17 is described below
commit 5286b901c175071dc438456b2d63c91e63eacaba
Author: minghong <[email protected]>
AuthorDate: Tue Nov 12 16:33:16 2024 +0800
[feat](nereids)Compressed materialize for aggregate and sort (#42408)
## Proposed changes
if A is a short string column (len<4)
1. 'select A from T group by A' => 'select
DecodeAsVarchar(encode_as_int(A)) from T group by encode_as_int(A)'
2. 'select * from T order by A' => 'select * from T order by
encode_as_int(A)'
For other lengths the short string is wrapped by a different encoder:
encode_as_smallint (len<2), encode_as_bigint (len<7) or encode_as_largeint
(len<15); longer strings are left unchanged.
The session variable ENABLE_COMPRESS_MATERIALIZE is used to switch this
optimization on/off (it is off by default).
Issue Number: close #xxx
<!--Describe your changes.-->
---
.../doris/nereids/jobs/executor/Analyzer.java | 2 +
.../org/apache/doris/nereids/rules/RuleType.java | 2 +
.../rules/analysis/CompressedMaterialize.java | 166 +++++++++++++++++
.../rewrite/PushDownFilterThroughProject.java | 57 +++++-
.../functions/scalar/EncodeAsBigInt.java | 2 +-
.../expressions/functions/scalar/EncodeAsInt.java | 2 +-
.../functions/scalar/EncodeAsLargeInt.java | 2 +-
.../functions/scalar/EncodeAsSmallInt.java | 2 +-
.../functions/scalar/EncodeStrToInteger.java | 24 +++
.../plans/physical/PhysicalHashAggregate.java | 7 +
.../java/org/apache/doris/qe/SessionVariable.java | 9 +
.../compress_materialize/compress_materialize.out | 55 ++++++
.../compress_materialize.groovy | 198 +++++++++++++++++++++
13 files changed, 520 insertions(+), 8 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java
index 894d4264201..8985dadc0bd 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java
@@ -31,6 +31,7 @@ import org.apache.doris.nereids.rules.analysis.CheckAnalysis;
import org.apache.doris.nereids.rules.analysis.CheckPolicy;
import org.apache.doris.nereids.rules.analysis.CollectJoinConstraint;
import org.apache.doris.nereids.rules.analysis.CollectSubQueryAlias;
+import org.apache.doris.nereids.rules.analysis.CompressedMaterialize;
import org.apache.doris.nereids.rules.analysis.EliminateDistinctConstant;
import org.apache.doris.nereids.rules.analysis.EliminateGroupByConstant;
import org.apache.doris.nereids.rules.analysis.EliminateLogicalSelectHint;
@@ -166,6 +167,7 @@ public class Analyzer extends AbstractBatchJobExecutor {
topDown(new EliminateGroupByConstant()),
topDown(new SimplifyAggGroupBy()),
+ topDown(new CompressedMaterialize()),
topDown(new NormalizeAggregate()),
topDown(new HavingToFilter()),
topDown(new QualifyToFilter()),
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java
index dbf96ef2f1f..beb8bd43655 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java
@@ -106,6 +106,8 @@ public enum RuleType {
CHECK_DATA_TYPES(RuleTypeClass.CHECK),
// rewrite rules
+ COMPRESSED_MATERIALIZE_AGG(RuleTypeClass.REWRITE),
+ COMPRESSED_MATERIALIZE_SORT(RuleTypeClass.REWRITE),
NORMALIZE_AGGREGATE(RuleTypeClass.REWRITE),
NORMALIZE_SORT(RuleTypeClass.REWRITE),
NORMALIZE_REPEAT(RuleTypeClass.REWRITE),
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CompressedMaterialize.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CompressedMaterialize.java
new file mode 100644
index 00000000000..7d8a7664f82
--- /dev/null
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CompressedMaterialize.java
@@ -0,0 +1,166 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.rules.analysis;
+
+import org.apache.doris.nereids.properties.OrderKey;
+import org.apache.doris.nereids.rules.Rule;
+import org.apache.doris.nereids.rules.RuleType;
+import org.apache.doris.nereids.trees.expressions.Alias;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.NamedExpression;
+import org.apache.doris.nereids.trees.expressions.SlotReference;
+import
org.apache.doris.nereids.trees.expressions.functions.scalar.DecodeAsVarchar;
+import
org.apache.doris.nereids.trees.expressions.functions.scalar.EncodeAsBigInt;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.EncodeAsInt;
+import
org.apache.doris.nereids.trees.expressions.functions.scalar.EncodeAsLargeInt;
+import
org.apache.doris.nereids.trees.expressions.functions.scalar.EncodeAsSmallInt;
+import org.apache.doris.nereids.trees.plans.Plan;
+import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate;
+import org.apache.doris.nereids.trees.plans.logical.LogicalSort;
+import org.apache.doris.nereids.types.DataType;
+import org.apache.doris.nereids.types.coercion.CharacterType;
+import org.apache.doris.nereids.util.ExpressionUtils;
+import org.apache.doris.qe.ConnectContext;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+
+/**
+ * convert string to int in order to improve performance for aggregation and
sorting.
+ *
+ * 1. AGG
+ * select A from T group by A
+ * =>
+ * select DecodeAsVarchar(encode_as_int(A)) from T group by encode_as_int(A)
+ *
+ * 2. Sort
+ * select * from T order by A
+ * =>
+ * select * from T order by encode_as_int(A)
+ */
+public class CompressedMaterialize implements AnalysisRuleFactory {
+ @Override
+ public List<Rule> buildRules() {
+ return ImmutableList.of(
+ RuleType.COMPRESSED_MATERIALIZE_AGG.build(
+ logicalAggregate().when(a -> ConnectContext.get() !=
null
+ &&
ConnectContext.get().getSessionVariable().enableCompressMaterialize)
+ .then(this::compressedMaterializeAggregate)),
+ RuleType.COMPRESSED_MATERIALIZE_SORT.build(
+ logicalSort().when(a -> ConnectContext.get() != null
+ &&
ConnectContext.get().getSessionVariable().enableCompressMaterialize)
+ .then(this::compressMaterializeSort)
+ )
+ );
+ }
+
+ private LogicalSort<Plan> compressMaterializeSort(LogicalSort<Plan> sort) {
+ List<OrderKey> newOrderKeys = Lists.newArrayList();
+ boolean changed = false;
+ for (OrderKey orderKey : sort.getOrderKeys()) {
+ Expression expr = orderKey.getExpr();
+ Optional<Expression> encode = getEncodeExpression(expr);
+ if (encode.isPresent()) {
+ newOrderKeys.add(new OrderKey(encode.get(),
+ orderKey.isAsc(),
+ orderKey.isNullFirst()));
+ changed = true;
+ } else {
+ newOrderKeys.add(orderKey);
+ }
+ }
+ return changed ? sort.withOrderKeys(newOrderKeys) : sort;
+ }
+
+ private Optional<Expression> getEncodeExpression(Expression expression) {
+ DataType type = expression.getDataType();
+ Expression encodeExpr = null;
+ if (type instanceof CharacterType) {
+ CharacterType ct = (CharacterType) type;
+ if (ct.getLen() > 0) {
+ // skip column from variant, like 'L.var["L_SHIPMODE"] AS TEXT'
+ if (ct.getLen() < 2) {
+ encodeExpr = new EncodeAsSmallInt(expression);
+ } else if (ct.getLen() < 4) {
+ encodeExpr = new EncodeAsInt(expression);
+ } else if (ct.getLen() < 7) {
+ encodeExpr = new EncodeAsBigInt(expression);
+ } else if (ct.getLen() < 15) {
+ encodeExpr = new EncodeAsLargeInt(expression);
+ }
+ }
+ }
+ return Optional.ofNullable(encodeExpr);
+ }
+
+ /*
+ example:
+ [support] select sum(v) from t group by substring(k, 1,2)
+ [not support] select substring(k, 1,2), sum(v) from t group by
substring(k, 1,2)
+ [support] select k, sum(v) from t group by k
+ [not support] select substring(k, 1,2), sum(v) from t group by k
+ [support] select A as B from T group by A
+ */
+ private Map<Expression, Expression>
getEncodeGroupByExpressions(LogicalAggregate<Plan> aggregate) {
+ Map<Expression, Expression> encodeGroupbyExpressions =
Maps.newHashMap();
+ for (Expression gb : aggregate.getGroupByExpressions()) {
+ Optional<Expression> encodeExpr = getEncodeExpression(gb);
+ encodeExpr.ifPresent(expression ->
encodeGroupbyExpressions.put(gb, expression));
+ }
+ return encodeGroupbyExpressions;
+ }
+
+ private LogicalAggregate<Plan>
compressedMaterializeAggregate(LogicalAggregate<Plan> aggregate) {
+ Map<Expression, Expression> encodeGroupByExpressions =
getEncodeGroupByExpressions(aggregate);
+ if (!encodeGroupByExpressions.isEmpty()) {
+ List<Expression> newGroupByExpressions = Lists.newArrayList();
+ for (Expression gp : aggregate.getGroupByExpressions()) {
+
newGroupByExpressions.add(encodeGroupByExpressions.getOrDefault(gp, gp));
+ }
+ List<NamedExpression> newOutputs = Lists.newArrayList();
+ Map<Expression, Expression> decodeMap = new HashMap<>();
+ for (Expression gp : encodeGroupByExpressions.keySet()) {
+ decodeMap.put(gp, new
DecodeAsVarchar(encodeGroupByExpressions.get(gp)));
+ }
+ for (NamedExpression out : aggregate.getOutputExpressions()) {
+ Expression replaced = ExpressionUtils.replace(out, decodeMap);
+ if (out != replaced) {
+ if (out instanceof SlotReference) {
+ newOutputs.add(new Alias(out.getExprId(), replaced,
out.getName()));
+ } else if (out instanceof Alias) {
+ newOutputs.add(((Alias)
out).withChildren(replaced.children()));
+ } else {
+ // should not reach here
+ Preconditions.checkArgument(false, "output abnormal: "
+ aggregate);
+ }
+ } else {
+ newOutputs.add(out);
+ }
+ }
+ aggregate = aggregate.withGroupByAndOutput(newGroupByExpressions,
newOutputs);
+ }
+ return aggregate;
+ }
+}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughProject.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughProject.java
index f6f7c2d1100..38b687ba838 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughProject.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughProject.java
@@ -22,16 +22,21 @@ import org.apache.doris.nereids.rules.Rule;
import org.apache.doris.nereids.rules.RuleType;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.Slot;
+import
org.apache.doris.nereids.trees.expressions.functions.scalar.DecodeAsVarchar;
+import
org.apache.doris.nereids.trees.expressions.functions.scalar.EncodeStrToInteger;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.logical.LogicalFilter;
import org.apache.doris.nereids.trees.plans.logical.LogicalLimit;
import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
import org.apache.doris.nereids.util.ExpressionUtils;
import org.apache.doris.nereids.util.PlanUtils;
+import org.apache.doris.qe.ConnectContext;
import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
+import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
@@ -80,8 +85,15 @@ public class PushDownFilterThroughProject implements
RewriteRuleFactory {
// just return unchanged plan
return null;
}
- project = (LogicalProject<? extends Plan>) project.withChildren(new
LogicalFilter<>(
- ExpressionUtils.replace(splitConjuncts.second,
project.getAliasToProducer()),
+ Set<Expression> conjuncts;
+ if (ConnectContext.get() != null &&
ConnectContext.get().getSessionVariable().enableCompressMaterialize) {
+ conjuncts =
ExpressionUtils.replace(eliminateDecodeAndEncode(splitConjuncts.second),
+ project.getAliasToProducer());
+ } else {
+ conjuncts = ExpressionUtils.replace(splitConjuncts.second,
+ project.getAliasToProducer());
+ }
+ project = (LogicalProject<? extends Plan>) project.withChildren(new
LogicalFilter<>(conjuncts,
project.child()));
return PlanUtils.filterOrSelf(splitConjuncts.first, project);
}
@@ -97,10 +109,17 @@ public class PushDownFilterThroughProject implements
RewriteRuleFactory {
if (splitConjuncts.second.isEmpty()) {
return null;
}
+ Set<Expression> conjuncts;
+ if (ConnectContext.get() != null &&
ConnectContext.get().getSessionVariable().enableCompressMaterialize) {
+ conjuncts =
ExpressionUtils.replace(eliminateDecodeAndEncode(splitConjuncts.second),
+ project.getAliasToProducer());
+ } else {
+ conjuncts = ExpressionUtils.replace(splitConjuncts.second,
+ project.getAliasToProducer());
+ }
project = project.withProjectsAndChild(project.getProjects(),
new LogicalFilter<>(
- ExpressionUtils.replace(splitConjuncts.second,
- project.getAliasToProducer()),
+ conjuncts,
limit.withChildren(project.child())));
return PlanUtils.filterOrSelf(splitConjuncts.first, project);
}
@@ -119,4 +138,34 @@ public class PushDownFilterThroughProject implements
RewriteRuleFactory {
}
return Pair.of(remainPredicates, pushDownPredicates);
}
+
+ private static Set<Expression> eliminateDecodeAndEncode(Set<Expression>
expressions) {
+ LinkedHashSet<Expression> eliminated = new LinkedHashSet<Expression>();
+ // keep expression order
+ for (Expression expression : expressions) {
+ eliminated.add(eliminateDecodeAndEncode(expression));
+ }
+ return eliminated;
+ }
+
+ private static Expression eliminateDecodeAndEncode(Expression expression) {
+ if (expression instanceof DecodeAsVarchar && expression.child(0)
instanceof EncodeStrToInteger) {
+ return expression.child(0).child(0);
+ }
+ boolean hasNewChild = false;
+ List<Expression> newChildren = Lists.newArrayList();
+ for (Expression child : expression.children()) {
+ Expression replace = eliminateDecodeAndEncode(child);
+ if (replace != child) {
+ hasNewChild = true;
+ newChildren.add(replace);
+ } else {
+ newChildren.add(child);
+ }
+ }
+ if (hasNewChild) {
+ return expression.withChildren(newChildren);
+ }
+ return expression;
+ }
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsBigInt.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsBigInt.java
index 59a31b4da49..7d798ecf3e8 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsBigInt.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsBigInt.java
@@ -34,7 +34,7 @@ import java.util.List;
* ScalarFunction 'EncodeAsBigInt'.
*/
public class EncodeAsBigInt extends ScalarFunction
- implements ExplicitlyCastableSignature, PropagateNullable {
+ implements ExplicitlyCastableSignature, PropagateNullable,
EncodeStrToInteger {
public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
FunctionSignature.ret(BigIntType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsInt.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsInt.java
index 30729354379..5c6382d6ea1 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsInt.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsInt.java
@@ -34,7 +34,7 @@ import java.util.List;
* ScalarFunction 'EncodeAsInt'.
*/
public class EncodeAsInt extends ScalarFunction
- implements ExplicitlyCastableSignature, PropagateNullable {
+ implements ExplicitlyCastableSignature, PropagateNullable,
EncodeStrToInteger {
public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
FunctionSignature.ret(IntegerType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsLargeInt.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsLargeInt.java
index 7cfce246257..bb30a9a8e8a 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsLargeInt.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsLargeInt.java
@@ -34,7 +34,7 @@ import java.util.List;
* ScalarFunction 'EncodeAsLargeInt'.
*/
public class EncodeAsLargeInt extends ScalarFunction
- implements ExplicitlyCastableSignature, PropagateNullable {
+ implements ExplicitlyCastableSignature, PropagateNullable,
EncodeStrToInteger {
public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
FunctionSignature.ret(LargeIntType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsSmallInt.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsSmallInt.java
index 0809c935a57..355a740197c 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsSmallInt.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsSmallInt.java
@@ -34,7 +34,7 @@ import java.util.List;
* ScalarFunction 'CompressAsSmallInt'.
*/
public class EncodeAsSmallInt extends ScalarFunction
- implements ExplicitlyCastableSignature, PropagateNullable {
+ implements ExplicitlyCastableSignature, PropagateNullable,
EncodeStrToInteger {
public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
FunctionSignature.ret(SmallIntType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeStrToInteger.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeStrToInteger.java
new file mode 100644
index 00000000000..87a9c43687d
--- /dev/null
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeStrToInteger.java
@@ -0,0 +1,24 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.scalar;
+
+/**
+ * Encode_as_XXXInt
+ */
+public interface EncodeStrToInteger {
+}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashAggregate.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashAggregate.java
index 404c30fe379..2a78b063a97 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashAggregate.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashAggregate.java
@@ -236,6 +236,13 @@ public class PhysicalHashAggregate<CHILD_TYPE extends
Plan> extends PhysicalUnar
aggregateParam, maybeUsingStream, requireProperties);
}
+ public PhysicalHashAggregate<Plan> withGroupByExpressions(List<Expression>
newGroupByExpressions) {
+ return new PhysicalHashAggregate<>(newGroupByExpressions,
outputExpressions, partitionExpressions,
+ aggregateParam, maybeUsingStream, groupExpression,
getLogicalProperties(),
+ requireProperties, physicalProperties, statistics,
+ child());
+ }
+
@Override
public PhysicalHashAggregate<Plan> withChildren(List<Plan> children) {
Preconditions.checkArgument(children.size() == 1);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 4b1049649b8..99043902819 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -2180,6 +2180,15 @@ public class SessionVariable implements Serializable,
Writable {
needForward = true, fuzzy = true)
public boolean enableSortSpill = false;
+ @VariableMgr.VarAttr(
+ name = "ENABLE_COMPRESS_MATERIALIZE",
+ description = {"控制是否启用compress materialize。",
+ "enable compress-materialize. "},
+ needForward = true, fuzzy = false,
+ varType = VariableAnnotation.EXPERIMENTAL
+ )
+ public boolean enableCompressMaterialize = false;
+
@VariableMgr.VarAttr(
name = ENABLE_AGG_SPILL,
description = {"控制是否启用聚合算子落盘。默认为 false。",
diff --git
a/regression-test/data/nereids_p0/compress_materialize/compress_materialize.out
b/regression-test/data/nereids_p0/compress_materialize/compress_materialize.out
new file mode 100644
index 00000000000..eee04795628
--- /dev/null
+++
b/regression-test/data/nereids_p0/compress_materialize/compress_materialize.out
@@ -0,0 +1,55 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !agg_exec --
+aaaaa
+bbbbb
+
+-- !output_contains_gpk --
+aaaaa aaaaa
+bbbbb bbbbb
+
+-- !expr --
+aaa
+bbb
+
+-- !encodeexpr --
+12
+3
+
+-- !sort --
+\N 6
+ 7
+a 1
+aa 2
+b 4
+b 5
+bb 3
+中 8
+国 9
+
+-- !sort --
+国 9
+中 8
+bb 3
+b 4
+b 5
+aa 2
+a 1
+ 7
+\N 6
+
+-- !sort --
+国 9
+中 8
+bb 3
+b 5
+b 4
+aa 2
+a 1
+ 7
+\N 6
+
+-- !sort --
+国 9
+中 8
+bb 3
+
diff --git
a/regression-test/suites/nereids_p0/compress_materialize/compress_materialize.groovy
b/regression-test/suites/nereids_p0/compress_materialize/compress_materialize.groovy
new file mode 100644
index 00000000000..8489de2aa2a
--- /dev/null
+++
b/regression-test/suites/nereids_p0/compress_materialize/compress_materialize.groovy
@@ -0,0 +1,198 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("compress_materialize") {
+ sql """
+ drop table if exists compress;
+ CREATE TABLE `compress` (
+ `k` varchar(5) NOT NULL,
+ `v` int NOT NULL
+ ) ENGINE=OLAP
+ duplicate KEY(`k`)
+ DISTRIBUTED BY HASH(`k`) BUCKETS AUTO
+ PROPERTIES (
+ "replication_num" = "1"
+ );
+
+
+ insert into compress values ("aaaaaa", 1), ("aaaaaa", 2), ("bbbbb", 3),
("bbbbb", 4), ("bbbbb", 5);
+
+
+ drop table if exists cmt2;
+ CREATE TABLE `cmt2` (
+ `k2` varchar(5) NOT NULL,
+ `v2` int NOT NULL
+ ) ENGINE=OLAP
+ duplicate KEY(`k2`)
+ DISTRIBUTED BY random
+ PROPERTIES (
+ "replication_num" = "1"
+ );
+
+ insert into cmt2 values ("aaaa", 1), ("b", 3);
+ insert into cmt2 values("123456", 123456);
+
+ set ENABLE_COMPRESS_MATERIALIZE = true;
+ """
+
+ explain{
+ sql ("""
+ select k from compress group by k;
+ """)
+ contains("encode_as_bigint")
+ }
+ order_qt_agg_exec "select k from compress group by k;"
+
+ explain{
+ sql ("""
+ select k, substring(k, 1), sum(v) from compress group by k;
+ """)
+ contains("encode_as_bigint(k)")
+ }
+ order_qt_output_contains_gpk "select k, substring(k, 1) from compress
group by k;"
+
+ order_qt_expr """ select substring(k,1,3) from compress group by
substring(k,1,3);"""
+ explain{
+ sql "select substring(k,1,3) from compress group by substring(k,1,3);"
+ contains("encode_as_int(substring(k, 1, 3))")
+ }
+
+ explain {
+ sql("select sum(v) from compress group by substring(k, 1, 3);")
+ contains("group by: encode_as_int(substring(k, 1, 3))")
+ }
+
+ explain {
+ sql("select sum(v) from compress group by substring(k, 1, 4);")
+ contains("group by: encode_as_bigint(substring(k, 1, 4))")
+ }
+
+ order_qt_encodeexpr "select sum(v) from compress group by substring(k, 1,
3);"
+
+ // TODO: RF targets on compressed_materialize column is broken
+ // // verify that compressed materialization does not block runtime filter
generation
+ // sql """
+ // set disable_join_reorder=true;
+ // set runtime_filter_mode = GLOBAL;
+ // set runtime_filter_type=2;
+ // set enable_runtime_filter_prune=false;
+ // """
+
+ // qt_join """
+ // explain shape plan
+ // select *
+ // from (
+ // select k from compress group by k
+ // ) T join[broadcast] cmt2 on T.k = cmt2.k2;
+ // """
+
+
+ sql """
+ drop table if exists compressInt;
+ CREATE TABLE `compressInt` (
+ `k` varchar(3) NOT NULL,
+ `v` int NOT NULL
+ ) ENGINE=OLAP
+ duplicate KEY(`k`)
+ DISTRIBUTED BY HASH(`k`) BUCKETS AUTO
+ PROPERTIES (
+ "replication_num" = "1"
+ );
+
+
+ insert into compressInt values ("a", 1), ("aa", 2), ("bb", 3), ("b", 4),
("b", 5);
+ """
+ explain{
+ sql "select k from compressInt group by k"
+ contains("encode_as_int")
+ }
+
+ sql """
+ drop table if exists compressLargeInt;
+ CREATE TABLE `compressLargeInt` (
+ `k` varchar(10) NOT NULL,
+ `v` int NOT NULL
+ ) ENGINE=OLAP
+ duplicate KEY(`k`)
+ DISTRIBUTED BY HASH(`k`) BUCKETS AUTO
+ PROPERTIES (
+ "replication_num" = "1"
+ );
+
+
+ insert into compressLargeInt values ("a", 1), ("aa", 2), ("bb", 3), ("b",
4), ("b", 5);
+ """
+ explain{
+ sql "select k from compressLargeInt group by k"
+ contains("group by: encode_as_largeint(k)")
+ }
+
+
+ sql """
+ drop table if exists notcompress;
+ CREATE TABLE `notcompress` (
+ `k` varchar(16) NOT NULL,
+ `v` int NOT NULL
+ ) ENGINE=OLAP
+ duplicate KEY(`k`)
+ DISTRIBUTED BY HASH(`k`) BUCKETS AUTO
+ PROPERTIES (
+ "replication_num" = "1"
+ );
+
+
+ insert into notcompress values ("a", 1), ("aa", 2), ("bb", 3), ("b", 4),
("b", 5);
+ """
+ explain{
+ sql "select k from notcompress group by k"
+ notContains("encode_as_")
+ }
+
+ sql """
+ drop table if exists compressSort;
+ CREATE TABLE `compressSort` (
+ `k` varchar(3) NULL,
+ `v` int NOT NULL
+ ) ENGINE=OLAP
+ duplicate KEY(`k`)
+ DISTRIBUTED BY HASH(`k`) BUCKETS AUTO
+ PROPERTIES (
+ "replication_num" = "1"
+ );
+
+
+ insert into compressSort values ("a", 1), ("aa", 2), ("bb", 3), ("b",
4), ("b", 5);
+ insert into compressSort(v) values (6);
+ insert into compressSort values ("",7), ("中", 8), ("国", 9);
+ """
+ explain {
+ sql "select v from compressSort order by k"
+ contains("order by: encode_as_int(k)")
+// expect plan fragment:
+// 1:VSORT(140)
|
+// order by: encode_as_int(k)[#5] ASC
|
+// algorithm: full sort
|
+// offset: 0
|
+// distribute expr lists:
+ }
+ qt_sort "select * from compressSort order by k asc, v";
+ qt_sort "select * from compressSort order by k desc, v";
+ qt_sort "select * from compressSort order by k desc nulls last";
+ qt_sort "select * from compressSort order by k desc nulls last, v limit 3";
+
+}
+
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]