This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new c76763bdbed [fix](maxcompute)add mc catalog read partition table
partition prune(#44508) (#44868)
c76763bdbed is described below
commit c76763bdbed82bf62d07fd1c772d6e87258d0d7c
Author: daidai <[email protected]>
AuthorDate: Thu Dec 5 00:26:49 2024 +0800
[fix](maxcompute)add mc catalog read partition table partition
prune(#44508) (#44868)
bp #44508
---
.../org/apache/doris/datasource/ExternalTable.java | 9 +-
.../doris/datasource/hive/HMSExternalTable.java | 4 +-
.../doris/datasource/hive/source/HiveScanNode.java | 2 +-
.../maxcompute/MaxComputeExternalTable.java | 33 ++-
.../maxcompute/source/MaxComputeScanNode.java | 50 +++-
.../glue/translator/PhysicalPlanTranslator.java | 3 +-
.../rules/expression/rules/PartitionPruner.java | 2 +-
.../rules/rewrite/PruneFileScanPartition.java | 4 +-
.../apache/doris/planner/SingleNodePlanner.java | 5 +-
.../test_max_compute_partition_prune.out | 125 +++++++++
.../test_max_compute_partition_prune.groovy | 282 +++++++++++++++++++++
11 files changed, 497 insertions(+), 22 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java
index 93710f0a129..bd1e36e7bc9 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java
@@ -382,7 +382,7 @@ public class ExternalTable implements TableIf, Writable,
GsonPostProcessable {
* @return
*/
public SelectedPartitions initSelectedPartitions(Optional<MvccSnapshot>
snapshot) {
- if (!supportPartitionPruned()) {
+ if (!supportInternalPartitionPruned()) {
return SelectedPartitions.NOT_PRUNED;
}
if (CollectionUtils.isEmpty(this.getPartitionColumns(snapshot))) {
@@ -399,7 +399,7 @@ public class ExternalTable implements TableIf, Writable,
GsonPostProcessable {
* @param snapshot if not support mvcc, ignore this
* @return partitionName ==> PartitionItem
*/
- public Map<String, PartitionItem>
getNameToPartitionItems(Optional<MvccSnapshot> snapshot) {
+ protected Map<String, PartitionItem>
getNameToPartitionItems(Optional<MvccSnapshot> snapshot) {
return Collections.emptyMap();
}
@@ -415,11 +415,12 @@ public class ExternalTable implements TableIf, Writable,
GsonPostProcessable {
}
/**
- * Does it support partition cpruned, If so, this method needs to be
overridden in subclasses
+ * Whether this table supports internal partition pruning. If so, this method needs to
be overridden in subclasses.
+ * Internal partition pruning: partition pruning logic implemented without
relying on external APIs.
*
* @return
*/
- public boolean supportPartitionPruned() {
+ public boolean supportInternalPartitionPruned() {
return false;
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
index 6d65f8bcdbc..134ad362fa1 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
@@ -301,12 +301,12 @@ public class HMSExternalTable extends ExternalTable
implements MTMVRelatedTableI
}
@Override
- public boolean supportPartitionPruned() {
+ public boolean supportInternalPartitionPruned() {
return getDlaType() == DLAType.HIVE;
}
@Override
- public Map<String, PartitionItem>
getNameToPartitionItems(Optional<MvccSnapshot> snapshot) {
+ protected Map<String, PartitionItem>
getNameToPartitionItems(Optional<MvccSnapshot> snapshot) {
return getNameToPartitionItems();
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
index 3a2a4d3eb5c..99d3cd1cd21 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
@@ -82,7 +82,7 @@ public class HiveScanNode extends FileQueryScanNode {
// will only be set in Nereids, for lagency planner, it should be null
@Setter
- private SelectedPartitions selectedPartitions = null;
+ protected SelectedPartitions selectedPartitions = null;
private boolean partitionInit = false;
private final AtomicReference<UserException> batchException = new
AtomicReference<>(null);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java
index dc3232f79f5..0f748f59e92 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/MaxComputeExternalTable.java
@@ -21,6 +21,7 @@ import org.apache.doris.catalog.ArrayType;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.MapType;
+import org.apache.doris.catalog.PartitionItem;
import org.apache.doris.catalog.ScalarType;
import org.apache.doris.catalog.StructField;
import org.apache.doris.catalog.StructType;
@@ -28,6 +29,7 @@ import org.apache.doris.catalog.Type;
import org.apache.doris.datasource.ExternalTable;
import org.apache.doris.datasource.SchemaCacheValue;
import org.apache.doris.datasource.TablePartitionValues;
+import org.apache.doris.datasource.mvcc.MvccSnapshot;
import org.apache.doris.thrift.TMCTable;
import org.apache.doris.thrift.TTableDescriptor;
import org.apache.doris.thrift.TTableType;
@@ -50,6 +52,7 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.Map.Entry;
import java.util.Optional;
import java.util.stream.Collectors;
@@ -71,6 +74,15 @@ public class MaxComputeExternalTable extends ExternalTable {
}
}
+ @Override
+ public boolean supportInternalPartitionPruned() {
+ return true;
+ }
+
+ @Override
+ public List<Column> getPartitionColumns(Optional<MvccSnapshot> snapshot) {
+ return getPartitionColumns();
+ }
public List<Column> getPartitionColumns() {
makeSureInitialized();
@@ -79,7 +91,24 @@ public class MaxComputeExternalTable extends ExternalTable {
.orElse(Collections.emptyList());
}
- public TablePartitionValues getPartitionValues() {
+ @Override
+ protected Map<String, PartitionItem>
getNameToPartitionItems(Optional<MvccSnapshot> snapshot) {
+ if (getPartitionColumns().isEmpty()) {
+ return Collections.emptyMap();
+ }
+
+ TablePartitionValues tablePartitionValues = getPartitionValues();
+ Map<Long, PartitionItem> idToPartitionItem =
tablePartitionValues.getIdToPartitionItem();
+ Map<Long, String> idToNameMap =
tablePartitionValues.getPartitionIdToNameMap();
+
+ Map<String, PartitionItem> nameToPartitionItem =
Maps.newHashMapWithExpectedSize(idToPartitionItem.size());
+ for (Entry<Long, PartitionItem> entry : idToPartitionItem.entrySet()) {
+ nameToPartitionItem.put(idToNameMap.get(entry.getKey()),
entry.getValue());
+ }
+ return nameToPartitionItem;
+ }
+
+ private TablePartitionValues getPartitionValues() {
makeSureInitialized();
Optional<SchemaCacheValue> schemaCacheValue = getSchemaCacheValue();
if (!schemaCacheValue.isPresent()) {
@@ -110,6 +139,8 @@ public class MaxComputeExternalTable extends ExternalTable {
/**
* parse all values from partitionPath to a single list.
+ * In MaxCompute : Support special characters : _$#.!@
+ * Ref : MaxCompute Error Code: ODPS-0130071 Invalid partition value.
*
* @param partitionColumns partitionColumns can contain the
part1,part2,part3...
* @param partitionPath partitionPath format is like the
'part1=123/part2=abc/part3=1bc'
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeScanNode.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeScanNode.java
index e0b84b0860e..e177e9d8b7c 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeScanNode.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeScanNode.java
@@ -40,6 +40,7 @@ import
org.apache.doris.datasource.maxcompute.MaxComputeExternalCatalog;
import org.apache.doris.datasource.maxcompute.MaxComputeExternalTable;
import org.apache.doris.datasource.maxcompute.source.MaxComputeSplit.SplitType;
import org.apache.doris.datasource.property.constants.MCProperties;
+import
org.apache.doris.nereids.trees.plans.logical.LogicalFileScan.SelectedPartitions;
import org.apache.doris.nereids.util.DateUtils;
import org.apache.doris.planner.PlanNodeId;
import org.apache.doris.spi.Split;
@@ -50,6 +51,7 @@ import org.apache.doris.thrift.TMaxComputeFileDesc;
import org.apache.doris.thrift.TTableFormatFileDesc;
import com.aliyun.odps.OdpsType;
+import com.aliyun.odps.PartitionSpec;
import com.aliyun.odps.table.TableIdentifier;
import com.aliyun.odps.table.configuration.ArrowOptions;
import com.aliyun.odps.table.configuration.ArrowOptions.TimestampUnit;
@@ -60,6 +62,7 @@ import com.aliyun.odps.table.read.split.InputSplitAssigner;
import com.aliyun.odps.table.read.split.impl.IndexedInputSplit;
import com.google.common.collect.Maps;
import jline.internal.Log;
+import lombok.Setter;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
@@ -86,14 +89,28 @@ public class MaxComputeScanNode extends FileQueryScanNode {
private static final LocationPath ROW_OFFSET_PATH = new
LocationPath("/row_offset", Maps.newHashMap());
private static final LocationPath BYTE_SIZE_PATH = new
LocationPath("/byte_size", Maps.newHashMap());
+ @Setter
+ private SelectedPartitions selectedPartitions = null;
+
+ // For new planner
+ public MaxComputeScanNode(PlanNodeId id, TupleDescriptor desc,
+ SelectedPartitions selectedPartitions, boolean
needCheckColumnPriv) {
+ this(id, desc, "MCScanNode", StatisticalType.MAX_COMPUTE_SCAN_NODE,
+ selectedPartitions, needCheckColumnPriv);
+ }
+
+ // For old planner
public MaxComputeScanNode(PlanNodeId id, TupleDescriptor desc, boolean
needCheckColumnPriv) {
- this(id, desc, "MCScanNode", StatisticalType.MAX_COMPUTE_SCAN_NODE,
needCheckColumnPriv);
+ this(id, desc, "MCScanNode", StatisticalType.MAX_COMPUTE_SCAN_NODE,
+ SelectedPartitions.NOT_PRUNED, needCheckColumnPriv);
}
- public MaxComputeScanNode(PlanNodeId id, TupleDescriptor desc, String
planNodeName,
- StatisticalType statisticalType, boolean
needCheckColumnPriv) {
+ private MaxComputeScanNode(PlanNodeId id, TupleDescriptor desc, String
planNodeName,
+ StatisticalType statisticalType, SelectedPartitions
selectedPartitions,
+ boolean needCheckColumnPriv) {
super(id, desc, planNodeName, statisticalType, needCheckColumnPriv);
table = (MaxComputeExternalTable) desc.getTable();
+ this.selectedPartitions = selectedPartitions;
}
@Override
@@ -117,10 +134,27 @@ public class MaxComputeScanNode extends FileQueryScanNode
{
rangeDesc.setSize(maxComputeSplit.getLength());
}
- void createTableBatchReadSession() throws UserException {
+ // Returns false if no partition data needs to be read.
+ // Returns true if partition data needs to be read.
+ boolean createTableBatchReadSession() throws UserException {
List<String> requiredPartitionColumns = new ArrayList<>();
List<String> orderedRequiredDataColumns = new ArrayList<>();
+ List<PartitionSpec> requiredPartitionSpecs = new ArrayList<>();
+ //if requiredPartitionSpecs is empty, get all partition data.
+ if (!table.getPartitionColumns().isEmpty() && selectedPartitions !=
SelectedPartitions.NOT_PRUNED) {
+ this.totalPartitionNum = selectedPartitions.totalPartitionNum;
+ this.selectedPartitionNum =
selectedPartitions.selectedPartitions.size();
+
+ if (selectedPartitions.selectedPartitions.isEmpty()) {
+ // no need to read any partition data.
+ return false;
+ }
+ selectedPartitions.selectedPartitions.forEach(
+ (key, value) -> requiredPartitionSpecs.add(new
PartitionSpec(key))
+ );
+ }
+
Set<String> requiredSlots =
desc.getSlots().stream().map(e ->
e.getColumn().getName()).collect(Collectors.toSet());
@@ -150,6 +184,7 @@ public class MaxComputeScanNode extends FileQueryScanNode {
.withSettings(mcCatalog.getSettings())
.withSplitOptions(mcCatalog.getSplitOption())
.requiredPartitionColumns(requiredPartitionColumns)
+ .requiredPartitions(requiredPartitionSpecs)
.requiredDataColumns(orderedRequiredDataColumns)
.withArrowOptions(
ArrowOptions.newBuilder()
@@ -162,7 +197,7 @@ public class MaxComputeScanNode extends FileQueryScanNode {
} catch (java.io.IOException e) {
throw new RuntimeException(e);
}
-
+ return true;
}
@Override
@@ -430,7 +465,10 @@ public class MaxComputeScanNode extends FileQueryScanNode {
if (desc.getSlots().isEmpty() || odpsTable.getFileNum() <= 0) {
return result;
}
- createTableBatchReadSession();
+
+ if (!createTableBatchReadSession()) {
+ return result;
+ }
try {
String scanSessionSerialize =
serializeSession(tableBatchReadSession);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
index 3a31381854c..a53337c7d86 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
@@ -582,7 +582,8 @@ public class PhysicalPlanTranslator extends
DefaultPlanVisitor<PlanFragment, Pla
} else if (table instanceof TrinoConnectorExternalTable) {
scanNode = new TrinoConnectorScanNode(context.nextPlanNodeId(),
tupleDescriptor, false);
} else if (table instanceof MaxComputeExternalTable) {
- scanNode = new MaxComputeScanNode(context.nextPlanNodeId(),
tupleDescriptor, false);
+ scanNode = new MaxComputeScanNode(context.nextPlanNodeId(),
tupleDescriptor,
+ fileScan.getSelectedPartitions(), false);
} else if (table instanceof LakeSoulExternalTable) {
scanNode = new LakeSoulScanNode(context.nextPlanNodeId(),
tupleDescriptor, false);
} else {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/PartitionPruner.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/PartitionPruner.java
index fac1a7f82d2..ed783aa3d5a 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/PartitionPruner.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/PartitionPruner.java
@@ -55,7 +55,7 @@ public class PartitionPruner extends
DefaultExpressionRewriter<Void> {
/** Different type of table may have different partition prune behavior. */
public enum PartitionTableType {
OLAP,
- HIVE
+ EXTERNAL
}
private PartitionPruner(List<OnePartitionEvaluator> partitions, Expression
partitionPredicate) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneFileScanPartition.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneFileScanPartition.java
index 4bbb0a8aa76..ba8b270d1f3 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneFileScanPartition.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneFileScanPartition.java
@@ -59,7 +59,7 @@ public class PruneFileScanPartition extends
OneRewriteRuleFactory {
ExternalTable tbl = scan.getTable();
SelectedPartitions selectedPartitions;
- if (tbl.supportPartitionPruned()) {
+ if (tbl.supportInternalPartitionPruned()) {
selectedPartitions = pruneExternalPartitions(tbl,
filter, scan, ctx.cascadesContext);
} else {
// set isPruned so that it won't go pass the partition
prune again
@@ -91,7 +91,7 @@ public class PruneFileScanPartition extends
OneRewriteRuleFactory {
Map<String, PartitionItem> nameToPartitionItem =
scan.getSelectedPartitions().selectedPartitions;
List<String> prunedPartitions = new ArrayList<>(PartitionPruner.prune(
- partitionSlots, filter.getPredicate(), nameToPartitionItem,
ctx, PartitionTableType.HIVE));
+ partitionSlots, filter.getPredicate(), nameToPartitionItem,
ctx, PartitionTableType.EXTERNAL));
for (String name : prunedPartitions) {
selectedPartitionItems.put(name, nameToPartitionItem.get(name));
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
index d94ad0a2552..4091640066c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
@@ -76,7 +76,6 @@ import
org.apache.doris.datasource.paimon.source.PaimonScanNode;
import
org.apache.doris.datasource.trinoconnector.source.TrinoConnectorScanNode;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.rewrite.mvrewrite.MVSelectFailedException;
-import org.apache.doris.statistics.StatisticalType;
import org.apache.doris.thrift.TPushAggOp;
import com.google.common.base.Preconditions;
@@ -1993,9 +1992,7 @@ public class SingleNodePlanner {
scanNode = new TrinoConnectorScanNode(ctx.getNextNodeId(),
tblRef.getDesc(), true);
break;
case MAX_COMPUTE_EXTERNAL_TABLE:
- // TODO: support max compute scan node
- scanNode = new MaxComputeScanNode(ctx.getNextNodeId(),
tblRef.getDesc(), "MCScanNode",
- StatisticalType.MAX_COMPUTE_SCAN_NODE, true);
+ scanNode = new MaxComputeScanNode(ctx.getNextNodeId(),
tblRef.getDesc(), true);
break;
case ES_EXTERNAL_TABLE:
scanNode = new EsScanNode(ctx.getNextNodeId(),
tblRef.getDesc(), true);
diff --git
a/regression-test/data/external_table_p2/maxcompute/test_max_compute_partition_prune.out
b/regression-test/data/external_table_p2/maxcompute/test_max_compute_partition_prune.out
new file mode 100644
index 00000000000..8f443829c4c
--- /dev/null
+++
b/regression-test/data/external_table_p2/maxcompute/test_max_compute_partition_prune.out
@@ -0,0 +1,125 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !one_partition_1_1 --
+1 Alice 2024
+2 Bob 2024
+3 Charlie 2024
+
+-- !one_partition_2_1 --
+4 David 2025
+5 Eva 2025
+
+-- !one_partition_3_all --
+1 Alice 2024
+2 Bob 2024
+3 Charlie 2024
+4 David 2025
+5 Eva 2025
+
+-- !one_partition_4_all --
+5 Eva 2025
+
+-- !one_partition_5_1 --
+3 Charlie 2024
+
+-- !two_partition_1_1 --
+1 Alice US 1
+2 Bob US 1
+3 Charlie US 1
+
+-- !two_partition_2_1 --
+8 Hannah EU 2
+9 Ivy EU 2
+10 Jack EU 2
+
+-- !two_partition_3_2 --
+1 Alice US 1
+2 Bob US 1
+3 Charlie US 1
+4 David US 2
+5 Eva US 2
+
+-- !two_partition_4_all --
+1 Alice US 1
+2 Bob US 1
+3 Charlie US 1
+4 David US 2
+5 Eva US 2
+6 Frank EU 1
+7 Grace EU 1
+8 Hannah EU 2
+9 Ivy EU 2
+10 Jack EU 2
+
+-- !two_partition_5_1 --
+
+-- !two_partition_6_1 --
+8 Hannah EU 2
+9 Ivy EU 2
+10 Jack EU 2
+
+-- !three_partition_1_1 --
+1 Alice US 2024 Q1
+2 Bob US 2024 Q1
+3 Charlie US 2024 Q1
+
+-- !three_partition_2_1 --
+10 Jack EU 2025 Q2
+11 Leo EU 2025 Q2
+
+-- !three_partition_3_3 --
+13 Nina AS 2025 Q1
+14 Oscar AS 2025 Q2
+15 Paul AS 2025 Q3
+
+-- !three_partition_4_2 --
+1 Alice US 2024 Q1
+2 Bob US 2024 Q1
+3 Charlie US 2024 Q1
+6 Frank US 2025 Q1
+
+-- !three_partition_5_all --
+1 Alice US 2024 Q1
+2 Bob US 2024 Q1
+3 Charlie US 2024 Q1
+4 David US 2024 Q2
+5 Eva US 2024 Q2
+6 Frank US 2025 Q1
+7 Grace US 2025 Q2
+8 Hannah EU 2024 Q1
+9 Ivy EU 2024 Q1
+10 Jack EU 2025 Q2
+11 Leo EU 2025 Q2
+12 Mia EU 2025 Q3
+13 Nina AS 2025 Q1
+14 Oscar AS 2025 Q2
+15 Paul AS 2025 Q3
+
+-- !three_partition_6_1 --
+8 Hannah EU 2024 Q1
+9 Ivy EU 2024 Q1
+
+-- !three_partition_7_7 --
+6 Frank US 2025 Q1
+7 Grace US 2025 Q2
+10 Jack EU 2025 Q2
+11 Leo EU 2025 Q2
+12 Mia EU 2025 Q3
+13 Nina AS 2025 Q1
+14 Oscar AS 2025 Q2
+15 Paul AS 2025 Q3
+
+-- !three_partition_8_2 --
+7 Grace US 2025 Q2
+
+-- !one_partition_6_0 --
+
+-- !two_partition_7_0 --
+
+-- !two_partition_8_0 --
+
+-- !three_partition_9_0 --
+
+-- !three_partition_10_0 --
+
+-- !three_partition_11_0 --
+
diff --git
a/regression-test/suites/external_table_p2/maxcompute/test_max_compute_partition_prune.groovy
b/regression-test/suites/external_table_p2/maxcompute/test_max_compute_partition_prune.groovy
new file mode 100644
index 00000000000..e34569117a1
--- /dev/null
+++
b/regression-test/suites/external_table_p2/maxcompute/test_max_compute_partition_prune.groovy
@@ -0,0 +1,282 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+
+/*
+CREATE TABLE one_partition_tb (
+ id INT,
+ name string
+)
+PARTITIONED BY (part1 INT);
+INSERT INTO one_partition_tb PARTITION (part1=2024) VALUES (1, 'Alice');
+INSERT INTO one_partition_tb PARTITION (part1=2024) VALUES (2, 'Bob');
+INSERT INTO one_partition_tb PARTITION (part1=2024) VALUES (3, 'Charlie');
+INSERT INTO one_partition_tb PARTITION (part1=2025) VALUES (4, 'David');
+INSERT INTO one_partition_tb PARTITION (part1=2025) VALUES (5, 'Eva');
+CREATE TABLE two_partition_tb (
+ id INT,
+ name string
+)
+PARTITIONED BY (part1 STRING, part2 int);
+INSERT INTO two_partition_tb PARTITION (part1='US', part2=1) VALUES (1,
'Alice');
+INSERT INTO two_partition_tb PARTITION (part1='US', part2=1) VALUES (2, 'Bob');
+INSERT INTO two_partition_tb PARTITION (part1='US', part2=1) VALUES (3,
'Charlie');
+INSERT INTO two_partition_tb PARTITION (part1='US', part2=2) VALUES (4,
'David');
+INSERT INTO two_partition_tb PARTITION (part1='US', part2=2) VALUES (5, 'Eva');
+INSERT INTO two_partition_tb PARTITION (part1='EU', part2=1) VALUES (6,
'Frank');
+INSERT INTO two_partition_tb PARTITION (part1='EU', part2=1) VALUES (7,
'Grace');
+INSERT INTO two_partition_tb PARTITION (part1='EU', part2=2) VALUES (8,
'Hannah');
+INSERT INTO two_partition_tb PARTITION (part1='EU', part2=2) VALUES (9, 'Ivy');
+INSERT INTO two_partition_tb PARTITION (part1='EU', part2=2) VALUES (10,
'Jack');
+CREATE TABLE three_partition_tb (
+ id INT,
+ name string
+)
+PARTITIONED BY (part1 STRING, part2 INT, part3 STRING);
+INSERT INTO three_partition_tb PARTITION (part1='US', part2=2024, part3='Q1')
VALUES (1, 'Alice');
+INSERT INTO three_partition_tb PARTITION (part1='US', part2=2024, part3='Q1')
VALUES (2, 'Bob');
+INSERT INTO three_partition_tb PARTITION (part1='US', part2=2024, part3='Q1')
VALUES (3, 'Charlie');
+INSERT INTO three_partition_tb PARTITION (part1='US', part2=2024, part3='Q2')
VALUES (4, 'David');
+INSERT INTO three_partition_tb PARTITION (part1='US', part2=2024, part3='Q2')
VALUES (5, 'Eva');
+INSERT INTO three_partition_tb PARTITION (part1='US', part2=2025, part3='Q1')
VALUES (6, 'Frank');
+INSERT INTO three_partition_tb PARTITION (part1='US', part2=2025, part3='Q2')
VALUES (7, 'Grace');
+INSERT INTO three_partition_tb PARTITION (part1='EU', part2=2024, part3='Q1')
VALUES (8, 'Hannah');
+INSERT INTO three_partition_tb PARTITION (part1='EU', part2=2024, part3='Q1')
VALUES (9, 'Ivy');
+INSERT INTO three_partition_tb PARTITION (part1='EU', part2=2025, part3='Q2')
VALUES (10, 'Jack');
+INSERT INTO three_partition_tb PARTITION (part1='EU', part2=2025, part3='Q2')
VALUES (11, 'Leo');
+INSERT INTO three_partition_tb PARTITION (part1='EU', part2=2025, part3='Q3')
VALUES (12, 'Mia');
+INSERT INTO three_partition_tb PARTITION (part1='AS', part2=2025, part3='Q1')
VALUES (13, 'Nina');
+INSERT INTO three_partition_tb PARTITION (part1='AS', part2=2025, part3='Q2')
VALUES (14, 'Oscar');
+INSERT INTO three_partition_tb PARTITION (part1='AS', part2=2025, part3='Q3')
VALUES (15, 'Paul');
+select * from one_partition_tb;
+select * from two_partition_tb;
+select * from three_partition_tb;
+show partitions one_partition_tb;
+show partitions two_partition_tb;
+show partitions three_partition_tb;
+*/
+
+suite("test_max_compute_partition_prune",
"p2,external,maxcompute,external_remote,external_remote_maxcompute") {
+
+
+ def one_partition_1_1 = """SELECT * FROM one_partition_tb WHERE part1 =
2024 ORDER BY id;"""
+ def one_partition_2_1 = """SELECT * FROM one_partition_tb WHERE part1 =
2025 ORDER BY id;"""
+ def one_partition_3_all = """SELECT * FROM one_partition_tb ORDER BY id;"""
+ def one_partition_4_all = """SELECT * FROM one_partition_tb WHERE id = 5
ORDER BY id;"""
+ def one_partition_5_1 = """SELECT * FROM one_partition_tb WHERE part1 =
2024 AND id >= 3 ORDER BY id;"""
+
+ def two_partition_1_1 = """SELECT * FROM two_partition_tb WHERE part1 =
'US' AND part2 = 1 ORDER BY id;"""
+ def two_partition_2_1 = """SELECT * FROM two_partition_tb WHERE part1 =
'EU' AND part2 = 2 ORDER BY id;"""
+ def two_partition_3_2 = """SELECT * FROM two_partition_tb WHERE part1 =
'US' ORDER BY id;"""
+ def two_partition_4_all = """SELECT * FROM two_partition_tb ORDER BY id;"""
+ def two_partition_5_1 = """SELECT * FROM two_partition_tb WHERE part1 =
'US' AND part2 = 2 AND id > 5 ORDER BY id;"""
+ def two_partition_6_1 = """SELECT * FROM two_partition_tb WHERE part1 =
'EU' AND part2 = 2 ORDER BY id;"""
+
+ def three_partition_1_1 = """SELECT * FROM three_partition_tb WHERE part1
= 'US' AND part2 = 2024 AND part3 = 'Q1' ORDER BY id;"""
+ def three_partition_2_1 = """SELECT * FROM three_partition_tb WHERE part1
= 'EU' AND part2 = 2025 AND part3 = 'Q2' ORDER BY id;"""
+ def three_partition_3_3 = """SELECT * FROM three_partition_tb WHERE part1
= 'AS' AND part2 = 2025 ORDER BY id;"""
+ def three_partition_4_2 = """SELECT * FROM three_partition_tb WHERE part1
= 'US' AND part3 = 'Q1' ORDER BY id;"""
+ def three_partition_5_all = """SELECT * FROM three_partition_tb ORDER BY
id;"""
+ def three_partition_6_1 = """SELECT * FROM three_partition_tb WHERE part1
= 'EU' AND part2 = 2024 AND part3 = 'Q1' ORDER BY id;"""
+ def three_partition_7_7 = """SELECT * FROM three_partition_tb WHERE part2
= 2025 ORDER BY id;"""
+ def three_partition_8_2 = """SELECT * FROM three_partition_tb WHERE part1
= 'US' AND part3 = 'Q2' AND id BETWEEN 6 AND 10 ORDER BY id;"""
+
+
+ String enabled = context.config.otherConfigs.get("enableMaxComputeTest")
+ if (enabled != null && enabled.equalsIgnoreCase("true")) {
+ String ak = context.config.otherConfigs.get("ak")
+ String sk = context.config.otherConfigs.get("sk");
+ String mc_db = "mc_datalake"
+ String mc_catalog_name = "test_max_compute_partition_prune"
+
+ sql """drop catalog if exists ${mc_catalog_name};"""
+ sql """
+ create catalog if not exists ${mc_catalog_name} properties (
+ "type" = "max_compute",
+ "mc.default.project" = "${mc_db}",
+ "mc.access_key" = "${ak}",
+ "mc.secret_key" = "${sk}",
+ "mc.endpoint" =
"http://service.cn-beijing-vpc.maxcompute.aliyun-inc.com/api"
+ );
+ """
+ sql """ switch ${mc_catalog_name} """
+ sql """ use ${mc_db}"""
+
+ qt_one_partition_1_1 one_partition_1_1
+ explain {
+ sql("${one_partition_1_1}")
+ contains "partition=1/2"
+ }
+
+ qt_one_partition_2_1 one_partition_2_1
+ explain {
+ sql("${one_partition_2_1}")
+ contains "partition=1/2"
+ }
+
+ qt_one_partition_3_all one_partition_3_all
+ explain {
+ sql("${one_partition_3_all}")
+ contains "partition=2/2"
+ }
+
+ qt_one_partition_4_all one_partition_4_all
+ explain {
+ sql("${one_partition_4_all}")
+ contains "partition=2/2"
+ }
+
+ qt_one_partition_5_1 one_partition_5_1
+ explain {
+ sql("${one_partition_5_1}")
+ contains "partition=1/2"
+ }
+
+
+ qt_two_partition_1_1 two_partition_1_1
+ explain {
+ sql("${two_partition_1_1}")
+ contains "partition=1/4"
+ }
+
+ qt_two_partition_2_1 two_partition_2_1
+ explain {
+ sql("${two_partition_2_1}")
+ contains "partition=1/4"
+ }
+
+ qt_two_partition_3_2 two_partition_3_2
+ explain {
+ sql("${two_partition_3_2}")
+ contains "partition=2/4"
+ }
+
+ qt_two_partition_4_all two_partition_4_all
+ explain {
+ sql("${two_partition_4_all}")
+ contains "partition=4/4"
+ }
+
+ qt_two_partition_5_1 two_partition_5_1
+ explain {
+ sql("${two_partition_5_1}")
+ contains "partition=1/4"
+ }
+
+ qt_two_partition_6_1 two_partition_6_1
+ explain {
+ sql("${two_partition_6_1}")
+ contains "partition=1/4"
+ }
+
+
+
+ qt_three_partition_1_1 three_partition_1_1
+ explain {
+ sql("${three_partition_1_1}")
+ contains "partition=1/10"
+ }
+
+ qt_three_partition_2_1 three_partition_2_1
+ explain {
+ sql("${three_partition_2_1}")
+ contains "partition=1/10"
+ }
+
+ qt_three_partition_3_3 three_partition_3_3
+ explain {
+ sql("${three_partition_3_3}")
+ contains "partition=3/10"
+ }
+
+ qt_three_partition_4_2 three_partition_4_2
+ explain {
+ sql("${three_partition_4_2}")
+ contains "partition=2/10"
+ }
+
+ qt_three_partition_5_all three_partition_5_all
+ explain {
+ sql("${three_partition_5_all}")
+ contains "partition=10/10"
+ }
+
+ qt_three_partition_6_1 three_partition_6_1
+ explain {
+ sql("${three_partition_6_1}")
+ contains "partition=1/10"
+ }
+
+ qt_three_partition_7_7 three_partition_7_7
+ explain {
+ sql("${three_partition_7_7}")
+ contains "partition=7/10"
+ }
+
+ qt_three_partition_8_2 three_partition_8_2
+ explain {
+ sql("${three_partition_8_2}")
+ contains "partition=2/10"
+ }
+
+
+ // 0 partitions
+ def one_partition_6_0 = """SELECT * FROM one_partition_tb WHERE part1
= 2023 ORDER BY id;"""
+ qt_one_partition_6_0 one_partition_6_0
+ explain {
+ sql("${one_partition_6_0}")
+ contains "partition=0/2"
+ }
+
+ def two_partition_7_0 = """SELECT * FROM two_partition_tb WHERE part1
= 'CN' AND part2 = 1 ORDER BY id;"""
+ qt_two_partition_7_0 two_partition_7_0
+ explain {
+ sql("${two_partition_7_0}")
+ contains "partition=0/4"
+ }
+
+ def two_partition_8_0 = """SELECT * FROM two_partition_tb WHERE part1
= 'US' AND part2 = 3 ORDER BY id;"""
+ qt_two_partition_8_0 two_partition_8_0
+ explain {
+ sql("${two_partition_8_0}")
+ contains "partition=0/4"
+ }
+
+ def three_partition_9_0 = """SELECT * FROM three_partition_tb WHERE
part1 = 'US' AND part2 = 2023 AND part3 = 'Q1' ORDER BY id;"""
+ qt_three_partition_9_0 three_partition_9_0
+ explain {
+ sql("${three_partition_9_0}")
+ contains "partition=0/10"
+ }
+
+ def three_partition_10_0 = """SELECT * FROM three_partition_tb WHERE
part1 = 'EU' AND part2 = 2024 AND part3 = 'Q4' ORDER BY id;"""
+ qt_three_partition_10_0 three_partition_10_0
+ explain {
+ sql("${three_partition_10_0}")
+ contains "partition=0/10"
+ }
+
+ def three_partition_11_0 = """SELECT * FROM three_partition_tb WHERE
part1 = 'AS' AND part2 = 2025 AND part3 = 'Q4' ORDER BY id;"""
+ qt_three_partition_11_0 three_partition_11_0
+ explain {
+ sql("${three_partition_11_0}")
+ contains "partition=0/10"
+ }
+
+ }
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]