[ https://issues.apache.org/jira/browse/DRILL-4232?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17684213#comment-17684213 ]
ASF GitHub Bot commented on DRILL-4232: --------------------------------------- vvysotskyi commented on code in PR #2599: URL: https://github.com/apache/drill/pull/2599#discussion_r1096581680 ########## exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillAddAggForExceptRule.java: ########## @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.planner.logical; + +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelOptRuleOperand; +import org.apache.calcite.plan.hep.HepRelVertex; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.calcite.util.trace.CalciteTrace; +import org.apache.curator.shaded.com.google.common.collect.ImmutableList; +import org.apache.drill.exec.planner.physical.PrelUtil; +import org.slf4j.Logger; + +import static org.apache.drill.exec.ExecConstants.EXCEPT_ADD_AGG_BELOW; + +/** + * Rule that try to add agg for Except set op. + */ +public class DrillAddAggForExceptRule extends RelOptRule { + public static final RelOptRule INSTANCE = new DrillAddAggForExceptRule(RelOptHelper.any(DrillExceptRel.class), "DrillAddAggForExceptRule"); + protected static final Logger tracer = CalciteTrace.getPlannerTracer(); + + public DrillAddAggForExceptRule(RelOptRuleOperand operand, String description) { + super(operand, description); + } + + @Override + public boolean matches(RelOptRuleCall call) { + DrillExceptRel drillExceptRel = call.rel(0); + return !drillExceptRel.all && !drillExceptRel.isAggAdded() && !findAggRel(drillExceptRel.getInput(0)); Review Comment: I'm not sure whether this check would work properly in some cases. For example, the volcano planner will use RelSet to wrap nodes, and perhaps there are some other cases. Instead, I propose using `RelMetadataQuery.getUniqueKeys()` to ensure that input columns have unique values, and if it is so, do not add aggregate. It calls methods from `org.apache.calcite.rel.metadata.RelMdUniqueKeys` for specific node types and should handle more cases than existing checks. In this case, the `isAggAdded` field wouldn't be required. ########## exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillAddAggForExceptRule.java: ########## @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.planner.logical; + +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelOptRuleOperand; +import org.apache.calcite.plan.hep.HepRelVertex; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.calcite.util.trace.CalciteTrace; +import org.apache.curator.shaded.com.google.common.collect.ImmutableList; +import org.apache.drill.exec.planner.physical.PrelUtil; +import org.slf4j.Logger; + +import static org.apache.drill.exec.ExecConstants.EXCEPT_ADD_AGG_BELOW; + +/** + * Rule that try to add agg for Except set op. + */ +public class DrillAddAggForExceptRule extends RelOptRule { + public static final RelOptRule INSTANCE = new DrillAddAggForExceptRule(RelOptHelper.any(DrillExceptRel.class), "DrillAddAggForExceptRule"); + protected static final Logger tracer = CalciteTrace.getPlannerTracer(); + + public DrillAddAggForExceptRule(RelOptRuleOperand operand, String description) { + super(operand, description); + } + + @Override + public boolean matches(RelOptRuleCall call) { + DrillExceptRel drillExceptRel = call.rel(0); + return !drillExceptRel.all && !drillExceptRel.isAggAdded() && !findAggRel(drillExceptRel.getInput(0)); + } + + private boolean findAggRel(RelNode relNode) { + if (relNode instanceof HepRelVertex) { + return findAggRel(((HepRelVertex) relNode).getCurrentRel()); + } + if (relNode instanceof DrillAggregateRel) { + return true; + } + if (relNode.getInputs().size() == 1 && relNode.getInput(0) != null) { + return findAggRel(relNode.getInput(0)); + } + return false; + } + + @Override + public void onMatch(RelOptRuleCall call) { + final DrillExceptRel drillExceptRel = call.rel(0); + boolean addAggBelow = PrelUtil.getPlannerSettings(call.getPlanner()).getOptions().getOption(EXCEPT_ADD_AGG_BELOW); + if (addAggBelow) { + RelNode aggNode = new DrillAggregateRel(drillExceptRel.getCluster(), drillExceptRel.getTraitSet(), drillExceptRel.getInput(0), + ImmutableBitSet.range(0, drillExceptRel.getInput(0).getRowType().getFieldList().size()), ImmutableList.of(), ImmutableList.of()); + call.transformTo(drillExceptRel.copy(ImmutableList.of(aggNode, drillExceptRel.getInput(1)), true)); + } else { + call.transformTo(new DrillAggregateRel(drillExceptRel.getCluster(), drillExceptRel.getTraitSet(), drillExceptRel.copy(true), Review Comment: Do we need to add aggregate on top of except? ########## exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillSetOpRule.java: ########## @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.planner.logical; + +import org.apache.calcite.plan.Convention; +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelOptRuleOperand; +import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.rel.InvalidRelException; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.SetOp; +import org.apache.calcite.rel.logical.LogicalIntersect; +import org.apache.calcite.rel.logical.LogicalMinus; +import org.apache.calcite.util.trace.CalciteTrace; +import org.slf4j.Logger; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * Rule that converts {@link LogicalIntersect} or {@link LogicalMinus} to + * {@link DrillIntersectRel} or {@link DrillExceptRel}. + */ +public class DrillSetOpRule extends RelOptRule { + public static final List<RelOptRule> INSTANCES = Arrays.asList( + new DrillSetOpRule(RelOptHelper.any(LogicalIntersect.class, Convention.NONE), "DrillIntersectRelRule"), + new DrillSetOpRule(RelOptHelper.any(LogicalMinus.class, Convention.NONE), "DrillExceptRelRule") + ); + protected static final Logger tracer = CalciteTrace.getPlannerTracer(); + + public DrillSetOpRule(RelOptRuleOperand operand, String description) { + super(operand, description); + } + + @Override + public void onMatch(RelOptRuleCall call) { + final SetOp setOp = call.rel(0); + final RelTraitSet traits = setOp.getTraitSet().plus(DrillRel.DRILL_LOGICAL); + final List<RelNode> convertedInputs = new ArrayList<>(); + for (RelNode input : setOp.getInputs()) { + RelNode convertedInput = convert(input, input.getTraitSet().plus(DrillRel.DRILL_LOGICAL).simplify()); + convertedInputs.add(convertedInput); + } + try { + if (setOp instanceof LogicalMinus) { Review Comment: We could create and pass a specific rel factory to the place where the rule is created and use it here instead of checking which type of node is. ########## exec/java-exec/src/test/java/org/apache/drill/TestSetOp.java: ########## @@ -0,0 +1,1183 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill; + +import org.apache.drill.exec.planner.physical.PlannerSettings; +import org.apache.drill.exec.record.BatchSchema; +import org.apache.drill.exec.record.BatchSchemaBuilder; +import org.apache.drill.exec.record.metadata.SchemaBuilder; +import org.apache.drill.shaded.guava.com.google.common.collect.Lists; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.drill.categories.OperatorTest; +import org.apache.drill.categories.SqlTest; +import org.apache.drill.categories.UnlikelyTest; +import org.apache.drill.common.exceptions.UserException; +import org.apache.drill.common.expression.SchemaPath; +import org.apache.drill.common.types.TypeProtos; +import org.apache.drill.exec.ExecConstants; +import org.apache.drill.test.ClusterFixture; +import org.apache.drill.test.ClusterTest; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.nio.file.Paths; +import java.util.List; + +import static org.junit.Assert.assertTrue; + +@Category({SqlTest.class, OperatorTest.class}) +public class TestSetOp extends ClusterTest { + private static final String EMPTY_DIR_NAME = "empty_directory"; + private static final String SLICE_TARGET_DEFAULT = "alter session reset `planner.slice_target`"; + + @BeforeClass + public static void setupTestFiles() throws Exception { + startCluster(ClusterFixture.builder(dirTestWatcher)); + dirTestWatcher.copyResourceToRoot(Paths.get("multilevel", "parquet")); + dirTestWatcher.makeTestTmpSubDir(Paths.get(EMPTY_DIR_NAME)); + } + + @Test + public void TestExceptionWithSchemaLessDataSource() { + boolean exceptionEncountered = true; + String root = "/multilevel/csv/1994/Q1/orders_94_q1.csv"; + try { + testBuilder() + .sqlQuery("select * from cp.`%s` intersect select * from cp.`%s`", root, root) + .unOrdered() + .baselineColumns("a", "b") + .baselineValues(1, 1) + .go(); + exceptionEncountered = false; Review Comment: instead of the flag please use the `Assert.fail("reason")` method here. ########## logical/src/main/java/org/apache/drill/common/logical/data/visitors/LogicalVisitor.java: ########## @@ -62,7 +64,10 @@ public RETURN visitRunningAggregate(RunningAggregate runningAggregate, EXTRA value) throws EXCEP; public RETURN visitTransform(Transform transform, EXTRA value) throws EXCEP; public RETURN visitUnion(Union union, EXTRA value) throws EXCEP; - public RETURN visitWindow(Window window, EXTRA value) throws EXCEP; + public RETURN visitExcept(Except except, EXTRA value) throws EXCEP; + public RETURN visitIntersect(Intersect intersect, EXTRA value) throws EXCEP; + + public RETURN visitWindow(Window window, EXTRA value) throws EXCEP; Review Comment: Please fix the indentation here. ########## exec/java-exec/src/test/java/org/apache/drill/TestSetOp.java: ########## @@ -0,0 +1,1183 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill; + +import org.apache.drill.exec.planner.physical.PlannerSettings; +import org.apache.drill.exec.record.BatchSchema; +import org.apache.drill.exec.record.BatchSchemaBuilder; +import org.apache.drill.exec.record.metadata.SchemaBuilder; +import org.apache.drill.shaded.guava.com.google.common.collect.Lists; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.drill.categories.OperatorTest; +import org.apache.drill.categories.SqlTest; +import org.apache.drill.categories.UnlikelyTest; +import org.apache.drill.common.exceptions.UserException; +import org.apache.drill.common.expression.SchemaPath; +import org.apache.drill.common.types.TypeProtos; +import org.apache.drill.exec.ExecConstants; +import org.apache.drill.test.ClusterFixture; +import org.apache.drill.test.ClusterTest; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.nio.file.Paths; +import java.util.List; + +import static org.junit.Assert.assertTrue; + +@Category({SqlTest.class, OperatorTest.class}) +public class TestSetOp extends ClusterTest { + private static final String EMPTY_DIR_NAME = "empty_directory"; + private static final String SLICE_TARGET_DEFAULT = "alter session reset `planner.slice_target`"; + + @BeforeClass + public static void setupTestFiles() throws Exception { + startCluster(ClusterFixture.builder(dirTestWatcher)); + dirTestWatcher.copyResourceToRoot(Paths.get("multilevel", "parquet")); + dirTestWatcher.makeTestTmpSubDir(Paths.get(EMPTY_DIR_NAME)); + } + + @Test + public void TestExceptionWithSchemaLessDataSource() { + boolean exceptionEncountered = true; + String root = "/multilevel/csv/1994/Q1/orders_94_q1.csv"; + try { + testBuilder() + .sqlQuery("select * from cp.`%s` intersect select * from cp.`%s`", root, root) + .unOrdered() + .baselineColumns("a", "b") + .baselineValues(1, 1) + .go(); + exceptionEncountered = false; + } catch (Exception ex) { + assertTrue(ex.getMessage(), Review Comment: `assertThat` + `containsString` will show more informative messages in the case of failures. > Support for EXCEPT set operator > ------------------------------- > > Key: DRILL-4232 > URL: https://issues.apache.org/jira/browse/DRILL-4232 > Project: Apache Drill > Issue Type: New Feature > Components: Query Planning & Optimization > Reporter: Victoria Markman > Assignee: Tengfei Wang > Priority: Major > -- This message was sent by Atlassian Jira (v8.20.10#820010)