http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index ff9f758..da2091a 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -1732,7 +1732,104 @@ spark.only.query.negative.files=spark_job_max_tasks.q,\ spark_submit_negative_executor_memory.q spark.perf.disabled.query.files=query14.q,\ - query64.q + query64.q,\ + cbo_query1.q,\ + cbo_query10.q,\ + cbo_query11.q,\ + cbo_query12.q,\ + cbo_query13.q,\ + cbo_query14.q,\ + cbo_query15.q,\ + cbo_query16.q,\ + cbo_query17.q,\ + cbo_query18.q,\ + cbo_query19.q,\ + cbo_query2.q,\ + cbo_query20.q,\ + cbo_query21.q,\ + cbo_query22.q,\ + cbo_query23.q,\ + cbo_query24.q,\ + cbo_query25.q,\ + cbo_query26.q,\ + cbo_query27.q,\ + cbo_query28.q,\ + cbo_query29.q,\ + cbo_query3.q,\ + cbo_query30.q,\ + cbo_query31.q,\ + cbo_query32.q,\ + cbo_query33.q,\ + cbo_query34.q,\ + cbo_query35.q,\ + cbo_query36.q,\ + cbo_query37.q,\ + cbo_query38.q,\ + cbo_query39.q,\ + cbo_query4.q,\ + cbo_query40.q,\ + cbo_query42.q,\ + cbo_query43.q,\ + cbo_query44.q,\ + cbo_query45.q,\ + cbo_query46.q,\ + cbo_query47.q,\ + cbo_query48.q,\ + cbo_query49.q,\ + cbo_query5.q,\ + cbo_query50.q,\ + cbo_query51.q,\ + cbo_query52.q,\ + cbo_query53.q,\ + cbo_query54.q,\ + cbo_query55.q,\ + cbo_query56.q,\ + cbo_query57.q,\ + cbo_query58.q,\ + cbo_query59.q,\ + cbo_query6.q,\ + cbo_query60.q,\ + cbo_query61.q,\ + cbo_query63.q,\ + cbo_query64.q,\ + cbo_query65.q,\ + cbo_query66.q,\ + cbo_query67.q,\ + cbo_query68.q,\ + cbo_query69.q,\ + cbo_query7.q,\ + cbo_query70.q,\ + cbo_query71.q,\ + cbo_query72.q,\ + cbo_query73.q,\ + cbo_query74.q,\ + cbo_query75.q,\ + cbo_query76.q,\ + cbo_query77.q,\ + cbo_query78.q,\ + cbo_query79.q,\ + cbo_query8.q,\ + cbo_query80.q,\ + cbo_query81.q,\ + cbo_query82.q,\ + cbo_query83.q,\ + cbo_query84.q,\ + cbo_query85.q,\ + cbo_query86.q,\ + cbo_query87.q,\ + cbo_query88.q,\ + cbo_query89.q,\ + cbo_query9.q,\ + cbo_query90.q,\ + cbo_query91.q,\ + cbo_query92.q,\ + cbo_query93.q,\ + cbo_query94.q,\ + cbo_query95.q,\ + cbo_query96.q,\ + cbo_query97.q,\ + cbo_query98.q,\ + cbo_query99.q druid.query.files=druidmini_test1.q,\ druidmini_test_ts.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java ---------------------------------------------------------------------- diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java index 5e1e88e..afff0df 100644 --- a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java +++ b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java @@ -280,7 +280,7 @@ public class CliConfigs { } public static class TezPerfCliConfig extends AbstractCliConfig { - public TezPerfCliConfig() { + public TezPerfCliConfig(boolean useConstraints) { super(CorePerfCliDriver.class); try { setQueryDir("ql/src/test/queries/clientpositive/perf"); @@ -290,10 +290,21 @@ public class CliConfigs { excludesFrom(testConfigProps, "encrypted.query.files"); excludesFrom(testConfigProps, "erasurecoding.only.query.files"); - setResultsDir("ql/src/test/results/clientpositive/perf/tez"); + excludeQuery("cbo_query44.q"); // TODO: Enable when we move to Calcite 1.18 + excludeQuery("cbo_query45.q"); // TODO: Enable when we move to Calcite 1.18 + excludeQuery("cbo_query67.q"); // TODO: Enable when we move to Calcite 1.18 + excludeQuery("cbo_query70.q"); // TODO: Enable when we move to Calcite 1.18 + excludeQuery("cbo_query86.q"); // TODO: Enable when we move to Calcite 1.18 + setLogDir("itests/qtest/target/qfile-results/clientpositive/tez"); - setInitScript("q_perf_test_init.sql"); + if (useConstraints) { + setInitScript("q_perf_test_init_constraints.sql"); + setResultsDir("ql/src/test/results/clientpositive/perf/tez/constraints"); + } else { + setInitScript("q_perf_test_init.sql"); + setResultsDir("ql/src/test/results/clientpositive/perf/tez"); + } setCleanupScript("q_perf_test_cleanup.sql"); setHiveConfDir("data/conf/perf-reg/tez"); http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/java/org/apache/hadoop/hive/ql/Context.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Context.java b/ql/src/java/org/apache/hadoop/hive/ql/Context.java index b4d5806..aabc34d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/Context.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/Context.java @@ -105,6 +105,7 @@ public class Context { protected String cboInfo; protected boolean cboSucceeded; protected String optimizedSql; + protected String calcitePlan; protected String cmd = ""; private TokenRewriteStream tokenRewriteStream; // Holds the qualified name to tokenRewriteStream for the views @@ -1021,6 +1022,14 @@ public class Context { this.cboSucceeded = cboSucceeded; } + public String getCalcitePlan() { + return this.calcitePlan; + } + + public void setCalcitePlan(String calcitePlan) { + this.calcitePlan = calcitePlan; + } + public Table getMaterializedTable(String cteName) { return cteTables.get(cteName); } http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java index 46bf088..4cc5fa8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java @@ -140,6 +140,16 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable { return outJSONObject; } + public String outputCboPlan(String cboPlan, PrintStream out, boolean jsonOutput) + throws JSONException { + if (out != null) { + out.println("CBO PLAN:"); + out.println(cboPlan); + } + + return jsonOutput ? cboPlan : null; + } + public JSONObject getJSONLogicalPlan(PrintStream out, ExplainWork work) throws Exception { isLogical = true; @@ -385,7 +395,11 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable { OutputStream outS = resFile.getFileSystem(conf).create(resFile); out = new PrintStream(outS); - if (work.isLogical()) { + if (work.isCbo()) { + if (work.getCboPlan() != null) { + outputCboPlan(work.getCboPlan(), out, work.isFormatted()); + } + } else if (work.isLogical()) { JSONObject jsonLogicalPlan = getJSONLogicalPlan(out, work); if (work.isFormatted()) { out.print(jsonLogicalPlan); http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java index 92fcfec..8b10823 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java @@ -268,7 +268,8 @@ public class ATSHook implements ExecuteWithHookContext { null,// analyzer config, //explainConfig null, // cboInfo - plan.getOptimizedQueryString() // optimizedSQL + plan.getOptimizedQueryString(), // optimizedSQL + null ); @SuppressWarnings("unchecked") ExplainTask explain = (ExplainTask) TaskFactory.get(work); http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java index 0af30d4..5a613b8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java @@ -495,7 +495,8 @@ public class HiveProtoLoggingHook implements ExecuteWithHookContext { null, null, // analyzer config, // explainConfig plan.getCboInfo(), // cboInfo, - plan.getOptimizedQueryString() + plan.getOptimizedQueryString(), + null ); ExplainTask explain = (ExplainTask) TaskFactory.get(work, conf); explain.initialize(hookContext.getQueryState(), plan, null, null); http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java index dc0a84b..9aa3012 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java @@ -18,20 +18,28 @@ package org.apache.hadoop.hive.ql.optimizer.calcite; import com.google.common.collect.Multimap; +import com.google.common.collect.Sets; import java.util.AbstractList; import java.util.ArrayList; import java.util.Collection; +import java.util.HashMap; +import java.util.LinkedHashSet; import java.util.List; import com.google.common.collect.ImmutableList; +import java.util.Map; import java.util.Map.Entry; +import java.util.Set; import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelReferentialConstraint; import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.core.Aggregate.Group; import org.apache.calcite.rel.core.AggregateCall; import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.core.Join; +import org.apache.calcite.rel.core.JoinRelType; import org.apache.calcite.rel.core.Project; import org.apache.calcite.rel.core.RelFactories; import org.apache.calcite.rel.core.Sort; @@ -45,12 +53,15 @@ import org.apache.calcite.rex.RexFieldAccess; import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexOver; +import org.apache.calcite.rex.RexTableInputRef; +import org.apache.calcite.rex.RexTableInputRef.RelTableRef; import org.apache.calcite.rex.RexUtil; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.calcite.tools.RelBuilder; import org.apache.calcite.util.ImmutableBitSet; +import org.apache.calcite.util.Pair; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; @@ -524,4 +535,231 @@ public class HiveRelOptUtil extends RelOptUtil { // It passed all the tests return false; } + + public static Pair<Boolean, List<RexNode>> isRewritablePKFKJoin(RelBuilder builder, Join join, + boolean leftInputPotentialFK, RelMetadataQuery mq) { + final JoinRelType joinType = join.getJoinType(); + final RexNode cond = join.getCondition(); + final RelNode fkInput = leftInputPotentialFK ? join.getLeft() : join.getRight(); + final RelNode nonFkInput = leftInputPotentialFK ? join.getRight() : join.getLeft(); + final Pair<Boolean, List<RexNode>> nonRewritable = Pair.of(false, null); + + if (joinType != JoinRelType.INNER) { + // If it is not an inner, we transform it as the metadata + // providers for expressions do not pull information through + // outer join (as it would not be correct) + join = (Join) builder + .push(join.getLeft()).push(join.getRight()) + .join(JoinRelType.INNER, cond) + .build(); + } + + // 1) Check whether there is any filtering condition on the + // non-FK side. Basically we check whether the operators + // below altered the PK cardinality in any way + if (HiveRelOptUtil.isRowFilteringPlan(mq, nonFkInput)) { + return nonRewritable; + } + + // 2) Check whether there is an FK relationship + final Map<RexTableInputRef, RexNode> refToRex = new HashMap<>(); + final EquivalenceClasses ec = new EquivalenceClasses(); + for (RexNode conj : RelOptUtil.conjunctions(cond)) { + if (!conj.isA(SqlKind.EQUALS)) { + // Not an equality, we bail out + return nonRewritable; + } + RexCall equiCond = (RexCall) conj; + RexNode eqOp1 = equiCond.getOperands().get(0); + Set<RexNode> eqOp1ExprsLineage = mq.getExpressionLineage(join, eqOp1); + if (eqOp1ExprsLineage == null) { + // Cannot be mapped, bail out + return nonRewritable; + } + RexNode eqOp2 = equiCond.getOperands().get(1); + Set<RexNode> eqOp2ExprsLineage = mq.getExpressionLineage(join, eqOp2); + if (eqOp2ExprsLineage == null) { + // Cannot be mapped, bail out + return nonRewritable; + } + List<RexTableInputRef> eqOp2ExprsFiltered = null; + for (RexNode eqOpExprLineage1 : eqOp1ExprsLineage) { + RexTableInputRef inputRef1 = extractTableInputRef(eqOpExprLineage1); + if (inputRef1 == null) { + // Bail out as this condition could not be map into an input reference + return nonRewritable; + } + refToRex.put(inputRef1, eqOp1); + if (eqOp2ExprsFiltered == null) { + // First iteration + eqOp2ExprsFiltered = new ArrayList<>(); + for (RexNode eqOpExprLineage2 : eqOp2ExprsLineage) { + RexTableInputRef inputRef2 = extractTableInputRef(eqOpExprLineage2); + if (inputRef2 == null) { + // Bail out as this condition could not be map into an input reference + return nonRewritable; + } + // Add to list of expressions for follow-up iterations + eqOp2ExprsFiltered.add(inputRef2); + // Add to equivalence classes and backwards mapping + ec.addEquivalenceClass(inputRef1, inputRef2); + refToRex.put(inputRef2, eqOp2); + } + } else { + // Rest of iterations, only adding, no checking + for (RexTableInputRef inputRef2 : eqOp2ExprsFiltered) { + ec.addEquivalenceClass(inputRef1, inputRef2); + } + } + } + } + if (ec.getEquivalenceClassesMap().isEmpty()) { + // This may be a cartesian product, we bail out + return nonRewritable; + } + + // 3) Gather all tables from the FK side and the table from the + // non-FK side + final Set<RelTableRef> leftTables = mq.getTableReferences(join.getLeft()); + final Set<RelTableRef> rightTables = + Sets.difference(mq.getTableReferences(join), mq.getTableReferences(join.getLeft())); + final Set<RelTableRef> fkTables = join.getLeft() == fkInput ? leftTables : rightTables; + final Set<RelTableRef> nonFkTables = join.getLeft() == fkInput ? rightTables : leftTables; + assert nonFkTables.size() == 1; + final RelTableRef nonFkTable = nonFkTables.iterator().next(); + final List<String> nonFkTableQName = nonFkTable.getQualifiedName(); + + // 4) For each table, check whether there is a matching on the non-FK side. + // If there is and it is the only condition, we are ready to transform + boolean canBeRewritten = false; + List<RexNode> nullableNodes = null; + for (RelTableRef tRef : fkTables) { + List<RelReferentialConstraint> constraints = tRef.getTable().getReferentialConstraints(); + for (RelReferentialConstraint constraint : constraints) { + if (constraint.getTargetQualifiedName().equals(nonFkTableQName)) { + nullableNodes = new ArrayList<>(); + EquivalenceClasses ecT = EquivalenceClasses.copy(ec); + boolean allContained = true; + for (int pos = 0; pos < constraint.getNumColumns(); pos++) { + int foreignKeyPos = constraint.getColumnPairs().get(pos).source; + RelDataType foreignKeyColumnType = + tRef.getTable().getRowType().getFieldList().get(foreignKeyPos).getType(); + RexTableInputRef foreignKeyColumnRef = + RexTableInputRef.of(tRef, foreignKeyPos, foreignKeyColumnType); + int uniqueKeyPos = constraint.getColumnPairs().get(pos).target; + RexTableInputRef uniqueKeyColumnRef = RexTableInputRef.of(nonFkTable, uniqueKeyPos, + nonFkTable.getTable().getRowType().getFieldList().get(uniqueKeyPos).getType()); + if (ecT.getEquivalenceClassesMap().containsKey(uniqueKeyColumnRef) && + ecT.getEquivalenceClassesMap().get(uniqueKeyColumnRef).contains(foreignKeyColumnRef)) { + if (foreignKeyColumnType.isNullable()) { + if (joinType == JoinRelType.INNER) { + // If it is nullable and it is an INNER, we just need a IS NOT NULL filter + RexNode originalCondOp = refToRex.get(foreignKeyColumnRef); + assert originalCondOp != null; + nullableNodes.add(originalCondOp); + } else { + // If it is nullable and this is not an INNER, we cannot execute any transformation + allContained = false; + break; + } + } + // Remove this condition from eq classes as we have checked that it is present + // in the join condition + ecT.getEquivalenceClassesMap().get(uniqueKeyColumnRef).remove(foreignKeyColumnRef); + if (ecT.getEquivalenceClassesMap().get(uniqueKeyColumnRef).size() == 1) { // self + ecT.getEquivalenceClassesMap().remove(uniqueKeyColumnRef); + } + ecT.getEquivalenceClassesMap().get(foreignKeyColumnRef).remove(uniqueKeyColumnRef); + if (ecT.getEquivalenceClassesMap().get(foreignKeyColumnRef).size() == 1) { // self + ecT.getEquivalenceClassesMap().remove(foreignKeyColumnRef); + } + } else { + // No relationship, we cannot do anything + allContained = false; + break; + } + } + if (allContained && ecT.getEquivalenceClassesMap().isEmpty()) { + // We made it + canBeRewritten = true; + break; + } + } + } + } + + return Pair.of(canBeRewritten, nullableNodes); + } + + private static RexTableInputRef extractTableInputRef(RexNode node) { + RexTableInputRef ref = null; + if (node instanceof RexTableInputRef) { + ref = (RexTableInputRef) node; + } else if (RexUtil.isLosslessCast(node) && + ((RexCall) node).getOperands().get(0) instanceof RexTableInputRef) { + ref = (RexTableInputRef) ((RexCall) node).getOperands().get(0); + } + return ref; + } + + /** + * Class representing an equivalence class, i.e., a set of equivalent columns + * + * TODO: This is a subset of a private class in materialized view rewriting + * in Calcite. It should be moved to its own class in Calcite so it can be + * accessible here. + */ + private static class EquivalenceClasses { + + private final Map<RexTableInputRef, Set<RexTableInputRef>> nodeToEquivalenceClass; + + protected EquivalenceClasses() { + nodeToEquivalenceClass = new HashMap<>(); + } + + protected void addEquivalenceClass(RexTableInputRef p1, RexTableInputRef p2) { + Set<RexTableInputRef> c1 = nodeToEquivalenceClass.get(p1); + Set<RexTableInputRef> c2 = nodeToEquivalenceClass.get(p2); + if (c1 != null && c2 != null) { + // Both present, we need to merge + if (c1.size() < c2.size()) { + // We swap them to merge + Set<RexTableInputRef> c2Temp = c2; + c2 = c1; + c1 = c2Temp; + } + for (RexTableInputRef newRef : c2) { + c1.add(newRef); + nodeToEquivalenceClass.put(newRef, c1); + } + } else if (c1 != null) { + // p1 present, we need to merge into it + c1.add(p2); + nodeToEquivalenceClass.put(p2, c1); + } else if (c2 != null) { + // p2 present, we need to merge into it + c2.add(p1); + nodeToEquivalenceClass.put(p1, c2); + } else { + // None are present, add to same equivalence class + Set<RexTableInputRef> equivalenceClass = new LinkedHashSet<>(); + equivalenceClass.add(p1); + equivalenceClass.add(p2); + nodeToEquivalenceClass.put(p1, equivalenceClass); + nodeToEquivalenceClass.put(p2, equivalenceClass); + } + } + + protected Map<RexTableInputRef, Set<RexTableInputRef>> getEquivalenceClassesMap() { + return nodeToEquivalenceClass; + } + + protected static EquivalenceClasses copy(EquivalenceClasses ec) { + final EquivalenceClasses newEc = new EquivalenceClasses(); + for (Entry<RexTableInputRef, Set<RexTableInputRef>> e : ec.nodeToEquivalenceClass.entrySet()) { + newEc.nodeToEquivalenceClass.put(e.getKey(), Sets.newLinkedHashSet(e.getValue())); + } + return newEc; + } + } } http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java index 0a307f2..534a5c9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java @@ -53,6 +53,7 @@ import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.calcite.tools.RelBuilderFactory; import org.apache.calcite.util.ImmutableBitSet; +import org.apache.calcite.util.Pair; import org.apache.calcite.util.mapping.Mapping; import org.apache.calcite.util.mapping.MappingType; import org.apache.calcite.util.mapping.Mappings; @@ -105,7 +106,6 @@ public class HiveJoinConstraintsRule extends RelOptRule { // If it is a left outer, left will be the FK side. // If it is a right outer, right will be the FK side. final RelNode fkInput; - final RelNode nonFkInput; final ImmutableBitSet topRefs = RelOptUtil.InputFinder.bits(topProjExprs, null); final ImmutableBitSet leftBits = @@ -204,17 +204,14 @@ public class HiveJoinConstraintsRule extends RelOptRule { return; } fkInput = leftInputPotentialFK ? leftInput : rightInput; - nonFkInput = leftInputPotentialFK ? rightInput : leftInput; mode = Mode.REMOVE; break; case LEFT: fkInput = leftInput; - nonFkInput = rightInput; mode = leftInputPotentialFK && !rightInputPotentialFK ? Mode.REMOVE : Mode.TRANSFORM; break; case RIGHT: fkInput = rightInput; - nonFkInput = leftInput; mode = !leftInputPotentialFK && rightInputPotentialFK ? Mode.REMOVE : Mode.TRANSFORM; break; default: @@ -222,151 +219,13 @@ public class HiveJoinConstraintsRule extends RelOptRule { return; } - // 2) Check whether there is any filtering condition on the - // non-FK side. Basically we check whether the operators - // below altered the PK cardinality in any way - final RelMetadataQuery mq = call.getMetadataQuery(); - if (HiveRelOptUtil.isRowFilteringPlan(mq, nonFkInput)) { - return; - } - - // 3) Check whether there is an FK relationship - if (join.getJoinType() != JoinRelType.INNER) { - // If it is not an inner, we transform it as the metadata - // providers for expressions do not pull information through - // outer join (as it would not be correct) - join = (Join) call.builder() - .push(leftInput).push(rightInput) - .join(JoinRelType.INNER, cond) - .build(); - } - final Map<RexTableInputRef, RexNode> refToRex = new HashMap<>(); - final EquivalenceClasses ec = new EquivalenceClasses(); - for (RexNode conj : RelOptUtil.conjunctions(cond)) { - if (!conj.isA(SqlKind.EQUALS)) { - // Not an equality, we bail out - return; - } - RexCall equiCond = (RexCall) conj; - RexNode eqOp1 = equiCond.getOperands().get(0); - Set<RexNode> eqOp1ExprsLineage = mq.getExpressionLineage(join, eqOp1); - if (eqOp1ExprsLineage == null) { - // Cannot be mapped, bail out - return; - } - RexNode eqOp2 = equiCond.getOperands().get(1); - Set<RexNode> eqOp2ExprsLineage = mq.getExpressionLineage(join, eqOp2); - if (eqOp2ExprsLineage == null) { - // Cannot be mapped, bail out - return; - } - List<RexTableInputRef> eqOp2ExprsFiltered = null; - for (RexNode eqOpExprLineage1 : eqOp1ExprsLineage) { - RexTableInputRef inputRef1 = extractTableInputRef(eqOpExprLineage1); - if (inputRef1 == null) { - // Bail out as this condition could not be map into an input reference - return; - } - refToRex.put(inputRef1, eqOp1); - if (eqOp2ExprsFiltered == null) { - // First iteration - eqOp2ExprsFiltered = new ArrayList<>(); - for (RexNode eqOpExprLineage2 : eqOp2ExprsLineage) { - RexTableInputRef inputRef2 = extractTableInputRef(eqOpExprLineage2); - if (inputRef2 == null) { - // Bail out as this condition could not be map into an input reference - return; - } - // Add to list of expressions for follow-up iterations - eqOp2ExprsFiltered.add(inputRef2); - // Add to equivalence classes and backwards mapping - ec.addEquivalenceClass(inputRef1, inputRef2); - refToRex.put(inputRef2, eqOp2); - } - } else { - // Rest of iterations, only adding, no checking - for (RexTableInputRef inputRef2 : eqOp2ExprsFiltered) { - ec.addEquivalenceClass(inputRef1, inputRef2); - } - } - } - } - if (ec.getEquivalenceClassesMap().isEmpty()) { - // This may be a cartesian product, we bail out - return; - } - - // 4) Gather all tables from the FK side and the table from the - // non-FK side - final Set<RelTableRef> leftTables = mq.getTableReferences(leftInput); - final Set<RelTableRef> rightTables = - Sets.difference(mq.getTableReferences(join), mq.getTableReferences(leftInput)); - final Set<RelTableRef> fkTables = leftInputPotentialFK ? leftTables : rightTables; - final Set<RelTableRef> nonFkTables = leftInputPotentialFK ? rightTables : leftTables; - assert nonFkTables.size() == 1; - final RelTableRef nonFkTable = nonFkTables.iterator().next(); - final List<String> nonFkTableQName = nonFkTable.getQualifiedName(); + // 2) Check whether this join can be rewritten or removed + Pair<Boolean, List<RexNode>> r = HiveRelOptUtil.isRewritablePKFKJoin(call.builder(), + join, leftInput == fkInput, call.getMetadataQuery()); - // 5) For each table, check whether there is a matching on the non-FK side. - // If there is and it is the only condition, we are ready to transform - boolean canBeRewritten = false; - List<RexNode> nullableNodes = new ArrayList<>(); - for (RelTableRef tRef : fkTables) { - List<RelReferentialConstraint> constraints = tRef.getTable().getReferentialConstraints(); - for (RelReferentialConstraint constraint : constraints) { - if (constraint.getTargetQualifiedName().equals(nonFkTableQName)) { - EquivalenceClasses ecT = EquivalenceClasses.copy(ec); - boolean allContained = true; - for (int pos = 0; pos < constraint.getNumColumns(); pos++) { - int foreignKeyPos = constraint.getColumnPairs().get(pos).source; - RelDataType foreignKeyColumnType = - tRef.getTable().getRowType().getFieldList().get(foreignKeyPos).getType(); - RexTableInputRef foreignKeyColumnRef = - RexTableInputRef.of(tRef, foreignKeyPos, foreignKeyColumnType); - if (foreignKeyColumnType.isNullable()) { - if (joinType == JoinRelType.INNER) { - // If it is nullable and it is an INNER, we just need a IS NOT NULL filter - RexNode originalCondOp = refToRex.get(foreignKeyColumnRef); - assert originalCondOp != null; - nullableNodes.add(originalCondOp); - } else { - // If it is nullable and this is not an INNER, we cannot execute any transformation - allContained = false; - break; - } - } - int uniqueKeyPos = constraint.getColumnPairs().get(pos).target; - RexTableInputRef uniqueKeyColumnRef = RexTableInputRef.of(nonFkTable, uniqueKeyPos, - nonFkTable.getTable().getRowType().getFieldList().get(uniqueKeyPos).getType()); - if (ecT.getEquivalenceClassesMap().containsKey(uniqueKeyColumnRef) && - ecT.getEquivalenceClassesMap().get(uniqueKeyColumnRef).contains(foreignKeyColumnRef)) { - // Remove this condition from eq classes as we have checked that it is present - // in the join condition - ecT.getEquivalenceClassesMap().get(uniqueKeyColumnRef).remove(foreignKeyColumnRef); - if (ecT.getEquivalenceClassesMap().get(uniqueKeyColumnRef).size() == 1) { // self - ecT.getEquivalenceClassesMap().remove(uniqueKeyColumnRef); - } - ecT.getEquivalenceClassesMap().get(foreignKeyColumnRef).remove(uniqueKeyColumnRef); - if (ecT.getEquivalenceClassesMap().get(foreignKeyColumnRef).size() == 1) { // self - ecT.getEquivalenceClassesMap().remove(foreignKeyColumnRef); - } - } else { - // No relationship, we cannot do anything - allContained = false; - break; - } - } - if (allContained && ecT.getEquivalenceClassesMap().isEmpty()) { - // We made it - canBeRewritten = true; - break; - } - } - } - } - - // 6) If it is the only condition, we can trigger the rewriting - if (canBeRewritten) { + // 3) If it is the only condition, we can trigger the rewriting + if (r.left) { + List<RexNode> nullableNodes = r.right; // If we reach here, we trigger the transform if (mode == Mode.REMOVE) { if (rightInputPotentialFK) { @@ -410,84 +269,13 @@ public class HiveJoinConstraintsRule extends RelOptRule { call.transformTo(call.builder() .push(leftInput).push(rightInput) .join(JoinRelType.INNER, join.getCondition()) + .convert(call.rel(1).getRowType(), false) // Preserve nullability .project(project.getChildExps()) .build()); } } } - private static RexTableInputRef extractTableInputRef(RexNode node) { - RexTableInputRef ref = null; - if (node instanceof RexTableInputRef) { - ref = (RexTableInputRef) node; - } else if (RexUtil.isLosslessCast(node) && - ((RexCall) node).getOperands().get(0) instanceof RexTableInputRef) { - ref = (RexTableInputRef) ((RexCall) node).getOperands().get(0); - } - return ref; - } - - /** - * Class representing an equivalence class, i.e., a set of equivalent columns - * - * TODO: This is a subset of a private class in materialized view rewriting - * in Calcite. It should be moved to its own class in Calcite so it can be - * accessible here. - */ - private static class EquivalenceClasses { - - private final Map<RexTableInputRef, Set<RexTableInputRef>> nodeToEquivalenceClass; - - protected EquivalenceClasses() { - nodeToEquivalenceClass = new HashMap<>(); - } - - protected void addEquivalenceClass(RexTableInputRef p1, RexTableInputRef p2) { - Set<RexTableInputRef> c1 = nodeToEquivalenceClass.get(p1); - Set<RexTableInputRef> c2 = nodeToEquivalenceClass.get(p2); - if (c1 != null && c2 != null) { - // Both present, we need to merge - if (c1.size() < c2.size()) { - // We swap them to merge - Set<RexTableInputRef> c2Temp = c2; - c2 = c1; - c1 = c2Temp; - } - for (RexTableInputRef newRef : c2) { - c1.add(newRef); - nodeToEquivalenceClass.put(newRef, c1); - } - } else if (c1 != null) { - // p1 present, we need to merge into it - c1.add(p2); - nodeToEquivalenceClass.put(p2, c1); - } else if (c2 != null) { - // p2 present, we need to merge into it - c2.add(p1); - nodeToEquivalenceClass.put(p1, c2); - } else { - // None are present, add to same equivalence class - Set<RexTableInputRef> equivalenceClass = new LinkedHashSet<>(); - equivalenceClass.add(p1); - equivalenceClass.add(p2); - nodeToEquivalenceClass.put(p1, equivalenceClass); - nodeToEquivalenceClass.put(p2, equivalenceClass); - } - } - - protected Map<RexTableInputRef, Set<RexTableInputRef>> getEquivalenceClassesMap() { - return nodeToEquivalenceClass; - } - - protected static EquivalenceClasses copy(EquivalenceClasses ec) { - final EquivalenceClasses newEc = new EquivalenceClasses(); - for (Entry<RexTableInputRef, Set<RexTableInputRef>> e : ec.nodeToEquivalenceClass.entrySet()) { - newEc.nodeToEquivalenceClass.put(e.getKey(), Sets.newLinkedHashSet(e.getValue())); - } - return newEc; - } - } - private enum Mode { // Removes join operator from the plan REMOVE, http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 1085845..82e975a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -528,7 +528,15 @@ public class CalcitePlanner extends SemanticAnalyzer { this.ctx.setCboSucceeded(true); if (this.ctx.isExplainPlan()) { ExplainConfiguration explainConfig = this.ctx.getExplainConfig(); - if (explainConfig.isExtended() || explainConfig.isFormatted()) { + if (explainConfig.isCbo()) { + if (explainConfig.isCboExtended()) { + // Include join cost + this.ctx.setCalcitePlan(RelOptUtil.toString(newPlan, SqlExplainLevel.ALL_ATTRIBUTES)); + } else { + // Do not include join cost + this.ctx.setCalcitePlan(RelOptUtil.toString(newPlan)); + } + } else if (explainConfig.isExtended() || explainConfig.isFormatted()) { this.ctx.setOptimizedSql(getOptimizedSql(newPlan)); } } http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java index a92502e..28a7b43 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java @@ -41,6 +41,8 @@ public class ExplainConfiguration { private boolean extended = false; private boolean formatted = false; private boolean dependency = false; + private boolean cbo = false; + private boolean cboExtended = false; private boolean logical = false; private boolean authorize = false; private boolean userLevelExplain = false; @@ -84,6 +86,22 @@ public class ExplainConfiguration { this.dependency = dependency; } + public boolean isCbo() { + return cbo; + } + + public void setCbo(boolean cbo) { + this.cbo = cbo; + } + + public boolean isCboExtended() { + return cboExtended; + } + + public void setCboExtended(boolean cboExtended) { + this.cboExtended = cboExtended; + } + public boolean isLogical() { return logical; } http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java index 49b6146..6721a37 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java @@ -75,6 +75,14 @@ public class ExplainSemanticAnalyzer extends BaseSemanticAnalyzer { config.setExtended(true); } else if (explainOptions == HiveParser.KW_DEPENDENCY) { config.setDependency(true); + } else if (explainOptions == HiveParser.KW_CBO) { + config.setCbo(true); + if (i + 1 < childCount) { + if (ast.getChild(i + 1).getType() == HiveParser.KW_EXTENDED) { + config.setCboExtended(true); + i++; + } + } } else if (explainOptions == HiveParser.KW_LOGICAL) { config.setLogical(true); } else if (explainOptions == HiveParser.KW_AUTHORIZATION) { @@ -191,6 +199,7 @@ public class ExplainSemanticAnalyzer extends BaseSemanticAnalyzer { config.setUserLevelExplain(!config.isExtended() && !config.isFormatted() && !config.isDependency() + && !config.isCbo() && !config.isLogical() && !config.isAuthorize() && ( @@ -216,7 +225,8 @@ public class ExplainSemanticAnalyzer extends BaseSemanticAnalyzer { sem, config, ctx.getCboInfo(), - ctx.getOptimizedSql()); + ctx.getOptimizedSql(), + ctx.getCalcitePlan()); work.setAppendTaskType( HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEEXPLAINDEPENDENCYAPPENDTASKTYPES)); http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g index 8bf9cc0..253633c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g @@ -193,6 +193,7 @@ KW_DEBUG: 'DEBUG'; KW_FORMATTED: 'FORMATTED'; KW_DEPENDENCY: 'DEPENDENCY'; KW_LOGICAL: 'LOGICAL'; +KW_CBO: 'CBO'; KW_SERDE: 'SERDE'; KW_WITH: 'WITH'; KW_DEFERRED: 'DEFERRED'; http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g index bc95c46..7dda8b3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g @@ -805,6 +805,7 @@ explainOption : KW_EXTENDED | KW_FORMATTED | KW_DEPENDENCY + | KW_CBO KW_EXTENDED? | KW_LOGICAL | KW_AUTHORIZATION | KW_ANALYZE http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g index fa033d7..417955c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g @@ -794,7 +794,7 @@ principalIdentifier nonReserved : KW_ABORT | KW_ADD | KW_ADMIN | KW_AFTER | KW_ANALYZE | KW_ARCHIVE | KW_ASC | KW_BEFORE | KW_BUCKET | KW_BUCKETS - | KW_CASCADE | KW_CHANGE | KW_CHECK | KW_CLUSTER | KW_CLUSTERED | KW_CLUSTERSTATUS | KW_COLLECTION | KW_COLUMNS + | KW_CASCADE | KW_CBO | KW_CHANGE | KW_CHECK | KW_CLUSTER | KW_CLUSTERED | KW_CLUSTERSTATUS | KW_COLLECTION | KW_COLUMNS | KW_COMMENT | KW_COMPACT | KW_COMPACTIONS | KW_COMPUTE | KW_CONCATENATE | KW_CONTINUE | KW_DATA | KW_DAY | KW_DATABASES | KW_DATETIME | KW_DBPROPERTIES | KW_DEFERRED | KW_DEFINED | KW_DELIMITED | KW_DEPENDENCY | KW_DESC | KW_DIRECTORIES | KW_DIRECTORY | KW_DISABLE | KW_DISTRIBUTE | KW_DOW | KW_ELEM_TYPE http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java index 01da4d5..8a60d59 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java @@ -54,6 +54,7 @@ public class ExplainWork implements Serializable { boolean appendTaskType; String cboInfo; + String cboPlan; private String optimizedSQL; @@ -70,7 +71,8 @@ public class ExplainWork implements Serializable { BaseSemanticAnalyzer analyzer, ExplainConfiguration config, String cboInfo, - String optimizedSQL) { + String optimizedSQL, + String cboPlan) { this.resFile = resFile; this.rootTasks = new ArrayList<Task<?>>(rootTasks); this.fetchTask = fetchTask; @@ -87,6 +89,7 @@ public class ExplainWork implements Serializable { this.pCtx = pCtx; this.cboInfo = cboInfo; this.optimizedSQL = optimizedSQL; + this.cboPlan = cboPlan; this.config = config; } @@ -177,6 +180,10 @@ public class ExplainWork implements Serializable { this.pCtx = pCtx; } + public boolean isCbo() { + return config.isCbo(); + } + public boolean isLogical() { return config.isLogical(); } @@ -217,6 +224,14 @@ public class ExplainWork implements Serializable { this.optimizedSQL = optimizedSQL; } + public String getCboPlan() { + return cboPlan; + } + + public void setCboPlan(String cboPlan) { + this.cboPlan = cboPlan; + } + public ExplainConfiguration getConfig() { return config; } http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java index 932f4e8..f449c6b 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java @@ -300,7 +300,7 @@ public class TestUpdateDeleteSemanticAnalyzer { ExplainConfiguration config = new ExplainConfiguration(); config.setExtended(true); ExplainWork work = new ExplainWork(tmp, sem.getParseContext(), sem.getRootTasks(), - sem.getFetchTask(), null, sem, config, null, plan.getOptimizedQueryString()); + sem.getFetchTask(), null, sem, config, null, plan.getOptimizedQueryString(), null); ExplainTask task = new ExplainTask(); task.setWork(work); task.initialize(queryState, plan, null, null); http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/queries/clientpositive/perf/cbo_query1.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query1.q b/ql/src/test/queries/clientpositive/perf/cbo_query1.q new file mode 100644 index 0000000..7cb0cd2 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query1.q @@ -0,0 +1,27 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query1.tpl and seed 2031708268 +explain cbo +with customer_total_return as +(select sr_customer_sk as ctr_customer_sk +,sr_store_sk as ctr_store_sk +,sum(SR_FEE) as ctr_total_return +from store_returns +,date_dim +where sr_returned_date_sk = d_date_sk +and d_year =2000 +group by sr_customer_sk +,sr_store_sk) + select c_customer_id +from customer_total_return ctr1 +,store +,customer +where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 +from customer_total_return ctr2 +where ctr1.ctr_store_sk = ctr2.ctr_store_sk) +and s_store_sk = ctr1.ctr_store_sk +and s_state = 'NM' +and ctr1.ctr_customer_sk = c_customer_sk +order by c_customer_id +limit 100; + +-- end query 1 in stream 0 using template query1.tpl http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/queries/clientpositive/perf/cbo_query10.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query10.q b/ql/src/test/queries/clientpositive/perf/cbo_query10.q new file mode 100644 index 0000000..fbdc9db --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query10.q @@ -0,0 +1,61 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query10.tpl and seed 797269820 +explain cbo +select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3, + cd_dep_count, + count(*) cnt4, + cd_dep_employed_count, + count(*) cnt5, + cd_dep_college_count, + count(*) cnt6 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_county in ('Walker County','Richland County','Gaines County','Douglas County','Dona Ana County') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 4 and 4+3) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 4 ANd 4+3) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 4 and 4+3)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count +limit 100; + +-- end query 1 in stream 0 using template query10.tpl http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/queries/clientpositive/perf/cbo_query11.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query11.q b/ql/src/test/queries/clientpositive/perf/cbo_query11.q new file mode 100644 index 0000000..09d9529 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query11.q @@ -0,0 +1,77 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query11.tpl and seed 1819994127 +explain cbo +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ss_ext_list_price-ss_ext_discount_amt) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ws_ext_list_price-ws_ext_discount_amt) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select t_s_secyear.c_preferred_cust_flag + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 2001 + and t_s_secyear.dyear = 2001+1 + and t_w_firstyear.dyear = 2001 + and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + order by t_s_secyear.c_preferred_cust_flag +limit 100; + +-- end query 1 in stream 0 using template query11.tpl http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/queries/clientpositive/perf/cbo_query12.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query12.q b/ql/src/test/queries/clientpositive/perf/cbo_query12.q new file mode 100644 index 0000000..41029cf --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query12.q @@ -0,0 +1,35 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query12.tpl and seed 345591136 +explain cbo +select i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ws_ext_sales_price) as itemrevenue + ,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over + (partition by i_class) as revenueratio +from + web_sales + ,item + ,date_dim +where + ws_item_sk = i_item_sk + and i_category in ('Jewelry', 'Sports', 'Books') + and ws_sold_date_sk = d_date_sk + and d_date between cast('2001-01-12' as date) + and (cast('2001-01-12' as date) + 30 days) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +limit 100; + +-- end query 1 in stream 0 using template query12.tpl http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/queries/clientpositive/perf/cbo_query13.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query13.q b/ql/src/test/queries/clientpositive/perf/cbo_query13.q new file mode 100644 index 0000000..72eb08d --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query13.q @@ -0,0 +1,54 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query13.tpl and seed 622697896 +explain cbo +select avg(ss_quantity) + ,avg(ss_ext_sales_price) + ,avg(ss_ext_wholesale_cost) + ,sum(ss_ext_wholesale_cost) + from store_sales + ,store + ,customer_demographics + ,household_demographics + ,customer_address + ,date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 2001 + and((ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'M' + and cd_education_status = '4 yr Degree' + and ss_sales_price between 100.00 and 150.00 + and hd_dep_count = 3 + )or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'D' + and cd_education_status = 'Primary' + and ss_sales_price between 50.00 and 100.00 + and hd_dep_count = 1 + ) or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'U' + and cd_education_status = 'Advanced Degree' + and ss_sales_price between 150.00 and 200.00 + and hd_dep_count = 1 + )) + and((ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('KY', 'GA', 'NM') + and ss_net_profit between 100 and 200 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('MT', 'OR', 'IN') + and ss_net_profit between 150 and 300 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('WI', 'MO', 'WV') + and ss_net_profit between 50 and 250 + )) +; + +-- end query 1 in stream 0 using template query13.tpl http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/queries/clientpositive/perf/cbo_query14.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query14.q b/ql/src/test/queries/clientpositive/perf/cbo_query14.q new file mode 100644 index 0000000..eaee914 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query14.q @@ -0,0 +1,104 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query14.tpl and seed 1819994127 +explain cbo +with cross_items as + (select i_item_sk ss_item_sk + from item, + (select iss.i_brand_id brand_id + ,iss.i_class_id class_id + ,iss.i_category_id category_id + from store_sales + ,item iss + ,date_dim d1 + where ss_item_sk = iss.i_item_sk + and ss_sold_date_sk = d1.d_date_sk + and d1.d_year between 1999 AND 1999 + 2 + intersect + select ics.i_brand_id + ,ics.i_class_id + ,ics.i_category_id + from catalog_sales + ,item ics + ,date_dim d2 + where cs_item_sk = ics.i_item_sk + and cs_sold_date_sk = d2.d_date_sk + and d2.d_year between 1999 AND 1999 + 2 + intersect + select iws.i_brand_id + ,iws.i_class_id + ,iws.i_category_id + from web_sales + ,item iws + ,date_dim d3 + where ws_item_sk = iws.i_item_sk + and ws_sold_date_sk = d3.d_date_sk + and d3.d_year between 1999 AND 1999 + 2) x + where i_brand_id = brand_id + and i_class_id = class_id + and i_category_id = category_id +), + avg_sales as + (select avg(quantity*list_price) average_sales + from (select ss_quantity quantity + ,ss_list_price list_price + from store_sales + ,date_dim + where ss_sold_date_sk = d_date_sk + and d_year between 1999 and 2001 + union all + select cs_quantity quantity + ,cs_list_price list_price + from catalog_sales + ,date_dim + where cs_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2 + union all + select ws_quantity quantity + ,ws_list_price list_price + from web_sales + ,date_dim + where ws_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2) x) + select channel, i_brand_id,i_class_id,i_category_id,sum(sales), sum(number_sales) + from( + select 'store' channel, i_brand_id,i_class_id + ,i_category_id,sum(ss_quantity*ss_list_price) sales + , count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales) + union all + select 'catalog' channel, i_brand_id,i_class_id,i_category_id, sum(cs_quantity*cs_list_price) sales, count(*) number_sales + from catalog_sales + ,item + ,date_dim + where cs_item_sk in (select ss_item_sk from cross_items) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(cs_quantity*cs_list_price) > (select average_sales from avg_sales) + union all + select 'web' channel, i_brand_id,i_class_id,i_category_id, sum(ws_quantity*ws_list_price) sales , count(*) number_sales + from web_sales + ,item + ,date_dim + where ws_item_sk in (select ss_item_sk from cross_items) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ws_quantity*ws_list_price) > (select average_sales from avg_sales) + ) y + group by rollup (channel, i_brand_id,i_class_id,i_category_id) + order by channel,i_brand_id,i_class_id,i_category_id + limit 100; http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/queries/clientpositive/perf/cbo_query15.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query15.q b/ql/src/test/queries/clientpositive/perf/cbo_query15.q new file mode 100644 index 0000000..3beea4e --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query15.q @@ -0,0 +1,22 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query15.tpl and seed 1819994127 +explain cbo +select ca_zip + ,sum(cs_sales_price) + from catalog_sales + ,customer + ,customer_address + ,date_dim + where cs_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', + '85392', '85460', '80348', '81792') + or ca_state in ('CA','WA','GA') + or cs_sales_price > 500) + and cs_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 2000 + group by ca_zip + order by ca_zip + limit 100; + +-- end query 1 in stream 0 using template query15.tpl http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/queries/clientpositive/perf/cbo_query16.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query16.q b/ql/src/test/queries/clientpositive/perf/cbo_query16.q new file mode 100644 index 0000000..74245aa --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query16.q @@ -0,0 +1,33 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query16.tpl and seed 171719422 +explain cbo +select + count(distinct cs_order_number) as `order count` + ,sum(cs_ext_ship_cost) as `total shipping cost` + ,sum(cs_net_profit) as `total net profit` +from + catalog_sales cs1 + ,date_dim + ,customer_address + ,call_center +where + d_date between '2001-4-01' and + (cast('2001-4-01' as date) + 60 days) +and cs1.cs_ship_date_sk = d_date_sk +and cs1.cs_ship_addr_sk = ca_address_sk +and ca_state = 'NY' +and cs1.cs_call_center_sk = cc_call_center_sk +and cc_county in ('Ziebach County','Levy County','Huron County','Franklin Parish', + 'Daviess County' +) +and exists (select * + from catalog_sales cs2 + where cs1.cs_order_number = cs2.cs_order_number + and cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk) +and not exists(select * + from catalog_returns cr1 + where cs1.cs_order_number = cr1.cr_order_number) +order by count(distinct cs_order_number) +limit 100; + +-- end query 1 in stream 0 using template query16.tpl http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/queries/clientpositive/perf/cbo_query17.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query17.q b/ql/src/test/queries/clientpositive/perf/cbo_query17.q new file mode 100644 index 0000000..5bf9864 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query17.q @@ -0,0 +1,47 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query17.tpl and seed 1819994127 +explain cbo +select i_item_id + ,i_item_desc + ,s_state + ,count(ss_quantity) as store_sales_quantitycount + ,avg(ss_quantity) as store_sales_quantityave + ,stddev_samp(ss_quantity) as store_sales_quantitystdev + ,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov + ,count(sr_return_quantity) as_store_returns_quantitycount + ,avg(sr_return_quantity) as_store_returns_quantityave + ,stddev_samp(sr_return_quantity) as_store_returns_quantitystdev + ,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as store_returns_quantitycov + ,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) as catalog_sales_quantityave + ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitystdev + ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov + from store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where d1.d_quarter_name = '2000Q1' + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_quarter_name in ('2000Q1','2000Q2','2000Q3') + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_quarter_name in ('2000Q1','2000Q2','2000Q3') + group by i_item_id + ,i_item_desc + ,s_state + order by i_item_id + ,i_item_desc + ,s_state +limit 100; + +-- end query 1 in stream 0 using template query17.tpl http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/queries/clientpositive/perf/cbo_query18.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query18.q b/ql/src/test/queries/clientpositive/perf/cbo_query18.q new file mode 100644 index 0000000..110bee5 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query18.q @@ -0,0 +1,36 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query18.tpl and seed 1978355063 +explain cbo +select i_item_id, + ca_country, + ca_state, + ca_county, + avg( cast(cs_quantity as numeric(12,2))) agg1, + avg( cast(cs_list_price as numeric(12,2))) agg2, + avg( cast(cs_coupon_amt as numeric(12,2))) agg3, + avg( cast(cs_sales_price as numeric(12,2))) agg4, + avg( cast(cs_net_profit as numeric(12,2))) agg5, + avg( cast(c_birth_year as numeric(12,2))) agg6, + avg( cast(cd1.cd_dep_count as numeric(12,2))) agg7 + from catalog_sales, customer_demographics cd1, + customer_demographics cd2, customer, customer_address, date_dim, item + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd1.cd_demo_sk and + cs_bill_customer_sk = c_customer_sk and + cd1.cd_gender = 'M' and + cd1.cd_education_status = 'College' and + c_current_cdemo_sk = cd2.cd_demo_sk and + c_current_addr_sk = ca_address_sk and + c_birth_month in (9,5,12,4,1,10) and + d_year = 2001 and + ca_state in ('ND','WI','AL' + ,'NC','OK','MS','TN') + group by rollup (i_item_id, ca_country, ca_state, ca_county) + order by ca_country, + ca_state, + ca_county, + i_item_id + limit 100; + +-- end query 1 in stream 0 using template query18.tpl http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/queries/clientpositive/perf/cbo_query19.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query19.q b/ql/src/test/queries/clientpositive/perf/cbo_query19.q new file mode 100644 index 0000000..abcec36 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query19.q @@ -0,0 +1,27 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query19.tpl and seed 1930872976 +explain cbo +select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item,customer,customer_address,store + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=7 + and d_moy=11 + and d_year=1999 + and ss_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and substr(ca_zip,1,5) <> substr(s_zip,1,5) + and ss_store_sk = s_store_sk + group by i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact + order by ext_price desc + ,i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact +limit 100 ; + +-- end query 1 in stream 0 using template query19.tpl http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/queries/clientpositive/perf/cbo_query2.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query2.q b/ql/src/test/queries/clientpositive/perf/cbo_query2.q new file mode 100644 index 0000000..9fcccbf --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query2.q @@ -0,0 +1,62 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query2.tpl and seed 1819994127 +explain cbo +with wscs as + (select sold_date_sk + ,sales_price + from (select ws_sold_date_sk sold_date_sk + ,ws_ext_sales_price sales_price + from web_sales) x + union all + (select cs_sold_date_sk sold_date_sk + ,cs_ext_sales_price sales_price + from catalog_sales)), + wswscs as + (select d_week_seq, + sum(case when (d_day_name='Sunday') then sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then sales_price else null end) sat_sales + from wscs + ,date_dim + where d_date_sk = sold_date_sk + group by d_week_seq) + select d_week_seq1 + ,round(sun_sales1/sun_sales2,2) + ,round(mon_sales1/mon_sales2,2) + ,round(tue_sales1/tue_sales2,2) + ,round(wed_sales1/wed_sales2,2) + ,round(thu_sales1/thu_sales2,2) + ,round(fri_sales1/fri_sales2,2) + ,round(sat_sales1/sat_sales2,2) + from + (select wswscs.d_week_seq d_week_seq1 + ,sun_sales sun_sales1 + ,mon_sales mon_sales1 + ,tue_sales tue_sales1 + ,wed_sales wed_sales1 + ,thu_sales thu_sales1 + ,fri_sales fri_sales1 + ,sat_sales sat_sales1 + from wswscs,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 2001) y, + (select wswscs.d_week_seq d_week_seq2 + ,sun_sales sun_sales2 + ,mon_sales mon_sales2 + ,tue_sales tue_sales2 + ,wed_sales wed_sales2 + ,thu_sales thu_sales2 + ,fri_sales fri_sales2 + ,sat_sales sat_sales2 + from wswscs + ,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 2001+1) z + where d_week_seq1=d_week_seq2-53 + order by d_week_seq1; + +-- end query 1 in stream 0 using template query2.tpl http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/queries/clientpositive/perf/cbo_query20.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query20.q b/ql/src/test/queries/clientpositive/perf/cbo_query20.q new file mode 100644 index 0000000..1d361b8 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query20.q @@ -0,0 +1,31 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query20.tpl and seed 345591136 +explain cbo +select i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(cs_ext_sales_price) as itemrevenue + ,sum(cs_ext_sales_price)*100/sum(sum(cs_ext_sales_price)) over + (partition by i_class) as revenueratio + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and i_category in ('Jewelry', 'Sports', 'Books') + and cs_sold_date_sk = d_date_sk + and d_date between cast('2001-01-12' as date) + and (cast('2001-01-12' as date) + 30 days) + group by i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + order by i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio +limit 100; + +-- end query 1 in stream 0 using template query20.tpl http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/queries/clientpositive/perf/cbo_query21.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query21.q b/ql/src/test/queries/clientpositive/perf/cbo_query21.q new file mode 100644 index 0000000..90daea0 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query21.q @@ -0,0 +1,32 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query21.tpl and seed 1819994127 +explain cbo +select * + from(select w_warehouse_name + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('1998-04-08' as date)) + then inv_quantity_on_hand + else 0 end) as inv_before + ,sum(case when (cast(d_date as date) >= cast ('1998-04-08' as date)) + then inv_quantity_on_hand + else 0 end) as inv_after + from inventory + ,warehouse + ,item + ,date_dim + where i_current_price between 0.99 and 1.49 + and i_item_sk = inv_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_date between (cast ('1998-04-08' as date) - 30 days) + and (cast ('1998-04-08' as date) + 30 days) + group by w_warehouse_name, i_item_id) x + where (case when inv_before > 0 + then inv_after / inv_before + else null + end) between 2.0/3.0 and 3.0/2.0 + order by w_warehouse_name + ,i_item_id + limit 100; + +-- end query 1 in stream 0 using template query21.tpl http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/queries/clientpositive/perf/cbo_query22.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query22.q b/ql/src/test/queries/clientpositive/perf/cbo_query22.q new file mode 100644 index 0000000..14ceec4 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query22.q @@ -0,0 +1,24 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query22.tpl and seed 1819994127 +explain cbo +select i_product_name + ,i_brand + ,i_class + ,i_category + ,avg(inv_quantity_on_hand) qoh + from inventory + ,date_dim + ,item + ,warehouse + where inv_date_sk=d_date_sk + and inv_item_sk=i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and d_month_seq between 1212 and 1212 + 11 + group by rollup(i_product_name + ,i_brand + ,i_class + ,i_category) +order by qoh, i_product_name, i_brand, i_class, i_category +limit 100; + +-- end query 1 in stream 0 using template query22.tpl http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/queries/clientpositive/perf/cbo_query23.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query23.q b/ql/src/test/queries/clientpositive/perf/cbo_query23.q new file mode 100644 index 0000000..a1c661b --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query23.q @@ -0,0 +1,52 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query23.tpl and seed 2031708268 +explain cbo +with frequent_ss_items as + (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt + from store_sales + ,date_dim + ,item + where ss_sold_date_sk = d_date_sk + and ss_item_sk = i_item_sk + and d_year in (1999,1999+1,1999+2,1999+3) + group by substr(i_item_desc,1,30),i_item_sk,d_date + having count(*) >4), + max_store_sales as + (select max(csales) tpcds_cmax + from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales + from store_sales + ,customer + ,date_dim + where ss_customer_sk = c_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (1999,1999+1,1999+2,1999+3) + group by c_customer_sk) x), + best_ss_customer as + (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales + from store_sales + ,customer + where ss_customer_sk = c_customer_sk + group by c_customer_sk + having sum(ss_quantity*ss_sales_price) > (95/100.0) * (select + * +from + max_store_sales)) + select sum(sales) + from ((select cs_quantity*cs_list_price sales + from catalog_sales + ,date_dim + where d_year = 1999 + and d_moy = 1 + and cs_sold_date_sk = d_date_sk + and cs_item_sk in (select item_sk from frequent_ss_items) + and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer)) + union all + (select ws_quantity*ws_list_price sales + from web_sales + ,date_dim + where d_year = 1999 + and d_moy = 1 + and ws_sold_date_sk = d_date_sk + and ws_item_sk in (select item_sk from frequent_ss_items) + and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer))) y + limit 100; http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/queries/clientpositive/perf/cbo_query24.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query24.q b/ql/src/test/queries/clientpositive/perf/cbo_query24.q new file mode 100644 index 0000000..02bcbaf --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query24.q @@ -0,0 +1,51 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query24.tpl and seed 1220860970 +explain cbo +with ssales as +(select c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size + ,sum(ss_sales_price) netpaid +from store_sales + ,store_returns + ,store + ,item + ,customer + ,customer_address +where ss_ticket_number = sr_ticket_number + and ss_item_sk = sr_item_sk + and ss_customer_sk = c_customer_sk + and ss_item_sk = i_item_sk + and ss_store_sk = s_store_sk + and c_birth_country = upper(ca_country) + and s_zip = ca_zip +and s_market_id=7 +group by c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size) +select c_last_name + ,c_first_name + ,s_store_name + ,sum(netpaid) paid +from ssales +where i_color = 'orchid' +group by c_last_name + ,c_first_name + ,s_store_name +having sum(netpaid) > (select 0.05*avg(netpaid) + from ssales) +; http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/queries/clientpositive/perf/cbo_query25.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query25.q b/ql/src/test/queries/clientpositive/perf/cbo_query25.q new file mode 100644 index 0000000..9611e28 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query25.q @@ -0,0 +1,50 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query25.tpl and seed 1819994127 +explain cbo +select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,sum(ss_net_profit) as store_sales_profit + ,sum(sr_net_loss) as store_returns_loss + ,sum(cs_net_profit) as catalog_sales_profit + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 2000 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 10 + and d2.d_year = 2000 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_moy between 4 and 10 + and d3.d_year = 2000 + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + limit 100; + +-- end query 1 in stream 0 using template query25.tpl http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/queries/clientpositive/perf/cbo_query26.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query26.q b/ql/src/test/queries/clientpositive/perf/cbo_query26.q new file mode 100644 index 0000000..8b874c8 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query26.q @@ -0,0 +1,23 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query26.tpl and seed 1930872976 +explain cbo +select i_item_id, + avg(cs_quantity) agg1, + avg(cs_list_price) agg2, + avg(cs_coupon_amt) agg3, + avg(cs_sales_price) agg4 + from catalog_sales, customer_demographics, date_dim, item, promotion + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd_demo_sk and + cs_promo_sk = p_promo_sk and + cd_gender = 'F' and + cd_marital_status = 'W' and + cd_education_status = 'Primary' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 1998 + group by i_item_id + order by i_item_id + limit 100; + +-- end query 1 in stream 0 using template query26.tpl http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/queries/clientpositive/perf/cbo_query27.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query27.q b/ql/src/test/queries/clientpositive/perf/cbo_query27.q new file mode 100644 index 0000000..48eaad5 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query27.q @@ -0,0 +1,25 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query27.tpl and seed 2017787633 +explain cbo +select i_item_id, + s_state, grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, store, item + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_store_sk = s_store_sk and + ss_cdemo_sk = cd_demo_sk and + cd_gender = 'M' and + cd_marital_status = 'U' and + cd_education_status = '2 yr Degree' and + d_year = 2001 and + s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC') + group by rollup (i_item_id, s_state) + order by i_item_id + ,s_state + limit 100; + +-- end query 1 in stream 0 using template query27.tpl http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/queries/clientpositive/perf/cbo_query28.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query28.q b/ql/src/test/queries/clientpositive/perf/cbo_query28.q new file mode 100644 index 0000000..ad9dacd --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query28.q @@ -0,0 +1,59 @@ +set hive.mapred.mode=nonstrict; +set hive.optimize.metadataonly=true; + +-- start query 1 in stream 0 using template query28.tpl and seed 444293455 +explain cbo +select * +from (select avg(ss_list_price) B1_LP + ,count(ss_list_price) B1_CNT + ,count(distinct ss_list_price) B1_CNTD + from store_sales + where ss_quantity between 0 and 5 + and (ss_list_price between 11 and 11+10 + or ss_coupon_amt between 460 and 460+1000 + or ss_wholesale_cost between 14 and 14+20)) B1, + (select avg(ss_list_price) B2_LP + ,count(ss_list_price) B2_CNT + ,count(distinct ss_list_price) B2_CNTD + from store_sales + where ss_quantity between 6 and 10 + and (ss_list_price between 91 and 91+10 + or ss_coupon_amt between 1430 and 1430+1000 + or ss_wholesale_cost between 32 and 32+20)) B2, + (select avg(ss_list_price) B3_LP + ,count(ss_list_price) B3_CNT + ,count(distinct ss_list_price) B3_CNTD + from store_sales + where ss_quantity between 11 and 15 + and (ss_list_price between 66 and 66+10 + or ss_coupon_amt between 920 and 920+1000 + or ss_wholesale_cost between 4 and 4+20)) B3, + (select avg(ss_list_price) B4_LP + ,count(ss_list_price) B4_CNT + ,count(distinct ss_list_price) B4_CNTD + from store_sales + where ss_quantity between 16 and 20 + and (ss_list_price between 142 and 142+10 + or ss_coupon_amt between 3054 and 3054+1000 + or ss_wholesale_cost between 80 and 80+20)) B4, + (select avg(ss_list_price) B5_LP + ,count(ss_list_price) B5_CNT + ,count(distinct ss_list_price) B5_CNTD + from store_sales + where ss_quantity between 21 and 25 + and (ss_list_price between 135 and 135+10 + or ss_coupon_amt between 14180 and 14180+1000 + or ss_wholesale_cost between 38 and 38+20)) B5, + (select avg(ss_list_price) B6_LP + ,count(ss_list_price) B6_CNT + ,count(distinct ss_list_price) B6_CNTD + from store_sales + where ss_quantity between 26 and 30 + and (ss_list_price between 28 and 28+10 + or ss_coupon_amt between 2513 and 2513+1000 + or ss_wholesale_cost between 42 and 42+20)) B6 +limit 100; + +-- end query 1 in stream 0 using template query28.tpl + +set hive.optimize.metadataonly=false; http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/queries/clientpositive/perf/cbo_query29.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query29.q b/ql/src/test/queries/clientpositive/perf/cbo_query29.q new file mode 100644 index 0000000..ea9ec12 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query29.q @@ -0,0 +1,49 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query29.tpl and seed 2031708268 +explain cbo +select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,sum(ss_quantity) as store_sales_quantity + ,sum(sr_return_quantity) as store_returns_quantity + ,sum(cs_quantity) as catalog_sales_quantity + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 1999 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 4 + 3 + and d2.d_year = 1999 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_year in (1999,1999+1,1999+2) + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + limit 100; + +-- end query 1 in stream 0 using template query29.tpl http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/queries/clientpositive/perf/cbo_query3.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query3.q b/ql/src/test/queries/clientpositive/perf/cbo_query3.q new file mode 100644 index 0000000..b1bc55b --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query3.q @@ -0,0 +1,23 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query3.tpl and seed 2031708268 +explain cbo +select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_ext_sales_price) sum_agg + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manufact_id = 436 + and dt.d_moy=12 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,sum_agg desc + ,brand_id + limit 100; + +-- end query 1 in stream 0 using template query3.tpl
