IMPALA-5547: Rework FK/PK join detection. Reworks the FK/PK join detection logic to: - more accurately recognize many-to-many joins - avoid dim/dim joins for multi-column PKs
The new detection logic maintains our existing philosophy of generally assuming a FK/PK join, unless there is strong evidence to the contrary, as follows. For each set of simple equi-join conjuncts between two tables, we compute the joint NDV of the right-hand side columns by multiplication, and if the joint NDV is significantly smaller than the right-hand side row count, then we are fairly confident that the right-hand side is not a PK. Otherwise, we assume the set of conjuncts could represent a FK/PK relationship. Extends the explain plan to include the outcome of the FK/PK detection at EXPLAIN_LEVEL > STANDARD. Performance testing: 1. Full TPC-DS run on 10TB: - Q10 improved by >100x - Q72 improved by >25x - Q17,Q26,Q29 improved by 2x - Q64 regressed by 10x - Total runtime: Improved by 2x - Geomean: Minor improvement The regression of Q64 is understood and we will try to address it in follow-on changes. The previous plan was better by accident and not because of superior logic. 2. Nightly TPC-H and TPC-DS runs: - No perf differences Testing: - The existing planner tests cover the changes. - Code/hdfs run passed. 
Change-Id: I49074fe743a28573cff541ef7dbd0edd88892067 Reviewed-on: http://gerrit.cloudera.org:8080/7257 Reviewed-by: Alex Behm <[email protected]> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/9f678a74 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/9f678a74 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/9f678a74 Branch: refs/heads/master Commit: 9f678a74269250bf5c7ae2c5e8afd93c5b3734de Parents: 931bf49 Author: Alex Behm <[email protected]> Authored: Tue Jun 6 16:54:41 2017 -0700 Committer: Impala Public Jenkins <[email protected]> Committed: Mon Jul 3 00:04:54 2017 +0000 ---------------------------------------------------------------------- .../apache/impala/analysis/JoinOperator.java | 14 +- .../org/apache/impala/planner/HashJoinNode.java | 17 + .../org/apache/impala/planner/JoinNode.java | 309 ++-- .../org/apache/impala/planner/PlannerTest.java | 8 + .../queries/PlannerTest/constant-folding.test | 1 + .../PlannerTest/fk-pk-join-detection.test | 440 ++++++ .../queries/PlannerTest/hbase.test | 24 +- .../queries/PlannerTest/joins.test | 98 +- .../queries/PlannerTest/mt-dop-validation.test | 2 + .../PlannerTest/resource-requirements.test | 46 +- .../PlannerTest/spillable-buffer-sizing.test | 12 + .../queries/PlannerTest/tablesample.test | 14 +- .../queries/PlannerTest/tpcds-all.test | 1335 +++++++++--------- .../queries/PlannerTest/tpch-nested.test | 262 ++-- .../queries/QueryTest/explain-level2.test | 1 + .../queries/QueryTest/explain-level3.test | 1 + 16 files changed, 1591 insertions(+), 993 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f678a74/fe/src/main/java/org/apache/impala/analysis/JoinOperator.java ---------------------------------------------------------------------- diff --git 
a/fe/src/main/java/org/apache/impala/analysis/JoinOperator.java b/fe/src/main/java/org/apache/impala/analysis/JoinOperator.java index f79e490..e2321f0 100644 --- a/fe/src/main/java/org/apache/impala/analysis/JoinOperator.java +++ b/fe/src/main/java/org/apache/impala/analysis/JoinOperator.java @@ -44,18 +44,10 @@ public enum JoinOperator { } @Override - public String toString() { - return description_; - } - - public TJoinOp toThrift() { - return thriftJoinOp_; - } - - public boolean isInnerJoin() { - return this == INNER_JOIN; - } + public String toString() { return description_; } + public TJoinOp toThrift() { return thriftJoinOp_; } + public boolean isInnerJoin() { return this == INNER_JOIN; } public boolean isLeftOuterJoin() { return this == LEFT_OUTER_JOIN; } public boolean isRightOuterJoin() { return this == RIGHT_OUTER_JOIN; } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f678a74/fe/src/main/java/org/apache/impala/planner/HashJoinNode.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/planner/HashJoinNode.java b/fe/src/main/java/org/apache/impala/planner/HashJoinNode.java index 030f9c5..48492b1 100644 --- a/fe/src/main/java/org/apache/impala/planner/HashJoinNode.java +++ b/fe/src/main/java/org/apache/impala/planner/HashJoinNode.java @@ -37,6 +37,7 @@ import org.apache.impala.thrift.TPlanNodeType; import org.apache.impala.thrift.TQueryOptions; import org.apache.impala.util.BitUtil; +import com.google.common.base.Joiner; import com.google.common.base.Objects; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; @@ -163,6 +164,22 @@ public class HashJoinNode extends JoinNode { if (i + 1 != eqJoinConjuncts_.size()) output.append(", "); } output.append("\n"); + + // Optionally print FK/PK equi-join conjuncts. 
+ if (joinOp_.isInnerJoin() || joinOp_.isOuterJoin()) { + if (detailLevel.ordinal() > TExplainLevel.STANDARD.ordinal()) { + output.append(detailPrefix + "fk/pk conjuncts: "); + if (fkPkEqJoinConjuncts_ == null) { + output.append("none"); + } else if (fkPkEqJoinConjuncts_.isEmpty()) { + output.append("assumed fk/pk"); + } else { + output.append(Joiner.on(", ").join(fkPkEqJoinConjuncts_)); + } + output.append("\n"); + } + } + if (!otherJoinConjuncts_.isEmpty()) { output.append(detailPrefix + "other join predicates: ") .append(getExplainString(otherJoinConjuncts_) + "\n"); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f678a74/fe/src/main/java/org/apache/impala/planner/JoinNode.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/planner/JoinNode.java b/fe/src/main/java/org/apache/impala/planner/JoinNode.java index 0c983d9..47fa3e5 100644 --- a/fe/src/main/java/org/apache/impala/planner/JoinNode.java +++ b/fe/src/main/java/org/apache/impala/planner/JoinNode.java @@ -19,9 +19,7 @@ package org.apache.impala.planner; import java.util.Collections; import java.util.List; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import java.util.Map; import org.apache.impala.analysis.Analyzer; import org.apache.impala.analysis.BinaryPredicate; @@ -29,12 +27,18 @@ import org.apache.impala.analysis.Expr; import org.apache.impala.analysis.JoinOperator; import org.apache.impala.analysis.SlotDescriptor; import org.apache.impala.analysis.SlotRef; +import org.apache.impala.analysis.TupleId; import org.apache.impala.catalog.ColumnStats; import org.apache.impala.catalog.Table; import org.apache.impala.common.ImpalaException; +import org.apache.impala.common.Pair; import org.apache.impala.thrift.TJoinDistributionMode; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; +import 
com.google.common.collect.Maps; /** * Logical join operator. Subclasses correspond to implementations of the join operator @@ -72,6 +76,20 @@ public abstract class JoinNode extends PlanNode { // joinTableId_ protected JoinTableId joinTableId_ = JoinTableId.INVALID; + // List of equi-join conjuncts believed to be involved in a FK/PK relationship. + // The conjuncts are grouped by the tuple ids of the joined base table refs. A conjunct + // is only included in this list if it is of the form <SlotRef> = <SlotRef> and the + // underlying columns and tables on both sides have stats. See getFkPkEqJoinConjuncts() + // for more details on the FK/PK detection logic. + // The value of this member represents three different states: + // - null: There are eligible join conjuncts and we have high confidence that none of + // them represent a FK/PK relationship. + // - non-null and empty: There are no eligible join conjuncts. We assume a FK/PK join. + // - non-null and non-empty: There are eligible join conjuncts that could represent + // a FK/PK relationship. + // Theses conjuncts are printed in the explain plan. + protected List<EqJoinConjunctScanSlots> fkPkEqJoinConjuncts_; + public enum DistributionMode { NONE("NONE"), BROADCAST("BROADCAST"), @@ -178,14 +196,16 @@ public abstract class JoinNode extends PlanNode { * * We estimate the cardinality based on equality join predicates of the form * "L.c = R.d", with L being a table from child(0) and R a table from child(1). - * For each such join predicate we try to determine whether it is a foreign/primary - * key (FK/PK) join condition, and either use a special FK/PK estimation or a generic - * estimation method. 
We maintain the minimum cardinality for each method separately, - * and finally return in order of preference: - * - the FK/PK estimate, if there was at least one FP/PK predicate - * - the generic estimate, if there was at least one predicate with sufficient stats - * - otherwise, we optimistically assume a FK/PK join with a join selectivity of 1, - * and return |child(0)| + * For each set of such join predicates between two tables, we try to determine whether + * the tables might have foreign/primary key (FK/PK) relationship, and either use a + * special FK/PK estimation or a generic estimation method. Once the estimation method + * has been determined we compute the final cardinality based on the single most + * selective join predicate. We do not attempt to estimate the joint selectivity of + * multiple join predicates to avoid underestimation. + * The FK/PK detection logic is based on the assumption that most joins are FK/PK. We + * only use the generic estimation method if we have high confidence that there is no + * FK/PK relationship. In the absence of relevant stats, we assume FK/PK with a join + * selectivity of 1. * * FK/PK estimation: * cardinality = |child(0)| * (|child(1)| / |R|) * (NDV(R.d) / NDV(L.c)) @@ -208,111 +228,206 @@ public abstract class JoinNode extends PlanNode { * might have reduce the cardinality and NDVs */ private long getJoinCardinality(Analyzer analyzer) { - Preconditions.checkState( - joinOp_ == JoinOperator.INNER_JOIN || joinOp_.isOuterJoin()); + Preconditions.checkState(joinOp_.isInnerJoin() || joinOp_.isOuterJoin()); + fkPkEqJoinConjuncts_ = Collections.emptyList(); long lhsCard = getChild(0).cardinality_; long rhsCard = getChild(1).cardinality_; - if (lhsCard == -1 || rhsCard == -1) return -1; + if (lhsCard == -1 || rhsCard == -1) { + // Assume FK/PK with a join selectivity of 1. + return lhsCard; + } - // Minimum of estimated join cardinalities for FK/PK join conditions. 
- long fkPkJoinCard = -1; - // Minimum of estimated join cardinalities for other join conditions. - long genericJoinCard = -1; + // Collect join conjuncts that are eligible to participate in cardinality estimation. + List<EqJoinConjunctScanSlots> eqJoinConjunctSlots = Lists.newArrayList(); for (Expr eqJoinConjunct: eqJoinConjuncts_) { - SlotStats lhsStats = SlotStats.create(eqJoinConjunct.getChild(0)); - SlotStats rhsStats = SlotStats.create(eqJoinConjunct.getChild(1)); - // Ignore the equi-join conjunct if we have no relevant table or column stats. - if (lhsStats == null || rhsStats == null) continue; - - // We assume a FK/PK join based on the following intuitions: - // 1. NDV(L.c) <= NDV(R.d) - // The reasoning is that a FK/PK join is unlikely if the foreign key - // side has a higher NDV than the primary key side. We may miss true - // FK/PK joins due to inaccurate and/or stale stats. - // 2. R.d is probably a primary key. - // Requires that NDV(R.d) is very close to |R|. - // The idea is that, by default, we assume that every join is a FK/PK join unless - // we have compelling evidence that suggests otherwise, so by using || we give the - // FK/PK assumption more chances to succeed. - if (lhsStats.ndv <= rhsStats.ndv * (1.0 + FK_PK_MAX_STATS_DELTA_PERC) || - Math.abs(rhsStats.numRows - rhsStats.ndv) / (double) rhsStats.numRows - <= FK_PK_MAX_STATS_DELTA_PERC) { - // Adjust the join selectivity based on the NDV ratio to avoid underestimating - // the cardinality if the PK side has a higher NDV than the FK side. - double ndvRatio = (double) rhsStats.ndv / (double) lhsStats.ndv; - double rhsSelectivity = (double) rhsCard / (double) rhsStats.numRows; - long joinCard = (long) Math.ceil(lhsCard * rhsSelectivity * ndvRatio); - // FK/PK join cardinality must be <= the lhs cardinality. 
- joinCard = Math.min(lhsCard, joinCard); - if (fkPkJoinCard == -1) { - fkPkJoinCard = joinCard; - } else { - fkPkJoinCard = Math.min(fkPkJoinCard, joinCard); - } + EqJoinConjunctScanSlots slots = EqJoinConjunctScanSlots.create(eqJoinConjunct); + if (slots != null) eqJoinConjunctSlots.add(slots); + } + + if (eqJoinConjunctSlots.isEmpty()) { + // There are no eligible equi-join conjuncts. Optimistically assume FK/PK with a + // join selectivity of 1. + return lhsCard; + } + + fkPkEqJoinConjuncts_ = getFkPkEqJoinConjuncts(eqJoinConjunctSlots); + if (fkPkEqJoinConjuncts_ != null) { + return getFkPkJoinCardinality(fkPkEqJoinConjuncts_, lhsCard, rhsCard); + } else { + return getGenericJoinCardinality(eqJoinConjunctSlots, lhsCard, rhsCard); + } + } + + /** + * Returns a list of equi-join conjuncts believed to have a FK/PK relationship based on + * whether the right-hand side might be a PK. The conjuncts are grouped by the tuple + * ids of the joined base table refs. We prefer to include the conjuncts in the result + * unless we have high confidence that a FK/PK relationship is not present. The + * right-hand side columns are unlikely to form a PK if their joint NDV is less than + * the right-hand side row count. If the joint NDV is close to or higher than the row + * count, then it might be a PK. + * The given list of eligible join conjuncts must be non-empty. + */ + private List<EqJoinConjunctScanSlots> getFkPkEqJoinConjuncts( + List<EqJoinConjunctScanSlots> eqJoinConjunctSlots) { + Preconditions.checkState(!eqJoinConjunctSlots.isEmpty()); + Map<Pair<TupleId, TupleId>, List<EqJoinConjunctScanSlots>> scanSlotsByJoinedTids = + EqJoinConjunctScanSlots.groupByJoinedTupleIds(eqJoinConjunctSlots); + + List<EqJoinConjunctScanSlots> result = null; + // Iterate over all groups of conjuncts that belong to the same joined tuple id pair. + // For each group, we compute the join NDV of the rhs slots and compare it to the + // number of rows in the rhs table. 
+ for (List<EqJoinConjunctScanSlots> fkPkCandidate: scanSlotsByJoinedTids.values()) { + double jointNdv = 1.0; + for (EqJoinConjunctScanSlots slots: fkPkCandidate) jointNdv *= slots.rhsNdv(); + double rhsNumRows = fkPkCandidate.get(0).rhsNumRows(); + if (jointNdv >= Math.round(rhsNumRows * (1.0 - FK_PK_MAX_STATS_DELTA_PERC))) { + // We cannot disprove that the RHS is a PK. + if (result == null) result = Lists.newArrayList(); + result.addAll(fkPkCandidate); + } + } + return result; + } + + /** + * Returns the estimated join cardinality of a FK/PK inner or outer join based on the + * given list of equi-join conjunct slots and the join input cardinalities. + * The returned result is >= 0. + * The list of join conjuncts must be non-empty and the cardinalities must be >= 0. + */ + private long getFkPkJoinCardinality(List<EqJoinConjunctScanSlots> eqJoinConjunctSlots, + long lhsCard, long rhsCard) { + Preconditions.checkState(!eqJoinConjunctSlots.isEmpty()); + Preconditions.checkState(lhsCard >= 0 && rhsCard >= 0); + + long result = -1; + for (EqJoinConjunctScanSlots slots: eqJoinConjunctSlots) { + // Adjust the join selectivity based on the NDV ratio to avoid underestimating + // the cardinality if the PK side has a higher NDV than the FK side. + double ndvRatio = 1.0; + if (slots.lhsNdv() > 0) ndvRatio = slots.rhsNdv() / slots.lhsNdv(); + double rhsSelectivity = Double.MIN_VALUE; + if (slots.rhsNumRows() > 0) rhsSelectivity = rhsCard / slots.rhsNumRows(); + long joinCard = (long) Math.ceil(lhsCard * rhsSelectivity * ndvRatio); + if (result == -1) { + result = joinCard; } else { - // Adjust the NDVs on both sides to account for predicates. Intuitively, the NDVs - // should only decrease, so we bail if the adjustment would lead to an increase. - // TODO: Adjust the NDVs more systematically throughout the plan tree to - // get a more accurate NDV at this plan node. 
- if (lhsCard > lhsStats.numRows || rhsCard > rhsStats.numRows) continue; - double lhsAdjNdv = lhsStats.ndv * ((double)lhsCard / lhsStats.numRows); - double rhsAdjNdv = rhsStats.ndv * ((double)rhsCard / rhsStats.numRows); - // Generic join cardinality estimation. - long joinCard = (long) Math.ceil( - (lhsCard / Math.max(lhsAdjNdv, rhsAdjNdv)) * rhsCard); - if (genericJoinCard == -1) { - genericJoinCard = joinCard; - } else { - genericJoinCard = Math.min(genericJoinCard, joinCard); - } + result = Math.min(result, joinCard); } } + // FK/PK join cardinality must be <= the lhs cardinality. + result = Math.min(result, lhsCard); + Preconditions.checkState(result >= 0); + return result; + } - if (fkPkJoinCard != -1) { - return fkPkJoinCard; - } else if (genericJoinCard != -1) { - return genericJoinCard; - } else { - // Optimistic FK/PK assumption with join selectivity of 1. - return lhsCard; + /** + * Returns the estimated join cardinality of a generic N:M inner or outer join based + * on the given list of equi-join conjunct slots and the join input cardinalities. + * The returned result is >= 0. + * The list of join conjuncts must be non-empty and the cardinalities must be >= 0. + */ + private long getGenericJoinCardinality(List<EqJoinConjunctScanSlots> eqJoinConjunctSlots, + long lhsCard, long rhsCard) { + Preconditions.checkState(joinOp_.isInnerJoin() || joinOp_.isOuterJoin()); + Preconditions.checkState(!eqJoinConjunctSlots.isEmpty()); + Preconditions.checkState(lhsCard >= 0 && rhsCard >= 0); + + long result = -1; + for (EqJoinConjunctScanSlots slots: eqJoinConjunctSlots) { + // Adjust the NDVs on both sides to account for predicates. Intuitively, the NDVs + // should only decrease. We ignore adjustments that would lead to an increase. 
+ double lhsAdjNdv = slots.lhsNdv(); + if (slots.lhsNumRows() > lhsCard) lhsAdjNdv *= lhsCard / slots.lhsNumRows(); + double rhsAdjNdv = slots.rhsNdv(); + if (slots.rhsNumRows() > rhsCard) rhsAdjNdv *= rhsCard / slots.rhsNumRows(); + long joinCard = Math.round((lhsCard / Math.max(lhsAdjNdv, rhsAdjNdv)) * rhsCard); + if (result == -1) { + result = joinCard; + } else { + result = Math.min(result, joinCard); + } } + Preconditions.checkState(result >= 0); + return result; } /** - * Class combining column and table stats for a particular slot. Contains the NDV - * for the slot and the number of rows in the originating table. + * Holds the source scan slots of a <SlotRef> = <SlotRef> join predicate. + * The underlying table and column on both sides have stats. */ - private static class SlotStats { - // Number of distinct values of the slot. - public final long ndv; - // Number of rows in the originating table. - public final long numRows; - - public SlotStats(long ndv, long numRows) { - // Cap NDV at num rows of the table. - this.ndv = Math.min(ndv, numRows); - this.numRows = numRows; + public static final class EqJoinConjunctScanSlots { + private final Expr eqJoinConjunct_; + private final SlotDescriptor lhs_; + private final SlotDescriptor rhs_; + + private EqJoinConjunctScanSlots(Expr eqJoinConjunct, SlotDescriptor lhs, + SlotDescriptor rhs) { + eqJoinConjunct_ = eqJoinConjunct; + lhs_ = lhs; + rhs_ = rhs; + } + + // Convenience functions. They return double to avoid excessive casts in callers. 
+ public double lhsNdv() { + return Math.min(lhs_.getStats().getNumDistinctValues(), lhsNumRows()); + } + public double rhsNdv() { + return Math.min(rhs_.getStats().getNumDistinctValues(), rhsNumRows()); + } + public double lhsNumRows() { return lhs_.getParent().getTable().getNumRows(); } + public double rhsNumRows() { return rhs_.getParent().getTable().getNumRows(); } + + public TupleId lhsTid() { return lhs_.getParent().getId(); } + public TupleId rhsTid() { return rhs_.getParent().getId(); } + + /** + * Returns a new EqJoinConjunctScanSlots for the given equi-join conjunct or null if + * the given conjunct is not of the form <SlotRef> = <SlotRef> or if the underlying + * table/column of at least one side is missing stats. + */ + public static EqJoinConjunctScanSlots create(Expr eqJoinConjunct) { + if (!Expr.IS_EQ_BINARY_PREDICATE.apply(eqJoinConjunct)) return null; + SlotDescriptor lhsScanSlot = eqJoinConjunct.getChild(0).findSrcScanSlot(); + if (lhsScanSlot == null || !hasNumRowsAndNdvStats(lhsScanSlot)) return null; + SlotDescriptor rhsScanSlot = eqJoinConjunct.getChild(1).findSrcScanSlot(); + if (rhsScanSlot == null || !hasNumRowsAndNdvStats(rhsScanSlot)) return null; + return new EqJoinConjunctScanSlots(eqJoinConjunct, lhsScanSlot, rhsScanSlot); + } + + private static boolean hasNumRowsAndNdvStats(SlotDescriptor slotDesc) { + if (slotDesc.getColumn() == null) return false; + if (!slotDesc.getStats().hasNumDistinctValues()) return false; + Table tbl = slotDesc.getParent().getTable(); + if (tbl == null || tbl.getNumRows() == -1) return false; + return true; } /** - * Returns a new SlotStats object from the given expr that is guaranteed - * to have valid stats. - * Returns null if 'e' is not a SlotRef or a cast SlotRef, or if there are no - * valid table/column stats for 'e'. + * Groups the given EqJoinConjunctScanSlots by the lhs/rhs tuple combination + * and returns the result as a map. 
*/ - public static SlotStats create(Expr e) { - // We need both the table and column stats, but 'e' might not directly reference - // a scan slot, e.g., if 'e' references a grouping slot of an agg. So we look for - // that source scan slot, traversing through materialization points if necessary. - SlotDescriptor slotDesc = e.findSrcScanSlot(); - if (slotDesc == null) return null; - Table table = slotDesc.getParent().getTable(); - if (table == null || table.getNumRows() == -1) return null; - if (!slotDesc.getStats().hasNumDistinctValues()) return null; - return new SlotStats( - slotDesc.getStats().getNumDistinctValues(), table.getNumRows()); + public static Map<Pair<TupleId, TupleId>, List<EqJoinConjunctScanSlots>> + groupByJoinedTupleIds(List<EqJoinConjunctScanSlots> eqJoinConjunctSlots) { + Map<Pair<TupleId, TupleId>, List<EqJoinConjunctScanSlots>> scanSlotsByJoinedTids = + Maps.newLinkedHashMap(); + for (EqJoinConjunctScanSlots slots: eqJoinConjunctSlots) { + Pair<TupleId, TupleId> tids = Pair.create(slots.lhsTid(), slots.rhsTid()); + List<EqJoinConjunctScanSlots> scanSlots = scanSlotsByJoinedTids.get(tids); + if (scanSlots == null) { + scanSlots = Lists.newArrayList(); + scanSlotsByJoinedTids.put(tids, scanSlots); + } + scanSlots.add(slots); + } + return scanSlotsByJoinedTids; } + + @Override + public String toString() { return eqJoinConjunct_.toSql(); } } /** http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f678a74/fe/src/test/java/org/apache/impala/planner/PlannerTest.java ---------------------------------------------------------------------- diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java index b920555..d33e678 100644 --- a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java +++ b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java @@ -143,6 +143,14 @@ public class PlannerTest extends PlannerTestBase { } @Test + public void 
testFkPkJoinDetection() { + TQueryOptions options = defaultQueryOptions(); + // The FK/PK detection result is included in EXTENDED or higher. + options.setExplain_level(TExplainLevel.EXTENDED); + runPlannerTestFile("fk-pk-join-detection", options); + } + + @Test public void testOrder() { runPlannerTestFile("order"); } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f678a74/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test b/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test index 3600aef..82f5c3e 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test @@ -130,6 +130,7 @@ PLAN-ROOT SINK | 02:HASH JOIN [LEFT OUTER JOIN] | hash predicates: 2 + a.id = b.id - 2 +| fk/pk conjuncts: assumed fk/pk | other join predicates: a.int_col <= b.bigint_col + 97, a.int_col >= 0 + b.bigint_col | other predicates: CAST(b.double_col AS DECIMAL(3,2)) > 11.1 | mem-estimate=15.68KB mem-reservation=136.00MB http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f678a74/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test b/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test new file mode 100644 index 0000000..c2065ed --- /dev/null +++ b/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test @@ -0,0 +1,440 @@ +# Single-column FK/PK join detection. +select * from +tpcds.store_sales inner join tpcds.customer +on ss_customer_sk = c_customer_sk +where c_salutation = 'Mrs.' 
+---- PLAN +F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +PLAN-ROOT SINK +| mem-estimate=0B mem-reservation=0B +| +02:HASH JOIN [INNER JOIN] +| hash predicates: ss_customer_sk = c_customer_sk +| fk/pk conjuncts: ss_customer_sk = c_customer_sk +| runtime filters: RF000 <- c_customer_sk +| mem-estimate=4.46MB mem-reservation=136.00MB +| tuple-ids=0,1 row-size=355B cardinality=529700 +| +|--01:SCAN HDFS [tpcds.customer] +| partitions=1/1 files=1 size=12.60MB +| predicates: c_salutation = 'Mrs.' +| stats-rows=100000 extrapolated-rows=disabled +| table stats: rows=100000 size=12.60MB +| column stats: all +| parquet dictionary predicates: c_salutation = 'Mrs.' +| mem-estimate=48.00MB mem-reservation=0B +| tuple-ids=1 row-size=255B cardinality=16667 +| +00:SCAN HDFS [tpcds.store_sales] + partitions=1824/1824 files=1824 size=326.32MB + runtime filters: RF000 -> ss_customer_sk + stats-rows=2880404 extrapolated-rows=disabled + table stats: rows=2880404 size=326.32MB + column stats: all + mem-estimate=128.00MB mem-reservation=0B + tuple-ids=0 row-size=100B cardinality=2880404 +==== +# Single-column FK/PK join detection on left outer join. The join cardinality +# is not reduced based on the selectivity of the rhs. +select * from +tpcds.store_sales left outer join tpcds.customer +on ss_customer_sk = c_customer_sk +where c_salutation = 'Mrs.' +---- PLAN +F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +PLAN-ROOT SINK +| mem-estimate=0B mem-reservation=0B +| +02:HASH JOIN [LEFT OUTER JOIN] +| hash predicates: ss_customer_sk = c_customer_sk +| fk/pk conjuncts: ss_customer_sk = c_customer_sk +| other predicates: c_salutation = 'Mrs.' +| mem-estimate=4.46MB mem-reservation=136.00MB +| tuple-ids=0,1N row-size=355B cardinality=2880404 +| +|--01:SCAN HDFS [tpcds.customer] +| partitions=1/1 files=1 size=12.60MB +| predicates: c_salutation = 'Mrs.' 
+| stats-rows=100000 extrapolated-rows=disabled +| table stats: rows=100000 size=12.60MB +| column stats: all +| parquet dictionary predicates: c_salutation = 'Mrs.' +| mem-estimate=48.00MB mem-reservation=0B +| tuple-ids=1 row-size=255B cardinality=16667 +| +00:SCAN HDFS [tpcds.store_sales] + partitions=1824/1824 files=1824 size=326.32MB + stats-rows=2880404 extrapolated-rows=disabled + table stats: rows=2880404 size=326.32MB + column stats: all + mem-estimate=128.00MB mem-reservation=0B + tuple-ids=0 row-size=100B cardinality=2880404 +==== +# Single-column FK/PK join detection on right outer join. The join cardinality +# is reduced based on the selectivity of the rhs. +select * from +tpcds.store_sales right outer join tpcds.customer +on ss_customer_sk = c_customer_sk +where c_salutation = 'Mrs.' +---- PLAN +F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +PLAN-ROOT SINK +| mem-estimate=0B mem-reservation=0B +| +02:HASH JOIN [RIGHT OUTER JOIN] +| hash predicates: ss_customer_sk = c_customer_sk +| fk/pk conjuncts: ss_customer_sk = c_customer_sk +| runtime filters: RF000 <- c_customer_sk +| mem-estimate=4.46MB mem-reservation=136.00MB +| tuple-ids=0N,1 row-size=355B cardinality=529700 +| +|--01:SCAN HDFS [tpcds.customer] +| partitions=1/1 files=1 size=12.60MB +| predicates: c_salutation = 'Mrs.' +| stats-rows=100000 extrapolated-rows=disabled +| table stats: rows=100000 size=12.60MB +| column stats: all +| parquet dictionary predicates: c_salutation = 'Mrs.' 
+| mem-estimate=48.00MB mem-reservation=0B +| tuple-ids=1 row-size=255B cardinality=16667 +| +00:SCAN HDFS [tpcds.store_sales] + partitions=1824/1824 files=1824 size=326.32MB + runtime filters: RF000 -> ss_customer_sk + stats-rows=2880404 extrapolated-rows=disabled + table stats: rows=2880404 size=326.32MB + column stats: all + mem-estimate=128.00MB mem-reservation=0B + tuple-ids=0 row-size=100B cardinality=2880404 +==== +# Multi-column FK/PK join detection +select * from +tpcds.store_sales inner join tpcds.store_returns +on ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number +where sr_return_quantity < 10 +---- PLAN +F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +PLAN-ROOT SINK +| mem-estimate=0B mem-reservation=0B +| +02:HASH JOIN [INNER JOIN] +| hash predicates: ss_item_sk = sr_item_sk, ss_ticket_number = sr_ticket_number +| fk/pk conjuncts: ss_item_sk = sr_item_sk, ss_ticket_number = sr_ticket_number +| runtime filters: RF000 <- sr_item_sk, RF001 <- sr_ticket_number +| mem-estimate=2.65MB mem-reservation=136.00MB +| tuple-ids=0,1 row-size=188B cardinality=211838 +| +|--01:SCAN HDFS [tpcds.store_returns] +| partitions=1/1 files=1 size=31.19MB +| predicates: sr_return_quantity < 10 +| stats-rows=287514 extrapolated-rows=disabled +| table stats: rows=287514 size=31.19MB +| column stats: all +| parquet dictionary predicates: sr_return_quantity < 10 +| mem-estimate=80.00MB mem-reservation=0B +| tuple-ids=1 row-size=88B cardinality=28751 +| +00:SCAN HDFS [tpcds.store_sales] + partitions=1824/1824 files=1824 size=326.32MB + runtime filters: RF000 -> ss_item_sk, RF001 -> ss_ticket_number + stats-rows=2880404 extrapolated-rows=disabled + table stats: rows=2880404 size=326.32MB + column stats: all + mem-estimate=128.00MB mem-reservation=0B + tuple-ids=0 row-size=100B cardinality=2880404 +==== +# Many-to-many join detection. 
+select * from +tpcds.store_sales inner join tpcds.web_sales +on ss_sold_time_sk = ws_sold_time_sk +---- PLAN +F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +PLAN-ROOT SINK +| mem-estimate=0B mem-reservation=0B +| +02:HASH JOIN [INNER JOIN] +| hash predicates: ss_sold_time_sk = ws_sold_time_sk +| fk/pk conjuncts: none +| runtime filters: RF000 <- ws_sold_time_sk +| mem-estimate=108.67MB mem-reservation=136.00MB +| tuple-ids=0,1 row-size=244B cardinality=44136418 +| +|--01:SCAN HDFS [tpcds.web_sales] +| partitions=1/1 files=1 size=140.07MB +| stats-rows=719384 extrapolated-rows=disabled +| table stats: rows=719384 size=140.07MB +| column stats: all +| mem-estimate=160.00MB mem-reservation=0B +| tuple-ids=1 row-size=144B cardinality=719384 +| +00:SCAN HDFS [tpcds.store_sales] + partitions=1824/1824 files=1824 size=326.32MB + runtime filters: RF000 -> ss_sold_time_sk + stats-rows=2880404 extrapolated-rows=disabled + table stats: rows=2880404 size=326.32MB + column stats: all + mem-estimate=128.00MB mem-reservation=0B + tuple-ids=0 row-size=100B cardinality=2880404 +==== +# PK/PK join is detected as FK/PK. 
+select * from +tpcds.date_dim a inner join tpcds.date_dim b +on a.d_date_sk = b.d_date_sk +where a.d_holiday = "Y" +---- PLAN +F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +PLAN-ROOT SINK +| mem-estimate=0B mem-reservation=0B +| +02:HASH JOIN [INNER JOIN] +| hash predicates: b.d_date_sk = a.d_date_sk +| fk/pk conjuncts: b.d_date_sk = a.d_date_sk +| runtime filters: RF000 <- a.d_date_sk +| mem-estimate=11.62MB mem-reservation=136.00MB +| tuple-ids=1,0 row-size=606B cardinality=36525 +| +|--00:SCAN HDFS [tpcds.date_dim a] +| partitions=1/1 files=1 size=9.84MB +| predicates: a.d_holiday = 'Y' +| stats-rows=73049 extrapolated-rows=disabled +| table stats: rows=73049 size=9.84MB +| column stats: all +| parquet dictionary predicates: a.d_holiday = 'Y' +| mem-estimate=48.00MB mem-reservation=0B +| tuple-ids=0 row-size=303B cardinality=36525 +| +01:SCAN HDFS [tpcds.date_dim b] + partitions=1/1 files=1 size=9.84MB + runtime filters: RF000 -> b.d_date_sk + stats-rows=73049 extrapolated-rows=disabled + table stats: rows=73049 size=9.84MB + column stats: all + mem-estimate=48.00MB mem-reservation=0B + tuple-ids=1 row-size=303B cardinality=73049 +==== +# Single query with various join types combined. 
+select 1 from + tpcds.store_sales, tpcds.store_returns, tpcds.customer, + tpcds.date_dim d1, tpcds.date_dim d2 +where ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number + and ss_sold_date_sk = d1.d_date_sk + and sr_returned_date_sk = d2.d_date_sk + and ss_addr_sk = c_current_addr_sk + and d1.d_fy_week_seq = 1000 +---- PLAN +F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +PLAN-ROOT SINK +| mem-estimate=0B mem-reservation=0B +| +08:HASH JOIN [INNER JOIN] +| hash predicates: ss_addr_sk = c_current_addr_sk +| fk/pk conjuncts: none +| runtime filters: RF000 <- c_current_addr_sk +| mem-estimate=429.69KB mem-reservation=136.00MB +| tuple-ids=1,0,3,4,2 row-size=60B cardinality=19358 +| +|--02:SCAN HDFS [tpcds.customer] +| partitions=1/1 files=1 size=12.60MB +| stats-rows=100000 extrapolated-rows=disabled +| table stats: rows=100000 size=12.60MB +| column stats: all +| mem-estimate=48.00MB mem-reservation=0B +| tuple-ids=2 row-size=4B cardinality=100000 +| +07:HASH JOIN [INNER JOIN] +| hash predicates: sr_returned_date_sk = d2.d_date_sk +| fk/pk conjuncts: sr_returned_date_sk = d2.d_date_sk +| runtime filters: RF001 <- d2.d_date_sk +| mem-estimate=313.88KB mem-reservation=136.00MB +| tuple-ids=1,0,3,4 row-size=56B cardinality=8131 +| +|--04:SCAN HDFS [tpcds.date_dim d2] +| partitions=1/1 files=1 size=9.84MB +| stats-rows=73049 extrapolated-rows=disabled +| table stats: rows=73049 size=9.84MB +| column stats: all +| mem-estimate=48.00MB mem-reservation=0B +| tuple-ids=4 row-size=4B cardinality=73049 +| +06:HASH JOIN [INNER JOIN] +| hash predicates: sr_item_sk = ss_item_sk, sr_ticket_number = ss_ticket_number +| fk/pk conjuncts: sr_item_sk = ss_item_sk, sr_ticket_number = ss_ticket_number +| runtime filters: RF002 <- ss_item_sk, RF003 <- ss_ticket_number +| mem-estimate=380.02KB mem-reservation=136.00MB +| tuple-ids=1,0,3 row-size=52B cardinality=8131 +| +|--05:HASH JOIN [INNER JOIN] +| | hash predicates: ss_sold_date_sk = d1.d_date_sk +| | fk/pk 
conjuncts: ss_sold_date_sk = d1.d_date_sk +| | runtime filters: RF004 <- d1.d_date_sk +| | mem-estimate=62B mem-reservation=136.00MB +| | tuple-ids=0,3 row-size=32B cardinality=11055 +| | +| |--03:SCAN HDFS [tpcds.date_dim d1] +| | partitions=1/1 files=1 size=9.84MB +| | predicates: d1.d_fy_week_seq = 1000 +| | stats-rows=73049 extrapolated-rows=disabled +| | table stats: rows=73049 size=9.84MB +| | column stats: all +| | parquet dictionary predicates: d1.d_fy_week_seq = 1000 +| | mem-estimate=48.00MB mem-reservation=0B +| | tuple-ids=3 row-size=8B cardinality=7 +| | +| 00:SCAN HDFS [tpcds.store_sales] +| partitions=1824/1824 files=1824 size=326.32MB +| runtime filters: RF000 -> ss_addr_sk, RF004 -> ss_sold_date_sk +| stats-rows=2880404 extrapolated-rows=disabled +| table stats: rows=2880404 size=326.32MB +| column stats: all +| mem-estimate=128.00MB mem-reservation=0B +| tuple-ids=0 row-size=24B cardinality=2880404 +| +01:SCAN HDFS [tpcds.store_returns] + partitions=1/1 files=1 size=31.19MB + runtime filters: RF001 -> sr_returned_date_sk, RF002 -> sr_item_sk, RF003 -> sr_ticket_number + stats-rows=287514 extrapolated-rows=disabled + table stats: rows=287514 size=31.19MB + column stats: all + mem-estimate=80.00MB mem-reservation=0B + tuple-ids=1 row-size=20B cardinality=287514 +==== +# Assumed FK/PK join because of non-trivial equi-join exprs. 
+select * from +tpcds.store_sales inner join tpcds.customer +on ss_customer_sk % 10 = c_customer_sk / 100 +---- PLAN +F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +PLAN-ROOT SINK +| mem-estimate=0B mem-reservation=0B +| +02:HASH JOIN [INNER JOIN] +| hash predicates: ss_customer_sk % 10 = c_customer_sk / 100 +| fk/pk conjuncts: assumed fk/pk +| runtime filters: RF000 <- c_customer_sk / 100 +| mem-estimate=26.79MB mem-reservation=136.00MB +| tuple-ids=0,1 row-size=355B cardinality=2880404 +| +|--01:SCAN HDFS [tpcds.customer] +| partitions=1/1 files=1 size=12.60MB +| stats-rows=100000 extrapolated-rows=disabled +| table stats: rows=100000 size=12.60MB +| column stats: all +| mem-estimate=48.00MB mem-reservation=0B +| tuple-ids=1 row-size=255B cardinality=100000 +| +00:SCAN HDFS [tpcds.store_sales] + partitions=1824/1824 files=1824 size=326.32MB + runtime filters: RF000 -> ss_customer_sk % 10 + stats-rows=2880404 extrapolated-rows=disabled + table stats: rows=2880404 size=326.32MB + column stats: all + mem-estimate=128.00MB mem-reservation=0B + tuple-ids=0 row-size=100B cardinality=2880404 +==== +# Assumed FK/PK join due to missing stats on the rhs. Join cardinality is equal to +# the lhs cardinality. 
+select 1 from +tpcds.store_sales inner join tpcds_seq_snap.customer +on ss_customer_sk = c_customer_sk +---- PLAN +F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +PLAN-ROOT SINK +| mem-estimate=0B mem-reservation=0B +| +02:HASH JOIN [INNER JOIN] +| hash predicates: ss_customer_sk = c_customer_sk +| fk/pk conjuncts: assumed fk/pk +| runtime filters: RF000 <- c_customer_sk +| mem-estimate=2.00GB mem-reservation=136.00MB +| tuple-ids=0,1 row-size=8B cardinality=2880404 +| +|--01:SCAN HDFS [tpcds_seq_snap.customer] +| partitions=1/1 files=1 size=8.59MB +| stats-rows=unavailable extrapolated-rows=disabled +| table stats: rows=unavailable size=unavailable +| column stats: unavailable +| mem-estimate=48.00MB mem-reservation=0B +| tuple-ids=1 row-size=4B cardinality=unavailable +| +00:SCAN HDFS [tpcds.store_sales] + partitions=1824/1824 files=1824 size=326.32MB + runtime filters: RF000 -> ss_customer_sk + stats-rows=2880404 extrapolated-rows=disabled + table stats: rows=2880404 size=326.32MB + column stats: all + mem-estimate=128.00MB mem-reservation=0B + tuple-ids=0 row-size=4B cardinality=2880404 +==== +# Assumed FK/PK join due to missing stats on the lhs. Join cardinality is unknown. 
+select /* +straight_join */ 1 from +tpcds_seq_snap.store_sales inner join tpcds.customer +on ss_customer_sk = c_customer_sk +---- PLAN +F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +PLAN-ROOT SINK +| mem-estimate=0B mem-reservation=0B +| +02:HASH JOIN [INNER JOIN] +| hash predicates: ss_customer_sk = c_customer_sk +| fk/pk conjuncts: assumed fk/pk +| runtime filters: RF000 <- c_customer_sk +| mem-estimate=429.69KB mem-reservation=136.00MB +| tuple-ids=0,1 row-size=8B cardinality=unavailable +| +|--01:SCAN HDFS [tpcds.customer] +| partitions=1/1 files=1 size=12.60MB +| stats-rows=100000 extrapolated-rows=disabled +| table stats: rows=100000 size=12.60MB +| column stats: all +| mem-estimate=48.00MB mem-reservation=0B +| tuple-ids=1 row-size=4B cardinality=100000 +| +00:SCAN HDFS [tpcds_seq_snap.store_sales] + partitions=1824/1824 files=1824 size=207.90MB + runtime filters: RF000 -> ss_customer_sk + stats-rows=unavailable extrapolated-rows=disabled + table stats: rows=unavailable size=unavailable + column stats: unavailable + mem-estimate=128.00MB mem-reservation=0B + tuple-ids=0 row-size=4B cardinality=unavailable +==== +# Join is detected as many-to-many even though the rhs join columns +# are in a group by on the rhs input. 
+select * from +tpcds.store_sales inner join +(select distinct ws_sold_time_sk from tpcds.web_sales) v +on ss_sold_time_sk = ws_sold_time_sk +---- PLAN +F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 +PLAN-ROOT SINK +| mem-estimate=0B mem-reservation=0B +| +03:HASH JOIN [INNER JOIN] +| hash predicates: ss_sold_time_sk = ws_sold_time_sk +| fk/pk conjuncts: none +| runtime filters: RF000 <- ws_sold_time_sk +| mem-estimate=170.89KB mem-reservation=136.00MB +| tuple-ids=0,2 row-size=104B cardinality=2440073 +| +|--02:AGGREGATE [FINALIZE] +| | group by: ws_sold_time_sk +| | mem-estimate=10.00MB mem-reservation=264.00MB +| | tuple-ids=2 row-size=4B cardinality=39771 +| | +| 01:SCAN HDFS [tpcds.web_sales] +| partitions=1/1 files=1 size=140.07MB +| stats-rows=719384 extrapolated-rows=disabled +| table stats: rows=719384 size=140.07MB +| column stats: all +| mem-estimate=160.00MB mem-reservation=0B +| tuple-ids=1 row-size=4B cardinality=719384 +| +00:SCAN HDFS [tpcds.store_sales] + partitions=1824/1824 files=1824 size=326.32MB + runtime filters: RF000 -> ss_sold_time_sk + stats-rows=2880404 extrapolated-rows=disabled + table stats: rows=2880404 size=326.32MB + column stats: all + mem-estimate=128.00MB mem-reservation=0B + tuple-ids=0 row-size=100B cardinality=2880404 +==== http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f678a74/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test b/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test index 91aff74..e0728be 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test @@ -611,16 +611,16 @@ where PLAN-ROOT SINK | 04:HASH JOIN [INNER JOIN] -| hash predicates: b.int_col = a.int_col -| -|--03:HASH JOIN [INNER JOIN] -| | hash 
predicates: a.int_col = c.int_col -| | -| |--02:SCAN HBASE [functional_hbase.alltypessmall c] -| | predicates: c.month = 4 -| | -| 01:SCAN HBASE [functional_hbase.alltypessmall a] -| -00:SCAN HBASE [functional_hbase.alltypessmall b] - predicates: b.bool_col = FALSE +| hash predicates: a.int_col = b.int_col +| +|--00:SCAN HBASE [functional_hbase.alltypessmall b] +| predicates: b.bool_col = FALSE +| +03:HASH JOIN [INNER JOIN] +| hash predicates: a.int_col = c.int_col +| +|--02:SCAN HBASE [functional_hbase.alltypessmall c] +| predicates: c.month = 4 +| +01:SCAN HBASE [functional_hbase.alltypessmall a] ==== http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f678a74/testdata/workloads/functional-planner/queries/PlannerTest/joins.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/joins.test b/testdata/workloads/functional-planner/queries/PlannerTest/joins.test index 0fdb19d..42493ff 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/joins.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/joins.test @@ -830,64 +830,64 @@ on (b.int_col = c.int_col and c.bool_col = b.bool_col) PLAN-ROOT SINK | 05:HASH JOIN [INNER JOIN] -| hash predicates: b.bool_col = a.bool_col, b.int_col = a.int_col -| runtime filters: RF000 <- a.bool_col, RF001 <- a.int_col +| hash predicates: a.bool_col = b.bool_col, a.int_col = b.int_col +| runtime filters: RF000 <- b.bool_col, RF001 <- b.int_col | -|--04:HASH JOIN [INNER JOIN] -| | hash predicates: a.bool_col = bool_col, a.int_col = int_col -| | runtime filters: RF002 <- bool_col, RF003 <- int_col -| | -| |--03:AGGREGATE [FINALIZE] -| | | output: count(*) -| | | group by: int_col, bool_col -| | | -| | 02:SCAN HDFS [functional.alltypes] -| | partitions=24/24 files=24 size=478.45KB +|--01:SCAN HDFS [functional.alltypes b] +| partitions=24/24 files=24 size=478.45KB +| +04:HASH JOIN [INNER JOIN] +| hash 
predicates: a.bool_col = bool_col, a.int_col = int_col +| runtime filters: RF002 <- bool_col, RF003 <- int_col +| +|--03:AGGREGATE [FINALIZE] +| | output: count(*) +| | group by: int_col, bool_col | | -| 00:SCAN HDFS [functional.alltypes a] +| 02:SCAN HDFS [functional.alltypes] | partitions=24/24 files=24 size=478.45KB -| runtime filters: RF002 -> a.bool_col, RF003 -> a.int_col +| runtime filters: RF000 -> functional.alltypes.bool_col, RF001 -> functional.alltypes.int_col | -01:SCAN HDFS [functional.alltypes b] +00:SCAN HDFS [functional.alltypes a] partitions=24/24 files=24 size=478.45KB - runtime filters: RF000 -> b.bool_col, RF001 -> b.int_col + runtime filters: RF000 -> a.bool_col, RF001 -> a.int_col, RF002 -> a.bool_col, RF003 -> a.int_col ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | 10:EXCHANGE [UNPARTITIONED] | 05:HASH JOIN [INNER JOIN, PARTITIONED] -| hash predicates: b.bool_col = a.bool_col, b.int_col = a.int_col -| runtime filters: RF000 <- a.bool_col, RF001 <- a.int_col +| hash predicates: a.bool_col = b.bool_col, a.int_col = b.int_col +| runtime filters: RF000 <- b.bool_col, RF001 <- b.int_col | -|--04:HASH JOIN [INNER JOIN, PARTITIONED] -| | hash predicates: a.bool_col = bool_col, a.int_col = int_col -| | runtime filters: RF002 <- bool_col, RF003 <- int_col +|--09:EXCHANGE [HASH(b.int_col,b.bool_col)] | | -| |--07:AGGREGATE [FINALIZE] -| | | output: count:merge(*) -| | | group by: int_col, bool_col -| | | -| | 06:EXCHANGE [HASH(int_col,bool_col)] -| | | -| | 03:AGGREGATE [STREAMING] -| | | output: count(*) -| | | group by: int_col, bool_col -| | | -| | 02:SCAN HDFS [functional.alltypes] -| | partitions=24/24 files=24 size=478.45KB +| 01:SCAN HDFS [functional.alltypes b] +| partitions=24/24 files=24 size=478.45KB +| +04:HASH JOIN [INNER JOIN, PARTITIONED] +| hash predicates: a.bool_col = bool_col, a.int_col = int_col +| runtime filters: RF002 <- bool_col, RF003 <- int_col +| +|--07:AGGREGATE [FINALIZE] +| | output: count:merge(*) +| | group by: int_col, 
bool_col | | -| 08:EXCHANGE [HASH(a.int_col,a.bool_col)] +| 06:EXCHANGE [HASH(int_col,bool_col)] | | -| 00:SCAN HDFS [functional.alltypes a] +| 03:AGGREGATE [STREAMING] +| | output: count(*) +| | group by: int_col, bool_col +| | +| 02:SCAN HDFS [functional.alltypes] | partitions=24/24 files=24 size=478.45KB -| runtime filters: RF002 -> a.bool_col, RF003 -> a.int_col +| runtime filters: RF000 -> functional.alltypes.bool_col, RF001 -> functional.alltypes.int_col | -09:EXCHANGE [HASH(b.int_col,b.bool_col)] +08:EXCHANGE [HASH(a.int_col,a.bool_col)] | -01:SCAN HDFS [functional.alltypes b] +00:SCAN HDFS [functional.alltypes a] partitions=24/24 files=24 size=478.45KB - runtime filters: RF000 -> b.bool_col, RF001 -> b.int_col + runtime filters: RF000 -> a.bool_col, RF001 -> a.int_col, RF002 -> a.bool_col, RF003 -> a.int_col ==== # Tests that all predicates from the On-clause are applied (IMPALA-805) # and that slot equivalences are enforced at lowest possible plan node. @@ -987,20 +987,20 @@ where a.id = c.id and b.int_col = c.int_col and b.int_col = c.id PLAN-ROOT SINK | 04:HASH JOIN [INNER JOIN] -| hash predicates: c.id = a.id -| runtime filters: RF000 <- a.id -| -|--00:SCAN HDFS [functional.alltypestiny a] -| partitions=4/4 files=4 size=460B -| -03:HASH JOIN [INNER JOIN] -| hash predicates: b.int_col = c.int_col -| runtime filters: RF001 <- c.int_col +| hash predicates: a.id = c.id +| runtime filters: RF000 <- c.id | |--02:SCAN HDFS [functional.alltypessmall c] | partitions=4/4 files=4 size=6.32KB | predicates: c.id = c.int_col -| runtime filters: RF000 -> c.id +| +03:HASH JOIN [INNER JOIN] +| hash predicates: b.int_col = a.id +| runtime filters: RF001 <- a.id +| +|--00:SCAN HDFS [functional.alltypestiny a] +| partitions=4/4 files=4 size=460B +| runtime filters: RF000 -> a.id | 01:SCAN HDFS [functional.alltypes b] partitions=24/24 files=24 size=478.45KB 
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f678a74/testdata/workloads/functional-planner/queries/PlannerTest/mt-dop-validation.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/mt-dop-validation.test b/testdata/workloads/functional-planner/queries/PlannerTest/mt-dop-validation.test index 8147d0c..41b76bc 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/mt-dop-validation.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/mt-dop-validation.test @@ -311,6 +311,7 @@ PLAN-ROOT SINK | |--06:HASH JOIN [INNER JOIN] | | hash predicates: o1.o_orderkey = o2.o_orderkey + 2 +| | fk/pk conjuncts: assumed fk/pk | | mem-estimate=0B mem-reservation=136.00MB | | tuple-ids=1,0,2 row-size=286B cardinality=10 | | @@ -358,6 +359,7 @@ F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=9 | |--06:HASH JOIN [INNER JOIN] | | hash predicates: o1.o_orderkey = o2.o_orderkey + 2 +| | fk/pk conjuncts: assumed fk/pk | | mem-estimate=0B mem-reservation=136.00MB | | tuple-ids=1,0,2 row-size=286B cardinality=10 | | http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f678a74/testdata/workloads/functional-planner/queries/PlannerTest/resource-requirements.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/resource-requirements.test b/testdata/workloads/functional-planner/queries/PlannerTest/resource-requirements.test index e49938c..0d527de 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/resource-requirements.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/resource-requirements.test @@ -14,9 +14,9 @@ PLAN-ROOT SINK | F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3 00:SCAN HDFS [tpch_parquet.lineitem, RANDOM] - partitions=1/1 files=3 size=193.61MB + partitions=1/1 files=3 size=193.92MB stats-rows=6001215 
extrapolated-rows=disabled - table stats: rows=6001215 size=193.61MB + table stats: rows=6001215 size=193.92MB column stats: all mem-estimate=80.00MB mem-reservation=0B tuple-ids=0 row-size=263B cardinality=6001215 @@ -34,9 +34,9 @@ PLAN-ROOT SINK | F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=6 00:SCAN HDFS [tpch_parquet.lineitem, RANDOM] - partitions=1/1 files=3 size=193.61MB + partitions=1/1 files=3 size=193.92MB stats-rows=6001215 extrapolated-rows=disabled - table stats: rows=6001215 size=193.61MB + table stats: rows=6001215 size=193.92MB column stats: all mem-estimate=80.00MB mem-reservation=0B tuple-ids=0 row-size=263B cardinality=6001215 @@ -272,9 +272,9 @@ F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3 | tuple-ids=1 row-size=16B cardinality=1563438 | 00:SCAN HDFS [tpch_parquet.lineitem, RANDOM] - partitions=1/1 files=3 size=193.61MB + partitions=1/1 files=3 size=193.92MB stats-rows=6001215 extrapolated-rows=disabled - table stats: rows=6001215 size=193.61MB + table stats: rows=6001215 size=193.92MB column stats: all mem-estimate=80.00MB mem-reservation=0B tuple-ids=0 row-size=8B cardinality=6001215 @@ -309,9 +309,9 @@ F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=6 | tuple-ids=1 row-size=16B cardinality=1563438 | 00:SCAN HDFS [tpch_parquet.lineitem, RANDOM] - partitions=1/1 files=3 size=193.61MB + partitions=1/1 files=3 size=193.92MB stats-rows=6001215 extrapolated-rows=disabled - table stats: rows=6001215 size=193.61MB + table stats: rows=6001215 size=193.92MB column stats: all mem-estimate=80.00MB mem-reservation=0B tuple-ids=0 row-size=8B cardinality=6001215 @@ -342,9 +342,9 @@ F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3 | tuple-ids=1 row-size=8B cardinality=1 | 00:SCAN HDFS [tpch_parquet.lineitem, RANDOM] - partitions=1/1 files=3 size=193.61MB + partitions=1/1 files=3 size=193.92MB stats-rows=6001215 extrapolated-rows=disabled - table stats: rows=6001215 size=193.61MB + table stats: rows=6001215 size=193.92MB column stats: all mem-estimate=0B 
mem-reservation=0B tuple-ids=0 row-size=0B cardinality=6001215 @@ -372,9 +372,9 @@ F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=6 | tuple-ids=1 row-size=8B cardinality=1 | 00:SCAN HDFS [tpch_parquet.lineitem, RANDOM] - partitions=1/1 files=3 size=193.61MB + partitions=1/1 files=3 size=193.92MB stats-rows=6001215 extrapolated-rows=disabled - table stats: rows=6001215 size=193.61MB + table stats: rows=6001215 size=193.92MB column stats: all mem-estimate=80.00MB mem-reservation=0B tuple-ids=0 row-size=0B cardinality=6001215 @@ -403,9 +403,9 @@ F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3 | tuple-ids=1 row-size=263B cardinality=6001215 | 00:SCAN HDFS [tpch_parquet.lineitem, RANDOM] - partitions=1/1 files=3 size=193.61MB + partitions=1/1 files=3 size=193.92MB stats-rows=6001215 extrapolated-rows=disabled - table stats: rows=6001215 size=193.61MB + table stats: rows=6001215 size=193.92MB column stats: all mem-estimate=80.00MB mem-reservation=0B tuple-ids=0 row-size=263B cardinality=6001215 @@ -429,9 +429,9 @@ F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=6 | tuple-ids=1 row-size=263B cardinality=6001215 | 00:SCAN HDFS [tpch_parquet.lineitem, RANDOM] - partitions=1/1 files=3 size=193.61MB + partitions=1/1 files=3 size=193.92MB stats-rows=6001215 extrapolated-rows=disabled - table stats: rows=6001215 size=193.61MB + table stats: rows=6001215 size=193.92MB column stats: all mem-estimate=80.00MB mem-reservation=0B tuple-ids=0 row-size=263B cardinality=6001215 @@ -462,9 +462,9 @@ F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3 | tuple-ids=1 row-size=263B cardinality=100 | 00:SCAN HDFS [tpch_parquet.lineitem, RANDOM] - partitions=1/1 files=3 size=193.61MB + partitions=1/1 files=3 size=193.92MB stats-rows=6001215 extrapolated-rows=disabled - table stats: rows=6001215 size=193.61MB + table stats: rows=6001215 size=193.92MB column stats: all mem-estimate=80.00MB mem-reservation=0B tuple-ids=0 row-size=263B cardinality=6001215 @@ -489,9 +489,9 @@ F00:PLAN FRAGMENT [RANDOM] hosts=3 
instances=6 | tuple-ids=1 row-size=263B cardinality=100 | 00:SCAN HDFS [tpch_parquet.lineitem, RANDOM] - partitions=1/1 files=3 size=193.61MB + partitions=1/1 files=3 size=193.92MB stats-rows=6001215 extrapolated-rows=disabled - table stats: rows=6001215 size=193.61MB + table stats: rows=6001215 size=193.92MB column stats: all mem-estimate=80.00MB mem-reservation=0B tuple-ids=0 row-size=263B cardinality=6001215 @@ -514,6 +514,7 @@ PLAN-ROOT SINK F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3 02:HASH JOIN [INNER JOIN, BROADCAST] | hash predicates: l_orderkey = o_orderkey +| fk/pk conjuncts: l_orderkey = o_orderkey | runtime filters: RF000 <- o_orderkey | mem-estimate=300.41MB mem-reservation=136.00MB | tuple-ids=0,1 row-size=454B cardinality=5757710 @@ -555,6 +556,7 @@ F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=6 02:HASH JOIN [INNER JOIN, BROADCAST] | hash-table-id=00 | hash predicates: l_orderkey = o_orderkey +| fk/pk conjuncts: l_orderkey = o_orderkey | runtime filters: RF000 <- o_orderkey | mem-estimate=300.41MB mem-reservation=136.00MB | tuple-ids=0,1 row-size=454B cardinality=5757710 @@ -882,6 +884,7 @@ F02:PLAN FRAGMENT [HASH(l_orderkey)] hosts=3 instances=3 | 06:HASH JOIN [INNER JOIN, BROADCAST] | hash predicates: o_custkey = c_custkey +| fk/pk conjuncts: o_custkey = c_custkey | runtime filters: RF001 <- c_custkey | mem-estimate=6.61MB mem-reservation=136.00MB | tuple-ids=2,1,0 row-size=108B cardinality=5757710 @@ -901,6 +904,7 @@ F02:PLAN FRAGMENT [HASH(l_orderkey)] hosts=3 instances=3 | 05:HASH JOIN [INNER JOIN, PARTITIONED] | hash predicates: l_orderkey = o_orderkey +| fk/pk conjuncts: l_orderkey = o_orderkey | runtime filters: RF002 <- o_orderkey | mem-estimate=26.23MB mem-reservation=136.00MB | tuple-ids=2,1 row-size=66B cardinality=5757710 @@ -1011,6 +1015,7 @@ F02:PLAN FRAGMENT [HASH(l_orderkey)] hosts=3 instances=6 06:HASH JOIN [INNER JOIN, BROADCAST] | hash-table-id=01 | hash predicates: o_custkey = c_custkey +| fk/pk conjuncts: o_custkey = 
c_custkey | runtime filters: RF001 <- c_custkey | mem-estimate=6.61MB mem-reservation=136.00MB | tuple-ids=2,1,0 row-size=108B cardinality=5757710 @@ -1037,6 +1042,7 @@ F02:PLAN FRAGMENT [HASH(l_orderkey)] hosts=3 instances=6 05:HASH JOIN [INNER JOIN, PARTITIONED] | hash-table-id=02 | hash predicates: l_orderkey = o_orderkey +| fk/pk conjuncts: l_orderkey = o_orderkey | runtime filters: RF002 <- o_orderkey | mem-estimate=13.11MB mem-reservation=136.00MB | tuple-ids=2,1 row-size=66B cardinality=5757710 http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f678a74/testdata/workloads/functional-planner/queries/PlannerTest/spillable-buffer-sizing.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/spillable-buffer-sizing.test b/testdata/workloads/functional-planner/queries/PlannerTest/spillable-buffer-sizing.test index 3aa2cba..fb30e09 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/spillable-buffer-sizing.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/spillable-buffer-sizing.test @@ -17,6 +17,7 @@ PLAN-ROOT SINK F00:PLAN FRAGMENT [RANDOM] hosts=1 instances=1 02:HASH JOIN [INNER JOIN, BROADCAST] | hash predicates: c_nationkey = n_nationkey +| fk/pk conjuncts: c_nationkey = n_nationkey | runtime filters: RF000 <- n_nationkey | mem-estimate=3.15KB mem-reservation=1.06MB | tuple-ids=0,1 row-size=355B cardinality=150000 @@ -58,6 +59,7 @@ F00:PLAN FRAGMENT [RANDOM] hosts=1 instances=2 02:HASH JOIN [INNER JOIN, BROADCAST] | hash-table-id=00 | hash predicates: c_nationkey = n_nationkey +| fk/pk conjuncts: c_nationkey = n_nationkey | runtime filters: RF000 <- n_nationkey | mem-estimate=3.15KB mem-reservation=1.06MB | tuple-ids=0,1 row-size=355B cardinality=150000 @@ -109,6 +111,7 @@ PLAN-ROOT SINK F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3 02:HASH JOIN [LEFT OUTER JOIN, BROADCAST] | hash predicates: l_orderkey = o_orderkey +| 
fk/pk conjuncts: l_orderkey = o_orderkey | mem-estimate=300.41MB mem-reservation=136.00MB | tuple-ids=0,1N row-size=454B cardinality=6001215 | @@ -148,6 +151,7 @@ F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=6 02:HASH JOIN [LEFT OUTER JOIN, BROADCAST] | hash-table-id=00 | hash predicates: l_orderkey = o_orderkey +| fk/pk conjuncts: l_orderkey = o_orderkey | mem-estimate=300.41MB mem-reservation=136.00MB | tuple-ids=0,1N row-size=454B cardinality=6001215 | @@ -197,6 +201,7 @@ PLAN-ROOT SINK F02:PLAN FRAGMENT [HASH(o_custkey)] hosts=2 instances=2 02:HASH JOIN [INNER JOIN, PARTITIONED] | hash predicates: o_custkey = c_custkey +| fk/pk conjuncts: o_custkey = c_custkey | runtime filters: RF000 <- c_custkey | mem-estimate=18.69MB mem-reservation=34.00MB | tuple-ids=0,1 row-size=428B cardinality=1500000 @@ -243,6 +248,7 @@ F02:PLAN FRAGMENT [HASH(o_custkey)] hosts=2 instances=4 02:HASH JOIN [INNER JOIN, PARTITIONED] | hash-table-id=00 | hash predicates: o_custkey = c_custkey +| fk/pk conjuncts: o_custkey = c_custkey | runtime filters: RF000 <- c_custkey | mem-estimate=9.35MB mem-reservation=17.00MB | tuple-ids=0,1 row-size=428B cardinality=1500000 @@ -299,6 +305,7 @@ PLAN-ROOT SINK F00:PLAN FRAGMENT [RANDOM] hosts=2 instances=2 02:HASH JOIN [INNER JOIN, BROADCAST] | hash predicates: o_custkey = c_custkey +| fk/pk conjuncts: o_custkey = c_custkey | runtime filters: RF000 <- c_custkey | mem-estimate=37.38MB mem-reservation=68.00MB | tuple-ids=0,1 row-size=428B cardinality=1500000 @@ -340,6 +347,7 @@ F00:PLAN FRAGMENT [RANDOM] hosts=2 instances=4 02:HASH JOIN [INNER JOIN, BROADCAST] | hash-table-id=00 | hash predicates: o_custkey = c_custkey +| fk/pk conjuncts: o_custkey = c_custkey | runtime filters: RF000 <- c_custkey | mem-estimate=37.38MB mem-reservation=68.00MB | tuple-ids=0,1 row-size=428B cardinality=1500000 @@ -393,6 +401,7 @@ PLAN-ROOT SINK F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3 02:HASH JOIN [LEFT OUTER JOIN, BROADCAST] | hash predicates: alltypes.id = 
alltypestiny.id +| fk/pk conjuncts: assumed fk/pk | mem-estimate=2.00GB mem-reservation=136.00MB | tuple-ids=0,1N row-size=176B cardinality=unavailable | @@ -434,6 +443,7 @@ F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=6 02:HASH JOIN [LEFT OUTER JOIN, BROADCAST] | hash-table-id=00 | hash predicates: alltypes.id = alltypestiny.id +| fk/pk conjuncts: assumed fk/pk | mem-estimate=2.00GB mem-reservation=136.00MB | tuple-ids=0,1N row-size=176B cardinality=unavailable | @@ -582,6 +592,7 @@ F02:PLAN FRAGMENT [HASH(l_orderkey)] hosts=3 instances=3 | 02:HASH JOIN [INNER JOIN, PARTITIONED] | hash predicates: l_orderkey = o_orderkey +| fk/pk conjuncts: l_orderkey = o_orderkey | runtime filters: RF000 <- o_orderkey | mem-estimate=13.11MB mem-reservation=17.00MB | tuple-ids=0,1 row-size=33B cardinality=5757710 @@ -646,6 +657,7 @@ F02:PLAN FRAGMENT [HASH(l_orderkey)] hosts=3 instances=6 02:HASH JOIN [INNER JOIN, PARTITIONED] | hash-table-id=00 | hash predicates: l_orderkey = o_orderkey +| fk/pk conjuncts: l_orderkey = o_orderkey | runtime filters: RF000 <- o_orderkey | mem-estimate=6.56MB mem-reservation=8.50MB | tuple-ids=0,1 row-size=33B cardinality=5757710 http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f678a74/testdata/workloads/functional-planner/queries/PlannerTest/tablesample.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tablesample.test b/testdata/workloads/functional-planner/queries/PlannerTest/tablesample.test index 6712200..fc8778e 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/tablesample.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/tablesample.test @@ -6,7 +6,7 @@ PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | 00:SCAN HDFS [functional.alltypes] - partitions=3/24 files=3 size=60.02KB + partitions=3/24 files=3 size=60.68KB stats-rows=7300 extrapolated-rows=disabled table stats: rows=7300 size=478.45KB 
column stats: all @@ -21,7 +21,7 @@ PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | 00:SCAN HDFS [functional.alltypes] - partitions=12/24 files=12 size=239.44KB + partitions=12/24 files=12 size=239.26KB stats-rows=7300 extrapolated-rows=disabled table stats: rows=7300 size=478.45KB column stats: all @@ -38,7 +38,7 @@ PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | 00:SCAN HDFS [functional.alltypes] - partitions=12/24 files=12 size=239.44KB + partitions=12/24 files=12 size=239.26KB predicates: id < 10 stats-rows=7300 extrapolated-rows=disabled table stats: rows=7300 size=478.45KB @@ -56,7 +56,7 @@ PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | 00:SCAN HDFS [functional.alltypes] - partitions=6/24 files=6 size=120.46KB + partitions=6/24 files=6 size=119.70KB stats-rows=3650 extrapolated-rows=disabled table stats: rows=7300 size=478.45KB column stats: all @@ -132,7 +132,7 @@ PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | 00:SCAN HDFS [functional_parquet.alltypes] - partitions=3/24 files=3 size=22.05KB + partitions=3/24 files=3 size=22.53KB stats-rows=unavailable extrapolated-rows=disabled table stats: rows=unavailable size=unavailable column stats: unavailable @@ -155,7 +155,7 @@ PLAN-ROOT SINK | tuple-ids=0 row-size=4B cardinality=10 | |--01:SCAN HDFS [functional.alltypessmall t2] -| partitions=1/4 files=1 size=1.58KB +| partitions=1/4 files=1 size=1.57KB | stats-rows=100 extrapolated-rows=disabled | table stats: rows=100 size=6.32KB | column stats: all @@ -180,7 +180,7 @@ PLAN-ROOT SINK | mem-estimate=0B mem-reservation=0B | 00:SCAN HDFS [functional.alltypes] - partitions=3/24 files=3 size=60.02KB + partitions=3/24 files=3 size=60.68KB stats-rows=7300 extrapolated-rows=disabled table stats: rows=7300 size=478.45KB column stats: all
