Repository: trafodion Updated Branches: refs/heads/master b06e0dc5d -> 3e7ad94f7
[TRAFODION-3031] Fix two issues with nested subquery plans Project: http://git-wip-us.apache.org/repos/asf/trafodion/repo Commit: http://git-wip-us.apache.org/repos/asf/trafodion/commit/ee91b337 Tree: http://git-wip-us.apache.org/repos/asf/trafodion/tree/ee91b337 Diff: http://git-wip-us.apache.org/repos/asf/trafodion/diff/ee91b337 Branch: refs/heads/master Commit: ee91b337ae1d23ad2b15034602e06c4d3bf1103d Parents: 1e100f4 Author: Dave Birdsall <[email protected]> Authored: Thu Apr 19 22:59:16 2018 +0000 Committer: Dave Birdsall <[email protected]> Committed: Thu Apr 19 22:59:16 2018 +0000 ---------------------------------------------------------------------- core/sql/optimizer/NormRelExpr.cpp | 42 ++++++++++++++++++++++++-------- core/sql/optimizer/RelExpr.cpp | 6 +++-- core/sql/sqlcomp/DefaultConstants.h | 3 +++ core/sql/sqlcomp/nadefaults.cpp | 6 +++++ 4 files changed, 45 insertions(+), 12 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/trafodion/blob/ee91b337/core/sql/optimizer/NormRelExpr.cpp ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/NormRelExpr.cpp b/core/sql/optimizer/NormRelExpr.cpp index f574b17..89504a2 100644 --- a/core/sql/optimizer/NormRelExpr.cpp +++ b/core/sql/optimizer/NormRelExpr.cpp @@ -2767,25 +2767,26 @@ Here t2.a is a unique key of table t2. The following transformation is made Semi Join {pred : t1.b = t2.a} Join {pred : t1.b = t2.a} / \ -------> / \ - / \ / \ -Scan t1 Scan t2 Scan t1 Scan t2 + / \ / \ + Scan t1 Scan t2 Scan t1 Scan t2 b) If the right child is not unique in the joining column then we transform the semijoin into an inner join followed by a groupby as the join's right child. This transformation is enabled by default -only if the right side is an IN list, otherwise a CQD has to be used. +only if the right side is an IN list or if the groupby's reduction +ratio is greater than 5.0, otherwise a CQD has to be used. select t1.a from t1 where t1.b in (1,2,3,4,...,101) ; - Semi Join {pred : t1.b = t2.a} Join {pred : t1.b = InList.col} + Semi Join {pred : t1.b = InList.col} Join {pred : t1.b = InList.col} / \ -------> / \ / \ / \ -Scan t1 Scan t2 Scan t1 GroupBy {group cols: InList.col} +Scan t1 TupleList Scan t1 GroupBy {group cols: InList.col} | | TupleList @@ -2826,18 +2827,39 @@ RelExpr* Join::transformSemiJoin(NormWA& normWARef) /* Apply the transformation described in item b) above. The transformation below is done if there are no non-equijoin preds either - and the inner side has no base tables (i.e. is an IN LIST) or if we have - used a CQD to turn this transformation on for a specific user. For the general - case we are not certain if this transformation is always beneficial, so it is - not on by default */ + and the inner side has no base tables (i.e. is an IN LIST) OR if the groupby + is expected to provide a reduction > SEMIJOIN_TO_INNERJOIN_REDUCTION_RATIO + (default is 5.0) OR the inner row count is small OR if we have used a CQD to + turn this transformation on. Some rationale: A data reduction might reduce + the amount of data for the inner table of a hash join (or it might not! + hash-semi-join sometimes does duplicate elimination itself, but not always). + Converting to a join allows the join to be commuted; if the number of rows + is small, nested join might be profitably chosen in that case. */ ValueIdSet preds ; preds += joinPred(); preds += selectionPred(); preds -= getEquiJoinPredicates() ; + EstLogPropSharedPtr innerEstLogProp = child(1)->getGroupAttr()->outputLogProp((*GLOBAL_EMPTY_INPUT_LOGPROP)); + CostScalar innerRowCount = innerEstLogProp->getResultCardinality(); + CostScalar innerUec = innerEstLogProp->getAggregateUec(equiJoinCols1); + NABoolean haveSignificantReduction = FALSE; + CostScalar reductionThreshold = + ((ActiveSchemaDB()->getDefaults()).getAsDouble(SEMIJOIN_TO_INNERJOIN_REDUCTION_RATIO)); + NABoolean noInnerStats = innerEstLogProp->getColStats().containsAtLeastOneFake(); + // have a valid value of uec, have something other than default + // cardinality and satisfy reduction requirement. + if ((innerUec > 0) && (!noInnerStats) && + (innerRowCount/innerUec > reductionThreshold)) + haveSignificantReduction = TRUE; + CostScalar innerAllowance = + ((ActiveSchemaDB()->getDefaults()).getAsDouble(SEMIJOIN_TO_INNERJOIN_INNER_ALLOWANCE)); + if (preds.isEmpty() && - ((child(1)->getGroupAttr()->getNumBaseTables() == 0) || + ((child(1)->getGroupAttr()->getNumBaseTables() == 0) || + haveSignificantReduction || + (innerRowCount < innerAllowance) || (CmpCommon::getDefault(SEMIJOIN_TO_INNERJOIN_TRANSFORMATION) == DF_ON))) { CollHeap *stmtHeap = CmpCommon::statementHeap() ; http://git-wip-us.apache.org/repos/asf/trafodion/blob/ee91b337/core/sql/optimizer/RelExpr.cpp ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/RelExpr.cpp b/core/sql/optimizer/RelExpr.cpp index 5096b8b..38f3d12 100644 --- a/core/sql/optimizer/RelExpr.cpp +++ b/core/sql/optimizer/RelExpr.cpp @@ -8792,10 +8792,12 @@ void Scan::addIndexInfo() for (CollIndex i = 0; i < possibleIndexJoins_.entries(); i++) { NABoolean isASupersetIndex = - possibleIndexJoins_[i]->outputsFromIndex_.contains(newOutputsFromIndex); + possibleIndexJoins_[i]->outputsFromIndex_.contains(newOutputsFromIndex) && + possibleIndexJoins_[i]->indexPredicates_.contains(newIndexPredicates); NABoolean isASubsetIndex = - newOutputsFromIndex.contains(possibleIndexJoins_[i]->outputsFromIndex_) ; + newOutputsFromIndex.contains(possibleIndexJoins_[i]->outputsFromIndex_) && + newIndexPredicates.contains(possibleIndexJoins_[i]->indexPredicates_); NABoolean isASuperOrSubsetIndex = isASupersetIndex || isASubsetIndex; http://git-wip-us.apache.org/repos/asf/trafodion/blob/ee91b337/core/sql/sqlcomp/DefaultConstants.h ---------------------------------------------------------------------- diff --git a/core/sql/sqlcomp/DefaultConstants.h b/core/sql/sqlcomp/DefaultConstants.h index fd110de..2d31275 100644 --- a/core/sql/sqlcomp/DefaultConstants.h +++ b/core/sql/sqlcomp/DefaultConstants.h @@ -1965,7 +1965,10 @@ enum DefaultConstants USTAT_FETCHCOUNT_ACTIVE, + SEMIJOIN_TO_INNERJOIN_INNER_ALLOWANCE, + SEMIJOIN_TO_INNERJOIN_REDUCTION_RATIO, SEMIJOIN_TO_INNERJOIN_TRANSFORMATION, + POS_NUM_DISK_POOLS, POS_DISKS_IN_SEGMENT, http://git-wip-us.apache.org/repos/asf/trafodion/blob/ee91b337/core/sql/sqlcomp/nadefaults.cpp ---------------------------------------------------------------------- diff --git a/core/sql/sqlcomp/nadefaults.cpp b/core/sql/sqlcomp/nadefaults.cpp index 244a2bc..a93cdb7 100644 --- a/core/sql/sqlcomp/nadefaults.cpp +++ b/core/sql/sqlcomp/nadefaults.cpp @@ -2702,6 +2702,12 @@ SDDflt0_(QUERY_CACHE_SELECTIVITY_TOLERANCE, "0"), // SQ_SEAMONSTER which will have a value of 0 or 1. DDkwd__(SEAMONSTER, "SYSTEM"), + // If the inner table of a semi-join has fewer rows than this, + // we'll allow it to be transformed to a join. + DDflt1_(SEMIJOIN_TO_INNERJOIN_INNER_ALLOWANCE, "100.0"), + // Ratio of right child cardinality to uec above which semijoin + // trans. is favored. + DDflt1_(SEMIJOIN_TO_INNERJOIN_REDUCTION_RATIO, "5.0"), SDDkwd__(SEMIJOIN_TO_INNERJOIN_TRANSFORMATION, "SYSTEM"), // Disallow/Allow semi and anti-semi joins in MultiJoin framework DDkwd__(SEMI_JOINS_SPOIL_JBB, "OFF"),
