http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/b90dc334/core/sql/optimizer/RelExpr.cpp ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/RelExpr.cpp b/core/sql/optimizer/RelExpr.cpp index 0bd7fdf..b1998f8 100644 --- a/core/sql/optimizer/RelExpr.cpp +++ b/core/sql/optimizer/RelExpr.cpp @@ -8181,10 +8181,9 @@ void Scan::getPotentialOutputValuesAsVEGs(ValueIdSet& outputs) const { outputs.clear(); ValueIdSet tempSet ; - ValueIdList tempList ; + getPotentialOutputValues(tempSet); - getTableDesc()->getEquivVEGCols(tempSet, tempList); - outputs = tempList ; + getTableDesc()->getEquivVEGCols(tempSet, outputs); } @@ -8283,8 +8282,8 @@ RelExpr * Scan::copyTopNode(RelExpr *derivedNode, CollHeap* outHeap) result->setExtraOutputColumns(getExtraOutputColumns()); result->isRewrittenMV_ = isRewrittenMV_; result->matchingMVs_ = matchingMVs_; - result->hbaseAccessOptions_ = hbaseAccessOptions_; + result->commonSubExpr_ = commonSubExpr_; // don't copy values that can be calculated by addIndexInfo() // (could be done, but we are lazy and just call addIndexInfo() again) @@ -11967,6 +11966,39 @@ void MapValueIds::pushdownCoveredExpr( translatedOutputs); requiredValues += translatedOutputs; + if (cseRef_) + { + // If this MapValueIds node represents a common subexpression, + // then don't try to push predicates again that already have + // been pushed down before. VEGPredicates may not be pushable + // at all to the rewritten child, and other predicates might + // be duplicated with different ValueIds for the internal + // operators such as "=", "+", ">". + predicatesOnParent -= cseRef_->getPushedPredicates(); + + // Also, don't push down VEGPredicates on columns that are + // characteristic outputs of the MapValueIds. Those predicates + // (or their equivalents) should have already been pushed down. 
+ for (ValueId g=predicatesOnParent.init(); + predicatesOnParent.next(g); + predicatesOnParent.advance(g)) + if (g.getItemExpr()->getOperatorType() == ITM_VEG_PREDICATE) + { + VEG *veg = + static_cast<VEGPredicate *>(g.getItemExpr())->getVEG(); + ValueIdSet vegMembers(veg->getAllValues()); + + vegMembers += veg->getVEGReference()->getValueId(); + vegMembers.intersectSet( + getGroupAttr()->getCharacteristicOutputs()); + if (!vegMembers.isEmpty()) + // a VEGPred on one of my characteristic outputs, + // assume that my child tree already has the + // associated VEGPreds + predicatesOnParent -= g; + } + } + // rewrite the predicates so they can be applied in the child node getMap().rewriteValueIdSetDown(predicatesOnParent,predsRewrittenForChild); @@ -12052,7 +12084,7 @@ NABoolean MapValueIds::duplicateMatch(const RelExpr & other) const if (includesFavoriteMV_ != o.includesFavoriteMV_) return FALSE; - if (usedByMvqr_ != o.usedByMvqr_) + if (cseRef_ != o.cseRef_) return FALSE; if (map_ != o.map_) @@ -12071,7 +12103,7 @@ RelExpr * MapValueIds::copyTopNode(RelExpr *derivedNode, CollHeap* outHeap) result = static_cast<MapValueIds*>(derivedNode); result->includesFavoriteMV_ = includesFavoriteMV_; - result->usedByMvqr_ = usedByMvqr_; + result->cseRef_ = cseRef_; return RelExpr::copyTopNode(result, outHeap); } @@ -12200,6 +12232,293 @@ void ControlRunningQuery::setComment(NAString &comment) } // ----------------------------------------------------------------------- +// member functions for class CSEInfo (helper for CommonSubExprRef) +// ----------------------------------------------------------------------- + +CSEInfo::CSEAnalysisOutcome CSEInfo::getAnalysisOutcome(Int32 id) const +{ + if (idOfAnalyzingConsumer_ != id && + analysisOutcome_ == CREATE_TEMP) + // only the analyzing consumer creates and reads the temp, the + // others only read it + return TEMP; + else + return analysisOutcome_; +} + +void CSEInfo::addChildCSE(CSEInfo *child) +{ + if (!childCSEs_.contains(child)) 
+ childCSEs_.insert(child); +} + +void CSEInfo::addCSERef(CommonSubExprRef *cse) +{ + CMPASSERT(name_ == cse->getName()); + cse->setId(consumers_.entries()); + consumers_.insert(cse); +} + +// ----------------------------------------------------------------------- +// member functions for class CommonSubExprRef +// ----------------------------------------------------------------------- + +CommonSubExprRef::~CommonSubExprRef() +{ +} + +Int32 CommonSubExprRef::getArity() const +{ + // always return 1 for now, that may change in the future + return 1; +} + +void CommonSubExprRef::addToCmpStatement() +{ + NABoolean alreadySeen = TRUE; + + // look up whether a CSE with this name already exists + CSEInfo *info = CmpCommon::statement()->getCSEInfo(internalName_); + + if (!info) + { + // make a new object to hold a list of all references + // to this CSE (the first one of them will be "this") + info = new(CmpCommon::statementHeap()) + CSEInfo(internalName_, + CmpCommon::statementHeap()); + alreadySeen = FALSE; + } + + info->addCSERef(this); + + if (!alreadySeen) + CmpCommon::statement()->addCSEInfo(info); +} + +NABoolean CommonSubExprRef::isFirstReference() +{ + return ( + // this is the first reference added, or + id_ == 0 || + // this is not yet added and no other reference has been added yet + id_ < 0 && CmpCommon::statement()->getCSEInfo(internalName_) == NULL); +} + +void CommonSubExprRef::addLocalExpr(LIST(ExprNode *) &xlist, + LIST(NAString) &llist) const +{ + if (NOT columnList_.isEmpty()) + { + xlist.insert(columnList_.rebuildExprTree(ITM_ITEM_LIST)); + llist.insert("column_list"); + } + + if(NOT pushedPredicates_.isEmpty()) + { + xlist.insert(pushedPredicates_.rebuildExprTree()); + llist.insert("pushed_predicates"); + } +} + +HashValue CommonSubExprRef::topHash() +{ + HashValue result = RelExpr::topHash(); + + result ^= internalName_; + result ^= id_; + result ^= columnList_; + result ^= pushedPredicates_; + + return result; +} + +NABoolean
CommonSubExprRef::duplicateMatch(const RelExpr & other) const +{ + if (NOT RelExpr::duplicateMatch(other)) + return FALSE; + + const CommonSubExprRef &o = static_cast<const CommonSubExprRef &>(other); + + return (internalName_ == o.internalName_ && + id_ == o.id_ && + columnList_ == o.columnList_ && + pushedPredicates_ == o.pushedPredicates_); +} + +RelExpr * CommonSubExprRef::copyTopNode(RelExpr *derivedNode, + CollHeap* outHeap) +{ + CommonSubExprRef *result = NULL; + + if (derivedNode == NULL) + result = new (outHeap) CommonSubExprRef(NULL, + internalName_.data(), + outHeap); + else + result = static_cast<CommonSubExprRef *>(derivedNode); + + if (nodeIsBound()) + { + // if the node is bound, we assume that the copy is serving the same function + // as the original + result->setId(id_); + result->columnList_ = columnList_; + result->pushedPredicates_ = pushedPredicates_; + } + else + // if the node is not bound, we assume that we created a new + // reference to a common subexpression, for example by referencing + // a CTE that itself contains another reference to a CTE (Common + // Table Expression) + result->addToCmpStatement(); + + return result; +} + +const NAString CommonSubExprRef::getText() const +{ + NAString result("cse "); + char buf[20]; + + result += ToAnsiIdentifier(internalName_); + + snprintf(buf, sizeof(buf), " %d", id_); + result += buf; + + return result; +} + +Union * CommonSubExprRef::makeUnion(RelExpr *lc, + RelExpr *rc, + NABoolean blocked) +{ + // Make a regular or blocked union with no characteristic outputs + Union *result; + ValueIdSet newInputs(lc->getGroupAttr()->getCharacteristicInputs()); + + result = new(CmpCommon::statementHeap()) Union(lc, rc); + + newInputs += rc->getGroupAttr()->getCharacteristicInputs(); + + result->setGroupAttr(new (CmpCommon::statementHeap()) GroupAttributes()); + result->getGroupAttr()->addCharacteristicInputs(newInputs); + + if(blocked) + result->setBlockedUnion(); + + return result; +} + +void 
CommonSubExprRef::display() +{ + printf("Original columns:\n"); + columnList_.display(); + printf("\nCommon inputs:\n"); + commonInputs_.display(); + printf("\nPushed predicates:\n"); + pushedPredicates_.display(); +} + +void CommonSubExprRef::displayAll(const char *optionalId) +{ + const LIST(CSEInfo *) *cses = CmpCommon::statement()->getCSEInfoList(); + + if (cses) + for (CollIndex i=0; i<cses->entries(); i++) + if (!optionalId || + strlen(optionalId) == 0 || + cses->at(i)->getName() == optionalId) + { + CSEInfo *info = cses->at(i); + CollIndex nc = info->getNumConsumers(); + + printf("==========================\n"); + printf("CSE: %s (%d consumers)\n", + info->getName().data(), + nc); + + const LIST(CSEInfo *) &children(info->getChildCSEs()); + + for (CollIndex j=0; j<children.entries(); j++) + printf(" references CSE: %s\n", children[j]->getName().data()); + + if (info->getIdOfAnalyzingConsumer() >= 0) + { + const char *outcome = "?"; + ValueIdList cols; + CommonSubExprRef *consumer = + info->getConsumer(info->getIdOfAnalyzingConsumer()); + const ValueIdList &cCols(consumer->getColumnList()); + + switch (info->getAnalysisOutcome(0)) + { + case CSEInfo::UNKNOWN_ANALYSIS: + outcome = "UNKNOWN"; + break; + case CSEInfo::EXPAND: + outcome = "EXPAND"; + break; + case CSEInfo::CREATE_TEMP: + outcome = "CREATE_TEMP"; + break; + case CSEInfo::TEMP: + outcome = "TEMP"; + break; + case CSEInfo::ERROR: + outcome = "ERROR"; + break; + default: + outcome = "???"; + break; + } + + printf(" analyzed by consumer %d, outcome: %s\n", + info->getIdOfAnalyzingConsumer(), + outcome); + + makeValueIdListFromBitVector(cols, + cCols, + info->getNeededColumns()); + printf(" \ncolumns of temp table:\n"); + cols.display(); + printf(" \ncommonPredicates:\n"); + info->getCommonPredicates().display(); + if (info->getVEGRefsWithDifferingConstants().entries() > 0) + { + printf(" \nvegRefsWithDifferingConstants:\n"); + info->getVEGRefsWithDifferingConstants().display(); + } + if 
(info->getVEGRefsWithDifferingInputs().entries() > 0) + { + printf(" \nvegRefsWithDifferingInputs:\n"); + info->getVEGRefsWithDifferingInputs().display(); + } + printf(" \ntempTableName: %s\n", + info->getTempTableName(). + getQualifiedNameAsAnsiString().data()); + printf(" \nDDL of temp table:\n%s\n", + info->getTempTableDDL().data()); + } + + for (int c=0; c<nc; c++) + { + printf("\n----- Consumer %d:\n", c); + info->getConsumer(c)->display(); + } + } +} + +void CommonSubExprRef::makeValueIdListFromBitVector(ValueIdList &tgt, + const ValueIdList &src, + const NABitVector &vec) +{ + for (CollIndex b=0; vec.nextUsed(b); b++) + tgt.insert(src[b]); +} + + +// ----------------------------------------------------------------------- // member functions for class GenericUpdate // -----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/b90dc334/core/sql/optimizer/RelExpr.h ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/RelExpr.h b/core/sql/optimizer/RelExpr.h index 443c8f4..545ee2c 100644 --- a/core/sql/optimizer/RelExpr.h +++ b/core/sql/optimizer/RelExpr.h @@ -96,6 +96,7 @@ class MVInfoForDDL; class PlanPriority; class Hint; class TableMappingUDF; +class CSEInfo; //////////////////// class CANodeIdSet; @@ -585,6 +586,49 @@ public: // and flows CompRefOpt constraints up the query tree. virtual void processCompRefOptConstraints(NormWA * normWAPtr) ; + // Used during SQO to prepare the child tree of a CommonSubExprRef + // for sharing between multiple consumers. This basically undoes + // some of the normalizations, like eliminating unneeded outputs + // and pushing predicates down. This is a recursive tree walk + // method, most nodes don't override this one. + // Returns TRUE for success, FALSE if it was unable to prepare + // (diags will be set). + // Tree remains unchanged if testRun is TRUE. + virtual NABoolean prepareTreeForCSESharing( + const ValueIdSet &outputsToAdd, + const ValueIdSet &predicatesToRemove, + const ValueIdSet &commonPredicatesToAdd, + const ValueIdSet &inputsToRemove, + CSEInfo *info, + NABoolean testRun); + + // A virtual method called on every node from prepareTreeForCSESharing(), + // use this to do the actual work of removing predicates other than + // selection predicates, and to indicate that the node supports this + // method (the default implementation returns FALSE). + // + // This method needs to + // - make sure no changes are done to the node when testRun is TRUE + // - add any required new outputs to the char. 
outputs, unless they + // are produced by a child + // - make sure no local expression references any of the inputs to + // be removed + // - indicate in its return value whether it was successful doing so + // + // This method doesn't need to take care of the following, since the + // caller of this method already does it: + // - removing predicates from selectionPred() + // - adding new common predicates to selectionPred(), unless + // they are covered by the children's group attributes + // - adding required outputs that are produced by children + // - removing char. inputs that are requested to be removed + virtual NABoolean prepareMeForCSESharing( + const ValueIdSet &outputsToAdd, + const ValueIdSet &predicatesToRemove, + const ValueIdSet &commonPredicatesToAdd, + const ValueIdSet &inputsToRemove, + CSEInfo *info, + NABoolean testRun); // -------------------------------------------------------------------- // Create a query execution plan. http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/b90dc334/core/sql/optimizer/RelFastTransport.cpp ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/RelFastTransport.cpp b/core/sql/optimizer/RelFastTransport.cpp index 4734d2a..8773b5c 100644 --- a/core/sql/optimizer/RelFastTransport.cpp +++ b/core/sql/optimizer/RelFastTransport.cpp @@ -36,6 +36,7 @@ #include "CostMethod.h" #include "AllRelExpr.h" #include "Globals.h" +#include "HDFSHook.h" // ----------------------------------------------------------------------- // methods for class RelFastExtract @@ -61,6 +62,7 @@ FastExtract::FastExtract(const FastExtract & other) selectList_ = other.selectList_; isSequenceFile_ = other.isSequenceFile_; overwriteHiveTable_ = other.overwriteHiveTable_; + isMainQueryOperator_ = other.isMainQueryOperator_; } //!
FastExtract::~FastExtract Destructor @@ -69,6 +71,118 @@ FastExtract::~FastExtract() } +RelExpr *FastExtract::makeFastExtractTree( + TableDesc *tableDesc, + RelExpr *child, + NABoolean overwriteTable, + NABoolean calledFromBinder, + NABoolean tempTableForCSE, + BindWA *bindWA) +{ + RelExpr *result = NULL; + const HHDFSTableStats* hTabStats = + tableDesc->getNATable()->getClusteringIndex()->getHHDFSTableStats(); + + const char * hiveTablePath; + NAString hostName; + Int32 hdfsPort; + NAString tableDir; + + char fldSep[2]; + char recSep[2]; + memset(fldSep,'\0',2); + memset(recSep,'\0',2); + fldSep[0] = hTabStats->getFieldTerminator(); + recSep[0] = hTabStats->getRecordTerminator(); + + // don't rely on timeouts to invalidate the HDFS stats for the target table, + // make sure that we invalidate them right after compiling this statement, + // at least for this process + ((NATable*)(tableDesc->getNATable()))->setClearHDFSStatsAfterStmt(TRUE); + + // inserting into tables with multiple partitions is not yet supported + CMPASSERT(hTabStats->entries() == 1); + hiveTablePath = (*hTabStats)[0]->getDirName(); + NABoolean splitSuccess = TableDesc::splitHiveLocation( + hiveTablePath, + hostName, + hdfsPort, + tableDir, + CmpCommon::diags(), + hTabStats->getPortOverride()); + + if (!splitSuccess) { + *CmpCommon::diags() << DgSqlCode(-4224) + << DgString0(hiveTablePath); + bindWA->setErrStatus(); + return NULL; + } + + const NABoolean isSequenceFile = hTabStats->isSequenceFile(); + + FastExtract * unloadRelExpr = + new (bindWA->wHeap()) FastExtract( + child, + new (bindWA->wHeap()) NAString(hiveTablePath, bindWA->wHeap()), + new (bindWA->wHeap()) NAString(hostName, bindWA->wHeap()), + hdfsPort, + tableDesc, + new (bindWA->wHeap()) NAString( + tableDesc->getCorrNameObj().getQualifiedNameObj().getObjectName(), + bindWA->wHeap()), + FastExtract::FILE, + bindWA->wHeap()); + unloadRelExpr->setRecordSeparator(recSep); + unloadRelExpr->setDelimiter(fldSep); + 
unloadRelExpr->setOverwriteHiveTable(overwriteTable); + unloadRelExpr->setSequenceFile(isSequenceFile); + unloadRelExpr->setIsMainQueryOperator(calledFromBinder); + result = unloadRelExpr; + + if (overwriteTable) + { + ExeUtilHiveTruncate *trunc = new (bindWA->wHeap()) + ExeUtilHiveTruncate(tableDesc->getCorrNameObj(), + NULL, + bindWA->wHeap()); + RelExpr * newRelExpr = trunc; + + if (tempTableForCSE) + { + trunc->setSuppressModCheck(); + + // This table gets created at compile time, unlike most + // other tables. It gets dropped when the statement is + // deallocated. Note that there are three problems: + // a) Statement gets never executed + // b) Process exits before deallocating the statement + // c) Statement gets deallocated, then gets executed again + // + // Todo: CSE: Handle these issues. + // Cases a) and b) are handled like volatile tables, there + // is a cleanup mechanism. + // Case c) gets handled by AQR. + trunc->setDropTableOnDealloc(); + } + + if (calledFromBinder) + //new root to prevent error 4056 when binding + newRelExpr = new (bindWA->wHeap()) RelRoot(newRelExpr); + else + // this node must be bound, even outside the binder, + // to set some values + newRelExpr = newRelExpr->bindNode(bindWA); + + Union *blockedUnion = new (bindWA->wHeap()) Union(newRelExpr, result); + + blockedUnion->setBlockedUnion(); + blockedUnion->setSerialUnion(); + result = blockedUnion; + } + + return result; +} + //! 
FastExtract::copyTopNode method RelExpr * FastExtract::copyTopNode(RelExpr *derivedNode, CollHeap* outHeap) @@ -99,6 +213,7 @@ RelExpr * FastExtract::copyTopNode(RelExpr *derivedNode, result->selectList_ = selectList_; result->isSequenceFile_ = isSequenceFile_; result->overwriteHiveTable_ = overwriteHiveTable_; + result->isMainQueryOperator_ = isMainQueryOperator_; return RelExpr::copyTopNode(result, outHeap); } http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/b90dc334/core/sql/optimizer/RelFastTransport.h ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/RelFastTransport.h b/core/sql/optimizer/RelFastTransport.h index 29be039..4a4acb7 100644 --- a/core/sql/optimizer/RelFastTransport.h +++ b/core/sql/optimizer/RelFastTransport.h @@ -115,7 +115,8 @@ public : recordSeparator_(*recordSep, oHeap), overwriteHiveTable_(FALSE), isSequenceFile_(FALSE), - nullStringSpec_((nullString ? TRUE : FALSE)) + nullStringSpec_((nullString ? TRUE : FALSE)), + isMainQueryOperator_(TRUE) { }; @@ -138,7 +139,8 @@ public : recordSeparator_(oHeap), overwriteHiveTable_(FALSE), isSequenceFile_(FALSE), - nullStringSpec_(FALSE) + nullStringSpec_(FALSE), + isMainQueryOperator_(TRUE) { }; @@ -159,7 +161,8 @@ public : recordSeparator_(oHeap), overwriteHiveTable_(FALSE), isSequenceFile_(FALSE), - nullStringSpec_(FALSE) + nullStringSpec_(FALSE), + isMainQueryOperator_(TRUE) { }; FastExtract(RelExpr* child, @@ -186,7 +189,8 @@ public : recordSeparator_(oHeap), overwriteHiveTable_(FALSE), isSequenceFile_(FALSE), - nullStringSpec_(FALSE) + nullStringSpec_(FALSE), + isMainQueryOperator_(TRUE) { }; //! FastExtract Copy Constructor @@ -194,6 +198,16 @@ public : //! 
~FastExtract destructor virtual ~FastExtract(); + + // make a FastExtract from a TableDesc + static RelExpr * makeFastExtractTree( + TableDesc *tableDesc, + RelExpr *child, + NABoolean overwriteTable, + NABoolean calledFromBinder, + NABoolean tempTableForCSE, + BindWA *bindWA); + //! copyTopNode method // a virtual function used to copy most of a Node virtual RelExpr * copyTopNode(RelExpr *derivedNode = NULL, @@ -313,6 +327,16 @@ public : isSequenceFile_ = sf; } + void setIsMainQueryOperator(NABoolean m) + { + isMainQueryOperator_ = m; + } + + NABoolean getIsMainQueryOperator() const + { + return isMainQueryOperator_; + } + private: @@ -333,6 +357,7 @@ private: NAString hiveTableName_; NABoolean overwriteHiveTable_; NABoolean isSequenceFile_; + NABoolean isMainQueryOperator_; }; // class FastExtract http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/b90dc334/core/sql/optimizer/RelGrby.h ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/RelGrby.h b/core/sql/optimizer/RelGrby.h index 72717fb..8595116 100644 --- a/core/sql/optimizer/RelGrby.h +++ b/core/sql/optimizer/RelGrby.h @@ -212,6 +212,14 @@ public: virtual void eliminateCascadedGroupBy(NormWA &normWaRef); + virtual NABoolean prepareMeForCSESharing( + const ValueIdSet &outputsToAdd, + const ValueIdSet &predicatesToRemove, + const ValueIdSet &commonPredicatesToAdd, + const ValueIdSet &inputsToRemove, + CSEInfo *info, + NABoolean testRun); + // flows compRefOpt constraints up the query tree. 
virtual void processCompRefOptConstraints(NormWA * normWAPtr) ; http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/b90dc334/core/sql/optimizer/RelJoin.h ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/RelJoin.h b/core/sql/optimizer/RelJoin.h index 086f7d0..2c66b31 100644 --- a/core/sql/optimizer/RelJoin.h +++ b/core/sql/optimizer/RelJoin.h @@ -787,6 +787,14 @@ public: NABoolean applyInnerKeyedAccessHeuristic(const GroupByAgg* newGrby, NormWA & normWARef); + virtual NABoolean prepareMeForCSESharing( + const ValueIdSet &outputsToAdd, + const ValueIdSet &predicatesToRemove, + const ValueIdSet &commonPredicatesToAdd, + const ValueIdSet &inputsToRemove, + CSEInfo *info, + NABoolean testRun); + // Detect whether rows coming from the ith child contain multi-column skew for // a set of join predicates. The output argument vidOfEquiJoinWithSkew is the // valueId of a particular join predicate chosen by the method for use by the http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/b90dc334/core/sql/optimizer/RelMisc.h ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/RelMisc.h b/core/sql/optimizer/RelMisc.h index 619e3db..46eb133 100644 --- a/core/sql/optimizer/RelMisc.h +++ b/core/sql/optimizer/RelMisc.h @@ -47,6 +47,7 @@ #include "LateBindInfo.h" #include "SequenceGeneratorAttributes.h" #include "ComSecurityKey.h" +#include "CmpStatement.h" class TrafDesc; @@ -68,7 +69,13 @@ class PhyPack; class FirstN; class ExtractSource; class SequenceGenerator; -class NextValueFor; +class Rowset; +class RowsetRowwise; +class RowsetInto; +class RowsetFor; +class ControlRunningQuery; +class CommonSubExprRef; +class Union; // The following are physical operators @@ -101,6 +108,8 @@ class ElemDDLNode; class ElemProxyColDef; class ItemExprList; class RtmdCompileTimeObj; +class HbaseAccessOptions; + // TreeStore struct struct TreeStore : public NABasicObject { 
@@ -268,6 +277,7 @@ public: // subqueries are unnested in this method virtual RelExpr * semanticQueryOptimizeNode(NormWA & normWARef); + RelExpr * inlineTempTablesForCSEs(NormWA & normWARef); // Method to push down predicates from a RelRoot node into the // children @@ -1444,26 +1454,29 @@ public: MapValueIds(RelExpr *child = NULL, CollHeap *oHeap = CmpCommon::statementHeap()) : RelExpr(REL_MAP_VALUEIDS,child,NULL,oHeap), - includesFavoriteMV_(FALSE), usedByMvqr_(FALSE) {} + includesFavoriteMV_(FALSE), + cseRef_(NULL) {} MapValueIds(RelExpr *child, const ValueIdSet &identity, CollHeap *oHeap = CmpCommon::statementHeap()) : RelExpr(REL_MAP_VALUEIDS,child,NULL,oHeap), - map_(identity), includesFavoriteMV_(FALSE), usedByMvqr_(FALSE) {} + map_(identity), includesFavoriteMV_(FALSE), + cseRef_(NULL) {} MapValueIds(RelExpr *child, const ValueIdMap &map, CollHeap *oHeap = CmpCommon::statementHeap()) : RelExpr(REL_MAP_VALUEIDS,child,NULL,oHeap), - map_(map), includesFavoriteMV_(FALSE), usedByMvqr_(FALSE) {} + map_(map), includesFavoriteMV_(FALSE), + cseRef_(NULL) {} MapValueIds(const MapValueIds & other) : RelExpr(REL_MAP_VALUEIDS, other.child(0)), map_(other.map_), valuesNeededForVEGRewrite_(other.valuesNeededForVEGRewrite_), includesFavoriteMV_(other.includesFavoriteMV_), - usedByMvqr_(other.usedByMvqr_) {} + cseRef_(other.cseRef_) {} virtual ~MapValueIds(); @@ -1510,6 +1523,7 @@ public: inline void clearValuesForVEGRewrite() { valuesNeededForVEGRewrite_.clear(); } void addSameMapEntries(const ValueIdSet & newTopBottomValues); + void setCSERef(CommonSubExprRef *cse) { cseRef_ = cse; } // Method to compute child's characteristic outputs virtual @@ -1543,18 +1557,13 @@ public: NABoolean includesFavoriteMV () const {return includesFavoriteMV_;} - void setUsedByMvqr (NABoolean value) - {usedByMvqr_ = value;} - - NABoolean usedByMvqr () const - {return usedByMvqr_;} - private: ValueIdMap map_; ValueIdSet valuesNeededForVEGRewrite_; NABoolean includesFavoriteMV_; - NABoolean 
usedByMvqr_; + NABoolean replaceVEGUsingList_; + CommonSubExprRef *cseRef_; }; // ----------------------------------------------------------------------- @@ -2546,4 +2555,325 @@ private: NAString comment_; }; +// Container for the common info about a common subexpression. This is +// a helper class for class CommonSubExprRef below. + +// This class is stored in the CmpStatement object, but it is defined +// here because it is closely related to the CommonSubExprRef class +// and because it contains ValueIdSet and ValueIdList data members +// which should not be used by CmpStatement classes directly. + +// Note about acronyms: +// CTE: Common Table Expression syntax (WITH clause) in SQL +// CSE: Common SubExpression (could be WITH clause or +// another type of common subexpression) +class CSEInfo : public NABasicObject +{ +public: + + enum CSEAnalysisOutcome + { + UNKNOWN_ANALYSIS, // analysis not yet done + EXPAND, // expand the common subexpression + CREATE_TEMP, // materialize CSE as temp, then read the temp + TEMP, // read the temp created by someone else + ERROR // error occurred, diags are set + }; + + enum CSETempTableType + { + UNKNOWN_TEMP_TABLE, // temp table type not yet determined + HIVE_TEMP_TABLE, // use a Hive delimited table + VOLATILE_TEMP_TABLE // use a Trafodion volatile table + }; + + CSEInfo(const char *name, + NAMemory *mem) : + name_(name, mem), + cseId_(-1), + childCSEs_(mem), + consumers_(mem), + neededColumns_(mem), + idOfAnalyzingConsumer_(-1), + analysisOutcome_(UNKNOWN_ANALYSIS), + tempTableType_(UNKNOWN_TEMP_TABLE), + tempTableName_(mem), + tempTableDDL_(mem), + tempNATable_(NULL), + insertIntoTemp_(NULL) + {} + + const NAString &getName() const { return name_; } + Int32 getCSEId() const { return cseId_; } + const LIST(CSEInfo *) &getChildCSEs() const { return childCSEs_; } + const CollIndex getNumConsumers() const { return consumers_.entries(); } + CommonSubExprRef *getConsumer(CollIndex i) const { return consumers_[i]; } + + Int32 
getIdOfAnalyzingConsumer() const { return idOfAnalyzingConsumer_; } + CSEAnalysisOutcome getAnalysisOutcome(Int32 id) const; + NABoolean usesATempTable() const { return insertIntoTemp_ != NULL; } + CSETempTableType getTempTableType() const { return tempTableType_; } + const NABitVector &getNeededColumns() const { return neededColumns_; } + const ValueIdSet &getCommonPredicates() const { return commonPredicates_; } + const ValueIdSet &getVEGRefsWithDifferingConstants() const + { return vegRefsWithDifferingConstants_; } + const ValueIdSet &getVEGRefsWithDifferingInputs() const + { return vegRefsWithDifferingInputs_; } + const QualifiedName &getTempTableName() const { return tempTableName_; } + const NAString &getTempTableDDL() const { return tempTableDDL_; } + const NATable *getTempNATable() const { return tempNATable_; } + RelExpr *getInsertIntoTemp() const { return insertIntoTemp_; } + + void setCSEId(Int32 id) { cseId_ = id; } + void addChildCSE(CSEInfo *child); + void addCSERef(CommonSubExprRef *cse); + void setIdOfAnalyzingConsumer(Int32 id) { idOfAnalyzingConsumer_ = id; } + void setAnalysisOutcome(CSEAnalysisOutcome outcome) + { analysisOutcome_ = outcome; } + void setTempTableType(CSETempTableType t) { tempTableType_ = t; } + + void setNeededColumns(const NABitVector &v) { neededColumns_ = v; } + void setCommonPredicates(const ValueIdSet &s) { commonPredicates_ = s; } + void addCommonPredicates(const ValueIdSet &s) { commonPredicates_ += s; } + void addVEGRefsWithDifferingConstants(const ValueIdSet &s) + { vegRefsWithDifferingConstants_ += s; } + void addVEGRefsWithDifferingInputs(const ValueIdSet &s) + { vegRefsWithDifferingInputs_ += s; } + void setTempTableName(const QualifiedName &n) { tempTableName_ = n; } + void setTempTableDDL(const char *s) { tempTableDDL_ = s; } + void setTempNATable(const NATable *nat) { tempNATable_ = nat; } + void setInsertIntoTemp(RelExpr *r) {insertIntoTemp_ = r; } + +private: + // name of the Common Subexpression + NAString 
name_; + + // id of this CSE within the statement + Int32 cseId_; + + // list of other CSEs that are referenced by this one + LIST(CSEInfo *) childCSEs_; + + // list of nodes referring to the common + // subexpression, their index numbers match + // the index in this list + LIST(CommonSubExprRef *) consumers_; + + // a common list of columns and predicates to be used for a + // materialized CSE + + // the needed columns, as bit positions into a consumer's column list + NABitVector neededColumns_; + ValueIdSet commonPredicates_; + + // VEGies with conflicts between the different + // consumers + ValueIdSet vegRefsWithDifferingConstants_; + ValueIdSet vegRefsWithDifferingInputs_; + + // information for the materialization of the CSE + Int32 idOfAnalyzingConsumer_; + CSEAnalysisOutcome analysisOutcome_; + CSETempTableType tempTableType_; + QualifiedName tempTableName_; + NAString tempTableDDL_; + const NATable *tempNATable_; + RelExpr *insertIntoTemp_; +}; + +// ----------------------------------------------------------------------- +// The CommonSubExprRef class represents a potential common subexpression +// (CSE) in the query tree. The common subexpression has a name, and +// multiple CommonSubExprRef nodes in the tree may refer to the same +// name. This is a unary operator, the child tree defines the common +// subexpression (with a few caveats, see below). Note that in our +// current design, we keep multiple copies of the common subexpression +// around, in case we don't want to materialize the CSE. This is a +// logical operator, it either needs to be removed or it needs to be +// replaced with a scan of a temp table.
+// ----------------------------------------------------------------------- +class CommonSubExprRef : public RelExpr +{ +public: + + // constructor + CommonSubExprRef(RelExpr *cse = NULL, + const char *internalName = NULL, + NAMemory *oHeap = CmpCommon::statementHeap()) + : RelExpr(REL_COMMON_SUBEXPR_REF,cse,NULL,oHeap), + internalName_(internalName, oHeap), + id_(-1), + hbAccessOptionsFromCTE_(NULL) + {} + + virtual ~CommonSubExprRef(); + + // the name used in the CTE or a generated name + const NAString &getName() const { return internalName_; } + Int32 getId() const { return id_; } + + virtual Int32 getArity() const; + + // return a read-only reference to the initial list of columns + const ValueIdList & getColumnList() const { return columnList_; } + const ValueIdSet & getNonVEGColumns() const { return nonVEGColumns_; } + + const ValueIdSet &getPushedPredicates() const { return pushedPredicates_; } + + void setId(Int32 id) { CMPASSERT(id_ == -1); id_ = id; } + + // remember HBase access options (if needed) + void setHbaseAccessOptions(HbaseAccessOptions *hbo) + { hbAccessOptionsFromCTE_ = hbo; } + + // add this node to the global list of CommonSubExprRefs kept in CmpStatement + void addToCmpStatement(); + + // is this the first reference to the common subexpression? 
+ NABoolean isFirstReference(); + + // a virtual function for performing name binding within the query tree + virtual RelExpr * bindNode(BindWA *bindWAPtr); + + // normalizer methods + virtual void transformNode(NormWA & normWARef, + ExprGroupId & locationOfPointerToMe); + virtual void pullUpPreds(); + virtual void pushdownCoveredExpr( + const ValueIdSet & outputExprOnOperator, + const ValueIdSet & newExternalInputs, + ValueIdSet& predOnOperator, + const ValueIdSet * nonPredNonOutputExprOnOperator = NULL, + Lng32 childId = (-MAX_REL_ARITY)); + virtual void rewriteNode(NormWA & normWARef); + virtual RelExpr * semanticQueryOptimizeNode(NormWA & normWARef); + + // add all the expressions that are local to this + // node to an existing list of expressions (used by GUI tool) + virtual void addLocalExpr(LIST(ExprNode *) &xlist, + LIST(NAString) &llist) const; + + virtual HashValue topHash(); + virtual NABoolean duplicateMatch(const RelExpr & other) const; + virtual RelExpr * copyTopNode(RelExpr *derivedNode = NULL, + CollHeap* outHeap = 0); + + // synthesize logical properties + virtual void synthLogProp(NormWA * normWAPtr = NULL); + virtual void synthEstLogProp(const EstLogPropSharedPtr& inputEstLogProp); + + // get a printable string that identifies the operator + virtual const NAString getText() const; + + void emitCSEDiagnostics(const char *message, + NABoolean forceError = FALSE); + + // for use by the root node for inlining + static Union *makeUnion(RelExpr *lc, RelExpr *rc, NABoolean blocked); + + // for debugging + void display(); + static void displayAll(const char *optionalId = NULL); + +private: + + // private methods + // --------------- + + // Methods used in the transformation of a plan with CSEs into + // one that creates a temp table, populates it, then replaces the + // CommonSubExprRef nodes with scans of the temp table. These methods + // are meant to be used in the SQO phase and/or in optimizer rules. 
+ + // - Find a list of columns that satisfies all the CSE consumers, + // - Find a set of pushed-down predicates common to all consumers + // - Based on the remaining predicates, determine a STORE BY or + // PRIMARY KEY of the temp table + // - Get an estimate for rows and bytes accessed for each of + // the scan nodes + // - Heuristically decide whether to share the data in a temp table + CSEInfo::CSEAnalysisOutcome analyzeAndPrepareForSharing(CSEInfo &info); + + // decide which type of temp table to use, store the result in info + void determineTempTableType(CSEInfo &info); + + // Based on the information obtained in analyzeSharing(), create + // a volatile table to hold the result of the common subexpression. + // Note that this table is created at compile time. If the user + // compiled this statement in a user-defined transaction, the + // temp table will be created in that transaction. If the user was + // not in a transaction when this statement was compiled, we + // will use a separate transaction that will be committed before + // returning back to the user. + NABoolean createTempTable(CSEInfo &info); + + RelExpr * createInsertIntoTemp(CSEInfo &info, NormWA & normWARef); + + // Create a scan on the temp table, which can replace this + // CommonSubExprRef node when we choose to materialize the common + // subexpression + RelExpr * createTempScan(CSEInfo &info, NormWA & normWARef); + RelExpr * getTempScan() const { return tempScan_; } + + static void makeValueIdListFromBitVector(ValueIdList &tgt, + const ValueIdList &src, + const NABitVector &vec); + + // data members + // ------------ + + // The name of the CTE (Common Table Expression) in internal + // format, if the operator was generated from a reference to + // a CTE. Otherwise, this could be some made-up name. + NAString internalName_; + + // One or more CommonSubExprRef operators may refer to the same + // common subexprssion. These references are numbered 0, 1, ... 
+ Int32 id_; + + // The list of columns produced by the common subexpression. + // We keep the full list here, even when the characteristic + // outputs get reduced during the normalization phase. This + // is a list, since different consumers of the CSE will use + // different value ids, so the only way to equate columns + // of different consumers is by position in this list. + ValueIdList columnList_; + + // same columns without making VEGRefs. This is needed + // in preCodeGen. + ValueIdSet nonVEGColumns_; + + // The common inputs (typically parameters). Pushing down + // new predicates may create additional characteristic + // inputs for this RelExpr, those are not reflected here. + // These common inputs should have the same value ids for + // all consumers. + ValueIdSet commonInputs_; + + // Predicates that got pulled up during normalization. + // Usually those should get pushed down again. + // ValueIdSet pulledPredicates_; + + // Predicates that got pushed into the child tree. These + // modify our copy of the common subexpression. If we want + // to use a temp table, we may need to pull out those predicates + // that are not common between all the users of the CSE + ValueIdSet pushedPredicates_; + + // Predicates that are not common predicates, pushed into the + // common temp table. These need to be applied to the temp + // table, if we decide to read from a temp table. + ValueIdSet nonSharedPredicates_; + + // We allow hints and access options on references to CTEs. + // Those are ignored when we expand the CTE, but they can + // be applied to the scan of the temp node, if we decide to + // create a temp table for the resulting CSE. 
+ HbaseAccessOptions *hbAccessOptionsFromCTE_; + + RelExpr *tempScan_; + +}; // class CommonSubExprRef + + #endif /* RELMISC_H */ http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/b90dc334/core/sql/optimizer/RelScan.h ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/RelScan.h b/core/sql/optimizer/RelScan.h index ad63ace..70f6d64 100644 --- a/core/sql/optimizer/RelScan.h +++ b/core/sql/optimizer/RelScan.h @@ -87,6 +87,7 @@ class QRDescGenerator; class RangeSpecRef; class MVMatch; +class CommonSubExprRef; /************************* @@ -232,9 +233,10 @@ public: forcedIndexInfo_(FALSE), baseCardinality_(0), // QSTUFF - isRewrittenMV_(FALSE) - ,matchingMVs_(oHeap) - , hbaseAccessOptions_(NULL) + isRewrittenMV_(FALSE), + matchingMVs_(oHeap), + hbaseAccessOptions_(NULL), + commonSubExpr_(NULL) {} Scan(const CorrName& name, @@ -260,9 +262,10 @@ public: forcedIndexInfo_(FALSE), baseCardinality_(0), // QSTUFF - isRewrittenMV_(FALSE) - ,matchingMVs_(CmpCommon::statementHeap()) - , hbaseAccessOptions_(NULL) + isRewrittenMV_(FALSE), + matchingMVs_(CmpCommon::statementHeap()), + hbaseAccessOptions_(NULL), + commonSubExpr_(NULL) {} Scan(const CorrName& name, @@ -291,9 +294,10 @@ public: forcedIndexInfo_(FALSE), baseCardinality_(0), // QSTUFF - isRewrittenMV_(FALSE) - ,matchingMVs_(oHeap) - ,hbaseAccessOptions_(NULL) + isRewrittenMV_(FALSE), + matchingMVs_(oHeap), + hbaseAccessOptions_(NULL), + commonSubExpr_(NULL) {} Scan(OperatorTypeEnum otype, @@ -319,10 +323,11 @@ public: cardinalityHint_(-1.0), forcedIndexInfo_(FALSE), baseCardinality_(0), - // QSTUFF - isRewrittenMV_(FALSE), - hbaseAccessOptions_(NULL), - matchingMVs_(CmpCommon::statementHeap()) + // QSTUFF + isRewrittenMV_(FALSE), + hbaseAccessOptions_(NULL), + matchingMVs_(CmpCommon::statementHeap()), + commonSubExpr_(NULL) {} // virtual destructor @@ -489,6 +494,14 @@ public: // --------------------------------------------------------------------- RelExpr 
* normalizeNode ( NormWA & normWARef ); + virtual NABoolean prepareMeForCSESharing( + const ValueIdSet &outputsToAdd, + const ValueIdSet &predicatesToRemove, + const ValueIdSet &commonPredicatesToAdd, + const ValueIdSet &inputsToRemove, + CSEInfo *info, + NABoolean testRun); + // synthesizes compRefOpt constraints. virtual void processCompRefOptConstraints(NormWA * normWAPtr) ; @@ -614,6 +627,9 @@ public: matchingMVs_.insert(match); } + CommonSubExprRef *getCommonSubExpr() const { return commonSubExpr_; } + void setCommonSubExpr(CommonSubExprRef *cse) { commonSubExpr_ = cse; } + protected: // Find the most promising index from the LIST, for index joins @@ -762,6 +778,10 @@ private: // List of MV matches that can be substituted for the SCAN using query rewrite. NAList<MVMatch*> matchingMVs_; + + // pointer to the common subexpression, if this is a scan of a + // materialized common subexpr + CommonSubExprRef *commonSubExpr_; }; // ----------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/b90dc334/core/sql/optimizer/RelSet.h ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/RelSet.h b/core/sql/optimizer/RelSet.h index 73a0ce0..00eff98 100644 --- a/core/sql/optimizer/RelSet.h +++ b/core/sql/optimizer/RelSet.h @@ -255,6 +255,15 @@ public: // The set of values that I can potentially produce as output. virtual void getPotentialOutputValues(ValueIdSet & vs) const; + // methods for common subexpressions + virtual NABoolean prepareTreeForCSESharing( + const ValueIdSet &outputsToAdd, + const ValueIdSet &predicatesToRemove, + const ValueIdSet &commonPredicatesToAdd, + const ValueIdSet &inputsToRemove, + CSEInfo *info, + NABoolean testRun); + // --------------------------------------------------------------------- // Function for testing the eligibility of this // Union to form a plan using the given required physical properties. 
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/b90dc334/core/sql/optimizer/TableDesc.cpp ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/TableDesc.cpp b/core/sql/optimizer/TableDesc.cpp index 9bc2602..dc9a5b3 100644 --- a/core/sql/optimizer/TableDesc.cpp +++ b/core/sql/optimizer/TableDesc.cpp @@ -43,6 +43,7 @@ #include "ex_error.h" #include "Cost.h" /* for lookups in the defaults table */ #include "Analyzer.h" +#include "HDFSHook.h" // ----------------------------------------------------------------------- // Constructor (but note that much more useful stuff goes on in @@ -324,28 +325,24 @@ void TableDesc::getEquivVEGCols (const ValueIdList& columnList, ValueIdList &VEGColumnList) const { for (CollIndex i=0; i < columnList.entries(); i++) - { - ItemExpr *ie = columnList[i].getItemExpr(); - BaseColumn *bc = NULL; + VEGColumnList.insert(getEquivVEGCol(columnList[i])); +} - switch (ie->getOperatorType()) - { - case ITM_BASECOLUMN: - bc = (BaseColumn *) ie; - break; - case ITM_INDEXCOLUMN: - bc = (BaseColumn *) ((IndexColumn *) ie)->getDefinition(). 
- getItemExpr(); - CMPASSERT(bc->getOperatorType() == ITM_BASECOLUMN); - - break; - default: - ABORT("Invalid argument to TableDesc::getEquivVEGCols()\n"); - } +void TableDesc::getEquivVEGCols (const ValueIdSet& columnSet, + ValueIdSet &VEGColumnSet) const +{ + for (ValueId v=columnSet.init(); + columnSet.next(v); + columnSet.advance(v)) + VEGColumnSet += getEquivVEGCol(v); +} - CMPASSERT(bc->getTableDesc() == this); - VEGColumnList.insert(getColumnVEGList()[bc->getColNumber()]); - } +ValueId TableDesc::getEquivVEGCol (const ValueId& column) const +{ + BaseColumn *bc = column.castToBaseColumn(); + + CMPASSERT(bc->getTableDesc() == this); + return getColumnVEGList()[bc->getColNumber()]; } // ----------------------------------------------------------------------- @@ -956,3 +953,37 @@ void TableDesc::compressHistogramsForCurrentQuery() histsCompressed(TRUE); } +NABoolean TableDesc::splitHiveLocation(const char *tableLocation, + NAString &hdfsHost, + Int32 &hdfsPort, + NAString &tableDir, + ComDiagsArea *diags, + int hdfsPortOverride) +{ + HHDFSDiags hhdfsDiags; + + NABoolean result = HHDFSTableStats::splitLocation( + tableLocation, + hdfsHost, + hdfsPort, + tableDir, + hhdfsDiags, + hdfsPortOverride); + + if (!result) + { + if (!hhdfsDiags.isSuccess()) + { + if (diags) + (*diags) << DgSqlCode(-1215) + << DgString0(tableLocation) + << DgString1(hhdfsDiags.getErrMsg()); + } + else + CMPASSERT(0); + } + else + CMPASSERT(hhdfsDiags.isSuccess()); + + return result; +} http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/b90dc334/core/sql/optimizer/TableDesc.h ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/TableDesc.h b/core/sql/optimizer/TableDesc.h index b8c8eca..ad29c6a 100644 --- a/core/sql/optimizer/TableDesc.h +++ b/core/sql/optimizer/TableDesc.h @@ -141,6 +141,9 @@ public: // which maps base columns to index columns. 
void getEquivVEGCols (const ValueIdList &columnList, ValueIdList &VEGColumnList) const; + void getEquivVEGCols (const ValueIdSet &columnSet, + ValueIdSet &VEGColumnSet) const; + ValueId getEquivVEGCol (const ValueId &column) const; NABoolean isSpecialObj(); CostScalar getBaseRowCntIfUniqueJoinCol(const ValueIdSet &joinedCols); @@ -221,6 +224,14 @@ public: ValueIdSet getSaltColumnAsSet() ; NABoolean hasIdentityColumnInClusteringKey() const ; + // helper function for Hive tables + static NABoolean splitHiveLocation(const char *tableLocation, + NAString &hdfsHost, + Int32 &hdfsPort, + NAString &tableDir, + ComDiagsArea *diags, + int hdfsPortOverride); + private: ValueIdSet getComputedColumns(NAColumnBooleanFuncPtrT fptr); http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/b90dc334/core/sql/optimizer/TransRule.cpp ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/TransRule.cpp b/core/sql/optimizer/TransRule.cpp index 2c268a5..bef3d75 100644 --- a/core/sql/optimizer/TransRule.cpp +++ b/core/sql/optimizer/TransRule.cpp @@ -702,6 +702,18 @@ void CreateTransformationRules(RuleSet* set) set->enable(r->getNumber(), set->getSecondPassNumber()); + r = new(CmpCommon::contextHeap()) CommonSubExprRule + ("Eliminate any CommonSubExpr nodes left from the normalizer - for now", + new(CmpCommon::contextHeap()) + CommonSubExprRef(new(CmpCommon::contextHeap()) + CutOp(0, CmpCommon::contextHeap()), + "", + CmpCommon::contextHeap()), + new(CmpCommon::contextHeap()) + CutOp(0, CmpCommon::contextHeap())); + set->insert(r); + set->enable(r->getNumber()); + r = new (CmpCommon::contextHeap()) SampleScanRule ("Transform RelSample above a Scan", new (CmpCommon::contextHeap()) @@ -4017,10 +4029,15 @@ RelExpr * GroupByEliminationRule::nextSubstitute(RelExpr * before, // needed to rewrite VEGReferences into actual columns in the generator. 
// NOTE: this might cause some unnecessary expressions to be carried to // this node, but the cost for this shouldn't be too high. - ValueIdSet valuesForRewrite; - grby->getValuesRequiredForEvaluatingAggregate(valuesForRewrite); - mvi->addValuesForVEGRewrite(valuesForRewrite); + // Removed 10/10/16 as part of fix for TRAFODION-2127 + // These values were not used in MapValueIds::preCodeGen. + // Could consider adding this if there are issue in preCodeGen. + + // ValueIdSet valuesForRewrite; + + // grby->getValuesRequiredForEvaluatingAggregate(valuesForRewrite); + // mvi->addValuesForVEGRewrite(valuesForRewrite); // If there are having predicates, put a filter below the mvi // node. Then map the having predicates and attach them to @@ -5941,7 +5958,7 @@ RelExpr * ShortCutGroupByRule::nextSubstitute(RelExpr * before, // Genesis case: 10-010315-1747. Synthesizing MapValueId outputs correctly // MapValueIds should produce uppervalues as output, if required. // This also fixes genesis case 10-010320-1817. - ValueIdSet valuesForRewrite; + // ValueIdSet valuesForRewrite; mvi->setGroupAttr(bef->getGroupAttr()); @@ -5959,8 +5976,12 @@ RelExpr * ShortCutGroupByRule::nextSubstitute(RelExpr * before, result->getGroupAttr()->addCharacteristicOutputs(resultOutputs); - bef->getValuesRequiredForEvaluatingAggregate(valuesForRewrite); - mvi->addValuesForVEGRewrite(valuesForRewrite); + // Removed 10/10/16 as part of fix for TRAFODION-2127 + // These values were not used in MapValueIds::preCodeGen. + // Could consider adding this if there are issue in preCodeGen. 
+ + // bef->getValuesRequiredForEvaluatingAggregate(valuesForRewrite); + // mvi->addValuesForVEGRewrite(valuesForRewrite); // perform synthesis on the new child node mvi->child(0)->synthLogProp(); @@ -5988,6 +6009,28 @@ NABoolean ShortCutGroupByRule::canMatchPattern( // ----------------------------------------------------------------------- +// methods for class CommonSubExprRule +// ----------------------------------------------------------------------- + +CommonSubExprRule::~CommonSubExprRule() {} + +RelExpr * CommonSubExprRule::nextSubstitute(RelExpr * before, + Context * /*context*/, + RuleSubstituteMemory *& /*memory*/) +{ + // eliminate this node + return before->child(0); +} + +NABoolean CommonSubExprRule::canMatchPattern( + const RelExpr * /*pattern*/) const +{ + // The CommonSubExprRule can potentially help with nearly any pattern + return TRUE; +} + + +// ----------------------------------------------------------------------- // methods for class SampleScanRule // ----------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/b90dc334/core/sql/optimizer/TransRule.h ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/TransRule.h b/core/sql/optimizer/TransRule.h index b358c16..1a91069 100644 --- a/core/sql/optimizer/TransRule.h +++ b/core/sql/optimizer/TransRule.h @@ -76,6 +76,7 @@ class GroupBySplitRule; class AggrDistinctEliminationRule; class GroupByTernarySplitRule; class ShortCutGroupByRule; +class CommonSubExprRule; class SampleScanRule; class JoinToBushyTreeRule; @@ -945,6 +946,24 @@ public: virtual NABoolean canMatchPattern (const RelExpr * pattern) const; }; +class CommonSubExprRule : public Rule +{ +public: + CommonSubExprRule (const char * name, + RelExpr * pattern, + RelExpr * substitute) : + Rule(name,pattern,substitute) {} + + // copy ctor + CommonSubExprRule (const CommonSubExprRule &) ; // not written + + virtual 
~CommonSubExprRule(); + virtual RelExpr * nextSubstitute(RelExpr * before, + Context * context, + RuleSubstituteMemory * & memory); + virtual NABoolean canMatchPattern (const RelExpr * pattern) const; +}; + class SampleScanRule : public Rule { public: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/b90dc334/core/sql/optimizer/ValueDesc.cpp ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/ValueDesc.cpp b/core/sql/optimizer/ValueDesc.cpp index 2245fa8..2fdaa22 100644 --- a/core/sql/optimizer/ValueDesc.cpp +++ b/core/sql/optimizer/ValueDesc.cpp @@ -580,13 +580,17 @@ ValueId::castToBaseColumn(NABoolean *isaConstant) const // loop until we RETURN from this function or its a NULL expression while (ie) { - ValueId vid = NULL_VALUE_ID; + ValueId vid = NULL_VALUE_ID; switch (ie->getOperatorType()) { case ITM_BASECOLUMN: return (BaseColumn *)ie; + case ITM_INDEXCOLUMN: + return (BaseColumn *) ((IndexColumn *) ie)-> + getDefinition().getItemExpr(); + case ITM_INSTANTIATE_NULL: case ITM_UNPACKCOL: ie = (*ie)[0] ; @@ -605,7 +609,7 @@ ValueId::castToBaseColumn(NABoolean *isaConstant) const ie = vid.getItemExpr(); break; - case ITM_ROWSETARRAY_SCAN: + case ITM_ROWSETARRAY_SCAN: // $$$ mar: no good way to do this easily, so for now we punt // $$$ --> post-FCS, do something better // @@ -623,21 +627,21 @@ ValueId::castToBaseColumn(NABoolean *isaConstant) const break; } - default: - if (ie->getArity() > 0) - { - ie = (*ie)[0]; - break; - } - else - { - if (isaConstant) - { - *isaConstant = ie->doesExprEvaluateToConstant - (FALSE,TRUE); - } - return NULL; - } + default: + if (ie->getArity() > 0) + { + ie = (*ie)[0]; + break; + } + else + { + if (isaConstant) + { + *isaConstant = ie->doesExprEvaluateToConstant + (FALSE,TRUE); + } + return NULL; + } } } // end of while return NULL; @@ -2804,9 +2808,10 @@ NABoolean ValueIdSet::isCovered(const ValueIdSet & newExternalInputs, // covered by the available values // 
------------------------------------------------------------------------ -void ValueIdSet::removeCoveredExprs(const ValueIdSet & newExternalInputs, - ValueIdSet* usedInputs) +Int32 ValueIdSet::removeCoveredExprs(const ValueIdSet & newExternalInputs, + ValueIdSet* usedInputs) { + Int32 result = 0; NABoolean coverFlag; ValueIdSet referencedInputs; GroupAttributes emptyGA; @@ -2827,10 +2832,13 @@ void ValueIdSet::removeCoveredExprs(const ValueIdSet & newExternalInputs, referencedInputs); if (coverFlag) { subtractElement(exprId); + result++; if (usedInputs) - *usedInputs = referencedInputs ; + *usedInputs += referencedInputs ; } } // for + + return result; } // ValueIdSet::removeCoveredExprs // ------------------------------------------------------------------------ @@ -2840,8 +2848,9 @@ void ValueIdSet::removeCoveredExprs(const ValueIdSet & newExternalInputs, // NOT covered by the available values // ------------------------------------------------------------------------ -void ValueIdSet::removeUnCoveredExprs(const ValueIdSet & newExternalInputs) +Int32 ValueIdSet::removeUnCoveredExprs(const ValueIdSet & newExternalInputs) { + Int32 result = 0; NABoolean coverFlag; ValueIdSet referencedInputs; GroupAttributes emptyGA; @@ -2861,8 +2870,13 @@ void ValueIdSet::removeUnCoveredExprs(const ValueIdSet & newExternalInputs) newExternalInputs, referencedInputs); if (!coverFlag) - subtractElement(exprId); + { + subtractElement(exprId); + result++; + } } // for + + return result; } // ValueIdSet::removeUnCoveredExprs // --------------------------------------------------------------------- @@ -5702,6 +5716,119 @@ void ValueIdMap::flipSides() bottomValues_ = flipList; } +void ValueIdMap::augmentForVEG(NABoolean addVEGPreds, + NABoolean addVEGRefs, + NABoolean compareConstants, + const ValueIdSet *topInputsToCheck, + const ValueIdSet *bottomInputsToCheck, + ValueIdSet *vegRefsWithDifferentConstants, + ValueIdSet *vegRefsWithDifferentInputs) +{ + // If a ValueIdMap maps one 
VEGReference x to another VEGReference y, + // we may want to be able to map the corresponding VEGPredicates as + // well. This method enables that by finding such pairs of VEGReferences + // and augmenting the map with their VEGPredicates. The method can also + // augment the map from VEGPredicates to VEGReferences. + + // NOTE: Before using this method, make sure it is applicable in + // your case. What it does is somewhat questionable. We may have + // a VEG(a,b,1) in the top values and a VEG(c,d,2) in the bottom + // values. Replacing one VEGPred into another may or may not be + // what we want. Furthermore, a,b,c may be local values, d may + // be a characteristic input. Again, it is questionable whether + // the rewrite is what's desired. + + // The method allows to restrict the rewrite somewhat: + // - compareConstants requires top and bottom VEGPreds to + // have the same constant (or no constants at all) + // - top/bottom inputs to check can be used to exclude + // VEGPreds that differ in the way they use inputs. + + // There are other issues that still may go wrong with this + // method, maybe with predicates like COL1=COL2. + + CollIndex ne = topValues_.entries(); + + for (CollIndex i=0; i<ne; i++) + { + ItemExpr *t = topValues_[i].getItemExpr(); + ItemExpr *b = bottomValues_[i].getItemExpr(); + OperatorTypeEnum to = t->getOperatorType(); + OperatorTypeEnum bo = b->getOperatorType(); + + if (addVEGPreds && + to == ITM_VEG_REFERENCE && + bo == ITM_VEG_REFERENCE) + { + VEG *vegT = static_cast<VEGReference *>(t)->getVEG(); + VEG *vegB = static_cast<VEGReference *>(b)->getVEG(); + ValueId topPred(vegT->getVEGPredicate()->getValueId()); + + if (! 
topValues_.contains(topPred)) + { + NABoolean ok = TRUE; + ValueId constT = vegT->getAConstant(TRUE); + ValueId constB = vegB->getAConstant(TRUE); + + // check whether constants match + // (or are both NULL_VALUE_ID) + if (compareConstants) + ok = (constT == constB); + + if (!ok && vegRefsWithDifferentConstants) + *vegRefsWithDifferentConstants += + vegT->getVEGReference()->getValueId(); + + if (ok && + ((topInputsToCheck && topInputsToCheck->entries() > 0) || + (bottomInputsToCheck && bottomInputsToCheck->entries() > 0))) + { + ValueIdSet topInputs; + ValueIdSet bottomInputs; + + if (topInputsToCheck) + topInputs = *topInputsToCheck; + if (bottomInputsToCheck) + bottomInputs = *bottomInputsToCheck; + + topInputs.intersectSet(vegT->getAllValues()); + bottomInputs.intersectSet(vegB->getAllValues()); + // if the caller provided inputs to check, we only + // rewrite VEGPreds if their VEGies refer to the + // same inputs (or if they don't refer to any inputs) + ok = (topInputs == bottomInputs); + + if (!ok && vegRefsWithDifferentInputs) + *vegRefsWithDifferentInputs += + vegT->getVEGReference()->getValueId(); + } + + if (ok) + { + topValues_.insert(topPred); + bottomValues_.insert(vegB->getVEGPredicate()->getValueId()); + } + } + } + + if (addVEGRefs && + to == ITM_VEG_PREDICATE && + bo == ITM_VEG_PREDICATE) + { + ValueId topRef(static_cast<VEGPredicate *>(t)-> + getVEG()->getVEGReference()->getValueId()); + + if (! 
topValues_.contains(topRef)) + { + topValues_.insert(topRef); + bottomValues_.insert( + static_cast<VEGPredicate *>(b)-> + getVEG()->getVEGReference()->getValueId()); + } + } + } +} + NABoolean ValueIdMap::normalizeNode(NormWA & normWARef) { NABoolean t1,t2; http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/b90dc334/core/sql/optimizer/ValueDesc.h ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/ValueDesc.h b/core/sql/optimizer/ValueDesc.h index b221a06..d423eb6 100644 --- a/core/sql/optimizer/ValueDesc.h +++ b/core/sql/optimizer/ValueDesc.h @@ -890,9 +890,10 @@ public: // // This method removes from the this valueid set those values that // are covered by the available inputs. + // It returns the number of elements removed. // -------------------------------------------------------------------- - void removeCoveredExprs(const ValueIdSet & newExternalInputs, - ValueIdSet* usedInputs = NULL); + Int32 removeCoveredExprs(const ValueIdSet & newExternalInputs, + ValueIdSet* usedInputs = NULL); //------------------------------------------------------- //removeCoveredVidSet() @@ -907,8 +908,9 @@ public: // // This method removes from the valueid set that values that // are NOT covered by the available inputs. + // It returns the number of elements removed. 
// -------------------------------------------------------------------- - void removeUnCoveredExprs(const ValueIdSet & newExternalInputs); + Int32 removeUnCoveredExprs(const ValueIdSet & newExternalInputs); // --------------------------------------------------------------------- // simplifyOrderExpr() @@ -1554,6 +1556,15 @@ public: // flip the top and bottom maps void flipSides(); + // add VEGPreds for VEGRefs contained in the map + void augmentForVEG(NABoolean addVEGPreds, + NABoolean addVEGRefs, + NABoolean compareConstants, + const ValueIdSet *topInputsToCheck, + const ValueIdSet *bottomInputsToCheck, + ValueIdSet *vegRefsWithDifferentConstants = NULL, + ValueIdSet *vegRefsWithDifferentInputs = NULL); + // Normalize the map replacing valueIds with VEGRefs NABoolean normalizeNode(NormWA & normWARef); http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/b90dc334/core/sql/parser/sqlparser.y ---------------------------------------------------------------------- diff --git a/core/sql/parser/sqlparser.y b/core/sql/parser/sqlparser.y index e7dcd49..beea4d8 100755 --- a/core/sql/parser/sqlparser.y +++ b/core/sql/parser/sqlparser.y @@ -6677,11 +6677,28 @@ table_as_tmudf_function : TOK_UDF '(' table_mapping_function_invocation ')' table_name_and_hint : table_name optimizer_hint hbase_access_options { - NAString tmp = ((*$1).getQualifiedNameAsString()); - if(SqlParser_CurrentParser->hasWithDefinition(&tmp) ) + NAString cteName = ((*$1).getQualifiedNameAsString()); + if(SqlParser_CurrentParser->hasWithDefinition(&cteName) ) { - RelExpr *re = SqlParser_CurrentParser->getWithDefinition(&tmp); - $$=re->copyTree(PARSERHEAP()); + RelExpr *re = SqlParser_CurrentParser->getWithDefinition(&cteName); + if (CmpCommon::getDefault(CSE_FOR_WITH) == DF_ON) + { + CommonSubExprRef *cse = + new(PARSERHEAP()) CommonSubExprRef(re,cteName); + + if (!cse->isFirstReference()) + cse->setChild(0, re->copyTree(PARSERHEAP())); + + if ($2) + cse->setHint($2); + if ($3) + 
cse->setHbaseAccessOptions($3); + + cse->addToCmpStatement(); + $$ = cse; + } + else + $$=re->copyTree(PARSERHEAP()); delete $1; } else @@ -6994,12 +7011,29 @@ del_stmt_w_acc_type_rtn_list_and_as_clause_col_list : '(' delete_statement acce table_name_as_clause_and_hint : table_name as_clause optimizer_hint hbase_access_options { - NAString tmp = ((*$1).getQualifiedNameAsString()); - if(SqlParser_CurrentParser->hasWithDefinition(&tmp) ) + NAString cteName = ((*$1).getQualifiedNameAsString()); + if(SqlParser_CurrentParser->hasWithDefinition(&cteName) ) { - RelExpr *re = SqlParser_CurrentParser->getWithDefinition(&tmp); - RenameTable *rt = new (PARSERHEAP()) RenameTable(re, *$2); - $$=rt->copyTree(PARSERHEAP()); + RelExpr *re = SqlParser_CurrentParser->getWithDefinition(&cteName); + if (CmpCommon::getDefault(CSE_FOR_WITH) == DF_ON) + { + CommonSubExprRef *cse = + new(PARSERHEAP()) CommonSubExprRef(re,cteName); + + if (!cse->isFirstReference()) + cse->setChild(0, re->copyTree(PARSERHEAP())); + + if ($3) + cse->setHint($3); + if ($4) + cse->setHbaseAccessOptions($4); + + cse->addToCmpStatement(); + $$ = cse; + } + else + $$=re->copyTree(PARSERHEAP()); + $$ = new (PARSERHEAP()) RenameTable($$, *$2); } else { @@ -7072,8 +7106,11 @@ with_clause_elements : with_clause_element with_clause_element : correlation_name TOK_AS '(' query_expression ')' { - RelRoot *root = new (PARSERHEAP()) - RelRoot($4, REL_ROOT); + RelExpr *root = $4; + + if (root->getOperatorType() != REL_ROOT) + root = new (PARSERHEAP()) RelRoot(root, REL_ROOT); + $$= new (PARSERHEAP()) RenameTable(root, *$1); //Duplicated definition of WITH http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/b90dc334/core/sql/regress/compGeneral/EXPECTED005 ---------------------------------------------------------------------- diff --git a/core/sql/regress/compGeneral/EXPECTED005 b/core/sql/regress/compGeneral/EXPECTED005 index 4717d8e..c42c814 100755 --- a/core/sql/regress/compGeneral/EXPECTED005 +++ 
b/core/sql/regress/compGeneral/EXPECTED005 @@ -1053,9 +1053,6 @@ DNO DNAME ENO DNO *** ERROR[8822] The statement was not prepared. >> ->>cqd mode_special_4 'on'; - ---- SQL operation complete. >>with w1 as (select * from witht1), +>w2 as (select * from w1) +>select * from w2; @@ -1117,9 +1114,6 @@ C1 C2 *** ERROR[8822] The statement was not prepared. ->>cqd mode_special_4 reset; - ---- SQL operation complete. >> >>?section cleanup >>
