http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/3b437720/core/sql/optimizer/NATable.cpp ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/NATable.cpp b/core/sql/optimizer/NATable.cpp index d8d4b2e..24cfbf3 100644 --- a/core/sql/optimizer/NATable.cpp +++ b/core/sql/optimizer/NATable.cpp @@ -84,6 +84,7 @@ #include "ComSmallDefs.h" #include "CmpMain.h" #include "TrafDDLdesc.h" +#include "CmpSeabaseDDL.h" #define MAX_NODE_NAME 9 #define MAX_PRECISION_ALLOWED 18 @@ -5319,17 +5320,17 @@ NABoolean createNAFileSets(TrafDesc * table_desc /*IN*/, if (table_desc->tableDesc()->all_col_fams) { // Space delimited col families. - - string buf; // Have a buffer string - stringstream ss(table_desc->tableDesc()->all_col_fams); // Insert the string into a stream - - while (ss >> buf) - { - allColFams_.insert(buf.c_str()); - } + + string buf; // Have a buffer string + stringstream ss(table_desc->tableDesc()->all_col_fams); // Insert the string into a stream + + while (ss >> buf) + { + allColFams_.insert(buf.c_str()); + } } else - allColFams_.insert(defaultColFam_); + allColFams_.insert(defaultColFam_); TrafDesc * files_desc = table_desc->tableDesc()->files_desc; @@ -5337,132 +5338,132 @@ NABoolean createNAFileSets(TrafDesc * table_desc /*IN*/, // Therefore, only setup the partnsDesc_ if this is a partitionable object if (files_desc) { - if (files_desc->filesDesc()->partns_desc) - partnsDesc_ = files_desc->filesDesc()->partns_desc; + if (files_desc->filesDesc()->partns_desc) + partnsDesc_ = files_desc->filesDesc()->partns_desc; } else - partnsDesc_ = NULL; + partnsDesc_ = NULL; // // Insert a NAColumn in the colArray_ for this NATable for each // columns_desc from the ARK SMD. Returns TRUE if error creating NAColumns. // if (createNAColumns(table_desc->tableDesc()->columns_desc, - this, - colArray_ /*OUT*/, - heap_)) - //coverity[leaked_storage] - return; // colcount_ == 0 indicates an error + this, + colArray_ /*OUT*/, + heap_)) + //coverity[leaked_storage] + return; // colcount_ == 0 indicates an error // // Add view information, if this is a view // TrafDesc *view_desc = table_desc->tableDesc()->views_desc; if (view_desc) - { - viewText_ = new (heap_) char[strlen(view_desc->viewDesc()->viewtext) + 2]; - strcpy(viewText_, view_desc->viewDesc()->viewtext); - strcat(viewText_, ";"); - - viewTextCharSet_ = (CharInfo::CharSet)view_desc->viewDesc()->viewtextcharset; - - viewCheck_ = NULL; //initialize - if(view_desc->viewDesc()->viewchecktext){ - UInt32 viewCheckLength = str_len(view_desc->viewDesc()->viewchecktext)+1; - viewCheck_ = new (heap_) char[ viewCheckLength]; - memcpy(viewCheck_, view_desc->viewDesc()->viewchecktext, - viewCheckLength); - } + { + viewText_ = new (heap_) char[strlen(view_desc->viewDesc()->viewtext) + 2]; + strcpy(viewText_, view_desc->viewDesc()->viewtext); + strcat(viewText_, ";"); + + viewTextCharSet_ = (CharInfo::CharSet)view_desc->viewDesc()->viewtextcharset; + + viewCheck_ = NULL; //initialize + if(view_desc->viewDesc()->viewchecktext){ + UInt32 viewCheckLength = str_len(view_desc->viewDesc()->viewchecktext)+1; + viewCheck_ = new (heap_) char[ viewCheckLength]; + memcpy(viewCheck_, view_desc->viewDesc()->viewchecktext, + viewCheckLength); + } - viewColUsages_ = NULL; - if(view_desc->viewDesc()->viewcolusages){ - viewColUsages_ = new (heap_) NAList<ComViewColUsage *>; //initialize empty list - char * beginStr (view_desc->viewDesc()->viewcolusages); - char * endStr = strchr(beginStr, ';'); - while (endStr != NULL) { - ComViewColUsage *colUsage = new (heap_) ComViewColUsage; - NAString currentUsage(beginStr, endStr - beginStr + 1); - colUsage->unpackUsage (currentUsage.data()); - viewColUsages_->insert(colUsage); - beginStr = endStr+1; - endStr = strchr(beginStr, ';'); - } - } + viewColUsages_ = NULL; + if(view_desc->viewDesc()->viewcolusages){ + viewColUsages_ = new (heap_) NAList<ComViewColUsage *>; //initialize empty list + char * beginStr (view_desc->viewDesc()->viewcolusages); + char * endStr = strchr(beginStr, ';'); + while (endStr != NULL) { + ComViewColUsage *colUsage = new (heap_) ComViewColUsage; + NAString currentUsage(beginStr, endStr - beginStr + 1); + colUsage->unpackUsage (currentUsage.data()); + viewColUsages_->insert(colUsage); + beginStr = endStr+1; + endStr = strchr(beginStr, ';'); + } + } - setUpdatable(view_desc->viewDesc()->isUpdatable()); - setInsertable(view_desc->viewDesc()->isInsertable()); + setUpdatable(view_desc->viewDesc()->isUpdatable()); + setInsertable(view_desc->viewDesc()->isInsertable()); - // - // The updatable flag is false for an MP view only if it is NOT a - // protection view. Therefore updatable == FALSE iff it is a - // shorthand view. See ReadTableDef.cpp, l. 3379. - // + // + // The updatable flag is false for an MP view only if it is NOT a + // protection view. Therefore updatable == FALSE iff it is a + // shorthand view. See ReadTableDef.cpp, l. 3379. + // - viewFileName_ = NULL; - CMPASSERT(view_desc->viewDesc()->viewfilename); - UInt32 viewFileNameLength = str_len(view_desc->viewDesc()->viewfilename) + 1; - viewFileName_ = new (heap_) char[viewFileNameLength]; - memcpy(viewFileName_, view_desc->viewDesc()->viewfilename, - viewFileNameLength); - } + viewFileName_ = NULL; + CMPASSERT(view_desc->viewDesc()->viewfilename); + UInt32 viewFileNameLength = str_len(view_desc->viewDesc()->viewfilename) + 1; + viewFileName_ = new (heap_) char[viewFileNameLength]; + memcpy(viewFileName_, view_desc->viewDesc()->viewfilename, + viewFileNameLength); + } else - { - //keep track of memory used by NAFileSets - Lng32 preCreateNAFileSetsMemSize = heap_->getAllocSize(); + { + //keep track of memory used by NAFileSets + Lng32 preCreateNAFileSetsMemSize = heap_->getAllocSize(); - // - // Process indexes and vertical partitions for this table. - // - if (createNAFileSets(table_desc /*IN*/, - this /*IN*/, - colArray_ /*IN*/, - indexes_ /*OUT*/, - vertParts_ /*OUT*/, - clusteringIndex_ /*OUT*/, - tableIdList_ /*OUT*/, - heap_, - bindWA, - newColumns_, /*OUT*/ - maxIndexLevelsPtr)) { - return; // colcount_ == 0 indicates an error - } + // + // Process indexes and vertical partitions for this table. + // + if (createNAFileSets(table_desc /*IN*/, + this /*IN*/, + colArray_ /*IN*/, + indexes_ /*OUT*/, + vertParts_ /*OUT*/, + clusteringIndex_ /*OUT*/, + tableIdList_ /*OUT*/, + heap_, + bindWA, + newColumns_, /*OUT*/ + maxIndexLevelsPtr)) { + return; // colcount_ == 0 indicates an error + } - // Add constraint info. - // - // This call to createConstraintInfo, calls the parser on - // the constraint name - // - - NABoolean errorOccurred = - createConstraintInfo(table_desc /*IN*/, - getTableName() /*IN*/, - getNAColumnArray()/*IN (some columns updated)*/, - checkConstraints_ /*OUT*/, - uniqueConstraints_/*OUT*/, - refConstraints_ /*OUT*/, - heap_, - bindWA); - - if (errorOccurred) { - // return before setting colcount_, indicating that there - // was an error in constructing this NATable. - // - return; - } - - // - // FetchHistograms call used to be here -- moved to getStatistics(). - // - } - - // change partFunc for base table if PARTITION clause has been used - // to limit the number of partitions that will be accessed. - if ((qualifiedName_.isPartitionNameSpecified()) || - (qualifiedName_.isPartitionRangeSpecified())) { - if (filterUnusedPartitions(corrName.getPartnClause())) { - return ; - } - } + // Add constraint info. + // + // This call to createConstraintInfo, calls the parser on + // the constraint name + // + + NABoolean errorOccurred = + createConstraintInfo(table_desc /*IN*/, + getTableName() /*IN*/, + getNAColumnArray()/*IN (some columns updated)*/, + checkConstraints_ /*OUT*/, + uniqueConstraints_/*OUT*/, + refConstraints_ /*OUT*/, + heap_, + bindWA); + + if (errorOccurred) { + // return before setting colcount_, indicating that there + // was an error in constructing this NATable. + // + return; + } + + // + // FetchHistograms call used to be here -- moved to getStatistics(). + // + } + + // change partFunc for base table if PARTITION clause has been used + // to limit the number of partitions that will be accessed. + if ((qualifiedName_.isPartitionNameSpecified()) || + (qualifiedName_.isPartitionRangeSpecified())) { + if (filterUnusedPartitions(corrName.getPartnClause())) { + return ; + } + } // // Set colcount_ after all possible errors (Binder uses nonzero colcount @@ -5484,17 +5485,17 @@ NABoolean createNAFileSets(TrafDesc * table_desc /*IN*/, mvAttributeBitmap_.initBitmap(table_desc->tableDesc()->mvAttributesBitmap); TrafDesc *mvs_desc = NULL; // using mvs not set or returned for traf tables -// Memory Leak + // Memory Leak while (mvs_desc) { - TrafUsingMvDesc* mv = mvs_desc->usingMvDesc(); + TrafUsingMvDesc* mv = mvs_desc->usingMvDesc(); - UsingMvInfo *usingMv = new(heap_) - UsingMvInfo(mv->mvName, mv->refreshType(), mv->rewriteEnabled, - mv->isInitialized, heap_); - mvsUsingMe_.insert(usingMv); + UsingMvInfo *usingMv = new(heap_) + UsingMvInfo(mv->mvName, mv->refreshType(), mv->rewriteEnabled, + mv->isInitialized, heap_); + mvsUsingMe_.insert(usingMv); - mvs_desc = mvs_desc->next; + mvs_desc = mvs_desc->next; } // ++MV @@ -5510,18 +5511,18 @@ NABoolean createNAFileSets(TrafDesc * table_desc /*IN*/, if (isAnMV_) { - switch(qualifiedName_.getSpecialType()) - { - case ExtendedQualName::GHOST_TABLE: - qualifiedName_.setSpecialType(ExtendedQualName::GHOST_MV_TABLE); - break; - case ExtendedQualName::GHOST_MV_TABLE: - // Do not change it - break; - default: - qualifiedName_.setSpecialType(ExtendedQualName::MV_TABLE); - break; - } + switch(qualifiedName_.getSpecialType()) + { + case ExtendedQualName::GHOST_TABLE: + qualifiedName_.setSpecialType(ExtendedQualName::GHOST_MV_TABLE); + break; + case ExtendedQualName::GHOST_MV_TABLE: + // Do not change it + break; + default: + qualifiedName_.setSpecialType(ExtendedQualName::MV_TABLE); + break; + } } // --MV @@ -5529,508 +5530,510 @@ NABoolean createNAFileSets(TrafDesc * table_desc /*IN*/, // Initialize the sequence generator fields TrafDesc *sequence_desc = table_desc->tableDesc()->sequence_generator_desc; if (sequence_desc != NULL) { - TrafSequenceGeneratorDesc *sg_desc = sequence_desc->sequenceGeneratorDesc(); - - if (sg_desc != NULL) - { - sgAttributes_ = - new(heap_) SequenceGeneratorAttributes( - sg_desc->startValue, - sg_desc->increment, - sg_desc->maxValue, - sg_desc->minValue, - sg_desc->sgType(), - (ComSQLDataType)sg_desc->sqlDataType, - (ComFSDataType)sg_desc->fsDataType, - sg_desc->cycleOption, - FALSE, - sg_desc->objectUID, - sg_desc->cache, - sg_desc->nextValue, - 0, - sg_desc->redefTime); - } + TrafSequenceGeneratorDesc *sg_desc = sequence_desc->sequenceGeneratorDesc(); + + if (sg_desc != NULL) + { + sgAttributes_ = + new(heap_) SequenceGeneratorAttributes( + sg_desc->startValue, + sg_desc->increment, + sg_desc->maxValue, + sg_desc->minValue, + sg_desc->sgType(), + (ComSQLDataType)sg_desc->sqlDataType, + (ComFSDataType)sg_desc->fsDataType, + sg_desc->cycleOption, + FALSE, + sg_desc->objectUID, + sg_desc->cache, + sg_desc->nextValue, + 0, + sg_desc->redefTime); + } } #ifndef NDEBUG if (getenv("NATABLE_DEBUG")) - { - cout << "NATable " << (void*)this << " " - << qualifiedName_.getQualifiedNameObj().getQualifiedNameAsAnsiString() << " " - << (Int32)qualifiedName_.getSpecialType() << endl; - colArray_.print(); - } - #endif - //this guy is cacheable + { + cout << "NATable " << (void*)this << " " + << qualifiedName_.getQualifiedNameObj().getQualifiedNameAsAnsiString() << " " + << (Int32)qualifiedName_.getSpecialType() << endl; + colArray_.print(); + } +#endif + //this guy is cacheable if((qualifiedName_.isCacheable())&& - (NOT (isHbaseTable())) && - //this object is not on the statement heap (i.e. it is being cached) - ((heap_ != CmpCommon::statementHeap())|| - (OSIM_runningInCaptureMode()))) + (NOT (isHbaseTable())) && + //this object is not on the statement heap (i.e. it is being cached) + ((heap_ != CmpCommon::statementHeap())|| + (OSIM_runningInCaptureMode()))) { - char * nodeName = NULL; - char * catStr = NULL; - char * schemaStr = NULL; - char * fileStr = NULL; - short nodeNameLen = 0; - Int32 catStrLen = 0; - Int32 schemaStrLen = 0; - Int32 fileStrLen = 0; + char * nodeName = NULL; + char * catStr = NULL; + char * schemaStr = NULL; + char * fileStr = NULL; + short nodeNameLen = 0; + Int32 catStrLen = 0; + Int32 schemaStrLen = 0; + Int32 fileStrLen = 0; #ifdef NA_64BIT - // dg64 - match signature - int_32 primaryNodeNum=0; + // dg64 - match signature + int_32 primaryNodeNum=0; #else - Int32 primaryNodeNum=0; + Int32 primaryNodeNum=0; #endif - short error = 0; - - //clusteringIndex has physical filename that can be used to check - //if a catalog operation has been performed on a table. - //Views don't have clusteringIndex, so we get physical filename - //from the viewFileName_ datamember. - if(viewText_) - { - //view filename starts with node name - //filename is in format \<node_name>.$<volume>.<subvolume>.<file> - //catStr => <volume> - //schemaStr => <subvolume> - //fileStr => <file> - nodeName = viewFileName_; - catStr = nodeName; - - //skip over node name - //measure node name length - //get to begining of volume name - //Measure length of node name - //skip over node name i.e. \MAYA, \AZTEC, etc - //and get to volume name - while((nodeName[nodeNameLen]!='.')&& - (nodeNameLen < 8)){ - catStr++; - nodeNameLen++; - }; - - //skip over '.' and the '$' in volume name - catStr=&nodeName[nodeNameLen+2]; - schemaStr=catStr; - - //skip over the volume/catalog name - //while measuring catalog name length - while((catStr[catStrLen]!='.')&& - (catStrLen < 8)) - { - schemaStr++; - catStrLen++; - } - - //skip over the '.' - schemaStr++; - fileStr=schemaStr; - - //skip over the subvolume/schema name - //while measuring schema name length - while((schemaStr[schemaStrLen]!='.')&& - (schemaStrLen < 8)) - { - fileStr++; - schemaStrLen++; - } - - //skip over the '.' - fileStr++; - fileStrLen = str_len(fileStr); - - //figure out the node number for the node - //which has the primary partition. - primaryNodeNum=0; + short error = 0; - if(!OSIM_runningSimulation()) - primaryNodeNum = gpClusterInfo->mapNodeNameToNodeNum(NAString(nodeName)); - } - else{ - //get qualified name of the clustering index which should - //be the actual physical file name of the table - const QualifiedName fileNameObj = getClusteringIndex()-> - getRandomPartition(); - const NAString fileName = fileNameObj.getObjectName(); - - //get schemaName object - const SchemaName schemaNameObj = fileNameObj.getSchemaName(); - const NAString schemaName = schemaNameObj.getSchemaName(); - - //get catalogName object - //this contains a string in the form \<node_name>.$volume - const CatalogName catalogNameObj = fileNameObj.getCatalogName(); - const NAString catalogName = catalogNameObj.getCatalogName(); - nodeName = (char*) catalogName.data(); - catStr = nodeName; - - //Measure length of node name - //skip over node name i.e. \MAYA, \AZTEC, etc - //and get to volume name - while((nodeName[nodeNameLen]!='.')&& - (nodeNameLen < 8)){ - catStr++; - nodeNameLen++; - }; - - //get volume/catalog name - //skip ".$" - catStr=&nodeName[nodeNameLen+2]; + //clusteringIndex has physical filename that can be used to check + //if a catalog operation has been performed on a table. + //Views don't have clusteringIndex, so we get physical filename + //from the viewFileName_ datamember. + if(viewText_) + { + //view filename starts with node name + //filename is in format \<node_name>.$<volume>.<subvolume>.<file> + //catStr => <volume> + //schemaStr => <subvolume> + //fileStr => <file> + nodeName = viewFileName_; + catStr = nodeName; + + //skip over node name + //measure node name length + //get to begining of volume name + //Measure length of node name + //skip over node name i.e. \MAYA, \AZTEC, etc + //and get to volume name + while((nodeName[nodeNameLen]!='.')&& + (nodeNameLen < 8)){ + catStr++; + nodeNameLen++; + }; + + //skip over '.' and the '$' in volume name + catStr=&nodeName[nodeNameLen+2]; + schemaStr=catStr; + + //skip over the volume/catalog name + //while measuring catalog name length + while((catStr[catStrLen]!='.')&& + (catStrLen < 8)) + { + schemaStr++; + catStrLen++; + } + + //skip over the '.' + schemaStr++; + fileStr=schemaStr; + + //skip over the subvolume/schema name + //while measuring schema name length + while((schemaStr[schemaStrLen]!='.')&& + (schemaStrLen < 8)) + { + fileStr++; + schemaStrLen++; + } + + //skip over the '.' + fileStr++; + fileStrLen = str_len(fileStr); + + //figure out the node number for the node + //which has the primary partition. + primaryNodeNum=0; + + if(!OSIM_runningSimulation()) + primaryNodeNum = gpClusterInfo->mapNodeNameToNodeNum(NAString(nodeName)); + } + else{ + //get qualified name of the clustering index which should + //be the actual physical file name of the table + const QualifiedName fileNameObj = getClusteringIndex()-> + getRandomPartition(); + const NAString fileName = fileNameObj.getObjectName(); + + //get schemaName object + const SchemaName schemaNameObj = fileNameObj.getSchemaName(); + const NAString schemaName = schemaNameObj.getSchemaName(); + + //get catalogName object + //this contains a string in the form \<node_name>.$volume + const CatalogName catalogNameObj = fileNameObj.getCatalogName(); + const NAString catalogName = catalogNameObj.getCatalogName(); + nodeName = (char*) catalogName.data(); + catStr = nodeName; + + //Measure length of node name + //skip over node name i.e. \MAYA, \AZTEC, etc + //and get to volume name + while((nodeName[nodeNameLen]!='.')&& + (nodeNameLen < 8)){ + catStr++; + nodeNameLen++; + }; + + //get volume/catalog name + //skip ".$" + catStr=&nodeName[nodeNameLen+2]; #pragma nowarn(1506) // warning elimination - catStrLen = catalogName.length() - (nodeNameLen+2); + catStrLen = catalogName.length() - (nodeNameLen+2); #pragma warn(1506) // warning elimination - //get subvolume/schema name - schemaStr = (char *) schemaName.data(); + //get subvolume/schema name + schemaStr = (char *) schemaName.data(); #pragma nowarn(1506) // warning elimination - schemaStrLen = schemaName.length(); + schemaStrLen = schemaName.length(); #pragma warn(1506) // warning elimination - //get file name - fileStr = (char *) fileName.data(); + //get file name + fileStr = (char *) fileName.data(); #pragma nowarn(1506) // warning elimination - fileStrLen = fileName.length(); + fileStrLen = fileName.length(); #pragma warn(1506) // warning elimination - //figure out the node number for the node - //which has the primary partition. - primaryNodeNum=0; + //figure out the node number for the node + //which has the primary partition. + primaryNodeNum=0; - primaryNodeNum = gpClusterInfo->mapNodeNameToNodeNum(NAString(nodeName)); - - } + primaryNodeNum = gpClusterInfo->mapNodeNameToNodeNum(NAString(nodeName)); + + } } Lng32 postCreateNATableWarnings = CmpCommon::diags()->getNumber(DgSqlCode::WARNING_); if(postCreateNATableWarnings != preCreateNATableWarnings) - tableConstructionHadWarnings_=TRUE; + tableConstructionHadWarnings_=TRUE; const char *lobHdfsServer = CmpCommon::getDefaultString(LOB_HDFS_SERVER); Int32 lobHdfsPort = (Lng32)CmpCommon::getDefaultNumeric(LOB_HDFS_PORT); if (hasLobColumn()) - { - // read lob related information from lob metadata - short *lobNumList = new (heap_) short[getColumnCount()]; - short *lobTypList = new (heap_) short[getColumnCount()]; - char **lobLocList = new (heap_) char*[getColumnCount()]; - - const NAColumnArray &colArray = getNAColumnArray(); - NAColumn *nac = NULL; - - Lng32 j = 0; - for (CollIndex i = 0; i < getColumnCount(); i++) - { - nac = colArray.getColumn(i); - - if (nac->getType()->getTypeQualifier() == NA_LOB_TYPE) + { + // read lob related information from lob metadata + // setFromStoredDesc(TRUE); + // + short *lobNumList = new (heap_) short[getColumnCount()]; + short *lobTypList = new (heap_) short[getColumnCount()]; + char **lobLocList = new (heap_) char*[getColumnCount()]; + + const NAColumnArray &colArray = getNAColumnArray(); + NAColumn *nac = NULL; + + Lng32 j = 0; + for (CollIndex i = 0; i < getColumnCount(); i++) { - lobLocList[j] = new (heap_) char[1024]; - j++; - } - } - - NAString schNam; - schNam = "\""; - schNam += getTableName().getCatalogName(); - schNam += "\".\""; - schNam += getTableName().getSchemaName(); - schNam += "\""; - - Lng32 numLobs = 0; - Lng32 cliRC = SQL_EXEC_LOBddlInterface - ( - (char*)schNam.data(), - schNam.length(), - objectUid().castToInt64(), - numLobs, - LOB_CLI_SELECT_CURSOR, - lobNumList, - lobTypList, - lobLocList,(char *)lobHdfsServer,lobHdfsPort,0,FALSE); - - if (cliRC == 0) - { - for (Lng32 i = 0; i < numLobs; i++) + nac = colArray.getColumn(i); + + if (nac->getType()->getTypeQualifier() == NA_LOB_TYPE) + { + lobLocList[j] = new (heap_) char[1024]; + j++; + } + } + + NAString schNam; + schNam = "\""; + schNam += getTableName().getCatalogName(); + schNam += "\".\""; + schNam += getTableName().getSchemaName(); + schNam += "\""; + + Lng32 numLobs = 0; + Lng32 cliRC = SQL_EXEC_LOBddlInterface + ( + (char*)schNam.data(), + schNam.length(), + objectUid().castToInt64(), + numLobs, + LOB_CLI_SELECT_CURSOR, + lobNumList, + lobTypList, + lobLocList,(char *)lobHdfsServer,lobHdfsPort,0,FALSE); + + if (cliRC == 0) { - nac = colArray.getColumn(lobNumList[i]); - - nac->lobNum() = lobNumList[i]; - nac->lobStorageType() = (LobsStorage)lobTypList[i]; - nac->lobStorageLocation() = lobLocList[i]; - } - } // if + for (Lng32 i = 0; i < numLobs; i++) + { + nac = colArray.getColumn(lobNumList[i]); + + nac->lobNum() = lobNumList[i]; + nac->lobStorageType() = (LobsStorage)lobTypList[i]; + nac->lobStorageLocation() = lobLocList[i]; + } + } // if } // if - -// LCOV_EXCL_STOP + + // LCOV_EXCL_STOP initialSize_ = heap_->getAllocSize(); MonitorMemoryUsage_Exit((char*)mmPhase.data(), heap_, NULL, TRUE); -} // NATable() + } // NATable() #pragma warn(770) // warning elimination // Constructor for a Hive table NATable::NATable(BindWA *bindWA, - const CorrName& corrName, - NAMemory *heap, - struct hive_tbl_desc* htbl) - // - // The NATable heap ( i.e. heap_ ) used to come from ContextHeap - // (i.e. heap) but it creates high memory usage/leakage in Context - // Heap. Although the NATables are deleted at the end of each statement, - // the heap_ is returned to heap (i.e. context heap) which caused - // context heap containing a lot of not used chunk of memory. So it is - // changed to be from whatever heap is passed in at the call in - // NATableDB.getNATable. - // - // Now NATable objects can be cached.If an object is to be cached (persisted - // across statements) a NATable heap is allocated for the object - // and is passed in (this is done in NATableDB::get(CorrName& corrName...). - // Otherwise a reference to the Statement heap is passed in. When a cached - // object is to be deleted the object's heap is deleted which wipes out the - // NATable object all its related stuff. NATable objects that are not cached - // are wiped out at the end of the statement when the statement heap is deleted. - // - : heap_(heap), - referenceCount_(0), - refsIncompatibleDP2Halloween_(FALSE), - isHalloweenTable_(FALSE), - qualifiedName_(corrName.getExtendedQualNameObj(),heap), - synonymReferenceName_(heap), - fileSetName_(corrName.getQualifiedNameObj(),heap), // for now, set equal - clusteringIndex_(NULL), - colcount_(0), - colArray_(heap), - recordLength_(0), - indexes_(heap), - vertParts_(heap), - colStats_(NULL), - statsFetched_(FALSE), - viewFileName_(NULL), - viewText_(NULL), - viewTextInNAWchars_(heap), - viewTextCharSet_(CharInfo::UnknownCharSet), - viewCheck_(NULL), - viewColUsages_(NULL), - flags_(IS_INSERTABLE | IS_UPDATABLE), - insertMode_(COM_REGULAR_TABLE_INSERT_MODE), - isSynonymTranslationDone_(FALSE), - checkConstraints_(heap), - createTime_(htbl->creationTS_), - redefTime_(htbl->redeftime()), - cacheTime_(0), - statsTime_(0), - catalogUID_(0), - schemaUID_(0), - objectUID_(0), - objectType_(COM_UNKNOWN_OBJECT), - partitioningScheme_(COM_UNKNOWN_PARTITIONING), - uniqueConstraints_(heap), - refConstraints_(heap), - isAnMV_(FALSE), - isAnMVMetaData_(FALSE), - mvsUsingMe_(heap), - mvInfo_(NULL), - accessedInCurrentStatement_(TRUE), - setupForStatement_(FALSE), - resetAfterStatement_(FALSE), - hitCount_(0), - replacementCounter_(2), - sizeInCache_(0), - recentlyUsed_(TRUE), - tableConstructionHadWarnings_(FALSE), - isAnMPTableWithAnsiName_(FALSE), - isUMDTable_(FALSE), - isSMDTable_(FALSE), - isMVUMDTable_(FALSE), - - // For virtual tables, we set the object schema version - // to be the current schema version - osv_(COM_VERS_CURR_SCHEMA), - ofv_(COM_VERS_CURR_SCHEMA), - partnsDesc_(NULL), - colsWithMissingStats_(NULL), - originalCardinality_(-1.0), - tableIdList_(heap), - rcb_(NULL), - rcbLen_(0), - keyLength_(0), - parentTableName_(NULL), - sgAttributes_(NULL), - isHive_(TRUE), - isHbase_(FALSE), - isHbaseCell_(FALSE), - isHbaseRow_(FALSE), - isSeabase_(FALSE), - isSeabaseMD_(FALSE), - isSeabasePrivSchemaTable_(FALSE), - isUserUpdatableSeabaseMD_(FALSE), - resetHDFSStatsAfterStmt_(FALSE), - hiveDefaultStringLen_(0), - hiveTableId_(htbl->tblID_), - tableDesc_(NULL), - secKeySet_(heap), - privInfo_(NULL), - newColumns_(heap), - snapshotName_(NULL) + const CorrName& corrName, + NAMemory *heap, + struct hive_tbl_desc* htbl) + // + // The NATable heap ( i.e. heap_ ) used to come from ContextHeap + // (i.e. heap) but it creates high memory usage/leakage in Context + // Heap. Although the NATables are deleted at the end of each statement, + // the heap_ is returned to heap (i.e. context heap) which caused + // context heap containing a lot of not used chunk of memory. So it is + // changed to be from whatever heap is passed in at the call in + // NATableDB.getNATable. + // + // Now NATable objects can be cached.If an object is to be cached (persisted + // across statements) a NATable heap is allocated for the object + // and is passed in (this is done in NATableDB::get(CorrName& corrName...). + // Otherwise a reference to the Statement heap is passed in. When a cached + // object is to be deleted the object's heap is deleted which wipes out the + // NATable object all its related stuff. NATable objects that are not cached + // are wiped out at the end of the statement when the statement heap is deleted. + // + : heap_(heap), + referenceCount_(0), + refsIncompatibleDP2Halloween_(FALSE), + isHalloweenTable_(FALSE), + qualifiedName_(corrName.getExtendedQualNameObj(),heap), + synonymReferenceName_(heap), + fileSetName_(corrName.getQualifiedNameObj(),heap), // for now, set equal + clusteringIndex_(NULL), + colcount_(0), + colArray_(heap), + recordLength_(0), + indexes_(heap), + vertParts_(heap), + colStats_(NULL), + statsFetched_(FALSE), + viewFileName_(NULL), + viewText_(NULL), + viewTextInNAWchars_(heap), + viewTextCharSet_(CharInfo::UnknownCharSet), + viewCheck_(NULL), + viewColUsages_(NULL), + flags_(IS_INSERTABLE | IS_UPDATABLE), + insertMode_(COM_REGULAR_TABLE_INSERT_MODE), + isSynonymTranslationDone_(FALSE), + checkConstraints_(heap), + createTime_(htbl->creationTS_), + redefTime_(htbl->redeftime()), + cacheTime_(0), + statsTime_(0), + catalogUID_(0), + schemaUID_(0), + objectUID_(0), + objectType_(COM_UNKNOWN_OBJECT), + partitioningScheme_(COM_UNKNOWN_PARTITIONING), + uniqueConstraints_(heap), + refConstraints_(heap), + isAnMV_(FALSE), + isAnMVMetaData_(FALSE), + mvsUsingMe_(heap), + mvInfo_(NULL), + accessedInCurrentStatement_(TRUE), + setupForStatement_(FALSE), + resetAfterStatement_(FALSE), + hitCount_(0), + replacementCounter_(2), + sizeInCache_(0), + recentlyUsed_(TRUE), + tableConstructionHadWarnings_(FALSE), + isAnMPTableWithAnsiName_(FALSE), + isUMDTable_(FALSE), + isSMDTable_(FALSE), + isMVUMDTable_(FALSE), + + // For virtual tables, we set the object schema version + // to be the current schema version + osv_(COM_VERS_CURR_SCHEMA), + ofv_(COM_VERS_CURR_SCHEMA), + partnsDesc_(NULL), + colsWithMissingStats_(NULL), + originalCardinality_(-1.0), + tableIdList_(heap), + rcb_(NULL), + rcbLen_(0), + keyLength_(0), + parentTableName_(NULL), + sgAttributes_(NULL), + isHive_(TRUE), + isHbase_(FALSE), + isHbaseCell_(FALSE), + isHbaseRow_(FALSE), + isSeabase_(FALSE), + isSeabaseMD_(FALSE), + isSeabasePrivSchemaTable_(FALSE), + isUserUpdatableSeabaseMD_(FALSE), + resetHDFSStatsAfterStmt_(FALSE), + hiveDefaultStringLen_(0), + hiveTableId_(htbl->tblID_), + tableDesc_(NULL), + secKeySet_(heap), + privInfo_(NULL), + newColumns_(heap), + snapshotName_(NULL) { - NAString tblName = qualifiedName_.getQualifiedNameObj().getQualifiedNameAsString(); - NAString mmPhase; - - Lng32 preCreateNATableWarnings = CmpCommon::diags()->getNumber(DgSqlCode::WARNING_); - - //set heap type - if(heap_ == CmpCommon::statementHeap()){ - heapType_ = STATEMENT; - mmPhase = "NATable Init (Stmt) - " + tblName; - }else if (heap_ == CmpCommon::contextHeap()){ - heapType_ = CONTEXT; - mmPhase = "NATable Init (Cnxt) - " + tblName; - }else { - heapType_ = OTHER; - mmPhase = "NATable Init (Other) - " + tblName; - } + NAString tblName = qualifiedName_.getQualifiedNameObj().getQualifiedNameAsString(); + NAString mmPhase; + + Lng32 preCreateNATableWarnings = CmpCommon::diags()->getNumber(DgSqlCode::WARNING_); + + //set heap type + if(heap_ == CmpCommon::statementHeap()){ + heapType_ = STATEMENT; + mmPhase = "NATable Init (Stmt) - " + tblName; + }else if (heap_ == CmpCommon::contextHeap()){ + heapType_ = CONTEXT; + mmPhase = "NATable Init (Cnxt) - " + tblName; + }else { + heapType_ = OTHER; + mmPhase = "NATable Init (Other) - " + tblName; + } - MonitorMemoryUsage_Enter((char*)mmPhase.data(), heap_, TRUE); + MonitorMemoryUsage_Enter((char*)mmPhase.data(), heap_, TRUE); - isTrigTempTable_ = FALSE; + isTrigTempTable_ = FALSE; - insertMode_ = - COM_MULTISET_TABLE_INSERT_MODE; // allow dup, to check - //ComInsertMode::COM_MULTISET_TABLE_INSERT_MODE; // allow dup, to check + insertMode_ = + COM_MULTISET_TABLE_INSERT_MODE; // allow dup, to check + //ComInsertMode::COM_MULTISET_TABLE_INSERT_MODE; // allow dup, to check - // - // Add timestamp information. - // + // + // Add timestamp information. + // - // To get from Hive -/* - createTime_ = longArrayToInt64(table_desc->tableDesc()->createtime); - redefTime_ = longArrayToInt64(table_desc->tableDesc()->redeftime); - cacheTime_ = longArrayToInt64(table_desc->tableDesc()->cachetime); -*/ + // To get from Hive + /* + createTime_ = longArrayToInt64(table_desc->tableDesc()->createtime); + redefTime_ = longArrayToInt64(table_desc->tableDesc()->redeftime); + cacheTime_ = longArrayToInt64(table_desc->tableDesc()->cachetime); + */ - // NATable has a schemaUID column, probably should propogate it. - // for now, set to 0. - schemaUID_ = 0; + // NATable has a schemaUID column, probably should propogate it. + // for now, set to 0. + schemaUID_ = 0; - // Set the objectUID_ - // If the HIVE table has been registered in Trafodion, get the objectUID - // from Trafodion, otherwise, set it to 0. - // TBD - does getQualifiedNameObj handle delimited names correctly? - if ( !fetchObjectUIDForNativeTable(corrName) ) - return; + // Set the objectUID_ + // If the HIVE table has been registered in Trafodion, get the objectUID + // from Trafodion, otherwise, set it to 0. + // TBD - does getQualifiedNameObj handle delimited names correctly? + if ( !fetchObjectUIDForNativeTable(corrName) ) + return; - if ( objectUID_ > 0 ) - setHasExternalTable(TRUE); + if ( objectUID_ > 0 ) + setHasExternalTable(TRUE); - // for HIVE objects, the schema owner and table owner is HIVE_ROLE_ID - if (CmpCommon::context()->isAuthorizationEnabled()) - { - owner_ = HIVE_ROLE_ID; - schemaOwner_ = HIVE_ROLE_ID; - } - else - { - owner_ = SUPER_USER; - schemaOwner_ = SUPER_USER; - } + // for HIVE objects, the schema owner and table owner is HIVE_ROLE_ID + if (CmpCommon::context()->isAuthorizationEnabled()) + { + owner_ = HIVE_ROLE_ID; + schemaOwner_ = HIVE_ROLE_ID; + } + else + { + owner_ = SUPER_USER; + schemaOwner_ = SUPER_USER; + } - if (hasExternalTable()) - getPrivileges(NULL); + if (hasExternalTable()) + getPrivileges(NULL); - // TBD - if authorization is enabled and there is no external table to store - // privileges, go get privilege information from HIVE metadata ... - - // TBD - add a check to verify that the column list coming from HIVE matches - // the column list stored in the external table. Maybe some common method - // that can be used to compare other things as well... - - objectType_ = COM_BASE_TABLE_OBJECT; + // TBD - if authorization is enabled and there is no external table to store + // privileges, go get privilege information from HIVE metadata ... -// to check - partitioningScheme_ = COM_UNKNOWN_PARTITIONING; + // TBD - add a check to verify that the column list coming from HIVE matches + // the column list stored in the external table. Maybe some common method + // that can be used to compare other things as well... -// to check - rcb_ = 0; - rcbLen_ = 0; - keyLength_ = 0; + objectType_ = COM_BASE_TABLE_OBJECT; - partnsDesc_ = NULL; + // to check + partitioningScheme_ = COM_UNKNOWN_PARTITIONING; - // - // Insert a NAColumn in the colArray_ for this NATable for each - // columns_desc from the ARK SMD. Returns TRUE if error creating NAColumns. - // + // to check + rcb_ = 0; + rcbLen_ = 0; + keyLength_ = 0; - if (createNAColumns(htbl->getColumns(), - this, - colArray_ /*OUT*/, - heap_)) - //coverity[leaked_storage] - return; + partnsDesc_ = NULL; + // + // Insert a NAColumn in the colArray_ for this NATable for each + // columns_desc from the ARK SMD. Returns TRUE if error creating NAColumns. + // - // - // Set colcount_ after all possible errors (Binder uses nonzero colcount - // as an indicator of valid table definition). - // + if (createNAColumns(htbl->getColumns(), + this, + colArray_ /*OUT*/, + heap_)) + //coverity[leaked_storage] + return; - // To set it via the new createNAColumns() - colcount_ = colArray_.entries(); - // compute record length from colArray + // + // Set colcount_ after all possible errors (Binder uses nonzero colcount + // as an indicator of valid table definition). + // - Int32 recLen = 0; - for ( CollIndex i=0; i<colcount_; i++ ) { - recLen += colArray_[i]->getType()->getNominalSize(); - } + // To set it via the new createNAColumns() + colcount_ = colArray_.entries(); - setRecordLength(recLen); - - if (createNAFileSets(htbl /*IN*/, - this /*IN*/, - colArray_ /*IN*/, - indexes_ /*OUT*/, - vertParts_ /*OUT*/, - clusteringIndex_ /*OUT*/, - tableIdList_ /*OUT*/, - heap_, - bindWA - )) { - colcount_ = 0; // indicates failure - return; - } + // compute record length from colArray - // HIVE-TBD ignore constraint info creation for now + Int32 recLen = 0; + for ( CollIndex i=0; i<colcount_; i++ ) { + recLen += colArray_[i]->getType()->getNominalSize(); + } + setRecordLength(recLen); - // If there is a host variable associated with this table, store it - // for use by the generator to generate late-name resolution information. - // - HostVar *hv = corrName.getPrototype(); - prototype_ = hv ? new (heap_) HostVar(*hv) : NULL; + if (createNAFileSets(htbl /*IN*/, + this /*IN*/, + colArray_ /*IN*/, + indexes_ /*OUT*/, + vertParts_ /*OUT*/, + clusteringIndex_ /*OUT*/, + tableIdList_ /*OUT*/, + heap_, + bindWA + )) { + colcount_ = 0; // indicates failure + return; + } - // MV - // Initialize the MV support data members - isAnMV_ = FALSE; - isAnMVMetaData_ = FALSE; + // HIVE-TBD ignore constraint info creation for now - Lng32 postCreateNATableWarnings = CmpCommon::diags()->getNumber(DgSqlCode::WARNING_); - if(postCreateNATableWarnings != preCreateNATableWarnings) - tableConstructionHadWarnings_=TRUE; + // If there is a host variable associated with this table, store it + // for use by the generator to generate late-name resolution information. + // + HostVar *hv = corrName.getPrototype(); + prototype_ = hv ? new (heap_) HostVar(*hv) : NULL; - hiveDefaultStringLen_ = CmpCommon::getDefaultLong(HIVE_MAX_STRING_LENGTH); - Int32 hiveDefaultStringLenInBytes = CmpCommon::getDefaultLong(HIVE_MAX_STRING_LENGTH_IN_BYTES); - if( hiveDefaultStringLenInBytes != 32000 ) - hiveDefaultStringLen_ = hiveDefaultStringLenInBytes; + // MV + // Initialize the MV support data members + isAnMV_ = FALSE; + isAnMVMetaData_ = FALSE; -// LCOV_EXCL_STOP - initialSize_ = heap_->getAllocSize(); - MonitorMemoryUsage_Exit((char*)mmPhase.data(), heap_, NULL, TRUE); + Lng32 postCreateNATableWarnings = CmpCommon::diags()->getNumber(DgSqlCode::WARNING_); + + if(postCreateNATableWarnings != preCreateNATableWarnings) + tableConstructionHadWarnings_=TRUE; + + hiveDefaultStringLen_ = CmpCommon::getDefaultLong(HIVE_MAX_STRING_LENGTH); + Int32 hiveDefaultStringLenInBytes = CmpCommon::getDefaultLong(HIVE_MAX_STRING_LENGTH_IN_BYTES); + if( hiveDefaultStringLenInBytes != 32000 ) + hiveDefaultStringLen_ = hiveDefaultStringLenInBytes; + + // LCOV_EXCL_STOP + initialSize_ = heap_->getAllocSize(); + MonitorMemoryUsage_Exit((char*)mmPhase.data(), heap_, NULL, TRUE); } // NATable() #pragma warn(770) // warning elimination @@ -6039,20 +6042,20 @@ NATable::NATable(BindWA *bindWA, NABoolean NATable::doesMissingStatsWarningExist(CollIndexSet & colsSet) const { - return colsWithMissingStats_->contains(&colsSet); + return colsWithMissingStats_->contains(&colsSet); } NABoolean NATable::insertMissingStatsWarning(CollIndexSet colsSet) const { - CollIndexSet * setOfColsWithMissingStats = new (STMTHEAP) CollIndexSet (colsSet); + CollIndexSet * setOfColsWithMissingStats = new (STMTHEAP) CollIndexSet (colsSet); - Int32 someVar = 1; - CollIndexSet * result = colsWithMissingStats_->insert(setOfColsWithMissingStats, &someVar); + Int32 someVar = 1; + CollIndexSet * result = colsWithMissingStats_->insert(setOfColsWithMissingStats, &someVar); - if (result == NULL) - return FALSE; - else - return TRUE; + if (result == NULL) + return FALSE; + else + return TRUE; } // This gets called in the Optimizer phase -- the Binder phase will already have @@ -6060,523 +6063,523 @@ NABoolean NATable::insertMissingStatsWarning(CollIndexSet colsSet) const // below can decide which histograms and histints to leave in the stats list // and which to remove. // -StatsList & + StatsList & NATable::getStatistics() { - if (!statsFetched_) - { - // mark the kind of histograms needed for this table's columns - markColumnsForHistograms(); - - NAString tblName = qualifiedName_.getQualifiedNameObj().getQualifiedNameAsString(); - NAString mmPhase = "NATable getStats - " + tblName; - MonitorMemoryUsage_Enter((char*)mmPhase.data(), NULL, TRUE); - - //trying to get statistics for a new statement allocate colStats_ - colStats_ = new (CmpCommon::statementHeap()) StatsList(CmpCommon::statementHeap()); + if (!statsFetched_) + { + // mark the kind of histograms needed for this table's columns + markColumnsForHistograms(); + + NAString tblName = qualifiedName_.getQualifiedNameObj().getQualifiedNameAsString(); + NAString mmPhase = "NATable getStats - " + tblName; + MonitorMemoryUsage_Enter((char*)mmPhase.data(), NULL, TRUE); + + //trying to get statistics for a new statement allocate colStats_ + colStats_ = new (CmpCommon::statementHeap()) StatsList(CmpCommon::statementHeap()); + + // Do not create statistics on the fly for the following tables + if (isAnMV() || isUMDTable() || + isSMDTable() || isMVUMDTable() || + isTrigTempTable() ) + CURRSTMT_OPTDEFAULTS->setHistDefaultSampleSize(0); + + CURRCONTEXT_HISTCACHE->getHistograms(*this); + + if ((*colStats_).entries() > 0) + originalCardinality_ = (*colStats_)[0]->getRowcount(); + else + originalCardinality_ = ActiveSchemaDB()->getDefaults().getAsDouble(HIST_NO_STATS_ROWCOUNT); + + // ----------------------------------------------------------------------- + // So now we have read in the contents of the HISTOGRM & HISTINTS + // tables from the system catalog. Before we can use them, we need + // to massage them into a format we can use. In particular, we need + // to make sure that what we read in (which the user may have mucked + // about with) matches the histogram classes' internal semantic + // requirements. Also, we need to generate the MultiColumnUecList. + // ---------------------------------------------------------------------- + + // what did the user set as the max number of intervals? + NADefaults &defs = ActiveSchemaDB()->getDefaults(); + CollIndex maxIntervalCount = defs.getAsLong(HIST_MAX_NUMBER_OF_INTERVALS); + + //----------------------------------------------------------------------------------- + // Need to flag the MC colStatsDesc so it is only used for the range partitioning task + // and not any cardinality calculations tasks. Flagging it also makes the logic + // to check fo the presence for this MC easier (at the time we need to create + // the range partitioning function) + //----------------------------------------------------------------------------------- + + if (CmpCommon::getDefault(HBASE_RANGE_PARTITIONING_MC_SPLIT) == DF_ON && + !(*colStats_).allFakeStats()) + { + CollIndex currentMaxsize = 1; + Int32 posMCtoUse = -1; - // Do not create statistics on the fly for the following tables - if (isAnMV() || isUMDTable() || - isSMDTable() || isMVUMDTable() || - isTrigTempTable() ) - CURRSTMT_OPTDEFAULTS->setHistDefaultSampleSize(0); + NAColumnArray partCols; - CURRCONTEXT_HISTCACHE->getHistograms(*this); + if (getClusteringIndex()->getPartitioningKeyColumns().entries() > 0) + partCols = getClusteringIndex()->getPartitioningKeyColumns(); + else + partCols = getClusteringIndex()->getIndexKeyColumns(); - if ((*colStats_).entries() > 0) - originalCardinality_ = (*colStats_)[0]->getRowcount(); - else - originalCardinality_ = ActiveSchemaDB()->getDefaults().getAsDouble(HIST_NO_STATS_ROWCOUNT); - - // ----------------------------------------------------------------------- - // So now we have read in the contents of the HISTOGRM & HISTINTS - // tables from the system catalog. Before we can use them, we need - // to massage them into a format we can use. In particular, we need - // to make sure that what we read in (which the user may have mucked - // about with) matches the histogram classes' internal semantic - // requirements. Also, we need to generate the MultiColumnUecList. - // ---------------------------------------------------------------------- - - // what did the user set as the max number of intervals? - NADefaults &defs = ActiveSchemaDB()->getDefaults(); - CollIndex maxIntervalCount = defs.getAsLong(HIST_MAX_NUMBER_OF_INTERVALS); - - //----------------------------------------------------------------------------------- - // Need to flag the MC colStatsDesc so it is only used for the range partitioning task - // and not any cardinality calculations tasks. Flagging it also makes the logic - // to check fo the presence for this MC easier (at the time we need to create - // the range partitioning function) - //----------------------------------------------------------------------------------- - - if (CmpCommon::getDefault(HBASE_RANGE_PARTITIONING_MC_SPLIT) == DF_ON && - !(*colStats_).allFakeStats()) - { - CollIndex currentMaxsize = 1; - Int32 posMCtoUse = -1; + CollIndex partColNum = partCols.entries(); - NAColumnArray partCols; + // look for MC histograms that have multiple intervals and whose columns are a prefix for the + // paritition column list. If multiple pick the one with the most matching columns + for (Int32 i=0; i < (*colStats_).entries(); i++) + { + NAColumnArray statsCols = (*colStats_)[i]->getStatColumns(); + CollIndex colNum = statsCols.entries(); + + CollIndex j = 0; + + NABoolean potentialMatch = TRUE; + if ((colNum > currentMaxsize) && + (!(*colStats_)[i]->isSingleIntHist()) && // no SIH -- number of histograms is large enough to do splitting + (colNum <= partColNum)) + { + while ((j < colNum) && potentialMatch) + { + j++; + NAColumn * col = partCols[j-1]; + if (statsCols[j-1]->getPosition() != partCols[j-1]->getPosition()) + { + potentialMatch = FALSE; + break; + } + } + } + else + { + potentialMatch = FALSE; + } + + if (potentialMatch) + { + currentMaxsize = j; + posMCtoUse = i; + } + + // we got what we need, just return + if (potentialMatch && (currentMaxsize == partColNum)) + { + break; + } + } + + if (posMCtoUse >= 0) + { + (*colStats_)[posMCtoUse]->setMCforHbasePartitioning (TRUE); + } + } - if (getClusteringIndex()->getPartitioningKeyColumns().entries() > 0) - partCols = getClusteringIndex()->getPartitioningKeyColumns(); - else - partCols = getClusteringIndex()->getIndexKeyColumns(); + // ************************************************************************* + // FIRST: Generate the stats necessary to later create the + // MultiColumnUecList; then filter out the multi-column histograms + // because later code doesn't know how to handle them + // In the same loop, also mark another flag for originally fake histogram + // This is to differentiate the cases when the histogram is fake because + // it has no statistics and the case where the histogram has been termed + // fake by the optimizer because its statistics is no longer reliable. + // ************************************************************************* + CollIndex i ; + for ( i = 0 ; i < (*colStats_).entries() ; /* no automatic increment */ ) + { + // the StatsList has two lists which it uses to store the information we + // need to fill the MultiColumnUecList with <table-col-list,uec value> pairs: + // + // LIST(NAColumnArray) groupUecColumns_ + // LIST(CostScalar) groupUecValues_ + // + // ==> insert the NAColumnArray & uec total values for each + // entry in colStats_ + + // don't bother storing multicolumnuec info for fake histograms + // but do set the originallly fake histogram flag to TRUE + if ( (*colStats_)[i]->isFakeHistogram() ) + (*colStats_)[i]->setOrigFakeHist(TRUE); + else + { + NAColumnArray cols = (*colStats_)[i]->getStatColumns() ; + (*colStats_).groupUecColumns_.insert(cols) ; + + CostScalar uecs = (*colStats_)[i]->getTotalUec() ; + (*colStats_).groupUecValues_.insert(uecs) ; + + if (CmpCommon::getDefault(USTAT_COLLECT_MC_SKEW_VALUES) == DF_ON) + { + MCSkewedValueList mcSkewedValueList = (*colStats_)[i]->getMCSkewedValueList() ; + (*colStats_).groupMCSkewedValueLists_.insert(mcSkewedValueList) ; + } + } + + // MCH: + // once we've stored the column/uec information, filter out the + // multi-column histograms, since our synthesis code doesn't + // handle them + if (( (*colStats_)[i]->getStatColumns().entries() != 1) && + (!(*colStats_)[i]->isMCforHbasePartitioning())) + { + (*colStats_).removeAt(i) ; + } + else + { + i++ ; // in-place removal from a list is a bother! + } + } - CollIndex partColNum = partCols.entries(); + // ************************************************************************* + // SECOND: do some fixup work to make sure the histograms maintain + // the semantics we later expect (& enforce) + // ************************************************************************* + + // ------------------------------------------------------------------------- + // HISTINT fixup-code : char-string histograms + // ------------------------------------------------------------------------- + // problem arises with HISTINTs that are for char* columns + // here's what we can get: + // + // Rows Uec Value + // ---- --- ----- + // 0 0 "value" + // 10 5 "value" + // + // this is not good! The problem is our (lousy) encoding of + // char strings into EncodedValue's + // + // After much deliberation, here's our current fix: + // + // Rows Uec Value + // ---- --- ----- + // 0 0 "valu" <-- reduce the min value of 1st interval + // 10 5 "value" by a little bit + // + // When we find two intervals like this where they aren't the + // first intervals in the histogram, we simply merge them into + // one interval (adding row/uec information) and continue; note + // that in this case, we haven't actually lost any information; + // we've merely made sense out of (the garbage) what we've got + // + // ------------------------------------------------------------------------- + // additional HISTINT fixup-code + // ------------------------------------------------------------------------- + // 1. If there are zero or one HISTINTs, then set the HISTINTs to match + // the max/min information contained in the COLSTATS object. + // + // 2. If there are any HISTINTs whose boundary values are out-of-order, + // we abort with an an ERROR message. + // + // 3. If there is a NULL HISTINT at the end of the Histogram, then we + // need to make sure there are *TWO* NULL HISTINTS, to preserve correct + // histogram semantics for single-valued intervals. + // ------------------------------------------------------------------------- + + CollIndex j ; + for ( i = 0 ; i < (*colStats_).entries() ; i++ ) + { + // we only worry about histograms on char string columns + // correction: it turns out that these semantically-deranged + // ---------- histograms were being formed for other, non-char string + // columns, so we commented out the code below + // if ( colStats_[i]->getStatColumns()[0]->getType()->getTypeQualifier() != + // NA_CHARACTER_TYPE) + // continue ; // not a string, skip to next + + ColStatsSharedPtr stats = (*colStats_)[i] ; + + HistogramSharedPtr hist = stats->getHistogramToModify() ; + // histograms for key columns of a table that are not + // referenced in the query are read in with zero intervals + // (to conserve memory); however, internal + // histogram-semantic checking code assumes that any + // histogram which has zero intervals is FAKE; however + // however, MDAM will not be chosen in the case where one of + // the histograms for a key column is FAKE. Thus -- we will + // avoid this entire issue by creating a single interval for + // any Histograms that we read in that are empty. + if ( hist->entries() < 2 ) + { + if(stats->getMinValue() > stats->getMaxValue()) + { + *CmpCommon::diags() << DgSqlCode(CATALOG_HISTOGRM_HISTINTS_TABLES_CONTAIN_BAD_VALUE) + << DgString0("") + << DgString1(stats->getStatColumns()[0]->getFullColRefNameAsAnsiString().data() ); + + stats->createFakeHist(); + continue; + } + + stats->setToSingleInterval ( stats->getMinValue(), + stats->getMaxValue(), + stats->getRowcount(), + stats->getTotalUec() ) ; + // now we have to undo some of the automatic flag-setting + // of ColStats::setToSingleInterval() + stats->setMinSetByPred (FALSE) ; + stats->setMaxSetByPred (FALSE) ; + stats->setShapeChanged (FALSE) ; + continue ; // skip to next ColStats + } + + // NB: we'll handle the first Interval last + for ( j = 1 ; j < hist->entries()-1 ; /* no automatic increment */ ) + { - // look for MC histograms that have multiple intervals and whose columns are a prefix for the - // paritition column list. If multiple pick the one with the most matching columns - for (Int32 i=0; i < (*colStats_).entries(); i++) - { - NAColumnArray statsCols = (*colStats_)[i]->getStatColumns(); - CollIndex colNum = statsCols.entries(); - - CollIndex j = 0; - - NABoolean potentialMatch = TRUE; - if ((colNum > currentMaxsize) && - (!(*colStats_)[i]->isSingleIntHist()) && // no SIH -- number of histograms is large enough to do splitting - (colNum <= partColNum)) - { - while ((j < colNum) && potentialMatch) - { - j++; - NAColumn * col = partCols[j-1]; - if (statsCols[j-1]->getPosition() != partCols[j-1]->getPosition()) - { - potentialMatch = FALSE; - break; - } - } - } - else - { - potentialMatch = FALSE; - } - - if (potentialMatch) - { - currentMaxsize = j; - posMCtoUse = i; - } - - // we got what we need, just return - if (potentialMatch && (currentMaxsize == partColNum)) - { - break; - } - } + if ( (*hist)[j].getUec() == 0 || (*hist)[j].getCardinality() == 0 ) + { + hist->removeAt(j) ; + continue ; // don't increment, loop again + } + + // intervals must be in order! + if ( (*hist)[j].getBoundary() > (*hist)[j+1].getBoundary() ) + { + *CmpCommon::diags() << + DgSqlCode(CATALOG_HISTINTS_TABLES_CONTAIN_BAD_VALUES) + << DgInt0(j) + << DgInt1(j+1) + << DgString1(stats->getStatColumns()[0]->getFullColRefNameAsAnsiString().data() ); + + stats->createFakeHist(); + break ; // skip to next ColStats + } + + if ( (*hist)[j].getBoundary() == (*hist)[j+1].getBoundary() ) + { + // merge Intervals, if the two consecutive intervals have same + // boundaries and these are not single valued (UEC > 1) + // If there are more two single valued intervals, then merge + // all except the last one. + NABoolean mergeIntervals = FALSE; + + if (CmpCommon::getDefault(COMP_BOOL_79) == DF_ON) + { + mergeIntervals = TRUE; + + if( (j < (hist->entries() - 2)) && ((*hist)[j+1].getUec() == 1) && + ((*hist)[j+1].getBoundary() != (*hist)[j+2].getBoundary()) + || + (j == (hist->entries() - 2)) && ((*hist)[j+1].getUec() == 1) ) + mergeIntervals = FALSE; + } + else + { + if ( (*hist)[j+1].getUec() > 1) + mergeIntervals = TRUE; + } + + if ( mergeIntervals ) + { + // if the intervals with same boundary are not SVI, just merge them + // together. + // Also do the merge, if there are more than one SVIs with same + // encoded interval boundary. Example, we want to avoid intervals + // such as + // boundary inclusive_flag UEC + // 12345.00 < 1 + // 12345.00 < 1 + // 12345.00 <= 1 + // These would be changed to + // 12345.00 < 2 + // 12345.00 <= 1 + CostScalar combinedRows = (*hist)[ j ].getCardinality() + + (*hist)[j+1].getCardinality() ; + CostScalar combinedUec = (*hist)[ j ].getUec() + + (*hist)[j+1].getUec() ; + (*hist)[j].setCardAndUec (combinedRows, combinedUec) ; + stats->setIsColWithBndryConflict(TRUE); + hist->removeAt(j+1) ; + } + else + { + // for some reason, some SVI's aren't being + // generated correctly! + (*hist)[j].setBoundIncl(FALSE) ; + (*hist)[j+1].setBoundIncl(TRUE) ; + j++; + } + } + else + j++ ; // in-place removal from a list is a bother! + } // loop over intervals + + // ---------------------------------------------------------------------- + // now we handle the first interval + // + // first, it must be in order w.r.t. the second interval! + if ( (*hist)[0].getBoundary() > (*hist)[1].getBoundary() ) + { + *CmpCommon::diags() << + DgSqlCode(CATALOG_HISTINTS_TABLES_CONTAIN_BAD_VALUES) + << DgInt0(0) + << DgInt1(1) + << DgString1(stats->getStatColumns()[0]->getFullColRefNameAsAnsiString().data() ); + + stats->createFakeHist(); + continue ; // skip to next ColStats + } + + // second, handle the case where first and second interval are the same + if ( hist->entries() > 1 && // avoid the exception! might just be a single NULL + // // interval after the loop above + (*hist)[0].getBoundary() == (*hist)[1].getBoundary() && + (*hist)[1].getUec() > 1 ) + { + const double KLUDGE_VALUE = 0.0001 ; + const double oldVal = (*hist)[0].getBoundary().getDblValue() ; + const EncodedValue newVal = + EncodedValue(oldVal - (_ABSOLUTE_VALUE_(oldVal) * KLUDGE_VALUE)) ; // kludge alert! + //Absolute of oldval due to CR 10-010426-2457 + (*hist)[0].setBoundary( newVal ) ; + (*hist)[0].setBoundIncl( FALSE ) ; // no longer a real boundary! + (*colStats_)[i]->setMinValue( newVal ) ; // set aggr info also + } + // done with first interval + // ---------------------------------------------------------------------- + + // + // NULL values must only be stored in single-valued intervals + // in the histograms ; so, just in case we're only getting + // *one* HistInt for the NULL interval, insert a 2nd one + // + // 0 1 2 + // | | | + // | | | entries() == 3 + // NULL + // + // 0 1 2 3 + // | | | | + // | | | | entries() == 4 + // new NULL + // NULL + // + if ( hist->lastHistInt().isNull() ) + { + CollIndex count = hist->entries() ; + if ( !(*hist)[count-2].isNull() ) + { + // insert a 2nd NULL HISTINT, with boundaryIncl value FALSE + HistInt secondLast (hist->lastHistInt().getBoundary(), FALSE) ; + hist->insertAt(count-1,secondLast) ; + // new HISTINT by default has row/uec of 0, which is what we want + } + } + + // + // Now, reduce the total number of intervals to be the number + // that the user wants. This is used to test the tradeoffs + // between compile time & rowcount estimation. + // + (*colStats_)[i]->setMaxIntervalCount (maxIntervalCount) ; + (*colStats_)[i]->reduceToMaxIntervalCount () ; + + if ((*colStats_)[i]->getRowcount() == (*colStats_)[i]->getTotalUec() ) + (*colStats_)[i]->setAlmostUnique(TRUE); + + } // outer for loop -- done with this COLSTATS, continue with next one + // *********************************************************************** + + statsFetched_ = TRUE; + MonitorMemoryUsage_Exit((char*)mmPhase.data(), NULL, NULL, TRUE); + } // !statsFetched_ + + return (*colStats_); +} - if (posMCtoUse >= 0) - { - (*colStats_)[posMCtoUse]->setMCforHbasePartitioning (TRUE); - } - } + StatsList & +NATable::generateFakeStats() +{ + if (colStats_ == NULL) + { + //trying to get statistics for a new statement allocate colStats_ + colStats_ = new (CmpCommon::statementHeap()) StatsList(CmpCommon::statementHeap()); + } - // ************************************************************************* - // FIRST: Generate the stats necessary to later create the - // MultiColumnUecList; then filter out the multi-column histograms - // because later code doesn't know how to handle them - // In the same loop, also mark another flag for originally fake histogram - // This is to differentiate the cases when the histogram is fake because - // it has no statistics and the case where the histogram has been termed - // fake by the optimizer because its statistics is no longer reliable. - // ************************************************************************* - CollIndex i ; - for ( i = 0 ; i < (*colStats_).entries() ; /* no automatic increment */ ) - { - // the StatsList has two lists which it uses to store the information we - // need to fill the MultiColumnUecList with <table-col-list,uec value> pairs: - // - // LIST(NAColumnArray) groupUecColumns_ - // LIST(CostScalar) groupUecValues_ - // - // ==> insert the NAColumnArray & uec total values for each - // entry in colStats_ + if (colStats_->entries() > 0) + return (*colStats_); - // don't bother storing multicolumnuec info for fake histograms - // but do set the originallly fake histogram flag to TRUE - if ( (*colStats_)[i]->isFakeHistogram() ) - (*colStats_)[i]->setOrigFakeHist(TRUE); - else - { - NAColumnArray cols = (*colStats_)[i]->getStatColumns() ; - (*colStats_).groupUecColumns_.insert(cols) ; + NAColumnArray colList = getNAColumnArray() ; + double defaultFakeRowCount = (ActiveSchemaDB()->getDefaults()).getAsDouble(HIST_NO_STATS_ROWCOUNT); + double defaultFakeUec = (ActiveSchemaDB()->getDefaults()).getAsDouble(HIST_NO_STATS_UEC); - CostScalar uecs = (*colStats_)[i]->getTotalUec() ; - (*colStats_).groupUecValues_.insert(uecs) ; + if ( isHiveTable() ) { + defaultFakeRowCount = getOriginalRowCount().value(); + } - if (CmpCommon::getDefault(USTAT_COLLECT_MC_SKEW_VALUES) == DF_ON) - { - MCSkewedValueList mcSkewedValueList = (*colStats_)[i]->getMCSkewedValueList() ; - (*colStats_).groupMCSkewedValueLists_.insert(mcSkewedValueList) ; - } + /* if ( isHbaseTable() ) { + defaultFakeRowCount = getOriginalRowCount().value(); } + */ - // MCH: - // once we've stored the column/uec information, filter out the - // multi-column histograms, since our synthesis code doesn't - // handle them - if (( (*colStats_)[i]->getStatColumns().entries() != 1) && - (!(*colStats_)[i]->isMCforHbasePartitioning())) - { - (*colStats_).removeAt(i) ; - } - else - { - i++ ; // in-place removal from a list is a bother! - } - } + for (CollIndex i = 0; i < colList.entries(); i++ ) + { + NAColumn * col = colList[i]; - // ************************************************************************* - // SECOND: do some fixup work to make sure the histograms maintain - // the semantics we later expect (& enforce) - // ************************************************************************* + if (col->isUnique() ) + defaultFakeUec = defaultFakeRowCount; + else + defaultFakeUec = MINOF(defaultFakeUec, defaultFakeRowCount); - // ------------------------------------------------------------------------- - // HISTINT fixup-code : char-string histograms - // ------------------------------------------------------------------------- - // problem arises with HISTINTs that are for char* columns - // here's what we can get: - // - // Rows Uec Value - // ---- --- ----- - // 0 0 "value" - // 10 5 "value" - // - // this is not good! The problem is our (lousy) encoding of - // char strings into EncodedValue's - // - // After much deliberation, here's our current fix: - // - // Rows Uec Value - // ---- --- ----- - // 0 0 "valu" <-- reduce the min value of 1st interval - // 10 5 "value" by a little bit - // - // When we find two intervals like this where they aren't the - // first intervals in the histogram, we simply merge them into - // one interval (adding row/uec information) and continue; note - // that in this case, we haven't actually lost any information; - // we've merely made sense out of (the garbage) what we've got - // - // ------------------------------------------------------------------------- - // additional HISTINT fixup-code - // ------------------------------------------------------------------------- - // 1. If there are zero or one HISTINTs, then set the HISTINTs to match - // the max/min information contained in the COLSTATS object. - // - // 2. If there are any HISTINTs whose boundary values are out-of-order, - // we abort with an an ERROR message. - // - // 3. If there is a NULL HISTINT at the end of the Histogram, then we - // need to make sure there are *TWO* NULL HISTINTS, to preserve correct - // histogram semantics for single-valued intervals. - // ------------------------------------------------------------------------- + EncodedValue dummyVal(0.0); - CollIndex j ; - for ( i = 0 ; i < (*colStats_).entries() ; i++ ) - { - // we only worry about histograms on char string columns - // correction: it turns out that these semantically-deranged - // ---------- histograms were being formed for other, non-char string - // columns, so we commented out the code below - // if ( colStats_[i]->getStatColumns()[0]->getType()->getTypeQualifier() != - // NA_CHARACTER_TYPE) - // continue ; // not a string, skip to next - - ColStatsSharedPtr stats = (*colStats_)[i] ; - - HistogramSharedPtr hist = stats->getHistogramToModify() ; - // histograms for key columns of a table that are not - // referenced in the query are read in with zero intervals - // (to conserve memory); however, internal - // histogram-semantic checking code assumes that any - // histogram which has zero intervals is FAKE; however - // however, MDAM will not be chosen in the case where one of - // the histograms for a key column is FAKE. Thus -- we will - // avoid this entire issue by creating a single interval for - // any Histograms that we read in that are empty. - if ( hist->entries() < 2 ) - { - if(stats->getMinValue() > stats->getMaxValue()) - { - *CmpCommon::diags() << DgSqlCode(CATALOG_HISTOGRM_HISTINTS_TABLES_CONTAIN_BAD_VALUE) - << DgString0("") - << DgString1(stats->getStatColumns()[0]->getFullColRefNameAsAnsiString().data() ); + EncodedValue lowBound = dummyVal.minMaxValue(col->getType(), TRUE); + EncodedValue highBound = dummyVal.minMaxValue(col->getType(), FALSE); - stats->createFakeHist(); - continue; - } + HistogramSharedPtr emptyHist(new (HISTHEAP) Histogram(HISTHEAP)); - stats->setToSingleInterval ( stats->getMinValue(), - stats->getMaxValue(), - stats->getRowcount(), - stats->getTotalUec() ) ; - // now we have to undo some of the automatic flag-setting - // of ColStats::setToSingleInterval() - stats->setMinSetByPred (FALSE) ; - stats->setMaxSetByPred (FALSE) ; - stats->setShapeChanged (FALSE) ; - continue ; // skip to next ColStats - } + HistInt newFirstHistInt(lowBound, FALSE); - // NB: we'll handle the first Interval last - for ( j = 1 ; j < hist->entries()-1 ; /* no automatic increment */ ) - { + HistInt newSecondHistInt(highBound, TRUE); - if ( (*hist)[j].getUec() == 0 || (*hist)[j].getCardinality() == 0 ) - { - hist->removeAt(j) ; - continue ; // don't increment, loop again - } + newSecondHistInt.setCardAndUec(defaultFakeRowCount, + defaultFakeUec); - // intervals must be in order! - if ( (*hist)[j].getBoundary() > (*hist)[j+1].getBoundary() ) - { - *CmpCommon::diags() << - DgSqlCode(CATALOG_HISTINTS_TABLES_CONTAIN_BAD_VALUES) - << DgInt0(j) - << DgInt1(j+1) - << DgString1(stats->getStatColumns()[0]->getFullColRefNameAsAnsiString().data() ); - - stats->createFakeHist(); - break ; // skip to next ColStats - } + emptyHist->insert(newFirstHistInt); + emptyHist->insert(newSecondHistInt); - if ( (*hist)[j].getBoundary() == (*hist)[j+1].getBoundary() ) - { - // merge Intervals, if the two consecutive intervals have same - // boundaries and these are not single valued (UEC > 1) - // If there are more two single valued intervals, then merge - // all except the last one. - NABoolean mergeIntervals = FALSE; + ComUID histid(NA_JulianTimestamp()); + ColStatsSharedPtr fakeColStats( + new (HISTHEAP) ColStats(histid, + defaultFakeUec, + defaultFakeRowCount, + defaultFakeRowCount, + col->isUnique(), + FALSE, + emptyHist, + FALSE, + 1.0, + 1.0, + -1, // avg varchar size + HISTHEAP)); - if (CmpCommon::getDefault(COMP_BOOL_79) == DF_ON) - { - mergeIntervals = TRUE; + fakeColStats->setFakeHistogram(TRUE); + fakeColStats->setOrigFakeHist(TRUE); + fakeColStats->setMinValue(lowBound); + fakeColStats->setMaxValue(highBound); + fakeColStats->statColumns().insert(col); - if( (j < (hist->entries() - 2)) && ((*hist)[j+1].getUec() == 1) && - ((*hist)[j+1].getBoundary() != (*hist)[j+2].getBoundary()) - || - (j == (hist->entries() - 2)) && ((*hist)[j+1].getUec() == 1) ) - mergeIntervals = FALSE; - } - else - { - if ( (*hist)[j+1].getUec() > 1) - mergeIntervals = TRUE; - } + colStats_->insert(fakeColStats); + } + setStatsFetched(TRUE); + setOriginalRowCount(defaultFakeRowCount); - if ( mergeIntervals ) - { - // if the intervals with same boundary are not SVI, just merge them - // together. - // Also do the merge, if there are more than one SVIs with same - // encoded interval boundary. Example, we want to avoid intervals - // such as - // boundary inclusive_flag UEC - // 12345.00 < 1 - // 12345.00 < 1 - // 12345.00 <= 1 - // These would be changed to - // 12345.00 < 2 - // 12345.00 <= 1 - CostScalar combinedRows = (*hist)[ j ].getCardinality() + - (*hist)[j+1].getCardinality() ; - CostScalar combinedUec = (*hist)[ j ].getUec() + - (*hist)[j+1].getUec() ; - (*hist)[j].setCardAndUec (combinedRows, combinedUec) ; - stats->setIsColWithBndryConflict(TRUE); - hist->removeAt(j+1) ; - } - else - { - // for some reason, some SVI's aren't being - // generated correctly! - (*hist)[j].setBoundIncl(FALSE) ; - (*hist)[j+1].setBoundIncl(TRUE) ; - j++; - } - } - else - j++ ; // in-place removal from a list is a bother! - } // loop over intervals - - // ---------------------------------------------------------------------- - // now we handle the first interval - // - // first, it must be in order w.r.t. the second interval! - if ( (*hist)[0].getBoundary() > (*hist)[1].getBoundary() ) - { - *CmpCommon::diags() << - DgSqlCode(CATALOG_HISTINTS_TABLES_CONTAIN_BAD_VALUES) - << DgInt0(0) - << DgInt1(1) - << DgString1(stats->getStatColumns()[0]->getFullColRefNameAsAnsiString().data() ); - - stats->createFakeHist(); - continue ; // skip to next ColStats - } - - // second, handle the case where first and second interval are the same - if ( hist->entries() > 1 && // avoid the exception! might just be a single NULL - // // interval after the loop above - (*hist)[0].getBoundary() == (*hist)[1].getBoundary() && - (*hist)[1].getUec() > 1 ) - { - const double KLUDGE_VALUE = 0.0001 ; - const double oldVal = (*hist)[0].getBoundary().getDblValue() ; - const EncodedValue newVal = - EncodedValue(oldVal - (_ABSOLUTE_VALUE_(oldVal) * KLUDGE_VALUE)) ; // kludge alert! - //Absolute of oldval due to CR 10-010426-2457 - (*hist)[0].setBoundary( newVal ) ; - (*hist)[0].setBoundIncl( FALSE ) ; // no longer a real boundary! - (*colStats_)[i]->setMinValue( newVal ) ; // set aggr info also - } - // done with first interval - // ---------------------------------------------------------------------- - - // - // NULL values must only be stored in single-valued intervals - // in the histograms ; so, just in case we're only getting - // *one* HistInt for the NULL interval, insert a 2nd one - // - // 0 1 2 - // | | | - // | | | entries() == 3 - // NULL - // - // 0 1 2 3 - // | | | | - // | | | | entries() == 4 - // new NULL - // NULL - // - if ( hist->lastHistInt().isNull() ) - { - CollIndex count = hist->entries() ; - if ( !(*hist)[count-2].isNull() ) - { - // insert a 2nd NULL HISTINT, with boundaryIncl value FALSE - HistInt secondLast (hist->lastHistInt().getBoundary(), FALSE) ; - hist->insertAt(count-1,secondLast) ; - // new HISTINT by default has row/uec of 0, which is what we want - } - } - - // - // Now, reduce the total number of intervals to be the number - // that the user wants. This is used to test the tradeoffs - // between compile time & rowcount estimation. - // - (*colStats_)[i]->setMaxIntervalCount (maxIntervalCount) ; - (*colStats_)[i]->reduceToMaxIntervalCount () ; - - if ((*colStats_)[i]->getRowcount() == (*colStats_)[i]->getTotalUec() ) - (*colStats_)[i]->setAlmostUnique(TRUE); - - } // outer for loop -- done with this COLSTATS, continue with next one - // *********************************************************************** - - statsFetched_ = TRUE; - MonitorMemoryUsage_Exit((char*)mmPhase.data(), NULL, NULL, TRUE); - } // !statsFetched_ - - return (*colStats_); -} - -StatsList & -NATable::generateFakeStats() -{ - if (colStats_ == NULL) - { - //trying to get statistics for a new statement allocate colStats_ - colStats_ = new (CmpCommon::statementHeap()) StatsList(CmpCommon::statementHeap()); - } - - if (colStats_->entries() > 0) - return (*colStats_); - - NAColumnArray colList = getNAColumnArray() ; - double defaultFakeRowCount = (ActiveSchemaDB()->getDefaults()).getAsDouble(HIST_NO_STATS_ROWCOUNT); - double defaultFakeUec = (ActiveSchemaDB()->getDefaults()).getAsDouble(HIST_NO_STATS_UEC); - - if ( isHiveTable() ) { - defaultFakeRowCount = getOriginalRowCount().value(); - } - - /* if ( isHbaseTable() ) { - defaultFakeRowCount = getOriginalRowCount().value(); - } - */ - - for (CollIndex i = 0; i < colList.entries(); i++ ) - { - NAColumn * col = colList[i]; - - if (col->isUnique() ) - defaultFakeUec = defaultFakeRowCount; - else - defaultFakeUec = MINOF(defaultFakeUec, defaultFakeRowCount); - - EncodedValue dummyVal(0.0); - - EncodedValue lowBound = dummyVal.minMaxValue(col->getType(), TRUE); - EncodedValue highBound = dummyVal.minMaxValue(col->getType(), FALSE); - - HistogramSharedPtr emptyHist(new (HISTHEAP) Histogram(HISTHEAP)); - - HistInt newFirstHistInt(lowBound, FALSE); - - HistInt newSecondHistInt(highBound, TRUE); - - newSecondHistInt.setCardAndUec(defaultFakeRowCount, - defaultFakeUec); - - emptyHist->insert(newFirstHistInt); - emptyHist->insert(newSecondHistInt); - - ComUID histid(NA_JulianTimestamp()); - ColStatsSharedPtr fakeColStats( - new (HISTHEAP) ColStats(histid, - defaultFakeUec, - defaultFakeRowCount, - defaultFakeRowCount, - col->isUnique(), - FALSE, - emptyHist, - FALSE, - 1.0, - 1.0, - -1, // avg varchar size - HISTHEAP)); - - fakeColStats->setFakeHistogram(TRUE); - fakeColStats->setOrigFakeHist(TRUE); - fakeColStats->setMinValue(lowBound); - fakeColStats->setMaxValue(highBound); - fakeColStats->statColumns().insert(col); - - colStats_->insert(fakeColStats); - } - setStatsFetched(TRUE); - setOriginalRowCount(defaultFakeRowCount); - - return (*colStats_); -} + return (*colStats_); +} NABoolean NATable::rowsArePacked() const { - // If one fileset is packed, they all are - return (getVerticalPartitionList().entries() && - getVerticalPartitionList()[0]->isPacked()); + // If one fileset is packed, they all are + return (getVerticalPartitionList().entries() && + getVerticalPartitionList()[0]->isPacked()); } // MV // Read materialized view information from the catalog manager. MVInfoForDML *NATable::getMVInfo(BindWA *bindWA) { - return mvInfo_; + return mvInfo_; } // MV @@ -6584,43 +6587,43 @@ MVInfoForDML *NATable::getMVInfo(BindWA *bindWA) // If not initialized, keep a list and report error at runtime. NABoolean NATable::verifyMvIsInitializedAndAvailable(BindWA *bindWA) const { - CMPASSERT(isAnMV()); - const ComMvAttributeBitmap& bitmap = getMvAttributeBitmap(); + CMPASSERT(isAnMV()); + const ComMvAttributeBitmap& bitmap = getMvAttributeBitmap(); - // First check if the table is Unavailable. - NAString value; - if (bitmap.getIsMvUnAvailable()) - { + // First check if the table is Unavailable. + NAString value; + if (bitmap.getIsMvUnAvailable()) + { - // 12312 Materialized View $0~TableName is unavailable. - *CmpCommon::diags() << DgSqlCode(-12312) - << DgTableName(getTableName().getQualifiedNameAsString()); - bindWA->setErrStatus(); + // 12312 Materialized View $0~TableName is unavailable. + *CmpCommon::diags() << DgSqlCode(-12312) + << DgTableName(getTableName().getQualifiedNameAsString()); + bindWA->setErrStatus(); - return TRUE; - } + return TRUE; + } - // if the mv is uninitialized, - // add it to the uninitializedMvList in the BindWA - if (bitmap.getIsMvUnInitialized()) - { - - // get physical and ansi names - NAString fileName( - getClusteringIndex()->getFileSetName().getQualifiedNameAsString(), - bindWA->wHeap() ); - - NAString ansiName( getTableName().getQualifiedNameAsAnsiString(), - bindWA->wHeap() ); - - // get physical and ansi name - bindWA->addUninitializedMv( - convertNAString( fileName, bindWA->wHeap() ), - convertNAString( ansiName, bindWA->wHeap() ) ); - } + // if the mv is uninitialized, + // add it to the uninitializedMvList in the BindWA + if (bitmap.getIsMvUnInitialized()) + { + // get physical and ansi names + NAString fileName( + getClusteringIndex()->getFileSetName().getQualifiedNameAsString(), + bindWA->wHeap() ); - return FALSE; + NAString ansiName( getTableName().getQualifiedNameAsAnsiString(), + bindWA->wHeap() ); + + // get physical and ansi name + bindWA->addUninitializedMv( + convertNAString( fileName, bindWA->wHeap() ), + convertNAString( ansiName, bindWA->wHeap() ) ); + } + + + return FALSE; } // Return value: TRUE, found an index or constr. FALSE, not found. @@ -6636,275 +6639,287 @@ NABoolean NATable::verifyMvIsInitializedAndAvailable(BindWA *bindWA) const // FALSE, accept any index that has the // same columns, in any sequence. NABoolean NATable::getCorrespondingIndex(NAList<NAString> &inputCols, - NABoolean lookForExplicitIndex, - NABoolean lookForUniqueIndex, - NABoolean lookForPrimaryKey, - NABoolean lookForAnyIndexOrPkey, - NABoolean lookForSameSequenceOfCols, - NABoolean excludeAlwaysComputedSystemCols, - NAString *indexName) + NABoolean lookForExplicitIndex, + NABoolean lookForUniqueIndex, + NABoolean lookForPrimaryKey, + NABoolean lookForAnyIndexOrPkey, + NABoolean lookForSameSequenceOfCols, + NABoolean excludeAlwaysComputedSystemCols, + NAString *indexName) { - NABoolean indexFound = FALSE; - CollIndex numInputCols = inputCols.entries(); - - if (numInputCols == 0) - { - lookForPrimaryKey = TRUE; - lookForUniqueIndex = FALSE; - lookForAnyIndexOrPkey = FALSE; - } - - Lng32 numBTpkeys = getClusteringIndex()->getIndexKeyColumns().entries(); - - const NAFileSetList &indexList = getIndexList(); - for (Int32 i = 0; (NOT indexFound && (i < indexList.entries())); i++) - { - NABoolean isPrimaryKey = FALSE; - NABoolean isUniqueIndex = FALSE; + NABoolean indexFound = FALSE; + CollIndex numInputCols = inputCols.entries(); - const NAFileSet * naf = indexList[i]; - if (naf->getKeytag() == 0) - isPrimaryKey = TRUE; - else if (naf->uniqueIndex()) - isUniqueIndex = TRUE; - - if ((NOT lookForPrimaryKey) && (isPrimaryKey)) - continue; - - NABoolean found = FALSE; - if (lookForAnyIndexOrPkey) - found = TRUE; - else if (lookForPrimaryKey && isPrimaryKey) - found = TRUE; - else if (lookForUniqueIndex && isUniqueIndex) - found = TRUE; - - if (found) + if (numInputCols == 0) { - if (lookForExplicitIndex) // need an explicit index to match. - { - if ((naf->isCreatedExplicitly()) || - (isPrimaryKey)) - found = TRUE; - else - found = FALSE; - } + lookForPrimaryKey = TRUE; + lookForUniqueIndex = FALSE; + lookForAnyIndexOrPkey = FALSE; } - if (NOT found) - continue; - - Int32 numMatchedCols = 0; - NABoolean allColsMatched = TRUE; - - if (numInputCols > 0) - { - const NAColumnArray &nacArr = naf->getIndexKeyColumns(); - - Lng32 numKeyCols = naf->getCountOfColumns( - TRUE, // exclude non-key cols - !isPrimaryKey, // exclude cols other than user-specified index cols - FALSE, // don't exclude all system cols like SYSKEY - excludeAlwaysComputedSystemCols); + Lng32 numBTpkeys = getClusteringIndex()->getIndexKeyColumns().entries(); - // compare # of columns first and disqualify the index - // if it doesn't have the right number of columns - if (numInputCols != numKeyCols) - continue; + const NAFileSetList &indexList = getIndexList(); + for (Int32 i = 0; (NOT indexFound && (i < indexList.entries())); i++) + { + NABoolean isPrimaryKey = FALSE; + NABoolean isUniqueIndex = FALSE; + + const NAFileSet * naf = indexList[i]; + if (naf->getKeytag() == 0) + isPrimaryKey = TRUE; + else if (naf->uniqueIndex()) + isUniqueIndex = TRUE; + + if ((NOT lookForPrimaryKey) && (isPrimaryKey)) + continue; + + NABoolean found = FALSE; + if (lookForAnyIndexOrPkey) + found = TRUE; + else if (lookForPrimaryKey && isPrimaryKey) + found = TRUE; + else if (lookForUniqueIndex && isUniqueIndex) + found = TRUE; + + if (found) + { + if (lookForExplicitIndex) // need an explicit index to match. + { + if ((naf->isCreatedExplicitly()) || + (isPrimaryKey)) + found = TRUE; + else + found = FALSE; + } + } - // compare individual key columns with the provided input columns - for (Int32 j = 0; j < nacArr.entries() && allColsMatched; j++) - { - NAColumn *nac = nacArr[j]; + if (NOT found) + continue; - // exclude the same types of columns that we excluded in - // the call to naf->getCountOfColumns() above - if (!isPrimaryKey && - nac->getIndexColName() == nac->getColName()) - conti
<TRUNCATED>