Repository: incubator-trafodion Updated Branches: refs/heads/master 3cdea3f9b -> dda167cb0
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/021ebd6c/core/sql/ustat/hs_globals.h ---------------------------------------------------------------------- diff --git a/core/sql/ustat/hs_globals.h b/core/sql/ustat/hs_globals.h index e3d22e1..10381a8 100644 --- a/core/sql/ustat/hs_globals.h +++ b/core/sql/ustat/hs_globals.h @@ -73,7 +73,7 @@ class AbstractFastStatsHist; Lng32 AddNecessaryColumns(); Lng32 AddAllColumnsForIUS(); -Lng32 createSampleOption(Lng32 sampleType, double samplePercent, NAString &sampleOpt, +void createSampleOption(Lng32 sampleType, double samplePercent, NAString &sampleOpt, Int64 sampleValue1=0, Int64 sampleValue2=0); Lng32 doubleToHSDataBuffer(const double dbl, HSDataBuffer& dbf); Lng32 managePersistentSamples(); @@ -1135,6 +1135,7 @@ struct HSColGroupStruct : public NABasicObject Int64 prevUEC; /* uec from existing histogram */ Int64 colSecs; /* Time to sort/group data for column */ CountingBloomFilter* cbf; /* A bloom filter for IUS */ + NAString& cbfFileNameSuffix() { return *colSet[0].colname; } void* boundaryValues; /* List of bounary values for IUS */ void* MFVValues; /* List of MFV values for IUS */ @@ -1157,6 +1158,7 @@ struct HSColGroupStruct : public NABasicObject /* are used by its neighbors. Used to compute group weight */ NABoolean allKeysInsertedIntoCBF; + Int32 backwardWarningCount; // for UERR_UNEXPECTED_BACKWARDS_DATA warnings #ifdef _TEST_ALLOC_FAILURE // Stuff used to test memory allocation failures. @@ -1202,13 +1204,7 @@ class IUSValueIterator virtual ~IUSValueIterator() {} - void init(HSColGroupStruct* group) - { - // Strings must be contiguous in the strData buffer for this iterator to - // work correctly. - HS_ASSERT(group->strDataConsecutive); - vp = (T*)group->data; - } + void init(HSColGroupStruct* group); void next() { @@ -1357,6 +1353,9 @@ public: void getMemoryRequirementsForOneMCGroup(HSColGroupStruct* group, Int64 rows); static Int32 allocateMemoryForColumns(HSColGroupStruct* group, Int64 rows, HSColGroupStruct* mgr = NULL /* used for MC IS */); + static Int32 allocateMemoryForIUSColumns(HSColGroupStruct* group, Int64 rows, + HSColGroupStruct* delGroup, Int64 delRows, + HSColGroupStruct* insGroup, Int64 insRows); // For internal sort or IUS, remove and count nulls for each column from the // rowset just read. @@ -1484,6 +1483,9 @@ public: //Log the current contents of this class. void log(HSLogMan* LM); + // Takes action necessary before throwing exception for an assertion failure. + void preAssertionFailure(const char* condition, const char* fileName, Lng32 lineNum); + // Derive a return code from the contents of the diagnostics area. Lng32 getRetcodeFromDiags(); @@ -1502,14 +1504,20 @@ public: NABoolean forceToFetch = TRUE); Lng32 updatePersistentSampleTableForIUS(NAString& sampleTableName, double sampleRate, NAString& targetTableName); + void generateIUSDeleteQuery(const NAString& smplTable, NAString& queryText); + void generateIUSSelectInsertQuery(const NAString& smplTable, + const NAString& sourceTable, + NAString& queryText); + void getCBFFilePrefix(NAString& sampleTableName, NAString& filePrefix); void detectPersistentCBFsForIUS(NAString& sampleTableName, HSColGroupStruct *group); + Lng32 UpdateIUSPersistentSampleTable(Int64 oldSampleSize, Int64 requestedSampleSize, Int64& newSampleSize); Lng32 readCBFsIntoMemForIUS(NAString& sampleTableName, HSColGroupStruct* group); Lng32 writeCBFstoDiskForIUS(NAString& sampleTableName, HSColGroupStruct* group); - Lng32 deletePersistentCBFsForIUS(NAString& sampleTableName, HSColGroupStruct* group); + Lng32 deletePersistentCBFsForIUS(NAString& sampleTableName, HSColGroupStruct* group, SortState stateToDelete); void logDiagArea(const char* title); - Lng32 begin_IUS_work(char* buffer); + Lng32 begin_IUS_work(); Lng32 end_IUS_work(); // Populate the hash table used to determine when a ustat statement has run @@ -1579,6 +1587,7 @@ public: Int64 numPartitions; /* # of partns in object */ NAString *hstogram_table; /* HISTOGRM table */ NAString *hsintval_table; /* HISTINTS table */ + NAString *hsperssamp_table; /* PERSISTENT_SAMPLES table */ NAString *hssample_table; /* SAMPLING table */ NABoolean externalSampleTable; /* ownership of sample tab */ hs_table_type tableType; /* GUARDIAN | ANSI format */ @@ -1727,6 +1736,10 @@ private: NABoolean internalSortWhenBetter, NABoolean trySampleTableBypass = FALSE); + // After an allocation failure, this is called to reduce the amount of + // memory we estimate is available. + static void memReduceAllowance(); + // When a memory allocation fails, return any memory already allocated for // the group for internal sort, and set any PENDING columns back to // UNPROCESSED state. This function cannot fail. @@ -1756,6 +1769,17 @@ private: // Collect statistics by incrementally updating persistent sample table and // possibly histograms as well. + Lng32 doIUS(NABoolean& done); + + // Collect stats by incrementally updating histograms where possible. Persistent + // sample is also incrementally updated. + Lng32 doFullIUS(Int64 currentSampleSize, Int64 futureSampleSize, NABoolean& done); + + // Causes persistent sample table to be incrementally updated, and other + // preparatory tasks so RUS can be performed using persistent sample. + Lng32 prepareToUsePersistentSample (Int64 currentSampleSize, Int64 futureSampleSize); + + // Incrementally update histograms for a selected batch of columns Lng32 CollectStatisticsForIUS(Int64 currentSampleSize, Int64 futureSampleSize); // @@ -1798,6 +1822,18 @@ private: HSInMemoryTable* iusSampleDeletedInMem; HSInMemoryTable* iusSampleInsertedInMem; + // used by IUS code for clean up purposes + NABoolean sampleIExists_; + + // For IUS, once the persistent sample table has been successfully updated + // in accordance with the IUS predicate, these ptrs will point to the requested + // (expected) and actual number of rows in the sample table. end_IUS_work will + // pass these ptrs to the function that updates the sample table's row in + // SB_PERSISTENT_SAMPLES. If non-null, the values are used for the corresponding + // columns in that table. + Int64* PST_IUSrequestedSampleRows_; + Int64* PST_IUSactualSampleRows_; + template <class T> Int32 processIUSColumn(T* ptr, const NAWchar* format, @@ -1883,6 +1919,36 @@ private: HSColGroupStruct* insGroup, Int64 insrows); template <class T> + class HSHiLowValues + { + public: + + NABoolean seenAtLeastOneValue_; // initially FALSE + // the next two are valid only if seenAtLeastOneValue_ is TRUE + T hiValue_; // highest value seen so far + T lowValue_; // lowest value seen so far + + HSHiLowValues() : seenAtLeastOneValue_(FALSE) { }; + + void findHiLowValues(T& val) + { + if (seenAtLeastOneValue_) + { + if (val < lowValue_) + lowValue_ = val; + else if (val > hiValue_) + hiValue_ = val; + } + else + { + seenAtLeastOneValue_ = TRUE; + lowValue_ = val; + hiValue_ = val; + } + }; + }; + + template <class T> Int16 findInterval(Int16 numInt, T* boundaries, T& val) { Int16 low = 1; @@ -1916,6 +1982,13 @@ private: Int64 stmtStartTime; NABoolean jitLogOn; + // For IUS, was the SB_PERSISTENT_SAMPLES row for the source table updated? + // The change is undone by the HSGlobalsClass dtor, so we need to account for + // the possibility that an IUS statement failed prior to making the change. + // Otherwise, a concurrent IUS operation could have its changes to the row + // overwritten. + NABoolean PSRowUpdated; + static THREAD_P NABoolean performISForMC_; }; // class HSGlobalsClass @@ -2165,6 +2238,8 @@ public: void setHasNull(NABoolean val) { hasNull_ = val; } void setIntBoundary(const Lng32 intNum, const char* value, Int16 len) { intArry_[intNum].boundary_.copyFrom(value, len, TRUE); } + void setIntBoundary(const Lng32 intNum, const HSDataBuffer & newBoundary) + { intArry_[intNum].boundary_ = newBoundary; } void setIntMFVValue(const Lng32 intNum, const char* value, Int16 len) { intArry_[intNum].mostFreqVal_.copyFrom(value, len, TRUE); } @@ -2449,11 +2524,6 @@ class HSInMemoryTable : public NABasicObject void generateSelectDQuery(NAString& smplTable, NAString& queryTex); void generateSelectIQuery(NAString& smplTable, NAString& queryText); - void generateDeleteQuery(NAString& smplTable, NAString& queryText); - - void generateSelectInsertQuery(NAString& smplTable, NAString& sourceTable, - NAString& queryText); - // method for algorithm 1 void generateDeleteQuery(NAString& smplTable, NAString& queryText, NABoolean rollback); @@ -2462,6 +2532,16 @@ class HSInMemoryTable : public NABasicObject Lng32 populate(NAString& queryText); + // The data is actually deallocated by calling freeISMemory() from + // HSGlobalsClass::incrementHistograms() for each column as soon as the + // column is successfully handled by IUS (the data is preserved for use + // by RUS/IS if IUS can't be performed). This function just resets the + // flag that would cause assertion failure when populate() is called, as + // it must be to load data for the next batch of IUS columns. + void depopulate() { + isPopulated_ = FALSE; + } + void logState(const char* title); private: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/021ebd6c/core/sql/ustat/hs_la.cpp ---------------------------------------------------------------------- diff --git a/core/sql/ustat/hs_la.cpp b/core/sql/ustat/hs_la.cpp index ea00289..068aef1 100644 --- a/core/sql/ustat/hs_la.cpp +++ b/core/sql/ustat/hs_la.cpp @@ -952,6 +952,18 @@ NABoolean HSHiveTableDef::objExists(NABoolean createExternalTable) return TRUE; } +NAString HSHiveTableDef::getNodeName() const + { + HS_ASSERT(FALSE); // MP only + return ""; + } + +NAString HSHiveTableDef::getCatalogLoc(formatType format) const + { + HS_ASSERT(FALSE); // MP only + return ""; + } + NAString HSHiveTableDef::getHistLoc(formatType format) const { return HIVE_STATS_CATALOG "." HIVE_STATS_SCHEMA; @@ -1020,6 +1032,18 @@ Lng32 HSHiveTableDef::DescribeColumnNames() //===================================================== // +NAString HSHbaseTableDef::getNodeName() const + { + HS_ASSERT(FALSE); // MP only + return ""; + } + +NAString HSHbaseTableDef::getCatalogLoc(formatType format) const + { + HS_ASSERT(FALSE); // MP only + return ""; + } + NAString HSHbaseTableDef::getHistLoc(formatType format) const { if ( HSGlobalsClass::isNativeHbaseCat(getCatName(format))) { http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/021ebd6c/core/sql/ustat/hs_la.h ---------------------------------------------------------------------- diff --git a/core/sql/ustat/hs_la.h b/core/sql/ustat/hs_la.h index 3344feb..158b905 100644 --- a/core/sql/ustat/hs_la.h +++ b/core/sql/ustat/hs_la.h @@ -237,16 +237,8 @@ class HSHiveTableDef : public HSTableDef { return FALSE; }; - NAString getNodeName() const - { - HS_ASSERT(FALSE); // MP only - return ""; - } - NAString getCatalogLoc(formatType format = INTERNAL_FORMAT) const - { - HS_ASSERT(FALSE); // MP only - return ""; - } + NAString getNodeName() const; + NAString getCatalogLoc(formatType format = INTERNAL_FORMAT) const; NAString getHistLoc(formatType format = INTERNAL_FORMAT) const; Lng32 getFileType() const { @@ -348,16 +340,8 @@ class HSHbaseTableDef : public HSTableDef { return FALSE; }; - NAString getNodeName() const - { - HS_ASSERT(FALSE); // MP only - return ""; - } - NAString getCatalogLoc(formatType format = INTERNAL_FORMAT) const - { - HS_ASSERT(FALSE); // MP only - return ""; - } + NAString getNodeName() const; + NAString getCatalogLoc(formatType format = INTERNAL_FORMAT) const; NAString getHistLoc(formatType format = INTERNAL_FORMAT) const; Lng32 getFileType() const http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/021ebd6c/core/sql/ustat/hs_log.h ---------------------------------------------------------------------- diff --git a/core/sql/ustat/hs_log.h b/core/sql/ustat/hs_log.h index da846dc..18e6ec5 100644 --- a/core/sql/ustat/hs_log.h +++ b/core/sql/ustat/hs_log.h @@ -135,21 +135,16 @@ private: // Log the location of the error. void HSFuncLogError(Lng32 error, char *filename, Lng32 lineno); -// Wrapper to handle assertion failure. +// Wrapper to handle assertion failure. Do not assert a condition with any +// side effects, as it is evaluated a second time if false. #define HS_ASSERT(b) \ + { \ if (NOT (b)) \ { \ - HSTranMan *TM = HSTranMan::Instance(); \ - HSLogMan *LM = HSLogMan::Instance(); \ - if (LM->LogNeeded()) \ - { \ - sprintf(LM->msg, "***[ERROR] INTERNAL ASSERTION (%s) AT %s:%i", "" # b "", __FILE__, __LINE__); \ - LM->Log(LM->msg); \ - } \ - if (TM->StartedTransaction()) \ - TM->Rollback(); \ + GetHSContext()->preAssertionFailure("" # b "", __FILE__, __LINE__); \ CMPASSERT(b); \ - } + } \ + } //Ignore the following WARNINGS // [6008] missing single-column histograms @@ -157,11 +152,13 @@ void HSFuncLogError(Lng32 error, char *filename, Lng32 lineno); // [4030] non-standard DATETIME format // [4] internal Warning #define HSFilterWarning(retcode) \ + { \ if ((retcode == 6008) || \ (retcode == 6007) || \ (retcode == 4030) || \ (retcode == HS_WARNING)) \ - retcode = 0; + retcode = 0; \ + } // Map any error (<0) code other than HS_PKEY_FLOAT_ERROR to -1. #define HSFilterError(retcode) \ http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/021ebd6c/core/sql/ustat/hs_parser.cpp ---------------------------------------------------------------------- diff --git a/core/sql/ustat/hs_parser.cpp b/core/sql/ustat/hs_parser.cpp index 11adbc1..d96acc7 100644 --- a/core/sql/ustat/hs_parser.cpp +++ b/core/sql/ustat/hs_parser.cpp @@ -462,15 +462,19 @@ Lng32 AddTableName( const hs_table_type type *hs_globals->hsintval_table = getHistogramsTableLocation(hs_globals->catSch->data(), FALSE); + *hs_globals->hsperssamp_table = getHistogramsTableLocation(hs_globals->catSch->data(), FALSE); + NABoolean isHbaseOrHive = HSGlobalsClass::isHbaseCat(catName) || HSGlobalsClass::isHiveCat(catName); if (isHbaseOrHive) { hs_globals->hstogram_table->append(".").append(HBASE_HIST_NAME); hs_globals->hsintval_table->append(".").append(HBASE_HISTINT_NAME); + hs_globals->hsperssamp_table->append(".").append(HBASE_PERS_SAMP_NAME); } else { hs_globals->hstogram_table->append(".HISTOGRAMS"); hs_globals->hsintval_table->append(".HISTOGRAM_INTERVALS"); + hs_globals->hsperssamp_table->append(".PERSISTENT_SAMPLES"); } } else http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/021ebd6c/core/sql/ustat/hs_update.cpp ---------------------------------------------------------------------- diff --git a/core/sql/ustat/hs_update.cpp b/core/sql/ustat/hs_update.cpp index e8ca28e..4dd70d6 100644 --- a/core/sql/ustat/hs_update.cpp +++ b/core/sql/ustat/hs_update.cpp @@ -501,9 +501,6 @@ Lng32 UpdateStats(char *input, NABoolean requestedByCompiler) HSClearCLIDiagnostics(); - if ( hs_globals_obj.canDoIUS() ) - hs_globals_obj.end_IUS_work(); - hs_globals_y = NULL; // Remove IUS persistent sample if necessary. http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/021ebd6c/core/sql/ustat/hs_yacc.y ---------------------------------------------------------------------- diff --git a/core/sql/ustat/hs_yacc.y b/core/sql/ustat/hs_yacc.y index 0007659..0ee1ca4 100644 --- a/core/sql/ustat/hs_yacc.y +++ b/core/sql/ustat/hs_yacc.y @@ -353,19 +353,18 @@ histogram_options : CLEAR { hs_globals_y->optFlags |= CLEAR_OPT; } - | CREATE SAMPLE sample_clause_for_createremove + | CREATE SAMPLE random_clause { - if (hs_globals_y->optFlags & SAMPLE_ALL) + if (hs_globals_y->optFlags & SAMPLE_RAND_2) { - HSFuncMergeDiags(- UERR_INVALID_OPTION, - "ALL", - "RANDOM X PERCENT or r ROWS"); + HSFuncMergeDiags(-UERR_IUS_WRONG_RANDOM); hs_globals_y->parserError = HSGlobalsClass::ERROR_SEMANTICS; return -1; } + hs_globals_y->optFlags |= CREATE_SAMPLE_OPT; } - | REMOVE SAMPLE sample_clause_for_createremove + | REMOVE SAMPLE { hs_globals_y->optFlags |= REMOVE_SAMPLE_OPT; } @@ -402,37 +401,6 @@ histogram_options : CLEAR | /* empty */ ; -sample_clause_for_createremove: ALL - { - hs_globals_y->optFlags |= SAMPLE_ALL; - } - | int_number ROWS - { - if ($1 <= 0) - { - HSFuncMergeDiags(- UERR_INVALID_OPTION, - "SAMPLE ROWS", - "an integer greater than or equal to 0 and within limits"); - hs_globals_y->parserError = HSGlobalsClass::ERROR_SEMANTICS; - return -1; - } - hs_globals_y->optFlags |= SAMPLE_BASIC_1; - hs_globals_y->sampleValue1 = $1; - } - | RANDOM real_number TOK_PERCENT - { - if ($2 <= 0 || $2 >= 100) - { - HSFuncMergeDiags(- UERR_INVALID_OPTION, - "PERCENT", - "a value between 0 and 100"); - hs_globals_y->parserError = HSGlobalsClass::ERROR_SEMANTICS; - return -1; - } - hs_globals_y->optFlags |= SAMPLE_RAND_1; - hs_globals_y->sampleValue1 = (Int64)($2 * HS_SAMP_PCNT_UPSCALE); - } -; on_clause : ON predefined_groups | ON predefined_groups ',' regular_group_list @@ -463,7 +431,10 @@ incremental_clause : INCREMENTAL WHERE WHERE_CONDITION if (LM->LogNeeded() ) LM->Log("incremental clause identified"); - + if (CmpCommon::getDefault(USTAT_INCREMENTAL_UPDATE_STATISTICS) == DF_OFF) { + HSFuncMergeDiags(-UERR_IUS_IS_DISABLED); + } + if (CmpCommon::getDefault(USTAT_IUS_SIMPLE_SYNTAX) == DF_ON) { // Via grammar, the incremental clause can only used without the // on_clause. So it is safe to set the IUS_OPT flag. @@ -585,17 +556,17 @@ sample_clause_body : SAMPLE if (LM->LogNeeded() ) LM->Log("Creation of persistent sample table for IUS requested"); - if (hs_globals_y->optFlags & (REG_GROUP_OPT | EVERYCOL_OPT | EVERYKEY_OPT)) - HSFuncMergeDiags(-UERR_WRONG_ON_CLAUSE_FOR_IUS, "PERSISTENT"); - else if (hs_globals_y->optFlags & SAMPLE_RAND_2) + // Formerly there was code here to limit PERSISTENT to + // ON EXISTING COLUMNS and ON NECESSARY COLUMNS (EXISTING_OPT and + // NECESSARY_OPT in the optFlags). But there doesn't seem to be + // a compelling reason for this limitation. The persistent sample + // table will have all the columns of the base table regardless + // of the ON clause. + + if (hs_globals_y->optFlags & SAMPLE_RAND_2) HSFuncMergeDiags(-UERR_IUS_WRONG_RANDOM); else - { - // This assert is here to make sure we covered all other possible ON - // clauses in the check above. - HS_ASSERT(hs_globals_y->optFlags & (EXISTING_OPT | NECESSARY_OPT)); - hs_globals_y->optFlags |= IUS_PERSIST; - } + hs_globals_y->optFlags |= IUS_PERSIST; } | SAMPLE random_clause rowcount_clause | SAMPLE periodic_clause
