Repository: incubator-trafodion Updated Branches: refs/heads/master 60169923e -> b6478e582
[TRAFODION-2440] Add retry to row count estimation logic Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/932c219f Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/932c219f Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/932c219f Branch: refs/heads/master Commit: 932c219f0fd2a49e1c53a666a1dcbcb3578c4f91 Parents: 67288b3 Author: Dave Birdsall <dave.birds...@esgyn.com> Authored: Thu Jan 19 20:10:33 2017 +0000 Committer: Dave Birdsall <dbirds...@apache.org> Committed: Tue Jan 24 00:06:48 2017 +0000 ---------------------------------------------------------------------- core/sql/bin/SqlciErrors.txt | 1 + core/sql/executor/HBaseClient_JNI.cpp | 27 ++++++-- core/sql/executor/HBaseClient_JNI.h | 2 +- core/sql/exp/ExpHbaseInterface.cpp | 9 ++- core/sql/exp/ExpHbaseInterface.h | 8 ++- core/sql/optimizer/NATable.cpp | 67 ++++++++++++-------- core/sql/optimizer/NATable.h | 2 +- core/sql/optimizer/RelExeUtil.cpp | 5 +- .../java/org/trafodion/sql/HBaseClient.java | 52 ++++++++++++++- core/sql/ustat/hs_cli.cpp | 12 ++++ core/sql/ustat/hs_cli.h | 6 ++ core/sql/ustat/hs_const.h | 1 + core/sql/ustat/hs_globals.cpp | 35 ++++++++-- core/sql/ustat/hs_la.cpp | 29 +++++++-- core/sql/ustat/hs_la.h | 27 +++++++- core/sql/ustat/hs_log.cpp | 6 +- core/sql/ustat/hs_util.cpp | 4 +- 17 files changed, 237 insertions(+), 56 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/932c219f/core/sql/bin/SqlciErrors.txt ---------------------------------------------------------------------- diff --git a/core/sql/bin/SqlciErrors.txt b/core/sql/bin/SqlciErrors.txt index 0845c26..b9c9fae 100644 --- a/core/sql/bin/SqlciErrors.txt +++ b/core/sql/bin/SqlciErrors.txt @@ -1926,6 +1926,7 @@ drop the default context 9249 ZZZZZ 99999 BEGINNER MAJOR DBADMIN Incremental UPDATE 
STATISTICS is disabled. 9250 ZZZZZ 99999 BEGINNER MINOR LOGONLY Incremental UPDATE STATISTICS: non-NULL values added to previously all-NULL histogram for column $0~string0. A regular UPDATE STATISTICS is performed instead. 9251 ZZZZZ 99999 BEGINNER MAJOR DBADMIN A persistent sample table already exists. Use UPDATE STATISTICS ... REMOVE SAMPLE to drop it first if desired. +9252 ZZZZZ 99999 BEGINNER MAJOR DBADMIN Unable to get row count estimate: Error code $0~int0, detail $1~int1. Exception info (if any): $0~string0 9259 ZZZZZ 99999 UUUUUUUU UUUUU UUUUUUU Last UPDATE STATISTICS error. 10000 ZZZZZ 99999 UUUUUUUU UUUUU UUUUUUU Sort Error: First Sort error 10001 ZZZZZ 99999 ADVANCED MAJOR DIALOUT Sort Error : No error text defined. Unexpected error. $0~String0 http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/932c219f/core/sql/executor/HBaseClient_JNI.cpp ---------------------------------------------------------------------- diff --git a/core/sql/executor/HBaseClient_JNI.cpp b/core/sql/executor/HBaseClient_JNI.cpp index 2d52c05..32e2203 100644 --- a/core/sql/executor/HBaseClient_JNI.cpp +++ b/core/sql/executor/HBaseClient_JNI.cpp @@ -246,7 +246,7 @@ HBC_RetCode HBaseClient_JNI::init() JavaMethods_[JM_GET_HBLC ].jm_name = "getHBulkLoadClient"; JavaMethods_[JM_GET_HBLC ].jm_signature = "()Lorg/trafodion/sql/HBulkLoadClient;"; JavaMethods_[JM_EST_RC ].jm_name = "estimateRowCount"; - JavaMethods_[JM_EST_RC ].jm_signature = "(Ljava/lang/String;II[J)Z"; + JavaMethods_[JM_EST_RC ].jm_signature = "(Ljava/lang/String;III[J)Z"; JavaMethods_[JM_REL_HBLC ].jm_name = "releaseHBulkLoadClient"; JavaMethods_[JM_REL_HBLC ].jm_signature = "(Lorg/trafodion/sql/HBulkLoadClient;)V"; JavaMethods_[JM_GET_CAC_FRC].jm_name = "getBlockCacheFraction"; @@ -1303,12 +1303,22 @@ HBC_RetCode HBaseClient_JNI::grant(const Text& user, const Text& tblName, const HBC_RetCode HBaseClient_JNI::estimateRowCount(const char* tblName, Int32 partialRowSize, Int32 numCols, - Int64& rowCount) + Int32 
retryLimitMilliSeconds, + Int64& rowCount, + Int32& breadCrumb) { QRLogger::log(CAT_SQL_HBASE, LL_DEBUG, "HBaseClient_JNI::estimateRowCount(%s) called.", tblName); - if (initJNIEnv() != JOI_OK) - return HBC_ERROR_INIT_PARAM; + breadCrumb = 1; + if (jenv_ == NULL) + if (initJVM() != JOI_OK) + return HBC_ERROR_INIT_PARAM; + breadCrumb = 2; + if (jenv_->PushLocalFrame(jniHandleCapacity_) != 0) { + getExceptionDetails(); + return HBC_ERROR_ROWCOUNT_EST_EXCEPTION; + } + breadCrumb = 3; jstring js_tblName = jenv_->NewStringUTF(tblName); if (js_tblName == NULL) { @@ -1319,17 +1329,21 @@ HBC_RetCode HBaseClient_JNI::estimateRowCount(const char* tblName, jint jPartialRowSize = partialRowSize; jint jNumCols = numCols; + jint jRetryLimitMilliSeconds = retryLimitMilliSeconds; jlongArray jRowCount = jenv_->NewLongArray(1); tsRecentJMFromJNI = JavaMethods_[JM_EST_RC].jm_full_name; jboolean jresult = jenv_->CallBooleanMethod(javaObj_, JavaMethods_[JM_EST_RC].methodID, js_tblName, jPartialRowSize, - jNumCols, jRowCount); + jNumCols, jRetryLimitMilliSeconds, jRowCount); jboolean isCopy; jlong* arrayElems = jenv_->GetLongArrayElements(jRowCount, &isCopy); rowCount = *arrayElems; if (isCopy == JNI_TRUE) jenv_->ReleaseLongArrayElements(jRowCount, arrayElems, JNI_ABORT); + jenv_->DeleteLocalRef(js_tblName); + + breadCrumb = 4; if (jenv_->ExceptionCheck()) { getExceptionDetails(); @@ -1339,12 +1353,15 @@ HBC_RetCode HBaseClient_JNI::estimateRowCount(const char* tblName, return HBC_ERROR_ROWCOUNT_EST_EXCEPTION; } + breadCrumb = 5; if (jresult == false) { logError(CAT_SQL_HBASE, "HBaseClient_JNI::estimateRowCount()", getLastError()); jenv_->PopLocalFrame(NULL); return HBC_ERROR_ROWCOUNT_EST_EXCEPTION; } + + breadCrumb = 6; jenv_->PopLocalFrame(NULL); return HBC_OK; // Table exists. 
} http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/932c219f/core/sql/executor/HBaseClient_JNI.h ---------------------------------------------------------------------- diff --git a/core/sql/executor/HBaseClient_JNI.h b/core/sql/executor/HBaseClient_JNI.h index 08464ab..c15913e 100644 --- a/core/sql/executor/HBaseClient_JNI.h +++ b/core/sql/executor/HBaseClient_JNI.h @@ -449,7 +449,7 @@ public: HBC_RetCode grant(const Text& user, const Text& tableName, const TextVec& actionCodes); HBC_RetCode revoke(const Text& user, const Text& tableName, const TextVec& actionCodes); HBC_RetCode estimateRowCount(const char* tblName, Int32 partialRowSize, - Int32 numCols, Int64& rowCount); + Int32 numCols, Int32 retryLimitMilliSeconds, Int64& rowCount, Int32 & breadCrumb); HBC_RetCode getLatestSnapshot(const char * tabname, char *& snapshotName, NAHeap * heap); HBC_RetCode cleanSnpTmpLocation(const char * path); HBC_RetCode setArchivePermissions(const char * path); http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/932c219f/core/sql/exp/ExpHbaseInterface.cpp ---------------------------------------------------------------------- diff --git a/core/sql/exp/ExpHbaseInterface.cpp b/core/sql/exp/ExpHbaseInterface.cpp index 63494f0..85b0160 100644 --- a/core/sql/exp/ExpHbaseInterface.cpp +++ b/core/sql/exp/ExpHbaseInterface.cpp @@ -1470,16 +1470,21 @@ Lng32 ExpHbaseInterface_JNI::completeAsyncOperation(Int32 timeout, NABoolean *re Lng32 ExpHbaseInterface_JNI::estimateRowCount(HbaseStr& tblName, Int32 partialRowSize, Int32 numCols, - Int64& estRC) + Int32 retryLimitMilliSeconds, + Int64& estRC, + Int32& breadCrumb) { + breadCrumb = 11; if (client_ == NULL) { + breadCrumb = 12; if (init(hbs_) != HBASE_ACCESS_SUCCESS) return -HBASE_ACCESS_ERROR; } estRC = 0; - retCode_ = client_->estimateRowCount(tblName.val, partialRowSize, numCols, estRC); + retCode_ = client_->estimateRowCount(tblName.val, partialRowSize, numCols, + retryLimitMilliSeconds, estRC, breadCrumb /* 
out */); return retCode_; } http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/932c219f/core/sql/exp/ExpHbaseInterface.h ---------------------------------------------------------------------- diff --git a/core/sql/exp/ExpHbaseInterface.h b/core/sql/exp/ExpHbaseInterface.h index 1d7728c..db0bb14 100644 --- a/core/sql/exp/ExpHbaseInterface.h +++ b/core/sql/exp/ExpHbaseInterface.h @@ -354,7 +354,9 @@ class ExpHbaseInterface : public NABasicObject virtual Lng32 estimateRowCount(HbaseStr& tblName, Int32 partialRowSize, Int32 numCols, - Int64& estRC) = 0; + Int32 retryLimitMilliSeconds, + Int64& estRC, + Int32& breadCrumb) = 0; virtual Lng32 getLatestSnapshot(const char * tableName, char *& snapshotName, NAHeap * heap) = 0; virtual Lng32 cleanSnpTmpLocation( const char * path)=0; virtual Lng32 setArchivePermissions( const char * tbl)=0; @@ -666,7 +668,9 @@ virtual Lng32 initHFileParams(HbaseStr &tblName, virtual Lng32 estimateRowCount(HbaseStr& tblName, Int32 partialRowSize, Int32 numCols, - Int64& estRC); + Int32 retryLimitMilliSeconds, + Int64& estRC, + Int32& breadCrumb); virtual Lng32 getLatestSnapshot(const char * tableName, char *& snapshotName, NAHeap * heap); virtual Lng32 cleanSnpTmpLocation( const char * path); http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/932c219f/core/sql/optimizer/NATable.cpp ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/NATable.cpp b/core/sql/optimizer/NATable.cpp index eaaf45b..ea15cc4 100644 --- a/core/sql/optimizer/NATable.cpp +++ b/core/sql/optimizer/NATable.cpp @@ -7672,36 +7672,47 @@ Int32 NATable::computeHBaseRowSizeFromMetaData() const // For an HBase table, we can estimate the number of rows by dividing the number // of KeyValues in all HFiles of the table by the number of columns (with a few // other considerations). 
-Int64 NATable::estimateHBaseRowCount() const +Int64 NATable::estimateHBaseRowCount(Int32 retryLimitMilliSeconds, Int32& errorCode, Int32& breadCrumb) const { - Int64 estRowCount = 0; - ExpHbaseInterface* ehi = getHBaseInterface(); - if (ehi) - { - HbaseStr fqTblName; - NAString tblName = getTableName().getQualifiedNameAsString(); - fqTblName.len = tblName.length(); - fqTblName.val = new(STMTHEAP) char[fqTblName.len+1]; - strncpy(fqTblName.val, tblName.data(), fqTblName.len); - fqTblName.val[fqTblName.len] = '\0'; - - Int32 partialRowSize = computeHBaseRowSizeFromMetaData(); - Lng32 retcode = ehi->estimateRowCount(fqTblName, - partialRowSize, - colcount_, - estRowCount); - NADELETEBASIC(fqTblName.val, STMTHEAP); - - // Return 0 as the row count if an error occurred while estimating it. - // The estimate could also be 0 if there is less than 1MB of storage - // dedicated to the table -- no HFiles, and < 1MB in MemStore, for which - // size is reported only in megabytes. - if (retcode < 0) - estRowCount = 0; - delete ehi; - } + Int64 estRowCount = 0; + ExpHbaseInterface* ehi = getHBaseInterface(); + errorCode = -1; + breadCrumb = -1; + if (ehi) + { + HbaseStr fqTblName; + NAString tblName = getTableName().getQualifiedNameAsString(); + fqTblName.len = tblName.length(); + fqTblName.val = new(STMTHEAP) char[fqTblName.len+1]; + strncpy(fqTblName.val, tblName.data(), fqTblName.len); + fqTblName.val[fqTblName.len] = '\0'; + + Int32 partialRowSize = computeHBaseRowSizeFromMetaData(); + errorCode = ehi->estimateRowCount(fqTblName, + partialRowSize, + colcount_, + retryLimitMilliSeconds, + estRowCount, + breadCrumb /* out */); + NADELETEBASIC(fqTblName.val, STMTHEAP); + + // Return 100 million as the row count if an error occurred while + // estimating. One example where this is appropriate is that we might get + // FileNotFoundException from the Java layer if a large table is in + // the midst of a compaction cycle. 
It is better to return a large + // number in this case than a small number, as plan quality suffers + // more when we vastly underestimate than when we vastly overestimate. + + // The estimate could be 0 if there is less than 1MB of storage + // dedicated to the table -- no HFiles, and < 1MB in MemStore, for which + // size is reported only in megabytes. + if (errorCode < 0) + estRowCount = 100000000; + + delete ehi; + } - return estRowCount; + return estRowCount; } // Method to get hbase regions servers node names http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/932c219f/core/sql/optimizer/NATable.h ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/NATable.h b/core/sql/optimizer/NATable.h index f6b8d85..d4e0c12 100644 --- a/core/sql/optimizer/NATable.h +++ b/core/sql/optimizer/NATable.h @@ -895,7 +895,7 @@ public: // without accessing HBase. The result is passed to estimateHBaseRowCount(), // which completes the row size calculation with HBase info. Int32 computeHBaseRowSizeFromMetaData() const ; - Int64 estimateHBaseRowCount() const; + Int64 estimateHBaseRowCount(Int32 retryLimitMilliSeconds, Int32& errorCode, Int32& breadCrumb) const; NABoolean getHbaseTableInfo(Int32& hbtIndexLevels, Int32& hbtBlockSize) const; NABoolean getRegionsNodeName(Int32 partns, ARRAY(const char *)& nodeNames) const; http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/932c219f/core/sql/optimizer/RelExeUtil.cpp ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/RelExeUtil.cpp b/core/sql/optimizer/RelExeUtil.cpp index c59f323..dbf6514 100644 --- a/core/sql/optimizer/RelExeUtil.cpp +++ b/core/sql/optimizer/RelExeUtil.cpp @@ -5878,7 +5878,10 @@ RelExpr * ExeUtilHbaseCoProcAggr::bindNode(BindWA *bindWA) // BindWA keeps list of coprocessors used, so privileges can be checked. 
bindWA->insertCoProcAggr(this); - CostScalar rowsAccessed(naTable->estimateHBaseRowCount()); + Int32 retryLimitMilliSeconds = 5000; // use at most 5 seconds on retries + Int32 errorCode = 0; + Int32 breadCrumb = 0; + CostScalar rowsAccessed(naTable->estimateHBaseRowCount(retryLimitMilliSeconds,errorCode /* out */,breadCrumb /* out */)); setEstRowsAccessed(rowsAccessed); return boundExpr; http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/932c219f/core/sql/src/main/java/org/trafodion/sql/HBaseClient.java ---------------------------------------------------------------------- diff --git a/core/sql/src/main/java/org/trafodion/sql/HBaseClient.java b/core/sql/src/main/java/org/trafodion/sql/HBaseClient.java index 42fc29b..a6d26b5 100644 --- a/core/sql/src/main/java/org/trafodion/sql/HBaseClient.java +++ b/core/sql/src/main/java/org/trafodion/sql/HBaseClient.java @@ -24,6 +24,7 @@ package org.trafodion.sql; import com.google.protobuf.ServiceException; import java.io.IOException; +import java.io.FileNotFoundException; import java.util.Collection; import java.util.Iterator; import java.util.List; @@ -1091,16 +1092,63 @@ public class HBaseClient { return rc; } + + // Estimates row count for tblName. Has a retry loop for the + // case of java.io.FileNotFoundException, which can happen if + // compactions are in flight when we call estimateRowCountBody. + // We try again with geometrically higher timeouts in hopes that + // the compaction will go away. But after 4 minutes plus of retries + // we'll give up and invite the user to try later. 
+ public boolean estimateRowCount(String tblName, int partialRowSize, + int numCols, int retryLimitMilliSeconds, long[] rc) + throws MasterNotRunningException, IOException, ClassNotFoundException, URISyntaxException { + if (logger.isDebugEnabled()) logger.debug("HBaseClient.estimateRowCount(" + tblName + ") called."); + boolean retcode = false; // assume failure + int retryWait = 2000; // initial sleep before retry interval is 2 seconds + int cumulativeSleepTime = 0; + while (retryWait > 0) { + try { + retcode = estimateRowCountBody(tblName,partialRowSize,numCols,rc); + retryWait = 0; // for normal loop exit + } + catch (FileNotFoundException fne) { + + if (cumulativeSleepTime < retryLimitMilliSeconds) { // stop retrying if we've exceeded limit + if (logger.isDebugEnabled()) logger.debug("FileNotFoundException encountered (" + fne.getMessage() + + ") retrying in " + Integer.toString(retryWait/1000) + " seconds." ); + try { + Thread.sleep(retryWait); // sleep for a while or until interrupted + cumulativeSleepTime += retryWait; + } + catch (InterruptedException e) { + // ignore the interruption and keep going + } + retryWait = 2 * retryWait; + } + else { + // we've retried enough; just re-throw + if (logger.isDebugEnabled()) logger.debug("FileNotFoundException encountered (" + fne.getMessage() + + "); not retrying." ); + throw fne; + } + } + } + + return retcode; + } + + + // Estimates row count for tblName by iterating over the HFiles for // the table, extracting the KeyValue entry count from the file's // trailer block, summing the counts, and dividing by the number of // columns in the table. An adjustment is made for the estimated // number of missing values by sampling the first several // hundred KeyValues to see how many are missing. 
- public boolean estimateRowCount(String tblName, int partialRowSize, + private boolean estimateRowCountBody(String tblName, int partialRowSize, int numCols, long[] rc) throws MasterNotRunningException, IOException, ClassNotFoundException, URISyntaxException { - if (logger.isDebugEnabled()) logger.debug("HBaseClient.estimateRowCount(" + tblName + ") called."); + if (logger.isDebugEnabled()) logger.debug("HBaseClient.estimateRowCountBody(" + tblName + ") called."); final String REGION_NAME_PATTERN = "[0-9a-f]*"; final String HFILE_NAME_PATTERN = "[0-9a-f]*"; http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/932c219f/core/sql/ustat/hs_cli.cpp ---------------------------------------------------------------------- diff --git a/core/sql/ustat/hs_cli.cpp b/core/sql/ustat/hs_cli.cpp index 77984f7..5e693a8 100644 --- a/core/sql/ustat/hs_cli.cpp +++ b/core/sql/ustat/hs_cli.cpp @@ -499,6 +499,18 @@ Lng32 HSClearCLIDiagnostics() return retcode; } +// Clear any JNI diagnostic text stored in the CLI +void HSFuncClearJniErrorStr() +{ + GetCliGlobals()->currContext()->setJniErrorStr(""); +} + +// Obtain any JNI diagnostic text stored in the CLI +const char * HSFuncGetJniErrorStr() +{ + return GetCliGlobals()->currContext()->getJniErrorStrPtr(); +} + // ----------------------------------------------------------------------- // Create histogram tables if they don't exist. 
// ----------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/932c219f/core/sql/ustat/hs_cli.h ---------------------------------------------------------------------- diff --git a/core/sql/ustat/hs_cli.h b/core/sql/ustat/hs_cli.h index b3bf1f9..97ce4ab 100644 --- a/core/sql/ustat/hs_cli.h +++ b/core/sql/ustat/hs_cli.h @@ -129,6 +129,12 @@ Lng32 HSFuncExecDDL( const char *dml ); Lng32 HSClearCLIDiagnostics(); +// Clear any JNI diagnostic text stored in the CLI +void HSFuncClearJniErrorStr(); + +// Obtain any JNI diagnostic text stored in the CLI +const char * HSFuncGetJniErrorStr(); + // Create histogram tables if they don't exist. Lng32 CreateHistTables (const HSGlobalsClass* hsGlobal); Lng32 CreateHistView (const HSGlobalsClass* hsGlobal); http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/932c219f/core/sql/ustat/hs_const.h ---------------------------------------------------------------------- diff --git a/core/sql/ustat/hs_const.h b/core/sql/ustat/hs_const.h index b4f8b12..6b36c4b 100644 --- a/core/sql/ustat/hs_const.h +++ b/core/sql/ustat/hs_const.h @@ -174,6 +174,7 @@ enum USTAT_ERROR_CODES {UERR_SYNTAX_ERROR = 15001, UERR_IUS_IS_DISABLED = 9249, UERR_WARNING_IUS_NO_LONGER_ALL_NULL = 9250, UERR_DROP_PERSISTANT_SAMPLE_FIRST = 9251, + UERR_BAD_EST_ROWCOUNT = 9252, UERR_NO_ERROR = 9259, UERR_LAST_ERROR = 9259 }; http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/932c219f/core/sql/ustat/hs_globals.cpp ---------------------------------------------------------------------- diff --git a/core/sql/ustat/hs_globals.cpp b/core/sql/ustat/hs_globals.cpp index ba33e2a..757bc7e 100644 --- a/core/sql/ustat/hs_globals.cpp +++ b/core/sql/ustat/hs_globals.cpp @@ -3121,25 +3121,43 @@ Lng32 HSGlobalsClass::Initialize() sample_I_generated = FALSE; LM->StartTimer("getRowCount()"); + Int32 errorCode = 0; + Int32 breadCrumb = 0; + HSFuncClearJniErrorStr(); // clear out any stale error 
info actualRowCount = objDef->getRowCount(currentRowCountIsEstimate_, inserts, deletes, updates, numPartitions, minRowCtPerPartition_, + errorCode /* out */, + breadCrumb /* out */, optFlags & (SAMPLE_REQUESTED | IUS_OPT)); LM->StopTimer(); if (LM->LogNeeded()) { sprintf(LM->msg, "\tcurrentRowCountIsEstimate_=%d from getRowCount()", currentRowCountIsEstimate_); LM->Log(LM->msg); + sprintf(LM->msg, "\terrorCode=%d, breadCrumb=%d", errorCode, breadCrumb); + LM->Log(LM->msg); + const char * jniErrorStr = HSFuncGetJniErrorStr(); + if (strlen(jniErrorStr) > 0) + { + LM->Log("\tJNI exception info:"); + LM->Log(jniErrorStr); + } + } + + if (errorCode) + { + *CmpCommon::diags() << DgSqlCode(-UERR_BAD_EST_ROWCOUNT) << DgInt0(errorCode) + << DgInt1(breadCrumb) << DgString0(HSFuncGetJniErrorStr()); + return -1; } // We only allow an estimate when sampling, and then only if the // estimated row count is at least ustat_min_estimate_for_rowcount (CQD), // because estimation error is high for small or fragmented tables. // Otherwise a SELECT COUNT(*) is used to get the actual row count in - // place of the estimate, unless the user supplied his own row count. - // Note that if getRowCount() fails, it will return -1 and set - // currentRowCountIsEstimate_ to TRUE forcing the SELECT COUNT(*). + // place of the estimate, unless the user supplied his own row count.. 
if (currentRowCountIsEstimate_ && !(optFlags & CLEAR_OPT)) { if (optFlags & ROWCOUNT_OPT) /* rowcount provided */ @@ -15595,7 +15613,16 @@ Lng32 managePersistentSamples() Int64 sampleRows, tableRows; NABoolean isEstimate = FALSE; - tableRows = hs_globals->objDef->getRowCount(isEstimate); + Int32 errorCode = 0; + Int32 breadCrumb = 0; + tableRows = hs_globals->objDef->getRowCount(isEstimate, + errorCode /* out */, + breadCrumb /* out */); + if (errorCode) + { + *CmpCommon::diags() << DgSqlCode(-UERR_BAD_EST_ROWCOUNT) << DgInt0(errorCode) << DgInt1(breadCrumb); + return -1; + } // tableRows could be zero for a Trafodion or HBase table if the table is new // and all the data is still in memstore. So, in the logic below we dance around http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/932c219f/core/sql/ustat/hs_la.cpp ---------------------------------------------------------------------- diff --git a/core/sql/ustat/hs_la.cpp b/core/sql/ustat/hs_la.cpp index f765491..cdcbe13 100644 --- a/core/sql/ustat/hs_la.cpp +++ b/core/sql/ustat/hs_la.cpp @@ -451,10 +451,13 @@ void HSSqTableDef::resetRowCounts() #endif Int64 HSSqTableDef::getRowCount(NABoolean &isEstimate, + Int32 &errorCode, + Int32 &breadCrumb, NABoolean estimateIfNecessary) { Int64 bogus; - return getRowCount(isEstimate, bogus, bogus, bogus, bogus, bogus, estimateIfNecessary); + return getRowCount(isEstimate, bogus, bogus, bogus, bogus, bogus, + errorCode, breadCrumb, estimateIfNecessary); } /***************************************************************************/ @@ -485,8 +488,12 @@ Int64 HSSqTableDef::getRowCount(NABoolean &isEstimate, Int64 &numUpdates, Int64 &numPartitions, Int64 &minRowCtPerPartition, + Int32 &errorCode, + Int32 &breadCrumb, NABoolean estimateIfNecessary) { + errorCode = 0; + breadCrumb = -5; isEstimate = TRUE; numInserts = numDeletes = @@ -978,8 +985,12 @@ Int64 HSHiveTableDef::getRowCount(NABoolean &isEstimate, Int64 &numUpdates, Int64 &numPartitions, Int64 
&minRowCtPerPartition, + Int32 &errorCode, + Int32 &breadCrumb, NABoolean estimateIfNecessary) { + errorCode = 0; + breadCrumb = -6; if (minPartitionRows_ == -1) { Int64 partitionEstRows; @@ -995,7 +1006,7 @@ Int64 HSHiveTableDef::getRowCount(NABoolean &isEstimate, numPartitions = getNumPartitions(); minRowCtPerPartition = minPartitionRows_; - return getRowCount(isEstimate, estimateIfNecessary); + return getRowCount(isEstimate, errorCode, breadCrumb, estimateIfNecessary); } Lng32 HSHiveTableDef::DescribeColumnNames() @@ -1189,13 +1200,19 @@ Lng32 HSHbaseTableDef::getNumPartitions() const return getNATable()->getClusteringIndex()->getCountOfPartitions(); } -Int64 HSHbaseTableDef::getRowCount(NABoolean &isEstimate, NABoolean estimateIfNecessary) +Int64 HSHbaseTableDef::getRowCount(NABoolean &isEstimate, + Int32 &errorCode, + Int32 &breadCrumb, + NABoolean estimateIfNecessary) { + errorCode = 0; + breadCrumb = -2; isEstimate = TRUE; if (estimateIfNecessary && !naTbl_->isSeabaseMDTable() && CmpCommon::getDefault(USTAT_ESTIMATE_HBASE_ROW_COUNT) == DF_ON) - return naTbl_->estimateHBaseRowCount(); + // use a 4 minute retry limit (expressed in milliseconds) + return naTbl_->estimateHBaseRowCount(4*60*1000, errorCode, breadCrumb); else return 0; } @@ -1206,6 +1223,8 @@ Int64 HSHbaseTableDef::getRowCount(NABoolean &isEstimate, Int64 &numUpdates, Int64 &numPartitions, Int64 &minRowCtPerPartition, + Int32 &errorCode, + Int32 &breadCrumb, NABoolean estimateIfNecessary) { // Comparable code for Hive tables: @@ -1223,7 +1242,7 @@ Int64 HSHbaseTableDef::getRowCount(NABoolean &isEstimate, //numPartitions = getNumPartitions(); //minRowCtPerPartition = minPartitionRows_; - return getRowCount(isEstimate, estimateIfNecessary); + return getRowCount(isEstimate, errorCode, breadCrumb, estimateIfNecessary); } Lng32 HSHbaseTableDef::DescribeColumnNames() http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/932c219f/core/sql/ustat/hs_la.h 
---------------------------------------------------------------------- diff --git a/core/sql/ustat/hs_la.h b/core/sql/ustat/hs_la.h index 158b905..3431d7a 100644 --- a/core/sql/ustat/hs_la.h +++ b/core/sql/ustat/hs_la.h @@ -92,6 +92,8 @@ class HSTableDef : public NABasicObject virtual void getRowChangeCounts(Int64 &inserts, Int64 &deletes, Int64 &updates) = 0; virtual void resetRowCounts() = 0; virtual Int64 getRowCount(NABoolean &isEstimate, + Int32 &errorCode, + Int32 &breadCrumb, NABoolean estimateIfNecessary = TRUE) = 0; virtual Int64 getRowCount(NABoolean &isEstimate, Int64 &numInserts, @@ -99,6 +101,8 @@ class HSTableDef : public NABasicObject Int64 &numUpdates, Int64 &numPartitions, Int64 &minRowCtPerPartition, + Int32 &errorCode, + Int32 &breadCrumb, NABoolean estimateIfNecessary ) = 0; Int64 getRowCountUsingSelect(); @@ -186,13 +190,18 @@ class HSSqTableDef : public HSTableDef void getRowChangeCounts(Int64 &inserts, Int64 &deletes, Int64 &updates); void resetRowCounts(); - Int64 getRowCount(NABoolean &isEstimate, NABoolean estimateIfNecessary = TRUE); + Int64 getRowCount(NABoolean &isEstimate, + Int32 &errorCode, + Int32 &breadCrumb, + NABoolean estimateIfNecessary = TRUE); Int64 getRowCount(NABoolean &isEstimate, Int64 &numInserts, Int64 &numDeletes, Int64 &numUpdates, Int64 &numPartitions, Int64 &minRowCtPerPartition, + Int32 &errorCode, + Int32 &breadCrumb, NABoolean estimateIfNecessary ); Lng32 collectFileStatistics() const; @@ -273,9 +282,14 @@ class HSHiveTableDef : public HSTableDef } void resetRowCounts() {} - Int64 getRowCount(NABoolean &isEstimate, NABoolean estimateIfNecessary = TRUE) + Int64 getRowCount(NABoolean &isEstimate, + Int32 &errorCode, + Int32 &breadCrumb, + NABoolean estimateIfNecessary = TRUE) { isEstimate = TRUE; + errorCode = 0; + breadCrumb = -3; return (estimateIfNecessary ? 
tableStats_->getEstimatedRowCount() : 0); } Int64 getRowCount(NABoolean &isEstimate, @@ -284,6 +298,8 @@ class HSHiveTableDef : public HSTableDef Int64 &numUpdates, Int64 &numPartitions, Int64 &minRowCtPerPartition, + Int32 &errorCode, + Int32 &breadCrumb, NABoolean estimateIfNecessary); Lng32 collectFileStatistics() const { @@ -377,13 +393,18 @@ class HSHbaseTableDef : public HSTableDef } void resetRowCounts() {} - Int64 getRowCount(NABoolean &isEstimate, NABoolean estimateIfNecessary = TRUE); + Int64 getRowCount(NABoolean &isEstimate, + Int32 &errorCode, + Int32 &breadCrumb, + NABoolean estimateIfNecessary = TRUE); Int64 getRowCount(NABoolean &isEstimate, Int64 &numInserts, Int64 &numDeletes, Int64 &numUpdates, Int64 &numPartitions, Int64 &minRowCtPerPartition, + Int32 &errorCode, + Int32 &breadCrumb, NABoolean estimateIfNecessary); Lng32 collectFileStatistics() const { http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/932c219f/core/sql/ustat/hs_log.cpp ---------------------------------------------------------------------- diff --git a/core/sql/ustat/hs_log.cpp b/core/sql/ustat/hs_log.cpp index 286efa0..776a316 100644 --- a/core/sql/ustat/hs_log.cpp +++ b/core/sql/ustat/hs_log.cpp @@ -284,7 +284,11 @@ void HSLogMan::Log(const char *data) if (logNeeded_) { ofstream fileout(logFile_->data(), ios::app); - fileout << data << endl; + time_t currentTime = time(0); + struct tm * currentTimeExploded = localtime(&currentTime); + char localTime[100]; // way more space than needed + strftime(localTime,sizeof(localTime),"%c",currentTimeExploded); + fileout << "[" << localTime << "] " << data << endl; } } http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/932c219f/core/sql/ustat/hs_util.cpp ---------------------------------------------------------------------- diff --git a/core/sql/ustat/hs_util.cpp b/core/sql/ustat/hs_util.cpp index f676bf5..23ff706 100644 --- a/core/sql/ustat/hs_util.cpp +++ b/core/sql/ustat/hs_util.cpp @@ -1182,7 +1182,9 @@ double 
getRowCountForFetchFuncs(HSTableDef *tabDef, NABoolean &isEstimate) // On NSK and Linux, getRowCount() will return an accurate count // (from DP2 file label), in all testing environments (and in almost // all other cases). - rows = tabDef->getRowCount(isEstimate); + Int32 errorCode = 0; + Int32 breadCrumb = 0; + rows = tabDef->getRowCount(isEstimate, errorCode /* out */, breadCrumb /* out */); if (!isHbaseTable && !isHiveTable) HSFuncExecQuery("CONTROL QUERY DEFAULT USTAT_FETCHCOUNT_ACTIVE 'OFF'");