This is an automated email from the ASF dual-hosted git repository.
dbirdsall pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafodion.git
The following commit(s) were added to refs/heads/master by this push:
new f6f885b [TRAFODION-3316] Three fixes to UPDATE STATISTICS
new 141cbfb Merge pull request #1848 from DaveBirdsall/Trafodion3316
f6f885b is described below
commit f6f885bf40f942978e7a2b8893f3147ecbf35569
Author: Dave Birdsall <[email protected]>
AuthorDate: Tue Jul 9 17:43:28 2019 +0000
[TRAFODION-3316] Three fixes to UPDATE STATISTICS
---
core/sql/ustat/hs_cli.cpp | 72 ++++++-------------------------------------
core/sql/ustat/hs_cli.h | 2 +-
core/sql/ustat/hs_globals.cpp | 47 ++++++++++++++--------------
3 files changed, 35 insertions(+), 86 deletions(-)
diff --git a/core/sql/ustat/hs_cli.cpp b/core/sql/ustat/hs_cli.cpp
index 0840097..ceb10f5 100644
--- a/core/sql/ustat/hs_cli.cpp
+++ b/core/sql/ustat/hs_cli.cpp
@@ -176,6 +176,9 @@ Lng32 HSExecDirect( SQLSTMT_ID * stmt
// inactivateErrorCatcher = TRUE if the caller already has an
// HSErrorCatcher object active (that is, the caller wants
// to capture diagnostics itself).
+// Note: srcTabRowCount is an obsolete parameter. Its only function now
+// is to control whether plan information should be kept. Later we can
+// clean this out, replacing it with an NABoolean.
// -----------------------------------------------------------------------
Lng32 HSFuncExecQuery( const char *dml
, short sqlcode
@@ -194,7 +197,7 @@ Lng32 HSFuncExecQuery( const char *dml
HSErrorCatcher errorCatcher(retcode, sqlcode, errorToken, TRUE,
inactivateErrorCatcher);
retcode = HSFuncExecQueryBody(dml,sqlcode,rowsAffected,errorToken,
- srcTabRowCount,tabDef,errorToIgnore,checkMdam);
+ srcTabRowCount != NULL,tabDef,errorToIgnore,checkMdam);
HSHandleError(retcode);
return retcode;
}
@@ -209,7 +212,7 @@ Lng32 HSFuncExecQueryBody( const char *dml
, short sqlcode
, Int64 *rowsAffected
, const char *errorToken
- , Int64 *srcTabRowCount
+ , NABoolean printPlan
, const HSTableDef *tabDef
, short errorToIgnore
, NABoolean checkMdam
@@ -293,7 +296,7 @@ Lng32 HSFuncExecQueryBody( const char *dml
HSHandleError(retcode);
// execute immediate this statement
- retcode = HSExecDirect(&stmt, &srcDesc, srcTabRowCount != 0, checkMdam);
+ retcode = HSExecDirect(&stmt, &srcDesc, printPlan, checkMdam);
// If retcode is > 0 or sqlcode is HS_WARNING, then set to 0 (no error/ignore).
if (retcode >= 0) retcode = 0;
// If sqlcode is HS_WARNING, then this means failures should be returned as
@@ -332,11 +335,6 @@ Lng32 HSFuncExecQueryBody( const char *dml
, 0);
SQL_EXEC_GetDiagnosticsStmtInfo(&sql_item, &rc_desc);
SQL_EXEC_DeallocDesc(&rc_desc);
-
- if (srcTabRowCount)
- {
- getRowCountFromStats(srcTabRowCount, tabDef) ;
- }
}
return retcode;
}
@@ -362,6 +360,9 @@ Lng32 HSFuncExecQueryBody( const char *dml
// there is no such expected error.
// checkMdam = if TRUE, determine whether the query uses MDAM, and
// include this information in the ulog.
+// Note: srcTabRowCount is an obsolete parameter. Its only function now
+// is to control whether plan information should be kept. Later we can
+// clean this out, replacing it with an NABoolean.
// -----------------------------------------------------------------------
Lng32 HSFuncExecTransactionalQueryWithRetry( const char *dml
, short sqlcode
@@ -411,7 +412,7 @@ Lng32 HSFuncExecTransactionalQueryWithRetry( const char *dml
// execute the statement
retcode = HSFuncExecQueryBody(dml, sqlcode, rowsAffected, errorToken,
- srcTabRowCount, tabDef, errorToIgnore, checkMdam);
+ srcTabRowCount != NULL, tabDef, errorToIgnore, checkMdam);
// Figure out if we want to ignore certain conditions
@@ -5851,59 +5852,6 @@ Lng32 checkMdam(SQLSTMT_ID *stmt)
return retcode;
}
-/***********************************************/
-/* METHOD: getRowCountFromStats(Int64* ) */
-/* PURPOSE: Get row count from stats for the */
-/* previously executed statement. */
-/* Currently this method cannot access*/
-/* stats for an arbitrary statement. */
-/* Used to get an accurate value for */
-/* rowcount when EID sampling is used */
-/* INPUT: Int64* rowsAffected */
-/***********************************************/
-void getRowCountFromStats(Int64 * rowsAffected, const HSTableDef *tabDef)
- {
- // 9/18/2013: The query underlying this function no longer works. It depended
- // on specific information (in a specific location) for the variable_info
- // column of the Statistics virtual table, which seems to have changed. The
- // necessary info (table name and # accessed rows) is not present in a single
- // row of that table.
- return;
-
- Lng32 retcode = 0;
- char rowcount[31];
- char tabName[600] ;
- if (!tabDef) return;
-
- str_pad(tabName, 600, ' ') ;
-
- // longest valid ANSI name is 128*3. Allowing some extra space
- // for funny delimited names.
- if (tabDef->getObjectFullName().length() > 596) return;
-
- tabName[0] = '%';
- strcpy(&(tabName[1]), tabDef->getObjectFullName().data());
- tabName[tabDef->getObjectFullName().length()+1] = ' ';
- tabName[tabDef->getObjectFullName().length()+2] = '%';
- tabName[tabDef->getObjectFullName().length()+3] = 0;
-
- HSCliStatement getStats(HSCliStatement::ROWCOUNT_FROM_STATS,
- (char *)&tabName);
-
- retcode = getStats.open();
- if (retcode !=0 ) return ;
-
- retcode = getStats.fetch(1,(void *)&rowcount[0]);
- if ((retcode != 0) && (retcode !=100)) return;
-
- retcode = getStats.close();
- if (retcode !=0 ) return ;
-
- *rowsAffected = atoInt64(rowcount) ;
-
- return ;
- }
-
/**************************************************************************/
/* METHOD: ucsToDouble() */
/* PURPOSE: Interpret the Unicode string pointed to by ucs as a numeric */
diff --git a/core/sql/ustat/hs_cli.h b/core/sql/ustat/hs_cli.h
index 5b5a454..6133193 100644
--- a/core/sql/ustat/hs_cli.h
+++ b/core/sql/ustat/hs_cli.h
@@ -85,7 +85,7 @@ Lng32 HSFuncExecQueryBody( const char *dml
, short sqlcode
, Int64 *rowsAffected
, const char *errorToken
- , Int64 *srcTabRowCount
+ , NABoolean printPlan
, const HSTableDef *tabDef
, short errorToIgnore
, NABoolean checkMdam
diff --git a/core/sql/ustat/hs_globals.cpp b/core/sql/ustat/hs_globals.cpp
index 1762add..fc3cc4c 100644
--- a/core/sql/ustat/hs_globals.cpp
+++ b/core/sql/ustat/hs_globals.cpp
@@ -4293,11 +4293,6 @@ Lng32 HSSample::make(NABoolean rowCountIsEstimate, // input
SQL_EXEC_SetParserFlagsForExSqlComp_Internal(hsALLOW_SPECIALTABLETYPE);
}
- // initialize sourceTableRowCount to -1. The method that sets this parameter
- // will not change this value if there is an error. So if
- // sourceTableRowCount = -1 after the call, we know something went wrong
- // and we do not use this value.
- Int64 sourceTableRowCount = -1;
// on very busy system, some "update statistics" implementation steps like
// "Process_Query" step in HSSample::make() that calls HSFuncExecQuery
// may experience failures resulting in a flurry of callcatcher error 9200
@@ -4307,11 +4302,12 @@ Lng32 HSSample::make(NABoolean rowCountIsEstimate, // input
// 2 cqds allow user control of these retries.
Int32 centiSecs = getDefaultAsLong(USTAT_RETRY_DELAY);
Int32 limit = getDefaultAsLong(USTAT_RETRY_LIMIT);
+ Int64 printPlan = 1;
if (limit < 1 || centiSecs < 1) // user does not want any retry
{
LM->StartTimer("Populate sample table");
retcode = HSFuncExecQuery(dml, - UERR_INTERNAL_ERROR, &sampleRowCount,
- HS_QUERY_ERROR, &sourceTableRowCount, objDef);
+ HS_QUERY_ERROR, &printPlan , objDef);
LM->StopTimer();
}
else // user wants retry
@@ -4320,7 +4316,7 @@ Lng32 HSSample::make(NABoolean rowCountIsEstimate, // input
HSFuncExecQuery("CONTROL QUERY DEFAULT AUTO_QUERY_RETRY 'ON'");
LM->StartTimer("Populate sample table (with possible retry)");
retcode = HSFuncExecQuery(dml, - UERR_INTERNAL_ERROR, &sampleRowCount,
- HS_QUERY_ERROR, &sourceTableRowCount, objDef);
+ HS_QUERY_ERROR, &printPlan, objDef);
LM->StopTimer();
HSFuncExecQuery("CONTROL QUERY DEFAULT AUTO_QUERY_RETRY RESET");
}
@@ -4392,6 +4388,12 @@ Lng32 HSSample::make(NABoolean rowCountIsEstimate, // input
LM->Log(LM->msg);
}
+
+ double sampleRatio = samplePercent / 100;
+ double tableRowCntDbl = ((double)sampleRowCount) / sampleRatio;
+ if (!isnormal(tableRowCntDbl)) // if we get NaN, infinity etc, just use original row count
+ tableRowCntDbl = (double)tableRowCnt;
+
// TEMP: ignore empty sample set if bulk load is on as rowcount is currently not
// being returned by bulk load.
if ((sampleRowCount == 0) && // sample set is empty;
@@ -4406,20 +4408,16 @@ Lng32 HSSample::make(NABoolean rowCountIsEstimate, // input
hs_globals->sampleTableUsed = TRUE;
hs_globals->samplingUsed = TRUE;
hs_globals->sampleSeconds = getTimeDiff();
- // If (a) current row count is estimate (for R2.3 and later, this is unlikely)
+ // If (a) current row count is estimate
// (b) user has not specified the rowcount and
- // (c) we appear to get a meaningful rowcount for the source table
- // (source table rowcount >= rows inserted into sample table) and
- // (d) CLUSTER sampling not used
- // we set the actualRowCount to the value obtained from the statistics table
- // This works since every row of the source table is scanned for EID sampling
- // and the number of rows scanned is recorded in the stats area.
+ // (c) CLUSTER sampling not used
+ // we set the actualRowCount to the value inferred by the number of sample
+ // rows and the sampling ratio.
if (rowCountIsEstimate &&
!(hs_globals->optFlags & ROWCOUNT_OPT) &&
- (sourceTableRowCount > sampleRowCount) &&
(hs_globals->optFlags & SAMPLE_REQUESTED) != SAMPLE_RAND_2)
{
- tableRowCnt = sourceTableRowCount;
+ tableRowCnt = (Int64)tableRowCntDbl;
if (LM->LogNeeded())
{
convertInt64ToAscii(tableRowCnt, intStr);
@@ -4428,7 +4426,8 @@ Lng32 HSSample::make(NABoolean rowCountIsEstimate, // input
}
}
}
- else if (rowCountIsEstimate && sourceTableRowCount > sampleRowCount) tableRowCnt = sourceTableRowCount;
+ else if (rowCountIsEstimate)
+ tableRowCnt = (Int64)tableRowCntDbl;
LM->StopTimer();
@@ -7044,13 +7043,13 @@ Lng32 HSGlobalsClass::CollectStatisticsForIUS(Int64 currentSampleSize,
// is in the same schema as the one referenced by tblDef). This avoids problems
// in parsing the fully qualified name posed by the possibility of periods within
// delimited identifiers.
-static const char* extractTblName(const NAString& fullyQualifiedName,
- HSTableDef* tblDef)
+static void extractTblName(const NAString& fullyQualifiedName,
+ HSTableDef* tblDef, NAString & out)
{
Lng32 tblNameOffset = tblDef->getCatName().length() +
tblDef->getSchemaName().length() +
2; // 2 dot separators
- return fullyQualifiedName.data() + tblNameOffset;
+ out = fullyQualifiedName.data() + tblNameOffset;
}
// Update the persistent sample table and determine its new cardinality.
@@ -7134,8 +7133,9 @@ Lng32 HSGlobalsClass::UpdateIUSPersistentSampleTable(Int64 oldSampleSize,
}
rowsAffected = 0;
- const char* insSourceTblName = extractTblName(*hssample_table + "_I", objDef);
- NABoolean needEspParReset = setEspParallelism(objDef, insSourceTblName);
+ NAString insSourceTblName;
+ extractTblName(*hssample_table + "_I", objDef, insSourceTblName /* out */);
+ NABoolean needEspParReset = setEspParallelism(objDef, insSourceTblName.data());
// can't retry this one, as it uses non-transactional upsert using load + random
// select; a retry might add *another* random sample to a partial sample from
@@ -14375,7 +14375,8 @@ Int32 copyValue(Int64 value, char *valueBuff, const HSColumnStruct &colDesc, sho
colDesc.scale, // display width for fractional seconds
// Fractional second; compute microseconds, remove trailing
// zeroes beyond the scale.
- (dtvals[6] * 1000 + dtvals[7]) / (Int32)pow(10, 6-colDesc.scale));
+ (dtvals[6] * 1000 + dtvals[7]) /
+ (Int32)pow(10, 6 - MINOF(6, colDesc.scale)));
else
sprintf(valueBuff, "%04d-%02d-%02d %02d:%02d:%02d",
dtvals[0], dtvals[1], dtvals[2],