Repository: trafodion Updated Branches: refs/heads/master 9d4af12b5 -> d0b512549
[TRAFODION-3223] Don't scale down for non-Puts when estimating row counts Project: http://git-wip-us.apache.org/repos/asf/trafodion/repo Commit: http://git-wip-us.apache.org/repos/asf/trafodion/commit/898812f8 Tree: http://git-wip-us.apache.org/repos/asf/trafodion/tree/898812f8 Diff: http://git-wip-us.apache.org/repos/asf/trafodion/diff/898812f8 Branch: refs/heads/master Commit: 898812f84c510ab8798d5af6e3e63559f4078a07 Parents: 3ee76e1 Author: Dave Birdsall <[email protected]> Authored: Wed Oct 17 22:06:44 2018 +0000 Committer: Dave Birdsall <[email protected]> Committed: Wed Oct 17 22:06:44 2018 +0000 ---------------------------------------------------------------------- core/sql/executor/ex_frag_rt.cpp | 3 +- core/sql/regress/tools/regress-filter-linux | 10 ++- .../java/org/trafodion/sql/HBaseClient.java | 71 +++++++++++++++----- 3 files changed, 65 insertions(+), 19 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/trafodion/blob/898812f8/core/sql/executor/ex_frag_rt.cpp ---------------------------------------------------------------------- diff --git a/core/sql/executor/ex_frag_rt.cpp b/core/sql/executor/ex_frag_rt.cpp index be19fc0..2dfcd7e 100644 --- a/core/sql/executor/ex_frag_rt.cpp +++ b/core/sql/executor/ex_frag_rt.cpp @@ -133,7 +133,8 @@ ExRtFragTable::ExRtFragTable(ExMasterStmtGlobals *glob, ExFragDir *fragDir, char *generatedObject) : fragmentEntries_(glob->getDefaultHeap(),fragDir->getNumEntries()), - outstandingServiceRequests_(glob->getDefaultHeap()) + outstandingServiceRequests_(glob->getDefaultHeap()), + displayInGui_(FALSE) { glob_ = glob; fragDir_ = fragDir; http://git-wip-us.apache.org/repos/asf/trafodion/blob/898812f8/core/sql/regress/tools/regress-filter-linux ---------------------------------------------------------------------- diff --git a/core/sql/regress/tools/regress-filter-linux b/core/sql/regress/tools/regress-filter-linux index c32d3ea..dc74b1b 100755 --- a/core/sql/regress/tools/regress-filter-linux +++ b/core/sql/regress/tools/regress-filter-linux @@ -35,10 +35,18 @@ if [ "$fil" = "" ]; then exit 1 fi SYSKEY=`grep '\<SYSKEY\>' $fil | egrep -v '(--|^>>|^\*\*)'` +# A SYSKEY can be as few as 15 decimal digits. The high order 15 bits +# are an XOR of the low 15 bits of the node id and the linux thread ID, +# while the low 49 bits are taken from JULIANTIMESTAMP. It is possible +# for the high order bits to be zero (e.g. node id 0, linux TID 16384), +# in which case all the decimal digits come from JULIANTIMESTAMP. At +# present, JULIANTIMESTAMP yields 15 decimal digits and will until it +# wraps many many years from now. See the code in function +# generateUniqueValueFast in sqlshare/CatSQLShare.cpp. if [ "$SYSKEY" = "" ]; then SYSKEY='@syskey@' else - SYSKEY='[0-9]\{16,20\}\>' + SYSKEY='[0-9]\{15,20\}\>' fi # 123456789 123456789 http://git-wip-us.apache.org/repos/asf/trafodion/blob/898812f8/core/sql/src/main/java/org/trafodion/sql/HBaseClient.java ---------------------------------------------------------------------- diff --git a/core/sql/src/main/java/org/trafodion/sql/HBaseClient.java b/core/sql/src/main/java/org/trafodion/sql/HBaseClient.java index 8853495..ddd7a7b 100644 --- a/core/sql/src/main/java/org/trafodion/sql/HBaseClient.java +++ b/core/sql/src/main/java/org/trafodion/sql/HBaseClient.java @@ -1275,15 +1275,15 @@ public class HBaseClient { final String HFILE_NAME_PATTERN = "[0-9a-f]*"; // To estimate incidence of nulls, read the first 500 rows worth - // of KeyValues. - final int ROWS_TO_SAMPLE = 500; + // of KeyValues. For aligned format (numCols == 1), the whole row + // is in one cell so we don't need to look for missing cells. + final int ROWS_TO_SAMPLE = ((numCols > 1) ? 500 : 0); // don't bother sampling for aligned format int putKVsSampled = 0; int nonPutKVsSampled = 0; int missingKVsCount = 0; int sampleRowCount = 0; long totalEntries = 0; // KeyValues in all HFiles for table long totalSizeBytes = 0; // Size of all HFiles for table - long estimatedTotalPuts = 0; boolean more = true; // Make sure the config doesn't specify HBase bucket cache. If it does, @@ -1398,13 +1398,32 @@ public class HBaseClient { long estimatedEntries = (ROWS_TO_SAMPLE > 0 ? 0 // get from sample data, below : totalEntries); // no sampling, use stored value - if (putKVsSampled > 0) // avoid div by 0 if no Put KVs in sample - { - estimatedTotalPuts = (putKVsSampled * totalEntries) / - (putKVsSampled + nonPutKVsSampled); - estimatedEntries = ((putKVsSampled + missingKVsCount) * estimatedTotalPuts) + if ((putKVsSampled > 0) && // avoid div by 0 if no Put KVs in sample + (putKVsSampled >= ROWS_TO_SAMPLE/10)) { // avoid really small samples + // Formerly, we would multiply this by a factor of + // putKVsSampled / (putKVsSampled + nonPutKVsSampled). + // If non-put records are evenly distributed among the cells, then + // that would give a better estimate. However, we find that often + // (e.g. time-ordered data that is being aged out), the non-put cells + // clump up in one place -- they might even take a whole HFile! + // There is no real way to compensate other than reading the entire + // table. So, we don't try to scale down the number of rows based + // on the proportion of non-Put cells. That means the value below + // will sometimes over-estimate, but it is much better to over- + // estimate than to under-estimate when it comes to row counts. + estimatedEntries = ((putKVsSampled + missingKVsCount) * totalEntries) / putKVsSampled; - } + } else { // few or no Puts found + // The first file might have been full of deletes, which can happen + // when time-ordered data ages out. We don't want to infer that the + // table as a whole is all deletes (it almost certainly isn't in the + // time-ordered data age-out case). We could just keep reading HFiles + // until we find one with a decent sample of rows but that might take + // awhile. Instead, we'll just punt and use totalEntries for our + // estimate. This will over-estimate, but it is far better to do that + // than to under-estimate. + estimatedEntries = totalEntries; + } // Calculate estimate of rows in all HFiles of table. rc[0] = (estimatedEntries + (numCols/2)) / numCols; // round instead of truncate @@ -1442,8 +1461,6 @@ public class HBaseClient { long memStoreRows = estimateMemStoreRows(tblName, rowSize); if (logger.isDebugEnabled()) logger.debug(tblName + " contains a total of " + totalEntries + " KeyValues in all HFiles."); - if (logger.isDebugEnabled()) logger.debug("Based on a sample, it is estimated that " + estimatedTotalPuts + - " of these KeyValues are of type Put."); if (putKVsSampled + missingKVsCount > 0) if (logger.isDebugEnabled()) logger.debug("Sampling indicates a null incidence of " + (missingKVsCount * 100)/(putKVsSampled + missingKVsCount) + @@ -1555,13 +1572,33 @@ public class HBaseClient { long estimatedEntries = ((ROWS_TO_SAMPLE > 0) && (numCols > 1) ? 0 // get from sample data, below : totalEntries); // no sampling, use stored value - if (putKVsSampled > 0) // avoid div by 0 if no Put KVs in sample - { - long estimatedTotalPuts = (putKVsSampled * totalEntries) / - (putKVsSampled + nonPutKVsSampled); - estimatedEntries = ((putKVsSampled + missingKVsCount) * estimatedTotalPuts) + + if ((putKVsSampled > 0) && // avoid div by 0 if no Put KVs in sample + (putKVsSampled >= ROWS_TO_SAMPLE/10)) { // avoid really small samples + // Formerly, we would multiply this by a factor of + // putKVsSampled / (putKVsSampled + nonPutKVsSampled). + // If non-put records are evenly distributed among the cells, then + // that would give a better estimate. However, we find that often + // (e.g. time-ordered data that is being aged out), the non-put cells + // clump up in one place -- they might even take a whole HFile! + // There is no real way to compensate other than reading the entire + // table. So, we don't try to scale down the number of rows based + // on the proportion of non-Put cells. That means the value below + // will sometimes over-estimate, but it is much better to over- + // estimate than to under-estimate when it comes to row counts. + estimatedEntries = ((putKVsSampled + missingKVsCount) * totalEntries) / putKVsSampled; - } + } else { // few or no Puts found + // The first file might have been full of deletes, which can happen + // when time-ordered data ages out. We don't want to infer that the + // table as a whole is all deletes (it almost certainly isn't in the + // time-ordered data age-out case). We could just keep reading HFiles + // until we find one with a decent sample of rows but that might take + // awhile. Instead, we'll just punt and use totalEntries for our + // estimate. This will over-estimate, but it is far better to do that + // than to under-estimate. + estimatedEntries = totalEntries; + } if (logger.isDebugEnabled()) { logger.debug("estimatedEntries = " + estimatedEntries + ", numCols = " + numCols);
