Repository: trafodion
Updated Branches:
  refs/heads/master 9d4af12b5 -> d0b512549


[TRAFODION-3223] Don't scale down for non-Puts when estimating row counts


Project: http://git-wip-us.apache.org/repos/asf/trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/trafodion/commit/898812f8
Tree: http://git-wip-us.apache.org/repos/asf/trafodion/tree/898812f8
Diff: http://git-wip-us.apache.org/repos/asf/trafodion/diff/898812f8

Branch: refs/heads/master
Commit: 898812f84c510ab8798d5af6e3e63559f4078a07
Parents: 3ee76e1
Author: Dave Birdsall <[email protected]>
Authored: Wed Oct 17 22:06:44 2018 +0000
Committer: Dave Birdsall <[email protected]>
Committed: Wed Oct 17 22:06:44 2018 +0000

----------------------------------------------------------------------
 core/sql/executor/ex_frag_rt.cpp                |  3 +-
 core/sql/regress/tools/regress-filter-linux     | 10 ++-
 .../java/org/trafodion/sql/HBaseClient.java     | 71 +++++++++++++++-----
 3 files changed, 65 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/trafodion/blob/898812f8/core/sql/executor/ex_frag_rt.cpp
----------------------------------------------------------------------
diff --git a/core/sql/executor/ex_frag_rt.cpp b/core/sql/executor/ex_frag_rt.cpp
index be19fc0..2dfcd7e 100644
--- a/core/sql/executor/ex_frag_rt.cpp
+++ b/core/sql/executor/ex_frag_rt.cpp
@@ -133,7 +133,8 @@ ExRtFragTable::ExRtFragTable(ExMasterStmtGlobals *glob,
                             ExFragDir *fragDir,
                             char  *generatedObject) :
      fragmentEntries_(glob->getDefaultHeap(),fragDir->getNumEntries()),
-     outstandingServiceRequests_(glob->getDefaultHeap())
+     outstandingServiceRequests_(glob->getDefaultHeap()),
+     displayInGui_(FALSE)
 {
   glob_                     = glob;
   fragDir_                  = fragDir;

http://git-wip-us.apache.org/repos/asf/trafodion/blob/898812f8/core/sql/regress/tools/regress-filter-linux
----------------------------------------------------------------------
diff --git a/core/sql/regress/tools/regress-filter-linux b/core/sql/regress/tools/regress-filter-linux
index c32d3ea..dc74b1b 100755
--- a/core/sql/regress/tools/regress-filter-linux
+++ b/core/sql/regress/tools/regress-filter-linux
@@ -35,10 +35,18 @@ if [ "$fil" = "" ]; then
   exit 1
 fi
 SYSKEY=`grep '\<SYSKEY\>' $fil | egrep -v '(--|^>>|^\*\*)'`
+# A SYSKEY can be as few as 15 decimal digits. The high order 15 bits
+# are an XOR of the low 15 bits of the node id and the linux thread ID,
+# while the low 49 bits are taken from JULIANTIMESTAMP. It is possible
+# for the high order bits to be zero (e.g. node id 0, linux TID 16384),
+# in which case all the decimal digits come from JULIANTIMESTAMP. At
+# present, JULIANTIMESTAMP yields 15 decimal digits and will until it
+# wraps many many years from now. See the code in function
+# generateUniqueValueFast in sqlshare/CatSQLShare.cpp.
 if [ "$SYSKEY" = "" ]; then
   SYSKEY='@syskey@'
 else
-  SYSKEY='[0-9]\{16,20\}\>'
+  SYSKEY='[0-9]\{15,20\}\>'
 fi
 #       123456789 123456789
 

http://git-wip-us.apache.org/repos/asf/trafodion/blob/898812f8/core/sql/src/main/java/org/trafodion/sql/HBaseClient.java
----------------------------------------------------------------------
diff --git a/core/sql/src/main/java/org/trafodion/sql/HBaseClient.java b/core/sql/src/main/java/org/trafodion/sql/HBaseClient.java
index 8853495..ddd7a7b 100644
--- a/core/sql/src/main/java/org/trafodion/sql/HBaseClient.java
+++ b/core/sql/src/main/java/org/trafodion/sql/HBaseClient.java
@@ -1275,15 +1275,15 @@ public class HBaseClient {
       final String HFILE_NAME_PATTERN  = "[0-9a-f]*";
 
       // To estimate incidence of nulls, read the first 500 rows worth
-      // of KeyValues.
-      final int ROWS_TO_SAMPLE = 500;
+      // of KeyValues. For aligned format (numCols == 1), the whole row 
+      // is in one cell so we don't need to look for missing cells.
+      final int ROWS_TO_SAMPLE = ((numCols > 1) ? 500 : 0);  // don't bother sampling for aligned format
       int putKVsSampled = 0;
       int nonPutKVsSampled = 0;
       int missingKVsCount = 0;
       int sampleRowCount = 0;
       long totalEntries = 0;   // KeyValues in all HFiles for table
       long totalSizeBytes = 0; // Size of all HFiles for table 
-      long estimatedTotalPuts = 0;
       boolean more = true;
 
       // Make sure the config doesn't specify HBase bucket cache. If it does,
@@ -1398,13 +1398,32 @@ public class HBaseClient {
       long estimatedEntries = (ROWS_TO_SAMPLE > 0
                                 ? 0               // get from sample data, below
                                 : totalEntries);  // no sampling, use stored value
-      if (putKVsSampled > 0) // avoid div by 0 if no Put KVs in sample
-        {
-          estimatedTotalPuts = (putKVsSampled * totalEntries) / 
-                               (putKVsSampled + nonPutKVsSampled);
-          estimatedEntries = ((putKVsSampled + missingKVsCount) * estimatedTotalPuts)
+      if ((putKVsSampled > 0) && // avoid div by 0 if no Put KVs in sample
+          (putKVsSampled >= ROWS_TO_SAMPLE/10)) { // avoid really small samples
+        // Formerly, we would multiply this by a factor of 
+        // putKVsSampled / (putKVsSampled + nonPutKVsSampled).
+        // If non-put records are evenly distributed among the cells, then
+        // that would give a better estimate. However, we find that often
+        // (e.g. time-ordered data that is being aged out), the non-put cells
+        // clump up in one place -- they might even take a whole HFile!
+        // There is no real way to compensate other than reading the entire
+        // table. So, we don't try to scale down the number of rows based 
+        // on the proportion of non-Put cells. That means the value below
+        // will sometimes over-estimate, but it is much better to over-
+        // estimate than to under-estimate when it comes to row counts.
+        estimatedEntries = ((putKVsSampled + missingKVsCount) * totalEntries)
                                    / putKVsSampled;
-        }
+      } else { // few or no Puts found
+        // The first file might have been full of deletes, which can happen
+        // when time-ordered data ages out. We don't want to infer that the
+        // table as a whole is all deletes (it almost certainly isn't in the
+        // time-ordered data age-out case). We could just keep reading HFiles
+        // until we find one with a decent sample of rows but that might take
+        // awhile. Instead, we'll just punt and use totalEntries for our
+        // estimate. This will over-estimate, but it is far better to do that
+        // than to under-estimate.
+        estimatedEntries = totalEntries;
+      }
 
       // Calculate estimate of rows in all HFiles of table.
      rc[0] = (estimatedEntries + (numCols/2)) / numCols; // round instead of truncate
@@ -1442,8 +1461,6 @@ public class HBaseClient {
       long memStoreRows = estimateMemStoreRows(tblName, rowSize);
 
      if (logger.isDebugEnabled()) logger.debug(tblName + " contains a total of " + totalEntries + " KeyValues in all HFiles.");
-      if (logger.isDebugEnabled()) logger.debug("Based on a sample, it is estimated that " + estimatedTotalPuts +
-                   " of these KeyValues are of type Put.");
       if (putKVsSampled + missingKVsCount > 0)
        if (logger.isDebugEnabled()) logger.debug("Sampling indicates a null incidence of " + 
                     (missingKVsCount * 100)/(putKVsSampled + missingKVsCount) +
@@ -1555,13 +1572,33 @@ public class HBaseClient {
       long estimatedEntries = ((ROWS_TO_SAMPLE > 0) && (numCols > 1)
                                 ? 0               // get from sample data, below
                                 : totalEntries);  // no sampling, use stored value
-      if (putKVsSampled > 0) // avoid div by 0 if no Put KVs in sample
-        {
-          long estimatedTotalPuts = (putKVsSampled * totalEntries) / 
-                               (putKVsSampled + nonPutKVsSampled);
-          estimatedEntries = ((putKVsSampled + missingKVsCount) * estimatedTotalPuts)
+
+      if ((putKVsSampled > 0) && // avoid div by 0 if no Put KVs in sample
+          (putKVsSampled >= ROWS_TO_SAMPLE/10)) { // avoid really small samples
+        // Formerly, we would multiply this by a factor of 
+        // putKVsSampled / (putKVsSampled + nonPutKVsSampled).
+        // If non-put records are evenly distributed among the cells, then
+        // that would give a better estimate. However, we find that often
+        // (e.g. time-ordered data that is being aged out), the non-put cells
+        // clump up in one place -- they might even take a whole HFile!
+        // There is no real way to compensate other than reading the entire
+        // table. So, we don't try to scale down the number of rows based 
+        // on the proportion of non-Put cells. That means the value below
+        // will sometimes over-estimate, but it is much better to over-
+        // estimate than to under-estimate when it comes to row counts.
+        estimatedEntries = ((putKVsSampled + missingKVsCount) * totalEntries)
                                    / putKVsSampled;
-        }
+      } else { // few or no Puts found
+        // The first file might have been full of deletes, which can happen
+        // when time-ordered data ages out. We don't want to infer that the
+        // table as a whole is all deletes (it almost certainly isn't in the
+        // time-ordered data age-out case). We could just keep reading HFiles
+        // until we find one with a decent sample of rows but that might take
+        // awhile. Instead, we'll just punt and use totalEntries for our
+        // estimate. This will over-estimate, but it is far better to do that
+        // than to under-estimate.
+        estimatedEntries = totalEntries;
+      }
 
       if (logger.isDebugEnabled()) { 
        logger.debug("estimatedEntries = " + estimatedEntries + ", numCols = " + numCols);

Reply via email to