IMPALA-7307 (part 1). Support stats extrapolation in LocalCatalog

Tested by running 'run-tests.py -k stats_extrap' and the tests passed.

Change-Id: I479b7f517091dd558768601e1e0704a1902b78a5
Reviewed-on: http://gerrit.cloudera.org:8080/10971
Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com>
Reviewed-by: Todd Lipcon <t...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/aa26087f
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/aa26087f
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/aa26087f

Branch: refs/heads/master
Commit: aa26087fea6f48f3764c9ac39385221737cf4047
Parents: 4aec504
Author: Todd Lipcon <t...@cloudera.com>
Authored: Thu Jul 12 16:45:01 2018 -0700
Committer: Todd Lipcon <t...@apache.org>
Committed: Tue Aug 7 17:38:04 2018 +0000

----------------------------------------------------------------------
 .../impala/analysis/ComputeStatsStmt.java       |  8 ++-
 .../org/apache/impala/catalog/FeFsTable.java    | 58 ++++++++++++++++----
 .../org/apache/impala/catalog/HdfsTable.java    | 46 ++--------------
 .../impala/catalog/local/LocalFsTable.java      | 12 ----
 .../org/apache/impala/planner/HdfsScanNode.java |  4 +-
 .../impala/planner/StatsExtrapolationTest.java  |  4 +-
 6 files changed, 62 insertions(+), 70 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/aa26087f/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java 
b/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java
index 4b515e9..098c862 100644
--- a/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java
@@ -497,7 +497,8 @@ public class ComputeStatsStmt extends StatementBase {
       // Not computing incremental stats.
       expectAllPartitions_ = true;
       if (table_ instanceof FeFsTable) {
-        expectAllPartitions_ = !((FeFsTable) 
table_).isStatsExtrapolationEnabled();
+        expectAllPartitions_ = !FeFsTable.Utils.isStatsExtrapolationEnabled(
+            (FeFsTable) table_);
       }
     }
 
@@ -593,7 +594,7 @@ public class ComputeStatsStmt extends StatementBase {
       throw new AnalysisException("TABLESAMPLE is only supported on HDFS 
tables.");
     }
     FeFsTable hdfsTable = (FeFsTable) table_;
-    if (!hdfsTable.isStatsExtrapolationEnabled()) {
+    if (!FeFsTable.Utils.isStatsExtrapolationEnabled(hdfsTable)) {
       throw new AnalysisException(String.format(
           "COMPUTE STATS TABLESAMPLE requires stats extrapolation which is 
disabled.\n" +
           "Stats extrapolation can be enabled service-wide with %s=true or by 
altering " +
@@ -718,7 +719,8 @@ public class ComputeStatsStmt extends StatementBase {
    */
   private boolean updateTableStatsOnly() {
     if (!(table_ instanceof FeFsTable)) return true;
-    return !isIncremental_ && ((FeFsTable) 
table_).isStatsExtrapolationEnabled();
+    return !isIncremental_ && FeFsTable.Utils.isStatsExtrapolationEnabled(
+        (FeFsTable) table_);
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/impala/blob/aa26087f/fe/src/main/java/org/apache/impala/catalog/FeFsTable.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/catalog/FeFsTable.java 
b/fe/src/main/java/org/apache/impala/catalog/FeFsTable.java
index 4dbbe0b..faaf5bd 100644
--- a/fe/src/main/java/org/apache/impala/catalog/FeFsTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/FeFsTable.java
@@ -25,9 +25,11 @@ import java.util.TreeMap;
 
 import org.apache.impala.analysis.LiteralExpr;
 import org.apache.impala.catalog.HdfsPartition.FileDescriptor;
+import org.apache.impala.service.BackendConfig;
 import org.apache.impala.thrift.TNetworkAddress;
 import org.apache.impala.thrift.TPartitionKeyValue;
 import org.apache.impala.thrift.TResultSet;
+import org.apache.impala.thrift.TTableStats;
 import org.apache.impala.util.ListMap;
 
 /**
@@ -119,17 +121,6 @@ public interface FeFsTable extends FeTable {
   public String getFirstLocationWithoutWriteAccess();
 
   /**
-   * @param totalBytes_ the known number of bytes in the table
-   * @return Returns an estimated row count for the given number of file bytes
-   */
-  public long getExtrapolatedNumRows(long totalBytes);
-
-  /**
-   * @return true if stats extrapolation is enabled for this table, false 
otherwise.
-   */
-  boolean isStatsExtrapolationEnabled();
-
-  /**
    * @return statistics on this table as a tabular result set. Used for the
    * SHOW TABLE STATS statement. The schema of the returned TResultSet is set
    * inside this method.
@@ -198,4 +189,47 @@ public interface FeFsTable extends FeTable {
    */
   ListMap<TNetworkAddress> getHostIndex();
 
- }
+  /**
+   * Utility functions for operating on FeFsTable. When we move fully to Java 
8,
+   * these can become default methods of the interface.
+   */
+  abstract class Utils {
+    /**
+     * Returns true if stats extrapolation is enabled for this table, false 
otherwise.
+     * Reconciles the Impalad-wide --enable_stats_extrapolation flag and the
+     * TBL_PROP_ENABLE_STATS_EXTRAPOLATION table property
+     */
+    public static boolean isStatsExtrapolationEnabled(FeFsTable table) {
+      org.apache.hadoop.hive.metastore.api.Table msTbl = 
table.getMetaStoreTable();
+      String propVal = msTbl.getParameters().get(
+          HdfsTable.TBL_PROP_ENABLE_STATS_EXTRAPOLATION);
+      if (propVal == null) return 
BackendConfig.INSTANCE.isStatsExtrapolationEnabled();
+      return Boolean.parseBoolean(propVal);
+    }
+
+    /**
+     * Returns an estimated row count for the given number of file bytes. The 
row count is
+     * extrapolated using the table-level row count and file bytes statistics.
+     * Returns zero only if the given file bytes is zero.
+     * Returns -1 if:
+     * - stats extrapolation has been disabled
+     * - the given file bytes statistic is negative
+     * - the row count or the file byte statistic is missing
+     * - the file bytes statistic is zero or negative
+     * - the row count statistic is zero and the file bytes is non-zero
+     * Otherwise, returns a value >= 1.
+     */
+    public static long getExtrapolatedNumRows(FeFsTable table, long fileBytes) 
{
+      if (!isStatsExtrapolationEnabled(table)) return -1;
+      if (fileBytes == 0) return 0;
+      if (fileBytes < 0) return -1;
+
+      TTableStats tableStats = table.getTTableStats();
+      if (tableStats.num_rows < 0 || tableStats.total_file_bytes <= 0) return 
-1;
+      if (tableStats.num_rows == 0 && tableStats.total_file_bytes != 0) return 
-1;
+      double rowsPerByte = tableStats.num_rows / (double) 
tableStats.total_file_bytes;
+      double extrapolatedNumRows = fileBytes * rowsPerByte;
+      return (long) Math.max(1, Math.round(extrapolatedNumRows));
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/impala/blob/aa26087f/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java 
b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
index 3d65cbb..b704a81 100644
--- a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
@@ -1940,43 +1940,6 @@ public class HdfsTable extends Table implements 
FeFsTable {
     return new Pair<String, LiteralExpr>(value, expr);
   }
 
-  /**
-   * Returns an estimated row count for the given number of file bytes. The 
row count is
-   * extrapolated using the table-level row count and file bytes statistics.
-   * Returns zero only if the given file bytes is zero.
-   * Returns -1 if:
-   * - stats extrapolation has been disabled
-   * - the given file bytes statistic is negative
-   * - the row count or the file byte statistic is missing
-   * - the file bytes statistic is zero or negative
-   * - the row count statistic is zero and the file bytes is non-zero
-   * Otherwise, returns a value >= 1.
-   */
-  @Override // FeFsTable
-  public long getExtrapolatedNumRows(long fileBytes) {
-    if (!isStatsExtrapolationEnabled()) return -1;
-    if (fileBytes == 0) return 0;
-    if (fileBytes < 0) return -1;
-    if (tableStats_.num_rows < 0 || tableStats_.total_file_bytes <= 0) return 
-1;
-    if (tableStats_.num_rows == 0 && tableStats_.total_file_bytes != 0) return 
-1;
-    double rowsPerByte = tableStats_.num_rows / (double) 
tableStats_.total_file_bytes;
-    double extrapolatedNumRows = fileBytes * rowsPerByte;
-    return (long) Math.max(1, Math.round(extrapolatedNumRows));
-  }
-
-  /**
-   * Returns true if stats extrapolation is enabled for this table, false 
otherwise.
-   * Reconciles the Impalad-wide --enable_stats_extrapolation flag and the
-   * TBL_PROP_ENABLE_STATS_EXTRAPOLATION table property
-   */
-  @Override // FeFsTable
-  public boolean isStatsExtrapolationEnabled() {
-    org.apache.hadoop.hive.metastore.api.Table msTbl = getMetaStoreTable();
-    String propVal = 
msTbl.getParameters().get(TBL_PROP_ENABLE_STATS_EXTRAPOLATION);
-    if (propVal == null) return 
BackendConfig.INSTANCE.isStatsExtrapolationEnabled();
-    return Boolean.parseBoolean(propVal);
-  }
-
   @Override // FeFsTable
   public TResultSet getTableStats() {
     return getTableStats(this);
@@ -1996,7 +1959,7 @@ public class HdfsTable extends Table implements FeFsTable 
{
       resultSchema.addToColumns(colDesc);
     }
 
-    boolean statsExtrap = table.isStatsExtrapolationEnabled();
+    boolean statsExtrap = Utils.isStatsExtrapolationEnabled(table);
 
     resultSchema.addToColumns(new TColumn("#Rows", Type.BIGINT.toThrift()));
     if (statsExtrap) {
@@ -2036,7 +1999,9 @@ public class HdfsTable extends Table implements FeFsTable 
{
       // Compute and report the extrapolated row count because the set of 
files could
       // have changed since we last computed stats for this partition. We also 
follow
       // this policy during scan-cardinality estimation.
-      if (statsExtrap) rowBuilder.add(table.getExtrapolatedNumRows(size));
+      if (statsExtrap) {
+        rowBuilder.add(Utils.getExtrapolatedNumRows(table, size));
+      }
 
       rowBuilder.add(numFiles).addBytes(size);
       if (!p.isMarkedCached()) {
@@ -2090,7 +2055,8 @@ public class HdfsTable extends Table implements FeFsTable 
{
       // have changed since we last computed stats for this partition. We also 
follow
       // this policy during scan-cardinality estimation.
       if (statsExtrap) {
-        
rowBuilder.add(table.getExtrapolatedNumRows(table.getTotalHdfsBytes()));
+        rowBuilder.add(Utils.getExtrapolatedNumRows(
+            table, table.getTotalHdfsBytes()));
       }
       rowBuilder.add(totalNumFiles)
           .addBytes(totalBytes)

http://git-wip-us.apache.org/repos/asf/impala/blob/aa26087f/fe/src/main/java/org/apache/impala/catalog/local/LocalFsTable.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/catalog/local/LocalFsTable.java 
b/fe/src/main/java/org/apache/impala/catalog/local/LocalFsTable.java
index 2f7ae89..a47497a 100644
--- a/fe/src/main/java/org/apache/impala/catalog/local/LocalFsTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/local/LocalFsTable.java
@@ -270,18 +270,6 @@ public class LocalFsTable extends LocalTable implements 
FeFsTable {
   }
 
   @Override
-  public long getExtrapolatedNumRows(long totalBytes) {
-    // TODO Auto-generated method stub
-    return -1;
-  }
-
-  @Override
-  public boolean isStatsExtrapolationEnabled() {
-    // TODO Auto-generated method stub
-    return false;
-  }
-
-  @Override
   public TResultSet getTableStats() {
     return HdfsTable.getTableStats(this);
   }

http://git-wip-us.apache.org/repos/asf/impala/blob/aa26087f/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java 
b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
index 151cbe0..706093c 100644
--- a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
@@ -998,7 +998,7 @@ public class HdfsScanNode extends ScanNode {
    */
   private void computeCardinalities() {
     // Choose between the extrapolated row count and the one based on stored 
stats.
-    extrapolatedNumRows_ = tbl_.getExtrapolatedNumRows(totalBytes_);
+    extrapolatedNumRows_ = FeFsTable.Utils.getExtrapolatedNumRows(tbl_, 
totalBytes_);
     long statsNumRows = getStatsNumRows();
     if (extrapolatedNumRows_ != -1) {
       // The extrapolated row count is based on the 'totalBytes_' which 
already accounts
@@ -1242,7 +1242,7 @@ public class HdfsScanNode extends ScanNode {
       output.append(getStatsExplainString(detailPrefix));
       output.append("\n");
       String extrapRows = String.valueOf(extrapolatedNumRows_);
-      if (!tbl_.isStatsExtrapolationEnabled()) {
+      if (!FeFsTable.Utils.isStatsExtrapolationEnabled(tbl_)) {
         extrapRows = "disabled";
       } else if (extrapolatedNumRows_ == -1) {
         extrapRows = "unavailable";

http://git-wip-us.apache.org/repos/asf/impala/blob/aa26087f/fe/src/test/java/org/apache/impala/planner/StatsExtrapolationTest.java
----------------------------------------------------------------------
diff --git 
a/fe/src/test/java/org/apache/impala/planner/StatsExtrapolationTest.java 
b/fe/src/test/java/org/apache/impala/planner/StatsExtrapolationTest.java
index a0a8566..2582f02 100644
--- a/fe/src/test/java/org/apache/impala/planner/StatsExtrapolationTest.java
+++ b/fe/src/test/java/org/apache/impala/planner/StatsExtrapolationTest.java
@@ -23,6 +23,7 @@ import java.util.HashMap;
 import java.util.Map;
 
 import org.apache.hadoop.hive.common.StatsSetupConst;
+import org.apache.impala.catalog.FeFsTable;
 import org.apache.impala.catalog.HdfsTable;
 import org.apache.impala.catalog.Table;
 import org.apache.impala.common.FrontendTestBase;
@@ -70,7 +71,8 @@ public class StatsExtrapolationTest extends FrontendTestBase {
       long fileBytes, long expectedExtrapNumRows) {
     Preconditions.checkState(tbl instanceof HdfsTable);
     setStats(tbl, rowCount, totalSize);
-    long actualExtrapNumRows = 
((HdfsTable)tbl).getExtrapolatedNumRows(fileBytes);
+    long actualExtrapNumRows = FeFsTable.Utils.getExtrapolatedNumRows(
+        (HdfsTable)tbl, fileBytes);
     assertEquals(expectedExtrapNumRows, actualExtrapNumRows);
   }
 

Reply via email to