IMPALA-7307 (part 1). Support stats extrapolation in LocalCatalog Tested by running 'run-tests.py -k stats_extrap' and the tests passed.
Change-Id: I479b7f517091dd558768601e1e0704a1902b78a5 Reviewed-on: http://gerrit.cloudera.org:8080/10971 Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> Reviewed-by: Todd Lipcon <t...@apache.org> Project: http://git-wip-us.apache.org/repos/asf/impala/repo Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/aa26087f Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/aa26087f Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/aa26087f Branch: refs/heads/master Commit: aa26087fea6f48f3764c9ac39385221737cf4047 Parents: 4aec504 Author: Todd Lipcon <t...@cloudera.com> Authored: Thu Jul 12 16:45:01 2018 -0700 Committer: Todd Lipcon <t...@apache.org> Committed: Tue Aug 7 17:38:04 2018 +0000 ---------------------------------------------------------------------- .../impala/analysis/ComputeStatsStmt.java | 8 ++- .../org/apache/impala/catalog/FeFsTable.java | 58 ++++++++++++++++---- .../org/apache/impala/catalog/HdfsTable.java | 46 ++-------------- .../impala/catalog/local/LocalFsTable.java | 12 ---- .../org/apache/impala/planner/HdfsScanNode.java | 4 +- .../impala/planner/StatsExtrapolationTest.java | 4 +- 6 files changed, 62 insertions(+), 70 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/impala/blob/aa26087f/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java b/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java index 4b515e9..098c862 100644 --- a/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java +++ b/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java @@ -497,7 +497,8 @@ public class ComputeStatsStmt extends StatementBase { // Not computing incremental stats. expectAllPartitions_ = true; if (table_ instanceof FeFsTable) { - expectAllPartitions_ = !((FeFsTable) table_).isStatsExtrapolationEnabled(); + expectAllPartitions_ = !FeFsTable.Utils.isStatsExtrapolationEnabled( + (FeFsTable) table_); } } @@ -593,7 +594,7 @@ public class ComputeStatsStmt extends StatementBase { throw new AnalysisException("TABLESAMPLE is only supported on HDFS tables."); } FeFsTable hdfsTable = (FeFsTable) table_; - if (!hdfsTable.isStatsExtrapolationEnabled()) { + if (!FeFsTable.Utils.isStatsExtrapolationEnabled(hdfsTable)) { throw new AnalysisException(String.format( "COMPUTE STATS TABLESAMPLE requires stats extrapolation which is disabled.\n" + "Stats extrapolation can be enabled service-wide with %s=true or by altering " + @@ -718,7 +719,8 @@ public class ComputeStatsStmt extends StatementBase { */ private boolean updateTableStatsOnly() { if (!(table_ instanceof FeFsTable)) return true; - return !isIncremental_ && ((FeFsTable) table_).isStatsExtrapolationEnabled(); + return !isIncremental_ && FeFsTable.Utils.isStatsExtrapolationEnabled( + (FeFsTable) table_); } /** http://git-wip-us.apache.org/repos/asf/impala/blob/aa26087f/fe/src/main/java/org/apache/impala/catalog/FeFsTable.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/catalog/FeFsTable.java b/fe/src/main/java/org/apache/impala/catalog/FeFsTable.java index 4dbbe0b..faaf5bd 100644 --- a/fe/src/main/java/org/apache/impala/catalog/FeFsTable.java +++ b/fe/src/main/java/org/apache/impala/catalog/FeFsTable.java @@ -25,9 +25,11 @@ import java.util.TreeMap; import org.apache.impala.analysis.LiteralExpr; import org.apache.impala.catalog.HdfsPartition.FileDescriptor; +import org.apache.impala.service.BackendConfig; import org.apache.impala.thrift.TNetworkAddress; import org.apache.impala.thrift.TPartitionKeyValue; import org.apache.impala.thrift.TResultSet; +import org.apache.impala.thrift.TTableStats; import org.apache.impala.util.ListMap; /** @@ -119,17 +121,6 @@ public interface FeFsTable extends FeTable { public String getFirstLocationWithoutWriteAccess(); /** - * @param totalBytes_ the known number of bytes in the table - * @return Returns an estimated row count for the given number of file bytes - */ - public long getExtrapolatedNumRows(long totalBytes); - - /** - * @return true if stats extrapolation is enabled for this table, false otherwise. - */ - boolean isStatsExtrapolationEnabled(); - - /** * @return statistics on this table as a tabular result set. Used for the * SHOW TABLE STATS statement. The schema of the returned TResultSet is set * inside this method. @@ -198,4 +189,47 @@ public interface FeFsTable extends FeTable { */ ListMap<TNetworkAddress> getHostIndex(); - } + /** + * Utility functions for operating on FeFsTable. When we move fully to Java 8, + * these can become default methods of the interface. + */ + abstract class Utils { + /** + * Returns true if stats extrapolation is enabled for this table, false otherwise. + * Reconciles the Impalad-wide --enable_stats_extrapolation flag and the + * TBL_PROP_ENABLE_STATS_EXTRAPOLATION table property + */ + public static boolean isStatsExtrapolationEnabled(FeFsTable table) { + org.apache.hadoop.hive.metastore.api.Table msTbl = table.getMetaStoreTable(); + String propVal = msTbl.getParameters().get( + HdfsTable.TBL_PROP_ENABLE_STATS_EXTRAPOLATION); + if (propVal == null) return BackendConfig.INSTANCE.isStatsExtrapolationEnabled(); + return Boolean.parseBoolean(propVal); + } + + /** + * Returns an estimated row count for the given number of file bytes. The row count is + * extrapolated using the table-level row count and file bytes statistics. + * Returns zero only if the given file bytes is zero. + * Returns -1 if: + * - stats extrapolation has been disabled + * - the given file bytes statistic is negative + * - the row count or the file byte statistic is missing + * - the file bytes statistic is zero or negative + * - the row count statistic is zero and the file bytes is non-zero + * Otherwise, returns a value >= 1. + */ + public static long getExtrapolatedNumRows(FeFsTable table, long fileBytes) { + if (!isStatsExtrapolationEnabled(table)) return -1; + if (fileBytes == 0) return 0; + if (fileBytes < 0) return -1; + + TTableStats tableStats = table.getTTableStats(); + if (tableStats.num_rows < 0 || tableStats.total_file_bytes <= 0) return -1; + if (tableStats.num_rows == 0 && tableStats.total_file_bytes != 0) return -1; + double rowsPerByte = tableStats.num_rows / (double) tableStats.total_file_bytes; + double extrapolatedNumRows = fileBytes * rowsPerByte; + return (long) Math.max(1, Math.round(extrapolatedNumRows)); + } + } +} http://git-wip-us.apache.org/repos/asf/impala/blob/aa26087f/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java index 3d65cbb..b704a81 100644 --- a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java +++ b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java @@ -1940,43 +1940,6 @@ public class HdfsTable extends Table implements FeFsTable { return new Pair<String, LiteralExpr>(value, expr); } - /** - * Returns an estimated row count for the given number of file bytes. The row count is - * extrapolated using the table-level row count and file bytes statistics. - * Returns zero only if the given file bytes is zero. - * Returns -1 if: - * - stats extrapolation has been disabled - * - the given file bytes statistic is negative - * - the row count or the file byte statistic is missing - * - the file bytes statistic is zero or negative - * - the row count statistic is zero and the file bytes is non-zero - * Otherwise, returns a value >= 1. - */ - @Override // FeFsTable - public long getExtrapolatedNumRows(long fileBytes) { - if (!isStatsExtrapolationEnabled()) return -1; - if (fileBytes == 0) return 0; - if (fileBytes < 0) return -1; - if (tableStats_.num_rows < 0 || tableStats_.total_file_bytes <= 0) return -1; - if (tableStats_.num_rows == 0 && tableStats_.total_file_bytes != 0) return -1; - double rowsPerByte = tableStats_.num_rows / (double) tableStats_.total_file_bytes; - double extrapolatedNumRows = fileBytes * rowsPerByte; - return (long) Math.max(1, Math.round(extrapolatedNumRows)); - } - - /** - * Returns true if stats extrapolation is enabled for this table, false otherwise. - * Reconciles the Impalad-wide --enable_stats_extrapolation flag and the - * TBL_PROP_ENABLE_STATS_EXTRAPOLATION table property - */ - @Override // FeFsTable - public boolean isStatsExtrapolationEnabled() { - org.apache.hadoop.hive.metastore.api.Table msTbl = getMetaStoreTable(); - String propVal = msTbl.getParameters().get(TBL_PROP_ENABLE_STATS_EXTRAPOLATION); - if (propVal == null) return BackendConfig.INSTANCE.isStatsExtrapolationEnabled(); - return Boolean.parseBoolean(propVal); - } - @Override // FeFsTable public TResultSet getTableStats() { return getTableStats(this); @@ -1996,7 +1959,7 @@ public class HdfsTable extends Table implements FeFsTable { resultSchema.addToColumns(colDesc); } - boolean statsExtrap = table.isStatsExtrapolationEnabled(); + boolean statsExtrap = Utils.isStatsExtrapolationEnabled(table); resultSchema.addToColumns(new TColumn("#Rows", Type.BIGINT.toThrift())); if (statsExtrap) { @@ -2036,7 +1999,9 @@ public class HdfsTable extends Table implements FeFsTable { // Compute and report the extrapolated row count because the set of files could // have changed since we last computed stats for this partition. We also follow // this policy during scan-cardinality estimation. - if (statsExtrap) rowBuilder.add(table.getExtrapolatedNumRows(size)); + if (statsExtrap) { + rowBuilder.add(Utils.getExtrapolatedNumRows(table, size)); + } rowBuilder.add(numFiles).addBytes(size); if (!p.isMarkedCached()) { @@ -2090,7 +2055,8 @@ public class HdfsTable extends Table implements FeFsTable { // have changed since we last computed stats for this partition. We also follow // this policy during scan-cardinality estimation. if (statsExtrap) { - rowBuilder.add(table.getExtrapolatedNumRows(table.getTotalHdfsBytes())); + rowBuilder.add(Utils.getExtrapolatedNumRows( + table, table.getTotalHdfsBytes())); } rowBuilder.add(totalNumFiles) .addBytes(totalBytes) http://git-wip-us.apache.org/repos/asf/impala/blob/aa26087f/fe/src/main/java/org/apache/impala/catalog/local/LocalFsTable.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/catalog/local/LocalFsTable.java b/fe/src/main/java/org/apache/impala/catalog/local/LocalFsTable.java index 2f7ae89..a47497a 100644 --- a/fe/src/main/java/org/apache/impala/catalog/local/LocalFsTable.java +++ b/fe/src/main/java/org/apache/impala/catalog/local/LocalFsTable.java @@ -270,18 +270,6 @@ public class LocalFsTable extends LocalTable implements FeFsTable { } @Override - public long getExtrapolatedNumRows(long totalBytes) { - // TODO Auto-generated method stub - return -1; - } - - @Override - public boolean isStatsExtrapolationEnabled() { - // TODO Auto-generated method stub - return false; - } - - @Override public TResultSet getTableStats() { return HdfsTable.getTableStats(this); } http://git-wip-us.apache.org/repos/asf/impala/blob/aa26087f/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java index 151cbe0..706093c 100644 --- a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java +++ b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java @@ -998,7 +998,7 @@ public class HdfsScanNode extends ScanNode { */ private void computeCardinalities() { // Choose between the extrapolated row count and the one based on stored stats. - extrapolatedNumRows_ = tbl_.getExtrapolatedNumRows(totalBytes_); + extrapolatedNumRows_ = FeFsTable.Utils.getExtrapolatedNumRows(tbl_, totalBytes_); long statsNumRows = getStatsNumRows(); if (extrapolatedNumRows_ != -1) { // The extrapolated row count is based on the 'totalBytes_' which already accounts @@ -1242,7 +1242,7 @@ public class HdfsScanNode extends ScanNode { output.append(getStatsExplainString(detailPrefix)); output.append("\n"); String extrapRows = String.valueOf(extrapolatedNumRows_); - if (!tbl_.isStatsExtrapolationEnabled()) { + if (!FeFsTable.Utils.isStatsExtrapolationEnabled(tbl_)) { extrapRows = "disabled"; } else if (extrapolatedNumRows_ == -1) { extrapRows = "unavailable"; http://git-wip-us.apache.org/repos/asf/impala/blob/aa26087f/fe/src/test/java/org/apache/impala/planner/StatsExtrapolationTest.java ---------------------------------------------------------------------- diff --git a/fe/src/test/java/org/apache/impala/planner/StatsExtrapolationTest.java b/fe/src/test/java/org/apache/impala/planner/StatsExtrapolationTest.java index a0a8566..2582f02 100644 --- a/fe/src/test/java/org/apache/impala/planner/StatsExtrapolationTest.java +++ b/fe/src/test/java/org/apache/impala/planner/StatsExtrapolationTest.java @@ -23,6 +23,7 @@ import java.util.HashMap; import java.util.Map; import org.apache.hadoop.hive.common.StatsSetupConst; +import org.apache.impala.catalog.FeFsTable; import org.apache.impala.catalog.HdfsTable; import org.apache.impala.catalog.Table; import org.apache.impala.common.FrontendTestBase; @@ -70,7 +71,8 @@ public class StatsExtrapolationTest extends FrontendTestBase { long fileBytes, long expectedExtrapNumRows) { Preconditions.checkState(tbl instanceof HdfsTable); setStats(tbl, rowCount, totalSize); - long actualExtrapNumRows = ((HdfsTable)tbl).getExtrapolatedNumRows(fileBytes); + long actualExtrapNumRows = FeFsTable.Utils.getExtrapolatedNumRows( + (HdfsTable)tbl, fileBytes); assertEquals(expectedExtrapNumRows, actualExtrapNumRows); }