Repository: impala Updated Branches: refs/heads/master 6399a65a0 -> 9f5c5e6df
IMPALA-7689: Reduce per column per partition stats estimate size With the improvements in the incremental stats memory representation (IMPALA-7424), the per-column per-partition stats estimate should be reduced to account for the compressed memory footprint. Doing some experiments on various test tables, I see the size is down by 50-70%. This patch reduces the size estimate by 50% (conservative). Ideally, we don't need to estimate on the Catalog server during serialization since we can compute the byte sizes by looping through all the partitions. However, this patch retains the current logic to keep it consistent with "compute incremental stats" analysis. Change-Id: I347b41d9b298d7cd73ec812692172e0511415eee Reviewed-on: http://gerrit.cloudera.org:8080/11706 Reviewed-by: Bharath Vissapragada <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/impala/repo Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/5af5456a Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/5af5456a Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/5af5456a Branch: refs/heads/master Commit: 5af5456a2d95a43ce63f4e364ff0b9631729bb1a Parents: 6399a65 Author: Bharath Vissapragada <[email protected]> Authored: Tue Oct 16 18:26:13 2018 -0700 Committer: Impala Public Jenkins <[email protected]> Committed: Thu Oct 18 00:57:47 2018 +0000 ---------------------------------------------------------------------- fe/src/main/java/org/apache/impala/catalog/HdfsTable.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/impala/blob/5af5456a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java index 
88c1fd5..1bca8ec 100644 --- a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java +++ b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java @@ -195,8 +195,9 @@ public class HdfsTable extends Table implements FeFsTable { @VisibleForTesting HdfsPartition prototypePartition_; - // Estimate (in bytes) of the incremental stats size per column per partition - public static final long STATS_SIZE_PER_COLUMN_BYTES = 400; + // Empirical estimate (in bytes) of the incremental stats size per column per + // partition. + public static final long STATS_SIZE_PER_COLUMN_BYTES = 200; // Bi-directional map between an integer index and a unique datanode // TNetworkAddresses, each of which contains blocks of 1 or more @@ -1753,8 +1754,6 @@ public class HdfsTable extends Table implements FeFsTable { * fetch from catalogd). */ private boolean shouldSendIncrementalStats(int numPartitions) { - // TODO(bharath): Revisit the constant STATS_SIZE_PER_COLUMN_BYTES after the - // new incremental stats in-memory representation changes. long statsSizeEstimate = numPartitions * getColumns().size() * STATS_SIZE_PER_COLUMN_BYTES; return statsSizeEstimate < BackendConfig.INSTANCE.getIncStatsMaxSize()
