This is an automated email from the ASF dual-hosted git repository.

abstractdog pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git

commit 1f20cc34a8a589cea734f7f0b4e2e1431e65f516
Author: Ivan Suller <[email protected]>
AuthorDate: Tue Jun 18 15:54:46 2019 +0200

    HIVE-21842: Code cleanup in org.apache.hadoop.hive.ql.stats.StatsUtils.collectStatistics (Ivan Suller via Laszlo Bodor)
    
    Signed-off-by: Laszlo Bodor <[email protected]>
---
 .../apache/hadoop/hive/ql/stats/StatsUtils.java    | 81 ++++++++--------------
 1 file changed, 28 insertions(+), 53 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index e184b9d..e0f401b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -263,7 +263,7 @@ public class StatsUtils {
       //      long nr = getNumRows(conf, schema, neededColumns, table, ds);
       long ds = basicStats.getDataSize();
       long nr = basicStats.getNumRows();
-      List<ColStatistics> colStats = Lists.newArrayList();
+      List<ColStatistics> colStats = Collections.emptyList();
 
       long numErasureCodedFiles = getErasureCodedFiles(table);
 
@@ -303,11 +303,6 @@ public class StatsUtils {
       }
       BasicStats bbs = BasicStats.buildFrom(partStats);
 
-      List<Long> rowCounts = Lists.newArrayList();
-      for (BasicStats basicStats : partStats) {
-        rowCounts.add(basicStats.getNumRows());
-      }
-
       long nr = bbs.getNumRows();
       long ds = bbs.getDataSize();
 
@@ -316,7 +311,7 @@ public class StatsUtils {
       long numErasureCodedFiles = getSumIgnoreNegatives(erasureCodedFiles);
 
       if (nr == 0) {
-        nr=1;
+        nr = 1;
       }
       stats = new Statistics(nr, ds, numErasureCodedFiles);
       stats.setBasicStatsState(bbs.getState());
@@ -331,51 +326,12 @@ public class StatsUtils {
         List<String> partitionCols = getPartitionColumns(schema, neededColumns, referencedColumns);
 
         // We will retrieve stats from the metastore only for columns that are not cached
-        List<String> neededColsToRetrieve;
-        List<String> partitionColsToRetrieve;
         List<ColStatistics> columnStats = new ArrayList<>();
-        if (colStatsCache != null) {
-          neededColsToRetrieve = new ArrayList<String>(neededColumns.size());
-          for (String colName : neededColumns) {
-            ColStatistics colStats = colStatsCache.getColStats().get(colName);
-            if (colStats == null) {
-              neededColsToRetrieve.add(colName);
-              if (LOG.isDebugEnabled()) {
-                LOG.debug("Stats for column " + colName +
-                    " in table " + table.getCompleteName() + " could not be retrieved from cache");
-              }
-            } else {
-              columnStats.add(colStats);
-              if (LOG.isDebugEnabled()) {
-                LOG.debug("Stats for column " + colName +
-                    " in table " + table.getCompleteName() + " retrieved from cache");
-              }
-            }
-          }
-          partitionColsToRetrieve = new ArrayList<>(partitionCols.size());
-          for (String colName : partitionCols) {
-            ColStatistics colStats = colStatsCache.getColStats().get(colName);
-            if (colStats == null) {
-              partitionColsToRetrieve.add(colName);
-              if (LOG.isDebugEnabled()) {
-                LOG.debug("Stats for column " + colName +
-                    " in table " + table.getCompleteName() + " could not be retrieved from cache");
-              }
-            } else {
-              columnStats.add(colStats);
-              if (LOG.isDebugEnabled()) {
-                LOG.debug("Stats for column " + colName +
-                    " in table " + table.getCompleteName() + " retrieved from cache");
-              }
-            }
-          }
-        } else {
-          neededColsToRetrieve = neededColumns;
-          partitionColsToRetrieve = partitionCols;
-        }
+        List<String> neededColsToRetrieve = extractColumnStates(table, neededColumns, colStatsCache, columnStats);
+        List<String> partitionColsToRetrieve = extractColumnStates(table, partitionCols, colStatsCache, columnStats);
 
         // List of partitions
-        List<String> partNames = new ArrayList<String>(partList.getNotDeniedPartns().size());
+        List<String> partNames = new ArrayList<>(partList.getNotDeniedPartns().size());
         for (Partition part : partList.getNotDeniedPartns()) {
           partNames.add(part.getName());
         }
@@ -384,15 +340,14 @@ public class StatsUtils {
         // We check the sizes of neededColumns and partNames here. If either
         // size is 0, aggrStats is null after several retries. Thus, we can
         // skip the step to connect to the metastore.
-        if (fetchColStats && neededColsToRetrieve.size() > 0 && partNames.size() > 0) {
+        if (fetchColStats && !neededColsToRetrieve.isEmpty() && !partNames.isEmpty()) {
           aggrStats = Hive.get().getAggrColStatsFor(table.getDbName(), table.getTableName(),
               neededColsToRetrieve, partNames, false);
         }
 
         boolean statsRetrieved = aggrStats != null &&
             aggrStats.getColStats() != null && aggrStats.getColStatsSize() != 0;
-        if (neededColumns.size() == 0 ||
-            (neededColsToRetrieve.size() > 0 && !statsRetrieved)) {
+        if (neededColumns.isEmpty() || (!neededColsToRetrieve.isEmpty() && !statsRetrieved)) {
           estimateStatsForMissingCols(neededColsToRetrieve, columnStats, table, conf, nr, schema);
           // There are some partitions with no state (or we didn't fetch any state).
           // Update the stats with empty list to reflect that in the
@@ -439,7 +394,7 @@ public class StatsUtils {
           }
         }
 
-        if(rowCounts.size() == 0 ) {
+        if (partStats.isEmpty()) {
           // all partitions are filtered by partition pruning
           stats.setBasicStatsState(State.COMPLETE);
         }
@@ -459,6 +414,26 @@ public class StatsUtils {
     return stats;
   }
 
+  private static List<String> extractColumnStates(Table table, List<String> columns,
+      ColumnStatsList colStatsCache, List<ColStatistics> columnStats) {
+    if (colStatsCache == null) {
+      return columns;
+    }
+    List<String> neededColsToRetrieve = new ArrayList<>(columns.size());
+    for (String colName : columns) {
+      ColStatistics colStats = colStatsCache.getColStats().get(colName);
+      if (colStats == null) {
+        neededColsToRetrieve.add(colName);
+        LOG.debug("Stats for column {} in table {} could not be retrieved from cache", colName,
+            table.getCompleteName());
+      } else {
+        columnStats.add(colStats);
+        LOG.debug("Stats for column {} in table {} retrieved from cache", colName, table.getCompleteName());
+      }
+    }
+    return neededColsToRetrieve;
+  }
+
 
   /**
    * Based on the provided column statistics and number of rows, this method infers if the column

Reply via email to