[3/4] hive git commit: HIVE-18036: Stats: Remove usage of clone() methods (Bertalan Kondrat via Zoltan Haindrich)

kgyrtkirk Tue, 05 Dec 2017 01:59:53 -0800

HIVE-18036: Stats: Remove usage of clone() methods (Bertalan Kondrat via Zoltan Haindrich)


Signed-off-by: Zoltan Haindrich <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/63f2ec19
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/63f2ec19
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/63f2ec19

Branch: refs/heads/master
Commit: 63f2ec1902016537fcf83262bfcda1d604b236d8
Parents: c673041
Author: Bertalan Kondrat <[email protected]>
Authored: Tue Dec 5 10:44:34 2017 +0100
Committer: Zoltan Haindrich <[email protected]>
Committed: Tue Dec 5 10:44:34 2017 +0100

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/ql/ErrorMsg.java     |   2 -
 .../optimizer/spark/SparkMapJoinOptimizer.java  |   4 -
 .../stats/annotation/StatsRulesProcFactory.java | 557 +++++++++----------
 .../hadoop/hive/ql/plan/ColStatistics.java      |   2 +-
 .../apache/hadoop/hive/ql/plan/Statistics.java  |   9 +-
 .../apache/hadoop/hive/ql/stats/StatsUtils.java |  21 +-
 6 files changed, 262 insertions(+), 333 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/63f2ec19/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java 
b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index 2f7284f..6b949d2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -527,8 +527,6 @@ public enum ErrorMsg {
   COLUMNSTATSCOLLECTOR_INVALID_COLUMN(30012, "Column statistics are not 
supported "
       + "for partition columns"),
 
-  STATISTICS_CLONING_FAILED(30013, "Cloning of statistics failed"),
-
   STATSAGGREGATOR_SOURCETASK_NULL(30014, "SourceTask of StatsTask should not 
be null"),
   STATSAGGREGATOR_CONNECTION_ERROR(30015,
       "Stats aggregator of type {0} cannot be connected to", true),

http://git-wip-us.apache.org/repos/asf/hive/blob/63f2ec19/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
index 8cedbe5..8425911 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
@@ -242,11 +242,7 @@ public class SparkMapJoinOptimizer implements 
NodeProcessor {
         // Not adding other stats (e.g., # of rows, col stats) since only data 
size is used here
         for (TableScanOperator root : 
OperatorUtils.findOperatorsUpstream(parentOp, TableScanOperator.class)) {
           if (currInputStat == null) {
-            try {
               currInputStat = root.getStatistics().clone();
-            } catch (CloneNotSupportedException e) {
-              throw new 
RuntimeException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
-            }
           } else {
             currInputStat.addBasicStats(root.getStatistics());
           }

http://git-wip-us.apache.org/repos/asf/hive/blob/63f2ec19/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index 86b8724..fcfdce9 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -138,8 +138,6 @@ public class StatsRulesProcFactory {
           LOG.debug("[0] STATS-" + tsop.toString() + " (" + 
table.getTableName() + "): " +
               stats.extendedToString());
         }
-      } catch (CloneNotSupportedException e) {
-        throw new 
SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
       } catch (HiveException e) {
         LOG.debug("Failed to retrieve stats ",e);
         throw new SemanticException(e);
@@ -177,41 +175,33 @@ public class StatsRulesProcFactory {
       Statistics stats = null;
 
       if (parentStats != null) {
-        try {
-          stats = parentStats.clone();
-        } catch (CloneNotSupportedException e) {
-          throw new 
SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
-        }
+        stats = parentStats.clone();
       }
 
-      try {
-        if (satisfyPrecondition(parentStats)) {
-          // this will take care of mapping between input column names and 
output column names. The
-          // returned column stats will have the output column names.
-          List<ColStatistics> colStats = 
StatsUtils.getColStatisticsFromExprMap(conf, parentStats,
-              sop.getColumnExprMap(), sop.getSchema());
-          stats.setColumnStats(colStats);
-          // in case of select(*) the data size does not change
-          if (!sop.getConf().isSelectStar() && 
!sop.getConf().isSelStarNoCompute()) {
-            long dataSize = 
StatsUtils.getDataSizeFromColumnStats(stats.getNumRows(), colStats);
-            stats.setDataSize(dataSize);
-          }
-          sop.setStatistics(stats);
+      if (satisfyPrecondition(parentStats)) {
+        // this will take care of mapping between input column names and 
output column names. The
+        // returned column stats will have the output column names.
+        List<ColStatistics> colStats = 
StatsUtils.getColStatisticsFromExprMap(conf, parentStats,
+            sop.getColumnExprMap(), sop.getSchema());
+        stats.setColumnStats(colStats);
+        // in case of select(*) the data size does not change
+        if (!sop.getConf().isSelectStar() && 
!sop.getConf().isSelStarNoCompute()) {
+          long dataSize = 
StatsUtils.getDataSizeFromColumnStats(stats.getNumRows(), colStats);
+          stats.setDataSize(dataSize);
+        }
+        sop.setStatistics(stats);
 
-          if (LOG.isDebugEnabled()) {
-            LOG.debug("[0] STATS-" + sop.toString() + ": " + 
stats.extendedToString());
-          }
-        } else {
-          if (parentStats != null) {
-            sop.setStatistics(parentStats.clone());
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("[0] STATS-" + sop.toString() + ": " + 
stats.extendedToString());
+        }
+      } else {
+        if (parentStats != null) {
+          sop.setStatistics(parentStats.clone());
 
-            if (LOG.isDebugEnabled()) {
-              LOG.debug("[1] STATS-" + sop.toString() + ": " + 
parentStats.extendedToString());
-            }
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("[1] STATS-" + sop.toString() + ": " + 
parentStats.extendedToString());
           }
         }
-      } catch (CloneNotSupportedException e) {
-        throw new 
SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
       }
       return null;
     }
@@ -276,51 +266,48 @@ public class StatsRulesProcFactory {
         neededCols = tsop.getNeededColumns();
       }
 
-      try {
-        if (parentStats != null) {
-          ExprNodeDesc pred = fop.getConf().getPredicate();
 
-          // evaluate filter expression and update statistics
-          long newNumRows = evaluateExpression(parentStats, pred, aspCtx,
-              neededCols, fop, parentStats.getNumRows());
-          Statistics st = parentStats.clone();
-
-          if (satisfyPrecondition(parentStats)) {
-
-            // update statistics based on column statistics.
-            // OR conditions keeps adding the stats independently, this may
-            // result in number of rows getting more than the input rows in
-            // which case stats need not be updated
-            if (newNumRows <= parentStats.getNumRows()) {
-              updateStats(st, newNumRows, true, fop);
-            }
+      if (parentStats != null) {
+        ExprNodeDesc pred = fop.getConf().getPredicate();
 
-            if (LOG.isDebugEnabled()) {
-              LOG.debug("[0] STATS-" + fop.toString() + ": " + 
st.extendedToString());
-            }
-          } else {
+        // evaluate filter expression and update statistics
+        long newNumRows = evaluateExpression(parentStats, pred, aspCtx,
+            neededCols, fop, parentStats.getNumRows());
+        Statistics st = parentStats.clone();
 
-            // update only the basic statistics in the absence of column 
statistics
-            if (newNumRows <= parentStats.getNumRows()) {
-              updateStats(st, newNumRows, false, fop);
-            }
+        if (satisfyPrecondition(parentStats)) {
 
-            if (LOG.isDebugEnabled()) {
-              LOG.debug("[1] STATS-" + fop.toString() + ": " + 
st.extendedToString());
-            }
+          // update statistics based on column statistics.
+          // OR conditions keeps adding the stats independently, this may
+          // result in number of rows getting more than the input rows in
+          // which case stats need not be updated
+          if (newNumRows <= parentStats.getNumRows()) {
+            updateStats(st, newNumRows, true, fop);
+          }
+
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("[0] STATS-" + fop.toString() + ": " + 
st.extendedToString());
+          }
+        } else {
+
+          // update only the basic statistics in the absence of column 
statistics
+          if (newNumRows <= parentStats.getNumRows()) {
+            updateStats(st, newNumRows, false, fop);
+          }
+
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("[1] STATS-" + fop.toString() + ": " + 
st.extendedToString());
           }
-          fop.setStatistics(st);
-          aspCtx.setAndExprStats(null);
         }
-      } catch (CloneNotSupportedException e) {
-        throw new 
SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
+        fop.setStatistics(st);
+        aspCtx.setAndExprStats(null);
       }
       return null;
     }
 
     protected long evaluateExpression(Statistics stats, ExprNodeDesc pred,
         AnnotateStatsProcCtx aspCtx, List<String> neededCols,
-        Operator<?> op, long currNumRows) throws CloneNotSupportedException, 
SemanticException {
+        Operator<?> op, long currNumRows) throws SemanticException {
       long newNumRows = 0;
       Statistics andStats = null;
 
@@ -505,7 +492,7 @@ public class StatsRulesProcFactory {
     }
 
     private long evaluateBetweenExpr(Statistics stats, ExprNodeDesc pred, long 
currNumRows, AnnotateStatsProcCtx aspCtx,
-            List<String> neededCols, Operator<?> op) throws SemanticException, 
CloneNotSupportedException {
+            List<String> neededCols, Operator<?> op) throws SemanticException {
       final ExprNodeGenericFuncDesc fd = (ExprNodeGenericFuncDesc) pred;
       final boolean invert = Boolean.TRUE.equals(
           ((ExprNodeConstantDesc) fd.getChildren().get(0)).getValue()); // 
boolean invert (not)
@@ -538,7 +525,7 @@ public class StatsRulesProcFactory {
 
     private long evaluateNotExpr(Statistics stats, ExprNodeDesc pred, long 
currNumRows,
         AnnotateStatsProcCtx aspCtx, List<String> neededCols, Operator<?> op)
-        throws CloneNotSupportedException, SemanticException {
+        throws SemanticException {
 
       long numRows = currNumRows;
 
@@ -837,7 +824,7 @@ public class StatsRulesProcFactory {
 
     private long evaluateChildExpr(Statistics stats, ExprNodeDesc child,
         AnnotateStatsProcCtx aspCtx, List<String> neededCols,
-        Operator<?> op, long currNumRows) throws CloneNotSupportedException, 
SemanticException {
+        Operator<?> op, long currNumRows) throws SemanticException {
 
       long numRows = currNumRows;
 
@@ -1066,210 +1053,206 @@ public class StatsRulesProcFactory {
             containsGroupingSet + " sizeOfGroupingSet: " + sizeOfGroupingSet);
       }
 
-      try {
-        // satisfying precondition means column statistics is available
-        if (satisfyPrecondition(parentStats)) {
+      // satisfying precondition means column statistics is available
+      if (satisfyPrecondition(parentStats)) {
 
-          // check if map side aggregation is possible or not based on column 
stats
-          hashAgg = checkMapSideAggregation(gop, colStats, conf);
+        // check if map side aggregation is possible or not based on column 
stats
+        hashAgg = checkMapSideAggregation(gop, colStats, conf);
 
-          if (LOG.isDebugEnabled()) {
-            LOG.debug("STATS-" + gop.toString() + " hashAgg: " + hashAgg);
-          }
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("STATS-" + gop.toString() + " hashAgg: " + hashAgg);
+        }
 
-          stats = parentStats.clone();
-          stats.setColumnStats(colStats);
-          long ndvProduct = 1;
-          final long parentNumRows = stats.getNumRows();
+        stats = parentStats.clone();
+        stats.setColumnStats(colStats);
+        long ndvProduct = 1;
+        final long parentNumRows = stats.getNumRows();
 
-          // compute product of distinct values of grouping columns
-          for (ColStatistics cs : colStats) {
-            if (cs != null) {
-              long ndv = cs.getCountDistint();
-              if (cs.getNumNulls() > 0) {
-                ndv = StatsUtils.safeAdd(ndv, 1);
-              }
-              ndvProduct = StatsUtils.safeMult(ndvProduct, ndv);
+        // compute product of distinct values of grouping columns
+        for (ColStatistics cs : colStats) {
+          if (cs != null) {
+            long ndv = cs.getCountDistint();
+            if (cs.getNumNulls() > 0) {
+              ndv = StatsUtils.safeAdd(ndv, 1);
+            }
+            ndvProduct = StatsUtils.safeMult(ndvProduct, ndv);
+          } else {
+            if 
(parentStats.getColumnStatsState().equals(Statistics.State.COMPLETE)) {
+              // the column must be an aggregate column inserted by GBY. We
+              // don't have to account for this column when computing product
+              // of NDVs
+              continue;
             } else {
-              if 
(parentStats.getColumnStatsState().equals(Statistics.State.COMPLETE)) {
-                // the column must be an aggregate column inserted by GBY. We
-                // don't have to account for this column when computing product
-                // of NDVs
-                continue;
-              } else {
-                // partial column statistics on grouping attributes case.
-                // if column statistics on grouping attribute is missing, then
-                // assume worst case.
-                // GBY rule will emit half the number of rows if ndvProduct is 0
-                ndvProduct = 0;
-              }
-              break;
+              // partial column statistics on grouping attributes case.
+              // if column statistics on grouping attribute is missing, then
+              // assume worst case.
+              // GBY rule will emit half the number of rows if ndvProduct is 0
+              ndvProduct = 0;
             }
+            break;
           }
+        }
 
-          // if ndvProduct is 0 then column stats state must be partial and we 
are missing
-          // column stats for a group by column
-          if (ndvProduct == 0) {
-            ndvProduct = parentNumRows / 2;
+        // if ndvProduct is 0 then column stats state must be partial and we 
are missing
+        // column stats for a group by column
+        if (ndvProduct == 0) {
+          ndvProduct = parentNumRows / 2;
 
-            if (LOG.isDebugEnabled()) {
-              LOG.debug("STATS-" + gop.toString() + ": ndvProduct became 0 as 
some column does not" +
-                  " have stats. ndvProduct changed to: " + ndvProduct);
-            }
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("STATS-" + gop.toString() + ": ndvProduct became 0 as 
some column does not" +
+                " have stats. ndvProduct changed to: " + ndvProduct);
           }
+        }
 
-          if (interReduction) {
-
-            if (hashAgg) {
-              if (containsGroupingSet) {
-                // Case 4: column stats, hash aggregation, grouping sets
-                cardinality = Math.min(
-                    (StatsUtils.safeMult(parentNumRows, sizeOfGroupingSet)) / 
2,
-                    StatsUtils.safeMult(StatsUtils.safeMult(ndvProduct, 
parallelism), sizeOfGroupingSet));
+        if (interReduction) {
 
-                if (LOG.isDebugEnabled()) {
-                  LOG.debug("[Case 4] STATS-" + gop.toString() + ": 
cardinality: " + cardinality);
-                }
-              } else {
-                // Case 3: column stats, hash aggregation, NO grouping sets
-                cardinality = Math.min(parentNumRows / 2, 
StatsUtils.safeMult(ndvProduct, parallelism));
+          if (hashAgg) {
+            if (containsGroupingSet) {
+              // Case 4: column stats, hash aggregation, grouping sets
+              cardinality = Math.min(
+                  (StatsUtils.safeMult(parentNumRows, sizeOfGroupingSet)) / 2,
+                  StatsUtils.safeMult(StatsUtils.safeMult(ndvProduct, 
parallelism), sizeOfGroupingSet));
 
-                if (LOG.isDebugEnabled()) {
-                  LOG.debug("[Case 3] STATS-" + gop.toString() + ": 
cardinality: " + cardinality);
-                }
+              if (LOG.isDebugEnabled()) {
+                LOG.debug("[Case 4] STATS-" + gop.toString() + ": cardinality: 
" + cardinality);
               }
             } else {
-              if (containsGroupingSet) {
-                // Case 6: column stats, NO hash aggregation, grouping sets
-                cardinality = StatsUtils.safeMult(parentNumRows, 
sizeOfGroupingSet);
+              // Case 3: column stats, hash aggregation, NO grouping sets
+              cardinality = Math.min(parentNumRows / 2, 
StatsUtils.safeMult(ndvProduct, parallelism));
 
-                if (LOG.isDebugEnabled()) {
-                  LOG.debug("[Case 6] STATS-" + gop.toString() + ": 
cardinality: " + cardinality);
-                }
-              } else {
-                // Case 5: column stats, NO hash aggregation, NO grouping sets
-                cardinality = parentNumRows;
-
-                if (LOG.isDebugEnabled()) {
-                  LOG.debug("[Case 5] STATS-" + gop.toString() + ": 
cardinality: " + cardinality);
-                }
+              if (LOG.isDebugEnabled()) {
+                LOG.debug("[Case 3] STATS-" + gop.toString() + ": cardinality: 
" + cardinality);
               }
             }
           } else {
-
-            // in reduce side GBY, we don't know if the grouping set was 
present or not. so get it
-            // from map side GBY
-            GroupByOperator mGop = 
OperatorUtils.findSingleOperatorUpstream(parent, GroupByOperator.class);
-            if (mGop != null) {
-              containsGroupingSet = mGop.getConf().isGroupingSetsPresent();
-            }
-
             if (containsGroupingSet) {
-              // Case 8: column stats, grouping sets
-              sizeOfGroupingSet = mGop.getConf().getListGroupingSets().size();
-              cardinality = Math.min(parentNumRows, 
StatsUtils.safeMult(ndvProduct, sizeOfGroupingSet));
+              // Case 6: column stats, NO hash aggregation, grouping sets
+              cardinality = StatsUtils.safeMult(parentNumRows, 
sizeOfGroupingSet);
 
               if (LOG.isDebugEnabled()) {
-                LOG.debug("[Case 8] STATS-" + gop.toString() + ": cardinality: 
" + cardinality);
+                LOG.debug("[Case 6] STATS-" + gop.toString() + ": cardinality: 
" + cardinality);
               }
             } else {
-              // Case 9: column stats, NO grouping sets
-              cardinality = Math.min(parentNumRows, ndvProduct);
+              // Case 5: column stats, NO hash aggregation, NO grouping sets
+              cardinality = parentNumRows;
 
               if (LOG.isDebugEnabled()) {
-                LOG.debug("[Case 9] STATS-" + gop.toString() + ": cardinality: 
" + cardinality);
+                LOG.debug("[Case 5] STATS-" + gop.toString() + ": cardinality: 
" + cardinality);
               }
             }
           }
-
-          // update stats, but don't update NDV as it will not change
-          updateStats(stats, cardinality, true, gop, false);
         } else {
 
-          // NO COLUMN STATS
-          if (parentStats != null) {
+          // in reduce side GBY, we don't know if the grouping set was present 
or not. so get it
+          // from map side GBY
+          GroupByOperator mGop = 
OperatorUtils.findSingleOperatorUpstream(parent, GroupByOperator.class);
+          if (mGop != null) {
+            containsGroupingSet = mGop.getConf().isGroupingSetsPresent();
+          }
+
+          if (containsGroupingSet) {
+            // Case 8: column stats, grouping sets
+            sizeOfGroupingSet = mGop.getConf().getListGroupingSets().size();
+            cardinality = Math.min(parentNumRows, 
StatsUtils.safeMult(ndvProduct, sizeOfGroupingSet));
 
-            stats = parentStats.clone();
-            final long parentNumRows = stats.getNumRows();
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("[Case 8] STATS-" + gop.toString() + ": cardinality: " 
+ cardinality);
+            }
+          } else {
+            // Case 9: column stats, NO grouping sets
+            cardinality = Math.min(parentNumRows, ndvProduct);
 
-            // if we don't have column stats, we just assume hash aggregation 
is disabled
-            if (interReduction) {
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("[Case 9] STATS-" + gop.toString() + ": cardinality: " 
+ cardinality);
+            }
+          }
+        }
 
-              if (containsGroupingSet) {
-                // Case 2: NO column stats, NO hash aggregation, grouping sets
-                cardinality = StatsUtils.safeMult(parentNumRows, 
sizeOfGroupingSet);
+        // update stats, but don't update NDV as it will not change
+        updateStats(stats, cardinality, true, gop, false);
+      } else {
 
-                if (LOG.isDebugEnabled()) {
-                  LOG.debug("[Case 2] STATS-" + gop.toString() + ": 
cardinality: " + cardinality);
-                }
-              } else {
-                // Case 1: NO column stats, NO hash aggregation, NO grouping 
sets
-                cardinality = parentNumRows;
+        // NO COLUMN STATS
+        if (parentStats != null) {
 
-                if (LOG.isDebugEnabled()) {
-                  LOG.debug("[Case 1] STATS-" + gop.toString() + ": 
cardinality: " + cardinality);
-                }
+          stats = parentStats.clone();
+          final long parentNumRows = stats.getNumRows();
+
+          // if we don't have column stats, we just assume hash aggregation is 
disabled
+          if (interReduction) {
+
+            if (containsGroupingSet) {
+              // Case 2: NO column stats, NO hash aggregation, grouping sets
+              cardinality = StatsUtils.safeMult(parentNumRows, 
sizeOfGroupingSet);
+
+              if (LOG.isDebugEnabled()) {
+                LOG.debug("[Case 2] STATS-" + gop.toString() + ": cardinality: 
" + cardinality);
               }
             } else {
-
-              // Case 7: NO column stats
-              cardinality = parentNumRows / 2;
+              // Case 1: NO column stats, NO hash aggregation, NO grouping sets
+              cardinality = parentNumRows;
 
               if (LOG.isDebugEnabled()) {
-                LOG.debug("[Case 7] STATS-" + gop.toString() + ": cardinality: 
" + cardinality);
+                LOG.debug("[Case 1] STATS-" + gop.toString() + ": cardinality: 
" + cardinality);
               }
             }
+          } else {
 
-            updateStats(stats, cardinality, false, gop);
-          }
-        }
+            // Case 7: NO column stats
+            cardinality = parentNumRows / 2;
 
-        // if UDAFs are present, new columns needs to be added
-        if (!aggDesc.isEmpty() && stats != null) {
-          List<ColStatistics> aggColStats = Lists.newArrayList();
-          for (ColumnInfo ci : rs.getSignature()) {
-
-            // if the columns in row schema is not contained in column
-            // expression map, then those are the aggregate columns that
-            // are added GBY operator. we will estimate the column statistics
-            // for those newly added columns
-            if (!colExprMap.containsKey(ci.getInternalName())) {
-              String colName = ci.getInternalName();
-              String colType = ci.getTypeName();
-              ColStatistics cs = new ColStatistics(colName, colType);
-              cs.setCountDistint(stats.getNumRows());
-              cs.setNumNulls(0);
-              cs.setAvgColLen(StatsUtils.getAvgColLenOf(conf, 
ci.getObjectInspector(), colType));
-              aggColStats.add(cs);
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("[Case 7] STATS-" + gop.toString() + ": cardinality: " 
+ cardinality);
             }
           }
 
-          // add the new aggregate column and recompute data size
-          if (aggColStats.size() > 0) {
-            stats.addToColumnStats(aggColStats);
+          updateStats(stats, cardinality, false, gop);
+        }
+      }
 
-            // only if the column stats is available, update the data size from
-            // the column stats
-            if (!stats.getColumnStatsState().equals(Statistics.State.NONE)) {
-              updateStats(stats, stats.getNumRows(), true, gop);
-            }
-          }
+      // if UDAFs are present, new columns needs to be added
+      if (!aggDesc.isEmpty() && stats != null) {
+        List<ColStatistics> aggColStats = Lists.newArrayList();
+        for (ColumnInfo ci : rs.getSignature()) {
 
-          // if UDAF present and if column expression map is empty then it must
-          // be full aggregation query like count(*) in which case number of
-          // rows will be 1
-          if (colExprMap.isEmpty()) {
-            updateStats(stats, 1, true, gop);
+          // if the columns in row schema is not contained in column
+          // expression map, then those are the aggregate columns that
+          // are added GBY operator. we will estimate the column statistics
+          // for those newly added columns
+          if (!colExprMap.containsKey(ci.getInternalName())) {
+            String colName = ci.getInternalName();
+            String colType = ci.getTypeName();
+            ColStatistics cs = new ColStatistics(colName, colType);
+            cs.setCountDistint(stats.getNumRows());
+            cs.setNumNulls(0);
+            cs.setAvgColLen(StatsUtils.getAvgColLenOf(conf, 
ci.getObjectInspector(), colType));
+            aggColStats.add(cs);
           }
         }
 
-        gop.setStatistics(stats);
+        // add the new aggregate column and recompute data size
+        if (aggColStats.size() > 0) {
+          stats.addToColumnStats(aggColStats);
+
+          // only if the column stats is available, update the data size from
+          // the column stats
+          if (!stats.getColumnStatsState().equals(Statistics.State.NONE)) {
+            updateStats(stats, stats.getNumRows(), true, gop);
+          }
+        }
 
-        if (LOG.isDebugEnabled() && stats != null) {
-          LOG.debug("[0] STATS-" + gop.toString() + ": " + 
stats.extendedToString());
+        // if UDAF present and if column expression map is empty then it must
+        // be full aggregation query like count(*) in which case number of
+        // rows will be 1
+        if (colExprMap.isEmpty()) {
+          updateStats(stats, 1, true, gop);
         }
-      } catch (CloneNotSupportedException e) {
-        throw new 
SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
+      }
+
+      gop.setStatistics(stats);
+
+      if (LOG.isDebugEnabled() && stats != null) {
+        LOG.debug("[0] STATS-" + gop.toString() + ": " + 
stats.extendedToString());
       }
       return null;
     }
@@ -1470,11 +1453,7 @@ public class StatsRulesProcFactory {
         for (int pos = 0; pos < parents.size(); pos++) {
           ReduceSinkOperator parent = (ReduceSinkOperator) 
jop.getParentOperators().get(pos);
           Statistics parentStats;
-          try {
-            parentStats = parent.getStatistics().clone();
-          } catch (CloneNotSupportedException e) {
-            throw new 
SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
-          }
+          parentStats = parent.getStatistics().clone();
           keyExprs = StatsUtils.getQualifedReducerKeyNames(parent.getConf()
               .getOutputKeyColumnNames());
 
@@ -1581,12 +1560,8 @@ public class StatsRulesProcFactory {
             pred = jop.getConf().getResidualFilterExprs().get(0);
           }
           // evaluate filter expression and update statistics
-          try {
-            newNumRows = evaluateExpression(stats, pred,
-                aspCtx, jop.getSchema().getColumnNames(), jop, 
stats.getNumRows());
-          } catch (CloneNotSupportedException e) {
-            throw new 
SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
-          }
+          newNumRows = evaluateExpression(stats, pred,
+              aspCtx, jop.getSchema().getColumnNames(), jop, 
stats.getNumRows());
           // update statistics based on column statistics.
           // OR conditions keeps adding the stats independently, this may
           // result in number of rows getting more than the input rows in
@@ -1677,12 +1652,8 @@ public class StatsRulesProcFactory {
             pred = jop.getConf().getResidualFilterExprs().get(0);
           }
           // evaluate filter expression and update statistics
-          try {
             newNumRows = evaluateExpression(wcStats, pred,
                 aspCtx, jop.getSchema().getColumnNames(), jop, 
wcStats.getNumRows());
-          } catch (CloneNotSupportedException e) {
-            throw new 
SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
-          }
           // update only the basic statistics in the absence of column 
statistics
           if (newNumRows <= joinRowCount) {
             updateStats(wcStats, newNumRows, false, jop);
@@ -2242,42 +2213,37 @@ public class StatsRulesProcFactory {
       LimitOperator lop = (LimitOperator) nd;
       Operator<? extends OperatorDesc> parent = lop.getParentOperators().get(0);
       Statistics parentStats = parent.getStatistics();
+      long limit = -1;
+      limit = lop.getConf().getLimit();
 
-      try {
-        long limit = -1;
-        limit = lop.getConf().getLimit();
+      if (satisfyPrecondition(parentStats)) {
+        Statistics stats = parentStats.clone();
+        List<ColStatistics> colStats = StatsUtils.getColStatisticsUpdatingTableAlias(
+                parentStats, lop.getSchema());
+        stats.setColumnStats(colStats);
 
-        if (satisfyPrecondition(parentStats)) {
-          Statistics stats = parentStats.clone();
-          List<ColStatistics> colStats = StatsUtils.getColStatisticsUpdatingTableAlias(
-                  parentStats, lop.getSchema());
-          stats.setColumnStats(colStats);
-
-          // if limit is greater than available rows then do not update
-          // statistics
-          if (limit <= parentStats.getNumRows()) {
-            updateStats(stats, limit, true, lop);
-          }
-          lop.setStatistics(stats);
+        // if limit is greater than available rows then do not update
+        // statistics
+        if (limit <= parentStats.getNumRows()) {
+          updateStats(stats, limit, true, lop);
+        }
+        lop.setStatistics(stats);
 
-          if (LOG.isDebugEnabled()) {
-            LOG.debug("[0] STATS-" + lop.toString() + ": " + stats.extendedToString());
-          }
-        } else {
-          if (parentStats != null) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("[0] STATS-" + lop.toString() + ": " + stats.extendedToString());
+        }
+      } else {
+        if (parentStats != null) {
 
-            // in the absence of column statistics, compute data size based on
-            // based on average row size
-            limit = StatsUtils.getMaxIfOverflow(limit);
-            Statistics wcStats = parentStats.scaleToRowCount(limit);
-            lop.setStatistics(wcStats);
-            if (LOG.isDebugEnabled()) {
-              LOG.debug("[1] STATS-" + lop.toString() + ": " + wcStats.extendedToString());
-            }
+          // in the absence of column statistics, compute data size based on
+          // based on average row size
+          limit = StatsUtils.getMaxIfOverflow(limit);
+          Statistics wcStats = parentStats.scaleToRowCount(limit);
+          lop.setStatistics(wcStats);
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("[1] STATS-" + lop.toString() + ": " + wcStats.extendedToString());
           }
         }
-      } catch (CloneNotSupportedException e) {
-        throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
       }
       return null;
     }
@@ -2302,48 +2268,43 @@ public class StatsRulesProcFactory {
       if (parentStats != null) {
         AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx;
         HiveConf conf = aspCtx.getConf();
-
         List<String> outKeyColNames = rop.getConf().getOutputKeyColumnNames();
         List<String> outValueColNames = rop.getConf().getOutputValueColumnNames();
         Map<String, ExprNodeDesc> colExprMap = rop.getColumnExprMap();
-        try {
-          Statistics outStats = parentStats.clone();
-          if (satisfyPrecondition(parentStats)) {
-            List<ColStatistics> colStats = Lists.newArrayList();
-            for (String key : outKeyColNames) {
-              String prefixedKey = Utilities.ReduceField.KEY.toString() + "." + key;
-              ExprNodeDesc end = colExprMap.get(prefixedKey);
-              if (end != null) {
-                ColStatistics cs = StatsUtils
-                    .getColStatisticsFromExpression(conf, parentStats, end);
-                if (cs != null) {
-                  cs.setColumnName(prefixedKey);
-                  colStats.add(cs);
-                }
+        Statistics outStats = parentStats.clone();
+        if (satisfyPrecondition(parentStats)) {
+          List<ColStatistics> colStats = Lists.newArrayList();
+          for (String key : outKeyColNames) {
+            String prefixedKey = Utilities.ReduceField.KEY.toString() + "." + key;
+            ExprNodeDesc end = colExprMap.get(prefixedKey);
+            if (end != null) {
+              ColStatistics cs = StatsUtils
+                  .getColStatisticsFromExpression(conf, parentStats, end);
+              if (cs != null) {
+                cs.setColumnName(prefixedKey);
+                colStats.add(cs);
               }
             }
+          }
 
-            for (String val : outValueColNames) {
-              String prefixedVal = Utilities.ReduceField.VALUE.toString() + "." + val;
-              ExprNodeDesc end = colExprMap.get(prefixedVal);
-              if (end != null) {
-                ColStatistics cs = StatsUtils
-                    .getColStatisticsFromExpression(conf, parentStats, end);
-                if (cs != null) {
-                  cs.setColumnName(prefixedVal);
-                  colStats.add(cs);
-                }
+          for (String val : outValueColNames) {
+            String prefixedVal = Utilities.ReduceField.VALUE.toString() + "." + val;
+            ExprNodeDesc end = colExprMap.get(prefixedVal);
+            if (end != null) {
+              ColStatistics cs = StatsUtils
+                  .getColStatisticsFromExpression(conf, parentStats, end);
+              if (cs != null) {
+                cs.setColumnName(prefixedVal);
+                colStats.add(cs);
               }
             }
-
-            outStats.setColumnStats(colStats);
-          }
-          rop.setStatistics(outStats);
-          if (LOG.isDebugEnabled()) {
-            LOG.debug("[0] STATS-" + rop.toString() + ": " + outStats.extendedToString());
           }
-        } catch (CloneNotSupportedException e) {
-          throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
+
+          outStats.setColumnStats(colStats);
+        }
+        rop.setStatistics(outStats);
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("[0] STATS-" + rop.toString() + ": " + outStats.extendedToString());
         }
       }
       return null;
@@ -2376,11 +2337,7 @@ public class StatsRulesProcFactory {
               Statistics parentStats = parent.getStatistics();
 
               if (stats == null) {
-                try {
-                  stats = parentStats.clone();
-                } catch (CloneNotSupportedException e) {
-                  throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
-                }
+                stats = parentStats.clone();
               } else {
                 stats.addBasicStats(parentStats);
               }

http://git-wip-us.apache.org/repos/asf/hive/blob/63f2ec19/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
index 1aafa9e..aa0559d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
@@ -140,7 +140,7 @@ public class ColStatistics {
   }
 
   @Override
-  public ColStatistics clone() throws CloneNotSupportedException {
+  public ColStatistics clone() {
     ColStatistics clone = new ColStatistics(colName, colType);
     clone.setAvgColLen(avgColLen);
     clone.setCountDistint(countDistint);

http://git-wip-us.apache.org/repos/asf/hive/blob/63f2ec19/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
index 82df960..013fccc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
@@ -167,7 +167,7 @@ public class Statistics implements Serializable {
   }
 
   @Override
-  public Statistics clone() throws CloneNotSupportedException {
+  public Statistics clone() {
     Statistics clone = new Statistics(numRows, dataSize);
     clone.setRunTimeNumRows(runTimeNumRows);
     clone.setBasicStatsState(basicStatsState);
@@ -302,12 +302,7 @@ public class Statistics implements Serializable {
 
   public Statistics scaleToRowCount(long newRowCount) {
     Statistics ret;
-    try {
-      ret = clone();
-    } catch (CloneNotSupportedException e) {
-      // FIXME: remove the Colneable usage 
-      return new Statistics(0,0);
-    }
+    ret = clone();
     if(numRows == 0 || newRowCount >= numRows) {
       return ret;
     }

http://git-wip-us.apache.org/repos/asf/hive/blob/63f2ec19/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 20c2f94..e42614c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -1556,11 +1556,7 @@ public class StatsUtils {
 
     for (ColStatistics parentColStat : parentStats.getColumnStats()) {
       ColStatistics colStat;
-      try {
-        colStat = parentColStat.clone();
-      } catch (CloneNotSupportedException e) {
-        colStat = null;
-      }
+      colStat = parentColStat.clone();
       if (colStat != null) {
         cs.add(colStat);
       }
@@ -1604,11 +1600,7 @@ public class StatsUtils {
         ColStatistics colStats = parentStats.getColumnStatisticsFromColName(colName);
         if (colStats != null) {
           /* If statistics for the column already exist use it. */
-          try {
             return colStats.clone();
-          } catch (CloneNotSupportedException e) {
-            return null;
-          }
         }
 
         // virtual columns
@@ -1619,11 +1611,7 @@ public class StatsUtils {
         // clone the column stats and return
         ColStatistics result = parentStats.getColumnStatisticsFromColName(colName);
         if (result != null) {
-          try {
             return result.clone();
-          } catch (CloneNotSupportedException e) {
-            return null;
-          }
         }
         return null;
       }
@@ -1651,12 +1639,7 @@ public class StatsUtils {
         ColStatistics stats = parentStats.getColumnStatisticsFromColName(engfd.getCols().get(0));
         if (stats != null) {
           ColStatistics newStats;
-          try {
-            newStats = stats.clone();
-          } catch (CloneNotSupportedException e) {
-            LOG.warn("error cloning stats, this should not happen");
-            return null;
-          }
+          newStats = stats.clone();
           newStats.setColumnName(colName);
           colType = colType.toLowerCase();
           newStats.setColumnType(colType);

[3/4] hive git commit: HIVE-18036: Stats: Remove usage of clone() methods (Bertalan Kondrat via Zoltan Haindrich)

Reply via email to