Repository: hive
Updated Branches:
  refs/heads/master b04f7ef39 -> 542641d0f


HIVE-14563: StatsOptimizer treats NULL in a wrong way (Pengcheng Xiong, 
reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/542641d0
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/542641d0
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/542641d0

Branch: refs/heads/master
Commit: 542641d0f7da53e7a6da6cd45ddbaf2cf865c0b3
Parents: b04f7ef
Author: Pengcheng Xiong <[email protected]>
Authored: Fri Aug 19 11:27:46 2016 -0700
Committer: Pengcheng Xiong <[email protected]>
Committed: Fri Aug 19 11:27:46 2016 -0700

----------------------------------------------------------------------
 .../hive/beeline/TestBeeLineWithArgs.java       |  9 ++--
 .../apache/hive/jdbc/TestJdbcWithMiniHS2.java   |  1 +
 .../hive/ql/optimizer/StatsOptimizer.java       | 56 +++++++++++++-------
 .../clientpositive/stats_null_optimizer.q       |  3 ++
 .../clientpositive/stats_null_optimizer.q.out   | 23 ++++++++
 .../cli/session/TestSessionManagerMetrics.java  |  1 +
 6 files changed, 70 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/542641d0/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java
----------------------------------------------------------------------
diff --git 
a/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java
 
b/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java
index 892c733..49c1120 100644
--- 
a/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java
+++ 
b/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java
@@ -73,6 +73,7 @@ public class TestBeeLineWithArgs {
     HiveConf hiveConf = new HiveConf();
     // Set to non-zk lock manager to prevent HS2 from trying to connect
     hiveConf.setVar(HiveConf.ConfVars.HIVE_LOCK_MANAGER, 
"org.apache.hadoop.hive.ql.lockmgr.EmbeddedLockManager");
+    hiveConf.setBoolVar(HiveConf.ConfVars.HIVEOPTIMIZEMETADATAQUERIES, false);
     miniHS2 = new MiniHS2(hiveConf);
     miniHS2.start(new HashMap<String,  String>());
     createTable();
@@ -775,10 +776,10 @@ public class TestBeeLineWithArgs {
     String embeddedJdbcURL = Utils.URL_PREFIX+"/Default";
     List<String> argList = getBaseArgs(embeddedJdbcURL);
     // Set to non-zk lock manager to avoid trying to connect to zookeeper
-    final String SCRIPT_TEXT =
-        "set 
hive.lock.manager=org.apache.hadoop.hive.ql.lockmgr.EmbeddedLockManager;\n" +
-        "create table if not exists embeddedBeelineOutputs(d int);\n" +
-        "set a=1;\nselect count(*) from embeddedBeelineOutputs;\n";
+    final String SCRIPT_TEXT = "set 
hive.lock.manager=org.apache.hadoop.hive.ql.lockmgr.EmbeddedLockManager;\n"
+        + "set hive.compute.query.using.stats=false;\n"
+        + "create table if not exists embeddedBeelineOutputs(d int);\n"
+        + "set a=1;\nselect count(*) from embeddedBeelineOutputs;\n";
     final String EXPECTED_PATTERN = "Stage-1 map =";
     testScriptFile(SCRIPT_TEXT, EXPECTED_PATTERN, true, argList);
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/542641d0/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java
----------------------------------------------------------------------
diff --git 
a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java 
b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java
index 0dcfa49..0249566 100644
--- 
a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java
+++ 
b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java
@@ -548,6 +548,7 @@ public class TestJdbcWithMiniHS2 {
     HiveConf conf = new HiveConf();
     String userName;
     setSerializeInTasksInConf(conf);
+    conf.setBoolVar(HiveConf.ConfVars.HIVEOPTIMIZEMETADATAQUERIES, false);
     miniHS2 = new MiniHS2(conf);
     Map<String, String> confOverlay = new HashMap<String, String>();
     miniHS2.start(confOverlay);

http://git-wip-us.apache.org/repos/asf/hive/blob/542641d0/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
index 0c17246..17510e9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
@@ -371,20 +371,37 @@ public class StatsOptimizer extends Transform {
           else if (udaf instanceof GenericUDAFCount) {
             // always long
             Long rowCnt = 0L;
-            if (aggr.getParameters().isEmpty() || aggr.getParameters().get(0) 
instanceof
-                ExprNodeConstantDesc || ((aggr.getParameters().get(0) 
instanceof ExprNodeColumnDesc) &&
-                    
exprMap.get(((ExprNodeColumnDesc)aggr.getParameters().get(0)).getColumn()) 
instanceof ExprNodeConstantDesc)) {
-              // Its either count (*) or count(1) case
+            if (aggr.getParameters().isEmpty()) {
+              // Its either count (*) or count() case
               rowCnt = getRowCnt(pctx, tsOp, tbl);
-              if(rowCnt == null) {
+              if (rowCnt == null) {
                 return null;
               }
+            } else if (aggr.getParameters().get(0) instanceof 
ExprNodeConstantDesc) {
+              if (((ExprNodeConstantDesc) 
aggr.getParameters().get(0)).getValue() != null) {
+                // count (1)
+                rowCnt = getRowCnt(pctx, tsOp, tbl);
+                if (rowCnt == null) {
+                  return null;
+                }
+              }
+              // otherwise it is count(null), should directly return 0.
+            } else if ((aggr.getParameters().get(0) instanceof 
ExprNodeColumnDesc)
+                && exprMap.get(((ExprNodeColumnDesc) 
aggr.getParameters().get(0)).getColumn()) instanceof ExprNodeConstantDesc) {
+              if (((ExprNodeConstantDesc) (exprMap.get(((ExprNodeColumnDesc) 
aggr.getParameters()
+                  .get(0)).getColumn()))).getValue() != null) {
+                rowCnt = getRowCnt(pctx, tsOp, tbl);
+                if (rowCnt == null) {
+                  return null;
+                }
+              }
             } else {
               // Its count(col) case
-              ExprNodeColumnDesc desc = 
(ExprNodeColumnDesc)exprMap.get(((ExprNodeColumnDesc)aggr.getParameters().get(0)).getColumn());
+              ExprNodeColumnDesc desc = (ExprNodeColumnDesc) 
exprMap.get(((ExprNodeColumnDesc) aggr
+                  .getParameters().get(0)).getColumn());
               String colName = desc.getColumn();
               StatType type = getType(desc.getTypeString());
-              if(!tbl.isPartitioned()) {
+              if (!tbl.isPartitioned()) {
                 if 
(!StatsSetupConst.areBasicStatsUptoDate(tbl.getParameters())) {
                   Logger.debug("Stats for table : " + tbl.getTableName() + " 
are not up to date.");
                   return null;
@@ -400,47 +417,48 @@ public class StatsOptimizer extends Transform {
                   return null;
                 }
                 List<ColumnStatisticsObj> stats = 
hive.getMSC().getTableColumnStatistics(
-                    tbl.getDbName(),tbl.getTableName(), 
Lists.newArrayList(colName));
+                    tbl.getDbName(), tbl.getTableName(), 
Lists.newArrayList(colName));
                 if (stats.isEmpty()) {
                   Logger.debug("No stats for " + tbl.getTableName() + " column 
" + colName);
                   return null;
                 }
                 Long nullCnt = getNullcountFor(type, 
stats.get(0).getStatsData());
                 if (null == nullCnt) {
-                  Logger.debug("Unsupported type: " + desc.getTypeString() + " 
encountered in " +
-                      "metadata optimizer for column : " + colName);
+                  Logger.debug("Unsupported type: " + desc.getTypeString() + " 
encountered in "
+                      + "metadata optimizer for column : " + colName);
                   return null;
                 } else {
                   rowCnt -= nullCnt;
                 }
               } else {
-                Set<Partition> parts = pctx.getPrunedPartitions(
-                    tsOp.getConf().getAlias(), tsOp).getPartitions();
+                Set<Partition> parts = 
pctx.getPrunedPartitions(tsOp.getConf().getAlias(), tsOp)
+                    .getPartitions();
                 for (Partition part : parts) {
                   if 
(!StatsSetupConst.areBasicStatsUptoDate(part.getParameters())) {
                     Logger.debug("Stats for part : " + part.getSpec() + " are 
not up to date.");
                     return null;
                   }
-                  Long partRowCnt = Long.parseLong(part.getParameters()
-                      .get(StatsSetupConst.ROW_COUNT));
+                  Long partRowCnt = Long.parseLong(part.getParameters().get(
+                      StatsSetupConst.ROW_COUNT));
                   if (partRowCnt == null) {
                     Logger.debug("Partition doesn't have up to date stats " + 
part.getSpec());
                     return null;
                   }
                   rowCnt += partRowCnt;
                 }
-                Collection<List<ColumnStatisticsObj>> result =
-                    verifyAndGetPartColumnStats(hive, tbl, colName, parts);
+                Collection<List<ColumnStatisticsObj>> result = 
verifyAndGetPartColumnStats(hive,
+                    tbl, colName, parts);
                 if (result == null) {
                   return null; // logging inside
                 }
                 for (List<ColumnStatisticsObj> statObj : result) {
                   ColumnStatisticsData statData = 
validateSingleColStat(statObj);
-                  if (statData == null) return null;
+                  if (statData == null)
+                    return null;
                   Long nullCnt = getNullcountFor(type, statData);
                   if (nullCnt == null) {
-                    Logger.debug("Unsupported type: " + desc.getTypeString() + 
" encountered in " +
-                        "metadata optimizer for column : " + colName);
+                    Logger.debug("Unsupported type: " + desc.getTypeString() + 
" encountered in "
+                        + "metadata optimizer for column : " + colName);
                     return null;
                   } else {
                     rowCnt -= nullCnt;

http://git-wip-us.apache.org/repos/asf/hive/blob/542641d0/ql/src/test/queries/clientpositive/stats_null_optimizer.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/stats_null_optimizer.q 
b/ql/src/test/queries/clientpositive/stats_null_optimizer.q
new file mode 100644
index 0000000..1114e5a
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/stats_null_optimizer.q
@@ -0,0 +1,3 @@
+explain select count(key) from (select null as key from src)src;
+
+select count(key) from (select null as key from src)src;

http://git-wip-us.apache.org/repos/asf/hive/blob/542641d0/ql/src/test/results/clientpositive/stats_null_optimizer.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/stats_null_optimizer.q.out 
b/ql/src/test/results/clientpositive/stats_null_optimizer.q.out
new file mode 100644
index 0000000..8bc2446
--- /dev/null
+++ b/ql/src/test/results/clientpositive/stats_null_optimizer.q.out
@@ -0,0 +1,23 @@
+PREHOOK: query: explain select count(key) from (select null as key from src)src
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(key) from (select null as key from 
src)src
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(key) from (select null as key from src)src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select count(key) from (select null as key from src)src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0

http://git-wip-us.apache.org/repos/asf/hive/blob/542641d0/service/src/test/org/apache/hive/service/cli/session/TestSessionManagerMetrics.java
----------------------------------------------------------------------
diff --git 
a/service/src/test/org/apache/hive/service/cli/session/TestSessionManagerMetrics.java
 
b/service/src/test/org/apache/hive/service/cli/session/TestSessionManagerMetrics.java
index 44d57c4..5511c54 100644
--- 
a/service/src/test/org/apache/hive/service/cli/session/TestSessionManagerMetrics.java
+++ 
b/service/src/test/org/apache/hive/service/cli/session/TestSessionManagerMetrics.java
@@ -49,6 +49,7 @@ public class TestSessionManagerMetrics {
     conf.setBoolVar(HiveConf.ConfVars.HIVE_SERVER2_METRICS_ENABLED, true);
     conf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, false);
     conf.setVar(HiveConf.ConfVars.HIVE_METRICS_REPORTER, 
MetricsReporting.JSON_FILE.name() + "," + MetricsReporting.JMX.name());
+    conf.setBoolVar(HiveConf.ConfVars.HIVEOPTIMIZEMETADATAQUERIES, false);
     MetricsFactory.init(conf);
 
     HiveServer2 hs2 = new HiveServer2();

Reply via email to