Repository: hive Updated Branches: refs/heads/branch-3 59f8aae2c -> d0e3c19da
HIVE-19771: allowNullColumnForMissingStats should not be false when column stats are estimated (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d0e3c19d Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d0e3c19d Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d0e3c19d Branch: refs/heads/branch-3 Commit: d0e3c19daef4e0c09c7f6e0f5fc5c84238ac445e Parents: 59f8aae Author: Jesus Camacho Rodriguez <[email protected]> Authored: Fri Jun 1 16:06:29 2018 -0700 Committer: Jesus Camacho Rodriguez <[email protected]> Committed: Tue Jun 12 16:33:31 2018 -0700 ---------------------------------------------------------------------- .../hive/ql/optimizer/calcite/RelOptHiveTable.java | 15 +++++++++------ .../rules/HiveReduceExpressionsWithStatsRule.java | 4 ++-- .../TestHiveReduceExpressionsWithStatsRule.java | 2 +- 3 files changed, 12 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/d0e3c19d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java index b10e7b0..e5e475e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java @@ -401,7 +401,7 @@ public class RelOptHiveTable extends RelOptAbstractTable { } } - private void updateColStats(Set<Integer> projIndxLst, boolean allowNullColumnForMissingStats) { + private void updateColStats(Set<Integer> projIndxLst, boolean allowMissingStats) { List<String> nonPartColNamesThatRqrStats = new ArrayList<String>(); List<Integer> nonPartColIndxsThatRqrStats = new ArrayList<Integer>(); List<String> partColNamesThatRqrStats = new ArrayList<String>(); @@ -574,7 +574,7 @@ public class RelOptHiveTable extends RelOptAbstractTable { String logMsg = "No Stats for " + hiveTblMetadata.getCompleteName() + ", Columns: " + getColNamesForLogging(colNamesFailedStats); noColsMissingStats.getAndAdd(colNamesFailedStats.size()); - if (allowNullColumnForMissingStats) { + if (allowMissingStats) { LOG.warn(logMsg); HiveConf conf = SessionState.getSessionConf(); if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_SHOW_WARNINGS)) { @@ -589,10 +589,13 @@ public class RelOptHiveTable extends RelOptAbstractTable { } public List<ColStatistics> getColStat(List<Integer> projIndxLst) { - return getColStat(projIndxLst, false); + // If we allow estimated stats for the columns, then we shall set the boolean to true, + // since otherwise we will throw an exception because columns with estimated stats are + // actually added to the list of columns that do not contain stats. + return getColStat(projIndxLst, HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_STATS_ESTIMATE_STATS)); } - public List<ColStatistics> getColStat(List<Integer> projIndxLst, boolean allowNullColumnForMissingStats) { + public List<ColStatistics> getColStat(List<Integer> projIndxLst, boolean allowMissingStats) { List<ColStatistics> colStatsBldr = Lists.newArrayList(); Set<Integer> projIndxSet = new HashSet<Integer>(projIndxLst); if (projIndxLst != null) { @@ -603,7 +606,7 @@ public class RelOptHiveTable extends RelOptAbstractTable { } } if (!projIndxSet.isEmpty()) { - updateColStats(projIndxSet, allowNullColumnForMissingStats); + updateColStats(projIndxSet, allowMissingStats); for (Integer i : projIndxSet) { colStatsBldr.add(hiveColStatsMap.get(i)); } @@ -616,7 +619,7 @@ public class RelOptHiveTable extends RelOptAbstractTable { } } if (!pILst.isEmpty()) { - updateColStats(new HashSet<Integer>(pILst), allowNullColumnForMissingStats); + updateColStats(new HashSet<Integer>(pILst), allowMissingStats); for (Integer pi : pILst) { colStatsBldr.add(hiveColStatsMap.get(pi)); } http://git-wip-us.apache.org/repos/asf/hive/blob/d0e3c19d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsWithStatsRule.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsWithStatsRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsWithStatsRule.java index 085ad3e..3a5ce39 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsWithStatsRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsWithStatsRule.java @@ -295,9 +295,9 @@ public class HiveReduceExpressionsWithStatsRule extends RelOptRule { RelOptHiveTable table = (RelOptHiveTable) columnOrigin.getOriginTable(); if (table != null) { ColStatistics colStats = - table.getColStat(Lists.newArrayList(columnOrigin.getOriginColumnOrdinal())).get(0); + table.getColStat(Lists.newArrayList(columnOrigin.getOriginColumnOrdinal()), false).get(0); if (colStats != null && StatsUtils.areColumnStatsUptoDateForQueryAnswering( - table.getHiveTableMD(), table.getHiveTableMD().getParameters(), colStats.getColumnName())) { + table.getHiveTableMD(), table.getHiveTableMD().getParameters(), colStats.getColumnName())) { return colStats; } } http://git-wip-us.apache.org/repos/asf/hive/blob/d0e3c19d/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/rules/TestHiveReduceExpressionsWithStatsRule.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/rules/TestHiveReduceExpressionsWithStatsRule.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/rules/TestHiveReduceExpressionsWithStatsRule.java index a0ce7a0..183f127 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/rules/TestHiveReduceExpressionsWithStatsRule.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/rules/TestHiveReduceExpressionsWithStatsRule.java @@ -86,7 +86,7 @@ public class TestHiveReduceExpressionsWithStatsRule { Mockito.doReturn(rowTypeMock).when(tableMock).getRowType(); Mockito.doReturn(tableMock).when(schemaMock).getTableForMember(Matchers.any()); statObj = new ColStatistics("_int", "int"); - Mockito.doReturn(Lists.newArrayList(statObj)).when(tableMock).getColStat(Matchers.anyListOf(Integer.class)); + Mockito.doReturn(Lists.newArrayList(statObj)).when(tableMock).getColStat(Matchers.anyListOf(Integer.class), Matchers.eq(false)); Mockito.doReturn(hiveTableMDMock).when(tableMock).getHiveTableMD(); Mockito.doReturn(tableParams).when(hiveTableMDMock).getParameters();
