Author: gunther
Date: Wed Jul 30 01:08:05 2014
New Revision: 1614533
URL: http://svn.apache.org/r1614533
Log:
HIVE-7546: Pull partition and column stats regardless of hive conf settings
(Gunther Hagleitner)
Modified:
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/RelOptHiveTable.java
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
Modified:
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/RelOptHiveTable.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/RelOptHiveTable.java?rev=1614533&r1=1614532&r2=1614533&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/RelOptHiveTable.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/RelOptHiveTable.java Wed Jul 30 01:08:05 2014
@@ -180,7 +180,7 @@ public class RelOptHiveTable extends Rel
} else {
// 2.2 Obtain col stats for full table scan
Statistics stats = StatsUtils.collectStatistics(m_hiveConf,
partitionList,
- m_hiveTblMetadata, m_hiveNonPartitionCols,
nonPartColNamesThatRqrStats);
+ m_hiveTblMetadata, m_hiveNonPartitionCols,
nonPartColNamesThatRqrStats, true, true);
m_rowCount = stats.getNumRows();
hiveColStats = new ArrayList<ColStatistics>();
for (String c : nonPartColNamesThatRqrStats) {
Modified:
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java?rev=1614533&r1=1614532&r2=1614533&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java Wed Jul 30 01:08:05 2014
@@ -107,19 +107,27 @@ public class StatsUtils {
// column level statistics are required only for the columns that are needed
List<ColumnInfo> schema = tableScanOperator.getSchema().getSignature();
List<String> neededColumns = tableScanOperator.getNeededColumns();
-
+
return collectStatistics(conf, partList, table, schema, neededColumns);
}
- public static Statistics collectStatistics(HiveConf conf,
PrunedPartitionList partList,
+ private static Statistics collectStatistics(HiveConf conf,
PrunedPartitionList partList,
Table table, List<ColumnInfo> schema, List<String> neededColumns) {
- Statistics stats = new Statistics();
-
boolean fetchColStats =
HiveConf.getBoolVar(conf,
HiveConf.ConfVars.HIVE_STATS_FETCH_COLUMN_STATS);
boolean fetchPartStats =
HiveConf.getBoolVar(conf,
HiveConf.ConfVars.HIVE_STATS_FETCH_PARTITION_STATS);
+
+ return collectStatistics(conf, partList, table, schema, neededColumns,
fetchColStats, fetchPartStats);
+ }
+
+ public static Statistics collectStatistics(HiveConf conf,
PrunedPartitionList partList,
+ Table table, List<ColumnInfo> schema, List<String> neededColumns,
+ boolean fetchColStats, boolean fetchPartStats) {
+
+ Statistics stats = new Statistics();
+
float deserFactor =
HiveConf.getFloatVar(conf,
HiveConf.ConfVars.HIVE_STATS_DESERIALIZATION_FACTOR);