Author: prasanthj
Date: Mon Feb 2 19:03:26 2015
New Revision: 1656542
URL: http://svn.apache.org/r1656542
Log:
HIVE-9512: HIVE-9327 causing regression in stats annotation (Jesus Camacho
Rodriguez via Prasanth Jayachandran)
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
hive/trunk/ql/src/test/results/clientpositive/annotate_stats_select.q.out
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java?rev=1656542&r1=1656541&r2=1656542&view=diff
==============================================================================
---
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
(original)
+++
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
Mon Feb 2 19:03:26 2015
@@ -1589,6 +1589,9 @@ public class StatsRulesProcFactory {
if (satisfyPrecondition(parentStats)) {
Statistics stats = parentStats.clone();
+ List<ColStatistics> colStats =
StatsUtils.getColStatisticsUpdatingTableAlias(
+ parentStats, lop.getSchema());
+ stats.setColumnStats(colStats);
// if limit is greater than available rows then do not update
// statistics
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java?rev=1656542&r1=1656541&r2=1656542&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
(original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java Mon
Feb 2 19:03:26 2015
@@ -20,7 +20,6 @@ package org.apache.hadoop.hive.ql.stats;
import com.google.common.base.Joiner;
import com.google.common.collect.Lists;
-import com.google.common.math.DoubleMath;
import com.google.common.math.LongMath;
import org.apache.commons.logging.Log;
@@ -1020,8 +1019,6 @@ public class StatsUtils {
if (colStat != null) {
colStat.setColumnName(outColName);
colStat.setTableAlias(outTabAlias);
- }
- if (colStat != null) {
cs.add(colStat);
}
}
@@ -1040,6 +1037,39 @@ public class StatsUtils {
return cs;
}
+ /**
+ * Get column statistics from parent statistics given the
+ * row schema of its child.
+ * <p>
+ * Each entry of parentStats.getColumnStats() is cloned and the setters
+ * below are applied to the clone. The clone's column name is looked up in
+ * rowSchema; when a matching column is found, the clone's table alias is
+ * replaced with that column's alias, otherwise the clone keeps whatever
+ * alias it was copied with. Column names are never changed here.
+ * An entry whose clone() throws CloneNotSupportedException is left out of
+ * the result without any error being reported.
+ * @param parentStats
+ * - parent statistics; its column statistics list is iterated directly,
+ * so it is presumably expected to be non-null (TODO confirm with callers)
+ * @param rowSchema
+ * - row schema
+ * @return column statistics: a newly created list holding one clone per
+ * parent entry that could be cloned, in iteration order
+ */
+ public static List<ColStatistics> getColStatisticsUpdatingTableAlias(
+ Statistics parentStats, RowSchema rowSchema) {
+
+ List<ColStatistics> cs = Lists.newArrayList();
+
+ for (ColStatistics parentColStat : parentStats.getColumnStats()) {
+ ColStatistics colStat;
+ try {
+ colStat = parentColStat.clone(); // work on a copy; the setter below targets the copy
+ } catch (CloneNotSupportedException e) {
+ colStat = null; // clone failed: this column is skipped by the null check below
+ }
+ if (colStat != null) {
+ ColumnInfo ci = rowSchema.getColumnInfo(colStat.getColumnName()); // lookup uses the name carried over from the parent
+ if (ci != null) {
+ colStat.setTableAlias(ci.getTabAlias());
+ }
+ cs.add(colStat); // added even when the row schema has no matching column
+ }
+ }
+
+ return cs;
+ }
+
/**
* Get column statistics expression nodes
* @param conf
Modified:
hive/trunk/ql/src/test/results/clientpositive/annotate_stats_select.q.out
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/annotate_stats_select.q.out?rev=1656542&r1=1656541&r2=1656542&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/annotate_stats_select.q.out
(original)
+++ hive/trunk/ql/src/test/results/clientpositive/annotate_stats_select.q.out
Mon Feb 2 19:03:26 2015
@@ -1062,17 +1062,17 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col0 (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column
stats: COMPLETE
+ Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column
stats: COMPLETE
Limit
Number of rows: 10
- Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column
stats: COMPLETE
+ Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE
Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), 11.0 (type: double)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column
stats: COMPLETE
+ Statistics: Num rows: 2 Data size: 194 Basic stats: COMPLETE
Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL
Column stats: COMPLETE
+ Statistics: Num rows: 2 Data size: 194 Basic stats: COMPLETE
Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat