Repository: hive Updated Branches: refs/heads/master 7a35c75bc -> cf72246f9
HIVE-10741 : count distinct rewrite is not firing Signed-off-by: Ashutosh Chauhan <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/cf72246f Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/cf72246f Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/cf72246f Branch: refs/heads/master Commit: cf72246f924859a7d4e89fc7462daf301944fb04 Parents: 7a35c75 Author: Ashutosh Chauhan <[email protected]> Authored: Mon May 18 11:11:51 2015 -0700 Committer: Ashutosh Chauhan <[email protected]> Committed: Wed May 20 15:00:09 2015 -0700 ---------------------------------------------------------------------- .../hadoop/hive/ql/parse/CalcitePlanner.java | 8 ++-- .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 28 ++++++------- .../clientpositive/spark/auto_join32.q.out | 44 ++++++++++++-------- .../results/clientpositive/spark/count.q.out | 10 ++--- .../results/clientpositive/spark/groupby2.q.out | 8 ++-- .../results/clientpositive/spark/groupby3.q.out | 8 ++-- .../clientpositive/spark/groupby3_map.q.out | 8 ++-- .../spark/groupby3_map_multi_distinct.q.out | 8 ++-- .../spark/groupby3_map_skew.q.out | 8 ++-- .../clientpositive/spark/groupby3_noskew.q.out | 6 +-- .../spark/groupby3_noskew_multi_distinct.q.out | 6 +-- .../clientpositive/spark/groupby_map_ppr.q.out | 8 ++-- .../spark/groupby_map_ppr_multi_distinct.q.out | 8 ++-- .../clientpositive/spark/groupby_ppr.q.out | 8 ++-- .../clientpositive/spark/limit_pushdown.q.out | 12 +++--- .../spark/vector_count_distinct.q.out | 6 +-- 16 files changed, 96 insertions(+), 88 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/cf72246f/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index c412561..4760a22 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -379,7 +379,7 @@ public class CalcitePlanner extends SemanticAnalyzer { } // Now check QB in more detail. canHandleQbForCbo returns null if query can // be handled. - String msg = CalcitePlanner.canHandleQbForCbo(queryProperties, conf, true, needToLogMessage); + String msg = CalcitePlanner.canHandleQbForCbo(queryProperties, conf, true, needToLogMessage, qb); if (msg == null) { return true; } @@ -408,11 +408,11 @@ public class CalcitePlanner extends SemanticAnalyzer { * 2. Nested Subquery will return false for qbToChk.getIsQuery() */ static String canHandleQbForCbo(QueryProperties queryProperties, HiveConf conf, - boolean topLevelQB, boolean verbose) { + boolean topLevelQB, boolean verbose, QB qb) { boolean isInTest = conf.getBoolVar(ConfVars.HIVE_IN_TEST); boolean isStrictTest = isInTest && !conf.getVar(ConfVars.HIVEMAPREDMODE).equalsIgnoreCase("nonstrict"); - boolean hasEnoughJoins = !topLevelQB || (queryProperties.getJoinCount() > 1) || isInTest; + boolean hasEnoughJoins = !topLevelQB || (queryProperties.getJoinCount() > 1) || isInTest || distinctExprsExists(qb); if (!isStrictTest && hasEnoughJoins && !queryProperties.hasClusterBy() && !queryProperties.hasDistributeBy() && !queryProperties.hasSortBy() @@ -2711,7 +2711,7 @@ public class CalcitePlanner extends SemanticAnalyzer { // 0. Check if we can handle the SubQuery; // canHandleQbForCbo returns null if the query can be handled. - String reason = canHandleQbForCbo(queryProperties, conf, false, LOG.isDebugEnabled()); + String reason = canHandleQbForCbo(queryProperties, conf, false, LOG.isDebugEnabled(), qb); if (reason != null) { String msg = "CBO can not handle Sub Query"; if (LOG.isDebugEnabled()) { http://git-wip-us.apache.org/repos/asf/hive/blob/cf72246f/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 30c87ad..086d9a2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -233,7 +233,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { private HashMap<TableScanOperator, ExprNodeDesc> opToPartPruner; private HashMap<TableScanOperator, PrunedPartitionList> opToPartList; protected HashMap<String, Operator<? extends OperatorDesc>> topOps; - private HashMap<String, Operator<? extends OperatorDesc>> topSelOps; + private final HashMap<String, Operator<? extends OperatorDesc>> topSelOps; protected LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext> opParseCtx; private List<LoadTableDesc> loadTableWork; private List<LoadFileDesc> loadFileWork; @@ -294,7 +294,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { /** Not thread-safe. */ final ASTSearcher astSearcher = new ASTSearcher(); - + protected AnalyzeRewriteContext analyzeRewrite; private CreateTableDesc tableDesc; @@ -1421,7 +1421,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { /** * This is phase1 of supporting specifying schema in insert statement * insert into foo(z,y) select a,b from bar; - * @see #handleInsertStatementSpec(java.util.List, String, RowResolver, RowResolver, QB, ASTNode) + * @see #handleInsertStatementSpec(java.util.List, String, RowResolver, RowResolver, QB, ASTNode) * @throws SemanticException */ private void handleInsertStatementSpecPhase1(ASTNode ast, QBParseInfo qbp, Phase1Ctx ctx_1) throws SemanticException { @@ -3880,14 +3880,14 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { * create table source (a int, b int); * create table target (x int, y int, z int); * insert into target(z,x) select * from source - * + * * Once the * is resolved to 'a,b', this list needs to rewritten to 'b,null,a' so that it looks * as if the original query was written as * insert into target select b, null, a from source - * + * * if target schema is not specified, this is no-op - * - * @see #handleInsertStatementSpecPhase1(ASTNode, QBParseInfo, org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.Phase1Ctx) + * + * @see #handleInsertStatementSpecPhase1(ASTNode, QBParseInfo, org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.Phase1Ctx) * @throws SemanticException */ private void handleInsertStatementSpec(List<ExprNodeDesc> col_list, String dest, @@ -3919,7 +3919,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { Table target = qb.getMetaData().getDestTableForAlias(dest); Partition partition = target == null ? qb.getMetaData().getDestPartitionForAlias(dest) : null; if(target == null && partition == null) { - throw new SemanticException(generateErrorMessage(selExprList, + throw new SemanticException(generateErrorMessage(selExprList, "No table/partition found in QB metadata for dest='" + dest + "'")); } ArrayList<ExprNodeDesc> new_col_list = new ArrayList<ExprNodeDesc>(); @@ -8581,7 +8581,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { } RowResolver outputRR = inputRR.duplicate(); Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild( - new SelectDesc(colList, columnNames, true), + new SelectDesc(colList, columnNames, true), outputRR.getRowSchema(), input), outputRR); output.setColumnExprMap(columnExprMap); return output; @@ -8742,7 +8742,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { } // see if there are any distinct expressions - private boolean distinctExprsExists(QB qb) { + protected static boolean distinctExprsExists(QB qb) { QBParseInfo qbp = qb.getParseInfo(); TreeSet<String> ks = new TreeSet<String>(); @@ -8997,9 +8997,9 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { if (leftmap.size() != rightmap.size()) { throw new SemanticException("Schema of both sides of union should match."); } - + RowResolver unionoutRR = new RowResolver(); - + Iterator<Map.Entry<String, ColumnInfo>> lIter = leftmap.entrySet().iterator(); Iterator<Map.Entry<String, ColumnInfo>> rIter = rightmap.entrySet().iterator(); while (lIter.hasNext()) { @@ -9008,7 +9008,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { ColumnInfo lInfo = lEntry.getValue(); ColumnInfo rInfo = rEntry.getValue(); - String field = lEntry.getKey(); // use left alias (~mysql, postgresql) + String field = lEntry.getKey(); // use left alias (~mysql, postgresql) // try widening conversion, otherwise fail union TypeInfo commonTypeInfo = FunctionRegistry.getCommonClassForUnionAll(lInfo.getType(), rInfo.getType()); @@ -9158,7 +9158,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { Iterator<ColumnInfo> oIter = origInputFieldMap.values().iterator(); Iterator<ColumnInfo> uIter = fieldMap.values().iterator(); - + List<ExprNodeDesc> columns = new ArrayList<ExprNodeDesc>(); boolean needsCast = false; while (oIter.hasNext()) { http://git-wip-us.apache.org/repos/asf/hive/blob/cf72246f/ql/src/test/results/clientpositive/spark/auto_join32.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/auto_join32.q.out b/ql/src/test/results/clientpositive/spark/auto_join32.q.out index c537b95..361a968 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join32.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join32.q.out @@ -428,30 +428,38 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: s + alias: v Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: (name is not null and (p = 'bar')) (type: boolean) + predicate: ((p = 'bar') and name is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 name (type: string) - 1 name (type: string) - outputColumnNames: _col0, _col9 + Select Operator + expressions: name (type: string), registration (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT _col9) - keys: _col0 (type: string), _col9 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) + Select Operator + expressions: _col3 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT _col1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reducer 2 Reduce Operator Tree: Group By Operator http://git-wip-us.apache.org/repos/asf/hive/blob/cf72246f/ql/src/test/results/clientpositive/spark/count.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/count.q.out b/ql/src/test/results/clientpositive/spark/count.q.out index 6923a5f..cb9eda5 100644 --- a/ql/src/test/results/clientpositive/spark/count.q.out +++ b/ql/src/test/results/clientpositive/spark/count.q.out @@ -123,11 +123,11 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: a (type: int), b (type: int), c (type: int), d (type: int) - outputColumnNames: a, b, c, d + outputColumnNames: _col1, _col2, _col3, _col4 Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(1), count(), count(a), count(b), count(c), count(d), count(DISTINCT a), count(DISTINCT b), count(DISTINCT c), count(DISTINCT d), count(DISTINCT a, b), count(DISTINCT b, c), count(DISTINCT c, d), count(DISTINCT a, d), count(DISTINCT a, c), count(DISTINCT b, d), count(DISTINCT a, b, c), count(DISTINCT b, c, d), count(DISTINCT a, c, d), count(DISTINCT a, b, d), count(DISTINCT a, b, c, d) - keys: a (type: int), b (type: int), c (type: int), d (type: int) + aggregations: count(1), count(), count(_col1), count(_col2), count(_col3), count(_col4), count(DISTINCT _col1), count(DISTINCT _col2), count(DISTINCT _col3), count(DISTINCT _col4), count(DISTINCT _col1, _col2), count(DISTINCT _col2, _col3), count(DISTINCT _col3, _col4), count(DISTINCT _col1, _col4), count(DISTINCT _col1, _col3), count(DISTINCT _col2, _col4), count(DISTINCT _col1, _col2, _col3), count(DISTINCT _col2, _col3, _col4), count(DISTINCT _col1, _col3, _col4), count(DISTINCT _col1, _col2, _col4), count(DISTINCT _col1, _col2, _col3, _col4) + keys: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE @@ -252,10 +252,10 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: a (type: int), b (type: int), c (type: int), d (type: int) - outputColumnNames: a, b, c, d + outputColumnNames: _col1, _col2, _col3, _col4 Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: a (type: int), b (type: int), c (type: int), d (type: int) + key expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) sort order: ++++ Statistics: Num rows: 4 Data size: 78 Basic stats: COMPLETE Column stats: NONE Reducer 2 http://git-wip-us.apache.org/repos/asf/hive/blob/cf72246f/ql/src/test/results/clientpositive/spark/groupby2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/groupby2.q.out b/ql/src/test/results/clientpositive/spark/groupby2.q.out index f6be571..f9e3459 100644 --- a/ql/src/test/results/clientpositive/spark/groupby2.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby2.q.out @@ -32,13 +32,13 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string) + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: substr(key, 1, 1) (type: string) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: http://git-wip-us.apache.org/repos/asf/hive/blob/cf72246f/ql/src/test/results/clientpositive/spark/groupby3.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/groupby3.q.out b/ql/src/test/results/clientpositive/spark/groupby3.q.out index af63c0e..e48018c 100644 --- a/ql/src/test/results/clientpositive/spark/groupby3.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby3.q.out @@ -51,13 +51,13 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: value + expressions: substr(value, 5) (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: substr(value, 5) (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: substr(value, 5) (type: string) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: http://git-wip-us.apache.org/repos/asf/hive/blob/cf72246f/ql/src/test/results/clientpositive/spark/groupby3_map.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/groupby3_map.q.out b/ql/src/test/results/clientpositive/spark/groupby3_map.q.out index 8379fc9..f806303 100644 --- a/ql/src/test/results/clientpositive/spark/groupby3_map.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby3_map.q.out @@ -50,12 +50,12 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: value + expressions: substr(value, 5) (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(substr(value, 5)), avg(substr(value, 5)), avg(DISTINCT substr(value, 5)), max(substr(value, 5)), min(substr(value, 5)), std(substr(value, 5)), stddev_samp(substr(value, 5)), variance(substr(value, 5)), var_samp(substr(value, 5)) - keys: substr(value, 5) (type: string) + aggregations: sum(_col0), avg(_col0), avg(DISTINCT _col0), max(_col0), min(_col0), std(_col0), stddev_samp(_col0), variance(_col0), var_samp(_col0) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE http://git-wip-us.apache.org/repos/asf/hive/blob/cf72246f/ql/src/test/results/clientpositive/spark/groupby3_map_multi_distinct.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/groupby3_map_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/groupby3_map_multi_distinct.q.out index 5e9d229..3b31dfe 100644 --- a/ql/src/test/results/clientpositive/spark/groupby3_map_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby3_map_multi_distinct.q.out @@ -54,12 +54,12 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: value + expressions: substr(value, 5) (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(substr(value, 5)), avg(substr(value, 5)), avg(DISTINCT substr(value, 5)), max(substr(value, 5)), min(substr(value, 5)), std(substr(value, 5)), stddev_samp(substr(value, 5)), variance(substr(value, 5)), var_samp(substr(value, 5)), sum(DISTINCT substr(value, 5)), count(DISTINCT substr(value, 5)) - keys: substr(value, 5) (type: string) + aggregations: sum(_col0), avg(_col0), avg(DISTINCT _col0), max(_col0), min(_col0), std(_col0), stddev_samp(_col0), variance(_col0), var_samp(_col0), sum(DISTINCT _col0), count(DISTINCT _col0) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE http://git-wip-us.apache.org/repos/asf/hive/blob/cf72246f/ql/src/test/results/clientpositive/spark/groupby3_map_skew.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/groupby3_map_skew.q.out b/ql/src/test/results/clientpositive/spark/groupby3_map_skew.q.out index d7f90f1..bbad6e7 100644 --- a/ql/src/test/results/clientpositive/spark/groupby3_map_skew.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby3_map_skew.q.out @@ -51,12 +51,12 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: value + expressions: substr(value, 5) (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(substr(value, 5)), avg(substr(value, 5)), avg(DISTINCT substr(value, 5)), max(substr(value, 5)), min(substr(value, 5)), std(substr(value, 5)), stddev_samp(substr(value, 5)), variance(substr(value, 5)), var_samp(substr(value, 5)) - keys: substr(value, 5) (type: string) + aggregations: sum(_col0), avg(_col0), avg(DISTINCT _col0), max(_col0), min(_col0), std(_col0), stddev_samp(_col0), variance(_col0), var_samp(_col0) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE http://git-wip-us.apache.org/repos/asf/hive/blob/cf72246f/ql/src/test/results/clientpositive/spark/groupby3_noskew.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/groupby3_noskew.q.out b/ql/src/test/results/clientpositive/spark/groupby3_noskew.q.out index 75cb50b..6868eff 100644 --- a/ql/src/test/results/clientpositive/spark/groupby3_noskew.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby3_noskew.q.out @@ -50,11 +50,11 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: value + expressions: substr(value, 5) (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: substr(value, 5) (type: string) + key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 http://git-wip-us.apache.org/repos/asf/hive/blob/cf72246f/ql/src/test/results/clientpositive/spark/groupby3_noskew_multi_distinct.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/groupby3_noskew_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/groupby3_noskew_multi_distinct.q.out index 51831db..399bfd8 100644 --- a/ql/src/test/results/clientpositive/spark/groupby3_noskew_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby3_noskew_multi_distinct.q.out @@ -54,11 +54,11 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: value + expressions: substr(value, 5) (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: substr(value, 5) (type: string) + key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 http://git-wip-us.apache.org/repos/asf/hive/blob/cf72246f/ql/src/test/results/clientpositive/spark/groupby_map_ppr.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/groupby_map_ppr.q.out b/ql/src/test/results/clientpositive/spark/groupby_map_ppr.q.out index 517e492..8a26e81 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_map_ppr.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_map_ppr.q.out @@ -114,12 +114,12 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(DISTINCT substr(value, 5)), sum(substr(value, 5)) - keys: substr(key, 1, 1) (type: string), substr(value, 5) (type: string) + aggregations: count(DISTINCT _col1), sum(_col1) + keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE http://git-wip-us.apache.org/repos/asf/hive/blob/cf72246f/ql/src/test/results/clientpositive/spark/groupby_map_ppr_multi_distinct.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/groupby_map_ppr_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/groupby_map_ppr_multi_distinct.q.out index d247c25..6005381 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_map_ppr_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_map_ppr_multi_distinct.q.out @@ -131,12 +131,12 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string), value (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(DISTINCT substr(value, 5)), sum(substr(value, 5)), sum(DISTINCT substr(value, 5)), count(DISTINCT value) - keys: substr(key, 1, 1) (type: string), substr(value, 5) (type: string), value (type: string) + aggregations: count(DISTINCT _col1), sum(_col1), sum(DISTINCT _col1), count(DISTINCT _col2) + keys: _col0 (type: string), _col1 (type: string), _col2 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE http://git-wip-us.apache.org/repos/asf/hive/blob/cf72246f/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out b/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out index 8bc6105..e00d234 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out @@ -114,13 +114,13 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value + expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string) + key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Map-reduce partition columns: substr(key, 1, 1) (type: string) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE tag: -1 auto parallelism: false http://git-wip-us.apache.org/repos/asf/hive/blob/cf72246f/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out b/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out index 40af253..1efa9e7 100644 --- a/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out +++ b/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out @@ -473,11 +473,11 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), cdouble (type: double) - outputColumnNames: ctinyint, cdouble + outputColumnNames: _col0, _col1 Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(DISTINCT cdouble) - keys: ctinyint (type: tinyint), cdouble (type: double) + aggregations: count(DISTINCT _col1) + keys: _col0 (type: tinyint), _col1 (type: double) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE @@ -660,11 +660,11 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), cstring1 (type: string), cstring2 (type: string) - outputColumnNames: ctinyint, cstring1, cstring2 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(DISTINCT cstring1), count(DISTINCT cstring2) - keys: ctinyint (type: tinyint), cstring1 (type: string), cstring2 (type: string) + aggregations: count(DISTINCT _col1), count(DISTINCT _col2) + keys: _col0 (type: tinyint), _col1 (type: string), _col2 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE http://git-wip-us.apache.org/repos/asf/hive/blob/cf72246f/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out b/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out index 7fa7bdb..fecfe0a 100644 --- a/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out @@ -1257,11 +1257,11 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 3504000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ws_order_number (type: int) - outputColumnNames: ws_order_number + outputColumnNames: _col0 Statistics: Num rows: 2000 Data size: 3504000 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(DISTINCT ws_order_number) - keys: ws_order_number (type: int) + aggregations: count(DISTINCT _col0) + keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 2000 Data size: 3504000 Basic stats: COMPLETE Column stats: NONE
