Author: vikram Date: Tue Jan 27 22:09:59 2015 New Revision: 1655172 URL: http://svn.apache.org/r1655172 Log: HIVE-9053 : select constant in union all followed by group by gives wrong result (Pengcheng Xiong via Vikram Dixit)
Modified: hive/branches/branch-1.0/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java hive/branches/branch-1.0/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanTaskDispatcher.java hive/branches/branch-1.0/ql/src/test/results/clientpositive/metadataonly1.q.out hive/branches/branch-1.0/ql/src/test/results/clientpositive/optimize_nullscan.q.out hive/branches/branch-1.0/ql/src/test/results/clientpositive/tez/metadataonly1.q.out hive/branches/branch-1.0/ql/src/test/results/clientpositive/tez/optimize_nullscan.q.out Modified: hive/branches/branch-1.0/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java URL: http://svn.apache.org/viewvc/hive/branches/branch-1.0/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java?rev=1655172&r1=1655171&r2=1655172&view=diff ============================================================================== --- hive/branches/branch-1.0/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java (original) +++ hive/branches/branch-1.0/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java Tue Jan 27 22:09:59 2015 @@ -133,8 +133,7 @@ public class MetadataOnlyOptimizer imple WalkerCtx walkerCtx = (WalkerCtx) procCtx; // There can be atmost one element eligible to be converted to // metadata only - if ((walkerCtx.getMayBeMetadataOnlyTableScans().isEmpty()) - || (walkerCtx.getMayBeMetadataOnlyTableScans().size() > 1)) { + if (walkerCtx.getMayBeMetadataOnlyTableScans().isEmpty()) { return nd; } Modified: hive/branches/branch-1.0/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanTaskDispatcher.java URL: http://svn.apache.org/viewvc/hive/branches/branch-1.0/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanTaskDispatcher.java?rev=1655172&r1=1655171&r2=1655172&view=diff ============================================================================== --- hive/branches/branch-1.0/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanTaskDispatcher.java (original) +++ hive/branches/branch-1.0/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanTaskDispatcher.java Tue Jan 27 22:09:59 2015 @@ -21,9 +21,11 @@ package org.apache.hadoop.hive.ql.optimi import java.io.Serializable; import java.util.ArrayList; import java.util.Collection; -import java.util.Iterator; +import java.util.HashSet; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Stack; import org.apache.commons.logging.Log; @@ -100,31 +102,51 @@ public class NullScanTaskDispatcher impl return paths; } - - private void processAlias(MapWork work, String alias) { - List<String> paths = getPathsForAlias(work, alias); - if (paths.isEmpty()) { - // partitioned table which don't select any partitions - // there are no paths to replace with fakePath - return; - } - work.setUseOneNullRowInputFormat(true); - - // Change the alias partition desc - PartitionDesc aliasPartn = work.getAliasToPartnInfo().get(alias); - changePartitionToMetadataOnly(aliasPartn); - - - for (String path : paths) { - PartitionDesc partDesc = work.getPathToPartitionInfo().get(path); + + private void processAlias(MapWork work, String path, ArrayList<String> aliasesAffected, + ArrayList<String> aliases) { + // the aliases that are allowed to map to a null scan. + ArrayList<String> allowed = new ArrayList<String>(); + for (String alias : aliasesAffected) { + if (aliases.contains(alias)) { + allowed.add(alias); + } + } + if (allowed.size() > 0) { + work.setUseOneNullRowInputFormat(true); + PartitionDesc partDesc = work.getPathToPartitionInfo().get(path).clone(); PartitionDesc newPartition = changePartitionToMetadataOnly(partDesc); Path fakePath = new Path(physicalContext.getContext().getMRTmpPath() - + newPartition.getTableName() - + encode(newPartition.getPartSpec())); - work.getPathToPartitionInfo().remove(path); + + newPartition.getTableName() + encode(newPartition.getPartSpec())); work.getPathToPartitionInfo().put(fakePath.getName(), newPartition); - ArrayList<String> aliases = work.getPathToAliases().remove(path); - work.getPathToAliases().put(fakePath.getName(), aliases); + work.getPathToAliases().put(fakePath.getName(), new ArrayList<String>(allowed)); + aliasesAffected.removeAll(allowed); + if (aliasesAffected.isEmpty()) { + work.getPathToAliases().remove(path); + work.getPathToPartitionInfo().remove(path); + } + } + } + + private void processAlias(MapWork work, HashSet<TableScanOperator> tableScans) { + ArrayList<String> aliases = new ArrayList<String>(); + for (TableScanOperator tso : tableScans) { + // use LinkedHashMap<String, Operator<? extends OperatorDesc>> + // getAliasToWork() + String alias = getAliasForTableScanOperator(work, tso); + aliases.add(alias); + tso.getConf().setIsMetadataOnly(true); + } + // group path alias according to work + LinkedHashMap<String, ArrayList<String>> candidates = new LinkedHashMap<String, ArrayList<String>>(); + for (String path : work.getPaths()) { + ArrayList<String> aliasesAffected = work.getPathToAliases().get(path); + if (aliasesAffected != null && aliasesAffected.size() > 0) { + candidates.put(path, aliasesAffected); + } + } + for (Entry<String, ArrayList<String>> entry : candidates.entrySet()) { + processAlias(work, entry.getKey(), entry.getValue(), aliases); } } @@ -177,16 +199,8 @@ public class NullScanTaskDispatcher impl LOG.info(String.format("Found %d null table scans", walkerCtx.getMetadataOnlyTableScans().size())); - Iterator<TableScanOperator> iterator - = walkerCtx.getMetadataOnlyTableScans().iterator(); - - while (iterator.hasNext()) { - TableScanOperator tso = iterator.next(); - tso.getConf().setIsMetadataOnly(true); - String alias = getAliasForTableScanOperator(mapWork, tso); - LOG.info("Null table scan for " + alias); - processAlias(mapWork, alias); - } + if (walkerCtx.getMetadataOnlyTableScans().size() > 0) + processAlias(mapWork, walkerCtx.getMetadataOnlyTableScans()); } return null; } Modified: hive/branches/branch-1.0/ql/src/test/results/clientpositive/metadataonly1.q.out URL: http://svn.apache.org/viewvc/hive/branches/branch-1.0/ql/src/test/results/clientpositive/metadataonly1.q.out?rev=1655172&r1=1655171&r2=1655172&view=diff ============================================================================== --- hive/branches/branch-1.0/ql/src/test/results/clientpositive/metadataonly1.q.out (original) +++ hive/branches/branch-1.0/ql/src/test/results/clientpositive/metadataonly1.q.out Tue Jan 27 22:09:59 2015 @@ -146,7 +146,6 @@ STAGE PLANS: Path -> Partition: -mr-10002default.test1{ds=1} Partition - base file name: ds=1 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -292,7 +291,6 @@ STAGE PLANS: Path -> Partition: -mr-10002default.test1{ds=1} Partition - base file name: ds=1 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -621,7 +619,6 @@ STAGE PLANS: Path -> Partition: -mr-10004default.test1{ds=1} Partition - base file name: ds=1 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -661,7 +658,6 @@ STAGE PLANS: name: default.test1 -mr-10005default.test1{ds=2} Partition - base file name: ds=2 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1081,7 +1077,6 @@ STAGE PLANS: Path -> Partition: -mr-10002default.test2{ds=1, hr=1} Partition - base file name: hr=1 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1122,7 +1117,6 @@ STAGE PLANS: name: default.test2 -mr-10003default.test2{ds=1, hr=2} Partition - base file name: hr=2 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1163,7 +1157,6 @@ STAGE PLANS: name: default.test2 -mr-10004default.test2{ds=1, hr=3} Partition - base file name: hr=3 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1560,7 +1553,6 @@ STAGE PLANS: Path -> Partition: -mr-10002default.test1{ds=1} Partition - base file name: ds=1 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1600,7 +1592,6 @@ STAGE PLANS: name: default.test1 -mr-10003default.test1{ds=2} Partition - base file name: ds=2 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1811,7 +1802,6 @@ STAGE PLANS: Path -> Partition: -mr-10002default.test2{ds=01_10_10, hr=01} Partition - base file name: hr=01 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1852,7 +1842,6 @@ STAGE PLANS: name: default.test2 -mr-10003default.test2{ds=01_10_20, hr=02} Partition - base file name: hr=02 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1893,7 +1882,6 @@ STAGE PLANS: name: default.test2 -mr-10004default.test2{ds=1, hr=1} Partition - base file name: hr=1 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1934,7 +1922,6 @@ STAGE PLANS: name: default.test2 -mr-10005default.test2{ds=1, hr=2} Partition - base file name: hr=2 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1975,7 +1962,6 @@ STAGE PLANS: name: default.test2 -mr-10006default.test2{ds=1, hr=3} Partition - base file name: hr=3 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: Modified: hive/branches/branch-1.0/ql/src/test/results/clientpositive/optimize_nullscan.q.out URL: http://svn.apache.org/viewvc/hive/branches/branch-1.0/ql/src/test/results/clientpositive/optimize_nullscan.q.out?rev=1655172&r1=1655171&r2=1655172&view=diff ============================================================================== Binary files hive/branches/branch-1.0/ql/src/test/results/clientpositive/optimize_nullscan.q.out (original) and hive/branches/branch-1.0/ql/src/test/results/clientpositive/optimize_nullscan.q.out Tue Jan 27 22:09:59 2015 differ Modified: hive/branches/branch-1.0/ql/src/test/results/clientpositive/tez/metadataonly1.q.out URL: http://svn.apache.org/viewvc/hive/branches/branch-1.0/ql/src/test/results/clientpositive/tez/metadataonly1.q.out?rev=1655172&r1=1655171&r2=1655172&view=diff ============================================================================== --- hive/branches/branch-1.0/ql/src/test/results/clientpositive/tez/metadataonly1.q.out (original) +++ hive/branches/branch-1.0/ql/src/test/results/clientpositive/tez/metadataonly1.q.out Tue Jan 27 22:09:59 2015 @@ -157,7 +157,6 @@ STAGE PLANS: Path -> Partition: -mr-10002default.test1{ds=1} Partition - base file name: ds=1 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -309,7 +308,6 @@ STAGE PLANS: Path -> Partition: -mr-10002default.test1{ds=1} Partition - base file name: ds=1 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1043,7 +1041,6 @@ STAGE PLANS: Path -> Partition: -mr-10002default.test2{ds=1, hr=1} Partition - base file name: hr=1 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1084,7 +1081,6 @@ STAGE PLANS: name: default.test2 -mr-10003default.test2{ds=1, hr=2} Partition - base file name: hr=2 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1125,7 +1121,6 @@ STAGE PLANS: name: default.test2 -mr-10004default.test2{ds=1, hr=3} Partition - base file name: hr=3 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1534,7 +1529,6 @@ STAGE PLANS: Path -> Partition: -mr-10002default.test1{ds=1} Partition - base file name: ds=1 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1574,7 +1568,6 @@ STAGE PLANS: name: default.test1 -mr-10003default.test1{ds=2} Partition - base file name: ds=2 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1791,7 +1784,6 @@ STAGE PLANS: Path -> Partition: -mr-10002default.test2{ds=01_10_10, hr=01} Partition - base file name: hr=01 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1832,7 +1824,6 @@ STAGE PLANS: name: default.test2 -mr-10003default.test2{ds=01_10_20, hr=02} Partition - base file name: hr=02 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1873,7 +1864,6 @@ STAGE PLANS: name: default.test2 -mr-10004default.test2{ds=1, hr=1} Partition - base file name: hr=1 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1914,7 +1904,6 @@ STAGE PLANS: name: default.test2 -mr-10005default.test2{ds=1, hr=2} Partition - base file name: hr=2 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1955,7 +1944,6 @@ STAGE PLANS: name: default.test2 -mr-10006default.test2{ds=1, hr=3} Partition - base file name: hr=3 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: Modified: hive/branches/branch-1.0/ql/src/test/results/clientpositive/tez/optimize_nullscan.q.out URL: http://svn.apache.org/viewvc/hive/branches/branch-1.0/ql/src/test/results/clientpositive/tez/optimize_nullscan.q.out?rev=1655172&r1=1655171&r2=1655172&view=diff ============================================================================== --- hive/branches/branch-1.0/ql/src/test/results/clientpositive/tez/optimize_nullscan.q.out (original) +++ hive/branches/branch-1.0/ql/src/test/results/clientpositive/tez/optimize_nullscan.q.out Tue Jan 27 22:09:59 2015 @@ -248,7 +248,6 @@ STAGE PLANS: Path -> Partition: -mr-10002default.src{} Partition - base file name: src input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -319,7 +318,6 @@ STAGE PLANS: Path -> Partition: -mr-10003default.srcpart{ds=2008-04-08, hr=11} Partition - base file name: hr=11 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -365,7 +363,6 @@ STAGE PLANS: name: default.srcpart -mr-10004default.srcpart{ds=2008-04-08, hr=12} Partition - base file name: hr=12 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -411,7 +408,6 @@ STAGE PLANS: name: default.srcpart -mr-10005default.srcpart{ds=2008-04-09, hr=11} Partition - base file name: hr=11 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -457,7 +453,6 @@ STAGE PLANS: name: default.srcpart -mr-10006default.srcpart{ds=2008-04-09, hr=12} Partition - base file name: hr=12 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -894,7 +889,6 @@ STAGE PLANS: Path -> Partition: -mr-10002default.src{} Partition - base file name: src input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -1125,7 +1119,6 @@ STAGE PLANS: Path -> Partition: -mr-10002default.src{} Partition - base file name: src input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -1196,7 +1189,6 @@ STAGE PLANS: Path -> Partition: -mr-10003default.srcpart{ds=2008-04-08, hr=11} Partition - base file name: hr=11 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1242,7 +1234,6 @@ STAGE PLANS: name: default.srcpart -mr-10004default.srcpart{ds=2008-04-08, hr=12} Partition - base file name: hr=12 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1288,7 +1279,6 @@ STAGE PLANS: name: default.srcpart -mr-10005default.srcpart{ds=2008-04-09, hr=11} Partition - base file name: hr=11 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1334,7 +1324,6 @@ STAGE PLANS: name: default.srcpart -mr-10006default.srcpart{ds=2008-04-09, hr=12} Partition - base file name: hr=12 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1645,7 +1634,6 @@ STAGE PLANS: Path -> Partition: -mr-10002default.src{} Partition - base file name: src input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -1815,11 +1803,10 @@ STAGE PLANS: value expressions: key (type: string) auto parallelism: true Path -> Alias: - -mr-10002default.src{} [s2] + -mr-10003default.src{} [s2] Path -> Partition: - -mr-10002default.src{} + -mr-10003default.src{} Partition - base file name: src input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -1862,7 +1849,7 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - -mr-10002default.src{} [s2] + -mr-10003default.src{} [s2] Map 3 Map Operator Tree: TableScan @@ -1882,11 +1869,10 @@ STAGE PLANS: value expressions: key (type: string) auto parallelism: true Path -> Alias: - -mr-10003default.src{} [s1] + -mr-10002default.src{} [s1] Path -> Partition: - -mr-10003default.src{} + -mr-10002default.src{} Partition - base file name: src input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -1929,7 +1915,7 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - -mr-10003default.src{} [s1] + -mr-10002default.src{} [s1] Reducer 2 Needs Tagging: false Reduce Operator Tree: