Author: hashutosh
Date: Fri Dec 19 00:30:41 2014
New Revision: 1646596
URL: http://svn.apache.org/r1646596
Log:
HIVE-9106: Improve the performance of the null scan optimizer when several
table scans share a physical path (Pengcheng Xiong via Ashutosh Chauhan)
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanTaskDispatcher.java
hive/trunk/ql/src/test/results/clientpositive/metadataonly1.q.out
hive/trunk/ql/src/test/results/clientpositive/optimize_nullscan.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/metadataonly1.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/optimize_nullscan.q.out
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanTaskDispatcher.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanTaskDispatcher.java?rev=1646596&r1=1646595&r2=1646596&view=diff
==============================================================================
---
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanTaskDispatcher.java
(original)
+++
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanTaskDispatcher.java
Fri Dec 19 00:30:41 2014
@@ -102,47 +102,51 @@ public class NullScanTaskDispatcher impl
return paths;
}
-
- private void processAlias(MapWork work, ArrayList<String> aliases, String
path) {
-
- work.setUseOneNullRowInputFormat(true);
- for (String alias : aliases) {
- // Change the conf for tableScanOp
- TableScanOperator tso = (TableScanOperator)
work.getAliasToWork().get(alias);
- tso.getConf().setIsMetadataOnly(true);
- // Change the alias partition desc
- PartitionDesc aliasPartn = work.getAliasToPartnInfo().get(alias);
- changePartitionToMetadataOnly(aliasPartn);
+
+ private void processAlias(MapWork work, String path, ArrayList<String>
aliasesAffected,
+ ArrayList<String> aliases) {
+ // the aliases that are allowed to map to a null scan.
+ ArrayList<String> allowed = new ArrayList<String>();
+ for (String alias : aliasesAffected) {
+ if (aliases.contains(alias)) {
+ allowed.add(alias);
+ }
+ }
+ if (allowed.size() > 0) {
+ work.setUseOneNullRowInputFormat(true);
+ PartitionDesc partDesc = work.getPathToPartitionInfo().get(path).clone();
+ PartitionDesc newPartition = changePartitionToMetadataOnly(partDesc);
+ Path fakePath = new Path(physicalContext.getContext().getMRTmpPath()
+ + newPartition.getTableName() + encode(newPartition.getPartSpec()));
+ work.getPathToPartitionInfo().put(fakePath.getName(), newPartition);
+ work.getPathToAliases().put(fakePath.getName(), new
ArrayList<String>(allowed));
+ aliasesAffected.removeAll(allowed);
+ if (aliasesAffected.isEmpty()) {
+ work.getPathToAliases().remove(path);
+ work.getPathToPartitionInfo().remove(path);
+ }
}
-
- PartitionDesc partDesc = work.getPathToPartitionInfo().get(path);
- PartitionDesc newPartition = changePartitionToMetadataOnly(partDesc);
- Path fakePath = new Path(physicalContext.getContext().getMRTmpPath()
- + newPartition.getTableName() + encode(newPartition.getPartSpec()));
- work.getPathToPartitionInfo().remove(path);
- work.getPathToPartitionInfo().put(fakePath.getName(), newPartition);
- assert(work.getPathToAliases().remove(path).equals(aliases));
- work.getPathToAliases().put(fakePath.getName(), aliases);
}
private void processAlias(MapWork work, HashSet<TableScanOperator>
tableScans) {
- ArrayList<String> aliasList = new ArrayList<String>();
+ ArrayList<String> aliases = new ArrayList<String>();
for (TableScanOperator tso : tableScans) {
// use LinkedHashMap<String, Operator<? extends OperatorDesc>>
// getAliasToWork()
String alias = getAliasForTableScanOperator(work, tso);
- aliasList.add(alias);
+ aliases.add(alias);
+ tso.getConf().setIsMetadataOnly(true);
}
// group path alias according to work
LinkedHashMap<String, ArrayList<String>> candidates = new
LinkedHashMap<String, ArrayList<String>>();
for (String path : work.getPaths()) {
- ArrayList<String> aliases = work.getPathToAliases().get(path);
- if (aliases != null && aliasList.containsAll(aliases)) {
- candidates.put(path, aliases);
+ ArrayList<String> aliasesAffected = work.getPathToAliases().get(path);
+ if (aliasesAffected != null && aliasesAffected.size() > 0) {
+ candidates.put(path, aliasesAffected);
}
}
for (Entry<String, ArrayList<String>> entry : candidates.entrySet()) {
- processAlias(work, entry.getValue(), entry.getKey());
+ processAlias(work, entry.getKey(), entry.getValue(), aliases);
}
}
Modified: hive/trunk/ql/src/test/results/clientpositive/metadataonly1.q.out
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/metadataonly1.q.out?rev=1646596&r1=1646595&r2=1646596&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/metadataonly1.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/metadataonly1.q.out Fri Dec
19 00:30:41 2014
@@ -146,7 +146,6 @@ STAGE PLANS:
Path -> Partition:
-mr-10003default.test1{ds=1}
Partition
- base file name: ds=1
input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -292,7 +291,6 @@ STAGE PLANS:
Path -> Partition:
-mr-10003default.test1{ds=1}
Partition
- base file name: ds=1
input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -621,7 +619,6 @@ STAGE PLANS:
Path -> Partition:
-mr-10005default.test1{ds=1}
Partition
- base file name: ds=1
input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -661,7 +658,6 @@ STAGE PLANS:
name: default.test1
-mr-10006default.test1{ds=2}
Partition
- base file name: ds=2
input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -1079,7 +1075,6 @@ STAGE PLANS:
Path -> Partition:
-mr-10003default.test2{ds=1, hr=1}
Partition
- base file name: hr=1
input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -1120,7 +1115,6 @@ STAGE PLANS:
name: default.test2
-mr-10004default.test2{ds=1, hr=2}
Partition
- base file name: hr=2
input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -1161,7 +1155,6 @@ STAGE PLANS:
name: default.test2
-mr-10005default.test2{ds=1, hr=3}
Partition
- base file name: hr=3
input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -1558,7 +1551,6 @@ STAGE PLANS:
Path -> Partition:
-mr-10003default.test1{ds=1}
Partition
- base file name: ds=1
input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -1598,7 +1590,6 @@ STAGE PLANS:
name: default.test1
-mr-10004default.test1{ds=2}
Partition
- base file name: ds=2
input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -1809,7 +1800,6 @@ STAGE PLANS:
Path -> Partition:
-mr-10003default.test2{ds=01_10_10, hr=01}
Partition
- base file name: hr=01
input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -1850,7 +1840,6 @@ STAGE PLANS:
name: default.test2
-mr-10004default.test2{ds=01_10_20, hr=02}
Partition
- base file name: hr=02
input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -1891,7 +1880,6 @@ STAGE PLANS:
name: default.test2
-mr-10005default.test2{ds=1, hr=1}
Partition
- base file name: hr=1
input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -1932,7 +1920,6 @@ STAGE PLANS:
name: default.test2
-mr-10006default.test2{ds=1, hr=2}
Partition
- base file name: hr=2
input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -1973,7 +1960,6 @@ STAGE PLANS:
name: default.test2
-mr-10007default.test2{ds=1, hr=3}
Partition
- base file name: hr=3
input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
Modified: hive/trunk/ql/src/test/results/clientpositive/optimize_nullscan.q.out
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/optimize_nullscan.q.out?rev=1646596&r1=1646595&r2=1646596&view=diff
==============================================================================
Binary files
hive/trunk/ql/src/test/results/clientpositive/optimize_nullscan.q.out
(original) and
hive/trunk/ql/src/test/results/clientpositive/optimize_nullscan.q.out Fri Dec
19 00:30:41 2014 differ
Modified: hive/trunk/ql/src/test/results/clientpositive/tez/metadataonly1.q.out
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/metadataonly1.q.out?rev=1646596&r1=1646595&r2=1646596&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/metadataonly1.q.out
(original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/metadataonly1.q.out Fri
Dec 19 00:30:41 2014
@@ -157,7 +157,6 @@ STAGE PLANS:
Path -> Partition:
-mr-10003default.test1{ds=1}
Partition
- base file name: ds=1
input format:
org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -309,7 +308,6 @@ STAGE PLANS:
Path -> Partition:
-mr-10003default.test1{ds=1}
Partition
- base file name: ds=1
input format:
org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -1041,7 +1039,6 @@ STAGE PLANS:
Path -> Partition:
-mr-10003default.test2{ds=1, hr=1}
Partition
- base file name: hr=1
input format:
org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -1082,7 +1079,6 @@ STAGE PLANS:
name: default.test2
-mr-10004default.test2{ds=1, hr=2}
Partition
- base file name: hr=2
input format:
org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -1123,7 +1119,6 @@ STAGE PLANS:
name: default.test2
-mr-10005default.test2{ds=1, hr=3}
Partition
- base file name: hr=3
input format:
org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -1532,7 +1527,6 @@ STAGE PLANS:
Path -> Partition:
-mr-10003default.test1{ds=1}
Partition
- base file name: ds=1
input format:
org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -1572,7 +1566,6 @@ STAGE PLANS:
name: default.test1
-mr-10004default.test1{ds=2}
Partition
- base file name: ds=2
input format:
org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -1789,7 +1782,6 @@ STAGE PLANS:
Path -> Partition:
-mr-10003default.test2{ds=01_10_10, hr=01}
Partition
- base file name: hr=01
input format:
org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -1830,7 +1822,6 @@ STAGE PLANS:
name: default.test2
-mr-10004default.test2{ds=01_10_20, hr=02}
Partition
- base file name: hr=02
input format:
org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -1871,7 +1862,6 @@ STAGE PLANS:
name: default.test2
-mr-10005default.test2{ds=1, hr=1}
Partition
- base file name: hr=1
input format:
org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -1912,7 +1902,6 @@ STAGE PLANS:
name: default.test2
-mr-10006default.test2{ds=1, hr=2}
Partition
- base file name: hr=2
input format:
org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -1953,7 +1942,6 @@ STAGE PLANS:
name: default.test2
-mr-10007default.test2{ds=1, hr=3}
Partition
- base file name: hr=3
input format:
org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
Modified:
hive/trunk/ql/src/test/results/clientpositive/tez/optimize_nullscan.q.out
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/optimize_nullscan.q.out?rev=1646596&r1=1646595&r2=1646596&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/optimize_nullscan.q.out
(original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/optimize_nullscan.q.out
Fri Dec 19 00:30:41 2014
@@ -254,7 +254,6 @@ STAGE PLANS:
Path -> Partition:
-mr-10002default.src{}
Partition
- base file name: src
input format:
org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
@@ -325,7 +324,6 @@ STAGE PLANS:
Path -> Partition:
-mr-10003default.srcpart{ds=2008-04-08, hr=11}
Partition
- base file name: hr=11
input format:
org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -371,7 +369,6 @@ STAGE PLANS:
name: default.srcpart
-mr-10004default.srcpart{ds=2008-04-08, hr=12}
Partition
- base file name: hr=12
input format:
org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -417,7 +414,6 @@ STAGE PLANS:
name: default.srcpart
-mr-10005default.srcpart{ds=2008-04-09, hr=11}
Partition
- base file name: hr=11
input format:
org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -463,7 +459,6 @@ STAGE PLANS:
name: default.srcpart
-mr-10006default.srcpart{ds=2008-04-09, hr=12}
Partition
- base file name: hr=12
input format:
org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -683,7 +678,6 @@ STAGE PLANS:
Path -> Partition:
-mr-10002default.src{}
Partition
- base file name: src
input format:
org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
@@ -1121,7 +1115,6 @@ STAGE PLANS:
Path -> Partition:
-mr-10002default.src{}
Partition
- base file name: src
input format:
org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
@@ -1192,7 +1185,6 @@ STAGE PLANS:
Path -> Partition:
-mr-10003default.srcpart{ds=2008-04-08, hr=11}
Partition
- base file name: hr=11
input format:
org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -1238,7 +1230,6 @@ STAGE PLANS:
name: default.srcpart
-mr-10004default.srcpart{ds=2008-04-08, hr=12}
Partition
- base file name: hr=12
input format:
org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -1284,7 +1275,6 @@ STAGE PLANS:
name: default.srcpart
-mr-10005default.srcpart{ds=2008-04-09, hr=11}
Partition
- base file name: hr=11
input format:
org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -1330,7 +1320,6 @@ STAGE PLANS:
name: default.srcpart
-mr-10006default.srcpart{ds=2008-04-09, hr=12}
Partition
- base file name: hr=12
input format:
org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
partition values:
@@ -1640,7 +1629,6 @@ STAGE PLANS:
Path -> Partition:
-mr-10002default.src{}
Partition
- base file name: src
input format:
org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
@@ -1812,7 +1800,6 @@ STAGE PLANS:
Path -> Partition:
-mr-10002default.src{}
Partition
- base file name: src
input format:
org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
@@ -1879,7 +1866,6 @@ STAGE PLANS:
Path -> Partition:
-mr-10003default.src{}
Partition
- base file name: src
input format:
org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties: