HIVE-18218 : SMB Join : Handle buckets with no splits. (Deepak Jaiswal, 
reviewed by Jason Dere)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9fdb601e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9fdb601e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9fdb601e

Branch: refs/heads/standalone-metastore
Commit: 9fdb601ea131da260e270495801dd2c4e6c08e53
Parents: ddd4c9a
Author: Deepak Jaiswal <djais...@apache.org>
Authored: Sun Feb 11 00:56:10 2018 -0800
Committer: Deepak Jaiswal <djais...@apache.org>
Committed: Sun Feb 11 00:56:10 2018 -0800

----------------------------------------------------------------------
 .../hive/ql/exec/tez/CustomPartitionVertex.java |  81 ++-
 .../ql/exec/tez/CustomVertexConfiguration.java  |  31 +-
 .../hadoop/hive/ql/exec/tez/DagUtils.java       |  16 +-
 .../hive/ql/optimizer/ConvertJoinMapJoin.java   |   1 +
 .../hive/ql/parse/LoadSemanticAnalyzer.java     |  43 ++
 .../clientnegative/bucket_mapjoin_mismatch1.q   |   4 +-
 .../clientpositive/auto_sortmerge_join_16.q     | 154 +++--
 .../bucket_mapjoin_mismatch1.q.out              |  20 +-
 .../llap/auto_sortmerge_join_16.q.out           | 562 ++++++++++++++++---
 .../spark/auto_sortmerge_join_16.q.out          | 538 +++++++++++++++---
 .../spark/auto_sortmerge_join_16.q.out_spark    | 538 +++++++++++++++---
 11 files changed, 1730 insertions(+), 258 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/9fdb601e/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomPartitionVertex.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomPartitionVertex.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomPartitionVertex.java
index 26afe90..bd1f54d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomPartitionVertex.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomPartitionVertex.java
@@ -108,14 +108,15 @@ public class CustomPartitionVertex extends 
VertexManagerPlugin {
   private final Multimap<Integer, Integer> bucketToTaskMap = 
HashMultimap.<Integer, Integer> create();
 
   private final Map<String, Multimap<Integer, InputSplit>> 
inputToGroupedSplitMap =
-      new HashMap<String, Multimap<Integer, InputSplit>>();
+      new HashMap<>();
 
   private int numInputsAffectingRootInputSpecUpdate = 1;
   private int numInputsSeenSoFar = 0;
   private final Map<String, EdgeManagerPluginDescriptor> emMap = 
Maps.newHashMap();
   private final List<InputSplit> finalSplits = Lists.newLinkedList();
   private final Map<String, InputSpecUpdate> inputNameInputSpecMap =
-      new HashMap<String, InputSpecUpdate>();
+      new HashMap<>();
+  private Map<String, Integer> inputToBucketMap;
 
   public CustomPartitionVertex(VertexManagerPluginContext context) {
     super(context);
@@ -137,6 +138,7 @@ public class CustomPartitionVertex extends 
VertexManagerPlugin {
     this.mainWorkName = vertexConf.getInputName();
     this.vertexType = vertexConf.getVertexType();
     this.numInputsAffectingRootInputSpecUpdate = vertexConf.getNumInputs();
+    this.inputToBucketMap = vertexConf.getInputToBucketMap();
   }
 
   @Override
@@ -242,7 +244,7 @@ public class CustomPartitionVertex extends 
VertexManagerPlugin {
     }
 
     Multimap<Integer, InputSplit> bucketToInitialSplitMap =
-        getBucketSplitMapForPath(pathFileSplitsMap);
+        getBucketSplitMapForPath(inputName, pathFileSplitsMap);
 
     try {
       int totalResource = context.getTotalAvailableResource().getMemory();
@@ -532,20 +534,50 @@ public class CustomPartitionVertex extends 
VertexManagerPlugin {
   /*
    * This method generates the map of bucket to file splits.
    */
-  private Multimap<Integer, InputSplit> getBucketSplitMapForPath(
+  private Multimap<Integer, InputSplit> getBucketSplitMapForPath(String 
inputName,
       Map<String, Set<FileSplit>> pathFileSplitsMap) {
 
-    int bucketNum = 0;
 
     Multimap<Integer, InputSplit> bucketToInitialSplitMap =
-        ArrayListMultimap.<Integer, InputSplit> create();
+        ArrayListMultimap.create();
 
+    boolean fallback = false;
+    Map<Integer, Integer> bucketIds = new HashMap<>();
     for (Map.Entry<String, Set<FileSplit>> entry : 
pathFileSplitsMap.entrySet()) {
-      int bucketId = bucketNum % numBuckets;
+      // Extract the buckedID from pathFilesMap, this is more accurate method,
+      // however. it may not work in certain cases where buckets are named
+      // after files used while loading data. In such case, fallback to old
+      // potential inaccurate method.
+      // The accepted file names are such as 000000_0, 000001_0_copy_1.
+      String bucketIdStr =
+              Utilities.getBucketFileNameFromPathSubString(entry.getKey());
+      int bucketId = Utilities.getBucketIdFromFile(bucketIdStr);
+      if (bucketId == -1) {
+        fallback = true;
+        LOG.info("Fallback to using older sort based logic to assign " +
+                "buckets to splits.");
+        bucketIds.clear();
+        break;
+      }
+      // Make sure the bucketId is at max the numBuckets
+      bucketId = bucketId % numBuckets;
+      bucketIds.put(bucketId, bucketId);
       for (FileSplit fsplit : entry.getValue()) {
         bucketToInitialSplitMap.put(bucketId, fsplit);
       }
-      bucketNum++;
+    }
+
+    int bucketNum = 0;
+    if (fallback) {
+      // This is the old logic which assumes that the filenames are sorted in
+      // alphanumeric order and mapped to appropriate bucket number.
+      for (Map.Entry<String, Set<FileSplit>> entry : 
pathFileSplitsMap.entrySet()) {
+        int bucketId = bucketNum % numBuckets;
+        for (FileSplit fsplit : entry.getValue()) {
+          bucketToInitialSplitMap.put(bucketId, fsplit);
+        }
+        bucketNum++;
+      }
     }
 
     // this is just for SMB join use-case. The numBuckets would be equal to 
that of the big table
@@ -553,16 +585,35 @@ public class CustomPartitionVertex extends 
VertexManagerPlugin {
     // data from the right buckets to the big table side. For e.g. Big table 
has 8 buckets and small
     // table has 4 buckets, bucket 0 of small table needs to be sent to bucket 
4 of the big table as
     // well.
-    if (bucketNum < numBuckets) {
-      int loopedBucketId = 0;
-      for (; bucketNum < numBuckets; bucketNum++) {
-        for (InputSplit fsplit : bucketToInitialSplitMap.get(loopedBucketId)) {
-          bucketToInitialSplitMap.put(bucketNum, fsplit);
+    if (numInputsAffectingRootInputSpecUpdate != 1) {
+      // small table
+      if (fallback && bucketNum < numBuckets) {
+        // Old logic.
+        int loopedBucketId = 0;
+        for (; bucketNum < numBuckets; bucketNum++) {
+          for (InputSplit fsplit : 
bucketToInitialSplitMap.get(loopedBucketId)) {
+            bucketToInitialSplitMap.put(bucketNum, fsplit);
+          }
+          loopedBucketId++;
+        }
+      } else {
+        // new logic.
+        if (inputToBucketMap.containsKey(inputName)) {
+          int inputNumBuckets = inputToBucketMap.get(inputName);
+          if (inputNumBuckets < numBuckets) {
+            // Need to send the splits to multiple buckets
+            for (int i = 1; i < numBuckets / inputNumBuckets; i++) {
+              int bucketIdBase = i * inputNumBuckets;
+              for (Integer bucketId : bucketIds.keySet()) {
+                for (InputSplit fsplit : 
bucketToInitialSplitMap.get(bucketId)) {
+                  bucketToInitialSplitMap.put(bucketIdBase + bucketId, fsplit);
+                }
+              }
+            }
+          }
         }
-        loopedBucketId++;
       }
     }
-
     return bucketToInitialSplitMap;
   }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/9fdb601e/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomVertexConfiguration.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomVertexConfiguration.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomVertexConfiguration.java
index ef5e7ed..4301829 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomVertexConfiguration.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/CustomVertexConfiguration.java
@@ -21,7 +21,10 @@ package org.apache.hadoop.hive.ql.exec.tez;
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
 
+import com.google.common.base.Preconditions;
 import org.apache.hadoop.hive.ql.plan.TezWork.VertexType;
 import org.apache.hadoop.io.Writable;
 
@@ -39,22 +42,24 @@ public class CustomVertexConfiguration implements Writable {
   private VertexType vertexType = VertexType.AUTO_INITIALIZED_EDGES;
   private int numInputs;
   private String inputName;
+  private Map<String, Integer> inputToBucketMap;
 
   public CustomVertexConfiguration() {
   }
 
   // this is the constructor to use for the Bucket map join case.
   public CustomVertexConfiguration(int numBuckets, VertexType vertexType) {
-    this(numBuckets, vertexType, "", 1);
+    this(numBuckets, vertexType, "", 1, null);
   }
 
   // this is the constructor to use for SMB.
   public CustomVertexConfiguration(int numBuckets, VertexType vertexType, 
String inputName,
-      int numInputs) {
+                                   int numInputs, Map<String, Integer> 
inputToBucketMap) {
     this.numBuckets = numBuckets;
     this.vertexType = vertexType;
     this.numInputs = numInputs;
     this.inputName = inputName;
+    this.inputToBucketMap = inputToBucketMap;
   }
 
   @Override
@@ -63,6 +68,14 @@ public class CustomVertexConfiguration implements Writable {
     out.writeInt(this.numBuckets);
     out.writeInt(numInputs);
     out.writeUTF(inputName);
+    int sz = inputToBucketMap != null ? inputToBucketMap.size() : 0;
+    out.writeInt(sz);
+    if (sz > 0) {
+      for (Map.Entry<String, Integer> entry : inputToBucketMap.entrySet()) {
+        out.writeUTF(entry.getKey());
+        out.writeInt(entry.getValue());
+      }
+    }
   }
 
   @Override
@@ -71,6 +84,16 @@ public class CustomVertexConfiguration implements Writable {
     this.numBuckets = in.readInt();
     this.numInputs = in.readInt();
     this.inputName = in.readUTF();
+    int sz = in.readInt();
+    Preconditions.checkState(sz >= 0);
+    if (sz == 0) {
+      this.inputToBucketMap = null;
+    } else {
+      this.inputToBucketMap = new HashMap<>();
+      for (int i = 0; i < sz; i++) {
+        this.inputToBucketMap.put(in.readUTF(), in.readInt());
+      }
+    }
   }
 
   public int getNumBuckets() {
@@ -88,4 +111,8 @@ public class CustomVertexConfiguration implements Writable {
   public int getNumInputs() {
     return numInputs;
   }
+
+  public Map<String, Integer> getInputToBucketMap() {
+    return inputToBucketMap;
+  }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/9fdb601e/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
index 9885038..0e75f6e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
@@ -21,6 +21,7 @@ import java.util.Collection;
 
 import java.util.concurrent.ConcurrentHashMap;
 import com.google.common.base.Function;
+import com.google.common.base.Preconditions;
 import com.google.common.collect.Iterators;
 import com.google.common.collect.Lists;
 import javax.security.auth.login.LoginException;
@@ -568,13 +569,26 @@ public class DagUtils {
             MultiMRInput.createConfigBuilder(conf, 
HiveInputFormat.class).build());
       }
 
+      // To be populated for SMB joins only for all the small tables
+      Map<String, Integer> inputToBucketMap = new HashMap<>();
+      if (mergeJoinWork.getMergeJoinOperator().getParentOperators().size() == 1
+              && mergeJoinWork.getMergeJoinOperator().getOpTraits() != null) {
+        // This is an SMB join.
+        for (BaseWork work : mapWorkList) {
+          MapWork mw = (MapWork) work;
+          Map<String, Operator<?>> aliasToWork = mw.getAliasToWork();
+          Preconditions.checkState(aliasToWork.size() == 1,
+                  "More than 1 alias in SMB mapwork");
+          inputToBucketMap.put(mw.getName(), 
mw.getWorks().get(0).getOpTraits().getNumBuckets());
+        }
+      }
       VertexManagerPluginDescriptor desc =
         
VertexManagerPluginDescriptor.create(CustomPartitionVertex.class.getName());
       // the +1 to the size is because of the main work.
       CustomVertexConfiguration vertexConf =
           new 
CustomVertexConfiguration(mergeJoinWork.getMergeJoinOperator().getConf()
               .getNumBuckets(), vertexType, mergeJoinWork.getBigTableAlias(),
-              mapWorkList.size() + 1);
+              mapWorkList.size() + 1, inputToBucketMap);
       DataOutputBuffer dob = new DataOutputBuffer();
       vertexConf.write(dob);
       byte[] userPayload = dob.getData();

http://git-wip-us.apache.org/repos/asf/hive/blob/9fdb601e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
index dc698c8..a235f3f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
@@ -576,6 +576,7 @@ public class ConvertJoinMapJoin implements NodeProcessor {
         return false;
       }
       ReduceSinkOperator rsOp = (ReduceSinkOperator) parentOp;
+
       if 
(!checkColEquality(rsOp.getParentOperators().get(0).getOpTraits().getSortCols(),
 rsOp
           .getOpTraits().getSortCols(), rsOp.getColumnExprMap(), false)) {
         LOG.info("We cannot convert to SMB because the sort column names do 
not match.");

http://git-wip-us.apache.org/repos/asf/hive/blob/9fdb601e/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
index 54f5bab..3619763 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
@@ -23,6 +23,7 @@ import java.io.IOException;
 import java.io.Serializable;
 import java.net.URI;
 import java.net.URISyntaxException;
+import java.util.Arrays;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
@@ -160,6 +161,48 @@ public class LoadSemanticAnalyzer extends 
BaseSemanticAnalyzer {
               "source contains directory: " + oneSrc.getPath().toString()));
         }
       }
+      // Do another loop if table is bucketed
+      List<String> bucketCols = table.getBucketCols();
+      if (bucketCols != null && !bucketCols.isEmpty()) {
+        // Hive assumes that user names the files as per the corresponding
+        // bucket. For e.g, file names should follow the format 000000_0, 
000000_1 etc.
+        // Here the 1st file will belong to bucket 0 and 2nd to bucket 1 and 
so on.
+        boolean[] bucketArray = new boolean[table.getNumBuckets()];
+        // initialize the array
+        Arrays.fill(bucketArray, false);
+        int numBuckets = table.getNumBuckets();
+
+        for (FileStatus oneSrc : srcs) {
+          String bucketName = oneSrc.getPath().getName();
+
+          //get the bucket id
+          String bucketIdStr =
+                  Utilities.getBucketFileNameFromPathSubString(bucketName);
+          int bucketId = Utilities.getBucketIdFromFile(bucketIdStr);
+          LOG.debug("bucket ID for file " + oneSrc.getPath() + " = " + bucketId
+          + " for table " + table.getFullyQualifiedName());
+          if (bucketId == -1) {
+            throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(
+                    "The file name is invalid : "
+                            + oneSrc.getPath().toString() + " for table "
+            + table.getFullyQualifiedName()));
+          }
+          if (bucketId >= numBuckets) {
+            throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(
+                    "The file name corresponds to invalid bucketId : "
+                            + oneSrc.getPath().toString())
+                    + ". Maximum number of buckets can be " + numBuckets
+            + " for table " + table.getFullyQualifiedName());
+          }
+          if (bucketArray[bucketId]) {
+            throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(
+                    "Multiple files for same bucket : " + bucketId
+                            + ". Only 1 file per bucket allowed in single load 
command. To load multiple files for same bucket, use multiple statements for 
table "
+            + table.getFullyQualifiedName()));
+          }
+          bucketArray[bucketId] = true;
+        }
+      }
     } catch (IOException e) {
       // Has to use full name to make sure it does not conflict with
       // org.apache.commons.lang.StringUtils

http://git-wip-us.apache.org/repos/asf/hive/blob/9fdb601e/ql/src/test/queries/clientnegative/bucket_mapjoin_mismatch1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/bucket_mapjoin_mismatch1.q 
b/ql/src/test/queries/clientnegative/bucket_mapjoin_mismatch1.q
index 5f653bc..118db89 100644
--- a/ql/src/test/queries/clientnegative/bucket_mapjoin_mismatch1.q
+++ b/ql/src/test/queries/clientnegative/bucket_mapjoin_mismatch1.q
@@ -14,9 +14,9 @@ load data local inpath '../../data/files/bmj/000002_0'
 CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string)
   partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS
   STORED AS TEXTFILE;
-load data local inpath '../../data/files/bmj/000002_0'
+load data local inpath '../../data/files/bmj/000000_0'
   INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08');
-load data local inpath '../../data/files/bmj/000003_0'
+load data local inpath '../../data/files/bmj/000001_0'
   INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08');
 
 -- The number of buckets in the 2 tables above (being joined later) dont match.

http://git-wip-us.apache.org/repos/asf/hive/blob/9fdb601e/ql/src/test/queries/clientpositive/auto_sortmerge_join_16.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_16.q 
b/ql/src/test/queries/clientpositive/auto_sortmerge_join_16.q
index 8216b53..6d177aa 100644
--- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_16.q
+++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_16.q
@@ -1,12 +1,12 @@
 set hive.strict.checks.bucketing=false;
 
-set hive.auto.convert.join=true;
+set hive.auto.convert.join=false;
 
 set hive.exec.dynamic.partition.mode=nonstrict;
 
 
 
-set hive.auto.convert.sortmerge.join=true;
+set hive.auto.convert.sortmerge.join=false;
 set hive.optimize.bucketmapjoin = true;
 set hive.optimize.bucketmapjoin.sortedmerge = true;
 
@@ -58,39 +58,121 @@ where file_tag between 1 and 2;
 
 load data local inpath '../../data/files/auto_sortmerge_join/small/000000_0' 
overwrite into table stage_bucket_big partition (file_tag='1');
 
-insert overwrite table bucket_big partition(day,pri) 
-select 
-key, 
-value, 
-'day1' as day, 
-1 as pri 
-from 
-stage_bucket_big 
-where 
-file_tag='1'; 
-
-select 
-a.key , 
-a.value , 
-b.value , 
-'day1' as day, 
-1 as pri 
-from 
-( 
-select 
-key, 
-value 
-from bucket_big where day='day1'
-) a 
-left outer join 
-( 
-select 
-key, 
-value
-from bucket_small 
-where pri between 1 and 2
-) b 
-on 
+insert overwrite table bucket_big partition(day,pri)
+ select key, value, 'day1' as day, 1 as pri
+   from stage_bucket_big
+   where file_tag='1';
+
+explain select a.key , a.value , b.value , 'day1' as day, 1 as pri
+        from
+        ( select key, value
+          from bucket_big where day='day1' ) a
+          left outer join
+          ( select key, value
+            from bucket_small
+            where pri between 1 and 2 ) b
+            on
+          (a.key = b.key)
+;
+
+select a.key , a.value , b.value , 'day1' as day, 1 as pri
+from
+( select key, value
+  from bucket_big where day='day1' ) a
+  left outer join
+  ( select key, value
+  from bucket_small
+  where pri between 1 and 2 ) b
+on
 (a.key = b.key) 
-; 
+;
+
+set hive.auto.convert.join=true;
+set hive.auto.convert.join.noconditionaltask.size=1;
+set hive.auto.convert.sortmerge.join=true;
+
+explain select a.key , a.value , b.value , 'day1' as day, 1 as pri
+from
+( select key, value
+  from bucket_big where day='day1' ) a
+left outer join
+( select key, value
+  from bucket_small
+  where pri between 1 and 2 ) b
+on
+(a.key = b.key)
+;
+
+select a.key , a.value , b.value , 'day1' as day, 1 as pri
+from
+( select key, value
+  from bucket_big where day='day1' ) a
+left outer join
+( select key, value
+  from bucket_small
+  where pri between 1 and 2 ) b
+on
+(a.key = b.key)
+;
+
+drop table bucket_big;
+drop table bucket_small;
+
+-- Test to make sure SMB is not kicked in when small table has more buckets 
than big table
+
+CREATE TABLE bucket_big
+(
+key BIGINT,
+value STRING
+)
+PARTITIONED BY (day STRING, pri bigint)
+clustered by (key) sorted by (key) into 12 buckets
+stored as RCFile;
+
+CREATE TABLE bucket_small
+(
+key BIGINT,
+value string
+)
+PARTITIONED BY (pri bigint)
+clustered by (key) sorted by (key) into 24 buckets
+stored as RCFile;
+
+insert overwrite table bucket_small partition(pri)
+select
+key,
+value,
+file_tag as pri
+from
+stage_bucket_small
+where file_tag between 1 and 2;
 
+insert overwrite table bucket_big partition(day,pri)
+select key, value, 'day1' as day, 1 as pri
+from stage_bucket_big
+where file_tag='1';
+
+
+explain select a.key , a.value , b.value , 'day1' as day, 1 as pri
+        from
+        ( select key, value
+          from bucket_big where day='day1' ) a
+        left outer join
+        ( select key, value
+          from bucket_small
+          where pri between 1 and 2 ) b
+        on
+        (a.key = b.key)
+;
+
+select a.key , a.value , b.value , 'day1' as day, 1 as pri
+from
+( select key, value
+  from bucket_big where day='day1' ) a
+left outer join
+( select key, value
+  from bucket_small
+  where pri between 1 and 2 ) b
+on
+(a.key = b.key)
+;

http://git-wip-us.apache.org/repos/asf/hive/blob/9fdb601e/ql/src/test/results/clientnegative/bucket_mapjoin_mismatch1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/bucket_mapjoin_mismatch1.q.out 
b/ql/src/test/results/clientnegative/bucket_mapjoin_mismatch1.q.out
index b9c2e6f..6ca6c5d 100644
--- a/ql/src/test/results/clientnegative/bucket_mapjoin_mismatch1.q.out
+++ b/ql/src/test/results/clientnegative/bucket_mapjoin_mismatch1.q.out
@@ -53,23 +53,23 @@ POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key 
int, value string)
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@srcbucket_mapjoin_part_2
-PREHOOK: query: load data local inpath '../../data/files/bmj/000002_0'
+PREHOOK: query: load data local inpath '../../data/files/bmj/000000_0'
   INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08')
 PREHOOK: type: LOAD
 #### A masked pattern was here ####
 PREHOOK: Output: default@srcbucket_mapjoin_part_2
-POSTHOOK: query: load data local inpath '../../data/files/bmj/000002_0'
+POSTHOOK: query: load data local inpath '../../data/files/bmj/000000_0'
   INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08')
 POSTHOOK: type: LOAD
 #### A masked pattern was here ####
 POSTHOOK: Output: default@srcbucket_mapjoin_part_2
 POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08
-PREHOOK: query: load data local inpath '../../data/files/bmj/000003_0'
+PREHOOK: query: load data local inpath '../../data/files/bmj/000001_0'
   INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08')
 PREHOOK: type: LOAD
 #### A masked pattern was here ####
 PREHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08
-POSTHOOK: query: load data local inpath '../../data/files/bmj/000003_0'
+POSTHOOK: query: load data local inpath '../../data/files/bmj/000001_0'
   INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08')
 POSTHOOK: type: LOAD
 #### A masked pattern was here ####
@@ -110,19 +110,19 @@ STAGE PLANS:
                   value expressions: _col1 (type: string)
           TableScan
             alias: b
-            Statistics: Num rows: 78 Data size: 30620 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 70 Data size: 27500 Basic stats: COMPLETE 
Column stats: NONE
             Filter Operator
               predicate: key is not null (type: boolean)
-              Statistics: Num rows: 78 Data size: 30620 Basic stats: COMPLETE 
Column stats: NONE
+              Statistics: Num rows: 70 Data size: 27500 Basic stats: COMPLETE 
Column stats: NONE
               Select Operator
                 expressions: key (type: int), value (type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 78 Data size: 30620 Basic stats: 
COMPLETE Column stats: NONE
+                Statistics: Num rows: 70 Data size: 27500 Basic stats: 
COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: int)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: int)
-                  Statistics: Num rows: 78 Data size: 30620 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 70 Data size: 27500 Basic stats: 
COMPLETE Column stats: NONE
                   value expressions: _col1 (type: string)
       Reduce Operator Tree:
         Join Operator
@@ -177,10 +177,10 @@ STAGE PLANS:
         b 
           TableScan
             alias: b
-            Statistics: Num rows: 102 Data size: 30620 Basic stats: COMPLETE 
Column stats: NONE
+            Statistics: Num rows: 92 Data size: 27500 Basic stats: COMPLETE 
Column stats: NONE
             Filter Operator
               predicate: key is not null (type: boolean)
-              Statistics: Num rows: 102 Data size: 30620 Basic stats: COMPLETE 
Column stats: NONE
+              Statistics: Num rows: 92 Data size: 27500 Basic stats: COMPLETE 
Column stats: NONE
               HashTable Sink Operator
                 keys:
                   0 key (type: int)

http://git-wip-us.apache.org/repos/asf/hive/blob/9fdb601e/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_16.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_16.q.out 
b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_16.q.out
index 91408df..b67deea 100644
--- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_16.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_16.q.out
@@ -136,58 +136,130 @@ POSTHOOK: type: LOAD
 #### A masked pattern was here ####
 POSTHOOK: Output: default@stage_bucket_big
 POSTHOOK: Output: default@stage_bucket_big@file_tag=1
-PREHOOK: query: insert overwrite table bucket_big partition(day,pri) 
-select 
-key, 
-value, 
-'day1' as day, 
-1 as pri 
-from 
-stage_bucket_big 
-where 
-file_tag='1'
+PREHOOK: query: insert overwrite table bucket_big partition(day,pri)
+ select key, value, 'day1' as day, 1 as pri
+   from stage_bucket_big
+   where file_tag='1'
 PREHOOK: type: QUERY
 PREHOOK: Input: default@stage_bucket_big
 PREHOOK: Input: default@stage_bucket_big@file_tag=1
 PREHOOK: Output: default@bucket_big
-POSTHOOK: query: insert overwrite table bucket_big partition(day,pri) 
-select 
-key, 
-value, 
-'day1' as day, 
-1 as pri 
-from 
-stage_bucket_big 
-where 
-file_tag='1'
+POSTHOOK: query: insert overwrite table bucket_big partition(day,pri)
+ select key, value, 'day1' as day, 1 as pri
+   from stage_bucket_big
+   where file_tag='1'
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@stage_bucket_big
 POSTHOOK: Input: default@stage_bucket_big@file_tag=1
 POSTHOOK: Output: default@bucket_big@day=day1/pri=1
 POSTHOOK: Lineage: bucket_big PARTITION(day=day1,pri=1).key SIMPLE 
[(stage_bucket_big)stage_bucket_big.FieldSchema(name:key, type:bigint, 
comment:null), ]
 POSTHOOK: Lineage: bucket_big PARTITION(day=day1,pri=1).value SIMPLE 
[(stage_bucket_big)stage_bucket_big.FieldSchema(name:value, type:string, 
comment:null), ]
-PREHOOK: query: select 
-a.key , 
-a.value , 
-b.value , 
-'day1' as day, 
-1 as pri 
-from 
-( 
-select 
-key, 
-value 
-from bucket_big where day='day1'
-) a 
-left outer join 
-( 
-select 
-key, 
-value
-from bucket_small 
-where pri between 1 and 2
-) b 
-on 
+PREHOOK: query: explain select a.key , a.value , b.value , 'day1' as day, 1 as 
pri
+        from
+        ( select key, value
+          from bucket_big where day='day1' ) a
+          left outer join
+          ( select key, value
+            from bucket_small
+            where pri between 1 and 2 ) b
+            on
+          (a.key = b.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select a.key , a.value , b.value , 'day1' as day, 1 
as pri
+        from
+        ( select key, value
+          from bucket_big where day='day1' ) a
+          left outer join
+          ( select key, value
+            from bucket_small
+            where pri between 1 and 2 ) b
+            on
+          (a.key = b.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: bucket_big
+                  Statistics: Num rows: 5 Data size: 495 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Select Operator
+                    expressions: key (type: bigint), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 5 Data size: 495 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: bigint)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: bigint)
+                      Statistics: Num rows: 5 Data size: 495 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      value expressions: _col1 (type: string)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: bucket_small
+                  Statistics: Num rows: 236 Data size: 23364 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: key (type: bigint), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 236 Data size: 23364 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: bigint)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: bigint)
+                      Statistics: Num rows: 236 Data size: 23364 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      value expressions: _col1 (type: string)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Left Outer Join 0 to 1
+                keys:
+                  0 _col0 (type: bigint)
+                  1 _col0 (type: bigint)
+                outputColumnNames: _col0, _col1, _col3
+                Statistics: Num rows: 15 Data size: 2850 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: bigint), _col1 (type: string), 
_col3 (type: string), 'day1' (type: string), 1 (type: int)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                  Statistics: Num rows: 15 Data size: 4230 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 15 Data size: 4230 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select a.key , a.value , b.value , 'day1' as day, 1 as pri
+from
+( select key, value
+  from bucket_big where day='day1' ) a
+  left outer join
+  ( select key, value
+  from bucket_small
+  where pri between 1 and 2 ) b
+on
 (a.key = b.key)
 PREHOOK: type: QUERY
 PREHOOK: Input: default@bucket_big
@@ -196,28 +268,390 @@ PREHOOK: Input: default@bucket_small
 PREHOOK: Input: default@bucket_small@pri=1
 PREHOOK: Input: default@bucket_small@pri=2
 #### A masked pattern was here ####
-POSTHOOK: query: select 
-a.key , 
-a.value , 
-b.value , 
-'day1' as day, 
-1 as pri 
-from 
-( 
-select 
-key, 
-value 
-from bucket_big where day='day1'
-) a 
-left outer join 
-( 
-select 
-key, 
-value
-from bucket_small 
-where pri between 1 and 2
-) b 
-on 
+POSTHOOK: query: select a.key , a.value , b.value , 'day1' as day, 1 as pri
+from
+( select key, value
+  from bucket_big where day='day1' ) a
+  left outer join
+  ( select key, value
+  from bucket_small
+  where pri between 1 and 2 ) b
+on
+(a.key = b.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@bucket_big
+POSTHOOK: Input: default@bucket_big@day=day1/pri=1
+POSTHOOK: Input: default@bucket_small
+POSTHOOK: Input: default@bucket_small@pri=1
+POSTHOOK: Input: default@bucket_small@pri=2
+#### A masked pattern was here ####
+0      val_0   val_0   day1    1
+0      val_0   val_0   day1    1
+0      val_0   val_0   day1    1
+0      val_0   val_0   day1    1
+0      val_0   val_0   day1    1
+0      val_0   val_0   day1    1
+103    val_103 val_103 day1    1
+103    val_103 val_103 day1    1
+103    val_103 val_103 day1    1
+103    val_103 val_103 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+172    val_172 val_172 day1    1
+172    val_172 val_172 day1    1
+172    val_172 val_172 day1    1
+172    val_172 val_172 day1    1
+374    val_374 val_374 day1    1
+374    val_374 val_374 day1    1
+PREHOOK: query: explain select a.key , a.value , b.value , 'day1' as day, 1 as 
pri
+from
+( select key, value
+  from bucket_big where day='day1' ) a
+left outer join
+( select key, value
+  from bucket_small
+  where pri between 1 and 2 ) b
+on
+(a.key = b.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select a.key , a.value , b.value , 'day1' as day, 1 
as pri
+from
+( select key, value
+  from bucket_big where day='day1' ) a
+left outer join
+( select key, value
+  from bucket_small
+  where pri between 1 and 2 ) b
+on
+(a.key = b.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: bucket_small
+                  Statistics: Num rows: 236 Data size: 23364 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: key (type: bigint), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 236 Data size: 23364 Basic stats: 
COMPLETE Column stats: COMPLETE
+            Map Operator Tree:
+                TableScan
+                  alias: bucket_big
+                  Statistics: Num rows: 5 Data size: 495 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Select Operator
+                    expressions: key (type: bigint), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 5 Data size: 495 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Merge Join Operator
+                      condition map:
+                           Left Outer Join 0 to 1
+                      keys:
+                        0 _col0 (type: bigint)
+                        1 _col0 (type: bigint)
+                      outputColumnNames: _col0, _col1, _col3
+                      Statistics: Num rows: 15 Data size: 2850 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Select Operator
+                        expressions: _col0 (type: bigint), _col1 (type: 
string), _col3 (type: string), 'day1' (type: string), 1 (type: int)
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                        Statistics: Num rows: 15 Data size: 4230 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        File Output Operator
+                          compressed: false
+                          Statistics: Num rows: 15 Data size: 4230 Basic 
stats: COMPLETE Column stats: COMPLETE
+                          table:
+                              input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                              output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                              serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: llap
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select a.key , a.value , b.value , 'day1' as day, 1 as pri
+from
+( select key, value
+  from bucket_big where day='day1' ) a
+left outer join
+( select key, value
+  from bucket_small
+  where pri between 1 and 2 ) b
+on
+(a.key = b.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@bucket_big
+PREHOOK: Input: default@bucket_big@day=day1/pri=1
+PREHOOK: Input: default@bucket_small
+PREHOOK: Input: default@bucket_small@pri=1
+PREHOOK: Input: default@bucket_small@pri=2
+#### A masked pattern was here ####
+POSTHOOK: query: select a.key , a.value , b.value , 'day1' as day, 1 as pri
+from
+( select key, value
+  from bucket_big where day='day1' ) a
+left outer join
+( select key, value
+  from bucket_small
+  where pri between 1 and 2 ) b
+on
+(a.key = b.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@bucket_big
+POSTHOOK: Input: default@bucket_big@day=day1/pri=1
+POSTHOOK: Input: default@bucket_small
+POSTHOOK: Input: default@bucket_small@pri=1
+POSTHOOK: Input: default@bucket_small@pri=2
+#### A masked pattern was here ####
+0      val_0   val_0   day1    1
+0      val_0   val_0   day1    1
+0      val_0   val_0   day1    1
+0      val_0   val_0   day1    1
+0      val_0   val_0   day1    1
+0      val_0   val_0   day1    1
+103    val_103 val_103 day1    1
+103    val_103 val_103 day1    1
+103    val_103 val_103 day1    1
+103    val_103 val_103 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+172    val_172 val_172 day1    1
+172    val_172 val_172 day1    1
+172    val_172 val_172 day1    1
+172    val_172 val_172 day1    1
+374    val_374 val_374 day1    1
+374    val_374 val_374 day1    1
+PREHOOK: query: drop table bucket_big
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@bucket_big
+PREHOOK: Output: default@bucket_big
+POSTHOOK: query: drop table bucket_big
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@bucket_big
+POSTHOOK: Output: default@bucket_big
+PREHOOK: query: drop table bucket_small
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@bucket_small
+PREHOOK: Output: default@bucket_small
+POSTHOOK: query: drop table bucket_small
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@bucket_small
+POSTHOOK: Output: default@bucket_small
+PREHOOK: query: CREATE TABLE bucket_big
+(
+key BIGINT,
+value STRING
+)
+PARTITIONED BY (day STRING, pri bigint)
+clustered by (key) sorted by (key) into 12 buckets
+stored as RCFile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@bucket_big
+POSTHOOK: query: CREATE TABLE bucket_big
+(
+key BIGINT,
+value STRING
+)
+PARTITIONED BY (day STRING, pri bigint)
+clustered by (key) sorted by (key) into 12 buckets
+stored as RCFile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@bucket_big
+PREHOOK: query: CREATE TABLE bucket_small
+(
+key BIGINT,
+value string
+)
+PARTITIONED BY (pri bigint)
+clustered by (key) sorted by (key) into 24 buckets
+stored as RCFile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@bucket_small
+POSTHOOK: query: CREATE TABLE bucket_small
+(
+key BIGINT,
+value string
+)
+PARTITIONED BY (pri bigint)
+clustered by (key) sorted by (key) into 24 buckets
+stored as RCFile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@bucket_small
+PREHOOK: query: insert overwrite table bucket_small partition(pri)
+select
+key,
+value,
+file_tag as pri
+from
+stage_bucket_small
+where file_tag between 1 and 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@stage_bucket_small
+PREHOOK: Input: default@stage_bucket_small@file_tag=1
+PREHOOK: Input: default@stage_bucket_small@file_tag=2
+PREHOOK: Output: default@bucket_small
+POSTHOOK: query: insert overwrite table bucket_small partition(pri)
+select
+key,
+value,
+file_tag as pri
+from
+stage_bucket_small
+where file_tag between 1 and 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@stage_bucket_small
+POSTHOOK: Input: default@stage_bucket_small@file_tag=1
+POSTHOOK: Input: default@stage_bucket_small@file_tag=2
+POSTHOOK: Output: default@bucket_small@pri=1
+POSTHOOK: Output: default@bucket_small@pri=2
+POSTHOOK: Lineage: bucket_small PARTITION(pri=1).key SIMPLE 
[(stage_bucket_small)stage_bucket_small.FieldSchema(name:key, type:bigint, 
comment:null), ]
+POSTHOOK: Lineage: bucket_small PARTITION(pri=1).value SIMPLE 
[(stage_bucket_small)stage_bucket_small.FieldSchema(name:value, type:string, 
comment:null), ]
+POSTHOOK: Lineage: bucket_small PARTITION(pri=2).key SIMPLE 
[(stage_bucket_small)stage_bucket_small.FieldSchema(name:key, type:bigint, 
comment:null), ]
+POSTHOOK: Lineage: bucket_small PARTITION(pri=2).value SIMPLE 
[(stage_bucket_small)stage_bucket_small.FieldSchema(name:value, type:string, 
comment:null), ]
+PREHOOK: query: insert overwrite table bucket_big partition(day,pri)
+select key, value, 'day1' as day, 1 as pri
+from stage_bucket_big
+where file_tag='1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@stage_bucket_big
+PREHOOK: Input: default@stage_bucket_big@file_tag=1
+PREHOOK: Output: default@bucket_big
+POSTHOOK: query: insert overwrite table bucket_big partition(day,pri)
+select key, value, 'day1' as day, 1 as pri
+from stage_bucket_big
+where file_tag='1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@stage_bucket_big
+POSTHOOK: Input: default@stage_bucket_big@file_tag=1
+POSTHOOK: Output: default@bucket_big@day=day1/pri=1
+POSTHOOK: Lineage: bucket_big PARTITION(day=day1,pri=1).key SIMPLE 
[(stage_bucket_big)stage_bucket_big.FieldSchema(name:key, type:bigint, 
comment:null), ]
+POSTHOOK: Lineage: bucket_big PARTITION(day=day1,pri=1).value SIMPLE 
[(stage_bucket_big)stage_bucket_big.FieldSchema(name:value, type:string, 
comment:null), ]
+PREHOOK: query: explain select a.key , a.value , b.value , 'day1' as day, 1 as 
pri
+        from
+        ( select key, value
+          from bucket_big where day='day1' ) a
+        left outer join
+        ( select key, value
+          from bucket_small
+          where pri between 1 and 2 ) b
+        on
+        (a.key = b.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select a.key , a.value , b.value , 'day1' as day, 1 
as pri
+        from
+        ( select key, value
+          from bucket_big where day='day1' ) a
+        left outer join
+        ( select key, value
+          from bucket_small
+          where pri between 1 and 2 ) b
+        on
+        (a.key = b.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: bucket_small
+                  Statistics: Num rows: 236 Data size: 23364 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: key (type: bigint), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 236 Data size: 23364 Basic stats: 
COMPLETE Column stats: COMPLETE
+            Map Operator Tree:
+                TableScan
+                  alias: bucket_big
+                  Statistics: Num rows: 5 Data size: 495 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Select Operator
+                    expressions: key (type: bigint), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 5 Data size: 495 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Merge Join Operator
+                      condition map:
+                           Left Outer Join 0 to 1
+                      keys:
+                        0 _col0 (type: bigint)
+                        1 _col0 (type: bigint)
+                      outputColumnNames: _col0, _col1, _col3
+                      Statistics: Num rows: 15 Data size: 2850 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Select Operator
+                        expressions: _col0 (type: bigint), _col1 (type: 
string), _col3 (type: string), 'day1' (type: string), 1 (type: int)
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                        Statistics: Num rows: 15 Data size: 4230 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        File Output Operator
+                          compressed: false
+                          Statistics: Num rows: 15 Data size: 4230 Basic 
stats: COMPLETE Column stats: COMPLETE
+                          table:
+                              input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                              output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                              serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: llap
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select a.key , a.value , b.value , 'day1' as day, 1 as pri
+from
+( select key, value
+  from bucket_big where day='day1' ) a
+left outer join
+( select key, value
+  from bucket_small
+  where pri between 1 and 2 ) b
+on
+(a.key = b.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@bucket_big
+PREHOOK: Input: default@bucket_big@day=day1/pri=1
+PREHOOK: Input: default@bucket_small
+PREHOOK: Input: default@bucket_small@pri=1
+PREHOOK: Input: default@bucket_small@pri=2
+#### A masked pattern was here ####
+POSTHOOK: query: select a.key , a.value , b.value , 'day1' as day, 1 as pri
+from
+( select key, value
+  from bucket_big where day='day1' ) a
+left outer join
+( select key, value
+  from bucket_small
+  where pri between 1 and 2 ) b
+on
 (a.key = b.key)
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@bucket_big

http://git-wip-us.apache.org/repos/asf/hive/blob/9fdb601e/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_16.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_16.q.out 
b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_16.q.out
index 9939e83..3e2aafc 100644
--- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_16.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_16.q.out
@@ -136,58 +136,124 @@ POSTHOOK: type: LOAD
 #### A masked pattern was here ####
 POSTHOOK: Output: default@stage_bucket_big
 POSTHOOK: Output: default@stage_bucket_big@file_tag=1
-PREHOOK: query: insert overwrite table bucket_big partition(day,pri) 
-select 
-key, 
-value, 
-'day1' as day, 
-1 as pri 
-from 
-stage_bucket_big 
-where 
-file_tag='1'
+PREHOOK: query: insert overwrite table bucket_big partition(day,pri)
+ select key, value, 'day1' as day, 1 as pri
+   from stage_bucket_big
+   where file_tag='1'
 PREHOOK: type: QUERY
 PREHOOK: Input: default@stage_bucket_big
 PREHOOK: Input: default@stage_bucket_big@file_tag=1
 PREHOOK: Output: default@bucket_big
-POSTHOOK: query: insert overwrite table bucket_big partition(day,pri) 
-select 
-key, 
-value, 
-'day1' as day, 
-1 as pri 
-from 
-stage_bucket_big 
-where 
-file_tag='1'
+POSTHOOK: query: insert overwrite table bucket_big partition(day,pri)
+ select key, value, 'day1' as day, 1 as pri
+   from stage_bucket_big
+   where file_tag='1'
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@stage_bucket_big
 POSTHOOK: Input: default@stage_bucket_big@file_tag=1
 POSTHOOK: Output: default@bucket_big@day=day1/pri=1
 POSTHOOK: Lineage: bucket_big PARTITION(day=day1,pri=1).key SIMPLE 
[(stage_bucket_big)stage_bucket_big.FieldSchema(name:key, type:bigint, 
comment:null), ]
 POSTHOOK: Lineage: bucket_big PARTITION(day=day1,pri=1).value SIMPLE 
[(stage_bucket_big)stage_bucket_big.FieldSchema(name:value, type:string, 
comment:null), ]
-PREHOOK: query: select 
-a.key , 
-a.value , 
-b.value , 
-'day1' as day, 
-1 as pri 
-from 
-( 
-select 
-key, 
-value 
-from bucket_big where day='day1'
-) a 
-left outer join 
-( 
-select 
-key, 
-value
-from bucket_small 
-where pri between 1 and 2
-) b 
-on 
+PREHOOK: query: explain select a.key , a.value , b.value , 'day1' as day, 1 as 
pri
+        from
+        ( select key, value
+          from bucket_big where day='day1' ) a
+          left outer join
+          ( select key, value
+            from bucket_small
+            where pri between 1 and 2 ) b
+            on
+          (a.key = b.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select a.key , a.value , b.value , 'day1' as day, 1 
as pri
+        from
+        ( select key, value
+          from bucket_big where day='day1' ) a
+          left outer join
+          ( select key, value
+            from bucket_small
+            where pri between 1 and 2 ) b
+            on
+          (a.key = b.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 3 (PARTITION-LEVEL 
SORT, 4)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: bucket_big
+                  Statistics: Num rows: 5 Data size: 46 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: key (type: bigint), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 5 Data size: 46 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: bigint)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: bigint)
+                      Statistics: Num rows: 5 Data size: 46 Basic stats: 
COMPLETE Column stats: NONE
+                      value expressions: _col1 (type: string)
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: bucket_small
+                  Statistics: Num rows: 236 Data size: 2244 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: bigint), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 236 Data size: 2244 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: bigint)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: bigint)
+                      Statistics: Num rows: 236 Data size: 2244 Basic stats: 
COMPLETE Column stats: NONE
+                      value expressions: _col1 (type: string)
+        Reducer 2 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Left Outer Join 0 to 1
+                keys:
+                  0 _col0 (type: bigint)
+                  1 _col0 (type: bigint)
+                outputColumnNames: _col0, _col1, _col3
+                Statistics: Num rows: 259 Data size: 2468 Basic stats: 
COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: bigint), _col1 (type: string), 
_col3 (type: string), 'day1' (type: string), 1 (type: int)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                  Statistics: Num rows: 259 Data size: 2468 Basic stats: 
COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 259 Data size: 2468 Basic stats: 
COMPLETE Column stats: NONE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select a.key , a.value , b.value , 'day1' as day, 1 as pri
+from
+( select key, value
+  from bucket_big where day='day1' ) a
+  left outer join
+  ( select key, value
+  from bucket_small
+  where pri between 1 and 2 ) b
+on
 (a.key = b.key)
 PREHOOK: type: QUERY
 PREHOOK: Input: default@bucket_big
@@ -196,28 +262,372 @@ PREHOOK: Input: default@bucket_small
 PREHOOK: Input: default@bucket_small@pri=1
 PREHOOK: Input: default@bucket_small@pri=2
 PREHOOK: Output: hdfs://### HDFS PATH ###
-POSTHOOK: query: select 
-a.key , 
-a.value , 
-b.value , 
-'day1' as day, 
-1 as pri 
-from 
-( 
-select 
-key, 
-value 
-from bucket_big where day='day1'
-) a 
-left outer join 
-( 
-select 
-key, 
-value
-from bucket_small 
-where pri between 1 and 2
-) b 
-on 
+POSTHOOK: query: select a.key , a.value , b.value , 'day1' as day, 1 as pri
+from
+( select key, value
+  from bucket_big where day='day1' ) a
+  left outer join
+  ( select key, value
+  from bucket_small
+  where pri between 1 and 2 ) b
+on
+(a.key = b.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@bucket_big
+POSTHOOK: Input: default@bucket_big@day=day1/pri=1
+POSTHOOK: Input: default@bucket_small
+POSTHOOK: Input: default@bucket_small@pri=1
+POSTHOOK: Input: default@bucket_small@pri=2
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+0      val_0   val_0   day1    1
+0      val_0   val_0   day1    1
+0      val_0   val_0   day1    1
+0      val_0   val_0   day1    1
+0      val_0   val_0   day1    1
+0      val_0   val_0   day1    1
+103    val_103 val_103 day1    1
+103    val_103 val_103 day1    1
+103    val_103 val_103 day1    1
+103    val_103 val_103 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+172    val_172 val_172 day1    1
+172    val_172 val_172 day1    1
+172    val_172 val_172 day1    1
+172    val_172 val_172 day1    1
+374    val_374 val_374 day1    1
+374    val_374 val_374 day1    1
+PREHOOK: query: explain select a.key , a.value , b.value , 'day1' as day, 1 as 
pri
+from
+( select key, value
+  from bucket_big where day='day1' ) a
+left outer join
+( select key, value
+  from bucket_small
+  where pri between 1 and 2 ) b
+on
+(a.key = b.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select a.key , a.value , b.value , 'day1' as day, 1 
as pri
+from
+( select key, value
+  from bucket_big where day='day1' ) a
+left outer join
+( select key, value
+  from bucket_small
+  where pri between 1 and 2 ) b
+on
+(a.key = b.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: bucket_big
+                  Statistics: Num rows: 5 Data size: 46 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: key (type: bigint), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 5 Data size: 46 Basic stats: 
COMPLETE Column stats: NONE
+                    Sorted Merge Bucket Map Join Operator
+                      condition map:
+                           Left Outer Join 0 to 1
+                      keys:
+                        0 _col0 (type: bigint)
+                        1 _col0 (type: bigint)
+                      outputColumnNames: _col0, _col1, _col3
+                      Statistics: Num rows: 259 Data size: 2468 Basic stats: 
COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: _col0 (type: bigint), _col1 (type: 
string), _col3 (type: string), 'day1' (type: string), 1 (type: int)
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                        Statistics: Num rows: 259 Data size: 2468 Basic stats: 
COMPLETE Column stats: NONE
+                        File Output Operator
+                          compressed: false
+                          Statistics: Num rows: 259 Data size: 2468 Basic 
stats: COMPLETE Column stats: NONE
+                          table:
+                              input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                              output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                              serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select a.key , a.value , b.value , 'day1' as day, 1 as pri
+from
+( select key, value
+  from bucket_big where day='day1' ) a
+left outer join
+( select key, value
+  from bucket_small
+  where pri between 1 and 2 ) b
+on
+(a.key = b.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@bucket_big
+PREHOOK: Input: default@bucket_big@day=day1/pri=1
+PREHOOK: Input: default@bucket_small
+PREHOOK: Input: default@bucket_small@pri=1
+PREHOOK: Input: default@bucket_small@pri=2
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select a.key , a.value , b.value , 'day1' as day, 1 as pri
+from
+( select key, value
+  from bucket_big where day='day1' ) a
+left outer join
+( select key, value
+  from bucket_small
+  where pri between 1 and 2 ) b
+on
+(a.key = b.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@bucket_big
+POSTHOOK: Input: default@bucket_big@day=day1/pri=1
+POSTHOOK: Input: default@bucket_small
+POSTHOOK: Input: default@bucket_small@pri=1
+POSTHOOK: Input: default@bucket_small@pri=2
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+0      val_0   val_0   day1    1
+0      val_0   val_0   day1    1
+0      val_0   val_0   day1    1
+0      val_0   val_0   day1    1
+0      val_0   val_0   day1    1
+0      val_0   val_0   day1    1
+103    val_103 val_103 day1    1
+103    val_103 val_103 day1    1
+103    val_103 val_103 day1    1
+103    val_103 val_103 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+172    val_172 val_172 day1    1
+172    val_172 val_172 day1    1
+172    val_172 val_172 day1    1
+172    val_172 val_172 day1    1
+374    val_374 val_374 day1    1
+374    val_374 val_374 day1    1
+PREHOOK: query: drop table bucket_big
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@bucket_big
+PREHOOK: Output: default@bucket_big
+POSTHOOK: query: drop table bucket_big
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@bucket_big
+POSTHOOK: Output: default@bucket_big
+PREHOOK: query: drop table bucket_small
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@bucket_small
+PREHOOK: Output: default@bucket_small
+POSTHOOK: query: drop table bucket_small
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@bucket_small
+POSTHOOK: Output: default@bucket_small
+PREHOOK: query: CREATE TABLE bucket_big
+(
+key BIGINT,
+value STRING
+)
+PARTITIONED BY (day STRING, pri bigint)
+clustered by (key) sorted by (key) into 12 buckets
+stored as RCFile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@bucket_big
+POSTHOOK: query: CREATE TABLE bucket_big
+(
+key BIGINT,
+value STRING
+)
+PARTITIONED BY (day STRING, pri bigint)
+clustered by (key) sorted by (key) into 12 buckets
+stored as RCFile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@bucket_big
+PREHOOK: query: CREATE TABLE bucket_small
+(
+key BIGINT,
+value string
+)
+PARTITIONED BY (pri bigint)
+clustered by (key) sorted by (key) into 24 buckets
+stored as RCFile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@bucket_small
+POSTHOOK: query: CREATE TABLE bucket_small
+(
+key BIGINT,
+value string
+)
+PARTITIONED BY (pri bigint)
+clustered by (key) sorted by (key) into 24 buckets
+stored as RCFile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@bucket_small
+PREHOOK: query: insert overwrite table bucket_small partition(pri)
+select
+key,
+value,
+file_tag as pri
+from
+stage_bucket_small
+where file_tag between 1 and 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@stage_bucket_small
+PREHOOK: Input: default@stage_bucket_small@file_tag=1
+PREHOOK: Input: default@stage_bucket_small@file_tag=2
+PREHOOK: Output: default@bucket_small
+POSTHOOK: query: insert overwrite table bucket_small partition(pri)
+select
+key,
+value,
+file_tag as pri
+from
+stage_bucket_small
+where file_tag between 1 and 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@stage_bucket_small
+POSTHOOK: Input: default@stage_bucket_small@file_tag=1
+POSTHOOK: Input: default@stage_bucket_small@file_tag=2
+POSTHOOK: Output: default@bucket_small@pri=1
+POSTHOOK: Output: default@bucket_small@pri=2
+POSTHOOK: Lineage: bucket_small PARTITION(pri=1).key SIMPLE 
[(stage_bucket_small)stage_bucket_small.FieldSchema(name:key, type:bigint, 
comment:null), ]
+POSTHOOK: Lineage: bucket_small PARTITION(pri=1).value SIMPLE 
[(stage_bucket_small)stage_bucket_small.FieldSchema(name:value, type:string, 
comment:null), ]
+POSTHOOK: Lineage: bucket_small PARTITION(pri=2).key SIMPLE 
[(stage_bucket_small)stage_bucket_small.FieldSchema(name:key, type:bigint, 
comment:null), ]
+POSTHOOK: Lineage: bucket_small PARTITION(pri=2).value SIMPLE 
[(stage_bucket_small)stage_bucket_small.FieldSchema(name:value, type:string, 
comment:null), ]
+PREHOOK: query: insert overwrite table bucket_big partition(day,pri)
+select key, value, 'day1' as day, 1 as pri
+from stage_bucket_big
+where file_tag='1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@stage_bucket_big
+PREHOOK: Input: default@stage_bucket_big@file_tag=1
+PREHOOK: Output: default@bucket_big
+POSTHOOK: query: insert overwrite table bucket_big partition(day,pri)
+select key, value, 'day1' as day, 1 as pri
+from stage_bucket_big
+where file_tag='1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@stage_bucket_big
+POSTHOOK: Input: default@stage_bucket_big@file_tag=1
+POSTHOOK: Output: default@bucket_big@day=day1/pri=1
+POSTHOOK: Lineage: bucket_big PARTITION(day=day1,pri=1).key SIMPLE 
[(stage_bucket_big)stage_bucket_big.FieldSchema(name:key, type:bigint, 
comment:null), ]
+POSTHOOK: Lineage: bucket_big PARTITION(day=day1,pri=1).value SIMPLE 
[(stage_bucket_big)stage_bucket_big.FieldSchema(name:value, type:string, 
comment:null), ]
+PREHOOK: query: explain select a.key , a.value , b.value , 'day1' as day, 1 as 
pri
+        from
+        ( select key, value
+          from bucket_big where day='day1' ) a
+        left outer join
+        ( select key, value
+          from bucket_small
+          where pri between 1 and 2 ) b
+        on
+        (a.key = b.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select a.key , a.value , b.value , 'day1' as day, 1 
as pri
+        from
+        ( select key, value
+          from bucket_big where day='day1' ) a
+        left outer join
+        ( select key, value
+          from bucket_small
+          where pri between 1 and 2 ) b
+        on
+        (a.key = b.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: bucket_big
+                  Statistics: Num rows: 5 Data size: 46 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: key (type: bigint), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 5 Data size: 46 Basic stats: 
COMPLETE Column stats: NONE
+                    Sorted Merge Bucket Map Join Operator
+                      condition map:
+                           Left Outer Join 0 to 1
+                      keys:
+                        0 _col0 (type: bigint)
+                        1 _col0 (type: bigint)
+                      outputColumnNames: _col0, _col1, _col3
+                      Statistics: Num rows: 259 Data size: 2468 Basic stats: 
COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: _col0 (type: bigint), _col1 (type: 
string), _col3 (type: string), 'day1' (type: string), 1 (type: int)
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                        Statistics: Num rows: 259 Data size: 2468 Basic stats: 
COMPLETE Column stats: NONE
+                        File Output Operator
+                          compressed: false
+                          Statistics: Num rows: 259 Data size: 2468 Basic 
stats: COMPLETE Column stats: NONE
+                          table:
+                              input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                              output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                              serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select a.key , a.value , b.value , 'day1' as day, 1 as pri
+from
+( select key, value
+  from bucket_big where day='day1' ) a
+left outer join
+( select key, value
+  from bucket_small
+  where pri between 1 and 2 ) b
+on
+(a.key = b.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@bucket_big
+PREHOOK: Input: default@bucket_big@day=day1/pri=1
+PREHOOK: Input: default@bucket_small
+PREHOOK: Input: default@bucket_small@pri=1
+PREHOOK: Input: default@bucket_small@pri=2
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select a.key , a.value , b.value , 'day1' as day, 1 as pri
+from
+( select key, value
+  from bucket_big where day='day1' ) a
+left outer join
+( select key, value
+  from bucket_small
+  where pri between 1 and 2 ) b
+on
 (a.key = b.key)
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@bucket_big

http://git-wip-us.apache.org/repos/asf/hive/blob/9fdb601e/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_16.q.out_spark
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_16.q.out_spark 
b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_16.q.out_spark
index 91408df..6aa60ba 100644
--- 
a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_16.q.out_spark
+++ 
b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_16.q.out_spark
@@ -136,58 +136,124 @@ POSTHOOK: type: LOAD
 #### A masked pattern was here ####
 POSTHOOK: Output: default@stage_bucket_big
 POSTHOOK: Output: default@stage_bucket_big@file_tag=1
-PREHOOK: query: insert overwrite table bucket_big partition(day,pri) 
-select 
-key, 
-value, 
-'day1' as day, 
-1 as pri 
-from 
-stage_bucket_big 
-where 
-file_tag='1'
+PREHOOK: query: insert overwrite table bucket_big partition(day,pri)
+ select key, value, 'day1' as day, 1 as pri
+   from stage_bucket_big
+   where file_tag='1'
 PREHOOK: type: QUERY
 PREHOOK: Input: default@stage_bucket_big
 PREHOOK: Input: default@stage_bucket_big@file_tag=1
 PREHOOK: Output: default@bucket_big
-POSTHOOK: query: insert overwrite table bucket_big partition(day,pri) 
-select 
-key, 
-value, 
-'day1' as day, 
-1 as pri 
-from 
-stage_bucket_big 
-where 
-file_tag='1'
+POSTHOOK: query: insert overwrite table bucket_big partition(day,pri)
+ select key, value, 'day1' as day, 1 as pri
+   from stage_bucket_big
+   where file_tag='1'
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@stage_bucket_big
 POSTHOOK: Input: default@stage_bucket_big@file_tag=1
 POSTHOOK: Output: default@bucket_big@day=day1/pri=1
 POSTHOOK: Lineage: bucket_big PARTITION(day=day1,pri=1).key SIMPLE 
[(stage_bucket_big)stage_bucket_big.FieldSchema(name:key, type:bigint, 
comment:null), ]
 POSTHOOK: Lineage: bucket_big PARTITION(day=day1,pri=1).value SIMPLE 
[(stage_bucket_big)stage_bucket_big.FieldSchema(name:value, type:string, 
comment:null), ]
-PREHOOK: query: select 
-a.key , 
-a.value , 
-b.value , 
-'day1' as day, 
-1 as pri 
-from 
-( 
-select 
-key, 
-value 
-from bucket_big where day='day1'
-) a 
-left outer join 
-( 
-select 
-key, 
-value
-from bucket_small 
-where pri between 1 and 2
-) b 
-on 
+PREHOOK: query: explain select a.key , a.value , b.value , 'day1' as day, 1 as 
pri
+        from
+        ( select key, value
+          from bucket_big where day='day1' ) a
+          left outer join
+          ( select key, value
+            from bucket_small
+            where pri between 1 and 2 ) b
+            on
+          (a.key = b.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select a.key , a.value , b.value , 'day1' as day, 1 
as pri
+        from
+        ( select key, value
+          from bucket_big where day='day1' ) a
+          left outer join
+          ( select key, value
+            from bucket_small
+            where pri between 1 and 2 ) b
+            on
+          (a.key = b.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL 
SORT, 2)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: bucket_big
+                  Statistics: Num rows: 5 Data size: 46 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: key (type: bigint), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 5 Data size: 46 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: bigint)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: bigint)
+                      Statistics: Num rows: 5 Data size: 46 Basic stats: 
COMPLETE Column stats: NONE
+                      value expressions: _col1 (type: string)
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: bucket_small
+                  Statistics: Num rows: 236 Data size: 2244 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: bigint), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 236 Data size: 2244 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: bigint)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: bigint)
+                      Statistics: Num rows: 236 Data size: 2244 Basic stats: 
COMPLETE Column stats: NONE
+                      value expressions: _col1 (type: string)
+        Reducer 2 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Left Outer Join 0 to 1
+                keys:
+                  0 _col0 (type: bigint)
+                  1 _col0 (type: bigint)
+                outputColumnNames: _col0, _col1, _col3
+                Statistics: Num rows: 259 Data size: 2468 Basic stats: 
COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: bigint), _col1 (type: string), 
_col3 (type: string), 'day1' (type: string), 1 (type: int)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                  Statistics: Num rows: 259 Data size: 2468 Basic stats: 
COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 259 Data size: 2468 Basic stats: 
COMPLETE Column stats: NONE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select a.key , a.value , b.value , 'day1' as day, 1 as pri
+from
+( select key, value
+  from bucket_big where day='day1' ) a
+  left outer join
+  ( select key, value
+  from bucket_small
+  where pri between 1 and 2 ) b
+on
 (a.key = b.key)
 PREHOOK: type: QUERY
 PREHOOK: Input: default@bucket_big
@@ -196,28 +262,372 @@ PREHOOK: Input: default@bucket_small
 PREHOOK: Input: default@bucket_small@pri=1
 PREHOOK: Input: default@bucket_small@pri=2
 #### A masked pattern was here ####
-POSTHOOK: query: select 
-a.key , 
-a.value , 
-b.value , 
-'day1' as day, 
-1 as pri 
-from 
-( 
-select 
-key, 
-value 
-from bucket_big where day='day1'
-) a 
-left outer join 
-( 
-select 
-key, 
-value
-from bucket_small 
-where pri between 1 and 2
-) b 
-on 
+POSTHOOK: query: select a.key , a.value , b.value , 'day1' as day, 1 as pri
+from
+( select key, value
+  from bucket_big where day='day1' ) a
+  left outer join
+  ( select key, value
+  from bucket_small
+  where pri between 1 and 2 ) b
+on
+(a.key = b.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@bucket_big
+POSTHOOK: Input: default@bucket_big@day=day1/pri=1
+POSTHOOK: Input: default@bucket_small
+POSTHOOK: Input: default@bucket_small@pri=1
+POSTHOOK: Input: default@bucket_small@pri=2
+#### A masked pattern was here ####
+0      val_0   val_0   day1    1
+0      val_0   val_0   day1    1
+0      val_0   val_0   day1    1
+0      val_0   val_0   day1    1
+0      val_0   val_0   day1    1
+0      val_0   val_0   day1    1
+103    val_103 val_103 day1    1
+103    val_103 val_103 day1    1
+103    val_103 val_103 day1    1
+103    val_103 val_103 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+172    val_172 val_172 day1    1
+172    val_172 val_172 day1    1
+172    val_172 val_172 day1    1
+172    val_172 val_172 day1    1
+374    val_374 val_374 day1    1
+374    val_374 val_374 day1    1
+PREHOOK: query: explain select a.key , a.value , b.value , 'day1' as day, 1 as 
pri
+from
+( select key, value
+  from bucket_big where day='day1' ) a
+left outer join
+( select key, value
+  from bucket_small
+  where pri between 1 and 2 ) b
+on
+(a.key = b.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select a.key , a.value , b.value , 'day1' as day, 1 
as pri
+from
+( select key, value
+  from bucket_big where day='day1' ) a
+left outer join
+( select key, value
+  from bucket_small
+  where pri between 1 and 2 ) b
+on
+(a.key = b.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: bucket_big
+                  Statistics: Num rows: 5 Data size: 46 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: key (type: bigint), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 5 Data size: 46 Basic stats: 
COMPLETE Column stats: NONE
+                    Sorted Merge Bucket Map Join Operator
+                      condition map:
+                           Left Outer Join 0 to 1
+                      keys:
+                        0 _col0 (type: bigint)
+                        1 _col0 (type: bigint)
+                      outputColumnNames: _col0, _col1, _col3
+                      Statistics: Num rows: 259 Data size: 2468 Basic stats: 
COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: _col0 (type: bigint), _col1 (type: 
string), _col3 (type: string), 'day1' (type: string), 1 (type: int)
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                        Statistics: Num rows: 259 Data size: 2468 Basic stats: 
COMPLETE Column stats: NONE
+                        File Output Operator
+                          compressed: false
+                          Statistics: Num rows: 259 Data size: 2468 Basic 
stats: COMPLETE Column stats: NONE
+                          table:
+                              input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                              output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                              serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select a.key , a.value , b.value , 'day1' as day, 1 as pri
+from
+( select key, value
+  from bucket_big where day='day1' ) a
+left outer join
+( select key, value
+  from bucket_small
+  where pri between 1 and 2 ) b
+on
+(a.key = b.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@bucket_big
+PREHOOK: Input: default@bucket_big@day=day1/pri=1
+PREHOOK: Input: default@bucket_small
+PREHOOK: Input: default@bucket_small@pri=1
+PREHOOK: Input: default@bucket_small@pri=2
+#### A masked pattern was here ####
+POSTHOOK: query: select a.key , a.value , b.value , 'day1' as day, 1 as pri
+from
+( select key, value
+  from bucket_big where day='day1' ) a
+left outer join
+( select key, value
+  from bucket_small
+  where pri between 1 and 2 ) b
+on
+(a.key = b.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@bucket_big
+POSTHOOK: Input: default@bucket_big@day=day1/pri=1
+POSTHOOK: Input: default@bucket_small
+POSTHOOK: Input: default@bucket_small@pri=1
+POSTHOOK: Input: default@bucket_small@pri=2
+#### A masked pattern was here ####
+0      val_0   val_0   day1    1
+0      val_0   val_0   day1    1
+0      val_0   val_0   day1    1
+0      val_0   val_0   day1    1
+0      val_0   val_0   day1    1
+0      val_0   val_0   day1    1
+103    val_103 val_103 day1    1
+103    val_103 val_103 day1    1
+103    val_103 val_103 day1    1
+103    val_103 val_103 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+169    val_169 val_169 day1    1
+172    val_172 val_172 day1    1
+172    val_172 val_172 day1    1
+172    val_172 val_172 day1    1
+172    val_172 val_172 day1    1
+374    val_374 val_374 day1    1
+374    val_374 val_374 day1    1
+PREHOOK: query: drop table bucket_big
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@bucket_big
+PREHOOK: Output: default@bucket_big
+POSTHOOK: query: drop table bucket_big
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@bucket_big
+POSTHOOK: Output: default@bucket_big
+PREHOOK: query: drop table bucket_small
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@bucket_small
+PREHOOK: Output: default@bucket_small
+POSTHOOK: query: drop table bucket_small
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@bucket_small
+POSTHOOK: Output: default@bucket_small
+PREHOOK: query: CREATE TABLE bucket_big
+(
+key BIGINT,
+value STRING
+)
+PARTITIONED BY (day STRING, pri bigint)
+clustered by (key) sorted by (key) into 12 buckets
+stored as RCFile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@bucket_big
+POSTHOOK: query: CREATE TABLE bucket_big
+(
+key BIGINT,
+value STRING
+)
+PARTITIONED BY (day STRING, pri bigint)
+clustered by (key) sorted by (key) into 12 buckets
+stored as RCFile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@bucket_big
+PREHOOK: query: CREATE TABLE bucket_small
+(
+key BIGINT,
+value string
+)
+PARTITIONED BY (pri bigint)
+clustered by (key) sorted by (key) into 24 buckets
+stored as RCFile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@bucket_small
+POSTHOOK: query: CREATE TABLE bucket_small
+(
+key BIGINT,
+value string
+)
+PARTITIONED BY (pri bigint)
+clustered by (key) sorted by (key) into 24 buckets
+stored as RCFile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@bucket_small
+PREHOOK: query: insert overwrite table bucket_small partition(pri)
+select
+key,
+value,
+file_tag as pri
+from
+stage_bucket_small
+where file_tag between 1 and 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@stage_bucket_small
+PREHOOK: Input: default@stage_bucket_small@file_tag=1
+PREHOOK: Input: default@stage_bucket_small@file_tag=2
+PREHOOK: Output: default@bucket_small
+POSTHOOK: query: insert overwrite table bucket_small partition(pri)
+select
+key,
+value,
+file_tag as pri
+from
+stage_bucket_small
+where file_tag between 1 and 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@stage_bucket_small
+POSTHOOK: Input: default@stage_bucket_small@file_tag=1
+POSTHOOK: Input: default@stage_bucket_small@file_tag=2
+POSTHOOK: Output: default@bucket_small@pri=1
+POSTHOOK: Output: default@bucket_small@pri=2
+POSTHOOK: Lineage: bucket_small PARTITION(pri=1).key SIMPLE 
[(stage_bucket_small)stage_bucket_small.FieldSchema(name:key, type:bigint, 
comment:null), ]
+POSTHOOK: Lineage: bucket_small PARTITION(pri=1).value SIMPLE 
[(stage_bucket_small)stage_bucket_small.FieldSchema(name:value, type:string, 
comment:null), ]
+POSTHOOK: Lineage: bucket_small PARTITION(pri=2).key SIMPLE 
[(stage_bucket_small)stage_bucket_small.FieldSchema(name:key, type:bigint, 
comment:null), ]
+POSTHOOK: Lineage: bucket_small PARTITION(pri=2).value SIMPLE 
[(stage_bucket_small)stage_bucket_small.FieldSchema(name:value, type:string, 
comment:null), ]
+PREHOOK: query: insert overwrite table bucket_big partition(day,pri)
+select key, value, 'day1' as day, 1 as pri
+from stage_bucket_big
+where file_tag='1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@stage_bucket_big
+PREHOOK: Input: default@stage_bucket_big@file_tag=1
+PREHOOK: Output: default@bucket_big
+POSTHOOK: query: insert overwrite table bucket_big partition(day,pri)
+select key, value, 'day1' as day, 1 as pri
+from stage_bucket_big
+where file_tag='1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@stage_bucket_big
+POSTHOOK: Input: default@stage_bucket_big@file_tag=1
+POSTHOOK: Output: default@bucket_big@day=day1/pri=1
+POSTHOOK: Lineage: bucket_big PARTITION(day=day1,pri=1).key SIMPLE 
[(stage_bucket_big)stage_bucket_big.FieldSchema(name:key, type:bigint, 
comment:null), ]
+POSTHOOK: Lineage: bucket_big PARTITION(day=day1,pri=1).value SIMPLE 
[(stage_bucket_big)stage_bucket_big.FieldSchema(name:value, type:string, 
comment:null), ]
+PREHOOK: query: explain select a.key , a.value , b.value , 'day1' as day, 1 as 
pri
+        from
+        ( select key, value
+          from bucket_big where day='day1' ) a
+        left outer join
+        ( select key, value
+          from bucket_small
+          where pri between 1 and 2 ) b
+        on
+        (a.key = b.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select a.key , a.value , b.value , 'day1' as day, 1 
as pri
+        from
+        ( select key, value
+          from bucket_big where day='day1' ) a
+        left outer join
+        ( select key, value
+          from bucket_small
+          where pri between 1 and 2 ) b
+        on
+        (a.key = b.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: bucket_big
+                  Statistics: Num rows: 5 Data size: 46 Basic stats: COMPLETE 
Column stats: NONE
+                  Select Operator
+                    expressions: key (type: bigint), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 5 Data size: 46 Basic stats: 
COMPLETE Column stats: NONE
+                    Sorted Merge Bucket Map Join Operator
+                      condition map:
+                           Left Outer Join 0 to 1
+                      keys:
+                        0 _col0 (type: bigint)
+                        1 _col0 (type: bigint)
+                      outputColumnNames: _col0, _col1, _col3
+                      Statistics: Num rows: 259 Data size: 2468 Basic stats: 
COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: _col0 (type: bigint), _col1 (type: 
string), _col3 (type: string), 'day1' (type: string), 1 (type: int)
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                        Statistics: Num rows: 259 Data size: 2468 Basic stats: 
COMPLETE Column stats: NONE
+                        File Output Operator
+                          compressed: false
+                          Statistics: Num rows: 259 Data size: 2468 Basic 
stats: COMPLETE Column stats: NONE
+                          table:
+                              input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                              output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                              serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select a.key , a.value , b.value , 'day1' as day, 1 as pri
+from
+( select key, value
+  from bucket_big where day='day1' ) a
+left outer join
+( select key, value
+  from bucket_small
+  where pri between 1 and 2 ) b
+on
+(a.key = b.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@bucket_big
+PREHOOK: Input: default@bucket_big@day=day1/pri=1
+PREHOOK: Input: default@bucket_small
+PREHOOK: Input: default@bucket_small@pri=1
+PREHOOK: Input: default@bucket_small@pri=2
+#### A masked pattern was here ####
+POSTHOOK: query: select a.key , a.value , b.value , 'day1' as day, 1 as pri
+from
+( select key, value
+  from bucket_big where day='day1' ) a
+left outer join
+( select key, value
+  from bucket_small
+  where pri between 1 and 2 ) b
+on
 (a.key = b.key)
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@bucket_big

Reply via email to