Repository: hive
Updated Branches:
  refs/heads/hive-14535 6cba3e7e4 -> f883d67e8


HIVE-16051 : MM tables: skewjoin test fails (Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f883d67e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f883d67e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f883d67e

Branch: refs/heads/hive-14535
Commit: f883d67e8d1d09fada761756ad81287f56e21981
Parents: 6cba3e7
Author: Sergey Shelukhin <[email protected]>
Authored: Tue Feb 28 17:58:46 2017 -0800
Committer: Sergey Shelukhin <[email protected]>
Committed: Tue Feb 28 17:58:46 2017 -0800

----------------------------------------------------------------------
 .../java/org/apache/hadoop/hive/ql/Driver.java  |   1 -
 .../hadoop/hive/ql/exec/FileSinkOperator.java   |  21 +++-
 .../apache/hadoop/hive/ql/metadata/Hive.java    |   3 +-
 .../hive/ql/optimizer/GenMapRedUtils.java       |   6 +-
 .../ql/optimizer/physical/SkewJoinResolver.java |  27 ++++-
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |   4 +-
 .../hadoop/hive/ql/plan/LoadFileDesc.java       |  16 ++-
 ...TestGenMapRedUtilsCreateConditionalTask.java |  17 ++-
 ql/src/test/queries/clientpositive/mm_all.q     |  12 +-
 .../results/clientpositive/llap/mm_all.q.out    | 117 +++++++++----------
 ql/src/test/results/clientpositive/mm_all.q.out |  53 +++++++--
 11 files changed, 181 insertions(+), 96 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/f883d67e/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java 
b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
index f01c3d5..4b2f0d8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
@@ -1876,7 +1876,6 @@ public class Driver implements CommandProcessor {
       for (Task<? extends Serializable> tsk : plan.getRootTasks()) {
         // This should never happen, if it does, it's a bug with the potential 
to produce
         // incorrect results.
-        LOG.error("TODO# running " + tsk);
         assert tsk.getParentTasks() == null || tsk.getParentTasks().isEmpty();
         driverCxt.addToRunnable(tsk);
 

http://git-wip-us.apache.org/repos/asf/hive/blob/f883d67e/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
index 37c3a96..140ac29 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
@@ -326,13 +326,28 @@ public class FileSinkOperator extends 
TerminalOperator<FileSinkDesc> implements
             // MM tables don't support concat so we don't expect the merge of 
merged files.
             subdirPath += ".merged";
           }
+          Path finalPath = null;
           if (!bDynParts && !isSkewedStoredAsSubDirectories) {
-            finalPaths[filesIdx] = getFinalPath(subdirPath, specPath, 
extension);
+            finalPath = getFinalPath(subdirPath, specPath, extension);
           } else {
             // Note: tmpPath here has the correct partition key
-            finalPaths[filesIdx] = getFinalPath(subdirPath, tmpPath, 
extension);
+            finalPath = getFinalPath(subdirPath, tmpPath, extension);
           }
-          outPaths[filesIdx] = finalPaths[filesIdx];
+          // In the cases that have multi-stage insert, e.g. a 
"hive.skewjoin.key"-based skew join,
+          // it can happen that we want multiple commits into the same 
directory from different
+          // tasks (not just task instances). In non-MM case, 
Utilities.renameOrMoveFiles ensures
+          // unique names. We could do the same here, but this will still 
cause the old file to be
+          // deleted because it has not been committed in /this/ FSOP. To be 
safe, we throw
+          // if the final path already exists. Potentially, we could implement some 
partial commit between stages, if this
+          // affects some less obscure scenario.
+          try {
+            FileSystem fpfs = finalPath.getFileSystem(hconf);
+            if (fpfs.exists(finalPath)) throw new RuntimeException(finalPath + 
" already exists");
+          } catch (IOException e) {
+            throw new RuntimeException(e);
+          }
+          finalPaths[filesIdx] = finalPath;
+          outPaths[filesIdx] = finalPath;
         }
         if (isInfoEnabled) {
           LOG.info("Final Path: FS " + finalPaths[filesIdx]);

http://git-wip-us.apache.org/repos/asf/hive/blob/f883d67e/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java 
b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index 2e157ad..ea87cb4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -2172,7 +2172,7 @@ private void constructOneLBLocationMap(FileStatus fSta,
         destPath = new Path(destPath, 
ValidWriteIds.getMmFilePrefix(mmWriteId));
         filter = replace ? new ValidWriteIds.IdPathFilter(mmWriteId, false, 
true) : filter;
       }
-      Utilities.LOG14535.info("moving " + loadPath + " to " + tblPath);
+      Utilities.LOG14535.info("moving " + loadPath + " to " + tblPath + " 
(replace = " + replace + ")");
       if (replace) {
         replaceFiles(tblPath, loadPath, destPath, tblPath,
             sessionConf, isSrcLocal, filter, mmWriteId != null);
@@ -3104,7 +3104,6 @@ private void constructOneLBLocationMap(FileStatus fSta,
               SessionState.setCurrentSessionState(parentSession);
 
               Path destPath = mvFile(conf, srcFs, srcP, destFs, destf, 
isSrcLocal, isRenameAllowed);
-
               if (inheritPerms) {
                 HdfsUtils.setFullFileStatus(conf, fullDestStatus, srcGroup, 
destFs, destPath, false);
               }

http://git-wip-us.apache.org/repos/asf/hive/blob/f883d67e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
index a777475..5334ddc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
@@ -1367,11 +1367,11 @@ public final class GenMapRedUtils {
       // Only create the movework for non-MM table. No action needed for a MM 
table.
       Utilities.LOG14535.info("creating dummy movetask for merge (with lfd)");
       dummyMv = new MoveWork(null, null, null,
-         new LoadFileDesc(inputDirName, finalName, true, null, null), false);
+         new LoadFileDesc(inputDirName, finalName, true, null, null, false), 
false);
     } else {
-      // TODO# create the noop MoveWork to avoid q file changes for now. else 
should be removed.
+      // TODO# create the noop MoveWork to avoid q file changes for now. 
Should be removed.
       dummyMv = new MoveWork(null, null, null,
-          new LoadFileDesc(inputDirName, finalName, true, null, null), false);
+          new LoadFileDesc(inputDirName, finalName, true, null, null, false), 
false);
       dummyMv.setNoop(true);
     }
     // Use the original fsOp path here in case of MM - while the new FSOP 
merges files inside the

http://git-wip-us.apache.org/repos/asf/hive/blob/f883d67e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SkewJoinResolver.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SkewJoinResolver.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SkewJoinResolver.java
index f48d118..64db005 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SkewJoinResolver.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SkewJoinResolver.java
@@ -38,7 +38,10 @@ import org.apache.hadoop.hive.ql.lib.Rule;
 import org.apache.hadoop.hive.ql.lib.RuleRegExp;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.LoadFileDesc;
+import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
 import org.apache.hadoop.hive.ql.plan.MapredWork;
+import org.slf4j.Logger;
 
 /**
  * An implementation of PhysicalPlanResolver. It iterator each task with a rule
@@ -46,6 +49,8 @@ import org.apache.hadoop.hive.ql.plan.MapredWork;
  * it will try to add a conditional task associated a list of skew join tasks.
  */
 public class SkewJoinResolver implements PhysicalPlanResolver {
+  private final static Logger LOG = 
org.slf4j.LoggerFactory.getLogger(SkewJoinResolver.class);
+
   @Override
   public PhysicalContext resolve(PhysicalContext pctx) throws 
SemanticException {
     Dispatcher disp = new SkewJoinTaskDispatcher(pctx);
@@ -78,8 +83,26 @@ public class SkewJoinResolver implements 
PhysicalPlanResolver {
         return null;
       }
 
-      SkewJoinProcCtx skewJoinProcContext = new SkewJoinProcCtx(task,
-          physicalContext.getParseContext());
+      ParseContext pc = physicalContext.getParseContext();
+      if (pc.getLoadTableWork() != null) {
+        for (LoadTableDesc ltd : pc.getLoadTableWork()) {
+          if (ltd.getMmWriteId() == null) continue;
+          // See the path in FSOP that calls fs.exists on finalPath.
+          LOG.debug("Not using skew join because the destination table "
+              + ltd.getTable().getTableName() + " is an insert_only table");
+          return null;
+        }
+      }
+      if (pc.getLoadFileWork() != null) {
+        for (LoadFileDesc lfd : pc.getLoadFileWork()) {
+          if (!lfd.isMmCtas()) continue;
+          LOG.debug("Not using skew join because the destination table "
+              + lfd.getDestinationCreateTable() + " is an insert_only table");
+          return null;
+        }
+      }
+
+      SkewJoinProcCtx skewJoinProcContext = new SkewJoinProcCtx(task, pc);
 
       Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, 
NodeProcessor>();
       opRules.put(new RuleRegExp("R1",

http://git-wip-us.apache.org/repos/asf/hive/blob/f883d67e/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 2d1d47f..015d0ee 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -6900,9 +6900,9 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
       }
 
       boolean isDfsDir = (dest_type.intValue() == QBMetaData.DEST_DFS_FILE);
-      // Create LFD even for MM CTAS - it's a no-op move, but it still seems 
to be uses for stats.
+      // Create LFD even for MM CTAS - it's a no-op move, but it still seems 
to be used for stats.
       loadFileWork.add(new LoadFileDesc(tblDesc, viewDesc,
-          queryTmpdir, dest_path, isDfsDir, cols, colTypes));
+          queryTmpdir, dest_path, isDfsDir, cols, colTypes, isMmCtas));
 
       if (tblDesc == null) {
         if (viewDesc != null) {

http://git-wip-us.apache.org/repos/asf/hive/blob/f883d67e/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadFileDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadFileDesc.java 
b/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadFileDesc.java
index b06177e..6fad710 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadFileDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadFileDesc.java
@@ -36,6 +36,7 @@ public class LoadFileDesc extends LoadDesc implements 
Serializable {
   private String columns;
   private String columnTypes;
   private String destinationCreateTable;
+  private boolean isMmCtas;
 
   public LoadFileDesc() {
   }
@@ -48,12 +49,13 @@ public class LoadFileDesc extends LoadDesc implements 
Serializable {
     this.columns = o.columns;
     this.columnTypes = o.columnTypes;
     this.destinationCreateTable = o.destinationCreateTable;
+    this.isMmCtas = o.isMmCtas;
   }
 
   public LoadFileDesc(final CreateTableDesc createTableDesc, final 
CreateViewDesc  createViewDesc,
                       final Path sourcePath, final Path targetDir, final 
boolean isDfsDir,
-                      final String columns, final String columnTypes) {
-    this(sourcePath, targetDir, isDfsDir, columns, columnTypes);
+                      final String columns, final String columnTypes, boolean 
isMmCtas) {
+    this(sourcePath, targetDir, isDfsDir, columns, columnTypes, isMmCtas);
     if (createTableDesc != null && createTableDesc.getDatabaseName() != null
         && createTableDesc.getTableName() != null) {
       destinationCreateTable = (createTableDesc.getTableName().contains(".") ? 
"" : createTableDesc
@@ -66,15 +68,15 @@ public class LoadFileDesc extends LoadDesc implements 
Serializable {
     }
   }
 
-  public LoadFileDesc(final Path sourcePath, final Path targetDir,
-      final boolean isDfsDir, final String columns, final String columnTypes) {
-
+  public LoadFileDesc(final Path sourcePath, final Path targetDir, final 
boolean isDfsDir,
+      final String columns, final String columnTypes, boolean isMmCtas) {
     super(sourcePath);
     Utilities.LOG14535.info("creating LFD from " + sourcePath + " to " + 
targetDir);
     this.targetDir = targetDir;
     this.isDfsDir = isDfsDir;
     this.columns = columns;
     this.columnTypes = columnTypes;
+    this.isMmCtas = isMmCtas;
   }
 
   @Explain(displayName = "destination")
@@ -131,4 +133,8 @@ public class LoadFileDesc extends LoadDesc implements 
Serializable {
   public String getDestinationCreateTable(){
     return destinationCreateTable;
   }
+
+  public boolean isMmCtas() {
+    return isMmCtas;
+  }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/f883d67e/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsCreateConditionalTask.java
----------------------------------------------------------------------
diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsCreateConditionalTask.java
 
b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsCreateConditionalTask.java
index 7bc9073..5e0fa34 100644
--- 
a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsCreateConditionalTask.java
+++ 
b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsCreateConditionalTask.java
@@ -83,17 +83,20 @@ public class TestGenMapRedUtilsCreateConditionalTask {
         GenMapRedUtils.shouldMergeMovePaths(hiveConf, condInputPath, 
condOutputPath, mockWork));
 
     reset(mockWork);
-    when(mockWork.getLoadFileWork()).thenReturn(new 
LoadFileDesc(condInputPath, condOutputPath, false, "", ""));
+    when(mockWork.getLoadFileWork()).thenReturn(new LoadFileDesc(
+        condInputPath, condOutputPath, false, "", "", false));
     assertFalse("Merging paths is not allowed when both conditional output 
path is not equals to MoveWork input path.",
         GenMapRedUtils.shouldMergeMovePaths(hiveConf, condInputPath, 
condOutputPath, mockWork));
 
     reset(mockWork);
-    when(mockWork.getLoadFileWork()).thenReturn(new 
LoadFileDesc(condOutputPath, new Path("unused"), false, "", ""));
+    when(mockWork.getLoadFileWork()).thenReturn(new LoadFileDesc(
+        condOutputPath, new Path("unused"), false, "", "", false));
     assertFalse("Merging paths is not allowed when conditional input path is 
not a BlobStore path.",
         GenMapRedUtils.shouldMergeMovePaths(hiveConf, new 
Path("hdfs://hdfs-path"), condOutputPath, mockWork));
 
     reset(mockWork);
-    when(mockWork.getLoadFileWork()).thenReturn(new 
LoadFileDesc(condOutputPath, new Path("hdfs://hdfs-path"), false, "", ""));
+    when(mockWork.getLoadFileWork()).thenReturn(new LoadFileDesc(
+        condOutputPath, new Path("hdfs://hdfs-path"), false, "", "", false));
     assertFalse("Merging paths is not allowed when MoveWork output path is not 
a BlobStore path.",
         GenMapRedUtils.shouldMergeMovePaths(hiveConf, condInputPath, 
condOutputPath, mockWork));
   }
@@ -105,7 +108,8 @@ public class TestGenMapRedUtilsCreateConditionalTask {
     final Path targetMoveWorkPath = new 
Path("s3a://bucket/scratch/-ext-10003");
     final MoveWork mockWork = mock(MoveWork.class);
 
-    when(mockWork.getLoadFileWork()).thenReturn(new 
LoadFileDesc(condOutputPath, targetMoveWorkPath, false, "", ""));
+    when(mockWork.getLoadFileWork()).thenReturn(new LoadFileDesc(
+        condOutputPath, targetMoveWorkPath, false, "", "", false));
 
     assertTrue("Merging BlobStore paths should be allowed.",
         GenMapRedUtils.shouldMergeMovePaths(hiveConf, condInputPath, 
condOutputPath, mockWork));
@@ -129,7 +133,8 @@ public class TestGenMapRedUtilsCreateConditionalTask {
     MoveWork newWork;
 
     // test using loadFileWork
-    when(mockWork.getLoadFileWork()).thenReturn(new 
LoadFileDesc(condOutputPath, targetMoveWorkPath, false, "", ""));
+    when(mockWork.getLoadFileWork()).thenReturn(new LoadFileDesc(
+        condOutputPath, targetMoveWorkPath, false, "", "", false));
     newWork = GenMapRedUtils.mergeMovePaths(condInputPath, mockWork);
     assertNotNull(newWork);
     assertNotEquals(newWork, mockWork);
@@ -275,7 +280,7 @@ public class TestGenMapRedUtilsCreateConditionalTask {
   private Task<MoveWork> createMoveTask(Path source, Path destination) {
     Task<MoveWork> moveTask = mock(MoveTask.class);
     MoveWork moveWork = new MoveWork();
-    moveWork.setLoadFileWork(new LoadFileDesc(source, destination, true, null, 
null));
+    moveWork.setLoadFileWork(new LoadFileDesc(source, destination, true, null, 
null, false));
 
     when(moveTask.getWork()).thenReturn(moveWork);
 

http://git-wip-us.apache.org/repos/asf/hive/blob/f883d67e/ql/src/test/queries/clientpositive/mm_all.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/mm_all.q 
b/ql/src/test/queries/clientpositive/mm_all.q
index f9a829c..db16920 100644
--- a/ql/src/test/queries/clientpositive/mm_all.q
+++ b/ql/src/test/queries/clientpositive/mm_all.q
@@ -437,12 +437,20 @@ desc formatted stats2_mm;
 drop table stats2_mm;
 
 
+set hive.optimize.skewjoin=true;
+set hive.skewjoin.key=2;
+set hive.optimize.metadataonly=false;
 
-drop table intermediate;
+CREATE TABLE skewjoin_mm(key INT, value STRING) STORED AS TEXTFILE 
tblproperties ("transactional"="true", 
"transactional_properties"="insert_only");
+FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE 
skewjoin_mm SELECT src1.key, src2.value;
+select count(distinct key) from skewjoin_mm;
+drop table skewjoin_mm;
+
+set hive.optimize.skewjoin=false;
 
 
 
+drop table intermediate;
 
 
 
-drop table intermediate;

http://git-wip-us.apache.org/repos/asf/hive/blob/f883d67e/ql/src/test/results/clientpositive/llap/mm_all.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/mm_all.q.out 
b/ql/src/test/results/clientpositive/llap/mm_all.q.out
index 7a11301..4bb480a 100644
--- a/ql/src/test/results/clientpositive/llap/mm_all.q.out
+++ b/ql/src/test/results/clientpositive/llap/mm_all.q.out
@@ -1,8 +1,6 @@
-PREHOOK: query: -- Force multiple writers when reading
-drop table intermediate
+PREHOOK: query: drop table intermediate
 PREHOOK: type: DROPTABLE
-POSTHOOK: query: -- Force multiple writers when reading
-drop table intermediate
+POSTHOOK: query: drop table intermediate
 POSTHOOK: type: DROPTABLE
 PREHOOK: query: create table intermediate(key int) partitioned by (p int) 
stored as orc
 PREHOOK: type: CREATETABLE
@@ -180,15 +178,13 @@ POSTHOOK: Input: default@part_mm@key_mm=456
 103    455
 103    455
 103    456
-PREHOOK: query: -- TODO: doesn't work truncate table part_mm 
partition(key_mm=455);
-select * from part_mm order by key, key_mm
+PREHOOK: query: select * from part_mm order by key, key_mm
 PREHOOK: type: QUERY
 PREHOOK: Input: default@part_mm
 PREHOOK: Input: default@part_mm@key_mm=455
 PREHOOK: Input: default@part_mm@key_mm=456
 #### A masked pattern was here ####
-POSTHOOK: query: -- TODO: doesn't work truncate table part_mm 
partition(key_mm=455);
-select * from part_mm order by key, key_mm
+POSTHOOK: query: select * from part_mm order by key, key_mm
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@part_mm
 POSTHOOK: Input: default@part_mm@key_mm=455
@@ -353,11 +349,9 @@ POSTHOOK: query: drop table simple_mm
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@simple_mm
 POSTHOOK: Output: default@simple_mm
-PREHOOK: query: -- simple DP (no bucketing)
-drop table dp_mm
+PREHOOK: query: drop table dp_mm
 PREHOOK: type: DROPTABLE
-POSTHOOK: query: -- simple DP (no bucketing)
-drop table dp_mm
+POSTHOOK: query: drop table dp_mm
 POSTHOOK: type: DROPTABLE
 PREHOOK: query: create table dp_mm (key int) partitioned by (key1 string, key2 
int) stored as orc
   tblproperties ("transactional"="true", 
"transactional_properties"="insert_only")
@@ -428,15 +422,11 @@ POSTHOOK: query: drop table dp_mm
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@dp_mm
 POSTHOOK: Output: default@dp_mm
-PREHOOK: query: -- union
-
-create table union_mm(id int)  tblproperties ("transactional"="true", 
"transactional_properties"="insert_only")
+PREHOOK: query: create table union_mm(id int)  tblproperties 
("transactional"="true", "transactional_properties"="insert_only")
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@union_mm
-POSTHOOK: query: -- union
-
-create table union_mm(id int)  tblproperties ("transactional"="true", 
"transactional_properties"="insert_only")
+POSTHOOK: query: create table union_mm(id int)  tblproperties 
("transactional"="true", "transactional_properties"="insert_only")
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@union_mm
@@ -1221,15 +1211,9 @@ POSTHOOK: query: drop table merge1_mm
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@merge1_mm
 POSTHOOK: Output: default@merge1_mm
-PREHOOK: query: -- TODO: need to include merge+union+DP, but it's broken for 
now
-
-
-drop table ctas0_mm
+PREHOOK: query: drop table ctas0_mm
 PREHOOK: type: DROPTABLE
-POSTHOOK: query: -- TODO: need to include merge+union+DP, but it's broken for 
now
-
-
-drop table ctas0_mm
+POSTHOOK: query: drop table ctas0_mm
 POSTHOOK: type: DROPTABLE
 PREHOOK: query: create table ctas0_mm tblproperties ("transactional"="true", 
"transactional_properties"="insert_only") as select * from intermediate
 PREHOOK: type: CREATETABLE_AS_SELECT
@@ -2137,13 +2121,9 @@ POSTHOOK: query: drop table intermmediate_nonpart
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@intermmediate_nonpart
 POSTHOOK: Output: default@intermmediate_nonpart
-PREHOOK: query: -- non-MM export to MM table, with and without partitions
-
-drop table import0_mm
+PREHOOK: query: drop table import0_mm
 PREHOOK: type: DROPTABLE
-POSTHOOK: query: -- non-MM export to MM table, with and without partitions
-
-drop table import0_mm
+POSTHOOK: query: drop table import0_mm
 POSTHOOK: type: DROPTABLE
 PREHOOK: query: create table import0_mm(key int, p int) 
tblproperties("transactional"="true", "transactional_properties"="insert_only")
 PREHOOK: type: CREATETABLE
@@ -2236,13 +2216,9 @@ POSTHOOK: query: drop table import1_mm
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@import1_mm
 POSTHOOK: Output: default@import1_mm
-PREHOOK: query: -- MM export into new MM table, non-part and part
-
-drop table import2_mm
+PREHOOK: query: drop table import2_mm
 PREHOOK: type: DROPTABLE
-POSTHOOK: query: -- MM export into new MM table, non-part and part
-
-drop table import2_mm
+POSTHOOK: query: drop table import2_mm
 POSTHOOK: type: DROPTABLE
 PREHOOK: query: import table import2_mm from 
'ql/test/data/exports/intermmediate_nonpart'
 PREHOOK: type: IMPORT
@@ -2340,13 +2316,9 @@ POSTHOOK: query: drop table import3_mm
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@import3_mm
 POSTHOOK: Output: default@import3_mm
-PREHOOK: query: -- MM export into existing MM table, non-part and partial part
-
-drop table import4_mm
+PREHOOK: query: drop table import4_mm
 PREHOOK: type: DROPTABLE
-POSTHOOK: query: -- MM export into existing MM table, non-part and partial part
-
-drop table import4_mm
+POSTHOOK: query: drop table import4_mm
 POSTHOOK: type: DROPTABLE
 PREHOOK: query: create table import4_mm(key int, p int) 
tblproperties("transactional"="true", "transactional_properties"="insert_only")
 PREHOOK: type: CREATETABLE
@@ -2427,13 +2399,9 @@ POSTHOOK: query: drop table import5_mm
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@import5_mm
 POSTHOOK: Output: default@import5_mm
-PREHOOK: query: -- MM export into existing non-MM table, non-part and part
-
-drop table import6_mm
+PREHOOK: query: drop table import6_mm
 PREHOOK: type: DROPTABLE
-POSTHOOK: query: -- MM export into existing non-MM table, non-part and part
-
-drop table import6_mm
+POSTHOOK: query: drop table import6_mm
 POSTHOOK: type: DROPTABLE
 PREHOOK: query: create table import6_mm(key int, p int)
 PREHOOK: type: CREATETABLE
@@ -2897,10 +2865,10 @@ POSTHOOK: Input: default@intermediate@p=455
 POSTHOOK: Input: default@intermediate@p=456
 POSTHOOK: Input: default@intermediate@p=457
 POSTHOOK: Output: default@multi1_mm@p=1
-POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key SIMPLE 
[(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ]
-POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key2 SIMPLE 
[(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
 POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key SIMPLE 
[(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
 POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key2 SIMPLE 
[(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ]
+POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key SIMPLE 
[(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ]
+POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key2 SIMPLE 
[(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
 PREHOOK: query: select key, key2, p from multi1_mm order by key, key2, p
 PREHOOK: type: QUERY
 PREHOOK: Input: default@multi1_mm
@@ -3111,7 +3079,7 @@ POSTHOOK: type: CREATETABLE_AS_SELECT
 POSTHOOK: Input: default@src
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@stats2_mm
-POSTHOOK: Lineage: stats2_mm.c0 EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), (src)src.FieldSchema(name:value, type:string, 
comment:default), ]
+POSTHOOK: Lineage: stats2_mm._c0 EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), (src)src.FieldSchema(name:value, type:string, 
comment:default), ]
 PREHOOK: query: desc formatted stats2_mm
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@stats2_mm
@@ -3120,7 +3088,7 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@stats2_mm
 # col_name             data_type               comment             
                 
-c0                     array<string>                               
+_c0                    array<string>                               
                 
 # Detailed Table Information            
 Database:              default                  
@@ -3130,7 +3098,7 @@ Retention:                0
 Table Type:            MANAGED_TABLE            
 Table Parameters:               
        COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
-       numFiles                43                  
+       numFiles                55                  
        numRows                 500                 
        rawDataSize             5312                
        totalSize               5812                
@@ -3156,6 +3124,41 @@ POSTHOOK: query: drop table stats2_mm
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@stats2_mm
 POSTHOOK: Output: default@stats2_mm
+PREHOOK: query: CREATE TABLE skewjoin_mm(key INT, value STRING) STORED AS 
TEXTFILE tblproperties ("transactional"="true", 
"transactional_properties"="insert_only")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@skewjoin_mm
+POSTHOOK: query: CREATE TABLE skewjoin_mm(key INT, value STRING) STORED AS 
TEXTFILE tblproperties ("transactional"="true", 
"transactional_properties"="insert_only")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@skewjoin_mm
+PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT 
OVERWRITE TABLE skewjoin_mm SELECT src1.key, src2.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@skewjoin_mm
+POSTHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT 
OVERWRITE TABLE skewjoin_mm SELECT src1.key, src2.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@skewjoin_mm
+POSTHOOK: Lineage: skewjoin_mm.key EXPRESSION [(src)src1.FieldSchema(name:key, 
type:string, comment:default), ]
+POSTHOOK: Lineage: skewjoin_mm.value SIMPLE [(src)src2.FieldSchema(name:value, 
type:string, comment:default), ]
+PREHOOK: query: select count(distinct key) from skewjoin_mm
+PREHOOK: type: QUERY
+PREHOOK: Input: default@skewjoin_mm
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct key) from skewjoin_mm
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@skewjoin_mm
+#### A masked pattern was here ####
+309
+PREHOOK: query: drop table skewjoin_mm
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@skewjoin_mm
+PREHOOK: Output: default@skewjoin_mm
+POSTHOOK: query: drop table skewjoin_mm
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@skewjoin_mm
+POSTHOOK: Output: default@skewjoin_mm
 PREHOOK: query: drop table intermediate
 PREHOOK: type: DROPTABLE
 PREHOOK: Input: default@intermediate
@@ -3164,7 +3167,3 @@ POSTHOOK: query: drop table intermediate
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@intermediate
 POSTHOOK: Output: default@intermediate
-PREHOOK: query: drop table intermediate
-PREHOOK: type: DROPTABLE
-POSTHOOK: query: drop table intermediate
-POSTHOOK: type: DROPTABLE

http://git-wip-us.apache.org/repos/asf/hive/blob/f883d67e/ql/src/test/results/clientpositive/mm_all.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/mm_all.q.out 
b/ql/src/test/results/clientpositive/mm_all.q.out
index 4a9e4e5..26854bd 100644
--- a/ql/src/test/results/clientpositive/mm_all.q.out
+++ b/ql/src/test/results/clientpositive/mm_all.q.out
@@ -1165,12 +1165,12 @@ POSTHOOK: Input: default@merge1_mm@key=103
 POSTHOOK: Input: default@merge1_mm@key=97
 POSTHOOK: Input: default@merge1_mm@key=98
 #### A masked pattern was here ####
-100    100
 103    103
+100    100
 98     98
 97     97
-10     10
 0      0
+10     10
 PREHOOK: query: insert into table merge1_mm partition (key) select key, key 
from intermediate
 PREHOOK: type: QUERY
 PREHOOK: Input: default@intermediate
@@ -1216,18 +1216,18 @@ POSTHOOK: Input: default@merge1_mm@key=103
 POSTHOOK: Input: default@merge1_mm@key=97
 POSTHOOK: Input: default@merge1_mm@key=98
 #### A masked pattern was here ####
-103    103
 100    100
 103    103
 97     97
 100    100
+103    103
 97     97
 98     98
 98     98
-10     10
 0      0
-10     10
 0      0
+10     10
+10     10
 PREHOOK: query: drop table merge1_mm
 PREHOOK: type: DROPTABLE
 PREHOOK: Input: default@merge1_mm
@@ -2890,10 +2890,10 @@ POSTHOOK: Input: default@intermediate@p=455
 POSTHOOK: Input: default@intermediate@p=456
 POSTHOOK: Input: default@intermediate@p=457
 POSTHOOK: Output: default@multi1_mm@p=1
-POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key SIMPLE 
[(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ]
 POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key SIMPLE 
[(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
-POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key2 SIMPLE 
[(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
 POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key2 SIMPLE 
[(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ]
+POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key SIMPLE 
[(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ]
+POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key2 SIMPLE 
[(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
 PREHOOK: query: select key, key2, p from multi1_mm order by key, key2, p
 PREHOOK: type: QUERY
 PREHOOK: Input: default@multi1_mm
@@ -3149,6 +3149,41 @@ POSTHOOK: query: drop table stats2_mm
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@stats2_mm
 POSTHOOK: Output: default@stats2_mm
+PREHOOK: query: CREATE TABLE skewjoin_mm(key INT, value STRING) STORED AS 
TEXTFILE tblproperties ("transactional"="true", 
"transactional_properties"="insert_only")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@skewjoin_mm
+POSTHOOK: query: CREATE TABLE skewjoin_mm(key INT, value STRING) STORED AS 
TEXTFILE tblproperties ("transactional"="true", 
"transactional_properties"="insert_only")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@skewjoin_mm
+PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT 
OVERWRITE TABLE skewjoin_mm SELECT src1.key, src2.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@skewjoin_mm
+POSTHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT 
OVERWRITE TABLE skewjoin_mm SELECT src1.key, src2.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@skewjoin_mm
+POSTHOOK: Lineage: skewjoin_mm.key EXPRESSION [(src)src1.FieldSchema(name:key, 
type:string, comment:default), ]
+POSTHOOK: Lineage: skewjoin_mm.value SIMPLE [(src)src2.FieldSchema(name:value, 
type:string, comment:default), ]
+PREHOOK: query: select count(distinct key) from skewjoin_mm
+PREHOOK: type: QUERY
+PREHOOK: Input: default@skewjoin_mm
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct key) from skewjoin_mm
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@skewjoin_mm
+#### A masked pattern was here ####
+309
+PREHOOK: query: drop table skewjoin_mm
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@skewjoin_mm
+PREHOOK: Output: default@skewjoin_mm
+POSTHOOK: query: drop table skewjoin_mm
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@skewjoin_mm
+POSTHOOK: Output: default@skewjoin_mm
 PREHOOK: query: drop table intermediate
 PREHOOK: type: DROPTABLE
 PREHOOK: Input: default@intermediate
@@ -3157,7 +3192,3 @@ POSTHOOK: query: drop table intermediate
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@intermediate
 POSTHOOK: Output: default@intermediate
-PREHOOK: query: drop table intermediate
-PREHOOK: type: DROPTABLE
-POSTHOOK: query: drop table intermediate
-POSTHOOK: type: DROPTABLE

Reply via email to