Repository: hive Updated Branches: refs/heads/hive-14535 6cba3e7e4 -> f883d67e8
HIVE-16051 : MM tables: skewjoin test fails (Sergey Shelukhin) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f883d67e Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f883d67e Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f883d67e Branch: refs/heads/hive-14535 Commit: f883d67e8d1d09fada761756ad81287f56e21981 Parents: 6cba3e7 Author: Sergey Shelukhin <[email protected]> Authored: Tue Feb 28 17:58:46 2017 -0800 Committer: Sergey Shelukhin <[email protected]> Committed: Tue Feb 28 17:58:46 2017 -0800 ---------------------------------------------------------------------- .../java/org/apache/hadoop/hive/ql/Driver.java | 1 - .../hadoop/hive/ql/exec/FileSinkOperator.java | 21 +++- .../apache/hadoop/hive/ql/metadata/Hive.java | 3 +- .../hive/ql/optimizer/GenMapRedUtils.java | 6 +- .../ql/optimizer/physical/SkewJoinResolver.java | 27 ++++- .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 4 +- .../hadoop/hive/ql/plan/LoadFileDesc.java | 16 ++- ...TestGenMapRedUtilsCreateConditionalTask.java | 17 ++- ql/src/test/queries/clientpositive/mm_all.q | 12 +- .../results/clientpositive/llap/mm_all.q.out | 117 +++++++++---------- ql/src/test/results/clientpositive/mm_all.q.out | 53 +++++++-- 11 files changed, 181 insertions(+), 96 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/f883d67e/ql/src/java/org/apache/hadoop/hive/ql/Driver.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java index f01c3d5..4b2f0d8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java @@ -1876,7 +1876,6 @@ public class Driver implements CommandProcessor { for (Task<? extends Serializable> tsk : plan.getRootTasks()) { // This should never happen, if it does, it's a bug with the potential to produce // incorrect results. - LOG.error("TODO# running " + tsk); assert tsk.getParentTasks() == null || tsk.getParentTasks().isEmpty(); driverCxt.addToRunnable(tsk); http://git-wip-us.apache.org/repos/asf/hive/blob/f883d67e/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java index 37c3a96..140ac29 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java @@ -326,13 +326,28 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements // MM tables don't support concat so we don't expect the merge of merged files. subdirPath += ".merged"; } + Path finalPath = null; if (!bDynParts && !isSkewedStoredAsSubDirectories) { - finalPaths[filesIdx] = getFinalPath(subdirPath, specPath, extension); + finalPath = getFinalPath(subdirPath, specPath, extension); } else { // Note: tmpPath here has the correct partition key - finalPaths[filesIdx] = getFinalPath(subdirPath, tmpPath, extension); + finalPath = getFinalPath(subdirPath, tmpPath, extension); } - outPaths[filesIdx] = finalPaths[filesIdx]; + // In the cases that have multi-stage insert, e.g. a "hive.skewjoin.key"-based skew join, + // it can happen that we want multiple commits into the same directory from different + // tasks (not just task instances). In non-MM case, Utilities.renameOrMoveFiles ensures + // unique names. We could do the same here, but this will still cause the old file to be + // deleted because it has not been committed in /this/ FSOP. We are going to fail to be + // safe. Potentially, we could implement some partial commit between stages, if this + // affects some less obscure scenario. + try { + FileSystem fpfs = finalPath.getFileSystem(hconf); + if (fpfs.exists(finalPath)) throw new RuntimeException(finalPath + " already exists"); + } catch (IOException e) { + throw new RuntimeException(e); + } + finalPaths[filesIdx] = finalPath; + outPaths[filesIdx] = finalPath; } if (isInfoEnabled) { LOG.info("Final Path: FS " + finalPaths[filesIdx]); http://git-wip-us.apache.org/repos/asf/hive/blob/f883d67e/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 2e157ad..ea87cb4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -2172,7 +2172,7 @@ private void constructOneLBLocationMap(FileStatus fSta, destPath = new Path(destPath, ValidWriteIds.getMmFilePrefix(mmWriteId)); filter = replace ? new ValidWriteIds.IdPathFilter(mmWriteId, false, true) : filter; } - Utilities.LOG14535.info("moving " + loadPath + " to " + tblPath); + Utilities.LOG14535.info("moving " + loadPath + " to " + tblPath + " (replace = " + replace + ")"); if (replace) { replaceFiles(tblPath, loadPath, destPath, tblPath, sessionConf, isSrcLocal, filter, mmWriteId != null); @@ -3104,7 +3104,6 @@ private void constructOneLBLocationMap(FileStatus fSta, SessionState.setCurrentSessionState(parentSession); Path destPath = mvFile(conf, srcFs, srcP, destFs, destf, isSrcLocal, isRenameAllowed); - if (inheritPerms) { HdfsUtils.setFullFileStatus(conf, fullDestStatus, srcGroup, destFs, destPath, false); } http://git-wip-us.apache.org/repos/asf/hive/blob/f883d67e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java index a777475..5334ddc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java @@ -1367,11 +1367,11 @@ public final class GenMapRedUtils { // Only create the movework for non-MM table. No action needed for a MM table. Utilities.LOG14535.info("creating dummy movetask for merge (with lfd)"); dummyMv = new MoveWork(null, null, null, - new LoadFileDesc(inputDirName, finalName, true, null, null), false); + new LoadFileDesc(inputDirName, finalName, true, null, null, false), false); } else { - // TODO# create the noop MoveWork to avoid q file changes for now. else should be removed. + // TODO# create the noop MoveWork to avoid q file changes for now. Should be removed. dummyMv = new MoveWork(null, null, null, - new LoadFileDesc(inputDirName, finalName, true, null, null), false); + new LoadFileDesc(inputDirName, finalName, true, null, null, false), false); dummyMv.setNoop(true); } // Use the original fsOp path here in case of MM - while the new FSOP merges files inside the http://git-wip-us.apache.org/repos/asf/hive/blob/f883d67e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SkewJoinResolver.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SkewJoinResolver.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SkewJoinResolver.java index f48d118..64db005 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SkewJoinResolver.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SkewJoinResolver.java @@ -38,7 +38,10 @@ import org.apache.hadoop.hive.ql.lib.Rule; import org.apache.hadoop.hive.ql.lib.RuleRegExp; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.LoadFileDesc; +import org.apache.hadoop.hive.ql.plan.LoadTableDesc; import org.apache.hadoop.hive.ql.plan.MapredWork; +import org.slf4j.Logger; /** * An implementation of PhysicalPlanResolver. It iterator each task with a rule @@ -46,6 +49,8 @@ import org.apache.hadoop.hive.ql.plan.MapredWork; * it will try to add a conditional task associated a list of skew join tasks. */ public class SkewJoinResolver implements PhysicalPlanResolver { + private final static Logger LOG = org.slf4j.LoggerFactory.getLogger(SkewJoinResolver.class); + @Override public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException { Dispatcher disp = new SkewJoinTaskDispatcher(pctx); @@ -78,8 +83,26 @@ public class SkewJoinResolver implements PhysicalPlanResolver { return null; } - SkewJoinProcCtx skewJoinProcContext = new SkewJoinProcCtx(task, - physicalContext.getParseContext()); + ParseContext pc = physicalContext.getParseContext(); + if (pc.getLoadTableWork() != null) { + for (LoadTableDesc ltd : pc.getLoadTableWork()) { + if (ltd.getMmWriteId() == null) continue; + // See the path in FSOP that calls fs.exists on finalPath. + LOG.debug("Not using skew join because the destination table " + + ltd.getTable().getTableName() + " is an insert_only table"); + return null; + } + } + if (pc.getLoadFileWork() != null) { + for (LoadFileDesc lfd : pc.getLoadFileWork()) { + if (!lfd.isMmCtas()) continue; + LOG.debug("Not using skew join because the destination table " + + lfd.getDestinationCreateTable() + " is an insert_only table"); + return null; + } + } + + SkewJoinProcCtx skewJoinProcContext = new SkewJoinProcCtx(task, pc); Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>(); opRules.put(new RuleRegExp("R1", http://git-wip-us.apache.org/repos/asf/hive/blob/f883d67e/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 2d1d47f..015d0ee 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -6900,9 +6900,9 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { } boolean isDfsDir = (dest_type.intValue() == QBMetaData.DEST_DFS_FILE); - // Create LFD even for MM CTAS - it's a no-op move, but it still seems to be uses for stats. + // Create LFD even for MM CTAS - it's a no-op move, but it still seems to be used for stats. loadFileWork.add(new LoadFileDesc(tblDesc, viewDesc, - queryTmpdir, dest_path, isDfsDir, cols, colTypes)); + queryTmpdir, dest_path, isDfsDir, cols, colTypes, isMmCtas)); if (tblDesc == null) { if (viewDesc != null) { http://git-wip-us.apache.org/repos/asf/hive/blob/f883d67e/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadFileDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadFileDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadFileDesc.java index b06177e..6fad710 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadFileDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadFileDesc.java @@ -36,6 +36,7 @@ public class LoadFileDesc extends LoadDesc implements Serializable { private String columns; private String columnTypes; private String destinationCreateTable; + private boolean isMmCtas; public LoadFileDesc() { } @@ -48,12 +49,13 @@ public class LoadFileDesc extends LoadDesc implements Serializable { this.columns = o.columns; this.columnTypes = o.columnTypes; this.destinationCreateTable = o.destinationCreateTable; + this.isMmCtas = o.isMmCtas; } public LoadFileDesc(final CreateTableDesc createTableDesc, final CreateViewDesc createViewDesc, final Path sourcePath, final Path targetDir, final boolean isDfsDir, - final String columns, final String columnTypes) { - this(sourcePath, targetDir, isDfsDir, columns, columnTypes); + final String columns, final String columnTypes, boolean isMmCtas) { + this(sourcePath, targetDir, isDfsDir, columns, columnTypes, isMmCtas); if (createTableDesc != null && createTableDesc.getDatabaseName() != null && createTableDesc.getTableName() != null) { destinationCreateTable = (createTableDesc.getTableName().contains(".") ? "" : createTableDesc @@ -66,15 +68,15 @@ public class LoadFileDesc extends LoadDesc implements Serializable { } } - public LoadFileDesc(final Path sourcePath, final Path targetDir, - final boolean isDfsDir, final String columns, final String columnTypes) { - + public LoadFileDesc(final Path sourcePath, final Path targetDir, final boolean isDfsDir, + final String columns, final String columnTypes, boolean isMmCtas) { super(sourcePath); Utilities.LOG14535.info("creating LFD from " + sourcePath + " to " + targetDir); this.targetDir = targetDir; this.isDfsDir = isDfsDir; this.columns = columns; this.columnTypes = columnTypes; + this.isMmCtas = isMmCtas; } @Explain(displayName = "destination") @@ -131,4 +133,8 @@ public class LoadFileDesc extends LoadDesc implements Serializable { public String getDestinationCreateTable(){ return destinationCreateTable; } + + public boolean isMmCtas() { + return isMmCtas; + } } http://git-wip-us.apache.org/repos/asf/hive/blob/f883d67e/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsCreateConditionalTask.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsCreateConditionalTask.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsCreateConditionalTask.java index 7bc9073..5e0fa34 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsCreateConditionalTask.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsCreateConditionalTask.java @@ -83,17 +83,20 @@ public class TestGenMapRedUtilsCreateConditionalTask { GenMapRedUtils.shouldMergeMovePaths(hiveConf, condInputPath, condOutputPath, mockWork)); reset(mockWork); - when(mockWork.getLoadFileWork()).thenReturn(new LoadFileDesc(condInputPath, condOutputPath, false, "", "")); + when(mockWork.getLoadFileWork()).thenReturn(new LoadFileDesc( + condInputPath, condOutputPath, false, "", "", false)); assertFalse("Merging paths is not allowed when both conditional output path is not equals to MoveWork input path.", GenMapRedUtils.shouldMergeMovePaths(hiveConf, condInputPath, condOutputPath, mockWork)); reset(mockWork); - when(mockWork.getLoadFileWork()).thenReturn(new LoadFileDesc(condOutputPath, new Path("unused"), false, "", "")); + when(mockWork.getLoadFileWork()).thenReturn(new LoadFileDesc( + condOutputPath, new Path("unused"), false, "", "", false)); assertFalse("Merging paths is not allowed when conditional input path is not a BlobStore path.", GenMapRedUtils.shouldMergeMovePaths(hiveConf, new Path("hdfs://hdfs-path"), condOutputPath, mockWork)); reset(mockWork); - when(mockWork.getLoadFileWork()).thenReturn(new LoadFileDesc(condOutputPath, new Path("hdfs://hdfs-path"), false, "", "")); + when(mockWork.getLoadFileWork()).thenReturn(new LoadFileDesc( + condOutputPath, new Path("hdfs://hdfs-path"), false, "", "", false)); assertFalse("Merging paths is not allowed when MoveWork output path is not a BlobStore path.", GenMapRedUtils.shouldMergeMovePaths(hiveConf, condInputPath, condOutputPath, mockWork)); } @@ -105,7 +108,8 @@ public class TestGenMapRedUtilsCreateConditionalTask { final Path targetMoveWorkPath = new Path("s3a://bucket/scratch/-ext-10003"); final MoveWork mockWork = mock(MoveWork.class); - when(mockWork.getLoadFileWork()).thenReturn(new LoadFileDesc(condOutputPath, targetMoveWorkPath, false, "", "")); + when(mockWork.getLoadFileWork()).thenReturn(new LoadFileDesc( + condOutputPath, targetMoveWorkPath, false, "", "", false)); assertTrue("Merging BlobStore paths should be allowed.", GenMapRedUtils.shouldMergeMovePaths(hiveConf, condInputPath, condOutputPath, mockWork)); @@ -129,7 +133,8 @@ public class TestGenMapRedUtilsCreateConditionalTask { MoveWork newWork; // test using loadFileWork - when(mockWork.getLoadFileWork()).thenReturn(new LoadFileDesc(condOutputPath, targetMoveWorkPath, false, "", "")); + when(mockWork.getLoadFileWork()).thenReturn(new LoadFileDesc( + condOutputPath, targetMoveWorkPath, false, "", "", false)); newWork = GenMapRedUtils.mergeMovePaths(condInputPath, mockWork); assertNotNull(newWork); assertNotEquals(newWork, mockWork); @@ -275,7 +280,7 @@ public class TestGenMapRedUtilsCreateConditionalTask { private Task<MoveWork> createMoveTask(Path source, Path destination) { Task<MoveWork> moveTask = mock(MoveTask.class); MoveWork moveWork = new MoveWork(); - moveWork.setLoadFileWork(new LoadFileDesc(source, destination, true, null, null)); + moveWork.setLoadFileWork(new LoadFileDesc(source, destination, true, null, null, false)); when(moveTask.getWork()).thenReturn(moveWork); http://git-wip-us.apache.org/repos/asf/hive/blob/f883d67e/ql/src/test/queries/clientpositive/mm_all.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/mm_all.q b/ql/src/test/queries/clientpositive/mm_all.q index f9a829c..db16920 100644 --- a/ql/src/test/queries/clientpositive/mm_all.q +++ b/ql/src/test/queries/clientpositive/mm_all.q @@ -437,12 +437,20 @@ desc formatted stats2_mm; drop table stats2_mm; +set hive.optimize.skewjoin=true; +set hive.skewjoin.key=2; +set hive.optimize.metadataonly=false; -drop table intermediate; +CREATE TABLE skewjoin_mm(key INT, value STRING) STORED AS TEXTFILE tblproperties ("transactional"="true", "transactional_properties"="insert_only"); +FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE skewjoin_mm SELECT src1.key, src2.value; +select count(distinct key) from skewjoin_mm; +drop table skewjoin_mm; + +set hive.optimize.skewjoin=false; +drop table intermediate; -drop table intermediate; http://git-wip-us.apache.org/repos/asf/hive/blob/f883d67e/ql/src/test/results/clientpositive/llap/mm_all.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/mm_all.q.out b/ql/src/test/results/clientpositive/llap/mm_all.q.out index 7a11301..4bb480a 100644 --- a/ql/src/test/results/clientpositive/llap/mm_all.q.out +++ b/ql/src/test/results/clientpositive/llap/mm_all.q.out @@ -1,8 +1,6 @@ -PREHOOK: query: -- Force multiple writers when reading -drop table intermediate +PREHOOK: query: drop table intermediate PREHOOK: type: DROPTABLE -POSTHOOK: query: -- Force multiple writers when reading -drop table intermediate +POSTHOOK: query: drop table intermediate POSTHOOK: type: DROPTABLE PREHOOK: query: create table intermediate(key int) partitioned by (p int) stored as orc PREHOOK: type: CREATETABLE @@ -180,15 +178,13 @@ POSTHOOK: Input: default@part_mm@key_mm=456 103 455 103 455 103 456 -PREHOOK: query: -- TODO: doesn't work truncate table part_mm partition(key_mm=455); -select * from part_mm order by key, key_mm +PREHOOK: query: select * from part_mm order by key, key_mm PREHOOK: type: QUERY PREHOOK: Input: default@part_mm PREHOOK: Input: default@part_mm@key_mm=455 PREHOOK: Input: default@part_mm@key_mm=456 #### A masked pattern was here #### -POSTHOOK: query: -- TODO: doesn't work truncate table part_mm partition(key_mm=455); -select * from part_mm order by key, key_mm +POSTHOOK: query: select * from part_mm order by key, key_mm POSTHOOK: type: QUERY POSTHOOK: Input: default@part_mm POSTHOOK: Input: default@part_mm@key_mm=455 @@ -353,11 +349,9 @@ POSTHOOK: query: drop table simple_mm POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@simple_mm POSTHOOK: Output: default@simple_mm -PREHOOK: query: -- simple DP (no bucketing) -drop table dp_mm +PREHOOK: query: drop table dp_mm PREHOOK: type: DROPTABLE -POSTHOOK: query: -- simple DP (no bucketing) -drop table dp_mm +POSTHOOK: query: drop table dp_mm POSTHOOK: type: DROPTABLE PREHOOK: query: create table dp_mm (key int) partitioned by (key1 string, key2 int) stored as orc tblproperties ("transactional"="true", "transactional_properties"="insert_only") @@ -428,15 +422,11 @@ POSTHOOK: query: drop table dp_mm POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@dp_mm POSTHOOK: Output: default@dp_mm -PREHOOK: query: -- union - -create table union_mm(id int) tblproperties ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: query: create table union_mm(id int) tblproperties ("transactional"="true", "transactional_properties"="insert_only") PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@union_mm -POSTHOOK: query: -- union - -create table union_mm(id int) tblproperties ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: query: create table union_mm(id int) tblproperties ("transactional"="true", "transactional_properties"="insert_only") POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@union_mm @@ -1221,15 +1211,9 @@ POSTHOOK: query: drop table merge1_mm POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@merge1_mm POSTHOOK: Output: default@merge1_mm -PREHOOK: query: -- TODO: need to include merge+union+DP, but it's broken for now - - -drop table ctas0_mm +PREHOOK: query: drop table ctas0_mm PREHOOK: type: DROPTABLE -POSTHOOK: query: -- TODO: need to include merge+union+DP, but it's broken for now - - -drop table ctas0_mm +POSTHOOK: query: drop table ctas0_mm POSTHOOK: type: DROPTABLE PREHOOK: query: create table ctas0_mm tblproperties ("transactional"="true", "transactional_properties"="insert_only") as select * from intermediate PREHOOK: type: CREATETABLE_AS_SELECT @@ -2137,13 +2121,9 @@ POSTHOOK: query: drop table intermmediate_nonpart POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@intermmediate_nonpart POSTHOOK: Output: default@intermmediate_nonpart -PREHOOK: query: -- non-MM export to MM table, with and without partitions - -drop table import0_mm +PREHOOK: query: drop table import0_mm PREHOOK: type: DROPTABLE -POSTHOOK: query: -- non-MM export to MM table, with and without partitions - -drop table import0_mm +POSTHOOK: query: drop table import0_mm POSTHOOK: type: DROPTABLE PREHOOK: query: create table import0_mm(key int, p int) tblproperties("transactional"="true", "transactional_properties"="insert_only") PREHOOK: type: CREATETABLE @@ -2236,13 +2216,9 @@ POSTHOOK: query: drop table import1_mm POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@import1_mm POSTHOOK: Output: default@import1_mm -PREHOOK: query: -- MM export into new MM table, non-part and part - -drop table import2_mm +PREHOOK: query: drop table import2_mm PREHOOK: type: DROPTABLE -POSTHOOK: query: -- MM export into new MM table, non-part and part - -drop table import2_mm +POSTHOOK: query: drop table import2_mm POSTHOOK: type: DROPTABLE PREHOOK: query: import table import2_mm from 'ql/test/data/exports/intermmediate_nonpart' PREHOOK: type: IMPORT @@ -2340,13 +2316,9 @@ POSTHOOK: query: drop table import3_mm POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@import3_mm POSTHOOK: Output: default@import3_mm -PREHOOK: query: -- MM export into existing MM table, non-part and partial part - -drop table import4_mm +PREHOOK: query: drop table import4_mm PREHOOK: type: DROPTABLE -POSTHOOK: query: -- MM export into existing MM table, non-part and partial part - -drop table import4_mm +POSTHOOK: query: drop table import4_mm POSTHOOK: type: DROPTABLE PREHOOK: query: create table import4_mm(key int, p int) tblproperties("transactional"="true", "transactional_properties"="insert_only") PREHOOK: type: CREATETABLE @@ -2427,13 +2399,9 @@ POSTHOOK: query: drop table import5_mm POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@import5_mm POSTHOOK: Output: default@import5_mm -PREHOOK: query: -- MM export into existing non-MM table, non-part and part - -drop table import6_mm +PREHOOK: query: drop table import6_mm PREHOOK: type: DROPTABLE -POSTHOOK: query: -- MM export into existing non-MM table, non-part and part - -drop table import6_mm +POSTHOOK: query: drop table import6_mm POSTHOOK: type: DROPTABLE PREHOOK: query: create table import6_mm(key int, p int) PREHOOK: type: CREATETABLE @@ -2897,10 +2865,10 @@ POSTHOOK: Input: default@intermediate@p=455 POSTHOOK: Input: default@intermediate@p=456 POSTHOOK: Input: default@intermediate@p=457 POSTHOOK: Output: default@multi1_mm@p=1 -POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] -POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] +POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] +POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] PREHOOK: query: select key, key2, p from multi1_mm order by key, key2, p PREHOOK: type: QUERY PREHOOK: Input: default@multi1_mm @@ -3111,7 +3079,7 @@ POSTHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: Input: default@src POSTHOOK: Output: database:default POSTHOOK: Output: default@stats2_mm -POSTHOOK: Lineage: stats2_mm.c0 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: stats2_mm._c0 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: desc formatted stats2_mm PREHOOK: type: DESCTABLE PREHOOK: Input: default@stats2_mm @@ -3120,7 +3088,7 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@stats2_mm # col_name data_type comment -c0 array<string> +_c0 array<string> # Detailed Table Information Database: default @@ -3130,7 +3098,7 @@ Retention: 0 Table Type: MANAGED_TABLE Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} - numFiles 43 + numFiles 55 numRows 500 rawDataSize 5312 totalSize 5812 @@ -3156,6 +3124,41 @@ POSTHOOK: query: drop table stats2_mm POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@stats2_mm POSTHOOK: Output: default@stats2_mm +PREHOOK: query: CREATE TABLE skewjoin_mm(key INT, value STRING) STORED AS TEXTFILE tblproperties ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@skewjoin_mm +POSTHOOK: query: CREATE TABLE skewjoin_mm(key INT, value STRING) STORED AS TEXTFILE tblproperties ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@skewjoin_mm +PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE skewjoin_mm SELECT src1.key, src2.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@skewjoin_mm +POSTHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE skewjoin_mm SELECT src1.key, src2.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@skewjoin_mm +POSTHOOK: Lineage: skewjoin_mm.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: skewjoin_mm.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select count(distinct key) from skewjoin_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@skewjoin_mm +#### A masked pattern was here #### +POSTHOOK: query: select count(distinct key) from skewjoin_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@skewjoin_mm +#### A masked pattern was here #### +309 +PREHOOK: query: drop table skewjoin_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@skewjoin_mm +PREHOOK: Output: default@skewjoin_mm +POSTHOOK: query: drop table skewjoin_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@skewjoin_mm +POSTHOOK: Output: default@skewjoin_mm PREHOOK: query: drop table intermediate PREHOOK: type: DROPTABLE PREHOOK: Input: default@intermediate @@ -3164,7 +3167,3 @@ POSTHOOK: query: drop table intermediate POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@intermediate POSTHOOK: Output: default@intermediate -PREHOOK: query: drop table intermediate -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table intermediate -POSTHOOK: type: DROPTABLE http://git-wip-us.apache.org/repos/asf/hive/blob/f883d67e/ql/src/test/results/clientpositive/mm_all.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/mm_all.q.out b/ql/src/test/results/clientpositive/mm_all.q.out index 4a9e4e5..26854bd 100644 --- a/ql/src/test/results/clientpositive/mm_all.q.out +++ b/ql/src/test/results/clientpositive/mm_all.q.out @@ -1165,12 +1165,12 @@ POSTHOOK: Input: default@merge1_mm@key=103 POSTHOOK: Input: default@merge1_mm@key=97 POSTHOOK: Input: default@merge1_mm@key=98 #### A masked pattern was here #### -100 100 103 103 +100 100 98 98 97 97 -10 10 0 0 +10 10 PREHOOK: query: insert into table merge1_mm partition (key) select key, key from intermediate PREHOOK: type: QUERY PREHOOK: Input: default@intermediate @@ -1216,18 +1216,18 @@ POSTHOOK: Input: default@merge1_mm@key=103 POSTHOOK: Input: default@merge1_mm@key=97 POSTHOOK: Input: default@merge1_mm@key=98 #### A masked pattern was here #### -103 103 100 100 103 103 97 97 100 100 +103 103 97 97 98 98 98 98 -10 10 0 0 -10 10 0 0 +10 10 +10 10 PREHOOK: query: drop table merge1_mm PREHOOK: type: DROPTABLE PREHOOK: Input: default@merge1_mm @@ -2890,10 +2890,10 @@ POSTHOOK: Input: default@intermediate@p=455 POSTHOOK: Input: default@intermediate@p=456 POSTHOOK: Input: default@intermediate@p=457 POSTHOOK: Output: default@multi1_mm@p=1 -POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] +POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key SIMPLE [(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ] +POSTHOOK: Lineage: multi1_mm PARTITION(p=1).key2 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] PREHOOK: query: select key, key2, p from multi1_mm order by key, key2, p PREHOOK: type: QUERY PREHOOK: Input: default@multi1_mm @@ -3149,6 +3149,41 @@ POSTHOOK: query: drop table stats2_mm POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@stats2_mm POSTHOOK: Output: default@stats2_mm +PREHOOK: query: CREATE TABLE skewjoin_mm(key INT, value STRING) STORED AS TEXTFILE tblproperties ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@skewjoin_mm +POSTHOOK: query: CREATE TABLE skewjoin_mm(key INT, value STRING) STORED AS TEXTFILE tblproperties ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@skewjoin_mm +PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE skewjoin_mm SELECT src1.key, src2.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@skewjoin_mm +POSTHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE skewjoin_mm SELECT src1.key, src2.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@skewjoin_mm +POSTHOOK: Lineage: skewjoin_mm.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: skewjoin_mm.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select count(distinct key) from skewjoin_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@skewjoin_mm +#### A masked pattern was here #### +POSTHOOK: query: select count(distinct key) from skewjoin_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@skewjoin_mm +#### A masked pattern was here #### +309 +PREHOOK: query: drop table skewjoin_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@skewjoin_mm +PREHOOK: Output: default@skewjoin_mm +POSTHOOK: query: drop table skewjoin_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@skewjoin_mm +POSTHOOK: Output: default@skewjoin_mm PREHOOK: query: drop table intermediate PREHOOK: type: DROPTABLE PREHOOK: Input: default@intermediate @@ -3157,7 +3192,3 @@ POSTHOOK: query: drop table intermediate POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@intermediate POSTHOOK: Output: default@intermediate -PREHOOK: query: drop table intermediate -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table intermediate -POSTHOOK: type: DROPTABLE
