HIVE-14932: handle bucketing for MM tables (Sergey Shelukhin)
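Summary of the patch below: Utilities.removeTempOrDuplicateFiles now takes an mmWriteId so that, for micro-managed (MM) tables, temp/duplicate-file cleanup and empty-bucket creation are performed inside the per-write-id directory (whose name is checked against ValidWriteIds.getMmFilePrefix) instead of the table or partition root; the bucket-filling logic is factored into addBucketFileToResults/addBucketsToResultsCommon, and SamplePruner falls back to a full table scan for MM tables since input pruning assumes one file per bucket. As a usage illustration, this is a minimal excerpt of the bucketed-MM scenario exercised by the new mm_all2.q test added in this patch (table and column names come from that test):

create table bucket0_mm(key int, id int)
clustered by (key) into 2 buckets
tblproperties('hivecommit'='true');
insert into table bucket0_mm select key, key from intermediate;
select * from bucket0_mm tablesample (bucket 1 out of 2) s;
select * from bucket0_mm tablesample (bucket 2 out of 2) s;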
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/edaebb4b Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/edaebb4b Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/edaebb4b Branch: refs/heads/hive-14535 Commit: edaebb4b29e99a0fd4abf50db910449f1c41d06a Parents: af4ff37 Author: Sergey Shelukhin <ser...@apache.org> Authored: Mon Oct 17 13:41:20 2016 -0700 Committer: Sergey Shelukhin <ser...@apache.org> Committed: Mon Oct 17 13:41:20 2016 -0700 ---------------------------------------------------------------------- .../apache/hadoop/hive/ql/exec/Utilities.java | 201 +++++--- .../hadoop/hive/ql/metadata/Partition.java | 1 + .../hive/ql/optimizer/GenMapRedUtils.java | 2 +- .../hadoop/hive/ql/optimizer/SamplePruner.java | 7 +- .../apache/hadoop/hive/ql/plan/MoveWork.java | 2 +- ql/src/test/queries/clientpositive/mm_all.q | 7 +- ql/src/test/queries/clientpositive/mm_all2.q | 70 +++ ql/src/test/queries/clientpositive/mm_current.q | 18 +- .../results/clientpositive/llap/mm_all.q.out | 204 +++++++- .../results/clientpositive/llap/mm_all2.q.out | 503 +++++++++++++++++++ .../clientpositive/llap/mm_current.q.out | 217 ++++++-- 11 files changed, 1094 insertions(+), 138 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/edaebb4b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index accb237..f1dad71 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -1525,23 +1525,22 @@ public final class Utilities { int dpLevels = dpCtx == null ? 0 : dpCtx.getNumDPCols(), numBuckets = (conf != null && conf.getTable() != null) ? conf.getTable().getNumBuckets() : 0; - return removeTempOrDuplicateFiles(fs, fileStats, dpLevels, numBuckets, hconf); + return removeTempOrDuplicateFiles(fs, fileStats, dpLevels, numBuckets, hconf, null); } public static List<Path> removeTempOrDuplicateFiles(FileSystem fs, FileStatus[] fileStats, - int dpLevels, int numBuckets, Configuration hconf) throws IOException { + int dpLevels, int numBuckets, Configuration hconf, Long mmWriteId) throws IOException { if (fileStats == null) { return null; } - List<Path> result = new ArrayList<Path>(); HashMap<String, FileStatus> taskIDToFile = null; if (dpLevels > 0) { FileStatus parts[] = fileStats; - for (int i = 0; i < parts.length; ++i) { assert parts[i].isDir() : "dynamic partition " + parts[i].getPath() + " is not a directory"; + Utilities.LOG14535.info("removeTempOrDuplicateFiles looking at DP " + parts[i].getPath()); FileStatus[] items = fs.listStatus(parts[i].getPath()); // remove empty directory since DP insert should not generate empty partitions. 
@@ -1551,46 +1550,80 @@ public final class Utilities { LOG.error("Cannot delete empty directory " + parts[i].getPath()); throw new IOException("Cannot delete empty directory " + parts[i].getPath()); } + parts[i] = null; + continue; } - taskIDToFile = removeTempOrDuplicateFiles(items, fs); - // if the table is bucketed and enforce bucketing, we should check and generate all buckets - if (numBuckets > 0 && taskIDToFile != null && !"tez".equalsIgnoreCase(hconf.get(ConfVars.HIVE_EXECUTION_ENGINE.varname))) { - // refresh the file list - items = fs.listStatus(parts[i].getPath()); - // get the missing buckets and generate empty buckets - String taskID1 = taskIDToFile.keySet().iterator().next(); - Path bucketPath = taskIDToFile.values().iterator().next().getPath(); - Utilities.LOG14535.info("Bucket path " + bucketPath); - for (int j = 0; j < numBuckets; ++j) { - addBucketFileIfMissing(result, taskIDToFile, taskID1, bucketPath, j); + if (mmWriteId != null) { + Path mmDir = parts[i].getPath(); + if (!mmDir.getName().equals(ValidWriteIds.getMmFilePrefix(mmWriteId))) { + throw new IOException("Unexpected non-MM directory name " + mmDir); } + Utilities.LOG14535.info("removeTempOrDuplicateFiles processing files in MM directory " + mmDir); } + taskIDToFile = removeTempOrDuplicateFilesNonMm(items, fs); + + // TODO: not clear why two if conditions are different. Preserve the existing logic for now. + addBucketFileToResults(taskIDToFile, numBuckets, hconf, result); } } else { FileStatus[] items = fileStats; if (items.length == 0) { return result; } - taskIDToFile = removeTempOrDuplicateFiles(items, fs); - if(taskIDToFile != null && taskIDToFile.size() > 0 && (numBuckets > taskIDToFile.size()) - && !"tez".equalsIgnoreCase(hconf.get(ConfVars.HIVE_EXECUTION_ENGINE.varname))) { - // get the missing buckets and generate empty buckets for non-dynamic partition - String taskID1 = taskIDToFile.keySet().iterator().next(); - Path bucketPath = taskIDToFile.values().iterator().next().getPath(); - Utilities.LOG14535.info("Bucket path " + bucketPath); - for (int j = 0; j < numBuckets; ++j) { - addBucketFileIfMissing(result, taskIDToFile, taskID1, bucketPath, j); + if (mmWriteId == null) { + taskIDToFile = removeTempOrDuplicateFilesNonMm(items, fs); + } else { + if (items.length > 1) { + throw new IOException("Unexpected directories for non-DP MM: " + Arrays.toString(items)); + } + Path mmDir = items[0].getPath(); + if (!items[0].isDirectory() + || !mmDir.getName().equals(ValidWriteIds.getMmFilePrefix(mmWriteId))) { + throw new IOException("Unexpected non-MM directory " + mmDir); } + Utilities.LOG14535.info( + "removeTempOrDuplicateFiles processing files in MM directory " + mmDir); + taskIDToFile = removeTempOrDuplicateFilesNonMm(fs.listStatus(mmDir), fs); } + // TODO: not clear why two if conditions are different. Preserve the existing logic for now. + addBucketFileToResults2(taskIDToFile, numBuckets, hconf, result); } return result; } + // TODO: not clear why two if conditions are different. Preserve the existing logic for now. + private static void addBucketFileToResults2(HashMap<String, FileStatus> taskIDToFile, + int numBuckets, Configuration hconf, List<Path> result) { + if(taskIDToFile != null && taskIDToFile.size() > 0 && (numBuckets > taskIDToFile.size()) + && !"tez".equalsIgnoreCase(hconf.get(ConfVars.HIVE_EXECUTION_ENGINE.varname))) { + addBucketsToResultsCommon(taskIDToFile, numBuckets, result); + } + } + + // TODO: not clear why two if conditions are different. Preserve the existing logic for now. 
+ private static void addBucketFileToResults(HashMap<String, FileStatus> taskIDToFile, + int numBuckets, Configuration hconf, List<Path> result) { + // if the table is bucketed and enforce bucketing, we should check and generate all buckets + if (numBuckets > 0 && taskIDToFile != null + && !"tez".equalsIgnoreCase(hconf.get(ConfVars.HIVE_EXECUTION_ENGINE.varname))) { + addBucketsToResultsCommon(taskIDToFile, numBuckets, result); + } + } + + private static void addBucketsToResultsCommon( + HashMap<String, FileStatus> taskIDToFile, int numBuckets, List<Path> result) { + String taskID1 = taskIDToFile.keySet().iterator().next(); + Path bucketPath = taskIDToFile.values().iterator().next().getPath(); + Utilities.LOG14535.info("Bucket path " + bucketPath); + for (int j = 0; j < numBuckets; ++j) { + addBucketFileIfMissing(result, taskIDToFile, taskID1, bucketPath, j); + } + } + private static void addBucketFileIfMissing(List<Path> result, HashMap<String, FileStatus> taskIDToFile, String taskID1, Path bucketPath, int j) { - // TODO# this will probably break with directories cause buckets would be above (or not?) String taskID2 = replaceTaskId(taskID1, j); if (!taskIDToFile.containsKey(taskID2)) { // create empty bucket, file name should be derived from taskID2 @@ -1601,77 +1634,81 @@ public final class Utilities { } } - public static HashMap<String, FileStatus> removeTempOrDuplicateFiles(FileStatus[] items, - FileSystem fs) throws IOException { - - if (items == null || fs == null) { + private static HashMap<String, FileStatus> removeTempOrDuplicateFilesNonMm( + FileStatus[] files, FileSystem fs) throws IOException { + if (files == null || fs == null) { return null; } - HashMap<String, FileStatus> taskIdToFile = new HashMap<String, FileStatus>(); - for (FileStatus one : items) { + for (FileStatus one : files) { if (isTempPath(one)) { Utilities.LOG14535.info("removeTempOrDuplicateFiles deleting " + one.getPath()/*, new Exception()*/); if (!fs.delete(one.getPath(), true)) { throw new IOException("Unable to delete tmp file: " + one.getPath()); } } else { - String taskId = getPrefixedTaskIdFromFilename(one.getPath().getName()); - Utilities.LOG14535.info("removeTempOrDuplicateFiles pondering " + one.getPath() + ", taskId " + taskId); - - FileStatus otherFile = taskIdToFile.get(taskId); - if (otherFile == null) { - taskIdToFile.put(taskId, one); - } else { - // Compare the file sizes of all the attempt files for the same task, the largest win - // any attempt files could contain partial results (due to task failures or - // speculative runs), but the largest should be the correct one since the result - // of a successful run should never be smaller than a failed/speculative run. - FileStatus toDelete = null; - - // "LOAD .. INTO" and "INSERT INTO" commands will generate files with - // "_copy_x" suffix. These files are usually read by map tasks and the - // task output gets written to some tmp path. The output file names will - // be of format taskId_attemptId. The usual path for all these tasks is - // srcPath -> taskTmpPath -> tmpPath -> finalPath. - // But, MergeFileTask can move files directly from src path to final path - // without copying it to tmp path. In such cases, different files with - // "_copy_x" suffix will be identified as duplicates (change in value - // of x is wrongly identified as attempt id) and will be deleted. - // To avoid that we will ignore files with "_copy_x" suffix from duplicate - // elimination. 
- if (!isCopyFile(one.getPath().getName())) { - if (otherFile.getLen() >= one.getLen()) { - toDelete = one; - } else { - toDelete = otherFile; - taskIdToFile.put(taskId, one); - } - long len1 = toDelete.getLen(); - long len2 = taskIdToFile.get(taskId).getLen(); - if (!fs.delete(toDelete.getPath(), true)) { - throw new IOException( - "Unable to delete duplicate file: " + toDelete.getPath() - + ". Existing file: " + - taskIdToFile.get(taskId).getPath()); - } else { - LOG.warn("Duplicate taskid file removed: " + toDelete.getPath() + - " with length " - + len1 + ". Existing file: " + - taskIdToFile.get(taskId).getPath() + " with length " - + len2); - } - } else { - LOG.info(one.getPath() + " file identified as duplicate. This file is" + - " not deleted as it has copySuffix."); - } - } + // This would be a single file. See if we need to remove it. + ponderRemovingTempOrDuplicateFile(fs, one, taskIdToFile); } } return taskIdToFile; } + private static void ponderRemovingTempOrDuplicateFile(FileSystem fs, + FileStatus file, HashMap<String, FileStatus> taskIdToFile) throws IOException { + String taskId = getPrefixedTaskIdFromFilename(file.getPath().getName()); + Utilities.LOG14535.info("removeTempOrDuplicateFiles pondering " + file.getPath() + ", taskId " + taskId); + + FileStatus otherFile = taskIdToFile.get(taskId); + taskIdToFile.put(taskId, (otherFile == null) ? file : + compareTempOrDuplicateFiles(fs, file, otherFile)); + } + + private static FileStatus compareTempOrDuplicateFiles(FileSystem fs, + FileStatus file, FileStatus existingFile) throws IOException { + // Compare the file sizes of all the attempt files for the same task, the largest win + // any attempt files could contain partial results (due to task failures or + // speculative runs), but the largest should be the correct one since the result + // of a successful run should never be smaller than a failed/speculative run. + FileStatus toDelete = null, toRetain = null; + + // "LOAD .. INTO" and "INSERT INTO" commands will generate files with + // "_copy_x" suffix. These files are usually read by map tasks and the + // task output gets written to some tmp path. The output file names will + // be of format taskId_attemptId. The usual path for all these tasks is + // srcPath -> taskTmpPath -> tmpPath -> finalPath. + // But, MergeFileTask can move files directly from src path to final path + // without copying it to tmp path. In such cases, different files with + // "_copy_x" suffix will be identified as duplicates (change in value + // of x is wrongly identified as attempt id) and will be deleted. + // To avoid that we will ignore files with "_copy_x" suffix from duplicate + // elimination. + if (isCopyFile(file.getPath().getName())) { + LOG.info(file.getPath() + " file identified as duplicate. This file is" + + " not deleted as it has copySuffix."); + return existingFile; + } + + if (existingFile.getLen() >= file.getLen()) { + toDelete = file; + toRetain = existingFile; + } else { + toDelete = existingFile; + toRetain = file; + } + if (!fs.delete(toDelete.getPath(), true)) { + throw new IOException( + "Unable to delete duplicate file: " + toDelete.getPath() + + ". Existing file: " + toRetain.getPath()); + } else { + LOG.warn("Duplicate taskid file removed: " + toDelete.getPath() + " with length " + + toDelete.getLen() + ". 
Existing file: " + toRetain.getPath() + " with length " + + toRetain.getLen()); + } + return toRetain; + } + public static boolean isCopyFile(String filename) { String taskId = filename; String copyFileSuffix = null; @@ -3928,7 +3965,7 @@ public final class Utilities { if (lbLevels != 0) return; FileStatus[] finalResults = mmDirectories.toArray(new FileStatus[mmDirectories.size()]); List<Path> emptyBuckets = Utilities.removeTempOrDuplicateFiles( - fs, finalResults, dpLevels, mbc == null ? 0 : mbc.numBuckets, hconf); + fs, finalResults, dpLevels, mbc == null ? 0 : mbc.numBuckets, hconf, mmWriteId); // create empty buckets if necessary if (emptyBuckets.size() > 0) { assert mbc != null; http://git-wip-us.apache.org/repos/asf/hive/blob/edaebb4b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java index c0edde9..95a09e2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java @@ -366,6 +366,7 @@ public class Partition implements Serializable { // TODO: add test case and clean it up @SuppressWarnings("nls") public Path getBucketPath(int bucketNum) { + // Note: this makes assumptions that won't work with MM tables, unions, etc. FileStatus srcs[] = getSortedPaths(); if (srcs == null) { return null; http://git-wip-us.apache.org/repos/asf/hive/blob/edaebb4b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java index bd26854..0b5d56b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java @@ -674,7 +674,7 @@ public final class GenMapRedUtils { } String path = p.toString(); if (LOG.isDebugEnabled()) { - LOG.debug("Adding " + path + " of table" + alias_id); + LOG.debug("Adding " + path + " of table " + alias_id); } partDir.add(p); http://git-wip-us.apache.org/repos/asf/hive/blob/edaebb4b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SamplePruner.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SamplePruner.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SamplePruner.java index dd679db..2ad1f1c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SamplePruner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SamplePruner.java @@ -32,6 +32,7 @@ import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.ql.exec.FilterOperator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; @@ -190,7 +191,9 @@ public class SamplePruner extends Transform { String fullScanMsg = ""; // check if input pruning is possible - if (sampleDescr.getInputPruning()) { + // TODO: this relies a lot on having one file per bucket. No support for MM tables for now. 
+ boolean isMmTable = MetaStoreUtils.isMmTable(part.getTable().getParameters()); + if (sampleDescr.getInputPruning() && !isMmTable) { LOG.trace("numerator = " + num); LOG.trace("denominator = " + den); LOG.trace("bucket count = " + bucketCount); @@ -217,7 +220,7 @@ public class SamplePruner extends Transform { } } else { // need to do full scan - fullScanMsg = "Tablesample not on clustered columns"; + fullScanMsg = isMmTable ? "MM table" : "Tablesample not on clustered columns"; } LOG.warn(fullScanMsg + ", using full table scan"); Path[] ret = part.getPath(); http://git-wip-us.apache.org/repos/asf/hive/blob/edaebb4b/ql/src/java/org/apache/hadoop/hive/ql/plan/MoveWork.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/MoveWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/MoveWork.java index 3ada134..f0b2775 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/MoveWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/MoveWork.java @@ -70,7 +70,7 @@ public class MoveWork implements Serializable { boolean checkFileFormat, boolean srcLocal) { this(inputs, outputs); Utilities.LOG14535.info("Creating MoveWork " + System.identityHashCode(this) - + " with " + loadTableWork + "; " + loadFileWork, new Exception()); + + " with " + loadTableWork + "; " + loadFileWork); this.loadTableWork = loadTableWork; this.loadFileWork = loadFileWork; this.checkFileFormat = checkFileFormat; http://git-wip-us.apache.org/repos/asf/hive/blob/edaebb4b/ql/src/test/queries/clientpositive/mm_all.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/mm_all.q b/ql/src/test/queries/clientpositive/mm_all.q index 8163d2f..bdda5f5 100644 --- a/ql/src/test/queries/clientpositive/mm_all.q +++ b/ql/src/test/queries/clientpositive/mm_all.q @@ -11,6 +11,8 @@ drop table intermediate; create table intermediate(key int) partitioned by (p int) stored as orc; insert into table intermediate partition(p='455') select distinct key from src where key >= 0 order by key desc limit 2; insert into table intermediate partition(p='456') select distinct key from src where key is not null order by key asc limit 2; +insert into table intermediate partition(p='457') select distinct key from src where key >= 100 order by key asc limit 2; + drop table part_mm; create table part_mm(key int) partitioned by (key_mm int) stored as orc tblproperties ('hivecommit'='true'); @@ -18,7 +20,7 @@ explain insert into table part_mm partition(key_mm='455') select key from interm insert into table part_mm partition(key_mm='455') select key from intermediate; insert into table part_mm partition(key_mm='456') select key from intermediate; insert into table part_mm partition(key_mm='455') select key from intermediate; -select * from part_mm order by key; +select * from part_mm order by key, key_mm; drop table part_mm; drop table simple_mm; @@ -177,7 +179,4 @@ select * from ctas1_mm; drop table ctas1_mm; - --- TODO load, multi-insert, buckets - drop table intermediate; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/edaebb4b/ql/src/test/queries/clientpositive/mm_all2.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/mm_all2.q b/ql/src/test/queries/clientpositive/mm_all2.q new file mode 100644 index 0000000..a1d2301 --- /dev/null +++ b/ql/src/test/queries/clientpositive/mm_all2.q @@ -0,0 +1,70 @@ +set 
hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.fetch.task.conversion=none; +set tez.grouping.min-size=1; +set tez.grouping.max-size=2; +set hive.exec.dynamic.partition.mode=nonstrict; + + +-- Force multiple writers when reading +drop table intermediate; +create table intermediate(key int) partitioned by (p int) stored as orc; +insert into table intermediate partition(p='455') select distinct key from src where key >= 0 order by key desc limit 2; +insert into table intermediate partition(p='456') select distinct key from src where key is not null order by key asc limit 2; +insert into table intermediate partition(p='457') select distinct key from src where key >= 100 order by key asc limit 2; + + + +drop table bucket0_mm; +create table bucket0_mm(key int, id int) +clustered by (key) into 2 buckets +tblproperties('hivecommit'='true'); +insert into table bucket0_mm select key, key from intermediate; +select * from bucket0_mm; +select * from bucket0_mm tablesample (bucket 1 out of 2) s; +select * from bucket0_mm tablesample (bucket 2 out of 2) s; +insert into table bucket0_mm select key, key from intermediate; +select * from bucket0_mm; +select * from bucket0_mm tablesample (bucket 1 out of 2) s; +select * from bucket0_mm tablesample (bucket 2 out of 2) s; +drop table bucket0_mm; + + +drop table bucket1_mm; +create table bucket1_mm(key int, id int) partitioned by (key2 int) +clustered by (key) sorted by (key) into 2 buckets +tblproperties('hivecommit'='true'); +insert into table bucket1_mm partition (key2) +select key + 1, key, key - 1 from intermediate +union all +select key - 1, key, key + 1 from intermediate; +select * from bucket1_mm; +select * from bucket1_mm tablesample (bucket 1 out of 2) s; +select * from bucket1_mm tablesample (bucket 2 out of 2) s; +drop table bucket1_mm; + + + +drop table bucket2_mm; +create table bucket2_mm(key int, id int) +clustered by (key) into 10 buckets +tblproperties('hivecommit'='true'); +insert into table bucket2_mm select key, key from intermediate where key == 0; +select * from bucket2_mm; +select * from bucket2_mm tablesample (bucket 1 out of 10) s; +select * from bucket2_mm tablesample (bucket 4 out of 10) s; +insert into table bucket2_mm select key, key from intermediate where key in (0, 103); +select * from bucket2_mm; +select * from bucket2_mm tablesample (bucket 1 out of 10) s; +select * from bucket2_mm tablesample (bucket 4 out of 10) s; +drop table bucket2_mm; + + + +-- TODO# future + + + +-- TODO load, multi-insert, buckets + +drop table intermediate; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/edaebb4b/ql/src/test/queries/clientpositive/mm_current.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/mm_current.q b/ql/src/test/queries/clientpositive/mm_current.q index f2d353f..44445dd 100644 --- a/ql/src/test/queries/clientpositive/mm_current.q +++ b/ql/src/test/queries/clientpositive/mm_current.q @@ -10,16 +10,24 @@ drop table intermediate; create table intermediate(key int) partitioned by (p int) stored as orc; insert into table intermediate partition(p='455') select distinct key from src where key >= 0 order by key desc limit 2; insert into table intermediate partition(p='456') select distinct key from src where key is not null order by key asc limit 2; +insert into table intermediate partition(p='457') select distinct key from src where key >= 100 order by key asc limit 2; +drop table bucket1_mm; +create table 
bucket1_mm(key int, id int) partitioned by (key2 int) +clustered by (key) sorted by (key) into 2 buckets +tblproperties('hivecommit'='true'); +insert into table bucket1_mm partition (key2) +select key + 1, key, key - 1 from intermediate +union all +select key - 1, key, key + 1 from intermediate; +select * from bucket1_mm; +select * from bucket1_mm tablesample (bucket 1 out of 2) s; +select * from bucket1_mm tablesample (bucket 2 out of 2) s; +drop table bucket1_mm; -create table ctas1_mm tblproperties ('hivecommit'='true') as - select * from intermediate union all select * from intermediate; -select * from ctas1_mm; -drop table ctas1_mm; - drop table intermediate; http://git-wip-us.apache.org/repos/asf/hive/blob/edaebb4b/ql/src/test/results/clientpositive/llap/mm_all.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/mm_all.q.out b/ql/src/test/results/clientpositive/llap/mm_all.q.out index 93716de..4061e5b 100644 --- a/ql/src/test/results/clientpositive/llap/mm_all.q.out +++ b/ql/src/test/results/clientpositive/llap/mm_all.q.out @@ -30,6 +30,15 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@intermediate@p=456 POSTHOOK: Lineage: intermediate PARTITION(p=456).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: insert into table intermediate partition(p='457') select distinct key from src where key >= 100 order by key asc limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@intermediate@p=457 +POSTHOOK: query: insert into table intermediate partition(p='457') select distinct key from src where key >= 100 order by key asc limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@intermediate@p=457 +POSTHOOK: Lineage: intermediate PARTITION(p=457).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] PREHOOK: query: drop table part_mm PREHOOK: type: DROPTABLE POSTHOOK: query: drop table part_mm @@ -61,14 +70,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: intermediate - Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -101,12 +110,14 @@ PREHOOK: type: QUERY PREHOOK: Input: default@intermediate PREHOOK: Input: default@intermediate@p=455 PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 PREHOOK: Output: default@part_mm@key_mm=455 POSTHOOK: query: insert into table part_mm partition(key_mm='455') select key from intermediate POSTHOOK: type: QUERY POSTHOOK: Input: default@intermediate POSTHOOK: Input: default@intermediate@p=455 POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 POSTHOOK: Output: default@part_mm@key_mm=455 POSTHOOK: Lineage: part_mm PARTITION(key_mm=455).key SIMPLE 
[(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] PREHOOK: query: insert into table part_mm partition(key_mm='456') select key from intermediate @@ -114,12 +125,14 @@ PREHOOK: type: QUERY PREHOOK: Input: default@intermediate PREHOOK: Input: default@intermediate@p=455 PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 PREHOOK: Output: default@part_mm@key_mm=456 POSTHOOK: query: insert into table part_mm partition(key_mm='456') select key from intermediate POSTHOOK: type: QUERY POSTHOOK: Input: default@intermediate POSTHOOK: Input: default@intermediate@p=455 POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 POSTHOOK: Output: default@part_mm@key_mm=456 POSTHOOK: Lineage: part_mm PARTITION(key_mm=456).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] PREHOOK: query: insert into table part_mm partition(key_mm='455') select key from intermediate @@ -127,21 +140,23 @@ PREHOOK: type: QUERY PREHOOK: Input: default@intermediate PREHOOK: Input: default@intermediate@p=455 PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 PREHOOK: Output: default@part_mm@key_mm=455 POSTHOOK: query: insert into table part_mm partition(key_mm='455') select key from intermediate POSTHOOK: type: QUERY POSTHOOK: Input: default@intermediate POSTHOOK: Input: default@intermediate@p=455 POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 POSTHOOK: Output: default@part_mm@key_mm=455 POSTHOOK: Lineage: part_mm PARTITION(key_mm=455).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -PREHOOK: query: select * from part_mm order by key +PREHOOK: query: select * from part_mm order by key, key_mm PREHOOK: type: QUERY PREHOOK: Input: default@part_mm PREHOOK: Input: default@part_mm@key_mm=455 PREHOOK: Input: default@part_mm@key_mm=456 #### A masked pattern was here #### -POSTHOOK: query: select * from part_mm order by key +POSTHOOK: query: select * from part_mm order by key, key_mm POSTHOOK: type: QUERY POSTHOOK: Input: default@part_mm POSTHOOK: Input: default@part_mm@key_mm=455 @@ -150,15 +165,21 @@ POSTHOOK: Input: default@part_mm@key_mm=456 0 455 0 455 0 456 -10 456 10 455 10 455 +10 456 97 455 97 455 97 456 -98 456 98 455 98 455 +98 456 +100 455 +100 455 +100 456 +103 455 +103 455 +103 456 PREHOOK: query: drop table part_mm PREHOOK: type: DROPTABLE PREHOOK: Input: default@part_mm @@ -184,12 +205,14 @@ PREHOOK: type: QUERY PREHOOK: Input: default@intermediate PREHOOK: Input: default@intermediate@p=455 PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 PREHOOK: Output: default@simple_mm POSTHOOK: query: insert into table simple_mm select key from intermediate POSTHOOK: type: QUERY POSTHOOK: Input: default@intermediate POSTHOOK: Input: default@intermediate@p=455 POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 POSTHOOK: Output: default@simple_mm POSTHOOK: Lineage: simple_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] PREHOOK: query: insert overwrite table simple_mm select key from intermediate @@ -197,12 +220,14 @@ PREHOOK: type: QUERY PREHOOK: Input: default@intermediate PREHOOK: Input: default@intermediate@p=455 PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 PREHOOK: Output: default@simple_mm POSTHOOK: query: insert overwrite 
table simple_mm select key from intermediate POSTHOOK: type: QUERY POSTHOOK: Input: default@intermediate POSTHOOK: Input: default@intermediate@p=455 POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 POSTHOOK: Output: default@simple_mm POSTHOOK: Lineage: simple_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] PREHOOK: query: select * from simple_mm order by key @@ -217,17 +242,21 @@ POSTHOOK: Input: default@simple_mm 10 97 98 +100 +103 PREHOOK: query: insert into table simple_mm select key from intermediate PREHOOK: type: QUERY PREHOOK: Input: default@intermediate PREHOOK: Input: default@intermediate@p=455 PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 PREHOOK: Output: default@simple_mm POSTHOOK: query: insert into table simple_mm select key from intermediate POSTHOOK: type: QUERY POSTHOOK: Input: default@intermediate POSTHOOK: Input: default@intermediate@p=455 POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 POSTHOOK: Output: default@simple_mm POSTHOOK: Lineage: simple_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] PREHOOK: query: select * from simple_mm order by key @@ -246,6 +275,10 @@ POSTHOOK: Input: default@simple_mm 97 98 98 +100 +100 +103 +103 PREHOOK: query: drop table simple_mm PREHOOK: type: DROPTABLE PREHOOK: Input: default@simple_mm @@ -275,17 +308,23 @@ PREHOOK: type: QUERY PREHOOK: Input: default@intermediate PREHOOK: Input: default@intermediate@p=455 PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 PREHOOK: Output: default@dp_mm@key1=123 POSTHOOK: query: insert into table dp_mm partition (key1='123', key2) select key, key from intermediate POSTHOOK: type: QUERY POSTHOOK: Input: default@intermediate POSTHOOK: Input: default@intermediate@p=455 POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 POSTHOOK: Output: default@dp_mm@key1=123/key2=0 POSTHOOK: Output: default@dp_mm@key1=123/key2=10 +POSTHOOK: Output: default@dp_mm@key1=123/key2=100 +POSTHOOK: Output: default@dp_mm@key1=123/key2=103 POSTHOOK: Output: default@dp_mm@key1=123/key2=97 POSTHOOK: Output: default@dp_mm@key1=123/key2=98 POSTHOOK: Lineage: dp_mm PARTITION(key1=123,key2=0).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dp_mm PARTITION(key1=123,key2=100).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dp_mm PARTITION(key1=123,key2=103).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: dp_mm PARTITION(key1=123,key2=10).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: dp_mm PARTITION(key1=123,key2=97).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: dp_mm PARTITION(key1=123,key2=98).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] @@ -294,6 +333,8 @@ PREHOOK: type: QUERY PREHOOK: Input: default@dp_mm PREHOOK: Input: default@dp_mm@key1=123/key2=0 PREHOOK: Input: default@dp_mm@key1=123/key2=10 +PREHOOK: Input: default@dp_mm@key1=123/key2=100 +PREHOOK: Input: default@dp_mm@key1=123/key2=103 PREHOOK: Input: default@dp_mm@key1=123/key2=97 PREHOOK: Input: default@dp_mm@key1=123/key2=98 #### A masked pattern was here #### 
@@ -302,6 +343,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@dp_mm POSTHOOK: Input: default@dp_mm@key1=123/key2=0 POSTHOOK: Input: default@dp_mm@key1=123/key2=10 +POSTHOOK: Input: default@dp_mm@key1=123/key2=100 +POSTHOOK: Input: default@dp_mm@key1=123/key2=103 POSTHOOK: Input: default@dp_mm@key1=123/key2=97 POSTHOOK: Input: default@dp_mm@key1=123/key2=98 #### A masked pattern was here #### @@ -309,6 +352,8 @@ POSTHOOK: Input: default@dp_mm@key1=123/key2=98 10 123 10 97 123 97 98 123 98 +100 123 100 +103 123 103 PREHOOK: query: drop table dp_mm PREHOOK: type: DROPTABLE PREHOOK: Input: default@dp_mm @@ -338,6 +383,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@intermediate PREHOOK: Input: default@intermediate@p=455 PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 PREHOOK: Output: default@union_mm POSTHOOK: query: insert into table union_mm select temps.p from ( @@ -348,6 +394,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@intermediate POSTHOOK: Input: default@intermediate@p=455 POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 POSTHOOK: Output: default@union_mm POSTHOOK: Lineage: union_mm.id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] PREHOOK: query: select * from union_mm order by id @@ -366,6 +413,10 @@ POSTHOOK: Input: default@union_mm 98 98 99 +100 +101 +103 +104 PREHOOK: query: insert into table union_mm select p from ( @@ -379,6 +430,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@intermediate PREHOOK: Input: default@intermediate@p=455 PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 PREHOOK: Output: default@union_mm POSTHOOK: query: insert into table union_mm select p from @@ -393,6 +445,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@intermediate POSTHOOK: Input: default@intermediate@p=455 POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 POSTHOOK: Output: default@union_mm POSTHOOK: Lineage: union_mm.id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] PREHOOK: query: select * from union_mm order by id @@ -422,6 +475,16 @@ POSTHOOK: Input: default@union_mm 99 99 100 +100 +100 +101 +101 +102 +103 +103 +104 +104 +105 PREHOOK: query: insert into table union_mm SELECT p FROM ( @@ -442,6 +505,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@intermediate PREHOOK: Input: default@intermediate@p=455 PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 PREHOOK: Output: default@union_mm POSTHOOK: query: insert into table union_mm SELECT p FROM @@ -463,6 +527,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@intermediate POSTHOOK: Input: default@intermediate@p=455 POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 POSTHOOK: Output: default@union_mm POSTHOOK: Lineage: union_mm.id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] PREHOOK: query: select * from union_mm order by id @@ -502,6 +567,21 @@ POSTHOOK: Input: default@union_mm 99 100 100 +100 +100 +101 +101 +101 +102 +102 +103 +103 +103 +104 +104 +104 +105 +105 PREHOOK: query: drop table union_mm PREHOOK: type: DROPTABLE PREHOOK: Input: default@union_mm @@ -527,6 +607,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@intermediate PREHOOK: Input: default@intermediate@p=455 PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 PREHOOK: Output: 
default@partunion_mm POSTHOOK: query: insert into table partunion_mm partition(key) select temps.* from ( @@ -537,14 +618,23 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@intermediate POSTHOOK: Input: default@intermediate@p=455 POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 POSTHOOK: Output: default@partunion_mm@key=0 POSTHOOK: Output: default@partunion_mm@key=1 POSTHOOK: Output: default@partunion_mm@key=10 +POSTHOOK: Output: default@partunion_mm@key=100 +POSTHOOK: Output: default@partunion_mm@key=101 +POSTHOOK: Output: default@partunion_mm@key=103 +POSTHOOK: Output: default@partunion_mm@key=104 POSTHOOK: Output: default@partunion_mm@key=11 POSTHOOK: Output: default@partunion_mm@key=97 POSTHOOK: Output: default@partunion_mm@key=98 POSTHOOK: Output: default@partunion_mm@key=99 POSTHOOK: Lineage: partunion_mm PARTITION(key=0).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: partunion_mm PARTITION(key=100).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: partunion_mm PARTITION(key=101).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: partunion_mm PARTITION(key=103).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: partunion_mm PARTITION(key=104).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: partunion_mm PARTITION(key=10).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: partunion_mm PARTITION(key=11).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: partunion_mm PARTITION(key=1).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] @@ -557,6 +647,10 @@ PREHOOK: Input: default@partunion_mm PREHOOK: Input: default@partunion_mm@key=0 PREHOOK: Input: default@partunion_mm@key=1 PREHOOK: Input: default@partunion_mm@key=10 +PREHOOK: Input: default@partunion_mm@key=100 +PREHOOK: Input: default@partunion_mm@key=101 +PREHOOK: Input: default@partunion_mm@key=103 +PREHOOK: Input: default@partunion_mm@key=104 PREHOOK: Input: default@partunion_mm@key=11 PREHOOK: Input: default@partunion_mm@key=97 PREHOOK: Input: default@partunion_mm@key=98 @@ -568,6 +662,10 @@ POSTHOOK: Input: default@partunion_mm POSTHOOK: Input: default@partunion_mm@key=0 POSTHOOK: Input: default@partunion_mm@key=1 POSTHOOK: Input: default@partunion_mm@key=10 +POSTHOOK: Input: default@partunion_mm@key=100 +POSTHOOK: Input: default@partunion_mm@key=101 +POSTHOOK: Input: default@partunion_mm@key=103 +POSTHOOK: Input: default@partunion_mm@key=104 POSTHOOK: Input: default@partunion_mm@key=11 POSTHOOK: Input: default@partunion_mm@key=97 POSTHOOK: Input: default@partunion_mm@key=98 @@ -581,6 +679,10 @@ POSTHOOK: Input: default@partunion_mm@key=99 98 98 98 98 99 99 +100 100 +101 101 +103 103 +104 104 PREHOOK: query: drop table partunion_mm PREHOOK: type: DROPTABLE PREHOOK: Input: default@partunion_mm @@ -605,6 +707,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@intermediate PREHOOK: Input: default@intermediate@p=455 PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 PREHOOK: Output: default@skew_mm POSTHOOK: query: insert into table skew_mm select key, key, key from intermediate @@ -612,6 +715,7 @@ 
POSTHOOK: type: QUERY POSTHOOK: Input: default@intermediate POSTHOOK: Input: default@intermediate@p=455 POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 POSTHOOK: Output: default@skew_mm POSTHOOK: Lineage: skew_mm.k1 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: skew_mm.k2 SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] @@ -628,6 +732,8 @@ POSTHOOK: Input: default@skew_mm 10 10 10 97 97 97 98 98 98 +100 100 100 +103 103 103 PREHOOK: query: drop table skew_mm PREHOOK: type: DROPTABLE PREHOOK: Input: default@skew_mm @@ -654,6 +760,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@intermediate PREHOOK: Input: default@intermediate@p=455 PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 PREHOOK: Output: default@skew_dp_union_mm POSTHOOK: query: insert into table skew_dp_union_mm partition (k3) select key as i, key as j, key as k, key as l from intermediate @@ -663,10 +770,15 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@intermediate POSTHOOK: Input: default@intermediate@p=455 POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 POSTHOOK: Output: default@skew_dp_union_mm@k3=0 POSTHOOK: Output: default@skew_dp_union_mm@k3=10 +POSTHOOK: Output: default@skew_dp_union_mm@k3=100 POSTHOOK: Output: default@skew_dp_union_mm@k3=101 POSTHOOK: Output: default@skew_dp_union_mm@k3=102 +POSTHOOK: Output: default@skew_dp_union_mm@k3=103 +POSTHOOK: Output: default@skew_dp_union_mm@k3=104 +POSTHOOK: Output: default@skew_dp_union_mm@k3=107 POSTHOOK: Output: default@skew_dp_union_mm@k3=14 POSTHOOK: Output: default@skew_dp_union_mm@k3=4 POSTHOOK: Output: default@skew_dp_union_mm@k3=97 @@ -674,12 +786,24 @@ POSTHOOK: Output: default@skew_dp_union_mm@k3=98 POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=0).k1 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=0).k2 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=0).k4 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=100).k1 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=100).k2 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=100).k4 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=101).k1 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=101).k2 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=101).k4 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=102).k1 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=102).k2 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=102).k4 EXPRESSION 
[(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=103).k1 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=103).k2 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=103).k4 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=104).k1 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=104).k2 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=104).k4 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=107).k1 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=107).k2 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=107).k4 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=10).k1 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=10).k2 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: skew_dp_union_mm PARTITION(k3=10).k4 EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] @@ -700,8 +824,12 @@ PREHOOK: type: QUERY PREHOOK: Input: default@skew_dp_union_mm PREHOOK: Input: default@skew_dp_union_mm@k3=0 PREHOOK: Input: default@skew_dp_union_mm@k3=10 +PREHOOK: Input: default@skew_dp_union_mm@k3=100 PREHOOK: Input: default@skew_dp_union_mm@k3=101 PREHOOK: Input: default@skew_dp_union_mm@k3=102 +PREHOOK: Input: default@skew_dp_union_mm@k3=103 +PREHOOK: Input: default@skew_dp_union_mm@k3=104 +PREHOOK: Input: default@skew_dp_union_mm@k3=107 PREHOOK: Input: default@skew_dp_union_mm@k3=14 PREHOOK: Input: default@skew_dp_union_mm@k3=4 PREHOOK: Input: default@skew_dp_union_mm@k3=97 @@ -712,8 +840,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@skew_dp_union_mm POSTHOOK: Input: default@skew_dp_union_mm@k3=0 POSTHOOK: Input: default@skew_dp_union_mm@k3=10 +POSTHOOK: Input: default@skew_dp_union_mm@k3=100 POSTHOOK: Input: default@skew_dp_union_mm@k3=101 POSTHOOK: Input: default@skew_dp_union_mm@k3=102 +POSTHOOK: Input: default@skew_dp_union_mm@k3=103 +POSTHOOK: Input: default@skew_dp_union_mm@k3=104 +POSTHOOK: Input: default@skew_dp_union_mm@k3=107 POSTHOOK: Input: default@skew_dp_union_mm@k3=14 POSTHOOK: Input: default@skew_dp_union_mm@k3=4 POSTHOOK: Input: default@skew_dp_union_mm@k3=97 @@ -727,6 +859,10 @@ POSTHOOK: Input: default@skew_dp_union_mm@k3=98 98 98 98 98 98 99 100 101 99 100 101 102 +100 100 100 100 +101 102 103 104 +103 103 103 103 +104 105 106 107 PREHOOK: query: drop table skew_dp_union_mm PREHOOK: type: DROPTABLE PREHOOK: Input: default@skew_dp_union_mm @@ -748,12 +884,14 @@ PREHOOK: type: QUERY PREHOOK: Input: default@intermediate PREHOOK: Input: default@intermediate@p=455 PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 
PREHOOK: Output: default@merge0_mm POSTHOOK: query: insert into table merge0_mm select key from intermediate POSTHOOK: type: QUERY POSTHOOK: Input: default@intermediate POSTHOOK: Input: default@intermediate@p=455 POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 POSTHOOK: Output: default@merge0_mm POSTHOOK: Lineage: merge0_mm.id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] PREHOOK: query: select * from merge0_mm @@ -766,6 +904,8 @@ POSTHOOK: Input: default@merge0_mm #### A masked pattern was here #### 98 97 +100 +103 0 10 PREHOOK: query: insert into table merge0_mm select key from intermediate @@ -773,12 +913,14 @@ PREHOOK: type: QUERY PREHOOK: Input: default@intermediate PREHOOK: Input: default@intermediate@p=455 PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 PREHOOK: Output: default@merge0_mm POSTHOOK: query: insert into table merge0_mm select key from intermediate POSTHOOK: type: QUERY POSTHOOK: Input: default@intermediate POSTHOOK: Input: default@intermediate@p=455 POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 POSTHOOK: Output: default@merge0_mm POSTHOOK: Lineage: merge0_mm.id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] PREHOOK: query: select * from merge0_mm @@ -791,10 +933,14 @@ POSTHOOK: Input: default@merge0_mm #### A masked pattern was here #### 98 97 +100 +103 0 10 98 97 +100 +103 0 10 PREHOOK: query: drop table merge0_mm @@ -818,17 +964,23 @@ PREHOOK: type: QUERY PREHOOK: Input: default@intermediate PREHOOK: Input: default@intermediate@p=455 PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 PREHOOK: Output: default@merge1_mm POSTHOOK: query: insert into table merge1_mm partition (key) select key, key from intermediate POSTHOOK: type: QUERY POSTHOOK: Input: default@intermediate POSTHOOK: Input: default@intermediate@p=455 POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 POSTHOOK: Output: default@merge1_mm@key=0 POSTHOOK: Output: default@merge1_mm@key=10 +POSTHOOK: Output: default@merge1_mm@key=100 +POSTHOOK: Output: default@merge1_mm@key=103 POSTHOOK: Output: default@merge1_mm@key=97 POSTHOOK: Output: default@merge1_mm@key=98 POSTHOOK: Lineage: merge1_mm PARTITION(key=0).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: merge1_mm PARTITION(key=100).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: merge1_mm PARTITION(key=103).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: merge1_mm PARTITION(key=10).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: merge1_mm PARTITION(key=97).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: merge1_mm PARTITION(key=98).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] @@ -837,6 +989,8 @@ PREHOOK: type: QUERY PREHOOK: Input: default@merge1_mm PREHOOK: Input: default@merge1_mm@key=0 PREHOOK: Input: default@merge1_mm@key=10 +PREHOOK: Input: default@merge1_mm@key=100 +PREHOOK: Input: default@merge1_mm@key=103 PREHOOK: Input: default@merge1_mm@key=97 PREHOOK: Input: default@merge1_mm@key=98 #### A masked pattern was here #### @@ -845,9 +999,13 @@ POSTHOOK: type: 
QUERY POSTHOOK: Input: default@merge1_mm POSTHOOK: Input: default@merge1_mm@key=0 POSTHOOK: Input: default@merge1_mm@key=10 +POSTHOOK: Input: default@merge1_mm@key=100 +POSTHOOK: Input: default@merge1_mm@key=103 POSTHOOK: Input: default@merge1_mm@key=97 POSTHOOK: Input: default@merge1_mm@key=98 #### A masked pattern was here #### +100 100 +103 103 97 97 98 98 0 0 @@ -857,17 +1015,23 @@ PREHOOK: type: QUERY PREHOOK: Input: default@intermediate PREHOOK: Input: default@intermediate@p=455 PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 PREHOOK: Output: default@merge1_mm POSTHOOK: query: insert into table merge1_mm partition (key) select key, key from intermediate POSTHOOK: type: QUERY POSTHOOK: Input: default@intermediate POSTHOOK: Input: default@intermediate@p=455 POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 POSTHOOK: Output: default@merge1_mm@key=0 POSTHOOK: Output: default@merge1_mm@key=10 +POSTHOOK: Output: default@merge1_mm@key=100 +POSTHOOK: Output: default@merge1_mm@key=103 POSTHOOK: Output: default@merge1_mm@key=97 POSTHOOK: Output: default@merge1_mm@key=98 POSTHOOK: Lineage: merge1_mm PARTITION(key=0).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: merge1_mm PARTITION(key=100).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: merge1_mm PARTITION(key=103).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: merge1_mm PARTITION(key=10).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: merge1_mm PARTITION(key=97).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: merge1_mm PARTITION(key=98).id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] @@ -876,6 +1040,8 @@ PREHOOK: type: QUERY PREHOOK: Input: default@merge1_mm PREHOOK: Input: default@merge1_mm@key=0 PREHOOK: Input: default@merge1_mm@key=10 +PREHOOK: Input: default@merge1_mm@key=100 +PREHOOK: Input: default@merge1_mm@key=103 PREHOOK: Input: default@merge1_mm@key=97 PREHOOK: Input: default@merge1_mm@key=98 #### A masked pattern was here #### @@ -884,9 +1050,15 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@merge1_mm POSTHOOK: Input: default@merge1_mm@key=0 POSTHOOK: Input: default@merge1_mm@key=10 +POSTHOOK: Input: default@merge1_mm@key=100 +POSTHOOK: Input: default@merge1_mm@key=103 POSTHOOK: Input: default@merge1_mm@key=97 POSTHOOK: Input: default@merge1_mm@key=98 #### A masked pattern was here #### +100 100 +100 100 +103 103 +103 103 97 97 97 97 98 98 @@ -918,6 +1090,7 @@ PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@intermediate PREHOOK: Input: default@intermediate@p=455 PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 PREHOOK: Output: database:default PREHOOK: Output: default@ctas0_mm POSTHOOK: query: create table ctas0_mm tblproperties ('hivecommit'='true') as select * from intermediate @@ -925,6 +1098,7 @@ POSTHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: Input: default@intermediate POSTHOOK: Input: default@intermediate@p=455 POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 POSTHOOK: Output: database:default POSTHOOK: Output: default@ctas0_mm POSTHOOK: Lineage: ctas0_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, 
comment:null), ] @@ -939,6 +1113,8 @@ POSTHOOK: Input: default@ctas0_mm #### A masked pattern was here #### 98 455 97 455 +100 457 +103 457 0 456 10 456 PREHOOK: query: drop table ctas0_mm @@ -959,6 +1135,7 @@ PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@intermediate PREHOOK: Input: default@intermediate@p=455 PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 PREHOOK: Output: database:default PREHOOK: Output: default@ctas1_mm POSTHOOK: query: create table ctas1_mm tblproperties ('hivecommit'='true') as @@ -967,6 +1144,7 @@ POSTHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: Input: default@intermediate POSTHOOK: Input: default@intermediate@p=455 POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 POSTHOOK: Output: database:default POSTHOOK: Output: default@ctas1_mm POSTHOOK: Lineage: ctas1_mm.key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] @@ -981,10 +1159,14 @@ POSTHOOK: Input: default@ctas1_mm #### A masked pattern was here #### 98 455 97 455 +100 457 +103 457 0 456 10 456 98 455 97 455 +100 457 +103 457 0 456 10 456 PREHOOK: query: drop table ctas1_mm @@ -995,15 +1177,11 @@ POSTHOOK: query: drop table ctas1_mm POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@ctas1_mm POSTHOOK: Output: default@ctas1_mm -PREHOOK: query: -- TODO load, multi-insert, buckets - -drop table intermediate +PREHOOK: query: drop table intermediate PREHOOK: type: DROPTABLE PREHOOK: Input: default@intermediate PREHOOK: Output: default@intermediate -POSTHOOK: query: -- TODO load, multi-insert, buckets - -drop table intermediate +POSTHOOK: query: drop table intermediate POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@intermediate POSTHOOK: Output: default@intermediate http://git-wip-us.apache.org/repos/asf/hive/blob/edaebb4b/ql/src/test/results/clientpositive/llap/mm_all2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/mm_all2.q.out b/ql/src/test/results/clientpositive/llap/mm_all2.q.out new file mode 100644 index 0000000..95ce33a --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/mm_all2.q.out @@ -0,0 +1,503 @@ +PREHOOK: query: -- Force multiple writers when reading +drop table intermediate +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- Force multiple writers when reading +drop table intermediate +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table intermediate(key int) partitioned by (p int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@intermediate +POSTHOOK: query: create table intermediate(key int) partitioned by (p int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@intermediate +PREHOOK: query: insert into table intermediate partition(p='455') select distinct key from src where key >= 0 order by key desc limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@intermediate@p=455 +POSTHOOK: query: insert into table intermediate partition(p='455') select distinct key from src where key >= 0 order by key desc limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@intermediate@p=455 +POSTHOOK: Lineage: intermediate PARTITION(p=455).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: insert into table intermediate partition(p='456') select distinct key from src where key is 
not null order by key asc limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@intermediate@p=456 +POSTHOOK: query: insert into table intermediate partition(p='456') select distinct key from src where key is not null order by key asc limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@intermediate@p=456 +POSTHOOK: Lineage: intermediate PARTITION(p=456).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: insert into table intermediate partition(p='457') select distinct key from src where key >= 100 order by key asc limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@intermediate@p=457 +POSTHOOK: query: insert into table intermediate partition(p='457') select distinct key from src where key >= 100 order by key asc limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@intermediate@p=457 +POSTHOOK: Lineage: intermediate PARTITION(p=457).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: drop table bucket0_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table bucket0_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table bucket0_mm(key int, id int) +clustered by (key) into 2 buckets +tblproperties('hivecommit'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket0_mm +POSTHOOK: query: create table bucket0_mm(key int, id int) +clustered by (key) into 2 buckets +tblproperties('hivecommit'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket0_mm +PREHOOK: query: insert into table bucket0_mm select key, key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@bucket0_mm +POSTHOOK: query: insert into table bucket0_mm select key, key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@bucket0_mm +POSTHOOK: Lineage: bucket0_mm.id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket0_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from bucket0_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket0_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from bucket0_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket0_mm +#### A masked pattern was here #### +100 100 +0 0 +10 10 +98 98 +103 103 +97 97 +PREHOOK: query: select * from bucket0_mm tablesample (bucket 1 out of 2) s +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket0_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from bucket0_mm tablesample (bucket 1 out of 2) s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket0_mm +#### A masked pattern was here #### +100 100 +0 0 +10 10 +98 98 +PREHOOK: query: select * from bucket0_mm tablesample (bucket 2 out of 2) s +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket0_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from bucket0_mm tablesample (bucket 2 out of 2) s +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@bucket0_mm +#### A masked pattern was here #### +103 103 +97 97 +PREHOOK: query: insert into table bucket0_mm select key, key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@bucket0_mm +POSTHOOK: query: insert into table bucket0_mm select key, key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@bucket0_mm +POSTHOOK: Lineage: bucket0_mm.id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket0_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from bucket0_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket0_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from bucket0_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket0_mm +#### A masked pattern was here #### +100 100 +0 0 +10 10 +98 98 +103 103 +97 97 +98 98 +0 0 +10 10 +100 100 +97 97 +103 103 +PREHOOK: query: select * from bucket0_mm tablesample (bucket 1 out of 2) s +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket0_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from bucket0_mm tablesample (bucket 1 out of 2) s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket0_mm +#### A masked pattern was here #### +100 100 +0 0 +10 10 +98 98 +98 98 +0 0 +10 10 +100 100 +PREHOOK: query: select * from bucket0_mm tablesample (bucket 2 out of 2) s +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket0_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from bucket0_mm tablesample (bucket 2 out of 2) s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket0_mm +#### A masked pattern was here #### +103 103 +97 97 +97 97 +103 103 +PREHOOK: query: drop table bucket0_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@bucket0_mm +PREHOOK: Output: default@bucket0_mm +POSTHOOK: query: drop table bucket0_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@bucket0_mm +POSTHOOK: Output: default@bucket0_mm +PREHOOK: query: drop table bucket1_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table bucket1_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table bucket1_mm(key int, id int) partitioned by (key2 int) +clustered by (key) sorted by (key) into 2 buckets +tblproperties('hivecommit'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket1_mm +POSTHOOK: query: create table bucket1_mm(key int, id int) partitioned by (key2 int) +clustered by (key) sorted by (key) into 2 buckets +tblproperties('hivecommit'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket1_mm +PREHOOK: query: insert into table bucket1_mm partition (key2) +select key + 1, key, key - 1 from intermediate +union all +select key - 1, key, key + 1 from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@bucket1_mm +POSTHOOK: query: insert into table bucket1_mm partition (key2) +select key + 1, key, key - 1 from intermediate +union 
all +select key - 1, key, key + 1 from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@bucket1_mm@key2=-1 +POSTHOOK: Output: default@bucket1_mm@key2=1 +POSTHOOK: Output: default@bucket1_mm@key2=101 +POSTHOOK: Output: default@bucket1_mm@key2=102 +POSTHOOK: Output: default@bucket1_mm@key2=104 +POSTHOOK: Output: default@bucket1_mm@key2=11 +POSTHOOK: Output: default@bucket1_mm@key2=9 +POSTHOOK: Output: default@bucket1_mm@key2=96 +POSTHOOK: Output: default@bucket1_mm@key2=97 +POSTHOOK: Output: default@bucket1_mm@key2=98 +POSTHOOK: Output: default@bucket1_mm@key2=99 +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=-1).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=-1).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=101).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=101).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=102).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=102).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=104).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=104).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=11).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=11).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=1).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=1).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=96).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=96).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=97).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=97).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=98).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=98).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=99).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=99).key EXPRESSION 
[(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=9).id EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket1_mm PARTITION(key2=9).key EXPRESSION [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from bucket1_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket1_mm +PREHOOK: Input: default@bucket1_mm@key2=-1 +PREHOOK: Input: default@bucket1_mm@key2=1 +PREHOOK: Input: default@bucket1_mm@key2=101 +PREHOOK: Input: default@bucket1_mm@key2=102 +PREHOOK: Input: default@bucket1_mm@key2=104 +PREHOOK: Input: default@bucket1_mm@key2=11 +PREHOOK: Input: default@bucket1_mm@key2=9 +PREHOOK: Input: default@bucket1_mm@key2=96 +PREHOOK: Input: default@bucket1_mm@key2=97 +PREHOOK: Input: default@bucket1_mm@key2=98 +PREHOOK: Input: default@bucket1_mm@key2=99 +#### A masked pattern was here #### +POSTHOOK: query: select * from bucket1_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket1_mm +POSTHOOK: Input: default@bucket1_mm@key2=-1 +POSTHOOK: Input: default@bucket1_mm@key2=1 +POSTHOOK: Input: default@bucket1_mm@key2=101 +POSTHOOK: Input: default@bucket1_mm@key2=102 +POSTHOOK: Input: default@bucket1_mm@key2=104 +POSTHOOK: Input: default@bucket1_mm@key2=11 +POSTHOOK: Input: default@bucket1_mm@key2=9 +POSTHOOK: Input: default@bucket1_mm@key2=96 +POSTHOOK: Input: default@bucket1_mm@key2=97 +POSTHOOK: Input: default@bucket1_mm@key2=98 +POSTHOOK: Input: default@bucket1_mm@key2=99 +#### A masked pattern was here #### +1 0 -1 +-1 0 1 +99 100 101 +104 103 102 +102 103 104 +9 10 11 +11 10 9 +98 97 96 +99 98 97 +96 97 98 +97 98 99 +101 100 99 +PREHOOK: query: select * from bucket1_mm tablesample (bucket 1 out of 2) s +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket1_mm +PREHOOK: Input: default@bucket1_mm@key2=-1 +PREHOOK: Input: default@bucket1_mm@key2=1 +PREHOOK: Input: default@bucket1_mm@key2=101 +PREHOOK: Input: default@bucket1_mm@key2=102 +PREHOOK: Input: default@bucket1_mm@key2=104 +PREHOOK: Input: default@bucket1_mm@key2=11 +PREHOOK: Input: default@bucket1_mm@key2=9 +PREHOOK: Input: default@bucket1_mm@key2=96 +PREHOOK: Input: default@bucket1_mm@key2=97 +PREHOOK: Input: default@bucket1_mm@key2=98 +PREHOOK: Input: default@bucket1_mm@key2=99 +#### A masked pattern was here #### +POSTHOOK: query: select * from bucket1_mm tablesample (bucket 1 out of 2) s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket1_mm +POSTHOOK: Input: default@bucket1_mm@key2=-1 +POSTHOOK: Input: default@bucket1_mm@key2=1 +POSTHOOK: Input: default@bucket1_mm@key2=101 +POSTHOOK: Input: default@bucket1_mm@key2=102 +POSTHOOK: Input: default@bucket1_mm@key2=104 +POSTHOOK: Input: default@bucket1_mm@key2=11 +POSTHOOK: Input: default@bucket1_mm@key2=9 +POSTHOOK: Input: default@bucket1_mm@key2=96 +POSTHOOK: Input: default@bucket1_mm@key2=97 +POSTHOOK: Input: default@bucket1_mm@key2=98 +POSTHOOK: Input: default@bucket1_mm@key2=99 +#### A masked pattern was here #### +104 103 102 +102 103 104 +98 97 96 +96 97 98 +PREHOOK: query: select * from bucket1_mm tablesample (bucket 2 out of 2) s +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket1_mm +PREHOOK: Input: default@bucket1_mm@key2=-1 +PREHOOK: Input: default@bucket1_mm@key2=1 +PREHOOK: Input: default@bucket1_mm@key2=101 +PREHOOK: Input: default@bucket1_mm@key2=102 +PREHOOK: Input: default@bucket1_mm@key2=104 +PREHOOK: Input: default@bucket1_mm@key2=11 +PREHOOK: Input: default@bucket1_mm@key2=9 
+PREHOOK: Input: default@bucket1_mm@key2=96 +PREHOOK: Input: default@bucket1_mm@key2=97 +PREHOOK: Input: default@bucket1_mm@key2=98 +PREHOOK: Input: default@bucket1_mm@key2=99 +#### A masked pattern was here #### +POSTHOOK: query: select * from bucket1_mm tablesample (bucket 2 out of 2) s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket1_mm +POSTHOOK: Input: default@bucket1_mm@key2=-1 +POSTHOOK: Input: default@bucket1_mm@key2=1 +POSTHOOK: Input: default@bucket1_mm@key2=101 +POSTHOOK: Input: default@bucket1_mm@key2=102 +POSTHOOK: Input: default@bucket1_mm@key2=104 +POSTHOOK: Input: default@bucket1_mm@key2=11 +POSTHOOK: Input: default@bucket1_mm@key2=9 +POSTHOOK: Input: default@bucket1_mm@key2=96 +POSTHOOK: Input: default@bucket1_mm@key2=97 +POSTHOOK: Input: default@bucket1_mm@key2=98 +POSTHOOK: Input: default@bucket1_mm@key2=99 +#### A masked pattern was here #### +1 0 -1 +-1 0 1 +99 100 101 +9 10 11 +11 10 9 +99 98 97 +97 98 99 +101 100 99 +PREHOOK: query: drop table bucket1_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@bucket1_mm +PREHOOK: Output: default@bucket1_mm +POSTHOOK: query: drop table bucket1_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@bucket1_mm +POSTHOOK: Output: default@bucket1_mm +PREHOOK: query: drop table bucket2_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table bucket2_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table bucket2_mm(key int, id int) +clustered by (key) into 10 buckets +tblproperties('hivecommit'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bucket2_mm +POSTHOOK: query: create table bucket2_mm(key int, id int) +clustered by (key) into 10 buckets +tblproperties('hivecommit'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bucket2_mm +PREHOOK: query: insert into table bucket2_mm select key, key from intermediate where key == 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@bucket2_mm +POSTHOOK: query: insert into table bucket2_mm select key, key from intermediate where key == 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@bucket2_mm +POSTHOOK: Lineage: bucket2_mm.id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket2_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from bucket2_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket2_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from bucket2_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket2_mm +#### A masked pattern was here #### +0 0 +PREHOOK: query: select * from bucket2_mm tablesample (bucket 1 out of 10) s +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket2_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from bucket2_mm tablesample (bucket 1 out of 10) s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket2_mm +#### A masked pattern was here #### +0 0 +PREHOOK: query: select * from bucket2_mm tablesample (bucket 4 out of 10) s +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket2_mm +#### A masked pattern was here #### +POSTHOOK: query: 
select * from bucket2_mm tablesample (bucket 4 out of 10) s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket2_mm +#### A masked pattern was here #### +PREHOOK: query: insert into table bucket2_mm select key, key from intermediate where key in (0, 103) +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Input: default@intermediate@p=457 +PREHOOK: Output: default@bucket2_mm +POSTHOOK: query: insert into table bucket2_mm select key, key from intermediate where key in (0, 103) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Input: default@intermediate@p=457 +POSTHOOK: Output: default@bucket2_mm +POSTHOOK: Lineage: bucket2_mm.id SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: bucket2_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from bucket2_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket2_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from bucket2_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket2_mm +#### A masked pattern was here #### +0 0 +0 0 +103 103 +PREHOOK: query: select * from bucket2_mm tablesample (bucket 1 out of 10) s +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket2_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from bucket2_mm tablesample (bucket 1 out of 10) s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket2_mm +#### A masked pattern was here #### +0 0 +0 0 +PREHOOK: query: select * from bucket2_mm tablesample (bucket 4 out of 10) s +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket2_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from bucket2_mm tablesample (bucket 4 out of 10) s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket2_mm +#### A masked pattern was here #### +103 103 +PREHOOK: query: drop table bucket2_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@bucket2_mm +PREHOOK: Output: default@bucket2_mm +POSTHOOK: query: drop table bucket2_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@bucket2_mm +POSTHOOK: Output: default@bucket2_mm +PREHOOK: query: -- TODO# future + + + +-- TODO load, multi-insert, buckets + +drop table intermediate +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@intermediate +PREHOOK: Output: default@intermediate +POSTHOOK: query: -- TODO# future + + + +-- TODO load, multi-insert, buckets + +drop table intermediate +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@intermediate +POSTHOOK: Output: default@intermediate
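For reference, the scenario exercised by mm_all2.q above can be reproduced with a minimal sketch like the one below. It assumes an MM table is still flagged with the interim 'hivecommit'='true' table property used on this branch and that the same 'intermediate' source table from the test is available; the table name bucket_demo_mm is illustrative only and is not part of the patch.

-- minimal sketch: a bucketed MM table written by two separate inserts
-- (each insert producing its own MM write-id directory), then read
-- back per bucket with tablesample.
create table bucket_demo_mm(key int, id int)
  clustered by (key) into 2 buckets
  tblproperties('hivecommit'='true');

insert into table bucket_demo_mm select key, key from intermediate;
insert into table bucket_demo_mm select key, key from intermediate;

-- rows from both inserts are expected to appear in each bucket sample
select * from bucket_demo_mm tablesample (bucket 1 out of 2) s;
select * from bucket_demo_mm tablesample (bucket 2 out of 2) s;

drop table bucket_demo_mm;

As in the golden output above, the per-bucket samples should return the union of the rows written by both inserts, which is what the duplicate-file cleanup changes for MM directories are meant to preserve.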