HIVE-14641: handle writing to dynamic partitions (Sergey Shelukhin)
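Moves manifest creation for micromanaged (MM) tables out of FSPaths.commit() and into FileSinkOperator.closeOp(): a task that writes several dynamic partitions now collects the committed paths from all of its FSPaths and writes a single manifest at the table root (specPath), created with overwrite disabled so that a name collision fails instead of silently clobbering another task's manifest. FSPaths for dynamic partitions now resolve finalPaths against tmpPath, which carries the partition key, rather than specPath.

On the commit side, handleMmTableFinalPath() distinguishes the manifest level (the table root) from the data level (getNumDPCols() + 1 for dynamic partitions), cleans up both levels on failure, and deletes any file under the partition directories that no manifest claims. MoveTask threads the mmWriteId through Hive.loadDynamicPartitions() to loadPartition() for each partition, and the MM write is committed to the metastore once, after all partitions have been loaded.

The mm_all.q and mm_current.q tests are reworked around a small 'intermediate' source table with two partitions (to force multiple writers) and add dp_mm/dp_no_mm dynamic-partition cases; the union, merge, CTAS, and skew cases are commented out as future work.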
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ad3df23b Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ad3df23b Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ad3df23b Branch: refs/heads/hive-14535 Commit: ad3df23b9e9ecf0ecbee11b1a143658364b45e16 Parents: e02691b Author: Sergey Shelukhin <[email protected]> Authored: Mon Oct 3 16:43:45 2016 -0700 Committer: Sergey Shelukhin <[email protected]> Committed: Mon Oct 3 16:43:45 2016 -0700 ---------------------------------------------------------------------- .../hadoop/hive/ql/exec/FileSinkOperator.java | 128 ++--- .../apache/hadoop/hive/ql/exec/MoveTask.java | 3 +- .../apache/hadoop/hive/ql/metadata/Hive.java | 9 +- ql/src/test/queries/clientpositive/mm_all.q | 162 ++++--- ql/src/test/queries/clientpositive/mm_current.q | 37 +- .../results/clientpositive/llap/mm_all.q.out | 467 +++++++++++++++++++ .../clientpositive/llap/mm_current.q.out | 235 +++------- 7 files changed, 749 insertions(+), 292 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/ad3df23b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java index 6ea1a98..f11a7c3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java @@ -225,11 +225,7 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements } } - private void commit(FileSystem fs) throws HiveException { - List<Path> commitPaths = null; - if (isMmTable) { - commitPaths = new ArrayList<>(); - } + private void commit(FileSystem fs, List<Path> commitPaths) throws HiveException { for (int idx = 0; idx < outPaths.length; ++idx) { try { commitOneOutPath(idx, fs, commitPaths); @@ -238,21 +234,6 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements outPaths[idx] + " to: " + finalPaths[idx], e); } } - if (isMmTable) { - Path manifestPath = new Path(specPath, "_tmp." + ValidWriteIds.getMmFilePrefix( - conf.getMmWriteId()) + "_" + taskId + MANIFEST_EXTENSION); - Utilities.LOG14535.info("Writing manifest to " + manifestPath + " with " + commitPaths); - try { - try (FSDataOutputStream out = fs.create(manifestPath)) { - out.writeInt(commitPaths.size()); - for (Path path : commitPaths) { - out.writeUTF(path.toString()); - } - } - } catch (IOException e) { - throw new HiveException(e); - } - } } private void commitOneOutPath(int idx, FileSystem fs, List<Path> commitPaths) @@ -328,8 +309,9 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements if (!bDynParts && !isSkewedStoredAsSubDirectories) { finalPaths[filesIdx] = getFinalPath(subdirPath, specPath, extension); } else { - // TODO# wrong! special case #N bucketing - finalPaths[filesIdx] = getFinalPath(subdirPath, specPath, extension); + // TODO# does this need extra special handing for bucketing? 
+ // Note: tmpPath here has the correct partition key + finalPaths[filesIdx] = getFinalPath(subdirPath, tmpPath, extension); } outPaths[filesIdx] = finalPaths[filesIdx]; } @@ -921,7 +903,7 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements FSPaths fsp2 = new FSPaths(specPath, conf.isMmTable()); // TODO# this will break fsp2.configureDynPartPath(dirName, childSpecPathDynLinkedPartitions); Utilities.LOG14535.info("creating new paths for " + dirName + ", childSpec " + childSpecPathDynLinkedPartitions - + ": tmpPath " + fsp2.getTmpPath() + ", task path " + fsp2.getTaskOutputTempPath()); + + ": tmpPath " + fsp2.getTmpPath() + ", task path " + fsp2.getTaskOutputTempPath()/*, new Exception()*/); if(!conf.getDpSortState().equals(DPSortState.PARTITION_BUCKET_SORTED)) { createBucketFiles(fsp2); valToPaths.put(dirName, fsp2); @@ -1104,6 +1086,7 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements throw new HiveException(e); } } + List<Path> commitPaths = new ArrayList<>(); for (FSPaths fsp : valToPaths.values()) { fsp.closeWriters(abort); // before closing the operator check if statistics gathering is requested @@ -1139,7 +1122,27 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements } if (isNativeTable) { - fsp.commit(fs); + fsp.commit(fs, commitPaths); + } + } + if (!commitPaths.isEmpty()) { + Path manifestPath = new Path(specPath, "_tmp." + ValidWriteIds.getMmFilePrefix( + conf.getMmWriteId()) + "_" + taskId + MANIFEST_EXTENSION); + Utilities.LOG14535.info("Writing manifest to " + manifestPath + " with " + commitPaths); + try { + // Don't overwrite the manifest... should fail if we have collisions. + // We assume one FSOP per task (per specPath), so we create it in specPath. + try (FSDataOutputStream out = fs.create(manifestPath, false)) { + if (out == null) { + throw new HiveException("Failed to create manifest at " + manifestPath); + } + out.writeInt(commitPaths.size()); + for (Path path : commitPaths) { + out.writeUTF(path.toString()); + } + } + } catch (IOException e) { + throw new HiveException(e); } } // Only publish stats if this operator's flag was set to gather stats @@ -1197,30 +1200,27 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements DynamicPartitionCtx dpCtx, FileSinkDesc conf, Reporter reporter) throws IOException, HiveException { FileSystem fs = specPath.getFileSystem(hconf); - int targetLevel = (dpCtx == null) ? 1 : dpCtx.getNumDPCols(); + // Manifests would be at the root level, but the results at target level. + // TODO# special case - doesn't take bucketing into account + int targetLevel = (dpCtx == null) ? 
1 : (dpCtx.getNumDPCols() + 1); + int manifestLevel = 1; + ValidWriteIds.IdPathFilter filter = new ValidWriteIds.IdPathFilter(conf.getMmWriteId(), true); if (!success) { - FileStatus[] statuses = HiveStatsUtils.getFileStatusRecurse(specPath, targetLevel, fs, - new ValidWriteIds.IdPathFilter(conf.getMmWriteId(), true)); - for (FileStatus status : statuses) { - Utilities.LOG14535.info("Deleting " + status.getPath() + " on failure"); - tryDelete(fs, status.getPath()); - } + deleteMatchingFiles(specPath, fs, targetLevel, filter); + deleteMatchingFiles(specPath, fs, manifestLevel, filter); return; } - FileStatus[] statuses = HiveStatsUtils.getFileStatusRecurse(specPath, targetLevel, fs, - new ValidWriteIds.IdPathFilter(conf.getMmWriteId(), true)); - if (statuses == null) return; - LinkedList<FileStatus> results = new LinkedList<>(); - List<Path> manifests = new ArrayList<>(statuses.length); - for (FileStatus status : statuses) { - if (status.getPath().getName().endsWith(MANIFEST_EXTENSION)) { - manifests.add(status.getPath()); - } else if (!status.isDirectory()) { - Path path = status.getPath(); - Utilities.LOG14535.warn("Unknown file found - neither a manifest nor directory: " + path); - tryDelete(fs, path); - } else { - results.addAll(Lists.newArrayList(fs.listStatus(status.getPath()))); + FileStatus[] files = HiveStatsUtils.getFileStatusRecurse(specPath, manifestLevel, fs, filter); + List<Path> manifests = new ArrayList<>(files.length); + if (files != null) { + for (FileStatus status : files) { + if (status.getPath().getName().endsWith(MANIFEST_EXTENSION)) { + manifests.add(status.getPath()); + } else if (!status.isDirectory()) { + Path path = status.getPath(); + Utilities.LOG14535.warn("Unknown file found - neither a manifest nor directory: " + path); + tryDelete(fs, path); + } } } HashSet<String> committed = new HashSet<>(); @@ -1235,18 +1235,27 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements } } } - Iterator<FileStatus> iter = results.iterator(); - while (iter.hasNext()) { - FileStatus rfs = iter.next(); - if (!committed.remove(rfs.getPath().toString())) { - iter.remove(); - Utilities.LOG14535.info("Deleting " + rfs.getPath() + " that was not committed"); - // We should actually succeed here - if we fail, don't commit the query. - if (!fs.delete(rfs.getPath(), true)) { - throw new HiveException("Failed to delete an uncommitted path " + rfs.getPath()); + + files = HiveStatsUtils.getFileStatusRecurse(specPath, targetLevel, fs, filter); + LinkedList<FileStatus> results = new LinkedList<>(); + for (FileStatus status : files) { + if (!status.isDirectory()) { + Path path = status.getPath(); + Utilities.LOG14535.warn("Unknown file found - neither a manifest nor directory: " + path); + tryDelete(fs, path); + } else { + for (FileStatus child : fs.listStatus(status.getPath())) { + Path path = child.getPath(); + if (committed.remove(path.toString())) continue; // A good file. + Utilities.LOG14535.info("Deleting " + path + " that was not committed"); + // We should actually succeed here - if we fail, don't commit the query. 
+ if (!fs.delete(path, true)) { + throw new HiveException("Failed to delete an uncommitted path " + path); + } } } } + if (!committed.isEmpty()) { throw new HiveException("The following files were committed but not found: " + committed); } @@ -1258,7 +1267,7 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements if (results.isEmpty()) return; FileStatus[] finalResults = results.toArray(new FileStatus[results.size()]); - // TODO# dp will break - removeTempOrDuplicateFiles assumes dirs in results. Why? We recurse... + // TODO# dp may break - removeTempOrDuplicateFiles assumes dirs in results. Why? We recurse... List<Path> emptyBuckets = Utilities.removeTempOrDuplicateFiles( fs, finalResults, dpCtx, conf, hconf); // create empty buckets if necessary @@ -1267,6 +1276,15 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements } } + private void deleteMatchingFiles(Path specPath, FileSystem fs, + int targetLevel, ValidWriteIds.IdPathFilter filter) throws IOException { + for (FileStatus status : HiveStatsUtils.getFileStatusRecurse(specPath, targetLevel, fs, + filter)) { + Utilities.LOG14535.info("Deleting " + status.getPath() + " on failure"); + tryDelete(fs, status.getPath()); + } + } + private void tryDelete(FileSystem fs, Path path) { try { fs.delete(path, true); http://git-wip-us.apache.org/repos/asf/hive/blob/ad3df23b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java index 3be21c4..538bf79 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java @@ -435,7 +435,8 @@ public class MoveTask extends Task<MoveWork> implements Serializable { isSkewedStoredAsDirs(tbd), work.getLoadTableWork().getWriteType() != AcidUtils.Operation.NOT_ACID, SessionState.get().getTxnMgr().getCurrentTxnId(), hasFollowingStatsTask(), - work.getLoadTableWork().getWriteType()); + work.getLoadTableWork().getWriteType(), + tbd.getMmWriteId()); console.printInfo("\t Time taken to load dynamic partitions: " + (System.currentTimeMillis() - startTime)/1000.0 + " seconds"); http://git-wip-us.apache.org/repos/asf/hive/blob/ad3df23b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 68d59aa..38b434d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -1850,7 +1850,7 @@ private void constructOneLBLocationMap(FileStatus fSta, public Map<Map<String, String>, Partition> loadDynamicPartitions(final Path loadPath, final String tableName, final Map<String, String> partSpec, final boolean replace, final int numDP, final boolean listBucketingEnabled, final boolean isAcid, final long txnId, - final boolean hasFollowingStatsTask, final AcidUtils.Operation operation) + final boolean hasFollowingStatsTask, final AcidUtils.Operation operation, final Long mmWriteId) throws HiveException { final Map<Map<String, String>, Partition> partitionsMap = @@ -1895,7 +1895,7 @@ private void constructOneLBLocationMap(FileStatus fSta, Utilities.LOG14535.info("loadPartition called for DPP from " + partPath + " to " + tbl.getTableName()); 
Partition newPartition = loadPartition(partPath, tbl, fullPartSpec, replace, true, listBucketingEnabled, - false, isAcid, hasFollowingStatsTask, null); // TODO# special case #N + false, isAcid, hasFollowingStatsTask, mmWriteId); partitionsMap.put(fullPartSpec, newPartition); if (inPlaceEligible) { @@ -1927,7 +1927,10 @@ private void constructOneLBLocationMap(FileStatus fSta, for (Future future : futures) { future.get(); } - // TODO# special case #N - DP - we would commit the txn to metastore here + if (mmWriteId != null) { + // Commit after we have processed all the partitions. + commitMmTableWrite(tbl, mmWriteId); + } } catch (InterruptedException | ExecutionException e) { LOG.debug("Cancelling " + futures.size() + " dynamic loading tasks"); //cancel other futures http://git-wip-us.apache.org/repos/asf/hive/blob/ad3df23b/ql/src/test/queries/clientpositive/mm_all.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/mm_all.q b/ql/src/test/queries/clientpositive/mm_all.q index aaf8d48..59171af 100644 --- a/ql/src/test/queries/clientpositive/mm_all.q +++ b/ql/src/test/queries/clientpositive/mm_all.q @@ -1,63 +1,117 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; -set hive.exec.dynamic.partition.mode=nonstrict; set hive.fetch.task.conversion=none; -drop table simple_mm; -drop table partunion_mm; -drop table merge_mm; -drop table ctas_mm; -drop table T1; -drop table T2; -drop table skew_mm; - - -create table simple_mm(key int) partitioned by (key_mm int) tblproperties ('hivecommit'='true'); -insert into table simple_mm partition(key_mm='455') select key from src limit 3; - -create table ctas_mm tblproperties ('hivecommit'='true') as select * from src limit 3; - -create table partunion_mm(id_mm int) partitioned by (key_mm int) tblproperties ('hivecommit'='true'); - - -insert into table partunion_mm partition(key_mm) -select temps.* from ( -select key as key_mm, key from ctas_mm -union all -select key as key_mm, key from simple_mm ) temps; +-- Force multiple writers when reading +drop table intermediate; +create table intermediate(key int) partitioned by (p int) stored as orc; +insert into table intermediate partition(p='455') select key from src limit 2; +insert into table intermediate partition(p='456') select key from src limit 2; + +drop table part_mm; +create table part_mm(key int) partitioned by (key_mm int) stored as orc tblproperties ('hivecommit'='true'); +explain insert into table part_mm partition(key_mm='455') select key from intermediate; +insert into table part_mm partition(key_mm='455') select key from intermediate; +insert into table part_mm partition(key_mm='456') select key from intermediate; +insert into table part_mm partition(key_mm='455') select key from intermediate; +select * from part_mm; +drop table part_mm; -set hive.merge.mapredfiles=true; -set hive.merge.sparkfiles=true; -set hive.merge.tezfiles=true; - -CREATE TABLE merge_mm (key INT, value STRING) - PARTITIONED BY (ds STRING, part STRING) STORED AS ORC tblproperties ('hivecommit'='true'); - -EXPLAIN -INSERT OVERWRITE TABLE merge_mm PARTITION (ds='123', part) - SELECT key, value, PMOD(HASH(key), 2) as part - FROM src; - -INSERT OVERWRITE TABLE merge_mm PARTITION (ds='123', part) - SELECT key, value, PMOD(HASH(key), 2) as part - FROM src; - - -set hive.optimize.skewjoin.compiletime = true; --- the test case is wrong? 
+drop table simple_mm; +create table simple_mm(key int) stored as orc tblproperties ('hivecommit'='true'); +insert into table simple_mm select key from intermediate; +insert overwrite table simple_mm select key from intermediate; +select * from simple_mm; +drop table simple_mm; -CREATE TABLE T1(key STRING, val STRING) -SKEWED BY (key) ON ((2)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; -CREATE TABLE T2(key STRING, val STRING) -SKEWED BY (key) ON ((3)) STORED AS TEXTFILE; -LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; -EXPLAIN -SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key; +-- simple DP (no bucketing, no sorting?) +drop table dp_no_mm; +drop table dp_mm; -create table skew_mm(k1 string, k2 string, k3 string, k4 string) SKEWED BY (key) ON ((2)) tblproperties ('hivecommit'='true'); -INSERT OVERWRITE TABLE skew_mm -SELECT a.key as k1, a.val as k2, b.key as k3, b.val as k4 FROM T1 a JOIN T2 b ON a.key = b.key; +set hive.exec.dynamic.partition.mode=nonstrict; --- TODO load, acid, etc +set hive.merge.mapredfiles=false; +set hive.merge.sparkfiles=false; +set hive.merge.tezfiles=false; + +create table dp_no_mm (key int) partitioned by (key1 string, key2 int) stored as orc; +create table dp_mm (key int) partitioned by (key1 string, key2 int) stored as orc + tblproperties ('hivecommit'='true'); + +insert into table dp_no_mm partition (key1='123', key2) select key, key from intermediate; + +insert into table dp_mm partition (key1='123', key2) select key, key from intermediate; + +select * from dp_no_mm; +select * from dp_mm; + +drop table dp_no_mm; +drop table dp_mm; + + + +-- future + + + + + +--drop table partunion_mm; +--drop table merge_mm; +--drop table ctas_mm; +--drop table T1; +--drop table T2; +--drop table skew_mm; +-- +-- +--create table ctas_mm tblproperties ('hivecommit'='true') as select * from src limit 3; +-- +--create table partunion_mm(id_mm int) partitioned by (key_mm int) tblproperties ('hivecommit'='true'); +-- +-- +--insert into table partunion_mm partition(key_mm) +--select temps.* from ( +--select key as key_mm, key from ctas_mm +--union all +--select key as key_mm, key from simple_mm ) temps; +-- +--set hive.merge.mapredfiles=true; +--set hive.merge.sparkfiles=true; +--set hive.merge.tezfiles=true; +-- +--CREATE TABLE merge_mm (key INT, value STRING) +-- PARTITIONED BY (ds STRING, part STRING) STORED AS ORC tblproperties ('hivecommit'='true'); +-- +--EXPLAIN +--INSERT OVERWRITE TABLE merge_mm PARTITION (ds='123', part) +-- SELECT key, value, PMOD(HASH(key), 2) as part +-- FROM src; +-- +--INSERT OVERWRITE TABLE merge_mm PARTITION (ds='123', part) +-- SELECT key, value, PMOD(HASH(key), 2) as part +-- FROM src; +-- +-- +--set hive.optimize.skewjoin.compiletime = true; +---- the test case is wrong? 
+-- +--CREATE TABLE T1(key STRING, val STRING) +--SKEWED BY (key) ON ((2)) STORED AS TEXTFILE; +--LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; +--CREATE TABLE T2(key STRING, val STRING) +--SKEWED BY (key) ON ((3)) STORED AS TEXTFILE; +--LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; +-- +--EXPLAIN +--SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key; +-- +--create table skew_mm(k1 string, k2 string, k3 string, k4 string) SKEWED BY (key) ON ((2)) tblproperties ('hivecommit'='true'); +--INSERT OVERWRITE TABLE skew_mm +--SELECT a.key as k1, a.val as k2, b.key as k3, b.val as k4 FROM T1 a JOIN T2 b ON a.key = b.key; +-- +---- TODO load, acid, etc +-- +-- + +drop table intermediate; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/ad3df23b/ql/src/test/queries/clientpositive/mm_current.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/mm_current.q b/ql/src/test/queries/clientpositive/mm_current.q index 7c3e138..b551176 100644 --- a/ql/src/test/queries/clientpositive/mm_current.q +++ b/ql/src/test/queries/clientpositive/mm_current.q @@ -6,29 +6,34 @@ set tez.grouping.min-size=1; set tez.grouping.max-size=2; set hive.tez.auto.reducer.parallelism=false; -drop table part_mm; -drop table simple_mm; drop table intermediate; - create table intermediate(key int) partitioned by (p int) stored as orc; insert into table intermediate partition(p='455') select key from src limit 2; insert into table intermediate partition(p='456') select key from src limit 2; - -create table part_mm(key int) partitioned by (key_mm int) stored as orc tblproperties ('hivecommit'='true'); -explain insert into table part_mm partition(key_mm='455') select key from intermediate; -insert into table part_mm partition(key_mm='455') select key from intermediate; -insert into table part_mm partition(key_mm='456') select key from intermediate; -insert into table part_mm partition(key_mm='455') select key from intermediate; -select * from part_mm; -create table simple_mm(key int) stored as orc tblproperties ('hivecommit'='true'); -insert into table simple_mm select key from intermediate; -insert overwrite table simple_mm select key from intermediate; -select * from simple_mm; +drop table dp_no_mm; +drop table dp_mm; + +set hive.merge.mapredfiles=false; +set hive.merge.sparkfiles=false; +set hive.merge.tezfiles=false; + +create table dp_no_mm (key int) partitioned by (key1 string, key2 int) stored as orc; +create table dp_mm (key int) partitioned by (key1 string, key2 int) stored as orc + tblproperties ('hivecommit'='true'); + +insert into table dp_no_mm partition (key1='123', key2) select key, key from intermediate; + +insert into table dp_mm partition (key1='123', key2) select key, key from intermediate; + +select * from dp_no_mm; +select * from dp_mm; + +drop table dp_no_mm; +drop table dp_mm; -drop table part_mm; -drop table simple_mm; drop table intermediate; + http://git-wip-us.apache.org/repos/asf/hive/blob/ad3df23b/ql/src/test/results/clientpositive/llap/mm_all.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/mm_all.q.out b/ql/src/test/results/clientpositive/llap/mm_all.q.out new file mode 100644 index 0000000..b0c9c0a --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/mm_all.q.out @@ -0,0 +1,467 @@ +PREHOOK: query: -- Force multiple writers when reading +drop table intermediate +PREHOOK: type: DROPTABLE 
+POSTHOOK: query: -- Force multiple writers when reading +drop table intermediate +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table intermediate(key int) partitioned by (p int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@intermediate +POSTHOOK: query: create table intermediate(key int) partitioned by (p int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@intermediate +PREHOOK: query: insert into table intermediate partition(p='455') select key from src limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@intermediate@p=455 +POSTHOOK: query: insert into table intermediate partition(p='455') select key from src limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@intermediate@p=455 +POSTHOOK: Lineage: intermediate PARTITION(p=455).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: insert into table intermediate partition(p='456') select key from src limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@intermediate@p=456 +POSTHOOK: query: insert into table intermediate partition(p='456') select key from src limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@intermediate@p=456 +POSTHOOK: Lineage: intermediate PARTITION(p=456).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: drop table part_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table part_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table part_mm(key int) partitioned by (key_mm int) stored as orc tblproperties ('hivecommit'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part_mm +POSTHOOK: query: create table part_mm(key int) partitioned by (key_mm int) stored as orc tblproperties ('hivecommit'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part_mm +PREHOOK: query: explain insert into table part_mm partition(key_mm='455') select key from intermediate +PREHOOK: type: QUERY +POSTHOOK: query: explain insert into table part_mm partition(key_mm='455') select key from intermediate +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: intermediate + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.part_mm + Execution mode: llap + LLAP IO: all inputs + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + key_mm 455 + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat 
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.part_mm + micromanaged table: true + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: insert into table part_mm partition(key_mm='455') select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Output: default@part_mm@key_mm=455 +POSTHOOK: query: insert into table part_mm partition(key_mm='455') select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Output: default@part_mm@key_mm=455 +POSTHOOK: Lineage: part_mm PARTITION(key_mm=455).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into table part_mm partition(key_mm='456') select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Output: default@part_mm@key_mm=456 +POSTHOOK: query: insert into table part_mm partition(key_mm='456') select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Output: default@part_mm@key_mm=456 +POSTHOOK: Lineage: part_mm PARTITION(key_mm=456).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into table part_mm partition(key_mm='455') select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Output: default@part_mm@key_mm=455 +POSTHOOK: query: insert into table part_mm partition(key_mm='455') select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Output: default@part_mm@key_mm=455 +POSTHOOK: Lineage: part_mm PARTITION(key_mm=455).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from part_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@part_mm +PREHOOK: Input: default@part_mm@key_mm=455 +PREHOOK: Input: default@part_mm@key_mm=456 +#### A masked pattern was here #### +POSTHOOK: query: select * from part_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_mm +POSTHOOK: Input: default@part_mm@key_mm=455 +POSTHOOK: Input: default@part_mm@key_mm=456 +#### A masked pattern was here #### +238 455 +86 455 +238 455 +86 455 +238 455 +86 455 +238 455 +86 455 +238 456 +86 456 +238 456 +86 456 +PREHOOK: query: drop table part_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@part_mm +PREHOOK: Output: default@part_mm +POSTHOOK: query: drop table part_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@part_mm +POSTHOOK: Output: default@part_mm +PREHOOK: query: drop table simple_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table simple_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table simple_mm(key int) stored as orc tblproperties ('hivecommit'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@simple_mm +POSTHOOK: query: create table simple_mm(key int) stored as orc tblproperties ('hivecommit'='true') 
+POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@simple_mm +PREHOOK: query: insert into table simple_mm select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Output: default@simple_mm +POSTHOOK: query: insert into table simple_mm select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Output: default@simple_mm +POSTHOOK: Lineage: simple_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert overwrite table simple_mm select key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Output: default@simple_mm +POSTHOOK: query: insert overwrite table simple_mm select key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Output: default@simple_mm +POSTHOOK: Lineage: simple_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from simple_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@simple_mm +#### A masked pattern was here #### +POSTHOOK: query: select * from simple_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@simple_mm +#### A masked pattern was here #### +238 +86 +238 +86 +PREHOOK: query: drop table simple_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@simple_mm +PREHOOK: Output: default@simple_mm +POSTHOOK: query: drop table simple_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@simple_mm +POSTHOOK: Output: default@simple_mm +PREHOOK: query: -- simple DP (no bucketing, no sorting?) +drop table dp_no_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- simple DP (no bucketing, no sorting?) 
+drop table dp_no_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table dp_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table dp_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table dp_no_mm (key int) partitioned by (key1 string, key2 int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dp_no_mm +POSTHOOK: query: create table dp_no_mm (key int) partitioned by (key1 string, key2 int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dp_no_mm +PREHOOK: query: create table dp_mm (key int) partitioned by (key1 string, key2 int) stored as orc + tblproperties ('hivecommit'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dp_mm +POSTHOOK: query: create table dp_mm (key int) partitioned by (key1 string, key2 int) stored as orc + tblproperties ('hivecommit'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dp_mm +PREHOOK: query: insert into table dp_no_mm partition (key1='123', key2) select key, key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Output: default@dp_no_mm@key1=123 +POSTHOOK: query: insert into table dp_no_mm partition (key1='123', key2) select key, key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Output: default@dp_no_mm@key1=123/key2=238 +POSTHOOK: Output: default@dp_no_mm@key1=123/key2=86 +POSTHOOK: Lineage: dp_no_mm PARTITION(key1=123,key2=238).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dp_no_mm PARTITION(key1=123,key2=86).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into table dp_mm partition (key1='123', key2) select key, key from intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@intermediate +PREHOOK: Input: default@intermediate@p=455 +PREHOOK: Input: default@intermediate@p=456 +PREHOOK: Output: default@dp_mm@key1=123 +POSTHOOK: query: insert into table dp_mm partition (key1='123', key2) select key, key from intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@intermediate +POSTHOOK: Input: default@intermediate@p=455 +POSTHOOK: Input: default@intermediate@p=456 +POSTHOOK: Output: default@dp_mm@key1=123/key2=238 +POSTHOOK: Output: default@dp_mm@key1=123/key2=86 +POSTHOOK: Lineage: dp_mm PARTITION(key1=123,key2=238).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dp_mm PARTITION(key1=123,key2=86).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from dp_no_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@dp_no_mm +PREHOOK: Input: default@dp_no_mm@key1=123/key2=238 +PREHOOK: Input: default@dp_no_mm@key1=123/key2=86 +#### A masked pattern was here #### +POSTHOOK: query: select * from dp_no_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dp_no_mm +POSTHOOK: Input: default@dp_no_mm@key1=123/key2=238 +POSTHOOK: Input: default@dp_no_mm@key1=123/key2=86 +#### A masked pattern was here #### +238 123 238 +238 123 238 +86 123 86 +86 123 86 +PREHOOK: query: select * from dp_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@dp_mm 
+PREHOOK: Input: default@dp_mm@key1=123/key2=238 +PREHOOK: Input: default@dp_mm@key1=123/key2=86 +#### A masked pattern was here #### +POSTHOOK: query: select * from dp_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dp_mm +POSTHOOK: Input: default@dp_mm@key1=123/key2=238 +POSTHOOK: Input: default@dp_mm@key1=123/key2=86 +#### A masked pattern was here #### +238 123 238 +238 123 238 +86 123 86 +86 123 86 +PREHOOK: query: drop table dp_no_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@dp_no_mm +PREHOOK: Output: default@dp_no_mm +POSTHOOK: query: drop table dp_no_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@dp_no_mm +POSTHOOK: Output: default@dp_no_mm +PREHOOK: query: drop table dp_mm +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@dp_mm +PREHOOK: Output: default@dp_mm +POSTHOOK: query: drop table dp_mm +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@dp_mm +POSTHOOK: Output: default@dp_mm +PREHOOK: query: -- future + + + + + +--drop table partunion_mm; +--drop table merge_mm; +--drop table ctas_mm; +--drop table T1; +--drop table T2; +--drop table skew_mm; +-- +-- +--create table ctas_mm tblproperties ('hivecommit'='true') as select * from src limit 3; +-- +--create table partunion_mm(id_mm int) partitioned by (key_mm int) tblproperties ('hivecommit'='true'); +-- +-- +--insert into table partunion_mm partition(key_mm) +--select temps.* from ( +--select key as key_mm, key from ctas_mm +--union all +--select key as key_mm, key from simple_mm ) temps; +-- +--set hive.merge.mapredfiles=true; +--set hive.merge.sparkfiles=true; +--set hive.merge.tezfiles=true; +-- +--CREATE TABLE merge_mm (key INT, value STRING) +-- PARTITIONED BY (ds STRING, part STRING) STORED AS ORC tblproperties ('hivecommit'='true'); +-- +--EXPLAIN +--INSERT OVERWRITE TABLE merge_mm PARTITION (ds='123', part) +-- SELECT key, value, PMOD(HASH(key), 2) as part +-- FROM src; +-- +--INSERT OVERWRITE TABLE merge_mm PARTITION (ds='123', part) +-- SELECT key, value, PMOD(HASH(key), 2) as part +-- FROM src; +-- +-- +--set hive.optimize.skewjoin.compiletime = true; +---- the test case is wrong? 
+-- +--CREATE TABLE T1(key STRING, val STRING) +--SKEWED BY (key) ON ((2)) STORED AS TEXTFILE; +--LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; +--CREATE TABLE T2(key STRING, val STRING) +--SKEWED BY (key) ON ((3)) STORED AS TEXTFILE; +--LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; +-- +--EXPLAIN +--SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key; +-- +--create table skew_mm(k1 string, k2 string, k3 string, k4 string) SKEWED BY (key) ON ((2)) tblproperties ('hivecommit'='true'); +--INSERT OVERWRITE TABLE skew_mm +--SELECT a.key as k1, a.val as k2, b.key as k3, b.val as k4 FROM T1 a JOIN T2 b ON a.key = b.key; +-- +---- TODO load, acid, etc +-- +-- + +drop table intermediate +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@intermediate +PREHOOK: Output: default@intermediate +POSTHOOK: query: -- future + + + + + +--drop table partunion_mm; +--drop table merge_mm; +--drop table ctas_mm; +--drop table T1; +--drop table T2; +--drop table skew_mm; +-- +-- +--create table ctas_mm tblproperties ('hivecommit'='true') as select * from src limit 3; +-- +--create table partunion_mm(id_mm int) partitioned by (key_mm int) tblproperties ('hivecommit'='true'); +-- +-- +--insert into table partunion_mm partition(key_mm) +--select temps.* from ( +--select key as key_mm, key from ctas_mm +--union all +--select key as key_mm, key from simple_mm ) temps; +-- +--set hive.merge.mapredfiles=true; +--set hive.merge.sparkfiles=true; +--set hive.merge.tezfiles=true; +-- +--CREATE TABLE merge_mm (key INT, value STRING) +-- PARTITIONED BY (ds STRING, part STRING) STORED AS ORC tblproperties ('hivecommit'='true'); +-- +--EXPLAIN +--INSERT OVERWRITE TABLE merge_mm PARTITION (ds='123', part) +-- SELECT key, value, PMOD(HASH(key), 2) as part +-- FROM src; +-- +--INSERT OVERWRITE TABLE merge_mm PARTITION (ds='123', part) +-- SELECT key, value, PMOD(HASH(key), 2) as part +-- FROM src; +-- +-- +--set hive.optimize.skewjoin.compiletime = true; +---- the test case is wrong? 
+-- +--CREATE TABLE T1(key STRING, val STRING) +--SKEWED BY (key) ON ((2)) STORED AS TEXTFILE; +--LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; +--CREATE TABLE T2(key STRING, val STRING) +--SKEWED BY (key) ON ((3)) STORED AS TEXTFILE; +--LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; +-- +--EXPLAIN +--SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key; +-- +--create table skew_mm(k1 string, k2 string, k3 string, k4 string) SKEWED BY (key) ON ((2)) tblproperties ('hivecommit'='true'); +--INSERT OVERWRITE TABLE skew_mm +--SELECT a.key as k1, a.val as k2, b.key as k3, b.val as k4 FROM T1 a JOIN T2 b ON a.key = b.key; +-- +---- TODO load, acid, etc +-- +-- + +drop table intermediate +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@intermediate +POSTHOOK: Output: default@intermediate http://git-wip-us.apache.org/repos/asf/hive/blob/ad3df23b/ql/src/test/results/clientpositive/llap/mm_current.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/mm_current.q.out b/ql/src/test/results/clientpositive/llap/mm_current.q.out index ece6cbf..fe1caee 100644 --- a/ql/src/test/results/clientpositive/llap/mm_current.q.out +++ b/ql/src/test/results/clientpositive/llap/mm_current.q.out @@ -1,11 +1,3 @@ -PREHOOK: query: drop table part_mm -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table part_mm -POSTHOOK: type: DROPTABLE -PREHOOK: query: drop table simple_mm -PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table simple_mm -POSTHOOK: type: DROPTABLE PREHOOK: query: drop table intermediate PREHOOK: type: DROPTABLE POSTHOOK: query: drop table intermediate @@ -36,193 +28,110 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@intermediate@p=456 POSTHOOK: Lineage: intermediate PARTITION(p=456).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -PREHOOK: query: create table part_mm(key int) partitioned by (key_mm int) stored as orc tblproperties ('hivecommit'='true') +PREHOOK: query: drop table dp_no_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table dp_no_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table dp_mm +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table dp_mm +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table dp_no_mm (key int) partitioned by (key1 string, key2 int) stored as orc PREHOOK: type: CREATETABLE PREHOOK: Output: database:default -PREHOOK: Output: default@part_mm -POSTHOOK: query: create table part_mm(key int) partitioned by (key_mm int) stored as orc tblproperties ('hivecommit'='true') +PREHOOK: Output: default@dp_no_mm +POSTHOOK: query: create table dp_no_mm (key int) partitioned by (key1 string, key2 int) stored as orc POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default -POSTHOOK: Output: default@part_mm -PREHOOK: query: explain insert into table part_mm partition(key_mm='455') select key from intermediate -PREHOOK: type: QUERY -POSTHOOK: query: explain insert into table part_mm partition(key_mm='455') select key from intermediate -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: intermediate - Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) 
- outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_mm - Execution mode: llap - LLAP IO: all inputs - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - partition: - key_mm 455 - replace: false - table: - input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat - serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.part_mm - micromanaged table: true - - Stage: Stage-3 - Stats-Aggr Operator - -PREHOOK: query: insert into table part_mm partition(key_mm='455') select key from intermediate -PREHOOK: type: QUERY -PREHOOK: Input: default@intermediate -PREHOOK: Input: default@intermediate@p=455 -PREHOOK: Input: default@intermediate@p=456 -PREHOOK: Output: default@part_mm@key_mm=455 -POSTHOOK: query: insert into table part_mm partition(key_mm='455') select key from intermediate -POSTHOOK: type: QUERY -POSTHOOK: Input: default@intermediate -POSTHOOK: Input: default@intermediate@p=455 -POSTHOOK: Input: default@intermediate@p=456 -POSTHOOK: Output: default@part_mm@key_mm=455 -POSTHOOK: Lineage: part_mm PARTITION(key_mm=455).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -PREHOOK: query: insert into table part_mm partition(key_mm='456') select key from intermediate -PREHOOK: type: QUERY -PREHOOK: Input: default@intermediate -PREHOOK: Input: default@intermediate@p=455 -PREHOOK: Input: default@intermediate@p=456 -PREHOOK: Output: default@part_mm@key_mm=456 -POSTHOOK: query: insert into table part_mm partition(key_mm='456') select key from intermediate -POSTHOOK: type: QUERY -POSTHOOK: Input: default@intermediate -POSTHOOK: Input: default@intermediate@p=455 -POSTHOOK: Input: default@intermediate@p=456 -POSTHOOK: Output: default@part_mm@key_mm=456 -POSTHOOK: Lineage: part_mm PARTITION(key_mm=456).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -PREHOOK: query: insert into table part_mm partition(key_mm='455') select key from intermediate -PREHOOK: type: QUERY -PREHOOK: Input: default@intermediate -PREHOOK: Input: default@intermediate@p=455 -PREHOOK: Input: default@intermediate@p=456 -PREHOOK: Output: default@part_mm@key_mm=455 -POSTHOOK: query: insert into table part_mm partition(key_mm='455') select key from intermediate -POSTHOOK: type: QUERY -POSTHOOK: Input: default@intermediate -POSTHOOK: Input: default@intermediate@p=455 -POSTHOOK: Input: default@intermediate@p=456 -POSTHOOK: Output: default@part_mm@key_mm=455 -POSTHOOK: Lineage: part_mm PARTITION(key_mm=455).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -PREHOOK: query: select * from part_mm -PREHOOK: type: QUERY -PREHOOK: Input: default@part_mm -PREHOOK: Input: default@part_mm@key_mm=455 -PREHOOK: Input: default@part_mm@key_mm=456 -#### A masked pattern was here #### -POSTHOOK: query: select * from part_mm -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part_mm -POSTHOOK: Input: default@part_mm@key_mm=455 -POSTHOOK: Input: default@part_mm@key_mm=456 -#### A masked pattern was here #### -0 455 -455 455 -0 455 -455 455 -0 455 
-455 455 -0 455 -455 455 -0 456 -455 456 -0 456 -455 456 -PREHOOK: query: create table simple_mm(key int) stored as orc tblproperties ('hivecommit'='true') +POSTHOOK: Output: default@dp_no_mm +PREHOOK: query: create table dp_mm (key int) partitioned by (key1 string, key2 int) stored as orc + tblproperties ('hivecommit'='true') PREHOOK: type: CREATETABLE PREHOOK: Output: database:default -PREHOOK: Output: default@simple_mm -POSTHOOK: query: create table simple_mm(key int) stored as orc tblproperties ('hivecommit'='true') +PREHOOK: Output: default@dp_mm +POSTHOOK: query: create table dp_mm (key int) partitioned by (key1 string, key2 int) stored as orc + tblproperties ('hivecommit'='true') POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default -POSTHOOK: Output: default@simple_mm -PREHOOK: query: insert into table simple_mm select key from intermediate +POSTHOOK: Output: default@dp_mm +PREHOOK: query: insert into table dp_no_mm partition (key1='123', key2) select key, key from intermediate PREHOOK: type: QUERY PREHOOK: Input: default@intermediate PREHOOK: Input: default@intermediate@p=455 PREHOOK: Input: default@intermediate@p=456 -PREHOOK: Output: default@simple_mm -POSTHOOK: query: insert into table simple_mm select key from intermediate +PREHOOK: Output: default@dp_no_mm@key1=123 +POSTHOOK: query: insert into table dp_no_mm partition (key1='123', key2) select key, key from intermediate POSTHOOK: type: QUERY POSTHOOK: Input: default@intermediate POSTHOOK: Input: default@intermediate@p=455 POSTHOOK: Input: default@intermediate@p=456 -POSTHOOK: Output: default@simple_mm -POSTHOOK: Lineage: simple_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -PREHOOK: query: insert overwrite table simple_mm select key from intermediate +POSTHOOK: Output: default@dp_no_mm@key1=123/key2=0 +POSTHOOK: Output: default@dp_no_mm@key1=123/key2=455 +POSTHOOK: Lineage: dp_no_mm PARTITION(key1=123,key2=0).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dp_no_mm PARTITION(key1=123,key2=455).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: insert into table dp_mm partition (key1='123', key2) select key, key from intermediate PREHOOK: type: QUERY PREHOOK: Input: default@intermediate PREHOOK: Input: default@intermediate@p=455 PREHOOK: Input: default@intermediate@p=456 -PREHOOK: Output: default@simple_mm -POSTHOOK: query: insert overwrite table simple_mm select key from intermediate +PREHOOK: Output: default@dp_mm@key1=123 +POSTHOOK: query: insert into table dp_mm partition (key1='123', key2) select key, key from intermediate POSTHOOK: type: QUERY POSTHOOK: Input: default@intermediate POSTHOOK: Input: default@intermediate@p=455 POSTHOOK: Input: default@intermediate@p=456 -POSTHOOK: Output: default@simple_mm -POSTHOOK: Lineage: simple_mm.key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] -PREHOOK: query: select * from simple_mm +POSTHOOK: Output: default@dp_mm@key1=123/key2=0 +POSTHOOK: Output: default@dp_mm@key1=123/key2=455 +POSTHOOK: Lineage: dp_mm PARTITION(key1=123,key2=0).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dp_mm PARTITION(key1=123,key2=455).key SIMPLE [(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from dp_no_mm +PREHOOK: type: QUERY +PREHOOK: Input: default@dp_no_mm +PREHOOK: Input: 
default@dp_no_mm@key1=123/key2=0 +PREHOOK: Input: default@dp_no_mm@key1=123/key2=455 +#### A masked pattern was here #### +POSTHOOK: query: select * from dp_no_mm +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dp_no_mm +POSTHOOK: Input: default@dp_no_mm@key1=123/key2=0 +POSTHOOK: Input: default@dp_no_mm@key1=123/key2=455 +#### A masked pattern was here #### +455 123 455 +455 123 455 +0 123 0 +0 123 0 +PREHOOK: query: select * from dp_mm PREHOOK: type: QUERY -PREHOOK: Input: default@simple_mm +PREHOOK: Input: default@dp_mm +PREHOOK: Input: default@dp_mm@key1=123/key2=0 +PREHOOK: Input: default@dp_mm@key1=123/key2=455 #### A masked pattern was here #### -POSTHOOK: query: select * from simple_mm +POSTHOOK: query: select * from dp_mm POSTHOOK: type: QUERY -POSTHOOK: Input: default@simple_mm +POSTHOOK: Input: default@dp_mm +POSTHOOK: Input: default@dp_mm@key1=123/key2=0 +POSTHOOK: Input: default@dp_mm@key1=123/key2=455 #### A masked pattern was here #### -0 -455 -0 -455 -PREHOOK: query: drop table part_mm +455 123 455 +455 123 455 +0 123 0 +0 123 0 +PREHOOK: query: drop table dp_no_mm PREHOOK: type: DROPTABLE -PREHOOK: Input: default@part_mm -PREHOOK: Output: default@part_mm -POSTHOOK: query: drop table part_mm +PREHOOK: Input: default@dp_no_mm +PREHOOK: Output: default@dp_no_mm +POSTHOOK: query: drop table dp_no_mm POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@part_mm -POSTHOOK: Output: default@part_mm -PREHOOK: query: drop table simple_mm +POSTHOOK: Input: default@dp_no_mm +POSTHOOK: Output: default@dp_no_mm +PREHOOK: query: drop table dp_mm PREHOOK: type: DROPTABLE -PREHOOK: Input: default@simple_mm -PREHOOK: Output: default@simple_mm -POSTHOOK: query: drop table simple_mm +PREHOOK: Input: default@dp_mm +PREHOOK: Output: default@dp_mm +POSTHOOK: query: drop table dp_mm POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@simple_mm -POSTHOOK: Output: default@simple_mm +POSTHOOK: Input: default@dp_mm +POSTHOOK: Output: default@dp_mm PREHOOK: query: drop table intermediate PREHOOK: type: DROPTABLE PREHOOK: Input: default@intermediate

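For reference, a minimal, self-contained sketch of the manifest round trip implemented above. The class and method names (MmManifestSketch, writeManifest, readManifests) are illustrative only and do not appear in the patch, which inlines this logic in FileSinkOperator.closeOp() (write side) and handleMmTableFinalPath() (read side).

    import java.io.IOException;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public final class MmManifestSketch {

      // Write side: one manifest per task, created at the table root
      // (specPath). overwrite=false makes a name collision fail instead of
      // clobbering another task's manifest.
      public static void writeManifest(FileSystem fs, Path manifestPath,
          List<Path> commitPaths) throws IOException {
        try (FSDataOutputStream out = fs.create(manifestPath, false)) {
          out.writeInt(commitPaths.size());    // path count first...
          for (Path path : commitPaths) {
            out.writeUTF(path.toString());     // ...then one UTF string per path
          }
        }
      }

      // Read side: union of the committed paths from all manifests. Any file
      // found under the partition directories that is not in this set was
      // never committed and gets deleted.
      public static Set<String> readManifests(FileSystem fs, List<Path> manifests)
          throws IOException {
        Set<String> committed = new HashSet<>();
        for (Path manifest : manifests) {
          try (FSDataInputStream in = fs.open(manifest)) {
            int count = in.readInt();
            for (int i = 0; i < count; ++i) {
              committed.add(in.readUTF());
            }
          }
        }
        return committed;
      }
    }

Note the asymmetry the patch relies on: manifests live at depth 1 under specPath, while dynamic-partition data files live at depth getNumDPCols() + 1, which is why handleMmTableFinalPath() now scans the two levels separately.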