HIVE-20399: CTAS with a custom table location that is not fully qualified fails for MM tables (Sergey Shelukhin, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/611770dd Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/611770dd Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/611770dd Branch: refs/heads/master Commit: 611770ddf9ab7f9c860468dc58b029b4e884beb4 Parents: ec965fa Author: sergey <[email protected]> Authored: Thu Aug 23 12:20:39 2018 -0700 Committer: sergey <[email protected]> Committed: Thu Aug 23 12:20:39 2018 -0700 ---------------------------------------------------------------------- .../apache/hadoop/hive/ql/exec/Utilities.java | 13 +++---- .../test/queries/clientpositive/mm_loc_ctas.q | 19 ++++++++++ .../results/clientpositive/mm_loc_ctas.q.out | 38 ++++++++++++++++++++ 3 files changed, 64 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/611770dd/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index 30bee18..74fb1ba 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -4277,14 +4277,15 @@ public final class Utilities { } } - HashSet<String> committed = new HashSet<>(); + HashSet<Path> committed = new HashSet<>(); for (Path mfp : manifests) { try (FSDataInputStream mdis = fs.open(mfp)) { int fileCount = mdis.readInt(); for (int i = 0; i < fileCount; ++i) { String nextFile = mdis.readUTF(); Utilities.FILE_OP_LOGGER.trace("Looking at committed file: {}", nextFile); - if (!committed.add(nextFile)) { + Path path = fs.makeQualified(new Path(nextFile)); + if (!committed.add(path)) { throw new HiveException(nextFile + " was specified in multiple manifests"); } } @@ -4345,7 +4346,7 @@ public final 
class Utilities { } private static void cleanMmDirectory(Path dir, FileSystem fs, String unionSuffix, - int lbLevels, HashSet<String> committed) throws IOException, HiveException { + int lbLevels, HashSet<Path> committed) throws IOException, HiveException { for (FileStatus child : fs.listStatus(dir)) { Path childPath = child.getPath(); if (lbLevels > 0) { @@ -4357,7 +4358,7 @@ public final class Utilities { "Recursion into LB directory {}; levels remaining ", childPath, lbLevels - 1); cleanMmDirectory(childPath, fs, unionSuffix, lbLevels - 1, committed); } else { - if (committed.contains(childPath.toString())) { + if (committed.contains(childPath)) { throw new HiveException("LB FSOP has commited " + childPath + " outside of LB directory levels " + lbLevels); } @@ -4367,12 +4368,12 @@ public final class Utilities { } // No more LB directories expected. if (unionSuffix == null) { - if (committed.remove(childPath.toString())) { + if (committed.remove(childPath)) { continue; // A good file. } deleteUncommitedFile(childPath, fs); } else if (!child.isDirectory()) { - if (committed.contains(childPath.toString())) { + if (committed.contains(childPath)) { throw new HiveException("Union FSOP has commited " + childPath + " outside of union directory " + unionSuffix); } http://git-wip-us.apache.org/repos/asf/hive/blob/611770dd/ql/src/test/queries/clientpositive/mm_loc_ctas.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/mm_loc_ctas.q b/ql/src/test/queries/clientpositive/mm_loc_ctas.q new file mode 100644 index 0000000..8e4cbbb --- /dev/null +++ b/ql/src/test/queries/clientpositive/mm_loc_ctas.q @@ -0,0 +1,19 @@ +--! 
qt:dataset:src + +set hive.metastore.dml.events=true; +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.fetch.task.conversion=none; +set tez.grouping.min-size=1; +set tez.grouping.max-size=2; +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + + +drop table test; +create table test(id int, name string); +insert into test values(1, 'aa'),(2,'bb'); + +drop table test3; +CREATE TABLE test3 stored as textfile LOCATION '${system:test.tmp.dir}/test2' tblproperties('transactional'='true', 'transactional_properties'='insert_only') AS SELECT * from test; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/611770dd/ql/src/test/results/clientpositive/mm_loc_ctas.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/mm_loc_ctas.q.out b/ql/src/test/results/clientpositive/mm_loc_ctas.q.out new file mode 100644 index 0000000..471d835 --- /dev/null +++ b/ql/src/test/results/clientpositive/mm_loc_ctas.q.out @@ -0,0 +1,38 @@ +PREHOOK: query: drop table test +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table test +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table test(id int, name string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test +POSTHOOK: query: create table test(id int, name string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test +PREHOOK: query: insert into test values(1, 'aa'),(2,'bb') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test +POSTHOOK: query: insert into test values(1, 'aa'),(2,'bb') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test +POSTHOOK: Lineage: test.id SCRIPT [] +POSTHOOK: Lineage: test.name SCRIPT [] +PREHOOK: query: drop table 
test3 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table test3 +POSTHOOK: type: DROPTABLE +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@test +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@test3 +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@test +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test3
