Repository: hive Updated Branches: refs/heads/master b43b4d2a2 -> 468907eab
HIVE-20410: aborted Insert Overwrite on transactional table causes "Not enough history available for..." error (Eugene Koifman, reviewed by Sergey Shelukhin) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/468907ea Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/468907ea Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/468907ea Branch: refs/heads/master Commit: 468907eab36f78df3e14a24005153c9a23d62555 Parents: b43b4d2 Author: Eugene Koifman <ekoif...@apache.org> Authored: Fri Aug 17 17:08:33 2018 -0700 Committer: Eugene Koifman <ekoif...@apache.org> Committed: Fri Aug 17 17:08:33 2018 -0700 ---------------------------------------------------------------------- .../org/apache/hadoop/hive/ql/io/AcidUtils.java | 11 ++++- .../apache/hadoop/hive/ql/TestTxnCommands.java | 2 +- .../mm_insert_overwrite_aborted.q | 20 ++++++++ .../mm_insert_overwrite_aborted.q.out | 50 ++++++++++++++++++++ 4 files changed, 80 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/468907ea/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java index cd47a63..4d71eb4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java @@ -1127,12 +1127,19 @@ public class AcidUtils { } } - if(bestBase.oldestBase != null && bestBase.status == null) { + if(bestBase.oldestBase != null && bestBase.status == null && + MetaDataFile.isCompacted(bestBase.oldestBase, fs)) { /** * If here, it means there was a base_x (> 1 perhaps) but none were suitable for given * {@link writeIdList}. Note that 'original' files are logically a base_Long.MIN_VALUE and thus * cannot have any data for an open txn. We could check {@link deltas} has files to cover - * [1,n] w/o gaps but this would almost never happen...*/ + * [1,n] w/o gaps but this would almost never happen... + * + * We only throw for base_x produced by Compactor since that base erases all history and + * cannot be used for a client that has a snapshot in which something inside this base is + * open. (Nor can we ignore this base of course) But base_x which is a result of IOW, + * contains all history so we treat it just like delta wrt visibility. Imagine, IOW which + * aborts. It creates a base_x, which can and should just be ignored.*/ long[] exceptions = writeIdList.getInvalidWriteIds(); String minOpenWriteId = exceptions != null && exceptions.length > 0 ? Long.toString(exceptions[0]) : "x"; http://git-wip-us.apache.org/repos/asf/hive/blob/468907ea/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java index 536281d..9a4322d 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java @@ -108,7 +108,7 @@ public class TestTxnCommands extends TxnCommandsBaseForTests { Assert.assertEquals(1, rs.size()); Assert.assertEquals("1", rs.get(0)); hiveConf.setBoolVar(HiveConf.ConfVars.HIVETESTMODEROLLBACKTXN, true); - runStatementOnDriver("insert into " + Table.ACIDTBL + " values(3,2)"); + runStatementOnDriver("insert overwrite table " + Table.ACIDTBL + " values(3,2)"); hiveConf.setBoolVar(HiveConf.ConfVars.HIVETESTMODEROLLBACKTXN, false); runStatementOnDriver("insert into " + Table.ACIDTBL + " values(5,6)"); rs = runStatementOnDriver("select a from " + Table.ACIDTBL + " order by a"); http://git-wip-us.apache.org/repos/asf/hive/blob/468907ea/ql/src/test/queries/clientpositive/mm_insert_overwrite_aborted.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/mm_insert_overwrite_aborted.q b/ql/src/test/queries/clientpositive/mm_insert_overwrite_aborted.q new file mode 100644 index 0000000..938e1f4 --- /dev/null +++ b/ql/src/test/queries/clientpositive/mm_insert_overwrite_aborted.q @@ -0,0 +1,20 @@ +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.exec.dynamic.partition=true; +set hive.vectorized.execution.enabled=true; +set hive.create.as.insert.only=true; + +drop table if exists studentparttab30k; +create table studentparttab30k (name string) row format delimited fields terminated by '\\t' stored as textfile; +insert into studentparttab30k values('a'); + +drop table if exists multi_insert_1; +create table multi_insert_1 (name string) row format delimited fields terminated by '\\t' stored as textfile; + +set hive.test.rollbacktxn=true; + +insert overwrite table multi_insert_1 select name FROM studentparttab30k; + +set hive.test.rollbacktxn=false; +select * from multi_insert_1; http://git-wip-us.apache.org/repos/asf/hive/blob/468907ea/ql/src/test/results/clientpositive/mm_insert_overwrite_aborted.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/mm_insert_overwrite_aborted.q.out b/ql/src/test/results/clientpositive/mm_insert_overwrite_aborted.q.out new file mode 100644 index 0000000..d645b86 --- /dev/null +++ b/ql/src/test/results/clientpositive/mm_insert_overwrite_aborted.q.out @@ -0,0 +1,50 @@ +PREHOOK: query: drop table if exists studentparttab30k +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists studentparttab30k +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table studentparttab30k (name string) row format delimited fields terminated by '\\t' stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@studentparttab30k +POSTHOOK: query: create table studentparttab30k (name string) row format delimited fields terminated by '\\t' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@studentparttab30k +PREHOOK: query: insert into studentparttab30k values('a') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@studentparttab30k +POSTHOOK: query: insert into studentparttab30k values('a') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@studentparttab30k +POSTHOOK: Lineage: studentparttab30k.name SCRIPT [] +PREHOOK: query: drop table if exists multi_insert_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists multi_insert_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table multi_insert_1 (name string) row format delimited fields terminated by '\\t' stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@multi_insert_1 +POSTHOOK: query: create table multi_insert_1 (name string) row format delimited fields terminated by '\\t' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@multi_insert_1 +PREHOOK: query: insert overwrite table multi_insert_1 select name FROM studentparttab30k +PREHOOK: type: QUERY +PREHOOK: Input: default@studentparttab30k +PREHOOK: Output: default@multi_insert_1 +POSTHOOK: query: insert overwrite table multi_insert_1 select name FROM studentparttab30k +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studentparttab30k +POSTHOOK: Output: default@multi_insert_1 +POSTHOOK: Lineage: multi_insert_1.name SIMPLE [(studentparttab30k)studentparttab30k.FieldSchema(name:name, type:string, comment:null), ] +PREHOOK: query: select * from multi_insert_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@multi_insert_1 +#### A masked pattern was here #### +POSTHOOK: query: select * from multi_insert_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@multi_insert_1 +#### A masked pattern was here ####