Repository: hive Updated Branches: refs/heads/master 23c0f7164 -> c159f087b
HIVE-14400 : Handle concurrent insert with dynamic partition (Wei Zheng, reviewed by Eugene Koifman) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c159f087 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c159f087 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c159f087 Branch: refs/heads/master Commit: c159f087b11b78d4555d6b3d75be7df352dc46ee Parents: 23c0f71 Author: Wei Zheng <[email protected]> Authored: Tue Aug 2 19:52:57 2016 -0700 Committer: Wei Zheng <[email protected]> Committed: Tue Aug 2 19:52:57 2016 -0700 ---------------------------------------------------------------------- .../apache/hadoop/hive/ql/metadata/Hive.java | 35 ++++++++++++++++---- 1 file changed, 28 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/c159f087/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 66a2c94..9a24ec3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -1584,14 +1584,24 @@ public class Hive { StatsSetupConst.TRUE); } MetaStoreUtils.populateQuickStats(HiveStatsUtils.getFileStatusRecurse(newPartPath, -1, newPartPath.getFileSystem(conf)), newTPart.getParameters()); - getMSC().add_partition(newTPart.getTPartition()); - } else { - EnvironmentContext environmentContext = null; - if (hasFollowingStatsTask) { - environmentContext = new EnvironmentContext(); - environmentContext.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE); + try { + getMSC().add_partition(newTPart.getTPartition()); + } catch (AlreadyExistsException aee) { + // With multiple users concurrently issuing insert 
statements on the same partition, there is + // a side effect that some queries may not see a partition at the time when they're issued, + // but will realize the partition is actually there when they try to add that partition + // to the metastore and thus get AlreadyExistsException, because some earlier query just created it (race condition). + // For example, imagine such a table is created: + // create table T (name char(50)) partitioned by (ds string); + // and the following two queries are launched at the same time, from different sessions: + // insert into table T partition (ds) values ('Bob', 'today'); -- creates the partition 'today' + // insert into table T partition (ds) values ('Joe', 'today'); -- will fail with AlreadyExistsException + // In that case, we want to retry with alterPartition. + LOG.debug("Caught AlreadyExistsException, trying to alter partition instead"); + setStatsPropAndAlterPartition(hasFollowingStatsTask, tbl, newTPart); } - alterPartition(tbl.getDbName(), tbl.getTableName(), new Partition(tbl, newTPart.getTPartition()), environmentContext); + } else { + setStatsPropAndAlterPartition(hasFollowingStatsTask, tbl, newTPart); } return newTPart; } catch (IOException e) { @@ -1609,6 +1619,17 @@ public class Hive { } } + private void setStatsPropAndAlterPartition(boolean hasFollowingStatsTask, Table tbl, + Partition newTPart) throws HiveException, InvalidOperationException { + EnvironmentContext environmentContext = null; + if (hasFollowingStatsTask) { + environmentContext = new EnvironmentContext(); + environmentContext.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE); + } + alterPartition(tbl.getDbName(), tbl.getTableName(), new Partition(tbl, newTPart.getTPartition()), + environmentContext); + } + /** * Walk through sub-directory tree to construct list bucketing location map. *
