HIVE-13622 WriteSet tracking optimizations (Eugene Koifman, reviewed by Alan Gates)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f25b8652 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f25b8652 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f25b8652 Branch: refs/heads/master Commit: f25b86520d8af5cfad3a0311281bc8e70117baa4 Parents: ffa69a2 Author: Eugene Koifman <[email protected]> Authored: Thu May 19 11:59:45 2016 -0700 Committer: Eugene Koifman <[email protected]> Committed: Thu May 19 11:59:45 2016 -0700 ---------------------------------------------------------------------- .../hive/hcatalog/streaming/HiveEndPoint.java | 4 +- .../streaming/mutate/client/lock/Lock.java | 7 +- .../streaming/mutate/client/lock/TestLock.java | 11 + .../hive/metastore/TestHiveMetaStoreTxns.java | 7 + metastore/if/hive_metastore.thrift | 12 + .../gen/thrift/gen-cpp/ThriftHiveMetastore.cpp | 2020 +++++------ .../gen/thrift/gen-cpp/hive_metastore_types.cpp | 1245 +++---- .../gen/thrift/gen-cpp/hive_metastore_types.h | 52 +- .../metastore/api/AddDynamicPartitions.java | 127 +- .../hive/metastore/api/DataOperationType.java | 57 + .../hive/metastore/api/LockComponent.java | 234 +- .../src/gen/thrift/gen-php/metastore/Types.php | 86 + .../gen/thrift/gen-py/hive_metastore/ttypes.py | 69 +- .../gen/thrift/gen-rb/hive_metastore_types.rb | 27 +- .../hive/metastore/HiveMetaStoreClient.java | 9 + .../hadoop/hive/metastore/IMetaStoreClient.java | 10 +- .../hive/metastore/LockComponentBuilder.java | 10 + .../hive/metastore/LockRequestBuilder.java | 9 + .../hadoop/hive/metastore/txn/TxnHandler.java | 108 +- .../hive/metastore/txn/TxnHandler.java.orig | 3233 ++++++++++++++++++ .../metastore/txn/TestCompactionTxnHandler.java | 15 +- .../hive/metastore/txn/TestTxnHandler.java | 85 + .../apache/hadoop/hive/ql/exec/MoveTask.java | 3 +- .../org/apache/hadoop/hive/ql/io/AcidUtils.java | 16 +- .../hadoop/hive/ql/lockmgr/DbTxnManager.java | 34 +- .../apache/hadoop/hive/ql/metadata/Hive.java | 6 +- .../hive/ql/lockmgr/TestDbTxnManager2.java | 181 +- .../hive/ql/txn/compactor/TestCleaner.java | 7 + .../hive/ql/txn/compactor/TestInitiator.java | 21 + 29 files changed, 5992 insertions(+), 1713 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/f25b8652/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/HiveEndPoint.java ---------------------------------------------------------------------- diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/HiveEndPoint.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/HiveEndPoint.java index db9fd72..cb64fff 100644 --- a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/HiveEndPoint.java +++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/HiveEndPoint.java @@ -19,6 +19,7 @@ package org.apache.hive.hcatalog.streaming; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.hive.metastore.api.DataOperationType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.cli.CliSessionState; @@ -964,7 +965,8 @@ public class HiveEndPoint { LockComponentBuilder lockCompBuilder = new LockComponentBuilder() .setDbName(hiveEndPoint.database) .setTableName(hiveEndPoint.table) - .setShared(); + .setShared() + .setOperationType(DataOperationType.INSERT); if (partNameForLock!=null && !partNameForLock.isEmpty() ) { lockCompBuilder.setPartitionName(partNameForLock); } http://git-wip-us.apache.org/repos/asf/hive/blob/f25b8652/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/lock/Lock.java ---------------------------------------------------------------------- diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/lock/Lock.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/lock/Lock.java index 17fa91a..c272837 100644 --- a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/lock/Lock.java +++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/mutate/client/lock/Lock.java @@ -31,6 +31,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.IMetaStoreClient; import org.apache.hadoop.hive.metastore.LockComponentBuilder; import org.apache.hadoop.hive.metastore.LockRequestBuilder; +import org.apache.hadoop.hive.metastore.api.DataOperationType; import org.apache.hadoop.hive.metastore.api.LockComponent; import org.apache.hadoop.hive.metastore.api.LockRequest; import org.apache.hadoop.hive.metastore.api.LockResponse; @@ -178,10 +179,12 @@ public class Lock { for (Table table : tables) { LockComponentBuilder componentBuilder = new LockComponentBuilder().setDbName(table.getDbName()).setTableName( table.getTableName()); + //todo: DataOperationType is set conservatively here, we'd really want to distinguish update/delete + //and insert/select and if resource (that is written to) is ACID or not if (sinks.contains(table)) { - componentBuilder.setSemiShared(); + componentBuilder.setSemiShared().setOperationType(DataOperationType.UPDATE).setIsAcid(true); } else { - componentBuilder.setShared(); + componentBuilder.setShared().setOperationType(DataOperationType.INSERT).setIsAcid(true); } LockComponent component = componentBuilder.build(); requestBuilder.addLockComponent(component); http://git-wip-us.apache.org/repos/asf/hive/blob/f25b8652/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/client/lock/TestLock.java ---------------------------------------------------------------------- diff --git a/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/client/lock/TestLock.java b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/client/lock/TestLock.java index cf56176..e454942 100644 --- a/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/client/lock/TestLock.java +++ b/hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/mutate/client/lock/TestLock.java @@ -43,6 +43,7 @@ import java.util.Timer; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.IMetaStoreClient; +import org.apache.hadoop.hive.metastore.api.DataOperationType; import org.apache.hadoop.hive.metastore.api.LockComponent; import org.apache.hadoop.hive.metastore.api.LockLevel; import org.apache.hadoop.hive.metastore.api.LockRequest; @@ -174,10 +175,14 @@ public class TestLock { LockComponent expected1 = new LockComponent(LockType.SHARED_READ, LockLevel.TABLE, "DB"); expected1.setTablename("SOURCE_1"); + expected1.setOperationType(DataOperationType.INSERT); + expected1.setIsAcid(true); assertTrue(components.contains(expected1)); LockComponent expected2 = new LockComponent(LockType.SHARED_READ, LockLevel.TABLE, "DB"); expected2.setTablename("SOURCE_2"); + expected2.setOperationType(DataOperationType.INSERT); + expected2.setIsAcid(true); assertTrue(components.contains(expected2)); } @@ -197,14 +202,20 @@ public class TestLock { LockComponent expected1 = new LockComponent(LockType.SHARED_READ, LockLevel.TABLE, "DB"); expected1.setTablename("SOURCE_1"); + expected1.setOperationType(DataOperationType.INSERT); + expected1.setIsAcid(true); assertTrue(components.contains(expected1)); LockComponent expected2 = new LockComponent(LockType.SHARED_READ, LockLevel.TABLE, "DB"); expected2.setTablename("SOURCE_2"); + expected2.setOperationType(DataOperationType.INSERT); + expected2.setIsAcid(true); assertTrue(components.contains(expected2)); LockComponent expected3 = new LockComponent(LockType.SHARED_WRITE, LockLevel.TABLE, "DB"); expected3.setTablename("SINK"); + expected3.setOperationType(DataOperationType.UPDATE); + expected3.setIsAcid(true); assertTrue(components.contains(expected3)); } http://git-wip-us.apache.org/repos/asf/hive/blob/f25b8652/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStoreTxns.java ---------------------------------------------------------------------- diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStoreTxns.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStoreTxns.java index 22354ab..997f73e 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStoreTxns.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStoreTxns.java @@ -22,6 +22,7 @@ import junit.framework.Assert; import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.common.ValidReadTxnList; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.DataOperationType; import org.apache.hadoop.hive.metastore.api.HeartbeatTxnRangeResponse; import org.apache.hadoop.hive.metastore.api.LockResponse; import org.apache.hadoop.hive.metastore.api.LockState; @@ -152,14 +153,17 @@ public class TestHiveMetaStoreTxns { .setTableName("mytable") .setPartitionName("mypartition") .setExclusive() + .setOperationType(DataOperationType.NO_TXN) .build()); rqstBuilder.addLockComponent(new LockComponentBuilder() .setDbName("mydb") .setTableName("yourtable") .setSemiShared() + .setOperationType(DataOperationType.NO_TXN) .build()); rqstBuilder.addLockComponent(new LockComponentBuilder() .setDbName("yourdb") + .setOperationType(DataOperationType.NO_TXN) .setShared() .build()); rqstBuilder.setUser("fred"); @@ -188,15 +192,18 @@ public class TestHiveMetaStoreTxns { .setTableName("mytable") .setPartitionName("mypartition") .setSemiShared() + .setOperationType(DataOperationType.UPDATE) .build()) .addLockComponent(new LockComponentBuilder() .setDbName("mydb") .setTableName("yourtable") .setSemiShared() + .setOperationType(DataOperationType.UPDATE) .build()) .addLockComponent(new LockComponentBuilder() .setDbName("yourdb") .setShared() + .setOperationType(DataOperationType.SELECT) .build()) .setUser("fred"); http://git-wip-us.apache.org/repos/asf/hive/blob/f25b8652/metastore/if/hive_metastore.thrift ---------------------------------------------------------------------- diff --git a/metastore/if/hive_metastore.thrift b/metastore/if/hive_metastore.thrift index f8e56c7..738456c 100755 --- a/metastore/if/hive_metastore.thrift +++ b/metastore/if/hive_metastore.thrift @@ -134,6 +134,15 @@ enum GrantRevokeType { REVOKE = 2, } +enum DataOperationType { + SELECT = 1, + INSERT = 2 + UPDATE = 3, + DELETE = 4, + UNSET = 5,//this is the default to distinguish from NULL from old clients + NO_TXN = 6,//drop table, insert overwrite, etc - something non-transactional +} + // Types of events the client can request that the metastore fire. For now just support DML operations, as the metastore knows // about DDL operations and there's no reason for the client to request such an event. enum EventRequestType { @@ -657,6 +666,8 @@ struct LockComponent { 3: required string dbname, 4: optional string tablename, 5: optional string partitionname, + 6: optional DataOperationType operationType = DataOperationType.UNSET, + 7: optional bool isAcid = false } struct LockRequest { @@ -762,6 +773,7 @@ struct AddDynamicPartitions { 2: required string dbname, 3: required string tablename, 4: required list<string> partitionnames, + 5: optional DataOperationType operationType = DataOperationType.UNSET } struct NotificationEventRequest {
