This is an automated email from the ASF dual-hosted git repository.

anishek pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new c7503e2 HIVE-24032: Remove hadoop shims dependency and use FileSystem Api directly from standalone metastore (Aasha Medhi, reviewed by Pravin Kumar Sinha) Remove hadoop shims dependency from standalone metastore. Rename hive.repl.data.copy.lazy hive conf to hive.repl.run.data.copy.tasks.on.target c7503e2 is described below commit c7503e2c355d2def00318fdfed6a650920af4dc0 Author: Anishek Agarwal <anis...@gmail.com> AuthorDate: Mon Aug 24 15:43:16 2020 +0530 HIVE-24032: Remove hadoop shims dependency and use FileSystem Api directly from standalone metastore (Aasha Medhi, reviewed by Pravin Kumar Sinha) Remove hadoop shims dependency from standalone metastore. Rename hive.repl.data.copy.lazy hive conf to hive.repl.run.data.copy.tasks.on.target --- .../java/org/apache/hadoop/hive/conf/HiveConf.java | 5 +- .../hcatalog/api/repl/commands/TestCommands.java | 2 +- .../TestMetaStoreMultipleEncryptionZones.java | 63 +++++++++----------- .../parse/BaseReplicationScenariosAcidTables.java | 2 +- .../hadoop/hive/ql/parse/TestExportImport.java | 2 +- .../hive/ql/parse/TestReplicationScenarios.java | 6 +- .../parse/TestReplicationScenariosAcidTables.java | 5 +- .../TestReplicationScenariosAcrossInstances.java | 2 +- .../TestReplicationScenariosExternalTables.java | 6 +- .../ql/parse/TestStatsReplicationScenarios.java | 2 +- .../hadoop/hive/ql/exec/repl/ReplDumpTask.java | 9 ++- .../hadoop/hive/ql/exec/repl/ReplLoadTask.java | 4 +- .../repl/bootstrap/load/table/LoadPartitions.java | 2 +- .../exec/repl/bootstrap/load/table/LoadTable.java | 5 +- .../hadoop/hive/ql/exec/repl/util/FileList.java | 2 +- .../hive/ql/parse/ImportSemanticAnalyzer.java | 4 +- .../repl/dump/events/AddPartitionHandler.java | 2 +- .../parse/repl/dump/events/CommitTxnHandler.java | 2 +- .../repl/dump/events/CreateFunctionHandler.java | 2 +- .../parse/repl/dump/events/CreateTableHandler.java | 2 +- 
.../ql/parse/repl/dump/events/InsertHandler.java | 2 +- .../repl/load/message/CreateFunctionHandler.java | 3 +- .../hive/ql/exec/repl/util/TestFileList.java | 2 +- .../queries/clientpositive/repl_2_exim_basic.q | 2 +- .../queries/clientpositive/repl_3_exim_metadata.q | 2 +- .../queries/clientpositive/repl_4_exim_nocolstat.q | 2 +- standalone-metastore/metastore-common/pom.xml | 5 -- .../hadoop/hive/metastore/ReplChangeManager.java | 23 +++----- .../hive/metastore/utils/EncryptionZoneUtils.java | 69 ++++++++++++++++++++++ 29 files changed, 141 insertions(+), 98 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index f4e0ae0..de355ad 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -531,11 +531,12 @@ public class HiveConf extends Configuration { + "task increment that would cross the specified limit."), REPL_PARTITIONS_DUMP_PARALLELISM("hive.repl.partitions.dump.parallelism",100, "Number of threads that will be used to dump partition data information during repl dump."), - REPL_DATA_COPY_LAZY("hive.repl.data.copy.lazy", true, + REPL_RUN_DATA_COPY_TASKS_ON_TARGET("hive.repl.run.data.copy.tasks.on.target", true, "Indicates whether replication should run data copy tasks during repl load operation."), REPL_FILE_LIST_CACHE_SIZE("hive.repl.file.list.cache.size", 10000, "This config indicates threshold for the maximum number of data copy locations to be kept in memory. \n" - + "When the config 'hive.repl.data.copy.lazy' is set to true, this config is not considered."), + + "When the config 'hive.repl.run.data.copy.tasks.on.target' is set to true, this config " + + "is not considered."), REPL_DUMP_METADATA_ONLY("hive.repl.dump.metadata.only", false, "Indicates whether replication dump only metadata information or data + metadata. 
\n" + "This config makes hive.repl.include.external.tables config ineffective."), diff --git a/hcatalog/webhcat/java-client/src/test/java/org/apache/hive/hcatalog/api/repl/commands/TestCommands.java b/hcatalog/webhcat/java-client/src/test/java/org/apache/hive/hcatalog/api/repl/commands/TestCommands.java index f803bc2..6156e04 100644 --- a/hcatalog/webhcat/java-client/src/test/java/org/apache/hive/hcatalog/api/repl/commands/TestCommands.java +++ b/hcatalog/webhcat/java-client/src/test/java/org/apache/hive/hcatalog/api/repl/commands/TestCommands.java @@ -77,7 +77,7 @@ public class TestCommands { TestHCatClient.startMetaStoreServer(); hconf = TestHCatClient.getConf(); hconf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname,""); - hconf.set(HiveConf.ConfVars.REPL_DATA_COPY_LAZY.varname, "false"); + hconf.set(HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET.varname, "false"); hconf .setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory"); diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestMetaStoreMultipleEncryptionZones.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestMetaStoreMultipleEncryptionZones.java index e46a520..7bfbbd8 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestMetaStoreMultipleEncryptionZones.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestMetaStoreMultipleEncryptionZones.java @@ -32,10 +32,9 @@ import org.apache.hadoop.hive.metastore.ReplChangeManager.RecycleType; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.utils.EncryptionZoneUtils; import org.apache.hadoop.util.StringUtils; -import org.apache.hadoop.hive.shims.HadoopShims; import org.apache.hadoop.hdfs.DFSTestUtil; -import org.apache.hadoop.hive.shims.ShimLoader; import 
org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.hive.metastore.client.builder.DatabaseBuilder; import org.apache.hadoop.hive.metastore.client.builder.TableBuilder; @@ -65,7 +64,6 @@ public class TestMetaStoreMultipleEncryptionZones { private static MiniDFSCluster miniDFSCluster; private static String cmroot; private static FileSystem fs; - private static HadoopShims.HdfsEncryptionShim shimCm; private static String cmrootEncrypted; private static String jksFile = System.getProperty("java.io.tmpdir") + "/test.jks"; private static String cmrootFallBack; @@ -92,8 +90,6 @@ public class TestMetaStoreMultipleEncryptionZones { hiveConf.set(HiveConf.ConfVars.REPLCMENCRYPTEDDIR.varname, cmrootEncrypted); hiveConf.set(HiveConf.ConfVars.REPLCMFALLBACKNONENCRYPTEDDIR.varname, cmrootFallBack); initReplChangeManager(); - //Create cm in encrypted zone - shimCm = ShimLoader.getHadoopShims().createHdfsEncryptionShim(fs, conf); try { client = new HiveMetaStoreClient(hiveConf); @@ -156,7 +152,7 @@ public class TestMetaStoreMultipleEncryptionZones { Path dirDb1 = new Path(warehouse.getWhRoot(), dbName1 +".db"); warehouseFs.delete(dirDb1, true); warehouseFs.mkdirs(dirDb1); - shimCm.createEncryptionZone(dirDb1, "test_key_db"); + EncryptionZoneUtils.createEncryptionZone(dirDb1, "test_key_db", conf); Path dirTbl1 = new Path(dirDb1, tblName1); warehouseFs.mkdirs(dirTbl1); Path part11 = new Path(dirTbl1, "part1"); @@ -165,7 +161,7 @@ public class TestMetaStoreMultipleEncryptionZones { Path dirDb2 = new Path(warehouse.getWhRoot(), dbName2 +".db"); warehouseFs.delete(dirDb2, true); warehouseFs.mkdirs(dirDb2); - shimCm.createEncryptionZone(dirDb2, "test_key_cm"); + EncryptionZoneUtils.createEncryptionZone(dirDb2, "test_key_cm", conf); Path dirTbl2 = new Path(dirDb2, tblName2); warehouseFs.mkdirs(dirTbl2); Path part12 = new Path(dirTbl2, "part1"); @@ -274,13 +270,13 @@ public class TestMetaStoreMultipleEncryptionZones { warehouseFs.mkdirs(dirDb); Path dirTbl1 = new Path(dirDb, 
tblName1); warehouseFs.mkdirs(dirTbl1); - shimCm.createEncryptionZone(dirTbl1, "test_key_db"); + EncryptionZoneUtils.createEncryptionZone(dirTbl1, "test_key_db", conf); Path part11 = new Path(dirTbl1, "part1"); createFile(part11, "testClearer11"); Path dirTbl2 = new Path(dirDb, tblName2); warehouseFs.mkdirs(dirTbl2); - shimCm.createEncryptionZone(dirTbl2, "test_key_cm"); + EncryptionZoneUtils.createEncryptionZone(dirTbl2, "test_key_cm", conf); Path part12 = new Path(dirTbl2, "part1"); createFile(part12, "testClearer12"); @@ -350,7 +346,7 @@ public class TestMetaStoreMultipleEncryptionZones { Path dirDb1 = new Path(warehouse.getWhRoot(), dbName1 +".db"); warehouseFs.mkdirs(dirDb1); - shimCm.createEncryptionZone(dirDb1, "test_key_db"); + EncryptionZoneUtils.createEncryptionZone(dirDb1, "test_key_db", conf); Path dirTbl1 = new Path(dirDb1, tblName1); warehouseFs.mkdirs(dirTbl1); Path part11 = new Path(dirTbl1, "part1"); @@ -358,7 +354,7 @@ public class TestMetaStoreMultipleEncryptionZones { Path dirDb2 = new Path(warehouse.getWhRoot(), dbName2 +".db"); warehouseFs.mkdirs(dirDb2); - shimCm.createEncryptionZone(dirDb2, "test_key_db"); + EncryptionZoneUtils.createEncryptionZone(dirDb2, "test_key_db", conf); Path dirTbl2 = new Path(dirDb2, tblName2); warehouseFs.mkdirs(dirTbl2); Path part12 = new Path(dirTbl2, "part1"); @@ -465,7 +461,7 @@ public class TestMetaStoreMultipleEncryptionZones { Path dirDb = new Path(warehouse.getWhRoot(), dbName +".db"); warehouseFs.delete(dirDb, true); warehouseFs.mkdirs(dirDb); - shimCm.createEncryptionZone(dirDb, "test_key_db"); + EncryptionZoneUtils.createEncryptionZone(dirDb, "test_key_db", conf); Path dirTbl1 = new Path(dirDb, tblName1); warehouseFs.mkdirs(dirTbl1); Path part11 = new Path(dirTbl1, "part1"); @@ -614,7 +610,7 @@ public class TestMetaStoreMultipleEncryptionZones { Path dirDb = new Path(warehouse.getWhRoot(), dbName +".db"); warehouseFs.delete(dirDb, true); warehouseFs.mkdirs(dirDb); - shimCm.createEncryptionZone(dirDb, 
"test_key_db"); + EncryptionZoneUtils.createEncryptionZone(dirDb, "test_key_db", conf); Path dirTbl1 = new Path(dirDb, tblName1); warehouseFs.mkdirs(dirTbl1); Path part11 = new Path(dirTbl1, "part1"); @@ -707,13 +703,13 @@ public class TestMetaStoreMultipleEncryptionZones { warehouseFs.mkdirs(dirDb); Path dirTbl1 = new Path(dirDb, tblName1); warehouseFs.mkdirs(dirTbl1); - shimCm.createEncryptionZone(dirTbl1, "test_key_db"); + EncryptionZoneUtils.createEncryptionZone(dirTbl1, "test_key_db", conf); Path part11 = new Path(dirTbl1, "part1"); createFile(part11, "testClearer11"); Path dirTbl2 = new Path(dirDb, tblName2); warehouseFs.mkdirs(dirTbl2); - shimCm.createEncryptionZone(dirTbl2, "test_key_db"); + EncryptionZoneUtils.createEncryptionZone(dirTbl2, "test_key_db", conf); Path part12 = new Path(dirTbl2, "part1"); createFile(part12, "testClearer12"); @@ -799,13 +795,13 @@ public class TestMetaStoreMultipleEncryptionZones { warehouseFs.mkdirs(dirDb); Path dirTbl1 = new Path(dirDb, tblName1); warehouseFs.mkdirs(dirTbl1); - shimCm.createEncryptionZone(dirTbl1, "test_key_db"); + EncryptionZoneUtils.createEncryptionZone(dirTbl1, "test_key_db", conf); Path part11 = new Path(dirTbl1, "part1"); createFile(part11, "testClearer11"); Path dirTbl2 = new Path(dirDb, tblName2); warehouseFs.mkdirs(dirTbl2); - shimCm.createEncryptionZone(dirTbl2, "test_key_cm"); + EncryptionZoneUtils.createEncryptionZone(dirTbl2, "test_key_cm", conf); Path part12 = new Path(dirTbl2, "part1"); createFile(part12, "testClearer12"); @@ -930,7 +926,7 @@ public class TestMetaStoreMultipleEncryptionZones { Path dirDb1 = new Path(warehouse.getWhRoot(), dbName1 +".db"); warehouseFs.delete(dirDb1, true); warehouseFs.mkdirs(dirDb1); - shimCm.createEncryptionZone(dirDb1, "test_key_db"); + EncryptionZoneUtils.createEncryptionZone(dirDb1, "test_key_db", conf); Path dirTbl1 = new Path(dirDb1, tblName1); warehouseFs.mkdirs(dirTbl1); Path part11 = new Path(dirTbl1, "part1"); @@ -939,7 +935,7 @@ public class 
TestMetaStoreMultipleEncryptionZones { Path dirDb2 = new Path(warehouse.getWhRoot(), dbName2 +".db"); warehouseFs.delete(dirDb2, true); warehouseFs.mkdirs(dirDb2); - shimCm.createEncryptionZone(dirDb2, "test_key_db"); + EncryptionZoneUtils.createEncryptionZone(dirDb2, "test_key_db", conf); Path dirTbl2 = new Path(dirDb2, tblName2); warehouseFs.mkdirs(dirTbl2); Path part12 = new Path(dirTbl2, "part1"); @@ -1020,7 +1016,7 @@ public class TestMetaStoreMultipleEncryptionZones { Path dirDb1 = new Path(warehouse.getWhRoot(), dbName1 +".db"); warehouseFs.mkdirs(dirDb1); - shimCm.createEncryptionZone(dirDb1, "test_key_db"); + EncryptionZoneUtils.createEncryptionZone(dirDb1, "test_key_db", conf); Path dirTbl1 = new Path(dirDb1, tblName1); warehouseFs.mkdirs(dirTbl1); Path part11 = new Path(dirTbl1, "part1"); @@ -1028,7 +1024,7 @@ public class TestMetaStoreMultipleEncryptionZones { Path dirDb2 = new Path(warehouse.getWhRoot(), dbName2 +".db"); warehouseFs.mkdirs(dirDb2); - shimCm.createEncryptionZone(dirDb2, "test_key_db"); + EncryptionZoneUtils.createEncryptionZone(dirDb2, "test_key_db", conf); Path dirTbl2 = new Path(dirDb2, tblName2); warehouseFs.mkdirs(dirTbl2); Path part12 = new Path(dirTbl2, "part1"); @@ -1125,7 +1121,7 @@ public class TestMetaStoreMultipleEncryptionZones { Path dirDb = new Path(warehouse.getWhRoot(), dbName +".db"); warehouseFs.delete(dirDb, true); warehouseFs.mkdirs(dirDb); - shimCm.createEncryptionZone(dirDb, "test_key_db"); + EncryptionZoneUtils.createEncryptionZone(dirDb, "test_key_db", conf); Path dirTbl1 = new Path(dirDb, tblName1); warehouseFs.mkdirs(dirTbl1); Path part11 = new Path(dirTbl1, "part1"); @@ -1233,7 +1229,7 @@ public class TestMetaStoreMultipleEncryptionZones { warehouseFs.mkdirs(dirDb); Path dirTbl1 = new Path(dirDb, "tbl1"); warehouseFs.mkdirs(dirTbl1); - shimCm.createEncryptionZone(dirTbl1, "test_key_db"); + EncryptionZoneUtils.createEncryptionZone(dirTbl1, "test_key_db", conf); Path part11 = new Path(dirTbl1, "part1"); 
createFile(part11, "testClearer11"); @@ -1262,8 +1258,6 @@ public class TestMetaStoreMultipleEncryptionZones { FileSystem cmfs = new Path(cmrootCmClearer).getFileSystem(hiveConfCmClearer); cmfs.mkdirs(warehouseCmClearer.getWhRoot()); - HadoopShims.HdfsEncryptionShim shimCmEncrypted = ShimLoader.getHadoopShims().createHdfsEncryptionShim(cmfs, conf); - FileSystem fsWarehouse = warehouseCmClearer.getWhRoot().getFileSystem(hiveConfCmClearer); long now = System.currentTimeMillis(); Path dirDb = new Path(warehouseCmClearer.getWhRoot(), "db1"); @@ -1271,7 +1265,7 @@ public class TestMetaStoreMultipleEncryptionZones { fsWarehouse.mkdirs(dirDb); Path dirTbl1 = new Path(dirDb, "tbl1"); fsWarehouse.mkdirs(dirTbl1); - shimCmEncrypted.createEncryptionZone(dirTbl1, "test_key_db"); + EncryptionZoneUtils.createEncryptionZone(dirTbl1, "test_key_db", conf); Path part11 = new Path(dirTbl1, "part1"); createFile(part11, "testClearer11"); String fileChksum11 = ReplChangeManager.checksumFor(part11, fsWarehouse); @@ -1280,7 +1274,7 @@ public class TestMetaStoreMultipleEncryptionZones { String fileChksum12 = ReplChangeManager.checksumFor(part12, fsWarehouse); Path dirTbl2 = new Path(dirDb, "tbl2"); fsWarehouse.mkdirs(dirTbl2); - shimCmEncrypted.createEncryptionZone(dirTbl2, "test_key_db"); + EncryptionZoneUtils.createEncryptionZone(dirTbl2, "test_key_db", conf); Path part21 = new Path(dirTbl2, "part1"); createFile(part21, "testClearer21"); String fileChksum21 = ReplChangeManager.checksumFor(part21, fsWarehouse); @@ -1289,7 +1283,7 @@ public class TestMetaStoreMultipleEncryptionZones { String fileChksum22 = ReplChangeManager.checksumFor(part22, fsWarehouse); Path dirTbl3 = new Path(dirDb, "tbl3"); fsWarehouse.mkdirs(dirTbl3); - shimCmEncrypted.createEncryptionZone(dirTbl3, "test_key_cm"); + EncryptionZoneUtils.createEncryptionZone(dirTbl3, "test_key_cm", conf); Path part31 = new Path(dirTbl3, "part1"); createFile(part31, "testClearer31"); String fileChksum31 = 
ReplChangeManager.checksumFor(part31, fsWarehouse); @@ -1371,8 +1365,7 @@ public class TestMetaStoreMultipleEncryptionZones { encryptedHiveConf.set(HiveConf.ConfVars.REPLCMFALLBACKNONENCRYPTEDDIR.varname, cmrootFallBack); //Create cm in encrypted zone - HadoopShims.HdfsEncryptionShim shimCmEncrypted = ShimLoader.getHadoopShims().createHdfsEncryptionShim(fs, conf); - shimCmEncrypted.createEncryptionZone(new Path(cmrootdirEncrypted), "test_key_db"); + EncryptionZoneUtils.createEncryptionZone(new Path(cmrootdirEncrypted), "test_key_db", conf); ReplChangeManager.resetReplChangeManagerInstance(); Warehouse warehouseEncrypted = new Warehouse(encryptedHiveConf); FileSystem warehouseFsEncrypted = warehouseEncrypted.getWhRoot().getFileSystem(encryptedHiveConf); @@ -1384,7 +1377,7 @@ public class TestMetaStoreMultipleEncryptionZones { warehouseFsEncrypted.mkdirs(dirDb); Path dirTbl1 = new Path(dirDb, "tbl1"); warehouseFsEncrypted.mkdirs(dirTbl1); - shimCmEncrypted.createEncryptionZone(dirTbl1, "test_key_db"); + EncryptionZoneUtils.createEncryptionZone(dirTbl1, "test_key_db", conf); Path part11 = new Path(dirTbl1, "part1"); createFile(part11, "testClearer11"); @@ -1436,9 +1429,8 @@ public class TestMetaStoreMultipleEncryptionZones { encryptedHiveConf.set(HiveConf.ConfVars.REPLCMFALLBACKNONENCRYPTEDDIR.varname, cmRootFallbackEncrypted); //Create cm in encrypted zone - HadoopShims.HdfsEncryptionShim shimCmEncrypted = ShimLoader.getHadoopShims().createHdfsEncryptionShim(fs, conf); - shimCmEncrypted.createEncryptionZone(new Path(cmrootdirEncrypted), "test_key_db"); - shimCmEncrypted.createEncryptionZone(new Path(cmRootFallbackEncrypted), "test_key_db"); + EncryptionZoneUtils.createEncryptionZone(new Path(cmrootdirEncrypted), "test_key_db", conf); + EncryptionZoneUtils.createEncryptionZone(new Path(cmRootFallbackEncrypted), "test_key_db", conf); ReplChangeManager.resetReplChangeManagerInstance(); boolean exceptionThrown = false; try { @@ -1474,8 +1466,7 @@ public class 
TestMetaStoreMultipleEncryptionZones { encryptedHiveConf.set(HiveConf.ConfVars.REPLCMFALLBACKNONENCRYPTEDDIR.varname, cmRootFallbackEncrypted); //Create cm in encrypted zone - HadoopShims.HdfsEncryptionShim shimCmEncrypted = ShimLoader.getHadoopShims().createHdfsEncryptionShim(fs, conf); - shimCmEncrypted.createEncryptionZone(new Path(cmrootdirEncrypted), "test_key_db"); + EncryptionZoneUtils.createEncryptionZone(new Path(cmrootdirEncrypted), "test_key_db", conf); ReplChangeManager.resetReplChangeManagerInstance(); boolean exceptionThrown = false; diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/BaseReplicationScenariosAcidTables.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/BaseReplicationScenariosAcidTables.java index a7cd3a6..203ac5e 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/BaseReplicationScenariosAcidTables.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/BaseReplicationScenariosAcidTables.java @@ -98,7 +98,7 @@ public class BaseReplicationScenariosAcidTables { put("hive.metastore.disallow.incompatible.col.type.changes", "false"); put("hive.in.repl.test", "true"); put("metastore.warehouse.tenant.colocation", "true"); - put(HiveConf.ConfVars.REPL_DATA_COPY_LAZY.varname, "false"); + put(HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET.varname, "false"); }}; acidEnableConf.putAll(overrides); diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestExportImport.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestExportImport.java index 24104c7..21490e4 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestExportImport.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestExportImport.java @@ -55,7 +55,7 @@ public class TestExportImport { conf.set("dfs.client.use.datanode.hostname", "true"); conf.set("hadoop.proxyuser." 
+ Utils.getUGI().getShortUserName() + ".hosts", "*"); conf.set("hive.repl.include.external.tables", "false"); - conf.set(HiveConf.ConfVars.REPL_DATA_COPY_LAZY.varname, "false"); + conf.set(HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET.varname, "false"); MiniDFSCluster miniDFSCluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).format(true).build(); HashMap<String, String> overridesForHiveConf = new HashMap<String, String>() {{ diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java index eb0776d..15adac0 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java @@ -181,7 +181,7 @@ public class TestReplicationScenarios { hconf.setBoolVar(HiveConf.ConfVars.HIVEOPTIMIZEMETADATAQUERIES, true); hconf.setBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER, true); hconf.setBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE, true); - hconf.setBoolVar(HiveConf.ConfVars.REPL_DATA_COPY_LAZY, false); + hconf.setBoolVar(HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET, false); System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " "); System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " "); @@ -602,7 +602,7 @@ public class TestReplicationScenarios { verifySetup("SELECT a from " + dbName + ".ptned_empty", empty, driver); verifySetup("SELECT * from " + dbName + ".unptned_empty", empty, driver); - String lazyCopyClause = " with ('" + HiveConf.ConfVars.REPL_DATA_COPY_LAZY.varname + "'='true')"; + String lazyCopyClause = " with ('" + HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET.varname + "'='true')"; advanceDumpDir(); run("REPL DUMP " + dbName + lazyCopyClause, driver); @@ -1650,7 +1650,7 @@ public class TestReplicationScenarios { run("CREATE TABLE " + dbName + 
".ptned_empty(a string) partitioned by (b int) STORED AS TEXTFILE", driver); - List<String> lazyCopyClause = Arrays.asList("'" + HiveConf.ConfVars.REPL_DATA_COPY_LAZY.varname + "'='true'"); + List<String> lazyCopyClause = Arrays.asList("'" + HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET.varname + "'='true'"); Tuple bootstrapDump = bootstrapLoadAndVerify(dbName, replDbName, lazyCopyClause); String[] unptnData = new String[] {"eleven", "twelve"}; diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java index a7a5bdf..2c176a2 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcidTables.java @@ -25,7 +25,6 @@ import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hive.cli.CliSessionState; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.AbortTxnsRequest; -import org.apache.hadoop.hive.metastore.api.Partition; import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.metastore.messaging.json.gzip.GzipJSONMessageEncoder; import org.apache.hadoop.hive.metastore.txn.TxnStore; @@ -103,7 +102,7 @@ public class TestReplicationScenariosAcidTables extends BaseReplicationScenarios put("hive.metastore.disallow.incompatible.col.type.changes", "false"); put("metastore.warehouse.tenant.colocation", "true"); put("hive.in.repl.test", "true"); - put(HiveConf.ConfVars.REPL_DATA_COPY_LAZY.varname, "false"); + put(HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET.varname, "false"); }}; acidEnableConf.putAll(overrides); @@ -1808,7 +1807,7 @@ public class TestReplicationScenariosAcidTables extends BaseReplicationScenarios @Test public void testManagedTableLazyCopy() throws 
Throwable { List<String> withClause = Arrays.asList( - "'" + HiveConf.ConfVars.REPL_DATA_COPY_LAZY.varname + "'='true'"); + "'" + HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET.varname + "'='true'"); WarehouseInstance.Tuple bootstrapDump = primary.run("use " + primaryDbName) .run("CREATE TABLE t1(a string) STORED AS TEXTFILE") diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java index 33922b5..956e6ca 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosAcrossInstances.java @@ -162,7 +162,7 @@ public class TestReplicationScenariosAcrossInstances extends BaseReplicationAcro Path identityUdf2HdfsPath = new Path(primary.functionsRoot, "idFunc2" + File.separator + "identity_udf2.jar"); setupUDFJarOnHDFS(identityUdfLocalPath, identityUdf1HdfsPath); setupUDFJarOnHDFS(identityUdfLocalPath, identityUdf2HdfsPath); - List<String> withClasuse = Arrays.asList("'" + HiveConf.ConfVars.REPL_DATA_COPY_LAZY.varname + "'='true'"); + List<String> withClasuse = Arrays.asList("'" + HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET.varname + "'='true'"); primary.run("CREATE FUNCTION " + primaryDbName + ".idFunc1 as 'IdentityStringUDF' " diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java index db72910..f86f2ac 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java @@ -81,7 +81,7 @@ public class 
TestReplicationScenariosExternalTables extends BaseReplicationAcros overrides.put(HiveConf.ConfVars.REPL_INCLUDE_EXTERNAL_TABLES.varname, "true"); overrides.put(HiveConf.ConfVars.HIVE_DISTCP_DOAS_USER.varname, UserGroupInformation.getCurrentUser().getUserName()); - overrides.put(HiveConf.ConfVars.REPL_DATA_COPY_LAZY.varname, "false"); + overrides.put(HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET.varname, "false"); internalBeforeClassSetup(overrides, TestReplicationScenarios.class); } @@ -217,7 +217,7 @@ public class TestReplicationScenariosExternalTables extends BaseReplicationAcros @Test public void externalTableReplicationWithDefaultPathsLazyCopy() throws Throwable { - List<String> lazyCopyClause = Arrays.asList("'" + HiveConf.ConfVars.REPL_DATA_COPY_LAZY.varname + "'='true'"); + List<String> lazyCopyClause = Arrays.asList("'" + HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET.varname + "'='true'"); //creates external tables with partitions WarehouseInstance.Tuple tuple = primary .run("use " + primaryDbName) @@ -284,7 +284,7 @@ public class TestReplicationScenariosExternalTables extends BaseReplicationAcros List<String> withClause = Arrays.asList( "'distcp.options.update'=''", - "'" + HiveConf.ConfVars.REPL_DATA_COPY_LAZY.varname + "'='true'" + "'" + HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET.varname + "'='true'" ); primary.run("use " + primaryDbName) diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestStatsReplicationScenarios.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestStatsReplicationScenarios.java index 44eead0..6df024a 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestStatsReplicationScenarios.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestStatsReplicationScenarios.java @@ -97,7 +97,7 @@ public class TestStatsReplicationScenarios { Map<String, String> additionalOverrides = new HashMap<String, String>() {{ 
put("fs.defaultFS", miniDFSCluster.getFileSystem().getUri().toString()); put(HiveConf.ConfVars.HIVE_IN_TEST_REPL.varname, "true"); - put(HiveConf.ConfVars.REPL_DATA_COPY_LAZY.varname, "false"); + put(HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET.varname, "false"); }}; Map<String, String> replicatedOverrides = new HashMap<>(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java index 0335e67..17aeee6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java @@ -104,7 +104,6 @@ import java.util.List; import java.util.Arrays; import java.util.Collections; import java.util.Base64; -import java.util.LinkedList; import java.util.UUID; import java.util.ArrayList; import java.util.Map; @@ -181,7 +180,7 @@ public class ReplDumpTask extends Task<ReplDumpWork> implements Serializable { ReplChangeManager.getInstance(conf); Path cmRoot = new Path(conf.getVar(HiveConf.ConfVars.REPLCMDIR)); Long lastReplId; - LOG.info("Data copy at load enabled : {}", conf.getBoolVar(HiveConf.ConfVars.REPL_DATA_COPY_LAZY)); + LOG.info("Data copy at load enabled : {}", conf.getBoolVar(HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET)); if (isBootstrap) { lastReplId = bootStrapDump(hiveDumpRoot, dmd, cmRoot, getHive()); } else { @@ -580,7 +579,7 @@ public class ReplDumpTask extends Task<ReplDumpWork> implements Serializable { } Path dbRootMetadata = new Path(metadataPath, dbName); Path dbRootData = new Path(bootstrapRoot, EximUtil.DATA_PATH_NAME + File.separator + dbName); - boolean dataCopyAtLoad = conf.getBoolVar(HiveConf.ConfVars.REPL_DATA_COPY_LAZY); + boolean dataCopyAtLoad = conf.getBoolVar(HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET); try (Writer writer = new Writer(dumpRoot, conf)) { for (String tableName : Utils.matchesTbl(hiveDb, dbName, work.replScope)) { try { @@ -617,7 +616,7 @@ 
public class ReplDumpTask extends Task<ReplDumpWork> implements Serializable { } private void setDataCopyIterators(FileList extTableFileList, FileList managedTableFileList) { - boolean dataCopyAtLoad = conf.getBoolVar(HiveConf.ConfVars.REPL_DATA_COPY_LAZY); + boolean dataCopyAtLoad = conf.getBoolVar(HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET); if (dataCopyAtLoad) { work.setManagedTableCopyPathIterator(Collections.<String>emptyList().iterator()); work.setExternalTblCopyPathIterator(Collections.<String>emptyList().iterator()); @@ -822,7 +821,7 @@ public class ReplDumpTask extends Task<ReplDumpWork> implements Serializable { work.getMetricCollector().reportStageStart(getName(), metricMap); Path dbRoot = dumpDbMetadata(dbName, metadataPath, bootDumpBeginReplId, hiveDb); Path dbDataRoot = new Path(new Path(dumpRoot, EximUtil.DATA_PATH_NAME), dbName); - boolean dataCopyAtLoad = conf.getBoolVar(HiveConf.ConfVars.REPL_DATA_COPY_LAZY); + boolean dataCopyAtLoad = conf.getBoolVar(HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET); functionsBinaryCopyPaths = dumpFunctionMetadata(dbName, dbRoot, dbDataRoot, hiveDb, dataCopyAtLoad); String uniqueKey = Utils.setDbBootstrapDumpState(hiveDb, dbName); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplLoadTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplLoadTask.java index 8d479dd..66bb3da 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplLoadTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplLoadTask.java @@ -120,7 +120,7 @@ public class ReplLoadTask extends Task<ReplLoadWork> implements Serializable { if (shouldLoadAuthorizationMetadata()) { initiateAuthorizationLoadTask(); } - LOG.info("Data copy at load enabled : {}", conf.getBoolVar(HiveConf.ConfVars.REPL_DATA_COPY_LAZY)); + LOG.info("Data copy at load enabled : {}", conf.getBoolVar(HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET)); if (work.isIncrementalLoad()) { return executeIncrementalLoad(); } 
else { @@ -347,7 +347,7 @@ public class ReplLoadTask extends Task<ReplLoadWork> implements Serializable { } private void addLazyDataCopyTask(TaskTracker loadTaskTracker) throws IOException { - boolean dataCopyAtLoad = conf.getBoolVar(HiveConf.ConfVars.REPL_DATA_COPY_LAZY); + boolean dataCopyAtLoad = conf.getBoolVar(HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET); if (dataCopyAtLoad) { if (work.getExternalTableDataCopyItr() == null) { Path extTableBackingFile = new Path(work.dumpDirectory, EximUtil.FILE_LIST_EXTERNAL); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadPartitions.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadPartitions.java index d751794..635fd6d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadPartitions.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadPartitions.java @@ -275,7 +275,7 @@ public class LoadPartitions { loadFileType = event.replicationSpec().isReplace() ? 
LoadFileType.REPLACE_ALL : LoadFileType.OVERWRITE_EXISTING; stagingDir = PathUtils.getExternalTmpPath(replicaWarehousePartitionLocation, context.pathInfo); } - boolean copyAtLoad = context.hiveConf.getBoolVar(HiveConf.ConfVars.REPL_DATA_COPY_LAZY); + boolean copyAtLoad = context.hiveConf.getBoolVar(HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET); Task<?> copyTask = ReplCopyTask.getLoadCopyTask( event.replicationSpec(), new Path(event.dataPath() + Path.SEPARATOR + getPartitionName(sourceWarehousePartitionLocation)), diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java index 8572f08..45fca07 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java @@ -18,8 +18,6 @@ package org.apache.hadoop.hive.ql.exec.repl.bootstrap.load.table; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.common.ValidReaderWriteIdList; -import org.apache.hadoop.hive.common.ValidWriteIdList; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.Warehouse; @@ -57,7 +55,6 @@ import org.apache.hadoop.hive.ql.plan.ReplTxnWork; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.BitSet; import java.util.Collections; import java.util.HashSet; import java.util.List; @@ -284,7 +281,7 @@ public class LoadTable { + table.getCompleteName() + " with source location: " + dataPath.toString() + " and target location " + tgtPath.toString()); - boolean copyAtLoad = context.hiveConf.getBoolVar(HiveConf.ConfVars.REPL_DATA_COPY_LAZY); + boolean copyAtLoad = context.hiveConf.getBoolVar(HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET); Task<?> copyTask = ReplCopyTask.getLoadCopyTask(replicationSpec, dataPath, tmpPath, 
context.hiveConf, copyAtLoad, false); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/util/FileList.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/util/FileList.java index b834521..50c621b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/util/FileList.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/util/FileList.java @@ -169,7 +169,7 @@ public class FileList implements AutoCloseable, Iterator<String> { } public int getThreshold(int cacheSize) { - boolean copyAtLoad = conf.getBoolVar(HiveConf.ConfVars.REPL_DATA_COPY_LAZY); + boolean copyAtLoad = conf.getBoolVar(HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET); return copyAtLoad ? 0 : (int)(cacheSize * thresholdFactor); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java index 79ccbc5..614453b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java @@ -460,7 +460,7 @@ public class ImportSemanticAnalyzer extends BaseSemanticAnalyzer { Task<?> copyTask = null; if (replicationSpec.isInReplicationScope()) { - boolean copyAtLoad = x.getConf().getBoolVar(HiveConf.ConfVars.REPL_DATA_COPY_LAZY); + boolean copyAtLoad = x.getConf().getBoolVar(HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET); copyTask = ReplCopyTask.getLoadCopyTask(replicationSpec, dataPath, destPath, x.getConf(), isSkipTrash, needRecycle, copyAtLoad); } else { @@ -603,7 +603,7 @@ public class ImportSemanticAnalyzer extends BaseSemanticAnalyzer { Task<?> copyTask = null; if (replicationSpec.isInReplicationScope()) { - boolean copyAtLoad = x.getConf().getBoolVar(HiveConf.ConfVars.REPL_DATA_COPY_LAZY); + boolean copyAtLoad = x.getConf().getBoolVar(HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET); copyTask = ReplCopyTask.getLoadCopyTask(replicationSpec, new Path(srcLocation), destPath, 
x.getConf(), isSkipTrash, needRecycle, copyAtLoad); } else { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/AddPartitionHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/AddPartitionHandler.java index 3120c96..334d1b7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/AddPartitionHandler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/AddPartitionHandler.java @@ -96,7 +96,7 @@ class AddPartitionHandler extends AbstractEventHandler { withinContext.replicationSpec, withinContext.hiveConf); - boolean copyAtLoad = withinContext.hiveConf.getBoolVar(HiveConf.ConfVars.REPL_DATA_COPY_LAZY); + boolean copyAtLoad = withinContext.hiveConf.getBoolVar(HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET); Iterator<PartitionFiles> partitionFilesIter = apm.getPartitionFilesIter().iterator(); // We expect one to one mapping between partitions and file iterators. For external table, this diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/CommitTxnHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/CommitTxnHandler.java index fcb919a..265fefe 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/CommitTxnHandler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/CommitTxnHandler.java @@ -60,7 +60,7 @@ class CommitTxnHandler extends AbstractEventHandler<CommitTxnMessage> { private void writeDumpFiles(Table qlMdTable, Partition ptn, Iterable<String> files, Context withinContext, Path dataPath) throws IOException, LoginException, MetaException, HiveFatalException { - boolean copyAtLoad = withinContext.hiveConf.getBoolVar(HiveConf.ConfVars.REPL_DATA_COPY_LAZY); + boolean copyAtLoad = withinContext.hiveConf.getBoolVar(HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET); if (copyAtLoad) { // encoded filename/checksum of files, write into _files writeEncodedDumpFiles(withinContext, files, dataPath); 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/CreateFunctionHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/CreateFunctionHandler.java index 69671b9..0d66128 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/CreateFunctionHandler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/CreateFunctionHandler.java @@ -55,7 +55,7 @@ class CreateFunctionHandler extends AbstractEventHandler<CreateFunctionMessage> Path metadataPath = new Path(withinContext.eventRoot, EximUtil.METADATA_NAME); Path dataPath = new Path(withinContext.eventRoot, EximUtil.DATA_PATH_NAME); FileSystem fileSystem = metadataPath.getFileSystem(withinContext.hiveConf); - boolean copyAtLoad = withinContext.hiveConf.getBoolVar(HiveConf.ConfVars.REPL_DATA_COPY_LAZY); + boolean copyAtLoad = withinContext.hiveConf.getBoolVar(HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET); List<DataCopyPath> functionBinaryCopyPaths = new ArrayList<>(); try (JsonWriter jsonWriter = new JsonWriter(fileSystem, metadataPath)) { FunctionSerializer serializer = new FunctionSerializer(eventMessage.getFunctionObj(), diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/CreateTableHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/CreateTableHandler.java index ed78bc0..d9ade2e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/CreateTableHandler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/CreateTableHandler.java @@ -75,7 +75,7 @@ class CreateTableHandler extends AbstractEventHandler<CreateTableMessage> { withinContext.replicationSpec, withinContext.hiveConf); - boolean copyAtLoad = withinContext.hiveConf.getBoolVar(HiveConf.ConfVars.REPL_DATA_COPY_LAZY); + boolean copyAtLoad = withinContext.hiveConf.getBoolVar(HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET); Iterable<String> files = eventMessage.getFiles(); if (files != null) { if 
(copyAtLoad) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/InsertHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/InsertHandler.java index 704b996..b0e9bcf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/InsertHandler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/InsertHandler.java @@ -77,7 +77,7 @@ class InsertHandler extends AbstractEventHandler<InsertMessage> { withinContext.hiveConf); Iterable<String> files = eventMessage.getFiles(); - boolean copyAtLoad = withinContext.hiveConf.getBoolVar(HiveConf.ConfVars.REPL_DATA_COPY_LAZY); + boolean copyAtLoad = withinContext.hiveConf.getBoolVar(HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET); /* * Insert into/overwrite operation shall operate on one or more partitions or even partitions from multiple tables. diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/CreateFunctionHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/CreateFunctionHandler.java index f42290e..e65769a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/CreateFunctionHandler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/load/message/CreateFunctionHandler.java @@ -42,7 +42,6 @@ import org.apache.hadoop.hive.ql.plan.CopyWork; import org.apache.hadoop.hive.ql.plan.DependencyCollectionWork; import java.io.IOException; -import java.io.Serializable; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -202,7 +201,7 @@ public class CreateFunctionHandler extends AbstractMessageHandler { } private Task<?> getCopyTask(String sourceUri, Path dest) { - boolean copyAtLoad = context.hiveConf.getBoolVar(HiveConf.ConfVars.REPL_DATA_COPY_LAZY); + boolean copyAtLoad = context.hiveConf.getBoolVar(HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET); if (copyAtLoad ) { return ReplCopyTask.getLoadCopyTask(metadata.getReplicationSpec(), new 
Path(sourceUri), dest, context.hiveConf); } else { diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/repl/util/TestFileList.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/repl/util/TestFileList.java index 37ac5d6..67000a0 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/repl/util/TestFileList.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/repl/util/TestFileList.java @@ -144,7 +144,7 @@ public class TestFileList { private Object[] setupAndGetTuple(int cacheSize, boolean lazyDataCopy) throws Exception { HiveConf hiveConf = Mockito.mock(HiveConf.class); - Mockito.when(hiveConf.getBoolVar(HiveConf.ConfVars.REPL_DATA_COPY_LAZY)).thenReturn(lazyDataCopy); + Mockito.when(hiveConf.getBoolVar(HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET)).thenReturn(lazyDataCopy); Path backingFile = new Path("/tmp/backingFile"); LinkedBlockingQueue<String> cache = new LinkedBlockingQueue<>(cacheSize); FileListStreamer fileListStreamer = Mockito.spy(new FileListStreamer(cache, backingFile, hiveConf)); diff --git a/ql/src/test/queries/clientpositive/repl_2_exim_basic.q b/ql/src/test/queries/clientpositive/repl_2_exim_basic.q index 3d5961b..2a9ca27 100644 --- a/ql/src/test/queries/clientpositive/repl_2_exim_basic.q +++ b/ql/src/test/queries/clientpositive/repl_2_exim_basic.q @@ -4,7 +4,7 @@ set hive.test.mode.prefix=; set hive.test.mode.nosamplelist=managed_t,ext_t,managed_t_imported,managed_t_r_imported,ext_t_imported,ext_t_r_imported; set hive.repl.include.external.tables=true; set hive.repl.dump.metadata.only.for.external.table=false; -set hive.repl.data.copy.lazy=false; +set hive.repl.run.data.copy.tasks.on.target=false; drop table if exists managed_t; drop table if exists ext_t; diff --git a/ql/src/test/queries/clientpositive/repl_3_exim_metadata.q b/ql/src/test/queries/clientpositive/repl_3_exim_metadata.q index 4697361..6226d2c 100644 --- a/ql/src/test/queries/clientpositive/repl_3_exim_metadata.q +++ 
b/ql/src/test/queries/clientpositive/repl_3_exim_metadata.q @@ -2,7 +2,7 @@ set hive.mapred.mode=nonstrict; set hive.test.mode=true; set hive.test.mode.prefix=; set hive.test.mode.nosamplelist=replsrc,repldst,repldst_md; -set hive.repl.data.copy.lazy=false; +set hive.repl.run.data.copy.tasks.on.target=false; drop table if exists replsrc; drop table if exists repldst; diff --git a/ql/src/test/queries/clientpositive/repl_4_exim_nocolstat.q b/ql/src/test/queries/clientpositive/repl_4_exim_nocolstat.q index 4bb172e..50e2993 100644 --- a/ql/src/test/queries/clientpositive/repl_4_exim_nocolstat.q +++ b/ql/src/test/queries/clientpositive/repl_4_exim_nocolstat.q @@ -4,7 +4,7 @@ set hive.test.mode.prefix=; set hive.test.mode.nosamplelist=replsrc,repldst; set metastore.try.direct.sql=false; set hive.metastore.rawstore.impl=org.apache.hadoop.hive.metastore.ObjectStore; -set hive.repl.data.copy.lazy=false; +set hive.repl.run.data.copy.tasks.on.target=false; drop table if exists replsrc; drop table if exists repldst; diff --git a/standalone-metastore/metastore-common/pom.xml b/standalone-metastore/metastore-common/pom.xml index 827c1e9..4922ae9 100644 --- a/standalone-metastore/metastore-common/pom.xml +++ b/standalone-metastore/metastore-common/pom.xml @@ -264,11 +264,6 @@ <artifactId>mockito-core</artifactId> <scope>test</scope> </dependency> - <dependency> - <groupId>org.apache.hive</groupId> - <artifactId>hive-shims</artifactId> - <version>${project.version}</version> - </dependency> </dependencies> <profiles> diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ReplChangeManager.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ReplChangeManager.java index 3af74ba..67c74d0 100644 --- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ReplChangeManager.java +++ 
b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ReplChangeManager.java @@ -40,16 +40,14 @@ import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.metastore.conf.MetastoreConf; -import org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars; +import org.apache.hadoop.hive.metastore.utils.EncryptionZoneUtils; import org.apache.hadoop.hive.metastore.utils.FileUtils; import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.hive.metastore.utils.Retry; import org.apache.hadoop.hive.metastore.utils.StringUtils; -import org.apache.hadoop.hive.shims.HadoopShims; -import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars; import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.hive.shims.HadoopShims.HdfsEncryptionShim; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -61,7 +59,6 @@ public class ReplChangeManager { private static boolean inited = false; private static boolean enabled = false; private static Map<String, String> encryptionZoneToCmrootMapping = new HashMap<>(); - private HadoopShims hadoopShims; private static Configuration conf; private String msUser; private String msGroup; @@ -161,7 +158,6 @@ public class ReplChangeManager { if (MetastoreConf.getBoolVar(conf, ConfVars.REPLCMENABLED)) { ReplChangeManager.enabled = true; ReplChangeManager.conf = conf; - hadoopShims = ShimLoader.getHadoopShims(); cmRootDir = MetastoreConf.getVar(conf, ConfVars.REPLCMDIR); encryptedCmRootDir = MetastoreConf.getVar(conf, ConfVars.REPLCMENCRYPTEDDIR); fallbackNonEncryptedCmRootDir = MetastoreConf.getVar(conf, ConfVars.REPLCMFALLBACKNONENCRYPTEDDIR); @@ -174,12 +170,10 @@ 
public class ReplChangeManager { Path cmroot = new Path(cmRootDir); createCmRoot(cmroot); FileSystem cmRootFs = cmroot.getFileSystem(conf); - HdfsEncryptionShim pathEncryptionShim = hadoopShims - .createHdfsEncryptionShim(cmRootFs, conf); - if (pathEncryptionShim.isPathEncrypted(cmroot)) { + if (EncryptionZoneUtils.isPathEncrypted(cmroot, conf)) { //If cm root is encrypted we keep using it for the encryption zone String encryptionZonePath = cmRootFs.getUri() - + pathEncryptionShim.getEncryptionZoneForPath(cmroot).getPath(); + + EncryptionZoneUtils.getEncryptionZoneForPath(cmroot, conf).getPath(); encryptionZoneToCmrootMapping.put(encryptionZonePath, cmRootDir); } else { encryptionZoneToCmrootMapping.put(NO_ENCRYPTION, cmRootDir); @@ -190,7 +184,7 @@ public class ReplChangeManager { throw new MetaException(ConfVars.REPLCMENCRYPTEDDIR.getHiveName() + " should be absolute path"); } createCmRoot(cmRootFallback); - if (pathEncryptionShim.isPathEncrypted(cmRootFallback)) { + if (EncryptionZoneUtils.isPathEncrypted(cmRootFallback, conf)) { throw new MetaException(ConfVars.REPLCMFALLBACKNONENCRYPTEDDIR.getHiveName() + " should not be encrypted"); } @@ -566,10 +560,9 @@ public class ReplChangeManager { String cmrootDir = fallbackNonEncryptedCmRootDir; String encryptionZonePath = NO_ENCRYPTION; if (enabled) { - HdfsEncryptionShim pathEncryptionShim = hadoopShims.createHdfsEncryptionShim(path.getFileSystem(conf), conf); - if (pathEncryptionShim.isPathEncrypted(path)) { + if (EncryptionZoneUtils.isPathEncrypted(path, conf)) { encryptionZonePath = path.getFileSystem(conf).getUri() - + pathEncryptionShim.getEncryptionZoneForPath(path).getPath(); + + EncryptionZoneUtils.getEncryptionZoneForPath(path, conf).getPath(); //For encryption zone, create cm at the relative path specified by hive.repl.cm.encryptionzone.rootdir //at the root of the encryption zone cmrootDir = encryptionZonePath + Path.SEPARATOR + encryptedCmRootDir; diff --git 
a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/EncryptionZoneUtils.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/EncryptionZoneUtils.java new file mode 100644 index 0000000..22a3892 --- /dev/null +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/EncryptionZoneUtils.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * <p> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p> + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.hadoop.hive.metastore.utils;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.UnsupportedFileSystemException;
+import org.apache.hadoop.hdfs.client.HdfsAdmin;
+import org.apache.hadoop.hdfs.protocol.EncryptionZone;
+
+import java.io.IOException;
+import java.net.URI;
+
+/**
+ * Static helpers for querying and creating HDFS encryption zones through
+ * {@link HdfsAdmin}. Only file systems whose URI scheme is {@code hdfs} are
+ * handled; see the individual methods for what happens with other schemes.
+ */
+public final class EncryptionZoneUtils {
+
+  private static final String HDFS_SCHEME = "hdfs";
+
+  private EncryptionZoneUtils() {
+    // Utility class, not meant to be instantiated.
+  }
+
+  /**
+   * Tells whether the given path lies inside an encryption zone.
+   *
+   * @param path path to check; a non-absolute path is first qualified against its file system
+   * @param conf configuration used to resolve the file system of {@code path}
+   * @return {@code true} if {@link #getEncryptionZoneForPath(Path, Configuration)} finds a zone
+   * @throws IOException if the file system cannot be resolved or queried
+   */
+  public static boolean isPathEncrypted(Path path, Configuration conf) throws IOException {
+    Path fullPath = path.isAbsolute() ? path : path.getFileSystem(conf).makeQualified(path);
+    return getEncryptionZoneForPath(fullPath, conf) != null;
+  }
+
+  /**
+   * Looks up the encryption zone for a path. If the path does not exist, its
+   * ancestors are tried in turn and the first one that exists is used for the lookup.
+   *
+   * @param path path whose encryption zone is wanted
+   * @param conf configuration used to resolve the file system of {@code path}
+   * @return the zone reported by {@link HdfsAdmin} for the path (or its nearest existing
+   *         ancestor); {@code null} if the file system scheme is not {@code hdfs} or if
+   *         neither the path nor any ancestor exists
+   * @throws IOException if the file system cannot be resolved or queried
+   */
+  public static EncryptionZone getEncryptionZoneForPath(Path path, Configuration conf) throws IOException {
+    FileSystem fs = path.getFileSystem(conf);
+    URI uri = fs.getUri();
+    if (!HDFS_SCHEME.equals(uri.getScheme())) {
+      return null;
+    }
+    HdfsAdmin hdfsAdmin = new HdfsAdmin(uri, conf);
+    // Path.getParent() yields null once the root is passed, so the loop stops there
+    // instead of dereferencing a null parent.
+    for (Path current = path; current != null; current = current.getParent()) {
+      if (fs.exists(current)) {
+        return hdfsAdmin.getEncryptionZoneForPath(current);
+      }
+    }
+    return null;
+  }
+
+  /**
+   * Creates an encryption zone rooted at {@code path} using the named key.
+   *
+   * @param path root of the encryption zone to create
+   * @param keyName name of the key passed to {@link HdfsAdmin#createEncryptionZone}
+   * @param conf configuration used to resolve the file system of {@code path}
+   * @throws IOException if the file system cannot be resolved or the zone cannot be created
+   * @throws UnsupportedOperationException if the file system scheme is not {@code hdfs}
+   */
+  public static void createEncryptionZone(Path path, String keyName, Configuration conf) throws IOException {
+    URI uri = path.getFileSystem(conf).getUri();
+    if (!HDFS_SCHEME.equals(uri.getScheme())) {
+      throw new UnsupportedOperationException("Cannot create encryption zone for scheme " + uri.getScheme());
+    }
+    HdfsAdmin hdfsAdmin = new HdfsAdmin(uri, conf);
+    hdfsAdmin.createEncryptionZone(path, keyName);
+  }
+}