This is an automated email from the ASF dual-hosted git repository. szita pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new 32fbdfa HIVE-23612: Option for HiveStrictManagedMigration to impersonate a user for FS operations (Adam Szita, reviewed by Jason Dere) 32fbdfa is described below commit 32fbdfa51de58d7a9c645b3ecb0174267f2f2e56 Author: Adam Szita <40628386+sz...@users.noreply.github.com> AuthorDate: Thu Jun 18 13:45:09 2020 +0200 HIVE-23612: Option for HiveStrictManagedMigration to impersonate a user for FS operations (Adam Szita, reviewed by Jason Dere) --- .../hive/ql/util/HiveStrictManagedMigration.java | 56 ++++++++++++++++++++-- .../ql/util/TestHiveStrictManagedMigration.java | 20 ++++++++ 2 files changed, 71 insertions(+), 5 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/util/HiveStrictManagedMigration.java b/ql/src/java/org/apache/hadoop/hive/ql/util/HiveStrictManagedMigration.java index ab4cd6c..bc94702 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/util/HiveStrictManagedMigration.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/util/HiveStrictManagedMigration.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.util; import java.io.IOException; +import java.security.PrivilegedExceptionAction; import java.util.Arrays; import java.util.HashMap; import java.util.List; @@ -106,6 +107,7 @@ public class HiveStrictManagedMigration { final boolean dryRun; final TableType tableType; final int tablePoolSize; + final String fsOperationUser; RunOptions(String dbRegex, String tableRegex, @@ -118,7 +120,8 @@ public class HiveStrictManagedMigration { boolean shouldMoveExternal, boolean dryRun, TableType tableType, - int tablePoolSize) { + int tablePoolSize, + String fsOperationUser) { super(); this.dbRegex = dbRegex; this.tableRegex = tableRegex; @@ -132,6 +135,7 @@ public class HiveStrictManagedMigration { this.dryRun = dryRun; this.tableType = tableType; this.tablePoolSize = tablePoolSize; + this.fsOperationUser = fsOperationUser; } public void setShouldModifyManagedTableLocation(boolean shouldModifyManagedTableLocation) { @@ -157,6 +161,7 @@ public class HiveStrictManagedMigration { ", dryRun=" + dryRun + ", tableType=" + tableType + ", tablePoolSize=" + tablePoolSize + + ", fsOperationUser=" + fsOperationUser + '}'; } } @@ -334,6 +339,15 @@ public class HiveStrictManagedMigration { .withArgName("table type") .create("tt")); + result.addOption(OptionBuilder + .withLongOpt("fsOperationUser") + .withDescription("If set, migration tool will impersonate this user to carry out write operations on file " + + "system. Useful e.g. if this tool is run as hive, but chown-ing is also a requirement." + + "If this is unset file operations will be run in the name of the user running this process (or kinit'ed " + + "user in Kerberos environments)") + .hasArg() + .create()); + return result; } @@ -378,6 +392,8 @@ public class HiveStrictManagedMigration { } boolean dryRun = cli.hasOption("dryRun"); + String fsOperationUser = cli.getOptionValue("fsOperationUser"); + String tableTypeText = cli.getOptionValue("tableType"); int defaultPoolSize = Runtime.getRuntime().availableProcessors() / 2; @@ -406,7 +422,8 @@ public class HiveStrictManagedMigration { shouldMoveExternal, dryRun, tableTypeText == null ? null : TableType.valueOf(tableTypeText), - tablePoolSize); + tablePoolSize, + fsOperationUser); return runOpts; } @@ -431,6 +448,7 @@ public class HiveStrictManagedMigration { private final String groupName; private final FsPermission dirPerms; private final FsPermission filePerms; + private final UserGroupInformation fsOperationUser; private CloseableThreadLocal<HiveMetaStoreClient> hms; private ThreadLocal<Warehouse> wh; @@ -462,6 +480,17 @@ public class HiveStrictManagedMigration { } } + try { + if (runOptions.fsOperationUser != null) { + fsOperationUser = UserGroupInformation.createProxyUser(runOptions.fsOperationUser, + UserGroupInformation.getLoginUser()); + } else { + fsOperationUser = UserGroupInformation.getLoginUser(); + } + } catch (IOException e) { + throw new RuntimeException("Error while setting up UGI for FS operations."); + } + this.hms = new CloseableThreadLocal<>(() -> { try { HiveMetaStoreClient hiveMetaStoreClient = new HiveMetaStoreClient(conf); @@ -650,7 +679,7 @@ public class HiveStrictManagedMigration { LOG.info("Changing location of database {} to {}", dbName, newDefaultDbLocation); if (!runOptions.dryRun) { - FileSystem fs = newDefaultDbLocation.getFileSystem(conf); + FileSystem fs = getFS(newDefaultDbLocation, conf, fsOperationUser); FileUtils.mkdir(fs, newDefaultDbLocation, conf); // Set appropriate owner/perms of the DB dir only, no need to recurse checkAndSetFileOwnerPermissions(fs, newDefaultDbLocation, @@ -853,7 +882,7 @@ public class HiveStrictManagedMigration { void createExternalDbDir(Database dbObj) throws IOException, MetaException { Path externalTableDbPath = wh.get().getDefaultExternalDatabasePath(dbObj.getName()); - FileSystem fs = externalTableDbPath.getFileSystem(conf); + FileSystem fs = getFS(externalTableDbPath, conf, fsOperationUser); if (!fs.exists(externalTableDbPath)) { String dbOwner = ownerName; String dbGroup = null; @@ -872,6 +901,9 @@ public class HiveStrictManagedMigration { } } + if (dbOwner == null) { + dbOwner = conf.get("strict.managed.tables.migration.owner", "hive"); + } LOG.info("Creating external table directory for database {} at {} with ownership {}/{}", dbObj.getName(), externalTableDbPath, dbOwner, dbGroup); if (!runOptions.dryRun) { @@ -897,7 +929,7 @@ public class HiveStrictManagedMigration { // Move table directory. if (!runOptions.dryRun) { - FileSystem fs = newTablePath.getFileSystem(conf); + FileSystem fs = getFS(newTablePath, conf, fsOperationUser); if (fs.exists(oldTablePath)) { boolean movedData = fs.rename(oldTablePath, newTablePath); if (!movedData) { @@ -1659,6 +1691,20 @@ public class HiveStrictManagedMigration { } } + private static FileSystem getFS(Path path, Configuration conf, UserGroupInformation fsOperationUser) + throws IOException { + try { + return fsOperationUser.doAs(new PrivilegedExceptionAction<FileSystem>() { + @Override + public FileSystem run() throws Exception { + return path.getFileSystem(conf); + } + }); + } catch (InterruptedException e) { + throw new IOException(e); + } + } + /** * can set it from tests to test when config needs something other than default values. */ diff --git a/ql/src/test/org/apache/hadoop/hive/ql/util/TestHiveStrictManagedMigration.java b/ql/src/test/org/apache/hadoop/hive/ql/util/TestHiveStrictManagedMigration.java index 038a8a2..a6862a8 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/util/TestHiveStrictManagedMigration.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/util/TestHiveStrictManagedMigration.java @@ -140,6 +140,26 @@ public class TestHiveStrictManagedMigration extends TxnCommandsBaseForTests { } } + /** + * Should encounter a DB with an unset owner, and should try to chown the new dir path to 'hive' user. + * This will always fail in this test, as we're never running it as root. + * @throws Exception + */ + @Test(expected = AssertionError.class) + public void testExtDbDirOnFsIsCreatedAsHiveIfDbOwnerNull() throws Exception { + runStatementOnDriver("drop database if exists ownerlessdb"); + runStatementOnDriver("create database ownerlessdb"); + Database db = Hive.get().getDatabase("ownerlessdb"); + db.setOwnerName(null); + Hive.get().alterDatabase("ownerlessdb", db); + + String[] args = {"-m", "external"}; + HiveConf newConf = new HiveConf(hiveConf); + File newExtWarehouseDir = new File(getTestDataDir(), "newExternal"); + newConf.set(HiveConf.ConfVars.HIVE_METASTORE_WAREHOUSE_EXTERNAL.varname, newExtWarehouseDir.getAbsolutePath()); + runMigrationTool(newConf, args); + } + @Override protected String getTestDataDir() { return TEST_DATA_DIR;