This is an automated email from the ASF dual-hosted git repository. anishek pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new 9e435cb HIVE-24254: Remove setOwner call in ReplChangeManager (Aasha Medhi, reviewed by Pravin Kumar Sinha) 9e435cb is described below commit 9e435cb6690b4ea0d7a46dae8c382f27c3472026 Author: Anishek Agarwal <anis...@gmail.com> AuthorDate: Mon Oct 12 09:32:38 2020 +0530 HIVE-24254: Remove setOwner call in ReplChangeManager (Aasha Medhi, reviewed by Pravin Kumar Sinha) --- .../hive/metastore/TestReplChangeManager.java | 204 ++++++++++++++++++++- .../hadoop/hive/metastore/ReplChangeManager.java | 3 - 2 files changed, 194 insertions(+), 13 deletions(-) diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestReplChangeManager.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestReplChangeManager.java index 9b728b8..392ebad 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestReplChangeManager.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestReplChangeManager.java @@ -19,8 +19,13 @@ package org.apache.hadoop.hive.metastore; import java.io.IOException; +import java.net.InetAddress; +import java.net.NetworkInterface; +import java.nio.file.Files; +import java.security.PrivilegedExceptionAction; import java.util.ArrayList; import java.util.Arrays; +import java.util.Enumeration; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -33,7 +38,9 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.ReplChangeManager.RecycleType; + import static org.apache.hadoop.hive.metastore.ReplChangeManager.SOURCE_OF_REPLICATION; + import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Partition; @@ -41,6 +48,10 @@ import org.apache.hadoop.hive.metastore.api.SerDeInfo; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe; +import org.apache.hadoop.security.AccessControlException; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.authorize.DefaultImpersonationProvider; +import org.apache.hadoop.security.authorize.ProxyUsers; import org.apache.hadoop.util.StringUtils; import org.junit.AfterClass; import org.junit.Assert; @@ -60,31 +71,59 @@ public class TestReplChangeManager { private static String cmroot; private static FileSystem fs; + private static HiveConf permhiveConf; + private static Warehouse permWarehouse; + private static MiniDFSCluster permDdfs; + private static String permCmroot; + @BeforeClass public static void setUp() throws Exception { + internalSetUp(); + internalSetUpProvidePerm(); + try { + client = new HiveMetaStoreClient(hiveConf); + } catch (Throwable e) { + System.err.println("Unable to open the metastore"); + System.err.println(StringUtils.stringifyException(e)); + throw e; + } + } + + private static void internalSetUpProvidePerm() throws Exception { + Configuration configuration = new Configuration(); + configuration.set("dfs.permissions.enabled", "false"); + String noPermBaseDir = Files.createTempDirectory("noPerm").toFile().getAbsolutePath(); + configuration.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, noPermBaseDir); + configuration.set("dfs.client.use.datanode.hostname", "true"); + permDdfs = new MiniDFSCluster.Builder(configuration).numDataNodes(1).format(true).build(); + permhiveConf = new HiveConf(TestReplChangeManager.class); + permhiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, + "hdfs://" + permDdfs.getNameNode().getHostAndPort() + HiveConf.ConfVars.METASTOREWAREHOUSE.defaultStrVal); + permhiveConf.setBoolean(HiveConf.ConfVars.REPLCMENABLED.varname, true); + permCmroot = "hdfs://" + permDdfs.getNameNode().getHostAndPort() + "/cmroot"; + permhiveConf.set(HiveConf.ConfVars.REPLCMDIR.varname, permCmroot); + permhiveConf.setInt(CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY, 60); + permWarehouse = new Warehouse(permhiveConf); + } + + private static void internalSetUp() throws Exception { m_dfs = new MiniDFSCluster.Builder(new Configuration()).numDataNodes(1).format(true).build(); hiveConf = new HiveConf(TestReplChangeManager.class); hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, - "hdfs://" + m_dfs.getNameNode().getHostAndPort() + HiveConf.ConfVars.METASTOREWAREHOUSE.defaultStrVal); + "hdfs://" + m_dfs.getNameNode().getHostAndPort() + HiveConf.ConfVars.METASTOREWAREHOUSE.defaultStrVal); hiveConf.setBoolean(HiveConf.ConfVars.REPLCMENABLED.varname, true); cmroot = "hdfs://" + m_dfs.getNameNode().getHostAndPort() + "/cmroot"; hiveConf.set(HiveConf.ConfVars.REPLCMDIR.varname, cmroot); hiveConf.setInt(CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY, 60); warehouse = new Warehouse(hiveConf); fs = new Path(cmroot).getFileSystem(hiveConf); - try { - client = new HiveMetaStoreClient(hiveConf); - } catch (Throwable e) { - System.err.println("Unable to open the metastore"); - System.err.println(StringUtils.stringifyException(e)); - throw e; - } } @AfterClass - public static void tearDown() throws Exception { + public static void tearDown() { try { m_dfs.shutdown(); + permDdfs.shutdown(); client.close(); } catch (Throwable e) { System.err.println("Unable to close metastore"); @@ -297,7 +336,6 @@ public class TestReplChangeManager { Path part32 = new Path(dirTbl3, "part2"); createFile(part32, "testClearer32"); String fileChksum32 = ReplChangeManager.checksumFor(part32, fs); - ReplChangeManager.getInstance(hiveConf).recycle(dirTbl1, RecycleType.MOVE, false); ReplChangeManager.getInstance(hiveConf).recycle(dirTbl2, RecycleType.MOVE, false); ReplChangeManager.getInstance(hiveConf).recycle(dirTbl3, RecycleType.MOVE, true); @@ -341,6 +379,152 @@ public class TestReplChangeManager { } @Test + public void testRecycleUsingImpersonation() throws Exception { + FileSystem fs = warehouse.getWhRoot().getFileSystem(hiveConf); + Path dirDb = new Path(warehouse.getWhRoot(), "db3"); + long now = System.currentTimeMillis(); + fs.delete(dirDb, true); + fs.mkdirs(dirDb); + Path dirTbl1 = new Path(dirDb, "tbl1"); + fs.mkdirs(dirTbl1); + Path part11 = new Path(dirTbl1, "part1"); + createFile(part11, "testClearer11"); + String fileChksum11 = ReplChangeManager.checksumFor(part11, fs); + Path part12 = new Path(dirTbl1, "part2"); + createFile(part12, "testClearer12"); + String fileChksum12 = ReplChangeManager.checksumFor(part12, fs); + final UserGroupInformation proxyUserUgi = + UserGroupInformation.createRemoteUser("impala"); + setGroupsInConf(UserGroupInformation.getCurrentUser().getGroupNames(), + "impala", hiveConf); + //set owner of data path to impala + fs.setOwner(dirTbl1, "impala", "default"); + proxyUserUgi.doAs((PrivilegedExceptionAction<Void>) () -> { + try { + //impala doesn't have access. Should provide access control exception + ReplChangeManager.getInstance(hiveConf).recycle(dirTbl1, RecycleType.MOVE, false); + Assert.fail(); + } catch (AccessControlException e) { + assertTrue(e.getMessage().contains("Permission denied: user=impala, access=WRITE")); + } + return null; + }); + ReplChangeManager.getInstance().recycle(dirTbl1, RecycleType.MOVE, false); + Assert.assertFalse(fs.exists(part11)); + Assert.assertFalse(fs.exists(part12)); + assertTrue(fs.exists(ReplChangeManager.getCMPath(hiveConf, part11.getName(), fileChksum11, cmroot))); + assertTrue(fs.exists(ReplChangeManager.getCMPath(hiveConf, part12.getName(), fileChksum12, cmroot))); + fs.setTimes(ReplChangeManager.getCMPath(hiveConf, part11.getName(), fileChksum11, cmroot), + now - 7 * 86400 * 1000 * 2, now - 7 * 86400 * 1000 * 2); + ReplChangeManager.scheduleCMClearer(hiveConf); + + long start = System.currentTimeMillis(); + long end; + boolean cleared = false; + do { + Thread.sleep(200); + end = System.currentTimeMillis(); + if (end - start > 5000) { + Assert.fail("timeout, cmroot has not been cleared"); + } + if (!fs.exists(ReplChangeManager.getCMPath(hiveConf, part11.getName(), fileChksum11, cmroot)) && + fs.exists(ReplChangeManager.getCMPath(hiveConf, part12.getName(), fileChksum12, cmroot))) { + cleared = true; + } + } while (!cleared); + } + + @Test + public void testRecycleUsingImpersonationWithAccess() throws Exception { + try { + ReplChangeManager.resetReplChangeManagerInstance(); + ReplChangeManager.getInstance(permhiveConf); + FileSystem fs = permWarehouse.getWhRoot().getFileSystem(permhiveConf); + long now = System.currentTimeMillis(); + Path dirDb = new Path(permWarehouse.getWhRoot(), "db3"); + fs.delete(dirDb, true); + fs.mkdirs(dirDb); + Path dirTbl1 = new Path(dirDb, "tbl1"); + fs.mkdirs(dirTbl1); + Path part11 = new Path(dirTbl1, "part1"); + createFile(part11, "testClearer11"); + String fileChksum11 = ReplChangeManager.checksumFor(part11, fs); + Path part12 = new Path(dirTbl1, "part2"); + createFile(part12, "testClearer12"); + String fileChksum12 = ReplChangeManager.checksumFor(part12, fs); + final UserGroupInformation proxyUserUgi = + UserGroupInformation.createRemoteUser("impala"); + setGroupsInConf(UserGroupInformation.getCurrentUser().getGroupNames(), + "impala", permhiveConf); + //set owner of data path to impala + fs.setOwner(dirTbl1, "impala", "default"); + proxyUserUgi.doAs((PrivilegedExceptionAction<Void>) () -> { + ReplChangeManager.getInstance().recycle(dirTbl1, RecycleType.MOVE, false); + return null; + }); + Assert.assertFalse(fs.exists(part11)); + Assert.assertFalse(fs.exists(part12)); + assertTrue(fs.exists(ReplChangeManager.getCMPath(permhiveConf, part11.getName(), fileChksum11, permCmroot))); + assertTrue(fs.exists(ReplChangeManager.getCMPath(permhiveConf, part12.getName(), fileChksum12, permCmroot))); + fs.setTimes(ReplChangeManager.getCMPath(permhiveConf, part11.getName(), fileChksum11, permCmroot), + now - 7 * 86400 * 1000 * 2, now - 7 * 86400 * 1000 * 2); + ReplChangeManager.scheduleCMClearer(permhiveConf); + + long start = System.currentTimeMillis(); + long end; + boolean cleared = false; + do { + Thread.sleep(200); + end = System.currentTimeMillis(); + if (end - start > 5000) { + Assert.fail("timeout, cmroot has not been cleared"); + } + //Impala owned file is cleared by Hive CMClearer + if (!fs.exists(ReplChangeManager.getCMPath(permhiveConf, part11.getName(), fileChksum11, permCmroot)) && + fs.exists(ReplChangeManager.getCMPath(permhiveConf, part12.getName(), fileChksum12, permCmroot))) { + cleared = true; + } + } while (!cleared); + } finally { + ReplChangeManager.resetReplChangeManagerInstance(); + ReplChangeManager.getInstance(hiveConf); + } + } + + private void setGroupsInConf(String[] groupNames, String proxyUserName, Configuration conf) + throws IOException { + conf.set( + DefaultImpersonationProvider.getTestProvider().getProxySuperuserGroupConfKey(proxyUserName), + StringUtils.join(",", Arrays.asList(groupNames))); + configureSuperUserIPAddresses(conf, proxyUserName); + ProxyUsers.refreshSuperUserGroupsConfiguration(conf); + } + + private void configureSuperUserIPAddresses(Configuration conf, + String superUserShortName) throws IOException { + List<String> ipList = new ArrayList<String>(); + Enumeration<NetworkInterface> netInterfaceList = NetworkInterface + .getNetworkInterfaces(); + while (netInterfaceList.hasMoreElements()) { + NetworkInterface inf = netInterfaceList.nextElement(); + Enumeration<InetAddress> addrList = inf.getInetAddresses(); + while (addrList.hasMoreElements()) { + InetAddress addr = addrList.nextElement(); + ipList.add(addr.getHostAddress()); + } + } + StringBuilder builder = new StringBuilder(); + for (String ip : ipList) { + builder.append(ip); + builder.append(','); + } + builder.append("127.0.1.1,"); + builder.append(InetAddress.getLocalHost().getCanonicalHostName()); + conf.setStrings(DefaultImpersonationProvider.getTestProvider().getProxySuperuserIpConfKey(superUserShortName), + builder.toString()); + } + + @Test public void shouldIdentifyCMURIs() { assertTrue(ReplChangeManager .isCMFileUri(new Path("hdfs://localhost:90000/somepath/adir/", "ab.jar#e239s2233"))); diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ReplChangeManager.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ReplChangeManager.java index 338e351..5b76ace 100644 --- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ReplChangeManager.java +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ReplChangeManager.java @@ -287,9 +287,6 @@ public class ReplChangeManager { // Ignore if a file with same content already exist in cmroot // We might want to setXAttr for the new location in the future if (success) { - // set the file owner to hive (or the id metastore run as) - fs.setOwner(cmPath, msUser, msGroup); - // tag the original file name so we know where the file comes from // Note we currently only track the last known trace as // xattr has limited capacity. We shall revisit and store all original