Repository: hive Updated Branches: refs/heads/master 24e16cc57 -> e36f6e4fb
HIVE-19783: Retrieve only locations in HiveMetaStore.dropPartitionsAndGetLocations (Peter Vary, reviewed by Alexander Kolbasov and Vihang Karajgaonkar) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e36f6e4f Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e36f6e4f Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e36f6e4f Branch: refs/heads/master Commit: e36f6e4fbda354f33ba9cef6cf25e5573c78d618 Parents: 24e16cc Author: Peter Vary <[email protected]> Authored: Fri Jun 22 10:10:33 2018 +0200 Committer: Peter Vary <[email protected]> Committed: Fri Jun 22 10:10:33 2018 +0200 ---------------------------------------------------------------------- .../listener/DummyRawStoreFailEvent.java | 6 + .../hadoop/hive/metastore/HiveMetaStore.java | 111 ++++++++----------- .../hadoop/hive/metastore/ObjectStore.java | 46 ++++++++ .../apache/hadoop/hive/metastore/RawStore.java | 15 +++ .../hive/metastore/cache/CachedStore.java | 6 + .../hadoop/hive/metastore/utils/FileUtils.java | 14 +++ .../DummyRawStoreControlledCommit.java | 6 + .../DummyRawStoreForJdoConnection.java | 6 + .../client/MetaStoreFactoryForTests.java | 1 + .../TestTablesCreateDropAlterTruncate.java | 14 ++- 10 files changed, 160 insertions(+), 65 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/e36f6e4f/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java ---------------------------------------------------------------------- diff --git a/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java b/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java index 8f9a03f..3c334fa 100644 --- a/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java +++ b/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java @@ -295,6 +295,12 @@ public class DummyRawStoreFailEvent implements RawStore, Configurable { } @Override + public Map<String, String> getPartitionLocations(String catName, String dbName, String tblName, + String baseLocationToNotShow, int max) { + return objectStore.getPartitionLocations(catName, dbName, tblName, baseLocationToNotShow, max); + } + + @Override public void updateCreationMetadata(String catName, String dbname, String tablename, CreationMetadata cm) throws MetaException { objectStore.updateCreationMetadata(catName, dbname, tablename, cm); http://git-wip-us.apache.org/repos/asf/hive/blob/e36f6e4f/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java index e88f9a5..e9d7e7c 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java @@ -1505,7 +1505,8 @@ public class HiveMetaStore extends ThriftHiveMetastore { " which is not writable by " + SecurityUtils.getUser()); } - if (!isSubdirectory(databasePath, materializedViewPath)) { + if (!FileUtils.isSubdirectory(databasePath.toString(), + materializedViewPath.toString())) { tablePaths.add(materializedViewPath); } } @@ -1545,7 +1546,7 @@ public class HiveMetaStore extends ThriftHiveMetastore { " which is not writable by " + SecurityUtils.getUser()); } - if (!isSubdirectory(databasePath, tablePath)) { + if (!FileUtils.isSubdirectory(databasePath.toString(), tablePath.toString())) { tablePaths.add(tablePath); } } @@ -1553,7 +1554,7 @@ public class HiveMetaStore extends ThriftHiveMetastore { // For each partition in each table, drop the partitions and get a list of // partitions' locations which might need to be deleted partitionPaths = dropPartitionsAndGetLocations(ms, catName, name, table.getTableName(), - tablePath, table.getPartitionKeys(), deleteData && !isExternal(table)); + tablePath, deleteData && !isExternal(table)); // Drop the table but not its data drop_table(MetaStoreUtils.prependCatalogToDbName(table.getCatName(), table.getDbName(), conf), @@ -1604,20 +1605,6 @@ public class HiveMetaStore extends ThriftHiveMetastore { } } - /** - * Returns a BEST GUESS as to whether or not other is a subdirectory of parent. It does not - * take into account any intricacies of the underlying file system, which is assumed to be - * HDFS. This should not return any false positives, but may return false negatives. - * - * @param parent - * @param other - * @return - */ - private boolean isSubdirectory(Path parent, Path other) { - return other.toString().startsWith(parent.toString().endsWith(Path.SEPARATOR) ? - parent.toString() : parent.toString() + Path.SEPARATOR); - } - @Override public void drop_database(final String dbName, final boolean deleteData, final boolean cascade) throws NoSuchObjectException, InvalidOperationException, MetaException { @@ -2482,7 +2469,7 @@ public class HiveMetaStore extends ThriftHiveMetastore { // Drop the partitions and get a list of locations which need to be deleted partPaths = dropPartitionsAndGetLocations(ms, catName, dbname, name, tblPath, - tbl.getPartitionKeys(), deleteData && !isExternal); + deleteData && !isExternal); // Drop any constraints on the table ms.dropConstraint(catName, dbname, name, null, true); @@ -2567,79 +2554,75 @@ public class HiveMetaStore extends ThriftHiveMetastore { } /** - * Retrieves the partitions specified by partitionKeys. If checkLocation, for locations of - * partitions which may not be subdirectories of tablePath checks to make the locations are - * writable. + * Deletes the partitions specified by catName, dbName, tableName. If checkLocation is true, for + * locations of partitions which may not be subdirectories of tablePath checks to make sure the + * locations are writable. * * Drops the metadata for each partition. * * Provides a list of locations of partitions which may not be subdirectories of tablePath. * - * @param ms - * @param dbName - * @param tableName - * @param tablePath - * @param partitionKeys - * @param checkLocation - * @return + * @param ms RawStore to use for metadata retrieval and delete + * @param catName The catName + * @param dbName The dbName + * @param tableName The tableName + * @param tablePath The tablePath of which subdirectories does not have to be checked + * @param checkLocation Should we check the locations at all + * @return The list of the Path objects to delete (only in case checkLocation is true) * @throws MetaException * @throws IOException - * @throws InvalidInputException - * @throws InvalidObjectException * @throws NoSuchObjectException */ private List<Path> dropPartitionsAndGetLocations(RawStore ms, String catName, String dbName, - String tableName, Path tablePath, List<FieldSchema> partitionKeys, boolean checkLocation) - throws MetaException, IOException, NoSuchObjectException, InvalidObjectException, - InvalidInputException { - int partitionBatchSize = MetastoreConf.getIntVar(conf, - ConfVars.BATCH_RETRIEVE_MAX); - Path tableDnsPath = null; + String tableName, Path tablePath, boolean checkLocation) + throws MetaException, IOException, NoSuchObjectException { + int batchSize = MetastoreConf.getIntVar(conf, ConfVars.BATCH_RETRIEVE_OBJECTS_MAX); + String tableDnsPath = null; if (tablePath != null) { - tableDnsPath = wh.getDnsPath(tablePath); + tableDnsPath = wh.getDnsPath(tablePath).toString(); } - List<Path> partPaths = new ArrayList<>(); - Table tbl = ms.getTable(catName, dbName, tableName); - // call dropPartition on each of the table's partitions to follow the - // procedure for cleanly dropping partitions. + List<Path> partPaths = new ArrayList<>(); while (true) { - List<Partition> partsToDelete = ms.getPartitions(catName, dbName, tableName, partitionBatchSize); - if (partsToDelete == null || partsToDelete.isEmpty()) { - break; - } - List<String> partNames = new ArrayList<>(); - for (Partition part : partsToDelete) { - if (checkLocation && part.getSd() != null && - part.getSd().getLocation() != null) { - - Path partPath = wh.getDnsPath(new Path(part.getSd().getLocation())); - if (tableDnsPath == null || - (partPath != null && !isSubdirectory(tableDnsPath, partPath))) { - if (!wh.isWritable(partPath.getParent())) { - throw new MetaException("Table metadata not deleted since the partition " + - Warehouse.makePartName(partitionKeys, part.getValues()) + - " has parent location " + partPath.getParent() + " which is not writable " + - "by " + SecurityUtils.getUser()); + Map<String, String> partitionLocations = ms.getPartitionLocations(catName, dbName, tableName, + tableDnsPath, batchSize); + if (partitionLocations == null || partitionLocations.isEmpty()) { + // No more partitions left to drop. Return with the collected path list to delete. + return partPaths; + } + + if (checkLocation) { + for (String partName : partitionLocations.keySet()) { + String pathString = partitionLocations.get(partName); + if (pathString != null) { + Path partPath = wh.getDnsPath(new Path(pathString)); + // Double check here. Maybe Warehouse.getDnsPath revealed relationship between the + // path objects + if (tableDnsPath == null || + !FileUtils.isSubdirectory(tableDnsPath, partPath.toString())) { + if (!wh.isWritable(partPath.getParent())) { + throw new MetaException("Table metadata not deleted since the partition " + + partName + " has parent location " + partPath.getParent() + + " which is not writable by " + SecurityUtils.getUser()); + } + partPaths.add(partPath); } - partPaths.add(partPath); } } - partNames.add(Warehouse.makePartName(tbl.getPartitionKeys(), part.getValues())); } + for (MetaStoreEventListener listener : listeners) { //No drop part listener events fired for public listeners historically, for drop table case. //Limiting to internal listeners for now, to avoid unexpected calls for public listeners. if (listener instanceof HMSMetricsListener) { - for (@SuppressWarnings("unused") Partition part : partsToDelete) { + for (@SuppressWarnings("unused") String partName : partitionLocations.keySet()) { listener.onDropPartition(null); } } } - ms.dropPartitions(catName, dbName, tableName, partNames); - } - return partPaths; + ms.dropPartitions(catName, dbName, tableName, new ArrayList<>(partitionLocations.keySet())); + } } @Override http://git-wip-us.apache.org/repos/asf/hive/blob/e36f6e4f/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java index e99f888..0d2da7a 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java @@ -2817,6 +2817,52 @@ public class ObjectStore implements RawStore, Configurable { return getPartitionsInternal(catName, dbName, tableName, maxParts, true, true); } + @Override + public Map<String, String> getPartitionLocations(String catName, String dbName, String tblName, + String baseLocationToNotShow, int max) { + catName = normalizeIdentifier(catName); + dbName = normalizeIdentifier(dbName); + tblName = normalizeIdentifier(tblName); + + boolean success = false; + Query query = null; + Map<String, String> partLocations = new HashMap<>(); + try { + openTransaction(); + LOG.debug("Executing getPartitionLocations"); + + query = pm.newQuery(MPartition.class); + query.setFilter("this.table.database.catalogName == t1 && this.table.database.name == t2 " + + "&& this.table.tableName == t3"); + query.declareParameters("String t1, String t2, String t3"); + query.setResult("this.partitionName, this.sd.location"); + if (max >= 0) { + //Row limit specified, set it on the Query + query.setRange(0, max); + } + + List<Object[]> result = (List<Object[]>)query.execute(catName, dbName, tblName); + for(Object[] row:result) { + String location = (String)row[1]; + if (baseLocationToNotShow != null && location != null + && FileUtils.isSubdirectory(baseLocationToNotShow, location)) { + location = null; + } + partLocations.put((String)row[0], location); + } + LOG.debug("Done executing query for getPartitionLocations"); + success = commitTransaction(); + } finally { + if (!success) { + rollbackTransaction(); + } + if (query != null) { + query.closeAll(); + } + } + return partLocations; + } + protected List<Partition> getPartitionsInternal(String catName, String dbName, String tblName, final int maxParts, boolean allowSql, boolean allowJdo) throws MetaException, NoSuchObjectException { http://git-wip-us.apache.org/repos/asf/hive/blob/e36f6e4f/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java index bbbdf21..c8905c8 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java @@ -363,6 +363,21 @@ public interface RawStore extends Configurable { String tableName, int max) throws MetaException, NoSuchObjectException; /** + * Get the location for every partition of a given table. If a partition location is a child of + * baseLocationToNotShow then the partitionName is returned, but the only null location is + * returned. + * @param catName catalog name. + * @param dbName database name. + * @param tblName table name. + * @param baseLocationToNotShow Partition locations which are child of this path are omitted, and + * null value returned instead. + * @param max The maximum number of partition locations returned, or -1 for all + * @return The map of the partitionName, location pairs + */ + Map<String, String> getPartitionLocations(String catName, String dbName, String tblName, + String baseLocationToNotShow, int max); + + /** * Alter a table. * @param catName catalog the table is in. * @param dbname database the table is in. http://git-wip-us.apache.org/repos/asf/hive/blob/e36f6e4f/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java index 7c3588d..1da9798 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java @@ -1039,6 +1039,12 @@ public class CachedStore implements RawStore, Configurable { } @Override + public Map<String, String> getPartitionLocations(String catName, String dbName, String tblName, + String baseLocationToNotShow, int max) { + return rawStore.getPartitionLocations(catName, dbName, tblName, baseLocationToNotShow, max); + } + + @Override public void alterTable(String catName, String dbName, String tblName, Table newTable) throws InvalidObjectException, MetaException { rawStore.alterTable(catName, dbName, tblName, newTable); http://git-wip-us.apache.org/repos/asf/hive/blob/e36f6e4f/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/FileUtils.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/FileUtils.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/FileUtils.java index ec9e9e2..963e12f 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/FileUtils.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/FileUtils.java @@ -510,4 +510,18 @@ public class FileUtils { return new Path(scheme, authority, pathUri.getPath()); } + + + /** + * Returns a BEST GUESS as to whether or not other is a subdirectory of parent. It does not + * take into account any intricacies of the underlying file system, which is assumed to be + * HDFS. This should not return any false positives, but may return false negatives. + * + * @param parent + * @param other Directory to check if it is a subdirectory of parent + * @return True, if other is subdirectory of parent + */ + public static boolean isSubdirectory(String parent, String other) { + return other.startsWith(parent.endsWith(Path.SEPARATOR) ? parent : parent + Path.SEPARATOR); + } } http://git-wip-us.apache.org/repos/asf/hive/blob/e36f6e4f/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java index 7c7429d..8e195d0 100644 --- a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java +++ b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java @@ -272,6 +272,12 @@ public class DummyRawStoreControlledCommit implements RawStore, Configurable { } @Override + public Map<String, String> getPartitionLocations(String catName, String dbName, String tblName, + String baseLocationToNotShow, int max) { + return objectStore.getPartitionLocations(catName, dbName, tblName, baseLocationToNotShow, max); + } + + @Override public void alterTable(String catName, String dbName, String name, Table newTable) throws InvalidObjectException, MetaException { objectStore.alterTable(catName, dbName, name, newTable); http://git-wip-us.apache.org/repos/asf/hive/blob/e36f6e4f/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java index e4f2a17..85eb6d5 100644 --- a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java +++ b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java @@ -270,6 +270,12 @@ public class DummyRawStoreForJdoConnection implements RawStore { } @Override + public Map<String, String> getPartitionLocations(String catName, String dbName, String tblName, + String baseLocationToNotShow, int max) { + return Collections.emptyMap(); + } + + @Override public void alterTable(String catName, String dbname, String name, Table newTable) throws InvalidObjectException, MetaException { } http://git-wip-us.apache.org/repos/asf/hive/blob/e36f6e4f/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/MetaStoreFactoryForTests.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/MetaStoreFactoryForTests.java b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/MetaStoreFactoryForTests.java index 1a57df2..55ef153 100644 --- a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/MetaStoreFactoryForTests.java +++ b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/MetaStoreFactoryForTests.java @@ -57,6 +57,7 @@ public final class MetaStoreFactoryForTests { // set some values to use for getting conf. vars MetastoreConf.setBoolVar(conf, MetastoreConf.ConfVars.METRICS_ENABLED, true); MetastoreConf.setLongVar(conf, MetastoreConf.ConfVars.BATCH_RETRIEVE_MAX, 2); + MetastoreConf.setLongVar(conf, MetastoreConf.ConfVars.BATCH_RETRIEVE_OBJECTS_MAX, 2); MetastoreConf.setLongVar(conf, MetastoreConf.ConfVars.LIMIT_PARTITION_REQUEST, DEFAULT_LIMIT_PARTITION_REQUEST); MetaStoreTestUtils.setConfForStandloneMode(conf); http://git-wip-us.apache.org/repos/asf/hive/blob/e36f6e4f/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/TestTablesCreateDropAlterTruncate.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/TestTablesCreateDropAlterTruncate.java b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/TestTablesCreateDropAlterTruncate.java index e1c3dcb..c40b42a 100644 --- a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/TestTablesCreateDropAlterTruncate.java +++ b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/TestTablesCreateDropAlterTruncate.java @@ -167,12 +167,19 @@ public class TestTablesCreateDropAlterTruncate extends MetaStoreClientTest { .create(client, metaStore.getConf()); // Create partitions for the partitioned table - for(int i=0; i < 3; i++) { + for(int i=0; i < 2; i++) { new PartitionBuilder() .inTable(testTables[3]) .addValue("a" + i) .addToTable(client, metaStore.getConf()); } + // Add an external partition too + new PartitionBuilder() + .inTable(testTables[3]) + .addValue("a2") + .setLocation(metaStore.getWarehouseRoot() + "/external/a2") + .addToTable(client, metaStore.getConf()); + // Add data files to the partitioned table List<Partition> partitions = client.listPartitions(testTables[3].getDbName(), testTables[3].getTableName(), (short)-1); @@ -530,6 +537,8 @@ public class TestTablesCreateDropAlterTruncate extends MetaStoreClientTest { @Test public void testDropTableDeleteDir() throws Exception { Table table = testTables[0]; + Partition externalPartition = client.getPartition(partitionedTable.getDbName(), + partitionedTable.getTableName(), "test_part_col=a2"); client.dropTable(table.getDbName(), table.getTableName(), true, false); @@ -547,6 +556,9 @@ public class TestTablesCreateDropAlterTruncate extends MetaStoreClientTest { Assert.assertFalse("Table path should be removed", metaStore.isPathExists(new Path(partitionedTable.getSd().getLocation()))); + + Assert.assertFalse("Extra partition path should be removed", + metaStore.isPathExists(new Path(externalPartition.getSd().getLocation()))); } @Test
