Repository: hive Updated Branches: refs/heads/branch-1 7db3fb301 -> d038bd848
HIVE-10910 : Alter table drop partition queries in encrypted zone failing to remove data from HDFS (Eugene Koifman, reviewed by Gunther) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d038bd84 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d038bd84 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d038bd84 Branch: refs/heads/branch-1 Commit: d038bd84879749433e6be26a7588ce132062a7ae Parents: 7db3fb3 Author: Thejas Nair <[email protected]> Authored: Mon Jun 8 14:59:18 2015 -0700 Committer: Thejas Nair <[email protected]> Committed: Mon Jun 8 14:59:50 2015 -0700 ---------------------------------------------------------------------- .../test/resources/testconfiguration.properties | 3 +- .../hadoop/hive/metastore/HiveMetaStore.java | 101 +++++++++++-------- .../clientpositive/encryption_drop_partition.q | 18 ++++ .../encrypted/encryption_drop_partition.q.out | 81 +++++++++++++++ .../encrypted/encryption_drop_table.q.out | 2 +- 5 files changed, 159 insertions(+), 46 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/d038bd84/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index fe6ee17..521a189 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -356,7 +356,8 @@ encrypted.query.files=encryption_join_unencrypted_tbl.q,\ encryption_move_tbl.q \ encryption_drop_table.q \ encryption_insert_values.q \ - encryption_drop_view.q + encryption_drop_view.q \ + encryption_drop_partition.q beeline.positive.exclude=add_part_exist.q,\ alter1.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/d038bd84/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java index d81c856..a3e5ed2 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java @@ -1491,17 +1491,7 @@ public class HiveMetaStore extends ThriftHiveMetastore { if (tbl.getSd() == null) { throw new MetaException("Table metadata is corrupted"); } - - /** - * Trash may be skipped iff: - * 1. deleteData == true, obviously. - * 2. tbl is external. - * 3. Either - * 3.1. User has specified PURGE from the commandline, and if not, - * 3.2. User has set the table to auto-purge. - */ - ifPurge = ((envContext != null) && Boolean.parseBoolean(envContext.getProperties().get("ifPurge"))) - || (tbl.isSetParameters() && "true".equalsIgnoreCase(tbl.getParameters().get("auto.purge"))); + ifPurge = isMustPurge(envContext, tbl); firePreEvent(new PreDropTableEvent(tbl, deleteData, this)); @@ -1536,19 +1526,7 @@ public class HiveMetaStore extends ThriftHiveMetastore { } // tblPath will be null when tbl is a view. We skip the following if block in that case. - if(tblPath != null && !ifPurge) { - String trashInterval = hiveConf.get("fs.trash.interval"); - boolean trashEnabled = trashInterval != null && trashInterval.length() > 0 - && Float.parseFloat(trashInterval) > 0; - if (trashEnabled) { - HadoopShims.HdfsEncryptionShim shim = - ShimLoader.getHadoopShims().createHdfsEncryptionShim(FileSystem.get(hiveConf), hiveConf); - if (shim.isPathEncrypted(tblPath)) { - throw new MetaException("Unable to drop table because it is in an encryption zone" + - " and trash is enabled. Use PURGE option to skip trash."); - } - } - } + checkTrashPurgeCombination(tblPath, dbname + "." + name, ifPurge); // Drop the partitions and get a list of locations which need to be deleted partPaths = dropPartitionsAndGetLocations(ms, dbname, name, tblPath, tbl.getPartitionKeys(), deleteData && !isExternal); @@ -1580,6 +1558,41 @@ public class HiveMetaStore extends ThriftHiveMetastore { } /** + * Will throw MetaException if combination of trash policy/purge can't be satisfied + * @param pathToData path to data which may potentially be moved to trash + * @param objectName db.table, or db.table.part + * @param ifPurge if PURGE options is specified + */ + private void checkTrashPurgeCombination(Path pathToData, String objectName, boolean ifPurge) + throws MetaException { + if (!(pathToData != null && !ifPurge)) {//pathToData may be NULL for a view + return; + } + + boolean trashEnabled = false; + try { + trashEnabled = 0 < hiveConf.getFloat("fs.trash.interval", -1); + } catch(NumberFormatException ex) { + // nothing to do + } + + if (trashEnabled) { + try { + HadoopShims.HdfsEncryptionShim shim = + ShimLoader.getHadoopShims().createHdfsEncryptionShim(FileSystem.get(hiveConf), hiveConf); + if (shim.isPathEncrypted(pathToData)) { + throw new MetaException("Unable to drop " + objectName + " because it is in an encryption zone" + + " and trash is enabled. Use PURGE option to skip trash."); + } + } catch (IOException ex) { + MetaException e = new MetaException(ex.getMessage()); + e.initCause(ex); + throw e; + } + } + } + + /** * Deletes the data in a table's location, if it fails logs an error * * @param tablePath @@ -2581,12 +2594,14 @@ public class HiveMetaStore extends ThriftHiveMetastore { Partition part = null; boolean isArchived = false; Path archiveParentDir = null; + boolean mustPurge = false; try { ms.openTransaction(); part = ms.getPartition(db_name, tbl_name, part_vals); tbl = get_table_core(db_name, tbl_name); firePreEvent(new PreDropPartitionEvent(tbl, part, deleteData, this)); + mustPurge = isMustPurge(envContext, tbl); if (part == null) { throw new NoSuchObjectException("Partition doesn't exist. " @@ -2597,6 +2612,7 @@ public class HiveMetaStore extends ThriftHiveMetastore { if (isArchived) { archiveParentDir = MetaStoreUtils.getOriginalLocation(part); verifyIsWritablePath(archiveParentDir); + checkTrashPurgeCombination(archiveParentDir, db_name + "." + tbl_name + "." + part_vals, mustPurge); } if (!ms.dropPartition(db_name, tbl_name, part_vals)) { throw new MetaException("Unable to drop partition"); @@ -2605,22 +2621,13 @@ public class HiveMetaStore extends ThriftHiveMetastore { if ((part.getSd() != null) && (part.getSd().getLocation() != null)) { partPath = new Path(part.getSd().getLocation()); verifyIsWritablePath(partPath); + checkTrashPurgeCombination(partPath, db_name + "." + tbl_name + "." + part_vals, mustPurge); } } finally { if (!success) { ms.rollbackTransaction(); } else if (deleteData && ((partPath != null) || (archiveParentDir != null))) { if (tbl != null && !isExternal(tbl)) { - // Data needs deletion. Check if trash may be skipped. - // Trash may be skipped iff: - // 1. deleteData == true, obviously. - // 2. tbl is external. - // 3. Either - // 3.1. User has specified PURGE from the commandline, and if not, - // 3.2. User has set the table to auto-purge. - boolean mustPurge = ((envContext != null) && Boolean.parseBoolean(envContext.getProperties().get("ifPurge"))) - || - (tbl.isSetParameters() && "true".equalsIgnoreCase(tbl.getParameters().get("auto.purge"))); if (mustPurge) { LOG.info("dropPartition() will purge " + partPath + " directly, skipping trash."); } @@ -2650,6 +2657,18 @@ public class HiveMetaStore extends ThriftHiveMetastore { return true; } + private static boolean isMustPurge(EnvironmentContext envContext, Table tbl) { + // Data needs deletion. Check if trash may be skipped. + // Trash may be skipped iff: + // 1. deleteData == true, obviously. + // 2. tbl is external. + // 3. Either + // 3.1. User has specified PURGE from the commandline, and if not, + // 3.2. User has set the table to auto-purge. + return ((envContext != null) && Boolean.parseBoolean(envContext.getProperties().get("ifPurge"))) + || (tbl.isSetParameters() && "true".equalsIgnoreCase(tbl.getParameters().get("auto.purge"))); + + } private void deleteParentRecursive(Path parent, int depth, boolean mustPurge) throws IOException, MetaException { if (depth > 0 && parent != null && wh.isWritable(parent) && wh.isEmpty(parent)) { wh.deleteDir(parent, true, mustPurge); @@ -2692,10 +2711,12 @@ public class HiveMetaStore extends ThriftHiveMetastore { ms.openTransaction(); Table tbl = null; List<Partition> parts = null; + boolean mustPurge = false; try { // We need Partition-s for firing events and for result; DN needs MPartition-s to drop. // Great... Maybe we could bypass fetching MPartitions by issuing direct SQL deletes. tbl = get_table_core(dbName, tblName); + mustPurge = isMustPurge(envContext, tbl); int minCount = 0; RequestPartsSpec spec = request.getParts(); List<String> partNames = null; @@ -2760,11 +2781,13 @@ public class HiveMetaStore extends ThriftHiveMetastore { if (MetaStoreUtils.isArchived(part)) { Path archiveParentDir = MetaStoreUtils.getOriginalLocation(part); verifyIsWritablePath(archiveParentDir); + checkTrashPurgeCombination(archiveParentDir, dbName + "." + tblName + "." + part.getValues(), mustPurge); archToDelete.add(archiveParentDir); } if ((part.getSd() != null) && (part.getSd().getLocation() != null)) { Path partPath = new Path(part.getSd().getLocation()); verifyIsWritablePath(partPath); + checkTrashPurgeCombination(partPath, dbName + "." + tblName + "." + part.getValues(), mustPurge); dirsToDelete.add(new PathAndPartValSize(partPath, part.getValues().size())); } } @@ -2780,16 +2803,6 @@ public class HiveMetaStore extends ThriftHiveMetastore { if (!success) { ms.rollbackTransaction(); } else if (deleteData && !isExternal(tbl)) { - // Data needs deletion. Check if trash may be skipped. - // Trash may be skipped iff: - // 1. deleteData == true, obviously. - // 2. tbl is external. - // 3. Either - // 3.1. User has specified PURGE from the commandline, and if not, - // 3.2. User has set the table to auto-purge. - boolean mustPurge = ((envContext != null) && Boolean.parseBoolean(envContext.getProperties().get("ifPurge"))) - || - (tbl.isSetParameters() && "true".equalsIgnoreCase(tbl.getParameters().get("auto.purge"))); LOG.info( mustPurge? "dropPartition() will purge partition-directories directly, skipping trash." : "dropPartition() will move partition-directories to trash-directory."); http://git-wip-us.apache.org/repos/asf/hive/blob/d038bd84/ql/src/test/queries/clientpositive/encryption_drop_partition.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/encryption_drop_partition.q b/ql/src/test/queries/clientpositive/encryption_drop_partition.q new file mode 100644 index 0000000..e1c1796 --- /dev/null +++ b/ql/src/test/queries/clientpositive/encryption_drop_partition.q @@ -0,0 +1,18 @@ +-- SORT_QUERY_RESULTS; + +-- we're setting this so that TestNegaiveCliDriver.vm doesn't stop processing after DROP TABLE fails; + +set hive.cli.errors.ignore=true; +set hive.exec.dynamic.partition.mode=nonstrict; + +DROP TABLE IF EXISTS encrypted_table_dp PURGE; +CREATE TABLE encrypted_table_dp (key INT, value STRING) partitioned by (p STRING) LOCATION '${hiveconf:hive.metastore.warehouse.dir}/default/encrypted_table_dp'; +CRYPTO CREATE_KEY --keyName key_128 --bitLength 128; +CRYPTO CREATE_ZONE --keyName key_128 --path ${hiveconf:hive.metastore.warehouse.dir}/default/encrypted_table_dp; + +INSERT INTO encrypted_table_dp PARTITION(p)(p,key,value) values('2014-09-23', 1, 'foo'),('2014-09-24', 2, 'bar'); +SELECT * FROM encrypted_table_dp; +ALTER TABLE encrypted_table_dp DROP PARTITION (p='2014-09-23'); +SELECT * FROM encrypted_table_dp; +ALTER TABLE encrypted_table_dp DROP PARTITION (p='2014-09-23') PURGE; +SELECT * FROM encrypted_table_dp; http://git-wip-us.apache.org/repos/asf/hive/blob/d038bd84/ql/src/test/results/clientpositive/encrypted/encryption_drop_partition.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/encrypted/encryption_drop_partition.q.out b/ql/src/test/results/clientpositive/encrypted/encryption_drop_partition.q.out new file mode 100644 index 0000000..067bf82 --- /dev/null +++ b/ql/src/test/results/clientpositive/encrypted/encryption_drop_partition.q.out @@ -0,0 +1,81 @@ +PREHOOK: query: DROP TABLE IF EXISTS encrypted_table_dp PURGE +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS encrypted_table_dp PURGE +POSTHOOK: type: DROPTABLE +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@encrypted_table_dp +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@encrypted_table_dp +Encryption key created: 'key_128' +Encryption zone created: '/build/ql/test/data/warehouse/default/encrypted_table_dp' using key: 'key_128' +PREHOOK: query: INSERT INTO encrypted_table_dp PARTITION(p)(p,key,value) values('2014-09-23', 1, 'foo'),('2014-09-24', 2, 'bar') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@encrypted_table_dp +POSTHOOK: query: INSERT INTO encrypted_table_dp PARTITION(p)(p,key,value) values('2014-09-23', 1, 'foo'),('2014-09-24', 2, 'bar') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@encrypted_table_dp@p=2014-09-23 +POSTHOOK: Output: default@encrypted_table_dp@p=2014-09-24 +POSTHOOK: Lineage: encrypted_table_dp PARTITION(p=2014-09-23).key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: encrypted_table_dp PARTITION(p=2014-09-23).value SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: encrypted_table_dp PARTITION(p=2014-09-24).key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: encrypted_table_dp PARTITION(p=2014-09-24).value SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: SELECT * FROM encrypted_table_dp +PREHOOK: type: QUERY +PREHOOK: Input: default@encrypted_table_dp +PREHOOK: Input: default@encrypted_table_dp@p=2014-09-23 +PREHOOK: Input: default@encrypted_table_dp@p=2014-09-24 +#### A PARTIAL masked pattern was here #### data/warehouse/default/encrypted_table_dp/.hive-staging +POSTHOOK: query: SELECT * FROM encrypted_table_dp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@encrypted_table_dp +POSTHOOK: Input: default@encrypted_table_dp@p=2014-09-23 +POSTHOOK: Input: default@encrypted_table_dp@p=2014-09-24 +#### A PARTIAL masked pattern was here #### data/warehouse/default/encrypted_table_dp/.hive-staging +1 foo 2014-09-23 +2 bar 2014-09-24 +PREHOOK: query: ALTER TABLE encrypted_table_dp DROP PARTITION (p='2014-09-23') +PREHOOK: type: ALTERTABLE_DROPPARTS +PREHOOK: Input: default@encrypted_table_dp +PREHOOK: Output: default@encrypted_table_dp@p=2014-09-23 +FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. Unable to drop default.encrypted_table_dp.[2014-09-23] because it is in an encryption zone and trash is enabled. Use PURGE option to skip trash. +PREHOOK: query: SELECT * FROM encrypted_table_dp +PREHOOK: type: QUERY +PREHOOK: Input: default@encrypted_table_dp +PREHOOK: Input: default@encrypted_table_dp@p=2014-09-23 +PREHOOK: Input: default@encrypted_table_dp@p=2014-09-24 +#### A PARTIAL masked pattern was here #### data/warehouse/default/encrypted_table_dp/.hive-staging +POSTHOOK: query: SELECT * FROM encrypted_table_dp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@encrypted_table_dp +POSTHOOK: Input: default@encrypted_table_dp@p=2014-09-23 +POSTHOOK: Input: default@encrypted_table_dp@p=2014-09-24 +#### A PARTIAL masked pattern was here #### data/warehouse/default/encrypted_table_dp/.hive-staging +1 foo 2014-09-23 +2 bar 2014-09-24 +PREHOOK: query: ALTER TABLE encrypted_table_dp DROP PARTITION (p='2014-09-23') PURGE +PREHOOK: type: ALTERTABLE_DROPPARTS +PREHOOK: Input: default@encrypted_table_dp +PREHOOK: Output: default@encrypted_table_dp@p=2014-09-23 +POSTHOOK: query: ALTER TABLE encrypted_table_dp DROP PARTITION (p='2014-09-23') PURGE +POSTHOOK: type: ALTERTABLE_DROPPARTS +POSTHOOK: Input: default@encrypted_table_dp +POSTHOOK: Output: default@encrypted_table_dp@p=2014-09-23 +PREHOOK: query: SELECT * FROM encrypted_table_dp +PREHOOK: type: QUERY +PREHOOK: Input: default@encrypted_table_dp +PREHOOK: Input: default@encrypted_table_dp@p=2014-09-24 +#### A PARTIAL masked pattern was here #### data/warehouse/default/encrypted_table_dp/.hive-staging +POSTHOOK: query: SELECT * FROM encrypted_table_dp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@encrypted_table_dp +POSTHOOK: Input: default@encrypted_table_dp@p=2014-09-24 +#### A PARTIAL masked pattern was here #### data/warehouse/default/encrypted_table_dp/.hive-staging +2 bar 2014-09-24 http://git-wip-us.apache.org/repos/asf/hive/blob/d038bd84/ql/src/test/results/clientpositive/encrypted/encryption_drop_table.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/encrypted/encryption_drop_table.q.out b/ql/src/test/results/clientpositive/encrypted/encryption_drop_table.q.out index 9171e1b..55eefa0 100644 --- a/ql/src/test/results/clientpositive/encrypted/encryption_drop_table.q.out +++ b/ql/src/test/results/clientpositive/encrypted/encryption_drop_table.q.out @@ -36,7 +36,7 @@ PREHOOK: query: DROP TABLE default.encrypted_table PREHOOK: type: DROPTABLE PREHOOK: Input: default@encrypted_table PREHOOK: Output: default@encrypted_table -FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. MetaException(message:Unable to drop table because it is in an encryption zone and trash is enabled. Use PURGE option to skip trash.) +FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. MetaException(message:Unable to drop default.encrypted_table because it is in an encryption zone and trash is enabled. Use PURGE option to skip trash.) PREHOOK: query: SHOW TABLES PREHOOK: type: SHOWTABLES PREHOOK: Input: database:default
