Repository: hive
Updated Branches:
  refs/heads/branch-1 7db3fb301 -> d038bd848


HIVE-10910 : Alter table drop partition queries in encrypted zone failing to 
remove data from HDFS (Eugene Koifman, reviewed by Gunther)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d038bd84
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d038bd84
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d038bd84

Branch: refs/heads/branch-1
Commit: d038bd84879749433e6be26a7588ce132062a7ae
Parents: 7db3fb3
Author: Thejas Nair <[email protected]>
Authored: Mon Jun 8 14:59:18 2015 -0700
Committer: Thejas Nair <[email protected]>
Committed: Mon Jun 8 14:59:50 2015 -0700

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |   3 +-
 .../hadoop/hive/metastore/HiveMetaStore.java    | 101 +++++++++++--------
 .../clientpositive/encryption_drop_partition.q  |  18 ++++
 .../encrypted/encryption_drop_partition.q.out   |  81 +++++++++++++++
 .../encrypted/encryption_drop_table.q.out       |   2 +-
 5 files changed, 159 insertions(+), 46 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/d038bd84/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties 
b/itests/src/test/resources/testconfiguration.properties
index fe6ee17..521a189 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -356,7 +356,8 @@ encrypted.query.files=encryption_join_unencrypted_tbl.q,\
   encryption_move_tbl.q \
   encryption_drop_table.q \
   encryption_insert_values.q \
-  encryption_drop_view.q
+  encryption_drop_view.q \
+  encryption_drop_partition.q
 
 beeline.positive.exclude=add_part_exist.q,\
   alter1.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/d038bd84/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
----------------------------------------------------------------------
diff --git 
a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java 
b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
index d81c856..a3e5ed2 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
@@ -1491,17 +1491,7 @@ public class HiveMetaStore extends ThriftHiveMetastore {
         if (tbl.getSd() == null) {
           throw new MetaException("Table metadata is corrupted");
         }
-
-        /**
-         * Trash may be skipped iff:
-         * 1. deleteData == true, obviously.
-         * 2. tbl is external.
-         * 3. Either
-         *  3.1. User has specified PURGE from the commandline, and if not,
-         *  3.2. User has set the table to auto-purge.
-         */
-        ifPurge = ((envContext != null) && 
Boolean.parseBoolean(envContext.getProperties().get("ifPurge")))
-          || (tbl.isSetParameters() && 
"true".equalsIgnoreCase(tbl.getParameters().get("auto.purge")));
+        ifPurge = isMustPurge(envContext, tbl);
 
         firePreEvent(new PreDropTableEvent(tbl, deleteData, this));
 
@@ -1536,19 +1526,7 @@ public class HiveMetaStore extends ThriftHiveMetastore {
         }
 
         // tblPath will be null when tbl is a view. We skip the following if 
block in that case.
-        if(tblPath != null && !ifPurge) {
-          String trashInterval = hiveConf.get("fs.trash.interval");
-          boolean trashEnabled = trashInterval != null && 
trashInterval.length() > 0
-            && Float.parseFloat(trashInterval) > 0;
-          if (trashEnabled) {
-            HadoopShims.HdfsEncryptionShim shim =
-              
ShimLoader.getHadoopShims().createHdfsEncryptionShim(FileSystem.get(hiveConf), 
hiveConf);
-            if (shim.isPathEncrypted(tblPath)) {
-              throw new MetaException("Unable to drop table because it is in 
an encryption zone" +
-                " and trash is enabled.  Use PURGE option to skip trash.");
-            }
-          }
-        }
+        checkTrashPurgeCombination(tblPath, dbname + "." + name, ifPurge);
         // Drop the partitions and get a list of locations which need to be 
deleted
         partPaths = dropPartitionsAndGetLocations(ms, dbname, name, tblPath,
             tbl.getPartitionKeys(), deleteData && !isExternal);
@@ -1580,6 +1558,41 @@ public class HiveMetaStore extends ThriftHiveMetastore {
     }
 
     /**
+     * Will throw MetaException if combination of trash policy/purge can't be 
satisfied
+     * @param pathToData path to data which may potentially be moved to trash
+     * @param objectName db.table, or db.table.part
+     * @param ifPurge if PURGE option is specified
+     */
+    private void checkTrashPurgeCombination(Path pathToData, String 
objectName, boolean ifPurge)
+      throws MetaException {
+      if (!(pathToData != null && !ifPurge)) {//pathToData may be NULL for a 
view
+        return;
+      }
+
+      boolean trashEnabled = false;
+      try {
+       trashEnabled = 0 < hiveConf.getFloat("fs.trash.interval", -1);
+      } catch(NumberFormatException ex) {
+       // nothing to do
+      }
+
+      if (trashEnabled) {
+        try {
+          HadoopShims.HdfsEncryptionShim shim =
+            
ShimLoader.getHadoopShims().createHdfsEncryptionShim(FileSystem.get(hiveConf), 
hiveConf);
+          if (shim.isPathEncrypted(pathToData)) {
+            throw new MetaException("Unable to drop " + objectName + " because 
it is in an encryption zone" +
+              " and trash is enabled.  Use PURGE option to skip trash.");
+          }
+        } catch (IOException ex) {
+          MetaException e = new MetaException(ex.getMessage());
+          e.initCause(ex);
+          throw e;
+        }
+      }
+    }
+
+    /**
      * Deletes the data in a table's location, if it fails logs an error
      *
      * @param tablePath
@@ -2581,12 +2594,14 @@ public class HiveMetaStore extends ThriftHiveMetastore {
       Partition part = null;
       boolean isArchived = false;
       Path archiveParentDir = null;
+      boolean mustPurge = false;
 
       try {
         ms.openTransaction();
         part = ms.getPartition(db_name, tbl_name, part_vals);
         tbl = get_table_core(db_name, tbl_name);
         firePreEvent(new PreDropPartitionEvent(tbl, part, deleteData, this));
+        mustPurge = isMustPurge(envContext, tbl);
 
         if (part == null) {
           throw new NoSuchObjectException("Partition doesn't exist. "
@@ -2597,6 +2612,7 @@ public class HiveMetaStore extends ThriftHiveMetastore {
         if (isArchived) {
           archiveParentDir = MetaStoreUtils.getOriginalLocation(part);
           verifyIsWritablePath(archiveParentDir);
+          checkTrashPurgeCombination(archiveParentDir, db_name + "." + 
tbl_name + "." + part_vals, mustPurge);
         }
         if (!ms.dropPartition(db_name, tbl_name, part_vals)) {
           throw new MetaException("Unable to drop partition");
@@ -2605,22 +2621,13 @@ public class HiveMetaStore extends ThriftHiveMetastore {
         if ((part.getSd() != null) && (part.getSd().getLocation() != null)) {
           partPath = new Path(part.getSd().getLocation());
           verifyIsWritablePath(partPath);
+          checkTrashPurgeCombination(partPath, db_name + "." + tbl_name + "." 
+ part_vals, mustPurge);
         }
       } finally {
         if (!success) {
           ms.rollbackTransaction();
         } else if (deleteData && ((partPath != null) || (archiveParentDir != 
null))) {
           if (tbl != null && !isExternal(tbl)) {
-            // Data needs deletion. Check if trash may be skipped.
-            // Trash may be skipped iff:
-            //  1. deleteData == true, obviously.
-            //  2. tbl is external.
-            //  3. Either
-            //    3.1. User has specified PURGE from the commandline, and if 
not,
-            //    3.2. User has set the table to auto-purge.
-            boolean mustPurge = ((envContext != null) && 
Boolean.parseBoolean(envContext.getProperties().get("ifPurge")))
-                                ||
-                                 (tbl.isSetParameters() && 
"true".equalsIgnoreCase(tbl.getParameters().get("auto.purge")));
             if (mustPurge) {
               LOG.info("dropPartition() will purge " + partPath + " directly, 
skipping trash.");
             }
@@ -2650,6 +2657,18 @@ public class HiveMetaStore extends ThriftHiveMetastore {
       return true;
     }
 
+    private static boolean isMustPurge(EnvironmentContext envContext, Table 
tbl) {
+      // Data needs deletion. Check if trash may be skipped.
+      // Trash may be skipped iff:
+      //  1. deleteData == true, obviously.
+      //  2. tbl is external.
+      //  3. Either
+      //    3.1. User has specified PURGE from the commandline, and if not,
+      //    3.2. User has set the table to auto-purge.
+      return ((envContext != null) && 
Boolean.parseBoolean(envContext.getProperties().get("ifPurge")))
+        || (tbl.isSetParameters() && 
"true".equalsIgnoreCase(tbl.getParameters().get("auto.purge")));
+
+    }
     private void deleteParentRecursive(Path parent, int depth, boolean 
mustPurge) throws IOException, MetaException {
       if (depth > 0 && parent != null && wh.isWritable(parent) && 
wh.isEmpty(parent)) {
         wh.deleteDir(parent, true, mustPurge);
@@ -2692,10 +2711,12 @@ public class HiveMetaStore extends ThriftHiveMetastore {
       ms.openTransaction();
       Table tbl = null;
       List<Partition> parts = null;
+      boolean mustPurge = false;
       try {
         // We need Partition-s for firing events and for result; DN needs 
MPartition-s to drop.
         // Great... Maybe we could bypass fetching MPartitions by issuing 
direct SQL deletes.
         tbl = get_table_core(dbName, tblName);
+        mustPurge = isMustPurge(envContext, tbl);
         int minCount = 0;
         RequestPartsSpec spec = request.getParts();
         List<String> partNames = null;
@@ -2760,11 +2781,13 @@ public class HiveMetaStore extends ThriftHiveMetastore {
           if (MetaStoreUtils.isArchived(part)) {
             Path archiveParentDir = MetaStoreUtils.getOriginalLocation(part);
             verifyIsWritablePath(archiveParentDir);
+            checkTrashPurgeCombination(archiveParentDir, dbName + "." + 
tblName + "." + part.getValues(), mustPurge);
             archToDelete.add(archiveParentDir);
           }
           if ((part.getSd() != null) && (part.getSd().getLocation() != null)) {
             Path partPath = new Path(part.getSd().getLocation());
             verifyIsWritablePath(partPath);
+            checkTrashPurgeCombination(partPath, dbName + "." + tblName + "." 
+ part.getValues(), mustPurge);
             dirsToDelete.add(new PathAndPartValSize(partPath, 
part.getValues().size()));
           }
         }
@@ -2780,16 +2803,6 @@ public class HiveMetaStore extends ThriftHiveMetastore {
         if (!success) {
           ms.rollbackTransaction();
         } else if (deleteData && !isExternal(tbl)) {
-          // Data needs deletion. Check if trash may be skipped.
-          // Trash may be skipped iff:
-          //  1. deleteData == true, obviously.
-          //  2. tbl is external.
-          //  3. Either
-          //    3.1. User has specified PURGE from the commandline, and if not,
-          //    3.2. User has set the table to auto-purge.
-          boolean mustPurge = ((envContext != null) && 
Boolean.parseBoolean(envContext.getProperties().get("ifPurge")))
-                              ||
-                              (tbl.isSetParameters() && 
"true".equalsIgnoreCase(tbl.getParameters().get("auto.purge")));
           LOG.info( mustPurge?
                       "dropPartition() will purge partition-directories 
directly, skipping trash."
                     :  "dropPartition() will move partition-directories to 
trash-directory.");

http://git-wip-us.apache.org/repos/asf/hive/blob/d038bd84/ql/src/test/queries/clientpositive/encryption_drop_partition.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/encryption_drop_partition.q 
b/ql/src/test/queries/clientpositive/encryption_drop_partition.q
new file mode 100644
index 0000000..e1c1796
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/encryption_drop_partition.q
@@ -0,0 +1,18 @@
+-- SORT_QUERY_RESULTS;
+
+-- we're setting this so that TestNegativeCliDriver.vm doesn't stop processing 
after DROP TABLE fails;
+
+set hive.cli.errors.ignore=true;
+set hive.exec.dynamic.partition.mode=nonstrict;
+
+DROP TABLE IF EXISTS encrypted_table_dp PURGE;
+CREATE TABLE encrypted_table_dp (key INT, value STRING) partitioned by (p 
STRING) LOCATION 
'${hiveconf:hive.metastore.warehouse.dir}/default/encrypted_table_dp';
+CRYPTO CREATE_KEY --keyName key_128 --bitLength 128;
+CRYPTO CREATE_ZONE --keyName key_128 --path 
${hiveconf:hive.metastore.warehouse.dir}/default/encrypted_table_dp;
+
+INSERT INTO encrypted_table_dp PARTITION(p)(p,key,value) values('2014-09-23', 
1, 'foo'),('2014-09-24', 2, 'bar');
+SELECT * FROM encrypted_table_dp;
+ALTER TABLE encrypted_table_dp DROP PARTITION (p='2014-09-23');
+SELECT * FROM encrypted_table_dp;
+ALTER TABLE encrypted_table_dp DROP PARTITION (p='2014-09-23') PURGE;
+SELECT * FROM encrypted_table_dp;

http://git-wip-us.apache.org/repos/asf/hive/blob/d038bd84/ql/src/test/results/clientpositive/encrypted/encryption_drop_partition.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/encrypted/encryption_drop_partition.q.out 
b/ql/src/test/results/clientpositive/encrypted/encryption_drop_partition.q.out
new file mode 100644
index 0000000..067bf82
--- /dev/null
+++ 
b/ql/src/test/results/clientpositive/encrypted/encryption_drop_partition.q.out
@@ -0,0 +1,81 @@
+PREHOOK: query: DROP TABLE IF EXISTS encrypted_table_dp PURGE
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS encrypted_table_dp PURGE
+POSTHOOK: type: DROPTABLE
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@encrypted_table_dp
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@encrypted_table_dp
+Encryption key created: 'key_128'
+Encryption zone created: 
'/build/ql/test/data/warehouse/default/encrypted_table_dp' using key: 'key_128'
+PREHOOK: query: INSERT INTO encrypted_table_dp PARTITION(p)(p,key,value) 
values('2014-09-23', 1, 'foo'),('2014-09-24', 2, 'bar')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@encrypted_table_dp
+POSTHOOK: query: INSERT INTO encrypted_table_dp PARTITION(p)(p,key,value) 
values('2014-09-23', 1, 'foo'),('2014-09-24', 2, 'bar')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@encrypted_table_dp@p=2014-09-23
+POSTHOOK: Output: default@encrypted_table_dp@p=2014-09-24
+POSTHOOK: Lineage: encrypted_table_dp PARTITION(p=2014-09-23).key EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ]
+POSTHOOK: Lineage: encrypted_table_dp PARTITION(p=2014-09-23).value SIMPLE 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, 
type:string, comment:), ]
+POSTHOOK: Lineage: encrypted_table_dp PARTITION(p=2014-09-24).key EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ]
+POSTHOOK: Lineage: encrypted_table_dp PARTITION(p=2014-09-24).value SIMPLE 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, 
type:string, comment:), ]
+PREHOOK: query: SELECT * FROM encrypted_table_dp
+PREHOOK: type: QUERY
+PREHOOK: Input: default@encrypted_table_dp
+PREHOOK: Input: default@encrypted_table_dp@p=2014-09-23
+PREHOOK: Input: default@encrypted_table_dp@p=2014-09-24
+#### A PARTIAL masked pattern was here #### 
data/warehouse/default/encrypted_table_dp/.hive-staging
+POSTHOOK: query: SELECT * FROM encrypted_table_dp
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@encrypted_table_dp
+POSTHOOK: Input: default@encrypted_table_dp@p=2014-09-23
+POSTHOOK: Input: default@encrypted_table_dp@p=2014-09-24
+#### A PARTIAL masked pattern was here #### 
data/warehouse/default/encrypted_table_dp/.hive-staging
+1      foo     2014-09-23
+2      bar     2014-09-24
+PREHOOK: query: ALTER TABLE encrypted_table_dp DROP PARTITION (p='2014-09-23')
+PREHOOK: type: ALTERTABLE_DROPPARTS
+PREHOOK: Input: default@encrypted_table_dp
+PREHOOK: Output: default@encrypted_table_dp@p=2014-09-23
+FAILED: Execution Error, return code 1 from 
org.apache.hadoop.hive.ql.exec.DDLTask. Unable to drop 
default.encrypted_table_dp.[2014-09-23] because it is in an encryption zone and 
trash is enabled.  Use PURGE option to skip trash.
+PREHOOK: query: SELECT * FROM encrypted_table_dp
+PREHOOK: type: QUERY
+PREHOOK: Input: default@encrypted_table_dp
+PREHOOK: Input: default@encrypted_table_dp@p=2014-09-23
+PREHOOK: Input: default@encrypted_table_dp@p=2014-09-24
+#### A PARTIAL masked pattern was here #### 
data/warehouse/default/encrypted_table_dp/.hive-staging
+POSTHOOK: query: SELECT * FROM encrypted_table_dp
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@encrypted_table_dp
+POSTHOOK: Input: default@encrypted_table_dp@p=2014-09-23
+POSTHOOK: Input: default@encrypted_table_dp@p=2014-09-24
+#### A PARTIAL masked pattern was here #### 
data/warehouse/default/encrypted_table_dp/.hive-staging
+1      foo     2014-09-23
+2      bar     2014-09-24
+PREHOOK: query: ALTER TABLE encrypted_table_dp DROP PARTITION (p='2014-09-23') 
PURGE
+PREHOOK: type: ALTERTABLE_DROPPARTS
+PREHOOK: Input: default@encrypted_table_dp
+PREHOOK: Output: default@encrypted_table_dp@p=2014-09-23
+POSTHOOK: query: ALTER TABLE encrypted_table_dp DROP PARTITION 
(p='2014-09-23') PURGE
+POSTHOOK: type: ALTERTABLE_DROPPARTS
+POSTHOOK: Input: default@encrypted_table_dp
+POSTHOOK: Output: default@encrypted_table_dp@p=2014-09-23
+PREHOOK: query: SELECT * FROM encrypted_table_dp
+PREHOOK: type: QUERY
+PREHOOK: Input: default@encrypted_table_dp
+PREHOOK: Input: default@encrypted_table_dp@p=2014-09-24
+#### A PARTIAL masked pattern was here #### 
data/warehouse/default/encrypted_table_dp/.hive-staging
+POSTHOOK: query: SELECT * FROM encrypted_table_dp
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@encrypted_table_dp
+POSTHOOK: Input: default@encrypted_table_dp@p=2014-09-24
+#### A PARTIAL masked pattern was here #### 
data/warehouse/default/encrypted_table_dp/.hive-staging
+2      bar     2014-09-24

http://git-wip-us.apache.org/repos/asf/hive/blob/d038bd84/ql/src/test/results/clientpositive/encrypted/encryption_drop_table.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/encrypted/encryption_drop_table.q.out 
b/ql/src/test/results/clientpositive/encrypted/encryption_drop_table.q.out
index 9171e1b..55eefa0 100644
--- a/ql/src/test/results/clientpositive/encrypted/encryption_drop_table.q.out
+++ b/ql/src/test/results/clientpositive/encrypted/encryption_drop_table.q.out
@@ -36,7 +36,7 @@ PREHOOK: query: DROP TABLE default.encrypted_table
 PREHOOK: type: DROPTABLE
 PREHOOK: Input: default@encrypted_table
 PREHOOK: Output: default@encrypted_table
-FAILED: Execution Error, return code 1 from 
org.apache.hadoop.hive.ql.exec.DDLTask. MetaException(message:Unable to drop 
table because it is in an encryption zone and trash is enabled.  Use PURGE 
option to skip trash.)
+FAILED: Execution Error, return code 1 from 
org.apache.hadoop.hive.ql.exec.DDLTask. MetaException(message:Unable to drop 
default.encrypted_table because it is in an encryption zone and trash is 
enabled.  Use PURGE option to skip trash.)
 PREHOOK: query: SHOW TABLES
 PREHOOK: type: SHOWTABLES
 PREHOOK: Input: database:default

Reply via email to