Repository: hive Updated Branches: refs/heads/master c7fe4ef4d -> 6a8f4cbe3
HIVE-19981: Managed tables converted to external tables by the HiveStrictManagedMigration utility should be set to delete data when the table is dropped (Jason Dere, reviewed by Daniel Dai) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6a8f4cbe Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6a8f4cbe Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6a8f4cbe Branch: refs/heads/master Commit: 6a8f4cbe328608a29f04c97c47b9d8c17f4ae29e Parents: c7fe4ef Author: Jason Dere <[email protected]> Authored: Wed Jun 27 17:25:14 2018 -0700 Committer: Jason Dere <[email protected]> Committed: Wed Jun 27 17:25:14 2018 -0700 ---------------------------------------------------------------------- .../org/apache/hadoop/hive/conf/HiveConf.java | 6 + .../test/resources/testconfiguration.properties | 1 + .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 8 + .../clientpositive/external_table_purge.q | 165 +++++ .../llap/external_table_purge.q.out | 635 +++++++++++++++++++ .../hadoop/hive/metastore/HiveMetaStore.java | 33 +- .../hive/metastore/utils/MetaStoreUtils.java | 8 +- 7 files changed, 845 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/6a8f4cbe/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 5eb0435..fc3f752 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2847,6 +2847,12 @@ public class HiveConf extends Configuration { " on the assumption that data changes by external applications may have negative effects" + " on these operations."), + 
HIVE_EXTERNALTABLE_PURGE_DEFAULT("hive.external.table.purge.default", false, + "Set to true to set external.table.purge=true on newly created external tables," + + " which will specify that the table data should be deleted when the table is dropped." + + " Set to false to maintain existing behavior that external tables do not delete data" + + " when the table is dropped."), + HIVE_ERROR_ON_EMPTY_PARTITION("hive.error.on.empty.partition", false, "Whether to throw an exception if dynamic partition insert generates empty results."), http://git-wip-us.apache.org/repos/asf/hive/blob/6a8f4cbe/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 9f25a9b..04cd9f5 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -387,6 +387,7 @@ minillap.query.files=acid_bucket_pruning.q,\ tez_aggr_part_stats.q,\ tez_union_view.q,\ file_with_header_footer.q,\ + external_table_purge.q,\ external_table_with_space_in_location_path.q,\ import_exported_table.q,\ orc_llap_counters.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/6a8f4cbe/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 37e1a73..52cf237 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -67,6 +67,7 @@ import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey; import org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import 
org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.ErrorMsg; @@ -12853,6 +12854,13 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { } } } + + if (isExt && HiveConf.getBoolVar(conf, ConfVars.HIVE_EXTERNALTABLE_PURGE_DEFAULT)) { + if (retValue.get(MetaStoreUtils.EXTERNAL_TABLE_PURGE) == null) { + retValue.put(MetaStoreUtils.EXTERNAL_TABLE_PURGE, "true"); + } + } + boolean makeInsertOnly = !isTemporaryTable && HiveConf.getBoolVar(conf, ConfVars.HIVE_CREATE_TABLES_AS_INSERT_ONLY); boolean makeAcid = !isTemporaryTable && MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.CREATE_TABLES_AS_ACID) && http://git-wip-us.apache.org/repos/asf/hive/blob/6a8f4cbe/ql/src/test/queries/clientpositive/external_table_purge.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/external_table_purge.q b/ql/src/test/queries/clientpositive/external_table_purge.q new file mode 100644 index 0000000..144e49a --- /dev/null +++ b/ql/src/test/queries/clientpositive/external_table_purge.q @@ -0,0 +1,165 @@ + +dfs -rmr -f hdfs:///tmp/etp_1; + +dfs -mkdir -p hdfs:///tmp/etp_1; +dfs -copyFromLocal ../../data/files/kv1.txt hdfs:///tmp/etp_1/; + +create external table etp_1 (c1 string, c2 string) stored as textfile location 'hdfs:///tmp/etp_1'; +set test.comment=Table should have data; +set test.comment; +select count(*) from etp_1; + +drop table etp_1; + +-- Create external table in same location, data should still be there +create external table etp_1 (c1 string, c2 string) stored as textfile location 'hdfs:///tmp/etp_1'; +set test.comment=Table should have data; +set test.comment; +select count(*) from etp_1; +alter table etp_1 set tblproperties ('external.table.purge'='true'); + +drop table etp_1; + +-- Create external 
table in same location. Data should be gone due to external.table.purge option. +create external table etp_1 (c1 string, c2 string) stored as textfile location 'hdfs:///tmp/etp_1'; +set test.comment=Table should have no data; +set test.comment; +select count(*) from etp_1; + +drop table etp_1; + +-- +-- Test hive.external.table.purge.default +-- + +dfs -mkdir -p hdfs:///tmp/etp_1; +dfs -copyFromLocal ../../data/files/kv1.txt hdfs:///tmp/etp_1/; + +set hive.external.table.purge.default=true; + +-- Can still create table and override the default +create external table etp_1 (c1 string, c2 string) stored as textfile location 'hdfs:///tmp/etp_1' tblproperties ('external.table.purge'='false'); +show create table etp_1; +set test.comment=Table should have data; +set test.comment; +select count(*) from etp_1; + +drop table etp_1; + +-- Create with default options, external.table.purge should be set +create external table etp_1 (c1 string, c2 string) stored as textfile location 'hdfs:///tmp/etp_1'; +show create table etp_1; +set test.comment=Table should have data; +set test.comment; +select count(*) from etp_1; + +drop table etp_1; + +-- Data should be gone +create external table etp_1 (c1 string, c2 string) stored as textfile location 'hdfs:///tmp/etp_1'; +set test.comment=Table should have no data; +set test.comment; +select count(*) from etp_1; + +drop table etp_1; +dfs -rmr -f hdfs:///tmp/etp_1; + +set hive.external.table.purge.default=false; + +-- +-- Partitioned table +-- + +dfs -rmr -f hdfs:///tmp/etp_2; +dfs -mkdir -p hdfs:///tmp/etp_2/p1=part1; +dfs -mkdir -p hdfs:///tmp/etp_2/p1=part2; +dfs -copyFromLocal ../../data/files/kv1.txt hdfs:///tmp/etp_2/p1=part1/; +dfs -copyFromLocal ../../data/files/kv1.txt hdfs:///tmp/etp_2/p1=part2/; + +create external table etp_2 (c1 string, c2 string) partitioned by (p1 string) stored as textfile location 'hdfs:///tmp/etp_2'; +alter table etp_2 add partition (p1='part1'); +alter table etp_2 add partition (p1='part2'); +set 
test.comment=Table should have full data; +set test.comment; +select count(*) from etp_2; +alter table etp_2 drop partition (p1='part1'); +alter table etp_2 add partition (p1='part1'); +set test.comment=Table should have full data; +set test.comment; +select count(*) from etp_2; + +drop table etp_2; + +-- Create external table in same location, data should still be there +create external table etp_2 (c1 string, c2 string) partitioned by (p1 string) stored as textfile location 'hdfs:///tmp/etp_2'; +alter table etp_2 set tblproperties ('external.table.purge'='true'); +alter table etp_2 add partition (p1='part1'); +alter table etp_2 add partition (p1='part2'); +set test.comment=Table should have full data; +set test.comment; +select count(*) from etp_2; +alter table etp_2 drop partition (p1='part1'); +alter table etp_2 add partition (p1='part1'); +set test.comment=Table should have partial data; +set test.comment; +select count(*) from etp_2; + +drop table etp_2; + +-- Create external table in same location. Data should be gone due to external.table.purge option. 
+create external table etp_2 (c1 string, c2 string) partitioned by (p1 string) stored as textfile location 'hdfs:///tmp/etp_2'; +alter table etp_2 add partition (p1='part1'); +alter table etp_2 add partition (p1='part2'); +set test.comment=Table should have no data; +set test.comment; +select count(*) from etp_2; + +drop table etp_2; + +-- Test hive.external.table.purge.default +dfs -mkdir -p hdfs:///tmp/etp_2/p1=part1; +dfs -mkdir -p hdfs:///tmp/etp_2/p1=part2; +dfs -copyFromLocal ../../data/files/kv1.txt hdfs:///tmp/etp_2/p1=part1/; +dfs -copyFromLocal ../../data/files/kv1.txt hdfs:///tmp/etp_2/p1=part2/; + +set hive.external.table.purge.default=true; + +-- Can still create table and override the default +create external table etp_2 (c1 string, c2 string) partitioned by (p1 string) stored as textfile location 'hdfs:///tmp/etp_2' tblproperties ('external.table.purge'='false'); +show create table etp_2; +alter table etp_2 add partition (p1='part1'); +alter table etp_2 add partition (p1='part2'); +set test.comment=Table should have full data; +set test.comment; +select count(*) from etp_2; + +drop table etp_2; + +-- Create with default options, external.table.purge should be set +create external table etp_2 (c1 string, c2 string) partitioned by (p1 string) stored as textfile location 'hdfs:///tmp/etp_2'; +show create table etp_2; +alter table etp_2 add partition (p1='part1'); +alter table etp_2 add partition (p1='part2'); +set test.comment=Table should have full data; +set test.comment; +select count(*) from etp_2; +alter table etp_2 drop partition (p1='part1'); +alter table etp_2 add partition (p1='part1'); +set test.comment=Table should have partial data; +set test.comment; +select count(*) from etp_2; + +drop table etp_2; + +-- Data should be gone +create external table etp_2 (c1 string, c2 string) partitioned by (p1 string) stored as textfile location 'hdfs:///tmp/etp_2'; +alter table etp_2 add partition (p1='part1'); +alter table etp_2 add partition 
(p1='part2'); +set test.comment=Table should have no data; +set test.comment; +select count(*) from etp_2; + +drop table etp_2; +dfs -rmr -f hdfs:///tmp/etp_2; + +set hive.external.table.purge.default=false; http://git-wip-us.apache.org/repos/asf/hive/blob/6a8f4cbe/ql/src/test/results/clientpositive/llap/external_table_purge.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/external_table_purge.q.out b/ql/src/test/results/clientpositive/llap/external_table_purge.q.out new file mode 100644 index 0000000..24c778e --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/external_table_purge.q.out @@ -0,0 +1,635 @@ +PREHOOK: query: create external table etp_1 (c1 string, c2 string) stored as textfile location 'hdfs://### HDFS PATH ###' +PREHOOK: type: CREATETABLE +PREHOOK: Input: hdfs://### HDFS PATH ### +PREHOOK: Output: database:default +PREHOOK: Output: default@etp_1 +POSTHOOK: query: create external table etp_1 (c1 string, c2 string) stored as textfile location 'hdfs://### HDFS PATH ###' +POSTHOOK: type: CREATETABLE +POSTHOOK: Input: hdfs://### HDFS PATH ### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@etp_1 +test.comment=Table should have data +PREHOOK: query: select count(*) from etp_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@etp_1 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select count(*) from etp_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@etp_1 +POSTHOOK: Output: hdfs://### HDFS PATH ### +500 +PREHOOK: query: drop table etp_1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@etp_1 +PREHOOK: Output: default@etp_1 +POSTHOOK: query: drop table etp_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@etp_1 +POSTHOOK: Output: default@etp_1 +PREHOOK: query: create external table etp_1 (c1 string, c2 string) stored as textfile location 'hdfs://### HDFS PATH ###' +PREHOOK: type: CREATETABLE +PREHOOK: Input: hdfs://### HDFS PATH ### 
+PREHOOK: Output: database:default +PREHOOK: Output: default@etp_1 +POSTHOOK: query: create external table etp_1 (c1 string, c2 string) stored as textfile location 'hdfs://### HDFS PATH ###' +POSTHOOK: type: CREATETABLE +POSTHOOK: Input: hdfs://### HDFS PATH ### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@etp_1 +test.comment=Table should have data +PREHOOK: query: select count(*) from etp_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@etp_1 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select count(*) from etp_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@etp_1 +POSTHOOK: Output: hdfs://### HDFS PATH ### +500 +PREHOOK: query: alter table etp_1 set tblproperties ('external.table.purge'='true') +PREHOOK: type: ALTERTABLE_PROPERTIES +PREHOOK: Input: default@etp_1 +PREHOOK: Output: default@etp_1 +POSTHOOK: query: alter table etp_1 set tblproperties ('external.table.purge'='true') +POSTHOOK: type: ALTERTABLE_PROPERTIES +POSTHOOK: Input: default@etp_1 +POSTHOOK: Output: default@etp_1 +PREHOOK: query: drop table etp_1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@etp_1 +PREHOOK: Output: default@etp_1 +POSTHOOK: query: drop table etp_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@etp_1 +POSTHOOK: Output: default@etp_1 +PREHOOK: query: create external table etp_1 (c1 string, c2 string) stored as textfile location 'hdfs://### HDFS PATH ###' +PREHOOK: type: CREATETABLE +PREHOOK: Input: hdfs://### HDFS PATH ### +PREHOOK: Output: database:default +PREHOOK: Output: default@etp_1 +POSTHOOK: query: create external table etp_1 (c1 string, c2 string) stored as textfile location 'hdfs://### HDFS PATH ###' +POSTHOOK: type: CREATETABLE +POSTHOOK: Input: hdfs://### HDFS PATH ### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@etp_1 +test.comment=Table should have no data +PREHOOK: query: select count(*) from etp_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@etp_1 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: 
query: select count(*) from etp_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@etp_1 +POSTHOOK: Output: hdfs://### HDFS PATH ### +0 +PREHOOK: query: drop table etp_1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@etp_1 +PREHOOK: Output: default@etp_1 +POSTHOOK: query: drop table etp_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@etp_1 +POSTHOOK: Output: default@etp_1 +PREHOOK: query: create external table etp_1 (c1 string, c2 string) stored as textfile location 'hdfs://### HDFS PATH ###' tblproperties ('external.table.purge'='false') +PREHOOK: type: CREATETABLE +PREHOOK: Input: hdfs://### HDFS PATH ### +PREHOOK: Output: database:default +PREHOOK: Output: default@etp_1 +POSTHOOK: query: create external table etp_1 (c1 string, c2 string) stored as textfile location 'hdfs://### HDFS PATH ###' tblproperties ('external.table.purge'='false') +POSTHOOK: type: CREATETABLE +POSTHOOK: Input: hdfs://### HDFS PATH ### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@etp_1 +PREHOOK: query: show create table etp_1 +PREHOOK: type: SHOW_CREATETABLE +PREHOOK: Input: default@etp_1 +POSTHOOK: query: show create table etp_1 +POSTHOOK: type: SHOW_CREATETABLE +POSTHOOK: Input: default@etp_1 +CREATE EXTERNAL TABLE `etp_1`( + `c1` string, + `c2` string) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.mapred.TextInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' +LOCATION + 'hdfs://### HDFS PATH ###' +TBLPROPERTIES ( + 'bucketing_version'='2', + 'external.table.purge'='false', +#### A masked pattern was here #### +test.comment=Table should have data +PREHOOK: query: select count(*) from etp_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@etp_1 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select count(*) from etp_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@etp_1 +POSTHOOK: Output: hdfs://### HDFS PATH ### +500 +PREHOOK: query: drop 
table etp_1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@etp_1 +PREHOOK: Output: default@etp_1 +POSTHOOK: query: drop table etp_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@etp_1 +POSTHOOK: Output: default@etp_1 +PREHOOK: query: create external table etp_1 (c1 string, c2 string) stored as textfile location 'hdfs://### HDFS PATH ###' +PREHOOK: type: CREATETABLE +PREHOOK: Input: hdfs://### HDFS PATH ### +PREHOOK: Output: database:default +PREHOOK: Output: default@etp_1 +POSTHOOK: query: create external table etp_1 (c1 string, c2 string) stored as textfile location 'hdfs://### HDFS PATH ###' +POSTHOOK: type: CREATETABLE +POSTHOOK: Input: hdfs://### HDFS PATH ### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@etp_1 +PREHOOK: query: show create table etp_1 +PREHOOK: type: SHOW_CREATETABLE +PREHOOK: Input: default@etp_1 +POSTHOOK: query: show create table etp_1 +POSTHOOK: type: SHOW_CREATETABLE +POSTHOOK: Input: default@etp_1 +CREATE EXTERNAL TABLE `etp_1`( + `c1` string, + `c2` string) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.mapred.TextInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' +LOCATION + 'hdfs://### HDFS PATH ###' +TBLPROPERTIES ( + 'bucketing_version'='2', + 'external.table.purge'='true', +#### A masked pattern was here #### +test.comment=Table should have data +PREHOOK: query: select count(*) from etp_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@etp_1 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select count(*) from etp_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@etp_1 +POSTHOOK: Output: hdfs://### HDFS PATH ### +500 +PREHOOK: query: drop table etp_1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@etp_1 +PREHOOK: Output: default@etp_1 +POSTHOOK: query: drop table etp_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@etp_1 +POSTHOOK: Output: default@etp_1 +PREHOOK: query: create 
external table etp_1 (c1 string, c2 string) stored as textfile location 'hdfs://### HDFS PATH ###' +PREHOOK: type: CREATETABLE +PREHOOK: Input: hdfs://### HDFS PATH ### +PREHOOK: Output: database:default +PREHOOK: Output: default@etp_1 +POSTHOOK: query: create external table etp_1 (c1 string, c2 string) stored as textfile location 'hdfs://### HDFS PATH ###' +POSTHOOK: type: CREATETABLE +POSTHOOK: Input: hdfs://### HDFS PATH ### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@etp_1 +test.comment=Table should have no data +PREHOOK: query: select count(*) from etp_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@etp_1 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select count(*) from etp_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@etp_1 +POSTHOOK: Output: hdfs://### HDFS PATH ### +0 +PREHOOK: query: drop table etp_1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@etp_1 +PREHOOK: Output: default@etp_1 +POSTHOOK: query: drop table etp_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@etp_1 +POSTHOOK: Output: default@etp_1 +PREHOOK: query: create external table etp_2 (c1 string, c2 string) partitioned by (p1 string) stored as textfile location 'hdfs://### HDFS PATH ###' +PREHOOK: type: CREATETABLE +PREHOOK: Input: hdfs://### HDFS PATH ### +PREHOOK: Output: database:default +PREHOOK: Output: default@etp_2 +POSTHOOK: query: create external table etp_2 (c1 string, c2 string) partitioned by (p1 string) stored as textfile location 'hdfs://### HDFS PATH ###' +POSTHOOK: type: CREATETABLE +POSTHOOK: Input: hdfs://### HDFS PATH ### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@etp_2 +PREHOOK: query: alter table etp_2 add partition (p1='part1') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@etp_2 +POSTHOOK: query: alter table etp_2 add partition (p1='part1') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@etp_2 +POSTHOOK: Output: default@etp_2@p1=part1 +PREHOOK: query: alter table etp_2 
add partition (p1='part2') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@etp_2 +POSTHOOK: query: alter table etp_2 add partition (p1='part2') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@etp_2 +POSTHOOK: Output: default@etp_2@p1=part2 +test.comment=Table should have full data +PREHOOK: query: select count(*) from etp_2 +PREHOOK: type: QUERY +PREHOOK: Input: default@etp_2 +PREHOOK: Input: default@etp_2@p1=part1 +PREHOOK: Input: default@etp_2@p1=part2 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select count(*) from etp_2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@etp_2 +POSTHOOK: Input: default@etp_2@p1=part1 +POSTHOOK: Input: default@etp_2@p1=part2 +POSTHOOK: Output: hdfs://### HDFS PATH ### +1000 +PREHOOK: query: alter table etp_2 drop partition (p1='part1') +PREHOOK: type: ALTERTABLE_DROPPARTS +PREHOOK: Input: default@etp_2 +PREHOOK: Output: default@etp_2@p1=part1 +POSTHOOK: query: alter table etp_2 drop partition (p1='part1') +POSTHOOK: type: ALTERTABLE_DROPPARTS +POSTHOOK: Input: default@etp_2 +POSTHOOK: Output: default@etp_2@p1=part1 +PREHOOK: query: alter table etp_2 add partition (p1='part1') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@etp_2 +POSTHOOK: query: alter table etp_2 add partition (p1='part1') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@etp_2 +POSTHOOK: Output: default@etp_2@p1=part1 +test.comment=Table should have full data +PREHOOK: query: select count(*) from etp_2 +PREHOOK: type: QUERY +PREHOOK: Input: default@etp_2 +PREHOOK: Input: default@etp_2@p1=part1 +PREHOOK: Input: default@etp_2@p1=part2 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select count(*) from etp_2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@etp_2 +POSTHOOK: Input: default@etp_2@p1=part1 +POSTHOOK: Input: default@etp_2@p1=part2 +POSTHOOK: Output: hdfs://### HDFS PATH ### +1000 +PREHOOK: query: drop table etp_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: 
default@etp_2 +PREHOOK: Output: default@etp_2 +POSTHOOK: query: drop table etp_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@etp_2 +POSTHOOK: Output: default@etp_2 +PREHOOK: query: create external table etp_2 (c1 string, c2 string) partitioned by (p1 string) stored as textfile location 'hdfs://### HDFS PATH ###' +PREHOOK: type: CREATETABLE +PREHOOK: Input: hdfs://### HDFS PATH ### +PREHOOK: Output: database:default +PREHOOK: Output: default@etp_2 +POSTHOOK: query: create external table etp_2 (c1 string, c2 string) partitioned by (p1 string) stored as textfile location 'hdfs://### HDFS PATH ###' +POSTHOOK: type: CREATETABLE +POSTHOOK: Input: hdfs://### HDFS PATH ### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@etp_2 +PREHOOK: query: alter table etp_2 set tblproperties ('external.table.purge'='true') +PREHOOK: type: ALTERTABLE_PROPERTIES +PREHOOK: Input: default@etp_2 +PREHOOK: Output: default@etp_2 +POSTHOOK: query: alter table etp_2 set tblproperties ('external.table.purge'='true') +POSTHOOK: type: ALTERTABLE_PROPERTIES +POSTHOOK: Input: default@etp_2 +POSTHOOK: Output: default@etp_2 +PREHOOK: query: alter table etp_2 add partition (p1='part1') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@etp_2 +POSTHOOK: query: alter table etp_2 add partition (p1='part1') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@etp_2 +POSTHOOK: Output: default@etp_2@p1=part1 +PREHOOK: query: alter table etp_2 add partition (p1='part2') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@etp_2 +POSTHOOK: query: alter table etp_2 add partition (p1='part2') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@etp_2 +POSTHOOK: Output: default@etp_2@p1=part2 +test.comment=Table should have full data +PREHOOK: query: select count(*) from etp_2 +PREHOOK: type: QUERY +PREHOOK: Input: default@etp_2 +PREHOOK: Input: default@etp_2@p1=part1 +PREHOOK: Input: default@etp_2@p1=part2 +PREHOOK: Output: hdfs://### HDFS PATH ### 
+POSTHOOK: query: select count(*) from etp_2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@etp_2 +POSTHOOK: Input: default@etp_2@p1=part1 +POSTHOOK: Input: default@etp_2@p1=part2 +POSTHOOK: Output: hdfs://### HDFS PATH ### +1000 +PREHOOK: query: alter table etp_2 drop partition (p1='part1') +PREHOOK: type: ALTERTABLE_DROPPARTS +PREHOOK: Input: default@etp_2 +PREHOOK: Output: default@etp_2@p1=part1 +POSTHOOK: query: alter table etp_2 drop partition (p1='part1') +POSTHOOK: type: ALTERTABLE_DROPPARTS +POSTHOOK: Input: default@etp_2 +POSTHOOK: Output: default@etp_2@p1=part1 +PREHOOK: query: alter table etp_2 add partition (p1='part1') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@etp_2 +POSTHOOK: query: alter table etp_2 add partition (p1='part1') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@etp_2 +POSTHOOK: Output: default@etp_2@p1=part1 +test.comment=Table should have partial data +PREHOOK: query: select count(*) from etp_2 +PREHOOK: type: QUERY +PREHOOK: Input: default@etp_2 +PREHOOK: Input: default@etp_2@p1=part1 +PREHOOK: Input: default@etp_2@p1=part2 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select count(*) from etp_2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@etp_2 +POSTHOOK: Input: default@etp_2@p1=part1 +POSTHOOK: Input: default@etp_2@p1=part2 +POSTHOOK: Output: hdfs://### HDFS PATH ### +500 +PREHOOK: query: drop table etp_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@etp_2 +PREHOOK: Output: default@etp_2 +POSTHOOK: query: drop table etp_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@etp_2 +POSTHOOK: Output: default@etp_2 +PREHOOK: query: create external table etp_2 (c1 string, c2 string) partitioned by (p1 string) stored as textfile location 'hdfs://### HDFS PATH ###' +PREHOOK: type: CREATETABLE +PREHOOK: Input: hdfs://### HDFS PATH ### +PREHOOK: Output: database:default +PREHOOK: Output: default@etp_2 +POSTHOOK: query: create external table etp_2 (c1 string, c2 string) partitioned 
by (p1 string) stored as textfile location 'hdfs://### HDFS PATH ###' +POSTHOOK: type: CREATETABLE +POSTHOOK: Input: hdfs://### HDFS PATH ### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@etp_2 +PREHOOK: query: alter table etp_2 add partition (p1='part1') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@etp_2 +POSTHOOK: query: alter table etp_2 add partition (p1='part1') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@etp_2 +POSTHOOK: Output: default@etp_2@p1=part1 +PREHOOK: query: alter table etp_2 add partition (p1='part2') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@etp_2 +POSTHOOK: query: alter table etp_2 add partition (p1='part2') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@etp_2 +POSTHOOK: Output: default@etp_2@p1=part2 +test.comment=Table should have no data +PREHOOK: query: select count(*) from etp_2 +PREHOOK: type: QUERY +PREHOOK: Input: default@etp_2 +PREHOOK: Input: default@etp_2@p1=part1 +PREHOOK: Input: default@etp_2@p1=part2 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select count(*) from etp_2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@etp_2 +POSTHOOK: Input: default@etp_2@p1=part1 +POSTHOOK: Input: default@etp_2@p1=part2 +POSTHOOK: Output: hdfs://### HDFS PATH ### +0 +PREHOOK: query: drop table etp_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@etp_2 +PREHOOK: Output: default@etp_2 +POSTHOOK: query: drop table etp_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@etp_2 +POSTHOOK: Output: default@etp_2 +PREHOOK: query: create external table etp_2 (c1 string, c2 string) partitioned by (p1 string) stored as textfile location 'hdfs://### HDFS PATH ###' tblproperties ('external.table.purge'='false') +PREHOOK: type: CREATETABLE +PREHOOK: Input: hdfs://### HDFS PATH ### +PREHOOK: Output: database:default +PREHOOK: Output: default@etp_2 +POSTHOOK: query: create external table etp_2 (c1 string, c2 string) partitioned by (p1 string) stored as 
textfile location 'hdfs://### HDFS PATH ###' tblproperties ('external.table.purge'='false') +POSTHOOK: type: CREATETABLE +POSTHOOK: Input: hdfs://### HDFS PATH ### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@etp_2 +PREHOOK: query: show create table etp_2 +PREHOOK: type: SHOW_CREATETABLE +PREHOOK: Input: default@etp_2 +POSTHOOK: query: show create table etp_2 +POSTHOOK: type: SHOW_CREATETABLE +POSTHOOK: Input: default@etp_2 +CREATE EXTERNAL TABLE `etp_2`( + `c1` string, + `c2` string) +PARTITIONED BY ( + `p1` string) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.mapred.TextInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' +LOCATION + 'hdfs://### HDFS PATH ###' +TBLPROPERTIES ( + 'bucketing_version'='2', + 'external.table.purge'='false', +#### A masked pattern was here #### +PREHOOK: query: alter table etp_2 add partition (p1='part1') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@etp_2 +POSTHOOK: query: alter table etp_2 add partition (p1='part1') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@etp_2 +POSTHOOK: Output: default@etp_2@p1=part1 +PREHOOK: query: alter table etp_2 add partition (p1='part2') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@etp_2 +POSTHOOK: query: alter table etp_2 add partition (p1='part2') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@etp_2 +POSTHOOK: Output: default@etp_2@p1=part2 +test.comment=Table should have full data +PREHOOK: query: select count(*) from etp_2 +PREHOOK: type: QUERY +PREHOOK: Input: default@etp_2 +PREHOOK: Input: default@etp_2@p1=part1 +PREHOOK: Input: default@etp_2@p1=part2 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select count(*) from etp_2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@etp_2 +POSTHOOK: Input: default@etp_2@p1=part1 +POSTHOOK: Input: default@etp_2@p1=part2 +POSTHOOK: Output: hdfs://### 
HDFS PATH ### +1000 +PREHOOK: query: drop table etp_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@etp_2 +PREHOOK: Output: default@etp_2 +POSTHOOK: query: drop table etp_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@etp_2 +POSTHOOK: Output: default@etp_2 +PREHOOK: query: create external table etp_2 (c1 string, c2 string) partitioned by (p1 string) stored as textfile location 'hdfs://### HDFS PATH ###' +PREHOOK: type: CREATETABLE +PREHOOK: Input: hdfs://### HDFS PATH ### +PREHOOK: Output: database:default +PREHOOK: Output: default@etp_2 +POSTHOOK: query: create external table etp_2 (c1 string, c2 string) partitioned by (p1 string) stored as textfile location 'hdfs://### HDFS PATH ###' +POSTHOOK: type: CREATETABLE +POSTHOOK: Input: hdfs://### HDFS PATH ### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@etp_2 +PREHOOK: query: show create table etp_2 +PREHOOK: type: SHOW_CREATETABLE +PREHOOK: Input: default@etp_2 +POSTHOOK: query: show create table etp_2 +POSTHOOK: type: SHOW_CREATETABLE +POSTHOOK: Input: default@etp_2 +CREATE EXTERNAL TABLE `etp_2`( + `c1` string, + `c2` string) +PARTITIONED BY ( + `p1` string) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.mapred.TextInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' +LOCATION + 'hdfs://### HDFS PATH ###' +TBLPROPERTIES ( + 'bucketing_version'='2', + 'external.table.purge'='true', +#### A masked pattern was here #### +PREHOOK: query: alter table etp_2 add partition (p1='part1') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@etp_2 +POSTHOOK: query: alter table etp_2 add partition (p1='part1') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@etp_2 +POSTHOOK: Output: default@etp_2@p1=part1 +PREHOOK: query: alter table etp_2 add partition (p1='part2') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@etp_2 +POSTHOOK: query: alter table etp_2 
add partition (p1='part2') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@etp_2 +POSTHOOK: Output: default@etp_2@p1=part2 +test.comment=Table should have full data +PREHOOK: query: select count(*) from etp_2 +PREHOOK: type: QUERY +PREHOOK: Input: default@etp_2 +PREHOOK: Input: default@etp_2@p1=part1 +PREHOOK: Input: default@etp_2@p1=part2 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select count(*) from etp_2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@etp_2 +POSTHOOK: Input: default@etp_2@p1=part1 +POSTHOOK: Input: default@etp_2@p1=part2 +POSTHOOK: Output: hdfs://### HDFS PATH ### +1000 +PREHOOK: query: alter table etp_2 drop partition (p1='part1') +PREHOOK: type: ALTERTABLE_DROPPARTS +PREHOOK: Input: default@etp_2 +PREHOOK: Output: default@etp_2@p1=part1 +POSTHOOK: query: alter table etp_2 drop partition (p1='part1') +POSTHOOK: type: ALTERTABLE_DROPPARTS +POSTHOOK: Input: default@etp_2 +POSTHOOK: Output: default@etp_2@p1=part1 +PREHOOK: query: alter table etp_2 add partition (p1='part1') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@etp_2 +POSTHOOK: query: alter table etp_2 add partition (p1='part1') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@etp_2 +POSTHOOK: Output: default@etp_2@p1=part1 +test.comment=Table should have partial data +PREHOOK: query: select count(*) from etp_2 +PREHOOK: type: QUERY +PREHOOK: Input: default@etp_2 +PREHOOK: Input: default@etp_2@p1=part1 +PREHOOK: Input: default@etp_2@p1=part2 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select count(*) from etp_2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@etp_2 +POSTHOOK: Input: default@etp_2@p1=part1 +POSTHOOK: Input: default@etp_2@p1=part2 +POSTHOOK: Output: hdfs://### HDFS PATH ### +500 +PREHOOK: query: drop table etp_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@etp_2 +PREHOOK: Output: default@etp_2 +POSTHOOK: query: drop table etp_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@etp_2 
+POSTHOOK: Output: default@etp_2 +PREHOOK: query: create external table etp_2 (c1 string, c2 string) partitioned by (p1 string) stored as textfile location 'hdfs://### HDFS PATH ###' +PREHOOK: type: CREATETABLE +PREHOOK: Input: hdfs://### HDFS PATH ### +PREHOOK: Output: database:default +PREHOOK: Output: default@etp_2 +POSTHOOK: query: create external table etp_2 (c1 string, c2 string) partitioned by (p1 string) stored as textfile location 'hdfs://### HDFS PATH ###' +POSTHOOK: type: CREATETABLE +POSTHOOK: Input: hdfs://### HDFS PATH ### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@etp_2 +PREHOOK: query: alter table etp_2 add partition (p1='part1') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@etp_2 +POSTHOOK: query: alter table etp_2 add partition (p1='part1') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@etp_2 +POSTHOOK: Output: default@etp_2@p1=part1 +PREHOOK: query: alter table etp_2 add partition (p1='part2') +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@etp_2 +POSTHOOK: query: alter table etp_2 add partition (p1='part2') +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@etp_2 +POSTHOOK: Output: default@etp_2@p1=part2 +test.comment=Table should have no data +PREHOOK: query: select count(*) from etp_2 +PREHOOK: type: QUERY +PREHOOK: Input: default@etp_2 +PREHOOK: Input: default@etp_2@p1=part1 +PREHOOK: Input: default@etp_2@p1=part2 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select count(*) from etp_2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@etp_2 +POSTHOOK: Input: default@etp_2@p1=part1 +POSTHOOK: Input: default@etp_2@p1=part2 +POSTHOOK: Output: hdfs://### HDFS PATH ### +0 +PREHOOK: query: drop table etp_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@etp_2 +PREHOOK: Output: default@etp_2 +POSTHOOK: query: drop table etp_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@etp_2 +POSTHOOK: Output: default@etp_2 
http://git-wip-us.apache.org/repos/asf/hive/blob/6a8f4cbe/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java index e9d7e7c..35c0f5c 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java @@ -1538,7 +1538,8 @@ public class HiveMetaStore extends ThriftHiveMetastore { // If the table is not external and it might not be in a subdirectory of the database // add it's locations to the list of paths to delete Path tablePath = null; - if (table.getSd().getLocation() != null && !isExternal(table)) { + boolean tableDataShouldBeDeleted = checkTableDataShouldBeDeleted(table, deleteData); + if (table.getSd().getLocation() != null && tableDataShouldBeDeleted) { tablePath = wh.getDnsPath(new Path(table.getSd().getLocation())); if (!wh.isWritable(tablePath.getParent())) { throw new MetaException("Database metadata not deleted since table: " + @@ -1554,7 +1555,7 @@ public class HiveMetaStore extends ThriftHiveMetastore { // For each partition in each table, drop the partitions and get a list of // partitions' locations which might need to be deleted partitionPaths = dropPartitionsAndGetLocations(ms, catName, name, table.getTableName(), - tablePath, deleteData && !isExternal(table)); + tablePath, tableDataShouldBeDeleted); // Drop the table but not its data drop_table(MetaStoreUtils.prependCatalogToDbName(table.getCatName(), table.getDbName(), conf), @@ -2433,7 +2434,7 @@ public class HiveMetaStore extends ThriftHiveMetastore { throws NoSuchObjectException, MetaException, IOException, InvalidObjectException, InvalidInputException { boolean success = false; - 
boolean isExternal = false; + boolean tableDataShouldBeDeleted = false; Path tblPath = null; List<Path> partPaths = null; Table tbl = null; @@ -2456,7 +2457,7 @@ public class HiveMetaStore extends ThriftHiveMetastore { firePreEvent(new PreDropTableEvent(tbl, deleteData, this)); - isExternal = isExternal(tbl); + tableDataShouldBeDeleted = checkTableDataShouldBeDeleted(tbl, deleteData); if (tbl.getSd().getLocation() != null) { tblPath = new Path(tbl.getSd().getLocation()); if (!wh.isWritable(tblPath.getParent())) { @@ -2469,7 +2470,7 @@ public class HiveMetaStore extends ThriftHiveMetastore { // Drop the partitions and get a list of locations which need to be deleted partPaths = dropPartitionsAndGetLocations(ms, catName, dbname, name, tblPath, - deleteData && !isExternal); + tableDataShouldBeDeleted); // Drop any constraints on the table ms.dropConstraint(catName, dbname, name, null, true); @@ -2491,7 +2492,7 @@ public class HiveMetaStore extends ThriftHiveMetastore { } finally { if (!success) { ms.rollbackTransaction(); - } else if (deleteData && !isExternal) { + } else if (tableDataShouldBeDeleted) { // Data needs deletion. Check if trash may be skipped. 
// Delete the data in the partitions which have other locations deletePartitionData(partPaths, ifPurge, db); @@ -2511,6 +2512,14 @@ public class HiveMetaStore extends ThriftHiveMetastore { return success; } + private boolean checkTableDataShouldBeDeleted(Table tbl, boolean deleteData) { + if (deleteData && isExternal(tbl)) { + // External table data can be deleted if EXTERNAL_TABLE_PURGE is true + return isExternalTablePurge(tbl); + } + return deleteData; + } + /** * Deletes the data in a table's location, if it fails logs an error * @@ -2820,6 +2829,10 @@ public class HiveMetaStore extends ThriftHiveMetastore { return MetaStoreUtils.isExternalTable(table); } + private boolean isExternalTablePurge(Table table) { + return MetaStoreUtils.isPropertyTrue(table.getParameters(), MetaStoreUtils.EXTERNAL_TABLE_PURGE); + } + @Override @Deprecated public Table get_table(final String dbname, final String name) throws MetaException, @@ -4075,7 +4088,7 @@ public class HiveMetaStore extends ThriftHiveMetastore { boolean isArchived = false; Path archiveParentDir = null; boolean mustPurge = false; - boolean isExternalTbl = false; + boolean tableDataShouldBeDeleted = false; boolean isSourceOfReplication = false; Map<String, String> transactionalListenerResponses = Collections.emptyMap(); @@ -4093,7 +4106,7 @@ public class HiveMetaStore extends ThriftHiveMetastore { ms.openTransaction(); part = ms.getPartition(catName, db_name, tbl_name, part_vals); tbl = get_table_core(catName, db_name, tbl_name); - isExternalTbl = isExternal(tbl); + tableDataShouldBeDeleted = checkTableDataShouldBeDeleted(tbl, deleteData); firePreEvent(new PreDropPartitionEvent(tbl, part, deleteData, this)); mustPurge = isMustPurge(envContext, tbl); @@ -4131,7 +4144,7 @@ public class HiveMetaStore extends ThriftHiveMetastore { if (!success) { ms.rollbackTransaction(); } else if (deleteData && ((partPath != null) || (archiveParentDir != null))) { - if (!isExternalTbl) { + if (tableDataShouldBeDeleted) { if 
(mustPurge) { LOG.info("dropPartition() will purge " + partPath + " directly, skipping trash."); } @@ -4324,7 +4337,7 @@ public class HiveMetaStore extends ThriftHiveMetastore { } finally { if (!success) { ms.rollbackTransaction(); - } else if (deleteData && !isExternal(tbl)) { + } else if (checkTableDataShouldBeDeleted(tbl, deleteData)) { LOG.info( mustPurge? "dropPartition() will purge partition-directories directly, skipping trash." : "dropPartition() will move partition-directories to trash-directory."); http://git-wip-us.apache.org/repos/asf/hive/blob/6a8f4cbe/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java index 16a8c75..5b58a08 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java @@ -145,6 +145,8 @@ public class MetaStoreUtils { */ public static final String DB_EMPTY_MARKER = "!"; + public static final String EXTERNAL_TABLE_PURGE = "external.table.purge"; + // Right now we only support one special character '/'. // More special characters can be added accordingly in the future. // NOTE: @@ -565,7 +567,11 @@ public class MetaStoreUtils { } public static boolean isExternal(Map<String, String> tableParams){ - return "TRUE".equalsIgnoreCase(tableParams.get("EXTERNAL")); + return isPropertyTrue(tableParams, "EXTERNAL"); + } + + public static boolean isPropertyTrue(Map<String, String> tableParams, String prop) { + return "TRUE".equalsIgnoreCase(tableParams.get(prop)); } // check if stats need to be (re)calculated
