This is an automated email from the ASF dual-hosted git repository.
zhangbutao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new a455933d3d5 HIVE-27930: Insert/Load overwrite table partition does not
clean up directory before overwriting (#4915)(Kiran Velumuri, reviewed by
Indhumathi Muthumurugesh, Butao Zhang)
a455933d3d5 is described below
commit a455933d3d57fce20b053fa0f4fd023b28bd22de
Author: Kiran Velumuri <[email protected]>
AuthorDate: Wed Dec 13 19:44:35 2023 +0530
HIVE-27930: Insert/Load overwrite table partition does not clean up
directory before overwriting (#4915)(Kiran Velumuri, reviewed by Indhumathi
Muthumurugesh, Butao Zhang)
---
.../org/apache/hadoop/hive/ql/metadata/Hive.java | 7 +
.../insert_and_load_overwrite_drop_partition.q | 61 +++++
.../insert_and_load_overwrite_drop_partition.q.out | 256 +++++++++++++++++++++
3 files changed, 324 insertions(+)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index 5022b77fc36..cca06126369 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -5542,6 +5542,13 @@ private void constructOneLBLocationMap(FileStatus fSta,
throw new HiveException("Getting globStatus " + srcf.toString(), e);
}
+ // For insert/load overwrite cases, where external.table.purge is
disabled for the table, there may be stale
+ // partitions present in the table location after Alter table drop
partition operation. In such cases, oldPath will be
+ // null, since those partitions will not be present in metastore. Added
below check to clean up those stale partitions.
+ if (oldPath == null && isInsertOverwrite) {
+ deleteOldPathForReplace(destf, destf, conf, purge, deletePathFilter,
isNeedRecycle);
+ }
+
// the extra check is required to make ALTER TABLE ... CONCATENATE work
if (oldPath != null && (srcs != null || isInsertOverwrite)) {
deleteOldPathForReplace(destf, oldPath, conf, purge, deletePathFilter,
isNeedRecycle);
diff --git
a/ql/src/test/queries/clientpositive/insert_and_load_overwrite_drop_partition.q
b/ql/src/test/queries/clientpositive/insert_and_load_overwrite_drop_partition.q
new file mode 100644
index 00000000000..fabf4902280
--- /dev/null
+++
b/ql/src/test/queries/clientpositive/insert_and_load_overwrite_drop_partition.q
@@ -0,0 +1,61 @@
+CREATE EXTERNAL TABLE `table1`(
+ `name` string,
+ `number` string)
+PARTITIONED BY (
+ `part_col` string);
+
+CREATE EXTERNAL TABLE `table2`(
+ `name` string,
+ `number` string)
+PARTITIONED BY (
+ `part_col` string);
+
+insert into table table1 values ('a', '10', 'part1');
+insert into table table1 values ('b', '11', 'part1');
+insert into table table1 values ('a2', '2', 'part2');
+
+insert into table table2 values ('x', '100', 'part1');
+insert into table table2 values ('y', '101', 'part1');
+insert into table table2 values ('z', '102', 'part1');
+insert into table table2 values ('x2', '200', 'part2');
+insert into table table2 values ('y2', '201', 'part2');
+insert into table table2 values ('x3', '300', 'part3');
+
+--non empty input case
+alter table table2 drop partition(part_col='part1');
+
+select count(*) from table2 where part_col='part1';
+
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/table2/part_col=part1;
+
+insert overwrite table table2 partition(part_col='part1') select name, number
from table1 where part_col='part1';
+
+select count(*) from table2 where part_col='part1';
+
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/table2/part_col=part1;
+
+--empty input case
+alter table table2 drop partition(part_col='part2');
+
+select count(*) from table2 where part_col='part2';
+
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/table2/part_col=part2;
+
+insert overwrite table table2 partition(part_col='part2') select name, number
from table1 where part_col='dummy_part';
+
+select count(*) from table2 where part_col='part2';
+
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/table2/part_col=part2;
+
+--load overwrite partition
+alter table table2 drop partition(part_col='part3');
+
+select count(*) from table2 where part_col='part3';
+
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/table2/part_col=part3;
+
+LOAD DATA LOCAL INPATH '../../data/files/kv5.txt' OVERWRITE INTO TABLE table2
PARTITION(part_col='part3');
+
+select count(*) from table2 where part_col='part3';
+
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/table2/part_col=part3;
\ No newline at end of file
diff --git
a/ql/src/test/results/clientpositive/llap/insert_and_load_overwrite_drop_partition.q.out
b/ql/src/test/results/clientpositive/llap/insert_and_load_overwrite_drop_partition.q.out
new file mode 100644
index 00000000000..c16cb464568
--- /dev/null
+++
b/ql/src/test/results/clientpositive/llap/insert_and_load_overwrite_drop_partition.q.out
@@ -0,0 +1,256 @@
+PREHOOK: query: CREATE EXTERNAL TABLE `table1`(
+ `name` string,
+ `number` string)
+PARTITIONED BY (
+ `part_col` string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table1
+POSTHOOK: query: CREATE EXTERNAL TABLE `table1`(
+ `name` string,
+ `number` string)
+PARTITIONED BY (
+ `part_col` string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table1
+PREHOOK: query: CREATE EXTERNAL TABLE `table2`(
+ `name` string,
+ `number` string)
+PARTITIONED BY (
+ `part_col` string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table2
+POSTHOOK: query: CREATE EXTERNAL TABLE `table2`(
+ `name` string,
+ `number` string)
+PARTITIONED BY (
+ `part_col` string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table2
+PREHOOK: query: insert into table table1 values ('a', '10', 'part1')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@table1
+POSTHOOK: query: insert into table table1 values ('a', '10', 'part1')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@table1
+POSTHOOK: Output: default@table1@part_col=part1
+POSTHOOK: Lineage: table1 PARTITION(part_col=part1).name SCRIPT []
+POSTHOOK: Lineage: table1 PARTITION(part_col=part1).number SCRIPT []
+PREHOOK: query: insert into table table1 values ('b', '11', 'part1')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@table1
+POSTHOOK: query: insert into table table1 values ('b', '11', 'part1')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@table1
+POSTHOOK: Output: default@table1@part_col=part1
+POSTHOOK: Lineage: table1 PARTITION(part_col=part1).name SCRIPT []
+POSTHOOK: Lineage: table1 PARTITION(part_col=part1).number SCRIPT []
+PREHOOK: query: insert into table table1 values ('a2', '2', 'part2')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@table1
+POSTHOOK: query: insert into table table1 values ('a2', '2', 'part2')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@table1
+POSTHOOK: Output: default@table1@part_col=part2
+POSTHOOK: Lineage: table1 PARTITION(part_col=part2).name SCRIPT []
+POSTHOOK: Lineage: table1 PARTITION(part_col=part2).number SCRIPT []
+PREHOOK: query: insert into table table2 values ('x', '100', 'part1')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@table2
+POSTHOOK: query: insert into table table2 values ('x', '100', 'part1')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@table2
+POSTHOOK: Output: default@table2@part_col=part1
+POSTHOOK: Lineage: table2 PARTITION(part_col=part1).name SCRIPT []
+POSTHOOK: Lineage: table2 PARTITION(part_col=part1).number SCRIPT []
+PREHOOK: query: insert into table table2 values ('y', '101', 'part1')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@table2
+POSTHOOK: query: insert into table table2 values ('y', '101', 'part1')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@table2
+POSTHOOK: Output: default@table2@part_col=part1
+POSTHOOK: Lineage: table2 PARTITION(part_col=part1).name SCRIPT []
+POSTHOOK: Lineage: table2 PARTITION(part_col=part1).number SCRIPT []
+PREHOOK: query: insert into table table2 values ('z', '102', 'part1')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@table2
+POSTHOOK: query: insert into table table2 values ('z', '102', 'part1')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@table2
+POSTHOOK: Output: default@table2@part_col=part1
+POSTHOOK: Lineage: table2 PARTITION(part_col=part1).name SCRIPT []
+POSTHOOK: Lineage: table2 PARTITION(part_col=part1).number SCRIPT []
+PREHOOK: query: insert into table table2 values ('x2', '200', 'part2')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@table2
+POSTHOOK: query: insert into table table2 values ('x2', '200', 'part2')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@table2
+POSTHOOK: Output: default@table2@part_col=part2
+POSTHOOK: Lineage: table2 PARTITION(part_col=part2).name SCRIPT []
+POSTHOOK: Lineage: table2 PARTITION(part_col=part2).number SCRIPT []
+PREHOOK: query: insert into table table2 values ('y2', '201', 'part2')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@table2
+POSTHOOK: query: insert into table table2 values ('y2', '201', 'part2')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@table2
+POSTHOOK: Output: default@table2@part_col=part2
+POSTHOOK: Lineage: table2 PARTITION(part_col=part2).name SCRIPT []
+POSTHOOK: Lineage: table2 PARTITION(part_col=part2).number SCRIPT []
+PREHOOK: query: insert into table table2 values ('x3', '300', 'part3')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@table2
+POSTHOOK: query: insert into table table2 values ('x3', '300', 'part3')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@table2
+POSTHOOK: Output: default@table2@part_col=part3
+POSTHOOK: Lineage: table2 PARTITION(part_col=part3).name SCRIPT []
+POSTHOOK: Lineage: table2 PARTITION(part_col=part3).number SCRIPT []
+PREHOOK: query: alter table table2 drop partition(part_col='part1')
+PREHOOK: type: ALTERTABLE_DROPPARTS
+PREHOOK: Input: default@table2
+PREHOOK: Output: default@table2@part_col=part1
+POSTHOOK: query: alter table table2 drop partition(part_col='part1')
+POSTHOOK: type: ALTERTABLE_DROPPARTS
+POSTHOOK: Input: default@table2
+POSTHOOK: Output: default@table2@part_col=part1
+PREHOOK: query: select count(*) from table2 where part_col='part1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from table2 where part_col='part1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table2
+#### A masked pattern was here ####
+0
+Found 3 items
+#### A masked pattern was here ####
+PREHOOK: query: insert overwrite table table2 partition(part_col='part1')
select name, number from table1 where part_col='part1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+PREHOOK: Input: default@table1@part_col=part1
+PREHOOK: Output: default@table2@part_col=part1
+POSTHOOK: query: insert overwrite table table2 partition(part_col='part1')
select name, number from table1 where part_col='part1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+POSTHOOK: Input: default@table1@part_col=part1
+POSTHOOK: Output: default@table2@part_col=part1
+POSTHOOK: Lineage: table2 PARTITION(part_col=part1).name SIMPLE
[(table1)table1.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: table2 PARTITION(part_col=part1).number SIMPLE
[(table1)table1.FieldSchema(name:number, type:string, comment:null), ]
+PREHOOK: query: select count(*) from table2 where part_col='part1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table2
+PREHOOK: Input: default@table2@part_col=part1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from table2 where part_col='part1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table2
+POSTHOOK: Input: default@table2@part_col=part1
+#### A masked pattern was here ####
+2
+Found 1 items
+#### A masked pattern was here ####
+PREHOOK: query: alter table table2 drop partition(part_col='part2')
+PREHOOK: type: ALTERTABLE_DROPPARTS
+PREHOOK: Input: default@table2
+PREHOOK: Output: default@table2@part_col=part2
+POSTHOOK: query: alter table table2 drop partition(part_col='part2')
+POSTHOOK: type: ALTERTABLE_DROPPARTS
+POSTHOOK: Input: default@table2
+POSTHOOK: Output: default@table2@part_col=part2
+PREHOOK: query: select count(*) from table2 where part_col='part2'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from table2 where part_col='part2'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table2
+#### A masked pattern was here ####
+0
+Found 2 items
+#### A masked pattern was here ####
+PREHOOK: query: insert overwrite table table2 partition(part_col='part2')
select name, number from table1 where part_col='dummy_part'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+PREHOOK: Output: default@table2@part_col=part2
+POSTHOOK: query: insert overwrite table table2 partition(part_col='part2')
select name, number from table1 where part_col='dummy_part'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+POSTHOOK: Output: default@table2@part_col=part2
+POSTHOOK: Lineage: table2 PARTITION(part_col=part2).name SIMPLE
[(table1)table1.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: table2 PARTITION(part_col=part2).number SIMPLE
[(table1)table1.FieldSchema(name:number, type:string, comment:null), ]
+PREHOOK: query: select count(*) from table2 where part_col='part2'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table2
+PREHOOK: Input: default@table2@part_col=part2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from table2 where part_col='part2'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table2
+POSTHOOK: Input: default@table2@part_col=part2
+#### A masked pattern was here ####
+0
+PREHOOK: query: alter table table2 drop partition(part_col='part3')
+PREHOOK: type: ALTERTABLE_DROPPARTS
+PREHOOK: Input: default@table2
+PREHOOK: Output: default@table2@part_col=part3
+POSTHOOK: query: alter table table2 drop partition(part_col='part3')
+POSTHOOK: type: ALTERTABLE_DROPPARTS
+POSTHOOK: Input: default@table2
+POSTHOOK: Output: default@table2@part_col=part3
+PREHOOK: query: select count(*) from table2 where part_col='part3'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from table2 where part_col='part3'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table2
+#### A masked pattern was here ####
+0
+Found 1 items
+#### A masked pattern was here ####
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv5.txt' OVERWRITE
INTO TABLE table2 PARTITION(part_col='part3')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@table2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv5.txt' OVERWRITE
INTO TABLE table2 PARTITION(part_col='part3')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@table2
+POSTHOOK: Output: default@table2@part_col=part3
+PREHOOK: query: select count(*) from table2 where part_col='part3'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table2
+PREHOOK: Input: default@table2@part_col=part3
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from table2 where part_col='part3'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table2
+POSTHOOK: Input: default@table2@part_col=part3
+#### A masked pattern was here ####
+24
+Found 1 items
+#### A masked pattern was here ####