This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 87c88de  HIVE-22941 : Empty files are inserted into external tables 
after HIVE-21714 - workaround (Laszlo Bodor via Ashutosh Chauhan)
87c88de is described below

commit 87c88def6d66dc0b9afca3835ed2f0777a3b061b
Author: Laszlo Bodor <[email protected]>
AuthorDate: Sat Feb 29 08:51:29 2020 -0800

    HIVE-22941 : Empty files are inserted into external tables after HIVE-21714 
- workaround (Laszlo Bodor via Ashutosh Chauhan)
    
    Signed-off-by: Ashutosh Chauhan <[email protected]>
---
 .../test/resources/testconfiguration.properties    |  1 +
 .../hadoop/hive/ql/exec/FileSinkOperator.java      |  3 +-
 .../clientpositive/empty_files_external_table.q    | 12 ++++
 .../llap/empty_files_external_table.q.out          | 70 ++++++++++++++++++++++
 .../llap/tez_fixed_bucket_pruning.q.out            |  8 +--
 5 files changed, 89 insertions(+), 5 deletions(-)

diff --git a/itests/src/test/resources/testconfiguration.properties 
b/itests/src/test/resources/testconfiguration.properties
index 743fe2c..18697cc 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -449,6 +449,7 @@ minillap.query.files=acid_bucket_pruning.q,\
   temp_table_drop_partitions_filter4.q
 
 minillaplocal.query.files=\
+  empty_files_external_table.q,\
   bucket_num_reducers_acid.q,\
   dec_str.q,\
   dp_counter_non_mm.q,\
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
index d0f452b..1bb52b0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
@@ -1336,7 +1336,8 @@ public class FileSinkOperator extends 
TerminalOperator<FileSinkDesc> implements
       Class<?> clazz = conf.getTableInfo().getOutputFileFormatClass();
       boolean isStreaming = 
StreamingOutputFormat.class.isAssignableFrom(clazz);
 
-      if (!isTez || isStreaming || this.isInsertOverwrite) {
+      // let empty file generation for mm/acid table as a quick and dirty 
workaround for HIVE-22941
+      if (!isTez || isStreaming || (this.isInsertOverwrite && 
(conf.isMmTable() || conf.isFullAcidTable()))) {
         createBucketFiles(fsp);
       }
     }
diff --git a/ql/src/test/queries/clientpositive/empty_files_external_table.q 
b/ql/src/test/queries/clientpositive/empty_files_external_table.q
new file mode 100644
index 0000000..28b2c01
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/empty_files_external_table.q
@@ -0,0 +1,12 @@
+create external table empty_external_table(age int, name string, id int) 
stored as orc;
+create external table t6 like empty_external_table;
+create external table t5 like empty_external_table;
+
+-- this below is not supposed to put an empty file into the external table 
(instead only clear its contents)
+insert overwrite table empty_external_table select a.* from t5 a full outer 
join t6 b on a.id=b.id and a.name=b.name and a.age=b.age;
+select count(*) from empty_external_table;
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/empty_external_table/;
+
+drop table t5;
+drop table t6;
+drop table empty_external_table;
\ No newline at end of file
diff --git 
a/ql/src/test/results/clientpositive/llap/empty_files_external_table.q.out 
b/ql/src/test/results/clientpositive/llap/empty_files_external_table.q.out
new file mode 100644
index 0000000..582d7bf
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/empty_files_external_table.q.out
@@ -0,0 +1,70 @@
+PREHOOK: query: create external table empty_external_table(age int, name 
string, id int) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@empty_external_table
+POSTHOOK: query: create external table empty_external_table(age int, name 
string, id int) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@empty_external_table
+PREHOOK: query: create external table t6 like empty_external_table
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t6
+POSTHOOK: query: create external table t6 like empty_external_table
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t6
+PREHOOK: query: create external table t5 like empty_external_table
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t5
+POSTHOOK: query: create external table t5 like empty_external_table
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t5
+PREHOOK: query: insert overwrite table empty_external_table select a.* from t5 
a full outer join t6 b on a.id=b.id and a.name=b.name and a.age=b.age
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t5
+PREHOOK: Input: default@t6
+PREHOOK: Output: default@empty_external_table
+POSTHOOK: query: insert overwrite table empty_external_table select a.* from 
t5 a full outer join t6 b on a.id=b.id and a.name=b.name and a.age=b.age
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t5
+POSTHOOK: Input: default@t6
+POSTHOOK: Output: default@empty_external_table
+POSTHOOK: Lineage: empty_external_table.age SIMPLE 
[(t5)a.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: empty_external_table.id SIMPLE [(t5)a.FieldSchema(name:id, 
type:int, comment:null), ]
+POSTHOOK: Lineage: empty_external_table.name SIMPLE 
[(t5)a.FieldSchema(name:name, type:string, comment:null), ]
+PREHOOK: query: select count(*) from empty_external_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@empty_external_table
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from empty_external_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@empty_external_table
+#### A masked pattern was here ####
+0
+PREHOOK: query: drop table t5
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t5
+PREHOOK: Output: default@t5
+POSTHOOK: query: drop table t5
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t5
+POSTHOOK: Output: default@t5
+PREHOOK: query: drop table t6
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t6
+PREHOOK: Output: default@t6
+POSTHOOK: query: drop table t6
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t6
+POSTHOOK: Output: default@t6
+PREHOOK: query: drop table empty_external_table
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@empty_external_table
+PREHOOK: Output: default@empty_external_table
+POSTHOOK: query: drop table empty_external_table
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@empty_external_table
+POSTHOOK: Output: default@empty_external_table
diff --git 
a/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out 
b/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out
index cfbbf82..0dc9821 100644
--- a/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out
+++ b/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out
@@ -738,7 +738,7 @@ STAGE PLANS:
                     columns.types 
bigint:bigint:date:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:varchar(1500):varchar(500):varchar(50):varchar(50):varchar(3000):varchar(50):varchar(50):varchar(50):varchar(1):decimal(32,6):timestamp:varchar(30):varchar(50):timestamp:bigint:bigint:varchar(70):varchar(250)
 #### A masked pattern was here ####
                     name default.l3_monthly_dw_dimplan
-                    numFiles 64
+                    numFiles 1
                     numRows 180340
                     rawDataSize 269826156
                     serialization.ddl struct l3_monthly_dw_dimplan { i64 
idp_warehouse_id, i64 idp_audit_id, date idp_data_date, i64 l3_snapshot_number, 
i64 plan_key, i64 project_key, i64 charge_code_key, i64 transclass_key, i64 
resource_key, i64 finplan_detail_object_id, i64 project_object_id, i64 
txn_class_object_id, i64 charge_code_object_id, i64 resoruce_object_id, 
varchar(1500) plan_name, varchar(500) plan_code, varchar(50) plan_type, 
varchar(50) period_type, varchar(3000) plan_desc [...]
@@ -761,7 +761,7 @@ STAGE PLANS:
                       columns.types 
bigint:bigint:date:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:varchar(1500):varchar(500):varchar(50):varchar(50):varchar(3000):varchar(50):varchar(50):varchar(50):varchar(1):decimal(32,6):timestamp:varchar(30):varchar(50):timestamp:bigint:bigint:varchar(70):varchar(250)
 #### A masked pattern was here ####
                       name default.l3_monthly_dw_dimplan
-                      numFiles 64
+                      numFiles 1
                       numRows 180340
                       rawDataSize 269826156
                       serialization.ddl struct l3_monthly_dw_dimplan { i64 
idp_warehouse_id, i64 idp_audit_id, date idp_data_date, i64 l3_snapshot_number, 
i64 plan_key, i64 project_key, i64 charge_code_key, i64 transclass_key, i64 
resource_key, i64 finplan_detail_object_id, i64 project_object_id, i64 
txn_class_object_id, i64 charge_code_object_id, i64 resoruce_object_id, 
varchar(1500) plan_name, varchar(500) plan_code, varchar(50) plan_type, 
varchar(50) period_type, varchar(3000) plan_de [...]
@@ -1253,7 +1253,7 @@ STAGE PLANS:
                     columns.types 
bigint:bigint:date:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:varchar(1500):varchar(500):varchar(50):varchar(50):varchar(3000):varchar(50):varchar(50):varchar(50):varchar(1):decimal(32,6):timestamp:varchar(30):varchar(50):timestamp:bigint:bigint:varchar(70):varchar(250)
 #### A masked pattern was here ####
                     name default.l3_monthly_dw_dimplan
-                    numFiles 64
+                    numFiles 1
                     numRows 180340
                     rawDataSize 269826156
                     serialization.ddl struct l3_monthly_dw_dimplan { i64 
idp_warehouse_id, i64 idp_audit_id, date idp_data_date, i64 l3_snapshot_number, 
i64 plan_key, i64 project_key, i64 charge_code_key, i64 transclass_key, i64 
resource_key, i64 finplan_detail_object_id, i64 project_object_id, i64 
txn_class_object_id, i64 charge_code_object_id, i64 resoruce_object_id, 
varchar(1500) plan_name, varchar(500) plan_code, varchar(50) plan_type, 
varchar(50) period_type, varchar(3000) plan_desc [...]
@@ -1276,7 +1276,7 @@ STAGE PLANS:
                       columns.types 
bigint:bigint:date:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:bigint:varchar(1500):varchar(500):varchar(50):varchar(50):varchar(3000):varchar(50):varchar(50):varchar(50):varchar(1):decimal(32,6):timestamp:varchar(30):varchar(50):timestamp:bigint:bigint:varchar(70):varchar(250)
 #### A masked pattern was here ####
                       name default.l3_monthly_dw_dimplan
-                      numFiles 64
+                      numFiles 1
                       numRows 180340
                       rawDataSize 269826156
                       serialization.ddl struct l3_monthly_dw_dimplan { i64 
idp_warehouse_id, i64 idp_audit_id, date idp_data_date, i64 l3_snapshot_number, 
i64 plan_key, i64 project_key, i64 charge_code_key, i64 transclass_key, i64 
resource_key, i64 finplan_detail_object_id, i64 project_object_id, i64 
txn_class_object_id, i64 charge_code_object_id, i64 resoruce_object_id, 
varchar(1500) plan_name, varchar(500) plan_code, varchar(50) plan_type, 
varchar(50) period_type, varchar(3000) plan_de [...]

Reply via email to