This is an automated email from the ASF dual-hosted git repository.

sbadhya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new c529e5f7d7b HIVE-28069: Addendum: Iceberg: Implement Merge task functionality for Iceberg tables - Add serde properties for CTAS and CMV (#5162) (Sourabh Badhya reviewed by Denys Kuzmenko)
c529e5f7d7b is described below

commit c529e5f7d7bf33e78d6328a692f468d9b0677125
Author: Sourabh Badhya <[email protected]>
AuthorDate: Tue Mar 26 20:36:01 2024 +0530

    HIVE-28069: Addendum: Iceberg: Implement Merge task functionality for Iceberg tables - Add serde properties for CTAS and CMV (#5162) (Sourabh Badhya reviewed by Denys Kuzmenko)
---
 .../test/queries/positive/iceberg_merge_files.q    |  35 ++-
 .../positive/llap/iceberg_merge_files.q.out        | 255 ++++++++++++++++++++-
 .../hadoop/hive/ql/optimizer/GenMapRedUtils.java   |   2 +
 3 files changed, 286 insertions(+), 6 deletions(-)

diff --git a/iceberg/iceberg-handler/src/test/queries/positive/iceberg_merge_files.q b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_merge_files.q
index 5d5cd7aa6d8..92d03881b4e 100644
--- a/iceberg/iceberg-handler/src/test/queries/positive/iceberg_merge_files.q
+++ b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_merge_files.q
@@ -57,7 +57,7 @@ select count(distinct(file_path)) from default.parquet_part_source.files;
 
 -- Insert into the tables both for unpartitioned and partitioned cases for Parquet formats.
 insert into table parquet_source select * from parquet_source;
-insert into table parquet_part_source select * from parquet_part_source where ds = 102 union all select * from orc_part_source where ds = 103;
+insert into table parquet_part_source select * from parquet_part_source where ds = 102 union all select * from parquet_part_source where ds = 103;
 
 select count(*) from parquet_source;
 select count(*) from parquet_part_source;
@@ -93,3 +93,36 @@ select count(*) from avro_part_source;
 
 select count(distinct(file_path)) from default.avro_source.files;
 select count(distinct(file_path)) from default.avro_part_source.files;
+
+-- Generate 5 files in the CTAS source table.
+create table ctas_source stored by iceberg stored as orc as select * from src;
+insert into table ctas_source select * from src;
+insert into table ctas_source select * from src;
+insert into table ctas_source select * from src;
+insert into table ctas_source select * from src;
+select count(*) from ctas_source;
+
+-- Check whether a single file is created post merge task for CTAS and CMV.
+create table ctas_orc_table stored by iceberg stored as orc as select * from ctas_source;
+select count(distinct(file_path)) from default.ctas_orc_table.files;
+select count(*) from ctas_orc_table;
+
+create materialized view cmv_orc stored by iceberg stored as orc as select * from ctas_source;
+select count(distinct(file_path)) from default.cmv_orc.files;
+select count(*) from cmv_orc;
+
+create table ctas_parquet_table stored by iceberg stored as parquet as select * from ctas_source;
+select count(distinct(file_path)) from default.ctas_parquet_table.files;
+select count(*) from ctas_parquet_table;
+
+create materialized view cmv_parquet stored by iceberg stored as parquet as select * from ctas_source;
+select count(distinct(file_path)) from default.cmv_parquet.files;
+select count(*) from cmv_parquet;
+
+create table ctas_avro_table stored by iceberg stored as avro as select * from ctas_source;
+select count(distinct(file_path)) from default.ctas_avro_table.files;
+select count(*) from ctas_avro_table;
+
+create materialized view cmv_avro stored by iceberg stored as avro as select * from ctas_source;
+select count(distinct(file_path)) from default.cmv_avro.files;
+select count(*) from cmv_avro;
diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_merge_files.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_merge_files.q.out
index a17c1130efc..395567f8395 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_merge_files.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_merge_files.q.out
@@ -258,14 +258,12 @@ POSTHOOK: query: insert into table parquet_source select 
* from parquet_source
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_source
 POSTHOOK: Output: default@parquet_source
-PREHOOK: query: insert into table parquet_part_source select * from 
parquet_part_source where ds = 102 union all select * from orc_part_source 
where ds = 103
+PREHOOK: query: insert into table parquet_part_source select * from 
parquet_part_source where ds = 102 union all select * from parquet_part_source 
where ds = 103
 PREHOOK: type: QUERY
-PREHOOK: Input: default@orc_part_source
 PREHOOK: Input: default@parquet_part_source
 PREHOOK: Output: default@parquet_part_source
-POSTHOOK: query: insert into table parquet_part_source select * from 
parquet_part_source where ds = 102 union all select * from orc_part_source 
where ds = 103
+POSTHOOK: query: insert into table parquet_part_source select * from 
parquet_part_source where ds = 102 union all select * from parquet_part_source 
where ds = 103
 POSTHOOK: type: QUERY
-POSTHOOK: Input: default@orc_part_source
 POSTHOOK: Input: default@parquet_part_source
 POSTHOOK: Output: default@parquet_part_source
 PREHOOK: query: select count(*) from parquet_source
@@ -285,7 +283,7 @@ POSTHOOK: query: select count(*) from parquet_part_source
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_part_source
 #### A masked pattern was here ####
-5000
+4000
 PREHOOK: query: select count(distinct(file_path)) from 
default.parquet_source.files
 PREHOOK: type: QUERY
 PREHOOK: Input: default@parquet_source
@@ -456,3 +454,250 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@avro_part_source
 #### A masked pattern was here ####
 6
+PREHOOK: query: create table ctas_source stored by iceberg stored as orc as 
select * from src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ctas_source
+#### A masked pattern was here ####
+POSTHOOK: query: create table ctas_source stored by iceberg stored as orc as 
select * from src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ctas_source
+#### A masked pattern was here ####
+POSTHOOK: Lineage: ctas_source.key SIMPLE [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
+POSTHOOK: Lineage: ctas_source.value SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ]
+PREHOOK: query: insert into table ctas_source select * from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@ctas_source
+POSTHOOK: query: insert into table ctas_source select * from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@ctas_source
+PREHOOK: query: insert into table ctas_source select * from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@ctas_source
+POSTHOOK: query: insert into table ctas_source select * from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@ctas_source
+PREHOOK: query: insert into table ctas_source select * from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@ctas_source
+POSTHOOK: query: insert into table ctas_source select * from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@ctas_source
+PREHOOK: query: insert into table ctas_source select * from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@ctas_source
+POSTHOOK: query: insert into table ctas_source select * from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@ctas_source
+PREHOOK: query: select count(*) from ctas_source
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ctas_source
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from ctas_source
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ctas_source
+#### A masked pattern was here ####
+2500
+PREHOOK: query: create table ctas_orc_table stored by iceberg stored as orc as 
select * from ctas_source
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@ctas_source
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ctas_orc_table
+#### A masked pattern was here ####
+POSTHOOK: query: create table ctas_orc_table stored by iceberg stored as orc 
as select * from ctas_source
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@ctas_source
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ctas_orc_table
+#### A masked pattern was here ####
+POSTHOOK: Lineage: ctas_orc_table.key SIMPLE 
[(ctas_source)ctas_source.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: ctas_orc_table.value SIMPLE 
[(ctas_source)ctas_source.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select count(distinct(file_path)) from 
default.ctas_orc_table.files
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ctas_orc_table
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct(file_path)) from 
default.ctas_orc_table.files
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ctas_orc_table
+#### A masked pattern was here ####
+1
+PREHOOK: query: select count(*) from ctas_orc_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ctas_orc_table
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from ctas_orc_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ctas_orc_table
+#### A masked pattern was here ####
+2500
+PREHOOK: query: create materialized view cmv_orc stored by iceberg stored as 
orc as select * from ctas_source
+PREHOOK: type: CREATE_MATERIALIZED_VIEW
+PREHOOK: Input: default@ctas_source
+PREHOOK: Output: database:default
+PREHOOK: Output: default@cmv_orc
+#### A masked pattern was here ####
+POSTHOOK: query: create materialized view cmv_orc stored by iceberg stored as 
orc as select * from ctas_source
+POSTHOOK: type: CREATE_MATERIALIZED_VIEW
+POSTHOOK: Input: default@ctas_source
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@cmv_orc
+#### A masked pattern was here ####
+POSTHOOK: Lineage: cmv_orc.key SIMPLE 
[(ctas_source)ctas_source.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: cmv_orc.value SIMPLE 
[(ctas_source)ctas_source.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select count(distinct(file_path)) from default.cmv_orc.files
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cmv_orc
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct(file_path)) from default.cmv_orc.files
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cmv_orc
+#### A masked pattern was here ####
+1
+PREHOOK: query: select count(*) from cmv_orc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cmv_orc
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from cmv_orc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cmv_orc
+#### A masked pattern was here ####
+2500
+PREHOOK: query: create table ctas_parquet_table stored by iceberg stored as 
parquet as select * from ctas_source
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@ctas_source
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ctas_parquet_table
+#### A masked pattern was here ####
+POSTHOOK: query: create table ctas_parquet_table stored by iceberg stored as 
parquet as select * from ctas_source
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@ctas_source
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ctas_parquet_table
+#### A masked pattern was here ####
+POSTHOOK: Lineage: ctas_parquet_table.key SIMPLE 
[(ctas_source)ctas_source.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: ctas_parquet_table.value SIMPLE 
[(ctas_source)ctas_source.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select count(distinct(file_path)) from 
default.ctas_parquet_table.files
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ctas_parquet_table
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct(file_path)) from 
default.ctas_parquet_table.files
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ctas_parquet_table
+#### A masked pattern was here ####
+1
+PREHOOK: query: select count(*) from ctas_parquet_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ctas_parquet_table
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from ctas_parquet_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ctas_parquet_table
+#### A masked pattern was here ####
+2500
+PREHOOK: query: create materialized view cmv_parquet stored by iceberg stored 
as parquet as select * from ctas_source
+PREHOOK: type: CREATE_MATERIALIZED_VIEW
+PREHOOK: Input: default@ctas_source
+PREHOOK: Output: database:default
+PREHOOK: Output: default@cmv_parquet
+#### A masked pattern was here ####
+POSTHOOK: query: create materialized view cmv_parquet stored by iceberg stored 
as parquet as select * from ctas_source
+POSTHOOK: type: CREATE_MATERIALIZED_VIEW
+POSTHOOK: Input: default@ctas_source
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@cmv_parquet
+#### A masked pattern was here ####
+POSTHOOK: Lineage: cmv_parquet.key SIMPLE 
[(ctas_source)ctas_source.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: cmv_parquet.value SIMPLE 
[(ctas_source)ctas_source.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select count(distinct(file_path)) from 
default.cmv_parquet.files
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cmv_parquet
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct(file_path)) from 
default.cmv_parquet.files
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cmv_parquet
+#### A masked pattern was here ####
+1
+PREHOOK: query: select count(*) from cmv_parquet
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cmv_parquet
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from cmv_parquet
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cmv_parquet
+#### A masked pattern was here ####
+2500
+PREHOOK: query: create table ctas_avro_table stored by iceberg stored as avro 
as select * from ctas_source
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@ctas_source
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ctas_avro_table
+#### A masked pattern was here ####
+POSTHOOK: query: create table ctas_avro_table stored by iceberg stored as avro 
as select * from ctas_source
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@ctas_source
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ctas_avro_table
+#### A masked pattern was here ####
+POSTHOOK: Lineage: ctas_avro_table.key SIMPLE 
[(ctas_source)ctas_source.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: ctas_avro_table.value SIMPLE 
[(ctas_source)ctas_source.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select count(distinct(file_path)) from 
default.ctas_avro_table.files
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ctas_avro_table
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct(file_path)) from 
default.ctas_avro_table.files
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ctas_avro_table
+#### A masked pattern was here ####
+1
+PREHOOK: query: select count(*) from ctas_avro_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ctas_avro_table
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from ctas_avro_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ctas_avro_table
+#### A masked pattern was here ####
+2500
+PREHOOK: query: create materialized view cmv_avro stored by iceberg stored as 
avro as select * from ctas_source
+PREHOOK: type: CREATE_MATERIALIZED_VIEW
+PREHOOK: Input: default@ctas_source
+PREHOOK: Output: database:default
+PREHOOK: Output: default@cmv_avro
+#### A masked pattern was here ####
+POSTHOOK: query: create materialized view cmv_avro stored by iceberg stored as 
avro as select * from ctas_source
+POSTHOOK: type: CREATE_MATERIALIZED_VIEW
+POSTHOOK: Input: default@ctas_source
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@cmv_avro
+#### A masked pattern was here ####
+POSTHOOK: Lineage: cmv_avro.key SIMPLE 
[(ctas_source)ctas_source.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: cmv_avro.value SIMPLE 
[(ctas_source)ctas_source.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select count(distinct(file_path)) from default.cmv_avro.files
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cmv_avro
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct(file_path)) from default.cmv_avro.files
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cmv_avro
+#### A masked pattern was here ####
+1
+PREHOOK: query: select count(*) from cmv_avro
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cmv_avro
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from cmv_avro
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cmv_avro
+#### A masked pattern was here ####
+2500
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
index 1eb256ceb86..975126feb47 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
@@ -1739,6 +1739,7 @@ public final class GenMapRedUtils {
       if (createTableDesc != null) {
         storageHandlerClass = createTableDesc.getStorageHandler();
         mergeTaskProperties = new Properties();
+        mergeTaskProperties.putAll(createTableDesc.getSerdeProps());
         mergeTaskProperties.put(hive_metastoreConstants.META_TABLE_NAME, createTableDesc.getDbTableName());
         location = createTableDesc.getLocation();
       } else {
@@ -1746,6 +1747,7 @@ public final class GenMapRedUtils {
         if (createViewDesc != null) {
           storageHandlerClass = createViewDesc.getStorageHandler();
           mergeTaskProperties = new Properties();
+          mergeTaskProperties.putAll(createViewDesc.getSerdeProps());
           mergeTaskProperties.put(hive_metastoreConstants.META_TABLE_NAME, createViewDesc.getViewName());
           location = createViewDesc.getLocation();
         }

Reply via email to