[ 
https://issues.apache.org/jira/browse/HIVE-28120?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17827461#comment-17827461
 ] 

Ayush Saxena commented on HIVE-28120:
-------------------------------------

It is working for me on hive-master
{noformat}
0: jdbc:hive2://localhost:10000/ (default)> create table 
tmp.test_iceberg_overwrite_union_all(
. . . . . . . . . . . . . . . . . . . . . > a string
. . . . . . . . . . . . . . . . . . . . . > )
. . . . . . . . . . . . . . . . . . . . . > stored by iceberg
. . . . . . . . . . . . . . . . . . . . . > ;
INFO  : Compiling 
command(queryId=hive_20240315101341_1f8481be-681c-41e2-8d86-9e50e1e5e0bd): 
create table tmp.test_iceberg_overwrite_union_all(
a string
)
stored by iceberg
INFO  : Semantic Analysis Completed (retrial = false)
INFO  : Created Hive schema: Schema(fieldSchemas:null, properties:null)
INFO  : Completed compiling 
command(queryId=hive_20240315101341_1f8481be-681c-41e2-8d86-9e50e1e5e0bd); Time 
taken: 0.038 seconds
INFO  : Concurrency mode is disabled, not creating a lock manager
INFO  : Executing 
command(queryId=hive_20240315101341_1f8481be-681c-41e2-8d86-9e50e1e5e0bd): 
create table tmp.test_iceberg_overwrite_union_all(
a string
)
stored by iceberg
INFO  : Starting task [Stage-0:DDL] in serial mode
INFO  : Completed executing 
command(queryId=hive_20240315101341_1f8481be-681c-41e2-8d86-9e50e1e5e0bd); Time 
taken: 0.715 seconds
No rows affected (0.782 seconds)
0: jdbc:hive2://localhost:10000/ (default)> insert overwrite table 
tmp.test_iceberg_overwrite_union_all
. . . . . . . . . . . . . . . . . . . . . > select distinct 'a' union all 
select distinct 'b';
INFO  : Compiling 
command(queryId=hive_20240315101410_dd62945c-39ef-46a7-888e-3fd12e3378ed): 
insert overwrite table tmp.test_iceberg_overwrite_union_all
select distinct 'a' union all select distinct 'b'
INFO  : Semantic Analysis Completed (retrial = false)
INFO  : Created Hive schema: Schema(fieldSchemas:[FieldSchema(name:_c0, 
type:string, comment:null)], properties:null)
INFO  : Completed compiling 
command(queryId=hive_20240315101410_dd62945c-39ef-46a7-888e-3fd12e3378ed); Time 
taken: 1.607 seconds
INFO  : Concurrency mode is disabled, not creating a lock manager
INFO  : Executing 
command(queryId=hive_20240315101410_dd62945c-39ef-46a7-888e-3fd12e3378ed): 
insert overwrite table tmp.test_iceberg_overwrite_union_all
select distinct 'a' union all select distinct 'b'
INFO  : Query ID = hive_20240315101410_dd62945c-39ef-46a7-888e-3fd12e3378ed
INFO  : Total jobs = 1
INFO  : Launching Job 1 out of 1
INFO  : Starting task [Stage-1:MAPRED] in serial mode
INFO  : Subscribed to counters: [] for queryId: 
hive_20240315101410_dd62945c-39ef-46a7-888e-3fd12e3378ed
INFO  : Tez session hasn't been created yet. Opening session
INFO  : Dag name: insert overwrite tabl......lect distinct 'b' (Stage-1)
INFO  : HS2 Host: [23cf215501f2], Query ID: 
[hive_20240315101410_dd62945c-39ef-46a7-888e-3fd12e3378ed], Dag ID: 
[dag_1710497653134_0001_1], DAG Session ID: [application_1710497653134_0001]
INFO  : Status: Running (Executing on YARN cluster with App id 
application_1710497653134_0001)


----------------------------------------------------------------------------------------------
        VERTICES      MODE        STATUS  TOTAL  COMPLETED  RUNNING  PENDING  
FAILED  KILLED 
----------------------------------------------------------------------------------------------
        VERTICES      MODE        STATUS  TOTAL  COMPLETED  RUNNING  PENDING  
FAILED  KILLED 
----------------------------------------------------------------------------------------------
Map 1 .......... container     SUCCEEDED      1          1        0        0    
   0       0 
Reducer 2 ...... container     SUCCEEDED      1          1        0        0    
   0       0 
Reducer 4 ...... container     SUCCEEDED      1          1        0        0    
   0       0 
Map 5 .......... container     SUCCEEDED      1          1        0        0    
   0       0 
Reducer 6 ...... container     SUCCEEDED      1          1        0        0    
   0       0 
----------------------------------------------------------------------------------------------
VERTICES: 05/05  [==========================>>] 100%  ELAPSED TIME: 2.49 s     
----------------------------------------------------------------------------------------------
INFO  : Starting task [Stage-3:STATS] in serial mode
INFO  : Executing stats task
INFO  : Table tmp.test_iceberg_overwrite_union_all stats: [numFiles=2, 
numRows=2, totalSize=774, rawDataSize=0, numFilesErasureCoded=0]
INFO  : Completed executing 
command(queryId=hive_20240315101410_dd62945c-39ef-46a7-888e-3fd12e3378ed); Time 
taken: 4.056 seconds
2 rows affected (5.68 seconds)
0: jdbc:hive2://localhost:10000/ (default)> select * from 
tmp.test_iceberg_overwrite_union_all;
INFO  : Compiling 
command(queryId=hive_20240315101431_4c9bb621-07b3-46c8-8fae-f4874f48e126): 
select * from tmp.test_iceberg_overwrite_union_all
INFO  : Semantic Analysis Completed (retrial = false)
INFO  : Created Hive schema: 
Schema(fieldSchemas:[FieldSchema(name:test_iceberg_overwrite_union_all.a, 
type:string, comment:null)], properties:null)
INFO  : Completed compiling 
command(queryId=hive_20240315101431_4c9bb621-07b3-46c8-8fae-f4874f48e126); Time 
taken: 0.118 seconds
INFO  : Concurrency mode is disabled, not creating a lock manager
INFO  : Executing 
command(queryId=hive_20240315101431_4c9bb621-07b3-46c8-8fae-f4874f48e126): 
select * from tmp.test_iceberg_overwrite_union_all
INFO  : Completed executing 
command(queryId=hive_20240315101431_4c9bb621-07b3-46c8-8fae-f4874f48e126); Time 
taken: 0.0 seconds
+-------------------------------------+
| test_iceberg_overwrite_union_all.a  |
+-------------------------------------+
| b                                   |
| a                                   |
+-------------------------------------+
2 rows selected (0.247 seconds)
0: jdbc:hive2://localhost:10000/ (default)> {noformat}

> When insert overwrite the iceberg table, data will loss if the sql contains 
> union all
> -------------------------------------------------------------------------------------
>
>                 Key: HIVE-28120
>                 URL: https://issues.apache.org/jira/browse/HIVE-28120
>             Project: Hive
>          Issue Type: Bug
>          Components: Iceberg integration
>    Affects Versions: 4.0.0-beta-1
>         Environment: hadoop version: 3.3.1
> hive version: 4.0.0-beta-1
> iceberg version: 1.3.0
>            Reporter: xinmingchang
>            Priority: Critical
>
> {{(1)}}
> create table tmp.test_iceberg_overwrite_union_all(
> a string
> )
> stored by iceberg
> ;
> {{(2)}}
> insert overwrite table tmp.test_iceberg_overwrite_union_all
> select distinct 'a' union all select distinct 'b';
> {{(3)}}
> select * from tmp.test_iceberg_overwrite_union_all;
>  
> the result only has one record:
> +-------------------------------------+
> | test_iceberg_overwrite_union_all.a  |
> +-------------------------------------+
> | a                                   |
> +-------------------------------------+
> According to the hiveserver log, this query will start two jobs, and each job 
> will be committed. The problem is that the job that is committed later is 
> also an overwrite, causing the result of the first commit to be overwritten. 
> like this:
> 2024-03-05T22:10:12,995 INFO  [iceberg-commit-table-pool-0]: 
> hive.HiveIcebergOutputCommitter () - Committing job has started for table: 
> default_iceberg.tmp.test_iceberg_overwrite_union_all
> 2024-03-05T22:10:13,081 INFO  [iceberg-commit-table-pool-1]: 
> hive.HiveIcebergOutputCommitter () - Committing job has started for table: 
> default_iceberg.tmp.test_iceberg_overwrite_union_all
> 2024-03-05T22:10:15,152 INFO  [iceberg-commit-table-pool-0]: 
> hive.HiveIcebergOutputCommitter () - Overwrite commit took 2157 ms for table: 
> default_iceberg.tmp.test_iceberg_overwrite_union_all with 1 file(s)
> 2024-03-05T22:10:16,980 INFO  [iceberg-commit-table-pool-1]: 
> hive.HiveIcebergOutputCommitter () - Overwrite commit took 3899 ms for table: 
> default_iceberg.tmp.test_iceberg_overwrite_union_all with 1 file(s)



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to