[
https://issues.apache.org/jira/browse/HIVE-28120?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17827461#comment-17827461
]
Ayush Saxena commented on HIVE-28120:
-------------------------------------
It is working for me on hive-master
{noformat}
0: jdbc:hive2://localhost:10000/ (default)> create table
tmp.test_iceberg_overwrite_union_all(
. . . . . . . . . . . . . . . . . . . . . > a string
. . . . . . . . . . . . . . . . . . . . . > )
. . . . . . . . . . . . . . . . . . . . . > stored by iceberg
. . . . . . . . . . . . . . . . . . . . . > ;
INFO : Compiling
command(queryId=hive_20240315101341_1f8481be-681c-41e2-8d86-9e50e1e5e0bd):
create table tmp.test_iceberg_overwrite_union_all(
a string
)
stored by iceberg
INFO : Semantic Analysis Completed (retrial = false)
INFO : Created Hive schema: Schema(fieldSchemas:null, properties:null)
INFO : Completed compiling
command(queryId=hive_20240315101341_1f8481be-681c-41e2-8d86-9e50e1e5e0bd); Time
taken: 0.038 seconds
INFO : Concurrency mode is disabled, not creating a lock manager
INFO : Executing
command(queryId=hive_20240315101341_1f8481be-681c-41e2-8d86-9e50e1e5e0bd):
create table tmp.test_iceberg_overwrite_union_all(
a string
)
stored by iceberg
INFO : Starting task [Stage-0:DDL] in serial mode
INFO : Completed executing
command(queryId=hive_20240315101341_1f8481be-681c-41e2-8d86-9e50e1e5e0bd); Time
taken: 0.715 seconds
No rows affected (0.782 seconds)
0: jdbc:hive2://localhost:10000/ (default)> insert overwrite table
tmp.test_iceberg_overwrite_union_all
. . . . . . . . . . . . . . . . . . . . . > select distinct 'a' union all
select distinct 'b';
INFO : Compiling
command(queryId=hive_20240315101410_dd62945c-39ef-46a7-888e-3fd12e3378ed):
insert overwrite table tmp.test_iceberg_overwrite_union_all
select distinct 'a' union all select distinct 'b'
INFO : Semantic Analysis Completed (retrial = false)
INFO : Created Hive schema: Schema(fieldSchemas:[FieldSchema(name:_c0,
type:string, comment:null)], properties:null)
INFO : Completed compiling
command(queryId=hive_20240315101410_dd62945c-39ef-46a7-888e-3fd12e3378ed); Time
taken: 1.607 seconds
INFO : Concurrency mode is disabled, not creating a lock manager
INFO : Executing
command(queryId=hive_20240315101410_dd62945c-39ef-46a7-888e-3fd12e3378ed):
insert overwrite table tmp.test_iceberg_overwrite_union_all
select distinct 'a' union all select distinct 'b'
INFO : Query ID = hive_20240315101410_dd62945c-39ef-46a7-888e-3fd12e3378ed
INFO : Total jobs = 1
INFO : Launching Job 1 out of 1
INFO : Starting task [Stage-1:MAPRED] in serial mode
INFO : Subscribed to counters: [] for queryId:
hive_20240315101410_dd62945c-39ef-46a7-888e-3fd12e3378ed
INFO : Tez session hasn't been created yet. Opening session
INFO : Dag name: insert overwrite tabl......lect distinct 'b' (Stage-1)
INFO : HS2 Host: [23cf215501f2], Query ID:
[hive_20240315101410_dd62945c-39ef-46a7-888e-3fd12e3378ed], Dag ID:
[dag_1710497653134_0001_1], DAG Session ID: [application_1710497653134_0001]
INFO : Status: Running (Executing on YARN cluster with App id
application_1710497653134_0001)
----------------------------------------------------------------------------------------------
VERTICES MODE STATUS TOTAL COMPLETED RUNNING PENDING
FAILED KILLED
----------------------------------------------------------------------------------------------
VERTICES MODE STATUS TOTAL COMPLETED RUNNING PENDING
FAILED KILLED
----------------------------------------------------------------------------------------------
Map 1 .......... container SUCCEEDED 1 1 0 0
0 0
Reducer 2 ...... container SUCCEEDED 1 1 0 0
0 0
Reducer 4 ...... container SUCCEEDED 1 1 0 0
0 0
Map 5 .......... container SUCCEEDED 1 1 0 0
0 0
Reducer 6 ...... container SUCCEEDED 1 1 0 0
0 0
----------------------------------------------------------------------------------------------
VERTICES: 05/05 [==========================>>] 100% ELAPSED TIME: 2.49 s
----------------------------------------------------------------------------------------------
INFO : Starting task [Stage-3:STATS] in serial mode
INFO : Executing stats task
INFO : Table tmp.test_iceberg_overwrite_union_all stats: [numFiles=2,
numRows=2, totalSize=774, rawDataSize=0, numFilesErasureCoded=0]
INFO : Completed executing
command(queryId=hive_20240315101410_dd62945c-39ef-46a7-888e-3fd12e3378ed); Time
taken: 4.056 seconds
2 rows affected (5.68 seconds)
0: jdbc:hive2://localhost:10000/ (default)> select * from
tmp.test_iceberg_overwrite_union_all;
INFO : Compiling
command(queryId=hive_20240315101431_4c9bb621-07b3-46c8-8fae-f4874f48e126):
select * from tmp.test_iceberg_overwrite_union_all
INFO : Semantic Analysis Completed (retrial = false)
INFO : Created Hive schema:
Schema(fieldSchemas:[FieldSchema(name:test_iceberg_overwrite_union_all.a,
type:string, comment:null)], properties:null)
INFO : Completed compiling
command(queryId=hive_20240315101431_4c9bb621-07b3-46c8-8fae-f4874f48e126); Time
taken: 0.118 seconds
INFO : Concurrency mode is disabled, not creating a lock manager
INFO : Executing
command(queryId=hive_20240315101431_4c9bb621-07b3-46c8-8fae-f4874f48e126):
select * from tmp.test_iceberg_overwrite_union_all
INFO : Completed executing
command(queryId=hive_20240315101431_4c9bb621-07b3-46c8-8fae-f4874f48e126); Time
taken: 0.0 seconds
+-------------------------------------+
| test_iceberg_overwrite_union_all.a |
+-------------------------------------+
| b |
| a |
+-------------------------------------+
2 rows selected (0.247 seconds)
0: jdbc:hive2://localhost:10000/ (default)> {noformat}
> When insert overwrite the iceberg table, data will loss if the sql contains
> union all
> -------------------------------------------------------------------------------------
>
> Key: HIVE-28120
> URL: https://issues.apache.org/jira/browse/HIVE-28120
> Project: Hive
> Issue Type: Bug
> Components: Iceberg integration
> Affects Versions: 4.0.0-beta-1
> Environment: hadoop version: 3.3.1
> hive version: 4.0.0-beta-1
> iceberg version: 1.3.0
> Reporter: xinmingchang
> Priority: Critical
>
> {{(1)}}
> create table tmp.test_iceberg_overwrite_union_all(
> a string
> )
> stored by iceberg
> ;
> {{(2)}}
> insert overwrite table tmp.test_iceberg_overwrite_union_all
> select distinct 'a' union all select distinct 'b';
> {{(3)}}
> select * from tmp.test_iceberg_overwrite_union_all;
>
> the result only has one record:
> +-------------------------------------+
> | test_iceberg_overwrite_union_all.a |
> +-------------------------------------+
> | a |
> +-------------------------------------+
> According to the hiveserver log, this query will start two jobs, and each job
> will be committed. The problem is that the job that is committed later is
> also an overwrite, causing the result of the first commit to be overwritten.
> like this:
> 2024-03-05T22:10:12,995 INFO [iceberg-commit-table-pool-0]:
> hive.HiveIcebergOutputCommitter () - Committing job has started for table:
> default_iceberg.tmp.test_iceberg_overwrite_union_all
> 2024-03-05T22:10:13,081 INFO [iceberg-commit-table-pool-1]:
> hive.HiveIcebergOutputCommitter () - Committing job has started for table:
> default_iceberg.tmp.test_iceberg_overwrite_union_all
> 2024-03-05T22:10:15,152 INFO [iceberg-commit-table-pool-0]:
> hive.HiveIcebergOutputCommitter () - Overwrite commit took 2157 ms for table:
> default_iceberg.tmp.test_iceberg_overwrite_union_all with 1 file(s)
> 2024-03-05T22:10:16,980 INFO [iceberg-commit-table-pool-1]:
> hive.HiveIcebergOutputCommitter () - Overwrite commit took 3899 ms for table:
> default_iceberg.tmp.test_iceberg_overwrite_union_all with 1 file(s)
--
This message was sent by Atlassian Jira
(v8.20.10#820010)