[
https://issues.apache.org/jira/browse/HUDI-5031?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
ASF GitHub Bot updated HUDI-5031:
---------------------------------
Labels: pull-request-available (was: )
> Hudi merge into creates empty partition files when the source table has
> partitions and the target table does not
> ----------------------------------------------------------------------------------------------------------------
>
> Key: HUDI-5031
> URL: https://issues.apache.org/jira/browse/HUDI-5031
> Project: Apache Hudi
> Issue Type: Bug
> Components: writer-core
> Environment: hudi:release-0.11.0
> spark: 3.2.1
> Reporter: weiming
> Assignee: weiming
> Priority: Minor
> Labels: pull-request-available
> Fix For: 0.11.0
>
>
>
> -- source table
> create table hudi_test_wm_mor_01 (
>   id int,
>   name string,
>   price double,
>   ts bigint,
>   dt string
> ) using hudi
> tblproperties (
>   type = 'mor',
>   primaryKey = 'id',
>   preCombineField = 'ts'
> )
> partitioned by (dt);
>
>
> -- target table
> create table hudi_test_wm_mor_02 (
>   id int,
>   name string,
>   price double,
>   ts bigint,
>   dt string
> ) using hudi
> tblproperties (
>   type = 'mor',
>   primaryKey = 'id',
>   preCombineField = 'ts'
> )
> partitioned by (dt);
>
> -- insert some data
> insert into hudi_test_wm_mor_01 (id,name,price,ts,dt) values
> (12,'a12',23.234,1648871782,'2021-12-11');
> insert into hudi_test_wm_mor_01 (id,name,price,ts,dt) values
> (13,'a13',24.234,1648871783,'2021-12-12');
> insert into hudi_test_wm_mor_01 (id,name,price,ts,dt) values
> (14,'a14',25.234,1648871784,'2021-12-13');
> insert into hudi_test_wm_mor_01 (id,name,price,ts,dt) values
> (15,'a15',26.234,1648871785,'2021-12-14');
> insert into hudi_test_wm_mor_01 (id,name,price,ts,dt) values
> (16,'a16',27.234,1648871786,'2021-12-15');
>
> insert into hudi_test_wm_mor_02 (id,name,price,ts,dt) values
> (12,'target12',88.1,1648871782,'2021-12-11');
> insert into hudi_test_wm_mor_02 (id,name,price,ts,dt) values
> (13,'target13',89.1,1648871783,'2021-12-12');
>
>
> -- merge operation
> merge into hudi_test_wm_mor_02 h0
> using (
>   select id, name, price, ts, dt from hudi_test_wm_mor_01
> ) s0
> on h0.id = s0.id and h0.dt = s0.dt
> when matched then update set *;
>
> Description:
> After the merge SQL executes, five partitions are created in the target table
> (2021-12-11, 2021-12-12, 2021-12-13, 2021-12-14, 2021-12-15).
> However, only two partitions contain matching data, so only those two
> partitions are expected (2021-12-11, 2021-12-12).
> The remaining three partitions (2021-12-13, 2021-12-14, 2021-12-15) should
> not be created.
> In extreme cases, a very large number of empty partitions is created in the
> target table.
>
--
This message was sent by Atlassian Jira
(v8.20.10#820010)