ad1happy2go commented on issue #9469:
URL: https://github.com/apache/hudi/issues/9469#issuecomment-1686194536
@praneethh I tried something similar using spark-sql, and it gave me only two
records. Below is the code I used:
```
-- Create the Hudi table issue_9469_23 (partitioned by log_dt) via CTAS and
-- seed it with a single row for emp_id = 1 in partition 2023-08-04.
-- Record key is emp_id; load_ts is both the precombine and the payload
-- ordering field, as set in the table properties below.
CREATE TABLE issue_9469_23
USING HUDI
PARTITIONED BY (log_dt)
TBLPROPERTIES (
    "hoodie.payload.ordering.field" = "load_ts",
    "hoodie.datasource.write.recordkey.field" = "emp_id",
    "hoodie.datasource.write.partitionpath.field" = "log_dt",
    "hoodie.datasource.write.precombine.field" = "load_ts",
    "hoodie.index.type" = "GLOBAL_SIMPLE",
    "hoodie.simple.index.update.partition.path" = "true",
    "hoodie.datasource.write.payload.class" = "org.apache.hudi.common.model.PartialUpdateAvroPayload",
    "hoodie.datasource.write.reconcile.schema" = "true",
    "hoodie.schema.on.read.enable" = "true",
    "hoodie.datasource.write.hive_style_partitioning" = "true",
    "hoodie.datasource.write.row.writer.enable" = "false"
)
AS
SELECT
    1 AS emp_id,
    'neo' AS emp_name,
    CAST('2023-08-04 12:00:00' AS TIMESTAMP) AS log_ts,
    CAST('2023-08-04 12:00:00' AS TIMESTAMP) AS load_ts,
    CAST('2023-08-04' AS DATE) AS log_dt;
-- Upsert two source rows into issue_9469_23, matching on emp_id:
--   * emp_id = 1 already exists (seeded in partition 2023-08-04) -> UPDATE
--   * emp_id = 2 is new                                          -> INSERT
-- Both source rows carry log_dt = 2023-08-05.
MERGE INTO issue_9469_23 AS target
USING (
    SELECT
        1 AS emp_id,
        'neo_1' AS emp_name,
        CAST('2023-08-05 12:00:00' AS TIMESTAMP) AS log_ts,
        CAST('2023-08-05 12:00:00' AS TIMESTAMP) AS load_ts,
        CAST('2023-08-05' AS DATE) AS log_dt
    -- The two rows are distinct by construction, so no dedup pass is needed.
    UNION ALL
    SELECT
        2 AS emp_id,
        'trinity' AS emp_name,
        CAST('2023-08-05 14:00:00' AS TIMESTAMP) AS log_ts,
        CAST('2023-08-05 15:00:00' AS TIMESTAMP) AS load_ts,
        CAST('2023-08-05' AS DATE) AS log_dt
) AS source
    ON target.emp_id = source.emp_id
WHEN MATCHED THEN
    -- Each target column is assigned exactly once, from the same-named source
    -- column. The earlier version assigned log_ts twice (from load_ts and from
    -- the DATE column log_dt) and never assigned log_dt.
    -- NOTE(review): assumes the intent was to move the matched row to the
    -- source's log_dt partition; confirm against the scenario in the issue.
    UPDATE SET
        target.log_ts = source.log_ts,
        target.log_dt = source.log_dt,
        target.load_ts = source.load_ts
WHEN NOT MATCHED THEN
    INSERT *;
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]