rangareddy commented on issue #13680:
URL: https://github.com/apache/hudi/issues/13680#issuecomment-3451980096
Hi @mansipp,
Re-testing today confirms that I cannot reproduce the issue. Since I have lost
the previous reproduction steps, could you please retry the operation using the
open-source Hudi Spark bundle? (In the session below, `tableName` was set to
`hudi_issue_13680` and `tablePath` to the table's base path before the paste.)
```scala
scala> import org.apache.hudi.DataSourceWriteOptions
import org.apache.hudi.DataSourceWriteOptions
scala> import org.apache.spark.sql.SaveMode
import org.apache.spark.sql.SaveMode
scala> val df1 = Seq(
| ("100", "2015-01-01", "event_name_900",
"2015-01-01T13:51:39.340396Z", "type1"),
| ("101", "2015-01-01", "event_name_546",
"2015-01-01T12:14:58.597216Z", "type2"),
| ("102", "2015-01-01", "event_name_345",
"2015-01-01T13:51:40.417052Z", "type3"),
| ("103", "2015-01-01", "event_name_234",
"2015-01-01T13:51:40.519832Z", "type4"),
| ("104", "2015-01-01", "event_name_123",
"2015-01-01T12:15:00.512679Z", "type1"),
| ("105", "2015-01-01", "event_name_678",
"2015-01-01T13:51:42.248818Z", "type2"),
| ("106", "2015-01-01", "event_name_890",
"2015-01-01T13:51:44.735360Z", "type3"),
| ("107", "2015-01-01", "event_name_944",
"2015-01-01T13:51:45.019544Z", "type4"),
| ("108", "2015-01-01", "event_name_456",
"2015-01-01T13:51:45.208007Z", "type1"),
| ("109", "2015-01-01", "event_name_567",
"2015-01-01T13:51:45.369689Z", "type2"),
| ("110", "2015-01-01", "event_name_789",
"2015-01-01T12:15:05.664947Z", "type3"),
| ("111", "2015-01-01", "event_name_322",
"2015-01-01T13:51:47.388239Z", "type4")
| ).toDF("event_id", "event_date", "event_name", "event_ts",
"event_type")
25/10/27 21:00:46 WARN DFSPropertiesConfiguration: Properties file
file:/etc/hudi/conf/hudi-defaults.conf not found. Ignoring to load props file
25/10/27 21:00:46 WARN DFSPropertiesConfiguration: Cannot find
HUDI_CONF_DIR, please set it as the dir of hudi-defaults.conf
df1: org.apache.spark.sql.DataFrame = [event_id: string, event_date: string
... 3 more fields]
scala> :paste
// Entering paste mode (ctrl-D to finish)
df1.write.format("hudi")
.option("hoodie.metadata.enable", "true")
.option("hoodie.table.name", tableName)
.option("hoodie.database.name", "default")
.option("hoodie.datasource.write.operation", "upsert")
.option("hoodie.datasource.write.table.type", "COPY_ON_WRITE")
.option("hoodie.datasource.write.recordkey.field", "event_id,event_date")
.option("hoodie.datasource.write.partitionpath.field", "event_type")
.option("hoodie.datasource.write.precombine.field", "event_ts")
.option("hoodie.datasource.write.keygenerator.class",
"org.apache.hudi.keygen.ComplexKeyGenerator")
.option("hoodie.datasource.hive_sync.enable", "true")
.option("hoodie.datasource.meta.sync.enable", "true")
.option("hoodie.index.type", "GLOBAL_BLOOM")
.option("hoodie.datasource.hive_sync.mode", "hms")
.option("hoodie.datasource.hive_sync.database", "default")
.option("hoodie.datasource.hive_sync.table", tableName)
.option("hoodie.datasource.hive_sync.partition_fields", "event_type")
.option("hoodie.datasource.hive_sync.partition_extractor_class",
"org.apache.hudi.hive.MultiPartKeysValueExtractor")
.mode(SaveMode.Overwrite)
.save(tablePath)
scala> spark.sql("select * from hudi_issue_13680 order by event_id").show();
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
|_hoodie_commit_time|_hoodie_commit_seqno|
_hoodie_record_key|_hoodie_partition_path|
_hoodie_file_name|event_id|event_date| event_name|
event_ts|event_type|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
| 20251027210113487|20251027210113487...|event_id:100,even...|
type1|18755352-0d8f-463...|
100|2015-01-01|event_name_900|2015-01-01T13:51:...| type1|
| 20251027210113487|20251027210113487...|event_id:101,even...|
type2|bf9cbd75-c800-40f...|
101|2015-01-01|event_name_546|2015-01-01T12:14:...| type2|
| 20251027210113487|20251027210113487...|event_id:102,even...|
type3|1f11a59f-ba60-418...|
102|2015-01-01|event_name_345|2015-01-01T13:51:...| type3|
| 20251027210113487|20251027210113487...|event_id:103,even...|
type4|ee267944-d497-45f...|
103|2015-01-01|event_name_234|2015-01-01T13:51:...| type4|
| 20251027210113487|20251027210113487...|event_id:104,even...|
type1|18755352-0d8f-463...|
104|2015-01-01|event_name_123|2015-01-01T12:15:...| type1|
| 20251027210113487|20251027210113487...|event_id:105,even...|
type2|bf9cbd75-c800-40f...|
105|2015-01-01|event_name_678|2015-01-01T13:51:...| type2|
| 20251027210113487|20251027210113487...|event_id:106,even...|
type3|1f11a59f-ba60-418...|
106|2015-01-01|event_name_890|2015-01-01T13:51:...| type3|
| 20251027210113487|20251027210113487...|event_id:107,even...|
type4|ee267944-d497-45f...|
107|2015-01-01|event_name_944|2015-01-01T13:51:...| type4|
| 20251027210113487|20251027210113487...|event_id:108,even...|
type1|18755352-0d8f-463...|
108|2015-01-01|event_name_456|2015-01-01T13:51:...| type1|
| 20251027210113487|20251027210113487...|event_id:109,even...|
type2|bf9cbd75-c800-40f...|
109|2015-01-01|event_name_567|2015-01-01T13:51:...| type2|
| 20251027210113487|20251027210113487...|event_id:110,even...|
type3|1f11a59f-ba60-418...|
110|2015-01-01|event_name_789|2015-01-01T12:15:...| type3|
| 20251027210113487|20251027210113487...|event_id:111,even...|
type4|ee267944-d497-45f...|
111|2015-01-01|event_name_322|2015-01-01T13:51:...| type4|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
scala> spark.sql("INSERT INTO hudi_issue_13680 (event_id, event_date,
event_name, event_ts, event_type) VALUES('112', DATE('2015-01-01'),
'event_name_123', TIMESTAMP('2015-01-01 13:51:45'), 'type5')")
25/10/27 21:03:17 WARN HoodieTableFileSystemView: Partition: type5 is not
available in store
25/10/27 21:03:17 WARN HoodieTableFileSystemView: Partition: type5 is not
available in store
res5: org.apache.spark.sql.DataFrame = []
scala> spark.sql("select * from hudi_issue_13680 order by event_id").show();
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
|_hoodie_commit_time|_hoodie_commit_seqno|
_hoodie_record_key|_hoodie_partition_path|
_hoodie_file_name|event_id|event_date| event_name|
event_ts|event_type|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
| 20251027210113487|20251027210113487...|event_id:100,even...|
type1|18755352-0d8f-463...|
100|2015-01-01|event_name_900|2015-01-01T13:51:...| type1|
| 20251027210113487|20251027210113487...|event_id:101,even...|
type2|bf9cbd75-c800-40f...|
101|2015-01-01|event_name_546|2015-01-01T12:14:...| type2|
| 20251027210113487|20251027210113487...|event_id:102,even...|
type3|1f11a59f-ba60-418...|
102|2015-01-01|event_name_345|2015-01-01T13:51:...| type3|
| 20251027210113487|20251027210113487...|event_id:103,even...|
type4|ee267944-d497-45f...|
103|2015-01-01|event_name_234|2015-01-01T13:51:...| type4|
| 20251027210113487|20251027210113487...|event_id:104,even...|
type1|18755352-0d8f-463...|
104|2015-01-01|event_name_123|2015-01-01T12:15:...| type1|
| 20251027210113487|20251027210113487...|event_id:105,even...|
type2|bf9cbd75-c800-40f...|
105|2015-01-01|event_name_678|2015-01-01T13:51:...| type2|
| 20251027210113487|20251027210113487...|event_id:106,even...|
type3|1f11a59f-ba60-418...|
106|2015-01-01|event_name_890|2015-01-01T13:51:...| type3|
| 20251027210113487|20251027210113487...|event_id:107,even...|
type4|ee267944-d497-45f...|
107|2015-01-01|event_name_944|2015-01-01T13:51:...| type4|
| 20251027210113487|20251027210113487...|event_id:108,even...|
type1|18755352-0d8f-463...|
108|2015-01-01|event_name_456|2015-01-01T13:51:...| type1|
| 20251027210113487|20251027210113487...|event_id:109,even...|
type2|bf9cbd75-c800-40f...|
109|2015-01-01|event_name_567|2015-01-01T13:51:...| type2|
| 20251027210113487|20251027210113487...|event_id:110,even...|
type3|1f11a59f-ba60-418...|
110|2015-01-01|event_name_789|2015-01-01T12:15:...| type3|
| 20251027210113487|20251027210113487...|event_id:111,even...|
type4|ee267944-d497-45f...|
111|2015-01-01|event_name_322|2015-01-01T13:51:...| type4|
| 20251027210316907|20251027210316907...|event_id:112,even...|
type5|3a3e838b-7b34-494...| 112|2015-01-01|event_name_123| 2015-01-01
13:51:45| type5|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]