deepakpanda93 commented on issue #13680:
URL: https://github.com/apache/hudi/issues/13680#issuecomment-3569330006
Hello @mansipp,
I just tested with the Hudi 1.0.2 open-source bundle and was not able to
reproduce the issue.
```
scala> import org.apache.hudi.DataSourceWriteOptions
import org.apache.hudi.DataSourceWriteOptions
scala> import org.apache.spark.sql.SaveMode
import org.apache.spark.sql.SaveMode
scala> val tableName = "hudi_issue_13680"
tableName: String = hudi_issue_13680
scala> var tablePath = "file:///tmp/hudi_issue_13680"
tablePath: String = file:///tmp/hudi_issue_13680
scala> val df1 = Seq(
| ("100", "2015-01-01", "event_name_900",
"2015-01-01T13:51:39.340396Z", "type1"),
| ("101", "2015-01-01", "event_name_546",
"2015-01-01T12:14:58.597216Z", "type2"),
| ("102", "2015-01-01", "event_name_345",
"2015-01-01T13:51:40.417052Z", "type3"),
| ("103", "2015-01-01", "event_name_234",
"2015-01-01T13:51:40.519832Z", "type4"),
| ("104", "2015-01-01", "event_name_123",
"2015-01-01T12:15:00.512679Z", "type1"),
| ("105", "2015-01-01", "event_name_678",
"2015-01-01T13:51:42.248818Z", "type2"),
| ("106", "2015-01-01", "event_name_890",
"2015-01-01T13:51:44.735360Z", "type3"),
| ("107", "2015-01-01", "event_name_944",
"2015-01-01T13:51:45.019544Z", "type4"),
| ("108", "2015-01-01", "event_name_456",
"2015-01-01T13:51:45.208007Z", "type1"),
| ("109", "2015-01-01", "event_name_567",
"2015-01-01T13:51:45.369689Z", "type2"),
| ("110", "2015-01-01", "event_name_789",
"2015-01-01T12:15:05.664947Z", "type3"),
| ("111", "2015-01-01", "event_name_322",
"2015-01-01T13:51:47.388239Z", "type4")
| ).toDF("event_id", "event_date", "event_name", "event_ts",
"event_type")
25/11/24 07:21:49 WARN MetricsConfig: Cannot locate configuration: tried
hadoop-metrics2-s3a-file-system.properties,hadoop-metrics2.properties
df1: org.apache.spark.sql.DataFrame = [event_id: string, event_date: string
... 3 more fields]
scala> :paste
// Entering paste mode (ctrl-D to finish)
df1.write.format("hudi")
.option("hoodie.metadata.enable", "true")
.option("hoodie.table.name", tableName)
.option("hoodie.database.name", "default")
.option("hoodie.datasource.write.operation", "upsert")
.option("hoodie.datasource.write.table.type", "COPY_ON_WRITE")
.option("hoodie.datasource.write.recordkey.field", "event_id,event_date")
.option("hoodie.datasource.write.partitionpath.field", "event_type")
.option("hoodie.datasource.write.precombine.field", "event_ts")
.option("hoodie.datasource.write.keygenerator.class",
"org.apache.hudi.keygen.ComplexKeyGenerator")
.option("hoodie.datasource.hive_sync.enable", "true")
.option("hoodie.datasource.meta.sync.enable", "true")
.option("hoodie.index.type", "GLOBAL_BLOOM")
.option("hoodie.datasource.hive_sync.mode", "hms")
.option("hoodie.datasource.hive_sync.database", "default")
.option("hoodie.datasource.hive_sync.table", tableName)
.option("hoodie.datasource.hive_sync.partition_fields", "event_type")
.option("hoodie.datasource.hive_sync.partition_extractor_class",
"org.apache.hudi.hive.MultiPartKeysValueExtractor")
.mode(SaveMode.Overwrite)
.save(tablePath)
// Exiting paste mode, now interpreting.
25/11/24 07:22:09 WARN MetricsConfig: Cannot locate configuration: tried
hadoop-metrics2-hbase.properties,hadoop-metrics2.properties
# WARNING: Unable to get Instrumentation. Dynamic Attach failed. You may add
this JAR as -javaagent manually, or supply -Djdk.attach.allowAttachSelf
# WARNING: Unable to attach Serviceability Agent.
sun.jvm.hotspot.memory.Universe.getNarrowOopBase()
25/11/24 07:22:11 WARN HoodieBloomIndex: fallback to loading column ranges
from files
scala> spark.sql("select * from hudi_issue_13680 order by event_id").show();
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
|_hoodie_commit_time|_hoodie_commit_seqno|
_hoodie_record_key|_hoodie_partition_path|
_hoodie_file_name|event_id|event_date| event_name|
event_ts|event_type|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
| 20251124072209151|20251124072209151...|event_id:100,even...|
type1|91dd24f5-7abd-4f2...|
100|2015-01-01|event_name_900|2015-01-01T13:51:...| type1|
| 20251124072209151|20251124072209151...|event_id:101,even...|
type2|24a420cc-ab4b-4eb...|
101|2015-01-01|event_name_546|2015-01-01T12:14:...| type2|
| 20251124072209151|20251124072209151...|event_id:102,even...|
type3|11828ac2-24b2-437...|
102|2015-01-01|event_name_345|2015-01-01T13:51:...| type3|
| 20251124072209151|20251124072209151...|event_id:103,even...|
type4|79c3f13f-a5ec-4d3...|
103|2015-01-01|event_name_234|2015-01-01T13:51:...| type4|
| 20251124072209151|20251124072209151...|event_id:104,even...|
type1|91dd24f5-7abd-4f2...|
104|2015-01-01|event_name_123|2015-01-01T12:15:...| type1|
| 20251124072209151|20251124072209151...|event_id:105,even...|
type2|24a420cc-ab4b-4eb...|
105|2015-01-01|event_name_678|2015-01-01T13:51:...| type2|
| 20251124072209151|20251124072209151...|event_id:106,even...|
type3|11828ac2-24b2-437...|
106|2015-01-01|event_name_890|2015-01-01T13:51:...| type3|
| 20251124072209151|20251124072209151...|event_id:107,even...|
type4|79c3f13f-a5ec-4d3...|
107|2015-01-01|event_name_944|2015-01-01T13:51:...| type4|
| 20251124072209151|20251124072209151...|event_id:108,even...|
type1|91dd24f5-7abd-4f2...|
108|2015-01-01|event_name_456|2015-01-01T13:51:...| type1|
| 20251124072209151|20251124072209151...|event_id:109,even...|
type2|24a420cc-ab4b-4eb...|
109|2015-01-01|event_name_567|2015-01-01T13:51:...| type2|
| 20251124072209151|20251124072209151...|event_id:110,even...|
type3|11828ac2-24b2-437...|
110|2015-01-01|event_name_789|2015-01-01T12:15:...| type3|
| 20251124072209151|20251124072209151...|event_id:111,even...|
type4|79c3f13f-a5ec-4d3...|
111|2015-01-01|event_name_322|2015-01-01T13:51:...| type4|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
scala> spark.sql("INSERT INTO hudi_issue_13680 (event_id, event_date,
event_name, event_ts, event_type) VALUES('112', DATE('2015-01-01'),
'event_name_123', TIMESTAMP('2015-01-01 13:51:45'), 'type5')")
25/11/24 07:22:27 WARN HoodieTableFileSystemView: Partition: type5 is not
available in store
25/11/24 07:22:27 WARN HoodieTableFileSystemView: Partition: type5 is not
available in store
res2: org.apache.spark.sql.DataFrame = []
scala> spark.sql("select * from hudi_issue_13680 order by event_id").show();
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
|_hoodie_commit_time|_hoodie_commit_seqno|
_hoodie_record_key|_hoodie_partition_path|
_hoodie_file_name|event_id|event_date| event_name|
event_ts|event_type|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
| 20251124072209151|20251124072209151...|event_id:100,even...|
type1|91dd24f5-7abd-4f2...|
100|2015-01-01|event_name_900|2015-01-01T13:51:...| type1|
| 20251124072209151|20251124072209151...|event_id:101,even...|
type2|24a420cc-ab4b-4eb...|
101|2015-01-01|event_name_546|2015-01-01T12:14:...| type2|
| 20251124072209151|20251124072209151...|event_id:102,even...|
type3|11828ac2-24b2-437...|
102|2015-01-01|event_name_345|2015-01-01T13:51:...| type3|
| 20251124072209151|20251124072209151...|event_id:103,even...|
type4|79c3f13f-a5ec-4d3...|
103|2015-01-01|event_name_234|2015-01-01T13:51:...| type4|
| 20251124072209151|20251124072209151...|event_id:104,even...|
type1|91dd24f5-7abd-4f2...|
104|2015-01-01|event_name_123|2015-01-01T12:15:...| type1|
| 20251124072209151|20251124072209151...|event_id:105,even...|
type2|24a420cc-ab4b-4eb...|
105|2015-01-01|event_name_678|2015-01-01T13:51:...| type2|
| 20251124072209151|20251124072209151...|event_id:106,even...|
type3|11828ac2-24b2-437...|
106|2015-01-01|event_name_890|2015-01-01T13:51:...| type3|
| 20251124072209151|20251124072209151...|event_id:107,even...|
type4|79c3f13f-a5ec-4d3...|
107|2015-01-01|event_name_944|2015-01-01T13:51:...| type4|
| 20251124072209151|20251124072209151...|event_id:108,even...|
type1|91dd24f5-7abd-4f2...|
108|2015-01-01|event_name_456|2015-01-01T13:51:...| type1|
| 20251124072209151|20251124072209151...|event_id:109,even...|
type2|24a420cc-ab4b-4eb...|
109|2015-01-01|event_name_567|2015-01-01T13:51:...| type2|
| 20251124072209151|20251124072209151...|event_id:110,even...|
type3|11828ac2-24b2-437...|
110|2015-01-01|event_name_789|2015-01-01T12:15:...| type3|
| 20251124072209151|20251124072209151...|event_id:111,even...|
type4|79c3f13f-a5ec-4d3...|
111|2015-01-01|event_name_322|2015-01-01T13:51:...| type4|
| 20251124072227182|20251124072227182...|event_id:112,even...|
type5|44c71743-983e-4fc...| 112|2015-01-01|event_name_123| 2015-01-01
13:51:45| type5|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]