linliu-code commented on issue #13680:
URL: https://github.com/apache/hudi/issues/13680#issuecomment-3160388655

@mansipp, I followed your script but used vanilla Hudi 1.0.2, and it shows the newly added data correctly.
   
```bash
export SPARK_VERSION=3.5 # or 3.4, 3.3
spark-shell --packages org.apache.hudi:hudi-spark$SPARK_VERSION-bundle_2.12:1.0.2 \
  --conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer' \
  --conf 'spark.sql.catalog.spark_catalog=org.apache.spark.sql.hudi.catalog.HoodieCatalog' \
  --conf 'spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension' \
  --conf 'spark.kryo.registrator=org.apache.spark.HoodieSparkKryoRegistrar'
```
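As a sanity check that the shell picked up the intended bundle (and not some other Hudi build already on the cluster classpath), one way is to print the jar a Hudi class was loaded from; this is plain JVM reflection, nothing Hudi-specific:

```scala
// Print the jar the Hudi datasource classes were actually loaded from.
// If this is not the 1.0.2 bundle passed via --packages, another Hudi
// build is shadowing it on the classpath.
println(org.apache.hudi.DataSourceWriteOptions.getClass
  .getProtectionDomain.getCodeSource.getLocation)
```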
   
   
```scala
scala> import org.apache.hudi.DataSourceWriteOptions
import org.apache.hudi.DataSourceWriteOptions

scala> import org.apache.spark.sql.SaveMode
import org.apache.spark.sql.SaveMode

scala> val df1 = Seq(
     |  ("100", "2015-01-01", "event_name_900", "2015-01-01T13:51:39.340396Z", "type1"),
     |  ("101", "2015-01-01", "event_name_546", "2015-01-01T12:14:58.597216Z", "type2"),
     |  ("102", "2015-01-01", "event_name_345", "2015-01-01T13:51:40.417052Z", "type3"),
     |  ("103", "2015-01-01", "event_name_234", "2015-01-01T13:51:40.519832Z", "type4"),
     |  ("104", "2015-01-01", "event_name_123", "2015-01-01T12:15:00.512679Z", "type1"),
     |  ("105", "2015-01-01", "event_name_678", "2015-01-01T13:51:42.248818Z", "type2"),
     |  ("106", "2015-01-01", "event_name_890", "2015-01-01T13:51:44.735360Z", "type3"),
     |  ("107", "2015-01-01", "event_name_944", "2015-01-01T13:51:45.019544Z", "type4"),
     |  ("108", "2015-01-01", "event_name_456", "2015-01-01T13:51:45.208007Z", "type1"),
     |  ("109", "2015-01-01", "event_name_567", "2015-01-01T13:51:45.369689Z", "type2"),
     |  ("110", "2015-01-01", "event_name_789", "2015-01-01T12:15:05.664947Z", "type3"),
     |  ("111", "2015-01-01", "event_name_322", "2015-01-01T13:51:47.388239Z", "type4")
     |  ).toDF("event_id", "event_date", "event_name", "event_ts", "event_type")
df1: org.apache.spark.sql.DataFrame = [event_id: string, event_date: string ... 3 more fields]
   
scala> var tableName = "mansipp_hudi_102_cow_fta_write_lf_table_update_test"
tableName: String = mansipp_hudi_102_cow_fta_write_lf_table_update_test

scala> var tablePath = "/tmp/mansipp_hudi_102_cow_fta_write_lf_table_update_test"
tablePath: String = /tmp/mansipp_hudi_102_cow_fta_write_lf_table_update_test
   
scala> df1.write.format("hudi").
     |   option("hoodie.metadata.enable", "true").
     |   option("hoodie.table.name", tableName).
     |   option("hoodie.database.name", "default").
     |   option("hoodie.datasource.write.operation", "upsert").
     |   option("hoodie.datasource.write.table.type", "COPY_ON_WRITE").
     |   option("hoodie.datasource.write.recordkey.field", "event_id,event_date").
     |   option("hoodie.datasource.write.partitionpath.field", "event_type").
     |   option("hoodie.datasource.write.precombine.field", "event_ts").
     |   option("hoodie.datasource.write.keygenerator.class", "org.apache.hudi.keygen.ComplexKeyGenerator").
     |   option("hoodie.datasource.hive_sync.enable", "true").
     |   option("hoodie.datasource.meta.sync.enable", "true").
     |   option("hoodie.index.type", "GLOBAL_BLOOM").
     |   option("hoodie.datasource.hive_sync.mode", "hms").
     |   option("hoodie.datasource.hive_sync.database", "default").
     |   option("hoodie.datasource.hive_sync.table", tableName).
     |   option("hoodie.datasource.hive_sync.partition_fields", "event_type").
     |   option("hoodie.datasource.hive_sync.partition_extractor_class", "org.apache.hudi.hive.MultiPartKeysValueExtractor").
     |   mode(SaveMode.Overwrite).
     |   save(tablePath)
25/08/06 07:16:47 WARN HoodieBloomIndex: fallback to loading column ranges from files
(benign HiveConf, ObjectStore, and partition-stats warnings elided)
```
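As a cross-check independent of the Hive-synced catalog entry, the table can also be read back straight from the base path with the Hudi datasource (a standard snapshot read; the expected counts below are just what this run should produce):

```scala
// Snapshot read directly from the base path, bypassing the synced catalog table.
val snapshot = spark.read.format("hudi").load(tablePath)
snapshot.count() // expected 12 after the first upsert, 13 after the INSERT below
```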
   
```scala
scala> spark.sql("select * from mansipp_hudi_102_cow_fta_write_lf_table_update_test order by event_id").show();
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
|_hoodie_commit_time|_hoodie_commit_seqno|  _hoodie_record_key|_hoodie_partition_path|   _hoodie_file_name|event_id|event_date|    event_name|            event_ts|event_type|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
|  20250806071637700|20250806071637700...|event_id:100,even...|                 type1|b282bd3f-41e6-4df...|     100|2015-01-01|event_name_900|2015-01-01T13:51:...|     type1|
|  20250806071637700|20250806071637700...|event_id:101,even...|                 type2|2677a131-5502-4b6...|     101|2015-01-01|event_name_546|2015-01-01T12:14:...|     type2|
|  20250806071637700|20250806071637700...|event_id:102,even...|                 type3|1d8fd795-117f-40f...|     102|2015-01-01|event_name_345|2015-01-01T13:51:...|     type3|
|  20250806071637700|20250806071637700...|event_id:103,even...|                 type4|48336739-a555-415...|     103|2015-01-01|event_name_234|2015-01-01T13:51:...|     type4|
|  20250806071637700|20250806071637700...|event_id:104,even...|                 type1|b282bd3f-41e6-4df...|     104|2015-01-01|event_name_123|2015-01-01T12:15:...|     type1|
|  20250806071637700|20250806071637700...|event_id:105,even...|                 type2|2677a131-5502-4b6...|     105|2015-01-01|event_name_678|2015-01-01T13:51:...|     type2|
|  20250806071637700|20250806071637700...|event_id:106,even...|                 type3|1d8fd795-117f-40f...|     106|2015-01-01|event_name_890|2015-01-01T13:51:...|     type3|
|  20250806071637700|20250806071637700...|event_id:107,even...|                 type4|48336739-a555-415...|     107|2015-01-01|event_name_944|2015-01-01T13:51:...|     type4|
|  20250806071637700|20250806071637700...|event_id:108,even...|                 type1|b282bd3f-41e6-4df...|     108|2015-01-01|event_name_456|2015-01-01T13:51:...|     type1|
|  20250806071637700|20250806071637700...|event_id:109,even...|                 type2|2677a131-5502-4b6...|     109|2015-01-01|event_name_567|2015-01-01T13:51:...|     type2|
|  20250806071637700|20250806071637700...|event_id:110,even...|                 type3|1d8fd795-117f-40f...|     110|2015-01-01|event_name_789|2015-01-01T12:15:...|     type3|
|  20250806071637700|20250806071637700...|event_id:111,even...|                 type4|48336739-a555-415...|     111|2015-01-01|event_name_322|2015-01-01T13:51:...|     type4|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
```
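As an aside, the commit timeline can be inspected with Hudi's show_commits SQL procedure (usable here since the Hudi session extensions are loaded); a minimal sketch:

```scala
// List recent commits on the table; one commit is expected after the first write.
spark.sql("CALL show_commits(table => 'mansipp_hudi_102_cow_fta_write_lf_table_update_test', limit => 10)").show(false)
```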
   
   
```scala
scala> spark.sql("INSERT INTO mansipp_hudi_102_cow_fta_write_lf_table_update_test (event_id, event_date, event_name, event_ts, event_type) VALUES ('112', DATE('2015-01-01'), 'event_name_123', TIMESTAMP('2015-01-01 13:51:45'), 'type5')")
25/08/06 07:17:23 WARN HoodieTableFileSystemView: Partition: type5 is not available in store
(benign partition-stats and HiveConf warnings elided)
res21: org.apache.spark.sql.DataFrame = []
```
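The same row could equivalently be appended through the datasource writer; a minimal sketch reusing the key options from the first write (df2 and its contents are just an illustration of the same record 112, not part of the original run):

```scala
// Hypothetical df2 holding the same record 112; appending via the datasource
// writer routes it through the same upsert path as the SQL INSERT above.
val df2 = Seq(("112", "2015-01-01", "event_name_123", "2015-01-01 13:51:45", "type5"))
  .toDF("event_id", "event_date", "event_name", "event_ts", "event_type")
df2.write.format("hudi").
  option("hoodie.table.name", tableName).
  option("hoodie.datasource.write.operation", "upsert").
  option("hoodie.datasource.write.recordkey.field", "event_id,event_date").
  option("hoodie.datasource.write.partitionpath.field", "event_type").
  option("hoodie.datasource.write.precombine.field", "event_ts").
  option("hoodie.datasource.write.keygenerator.class", "org.apache.hudi.keygen.ComplexKeyGenerator").
  mode(SaveMode.Append).
  save(tablePath)
```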
   
```scala
scala> spark.sql("select * from mansipp_hudi_102_cow_fta_write_lf_table_update_test order by event_id").show();
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
|_hoodie_commit_time|_hoodie_commit_seqno|  _hoodie_record_key|_hoodie_partition_path|   _hoodie_file_name|event_id|event_date|    event_name|            event_ts|event_type|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
|  20250806071637700|20250806071637700...|event_id:100,even...|                 type1|b282bd3f-41e6-4df...|     100|2015-01-01|event_name_900|2015-01-01T13:51:...|     type1|
|  20250806071637700|20250806071637700...|event_id:101,even...|                 type2|2677a131-5502-4b6...|     101|2015-01-01|event_name_546|2015-01-01T12:14:...|     type2|
|  20250806071637700|20250806071637700...|event_id:102,even...|                 type3|1d8fd795-117f-40f...|     102|2015-01-01|event_name_345|2015-01-01T13:51:...|     type3|
|  20250806071637700|20250806071637700...|event_id:103,even...|                 type4|48336739-a555-415...|     103|2015-01-01|event_name_234|2015-01-01T13:51:...|     type4|
|  20250806071637700|20250806071637700...|event_id:104,even...|                 type1|b282bd3f-41e6-4df...|     104|2015-01-01|event_name_123|2015-01-01T12:15:...|     type1|
|  20250806071637700|20250806071637700...|event_id:105,even...|                 type2|2677a131-5502-4b6...|     105|2015-01-01|event_name_678|2015-01-01T13:51:...|     type2|
|  20250806071637700|20250806071637700...|event_id:106,even...|                 type3|1d8fd795-117f-40f...|     106|2015-01-01|event_name_890|2015-01-01T13:51:...|     type3|
|  20250806071637700|20250806071637700...|event_id:107,even...|                 type4|48336739-a555-415...|     107|2015-01-01|event_name_944|2015-01-01T13:51:...|     type4|
|  20250806071637700|20250806071637700...|event_id:108,even...|                 type1|b282bd3f-41e6-4df...|     108|2015-01-01|event_name_456|2015-01-01T13:51:...|     type1|
|  20250806071637700|20250806071637700...|event_id:109,even...|                 type2|2677a131-5502-4b6...|     109|2015-01-01|event_name_567|2015-01-01T13:51:...|     type2|
|  20250806071637700|20250806071637700...|event_id:110,even...|                 type3|1d8fd795-117f-40f...|     110|2015-01-01|event_name_789|2015-01-01T12:15:...|     type3|
|  20250806071637700|20250806071637700...|event_id:111,even...|                 type4|48336739-a555-415...|     111|2015-01-01|event_name_322|2015-01-01T13:51:...|     type4|
|  20250806071722764|20250806071722764...|event_id:112,even...|                 type5|2566b8a7-59cb-485...|     112|2015-01-01|event_name_123| 2015-01-01 13:51:45|     type5|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
```
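To double-check that the second commit is what produced record 112, an incremental read scoped after the first commit works too; a sketch using the commit time from this run (it will differ on a re-run):

```scala
// Incremental read of everything committed after the first write
// (the begin instant is exclusive); should return only record 112.
spark.read.format("hudi").
  option("hoodie.datasource.query.type", "incremental").
  option("hoodie.datasource.read.begin.instanttime", "20250806071637700").
  load(tablePath).
  select("event_id", "event_name", "event_type").
  show()
```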
   
   

