linliu-code commented on issue #13680:
URL: https://github.com/apache/hudi/issues/13680#issuecomment-3160388655
@mansipp , I followed your script but used vanilla Hudi 1.0.2, which
shows the newly added data correctly.
`export SPARK_VERSION=3.5 # or 3.4, 3.3
spark-shell --packages
org.apache.hudi:hudi-spark$SPARK_VERSION-bundle_2.12:1.0.2 \
--conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer' \
--conf
'spark.sql.catalog.spark_catalog=org.apache.spark.sql.hudi.catalog.HoodieCatalog'
\
--conf
'spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension' \
--conf 'spark.kryo.registrator=org.apache.spark.HoodieSparkKryoRegistrar'`
`scala> import org.apache.hudi.DataSourceWriteOptions
import org.apache.hudi.DataSourceWriteOptions
scala> import org.apache.spark.sql.SaveMode
import org.apache.spark.sql.SaveMode
scala> val df1 = Seq(
| ("100", "2015-01-01", "event_name_900",
"2015-01-01T13:51:39.340396Z", "type1"),
| ("101", "2015-01-01", "event_name_546",
"2015-01-01T12:14:58.597216Z", "type2"),
| ("102", "2015-01-01", "event_name_345",
"2015-01-01T13:51:40.417052Z", "type3"),
| ("103", "2015-01-01", "event_name_234",
"2015-01-01T13:51:40.519832Z", "type4"),
| ("104", "2015-01-01", "event_name_123",
"2015-01-01T12:15:00.512679Z", "type1"),
| ("105", "2015-01-01", "event_name_678",
"2015-01-01T13:51:42.248818Z", "type2"),
| ("106", "2015-01-01", "event_name_890",
"2015-01-01T13:51:44.735360Z", "type3"),
| ("107", "2015-01-01", "event_name_944",
"2015-01-01T13:51:45.019544Z", "type4"),
| ("108", "2015-01-01", "event_name_456",
"2015-01-01T13:51:45.208007Z", "type1"),
| ("109", "2015-01-01", "event_name_567",
"2015-01-01T13:51:45.369689Z", "type2"),
| ("110", "2015-01-01", "event_name_789",
"2015-01-01T12:15:05.664947Z", "type3"),
| ("111", "2015-01-01", "event_name_322",
"2015-01-01T13:51:47.388239Z", "type4")
| ).toDF("event_id", "event_date", "event_name", "event_ts",
"event_type")
25/08/06 07:15:38 WARN DFSPropertiesConfiguration: Properties file
file:/etc/hudi/conf/hudi-defaults.conf not found. Ignoring to load props file
25/08/06 07:15:38 WARN DFSPropertiesConfiguration: Cannot find
HUDI_CONF_DIR, please set it as the dir of hudi-defaults.conf
df1: org.apache.spark.sql.DataFrame = [event_id: string, event_date: string
... 3 more fields]
scala> var tableName = "mansipp_hudi_102_cow_fta_write_lf_table_update_test"
tableName: String = mansipp_hudi_102_cow_fta_write_lf_table_update_test
scala> var tablePath =
/tmp/mansipp_hudi_102_cow_fta_write_lf_table_update_test
<console>:24: error: not found: value /
var tablePath =
/tmp/mansipp_hudi_102_cow_fta_write_lf_table_update_test
^
<console>:24: error: not found: value /
var tablePath =
/tmp/mansipp_hudi_102_cow_fta_write_lf_table_update_test
^
scala> var tablePath =
"/tmp/mansipp_hudi_102_cow_fta_write_lf_table_update_test"
tablePath: String = /tmp/mansipp_hudi_102_cow_fta_write_lf_table_update_test
scala> df1.write.format("hudi")
res0: org.apache.spark.sql.DataFrameWriter[org.apache.spark.sql.Row] =
org.apache.spark.sql.DataFrameWriter@d6ee83b
scala> .option("hoodie.metadata.enable", "true")
res1: org.apache.spark.sql.DataFrameWriter[org.apache.spark.sql.Row] =
org.apache.spark.sql.DataFrameWriter@d6ee83b
scala> .option("hoodie.table.name", tableName)
res2: org.apache.spark.sql.DataFrameWriter[org.apache.spark.sql.Row] =
org.apache.spark.sql.DataFrameWriter@d6ee83b
scala> .option("hoodie.database.name", "default")
res3: org.apache.spark.sql.DataFrameWriter[org.apache.spark.sql.Row] =
org.apache.spark.sql.DataFrameWriter@d6ee83b
scala> .option("hoodie.datasource.write.operation", "upsert")
res4: org.apache.spark.sql.DataFrameWriter[org.apache.spark.sql.Row] =
org.apache.spark.sql.DataFrameWriter@d6ee83b
scala> .option("hoodie.datasource.write.table.type", "COPY_ON_WRITE")
res5: org.apache.spark.sql.DataFrameWriter[org.apache.spark.sql.Row] =
org.apache.spark.sql.DataFrameWriter@d6ee83b
scala> .option("hoodie.datasource.write.recordkey.field",
"event_id,event_date")
res6: org.apache.spark.sql.DataFrameWriter[org.apache.spark.sql.Row] =
org.apache.spark.sql.DataFrameWriter@d6ee83b
scala> .option("hoodie.datasource.write.partitionpath.field", "event_type")
res7: org.apache.spark.sql.DataFrameWriter[org.apache.spark.sql.Row] =
org.apache.spark.sql.DataFrameWriter@d6ee83b
scala> .option("hoodie.datasource.write.precombine.field", "event_ts")
res8: org.apache.spark.sql.DataFrameWriter[org.apache.spark.sql.Row] =
org.apache.spark.sql.DataFrameWriter@d6ee83b
scala> .option("hoodie.datasource.write.keygenerator.class",
"org.apache.hudi.keygen.ComplexKeyGenerator")
res9: org.apache.spark.sql.DataFrameWriter[org.apache.spark.sql.Row] =
org.apache.spark.sql.DataFrameWriter@d6ee83b
scala> .option("hoodie.datasource.hive_sync.enable", "true")
res10: org.apache.spark.sql.DataFrameWriter[org.apache.spark.sql.Row] =
org.apache.spark.sql.DataFrameWriter@d6ee83b
scala> .option("hoodie.datasource.meta.sync.enable", "true")
res11: org.apache.spark.sql.DataFrameWriter[org.apache.spark.sql.Row] =
org.apache.spark.sql.DataFrameWriter@d6ee83b
scala> .option("hoodie.index.type", "GLOBAL_BLOOM")
res12: org.apache.spark.sql.DataFrameWriter[org.apache.spark.sql.Row] =
org.apache.spark.sql.DataFrameWriter@d6ee83b
scala> .option("hoodie.datasource.hive_sync.mode", "hms")
res13: org.apache.spark.sql.DataFrameWriter[org.apache.spark.sql.Row] =
org.apache.spark.sql.DataFrameWriter@d6ee83b
scala> .option("hoodie.datasource.hive_sync.database", "default")
res14: org.apache.spark.sql.DataFrameWriter[org.apache.spark.sql.Row] =
org.apache.spark.sql.DataFrameWriter@d6ee83b
scala> .option("hoodie.datasource.hive_sync.table", tableName)
res15: org.apache.spark.sql.DataFrameWriter[org.apache.spark.sql.Row] =
org.apache.spark.sql.DataFrameWriter@d6ee83b
scala> .option("hoodie.datasource.hive_sync.partition_fields", "event_type")
res16: org.apache.spark.sql.DataFrameWriter[org.apache.spark.sql.Row] =
org.apache.spark.sql.DataFrameWriter@d6ee83b
scala> .option("hoodie.datasource.hive_sync.partition_extractor_class",
"org.apache.hudi.hive.MultiPartKeysValueExtractor")
res17: org.apache.spark.sql.DataFrameWriter[org.apache.spark.sql.Row] =
org.apache.spark.sql.DataFrameWriter@d6ee83b
scala> .mode(SaveMode.Overwrite)
res18: org.apache.spark.sql.DataFrameWriter[org.apache.spark.sql.Row] =
org.apache.spark.sql.DataFrameWriter@d6ee83b
scala> .save(tablePath)
25/08/06 07:16:33 WARN HiveConf: HiveConf of name hive.stats.jdbc.timeout
does not exist
25/08/06 07:16:33 WARN HiveConf: HiveConf of name hive.stats.retries.wait
does not exist
25/08/06 07:16:36 WARN ObjectStore: Version information not found in
metastore. hive.metastore.schema.verification is not enabled so recording the
schema version 2.3.0
25/08/06 07:16:36 WARN ObjectStore: setMetaStoreSchemaVersion called but
recording version is disabled: version = 2.3.0, comment = Set by MetaStore
[email protected]
25/08/06 07:16:42 WARN MetricsConfig: Cannot locate configuration: tried
hadoop-metrics2-hbase.properties,hadoop-metrics2.properties
# WARNING: Unable to attach Serviceability Agent. Unable to attach even with
module exceptions: [org.apache.hudi.org.openjdk.jol.vm.sa.SASupportException:
Sense failed., org.apache.hudi.org.openjdk.jol.vm.sa.SASupportException: Sense
failed., org.apache.hudi.org.openjdk.jol.vm.sa.SASupportException: Sense
failed.]
25/08/06 07:16:47 WARN HoodieBloomIndex: fallback to loading column ranges
from files
25/08/06 07:16:55 WARN log: Updating partition stats fast for:
mansipp_hudi_102_cow_fta_write_lf_table_update_test
25/08/06 07:16:55 WARN log: Updated size to 436099
25/08/06 07:16:55 WARN log: Updating partition stats fast for:
mansipp_hudi_102_cow_fta_write_lf_table_update_test
25/08/06 07:16:55 WARN log: Updated size to 436097
25/08/06 07:16:55 WARN log: Updating partition stats fast for:
mansipp_hudi_102_cow_fta_write_lf_table_update_test
25/08/06 07:16:55 WARN log: Updated size to 436098
25/08/06 07:16:55 WARN log: Updating partition stats fast for:
mansipp_hudi_102_cow_fta_write_lf_table_update_test
25/08/06 07:16:55 WARN log: Updated size to 436090
25/08/06 07:16:56 WARN HiveConf: HiveConf of name hive.stats.jdbc.timeout
does not exist
25/08/06 07:16:56 WARN HiveConf: HiveConf of name hive.stats.retries.wait
does not exist
25/08/06 07:16:58 WARN ObjectStore: Failed to get database global_temp,
returning NoSuchObjectException
scala> spark.sql("select * from
mansipp_hudi_102_cow_fta_write_lf_table_update_test order by event_id").show();
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
|_hoodie_commit_time|_hoodie_commit_seqno|
_hoodie_record_key|_hoodie_partition_path|
_hoodie_file_name|event_id|event_date| event_name|
event_ts|event_type|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
| 20250806071637700|20250806071637700...|event_id:100,even...|
type1|b282bd3f-41e6-4df...|
100|2015-01-01|event_name_900|2015-01-01T13:51:...| type1|
| 20250806071637700|20250806071637700...|event_id:101,even...|
type2|2677a131-5502-4b6...|
101|2015-01-01|event_name_546|2015-01-01T12:14:...| type2|
| 20250806071637700|20250806071637700...|event_id:102,even...|
type3|1d8fd795-117f-40f...|
102|2015-01-01|event_name_345|2015-01-01T13:51:...| type3|
| 20250806071637700|20250806071637700...|event_id:103,even...|
type4|48336739-a555-415...|
103|2015-01-01|event_name_234|2015-01-01T13:51:...| type4|
| 20250806071637700|20250806071637700...|event_id:104,even...|
type1|b282bd3f-41e6-4df...|
104|2015-01-01|event_name_123|2015-01-01T12:15:...| type1|
| 20250806071637700|20250806071637700...|event_id:105,even...|
type2|2677a131-5502-4b6...|
105|2015-01-01|event_name_678|2015-01-01T13:51:...| type2|
| 20250806071637700|20250806071637700...|event_id:106,even...|
type3|1d8fd795-117f-40f...|
106|2015-01-01|event_name_890|2015-01-01T13:51:...| type3|
| 20250806071637700|20250806071637700...|event_id:107,even...|
type4|48336739-a555-415...|
107|2015-01-01|event_name_944|2015-01-01T13:51:...| type4|
| 20250806071637700|20250806071637700...|event_id:108,even...|
type1|b282bd3f-41e6-4df...|
108|2015-01-01|event_name_456|2015-01-01T13:51:...| type1|
| 20250806071637700|20250806071637700...|event_id:109,even...|
type2|2677a131-5502-4b6...|
109|2015-01-01|event_name_567|2015-01-01T13:51:...| type2|
| 20250806071637700|20250806071637700...|event_id:110,even...|
type3|1d8fd795-117f-40f...|
110|2015-01-01|event_name_789|2015-01-01T12:15:...| type3|
| 20250806071637700|20250806071637700...|event_id:111,even...|
type4|48336739-a555-415...|
111|2015-01-01|event_name_322|2015-01-01T13:51:...| type4|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
scala> spark.sql("INSERT INTO
mansipp_hudi_102_cow_fta_write_lf_table_update_test (event_id, event_date,
event_name, event_ts, event_type) VALUES('112', DATE('2015-01-01'),
'event_name_123', TIMESTAMP('2015-01-01 13:51:45'), 'type5')")
25/08/06 07:17:23 WARN HoodieTableFileSystemView: Partition: type5 is not
available in store
25/08/06 07:17:23 WARN HoodieTableFileSystemView: Partition: type5 is not
available in store
25/08/06 07:17:26 WARN log: Updating partition stats fast for:
mansipp_hudi_102_cow_fta_write_lf_table_update_test
25/08/06 07:17:26 WARN log: Updated size to 435924
25/08/06 07:17:26 WARN HiveConf: HiveConf of name hive.stats.jdbc.timeout
does not exist
25/08/06 07:17:26 WARN HiveConf: HiveConf of name hive.stats.retries.wait
does not exist
res21: org.apache.spark.sql.DataFrame = []
scala>
scala> spark.sql("select * from
mansipp_hudi_102_cow_fta_write_lf_table_update_test order by event_id").show();
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
|_hoodie_commit_time|_hoodie_commit_seqno|
_hoodie_record_key|_hoodie_partition_path|
_hoodie_file_name|event_id|event_date| event_name|
event_ts|event_type|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
| 20250806071637700|20250806071637700...|event_id:100,even...|
type1|b282bd3f-41e6-4df...|
100|2015-01-01|event_name_900|2015-01-01T13:51:...| type1|
| 20250806071637700|20250806071637700...|event_id:101,even...|
type2|2677a131-5502-4b6...|
101|2015-01-01|event_name_546|2015-01-01T12:14:...| type2|
| 20250806071637700|20250806071637700...|event_id:102,even...|
type3|1d8fd795-117f-40f...|
102|2015-01-01|event_name_345|2015-01-01T13:51:...| type3|
| 20250806071637700|20250806071637700...|event_id:103,even...|
type4|48336739-a555-415...|
103|2015-01-01|event_name_234|2015-01-01T13:51:...| type4|
| 20250806071637700|20250806071637700...|event_id:104,even...|
type1|b282bd3f-41e6-4df...|
104|2015-01-01|event_name_123|2015-01-01T12:15:...| type1|
| 20250806071637700|20250806071637700...|event_id:105,even...|
type2|2677a131-5502-4b6...|
105|2015-01-01|event_name_678|2015-01-01T13:51:...| type2|
| 20250806071637700|20250806071637700...|event_id:106,even...|
type3|1d8fd795-117f-40f...|
106|2015-01-01|event_name_890|2015-01-01T13:51:...| type3|
| 20250806071637700|20250806071637700...|event_id:107,even...|
type4|48336739-a555-415...|
107|2015-01-01|event_name_944|2015-01-01T13:51:...| type4|
| 20250806071637700|20250806071637700...|event_id:108,even...|
type1|b282bd3f-41e6-4df...|
108|2015-01-01|event_name_456|2015-01-01T13:51:...| type1|
| 20250806071637700|20250806071637700...|event_id:109,even...|
type2|2677a131-5502-4b6...|
109|2015-01-01|event_name_567|2015-01-01T13:51:...| type2|
| 20250806071637700|20250806071637700...|event_id:110,even...|
type3|1d8fd795-117f-40f...|
110|2015-01-01|event_name_789|2015-01-01T12:15:...| type3|
| 20250806071637700|20250806071637700...|event_id:111,even...|
type4|48336739-a555-415...|
111|2015-01-01|event_name_322|2015-01-01T13:51:...| type4|
| 20250806071722764|20250806071722764...|event_id:112,even...|
type5|2566b8a7-59cb-485...| 112|2015-01-01|event_name_123| 2015-01-01
13:51:45| type5|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
`
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]