[
https://issues.apache.org/jira/browse/HUDI-9690?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Mansi Patel updated HUDI-9690:
------------------------------
Description:
Hudi 1.0.2 SQL Global_Bloom index not returning newly inserted data.
{code:java}
spark-shell --jars
/home/hadoop/hudi-spark3-bundle_2.12-1.0.2-amzn-0-SNAPSHOT.jar \
--conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \
--conf
"spark.sql.catalog.spark_catalog=org.apache.spark.sql.hudi.catalog.HoodieCatalog"
\
--conf
"spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension"
import org.apache.hudi.DataSourceWriteOptions
import org.apache.spark.sql.SaveMode
val df1 = Seq(
("100", "2015-01-01", "event_name_900", "2015-01-01T13:51:39.340396Z",
"type1"),
("101", "2015-01-01", "event_name_546", "2015-01-01T12:14:58.597216Z",
"type2"),
("102", "2015-01-01", "event_name_345", "2015-01-01T13:51:40.417052Z",
"type3"),
("103", "2015-01-01", "event_name_234", "2015-01-01T13:51:40.519832Z",
"type4"),
("104", "2015-01-01", "event_name_123", "2015-01-01T12:15:00.512679Z",
"type1"),
("105", "2015-01-01", "event_name_678", "2015-01-01T13:51:42.248818Z",
"type2"),
("106", "2015-01-01", "event_name_890", "2015-01-01T13:51:44.735360Z",
"type3"),
("107", "2015-01-01", "event_name_944", "2015-01-01T13:51:45.019544Z",
"type4"),
("108", "2015-01-01", "event_name_456", "2015-01-01T13:51:45.208007Z",
"type1"),
("109", "2015-01-01", "event_name_567", "2015-01-01T13:51:45.369689Z",
"type2"),
("110", "2015-01-01", "event_name_789", "2015-01-01T12:15:05.664947Z",
"type3"),
("111", "2015-01-01", "event_name_322", "2015-01-01T13:51:47.388239Z", "type4")
).toDF("event_id", "event_date", "event_name", "event_ts", "event_type")
var tableName = "mansipp_hudi_102_cow_fta_write_lf_table_update_test"
var tablePath = "s3://<path>/" + tableName + "/"
df1.write.format("hudi")
.option("hoodie.metadata.enable", "true")
.option("hoodie.table.name", tableName)
.option("hoodie.database.name", "default")
.option("hoodie.datasource.write.operation", "upsert")
.option("hoodie.datasource.write.table.type", "COPY_ON_WRITE")
.option("hoodie.datasource.write.recordkey.field", "event_id,event_date")
.option("hoodie.datasource.write.partitionpath.field", "event_type")
.option("hoodie.datasource.write.precombine.field", "event_ts")
.option("hoodie.datasource.write.keygenerator.class",
"org.apache.hudi.keygen.ComplexKeyGenerator")
.option("hoodie.datasource.hive_sync.enable", "true")
.option("hoodie.datasource.meta.sync.enable", "true")
.option("hoodie.index.type", "GLOBAL_BLOOM")
.option("hoodie.datasource.hive_sync.mode", "hms")
.option("hoodie.datasource.hive_sync.database", "default")
.option("hoodie.datasource.hive_sync.table", tableName)
.option("hoodie.datasource.hive_sync.partition_fields", "event_type")
.option("hoodie.datasource.hive_sync.partition_extractor_class",
"org.apache.hudi.hive.MultiPartKeysValueExtractor")
.mode(SaveMode.Overwrite)
.save(tablePath)
# SELECT
spark.sql("select * from mansipp_hudi_102_cow_fta_write_lf_table_update_test
order by event_id").show();
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
|_hoodie_commit_time|_hoodie_commit_seqno|
_hoodie_record_key|_hoodie_partition_path|
_hoodie_file_name|event_id|event_date| event_name|
event_ts|event_type|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
| 20250805000927187|20250805000927187...|event_id:100,even...|
type1|db420273-0543-4c0...|
100|2015-01-01|event_name_900|2015-01-01T13:51:...| type1|
| 20250805000927187|20250805000927187...|event_id:101,even...|
type2|d888bc7e-09c8-44a...|
101|2015-01-01|event_name_546|2015-01-01T12:14:...| type2|
| 20250805000927187|20250805000927187...|event_id:102,even...|
type3|198de9d0-7df0-4c5...|
102|2015-01-01|event_name_345|2015-01-01T13:51:...| type3|
| 20250805000927187|20250805000927187...|event_id:103,even...|
type4|72c26c48-802d-473...|
103|2015-01-01|event_name_234|2015-01-01T13:51:...| type4|
| 20250805000927187|20250805000927187...|event_id:104,even...|
type1|db420273-0543-4c0...|
104|2015-01-01|event_name_123|2015-01-01T12:15:...| type1|
| 20250805000927187|20250805000927187...|event_id:105,even...|
type2|d888bc7e-09c8-44a...|
105|2015-01-01|event_name_678|2015-01-01T13:51:...| type2|
| 20250805000927187|20250805000927187...|event_id:106,even...|
type3|198de9d0-7df0-4c5...|
106|2015-01-01|event_name_890|2015-01-01T13:51:...| type3|
| 20250805000927187|20250805000927187...|event_id:107,even...|
type4|72c26c48-802d-473...|
107|2015-01-01|event_name_944|2015-01-01T13:51:...| type4|
| 20250805000927187|20250805000927187...|event_id:108,even...|
type1|db420273-0543-4c0...|
108|2015-01-01|event_name_456|2015-01-01T13:51:...| type1|
| 20250805000927187|20250805000927187...|event_id:109,even...|
type2|d888bc7e-09c8-44a...|
109|2015-01-01|event_name_567|2015-01-01T13:51:...| type2|
| 20250805000927187|20250805000927187...|event_id:110,even...|
type3|198de9d0-7df0-4c5...|
110|2015-01-01|event_name_789|2015-01-01T12:15:...| type3|
| 20250805000927187|20250805000927187...|event_id:111,even...|
type4|72c26c48-802d-473...|
111|2015-01-01|event_name_322|2015-01-01T13:51:...| type4|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
# INSERT new data
spark.sql("INSERT INTO mansipp_hudi_102_cow_fta_write_lf_table_update_test
(event_id, event_date, event_name, event_ts, event_type) VALUES('112',
DATE('2015-01-01'), 'event_name_123', TIMESTAMP('2015-01-01 13:51:45'),
'type5')")
# SELECT - BUG: the newly inserted record (event_id 112, type5) is not returned
spark.sql("select * from mansipp_hudi_102_cow_fta_write_lf_table_update_test
order by event_id").show();
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
|_hoodie_commit_time|_hoodie_commit_seqno|
_hoodie_record_key|_hoodie_partition_path|
_hoodie_file_name|event_id|event_date| event_name|
event_ts|event_type|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
| 20250805000927187|20250805000927187...|event_id:100,even...|
type1|db420273-0543-4c0...|
100|2015-01-01|event_name_900|2015-01-01T13:51:...| type1|
| 20250805000927187|20250805000927187...|event_id:101,even...|
type2|d888bc7e-09c8-44a...|
101|2015-01-01|event_name_546|2015-01-01T12:14:...| type2|
| 20250805000927187|20250805000927187...|event_id:102,even...|
type3|198de9d0-7df0-4c5...|
102|2015-01-01|event_name_345|2015-01-01T13:51:...| type3|
| 20250805000927187|20250805000927187...|event_id:103,even...|
type4|72c26c48-802d-473...|
103|2015-01-01|event_name_234|2015-01-01T13:51:...| type4|
| 20250805000927187|20250805000927187...|event_id:104,even...|
type1|db420273-0543-4c0...|
104|2015-01-01|event_name_123|2015-01-01T12:15:...| type1|
| 20250805000927187|20250805000927187...|event_id:105,even...|
type2|d888bc7e-09c8-44a...|
105|2015-01-01|event_name_678|2015-01-01T13:51:...| type2|
| 20250805000927187|20250805000927187...|event_id:106,even...|
type3|198de9d0-7df0-4c5...|
106|2015-01-01|event_name_890|2015-01-01T13:51:...| type3|
| 20250805000927187|20250805000927187...|event_id:107,even...|
type4|72c26c48-802d-473...|
107|2015-01-01|event_name_944|2015-01-01T13:51:...| type4|
| 20250805000927187|20250805000927187...|event_id:108,even...|
type1|db420273-0543-4c0...|
108|2015-01-01|event_name_456|2015-01-01T13:51:...| type1|
| 20250805000927187|20250805000927187...|event_id:109,even...|
type2|d888bc7e-09c8-44a...|
109|2015-01-01|event_name_567|2015-01-01T13:51:...| type2|
| 20250805000927187|20250805000927187...|event_id:110,even...|
type3|198de9d0-7df0-4c5...|
110|2015-01-01|event_name_789|2015-01-01T12:15:...| type3|
| 20250805000927187|20250805000927187...|event_id:111,even...|
type4|72c26c48-802d-473...|
111|2015-01-01|event_name_322|2015-01-01T13:51:...| type4|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
# Writing via the DataFrame API works - the new record (event_id 113) does appear in the SELECT below
import org.apache.hudi.DataSourceWriteOptions
import org.apache.spark.sql.SaveMode
var tableName = "mansipp_hudi_102_cow_fta_write_lf_table_update_test"
var tablePath = "s3://fgac-iceberg-011426214932/mansipp/fta/ec2/" + tableName +
"/"
val newData = Seq(
("113", "2015-01-01", "event_name_999", "2015-01-01T14:00:00.000000Z", "type5")
).toDF("event_id", "event_date", "event_name", "event_ts", "event_type")
newData.write.format("hudi")
.option("hoodie.metadata.enable", "true")
.option("hoodie.table.name", tableName)
.option("hoodie.database.name", "default")
.option("hoodie.datasource.write.operation", "upsert")
.option("hoodie.datasource.write.table.type", "COPY_ON_WRITE")
.option("hoodie.datasource.write.recordkey.field", "event_id,event_date")
.option("hoodie.datasource.write.partitionpath.field", "event_type")
.option("hoodie.datasource.write.precombine.field", "event_ts")
.option("hoodie.datasource.write.keygenerator.class",
"org.apache.hudi.keygen.ComplexKeyGenerator")
.option("hoodie.index.type", "GLOBAL_BLOOM")
.option("hoodie.datasource.hive_sync.enable", "true")
.option("hoodie.datasource.meta.sync.enable", "true")
.option("hoodie.datasource.hive_sync.mode", "hms")
.option("hoodie.datasource.hive_sync.database", "default")
.option("hoodie.datasource.hive_sync.table", tableName)
.option("hoodie.datasource.hive_sync.partition_fields", "event_type")
.option("hoodie.datasource.hive_sync.partition_extractor_class",
"org.apache.hudi.hive.MultiPartKeysValueExtractor")
.mode(SaveMode.Append)
.save(tablePath)
# SELECT
spark.sql("select * from mansipp_hudi_102_cow_fta_write_lf_table_update_test
order by event_id").show();
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
|_hoodie_commit_time|_hoodie_commit_seqno|
_hoodie_record_key|_hoodie_partition_path|
_hoodie_file_name|event_id|event_date| event_name|
event_ts|event_type|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
| 20250805000927187|20250805000927187...|event_id:100,even...|
type1|db420273-0543-4c0...|
100|2015-01-01|event_name_900|2015-01-01T13:51:...| type1|
| 20250805000927187|20250805000927187...|event_id:101,even...|
type2|d888bc7e-09c8-44a...|
101|2015-01-01|event_name_546|2015-01-01T12:14:...| type2|
| 20250805000927187|20250805000927187...|event_id:102,even...|
type3|198de9d0-7df0-4c5...|
102|2015-01-01|event_name_345|2015-01-01T13:51:...| type3|
| 20250805000927187|20250805000927187...|event_id:103,even...|
type4|72c26c48-802d-473...|
103|2015-01-01|event_name_234|2015-01-01T13:51:...| type4|
| 20250805000927187|20250805000927187...|event_id:104,even...|
type1|db420273-0543-4c0...|
104|2015-01-01|event_name_123|2015-01-01T12:15:...| type1|
| 20250805000927187|20250805000927187...|event_id:105,even...|
type2|d888bc7e-09c8-44a...|
105|2015-01-01|event_name_678|2015-01-01T13:51:...| type2|
| 20250805000927187|20250805000927187...|event_id:106,even...|
type3|198de9d0-7df0-4c5...|
106|2015-01-01|event_name_890|2015-01-01T13:51:...| type3|
| 20250805000927187|20250805000927187...|event_id:107,even...|
type4|72c26c48-802d-473...|
107|2015-01-01|event_name_944|2015-01-01T13:51:...| type4|
| 20250805000927187|20250805000927187...|event_id:108,even...|
type1|db420273-0543-4c0...|
108|2015-01-01|event_name_456|2015-01-01T13:51:...| type1|
| 20250805000927187|20250805000927187...|event_id:109,even...|
type2|d888bc7e-09c8-44a...|
109|2015-01-01|event_name_567|2015-01-01T13:51:...| type2|
| 20250805000927187|20250805000927187...|event_id:110,even...|
type3|198de9d0-7df0-4c5...|
110|2015-01-01|event_name_789|2015-01-01T12:15:...| type3|
| 20250805000927187|20250805000927187...|event_id:111,even...|
type4|72c26c48-802d-473...|
111|2015-01-01|event_name_322|2015-01-01T13:51:...| type4|
| 20250805001335884|20250805001335884...|event_id:113,even...|
type5|6f74bbeb-e7cd-4e2...|
113|2015-01-01|event_name_999|2015-01-01T14:00:...| type5|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+{code}
was:
Hudi 1.0.2 SQL Global_Bloom index not returning newly inserted data.
{code:java}
spark-shell --jars
/home/hadoop/hudi-spark3-bundle_2.12-1.0.2-amzn-0-SNAPSHOT.jar \
--conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \
--conf
"spark.sql.catalog.spark_catalog=org.apache.spark.sql.hudi.catalog.HoodieCatalog"
\
--conf
"spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension"
import org.apache.hudi.DataSourceWriteOptions
import org.apache.spark.sql.SaveMode
val df1 = Seq(
("100", "2015-01-01", "event_name_900", "2015-01-01T13:51:39.340396Z",
"type1"),
("101", "2015-01-01", "event_name_546", "2015-01-01T12:14:58.597216Z",
"type2"),
("102", "2015-01-01", "event_name_345", "2015-01-01T13:51:40.417052Z",
"type3"),
("103", "2015-01-01", "event_name_234", "2015-01-01T13:51:40.519832Z",
"type4"),
("104", "2015-01-01", "event_name_123", "2015-01-01T12:15:00.512679Z",
"type1"),
("105", "2015-01-01", "event_name_678", "2015-01-01T13:51:42.248818Z",
"type2"),
("106", "2015-01-01", "event_name_890", "2015-01-01T13:51:44.735360Z",
"type3"),
("107", "2015-01-01", "event_name_944", "2015-01-01T13:51:45.019544Z",
"type4"),
("108", "2015-01-01", "event_name_456", "2015-01-01T13:51:45.208007Z",
"type1"),
("109", "2015-01-01", "event_name_567", "2015-01-01T13:51:45.369689Z",
"type2"),
("110", "2015-01-01", "event_name_789", "2015-01-01T12:15:05.664947Z",
"type3"),
("111", "2015-01-01", "event_name_322", "2015-01-01T13:51:47.388239Z", "type4")
).toDF("event_id", "event_date", "event_name", "event_ts", "event_type")
var tableName = "mansipp_hudi_102_cow_fta_write_lf_table_update_test"
var tablePath = "s3://<path>/" + tableName + "/"
df1.write.format("hudi")
.option("hoodie.metadata.enable", "true")
.option("hoodie.table.name", tableName)
.option("hoodie.database.name", "default")
.option("hoodie.datasource.write.operation", "upsert")
.option("hoodie.datasource.write.table.type", "COPY_ON_WRITE")
.option("hoodie.datasource.write.recordkey.field", "event_id,event_date")
.option("hoodie.datasource.write.partitionpath.field", "event_type")
.option("hoodie.datasource.write.precombine.field", "event_ts")
.option("hoodie.datasource.write.keygenerator.class",
"org.apache.hudi.keygen.ComplexKeyGenerator")
.option("hoodie.datasource.hive_sync.enable", "true")
.option("hoodie.datasource.meta.sync.enable", "true")
.option("hoodie.index.type", "GLOBAL_BLOOM")
// .option("hoodie.datasource.write.hive_style_partitioning", "true")
.option("hoodie.datasource.hive_sync.mode", "hms")
.option("hoodie.datasource.hive_sync.database", "default")
.option("hoodie.datasource.hive_sync.table", tableName)
.option("hoodie.datasource.hive_sync.partition_fields", "event_type")
.option("hoodie.datasource.hive_sync.partition_extractor_class",
"org.apache.hudi.hive.MultiPartKeysValueExtractor")
.mode(SaveMode.Overwrite)
.save(tablePath)
# SELECT
spark.sql("select * from mansipp_hudi_102_cow_fta_write_lf_table_update_test
order by event_id").show();
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
|_hoodie_commit_time|_hoodie_commit_seqno|
_hoodie_record_key|_hoodie_partition_path|
_hoodie_file_name|event_id|event_date| event_name|
event_ts|event_type|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
| 20250805000927187|20250805000927187...|event_id:100,even...|
type1|db420273-0543-4c0...|
100|2015-01-01|event_name_900|2015-01-01T13:51:...| type1|
| 20250805000927187|20250805000927187...|event_id:101,even...|
type2|d888bc7e-09c8-44a...|
101|2015-01-01|event_name_546|2015-01-01T12:14:...| type2|
| 20250805000927187|20250805000927187...|event_id:102,even...|
type3|198de9d0-7df0-4c5...|
102|2015-01-01|event_name_345|2015-01-01T13:51:...| type3|
| 20250805000927187|20250805000927187...|event_id:103,even...|
type4|72c26c48-802d-473...|
103|2015-01-01|event_name_234|2015-01-01T13:51:...| type4|
| 20250805000927187|20250805000927187...|event_id:104,even...|
type1|db420273-0543-4c0...|
104|2015-01-01|event_name_123|2015-01-01T12:15:...| type1|
| 20250805000927187|20250805000927187...|event_id:105,even...|
type2|d888bc7e-09c8-44a...|
105|2015-01-01|event_name_678|2015-01-01T13:51:...| type2|
| 20250805000927187|20250805000927187...|event_id:106,even...|
type3|198de9d0-7df0-4c5...|
106|2015-01-01|event_name_890|2015-01-01T13:51:...| type3|
| 20250805000927187|20250805000927187...|event_id:107,even...|
type4|72c26c48-802d-473...|
107|2015-01-01|event_name_944|2015-01-01T13:51:...| type4|
| 20250805000927187|20250805000927187...|event_id:108,even...|
type1|db420273-0543-4c0...|
108|2015-01-01|event_name_456|2015-01-01T13:51:...| type1|
| 20250805000927187|20250805000927187...|event_id:109,even...|
type2|d888bc7e-09c8-44a...|
109|2015-01-01|event_name_567|2015-01-01T13:51:...| type2|
| 20250805000927187|20250805000927187...|event_id:110,even...|
type3|198de9d0-7df0-4c5...|
110|2015-01-01|event_name_789|2015-01-01T12:15:...| type3|
| 20250805000927187|20250805000927187...|event_id:111,even...|
type4|72c26c48-802d-473...|
111|2015-01-01|event_name_322|2015-01-01T13:51:...| type4|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
# INSERT new data
spark.sql("INSERT INTO mansipp_hudi_102_cow_fta_write_lf_table_update_test
(event_id, event_date, event_name, event_ts, event_type) VALUES('112',
DATE('2015-01-01'), 'event_name_123', TIMESTAMP('2015-01-01 13:51:45'),
'type5')")
# SELECT - Not showing new record
spark.sql("select * from mansipp_hudi_102_cow_fta_write_lf_table_update_test
order by event_id").show();
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
|_hoodie_commit_time|_hoodie_commit_seqno|
_hoodie_record_key|_hoodie_partition_path|
_hoodie_file_name|event_id|event_date| event_name|
event_ts|event_type|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
| 20250805000927187|20250805000927187...|event_id:100,even...|
type1|db420273-0543-4c0...|
100|2015-01-01|event_name_900|2015-01-01T13:51:...| type1|
| 20250805000927187|20250805000927187...|event_id:101,even...|
type2|d888bc7e-09c8-44a...|
101|2015-01-01|event_name_546|2015-01-01T12:14:...| type2|
| 20250805000927187|20250805000927187...|event_id:102,even...|
type3|198de9d0-7df0-4c5...|
102|2015-01-01|event_name_345|2015-01-01T13:51:...| type3|
| 20250805000927187|20250805000927187...|event_id:103,even...|
type4|72c26c48-802d-473...|
103|2015-01-01|event_name_234|2015-01-01T13:51:...| type4|
| 20250805000927187|20250805000927187...|event_id:104,even...|
type1|db420273-0543-4c0...|
104|2015-01-01|event_name_123|2015-01-01T12:15:...| type1|
| 20250805000927187|20250805000927187...|event_id:105,even...|
type2|d888bc7e-09c8-44a...|
105|2015-01-01|event_name_678|2015-01-01T13:51:...| type2|
| 20250805000927187|20250805000927187...|event_id:106,even...|
type3|198de9d0-7df0-4c5...|
106|2015-01-01|event_name_890|2015-01-01T13:51:...| type3|
| 20250805000927187|20250805000927187...|event_id:107,even...|
type4|72c26c48-802d-473...|
107|2015-01-01|event_name_944|2015-01-01T13:51:...| type4|
| 20250805000927187|20250805000927187...|event_id:108,even...|
type1|db420273-0543-4c0...|
108|2015-01-01|event_name_456|2015-01-01T13:51:...| type1|
| 20250805000927187|20250805000927187...|event_id:109,even...|
type2|d888bc7e-09c8-44a...|
109|2015-01-01|event_name_567|2015-01-01T13:51:...| type2|
| 20250805000927187|20250805000927187...|event_id:110,even...|
type3|198de9d0-7df0-4c5...|
110|2015-01-01|event_name_789|2015-01-01T12:15:...| type3|
| 20250805000927187|20250805000927187...|event_id:111,even...|
type4|72c26c48-802d-473...|
111|2015-01-01|event_name_322|2015-01-01T13:51:...| type4|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
# df works
import org.apache.hudi.DataSourceWriteOptions
import org.apache.spark.sql.SaveMode
var tableName = "mansipp_hudi_102_cow_fta_write_lf_table_update_test"
var tablePath = "s3://fgac-iceberg-011426214932/mansipp/fta/ec2/" + tableName +
"/"
val newData = Seq(
("113", "2015-01-01", "event_name_999", "2015-01-01T14:00:00.000000Z", "type5")
).toDF("event_id", "event_date", "event_name", "event_ts", "event_type")
newData.write.format("hudi")
.option("hoodie.metadata.enable", "true")
.option("hoodie.table.name", tableName)
.option("hoodie.database.name", "default")
.option("hoodie.datasource.write.operation", "upsert")
.option("hoodie.datasource.write.table.type", "COPY_ON_WRITE")
.option("hoodie.datasource.write.recordkey.field", "event_id,event_date")
.option("hoodie.datasource.write.partitionpath.field", "event_type")
.option("hoodie.datasource.write.precombine.field", "event_ts")
.option("hoodie.datasource.write.keygenerator.class",
"org.apache.hudi.keygen.ComplexKeyGenerator")
.option("hoodie.index.type", "GLOBAL_BLOOM")
.option("hoodie.datasource.hive_sync.enable", "true")
.option("hoodie.datasource.meta.sync.enable", "true")
.option("hoodie.datasource.hive_sync.mode", "hms")
.option("hoodie.datasource.hive_sync.database", "default")
.option("hoodie.datasource.hive_sync.table", tableName)
.option("hoodie.datasource.hive_sync.partition_fields", "event_type")
.option("hoodie.datasource.hive_sync.partition_extractor_class",
"org.apache.hudi.hive.MultiPartKeysValueExtractor")
.mode(SaveMode.Append)
.save(tablePath)
# SELECT
spark.sql("select * from mansipp_hudi_102_cow_fta_write_lf_table_update_test
order by event_id").show();
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
|_hoodie_commit_time|_hoodie_commit_seqno|
_hoodie_record_key|_hoodie_partition_path|
_hoodie_file_name|event_id|event_date| event_name|
event_ts|event_type|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
| 20250805000927187|20250805000927187...|event_id:100,even...|
type1|db420273-0543-4c0...|
100|2015-01-01|event_name_900|2015-01-01T13:51:...| type1|
| 20250805000927187|20250805000927187...|event_id:101,even...|
type2|d888bc7e-09c8-44a...|
101|2015-01-01|event_name_546|2015-01-01T12:14:...| type2|
| 20250805000927187|20250805000927187...|event_id:102,even...|
type3|198de9d0-7df0-4c5...|
102|2015-01-01|event_name_345|2015-01-01T13:51:...| type3|
| 20250805000927187|20250805000927187...|event_id:103,even...|
type4|72c26c48-802d-473...|
103|2015-01-01|event_name_234|2015-01-01T13:51:...| type4|
| 20250805000927187|20250805000927187...|event_id:104,even...|
type1|db420273-0543-4c0...|
104|2015-01-01|event_name_123|2015-01-01T12:15:...| type1|
| 20250805000927187|20250805000927187...|event_id:105,even...|
type2|d888bc7e-09c8-44a...|
105|2015-01-01|event_name_678|2015-01-01T13:51:...| type2|
| 20250805000927187|20250805000927187...|event_id:106,even...|
type3|198de9d0-7df0-4c5...|
106|2015-01-01|event_name_890|2015-01-01T13:51:...| type3|
| 20250805000927187|20250805000927187...|event_id:107,even...|
type4|72c26c48-802d-473...|
107|2015-01-01|event_name_944|2015-01-01T13:51:...| type4|
| 20250805000927187|20250805000927187...|event_id:108,even...|
type1|db420273-0543-4c0...|
108|2015-01-01|event_name_456|2015-01-01T13:51:...| type1|
| 20250805000927187|20250805000927187...|event_id:109,even...|
type2|d888bc7e-09c8-44a...|
109|2015-01-01|event_name_567|2015-01-01T13:51:...| type2|
| 20250805000927187|20250805000927187...|event_id:110,even...|
type3|198de9d0-7df0-4c5...|
110|2015-01-01|event_name_789|2015-01-01T12:15:...| type3|
| 20250805000927187|20250805000927187...|event_id:111,even...|
type4|72c26c48-802d-473...|
111|2015-01-01|event_name_322|2015-01-01T13:51:...| type4|
| 20250805001335884|20250805001335884...|event_id:113,even...|
type5|6f74bbeb-e7cd-4e2...|
113|2015-01-01|event_name_999|2015-01-01T14:00:...| type5|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+{code}
> Hudi 1.0.2 SQL Global_Bloom index not returning newly inserted data
> -------------------------------------------------------------------
>
> Key: HUDI-9690
> URL: https://issues.apache.org/jira/browse/HUDI-9690
> Project: Apache Hudi
> Issue Type: Bug
> Components: spark-sql
> Reporter: Mansi Patel
> Priority: Major
> Labels: 1.0.2
>
> Hudi 1.0.2 SQL Global_Bloom index not returning newly inserted data.
> {code:java}
> spark-shell --jars
> /home/hadoop/hudi-spark3-bundle_2.12-1.0.2-amzn-0-SNAPSHOT.jar \
> --conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \
> --conf
> "spark.sql.catalog.spark_catalog=org.apache.spark.sql.hudi.catalog.HoodieCatalog"
> \
> --conf
> "spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension"
> import org.apache.hudi.DataSourceWriteOptions
> import org.apache.spark.sql.SaveMode
> val df1 = Seq(
> ("100", "2015-01-01", "event_name_900", "2015-01-01T13:51:39.340396Z",
> "type1"),
> ("101", "2015-01-01", "event_name_546", "2015-01-01T12:14:58.597216Z",
> "type2"),
> ("102", "2015-01-01", "event_name_345", "2015-01-01T13:51:40.417052Z",
> "type3"),
> ("103", "2015-01-01", "event_name_234", "2015-01-01T13:51:40.519832Z",
> "type4"),
> ("104", "2015-01-01", "event_name_123", "2015-01-01T12:15:00.512679Z",
> "type1"),
> ("105", "2015-01-01", "event_name_678", "2015-01-01T13:51:42.248818Z",
> "type2"),
> ("106", "2015-01-01", "event_name_890", "2015-01-01T13:51:44.735360Z",
> "type3"),
> ("107", "2015-01-01", "event_name_944", "2015-01-01T13:51:45.019544Z",
> "type4"),
> ("108", "2015-01-01", "event_name_456", "2015-01-01T13:51:45.208007Z",
> "type1"),
> ("109", "2015-01-01", "event_name_567", "2015-01-01T13:51:45.369689Z",
> "type2"),
> ("110", "2015-01-01", "event_name_789", "2015-01-01T12:15:05.664947Z",
> "type3"),
> ("111", "2015-01-01", "event_name_322", "2015-01-01T13:51:47.388239Z",
> "type4")
> ).toDF("event_id", "event_date", "event_name", "event_ts", "event_type")
> var tableName = "mansipp_hudi_102_cow_fta_write_lf_table_update_test"
> var tablePath = "s3://<path>/" + tableName + "/"
> df1.write.format("hudi")
> .option("hoodie.metadata.enable", "true")
> .option("hoodie.table.name", tableName)
> .option("hoodie.database.name", "default")
> .option("hoodie.datasource.write.operation", "upsert")
> .option("hoodie.datasource.write.table.type", "COPY_ON_WRITE")
> .option("hoodie.datasource.write.recordkey.field", "event_id,event_date")
> .option("hoodie.datasource.write.partitionpath.field", "event_type")
> .option("hoodie.datasource.write.precombine.field", "event_ts")
> .option("hoodie.datasource.write.keygenerator.class",
> "org.apache.hudi.keygen.ComplexKeyGenerator")
> .option("hoodie.datasource.hive_sync.enable", "true")
> .option("hoodie.datasource.meta.sync.enable", "true")
> .option("hoodie.index.type", "GLOBAL_BLOOM")
> .option("hoodie.datasource.hive_sync.mode", "hms")
> .option("hoodie.datasource.hive_sync.database", "default")
> .option("hoodie.datasource.hive_sync.table", tableName)
> .option("hoodie.datasource.hive_sync.partition_fields", "event_type")
> .option("hoodie.datasource.hive_sync.partition_extractor_class",
> "org.apache.hudi.hive.MultiPartKeysValueExtractor")
> .mode(SaveMode.Overwrite)
> .save(tablePath)
> # SELECT
> spark.sql("select * from mansipp_hudi_102_cow_fta_write_lf_table_update_test
> order by event_id").show();
> +-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
> |_hoodie_commit_time|_hoodie_commit_seqno|
> _hoodie_record_key|_hoodie_partition_path|
> _hoodie_file_name|event_id|event_date| event_name|
> event_ts|event_type|
> +-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
> | 20250805000927187|20250805000927187...|event_id:100,even...|
> type1|db420273-0543-4c0...|
> 100|2015-01-01|event_name_900|2015-01-01T13:51:...| type1|
> | 20250805000927187|20250805000927187...|event_id:101,even...|
> type2|d888bc7e-09c8-44a...|
> 101|2015-01-01|event_name_546|2015-01-01T12:14:...| type2|
> | 20250805000927187|20250805000927187...|event_id:102,even...|
> type3|198de9d0-7df0-4c5...|
> 102|2015-01-01|event_name_345|2015-01-01T13:51:...| type3|
> | 20250805000927187|20250805000927187...|event_id:103,even...|
> type4|72c26c48-802d-473...|
> 103|2015-01-01|event_name_234|2015-01-01T13:51:...| type4|
> | 20250805000927187|20250805000927187...|event_id:104,even...|
> type1|db420273-0543-4c0...|
> 104|2015-01-01|event_name_123|2015-01-01T12:15:...| type1|
> | 20250805000927187|20250805000927187...|event_id:105,even...|
> type2|d888bc7e-09c8-44a...|
> 105|2015-01-01|event_name_678|2015-01-01T13:51:...| type2|
> | 20250805000927187|20250805000927187...|event_id:106,even...|
> type3|198de9d0-7df0-4c5...|
> 106|2015-01-01|event_name_890|2015-01-01T13:51:...| type3|
> | 20250805000927187|20250805000927187...|event_id:107,even...|
> type4|72c26c48-802d-473...|
> 107|2015-01-01|event_name_944|2015-01-01T13:51:...| type4|
> | 20250805000927187|20250805000927187...|event_id:108,even...|
> type1|db420273-0543-4c0...|
> 108|2015-01-01|event_name_456|2015-01-01T13:51:...| type1|
> | 20250805000927187|20250805000927187...|event_id:109,even...|
> type2|d888bc7e-09c8-44a...|
> 109|2015-01-01|event_name_567|2015-01-01T13:51:...| type2|
> | 20250805000927187|20250805000927187...|event_id:110,even...|
> type3|198de9d0-7df0-4c5...|
> 110|2015-01-01|event_name_789|2015-01-01T12:15:...| type3|
> | 20250805000927187|20250805000927187...|event_id:111,even...|
> type4|72c26c48-802d-473...|
> 111|2015-01-01|event_name_322|2015-01-01T13:51:...| type4|
> +-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
> # INSERT new data
> spark.sql("INSERT INTO mansipp_hudi_102_cow_fta_write_lf_table_update_test
> (event_id, event_date, event_name, event_ts, event_type) VALUES('112',
> DATE('2015-01-01'), 'event_name_123', TIMESTAMP('2015-01-01 13:51:45'),
> 'type5')")
> # SELECT - Not showing new record
> spark.sql("select * from mansipp_hudi_102_cow_fta_write_lf_table_update_test
> order by event_id").show();
> +-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
> |_hoodie_commit_time|_hoodie_commit_seqno|
> _hoodie_record_key|_hoodie_partition_path|
> _hoodie_file_name|event_id|event_date| event_name|
> event_ts|event_type|
> +-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
> | 20250805000927187|20250805000927187...|event_id:100,even...|
> type1|db420273-0543-4c0...|
> 100|2015-01-01|event_name_900|2015-01-01T13:51:...| type1|
> | 20250805000927187|20250805000927187...|event_id:101,even...|
> type2|d888bc7e-09c8-44a...|
> 101|2015-01-01|event_name_546|2015-01-01T12:14:...| type2|
> | 20250805000927187|20250805000927187...|event_id:102,even...|
> type3|198de9d0-7df0-4c5...|
> 102|2015-01-01|event_name_345|2015-01-01T13:51:...| type3|
> | 20250805000927187|20250805000927187...|event_id:103,even...|
> type4|72c26c48-802d-473...|
> 103|2015-01-01|event_name_234|2015-01-01T13:51:...| type4|
> | 20250805000927187|20250805000927187...|event_id:104,even...|
> type1|db420273-0543-4c0...|
> 104|2015-01-01|event_name_123|2015-01-01T12:15:...| type1|
> | 20250805000927187|20250805000927187...|event_id:105,even...|
> type2|d888bc7e-09c8-44a...|
> 105|2015-01-01|event_name_678|2015-01-01T13:51:...| type2|
> | 20250805000927187|20250805000927187...|event_id:106,even...|
> type3|198de9d0-7df0-4c5...|
> 106|2015-01-01|event_name_890|2015-01-01T13:51:...| type3|
> | 20250805000927187|20250805000927187...|event_id:107,even...|
> type4|72c26c48-802d-473...|
> 107|2015-01-01|event_name_944|2015-01-01T13:51:...| type4|
> | 20250805000927187|20250805000927187...|event_id:108,even...|
> type1|db420273-0543-4c0...|
> 108|2015-01-01|event_name_456|2015-01-01T13:51:...| type1|
> | 20250805000927187|20250805000927187...|event_id:109,even...|
> type2|d888bc7e-09c8-44a...|
> 109|2015-01-01|event_name_567|2015-01-01T13:51:...| type2|
> | 20250805000927187|20250805000927187...|event_id:110,even...|
> type3|198de9d0-7df0-4c5...|
> 110|2015-01-01|event_name_789|2015-01-01T12:15:...| type3|
> | 20250805000927187|20250805000927187...|event_id:111,even...|
> type4|72c26c48-802d-473...|
> 111|2015-01-01|event_name_322|2015-01-01T13:51:...| type4|
> +-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
> # By contrast, inserting via the DataFrame write API works as expected
> import org.apache.hudi.DataSourceWriteOptions
> import org.apache.spark.sql.SaveMode
> var tableName = "mansipp_hudi_102_cow_fta_write_lf_table_update_test"
> var tablePath = "s3://fgac-iceberg-011426214932/mansipp/fta/ec2/" + tableName
> + "/"
> val newData = Seq(
> ("113", "2015-01-01", "event_name_999", "2015-01-01T14:00:00.000000Z",
> "type5")
> ).toDF("event_id", "event_date", "event_name", "event_ts", "event_type")
> newData.write.format("hudi")
> .option("hoodie.metadata.enable", "true")
> .option("hoodie.table.name", tableName)
> .option("hoodie.database.name", "default")
> .option("hoodie.datasource.write.operation", "upsert")
> .option("hoodie.datasource.write.table.type", "COPY_ON_WRITE")
> .option("hoodie.datasource.write.recordkey.field", "event_id,event_date")
> .option("hoodie.datasource.write.partitionpath.field", "event_type")
> .option("hoodie.datasource.write.precombine.field", "event_ts")
> .option("hoodie.datasource.write.keygenerator.class",
> "org.apache.hudi.keygen.ComplexKeyGenerator")
> .option("hoodie.index.type", "GLOBAL_BLOOM")
> .option("hoodie.datasource.hive_sync.enable", "true")
> .option("hoodie.datasource.meta.sync.enable", "true")
> .option("hoodie.datasource.hive_sync.mode", "hms")
> .option("hoodie.datasource.hive_sync.database", "default")
> .option("hoodie.datasource.hive_sync.table", tableName)
> .option("hoodie.datasource.hive_sync.partition_fields", "event_type")
> .option("hoodie.datasource.hive_sync.partition_extractor_class",
> "org.apache.hudi.hive.MultiPartKeysValueExtractor")
> .mode(SaveMode.Append)
> .save(tablePath)
> # SELECT - the record written via the DataFrame API (event_id 113) IS returned, while 112 is still missing
> spark.sql("select * from mansipp_hudi_102_cow_fta_write_lf_table_update_test
> order by event_id").show();
> +-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
> |_hoodie_commit_time|_hoodie_commit_seqno|
> _hoodie_record_key|_hoodie_partition_path|
> _hoodie_file_name|event_id|event_date| event_name|
> event_ts|event_type|
> +-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
> | 20250805000927187|20250805000927187...|event_id:100,even...|
> type1|db420273-0543-4c0...|
> 100|2015-01-01|event_name_900|2015-01-01T13:51:...| type1|
> | 20250805000927187|20250805000927187...|event_id:101,even...|
> type2|d888bc7e-09c8-44a...|
> 101|2015-01-01|event_name_546|2015-01-01T12:14:...| type2|
> | 20250805000927187|20250805000927187...|event_id:102,even...|
> type3|198de9d0-7df0-4c5...|
> 102|2015-01-01|event_name_345|2015-01-01T13:51:...| type3|
> | 20250805000927187|20250805000927187...|event_id:103,even...|
> type4|72c26c48-802d-473...|
> 103|2015-01-01|event_name_234|2015-01-01T13:51:...| type4|
> | 20250805000927187|20250805000927187...|event_id:104,even...|
> type1|db420273-0543-4c0...|
> 104|2015-01-01|event_name_123|2015-01-01T12:15:...| type1|
> | 20250805000927187|20250805000927187...|event_id:105,even...|
> type2|d888bc7e-09c8-44a...|
> 105|2015-01-01|event_name_678|2015-01-01T13:51:...| type2|
> | 20250805000927187|20250805000927187...|event_id:106,even...|
> type3|198de9d0-7df0-4c5...|
> 106|2015-01-01|event_name_890|2015-01-01T13:51:...| type3|
> | 20250805000927187|20250805000927187...|event_id:107,even...|
> type4|72c26c48-802d-473...|
> 107|2015-01-01|event_name_944|2015-01-01T13:51:...| type4|
> | 20250805000927187|20250805000927187...|event_id:108,even...|
> type1|db420273-0543-4c0...|
> 108|2015-01-01|event_name_456|2015-01-01T13:51:...| type1|
> | 20250805000927187|20250805000927187...|event_id:109,even...|
> type2|d888bc7e-09c8-44a...|
> 109|2015-01-01|event_name_567|2015-01-01T13:51:...| type2|
> | 20250805000927187|20250805000927187...|event_id:110,even...|
> type3|198de9d0-7df0-4c5...|
> 110|2015-01-01|event_name_789|2015-01-01T12:15:...| type3|
> | 20250805000927187|20250805000927187...|event_id:111,even...|
> type4|72c26c48-802d-473...|
> 111|2015-01-01|event_name_322|2015-01-01T13:51:...| type4|
> | 20250805001335884|20250805001335884...|event_id:113,even...|
> type5|6f74bbeb-e7cd-4e2...|
> 113|2015-01-01|event_name_999|2015-01-01T14:00:...| type5|
> +-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+{code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)