Mansi Patel created HUDI-9690:
---------------------------------
Summary: Hudi 1.0.2 SQL Global_Bloom index not returning newly
inserted data
Key: HUDI-9690
URL: https://issues.apache.org/jira/browse/HUDI-9690
Project: Apache Hudi
Issue Type: Bug
Components: spark-sql
Reporter: Mansi Patel
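With Hudi 1.0.2, a row added through Spark SQL INSERT INTO to a COPY_ON_WRITE table that uses the GLOBAL_BLOOM index is not returned by a subsequent SELECT, whereas a row written through the DataFrame writer (upsert, SaveMode.Append) is returned. Steps to reproduce: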
{code:java}
spark-shell --jars /home/hadoop/hudi-spark3-bundle_2.12-1.0.2-amzn-0-SNAPSHOT.jar \
--conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \
--conf "spark.sql.catalog.spark_catalog=org.apache.spark.sql.hudi.catalog.HoodieCatalog" \
--conf "spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension"
import org.apache.hudi.DataSourceWriteOptions
import org.apache.spark.sql.SaveMode
val df1 = Seq(
("100", "2015-01-01", "event_name_900", "2015-01-01T13:51:39.340396Z",
"type1"),
("101", "2015-01-01", "event_name_546", "2015-01-01T12:14:58.597216Z",
"type2"),
("102", "2015-01-01", "event_name_345", "2015-01-01T13:51:40.417052Z",
"type3"),
("103", "2015-01-01", "event_name_234", "2015-01-01T13:51:40.519832Z",
"type4"),
("104", "2015-01-01", "event_name_123", "2015-01-01T12:15:00.512679Z",
"type1"),
("105", "2015-01-01", "event_name_678", "2015-01-01T13:51:42.248818Z",
"type2"),
("106", "2015-01-01", "event_name_890", "2015-01-01T13:51:44.735360Z",
"type3"),
("107", "2015-01-01", "event_name_944", "2015-01-01T13:51:45.019544Z",
"type4"),
("108", "2015-01-01", "event_name_456", "2015-01-01T13:51:45.208007Z",
"type1"),
("109", "2015-01-01", "event_name_567", "2015-01-01T13:51:45.369689Z",
"type2"),
("110", "2015-01-01", "event_name_789", "2015-01-01T12:15:05.664947Z",
"type3"),
("111", "2015-01-01", "event_name_322", "2015-01-01T13:51:47.388239Z", "type4")
).toDF("event_id", "event_date", "event_name", "event_ts", "event_type")
var tableName = "mansipp_hudi_102_cow_fta_write_lf_table_update_test"
var tablePath = "s3://<path>/" + tableName + "/"
df1.write.format("hudi")
.option("hoodie.metadata.enable", "true")
.option("hoodie.table.name", tableName)
.option("hoodie.database.name", "default")
.option("hoodie.datasource.write.operation", "upsert")
.option("hoodie.datasource.write.table.type", "COPY_ON_WRITE")
.option("hoodie.datasource.write.recordkey.field", "event_id,event_date")
.option("hoodie.datasource.write.partitionpath.field", "event_type")
.option("hoodie.datasource.write.precombine.field", "event_ts")
.option("hoodie.datasource.write.keygenerator.class",
"org.apache.hudi.keygen.ComplexKeyGenerator")
.option("hoodie.datasource.hive_sync.enable", "true")
.option("hoodie.datasource.meta.sync.enable", "true")
.option("hoodie.index.type", "GLOBAL_BLOOM")
// .option("hoodie.datasource.write.hive_style_partitioning", "true")
.option("hoodie.datasource.hive_sync.mode", "hms")
.option("hoodie.datasource.hive_sync.database", "default")
.option("hoodie.datasource.hive_sync.table", tableName)
.option("hoodie.datasource.hive_sync.partition_fields", "event_type")
.option("hoodie.datasource.hive_sync.partition_extractor_class",
"org.apache.hudi.hive.MultiPartKeysValueExtractor")
.mode(SaveMode.Overwrite)
.save(tablePath)
# SELECT
spark.sql("select * from mansipp_hudi_102_cow_fta_write_lf_table_update_test order by event_id").show();
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
|_hoodie_commit_time|_hoodie_commit_seqno|
_hoodie_record_key|_hoodie_partition_path|
_hoodie_file_name|event_id|event_date| event_name|
event_ts|event_type|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
| 20250805000927187|20250805000927187...|event_id:100,even...|
type1|db420273-0543-4c0...|
100|2015-01-01|event_name_900|2015-01-01T13:51:...| type1|
| 20250805000927187|20250805000927187...|event_id:101,even...|
type2|d888bc7e-09c8-44a...|
101|2015-01-01|event_name_546|2015-01-01T12:14:...| type2|
| 20250805000927187|20250805000927187...|event_id:102,even...|
type3|198de9d0-7df0-4c5...|
102|2015-01-01|event_name_345|2015-01-01T13:51:...| type3|
| 20250805000927187|20250805000927187...|event_id:103,even...|
type4|72c26c48-802d-473...|
103|2015-01-01|event_name_234|2015-01-01T13:51:...| type4|
| 20250805000927187|20250805000927187...|event_id:104,even...|
type1|db420273-0543-4c0...|
104|2015-01-01|event_name_123|2015-01-01T12:15:...| type1|
| 20250805000927187|20250805000927187...|event_id:105,even...|
type2|d888bc7e-09c8-44a...|
105|2015-01-01|event_name_678|2015-01-01T13:51:...| type2|
| 20250805000927187|20250805000927187...|event_id:106,even...|
type3|198de9d0-7df0-4c5...|
106|2015-01-01|event_name_890|2015-01-01T13:51:...| type3|
| 20250805000927187|20250805000927187...|event_id:107,even...|
type4|72c26c48-802d-473...|
107|2015-01-01|event_name_944|2015-01-01T13:51:...| type4|
| 20250805000927187|20250805000927187...|event_id:108,even...|
type1|db420273-0543-4c0...|
108|2015-01-01|event_name_456|2015-01-01T13:51:...| type1|
| 20250805000927187|20250805000927187...|event_id:109,even...|
type2|d888bc7e-09c8-44a...|
109|2015-01-01|event_name_567|2015-01-01T13:51:...| type2|
| 20250805000927187|20250805000927187...|event_id:110,even...|
type3|198de9d0-7df0-4c5...|
110|2015-01-01|event_name_789|2015-01-01T12:15:...| type3|
| 20250805000927187|20250805000927187...|event_id:111,even...|
type4|72c26c48-802d-473...|
111|2015-01-01|event_name_322|2015-01-01T13:51:...| type4|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
# INSERT new data
spark.sql("INSERT INTO mansipp_hudi_102_cow_fta_write_lf_table_update_test (event_id, event_date, event_name, event_ts, event_type) VALUES('112', DATE('2015-01-01'), 'event_name_123', TIMESTAMP('2015-01-01 13:51:45'), 'type5')")
# SELECT - the newly inserted record (event_id 112, partition type5) is not returned
spark.sql("select * from mansipp_hudi_102_cow_fta_write_lf_table_update_test order by event_id").show();
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
|_hoodie_commit_time|_hoodie_commit_seqno|
_hoodie_record_key|_hoodie_partition_path|
_hoodie_file_name|event_id|event_date| event_name|
event_ts|event_type|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
| 20250805000927187|20250805000927187...|event_id:100,even...|
type1|db420273-0543-4c0...|
100|2015-01-01|event_name_900|2015-01-01T13:51:...| type1|
| 20250805000927187|20250805000927187...|event_id:101,even...|
type2|d888bc7e-09c8-44a...|
101|2015-01-01|event_name_546|2015-01-01T12:14:...| type2|
| 20250805000927187|20250805000927187...|event_id:102,even...|
type3|198de9d0-7df0-4c5...|
102|2015-01-01|event_name_345|2015-01-01T13:51:...| type3|
| 20250805000927187|20250805000927187...|event_id:103,even...|
type4|72c26c48-802d-473...|
103|2015-01-01|event_name_234|2015-01-01T13:51:...| type4|
| 20250805000927187|20250805000927187...|event_id:104,even...|
type1|db420273-0543-4c0...|
104|2015-01-01|event_name_123|2015-01-01T12:15:...| type1|
| 20250805000927187|20250805000927187...|event_id:105,even...|
type2|d888bc7e-09c8-44a...|
105|2015-01-01|event_name_678|2015-01-01T13:51:...| type2|
| 20250805000927187|20250805000927187...|event_id:106,even...|
type3|198de9d0-7df0-4c5...|
106|2015-01-01|event_name_890|2015-01-01T13:51:...| type3|
| 20250805000927187|20250805000927187...|event_id:107,even...|
type4|72c26c48-802d-473...|
107|2015-01-01|event_name_944|2015-01-01T13:51:...| type4|
| 20250805000927187|20250805000927187...|event_id:108,even...|
type1|db420273-0543-4c0...|
108|2015-01-01|event_name_456|2015-01-01T13:51:...| type1|
| 20250805000927187|20250805000927187...|event_id:109,even...|
type2|d888bc7e-09c8-44a...|
109|2015-01-01|event_name_567|2015-01-01T13:51:...| type2|
| 20250805000927187|20250805000927187...|event_id:110,even...|
type3|198de9d0-7df0-4c5...|
110|2015-01-01|event_name_789|2015-01-01T12:15:...| type3|
| 20250805000927187|20250805000927187...|event_id:111,even...|
type4|72c26c48-802d-473...|
111|2015-01-01|event_name_322|2015-01-01T13:51:...| type4|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
# Writing a new record through the DataFrame API works (event_id 113 appears in the SELECT output below)
import org.apache.hudi.DataSourceWriteOptions
import org.apache.spark.sql.SaveMode
var tableName = "mansipp_hudi_102_cow_fta_write_lf_table_update_test"
var tablePath = "s3://fgac-iceberg-011426214932/mansipp/fta/ec2/" + tableName +
"/"
val newData = Seq(
("113", "2015-01-01", "event_name_999", "2015-01-01T14:00:00.000000Z", "type5")
).toDF("event_id", "event_date", "event_name", "event_ts", "event_type")
newData.write.format("hudi")
.option("hoodie.metadata.enable", "true")
.option("hoodie.table.name", tableName)
.option("hoodie.database.name", "default")
.option("hoodie.datasource.write.operation", "upsert")
.option("hoodie.datasource.write.table.type", "COPY_ON_WRITE")
.option("hoodie.datasource.write.recordkey.field", "event_id,event_date")
.option("hoodie.datasource.write.partitionpath.field", "event_type")
.option("hoodie.datasource.write.precombine.field", "event_ts")
.option("hoodie.datasource.write.keygenerator.class",
"org.apache.hudi.keygen.ComplexKeyGenerator")
.option("hoodie.index.type", "GLOBAL_BLOOM")
.option("hoodie.datasource.hive_sync.enable", "true")
.option("hoodie.datasource.meta.sync.enable", "true")
.option("hoodie.datasource.hive_sync.mode", "hms")
.option("hoodie.datasource.hive_sync.database", "default")
.option("hoodie.datasource.hive_sync.table", tableName)
.option("hoodie.datasource.hive_sync.partition_fields", "event_type")
.option("hoodie.datasource.hive_sync.partition_extractor_class",
"org.apache.hudi.hive.MultiPartKeysValueExtractor")
.mode(SaveMode.Append)
.save(tablePath)
# SELECT
spark.sql("select * from mansipp_hudi_102_cow_fta_write_lf_table_update_test order by event_id").show();
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
|_hoodie_commit_time|_hoodie_commit_seqno|
_hoodie_record_key|_hoodie_partition_path|
_hoodie_file_name|event_id|event_date| event_name|
event_ts|event_type|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+
| 20250805000927187|20250805000927187...|event_id:100,even...|
type1|db420273-0543-4c0...|
100|2015-01-01|event_name_900|2015-01-01T13:51:...| type1|
| 20250805000927187|20250805000927187...|event_id:101,even...|
type2|d888bc7e-09c8-44a...|
101|2015-01-01|event_name_546|2015-01-01T12:14:...| type2|
| 20250805000927187|20250805000927187...|event_id:102,even...|
type3|198de9d0-7df0-4c5...|
102|2015-01-01|event_name_345|2015-01-01T13:51:...| type3|
| 20250805000927187|20250805000927187...|event_id:103,even...|
type4|72c26c48-802d-473...|
103|2015-01-01|event_name_234|2015-01-01T13:51:...| type4|
| 20250805000927187|20250805000927187...|event_id:104,even...|
type1|db420273-0543-4c0...|
104|2015-01-01|event_name_123|2015-01-01T12:15:...| type1|
| 20250805000927187|20250805000927187...|event_id:105,even...|
type2|d888bc7e-09c8-44a...|
105|2015-01-01|event_name_678|2015-01-01T13:51:...| type2|
| 20250805000927187|20250805000927187...|event_id:106,even...|
type3|198de9d0-7df0-4c5...|
106|2015-01-01|event_name_890|2015-01-01T13:51:...| type3|
| 20250805000927187|20250805000927187...|event_id:107,even...|
type4|72c26c48-802d-473...|
107|2015-01-01|event_name_944|2015-01-01T13:51:...| type4|
| 20250805000927187|20250805000927187...|event_id:108,even...|
type1|db420273-0543-4c0...|
108|2015-01-01|event_name_456|2015-01-01T13:51:...| type1|
| 20250805000927187|20250805000927187...|event_id:109,even...|
type2|d888bc7e-09c8-44a...|
109|2015-01-01|event_name_567|2015-01-01T13:51:...| type2|
| 20250805000927187|20250805000927187...|event_id:110,even...|
type3|198de9d0-7df0-4c5...|
110|2015-01-01|event_name_789|2015-01-01T12:15:...| type3|
| 20250805000927187|20250805000927187...|event_id:111,even...|
type4|72c26c48-802d-473...|
111|2015-01-01|event_name_322|2015-01-01T13:51:...| type4|
| 20250805001335884|20250805001335884...|event_id:113,even...|
type5|6f74bbeb-e7cd-4e2...|
113|2015-01-01|event_name_999|2015-01-01T14:00:...| type5|
+-------------------+--------------------+--------------------+----------------------+--------------------+--------+----------+--------------+--------------------+----------+{code}
{{}}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)