rangareddy commented on issue #14991:
URL: https://github.com/apache/hudi/issues/14991#issuecomment-3661355787
Verification using the code below confirms that TimestampBasedKeyGenerator
is working correctly for both Copy-on-Write (COW) and Merge-on-Read (MOR)
tables.
```sh
# Versions under test: Spark 3.5 / Hudi 1.1.0 / Scala 2.13.
export SPARK_VERSION=3.5
export HUDI_VERSION=1.1.0
export SCALA_VERSION=2.13
# Launch a local spark-shell with the Hudi Spark bundle plus the
# serializer/catalog/extension settings Hudi requires. Each option and its
# argument must stay on one (continued) line; the trailing backslashes are
# required line continuations.
spark-shell --master "local[2]" \
  --packages org.apache.hudi:hudi-spark$SPARK_VERSION-bundle_$SCALA_VERSION:$HUDI_VERSION \
  --conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer' \
  --conf 'spark.sql.catalog.spark_catalog=org.apache.spark.sql.hudi.catalog.HoodieCatalog' \
  --conf 'spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension' \
  --conf 'spark.kryo.registrator=org.apache.spark.HoodieSparkKryoRegistrar'
```
```scala
import org.apache.hudi.DataSourceWriteOptions
import org.apache.hudi.config.HoodieWriteConfig
import org.apache.hudi.keygen.constant.KeyGeneratorOptions._
import org.apache.hudi.hive.MultiPartKeysValueExtractor

// Sample data; the partition source column `data_date` is a yyyy-MM-dd string.
val df = Seq((1, "z3", 30, "v1", "2018-09-23"), (2, "z3", 35, "v1", "2018-09-24")).toDF("id", "name", "age", "ts", "data_date")

val morTableName = "issue_4417_mor"
val morBasePath = "file:///tmp/hudi/issue_4417_mor"

// Write a Merge-on-Read table with TimestampBasedKeyGenerator:
// input dates in yyyy-MM-dd are rewritten into yyyy/MM/dd partition paths
// (timezone GMT+8:00, timestamp type DATE_STRING).
df.write.format("hudi").
  option(HoodieWriteConfig.TABLE_NAME, morTableName).
  option("hoodie.datasource.write.table.type", "MERGE_ON_READ").
  option("hoodie.datasource.write.recordkey.field", "id").
  option("hoodie.datasource.write.partitionpath.field", "data_date").
  option("hoodie.datasource.write.precombine.field", "ts").
  option("hoodie.datasource.write.keygenerator.class", "org.apache.hudi.keygen.TimestampBasedKeyGenerator").
  option("hoodie.deltastreamer.keygen.timebased.timestamp.type", "DATE_STRING").
  option("hoodie.deltastreamer.keygen.timebased.output.dateformat", "yyyy/MM/dd").
  option("hoodie.deltastreamer.keygen.timebased.timezone", "GMT+8:00").
  option("hoodie.deltastreamer.keygen.timebased.input.dateformat", "yyyy-MM-dd").
  mode(org.apache.spark.sql.SaveMode.Append).
  save(morBasePath)

// Filtering on the original input-format value matches the row:
spark.read.format("hudi").load(morBasePath).where("data_date = '2018-09-24'").show()
+-------------------+--------------------+------------------+----------------------+--------------------+---+----+---+---+----------+
|_hoodie_commit_time|_hoodie_commit_seqno|_hoodie_record_key|_hoodie_partition_path|   _hoodie_file_name| id|name|age| ts| data_date|
+-------------------+--------------------+------------------+----------------------+--------------------+---+----+---+---+----------+
|  20251216160614658|20251216160614658...|                 2|            2018/09/24|e2634367-796f-4e3...|  2|  z3| 35| v1|2018-09-24|
+-------------------+--------------------+------------------+----------------------+--------------------+---+----+---+---+----------+

// Filtering on the output (partition-path) format also matches:
spark.read.format("hudi").load(morBasePath).where("data_date = '2018/09/24'").show()
+-------------------+--------------------+------------------+----------------------+--------------------+---+----+---+---+----------+
|_hoodie_commit_time|_hoodie_commit_seqno|_hoodie_record_key|_hoodie_partition_path|   _hoodie_file_name| id|name|age| ts| data_date|
+-------------------+--------------------+------------------+----------------------+--------------------+---+----+---+---+----------+
|  20251216160614658|20251216160614658...|                 2|            2018/09/24|e2634367-796f-4e3...|  2|  z3| 35| v1|2018-09-24|
+-------------------+--------------------+------------------+----------------------+--------------------+---+----+---+---+----------+
```
```scala
val cowTableName = "issue_4417_cow"
val cowBasePath = "file:///tmp/hudi/issue_4417_cow"

// Same verification for a Copy-on-Write table: identical
// TimestampBasedKeyGenerator settings, only the table type differs.
df.write.format("hudi").
  option(HoodieWriteConfig.TABLE_NAME, cowTableName).
  option("hoodie.datasource.write.table.type", "COPY_ON_WRITE").
  option("hoodie.datasource.write.recordkey.field", "id").
  option("hoodie.datasource.write.partitionpath.field", "data_date").
  option("hoodie.datasource.write.precombine.field", "ts").
  option("hoodie.datasource.write.keygenerator.class", "org.apache.hudi.keygen.TimestampBasedKeyGenerator").
  option("hoodie.deltastreamer.keygen.timebased.timestamp.type", "DATE_STRING").
  option("hoodie.deltastreamer.keygen.timebased.output.dateformat", "yyyy/MM/dd").
  option("hoodie.deltastreamer.keygen.timebased.timezone", "GMT+8:00").
  option("hoodie.deltastreamer.keygen.timebased.input.dateformat", "yyyy-MM-dd").
  mode(org.apache.spark.sql.SaveMode.Append).
  save(cowBasePath)

// Filtering on the input-format value matches the row:
spark.read.format("hudi").load(cowBasePath).where("data_date = '2018-09-24'").show()
+-------------------+--------------------+------------------+----------------------+--------------------+---+----+---+---+----------+
|_hoodie_commit_time|_hoodie_commit_seqno|_hoodie_record_key|_hoodie_partition_path|   _hoodie_file_name| id|name|age| ts| data_date|
+-------------------+--------------------+------------------+----------------------+--------------------+---+----+---+---+----------+
|  20251216160903596|20251216160903596...|                 2|            2018/09/24|c017d67e-2abc-46d...|  2|  z3| 35| v1|2018-09-24|
+-------------------+--------------------+------------------+----------------------+--------------------+---+----+---+---+----------+

// Filtering on the output (partition-path) format also matches:
spark.read.format("hudi").load(cowBasePath).where("data_date = '2018/09/24'").show()
+-------------------+--------------------+------------------+----------------------+--------------------+---+----+---+---+----------+
|_hoodie_commit_time|_hoodie_commit_seqno|_hoodie_record_key|_hoodie_partition_path|   _hoodie_file_name| id|name|age| ts| data_date|
+-------------------+--------------------+------------------+----------------------+--------------------+---+----+---+---+----------+
|  20251216160903596|20251216160903596...|                 2|            2018/09/24|c017d67e-2abc-46d...|  2|  z3| 35| v1|2018-09-24|
+-------------------+--------------------+------------------+----------------------+--------------------+---+----+---+---+----------+
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]