[
https://issues.apache.org/jira/browse/HUDI-7613?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17886546#comment-17886546
]
Lokesh Jain commented on HUDI-7613:
-----------------------------------
Hive test script:
Tested using Docker demo
{code:java}
import org.apache.hudi.HoodieSparkUtils
import org.apache.hudi.common.config.TypedProperties
import org.apache.hudi.common.util.StringUtils
import org.apache.hudi.exception.HoodieException
import org.apache.hudi.functional.TestSparkSqlWithCustomKeyGenerator._
import org.apache.hudi.testutils.HoodieClientTestUtils.createMetaClient
import org.apache.hudi.util.SparkKeyGenUtils
import org.apache.spark.sql.SaveMode
import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase
import org.joda.time.DateTime
import org.joda.time.format.DateTimeFormat
import org.junit.jupiter.api.Assertions.{assertEquals, assertFalse, assertTrue}
import org.slf4j.LoggerFactory
val df = spark.sql(
s"""SELECT 1 as id, 'a1' as name, 1.6 as price, 1704121827 as ts, 'cat1'
as segment
| UNION
| SELECT 2 as id, 'a2' as name, 10.8 as price, 1704121827 as ts,
'cat1' as segment
| UNION
| SELECT 3 as id, 'a3' as name, 30.0 as price, 1706800227 as ts,
'cat1' as segment
| UNION
| SELECT 4 as id, 'a4' as name, 103.4 as price, 1701443427 as ts,
'cat2' as segment
| UNION
| SELECT 5 as id, 'a5' as name, 1999.0 as price, 1704121827 as ts,
'cat2' as segment
| UNION
| SELECT 6 as id, 'a6' as name, 80.0 as price, 1704121827 as ts,
'cat3' as segment
|""".stripMargin)
df.write.format("hudi")
.option("hoodie.datasource.write.table.type", "MERGE_ON_READ")
.option("hoodie.datasource.write.keygenerator.class",
"org.apache.hudi.keygen.CustomAvroKeyGenerator")
.option("hoodie.datasource.write.partitionpath.field",
"segment:simple,ts:timestamp")
.option("hoodie.datasource.write.recordkey.field", "id")
.option("hoodie.datasource.write.precombine.field", "name")
.option("hoodie.table.name", "hudi_table_2")
.option("hoodie.insert.shuffle.parallelism", "1")
.option("hoodie.upsert.shuffle.parallelism", "1")
.option("hoodie.bulkinsert.shuffle.parallelism", "1")
.option("hoodie.keygen.timebased.timestamp.type", "SCALAR")
.option("hoodie.keygen.timebased.output.dateformat", "yyyy")
.option("hoodie.keygen.timebased.timestamp.scalar.time.unit", "seconds")
.mode(SaveMode.Overwrite)
.save("/user/hive/warehouse/hudi_table_2")
// run_sync_tool.sh is located in: /var/hoodie/ws/hudi-sync/hudi-hive-sync/
// Ran hive sync and then queried the data
run_sync_tool.sh \
--jdbc-url jdbc:hive2://hiveserver:10000 \
--user hive \
--pass hive \
--partitioned-by segment,ts \
--base-path /user/hive/warehouse/hudi_table_2 \
--database default \
--table hudi_table_2 \
--partition-value-extractor org.apache.hudi.hive.MultiPartKeysValueExtractor
{code}
> Check write/query with CustomKeyGenerator on Flink and Hive
> -----------------------------------------------------------
>
> Key: HUDI-7613
> URL: https://issues.apache.org/jira/browse/HUDI-7613
> Project: Apache Hudi
> Issue Type: Improvement
> Reporter: Ethan Guo (this is the old account; please use "yihua")
> Assignee: Lokesh Jain
> Priority: Blocker
> Fix For: 1.0.0
>
> Original Estimate: 12h
> Time Spent: 11h
> Remaining Estimate: 1h
>
> https://github.com/apache/hudi/pull/10615/files#r1551075779
--
This message was sent by Atlassian Jira
(v8.20.10#820010)