[
https://issues.apache.org/jira/browse/HUDI-5977?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
ASF GitHub Bot updated HUDI-5977:
---------------------------------
Labels: pull-request-available (was: )
> Fix Date to String casts when non-vectorized readers are used
> -------------------------------------------------------------
>
> Key: HUDI-5977
> URL: https://issues.apache.org/jira/browse/HUDI-5977
> Project: Apache Hudi
> Issue Type: Bug
> Reporter: voon
> Assignee: voon
> Priority: Major
> Labels: pull-request-available
>
> When a Date -> String type conversion is performed while the
> non-vectorized reader is in use, the table becomes unreadable.
>
> Test case to replicate this issue:
>
> {code:java}
> test("Test DATE to STRING conversions when vectorized reading is not
> enabled") {
> val tableName = generateTableName
> spark.sql(
> s"""
> | create table $tableName (
> | id int,
> | name string,
> | price double,
> | ts long
> |) using hudi
> | partitioned by (ts)
> |tblproperties (
> | primaryKey = 'id'
> )
> """.stripMargin)
> spark.sql(
> s"""
> | insert into $tableName
> | select 1 as id, 'a1' as name, 10 as price, 1000 as ts
> """.stripMargin)
> spark.sql("set hoodie.schema.on.read.enable = true") // adding a struct
> column to force reads to use non-vectorized readers
> spark.sql(s"alter table $tableName add column (`new_struct_col` STRUCT<f0:
> INTEGER, f1: STRING>)")
> spark.sql(
> s"""
> | insert into $tableName
> | values (2, 'a2', 20, struct(2, 'f_2'), 1001)
> """.stripMargin) spark.sql(s"alter table $tableName add column
> (`date_to_string_col` date)")
> spark.sql(
> s"""
> | insert into $tableName
> | values (3, 'a3', 30, struct(3, 'f_3'), date '2023-03-22', 1002)
> """.stripMargin)
> spark.sql(s"alter table $tableName alter column `date_to_string_col` type
> string")
> // struct and string (converted from date) column must be read to ensure
> that non-vectorized reader is used
> checkAnswer(s"select * from $tableName")(
> Seq("year=2021/month=02/day=%s".format(DEFAULT_PARTITION_PATH)),
> Seq("year=2021/month=02/day=01")
> )
> }{code}
>
>
--
This message was sent by Atlassian Jira
(v8.20.10#820010)