[
https://issues.apache.org/jira/browse/HUDI-5835?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Tao Meng updated HUDI-5835:
---------------------------
Description:
The Avro schema created by Spark SQL is missing the Avro name and namespace.
This causes the read schema and the write schema of the log file to be
incompatible.
{code:java}
// code placeholder
spark.sql(
s"""
|create table $tableName (
| id int,
| name string,
| price double,
| ts long,
| ff decimal(38, 10)
|) using hudi
| location '${tablePath.toString}'
| tblproperties (
| type = 'mor',
| primaryKey = 'id',
| preCombineField = 'ts'
| )
""".stripMargin)
spark.sql(s"insert into $tableName select 1, 'a1', 10, 1000, 10.0")
checkAnswer(s"select id, name, price, ts from $tableName")(
Seq(1, "a1", 10.0, 1000)
)
spark.sql(s"update $tableName set price = 22 where id = 1")
checkAnswer(s"select id, name, price, ts from $tableName")( -------- failed
Seq(1, "a1", 22.0, 1000)
)
{code}
was:
avro schema create by sparksql will miss avro name and namespace,
This will lead the read schema and write schema of the log file to be
incompatible
{code:java}
// code placeholdertest("Test Add Column and Update Table") { withTempDir { tmp
=> val tableName = generateTableName //spark.sql("SET
hoodie.datasource.read.extract.partition.values.from.path=true") val tablePath
= new Path(tmp.getCanonicalPath, tableName) // create table spark.sql( s"""
|create table $tableName ( | id int, | name string, | price double, | ts long,
| ff decimal(38, 10) |) using hudi | location '${tablePath.toString}' |
tblproperties ( | type = 'mor', | primaryKey = 'id', | preCombineField = 'ts' |
) """.stripMargin) // insert data to table spark.sql(s"insert into $tableName
select 1, 'a1', 10, 1000, 10.0") checkAnswer(s"select id, name, price, ts from
$tableName")( Seq(1, "a1", 10.0, 1000) ) spark.sql(s"update $tableName set
price = 22 where id = 1") checkAnswer(s"select id, name, price, ts from
$tableName")( Seq(1, "a1", 22.0, 1000) ) spark.sql(s"alter table $tableName add
column new_col1 int") checkAnswer(s"select id, name, price, ts, new_col1 from
$tableName")( Seq(1, "a1", 22.0, 1000, null) ) // update and check
spark.sql(s"update $tableName set price = price * 2 where id = 1")
checkAnswer(s"select id, name, price, ts, new_col1 from $tableName")( Seq(1,
"a1", 44.0, 1000, null) ) } }
{code}
> Spark cannot read MOR table after executing an update statement
> ---------------------------------------------------------------
>
> Key: HUDI-5835
> URL: https://issues.apache.org/jira/browse/HUDI-5835
> Project: Apache Hudi
> Issue Type: Bug
> Components: spark
> Affects Versions: 0.13.0
> Reporter: Tao Meng
> Priority: Blocker
>
> The Avro schema created by Spark SQL is missing the Avro name and namespace.
> This causes the read schema and the write schema of the log file to be
> incompatible.
>
> {code:java}
> // code placeholder
> spark.sql(
> s"""
> |create table $tableName (
> | id int,
> | name string,
> | price double,
> | ts long,
> | ff decimal(38, 10)
> |) using hudi
> | location '${tablePath.toString}'
> | tblproperties (
> | type = 'mor',
> | primaryKey = 'id',
> | preCombineField = 'ts'
> | )
> """.stripMargin)
> spark.sql(s"insert into $tableName select 1, 'a1', 10, 1000, 10.0")
> checkAnswer(s"select id, name, price, ts from $tableName")(
> Seq(1, "a1", 10.0, 1000)
> )
> spark.sql(s"update $tableName set price = 22 where id = 1")
> checkAnswer(s"select id, name, price, ts from $tableName")( -------- failed
> Seq(1, "a1", 22.0, 1000)
> )
> {code}
>
--
This message was sent by Atlassian Jira
(v8.20.10#820010)