[
https://issues.apache.org/jira/browse/HUDI-5835?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Tao Meng updated HUDI-5835:
---------------------------
Description:
The Avro schema created by Spark SQL will be missing the Avro name and namespace.
This causes the read schema and the write schema of the log file to be
incompatible.
{code:java}
// code placeholdertest("Test Add Column and Update Table") { withTempDir { tmp
=> val tableName = generateTableName //spark.sql("SET
hoodie.datasource.read.extract.partition.values.from.path=true") val tablePath
= new Path(tmp.getCanonicalPath, tableName) // create table spark.sql( s"""
|create table $tableName ( | id int, | name string, | price double, | ts long,
| ff decimal(38, 10) |) using hudi | location '${tablePath.toString}' |
tblproperties ( | type = 'mor', | primaryKey = 'id', | preCombineField = 'ts' |
) """.stripMargin) // insert data to table spark.sql(s"insert into $tableName
select 1, 'a1', 10, 1000, 10.0") checkAnswer(s"select id, name, price, ts from
$tableName")( Seq(1, "a1", 10.0, 1000) ) spark.sql(s"update $tableName set
price = 22 where id = 1") checkAnswer(s"select id, name, price, ts from
$tableName")( Seq(1, "a1", 22.0, 1000) ) spark.sql(s"alter table $tableName add
column new_col1 int") checkAnswer(s"select id, name, price, ts, new_col1 from
$tableName")( Seq(1, "a1", 22.0, 1000, null) ) // update and check
spark.sql(s"update $tableName set price = price * 2 where id = 1")
checkAnswer(s"select id, name, price, ts, new_col1 from $tableName")( Seq(1,
"a1", 44.0, 1000, null) ) } }
{code}
was:
avro schema create by sparksql will miss avro name and namespace,
This will lead the read schema and write schema of the log file to be
incompatible
{code:java}
// code placeholder
{code}
test("Test Add Column and Update Table") { withTempDir { tmp => val tableName
= generateTableName //spark.sql("SET
hoodie.datasource.read.extract.partition.values.from.path=true") val tablePath
= new Path(tmp.getCanonicalPath, tableName) // create table spark.sql( s"""
|create table $tableName ( | id int, | name string, | price double, | ts long,
| ff decimal(38, 10) |) using hudi | location '${tablePath.toString}' |
tblproperties ( | type = 'mor', | primaryKey = 'id', | preCombineField = 'ts' |
) """.stripMargin) // insert data to table spark.sql(s"insert into $tableName
select 1, 'a1', 10, 1000, 10.0") checkAnswer(s"select id, name, price, ts from
$tableName")( Seq(1, "a1", 10.0, 1000) ) spark.sql(s"update $tableName set
price = 22 where id = 1") checkAnswer(s"select id, name, price, ts from
$tableName")( Seq(1, "a1", 22.0, 1000) ) spark.sql(s"alter table $tableName add
column new_col1 int") checkAnswer(s"select id, name, price, ts, new_col1 from
$tableName")( Seq(1, "a1", 22.0, 1000, null) ) // update and check
spark.sql(s"update $tableName set price = price * 2 where id = 1")
checkAnswer(s"select id, name, price, ts, new_col1 from $tableName")( Seq(1,
"a1", 44.0, 1000, null) ) } }
> spark cannot read mor table after execute update statement
> ----------------------------------------------------------
>
> Key: HUDI-5835
> URL: https://issues.apache.org/jira/browse/HUDI-5835
> Project: Apache Hudi
> Issue Type: Bug
> Components: spark
> Affects Versions: 0.13.0
> Reporter: Tao Meng
> Priority: Blocker
>
> The Avro schema created by Spark SQL will be missing the Avro name and namespace.
> This causes the read schema and the write schema of the log file to be
> incompatible.
>
> {code:java}
> // code placeholdertest("Test Add Column and Update Table") { withTempDir {
> tmp => val tableName = generateTableName //spark.sql("SET
> hoodie.datasource.read.extract.partition.values.from.path=true") val
> tablePath = new Path(tmp.getCanonicalPath, tableName) // create table
> spark.sql( s""" |create table $tableName ( | id int, | name string, | price
> double, | ts long, | ff decimal(38, 10) |) using hudi | location
> '${tablePath.toString}' | tblproperties ( | type = 'mor', | primaryKey =
> 'id', | preCombineField = 'ts' | ) """.stripMargin) // insert data to table
> spark.sql(s"insert into $tableName select 1, 'a1', 10, 1000, 10.0")
> checkAnswer(s"select id, name, price, ts from $tableName")( Seq(1, "a1",
> 10.0, 1000) ) spark.sql(s"update $tableName set price = 22 where id = 1")
> checkAnswer(s"select id, name, price, ts from $tableName")( Seq(1, "a1",
> 22.0, 1000) ) spark.sql(s"alter table $tableName add column new_col1 int")
> checkAnswer(s"select id, name, price, ts, new_col1 from $tableName")( Seq(1,
> "a1", 22.0, 1000, null) ) // update and check spark.sql(s"update $tableName
> set price = price * 2 where id = 1") checkAnswer(s"select id, name, price,
> ts, new_col1 from $tableName")( Seq(1, "a1", 44.0, 1000, null) ) } }
> {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)