[
https://issues.apache.org/jira/browse/HUDI-5835?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Tao Meng updated HUDI-5835:
---------------------------
Description:
The Avro schema created by Spark SQL will be missing the Avro name and namespace.
This will cause the read schema and the write schema of the log file to be
incompatible.
{code:java}
// code placeholder
{code}
test("Test Add Column and Update Table") { withTempDir { tmp => val tableName
= generateTableName //spark.sql("SET
hoodie.datasource.read.extract.partition.values.from.path=true") val tablePath
= new Path(tmp.getCanonicalPath, tableName) // create table spark.sql( s"""
|create table $tableName ( | id int, | name string, | price double, | ts long,
| ff decimal(38, 10) |) using hudi | location '${tablePath.toString}' |
tblproperties ( | type = 'mor', | primaryKey = 'id', | preCombineField = 'ts' |
) """.stripMargin) // insert data to table spark.sql(s"insert into $tableName
select 1, 'a1', 10, 1000, 10.0") checkAnswer(s"select id, name, price, ts from
$tableName")( Seq(1, "a1", 10.0, 1000) ) spark.sql(s"update $tableName set
price = 22 where id = 1") checkAnswer(s"select id, name, price, ts from
$tableName")( Seq(1, "a1", 22.0, 1000) ) spark.sql(s"alter table $tableName add
column new_col1 int") checkAnswer(s"select id, name, price, ts, new_col1 from
$tableName")( Seq(1, "a1", 22.0, 1000, null) ) // update and check
spark.sql(s"update $tableName set price = price * 2 where id = 1")
checkAnswer(s"select id, name, price, ts, new_col1 from $tableName")( Seq(1,
"a1", 44.0, 1000, null) ) } }
was:
The Avro schema created by Spark SQL will be missing the Avro name and namespace.
This will cause the read schema and the write schema of the log file to be
incompatible.
test("Test Add Column and Update Table") {
withTempDir { tmp =>
val tableName = generateTableName
//spark.sql("SET
hoodie.datasource.read.extract.partition.values.from.path=true")
val tablePath = new Path(tmp.getCanonicalPath, tableName)
// create table
spark.sql(
s"""
|create table $tableName (
| id int,
| name string,
| price double,
| ts long,
| ff decimal(38, 10)
|) using hudi
| location '${tablePath.toString}'
| tblproperties (
| type = 'mor',
| primaryKey = 'id',
| preCombineField = 'ts'
| )
""".stripMargin)
// insert data to table
spark.sql(s"insert into $tableName select 1, 'a1', 10, 1000, 10.0")
checkAnswer(s"select id, name, price, ts from $tableName")(
Seq(1, "a1", 10.0, 1000)
)
spark.sql(s"update $tableName set price = 22 where id = 1")
checkAnswer(s"select id, name, price, ts from $tableName")(
Seq(1, "a1", 22.0, 1000)
)
spark.sql(s"alter table $tableName add column new_col1 int")
checkAnswer(s"select id, name, price, ts, new_col1 from $tableName")(
Seq(1, "a1", 22.0, 1000, null)
)
// update and check
spark.sql(s"update $tableName set price = price * 2 where id = 1")
checkAnswer(s"select id, name, price, ts, new_col1 from $tableName")(
Seq(1, "a1", 44.0, 1000, null)
)
}
}
> spark cannot read mor table after execute update statement
> ----------------------------------------------------------
>
> Key: HUDI-5835
> URL: https://issues.apache.org/jira/browse/HUDI-5835
> Project: Apache Hudi
> Issue Type: Bug
> Components: spark
> Affects Versions: 0.13.0
> Reporter: Tao Meng
> Priority: Blocker
>
> The Avro schema created by Spark SQL will be missing the Avro name and namespace.
> This will cause the read schema and the write schema of the log file to be
> incompatible.
>
> {code:java}
> // code placeholder
> {code}
> test("Test Add Column and Update Table") { withTempDir { tmp => val
> tableName = generateTableName //spark.sql("SET
> hoodie.datasource.read.extract.partition.values.from.path=true") val
> tablePath = new Path(tmp.getCanonicalPath, tableName) // create table
> spark.sql( s""" |create table $tableName ( | id int, | name string, | price
> double, | ts long, | ff decimal(38, 10) |) using hudi | location
> '${tablePath.toString}' | tblproperties ( | type = 'mor', | primaryKey =
> 'id', | preCombineField = 'ts' | ) """.stripMargin) // insert data to table
> spark.sql(s"insert into $tableName select 1, 'a1', 10, 1000, 10.0")
> checkAnswer(s"select id, name, price, ts from $tableName")( Seq(1, "a1",
> 10.0, 1000) ) spark.sql(s"update $tableName set price = 22 where id = 1")
> checkAnswer(s"select id, name, price, ts from $tableName")( Seq(1, "a1",
> 22.0, 1000) ) spark.sql(s"alter table $tableName add column new_col1 int")
> checkAnswer(s"select id, name, price, ts, new_col1 from $tableName")( Seq(1,
> "a1", 22.0, 1000, null) ) // update and check spark.sql(s"update $tableName
> set price = price * 2 where id = 1") checkAnswer(s"select id, name, price,
> ts, new_col1 from $tableName")( Seq(1, "a1", 44.0, 1000, null) ) } }
--
This message was sent by Atlassian Jira
(v8.20.10#820010)