[ 
https://issues.apache.org/jira/browse/HUDI-5835?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Tao Meng updated HUDI-5835:
---------------------------
    Description: 
The Avro schema created by Spark SQL is missing the Avro name and namespace.

This causes the read schema and the write schema of the log file to be
incompatible.

 
{code:java}
// code placeholder
 spark.sql(
   s"""
      |create table $tableName (
      |  id int,
      |  name string,
      |  price double,
      |  ts long,
      |  ff decimal(38, 10)
      |) using hudi
      | location '${tablePath.toString}'
      | tblproperties (
      |  type = 'mor',
      |  primaryKey = 'id',
      |  preCombineField = 'ts'
      | )
""".stripMargin)
 spark.sql(s"insert into $tableName select 1, 'a1', 10, 1000, 10.0")
checkAnswer(s"select id, name, price, ts from $tableName")(
  Seq(1, "a1", 10.0, 1000)
)
spark.sql(s"update $tableName set price = 22 where id = 1")
checkAnswer(s"select id, name, price, ts from $tableName")(   -------- failed
  Seq(1, "a1", 22.0, 1000)
)

{code}
 

  was:
avro schema create by sparksql will miss avro name and namespace, 

This will lead the read schema and write schema of the log file to be 
incompatible

 
{code:java}
// code placeholdertest("Test Add Column and Update Table") { withTempDir { tmp 
=> val tableName = generateTableName //spark.sql("SET 
hoodie.datasource.read.extract.partition.values.from.path=true") val tablePath 
= new Path(tmp.getCanonicalPath, tableName) // create table spark.sql( s""" 
|create table $tableName ( | id int, | name string, | price double, | ts long, 
| ff decimal(38, 10) |) using hudi | location '${tablePath.toString}' | 
tblproperties ( | type = 'mor', | primaryKey = 'id', | preCombineField = 'ts' | 
) """.stripMargin) // insert data to table spark.sql(s"insert into $tableName 
select 1, 'a1', 10, 1000, 10.0") checkAnswer(s"select id, name, price, ts from 
$tableName")( Seq(1, "a1", 10.0, 1000) ) spark.sql(s"update $tableName set 
price = 22 where id = 1") checkAnswer(s"select id, name, price, ts from 
$tableName")( Seq(1, "a1", 22.0, 1000) ) spark.sql(s"alter table $tableName add 
column new_col1 int") checkAnswer(s"select id, name, price, ts, new_col1 from 
$tableName")( Seq(1, "a1", 22.0, 1000, null) ) // update and check 
spark.sql(s"update $tableName set price = price * 2 where id = 1") 
checkAnswer(s"select id, name, price, ts, new_col1 from $tableName")( Seq(1, 
"a1", 44.0, 1000, null) ) } }

{code}


> spark cannot read mor table after execute update statement
> ----------------------------------------------------------
>
>                 Key: HUDI-5835
>                 URL: https://issues.apache.org/jira/browse/HUDI-5835
>             Project: Apache Hudi
>          Issue Type: Bug
>          Components: spark
>    Affects Versions: 0.13.0
>            Reporter: Tao Meng
>            Priority: Blocker
>
> The Avro schema created by Spark SQL is missing the Avro name and namespace.
> This causes the read schema and the write schema of the log file to be
> incompatible.
>  
> {code:java}
> // code placeholder
>  spark.sql(
>    s"""
>       |create table $tableName (
>       |  id int,
>       |  name string,
>       |  price double,
>       |  ts long,
>       |  ff decimal(38, 10)
>       |) using hudi
>       | location '${tablePath.toString}'
>       | tblproperties (
>       |  type = 'mor',
>       |  primaryKey = 'id',
>       |  preCombineField = 'ts'
>       | )
> """.stripMargin)
>  spark.sql(s"insert into $tableName select 1, 'a1', 10, 1000, 10.0")
> checkAnswer(s"select id, name, price, ts from $tableName")(
>   Seq(1, "a1", 10.0, 1000)
> )
> spark.sql(s"update $tableName set price = 22 where id = 1")
> checkAnswer(s"select id, name, price, ts from $tableName")(   -------- failed
>   Seq(1, "a1", 22.0, 1000)
> )
> {code}
>  



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to