[ https://issues.apache.org/jira/browse/HUDI-2279?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
董可伦 updated HUDI-2279: ---------------------- Description: Example: {code:java} val tableName = generateTableName // Create table spark.sql( s""" |create table $tableName ( | id int, | name string, | price double, | ts long, | dt string |) using hudi | location '${tmp.getCanonicalPath}/$tableName' | options ( | primaryKey ='id', | preCombineField = 'ts' | ) """.stripMargin) spark.sql( s""" |merge into ${tableName} as t0 |using ( | select 1 as id, '2021-05-05' as dt, 1002 as ts, 97 as price, 'a1' as name union all | select 1 as id, '2021-05-05' as dt, 1003 as ts, 98 as price, 'a2' as name union all | select 2 as id, '2021-05-05' as dt, 1001 as ts, 99 as price, 'a3' as name | ) as s0 |on t0.id = s0.id |when matched then update set * |when not matched then insert * |""".stripMargin) spark.sql(s"select id,name,price,dt from ${tableName}").show() {code} Fow now,the result is: +---+----------+-----+---+ | id| name|price| dt| +---+----------+-----+---+ | 1|2021-05-05| 98.0| a2| | 2|2021-05-05| 99.0| a3| +---+----------+-----+---+ When the order of the column types of souceTable is different from that of the column types of targetTable {code:java} spark.sql( s""" |merge into ${tableName} as t0 |using ( | select 1 as id, 'a1' as name, 1002 as ts, '2021-05-05' as dt, 97 as price union all | select 1 as id, 'a2' as name, 1003 as ts, '2021-05-05' as dt, 98 as price union all | select 2 as id, 'a3' as name, 1001 as ts, '2021-05-05' as dt, 99 as price | ) as s0 |on t0.id = s0.id |when matched then update set * |when not matched then insert * |""".stripMargin){code} It will throw an exception: {code:java} [ERROR] 2021-08-05 21:48:53,941 org.apache.hudi.io.HoodieWriteHandle - Error writing record HoodieRecord{key=HoodieKey { recordKey=id:2 partitionPath=}, currentLocation='null', newLocation='null'} java.lang.RuntimeException: Error in execute expression: org.apache.spark.unsafe.types.UTF8String cannot be cast to java.lang.Integer. Expressions is: [boundreference() AS `id` boundreference() AS `name` CAST(boundreference() AS `price` AS DOUBLE) CAST(boundreference() AS `ts` AS BIGINT) CAST(boundreference() AS `dt` AS STRING)] CodeBody is: { ...... Caused by: java.lang.ClassCastException: org.apache.spark.unsafe.types.UTF8String cannot be cast to java.lang.IntegerCaused by: java.lang.ClassCastException: org.apache.spark.unsafe.types.UTF8String cannot be cast to java.lang.Integer at org.apache.hudi.sql.payload.ExpressionPayloadEvaluator_366797ae_4c30_4862_8222_7be486ede4f8.eval(Unknown Source) at org.apache.spark.sql.hudi.command.payload.ExpressionPayload.org$apache$spark$sql$hudi$command$payload$ExpressionPayload$$evaluate(ExpressionPayload.scala:258) ... 18 more{code} > Support column name matching for insert * and update set * when sourceTable's > columns contains all targetTable's columns > ------------------------------------------------------------------------------------------------------------------------ > > Key: HUDI-2279 > URL: https://issues.apache.org/jira/browse/HUDI-2279 > Project: Apache Hudi > Issue Type: Improvement > Components: Spark Integration > Reporter: 董可伦 > Assignee: 董可伦 > Priority: Major > Fix For: 0.9.0 > > > Example: > {code:java} > val tableName = generateTableName > // Create table > spark.sql( > s""" > |create table $tableName ( > | id int, > | name string, > | price double, > | ts long, > | dt string > |) using hudi > | location '${tmp.getCanonicalPath}/$tableName' > | options ( > | primaryKey ='id', > | preCombineField = 'ts' > | ) > """.stripMargin) > spark.sql( > s""" > |merge into ${tableName} as t0 > |using ( > | select 1 as id, '2021-05-05' as dt, 1002 as ts, 97 as price, 'a1' as > name union all > | select 1 as id, '2021-05-05' as dt, 1003 as ts, 98 as price, 'a2' as > name union all > | select 2 as id, '2021-05-05' as dt, 1001 as ts, 99 as price, 'a3' as > name > | ) as s0 > |on t0.id = s0.id > |when matched then update set * > |when not matched then insert * > |""".stripMargin) > spark.sql(s"select id,name,price,dt from ${tableName}").show() > > {code} > Fow now,the result is: > +---+----------+-----+---+ > | id| name|price| dt| > +---+----------+-----+---+ > | 1|2021-05-05| 98.0| a2| > | 2|2021-05-05| 99.0| a3| > +---+----------+-----+---+ > When the order of the column types of souceTable is different from that of > the column types of targetTable > > {code:java} > spark.sql( > s""" > |merge into ${tableName} as t0 > |using ( > | select 1 as id, 'a1' as name, 1002 as ts, '2021-05-05' as dt, 97 as > price union all > | select 1 as id, 'a2' as name, 1003 as ts, '2021-05-05' as dt, 98 as > price union all > | select 2 as id, 'a3' as name, 1001 as ts, '2021-05-05' as dt, 99 as > price > | ) as s0 > |on t0.id = s0.id > |when matched then update set * > |when not matched then insert * > |""".stripMargin){code} > > It will throw an exception: > {code:java} > [ERROR] 2021-08-05 21:48:53,941 org.apache.hudi.io.HoodieWriteHandle - Error > writing record HoodieRecord{key=HoodieKey { recordKey=id:2 partitionPath=}, > currentLocation='null', newLocation='null'} > java.lang.RuntimeException: Error in execute expression: > org.apache.spark.unsafe.types.UTF8String cannot be cast to java.lang.Integer. > Expressions is: [boundreference() AS `id` boundreference() AS `name` > CAST(boundreference() AS `price` AS DOUBLE) CAST(boundreference() AS `ts` AS > BIGINT) CAST(boundreference() AS `dt` AS STRING)] > CodeBody is: { > ...... > Caused by: java.lang.ClassCastException: > org.apache.spark.unsafe.types.UTF8String cannot be cast to > java.lang.IntegerCaused by: java.lang.ClassCastException: > org.apache.spark.unsafe.types.UTF8String cannot be cast to java.lang.Integer > at > org.apache.hudi.sql.payload.ExpressionPayloadEvaluator_366797ae_4c30_4862_8222_7be486ede4f8.eval(Unknown > Source) at > org.apache.spark.sql.hudi.command.payload.ExpressionPayload.org$apache$spark$sql$hudi$command$payload$ExpressionPayload$$evaluate(ExpressionPayload.scala:258) > ... 18 more{code} > -- This message was sent by Atlassian Jira (v8.3.4#803005)