[
https://issues.apache.org/jira/browse/HUDI-2514?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Raymond Xu updated HUDI-2514:
-----------------------------
Priority: Critical (was: Blocker)
> Add default hiveTableSerdeProperties for Spark SQL when sync Hive
> -----------------------------------------------------------------
>
> Key: HUDI-2514
> URL: https://issues.apache.org/jira/browse/HUDI-2514
> Project: Apache Hudi
> Issue Type: Improvement
> Components: Spark Integration
> Reporter: 董可伦
> Assignee: 董可伦
> Priority: Critical
> Labels: pull-request-available
> Fix For: 0.11.0
>
>
> If the default hiveTableSerdeProperties are not added, Spark SQL will not
> work properly.
> For example, an update:
>
> {code:java}
> update hudi.test_hudi_table set price=333 where id=111;
> {code}
>
> The statement fails with the following exception:
> {code:java}
> 21/10/03 17:41:15 ERROR SparkSQLDriver: Failed in [update
> hudi.test_hudi_table set price=333 where id=111]
> java.lang.AssertionError: assertion failed: There are no primary key in table
> `hudi`.`test_hudi_table`, cannot execute update operator
> at scala.Predef$.assert(Predef.scala:170)
> at
> org.apache.spark.sql.hudi.command.UpdateHoodieTableCommand.buildHoodieConfig(UpdateHoodieTableCommand.scala:91)
> at
> org.apache.spark.sql.hudi.command.UpdateHoodieTableCommand.run(UpdateHoodieTableCommand.scala:73)
> at
> org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
> at
> org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
> at
> org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79)
> at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:194)
> at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:194)
> at org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3370)
> at
> org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:80)
> at
> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:127)
> at
> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:75)
> at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3369)
> at org.apache.spark.sql.Dataset.<init>(Dataset.scala:194)
> at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:79)
> at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:642)
> at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:694)
> at
> org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.run(SparkSQLDriver.scala:62)
> at
> org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:371)
> at
> org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:376)
> at
> org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(SparkSQLCLIDriver.scala:274)
> at
> org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala)
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> at
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> at java.lang.reflect.Method.invoke(Method.java:498)
> at
> org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
> at
> org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:845)
> at
> org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:161)
> at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:184)
> at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
> at
> org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:920)
> at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:929)
> at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
> java.lang.AssertionError: assertion failed: There are no primary key in table
> `hudi`.`test_hudi_table`, cannot execute update operator
> at scala.Predef$.assert(Predef.scala:170)
> at
> org.apache.spark.sql.hudi.command.UpdateHoodieTableCommand.buildHoodieConfig(UpdateHoodieTableCommand.scala:91)
> at
> org.apache.spark.sql.hudi.command.UpdateHoodieTableCommand.run(UpdateHoodieTableCommand.scala:73)
> at
> org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
> at
> org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
> at
> org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79)
> at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:194)
> at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:194)
> at org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3370)
> at
> org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:80)
> at
> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:127)
> at
> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:75)
> at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3369)
> at org.apache.spark.sql.Dataset.<init>(Dataset.scala:194)
> at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:79)
> at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:642)
> at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:694)
> at
> org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.run(SparkSQLDriver.scala:62)
> at
> org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:371)
> at
> org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:376)
> at
> org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(SparkSQLCLIDriver.scala:274)
> at
> org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala)
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> at
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> at java.lang.reflect.Method.invoke(Method.java:498)
> at
> org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
> at
> org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:845)
> at
> org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:161)
> at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:184)
> at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
> at
> org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:920)
> at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:929)
> at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
> {code}
>
>
--
This message was sent by Atlassian Jira
(v8.20.1#820001)