chandu-1101 commented on issue #9141:
URL: https://github.com/apache/hudi/issues/9141#issuecomment-1631095205
When I change the Scala code to the following (exactly as described in the
quick start guide), the insert itself fails.
```
val sess = Application.spark();
val snapshotDf =
sess.read.parquet("s3://bucket/snapshots-test/ge11-drop/")
val cdcSchema1 =
SparkUtils.getSchema("s3://bucket/schemas/GE11GLOBAL_candidates-CandidatesList.json")
val cdcDf =
sess.read.schema(cdcSchema1).json("s3://bucket/inputs-test/ge11-drop/*")
snapshotDf.createOrReplaceTempView("snapshot")
val snapshotDf2 = snapshotDf.limit(4).withColumn("cdc_pk",lit("0"))
snapshotDf2.write.format("hudi")
.options(getQuickstartWriteConfigs)
.option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY, "cdc_pk")
.option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY, "_id.oid")
.option(HoodieWriteConfig.TABLE_NAME,"GE11")
.mode(SaveMode.Overwrite)
.save("s3://bucket/snapshots-hudi/ge11-drop/snapshot");
```
I start the Spark shell as follows:
```
spark-shell --driver-memory 1g --executor-memory 4g --executor-cores 1
--driver-cores 1 --conf spark.dynamicAllocation.maxExecutors=2 --conf
"spark.serializer=org.apache.spark.serializer.KryoSerializer" --conf
"spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension"
--conf
"spark.sql.catalog.spark_catalog=org.apache.spark.sql.hudi.catalog.HoodieCatalog"
--conf "spark.kryo.registrator=org.apache.spark.HoodieSparkKryoRegistrar"
--conf spark.sql.legacy.parquet.int96RebaseModeInRead=CORRECTED --conf
spark.sql.legacy.parquet.int96RebaseModeInWrite=CORRECTED --conf
spark.sql.legacy.parquet.datetimeRebaseModeInRead=CORRECTED --conf
spark.sql.legacy.parquet.datetimeRebaseModeInWrite=CORRECTED --name ravic
--packages org.apache.hudi:hudi-spark3.3-bundle_2.12:0.13.1 --jars
/home/hadoop/jars2/spark-1.0-SNAPSHOT.jar,/home/hadoop/hudi/hudi-release-0.12.3/packaging/hudi-spark-bundle/target/hudi-spark3.3-bundle_2.12-0.12.3.jar
```
Exception (I am unable to get the basic insert working)
```
07-11 15:58:17 ${sys:config.appname} WARN DAGScheduler: Broadcasting large
task binary with size 1032.2 KiB
07-11 15:58:18 ${sys:config.appname} ERROR HoodieSparkSqlWriter$: UPSERT
failed with errors
org.apache.hudi.exception.HoodieException: Write to Hudi failed
at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:148)
at
org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:45)
at
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:75)
at
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:73)
at
org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:84)
at
org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:103)
at
org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:107)
at
org.apache.spark.sql.execution.SQLExecution$.withTracker(SQLExecution.scala:224)
at
org.apache.spark.sql.execution.SQLExecution$.executeQuery$1(SQLExecution.scala:114)
at
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$7(SQLExecution.scala:139)
at
org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:107)
at
org.apache.spark.sql.execution.SQLExecution$.withTracker(SQLExecution.scala:224)
at
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:139)
at
org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:245)
at
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:138)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:779)
at
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:68)
at
org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:100)
at
org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:96)
at
org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:615)
at
org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:177)
at
org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:615)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
at
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
at
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
at
org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:591)
at
org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:96)
at
org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:83)
at
org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:81)
at
org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:124)
at
org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:860)
at
org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:390)
at
org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:363)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
... 59 elided
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]