SusurHe opened a new issue #3821:
URL: https://github.com/apache/iceberg/issues/3821
This error is occasionally encountered when Spark executes `MERGE INTO`:
`Cannot find column 'xxx' of the target table among...`.
The code that triggers the error runs inside a `foreachPartition`-style loop, so
several `MERGE INTO` statements against the table are executed in parallel. One
possible cause is insufficient resources.
I am sure that the columns of the source table and the columns of the target
table are identical, and the `targetTable` appears to be correct when
`alignInsertActionAssignments` of the class `AlignRowLevelOperations` is
executed, but the columns resolved for the source table are wrong.
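
For reference, a minimal sketch of how the merges are issued in parallel. This is not the actual job code: the thread pool, the `jobIds` values, and the `source_updates` view are placeholders (the real job drives this through a custom `foreachColumnValuePar` helper, visible in the stack trace below); only the target table `iceberg.db_1.option` is taken from the log.

```scala
import java.util.concurrent.Executors

import scala.concurrent.{Await, ExecutionContext, Future}
import scala.concurrent.duration.Duration

import org.apache.spark.sql.SparkSession

object ParallelMergeSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("parallel-merge-sketch").getOrCreate()

    // A small thread pool stands in for the job's foreachColumnValuePar helper.
    implicit val ec: ExecutionContext =
      ExecutionContext.fromExecutor(Executors.newFixedThreadPool(4))

    // Placeholder slice values; each one drives an independent MERGE INTO
    // against the same Iceberg table through the shared SparkSession.
    val jobIds = Seq("job-a", "job-b", "job-c", "job-d")

    val merges = jobIds.map { jobId =>
      Future {
        // "source_updates" is a placeholder name for the per-slice source view.
        spark.sql(
          s"""MERGE INTO iceberg.db_1.option t
             |USING source_updates s
             |ON t.id = s.id AND s.job_id = '$jobId'
             |WHEN MATCHED THEN UPDATE SET *
             |WHEN NOT MATCHED THEN INSERT *""".stripMargin)
      }
    }

    Await.result(Future.sequence(merges), Duration.Inf)
    spark.stop()
  }
}
```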
- error log:
```
21/12/27 00:24:40 INFO ReplaceDataExec: Data source write support IcebergBatchWrite(table=iceberg.db_1.option, format=PARQUET) committed.
21/12/27 00:24:40 WARN MongoCdcJob: update metadata prod-app[1640535050200] status to failed
org.apache.spark.sql.AnalysisException: Cannot find column 'custom_attr' of the target table among the INSERT columns: addor, kafka_part, bindsina, bindmobile, ns, birth_month, bindqq, completeinfo, kafka_topic, job_id, kafka_ts, id, cityid, kafka_offset, awardcount, op_ts, bindweibo, check_email_code, bindemail, created, birth_year, op. INSERT clauses must provide values for all columns of the target table.
    at org.apache.spark.sql.catalyst.analysis.AlignRowLevelOperations$.$anonfun$alignInsertActionAssignments$1(AlignRowLevelOperations.scala:100)
    at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:238)
    at scala.collection.immutable.List.foreach(List.scala:392)
    at scala.collection.TraversableLike.map(TraversableLike.scala:238)
    at scala.collection.TraversableLike.map$(TraversableLike.scala:231)
    at scala.collection.immutable.List.map(List.scala:298)
    at org.apache.spark.sql.catalyst.analysis.AlignRowLevelOperations$.org$apache$spark$sql$catalyst$analysis$AlignRowLevelOperations$$alignInsertActionAssignments(AlignRowLevelOperations.scala:94)
    at org.apache.spark.sql.catalyst.analysis.AlignRowLevelOperations$$anonfun$apply$1.$anonfun$applyOrElse$2(AlignRowLevelOperations.scala:79)
    at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:238)
    at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
    at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
    at scala.collection.TraversableLike.map(TraversableLike.scala:238)
    at scala.collection.TraversableLike.map$(TraversableLike.scala:231)
    at scala.collection.AbstractTraversable.map(Traversable.scala:108)
    at org.apache.spark.sql.catalyst.analysis.AlignRowLevelOperations$$anonfun$apply$1.applyOrElse(AlignRowLevelOperations.scala:53)
    at org.apache.spark.sql.catalyst.analysis.AlignRowLevelOperations$$anonfun$apply$1.applyOrElse(AlignRowLevelOperations.scala:39)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsDown$2(AnalysisHelper.scala:110)
    at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:74)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsDown$1(AnalysisHelper.scala:110)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:223)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsDown(AnalysisHelper.scala:108)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsDown$(AnalysisHelper.scala:106)
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsDown(LogicalPlan.scala:29)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperators(AnalysisHelper.scala:73)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperators$(AnalysisHelper.scala:72)
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperators(LogicalPlan.scala:29)
    at org.apache.spark.sql.catalyst.analysis.AlignRowLevelOperations$.apply(AlignRowLevelOperations.scala:39)
    at org.apache.spark.sql.catalyst.analysis.AlignRowLevelOperations$.apply(AlignRowLevelOperations.scala:34)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$2(RuleExecutor.scala:216)
    at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
    at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
    at scala.collection.immutable.List.foldLeft(List.scala:89)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1(RuleExecutor.scala:213)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1$adapted(RuleExecutor.scala:205)
    at scala.collection.immutable.List.foreach(List.scala:392)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:205)
    at org.apache.spark.sql.catalyst.analysis.Analyzer.org$apache$spark$sql$catalyst$analysis$Analyzer$$executeSameContext(Analyzer.scala:196)
    at org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:190)
    at org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:155)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$executeAndTrack$1(RuleExecutor.scala:183)
    at org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:88)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.executeAndTrack(RuleExecutor.scala:183)
    at org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$executeAndCheck$1(Analyzer.scala:174)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:230)
    at org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:173)
    at org.apache.spark.sql.execution.QueryExecution.$anonfun$analyzed$1(QueryExecution.scala:73)
    at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111)
    at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:143)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
    at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:143)
    at org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:73)
    at org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:71)
    at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:63)
    at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:98)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
    at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:96)
    at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:618)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
    at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:613)
    at com.zy.dp.xenon.ng.iceberg.IcebergSinkTable.mergeInto(IcebergSinkTable.scala:29)
    at com.zy.dp.xenon.ng.MongoCdcJob.$anonfun$processData$4(MongoCdcJob.scala:188)
    at com.zy.dp.xenon.ng.MongoCdcJob.$anonfun$processData$4$adapted(MongoCdcJob.scala:144)
    at com.zy.dp.xenon.common.conversion.RichDataFrame$ImplicitRichDataFrame.$anonfun$foreachColumnValuePar$4(RichDataFrame.scala:80)
    at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
    at scala.concurrent.Future$.$anonfun$apply$1(Future.scala:659)
    at scala.util.Success.$anonfun$map$1(Try.scala:255)
    at scala.util.Success.map(Try.scala:213)
    at scala.concurrent.Future.$anonfun$map$1(Future.scala:292)
    at scala.concurrent.impl.Promise.liftedTree1$1(Promise.scala:33)
    at scala.concurrent.impl.Promise.$anonfun$transform$1(Promise.scala:33)
    at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:64)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
21/12/27 00:24:40 ERROR ApplicationMaster: User class threw exception: org.apache.spark.sql.AnalysisException: Cannot find column 'custom_attr' of the target table among the INSERT columns: addor, kafka_part, bindsina, bindmobile, ns, birth_month, bindqq, completeinfo, kafka_topic, job_id, kafka_ts, id, cityid, kafka_offset, awardcount, op_ts, bindweibo, check_email_code, bindemail, created, birth_year, op. INSERT clauses must provide values for all columns of the target table.
org.apache.spark.sql.AnalysisException: Cannot find column 'custom_attr' of the target table among the INSERT columns: addor, kafka_part, bindsina, bindmobile, ns, birth_month, bindqq, completeinfo, kafka_topic, job_id, kafka_ts, id, cityid, kafka_offset, awardcount, op_ts, bindweibo, check_email_code, bindemail, created, birth_year, op. INSERT clauses must provide values for all columns of the target table.
```
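
If I read the error right, the analyzer derives the INSERT column list from the source relation (for example when the merge uses an `INSERT *` action) and then checks it against the target schema, so the exception implies `custom_attr` was missing from the source side at the moment this particular statement was analyzed. Below is a small diagnostic sketch, not part of the job, that checks this condition before running the merge; `source_updates` is a placeholder view name, and only `iceberg.db_1.option` comes from the log.

```scala
import org.apache.spark.sql.SparkSession

object MergeSchemaCheck {
  // Diagnostic only: confirm that the source of the merge exposes every
  // column of the target table right before the statement is submitted.
  def assertSourceCoversTarget(spark: SparkSession): Unit = {
    val targetCols = spark.table("iceberg.db_1.option").schema.fieldNames.toSet
    val sourceCols = spark.table("source_updates").schema.fieldNames.toSet
    val missing = targetCols -- sourceCols
    if (missing.nonEmpty) {
      // A non-empty result here (e.g. Set("custom_attr")) would match the
      // AnalysisException thrown from alignInsertActionAssignments.
      throw new IllegalStateException(
        s"Source is missing target columns: ${missing.mkString(", ")}")
    }
  }
}
```

Running this check in the same worker thread, just before each `spark.sql(...)` merge, would show whether the source schema really matches the target at that point or only appears to when inspected outside the parallel loop.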