[ https://issues.apache.org/jira/browse/SPARK-53038?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Wan Kun updated SPARK-53038:
----------------------------
Description:

In the DeduplicateRelations rule, HiveGenericUDF creates many new HiveGenericUDFEvaluator instances, and each HiveGenericUDFEvaluator calls the GenericUDF initialize method once:

{code:java}
private[hive] case class HiveGenericUDF(
    name: String, funcWrapper: HiveFunctionWrapper, children: Seq[Expression]) {

  @transient
  private lazy val evaluator = new HiveGenericUDFEvaluator(funcWrapper, children)
}
{code}

The stack trace below shows dataType forcing a fresh evaluator (and hence GenericUDF initialize) from inside DeduplicateRelations:

{code}
at org.apache.spark.sql.hive.HiveGenericUDF.evaluator(hiveUDFs.scala:138)
at org.apache.spark.sql.hive.HiveGenericUDF.dataType$lzycompute(hiveUDFs.scala:135)
at org.apache.spark.sql.hive.HiveGenericUDF.dataType(hiveUDFs.scala:135)
at org.apache.spark.sql.catalyst.expressions.ExpectsInputTypes$$anonfun$1.applyOrElse(ExpectsInputTypes.scala:56)
at org.apache.spark.sql.catalyst.expressions.ExpectsInputTypes$$anonfun$1.applyOrElse(ExpectsInputTypes.scala:55)
at scala.collection.IterableOnceOps.collectFirst(IterableOnce.scala:1256)
at scala.collection.IterableOnceOps.collectFirst$(IterableOnce.scala:1248)
at scala.collection.AbstractIterable.collectFirst(Iterable.scala:935)
at org.apache.spark.sql.catalyst.expressions.ExpectsInputTypes$.checkInputDataTypes(ExpectsInputTypes.scala:55)
at org.apache.spark.sql.catalyst.expressions.ExpectsInputTypes.checkInputDataTypes(ExpectsInputTypes.scala:46)
at org.apache.spark.sql.catalyst.expressions.ExpectsInputTypes.checkInputDataTypes$(ExpectsInputTypes.scala:45)
at org.apache.spark.sql.catalyst.expressions.GetJsonObject.checkInputDataTypes(jsonExpressions.scala:137)
at org.apache.spark.sql.catalyst.expressions.Expression.resolved$lzycompute(Expression.scala:267)
at org.apache.spark.sql.catalyst.expressions.Expression.resolved(Expression.scala:267)
at org.apache.spark.sql.catalyst.expressions.Expression.$anonfun$childrenResolved$1(Expression.scala:279)
at org.apache.spark.sql.catalyst.expressions.Expression.$anonfun$childrenResolved$1$adapted(Expression.scala:279)
at scala.collection.IterableOnceOps.forall(IterableOnce.scala:633)
at scala.collection.IterableOnceOps.forall$(IterableOnce.scala:630)
at scala.collection.AbstractIterable.forall(Iterable.scala:935)
at org.apache.spark.sql.catalyst.expressions.Expression.childrenResolved(Expression.scala:279)
at org.apache.spark.sql.catalyst.expressions.Expression.resolved$lzycompute(Expression.scala:267)
at org.apache.spark.sql.catalyst.expressions.Expression.resolved(Expression.scala:267)
at org.apache.spark.sql.catalyst.expressions.Expression.$anonfun$childrenResolved$1(Expression.scala:279)
at org.apache.spark.sql.catalyst.expressions.Expression.$anonfun$childrenResolved$1$adapted(Expression.scala:279)
at scala.collection.IterableOnceOps.forall(IterableOnce.scala:633)
at scala.collection.IterableOnceOps.forall$(IterableOnce.scala:630)
at scala.collection.AbstractIterable.forall(Iterable.scala:935)
at org.apache.spark.sql.catalyst.expressions.Expression.childrenResolved(Expression.scala:279)
at org.apache.spark.sql.catalyst.expressions.Expression.resolved$lzycompute(Expression.scala:267)
at org.apache.spark.sql.catalyst.expressions.Expression.resolved(Expression.scala:267)
at org.apache.spark.sql.catalyst.expressions.Expression.$anonfun$childrenResolved$1(Expression.scala:279)
at org.apache.spark.sql.catalyst.expressions.Expression.$anonfun$childrenResolved$1$adapted(Expression.scala:279)
at scala.collection.IterableOnceOps.forall(IterableOnce.scala:633)
at scala.collection.IterableOnceOps.forall$(IterableOnce.scala:630)
at scala.collection.AbstractIterable.forall(Iterable.scala:935)
at org.apache.spark.sql.catalyst.expressions.Expression.childrenResolved(Expression.scala:279)
at org.apache.spark.sql.catalyst.expressions.Expression.resolved$lzycompute(Expression.scala:267)
at org.apache.spark.sql.catalyst.expressions.Expression.resolved(Expression.scala:267)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.$anonfun$resolved$1(LogicalPlan.scala:104)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.$anonfun$resolved$1$adapted(LogicalPlan.scala:104)
at scala.collection.immutable.List.forall(List.scala:387)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolved$lzycompute(LogicalPlan.scala:104)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolved(LogicalPlan.scala:104)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.$anonfun$childrenResolved$1(LogicalPlan.scala:111)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.$anonfun$childrenResolved$1$adapted(LogicalPlan.scala:111)
at scala.collection.IterableOnceOps.forall(IterableOnce.scala:633)
at scala.collection.IterableOnceOps.forall$(IterableOnce.scala:630)
at scala.collection.AbstractIterable.forall(Iterable.scala:935)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.childrenResolved(LogicalPlan.scala:111)
at org.apache.spark.sql.catalyst.plans.logical.Project.resolved$lzycompute(basicLogicalOperators.scala:89)
at org.apache.spark.sql.catalyst.plans.logical.Project.resolved(basicLogicalOperators.scala:81)
at org.apache.spark.sql.catalyst.analysis.DeduplicateRelations$.deduplicateAndRenew(DeduplicateRelations.scala:288)
at org.apache.spark.sql.catalyst.analysis.DeduplicateRelations$.org$apache$spark$sql$catalyst$analysis$DeduplicateRelations$$renewDuplicatedRelations(DeduplicateRelations.scala:119)
at org.apache.spark.sql.catalyst.analysis.DeduplicateRelations$.$anonfun$deduplicate$1(DeduplicateRelations.scala:212)
at org.apache.spark.sql.catalyst.analysis.DeduplicateRelations$.$anonfun$deduplicate$1$adapted(DeduplicateRelations.scala:211)
at scala.collection.immutable.Vector.foreach(Vector.scala:2124)
at org.apache.spark.sql.catalyst.analysis.DeduplicateRelations$.deduplicate(DeduplicateRelations.scala:211)
at org.apache.spark.sql.catalyst.analysis.DeduplicateRelations$.org$apache$spark$sql$catalyst$analysis$DeduplicateRelations$$renewDuplicatedRelations(DeduplicateRelations.scala:202)
at org.apache.spark.sql.catalyst.analysis.DeduplicateRelations$.$anonfun$deduplicate$1(DeduplicateRelations.scala:212)
at org.apache.spark.sql.catalyst.analysis.DeduplicateRelations$.$anonfun$deduplicate$1$adapted(DeduplicateRelations.scala:211)
at scala.collection.immutable.Vector.foreach(Vector.scala:2124)
at org.apache.spark.sql.catalyst.analysis.DeduplicateRelations$.deduplicate(DeduplicateRelations.scala:211)
at org.apache.spark.sql.catalyst.analysis.DeduplicateRelations$.org$apache$spark$sql$catalyst$analysis$DeduplicateRelations$$renewDuplicatedRelations(DeduplicateRelations.scala:202)
at org.apache.spark.sql.catalyst.analysis.DeduplicateRelations$.apply(DeduplicateRelations.scala:40)
at org.apache.spark.sql.catalyst.optimizer.InlineCTE.org$apache$spark$sql$catalyst$optimizer$InlineCTE$$inlineCTE(InlineCTE.scala:171)
at org.apache.spark.sql.catalyst.optimizer.InlineCTE.$anonfun$inlineCTE$3(InlineCTE.scala:187)
at scala.collection.immutable.Vector1.map(Vector.scala:2140)
at scala.collection.immutable.Vector1.map(Vector.scala:385)
at org.apache.spark.sql.catalyst.optimizer.InlineCTE.org$apache$spark$sql$catalyst$optimizer$InlineCTE$$inlineCTE(InlineCTE.scala:187)
at org.apache.spark.sql.catalyst.optimizer.InlineCTE.$anonfun$inlineCTE$3(InlineCTE.scala:187)
at scala.collection.immutable.Vector1.map(Vector.scala:2140)
at scala.collection.immutable.Vector1.map(Vector.scala:385)
at org.apache.spark.sql.catalyst.optimizer.InlineCTE.org$apache$spark$sql$catalyst$optimizer$InlineCTE$$inlineCTE(InlineCTE.scala:187)
at org.apache.spark.sql.catalyst.optimizer.InlineCTE.$anonfun$inlineCTE$3(InlineCTE.scala:187)
at scala.collection.immutable.Vector1.map(Vector.scala:2140)
at scala.collection.immutable.Vector1.map(Vector.scala:385)
at org.apache.spark.sql.catalyst.optimizer.InlineCTE.org$apache$spark$sql$catalyst$optimizer$InlineCTE$$inlineCTE(InlineCTE.scala:187)
at org.apache.spark.sql.catalyst.optimizer.InlineCTE.$anonfun$inlineCTE$3(InlineCTE.scala:187)
at scala.collection.immutable.Vector1.map(Vector.scala:2140)
at scala.collection.immutable.Vector1.map(Vector.scala:385)
at org.apache.spark.sql.catalyst.optimizer.InlineCTE.org$apache$spark$sql$catalyst$optimizer$InlineCTE$$inlineCTE(InlineCTE.scala:187)
at org.apache.spark.sql.catalyst.optimizer.InlineCTE.$anonfun$inlineCTE$3(InlineCTE.scala:187)
at scala.collection.immutable.Vector1.map(Vector.scala:2140)
at scala.collection.immutable.Vector1.map(Vector.scala:385)
at org.apache.spark.sql.catalyst.optimizer.InlineCTE.org$apache$spark$sql$catalyst$optimizer$InlineCTE$$inlineCTE(InlineCTE.scala:187)
at org.apache.spark.sql.catalyst.optimizer.InlineCTE.$anonfun$inlineCTE$1(InlineCTE.scala:155)
at scala.collection.immutable.Vector.foreach(Vector.scala:2124)
at org.apache.spark.sql.catalyst.optimizer.InlineCTE.org$apache$spark$sql$catalyst$optimizer$InlineCTE$$inlineCTE(InlineCTE.scala:152)
at org.apache.spark.sql.catalyst.optimizer.InlineCTE.apply(InlineCTE.scala:49)
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis(CheckAnalysis.scala:181)
at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis$(CheckAnalysis.scala:161)
at org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:191)
at org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$executeAndCheck$1(Analyzer.scala:213)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:393)
at org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:211)
{code}

We could cache the returnInspector to avoid the unnecessary initialization of HiveGenericUDFEvaluator.
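Below is a minimal, self-contained Scala sketch of the failure mode and of the caching idea. It deliberately avoids the real Hive and Spark classes: ToyUDF, ToyEvaluator, ToyExpr, CachedToyExpr, and InspectorCache are hypothetical stand-ins for GenericUDF, HiveGenericUDFEvaluator, and HiveGenericUDF, and the external map is only one possible way to memoize the returnInspector, not the actual patch.

{code:java}
import java.util.concurrent.ConcurrentHashMap
import java.util.concurrent.atomic.AtomicInteger

// Hypothetical stand-in for a Hive GenericUDF whose initialize() is expensive.
class ToyUDF {
  val initCalls = new AtomicInteger(0)
  // Returns a stand-in for the ObjectInspector that GenericUDF.initialize() produces.
  def initialize(): String = {
    initCalls.incrementAndGet()
    "returnInspector"
  }
}

// Stand-in for HiveGenericUDFEvaluator: touching returnInspector runs initialize().
class ToyEvaluator(udf: ToyUDF) {
  lazy val returnInspector: String = udf.initialize()
}

// Stand-in for HiveGenericUDF. Because evaluator is a lazy val on a case class,
// every copy of the expression created during plan rewriting (for example by
// DeduplicateRelations) builds a fresh evaluator, so initialize() runs per copy.
case class ToyExpr(udf: ToyUDF) {
  @transient private lazy val evaluator = new ToyEvaluator(udf)
  def dataType: String = evaluator.returnInspector
}

// One possible mitigation: memoize the inspector per UDF instance so that all
// copies of the expression share a single initialize() call.
object InspectorCache {
  private val cache = new ConcurrentHashMap[ToyUDF, String]()
  def inspectorFor(udf: ToyUDF): String =
    cache.computeIfAbsent(udf, u => u.initialize())
}

case class CachedToyExpr(udf: ToyUDF) {
  def dataType: String = InspectorCache.inspectorFor(udf)
}

object Demo extends App {
  val plain = new ToyUDF
  (1 to 5).foreach(_ => ToyExpr(plain).dataType)    // each copy re-initializes
  println(s"without cache: ${plain.initCalls.get} initialize calls")  // prints 5

  val cached = new ToyUDF
  (1 to 5).foreach(_ => CachedToyExpr(cached).dataType)
  println(s"with cache: ${cached.initCalls.get} initialize calls")    // prints 1
}
{code}

In the actual fix the memoization would presumably live inside HiveGenericUDF or HiveGenericUDFEvaluator rather than in a global map, and it would have to remain serialization-friendly since the expression is shipped to executors; the sketch only demonstrates that reusing one initialized inspector across expression copies collapses N initialize calls into one.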
was:
In the DeduplicateRelations rule, HiveGenericUDF creates many new HiveGenericUDFEvaluator instances, and each HiveGenericUDFEvaluator calls the GenericUDF initialize method once (the same {code} snippet and stack trace as above).
We should avoid the unnecessary initialization of HiveGenericUDFEvaluator.


> Call initialize only once per GenericUDF instance
> -------------------------------------------------
>
>                 Key: SPARK-53038
>                 URL: https://issues.apache.org/jira/browse/SPARK-53038
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 4.0.0
>            Reporter: Wan Kun
>            Priority: Major
>
> In the DeduplicateRelations rule, HiveGenericUDF creates many new
> HiveGenericUDFEvaluator instances, and each HiveGenericUDFEvaluator calls
> the GenericUDF initialize method once.
> We could cache the returnInspector to avoid the unnecessary initialization
> of HiveGenericUDFEvaluator.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org