[
https://issues.apache.org/jira/browse/SPARK-49042?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17869671#comment-17869671
]
Wei Guo commented on SPARK-49042:
---------------------------------
[~arnaud.nauwynck] Can you provide some code to construct a dataset to
reproduce this warning log?
> CodeGenerator: Error calculating stats of compiled class.
> java.lang.UnsupportedOperationException: empty.max
> ------------------------------------------------------------------------------------------------------------
>
> Key: SPARK-49042
> URL: https://issues.apache.org/jira/browse/SPARK-49042
> Project: Spark
> Issue Type: Bug
> Components: Spark Core
> Affects Versions: 3.1.1, 3.5.1
> Reporter: Arnaud Nauwynck
> Priority: Major
>
> CodeGenerator (here using "dataset.count()") generates WARN logs for some
> datasets.
> The thrown exception is caught, an error log is printed, and the code statistics
> are WRONG because they are incremented by "(-1, -1)" instead of the real values.
> Here is the error log:
> {noformat}
> WARN CodeGenerator: Error calculating stats of compiled class.
> java.lang.UnsupportedOperationException: empty.max
> at scala.collection.TraversableOnce.max(TraversableOnce.scala:234)
> at scala.collection.TraversableOnce.max$(TraversableOnce.scala:232)
> at scala.collection.AbstractTraversable.max(Traversable.scala:108)
> at
> org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.$anonfun$updateAndGetCompilationStats$1(CodeGenerator.scala:1470)
> at
> scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:238)
> at scala.collection.Iterator.foreach(Iterator.scala:941)
> at scala.collection.Iterator.foreach$(Iterator.scala:941)
> at scala.collection.AbstractIterator.foreach(Iterator.scala:1429)
> at scala.collection.IterableLike.foreach(IterableLike.scala:74)
> at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
> at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
> at scala.collection.TraversableLike.map(TraversableLike.scala:238)
> at scala.collection.TraversableLike.map$(TraversableLike.scala:231)
> at scala.collection.AbstractTraversable.map(Traversable.scala:108)
> at
> org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.updateAndGetCompilationStats(CodeGenerator.scala:1451)
> at
> org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.org$apache$spark$sql$catalyst$expressions$codegen$CodeGenerator$$doCompile(CodeGenerator.scala:1405)
> at
> org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:1501)
> at
> org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:1498)
> at
> org.sparkproject.guava.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3599)
> at
> org.sparkproject.guava.cache.LocalCache$Segment.loadSync(LocalCache.java:2379)
> at
> org.sparkproject.guava.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2342)
> at
> org.sparkproject.guava.cache.LocalCache$Segment.get(LocalCache.java:2257)
> at org.sparkproject.guava.cache.LocalCache.get(LocalCache.java:4000)
> at
> org.sparkproject.guava.cache.LocalCache.getOrLoad(LocalCache.java:4004)
> at
> org.sparkproject.guava.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4874)
> at
> org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.compile(CodeGenerator.scala:1352)
> at
> org.apache.spark.sql.execution.WholeStageCodegenExec.liftedTree1$1(WholeStageCodegenExec.scala:721)
> at
> org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:720)
> at
> org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:180)
> at
> org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:218)
> at
> org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
> at
> org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:215)
> at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:176)
> at
> org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:321)
> at
> org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:387)
> at org.apache.spark.sql.Dataset.$anonfun$count$1(Dataset.scala:3006)
> at
> org.apache.spark.sql.Dataset.$anonfun$count$1$adapted(Dataset.scala:3005)
> at
> org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3687)
> at
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
> at
> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
> at
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
> at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
> at
> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
> at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3685)
> at org.apache.spark.sql.Dataset.count(Dataset.scala:3005)
> {noformat}
> Here is corresponding code:
> https://github.com/apache/spark/blob/master/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala#L1604
> The variable methodCodeSizes is empty, therefore the method ".max" throws
> an exception
> (instead of returning 0, which would be correct).
> {noformat}
> private def updateAndGetCompilationStats(evaluator: ClassBodyEvaluator):
> ByteCodeStats = {
> // First retrieve the generated classes.
> val classes = evaluator.getBytecodes.asScala
> // Then walk the classes to get at the method bytecode.
> val codeAttr =
> Utils.classForName("org.codehaus.janino.util.ClassFile$CodeAttribute")
> val codeAttrField = codeAttr.getDeclaredField("code")
> codeAttrField.setAccessible(true)
> val codeStats = classes.map { case (_, classBytes) =>
> val classCodeSize = classBytes.length
>
> CodegenMetrics.METRIC_GENERATED_CLASS_BYTECODE_SIZE.update(classCodeSize)
> try {
> val cf = new ClassFile(new ByteArrayInputStream(classBytes))
> val constPoolSize = cf.getConstantPoolSize
> val methodCodeSizes = cf.methodInfos.asScala.flatMap { method => //
> <== methodInfos EMPTY ??
> method.getAttributes().filter(_.getClass eq codeAttr).map { a =>
> ... truncated ....
> }
> }
> (methodCodeSizes.max, constPoolSize) // <=== EMPTY, throw
> } catch {
> case NonFatal(e) =>
> logWarning("Error calculating stats of compiled class.", e) //
> <=== the exception printed in log
> (-1, -1) // <==== the value accumulated to statistics... then
> continuing "normally"
> }
> }
> {noformat}
> Maybe the code could be changed to add
> {noformat}
> if (cf.methodInfos.isEmpty) {
> return (0, 0)
> }
> {noformat}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]