[ https://issues.apache.org/jira/browse/SPARK-30711?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Hyukjin Kwon resolved SPARK-30711.
----------------------------------
Resolution: Cannot Reproduce
> 64KB JVM bytecode limit - janino.InternalCompilerException
> ----------------------------------------------------------
>
> Key: SPARK-30711
> URL: https://issues.apache.org/jira/browse/SPARK-30711
> Project: Spark
> Issue Type: Bug
> Components: SQL
> Affects Versions: 2.4.0, 2.4.1, 2.4.2, 2.4.3, 2.4.4
> Environment: Windows 10
> Spark 2.4.4
> scalaVersion 2.11.12
> JVM Oracle 1.8.0_221-b11
> Reporter: Frederik Schreiber
> Priority: Major
>
> Exception
> {code:java}
> ERROR CodeGenerator: failed to compile: org.codehaus.janino.InternalCompilerException: Compiling "GeneratedClass": Code of method "processNext()V" of class "org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage4" grows beyond 64 KB
> org.codehaus.janino.InternalCompilerException: Compiling "GeneratedClass": Code of method "processNext()V" of class "org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage4" grows beyond 64 KB
>   at org.codehaus.janino.UnitCompiler.compileUnit(UnitCompiler.java:382)
>   at org.codehaus.janino.SimpleCompiler.cook(SimpleCompiler.java:237)
>   at org.codehaus.janino.SimpleCompiler.compileToClassLoader(SimpleCompiler.java:465)
>   at org.codehaus.janino.ClassBodyEvaluator.compileToClass(ClassBodyEvaluator.java:313)
>   at org.codehaus.janino.ClassBodyEvaluator.cook(ClassBodyEvaluator.java:235)
>   at org.codehaus.janino.SimpleCompiler.cook(SimpleCompiler.java:207)
>   at org.codehaus.commons.compiler.Cookable.cook(Cookable.java:80)
>   at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.org$apache$spark$sql$catalyst$expressions$codegen$CodeGenerator$$doCompile(CodeGenerator.scala:1290)
>   at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:1372)
>   at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:1369)
>   at org.spark_project.guava.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3599)
>   at org.spark_project.guava.cache.LocalCache$Segment.loadSync(LocalCache.java:2379)
>   at org.spark_project.guava.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2342)
>   at org.spark_project.guava.cache.LocalCache$Segment.get(LocalCache.java:2257)
>   at org.spark_project.guava.cache.LocalCache.get(LocalCache.java:4000)
>   at org.spark_project.guava.cache.LocalCache.getOrLoad(LocalCache.java:4004)
>   at org.spark_project.guava.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4874)
>   at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.compile(CodeGenerator.scala:1238)
>   at org.apache.spark.sql.execution.WholeStageCodegenExec.liftedTree1$1(WholeStageCodegenExec.scala:584)
>   at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:583)
>   at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
>   at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127)
>   at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155)
>   at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
>   at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
>   at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
>   at org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:247)
>   at org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:296)
>   at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectFromPlan(Dataset.scala:3384)
>   at org.apache.spark.sql.Dataset$$anonfun$collect$1.apply(Dataset.scala:2783)
>   at org.apache.spark.sql.Dataset$$anonfun$collect$1.apply(Dataset.scala:2783)
>   at org.apache.spark.sql.Dataset$$anonfun$53.apply(Dataset.scala:3365)
>   at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78)
>   at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
>   at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
>   at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3364)
>   at org.apache.spark.sql.Dataset.collect(Dataset.scala:2783)
>   at de.sparkbug.janino.SparkJaninoBug$$anonfun$1.apply(SparkJaninoBug.scala:105)
>   at de.sparkbug.janino.SparkJaninoBug$$anonfun$1.apply(SparkJaninoBug.scala:12)
>   at org.scalatest.OutcomeOf$class.outcomeOf(OutcomeOf.scala:85)
>   at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
>   at org.scalatest.Transformer.apply(Transformer.scala:22)
>   at org.scalatest.Transformer.apply(Transformer.scala:20)
>   at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:186)
>   at org.scalatest.TestSuite$class.withFixture(TestSuite.scala:196)
>   at org.scalatest.FunSuite.withFixture(FunSuite.scala:1560)
>   at org.scalatest.FunSuiteLike$class.invokeWithFixture$1(FunSuiteLike.scala:183)
>   at org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:196)
>   at org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:196)
>   at org.scalatest.SuperEngine.runTestImpl(Engine.scala:286)
>   at org.scalatest.FunSuiteLike$class.runTest(FunSuiteLike.scala:196)
>   at org.scalatest.FunSuite.runTest(FunSuite.scala:1560)
>   at org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:229)
>   at org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:229)
>   at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:393)
>   at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:381)
>   at scala.collection.immutable.List.foreach(List.scala:392)
>   at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:381)
>   at org.scalatest.SuperEngine.org$scalatest$SuperEngine$$runTestsInBranch(Engine.scala:376)
>   at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:458)
>   at org.scalatest.FunSuiteLike$class.runTests(FunSuiteLike.scala:229)
>   at org.scalatest.FunSuite.runTests(FunSuite.scala:1560)
>   at org.scalatest.Suite$class.run(Suite.scala:1124)
>   at org.scalatest.FunSuite.org$scalatest$FunSuiteLike$$super$run(FunSuite.scala:1560)
>   at org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:233)
>   at org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:233)
>   at org.scalatest.SuperEngine.runImpl(Engine.scala:518)
>   at org.scalatest.FunSuiteLike$class.run(FunSuiteLike.scala:233)
>   at org.scalatest.FunSuite.run(FunSuite.scala:1560)
>   at org.scalatest.tools.SuiteRunner.run(SuiteRunner.scala:45)
>   at org.scalatest.tools.Runner$$anonfun$doRunRunRunDaDoRunRun$1.apply(Runner.scala:1349)
>   at org.scalatest.tools.Runner$$anonfun$doRunRunRunDaDoRunRun$1.apply(Runner.scala:1343)
>   at scala.collection.immutable.List.foreach(List.scala:392)
>   at org.scalatest.tools.Runner$.doRunRunRunDaDoRunRun(Runner.scala:1343)
>   at org.scalatest.tools.Runner$$anonfun$runOptionallyWithPassFailReporter$2.apply(Runner.scala:1012)
>   at org.scalatest.tools.Runner$$anonfun$runOptionallyWithPassFailReporter$2.apply(Runner.scala:1011)
>   at org.scalatest.tools.Runner$.withClassLoaderAndDispatchReporter(Runner.scala:1509)
>   at org.scalatest.tools.Runner$.runOptionallyWithPassFailReporter(Runner.scala:1011)
>   at org.scalatest.tools.Runner$.run(Runner.scala:850)
>   at org.scalatest.tools.Runner.run(Runner.scala)
>   at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.runScalaTest2(ScalaTestRunner.java:133)
>   at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.main(ScalaTestRunner.java:27)
> Caused by: org.codehaus.janino.InternalCompilerException: Code of method "processNext()V" of class "org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage4" grows beyond 64 KB
>   at org.codehaus.janino.CodeContext.makeSpace(CodeContext.java:1009)
> {code}
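> For context: whole-stage codegen fuses all operators of a stage into a single generated processNext() method, and the JVM rejects any method whose bytecode exceeds 64 KB, so a long enough expression chain can push the fused method over that limit. A minimal configuration-level sketch, assuming a running SparkSession named spark (spark.sql.codegen.hugeMethodLimit and spark.sql.codegen.wholeStage are existing Spark SQL settings, and spark.sql.codegen.fallback defaults to true so a failed compile normally falls back to interpreted execution; none of this was verified against this exact reproduction):
> {code:java}
> // Sketch only: knobs commonly suggested for "grows beyond 64 KB" errors.
> // Abandon whole-stage codegen once a generated method exceeds the HotSpot
> // JIT threshold (8000 bytes) rather than hitting the 64 KB hard limit:
> spark.conf.set("spark.sql.codegen.hugeMethodLimit", 8000)
> // Or disable whole-stage codegen entirely and take the interpreted path:
> spark.conf.set("spark.sql.codegen.wholeStage", "false")
> {code}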
> Example code:
> {code:java}
> package de.sparkbug.janino
>
> import java.sql.Date
>
> import org.apache.spark.sql.functions._
> import org.apache.spark.sql.types._
> import org.apache.spark.sql.{Row, SparkSession}
> import org.scalatest.FunSuite
>
> class SparkJaninoBug extends FunSuite {
>
>   test("test janino compile bug") {
>     val spark: SparkSession = SparkSession
>       .builder()
>       .appName("Test Spark App")
>       .master("local[*]")
>       .getOrCreate()
>     import spark.implicits._
>
>     val br_schema = StructType(Seq(
>       StructField("S_ID", IntegerType, nullable = true),
>       StructField("D_ID", LongType, nullable = true),
>       StructField("TYPE", StringType, nullable = true),
>       StructField("RT", StringType, nullable = true),
>       StructField("BR_ID", StringType, nullable = true),
>       StructField("I_ID", DateType, nullable = true),
>       StructField("SG_ID", IntegerType, nullable = true),
>       StructField("S_ID_MAIN", IntegerType, nullable = true),
>       StructField("PT_ID", IntegerType, nullable = true),
>       StructField("C_ID", IntegerType, nullable = true),
>       StructField("CF_ID", IntegerType, nullable = true),
>       StructField("P_ID", IntegerType, nullable = true),
>       StructField("RT_ID", IntegerType, nullable = true),
>       StructField("BT_ID", IntegerType, nullable = true),
>       StructField("I_T", StringType, nullable = true),
>       StructField("A", DoubleType, nullable = true),
>       StructField("T_A", DoubleType, nullable = true),
>       StructField("B_S_DT", DateType, nullable = true),
>       StructField("B_E_DT", DateType, nullable = true),
>       StructField("B_M_DT", DateType, nullable = true),
>       StructField("BR_P_A", DoubleType, nullable = true),
>       StructField("BR_D_A", IntegerType, nullable = true),
>       StructField("BR_B_D", IntegerType, nullable = true),
>       StructField("BR_B_A", DoubleType, nullable = true)
>     ))
>     val b_data = Seq(Row(111, 2804711765L, "D", "recano", "1017888",
>       Date.valueOf("2019-11-20"), 1, 111, 1, 1, 1, 2, 1, 1, "", 0.0, 0.0,
>       Date.valueOf("2019-10-01"), Date.valueOf("2019-10-31"),
>       Date.valueOf("2019-10-31"), 0.0, 30, 30, 0.0))
>     val df_b = spark.createDataFrame(spark.sparkContext.parallelize(b_data), br_schema)
>
>     val schema = StructType(Seq(
>       StructField("D_ID", LongType, nullable = true),
>       StructField("RT_ID", IntegerType, nullable = true),
>       StructField("P_ID", IntegerType, nullable = true),
>       StructField("BT_ID", IntegerType, nullable = true),
>       StructField("CF_ID", IntegerType, nullable = true),
>       StructField("B_ID", IntegerType, nullable = true),
>       StructField("S_ID", IntegerType, nullable = true),
>       StructField("SG_ID", IntegerType, nullable = true),
>       StructField("PT_ID", IntegerType, nullable = true),
>       StructField("C_ID", IntegerType, nullable = true),
>       StructField("N_C_DT", DateType, nullable = true),
>       StructField("N_T_DT", DateType, nullable = true),
>       StructField("A_B_D", IntegerType, nullable = true),
>       StructField("B_P_E", DateType, nullable = true),
>       StructField("B_P_S", DateType, nullable = true),
>       StructField("A_P_A", DoubleType, nullable = true),
>       StructField("A_B_1_D_A", DoubleType, nullable = true),
>       StructField("A_C", IntegerType, nullable = true),
>       StructField("A_D_A", DoubleType, nullable = true)
>     ))
>     val a_data = Seq(Row(2804711813L, 1, 2, 1, 1, 1, 111, 1, 1, 1, null, null, 30,
>       Date.valueOf("2019-10-31"), Date.valueOf("2019-10-01"), 0.0, 0.0, 1, -1.0))
>     val df_a = spark.createDataFrame(spark.sparkContext.parallelize(a_data), schema)
>
>     val df = df_b
>       .join(df_a, List("D_ID", "RT_ID", "P_ID", "BT_ID", "CF_ID", "S_ID",
>         "SG_ID", "PT_ID", "C_ID"), "outer")
>       .withColumn("T", lit(null))
>       .withColumn("B_C", lit("2"))
>       .withColumn("A_B_DT", to_date(concat(year(df_b("I_ID")),
>         month(df_b("I_ID")), lpad($"B_C", 2, "0")), "yyyyMMdd"))
>       .withColumn("B_B_DT", date_sub($"I_ID", 6))
>       .withColumn("B_P_E", when($"N_T_DT" > $"A_B_DT"
>         or $"N_C_DT" > $"A_B_DT", last_day($"B_P_E")).otherwise($"B_P_E"))
>       .withColumn("A_D_A", when($"RT_ID".isInCollection(Seq(5, 6)),
>         $"A_D_A").otherwise(datediff($"B_P_E", $"B_P_S") + 1))
>       .withColumn("A_I_C", when($"N_C_DT".isNotNull and $"B_P_S" === $"B_P_E",
>         lit(true)).otherwise(lit(false)))
>       .withColumn("A_D_A", when($"A_I_C", lit(0)).otherwise($"A_D_A"))
>       .withColumn("A_D_A", when($"A_D_A" > 30, 30).otherwise($"A_D_A"))
>       .withColumn("A_P_A", round($"A_B_1_D_A" * $"A_D_A", 7))
>       .withColumn("M_T_I",
>         when($"A_I_C", 5)
>           .when($"T".isNotNull, 6)
>           .when($"A_P_A".isNotNull and $"BR_P_A".isNotNull,
>             when(abs($"A_P_A" - $"BR_P_A") < 0.001, 1).otherwise(2))
>           .when($"A_P_A".isNotNull and $"BR_P_A".isNull, 3)
>           .when($"A_P_A".isNull and $"BR_P_A".isNotNull, 4)
>           .otherwise(lit(99)))
>       .withColumn("D_A", when($"M_T_I" === 2, round($"BR_P_A" - $"A_P_A", 7))
>         .otherwise(lit(null)))
>       .withColumn("D_D", when($"M_T_I" === 2, round($"BR_D_A" - $"A_D_A", 7))
>         .otherwise(lit(null)))
>       .withColumn("RT_ID", when($"RT_ID".isNull, lit(99)).otherwise($"RT_ID"))
>
>     df.collect()
>   }
> }
> {code}
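> A commonly suggested code-level mitigation for long withColumn chains like the one above is to cut the plan partway through, so no single codegen stage has to fuse every expression into one method. A sketch of that idea (Dataset.localCheckpoint() exists since Spark 2.3 and eagerly truncates the logical plan; the split point is arbitrary and it is untested whether this avoids the failure here):
> {code:java}
> // Sketch only: split the chain so each whole-stage-codegen stage compiles
> // a smaller processNext() method.
> val halfway = df_b
>   .join(df_a, List("D_ID", "RT_ID", "P_ID", "BT_ID", "CF_ID", "S_ID",
>     "SG_ID", "PT_ID", "C_ID"), "outer")
>   .withColumn("T", lit(null))
>   .withColumn("B_C", lit("2"))
>   .withColumn("A_B_DT", to_date(concat(year(df_b("I_ID")),
>     month(df_b("I_ID")), lpad($"B_C", 2, "0")), "yyyyMMdd"))
>   .localCheckpoint() // eager by default; later transformations start a fresh plan
>
> val result = halfway
>   .withColumn("B_B_DT", date_sub($"I_ID", 6))
>   // ...remaining withColumn steps from the reproduction, unchanged...
>   .withColumn("RT_ID", when($"RT_ID".isNull, lit(99)).otherwise($"RT_ID"))
> result.collect()
> {code}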