[ https://issues.apache.org/jira/browse/SYSTEMML-1267?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Deron Eriksson updated SYSTEMML-1267: ------------------------------------- Description: This occurs for Spark 2.1.0 (spark-2.1.0-bin-hadoop2.7) but not for Spark 2.0.2 on my machine. This occurs for matrix 1000x1000 but not for matrix 100x100 using Spark 2.1.0. The following DataFrame input code results in a "CodeGenerator: Error calculating stats of compiled class" warning. {code} $ spark-shell --executor-memory 4G --driver-memory 4G --jars target/SystemML.jar import org.apache.sysml.api.mlcontext._ import org.apache.sysml.api.mlcontext.ScriptFactory._ val ml = new MLContext(sc) import org.apache.spark.sql._ import org.apache.spark.sql.types.{StructType,StructField,DoubleType} import scala.util.Random val numRows = 1000 val numCols = 1000 val data = sc.parallelize(0 to numRows-1).map { _ => Row.fromSeq(Seq.fill(numCols)(Random.nextDouble)) } val schema = StructType((0 to numCols-1).map { i => StructField("C" + i, DoubleType, true) } ) val df = spark.createDataFrame(data, schema) val minMaxMean = """ minOut = min(Xin) maxOut = max(Xin) meanOut = mean(Xin) """ val mm = new MatrixMetadata(numRows, numCols) val minMaxMeanScript = dml(minMaxMean).in("Xin", df, mm).out("minOut", "maxOut", "meanOut") {code} Results in: {code} scala> val minMaxMeanScript = dml(minMaxMean).in("Xin", df, mm).out("minOut", "maxOut", "meanOut") [Stage 0:> (0 + 8) / 8]17/02/14 17:37:45 WARN CodeGenerator: Error calculating stats of compiled class. java.io.EOFException at java.io.DataInputStream.readFully(DataInputStream.java:197) at java.io.DataInputStream.readFully(DataInputStream.java:169) at org.codehaus.janino.util.ClassFile.loadAttribute(ClassFile.java:1509) at org.codehaus.janino.util.ClassFile.loadAttributes(ClassFile.java:644) at org.codehaus.janino.util.ClassFile.loadFields(ClassFile.java:623) at org.codehaus.janino.util.ClassFile.<init>(ClassFile.java:280) at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anonfun$recordCompilationStats$1.apply(CodeGenerator.scala:967) at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anonfun$recordCompilationStats$1.apply(CodeGenerator.scala:964) at scala.collection.Iterator$class.foreach(Iterator.scala:893) at scala.collection.AbstractIterator.foreach(Iterator.scala:1336) at scala.collection.IterableLike$class.foreach(IterableLike.scala:72) at scala.collection.AbstractIterable.foreach(Iterable.scala:54) at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.recordCompilationStats(CodeGenerator.scala:964) at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.org$apache$spark$sql$catalyst$expressions$codegen$CodeGenerator$$doCompile(CodeGenerator.scala:936) at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:998) at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:995) at org.spark_project.guava.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3599) at org.spark_project.guava.cache.LocalCache$Segment.loadSync(LocalCache.java:2379) at org.spark_project.guava.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2342) at org.spark_project.guava.cache.LocalCache$Segment.get(LocalCache.java:2257) at org.spark_project.guava.cache.LocalCache.get(LocalCache.java:4000) at org.spark_project.guava.cache.LocalCache.getOrLoad(LocalCache.java:4004) at org.spark_project.guava.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4874) at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.compile(CodeGenerator.scala:890) at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$.create(GenerateUnsafeProjection.scala:405) at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$.create(GenerateUnsafeProjection.scala:359) at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$.create(GenerateUnsafeProjection.scala:32) at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator.generate(CodeGenerator.scala:874) at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.extractProjection$lzycompute(ExpressionEncoder.scala:266) at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.extractProjection(ExpressionEncoder.scala:266) at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.toRow(ExpressionEncoder.scala:290) at org.apache.spark.sql.SparkSession$$anonfun$3.apply(SparkSession.scala:547) at org.apache.spark.sql.SparkSession$$anonfun$3.apply(SparkSession.scala:547) at scala.collection.Iterator$$anon$11.next(Iterator.scala:409) at scala.collection.Iterator$$anon$11.next(Iterator.scala:409) at scala.collection.Iterator$$anon$11.next(Iterator.scala:409) at scala.collection.Iterator$$anon$11.next(Iterator.scala:409) at scala.collection.Iterator$$anon$11.next(Iterator.scala:409) at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1762) at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:1157) at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:1157) at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1944) at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1944) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87) at org.apache.spark.scheduler.Task.run(Task.scala:99) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:282) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) minMaxMeanScript: org.apache.sysml.api.mlcontext.Script = Inputs: [1] (Dataset as Matrix) Xin: [C0: double, C1: double ... 998 more fields] Outputs: [1] minOut [2] maxOut [3] meanOut {code} If I execute the 'val minMaxMeanScript = dml(minMaxMean).in("Xin", df, mm).out("minOut", "maxOut", "meanOut")' line multiple times, the warning only occurs the first time. was: This occurs for Spark 2.1.0 (spark-2.1.0-bin-hadoop2.7) but not for Spark 2.0.2 on my machine. This occurs for matrix 1000x1000 but not for matrix 100x100 using Spark 2.1.0. The following DataFrame input code results in a "CodeGenerator: Error calculating stats of compiled class" warning. {code} $ spark-shell --executor-memory 4G --driver-memory 4G --jars target/SystemML.jar import org.apache.sysml.api.mlcontext._ import org.apache.sysml.api.mlcontext.ScriptFactory._ val ml = new MLContext(sc) import org.apache.spark.sql._ import org.apache.spark.sql.types.{StructType,StructField,DoubleType} import scala.util.Random val numRows = 1000 val numCols = 1000 val data = sc.parallelize(0 to numRows-1).map { _ => Row.fromSeq(Seq.fill(numCols)(Random.nextDouble)) } val schema = StructType((0 to numCols-1).map { i => StructField("C" + i, DoubleType, true) } ) val df = spark.createDataFrame(data, schema) val minMaxMean = """ minOut = min(Xin) maxOut = max(Xin) meanOut = mean(Xin) """ val mm = new MatrixMetadata(numRows, numCols) val minMaxMeanScript = dml(minMaxMean).in("Xin", df, mm).out("minOut", "maxOut", "meanOut") {code} Results in: {code} scala> val minMaxMeanScript = dml(minMaxMean).in("Xin", df, mm).out("minOut", "maxOut", "meanOut") [Stage 0:> (0 + 8) / 8]17/02/14 17:37:45 WARN CodeGenerator: Error calculating stats of compiled class. java.io.EOFException at java.io.DataInputStream.readFully(DataInputStream.java:197) at java.io.DataInputStream.readFully(DataInputStream.java:169) at org.codehaus.janino.util.ClassFile.loadAttribute(ClassFile.java:1509) at org.codehaus.janino.util.ClassFile.loadAttributes(ClassFile.java:644) at org.codehaus.janino.util.ClassFile.loadFields(ClassFile.java:623) at org.codehaus.janino.util.ClassFile.<init>(ClassFile.java:280) at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anonfun$recordCompilationStats$1.apply(CodeGenerator.scala:967) at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anonfun$recordCompilationStats$1.apply(CodeGenerator.scala:964) at scala.collection.Iterator$class.foreach(Iterator.scala:893) at scala.collection.AbstractIterator.foreach(Iterator.scala:1336) at scala.collection.IterableLike$class.foreach(IterableLike.scala:72) at scala.collection.AbstractIterable.foreach(Iterable.scala:54) at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.recordCompilationStats(CodeGenerator.scala:964) at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.org$apache$spark$sql$catalyst$expressions$codegen$CodeGenerator$$doCompile(CodeGenerator.scala:936) at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:998) at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:995) at org.spark_project.guava.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3599) at org.spark_project.guava.cache.LocalCache$Segment.loadSync(LocalCache.java:2379) at org.spark_project.guava.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2342) at org.spark_project.guava.cache.LocalCache$Segment.get(LocalCache.java:2257) at org.spark_project.guava.cache.LocalCache.get(LocalCache.java:4000) at org.spark_project.guava.cache.LocalCache.getOrLoad(LocalCache.java:4004) at org.spark_project.guava.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4874) at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.compile(CodeGenerator.scala:890) at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$.create(GenerateUnsafeProjection.scala:405) at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$.create(GenerateUnsafeProjection.scala:359) at org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$.create(GenerateUnsafeProjection.scala:32) at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator.generate(CodeGenerator.scala:874) at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.extractProjection$lzycompute(ExpressionEncoder.scala:266) at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.extractProjection(ExpressionEncoder.scala:266) at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.toRow(ExpressionEncoder.scala:290) at org.apache.spark.sql.SparkSession$$anonfun$3.apply(SparkSession.scala:547) at org.apache.spark.sql.SparkSession$$anonfun$3.apply(SparkSession.scala:547) at scala.collection.Iterator$$anon$11.next(Iterator.scala:409) at scala.collection.Iterator$$anon$11.next(Iterator.scala:409) at scala.collection.Iterator$$anon$11.next(Iterator.scala:409) at scala.collection.Iterator$$anon$11.next(Iterator.scala:409) at scala.collection.Iterator$$anon$11.next(Iterator.scala:409) at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1762) at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:1157) at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:1157) at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1944) at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1944) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87) at org.apache.spark.scheduler.Task.run(Task.scala:99) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:282) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) minMaxMeanScript: org.apache.sysml.api.mlcontext.Script = Inputs: [1] (Dataset as Matrix) Xin: [C0: double, C1: double ... 998 more fields] Outputs: [1] minOut [2] maxOut [3] meanOut {code} If I execute the 'val minMaxMeanScript = dml(minMaxMean).in("Xin", df, mm).out("minOut", "maxOut", "meanOut")' line multiple times, it only occurs the first time. > Input DataFrame CodeGenerator Error calculating stats warning > ------------------------------------------------------------- > > Key: SYSTEMML-1267 > URL: https://issues.apache.org/jira/browse/SYSTEMML-1267 > Project: SystemML > Issue Type: Bug > Components: APIs, Runtime > Affects Versions: SystemML 0.13 > Reporter: Deron Eriksson > > This occurs for Spark 2.1.0 (spark-2.1.0-bin-hadoop2.7) but not for Spark > 2.0.2 on my machine. > This occurs for matrix 1000x1000 but not for matrix 100x100 using Spark 2.1.0. > The following DataFrame input code results in a "CodeGenerator: Error > calculating stats of compiled class" warning. > {code} > $ spark-shell --executor-memory 4G --driver-memory 4G --jars > target/SystemML.jar > import org.apache.sysml.api.mlcontext._ > import org.apache.sysml.api.mlcontext.ScriptFactory._ > val ml = new MLContext(sc) > import org.apache.spark.sql._ > import org.apache.spark.sql.types.{StructType,StructField,DoubleType} > import scala.util.Random > val numRows = 1000 > val numCols = 1000 > val data = sc.parallelize(0 to numRows-1).map { _ => > Row.fromSeq(Seq.fill(numCols)(Random.nextDouble)) } > val schema = StructType((0 to numCols-1).map { i => StructField("C" + i, > DoubleType, true) } ) > val df = spark.createDataFrame(data, schema) > val minMaxMean = > """ > minOut = min(Xin) > maxOut = max(Xin) > meanOut = mean(Xin) > """ > val mm = new MatrixMetadata(numRows, numCols) > val minMaxMeanScript = dml(minMaxMean).in("Xin", df, mm).out("minOut", > "maxOut", "meanOut") > {code} > Results in: > {code} > scala> val minMaxMeanScript = dml(minMaxMean).in("Xin", df, mm).out("minOut", > "maxOut", "meanOut") > [Stage 0:> (0 + 8) / > 8]17/02/14 17:37:45 WARN CodeGenerator: Error calculating stats of compiled > class. > java.io.EOFException > at java.io.DataInputStream.readFully(DataInputStream.java:197) > at java.io.DataInputStream.readFully(DataInputStream.java:169) > at org.codehaus.janino.util.ClassFile.loadAttribute(ClassFile.java:1509) > at org.codehaus.janino.util.ClassFile.loadAttributes(ClassFile.java:644) > at org.codehaus.janino.util.ClassFile.loadFields(ClassFile.java:623) > at org.codehaus.janino.util.ClassFile.<init>(ClassFile.java:280) > at > org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anonfun$recordCompilationStats$1.apply(CodeGenerator.scala:967) > at > org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anonfun$recordCompilationStats$1.apply(CodeGenerator.scala:964) > at scala.collection.Iterator$class.foreach(Iterator.scala:893) > at scala.collection.AbstractIterator.foreach(Iterator.scala:1336) > at scala.collection.IterableLike$class.foreach(IterableLike.scala:72) > at scala.collection.AbstractIterable.foreach(Iterable.scala:54) > at > org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.recordCompilationStats(CodeGenerator.scala:964) > at > org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.org$apache$spark$sql$catalyst$expressions$codegen$CodeGenerator$$doCompile(CodeGenerator.scala:936) > at > org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:998) > at > org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:995) > at > org.spark_project.guava.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3599) > at > org.spark_project.guava.cache.LocalCache$Segment.loadSync(LocalCache.java:2379) > at > org.spark_project.guava.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2342) > at > org.spark_project.guava.cache.LocalCache$Segment.get(LocalCache.java:2257) > at org.spark_project.guava.cache.LocalCache.get(LocalCache.java:4000) > at > org.spark_project.guava.cache.LocalCache.getOrLoad(LocalCache.java:4004) > at > org.spark_project.guava.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4874) > at > org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.compile(CodeGenerator.scala:890) > at > org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$.create(GenerateUnsafeProjection.scala:405) > at > org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$.create(GenerateUnsafeProjection.scala:359) > at > org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection$.create(GenerateUnsafeProjection.scala:32) > at > org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator.generate(CodeGenerator.scala:874) > at > org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.extractProjection$lzycompute(ExpressionEncoder.scala:266) > at > org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.extractProjection(ExpressionEncoder.scala:266) > at > org.apache.spark.sql.catalyst.encoders.ExpressionEncoder.toRow(ExpressionEncoder.scala:290) > at > org.apache.spark.sql.SparkSession$$anonfun$3.apply(SparkSession.scala:547) > at > org.apache.spark.sql.SparkSession$$anonfun$3.apply(SparkSession.scala:547) > at scala.collection.Iterator$$anon$11.next(Iterator.scala:409) > at scala.collection.Iterator$$anon$11.next(Iterator.scala:409) > at scala.collection.Iterator$$anon$11.next(Iterator.scala:409) > at scala.collection.Iterator$$anon$11.next(Iterator.scala:409) > at scala.collection.Iterator$$anon$11.next(Iterator.scala:409) > at org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1762) > at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:1157) > at org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:1157) > at > org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1944) > at > org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1944) > at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87) > at org.apache.spark.scheduler.Task.run(Task.scala:99) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:282) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > minMaxMeanScript: org.apache.sysml.api.mlcontext.Script = > > Inputs: > [1] (Dataset as Matrix) Xin: [C0: double, C1: double ... 998 more fields] > Outputs: > [1] minOut > [2] maxOut > [3] meanOut > {code} > If I execute the 'val minMaxMeanScript = dml(minMaxMean).in("Xin", df, > mm).out("minOut", "maxOut", "meanOut")' line multiple times, the warning only > occurs the first time. -- This message was sent by Atlassian JIRA (v6.3.15#6346)