Catalin Alexandru Zamfir created SPARK-11659: ------------------------------------------------
Summary: Codegen sporadically fails with same input character Key: SPARK-11659 URL: https://issues.apache.org/jira/browse/SPARK-11659 Project: Spark Issue Type: Bug Components: Spark Core, SQL Affects Versions: 1.5.1 Environment: Default, Linux (Jessie) Reporter: Catalin Alexandru Zamfir We pretty much have a default installation of Spark 1.5.1. Some of our jobs sporadically fail with the below exception for the same "input character" (we don't have @ in our inputs, but jobs still fail) and when we re-run the same job with the same input, all jobs pass without any failures. I believe it's a bug in code-gen but I can't debug this on a production cluster (and it's almost impossible to reproduce it). {{Job aborted due to stage failure: Task 50 in stage 4.0 failed 4 times, most recent failure: Lost task 50.3 in stage 4.0 (TID 894, 10.136.64.112): java.util.concurrent.ExecutionException: java.lang.Exception: failed to compile: org.codehaus.commons.compiler.CompileException: Line 15, Column 9: Invalid character input "@" (character code 64) public SpecificOrdering generate(org.apache.spark.sql.catalyst.expressions.Expression[] expr) { return new SpecificOrdering(expr); } class SpecificOrdering extends org.apache.spark.sql.catalyst.expressions.codegen.BaseOrdering { private org.apache.spark.sql.catalyst.expressions.Expression[] expressions; public SpecificOrdering(org.apache.spark.sql.catalyst.expressions.Expression[] expr) { expressions = expr; } @Override public int compare(InternalRow a, InternalRow b) { InternalRow i = null; // Holds current row being evaluated. i = a; boolean isNullA2; long primitiveA3; { /* input[0, LongType] */ boolean isNull0 = i.isNullAt(0); long primitive1 = isNull0 ? -1L : (i.getLong(0)); isNullA2 = isNull0; primitiveA3 = primitive1; } i = b; boolean isNullB4; long primitiveB5; { /* input[0, LongType] */ boolean isNull0 = i.isNullAt(0); long primitive1 = isNull0 ? 
-1L : (i.getLong(0)); isNullB4 = isNull0; primitiveB5 = primitive1; } if (isNullA2 && isNullB4) { // Nothing } else if (isNullA2) { return -1; } else if (isNullB4) { return 1; } else { int comp = (primitiveA3 > primitiveB5 ? 1 : primitiveA3 < primitiveB5 ? -1 : 0); if (comp != 0) { return comp; } } i = a; boolean isNullA8; long primitiveA9; { /* input[1, LongType] */ boolean isNull6 = i.isNullAt(1); long primitive7 = isNull6 ? -1L : (i.getLong(1)); isNullA8 = isNull6; primitiveA9 = primitive7; } i = b; boolean isNullB10; long primitiveB11; { /* input[1, LongType] */ boolean isNull6 = i.isNullAt(1); long primitive7 = isNull6 ? -1L : (i.getLong(1)); isNullB10 = isNull6; primitiveB11 = primitive7; } if (isNullA8 && isNullB10) { // Nothing } else if (isNullA8) { return -1; } else if (isNullB10) { return 1; } else { int comp = (primitiveA9 > primitiveB11 ? 1 : primitiveA9 < primitiveB11 ? -1 : 0); if (comp != 0) { return comp; } } i = a; boolean isNullA14; long primitiveA15; { /* input[2, LongType] */ boolean isNull12 = i.isNullAt(2); long primitive13 = isNull12 ? -1L : (i.getLong(2)); isNullA14 = isNull12; primitiveA15 = primitive13; } i = b; boolean isNullB16; long primitiveB17; { /* input[2, LongType] */ boolean isNull12 = i.isNullAt(2); long primitive13 = isNull12 ? -1L : (i.getLong(2)); isNullB16 = isNull12; primitiveB17 = primitive13; } if (isNullA14 && isNullB16) { // Nothing } else if (isNullA14) { return -1; } else if (isNullB16) { return 1; } else { int comp = (primitiveA15 > primitiveB17 ? 1 : primitiveA15 < primitiveB17 ? 
-1 : 0); if (comp != 0) { return comp; } } return 0; } } at org.spark-project.guava.util.concurrent.AbstractFuture$Sync.getValue(AbstractFuture.java:306) at org.spark-project.guava.util.concurrent.AbstractFuture$Sync.get(AbstractFuture.java:293) at org.spark-project.guava.util.concurrent.AbstractFuture.get(AbstractFuture.java:116) at org.spark-project.guava.util.concurrent.Uninterruptibles.getUninterruptibly(Uninterruptibles.java:135) at org.spark-project.guava.cache.LocalCache$Segment.getAndRecordStats(LocalCache.java:2410) at org.spark-project.guava.cache.LocalCache$Segment.loadSync(LocalCache.java:2380) at org.spark-project.guava.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2342) at org.spark-project.guava.cache.LocalCache$Segment.get(LocalCache.java:2257) at org.spark-project.guava.cache.LocalCache.get(LocalCache.java:4000) at org.spark-project.guava.cache.LocalCache.getOrLoad(LocalCache.java:4004) at org.spark-project.guava.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4874) at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator.compile(CodeGenerator.scala:362) at org.apache.spark.sql.catalyst.expressions.codegen.GenerateOrdering$.create(GenerateOrdering.scala:139) at org.apache.spark.sql.catalyst.expressions.codegen.GenerateOrdering$.create(GenerateOrdering.scala:49) at org.apache.spark.sql.catalyst.expressions.codegen.GenerateOrdering.create(GenerateOrdering.scala) at org.apache.spark.sql.execution.UnsafeKVExternalSorter.<init>(UnsafeKVExternalSorter.java:68) at org.apache.spark.sql.execution.UnsafeFixedWidthAggregationMap.destructAndCreateExternalSorter(UnsafeFixedWidthAggregationMap.java:257) at org.apache.spark.sql.execution.aggregate.TungstenAggregationIterator.switchToSortBasedAggregation(TungstenAggregationIterator.scala:435) at org.apache.spark.sql.execution.aggregate.TungstenAggregationIterator.processInputs(TungstenAggregationIterator.scala:379) at 
org.apache.spark.sql.execution.aggregate.TungstenAggregationIterator.start(TungstenAggregationIterator.scala:622) at org.apache.spark.sql.execution.aggregate.TungstenAggregate$$anonfun$doExecute$1.org$apache$spark$sql$execution$aggregate$TungstenAggregate$$anonfun$$executePartition$1(TungstenAggregate.scala:110) at org.apache.spark.sql.execution.aggregate.TungstenAggregate$$anonfun$doExecute$1$$anonfun$2.apply(TungstenAggregate.scala:119) at org.apache.spark.sql.execution.aggregate.TungstenAggregate$$anonfun$doExecute$1$$anonfun$2.apply(TungstenAggregate.scala:119) at org.apache.spark.rdd.MapPartitionsWithPreparationRDD.compute(MapPartitionsWithPreparationRDD.scala:64) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297) at org.apache.spark.rdd.RDD.iterator(RDD.scala:264) at org.apache.spark.rdd.MapPartitionsWithPreparationRDD.compute(MapPartitionsWithPreparationRDD.scala:63) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297) at org.apache.spark.rdd.RDD.iterator(RDD.scala:264) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297) at org.apache.spark.rdd.RDD.iterator(RDD.scala:264) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297) at org.apache.spark.rdd.RDD.iterator(RDD.scala:264) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297) at org.apache.spark.rdd.RDD.iterator(RDD.scala:264) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297) at org.apache.spark.rdd.RDD.iterator(RDD.scala:264) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66) at org.apache.spark.scheduler.Task.run(Task.scala:88) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214) 
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) Caused by: java.lang.Exception: failed to compile: org.codehaus.commons.compiler.CompileException: Line 15, Column 9: Invalid character input "@" (character code 64) public SpecificOrdering generate(org.apache.spark.sql.catalyst.expressions.Expression[] expr) { return new SpecificOrdering(expr); } class SpecificOrdering extends org.apache.spark.sql.catalyst.expressions.codegen.BaseOrdering { private org.apache.spark.sql.catalyst.expressions.Expression[] expressions; public SpecificOrdering(org.apache.spark.sql.catalyst.expressions.Expression[] expr) { expressions = expr; } @Override public int compare(InternalRow a, InternalRow b) { InternalRow i = null; // Holds current row being evaluated. i = a; boolean isNullA2; long primitiveA3; { /* input[0, LongType] */ boolean isNull0 = i.isNullAt(0); long primitive1 = isNull0 ? -1L : (i.getLong(0)); isNullA2 = isNull0; primitiveA3 = primitive1; } i = b; boolean isNullB4; long primitiveB5; { /* input[0, LongType] */ boolean isNull0 = i.isNullAt(0); long primitive1 = isNull0 ? -1L : (i.getLong(0)); isNullB4 = isNull0; primitiveB5 = primitive1; } if (isNullA2 && isNullB4) { // Nothing } else if (isNullA2) { return -1; } else if (isNullB4) { return 1; } else { int comp = (primitiveA3 > primitiveB5 ? 1 : primitiveA3 < primitiveB5 ? -1 : 0); if (comp != 0) { return comp; } } i = a; boolean isNullA8; long primitiveA9; { /* input[1, LongType] */ boolean isNull6 = i.isNullAt(1); long primitive7 = isNull6 ? -1L : (i.getLong(1)); isNullA8 = isNull6; primitiveA9 = primitive7; } i = b; boolean isNullB10; long primitiveB11; { /* input[1, LongType] */ boolean isNull6 = i.isNullAt(1); long primitive7 = isNull6 ? 
-1L : (i.getLong(1)); isNullB10 = isNull6; primitiveB11 = primitive7; } if (isNullA8 && isNullB10) { // Nothing } else if (isNullA8) { return -1; } else if (isNullB10) { return 1; } else { int comp = (primitiveA9 > primitiveB11 ? 1 : primitiveA9 < primitiveB11 ? -1 : 0); if (comp != 0) { return comp; } } i = a; boolean isNullA14; long primitiveA15; { /* input[2, LongType] */ boolean isNull12 = i.isNullAt(2); long primitive13 = isNull12 ? -1L : (i.getLong(2)); isNullA14 = isNull12; primitiveA15 = primitive13; } i = b; boolean isNullB16; long primitiveB17; { /* input[2, LongType] */ boolean isNull12 = i.isNullAt(2); long primitive13 = isNull12 ? -1L : (i.getLong(2)); isNullB16 = isNull12; primitiveB17 = primitive13; } if (isNullA14 && isNullB16) { // Nothing } else if (isNullA14) { return -1; } else if (isNullB16) { return 1; } else { int comp = (primitiveA15 > primitiveB17 ? 1 : primitiveA15 < primitiveB17 ? -1 : 0); if (comp != 0) { return comp; } } return 0; } } at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator.org$apache$spark$sql$catalyst$expressions$codegen$CodeGenerator$$doCompile(CodeGenerator.scala:392) at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:412) at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:409) at org.spark-project.guava.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3599) at org.spark-project.guava.cache.LocalCache$Segment.loadSync(LocalCache.java:2379) ... 
41 more Caused by: org.codehaus.commons.compiler.CompileException: Line 15, Column 9: Invalid character input "@" (character code 64) at org.codehaus.janino.Scanner.internalRead(Scanner.java:838) at org.codehaus.janino.Scanner.read(Scanner.java:261) at org.codehaus.janino.Parser.readOperator(Parser.java:2387) at org.codehaus.janino.Parser.parseConstructorDeclarator(Parser.java:721) at org.codehaus.janino.Parser.parseClassBodyDeclaration(Parser.java:435) at org.codehaus.janino.Parser.parseClassBody(Parser.java:338) at org.codehaus.janino.Parser.parseClassDeclarationRest(Parser.java:311) at org.codehaus.janino.Parser.parseClassBodyDeclaration(Parser.java:406) at org.codehaus.janino.ClassBodyEvaluator.cook(ClassBodyEvaluator.java:220) at org.codehaus.janino.SimpleCompiler.cook(SimpleCompiler.java:194) at org.codehaus.commons.compiler.Cookable.cook(Cookable.java:80) at org.codehaus.commons.compiler.Cookable.cook(Cookable.java:72) at org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator.org$apache$spark$sql$catalyst$expressions$codegen$CodeGenerator$$doCompile(CodeGenerator.scala:387) ... 45 more Driver stacktrace:}} -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org