[jira] [Reopened] (SPARK-11229) NPE in JoinedRow.isNullAt when spark.shuffle.memoryFraction=0
[ https://issues.apache.org/jira/browse/SPARK-11229?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Sean Owen reopened SPARK-11229: --- "Fixed" implies there was a change attached to this JIRA that resolved the issue, and we don't have that here. If it were probably resolved by another JIRA, "duplicate" would be appropriate. Otherwise, *shrug* doesn't really matter but "cannot reproduce" is maybe most accurate. > NPE in JoinedRow.isNullAt when spark.shuffle.memoryFraction=0 > - > > Key: SPARK-11229 > URL: https://issues.apache.org/jira/browse/SPARK-11229 > Project: Spark > Issue Type: Bug > Components: SQL >Affects Versions: 1.5.1 > Environment: 14.04.1-Ubuntu SMP x86_64 GNU/Linux >Reporter: Romi Kuntsman > > Steps to reproduce: > 1. set spark.shuffle.memoryFraction=0 > 2. load dataframe from parquet file > 3. see it's read correctly by calling dataframe.show() > 4. call dataframe.count() > Expected behaviour: > get count of rows in dataframe > OR, if memoryFraction=0 is an invalid setting, get notified about it > Actual behaviour: > CatalystReadSupport doesn't read the schema (even thought there is one) and > then there's a NullPointerException. > Driver stacktrace: > at > org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1283) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1271) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1270) > at > scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) > at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) > at > org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1270) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697) > at scala.Option.foreach(Option.scala:236) > at > org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:697) > at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1496) > at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1458) > at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1447) > at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48) > at > org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:567) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:1822) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:1835) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:1848) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:1919) > at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:905) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108) > at org.apache.spark.rdd.RDD.withScope(RDD.scala:306) > at org.apache.spark.rdd.RDD.collect(RDD.scala:904) > at > org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:177) > at > org.apache.spark.sql.DataFrame$$anonfun$collect$1.apply(DataFrame.scala:1385) > at > org.apache.spark.sql.DataFrame$$anonfun$collect$1.apply(DataFrame.scala:1385) > at > org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:56) > at > org.apache.spark.sql.DataFrame.withNewExecutionId(DataFrame.scala:1903) > at org.apache.spark.sql.DataFrame.collect(DataFrame.scala:1384) > at org.apache.spark.sql.DataFrame.count(DataFrame.scala:1402) > ... 14 more > Caused by: java.lang.NullPointerException > at > org.apache.spark.sql.catalyst.expressions.JoinedRow.isNullAt(JoinedRow.scala:70) > at > org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificMutableProjection.apply(Unknown > Source) > at > org.apache.spark.sql.execution.aggregate.TungstenAggregationIterator$$anonfun$generateProcessRow$1.apply(TungstenAggregationIterator.scala:194) > at > org.apache.spark.sql.execution.aggregate.TungstenAggregationIterator$$anonfun$generateProcessRow$1.apply(TungstenAggregationIterator.scala:192) > at > org.apache.spark.sql.execution.aggregate.TungstenAggregationIterator.processInputs(TungstenAggregationIterator.scala:368) > at >
[jira] [Reopened] (SPARK-11229) NPE in JoinedRow.isNullAt when spark.shuffle.memoryFraction=0
[ https://issues.apache.org/jira/browse/SPARK-11229?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Michael Armbrust reopened SPARK-11229: -- > NPE in JoinedRow.isNullAt when spark.shuffle.memoryFraction=0 > - > > Key: SPARK-11229 > URL: https://issues.apache.org/jira/browse/SPARK-11229 > Project: Spark > Issue Type: Bug > Components: SQL >Affects Versions: 1.5.1 > Environment: 14.04.1-Ubuntu SMP x86_64 GNU/Linux >Reporter: Romi Kuntsman > Fix For: 1.6.0 > > > Steps to reproduce: > 1. set spark.shuffle.memoryFraction=0 > 2. load dataframe from parquet file > 3. see it's read correctly by calling dataframe.show() > 4. call dataframe.count() > Expected behaviour: > get count of rows in dataframe > OR, if memoryFraction=0 is an invalid setting, get notified about it > Actual behaviour: > CatalystReadSupport doesn't read the schema (even thought there is one) and > then there's a NullPointerException. > Driver stacktrace: > at > org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1283) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1271) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1270) > at > scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) > at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) > at > org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1270) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:697) > at scala.Option.foreach(Option.scala:236) > at > org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:697) > at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1496) > at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1458) > at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1447) > at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48) > at > org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:567) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:1822) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:1835) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:1848) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:1919) > at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:905) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108) > at org.apache.spark.rdd.RDD.withScope(RDD.scala:306) > at org.apache.spark.rdd.RDD.collect(RDD.scala:904) > at > org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:177) > at > org.apache.spark.sql.DataFrame$$anonfun$collect$1.apply(DataFrame.scala:1385) > at > org.apache.spark.sql.DataFrame$$anonfun$collect$1.apply(DataFrame.scala:1385) > at > org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:56) > at > org.apache.spark.sql.DataFrame.withNewExecutionId(DataFrame.scala:1903) > at org.apache.spark.sql.DataFrame.collect(DataFrame.scala:1384) > at org.apache.spark.sql.DataFrame.count(DataFrame.scala:1402) > ... 14 more > Caused by: java.lang.NullPointerException > at > org.apache.spark.sql.catalyst.expressions.JoinedRow.isNullAt(JoinedRow.scala:70) > at > org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificMutableProjection.apply(Unknown > Source) > at > org.apache.spark.sql.execution.aggregate.TungstenAggregationIterator$$anonfun$generateProcessRow$1.apply(TungstenAggregationIterator.scala:194) > at > org.apache.spark.sql.execution.aggregate.TungstenAggregationIterator$$anonfun$generateProcessRow$1.apply(TungstenAggregationIterator.scala:192) > at > org.apache.spark.sql.execution.aggregate.TungstenAggregationIterator.processInputs(TungstenAggregationIterator.scala:368) > at > org.apache.spark.sql.execution.aggregate.TungstenAggregationIterator.start(TungstenAggregationIterator.scala:622) > at > org.apache.spark.sql.execution.aggregate.TungstenAggregate$$anonfun$doExecute$1.org$apache$spark$sql$execution$aggregate$TungstenAggregate$$anonfun$$executePartition$1(TungstenAggregate.scala:110) > at >