Barry Becker created SPARK-19317:
------------------------------------

             Summary: UnsupportedOperationException: empty.reduceLeft in LinearSeqOptimized
                 Key: SPARK-19317
                 URL: https://issues.apache.org/jira/browse/SPARK-19317
             Project: Spark
          Issue Type: Bug
          Components: Spark Core
    Affects Versions: 2.1.0
            Reporter: Barry Becker


I wish I had a simpler reproducible case to give, but I got the exception below 
while selecting the null values in one of the columns of a DataFrame.
The client code that failed was
{code}
df.filter(filterExp).count()
{code}
where the filter expression was something like someColumn.isNull.
The column being filtered had 412 nulls out of 716,000 total rows.
Oddly, I have a different, smaller dataset where I did the same thing on a 
column with 100 nulls out of 800 rows and did not get the error.
The exception seems to indicate that Spark is trying to call reduceLeft on an 
empty list.
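
The shape of the failing code was roughly the following. This is only a sketch 
with made-up names, not a self-contained repro (this tiny dataset does not 
trigger the failure); note that the stack trace runs through 
InMemoryTableScanExec, which suggests the DataFrame was cached at the time:

{code}
// Sketch only: names are illustrative, and this small dataset does NOT
// reproduce the failure; the real column had 412 nulls in ~716,000 rows.
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .master("local[*]")
  .appName("SPARK-19317-sketch")
  .getOrCreate()
import spark.implicits._

// Some nulls in one column; cached, since the trace below goes through
// InMemoryTableScanExec (the in-memory columnar scan).
val df = Seq(Some(1), None, Some(3), None).toDF("someColumn").cache()

// The call that failed:
val filterExp = df("someColumn").isNull
df.filter(filterExp).count()
{code}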

{code}
java.lang.UnsupportedOperationException: empty.reduceLeft
  at scala.collection.LinearSeqOptimized$class.reduceLeft(LinearSeqOptimized.scala:137)
  at scala.collection.immutable.List.reduceLeft(List.scala:84)
  at scala.collection.TraversableOnce$class.reduce(TraversableOnce.scala:208)
  at scala.collection.AbstractTraversable.reduce(Traversable.scala:104)
  at org.apache.spark.sql.execution.columnar.InMemoryTableScanExec$$anonfun$1.applyOrElse(InMemoryTableScanExec.scala:90)
  at org.apache.spark.sql.execution.columnar.InMemoryTableScanExec$$anonfun$1.applyOrElse(InMemoryTableScanExec.scala:54)
  at scala.runtime.AbstractPartialFunction.apply(AbstractPartialFunction.scala:36)
  at org.apache.spark.sql.execution.columnar.InMemoryTableScanExec$$anonfun$1.applyOrElse(InMemoryTableScanExec.scala:61)
  at org.apache.spark.sql.execution.columnar.InMemoryTableScanExec$$anonfun$1.applyOrElse(InMemoryTableScanExec.scala:54)
  at scala.PartialFunction$Lifted.apply(PartialFunction.scala:223)
  at scala.PartialFunction$Lifted.apply(PartialFunction.scala:219)
  at org.apache.spark.sql.execution.columnar.InMemoryTableScanExec$$anonfun$2.apply(InMemoryTableScanExec.scala:95)
  at org.apache.spark.sql.execution.columnar.InMemoryTableScanExec$$anonfun$2.apply(InMemoryTableScanExec.scala:94)
  at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
  at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
  at scala.collection.immutable.List.foreach(List.scala:381)
  at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:241)
  at scala.collection.immutable.List.flatMap(List.scala:344)
  at org.apache.spark.sql.execution.columnar.InMemoryTableScanExec.<init>(InMemoryTableScanExec.scala:94)
  at org.apache.spark.sql.execution.SparkStrategies$InMemoryScans$$anonfun$6.apply(SparkStrategies.scala:306)
  at org.apache.spark.sql.execution.SparkStrategies$InMemoryScans$$anonfun$6.apply(SparkStrategies.scala:306)
  at org.apache.spark.sql.execution.SparkPlanner.pruneFilterProject(SparkPlanner.scala:96)
  at org.apache.spark.sql.execution.SparkStrategies$InMemoryScans$.apply(SparkStrategies.scala:302)
  at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:62)
  at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:62)
  at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
  at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
  at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
  at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:92)
  at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:77)
  at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:74)
  at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
  at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
  at scala.collection.Iterator$class.foreach(Iterator.scala:893)
  at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
  at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:157)
  at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1336)
  at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:74)
  at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:66)
  at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
  at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
  at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:92)
  at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:77)
  at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:74)
  at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
  at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
  at scala.collection.Iterator$class.foreach(Iterator.scala:893)
  at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
  at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:157)
  at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1336)
  at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:74)
  at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:66)
  at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
  at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
  at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:92)
  at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:79)
  at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:75)
  at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:84)
  at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:84)
  at org.apache.spark.sql.Dataset.withCallback(Dataset.scala:2774)
  at org.apache.spark.sql.Dataset.count(Dataset.scala:2404)
  at mypackage.Selection(Selection.scala:34)
{code}
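
For reference, the message and top frames above are exactly what plain Scala 
produces when reduce is called on an empty List, which is consistent with the 
empty-list reading:

{code}
// Scala 2.11: reduce on an empty List fails with the same message and frames
// (TraversableOnce.reduce -> List.reduceLeft -> LinearSeqOptimized.reduceLeft).
List.empty[Int].reduce(_ + _)
// => java.lang.UnsupportedOperationException: empty.reduceLeft
{code}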


