[ 
https://issues.apache.org/jira/browse/SPARK-10304?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Yin Huai updated SPARK-10304:
-----------------------------
    Description: 
I have a dir structure like {{/path/table1/partition_column=1/}}. When I try to 
use {{load("/path/")}}, it works and I get a DF. When I query this DF, if it is 
stored as ORC, there will be the following NPE. But, if it is Parquet, we even 
can return rows. We should complain to users about the dir struct because 
{{table1}} does not meet our format.

{code}
org.apache.spark.SparkException: Job aborted due to stage failure: Task 26 in 
stage 57.0 failed 4 times, most recent failure: Lost task 26.3 in stage 57.0 
(TID 3504, 10.0.195.227): java.lang.NullPointerException
at 
org.apache.spark.sql.hive.HiveInspectors$class.unwrapperFor(HiveInspectors.scala:466)
        at 
org.apache.spark.sql.hive.orc.OrcTableScan.unwrapperFor(OrcRelation.scala:224)
        at 
org.apache.spark.sql.hive.orc.OrcTableScan$$anonfun$org$apache$spark$sql$hive$orc$OrcTableScan$$fillObject$1$$anonfun$9.apply(OrcRelation.scala:261)
        at 
org.apache.spark.sql.hive.orc.OrcTableScan$$anonfun$org$apache$spark$sql$hive$orc$OrcTableScan$$fillObject$1$$anonfun$9.apply(OrcRelation.scala:261)
        at 
scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
        at 
scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
        at scala.collection.immutable.List.foreach(List.scala:318)
        at scala.collection.TraversableLike$class.map(TraversableLike.scala:244)
        at scala.collection.AbstractTraversable.map(Traversable.scala:105)
        at 
org.apache.spark.sql.hive.orc.OrcTableScan$$anonfun$org$apache$spark$sql$hive$orc$OrcTableScan$$fillObject$1.apply(OrcRelation.scala:261)
        at 
org.apache.spark.sql.hive.orc.OrcTableScan$$anonfun$org$apache$spark$sql$hive$orc$OrcTableScan$$fillObject$1.apply(OrcRelation.scala:256)
        at scala.Option.map(Option.scala:145)
        at 
org.apache.spark.sql.hive.orc.OrcTableScan.org$apache$spark$sql$hive$orc$OrcTableScan$$fillObject(OrcRelation.scala:256)
        at 
org.apache.spark.sql.hive.orc.OrcTableScan$$anonfun$execute$3.apply(OrcRelation.scala:318)
        at 
org.apache.spark.sql.hive.orc.OrcTableScan$$anonfun$execute$3.apply(OrcRelation.scala:316)
        at 
org.apache.spark.rdd.HadoopRDD$HadoopMapPartitionsWithSplitRDD.compute(HadoopRDD.scala:380)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
        at 
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
        at 
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
{code}

  was:
{code}
org.apache.spark.SparkException: Job aborted due to stage failure: Task 26 in 
stage 57.0 failed 4 times, most recent failure: Lost task 26.3 in stage 57.0 
(TID 3504, 10.0.195.227): java.lang.NullPointerException
at 
org.apache.spark.sql.hive.HiveInspectors$class.unwrapperFor(HiveInspectors.scala:466)
        at 
org.apache.spark.sql.hive.orc.OrcTableScan.unwrapperFor(OrcRelation.scala:224)
        at 
org.apache.spark.sql.hive.orc.OrcTableScan$$anonfun$org$apache$spark$sql$hive$orc$OrcTableScan$$fillObject$1$$anonfun$9.apply(OrcRelation.scala:261)
        at 
org.apache.spark.sql.hive.orc.OrcTableScan$$anonfun$org$apache$spark$sql$hive$orc$OrcTableScan$$fillObject$1$$anonfun$9.apply(OrcRelation.scala:261)
        at 
scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
        at 
scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
        at scala.collection.immutable.List.foreach(List.scala:318)
        at scala.collection.TraversableLike$class.map(TraversableLike.scala:244)
        at scala.collection.AbstractTraversable.map(Traversable.scala:105)
        at 
org.apache.spark.sql.hive.orc.OrcTableScan$$anonfun$org$apache$spark$sql$hive$orc$OrcTableScan$$fillObject$1.apply(OrcRelation.scala:261)
        at 
org.apache.spark.sql.hive.orc.OrcTableScan$$anonfun$org$apache$spark$sql$hive$orc$OrcTableScan$$fillObject$1.apply(OrcRelation.scala:256)
        at scala.Option.map(Option.scala:145)
        at 
org.apache.spark.sql.hive.orc.OrcTableScan.org$apache$spark$sql$hive$orc$OrcTableScan$$fillObject(OrcRelation.scala:256)
        at 
org.apache.spark.sql.hive.orc.OrcTableScan$$anonfun$execute$3.apply(OrcRelation.scala:318)
        at 
org.apache.spark.sql.hive.orc.OrcTableScan$$anonfun$execute$3.apply(OrcRelation.scala:316)
        at 
org.apache.spark.rdd.HadoopRDD$HadoopMapPartitionsWithSplitRDD.compute(HadoopRDD.scala:380)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
        at 
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
        at 
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
{code}


> Partition discovery does not throw an exception if the dir structure is valid
> -----------------------------------------------------------------------------
>
>                 Key: SPARK-10304
>                 URL: https://issues.apache.org/jira/browse/SPARK-10304
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>            Reporter: Yin Huai
>            Assignee: Zhan Zhang
>            Priority: Critical
>
> I have a dir structure like {{/path/table1/partition_column=1/}}. When I try 
> to use {{load("/path/")}}, it works and I get a DF. When I query this DF, if 
> it is stored as ORC, there will be the following NPE. But, if it is Parquet, 
> we even can return rows. We should complain to users about the dir struct 
> because {{table1}} does not meet our format.
> {code}
> org.apache.spark.SparkException: Job aborted due to stage failure: Task 26 in 
> stage 57.0 failed 4 times, most recent failure: Lost task 26.3 in stage 57.0 
> (TID 3504, 10.0.195.227): java.lang.NullPointerException
> at 
> org.apache.spark.sql.hive.HiveInspectors$class.unwrapperFor(HiveInspectors.scala:466)
>       at 
> org.apache.spark.sql.hive.orc.OrcTableScan.unwrapperFor(OrcRelation.scala:224)
>       at 
> org.apache.spark.sql.hive.orc.OrcTableScan$$anonfun$org$apache$spark$sql$hive$orc$OrcTableScan$$fillObject$1$$anonfun$9.apply(OrcRelation.scala:261)
>       at 
> org.apache.spark.sql.hive.orc.OrcTableScan$$anonfun$org$apache$spark$sql$hive$orc$OrcTableScan$$fillObject$1$$anonfun$9.apply(OrcRelation.scala:261)
>       at 
> scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
>       at 
> scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
>       at scala.collection.immutable.List.foreach(List.scala:318)
>       at scala.collection.TraversableLike$class.map(TraversableLike.scala:244)
>       at scala.collection.AbstractTraversable.map(Traversable.scala:105)
>       at 
> org.apache.spark.sql.hive.orc.OrcTableScan$$anonfun$org$apache$spark$sql$hive$orc$OrcTableScan$$fillObject$1.apply(OrcRelation.scala:261)
>       at 
> org.apache.spark.sql.hive.orc.OrcTableScan$$anonfun$org$apache$spark$sql$hive$orc$OrcTableScan$$fillObject$1.apply(OrcRelation.scala:256)
>       at scala.Option.map(Option.scala:145)
>       at 
> org.apache.spark.sql.hive.orc.OrcTableScan.org$apache$spark$sql$hive$orc$OrcTableScan$$fillObject(OrcRelation.scala:256)
>       at 
> org.apache.spark.sql.hive.orc.OrcTableScan$$anonfun$execute$3.apply(OrcRelation.scala:318)
>       at 
> org.apache.spark.sql.hive.orc.OrcTableScan$$anonfun$execute$3.apply(OrcRelation.scala:316)
>       at 
> org.apache.spark.rdd.HadoopRDD$HadoopMapPartitionsWithSplitRDD.compute(HadoopRDD.scala:380)
>       at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297)
>       at org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
>       at 
> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
>       at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297)
>       at org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
>       at 
> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org

Reply via email to