[
https://issues.apache.org/jira/browse/SPARK-6245?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14355843#comment-14355843
]
Matthew Farrellee commented on SPARK-6245:
------------------------------------------
this is an issue for the scala interface as well.
{code}
scala> val qsc = new org.apache.spark.sql.SQLContext(sc)
qsc: org.apache.spark.sql.SQLContext = org.apache.spark.sql.SQLContext@36c77da5
scala> qsc.jsonRDD(sc.parallelize(List()))
java.lang.UnsupportedOperationException: empty collection
at org.apache.spark.rdd.RDD$$anonfun$reduce$1.apply(RDD.scala:886)
at org.apache.spark.rdd.RDD$$anonfun$reduce$1.apply(RDD.scala:886)
at scala.Option.getOrElse(Option.scala:120)
at org.apache.spark.rdd.RDD.reduce(RDD.scala:886)
at org.apache.spark.sql.json.JsonRDD$.inferSchema(JsonRDD.scala:57)
at org.apache.spark.sql.SQLContext.jsonRDD(SQLContext.scala:232)
at org.apache.spark.sql.SQLContext.jsonRDD(SQLContext.scala:204)
at $iwC$$iwC$$iwC$$iwC.<init>(<console>:15)
at $iwC$$iwC$$iwC.<init>(<console>:20)
at $iwC$$iwC.<init>(<console>:22)
at $iwC.<init>(<console>:24)
at <init>(<console>:26)
at .<init>(<console>:30)
at .<clinit>(<console>)
at .<init>(<console>:7)
at .<clinit>(<console>)
at $print(<console>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at
org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)
at
org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)
at
org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)
at
org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:828)
at
org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:873)
at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:785)
at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:628)
at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:636)
at org.apache.spark.repl.SparkILoop.loop(SparkILoop.scala:641)
at
org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply$mcZ$sp(SparkILoop.scala:968)
at
org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply(SparkILoop.scala:916)
at
org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply(SparkILoop.scala:916)
at
scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:916)
at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1011)
at org.apache.spark.repl.Main$.main(Main.scala:31)
at org.apache.spark.repl.Main.main(Main.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.spark.deploy.SparkSubmit$.launch(SparkSubmit.scala:358)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:75)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
{code}
{{org.apache.spark.sql.json.JsonRDD$.inferSchema(JsonRDD.scala:57)}} is surely
the guilty party
> jsonRDD() of empty RDD results in exception
> -------------------------------------------
>
> Key: SPARK-6245
> URL: https://issues.apache.org/jira/browse/SPARK-6245
> Project: Spark
> Issue Type: Bug
> Components: PySpark, SQL, Streaming
> Affects Versions: 1.2.1
> Reporter: Matthew Farrellee
>
> converting an empty RDD to a JSON RDD results in an exception. this case is
> common when using spark streaming.
> {code}
> from pyspark import SparkContext
> from pyspark.sql import SQLContext
> sc = SparkContext()
> qsc = SQLContext(sc)
> qsc.jsonRDD(sc.parallelize([]))
> {code}
> exception:
> {noformat}
> Traceback (most recent call last):
>
> File "/tmp/bug.py", line 5, in <module>
> qsc.jsonRDD(sc.parallelize([]))
> File "/usr/share/spark/python/pyspark/sql.py", line 1605, in jsonRDD
> srdd = self._ssql_ctx.jsonRDD(jrdd.rdd(), samplingRatio)
> File
> "/usr/share/spark/python/lib/py4j-0.8.2.1-src.zip/py4j/java_gateway.py", line
> 538, in __call__
> File "/usr/share/spark/python/lib/py4j-0.8.2.1-src.zip/py4j/protocol.py",
> line 300, in get_return_value
> py4j.protocol.Py4JJavaError: An error occurred while calling o27.jsonRDD.
> : java.lang.UnsupportedOperationException: empty collection
> at org.apache.spark.rdd.RDD$$anonfun$reduce$1.apply(RDD.scala:886)
> at org.apache.spark.rdd.RDD$$anonfun$reduce$1.apply(RDD.scala:886)
> at scala.Option.getOrElse(Option.scala:120)
> at org.apache.spark.rdd.RDD.reduce(RDD.scala:886)
> at org.apache.spark.sql.json.JsonRDD$.inferSchema(JsonRDD.scala:57)
> at org.apache.spark.sql.SQLContext.jsonRDD(SQLContext.scala:232)
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> at
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
> at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> at java.lang.reflect.Method.invoke(Method.java:606)
> at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:231)
> at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:379)
> at py4j.Gateway.invoke(Gateway.java:259)
> at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:133)
> at py4j.commands.CallCommand.execute(CallCommand.java:79)
> at py4j.GatewayConnection.run(GatewayConnection.java:207)
> at java.lang.Thread.run(Thread.java:745)
> {noformat}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]