Repository: spark
Updated Branches:
  refs/heads/branch-1.3 724aab4b9 -> 684ff2476


SPARK-6245 [SQL] jsonRDD() of empty RDD results in exception

Avoid `UnsupportedOperationException` from JsonRDD.inferSchema on empty RDD.

It is unclear whether this case should instead be reported as an error (with a 
clearer message), but it can legitimately arise when the input is down-sampled 
so heavily that no records are sampled.

With this change, a call such as:
```
sqlContext.jsonRDD(sc.parallelize(List[String]()))
```
just results in
```
org.apache.spark.sql.DataFrame = []
```

Author: Sean Owen <so...@cloudera.com>

Closes #4971 from srowen/SPARK-6245 and squashes the following commits:

3699964 [Sean Owen] Set() -> Set.empty
3c619e1 [Sean Owen] Avoid UnsupportedOperationException from 
JsonRDD.inferSchema on empty RDD


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/684ff247
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/684ff247
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/684ff247

Branch: refs/heads/branch-1.3
Commit: 684ff2476e4ef8aa2d39e1385413edb1b9129838
Parents: 724aab4
Author: Sean Owen <so...@cloudera.com>
Authored: Wed Mar 11 14:09:09 2015 +0000
Committer: Sean Owen <so...@cloudera.com>
Committed: Mon Mar 16 19:06:53 2015 +0000

----------------------------------------------------------------------
 .../src/main/scala/org/apache/spark/sql/json/JsonRDD.scala    | 6 +++++-
 .../src/test/scala/org/apache/spark/sql/json/JsonSuite.scala  | 7 +++++++
 .../test/scala/org/apache/spark/sql/json/TestJsonData.scala   | 3 +++
 3 files changed, 15 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/684ff247/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala
index e54a2a3..2b0358c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala
@@ -48,7 +48,11 @@ private[sql] object JsonRDD extends Logging {
     require(samplingRatio > 0, s"samplingRatio ($samplingRatio) should be 
greater than 0")
     val schemaData = if (samplingRatio > 0.99) json else json.sample(false, 
samplingRatio, 1)
     val allKeys =
-      parseJson(schemaData, 
columnNameOfCorruptRecords).map(allKeysWithValueTypes).reduce(_ ++ _)
+      if (schemaData.isEmpty()) {
+        Set.empty[(String,DataType)]
+      } else {
+        parseJson(schemaData, 
columnNameOfCorruptRecords).map(allKeysWithValueTypes).reduce(_ ++ _)
+      }
     createSchema(allKeys)
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/684ff247/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala
index 0c21f72..320b80d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala
@@ -1033,4 +1033,11 @@ class JsonSuite extends QueryTest {
     assert(!logicalRelation2.sameResult(logicalRelation3),
       s"$logicalRelation2 and $logicalRelation3 should be considered not 
having the same result.")
   }
+
+  test("SPARK-6245 JsonRDD.inferSchema on empty RDD") {
+    // This is really a test that it doesn't throw an exception
+    val emptySchema = JsonRDD.inferSchema(empty, 1.0, "")
+    assert(StructType(Seq()) === emptySchema)
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/684ff247/sql/core/src/test/scala/org/apache/spark/sql/json/TestJsonData.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/json/TestJsonData.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/json/TestJsonData.scala
index 15698f6..47a97a4 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/json/TestJsonData.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/json/TestJsonData.scala
@@ -185,4 +185,7 @@ object TestJsonData {
       """{"a":{, b:3}""" ::
       """{"b":"str_b_4", "a":"str_a_4", "c":"str_c_4"}""" ::
       """]""" :: Nil)
+
+  val empty =
+    TestSQLContext.sparkContext.parallelize(Seq[String]())
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to