Repository: spark
Updated Branches:
  refs/heads/branch-1.5 4d54ba896 -> e7e0540eb


[SPARK-12477][SQL] - Tungsten projection fails for null values in array fields

Accessing null elements in an array field fails when tungsten is enabled.
It works in Spark 1.3.1, and in Spark > 1.5 with Tungsten disabled.

This PR solves this by checking if the accessed element in the array field is 
null, in the generated code.

Example:
```
// Array of String
case class AS( as: Seq[String] )
val dfAS = sc.parallelize( Seq( AS ( Seq("a",null,"b") ) ) ).toDF
dfAS.registerTempTable("T_AS")
for (i <- 0 to 2) { println(i + " = " + sqlContext.sql(s"select as[$i] from 
T_AS").collect.mkString(","))}
```

With Tungsten disabled:
```
0 = [a]
1 = [null]
2 = [b]
```

With Tungsten enabled:
```
0 = [a]
15/12/22 09:32:50 ERROR Executor: Exception in task 7.0 in stage 1.0 (TID 15)
java.lang.NullPointerException
        at 
org.apache.spark.sql.catalyst.expressions.UnsafeRowWriters$UTF8StringWriter.getSize(UnsafeRowWriters.java:90)
        at 
org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.apply(Unknown
 Source)
        at 
org.apache.spark.sql.execution.TungstenProject$$anonfun$3$$anonfun$apply$3.apply(basicOperators.scala:90)
        at 
org.apache.spark.sql.execution.TungstenProject$$anonfun$3$$anonfun$apply$3.apply(basicOperators.scala:88)
        at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
        at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
        at scala.collection.Iterator$class.foreach(Iterator.scala:727)
        at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
```

Author: pierre-borckmans <[email protected]>

Closes #10429 from 
pierre-borckmans/SPARK-12477_Tungsten-Projection-Null-Element-In-Array.

(cherry picked from commit 43b2a6390087b7ce262a54dc8ab8dd825db62e21)
Signed-off-by: Reynold Xin <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e7e0540e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e7e0540e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e7e0540e

Branch: refs/heads/branch-1.5
Commit: e7e0540ebdc30275fed562196436e2353c74bb91
Parents: 4d54ba8
Author: pierre-borckmans <[email protected]>
Authored: Tue Dec 22 23:00:42 2015 -0800
Committer: Reynold Xin <[email protected]>
Committed: Tue Dec 22 23:01:03 2015 -0800

----------------------------------------------------------------------
 .../sql/catalyst/expressions/complexTypeExtractors.scala    | 2 +-
 .../org/apache/spark/sql/DataFrameComplexTypeSuite.scala    | 9 +++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/e7e0540e/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala
index 9927da2..0adbe20 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala
@@ -216,7 +216,7 @@ case class GetArrayItem(child: Expression, ordinal: 
Expression)
     nullSafeCodeGen(ctx, ev, (eval1, eval2) => {
       s"""
         final int index = (int) $eval2;
-        if (index >= $eval1.numElements() || index < 0) {
+        if (index >= $eval1.numElements() || index < 0 || 
$eval1.isNullAt(index)) {
           ${ev.isNull} = true;
         } else {
           ${ev.primitive} = ${ctx.getValue(eval1, dataType, "index")};

http://git-wip-us.apache.org/repos/asf/spark/blob/e7e0540e/sql/core/src/test/scala/org/apache/spark/sql/DataFrameComplexTypeSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameComplexTypeSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameComplexTypeSuite.scala
index 3c359dd..06d711f 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameComplexTypeSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameComplexTypeSuite.scala
@@ -43,4 +43,13 @@ class DataFrameComplexTypeSuite extends QueryTest with 
SharedSQLContext {
     val df = sqlContext.sparkContext.parallelize(Seq((1, 1))).toDF("a", "b")
     df.select(array($"a").as("s")).select(f(expr("s[0]"))).collect()
   }
+
+  test("SPARK-12477 accessing null element in array field") {
+    val df = sparkContext.parallelize(Seq((Seq("val1", null, "val2"),
+      Seq(Some(1), None, Some(2))))).toDF("s", "i")
+    val nullStringRow = df.selectExpr("s[1]").collect()(0)
+    assert(nullStringRow == org.apache.spark.sql.Row(null))
+    val nullIntRow = df.selectExpr("i[1]").collect()(0)
+    assert(nullIntRow == org.apache.spark.sql.Row(null))
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to