Repository: spark
Updated Branches:
  refs/heads/branch-2.0 c02bc926d -> 7026eb87e


[SPARK-17160] Properly escape field names in code-generated error messages

This patch addresses a corner-case escaping bug where field names which contain 
special characters were unsafely interpolated into error message string 
literals in generated Java code, leading to compilation errors.

This patch addresses this issue by using `addReferenceObj` to store the error 
messages as string fields rather than inline string constants.

Author: Josh Rosen <joshro...@databricks.com>

Closes #15156 from JoshRosen/SPARK-17160.

(cherry picked from commit e719b1c045ba185d242d21bbfcdee2c84dafc587)
Signed-off-by: Josh Rosen <joshro...@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7026eb87
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7026eb87
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7026eb87

Branch: refs/heads/branch-2.0
Commit: 7026eb87e7d7799d2818334a2e191dc46987975f
Parents: c02bc92
Author: Josh Rosen <joshro...@databricks.com>
Authored: Mon Sep 19 20:20:36 2016 -0700
Committer: Josh Rosen <joshro...@databricks.com>
Committed: Mon Sep 19 20:21:25 2016 -0700

----------------------------------------------------------------------
 .../apache/spark/sql/catalyst/expressions/misc.scala   | 12 +++++++++---
 .../sql/catalyst/expressions/objects/objects.scala     | 12 ++++++++----
 .../sql/catalyst/expressions/CodeGenerationSuite.scala | 13 ++++++++++++-
 3 files changed, 29 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/7026eb87/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
index 3692075..92f8fb8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
@@ -477,10 +477,13 @@ case class PrintToStderr(child: Expression) extends UnaryExpression {
 
   protected override def nullSafeEval(input: Any): Any = input
 
+  private val outputPrefix = s"Result of ${child.simpleString} is "
+
   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    val outputPrefixField = ctx.addReferenceObj("outputPrefix", outputPrefix)
     nullSafeCodeGen(ctx, ev, c =>
       s"""
-         | System.err.println("Result of ${child.simpleString} is " + $c);
+         | System.err.println($outputPrefixField + $c);
          | ${ev.value} = $c;
        """.stripMargin)
   }
@@ -501,10 +504,12 @@ case class AssertTrue(child: Expression) extends UnaryExpression with ImplicitCa
 
   override def prettyName: String = "assert_true"
 
+  private val errMsg = s"'${child.simpleString}' is not true!"
+
   override def eval(input: InternalRow) : Any = {
     val v = child.eval(input)
     if (v == null || java.lang.Boolean.FALSE.equals(v)) {
-      throw new RuntimeException(s"'${child.simpleString}' is not true!")
+      throw new RuntimeException(errMsg)
     } else {
       null
     }
@@ -512,9 +517,10 @@ case class AssertTrue(child: Expression) extends UnaryExpression with ImplicitCa
 
   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
     val eval = child.genCode(ctx)
+    val errMsgField = ctx.addReferenceObj("errMsg", errMsg)
     ExprCode(code = s"""${eval.code}
        |if (${eval.isNull} || !${eval.value}) {
-       |  throw new RuntimeException("'${child.simpleString}' is not true.");
+       |  throw new RuntimeException($errMsgField);
        |}""".stripMargin, isNull = "true", value = "null")
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/7026eb87/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
index 1cdda53..691edd5 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
@@ -763,7 +763,10 @@ case class GetExternalRowField(
   override def eval(input: InternalRow): Any =
     throw new UnsupportedOperationException("Only code-generated evaluation is supported")
 
+  private val errMsg = s"The ${index}th field '$fieldName' of input row cannot be null."
+
   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    val errMsgField = ctx.addReferenceObj("errMsg", errMsg)
     val row = child.genCode(ctx)
     val code = s"""
       ${row.code}
@@ -773,8 +776,7 @@ case class GetExternalRowField(
       }
 
       if (${row.value}.isNullAt($index)) {
-        throw new RuntimeException("The ${index}th field '$fieldName' of input row " +
-          "cannot be null.");
+        throw new RuntimeException($errMsgField);
       }
 
       final Object ${ev.value} = ${row.value}.get($index);
@@ -799,7 +801,10 @@ case class ValidateExternalType(child: Expression, expected: DataType)
   override def eval(input: InternalRow): Any =
     throw new UnsupportedOperationException("Only code-generated evaluation is supported")
 
+  private val errMsg = s" is not a valid external type for schema of ${expected.simpleString}"
+
   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    val errMsgField = ctx.addReferenceObj("errMsg", errMsg)
     val input = child.genCode(ctx)
     val obj = input.value
 
@@ -820,8 +825,7 @@ case class ValidateExternalType(child: Expression, 
expected: DataType)
         if ($typeCheck) {
           ${ev.value} = (${ctx.boxedType(dataType)}) $obj;
         } else {
-          throw new RuntimeException($obj.getClass().getName() + " is not a valid " +
-            "external type for schema of ${expected.simpleString}");
+          throw new RuntimeException($obj.getClass().getName() + $errMsgField);
         }
       }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/7026eb87/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala
index 0532cf5..45dcfca 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala
@@ -23,7 +23,7 @@ import org.apache.spark.sql.Row
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.dsl.expressions._
 import org.apache.spark.sql.catalyst.expressions.codegen._
-import org.apache.spark.sql.catalyst.expressions.objects.CreateExternalRow
+import org.apache.spark.sql.catalyst.expressions.objects.{CreateExternalRow, GetExternalRowField, ValidateExternalType}
 import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData}
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types.UTF8String
@@ -265,4 +265,15 @@ class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper {
       Literal.create("\\\\u001/Compilation error occurs", StringType) :: Nil)
 
   }
+
+  test("SPARK-17160: field names are properly escaped by GetExternalRowField") {
+    val inputObject = BoundReference(0, ObjectType(classOf[Row]), nullable = true)
+    GenerateUnsafeProjection.generate(
+      ValidateExternalType(
+        GetExternalRowField(inputObject, index = 0, fieldName = "\"quote"), IntegerType) :: Nil)
+  }
+
+  test("SPARK-17160: field names are properly escaped by AssertTrue") {
+    GenerateUnsafeProjection.generate(AssertTrue(Cast(Literal("\""), BooleanType)) :: Nil)
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to