spark git commit: [SPARK-24571][SQL] Support Char literals

lixiao Wed, 20 Jun 2018 23:38:52 -0700

Repository: spark
Updated Branches:
  refs/heads/master 9de11d3f9 -> 54fcaafb0



[SPARK-24571][SQL] Support Char literals

## What changes were proposed in this pull request?

In the PR, I propose to automatically convert a `Literal` with `Char` type to a 
`Literal` of `String` type. Currently, the following code:
```scala
val df = Seq("Amsterdam", "San Francisco", "London").toDF("city")
df.where($"city".contains('o')).show(false)
```
fails with the exception:
```
Unsupported literal type class java.lang.Character o
java.lang.RuntimeException: Unsupported literal type class java.lang.Character o
at org.apache.spark.sql.catalyst.expressions.Literal$.apply(literals.scala:78)
```
The PR fixes this issue by converting `char` to `string` of length `1`. I
believe it makes sense not to differentiate between `char` and `string(1)` in
_a unified, multi-language data platform_ like Spark, which supports languages
like Python/R.

Author: Maxim Gekk <maxim.g...@databricks.com>
Author: Maxim Gekk <max.g...@gmail.com>

Closes #21578 from MaxGekk/support-char-literals.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/54fcaafb
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/54fcaafb
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/54fcaafb

Branch: refs/heads/master
Commit: 54fcaafb094e299f21c18370fddb4a727c88d875
Parents: 9de11d3
Author: Maxim Gekk <maxim.g...@databricks.com>
Authored: Wed Jun 20 23:38:37 2018 -0700
Committer: Xiao Li <gatorsm...@gmail.com>
Committed: Wed Jun 20 23:38:37 2018 -0700

----------------------------------------------------------------------
 .../apache/spark/sql/catalyst/CatalystTypeConverters.scala   | 1 +
 .../org/apache/spark/sql/catalyst/expressions/literals.scala | 1 +
 .../spark/sql/catalyst/CatalystTypeConvertersSuite.scala     | 8 ++++++++
 .../sql/catalyst/expressions/LiteralExpressionSuite.scala    | 7 +++++++
 .../src/test/scala/org/apache/spark/sql/DatasetSuite.scala   | 8 ++++++++
 5 files changed, 25 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/54fcaafb/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala
index 9e9105a..93df73a 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala
@@ -286,6 +286,7 @@ object CatalystTypeConverters {
     override def toCatalystImpl(scalaValue: Any): UTF8String = scalaValue 
match {
       case str: String => UTF8String.fromString(str)
       case utf8: UTF8String => utf8
+      case chr: Char => UTF8String.fromString(chr.toString)
       case other => throw new IllegalArgumentException(
         s"The value (${other.toString}) of the type 
(${other.getClass.getCanonicalName}) "
           + s"cannot be converted to the string type")

http://git-wip-us.apache.org/repos/asf/spark/blob/54fcaafb/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
index 246025b..0cc2a33 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
@@ -57,6 +57,7 @@ object Literal {
     case b: Byte => Literal(b, ByteType)
     case s: Short => Literal(s, ShortType)
     case s: String => Literal(UTF8String.fromString(s), StringType)
+    case c: Char => Literal(UTF8String.fromString(c.toString), StringType)
     case b: Boolean => Literal(b, BooleanType)
     case d: BigDecimal => Literal(Decimal(d), DecimalType.fromBigDecimal(d))
     case d: JavaBigDecimal =>

http://git-wip-us.apache.org/repos/asf/spark/blob/54fcaafb/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala
index f99af9b..89452ee 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala
@@ -22,6 +22,7 @@ import org.apache.spark.sql.Row
 import org.apache.spark.sql.catalyst.expressions.UnsafeArrayData
 import org.apache.spark.sql.catalyst.util.GenericArrayData
 import org.apache.spark.sql.types._
+import org.apache.spark.unsafe.types.UTF8String
 
 class CatalystTypeConvertersSuite extends SparkFunSuite {
 
@@ -139,4 +140,11 @@ class CatalystTypeConvertersSuite extends SparkFunSuite {
     assert(exception.getMessage.contains("The value (0.1) of the type "
       + "(java.lang.Double) cannot be converted to the string type"))
   }
+
+  test("SPARK-24571: convert Char to String") {
+    val chr: Char = 'X'
+    val converter = 
CatalystTypeConverters.createToCatalystConverter(StringType)
+    val expected = UTF8String.fromString("X")
+    assert(converter(chr) === expected)
+  }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/54fcaafb/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala
index a9e0eb0..86f80fe 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala
@@ -219,4 +219,11 @@ class LiteralExpressionSuite extends SparkFunSuite with 
ExpressionEvalHelper {
     checkUnsupportedTypeInLiteral(Map("key1" -> 1, "key2" -> 2))
     checkUnsupportedTypeInLiteral(("mike", 29, 1.0))
   }
+
+  test("SPARK-24571: char literals") {
+    checkEvaluation(Literal('X'), "X")
+    checkEvaluation(Literal.create('0'), "0")
+    checkEvaluation(Literal('\u0000'), "\u0000")
+    checkEvaluation(Literal.create('\n'), "\n")
+  }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/54fcaafb/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
index 093cee9..2d20c50 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
@@ -1479,6 +1479,14 @@ class DatasetSuite extends QueryTest with 
SharedSQLContext {
     assert(ds1.schema == ds2.schema)
     checkDataset(ds1.select("_2._2"), ds2.select("_2._2").collect(): _*)
   }
+
+  test("SPARK-24571: filtering of string values by char literal") {
+    val df = Seq("Amsterdam", "San Francisco", "X").toDF("city")
+    checkAnswer(df.where('city === 'X'), Seq(Row("X")))
+    checkAnswer(
+      df.where($"city".contains(new java.lang.Character('A'))),
+      Seq(Row("Amsterdam")))
+  }
 }
 
 case class TestDataUnion(x: Int, y: Int, z: Int)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-24571][SQL] Support Char literals

Reply via email to