This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new a35043c [SPARK-27591][SQL] Fix UnivocityParser for UserDefinedType
a35043c is described below
commit a35043c9e22a9bd9e372246c8d337e016736536c
Author: Artem Kalchenko <[email protected]>
AuthorDate: Wed May 1 08:27:51 2019 +0900
[SPARK-27591][SQL] Fix UnivocityParser for UserDefinedType
## What changes were proposed in this pull request?
Fix a bug in UnivocityParser: the makeConverter method didn't work correctly for UserDefinedType.
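In short, makeConverter is expected to return a String => Any converter, but the UserDefinedType case wrapped the recursive call in an extra lambda, so applying the returned converter to a datum produced another function instead of the parsed value. A minimal before/after sketch of the change (see the full diff below for context):

    // Before: the extra (datum: String) => lambda means the returned converter,
    // when applied, yields yet another converter rather than the converted value.
    case udt: UserDefinedType[_] => (datum: String) =>
      makeConverter(name, udt.sqlType, nullable)

    // After: delegate directly to the converter for the UDT's underlying sqlType.
    case udt: UserDefinedType[_] =>
      makeConverter(name, udt.sqlType, nullable)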
## How was this patch tested?
The UnivocityParserSuite test suite has been extended with a test that defines a string-backed UserDefinedType and checks that makeConverter parses a raw datum according to the UDT's underlying sqlType.
Closes #24496 from kalkolab/spark-27591.
Authored-by: Artem Kalchenko <[email protected]>
Signed-off-by: HyukjinKwon <[email protected]>
---
.../spark/sql/catalyst/csv/UnivocityParser.scala | 2 +-
.../sql/catalyst/csv/UnivocityParserSuite.scala | 35 ++++++++++++++++++++++
2 files changed, 36 insertions(+), 1 deletion(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala
index b26044e..8456b7d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala
@@ -166,7 +166,7 @@ class UnivocityParser(
     case _: StringType => (d: String) =>
       nullSafeDatum(d, name, nullable, options)(UTF8String.fromString)

-    case udt: UserDefinedType[_] => (datum: String) =>
+    case udt: UserDefinedType[_] =>
       makeConverter(name, udt.sqlType, nullable)

     // We don't actually hit this exception though, we keep it for understandability
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala
index 986de12..933c576 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala
@@ -231,4 +231,39 @@ class UnivocityParserSuite extends SparkFunSuite with SQLHelper {

     Seq("en-US", "ko-KR", "ru-RU", "de-DE").foreach(checkDecimalParsing)
   }
+
+  test("SPARK-27591 UserDefinedType can be read") {
+
+    @SQLUserDefinedType(udt = classOf[StringBasedUDT])
+    case class NameId(name: String, id: Int)
+
+    class StringBasedUDT extends UserDefinedType[NameId] {
+      override def sqlType: DataType = StringType
+
+      override def serialize(obj: NameId): Any = s"${obj.name}\t${obj.id}"
+
+      override def deserialize(datum: Any): NameId = datum match {
+        case s: String =>
+          val split = s.split("\t")
+          if (split.length != 2) throw new RuntimeException(s"Can't parse $s into NameId");
+          NameId(split(0), Integer.parseInt(split(1)))
+        case _ => throw new RuntimeException(s"Can't parse $datum into NameId");
+      }
+
+      override def userClass: Class[NameId] = classOf[NameId]
+    }
+
+    object StringBasedUDT extends StringBasedUDT
+
+    val input = "name\t42"
+    val expected = UTF8String.fromString(input)
+
+    val options = new CSVOptions(Map.empty[String, String], false, "GMT")
+    val parser = new UnivocityParser(StructType(Seq.empty), options)
+
+    val convertedValue = parser.makeConverter("_1", StringBasedUDT, nullable = false).apply(input)
+
+    assert(convertedValue.isInstanceOf[UTF8String])
+    assert(convertedValue == expected)
+  }
 }
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]