Repository: spark Updated Branches: refs/heads/branch-2.0 5487fa0b8 -> eec03718d
[SPARK-16216][SQL][FOLLOWUP][BRANCH-2.0] Backport enabling timestamp type tests for JSON and verify all unsupported types in CSV ## What changes were proposed in this pull request? This backports https://github.com/apache/spark/pull/14829 ## How was this patch tested? Tests in `JsonHadoopFsRelation` and `CSVSuite`. Author: hyukjinkwon <[email protected]> Closes #14840 from HyukjinKwon/SPARK-16216-followup-backport. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/eec03718 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/eec03718 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/eec03718 Branch: refs/heads/branch-2.0 Commit: eec03718db7e412f466ea72f3d9c2682915080bc Parents: 5487fa0 Author: hyukjinkwon <[email protected]> Authored: Sun Aug 28 16:30:33 2016 +0200 Committer: Herman van Hovell <[email protected]> Committed: Sun Aug 28 16:30:33 2016 +0200 ---------------------------------------------------------------------- .../datasources/csv/CSVFileFormat.scala | 19 ++++++++++++------- .../datasources/csv/CSVInferSchema.scala | 1 + .../sql/execution/datasources/csv/CSVSuite.scala | 15 ++++++++++++++- .../sql/sources/JsonHadoopFsRelationSuite.scala | 4 ---- 4 files changed, 27 insertions(+), 12 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/eec03718/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala index 12e19f9..4a60f51 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala @@ -180,13 +180,18 @@ class CSVFileFormat extends TextBasedFileFormat with DataSourceRegister { } private def verifySchema(schema: StructType): Unit = { - schema.foreach { field => - field.dataType match { - case _: ArrayType | _: MapType | _: StructType => - throw new UnsupportedOperationException( - s"CSV data source does not support ${field.dataType.simpleString} data type.") - case _ => - } + def verifyType(dataType: DataType): Unit = dataType match { + case ByteType | ShortType | IntegerType | LongType | FloatType | + DoubleType | BooleanType | _: DecimalType | TimestampType | + DateType | StringType => + + case udt: UserDefinedType[_] => verifyType(udt.sqlType) + + case _ => + throw new UnsupportedOperationException( + s"CSV data source does not support ${dataType.simpleString} data type.") } + + schema.foreach(field => verifyType(field.dataType)) } } http://git-wip-us.apache.org/repos/asf/spark/blob/eec03718/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala index f1b4c11..1ca6eff 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala @@ -290,6 +290,7 @@ private[csv] object CSVTypeCast { DateTimeUtils.millisToDays(DateTimeUtils.stringToTime(datum).getTime) } case _: StringType => UTF8String.fromString(datum) + case udt: UserDefinedType[_] => castTo(datum, udt.sqlType, nullable, options) case _ => throw new RuntimeException(s"Unsupported type: ${castType.typeName}") } } 
http://git-wip-us.apache.org/repos/asf/spark/blob/eec03718/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index f68d220..1930862 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -27,7 +27,7 @@ import org.apache.hadoop.io.SequenceFile.CompressionType import org.apache.hadoop.io.compress.GzipCodec import org.apache.spark.SparkException -import org.apache.spark.sql.{DataFrame, QueryTest, Row} +import org.apache.spark.sql.{DataFrame, QueryTest, Row, UDT} import org.apache.spark.sql.test.{SharedSQLContext, SQLTestUtils} import org.apache.spark.sql.types._ @@ -680,6 +680,19 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils { Seq((1, Array("Tesla", "Chevy", "Ford"))).toDF("id", "brands").write.csv(csvDir) }.getMessage assert(msg.contains("CSV data source does not support array<string> data type")) + + msg = intercept[UnsupportedOperationException] { + Seq((1, new UDT.MyDenseVector(Array(0.25, 2.25, 4.25)))).toDF("id", "vectors") + .write.csv(csvDir) + }.getMessage + assert(msg.contains("CSV data source does not support array<double> data type")) + + msg = intercept[SparkException] { + val schema = StructType(StructField("a", new UDT.MyDenseVectorUDT(), true) :: Nil) + spark.range(1).write.csv(csvDir) + spark.read.schema(schema).csv(csvDir).collect() + }.getCause.getMessage + assert(msg.contains("Unsupported type: array")) } } http://git-wip-us.apache.org/repos/asf/spark/blob/eec03718/sql/hive/src/test/scala/org/apache/spark/sql/sources/JsonHadoopFsRelationSuite.scala 
---------------------------------------------------------------------- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/JsonHadoopFsRelationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/JsonHadoopFsRelationSuite.scala index 52486b1..d79edee 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/JsonHadoopFsRelationSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/JsonHadoopFsRelationSuite.scala @@ -32,10 +32,6 @@ class JsonHadoopFsRelationSuite extends HadoopFsRelationTest { override protected def supportsDataType(dataType: DataType): Boolean = dataType match { case _: NullType => false case _: BinaryType => false - // `TimestampType` is disabled because `DatatypeConverter.parseDateTime()` - // in `DateTimeUtils` parses the formatted string wrongly when the date is - // too early. (e.g. "1600-07-13T08:36:32.847"). - case _: TimestampType => false case _: CalendarIntervalType => false case _ => true } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
