Github user cloud-fan commented on a diff in the pull request:
https://github.com/apache/spark/pull/21667#discussion_r199694805
--- Diff:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala
---
@@ -42,63 +38,27 @@ object DataSourceUtils {
/**
* Verify if the schema is supported in datasource. This verification
should be done
- * in a driver side, e.g., `prepareWrite`, `buildReader`, and
`buildReaderWithPartitionValues`
- * in `FileFormat`.
- *
- * Unsupported data types of csv, json, orc, and parquet are as follows;
- * csv -> R/W: Interval, Null, Array, Map, Struct
- * json -> W: Interval
- * orc -> W: Interval, Null
- * parquet -> R/W: Interval, Null
+ * in a driver side.
*/
private def verifySchema(format: FileFormat, schema: StructType,
isReadPath: Boolean): Unit = {
- def throwUnsupportedException(dataType: DataType): Unit = {
- throw new UnsupportedOperationException(
- s"$format data source does not support ${dataType.simpleString}
data type.")
- }
-
- def verifyType(dataType: DataType): Unit = dataType match {
- case BooleanType | ByteType | ShortType | IntegerType | LongType |
FloatType | DoubleType |
- StringType | BinaryType | DateType | TimestampType | _:
DecimalType =>
-
- // All the unsupported types for CSV
- case _: NullType | _: CalendarIntervalType | _: StructType | _:
ArrayType | _: MapType
- if format.isInstanceOf[CSVFileFormat] =>
- throwUnsupportedException(dataType)
-
- case st: StructType => st.foreach { f => verifyType(f.dataType) }
-
- case ArrayType(elementType, _) => verifyType(elementType)
-
- case MapType(keyType, valueType, _) =>
- verifyType(keyType)
- verifyType(valueType)
-
- case udt: UserDefinedType[_] => verifyType(udt.sqlType)
-
- // Interval type not supported in all the write path
- case _: CalendarIntervalType if !isReadPath =>
- throwUnsupportedException(dataType)
-
- // JSON and ORC don't support an Interval type, but we pass it in
read pass
- // for back-compatibility.
- case _: CalendarIntervalType if format.isInstanceOf[JsonFileFormat]
||
- format.isInstanceOf[OrcFileFormat] =>
+ def verifyType(dataType: DataType): Unit = {
+ if (!format.supportDataType(dataType, isReadPath)) {
+ throw new UnsupportedOperationException(
+ s"$format data source does not support ${dataType.simpleString}
data type.")
+ }
+ dataType match {
--- End diff --
It's tricky to rely on two places to correctly determine which types are
unsupported. `format.supportDataType` should handle complex types itself, to
make the code clearer and easier to maintain.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]