uros-b commented on code in PR #56850:
URL: https://github.com/apache/spark/pull/56850#discussion_r3492553525
##########
sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala:
##########
@@ -683,6 +683,56 @@ class InsertSuite extends QueryTest with TestHiveSingleton
with BeforeAndAfter {
}
}
+ test("SPARK-57556: TIME type is unsupported when writing to a Hive serde
directory") {
+ // Disable native data source conversion so that the write goes through
the Hive serde
+ // path (HiveFileFormat) instead of a native data source that may support
TIME.
+ withSQLConf(HiveUtils.CONVERT_METASTORE_INSERT_DIR.key -> "false") {
+ withTempDir { dir =>
+ // InsertIntoHiveDirCommand wraps the failure in a SparkException, so
assert on the cause.
+ val e = intercept[SparkException] {
+ sql(
+ s"""
+ |INSERT OVERWRITE LOCAL DIRECTORY '${dir.toURI.getPath}'
+ |STORED AS PARQUET
+ |SELECT TIME'12:01:02' AS c
+ """.stripMargin)
+ }
+ checkError(
+ exception = e.getCause.asInstanceOf[AnalysisException],
+ condition = "UNSUPPORTED_DATA_TYPE_FOR_DATASOURCE",
+ parameters = Map(
+ "columnName" -> "`c`",
+ "columnType" -> s"\"${TimeType().sql}\"",
+ "format" -> "Hive"))
+ }
+ }
+ }
+
+ test("SPARK-57556: nested TIME type is unsupported when writing to a Hive
serde directory") {
+ // Exercises HiveFileFormat.supportDataType's recursion into nested types:
a TIME nested inside
+ // an array must also be rejected, with the full (array) column type
reported.
+ withSQLConf(HiveUtils.CONVERT_METASTORE_INSERT_DIR.key -> "false") {
+ withTempDir { dir =>
+ // InsertIntoHiveDirCommand wraps the failure in a SparkException, so
assert on the cause.
+ val e = intercept[SparkException] {
+ sql(
+ s"""
+ |INSERT OVERWRITE LOCAL DIRECTORY '${dir.toURI.getPath}'
+ |STORED AS PARQUET
+ |SELECT array(TIME'12:01:02') AS c
+ """.stripMargin)
+ }
+ checkError(
+ exception = e.getCause.asInstanceOf[AnalysisException],
+ condition = "UNSUPPORTED_DATA_TYPE_FOR_DATASOURCE",
+ parameters = Map(
+ "columnName" -> "`c`",
+ "columnType" -> s"\"${ArrayType(TimeType()).sql}\"",
+ "format" -> "Hive"))
+ }
+ }
+ }
+
Review Comment:
Nit: how about also adding a test for the more direct table-write path, e.g.
`INSERT INTO <metastore Hive serde table>`, in addition to these directory writes?
##########
docs/sql-ref-datatypes.md:
##########
@@ -48,6 +48,7 @@ Spark SQL and DataFrames support the following data types:
time-zone.
- `TimeType(precision)`: Represents values comprising values of fields hour,
minute and second with the number of decimal digits `precision` following the
decimal point in the seconds field, without a time-zone.
The range of values is from `00:00:00` to `23:59:59` for min precision `0`,
and to `23:59:59.999999999` for max precision `9`. The default precision is `6`.
+ - Note: Apache Hive has no TIME type, so `TimeType` is not supported in
Hive SerDe interop. Storing it in a Hive SerDe table (including `INSERT
OVERWRITE DIRECTORY ... STORED AS`) or passing it to a Hive UDF/UDAF/UDTF
raises an error rather than silently converting the value.
Review Comment:
```suggestion
- Note: Apache Hive has no TIME type, so `TimeType` is not supported in
Hive SerDe interop. Storing it in a Hive SerDe table (including `INSERT
OVERWRITE DIRECTORY ... STORED AS`) or passing it to a Hive UDF/UDAF/UDTF
raises an error rather than silently converting the value.
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]