yaooqinn commented on code in PR #45180:
URL: https://github.com/apache/spark/pull/45180#discussion_r1498524228
##########
sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala:
##########
@@ -2876,22 +2876,38 @@ class HiveDDLSuite
}
}
- test("SPARK-24681 checks if nested column names do not include ',', ':', and
';'") {
- Seq("nested,column", "nested:column", "nested;column").foreach {
nestedColumnName =>
+ test("SPARK-47101 checks if nested column names do not include invalid
characters") {
+ // delimiter characters
+ Seq(",", ":").foreach { c =>
+ val typ = s"array<struct<`abc${c}xyz`:int>>"
+ val replaced = typ.replaceAll("`",
"").replaceAll("(?<=struct<|,)([^,<:]+)(?=:)", "`$1`")
+ withTable("t") {
+ checkError(
+ exception = intercept[SparkException] {
+ sql(s"CREATE TABLE t (a $typ) USING hive")
+ },
+ errorClass = "CANNOT_RECOGNIZE_HIVE_TYPE",
+ parameters = Map(
+ "fieldType" -> toSQLType(replaced),
+ "fieldName" -> "`a`")
+ )
+ }
+ }
+ // other special characters
+ Seq(";", "^", "\\", "/", "%").foreach { c =>
+ val typ = s"array<struct<`abc${c}xyz`:int>>"
+ val replaced = typ.replaceAll("`", "")
+ val msg = s"java.lang.IllegalArgumentException: Error: : expected at the
position " +
+ s"16 of '$replaced' but '$c' is found."
withTable("t") {
checkError(
exception = intercept[AnalysisException] {
- spark.range(1)
- .select(struct(lit(0).as(nestedColumnName)).as("toplevel"))
- .write
- .format("hive")
- .saveAsTable("t")
+ sql(s"CREATE TABLE t (a $typ) USING hive")
},
- errorClass = "INVALID_HIVE_COLUMN_NAME",
+ errorClass = "_LEGACY_ERROR_TEMP_3065",
Review Comment:
INVALID_HIVE_COLUMN_NAME is no longer necessary: 1) the restrictions on
column names have been removed in this PR, and 2) nested field names belong to
the data type part. For these two reasons, INVALID_HIVE_COLUMN_NAME can be
removed.
_LEGACY_ERROR_TEMP_3065 is thrown by
`org.apache.spark.sql.hive.HiveExternalCatalog#withClient`. It's hard to
distinguish one Hive error from another for metastore API calls.
##########
sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala:
##########
@@ -3385,24 +3401,11 @@ class HiveDDLSuite
}
}
- test("SPARK-44911: Create the table with invalid column") {
+ test("SPARK-47101: comma is allowed in column name") {
val tbl = "t1"
withTable(tbl) {
- val e = intercept[AnalysisException] {
- sql(
- s"""
- |CREATE TABLE t1
- |STORED AS parquet
- |SELECT id, DATE'2018-01-01' + MAKE_DT_INTERVAL(0, id) FROM
RANGE(0, 10)
- """.stripMargin)
- }
- checkError(e,
- errorClass = "INVALID_HIVE_COLUMN_NAME",
- parameters = Map(
- "invalidChars" -> "','",
- "tableName" -> "`spark_catalog`.`default`.`t1`",
- "columnName" -> "`DATE '2018-01-01' + make_dt_interval(0, id, 0,
0`.`000000)`")
- )
+ sql("CREATE TABLE t1 STORED AS parquet SELECT id as `a,b` FROM range(1)")
Review Comment:
Hi @dongjoon-hyun, this was changed at the request of @cloud-fan:
https://github.com/apache/spark/pull/45180#discussion_r1497399190
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]