cloud-fan commented on code in PR #45180:
URL: https://github.com/apache/spark/pull/45180#discussion_r1497068367


##########
sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala:
##########
@@ -151,45 +152,35 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, 
hadoopConf: Configurat
   }
 
   /**
-   * Checks the validity of data column names. Hive metastore disallows the 
table to use some
-   * special characters (',', ':', and ';') in data column names, including 
nested column names.
-   * Partition columns do not have such a restriction. Views do not have such 
a restriction.
+   * According to the Hive documentation:
+   *   > In Hive 0.13 and later, column names can contain any Unicode 
character (see HIVE-6013),
+   *     however, dot (.) and colon (:) yield errors on querying, so they are 
disallowed in
+   *     Hive 1.2.0 (see HIVE-10120). Any column name that is specified within 
backticks (`) is
+   *     treated literally. Within a backtick string, use double backticks 
(``) to represent
+   *     a backtick character. Backtick quotation also enables the use of 
reserved keywords
+   *     for table and column identifiers
+   * In addition, Spark SQL doesn't rely on Hive analysis for column 
resolution, so dot (.) and
+   * colon (:) yield no errors on querying.
+   *
+   * To sum up, we do not need to check top-level column names, but we still need to
+   * check the nested types that contain a 'name' field.
+   *
    */
   private def verifyDataSchema(
       tableName: TableIdentifier, tableType: CatalogTableType, dataSchema: 
StructType): Unit = {
     if (tableType != VIEW) {
-      val invalidChars = Seq(",", ":", ";")
-      def verifyNestedColumnNames(schema: StructType): Unit = schema.foreach { 
f =>
-        f.dataType match {
-          case st: StructType => verifyNestedColumnNames(st)
-          case _ if invalidChars.exists(f.name.contains) =>
-            val invalidCharsString = invalidChars.map(c => 
s"'$c'").mkString(", ")
-            throw new AnalysisException(
-              errorClass = "INVALID_HIVE_COLUMN_NAME",
-              messageParameters = Map(
-                "invalidChars" -> invalidCharsString,
-                "tableName" -> toSQLId(tableName.nameParts),
-                "columnName" -> toSQLId(f.name)
-              ))
-          case _ =>
-        }
-      }
-
       dataSchema.foreach { f =>
-        f.dataType match {
-          // Checks top-level column names
-          case _ if f.name.contains(",") =>
+        try {
+          TypeInfoUtils.getTypeInfoFromTypeString(f.dataType.catalogString)

Review Comment:
   what does it do? I can't find it in the previous code?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to