This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new c770daa9d46 [SPARK-39079][SQL] Forbid dot in V2 catalog name
c770daa9d46 is described below

commit c770daa9d468cf2c3cda117904478996cd88ebb9
Author: Cheng Pan <cheng...@apache.org>
AuthorDate: Wed May 11 23:42:39 2022 +0800

    [SPARK-39079][SQL] Forbid dot in V2 catalog name
    
    ### What changes were proposed in this pull request?
    
    Forbid dot in V2 catalog name.
    
    ### Why are the changes needed?
    
    In the following configuration, we define 2 catalogs `test`, `test.cat`.
    ```
    spark.sql.catalog.test=CatalogTestImpl
    spark.sql.catalog.test.k1=v1
    spark.sql.catalog.test.cat=CatalogTestImpl
    spark.sql.catalog.test.cat.k1=v1
    ```
    
    In the current implementation, all three of the following keys will be 
    treated as part of `test`'s configuration, which is a little bit weird.
    ```
    k1=v1
    cat=CatalogTestImpl
    cat.k1=v1
    ```
    
    Besides, the current implementation of `SHOW CATALOGS` uses a lazy-load 
strategy, which is not friendly for GUI clients like DBeaver that display the 
available catalog list; without this restriction, it is hard to find the 
catalog keys in the configuration to load catalogs eagerly.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes, previously Spark allowed the catalog name to contain `.`, but it is 
no longer allowed after this change.
    
    ### How was this patch tested?
    
    New UT.
    
    Closes #36418 from pan3793/catalog.
    
    Authored-by: Cheng Pan <cheng...@apache.org>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../org/apache/spark/sql/connector/catalog/Catalogs.scala     | 11 ++++++++---
 .../org/apache/spark/sql/errors/QueryExecutionErrors.scala    |  4 ++++
 .../spark/sql/connector/catalog/CatalogLoadingSuite.java      | 11 +++++++++++
 3 files changed, 23 insertions(+), 3 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/Catalogs.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/Catalogs.scala
index 9949f45d483..16983d239d1 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/Catalogs.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/Catalogs.scala
@@ -19,7 +19,6 @@ package org.apache.spark.sql.connector.catalog
 
 import java.lang.reflect.InvocationTargetException
 import java.util
-import java.util.NoSuchElementException
 import java.util.regex.Pattern
 
 import org.apache.spark.SparkException
@@ -39,13 +38,19 @@ private[sql] object Catalogs {
    * @param conf a SQLConf
    * @return an initialized CatalogPlugin
    * @throws CatalogNotFoundException if the plugin class cannot be found
-   * @throws org.apache.spark.SparkException           if the plugin class 
cannot be instantiated
+   * @throws org.apache.spark.SparkException if the plugin class cannot be 
instantiated
    */
   @throws[CatalogNotFoundException]
   @throws[SparkException]
   def load(name: String, conf: SQLConf): CatalogPlugin = {
     val pluginClassName = try {
-      conf.getConfString("spark.sql.catalog." + name)
+      val _pluginClassName = conf.getConfString(s"spark.sql.catalog.$name")
+      // SPARK-39079 do configuration check first, otherwise some path-based 
table like
+      // `org.apache.spark.sql.json`.`/path/json_file` may fail on analyze 
phase
+      if (name.contains(".")) {
+        throw QueryExecutionErrors.invalidCatalogNameError(name)
+      }
+      _pluginClassName
     } catch {
       case _: NoSuchElementException =>
         throw QueryExecutionErrors.catalogPluginClassNotFoundError(name)
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
index 53d32927cee..f9d3854fc5e 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
@@ -1532,6 +1532,10 @@ object QueryExecutionErrors extends QueryErrorsBase {
     new UnsupportedOperationException(s"Invalid output mode: $outputMode")
   }
 
+  def invalidCatalogNameError(name: String): Throwable = {
+    new SparkException(s"Invalid catalog name: $name")
+  }
+
   def catalogPluginClassNotFoundError(name: String): Throwable = {
     new CatalogNotFoundException(
       s"Catalog '$name' plugin class not found: spark.sql.catalog.$name is not 
defined")
diff --git 
a/sql/catalyst/src/test/java/org/apache/spark/sql/connector/catalog/CatalogLoadingSuite.java
 
b/sql/catalyst/src/test/java/org/apache/spark/sql/connector/catalog/CatalogLoadingSuite.java
index 369e2fcaf1a..88ec8b6fc07 100644
--- 
a/sql/catalyst/src/test/java/org/apache/spark/sql/connector/catalog/CatalogLoadingSuite.java
+++ 
b/sql/catalyst/src/test/java/org/apache/spark/sql/connector/catalog/CatalogLoadingSuite.java
@@ -39,6 +39,17 @@ public class CatalogLoadingSuite {
     Assert.assertEquals("Catalog should have correct name", "test-name", 
testPlugin.name());
   }
 
+  @Test
+  public void testIllegalCatalogName() {
+    SQLConf conf = new SQLConf();
+    conf.setConfString("spark.sql.catalog.test.name", 
TestCatalogPlugin.class.getCanonicalName());
+
+    SparkException exc = Assert.assertThrows(SparkException.class,
+            () -> Catalogs.load("test.name", conf));
+    Assert.assertTrue("Catalog name should not contain '.'", 
exc.getMessage().contains(
+            "Invalid catalog name: test.name"));
+  }
+
   @Test
   public void testInitializationOptions() throws SparkException {
     SQLConf conf = new SQLConf();


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to