This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new c770daa9d46 [SPARK-39079][SQL] Forbid dot in V2 catalog name c770daa9d46 is described below commit c770daa9d468cf2c3cda117904478996cd88ebb9 Author: Cheng Pan <cheng...@apache.org> AuthorDate: Wed May 11 23:42:39 2022 +0800 [SPARK-39079][SQL] Forbid dot in V2 catalog name ### What changes were proposed in this pull request? Forbid dot in V2 catalog name. ### Why are the changes needed? In the following configuration, we define 2 catalogs `test`, `test.cat`. ``` spark.sql.catalog.test=CatalogTestImpl spark.sql.catalog.test.k1=v1 spark.sql.catalog.test.cat=CatalogTestImpl spark.sql.catalog.test.cat.k1=v1 ``` In the current implementation, three keys will be treated as `test`'s configuration, which is a little bit weird. ``` k1=v1 cat=CatalogTestImpl cat.k1=v1 ``` Besides, the current implementation of `SHOW CATALOGS` uses a lazy load strategy, which is not friendly for GUI clients like DBeaver to display the available catalog list; without this restriction, it is hard to find the catalog keys in the configuration to load catalogs eagerly. ### Does this PR introduce _any_ user-facing change? Yes, previously Spark allowed the catalog name to contain `.`, but not after this change. ### How was this patch tested? New UT. Closes #36418 from pan3793/catalog. 
Authored-by: Cheng Pan <cheng...@apache.org> Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../org/apache/spark/sql/connector/catalog/Catalogs.scala | 11 ++++++++--- .../org/apache/spark/sql/errors/QueryExecutionErrors.scala | 4 ++++ .../spark/sql/connector/catalog/CatalogLoadingSuite.java | 11 +++++++++++ 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/Catalogs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/Catalogs.scala index 9949f45d483..16983d239d1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/Catalogs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/Catalogs.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.connector.catalog import java.lang.reflect.InvocationTargetException import java.util -import java.util.NoSuchElementException import java.util.regex.Pattern import org.apache.spark.SparkException @@ -39,13 +38,19 @@ private[sql] object Catalogs { * @param conf a SQLConf * @return an initialized CatalogPlugin * @throws CatalogNotFoundException if the plugin class cannot be found - * @throws org.apache.spark.SparkException if the plugin class cannot be instantiated + * @throws org.apache.spark.SparkException if the plugin class cannot be instantiated */ @throws[CatalogNotFoundException] @throws[SparkException] def load(name: String, conf: SQLConf): CatalogPlugin = { val pluginClassName = try { - conf.getConfString("spark.sql.catalog." 
+ name) + val _pluginClassName = conf.getConfString(s"spark.sql.catalog.$name") + // SPARK-39079 do configuration check first, otherwise some path-based table like + // `org.apache.spark.sql.json`.`/path/json_file` may fail on analyze phase + if (name.contains(".")) { + throw QueryExecutionErrors.invalidCatalogNameError(name) + } + _pluginClassName } catch { case _: NoSuchElementException => throw QueryExecutionErrors.catalogPluginClassNotFoundError(name) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 53d32927cee..f9d3854fc5e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -1532,6 +1532,10 @@ object QueryExecutionErrors extends QueryErrorsBase { new UnsupportedOperationException(s"Invalid output mode: $outputMode") } + def invalidCatalogNameError(name: String): Throwable = { + new SparkException(s"Invalid catalog name: $name") + } + def catalogPluginClassNotFoundError(name: String): Throwable = { new CatalogNotFoundException( s"Catalog '$name' plugin class not found: spark.sql.catalog.$name is not defined") diff --git a/sql/catalyst/src/test/java/org/apache/spark/sql/connector/catalog/CatalogLoadingSuite.java b/sql/catalyst/src/test/java/org/apache/spark/sql/connector/catalog/CatalogLoadingSuite.java index 369e2fcaf1a..88ec8b6fc07 100644 --- a/sql/catalyst/src/test/java/org/apache/spark/sql/connector/catalog/CatalogLoadingSuite.java +++ b/sql/catalyst/src/test/java/org/apache/spark/sql/connector/catalog/CatalogLoadingSuite.java @@ -39,6 +39,17 @@ public class CatalogLoadingSuite { Assert.assertEquals("Catalog should have correct name", "test-name", testPlugin.name()); } + @Test + public void testIllegalCatalogName() { + SQLConf conf = new SQLConf(); + 
conf.setConfString("spark.sql.catalog.test.name", TestCatalogPlugin.class.getCanonicalName()); + + SparkException exc = Assert.assertThrows(SparkException.class, + () -> Catalogs.load("test.name", conf)); + Assert.assertTrue("Catalog name should not contain '.'", exc.getMessage().contains( + "Invalid catalog name: test.name")); + } + @Test public void testInitializationOptions() throws SparkException { SQLConf conf = new SQLConf(); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org