This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.1 by this push:
new 52fc0d4c681c [SPARK-54056][SQL] resolve substitution for SQLConf settings in Catalogs
52fc0d4c681c is described below
commit 52fc0d4c681cbd66acd6bd1ebfc1facaff4c4eca
Author: EugenYushin <[email protected]>
AuthorDate: Tue Nov 4 10:47:13 2025 -0800
[SPARK-54056][SQL] resolve substitution for SQLConf settings in Catalogs
### What changes were proposed in this pull request?
Pass `SQLConf` values through `ConfigReader` bindings, so that `${env:xyz}` references are substituted with `sys.env.get("xyz")` before the settings are passed to `CatalogPlugin` implementations.
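For illustration, here is a rough sketch of the substitution step (not the exact `Catalogs` code; `ConfigReader` is Spark-internal API, and the values are hypothetical):
```
import org.apache.spark.internal.config.ConfigReader

// A reader with no backing config; its default bindings still resolve
// ${env:X} from sys.env and ${system:X} from sys.props. References that
// cannot be resolved are left untouched.
val reader = new ConfigReader(new java.util.HashMap[String, String]())
val user = reader.substitute("${env:MSSQL_USER}")   // value of $MSSQL_USER if exported
val os   = reader.substitute("${system:os.name}")   // e.g. "Linux"
```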
### Why are the changes needed?
Settings for a custom (table) catalog are not rendered the same way as other `SQLConf` settings. A particular catalog can define its own set of properties along with a custom catalog name, e.g. "spark.sql.catalog.test-name". This prevents these properties from being registered through the [SQLConf.buildConf](https://github.com/apache/spark/blob/54dee4a4b59cf3817f15da46d35f42a81a3b1c07/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala#L111) machinery, meaning th [...]
This becomes an issue when such properties are set in config files like `spark-defaults.conf` rather than through code (where we can access `sys.env` directly).
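When the options are set from code, there is a trivial workaround; a minimal sketch (hypothetical catalog name and key):
```
// Resolve the environment variable ourselves before handing the value to SQLConf.
spark.conf.set("spark.sql.catalog.mssql.user", sys.env("MSSQL_USER"))
```
No such hook exists for entries coming from `spark-defaults.conf`, and a `${env:...}` reference currently reaches the catalog implementation unresolved: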
```
scala> spark.conf.set("spark.sql.catalog.mssql",
"org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog")
scala> spark.conf.set("spark.sql.catalog.mssql.url", "jdbc:sqlserver://...")
scala> spark.conf.set("spark.sql.catalog.mssql.user", "${env:MSSQL_USER}")
scala> spark.conf.set("spark.sql.catalog.mssql.password",
"${env:MSSQL_PASSWORD}")
scala> spark.sql("show tables in mssql").show()
com.microsoft.sqlserver.jdbc.SQLServerException: Login failed for user
'${env:MSSQL_PASSWORD}'. ClientConnectionId:4f9efa06-90bb-4498-9099-b1d9d3fac935
at
com.microsoft.sqlserver.jdbc.SQLServerException.makeFromDatabaseError(SQLServerException.java:278)
```
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
* unit testing is tricky for this change because we need to deal with `sys.env` or `sys.props`. At the same time, `Catalogs` is an object, which doesn't let us override the `ConfigReader` the way it is done in [ConfigReaderSuite](https://github.com/apache/spark/blob/b7763a7eae2b9609012cbb4ce981276c6471cc4e/core/src/test/scala/org/apache/spark/internal/config/ConfigReaderSuite.scala#L27-L31) (see the sketch after the shell transcript below)
* manual testing via `spark-shell`
```
export MSSQL_USER=...
export MSSQL_PASSWORD=...
./bin/spark-shell --master local --driver-class-path mssql-jdbc-13.2.0.jre11.jar --jars mssql-jdbc-13.2.0.jre11.jar
...
scala> sys.env.get("MSSQL_USER")
val res0: Option[String] = Some(...)
scala> spark.conf.set("spark.sql.catalog.mssql", "org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog")
scala> spark.conf.set("spark.sql.catalog.mssql.url", "jdbc:sqlserver://...")
scala> spark.conf.set("spark.sql.catalog.mssql.user", "${env:MSSQL_USER}")
scala> spark.conf.set("spark.sql.catalog.mssql.password", "${env:MSSQL_PASSWORD}")
scala> spark.sql("show tables in mssql").show()
+---------+-------------------+-----------+
|namespace| tableName|isTemporary|
+---------+-------------------+-----------+
...
```
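For reference, a minimal sketch of the ConfigReaderSuite-style binding override mentioned above (assumed values; `ConfigReader` and `MapProvider` are Spark-internal, so this only compiles from an `org.apache.spark` package). `Catalogs` constructs its `ConfigReader` internally, so this kind of injection is not available to a test of `Catalogs.load`:
```
import scala.jdk.CollectionConverters._

import org.apache.spark.internal.config.{ConfigReader, MapProvider}

// Back the env binding with a plain map instead of the real sys.env,
// which is how ConfigReaderSuite keeps variable expansion deterministic.
val reader = new ConfigReader(Map.empty[String, String].asJava)
reader.bindEnv(new MapProvider(Map("MSSQL_USER" -> "sa").asJava))
assert(reader.substitute("${env:MSSQL_USER}") == "sa")
```
The added Java test below sidesteps this by substituting `${system:os.name}`, which can be verified against the JVM's own `os.name` system property without controlling the environment.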
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #52759 from EugeneYushin/catalogs-load-conf.
Lead-authored-by: EugenYushin <[email protected]>
Co-authored-by: Wenchen Fan <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
(cherry picked from commit a1428d40970fdf9f884b0d63d2f0aefd2a18bc4e)
Signed-off-by: Wenchen Fan <[email protected]>
---
.../scala/org/apache/spark/sql/connector/catalog/Catalogs.scala | 8 +++++++-
.../apache/spark/sql/connector/catalog/CatalogLoadingSuite.java | 7 ++++++-
2 files changed, 13 insertions(+), 2 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/Catalogs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/Catalogs.scala
index 419191f8f9c0..e6c70fdabb15 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/Catalogs.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/Catalogs.scala
@@ -22,6 +22,7 @@ import java.util
import java.util.regex.Pattern
import org.apache.spark.SparkException
+import org.apache.spark.internal.config.ConfigReader
import org.apache.spark.sql.errors.QueryExecutionErrors
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.util.CaseInsensitiveStringMap
@@ -93,10 +94,15 @@ private[sql] object Catalogs {
   private def catalogOptions(name: String, conf: SQLConf) = {
     val prefix = Pattern.compile("^spark\\.sql\\.catalog\\." + name + "\\.(.+)")
     val options = new util.HashMap[String, String]
+    val reader = new ConfigReader(options)
     conf.getAllConfs.foreach {
       case (key, value) =>
         val matcher = prefix.matcher(key)
-        if (matcher.matches && matcher.groupCount > 0) options.put(matcher.group(1), value)
+        if (matcher.matches && matcher.groupCount > 0) {
+          // pass config entries through default ConfigReader mechanics,
+          // substituting prefixes from bindings: ${env:XYZ} -> sys.env.get("XYZ")
+          options.put(matcher.group(1), reader.substitute(value))
+        }
     }
     new CaseInsensitiveStringMap(options)
   }
diff --git a/sql/catalyst/src/test/java/org/apache/spark/sql/connector/catalog/CatalogLoadingSuite.java b/sql/catalyst/src/test/java/org/apache/spark/sql/connector/catalog/CatalogLoadingSuite.java
index 339f16407ae6..c7e8d7b0f7f3 100644
--- a/sql/catalyst/src/test/java/org/apache/spark/sql/connector/catalog/CatalogLoadingSuite.java
+++ b/sql/catalyst/src/test/java/org/apache/spark/sql/connector/catalog/CatalogLoadingSuite.java
@@ -17,6 +17,7 @@
package org.apache.spark.sql.connector.catalog;
+import org.apache.spark.network.util.JavaUtils;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
@@ -24,6 +25,7 @@ import org.apache.spark.SparkException;
import org.apache.spark.sql.internal.SQLConf;
import org.apache.spark.sql.util.CaseInsensitiveStringMap;
import org.apache.spark.util.Utils;
+
public class CatalogLoadingSuite {
@Test
public void testLoad() throws SparkException {
@@ -58,6 +60,7 @@ public class CatalogLoadingSuite {
     conf.setConfString("spark.sql.catalog.test-name",
         TestCatalogPlugin.class.getCanonicalName());
     conf.setConfString("spark.sql.catalog.test-name.name", "not-catalog-name");
     conf.setConfString("spark.sql.catalog.test-name.kEy", "valUE");
+    conf.setConfString("spark.sql.catalog.test-name.osName", "${system:os.name}");
     CatalogPlugin plugin = Catalogs.load("test-name", conf);
     Assertions.assertNotNull(plugin,"Should instantiate a non-null plugin");
@@ -66,11 +69,13 @@ public class CatalogLoadingSuite {
     TestCatalogPlugin testPlugin = (TestCatalogPlugin) plugin;
-    Assertions.assertEquals(2, testPlugin.options.size(), "Options should contain only two keys");
+    Assertions.assertEquals(3, testPlugin.options.size(), "Options should contain only three keys");
     Assertions.assertEquals("not-catalog-name", testPlugin.options.get("name"),
         "Options should contain correct value for name (not overwritten)");
     Assertions.assertEquals("valUE", testPlugin.options.get("key"),
         "Options should contain correct value for key");
+    Assertions.assertEquals(JavaUtils.osName, testPlugin.options.get("osName"),
+        "Options should contain correct substitution for value");
   }
@Test
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]