This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 6167ef277d0a [SPARK-55065] Avoid making two JDBC API calls
6167ef277d0a is described below
commit 6167ef277d0ae9c579d29d8f9bfc9f8d3839dd99
Author: Stefan Savić <[email protected]>
AuthorDate: Mon Jan 19 22:12:46 2026 +0800
[SPARK-55065] Avoid making two JDBC API calls
### What changes were proposed in this pull request?
Instead of explicitly calling `tableExists` in
`JDBCTableCatalog.loadTable(...)`, this change now detects whether a table
exists by catching `SQLException` from `getQueryOutputSchema` and checking it
with the dialect-specific `isObjectNotFoundException` method.
By checking `isObjectNotFoundException` before `isSyntaxErrorBestEffort`,
the code can reliably distinguish the case where a table does not exist
from genuine SQL syntax errors. This order matters because the exception
raised for a missing table can also match the criteria for
`isSyntaxErrorBestEffort`.
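For illustration, a minimal sketch of this ordering (the `DialectChecks` trait and `classify` helper below are hypothetical stand-ins, not Spark's actual API; the real checks live on `JdbcDialect`):

```scala
import java.sql.SQLException

// Hypothetical stand-ins for the dialect hooks discussed above; in Spark the
// real methods are defined on org.apache.spark.sql.jdbc.JdbcDialect.
trait DialectChecks {
  def isObjectNotFoundException(e: SQLException): Boolean
  def isSyntaxErrorBestEffort(e: SQLException): Boolean
}

object ExceptionOrderingSketch {
  // Order matters: a missing table can also satisfy the best-effort syntax
  // check, so the more specific object-not-found test must run first.
  def classify(e: SQLException, dialect: DialectChecks): String =
    if (dialect.isObjectNotFoundException(e)) "TABLE_OR_VIEW_NOT_FOUND"
    else if (dialect.isSyntaxErrorBestEffort(e)) "SYNTAX_ERROR"
    else "UNCLASSIFIED"
}
```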
### Why are the changes needed?
This change removes the redundant `tableExists` call, since we can determine
whether a table exists from the error thrown by `getQueryOutputSchema`.
With this change, `loadTable` makes only one JDBC API call instead of two,
eliminating the separate table-existence round trip.
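A rough sketch of the round-trip difference (all names below are illustrative stand-ins, not the actual Spark methods; each remote call is assumed to cost one JDBC round trip):

```scala
import java.sql.SQLException

object RoundTripSketch {
  // Illustrative stubs, each standing in for one JDBC round trip.
  def probeExists(): Boolean = true
  def fetchSchema(): String = "schema"
  // Assumes SQLSTATE 42S02 ("base table or view not found"); real engines
  // vary, which is why the check is delegated to the dialect.
  def looksLikeMissingTable(e: SQLException): Boolean = e.getSQLState == "42S02"

  // Before: existence probe + schema fetch = two round trips per loadTable.
  def loadBefore(): String = {
    if (!probeExists()) throw new NoSuchElementException("no such table")
    fetchSchema()
  }

  // After: a single schema fetch; a missing table surfaces as an SQLException
  // and is classified, rather than being probed for up front.
  def loadAfter(): String =
    try fetchSchema()
    catch {
      case e: SQLException if looksLikeMissingTable(e) =>
        throw new NoSuchElementException("no such table")
    }
}
```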
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
It was tested using existing and new tests in `JDBCTableCatalogSuite`.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #53830 from SavicStefan/stefan-savic_data/jdbc/two_api_calls.
Authored-by: Stefan Savić <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../spark/sql/execution/datasources/jdbc/JDBCRDD.scala | 18 +++++++++++++++++-
.../datasources/v2/jdbc/JDBCTableCatalog.scala | 6 +-----
2 files changed, 18 insertions(+), 6 deletions(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala
index 8534a24d0110..1ce2667db917 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala
@@ -27,7 +27,9 @@ import org.apache.spark.internal.Logging
import org.apache.spark.internal.LogKeys.SQL_TEXT
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.connector.catalog.Identifier
import org.apache.spark.sql.connector.expressions.filter.Predicate
+import org.apache.spark.sql.errors.QueryCompilationErrors
import org.apache.spark.sql.execution.datasources.{DataSourceMetricsMixin, ExternalEngineDatasourceRDD}
import org.apache.spark.sql.execution.datasources.v2.TableSampleInfo
import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics}
@@ -49,12 +51,19 @@ object JDBCRDD extends Logging {
* schema.
*
* @param options - JDBC options that contains url, table and other information.
+ * @param conn - JDBC connection to use for fetching the schema.
+ * @param ident - Optional table identifier used for error reporting.
+ * @param catalogName - Optional catalog name used for error reporting.
*
* @return A StructType giving the table's Catalyst schema.
* @throws java.sql.SQLException if the table specification is garbage.
* @throws java.sql.SQLException if the table contains an unsupported type.
*/
- def resolveTable(options: JDBCOptions, conn: Connection): StructType = {
+ def resolveTable(
+ options: JDBCOptions,
+ conn: Connection,
+ ident: Option[Identifier] = None,
+ catalogName: Option[String] = None): StructType = {
val url = options.url
val prepareQuery = options.prepareQuery
val table = options.tableOrQuery
@@ -64,6 +73,13 @@ object JDBCRDD extends Logging {
try {
getQueryOutputSchema(fullQuery, options, dialect, conn)
} catch {
+ // By checking isObjectNotFoundException before isSyntaxErrorBestEffort, we can reliably
+ // distinguish between the case where the table does not exist and other SQL syntax errors.
+ // This order is important because when a table does not exist, the exception raised can
+ // also match the criteria for isSyntaxErrorBestEffort.
+ case e: SQLException if ident.isDefined &&
+ dialect.isObjectNotFoundException(e) =>
+ throw QueryCompilationErrors.noSuchTableError(catalogName.get, ident.get)
case e: SQLException if dialect.isSyntaxErrorBestEffort(e) =>
throw new SparkException(
errorClass = "JDBC_EXTERNAL_ENGINE_SYNTAX_ERROR.DURING_OUTPUT_SCHEMA_RESOLUTION",
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala
index b95ac98b39da..611d166a231a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala
@@ -154,10 +154,6 @@ class JDBCTableCatalog extends TableCatalog
override def loadTable(ident: Identifier): Table = {
JdbcUtils.withConnection(options) { conn =>
- if (!tableExists(ident, conn)) {
- throw QueryCompilationErrors.noSuchTableError(name(), ident)
- }
-
val optionsWithTableName = new JDBCOptions(
options.parameters + (JDBCOptions.JDBC_TABLE_NAME -> getTableName(ident)))
JdbcUtils.classifyException(
@@ -172,7 +168,7 @@ class JDBCTableCatalog extends TableCatalog
val remoteSchemaFetchMetric = JdbcUtils
.createSchemaFetchMetric(SparkSession.active.sparkContext)
val schema = SQLMetrics.withTimingNs(remoteSchemaFetchMetric) {
- JDBCRDD.resolveTable(optionsWithTableName, conn)
+ JDBCRDD.resolveTable(optionsWithTableName, conn, Some(ident),
Some(name()))
}
JDBCTable(ident, schema, optionsWithTableName,
Map(JDBCRelation.schemaFetchKey -> remoteSchemaFetchMetric))