This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new ca147d2f30ee [SPARK-45597][PYTHON][SQL][FOLLOW-UP] Minor deduplicate datasource checking logic
ca147d2f30ee is described below
commit ca147d2f30ee1fe4d6fdaa57c6698d151f83262b
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Tue Dec 26 14:52:42 2023 +0900
[SPARK-45597][PYTHON][SQL][FOLLOW-UP] Minor deduplicate datasource checking logic
### What changes were proposed in this pull request?
This PR proposes to deduplicate datasource checking logic.
### Why are the changes needed?
For better maintenance and readability.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Existing test cases should cover them.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #44489 from HyukjinKwon/SPARK-45597-followup2.
Authored-by: Hyukjin Kwon <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
.../spark/sql/execution/datasources/DataSource.scala | 16 ++++++----------
1 file changed, 6 insertions(+), 10 deletions(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
index b400e3799942..decc20c52531 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
@@ -639,6 +639,8 @@ object DataSource extends Logging {
val provider2 = s"$provider1.DefaultSource"
val loader = Utils.getContextOrSparkClassLoader
val serviceLoader = ServiceLoader.load(classOf[DataSourceRegister], loader)
+ lazy val isUserDefinedDataSource = SparkSession.getActiveSession.exists(
+ _.sessionState.dataSourceManager.dataSourceExists(provider))
try {
serviceLoader.asScala.filter(_.shortName().equalsIgnoreCase(provider1)).toList
match {
@@ -650,8 +652,6 @@ object DataSource extends Logging {
// Found the data source using fully qualified path
dataSource
case Failure(error) =>
- val isUserDefinedDataSource = SparkSession.getActiveSession.exists(
- _.sessionState.dataSourceManager.dataSourceExists(provider))
if (provider1.startsWith("org.apache.spark.sql.hive.orc")) {
throw QueryCompilationErrors.orcNotUsedWithHiveEnabledError()
} else if (provider1.toLowerCase(Locale.ROOT) == "avro" ||
@@ -676,15 +676,11 @@ object DataSource extends Logging {
throw e
}
}
+ case _ :: Nil if isUserDefinedDataSource =>
+ // There was DSv1 or DSv2 loaded, but the same name source was found
+ // in user defined data source.
+ throw QueryCompilationErrors.foundMultipleDataSources(provider)
case head :: Nil =>
- // there is exactly one registered alias
- val isUserDefinedDataSource = SparkSession.getActiveSession.exists(
- _.sessionState.dataSourceManager.dataSourceExists(provider))
- // The source can be successfully loaded as either a V1 or a V2 data source.
- // Check if it is also a user-defined data source.
- if (isUserDefinedDataSource) {
- throw QueryCompilationErrors.foundMultipleDataSources(provider)
- }
head.getClass
case sources =>
// There are multiple registered aliases for the input. If there is single datasource
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]