This is an automated email from the ASF dual-hosted git repository.
yao pushed a commit to branch branch-3.5
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.5 by this push:
new 462a447ca62 [SPARK-44520][SQL] Replace the term
UNSUPPORTED_DATA_SOURCE_FOR_DIRECT_QUERY with
UNSUPPORTED_DATASOURCE_FOR_DIRECT_QUERY and disclosure root AE
462a447ca62 is described below
commit 462a447ca62a27a15d1e35b888e3923e5d2d012f
Author: Kent Yao <[email protected]>
AuthorDate: Tue Jul 25 09:39:19 2023 +0800
[SPARK-44520][SQL] Replace the term
UNSUPPORTED_DATA_SOURCE_FOR_DIRECT_QUERY with
UNSUPPORTED_DATASOURCE_FOR_DIRECT_QUERY and disclosure root AE
### What changes were proposed in this pull request?
1. UNSUPPORTED_DATA_SOURCE_FOR_DIRECT_QUERY is duplicated with
UNSUPPORTED_DATASOURCE_FOR_DIRECT_QUERY. This PR uses the shorter one.
2. Bug fix: the root `AnalysisException` (AE) was previously hidden behind the generic UNSUPPORTED_DATASOURCE_FOR_DIRECT_QUERY error; it is now surfaced to the user.
### Why are the changes needed?
UNSUPPORTED_DATA_SOURCE_FOR_DIRECT_QUERY duplicates UNSUPPORTED_DATASOURCE_FOR_DIRECT_QUERY, so one of the two error classes should be removed.
In addition, this fixes a bug where the root `AnalysisException` (AE) was hidden behind the UNSUPPORTED_DATASOURCE_FOR_DIRECT_QUERY error.
For example,
```scala
select id from parquet.`abc`;
[UNSUPPORTED_DATASOURCE_FOR_DIRECT_QUERY] Unsupported data source type for
direct query on files: parquet; line 1 pos 15
org.apache.spark.sql.AnalysisException:
[UNSUPPORTED_DATASOURCE_FOR_DIRECT_QUERY] Unsupported data source type for
direct query on files: parquet; line 1 pos 15
```
### Does this PR introduce _any_ user-facing change?
no
### How was this patch tested?
existing tests
Closes #42124 from yaooqinn/SPARK-44520.
Authored-by: Kent Yao <[email protected]>
Signed-off-by: Kent Yao <[email protected]>
(cherry picked from commit 2f7a9a1e26f46484a6bda4cf65f5119fdea3ba4c)
Signed-off-by: Kent Yao <[email protected]>
---
common/utils/src/main/resources/error/error-classes.json | 5 -----
docs/sql-error-conditions.md | 6 ------
.../apache/spark/sql/errors/QueryCompilationErrors.scala | 6 ------
.../org/apache/spark/sql/execution/datasources/rules.scala | 14 ++++++++------
.../test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 10 ++++------
.../apache/spark/sql/hive/execution/SQLQuerySuite.scala | 11 +++++++++++
6 files changed, 23 insertions(+), 29 deletions(-)
diff --git a/common/utils/src/main/resources/error/error-classes.json
b/common/utils/src/main/resources/error/error-classes.json
index d61b1721664..ba3076fb931 100644
--- a/common/utils/src/main/resources/error/error-classes.json
+++ b/common/utils/src/main/resources/error/error-classes.json
@@ -2701,11 +2701,6 @@
],
"sqlState" : "0A000"
},
- "UNSUPPORTED_DATA_SOURCE_FOR_DIRECT_QUERY" : {
- "message" : [
- "The direct query on files does not support the data source type:
<className>. Please try a different data source type or consider using a
different query method."
- ]
- },
"UNSUPPORTED_DATA_TYPE_FOR_DATASOURCE" : {
"message" : [
"The <format> datasource doesn't support the column <columnName> of the
type <columnType>."
diff --git a/docs/sql-error-conditions.md b/docs/sql-error-conditions.md
index 6dbbf7bf05c..8ce33062e22 100644
--- a/docs/sql-error-conditions.md
+++ b/docs/sql-error-conditions.md
@@ -1886,12 +1886,6 @@ Unsupported data source type for direct query on files:
`<dataSourceType>`
Unsupported data type `<typeName>`.
-### UNSUPPORTED_DATA_SOURCE_FOR_DIRECT_QUERY
-
-SQLSTATE: none assigned
-
-The direct query on files does not support the data source type:
`<className>`. Please try a different data source type or consider using a
different query method.
-
### UNSUPPORTED_DATA_TYPE_FOR_DATASOURCE
SQLSTATE: none assigned
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index 957c8cb334b..b2eee0dee98 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -1688,12 +1688,6 @@ private[sql] object QueryCompilationErrors extends
QueryErrorsBase {
"tableSchema" -> tableSchema.toString))
}
- def unsupportedDataSourceTypeForDirectQueryOnFilesError(className: String):
Throwable = {
- new AnalysisException(
- errorClass = "UNSUPPORTED_DATA_SOURCE_FOR_DIRECT_QUERY",
- messageParameters = Map("className" -> className))
- }
-
def saveDataIntoViewNotAllowedError(): Throwable = {
new AnalysisException(
errorClass = "_LEGACY_ERROR_TEMP_1158",
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
index 3f235e10c81..4cbd54e6d20 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
@@ -45,15 +45,17 @@ class ResolveSQLOnFile(sparkSession: SparkSession) extends
Rule[LogicalPlan] {
conf.runSQLonFile && u.multipartIdentifier.size == 2
}
- private def resolveDataSource(ident: Seq[String]): DataSource = {
+ private def resolveDataSource(unresolved: UnresolvedRelation): DataSource = {
+ val ident = unresolved.multipartIdentifier
val dataSource = DataSource(sparkSession, paths = Seq(ident.last),
className = ident.head)
// `dataSource.providingClass` may throw ClassNotFoundException, the
caller side will try-catch
// it and return the original plan, so that the analyzer can report table
not found later.
val isFileFormat =
classOf[FileFormat].isAssignableFrom(dataSource.providingClass)
if (!isFileFormat ||
dataSource.className.toLowerCase(Locale.ROOT) == DDLUtils.HIVE_PROVIDER)
{
- throw
QueryCompilationErrors.unsupportedDataSourceTypeForDirectQueryOnFilesError(
- dataSource.className)
+ unresolved.failAnalysis(
+ errorClass = "UNSUPPORTED_DATASOURCE_FOR_DIRECT_QUERY",
+ messageParameters = Map("dataSourceType" -> ident.head))
}
dataSource
}
@@ -65,7 +67,7 @@ class ResolveSQLOnFile(sparkSession: SparkSession) extends
Rule[LogicalPlan] {
// fail to time travel. Otherwise, this is some other catalog table that
isn't resolved yet,
// so we should leave it be for now.
try {
- resolveDataSource(u.multipartIdentifier)
+ resolveDataSource(u)
throw
QueryCompilationErrors.timeTravelUnsupportedError(toSQLId(u.multipartIdentifier))
} catch {
case _: ClassNotFoundException => r
@@ -73,11 +75,11 @@ class ResolveSQLOnFile(sparkSession: SparkSession) extends
Rule[LogicalPlan] {
case u: UnresolvedRelation if maybeSQLFile(u) =>
try {
- val ds = resolveDataSource(u.multipartIdentifier)
+ val ds = resolveDataSource(u)
LogicalRelation(ds.resolveRelation())
} catch {
case _: ClassNotFoundException => u
- case e: Exception =>
+ case e: Exception if !e.isInstanceOf[AnalysisException] =>
// the provider is valid, but failed to create a logical plan
u.failAnalysis(
errorClass = "UNSUPPORTED_DATASOURCE_FOR_DIRECT_QUERY",
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 7ad27f05a58..0e08fed12ba 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -1647,18 +1647,16 @@ class SQLQuerySuite extends QueryTest with
SharedSparkSession with AdaptiveSpark
exception = intercept[AnalysisException] {
sql("select * from json.invalid_file")
},
- errorClass = "UNSUPPORTED_DATASOURCE_FOR_DIRECT_QUERY",
- parameters = Map("dataSourceType" -> "json"),
- context = ExpectedContext("json.invalid_file", 14, 30)
+ errorClass = "PATH_NOT_FOUND",
+ parameters = Map("path" -> "file:/.*invalid_file"),
+ matchPVals = true
)
checkError(
exception = intercept[AnalysisException] {
sql(s"select id from `org.apache.spark.sql.hive.orc`.`file_path`")
},
- errorClass = "UNSUPPORTED_DATASOURCE_FOR_DIRECT_QUERY",
- parameters = Map("dataSourceType" -> "org.apache.spark.sql.hive.orc"),
- context = ExpectedContext("`org.apache.spark.sql.hive.orc`.`file_path`",
15, 57)
+ errorClass = "_LEGACY_ERROR_TEMP_1138"
)
e = intercept[AnalysisException] {
diff --git
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index e93576761c8..9308d1eda14 100644
---
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -1354,6 +1354,17 @@ abstract class SQLQuerySuiteBase extends QueryTest with
SQLTestUtils with TestHi
})
}
+ test("SPARK-44520: invalid path for support direct query shall throw correct
exception") {
+ checkError(
+ exception = intercept[AnalysisException] {
+ sql(s"select id from parquet.`invalid_path`")
+ },
+ errorClass = "PATH_NOT_FOUND",
+ parameters = Map("path" -> "file.*invalid_path"),
+ matchPVals = true
+ )
+ }
+
test("run sql directly on files - orc") {
val df = spark.range(100).toDF()
withTempPath(f => {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]