This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 17816170316 [SPARK-40948][SQL][FOLLOWUP] Restore PATH_NOT_FOUND 17816170316 is described below commit 178161703161ccf49b37baf9a667630865367950 Author: itholic <haejoon....@databricks.com> AuthorDate: Wed Nov 23 08:38:20 2022 +0300 [SPARK-40948][SQL][FOLLOWUP] Restore PATH_NOT_FOUND ### What changes were proposed in this pull request? The original PR to introduce the error class `PATH_NOT_FOUND` was reverted because it broke tests in a different test environment. This PR proposes to restore it. ### Why are the changes needed? To restore the reverted changes with a proper fix. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? The existing CI should pass. Closes #38575 from itholic/SPARK-40948-followup. Authored-by: itholic <haejoon....@databricks.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- R/pkg/tests/fulltests/test_sparkSQL.R | 14 +++++--- core/src/main/resources/error/error-classes.json | 10 +++--- .../spark/sql/errors/QueryCompilationErrors.scala | 2 +- .../org/apache/spark/sql/DataFrameSuite.scala | 37 ++++++++++++---------- .../execution/datasources/DataSourceSuite.scala | 28 +++++++++------- 5 files changed, 52 insertions(+), 39 deletions(-) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 534ec07abac..d2b6220b2e7 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -3990,12 +3990,16 @@ test_that("Call DataFrameWriter.load() API in Java without path and check argume expect_error(read.df(source = "json"), paste("Error in load : analysis error - Unable to infer schema for JSON.", "It must be specified manually")) - expect_error(read.df("arbitrary_path"), "Error in load : analysis error - Path does not exist") - expect_error(read.json("arbitrary_path"), "Error in json : analysis error - Path does not exist") - expect_error(read.text("arbitrary_path"), "Error 
in text : analysis error - Path does not exist") - expect_error(read.orc("arbitrary_path"), "Error in orc : analysis error - Path does not exist") + expect_error(read.df("arbitrary_path"), + "Error in load : analysis error - \\[PATH_NOT_FOUND\\].*") + expect_error(read.json("arbitrary_path"), + "Error in json : analysis error - \\[PATH_NOT_FOUND\\].*") + expect_error(read.text("arbitrary_path"), + "Error in text : analysis error - \\[PATH_NOT_FOUND\\].*") + expect_error(read.orc("arbitrary_path"), + "Error in orc : analysis error - \\[PATH_NOT_FOUND\\].*") expect_error(read.parquet("arbitrary_path"), - "Error in parquet : analysis error - Path does not exist") + "Error in parquet : analysis error - \\[PATH_NOT_FOUND\\].*") # Arguments checking in R side. expect_error(read.df(path = c(3)), diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 77d155bfc21..12c97c2108a 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -912,6 +912,11 @@ ], "sqlState" : "42000" }, + "PATH_NOT_FOUND" : { + "message" : [ + "Path does not exist: <path>." + ] + }, "PIVOT_VALUE_DATA_TYPE_MISMATCH" : { "message" : [ "Invalid pivot value '<value>': value data type <valueType> does not match pivot column data type <pivotType>" @@ -2332,11 +2337,6 @@ "Unable to infer schema for <format>. It must be specified manually." ] }, - "_LEGACY_ERROR_TEMP_1130" : { - "message" : [ - "Path does not exist: <path>." - ] - }, "_LEGACY_ERROR_TEMP_1131" : { "message" : [ "Data source <className> does not support <outputMode> output mode." 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 63c912c15a1..0f245597efd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -1378,7 +1378,7 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase { def dataPathNotExistError(path: String): Throwable = { new AnalysisException( - errorClass = "_LEGACY_ERROR_TEMP_1130", + errorClass = "PATH_NOT_FOUND", messageParameters = Map("path" -> path)) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index aab68065319..589ee1bea27 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -2327,39 +2327,44 @@ class DataFrameSuite extends QueryTest test("SPARK-13774: Check error message for non existent path without globbed paths") { val uuid = UUID.randomUUID().toString val baseDir = Utils.createTempDir() - try { - val e = intercept[AnalysisException] { + checkError( + exception = intercept[AnalysisException] { spark.read.format("csv").load( new File(baseDir, "file").getAbsolutePath, new File(baseDir, "file2").getAbsolutePath, new File(uuid, "file3").getAbsolutePath, uuid).rdd - } - assert(e.getMessage.startsWith("Path does not exist")) - } finally { - - } - + }, + errorClass = "PATH_NOT_FOUND", + parameters = Map("path" -> "file:.*"), + matchPVals = true + ) } test("SPARK-13774: Check error message for not existent globbed paths") { // Non-existent initial path component: val nonExistentBasePath = "/" + UUID.randomUUID().toString assert(!new File(nonExistentBasePath).exists()) - val e = intercept[AnalysisException] { - 
spark.read.format("text").load(s"$nonExistentBasePath/*") - } - assert(e.getMessage.startsWith("Path does not exist")) + checkError( + exception = intercept[AnalysisException] { + spark.read.format("text").load(s"$nonExistentBasePath/*") + }, + errorClass = "PATH_NOT_FOUND", + parameters = Map("path" -> s"file:$nonExistentBasePath/*") + ) // Existent initial path component, but no matching files: val baseDir = Utils.createTempDir() val childDir = Utils.createTempDir(baseDir.getAbsolutePath) assert(childDir.exists()) try { - val e1 = intercept[AnalysisException] { - spark.read.json(s"${baseDir.getAbsolutePath}/*/*-xyz.json").rdd - } - assert(e1.getMessage.startsWith("Path does not exist")) + checkError( + exception = intercept[AnalysisException] { + spark.read.json(s"${baseDir.getAbsolutePath}/*/*-xyz.json").rdd + }, + errorClass = "PATH_NOT_FOUND", + parameters = Map("path" -> s"file:${baseDir.getAbsolutePath}/*/*-xyz.json") + ) } finally { Utils.deleteRecursively(baseDir) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala index 3034d4fe67c..06e570cb016 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala @@ -109,18 +109,22 @@ class DataSourceSuite extends SharedSparkSession with PrivateMethodTester { } test("test non existent paths") { - assertThrows[AnalysisException]( - DataSource.checkAndGlobPathIfNecessary( - Seq( - path1.toString, - path2.toString, - nonExistentPath.toString - ), - hadoopConf, - checkEmptyGlobPath = true, - checkFilesExist = true, - enableGlobbing = true - ) + checkError( + exception = intercept[AnalysisException]( + DataSource.checkAndGlobPathIfNecessary( + Seq( + path1.toString, + path2.toString, + nonExistentPath.toString + ), + hadoopConf, + 
checkEmptyGlobPath = true, + checkFilesExist = true, + enableGlobbing = true + ) + ), + errorClass = "PATH_NOT_FOUND", + parameters = Map("path" -> nonExistentPath.toString) ) } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org