This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 57d49255676 [SPARK-40948][SQL] Introduce new error class: PATH_NOT_FOUND 57d49255676 is described below commit 57d492556768eb341f525ce7eb5c934089fa9e7e Author: itholic <haejoon....@databricks.com> AuthorDate: Mon Nov 7 14:13:13 2022 +0300 [SPARK-40948][SQL] Introduce new error class: PATH_NOT_FOUND ### What changes were proposed in this pull request? This PR proposes to introduce new error class `PATH_NOT_FOUND`, by updating the existing legacy temp error class `_LEGACY_ERROR_TEMP_1130`. ### Why are the changes needed? We should use appropriate error class name that matches the error message. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? The existing CI should pass. Closes #38422 from itholic/LEGACY_MIGRATE. Authored-by: itholic <haejoon....@databricks.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- R/pkg/tests/fulltests/test_sparkSQL.R | 19 +++++++--- core/src/main/resources/error/error-classes.json | 10 ++--- .../spark/sql/errors/QueryCompilationErrors.scala | 2 +- .../org/apache/spark/sql/DataFrameSuite.scala | 44 ++++++++++++++-------- .../execution/datasources/DataSourceSuite.scala | 28 ++++++++------ 5 files changed, 64 insertions(+), 39 deletions(-) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 534ec07abac..91a2c51660b 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -3990,12 +3990,21 @@ test_that("Call DataFrameWriter.load() API in Java without path and check argume expect_error(read.df(source = "json"), paste("Error in load : analysis error - Unable to infer schema for JSON.", "It must be specified manually")) - expect_error(read.df("arbitrary_path"), "Error in load : analysis error - Path does not exist") - expect_error(read.json("arbitrary_path"), "Error in json : analysis error - Path does not exist") - 
expect_error(read.text("arbitrary_path"), "Error in text : analysis error - Path does not exist") - expect_error(read.orc("arbitrary_path"), "Error in orc : analysis error - Path does not exist") + expect_error(read.df("arbitrary_path"), + paste("Error in load : analysis error - [PATH_NOT_FOUND] Path does not exist:", + "file:/__w/spark/spark/arbitrary_path."), fixed = TRUE) + expect_error(read.json("arbitrary_path"), + paste("Error in json : analysis error - [PATH_NOT_FOUND] Path does not exist:", + "file:/__w/spark/spark/arbitrary_path."), fixed = TRUE) + expect_error(read.text("arbitrary_path"), + paste("Error in text : analysis error - [PATH_NOT_FOUND] Path does not exist:", + "file:/__w/spark/spark/arbitrary_path."), fixed = TRUE) + expect_error(read.orc("arbitrary_path"), + paste("Error in orc : analysis error - [PATH_NOT_FOUND] Path does not exist:", + "file:/__w/spark/spark/arbitrary_path."), fixed = TRUE) expect_error(read.parquet("arbitrary_path"), - "Error in parquet : analysis error - Path does not exist") + paste("Error in parquet : analysis error - [PATH_NOT_FOUND] Path does not exist:", + "file:/__w/spark/spark/arbitrary_path."), fixed = TRUE) # Arguments checking in R side. expect_error(read.df(path = c(3)), diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index ceb3e4ed5b1..73652a1ca78 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -806,6 +806,11 @@ ], "sqlState" : "42000" }, + "PATH_NOT_FOUND" : { + "message" : [ + "Path does not exist: <path>." + ] + }, "PIVOT_VALUE_DATA_TYPE_MISMATCH" : { "message" : [ "Invalid pivot value '<value>': value data type <valueType> does not match pivot column data type <pivotType>" @@ -2226,11 +2231,6 @@ "Unable to infer schema for <format>. It must be specified manually." ] }, - "_LEGACY_ERROR_TEMP_1130" : { - "message" : [ - "Path does not exist: <path>." 
- ] - }, "_LEGACY_ERROR_TEMP_1131" : { "message" : [ "Data source <className> does not support <outputMode> output mode." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index b56e1957f77..4056052c81e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -1388,7 +1388,7 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase { def dataPathNotExistError(path: String): Throwable = { new AnalysisException( - errorClass = "_LEGACY_ERROR_TEMP_1130", + errorClass = "PATH_NOT_FOUND", messageParameters = Map("path" -> path)) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index fabd0a4e1a9..d11e86b7d63 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -2322,15 +2322,21 @@ class DataFrameSuite extends QueryTest test("SPARK-13774: Check error message for non existent path without globbed paths") { val uuid = UUID.randomUUID().toString val baseDir = Utils.createTempDir() + val filePath1 = new File(baseDir, "file").getAbsolutePath + val filePath2 = new File(baseDir, "file2").getAbsolutePath + val filePath3 = new File(uuid, "file3").getAbsolutePath try { - val e = intercept[AnalysisException] { - spark.read.format("csv").load( - new File(baseDir, "file").getAbsolutePath, - new File(baseDir, "file2").getAbsolutePath, - new File(uuid, "file3").getAbsolutePath, - uuid).rdd - } - assert(e.getMessage.startsWith("Path does not exist")) + checkError( + exception = intercept[AnalysisException] { + spark.read.format("csv").load( + filePath1, + filePath2, + filePath3, + uuid).rdd + }, + errorClass = 
"PATH_NOT_FOUND", + parameters = Map("path" -> s"file:$filePath1") + ) } finally { } @@ -2341,20 +2347,26 @@ class DataFrameSuite extends QueryTest // Non-existent initial path component: val nonExistentBasePath = "/" + UUID.randomUUID().toString assert(!new File(nonExistentBasePath).exists()) - val e = intercept[AnalysisException] { - spark.read.format("text").load(s"$nonExistentBasePath/*") - } - assert(e.getMessage.startsWith("Path does not exist")) + checkError( + exception = intercept[AnalysisException] { + spark.read.format("text").load(s"$nonExistentBasePath/*") + }, + errorClass = "PATH_NOT_FOUND", + parameters = Map("path" -> s"file:$nonExistentBasePath/*") + ) // Existent initial path component, but no matching files: val baseDir = Utils.createTempDir() val childDir = Utils.createTempDir(baseDir.getAbsolutePath) assert(childDir.exists()) try { - val e1 = intercept[AnalysisException] { - spark.read.json(s"${baseDir.getAbsolutePath}/*/*-xyz.json").rdd - } - assert(e1.getMessage.startsWith("Path does not exist")) + checkError( + exception = intercept[AnalysisException] { + spark.read.json(s"${baseDir.getAbsolutePath}/*/*-xyz.json").rdd + }, + errorClass = "PATH_NOT_FOUND", + parameters = Map("path" -> s"file:${baseDir.getAbsolutePath}/*/*-xyz.json") + ) } finally { Utils.deleteRecursively(baseDir) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala index 3034d4fe67c..2832114d506 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala @@ -109,18 +109,22 @@ class DataSourceSuite extends SharedSparkSession with PrivateMethodTester { } test("test non existent paths") { - assertThrows[AnalysisException]( - DataSource.checkAndGlobPathIfNecessary( - Seq( - path1.toString, - 
path2.toString, - nonExistentPath.toString - ), - hadoopConf, - checkEmptyGlobPath = true, - checkFilesExist = true, - enableGlobbing = true - ) + checkError( + exception = intercept[AnalysisException]( + DataSource.checkAndGlobPathIfNecessary( + Seq( + path1.toString, + path2.toString, + nonExistentPath.toString + ), + hadoopConf, + checkEmptyGlobPath = true, + checkFilesExist = true, + enableGlobbing = true + ) + ), + errorClass = "PATH_NOT_FOUND", + parameters = Map("path" -> "mockFs://mockFs/nonexistentpath") ) } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org