This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 57d49255676 [SPARK-40948][SQL] Introduce new error class: PATH_NOT_FOUND 57d49255676 is described below commit 57d492556768eb341f525ce7eb5c934089fa9e7e Author: itholic <haejoon....@databricks.com> AuthorDate: Mon Nov 7 14:13:13 2022 +0300 [SPARK-40948][SQL] Introduce new error class: PATH_NOT_FOUND ### What changes were proposed in this pull request? This PR proposes to introduce new error class `PATH_NOT_FOUND`, by updating the existing legacy temp error class `_LEGACY_ERROR_TEMP_1130`. ### Why are the changes needed? We should use appropriate error class name that matches the error message. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? The existing CI should pass. Closes #38422 from itholic/LEGACY_MIGRATE. Authored-by: itholic <haejoon....@databricks.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- R/pkg/tests/fulltests/test_sparkSQL.R | 19 +++++++--- core/src/main/resources/error/error-classes.json | 10 ++--- .../spark/sql/errors/QueryCompilationErrors.scala | 2 +- .../org/apache/spark/sql/DataFrameSuite.scala | 44 ++++++++++++++-------- .../execution/datasources/DataSourceSuite.scala | 28 ++++++++------ 5 files changed, 64 insertions(+), 39 deletions(-) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 534ec07abac..91a2c51660b 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -3990,12 +3990,21 @@ test_that("Call DataFrameWriter.load() API in Java without path and check argume expect_error(read.df(source = "json"), paste("Error in load : analysis error - Unable to infer schema for JSON.", "It must be specified manually")) - expect_error(read.df("arbitrary_path"), "Error in load : analysis error - Path does not exist") - expect_error(read.json("arbitrary_path"), "Error in json : analysis error - Path does not exist") - 
expect_error(read.text("arbitrary_path"), "Error in text : analysis error - Path does not exist") - expect_error(read.orc("arbitrary_path"), "Error in orc : analysis error - Path does not exist") + expect_error(read.df("arbitrary_path"), + paste("Error in load : analysis error - [PATH_NOT_FOUND] Path does not exist:", + "file:/__w/spark/spark/arbitrary_path."), fixed = TRUE) + expect_error(read.json("arbitrary_path"), + paste("Error in json : analysis error - [PATH_NOT_FOUND] Path does not exist:", + "file:/__w/spark/spark/arbitrary_path."), fixed = TRUE) + expect_error(read.text("arbitrary_path"), + paste("Error in text : analysis error - [PATH_NOT_FOUND] Path does not exist:", + "file:/__w/spark/spark/arbitrary_path."), fixed = TRUE) + expect_error(read.orc("arbitrary_path"), + paste("Error in orc : analysis error - [PATH_NOT_FOUND] Path does not exist:", + "file:/__w/spark/spark/arbitrary_path."), fixed = TRUE) expect_error(read.parquet("arbitrary_path"), - "Error in parquet : analysis error - Path does not exist") + paste("Error in parquet : analysis error - [PATH_NOT_FOUND] Path does not exist:", + "file:/__w/spark/spark/arbitrary_path."), fixed = TRUE) # Arguments checking in R side. expect_error(read.df(path = c(3)), diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index ceb3e4ed5b1..73652a1ca78 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -806,6 +806,11 @@ ], "sqlState" : "42000" }, + "PATH_NOT_FOUND" : { + "message" : [ + "Path does not exist: <path>." + ] + }, "PIVOT_VALUE_DATA_TYPE_MISMATCH" : { "message" : [ "Invalid pivot value '<value>': value data type <valueType> does not match pivot column data type <pivotType>" @@ -2226,11 +2231,6 @@ "Unable to infer schema for <format>. It must be specified manually." ] }, - "_LEGACY_ERROR_TEMP_1130" : { - "message" : [ - "Path does not exist: <path>." 
- ] - }, "_LEGACY_ERROR_TEMP_1131" : { "message" : [ "Data source <className> does not support <outputMode> output mode." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index b56e1957f77..4056052c81e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -1388,7 +1388,7 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase { def dataPathNotExistError(path: String): Throwable = { new AnalysisException( - errorClass = "_LEGACY_ERROR_TEMP_1130", + errorClass = "PATH_NOT_FOUND", messageParameters = Map("path" -> path)) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index fabd0a4e1a9..d11e86b7d63 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -2322,15 +2322,21 @@ class DataFrameSuite extends QueryTest test("SPARK-13774: Check error message for non existent path without globbed paths") { val uuid = UUID.randomUUID().toString val baseDir = Utils.createTempDir() + val filePath1 = new File(baseDir, "file").getAbsolutePath + val filePath2 = new File(baseDir, "file2").getAbsolutePath + val filePath3 = new File(uuid, "file3").getAbsolutePath try { - val e = intercept[AnalysisException] { - spark.read.format("csv").load( - new File(baseDir, "file").getAbsolutePath, - new File(baseDir, "file2").getAbsolutePath, - new File(uuid, "file3").getAbsolutePath, - uuid).rdd - } - assert(e.getMessage.startsWith("Path does not exist")) + checkError( + exception = intercept[AnalysisException] { + spark.read.format("csv").load( + filePath1, + filePath2, + filePath3, + uuid).rdd + }, + errorClass = 
"PATH_NOT_FOUND", + parameters = Map("path" -> s"file:$filePath1") + ) } finally { } @@ -2341,20 +2347,26 @@ class DataFrameSuite extends QueryTest // Non-existent initial path component: val nonExistentBasePath = "/" + UUID.randomUUID().toString assert(!new File(nonExistentBasePath).exists()) - val e = intercept[AnalysisException] { - spark.read.format("text").load(s"$nonExistentBasePath/*") - } - assert(e.getMessage.startsWith("Path does not exist")) + checkError( + exception = intercept[AnalysisException] { + spark.read.format("text").load(s"$nonExistentBasePath/*") + }, + errorClass = "PATH_NOT_FOUND", + parameters = Map("path" -> s"file:$nonExistentBasePath/*") + ) // Existent initial path component, but no matching files: val baseDir = Utils.createTempDir() val childDir = Utils.createTempDir(baseDir.getAbsolutePath) assert(childDir.exists()) try { - val e1 = intercept[AnalysisException] { - spark.read.json(s"${baseDir.getAbsolutePath}/*/*-xyz.json").rdd - } - assert(e1.getMessage.startsWith("Path does not exist")) + checkError( + exception = intercept[AnalysisException] { + spark.read.json(s"${baseDir.getAbsolutePath}/*/*-xyz.json").rdd + }, + errorClass = "PATH_NOT_FOUND", + parameters = Map("path" -> s"file:${baseDir.getAbsolutePath}/*/*-xyz.json") + ) } finally { Utils.deleteRecursively(baseDir) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala index 3034d4fe67c..2832114d506 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala @@ -109,18 +109,22 @@ class DataSourceSuite extends SharedSparkSession with PrivateMethodTester { } test("test non existent paths") { - assertThrows[AnalysisException]( - DataSource.checkAndGlobPathIfNecessary( - Seq( - path1.toString, - 
path2.toString, - nonExistentPath.toString - ), - hadoopConf, - checkEmptyGlobPath = true, - checkFilesExist = true, - enableGlobbing = true - ) + checkError( + exception = intercept[AnalysisException]( + DataSource.checkAndGlobPathIfNecessary( + Seq( + path1.toString, + path2.toString, + nonExistentPath.toString + ), + hadoopConf, + checkEmptyGlobPath = true, + checkFilesExist = true, + enableGlobbing = true + ) + ), + errorClass = "PATH_NOT_FOUND", + parameters = Map("path" -> "mockFs://mockFs/nonexistentpath") ) } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org