[spark] branch master updated: [SPARK-40948][SQL][FOLLOWUP] Restore PATH_NOT_FOUND

maxgekk Tue, 22 Nov 2022 21:38:49 -0800

This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/master by this push:
     new 17816170316 [SPARK-40948][SQL][FOLLOWUP] Restore PATH_NOT_FOUND
17816170316 is described below

commit 178161703161ccf49b37baf9a667630865367950
Author: itholic <haejoon....@databricks.com>
AuthorDate: Wed Nov 23 08:38:20 2022 +0300

    [SPARK-40948][SQL][FOLLOWUP] Restore PATH_NOT_FOUND
    
    ### What changes were proposed in this pull request?
    
    The original PR to introduce the error class `PATH_NOT_FOUND` was reverted 
since it breaks the tests in different test env.
    
    This PR proposes to restore it back.
    
    ### Why are the changes needed?
    
    Restoring the reverted changes with proper fix.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No
    
    ### How was this patch tested?
    
    The existing CI should pass.
    
    Closes #38575 from itholic/SPARK-40948-followup.
    
    Authored-by: itholic <haejoon....@databricks.com>
    Signed-off-by: Max Gekk <max.g...@gmail.com>
---
 R/pkg/tests/fulltests/test_sparkSQL.R              | 14 +++++---
 core/src/main/resources/error/error-classes.json   | 10 +++---
 .../spark/sql/errors/QueryCompilationErrors.scala  |  2 +-
 .../org/apache/spark/sql/DataFrameSuite.scala      | 37 ++++++++++++----------
 .../execution/datasources/DataSourceSuite.scala    | 28 +++++++++-------
 5 files changed, 52 insertions(+), 39 deletions(-)

diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R 
b/R/pkg/tests/fulltests/test_sparkSQL.R
index 534ec07abac..d2b6220b2e7 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -3990,12 +3990,16 @@ test_that("Call DataFrameWriter.load() API in Java 
without path and check argume
   expect_error(read.df(source = "json"),
                paste("Error in load : analysis error - Unable to infer schema 
for JSON.",
                      "It must be specified manually"))
-  expect_error(read.df("arbitrary_path"), "Error in load : analysis error - 
Path does not exist")
-  expect_error(read.json("arbitrary_path"), "Error in json : analysis error - 
Path does not exist")
-  expect_error(read.text("arbitrary_path"), "Error in text : analysis error - 
Path does not exist")
-  expect_error(read.orc("arbitrary_path"), "Error in orc : analysis error - 
Path does not exist")
+  expect_error(read.df("arbitrary_path"),
+               "Error in load : analysis error - \\[PATH_NOT_FOUND\\].*")
+  expect_error(read.json("arbitrary_path"),
+               "Error in json : analysis error - \\[PATH_NOT_FOUND\\].*")
+  expect_error(read.text("arbitrary_path"),
+               "Error in text : analysis error - \\[PATH_NOT_FOUND\\].*")
+  expect_error(read.orc("arbitrary_path"),
+               "Error in orc : analysis error - \\[PATH_NOT_FOUND\\].*")
   expect_error(read.parquet("arbitrary_path"),
-              "Error in parquet : analysis error - Path does not exist")
+               "Error in parquet : analysis error - \\[PATH_NOT_FOUND\\].*")
 
   # Arguments checking in R side.
   expect_error(read.df(path = c(3)),
diff --git a/core/src/main/resources/error/error-classes.json 
b/core/src/main/resources/error/error-classes.json
index 77d155bfc21..12c97c2108a 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -912,6 +912,11 @@
     ],
     "sqlState" : "42000"
   },
+  "PATH_NOT_FOUND" : {
+    "message" : [
+      "Path does not exist: <path>."
+    ]
+  },
   "PIVOT_VALUE_DATA_TYPE_MISMATCH" : {
     "message" : [
       "Invalid pivot value '<value>': value data type <valueType> does not 
match pivot column data type <pivotType>"
@@ -2332,11 +2337,6 @@
       "Unable to infer schema for <format>. It must be specified manually."
     ]
   },
-  "_LEGACY_ERROR_TEMP_1130" : {
-    "message" : [
-      "Path does not exist: <path>."
-    ]
-  },
   "_LEGACY_ERROR_TEMP_1131" : {
     "message" : [
       "Data source <className> does not support <outputMode> output mode."
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index 63c912c15a1..0f245597efd 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -1378,7 +1378,7 @@ private[sql] object QueryCompilationErrors extends 
QueryErrorsBase {
 
   def dataPathNotExistError(path: String): Throwable = {
     new AnalysisException(
-      errorClass = "_LEGACY_ERROR_TEMP_1130",
+      errorClass = "PATH_NOT_FOUND",
       messageParameters = Map("path" -> path))
   }
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index aab68065319..589ee1bea27 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -2327,39 +2327,44 @@ class DataFrameSuite extends QueryTest
   test("SPARK-13774: Check error message for non existent path without globbed 
paths") {
     val uuid = UUID.randomUUID().toString
     val baseDir = Utils.createTempDir()
-    try {
-      val e = intercept[AnalysisException] {
+    checkError(
+      exception = intercept[AnalysisException] {
         spark.read.format("csv").load(
           new File(baseDir, "file").getAbsolutePath,
           new File(baseDir, "file2").getAbsolutePath,
           new File(uuid, "file3").getAbsolutePath,
           uuid).rdd
-      }
-      assert(e.getMessage.startsWith("Path does not exist"))
-    } finally {
-
-    }
-
+      },
+      errorClass = "PATH_NOT_FOUND",
+      parameters = Map("path" -> "file:.*"),
+      matchPVals = true
+    )
    }
 
   test("SPARK-13774: Check error message for not existent globbed paths") {
     // Non-existent initial path component:
     val nonExistentBasePath = "/" + UUID.randomUUID().toString
     assert(!new File(nonExistentBasePath).exists())
-    val e = intercept[AnalysisException] {
-      spark.read.format("text").load(s"$nonExistentBasePath/*")
-    }
-    assert(e.getMessage.startsWith("Path does not exist"))
+    checkError(
+      exception = intercept[AnalysisException] {
+        spark.read.format("text").load(s"$nonExistentBasePath/*")
+      },
+      errorClass = "PATH_NOT_FOUND",
+      parameters = Map("path" -> s"file:$nonExistentBasePath/*")
+    )
 
     // Existent initial path component, but no matching files:
     val baseDir = Utils.createTempDir()
     val childDir = Utils.createTempDir(baseDir.getAbsolutePath)
     assert(childDir.exists())
     try {
-      val e1 = intercept[AnalysisException] {
-        spark.read.json(s"${baseDir.getAbsolutePath}/*/*-xyz.json").rdd
-      }
-      assert(e1.getMessage.startsWith("Path does not exist"))
+      checkError(
+        exception = intercept[AnalysisException] {
+          spark.read.json(s"${baseDir.getAbsolutePath}/*/*-xyz.json").rdd
+        },
+        errorClass = "PATH_NOT_FOUND",
+        parameters = Map("path" -> 
s"file:${baseDir.getAbsolutePath}/*/*-xyz.json")
+      )
     } finally {
       Utils.deleteRecursively(baseDir)
     }
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala
index 3034d4fe67c..06e570cb016 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala
@@ -109,18 +109,22 @@ class DataSourceSuite extends SharedSparkSession with 
PrivateMethodTester {
   }
 
   test("test non existent paths") {
-    assertThrows[AnalysisException](
-      DataSource.checkAndGlobPathIfNecessary(
-        Seq(
-          path1.toString,
-          path2.toString,
-          nonExistentPath.toString
-        ),
-        hadoopConf,
-        checkEmptyGlobPath = true,
-        checkFilesExist = true,
-        enableGlobbing = true
-      )
+    checkError(
+      exception = intercept[AnalysisException](
+        DataSource.checkAndGlobPathIfNecessary(
+          Seq(
+            path1.toString,
+            path2.toString,
+            nonExistentPath.toString
+          ),
+          hadoopConf,
+          checkEmptyGlobPath = true,
+          checkFilesExist = true,
+          enableGlobbing = true
+        )
+      ),
+      errorClass = "PATH_NOT_FOUND",
+      parameters = Map("path" -> nonExistentPath.toString)
     )
   }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

[spark] branch master updated: [SPARK-40948][SQL][FOLLOWUP] Restore PATH_NOT_FOUND

Reply via email to