This is an automated email from the ASF dual-hosted git repository.

HyukjinKwon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new e3c519e86f8a [SPARK-56859][SQL] Create user error for 
IllegalArgumentException in CSVOptions
e3c519e86f8a is described below

commit e3c519e86f8aab87825f079efb1a01965e686fb5
Author: Shivadarshan Devadiga <[email protected]>
AuthorDate: Wed May 20 15:33:38 2026 -0700

    [SPARK-56859][SQL] Create user error for IllegalArgumentException in 
CSVOptions
    
    ### What changes were proposed in this pull request?
    This PR converts the IllegalArgumentException thrown by require() statements in 
CSVOptions to proper user-facing Spark errors with error classes.
    
    ### Why are the changes needed?
    Currently, CSVOptions uses Scala's require(), which throws a generic 
IllegalArgumentException with plain-text messages. This change aligns lineSep 
validation with Spark's error handling standards, making errors more actionable 
and consistent.
    
    ### Does this PR introduce _any_ user-facing change?
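    Yes. Invalid lineSep values now raise SparkIllegalArgumentException with the
    INVALID_LINE_SEPARATOR.NULL, INVALID_LINE_SEPARATOR.EMPTY, or
    INVALID_LINE_SEPARATOR.TOO_LONG error condition, so the error messages change.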
    
    ### How was this patch tested?
    All SQL-related tests were run, including the "lineSep restrictions" test in
    CSVSuite, which was updated to use checkError.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No
    
    cc: markj-db MaxGekk HyukjinKwon
    
    Closes #55877 from shivadarshan-devadiga/cveoptions-user-error.
    
    Authored-by: Shivadarshan Devadiga <[email protected]>
    Signed-off-by: Hyukjin Kwon <[email protected]>
---
 .../src/main/resources/error/error-conditions.json | 23 ++++++++++++++++++++++
 .../apache/spark/sql/catalyst/csv/CSVOptions.scala | 14 ++++++++-----
 .../spark/sql/errors/QueryExecutionErrors.scala    | 16 +++++++++++++++
 .../sql/execution/datasources/csv/CSVSuite.scala   | 22 +++++++++++++--------
 4 files changed, 62 insertions(+), 13 deletions(-)

diff --git a/common/utils/src/main/resources/error/error-conditions.json 
b/common/utils/src/main/resources/error/error-conditions.json
index 81ef749e19f8..119a1d5d42b4 100644
--- a/common/utils/src/main/resources/error/error-conditions.json
+++ b/common/utils/src/main/resources/error/error-conditions.json
@@ -4136,6 +4136,29 @@
     },
     "sqlState" : "42K0E"
   },
+  "INVALID_LINE_SEPARATOR" : {
+    "message" : [
+      "Invalid line separator configuration."
+    ],
+    "subClass" : {
+      "EMPTY" : {
+        "message" : [
+          "The 'lineSep' option cannot be an empty string."
+        ]
+      },
+      "NULL" : {
+        "message" : [
+          "The 'lineSep' option cannot be a null value."
+        ]
+      },
+      "TOO_LONG" : {
+        "message" : [
+          "The 'lineSep' option can contain at most 2 characters, but got 
<length> characters."
+        ]
+      }
+    },
+    "sqlState" : "22023"
+  },
   "INVALID_LOG_VERSION" : {
     "message" : [
       "UnsupportedLogVersion."
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala
index 9edb1603f463..4ae7aad19a9c 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala
@@ -284,11 +284,15 @@ class CSVOptions(
    * A string between two consecutive JSON records.
    */
   val lineSeparator: Option[String] = parameters.get(LINE_SEP).map { sep =>
-    require(sep != null, "'lineSep' cannot be a null value.")
-    require(sep.nonEmpty, "'lineSep' cannot be an empty string.")
-    // Intentionally allow it up to 2 for Window's CRLF although multiple
-    // characters have an issue with quotes. This is intentionally 
undocumented.
-    require(sep.length <= 2, "'lineSep' can contain only 1 character.")
+    if (sep == null) {
+      throw QueryExecutionErrors.lineSepCannotBeNullError()
+    }
+    if (sep.isEmpty) {
+      throw QueryExecutionErrors.lineSepCannotBeEmptyError()
+    }
+    if (sep.length > 2) {
+      throw QueryExecutionErrors.lineSepTooLongError(sep.length)
+    }
     if (sep.length == 2) logWarning("It is not recommended to set 'lineSep' " +
       "with 2 characters due to the limitation of supporting multi-char 
'lineSep' within quotes.")
     sep
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
index 0aa830827687..b97244c1c280 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
@@ -3349,4 +3349,20 @@ private[sql] object QueryExecutionErrors extends 
QueryErrorsBase with ExecutionE
         "expectedFamily" -> expectedFamily,
         "actualFamily" -> actualFamily))
   }
+
+  def lineSepCannotBeNullError(): SparkIllegalArgumentException = {
+    new SparkIllegalArgumentException(
+      errorClass = "INVALID_LINE_SEPARATOR.NULL")
+  }
+
+  def lineSepCannotBeEmptyError(): SparkIllegalArgumentException = {
+    new SparkIllegalArgumentException(
+      errorClass = "INVALID_LINE_SEPARATOR.EMPTY")
+  }
+
+  def lineSepTooLongError(length: Int): SparkIllegalArgumentException = {
+    new SparkIllegalArgumentException(
+      errorClass = "INVALID_LINE_SEPARATOR.TOO_LONG",
+      messageParameters = Map("length" -> length.toString))
+  }
 }
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
index 24f80f4b928f..22b291677cd8 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
@@ -2484,15 +2484,21 @@ abstract class CSVSuite
   // scalastyle:on nonascii
 
   test("lineSep restrictions") {
-    val errMsg1 = intercept[IllegalArgumentException] {
-      spark.read.option("lineSep", "").csv(testFile(carsFile)).collect()
-    }.getMessage
-    assert(errMsg1.contains("'lineSep' cannot be an empty string"))
+    checkError(
+      exception = intercept[SparkIllegalArgumentException] {
+        spark.read.option("lineSep", "").csv(testFile(carsFile)).collect()
+      },
+      condition = "INVALID_LINE_SEPARATOR.EMPTY",
+      parameters = Map.empty
+    )
 
-    val errMsg2 = intercept[IllegalArgumentException] {
-      spark.read.option("lineSep", "123").csv(testFile(carsFile)).collect()
-    }.getMessage
-    assert(errMsg2.contains("'lineSep' can contain only 1 character"))
+    checkError(
+      exception = intercept[SparkIllegalArgumentException] {
+        spark.read.option("lineSep", "123").csv(testFile(carsFile)).collect()
+      },
+      condition = "INVALID_LINE_SEPARATOR.TOO_LONG",
+      parameters = Map("length" -> "3")
+    )
   }
 
   Seq(true, false).foreach { multiLine =>


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to