This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 43b02d7f25b [SPARK-40156][SQL] `url_decode()` should the return an error class 43b02d7f25b is described below commit 43b02d7f25b2ab99f9b0be3c45aaedb1a8bdd40c Author: zhiming she <505306...@qq.com> AuthorDate: Fri Aug 26 13:33:59 2022 +0300 [SPARK-40156][SQL] `url_decode()` should the return an error class ### What changes were proposed in this pull request? `url_decode()` now returns an error class when it is given input that cannot be decoded. For example: ``` spark.sql("SELECT url_decode('http%3A%2F%2spark.apache.org')").show ``` Output: ``` org.apache.spark.SparkIllegalArgumentException: [CANNOT_DECODE_URL] Cannot decode url : http%3A%2F%2spark.apache.org. URLDecoder: Illegal hex characters in escape (%) pattern - For input string: "2s" at org.apache.spark.sql.errors.QueryExecutionErrors$.illegalUrlError(QueryExecutionErrors.scala:351) at org.apache.spark.sql.catalyst.expressions.UrlCodec$.decode(urlExpressions.scala:117) at org.apache.spark.sql.catalyst.expressions.UrlCodec.decode(urlExpressions.scala) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.spark.sql.catalyst.expressions.objects.InvokeLike.invoke(objects.scala:148) ``` ### Why are the changes needed? To improve the user experience with Spark SQL. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? ``` $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z url-functions.sql" ``` Closes #37636 from zzzzming95/SPARK-40156. 
Lead-authored-by: zhiming she <505306...@qq.com> Co-authored-by: zzzzming95 <505306...@qq.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- core/src/main/resources/error/error-classes.json | 7 +++++++ .../spark/sql/catalyst/expressions/urlExpressions.scala | 7 ++++++- .../apache/spark/sql/errors/QueryExecutionErrors.scala | 7 +++++++ .../test/resources/sql-tests/inputs/url-functions.sql | 1 + .../resources/sql-tests/results/url-functions.sql.out | 16 ++++++++++++++++ 5 files changed, 37 insertions(+), 1 deletion(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index f4e9a8a3df7..aa72576c8f3 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -17,6 +17,13 @@ ], "sqlState" : "22005" }, + "CANNOT_DECODE_URL" : { + "message" : [ + "Cannot decode url : <url>.", + "<details>" + ], + "sqlState" : "42000" + }, "CANNOT_INFER_DATE" : { "message" : [ "Cannot infer date in schema inference when LegacyTimeParserPolicy is \"LEGACY\". Legacy Date formatter does not support strict date format matching which is required to avoid inferring timestamps and other non-date entries to date." 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/urlExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/urlExpressions.scala index 174e60371af..2b9885743e5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/urlExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/urlExpressions.scala @@ -110,7 +110,12 @@ object UrlCodec { } def decode(src: UTF8String, enc: UTF8String): UTF8String = { - UTF8String.fromString(URLDecoder.decode(src.toString, enc.toString)) + try { + UTF8String.fromString(URLDecoder.decode(src.toString, enc.toString)) + } catch { + case e: IllegalArgumentException => + throw QueryExecutionErrors.illegalUrlError(src, e) + } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index b52a197595c..035918b6f4b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -332,6 +332,13 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase { s"If necessary set ${SQLConf.ANSI_ENABLED.key} to false to bypass this error.", e) } + def illegalUrlError(url: UTF8String, e: IllegalArgumentException): + Throwable with SparkThrowable = { + new SparkIllegalArgumentException(errorClass = "CANNOT_DECODE_URL", + messageParameters = Array(url.toString, e.getMessage) + ) + } + def dataTypeOperationUnsupportedError(): Throwable = { new UnsupportedOperationException("dataType") } diff --git a/sql/core/src/test/resources/sql-tests/inputs/url-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/url-functions.sql index 9f8af7eac7e..be69e5ffb87 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/url-functions.sql +++ 
b/sql/core/src/test/resources/sql-tests/inputs/url-functions.sql @@ -15,5 +15,6 @@ select url_encode(null); -- url_decode function select url_decode('https%3A%2F%2Fspark.apache.org'); +select url_decode('http%3A%2F%2spark.apache.org'); select url_decode('inva lid://user:pass@host/file\\;param?query\\;p2'); select url_decode(null); \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/results/url-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/url-functions.sql.out index fc714bfc41b..44f4682e27e 100644 --- a/sql/core/src/test/resources/sql-tests/results/url-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/url-functions.sql.out @@ -95,6 +95,22 @@ struct<url_decode(https%3A%2F%2Fspark.apache.org):string> https://spark.apache.org +-- !query +select url_decode('http%3A%2F%2spark.apache.org') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkIllegalArgumentException +{ + "errorClass" : "CANNOT_DECODE_URL", + "sqlState" : "42000", + "messageParameters" : { + "url" : "http%3A%2F%2spark.apache.org", + "details" : "URLDecoder: Illegal hex characters in escape (%) pattern - For input string: \"2s\"" + } +} + + -- !query select url_decode('inva lid://user:pass@host/file\\;param?query\\;p2') -- !query schema --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org