mihailom-db commented on code in PR #48500:
URL: https://github.com/apache/spark/pull/48500#discussion_r1803357408


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/urlExpressions.scala:
##########
@@ -129,7 +129,7 @@ case class UrlDecode(child: Expression, failOnError: 
Boolean = true)
 case class TryUrlDecode(expr: Expression, replacement: Expression)
   extends RuntimeReplaceable with InheritAnalysisRules {
 
-  def this(expr: Expression) = this(expr, UrlDecode(expr, false))
+  def this(expr: Expression) = this(expr, UrlDecode(expr, failOnError = false))

Review Comment:
   Avoid changes in other expressions unrelated to this PR.



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala:
##########
@@ -362,12 +362,16 @@ private[sql] object QueryExecutionErrors extends 
QueryErrorsBase with ExecutionE
         "groupIndex" -> groupIndex.toString()))
   }
 
-  def invalidUrlError(url: UTF8String, e: URISyntaxException): 
SparkIllegalArgumentException = {
+  def invalidUrlError(
+      url: UTF8String,
+      e: URISyntaxException,
+      suggestedFunc: String)
+  : SparkIllegalArgumentException = {
     new SparkIllegalArgumentException(
       errorClass = "INVALID_URL",
       messageParameters = Map(
         "url" -> url.toString,
-        "ansiConfig" -> toSQLConf(SQLConf.ANSI_ENABLED.key)),
+        "functionName" -> suggestedFunc),

Review Comment:
   Could we please just push try_parse_url to the error-conditions and not have 
this parameter. If the need comes that we have different functionNames we will 
update, but for now we want to push as much of the text as possible to 
error-conditions



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/urlExpressions.scala:
##########
@@ -169,6 +169,36 @@ object ParseUrl {
   private val REGEXSUBFIX = "=([^&]*)"
 }
 
+/**
+ * Extracts a part from a URL
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(url, partToExtract[, key]) - Extracts a part from a URL.",
+  examples = """
+    Examples:
+      > SELECT _FUNC_('http://spark.apache.org/path?query=1', 'HOST');
+       spark.apache.org
+      > SELECT _FUNC_('http://spark.apache.org/path?query=1', 'QUERY');
+       query=1
+      > SELECT _FUNC_('http://spark.apache.org/path?query=1', 'QUERY', 
'query');
+       1
+  """,
+  since = "4.0.0",
+  group = "url_funcs")
+case class TryParseUrl(params: Seq[Expression], replacement: Expression)
+  extends RuntimeReplaceable with InheritAnalysisRules {
+  def this(children: Seq[Expression]) = this(children, ParseUrl(children, 
failOnError = false))
+  def this() = this(Seq.empty)

Review Comment:
   Why are we adding this constructor? ParseUrl needs to be called with 
children, so there is no need to add this constructor to try version.



##########
common/utils/src/main/resources/error/error-conditions.json:
##########
@@ -3207,7 +3207,7 @@
   },
   "INVALID_URL" : {
     "message" : [
-      "The url is invalid: <url>. If necessary set <ansiConfig> to \"false\" 
to bypass this error."
+      "The url is invalid: <url>. Use <functionName> to tolerate invalid URL 
and return NULL instead."

Review Comment:
   Just push the `try_parse_url` here.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to