beliefer commented on a change in pull request #27507: [SPARK-24884][SQL] 
Support regexp function regexp_extract_all
URL: https://github.com/apache/spark/pull/27507#discussion_r378645357
 
 

 ##########
 File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
 ##########
 @@ -508,3 +535,99 @@ case class RegExpExtract(subject: Expression, regexp: 
Expression, idx: Expressio
     })
   }
 }
+
+/**
+ * Extracts every match of the specified group (idx) identified by a Java regex.
+ *
+ * NOTE: this expression is not THREAD-SAFE, as it has some internal mutable state.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(str, regexp[, idx]) - Extracts all group that matches 
`regexp`.",
+  arguments = """
+    Arguments:
+      * str - a string expression of the input string.
+      * regexp - a string expression of the regex string.
+
+          Since Spark 2.0, string literals (including regex patterns) are 
unescaped in our SQL
+          parser. For example, to match "\abc", a regular expression for 
`regexp` can be
+          "^\\abc$".
+
+          There is a SQL config 'spark.sql.parser.escapedStringLiterals' that 
can be used to
+          fallback to the Spark 1.6 behavior regarding string literal parsing. 
For example,
+          if the config is enabled, the `regexp` that can match "\abc" is 
"^\abc$".
+      * idx - an int expression of the regex group index. The regex maybe 
contains multiple
+          groups. `idx` indicates which regex group to extract.
+  """,
+  examples = """
+    Examples:
+      > SELECT _FUNC_('100-200, 300-400', '(\\d+)-(\\d+)', 1);
+       ["100","300"]
+  """,
+  since = "3.0.0")
+case class RegExpExtractAll(subject: Expression, regexp: Expression, idx: 
Expression)
+  extends RegExpExtractBase {
+  def this(s: Expression, r: Expression) = this(s, r, Literal(1))
+
+  override def nullSafeEval(s: Any, p: Any, r: Any): Any = {
+    val m = getLastMatcher(s, p)
+    val matchResults = new ArrayBuffer[UTF8String]()
+    val mr: MatchResult = m.toMatchResult
+    while(m.find) {
+      val mr: MatchResult = m.toMatchResult
+      val index = r.asInstanceOf[Int]
+      RegExpExtractBase.checkGroupIndex(mr.groupCount, index)
+      val group = mr.group(index)
+      if (group == null) { // Pattern matched, but not optional group
 
 Review comment:
   OK.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to