MaxGekk commented on code in PR #37154:
URL: https://github.com/apache/spark/pull/37154#discussion_r918754573


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala:
##########
@@ -1043,3 +1043,89 @@ case class RegExpSubStr(left: Expression, right: 
Expression)
       newChildren: IndexedSeq[Expression]): RegExpSubStr =
     copy(left = newChildren(0), right = newChildren(1))
 }
+
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+    _FUNC_(str, regexp) - Searches a string for a regular expression and 
returns an integer that indicates the beginning position matched substring. 
Positions are 1-based, not 0-based. If no match is found, returns 0.
+  """,
+  arguments = """
+    Arguments:
+      * str - a string expression.
+      * regexp - a string representing a regular expression. The regex string 
should be a
+          Java regular expression.<br><br>
+          Since Spark 2.0, string literals (including regex patterns) are 
unescaped in our SQL
+          parser. For example, to match "\abc", a regular expression for 
`regexp` can be
+          "^\\abc$".<br><br>
+          There is a SQL config 'spark.sql.parser.escapedStringLiterals' that 
can be used to
+          fallback to the Spark 1.6 behavior regarding string literal parsing. 
For example,
+          if the config is enabled, the `regexp` that can match "\abc" is 
"^\abc$".
+  """,
+  examples = """
+    Examples:
+      > SELECT _FUNC_('[email protected]', '@[^.]*');
+       5
+  """,
+  since = "3.4.0",
+  group = "string_funcs")
+// scalastyle:on line.size.limit
+case class RegExpInStr(subject: Expression, regexp: Expression, idx: 
Expression)
+  extends RegExpExtractBase {
+  def this(s: Expression, r: Expression) = this(s, r, Literal(0))
+
+  override def nullSafeEval(s: Any, p: Any, r: Any): Any = {
+    try {
+      val m = getLastMatcher(s, p)
+      if (m.find) {
+        m.toMatchResult.start() + 1
+      } else {
+        0
+      }
+    } catch {
+      case _: IllegalStateException => 0
+    }
+  }
+
+  override def dataType: DataType = IntegerType
+  override def prettyName: String = "regexp_instr"
+
+  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): 
ExprCode = {
+    val classNamePattern = classOf[Pattern].getCanonicalName
+    val matcher = ctx.freshName("matcher")
+
+    val termLastRegex = ctx.addMutableState("UTF8String", "lastRegex")
+    val termPattern = ctx.addMutableState(classNamePattern, "pattern")
+
+    val setEvNotNull = if (nullable) {
+      s"${ev.isNull} = false;"
+    } else {
+      ""
+    }
+
+    nullSafeCodeGen(ctx, ev, (subject, regexp, _) => {
+      s"""
+         |try {
+         |  if (!$regexp.equals($termLastRegex)) {
+         |    // regex value changed
+         |    $termLastRegex = $regexp.clone();
+         |    $termPattern = 
$classNamePattern.compile($termLastRegex.toString());
+         |  }
+         |  java.util.regex.Matcher $matcher = 
$termPattern.matcher($subject.toString());
+         |  $setEvNotNull
+         |  if ($matcher.find()) {
+         |    ${ev.value} = $matcher.toMatchResult().start($idx) + 1;
+         |  } else {
+         |    ${ev.value} = 0;
+         |  }
+         |} catch (IllegalStateException e) {
+         |  $setEvNotNull

Review Comment:
   I will move `$setEvNotNull` to the beginning of the `try` block, since we 
need to set it in any case.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to