This is an automated email from the ASF dual-hosted git repository.

abhishekrb pushed a commit to branch regexp_pattern_validation
in repository https://gitbox.apache.org/repos/asf/druid.git

commit a376b5ba2186c54718bb70772008f2d800ad47f3
Author: Abhishek Balaji Radhakrishnan <[email protected]>
AuthorDate: Wed Nov 19 19:37:26 2025 -0800

    Common pattern validation for all REGEXP* functions for nicer error message
    
    Move the compilePattern() utility and wire it up to all the REGEXP* 
functions
    so any invalid regex pattern will return nicer error messages to users.
    Otherwise, a user would get a cryptic error message "Illegal character 
range near index.."
---
 .../druid/query/expression/RegexpExprUtils.java    | 51 ++++++++++++++++++++++
 .../query/expression/RegexpExtractExprMacro.java   | 26 +----------
 .../query/expression/RegexpLikeExprMacro.java      |  2 +-
 .../query/expression/RegexpReplaceExprMacro.java   |  2 +-
 .../expression/RegexpExtractExprMacroTest.java     | 21 +++++++++
 .../query/expression/RegexpLikeExprMacroTest.java  | 17 ++++++++
 .../expression/RegexpReplaceExprMacroTest.java     | 17 ++++++++
 7 files changed, 109 insertions(+), 27 deletions(-)

diff --git 
a/processing/src/main/java/org/apache/druid/query/expression/RegexpExprUtils.java
 
b/processing/src/main/java/org/apache/druid/query/expression/RegexpExprUtils.java
new file mode 100644
index 00000000000..8a16602b409
--- /dev/null
+++ 
b/processing/src/main/java/org/apache/druid/query/expression/RegexpExprUtils.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.query.expression;
+
+import org.apache.druid.error.InvalidInput;
+import org.apache.druid.java.util.common.StringUtils;
+
+import javax.annotation.Nullable;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+
+public class RegexpExprUtils
+{
+  /**
+   * Compile the provided pattern, or provide a nice error message if it 
cannot be compiled.
+   */
+  public static Pattern compilePattern(@Nullable String pattern, String 
functionName)
+  {
+    try {
+      return 
Pattern.compile(StringUtils.nullToEmptyNonDruidDataString(pattern));
+    }
+    catch (PatternSyntaxException e) {
+      throw InvalidInput.exception(
+          e,
+          StringUtils.format(
+              "An invalid pattern [%s] was provided for the %s function, 
error: [%s]",
+              e.getPattern(),
+              functionName,
+              e.getMessage()
+          )
+      );
+    }
+  }
+}
diff --git 
a/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java
 
b/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java
index 6a690787735..d7102936c14 100644
--- 
a/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java
+++ 
b/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java
@@ -19,8 +19,6 @@
 
 package org.apache.druid.query.expression;
 
-import org.apache.druid.error.InvalidInput;
-import org.apache.druid.java.util.common.StringUtils;
 import org.apache.druid.math.expr.Expr;
 import org.apache.druid.math.expr.ExprEval;
 import org.apache.druid.math.expr.ExprMacroTable;
@@ -31,7 +29,6 @@ import javax.annotation.Nullable;
 import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
-import java.util.regex.PatternSyntaxException;
 
 public class RegexpExtractExprMacro implements ExprMacroTable.ExprMacro
 {
@@ -61,7 +58,7 @@ public class RegexpExtractExprMacro implements 
ExprMacroTable.ExprMacro
     }
 
     // Precompile the pattern.
-    final Pattern pattern = compilePattern((String) 
patternExpr.getLiteralValue());
+    final Pattern pattern = RegexpExprUtils.compilePattern((String) 
patternExpr.getLiteralValue(), FN_NAME);
 
     final int index = indexExpr == null ? 0 : ((Number) 
indexExpr.getLiteralValue()).intValue();
 
@@ -97,25 +94,4 @@ public class RegexpExtractExprMacro implements 
ExprMacroTable.ExprMacro
     }
     return new RegexpExtractExpr(args);
   }
-
-  /**
-   * Compile the provided pattern, or provide a nice error message if it 
cannot be compiled.
-   */
-  private static Pattern compilePattern(@Nullable String pattern)
-  {
-    try {
-      return 
Pattern.compile(StringUtils.nullToEmptyNonDruidDataString(pattern));
-    }
-    catch (PatternSyntaxException e) {
-      throw InvalidInput.exception(
-          e,
-          StringUtils.format(
-              "An invalid pattern [%s] was provided for the %s function, 
error: [%s]",
-              e.getPattern(),
-              FN_NAME,
-              e.getMessage()
-          )
-      );
-    }
-  }
 }
diff --git 
a/processing/src/main/java/org/apache/druid/query/expression/RegexpLikeExprMacro.java
 
b/processing/src/main/java/org/apache/druid/query/expression/RegexpLikeExprMacro.java
index 3073cee447d..a2c8bdc3e9a 100644
--- 
a/processing/src/main/java/org/apache/druid/query/expression/RegexpLikeExprMacro.java
+++ 
b/processing/src/main/java/org/apache/druid/query/expression/RegexpLikeExprMacro.java
@@ -86,7 +86,7 @@ public class RegexpLikeExprMacro implements 
ExprMacroTable.ExprMacro
 
       final String patternString = (String) patternExpr.getLiteralValue();
       this.arg = args.get(0);
-      this.pattern = 
Pattern.compile(StringUtils.nullToEmptyNonDruidDataString(patternString));
+      this.pattern = RegexpExprUtils.compilePattern(patternString, FN_NAME);
     }
 
     @Nonnull
diff --git 
a/processing/src/main/java/org/apache/druid/query/expression/RegexpReplaceExprMacro.java
 
b/processing/src/main/java/org/apache/druid/query/expression/RegexpReplaceExprMacro.java
index 835fdcd6c91..a839d4bda71 100644
--- 
a/processing/src/main/java/org/apache/druid/query/expression/RegexpReplaceExprMacro.java
+++ 
b/processing/src/main/java/org/apache/druid/query/expression/RegexpReplaceExprMacro.java
@@ -96,7 +96,7 @@ public class RegexpReplaceExprMacro implements 
ExprMacroTable.ExprMacro
       final String patternString = (String) patternExpr.getLiteralValue();
 
       this.arg = args.get(0);
-      this.pattern = patternString != null ? Pattern.compile(patternString) : 
null;
+      this.pattern = RegexpExprUtils.compilePattern(patternString, FN_NAME);
       this.replacement = (String) replacementExpr.getLiteralValue();
     }
 
diff --git 
a/processing/src/test/java/org/apache/druid/query/expression/RegexpExtractExprMacroTest.java
 
b/processing/src/test/java/org/apache/druid/query/expression/RegexpExtractExprMacroTest.java
index 8a0212f33d5..42330d6adbd 100644
--- 
a/processing/src/test/java/org/apache/druid/query/expression/RegexpExtractExprMacroTest.java
+++ 
b/processing/src/test/java/org/apache/druid/query/expression/RegexpExtractExprMacroTest.java
@@ -19,9 +19,13 @@
 
 package org.apache.druid.query.expression;
 
+import org.apache.druid.error.DruidException;
+import org.apache.druid.error.DruidExceptionMatcher;
+import org.apache.druid.java.util.common.StringUtils;
 import org.apache.druid.math.expr.ExprEval;
 import org.apache.druid.math.expr.ExpressionType;
 import org.apache.druid.math.expr.InputBindings;
+import org.hamcrest.MatcherAssert;
 import org.junit.Assert;
 import org.junit.Test;
 
@@ -46,6 +50,23 @@ public class RegexpExtractExprMacroTest extends MacroTestBase
     eval("regexp_extract('a', 'b', 'c', 'd')", InputBindings.nilBindings());
   }
 
+  @Test
+  public void testInvalidRegexpExtractPattern()
+  {
+    MatcherAssert.assertThat(
+        Assert.assertThrows(DruidException.class, () ->
+            eval(
+                "regexp_extract('pod-1234-node', '[ab-0-9]')",
+                InputBindings.forInputSupplier("a", ExpressionType.STRING, () 
-> "foo")
+            )
+        ),
+        DruidExceptionMatcher.invalidInput().expectMessageContains(
+            "An invalid pattern [[ab-0-9]] was provided for the regexp_extract 
function,"
+            + " error: [Illegal character range near index 4"
+        )
+    );
+  }
+
   @Test
   public void testMatch()
   {
diff --git 
a/processing/src/test/java/org/apache/druid/query/expression/RegexpLikeExprMacroTest.java
 
b/processing/src/test/java/org/apache/druid/query/expression/RegexpLikeExprMacroTest.java
index d6e31baad0b..b1658d0839a 100644
--- 
a/processing/src/test/java/org/apache/druid/query/expression/RegexpLikeExprMacroTest.java
+++ 
b/processing/src/test/java/org/apache/druid/query/expression/RegexpLikeExprMacroTest.java
@@ -19,9 +19,12 @@
 
 package org.apache.druid.query.expression;
 
+import org.apache.druid.error.DruidException;
+import org.apache.druid.error.DruidExceptionMatcher;
 import org.apache.druid.math.expr.ExprEval;
 import org.apache.druid.math.expr.ExpressionType;
 import org.apache.druid.math.expr.InputBindings;
+import org.hamcrest.MatcherAssert;
 import org.junit.Assert;
 import org.junit.Test;
 
@@ -46,6 +49,20 @@ public class RegexpLikeExprMacroTest extends MacroTestBase
     eval("regexp_like('a', 'b', 'c')", InputBindings.nilBindings());
   }
 
+  @Test
+  public void testInvalidRegexpLikePattern()
+  {
+    MatcherAssert.assertThat(
+        Assert.assertThrows(
+            DruidException.class,
+            () -> eval("regexp_like('a', '[Ab-C]')", 
InputBindings.nilBindings())),
+        DruidExceptionMatcher.invalidInput().expectMessageContains(
+            "An invalid pattern [[Ab-C]] was provided for the regexp_like 
function,"
+            + " error: [Illegal character range near index 4"
+        )
+    );
+  }
+
   @Test
   public void testMatch()
   {
diff --git 
a/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java
 
b/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java
index 870bd5c8db8..a3faa7ab751 100644
--- 
a/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java
+++ 
b/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java
@@ -20,9 +20,12 @@
 package org.apache.druid.query.expression;
 
 import com.google.common.collect.ImmutableMap;
+import org.apache.druid.error.DruidException;
+import org.apache.druid.error.DruidExceptionMatcher;
 import org.apache.druid.math.expr.ExprEval;
 import org.apache.druid.math.expr.ExpressionType;
 import org.apache.druid.math.expr.InputBindings;
+import org.hamcrest.MatcherAssert;
 import org.junit.Assert;
 import org.junit.Test;
 
@@ -40,6 +43,20 @@ public class RegexpReplaceExprMacroTest extends MacroTestBase
     eval("regexp_replace()", InputBindings.nilBindings());
   }
 
+  @Test
+  public void testInvalidRegexpReplacePattern()
+  {
+    MatcherAssert.assertThat(
+        Assert.assertThrows(
+            DruidException.class,
+            () -> eval("regexp_replace(a, '[Ab-cd-0]', 'xyz')", 
InputBindings.nilBindings())),
+        DruidExceptionMatcher.invalidInput().expectMessageContains(
+            "An invalid pattern [[Ab-cd-0]] was provided for the 
regexp_replace function,"
+            + " error: [Illegal character range near index 7"
+        )
+    );
+  }
+
   @Test
   public void testErrorFourArguments()
   {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to