This is an automated email from the ASF dual-hosted git repository. abhishekrb pushed a commit to branch regexp_pattern_validation in repository https://gitbox.apache.org/repos/asf/druid.git
commit a376b5ba2186c54718bb70772008f2d800ad47f3 Author: Abhishek Balaji Radhakrishnan <[email protected]> AuthorDate: Wed Nov 19 19:37:26 2025 -0800 Common pattern validation for all REGEXP* functions for nicer error message Move the compilePattern() utility and wire it up to all the REGEXP* functions so any invalid regex pattern will return nicer error messages to users. Otherwise, a user would get a cryptic error message "Illegal character range near index.." --- .../druid/query/expression/RegexpExprUtils.java | 51 ++++++++++++++++++++++ .../query/expression/RegexpExtractExprMacro.java | 26 +---------- .../query/expression/RegexpLikeExprMacro.java | 2 +- .../query/expression/RegexpReplaceExprMacro.java | 2 +- .../expression/RegexpExtractExprMacroTest.java | 21 +++++++++ .../query/expression/RegexpLikeExprMacroTest.java | 17 ++++++++ .../expression/RegexpReplaceExprMacroTest.java | 17 ++++++++ 7 files changed, 109 insertions(+), 27 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/expression/RegexpExprUtils.java b/processing/src/main/java/org/apache/druid/query/expression/RegexpExprUtils.java new file mode 100644 index 00000000000..8a16602b409 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/expression/RegexpExprUtils.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.expression; + +import org.apache.druid.error.InvalidInput; +import org.apache.druid.java.util.common.StringUtils; + +import javax.annotation.Nullable; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; + +public class RegexpExprUtils +{ + /** + * Compile the provided pattern, or provide a nice error message if it cannot be compiled. + */ + public static Pattern compilePattern(@Nullable String pattern, String functionName) + { + try { + return Pattern.compile(StringUtils.nullToEmptyNonDruidDataString(pattern)); + } + catch (PatternSyntaxException e) { + throw InvalidInput.exception( + e, + StringUtils.format( + "An invalid pattern [%s] was provided for the %s function, error: [%s]", + e.getPattern(), + functionName, + e.getMessage() + ) + ); + } + } +} diff --git a/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java index 6a690787735..d7102936c14 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/RegexpExtractExprMacro.java @@ -19,8 +19,6 @@ package org.apache.druid.query.expression; -import org.apache.druid.error.InvalidInput; -import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.math.expr.Expr; import org.apache.druid.math.expr.ExprEval; import org.apache.druid.math.expr.ExprMacroTable; @@ -31,7 +29,6 @@ import javax.annotation.Nullable; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; -import java.util.regex.PatternSyntaxException; public class RegexpExtractExprMacro implements ExprMacroTable.ExprMacro { @@ -61,7 +58,7 @@ public class RegexpExtractExprMacro implements ExprMacroTable.ExprMacro } // Precompile the pattern. - final Pattern pattern = compilePattern((String) patternExpr.getLiteralValue()); + final Pattern pattern = RegexpExprUtils.compilePattern((String) patternExpr.getLiteralValue(), FN_NAME); final int index = indexExpr == null ? 0 : ((Number) indexExpr.getLiteralValue()).intValue(); @@ -97,25 +94,4 @@ public class RegexpExtractExprMacro implements ExprMacroTable.ExprMacro } return new RegexpExtractExpr(args); } - - /** - * Compile the provided pattern, or provide a nice error message if it cannot be compiled. - */ - private static Pattern compilePattern(@Nullable String pattern) - { - try { - return Pattern.compile(StringUtils.nullToEmptyNonDruidDataString(pattern)); - } - catch (PatternSyntaxException e) { - throw InvalidInput.exception( - e, - StringUtils.format( - "An invalid pattern [%s] was provided for the %s function, error: [%s]", - e.getPattern(), - FN_NAME, - e.getMessage() - ) - ); - } - } } diff --git a/processing/src/main/java/org/apache/druid/query/expression/RegexpLikeExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/RegexpLikeExprMacro.java index 3073cee447d..a2c8bdc3e9a 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/RegexpLikeExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/RegexpLikeExprMacro.java @@ -86,7 +86,7 @@ public class RegexpLikeExprMacro implements ExprMacroTable.ExprMacro final String patternString = (String) patternExpr.getLiteralValue(); this.arg = args.get(0); - this.pattern = Pattern.compile(StringUtils.nullToEmptyNonDruidDataString(patternString)); + this.pattern = RegexpExprUtils.compilePattern(patternString, FN_NAME); } @Nonnull diff --git a/processing/src/main/java/org/apache/druid/query/expression/RegexpReplaceExprMacro.java b/processing/src/main/java/org/apache/druid/query/expression/RegexpReplaceExprMacro.java index 835fdcd6c91..a839d4bda71 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/RegexpReplaceExprMacro.java +++ b/processing/src/main/java/org/apache/druid/query/expression/RegexpReplaceExprMacro.java @@ -96,7 +96,7 @@ public class RegexpReplaceExprMacro implements ExprMacroTable.ExprMacro final String patternString = (String) patternExpr.getLiteralValue(); this.arg = args.get(0); - this.pattern = patternString != null ? Pattern.compile(patternString) : null; + this.pattern = RegexpExprUtils.compilePattern(patternString, FN_NAME); this.replacement = (String) replacementExpr.getLiteralValue(); } diff --git a/processing/src/test/java/org/apache/druid/query/expression/RegexpExtractExprMacroTest.java b/processing/src/test/java/org/apache/druid/query/expression/RegexpExtractExprMacroTest.java index 8a0212f33d5..42330d6adbd 100644 --- a/processing/src/test/java/org/apache/druid/query/expression/RegexpExtractExprMacroTest.java +++ b/processing/src/test/java/org/apache/druid/query/expression/RegexpExtractExprMacroTest.java @@ -19,9 +19,13 @@ package org.apache.druid.query.expression; +import org.apache.druid.error.DruidException; +import org.apache.druid.error.DruidExceptionMatcher; +import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.math.expr.ExprEval; import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.math.expr.InputBindings; +import org.hamcrest.MatcherAssert; import org.junit.Assert; import org.junit.Test; @@ -46,6 +50,23 @@ public class RegexpExtractExprMacroTest extends MacroTestBase eval("regexp_extract('a', 'b', 'c', 'd')", InputBindings.nilBindings()); } + @Test + public void testInvalidRegexpExtractPattern() + { + MatcherAssert.assertThat( + Assert.assertThrows(DruidException.class, () -> + eval( + "regexp_extract('pod-1234-node', '[ab-0-9]')", + InputBindings.forInputSupplier("a", ExpressionType.STRING, () -> "foo") + ) + ), + DruidExceptionMatcher.invalidInput().expectMessageContains( + "An invalid pattern [[ab-0-9]] was provided for the regexp_extract function," + + " error: [Illegal character range near index 4" + ) + ); + } + @Test public void testMatch() { diff --git a/processing/src/test/java/org/apache/druid/query/expression/RegexpLikeExprMacroTest.java b/processing/src/test/java/org/apache/druid/query/expression/RegexpLikeExprMacroTest.java index d6e31baad0b..b1658d0839a 100644 --- a/processing/src/test/java/org/apache/druid/query/expression/RegexpLikeExprMacroTest.java +++ b/processing/src/test/java/org/apache/druid/query/expression/RegexpLikeExprMacroTest.java @@ -19,9 +19,12 @@ package org.apache.druid.query.expression; +import org.apache.druid.error.DruidException; +import org.apache.druid.error.DruidExceptionMatcher; import org.apache.druid.math.expr.ExprEval; import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.math.expr.InputBindings; +import org.hamcrest.MatcherAssert; import org.junit.Assert; import org.junit.Test; @@ -46,6 +49,20 @@ public class RegexpLikeExprMacroTest extends MacroTestBase eval("regexp_like('a', 'b', 'c')", InputBindings.nilBindings()); } + @Test + public void testInvalidRegexpLikePattern() + { + MatcherAssert.assertThat( + Assert.assertThrows( + DruidException.class, + () -> eval("regexp_like('a', '[Ab-C]')", InputBindings.nilBindings())), + DruidExceptionMatcher.invalidInput().expectMessageContains( + "An invalid pattern [[Ab-C]] was provided for the regexp_like function," + + " error: [Illegal character range near index 4" + ) + ); + } + @Test public void testMatch() { diff --git a/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java b/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java index 870bd5c8db8..a3faa7ab751 100644 --- a/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java +++ b/processing/src/test/java/org/apache/druid/query/expression/RegexpReplaceExprMacroTest.java @@ -20,9 +20,12 @@ package org.apache.druid.query.expression; import com.google.common.collect.ImmutableMap; +import org.apache.druid.error.DruidException; +import org.apache.druid.error.DruidExceptionMatcher; import org.apache.druid.math.expr.ExprEval; import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.math.expr.InputBindings; +import org.hamcrest.MatcherAssert; import org.junit.Assert; import org.junit.Test; @@ -40,6 +43,20 @@ public class RegexpReplaceExprMacroTest extends MacroTestBase eval("regexp_replace()", InputBindings.nilBindings()); } + @Test + public void testInvalidRegexpReplacePattern() + { + MatcherAssert.assertThat( + Assert.assertThrows( + DruidException.class, + () -> eval("regexp_replace(a, '[Ab-cd-0]', 'xyz')", InputBindings.nilBindings())), + DruidExceptionMatcher.invalidInput().expectMessageContains( + "An invalid pattern [[Ab-cd-0]] was provided for the regexp_replace function," + + " error: [Illegal character range near index 7" + ) + ); + } + @Test public void testErrorFourArguments() { --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
