This is an automated email from the ASF dual-hosted git repository. rubenql pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/master by this push: new d9b55b4 [CALCITE-4419] Posix regex operators cannot be used within RelBuilder d9b55b4 is described below commit d9b55b4300eb2959c4ed55e9b692ffb9f12dd1ef Author: rubenada <rube...@gmail.com> AuthorDate: Tue Nov 24 11:49:43 2020 +0000 [CALCITE-4419] Posix regex operators cannot be used within RelBuilder --- .../calcite/adapter/enumerable/RexImpTable.java | 34 ++++++++++--- .../main/java/org/apache/calcite/runtime/Like.java | 21 +++++++++ .../org/apache/calcite/runtime/SqlFunctions.java | 19 ++------ .../calcite/sql/fun/SqlPosixRegexOperator.java | 48 +++++++++---------- .../org/apache/calcite/util/BuiltInMethod.java | 2 +- .../test/enumerable/EnumerableCalcTest.java | 55 ++++++++++++++++++++++ 6 files changed, 128 insertions(+), 51 deletions(-) diff --git a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java index 42aae7e..7ac4fb1 100644 --- a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java +++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java @@ -476,17 +476,18 @@ public class RexImpTable { map.put(SIMILAR_TO, similarImplementor); // POSIX REGEX - final MethodImplementor posixRegexImplementor = - new MethodImplementor(BuiltInMethod.POSIX_REGEX.method, - NullPolicy.STRICT, false); + final MethodImplementor posixRegexImplementorCaseSensitive = + new PosixRegexMethodImplementor(true); + final MethodImplementor posixRegexImplementorCaseInsensitive = + new PosixRegexMethodImplementor(false); map.put(SqlStdOperatorTable.POSIX_REGEX_CASE_INSENSITIVE, - posixRegexImplementor); + posixRegexImplementorCaseInsensitive); map.put(SqlStdOperatorTable.POSIX_REGEX_CASE_SENSITIVE, - posixRegexImplementor); + posixRegexImplementorCaseSensitive); map.put(SqlStdOperatorTable.NEGATED_POSIX_REGEX_CASE_INSENSITIVE, - NotImplementor.of(posixRegexImplementor)); + NotImplementor.of(posixRegexImplementorCaseInsensitive)); map.put(SqlStdOperatorTable.NEGATED_POSIX_REGEX_CASE_SENSITIVE, - NotImplementor.of(posixRegexImplementor)); + NotImplementor.of(posixRegexImplementorCaseSensitive)); map.put(REGEXP_REPLACE, new RegexpReplaceImplementor()); // Multisets & arrays @@ -2001,6 +2002,25 @@ public class RexImpTable { } } + /** Implementor for {@link org.apache.calcite.sql.fun.SqlPosixRegexOperator}s. */ + private static class PosixRegexMethodImplementor extends MethodImplementor { + protected final boolean caseSensitive; + + PosixRegexMethodImplementor(boolean caseSensitive) { + super(BuiltInMethod.POSIX_REGEX.method, NullPolicy.STRICT, false); + this.caseSensitive = caseSensitive; + } + + @Override Expression implementSafe(RexToLixTranslator translator, + RexCall call, List<Expression> argValueList) { + assert argValueList.size() == 2; + // Add extra parameter (caseSensitive boolean flag), required by SqlFunctions#posixRegex. + final List<Expression> newOperands = new ArrayList<>(argValueList); + newOperands.add(Expressions.constant(caseSensitive)); + return super.implementSafe(translator, call, newOperands); + } + } + /** * Implementor for JSON_VALUE function, convert to solid format * "JSON_VALUE(json_doc, path, empty_behavior, empty_default, error_behavior, error default)" diff --git a/core/src/main/java/org/apache/calcite/runtime/Like.java b/core/src/main/java/org/apache/calcite/runtime/Like.java index 8ebecb7..34f20de 100644 --- a/core/src/main/java/org/apache/calcite/runtime/Like.java +++ b/core/src/main/java/org/apache/calcite/runtime/Like.java @@ -18,6 +18,9 @@ package org.apache.calcite.runtime; import org.checkerframework.checker.nullness.qual.Nullable; +import java.util.Arrays; +import java.util.Locale; + /** * Utilities for converting SQL {@code LIKE} and {@code SIMILAR} operators * to regular expressions. @@ -42,6 +45,11 @@ public class Like { "[:alnum:]", "\\p{Alnum}" }; + // It's important to have XDigit before Digit to match XDigit first + // (i.e. see the posixRegexToPattern method) + private static final String[] POSIX_CHARACTER_CLASSES = new String[] { "Lower", "Upper", "ASCII", + "Alpha", "XDigit", "Digit", "Alnum", "Punct", "Graph", "Print", "Blank", "Cntrl", "Space" }; + private Like() { } @@ -303,4 +311,17 @@ public class Like { return javaPattern.toString(); } + + static java.util.regex.Pattern posixRegexToPattern(String regex, boolean caseSensitive) { + // Replace existing character classes with java equivalent ones + String originalRegex = regex; + String[] existingExpressions = Arrays.stream(POSIX_CHARACTER_CLASSES) + .filter(v -> originalRegex.contains(v.toLowerCase(Locale.ROOT))).toArray(String[]::new); + for (String v : existingExpressions) { + regex = regex.replace(v.toLowerCase(Locale.ROOT), "\\p{" + v + "}"); + } + + int flags = caseSensitive ? 0 : java.util.regex.Pattern.CASE_INSENSITIVE; + return java.util.regex.Pattern.compile(regex, flags); + } } diff --git a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java index c2f912a..455c6cc 100644 --- a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java +++ b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java @@ -119,11 +119,6 @@ public class SqlFunctions { private static final Function1<List<Object>, Enumerable<Object>> LIST_AS_ENUMERABLE = Linq4j::asEnumerable; - // It's important to have XDigit before Digit to match XDigit first - // (i.e. see the posixRegex method) - private static final String[] POSIX_CHARACTER_CLASSES = new String[] { "Lower", "Upper", "ASCII", - "Alpha", "XDigit", "Digit", "Alnum", "Punct", "Graph", "Print", "Blank", "Cntrl", "Space" }; - @SuppressWarnings("unused") private static final Function1<Object[], Enumerable<@Nullable Object[]>> ARRAY_CARTESIAN_PRODUCT = lists -> { @@ -615,17 +610,9 @@ public class SqlFunctions { return Pattern.matches(regex, s); } - public static boolean posixRegex(String s, String regex, Boolean caseSensitive) { - // Replace existing character classes with java equivalent ones - String originalRegex = regex; - String[] existingExpressions = Arrays.stream(POSIX_CHARACTER_CLASSES) - .filter(v -> originalRegex.contains(v.toLowerCase(Locale.ROOT))).toArray(String[]::new); - for (String v : existingExpressions) { - regex = regex.replace(v.toLowerCase(Locale.ROOT), "\\p{" + v + "}"); - } - - int flags = caseSensitive ? 0 : Pattern.CASE_INSENSITIVE; - return Pattern.compile(regex, flags).matcher(s).find(); + public static boolean posixRegex(String s, String regex, boolean caseSensitive) { + final Pattern pattern = Like.posixRegexToPattern(regex, caseSensitive); + return pattern.matcher(s).find(); } // = diff --git a/core/src/main/java/org/apache/calcite/sql/fun/SqlPosixRegexOperator.java b/core/src/main/java/org/apache/calcite/sql/fun/SqlPosixRegexOperator.java index e57b575..fe6c204 100644 --- a/core/src/main/java/org/apache/calcite/sql/fun/SqlPosixRegexOperator.java +++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlPosixRegexOperator.java @@ -17,26 +17,18 @@ package org.apache.calcite.sql.fun; import org.apache.calcite.rel.type.RelDataType; -import org.apache.calcite.sql.SqlBasicCall; import org.apache.calcite.sql.SqlBinaryOperator; import org.apache.calcite.sql.SqlCall; import org.apache.calcite.sql.SqlCallBinding; import org.apache.calcite.sql.SqlKind; -import org.apache.calcite.sql.SqlLiteral; -import org.apache.calcite.sql.SqlNode; import org.apache.calcite.sql.SqlOperandCountRange; import org.apache.calcite.sql.SqlWriter; -import org.apache.calcite.sql.parser.SqlParserPos; import org.apache.calcite.sql.type.InferTypes; import org.apache.calcite.sql.type.OperandTypes; import org.apache.calcite.sql.type.ReturnTypes; import org.apache.calcite.sql.type.SqlOperandCountRanges; import org.apache.calcite.sql.type.SqlTypeUtil; -import org.checkerframework.checker.nullness.qual.Nullable; - -import java.util.Arrays; - /** * An operator describing the <code>~</code> operator. * @@ -47,6 +39,7 @@ public class SqlPosixRegexOperator extends SqlBinaryOperator { private final boolean caseSensitive; private final boolean negated; + private final String operatorString; // ~ Constructors ----------------------------------------------------------- @@ -72,29 +65,28 @@ public class SqlPosixRegexOperator extends SqlBinaryOperator { OperandTypes.STRING_SAME_SAME_SAME); this.caseSensitive = caseSensitive; this.negated = negated; + final StringBuilder sb = new StringBuilder(3); + if (this.negated) { + sb.append("!"); + } + sb.append("~"); + if (!this.caseSensitive) { + sb.append("*"); + } + this.operatorString = sb.toString(); } // ~ Methods ---------------------------------------------------------------- @Override public SqlOperandCountRange getOperandCountRange() { - return SqlOperandCountRanges.between(2, 3); - } - - @Override public SqlCall createCall( - @Nullable SqlLiteral functionQualifier, - SqlParserPos pos, - @Nullable SqlNode... operands) { - pos = pos.plusAll(operands); - operands = Arrays.copyOf(operands, operands.length + 1); - operands[operands.length - 1] = SqlLiteral.createBoolean(caseSensitive, SqlParserPos.ZERO); - return new SqlBasicCall(this, operands, pos, false, functionQualifier); + return SqlOperandCountRanges.of(2); } @Override public boolean checkOperandTypes( SqlCallBinding callBinding, boolean throwOnFailure) { int operandCount = callBinding.getOperandCount(); - if (operandCount != 2 && operandCount != 3) { + if (operandCount != 2) { throw new AssertionError( "Unexpected number of args to " + callBinding.getCall() + ": " + operandCount); } @@ -121,16 +113,18 @@ public class SqlPosixRegexOperator extends SqlBinaryOperator { final SqlWriter.Frame frame = writer.startList("", ""); call.operand(0).unparse(writer, getLeftPrec(), getRightPrec()); - if (this.negated) { - writer.print("!"); - } - writer.print("~"); - if (!this.caseSensitive) { - writer.print("*"); - } + writer.print(this.operatorString); writer.print(" "); call.operand(1).unparse(writer, getLeftPrec(), getRightPrec()); writer.endList(frame); } + + public boolean isCaseSensitive() { + return caseSensitive; + } + + public boolean isNegated() { + return negated; + } } diff --git a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java index b227ec3..9f7ad50 100644 --- a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java +++ b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java @@ -418,7 +418,7 @@ public enum BuiltInMethod { RTRIM(SqlFunctions.class, "rtrim", String.class), LIKE(SqlFunctions.class, "like", String.class, String.class), SIMILAR(SqlFunctions.class, "similar", String.class, String.class), - POSIX_REGEX(SqlFunctions.class, "posixRegex", String.class, String.class, Boolean.class), + POSIX_REGEX(SqlFunctions.class, "posixRegex", String.class, String.class, boolean.class), REGEXP_REPLACE3(SqlFunctions.class, "regexpReplace", String.class, String.class, String.class), REGEXP_REPLACE4(SqlFunctions.class, "regexpReplace", String.class, diff --git a/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableCalcTest.java b/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableCalcTest.java index ce4feab..c5d5e1f 100644 --- a/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableCalcTest.java +++ b/core/src/test/java/org/apache/calcite/test/enumerable/EnumerableCalcTest.java @@ -17,6 +17,7 @@ package org.apache.calcite.test.enumerable; import org.apache.calcite.adapter.java.ReflectiveSchema; +import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.calcite.test.CalciteAssert; import org.apache.calcite.test.JdbcTest; @@ -54,4 +55,58 @@ class EnumerableCalcTest { "$f0=500", "$f0=1000"); } + + /** + * Test cases for + * <a href="https://issues.apache.org/jira/browse/CALCITE-4419">[CALCITE-4419] + * Posix regex operators cannot be used within RelBuilder</a>. + */ + @Test void testPosixRegexCaseSensitive() { + checkPosixRegex("E..c", SqlStdOperatorTable.POSIX_REGEX_CASE_SENSITIVE, + "empid=200; name=Eric"); + checkPosixRegex("e..c", SqlStdOperatorTable.POSIX_REGEX_CASE_SENSITIVE, ""); + } + + @Test void testPosixRegexCaseInsensitive() { + checkPosixRegex("E..c", SqlStdOperatorTable.POSIX_REGEX_CASE_INSENSITIVE, + "empid=200; name=Eric"); + checkPosixRegex("e..c", SqlStdOperatorTable.POSIX_REGEX_CASE_INSENSITIVE, + "empid=200; name=Eric"); + } + + @Test void testNegatedPosixRegexCaseSensitive() { + checkPosixRegex("E..c", SqlStdOperatorTable.NEGATED_POSIX_REGEX_CASE_SENSITIVE, + "empid=100; name=Bill", "empid=110; name=Theodore", "empid=150; name=Sebastian"); + checkPosixRegex("e..c", SqlStdOperatorTable.NEGATED_POSIX_REGEX_CASE_SENSITIVE, + "empid=100; name=Bill", "empid=110; name=Theodore", "empid=150; name=Sebastian", "empid=200; name=Eric"); + } + + @Test void testNegatedPosixRegexCaseInsensitive() { + checkPosixRegex("E..c", SqlStdOperatorTable.NEGATED_POSIX_REGEX_CASE_INSENSITIVE, + "empid=100; name=Bill", "empid=110; name=Theodore", "empid=150; name=Sebastian"); + checkPosixRegex("e..c", SqlStdOperatorTable.NEGATED_POSIX_REGEX_CASE_INSENSITIVE, + "empid=100; name=Bill", "empid=110; name=Theodore", "empid=150; name=Sebastian"); + } + + private void checkPosixRegex( + String literalValue, + SqlOperator operator, + String... expectedResult) { + CalciteAssert.that() + .withSchema("s", new ReflectiveSchema(new JdbcTest.HrSchema())) + .query("?") + .withRel( + builder -> builder + .scan("s", "emps") + .filter( + builder.call( + operator, + builder.field("name"), + builder.literal(literalValue))) + .project( + builder.field("empid"), + builder.field("name")) + .build()) + .returnsUnordered(expectedResult); + } }