This is an automated email from the ASF dual-hosted git repository. smiklosovic pushed a commit to branch trunk in repository https://gitbox.apache.org/repos/asf/cassandra.git
The following commit(s) were added to refs/heads/trunk by this push: new 85b25f2256 Add regular expression constraint 85b25f2256 is described below commit 85b25f2256e34da4ae3756621704242895f29f51 Author: Stefan Miklosovic <smikloso...@apache.org> AuthorDate: Thu Mar 13 16:26:33 2025 +0100 Add regular expression constraint patch by Stefan Miklosovic; reviewed by Bernardo Botella for CASSANDRA-20275 --- CHANGES.txt | 1 + .../pages/developing/cql/constraints.adoc | 31 ++++++ pylib/cqlshlib/cql3handling.py | 1 + pylib/cqlshlib/test/test_cqlsh_completion.py | 2 +- .../cql3/constraints/ConstraintFunction.java | 2 +- .../cql3/constraints/FunctionColumnConstraint.java | 5 +- .../cql3/constraints/NotNullConstraint.java | 2 +- .../cql3/constraints/RegexpConstraint.java | 112 +++++++++++++++++++++ .../constraints/UnaryFunctionColumnConstraint.java | 2 +- .../cassandra/cql3/functions/types/ParseUtils.java | 2 +- .../cassandra/contraints/RegexpConstraintTest.java | 103 +++++++++++++++++++ 11 files changed, 256 insertions(+), 7 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 7df26992c3..c02e830092 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.1 + * Add regular expression constraint (CASSANDRA-20275) * Improve constraints autocompletion (CASSANDRA-20341) * Add JVM version and Cassandra build date to nodetool version -v (CASSANDRA-19721) * Move all disk error logic to DiskErrorsHandler to enable pluggability (CASSANDRA-20363) diff --git a/doc/modules/cassandra/pages/developing/cql/constraints.adoc b/doc/modules/cassandra/pages/developing/cql/constraints.adoc index f768d4fd8f..7e18938729 100644 --- a/doc/modules/cassandra/pages/developing/cql/constraints.adoc +++ b/doc/modules/cassandra/pages/developing/cql/constraints.adoc @@ -191,4 +191,35 @@ INSERT INTO ks.tb (id, val) VALUES (1, 'abc'); ... [Invalid query] message="Value for column 'val' violated JSON constraint as it is not a valid JSON." 
+---- + +=== REGEXP constraint + +Defines a constraint which checks text-like values against a regular expression. + +---- +CREATE TABLE ks.tb ( + id int primary key, + value text CHECK REGEXP(value) = 'a.*b' +) +---- + +---- +cassandra@cqlsh> INSERT INTO ks.tb (id , value ) VALUES ( 1, 'asdadasdabb'); +cassandra@cqlsh> INSERT INTO ks.tb (id , value ) VALUES ( 1, 'aaaaa'); +... [Invalid query] message="Value does not match regular expression 'a.*b'" +---- + +Negation can also be used: + +---- +ALTER TABLE ks.tb ALTER value CHECK REGEXP(value) != 'a.*b'; +---- + +which would logically invert the condition: + +---- +cassandra@cqlsh> INSERT INTO ks.tb (id , value ) VALUES ( 1, 'asdadasdabb'); +... [Invalid query] message="Value does match regular expression 'a.*b'" +cassandra@cqlsh> INSERT INTO ks.tb (id , value ) VALUES ( 1, 'aaaaa'); ---- \ No newline at end of file diff --git a/pylib/cqlshlib/cql3handling.py b/pylib/cqlshlib/cql3handling.py index cddcdb34d6..f1baf2ad29 100644 --- a/pylib/cqlshlib/cql3handling.py +++ b/pylib/cqlshlib/cql3handling.py @@ -334,6 +334,7 @@ JUNK ::= /([ \t\r\f\v]+|(--|[/][/])[^\n\r]*([\n\r]|$)|[/][*].*?[*][/])/ ; <constraintComparableFunction> ::= "LENGTH" | "OCTET_LENGTH" + | "REGEXP" ; <constraintStandaloneFunction> ::= "JSON" diff --git a/pylib/cqlshlib/test/test_cqlsh_completion.py b/pylib/cqlshlib/test/test_cqlsh_completion.py index aa38ec45db..bf2385a46f 100644 --- a/pylib/cqlshlib/test/test_cqlsh_completion.py +++ b/pylib/cqlshlib/test/test_cqlsh_completion.py @@ -1109,7 +1109,7 @@ class TestCqlshCompletion(CqlshCompletionCase): other_choices_ok=True) self.trycompletions('ALTER TABLE new_table ADD col int C', immediate='HECK ') self.trycompletions('ALTER TABLE new_table ADD col int CHECK ', - choices=['<identifier>', '<quotedName>', 'JSON', 'LENGTH', 'NOT_NULL', 'OCTET_LENGTH'], + choices=['<identifier>', '<quotedName>', 'JSON', 'LENGTH', 'NOT_NULL', 'OCTET_LENGTH', 'REGEXP'], other_choices_ok=True) self.trycompletions('ALTER TABLE IF 
EXISTS new_table RENAME ', choices=['IF', '<quotedName>', '<identifier>']) self.trycompletions('ALTER TABLE new_table RENAME ', choices=['IF', '<quotedName>', '<identifier>']) diff --git a/src/java/org/apache/cassandra/cql3/constraints/ConstraintFunction.java b/src/java/org/apache/cassandra/cql3/constraints/ConstraintFunction.java index 1dda89093f..9952ab32d9 100644 --- a/src/java/org/apache/cassandra/cql3/constraints/ConstraintFunction.java +++ b/src/java/org/apache/cassandra/cql3/constraints/ConstraintFunction.java @@ -79,7 +79,7 @@ public abstract class ConstraintFunction * Method that validates that a condition is valid. This method is called when the CQL constraint is created to determine * if the CQL statement is valid or needs to be rejected as invalid throwing a {@link InvalidConstraintDefinitionException} */ - public void validate(ColumnMetadata columnMetadata) throws InvalidConstraintDefinitionException + public void validate(ColumnMetadata columnMetadata, String term) throws InvalidConstraintDefinitionException { } diff --git a/src/java/org/apache/cassandra/cql3/constraints/FunctionColumnConstraint.java b/src/java/org/apache/cassandra/cql3/constraints/FunctionColumnConstraint.java index dac62ddfc6..a94b4bd0bd 100644 --- a/src/java/org/apache/cassandra/cql3/constraints/FunctionColumnConstraint.java +++ b/src/java/org/apache/cassandra/cql3/constraints/FunctionColumnConstraint.java @@ -78,7 +78,8 @@ public class FunctionColumnConstraint extends AbstractFunctionConstraint<Functio public enum Functions { LENGTH(LengthConstraint::new), - OCTET_LENGTH(OctetLengthConstraint::new); + OCTET_LENGTH(OctetLengthConstraint::new), + REGEXP(RegexpConstraint::new); private final Function<ColumnIdentifier, ConstraintFunction> functionCreator; @@ -157,7 +158,7 @@ public class FunctionColumnConstraint extends AbstractFunctionConstraint<Functio { validateArgs(columnMetadata); validateTypes(columnMetadata); - function.validate(columnMetadata); + 
function.validate(columnMetadata, term); } @Override diff --git a/src/java/org/apache/cassandra/cql3/constraints/NotNullConstraint.java b/src/java/org/apache/cassandra/cql3/constraints/NotNullConstraint.java index 465db80933..fb9f7de95b 100644 --- a/src/java/org/apache/cassandra/cql3/constraints/NotNullConstraint.java +++ b/src/java/org/apache/cassandra/cql3/constraints/NotNullConstraint.java @@ -49,7 +49,7 @@ public class NotNullConstraint extends ConstraintFunction } @Override - public void validate(ColumnMetadata columnMetadata) throws InvalidConstraintDefinitionException + public void validate(ColumnMetadata columnMetadata, String term) throws InvalidConstraintDefinitionException { if (columnMetadata.isPrimaryKeyColumn()) throw new InvalidConstraintDefinitionException(format("%s constraint can not be specified on a %s key column '%s'", diff --git a/src/java/org/apache/cassandra/cql3/constraints/RegexpConstraint.java b/src/java/org/apache/cassandra/cql3/constraints/RegexpConstraint.java new file mode 100644 index 0000000000..a2e439585f --- /dev/null +++ b/src/java/org/apache/cassandra/cql3/constraints/RegexpConstraint.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.cassandra.cql3.constraints; + +import java.nio.ByteBuffer; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.cassandra.cql3.ColumnIdentifier; +import org.apache.cassandra.cql3.Operator; +import org.apache.cassandra.cql3.functions.types.ParseUtils; +import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.db.marshal.AsciiType; +import org.apache.cassandra.db.marshal.UTF8Type; +import org.apache.cassandra.schema.ColumnMetadata; + +import static java.lang.String.format; +import static org.apache.cassandra.cql3.Operator.EQ; +import static org.apache.cassandra.cql3.Operator.NEQ; + +public class RegexpConstraint extends ConstraintFunction +{ + public static final String FUNCTION_NAME = "REGEXP"; + private static final List<AbstractType<?>> SUPPORTED_TYPES = List.of(UTF8Type.instance, AsciiType.instance); + private static final List<Operator> ALLOWED_FUNCTION_OPERATORS = List.of(EQ, NEQ); + + private Pattern pattern; + + public RegexpConstraint(ColumnIdentifier columnName) + { + super(columnName, FUNCTION_NAME); + } + + @Override + protected void internalEvaluate(AbstractType<?> valueType, Operator relationType, String regexp, ByteBuffer columnValue) + { + assert pattern != null; + Matcher matcher = pattern.matcher(valueType.getString(columnValue)); + + switch (relationType) + { + case EQ: + if (!matcher.matches()) + throw new ConstraintViolationException(format("Value does not match regular expression %s", regexp)); + break; + case NEQ: + if (matcher.matches()) + throw new ConstraintViolationException(format("Value does match regular expression %s", regexp)); + break; + default: + throw new IllegalStateException("Unsupported operator: " + relationType); + } + } + + @Override + public List<AbstractType<?>> getSupportedTypes() + { + return SUPPORTED_TYPES; + } + + @Override + public List<Operator> getSupportedOperators() + { + return ALLOWED_FUNCTION_OPERATORS; 
+ } + + @Override + public void validate(ColumnMetadata columnMetadata, String regexp) throws InvalidConstraintDefinitionException + { + try + { + // compilation of a regexp every single time upon evaluation is not performance friendly + // so we "cache" the compiled regexp for further reuse upon actual validation + pattern = Pattern.compile(ParseUtils.unquote(regexp)); + } + catch (Exception e) + { + throw new InvalidConstraintDefinitionException(format("String '%s' is not a valid regular expression", ParseUtils.unquote(regexp))); + } + } + + @Override + public boolean equals(Object o) + { + if (this == o) + return true; + + if (!(o instanceof RegexpConstraint)) + return false; + + RegexpConstraint other = (RegexpConstraint) o; + + return columnName.equals(other.columnName); + } +} diff --git a/src/java/org/apache/cassandra/cql3/constraints/UnaryFunctionColumnConstraint.java b/src/java/org/apache/cassandra/cql3/constraints/UnaryFunctionColumnConstraint.java index d39f7f865d..80fd443e0e 100644 --- a/src/java/org/apache/cassandra/cql3/constraints/UnaryFunctionColumnConstraint.java +++ b/src/java/org/apache/cassandra/cql3/constraints/UnaryFunctionColumnConstraint.java @@ -124,7 +124,7 @@ public class UnaryFunctionColumnConstraint extends AbstractFunctionConstraint<Un { validateArgs(columnMetadata); validateTypes(columnMetadata); - function.validate(columnMetadata); + function.validate(columnMetadata, term); } @Override diff --git a/src/java/org/apache/cassandra/cql3/functions/types/ParseUtils.java b/src/java/org/apache/cassandra/cql3/functions/types/ParseUtils.java index 8d0f29b45b..45bf720c0c 100644 --- a/src/java/org/apache/cassandra/cql3/functions/types/ParseUtils.java +++ b/src/java/org/apache/cassandra/cql3/functions/types/ParseUtils.java @@ -268,7 +268,7 @@ public abstract class ParseUtils * @param value The string to unquote. * @return The unquoted string. 
*/ - static String unquote(String value) + public static String unquote(String value) { return unquote(value, '\''); } diff --git a/test/unit/org/apache/cassandra/contraints/RegexpConstraintTest.java b/test/unit/org/apache/cassandra/contraints/RegexpConstraintTest.java new file mode 100644 index 0000000000..fd4800e247 --- /dev/null +++ b/test/unit/org/apache/cassandra/contraints/RegexpConstraintTest.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.cassandra.contraints; + +import org.junit.Test; + +import org.apache.cassandra.cql3.ColumnIdentifier; +import org.apache.cassandra.cql3.Operator; +import org.apache.cassandra.cql3.constraints.ColumnConstraints; +import org.apache.cassandra.cql3.constraints.FunctionColumnConstraint.Raw; +import org.apache.cassandra.cql3.constraints.InvalidConstraintDefinitionException; +import org.apache.cassandra.cql3.constraints.RegexpConstraint; +import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.db.marshal.AsciiType; +import org.apache.cassandra.db.marshal.IntegerType; +import org.apache.cassandra.db.marshal.UTF8Type; +import org.apache.cassandra.schema.ColumnMetadata; +import org.assertj.core.api.ThrowableAssert; + +import static java.util.List.of; +import static org.apache.cassandra.schema.ColumnMetadata.Kind.REGULAR; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +public class RegexpConstraintTest +{ + private static final ColumnIdentifier columnIdentifier = new ColumnIdentifier("a_column", false); + private static final ColumnIdentifier regexpFunctionIdentifier = new ColumnIdentifier(RegexpConstraint.FUNCTION_NAME, false); + private static final ColumnMetadata regularStringColumn = getColumnOfType(UTF8Type.instance); + private static final ColumnMetadata regularAsciiColumn = getColumnOfType(AsciiType.instance); + + private static final ColumnConstraints regexp = new ColumnConstraints(of(new Raw(regexpFunctionIdentifier, columnIdentifier, Operator.EQ, "'a..b'").prepare())); + private static final ColumnConstraints negatedRegexp = new ColumnConstraints(of(new Raw(regexpFunctionIdentifier, columnIdentifier, Operator.NEQ, "'a..b'").prepare())); + + @Test + public void testRegexpConstraint() throws Throwable + { + run(regexp, "acdb"); + run(regexp, "aaaaaaa", "Value does not match regular expression 'a..b'"); + run(negatedRegexp, "acdb", "Value does match regular expression 'a..b'"); + 
run(negatedRegexp, "aaaaa"); + } + + @Test + public void testInvalidPattern() + { + ColumnConstraints invalid = new ColumnConstraints(of(new Raw(regexpFunctionIdentifier, columnIdentifier, Operator.EQ, "'*abc'").prepare())); + assertThatThrownBy(() -> invalid.validate(regularStringColumn)) + .hasMessage("String '*abc' is not a valid regular expression") + .isInstanceOf(InvalidConstraintDefinitionException.class); + } + + @Test + public void testInvalidTypes() + { + assertThatThrownBy(() -> regexp.validate(getColumnOfType(IntegerType.instance))) + .hasMessage("Constraint 'REGEXP' can be used only for columns of type " + + "[org.apache.cassandra.db.marshal.UTF8Type, org.apache.cassandra.db.marshal.AsciiType] " + + "but it was class org.apache.cassandra.db.marshal.IntegerType"); + } + + private void run(ColumnConstraints regexp, String input) throws Throwable + { + run(regexp, input, null); + } + + private void run(ColumnConstraints regexp, String input, String exceptionMessage) throws Throwable + { + ThrowableAssert.ThrowingCallable callable = () -> + { + regexp.validate(regularStringColumn); + regexp.evaluate(regularStringColumn.type, regularStringColumn.type.fromString(input)); + + regexp.validate(regularAsciiColumn); + regexp.evaluate(regularAsciiColumn.type, regularAsciiColumn.type.fromString(input)); + }; + + if (exceptionMessage == null) + callable.call(); + else + assertThatThrownBy(callable).hasMessageContaining(exceptionMessage); + } + + private static ColumnMetadata getColumnOfType(AbstractType<?> type) + { + return new ColumnMetadata("a", "b", columnIdentifier, type, -1, REGULAR, null); + } +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@cassandra.apache.org For additional commands, e-mail: commits-h...@cassandra.apache.org