This is an automated email from the ASF dual-hosted git repository.

smiklosovic pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/cassandra.git


The following commit(s) were added to refs/heads/trunk by this push:
     new 85b25f2256 Add regular expression constraint
85b25f2256 is described below

commit 85b25f2256e34da4ae3756621704242895f29f51
Author: Stefan Miklosovic <smikloso...@apache.org>
AuthorDate: Thu Mar 13 16:26:33 2025 +0100

    Add regular expression constraint
    
    patch by Stefan Miklosovic; reviewed by Bernardo Botella for CASSANDRA-20275
---
 CHANGES.txt                                        |   1 +
 .../pages/developing/cql/constraints.adoc          |  31 ++++++
 pylib/cqlshlib/cql3handling.py                     |   1 +
 pylib/cqlshlib/test/test_cqlsh_completion.py       |   2 +-
 .../cql3/constraints/ConstraintFunction.java       |   2 +-
 .../cql3/constraints/FunctionColumnConstraint.java |   5 +-
 .../cql3/constraints/NotNullConstraint.java        |   2 +-
 .../cql3/constraints/RegexpConstraint.java         | 112 +++++++++++++++++++++
 .../constraints/UnaryFunctionColumnConstraint.java |   2 +-
 .../cassandra/cql3/functions/types/ParseUtils.java |   2 +-
 .../cassandra/contraints/RegexpConstraintTest.java | 103 +++++++++++++++++++
 11 files changed, 256 insertions(+), 7 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index 7df26992c3..c02e830092 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 5.1
+ * Add regular expression constraint (CASSANDRA-20275)
  * Improve constraints autocompletion (CASSANDRA-20341)
  * Add JVM version and Cassandra build date to nodetool version -v 
(CASSANDRA-19721)
  * Move all disk error logic to DiskErrorsHandler to enable pluggability 
(CASSANDRA-20363)
diff --git a/doc/modules/cassandra/pages/developing/cql/constraints.adoc 
b/doc/modules/cassandra/pages/developing/cql/constraints.adoc
index f768d4fd8f..7e18938729 100644
--- a/doc/modules/cassandra/pages/developing/cql/constraints.adoc
+++ b/doc/modules/cassandra/pages/developing/cql/constraints.adoc
@@ -191,4 +191,35 @@ INSERT INTO ks.tb (id, val) VALUES (1, 'abc');
 ... [Invalid query] message="Value for column 'val' violated JSON
 constraint as it is not a valid JSON."
 
+----
+
+=== REGEXP constraint
+
+Defines a constraint which checks text-like values against a regular expression.
+
+----
+CREATE TABLE ks.tb (
+    id int primary key,
+    value CHECK REGEXP(value) = 'a.*b'
+)
+----
+
+----
+cassandra@cqlsh> INSERT INTO ks.tb (id , value ) VALUES ( 1, 'asdadasdabb');
+cassandra@cqlsh> INSERT INTO ks.tb (id , value ) VALUES ( 1, 'aaaaa');
+... [Invalid query] message="Value does not match regular expression 'a.*b'"
+----
+
+Negation can also be used:
+
+----
+ALTER TABLE ks.tb ALTER value CHECK REGEXP(value) != 'a.*b';
+----
+
+which would logically invert the condition:
+
+----
+cassandra@cqlsh> INSERT INTO ks.tb (id , value ) VALUES ( 1, 'asdadasdabb');
+... [Invalid query] message="Value does match regular expression 'a.*b'"
+cassandra@cqlsh> INSERT INTO ks.tb (id , value ) VALUES ( 1, 'aaaaa');
 ----
\ No newline at end of file
diff --git a/pylib/cqlshlib/cql3handling.py b/pylib/cqlshlib/cql3handling.py
index cddcdb34d6..f1baf2ad29 100644
--- a/pylib/cqlshlib/cql3handling.py
+++ b/pylib/cqlshlib/cql3handling.py
@@ -334,6 +334,7 @@ JUNK ::= /([ 
\t\r\f\v]+|(--|[/][/])[^\n\r]*([\n\r]|$)|[/][*].*?[*][/])/ ;
 
 <constraintComparableFunction> ::= "LENGTH"
                                  | "OCTET_LENGTH"
+                                 | "REGEXP"
                                  ;
 
 <constraintStandaloneFunction> ::= "JSON"
diff --git a/pylib/cqlshlib/test/test_cqlsh_completion.py 
b/pylib/cqlshlib/test/test_cqlsh_completion.py
index aa38ec45db..bf2385a46f 100644
--- a/pylib/cqlshlib/test/test_cqlsh_completion.py
+++ b/pylib/cqlshlib/test/test_cqlsh_completion.py
@@ -1109,7 +1109,7 @@ class TestCqlshCompletion(CqlshCompletionCase):
                             other_choices_ok=True)
         self.trycompletions('ALTER TABLE new_table ADD col int C', 
immediate='HECK ')
         self.trycompletions('ALTER TABLE new_table ADD col int CHECK ',
-                            choices=['<identifier>', '<quotedName>', 'JSON', 
'LENGTH', 'NOT_NULL', 'OCTET_LENGTH'],
+                            choices=['<identifier>', '<quotedName>', 'JSON', 
'LENGTH', 'NOT_NULL', 'OCTET_LENGTH', 'REGEXP'],
                             other_choices_ok=True)
         self.trycompletions('ALTER TABLE IF EXISTS new_table RENAME ', 
choices=['IF', '<quotedName>', '<identifier>'])
         self.trycompletions('ALTER TABLE new_table RENAME ', choices=['IF', 
'<quotedName>', '<identifier>'])
diff --git 
a/src/java/org/apache/cassandra/cql3/constraints/ConstraintFunction.java 
b/src/java/org/apache/cassandra/cql3/constraints/ConstraintFunction.java
index 1dda89093f..9952ab32d9 100644
--- a/src/java/org/apache/cassandra/cql3/constraints/ConstraintFunction.java
+++ b/src/java/org/apache/cassandra/cql3/constraints/ConstraintFunction.java
@@ -79,7 +79,7 @@ public abstract class ConstraintFunction
      * Method that validates that a condition is valid. This method is called 
when the CQL constraint is created to determine
      * if the CQL statement is valid or needs to be rejected as invalid 
throwing a {@link InvalidConstraintDefinitionException}
      */
-    public void validate(ColumnMetadata columnMetadata) throws 
InvalidConstraintDefinitionException
+    public void validate(ColumnMetadata columnMetadata, String term) throws 
InvalidConstraintDefinitionException
     {
     }
 
diff --git 
a/src/java/org/apache/cassandra/cql3/constraints/FunctionColumnConstraint.java 
b/src/java/org/apache/cassandra/cql3/constraints/FunctionColumnConstraint.java
index dac62ddfc6..a94b4bd0bd 100644
--- 
a/src/java/org/apache/cassandra/cql3/constraints/FunctionColumnConstraint.java
+++ 
b/src/java/org/apache/cassandra/cql3/constraints/FunctionColumnConstraint.java
@@ -78,7 +78,8 @@ public class FunctionColumnConstraint extends 
AbstractFunctionConstraint<Functio
     public enum Functions
     {
         LENGTH(LengthConstraint::new),
-        OCTET_LENGTH(OctetLengthConstraint::new);
+        OCTET_LENGTH(OctetLengthConstraint::new),
+        REGEXP(RegexpConstraint::new);
 
         private final Function<ColumnIdentifier, ConstraintFunction> 
functionCreator;
 
@@ -157,7 +158,7 @@ public class FunctionColumnConstraint extends 
AbstractFunctionConstraint<Functio
     {
         validateArgs(columnMetadata);
         validateTypes(columnMetadata);
-        function.validate(columnMetadata);
+        function.validate(columnMetadata, term);
     }
 
     @Override
diff --git 
a/src/java/org/apache/cassandra/cql3/constraints/NotNullConstraint.java 
b/src/java/org/apache/cassandra/cql3/constraints/NotNullConstraint.java
index 465db80933..fb9f7de95b 100644
--- a/src/java/org/apache/cassandra/cql3/constraints/NotNullConstraint.java
+++ b/src/java/org/apache/cassandra/cql3/constraints/NotNullConstraint.java
@@ -49,7 +49,7 @@ public class NotNullConstraint extends ConstraintFunction
     }
 
     @Override
-    public void validate(ColumnMetadata columnMetadata) throws 
InvalidConstraintDefinitionException
+    public void validate(ColumnMetadata columnMetadata, String term) throws 
InvalidConstraintDefinitionException
     {
         if (columnMetadata.isPrimaryKeyColumn())
             throw new InvalidConstraintDefinitionException(format("%s 
constraint can not be specified on a %s key column '%s'",
diff --git 
a/src/java/org/apache/cassandra/cql3/constraints/RegexpConstraint.java 
b/src/java/org/apache/cassandra/cql3/constraints/RegexpConstraint.java
new file mode 100644
index 0000000000..a2e439585f
--- /dev/null
+++ b/src/java/org/apache/cassandra/cql3/constraints/RegexpConstraint.java
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.cql3.constraints;
+
+import java.nio.ByteBuffer;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.cassandra.cql3.ColumnIdentifier;
+import org.apache.cassandra.cql3.Operator;
+import org.apache.cassandra.cql3.functions.types.ParseUtils;
+import org.apache.cassandra.db.marshal.AbstractType;
+import org.apache.cassandra.db.marshal.AsciiType;
+import org.apache.cassandra.db.marshal.UTF8Type;
+import org.apache.cassandra.schema.ColumnMetadata;
+
+import static java.lang.String.format;
+import static org.apache.cassandra.cql3.Operator.EQ;
+import static org.apache.cassandra.cql3.Operator.NEQ;
+
+public class RegexpConstraint extends ConstraintFunction
+{
+    public static final String FUNCTION_NAME = "REGEXP";
+    private static final List<AbstractType<?>> SUPPORTED_TYPES = 
List.of(UTF8Type.instance, AsciiType.instance);
+    private static final List<Operator> ALLOWED_FUNCTION_OPERATORS = 
List.of(EQ, NEQ);
+
+    private Pattern pattern;
+
+    public RegexpConstraint(ColumnIdentifier columnName)
+    {
+        super(columnName, FUNCTION_NAME);
+    }
+
+    @Override
+    protected void internalEvaluate(AbstractType<?> valueType, Operator 
relationType, String regexp, ByteBuffer columnValue)
+    {
+        assert pattern != null;
+        Matcher matcher = pattern.matcher(valueType.getString(columnValue));
+
+        switch (relationType)
+        {
+            case EQ:
+                if (!matcher.matches())
+                    throw new ConstraintViolationException(format("Value does 
not match regular expression %s", regexp));
+                break;
+            case NEQ:
+                if (matcher.matches())
+                    throw new ConstraintViolationException(format("Value does 
match regular expression %s", regexp));
+                break;
+            default:
+                throw new IllegalStateException("Unsupported operator: " + 
relationType);
+        }
+    }
+
+    @Override
+    public List<AbstractType<?>> getSupportedTypes()
+    {
+        return SUPPORTED_TYPES;
+    }
+
+    @Override
+    public List<Operator> getSupportedOperators()
+    {
+        return ALLOWED_FUNCTION_OPERATORS;
+    }
+
+    @Override
+    public void validate(ColumnMetadata columnMetadata, String regexp) throws 
InvalidConstraintDefinitionException
+    {
+        try
+        {
+            // compilation of a regexp every single time upon evaluation is 
not performance friendly
+            // so we "cache" the compiled regexp for further reuse upon actual 
validation
+            pattern = Pattern.compile(ParseUtils.unquote(regexp));
+        }
+        catch (Exception e)
+        {
+            throw new InvalidConstraintDefinitionException(format("String '%s' 
is not a valid regular expression", ParseUtils.unquote(regexp)));
+        }
+    }
+
+    @Override
+    public boolean equals(Object o)
+    {
+        if (this == o)
+            return true;
+
+        if (!(o instanceof RegexpConstraint))
+            return false;
+
+        RegexpConstraint other = (RegexpConstraint) o;
+
+        return columnName.equals(other.columnName);
+    }
+}
diff --git 
a/src/java/org/apache/cassandra/cql3/constraints/UnaryFunctionColumnConstraint.java
 
b/src/java/org/apache/cassandra/cql3/constraints/UnaryFunctionColumnConstraint.java
index d39f7f865d..80fd443e0e 100644
--- 
a/src/java/org/apache/cassandra/cql3/constraints/UnaryFunctionColumnConstraint.java
+++ 
b/src/java/org/apache/cassandra/cql3/constraints/UnaryFunctionColumnConstraint.java
@@ -124,7 +124,7 @@ public class UnaryFunctionColumnConstraint extends 
AbstractFunctionConstraint<Un
     {
         validateArgs(columnMetadata);
         validateTypes(columnMetadata);
-        function.validate(columnMetadata);
+        function.validate(columnMetadata, term);
     }
 
     @Override
diff --git a/src/java/org/apache/cassandra/cql3/functions/types/ParseUtils.java 
b/src/java/org/apache/cassandra/cql3/functions/types/ParseUtils.java
index 8d0f29b45b..45bf720c0c 100644
--- a/src/java/org/apache/cassandra/cql3/functions/types/ParseUtils.java
+++ b/src/java/org/apache/cassandra/cql3/functions/types/ParseUtils.java
@@ -268,7 +268,7 @@ public abstract class ParseUtils
      * @param value The string to unquote.
      * @return The unquoted string.
      */
-    static String unquote(String value)
+    public static String unquote(String value)
     {
         return unquote(value, '\'');
     }
diff --git 
a/test/unit/org/apache/cassandra/contraints/RegexpConstraintTest.java 
b/test/unit/org/apache/cassandra/contraints/RegexpConstraintTest.java
new file mode 100644
index 0000000000..fd4800e247
--- /dev/null
+++ b/test/unit/org/apache/cassandra/contraints/RegexpConstraintTest.java
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.contraints;
+
+import org.junit.Test;
+
+import org.apache.cassandra.cql3.ColumnIdentifier;
+import org.apache.cassandra.cql3.Operator;
+import org.apache.cassandra.cql3.constraints.ColumnConstraints;
+import org.apache.cassandra.cql3.constraints.FunctionColumnConstraint.Raw;
+import 
org.apache.cassandra.cql3.constraints.InvalidConstraintDefinitionException;
+import org.apache.cassandra.cql3.constraints.RegexpConstraint;
+import org.apache.cassandra.db.marshal.AbstractType;
+import org.apache.cassandra.db.marshal.AsciiType;
+import org.apache.cassandra.db.marshal.IntegerType;
+import org.apache.cassandra.db.marshal.UTF8Type;
+import org.apache.cassandra.schema.ColumnMetadata;
+import org.assertj.core.api.ThrowableAssert;
+
+import static java.util.List.of;
+import static org.apache.cassandra.schema.ColumnMetadata.Kind.REGULAR;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+public class RegexpConstraintTest
+{
+    private static final ColumnIdentifier columnIdentifier = new 
ColumnIdentifier("a_column", false);
+    private static final ColumnIdentifier regexpFunctionIdentifier = new 
ColumnIdentifier(RegexpConstraint.FUNCTION_NAME, false);
+    private static final ColumnMetadata regularStringColumn = 
getColumnOfType(UTF8Type.instance);
+    private static final ColumnMetadata regularAsciiColumn = 
getColumnOfType(AsciiType.instance);
+
+    private static final ColumnConstraints regexp = new 
ColumnConstraints(of(new Raw(regexpFunctionIdentifier, columnIdentifier, 
Operator.EQ, "'a..b'").prepare()));
+    private static final ColumnConstraints negatedRegexp = new 
ColumnConstraints(of(new Raw(regexpFunctionIdentifier, columnIdentifier, 
Operator.NEQ, "'a..b'").prepare()));
+
+    @Test
+    public void testRegexpConstraint() throws Throwable
+    {
+        run(regexp, "acdb");
+        run(regexp, "aaaaaaa", "Value does not match regular expression 
'a..b'");
+        run(negatedRegexp, "acdb", "Value does match regular expression 
'a..b'");
+        run(negatedRegexp, "aaaaa");
+    }
+
+    @Test
+    public void testInvalidPattern()
+    {
+        ColumnConstraints invalid = new ColumnConstraints(of(new 
Raw(regexpFunctionIdentifier, columnIdentifier, Operator.EQ, 
"'*abc'").prepare()));
+        assertThatThrownBy(() -> invalid.validate(regularStringColumn))
+        .hasMessage("String '*abc' is not a valid regular expression")
+        .isInstanceOf(InvalidConstraintDefinitionException.class);
+    }
+
+    @Test
+    public void testInvalidTypes()
+    {
+        assertThatThrownBy(() -> 
regexp.validate(getColumnOfType(IntegerType.instance)))
+        .hasMessage("Constraint 'REGEXP' can be used only for columns of type 
" +
+                    "[org.apache.cassandra.db.marshal.UTF8Type, 
org.apache.cassandra.db.marshal.AsciiType] " +
+                    "but it was class 
org.apache.cassandra.db.marshal.IntegerType");
+    }
+
+    private void run(ColumnConstraints regexp, String input) throws Throwable
+    {
+        run(regexp, input, null);
+    }
+
+    private void run(ColumnConstraints regexp, String input, String 
exceptionMessage) throws Throwable
+    {
+        ThrowableAssert.ThrowingCallable callable = () ->
+        {
+            regexp.validate(regularStringColumn);
+            regexp.evaluate(regularStringColumn.type, 
regularStringColumn.type.fromString(input));
+
+            regexp.validate(regularAsciiColumn);
+            regexp.evaluate(regularAsciiColumn.type, 
regularAsciiColumn.type.fromString(input));
+        };
+
+        if (exceptionMessage == null)
+            callable.call();
+        else
+            
assertThatThrownBy(callable).hasMessageContaining(exceptionMessage);
+    }
+
+    private static ColumnMetadata getColumnOfType(AbstractType<?> type)
+    {
+        return new ColumnMetadata("a", "b", columnIdentifier, type, -1, 
REGULAR, null);
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@cassandra.apache.org
For additional commands, e-mail: commits-h...@cassandra.apache.org

Reply via email to