This is an automated email from the ASF dual-hosted git repository.

maedhroz pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/cassandra.git


The following commit(s) were added to refs/heads/trunk by this push:
     new 85048b2019 Add post-filtering support for the IN operator in SAI 
queries
85048b2019 is described below

commit 85048b20196e2c7fddc142de9095ef6e9f82e837
Author: Sunil Ramchandra Pawar <[email protected]>
AuthorDate: Tue Oct 29 08:33:59 2024 +0530

    Add post-filtering support for the IN operator in SAI queries
    
    patch by Sunil Ramchandra Pawar; reviewed by Caleb Rackliffe and Ekaterina 
Dimitrova
---
 CHANGES.txt                                        |  1 +
 .../sai/supported-query-operators-list.adoc        |  4 +-
 .../cassandra/index/sai/plan/Expression.java       | 27 ++++++-
 .../cassandra/index/sai/utils/IndexTermType.java   |  3 +-
 .../distributed/test/sai/StrictFilteringTest.java  | 10 +--
 .../index/sai/cql/AllowFilteringTest.java          | 86 ++++++++++++++++++++++
 6 files changed, 116 insertions(+), 15 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index 360c17b6b6..c5d63acb83 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 5.1
+ * Add post-filtering support for the IN operator in SAI queries 
(CASSANDRA-20025)
  * Don’t finish ongoing decommission and move operations during startup 
(CASSANDRA-20040)
  * Nodetool reconfigure cms has correct return code when streaming fails 
(CASSANDRA-19972)
  * Reintroduce RestrictionSet#iterator() optimization around multi-column 
restrictions (CASSANDRA-20034)
diff --git 
a/doc/modules/cassandra/partials/sai/supported-query-operators-list.adoc 
b/doc/modules/cassandra/partials/sai/supported-query-operators-list.adoc
index 3c90a41703..a5ae372f61 100644
--- a/doc/modules/cassandra/partials/sai/supported-query-operators-list.adoc
+++ b/doc/modules/cassandra/partials/sai/supported-query-operators-list.adoc
@@ -4,6 +4,6 @@ ifeval::["{evalproduct}" == "dse"]
 * Strings: `=`, `CONTAINS`, `CONTAINS KEY`, `AND`
 endif::[]
 ifeval::["{evalproduct}" != "dse"]
-* Numerics: `=`, `<`, `>`, `<=`, `>=`, `AND`, `OR`, `IN`
-* Strings: `=`, `CONTAINS`, `CONTAINS KEY`, `AND`, `OR`, `IN`
+* Numerics: `=`, `<`, `>`, `<=`, `>=`, `AND`
+* Strings: `=`, `CONTAINS`, `CONTAINS KEY`, `AND`
 endif::[]
diff --git a/src/java/org/apache/cassandra/index/sai/plan/Expression.java 
b/src/java/org/apache/cassandra/index/sai/plan/Expression.java
index 52a77b42d5..9823c3c072 100644
--- a/src/java/org/apache/cassandra/index/sai/plan/Expression.java
+++ b/src/java/org/apache/cassandra/index/sai/plan/Expression.java
@@ -79,7 +79,7 @@ public abstract class Expression
 
     public enum IndexOperator
     {
-        EQ, RANGE, CONTAINS_KEY, CONTAINS_VALUE, ANN;
+        EQ, RANGE, CONTAINS_KEY, CONTAINS_VALUE, ANN, IN;
 
         public static IndexOperator valueOf(Operator operator)
         {
@@ -104,6 +104,9 @@ public abstract class Expression
                 case ANN:
                     return ANN;
 
+                case IN:
+                    return IN;
+
                 default:
                     return null;
             }
@@ -111,7 +114,7 @@ public abstract class Expression
 
         public boolean isEquality()
         {
-            return this == EQ || this == CONTAINS_KEY || this == 
CONTAINS_VALUE;
+            return this == EQ || this == CONTAINS_KEY || this == 
CONTAINS_VALUE || this == IN;
         }
 
         public boolean isEqualityOrRange()
@@ -169,6 +172,7 @@ public abstract class Expression
             case EQ:
             case CONTAINS:
             case CONTAINS_KEY:
+            case IN:
                 lower = new Bound(value, indexTermType, true);
                 upper = lower;
                 operator = IndexOperator.valueOf(op);
@@ -223,8 +227,8 @@ public abstract class Expression
                 Value first = new Value(buffers.get(0), indexTermType);
                 Value second = new Value(buffers.get(1), indexTermType);
 
-                // SimpleRestriction#addToRowFilter() ensures correct bounds 
ordering, but SAI enforces a non-arbitrary 
-                // ordering between IPv4 and IPv6 addresses, so correction may 
still be necessary. 
+                // SimpleRestriction#addToRowFilter() ensures correct bounds 
ordering, but SAI enforces a non-arbitrary
+                // ordering between IPv4 and IPv6 addresses, so correction may 
still be necessary.
                 boolean outOfOrder = indexTermType.compare(first.encoded, 
second.encoded) > 0;
                 lower = new Bound(outOfOrder ? second : first, true);
                 upper = new Bound(outOfOrder ? first : second, true);
@@ -236,6 +240,7 @@ public abstract class Expression
                 lower = new Bound(value, indexTermType, true);
                 upper = lower;
                 break;
+
             default:
                 throw new IllegalArgumentException("Index does not support the 
" + op + " operator");
         }
@@ -275,6 +280,9 @@ public abstract class Expression
                 if (operator == IndexOperator.EQ || operator == 
IndexOperator.CONTAINS_KEY || operator == IndexOperator.CONTAINS_VALUE)
                     return cmp == 0;
 
+                if (operator == IndexOperator.IN)
+                    return termMatches(value.raw, lower.value.raw);
+
                 if (cmp > 0 || (cmp == 0 && !lowerInclusive))
                     return false;
             }
@@ -335,6 +343,17 @@ public abstract class Expression
             case RANGE:
                 isMatch = isLowerSatisfiedBy(term) && isUpperSatisfiedBy(term);
                 break;
+            case IN:
+                ListType<?> type = 
ListType.getInstance(indexTermType.columnMetadata().type, true);
+                List<? extends ByteBuffer> buffers = 
type.unpack(requestedValue);
+                for (ByteBuffer value : buffers)
+                {
+                    if (indexTermType.compare(term, value) == 0)
+                    {
+                        return true;
+                    }
+                }
+                break;
         }
         return isMatch;
     }
diff --git a/src/java/org/apache/cassandra/index/sai/utils/IndexTermType.java 
b/src/java/org/apache/cassandra/index/sai/utils/IndexTermType.java
index a558d5ee82..c1dfad04a5 100644
--- a/src/java/org/apache/cassandra/index/sai/utils/IndexTermType.java
+++ b/src/java/org/apache/cassandra/index/sai/utils/IndexTermType.java
@@ -566,7 +566,8 @@ public class IndexTermType
             operator == Operator.LIKE_CONTAINS ||
             operator == Operator.LIKE_PREFIX ||
             operator == Operator.LIKE_MATCHES ||
-            operator == Operator.LIKE_SUFFIX) return false;
+            operator == Operator.LIKE_SUFFIX ||
+            operator == Operator.IN) return false;
 
         // ANN is only supported against vectors, and vector indexes only 
support ANN
         if (operator == Operator.ANN)
diff --git 
a/test/distributed/org/apache/cassandra/distributed/test/sai/StrictFilteringTest.java
 
b/test/distributed/org/apache/cassandra/distributed/test/sai/StrictFilteringTest.java
index 301336f862..83bfdcf948 100644
--- 
a/test/distributed/org/apache/cassandra/distributed/test/sai/StrictFilteringTest.java
+++ 
b/test/distributed/org/apache/cassandra/distributed/test/sai/StrictFilteringTest.java
@@ -21,17 +21,14 @@ package org.apache.cassandra.distributed.test.sai;
 import java.io.IOException;
 import java.util.Iterator;
 
-import org.assertj.core.api.Assertions;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.Test;
 
-import org.apache.cassandra.cql3.Operator;
 import org.apache.cassandra.distributed.Cluster;
 import org.apache.cassandra.distributed.api.ConsistencyLevel;
 import org.apache.cassandra.distributed.shared.AssertUtils;
 import org.apache.cassandra.distributed.test.TestBaseImpl;
-import org.apache.cassandra.index.sai.plan.StorageAttachedIndexQueryPlan;
 
 import static org.apache.cassandra.distributed.api.Feature.GOSSIP;
 import static org.apache.cassandra.distributed.api.Feature.NETWORK;
@@ -55,11 +52,10 @@ public class StrictFilteringTest extends TestBaseImpl
     }
 
     @Test
-    public void shouldRejectNonStrictIN()
+    public void shouldPostFilterNonStrictIN()
     {
         CLUSTER.schemaChange(withKeyspace("CREATE TABLE %s.reject_in (k int 
PRIMARY KEY, a int, b int) WITH read_repair = 'NONE'"));
         CLUSTER.schemaChange(withKeyspace("CREATE INDEX ON %s.reject_in(a) 
USING 'sai'"));
-        CLUSTER.schemaChange(withKeyspace("CREATE INDEX ON %s.reject_in(b) 
USING 'sai'"));
         SAIUtil.waitForIndexQueryable(CLUSTER, KEYSPACE);
 
         // insert an unrepaired row
@@ -68,9 +64,7 @@ public class StrictFilteringTest extends TestBaseImpl
 
         String select = withKeyspace("SELECT * FROM %s.reject_in WHERE a = 1 
AND b IN (2, 3) ALLOW FILTERING");
 
-        // This should fail, as strict filtering is not allowed:
-        Assertions.assertThatThrownBy(() -> 
CLUSTER.coordinator(1).execute(select, ConsistencyLevel.ALL))
-                  
.hasMessageContaining(String.format(StorageAttachedIndexQueryPlan.UNSUPPORTED_NON_STRICT_OPERATOR,
 Operator.IN));
+        assertRows(CLUSTER.coordinator(1).execute(select, 
ConsistencyLevel.ALL), row(0,1,2));
 
         // Repair fixes the split row, although we still only allow the query 
when reconciliation is not required:
         CLUSTER.get(1).nodetoolResult("repair", KEYSPACE).asserts().success();
diff --git 
a/test/unit/org/apache/cassandra/index/sai/cql/AllowFilteringTest.java 
b/test/unit/org/apache/cassandra/index/sai/cql/AllowFilteringTest.java
index f80d07eeb7..7a9198a700 100644
--- a/test/unit/org/apache/cassandra/index/sai/cql/AllowFilteringTest.java
+++ b/test/unit/org/apache/cassandra/index/sai/cql/AllowFilteringTest.java
@@ -296,6 +296,91 @@ public class AllowFilteringTest extends SAITester
         test("SELECT * FROM %s WHERE v1=0 AND v2=0 AND k1=0 AND k2=0 AND (c1, 
c2, c3, c4) = (0, 0, 0, 0) AND v3=0", true);
     }
 
+    @Test
+    public void testAllowFilteringTextWithINClause ()
+    {
+        createTable("CREATE TABLE %S (k1 TEXT, k2 TEXT, k3 TEXT, PRIMARY 
KEY(k1))");
+        createIndex("CREATE INDEX ON %s(K2) USING 'sai'");
+
+        execute("INSERT INTO %s (k1,k2,k3) VALUES ('s1','s11','s111')");
+        execute("INSERT INTO %s (k1,k2,k3) VALUES ('s2','s11','s11')");
+        execute("INSERT INTO %s (k1,k2,k3) VALUES ('s3','s22','s111')");
+        execute("INSERT INTO %s (k1,k2,k3) VALUES ('s4','s22','s111')");
+        execute("INSERT INTO %s (k1,k2,k3) VALUES ('s5','s31','s111')");
+
+        assertRowCount(execute("SELECT * FROM %s WHERE k2='s11' AND k3 IN 
('s11','s111') ALLOW FILTERING"),2);
+        assertRowCount(execute("SELECT * FROM %s WHERE k2='s22' AND k3 IN 
('s111','s111') ALLOW FILTERING"), 2);
+        assertRowCount(execute("SELECT * FROM %s WHERE k2='s22' AND k3 IN 
('s','s1') ALLOW FILTERING"), 0);
+        // To test if an IN clause without an AND condition does not create a 
query plan and works as expected.
+        assertRowCount(execute("SELECT * FROM %s WHERE k2 IN ('s11','s22') 
ALLOW FILTERING"), 4);
+
+    }
+
+    @Test
+    public void testAllowFilteringIntWithINClause ()
+    {
+        createTable("CREATE TABLE %S (k1 text, k2 int, k3 int, PRIMARY 
KEY(k1))");
+        createIndex("CREATE INDEX ON %s(K2) USING 'sai'");
+
+        execute("insert into %s (k1,k2,k3) values ('s1',11,1)");
+        execute("insert into %s (k1,k2,k3) values ('s2',11,11)");
+        execute("insert into %s (k1,k2,k3) values ('s3',11,111)");
+        execute("insert into %s (k1,k2,k3) values ('s4',22,1)");
+        execute("insert into %s (k1,k2,k3) values ('s5',22,11)");
+        execute("insert into %s (k1,k2,k3) values ('s6',22,111)");
+
+        assertRowCount(execute("SELECT * FROM %s WHERE k2=11 AND k3 IN 
(1,11,111) ALLOW FILTERING"),3);
+        assertRowCount(execute("SELECT * FROM %s WHERE k2=22 AND k3 IN 
(1,11,111) ALLOW FILTERING"),3);
+        assertRowCount(execute("SELECT * FROM %s WHERE k2=22 AND k3 IN 
(101,102) ALLOW FILTERING"),0);
+        // To test if an IN clause without an AND condition does not create a 
query plan and works as expected.
+        assertRowCount(execute("SELECT * FROM %s WHERE k2 IN (11,22) ALLOW 
FILTERING"), 6);
+
+    }
+
+    @Test
+    public void testAllowFilteringBigIntWithINClause ()
+    {
+            createTable("CREATE TABLE %S (k1 text, k2 bigint, k3 bigint, 
PRIMARY KEY(k1))");
+            createIndex("CREATE INDEX ON %s(K2) USING 'sai'");
+
+            execute("insert into %s (k1, k2, k3) values ('s1', 1001, 100)");
+            execute("insert into %s (k1, k2, k3) values ('s2', 1001, 200)");
+            execute("insert into %s (k1, k2, k3) values ('s3', 1001, 300)");
+            execute("insert into %s (k1, k2, k3) values ('s4', 2002, 100)");
+            execute("insert into %s (k1, k2, k3) values ('s5', 2002, 200)");
+            execute("insert into %s (k1, k2, k3) values ('s6', 2002, 300)");
+
+            assertRowCount(execute("SELECT * FROM %s WHERE k2=1001 AND k3 IN 
(100, 200, 300) ALLOW FILTERING"), 3);
+            assertRowCount(execute("SELECT * FROM %s WHERE k2=2002 AND k3 IN 
(100, 200, 300) ALLOW FILTERING"), 3);
+            assertRowCount(execute("SELECT * FROM %s WHERE k2=2002 AND k3 IN 
(101, 201, 301) ALLOW FILTERING"), 0);
+        // To test if an IN clause without an AND condition does not create a 
query plan and works as expected.
+            assertRowCount(execute("SELECT * FROM %s WHERE k2 IN (1001,2002) 
ALLOW FILTERING"), 6);
+
+    }
+
+    @Test
+    public void testAllowFilteringBigDecimalWithINClause()
+    {
+        createTable("CREATE TABLE %S (k1 text, k2 decimal, k3 decimal, PRIMARY 
KEY(k1))");
+        createIndex("CREATE INDEX ON %s(K2) USING 'sai'");
+
+        execute("insert into %s (k1, k2, k3) values ('s1', 1.1, 1.11)");
+        execute("insert into %s (k1, k2, k3) values ('s2', 1.1, 1.12)");
+        execute("insert into %s (k1, k2, k3) values ('s3', 1.1, 1.13)");
+        execute("insert into %s (k1, k2, k3) values ('s4', 2.2, 1.11)");
+        execute("insert into %s (k1, k2, k3) values ('s5', 2.2, 1.12)");
+        execute("insert into %s (k1, k2, k3) values ('s6', 2.2, 1.13)");
+
+        assertRowCount(execute("SELECT * FROM %s WHERE k2=1.1 AND k3 IN (1.11, 
1.12, 1.13) ALLOW FILTERING"), 3);
+        assertRowCount(execute("SELECT * FROM %s WHERE k2=2.2 AND k3 IN (1.11, 
1.12, 1.13) ALLOW FILTERING"), 3);
+        assertRowCount(execute("SELECT * FROM %s WHERE k2=2.2 AND k3 IN (1.21, 
1.22, 1.13) ALLOW FILTERING"), 1);
+        assertRowCount(execute("SELECT * FROM %s WHERE k2=2.2 AND k3 IN (1.21, 
1.22, 1.23) ALLOW FILTERING"), 0);
+        // To test if an IN clause without an AND condition does not create a 
query plan and works as expected.
+        assertRowCount(execute("SELECT * FROM %s WHERE k2 IN (1.1,2.2) ALLOW 
FILTERING"), 6);
+    }
+
+
+
     private void test(String query, boolean requiresAllowFiltering) throws 
Throwable
     {
         if (requiresAllowFiltering)
@@ -305,4 +390,5 @@ public class AllowFilteringTest extends SAITester
 
         assertNotNull(execute(query + " ALLOW FILTERING"));
     }
+
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to