This is an automated email from the ASF dual-hosted git repository.
maedhroz pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/cassandra.git
The following commit(s) were added to refs/heads/trunk by this push:
new 85048b2019 Add post-filtering support for the IN operator in SAI
queries
85048b2019 is described below
commit 85048b20196e2c7fddc142de9095ef6e9f82e837
Author: Sunil Ramchandra Pawar <[email protected]>
AuthorDate: Tue Oct 29 08:33:59 2024 +0530
Add post-filtering support for the IN operator in SAI queries
patch by Sunil Ramchandra Pawar; reviewed by Caleb Rackliffe and Ekaterina
Dimitrova
---
CHANGES.txt | 1 +
.../sai/supported-query-operators-list.adoc | 4 +-
.../cassandra/index/sai/plan/Expression.java | 27 ++++++-
.../cassandra/index/sai/utils/IndexTermType.java | 3 +-
.../distributed/test/sai/StrictFilteringTest.java | 10 +--
.../index/sai/cql/AllowFilteringTest.java | 86 ++++++++++++++++++++++
6 files changed, 116 insertions(+), 15 deletions(-)
diff --git a/CHANGES.txt b/CHANGES.txt
index 360c17b6b6..c5d63acb83 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
5.1
+ * Add post-filtering support for the IN operator in SAI queries
(CASSANDRA-20025)
* Don’t finish ongoing decommission and move operations during startup
(CASSANDRA-20040)
* Nodetool reconfigure cms has correct return code when streaming fails
(CASSANDRA-19972)
* Reintroduce RestrictionSet#iterator() optimization around multi-column
restrictions (CASSANDRA-20034)
diff --git
a/doc/modules/cassandra/partials/sai/supported-query-operators-list.adoc
b/doc/modules/cassandra/partials/sai/supported-query-operators-list.adoc
index 3c90a41703..a5ae372f61 100644
--- a/doc/modules/cassandra/partials/sai/supported-query-operators-list.adoc
+++ b/doc/modules/cassandra/partials/sai/supported-query-operators-list.adoc
@@ -4,6 +4,6 @@ ifeval::["{evalproduct}" == "dse"]
* Strings: `=`, `CONTAINS`, `CONTAINS KEY`, `AND`
endif::[]
ifeval::["{evalproduct}" != "dse"]
-* Numerics: `=`, `<`, `>`, `<=`, `>=`, `AND`, `OR`, `IN`
-* Strings: `=`, `CONTAINS`, `CONTAINS KEY`, `AND`, `OR`, `IN`
+* Numerics: `=`, `<`, `>`, `<=`, `>=`, `AND`
+* Strings: `=`, `CONTAINS`, `CONTAINS KEY`, `AND`
endif::[]
diff --git a/src/java/org/apache/cassandra/index/sai/plan/Expression.java
b/src/java/org/apache/cassandra/index/sai/plan/Expression.java
index 52a77b42d5..9823c3c072 100644
--- a/src/java/org/apache/cassandra/index/sai/plan/Expression.java
+++ b/src/java/org/apache/cassandra/index/sai/plan/Expression.java
@@ -79,7 +79,7 @@ public abstract class Expression
public enum IndexOperator
{
- EQ, RANGE, CONTAINS_KEY, CONTAINS_VALUE, ANN;
+ EQ, RANGE, CONTAINS_KEY, CONTAINS_VALUE, ANN, IN;
public static IndexOperator valueOf(Operator operator)
{
@@ -104,6 +104,9 @@ public abstract class Expression
case ANN:
return ANN;
+ case IN:
+ return IN;
+
default:
return null;
}
@@ -111,7 +114,7 @@ public abstract class Expression
public boolean isEquality()
{
- return this == EQ || this == CONTAINS_KEY || this ==
CONTAINS_VALUE;
+ return this == EQ || this == CONTAINS_KEY || this ==
CONTAINS_VALUE || this == IN;
}
public boolean isEqualityOrRange()
@@ -169,6 +172,7 @@ public abstract class Expression
case EQ:
case CONTAINS:
case CONTAINS_KEY:
+ case IN:
lower = new Bound(value, indexTermType, true);
upper = lower;
operator = IndexOperator.valueOf(op);
@@ -223,8 +227,8 @@ public abstract class Expression
Value first = new Value(buffers.get(0), indexTermType);
Value second = new Value(buffers.get(1), indexTermType);
- // SimpleRestriction#addToRowFilter() ensures correct bounds
ordering, but SAI enforces a non-arbitrary
- // ordering between IPv4 and IPv6 addresses, so correction may
still be necessary.
+ // SimpleRestriction#addToRowFilter() ensures correct bounds
ordering, but SAI enforces a non-arbitrary
+ // ordering between IPv4 and IPv6 addresses, so correction may
still be necessary.
boolean outOfOrder = indexTermType.compare(first.encoded,
second.encoded) > 0;
lower = new Bound(outOfOrder ? second : first, true);
upper = new Bound(outOfOrder ? first : second, true);
@@ -236,6 +240,7 @@ public abstract class Expression
lower = new Bound(value, indexTermType, true);
upper = lower;
break;
+
default:
throw new IllegalArgumentException("Index does not support the
" + op + " operator");
}
@@ -275,6 +280,9 @@ public abstract class Expression
if (operator == IndexOperator.EQ || operator ==
IndexOperator.CONTAINS_KEY || operator == IndexOperator.CONTAINS_VALUE)
return cmp == 0;
+ if (operator == IndexOperator.IN)
+ return termMatches(value.raw, lower.value.raw);
+
if (cmp > 0 || (cmp == 0 && !lowerInclusive))
return false;
}
@@ -335,6 +343,17 @@ public abstract class Expression
case RANGE:
isMatch = isLowerSatisfiedBy(term) && isUpperSatisfiedBy(term);
break;
+ case IN:
+ ListType<?> type =
ListType.getInstance(indexTermType.columnMetadata().type, true);
+ List<? extends ByteBuffer> buffers =
type.unpack(requestedValue);
+ for (ByteBuffer value : buffers)
+ {
+ if (indexTermType.compare(term, value) == 0)
+ {
+ return true;
+ }
+ }
+ break;
}
return isMatch;
}
diff --git a/src/java/org/apache/cassandra/index/sai/utils/IndexTermType.java
b/src/java/org/apache/cassandra/index/sai/utils/IndexTermType.java
index a558d5ee82..c1dfad04a5 100644
--- a/src/java/org/apache/cassandra/index/sai/utils/IndexTermType.java
+++ b/src/java/org/apache/cassandra/index/sai/utils/IndexTermType.java
@@ -566,7 +566,8 @@ public class IndexTermType
operator == Operator.LIKE_CONTAINS ||
operator == Operator.LIKE_PREFIX ||
operator == Operator.LIKE_MATCHES ||
- operator == Operator.LIKE_SUFFIX) return false;
+ operator == Operator.LIKE_SUFFIX ||
+ operator == Operator.IN) return false;
// ANN is only supported against vectors, and vector indexes only
support ANN
if (operator == Operator.ANN)
diff --git
a/test/distributed/org/apache/cassandra/distributed/test/sai/StrictFilteringTest.java
b/test/distributed/org/apache/cassandra/distributed/test/sai/StrictFilteringTest.java
index 301336f862..83bfdcf948 100644
---
a/test/distributed/org/apache/cassandra/distributed/test/sai/StrictFilteringTest.java
+++
b/test/distributed/org/apache/cassandra/distributed/test/sai/StrictFilteringTest.java
@@ -21,17 +21,14 @@ package org.apache.cassandra.distributed.test.sai;
import java.io.IOException;
import java.util.Iterator;
-import org.assertj.core.api.Assertions;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
-import org.apache.cassandra.cql3.Operator;
import org.apache.cassandra.distributed.Cluster;
import org.apache.cassandra.distributed.api.ConsistencyLevel;
import org.apache.cassandra.distributed.shared.AssertUtils;
import org.apache.cassandra.distributed.test.TestBaseImpl;
-import org.apache.cassandra.index.sai.plan.StorageAttachedIndexQueryPlan;
import static org.apache.cassandra.distributed.api.Feature.GOSSIP;
import static org.apache.cassandra.distributed.api.Feature.NETWORK;
@@ -55,11 +52,10 @@ public class StrictFilteringTest extends TestBaseImpl
}
@Test
- public void shouldRejectNonStrictIN()
+ public void shouldPostFilterNonStrictIN()
{
CLUSTER.schemaChange(withKeyspace("CREATE TABLE %s.reject_in (k int
PRIMARY KEY, a int, b int) WITH read_repair = 'NONE'"));
CLUSTER.schemaChange(withKeyspace("CREATE INDEX ON %s.reject_in(a)
USING 'sai'"));
- CLUSTER.schemaChange(withKeyspace("CREATE INDEX ON %s.reject_in(b)
USING 'sai'"));
SAIUtil.waitForIndexQueryable(CLUSTER, KEYSPACE);
// insert an unrepaired row
@@ -68,9 +64,7 @@ public class StrictFilteringTest extends TestBaseImpl
String select = withKeyspace("SELECT * FROM %s.reject_in WHERE a = 1
AND b IN (2, 3) ALLOW FILTERING");
- // This should fail, as strict filtering is not allowed:
- Assertions.assertThatThrownBy(() ->
CLUSTER.coordinator(1).execute(select, ConsistencyLevel.ALL))
-
.hasMessageContaining(String.format(StorageAttachedIndexQueryPlan.UNSUPPORTED_NON_STRICT_OPERATOR,
Operator.IN));
+ assertRows(CLUSTER.coordinator(1).execute(select,
ConsistencyLevel.ALL), row(0,1,2));
// Repair fixes the split row, although we still only allow the query
when reconciliation is not required:
CLUSTER.get(1).nodetoolResult("repair", KEYSPACE).asserts().success();
diff --git
a/test/unit/org/apache/cassandra/index/sai/cql/AllowFilteringTest.java
b/test/unit/org/apache/cassandra/index/sai/cql/AllowFilteringTest.java
index f80d07eeb7..7a9198a700 100644
--- a/test/unit/org/apache/cassandra/index/sai/cql/AllowFilteringTest.java
+++ b/test/unit/org/apache/cassandra/index/sai/cql/AllowFilteringTest.java
@@ -296,6 +296,91 @@ public class AllowFilteringTest extends SAITester
test("SELECT * FROM %s WHERE v1=0 AND v2=0 AND k1=0 AND k2=0 AND (c1,
c2, c3, c4) = (0, 0, 0, 0) AND v3=0", true);
}
+ @Test
+ public void testAllowFilteringTextWithINClause ()
+ {
+ createTable("CREATE TABLE %S (k1 TEXT, k2 TEXT, k3 TEXT, PRIMARY
KEY(k1))");
+ createIndex("CREATE INDEX ON %s(K2) USING 'sai'");
+
+ execute("INSERT INTO %s (k1,k2,k3) VALUES ('s1','s11','s111')");
+ execute("INSERT INTO %s (k1,k2,k3) VALUES ('s2','s11','s11')");
+ execute("INSERT INTO %s (k1,k2,k3) VALUES ('s3','s22','s111')");
+ execute("INSERT INTO %s (k1,k2,k3) VALUES ('s4','s22','s111')");
+ execute("INSERT INTO %s (k1,k2,k3) VALUES ('s5','s31','s111')");
+
+ assertRowCount(execute("SELECT * FROM %s WHERE k2='s11' AND k3 IN
('s11','s111') ALLOW FILTERING"),2);
+ assertRowCount(execute("SELECT * FROM %s WHERE k2='s22' AND k3 IN
('s111','s111') ALLOW FILTERING"), 2);
+ assertRowCount(execute("SELECT * FROM %s WHERE k2='s22' AND k3 IN
('s','s1') ALLOW FILTERING"), 0);
+ // To test if an IN clause without an AND condition does not create a
query plan and works as expected.
+ assertRowCount(execute("SELECT * FROM %s WHERE k2 IN ('s11','s22')
ALLOW FILTERING"), 4);
+
+ }
+
+ @Test
+ public void testAllowFilteringIntWithINClause ()
+ {
+ createTable("CREATE TABLE %S (k1 text, k2 int, k3 int, PRIMARY
KEY(k1))");
+ createIndex("CREATE INDEX ON %s(K2) USING 'sai'");
+
+ execute("insert into %s (k1,k2,k3) values ('s1',11,1)");
+ execute("insert into %s (k1,k2,k3) values ('s2',11,11)");
+ execute("insert into %s (k1,k2,k3) values ('s3',11,111)");
+ execute("insert into %s (k1,k2,k3) values ('s4',22,1)");
+ execute("insert into %s (k1,k2,k3) values ('s5',22,11)");
+ execute("insert into %s (k1,k2,k3) values ('s6',22,111)");
+
+ assertRowCount(execute("SELECT * FROM %s WHERE k2=11 AND k3 IN
(1,11,111) ALLOW FILTERING"),3);
+ assertRowCount(execute("SELECT * FROM %s WHERE k2=22 AND k3 IN
(1,11,111) ALLOW FILTERING"),3);
+ assertRowCount(execute("SELECT * FROM %s WHERE k2=22 AND k3 IN
(101,102) ALLOW FILTERING"),0);
+ // To test if an IN clause without an AND condition does not create a
query plan and works as expected.
+ assertRowCount(execute("SELECT * FROM %s WHERE k2 IN (11,22) ALLOW
FILTERING"), 6);
+
+ }
+
+ @Test
+ public void testAllowFilteringBigIntWithINClause ()
+ {
+ createTable("CREATE TABLE %S (k1 text, k2 bigint, k3 bigint,
PRIMARY KEY(k1))");
+ createIndex("CREATE INDEX ON %s(K2) USING 'sai'");
+
+ execute("insert into %s (k1, k2, k3) values ('s1', 1001, 100)");
+ execute("insert into %s (k1, k2, k3) values ('s2', 1001, 200)");
+ execute("insert into %s (k1, k2, k3) values ('s3', 1001, 300)");
+ execute("insert into %s (k1, k2, k3) values ('s4', 2002, 100)");
+ execute("insert into %s (k1, k2, k3) values ('s5', 2002, 200)");
+ execute("insert into %s (k1, k2, k3) values ('s6', 2002, 300)");
+
+ assertRowCount(execute("SELECT * FROM %s WHERE k2=1001 AND k3 IN
(100, 200, 300) ALLOW FILTERING"), 3);
+ assertRowCount(execute("SELECT * FROM %s WHERE k2=2002 AND k3 IN
(100, 200, 300) ALLOW FILTERING"), 3);
+ assertRowCount(execute("SELECT * FROM %s WHERE k2=2002 AND k3 IN
(101, 201, 301) ALLOW FILTERING"), 0);
+ // To test if an IN clause without an AND condition does not create a
query plan and works as expected.
+ assertRowCount(execute("SELECT * FROM %s WHERE k2 IN (1001,2002)
ALLOW FILTERING"), 6);
+
+ }
+
+ @Test
+ public void testAllowFilteringBigDecimalWithINClause()
+ {
+ createTable("CREATE TABLE %S (k1 text, k2 decimal, k3 decimal, PRIMARY
KEY(k1))");
+ createIndex("CREATE INDEX ON %s(K2) USING 'sai'");
+
+ execute("insert into %s (k1, k2, k3) values ('s1', 1.1, 1.11)");
+ execute("insert into %s (k1, k2, k3) values ('s2', 1.1, 1.12)");
+ execute("insert into %s (k1, k2, k3) values ('s3', 1.1, 1.13)");
+ execute("insert into %s (k1, k2, k3) values ('s4', 2.2, 1.11)");
+ execute("insert into %s (k1, k2, k3) values ('s5', 2.2, 1.12)");
+ execute("insert into %s (k1, k2, k3) values ('s6', 2.2, 1.13)");
+
+ assertRowCount(execute("SELECT * FROM %s WHERE k2=1.1 AND k3 IN (1.11,
1.12, 1.13) ALLOW FILTERING"), 3);
+ assertRowCount(execute("SELECT * FROM %s WHERE k2=2.2 AND k3 IN (1.11,
1.12, 1.13) ALLOW FILTERING"), 3);
+ assertRowCount(execute("SELECT * FROM %s WHERE k2=2.2 AND k3 IN (1.21,
1.22, 1.13) ALLOW FILTERING"), 1);
+ assertRowCount(execute("SELECT * FROM %s WHERE k2=2.2 AND k3 IN (1.21,
1.22, 1.23) ALLOW FILTERING"), 0);
+ // To test if an IN clause without an AND condition does not create a
query plan and works as expected.
+ assertRowCount(execute("SELECT * FROM %s WHERE k2 IN (1.1,2.2) ALLOW
FILTERING"), 6);
+ }
+
+
+
private void test(String query, boolean requiresAllowFiltering) throws
Throwable
{
if (requiresAllowFiltering)
@@ -305,4 +390,5 @@ public class AllowFilteringTest extends SAITester
assertNotNull(execute(query + " ALLOW FILTERING"));
}
+
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]