This is an automated email from the ASF dual-hosted git repository.
mck pushed a commit to branch cassandra-5.0
in repository https://gitbox.apache.org/repos/asf/cassandra.git
The following commit(s) were added to refs/heads/cassandra-5.0 by this push:
new cada1a13c8 Vector search should be able to restrict on clustering keys when filtering isn't required
cada1a13c8 is described below
commit cada1a13c8b77fc672d67d754912cf28a7120e3c
Author: Mick Semb Wever <[email protected]>
AuthorDate: Tue Apr 9 00:11:20 2024 +0200
Vector search should be able to restrict on clustering keys when filtering isn't required
patch by Mick Semb Wever; reviewed by Caleb Rackliffe for CASSANDRA-19544
---
CHANGES.txt | 1 +
.../cql3/restrictions/StatementRestrictions.java | 17 +++++++---
.../cassandra/cql3/statements/SelectStatement.java | 4 +--
.../CassandraXMLJUnitResultFormatter.java | 3 +-
.../index/sai/cql/VectorInvalidQueryTest.java | 36 ++++++++++++++++++++++
5 files changed, 53 insertions(+), 8 deletions(-)
diff --git a/CHANGES.txt b/CHANGES.txt
index eaf27a314c..74d142089c 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
5.0-beta2
+ * Vector search can restrict on clustering keys when filtering isn't required (CASSANDRA-19544)
* Fix FBUtilities' parsing of gcp cos_containerd kernel versions (CASSANDRA-18594)
* Clean up KeyRangeIterator classes (CASSANDRA-19428)
* Warn clients about possible consistency violations for filtering queries against multiple mutable columns (CASSANDRA-19489)
diff --git a/src/java/org/apache/cassandra/cql3/restrictions/StatementRestrictions.java b/src/java/org/apache/cassandra/cql3/restrictions/StatementRestrictions.java
index 4f6b829191..d5b6a2a6fd 100644
--- a/src/java/org/apache/cassandra/cql3/restrictions/StatementRestrictions.java
+++ b/src/java/org/apache/cassandra/cql3/restrictions/StatementRestrictions.java
@@ -317,8 +317,15 @@ public final class StatementRestrictions
var nonIndexedColumns =
Stream.concat(nonAnnColumns.stream(), clusteringColumns.stream())
.filter(c ->
indexRegistry.listIndexes().stream().noneMatch(i -> i.dependsOn(c)))
.collect(Collectors.toList());
+
if (!nonIndexedColumns.isEmpty())
- throw
invalidRequest(StatementRestrictions.ANN_REQUIRES_INDEXED_FILTERING_MESSAGE);
+ {
+ // restrictions on non-clustering columns, or clusterings that still need filtering, are invalid
+ if (!clusteringColumns.containsAll(nonIndexedColumns)
+ ||
partitionKeyRestrictions.hasUnrestrictedPartitionKeyComponents(table)
+ ||
clusteringColumnsRestrictions.needFiltering())
+ throw
invalidRequest(StatementRestrictions.ANN_REQUIRES_INDEXED_FILTERING_MESSAGE);
+ }
}
}
else
@@ -467,7 +474,7 @@ public final class StatementRestrictions
/**
* This method determines whether a specified column is restricted on
equality or something equivalent, like IN.
- * It can be used in conjunction with the columns selected by a query to
determine which of those columns is
+ * It can be used in conjunction with the columns selected by a query to
determine which of those columns is
* already bound by the client (and from its perspective, not retrieved by
the database).
*
* @param column a column from the same table these restrictions are
against
@@ -779,8 +786,8 @@ public final class StatementRestrictions
if (filterRestrictions.isEmpty())
return RowFilter.none();
- // If there is only one replica, we don't need reconciliation at any
consistency level.
- boolean needsReconciliation = !table.isVirtual()
+ // If there is only one replica, we don't need reconciliation at any
consistency level.
+ boolean needsReconciliation = !table.isVirtual()
&&
options.getConsistency().needsReconciliation()
&&
Keyspace.open(table.keyspace).getReplicationStrategy().getReplicationFactor().allReplicas
> 1;
@@ -1041,7 +1048,7 @@ public final class StatementRestrictions
// a full partition query, then we include that content.
return queriesFullPartitions();
}
-
+
@Override
public String toString()
{
diff --git a/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
b/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
index 5418159ad6..d8fa830b13 100644
--- a/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
+++ b/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
@@ -120,7 +120,7 @@ public class SelectStatement implements
CQLStatement.SingleKeyspaceCqlStatement
"/ LOCAL_ONE /
NODE_LOCAL. Consistency level %s was requested. " +
"Downgrading
the consistency level to %s.";
public static final String TOPK_PAGE_SIZE_WARNING = "Top-K queries do not
support paging and the page size is set to %d, " +
- "which is less than
LIMIT %d. The page size has been set to %<d to match the LIMIT.";
+ "which is less than
LIMIT %d. The page size has been set to %d to match the LIMIT.";
public final VariableSpecifications bindVariables;
public final TableMetadata table;
@@ -325,7 +325,7 @@ public class SelectStatement implements
CQLStatement.SingleKeyspaceCqlStatement
pageSize = limit.count();
limit = getDataLimits(userLimit, userPerPartitionLimit,
pageSize, aggregationSpec);
options = QueryOptions.withPageSize(options, pageSize);
- ClientWarn.instance.warn(String.format(TOPK_PAGE_SIZE_WARNING,
oldPageSize, limit.count()));
+ ClientWarn.instance.warn(String.format(TOPK_PAGE_SIZE_WARNING,
oldPageSize, limit.count(), pageSize));
}
}
diff --git
a/test/unit/org/apache/cassandra/CassandraXMLJUnitResultFormatter.java
b/test/unit/org/apache/cassandra/CassandraXMLJUnitResultFormatter.java
index d59be7790c..7a9df05a48 100644
--- a/test/unit/org/apache/cassandra/CassandraXMLJUnitResultFormatter.java
+++ b/test/unit/org/apache/cassandra/CassandraXMLJUnitResultFormatter.java
@@ -219,7 +219,8 @@ public class CassandraXMLJUnitResultFormatter implements
JUnitResultFormatter, X
{
// only include properties and system-out if there's failure/error
rootElement.appendChild(propsElement);
- rootElement.appendChild(systemOutputElement);
+ if (null != systemOutputElement)
+ rootElement.appendChild(systemOutputElement);
}
if (out != null) {
Writer wri = null;
diff --git
a/test/unit/org/apache/cassandra/index/sai/cql/VectorInvalidQueryTest.java
b/test/unit/org/apache/cassandra/index/sai/cql/VectorInvalidQueryTest.java
index 1bfc3a1a57..e26f6d9f83 100644
--- a/test/unit/org/apache/cassandra/index/sai/cql/VectorInvalidQueryTest.java
+++ b/test/unit/org/apache/cassandra/index/sai/cql/VectorInvalidQueryTest.java
@@ -324,6 +324,42 @@ public class VectorInvalidQueryTest extends SAITester
.isInstanceOf(InvalidRequestException.class).hasRootCauseMessage(StorageAttachedIndex.VECTOR_NON_FLOAT_ERROR);
}
+ @Test
+ public void canOrderWithWhereOnPrimaryColumns() throws Throwable
+ {
+ createTable("CREATE TABLE %s (a int, b int, c int, d int, v
vector<float, 2>, PRIMARY KEY ((a,b),c,d))");
+ createIndex("CREATE CUSTOM INDEX ON %s(v) USING
'StorageAttachedIndex'");
+
+ execute("INSERT INTO %s (a, b, c, d, v) VALUES (1, 2, 1, 2,
[6.0,1.0])");
+
+ ResultSet result = execute("SELECT * FROM %s WHERE a = 1 AND b = 2
ORDER BY v ANN OF [2.0,1.0] LIMIT 1", ConsistencyLevel.ONE);
+ assertEquals(1, result.size());
+ result = execute("SELECT * FROM %s WHERE a = 1 AND b = 2 AND c = 1
ORDER BY v ANN OF [2.0,1.0] LIMIT 1", ConsistencyLevel.ONE);
+ assertEquals(1, result.size());
+ result = execute("SELECT * FROM %s WHERE a = 1 AND b = 2 AND c = 1 AND
d = 2 ORDER BY v ANN OF [2.0,1.0] LIMIT 1", ConsistencyLevel.ONE);
+ assertEquals(1, result.size());
+
+ assertThatThrownBy(() -> executeNet("SELECT * FROM %s WHERE a = 1 AND
b = 2 AND d = 2 ORDER BY v ANN OF [2.0,1.0] LIMIT 1"))
+
.isInstanceOf(InvalidQueryException.class).hasMessage(StatementRestrictions.ANN_REQUIRES_INDEXED_FILTERING_MESSAGE);
+
+ createIndex("CREATE CUSTOM INDEX c_idx ON %s(c) USING
'StorageAttachedIndex'");
+
+ assertThatThrownBy(() -> executeNet("SELECT * FROM %s WHERE a = 1 AND
b = 2 AND d = 2 ORDER BY v ANN OF [2.0,1.0] LIMIT 1"))
+
.isInstanceOf(InvalidQueryException.class).hasMessage(StatementRestrictions.ANN_REQUIRES_INDEXED_FILTERING_MESSAGE);
+
+ dropIndex("DROP INDEX %s.c_idx");
+ createIndex("CREATE CUSTOM INDEX ON %s(d) USING
'StorageAttachedIndex'");
+
+ result = execute("SELECT * FROM %s WHERE a = 1 AND b = 2 AND c = 1
ORDER BY v ANN OF [2.0,1.0] LIMIT 1", ConsistencyLevel.ONE);
+ assertEquals(1, result.size());
+ result = execute("SELECT * FROM %s WHERE a = 1 AND b = 2 AND c = 1 AND
d = 2 ORDER BY v ANN OF [2.0,1.0] LIMIT 1", ConsistencyLevel.ONE);
+ assertEquals(1, result.size());
+ result = execute("SELECT * FROM %s WHERE a = 1 AND b = 2 AND d = 2
ORDER BY v ANN OF [2.0,1.0] LIMIT 1", ConsistencyLevel.ONE);
+ assertEquals(1, result.size());
+ result = execute("SELECT * FROM %s WHERE a = 1 AND b = 2 AND c > 0
ORDER BY v ANN OF [2.0,1.0] LIMIT 1", ConsistencyLevel.ONE);
+ assertEquals(1, result.size());
+ }
+
@Test
public void canOnlyExecuteWithCorrectConsistencyLevel()
{
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]