This is an automated email from the ASF dual-hosted git repository.
maedhroz pushed a commit to branch cassandra-5.0
in repository https://gitbox.apache.org/repos/asf/cassandra.git
The following commit(s) were added to refs/heads/cassandra-5.0 by this push:
new f28a16740a Avoid indexing empty values for non-literals and types that do not allow them
f28a16740a is described below
commit f28a16740abfdc6c6c72a40d8ac2a7efc14edc0e
Author: Caleb Rackliffe <[email protected]>
AuthorDate: Mon Feb 10 16:27:33 2025 -0600
Avoid indexing empty values for non-literals and types that do not allow them
patch by Caleb Rackliffe; reviewed by David Capwell and Andres de la Peña for CASSANDRA-20313
Co-authored-by: Caleb Rackliffe <[email protected]>
Co-authored-by: Andres de la Peña <[email protected]>
---
CHANGES.txt | 1 +
.../index/sai/disk/v1/SSTableIndexWriter.java | 2 +-
.../cassandra/index/sai/memory/MemtableIndex.java | 2 +-
.../cassandra/index/sai/utils/IndexTermType.java | 15 ++++-
.../cassandra/index/sai/cql/EmptyValuesTest.java | 65 ++++++++++++++++++++++
5 files changed, 81 insertions(+), 4 deletions(-)
diff --git a/CHANGES.txt b/CHANGES.txt
index 8763039fa6..cbbd1767b5 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
5.0.4
+ * Avoid indexing empty values for non-literals and types that do not allow them (CASSANDRA-20313)
* Fix incorrect results of min / max in-built functions on clustering columns in descending order (CASSANDRA-20295)
* Avoid possible consistency violations for SAI intersection queries over repaired index matches and multiple non-indexed column matches (CASSANDRA-20189)
* Skip check for DirectIO when initializing tools (CASSANDRA-20289)
diff --git a/src/java/org/apache/cassandra/index/sai/disk/v1/SSTableIndexWriter.java b/src/java/org/apache/cassandra/index/sai/disk/v1/SSTableIndexWriter.java
index 58ee69a215..bf384c4f5f 100644
--- a/src/java/org/apache/cassandra/index/sai/disk/v1/SSTableIndexWriter.java
+++ b/src/java/org/apache/cassandra/index/sai/disk/v1/SSTableIndexWriter.java
@@ -201,7 +201,7 @@ public class SSTableIndexWriter implements PerColumnIndexWriter
}
// Some types support empty byte buffers:
- if (term.remaining() == 0 &&
!index.termType().indexType().allowsEmpty()) return;
+ if (term.remaining() == 0 && index.termType().skipsEmptyValue())
return;
if (analyzer == null || !index.termType().isLiteral())
{
diff --git a/src/java/org/apache/cassandra/index/sai/memory/MemtableIndex.java b/src/java/org/apache/cassandra/index/sai/memory/MemtableIndex.java
index f0f2ea36ad..1546ede37d 100644
--- a/src/java/org/apache/cassandra/index/sai/memory/MemtableIndex.java
+++ b/src/java/org/apache/cassandra/index/sai/memory/MemtableIndex.java
@@ -82,7 +82,7 @@ public class MemtableIndex implements MemtableOrdering
public long index(DecoratedKey key, Clustering<?> clustering, ByteBuffer
value)
{
- if (value == null || (value.remaining() == 0 && !type.allowsEmpty()))
+ if (value == null || (value.remaining() == 0 &&
memoryIndex.index.termType().skipsEmptyValue()))
return 0;
long ram = memoryIndex.add(key, clustering, value);
diff --git a/src/java/org/apache/cassandra/index/sai/utils/IndexTermType.java b/src/java/org/apache/cassandra/index/sai/utils/IndexTermType.java
index f5a00b184e..7fa226e958 100644
--- a/src/java/org/apache/cassandra/index/sai/utils/IndexTermType.java
+++ b/src/java/org/apache/cassandra/index/sai/utils/IndexTermType.java
@@ -275,6 +275,14 @@ public class IndexTermType
}
}
+ /**
+ * @return {@code true} if the empty values of the given type should be excluded from indexing
+ */
+ public boolean skipsEmptyValue()
+ {
+ return !indexType.allowsEmpty() || !isLiteral();
+ }
+
public AbstractType<?> indexType()
{
return indexType;
@@ -541,6 +549,9 @@ public class IndexTermType
public ByteSource asComparableBytes(ByteBuffer value,
ByteComparable.Version version)
{
+ if (value.remaining() == 0)
+ return ByteSource.EMPTY;
+
if (isInetAddress() || isBigInteger() || isBigDecimal())
return ByteSource.optionalFixedLength(ByteBufferAccessor.instance,
value);
else if (isLong())
@@ -560,8 +571,8 @@ public class IndexTermType
*/
public ByteBuffer asIndexBytes(ByteBuffer value)
{
- if (value == null)
- return null;
+ if (value == null || value.remaining() == 0)
+ return value;
if (isInetAddress())
return encodeInetAddress(value);
diff --git a/test/unit/org/apache/cassandra/index/sai/cql/EmptyValuesTest.java b/test/unit/org/apache/cassandra/index/sai/cql/EmptyValuesTest.java
new file mode 100644
index 0000000000..ad36dd1156
--- /dev/null
+++ b/test/unit/org/apache/cassandra/index/sai/cql/EmptyValuesTest.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cassandra.index.sai.cql;
+
+import org.assertj.core.api.Assertions;
+import org.junit.Test;
+
+import org.apache.cassandra.cql3.CQL3Type;
+import org.apache.cassandra.index.sai.SAITester;
+import org.apache.cassandra.index.sai.StorageAttachedIndex;
+import org.apache.cassandra.index.sai.utils.IndexTermType;
+import org.apache.cassandra.utils.AbstractTypeGenerators;
+
+import static org.apache.cassandra.utils.ByteBufferUtil.EMPTY_BYTE_BUFFER;
+import static org.quicktheories.QuickTheory.qt;
+
+/**
+ * Tests that empty values are only indexed for literal indexes. See CASSANDRA-20313 for more details.
+ */
+public class EmptyValuesTest extends SAITester
+{
+ @Test
+ public void testEmptyValues()
+ {
+
qt().forAll(AbstractTypeGenerators.primitiveTypeGen()).checkAssert(type -> {
+ CQL3Type cql3Type = type.asCQL3Type();
+ if (type.allowsEmpty() &&
StorageAttachedIndex.SUPPORTED_TYPES.contains(cql3Type))
+ testEmptyValues(cql3Type);
+ });
+ }
+
+ private void testEmptyValues(CQL3Type type)
+ {
+ createTable(String.format("CREATE TABLE %%s (k int PRIMARY KEY, v
%s)", type));
+ execute("INSERT INTO %s (k, v) VALUES (0, ?)", EMPTY_BYTE_BUFFER);
+ flush();
+ createIndex(String.format(CREATE_INDEX_TEMPLATE, 'v'));
+
+ IndexTermType termType = createIndexTermType(type.getType());
+ boolean indexed = !termType.skipsEmptyValue();
+
+ Assertions.assertThat(execute("SELECT * FROM %s WHERE v = ?",
EMPTY_BYTE_BUFFER)).hasSize(indexed ? 1 : 0);
+
+ execute("INSERT INTO %s (k, v) VALUES (1, ?)", EMPTY_BYTE_BUFFER);
+
+ Assertions.assertThat(execute("SELECT * FROM %s WHERE v = ?",
EMPTY_BYTE_BUFFER)).hasSize(indexed ? 2 : 0);
+ flush();
+ Assertions.assertThat(execute("SELECT * FROM %s WHERE v = ?",
EMPTY_BYTE_BUFFER)).hasSize(indexed ? 2 : 0);
+ }
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]