This is an automated email from the ASF dual-hosted git repository.

maedhroz pushed a commit to branch cassandra-5.0
in repository https://gitbox.apache.org/repos/asf/cassandra.git


The following commit(s) were added to refs/heads/cassandra-5.0 by this push:
     new f28a16740a Avoid indexing empty values for non-literals and types that do not allow them
f28a16740a is described below

commit f28a16740abfdc6c6c72a40d8ac2a7efc14edc0e
Author: Caleb Rackliffe <[email protected]>
AuthorDate: Mon Feb 10 16:27:33 2025 -0600

    Avoid indexing empty values for non-literals and types that do not allow them
    
    patch by Caleb Rackliffe; reviewed by David Capwell and Andres de la Peña for CASSANDRA-20313
    
    Co-authored-by: Caleb Rackliffe <[email protected]>
    Co-authored-by: Andres de la Peña <[email protected]>
---
 CHANGES.txt                                        |  1 +
 .../index/sai/disk/v1/SSTableIndexWriter.java      |  2 +-
 .../cassandra/index/sai/memory/MemtableIndex.java  |  2 +-
 .../cassandra/index/sai/utils/IndexTermType.java   | 15 ++++-
 .../cassandra/index/sai/cql/EmptyValuesTest.java   | 65 ++++++++++++++++++++++
 5 files changed, 81 insertions(+), 4 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index 8763039fa6..cbbd1767b5 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 5.0.4
+ * Avoid indexing empty values for non-literals and types that do not allow them (CASSANDRA-20313)
  * Fix incorrect results of min / max in-built functions on clustering columns in descending order (CASSANDRA-20295)
  * Avoid possible consistency violations for SAI intersection queries over repaired index matches and multiple non-indexed column matches (CASSANDRA-20189)
  * Skip check for DirectIO when initializing tools (CASSANDRA-20289)
diff --git a/src/java/org/apache/cassandra/index/sai/disk/v1/SSTableIndexWriter.java b/src/java/org/apache/cassandra/index/sai/disk/v1/SSTableIndexWriter.java
index 58ee69a215..bf384c4f5f 100644
--- a/src/java/org/apache/cassandra/index/sai/disk/v1/SSTableIndexWriter.java
+++ b/src/java/org/apache/cassandra/index/sai/disk/v1/SSTableIndexWriter.java
@@ -201,7 +201,7 @@ public class SSTableIndexWriter implements PerColumnIndexWriter
         }
 
         // Some types support empty byte buffers:
-        if (term.remaining() == 0 && !index.termType().indexType().allowsEmpty()) return;
+        if (term.remaining() == 0 && index.termType().skipsEmptyValue()) return;
 
         if (analyzer == null || !index.termType().isLiteral())
         {
diff --git a/src/java/org/apache/cassandra/index/sai/memory/MemtableIndex.java b/src/java/org/apache/cassandra/index/sai/memory/MemtableIndex.java
index f0f2ea36ad..1546ede37d 100644
--- a/src/java/org/apache/cassandra/index/sai/memory/MemtableIndex.java
+++ b/src/java/org/apache/cassandra/index/sai/memory/MemtableIndex.java
@@ -82,7 +82,7 @@ public class MemtableIndex implements MemtableOrdering
 
     public long index(DecoratedKey key, Clustering<?> clustering, ByteBuffer value)
     {
-        if (value == null || (value.remaining() == 0 && !type.allowsEmpty()))
+        if (value == null || (value.remaining() == 0 && memoryIndex.index.termType().skipsEmptyValue()))
             return 0;
 
         long ram = memoryIndex.add(key, clustering, value);
diff --git a/src/java/org/apache/cassandra/index/sai/utils/IndexTermType.java b/src/java/org/apache/cassandra/index/sai/utils/IndexTermType.java
index f5a00b184e..7fa226e958 100644
--- a/src/java/org/apache/cassandra/index/sai/utils/IndexTermType.java
+++ b/src/java/org/apache/cassandra/index/sai/utils/IndexTermType.java
@@ -275,6 +275,14 @@ public class IndexTermType
         }
     }
 
+    /**
+     * @return {@code true} if the empty values of the given type should be excluded from indexing
+     */
+    public boolean skipsEmptyValue()
+    {
+        return !indexType.allowsEmpty() || !isLiteral();
+    }
+
     public AbstractType<?> indexType()
     {
         return indexType;
@@ -541,6 +549,9 @@ public class IndexTermType
 
     public ByteSource asComparableBytes(ByteBuffer value, ByteComparable.Version version)
     {
+        if (value.remaining() == 0)
+            return ByteSource.EMPTY;
+
         if (isInetAddress() || isBigInteger() || isBigDecimal())
             return ByteSource.optionalFixedLength(ByteBufferAccessor.instance, value);
         else if (isLong())
@@ -560,8 +571,8 @@ public class IndexTermType
      */
     public ByteBuffer asIndexBytes(ByteBuffer value)
     {
-        if (value == null)
-            return null;
+        if (value == null || value.remaining() == 0)
+            return value;
 
         if (isInetAddress())
             return encodeInetAddress(value);
diff --git a/test/unit/org/apache/cassandra/index/sai/cql/EmptyValuesTest.java b/test/unit/org/apache/cassandra/index/sai/cql/EmptyValuesTest.java
new file mode 100644
index 0000000000..ad36dd1156
--- /dev/null
+++ b/test/unit/org/apache/cassandra/index/sai/cql/EmptyValuesTest.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cassandra.index.sai.cql;
+
+import org.assertj.core.api.Assertions;
+import org.junit.Test;
+
+import org.apache.cassandra.cql3.CQL3Type;
+import org.apache.cassandra.index.sai.SAITester;
+import org.apache.cassandra.index.sai.StorageAttachedIndex;
+import org.apache.cassandra.index.sai.utils.IndexTermType;
+import org.apache.cassandra.utils.AbstractTypeGenerators;
+
+import static org.apache.cassandra.utils.ByteBufferUtil.EMPTY_BYTE_BUFFER;
+import static org.quicktheories.QuickTheory.qt;
+
+/**
+ * Tests that empty values are only indexed for literal indexes. See CASSANDRA-20313 for more details.
+ */
+public class EmptyValuesTest extends SAITester
+{
+    @Test
+    public void testEmptyValues()
+    {
+        qt().forAll(AbstractTypeGenerators.primitiveTypeGen()).checkAssert(type -> {
+            CQL3Type cql3Type = type.asCQL3Type();
+            if (type.allowsEmpty() && StorageAttachedIndex.SUPPORTED_TYPES.contains(cql3Type))
+                testEmptyValues(cql3Type);
+        });
+    }
+
+    private void testEmptyValues(CQL3Type type)
+    {
+        createTable(String.format("CREATE TABLE %%s (k int PRIMARY KEY, v %s)", type));
+        execute("INSERT INTO %s (k, v) VALUES (0, ?)", EMPTY_BYTE_BUFFER);
+        flush();
+        createIndex(String.format(CREATE_INDEX_TEMPLATE, 'v'));
+
+        IndexTermType termType = createIndexTermType(type.getType());
+        boolean indexed = !termType.skipsEmptyValue();
+
+        Assertions.assertThat(execute("SELECT * FROM %s WHERE v = ?", EMPTY_BYTE_BUFFER)).hasSize(indexed ? 1 : 0);
+
+        execute("INSERT INTO %s (k, v) VALUES (1, ?)", EMPTY_BYTE_BUFFER);
+
+        Assertions.assertThat(execute("SELECT * FROM %s WHERE v = ?", EMPTY_BYTE_BUFFER)).hasSize(indexed ? 2 : 0);
+        flush();
+        Assertions.assertThat(execute("SELECT * FROM %s WHERE v = ?", EMPTY_BYTE_BUFFER)).hasSize(indexed ? 2 : 0);
+    }
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to