This is an automated email from the ASF dual-hosted git repository.

maedhroz pushed a commit to branch cassandra-5.0
in repository https://gitbox.apache.org/repos/asf/cassandra.git


The following commit(s) were added to refs/heads/cassandra-5.0 by this push:
     new 46acaf22e6 Ensure SAI indexes empty byte buffers for types that allow 
them as a valid input
46acaf22e6 is described below

commit 46acaf22e688e7a2e707ac61fd88c96ed33b60d7
Author: Caleb Rackliffe <calebrackli...@gmail.com>
AuthorDate: Fri Mar 15 17:29:01 2024 -0500

    Ensure SAI indexes empty byte buffers for types that allow them as a valid 
input
    
    patch by Caleb Rackliffe; reviewed by David Capwell for CASSANDRA-19461
    
    Co-authored-by: Caleb Rackliffe <calebrackli...@gmail.com>
    Co-authored-by: David Capwell <dcapw...@apache.org>
---
 CHANGES.txt                                        |  1 +
 .../statements/schema/CreateIndexStatement.java    |  2 +-
 .../index/sai/disk/v1/SSTableIndexWriter.java      |  3 +-
 .../cassandra/index/sai/memory/MemtableIndex.java  |  5 +-
 test/unit/accord/utils/Gens.java                   | 70 +++++++++++++---
 .../index/sai/cql/AbstractSimpleEqTestBase.java    | 92 +++++++++++++++++++++
 .../index/sai/cql/AllTypesSimpleEqTest.java        | 95 ++++++++++++++++++++++
 .../index/sai/cql/EmptyStringLifecycleTest.java    | 79 ++++++++++++++++++
 .../org/apache/cassandra/utils/Generators.java     |  9 ++
 9 files changed, 343 insertions(+), 13 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index 056c84a82b..c056c2785f 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 5.0-beta2
+ * Ensure SAI indexes empty byte buffers for types that allow them as a valid 
input (CASSANDRA-19461)
  * Deprecate Python 3.7 and earlier, but allow cqlsh to run with Python 
3.6-3.11 (CASSANDRA-19467)
  * Set uuid_sstable_identifiers_enabled to true in cassandra-latest.yaml 
(CASSANDRA-19460)
  * Revert switching to approxTime in Dispatcher (CASSANDRA-19454)
diff --git 
a/src/java/org/apache/cassandra/cql3/statements/schema/CreateIndexStatement.java
 
b/src/java/org/apache/cassandra/cql3/statements/schema/CreateIndexStatement.java
index b53e066f90..9f5bbfe058 100644
--- 
a/src/java/org/apache/cassandra/cql3/statements/schema/CreateIndexStatement.java
+++ 
b/src/java/org/apache/cassandra/cql3/statements/schema/CreateIndexStatement.java
@@ -242,7 +242,7 @@ public final class CreateIndexStatement extends 
AlterSchemaStatement
             throw ire(ONLY_PARTITION_KEY, column);
 
         if (column.type.isFrozenCollection() && target.type != Type.FULL)
-            throw ire(CREATE_ON_FROZEN_COLUMN, target.type, column, column);
+            throw ire(CREATE_ON_FROZEN_COLUMN, target.type, column, 
column.name.toCQLString());
 
         if (!column.type.isFrozenCollection() && target.type == Type.FULL)
             throw ire(FULL_ON_FROZEN_COLLECTIONS);
diff --git 
a/src/java/org/apache/cassandra/index/sai/disk/v1/SSTableIndexWriter.java 
b/src/java/org/apache/cassandra/index/sai/disk/v1/SSTableIndexWriter.java
index 3400f81b9d..06bb7d80fe 100644
--- a/src/java/org/apache/cassandra/index/sai/disk/v1/SSTableIndexWriter.java
+++ b/src/java/org/apache/cassandra/index/sai/disk/v1/SSTableIndexWriter.java
@@ -200,7 +200,8 @@ public class SSTableIndexWriter implements 
PerColumnIndexWriter
             currentBuilder = newSegmentBuilder();
         }
 
-        if (term.remaining() == 0) return;
+        // Some types support empty byte buffers:
+        if (term.remaining() == 0 && 
!index.termType().indexType().allowsEmpty()) return;
 
         if (analyzer == null || !index.termType().isLiteral())
         {
diff --git a/src/java/org/apache/cassandra/index/sai/memory/MemtableIndex.java 
b/src/java/org/apache/cassandra/index/sai/memory/MemtableIndex.java
index d124af651e..f0f2ea36ad 100644
--- a/src/java/org/apache/cassandra/index/sai/memory/MemtableIndex.java
+++ b/src/java/org/apache/cassandra/index/sai/memory/MemtableIndex.java
@@ -28,6 +28,7 @@ import java.util.function.Function;
 import org.apache.cassandra.db.Clustering;
 import org.apache.cassandra.db.DecoratedKey;
 import org.apache.cassandra.db.PartitionPosition;
+import org.apache.cassandra.db.marshal.AbstractType;
 import org.apache.cassandra.dht.AbstractBounds;
 import org.apache.cassandra.index.sai.QueryContext;
 import org.apache.cassandra.index.sai.StorageAttachedIndex;
@@ -46,10 +47,12 @@ public class MemtableIndex implements MemtableOrdering
     private final MemoryIndex memoryIndex;
     private final LongAdder writeCount = new LongAdder();
     private final LongAdder estimatedMemoryUsed = new LongAdder();
+    private final AbstractType<?> type;
 
     public MemtableIndex(StorageAttachedIndex index)
     {
         this.memoryIndex = index.termType().isVector() ? new 
VectorMemoryIndex(index) : new TrieMemoryIndex(index);
+        this.type = index.termType().indexType();
     }
 
     public long writeCount()
@@ -79,7 +82,7 @@ public class MemtableIndex implements MemtableOrdering
 
     public long index(DecoratedKey key, Clustering<?> clustering, ByteBuffer 
value)
     {
-        if (value == null || value.remaining() == 0)
+        if (value == null || (value.remaining() == 0 && !type.allowsEmpty()))
             return 0;
 
         long ram = memoryIndex.add(key, clustering, value);
diff --git a/test/unit/accord/utils/Gens.java b/test/unit/accord/utils/Gens.java
index 4a80d724af..1fdff2258b 100644
--- a/test/unit/accord/utils/Gens.java
+++ b/test/unit/accord/utils/Gens.java
@@ -372,7 +372,12 @@ public class Gens {
         @Override
         public ListDSL<T> unique()
         {
-            return new ListDSL<>(new GenReset<>(fn));
+            return new ListDSL<>(new GenReset<>(fn, false));
+        }
+
+        public ListDSL<T> uniqueBestEffort()
+        {
+            return new ListDSL<>(new GenReset<>(fn, true));
         }
 
         @Override
@@ -384,7 +389,16 @@ public class Gens {
                 int size = sizeGen.nextInt(r);
                 List<T> list = new ArrayList<>(size);
                 for (int i = 0; i < size; i++)
-                    list.add(fn.next(r));
+                {
+                    try
+                    {
+                        list.add(fn.next(r));
+                    }
+                    catch (IgnoreGenResult e)
+                    {
+                        // ignore
+                    }
+                }
                 return list;
             };
         }
@@ -402,7 +416,12 @@ public class Gens {
         @Override
         public ArrayDSL<T> unique()
         {
-            return new ArrayDSL<>(type, new GenReset<>(fn));
+            return new ArrayDSL<>(type, new GenReset<>(fn, false));
+        }
+
+        public ArrayDSL<T> uniqueBestEffort()
+        {
+            return new ArrayDSL<>(type, new GenReset<>(fn, true));
         }
 
         @Override
@@ -414,7 +433,16 @@ public class Gens {
                 int size = sizeGen.nextInt(r);
                 T[] list = (T[]) Array.newInstance(type, size);
                 for (int i = 0; i < size; i++)
-                    list[i] = fn.next(r);
+                {
+                    try
+                    {
+                        list[i] = fn.next(r);
+                    }
+                    catch (IgnoreGenResult e)
+                    {
+                        return Arrays.copyOf(list, i);
+                    }
+                }
                 return list;
             };
         }
@@ -495,22 +523,44 @@ public class Gens {
         void reset();
     }
 
+    private static final class IgnoreGenResult extends RuntimeException
+    {
+        private static final IgnoreGenResult INSTANCE = new IgnoreGenResult();
+        private IgnoreGenResult()
+        {
+            super(null, null, false, false);
+        }
+    }
+
     private static class GenReset<T> implements Gen<T>, Reset
     {
         private final Set<T> seen = new HashSet<>();
         private final Gen<T> fn;
+        private final boolean bestEffort;
 
-        private GenReset(Gen<T> fn)
+        private GenReset(Gen<T> fn, boolean bestEffort)
         {
             this.fn = fn;
+            this.bestEffort = bestEffort;
         }
 
         @Override
         public T next(RandomSource random)
         {
-            T value;
-            while (!seen.add((value = fn.next(random)))) {}
-            return value;
+            if (!bestEffort)
+            {
+                T value;
+                while (!seen.add((value = fn.next(random)))) {}
+                return value;
+            }
+            else
+            {
+                T value = null;
+                int i;
+                for (i = 0; i < 42 && !seen.add((value = fn.next(random))); 
i++) {}
+                if (i == 42) throw IgnoreGenResult.INSTANCE;
+                return value;
+            }
         }
 
         @Override
@@ -526,7 +576,7 @@ public class Gens {
 
         private IntGenReset(Gen.IntGen fn)
         {
-            this.base = new GenReset<>(fn);
+            this.base = new GenReset<>(fn, false);
         }
         @Override
         public int nextInt(RandomSource random) {
@@ -545,7 +595,7 @@ public class Gens {
 
         private LongGenReset(Gen.LongGen fn)
         {
-            this.base = new GenReset<>(fn);
+            this.base = new GenReset<>(fn, false);
         }
         @Override
         public long nextLong(RandomSource random) {
diff --git 
a/test/unit/org/apache/cassandra/index/sai/cql/AbstractSimpleEqTestBase.java 
b/test/unit/org/apache/cassandra/index/sai/cql/AbstractSimpleEqTestBase.java
new file mode 100644
index 0000000000..fae644f389
--- /dev/null
+++ b/test/unit/org/apache/cassandra/index/sai/cql/AbstractSimpleEqTestBase.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.index.sai.cql;
+
+import java.nio.ByteBuffer;
+import java.util.Comparator;
+import java.util.Map;
+import java.util.TreeMap;
+import javax.annotation.Nullable;
+
+import accord.utils.Gen;
+import accord.utils.Property;
+import org.agrona.collections.IntArrayList;
+import org.apache.cassandra.config.CassandraRelevantProperties;
+import org.apache.cassandra.cql3.UntypedResultSet;
+import org.apache.cassandra.db.marshal.AbstractType;
+import org.apache.cassandra.index.sai.SAITester;
+import org.assertj.core.api.Assertions;
+
+import static accord.utils.Property.qt;
+
+public abstract class AbstractSimpleEqTestBase extends SAITester
+{
+    static
+    {
+        // The info table gets updated and force-flushed every time we 
truncate, so disable that and avoid the overhead:
+        CassandraRelevantProperties.UNSAFE_SYSTEM.setBoolean(true);
+
+        // Ignore SAI timeouts as this is just validating the read/write logic 
of the index:
+        CassandraRelevantProperties.SAI_TEST_DISABLE_TIMEOUT.setBoolean(true);
+    }
+
+    protected void test(AbstractType<?> type, @Nullable Long seed, int 
examples, Gen<Gen<ByteBuffer>> distribution)
+    {
+        createTable("CREATE TABLE %s (pk int PRIMARY KEY, value " + 
type.asCQL3Type() + ')');
+        disableCompaction(KEYSPACE);
+        createIndex(String.format(CREATE_INDEX_TEMPLATE, "value"));
+
+        Property.ForBuilder builder = qt().withExamples(examples);
+        if (seed != null)
+            builder = builder.withSeed(seed);
+
+        builder.check(rs -> {
+            execute("TRUNCATE %s");
+
+            Gen<ByteBuffer> support = distribution.next(rs);
+            Map<ByteBuffer, IntArrayList> termIndex = new TreeMap<>();
+
+            for (int i = 0; i < 1000; i++)
+            {
+                ByteBuffer term = support.next(rs);
+                execute("INSERT INTO %s (pk, value) VALUES (?, ?)", i, term);
+                termIndex.computeIfAbsent(term, ignore -> new 
IntArrayList()).addInt(i);
+            }
+            
+            flush();
+
+            for (var e : termIndex.entrySet())
+            {
+                ByteBuffer term = e.getKey();
+                IntArrayList expected = e.getValue();
+                UntypedResultSet result = execute("SELECT pk, value FROM %s 
WHERE value=?", term);
+                IntArrayList actual = new IntArrayList(expected.size(), -1);
+                for (var row : result)
+                {
+                    ByteBuffer readValue = row.getBytes("value");
+                    Assertions.assertThat(readValue).describedAs("%s != %s", 
type.compose(term), type.compose(readValue)).isEqualTo(term);
+                    actual.add(row.getInt("pk"));
+                }
+                expected.sort(Comparator.naturalOrder());
+                actual.sort(Comparator.naturalOrder());
+                Assertions.assertThat(actual).describedAs("Unexpected 
partitions for term %s", type.compose(term)).isEqualTo(expected);
+            }
+        });
+    }
+}
\ No newline at end of file
diff --git 
a/test/unit/org/apache/cassandra/index/sai/cql/AllTypesSimpleEqTest.java 
b/test/unit/org/apache/cassandra/index/sai/cql/AllTypesSimpleEqTest.java
new file mode 100644
index 0000000000..55390b3f5c
--- /dev/null
+++ b/test/unit/org/apache/cassandra/index/sai/cql/AllTypesSimpleEqTest.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.index.sai.cql;
+
+import java.nio.ByteBuffer;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import accord.utils.Gen;
+import accord.utils.Gens;
+import org.apache.cassandra.cql3.CQL3Type;
+import org.apache.cassandra.db.marshal.AbstractType;
+import org.apache.cassandra.db.marshal.DecimalType;
+import org.apache.cassandra.db.marshal.UTF8Type;
+import org.apache.cassandra.index.sai.StorageAttachedIndex;
+import org.apache.cassandra.utils.AbstractTypeGenerators;
+import org.apache.cassandra.utils.Generators;
+import org.quicktheories.generators.SourceDSL;
+
+@RunWith(Parameterized.class)
+public class AllTypesSimpleEqTest extends AbstractSimpleEqTestBase
+{
+    private static final Map<AbstractType<?>, Long> LARGE_DOMAIN_FAILING_SEEDS 
=
+        Map.of(UTF8Type.instance, -4379508235061872764L);
+    private static final Map<AbstractType<?>, Long> SHORT_DOMAIN_FAILING_SEEDS 
=
+        Map.of(UTF8Type.instance, -4379508235061872764L);
+
+    private final AbstractType<?> type;
+
+    public AllTypesSimpleEqTest(AbstractType<?> type)
+    {
+        this.type = type;
+    }
+
+    @Parameterized.Parameters(name = "{0}")
+    public static Collection<Object[]> data()
+    {
+        return StorageAttachedIndex.SUPPORTED_TYPES.stream()
+                                                   .map(CQL3Type::getType)
+                                                   // TODO: Track down unicode 
character edge cases...
+                                                   .filter(t -> t != 
UTF8Type.instance)
+                                                   .distinct()
+                                                   .map(t -> new Object[]{ t })
+                                                   
.collect(Collectors.toList());
+    }
+
+    @Test
+    public void largeDomain()
+    {
+        Gen<ByteBuffer> gen = genFromType();
+        test(type, LARGE_DOMAIN_FAILING_SEEDS.get(type), 10, rs -> gen);
+    }
+
+    @Test
+    public void smallDomain()
+    {
+        Gen<ByteBuffer> gen = genFromType();
+        test(type, SHORT_DOMAIN_FAILING_SEEDS.get(type), 10, rs -> {
+            List<ByteBuffer> domain = 
Gens.lists(gen).uniqueBestEffort().ofSizeBetween(5, 100).next(rs);
+            return r -> r.pick(domain);
+        });
+    }
+
+    private Gen<ByteBuffer> genFromType()
+    {
+        if (type == DecimalType.instance)
+            // 0.0 != 0.000 but compare(0.0, 0.000) == 0, this breaks the test
+            return 
Generators.toGen(Generators.bigDecimal(SourceDSL.arbitrary().constant(42), 
Generators.bigInt())
+                                              
.map(DecimalType.instance::decompose));
+        
+        return 
Generators.toGen(AbstractTypeGenerators.getTypeSupport(type).bytesGen());
+    }
+}
\ No newline at end of file
diff --git 
a/test/unit/org/apache/cassandra/index/sai/cql/EmptyStringLifecycleTest.java 
b/test/unit/org/apache/cassandra/index/sai/cql/EmptyStringLifecycleTest.java
new file mode 100644
index 0000000000..7ace636e4f
--- /dev/null
+++ b/test/unit/org/apache/cassandra/index/sai/cql/EmptyStringLifecycleTest.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.index.sai.cql;
+
+import org.junit.Test;
+
+import org.apache.cassandra.cql3.UntypedResultSet;
+import org.apache.cassandra.index.sai.SAITester;
+
+public class EmptyStringLifecycleTest extends SAITester
+{
+    @Test
+    public void testBeforeAndAfterFlush()
+    {
+        createTable("CREATE TABLE %s (k int PRIMARY KEY, v text)");
+        disableCompaction(KEYSPACE);
+        createIndex(String.format(CREATE_INDEX_TEMPLATE, 'v'));
+
+        execute("INSERT INTO %s (k, v) VALUES (0, '')");
+        execute("INSERT INTO %s (k) VALUES (1)");
+
+        UntypedResultSet rows = execute("SELECT * FROM %s WHERE v = ''");
+        assertRows(rows, row(0, ""));
+
+        flush();
+        rows = execute("SELECT * FROM %s WHERE v = ''");
+        assertRows(rows, row(0, ""));
+    }
+
+    @Test
+    public void testAfterInitialBuild()
+    {
+        createTable("CREATE TABLE %s (k int PRIMARY KEY, v text)");
+        disableCompaction(KEYSPACE);
+
+        execute("INSERT INTO %s (k, v) VALUES (0, '')");
+        execute("INSERT INTO %s (k) VALUES (1)");
+        createIndex(String.format(CREATE_INDEX_TEMPLATE, 'v'));
+
+        UntypedResultSet rows = execute("SELECT * FROM %s WHERE v = ''");
+        assertRows(rows, row(0, ""));
+    }
+
+    @Test
+    public void testAfterCompaction()
+    {
+        createTable("CREATE TABLE %s (k int PRIMARY KEY, v text)");
+        disableCompaction(KEYSPACE);
+        createIndex(String.format(CREATE_INDEX_TEMPLATE, 'v'));
+
+        execute("INSERT INTO %s (k, v) VALUES (0, '')");
+        execute("INSERT INTO %s (k) VALUES (1)");
+        flush();
+
+        execute("INSERT INTO %s (k, v) VALUES (1, '')");
+        flush();
+        
+        compact();
+
+        UntypedResultSet rows = execute("SELECT * FROM %s WHERE v = ''");
+        assertRows(rows, row(1, ""), row(0, ""));
+    }
+}
diff --git a/test/unit/org/apache/cassandra/utils/Generators.java 
b/test/unit/org/apache/cassandra/utils/Generators.java
index f23aff7498..9675d5525e 100644
--- a/test/unit/org/apache/cassandra/utils/Generators.java
+++ b/test/unit/org/apache/cassandra/utils/Generators.java
@@ -43,6 +43,7 @@ import org.quicktheories.core.Gen;
 import org.quicktheories.core.RandomnessSource;
 import org.quicktheories.generators.SourceDSL;
 import org.quicktheories.impl.Constraint;
+import org.quicktheories.impl.JavaRandom;
 
 import static 
org.apache.cassandra.config.CassandraRelevantProperties.TEST_BLOB_SHARED_SEED;
 
@@ -533,4 +534,12 @@ public final class Generators
                         .flatMap(start -> SourceDSL.integers().between(start, 
max)
                                                    .map(end -> 
Range.closed(start, end)));
     }
+
+    public static <T> accord.utils.Gen<T> toGen(org.quicktheories.core.Gen<T> 
qt)
+    {
+        return rs -> {
+            JavaRandom r = new JavaRandom(rs.asJdkRandom());
+            return qt.generate(r);
+        };
+    }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@cassandra.apache.org
For additional commands, e-mail: commits-h...@cassandra.apache.org

Reply via email to