This is an automated email from the ASF dual-hosted git repository. maedhroz pushed a commit to branch cassandra-5.0 in repository https://gitbox.apache.org/repos/asf/cassandra.git
The following commit(s) were added to refs/heads/cassandra-5.0 by this push: new 46acaf22e6 Ensure SAI indexes empty byte buffers for types that allow them as a valid input 46acaf22e6 is described below commit 46acaf22e688e7a2e707ac61fd88c96ed33b60d7 Author: Caleb Rackliffe <calebrackli...@gmail.com> AuthorDate: Fri Mar 15 17:29:01 2024 -0500 Ensure SAI indexes empty byte buffers for types that allow them as a valid input patch by Caleb Rackliffe; reviewed by David Capwell for CASSANDRA-19461 Co-authored-by: Caleb Rackliffe <calebrackli...@gmail.com> Co-authored-by: David Capwell <dcapw...@apache.org> --- CHANGES.txt | 1 + .../statements/schema/CreateIndexStatement.java | 2 +- .../index/sai/disk/v1/SSTableIndexWriter.java | 3 +- .../cassandra/index/sai/memory/MemtableIndex.java | 5 +- test/unit/accord/utils/Gens.java | 70 +++++++++++++--- .../index/sai/cql/AbstractSimpleEqTestBase.java | 92 +++++++++++++++++++++ .../index/sai/cql/AllTypesSimpleEqTest.java | 95 ++++++++++++++++++++++ .../index/sai/cql/EmptyStringLifecycleTest.java | 79 ++++++++++++++++++ .../org/apache/cassandra/utils/Generators.java | 9 ++ 9 files changed, 343 insertions(+), 13 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 056c84a82b..c056c2785f 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.0-beta2 + * Ensure SAI indexes empty byte buffers for types that allow them as a valid input (CASSANDRA-19461) * Deprecate Python 3.7 and earlier, but allow cqlsh to run with Python 3.6-3.11 (CASSANDRA-19467) * Set uuid_sstable_identifiers_enabled to true in cassandra-latest.yaml (CASSANDRA-19460) * Revert switching to approxTime in Dispatcher (CASSANDRA-19454) diff --git a/src/java/org/apache/cassandra/cql3/statements/schema/CreateIndexStatement.java b/src/java/org/apache/cassandra/cql3/statements/schema/CreateIndexStatement.java index b53e066f90..9f5bbfe058 100644 --- a/src/java/org/apache/cassandra/cql3/statements/schema/CreateIndexStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/schema/CreateIndexStatement.java @@ -242,7 +242,7 @@ public final class CreateIndexStatement extends AlterSchemaStatement throw ire(ONLY_PARTITION_KEY, column); if (column.type.isFrozenCollection() && target.type != Type.FULL) - throw ire(CREATE_ON_FROZEN_COLUMN, target.type, column, column); + throw ire(CREATE_ON_FROZEN_COLUMN, target.type, column, column.name.toCQLString()); if (!column.type.isFrozenCollection() && target.type == Type.FULL) throw ire(FULL_ON_FROZEN_COLLECTIONS); diff --git a/src/java/org/apache/cassandra/index/sai/disk/v1/SSTableIndexWriter.java b/src/java/org/apache/cassandra/index/sai/disk/v1/SSTableIndexWriter.java index 3400f81b9d..06bb7d80fe 100644 --- a/src/java/org/apache/cassandra/index/sai/disk/v1/SSTableIndexWriter.java +++ b/src/java/org/apache/cassandra/index/sai/disk/v1/SSTableIndexWriter.java @@ -200,7 +200,8 @@ public class SSTableIndexWriter implements PerColumnIndexWriter currentBuilder = newSegmentBuilder(); } - if (term.remaining() == 0) return; + // Some types support empty byte buffers: + if (term.remaining() == 0 && !index.termType().indexType().allowsEmpty()) return; if (analyzer == null || !index.termType().isLiteral()) { diff --git a/src/java/org/apache/cassandra/index/sai/memory/MemtableIndex.java b/src/java/org/apache/cassandra/index/sai/memory/MemtableIndex.java index d124af651e..f0f2ea36ad 100644 --- a/src/java/org/apache/cassandra/index/sai/memory/MemtableIndex.java +++ b/src/java/org/apache/cassandra/index/sai/memory/MemtableIndex.java @@ -28,6 +28,7 @@ import java.util.function.Function; import org.apache.cassandra.db.Clustering; import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.PartitionPosition; +import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.dht.AbstractBounds; import org.apache.cassandra.index.sai.QueryContext; import org.apache.cassandra.index.sai.StorageAttachedIndex; @@ -46,10 +47,12 @@ public class MemtableIndex implements MemtableOrdering private final MemoryIndex memoryIndex; private final LongAdder writeCount = new LongAdder(); private final LongAdder estimatedMemoryUsed = new LongAdder(); + private final AbstractType<?> type; public MemtableIndex(StorageAttachedIndex index) { this.memoryIndex = index.termType().isVector() ? new VectorMemoryIndex(index) : new TrieMemoryIndex(index); + this.type = index.termType().indexType(); } public long writeCount() @@ -79,7 +82,7 @@ public class MemtableIndex implements MemtableOrdering public long index(DecoratedKey key, Clustering<?> clustering, ByteBuffer value) { - if (value == null || value.remaining() == 0) + if (value == null || (value.remaining() == 0 && !type.allowsEmpty())) return 0; long ram = memoryIndex.add(key, clustering, value); diff --git a/test/unit/accord/utils/Gens.java b/test/unit/accord/utils/Gens.java index 4a80d724af..1fdff2258b 100644 --- a/test/unit/accord/utils/Gens.java +++ b/test/unit/accord/utils/Gens.java @@ -372,7 +372,12 @@ public class Gens { @Override public ListDSL<T> unique() { - return new ListDSL<>(new GenReset<>(fn)); + return new ListDSL<>(new GenReset<>(fn, false)); + } + + public ListDSL<T> uniqueBestEffort() + { + return new ListDSL<>(new GenReset<>(fn, true)); } @Override @@ -384,7 +389,16 @@ public class Gens { int size = sizeGen.nextInt(r); List<T> list = new ArrayList<>(size); for (int i = 0; i < size; i++) - list.add(fn.next(r)); + { + try + { + list.add(fn.next(r)); + } + catch (IgnoreGenResult e) + { + // ignore + } + } return list; }; } @@ -402,7 +416,12 @@ public class Gens { @Override public ArrayDSL<T> unique() { - return new ArrayDSL<>(type, new GenReset<>(fn)); + return new ArrayDSL<>(type, new GenReset<>(fn, false)); + } + + public ArrayDSL<T> uniqueBestEffort() + { + return new ArrayDSL<>(type, new GenReset<>(fn, true)); } @Override @@ -414,7 +433,16 @@ public class Gens { int size = sizeGen.nextInt(r); T[] list = (T[]) Array.newInstance(type, size); for (int i = 0; i < size; i++) - list[i] = fn.next(r); + { + try + { + list[i] = fn.next(r); + } + catch (IgnoreGenResult e) + { + return Arrays.copyOf(list, i); + } + } return list; }; } @@ -495,22 +523,44 @@ public class Gens { void reset(); } + private static final class IgnoreGenResult extends RuntimeException + { + private static final IgnoreGenResult INSTANCE = new IgnoreGenResult(); + private IgnoreGenResult() + { + super(null, null, false, false); + } + } + private static class GenReset<T> implements Gen<T>, Reset { private final Set<T> seen = new HashSet<>(); private final Gen<T> fn; + private final boolean bestEffort; - private GenReset(Gen<T> fn) + private GenReset(Gen<T> fn, boolean bestEffort) { this.fn = fn; + this.bestEffort = bestEffort; } @Override public T next(RandomSource random) { - T value; - while (!seen.add((value = fn.next(random)))) {} - return value; + if (!bestEffort) + { + T value; + while (!seen.add((value = fn.next(random)))) {} + return value; + } + else + { + T value = null; + int i; + for (i = 0; i < 42 && !seen.add((value = fn.next(random))); i++) {} + if (i == 42) throw IgnoreGenResult.INSTANCE; + return value; + } } @Override @@ -526,7 +576,7 @@ public class Gens { private IntGenReset(Gen.IntGen fn) { - this.base = new GenReset<>(fn); + this.base = new GenReset<>(fn, false); } @Override public int nextInt(RandomSource random) { @@ -545,7 +595,7 @@ public class Gens { private LongGenReset(Gen.LongGen fn) { - this.base = new GenReset<>(fn); + this.base = new GenReset<>(fn, false); } @Override public long nextLong(RandomSource random) { diff --git a/test/unit/org/apache/cassandra/index/sai/cql/AbstractSimpleEqTestBase.java b/test/unit/org/apache/cassandra/index/sai/cql/AbstractSimpleEqTestBase.java new file mode 100644 index 0000000000..fae644f389 --- /dev/null +++ b/test/unit/org/apache/cassandra/index/sai/cql/AbstractSimpleEqTestBase.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.index.sai.cql; + +import java.nio.ByteBuffer; +import java.util.Comparator; +import java.util.Map; +import java.util.TreeMap; +import javax.annotation.Nullable; + +import accord.utils.Gen; +import accord.utils.Property; +import org.agrona.collections.IntArrayList; +import org.apache.cassandra.config.CassandraRelevantProperties; +import org.apache.cassandra.cql3.UntypedResultSet; +import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.index.sai.SAITester; +import org.assertj.core.api.Assertions; + +import static accord.utils.Property.qt; + +public abstract class AbstractSimpleEqTestBase extends SAITester +{ + static + { + // The info table gets updated and force-flushed every time we truncate, so disable that and avoid the overhead: + CassandraRelevantProperties.UNSAFE_SYSTEM.setBoolean(true); + + // Ignore SAI timeouts as this is just validating the read/write logic of the index: + CassandraRelevantProperties.SAI_TEST_DISABLE_TIMEOUT.setBoolean(true); + } + + protected void test(AbstractType<?> type, @Nullable Long seed, int examples, Gen<Gen<ByteBuffer>> distribution) + { + createTable("CREATE TABLE %s (pk int PRIMARY KEY, value " + type.asCQL3Type() + ')'); + disableCompaction(KEYSPACE); + createIndex(String.format(CREATE_INDEX_TEMPLATE, "value")); + + Property.ForBuilder builder = qt().withExamples(examples); + if (seed != null) + builder = builder.withSeed(seed); + + builder.check(rs -> { + execute("TRUNCATE %s"); + + Gen<ByteBuffer> support = distribution.next(rs); + Map<ByteBuffer, IntArrayList> termIndex = new TreeMap<>(); + + for (int i = 0; i < 1000; i++) + { + ByteBuffer term = support.next(rs); + execute("INSERT INTO %s (pk, value) VALUES (?, ?)", i, term); + termIndex.computeIfAbsent(term, ignore -> new IntArrayList()).addInt(i); + } + + flush(); + + for (var e : termIndex.entrySet()) + { + ByteBuffer term = e.getKey(); + IntArrayList expected = e.getValue(); + UntypedResultSet result = execute("SELECT pk, value FROM %s WHERE value=?", term); + IntArrayList actual = new IntArrayList(expected.size(), -1); + for (var row : result) + { + ByteBuffer readValue = row.getBytes("value"); + Assertions.assertThat(readValue).describedAs("%s != %s", type.compose(term), type.compose(readValue)).isEqualTo(term); + actual.add(row.getInt("pk")); + } + expected.sort(Comparator.naturalOrder()); + actual.sort(Comparator.naturalOrder()); + Assertions.assertThat(actual).describedAs("Unexpected partitions for term %s", type.compose(term)).isEqualTo(expected); + } + }); + } +} \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/index/sai/cql/AllTypesSimpleEqTest.java b/test/unit/org/apache/cassandra/index/sai/cql/AllTypesSimpleEqTest.java new file mode 100644 index 0000000000..55390b3f5c --- /dev/null +++ b/test/unit/org/apache/cassandra/index/sai/cql/AllTypesSimpleEqTest.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.index.sai.cql; + +import java.nio.ByteBuffer; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import accord.utils.Gen; +import accord.utils.Gens; +import org.apache.cassandra.cql3.CQL3Type; +import org.apache.cassandra.db.marshal.AbstractType; +import org.apache.cassandra.db.marshal.DecimalType; +import org.apache.cassandra.db.marshal.UTF8Type; +import org.apache.cassandra.index.sai.StorageAttachedIndex; +import org.apache.cassandra.utils.AbstractTypeGenerators; +import org.apache.cassandra.utils.Generators; +import org.quicktheories.generators.SourceDSL; + +@RunWith(Parameterized.class) +public class AllTypesSimpleEqTest extends AbstractSimpleEqTestBase +{ + private static final Map<AbstractType<?>, Long> LARGE_DOMAIN_FAILING_SEEDS = + Map.of(UTF8Type.instance, -4379508235061872764L); + private static final Map<AbstractType<?>, Long> SHORT_DOMAIN_FAILING_SEEDS = + Map.of(UTF8Type.instance, -4379508235061872764L); + + private final AbstractType<?> type; + + public AllTypesSimpleEqTest(AbstractType<?> type) + { + this.type = type; + } + + @Parameterized.Parameters(name = "{0}") + public static Collection<Object[]> data() + { + return StorageAttachedIndex.SUPPORTED_TYPES.stream() + .map(CQL3Type::getType) + // TODO: Track down unicode character edge cases... + .filter(t -> t != UTF8Type.instance) + .distinct() + .map(t -> new Object[]{ t }) + .collect(Collectors.toList()); + } + + @Test + public void largeDomain() + { + Gen<ByteBuffer> gen = genFromType(); + test(type, LARGE_DOMAIN_FAILING_SEEDS.get(type), 10, rs -> gen); + } + + @Test + public void smallDomain() + { + Gen<ByteBuffer> gen = genFromType(); + test(type, SHORT_DOMAIN_FAILING_SEEDS.get(type), 10, rs -> { + List<ByteBuffer> domain = Gens.lists(gen).uniqueBestEffort().ofSizeBetween(5, 100).next(rs); + return r -> r.pick(domain); + }); + } + + private Gen<ByteBuffer> genFromType() + { + if (type == DecimalType.instance) + // 0.0 != 0.000 but compare(0.0, 0.000) == 0, this breaks the test + return Generators.toGen(Generators.bigDecimal(SourceDSL.arbitrary().constant(42), Generators.bigInt()) + .map(DecimalType.instance::decompose)); + + return Generators.toGen(AbstractTypeGenerators.getTypeSupport(type).bytesGen()); + } +} \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/index/sai/cql/EmptyStringLifecycleTest.java b/test/unit/org/apache/cassandra/index/sai/cql/EmptyStringLifecycleTest.java new file mode 100644 index 0000000000..7ace636e4f --- /dev/null +++ b/test/unit/org/apache/cassandra/index/sai/cql/EmptyStringLifecycleTest.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.index.sai.cql; + +import org.junit.Test; + +import org.apache.cassandra.cql3.UntypedResultSet; +import org.apache.cassandra.index.sai.SAITester; + +public class EmptyStringLifecycleTest extends SAITester +{ + @Test + public void testBeforeAndAfterFlush() + { + createTable("CREATE TABLE %s (k int PRIMARY KEY, v text)"); + disableCompaction(KEYSPACE); + createIndex(String.format(CREATE_INDEX_TEMPLATE, 'v')); + + execute("INSERT INTO %s (k, v) VALUES (0, '')"); + execute("INSERT INTO %s (k) VALUES (1)"); + + UntypedResultSet rows = execute("SELECT * FROM %s WHERE v = ''"); + assertRows(rows, row(0, "")); + + flush(); + rows = execute("SELECT * FROM %s WHERE v = ''"); + assertRows(rows, row(0, "")); + } + + @Test + public void testAfterInitialBuild() + { + createTable("CREATE TABLE %s (k int PRIMARY KEY, v text)"); + disableCompaction(KEYSPACE); + + execute("INSERT INTO %s (k, v) VALUES (0, '')"); + execute("INSERT INTO %s (k) VALUES (1)"); + createIndex(String.format(CREATE_INDEX_TEMPLATE, 'v')); + + UntypedResultSet rows = execute("SELECT * FROM %s WHERE v = ''"); + assertRows(rows, row(0, "")); + } + + @Test + public void testAfterCompaction() + { + createTable("CREATE TABLE %s (k int PRIMARY KEY, v text)"); + disableCompaction(KEYSPACE); + createIndex(String.format(CREATE_INDEX_TEMPLATE, 'v')); + + execute("INSERT INTO %s (k, v) VALUES (0, '')"); + execute("INSERT INTO %s (k) VALUES (1)"); + flush(); + + execute("INSERT INTO %s (k, v) VALUES (1, '')"); + flush(); + + compact(); + + UntypedResultSet rows = execute("SELECT * FROM %s WHERE v = ''"); + assertRows(rows, row(1, ""), row(0, "")); + } +} diff --git a/test/unit/org/apache/cassandra/utils/Generators.java b/test/unit/org/apache/cassandra/utils/Generators.java index f23aff7498..9675d5525e 100644 --- a/test/unit/org/apache/cassandra/utils/Generators.java +++ b/test/unit/org/apache/cassandra/utils/Generators.java @@ -43,6 +43,7 @@ import org.quicktheories.core.Gen; import org.quicktheories.core.RandomnessSource; import org.quicktheories.generators.SourceDSL; import org.quicktheories.impl.Constraint; +import org.quicktheories.impl.JavaRandom; import static org.apache.cassandra.config.CassandraRelevantProperties.TEST_BLOB_SHARED_SEED; @@ -533,4 +534,12 @@ public final class Generators .flatMap(start -> SourceDSL.integers().between(start, max) .map(end -> Range.closed(start, end))); } + + public static <T> accord.utils.Gen<T> toGen(org.quicktheories.core.Gen<T> qt) + { + return rs -> { + JavaRandom r = new JavaRandom(rs.asJdkRandom()); + return qt.generate(r); + }; + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@cassandra.apache.org For additional commands, e-mail: commits-h...@cassandra.apache.org