dcapwell commented on code in PR #3185:
URL: https://github.com/apache/cassandra/pull/3185#discussion_r1532895217
##########
src/java/org/apache/cassandra/index/sai/disk/v1/SSTableIndexWriter.java:
##########
@@ -200,7 +200,8 @@ else if (shouldFlush(sstableRowId))
currentBuilder = newSegmentBuilder();
}
- if (term.remaining() == 0) return;
+ // Some types support empty byte buffers:
+ if (term.remaining() == 0 &&
!index.termType().indexType().allowsEmpty()) return;
Review Comment:
yay, a non-test code path using this new method! Glad I added it ^_^...
semantics are hard...
##########
test/unit/org/apache/cassandra/index/sai/cql/AllTypesSimpleEqTest.java:
##########
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.index.sai.cql;
+
+import java.nio.ByteBuffer;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import accord.utils.Gen;
+import accord.utils.Gens;
+import org.apache.cassandra.cql3.CQL3Type;
+import org.apache.cassandra.db.marshal.AbstractType;
+import org.apache.cassandra.db.marshal.AsciiType;
+import org.apache.cassandra.db.marshal.DecimalType;
+import org.apache.cassandra.db.marshal.UTF8Type;
+import org.apache.cassandra.index.sai.StorageAttachedIndex;
+import org.apache.cassandra.utils.AbstractTypeGenerators;
+import org.apache.cassandra.utils.Generators;
+import org.quicktheories.generators.SourceDSL;
+
+@RunWith(Parameterized.class)
+public class AllTypesSimpleEqTest extends AbstractSimpleEqTest
+{
+ private static final Map<AbstractType<?>, Long> LARGE_DOMAIN_FAILING_SEEDS
=
+ Map.of(AsciiType.instance, -5097559362029524309L, UTF8Type.instance,
-4379508235061872764L);
+ private static final Map<AbstractType<?>, Long> SHORT_DOMAIN_FAILING_SEEDS
=
+ Map.of(AsciiType.instance, -5097559362029524309L, UTF8Type.instance,
-4379508235061872764L);
+ private final AbstractType<?> type;
+
+ public AllTypesSimpleEqTest(AbstractType<?> type)
+ {
+ this.type = type;
+ }
+
+ @Parameterized.Parameters(name = "{0}")
+ public static Collection<Object[]> data()
+ {
+ return StorageAttachedIndex.SUPPORTED_TYPES.stream()
+ .map(CQL3Type::getType)
+ // TODO: Track down unicode
character edge cases...
+ .filter(t -> t !=
UTF8Type.instance)
Review Comment:
don't you need to filter ascii as well?
##########
src/java/org/apache/cassandra/cql3/statements/schema/CreateIndexStatement.java:
##########
@@ -242,7 +242,7 @@ private void validateIndexTarget(TableMetadata table,
IndexMetadata.Kind kind, I
throw ire(ONLY_PARTITION_KEY, column);
if (column.type.isFrozenCollection() && target.type != Type.FULL)
- throw ire(CREATE_ON_FROZEN_COLUMN, target.type, column, column);
+ throw ire(CREATE_ON_FROZEN_COLUMN, target.type, column,
column.name.toCQLString());
Review Comment:
not for this patch: honestly, when do we want toString to not be valid CQL?
this is a common issue...
##########
test/unit/accord/utils/Gens.java:
##########
@@ -372,7 +372,12 @@ public ListDSL(Gen<T> fn) {
@Override
public ListDSL<T> unique()
{
- return new ListDSL<>(new GenReset<>(fn));
+ return new ListDSL<>(new GenReset<>(fn, false));
+ }
+
+ public ListDSL<T> uniqueBestEffort()
Review Comment:
not for this patch: going to have to do the same in accord... before we
merge accord to trunk we are going to have to account for the drift between the
2 repos....
##########
test/unit/org/apache/cassandra/index/sai/cql/AbstractSimpleEqTest.java:
##########
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.index.sai.cql;
+
+import java.nio.ByteBuffer;
+import java.util.Comparator;
+import java.util.Map;
+import java.util.TreeMap;
+import javax.annotation.Nullable;
+
+import accord.utils.Gen;
+import accord.utils.Property;
+import org.agrona.collections.IntArrayList;
+import org.apache.cassandra.config.CassandraRelevantProperties;
+import org.apache.cassandra.cql3.UntypedResultSet;
+import org.apache.cassandra.db.marshal.AbstractType;
+import org.apache.cassandra.index.sai.SAITester;
+import org.assertj.core.api.Assertions;
+
+import static accord.utils.Property.qt;
+
+public abstract class AbstractSimpleEqTest extends SAITester
+{
+ static
+ {
+ // The info table gets updated and force-flushed every time we
truncate, so disable that and avoid the overhead:
+ CassandraRelevantProperties.UNSAFE_SYSTEM.setBoolean(true);
+ }
+
+ protected void test(AbstractType<?> type, @Nullable Long seed, int
examples, Gen<Gen<ByteBuffer>> distribution)
+ {
+ createTable("CREATE TABLE %s (pk int PRIMARY KEY, value " +
type.asCQL3Type() + ')');
Review Comment:
this tests the index correctness, but it might be good to also change the
index column away from regular to static/cluster/partition columns based off
the randomness...
Rather than having 1 test for the type, we can have 1 test for each type of
column?
this feedback is optional, just thinking it could improve coverage
##########
src/java/org/apache/cassandra/index/sai/memory/MemtableIndex.java:
##########
@@ -79,7 +82,7 @@ public ByteBuffer getMaxTerm()
public long index(DecoratedKey key, Clustering<?> clustering, ByteBuffer
value)
{
- if (value == null || value.remaining() == 0)
+ if (value == null || (!type.allowsEmpty() && value.remaining() == 0))
Review Comment:
```suggestion
if (value == null || (value.remaining() == 0 && !type.allowsEmpty()))
```
nit: we only want to call `allowsEmpty` when its empty, else it adds a
virtual dispatch that is `false` in most cases... so slightly less overhead in
the common case (non-empty)
##########
test/unit/org/apache/cassandra/index/sai/cql/EmptyStringLifecycleTest.java:
##########
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.index.sai.cql;
+
+import org.junit.Test;
+
+import org.apache.cassandra.cql3.UntypedResultSet;
+import org.apache.cassandra.index.sai.SAITester;
+
+public class EmptyStringLifecycleTest extends SAITester
+{
+ @Test
+ public void testBeforeAndAfterFlush()
+ {
+ createTable("CREATE TABLE %s (k int PRIMARY KEY, v text)");
+ disableCompaction(KEYSPACE);
+ createIndex(String.format(CREATE_INDEX_TEMPLATE, 'v'));
+
+ execute("INSERT INTO %s (k, v) VALUES (0, '')");
+ execute("INSERT INTO %s (k) VALUES (1)");
+
+ UntypedResultSet rows = execute("SELECT * FROM %s WHERE v = ''");
+ assertRows(rows, row(0, ""));
+
+ flush();
+ rows = execute("SELECT * FROM %s WHERE v = ''");
+ assertRows(rows, row(0, ""));
+ }
+
+ @Test
+ public void testAfterInitialBuild()
+ {
+ createTable("CREATE TABLE %s (k int PRIMARY KEY, v text)");
+ disableCompaction(KEYSPACE);
+
+ execute("INSERT INTO %s (k, v) VALUES (0, '')");
+ execute("INSERT INTO %s (k) VALUES (1)");
+ createIndex(String.format(CREATE_INDEX_TEMPLATE, 'v'));
+
+ UntypedResultSet rows = execute("SELECT * FROM %s WHERE v = ''");
+ assertRows(rows, row(0, ""));
+ }
+
+ @Test
+ public void testAfterCompaction()
+ {
+ createTable("CREATE TABLE %s (k int PRIMARY KEY, v text)");
+ disableCompaction(KEYSPACE);
+ createIndex(String.format(CREATE_INDEX_TEMPLATE, 'v'));
+
+ execute("INSERT INTO %s (k, v) VALUES (0, '')");
+ execute("INSERT INTO %s (k) VALUES (1)");
+ flush();
+
+ execute("INSERT INTO %s (k, v) VALUES (0, '')");
+ flush();
+
+ compact();
Review Comment:
this doesn't validate we actually compacted, so would be good to validate we
had 2 files now have 1... simple test bug can mean we don't actually compact
(you disable compaction, so we actually no op right now...)
##########
test/unit/org/apache/cassandra/index/sai/cql/AbstractSimpleEqTest.java:
##########
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.index.sai.cql;
+
+import java.nio.ByteBuffer;
+import java.util.Comparator;
+import java.util.Map;
+import java.util.TreeMap;
+import javax.annotation.Nullable;
+
+import accord.utils.Gen;
+import accord.utils.Property;
+import org.agrona.collections.IntArrayList;
+import org.apache.cassandra.config.CassandraRelevantProperties;
+import org.apache.cassandra.cql3.UntypedResultSet;
+import org.apache.cassandra.db.marshal.AbstractType;
+import org.apache.cassandra.index.sai.SAITester;
+import org.assertj.core.api.Assertions;
+
+import static accord.utils.Property.qt;
+
+public abstract class AbstractSimpleEqTest extends SAITester
+{
+ static
+ {
+ // The info table gets updated and force-flushed every time we
truncate, so disable that and avoid the overhead:
+ CassandraRelevantProperties.UNSAFE_SYSTEM.setBoolean(true);
Review Comment:
```suggestion
CassandraRelevantProperties.UNSAFE_SYSTEM.setBoolean(true);
// ignore SAI timeouts as this is just validating the read/write
logic of the index, will help make the test more stable
CassandraRelevantProperties.SAI_TEST_DISABLE_TIMEOUT.setBoolean(true);
```
just in case we are slow, disabling sai timeouts will make this less
flakey...
##########
test/unit/org/apache/cassandra/index/sai/cql/AbstractSimpleEqTest.java:
##########
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.index.sai.cql;
+
+import java.nio.ByteBuffer;
+import java.util.Comparator;
+import java.util.Map;
+import java.util.TreeMap;
+import javax.annotation.Nullable;
+
+import accord.utils.Gen;
+import accord.utils.Property;
+import org.agrona.collections.IntArrayList;
+import org.apache.cassandra.config.CassandraRelevantProperties;
+import org.apache.cassandra.cql3.UntypedResultSet;
+import org.apache.cassandra.db.marshal.AbstractType;
+import org.apache.cassandra.index.sai.SAITester;
+import org.assertj.core.api.Assertions;
+
+import static accord.utils.Property.qt;
+
+public abstract class AbstractSimpleEqTest extends SAITester
+{
+ static
+ {
+ // The info table gets updated and force-flushed every time we
truncate, so disable that and avoid the overhead:
+ CassandraRelevantProperties.UNSAFE_SYSTEM.setBoolean(true);
+ }
+
+ protected void test(AbstractType<?> type, @Nullable Long seed, int
examples, Gen<Gen<ByteBuffer>> distribution)
+ {
+ createTable("CREATE TABLE %s (pk int PRIMARY KEY, value " +
type.asCQL3Type() + ')');
+ disableCompaction(KEYSPACE);
+ createIndex(String.format(CREATE_INDEX_TEMPLATE, "value"));
+
+ Property.ForBuilder builder = qt().withExamples(examples);
+ if (seed != null)
+ builder = builder.withSeed(seed);
+
+ builder.check(rs -> {
+ execute("TRUNCATE %s");
+
+ Gen<ByteBuffer> support = distribution.next(rs);
+ Map<ByteBuffer, IntArrayList> termIndex = new TreeMap<>();
+
+ for (int i = 0; i < 1000; i++)
+ {
+ ByteBuffer term = support.next(rs);
+ execute("INSERT INTO %s (pk, value) VALUES (?, ?)", i, term);
+ termIndex.computeIfAbsent(term, ignore -> new
IntArrayList()).addInt(i);
+ }
+
+ flush();
Review Comment:
to make sure we test memtable index as well, we should loop over the for
loop 2 times: first is memtable, second is disk...
```
for (int I = 0; I < 2; I++)
{
if (I == 1) flush();
...
}
```
##########
test/unit/org/apache/cassandra/index/sai/cql/EmptyStringLifecycleTest.java:
##########
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.index.sai.cql;
+
+import org.junit.Test;
+
+import org.apache.cassandra.cql3.UntypedResultSet;
+import org.apache.cassandra.index.sai.SAITester;
+
+public class EmptyStringLifecycleTest extends SAITester
+{
+ @Test
+ public void testBeforeAndAfterFlush()
+ {
+ createTable("CREATE TABLE %s (k int PRIMARY KEY, v text)");
+ disableCompaction(KEYSPACE);
+ createIndex(String.format(CREATE_INDEX_TEMPLATE, 'v'));
+
+ execute("INSERT INTO %s (k, v) VALUES (0, '')");
+ execute("INSERT INTO %s (k) VALUES (1)");
+
+ UntypedResultSet rows = execute("SELECT * FROM %s WHERE v = ''");
+ assertRows(rows, row(0, ""));
+
+ flush();
+ rows = execute("SELECT * FROM %s WHERE v = ''");
+ assertRows(rows, row(0, ""));
+ }
+
+ @Test
+ public void testAfterInitialBuild()
+ {
+ createTable("CREATE TABLE %s (k int PRIMARY KEY, v text)");
+ disableCompaction(KEYSPACE);
+
+ execute("INSERT INTO %s (k, v) VALUES (0, '')");
+ execute("INSERT INTO %s (k) VALUES (1)");
+ createIndex(String.format(CREATE_INDEX_TEMPLATE, 'v'));
+
+ UntypedResultSet rows = execute("SELECT * FROM %s WHERE v = ''");
+ assertRows(rows, row(0, ""));
+ }
+
+ @Test
+ public void testAfterCompaction()
+ {
+ createTable("CREATE TABLE %s (k int PRIMARY KEY, v text)");
+ disableCompaction(KEYSPACE);
+ createIndex(String.format(CREATE_INDEX_TEMPLATE, 'v'));
+
+ execute("INSERT INTO %s (k, v) VALUES (0, '')");
+ execute("INSERT INTO %s (k) VALUES (1)");
+ flush();
+
+ execute("INSERT INTO %s (k, v) VALUES (0, '')");
Review Comment:
I think it would be nice here to have this be `1, ''`, and have the test
validate it read from both values...
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]