adelapena commented on code in PR #2540:
URL: https://github.com/apache/cassandra/pull/2540#discussion_r1286956201
##########
src/java/org/apache/cassandra/index/sai/disk/format/IndexComponent.java:
##########
@@ -68,19 +68,23 @@ public enum IndexComponent
TOKEN_VALUES("TokenValues"),
/**
- * An on-disk trie containing the primary keys used for looking up the
rowId from a partition key
+ * An on-disk block packed index containing the starting and ending rowIds
for each partition.
*/
- PRIMARY_KEY_TRIE("PrimaryKeyTrie"),
+ PARTITION_SIZES("PartitionSizes"),
/**
* Prefix-compressed blocks of primary keys used for rowId to partition
key lookups
*/
- PRIMARY_KEY_BLOCKS("PrimaryKeyBlocks"),
+ PARTITION_KEY_BLOCKS("PartitionKeyBlocks"),
Review Comment:
The JavaDoc of `PARTITION_KEY_BLOCKS` and `PARTITION_KEY_BLOCK_OFFSETS`
would need to be updated to talk about partition keys instead of primary keys.
Also, the new components for clustering keys would also need some JavaDoc.
##########
test/microbench/org/apache/cassandra/test/microbench/sai/SortedTermsBench.java:
##########
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.test.microbench.sai;
+
+import java.nio.ByteBuffer;
+import java.nio.file.Files;
+import java.util.Arrays;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.cassandra.Util;
+import org.apache.cassandra.config.CassandraRelevantProperties;
+import org.apache.cassandra.config.DatabaseDescriptor;
+import org.apache.cassandra.db.Clustering;
+import org.apache.cassandra.db.DecoratedKey;
+import org.apache.cassandra.db.marshal.CompositeType;
+import org.apache.cassandra.db.marshal.LongType;
+import org.apache.cassandra.db.marshal.UTF8Type;
+import org.apache.cassandra.dht.Murmur3Partitioner;
+import org.apache.cassandra.index.sai.disk.PrimaryKeyMap;
+import org.apache.cassandra.index.sai.disk.format.IndexDescriptor;
+import org.apache.cassandra.index.sai.disk.v1.SSTableComponentsWriter;
+import org.apache.cassandra.index.sai.disk.v1.WideRowAwarePrimaryKeyMap;
+import org.apache.cassandra.index.sai.utils.PrimaryKey;
+import org.apache.cassandra.index.sai.utils.TypeUtil;
+import org.apache.cassandra.io.sstable.Descriptor;
+import org.apache.cassandra.io.sstable.format.SSTableReader;
+import org.apache.cassandra.io.util.File;
+import org.apache.cassandra.schema.TableMetadata;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Threads;
+import org.openjdk.jmh.annotations.Warmup;
+
+import static org.apache.cassandra.index.sai.SAITester.getRandom;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+@BenchmarkMode({Mode.Throughput})
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+@Warmup(iterations = 5, time = 1)
+@Measurement(iterations = 5, time = 10)
+@Fork(value = 1, jvmArgsAppend = "-Xmx512M")
+@Threads(1)
+@State(Scope.Benchmark)
+public class SortedTermsBench
+{
+ static
+ {
+ DatabaseDescriptor.toolInitialization();
+ // Partitioner is not set in client mode.
+ if (DatabaseDescriptor.getPartitioner() == null)
+
DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance);
+ }
+
+ protected TableMetadata metadata;
+ protected IndexDescriptor indexDescriptor;
+
+ private PrimaryKeyMap primaryKeyMap;
+
+ private PrimaryKey primaryKey;
+
+ @Param({"3", "4", "5"})
+ private int partitionBlockShift;
+
+ @Param({"3", "4", "5"})
+ private int clusteringBlockShift;
+
+ @Param({"10", "100", "1000", "10000"})
+ private int partitionSize;
+
+ @Param({"true", "false"})
+ private boolean randomClustering;
+
+ @Setup(Level.Trial)
+ public void trialSetup() throws Exception
+ {
+ String keyspaceName = "ks";
+ String tableName = this.getClass().getSimpleName();
+ metadata = TableMetadata
+ .builder(keyspaceName, tableName)
+ .partitioner(Murmur3Partitioner.instance)
+ .addPartitionKeyColumn("pk1", LongType.instance)
+ .addPartitionKeyColumn("pk2", LongType.instance)
+ .addClusteringColumn("ck1", UTF8Type.instance)
+ .addClusteringColumn("ck2", UTF8Type.instance)
+ .build();
+
+ Descriptor descriptor = new Descriptor(new
File(Files.createTempDirectory("jmh").toFile()),
+ metadata.keyspace,
+ metadata.name,
+ Util.newUUIDGen().get());
+
+ indexDescriptor = IndexDescriptor.create(descriptor,
metadata.comparator);
+
+
CassandraRelevantProperties.SAI_SORTED_TERMS_PARTITION_BLOCK_SHIFT.setInt(partitionBlockShift);
+
CassandraRelevantProperties.SAI_SORTED_TERMS_CLUSTERING_BLOCK_SHIFT.setInt(clusteringBlockShift);
+ SSTableComponentsWriter writer = new
SSTableComponentsWriter(indexDescriptor);
+
+ PrimaryKey.Factory factory = new
PrimaryKey.Factory(metadata.comparator);
+
+ int rows = 1000000;
Review Comment:
This could be a constant. I would also write it as `1_000_000` for
readability.
##########
test/microbench/org/apache/cassandra/test/microbench/sai/SortedTermsBench.java:
##########
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.test.microbench.sai;
+
+import java.nio.ByteBuffer;
+import java.nio.file.Files;
+import java.util.Arrays;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.cassandra.Util;
+import org.apache.cassandra.config.CassandraRelevantProperties;
+import org.apache.cassandra.config.DatabaseDescriptor;
+import org.apache.cassandra.db.Clustering;
+import org.apache.cassandra.db.DecoratedKey;
+import org.apache.cassandra.db.marshal.CompositeType;
+import org.apache.cassandra.db.marshal.LongType;
+import org.apache.cassandra.db.marshal.UTF8Type;
+import org.apache.cassandra.dht.Murmur3Partitioner;
+import org.apache.cassandra.index.sai.disk.PrimaryKeyMap;
+import org.apache.cassandra.index.sai.disk.format.IndexDescriptor;
+import org.apache.cassandra.index.sai.disk.v1.SSTableComponentsWriter;
+import org.apache.cassandra.index.sai.disk.v1.WideRowAwarePrimaryKeyMap;
+import org.apache.cassandra.index.sai.utils.PrimaryKey;
+import org.apache.cassandra.index.sai.utils.TypeUtil;
+import org.apache.cassandra.io.sstable.Descriptor;
+import org.apache.cassandra.io.sstable.format.SSTableReader;
+import org.apache.cassandra.io.util.File;
+import org.apache.cassandra.schema.TableMetadata;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Threads;
+import org.openjdk.jmh.annotations.Warmup;
+
+import static org.apache.cassandra.index.sai.SAITester.getRandom;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+@BenchmarkMode({Mode.Throughput})
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+@Warmup(iterations = 5, time = 1)
+@Measurement(iterations = 5, time = 10)
+@Fork(value = 1, jvmArgsAppend = "-Xmx512M")
+@Threads(1)
+@State(Scope.Benchmark)
+public class SortedTermsBench
+{
+ static
+ {
+ DatabaseDescriptor.toolInitialization();
+ // Partitioner is not set in client mode.
+ if (DatabaseDescriptor.getPartitioner() == null)
+
DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance);
+ }
+
+ protected TableMetadata metadata;
+ protected IndexDescriptor indexDescriptor;
+
+ private PrimaryKeyMap primaryKeyMap;
+
+ private PrimaryKey primaryKey;
+
+ @Param({"3", "4", "5"})
+ private int partitionBlockShift;
+
+ @Param({"3", "4", "5"})
+ private int clusteringBlockShift;
+
+ @Param({"10", "100", "1000", "10000"})
+ private int partitionSize;
+
+ @Param({"true", "false"})
+ private boolean randomClustering;
+
+ @Setup(Level.Trial)
+ public void trialSetup() throws Exception
+ {
+ String keyspaceName = "ks";
+ String tableName = this.getClass().getSimpleName();
+ metadata = TableMetadata
+ .builder(keyspaceName, tableName)
+ .partitioner(Murmur3Partitioner.instance)
+ .addPartitionKeyColumn("pk1", LongType.instance)
+ .addPartitionKeyColumn("pk2", LongType.instance)
+ .addClusteringColumn("ck1", UTF8Type.instance)
+ .addClusteringColumn("ck2", UTF8Type.instance)
+ .build();
+
+ Descriptor descriptor = new Descriptor(new
File(Files.createTempDirectory("jmh").toFile()),
+ metadata.keyspace,
+ metadata.name,
+ Util.newUUIDGen().get());
+
+ indexDescriptor = IndexDescriptor.create(descriptor,
metadata.comparator);
+
+
CassandraRelevantProperties.SAI_SORTED_TERMS_PARTITION_BLOCK_SHIFT.setInt(partitionBlockShift);
+
CassandraRelevantProperties.SAI_SORTED_TERMS_CLUSTERING_BLOCK_SHIFT.setInt(clusteringBlockShift);
+ SSTableComponentsWriter writer = new
SSTableComponentsWriter(indexDescriptor);
+
+ PrimaryKey.Factory factory = new
PrimaryKey.Factory(metadata.comparator);
+
+ int rows = 1000000;
+
+ PrimaryKey[] primaryKeys = new PrimaryKey[rows];
+ int partition = 0;
+ int partitionCounter = 0;
+ for (int index = 0; index < rows; index++)
+ {
+ primaryKeys[index] = factory.create(makeKey(metadata, (long)
partition, (long) partition), makeClustering(metadata));
+ partitionCounter++;
+ if (partitionCounter == partitionSize)
+ {
+ partition++;
+ partitionCounter = 0;
+ }
+ }
+
+ Arrays.sort(primaryKeys);
+
+ DecoratedKey lastKey = null;
+ for (PrimaryKey primaryKey : primaryKeys)
+ {
+ if (lastKey == null ||
lastKey.compareTo(primaryKey.partitionKey()) < 0)
+ {
+ lastKey = primaryKey.partitionKey();
+ writer.startPartition(lastKey);
+ }
+ writer.nextRow(primaryKey);
+ }
+
+ writer.complete();
+
+ SSTableReader sstableReader = mock(SSTableReader.class);
+ when(sstableReader.metadata()).thenReturn(metadata);
+
+ PrimaryKeyMap.Factory mapFactory = new
WideRowAwarePrimaryKeyMap.WideRowAwarePrimaryKeyMapFactory(indexDescriptor,
sstableReader);
+
+ primaryKeyMap = mapFactory.newPerSSTablePrimaryKeyMap();
+
+ primaryKey = primaryKeys[500000];
+ }
+
+ @Benchmark
+ public long advanceToTerm()
+ {
+ return primaryKeyMap.rowIdFromPrimaryKey(primaryKey);
+ }
+
+ protected DecoratedKey makeKey(TableMetadata table, Object...partitionKeys)
+ {
+ ByteBuffer key;
+ if (TypeUtil.isComposite(table.partitionKeyType))
+ key =
((CompositeType)table.partitionKeyType).decompose(partitionKeys);
+ else
+ key = table.partitionKeyType.fromString((String)partitionKeys[0]);
+ return table.partitioner.decorateKey(key);
+ }
+
+ protected Clustering<?> makeClustering(TableMetadata table)
Review Comment:
Nit: can be `private`
##########
src/java/org/apache/cassandra/index/sai/StorageAttachedIndexGroup.java:
##########
@@ -219,10 +219,10 @@ public Set<Component> getComponents()
return getComponents(indexes);
}
- static Set<Component> getComponents(Collection<StorageAttachedIndex>
indices)
+ Set<Component> getComponents(Collection<StorageAttachedIndex> indices)
Review Comment:
Nit: can be `private`
##########
test/microbench/org/apache/cassandra/test/microbench/sai/SortedTermsBench.java:
##########
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.test.microbench.sai;
+
+import java.nio.ByteBuffer;
+import java.nio.file.Files;
+import java.util.Arrays;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.cassandra.Util;
+import org.apache.cassandra.config.CassandraRelevantProperties;
+import org.apache.cassandra.config.DatabaseDescriptor;
+import org.apache.cassandra.db.Clustering;
+import org.apache.cassandra.db.DecoratedKey;
+import org.apache.cassandra.db.marshal.CompositeType;
+import org.apache.cassandra.db.marshal.LongType;
+import org.apache.cassandra.db.marshal.UTF8Type;
+import org.apache.cassandra.dht.Murmur3Partitioner;
+import org.apache.cassandra.index.sai.disk.PrimaryKeyMap;
+import org.apache.cassandra.index.sai.disk.format.IndexDescriptor;
+import org.apache.cassandra.index.sai.disk.v1.SSTableComponentsWriter;
+import org.apache.cassandra.index.sai.disk.v1.WideRowAwarePrimaryKeyMap;
+import org.apache.cassandra.index.sai.utils.PrimaryKey;
+import org.apache.cassandra.index.sai.utils.TypeUtil;
+import org.apache.cassandra.io.sstable.Descriptor;
+import org.apache.cassandra.io.sstable.format.SSTableReader;
+import org.apache.cassandra.io.util.File;
+import org.apache.cassandra.schema.TableMetadata;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Threads;
+import org.openjdk.jmh.annotations.Warmup;
+
+import static org.apache.cassandra.index.sai.SAITester.getRandom;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+@BenchmarkMode({Mode.Throughput})
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+@Warmup(iterations = 5, time = 1)
+@Measurement(iterations = 5, time = 10)
+@Fork(value = 1, jvmArgsAppend = "-Xmx512M")
+@Threads(1)
+@State(Scope.Benchmark)
+public class SortedTermsBench
+{
+ static
+ {
+ DatabaseDescriptor.toolInitialization();
+ // Partitioner is not set in client mode.
+ if (DatabaseDescriptor.getPartitioner() == null)
+
DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance);
+ }
+
+ protected TableMetadata metadata;
+ protected IndexDescriptor indexDescriptor;
+
+ private PrimaryKeyMap primaryKeyMap;
+
+ private PrimaryKey primaryKey;
+
+ @Param({"3", "4", "5"})
+ private int partitionBlockShift;
Review Comment:
Nit: All the attributes annotated with `@Param` can be `public` to prevent
warnings
##########
src/java/org/apache/cassandra/index/sai/disk/v1/sortedterms/SortedTermsWriter.java:
##########
@@ -49,9 +44,9 @@
* <p>
* For documentation of the underlying on-disk data structures, see the
package documentation.
* <p>
- * The TERMS_DICT_ constants allow for quickly determining the id of the
current block based on a point id
- * or to check if we are exactly at the beginning of the block.
- * Terms data are organized in blocks of (2 ^ {@link #TERMS_DICT_BLOCK_SHIFT})
terms.
+ * The {@code cassandra.sai.sorted_terms_block_shift} property is used to
quickly determine the id of the current block
Review Comment:
Nit: This can be a reference to `{@link
CassandraRelevantProperties#SAI_SORTED_TERMS_BLOCK_SHIFT}`
##########
test/microbench/org/apache/cassandra/test/microbench/sai/SortedTermsBench.java:
##########
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.test.microbench.sai;
+
+import java.nio.ByteBuffer;
+import java.nio.file.Files;
+import java.util.Arrays;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.cassandra.Util;
+import org.apache.cassandra.config.CassandraRelevantProperties;
+import org.apache.cassandra.config.DatabaseDescriptor;
+import org.apache.cassandra.db.Clustering;
+import org.apache.cassandra.db.DecoratedKey;
+import org.apache.cassandra.db.marshal.CompositeType;
+import org.apache.cassandra.db.marshal.LongType;
+import org.apache.cassandra.db.marshal.UTF8Type;
+import org.apache.cassandra.dht.Murmur3Partitioner;
+import org.apache.cassandra.index.sai.disk.PrimaryKeyMap;
+import org.apache.cassandra.index.sai.disk.format.IndexDescriptor;
+import org.apache.cassandra.index.sai.disk.v1.SSTableComponentsWriter;
+import org.apache.cassandra.index.sai.disk.v1.WideRowAwarePrimaryKeyMap;
+import org.apache.cassandra.index.sai.utils.PrimaryKey;
+import org.apache.cassandra.index.sai.utils.TypeUtil;
+import org.apache.cassandra.io.sstable.Descriptor;
+import org.apache.cassandra.io.sstable.format.SSTableReader;
+import org.apache.cassandra.io.util.File;
+import org.apache.cassandra.schema.TableMetadata;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Threads;
+import org.openjdk.jmh.annotations.Warmup;
+
+import static org.apache.cassandra.index.sai.SAITester.getRandom;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+@BenchmarkMode({Mode.Throughput})
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+@Warmup(iterations = 5, time = 1)
+@Measurement(iterations = 5, time = 10)
+@Fork(value = 1, jvmArgsAppend = "-Xmx512M")
+@Threads(1)
+@State(Scope.Benchmark)
+public class SortedTermsBench
+{
+ static
+ {
+ DatabaseDescriptor.toolInitialization();
+ // Partitioner is not set in client mode.
+ if (DatabaseDescriptor.getPartitioner() == null)
+
DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance);
+ }
+
+ protected TableMetadata metadata;
+ protected IndexDescriptor indexDescriptor;
+
+ private PrimaryKeyMap primaryKeyMap;
+
+ private PrimaryKey primaryKey;
+
+ @Param({"3", "4", "5"})
+ private int partitionBlockShift;
+
+ @Param({"3", "4", "5"})
+ private int clusteringBlockShift;
+
+ @Param({"10", "100", "1000", "10000"})
+ private int partitionSize;
+
+ @Param({"true", "false"})
+ private boolean randomClustering;
+
+ @Setup(Level.Trial)
+ public void trialSetup() throws Exception
+ {
+ String keyspaceName = "ks";
+ String tableName = this.getClass().getSimpleName();
+ metadata = TableMetadata
+ .builder(keyspaceName, tableName)
+ .partitioner(Murmur3Partitioner.instance)
+ .addPartitionKeyColumn("pk1", LongType.instance)
+ .addPartitionKeyColumn("pk2", LongType.instance)
+ .addClusteringColumn("ck1", UTF8Type.instance)
+ .addClusteringColumn("ck2", UTF8Type.instance)
+ .build();
+
+ Descriptor descriptor = new Descriptor(new
File(Files.createTempDirectory("jmh").toFile()),
+ metadata.keyspace,
+ metadata.name,
+ Util.newUUIDGen().get());
+
+ indexDescriptor = IndexDescriptor.create(descriptor,
metadata.comparator);
+
+
CassandraRelevantProperties.SAI_SORTED_TERMS_PARTITION_BLOCK_SHIFT.setInt(partitionBlockShift);
+
CassandraRelevantProperties.SAI_SORTED_TERMS_CLUSTERING_BLOCK_SHIFT.setInt(clusteringBlockShift);
+ SSTableComponentsWriter writer = new
SSTableComponentsWriter(indexDescriptor);
+
+ PrimaryKey.Factory factory = new
PrimaryKey.Factory(metadata.comparator);
+
+ int rows = 1000000;
+
+ PrimaryKey[] primaryKeys = new PrimaryKey[rows];
+ int partition = 0;
+ int partitionCounter = 0;
Review Comment:
`partitionCounter` might look like we are counting partitions, instead of
rows. Perhaps this could be name `partitionRows`, or simply `rows`?
##########
test/microbench/org/apache/cassandra/test/microbench/sai/SortedTermsBench.java:
##########
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.test.microbench.sai;
+
+import java.nio.ByteBuffer;
+import java.nio.file.Files;
+import java.util.Arrays;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.cassandra.Util;
+import org.apache.cassandra.config.CassandraRelevantProperties;
+import org.apache.cassandra.config.DatabaseDescriptor;
+import org.apache.cassandra.db.Clustering;
+import org.apache.cassandra.db.DecoratedKey;
+import org.apache.cassandra.db.marshal.CompositeType;
+import org.apache.cassandra.db.marshal.LongType;
+import org.apache.cassandra.db.marshal.UTF8Type;
+import org.apache.cassandra.dht.Murmur3Partitioner;
+import org.apache.cassandra.index.sai.disk.PrimaryKeyMap;
+import org.apache.cassandra.index.sai.disk.format.IndexDescriptor;
+import org.apache.cassandra.index.sai.disk.v1.SSTableComponentsWriter;
+import org.apache.cassandra.index.sai.disk.v1.WideRowAwarePrimaryKeyMap;
+import org.apache.cassandra.index.sai.utils.PrimaryKey;
+import org.apache.cassandra.index.sai.utils.TypeUtil;
+import org.apache.cassandra.io.sstable.Descriptor;
+import org.apache.cassandra.io.sstable.format.SSTableReader;
+import org.apache.cassandra.io.util.File;
+import org.apache.cassandra.schema.TableMetadata;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Threads;
+import org.openjdk.jmh.annotations.Warmup;
+
+import static org.apache.cassandra.index.sai.SAITester.getRandom;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+@BenchmarkMode({Mode.Throughput})
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+@Warmup(iterations = 5, time = 1)
+@Measurement(iterations = 5, time = 10)
+@Fork(value = 1, jvmArgsAppend = "-Xmx512M")
+@Threads(1)
+@State(Scope.Benchmark)
+public class SortedTermsBench
+{
+ static
+ {
+ DatabaseDescriptor.toolInitialization();
+ // Partitioner is not set in client mode.
+ if (DatabaseDescriptor.getPartitioner() == null)
+
DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance);
+ }
+
+ protected TableMetadata metadata;
+ protected IndexDescriptor indexDescriptor;
+
+ private PrimaryKeyMap primaryKeyMap;
+
+ private PrimaryKey primaryKey;
+
+ @Param({"3", "4", "5"})
+ private int partitionBlockShift;
+
+ @Param({"3", "4", "5"})
+ private int clusteringBlockShift;
+
+ @Param({"10", "100", "1000", "10000"})
+ private int partitionSize;
+
+ @Param({"true", "false"})
+ private boolean randomClustering;
+
+ @Setup(Level.Trial)
+ public void trialSetup() throws Exception
+ {
+ String keyspaceName = "ks";
+ String tableName = this.getClass().getSimpleName();
+ metadata = TableMetadata
+ .builder(keyspaceName, tableName)
+ .partitioner(Murmur3Partitioner.instance)
+ .addPartitionKeyColumn("pk1", LongType.instance)
+ .addPartitionKeyColumn("pk2", LongType.instance)
+ .addClusteringColumn("ck1", UTF8Type.instance)
+ .addClusteringColumn("ck2", UTF8Type.instance)
+ .build();
+
+ Descriptor descriptor = new Descriptor(new
File(Files.createTempDirectory("jmh").toFile()),
+ metadata.keyspace,
+ metadata.name,
+ Util.newUUIDGen().get());
+
+ indexDescriptor = IndexDescriptor.create(descriptor,
metadata.comparator);
+
+
CassandraRelevantProperties.SAI_SORTED_TERMS_PARTITION_BLOCK_SHIFT.setInt(partitionBlockShift);
+
CassandraRelevantProperties.SAI_SORTED_TERMS_CLUSTERING_BLOCK_SHIFT.setInt(clusteringBlockShift);
+ SSTableComponentsWriter writer = new
SSTableComponentsWriter(indexDescriptor);
+
+ PrimaryKey.Factory factory = new
PrimaryKey.Factory(metadata.comparator);
+
+ int rows = 1000000;
+
+ PrimaryKey[] primaryKeys = new PrimaryKey[rows];
+ int partition = 0;
+ int partitionCounter = 0;
+ for (int index = 0; index < rows; index++)
+ {
+ primaryKeys[index] = factory.create(makeKey(metadata, (long)
partition, (long) partition), makeClustering(metadata));
+ partitionCounter++;
+ if (partitionCounter == partitionSize)
+ {
+ partition++;
+ partitionCounter = 0;
+ }
+ }
+
+ Arrays.sort(primaryKeys);
+
+ DecoratedKey lastKey = null;
+ for (PrimaryKey primaryKey : primaryKeys)
+ {
+ if (lastKey == null ||
lastKey.compareTo(primaryKey.partitionKey()) < 0)
+ {
+ lastKey = primaryKey.partitionKey();
+ writer.startPartition(lastKey);
+ }
+ writer.nextRow(primaryKey);
+ }
+
+ writer.complete();
+
+ SSTableReader sstableReader = mock(SSTableReader.class);
+ when(sstableReader.metadata()).thenReturn(metadata);
+
+ PrimaryKeyMap.Factory mapFactory = new
WideRowAwarePrimaryKeyMap.WideRowAwarePrimaryKeyMapFactory(indexDescriptor,
sstableReader);
+
+ primaryKeyMap = mapFactory.newPerSSTablePrimaryKeyMap();
+
+ primaryKey = primaryKeys[500000];
Review Comment:
Why `500000 `?
##########
test/microbench/org/apache/cassandra/test/microbench/sai/SortedTermsBench.java:
##########
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.test.microbench.sai;
+
+import java.nio.ByteBuffer;
+import java.nio.file.Files;
+import java.util.Arrays;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.cassandra.Util;
+import org.apache.cassandra.config.CassandraRelevantProperties;
+import org.apache.cassandra.config.DatabaseDescriptor;
+import org.apache.cassandra.db.Clustering;
+import org.apache.cassandra.db.DecoratedKey;
+import org.apache.cassandra.db.marshal.CompositeType;
+import org.apache.cassandra.db.marshal.LongType;
+import org.apache.cassandra.db.marshal.UTF8Type;
+import org.apache.cassandra.dht.Murmur3Partitioner;
+import org.apache.cassandra.index.sai.disk.PrimaryKeyMap;
+import org.apache.cassandra.index.sai.disk.format.IndexDescriptor;
+import org.apache.cassandra.index.sai.disk.v1.SSTableComponentsWriter;
+import org.apache.cassandra.index.sai.disk.v1.WideRowAwarePrimaryKeyMap;
+import org.apache.cassandra.index.sai.utils.PrimaryKey;
+import org.apache.cassandra.index.sai.utils.TypeUtil;
+import org.apache.cassandra.io.sstable.Descriptor;
+import org.apache.cassandra.io.sstable.format.SSTableReader;
+import org.apache.cassandra.io.util.File;
+import org.apache.cassandra.schema.TableMetadata;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Threads;
+import org.openjdk.jmh.annotations.Warmup;
+
+import static org.apache.cassandra.index.sai.SAITester.getRandom;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+@BenchmarkMode({Mode.Throughput})
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+@Warmup(iterations = 5, time = 1)
+@Measurement(iterations = 5, time = 10)
+@Fork(value = 1, jvmArgsAppend = "-Xmx512M")
+@Threads(1)
+@State(Scope.Benchmark)
+public class SortedTermsBench
+{
+ static
+ {
+ DatabaseDescriptor.toolInitialization();
+ // Partitioner is not set in client mode.
+ if (DatabaseDescriptor.getPartitioner() == null)
+
DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance);
+ }
+
+ protected TableMetadata metadata;
+ protected IndexDescriptor indexDescriptor;
+
+ private PrimaryKeyMap primaryKeyMap;
+
+ private PrimaryKey primaryKey;
+
+ @Param({"3", "4", "5"})
+ private int partitionBlockShift;
+
+ @Param({"3", "4", "5"})
+ private int clusteringBlockShift;
+
+ @Param({"10", "100", "1000", "10000"})
+ private int partitionSize;
+
+ @Param({"true", "false"})
+ private boolean randomClustering;
+
+ @Setup(Level.Trial)
+ public void trialSetup() throws Exception
+ {
+ String keyspaceName = "ks";
+ String tableName = this.getClass().getSimpleName();
+ metadata = TableMetadata
+ .builder(keyspaceName, tableName)
+ .partitioner(Murmur3Partitioner.instance)
+ .addPartitionKeyColumn("pk1", LongType.instance)
+ .addPartitionKeyColumn("pk2", LongType.instance)
+ .addClusteringColumn("ck1", UTF8Type.instance)
+ .addClusteringColumn("ck2", UTF8Type.instance)
+ .build();
+
+ Descriptor descriptor = new Descriptor(new
File(Files.createTempDirectory("jmh").toFile()),
+ metadata.keyspace,
+ metadata.name,
+ Util.newUUIDGen().get());
+
+ indexDescriptor = IndexDescriptor.create(descriptor,
metadata.comparator);
+
+
CassandraRelevantProperties.SAI_SORTED_TERMS_PARTITION_BLOCK_SHIFT.setInt(partitionBlockShift);
+
CassandraRelevantProperties.SAI_SORTED_TERMS_CLUSTERING_BLOCK_SHIFT.setInt(clusteringBlockShift);
+ SSTableComponentsWriter writer = new
SSTableComponentsWriter(indexDescriptor);
+
+ PrimaryKey.Factory factory = new
PrimaryKey.Factory(metadata.comparator);
+
+ int rows = 1000000;
+
+ PrimaryKey[] primaryKeys = new PrimaryKey[rows];
+ int partition = 0;
+ int partitionCounter = 0;
+ for (int index = 0; index < rows; index++)
+ {
+ primaryKeys[index] = factory.create(makeKey(metadata, (long)
partition, (long) partition), makeClustering(metadata));
+ partitionCounter++;
+ if (partitionCounter == partitionSize)
+ {
+ partition++;
+ partitionCounter = 0;
+ }
+ }
+
+ Arrays.sort(primaryKeys);
+
+ DecoratedKey lastKey = null;
+ for (PrimaryKey primaryKey : primaryKeys)
+ {
+ if (lastKey == null ||
lastKey.compareTo(primaryKey.partitionKey()) < 0)
+ {
+ lastKey = primaryKey.partitionKey();
+ writer.startPartition(lastKey);
+ }
+ writer.nextRow(primaryKey);
+ }
+
+ writer.complete();
+
+ SSTableReader sstableReader = mock(SSTableReader.class);
+ when(sstableReader.metadata()).thenReturn(metadata);
+
+ PrimaryKeyMap.Factory mapFactory = new
WideRowAwarePrimaryKeyMap.WideRowAwarePrimaryKeyMapFactory(indexDescriptor,
sstableReader);
+
+ primaryKeyMap = mapFactory.newPerSSTablePrimaryKeyMap();
+
+ primaryKey = primaryKeys[500000];
+ }
+
+ @Benchmark
+ public long advanceToTerm()
+ {
+ return primaryKeyMap.rowIdFromPrimaryKey(primaryKey);
+ }
+
+ protected DecoratedKey makeKey(TableMetadata table, Object...partitionKeys)
Review Comment:
Nit: can be `private static`
##########
src/java/org/apache/cassandra/index/sai/disk/v1/SkinnyRowAwarePrimaryKeyMap.java:
##########
@@ -0,0 +1,184 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.index.sai.disk.v1;
+
+import org.apache.cassandra.db.BufferDecoratedKey;
+import org.apache.cassandra.db.Clustering;
+import org.apache.cassandra.db.DecoratedKey;
+import org.apache.cassandra.dht.IPartitioner;
+import org.apache.cassandra.index.sai.disk.PrimaryKeyMap;
+import org.apache.cassandra.index.sai.disk.format.IndexComponent;
+import org.apache.cassandra.index.sai.disk.format.IndexDescriptor;
+import org.apache.cassandra.index.sai.disk.v1.bitpack.BlockPackedReader;
+import
org.apache.cassandra.index.sai.disk.v1.bitpack.MonotonicBlockPackedReader;
+import org.apache.cassandra.index.sai.disk.v1.bitpack.NumericValuesMeta;
+import org.apache.cassandra.index.sai.disk.v1.sortedterms.SortedTermsMeta;
+import org.apache.cassandra.index.sai.disk.v1.sortedterms.SortedTermsReader;
+import org.apache.cassandra.index.sai.utils.PrimaryKey;
+import org.apache.cassandra.io.sstable.format.SSTableReader;
+import org.apache.cassandra.io.util.FileHandle;
+import org.apache.cassandra.io.util.FileUtils;
+import org.apache.cassandra.utils.Throwables;
+import org.apache.cassandra.utils.bytecomparable.ByteComparable;
+import org.apache.cassandra.utils.bytecomparable.ByteSource;
+import org.apache.cassandra.utils.bytecomparable.ByteSourceInverse;
+
+import javax.annotation.concurrent.NotThreadSafe;
+import javax.annotation.concurrent.ThreadSafe;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+
+/**
+ * A row-aware {@link PrimaryKeyMap} for skinny tables (those with no
clustering columns).
+ * <p>
+ * This uses the following on-disk structures:
+ * <ul>
+ * <li>Block-packed structure for rowId to token lookups using {@link
BlockPackedReader}.
+ * Uses component {@link IndexComponent#TOKEN_VALUES} </li>
+ * <li>A sorted-terms structure for rowId to {@link PrimaryKey} and {@link
PrimaryKey} to rowId lookups using
+ * {@link SortedTermsReader}. Uses components {@link
IndexComponent#PARTITION_KEY_BLOCKS} and
+ * {@link IndexComponent#PARTITION_KEY_BLOCK_OFFSETS}</li>
+ * </ul>
+ *
+ * While the {@link SkinnyRowAwarePrimaryKeyMapFactory} is threadsafe,
individual instances of the {@link SkinnyRowAwarePrimaryKeyMap}
+ * are not.
+ */
+@NotThreadSafe
+public class SkinnyRowAwarePrimaryKeyMap implements PrimaryKeyMap
Review Comment:
I wonder if the row-aware concept should be mentioned in the name of the
class and the first line of the JavaDoc. We don't have any other type of
primary key maps, and row awareness seems meaningless for the skinny tables
where `SkinnyRowAwarePrimaryKeyMap` is meant to be used.
So maybe the classes could be named just `SkinnyPrimaryKeyMap` and
`WidePrimaryKeyMap`?
##########
src/java/org/apache/cassandra/index/sai/disk/v1/LongArray.java:
##########
@@ -37,6 +37,13 @@ public interface LongArray extends Closeable
*/
long length();
+ /**
+ * Using the given value returns the first index corresponding to the
value.
Review Comment:
```suggestion
* Using the given value returns the first index corresponding to the
value.
*
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]