[GitHub] [cassandra] adelapena commented on a diff in pull request #2540: Reduce size of per-SSTable index components for SAI

via GitHub Tue, 08 Aug 2023 04:27:02 -0700


adelapena commented on code in PR #2540:
URL: https://github.com/apache/cassandra/pull/2540#discussion_r1286956201



##########
src/java/org/apache/cassandra/index/sai/disk/format/IndexComponent.java:
##########
@@ -68,19 +68,23 @@ public enum IndexComponent
     TOKEN_VALUES("TokenValues"),
 
     /**
-     * An on-disk trie containing the primary keys used for looking up the 
rowId from a partition key
+     * An on-disk block packed index containing the starting and ending rowIds 
for each partition.
      */
-    PRIMARY_KEY_TRIE("PrimaryKeyTrie"),
+    PARTITION_SIZES("PartitionSizes"),
 
     /**
      * Prefix-compressed blocks of primary keys used for rowId to partition 
key lookups
      */
-    PRIMARY_KEY_BLOCKS("PrimaryKeyBlocks"),
+    PARTITION_KEY_BLOCKS("PartitionKeyBlocks"),

Review Comment:
   The JavaDoc of `PARTITION_KEY_BLOCKS` and `PARTITION_KEY_BLOCK_OFFSETS` 
would need to be updated to talk about partition keys instead of primary keys. 
The new components for clustering keys would also need some JavaDoc.



##########
test/microbench/org/apache/cassandra/test/microbench/sai/SortedTermsBench.java:
##########
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.test.microbench.sai;
+
+import java.nio.ByteBuffer;
+import java.nio.file.Files;
+import java.util.Arrays;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.cassandra.Util;
+import org.apache.cassandra.config.CassandraRelevantProperties;
+import org.apache.cassandra.config.DatabaseDescriptor;
+import org.apache.cassandra.db.Clustering;
+import org.apache.cassandra.db.DecoratedKey;
+import org.apache.cassandra.db.marshal.CompositeType;
+import org.apache.cassandra.db.marshal.LongType;
+import org.apache.cassandra.db.marshal.UTF8Type;
+import org.apache.cassandra.dht.Murmur3Partitioner;
+import org.apache.cassandra.index.sai.disk.PrimaryKeyMap;
+import org.apache.cassandra.index.sai.disk.format.IndexDescriptor;
+import org.apache.cassandra.index.sai.disk.v1.SSTableComponentsWriter;
+import org.apache.cassandra.index.sai.disk.v1.WideRowAwarePrimaryKeyMap;
+import org.apache.cassandra.index.sai.utils.PrimaryKey;
+import org.apache.cassandra.index.sai.utils.TypeUtil;
+import org.apache.cassandra.io.sstable.Descriptor;
+import org.apache.cassandra.io.sstable.format.SSTableReader;
+import org.apache.cassandra.io.util.File;
+import org.apache.cassandra.schema.TableMetadata;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Threads;
+import org.openjdk.jmh.annotations.Warmup;
+
+import static org.apache.cassandra.index.sai.SAITester.getRandom;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+@BenchmarkMode({Mode.Throughput})
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+@Warmup(iterations = 5, time = 1)
+@Measurement(iterations = 5, time = 10)
+@Fork(value = 1, jvmArgsAppend = "-Xmx512M")
+@Threads(1)
+@State(Scope.Benchmark)
+public class SortedTermsBench
+{
+    static
+    {
+        DatabaseDescriptor.toolInitialization();
+        // Partitioner is not set in client mode.
+        if (DatabaseDescriptor.getPartitioner() == null)
+            
DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance);
+    }
+
+    protected TableMetadata metadata;
+    protected IndexDescriptor indexDescriptor;
+
+    private PrimaryKeyMap primaryKeyMap;
+
+    private PrimaryKey primaryKey;
+
+    @Param({"3", "4", "5"})
+    private int partitionBlockShift;
+
+    @Param({"3", "4", "5"})
+    private int clusteringBlockShift;
+
+    @Param({"10", "100", "1000", "10000"})
+    private int partitionSize;
+
+    @Param({"true", "false"})
+    private boolean randomClustering;
+
+    @Setup(Level.Trial)
+    public void trialSetup() throws Exception
+    {
+        String keyspaceName = "ks";
+        String tableName = this.getClass().getSimpleName();
+        metadata = TableMetadata
+                   .builder(keyspaceName, tableName)
+                   .partitioner(Murmur3Partitioner.instance)
+                   .addPartitionKeyColumn("pk1", LongType.instance)
+                   .addPartitionKeyColumn("pk2", LongType.instance)
+                   .addClusteringColumn("ck1", UTF8Type.instance)
+                   .addClusteringColumn("ck2", UTF8Type.instance)
+                   .build();
+
+        Descriptor descriptor = new Descriptor(new 
File(Files.createTempDirectory("jmh").toFile()),
+                                               metadata.keyspace,
+                                               metadata.name,
+                                               Util.newUUIDGen().get());
+
+        indexDescriptor = IndexDescriptor.create(descriptor, 
metadata.comparator);
+
+        
CassandraRelevantProperties.SAI_SORTED_TERMS_PARTITION_BLOCK_SHIFT.setInt(partitionBlockShift);
+        
CassandraRelevantProperties.SAI_SORTED_TERMS_CLUSTERING_BLOCK_SHIFT.setInt(clusteringBlockShift);
+        SSTableComponentsWriter writer = new 
SSTableComponentsWriter(indexDescriptor);
+
+        PrimaryKey.Factory factory = new 
PrimaryKey.Factory(metadata.comparator);
+
+        int rows = 1000000;

Review Comment:
   This could be a constant. I would also write it as `1_000_000` for 
readability.



##########
test/microbench/org/apache/cassandra/test/microbench/sai/SortedTermsBench.java:
##########
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.test.microbench.sai;
+
+import java.nio.ByteBuffer;
+import java.nio.file.Files;
+import java.util.Arrays;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.cassandra.Util;
+import org.apache.cassandra.config.CassandraRelevantProperties;
+import org.apache.cassandra.config.DatabaseDescriptor;
+import org.apache.cassandra.db.Clustering;
+import org.apache.cassandra.db.DecoratedKey;
+import org.apache.cassandra.db.marshal.CompositeType;
+import org.apache.cassandra.db.marshal.LongType;
+import org.apache.cassandra.db.marshal.UTF8Type;
+import org.apache.cassandra.dht.Murmur3Partitioner;
+import org.apache.cassandra.index.sai.disk.PrimaryKeyMap;
+import org.apache.cassandra.index.sai.disk.format.IndexDescriptor;
+import org.apache.cassandra.index.sai.disk.v1.SSTableComponentsWriter;
+import org.apache.cassandra.index.sai.disk.v1.WideRowAwarePrimaryKeyMap;
+import org.apache.cassandra.index.sai.utils.PrimaryKey;
+import org.apache.cassandra.index.sai.utils.TypeUtil;
+import org.apache.cassandra.io.sstable.Descriptor;
+import org.apache.cassandra.io.sstable.format.SSTableReader;
+import org.apache.cassandra.io.util.File;
+import org.apache.cassandra.schema.TableMetadata;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Threads;
+import org.openjdk.jmh.annotations.Warmup;
+
+import static org.apache.cassandra.index.sai.SAITester.getRandom;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+@BenchmarkMode({Mode.Throughput})
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+@Warmup(iterations = 5, time = 1)
+@Measurement(iterations = 5, time = 10)
+@Fork(value = 1, jvmArgsAppend = "-Xmx512M")
+@Threads(1)
+@State(Scope.Benchmark)
+public class SortedTermsBench
+{
+    static
+    {
+        DatabaseDescriptor.toolInitialization();
+        // Partitioner is not set in client mode.
+        if (DatabaseDescriptor.getPartitioner() == null)
+            
DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance);
+    }
+
+    protected TableMetadata metadata;
+    protected IndexDescriptor indexDescriptor;
+
+    private PrimaryKeyMap primaryKeyMap;
+
+    private PrimaryKey primaryKey;
+
+    @Param({"3", "4", "5"})
+    private int partitionBlockShift;
+
+    @Param({"3", "4", "5"})
+    private int clusteringBlockShift;
+
+    @Param({"10", "100", "1000", "10000"})
+    private int partitionSize;
+
+    @Param({"true", "false"})
+    private boolean randomClustering;
+
+    @Setup(Level.Trial)
+    public void trialSetup() throws Exception
+    {
+        String keyspaceName = "ks";
+        String tableName = this.getClass().getSimpleName();
+        metadata = TableMetadata
+                   .builder(keyspaceName, tableName)
+                   .partitioner(Murmur3Partitioner.instance)
+                   .addPartitionKeyColumn("pk1", LongType.instance)
+                   .addPartitionKeyColumn("pk2", LongType.instance)
+                   .addClusteringColumn("ck1", UTF8Type.instance)
+                   .addClusteringColumn("ck2", UTF8Type.instance)
+                   .build();
+
+        Descriptor descriptor = new Descriptor(new 
File(Files.createTempDirectory("jmh").toFile()),
+                                               metadata.keyspace,
+                                               metadata.name,
+                                               Util.newUUIDGen().get());
+
+        indexDescriptor = IndexDescriptor.create(descriptor, 
metadata.comparator);
+
+        
CassandraRelevantProperties.SAI_SORTED_TERMS_PARTITION_BLOCK_SHIFT.setInt(partitionBlockShift);
+        
CassandraRelevantProperties.SAI_SORTED_TERMS_CLUSTERING_BLOCK_SHIFT.setInt(clusteringBlockShift);
+        SSTableComponentsWriter writer = new 
SSTableComponentsWriter(indexDescriptor);
+
+        PrimaryKey.Factory factory = new 
PrimaryKey.Factory(metadata.comparator);
+
+        int rows = 1000000;
+
+        PrimaryKey[] primaryKeys = new PrimaryKey[rows];
+        int partition = 0;
+        int partitionCounter = 0;
+        for (int index = 0; index < rows; index++)
+        {
+            primaryKeys[index] = factory.create(makeKey(metadata, (long) 
partition, (long) partition), makeClustering(metadata));
+            partitionCounter++;
+            if (partitionCounter == partitionSize)
+            {
+                partition++;
+                partitionCounter = 0;
+            }
+        }
+
+        Arrays.sort(primaryKeys);
+
+        DecoratedKey lastKey = null;
+        for (PrimaryKey primaryKey : primaryKeys)
+        {
+            if (lastKey == null || 
lastKey.compareTo(primaryKey.partitionKey()) < 0)
+            {
+                lastKey = primaryKey.partitionKey();
+                writer.startPartition(lastKey);
+            }
+            writer.nextRow(primaryKey);
+        }
+
+        writer.complete();
+
+        SSTableReader sstableReader = mock(SSTableReader.class);
+        when(sstableReader.metadata()).thenReturn(metadata);
+
+        PrimaryKeyMap.Factory mapFactory = new 
WideRowAwarePrimaryKeyMap.WideRowAwarePrimaryKeyMapFactory(indexDescriptor, 
sstableReader);
+
+        primaryKeyMap = mapFactory.newPerSSTablePrimaryKeyMap();
+
+        primaryKey = primaryKeys[500000];
+    }
+
+    @Benchmark
+    public long advanceToTerm()
+    {
+        return primaryKeyMap.rowIdFromPrimaryKey(primaryKey);
+    }
+
+    protected DecoratedKey makeKey(TableMetadata table, Object...partitionKeys)
+    {
+        ByteBuffer key;
+        if (TypeUtil.isComposite(table.partitionKeyType))
+            key = 
((CompositeType)table.partitionKeyType).decompose(partitionKeys);
+        else
+            key = table.partitionKeyType.fromString((String)partitionKeys[0]);
+        return table.partitioner.decorateKey(key);
+    }
+
+    protected Clustering<?> makeClustering(TableMetadata table)

Review Comment:
   Nit: can be `private`



##########
src/java/org/apache/cassandra/index/sai/StorageAttachedIndexGroup.java:
##########
@@ -219,10 +219,10 @@ public Set<Component> getComponents()
         return getComponents(indexes);
     }
 
-    static Set<Component> getComponents(Collection<StorageAttachedIndex> 
indices)
+    Set<Component> getComponents(Collection<StorageAttachedIndex> indices)

Review Comment:
   Nit: can be `private`



##########
test/microbench/org/apache/cassandra/test/microbench/sai/SortedTermsBench.java:
##########
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.test.microbench.sai;
+
+import java.nio.ByteBuffer;
+import java.nio.file.Files;
+import java.util.Arrays;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.cassandra.Util;
+import org.apache.cassandra.config.CassandraRelevantProperties;
+import org.apache.cassandra.config.DatabaseDescriptor;
+import org.apache.cassandra.db.Clustering;
+import org.apache.cassandra.db.DecoratedKey;
+import org.apache.cassandra.db.marshal.CompositeType;
+import org.apache.cassandra.db.marshal.LongType;
+import org.apache.cassandra.db.marshal.UTF8Type;
+import org.apache.cassandra.dht.Murmur3Partitioner;
+import org.apache.cassandra.index.sai.disk.PrimaryKeyMap;
+import org.apache.cassandra.index.sai.disk.format.IndexDescriptor;
+import org.apache.cassandra.index.sai.disk.v1.SSTableComponentsWriter;
+import org.apache.cassandra.index.sai.disk.v1.WideRowAwarePrimaryKeyMap;
+import org.apache.cassandra.index.sai.utils.PrimaryKey;
+import org.apache.cassandra.index.sai.utils.TypeUtil;
+import org.apache.cassandra.io.sstable.Descriptor;
+import org.apache.cassandra.io.sstable.format.SSTableReader;
+import org.apache.cassandra.io.util.File;
+import org.apache.cassandra.schema.TableMetadata;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Threads;
+import org.openjdk.jmh.annotations.Warmup;
+
+import static org.apache.cassandra.index.sai.SAITester.getRandom;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+@BenchmarkMode({Mode.Throughput})
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+@Warmup(iterations = 5, time = 1)
+@Measurement(iterations = 5, time = 10)
+@Fork(value = 1, jvmArgsAppend = "-Xmx512M")
+@Threads(1)
+@State(Scope.Benchmark)
+public class SortedTermsBench
+{
+    static
+    {
+        DatabaseDescriptor.toolInitialization();
+        // Partitioner is not set in client mode.
+        if (DatabaseDescriptor.getPartitioner() == null)
+            
DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance);
+    }
+
+    protected TableMetadata metadata;
+    protected IndexDescriptor indexDescriptor;
+
+    private PrimaryKeyMap primaryKeyMap;
+
+    private PrimaryKey primaryKey;
+
+    @Param({"3", "4", "5"})
+    private int partitionBlockShift;

Review Comment:
   Nit: All the attributes annotated with `@Param` can be `public` to prevent 
warnings



##########
src/java/org/apache/cassandra/index/sai/disk/v1/sortedterms/SortedTermsWriter.java:
##########
@@ -49,9 +44,9 @@
  * <p>
  * For documentation of the underlying on-disk data structures, see the 
package documentation.
  * <p>
- * The TERMS_DICT_ constants allow for quickly determining the id of the 
current block based on a point id
- * or to check if we are exactly at the beginning of the block.
- * Terms data are organized in blocks of (2 ^ {@link #TERMS_DICT_BLOCK_SHIFT}) 
terms.
+ * The {@code cassandra.sai.sorted_terms_block_shift} property is used to 
quickly determine the id of the current block

Review Comment:
   Nit: This can be a reference to `{@link 
CassandraRelevantProperties#SAI_SORTED_TERMS_BLOCK_SHIFT}`



##########
test/microbench/org/apache/cassandra/test/microbench/sai/SortedTermsBench.java:
##########
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.test.microbench.sai;
+
+import java.nio.ByteBuffer;
+import java.nio.file.Files;
+import java.util.Arrays;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.cassandra.Util;
+import org.apache.cassandra.config.CassandraRelevantProperties;
+import org.apache.cassandra.config.DatabaseDescriptor;
+import org.apache.cassandra.db.Clustering;
+import org.apache.cassandra.db.DecoratedKey;
+import org.apache.cassandra.db.marshal.CompositeType;
+import org.apache.cassandra.db.marshal.LongType;
+import org.apache.cassandra.db.marshal.UTF8Type;
+import org.apache.cassandra.dht.Murmur3Partitioner;
+import org.apache.cassandra.index.sai.disk.PrimaryKeyMap;
+import org.apache.cassandra.index.sai.disk.format.IndexDescriptor;
+import org.apache.cassandra.index.sai.disk.v1.SSTableComponentsWriter;
+import org.apache.cassandra.index.sai.disk.v1.WideRowAwarePrimaryKeyMap;
+import org.apache.cassandra.index.sai.utils.PrimaryKey;
+import org.apache.cassandra.index.sai.utils.TypeUtil;
+import org.apache.cassandra.io.sstable.Descriptor;
+import org.apache.cassandra.io.sstable.format.SSTableReader;
+import org.apache.cassandra.io.util.File;
+import org.apache.cassandra.schema.TableMetadata;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Threads;
+import org.openjdk.jmh.annotations.Warmup;
+
+import static org.apache.cassandra.index.sai.SAITester.getRandom;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+@BenchmarkMode({Mode.Throughput})
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+@Warmup(iterations = 5, time = 1)
+@Measurement(iterations = 5, time = 10)
+@Fork(value = 1, jvmArgsAppend = "-Xmx512M")
+@Threads(1)
+@State(Scope.Benchmark)
+public class SortedTermsBench
+{
+    static
+    {
+        DatabaseDescriptor.toolInitialization();
+        // Partitioner is not set in client mode.
+        if (DatabaseDescriptor.getPartitioner() == null)
+            
DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance);
+    }
+
+    protected TableMetadata metadata;
+    protected IndexDescriptor indexDescriptor;
+
+    private PrimaryKeyMap primaryKeyMap;
+
+    private PrimaryKey primaryKey;
+
+    @Param({"3", "4", "5"})
+    private int partitionBlockShift;
+
+    @Param({"3", "4", "5"})
+    private int clusteringBlockShift;
+
+    @Param({"10", "100", "1000", "10000"})
+    private int partitionSize;
+
+    @Param({"true", "false"})
+    private boolean randomClustering;
+
+    @Setup(Level.Trial)
+    public void trialSetup() throws Exception
+    {
+        String keyspaceName = "ks";
+        String tableName = this.getClass().getSimpleName();
+        metadata = TableMetadata
+                   .builder(keyspaceName, tableName)
+                   .partitioner(Murmur3Partitioner.instance)
+                   .addPartitionKeyColumn("pk1", LongType.instance)
+                   .addPartitionKeyColumn("pk2", LongType.instance)
+                   .addClusteringColumn("ck1", UTF8Type.instance)
+                   .addClusteringColumn("ck2", UTF8Type.instance)
+                   .build();
+
+        Descriptor descriptor = new Descriptor(new 
File(Files.createTempDirectory("jmh").toFile()),
+                                               metadata.keyspace,
+                                               metadata.name,
+                                               Util.newUUIDGen().get());
+
+        indexDescriptor = IndexDescriptor.create(descriptor, 
metadata.comparator);
+
+        
CassandraRelevantProperties.SAI_SORTED_TERMS_PARTITION_BLOCK_SHIFT.setInt(partitionBlockShift);
+        
CassandraRelevantProperties.SAI_SORTED_TERMS_CLUSTERING_BLOCK_SHIFT.setInt(clusteringBlockShift);
+        SSTableComponentsWriter writer = new 
SSTableComponentsWriter(indexDescriptor);
+
+        PrimaryKey.Factory factory = new 
PrimaryKey.Factory(metadata.comparator);
+
+        int rows = 1000000;
+
+        PrimaryKey[] primaryKeys = new PrimaryKey[rows];
+        int partition = 0;
+        int partitionCounter = 0;

Review Comment:
   `partitionCounter` might look like we are counting partitions, instead of 
rows. Perhaps this could be named `partitionRows`, or simply `rows`?



##########
test/microbench/org/apache/cassandra/test/microbench/sai/SortedTermsBench.java:
##########
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.test.microbench.sai;
+
+import java.nio.ByteBuffer;
+import java.nio.file.Files;
+import java.util.Arrays;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.cassandra.Util;
+import org.apache.cassandra.config.CassandraRelevantProperties;
+import org.apache.cassandra.config.DatabaseDescriptor;
+import org.apache.cassandra.db.Clustering;
+import org.apache.cassandra.db.DecoratedKey;
+import org.apache.cassandra.db.marshal.CompositeType;
+import org.apache.cassandra.db.marshal.LongType;
+import org.apache.cassandra.db.marshal.UTF8Type;
+import org.apache.cassandra.dht.Murmur3Partitioner;
+import org.apache.cassandra.index.sai.disk.PrimaryKeyMap;
+import org.apache.cassandra.index.sai.disk.format.IndexDescriptor;
+import org.apache.cassandra.index.sai.disk.v1.SSTableComponentsWriter;
+import org.apache.cassandra.index.sai.disk.v1.WideRowAwarePrimaryKeyMap;
+import org.apache.cassandra.index.sai.utils.PrimaryKey;
+import org.apache.cassandra.index.sai.utils.TypeUtil;
+import org.apache.cassandra.io.sstable.Descriptor;
+import org.apache.cassandra.io.sstable.format.SSTableReader;
+import org.apache.cassandra.io.util.File;
+import org.apache.cassandra.schema.TableMetadata;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Threads;
+import org.openjdk.jmh.annotations.Warmup;
+
+import static org.apache.cassandra.index.sai.SAITester.getRandom;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+@BenchmarkMode({Mode.Throughput})
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+@Warmup(iterations = 5, time = 1)
+@Measurement(iterations = 5, time = 10)
+@Fork(value = 1, jvmArgsAppend = "-Xmx512M")
+@Threads(1)
+@State(Scope.Benchmark)
+public class SortedTermsBench
+{
+    static
+    {
+        DatabaseDescriptor.toolInitialization();
+        // Partitioner is not set in client mode.
+        if (DatabaseDescriptor.getPartitioner() == null)
+            
DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance);
+    }
+
+    protected TableMetadata metadata;
+    protected IndexDescriptor indexDescriptor;
+
+    private PrimaryKeyMap primaryKeyMap;
+
+    private PrimaryKey primaryKey;
+
+    @Param({"3", "4", "5"})
+    private int partitionBlockShift;
+
+    @Param({"3", "4", "5"})
+    private int clusteringBlockShift;
+
+    @Param({"10", "100", "1000", "10000"})
+    private int partitionSize;
+
+    @Param({"true", "false"})
+    private boolean randomClustering;
+
+    @Setup(Level.Trial)
+    public void trialSetup() throws Exception
+    {
+        String keyspaceName = "ks";
+        String tableName = this.getClass().getSimpleName();
+        metadata = TableMetadata
+                   .builder(keyspaceName, tableName)
+                   .partitioner(Murmur3Partitioner.instance)
+                   .addPartitionKeyColumn("pk1", LongType.instance)
+                   .addPartitionKeyColumn("pk2", LongType.instance)
+                   .addClusteringColumn("ck1", UTF8Type.instance)
+                   .addClusteringColumn("ck2", UTF8Type.instance)
+                   .build();
+
+        Descriptor descriptor = new Descriptor(new 
File(Files.createTempDirectory("jmh").toFile()),
+                                               metadata.keyspace,
+                                               metadata.name,
+                                               Util.newUUIDGen().get());
+
+        indexDescriptor = IndexDescriptor.create(descriptor, 
metadata.comparator);
+
+        
CassandraRelevantProperties.SAI_SORTED_TERMS_PARTITION_BLOCK_SHIFT.setInt(partitionBlockShift);
+        
CassandraRelevantProperties.SAI_SORTED_TERMS_CLUSTERING_BLOCK_SHIFT.setInt(clusteringBlockShift);
+        SSTableComponentsWriter writer = new 
SSTableComponentsWriter(indexDescriptor);
+
+        PrimaryKey.Factory factory = new 
PrimaryKey.Factory(metadata.comparator);
+
+        int rows = 1000000;
+
+        PrimaryKey[] primaryKeys = new PrimaryKey[rows];
+        int partition = 0;
+        int partitionCounter = 0;
+        for (int index = 0; index < rows; index++)
+        {
+            primaryKeys[index] = factory.create(makeKey(metadata, (long) 
partition, (long) partition), makeClustering(metadata));
+            partitionCounter++;
+            if (partitionCounter == partitionSize)
+            {
+                partition++;
+                partitionCounter = 0;
+            }
+        }
+
+        Arrays.sort(primaryKeys);
+
+        DecoratedKey lastKey = null;
+        for (PrimaryKey primaryKey : primaryKeys)
+        {
+            if (lastKey == null || 
lastKey.compareTo(primaryKey.partitionKey()) < 0)
+            {
+                lastKey = primaryKey.partitionKey();
+                writer.startPartition(lastKey);
+            }
+            writer.nextRow(primaryKey);
+        }
+
+        writer.complete();
+
+        SSTableReader sstableReader = mock(SSTableReader.class);
+        when(sstableReader.metadata()).thenReturn(metadata);
+
+        PrimaryKeyMap.Factory mapFactory = new 
WideRowAwarePrimaryKeyMap.WideRowAwarePrimaryKeyMapFactory(indexDescriptor, 
sstableReader);
+
+        primaryKeyMap = mapFactory.newPerSSTablePrimaryKeyMap();
+
+        primaryKey = primaryKeys[500000];

Review Comment:
   Why `500000`?



##########
test/microbench/org/apache/cassandra/test/microbench/sai/SortedTermsBench.java:
##########
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.test.microbench.sai;
+
+import java.nio.ByteBuffer;
+import java.nio.file.Files;
+import java.util.Arrays;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.cassandra.Util;
+import org.apache.cassandra.config.CassandraRelevantProperties;
+import org.apache.cassandra.config.DatabaseDescriptor;
+import org.apache.cassandra.db.Clustering;
+import org.apache.cassandra.db.DecoratedKey;
+import org.apache.cassandra.db.marshal.CompositeType;
+import org.apache.cassandra.db.marshal.LongType;
+import org.apache.cassandra.db.marshal.UTF8Type;
+import org.apache.cassandra.dht.Murmur3Partitioner;
+import org.apache.cassandra.index.sai.disk.PrimaryKeyMap;
+import org.apache.cassandra.index.sai.disk.format.IndexDescriptor;
+import org.apache.cassandra.index.sai.disk.v1.SSTableComponentsWriter;
+import org.apache.cassandra.index.sai.disk.v1.WideRowAwarePrimaryKeyMap;
+import org.apache.cassandra.index.sai.utils.PrimaryKey;
+import org.apache.cassandra.index.sai.utils.TypeUtil;
+import org.apache.cassandra.io.sstable.Descriptor;
+import org.apache.cassandra.io.sstable.format.SSTableReader;
+import org.apache.cassandra.io.util.File;
+import org.apache.cassandra.schema.TableMetadata;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Threads;
+import org.openjdk.jmh.annotations.Warmup;
+
+import static org.apache.cassandra.index.sai.SAITester.getRandom;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+@BenchmarkMode({Mode.Throughput})
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+@Warmup(iterations = 5, time = 1)
+@Measurement(iterations = 5, time = 10)
+@Fork(value = 1, jvmArgsAppend = "-Xmx512M")
+@Threads(1)
+@State(Scope.Benchmark)
+public class SortedTermsBench
+{
+    static
+    {
+        DatabaseDescriptor.toolInitialization();
+        // Partitioner is not set in client mode.
+        if (DatabaseDescriptor.getPartitioner() == null)
+            
DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance);
+    }
+
+    protected TableMetadata metadata;
+    protected IndexDescriptor indexDescriptor;
+
+    private PrimaryKeyMap primaryKeyMap;
+
+    private PrimaryKey primaryKey;
+
+    @Param({"3", "4", "5"})
+    private int partitionBlockShift;
+
+    @Param({"3", "4", "5"})
+    private int clusteringBlockShift;
+
+    @Param({"10", "100", "1000", "10000"})
+    private int partitionSize;
+
+    @Param({"true", "false"})
+    private boolean randomClustering;
+
+    @Setup(Level.Trial)
+    public void trialSetup() throws Exception
+    {
+        String keyspaceName = "ks";
+        String tableName = this.getClass().getSimpleName();
+        metadata = TableMetadata
+                   .builder(keyspaceName, tableName)
+                   .partitioner(Murmur3Partitioner.instance)
+                   .addPartitionKeyColumn("pk1", LongType.instance)
+                   .addPartitionKeyColumn("pk2", LongType.instance)
+                   .addClusteringColumn("ck1", UTF8Type.instance)
+                   .addClusteringColumn("ck2", UTF8Type.instance)
+                   .build();
+
+        Descriptor descriptor = new Descriptor(new 
File(Files.createTempDirectory("jmh").toFile()),
+                                               metadata.keyspace,
+                                               metadata.name,
+                                               Util.newUUIDGen().get());
+
+        indexDescriptor = IndexDescriptor.create(descriptor, 
metadata.comparator);
+
+        
CassandraRelevantProperties.SAI_SORTED_TERMS_PARTITION_BLOCK_SHIFT.setInt(partitionBlockShift);
+        
CassandraRelevantProperties.SAI_SORTED_TERMS_CLUSTERING_BLOCK_SHIFT.setInt(clusteringBlockShift);
+        SSTableComponentsWriter writer = new 
SSTableComponentsWriter(indexDescriptor);
+
+        PrimaryKey.Factory factory = new 
PrimaryKey.Factory(metadata.comparator);
+
+        int rows = 1000000;
+
+        PrimaryKey[] primaryKeys = new PrimaryKey[rows];
+        int partition = 0;
+        int partitionCounter = 0;
+        for (int index = 0; index < rows; index++)
+        {
+            primaryKeys[index] = factory.create(makeKey(metadata, (long) 
partition, (long) partition), makeClustering(metadata));
+            partitionCounter++;
+            if (partitionCounter == partitionSize)
+            {
+                partition++;
+                partitionCounter = 0;
+            }
+        }
+
+        Arrays.sort(primaryKeys);
+
+        DecoratedKey lastKey = null;
+        for (PrimaryKey primaryKey : primaryKeys)
+        {
+            if (lastKey == null || 
lastKey.compareTo(primaryKey.partitionKey()) < 0)
+            {
+                lastKey = primaryKey.partitionKey();
+                writer.startPartition(lastKey);
+            }
+            writer.nextRow(primaryKey);
+        }
+
+        writer.complete();
+
+        SSTableReader sstableReader = mock(SSTableReader.class);
+        when(sstableReader.metadata()).thenReturn(metadata);
+
+        PrimaryKeyMap.Factory mapFactory = new 
WideRowAwarePrimaryKeyMap.WideRowAwarePrimaryKeyMapFactory(indexDescriptor, 
sstableReader);
+
+        primaryKeyMap = mapFactory.newPerSSTablePrimaryKeyMap();
+
+        primaryKey = primaryKeys[500000];
+    }
+
+    @Benchmark
+    public long advanceToTerm()
+    {
+        return primaryKeyMap.rowIdFromPrimaryKey(primaryKey);
+    }
+
+    protected DecoratedKey makeKey(TableMetadata table, Object...partitionKeys)

Review Comment:
   Nit: can be `private static`



##########
src/java/org/apache/cassandra/index/sai/disk/v1/SkinnyRowAwarePrimaryKeyMap.java:
##########
@@ -0,0 +1,184 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.index.sai.disk.v1;
+
+import org.apache.cassandra.db.BufferDecoratedKey;
+import org.apache.cassandra.db.Clustering;
+import org.apache.cassandra.db.DecoratedKey;
+import org.apache.cassandra.dht.IPartitioner;
+import org.apache.cassandra.index.sai.disk.PrimaryKeyMap;
+import org.apache.cassandra.index.sai.disk.format.IndexComponent;
+import org.apache.cassandra.index.sai.disk.format.IndexDescriptor;
+import org.apache.cassandra.index.sai.disk.v1.bitpack.BlockPackedReader;
+import 
org.apache.cassandra.index.sai.disk.v1.bitpack.MonotonicBlockPackedReader;
+import org.apache.cassandra.index.sai.disk.v1.bitpack.NumericValuesMeta;
+import org.apache.cassandra.index.sai.disk.v1.sortedterms.SortedTermsMeta;
+import org.apache.cassandra.index.sai.disk.v1.sortedterms.SortedTermsReader;
+import org.apache.cassandra.index.sai.utils.PrimaryKey;
+import org.apache.cassandra.io.sstable.format.SSTableReader;
+import org.apache.cassandra.io.util.FileHandle;
+import org.apache.cassandra.io.util.FileUtils;
+import org.apache.cassandra.utils.Throwables;
+import org.apache.cassandra.utils.bytecomparable.ByteComparable;
+import org.apache.cassandra.utils.bytecomparable.ByteSource;
+import org.apache.cassandra.utils.bytecomparable.ByteSourceInverse;
+
+import javax.annotation.concurrent.NotThreadSafe;
+import javax.annotation.concurrent.ThreadSafe;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+
+/**
+ * A row-aware {@link PrimaryKeyMap} for skinny tables (those with no 
clustering columns).
+ * <p>
+ * This uses the following on-disk structures:
+ * <ul>
+ *     <li>Block-packed structure for rowId to token lookups using {@link 
BlockPackedReader}.
+ *     Uses component {@link IndexComponent#TOKEN_VALUES} </li>
+ *     <li>A sorted-terms structure for rowId to {@link PrimaryKey} and {@link 
PrimaryKey} to rowId lookups using
+ *     {@link SortedTermsReader}. Uses components {@link 
IndexComponent#PARTITION_KEY_BLOCKS} and
+ *     {@link IndexComponent#PARTITION_KEY_BLOCK_OFFSETS}</li>
+ * </ul>
+ *
+ * While the {@link SkinnyRowAwarePrimaryKeyMapFactory} is threadsafe, 
individual instances of the {@link SkinnyRowAwarePrimaryKeyMap}
+ * are not.
+ */
+@NotThreadSafe
+public class SkinnyRowAwarePrimaryKeyMap implements PrimaryKeyMap

Review Comment:
   I wonder if the row-aware concept should be mentioned in the name of the 
class and the first line of the JavaDoc. We don't have any other type of 
primary key maps, and row awareness seems meaningless for the skinny tables 
where `SkinnyRowAwarePrimaryKeyMap` is meant to be used.
   
   So maybe the classes could be named just `SkinnyPrimaryKeyMap` and 
`WidePrimaryKeyMap`?



##########
src/java/org/apache/cassandra/index/sai/disk/v1/LongArray.java:
##########
@@ -37,6 +37,13 @@ public interface LongArray extends Closeable
      */
     long length();
 
+    /**
+     * Using the given value returns the first index corresponding to the 
value.

Review Comment:
   ```suggestion
        * Using the given value returns the first index corresponding to the 
value.
        *
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[GitHub] [cassandra] adelapena commented on a diff in pull request #2540: Reduce size of per-SSTable index components for SAI

Reply via email to