[cassandra] branch cassandra-3.0 updated: Use mean row count instead of mean column count for index selectivity calculation

bdeggleston Tue, 06 Aug 2019 10:26:55 -0700

This is an automated email from the ASF dual-hosted git repository.

bdeggleston pushed a commit to branch cassandra-3.0
in repository https://gitbox.apache.org/repos/asf/cassandra.git



The following commit(s) were added to refs/heads/cassandra-3.0 by this push:
     new da8d41f  Use mean row count instead of mean column count for index 
selectivity calculation
da8d41f is described below

commit da8d41f497efedf57e335ec2664680da583a3aba
Author: Jordan West <jorda...@gmail.com>
AuthorDate: Mon Aug 5 09:44:14 2019 -0700

    Use mean row count instead of mean column count for index selectivity 
calculation
    
    patch by Jordan West; reviewed by Blake Eggleston for CASSANDRA-15259
---
 CHANGES.txt                                        |  1 +
 .../cassandra/index/internal/CassandraIndex.java   | 21 +++++++++++++-
 test/unit/org/apache/cassandra/SchemaLoader.java   | 33 ++++++++++++++++++++++
 .../apache/cassandra/db/SecondaryIndexTest.java    | 26 +++++++++++++++++
 4 files changed, 80 insertions(+), 1 deletion(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index f04b489..c2bed92 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 3.0.19
+ * Use mean row count instead of mean column count for index selectivity 
calculation (CASSANDRA-15259)
  * Avoid updating unchanged gossip states (CASSANDRA-15097)
  * Prevent recreation of previously dropped columns with a different kind 
(CASSANDRA-14948)
  * Prevent client requests from blocking on executor task queue 
(CASSANDRA-15013)
diff --git a/src/java/org/apache/cassandra/index/internal/CassandraIndex.java 
b/src/java/org/apache/cassandra/index/internal/CassandraIndex.java
index 3211fe9..ad5dd4b 100644
--- a/src/java/org/apache/cassandra/index/internal/CassandraIndex.java
+++ b/src/java/org/apache/cassandra/index/internal/CassandraIndex.java
@@ -278,7 +278,26 @@ public abstract class CassandraIndex implements Index
 
     public long getEstimatedResultRows()
     {
-        return indexCfs.getMeanColumns();
+        long totalRows = 0;
+        long totalPartitions = 0;
+        for (SSTableReader sstable : 
indexCfs.getSSTables(SSTableSet.CANONICAL))
+        {
+            if (sstable.descriptor.version.storeRows())
+            {
+                totalPartitions += sstable.getEstimatedPartitionSize().count();
+                totalRows += sstable.getTotalRows();
+            } else
+            {
+                // for legacy sstables we don't have a total row count so we 
approximate it
+                // using estimated column count (which is the same logic as 
pre-3.0);
+                // see CASSANDRA-15259
+                long colCount = sstable.getEstimatedColumnCount().count();
+                totalPartitions += colCount;
+                totalRows += sstable.getEstimatedColumnCount().mean() * 
colCount;
+            }
+        }
+
+        return totalPartitions > 0 ? (int) (totalRows / totalPartitions) : 0;
     }
 
     /**
diff --git a/test/unit/org/apache/cassandra/SchemaLoader.java 
b/test/unit/org/apache/cassandra/SchemaLoader.java
index 1686973..8d61f39 100644
--- a/test/unit/org/apache/cassandra/SchemaLoader.java
+++ b/test/unit/org/apache/cassandra/SchemaLoader.java
@@ -425,6 +425,39 @@ public class SchemaLoader
 
         return cfm.compression(getCompressionParameters());
     }
+
+    public static CFMetaData compositeMultipleIndexCFMD(String ksName, String 
cfName) throws ConfigurationException
+    {
+        CFMetaData cfm = CFMetaData.Builder.create(ksName, cfName)
+                                           .addPartitionKey("key", 
AsciiType.instance)
+                                           .addClusteringColumn("c1", 
AsciiType.instance)
+                                           .addRegularColumn("birthdate", 
LongType.instance)
+                                           .addRegularColumn("notbirthdate", 
LongType.instance)
+                                           .build();
+
+        cfm.indexes(
+            cfm.getIndexes()
+               .with(IndexMetadata.fromIndexTargets(cfm,
+                                                    Collections.singletonList(
+                                                    new IndexTarget(new 
ColumnIdentifier("birthdate", true),
+                                                                    
IndexTarget.Type.VALUES)),
+                                                    "birthdate_key_index",
+                                                    
IndexMetadata.Kind.COMPOSITES,
+                                                    Collections.EMPTY_MAP))
+               .with(IndexMetadata.fromIndexTargets(cfm,
+                                                    Collections.singletonList(
+                                                    new IndexTarget(new 
ColumnIdentifier("notbirthdate", true),
+                                                                    
IndexTarget.Type.VALUES)),
+                                                    "notbirthdate_key_index",
+                                                    
IndexMetadata.Kind.COMPOSITES,
+                                                    Collections.EMPTY_MAP))
+        );
+
+
+        return cfm.compression(getCompressionParameters());
+    }
+
+
     public static CFMetaData keysIndexCFMD(String ksName, String cfName, 
boolean withIndex) throws ConfigurationException
     {
         CFMetaData cfm = CFMetaData.Builder.createDense(ksName, cfName, false, 
false)
diff --git a/test/unit/org/apache/cassandra/db/SecondaryIndexTest.java 
b/test/unit/org/apache/cassandra/db/SecondaryIndexTest.java
index adcb68b..9fb0463 100644
--- a/test/unit/org/apache/cassandra/db/SecondaryIndexTest.java
+++ b/test/unit/org/apache/cassandra/db/SecondaryIndexTest.java
@@ -57,6 +57,7 @@ public class SecondaryIndexTest
 {
     public static final String KEYSPACE1 = "SecondaryIndexTest1";
     public static final String WITH_COMPOSITE_INDEX = "WithCompositeIndex";
+    public static final String WITH_MULTIPLE_COMPOSITE_INDEX = 
"WithMultipleCompositeIndex";
     public static final String WITH_KEYS_INDEX = "WithKeysIndex";
     public static final String COMPOSITE_INDEX_TO_BE_ADDED = 
"CompositeIndexToBeAdded";
 
@@ -68,6 +69,7 @@ public class SecondaryIndexTest
                                     KeyspaceParams.simple(1),
                                     SchemaLoader.compositeIndexCFMD(KEYSPACE1, 
WITH_COMPOSITE_INDEX, true).gcGraceSeconds(0),
                                     SchemaLoader.compositeIndexCFMD(KEYSPACE1, 
COMPOSITE_INDEX_TO_BE_ADDED, false).gcGraceSeconds(0),
+                                    
SchemaLoader.compositeMultipleIndexCFMD(KEYSPACE1, 
WITH_MULTIPLE_COMPOSITE_INDEX).gcGraceSeconds(0),
                                     SchemaLoader.keysIndexCFMD(KEYSPACE1, 
WITH_KEYS_INDEX, true).gcGraceSeconds(0));
     }
 
@@ -76,6 +78,7 @@ public class SecondaryIndexTest
     {
         
Keyspace.open(KEYSPACE1).getColumnFamilyStore(WITH_COMPOSITE_INDEX).truncateBlocking();
         
Keyspace.open(KEYSPACE1).getColumnFamilyStore(COMPOSITE_INDEX_TO_BE_ADDED).truncateBlocking();
+        
Keyspace.open(KEYSPACE1).getColumnFamilyStore(WITH_MULTIPLE_COMPOSITE_INDEX).truncateBlocking();
         
Keyspace.open(KEYSPACE1).getColumnFamilyStore(WITH_KEYS_INDEX).truncateBlocking();
     }
 
@@ -491,6 +494,29 @@ public class SecondaryIndexTest
         assertIndexedCount(cfs, ByteBufferUtil.bytes("birthdate"), 1l, 10);
     }
 
+    @Test
+    public void testSelectivityWithMultipleIndexes()
+    {
+        ColumnFamilyStore cfs = 
Keyspace.open(KEYSPACE1).getColumnFamilyStore(WITH_MULTIPLE_COMPOSITE_INDEX);
+
+        // creates rows such that birthdate_key_index has 1 partition (key = 1L) 
with 4 rows -- mean row count = 4, and notbirthdate_key_index has 2 partitions with 
2 rows each -- mean row count = 2
+        new RowUpdateBuilder(cfs.metadata, 0, 
"k1").clustering("c").add("birthdate", 1L).add("notbirthdate", 
2L).build().applyUnsafe();
+        new RowUpdateBuilder(cfs.metadata, 0, 
"k2").clustering("c").add("birthdate", 1L).add("notbirthdate", 
2L).build().applyUnsafe();
+        new RowUpdateBuilder(cfs.metadata, 0, 
"k3").clustering("c").add("birthdate", 1L).add("notbirthdate", 
3L).build().applyUnsafe();
+        new RowUpdateBuilder(cfs.metadata, 0, 
"k4").clustering("c").add("birthdate", 1L).add("notbirthdate", 
3L).build().applyUnsafe();
+
+        cfs.forceBlockingFlush();
+        ReadCommand rc = Util.cmd(cfs)
+                             .fromKeyIncl("k1")
+                             .toKeyIncl("k3")
+                             .columns("birthdate")
+                             .filterOn("birthdate", Operator.EQ, 1L)
+                             .filterOn("notbirthdate", Operator.EQ, 0L)
+                             .build();
+
+        assertEquals("notbirthdate_key_index", rc.indexMetadata().name);
+    }
+
     private void assertIndexedNone(ColumnFamilyStore cfs, ByteBuffer col, 
Object val)
     {
         assertIndexedCount(cfs, col, val, 0);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@cassandra.apache.org
For additional commands, e-mail: commits-h...@cassandra.apache.org

[cassandra] branch cassandra-3.0 updated: Use mean row count instead of mean column count for index selectivity calculation

Reply via email to