This is an automated email from the ASF dual-hosted git repository.
bdeggleston pushed a commit to branch cassandra-3.0
in repository https://gitbox.apache.org/repos/asf/cassandra.git
The following commit(s) were added to refs/heads/cassandra-3.0 by this push:
new da8d41f Use mean row count instead of mean column count for index
selectivity calculation
da8d41f is described below
commit da8d41f497efedf57e335ec2664680da583a3aba
Author: Jordan West <[email protected]>
AuthorDate: Mon Aug 5 09:44:14 2019 -0700
Use mean row count instead of mean column count for index selectivity
calculation
patch by Jordan West; reviewed by Blake Eggleston for CASSANDRA-15259
---
CHANGES.txt | 1 +
.../cassandra/index/internal/CassandraIndex.java | 21 +++++++++++++-
test/unit/org/apache/cassandra/SchemaLoader.java | 33 ++++++++++++++++++++++
.../apache/cassandra/db/SecondaryIndexTest.java | 26 +++++++++++++++++
4 files changed, 80 insertions(+), 1 deletion(-)
diff --git a/CHANGES.txt b/CHANGES.txt
index f04b489..c2bed92 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
3.0.19
+ * Use mean row count instead of mean column count for index selectivity
calculation (CASSANDRA-15259)
* Avoid updating unchanged gossip states (CASSANDRA-15097)
* Prevent recreation of previously dropped columns with a different kind
(CASSANDRA-14948)
* Prevent client requests from blocking on executor task queue
(CASSANDRA-15013)
diff --git a/src/java/org/apache/cassandra/index/internal/CassandraIndex.java
b/src/java/org/apache/cassandra/index/internal/CassandraIndex.java
index 3211fe9..ad5dd4b 100644
--- a/src/java/org/apache/cassandra/index/internal/CassandraIndex.java
+++ b/src/java/org/apache/cassandra/index/internal/CassandraIndex.java
@@ -278,7 +278,26 @@ public abstract class CassandraIndex implements Index
public long getEstimatedResultRows()
{
- return indexCfs.getMeanColumns();
+ long totalRows = 0;
+ long totalPartitions = 0;
+ for (SSTableReader sstable :
indexCfs.getSSTables(SSTableSet.CANONICAL))
+ {
+ if (sstable.descriptor.version.storeRows())
+ {
+ totalPartitions += sstable.getEstimatedPartitionSize().count();
+ totalRows += sstable.getTotalRows();
+ } else
+ {
+ // for legacy sstables we don't have a total row count, so we approximate it
+ // using the estimated column count (which is the same logic as pre-3.0);
+ // see CASSANDRA-15259
+ long colCount = sstable.getEstimatedColumnCount().count();
+ totalPartitions += colCount;
+ totalRows += sstable.getEstimatedColumnCount().mean() *
colCount;
+ }
+ }
+
+ return totalPartitions > 0 ? (int) (totalRows / totalPartitions) : 0;
}
/**
diff --git a/test/unit/org/apache/cassandra/SchemaLoader.java
b/test/unit/org/apache/cassandra/SchemaLoader.java
index 1686973..8d61f39 100644
--- a/test/unit/org/apache/cassandra/SchemaLoader.java
+++ b/test/unit/org/apache/cassandra/SchemaLoader.java
@@ -425,6 +425,39 @@ public class SchemaLoader
return cfm.compression(getCompressionParameters());
}
+
+ public static CFMetaData compositeMultipleIndexCFMD(String ksName, String
cfName) throws ConfigurationException
+ {
+ CFMetaData cfm = CFMetaData.Builder.create(ksName, cfName)
+ .addPartitionKey("key",
AsciiType.instance)
+ .addClusteringColumn("c1",
AsciiType.instance)
+ .addRegularColumn("birthdate",
LongType.instance)
+ .addRegularColumn("notbirthdate",
LongType.instance)
+ .build();
+
+ cfm.indexes(
+ cfm.getIndexes()
+ .with(IndexMetadata.fromIndexTargets(cfm,
+ Collections.singletonList(
+ new IndexTarget(new
ColumnIdentifier("birthdate", true),
+
IndexTarget.Type.VALUES)),
+ "birthdate_key_index",
+
IndexMetadata.Kind.COMPOSITES,
+ Collections.EMPTY_MAP))
+ .with(IndexMetadata.fromIndexTargets(cfm,
+ Collections.singletonList(
+ new IndexTarget(new
ColumnIdentifier("notbirthdate", true),
+
IndexTarget.Type.VALUES)),
+ "notbirthdate_key_index",
+
IndexMetadata.Kind.COMPOSITES,
+ Collections.EMPTY_MAP))
+ );
+
+
+ return cfm.compression(getCompressionParameters());
+ }
+
+
public static CFMetaData keysIndexCFMD(String ksName, String cfName,
boolean withIndex) throws ConfigurationException
{
CFMetaData cfm = CFMetaData.Builder.createDense(ksName, cfName, false,
false)
diff --git a/test/unit/org/apache/cassandra/db/SecondaryIndexTest.java
b/test/unit/org/apache/cassandra/db/SecondaryIndexTest.java
index adcb68b..9fb0463 100644
--- a/test/unit/org/apache/cassandra/db/SecondaryIndexTest.java
+++ b/test/unit/org/apache/cassandra/db/SecondaryIndexTest.java
@@ -57,6 +57,7 @@ public class SecondaryIndexTest
{
public static final String KEYSPACE1 = "SecondaryIndexTest1";
public static final String WITH_COMPOSITE_INDEX = "WithCompositeIndex";
+ public static final String WITH_MULTIPLE_COMPOSITE_INDEX =
"WithMultipleCompositeIndex";
public static final String WITH_KEYS_INDEX = "WithKeysIndex";
public static final String COMPOSITE_INDEX_TO_BE_ADDED =
"CompositeIndexToBeAdded";
@@ -68,6 +69,7 @@ public class SecondaryIndexTest
KeyspaceParams.simple(1),
SchemaLoader.compositeIndexCFMD(KEYSPACE1,
WITH_COMPOSITE_INDEX, true).gcGraceSeconds(0),
SchemaLoader.compositeIndexCFMD(KEYSPACE1,
COMPOSITE_INDEX_TO_BE_ADDED, false).gcGraceSeconds(0),
+
SchemaLoader.compositeMultipleIndexCFMD(KEYSPACE1,
WITH_MULTIPLE_COMPOSITE_INDEX).gcGraceSeconds(0),
SchemaLoader.keysIndexCFMD(KEYSPACE1,
WITH_KEYS_INDEX, true).gcGraceSeconds(0));
}
@@ -76,6 +78,7 @@ public class SecondaryIndexTest
{
Keyspace.open(KEYSPACE1).getColumnFamilyStore(WITH_COMPOSITE_INDEX).truncateBlocking();
Keyspace.open(KEYSPACE1).getColumnFamilyStore(COMPOSITE_INDEX_TO_BE_ADDED).truncateBlocking();
+
Keyspace.open(KEYSPACE1).getColumnFamilyStore(WITH_MULTIPLE_COMPOSITE_INDEX).truncateBlocking();
Keyspace.open(KEYSPACE1).getColumnFamilyStore(WITH_KEYS_INDEX).truncateBlocking();
}
@@ -491,6 +494,29 @@ public class SecondaryIndexTest
assertIndexedCount(cfs, ByteBufferUtil.bytes("birthdate"), 1l, 10);
}
+ @Test
+ public void testSelectivityWithMultipleIndexes()
+ {
+ ColumnFamilyStore cfs =
Keyspace.open(KEYSPACE1).getColumnFamilyStore(WITH_MULTIPLE_COMPOSITE_INDEX);
+
+ // creates rows such that birthdate_key_index has 1 partition (key = 1L) with 4 rows -- mean row count = 4, and notbirthdate_key_index has 2 partitions with 2 rows each -- mean row count = 2
+ new RowUpdateBuilder(cfs.metadata, 0,
"k1").clustering("c").add("birthdate", 1L).add("notbirthdate",
2L).build().applyUnsafe();
+ new RowUpdateBuilder(cfs.metadata, 0,
"k2").clustering("c").add("birthdate", 1L).add("notbirthdate",
2L).build().applyUnsafe();
+ new RowUpdateBuilder(cfs.metadata, 0,
"k3").clustering("c").add("birthdate", 1L).add("notbirthdate",
3L).build().applyUnsafe();
+ new RowUpdateBuilder(cfs.metadata, 0,
"k4").clustering("c").add("birthdate", 1L).add("notbirthdate",
3L).build().applyUnsafe();
+
+ cfs.forceBlockingFlush();
+ ReadCommand rc = Util.cmd(cfs)
+ .fromKeyIncl("k1")
+ .toKeyIncl("k3")
+ .columns("birthdate")
+ .filterOn("birthdate", Operator.EQ, 1L)
+ .filterOn("notbirthdate", Operator.EQ, 0L)
+ .build();
+
+ assertEquals("notbirthdate_key_index", rc.indexMetadata().name);
+ }
+
private void assertIndexedNone(ColumnFamilyStore cfs, ByteBuffer col,
Object val)
{
assertIndexedCount(cfs, col, val, 0);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]