This is an automated email from the ASF dual-hosted git repository. bdeggleston pushed a commit to branch cassandra-3.0 in repository https://gitbox.apache.org/repos/asf/cassandra.git
The following commit(s) were added to refs/heads/cassandra-3.0 by this push: new da8d41f Use mean row count instead of mean column count for index selectivity calculation da8d41f is described below commit da8d41f497efedf57e335ec2664680da583a3aba Author: Jordan West <jorda...@gmail.com> AuthorDate: Mon Aug 5 09:44:14 2019 -0700 Use mean row count instead of mean column count for index selectivity calculation patch by Jordan West; reviewed by Blake Eggleston for CASSANDRA-15259 --- CHANGES.txt | 1 + .../cassandra/index/internal/CassandraIndex.java | 21 +++++++++++++- test/unit/org/apache/cassandra/SchemaLoader.java | 33 ++++++++++++++++++++++ .../apache/cassandra/db/SecondaryIndexTest.java | 26 +++++++++++++++++ 4 files changed, 80 insertions(+), 1 deletion(-) diff --git a/CHANGES.txt b/CHANGES.txt index f04b489..c2bed92 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 3.0.19 + * Use mean row count instead of mean column count for index selectivity calculation (CASSANDRA-15259) * Avoid updating unchanged gossip states (CASSANDRA-15097) * Prevent recreation of previously dropped columns with a different kind (CASSANDRA-14948) * Prevent client requests from blocking on executor task queue (CASSANDRA-15013) diff --git a/src/java/org/apache/cassandra/index/internal/CassandraIndex.java b/src/java/org/apache/cassandra/index/internal/CassandraIndex.java index 3211fe9..ad5dd4b 100644 --- a/src/java/org/apache/cassandra/index/internal/CassandraIndex.java +++ b/src/java/org/apache/cassandra/index/internal/CassandraIndex.java @@ -278,7 +278,26 @@ public abstract class CassandraIndex implements Index public long getEstimatedResultRows() { - return indexCfs.getMeanColumns(); + long totalRows = 0; + long totalPartitions = 0; + for (SSTableReader sstable : indexCfs.getSSTables(SSTableSet.CANONICAL)) + { + if (sstable.descriptor.version.storeRows()) + { + totalPartitions += sstable.getEstimatedPartitionSize().count(); + totalRows += sstable.getTotalRows(); + } else + { + 
// for legacy sstables we don't have a total row count so we approximate it + // using estimated column count (which is the same logic as pre-3.0) + // see CASSANDRA-15259 + long colCount = sstable.getEstimatedColumnCount().count(); + totalPartitions += colCount; + totalRows += sstable.getEstimatedColumnCount().mean() * colCount; + } + } + + return totalPartitions > 0 ? (int) (totalRows / totalPartitions) : 0; } /** diff --git a/test/unit/org/apache/cassandra/SchemaLoader.java b/test/unit/org/apache/cassandra/SchemaLoader.java index 1686973..8d61f39 100644 --- a/test/unit/org/apache/cassandra/SchemaLoader.java +++ b/test/unit/org/apache/cassandra/SchemaLoader.java @@ -425,6 +425,39 @@ public class SchemaLoader return cfm.compression(getCompressionParameters()); } + + public static CFMetaData compositeMultipleIndexCFMD(String ksName, String cfName) throws ConfigurationException + { + CFMetaData cfm = CFMetaData.Builder.create(ksName, cfName) + .addPartitionKey("key", AsciiType.instance) + .addClusteringColumn("c1", AsciiType.instance) + .addRegularColumn("birthdate", LongType.instance) + .addRegularColumn("notbirthdate", LongType.instance) + .build(); + + cfm.indexes( + cfm.getIndexes() + .with(IndexMetadata.fromIndexTargets(cfm, + Collections.singletonList( + new IndexTarget(new ColumnIdentifier("birthdate", true), + IndexTarget.Type.VALUES)), + "birthdate_key_index", + IndexMetadata.Kind.COMPOSITES, + Collections.EMPTY_MAP)) + .with(IndexMetadata.fromIndexTargets(cfm, + Collections.singletonList( + new IndexTarget(new ColumnIdentifier("notbirthdate", true), + IndexTarget.Type.VALUES)), + "notbirthdate_key_index", + IndexMetadata.Kind.COMPOSITES, + Collections.EMPTY_MAP)) + ); + + + return cfm.compression(getCompressionParameters()); + } + + public static CFMetaData keysIndexCFMD(String ksName, String cfName, boolean withIndex) throws ConfigurationException { CFMetaData cfm = CFMetaData.Builder.createDense(ksName, cfName, false, false) diff --git 
a/test/unit/org/apache/cassandra/db/SecondaryIndexTest.java b/test/unit/org/apache/cassandra/db/SecondaryIndexTest.java index adcb68b..9fb0463 100644 --- a/test/unit/org/apache/cassandra/db/SecondaryIndexTest.java +++ b/test/unit/org/apache/cassandra/db/SecondaryIndexTest.java @@ -57,6 +57,7 @@ public class SecondaryIndexTest { public static final String KEYSPACE1 = "SecondaryIndexTest1"; public static final String WITH_COMPOSITE_INDEX = "WithCompositeIndex"; + public static final String WITH_MULTIPLE_COMPOSITE_INDEX = "WithMultipleCompositeIndex"; public static final String WITH_KEYS_INDEX = "WithKeysIndex"; public static final String COMPOSITE_INDEX_TO_BE_ADDED = "CompositeIndexToBeAdded"; @@ -68,6 +69,7 @@ public class SecondaryIndexTest KeyspaceParams.simple(1), SchemaLoader.compositeIndexCFMD(KEYSPACE1, WITH_COMPOSITE_INDEX, true).gcGraceSeconds(0), SchemaLoader.compositeIndexCFMD(KEYSPACE1, COMPOSITE_INDEX_TO_BE_ADDED, false).gcGraceSeconds(0), + SchemaLoader.compositeMultipleIndexCFMD(KEYSPACE1, WITH_MULTIPLE_COMPOSITE_INDEX).gcGraceSeconds(0), SchemaLoader.keysIndexCFMD(KEYSPACE1, WITH_KEYS_INDEX, true).gcGraceSeconds(0)); } @@ -76,6 +78,7 @@ public class SecondaryIndexTest { Keyspace.open(KEYSPACE1).getColumnFamilyStore(WITH_COMPOSITE_INDEX).truncateBlocking(); Keyspace.open(KEYSPACE1).getColumnFamilyStore(COMPOSITE_INDEX_TO_BE_ADDED).truncateBlocking(); + Keyspace.open(KEYSPACE1).getColumnFamilyStore(WITH_MULTIPLE_COMPOSITE_INDEX).truncateBlocking(); Keyspace.open(KEYSPACE1).getColumnFamilyStore(WITH_KEYS_INDEX).truncateBlocking(); } @@ -491,6 +494,29 @@ public class SecondaryIndexTest assertIndexedCount(cfs, ByteBufferUtil.bytes("birthdate"), 1l, 10); } + @Test + public void testSelectivityWithMultipleIndexes() + { + ColumnFamilyStore cfs = Keyspace.open(KEYSPACE1).getColumnFamilyStore(WITH_MULTIPLE_COMPOSITE_INDEX); + + // creates rows such that birthdate_key_index has 1 partition (key = 1L) with 4 rows -- mean row count = 4, and notbirthdate_key_index has 2 
partitions with 2 rows each -- mean row count = 2 + new RowUpdateBuilder(cfs.metadata, 0, "k1").clustering("c").add("birthdate", 1L).add("notbirthdate", 2L).build().applyUnsafe(); + new RowUpdateBuilder(cfs.metadata, 0, "k2").clustering("c").add("birthdate", 1L).add("notbirthdate", 2L).build().applyUnsafe(); + new RowUpdateBuilder(cfs.metadata, 0, "k3").clustering("c").add("birthdate", 1L).add("notbirthdate", 3L).build().applyUnsafe(); + new RowUpdateBuilder(cfs.metadata, 0, "k4").clustering("c").add("birthdate", 1L).add("notbirthdate", 3L).build().applyUnsafe(); + + cfs.forceBlockingFlush(); + ReadCommand rc = Util.cmd(cfs) + .fromKeyIncl("k1") + .toKeyIncl("k3") + .columns("birthdate") + .filterOn("birthdate", Operator.EQ, 1L) + .filterOn("notbirthdate", Operator.EQ, 0L) + .build(); + + assertEquals("notbirthdate_key_index", rc.indexMetadata().name); + } + private void assertIndexedNone(ColumnFamilyStore cfs, ByteBuffer col, Object val) { assertIndexedCount(cfs, col, val, 0); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@cassandra.apache.org For additional commands, e-mail: commits-h...@cassandra.apache.org