This is an automated email from the ASF dual-hosted git repository. konstantinov pushed a commit to branch trunk in repository https://gitbox.apache.org/repos/asf/cassandra.git
The following commit(s) were added to refs/heads/trunk by this push: new 7fb21c323a Optimize DataPlacement lookup by ReplicationParams 7fb21c323a is described below commit 7fb21c323ad77286989a92479ae981f96ecb0593 Author: Dmitry Konstantinov <netud...@gmail.com> AuthorDate: Wed Jul 30 18:55:26 2025 +0100 Optimize DataPlacement lookup by ReplicationParams Avoid double lookup of the same DataPlacement in forNonLocalStrategyTokenRead and forNonLocalStrategyTokenWrite methods Memorize hashCode value for ReplicationParams Deduplicate ReplicationParams to use the same objects in DataPlacements and KeyspaceMetadata to use the fast == path in the equals Do not search endpoints for a token in a typical write case twice (to identify pending endpoints) Patch by Dmitry Konstantinov; reviewed by Stefan Miklosovic, Sam Tunnicliffe for CASSANDRA-20804 --- CHANGES.txt | 1 + .../statements/schema/CreateKeyspaceStatement.java | 8 ++++++- .../apache/cassandra/locator/ReplicaLayout.java | 26 ++++++++++++++++++---- .../apache/cassandra/schema/KeyspaceParams.java | 5 +++++ .../apache/cassandra/schema/ReplicationParams.java | 4 +++- .../org/apache/cassandra/tcm/ClusterMetadata.java | 20 +++++++++++++++++ .../cassandra/tcm/ownership/DataPlacement.java | 5 ++++- .../cassandra/tcm/ownership/DataPlacements.java | 11 +++++++++ 8 files changed, 73 insertions(+), 7 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 6a23d410e3..2a7162fb4c 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.1 + * Optimize DataPlacement lookup by ReplicationParams (CASSANDRA-20804) * Fix ShortPaxosSimulationTest and AccordSimulationRunner do not execute from the cli (CASSANDRA-20805) * Allow overriding arbitrary settings via environment variables (CASSANDRA-20749) * Optimize MessagingService.getVersionOrdinal (CASSANDRA-20816) diff --git a/src/java/org/apache/cassandra/cql3/statements/schema/CreateKeyspaceStatement.java b/src/java/org/apache/cassandra/cql3/statements/schema/CreateKeyspaceStatement.java index 882117f218..ed448f6842 100644 --- a/src/java/org/apache/cassandra/cql3/statements/schema/CreateKeyspaceStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/schema/CreateKeyspaceStatement.java @@ -81,7 +81,13 @@ public final class CreateKeyspaceStatement extends AlterSchemaStatement throw new AlreadyExistsException(keyspaceName); } - KeyspaceMetadata keyspaceMetadata = KeyspaceMetadata.create(keyspaceName, attrs.asNewKeyspaceParams()); + // we deduplicate ReplicationParams here to use the same objects in KeyspaceMetadata + // as we have as keys in metadata.placements to have a fast map lookup + // ReplicationParams are immutable, so it is a safe optimization + KeyspaceParams keyspaceParams = attrs.asNewKeyspaceParams(); + ReplicationParams replicationParams = metadata.placements.deduplicateReplicationParams(keyspaceParams.replication); + keyspaceParams = keyspaceParams.withSwapped(replicationParams); + KeyspaceMetadata keyspaceMetadata = KeyspaceMetadata.create(keyspaceName, keyspaceParams); if (keyspaceMetadata.params.replication.klass.equals(LocalStrategy.class)) throw ire("Unable to use given strategy class: LocalStrategy is reserved for internal use."); diff --git a/src/java/org/apache/cassandra/locator/ReplicaLayout.java b/src/java/org/apache/cassandra/locator/ReplicaLayout.java index f961b4051d..0cd3ccf257 100644 --- a/src/java/org/apache/cassandra/locator/ReplicaLayout.java +++ b/src/java/org/apache/cassandra/locator/ReplicaLayout.java @@ -31,6 +31,7 @@ import org.apache.cassandra.schema.KeyspaceMetadata; import org.apache.cassandra.schema.TableId; import org.apache.cassandra.service.reads.ReadCoordinator; import org.apache.cassandra.tcm.ClusterMetadata; +import org.apache.cassandra.tcm.ownership.DataPlacement; import org.apache.cassandra.utils.FBUtilities; import java.util.Set; @@ -238,8 +239,14 @@ public abstract class ReplicaLayout<E extends Endpoints<E>> { // todo deduplicate so that "pending" contains "read - write", // which is a hack until we revisit how consistency level handles pending - natural = forNonLocalStrategyTokenRead(metadata, ks, token); - pending = forNonLocalStrategyTokenWrite(metadata, ks, token).without(natural.endpoints()); + DataPlacement dataPlacement = metadata.placements.get(ks.params.replication); + natural = forNonLocalStrategyTokenRead(dataPlacement, token); + // perf optimization to avoid double endpoints search and filtering for a typical case + // DataPlacement constructor does a deduplication of reads/writes, so we can use cheap == comparision here + if (dataPlacement.reads == dataPlacement.writes) + pending = EndpointsForToken.empty(token); + else + pending = forNonLocalStrategyTokenWrite(dataPlacement, token).without(natural.endpoints()); } return forTokenWrite(replicationStrategy, natural, pending); } @@ -392,14 +399,25 @@ public abstract class ReplicaLayout<E extends Endpoints<E>> public static EndpointsForToken forNonLocalStrategyTokenRead(ClusterMetadata metadata, KeyspaceMetadata keyspace, Token token) { - return metadata.placements.get(keyspace.params.replication).reads.forToken(token).get(); + return forNonLocalStrategyTokenRead(metadata.placements.get(keyspace.params.replication), token); + } + + public static EndpointsForToken forNonLocalStrategyTokenRead(DataPlacement dataPlacement, Token token) + { + return dataPlacement.reads.forToken(token).get(); } static EndpointsForToken forNonLocalStrategyTokenWrite(ClusterMetadata metadata, KeyspaceMetadata keyspace, Token token) { - return metadata.placements.get(keyspace.params.replication).writes.forToken(token).get(); + return forNonLocalStrategyTokenWrite(metadata.placements.get(keyspace.params.replication), token); } + static EndpointsForToken forNonLocalStrategyTokenWrite(DataPlacement dataPlacement, Token token) + { + return dataPlacement.writes.forToken(token).get(); + } + + static EndpointsForRange forLocalStrategyRange(ClusterMetadata metadata, AbstractReplicationStrategy replicationStrategy, AbstractBounds<PartitionPosition> range) { return replicationStrategy.calculateNaturalReplicas(range.right.getToken(), metadata); diff --git a/src/java/org/apache/cassandra/schema/KeyspaceParams.java b/src/java/org/apache/cassandra/schema/KeyspaceParams.java index 09afed84ba..f683fabf49 100644 --- a/src/java/org/apache/cassandra/schema/KeyspaceParams.java +++ b/src/java/org/apache/cassandra/schema/KeyspaceParams.java @@ -117,6 +117,11 @@ public final class KeyspaceParams return new KeyspaceParams(true, ReplicationParams.nts(args), FastPathStrategy.simple()); } + public KeyspaceParams withSwapped(ReplicationParams params) + { + return new KeyspaceParams(durableWrites, params, fastPath); + } + public void validate(String name, ClientState state, ClusterMetadata metadata) { replication.validate(name, state, metadata); diff --git a/src/java/org/apache/cassandra/schema/ReplicationParams.java b/src/java/org/apache/cassandra/schema/ReplicationParams.java index 40a92c803f..c1e2643897 100644 --- a/src/java/org/apache/cassandra/schema/ReplicationParams.java +++ b/src/java/org/apache/cassandra/schema/ReplicationParams.java @@ -57,11 +57,13 @@ public final class ReplicationParams public final Class<? extends AbstractReplicationStrategy> klass; public final ImmutableMap<String, String> options; + private final int hashCode; private ReplicationParams(Class<? extends AbstractReplicationStrategy> klass, Map<String, String> options) { this.klass = klass; this.options = ImmutableMap.copyOf(options); + this.hashCode = Objects.hashCode(this.klass, this.options); } @VisibleForTesting @@ -234,7 +236,7 @@ public final class ReplicationParams @Override public int hashCode() { - return Objects.hashCode(klass, options); + return hashCode; } @Override diff --git a/src/java/org/apache/cassandra/tcm/ClusterMetadata.java b/src/java/org/apache/cassandra/tcm/ClusterMetadata.java index d57df4de5f..aa8c238941 100644 --- a/src/java/org/apache/cassandra/tcm/ClusterMetadata.java +++ b/src/java/org/apache/cassandra/tcm/ClusterMetadata.java @@ -55,6 +55,7 @@ import org.apache.cassandra.locator.Replica; import org.apache.cassandra.net.CMSIdentifierMismatchException; import org.apache.cassandra.schema.DistributedSchema; import org.apache.cassandra.schema.KeyspaceMetadata; +import org.apache.cassandra.schema.KeyspaceParams; import org.apache.cassandra.schema.Keyspaces; import org.apache.cassandra.schema.ReplicationParams; import org.apache.cassandra.schema.TableId; @@ -1064,6 +1065,8 @@ public class ClusterMetadata TokenMap tokenMap = TokenMap.serializer.deserialize(in, version); DataPlacements placements = DataPlacements.serializer.deserialize(in, version); + schema = deduplicateReplicationParams(schema, placements); + AccordFastPath accordFastPath; ConsensusMigrationState consensusMigrationState; AccordStaleReplicas staleReplicas; @@ -1107,6 +1110,23 @@ public class ClusterMetadata staleReplicas); } + private DistributedSchema deduplicateReplicationParams(DistributedSchema schema, DataPlacements placements) + { + Keyspaces newKeyspaces = schema.getKeyspaces(); + for (KeyspaceMetadata keyspaceMetadata : schema.getKeyspaces()) + { + KeyspaceParams params = keyspaceMetadata.params; + ReplicationParams newReplicationParams = placements.deduplicateReplicationParams(params.replication); + if (newReplicationParams != params.replication) + { + KeyspaceParams newKeyspaceParams = params.withSwapped(newReplicationParams); + KeyspaceMetadata newKeyspaceMetadata = keyspaceMetadata.withSwapped(newKeyspaceParams); + newKeyspaces = newKeyspaces.withAddedOrUpdated(newKeyspaceMetadata); + } + } + return new DistributedSchema(newKeyspaces, schema.lastModified()); + } + @Override public long serializedSize(ClusterMetadata metadata, Version version) { diff --git a/src/java/org/apache/cassandra/tcm/ownership/DataPlacement.java b/src/java/org/apache/cassandra/tcm/ownership/DataPlacement.java index 12920d6862..fd1a2618db 100644 --- a/src/java/org/apache/cassandra/tcm/ownership/DataPlacement.java +++ b/src/java/org/apache/cassandra/tcm/ownership/DataPlacement.java @@ -58,7 +58,10 @@ public class DataPlacement ReplicaGroups writes) { this.reads = reads; - this.writes = writes; + if (reads.equals(writes)) + this.writes = reads; // performance optimization for a typical case, to not search for endpoints twice + else + this.writes = writes; } /** diff --git a/src/java/org/apache/cassandra/tcm/ownership/DataPlacements.java b/src/java/org/apache/cassandra/tcm/ownership/DataPlacements.java index b89ecde7d3..a4dc95eef4 100644 --- a/src/java/org/apache/cassandra/tcm/ownership/DataPlacements.java +++ b/src/java/org/apache/cassandra/tcm/ownership/DataPlacements.java @@ -159,6 +159,17 @@ public class DataPlacements extends ReplicationMap<DataPlacement> implements Met .allMatch(e -> e.getValue().equivalentTo(other.get(e.getKey()))); } + public ReplicationParams deduplicateReplicationParams(ReplicationParams replicationParams) + { + if (this.get(replicationParams) == null) + return replicationParams; + + for (ReplicationParams placementReplicationParams : keys()) + if (placementReplicationParams.equals(replicationParams)) + return placementReplicationParams; + return replicationParams; + } + public static DataPlacements sortReplicaGroups(DataPlacements placements, Comparator<Replica> comparator) { Builder builder = DataPlacements.builder(placements.size()); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@cassandra.apache.org For additional commands, e-mail: commits-h...@cassandra.apache.org