This is an automated email from the ASF dual-hosted git repository. jmckenzie pushed a commit to branch trunk in repository https://gitbox.apache.org/repos/asf/cassandra.git
The following commit(s) were added to refs/heads/trunk by this push: new 3d93650 Disallow removal of a DC from system_auth replication settings 3d93650 is described below commit 3d9365096bc579d10e417278576d650611105120 Author: Josh McKenzie <jmcken...@apache.org> AuthorDate: Wed Mar 23 12:42:36 2022 -0400 Disallow removal of a DC from system_auth replication settings Patch by Josh McKenzie; reviewed by Jon Meredith for CASSANDRA-17478 Co-authored-by: Josh McKenzie <jmcken...@apache.org> Co-authored-by: Nachiket Patil <nachiket_pa...@apple.com> --- CHANGES.txt | 1 + .../cassandra/locator/NetworkTopologyStrategy.java | 10 + .../UpdateSystemAuthAfterDCExpansionTest.java | 233 +++++++++++++++++++++ .../cql3/validation/operations/AlterNTSTest.java | 48 +++++ .../cql3/validation/operations/AlterTest.java | 15 +- 5 files changed, 301 insertions(+), 6 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index ba48d61..75a6475 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 4.1 + * Disallow removing DC from system_auth while nodes are active in the DC (CASSANDRA-17478) * Add guardrail for the number of fields per UDT (CASSANDRA-17385) * Allow users to change cqlsh history location using env variable (CASSANDRA-17448) * Add required -f option to use nodetool verify and standalone sstableverify (CASSANDRA-17017) diff --git a/src/java/org/apache/cassandra/locator/NetworkTopologyStrategy.java b/src/java/org/apache/cassandra/locator/NetworkTopologyStrategy.java index ff88fce..dd2ec99 100644 --- a/src/java/org/apache/cassandra/locator/NetworkTopologyStrategy.java +++ b/src/java/org/apache/cassandra/locator/NetworkTopologyStrategy.java @@ -38,6 +38,7 @@ import org.apache.cassandra.utils.Pair; import com.google.common.collect.ImmutableMultimap; import com.google.common.collect.Multimap; +import com.google.common.collect.Sets; /** * <p> @@ -311,6 +312,15 @@ public class NetworkTopologyStrategy extends AbstractReplicationStrategy // Validate the data center names super.validateExpectedOptions(); + + if (keyspaceName.equalsIgnoreCase(SchemaConstants.AUTH_KEYSPACE_NAME)) + { + Set<String> differenceSet = Sets.difference((Set<String>) recognizedOptions(), configOptions.keySet()); + if (!differenceSet.isEmpty()) + { + throw new ConfigurationException("Following datacenters have active nodes and must be present in replication options for keyspace " + SchemaConstants.AUTH_KEYSPACE_NAME + ": " + differenceSet.toString()); + } + } } @Override diff --git a/test/distributed/org/apache/cassandra/distributed/UpdateSystemAuthAfterDCExpansionTest.java b/test/distributed/org/apache/cassandra/distributed/UpdateSystemAuthAfterDCExpansionTest.java new file mode 100644 index 0000000..8765067 --- /dev/null +++ b/test/distributed/org/apache/cassandra/distributed/UpdateSystemAuthAfterDCExpansionTest.java @@ -0,0 +1,233 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test; + +import java.util.Collections; +import java.util.UUID; + +import com.google.common.collect.ImmutableList; +import org.apache.cassandra.utils.concurrent.Condition; +import org.junit.BeforeClass; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.cassandra.auth.AuthenticatedUser; +import org.apache.cassandra.auth.RoleOptions; +import org.apache.cassandra.auth.RoleResource; +import org.apache.cassandra.config.DatabaseDescriptor; +import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.api.IInstance; +import org.apache.cassandra.distributed.api.IInstanceConfig; +import org.apache.cassandra.distributed.api.TokenSupplier; +import org.apache.cassandra.gms.FailureDetector; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.schema.SchemaConstants; +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.utils.progress.ProgressEventType; + +import static java.util.concurrent.TimeUnit.MINUTES; +import static org.apache.cassandra.auth.AuthKeyspace.ROLES; +import static org.apache.cassandra.distributed.api.Feature.GOSSIP; +import static org.apache.cassandra.distributed.api.Feature.NETWORK; +import static org.apache.cassandra.distributed.shared.AssertUtils.assertRows; +import static org.apache.cassandra.distributed.shared.AssertUtils.row; +import static org.apache.cassandra.distributed.shared.NetworkTopology.dcAndRack; +import static org.apache.cassandra.distributed.shared.NetworkTopology.networkTopology; +import static org.apache.cassandra.utils.concurrent.Condition.newOneTimeCondition; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; + +/* + * Test that system_auth can only be altered to have valid datacenters, and that + * all valid datacenters must have at least one replica. + * + * Create a cluster with one nodes in dc1 with a new role + * Alter the system_auth keyspace to use NTS with {dc1: 1} + * Expand a cluster with a new node in dc2 + * Alter the system auth keyspace to use NTS with {dc1: 1}, {dc2, 1} & repair + * Check that the new role is present in the new datacenter + * Remove the dc2 node + * Check the keyspace can be altered again to remove it + */ +public class UpdateSystemAuthAfterDCExpansionTest extends TestBaseImpl +{ + static final Logger logger = LoggerFactory.getLogger(UpdateSystemAuthAfterDCExpansionTest.class); + static final String username = "shinynewuser"; + + static void assertRolePresent(IInstance instance) + { + assertRows(instance.executeInternal(String.format("SELECT role FROM %s.%s WHERE role = ?", + SchemaConstants.AUTH_KEYSPACE_NAME, ROLES), + username), + row(username)); + } + + static void assertRoleAbsent(IInstance instance) + { + assertRows(instance.executeInternal(String.format("SELECT role FROM %s.%s WHERE role = ?", + SchemaConstants.AUTH_KEYSPACE_NAME, ROLES), + username)); + } + + static void assertQueryThrowsConfigurationException(Cluster cluster, String query) + { + cluster.forEach(instance -> { + try + { + // No need to use cluster.schemaChange as we're expecting a failure + instance.executeInternal(query); + fail("Expected \"" + query + "\" to throw a ConfigurationException, but it completed"); + } + catch (Throwable tr) + { + assertEquals("org.apache.cassandra.exceptions.ConfigurationException", tr.getClass().getCanonicalName()); + } + }); + } + + String alterKeyspaceStatement(String ntsOptions) + { + return String.format("ALTER KEYSPACE " + SchemaConstants.AUTH_KEYSPACE_NAME + + " WITH replication = {'class': 'NetworkTopologyStrategy', %s};", ntsOptions); + } + + @BeforeClass + static public void beforeClass() throws Throwable + { + // reduce the time from 10s to prevent "Cannot process role related query as the role manager isn't yet setup." + // exception from CassandraRoleManager + System.setProperty("cassandra.superuser_setup_delay_ms", "0"); + TestBaseImpl.beforeClass(); + } + + public void validateExpandAndContract(String initialDatacenters, + String expandedDatacenters, + String beforeDecommissionedDatacenters, + String afterDecommissionedDatacenters) throws Throwable + { + try (Cluster cluster = Cluster.build(1) + .withConfig(config -> config.set("auto_bootstrap", true) + .with(GOSSIP) + .with(NETWORK)) + .withTokenSupplier(TokenSupplier.evenlyDistributedTokens(2)) + .withNodeIdTopology(networkTopology(2, + (nodeid) -> nodeid % 2 == 1 ? dcAndRack("dc1", "rack1") + : dcAndRack("dc2", "rack2") + )) + .withNodes(1) + .createWithoutStarting()) + { + logger.debug("Starting cluster with single node in dc1"); + cluster.startup(); + + // currently no way to set authenticated user for coordinator + logger.debug("Creating test role"); + cluster.get(1).runOnInstance(() -> DatabaseDescriptor.getRoleManager().createRole(AuthenticatedUser.SYSTEM_USER, + RoleResource.role(username), + new RoleOptions())); + assertRolePresent(cluster.get(1)); + + logger.debug("Try changing NTS too early before a node from the DC has joined"); + assertQueryThrowsConfigurationException(cluster, alterKeyspaceStatement("'dc1': '1', 'dc2': '1'")); + + logger.debug("Altering '{}' keyspace to use NTS with {}", SchemaConstants.AUTH_KEYSPACE_NAME, initialDatacenters); + cluster.schemaChangeIgnoringStoppedInstances(alterKeyspaceStatement(initialDatacenters)); + + logger.debug("Bootstrapping second node in dc2"); + IInstanceConfig config = cluster.newInstanceConfig(); + config.set("auto_bootstrap", true); + cluster.bootstrap(config).startup(); + + // Check that the role is on node1 but has not made it to node2 + assertRolePresent(cluster.get(1)); + assertRoleAbsent(cluster.get(2)); + + // Update options to make sure a replica is in the remote DC + logger.debug("Altering '{}' keyspace to use NTS with dc1 & dc2", SchemaConstants.AUTH_KEYSPACE_NAME); + cluster.schemaChangeIgnoringStoppedInstances(alterKeyspaceStatement(expandedDatacenters)); + + // make sure that all sstables have moved to repaired by triggering a compaction + logger.debug("Repair system_auth to make sure role is replicated everywhere"); + cluster.get(1).runOnInstance(() -> { + try + { + Condition await = newOneTimeCondition(); + StorageService.instance.repair(SchemaConstants.AUTH_KEYSPACE_NAME, Collections.emptyMap(), ImmutableList.of((tag, event) -> { + if (event.getType() == ProgressEventType.COMPLETE) + await.signalAll(); + })).right.get(); + await.await(1L, MINUTES); + } + catch (Exception e) + { + fail("Unexpected exception: " + e); + } + }); + + logger.debug("Check the role is now replicated as expected after repairing"); + assertRolePresent(cluster.get(1)); + assertRolePresent(cluster.get(2)); + + // Make sure we cannot remove either of the active datacenters + logger.debug("Verify that neither active datacenter can be ALTER KEYSPACEd away"); + assertQueryThrowsConfigurationException(cluster, alterKeyspaceStatement("'dc1': '1'")); + assertQueryThrowsConfigurationException(cluster, alterKeyspaceStatement("'dc2': '1'")); + + logger.debug("Starting to decomission dc2"); + cluster.schemaChangeIgnoringStoppedInstances(alterKeyspaceStatement(beforeDecommissionedDatacenters)); + + // Forcibly shutdown and have node2 evicted by FD + logger.debug("Force shutdown node2"); + String node2hostId = cluster.get(2).callOnInstance(() -> StorageService.instance.getLocalHostId()); + cluster.get(2).shutdown(false); + + logger.debug("removeNode node2"); + cluster.get(1).runOnInstance(() -> { + UUID hostId = UUID.fromString(node2hostId); + InetAddressAndPort endpoint = StorageService.instance.getEndpointForHostId(hostId); + FailureDetector.instance.forceConviction(endpoint); + StorageService.instance.removeNode(node2hostId); + }); + + logger.debug("Remove replication to decomissioned dc2"); + cluster.schemaChangeIgnoringStoppedInstances(alterKeyspaceStatement(afterDecommissionedDatacenters)); + } + } + + @Test + public void explicitDCTest() throws Throwable + { + String initialDatacenters = "'dc1': '1'"; + String expandedDatacenters = "'dc1': '1', 'dc2': '1'"; + String beforeDecommissionedDatacenters = "'dc1': '1', 'dc2': '1'"; + String afterDecommissionedDatacenters = "'dc1': '1'"; + validateExpandAndContract(initialDatacenters, expandedDatacenters, beforeDecommissionedDatacenters, afterDecommissionedDatacenters); + } + + @Test + public void replicaFactorTest() throws Throwable + { + String initialDatacenters = "'replication_factor': '1'"; + String expandedDatacenters = "'replication_factor': '1'"; + String beforeDecommissionedDatacenters = "'replication_factor': '1', 'dc2': '1'"; + String afterDecommissionedDatacenters = "'dc1': '1'"; + validateExpandAndContract(initialDatacenters, expandedDatacenters, beforeDecommissionedDatacenters, afterDecommissionedDatacenters); + } +} \ No newline at end of file diff --git a/test/unit/org/apache/cassandra/cql3/validation/operations/AlterNTSTest.java b/test/unit/org/apache/cassandra/cql3/validation/operations/AlterNTSTest.java index 4cc95e1..e8f9842 100644 --- a/test/unit/org/apache/cassandra/cql3/validation/operations/AlterNTSTest.java +++ b/test/unit/org/apache/cassandra/cql3/validation/operations/AlterNTSTest.java @@ -19,12 +19,21 @@ package org.apache.cassandra.cql3.validation.operations; import java.util.List; +import java.util.UUID; import org.junit.Test; import com.datastax.driver.core.PreparedStatement; +import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.cql3.CQLTester; +import org.apache.cassandra.exceptions.ConfigurationException; +import org.apache.cassandra.locator.IEndpointSnitch; +import org.apache.cassandra.locator.InetAddressAndPort; +import org.apache.cassandra.locator.Replica; +import org.apache.cassandra.locator.ReplicaCollection; +import org.apache.cassandra.schema.SchemaConstants; import org.apache.cassandra.service.ClientWarn; +import org.apache.cassandra.service.StorageService; import org.assertj.core.api.Assertions; import static org.junit.Assert.assertEquals; @@ -100,4 +109,43 @@ public class AlterNTSTest extends CQLTester warnings = ClientWarn.instance.getWarnings(); assertNull(warnings); } + + @Test + public void testAlterKeyspaceSystem_AuthWithNTSOnlyAcceptsConfiguredDataCenterNames() throws Throwable + { + requireAuthentication(); + + // Add a peer + StorageService.instance.getTokenMetadata().updateHostId(UUID.randomUUID(), InetAddressAndPort.getByName("127.0.0.2")); + + // Register an Endpoint snitch which returns fixed value for data center. + DatabaseDescriptor.setEndpointSnitch(new IEndpointSnitch() + { + public String getRack(InetAddressAndPort endpoint) { return RACK1; } + public String getDatacenter(InetAddressAndPort endpoint) + { + if(endpoint.getHostAddress(false).equalsIgnoreCase("127.0.0.2")) + return "datacenter2"; + return DATA_CENTER; + } + public <C extends ReplicaCollection<? extends C>> C sortedByProximity(InetAddressAndPort address, C addresses) + { + return null; + } + + public int compareEndpoints(InetAddressAndPort target, Replica r1, Replica r2) + { + return 0; + } + + // NOOP + public void gossiperStarting() { } + + public boolean isWorthMergingForRangeQuery(ReplicaCollection<?> merged, ReplicaCollection<?> l1, ReplicaCollection<?> l2) { return false; } + }); + + // try modifying the system_auth keyspace without second DC which has active node. + assertInvalidThrow(ConfigurationException.class, "ALTER KEYSPACE system_auth WITH replication = { 'class' : 'NetworkTopologyStrategy', '" + DATA_CENTER + "' : 2 }"); + execute("ALTER KEYSPACE " + SchemaConstants.AUTH_KEYSPACE_NAME + " WITH replication = {'class' : 'NetworkTopologyStrategy', '" + DATA_CENTER + "' : 1 , '" + DATA_CENTER_REMOTE + "' : 1 }"); + } } diff --git a/test/unit/org/apache/cassandra/cql3/validation/operations/AlterTest.java b/test/unit/org/apache/cassandra/cql3/validation/operations/AlterTest.java index ee8957d..2b8fc0a 100644 --- a/test/unit/org/apache/cassandra/cql3/validation/operations/AlterTest.java +++ b/test/unit/org/apache/cassandra/cql3/validation/operations/AlterTest.java @@ -22,12 +22,6 @@ import java.util.UUID; import org.junit.Test; import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.dht.OrderPreservingPartitioner; -import org.apache.cassandra.locator.InetAddressAndPort; -import org.apache.cassandra.locator.TokenMetadata; -import org.apache.cassandra.config.DatabaseDescriptor; -import org.apache.cassandra.schema.SchemaConstants; -import org.apache.cassandra.schema.SchemaKeyspaceTables; import org.apache.cassandra.cql3.CQLTester; import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.Keyspace; @@ -38,6 +32,7 @@ import org.apache.cassandra.exceptions.SyntaxException; import org.apache.cassandra.locator.InetAddressAndPort; import org.apache.cassandra.locator.TokenMetadata; import org.apache.cassandra.schema.SchemaConstants; +import org.apache.cassandra.schema.SchemaKeyspaceTables; import org.apache.cassandra.service.StorageService; import org.apache.cassandra.utils.FBUtilities; @@ -284,6 +279,14 @@ public class AlterTest extends CQLTester assertRowsIgnoringOrderAndExtra(execute("SELECT keyspace_name, durable_writes, replication FROM system_schema.keyspaces"), row(KEYSPACE, true, map("class", "org.apache.cassandra.locator.SimpleStrategy", "replication_factor", "1")), row(KEYSPACE_PER_TEST, true, map("class", "org.apache.cassandra.locator.SimpleStrategy", "replication_factor", "1")), + row(ks1, true, map("class", "org.apache.cassandra.locator.NetworkTopologyStrategy", DATA_CENTER, "0", DATA_CENTER_REMOTE, "3"))); + + schemaChange("ALTER KEYSPACE " + ks1 + " WITH replication = { 'class' : 'NetworkTopologyStrategy', '" + DATA_CENTER_REMOTE + "': 3 }"); + + // Removal is a two-step process as the "0" filter has been removed from NTS.prepareOptions + assertRowsIgnoringOrderAndExtra(execute("SELECT keyspace_name, durable_writes, replication FROM system_schema.keyspaces"), + row(KEYSPACE, true, map("class", "org.apache.cassandra.locator.SimpleStrategy", "replication_factor", "1")), + row(KEYSPACE_PER_TEST, true, map("class", "org.apache.cassandra.locator.SimpleStrategy", "replication_factor", "1")), row(ks1, true, map("class", "org.apache.cassandra.locator.NetworkTopologyStrategy", DATA_CENTER_REMOTE, "3"))); // The auto-expansion should not change existing replication counts; do not let the user shoot themselves in the foot --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@cassandra.apache.org For additional commands, e-mail: commits-h...@cassandra.apache.org