This is an automated email from the ASF dual-hosted git repository.
jmckenzie pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/cassandra.git
The following commit(s) were added to refs/heads/trunk by this push:
new 3d93650 Disallow removal of a DC from system_auth replication settings
3d93650 is described below
commit 3d9365096bc579d10e417278576d650611105120
Author: Josh McKenzie <[email protected]>
AuthorDate: Wed Mar 23 12:42:36 2022 -0400
Disallow removal of a DC from system_auth replication settings
Patch by Josh McKenzie; reviewed by Jon Meredith for CASSANDRA-17478
Co-authored-by: Josh McKenzie <[email protected]>
Co-authored-by: Nachiket Patil <[email protected]>
---
CHANGES.txt | 1 +
.../cassandra/locator/NetworkTopologyStrategy.java | 10 +
.../UpdateSystemAuthAfterDCExpansionTest.java | 233 +++++++++++++++++++++
.../cql3/validation/operations/AlterNTSTest.java | 48 +++++
.../cql3/validation/operations/AlterTest.java | 15 +-
5 files changed, 301 insertions(+), 6 deletions(-)
diff --git a/CHANGES.txt b/CHANGES.txt
index ba48d61..75a6475 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
4.1
+ * Disallow removing DC from system_auth while nodes are active in the DC
(CASSANDRA-17478)
* Add guardrail for the number of fields per UDT (CASSANDRA-17385)
* Allow users to change cqlsh history location using env variable
(CASSANDRA-17448)
* Add required -f option to use nodetool verify and standalone sstableverify
(CASSANDRA-17017)
diff --git a/src/java/org/apache/cassandra/locator/NetworkTopologyStrategy.java
b/src/java/org/apache/cassandra/locator/NetworkTopologyStrategy.java
index ff88fce..dd2ec99 100644
--- a/src/java/org/apache/cassandra/locator/NetworkTopologyStrategy.java
+++ b/src/java/org/apache/cassandra/locator/NetworkTopologyStrategy.java
@@ -38,6 +38,7 @@ import org.apache.cassandra.utils.Pair;
import com.google.common.collect.ImmutableMultimap;
import com.google.common.collect.Multimap;
+import com.google.common.collect.Sets;
/**
* <p>
@@ -311,6 +312,15 @@ public class NetworkTopologyStrategy extends
AbstractReplicationStrategy
// Validate the data center names
super.validateExpectedOptions();
+
+ if (keyspaceName.equalsIgnoreCase(SchemaConstants.AUTH_KEYSPACE_NAME))
+ {
+ Set<String> differenceSet = Sets.difference((Set<String>)
recognizedOptions(), configOptions.keySet());
+ if (!differenceSet.isEmpty())
+ {
+ throw new ConfigurationException("Following datacenters have
active nodes and must be present in replication options for keyspace " +
SchemaConstants.AUTH_KEYSPACE_NAME + ": " + differenceSet.toString());
+ }
+ }
}
@Override
diff --git
a/test/distributed/org/apache/cassandra/distributed/UpdateSystemAuthAfterDCExpansionTest.java
b/test/distributed/org/apache/cassandra/distributed/UpdateSystemAuthAfterDCExpansionTest.java
new file mode 100644
index 0000000..8765067
--- /dev/null
+++
b/test/distributed/org/apache/cassandra/distributed/UpdateSystemAuthAfterDCExpansionTest.java
@@ -0,0 +1,233 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.distributed.test;
+
+import java.util.Collections;
+import java.util.UUID;
+
+import com.google.common.collect.ImmutableList;
+import org.apache.cassandra.utils.concurrent.Condition;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.cassandra.auth.AuthenticatedUser;
+import org.apache.cassandra.auth.RoleOptions;
+import org.apache.cassandra.auth.RoleResource;
+import org.apache.cassandra.config.DatabaseDescriptor;
+import org.apache.cassandra.distributed.Cluster;
+import org.apache.cassandra.distributed.api.IInstance;
+import org.apache.cassandra.distributed.api.IInstanceConfig;
+import org.apache.cassandra.distributed.api.TokenSupplier;
+import org.apache.cassandra.gms.FailureDetector;
+import org.apache.cassandra.locator.InetAddressAndPort;
+import org.apache.cassandra.schema.SchemaConstants;
+import org.apache.cassandra.service.StorageService;
+import org.apache.cassandra.utils.progress.ProgressEventType;
+
+import static java.util.concurrent.TimeUnit.MINUTES;
+import static org.apache.cassandra.auth.AuthKeyspace.ROLES;
+import static org.apache.cassandra.distributed.api.Feature.GOSSIP;
+import static org.apache.cassandra.distributed.api.Feature.NETWORK;
+import static org.apache.cassandra.distributed.shared.AssertUtils.assertRows;
+import static org.apache.cassandra.distributed.shared.AssertUtils.row;
+import static
org.apache.cassandra.distributed.shared.NetworkTopology.dcAndRack;
+import static
org.apache.cassandra.distributed.shared.NetworkTopology.networkTopology;
+import static
org.apache.cassandra.utils.concurrent.Condition.newOneTimeCondition;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+
+/*
+ * Test that system_auth can only be altered to have valid datacenters, and
that
+ * all valid datacenters must have at least one replica.
+ *
+ * Create a cluster with one nodes in dc1 with a new role
+ * Alter the system_auth keyspace to use NTS with {dc1: 1}
+ * Expand a cluster with a new node in dc2
+ * Alter the system auth keyspace to use NTS with {dc1: 1}, {dc2, 1} & repair
+ * Check that the new role is present in the new datacenter
+ * Remove the dc2 node
+ * Check the keyspace can be altered again to remove it
+ */
+public class UpdateSystemAuthAfterDCExpansionTest extends TestBaseImpl
+{
+ static final Logger logger =
LoggerFactory.getLogger(UpdateSystemAuthAfterDCExpansionTest.class);
+ static final String username = "shinynewuser";
+
+ static void assertRolePresent(IInstance instance)
+ {
+ assertRows(instance.executeInternal(String.format("SELECT role FROM
%s.%s WHERE role = ?",
+
SchemaConstants.AUTH_KEYSPACE_NAME, ROLES),
+ username),
+ row(username));
+ }
+
+ static void assertRoleAbsent(IInstance instance)
+ {
+ assertRows(instance.executeInternal(String.format("SELECT role FROM
%s.%s WHERE role = ?",
+
SchemaConstants.AUTH_KEYSPACE_NAME, ROLES),
+ username));
+ }
+
+ static void assertQueryThrowsConfigurationException(Cluster cluster,
String query)
+ {
+ cluster.forEach(instance -> {
+ try
+ {
+ // No need to use cluster.schemaChange as we're expecting a
failure
+ instance.executeInternal(query);
+ fail("Expected \"" + query + "\" to throw a
ConfigurationException, but it completed");
+ }
+ catch (Throwable tr)
+ {
+
assertEquals("org.apache.cassandra.exceptions.ConfigurationException",
tr.getClass().getCanonicalName());
+ }
+ });
+ }
+
+ String alterKeyspaceStatement(String ntsOptions)
+ {
+ return String.format("ALTER KEYSPACE " +
SchemaConstants.AUTH_KEYSPACE_NAME +
+ " WITH replication = {'class':
'NetworkTopologyStrategy', %s};", ntsOptions);
+ }
+
+ @BeforeClass
+ static public void beforeClass() throws Throwable
+ {
+ // reduce the time from 10s to prevent "Cannot process role related
query as the role manager isn't yet setup."
+ // exception from CassandraRoleManager
+ System.setProperty("cassandra.superuser_setup_delay_ms", "0");
+ TestBaseImpl.beforeClass();
+ }
+
+ public void validateExpandAndContract(String initialDatacenters,
+ String expandedDatacenters,
+ String
beforeDecommissionedDatacenters,
+ String
afterDecommissionedDatacenters) throws Throwable
+ {
+ try (Cluster cluster = Cluster.build(1)
+ .withConfig(config ->
config.set("auto_bootstrap", true)
+
.with(GOSSIP)
+
.with(NETWORK))
+
.withTokenSupplier(TokenSupplier.evenlyDistributedTokens(2))
+ .withNodeIdTopology(networkTopology(2,
+
(nodeid) -> nodeid % 2 == 1 ? dcAndRack("dc1", "rack1")
+
: dcAndRack("dc2", "rack2")
+ ))
+ .withNodes(1)
+ .createWithoutStarting())
+ {
+ logger.debug("Starting cluster with single node in dc1");
+ cluster.startup();
+
+ // currently no way to set authenticated user for coordinator
+ logger.debug("Creating test role");
+ cluster.get(1).runOnInstance(() ->
DatabaseDescriptor.getRoleManager().createRole(AuthenticatedUser.SYSTEM_USER,
+
RoleResource.role(username),
+
new RoleOptions()));
+ assertRolePresent(cluster.get(1));
+
+ logger.debug("Try changing NTS too early before a node from the DC
has joined");
+ assertQueryThrowsConfigurationException(cluster,
alterKeyspaceStatement("'dc1': '1', 'dc2': '1'"));
+
+ logger.debug("Altering '{}' keyspace to use NTS with {}",
SchemaConstants.AUTH_KEYSPACE_NAME, initialDatacenters);
+
cluster.schemaChangeIgnoringStoppedInstances(alterKeyspaceStatement(initialDatacenters));
+
+ logger.debug("Bootstrapping second node in dc2");
+ IInstanceConfig config = cluster.newInstanceConfig();
+ config.set("auto_bootstrap", true);
+ cluster.bootstrap(config).startup();
+
+ // Check that the role is on node1 but has not made it to node2
+ assertRolePresent(cluster.get(1));
+ assertRoleAbsent(cluster.get(2));
+
+ // Update options to make sure a replica is in the remote DC
+ logger.debug("Altering '{}' keyspace to use NTS with dc1 & dc2",
SchemaConstants.AUTH_KEYSPACE_NAME);
+
cluster.schemaChangeIgnoringStoppedInstances(alterKeyspaceStatement(expandedDatacenters));
+
+ // make sure that all sstables have moved to repaired by
triggering a compaction
+ logger.debug("Repair system_auth to make sure role is replicated
everywhere");
+ cluster.get(1).runOnInstance(() -> {
+ try
+ {
+ Condition await = newOneTimeCondition();
+
StorageService.instance.repair(SchemaConstants.AUTH_KEYSPACE_NAME,
Collections.emptyMap(), ImmutableList.of((tag, event) -> {
+ if (event.getType() == ProgressEventType.COMPLETE)
+ await.signalAll();
+ })).right.get();
+ await.await(1L, MINUTES);
+ }
+ catch (Exception e)
+ {
+ fail("Unexpected exception: " + e);
+ }
+ });
+
+ logger.debug("Check the role is now replicated as expected after
repairing");
+ assertRolePresent(cluster.get(1));
+ assertRolePresent(cluster.get(2));
+
+ // Make sure we cannot remove either of the active datacenters
+ logger.debug("Verify that neither active datacenter can be ALTER
KEYSPACEd away");
+ assertQueryThrowsConfigurationException(cluster,
alterKeyspaceStatement("'dc1': '1'"));
+ assertQueryThrowsConfigurationException(cluster,
alterKeyspaceStatement("'dc2': '1'"));
+
+ logger.debug("Starting to decomission dc2");
+
cluster.schemaChangeIgnoringStoppedInstances(alterKeyspaceStatement(beforeDecommissionedDatacenters));
+
+ // Forcibly shutdown and have node2 evicted by FD
+ logger.debug("Force shutdown node2");
+ String node2hostId = cluster.get(2).callOnInstance(() ->
StorageService.instance.getLocalHostId());
+ cluster.get(2).shutdown(false);
+
+ logger.debug("removeNode node2");
+ cluster.get(1).runOnInstance(() -> {
+ UUID hostId = UUID.fromString(node2hostId);
+ InetAddressAndPort endpoint =
StorageService.instance.getEndpointForHostId(hostId);
+ FailureDetector.instance.forceConviction(endpoint);
+ StorageService.instance.removeNode(node2hostId);
+ });
+
+ logger.debug("Remove replication to decomissioned dc2");
+
cluster.schemaChangeIgnoringStoppedInstances(alterKeyspaceStatement(afterDecommissionedDatacenters));
+ }
+ }
+
+ @Test
+ public void explicitDCTest() throws Throwable
+ {
+ String initialDatacenters = "'dc1': '1'";
+ String expandedDatacenters = "'dc1': '1', 'dc2': '1'";
+ String beforeDecommissionedDatacenters = "'dc1': '1', 'dc2': '1'";
+ String afterDecommissionedDatacenters = "'dc1': '1'";
+ validateExpandAndContract(initialDatacenters, expandedDatacenters,
beforeDecommissionedDatacenters, afterDecommissionedDatacenters);
+ }
+
+ @Test
+ public void replicaFactorTest() throws Throwable
+ {
+ String initialDatacenters = "'replication_factor': '1'";
+ String expandedDatacenters = "'replication_factor': '1'";
+ String beforeDecommissionedDatacenters = "'replication_factor': '1',
'dc2': '1'";
+ String afterDecommissionedDatacenters = "'dc1': '1'";
+ validateExpandAndContract(initialDatacenters, expandedDatacenters,
beforeDecommissionedDatacenters, afterDecommissionedDatacenters);
+ }
+}
\ No newline at end of file
diff --git
a/test/unit/org/apache/cassandra/cql3/validation/operations/AlterNTSTest.java
b/test/unit/org/apache/cassandra/cql3/validation/operations/AlterNTSTest.java
index 4cc95e1..e8f9842 100644
---
a/test/unit/org/apache/cassandra/cql3/validation/operations/AlterNTSTest.java
+++
b/test/unit/org/apache/cassandra/cql3/validation/operations/AlterNTSTest.java
@@ -19,12 +19,21 @@
package org.apache.cassandra.cql3.validation.operations;
import java.util.List;
+import java.util.UUID;
import org.junit.Test;
import com.datastax.driver.core.PreparedStatement;
+import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.cql3.CQLTester;
+import org.apache.cassandra.exceptions.ConfigurationException;
+import org.apache.cassandra.locator.IEndpointSnitch;
+import org.apache.cassandra.locator.InetAddressAndPort;
+import org.apache.cassandra.locator.Replica;
+import org.apache.cassandra.locator.ReplicaCollection;
+import org.apache.cassandra.schema.SchemaConstants;
import org.apache.cassandra.service.ClientWarn;
+import org.apache.cassandra.service.StorageService;
import org.assertj.core.api.Assertions;
import static org.junit.Assert.assertEquals;
@@ -100,4 +109,43 @@ public class AlterNTSTest extends CQLTester
warnings = ClientWarn.instance.getWarnings();
assertNull(warnings);
}
+
+ @Test
+ public void
testAlterKeyspaceSystem_AuthWithNTSOnlyAcceptsConfiguredDataCenterNames()
throws Throwable
+ {
+ requireAuthentication();
+
+ // Add a peer
+
StorageService.instance.getTokenMetadata().updateHostId(UUID.randomUUID(),
InetAddressAndPort.getByName("127.0.0.2"));
+
+ // Register an Endpoint snitch which returns fixed value for data
center.
+ DatabaseDescriptor.setEndpointSnitch(new IEndpointSnitch()
+ {
+ public String getRack(InetAddressAndPort endpoint) { return RACK1;
}
+ public String getDatacenter(InetAddressAndPort endpoint)
+ {
+
if(endpoint.getHostAddress(false).equalsIgnoreCase("127.0.0.2"))
+ return "datacenter2";
+ return DATA_CENTER;
+ }
+ public <C extends ReplicaCollection<? extends C>> C
sortedByProximity(InetAddressAndPort address, C addresses)
+ {
+ return null;
+ }
+
+ public int compareEndpoints(InetAddressAndPort target, Replica r1,
Replica r2)
+ {
+ return 0;
+ }
+
+ // NOOP
+ public void gossiperStarting() { }
+
+ public boolean isWorthMergingForRangeQuery(ReplicaCollection<?>
merged, ReplicaCollection<?> l1, ReplicaCollection<?> l2) { return false; }
+ });
+
+ // try modifying the system_auth keyspace without second DC which has
active node.
+ assertInvalidThrow(ConfigurationException.class, "ALTER KEYSPACE
system_auth WITH replication = { 'class' : 'NetworkTopologyStrategy', '" +
DATA_CENTER + "' : 2 }");
+ execute("ALTER KEYSPACE " + SchemaConstants.AUTH_KEYSPACE_NAME + "
WITH replication = {'class' : 'NetworkTopologyStrategy', '" + DATA_CENTER + "'
: 1 , '" + DATA_CENTER_REMOTE + "' : 1 }");
+ }
}
diff --git
a/test/unit/org/apache/cassandra/cql3/validation/operations/AlterTest.java
b/test/unit/org/apache/cassandra/cql3/validation/operations/AlterTest.java
index ee8957d..2b8fc0a 100644
--- a/test/unit/org/apache/cassandra/cql3/validation/operations/AlterTest.java
+++ b/test/unit/org/apache/cassandra/cql3/validation/operations/AlterTest.java
@@ -22,12 +22,6 @@ import java.util.UUID;
import org.junit.Test;
import org.apache.cassandra.config.DatabaseDescriptor;
-import org.apache.cassandra.dht.OrderPreservingPartitioner;
-import org.apache.cassandra.locator.InetAddressAndPort;
-import org.apache.cassandra.locator.TokenMetadata;
-import org.apache.cassandra.config.DatabaseDescriptor;
-import org.apache.cassandra.schema.SchemaConstants;
-import org.apache.cassandra.schema.SchemaKeyspaceTables;
import org.apache.cassandra.cql3.CQLTester;
import org.apache.cassandra.db.ColumnFamilyStore;
import org.apache.cassandra.db.Keyspace;
@@ -38,6 +32,7 @@ import org.apache.cassandra.exceptions.SyntaxException;
import org.apache.cassandra.locator.InetAddressAndPort;
import org.apache.cassandra.locator.TokenMetadata;
import org.apache.cassandra.schema.SchemaConstants;
+import org.apache.cassandra.schema.SchemaKeyspaceTables;
import org.apache.cassandra.service.StorageService;
import org.apache.cassandra.utils.FBUtilities;
@@ -284,6 +279,14 @@ public class AlterTest extends CQLTester
assertRowsIgnoringOrderAndExtra(execute("SELECT keyspace_name,
durable_writes, replication FROM system_schema.keyspaces"),
row(KEYSPACE, true, map("class",
"org.apache.cassandra.locator.SimpleStrategy", "replication_factor", "1")),
row(KEYSPACE_PER_TEST, true,
map("class", "org.apache.cassandra.locator.SimpleStrategy",
"replication_factor", "1")),
+ row(ks1, true, map("class",
"org.apache.cassandra.locator.NetworkTopologyStrategy", DATA_CENTER, "0",
DATA_CENTER_REMOTE, "3")));
+
+ schemaChange("ALTER KEYSPACE " + ks1 + " WITH replication = { 'class'
: 'NetworkTopologyStrategy', '" + DATA_CENTER_REMOTE + "': 3 }");
+
+ // Removal is a two-step process as the "0" filter has been removed
from NTS.prepareOptions
+ assertRowsIgnoringOrderAndExtra(execute("SELECT keyspace_name,
durable_writes, replication FROM system_schema.keyspaces"),
+ row(KEYSPACE, true, map("class",
"org.apache.cassandra.locator.SimpleStrategy", "replication_factor", "1")),
+ row(KEYSPACE_PER_TEST, true,
map("class", "org.apache.cassandra.locator.SimpleStrategy",
"replication_factor", "1")),
row(ks1, true, map("class",
"org.apache.cassandra.locator.NetworkTopologyStrategy", DATA_CENTER_REMOTE,
"3")));
// The auto-expansion should not change existing replication counts;
do not let the user shoot themselves in the foot
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]