[ https://issues.apache.org/jira/browse/CASSANDRA-15160?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17193787#comment-17193787 ]

David Capwell commented on CASSANDRA-15160:
-------------------------------------------

Left comments on the commits, but I am unable to replicate the described behavior
in 3.0; I ran the tests below against 3.0, and the unreplicated cases all failed for me.

{code}
package org.apache.cassandra.distributed.test;

import java.io.IOException;

import org.junit.Test;

import org.apache.cassandra.distributed.Cluster;
import org.apache.cassandra.distributed.api.ConsistencyLevel;
import org.apache.cassandra.distributed.api.IInvokableInstance;
import org.assertj.core.api.Assertions;

public class RepairFilteringTest extends TestBaseImpl
{
    @Test
    public void dcFilterOnEmptyDC() throws IOException
    {
        try (Cluster cluster = Cluster.build().withRacks(2, 1, 2).start())
        {
            // 1-2 : datacenter1
            // 3-4 : datacenter2
            cluster.schemaChange("CREATE KEYSPACE " + KEYSPACE + " WITH 
replication = {'class': 'NetworkTopologyStrategy', 'datacenter1':2, 
'datacenter2':0}");
            cluster.schemaChange("CREATE TABLE " + KEYSPACE + ".tbl (id int 
PRIMARY KEY, i int)");
            for (int i = 0; i < 10; i++)
                cluster.coordinator(1).execute("INSERT INTO " + KEYSPACE + ".tbl (id, i) VALUES (?, ?)", ConsistencyLevel.ALL, i, i);
            cluster.forEach(i -> i.flush(KEYSPACE));

            // choose a node in the DC that doesn't have any replicas
            IInvokableInstance node = cluster.get(3);
            Assertions.assertThat(node.config().localDatacenter()).isEqualTo("datacenter2");
            // fails with "the local data center must be part of the repair"
            node.nodetoolResult("repair", "-full", "-dc", "datacenter1", "-st", 
"0", "-et", "1000", KEYSPACE, "tbl")
                   .asserts().failure().errorContains("the local data center 
must be part of the repair");
        }
    }

    @Test
    public void hostFilterDifferentDC() throws IOException
    {
        try (Cluster cluster = Cluster.build().withRacks(2, 1, 2).start())
        {
            // 1-2 : datacenter1
            // 3-4 : datacenter2
            cluster.schemaChange("CREATE KEYSPACE " + KEYSPACE + " WITH 
replication = {'class': 'NetworkTopologyStrategy', 'datacenter1':2, 
'datacenter2':0}");
            cluster.schemaChange("CREATE TABLE " + KEYSPACE + ".tbl (id int 
PRIMARY KEY, i int)");
            for (int i = 0; i < 10; i++)
                cluster.coordinator(1).execute("INSERT INTO " + KEYSPACE + ".tbl (id, i) VALUES (?, ?)", ConsistencyLevel.ALL, i, i);
            cluster.forEach(i -> i.flush(KEYSPACE));

            // choose a node in the DC that doesn't have any replicas
            IInvokableInstance node = cluster.get(3);
            Assertions.assertThat(node.config().localDatacenter()).isEqualTo("datacenter2");
            // fails with "Specified hosts [127.0.0.3, 127.0.0.1] do not share 
range (0,1000] needed for repair. Either restrict repair ranges with -st/-et 
options, or specify one of the neighbors that share this range with this node: 
[].. Check the logs on the repair participants for further details"
            node.nodetoolResult("repair", "-full",
                                "-hosts", 
cluster.get(1).broadcastAddress().getAddress().getHostAddress(),
                                "-hosts", 
node.broadcastAddress().getAddress().getHostAddress(),
                                "-st", "0", "-et", "1000", KEYSPACE, "tbl")
                .asserts().failure().errorContains("do not share range (0,1000] 
needed for repair");
        }
    }

    @Test
    public void emptyDC() throws IOException
    {
        try (Cluster cluster = Cluster.build().withRacks(2, 1, 2).start())
        {
            // 1-2 : datacenter1
            // 3-4 : datacenter2
            cluster.schemaChange("CREATE KEYSPACE " + KEYSPACE + " WITH 
replication = {'class': 'NetworkTopologyStrategy', 'datacenter1':2, 
'datacenter2':0}");
            cluster.schemaChange("CREATE TABLE " + KEYSPACE + ".tbl (id int 
PRIMARY KEY, i int)");
            for (int i = 0; i < 10; i++)
                cluster.coordinator(1).execute("INSERT INTO " + KEYSPACE + ".tbl (id, i) VALUES (?, ?)", ConsistencyLevel.ALL, i, i);
            cluster.forEach(i -> i.flush(KEYSPACE));

            // choose a node in the DC that doesn't have any replicas
            IInvokableInstance node = cluster.get(3);
            Assertions.assertThat(node.config().localDatacenter()).isEqualTo("datacenter2");
            // fails with [2020-09-10 11:30:04,139] Repair command #1 failed with error Nothing to repair for (0,1000] in distributed_test_keyspace - aborting. Check the logs on the repair participants for further details
            node.nodetoolResult("repair", "-full",
                                "-st", "0", "-et", "1000", KEYSPACE, "tbl")
                .asserts().failure().errorContains("Nothing to repair for 
(0,1000] in " + KEYSPACE);
        }
    }

    @Test
    public void mainDC() throws IOException
    {
        try (Cluster cluster = Cluster.build().withRacks(2, 1, 2).start())
        {
            // 1-2 : datacenter1
            // 3-4 : datacenter2
            cluster.schemaChange("CREATE KEYSPACE " + KEYSPACE + " WITH 
replication = {'class': 'NetworkTopologyStrategy', 'datacenter1':2, 
'datacenter2':0}");
            cluster.schemaChange("CREATE TABLE " + KEYSPACE + ".tbl (id int 
PRIMARY KEY, i int)");
            for (int i = 0; i < 10; i++)
                cluster.coordinator(1).execute("INSERT INTO " + KEYSPACE + ".tbl (id, i) VALUES (?, ?)", ConsistencyLevel.ALL, i, i);
            cluster.forEach(i -> i.flush(KEYSPACE));

            // choose a node in the DC that holds the replicas
            IInvokableInstance node = cluster.get(1);
            Assertions.assertThat(node.config().localDatacenter()).isEqualTo("datacenter1");
            node.nodetoolResult("repair", "-full",
                                "-st", "0", "-et", "1000", KEYSPACE, "tbl")
                .asserts().success();
        }
    }
}
{code}
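
For completeness, here is a rough sketch of what the success case could look like once the proposed flag is in place. This assumes the option is exposed to nodetool repair as --ignore-unreplicated-keyspaces (the final option name may differ); it is a variant of the emptyDC test above, reusing the same class and imports:

{code}
    @Test
    public void emptyDCWithIgnoreFlag() throws IOException
    {
        try (Cluster cluster = Cluster.build().withRacks(2, 1, 2).start())
        {
            // 1-2 : datacenter1
            // 3-4 : datacenter2
            cluster.schemaChange("CREATE KEYSPACE " + KEYSPACE + " WITH replication = {'class': 'NetworkTopologyStrategy', 'datacenter1':2, 'datacenter2':0}");
            cluster.schemaChange("CREATE TABLE " + KEYSPACE + ".tbl (id int PRIMARY KEY, i int)");

            // node 3 sits in datacenter2, which holds no replicas of the keyspace
            IInvokableInstance node = cluster.get(3);
            Assertions.assertThat(node.config().localDatacenter()).isEqualTo("datacenter2");

            // with the flag, the repair should be skipped rather than failed;
            // --ignore-unreplicated-keyspaces is an assumed name for the new option
            node.nodetoolResult("repair", "-full", "--ignore-unreplicated-keyspaces",
                                "-st", "0", "-et", "1000", KEYSPACE, "tbl")
                .asserts().success();
        }
    }
{code}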

> Add flag to ignore unreplicated keyspaces during repair
> -------------------------------------------------------
>
>                 Key: CASSANDRA-15160
>                 URL: https://issues.apache.org/jira/browse/CASSANDRA-15160
>             Project: Cassandra
>          Issue Type: Improvement
>          Components: Consistency/Repair
>            Reporter: Marcus Eriksson
>            Assignee: Marcus Eriksson
>            Priority: Normal
>
> When a repair is triggered on a node in 'dc2' for a keyspace with replication 
> factor {'dc1':3, 'dc2':0}, we just ignore the repair in versions < 4. In 4.0 
> we fail the repair to make sure the operator does not think the keyspace is 
> fully repaired.
> There might be tooling that relies on the old behaviour, though, so we should 
> add a flag to ignore those unreplicated keyspaces.
>  


