jaydeepkumar1984 commented on code in PR #4455: URL: https://github.com/apache/cassandra/pull/4455#discussion_r2515957363
########## test/distributed/org/apache/cassandra/distributed/test/repair/AutoRepairOrphanCleanupTest.java: ########## @@ -0,0 +1,223 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.distributed.test.repair; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import java.util.concurrent.TimeUnit; + +import com.google.common.collect.ImmutableMap; + +import org.apache.cassandra.service.StorageService; +import org.apache.cassandra.utils.FBUtilities; + +import org.apache.cassandra.Util; +import org.apache.cassandra.distributed.Cluster; +import org.apache.cassandra.distributed.api.ConsistencyLevel; +import org.apache.cassandra.distributed.api.TokenSupplier; +import org.apache.cassandra.distributed.test.TestBaseImpl; +import org.apache.cassandra.repair.autorepair.AutoRepair; +import org.apache.cassandra.repair.autorepair.AutoRepairConfig; +import org.apache.cassandra.service.AutoRepairService; + +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +import static 
org.apache.cassandra.schema.SchemaConstants.DISTRIBUTED_KEYSPACE_NAME; +import static org.apache.cassandra.schema.SystemDistributedKeyspace.AUTO_REPAIR_HISTORY; +import static org.junit.Assert.assertEquals; + +/** + * Test that verifies orphan nodes are cleaned up from auto_repair_history even when repairs + * are skipped due to min_repair_interval constraints. + */ +public class AutoRepairOrphanCleanupTest extends TestBaseImpl +{ + private static Cluster cluster; + + @BeforeClass + public static void init() throws IOException + { + // Configure a 3-node cluster with auto_repair enabled but with a very high min_repair_interval + // This ensures that when we test, repairs will be skipped due to "too soon to repair" + cluster = Cluster.build(3) + .withTokenCount(4) + .withTokenSupplier(TokenSupplier.evenlyDistributedTokens(3, 4)) + .withConfig(config -> config + .set("num_tokens", 4) + .set("auto_repair", + ImmutableMap.of( + "repair_check_interval", "1s", + "repair_type_overrides", + ImmutableMap.of(AutoRepairConfig.RepairType.FULL.getConfigName(), + ImmutableMap.builder() + .put("initial_scheduler_delay", "0s") + .put("enabled", "true") + // Set very high min_repair_interval + // to ensure repairs are skipped + .put("min_repair_interval", "24h") + .put("allow_parallel_replica_repair", "true") + .put("repair_by_keyspace", "true") + .build()))) + .set("auto_repair.enabled", "true")) + .start(); + } + + @AfterClass + public static void tearDown() + { + cluster.close(); + } + + @Test + public void testOrphanNodeCleanupWhenRepairSkipped() + { + // Insert 3 auto-repair records for each live node and 1 for the orphan node + List<UUID> liveHostIds = new ArrayList<>(); + for (int i = 1; i <= 3; i++) + { + liveHostIds.add( + cluster.get(i).callOnInstance(() -> + StorageService.instance.getHostIdForEndpoint( + FBUtilities.getBroadcastAddressAndPort()))); + } + UUID orphanHostId = UUID.randomUUID(); + + long currentTime = System.currentTimeMillis(); + // Orphan node: oldest 
finish time, so it is next in line to run repair + long orphanStart = currentTime - TimeUnit.HOURS.toMillis(4); // 4 hours ago + long orphanFinish = currentTime - TimeUnit.HOURS.toMillis(3); // 3 hours ago + + // Live nodes: more recent finish times + long[] liveStart = { + currentTime - TimeUnit.HOURS.toMillis(3), // 3 hours ago + currentTime - TimeUnit.HOURS.toMillis(2), // 2 hours ago + currentTime - TimeUnit.HOURS.toMillis(1) // 1 hour ago + }; + long[] liveFinish = { + currentTime - TimeUnit.HOURS.toMillis(2), // 2 hours ago + currentTime - TimeUnit.HOURS.toMillis(1), // 1 hour ago + currentTime // now + }; + + // Insert live node records + for (int i = 0; i < 3; i++) + { + cluster.coordinator(1).execute(String.format( + "INSERT INTO %s.%s (repair_type, host_id, repair_start_ts, repair_finish_ts, repair_turn) " + + "VALUES ('%s', %s, %d, %d, '%s')", + DISTRIBUTED_KEYSPACE_NAME, + AUTO_REPAIR_HISTORY, + AutoRepairConfig.RepairType.FULL, + liveHostIds.get(i), + liveStart[i], + liveFinish[i], + "NOT_MY_TURN" + ), ConsistencyLevel.QUORUM); + } + + // Insert orphan node record (should be next in line to run repair) + cluster.coordinator(1).execute(String.format( + "INSERT INTO %s.%s (repair_type, host_id, repair_start_ts, repair_finish_ts, repair_turn) " + + "VALUES ('%s', %s, %d, %d, '%s')", + DISTRIBUTED_KEYSPACE_NAME, + AUTO_REPAIR_HISTORY, + AutoRepairConfig.RepairType.FULL, + orphanHostId, + orphanStart, + orphanFinish, + "NOT_MY_TURN" + ), ConsistencyLevel.QUORUM); + Review Comment: We should validate the orphan node entry and the three live node entries by doing a SELECT query before starting auto repair. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]

