This is an automated email from the ASF dual-hosted git repository.
paulo pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/cassandra.git
The following commit(s) were added to refs/heads/trunk by this push:
new afa55123c8 Fix initial auto-repairs skipped by too soon check
afa55123c8 is described below
commit afa55123c87be7fd31a68abd87c3427141fe60c0
Author: Paulo Motta <[email protected]>
AuthorDate: Tue Jan 13 17:11:18 2026 -0500
Fix initial auto-repairs skipped by too soon check
patch by Paulo Motta; reviewed by Jaydeepkumar Chovatia for CASSANDRA-21115
---
CHANGES.txt | 1 +
.../cassandra/repair/autorepair/AutoRepair.java | 30 ++++++---
.../repair/autorepair/AutoRepairState.java | 22 +++++--
.../repair/autorepair/AutoRepairUtils.java | 31 ++++++++-
.../autorepair/AutoRepairParameterizedTest.java | 36 +++++------
.../repair/autorepair/AutoRepairStateTest.java | 8 +--
.../repair/autorepair/AutoRepairTest.java | 75 ++++++++++++++++++++++
.../repair/autorepair/AutoRepairUtilsTest.java | 24 ++++++-
8 files changed, 186 insertions(+), 41 deletions(-)
diff --git a/CHANGES.txt b/CHANGES.txt
index 56e6b8dada..90cbf6d684 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
5.1
+ * Fix initial auto-repairs skipped by too soon check (CASSANDRA-21115)
* Add configuration to disk usage guardrails to stop writes across all
replicas of a keyspace when any node replicating that keyspace exceeds the disk
usage failure threshold. (CASSANDRA-21024)
* BETWEEN where token(Y) > token(Z) returns wrong answer (CASSANDRA-20154)
* Optimize memtable flush logic (CASSANDRA-21083)
diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java
index 87d342e443..967fe540eb 100644
--- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java
+++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java
@@ -209,7 +209,8 @@ public class AutoRepair
long startTimeInMillis = timeFunc.get();
logger.info("My host id: {}, my turn to run repair...repair
primary-ranges only? {}", myId,
config.getRepairPrimaryTokenRangeOnly(repairType));
- AutoRepairUtils.updateStartAutoRepairHistory(repairType, myId,
timeFunc.get(), turn);
+ AutoRepairUtils.updateStartAutoRepairHistory(repairType, myId,
startTimeInMillis, turn);
+ repairState.setLastRepairStartTime(startTimeInMillis);
repairState.setRepairKeyspaceCount(0);
repairState.setRepairInProgress(true);
@@ -402,20 +403,31 @@ public class AutoRepair
}
}
- private boolean tooSoonToRunRepair(AutoRepairConfig.RepairType repairType,
AutoRepairState repairState, AutoRepairConfig config, UUID myId)
+ @VisibleForTesting
+ boolean tooSoonToRunRepair(AutoRepairConfig.RepairType repairType,
AutoRepairState repairState, AutoRepairConfig config, UUID myId)
{
- if (repairState.getLastRepairTime() == 0)
+ if (repairState.getLastRepairFinishTime() == 0)
{
- // the node has either just boooted or has not run repair before,
+ // the node has either just booted or has not run repair before,
// we should check for the node's repair history in the DB
-
repairState.setLastRepairTime(AutoRepairUtils.getLastRepairTimeForNode(repairType,
myId));
+
repairState.setLastRepairFinishTime(AutoRepairUtils.getLastRepairFinishTimeForNode(repairType,
myId));
+
repairState.setLastRepairStartTime(AutoRepairUtils.getLastRepairStartTimeForNode(repairType,
myId));
}
+
+ // If repair has not completed (start >= finish), don't skip - allow
it to continue/resume
+ if (repairState.getLastRepairStartTime() >=
repairState.getLastRepairFinishTime())
+ {
+ logger.info("Incomplete or unstarted repair detected (start_ts={}
>= finish_ts={}), allowing resume",
+ repairState.getLastRepairStartTime(),
repairState.getLastRepairFinishTime());
+ return false;
+ }
+
/*
* check if it is too soon to run repair. one of the reason we
* should not run frequent repair is that repair triggers
* memtable flush
*/
- long timeElapsedSinceLastRepair =
TimeUnit.MILLISECONDS.toSeconds(timeFunc.get() -
repairState.getLastRepairTime());
+ long timeElapsedSinceLastRepair =
TimeUnit.MILLISECONDS.toSeconds(timeFunc.get() -
repairState.getLastRepairFinishTime());
if (timeElapsedSinceLastRepair <
config.getRepairMinInterval(repairType).toSeconds())
{
logger.info("Too soon to run repair, last repair was done {}
seconds ago",
@@ -497,14 +509,14 @@ public class AutoRepair
"repairTokenRangesSkipCount {}, repairTablesSkipCount {}",
repairType, timeInHours, repairState.getRepairKeyspaceCount(),
repairState.getSucceededTokenRangesCount(),
repairState.getFailedTokenRangesCount(),
repairState.getSkippedTokenRangesCount(),
repairState.getSkippedTablesCount());
- if (repairState.getLastRepairTime() != 0)
+ if (repairState.getLastRepairFinishTime() != 0)
{
repairState.setClusterRepairTimeInSec((int)
TimeUnit.MILLISECONDS.toSeconds(timeFunc.get() -
-
repairState.getLastRepairTime()));
+
repairState.getLastRepairFinishTime()));
logger.info("Cluster repair time for repair type {}: {} day(s)",
repairType,
TimeUnit.SECONDS.toDays(repairState.getClusterRepairTimeInSec()));
}
- repairState.setLastRepairTime(timeFunc.get());
+ repairState.setLastRepairFinishTime(timeFunc.get());
repairState.setRepairInProgress(false);
AutoRepairUtils.updateFinishAutoRepairHistory(repairType, myId,
timeFunc.get());
diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java
index 6e7a19bc12..878f13e05a 100644
--- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java
+++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairState.java
@@ -64,7 +64,9 @@ public abstract class AutoRepairState
@VisibleForTesting
protected int totalTablesConsideredForRepair = 0;
@VisibleForTesting
- protected long lastRepairTimeInMs;
+ protected long lastRepairFinishTimeInMs;
+ @VisibleForTesting
+ protected long lastRepairStartTimeInMs;
@VisibleForTesting
protected int nodeRepairTimeInSec = 0;
@VisibleForTesting
@@ -120,9 +122,9 @@ public abstract class AutoRepairState
setTotalKeyspaceRepairPlansToRepair(repairPlans.stream().mapToInt(repairPlan ->
repairPlan.getKeyspaceRepairPlans().size()).sum());
}
- public long getLastRepairTime()
+ public long getLastRepairFinishTime()
{
- return lastRepairTimeInMs;
+ return lastRepairFinishTimeInMs;
}
public void setTotalTablesConsideredForRepair(int count)
@@ -135,9 +137,19 @@ public abstract class AutoRepairState
return totalTablesConsideredForRepair;
}
- public void setLastRepairTime(long lastRepairTime)
+ public void setLastRepairFinishTime(long lastRepairFinishTime)
+ {
+ lastRepairFinishTimeInMs = lastRepairFinishTime;
+ }
+
+ public long getLastRepairStartTime()
+ {
+ return lastRepairStartTimeInMs;
+ }
+
+ public void setLastRepairStartTime(long lastRepairStartTime)
{
- lastRepairTimeInMs = lastRepairTime;
+ lastRepairStartTimeInMs = lastRepairStartTime;
}
public int getClusterRepairTimeInSec()
diff --git a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java
index 2574e5cfa8..d1a10f9d7e 100644
--- a/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java
+++ b/src/java/org/apache/cassandra/repair/autorepair/AutoRepairUtils.java
@@ -177,11 +177,16 @@ public class AutoRepairUtils
"SELECT %s FROM %s.%s WHERE %s = ? AND %s = ?", COL_REPAIR_FINISH_TS,
SchemaConstants.DISTRIBUTED_KEYSPACE_NAME,
SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_TYPE,
COL_HOST_ID);
+ final static String SELECT_LAST_REPAIR_START_TIME_FOR_NODE = String.format(
+ "SELECT %s FROM %s.%s WHERE %s = ? AND %s = ?", COL_REPAIR_START_TS,
SchemaConstants.DISTRIBUTED_KEYSPACE_NAME,
+ SystemDistributedKeyspace.AUTO_REPAIR_HISTORY, COL_REPAIR_TYPE,
COL_HOST_ID);
+
static ModificationStatement delStatementRepairHistory;
static SelectStatement selectStatementRepairHistory;
static ModificationStatement delStatementPriorityStatus;
static SelectStatement selectStatementRepairPriority;
static SelectStatement selectLastRepairTimeForNode;
+ static SelectStatement selectLastRepairStartTimeForNode;
static ModificationStatement addPriorityHost;
static ModificationStatement insertNewRepairHistoryStatement;
static ModificationStatement recordStartRepairHistoryStatement;
@@ -207,6 +212,8 @@ public class AutoRepairUtils
.forInternalCalls());
selectLastRepairTimeForNode = (SelectStatement)
QueryProcessor.getStatement(SELECT_LAST_REPAIR_TIME_FOR_NODE, ClientState
.forInternalCalls());
+ selectLastRepairStartTimeForNode = (SelectStatement)
QueryProcessor.getStatement(SELECT_LAST_REPAIR_START_TIME_FOR_NODE, ClientState
+
.forInternalCalls());
delStatementPriorityStatus = (ModificationStatement)
QueryProcessor.getStatement(DEL_REPAIR_PRIORITY, ClientState
.forInternalCalls());
addPriorityHost = (ModificationStatement)
QueryProcessor.getStatement(ADD_PRIORITY_HOST, ClientState
@@ -382,7 +389,8 @@ public class AutoRepairUtils
// this function will be called when a node bootstrap finished
UUID hostId =
StorageService.instance.getHostIdForEndpoint(FBUtilities.getBroadcastAddressAndPort());
// insert the data first
- insertNewRepairHistory(repairType, currentTimeMillis(),
currentTimeMillis());
+ long timestamp = currentTimeMillis();
+ insertNewRepairHistory(repairType, timestamp, timestamp);
setForceRepair(repairType, hostId);
}
@@ -407,7 +415,7 @@ public class AutoRepairUtils
logger.info("Set force repair repair type: {}, node: {}", repairType,
hostId);
}
- public static long getLastRepairTimeForNode(RepairType repairType, UUID
hostId)
+ public static long getLastRepairFinishTimeForNode(RepairType repairType,
UUID hostId)
{
ResultMessage.Rows rows =
selectLastRepairTimeForNode.execute(QueryState.forInternalCalls(),
QueryOptions.forInternalCalls(internalQueryCL,
@@ -423,6 +431,22 @@ public class AutoRepairUtils
return repairTime.one().getLong(COL_REPAIR_FINISH_TS);
}
+ public static long getLastRepairStartTimeForNode(RepairType repairType,
UUID hostId)
+ {
+ ResultMessage.Rows rows =
selectLastRepairStartTimeForNode.execute(QueryState.forInternalCalls(),
+
QueryOptions.forInternalCalls(internalQueryCL,
+
Lists.newArrayList(
+
ByteBufferUtil.bytes(repairType.toString()),
+
ByteBufferUtil.bytes(hostId))),
+
Dispatcher.RequestTime.forImmediateExecution());
+ UntypedResultSet repairTime = UntypedResultSet.create(rows.result);
+ if (repairTime.isEmpty())
+ {
+ return 0;
+ }
+ return repairTime.one().getLong(COL_REPAIR_START_TS);
+ }
+
@VisibleForTesting
public static CurrentRepairStatus getCurrentRepairStatus(RepairType
repairType, List<AutoRepairHistory> autoRepairHistories, UUID myId)
{
@@ -840,7 +864,8 @@ public class AutoRepairUtils
if (!autoRepairHistoryIds.contains(hostId))
{
logger.info("{} for repair type {} doesn't exist in the
auto repair history table, insert a new record.", repairType, hostId);
- insertNewRepairHistory(repairType, hostId,
currentTimeMillis(), currentTimeMillis());
+ long timestamp = currentTimeMillis();
+ insertNewRepairHistory(repairType, hostId, timestamp,
timestamp);
}
}
diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java
index 485257c16e..9eae6b10e5 100644
--- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java
+++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairParameterizedTest.java
@@ -249,7 +249,7 @@ public class AutoRepairParameterizedTest extends CQLTester
AutoRepair.instance.repair(repairType);
assertEquals(0,
AutoRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair());
assertEquals(0,
AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue());
- long lastRepairTime =
AutoRepair.instance.repairStates.get(repairType).getLastRepairTime();
+ long lastRepairTime =
AutoRepair.instance.repairStates.get(repairType).getLastRepairFinishTime();
//if repair was done then lastRepairTime should be non-zero
Assert.assertTrue(String.format("Expected lastRepairTime > 0, actual
value lastRepairTime %d",
lastRepairTime), lastRepairTime > 0);
@@ -264,7 +264,7 @@ public class AutoRepairParameterizedTest extends CQLTester
//in the first round let repair run
config.setRepairMinInterval(repairType, "0s");
AutoRepair.instance.repair(repairType);
- long lastRepairTime1 =
AutoRepair.instance.repairStates.get(repairType).getLastRepairTime();
+ long lastRepairTime1 =
AutoRepair.instance.repairStates.get(repairType).getLastRepairFinishTime();
int consideredTables =
AutoRepair.instance.repairStates.get(repairType).getTotalTablesConsideredForRepair();
Assert.assertNotEquals(String.format("Expected total repaired tables >
0, actual value %s ", consideredTables),
0, consideredTables);
@@ -272,7 +272,7 @@ public class AutoRepairParameterizedTest extends CQLTester
//if repair was done in last 24 hours then it should not trigger
another repair
config.setRepairMinInterval(repairType, "24h");
AutoRepair.instance.repair(repairType);
- long lastRepairTime2 =
AutoRepair.instance.repairStates.get(repairType).getLastRepairTime();
+ long lastRepairTime2 =
AutoRepair.instance.repairStates.get(repairType).getLastRepairFinishTime();
Assert.assertEquals(String.format("Expected repair time to be same,
actual value lastRepairTime1 %d, " +
"lastRepairTime2 %d",
lastRepairTime1, lastRepairTime2), lastRepairTime1, lastRepairTime2);
assertEquals(0,
AutoRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair());
@@ -287,14 +287,14 @@ public class AutoRepairParameterizedTest extends CQLTester
long prevCount = state.getTotalMVTablesConsideredForRepair();
AutoRepairService.instance.getAutoRepairConfig().setRepairMinInterval(repairType,
"0s");
AutoRepair.instance.repair(repairType);
- long lastRepairTime1 =
AutoRepair.instance.repairStates.get(repairType).getLastRepairTime();
+ long lastRepairTime1 =
AutoRepair.instance.repairStates.get(repairType).getLastRepairFinishTime();
Assert.assertTrue(String.format("Expected lastRepairTime1 > 0, actual
value lastRepairTime1 %d",
lastRepairTime1), lastRepairTime1 > 0);
UUID myId =
StorageService.instance.getHostIdForEndpoint(FBUtilities.getBroadcastAddressAndPort());
Assert.assertNotEquals("Expected my turn for the repair",
NOT_MY_TURN,
AutoRepairUtils.myTurnToRunRepair(repairType, myId));
AutoRepair.instance.repair(repairType);
- long lastRepairTime2 =
AutoRepair.instance.repairStates.get(repairType).getLastRepairTime();
+ long lastRepairTime2 =
AutoRepair.instance.repairStates.get(repairType).getLastRepairFinishTime();
Assert.assertNotSame(String.format("Expected repair time to be same,
actual value lastRepairTime1 %d, " +
"lastRepairTime2 %d",
lastRepairTime1, lastRepairTime2), lastRepairTime1, lastRepairTime2);
assertEquals(prevCount, state.getTotalMVTablesConsideredForRepair());
@@ -330,9 +330,9 @@ public class AutoRepairParameterizedTest extends CQLTester
long prevCount = state.getTotalMVTablesConsideredForRepair();
config.setRepairMinInterval(repairType, "0s");
config.setAutoRepairEnabled(repairType, false);
- long lastRepairTime1 =
AutoRepair.instance.repairStates.get(repairType).getLastRepairTime();
+ long lastRepairTime1 =
AutoRepair.instance.repairStates.get(repairType).getLastRepairFinishTime();
AutoRepair.instance.repair(repairType);
- long lastRepairTime2 =
AutoRepair.instance.repairStates.get(repairType).getLastRepairTime();
+ long lastRepairTime2 =
AutoRepair.instance.repairStates.get(repairType).getLastRepairFinishTime();
//Since repair has not happened, both the last repair times should be
same
Assert.assertEquals(String.format("Expected lastRepairTime1 %d, and
lastRepairTime2 %d to be same",
lastRepairTime1, lastRepairTime2),
lastRepairTime1, lastRepairTime2);
@@ -340,7 +340,7 @@ public class AutoRepairParameterizedTest extends CQLTester
config.setAutoRepairEnabled(repairType, true);
AutoRepair.instance.repair(repairType);
//since repair is done now, so lastRepairTime1/lastRepairTime2 and
lastRepairTime3 should not be same
- long lastRepairTime3 =
AutoRepair.instance.repairStates.get(repairType).getLastRepairTime();
+ long lastRepairTime3 =
AutoRepair.instance.repairStates.get(repairType).getLastRepairFinishTime();
Assert.assertNotSame(String.format("Expected lastRepairTime1 %d, and
lastRepairTime3 %d to be not same",
lastRepairTime1, lastRepairTime2),
lastRepairTime1, lastRepairTime3);
assertEquals(prevCount, state.getTotalMVTablesConsideredForRepair());
@@ -376,19 +376,19 @@ public class AutoRepairParameterizedTest extends CQLTester
AutoRepairConfig config =
AutoRepairService.instance.getAutoRepairConfig();
config.setMaterializedViewRepairEnabled(repairType, true);
config.setRepairMinInterval(repairType, "0s");
-
AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(System.currentTimeMillis());
+
AutoRepair.instance.repairStates.get(repairType).setLastRepairFinishTime(System.currentTimeMillis());
AutoRepair.instance.repair(repairType);
assertEquals(1,
AutoRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair());
assertEquals(1,
AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue());
config.setMaterializedViewRepairEnabled(repairType, false);
-
AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(System.currentTimeMillis());
+
AutoRepair.instance.repairStates.get(repairType).setLastRepairFinishTime(System.currentTimeMillis());
AutoRepair.instance.repair(repairType);
assertEquals(0,
AutoRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair());
assertEquals(0,
AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue());
config.setMaterializedViewRepairEnabled(repairType, true);
-
AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(System.currentTimeMillis());
+
AutoRepair.instance.repairStates.get(repairType).setLastRepairFinishTime(System.currentTimeMillis());
AutoRepair.instance.repair(repairType);
assertEquals(1,
AutoRepair.instance.repairStates.get(repairType).getTotalMVTablesConsideredForRepair());
assertEquals(1,
AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue());
@@ -427,7 +427,7 @@ public class AutoRepairParameterizedTest extends CQLTester
config.setRepairSSTableCountHigherThreshold(repairType, 9);
assertEquals(0, state.getSkippedTokenRangesCount());
assertEquals(0,
AutoRepairMetricsManager.getMetrics(repairType).skippedTokenRangesCount.getValue().intValue());
- state.setLastRepairTime(0);
+ state.setLastRepairFinishTime(0);
AutoRepair.instance.repair(repairType);
assertEquals(0, state.getTotalMVTablesConsideredForRepair());
assertEquals(0,
AutoRepairMetricsManager.getMetrics(repairType).totalMVTablesConsideredForRepair.getValue().intValue());
@@ -439,7 +439,7 @@ public class AutoRepairParameterizedTest extends CQLTester
// set it to higher value, and this time, the tables should not be
skipped
config.setRepairSSTableCountHigherThreshold(repairType, beforeCount);
- state.setLastRepairTime(0);
+ state.setLastRepairFinishTime(0);
state.setSkippedTablesCount(0);
state.setTotalMVTablesConsideredForRepair(0);
AutoRepair.instance.repair(repairType);
@@ -474,7 +474,7 @@ public class AutoRepairParameterizedTest extends CQLTester
timeFuncCalls++;
return timeFuncCalls * 1000L;
};
-
AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(1000L);
+
AutoRepair.instance.repairStates.get(repairType).setLastRepairFinishTime(1000L);
AutoRepair.instance.repair(repairType);
@@ -509,7 +509,7 @@ public class AutoRepairParameterizedTest extends CQLTester
{
AutoRepairConfig config =
AutoRepairService.instance.getAutoRepairConfig();
AutoRepair.instance.repairStates.put(repairType, autoRepairState);
- when(autoRepairState.getLastRepairTime()).thenReturn((long) 0);
+ when(autoRepairState.getLastRepairFinishTime()).thenReturn((long) 0);
AtomicInteger getRepairRunnableCalls = new AtomicInteger();
AtomicReference<AutoRepair.RepairProgressListener> prevListener = new
AtomicReference<>();
doAnswer(invocation -> {
@@ -608,7 +608,7 @@ public class AutoRepairParameterizedTest extends CQLTester
config.setMaterializedViewRepairEnabled(repairType, true);
long lastRepairTime = System.currentTimeMillis() - 1000;
AutoRepairUtils.insertNewRepairHistory(repairType, 0, lastRepairTime);
- AutoRepair.instance.repairStates.get(repairType).setLastRepairTime(0);
+
AutoRepair.instance.repairStates.get(repairType).setLastRepairFinishTime(0);
config.setRepairMinInterval(repairType, "1h");
AutoRepair.instance.repair(repairType);
@@ -616,7 +616,7 @@ public class AutoRepairParameterizedTest extends CQLTester
// repair scheduler should not attempt to run repair as last repair
time in DB is current time - 1s
assertEquals(0,
AutoRepair.instance.repairStates.get(repairType).getTotalTablesConsideredForRepair());
// repair scheduler should load the repair time from the DB
- assertEquals(lastRepairTime,
AutoRepair.instance.repairStates.get(repairType).getLastRepairTime());
+ assertEquals(lastRepairTime,
AutoRepair.instance.repairStates.get(repairType).getLastRepairFinishTime());
}
@Test
@@ -841,7 +841,7 @@ public class AutoRepairParameterizedTest extends CQLTester
}
return runnable;
}).when(spyState).getRepairRunnable(Mockito.any(), Mockito.any(),
Mockito.any(), anyBoolean());
- when(spyState.getLastRepairTime()).thenReturn((long) 0);
+ when(spyState.getLastRepairFinishTime()).thenReturn((long) 0);
AutoRepairService.instance.getAutoRepairConfig().setRepairMaxRetries(repairType,
0);
AutoRepair.instance.repairStates.put(repairType, spyState);
diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java
index 1e5503415f..305f7b2bb8 100644
--- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java
+++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairStateTest.java
@@ -82,9 +82,9 @@ public class AutoRepairStateTest extends CQLTester
public void testGetLastRepairTime()
{
AutoRepairState state = RepairType.getAutoRepairState(repairType, new
AutoRepairConfig());
- state.lastRepairTimeInMs = 1;
+ state.lastRepairFinishTimeInMs = 1;
- assertEquals(1, state.getLastRepairTime());
+ assertEquals(1, state.getLastRepairFinishTime());
}
@Test
@@ -111,9 +111,9 @@ public class AutoRepairStateTest extends CQLTester
{
AutoRepairState state = RepairType.getAutoRepairState(repairType, new
AutoRepairConfig());
- state.setLastRepairTime(1);
+ state.setLastRepairFinishTime(1);
- assertEquals(1, state.lastRepairTimeInMs);
+ assertEquals(1, state.lastRepairFinishTimeInMs);
}
@Test
diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java
index e82a364de9..5a47ce398e 100644
--- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java
+++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairTest.java
@@ -20,6 +20,7 @@ package org.apache.cassandra.repair.autorepair;
import java.util.HashMap;
import java.util.Map;
+import java.util.UUID;
import org.junit.Assert;
import org.junit.Before;
@@ -29,17 +30,23 @@ import org.junit.Test;
import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.config.DurationSpec;
import org.apache.cassandra.cql3.CQLTester;
+import org.apache.cassandra.cql3.QueryProcessor;
import org.apache.cassandra.db.Keyspace;
import org.apache.cassandra.exceptions.ConfigurationException;
import org.apache.cassandra.repair.autorepair.AutoRepairConfig.RepairType;
import org.apache.cassandra.schema.KeyspaceMetadata;
import org.apache.cassandra.schema.KeyspaceParams;
import org.apache.cassandra.schema.ReplicationParams;
+import org.apache.cassandra.schema.SchemaConstants;
import org.apache.cassandra.schema.SchemaTestUtil;
+import org.apache.cassandra.schema.SystemDistributedKeyspace;
import org.apache.cassandra.service.AutoRepairService;
+import org.apache.cassandra.service.StorageService;
+import org.apache.cassandra.utils.FBUtilities;
import static org.apache.cassandra.Util.setAutoRepairEnabled;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
/**
@@ -52,6 +59,8 @@ public class AutoRepairTest extends CQLTester
{
setAutoRepairEnabled(true);
requireNetwork();
+ AutoRepairUtils.setup();
+ StorageService.instance.doAutoRepairSetup();
}
@Before
@@ -161,4 +170,70 @@ public class AutoRepairTest extends CQLTester
}
}
}
+
+ @Test
+ public void testTooSoonToRunRepairAllowsResumeOfInProgressRepair()
+ {
+ RepairType repairType = RepairType.FULL;
+ UUID myId =
StorageService.instance.getHostIdForEndpoint(FBUtilities.getBroadcastAddressAndPort());
+ long now = System.currentTimeMillis();
+
+ // Truncate history table to start fresh
+ QueryProcessor.executeInternal(String.format(
+ "TRUNCATE %s.%s",
+ SchemaConstants.DISTRIBUTED_KEYSPACE_NAME,
SystemDistributedKeyspace.AUTO_REPAIR_HISTORY));
+
+ // Insert with start_ts > finish_ts to simulate in-progress repair
(crashed before completion)
+ AutoRepairUtils.insertNewRepairHistory(repairType, myId, now, now -
1000);
+
+ AutoRepairConfig config = DatabaseDescriptor.getAutoRepairConfig();
+ AutoRepairState repairState =
RepairType.getAutoRepairState(repairType, config);
+
+ // Even though finish_ts was very recent, should return false because
repair is in progress
+ assertFalse(AutoRepair.instance.tooSoonToRunRepair(repairType,
repairState, config, myId));
+ }
+
+ @Test
+ public void testTooSoonToRunRepairReturnsTrueWhenRepairCompletedRecently()
+ {
+ RepairType repairType = RepairType.FULL;
+ UUID myId =
StorageService.instance.getHostIdForEndpoint(FBUtilities.getBroadcastAddressAndPort());
+ long now = System.currentTimeMillis();
+
+ // Truncate history table to start fresh
+ QueryProcessor.executeInternal(String.format(
+ "TRUNCATE %s.%s",
+ SchemaConstants.DISTRIBUTED_KEYSPACE_NAME,
SystemDistributedKeyspace.AUTO_REPAIR_HISTORY));
+
+ // Insert with finish_ts > start_ts to simulate completed repair
+ AutoRepairUtils.insertNewRepairHistory(repairType, myId, now - 1000,
now);
+
+ AutoRepairConfig config = DatabaseDescriptor.getAutoRepairConfig();
+ AutoRepairState repairState =
RepairType.getAutoRepairState(repairType, config);
+
+ // Should return true because repair completed recently and
min_repair_interval hasn't passed
+ assertTrue(AutoRepair.instance.tooSoonToRunRepair(repairType,
repairState, config, myId));
+ }
+
+ @Test
+ public void testTooSoonToRunRepairAllowsResumeWhenStartEqualsFinish()
+ {
+ RepairType repairType = RepairType.FULL;
+ UUID myId =
StorageService.instance.getHostIdForEndpoint(FBUtilities.getBroadcastAddressAndPort());
+ long now = System.currentTimeMillis();
+
+ // Truncate history table to start fresh
+ QueryProcessor.executeInternal(String.format(
+ "TRUNCATE %s.%s",
+ SchemaConstants.DISTRIBUTED_KEYSPACE_NAME,
SystemDistributedKeyspace.AUTO_REPAIR_HISTORY));
+
+ // Insert with start_ts == finish_ts (edge case: repair never started
progressing)
+ AutoRepairUtils.insertNewRepairHistory(repairType, myId, now, now);
+
+ AutoRepairConfig config = DatabaseDescriptor.getAutoRepairConfig();
+ AutoRepairState repairState =
RepairType.getAutoRepairState(repairType, config);
+
+ // Should return false because repair is incomplete (start >= finish
edge case)
+ assertFalse(AutoRepair.instance.tooSoonToRunRepair(repairType,
repairState, config, myId));
+ }
}
diff --git a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java
index cc49f0249a..6be9382661 100644
--- a/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java
+++ b/test/unit/org/apache/cassandra/repair/autorepair/AutoRepairUtilsTest.java
@@ -521,7 +521,7 @@ public class AutoRepairUtilsTest extends CQLTester
AutoRepairUtils.insertNewRepairHistory(repairType, myID,
currentMillis, currentMillis - 100);
AutoRepairUtils.insertNewRepairHistory(repairType, otherID,
currentMillis, currentMillis + 100);
- assertEquals(currentMillis - 100,
AutoRepairUtils.getLastRepairTimeForNode(repairType, myID));
+ assertEquals(currentMillis - 100,
AutoRepairUtils.getLastRepairFinishTimeForNode(repairType, myID));
}
@Test
@@ -529,7 +529,27 @@ public class AutoRepairUtilsTest extends CQLTester
{
UUID myID = UUID.randomUUID();
- assertEquals(0, AutoRepairUtils.getLastRepairTimeForNode(repairType,
myID));
+ assertEquals(0,
AutoRepairUtils.getLastRepairFinishTimeForNode(repairType, myID));
+ }
+
+ @Test
+ public void testGetLastRepairStartTimeForNode()
+ {
+ UUID myID = UUID.randomUUID();
+ UUID otherID = UUID.randomUUID();
+ long currentMillis = System.currentTimeMillis();
+ AutoRepairUtils.insertNewRepairHistory(repairType, myID,
currentMillis, currentMillis - 100);
+ AutoRepairUtils.insertNewRepairHistory(repairType, otherID,
currentMillis + 50, currentMillis + 100);
+
+ assertEquals(currentMillis,
AutoRepairUtils.getLastRepairStartTimeForNode(repairType, myID));
+ }
+
+ @Test
+ public void testGetLastRepairStartTimeForNodeWhenHistoryIsEmpty()
+ {
+ UUID myID = UUID.randomUUID();
+
+ assertEquals(0,
AutoRepairUtils.getLastRepairStartTimeForNode(repairType, myID));
}
@Test
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]