kzalys commented on code in PR #3306:
URL: https://github.com/apache/cassandra/pull/3306#discussion_r1602234937


##########
src/java/org/apache/cassandra/repair/autorepair/AutoRepair.java:
##########
@@ -0,0 +1,401 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.repair.autorepair;
+
+import java.util.ArrayList;
+import java.util.EnumMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.UUID;
+import java.util.concurrent.FutureTask;
+import java.util.concurrent.TimeUnit;
+import java.util.function.Supplier;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.ImmutableList;
+
+import org.apache.cassandra.repair.RepairRunnable;
+import org.apache.cassandra.utils.Pair;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.cassandra.concurrent.NamedThreadFactory;
+import org.apache.cassandra.concurrent.ScheduledExecutorPlus;
+import org.apache.cassandra.config.DatabaseDescriptor;
+import org.apache.cassandra.db.ColumnFamilyStore;
+import org.apache.cassandra.db.Keyspace;
+import org.apache.cassandra.dht.Range;
+import org.apache.cassandra.dht.Token;
+import org.apache.cassandra.exceptions.ConfigurationException;
+import org.apache.cassandra.gms.Gossiper;
+import org.apache.cassandra.schema.TableMetadata;
+import org.apache.cassandra.schema.Tables;
+import org.apache.cassandra.service.AutoRepairService;
+import org.apache.cassandra.utils.FBUtilities;
+import org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn;
+
+import static org.apache.cassandra.concurrent.ExecutorFactory.Global.executorFactory;
+import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN;
+import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN_DUE_TO_PRIORITY;
+import static org.apache.cassandra.repair.autorepair.AutoRepairUtils.RepairTurn.MY_TURN_FORCE_REPAIR;
+
+// TODO: add class documentation (SO-28898)
+public class AutoRepair
+{
+    // Initial delay for repair session to start after setup
+    final static long INITIAL_REPAIR_DELAY_SEC = 30;
+
+    private static final Logger logger = 
LoggerFactory.getLogger(AutoRepair.class);
+
+    @VisibleForTesting
+    protected static Supplier<Long> timeFunc = System::currentTimeMillis;
+
+    public static AutoRepair instance = new AutoRepair();
+
+    @VisibleForTesting
+    protected final Map<AutoRepairConfig.RepairType, ScheduledExecutorPlus> 
repairExecutors;
+    @VisibleForTesting
+    protected final Map<AutoRepairConfig.RepairType, AutoRepairState> 
repairStates;
+
+    protected final Map<AutoRepairConfig.RepairType, 
IAutoRepairTokenRangeSplitter> tokenRangeSplitters = new 
EnumMap<>(AutoRepairConfig.RepairType.class);
+
+
+    @VisibleForTesting
+    protected AutoRepair()
+    {
+        AutoRepairConfig config = DatabaseDescriptor.getAutoRepairConfig();
+        repairExecutors = new EnumMap<>(AutoRepairConfig.RepairType.class);
+        repairStates = new EnumMap<>(AutoRepairConfig.RepairType.class);
+        for (AutoRepairConfig.RepairType repairType : 
AutoRepairConfig.RepairType.values())
+        {
+            repairExecutors.put(repairType, executorFactory().scheduled(false, 
"AutoRepair-Repair-" + repairType, Thread.NORM_PRIORITY));
+            repairStates.put(repairType, 
AutoRepairStateFactory.getAutoRepairState(repairType));
+            tokenRangeSplitters.put(repairType, 
FBUtilities.newAutoRepairTokenRangeSplitter(config.getTokenRangeSplitter(repairType)));
+        }
+    }
+
+    public void setup()
+    {
+        verifyIsSafeToEnable();
+
+        AutoRepairConfig config = DatabaseDescriptor.getAutoRepairConfig();
+        AutoRepairService.setup();
+        AutoRepairUtils.setup();
+
+        for (AutoRepairConfig.RepairType repairType : 
AutoRepairConfig.RepairType.values())
+        {
+            repairExecutors.get(repairType).scheduleWithFixedDelay(
+            () -> repair(repairType, 60000),
+            INITIAL_REPAIR_DELAY_SEC,
+            config.getRepairCheckIntervalInSec(),
+            TimeUnit.SECONDS);
+        }
+    }
+
+    @VisibleForTesting
+    protected void verifyIsSafeToEnable()
+    {
+        AutoRepairConfig config = DatabaseDescriptor.getAutoRepairConfig();
+        if 
(config.isAutoRepairEnabled(AutoRepairConfig.RepairType.incremental) &&
+            (DatabaseDescriptor.getMaterializedViewsEnabled() || 
DatabaseDescriptor.isCDCEnabled()))
+            throw new ConfigurationException("Cannot enable incremental repair 
with materialized views or CDC enabled");
+    }
+
+    // repairAsync runs a repair session of the given type asynchronously.
+    public void repairAsync(AutoRepairConfig.RepairType repairType, long 
millisToWait)
+    {
+        repairExecutors.get(repairType).submit(() -> repair(repairType, 
millisToWait));
+    }
+
+    // repair runs a repair session of the given type synchronously.
+    public void repair(AutoRepairConfig.RepairType repairType, long 
millisToWait)
+    {
+        AutoRepairConfig config = 
AutoRepairService.instance.getAutoRepairConfig();
+        if (!config.isAutoRepairEnabled(repairType))
+        {
+            logger.debug("Auto-repair is disabled for repair type {}", 
repairType);
+            return;
+        }
+
+
+        AutoRepairState repairState = repairStates.get(repairType);
+
+        try
+        {
+            String localDC = DatabaseDescriptor.getLocalDataCenter();
+            if (config.getIgnoreDCs(repairType).contains(localDC))
+            {
+                logger.info("Not running repair as this node belongs to 
datacenter {}", localDC);
+                return;
+            }
+
+            // refresh the longest unrepaired node
+            
repairState.setLongestUnrepairedNode(AutoRepairUtils.getHostWithLongestUnrepairTime(repairType));
+
+            //consistency level to use for local query
+            UUID myId = 
Gossiper.instance.getHostId(FBUtilities.getBroadcastAddressAndPort());
+            RepairTurn turn = AutoRepairUtils.myTurnToRunRepair(repairType, 
myId);
+            if (turn == MY_TURN || turn == MY_TURN_DUE_TO_PRIORITY || turn == 
MY_TURN_FORCE_REPAIR)
+            {
+                repairState.recordTurn(turn);
+                // For normal auto repair, we will use primary range only 
repairs (Repair with -pr option).
+                // For some cases, we may set the 
auto_repair_primary_token_range_only flag to false then we will do repair
+                // without -pr. We may also do force repair for certain node 
that we want to repair all the data on one node
+                // When doing force repair, we want to repair without -pr.
+                boolean primaryRangeOnly = 
config.getRepairPrimaryTokenRangeOnly(repairType)
+                                           && turn != MY_TURN_FORCE_REPAIR;
+                repairState.setTotalTablesConsideredForRepair(0);
+                if (repairState.getLastRepairTime() != 0)
+                {
+                    /** check if it is too soon to run repair. one of the 
reason we
+                     * should not run frequent repair is because repair 
triggers
+                     * memtable flush
+                     */
+                    long timeElapsedSinceLastRepairInHours = 
TimeUnit.MILLISECONDS.toHours(timeFunc.get() - repairState.getLastRepairTime());
+                    if (timeElapsedSinceLastRepairInHours < 
config.getRepairMinIntervalInHours(repairType))
+                    {
+                        logger.info("Too soon to run repair, last repair was 
done {} hour(s) ago",
+                                    timeElapsedSinceLastRepairInHours);
+                        return;
+                    }
+                }
+
+                long startTime = timeFunc.get();
+                logger.info("My host id: {}, my turn to run repair...repair 
primary-ranges only? {}", myId,
+                            config.getRepairPrimaryTokenRangeOnly(repairType));
+                AutoRepairUtils.updateStartAutoRepairHistory(repairType, myId, 
timeFunc.get(), turn);
+
+                repairState.setRepairKeyspaceCount(0);
+                repairState.setRepairTableSuccessCount(0);
+                repairState.setRepairFailedTablesCount(0);
+                repairState.setRepairSkippedTablesCount(0);
+                repairState.setRepairInProgress(true);
+                repairState.setTotalMVTablesConsideredForRepair(0);
+                for (Keyspace keyspace : Keyspace.all())
+                {
+                    Tables tables = keyspace.getMetadata().tables;
+                    Iterator<TableMetadata> iter = tables.iterator();
+                    String keyspaceName = keyspace.getName();
+                    if 
(!AutoRepairUtils.checkNodeContainsKeyspaceReplica(keyspace))
+                    {
+                        continue;
+                    }
+
+                    
repairState.setRepairKeyspaceCount(repairState.getRepairKeyspaceCount() + 1);
+                    List<String> tablesToBeRepaired = new ArrayList<>();
+                    while (iter.hasNext())
+                    {
+                        
repairState.setTotalTablesConsideredForRepair(repairState.getTotalTablesConsideredForRepair()
 + 1);
+                        TableMetadata tableMetadata = iter.next();
+                        String tableName = tableMetadata.name;
+                        tablesToBeRepaired.add(tableName);
+
+                        // See if we should repair MVs as well that are 
associated with this given table
+                        List<String> mvs = 
AutoRepairUtils.getAllMVs(repairType, keyspace, tableMetadata);
+                        if (!mvs.isEmpty())
+                        {
+                            tablesToBeRepaired.addAll(mvs);
+                            
repairState.setTotalMVTablesConsideredForRepair(repairState.getTotalMVTablesConsideredForRepair()
 + mvs.size());
+                        }
+                    }
+
+                    for (String tableName : tablesToBeRepaired)
+                    {
+                        try
+                        {
+                            ColumnFamilyStore columnFamilyStore = 
keyspace.getColumnFamilyStore(tableName);
+                            if 
(columnFamilyStore.metadata().params.disableAutomatedRepair)
+                            {
+                                logger.info("Repair is disabled for keyspace 
{} for tables: {}", keyspaceName, tableName);

Review Comment:
   ```suggestion
                                   logger.info("Repair is disabled for keyspace {} for table: {}", keyspaceName, tableName);
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to