yifan-c commented on code in PR #231: URL: https://github.com/apache/cassandra-sidecar/pull/231#discussion_r2196064517
########## server/src/main/java/org/apache/cassandra/sidecar/handlers/RepairHandler.java: ########## @@ -0,0 +1,156 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.sidecar.handlers; + +import java.util.Collections; +import java.util.Set; + +import com.datastax.driver.core.utils.UUIDs; +import com.google.inject.Inject; +import io.netty.handler.codec.http.HttpResponseStatus; +import io.vertx.core.Vertx; +import io.vertx.core.http.HttpServerRequest; +import io.vertx.core.json.DecodeException; +import io.vertx.core.json.Json; +import io.vertx.core.net.SocketAddress; +import io.vertx.ext.auth.authorization.Authorization; +import io.vertx.ext.web.RoutingContext; +import org.apache.cassandra.sidecar.acl.authorization.BasicPermissions; +import org.apache.cassandra.sidecar.common.data.OperationalJobStatus; +import org.apache.cassandra.sidecar.common.request.data.RepairPayload; +import org.apache.cassandra.sidecar.common.response.OperationalJobResponse; +import org.apache.cassandra.sidecar.common.server.StorageOperations; +import org.apache.cassandra.sidecar.common.server.data.Name; +import org.apache.cassandra.sidecar.concurrent.ExecutorPools; +import org.apache.cassandra.sidecar.config.ServiceConfiguration; +import org.apache.cassandra.sidecar.exceptions.OperationalJobConflictException; +import org.apache.cassandra.sidecar.handlers.data.RepairRequestParam; +import org.apache.cassandra.sidecar.job.OperationalJobManager; +import org.apache.cassandra.sidecar.job.RepairJob; +import org.apache.cassandra.sidecar.utils.CassandraInputValidator; +import org.apache.cassandra.sidecar.utils.InstanceMetadataFetcher; +import org.apache.cassandra.sidecar.utils.OperationalJobUtils; +import org.jetbrains.annotations.NotNull; + +import static org.apache.cassandra.sidecar.utils.HttpExceptions.wrapHttpException; + +/** + * Handler for triggering repair + */ +public class RepairHandler extends AbstractHandler<RepairRequestParam> implements AccessProtected +{ + private final ServiceConfiguration config; + private final OperationalJobManager jobManager; + private final Vertx vertx; + + /** + * Constructs a handler with the provided {@code metadataFetcher} + * + * @param vertx the vertx instance + * @param metadataFetcher the metadata fetcher + * @param executorPools executor pools for blocking executions + * @param serviceConfiguration configuration object holding config details of Sidecar + * @param validator a validator instance to validate Cassandra-specific input + * @param jobManager manager for long-running operational jobs + */ + @Inject + protected RepairHandler(Vertx vertx, + InstanceMetadataFetcher metadataFetcher, + ExecutorPools executorPools, + ServiceConfiguration serviceConfiguration, + CassandraInputValidator validator, + OperationalJobManager jobManager) + { + super(metadataFetcher, executorPools, validator); + this.vertx = vertx; + this.jobManager = jobManager; + this.config = serviceConfiguration; + } + + /** + * {@inheritDoc} + */ + @Override + protected RepairRequestParam extractParamsOrThrow(RoutingContext context) + { + Name keyspace = keyspace(context, true); + if (keyspace == null) + { + throw wrapHttpException(HttpResponseStatus.BAD_REQUEST, "'keyspace' is required but not supplied"); + } + + String bodyString = context.body().asString(); + if (bodyString == null || bodyString.equalsIgnoreCase("null")) // json encoder writes null as "null" + { + logger.warn("Bad request to create repair job. Received null payload."); + throw wrapHttpException(HttpResponseStatus.BAD_REQUEST, "Unexpected null payload for request"); + } + + RepairPayload payload; + try + { + payload = Json.decodeValue(bodyString, RepairPayload.class); + } + catch (DecodeException decodeException) + { + logger.warn("Bad request to create repair job. Received invalid JSON payload."); + throw wrapHttpException(HttpResponseStatus.BAD_REQUEST, + "Invalid request payload", + decodeException); + } + + return RepairRequestParam.from(keyspace, payload); + } + + @Override + protected void handleInternal(RoutingContext context, + HttpServerRequest httpRequest, + @NotNull String host, + SocketAddress remoteAddress, + RepairRequestParam repairRequestParam) + { + StorageOperations operations = metadataFetcher.delegate(host).storageOperations(); + RepairJob job = new RepairJob(vertx, config.repairConfiguration(), UUIDs.timeBased(), operations, repairRequestParam); + try + { + jobManager.trySubmitJob(job); + } + catch (OperationalJobConflictException oje) + { + String reason = oje.getMessage(); + logger.error("Conflicting job encountered. reason={}", reason); Review Comment: should the log level be warning or even info? It is not a server error. ########## server/src/main/java/org/apache/cassandra/sidecar/job/RepairJob.java: ########## @@ -0,0 +1,249 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.sidecar.job; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.UUID; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import io.vertx.core.Future; +import io.vertx.core.Promise; +import io.vertx.core.Vertx; +import org.apache.cassandra.sidecar.adapters.base.RepairOptions; +import org.apache.cassandra.sidecar.common.request.data.RepairPayload; +import org.apache.cassandra.sidecar.common.server.StorageOperations; +import org.apache.cassandra.sidecar.config.RepairConfiguration; +import org.apache.cassandra.sidecar.handlers.data.RepairRequestParam; + +import static java.util.Objects.requireNonNull; + +/** + * Implementation of {@link OperationalJob} to perform repair operation. + */ +public class RepairJob extends OperationalJob +{ + private static final Logger LOGGER = LoggerFactory.getLogger(RepairJob.class); + private static final String OPERATION = "repair"; + private static final String PREVIEW_KIND_REPAIRED = "REPAIRED"; + + private final RepairRequestParam repairParams; + private final Vertx vertx; + private final RepairConfiguration repairConfiguration; + protected StorageOperations storageOperations; + + /** + * Enum representing the status of a parent repair session + */ + public enum ParentRepairStatus + { + IN_PROGRESS, COMPLETED, FAILED + } + + /** + * Constructs a job with a unique UUID, in Pending state + * + * @param vertx + * @param repairConfiguration + * @param jobId UUID representing the Job to be created + * @param storageOps + * @param repairParams + */ + public RepairJob(Vertx vertx, RepairConfiguration repairConfiguration, UUID jobId, StorageOperations storageOps, RepairRequestParam repairParams) + { + super(jobId); + this.vertx = vertx; + this.repairConfiguration = repairConfiguration; + this.storageOperations = storageOps; + this.repairParams = repairParams; + } + + @Override + public boolean isRunningOnCassandra() + { + // TODO: Leverage repair vtables to fail-fast on conflicting repairs (overlapping token-ranges or replica-sets) + // Currently does not check for concurrent repairs + return false; + } + + @Override + protected void executeInternal() Review Comment: Since repair is async, you might want to override `execute(Promise<Void> promise)` instead, completing the parameter `promise` when repair is complete. `executeInternal()` is to run blocking operations. ########## server/src/main/java/org/apache/cassandra/sidecar/job/RepairJob.java: ########## @@ -0,0 +1,249 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.sidecar.job; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.UUID; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import io.vertx.core.Future; +import io.vertx.core.Promise; +import io.vertx.core.Vertx; +import org.apache.cassandra.sidecar.adapters.base.RepairOptions; +import org.apache.cassandra.sidecar.common.request.data.RepairPayload; +import org.apache.cassandra.sidecar.common.server.StorageOperations; +import org.apache.cassandra.sidecar.config.RepairConfiguration; +import org.apache.cassandra.sidecar.handlers.data.RepairRequestParam; + +import static java.util.Objects.requireNonNull; + +/** + * Implementation of {@link OperationalJob} to perform repair operation. + */ +public class RepairJob extends OperationalJob +{ + private static final Logger LOGGER = LoggerFactory.getLogger(RepairJob.class); + private static final String OPERATION = "repair"; + private static final String PREVIEW_KIND_REPAIRED = "REPAIRED"; + + private final RepairRequestParam repairParams; + private final Vertx vertx; + private final RepairConfiguration repairConfiguration; + protected StorageOperations storageOperations; + + /** + * Enum representing the status of a parent repair session + */ + public enum ParentRepairStatus + { + IN_PROGRESS, COMPLETED, FAILED + } + + /** + * Constructs a job with a unique UUID, in Pending state + * + * @param vertx + * @param repairConfiguration + * @param jobId UUID representing the Job to be created + * @param storageOps + * @param repairParams + */ + public RepairJob(Vertx vertx, RepairConfiguration repairConfiguration, UUID jobId, StorageOperations storageOps, RepairRequestParam repairParams) + { + super(jobId); + this.vertx = vertx; + this.repairConfiguration = repairConfiguration; + this.storageOperations = storageOps; + this.repairParams = repairParams; + } + + @Override + public boolean isRunningOnCassandra() + { + // TODO: Leverage repair vtables to fail-fast on conflicting repairs (overlapping token-ranges or replica-sets) + // Currently does not check for concurrent repairs + return false; + } + + @Override + protected void executeInternal() + { + Map<String, String> options = generateRepairOptions(repairParams.requestpayload()); + String keyspace = repairParams.keyspace().name(); + + LOGGER.info("Executing repair operation for keyspace {} jobId={} maxRuntime={}", + keyspace, this.jobId(), repairConfiguration.maxRepairJobRuntimeMillis()); + + int cmd = storageOperations.repair(keyspace, options); + if (cmd <= 0) + { + // repairAsync can only return 0 for replication factor 1. + LOGGER.info("Replication factor is 1. No repair is needed for keyspace '{}'", keyspace); + } + else + { + // complete the max wait time promise either when exceeding the wait time, or the result is available + Promise<Boolean> maxWaitTimePromise = Promise.promise(); + vertx.setTimer(repairConfiguration.maxRepairJobRuntimeMillis(), d -> { + LOGGER.info("Timer Poll"); + maxWaitTimePromise.tryComplete(true); + }); + + // Promise for completion of repair operation + Promise<Void> promise = Promise.promise(); + Future<Void> resultFut = promise.future(); + + // main event loop checks periodically (10s) for completion + vertx.setPeriodic(repairConfiguration.repairPollIntervalMillis(), id -> queryForCompletedRepair(promise, cmd)); + resultFut.onComplete(res -> maxWaitTimePromise.tryComplete(false)); + Future<Boolean> maxWaitTimeFut = maxWaitTimePromise.future(); + + Future<Void> compositeFut = Future.any(maxWaitTimeFut, resultFut) + // If this lambda below is evaluated, either one of the futures have completed; + // In either case, the future corresponding to the job execution is returned + .compose(f -> { + LOGGER.info("One of the futures ended waitStatus={} resultStatus={}", + maxWaitTimeFut.isComplete(), resultFut.isComplete()); + boolean isTimeout = (maxWaitTimeFut.succeeded()) ? maxWaitTimeFut.result() : false; + if (isTimeout) + { + LOGGER.error("Timer ran out before the repair job completed. Repair took too long"); + // TODO: Cancel repair? (Nice to have) + // We free up the thread (job fails) and stop polling for completion + return Future.failedFuture("Repair job taking too long"); Review Comment: Can you implement this in the `asyncResult` override? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: pr-unsubscr...@cassandra.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: pr-unsubscr...@cassandra.apache.org For additional commands, e-mail: pr-h...@cassandra.apache.org