adoroszlai commented on code in PR #3615:
URL: https://github.com/apache/ozone/pull/3615#discussion_r928492433
##########
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogImpl.java:
##########
@@ -181,6 +181,31 @@ public void incrementCount(List<Long> txIDs)
}
}
+ /**
+ * {@inheritDoc}
+ *
+ * @throws IOException
+ */
+ @Override
+ public int resetCount(List<Long> txIDs) throws IOException {
+ List<Long> failedTransactions = getFailedTransactions().stream()
+ .map(DeletedBlocksTransaction::getTxID).collect(Collectors.toList());
+ if (txIDs != null && !txIDs.isEmpty()) {
+ failedTransactions = failedTransactions.stream().filter(txIDs::contains)
+ .collect(Collectors.toList());
+ }
Review Comment:
Nit: the stream should be collected only once, after the map and optional
filter operations. Or we can use `Set.retainAll` instead of
`filter(txIDs::contains)`.
```suggestion
Set<Long> failedTransactions = getFailedTransactions().stream()
.map(DeletedBlocksTransaction::getTxID)
.collect(Collectors.toSet());
if (txIDs != null && !txIDs.isEmpty()) {
failedTransactions.retainAll(txIDs);
}
```
##########
hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/DeletedBlockRetryCountRenewer.java:
##########
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
under
+ * the License.
+ */
+package org.apache.hadoop.ozone.debug;
+
+import org.apache.hadoop.hdds.cli.SubcommandWithParent;
+import org.apache.hadoop.hdds.scm.cli.ScmSubcommand;
+import org.apache.hadoop.hdds.scm.client.ScmClient;
+import org.kohsuke.MetaInfServices;
+import picocli.CommandLine;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+
+/**
+ * Tool to edit on-disk container meta.
+ */
[email protected](
+ name = "deletedBlockRetryCountRenewer",
+ description = "Renew deleted block transactions whose retry count is -1")
+@MetaInfServices(SubcommandWithParent.class)
+public class DeletedBlockRetryCountRenewer extends ScmSubcommand implements
+ SubcommandWithParent {
+
+ @CommandLine.Option(names = {"-r", "--renew"},
+ required = true,
+ description = "Renew the deleted block transaction retry count from" +
+ " -1 to 0. By default renew all expired transactions.")
+ private boolean toRenew;
+
+ @CommandLine.Option(names = {"-l", "--list"},
+ split = ",",
+ description = "Renew the only given deletedBlock transaction ID list, " +
+ "e.g 100,101,102.(Separated by ',')")
+ private List<Long> txList;
+
+ @Override
+ public Class<?> getParentType() {
+ return OzoneDebug.class;
Review Comment:
I think this is an admin command, albeit a very obscure one, since it
modifies state.
##########
hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestDeletedBlockRetryCountRenewer.java:
##########
@@ -0,0 +1,166 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
under
+ * the License.
+ */
+package org.apache.hadoop.ozone.shell;
+
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.hdds.scm.ScmConfigKeys;
+import org.apache.hadoop.hdds.scm.block.DeletedBlockLog;
+import org.apache.hadoop.hdds.scm.cli.ContainerOperationClient;
+import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
+import org.apache.hadoop.ozone.MiniOzoneCluster;
+import org.apache.hadoop.ozone.MiniOzoneHAClusterImpl;
+import org.apache.hadoop.ozone.debug.DeletedBlockRetryCountRenewer;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import picocli.CommandLine;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.UUID;
+import java.util.concurrent.TimeoutException;
+import java.util.stream.Collectors;
+
+import static
org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_BLOCK_DELETION_MAX_RETRY;
+
+/**
+ * Test for DeletedBlockRetryCountRenewer Cli.
+ */
+public class TestDeletedBlockRetryCountRenewer {
+
+ private static final Logger LOG = LoggerFactory
+ .getLogger(TestDeletedBlockRetryCountRenewer.class);
+ private MiniOzoneHAClusterImpl cluster = null;
+ private OzoneConfiguration conf;
+ private String clusterId;
+ private String scmId;
+ private String scmServiceId;
+ private int numOfSCMs = 3;
+
+ /**
+ * Create a MiniOzoneHACluster for testing.
+ *
+ * @throws IOException
+ */
+ @BeforeEach
+ public void init() throws Exception {
+ conf = new OzoneConfiguration();
+ clusterId = UUID.randomUUID().toString();
+ scmId = UUID.randomUUID().toString();
+ scmServiceId = "scm-service-test1";
+
+ conf.setBoolean(ScmConfigKeys.OZONE_SCM_HA_ENABLE_KEY, true);
+ conf.setInt(OZONE_SCM_BLOCK_DELETION_MAX_RETRY, 20);
+
+ cluster = (MiniOzoneHAClusterImpl) MiniOzoneCluster.newOMHABuilder(conf)
+ .setClusterId(clusterId)
+ .setScmId(scmId)
+ .setSCMServiceId(scmServiceId)
+ .setNumOfStorageContainerManagers(numOfSCMs)
+ .setNumOfActiveSCMs(numOfSCMs)
+ .setNumOfOzoneManagers(1)
+ .build();
+ cluster.waitForClusterToBeReady();
+ }
+
+ /**
+ * Shutdown MiniDFSCluster.
+ */
+ @AfterEach
+ public void shutdown() {
+ if (cluster != null) {
+ cluster.shutdown();
+ }
+ }
+
+ //<containerID, List<blockID>>
+ private Map<Long, List<Long>> generateData(int dataSize) {
+ Map<Long, List<Long>> blockMap = new HashMap<>();
+ Random random = new Random(1);
+ int continerIDBase = random.nextInt(100);
+ int localIDBase = random.nextInt(1000);
+ for (int i = 0; i < dataSize; i++) {
+ long containerID = continerIDBase + i;
+ List<Long> blocks = new ArrayList<>();
+ for (int j = 0; j < 5; j++) {
+ long localID = localIDBase + j;
+ blocks.add(localID);
+ }
+ blockMap.put(containerID, blocks);
+ }
+ return blockMap;
+ }
+
+ private StorageContainerManager getSCMLeader() {
+ return cluster.getStorageContainerManagersList()
+ .stream().filter(a -> a.getScmContext().isLeaderReady())
+ .collect(Collectors.toList()).get(0);
+ }
+
+ @Test
+ public void testRenewCmd() throws IOException, TimeoutException {
+ int maxRetry = conf.getInt(OZONE_SCM_BLOCK_DELETION_MAX_RETRY, 20);
+ // add some block deletion transactions
+ DeletedBlockLog deletedBlockLog = getSCMLeader().
+ getScmBlockManager().getDeletedBlockLog();
+ deletedBlockLog.addTransactions(generateData(30));
+ getSCMLeader().getScmHAManager().asSCMHADBTransactionBuffer().flush();
+ LOG.info("Valid num of txns: {}", deletedBlockLog.
+ getNumOfValidTransactions());
+ Assertions.assertEquals(deletedBlockLog.getNumOfValidTransactions(), 30);
Review Comment:
Nit: `assertEquals` arguments should be swapped (`expected`, `actual`) in
all assertions.
https://junit.org/junit5/docs/current/api/org.junit.jupiter.api/org/junit/jupiter/api/Assertions.html#assertEquals(int,int)
##########
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLog.java:
##########
@@ -72,6 +72,15 @@ List<DeletedBlocksTransaction> getFailedTransactions()
void incrementCount(List<Long> txIDs)
throws IOException, TimeoutException;
+
+ /**
+ * Reset DeletedBlock transaction retry count.
+ *
+ * @return the
Review Comment:
Nit: incomplete comment.
##########
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogImpl.java:
##########
@@ -181,6 +181,31 @@ public void incrementCount(List<Long> txIDs)
}
}
+ /**
+ * {@inheritDoc}
+ *
+ * @throws IOException
+ */
+ @Override
+ public int resetCount(List<Long> txIDs) throws IOException {
+ List<Long> failedTransactions = getFailedTransactions().stream()
+ .map(DeletedBlocksTransaction::getTxID).collect(Collectors.toList());
+ if (txIDs != null && !txIDs.isEmpty()) {
+ failedTransactions = failedTransactions.stream().filter(txIDs::contains)
+ .collect(Collectors.toList());
+ }
+ lock.lock();
+ try {
+ return deletedBlockLogStateManager.resetRetryCountOfTransactionInDB(
+ new ArrayList<>(failedTransactions));
+ } catch (TimeoutException | IOException ex) {
+ LOG.error("Cannot reset block deletion transactions {}",
+ failedTransactions, ex);
+ return 0;
Review Comment:
I think the method should either declare `throws IOException` or catch it,
but not both.
##########
hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/DeletedBlockRetryCountRenewer.java:
##########
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
under
+ * the License.
+ */
+package org.apache.hadoop.ozone.debug;
+
+import org.apache.hadoop.hdds.cli.SubcommandWithParent;
+import org.apache.hadoop.hdds.scm.cli.ScmSubcommand;
+import org.apache.hadoop.hdds.scm.client.ScmClient;
+import org.kohsuke.MetaInfServices;
+import picocli.CommandLine;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+
+/**
+ * Tool to edit on-disk container meta.
Review Comment:
What does this comment mean?
##########
hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java:
##########
@@ -360,6 +360,12 @@ StartContainerBalancerResponseProto startContainerBalancer(
*/
List<String> getScmRatisRoles() throws IOException;
+ /**
+ * Renew the expired deleted block retry count.
+ * @throws IOException
+ */
+ int renewDeletedBlockRetryCount(List<Long> txIDs) throws IOException;
Review Comment:
Since the operation is called `resetCount` in `DeletedBlockLog`, would it
make sense to use `reset` instead of `renew` consistently?
##########
hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/DeletedBlockRetryCountRenewer.java:
##########
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
under
+ * the License.
+ */
+package org.apache.hadoop.ozone.debug;
+
+import org.apache.hadoop.hdds.cli.SubcommandWithParent;
+import org.apache.hadoop.hdds.scm.cli.ScmSubcommand;
+import org.apache.hadoop.hdds.scm.client.ScmClient;
+import org.kohsuke.MetaInfServices;
+import picocli.CommandLine;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+
+/**
+ * Tool to edit on-disk container meta.
+ */
[email protected](
+ name = "deletedBlockRetryCountRenewer",
+ description = "Renew deleted block transactions whose retry count is -1")
+@MetaInfServices(SubcommandWithParent.class)
+public class DeletedBlockRetryCountRenewer extends ScmSubcommand implements
+ SubcommandWithParent {
+
+ @CommandLine.Option(names = {"-r", "--renew"},
+ required = true,
+ description = "Renew the deleted block transaction retry count from" +
+ " -1 to 0. By default renew all expired transactions.")
+ private boolean toRenew;
+
Review Comment:
What is the reason for adding this required option? Do you want to print
the help by default to avoid accidentally executing the command without a list
of IDs?
##########
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java:
##########
@@ -783,6 +783,16 @@ public ScmInfo getScmInfo() throws IOException {
}
}
+ @Override
+ public int renewDeletedBlockRetryCount(List<Long> txIDs) throws IOException {
+ getScm().checkAdminAccess(getRemoteUser());
+ AUDIT.logWriteSuccess(buildAuditMessageForSuccess(
+ SCMAction.RENEW_DELETED_BLOCK_RETRY_COUNT, null));
Review Comment:
Audit should log success only after the operation is done. Exceptions
should be logged as failure (and re-thrown). Both should include the command's
parameters. See `listContainer` for an example.
https://github.com/apache/ozone/blob/58c3166094cc27f65da49f5edd5a12ff1103a96e/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java#L492-L549
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]