errose28 commented on code in PR #4655:
URL: https://github.com/apache/ozone/pull/4655#discussion_r1212191418
##########
hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestLegacyReplicationManager.java:
##########
@@ -544,8 +544,7 @@ public void testContainerWithMissingReplicas()
throws IOException, TimeoutException {
createContainer(LifeCycleState.CLOSED);
assertReplicaScheduled(0);
- assertUnderReplicatedCount(1);
- assertMissingCount(1);
+ assertEmptyReplicatedCount(1);
Review Comment:
It looks like the functionality of this test was changed to test something
different. Can we restore the old behavior to test missing containers and make
a change elsewhere if the test is failing?
##########
hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerHandler.java:
##########
@@ -216,8 +236,6 @@ public void testDeleteNonEmptyContainerDir()
5 * 2000);
Assert.assertTrue(!isContainerDeleted(hddsDatanodeService,
containerId.getId()));
- Assert.assertEquals(1,
- metrics.getContainerDeleteFailedNonEmptyDir());
Review Comment:
We should still check the updated metric here right?
##########
hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestDeleteContainerHandler.java:
##########
@@ -301,26 +319,13 @@ public void testDeleteNonEmptyContainerBlockTable()
Assert.assertFalse(isContainerDeleted(hddsDatanodeService,
containerId.getId()));
- // Set container blockCount to 0 to mock that it is empty as per RocksDB
- getContainerfromDN(hddsDatanodeService, containerId.getId())
- .getContainerData().setBlockCount(0);
- // Write entries to the block Table.
- try (DBHandle dbHandle
- = BlockUtils.getDB(
- (KeyValueContainerData)getContainerfromDN(hddsDatanodeService,
- containerId.getId()).getContainerData(),
- conf)) {
- BlockData blockData = new BlockData(new BlockID(1, 1));
- dbHandle.getStore().getBlockDataTable().put("block1", blockData);
- }
-
- long containerDeleteFailedNonEmptyBefore =
- metrics.getContainerDeleteFailedNonEmptyDir();
Review Comment:
This test is `testDeleteNonEmptyContainerBlockTable` but this part that
actually modifies the block table has been deleted. It looks like the test no
longer does what it says and overlaps with `testDeleteNonEmptyContainerDir`.
##########
hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestBlockDeletion.java:
##########
@@ -435,6 +441,257 @@ public void testContainerStatisticsAfterDelete() throws
Exception {
LOG.info(metrics.toString());
}
+ @Test
+ public void testContainerStateAfterDNRestart() throws Exception {
+ ReplicationManager replicationManager = scm.getReplicationManager();
+
+ String volumeName = UUID.randomUUID().toString();
+ String bucketName = UUID.randomUUID().toString();
+
+ String value = RandomStringUtils.random(10 * 10);
+ store.createVolume(volumeName);
+ OzoneVolume volume = store.getVolume(volumeName);
+ volume.createBucket(bucketName);
+ OzoneBucket bucket = volume.getBucket(bucketName);
+
+ String keyName = UUID.randomUUID().toString();
+ OzoneOutputStream out = bucket.createKey(keyName,
+ value.getBytes(UTF_8).length, ReplicationType.RATIS,
+ ReplicationFactor.THREE, new HashMap<>());
+ out.write(value.getBytes(UTF_8));
+ out.close();
+
+ OmKeyArgs keyArgs = new OmKeyArgs.Builder().setVolumeName(volumeName)
+ .setBucketName(bucketName).setKeyName(keyName).setDataSize(0)
+ .setReplicationConfig(
+ RatisReplicationConfig
+ .getInstance(HddsProtos.ReplicationFactor.THREE))
+ .build();
+ List<OmKeyLocationInfoGroup> omKeyLocationInfoGroupList =
+ om.lookupKey(keyArgs).getKeyLocationVersions();
+ Thread.sleep(5000);
+ List<ContainerInfo> containerInfos =
+ scm.getContainerManager().getContainers();
+ final int valueSize = value.getBytes(UTF_8).length;
+ final int keyCount = 1;
+ containerInfos.stream().forEach(container -> {
+ Assertions.assertEquals(valueSize, container.getUsedBytes());
+ Assertions.assertEquals(keyCount, container.getNumberOfKeys());
+ });
+
+ OzoneTestUtils.closeAllContainers(scm.getEventQueue(), scm);
+ // Wait for container to close
+ Thread.sleep(2000);
+ // make sure the containers are closed on the dn
+ omKeyLocationInfoGroupList.forEach((group) -> {
+ List<OmKeyLocationInfo> locationInfo = group.getLocationList();
+ locationInfo.forEach(
+ (info) -> cluster.getHddsDatanodes().get(0).getDatanodeStateMachine()
+ .getContainer().getContainerSet()
+ .getContainer(info.getContainerID()).getContainerData()
+ .setState(ContainerProtos.ContainerDataProto.State.CLOSED));
+ });
+
+ ContainerID containerId = ContainerID.valueOf(
+ containerInfos.get(0).getContainerID());
+ // Before restart container state is non-empty
+ Assertions.assertFalse(getContainerFromDN(
+ cluster.getHddsDatanodes().get(0), containerId.getId())
+ .getContainerData().isEmpty());
+ // Restart DataNode
+ cluster.restartHddsDatanode(0, true);
+
+ // After restart also container state remains non-empty.
+ Assertions.assertFalse(getContainerFromDN(
+ cluster.getHddsDatanodes().get(0), containerId.getId())
+ .getContainerData().isEmpty());
+
+ // Delete key
+ writeClient.deleteKey(keyArgs);
+ Thread.sleep(10000);
+
+ GenericTestUtils.waitFor(() -> {
+ try {
+ return scm.getContainerManager().getContainerReplicas(
+ containerId).stream().
+ allMatch(replica -> replica.isEmpty());
+ } catch (ContainerNotFoundException e) {
+ throw new RuntimeException(e);
+ }
+ },
+ 100, 10 * 1000);
+
+ // Container state should be empty now as key got deleted
+ Assertions.assertTrue(getContainerFromDN(
+ cluster.getHddsDatanodes().get(0), containerId.getId())
+ .getContainerData().isEmpty());
+
+ // Restart DataNode
+ cluster.restartHddsDatanode(0, true);
+ // Container state should be empty even after restart
+ Assertions.assertTrue(getContainerFromDN(
+ cluster.getHddsDatanodes().get(0), containerId.getId())
+ .getContainerData().isEmpty());
+
+ GenericTestUtils.waitFor(() -> {
+ replicationManager.processAll();
+ ((EventQueue)scm.getEventQueue()).processAll(1000);
+ List<ContainerInfo> infos = scm.getContainerManager().getContainers();
+ try {
+ infos.stream().forEach(container -> {
+ Assertions.assertEquals(HddsProtos.LifeCycleState.DELETED,
+ container.getState());
+ try {
+ Assertions.assertEquals(HddsProtos.LifeCycleState.DELETED,
+ scm.getScmMetadataStore().getContainerTable()
+ .get(container.containerID()).getState());
+ } catch (IOException e) {
+ Assertions.fail(
+ "Container from SCM DB should be marked as DELETED");
+ }
+ });
+ } catch (Throwable e) {
+ LOG.info(e.getMessage());
+ return false;
+ }
+ return true;
+ }, 500, 30000);
+ LOG.info(metrics.toString());
+ }
+
+ /**
+ * Return the container for the given containerID from the given DN.
+ */
+ private Container getContainerFromDN(HddsDatanodeService hddsDatanodeService,
+ long containerID) {
+ return hddsDatanodeService.getDatanodeStateMachine().getContainer()
+ .getContainerSet().getContainer(containerID);
+ }
+
+ @Test
+ public void testContainerDeleteWithInvalidKeyCount()
+ throws Exception {
+ ReplicationManager replicationManager = scm.getReplicationManager();
+ String volumeName = UUID.randomUUID().toString();
+ String bucketName = UUID.randomUUID().toString();
+
+ String value = RandomStringUtils.random(1024 * 1024);
+ store.createVolume(volumeName);
+ OzoneVolume volume = store.getVolume(volumeName);
+ volume.createBucket(bucketName);
+ OzoneBucket bucket = volume.getBucket(bucketName);
+
+ String keyName = UUID.randomUUID().toString();
+ OzoneOutputStream out = bucket.createKey(keyName,
+ value.getBytes(UTF_8).length, ReplicationType.RATIS,
+ ReplicationFactor.THREE, new HashMap<>());
+ out.write(value.getBytes(UTF_8));
+ out.close();
+
+ OmKeyArgs keyArgs = new OmKeyArgs.Builder().setVolumeName(volumeName)
+ .setBucketName(bucketName).setKeyName(keyName).setDataSize(0)
+ .setReplicationConfig(
+ RatisReplicationConfig
+ .getInstance(HddsProtos.ReplicationFactor.THREE))
+ .build();
+ List<OmKeyLocationInfoGroup> omKeyLocationInfoGroupList =
+ om.lookupKey(keyArgs).getKeyLocationVersions();
+ Thread.sleep(5000);
+ List<ContainerInfo> containerInfos =
+ scm.getContainerManager().getContainers();
+ final int valueSize = value.getBytes(UTF_8).length;
+ final int keyCount = 1;
+ containerInfos.stream().forEach(container -> {
+ Assertions.assertEquals(valueSize, container.getUsedBytes());
+ Assertions.assertEquals(keyCount, container.getNumberOfKeys());
+ });
+
+ OzoneTestUtils.closeAllContainers(scm.getEventQueue(), scm);
+ // Wait for container to close
+ Thread.sleep(2000);
+ // make sure the containers are closed on the dn
+ omKeyLocationInfoGroupList.forEach((group) -> {
+ List<OmKeyLocationInfo> locationInfo = group.getLocationList();
+ locationInfo.forEach(
+ (info) -> cluster.getHddsDatanodes().get(0).getDatanodeStateMachine()
+ .getContainer().getContainerSet()
+ .getContainer(info.getContainerID()).getContainerData()
+ .setState(ContainerProtos.ContainerDataProto.State.CLOSED));
+ });
+
+ ContainerStateManager containerStateManager = scm.getContainerManager()
+ .getContainerStateManager();
+ ContainerID containerId = ContainerID.valueOf(
+ containerInfos.get(0).getContainerID());
+ // Get all the replicas state from SCM
+ Set<ContainerReplica> replicas
+ = scm.getContainerManager().getContainerReplicas(containerId);
+
+ // Ensure for all replica isEmpty are false in SCM
+ Assert.assertTrue(scm.getContainerManager().getContainerReplicas(
+ containerId).stream().
+ allMatch(replica -> !replica.isEmpty()));
+
+ // Delete key
+ writeClient.deleteKey(keyArgs);
+ Thread.sleep(5000);
+
+ // Ensure isEmpty are true for all replica after delete key
+ GenericTestUtils.waitFor(() -> {
+ try {
+ return scm.getContainerManager().getContainerReplicas(
+ containerId).stream()
+ .allMatch(replica -> replica.isEmpty());
+ } catch (ContainerNotFoundException e) {
+ throw new RuntimeException(e);
+ }
+ },
+ 500, 5 * 2000);
Review Comment:
Once this runs, the container could be deleted at any time. We probably want
to explicitly set the replication manager run interval very high so we know it
won't run before we invoke it manually.
##########
hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java:
##########
@@ -1298,64 +1298,21 @@ private void deleteInternal(Container container,
boolean force)
// If the container is not empty, it should not be deleted unless the
// container is being forcefully deleted (which happens when
// container is unhealthy or over-replicated).
- if (container.getContainerData().getBlockCount() != 0) {
- metrics.incContainerDeleteFailedBlockCountNotZero();
+ if (!container.isEmpty()) {
Review Comment:
Should we change the name of this method so it is not confused with
`KeyValueContainerData#isEmpty`? Maybe changing this method to `hasBlocks` or
something like that?
##########
hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainer.java:
##########
@@ -769,4 +786,93 @@ public void testAutoCompactionSmallSstFile() throws
IOException {
}
}
}
+
+ @Test
+ public void testIsEmptyContainerStateWhileImport() throws Exception {
+ long containerId = keyValueContainer.getContainerData().getContainerID();
+ createContainer();
+ long numberOfKeysToWrite = 1;
+ closeContainer();
+ populate(numberOfKeysToWrite);
+
+ //destination path
+ File folderToExport = folder.newFile("export.tar");
+ for (CopyContainerCompression compr : CopyContainerCompression.values()) {
+ TarContainerPacker packer = new TarContainerPacker(compr);
+
+ //export the container
+ try (FileOutputStream fos = new FileOutputStream(folderToExport)) {
+ keyValueContainer
+ .exportContainerData(fos, packer);
+ }
+
+ //delete the original one
+ keyValueContainer.delete();
+
+ //create a new one
+ KeyValueContainerData containerData =
+ new KeyValueContainerData(containerId,
+ keyValueContainerData.getLayoutVersion(),
+ keyValueContainerData.getMaxSize(), UUID.randomUUID().toString(),
+ datanodeId.toString());
+ containerData.setSchemaVersion(keyValueContainerData.getSchemaVersion());
+ KeyValueContainer container = new KeyValueContainer(containerData, CONF);
+
+ HddsVolume containerVolume = volumeChoosingPolicy.chooseVolume(
+ StorageVolumeUtil.getHddsVolumesList(volumeSet.getVolumesList()), 1);
+
+ container.populatePathFields(scmId, containerVolume);
Review Comment:
Is this step necessary? Shouldn't this be handled by
`KeyValueContainer#importContainerData` like in
`testEmptyContainerImportExport`?
##########
hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestBlockDeletion.java:
##########
@@ -435,6 +441,257 @@ public void testContainerStatisticsAfterDelete() throws
Exception {
LOG.info(metrics.toString());
}
+ @Test
+ public void testContainerStateAfterDNRestart() throws Exception {
+ ReplicationManager replicationManager = scm.getReplicationManager();
+
+ String volumeName = UUID.randomUUID().toString();
+ String bucketName = UUID.randomUUID().toString();
+
+ String value = RandomStringUtils.random(10 * 10);
+ store.createVolume(volumeName);
+ OzoneVolume volume = store.getVolume(volumeName);
+ volume.createBucket(bucketName);
+ OzoneBucket bucket = volume.getBucket(bucketName);
+
+ String keyName = UUID.randomUUID().toString();
+ OzoneOutputStream out = bucket.createKey(keyName,
+ value.getBytes(UTF_8).length, ReplicationType.RATIS,
+ ReplicationFactor.THREE, new HashMap<>());
+ out.write(value.getBytes(UTF_8));
+ out.close();
+
+ OmKeyArgs keyArgs = new OmKeyArgs.Builder().setVolumeName(volumeName)
+ .setBucketName(bucketName).setKeyName(keyName).setDataSize(0)
+ .setReplicationConfig(
+ RatisReplicationConfig
+ .getInstance(HddsProtos.ReplicationFactor.THREE))
+ .build();
+ List<OmKeyLocationInfoGroup> omKeyLocationInfoGroupList =
+ om.lookupKey(keyArgs).getKeyLocationVersions();
+ Thread.sleep(5000);
+ List<ContainerInfo> containerInfos =
+ scm.getContainerManager().getContainers();
+ final int valueSize = value.getBytes(UTF_8).length;
+ final int keyCount = 1;
+ containerInfos.stream().forEach(container -> {
+ Assertions.assertEquals(valueSize, container.getUsedBytes());
+ Assertions.assertEquals(keyCount, container.getNumberOfKeys());
+ });
+
+ OzoneTestUtils.closeAllContainers(scm.getEventQueue(), scm);
+ // Wait for container to close
+ Thread.sleep(2000);
Review Comment:
Can we use `TestHelper#waitForContainerClose` to make this more reliable?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]