dombizita commented on code in PR #4107:
URL: https://github.com/apache/ozone/pull/4107#discussion_r1166699124


##########
hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/persistence/TestUtilizationSchemaDefinition.java:
##########
@@ -103,6 +104,21 @@ public void testReconSchemaCreated() throws Exception {
         "Unexpected number of columns");
     assertEquals(expectedPairsFileCount, actualPairsFileCount,
         "Columns Do not Match ");
+
+
+    ResultSet resultSetContainerCount = metaData.getColumns(null, null,
+        CONTAINER_COUNT_BY_SIZE_TABLE_NAME, null);
+

Review Comment:
   Maybe add a similar `expectedPairsContainerCount` here for the expected 
columns, like the one for the file count by size on lines 87-95. Also add a 
check that the columns match (like on line 105). 



##########
hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/UtilizationEndpoint.java:
##########
@@ -98,4 +111,44 @@ public Response getFileCounts(
     }
     return Response.ok(resultSet).build();
   }
+
+  /**
+   * Return the container size counts from Recon DB.
+   *
+   * @return {@link Response}
+   */
+  @GET
+  @Path("/containerCount")
+  public Response getContainerCounts(
+      @QueryParam(RECON_QUERY_CONTAINER_SIZE)
+          long containerSize) {
+    DSLContext dslContext = utilizationSchemaDefinition.getDSLContext();
+    Long containerSizeUpperBound =
+        ReconUtils.getContainerSizeUpperBound(containerSize);
+    List<ContainerCountBySize> resultSet;
+    try {
+      if (containerSize > 0) {
+        // Get the current count from database and update

Review Comment:
   what do you mean by "update" in the comment?



##########
hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/tasks/TestContainerSizeCountTask.java:
##########
@@ -0,0 +1,157 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.recon.tasks;
+
+import static 
org.hadoop.ozone.recon.schema.tables.ContainerCountBySizeTable.CONTAINER_COUNT_BY_SIZE;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.mockito.BDDMockito.given;
+import static org.mockito.Mockito.mock;
+
+import java.time.Duration;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.hdds.scm.container.ContainerID;
+import org.apache.hadoop.hdds.scm.container.ContainerInfo;
+import org.apache.hadoop.hdds.scm.container.ContainerManager;
+import org.apache.hadoop.ozone.recon.persistence.AbstractReconSqlDBTest;
+import org.apache.hadoop.ozone.recon.spi.StorageContainerServiceProvider;
+import org.hadoop.ozone.recon.schema.UtilizationSchemaDefinition;
+import org.hadoop.ozone.recon.schema.tables.daos.ContainerCountBySizeDao;
+import org.hadoop.ozone.recon.schema.tables.daos.ReconTaskStatusDao;
+import org.jooq.DSLContext;
+import org.jooq.Record1;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Class to test a process and reprocess methods of ContainerSizeCountTask.

Review Comment:
   please update documentation as there is no reprocess method anymore



##########
hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconIncrementalContainerReportHandler.java:
##########
@@ -99,5 +103,6 @@ public void onMessage(final 
IncrementalContainerReportFromDatanode report,
       }
     }
     containerManager.notifyContainerReportProcessing(false, success);
+    containerSizeCountTask.process(containerManager.getContainers());

Review Comment:
   Same here: does this mean that the ContainerSizeCountTask can run multiple 
times within each defined interval, since we are calling `process()` here 
too? 



##########
hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconDeadNodeHandler.java:
##########
@@ -44,17 +45,22 @@ public class ReconDeadNodeHandler extends DeadNodeHandler {
   private StorageContainerServiceProvider scmClient;
   private ContainerHealthTask containerHealthTask;
   private PipelineSyncTask pipelineSyncTask;
+  private ContainerSizeCountTask containerSizeCountTask;
+  private ContainerManager containerManager;
 
   public ReconDeadNodeHandler(NodeManager nodeManager,
                               PipelineManager pipelineManager,
                               ContainerManager containerManager,
                               StorageContainerServiceProvider scmClient,
                               ContainerHealthTask containerHealthTask,
-                              PipelineSyncTask pipelineSyncTask) {
+                              PipelineSyncTask pipelineSyncTask,
+                              ContainerSizeCountTask containerSizeCountTask) {
     super(nodeManager, pipelineManager, containerManager);
     this.scmClient = scmClient;
+    this.containerManager = containerManager;

Review Comment:
   what was the conclusion here?



##########
hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/ReconTaskConfig.java:
##########
@@ -83,4 +83,23 @@ public Duration getSafeModeWaitThreshold() {
   public void setSafeModeWaitThreshold(Duration safeModeWaitThreshold) {
     this.safeModeWaitThreshold = safeModeWaitThreshold.toMillis();
   }
+
+  @Config(key = "containercounttask.interval",
+      type = ConfigType.TIME,
+      defaultValue = "60s",
+      tags = { ConfigTag.RECON, ConfigTag.OZONE },
+      description = "The time interval of the periodic check for " +
+          "container size distribution in the cluster as reported "

Review Comment:
   is this sentence unfinished? 
   ```suggestion
         description = "The time interval of the periodic check for " +
             "container size distribution in the cluster."
   ```



##########
hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/TestReconIncrementalContainerReportHandler.java:
##########
@@ -133,6 +137,8 @@ public void testProcessICRStateMismatch()
           containerWithPipeline.getPipeline().getFirstNode();
       NodeManager nodeManagerMock = mock(NodeManager.class);
       when(nodeManagerMock.getNodeByUuid(any())).thenReturn(datanodeDetails);
+      ContainerSizeCountTask containerSizeCountTask =
+          mock(ContainerSizeCountTask.class);

Review Comment:
   I also think this would be great but I don't think the task has any methods 
that we can try here.
   nit (if you do changes here, otherwise you can leave it like it is).
   ```suggestion
         ContainerSizeCountTask containerSizeCountTaskMock =
             mock(ContainerSizeCountTask.class);
   ```



##########
hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/tasks/ContainerSizeCountTask.java:
##########
@@ -0,0 +1,367 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.recon.tasks;
+
+
+import org.apache.hadoop.hdds.scm.container.ContainerID;
+import org.apache.hadoop.hdds.scm.container.ContainerInfo;
+import org.apache.hadoop.hdds.scm.container.ContainerManager;
+import org.apache.hadoop.ozone.recon.ReconUtils;
+import org.apache.hadoop.ozone.recon.scm.ReconScmTask;
+import org.apache.hadoop.ozone.recon.spi.StorageContainerServiceProvider;
+import org.hadoop.ozone.recon.schema.UtilizationSchemaDefinition;
+import org.hadoop.ozone.recon.schema.tables.daos.ContainerCountBySizeDao;
+import org.hadoop.ozone.recon.schema.tables.daos.ReconTaskStatusDao;
+import org.hadoop.ozone.recon.schema.tables.pojos.ContainerCountBySize;
+import org.jooq.DSLContext;
+import org.jooq.Record1;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.locks.ReadWriteLock;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+
+import static 
org.hadoop.ozone.recon.schema.tables.ContainerCountBySizeTable.CONTAINER_COUNT_BY_SIZE;
+
+
+/**
+ * Class that scans the list of containers and keeps track of container sizes
+ * binned into ranges (1KB, 2Kb…,4MB,…1GB,…5GB…) to the Recon
+ * containerSize DB.
+ */
+public class ContainerSizeCountTask extends ReconScmTask {
+
+  private static final Logger LOG =
+      LoggerFactory.getLogger(ContainerSizeCountTask.class);
+
+  private StorageContainerServiceProvider scmClient;
+  private ContainerManager containerManager;
+  private final long interval;
+  private ContainerCountBySizeDao containerCountBySizeDao;
+  private DSLContext dslContext;
+  private HashMap<ContainerID, Long> processedContainers = new HashMap<>();
+  private ReadWriteLock lock = new ReentrantReadWriteLock(true);
+
+  public ContainerSizeCountTask(
+      ContainerManager containerManager,
+      StorageContainerServiceProvider scmClient,
+      ReconTaskStatusDao reconTaskStatusDao,
+      ReconTaskConfig reconTaskConfig,
+      ContainerCountBySizeDao containerCountBySizeDao,
+      UtilizationSchemaDefinition utilizationSchemaDefinition) {
+    super(reconTaskStatusDao);
+    this.scmClient = scmClient;
+    this.containerManager = containerManager;
+    this.containerCountBySizeDao = containerCountBySizeDao;
+    this.dslContext = utilizationSchemaDefinition.getDSLContext();
+    interval = reconTaskConfig.getContainerSizeCountTaskInterval().toMillis();
+  }
+
+
+  /**
+   * The run() method is the main loop of the ContainerSizeCountTask class.
+   * It periodically retrieves a list of containers from the containerManager,
+   * and then calls either to reprocess() or process() method depending on

Review Comment:
   please update documentation as there is no reprocess method anymore



##########
hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/UtilizationEndpoint.java:
##########
@@ -98,4 +111,44 @@ public Response getFileCounts(
     }
     return Response.ok(resultSet).build();
   }
+
+  /**
+   * Return the container size counts from Recon DB.
+   *
+   * @return {@link Response}
+   */
+  @GET
+  @Path("/containerCount")
+  public Response getContainerCounts(
+      @QueryParam(RECON_QUERY_CONTAINER_SIZE)
+          long containerSize) {
+    DSLContext dslContext = utilizationSchemaDefinition.getDSLContext();
+    Long containerSizeUpperBound =
+        ReconUtils.getContainerSizeUpperBound(containerSize);
+    List<ContainerCountBySize> resultSet;
+    try {
+      if (containerSize > 0) {
+        // Get the current count from database and update
+        Record1<Long> recordToFind =
+            dslContext.newRecord(
+                    CONTAINER_COUNT_BY_SIZE.CONTAINER_SIZE)
+                .value1(containerSizeUpperBound);
+        ContainerCountBySize record =
+            containerCountBySizeDao.findById(recordToFind.value1());
+        resultSet = record != null ?
+            Collections.singletonList(record) : Collections.emptyList();
+      } else {
+        // fetch all records having values greater than zero

Review Comment:
   Why don't we fetch all the records, even if a container size has 0 occurrences?



##########
hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconDeadNodeHandler.java:
##########
@@ -79,6 +85,7 @@ public void onMessage(final DatanodeDetails datanodeDetails,
       }
       containerHealthTask.triggerContainerHealthCheck();
       pipelineSyncTask.triggerPipelineSyncTask();
+      containerSizeCountTask.process(containerManager.getContainers());

Review Comment:
   Does this mean that the ContainerSizeCountTask can run multiple times within 
each defined interval, since we are calling `process()` here too? 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to