devmadhuu commented on code in PR #9258: URL: https://github.com/apache/ozone/pull/9258#discussion_r2828921218
########## hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/fsck/ReconReplicationManagerReport.java: ########## @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.recon.fsck; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.apache.hadoop.hdds.scm.container.ContainerID; +import org.apache.hadoop.hdds.scm.container.ReplicationManagerReport; + +/** + * Extended ReplicationManagerReport that captures ALL container health states, + * not just the first 100 samples per state. + * + * <p>SCM's standard ReplicationManagerReport uses sampling (SAMPLE_LIMIT = 100) + * to limit memory usage. This is appropriate for SCM which only needs samples + * for debugging/UI display.</p> + * + * <p>Recon, however, needs to track per-container health states for ALL containers + * to populate its UNHEALTHY_CONTAINERS_V2 table. This extended report removes + * the sampling limitation while maintaining backward compatibility by still + * calling the parent's incrementAndSample() method.</p> + * + * <p><b>REPLICA_MISMATCH Handling:</b> Since SCM's HealthState enum doesn't include + * REPLICA_MISMATCH (it's a Recon-specific check for data checksum mismatches), + * we track it separately in replicaMismatchContainers.</p> + * + * <p><b>Memory Impact:</b> For a cluster with 100K containers and 5% unhealthy rate, + * this adds approximately 620KB of memory during report generation (5K containers + * × 124 bytes per container). Even in worst case (100% unhealthy), memory usage + * is only ~14MB, which is negligible for Recon.</p> + */ +public class ReconReplicationManagerReport extends ReplicationManagerReport { + + // Captures ALL containers per health state (no SAMPLE_LIMIT restriction) + private final Map<HealthState, List<ContainerID>> allContainersByState = + new HashMap<>(); + + // Captures containers with REPLICA_MISMATCH (Recon-specific, not in SCM's HealthState) + private final List<ContainerID> replicaMismatchContainers = new ArrayList<>(); Review Comment: done. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
