Tejaskriya commented on code in PR #7368: URL: https://github.com/apache/ozone/pull/7368#discussion_r1842107612
########## hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java: ########## @@ -0,0 +1,687 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.repair.om; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.apache.hadoop.hdds.cli.GenericCli; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.utils.db.DBStore; +import org.apache.hadoop.hdds.utils.db.Table; +import org.apache.hadoop.hdds.utils.db.TableIterator; +import org.apache.hadoop.ozone.MiniOzoneCluster; +import org.apache.hadoop.ozone.MiniOzoneHAClusterImpl; +import org.apache.hadoop.ozone.client.BucketArgs; +import org.apache.hadoop.ozone.client.ObjectStore; +import org.apache.hadoop.ozone.client.OzoneClient; +import org.apache.hadoop.ozone.client.OzoneClientFactory; +import org.apache.hadoop.ozone.client.io.OzoneOutputStream; +import org.apache.hadoop.ozone.ha.ConfUtils; +import org.apache.hadoop.ozone.om.OMConfigKeys; +import org.apache.hadoop.ozone.om.OzoneManager; +import org.apache.hadoop.ozone.om.helpers.BucketLayout; +import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; +import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; +import org.apache.hadoop.ozone.shell.OzoneShell; +import org.apache.ozone.test.GenericTestUtils; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import picocli.CommandLine; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.concurrent.TimeUnit; + +import static java.lang.System.err; +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL; +import static org.apache.hadoop.ozone.OzoneConsts.OZONE_OFS_URI_SCHEME; +import static org.junit.jupiter.api.Assertions.fail; +import static org.junit.jupiter.api.Assertions.assertEquals; + +/** + * FSORepairTool test cases. 
+ */ +public class TestFSORepairTool { + public static final Logger LOG = + LoggerFactory.getLogger(TestFSORepairTool.class); + + private static final String DEFAULT_ENCODING = UTF_8.name(); + private static MiniOzoneHAClusterImpl cluster; + private static FileSystem fs; + private static OzoneClient client; + private static OzoneConfiguration conf = null; + private FSORepairTool tool; + + @BeforeAll + public static void init() throws Exception { + // Set configs. + conf = new OzoneConfiguration(); + // deletion services will be triggered manually. Review Comment: Seems like we are setting some intervals below and not triggering it manually. We can remove this comment. ########## hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java: ########## @@ -0,0 +1,687 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.repair.om; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.apache.hadoop.hdds.cli.GenericCli; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.utils.db.DBStore; +import org.apache.hadoop.hdds.utils.db.Table; +import org.apache.hadoop.hdds.utils.db.TableIterator; +import org.apache.hadoop.ozone.MiniOzoneCluster; +import org.apache.hadoop.ozone.MiniOzoneHAClusterImpl; +import org.apache.hadoop.ozone.client.BucketArgs; +import org.apache.hadoop.ozone.client.ObjectStore; +import org.apache.hadoop.ozone.client.OzoneClient; +import org.apache.hadoop.ozone.client.OzoneClientFactory; +import org.apache.hadoop.ozone.client.io.OzoneOutputStream; +import org.apache.hadoop.ozone.ha.ConfUtils; +import org.apache.hadoop.ozone.om.OMConfigKeys; +import org.apache.hadoop.ozone.om.OzoneManager; +import org.apache.hadoop.ozone.om.helpers.BucketLayout; +import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; +import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; +import org.apache.hadoop.ozone.shell.OzoneShell; +import org.apache.ozone.test.GenericTestUtils; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import picocli.CommandLine; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.concurrent.TimeUnit; + +import static java.lang.System.err; +import 
static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL; +import static org.apache.hadoop.ozone.OzoneConsts.OZONE_OFS_URI_SCHEME; +import static org.junit.jupiter.api.Assertions.fail; +import static org.junit.jupiter.api.Assertions.assertEquals; + +/** + * FSORepairTool test cases. + */ +public class TestFSORepairTool { + public static final Logger LOG = + LoggerFactory.getLogger(TestFSORepairTool.class); + + private static final String DEFAULT_ENCODING = UTF_8.name(); + private static MiniOzoneHAClusterImpl cluster; + private static FileSystem fs; + private static OzoneClient client; + private static OzoneConfiguration conf = null; + private FSORepairTool tool; + + @BeforeAll + public static void init() throws Exception { + // Set configs. + conf = new OzoneConfiguration(); + // deletion services will be triggered manually. + conf.setInt(OMConfigKeys.OZONE_DIR_DELETING_SERVICE_INTERVAL, 2000); + conf.setInt(OMConfigKeys.OZONE_PATH_DELETING_LIMIT_PER_TASK, 5); + conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 100, + TimeUnit.MILLISECONDS); + conf.setInt(OMConfigKeys.OZONE_KEY_DELETING_LIMIT_PER_TASK, 20); + conf.setBoolean(OMConfigKeys.OZONE_OM_RATIS_ENABLE_KEY, true); + // Since delete services use RocksDB iterators, make sure the double + // buffer is flushed between runs. + conf.setInt(OMConfigKeys.OZONE_OM_UNFLUSHED_TRANSACTION_MAX_COUNT, 2); Review Comment: Is it intentional to set it to 2? ########## hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java: ########## @@ -0,0 +1,687 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.ozone.repair.om; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.apache.hadoop.hdds.cli.GenericCli; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.utils.db.DBStore; +import org.apache.hadoop.hdds.utils.db.Table; +import org.apache.hadoop.hdds.utils.db.TableIterator; +import org.apache.hadoop.ozone.MiniOzoneCluster; +import org.apache.hadoop.ozone.MiniOzoneHAClusterImpl; +import org.apache.hadoop.ozone.client.BucketArgs; +import org.apache.hadoop.ozone.client.ObjectStore; +import org.apache.hadoop.ozone.client.OzoneClient; +import org.apache.hadoop.ozone.client.OzoneClientFactory; +import org.apache.hadoop.ozone.client.io.OzoneOutputStream; +import org.apache.hadoop.ozone.ha.ConfUtils; +import org.apache.hadoop.ozone.om.OMConfigKeys; +import org.apache.hadoop.ozone.om.OzoneManager; +import org.apache.hadoop.ozone.om.helpers.BucketLayout; +import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; +import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; +import org.apache.hadoop.ozone.shell.OzoneShell; +import org.apache.ozone.test.GenericTestUtils; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import picocli.CommandLine; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.concurrent.TimeUnit; + +import static java.lang.System.err; +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL; +import static org.apache.hadoop.ozone.OzoneConsts.OZONE_OFS_URI_SCHEME; +import static org.junit.jupiter.api.Assertions.fail; +import static org.junit.jupiter.api.Assertions.assertEquals; + +/** + * FSORepairTool test cases. + */ +public class TestFSORepairTool { + public static final Logger LOG = + LoggerFactory.getLogger(TestFSORepairTool.class); + + private static final String DEFAULT_ENCODING = UTF_8.name(); + private static MiniOzoneHAClusterImpl cluster; + private static FileSystem fs; + private static OzoneClient client; + private static OzoneConfiguration conf = null; + private FSORepairTool tool; + + @BeforeAll + public static void init() throws Exception { + // Set configs. + conf = new OzoneConfiguration(); + // deletion services will be triggered manually. + conf.setInt(OMConfigKeys.OZONE_DIR_DELETING_SERVICE_INTERVAL, 2000); + conf.setInt(OMConfigKeys.OZONE_PATH_DELETING_LIMIT_PER_TASK, 5); + conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 100, + TimeUnit.MILLISECONDS); + conf.setInt(OMConfigKeys.OZONE_KEY_DELETING_LIMIT_PER_TASK, 20); + conf.setBoolean(OMConfigKeys.OZONE_OM_RATIS_ENABLE_KEY, true); + // Since delete services use RocksDB iterators, make sure the double + // buffer is flushed between runs. + conf.setInt(OMConfigKeys.OZONE_OM_UNFLUSHED_TRANSACTION_MAX_COUNT, 2); + + // Build cluster. 
+ cluster = (MiniOzoneHAClusterImpl) MiniOzoneCluster.newHABuilder(conf) + .setNumOfOzoneManagers(1) + .setOMServiceId("omservice") + .setNumDatanodes(3) + .build(); + cluster.waitForClusterToBeReady(); + + // Init ofs. + final String rootPath = String.format("%s://%s/", + OZONE_OFS_URI_SCHEME, cluster.getOzoneManager().getOMServiceId()); + conf.set(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY, rootPath); + fs = FileSystem.get(conf); + client = OzoneClientFactory.getRpcClient("omservice", conf); + } + + @AfterEach + public void cleanNamespace() throws Exception { + OzoneShell shell = new OzoneShell(); + + if (fs.exists(new Path("/vol1"))) { + String[] args1 = new String[]{"volume", "delete", "-r", "-y", "vol1"}; + int exitC = execute(shell, args1); + assertEquals(0, exitC); + } + + if (fs.exists(new Path("/vol2"))) { + String[] args1 = new String[]{"volume", "delete", "-r", "-y", "vol2"}; + int exitC = execute(shell, args1); + assertEquals(0, exitC); + } + + cluster.getOzoneManager().prepareOzoneManager(120L, 5L); + runDeletes(); + assertFileAndDirTablesEmpty(); + } + + private int execute(GenericCli shell, String[] args) { + LOG.info("Executing shell command with args {}", Arrays.asList(args)); + CommandLine cmd = shell.getCmd(); + + CommandLine.IExecutionExceptionHandler exceptionHandler = + (ex, commandLine, parseResult) -> { + new PrintStream(err, true, DEFAULT_ENCODING).println(ex.getMessage()); + return commandLine.getCommandSpec().exitCodeOnExecutionException(); + }; + + // Since there is no elegant way to pass Ozone config to the shell, + // the idea is to use 'set' to place those OM HA configs. + String[] argsWithHAConf = getHASetConfStrings(args); + + cmd.setExecutionExceptionHandler(exceptionHandler); + return cmd.execute(argsWithHAConf); + } + + private String getSetConfStringFromConf(String key) { + return String.format("--set=%s=%s", key, conf.get(key)); + } + + private String generateSetConfString(String key, String value) { + return String.format("--set=%s=%s", key, value); + } + + private String[] getHASetConfStrings(int numOfArgs) { + assert (numOfArgs >= 0); + String[] res = new String[1 + 1 + 1 + numOfArgs]; Review Comment: nit: ```suggestion String[] res = new String[3 + numOfArgs]; ``` ########## hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java: ########## @@ -0,0 +1,756 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.ozone.repair.om; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.utils.db.RocksDatabase; +import org.apache.hadoop.hdds.utils.db.Table; +import org.apache.hadoop.hdds.utils.db.DBStore; +import org.apache.hadoop.hdds.utils.db.TableIterator; +import org.apache.hadoop.hdds.utils.db.BatchOperation; +import org.apache.hadoop.hdds.utils.db.TableConfig; +import org.apache.hadoop.hdds.utils.db.DBProfile; +import org.apache.hadoop.hdds.utils.db.managed.ManagedWriteOptions; +import org.apache.hadoop.ozone.OmUtils; +import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; +import org.apache.hadoop.ozone.om.helpers.BucketLayout; +import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; +import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; +import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; +import org.apache.hadoop.ozone.om.helpers.OmVolumeArgs; +import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; +import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; +import org.apache.hadoop.ozone.om.helpers.WithObjectID; +import org.apache.hadoop.ozone.om.request.file.OMFileRequest; +import org.apache.ratis.util.Preconditions; +import org.rocksdb.ColumnFamilyDescriptor; +import org.rocksdb.ColumnFamilyHandle; +import org.rocksdb.RocksDBException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.Stack; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DB_PROFILE; +import static org.apache.hadoop.hdds.utils.db.DBStoreBuilder.HDDS_DEFAULT_DB_PROFILE; +import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX; + +/** + * Base Tool to identify disconnected FSO trees in all buckets. + * The tool will log information about unreachable files or directories. + * If deletes are still in progress (the deleted directory table is not empty), the tool may + * report that the tree is disconnected, even though pending deletes would + * fix the issue. + * + * Before using the tool, make sure all OMs are stopped, + * and that all Ratis logs have been flushed to the OM DB. This can be + * done using `ozone admin prepare` before running the tool, and `ozone admin + * cancelprepare` when done. + * + * The tool will run a DFS from each bucket, and save all reachable + * directories as keys in a new temporary RocksDB instance called "reachable.db" + * In the same directory as om.db. + * will then scan the entire file and directory tables for each bucket to see + * if each object's parent is in the reachable table of reachable.db. The + * reachable table will be dropped and recreated for each bucket. + * The tool is idempotent. reachable.db will not be deleted automatically + * when the tool finishes, in case users want to manually inspect it. It can + * be safely deleted once the tool finishes. 
+ */ +public class FSORepairTool { + public static final Logger LOG = + LoggerFactory.getLogger(FSORepairTool.class); + + private final String omDBPath; + private final DBStore store; + private final Table<String, OmVolumeArgs> volumeTable; + private final Table<String, OmBucketInfo> bucketTable; + private final Table<String, OmDirectoryInfo> directoryTable; + private final Table<String, OmKeyInfo> fileTable; + private final Table<String, OmKeyInfo> deletedDirectoryTable; + private final Table<String, RepeatedOmKeyInfo> deletedTable; + private final Table<String, SnapshotInfo> snapshotInfoTable; + private final String volumeFilter; + private final String bucketFilter; + // The temporary DB is used to track which items have been seen. + // Since usage of this DB is simple, use it directly from + // RocksDB. + private String reachableDBPath; + private static final String REACHABLE_TABLE = "reachable"; + private static final byte[] REACHABLE_TABLE_BYTES = + REACHABLE_TABLE.getBytes(StandardCharsets.UTF_8); + private ColumnFamilyHandle reachableCFHandle; + private RocksDatabase reachableDB; + + private final ReportStatistics reachableStats; + private final ReportStatistics unreachableStats; + private final boolean repair; + + public FSORepairTool(String dbPath, boolean repair, String volume, String bucket) throws IOException { + this(getStoreFromPath(dbPath), dbPath, repair, volume, bucket); + } + + /** + * Allows passing RocksDB instance from a MiniOzoneCluster directly to this + * class for testing. + */ + public FSORepairTool(DBStore dbStore, String dbPath, boolean repair, String volume, String bucket) + throws IOException { + this.reachableStats = new ReportStatistics(0, 0, 0); + this.unreachableStats = new ReportStatistics(0, 0, 0); + + this.store = dbStore; + this.omDBPath = dbPath; + this.repair = repair; + this.volumeFilter = volume; + this.bucketFilter = bucket; + volumeTable = store.getTable(OmMetadataManagerImpl.VOLUME_TABLE, + String.class, + OmVolumeArgs.class); + bucketTable = store.getTable(OmMetadataManagerImpl.BUCKET_TABLE, + String.class, + OmBucketInfo.class); + directoryTable = store.getTable(OmMetadataManagerImpl.DIRECTORY_TABLE, + String.class, + OmDirectoryInfo.class); + fileTable = store.getTable(OmMetadataManagerImpl.FILE_TABLE, + String.class, + OmKeyInfo.class); + deletedDirectoryTable = store.getTable( + OmMetadataManagerImpl.DELETED_DIR_TABLE, + String.class, + OmKeyInfo.class); + deletedTable = store.getTable( + OmMetadataManagerImpl.DELETED_TABLE, + String.class, + RepeatedOmKeyInfo.class); + snapshotInfoTable = store.getTable( + OmMetadataManagerImpl.SNAPSHOT_INFO_TABLE, + String.class, + SnapshotInfo.class); + } + + protected static DBStore getStoreFromPath(String dbPath) throws IOException { + File omDBFile = new File(dbPath); + if (!omDBFile.exists() || !omDBFile.isDirectory()) { + throw new IOException(String.format("Specified OM DB instance %s does " + + "not exist or is not a RocksDB directory.", dbPath)); + } + // Load RocksDB and tables needed. 
+ return OmMetadataManagerImpl.loadDB(new OzoneConfiguration(), + new File(dbPath).getParentFile()); + } + + public FSORepairTool.Report run() throws IOException { + + if (bucketFilter != null && volumeFilter == null) { + System.out.println("--bucket flag cannot be used without specifying --volume."); + return null; + } + + if (volumeFilter != null) { + OmVolumeArgs volumeArgs = volumeTable.getIfExist(volumeFilter); + if (volumeArgs == null) { + System.out.println("Volume '" + volumeFilter + "' does not exist."); + return null; + } + } + + // Iterate all volumes or a specific volume if specified + try (TableIterator<String, ? extends Table.KeyValue<String, OmVolumeArgs>> + volumeIterator = volumeTable.iterator()) { + openReachableDB(); + + while (volumeIterator.hasNext()) { + Table.KeyValue<String, OmVolumeArgs> volumeEntry = + volumeIterator.next(); + String volumeKey = volumeEntry.getKey(); + + if (volumeFilter != null && !volumeFilter.equals(volumeKey)) { + continue; + } + + System.out.println("Processing volume: " + volumeKey); + + if (bucketFilter != null) { + OmBucketInfo bucketInfo = bucketTable.getIfExist(volumeKey + "/" + bucketFilter); + if (bucketInfo == null) { + //Bucket does not exist in the volume + System.out.println("Bucket '" + bucketFilter + "' does not exist in volume '" + volumeKey + "'."); + return null; + } + + if (bucketInfo.getBucketLayout() != BucketLayout.FILE_SYSTEM_OPTIMIZED) { + System.out.println("Skipping non-FSO bucket " + bucketFilter); + continue; + } + + // Check for snapshots in the specified bucket + if (checkIfSnapshotExistsForBucket(volumeFilter, bucketFilter)) { + if (!repair) { + System.out.println("Snapshot detected in bucket '" + bucketFilter + "'"); + } else { + System.out.println("Snapshot exists in bucket '" + bucketFilter + "'. " + + "Repair is not allowed if snapshots exist."); + return null; + } + } + + processBucket(volumeEntry.getValue(), bucketInfo); + } else { + + // Iterate all buckets in the volume. + try (TableIterator<String, ? extends Table.KeyValue<String, OmBucketInfo>> + bucketIterator = bucketTable.iterator()) { + bucketIterator.seek(volumeKey); + while (bucketIterator.hasNext()) { + Table.KeyValue<String, OmBucketInfo> bucketEntry = + bucketIterator.next(); + String bucketKey = bucketEntry.getKey(); + OmBucketInfo bucketInfo = bucketEntry.getValue(); + + if (bucketInfo.getBucketLayout() != BucketLayout.FILE_SYSTEM_OPTIMIZED) { + System.out.println("Skipping non-FSO bucket " + bucketKey); + continue; + } + + // Stop this loop once we have seen all buckets in the current + // volume. + if (!bucketKey.startsWith(volumeKey)) { + break; + } + + processBucket(volumeEntry.getValue(), bucketInfo); + } + } + } + } + } finally { + closeReachableDB(); + } + + return buildReportAndLog(); + } + + private boolean checkIfSnapshotExistsForBucket(String volumeName, String bucketName) throws IOException { + if (snapshotInfoTable == null) { + return false; + } + + try (TableIterator<String, ? 
extends Table.KeyValue<String, SnapshotInfo>> iterator = + snapshotInfoTable.iterator()) { + while (iterator.hasNext()) { + SnapshotInfo snapshotInfo = iterator.next().getValue(); + String snapshotPath = (volumeName + "/" + bucketName).replaceFirst("^/", ""); + if (snapshotInfo.getSnapshotPath().equals(snapshotPath)) { + return true; + } + } + } + return false; + } + + private void processBucket(OmVolumeArgs volume, OmBucketInfo bucketInfo) throws IOException { + System.out.println("Processing bucket: " + volume.getVolume() + "/" + bucketInfo.getBucketName()); + if (checkIfSnapshotExistsForBucket(volume.getVolume(), bucketInfo.getBucketName())) { + if (!repair) { + System.out.println( + "Snapshot detected in bucket '" + volume.getVolume() + "/" + bucketInfo.getBucketName() + "'. "); + } else { + System.out.println( + "Skipping repair for bucket '" + volume.getVolume() + "/" + bucketInfo.getBucketName() + "' " + + "due to snapshot presence."); + return; + } + } + dropReachableTableIfExists(); + createReachableTable(); + markReachableObjectsInBucket(volume, bucketInfo); + handleUnreachableObjects(volume, bucketInfo); + dropReachableTableIfExists(); + } + + private Report buildReportAndLog() { + Report report = new Report.Builder() + .setReachable(reachableStats) + .setUnreachable(unreachableStats) + .build(); + + System.out.println("\n" + report); + return report; + } + + private void markReachableObjectsInBucket(OmVolumeArgs volume, + OmBucketInfo bucket) throws IOException { + // Only put directories in the stack. + // Directory keys should have the form /volumeID/bucketID/parentID/name. + Stack<String> dirKeyStack = new Stack<>(); + + // Since the tool uses parent directories to check for reachability, add + // a reachable entry for the bucket as well. + addReachableEntry(volume, bucket, bucket); + // Initialize the stack with all immediate child directories of the + // bucket, and mark them all as reachable. + Collection<String> childDirs = + getChildDirectoriesAndMarkAsReachable(volume, bucket, bucket); + dirKeyStack.addAll(childDirs); + + while (!dirKeyStack.isEmpty()) { + // Get one directory and process its immediate children. + String currentDirKey = dirKeyStack.pop(); + OmDirectoryInfo currentDir = directoryTable.get(currentDirKey); + if (currentDir == null) { + System.out.println("Directory key" + currentDirKey + "to be processed was not found in the " + "directory table."); + continue; + } + + // TODO revisit this for a more memory efficient implementation, + // possibly making better use of RocksDB iterators. + childDirs = getChildDirectoriesAndMarkAsReachable(volume, bucket, + currentDir); Review Comment: As the line-length limit has been increased to 120 characters, such statements can fit on one line. Could you please cross-check the full patch for such occurrences and join the ones that are split unnecessarily? ########## hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java: ########## @@ -0,0 +1,756 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.repair.om; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.utils.db.RocksDatabase; +import org.apache.hadoop.hdds.utils.db.Table; +import org.apache.hadoop.hdds.utils.db.DBStore; +import org.apache.hadoop.hdds.utils.db.TableIterator; +import org.apache.hadoop.hdds.utils.db.BatchOperation; +import org.apache.hadoop.hdds.utils.db.TableConfig; +import org.apache.hadoop.hdds.utils.db.DBProfile; +import org.apache.hadoop.hdds.utils.db.managed.ManagedWriteOptions; +import org.apache.hadoop.ozone.OmUtils; +import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; +import org.apache.hadoop.ozone.om.helpers.BucketLayout; +import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; +import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; +import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; +import org.apache.hadoop.ozone.om.helpers.OmVolumeArgs; +import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; +import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; +import org.apache.hadoop.ozone.om.helpers.WithObjectID; +import org.apache.hadoop.ozone.om.request.file.OMFileRequest; +import org.apache.ratis.util.Preconditions; +import org.rocksdb.ColumnFamilyDescriptor; +import org.rocksdb.ColumnFamilyHandle; +import org.rocksdb.RocksDBException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.Stack; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DB_PROFILE; +import static org.apache.hadoop.hdds.utils.db.DBStoreBuilder.HDDS_DEFAULT_DB_PROFILE; +import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX; + +/** + * Base Tool to identify disconnected FSO trees in all buckets. + * The tool will log information about unreachable files or directories. + * If deletes are still in progress (the deleted directory table is not empty), the tool may + * report that the tree is disconnected, even though pending deletes would + * fix the issue. + * + * Before using the tool, make sure all OMs are stopped, + * and that all Ratis logs have been flushed to the OM DB. This can be + * done using `ozone admin prepare` before running the tool, and `ozone admin + * cancelprepare` when done. + * + * The tool will run a DFS from each bucket, and save all reachable + * directories as keys in a new temporary RocksDB instance called "reachable.db" + * In the same directory as om.db. + * will then scan the entire file and directory tables for each bucket to see + * if each object's parent is in the reachable table of reachable.db. The + * reachable table will be dropped and recreated for each bucket. + * The tool is idempotent. reachable.db will not be deleted automatically + * when the tool finishes, in case users want to manually inspect it. 
It can + * be safely deleted once the tool finishes. + */ +public class FSORepairTool { + public static final Logger LOG = + LoggerFactory.getLogger(FSORepairTool.class); + + private final String omDBPath; + private final DBStore store; + private final Table<String, OmVolumeArgs> volumeTable; + private final Table<String, OmBucketInfo> bucketTable; + private final Table<String, OmDirectoryInfo> directoryTable; + private final Table<String, OmKeyInfo> fileTable; + private final Table<String, OmKeyInfo> deletedDirectoryTable; + private final Table<String, RepeatedOmKeyInfo> deletedTable; + private final Table<String, SnapshotInfo> snapshotInfoTable; + private final String volumeFilter; + private final String bucketFilter; + // The temporary DB is used to track which items have been seen. + // Since usage of this DB is simple, use it directly from + // RocksDB. + private String reachableDBPath; + private static final String REACHABLE_TABLE = "reachable"; + private static final byte[] REACHABLE_TABLE_BYTES = + REACHABLE_TABLE.getBytes(StandardCharsets.UTF_8); + private ColumnFamilyHandle reachableCFHandle; + private RocksDatabase reachableDB; + + private final ReportStatistics reachableStats; + private final ReportStatistics unreachableStats; + private final boolean repair; + + public FSORepairTool(String dbPath, boolean repair, String volume, String bucket) throws IOException { + this(getStoreFromPath(dbPath), dbPath, repair, volume, bucket); + } + + /** + * Allows passing RocksDB instance from a MiniOzoneCluster directly to this + * class for testing. + */ + public FSORepairTool(DBStore dbStore, String dbPath, boolean repair, String volume, String bucket) + throws IOException { + this.reachableStats = new ReportStatistics(0, 0, 0); + this.unreachableStats = new ReportStatistics(0, 0, 0); + + this.store = dbStore; + this.omDBPath = dbPath; + this.repair = repair; + this.volumeFilter = volume; + this.bucketFilter = bucket; + volumeTable = store.getTable(OmMetadataManagerImpl.VOLUME_TABLE, + String.class, + OmVolumeArgs.class); + bucketTable = store.getTable(OmMetadataManagerImpl.BUCKET_TABLE, + String.class, + OmBucketInfo.class); + directoryTable = store.getTable(OmMetadataManagerImpl.DIRECTORY_TABLE, + String.class, + OmDirectoryInfo.class); + fileTable = store.getTable(OmMetadataManagerImpl.FILE_TABLE, + String.class, + OmKeyInfo.class); + deletedDirectoryTable = store.getTable( + OmMetadataManagerImpl.DELETED_DIR_TABLE, + String.class, + OmKeyInfo.class); + deletedTable = store.getTable( + OmMetadataManagerImpl.DELETED_TABLE, + String.class, + RepeatedOmKeyInfo.class); + snapshotInfoTable = store.getTable( + OmMetadataManagerImpl.SNAPSHOT_INFO_TABLE, + String.class, + SnapshotInfo.class); + } + + protected static DBStore getStoreFromPath(String dbPath) throws IOException { + File omDBFile = new File(dbPath); + if (!omDBFile.exists() || !omDBFile.isDirectory()) { + throw new IOException(String.format("Specified OM DB instance %s does " + + "not exist or is not a RocksDB directory.", dbPath)); + } + // Load RocksDB and tables needed. 
+ return OmMetadataManagerImpl.loadDB(new OzoneConfiguration(), + new File(dbPath).getParentFile()); + } + + public FSORepairTool.Report run() throws IOException { + + if (bucketFilter != null && volumeFilter == null) { + System.out.println("--bucket flag cannot be used without specifying --volume."); + return null; + } + + if (volumeFilter != null) { + OmVolumeArgs volumeArgs = volumeTable.getIfExist(volumeFilter); + if (volumeArgs == null) { + System.out.println("Volume '" + volumeFilter + "' does not exist."); + return null; + } + } + + // Iterate all volumes or a specific volume if specified + try (TableIterator<String, ? extends Table.KeyValue<String, OmVolumeArgs>> + volumeIterator = volumeTable.iterator()) { + openReachableDB(); + + while (volumeIterator.hasNext()) { + Table.KeyValue<String, OmVolumeArgs> volumeEntry = + volumeIterator.next(); + String volumeKey = volumeEntry.getKey(); + + if (volumeFilter != null && !volumeFilter.equals(volumeKey)) { + continue; + } + + System.out.println("Processing volume: " + volumeKey); + + if (bucketFilter != null) { + OmBucketInfo bucketInfo = bucketTable.getIfExist(volumeKey + "/" + bucketFilter); + if (bucketInfo == null) { + //Bucket does not exist in the volume + System.out.println("Bucket '" + bucketFilter + "' does not exist in volume '" + volumeKey + "'."); + return null; + } + + if (bucketInfo.getBucketLayout() != BucketLayout.FILE_SYSTEM_OPTIMIZED) { + System.out.println("Skipping non-FSO bucket " + bucketFilter); + continue; + } + + // Check for snapshots in the specified bucket + if (checkIfSnapshotExistsForBucket(volumeFilter, bucketFilter)) { + if (!repair) { + System.out.println("Snapshot detected in bucket '" + bucketFilter + "'"); + } else { + System.out.println("Snapshot exists in bucket '" + bucketFilter + "'. " + + "Repair is not allowed if snapshots exist."); + return null; + } + } Review Comment: The check is already being performed in `processBucket()`. It can be removed from here as it's duplicated. ########## hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java: ########## @@ -0,0 +1,756 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.ozone.repair.om; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.utils.db.RocksDatabase; +import org.apache.hadoop.hdds.utils.db.Table; +import org.apache.hadoop.hdds.utils.db.DBStore; +import org.apache.hadoop.hdds.utils.db.TableIterator; +import org.apache.hadoop.hdds.utils.db.BatchOperation; +import org.apache.hadoop.hdds.utils.db.TableConfig; +import org.apache.hadoop.hdds.utils.db.DBProfile; +import org.apache.hadoop.hdds.utils.db.managed.ManagedWriteOptions; +import org.apache.hadoop.ozone.OmUtils; +import org.apache.hadoop.ozone.om.OmMetadataManagerImpl; +import org.apache.hadoop.ozone.om.helpers.BucketLayout; +import org.apache.hadoop.ozone.om.helpers.OmBucketInfo; +import org.apache.hadoop.ozone.om.helpers.OmDirectoryInfo; +import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; +import org.apache.hadoop.ozone.om.helpers.OmVolumeArgs; +import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo; +import org.apache.hadoop.ozone.om.helpers.SnapshotInfo; +import org.apache.hadoop.ozone.om.helpers.WithObjectID; +import org.apache.hadoop.ozone.om.request.file.OMFileRequest; +import org.apache.ratis.util.Preconditions; +import org.rocksdb.ColumnFamilyDescriptor; +import org.rocksdb.ColumnFamilyHandle; +import org.rocksdb.RocksDBException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.Stack; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DB_PROFILE; +import static org.apache.hadoop.hdds.utils.db.DBStoreBuilder.HDDS_DEFAULT_DB_PROFILE; +import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX; + +/** + * Base Tool to identify disconnected FSO trees in all buckets. + * The tool will log information about unreachable files or directories. + * If deletes are still in progress (the deleted directory table is not empty), the tool may + * report that the tree is disconnected, even though pending deletes would + * fix the issue. + * + * Before using the tool, make sure all OMs are stopped, + * and that all Ratis logs have been flushed to the OM DB. This can be + * done using `ozone admin prepare` before running the tool, and `ozone admin + * cancelprepare` when done. + * + * The tool will run a DFS from each bucket, and save all reachable + * directories as keys in a new temporary RocksDB instance called "reachable.db" + * In the same directory as om.db. + * will then scan the entire file and directory tables for each bucket to see Review Comment: nit: ```suggestion * in the same directory as om.db. * It will then scan the entire file and directory tables for each bucket to see ```
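
To go with the first comment above (the stale "triggered manually" note in `init()`), here is one way that block could read once the comment matches the interval-based configuration the test actually uses. This is only a sketch; it reuses the config keys already present in the patch and assumes nothing else about the setup.

```java
conf = new OzoneConfiguration();
// Run the background deletion services on short intervals so the test
// does not need to trigger them manually.
conf.setInt(OMConfigKeys.OZONE_DIR_DELETING_SERVICE_INTERVAL, 2000);
conf.setInt(OMConfigKeys.OZONE_PATH_DELETING_LIMIT_PER_TASK, 5);
conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 100, TimeUnit.MILLISECONDS);
conf.setInt(OMConfigKeys.OZONE_KEY_DELETING_LIMIT_PER_TASK, 20);
```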
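For the 120-character line-length comment, the flagged assignment in `markReachableObjectsInBucket()` would simply collapse onto one line, for example:

```java
// Fits within the 120-character limit without wrapping.
childDirs = getChildDirectoriesAndMarkAsReachable(volume, bucket, currentDir);
```

The same applies to the other wrapped statements quoted above, such as the `Collection<String> childDirs = ...` initialization.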
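For the duplicated snapshot check, a sketch of how the `--bucket` branch inside the volume iteration loop of `run()` could look once it defers to the guard already inside `processBucket()`. One caveat, flagged here as an assumption: the duplicated check in `run()` returns null when repair is requested and a snapshot exists, whereas `processBucket()` merely skips the bucket, so dropping the duplicate relies on `processBucket()`'s skip behavior being acceptable.

```java
if (bucketFilter != null) {
  OmBucketInfo bucketInfo = bucketTable.getIfExist(volumeKey + "/" + bucketFilter);
  if (bucketInfo == null) {
    System.out.println("Bucket '" + bucketFilter + "' does not exist in volume '" + volumeKey + "'.");
    return null;
  }
  if (bucketInfo.getBucketLayout() != BucketLayout.FILE_SYSTEM_OPTIMIZED) {
    System.out.println("Skipping non-FSO bucket " + bucketFilter);
    continue;
  }
  // Snapshot detection and the repair/skip decision are left to processBucket(),
  // which performs the same checkIfSnapshotExistsForBucket() guard.
  processBucket(volumeEntry.getValue(), bucketInfo);
}
```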
