Copilot commented on code in PR #9585: URL: https://github.com/apache/ozone/pull/9585#discussion_r2709432308
########## hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBArchiver.java: ########## @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.om; + +import static org.apache.hadoop.hdds.utils.Archiver.includeFile; +import static org.apache.hadoop.hdds.utils.Archiver.tar; +import static org.apache.hadoop.hdds.utils.HddsServerUtil.includeRatisSnapshotCompleteFlag; +import static org.apache.hadoop.ozone.om.OMDBCheckpointServletInodeBasedXfer.writeHardlinkFile; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashMap; +import java.util.Map; +import org.apache.commons.compress.archivers.ArchiveOutputStream; +import org.apache.commons.compress.archivers.tar.TarArchiveEntry; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.util.Time; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Class for handling operations relevant to archiving the OM DB tarball. + * Mainly maintains a map for recording the files collected from reading + * the checkpoint and snapshot DB's. It temporarily creates hardlinks and stores + * the link data in the map to release the bootstrap lock quickly + * and do the actual write at the end outside the lock. + */ +public class OMDBArchiver { + + private Path tmpDir; + private Map<String, File> filesToWriteIntoTarball; + private Map<String, String> hardLinkFileMap; + private static final Logger LOG = LoggerFactory.getLogger(OMDBArchiver.class); + private boolean completed; + + public OMDBArchiver() { + this.tmpDir = null; + this.filesToWriteIntoTarball = new HashMap<>(); + this.hardLinkFileMap = null; + this.completed = false; + } + + public void setTmpDir(Path tmpDir) { + this.tmpDir = tmpDir; + } + + public Path getTmpDir() { + return tmpDir; + } + + public Map<String, String> getHardLinkFileMap() { + return hardLinkFileMap; + } + + public Map<String, File> getFilesToWriteIntoTarball() { + return filesToWriteIntoTarball; + } + + public void setHardLinkFileMap(Map<String, String> hardLinkFileMap) { + this.hardLinkFileMap = hardLinkFileMap; + } + + public boolean isCompleted() { + return completed; + } + + public void setCompleted(boolean completed) { + this.completed = completed; + } + + /** + * @param file the file to create a hardlink and record into the map + * @param entryName name of the entry corresponding to file + * @return the file size + * @throws IOException in case of hardlink failure + * + * Records the given file entry into the map after taking a hardlink. 
+ */ + public long recordFileEntry(File file, String entryName) throws IOException { + File link = tmpDir.resolve(entryName).toFile(); + long bytes = 0; + try { + Files.createLink(link.toPath(), file.toPath()); + filesToWriteIntoTarball.put(entryName, link); + bytes = file.length(); + } catch (IOException ioe) { + LOG.error("Couldn't create hardlink for file {} while including it in tarball.", + file.getAbsolutePath(), ioe); + throw ioe; + } + return bytes; + } + + /** + * @param conf the configuration object to obtain metadata paths + * @param outputStream the tarball archive output stream + * @throws IOException in case of write failure to the archive + * + * Writes all the files captured by the map into the archive and + * also includes the hardlinkFile and the completion marker file. + */ + public void writeToArchive(OzoneConfiguration conf, OutputStream outputStream) + throws IOException { + long bytesWritten = 0; + long lastLoggedTime = Time.now(); + long filesWritten = 0; + try (ArchiveOutputStream<TarArchiveEntry> archiveOutput = tar(outputStream)) { + for (Map.Entry<String, File> kv : filesToWriteIntoTarball.entrySet()) { + String entryName = kv.getKey(); + File link = kv.getValue(); + try { + bytesWritten += includeFile(link, entryName, archiveOutput); + if (Time.monotonicNow() - lastLoggedTime >= 30000) { + LOG.info("Transferred {} KB, #files {} to checkpoint tarball stream...", + bytesWritten / (1024), filesWritten); + lastLoggedTime = Time.monotonicNow(); Review Comment: The variable 'filesWritten' is referenced in the logging statement but never incremented. Line 133 uses 'filesWritten' in the log message, but the counter should be incremented after line 130 to track the actual number of files written. Currently, it remains at 0 throughout the loop. ########## hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBArchiver.java: ########## @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hadoop.ozone.om; + +import static org.apache.hadoop.hdds.utils.Archiver.includeFile; +import static org.apache.hadoop.hdds.utils.Archiver.tar; +import static org.apache.hadoop.hdds.utils.HddsServerUtil.includeRatisSnapshotCompleteFlag; +import static org.apache.hadoop.ozone.om.OMDBCheckpointServletInodeBasedXfer.writeHardlinkFile; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashMap; +import java.util.Map; +import org.apache.commons.compress.archivers.ArchiveOutputStream; +import org.apache.commons.compress.archivers.tar.TarArchiveEntry; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.util.Time; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Class for handling operations relevant to archiving the OM DB tarball. + * Mainly maintains a map for recording the files collected from reading + * the checkpoint and snapshot DB's. It temporarily creates hardlinks and stores + * the link data in the map to release the bootstrap lock quickly + * and do the actual write at the end outside the lock. + */ +public class OMDBArchiver { + + private Path tmpDir; + private Map<String, File> filesToWriteIntoTarball; + private Map<String, String> hardLinkFileMap; + private static final Logger LOG = LoggerFactory.getLogger(OMDBArchiver.class); + private boolean completed; + + public OMDBArchiver() { + this.tmpDir = null; + this.filesToWriteIntoTarball = new HashMap<>(); + this.hardLinkFileMap = null; + this.completed = false; + } + + public void setTmpDir(Path tmpDir) { + this.tmpDir = tmpDir; + } + + public Path getTmpDir() { + return tmpDir; + } + + public Map<String, String> getHardLinkFileMap() { + return hardLinkFileMap; + } + + public Map<String, File> getFilesToWriteIntoTarball() { + return filesToWriteIntoTarball; + } + + public void setHardLinkFileMap(Map<String, String> hardLinkFileMap) { + this.hardLinkFileMap = hardLinkFileMap; + } + + public boolean isCompleted() { + return completed; + } + + public void setCompleted(boolean completed) { + this.completed = completed; + } + + /** + * @param file the file to create a hardlink and record into the map + * @param entryName name of the entry corresponding to file + * @return the file size + * @throws IOException in case of hardlink failure + * + * Records the given file entry into the map after taking a hardlink. + */ + public long recordFileEntry(File file, String entryName) throws IOException { Review Comment: Missing null check for tmpDir: The recordFileEntry method uses tmpDir.resolve() on line 98 without verifying that tmpDir has been set. If recordFileEntry is called before setTmpDir, this will result in a NullPointerException. Consider adding validation to ensure tmpDir is not null before using it. ```suggestion public long recordFileEntry(File file, String entryName) throws IOException { if (tmpDir == null) { throw new IllegalStateException( "Temporary directory not set. 
Call setTmpDir() before recordFileEntry()."); } ``` ########## hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServletInodeBasedXfer.java: ########## @@ -446,49 +441,38 @@ public void testWriteDBToArchive(boolean expectOnlySstFiles) throws Exception { // Create dummy files: one SST, one non-SST Path sstFile = dbDir.resolve("test.sst"); Files.write(sstFile, "sst content".getBytes(StandardCharsets.UTF_8)); // Write some content to make it non-empty - Path nonSstFile = dbDir.resolve("test.log"); Files.write(nonSstFile, "log content".getBytes(StandardCharsets.UTF_8)); Set<String> sstFilesToExclude = new HashSet<>(); AtomicLong maxTotalSstSize = new AtomicLong(1000000); // Sufficient size - Map<String, String> hardLinkFileMap = new java.util.HashMap<>(); + OMDBArchiver omdbArchiver = new OMDBArchiver(); Path tmpDir = folder.resolve("tmp"); Files.createDirectories(tmpDir); - TarArchiveOutputStream mockArchiveOutputStream = mock(TarArchiveOutputStream.class); + omdbArchiver.setTmpDir(tmpDir); + OMDBArchiver omDbArchiverSpy = spy(omdbArchiver); Review Comment: Inconsistent variable naming: The variable is named 'omDbArchiverSpy' on line 452 while the original instance is named 'omdbArchiver' on line 448. The inconsistency in capitalization (omDb vs omdb) makes the code harder to read. Consider using 'omdbArchiverSpy' to match the original variable's naming style. ########## hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBArchiver.java: ########## @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.om; + +import static org.apache.hadoop.hdds.utils.Archiver.includeFile; +import static org.apache.hadoop.hdds.utils.Archiver.tar; +import static org.apache.hadoop.hdds.utils.HddsServerUtil.includeRatisSnapshotCompleteFlag; +import static org.apache.hadoop.ozone.om.OMDBCheckpointServletInodeBasedXfer.writeHardlinkFile; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashMap; +import java.util.Map; +import org.apache.commons.compress.archivers.ArchiveOutputStream; +import org.apache.commons.compress.archivers.tar.TarArchiveEntry; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.util.Time; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Class for handling operations relevant to archiving the OM DB tarball. + * Mainly maintains a map for recording the files collected from reading + * the checkpoint and snapshot DB's. It temporarily creates hardlinks and stores + * the link data in the map to release the bootstrap lock quickly + * and do the actual write at the end outside the lock. 
+ */ +public class OMDBArchiver { + + private Path tmpDir; + private Map<String, File> filesToWriteIntoTarball; + private Map<String, String> hardLinkFileMap; + private static final Logger LOG = LoggerFactory.getLogger(OMDBArchiver.class); + private boolean completed; + + public OMDBArchiver() { + this.tmpDir = null; + this.filesToWriteIntoTarball = new HashMap<>(); + this.hardLinkFileMap = null; + this.completed = false; + } + + public void setTmpDir(Path tmpDir) { + this.tmpDir = tmpDir; + } + + public Path getTmpDir() { + return tmpDir; + } + + public Map<String, String> getHardLinkFileMap() { + return hardLinkFileMap; + } + + public Map<String, File> getFilesToWriteIntoTarball() { + return filesToWriteIntoTarball; + } + + public void setHardLinkFileMap(Map<String, String> hardLinkFileMap) { + this.hardLinkFileMap = hardLinkFileMap; + } + + public boolean isCompleted() { + return completed; + } + + public void setCompleted(boolean completed) { + this.completed = completed; + } + + /** + * @param file the file to create a hardlink and record into the map + * @param entryName name of the entry corresponding to file + * @return the file size + * @throws IOException in case of hardlink failure + * + * Records the given file entry into the map after taking a hardlink. Review Comment: Non-standard javadoc format: The javadoc comment uses @param and @return tags that appear before the method description (lines 90-93) instead of following standard javadoc conventions where these tags should come after the main description. While this may still compile, it doesn't follow the standard javadoc format where the description comes first, followed by all @param tags, and then @return and @throws tags. ```suggestion * Records the given file entry into the map after taking a hardlink. * * @param file the file to create a hardlink and record into the map * @param entryName name of the entry corresponding to file * @return the file size * @throws IOException in case of hardlink failure ``` ########## hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBArchiver.java: ########## @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hadoop.ozone.om; + +import static org.apache.hadoop.hdds.utils.Archiver.includeFile; +import static org.apache.hadoop.hdds.utils.Archiver.tar; +import static org.apache.hadoop.hdds.utils.HddsServerUtil.includeRatisSnapshotCompleteFlag; +import static org.apache.hadoop.ozone.om.OMDBCheckpointServletInodeBasedXfer.writeHardlinkFile; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashMap; +import java.util.Map; +import org.apache.commons.compress.archivers.ArchiveOutputStream; +import org.apache.commons.compress.archivers.tar.TarArchiveEntry; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.util.Time; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Class for handling operations relevant to archiving the OM DB tarball. + * Mainly maintains a map for recording the files collected from reading + * the checkpoint and snapshot DB's. It temporarily creates hardlinks and stores + * the link data in the map to release the bootstrap lock quickly + * and do the actual write at the end outside the lock. + */ +public class OMDBArchiver { + + private Path tmpDir; + private Map<String, File> filesToWriteIntoTarball; + private Map<String, String> hardLinkFileMap; + private static final Logger LOG = LoggerFactory.getLogger(OMDBArchiver.class); + private boolean completed; + + public OMDBArchiver() { + this.tmpDir = null; + this.filesToWriteIntoTarball = new HashMap<>(); + this.hardLinkFileMap = null; + this.completed = false; + } + + public void setTmpDir(Path tmpDir) { + this.tmpDir = tmpDir; + } + + public Path getTmpDir() { + return tmpDir; + } + + public Map<String, String> getHardLinkFileMap() { + return hardLinkFileMap; + } + + public Map<String, File> getFilesToWriteIntoTarball() { + return filesToWriteIntoTarball; + } + + public void setHardLinkFileMap(Map<String, String> hardLinkFileMap) { + this.hardLinkFileMap = hardLinkFileMap; + } + + public boolean isCompleted() { + return completed; + } + + public void setCompleted(boolean completed) { + this.completed = completed; + } + + /** + * @param file the file to create a hardlink and record into the map + * @param entryName name of the entry corresponding to file + * @return the file size + * @throws IOException in case of hardlink failure + * + * Records the given file entry into the map after taking a hardlink. + */ + public long recordFileEntry(File file, String entryName) throws IOException { + File link = tmpDir.resolve(entryName).toFile(); + long bytes = 0; + try { + Files.createLink(link.toPath(), file.toPath()); + filesToWriteIntoTarball.put(entryName, link); + bytes = file.length(); + } catch (IOException ioe) { + LOG.error("Couldn't create hardlink for file {} while including it in tarball.", + file.getAbsolutePath(), ioe); + throw ioe; + } + return bytes; + } + + /** + * @param conf the configuration object to obtain metadata paths + * @param outputStream the tarball archive output stream + * @throws IOException in case of write failure to the archive + * + * Writes all the files captured by the map into the archive and + * also includes the hardlinkFile and the completion marker file. + */ Review Comment: Non-standard javadoc format: Similar to the recordFileEntry method, this javadoc uses @param and @throws tags (lines 113-115) appearing before the method description (lines 117-119). 
The description should come first, followed by the parameter documentation tags. ########## hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServletInodeBasedXfer.java: ########## @@ -298,14 +295,12 @@ public void testWriteDBToArchiveClosesFilesListStream() throws Exception { // Do not use CALLS_REAL_METHODS for java.nio.file.Files: internal/private static // methods (eg Files.provider()) get intercepted too and Mockito will try to invoke // them reflectively, which fails on JDK9+ without --add-opens. - try (MockedStatic<Files> files = mockStatic(Files.class); - TarArchiveOutputStream tar = new TarArchiveOutputStream(new java.io.ByteArrayOutputStream())) { + try (MockedStatic<Files> files = mockStatic(Files.class)) { files.when(() -> Files.exists(dbDir)).thenReturn(true); files.when(() -> Files.list(dbDir)).thenReturn(stream); - boolean result = servlet.writeDBToArchive( - new HashSet<>(), dbDir, new AtomicLong(Long.MAX_VALUE), - tar, folder, null, true); + boolean result = servlet.collectFilesFromDir(new HashSet<>(), dbDir, + new AtomicLong(Long.MAX_VALUE), true, new OMDBArchiver()); Review Comment: Test does not initialize tmpDir for OMDBArchiver: The test creates a new OMDBArchiver instance on line 303 but doesn't set its tmpDir before passing it to collectFilesFromDir. If collectFilesFromDir calls recordFileEntry internally (which it does), this will cause a NullPointerException when tmpDir.resolve() is called in recordFileEntry. The test should call omdbArchiver.setTmpDir() before use. ```suggestion OMDBArchiver archiver = new OMDBArchiver(); archiver.setTmpDir(folder); boolean result = servlet.collectFilesFromDir(new HashSet<>(), dbDir, new AtomicLong(Long.MAX_VALUE), true, archiver); ``` ########## hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBArchiver.java: ########## @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.ozone.om; + +import static org.apache.hadoop.hdds.utils.Archiver.includeFile; +import static org.apache.hadoop.hdds.utils.Archiver.tar; +import static org.apache.hadoop.hdds.utils.HddsServerUtil.includeRatisSnapshotCompleteFlag; +import static org.apache.hadoop.ozone.om.OMDBCheckpointServletInodeBasedXfer.writeHardlinkFile; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashMap; +import java.util.Map; +import org.apache.commons.compress.archivers.ArchiveOutputStream; +import org.apache.commons.compress.archivers.tar.TarArchiveEntry; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.util.Time; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Class for handling operations relevant to archiving the OM DB tarball. + * Mainly maintains a map for recording the files collected from reading + * the checkpoint and snapshot DB's. It temporarily creates hardlinks and stores + * the link data in the map to release the bootstrap lock quickly + * and do the actual write at the end outside the lock. + */ +public class OMDBArchiver { + + private Path tmpDir; + private Map<String, File> filesToWriteIntoTarball; + private Map<String, String> hardLinkFileMap; + private static final Logger LOG = LoggerFactory.getLogger(OMDBArchiver.class); + private boolean completed; + + public OMDBArchiver() { + this.tmpDir = null; + this.filesToWriteIntoTarball = new HashMap<>(); + this.hardLinkFileMap = null; + this.completed = false; + } + + public void setTmpDir(Path tmpDir) { + this.tmpDir = tmpDir; + } + + public Path getTmpDir() { + return tmpDir; + } + + public Map<String, String> getHardLinkFileMap() { + return hardLinkFileMap; + } + + public Map<String, File> getFilesToWriteIntoTarball() { Review Comment: getFilesToWriteIntoTarball exposes the internal representation stored in field filesToWriteIntoTarball. The value may be modified after this call to getFilesToWriteIntoTarball. ########## hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBArchiver.java: ########## @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hadoop.ozone.om; + +import static org.apache.hadoop.hdds.utils.Archiver.includeFile; +import static org.apache.hadoop.hdds.utils.Archiver.tar; +import static org.apache.hadoop.hdds.utils.HddsServerUtil.includeRatisSnapshotCompleteFlag; +import static org.apache.hadoop.ozone.om.OMDBCheckpointServletInodeBasedXfer.writeHardlinkFile; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashMap; +import java.util.Map; +import org.apache.commons.compress.archivers.ArchiveOutputStream; +import org.apache.commons.compress.archivers.tar.TarArchiveEntry; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.util.Time; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Class for handling operations relevant to archiving the OM DB tarball. + * Mainly maintains a map for recording the files collected from reading + * the checkpoint and snapshot DB's. It temporarily creates hardlinks and stores + * the link data in the map to release the bootstrap lock quickly + * and do the actual write at the end outside the lock. + */ +public class OMDBArchiver { + + private Path tmpDir; + private Map<String, File> filesToWriteIntoTarball; + private Map<String, String> hardLinkFileMap; + private static final Logger LOG = LoggerFactory.getLogger(OMDBArchiver.class); + private boolean completed; + + public OMDBArchiver() { + this.tmpDir = null; + this.filesToWriteIntoTarball = new HashMap<>(); + this.hardLinkFileMap = null; + this.completed = false; + } + + public void setTmpDir(Path tmpDir) { + this.tmpDir = tmpDir; + } + + public Path getTmpDir() { + return tmpDir; + } + + public Map<String, String> getHardLinkFileMap() { + return hardLinkFileMap; + } + + public Map<String, File> getFilesToWriteIntoTarball() { + return filesToWriteIntoTarball; + } + + public void setHardLinkFileMap(Map<String, String> hardLinkFileMap) { + this.hardLinkFileMap = hardLinkFileMap; + } + + public boolean isCompleted() { + return completed; + } + + public void setCompleted(boolean completed) { + this.completed = completed; + } + + /** + * @param file the file to create a hardlink and record into the map + * @param entryName name of the entry corresponding to file + * @return the file size + * @throws IOException in case of hardlink failure + * + * Records the given file entry into the map after taking a hardlink. + */ + public long recordFileEntry(File file, String entryName) throws IOException { + File link = tmpDir.resolve(entryName).toFile(); + long bytes = 0; + try { + Files.createLink(link.toPath(), file.toPath()); + filesToWriteIntoTarball.put(entryName, link); + bytes = file.length(); + } catch (IOException ioe) { + LOG.error("Couldn't create hardlink for file {} while including it in tarball.", + file.getAbsolutePath(), ioe); + throw ioe; + } + return bytes; + } + + /** + * @param conf the configuration object to obtain metadata paths + * @param outputStream the tarball archive output stream + * @throws IOException in case of write failure to the archive + * + * Writes all the files captured by the map into the archive and + * also includes the hardlinkFile and the completion marker file. + */ + public void writeToArchive(OzoneConfiguration conf, OutputStream outputStream) + throws IOException { + long bytesWritten = 0; + long lastLoggedTime = Time.now(); Review Comment: Inconsistent time method usage. 
Line 123 uses 'Time.now()' for initialization, but lines 131 and 134 use 'Time.monotonicNow()' for comparison. Since monotonic time should be used for measuring elapsed time intervals, line 123 should also use 'Time.monotonicNow()' to ensure consistent time measurement. ```suggestion long lastLoggedTime = Time.monotonicNow(); ``` ########## hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBArchiver.java: ########## @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.om; + +import static org.apache.hadoop.hdds.utils.Archiver.includeFile; +import static org.apache.hadoop.hdds.utils.Archiver.tar; +import static org.apache.hadoop.hdds.utils.HddsServerUtil.includeRatisSnapshotCompleteFlag; +import static org.apache.hadoop.ozone.om.OMDBCheckpointServletInodeBasedXfer.writeHardlinkFile; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashMap; +import java.util.Map; +import org.apache.commons.compress.archivers.ArchiveOutputStream; +import org.apache.commons.compress.archivers.tar.TarArchiveEntry; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.util.Time; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Class for handling operations relevant to archiving the OM DB tarball. + * Mainly maintains a map for recording the files collected from reading + * the checkpoint and snapshot DB's. It temporarily creates hardlinks and stores + * the link data in the map to release the bootstrap lock quickly + * and do the actual write at the end outside the lock. 
+ */ +public class OMDBArchiver { + + private Path tmpDir; + private Map<String, File> filesToWriteIntoTarball; + private Map<String, String> hardLinkFileMap; + private static final Logger LOG = LoggerFactory.getLogger(OMDBArchiver.class); + private boolean completed; + + public OMDBArchiver() { + this.tmpDir = null; + this.filesToWriteIntoTarball = new HashMap<>(); + this.hardLinkFileMap = null; + this.completed = false; + } + + public void setTmpDir(Path tmpDir) { + this.tmpDir = tmpDir; + } + + public Path getTmpDir() { + return tmpDir; + } + + public Map<String, String> getHardLinkFileMap() { + return hardLinkFileMap; + } + + public Map<String, File> getFilesToWriteIntoTarball() { + return filesToWriteIntoTarball; + } + + public void setHardLinkFileMap(Map<String, String> hardLinkFileMap) { + this.hardLinkFileMap = hardLinkFileMap; + } + + public boolean isCompleted() { + return completed; + } + + public void setCompleted(boolean completed) { + this.completed = completed; + } + + /** + * @param file the file to create a hardlink and record into the map + * @param entryName name of the entry corresponding to file + * @return the file size + * @throws IOException in case of hardlink failure + * + * Records the given file entry into the map after taking a hardlink. + */ + public long recordFileEntry(File file, String entryName) throws IOException { + File link = tmpDir.resolve(entryName).toFile(); + long bytes = 0; + try { + Files.createLink(link.toPath(), file.toPath()); Review Comment: Potential file name collision in hardlink creation: The method creates a hardlink using only the entryName without considering potential collisions. If recordFileEntry is called multiple times with the same entryName but different files, the second call will fail because Files.createLink will throw FileAlreadyExistsException. The existing hardlink should either be checked and handled, or the code should ensure unique entryNames are passed. ```suggestion Path linkPath = tmpDir.resolve(entryName); File link = linkPath.toFile(); long bytes = 0; try { if (Files.exists(linkPath)) { // If the existing file is already a link to the same source, just reuse it. if (Files.isSameFile(linkPath, file.toPath())) { filesToWriteIntoTarball.put(entryName, link); return file.length(); } // Otherwise, remove the stale link/entry so we can recreate it. 
Files.delete(linkPath); } Files.createLink(linkPath, file.toPath()); ``` ########## hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServletInodeBasedXfer.java: ########## @@ -293,25 +299,22 @@ public void writeDbDataToStream(HttpServletRequest request, OutputStream destina snapshotInCheckpoint = getSnapshotDirsFromDB(omMetadataManager, checkpointMetadataManager, snapshotLocalDataManager); } - writeDBToArchive(sstFilesToExclude, getCompactionLogDir(), maxTotalSstSize, archiveOutputStream, tmpdir, - hardLinkFileMap, false); + collectFilesFromDir(sstFilesToExclude, getCompactionLogDir(), maxTotalSstSize, false, omdbArchiver); try (Stream<Path> backupFiles = sstBackupFiles.stream()) { - writeDBToArchive(sstFilesToExclude, backupFiles, maxTotalSstSize, archiveOutputStream, tmpdir, - hardLinkFileMap, false); + collectFilesFromDir(sstFilesToExclude, backupFiles, maxTotalSstSize, false, omdbArchiver); } Collection<Path> snapshotLocalPropertyFiles = getSnapshotLocalDataPaths(localDataManager, snapshotInCheckpoint.keySet()); // This is done to ensure all data to be copied correctly is flushed in the snapshot DB - transferSnapshotData(sstFilesToExclude, tmpdir, snapshotInCheckpoint.values(), snapshotLocalPropertyFiles, - maxTotalSstSize, archiveOutputStream, hardLinkFileMap); + collectSnapshotData(sstFilesToExclude, snapshotInCheckpoint.values(), snapshotLocalPropertyFiles, + maxTotalSstSize, omdbArchiver); } } - writeHardlinkFile(getConf(), hardLinkFileMap, archiveOutputStream); - includeRatisSnapshotCompleteFlag(archiveOutputStream); + omdbArchiver.setCompleted(true); } } catch (IOException ioe) { - LOG.error("got exception writing to archive " + ioe); + LOG.error("got exception while collecting files to archive " + ioe); Review Comment: Inconsistent error logging: The error message on line 317 concatenates the exception directly as a string ("got exception while collecting files to archive " + ioe), which will print the exception's toString() inline. This is inconsistent with best practices where the exception should be passed as a separate parameter to the logger for proper stack trace handling. The LOG.error call should be: LOG.error("got exception while collecting files to archive", ioe); ```suggestion LOG.error("got exception while collecting files to archive", ioe); ``` ########## hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServletInodeBasedXfer.java: ########## @@ -239,19 +235,20 @@ public void write(int b) throws IOException { doCallRealMethod().when(omDbCheckpointServletMock) .writeDbDataToStream(any(), any(), any(), any(), any()); doCallRealMethod().when(omDbCheckpointServletMock) - .writeDBToArchive(any(), any(), any(), any(), any(), any(), anyBoolean()); + .collectFilesFromDir(any(), any(), any(), anyBoolean(), any()); + doCallRealMethod().when(omDbCheckpointServletMock).collectDbDataToTransfer(any(), any(), any()); when(omDbCheckpointServletMock.getBootstrapStateLock()) .thenReturn(lock); doCallRealMethod().when(omDbCheckpointServletMock).getCheckpoint(any(), anyBoolean()); assertNull(doCallRealMethod().when(omDbCheckpointServletMock).getBootstrapTempData()); doCallRealMethod().when(omDbCheckpointServletMock). 
processMetadataSnapshotRequest(any(), any(), anyBoolean(), anyBoolean()); - doCallRealMethod().when(omDbCheckpointServletMock).writeDbDataToStream(any(), any(), any(), any()); + doCallRealMethod().when(omDbCheckpointServletMock).writeDbDataToStream(any(), any(), any(), any(), any()); Review Comment: Duplicate mock setup: Line 247 sets up a mock for 'writeDbDataToStream' with 5 parameters, but the same method is already mocked on line 236 with the same signature. This is redundant and one of these lines should be removed. ```suggestion ``` ########## hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBArchiver.java: ########## @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.om; + +import static org.apache.hadoop.hdds.utils.Archiver.includeFile; +import static org.apache.hadoop.hdds.utils.Archiver.tar; +import static org.apache.hadoop.hdds.utils.HddsServerUtil.includeRatisSnapshotCompleteFlag; +import static org.apache.hadoop.ozone.om.OMDBCheckpointServletInodeBasedXfer.writeHardlinkFile; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashMap; +import java.util.Map; +import org.apache.commons.compress.archivers.ArchiveOutputStream; +import org.apache.commons.compress.archivers.tar.TarArchiveEntry; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.util.Time; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Class for handling operations relevant to archiving the OM DB tarball. + * Mainly maintains a map for recording the files collected from reading + * the checkpoint and snapshot DB's. It temporarily creates hardlinks and stores + * the link data in the map to release the bootstrap lock quickly + * and do the actual write at the end outside the lock. 
+ */ +public class OMDBArchiver { + + private Path tmpDir; + private Map<String, File> filesToWriteIntoTarball; + private Map<String, String> hardLinkFileMap; + private static final Logger LOG = LoggerFactory.getLogger(OMDBArchiver.class); + private boolean completed; + + public OMDBArchiver() { + this.tmpDir = null; + this.filesToWriteIntoTarball = new HashMap<>(); + this.hardLinkFileMap = null; + this.completed = false; + } + + public void setTmpDir(Path tmpDir) { + this.tmpDir = tmpDir; + } + + public Path getTmpDir() { + return tmpDir; + } + + public Map<String, String> getHardLinkFileMap() { + return hardLinkFileMap; + } + + public Map<String, File> getFilesToWriteIntoTarball() { + return filesToWriteIntoTarball; + } + + public void setHardLinkFileMap(Map<String, String> hardLinkFileMap) { + this.hardLinkFileMap = hardLinkFileMap; + } + + public boolean isCompleted() { + return completed; + } + + public void setCompleted(boolean completed) { + this.completed = completed; + } + + /** + * @param file the file to create a hardlink and record into the map + * @param entryName name of the entry corresponding to file + * @return the file size + * @throws IOException in case of hardlink failure + * + * Records the given file entry into the map after taking a hardlink. + */ + public long recordFileEntry(File file, String entryName) throws IOException { + File link = tmpDir.resolve(entryName).toFile(); + long bytes = 0; + try { + Files.createLink(link.toPath(), file.toPath()); + filesToWriteIntoTarball.put(entryName, link); + bytes = file.length(); + } catch (IOException ioe) { + LOG.error("Couldn't create hardlink for file {} while including it in tarball.", + file.getAbsolutePath(), ioe); + throw ioe; + } + return bytes; + } + + /** + * @param conf the configuration object to obtain metadata paths + * @param outputStream the tarball archive output stream + * @throws IOException in case of write failure to the archive + * + * Writes all the files captured by the map into the archive and + * also includes the hardlinkFile and the completion marker file. + */ + public void writeToArchive(OzoneConfiguration conf, OutputStream outputStream) + throws IOException { + long bytesWritten = 0; + long lastLoggedTime = Time.now(); + long filesWritten = 0; + try (ArchiveOutputStream<TarArchiveEntry> archiveOutput = tar(outputStream)) { + for (Map.Entry<String, File> kv : filesToWriteIntoTarball.entrySet()) { + String entryName = kv.getKey(); + File link = kv.getValue(); + try { + bytesWritten += includeFile(link, entryName, archiveOutput); + if (Time.monotonicNow() - lastLoggedTime >= 30000) { + LOG.info("Transferred {} KB, #files {} to checkpoint tarball stream...", + bytesWritten / (1024), filesWritten); + lastLoggedTime = Time.monotonicNow(); + } + } catch (IOException ioe) { + LOG.error("Couldn't create hardlink for file {} while including it in tarball.", Review Comment: The error message on line 137 is misleading. It states "Couldn't create hardlink for file" but this error occurs during the write phase when including the file in the tarball, not when creating the hardlink. The hardlink was already created earlier in the recordFileEntry method. The error message should accurately describe that the issue is with writing the file to the archive. 
```suggestion LOG.error("Failed to write file {} to checkpoint tarball archive.", ``` ########## hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBArchiver.java: ########## @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.om; + +import static org.apache.hadoop.hdds.utils.Archiver.includeFile; +import static org.apache.hadoop.hdds.utils.Archiver.tar; +import static org.apache.hadoop.hdds.utils.HddsServerUtil.includeRatisSnapshotCompleteFlag; +import static org.apache.hadoop.ozone.om.OMDBCheckpointServletInodeBasedXfer.writeHardlinkFile; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashMap; +import java.util.Map; +import org.apache.commons.compress.archivers.ArchiveOutputStream; +import org.apache.commons.compress.archivers.tar.TarArchiveEntry; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.util.Time; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Class for handling operations relevant to archiving the OM DB tarball. + * Mainly maintains a map for recording the files collected from reading + * the checkpoint and snapshot DB's. It temporarily creates hardlinks and stores + * the link data in the map to release the bootstrap lock quickly + * and do the actual write at the end outside the lock. + */ +public class OMDBArchiver { + + private Path tmpDir; + private Map<String, File> filesToWriteIntoTarball; + private Map<String, String> hardLinkFileMap; + private static final Logger LOG = LoggerFactory.getLogger(OMDBArchiver.class); + private boolean completed; + + public OMDBArchiver() { + this.tmpDir = null; + this.filesToWriteIntoTarball = new HashMap<>(); + this.hardLinkFileMap = null; + this.completed = false; + } + + public void setTmpDir(Path tmpDir) { + this.tmpDir = tmpDir; + } + + public Path getTmpDir() { + return tmpDir; + } + + public Map<String, String> getHardLinkFileMap() { Review Comment: getHardLinkFileMap exposes the internal representation stored in field hardLinkFileMap. The value may be modified [after this call to getHardLinkFileMap](1). getHardLinkFileMap exposes the internal representation stored in field hardLinkFileMap. The value may be modified [after this call to getHardLinkFileMap](2). 
########## hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServletInodeBasedXfer.java: ########## @@ -151,27 +147,39 @@ public void processMetadataSnapshotRequest(HttpServletRequest request, HttpServl OZONE_DB_CHECKPOINT_REQUEST_TO_EXCLUDE_SST); Set<String> receivedSstFiles = extractFilesToExclude(sstParam); Path tmpdir = null; + OMDBArchiver omdbArchiver = new OMDBArchiver(); try (UncheckedAutoCloseable lock = getBootstrapStateLock().acquireWriteLock()) { tmpdir = Files.createTempDirectory(getBootstrapTempData().toPath(), "bootstrap-data-"); if (tmpdir == null) { throw new IOException("tmp dir is null"); } + omdbArchiver.setTmpDir(tmpdir); String tarName = "om.data-" + System.currentTimeMillis() + ".tar"; response.setContentType("application/x-tar"); response.setHeader("Content-Disposition", "attachment; filename=\"" + tarName + "\""); Instant start = Instant.now(); - writeDbDataToStream(request, response.getOutputStream(), receivedSstFiles, tmpdir); + collectDbDataToTransfer(request, receivedSstFiles, omdbArchiver); Instant end = Instant.now(); long duration = Duration.between(start, end).toMillis(); - LOG.info("Time taken to write the checkpoint to response output " + - "stream: {} milliseconds", duration); + LOG.info("Time taken to collect the DB data : {} milliseconds", duration); logSstFileList(receivedSstFiles, "Excluded {} SST files from the latest checkpoint{}: {}", 5); } catch (Exception e) { LOG.error( "Unable to process metadata snapshot request. ", e); response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); + } + try { + Instant start = Instant.now(); + OutputStream outputStream = response.getOutputStream(); + omdbArchiver.writeToArchive(getConf(), outputStream); + Instant end = Instant.now(); + long duration = Duration.between(start, end).toMillis(); + LOG.info("Time taken to write the checkpoint to response output " + + "stream: {} milliseconds", duration); + } catch (IOException e) { + LOG.error("unable to write to archive stream", e); Review Comment: Error handling may cause incomplete response: If an error occurs during the data collection phase (lines 151-172) and sets the HTTP status to SC_INTERNAL_SERVER_ERROR, the code still proceeds to execute the second try block (lines 173-182) which attempts to write to the response output stream. This could result in writing data after an error status has been set, potentially causing confusion or additional errors. Consider using a flag to skip the write phase if collection failed, or restructure the error handling to prevent writing after an error. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
