[ https://issues.apache.org/jira/browse/HDFS-17573?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17869157#comment-17869157 ]
ASF GitHub Bot commented on HDFS-17573: --------------------------------------- Last-remote11 commented on code in PR #6929: URL: https://github.com/apache/hadoop/pull/6929#discussion_r1694183111 ########## hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshotParallelAndCompress.java: ########## @@ -0,0 +1,58 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * <p> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p> + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode; + +import java.io.IOException; + +import org.slf4j.event.Level; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotTestHelper; +import org.apache.hadoop.test.GenericTestUtils; + +/** + * This test extends TestFSImageWithSnapshot to test + * enable both fsimage load parallel and fsimage compress. 
+ */ +public class TestFSImageWithSnapshotParallelAndCompress extends TestFSImageWithSnapshot { + { + SnapshotTestHelper.disableLogs(); + GenericTestUtils.setLogLevel(INode.LOG, Level.TRACE); + } + + @Override + public void createCluster() throws IOException { + + // turn on both parallelization and compression + conf.setBoolean(DFSConfigKeys.DFS_IMAGE_COMPRESS_KEY, true); + conf.set(DFSConfigKeys.DFS_IMAGE_COMPRESSION_CODEC_KEY, + "org.apache.hadoop.io.compress.GzipCodec"); Review Comment: https://github.com/apache/hadoop/pull/6929/commits/aee9b9cd08bbc6b283f68990807f055f4d4d5c39 ########## hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshotParallelAndCompress.java: ########## @@ -0,0 +1,58 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * <p> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p> + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.namenode; + +import java.io.IOException; + +import org.slf4j.event.Level; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotTestHelper; +import org.apache.hadoop.test.GenericTestUtils; + +/** + * This test extends TestFSImageWithSnapshot to test + * enable both fsimage load parallel and fsimage compress. + */ +public class TestFSImageWithSnapshotParallelAndCompress extends TestFSImageWithSnapshot { + { + SnapshotTestHelper.disableLogs(); + GenericTestUtils.setLogLevel(INode.LOG, Level.TRACE); + } + + @Override + public void createCluster() throws IOException { + + // turn on both parallelization and compression + conf.setBoolean(DFSConfigKeys.DFS_IMAGE_COMPRESS_KEY, true); + conf.set(DFSConfigKeys.DFS_IMAGE_COMPRESSION_CODEC_KEY, + "org.apache.hadoop.io.compress.GzipCodec"); + conf.set(DFSConfigKeys.DFS_IMAGE_PARALLEL_LOAD_KEY, "true"); + conf.set(DFSConfigKeys.DFS_IMAGE_PARALLEL_INODE_THRESHOLD_KEY, "2"); + conf.set(DFSConfigKeys.DFS_IMAGE_PARALLEL_TARGET_SECTIONS_KEY, "2"); + conf.set(DFSConfigKeys.DFS_IMAGE_PARALLEL_THREADS_KEY, "2"); Review Comment: https://github.com/apache/hadoop/pull/6929/commits/aee9b9cd08bbc6b283f68990807f055f4d4d5c39 ########## hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshotParallelAndCompress.java: ########## @@ -0,0 +1,58 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * <p> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p> + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode; + +import java.io.IOException; + +import org.slf4j.event.Level; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotTestHelper; +import org.apache.hadoop.test.GenericTestUtils; + +/** + * This test extends TestFSImageWithSnapshot to test + * enable both fsimage load parallel and fsimage compress. + */ +public class TestFSImageWithSnapshotParallelAndCompress extends TestFSImageWithSnapshot { + { + SnapshotTestHelper.disableLogs(); + GenericTestUtils.setLogLevel(INode.LOG, Level.TRACE); + } + + @Override + public void createCluster() throws IOException { + + // turn on both parallelization and compression + conf.setBoolean(DFSConfigKeys.DFS_IMAGE_COMPRESS_KEY, true); + conf.set(DFSConfigKeys.DFS_IMAGE_COMPRESSION_CODEC_KEY, + "org.apache.hadoop.io.compress.GzipCodec"); + conf.set(DFSConfigKeys.DFS_IMAGE_PARALLEL_LOAD_KEY, "true"); Review Comment: https://github.com/apache/hadoop/pull/6929/commits/aee9b9cd08bbc6b283f68990807f055f4d4d5c39 > Allow turn on both FSImage parallelization and compression > ---------------------------------------------------------- > > Key: HDFS-17573 > URL: https://issues.apache.org/jira/browse/HDFS-17573 > Project: Hadoop HDFS > Issue Type: Improvement > Components: hdfs, namenode > Affects Versions: 3.4.1 > Reporter: Sungdong Kim > Priority: Minor > Labels: pull-request-available > Fix For: 3.4.1, 3.5.0 > > 
Attachments: compressed-image-load-serial.png, > compressed-subsection-image-load-parallel.png, > compressed-subsection-image-load-serial.png > > > The feature added in HDFS-14617 (Improve FSImage load time by writing > sub-sections to the FSImage index, by [Stephen > O'Donnell|https://issues.apache.org/jira/secure/ViewProfile.jspa?name=sodonnell]) > makes loading the FSImage much faster. > > But this option cannot be enabled when dfs.image.compress=true is set. > In my opinion, larger clusters require both settings at the same time. > For example, the cluster I'm using has approximately 6 million file system > objects and the FSImage is approximately 11GB with the dfs.image.compress=true > setting. > If the dfs.image.compress option is turned off, the FSImage is expected to exceed 30GB, in > which case it will take a long time and consume significant network resources to transfer the FSImage from the standby to the active > namenode. > > It was shown in HDFS-16147 (by > [kinit|https://issues.apache.org/jira/secure/ViewProfile.jspa?name=mofei]) > that parallel FSImage loading and FSImage compression can be turned on at the > same time. (It also worked well in my environment.) > I created this new Jira and PR because the discussion in HDFS-16147 ended in > 2021, and I want the change to be officially included in the next release instead of > remaining in the Patch Available state. > The actual code of the patch was written by > [kinit|https://issues.apache.org/jira/secure/ViewProfile.jspa?name=mofei], and > I resolved the empty sub-section problem (see the comments further down in HDFS-16147) and > added test code. > If this is not the proper way to do this, please let me know how else I can contribute. > Thanks. -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: hdfs-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: hdfs-issues-h...@hadoop.apache.org