HDFS-8791. block ID-based DN storage layout can be very slow for datanode on ext4. Contributed by Chris Trezzo.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/2c8496eb Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/2c8496eb Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/2c8496eb Branch: refs/heads/HDFS-1312 Commit: 2c8496ebf3b7b31c2e18fdf8d4cb2a0115f43112 Parents: 2137e8f Author: Kihwal Lee <[email protected]> Authored: Tue Mar 1 14:50:02 2016 -0600 Committer: Kihwal Lee <[email protected]> Committed: Tue Mar 1 15:04:59 2016 -0600 ---------------------------------------------------------------------- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 ++ .../server/datanode/DataNodeLayoutVersion.java | 5 +++- .../hdfs/server/datanode/DataStorage.java | 11 ++++--- .../hdfs/server/datanode/DatanodeUtil.java | 4 +-- .../hadoop/hdfs/TestDatanodeLayoutUpgrade.java | 29 +++++++++++++++++-- .../resources/hadoop-56-layout-datanode-dir.tgz | Bin 0 -> 198996 bytes .../resources/hadoop-to-57-dn-layout-dir.txt | 24 +++++++++++++++ 7 files changed, 67 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/2c8496eb/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 8e3eef7..39650e8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -2896,6 +2896,9 @@ Release 2.7.3 - UNRELEASED HDFS-9395. Make HDFS audit logging consistant (Kuhu Shukla via kihwal) + HDFS-8791. block ID-based DN storage layout can be very slow for datanode + on ext4 (Chris Trezzo via kihwal) + OPTIMIZATIONS BUG FIXES http://git-wip-us.apache.org/repos/asf/hadoop/blob/2c8496eb/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeLayoutVersion.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeLayoutVersion.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeLayoutVersion.java index 23e7cfe..609a740 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeLayoutVersion.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeLayoutVersion.java @@ -65,7 +65,10 @@ public class DataNodeLayoutVersion { FIRST_LAYOUT(-55, -53, "First datanode layout", false), BLOCKID_BASED_LAYOUT(-56, "The block ID of a finalized block uniquely determines its position " + - "in the directory structure"); + "in the directory structure"), + BLOCKID_BASED_LAYOUT_32_by_32(-57, + "Identical to the block id based layout (-56) except it uses a smaller" + + " directory structure (32x32)"); private final FeatureInfo info; http://git-wip-us.apache.org/repos/asf/hadoop/blob/2c8496eb/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java index 6697054..3844c8e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java @@ -1102,10 +1102,13 @@ public class DataStorage extends Storage { LOG.info("Start linking block files from " + from + " to " + to); boolean upgradeToIdBasedLayout = false; // If we are upgrading from a version older than the one where we introduced - // block ID-based layout AND we're working with the finalized directory, - // we'll need to upgrade from the old flat layout to the block ID-based one - if (oldLV > DataNodeLayoutVersion.Feature.BLOCKID_BASED_LAYOUT.getInfo(). - getLayoutVersion() && to.getName().equals(STORAGE_DIR_FINALIZED)) { + // block ID-based layout (32x32) AND we're working with the finalized + // directory, we'll need to upgrade from the old layout to the new one. The + // upgrade path from pre-blockid based layouts (>-56) and blockid based + // 256x256 layouts (-56) is fortunately the same. + if (oldLV > DataNodeLayoutVersion.Feature.BLOCKID_BASED_LAYOUT_32_by_32 + .getInfo().getLayoutVersion() + && to.getName().equals(STORAGE_DIR_FINALIZED)) { upgradeToIdBasedLayout = true; } http://git-wip-us.apache.org/repos/asf/hadoop/blob/2c8496eb/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DatanodeUtil.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DatanodeUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DatanodeUtil.java index 746c3f6..ad054a8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DatanodeUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DatanodeUtil.java @@ -113,8 +113,8 @@ public class DatanodeUtil { * @return */ public static File idToBlockDir(File root, long blockId) { - int d1 = (int)((blockId >> 16) & 0xff); - int d2 = (int)((blockId >> 8) & 0xff); + int d1 = (int) ((blockId >> 16) & 0x1F); + int d2 = (int) ((blockId >> 8) & 0x1F); String path = DataStorage.BLOCK_SUBDIR_PREFIX + d1 + SEP + DataStorage.BLOCK_SUBDIR_PREFIX + d2; return new File(root, path); http://git-wip-us.apache.org/repos/asf/hadoop/blob/2c8496eb/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDatanodeLayoutUpgrade.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDatanodeLayoutUpgrade.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDatanodeLayoutUpgrade.java index 224abea..6b60b2b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDatanodeLayoutUpgrade.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDatanodeLayoutUpgrade.java @@ -28,10 +28,16 @@ public class TestDatanodeLayoutUpgrade { private static final String HADOOP_DATANODE_DIR_TXT = "hadoop-datanode-dir.txt"; private static final String HADOOP24_DATANODE = "hadoop-24-datanode-dir.tgz"; + private static final String HADOOP_56_DN_LAYOUT_TXT = + "hadoop-to-57-dn-layout-dir.txt"; + private static final String HADOOP_56_DN_LAYOUT = + "hadoop-56-layout-datanode-dir.tgz"; + /** + * Upgrade from LDir-based layout to 32x32 block ID-based layout (-57) -- + * change described in HDFS-6482 and HDFS-8791 + */ @Test - // Upgrade from LDir-based layout to block ID-based layout -- change described - // in HDFS-6482 public void testUpgradeToIdBasedLayout() throws IOException { TestDFSUpgradeFromImage upgrade = new TestDFSUpgradeFromImage(); upgrade.unpackStorage(HADOOP24_DATANODE, HADOOP_DATANODE_DIR_TXT); @@ -45,4 +51,23 @@ public class TestDatanodeLayoutUpgrade { upgrade.upgradeAndVerify(new MiniDFSCluster.Builder(conf).numDataNodes(1) .manageDataDfsDirs(false).manageNameDfsDirs(false), null); } + + /** + * Test upgrade from block ID-based layout 256x256 (-56) to block ID-based + * layout 32x32 (-57) + */ + @Test + public void testUpgradeFrom256To32Layout() throws IOException { + TestDFSUpgradeFromImage upgrade = new TestDFSUpgradeFromImage(); + upgrade.unpackStorage(HADOOP_56_DN_LAYOUT, HADOOP_56_DN_LAYOUT_TXT); + Configuration conf = new Configuration(TestDFSUpgradeFromImage.upgradeConf); + conf.set(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, + new File(System.getProperty("test.build.data"), "dfs" + File.separator + + "data").toURI().toString()); + conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, + new File(System.getProperty("test.build.data"), "dfs" + File.separator + + "name").toURI().toString()); + upgrade.upgradeAndVerify(new MiniDFSCluster.Builder(conf).numDataNodes(1) + .manageDataDfsDirs(false).manageNameDfsDirs(false), null); + } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/2c8496eb/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-56-layout-datanode-dir.tgz ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-56-layout-datanode-dir.tgz b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-56-layout-datanode-dir.tgz new file mode 100644 index 0000000..b43ac17 Binary files /dev/null and b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-56-layout-datanode-dir.tgz differ http://git-wip-us.apache.org/repos/asf/hadoop/blob/2c8496eb/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-to-57-dn-layout-dir.txt ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-to-57-dn-layout-dir.txt b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-to-57-dn-layout-dir.txt new file mode 100644 index 0000000..2dd1766 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-to-57-dn-layout-dir.txt @@ -0,0 +1,24 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Similar to hadoop-dfs-dir.txt, except this is used for a datanode layout +# upgrade test to 32x32 (layout -57) +# Uncomment the following line to produce checksum info for a new DFS image. +#printChecksums +/blocks/part1 286881285 +/blocks/part12922 1068680946 +/blocks/part972 2479788008 +/blocks/part973 1221039573 +overallCRC 1902127725
