Author: jing9
Date: Sat Jan 18 01:23:00 2014
New Revision: 1559302

URL: http://svn.apache.org/r1559302
Log:
HDFS-5793. Optimize the serialization of PermissionStatus. Contributed by Haohui Mai.
Modified:
    hadoop/common/branches/HDFS-5698/hadoop-hdfs-project/hadoop-hdfs/CHANGES_HDFS-5698.txt
    hadoop/common/branches/HDFS-5698/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java
    hadoop/common/branches/HDFS-5698/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java
    hadoop/common/branches/HDFS-5698/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/fsimage.proto

Modified: hadoop/common/branches/HDFS-5698/hadoop-hdfs-project/hadoop-hdfs/CHANGES_HDFS-5698.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-5698/hadoop-hdfs-project/hadoop-hdfs/CHANGES_HDFS-5698.txt?rev=1559302&r1=1559301&r2=1559302&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-5698/hadoop-hdfs-project/hadoop-hdfs/CHANGES_HDFS-5698.txt (original)
+++ hadoop/common/branches/HDFS-5698/hadoop-hdfs-project/hadoop-hdfs/CHANGES_HDFS-5698.txt Sat Jan 18 01:23:00 2014
@@ -9,3 +9,6 @@ HDFS-5698 subtasks
     HDFS-5783. Compute the digest before loading FSImage. (Haohui Mai via jing9)
 
     HDFS-5785. Serialize symlink in protobuf. (Haohui Mai via jing9)
+
+    HDFS-5793. Optimize the serialization of PermissionStatus. (Haohui Mai via
+    jing9)

Modified: hadoop/common/branches/HDFS-5698/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-5698/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java?rev=1559302&r1=1559301&r2=1559302&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-5698/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java (original)
+++ hadoop/common/branches/HDFS-5698/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java Sat Jan 18 01:23:00 2014
@@ -37,7 +37,6 @@ import org.apache.hadoop.hdfs.server.nam
 import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FilesUnderConstructionSection.FileUnderConstructionEntry;
 import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeDirectorySection;
 import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection;
-import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.Permission;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
 import org.apache.hadoop.hdfs.util.ReadOnlyList;
 
@@ -45,18 +44,27 @@ import com.google.common.base.Preconditi
 import com.google.protobuf.ByteString;
 
 final class FSImageFormatPBINode {
+  private final static int USER_GROUP_STRID_MASK = (1 << 24) - 1;
+  private final static int USER_STRID_OFFSET = 40;
+  private final static int GROUP_STRID_OFFSET = 16;
+
   final static class Loader {
-    private static PermissionStatus loadPermission(Permission p) {
-      return new PermissionStatus(p.getUser(), p.getGroup(), new FsPermission(
-          (short) p.getPermission()));
+    private PermissionStatus loadPermission(long id) {
+      short perm = (short) (id & ((1 << GROUP_STRID_OFFSET) - 1));
+      int gsid = (int) ((id >> GROUP_STRID_OFFSET) & USER_GROUP_STRID_MASK);
+      int usid = (int) ((id >> USER_STRID_OFFSET) & USER_GROUP_STRID_MASK);
+      return new PermissionStatus(parent.stringTable[usid],
+          parent.stringTable[gsid], new FsPermission(perm));
     }
 
     private final FSDirectory dir;
     private final FSNamesystem fsn;
+    private final FSImageFormatProtobuf.Loader parent;
 
-    Loader(FSNamesystem fsn) {
+    Loader(FSNamesystem fsn, final FSImageFormatProtobuf.Loader parent) {
       this.fsn = fsn;
       this.dir = fsn.dir;
+      this.parent = parent;
     }
 
     void loadINodeDirectorySection(InputStream in) throws IOException {
@@ -278,9 +286,12 @@ final class FSImageFormatPBINode {
           FSImageFormatProtobuf.SectionName.FILES_UNDERCONSTRUCTION);
     }
 
-    private INodeSection.Permission.Builder buildPermissionStatus(INode n) {
-      return INodeSection.Permission.newBuilder().setUser(n.getUserName())
-          .setGroup(n.getGroupName()).setPermission(n.getFsPermissionShort());
+    private long buildPermissionStatus(INode n) {
+      int userId = parent.getStringId(n.getUserName());
+      int groupId = parent.getStringId(n.getGroupName());
+      return ((userId & USER_GROUP_STRID_MASK) << USER_STRID_OFFSET)
+          | ((groupId & USER_GROUP_STRID_MASK) << GROUP_STRID_OFFSET)
+          | n.getFsPermissionShort();
    }
 
     private void save(OutputStream out, INode n) throws IOException {
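For reference, loadPermission() and buildPermissionStatus() above pack three fields into one 64-bit value: the top 24 bits hold the user name's string-table id, the next 24 bits the group name's string-table id, and the low 16 bits the FsPermission value. The following is a minimal standalone sketch of that layout, not taken from the patch; the class and method names are invented for the example, and the mask is declared as long here so the shifts stay in 64-bit arithmetic.

// Illustrative only -- mirrors the constants in FSImageFormatPBINode, but the
// class and method names are invented for this example.
public class PermissionLongExample {
  private static final long USER_GROUP_STRID_MASK = (1 << 24) - 1;
  private static final int USER_STRID_OFFSET = 40;
  private static final int GROUP_STRID_OFFSET = 16;

  // Pack a user string id, a group string id and a permission short into one long.
  static long pack(int userStringId, int groupStringId, short permission) {
    return ((userStringId & USER_GROUP_STRID_MASK) << USER_STRID_OFFSET)
        | ((groupStringId & USER_GROUP_STRID_MASK) << GROUP_STRID_OFFSET)
        | (permission & 0xFFFF);
  }

  // Unpack the three fields again.
  static int userStringId(long packed) {
    return (int) ((packed >>> USER_STRID_OFFSET) & USER_GROUP_STRID_MASK);
  }

  static int groupStringId(long packed) {
    return (int) ((packed >>> GROUP_STRID_OFFSET) & USER_GROUP_STRID_MASK);
  }

  static short permission(long packed) {
    return (short) (packed & ((1 << GROUP_STRID_OFFSET) - 1));
  }

  public static void main(String[] args) {
    long p = pack(3, 7, (short) 0755);
    // Prints: user=3 group=7 perm=755
    System.out.println("user=" + userStringId(p) + " group=" + groupStringId(p)
        + " perm=" + Integer.toOctalString(permission(p)));
  }
}

Because the user string id lands in the top 24 bits, the packed value is usually large, which is presumably why the .proto change below uses fixed64 (a constant 8 bytes on the wire) rather than a varint-encoded uint64.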
Modified: hadoop/common/branches/HDFS-5698/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-5698/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java?rev=1559302&r1=1559301&r2=1559302&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-5698/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java (original)
+++ hadoop/common/branches/HDFS-5698/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java Sat Jan 18 01:23:00 2014
@@ -33,6 +33,10 @@ import java.nio.channels.FileChannel;
 import java.security.DigestOutputStream;
 import java.security.MessageDigest;
 import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Map.Entry;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -41,11 +45,14 @@ import org.apache.hadoop.hdfs.protocol.L
 import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature;
 import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary;
 import org.apache.hadoop.hdfs.server.namenode.FsImageProto.NameSystemSection;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.StringTableSection;
 import org.apache.hadoop.hdfs.util.MD5FileUtils;
 import org.apache.hadoop.io.MD5Hash;
 import org.apache.hadoop.io.compress.CompressionCodec;
 import org.apache.hadoop.io.compress.CompressorStream;
 
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
 import com.google.common.io.LimitInputStream;
 import com.google.protobuf.CodedOutputStream;
 
@@ -63,6 +70,8 @@ final class FSImageFormatProtobuf {
     private final Configuration conf;
     private final FSNamesystem fsn;
 
+    String[] stringTable;
+
     /** The MD5 sum of the loaded file */
     private MD5Hash imgDigest;
     /** The transaction ID of the last edit represented by the loaded file */
@@ -143,9 +152,26 @@ final class FSImageFormatProtobuf {
       FileChannel channel = fin.getChannel();
 
       FSImageFormatPBINode.Loader inodeLoader = new FSImageFormatPBINode.Loader(
-          fsn);
+          fsn, this);
 
-      for (FileSummary.Section s : summary.getSectionsList()) {
+      ArrayList<FileSummary.Section> sections = Lists.newArrayList(summary
+          .getSectionsList());
+      Collections.sort(sections, new Comparator<FileSummary.Section>() {
+        @Override
+        public int compare(FileSummary.Section s1, FileSummary.Section s2) {
+          SectionName n1 = SectionName.fromString(s1.getName());
+          SectionName n2 = SectionName.fromString(s2.getName());
+          if (n1 == null) {
+            return n2 == null ? 0 : -1;
+          } else if (n2 == null) {
+            return -1;
+          } else {
+            return n1.ordinal() - n2.ordinal();
+          }
+        }
+      });
+
+      for (FileSummary.Section s : sections) {
         channel.position(s.getOffset());
         InputStream in = new BufferedInputStream(new LimitInputStream(fin,
             s.getLength()));
@@ -165,6 +191,9 @@ final class FSImageFormatProtobuf {
         case NS_INFO:
           loadNameSystemSection(in, s);
          break;
+        case STRING_TABLE:
+          loadStringTableSection(in);
+          break;
         case INODE:
          inodeLoader.loadINodeSection(in);
          break;
@@ -190,12 +219,23 @@ final class FSImageFormatProtobuf {
       fsn.setLastAllocatedBlockId(s.getLastAllocatedBlockId());
       imgTxId = s.getTransactionId();
     }
+
+    private void loadStringTableSection(InputStream in) throws IOException {
+      StringTableSection s = StringTableSection.parseDelimitedFrom(in);
+      stringTable = new String[s.getNumEntry() + 1];
+      for (int i = 0; i < s.getNumEntry(); ++i) {
+        StringTableSection.Entry e = StringTableSection.Entry
+            .parseDelimitedFrom(in);
+        stringTable[e.getId()] = e.getStr();
+      }
+    }
   }
 
   static final class Saver {
     final SaveNamespaceContext context;
     private long currentOffset = MAGIC_HEADER.length;
     private MD5Hash savedDigest;
+    private HashMap<String, Integer> stringMap = Maps.newHashMap();
 
     private FileChannel fileChannel;
     // OutputStream for the section data
@@ -284,6 +324,7 @@ final class FSImageFormatProtobuf {
 
       saveNameSystemSection(b);
       saveInodes(b);
+      saveStringTableSection(b);
 
       // Flush the buffered data into the file before appending the header
       flushSectionOutputStream();
@@ -316,14 +357,46 @@ final class FSImageFormatProtobuf {
       commitSection(summary, SectionName.NS_INFO);
     }
+
+    private void saveStringTableSection(FileSummary.Builder summary) throws IOException {
+      OutputStream out = sectionOutputStream;
+      StringTableSection.Builder b = StringTableSection.newBuilder()
+          .setNumEntry(stringMap.size());
+      b.build().writeDelimitedTo(out);
+      for (Entry<String, Integer> e : stringMap.entrySet()) {
+        StringTableSection.Entry.Builder eb = StringTableSection.Entry
+            .newBuilder().setId(e.getValue()).setStr(e.getKey());
+        eb.build().writeDelimitedTo(out);
+      }
+      commitSection(summary, SectionName.STRING_TABLE);
+    }
+
+    int getStringId(String str) {
+      if (str == null) {
+        return 0;
+      }
+
+      Integer v = stringMap.get(str);
+      if (v == null) {
+        int nv = stringMap.size() + 1;
+        stringMap.put(str, nv);
+        return nv;
+      }
+      return v;
+    }
   }
 
   /**
-   * Supported section name
+   * Supported section name. The order of the enum determines the order of
+   * loading.
    */
   enum SectionName {
-    INODE("INODE"), INODE_DIR("INODE_DIR"), NS_INFO("NS_INFO"),
-    FILES_UNDERCONSTRUCTION("FILES_UNDERCONSTRUCTION");
+    NS_INFO("NS_INFO"),
+    STRING_TABLE("STRING_TABLE"),
+    INODE("INODE"), INODE_DIR("INODE_DIR"),
+    FILES_UNDERCONSTRUCTION("FILES_UNDERCONSTRUCTION"),
+    SECRET_MANAGER("SECRET_MANAGER"),
+    CACHE_MANAGER("CACHE_MANAGER");
 
     private static final SectionName[] values = SectionName.values();
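The saver assigns string ids lazily as inodes are written, and the loader rebuilds the table as a plain array; id 0 is reserved for null so that a missing user or group round-trips as null. Below is a minimal standalone model of that scheme (class and method names invented for the example), following the same convention as getStringId() and loadStringTableSection() above.

// Illustrative only -- a standalone model of the string-table id scheme used by
// the Saver (getStringId) and the Loader (stringTable array) above.
import java.util.HashMap;
import java.util.Map;

public class StringTableExample {
  private final Map<String, Integer> stringMap = new HashMap<String, Integer>();

  // Same convention as the patch: null maps to 0, real strings get 1, 2, 3, ...
  int getStringId(String str) {
    if (str == null) {
      return 0;
    }
    Integer v = stringMap.get(str);
    if (v == null) {
      int nv = stringMap.size() + 1;
      stringMap.put(str, nv);
      return nv;
    }
    return v;
  }

  // The loader side: an array sized numEntry + 1, with index 0 left null.
  String[] toTable() {
    String[] table = new String[stringMap.size() + 1];
    for (Map.Entry<String, Integer> e : stringMap.entrySet()) {
      table[e.getValue()] = e.getKey();
    }
    return table;
  }

  public static void main(String[] args) {
    StringTableExample t = new StringTableExample();
    int hdfs = t.getStringId("hdfs");    // 1
    int staff = t.getStringId("staff");  // 2
    int again = t.getStringId("hdfs");   // still 1 -- deduplicated
    String[] table = t.toTable();
    System.out.println(hdfs + " " + staff + " " + again + " " + table[1]);
  }
}

Because every inode stores only the ids, a user or group name that appears on millions of inodes is written to the image exactly once, in the STRING_TABLE section.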
Modified: hadoop/common/branches/HDFS-5698/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/fsimage.proto
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-5698/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/fsimage.proto?rev=1559302&r1=1559301&r2=1559302&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-5698/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/fsimage.proto (original)
+++ hadoop/common/branches/HDFS-5698/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/fsimage.proto Sat Jan 18 01:23:00 2014
@@ -73,15 +73,13 @@ message NameSystemSection {
 }
 
 /**
+ * Permission is serialized as a 64-bit long. [0:24):[25:48):[48:64) (in Big Endian).
+ * The first and the second parts are the string ids of the user and
+ * group name, and the last 16 bits are the permission bits.
+ *
  * Name: INODE
  */
 message INodeSection {
-  message Permission {
-    optional string user = 1;
-    optional string group = 2;
-    optional uint32 permission = 3;
-  }
-
   /**
    * under-construction feature for INodeFile
    */
@@ -95,7 +93,7 @@ message INodeSection {
     optional uint64 modificationTime = 2;
     optional uint64 accessTime = 3;
     optional uint64 preferredBlockSize = 4;
-    optional Permission permission = 5;
+    optional fixed64 permission = 5;
     repeated BlockProto blocks = 6;
     optional FileUnderConstructionFeature fileUC = 7;
   }
@@ -106,11 +104,11 @@ message INodeSection {
     optional uint64 nsQuota = 2;
     // diskspace quota
     optional uint64 dsQuota = 3;
-    optional Permission permission = 4;
+    optional fixed64 permission = 4;
   }
 
   message INodeSymlink {
-    optional Permission permission = 1;
+    optional fixed64 permission = 1;
     optional bytes target = 2;
   }
 
@@ -157,4 +155,17 @@ message INodeDirectorySection {
     repeated uint64 children = 2;
   }
   // repeated DirEntry, ended at the boundary of the section.
-}
\ No newline at end of file
+}
+
+/**
+ * This section maps string to id
+ * NAME: STRING_TABLE
+ **/
+message StringTableSection {
+  message Entry {
+    optional uint32 id = 1;
+    optional string str = 2;
+  }
+  optional uint32 numEntry = 1;
+  // repeated Entry
+}
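The "// repeated Entry" comment means the section has no containing message: one delimited StringTableSection header carrying numEntry is followed by numEntry delimited Entry messages, exactly as saveStringTableSection() and loadStringTableSection() do above. A hedged round-trip sketch of that framing, assuming the FsImageProto classes generated from this fsimage.proto are on the classpath; the local names and sample strings are illustrative only.

// Illustrative only -- write/read framing of the STRING_TABLE section, using
// the generated FsImageProto.StringTableSection message from the patch above.
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.StringTableSection;

public class StringTableFramingExample {
  public static void main(String[] args) throws IOException {
    String[] names = { null, "hdfs", "staff" };  // index 0 reserved for null

    // Save side: a delimited header with the entry count, then delimited entries.
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    StringTableSection.newBuilder().setNumEntry(names.length - 1)
        .build().writeDelimitedTo(out);
    for (int id = 1; id < names.length; ++id) {
      StringTableSection.Entry.newBuilder().setId(id).setStr(names[id])
          .build().writeDelimitedTo(out);
    }

    // Load side: mirrors loadStringTableSection().
    ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
    StringTableSection s = StringTableSection.parseDelimitedFrom(in);
    String[] table = new String[s.getNumEntry() + 1];
    for (int i = 0; i < s.getNumEntry(); ++i) {
      StringTableSection.Entry e = StringTableSection.Entry.parseDelimitedFrom(in);
      table[e.getId()] = e.getStr();
    }
    System.out.println(table[1] + " " + table[2]);  // hdfs staff
  }
}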