HDFS-7285. Erasure Coding Support inside HDFS.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/bc2564af Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/bc2564af Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/bc2564af Branch: refs/heads/HDFS-7285-merge Commit: bc2564afe511eab0a5cfc3cc3d9b68fe5caa7c77 Parents: 1c12adb Author: Zhe Zhang <[email protected]> Authored: Thu Aug 6 23:24:03 2015 -0700 Committer: Zhe Zhang <[email protected]> Committed: Wed Aug 12 11:17:53 2015 -0700 ---------------------------------------------------------------------- .../hadoop-common/CHANGES-HDFS-EC-7285.txt | 74 + .../hadoop/fs/CommonConfigurationKeys.java | 15 + .../org/apache/hadoop/fs/FSOutputSummer.java | 4 + .../main/java/org/apache/hadoop/fs/FsShell.java | 8 +- .../apache/hadoop/io/erasurecode/CodecUtil.java | 144 ++ .../apache/hadoop/io/erasurecode/ECBlock.java | 80 ++ .../hadoop/io/erasurecode/ECBlockGroup.java | 100 ++ .../apache/hadoop/io/erasurecode/ECChunk.java | 87 ++ .../apache/hadoop/io/erasurecode/ECSchema.java | 257 ++++ .../hadoop/io/erasurecode/SchemaLoader.java | 152 +++ .../erasurecode/codec/AbstractErasureCodec.java | 51 + .../io/erasurecode/codec/ErasureCodec.java | 49 + .../io/erasurecode/codec/RSErasureCodec.java | 43 + .../io/erasurecode/codec/XORErasureCodec.java | 44 + .../erasurecode/coder/AbstractErasureCoder.java | 62 + .../coder/AbstractErasureCodingStep.java | 59 + .../coder/AbstractErasureDecoder.java | 167 +++ .../coder/AbstractErasureEncoder.java | 60 + .../io/erasurecode/coder/ErasureCoder.java | 77 ++ .../io/erasurecode/coder/ErasureCodingStep.java | 55 + .../erasurecode/coder/ErasureDecodingStep.java | 52 + .../erasurecode/coder/ErasureEncodingStep.java | 49 + .../io/erasurecode/coder/RSErasureDecoder.java | 67 + .../io/erasurecode/coder/RSErasureEncoder.java | 67 + .../io/erasurecode/coder/XORErasureDecoder.java | 86 ++ .../io/erasurecode/coder/XORErasureEncoder.java | 53 + 
.../io/erasurecode/grouper/BlockGrouper.java | 90 ++ .../rawcoder/AbstractRawErasureCoder.java | 138 ++ .../rawcoder/AbstractRawErasureDecoder.java | 207 +++ .../rawcoder/AbstractRawErasureEncoder.java | 136 ++ .../io/erasurecode/rawcoder/RSRawDecoder.java | 216 +++ .../io/erasurecode/rawcoder/RSRawEncoder.java | 79 ++ .../rawcoder/RSRawErasureCoderFactory.java | 34 + .../erasurecode/rawcoder/RawErasureCoder.java | 66 + .../rawcoder/RawErasureCoderFactory.java | 42 + .../erasurecode/rawcoder/RawErasureDecoder.java | 88 ++ .../erasurecode/rawcoder/RawErasureEncoder.java | 64 + .../io/erasurecode/rawcoder/XORRawDecoder.java | 83 ++ .../io/erasurecode/rawcoder/XORRawEncoder.java | 77 ++ .../rawcoder/XORRawErasureCoderFactory.java | 34 + .../io/erasurecode/rawcoder/util/DumpUtil.java | 85 ++ .../erasurecode/rawcoder/util/GaloisField.java | 561 ++++++++ .../io/erasurecode/rawcoder/util/RSUtil.java | 39 + .../hadoop/io/erasurecode/BufferAllocator.java | 91 ++ .../hadoop/io/erasurecode/TestCoderBase.java | 500 +++++++ .../hadoop/io/erasurecode/TestECSchema.java | 51 + .../hadoop/io/erasurecode/TestSchemaLoader.java | 74 + .../erasurecode/coder/TestErasureCoderBase.java | 297 ++++ .../erasurecode/coder/TestRSErasureCoder.java | 126 ++ .../io/erasurecode/coder/TestXORCoder.java | 64 + .../io/erasurecode/rawcoder/TestRSRawCoder.java | 118 ++ .../rawcoder/TestRSRawCoderBase.java | 58 + .../erasurecode/rawcoder/TestRawCoderBase.java | 232 ++++ .../erasurecode/rawcoder/TestXORRawCoder.java | 66 + hadoop-hdfs-project/hadoop-hdfs-client/pom.xml | 1 + .../hdfs/client/HdfsClientConfigKeys.java | 12 + .../hadoop/hdfs/protocol/ClientProtocol.java | 27 + .../hadoop/hdfs/protocol/ErasureCodingZone.java | 66 + .../hadoop/hdfs/protocol/HdfsConstants.java | 11 + .../hadoop/hdfs/protocol/HdfsFileStatus.java | 16 +- .../hadoop/hdfs/protocol/LocatedBlock.java | 8 +- .../hadoop/hdfs/protocol/LocatedBlocks.java | 26 +- .../hdfs/protocol/LocatedStripedBlock.java | 86 ++ 
.../protocol/SnapshottableDirectoryStatus.java | 2 +- .../apache/hadoop/hdfs/web/JsonUtilClient.java | 4 +- .../src/main/proto/ClientNamenodeProtocol.proto | 7 + .../src/main/proto/erasurecoding.proto | 71 + .../src/main/proto/hdfs.proto | 36 +- .../hadoop-hdfs/CHANGES-HDFS-EC-7285.txt | 396 ++++++ .../hadoop-hdfs/src/main/bin/hdfs | 6 + .../org/apache/hadoop/hdfs/BlockReader.java | 10 +- .../apache/hadoop/hdfs/BlockReaderLocal.java | 5 + .../hadoop/hdfs/BlockReaderLocalLegacy.java | 5 + .../java/org/apache/hadoop/hdfs/DFSClient.java | 109 +- .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 19 +- .../org/apache/hadoop/hdfs/DFSInputStream.java | 66 +- .../org/apache/hadoop/hdfs/DFSOutputStream.java | 23 +- .../java/org/apache/hadoop/hdfs/DFSPacket.java | 34 +- .../hadoop/hdfs/DFSStripedInputStream.java | 939 +++++++++++++ .../hadoop/hdfs/DFSStripedOutputStream.java | 653 +++++++++ .../java/org/apache/hadoop/hdfs/DFSUtil.java | 9 +- .../org/apache/hadoop/hdfs/DataStreamer.java | 72 +- .../hadoop/hdfs/DistributedFileSystem.java | 66 + .../apache/hadoop/hdfs/RemoteBlockReader.java | 5 + .../apache/hadoop/hdfs/RemoteBlockReader2.java | 5 + .../apache/hadoop/hdfs/StripedDataStreamer.java | 240 ++++ .../apache/hadoop/hdfs/client/HdfsAdmin.java | 40 + .../hadoop/hdfs/client/impl/DfsClientConf.java | 22 +- .../hdfs/protocol/HdfsLocatedFileStatus.java | 6 +- ...tNamenodeProtocolServerSideTranslatorPB.java | 68 +- .../ClientNamenodeProtocolTranslatorPB.java | 79 +- .../DatanodeProtocolClientSideTranslatorPB.java | 2 +- .../DatanodeProtocolServerSideTranslatorPB.java | 2 +- .../apache/hadoop/hdfs/protocolPB/PBHelper.java | 326 ++++- .../hadoop/hdfs/server/balancer/Balancer.java | 6 +- .../hadoop/hdfs/server/balancer/Dispatcher.java | 150 +- .../server/blockmanagement/BlockCollection.java | 17 +- .../server/blockmanagement/BlockIdManager.java | 58 +- .../hdfs/server/blockmanagement/BlockInfo.java | 73 +- .../blockmanagement/BlockInfoContiguous.java | 45 +- 
.../BlockInfoContiguousUnderConstruction.java | 189 +-- .../blockmanagement/BlockInfoStriped.java | 286 ++++ .../BlockInfoStripedUnderConstruction.java | 298 ++++ .../BlockInfoUnderConstruction.java | 84 ++ .../server/blockmanagement/BlockManager.java | 1281 ++++++++++++------ .../blockmanagement/BlockPlacementPolicies.java | 54 + .../blockmanagement/BlockPlacementPolicy.java | 26 +- .../BlockPlacementPolicyRackFaultTolarent.java | 154 +++ .../hdfs/server/blockmanagement/BlocksMap.java | 25 +- .../blockmanagement/DatanodeDescriptor.java | 62 +- .../server/blockmanagement/DatanodeManager.java | 24 +- .../blockmanagement/DatanodeStorageInfo.java | 17 +- .../blockmanagement/DecommissionManager.java | 83 +- .../ReplicaUnderConstruction.java | 119 ++ .../SequentialBlockGroupIdGenerator.java | 85 ++ .../SequentialBlockIdGenerator.java | 6 +- .../blockmanagement/UnderReplicatedBlocks.java | 51 +- .../hdfs/server/common/HdfsServerConstants.java | 5 + .../hdfs/server/datanode/BPOfferService.java | 8 + .../hadoop/hdfs/server/datanode/DNConf.java | 27 + .../hadoop/hdfs/server/datanode/DataNode.java | 58 +- .../erasurecode/ErasureCodingWorker.java | 988 ++++++++++++++ .../apache/hadoop/hdfs/server/mover/Mover.java | 36 +- .../namenode/ErasureCodingSchemaManager.java | 127 ++ .../namenode/ErasureCodingZoneManager.java | 170 +++ .../hdfs/server/namenode/FSDirAppendOp.java | 5 + .../hdfs/server/namenode/FSDirAttrOp.java | 7 +- .../hdfs/server/namenode/FSDirConcatOp.java | 6 + .../server/namenode/FSDirErasureCodingOp.java | 217 +++ .../hdfs/server/namenode/FSDirRenameOp.java | 2 + .../server/namenode/FSDirStatAndListingOp.java | 27 +- .../hdfs/server/namenode/FSDirTruncateOp.java | 16 +- .../hdfs/server/namenode/FSDirWriteFileOp.java | 170 ++- .../hdfs/server/namenode/FSDirectory.java | 6 +- .../hdfs/server/namenode/FSEditLogLoader.java | 101 +- .../hdfs/server/namenode/FSImageFormat.java | 22 +- .../server/namenode/FSImageFormatPBINode.java | 57 +- 
.../server/namenode/FSImageFormatProtobuf.java | 9 +- .../server/namenode/FSImageSerialization.java | 13 +- .../hdfs/server/namenode/FSNamesystem.java | 304 ++++- .../namenode/FileUnderConstructionFeature.java | 10 +- .../hadoop/hdfs/server/namenode/INodeFile.java | 152 ++- .../server/namenode/INodeFileAttributes.java | 12 +- .../hdfs/server/namenode/LeaseManager.java | 2 +- .../server/namenode/NameNodeLayoutVersion.java | 3 +- .../hdfs/server/namenode/NameNodeRpcServer.java | 34 +- .../hdfs/server/namenode/NamenodeFsck.java | 227 +++- .../hadoop/hdfs/server/namenode/Namesystem.java | 15 +- .../hadoop/hdfs/server/namenode/SafeMode.java | 5 +- .../snapshot/FSImageFormatPBSnapshot.java | 14 +- .../server/namenode/snapshot/FileDiffList.java | 5 +- .../server/protocol/BlockECRecoveryCommand.java | 153 +++ .../server/protocol/BlocksWithLocations.java | 25 + .../hdfs/server/protocol/DatanodeProtocol.java | 1 + .../hadoop/hdfs/tools/erasurecode/ECCli.java | 48 + .../hdfs/tools/erasurecode/ECCommand.java | 226 +++ .../hadoop/hdfs/util/StripedBlockUtil.java | 947 +++++++++++++ .../src/main/proto/DatanodeProtocol.proto | 10 + .../hadoop-hdfs/src/main/proto/fsimage.proto | 3 + .../src/main/resources/hdfs-default.xml | 31 +- .../hadoop/cli/CLITestCmdErasureCoding.java | 38 + .../apache/hadoop/cli/TestErasureCodingCLI.java | 114 ++ .../cli/util/CLICommandErasureCodingCli.java | 21 + .../cli/util/ErasureCodingCliCmdExecutor.java | 37 + .../apache/hadoop/hdfs/BlockReaderTestUtil.java | 7 +- .../org/apache/hadoop/hdfs/DFSTestUtil.java | 182 ++- .../org/apache/hadoop/hdfs/MiniDFSCluster.java | 2 - .../apache/hadoop/hdfs/StripedFileTestUtil.java | 261 ++++ .../hadoop/hdfs/TestBlockReaderFactory.java | 16 +- .../hadoop/hdfs/TestDFSClientRetries.java | 6 +- .../hadoop/hdfs/TestDFSStripedInputStream.java | 335 +++++ .../hadoop/hdfs/TestDFSStripedOutputStream.java | 293 ++++ .../TestDFSStripedOutputStreamWithFailure.java | 329 +++++ .../org/apache/hadoop/hdfs/TestDFSUtil.java | 2 +- 
.../org/apache/hadoop/hdfs/TestECSchemas.java | 54 + .../apache/hadoop/hdfs/TestEncryptionZones.java | 2 +- .../hadoop/hdfs/TestErasureCodingZones.java | 223 +++ .../hadoop/hdfs/TestFileStatusWithECschema.java | 65 + .../java/org/apache/hadoop/hdfs/TestLease.java | 4 +- .../hdfs/TestReadStripedFileWithDecoding.java | 358 +++++ .../TestReadStripedFileWithMissingBlocks.java | 150 ++ .../hadoop/hdfs/TestRecoverStripedFile.java | 400 ++++++ .../org/apache/hadoop/hdfs/TestSafeMode.java | 4 +- .../hdfs/TestSafeModeWithStripedFile.java | 150 ++ .../hadoop/hdfs/TestWriteReadStripedFile.java | 251 ++++ .../hdfs/TestWriteStripedFileWithFailure.java | 162 +++ .../hadoop/hdfs/protocol/TestLayoutVersion.java | 3 +- .../hadoop/hdfs/protocolPB/TestPBHelper.java | 170 ++- .../hdfs/server/balancer/TestBalancer.java | 87 ++ .../blockmanagement/BlockManagerTestUtil.java | 10 +- .../server/blockmanagement/TestBlockInfo.java | 2 +- .../blockmanagement/TestBlockInfoStriped.java | 250 ++++ .../TestBlockInfoUnderConstruction.java | 3 +- .../blockmanagement/TestBlockManager.java | 63 +- .../blockmanagement/TestBlockTokenWithDFS.java | 422 +++--- .../TestBlockTokenWithDFSStriped.java | 115 ++ .../blockmanagement/TestHeartbeatHandling.java | 1 - .../TestNameNodePrunesMissingStorages.java | 5 +- .../server/blockmanagement/TestNodeCount.java | 2 +- .../TestOverReplicatedBlocks.java | 4 +- .../blockmanagement/TestReplicationPolicy.java | 25 +- .../TestSequentialBlockGroupId.java | 222 +++ .../TestUnderReplicatedBlockQueues.java | 62 + .../server/datanode/SimulatedFSDataset.java | 2 +- .../datanode/TestIncrementalBrVariations.java | 14 +- .../hadoop/hdfs/server/mover/TestMover.java | 125 +- .../hdfs/server/namenode/NameNodeAdapter.java | 9 +- .../TestAddOverReplicatedStripedBlocks.java | 266 ++++ .../server/namenode/TestAddStripedBlocks.java | 433 ++++++ .../hdfs/server/namenode/TestDeadDatanode.java | 3 +- .../server/namenode/TestFSEditLogLoader.java | 273 ++++ 
.../hdfs/server/namenode/TestFSImage.java | 345 ++++- .../hdfs/server/namenode/TestFileTruncate.java | 4 +- .../hadoop/hdfs/server/namenode/TestFsck.java | 83 +- .../hdfs/server/namenode/TestINodeFile.java | 2 +- .../namenode/TestQuotaWithStripedBlocks.java | 125 ++ .../namenode/TestRecoverStripedBlocks.java | 169 +++ .../server/namenode/TestStripedINodeFile.java | 284 ++++ .../namenode/snapshot/SnapshotTestHelper.java | 2 +- ...TestOfflineImageViewerWithStripedBlocks.java | 162 +++ .../hadoop/hdfs/util/TestStripedBlockUtil.java | 279 ++++ .../apache/hadoop/hdfs/web/TestJsonUtil.java | 2 +- .../test/resources/testErasureCodingConf.xml | 377 ++++++ 223 files changed, 23039 insertions(+), 1587 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc2564af/hadoop-common-project/hadoop-common/CHANGES-HDFS-EC-7285.txt ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/CHANGES-HDFS-EC-7285.txt b/hadoop-common-project/hadoop-common/CHANGES-HDFS-EC-7285.txt new file mode 100644 index 0000000..1f3006e --- /dev/null +++ b/hadoop-common-project/hadoop-common/CHANGES-HDFS-EC-7285.txt @@ -0,0 +1,74 @@ + BREAKDOWN OF HADOOP-11264 SUBTASKS AND RELATED JIRAS (Common part of HDFS-7285) + + HADOOP-11514. Raw Erasure Coder API for concrete encoding and decoding + (Kai Zheng via umamahesh) + + HADOOP-11534. Minor improvements for raw erasure coders + ( Kai Zheng via vinayakumarb ) + + HADOOP-11541. Raw XOR coder + ( Kai Zheng ) + + HADOOP-11542. Raw Reed-Solomon coder in pure Java. Contributed by Kai Zheng + ( Kai Zheng ) + + HADOOP-11643. Define EC schema API for ErasureCodec. Contributed by Kai Zheng + ( Kai Zheng ) + + HADOOP-11646. Erasure Coder API for encoding and decoding of block group + ( Kai Zheng via vinayakumarb ) + + HADOOP-11705. Make erasure coder configurable. Contributed by Kai Zheng + ( Kai Zheng ) + + HADOOP-11706. 
Refine a little bit erasure coder API. Contributed by Kai Zheng + ( Kai Zheng ) + + HADOOP-11707. Add factory to create raw erasure coder. Contributed by Kai Zheng + ( Kai Zheng ) + + HADOOP-11647. Reed-Solomon ErasureCoder. Contributed by Kai Zheng + ( Kai Zheng ) + + HADOOP-11782 Correct two thrown messages in ECSchema class. Contributed by Xinwei Qin + ( Xinwei Qin via Kai Zheng ) + + HADOOP-11740. Combine erasure encoder and decoder interfaces (Zhe Zhang) + + HADOOP-11805 Better to rename some raw erasure coders. Contributed by Kai Zheng + ( Kai Zheng ) + + HADOOP-11645. Erasure Codec API covering the essential aspects for an erasure code + ( Kai Zheng via vinayakumarb ) + + HADOOP-11818. Minor improvements for erasurecode classes. (Rakesh R via Kai Zheng) + + HADOOP-11841. Remove unused ecschema-def.xml files. (szetszwo) + + HADOOP-11921. Enhance tests for erasure coders. (Kai Zheng via Zhe Zhang) + + HADOOP-11920. Refactor some codes for erasure coders. (Kai Zheng via Zhe Zhang) + + HADOOP-11566. Add tests and fix for erasure coders to recover erased parity + units. (Kai Zheng via Zhe Zhang) + + HADOOP-11938. Enhance ByteBuffer version encode/decode API of raw erasure + coder. (Kai Zheng via Zhe Zhang) + + HADOOP-12013. Generate fixed data to perform erasure coder test. (Kai Zheng) + + HADOOP-12029. Remove chunkSize from ECSchema as its not required for coders + (vinayakumarb) + + HADOOP-11847. Enhance raw coder allowing to read least required inputs in decoding. + (Kai Zheng) + + HADOOP-12011. Allow to dump verbose information to ease debugging in raw erasure coders + (Kai Zheng) + + HADOOP-12065. Using more meaningful keys in EC schema. (Kai Zheng) + + HDFS-8557. Allow to configure RS and XOR raw coders (Kai Zheng) + + HADOOP-12060. Fix ByteBuffer usage for raw erasure coders. 
(Kai Zheng via + jing9) http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc2564af/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java index 2721466..9588254 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java @@ -137,6 +137,21 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic { false; /** + * Erasure Coding configuration family + */ + + /** Supported erasure codec classes */ + public static final String IO_ERASURECODE_CODECS_KEY = "io.erasurecode.codecs"; + + /** Raw coder factory for the RS codec. */ + public static final String IO_ERASURECODE_CODEC_RS_RAWCODER_KEY = + "io.erasurecode.codec.rs.rawcoder"; + + /** Raw coder factory for the XOR codec. 
*/ + public static final String IO_ERASURECODE_CODEC_XOR_RAWCODER_KEY = + "io.erasurecode.codec.xor.rawcoder"; + + /** * Service Authorization */ public static final String http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc2564af/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSOutputSummer.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSOutputSummer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSOutputSummer.java index bdc5585..a8a7494 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSOutputSummer.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSOutputSummer.java @@ -196,6 +196,10 @@ abstract public class FSOutputSummer extends OutputStream { return sum.getChecksumSize(); } + protected DataChecksum getDataChecksum() { + return sum; + } + protected TraceScope createWriteTraceScope() { return NullScope.INSTANCE; } http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc2564af/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsShell.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsShell.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsShell.java index a0510be..bee7c8c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsShell.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsShell.java @@ -124,6 +124,10 @@ public class FsShell extends Configured implements Tool { return getTrash().getCurrentTrashDir(); } + protected String getUsagePrefix() { + return usagePrefix; + } + // NOTE: Usage/Help are inner classes to allow access to outer methods // that access commandFactory @@ -207,7 +211,7 @@ public class 
FsShell extends Configured implements Tool { } } else { // display help or usage for all commands - out.println(usagePrefix); + out.println(getUsagePrefix()); // display list of short usages ArrayList<Command> instances = new ArrayList<Command>(); @@ -231,7 +235,7 @@ public class FsShell extends Configured implements Tool { } private void printInstanceUsage(PrintStream out, Command instance) { - out.println(usagePrefix + " " + instance.getUsage()); + out.println(getUsagePrefix() + " " + instance.getUsage()); } private void printInstanceHelp(PrintStream out, Command instance) { http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc2564af/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/CodecUtil.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/CodecUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/CodecUtil.java new file mode 100644 index 0000000..027d58b --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/CodecUtil.java @@ -0,0 +1,144 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.erasurecode; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeys; +import org.apache.hadoop.io.erasurecode.rawcoder.*; + +/** + * A codec & coder utility to help create raw coders conveniently. + */ +public final class CodecUtil { + + private CodecUtil() { } + + /** + * Create RS raw encoder according to configuration. + * @param conf configuration possibly with some items to configure the coder + * @param numDataUnits number of data units in a coding group + * @param numParityUnits number of parity units in a coding group + * @return raw encoder + */ + public static RawErasureEncoder createRSRawEncoder( + Configuration conf, int numDataUnits, int numParityUnits) { + RawErasureCoder rawCoder = createRawCoder(conf, + CommonConfigurationKeys.IO_ERASURECODE_CODEC_RS_RAWCODER_KEY, + true, numDataUnits, numParityUnits); + if (rawCoder == null) { + rawCoder = new RSRawEncoder(numDataUnits, numParityUnits); + } + + return (RawErasureEncoder) rawCoder; + } + + /** + * Create RS raw decoder according to configuration. + * @param conf configuration possibly with some items to configure the coder + * @param numDataUnits number of data units in a coding group + * @param numParityUnits number of parity units in a coding group + * @return raw decoder + */ + public static RawErasureDecoder createRSRawDecoder( + Configuration conf, int numDataUnits, int numParityUnits) { + RawErasureCoder rawCoder = createRawCoder(conf, + CommonConfigurationKeys.IO_ERASURECODE_CODEC_RS_RAWCODER_KEY, + false, numDataUnits, numParityUnits); + if (rawCoder == null) { + rawCoder = new RSRawDecoder(numDataUnits, numParityUnits); + } + + return (RawErasureDecoder) rawCoder; + } + + /** + * Create XOR raw encoder according to configuration. 
+ * @param conf configuration possibly with some items to configure the coder + * @param numDataUnits number of data units in a coding group + * @param numParityUnits number of parity units in a coding group + * @return raw encoder + */ + public static RawErasureEncoder createXORRawEncoder( + Configuration conf, int numDataUnits, int numParityUnits) { + RawErasureCoder rawCoder = createRawCoder(conf, + CommonConfigurationKeys.IO_ERASURECODE_CODEC_XOR_RAWCODER_KEY, + true, numDataUnits, numParityUnits); + if (rawCoder == null) { + rawCoder = new XORRawEncoder(numDataUnits, numParityUnits); + } + + return (RawErasureEncoder) rawCoder; + } + + /** + * Create XOR raw decoder according to configuration. + * @param conf configuration possibly with some items to configure the coder + * @param numDataUnits number of data units in a coding group + * @param numParityUnits number of parity units in a coding group + * @return raw decoder + */ + public static RawErasureDecoder createXORRawDecoder( + Configuration conf, int numDataUnits, int numParityUnits) { + RawErasureCoder rawCoder = createRawCoder(conf, + CommonConfigurationKeys.IO_ERASURECODE_CODEC_XOR_RAWCODER_KEY, + false, numDataUnits, numParityUnits); + if (rawCoder == null) { + rawCoder = new XORRawDecoder(numDataUnits, numParityUnits); + } + + return (RawErasureDecoder) rawCoder; + } + + /** + * Create raw coder using specified conf and raw coder factory key. 
+ * @param conf configuration possibly with some items to configure the coder + * @param rawCoderFactoryKey configuration key to find the raw coder factory + * @param isEncoder is encoder or not we're going to create + * @param numDataUnits number of data units in a coding group + * @param numParityUnits number of parity units in a coding group + * @return raw coder + */ + public static RawErasureCoder createRawCoder(Configuration conf, + String rawCoderFactoryKey, boolean isEncoder, int numDataUnits, + int numParityUnits) { + + if (conf == null) { + return null; + } + + Class<? extends RawErasureCoderFactory> factClass = null; + factClass = conf.getClass(rawCoderFactoryKey, + factClass, RawErasureCoderFactory.class); + + if (factClass == null) { + return null; + } + + RawErasureCoderFactory fact; + try { + fact = factClass.newInstance(); + } catch (InstantiationException e) { + throw new RuntimeException("Failed to create raw coder", e); + } catch (IllegalAccessException e) { + throw new RuntimeException("Failed to create raw coder", e); + } + + return isEncoder ? fact.createEncoder(numDataUnits, numParityUnits) : + fact.createDecoder(numDataUnits, numParityUnits); + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc2564af/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECBlock.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECBlock.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECBlock.java new file mode 100644 index 0000000..5c0a160 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECBlock.java @@ -0,0 +1,80 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.erasurecode; + +/** + * A wrapper of block level data source/output that {@link ECChunk}s can be + * extracted from. For HDFS, it can be an HDFS block (250MB). Note it only cares + * about erasure coding specific logic thus avoids coupling with any HDFS block + * details. We can have something like HdfsBlock extend it. + */ +public class ECBlock { + + private boolean isParity; + private boolean isErased; + + /** + * A default constructor. isParity and isErased are false by default. + */ + public ECBlock() { + this(false, false); + } + + /** + * A constructor specifying isParity and isErased. + * @param isParity is a parity block + * @param isErased is erased or not + */ + public ECBlock(boolean isParity, boolean isErased) { + this.isParity = isParity; + this.isErased = isErased; + } + + /** + * Set true if it's for a parity block. + * @param isParity is parity or not + */ + public void setParity(boolean isParity) { + this.isParity = isParity; + } + + /** + * Set true if the block is missing. 
+ * @param isErased is erased or not + */ + public void setErased(boolean isErased) { + this.isErased = isErased; + } + + /** + * + * @return true if it's parity block, otherwise false + */ + public boolean isParity() { + return isParity; + } + + /** + * + * @return true if it's erased due to erasure, otherwise false + */ + public boolean isErased() { + return isErased; + } + +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc2564af/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECBlockGroup.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECBlockGroup.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECBlockGroup.java new file mode 100644 index 0000000..91e4fb8 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECBlockGroup.java @@ -0,0 +1,100 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.erasurecode; + +/** + * A group of blocks or {@link ECBlock} incurred in an erasure coding task. 
+ */ +public class ECBlockGroup { + + private ECBlock[] dataBlocks; + private ECBlock[] parityBlocks; + + /** + * A constructor specifying data blocks and parity blocks. + * @param dataBlocks data blocks in the group + * @param parityBlocks parity blocks in the group + */ + public ECBlockGroup(ECBlock[] dataBlocks, ECBlock[] parityBlocks) { + this.dataBlocks = dataBlocks; + this.parityBlocks = parityBlocks; + } + + /** + * Get data blocks + * @return data blocks + */ + public ECBlock[] getDataBlocks() { + return dataBlocks; + } + + /** + * Get parity blocks + * @return parity blocks + */ + public ECBlock[] getParityBlocks() { + return parityBlocks; + } + + /** + * Any erased data block? + * @return true if any erased data block, false otherwise + */ + public boolean anyErasedDataBlock() { + for (int i = 0; i < dataBlocks.length; ++i) { + if (dataBlocks[i].isErased()) { + return true; + } + } + + return false; + } + + /** + * Any erased parity block? + * @return true if any erased parity block, false otherwise + */ + public boolean anyErasedParityBlock() { + for (int i = 0; i < parityBlocks.length; ++i) { + if (parityBlocks[i].isErased()) { + return true; + } + } + + return false; + } + + /** + * Get erased blocks count + * @return erased count of blocks + */ + public int getErasedCount() { + int erasedCount = 0; + + for (ECBlock dataBlock : dataBlocks) { + if (dataBlock.isErased()) erasedCount++; + } + + for (ECBlock parityBlock : parityBlocks) { + if (parityBlock.isErased()) erasedCount++; + } + + return erasedCount; + } + +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc2564af/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECChunk.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECChunk.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECChunk.java new file 
mode 100644 index 0000000..d0120d8 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECChunk.java @@ -0,0 +1,87 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.erasurecode; + +import java.nio.ByteBuffer; + +/** + * A wrapper for ByteBuffer or bytes array for an erasure code chunk. 
/**
 * A wrapper for ByteBuffer or bytes array for an erasure code chunk.
 */
public class ECChunk {

  private final ByteBuffer chunkBuffer;

  /**
   * Wrapping a ByteBuffer. The buffer is kept by reference, not copied.
   * @param buffer buffer to be wrapped by the chunk
   */
  public ECChunk(ByteBuffer buffer) {
    this.chunkBuffer = buffer;
  }

  /**
   * Wrapping a bytes array via {@link ByteBuffer#wrap(byte[])}.
   * @param buffer buffer to be wrapped by the chunk
   */
  public ECChunk(byte[] buffer) {
    this.chunkBuffer = ByteBuffer.wrap(buffer);
  }

  /**
   * Convert to ByteBuffer
   * @return the wrapped ByteBuffer itself (not a copy)
   */
  public ByteBuffer getBuffer() {
    return chunkBuffer;
  }

  /**
   * Convert an array of chunks to an array of ByteBuffers. A null chunk
   * yields a null entry at the same index.
   * @param chunks chunks to convert into buffers
   * @return an array of ByteBuffers, same length as {@code chunks}
   */
  public static ByteBuffer[] toBuffers(ECChunk[] chunks) {
    ByteBuffer[] buffers = new ByteBuffer[chunks.length];

    for (int i = 0; i < chunks.length; i++) {
      ECChunk chunk = chunks[i];
      buffers[i] = (chunk == null) ? null : chunk.getBuffer();
    }

    return buffers;
  }

  /**
   * Convert to a bytes array, just for test usage. Copies the bytes between
   * the buffer's current position and its limit.
   * @return bytes array holding the remaining bytes of the buffer
   */
  public byte[] toBytesArray() {
    // Read through an independent view so that neither the position nor a
    // mark previously set on the wrapped buffer is disturbed. Using
    // mark()/reset() on the wrapped buffer itself would overwrite that mark.
    ByteBuffer view = chunkBuffer.duplicate();
    byte[] bytesArr = new byte[view.remaining()];
    view.get(bytesArr);

    return bytesArr;
  }
}
/**
 * Erasure coding schema that keeps the relevant information: a name, the
 * codec name, the data/parity unit counts and any codec specific options.
 */
public final class ECSchema {
  public static final String NUM_DATA_UNITS_KEY = "numDataUnits";
  public static final String NUM_PARITY_UNITS_KEY = "numParityUnits";
  public static final String CODEC_NAME_KEY = "codec";

  /**
   * A friendly and understandable name that also serves as the identifier
   * that distinguishes this schema from other schemas.
   */
  private final String schemaName;

  /**
   * The erasure codec name associated.
   */
  private final String codecName;

  /**
   * Number of source data units coded
   */
  private final int numDataUnits;

  /**
   * Number of parity units generated in a coding
   */
  private final int numParityUnits;

  /**
   * An erasure code can have its own specific advanced parameters, subject to
   * itself to interpret these key-value settings. Held as an unmodifiable
   * view over a private copy.
   */
  private final Map<String, String> extraOptions;

  /**
   * Constructor with schema name and provided all options. Note the options may
   * contain additional information for the erasure codec to interpret further.
   * The given map is only read; it is never modified.
   * @param schemaName schema name
   * @param allOptions all schema options; must contain the codec name and
   *                   positive integer values for both unit counts
   * @throws IllegalArgumentException if the options are null or empty, the
   *         codec is missing or empty, or a unit count is missing, not an
   *         integer, or not positive
   */
  public ECSchema(String schemaName, Map<String, String> allOptions) {
    assert (schemaName != null && !schemaName.isEmpty());

    this.schemaName = schemaName;

    if (allOptions == null || allOptions.isEmpty()) {
      throw new IllegalArgumentException("No schema options are provided");
    }

    this.codecName = allOptions.get(CODEC_NAME_KEY);
    if (codecName == null || codecName.isEmpty()) {
      throw new IllegalArgumentException("No codec option is provided");
    }

    int tmpNumDataUnits = extractIntOption(NUM_DATA_UNITS_KEY, allOptions);
    int tmpNumParityUnits = extractIntOption(NUM_PARITY_UNITS_KEY, allOptions);
    // extractIntOption reports a missing key as -1
    if (tmpNumDataUnits < 0 || tmpNumParityUnits < 0) {
      throw new IllegalArgumentException(
          "No good option for numDataUnits or numParityUnits found ");
    }
    this.numDataUnits = tmpNumDataUnits;
    this.numParityUnits = tmpNumParityUnits;

    // Strip the well-known keys from a private copy: the caller's map stays
    // intact (it may even be unmodifiable), and later changes to it cannot
    // leak into this schema.
    Map<String, String> remaining = new HashMap<>(allOptions);
    remaining.remove(CODEC_NAME_KEY);
    remaining.remove(NUM_DATA_UNITS_KEY);
    remaining.remove(NUM_PARITY_UNITS_KEY);
    this.extraOptions = Collections.unmodifiableMap(remaining);
  }

  /**
   * Constructor with key parameters provided.
   * @param schemaName schema name
   * @param codecName codec name
   * @param numDataUnits number of data units used in the schema
   * @param numParityUnits number of parity units used in the schema
   */
  public ECSchema(String schemaName, String codecName,
      int numDataUnits, int numParityUnits) {
    this(schemaName, codecName, numDataUnits, numParityUnits, null);
  }

  /**
   * Constructor with key parameters provided. Note the extraOptions may contain
   * additional information for the erasure codec to interpret further.
   * @param schemaName schema name
   * @param codecName codec name
   * @param numDataUnits number of data units used in the schema
   * @param numParityUnits number of parity units used in the schema
   * @param extraOptions extra options to configure the codec; null is treated
   *                     as no extra options. The map is copied.
   */
  public ECSchema(String schemaName, String codecName, int numDataUnits,
      int numParityUnits, Map<String, String> extraOptions) {

    assert (schemaName != null && !schemaName.isEmpty());
    assert (codecName != null && !codecName.isEmpty());
    assert (numDataUnits > 0 && numParityUnits > 0);

    this.schemaName = schemaName;
    this.codecName = codecName;
    this.numDataUnits = numDataUnits;
    this.numParityUnits = numParityUnits;

    // Defensive copy so that the caller cannot alter this schema afterwards.
    Map<String, String> optionsCopy = new HashMap<>();
    if (extraOptions != null) {
      optionsCopy.putAll(extraOptions);
    }
    this.extraOptions = Collections.unmodifiableMap(optionsCopy);
  }

  /**
   * Reads a positive integer option.
   * @param optionKey key to look up
   * @param options options to read from
   * @return the parsed value, or -1 when the key is absent
   * @throws IllegalArgumentException if the value is not an integer or is
   *         not positive
   */
  private static int extractIntOption(String optionKey,
      Map<String, String> options) {
    int result = -1;

    try {
      if (options.containsKey(optionKey)) {
        result = Integer.parseInt(options.get(optionKey));
        if (result <= 0) {
          throw new IllegalArgumentException("Bad option value " + result +
              " found for " + optionKey);
        }
      }
    } catch (NumberFormatException e) {
      throw new IllegalArgumentException("Option value " +
          options.get(optionKey) + " for " + optionKey +
          " is found. It should be an integer");
    }

    return result;
  }

  /**
   * Get the schema name
   * @return schema name
   */
  public String getSchemaName() {
    return schemaName;
  }

  /**
   * Get the codec name
   * @return codec name
   */
  public String getCodecName() {
    return codecName;
  }

  /**
   * Get extra options specific to a erasure code.
   * @return extra options, as an unmodifiable map
   */
  public Map<String, String> getExtraOptions() {
    return extraOptions;
  }

  /**
   * Get required data units count in a coding group
   * @return count of data units
   */
  public int getNumDataUnits() {
    return numDataUnits;
  }

  /**
   * Get required parity units count in a coding group
   * @return count of parity units
   */
  public int getNumParityUnits() {
    return numParityUnits;
  }

  /**
   * Make a meaningful string representation for log output.
   * @return string representation
   */
  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder("ECSchema=[");

    sb.append("Name=").append(schemaName).append(", ");
    sb.append("Codec=").append(codecName).append(", ");
    sb.append(NUM_DATA_UNITS_KEY).append('=').append(numDataUnits)
        .append(", ");
    sb.append(NUM_PARITY_UNITS_KEY).append('=').append(numParityUnits);

    // Each extra option is preceded by a separator, which yields the same
    // text as separating the options from each other and from the header.
    for (Map.Entry<String, String> opt : extraOptions.entrySet()) {
      sb.append(", ").append(opt.getKey()).append('=').append(opt.getValue());
    }

    sb.append("]");

    return sb.toString();
  }

  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }

    ECSchema ecSchema = (ECSchema) o;

    if (numDataUnits != ecSchema.numDataUnits) {
      return false;
    }
    if (numParityUnits != ecSchema.numParityUnits) {
      return false;
    }
    if (!schemaName.equals(ecSchema.schemaName)) {
      return false;
    }
    if (!codecName.equals(ecSchema.codecName)) {
      return false;
    }
    return extraOptions.equals(ecSchema.extraOptions);
  }

  @Override
  public int hashCode() {
    int result = schemaName.hashCode();
    result = 31 * result + codecName.hashCode();
    result = 31 * result + extraOptions.hashCode();
    result = 31 * result + numDataUnits;
    result = 31 * result + numParityUnits;

    return result;
  }
}
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/SchemaLoader.java @@ -0,0 +1,152 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.erasurecode; + +import java.io.File; +import java.io.IOException; +import java.net.URL; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; +import org.w3c.dom.Text; +import org.xml.sax.SAXException; + +/** + * A EC schema loading utility that loads predefined EC schemas from XML file + */ +public class SchemaLoader { + private static final Logger LOG = LoggerFactory.getLogger( + SchemaLoader.class.getName()); + + /** + * Load predefined ec schemas from configuration file. This file is + * expected to be in the XML format. 
+ */ + public List<ECSchema> loadSchema(String schemaFilePath) { + File confFile = getSchemaFile(schemaFilePath); + if (confFile == null) { + LOG.warn("Not found any predefined EC schema file"); + return Collections.emptyList(); + } + + try { + return loadSchema(confFile); + } catch (ParserConfigurationException e) { + throw new RuntimeException("Failed to load schema file: " + confFile); + } catch (IOException e) { + throw new RuntimeException("Failed to load schema file: " + confFile); + } catch (SAXException e) { + throw new RuntimeException("Failed to load schema file: " + confFile); + } + } + + private List<ECSchema> loadSchema(File schemaFile) + throws ParserConfigurationException, IOException, SAXException { + + LOG.info("Loading predefined EC schema file {}", schemaFile); + + // Read and parse the schema file. + DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); + dbf.setIgnoringComments(true); + DocumentBuilder builder = dbf.newDocumentBuilder(); + Document doc = builder.parse(schemaFile); + Element root = doc.getDocumentElement(); + + if (!"schemas".equals(root.getTagName())) { + throw new RuntimeException("Bad EC schema config file: " + + "top-level element not <schemas>"); + } + + NodeList elements = root.getChildNodes(); + List<ECSchema> schemas = new ArrayList<ECSchema>(); + for (int i = 0; i < elements.getLength(); i++) { + Node node = elements.item(i); + if (node instanceof Element) { + Element element = (Element) node; + if ("schema".equals(element.getTagName())) { + ECSchema schema = loadSchema(element); + schemas.add(schema); + } else { + LOG.warn("Bad element in EC schema configuration file: {}", + element.getTagName()); + } + } + } + + return schemas; + } + + /** + * Path to the XML file containing predefined ec schemas. If the path is + * relative, it is searched for in the classpath. + */ + private File getSchemaFile(String schemaFilePath) { + File schemaFile = new File(schemaFilePath); + if (! 
schemaFile.isAbsolute()) { + URL url = Thread.currentThread().getContextClassLoader() + .getResource(schemaFilePath); + if (url == null) { + LOG.warn("{} not found on the classpath.", schemaFilePath); + schemaFile = null; + } else if (! url.getProtocol().equalsIgnoreCase("file")) { + throw new RuntimeException( + "EC predefined schema file " + url + + " found on the classpath is not on the local filesystem."); + } else { + schemaFile = new File(url.getPath()); + } + } + + return schemaFile; + } + + /** + * Loads a schema from a schema element in the configuration file + */ + private ECSchema loadSchema(Element element) { + String schemaName = element.getAttribute("name"); + Map<String, String> ecOptions = new HashMap<String, String>(); + NodeList fields = element.getChildNodes(); + + for (int i = 0; i < fields.getLength(); i++) { + Node fieldNode = fields.item(i); + if (fieldNode instanceof Element) { + Element field = (Element) fieldNode; + String tagName = field.getTagName(); + String value = ((Text) field.getFirstChild()).getData().trim(); + ecOptions.put(tagName, value); + } + } + + ECSchema schema = new ECSchema(schemaName, ecOptions); + return schema; + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc2564af/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/AbstractErasureCodec.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/AbstractErasureCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/AbstractErasureCodec.java new file mode 100644 index 0000000..0cacfbc --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/AbstractErasureCodec.java @@ -0,0 +1,51 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.erasurecode.codec; + +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.io.erasurecode.ECSchema; +import org.apache.hadoop.io.erasurecode.grouper.BlockGrouper; + +/** + * Abstract Erasure Codec that implements {@link ErasureCodec}. + */ +public abstract class AbstractErasureCodec extends Configured + implements ErasureCodec { + + private final ECSchema schema; + + public AbstractErasureCodec(ECSchema schema) { + this.schema = schema; + } + + public String getName() { + return schema.getCodecName(); + } + + public ECSchema getSchema() { + return schema; + } + + @Override + public BlockGrouper createBlockGrouper() { + BlockGrouper blockGrouper = new BlockGrouper(); + blockGrouper.setSchema(getSchema()); + + return blockGrouper; + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc2564af/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/ErasureCodec.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/ErasureCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/ErasureCodec.java new file mode 100644 index 
0000000..9aa3db2 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/ErasureCodec.java @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.erasurecode.codec; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.io.erasurecode.coder.ErasureCoder; +import org.apache.hadoop.io.erasurecode.grouper.BlockGrouper; + +/** + * Erasure Codec API that's to cover the essential specific aspects of a code. + * Currently it cares only block grouper and erasure coder. In future we may + * add more aspects here to make the behaviors customizable. 
+ */ +public interface ErasureCodec extends Configurable { + + /** + * Create block grouper + * @return block grouper + */ + public BlockGrouper createBlockGrouper(); + + /** + * Create Erasure Encoder + * @return erasure encoder + */ + public ErasureCoder createEncoder(); + + /** + * Create Erasure Decoder + * @return erasure decoder + */ + public ErasureCoder createDecoder(); + +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc2564af/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/RSErasureCodec.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/RSErasureCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/RSErasureCodec.java new file mode 100644 index 0000000..6edd638 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/RSErasureCodec.java @@ -0,0 +1,43 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.io.erasurecode.codec; + +import org.apache.hadoop.io.erasurecode.ECSchema; +import org.apache.hadoop.io.erasurecode.coder.ErasureCoder; +import org.apache.hadoop.io.erasurecode.coder.RSErasureDecoder; +import org.apache.hadoop.io.erasurecode.coder.RSErasureEncoder; + +/** + * A Reed-Solomon erasure codec. + */ +public class RSErasureCodec extends AbstractErasureCodec { + + public RSErasureCodec(ECSchema schema) { + super(schema); + } + + @Override + public ErasureCoder createEncoder() { + return new RSErasureEncoder(getSchema()); + } + + @Override + public ErasureCoder createDecoder() { + return new RSErasureDecoder(getSchema()); + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc2564af/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/XORErasureCodec.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/XORErasureCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/XORErasureCodec.java new file mode 100644 index 0000000..e2dcfa7 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/codec/XORErasureCodec.java @@ -0,0 +1,44 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.erasurecode.codec; + +import org.apache.hadoop.io.erasurecode.ECSchema; +import org.apache.hadoop.io.erasurecode.coder.ErasureCoder; +import org.apache.hadoop.io.erasurecode.coder.XORErasureDecoder; +import org.apache.hadoop.io.erasurecode.coder.XORErasureEncoder; + +/** + * A XOR erasure codec. + */ +public class XORErasureCodec extends AbstractErasureCodec { + + public XORErasureCodec(ECSchema schema) { + super(schema); + assert(schema.getNumParityUnits() == 1); + } + + @Override + public ErasureCoder createEncoder() { + return new XORErasureEncoder(getSchema()); + } + + @Override + public ErasureCoder createDecoder() { + return new XORErasureDecoder(getSchema()); + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc2564af/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureCoder.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureCoder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureCoder.java new file mode 100644 index 0000000..5cd0ee8 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureCoder.java @@ -0,0 +1,62 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.erasurecode.coder; + +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.io.erasurecode.ECSchema; + +/** + * A common class of basic facilities to be shared by encoder and decoder + * + * It implements the {@link ErasureCoder} interface. + */ +public abstract class AbstractErasureCoder + extends Configured implements ErasureCoder { + + private final int numDataUnits; + private final int numParityUnits; + + public AbstractErasureCoder(int numDataUnits, int numParityUnits) { + this.numDataUnits = numDataUnits; + this.numParityUnits = numParityUnits; + } + + public AbstractErasureCoder(ECSchema schema) { + this(schema.getNumDataUnits(), schema.getNumParityUnits()); + } + + @Override + public int getNumDataUnits() { + return numDataUnits; + } + + @Override + public int getNumParityUnits() { + return numParityUnits; + } + + @Override + public boolean preferDirectBuffer() { + return false; + } + + @Override + public void release() { + // Nothing to do by default + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc2564af/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureCodingStep.java ---------------------------------------------------------------------- diff --git 
a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureCodingStep.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureCodingStep.java new file mode 100644 index 0000000..c429d49 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureCodingStep.java @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.erasurecode.coder; + +import org.apache.hadoop.io.erasurecode.ECBlock; + +/** + * Abstract class for common facilities shared by {@link ErasureEncodingStep} + * and {@link ErasureDecodingStep}. + * + * It implements {@link ErasureEncodingStep}. + */ +public abstract class AbstractErasureCodingStep implements ErasureCodingStep { + + private ECBlock[] inputBlocks; + private ECBlock[] outputBlocks; + + /** + * Constructor given input blocks and output blocks. 
+ * @param inputBlocks + * @param outputBlocks + */ + public AbstractErasureCodingStep(ECBlock[] inputBlocks, + ECBlock[] outputBlocks) { + this.inputBlocks = inputBlocks; + this.outputBlocks = outputBlocks; + } + + @Override + public ECBlock[] getInputBlocks() { + return inputBlocks; + } + + @Override + public ECBlock[] getOutputBlocks() { + return outputBlocks; + } + + @Override + public void finish() { + // NOOP by default + } + +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc2564af/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureDecoder.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureDecoder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureDecoder.java new file mode 100644 index 0000000..3ea9311 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureDecoder.java @@ -0,0 +1,167 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.io.erasurecode.coder; + +import org.apache.hadoop.io.erasurecode.ECBlock; +import org.apache.hadoop.io.erasurecode.ECBlockGroup; +import org.apache.hadoop.io.erasurecode.ECSchema; + +/** + * An abstract erasure decoder that's to be inherited by new decoders. + * + * It implements the {@link ErasureCoder} interface. + */ +public abstract class AbstractErasureDecoder extends AbstractErasureCoder { + + public AbstractErasureDecoder(int numDataUnits, int numParityUnits) { + super(numDataUnits, numParityUnits); + } + + public AbstractErasureDecoder(ECSchema schema) { + super(schema); + } + + @Override + public ErasureCodingStep calculateCoding(ECBlockGroup blockGroup) { + // We may have more than this when considering complicate cases. HADOOP-11550 + return prepareDecodingStep(blockGroup); + } + + /** + * Perform decoding against a block blockGroup. + * @param blockGroup + * @return decoding step for caller to do the real work + */ + protected abstract ErasureCodingStep prepareDecodingStep( + ECBlockGroup blockGroup); + + /** + * We have all the data blocks and parity blocks as input blocks for + * recovering by default. It's codec specific + * @param blockGroup + * @return + */ + protected ECBlock[] getInputBlocks(ECBlockGroup blockGroup) { + ECBlock[] inputBlocks = new ECBlock[getNumParityUnits() + + getNumDataUnits()]; + + System.arraycopy(blockGroup.getParityBlocks(), 0, inputBlocks, 0, + getNumParityUnits()); + System.arraycopy(blockGroup.getDataBlocks(), 0, inputBlocks, + getNumParityUnits(), getNumDataUnits()); + + return inputBlocks; + } + + /** + * Which blocks were erased ? 
+ * @param blockGroup + * @return output blocks to recover + */ + protected ECBlock[] getOutputBlocks(ECBlockGroup blockGroup) { + ECBlock[] outputBlocks = new ECBlock[getNumErasedBlocks(blockGroup)]; + + int idx = 0; + + for (int i = 0; i < getNumParityUnits(); i++) { + if (blockGroup.getParityBlocks()[i].isErased()) { + outputBlocks[idx++] = blockGroup.getParityBlocks()[i]; + } + } + + for (int i = 0; i < getNumDataUnits(); i++) { + if (blockGroup.getDataBlocks()[i].isErased()) { + outputBlocks[idx++] = blockGroup.getDataBlocks()[i]; + } + } + + return outputBlocks; + } + + /** + * Get the number of erased blocks in the block group. + * @param blockGroup + * @return number of erased blocks + */ + protected int getNumErasedBlocks(ECBlockGroup blockGroup) { + int num = getNumErasedBlocks(blockGroup.getParityBlocks()); + num += getNumErasedBlocks(blockGroup.getDataBlocks()); + return num; + } + + /** + * Find out how many blocks are erased. + * @param inputBlocks all the input blocks + * @return number of erased blocks + */ + protected static int getNumErasedBlocks(ECBlock[] inputBlocks) { + int numErased = 0; + for (int i = 0; i < inputBlocks.length; i++) { + if (inputBlocks[i].isErased()) { + numErased ++; + } + } + + return numErased; + } + + /** + * Get indexes of erased blocks from inputBlocks + * @param inputBlocks + * @return indexes of erased blocks from inputBlocks + */ + protected int[] getErasedIndexes(ECBlock[] inputBlocks) { + int numErased = getNumErasedBlocks(inputBlocks); + if (numErased == 0) { + return new int[0]; + } + + int[] erasedIndexes = new int[numErased]; + int i = 0, j = 0; + for (; i < inputBlocks.length && j < erasedIndexes.length; i++) { + if (inputBlocks[i].isErased()) { + erasedIndexes[j++] = i; + } + } + + return erasedIndexes; + } + + /** + * Get erased input blocks from inputBlocks + * @param inputBlocks + * @return an array of erased blocks from inputBlocks + */ + protected ECBlock[] getErasedBlocks(ECBlock[] inputBlocks) { + int 
numErased = getNumErasedBlocks(inputBlocks); + if (numErased == 0) { + return new ECBlock[0]; + } + + ECBlock[] erasedBlocks = new ECBlock[numErased]; + int i = 0, j = 0; + for (; i < inputBlocks.length && j < erasedBlocks.length; i++) { + if (inputBlocks[i].isErased()) { + erasedBlocks[j++] = inputBlocks[i]; + } + } + + return erasedBlocks; + } + +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc2564af/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureEncoder.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureEncoder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureEncoder.java new file mode 100644 index 0000000..7c887e8 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/AbstractErasureEncoder.java @@ -0,0 +1,60 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.io.erasurecode.coder; + +import org.apache.hadoop.io.erasurecode.ECBlock; +import org.apache.hadoop.io.erasurecode.ECBlockGroup; +import org.apache.hadoop.io.erasurecode.ECSchema; + +/** + * An abstract erasure encoder that's to be inherited by new encoders. + * + * It implements the {@link ErasureCoder} interface. + */ +public abstract class AbstractErasureEncoder extends AbstractErasureCoder { + + public AbstractErasureEncoder(int numDataUnits, int numParityUnits) { + super(numDataUnits, numParityUnits); + } + + public AbstractErasureEncoder(ECSchema schema) { + super(schema); + } + + @Override + public ErasureCodingStep calculateCoding(ECBlockGroup blockGroup) { + // We may have more than this when considering complicated cases. HADOOP-11550 + return prepareEncodingStep(blockGroup); + } + + /** + * Prepare the encoding step for a block group. + * @param blockGroup the block group to prepare the encoding step for + * @return encoding step for caller to do the real work + */ + protected abstract ErasureCodingStep prepareEncodingStep( + ECBlockGroup blockGroup); + + protected ECBlock[] getInputBlocks(ECBlockGroup blockGroup) { + return blockGroup.getDataBlocks(); + } + + protected ECBlock[] getOutputBlocks(ECBlockGroup blockGroup) { + return blockGroup.getParityBlocks(); + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc2564af/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureCoder.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureCoder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureCoder.java new file mode 100644 index 0000000..f05ea41 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureCoder.java @@ -0,0 +1,77 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.erasurecode.coder; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.io.erasurecode.ECBlockGroup; + +/** + * An erasure coder to perform encoding or decoding given a group. Generally it + * involves calculating necessary internal steps according to codec logic. For + * each step, it calculates necessary input blocks to read chunks from and output + * parity blocks to write parity chunks into from the group. It also takes care + * of the appropriate raw coder to use for the step, and encapsulates all the + * necessary info (input blocks, output blocks and raw coder) into a step + * represented by {@link ErasureCodingStep}. ErasureCoder callers can use the + * step to do the real work with retrieved input and output chunks. + * + * Note, currently only one coding step is supported. Will support complex cases + * of multiple coding steps. + * + */ +public interface ErasureCoder extends Configurable { + + /** + * The number of data input units for the coding. A unit can be a byte, + * chunk, buffer or even a block. + * @return count of data input units + */ + public int getNumDataUnits(); + + /** + * The number of parity output units for the coding.
A unit can be a byte, + * chunk, buffer or even a block. + * @return count of parity output units + */ + public int getNumParityUnits(); + + /** + * Calculate the encoding or decoding steps given a block group. + * + * Note, currently only one coding step is supported. Will support complex + * cases of multiple coding steps. + * + * @param blockGroup the erasure coding block group containing all necessary + * information for codec calculation + */ + public ErasureCodingStep calculateCoding(ECBlockGroup blockGroup); + + /** + * Tell if direct or off-heap buffer is preferred or not. It's for callers to + * decide how to allocate coding chunk buffers, either on heap or off heap. + * It will return false by default. + * @return true if direct buffer is preferred for performance consideration, + * otherwise false. + */ + public boolean preferDirectBuffer(); + + /** + * Release the resources if any. Good chance to invoke RawErasureCoder#release. + */ + public void release(); +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc2564af/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureCodingStep.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureCodingStep.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureCodingStep.java new file mode 100644 index 0000000..a3b177f --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureCodingStep.java @@ -0,0 +1,55 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.erasurecode.coder; + +import org.apache.hadoop.io.erasurecode.ECBlock; +import org.apache.hadoop.io.erasurecode.ECChunk; + +/** + * Erasure coding step that's involved in encoding/decoding of a block group. + */ +public interface ErasureCodingStep { + + /** + * Input blocks of readable data involved in this step, may be data blocks + * or parity blocks. + * @return input blocks + */ + public ECBlock[] getInputBlocks(); + + /** + * Output blocks of writable buffers involved in this step, may be data + * blocks or parity blocks. + * @return output blocks + */ + public ECBlock[] getOutputBlocks(); + + /** + * Perform encoding or decoding given the input chunks, and generated results + * will be written to the output chunks. + * @param inputChunks the input chunks to encode or decode + * @param outputChunks the output chunks that receive the generated results + */ + public void performCoding(ECChunk[] inputChunks, ECChunk[] outputChunks); + + /** + * Notify erasure coder that all the chunks of input blocks are processed so + * the coder can update its internal state in preparation for the next step.
+ */ + public void finish(); +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc2564af/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureDecodingStep.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureDecodingStep.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureDecodingStep.java new file mode 100644 index 0000000..980c580 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureDecodingStep.java @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.erasurecode.coder; + +import org.apache.hadoop.io.erasurecode.ECBlock; +import org.apache.hadoop.io.erasurecode.ECChunk; +import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureDecoder; + +/** + * Erasure decoding step, a wrapper of all the necessary information to perform + * a decoding step involved in the whole process of decoding a block group. 
+ */ +public class ErasureDecodingStep extends AbstractErasureCodingStep { + private int[] erasedIndexes; + private RawErasureDecoder rawDecoder; + + /** + * The constructor with all the necessary info. + * @param inputBlocks the input blocks of this step, handed to the superclass + * @param erasedIndexes the indexes of erased blocks in inputBlocks array + * @param outputBlocks the output blocks of this step, handed to the superclass + * @param rawDecoder the raw decoder that performCoding delegates to + */ + public ErasureDecodingStep(ECBlock[] inputBlocks, int[] erasedIndexes, + ECBlock[] outputBlocks, + RawErasureDecoder rawDecoder) { + super(inputBlocks, outputBlocks); + this.erasedIndexes = erasedIndexes; + this.rawDecoder = rawDecoder; + } + + @Override + public void performCoding(ECChunk[] inputChunks, ECChunk[] outputChunks) { + rawDecoder.decode(inputChunks, erasedIndexes, outputChunks); + } + +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc2564af/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureEncodingStep.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureEncodingStep.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureEncodingStep.java new file mode 100644 index 0000000..bd7587f --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/ErasureEncodingStep.java @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.erasurecode.coder; + +import org.apache.hadoop.io.erasurecode.ECBlock; +import org.apache.hadoop.io.erasurecode.ECChunk; +import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureEncoder; + +/** + * Erasure encoding step, a wrapper of all the necessary information to perform + * an encoding step involved in the whole process of encoding a block group. + */ +public class ErasureEncodingStep extends AbstractErasureCodingStep { + + private RawErasureEncoder rawEncoder; + + /** + * The constructor with all the necessary info. + * @param inputBlocks the input blocks of this step, handed to the superclass + * @param outputBlocks the output blocks of this step, handed to the superclass + * @param rawEncoder the raw encoder that performCoding delegates to + */ + public ErasureEncodingStep(ECBlock[] inputBlocks, ECBlock[] outputBlocks, + RawErasureEncoder rawEncoder) { + super(inputBlocks, outputBlocks); + this.rawEncoder = rawEncoder; + } + + @Override + public void performCoding(ECChunk[] inputChunks, ECChunk[] outputChunks) { + rawEncoder.encode(inputChunks, outputChunks); + } + +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc2564af/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/RSErasureDecoder.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/RSErasureDecoder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/RSErasureDecoder.java new file mode 100644 index 0000000..f56674d --- /dev/null +++
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/coder/RSErasureDecoder.java @@ -0,0 +1,67 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.erasurecode.coder; + +import org.apache.hadoop.io.erasurecode.CodecUtil; +import org.apache.hadoop.io.erasurecode.ECBlock; +import org.apache.hadoop.io.erasurecode.ECBlockGroup; +import org.apache.hadoop.io.erasurecode.ECSchema; +import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureDecoder; + +/** + * Reed-Solomon erasure decoder that decodes a block group. + * + * It implements {@link ErasureCoder}. 
+ */ +public class RSErasureDecoder extends AbstractErasureDecoder { + private RawErasureDecoder rsRawDecoder; + + public RSErasureDecoder(int numDataUnits, int numParityUnits) { + super(numDataUnits, numParityUnits); + } + + public RSErasureDecoder(ECSchema schema) { + super(schema); + } + + @Override + protected ErasureCodingStep prepareDecodingStep(final ECBlockGroup blockGroup) { + + ECBlock[] inputBlocks = getInputBlocks(blockGroup); + ECBlock[] outputBlocks = getOutputBlocks(blockGroup); + + RawErasureDecoder rawDecoder = checkCreateRSRawDecoder(); + return new ErasureDecodingStep(inputBlocks, + getErasedIndexes(inputBlocks), outputBlocks, rawDecoder); + } + + private RawErasureDecoder checkCreateRSRawDecoder() { + if (rsRawDecoder == null) { + rsRawDecoder = CodecUtil.createRSRawDecoder(getConf(), + getNumDataUnits(), getNumParityUnits()); + } + return rsRawDecoder; + } + + @Override + public void release() { + if (rsRawDecoder != null) { + rsRawDecoder.release(); + } + } +}
