http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc2564af/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/GaloisField.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/GaloisField.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/GaloisField.java new file mode 100644 index 0000000..03683b0 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/GaloisField.java @@ -0,0 +1,561 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.erasurecode.rawcoder.util; + +import java.nio.ByteBuffer; +import java.util.HashMap; +import java.util.Map; + +/** + * Implementation of Galois field arithmetic with 2^p elements. The input must + * be unsigned integers. It's ported from HDFS-RAID, slightly adapted. + */ +public class GaloisField { + + // Field size 256 is good for byte based system + private static final int DEFAULT_FIELD_SIZE = 256; + // primitive polynomial 1 + X^2 + X^3 + X^4 + X^8 (substitute 2) + private static final int DEFAULT_PRIMITIVE_POLYNOMIAL = 285; + static private final Map<Integer, GaloisField> instances = + new HashMap<Integer, GaloisField>(); + private final int[] logTable; + private final int[] powTable; + private final int[][] mulTable; + private final int[][] divTable; + private final int fieldSize; + private final int primitivePeriod; + private final int primitivePolynomial; + + private GaloisField(int fieldSize, int primitivePolynomial) { + assert fieldSize > 0; + assert primitivePolynomial > 0; + + this.fieldSize = fieldSize; + this.primitivePeriod = fieldSize - 1; + this.primitivePolynomial = primitivePolynomial; + logTable = new int[fieldSize]; + powTable = new int[fieldSize]; + mulTable = new int[fieldSize][fieldSize]; + divTable = new int[fieldSize][fieldSize]; + int value = 1; + for (int pow = 0; pow < fieldSize - 1; pow++) { + powTable[pow] = value; + logTable[value] = pow; + value = value * 2; + if (value >= fieldSize) { + value = value ^ primitivePolynomial; + } + } + // building multiplication table + for (int i = 0; i < fieldSize; i++) { + for (int j = 0; j < fieldSize; j++) { + if (i == 0 || j == 0) { + mulTable[i][j] = 0; + continue; + } + int z = logTable[i] + logTable[j]; + z = z >= primitivePeriod ? z - primitivePeriod : z; + z = powTable[z]; + mulTable[i][j] = z; + } + } + // building division table + for (int i = 0; i < fieldSize; i++) { + for (int j = 1; j < fieldSize; j++) { + if (i == 0) { + divTable[i][j] = 0; + continue; + } + int z = logTable[i] - logTable[j]; + z = z < 0 ? 
z + primitivePeriod : z; + z = powTable[z]; + divTable[i][j] = z; + } + } + } + + /** + * Get the object performs Galois field arithmetics + * + * @param fieldSize size of the field + * @param primitivePolynomial a primitive polynomial corresponds to the size + */ + public static GaloisField getInstance(int fieldSize, + int primitivePolynomial) { + int key = ((fieldSize << 16) & 0xFFFF0000) + + (primitivePolynomial & 0x0000FFFF); + GaloisField gf; + synchronized (instances) { + gf = instances.get(key); + if (gf == null) { + gf = new GaloisField(fieldSize, primitivePolynomial); + instances.put(key, gf); + } + } + return gf; + } + + /** + * Get the object performs Galois field arithmetic with default setting + */ + public static GaloisField getInstance() { + return getInstance(DEFAULT_FIELD_SIZE, DEFAULT_PRIMITIVE_POLYNOMIAL); + } + + /** + * Return number of elements in the field + * + * @return number of elements in the field + */ + public int getFieldSize() { + return fieldSize; + } + + /** + * Return the primitive polynomial in GF(2) + * + * @return primitive polynomial as a integer + */ + public int getPrimitivePolynomial() { + return primitivePolynomial; + } + + /** + * Compute the sum of two fields + * + * @param x input field + * @param y input field + * @return result of addition + */ + public int add(int x, int y) { + assert (x >= 0 && x < getFieldSize() && y >= 0 && y < getFieldSize()); + return x ^ y; + } + + /** + * Compute the multiplication of two fields + * + * @param x input field + * @param y input field + * @return result of multiplication + */ + public int multiply(int x, int y) { + assert (x >= 0 && x < getFieldSize() && y >= 0 && y < getFieldSize()); + return mulTable[x][y]; + } + + /** + * Compute the division of two fields + * + * @param x input field + * @param y input field + * @return x/y + */ + public int divide(int x, int y) { + assert (x >= 0 && x < getFieldSize() && y > 0 && y < getFieldSize()); + return divTable[x][y]; + } + + /** + * Compute power n of a field + * + * @param x input field + * @param n power + * @return x^n + */ + public int power(int x, int n) { + assert (x >= 0 && x < getFieldSize()); + if (n == 0) { + return 1; + } + if (x == 0) { + return 0; + } + x = logTable[x] * n; + if (x < primitivePeriod) { + return powTable[x]; + } + x = x % primitivePeriod; + return powTable[x]; + } + + /** + * Given a Vandermonde matrix V[i][j]=x[j]^i and vector y, solve for z such + * that Vz=y. The output z will be placed in y. + * + * @param x the vector which describe the Vandermonde matrix + * @param y right-hand side of the Vandermonde system equation. will be + * replaced the output in this vector + */ + public void solveVandermondeSystem(int[] x, int[] y) { + solveVandermondeSystem(x, y, x.length); + } + + /** + * Given a Vandermonde matrix V[i][j]=x[j]^i and vector y, solve for z such + * that Vz=y. The output z will be placed in y. + * + * @param x the vector which describe the Vandermonde matrix + * @param y right-hand side of the Vandermonde system equation. 
will be + * replaced the output in this vector + * @param len consider x and y only from 0...len-1 + */ + public void solveVandermondeSystem(int[] x, int[] y, int len) { + assert (x.length <= len && y.length <= len); + for (int i = 0; i < len - 1; i++) { + for (int j = len - 1; j > i; j--) { + y[j] = y[j] ^ mulTable[x[i]][y[j - 1]]; + } + } + for (int i = len - 1; i >= 0; i--) { + for (int j = i + 1; j < len; j++) { + y[j] = divTable[y[j]][x[j] ^ x[j - i - 1]]; + } + for (int j = i; j < len - 1; j++) { + y[j] = y[j] ^ y[j + 1]; + } + } + } + + /** + * A "bulk" version to the solving of Vandermonde System + */ + public void solveVandermondeSystem(int[] x, byte[][] y, int[] outputOffsets, + int len, int dataLen) { + int idx1, idx2; + for (int i = 0; i < len - 1; i++) { + for (int j = len - 1; j > i; j--) { + for (idx2 = outputOffsets[j-1], idx1 = outputOffsets[j]; + idx1 < outputOffsets[j] + dataLen; idx1++, idx2++) { + y[j][idx1] = (byte) (y[j][idx1] ^ mulTable[x[i]][y[j - 1][idx2] & + 0x000000FF]); + } + } + } + for (int i = len - 1; i >= 0; i--) { + for (int j = i + 1; j < len; j++) { + for (idx1 = outputOffsets[j]; + idx1 < outputOffsets[j] + dataLen; idx1++) { + y[j][idx1] = (byte) (divTable[y[j][idx1] & 0x000000FF][x[j] ^ + x[j - i - 1]]); + } + } + for (int j = i; j < len - 1; j++) { + for (idx2 = outputOffsets[j+1], idx1 = outputOffsets[j]; + idx1 < outputOffsets[j] + dataLen; idx1++, idx2++) { + y[j][idx1] = (byte) (y[j][idx1] ^ y[j + 1][idx2]); + } + } + } + } + + /** + * A "bulk" version of the solveVandermondeSystem, using ByteBuffer. + */ + public void solveVandermondeSystem(int[] x, ByteBuffer[] y, int len) { + ByteBuffer p; + int idx1, idx2; + for (int i = 0; i < len - 1; i++) { + for (int j = len - 1; j > i; j--) { + p = y[j]; + for (idx1 = p.position(), idx2 = y[j-1].position(); + idx1 < p.limit(); idx1++, idx2++) { + p.put(idx1, (byte) (p.get(idx1) ^ mulTable[x[i]][y[j-1].get(idx2) & + 0x000000FF])); + } + } + } + + for (int i = len - 1; i >= 0; i--) { + for (int j = i + 1; j < len; j++) { + p = y[j]; + for (idx1 = p.position(); idx1 < p.limit(); idx1++) { + p.put(idx1, (byte) (divTable[p.get(idx1) & + 0x000000FF][x[j] ^ x[j - i - 1]])); + } + } + + for (int j = i; j < len - 1; j++) { + p = y[j]; + for (idx1 = p.position(), idx2 = y[j+1].position(); + idx1 < p.limit(); idx1++, idx2++) { + p.put(idx1, (byte) (p.get(idx1) ^ y[j+1].get(idx2))); + } + } + } + } + + /** + * Compute the multiplication of two polynomials. The index in the array + * corresponds to the power of the entry. For example p[0] is the constant + * term of the polynomial p. + * + * @param p input polynomial + * @param q input polynomial + * @return polynomial represents p*q + */ + public int[] multiply(int[] p, int[] q) { + int len = p.length + q.length - 1; + int[] result = new int[len]; + for (int i = 0; i < len; i++) { + result[i] = 0; + } + for (int i = 0; i < p.length; i++) { + + for (int j = 0; j < q.length; j++) { + result[i + j] = add(result[i + j], multiply(p[i], q[j])); + } + } + return result; + } + + /** + * Compute the remainder of a dividend and divisor pair. The index in the + * array corresponds to the power of the entry. For example p[0] is the + * constant term of the polynomial p. 
+ * + * @param dividend dividend polynomial, the remainder will be placed + * here when return + * @param divisor divisor polynomial + */ + public void remainder(int[] dividend, int[] divisor) { + for (int i = dividend.length - divisor.length; i >= 0; i--) { + int ratio = divTable[dividend[i + + divisor.length - 1]][divisor[divisor.length - 1]]; + for (int j = 0; j < divisor.length; j++) { + int k = j + i; + dividend[k] = dividend[k] ^ mulTable[ratio][divisor[j]]; + } + } + } + + /** + * Compute the sum of two polynomials. The index in the array corresponds to + * the power of the entry. For example p[0] is the constant term of the + * polynomial p. + * + * @param p input polynomial + * @param q input polynomial + * @return polynomial represents p+q + */ + public int[] add(int[] p, int[] q) { + int len = Math.max(p.length, q.length); + int[] result = new int[len]; + for (int i = 0; i < len; i++) { + if (i < p.length && i < q.length) { + result[i] = add(p[i], q[i]); + } else if (i < p.length) { + result[i] = p[i]; + } else { + result[i] = q[i]; + } + } + return result; + } + + /** + * Substitute x into polynomial p(x). + * + * @param p input polynomial + * @param x input field + * @return p(x) + */ + public int substitute(int[] p, int x) { + int result = 0; + int y = 1; + for (int i = 0; i < p.length; i++) { + result = result ^ mulTable[p[i]][y]; + y = mulTable[x][y]; + } + return result; + } + + /** + * A "bulk" version of the substitute. + * Tends to be 2X faster than the "int" substitute in a loop. + * + * @param p input polynomial + * @param q store the return result + * @param x input field + */ + public void substitute(byte[][] p, byte[] q, int x) { + int y = 1; + for (int i = 0; i < p.length; i++) { + byte[] pi = p[i]; + for (int j = 0; j < pi.length; j++) { + int pij = pi[j] & 0x000000FF; + q[j] = (byte) (q[j] ^ mulTable[pij][y]); + } + y = mulTable[x][y]; + } + } + + /** + * A "bulk" version of the substitute. + * Tends to be 2X faster than the "int" substitute in a loop. + * + * @param p input polynomial + * @param offsets + * @param len + * @param q store the return result + * @param offset + * @param x input field + */ + public void substitute(byte[][] p, int[] offsets, + int len, byte[] q, int offset, int x) { + int y = 1, iIdx, oIdx; + for (int i = 0; i < p.length; i++) { + byte[] pi = p[i]; + for (iIdx = offsets[i], oIdx = offset; + iIdx < offsets[i] + len; iIdx++, oIdx++) { + int pij = pi != null ? pi[iIdx] & 0x000000FF : 0; + q[oIdx] = (byte) (q[oIdx] ^ mulTable[pij][y]); + } + y = mulTable[x][y]; + } + } + + /** + * A "bulk" version of the substitute, using ByteBuffer. + * Tends to be 2X faster than the "int" substitute in a loop. + * + * @param p input polynomial + * @param q store the return result + * @param x input field + */ + public void substitute(ByteBuffer[] p, int len, ByteBuffer q, int x) { + int y = 1, iIdx, oIdx; + for (int i = 0; i < p.length; i++) { + ByteBuffer pi = p[i]; + int pos = pi != null ? pi.position() : 0; + int limit = pi != null ? pi.limit() : len; + for (oIdx = q.position(), iIdx = pos; + iIdx < limit; iIdx++, oIdx++) { + int pij = pi != null ? pi.get(iIdx) & 0x000000FF : 0; + q.put(oIdx, (byte) (q.get(oIdx) ^ mulTable[pij][y])); + } + y = mulTable[x][y]; + } + } + + /** + * The "bulk" version of the remainder. + * Warning: This function will modify the "dividend" inputs. 
+ */ + public void remainder(byte[][] dividend, int[] divisor) { + for (int i = dividend.length - divisor.length; i >= 0; i--) { + for (int j = 0; j < divisor.length; j++) { + for (int k = 0; k < dividend[i].length; k++) { + int ratio = divTable[dividend[i + divisor.length - 1][k] & + 0x00FF][divisor[divisor.length - 1]]; + dividend[j + i][k] = (byte) ((dividend[j + i][k] & 0x00FF) ^ + mulTable[ratio][divisor[j]]); + } + } + } + } + + /** + * The "bulk" version of the remainder. + * Warning: This function will modify the "dividend" inputs. + */ + public void remainder(byte[][] dividend, int[] offsets, + int len, int[] divisor) { + int idx1, idx2; + for (int i = dividend.length - divisor.length; i >= 0; i--) { + for (int j = 0; j < divisor.length; j++) { + for (idx2 = offsets[j + i], idx1 = offsets[i + divisor.length - 1]; + idx1 < offsets[i + divisor.length - 1] + len; + idx1++, idx2++) { + int ratio = divTable[dividend[i + divisor.length - 1][idx1] & + 0x00FF][divisor[divisor.length - 1]]; + dividend[j + i][idx2] = (byte) ((dividend[j + i][idx2] & 0x00FF) ^ + mulTable[ratio][divisor[j]]); + } + } + } + } + + /** + * The "bulk" version of the remainder, using ByteBuffer. + * Warning: This function will modify the "dividend" inputs. + */ + public void remainder(ByteBuffer[] dividend, int[] divisor) { + int idx1, idx2; + ByteBuffer b1, b2; + for (int i = dividend.length - divisor.length; i >= 0; i--) { + for (int j = 0; j < divisor.length; j++) { + b1 = dividend[i + divisor.length - 1]; + b2 = dividend[j + i]; + for (idx1 = b1.position(), idx2 = b2.position(); + idx1 < b1.limit(); idx1++, idx2++) { + int ratio = divTable[b1.get(idx1) & + 0x00FF][divisor[divisor.length - 1]]; + b2.put(idx2, (byte) ((b2.get(idx2) & 0x00FF) ^ + mulTable[ratio][divisor[j]])); + } + } + } + } + + /** + * Perform Gaussian elimination on the given matrix. This matrix has to be a + * fat matrix (number of rows > number of columns). + */ + public void gaussianElimination(int[][] matrix) { + assert(matrix != null && matrix.length > 0 && matrix[0].length > 0 + && matrix.length < matrix[0].length); + int height = matrix.length; + int width = matrix[0].length; + for (int i = 0; i < height; i++) { + boolean pivotFound = false; + // scan the column for a nonzero pivot and swap it to the diagonal + for (int j = i; j < height; j++) { + if (matrix[i][j] != 0) { + int[] tmp = matrix[i]; + matrix[i] = matrix[j]; + matrix[j] = tmp; + pivotFound = true; + break; + } + } + if (!pivotFound) { + continue; + } + int pivot = matrix[i][i]; + for (int j = i; j < width; j++) { + matrix[i][j] = divide(matrix[i][j], pivot); + } + for (int j = i + 1; j < height; j++) { + int lead = matrix[j][i]; + for (int k = i; k < width; k++) { + matrix[j][k] = add(matrix[j][k], multiply(lead, matrix[i][k])); + } + } + } + for (int i = height - 1; i >=0; i--) { + for (int j = 0; j < i; j++) { + int lead = matrix[j][i]; + for (int k = i; k < width; k++) { + matrix[j][k] = add(matrix[j][k], multiply(lead, matrix[i][k])); + } + } + } + } + +}
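A minimal, hypothetical sanity check (not part of the patch) of the GaloisField arithmetic added above. The GaloisField class and its getInstance/add/multiply/divide/power methods come from this commit; the demo class, its name, and the chosen operands are illustrative only.

import org.apache.hadoop.io.erasurecode.rawcoder.util.GaloisField;

public class GaloisFieldSanityCheck {
  public static void main(String[] args) {
    // Default instance: GF(2^8) with primitive polynomial 285 (1 + X^2 + X^3 + X^4 + X^8).
    GaloisField gf = GaloisField.getInstance();

    int x = 87, y = 131;

    // Division undoes multiplication for any nonzero divisor.
    System.out.println(gf.divide(gf.multiply(x, y), y) == x);   // true

    // Addition in GF(2^p) is XOR, so every element is its own additive inverse.
    System.out.println(gf.add(x, x) == 0);                      // true

    // power(x, n) agrees with repeated multiplication.
    int p = 1;
    for (int i = 0; i < 5; i++) {
      p = gf.multiply(p, x);
    }
    System.out.println(p == gf.power(x, 5));                    // true
  }
}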
http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc2564af/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/RSUtil.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/RSUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/RSUtil.java new file mode 100644 index 0000000..8badf02 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/RSUtil.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.erasurecode.rawcoder.util; + +/** + * Some utilities for Reed-Solomon coding. + */ +public class RSUtil { + + // We always use the byte system (with symbol size 8, field size 256, + // primitive polynomial 285, and primitive root 2). + public static GaloisField GF = GaloisField.getInstance(); + public static final int PRIMITIVE_ROOT = 2; + + public static int[] getPrimitivePower(int numDataUnits, int numParityUnits) { + int[] primitivePower = new int[numDataUnits + numParityUnits]; + // compute powers of the primitive root + for (int i = 0; i < numDataUnits + numParityUnits; i++) { + primitivePower[i] = GF.power(PRIMITIVE_ROOT, i); + } + return primitivePower; + } + +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc2564af/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/BufferAllocator.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/BufferAllocator.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/BufferAllocator.java new file mode 100644 index 0000000..8f552b7 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/BufferAllocator.java @@ -0,0 +1,91 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.erasurecode; + + +import java.nio.ByteBuffer; + +/** + * An abstract buffer allocator used for test. + */ +public abstract class BufferAllocator { + private boolean usingDirect = false; + + public BufferAllocator(boolean usingDirect) { + this.usingDirect = usingDirect; + } + + protected boolean isUsingDirect() { + return usingDirect; + } + + /** + * Allocate and return a ByteBuffer of specified length. + * @param bufferLen + * @return + */ + public abstract ByteBuffer allocate(int bufferLen); + + /** + * A simple buffer allocator that just uses ByteBuffer's + * allocate/allocateDirect API. + */ + public static class SimpleBufferAllocator extends BufferAllocator { + + public SimpleBufferAllocator(boolean usingDirect) { + super(usingDirect); + } + + @Override + public ByteBuffer allocate(int bufferLen) { + return isUsingDirect() ? ByteBuffer.allocateDirect(bufferLen) : + ByteBuffer.allocate(bufferLen); + } + } + + /** + * A buffer allocator that allocates a buffer from an existing large buffer by + * slice calling, but if no available space just degrades as + * SimpleBufferAllocator. So please ensure enough space for it. + */ + public static class SlicedBufferAllocator extends BufferAllocator { + private ByteBuffer overallBuffer; + + public SlicedBufferAllocator(boolean usingDirect, int totalBufferLen) { + super(usingDirect); + overallBuffer = isUsingDirect() ? + ByteBuffer.allocateDirect(totalBufferLen) : + ByteBuffer.allocate(totalBufferLen); + } + + @Override + public ByteBuffer allocate(int bufferLen) { + if (bufferLen > overallBuffer.capacity() - overallBuffer.position()) { + // If no available space for the requested length, then allocate new + return isUsingDirect() ? ByteBuffer.allocateDirect(bufferLen) : + ByteBuffer.allocate(bufferLen); + } + + overallBuffer.limit(overallBuffer.position() + bufferLen); + ByteBuffer result = overallBuffer.slice(); + overallBuffer.position(overallBuffer.position() + bufferLen); + return result; + } + } + +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc2564af/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/TestCoderBase.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/TestCoderBase.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/TestCoderBase.java new file mode 100644 index 0000000..8f277f4 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/TestCoderBase.java @@ -0,0 +1,500 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.erasurecode; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.erasurecode.BufferAllocator.SimpleBufferAllocator; +import org.apache.hadoop.io.erasurecode.BufferAllocator.SlicedBufferAllocator; +import org.apache.hadoop.io.erasurecode.rawcoder.util.DumpUtil; + +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.Random; + +import static org.junit.Assert.assertTrue; + +/** + * Test base of common utilities for tests not only raw coders but also block + * coders. + */ +public abstract class TestCoderBase { + protected static Random RAND = new Random(); + + private boolean allowDump = true; + + private Configuration conf; + protected int numDataUnits; + protected int numParityUnits; + protected int baseChunkSize = 513; + private int chunkSize = baseChunkSize; + private BufferAllocator allocator; + + private byte[] zeroChunkBytes; + + private boolean startBufferWithZero = true; + + // Indexes of erased data units. + protected int[] erasedDataIndexes = new int[] {0}; + + // Indexes of erased parity units. + protected int[] erasedParityIndexes = new int[] {0}; + + // Data buffers are either direct or on-heap, for performance the two cases + // may go to different coding implementations. + protected boolean usingDirectBuffer = true; + + protected boolean usingFixedData = true; + // Using this the generated data can be repeatable across multiple calls to + // encode(), in order for troubleshooting. + private static int FIXED_DATA_GENERATOR = 0; + protected byte[][] fixedData; + + protected int getChunkSize() { + return chunkSize; + } + + protected void setChunkSize(int chunkSize) { + this.chunkSize = chunkSize; + this.zeroChunkBytes = new byte[chunkSize]; // With ZERO by default + } + + protected void prepareBufferAllocator(boolean usingSlicedBuffer) { + if (usingSlicedBuffer) { + int roughEstimationSpace = + chunkSize * (numDataUnits + numParityUnits) * 10; + allocator = new SlicedBufferAllocator(usingDirectBuffer, + roughEstimationSpace); + } else { + allocator = new SimpleBufferAllocator(usingDirectBuffer); + } + } + + /** + * Set true during setup if want to dump test settings and coding data, + * useful in debugging. + * @param allowDump + */ + protected void setAllowDump(boolean allowDump) { + this.allowDump = allowDump; + } + + /** + * Prepare before running the case. + * @param conf + * @param numDataUnits + * @param numParityUnits + * @param erasedDataIndexes + * @param erasedParityIndexes + * @param usingFixedData Using fixed or pre-generated data to test instead of + * generating data + */ + protected void prepare(Configuration conf, int numDataUnits, + int numParityUnits, int[] erasedDataIndexes, + int[] erasedParityIndexes, boolean usingFixedData) { + this.conf = conf; + this.numDataUnits = numDataUnits; + this.numParityUnits = numParityUnits; + this.erasedDataIndexes = erasedDataIndexes != null ? + erasedDataIndexes : new int[] {0}; + this.erasedParityIndexes = erasedParityIndexes != null ? + erasedParityIndexes : new int[] {0}; + this.usingFixedData = usingFixedData; + if (usingFixedData) { + prepareFixedData(); + } + } + + /** + * Prepare before running the case. 
+ * @param conf + * @param numDataUnits + * @param numParityUnits + * @param erasedDataIndexes + * @param erasedParityIndexes + */ + protected void prepare(Configuration conf, int numDataUnits, + int numParityUnits, int[] erasedDataIndexes, + int[] erasedParityIndexes) { + prepare(conf, numDataUnits, numParityUnits, erasedDataIndexes, + erasedParityIndexes, false); + } + + /** + * Prepare before running the case. + * @param numDataUnits + * @param numParityUnits + * @param erasedDataIndexes + * @param erasedParityIndexes + */ + protected void prepare(int numDataUnits, int numParityUnits, + int[] erasedDataIndexes, int[] erasedParityIndexes) { + prepare(null, numDataUnits, numParityUnits, erasedDataIndexes, + erasedParityIndexes, false); + } + + /** + * Get the conf the test. + * @return configuration + */ + protected Configuration getConf() { + return this.conf; + } + + /** + * Compare and verify if erased chunks are equal to recovered chunks + * @param erasedChunks + * @param recoveredChunks + */ + protected void compareAndVerify(ECChunk[] erasedChunks, + ECChunk[] recoveredChunks) { + byte[][] erased = toArrays(erasedChunks); + byte[][] recovered = toArrays(recoveredChunks); + boolean result = Arrays.deepEquals(erased, recovered); + assertTrue("Decoding and comparing failed.", result); + } + + /** + * Adjust and return erased indexes altogether, including erased data indexes + * and parity indexes. + * @return erased indexes altogether + */ + protected int[] getErasedIndexesForDecoding() { + int[] erasedIndexesForDecoding = + new int[erasedParityIndexes.length + erasedDataIndexes.length]; + + int idx = 0; + + for (int i = 0; i < erasedParityIndexes.length; i++) { + erasedIndexesForDecoding[idx ++] = erasedParityIndexes[i]; + } + + for (int i = 0; i < erasedDataIndexes.length; i++) { + erasedIndexesForDecoding[idx ++] = erasedDataIndexes[i] + numParityUnits; + } + + return erasedIndexesForDecoding; + } + + /** + * Return input chunks for decoding, which is parityChunks + dataChunks. + * @param dataChunks + * @param parityChunks + * @return + */ + protected ECChunk[] prepareInputChunksForDecoding(ECChunk[] dataChunks, + ECChunk[] parityChunks) { + ECChunk[] inputChunks = new ECChunk[numParityUnits + numDataUnits]; + + int idx = 0; + for (int i = 0; i < numParityUnits; i++) { + inputChunks[idx ++] = parityChunks[i]; + } + for (int i = 0; i < numDataUnits; i++) { + inputChunks[idx ++] = dataChunks[i]; + } + + return inputChunks; + } + + /** + * Erase some data chunks to test the recovering of them. As they're erased, + * we don't need to read them and will not have the buffers at all, so just + * set them as null. + * @param dataChunks + * @param parityChunks + * @return clone of erased chunks + */ + protected ECChunk[] backupAndEraseChunks(ECChunk[] dataChunks, + ECChunk[] parityChunks) { + ECChunk[] toEraseChunks = new ECChunk[erasedParityIndexes.length + + erasedDataIndexes.length]; + + int idx = 0; + + for (int i = 0; i < erasedParityIndexes.length; i++) { + toEraseChunks[idx ++] = parityChunks[erasedParityIndexes[i]]; + parityChunks[erasedParityIndexes[i]] = null; + } + + for (int i = 0; i < erasedDataIndexes.length; i++) { + toEraseChunks[idx ++] = dataChunks[erasedDataIndexes[i]]; + dataChunks[erasedDataIndexes[i]] = null; + } + + return toEraseChunks; + } + + /** + * Erase data from the specified chunks, just setting them as null. 
+ * @param chunks + */ + protected void eraseDataFromChunks(ECChunk[] chunks) { + for (int i = 0; i < chunks.length; i++) { + chunks[i] = null; + } + } + + /** + * Clone chunks along with copying the associated data. It respects how the + * chunk buffer is allocated, direct or non-direct. It avoids affecting the + * original chunk buffers. + * @param chunks + * @return + */ + protected ECChunk[] cloneChunksWithData(ECChunk[] chunks) { + ECChunk[] results = new ECChunk[chunks.length]; + for (int i = 0; i < chunks.length; i++) { + results[i] = cloneChunkWithData(chunks[i]); + } + + return results; + } + + /** + * Clone chunk along with copying the associated data. It respects how the + * chunk buffer is allocated, direct or non-direct. It avoids affecting the + * original chunk. + * @param chunk + * @return a new chunk + */ + protected ECChunk cloneChunkWithData(ECChunk chunk) { + ByteBuffer srcBuffer = chunk.getBuffer(); + + byte[] bytesArr = new byte[srcBuffer.remaining()]; + srcBuffer.mark(); + srcBuffer.get(bytesArr, 0, bytesArr.length); + srcBuffer.reset(); + + ByteBuffer destBuffer = allocateOutputBuffer(bytesArr.length); + int pos = destBuffer.position(); + destBuffer.put(bytesArr); + destBuffer.flip(); + destBuffer.position(pos); + + return new ECChunk(destBuffer); + } + + /** + * Allocate a chunk for output or writing. + * @return + */ + protected ECChunk allocateOutputChunk() { + ByteBuffer buffer = allocateOutputBuffer(chunkSize); + + return new ECChunk(buffer); + } + + /** + * Allocate a buffer for output or writing. It can prepare for two kinds of + * data buffers: one with position as 0, the other with position > 0 + * @return a buffer ready to write chunkSize bytes from current position + */ + protected ByteBuffer allocateOutputBuffer(int bufferLen) { + /** + * When startBufferWithZero, will prepare a buffer as:--------------- + * otherwise, the buffer will be like: ___TO--BE--WRITTEN___, + * and in the beginning, dummy data are prefixed, to simulate a buffer of + * position > 0. + */ + int startOffset = startBufferWithZero ? 0 : 11; // 11 is arbitrary + int allocLen = startOffset + bufferLen + startOffset; + ByteBuffer buffer = allocator.allocate(allocLen); + buffer.limit(startOffset + bufferLen); + fillDummyData(buffer, startOffset); + startBufferWithZero = ! startBufferWithZero; + + return buffer; + } + + /** + * Prepare data chunks for each data unit, by generating random data. + * @return + */ + protected ECChunk[] prepareDataChunksForEncoding() { + if (usingFixedData) { + ECChunk[] chunks = new ECChunk[numDataUnits]; + for (int i = 0; i < chunks.length; i++) { + chunks[i] = makeChunkUsingData(fixedData[i]); + } + return chunks; + } + + return generateDataChunks(); + } + + private ECChunk makeChunkUsingData(byte[] data) { + ECChunk chunk = allocateOutputChunk(); + ByteBuffer buffer = chunk.getBuffer(); + int pos = buffer.position(); + buffer.put(data, 0, chunkSize); + buffer.flip(); + buffer.position(pos); + + return chunk; + } + + private ECChunk[] generateDataChunks() { + ECChunk[] chunks = new ECChunk[numDataUnits]; + for (int i = 0; i < chunks.length; i++) { + chunks[i] = generateDataChunk(); + } + + return chunks; + } + + private void prepareFixedData() { + // We may load test data from a resource, or just generate randomly. + // The generated data will be used across subsequent encode/decode calls. 
+ this.fixedData = new byte[numDataUnits][]; + for (int i = 0; i < numDataUnits; i++) { + fixedData[i] = generateFixedData(baseChunkSize * 2); + } + } + + /** + * Generate data chunk by making random data. + * @return + */ + protected ECChunk generateDataChunk() { + ByteBuffer buffer = allocateOutputBuffer(chunkSize); + int pos = buffer.position(); + buffer.put(generateData(chunkSize)); + buffer.flip(); + buffer.position(pos); + + return new ECChunk(buffer); + } + + /** + * Fill len of dummy data in the buffer at the current position. + * @param buffer + * @param len + */ + protected void fillDummyData(ByteBuffer buffer, int len) { + byte[] dummy = new byte[len]; + RAND.nextBytes(dummy); + buffer.put(dummy); + } + + protected byte[] generateData(int len) { + byte[] buffer = new byte[len]; + for (int i = 0; i < buffer.length; i++) { + buffer[i] = (byte) RAND.nextInt(256); + } + return buffer; + } + + protected byte[] generateFixedData(int len) { + byte[] buffer = new byte[len]; + for (int i = 0; i < buffer.length; i++) { + buffer[i] = (byte) FIXED_DATA_GENERATOR++; + if (FIXED_DATA_GENERATOR == 256) { + FIXED_DATA_GENERATOR = 0; + } + } + return buffer; + } + + /** + * Prepare parity chunks for encoding, each chunk for each parity unit. + * @return + */ + protected ECChunk[] prepareParityChunksForEncoding() { + ECChunk[] chunks = new ECChunk[numParityUnits]; + for (int i = 0; i < chunks.length; i++) { + chunks[i] = allocateOutputChunk(); + } + + return chunks; + } + + /** + * Prepare output chunks for decoding, each output chunk for each erased + * chunk. + * @return + */ + protected ECChunk[] prepareOutputChunksForDecoding() { + ECChunk[] chunks = new ECChunk[erasedDataIndexes.length + + erasedParityIndexes.length]; + + for (int i = 0; i < chunks.length; i++) { + chunks[i] = allocateOutputChunk(); + } + + return chunks; + } + + /** + * Convert an array of this chunks to an array of byte array. + * Note the chunk buffers are not affected. + * @param chunks + * @return an array of byte array + */ + protected byte[][] toArrays(ECChunk[] chunks) { + byte[][] bytesArr = new byte[chunks.length][]; + + for (int i = 0; i < chunks.length; i++) { + bytesArr[i] = chunks[i].toBytesArray(); + } + + return bytesArr; + } + + /** + * Dump all the settings used in the test case if allowDump is enabled. + */ + protected void dumpSetting() { + if (allowDump) { + StringBuilder sb = new StringBuilder("Erasure coder test settings:\n"); + sb.append(" numDataUnits=").append(numDataUnits); + sb.append(" numParityUnits=").append(numParityUnits); + sb.append(" chunkSize=").append(chunkSize).append("\n"); + + sb.append(" erasedDataIndexes="). + append(Arrays.toString(erasedDataIndexes)); + sb.append(" erasedParityIndexes="). + append(Arrays.toString(erasedParityIndexes)); + sb.append(" usingDirectBuffer=").append(usingDirectBuffer).append("\n"); + + System.out.println(sb.toString()); + } + } + + /** + * Dump chunks prefixed with a header if allowDump is enabled. 
+ * @param header + * @param chunks + */ + protected void dumpChunks(String header, ECChunk[] chunks) { + if (allowDump) { + DumpUtil.dumpChunks(header, chunks); + } + } + + /** + * Make some chunk messy or not correct any more + * @param chunks + */ + protected void corruptSomeChunk(ECChunk[] chunks) { + int idx = new Random().nextInt(chunks.length); + ByteBuffer buffer = chunks[idx].getBuffer(); + if (buffer.hasRemaining()) { + buffer.position(buffer.position() + 1); + } + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc2564af/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/TestECSchema.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/TestECSchema.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/TestECSchema.java new file mode 100644 index 0000000..c362b96 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/TestECSchema.java @@ -0,0 +1,51 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.io.erasurecode; + +import org.junit.Test; +import static org.junit.Assert.assertEquals; +import java.util.HashMap; +import java.util.Map; + +public class TestECSchema { + + @Test + public void testGoodSchema() { + String schemaName = "goodSchema"; + int numDataUnits = 6; + int numParityUnits = 3; + String codec = "rs"; + String extraOption = "extraOption"; + String extraOptionValue = "extraOptionValue"; + + Map<String, String> options = new HashMap<String, String>(); + options.put(ECSchema.NUM_DATA_UNITS_KEY, String.valueOf(numDataUnits)); + options.put(ECSchema.NUM_PARITY_UNITS_KEY, String.valueOf(numParityUnits)); + options.put(ECSchema.CODEC_NAME_KEY, codec); + options.put(extraOption, extraOptionValue); + + ECSchema schema = new ECSchema(schemaName, options); + System.out.println(schema.toString()); + + assertEquals(schemaName, schema.getSchemaName()); + assertEquals(numDataUnits, schema.getNumDataUnits()); + assertEquals(numParityUnits, schema.getNumParityUnits()); + assertEquals(codec, schema.getCodecName()); + assertEquals(extraOptionValue, schema.getExtraOptions().get(extraOption)); + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc2564af/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/TestSchemaLoader.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/TestSchemaLoader.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/TestSchemaLoader.java new file mode 100644 index 0000000..50d2091 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/TestSchemaLoader.java @@ -0,0 +1,74 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.io.erasurecode; + +import static org.junit.Assert.assertEquals; + +import java.io.File; +import java.io.FileWriter; +import java.io.PrintWriter; +import java.util.List; + +import org.junit.Test; + +public class TestSchemaLoader { + + final static String TEST_DIR = new File(System.getProperty( + "test.build.data", "/tmp")).getAbsolutePath(); + + final static String SCHEMA_FILE = new File(TEST_DIR, "test-ecschema") + .getAbsolutePath(); + + @Test + public void testLoadSchema() throws Exception { + PrintWriter out = new PrintWriter(new FileWriter(SCHEMA_FILE)); + out.println("<?xml version=\"1.0\"?>"); + out.println("<schemas>"); + out.println(" <schema name=\"RSk6m3\">"); + out.println(" <numDataUnits>6</numDataUnits>"); + out.println(" <numParityUnits>3</numParityUnits>"); + out.println(" <codec>RS</codec>"); + out.println(" </schema>"); + out.println(" <schema name=\"RSk10m4\">"); + out.println(" <numDataUnits>10</numDataUnits>"); + out.println(" <numParityUnits>4</numParityUnits>"); + out.println(" <codec>RS</codec>"); + out.println(" </schema>"); + out.println("</schemas>"); + out.close(); + + SchemaLoader schemaLoader = new SchemaLoader(); + List<ECSchema> schemas = schemaLoader.loadSchema(SCHEMA_FILE); + + assertEquals(2, schemas.size()); + + ECSchema schema1 = schemas.get(0); + assertEquals("RSk6m3", schema1.getSchemaName()); + assertEquals(0, schema1.getExtraOptions().size()); + assertEquals(6, schema1.getNumDataUnits()); + assertEquals(3, schema1.getNumParityUnits()); + assertEquals("RS", schema1.getCodecName()); + + ECSchema schema2 = schemas.get(1); + assertEquals("RSk10m4", schema2.getSchemaName()); + assertEquals(0, schema2.getExtraOptions().size()); + assertEquals(10, schema2.getNumDataUnits()); + assertEquals(4, schema2.getNumParityUnits()); + assertEquals("RS", schema2.getCodecName()); + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc2564af/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/coder/TestErasureCoderBase.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/coder/TestErasureCoderBase.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/coder/TestErasureCoderBase.java new file mode 100644 index 0000000..738d28e --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/coder/TestErasureCoderBase.java @@ -0,0 +1,297 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.io.erasurecode.coder; + +import org.apache.hadoop.io.erasurecode.ECBlock; +import org.apache.hadoop.io.erasurecode.ECBlockGroup; +import org.apache.hadoop.io.erasurecode.ECChunk; +import org.apache.hadoop.io.erasurecode.TestCoderBase; + +import java.lang.reflect.Constructor; + +/** + * Erasure coder test base with utilities. + */ +public abstract class TestErasureCoderBase extends TestCoderBase { + protected Class<? extends ErasureCoder> encoderClass; + protected Class<? extends ErasureCoder> decoderClass; + + private ErasureCoder encoder; + private ErasureCoder decoder; + + protected int numChunksInBlock = 16; + + /** + * It's just a block for this test purpose. We don't use HDFS block here + * at all for simple. + */ + protected static class TestBlock extends ECBlock { + private ECChunk[] chunks; + + // For simple, just assume the block have the chunks already ready. + // In practice we need to read/write chunks from/to the block via file IO. + public TestBlock(ECChunk[] chunks) { + this.chunks = chunks; + } + } + + /** + * Generating source data, encoding, recovering and then verifying. + * RawErasureCoder mainly uses ECChunk to pass input and output data buffers, + * it supports two kinds of ByteBuffers, one is array backed, the other is + * direct ByteBuffer. Have usingDirectBuffer to indicate which case to test. + * @param usingDirectBuffer + */ + protected void testCoding(boolean usingDirectBuffer) { + this.usingDirectBuffer = usingDirectBuffer; + prepareCoders(); + + /** + * The following runs will use 3 different chunkSize for inputs and outputs, + * to verify the same encoder/decoder can process variable width of data. + */ + performTestCoding(baseChunkSize, true); + performTestCoding(baseChunkSize - 17, false); + performTestCoding(baseChunkSize + 16, true); + } + + private void performTestCoding(int chunkSize, boolean usingSlicedBuffer) { + setChunkSize(chunkSize); + prepareBufferAllocator(usingSlicedBuffer); + + // Generate data and encode + ECBlockGroup blockGroup = prepareBlockGroupForEncoding(); + // Backup all the source chunks for later recovering because some coders + // may affect the source data. + TestBlock[] clonedDataBlocks = + cloneBlocksWithData((TestBlock[]) blockGroup.getDataBlocks()); + TestBlock[] parityBlocks = (TestBlock[]) blockGroup.getParityBlocks(); + + ErasureCodingStep codingStep; + codingStep = encoder.calculateCoding(blockGroup); + performCodingStep(codingStep); + // Erase specified sources but return copies of them for later comparing + TestBlock[] backupBlocks = backupAndEraseBlocks(clonedDataBlocks, parityBlocks); + + // Decode + blockGroup = new ECBlockGroup(clonedDataBlocks, blockGroup.getParityBlocks()); + codingStep = decoder.calculateCoding(blockGroup); + performCodingStep(codingStep); + + // Compare + compareAndVerify(backupBlocks, codingStep.getOutputBlocks()); + } + + /** + * This is typically how a coding step should be performed. + * @param codingStep + */ + private void performCodingStep(ErasureCodingStep codingStep) { + // Pretend that we're opening these input blocks and output blocks. + ECBlock[] inputBlocks = codingStep.getInputBlocks(); + ECBlock[] outputBlocks = codingStep.getOutputBlocks(); + // We allocate input and output chunks accordingly. + ECChunk[] inputChunks = new ECChunk[inputBlocks.length]; + ECChunk[] outputChunks = new ECChunk[outputBlocks.length]; + + for (int i = 0; i < numChunksInBlock; ++i) { + // Pretend that we're reading input chunks from input blocks. 
+ for (int j = 0; j < inputBlocks.length; ++j) { + inputChunks[j] = ((TestBlock) inputBlocks[j]).chunks[i]; + } + + // Pretend that we allocate and will write output results to the blocks. + for (int j = 0; j < outputBlocks.length; ++j) { + outputChunks[j] = allocateOutputChunk(); + ((TestBlock) outputBlocks[j]).chunks[i] = outputChunks[j]; + } + + // Given the input chunks and output chunk buffers, just call it ! + codingStep.performCoding(inputChunks, outputChunks); + } + + codingStep.finish(); + } + + /** + * Compare and verify if recovered blocks data are the same with the erased + * blocks data. + * @param erasedBlocks + * @param recoveredBlocks + */ + protected void compareAndVerify(ECBlock[] erasedBlocks, + ECBlock[] recoveredBlocks) { + for (int i = 0; i < erasedBlocks.length; ++i) { + compareAndVerify(((TestBlock) erasedBlocks[i]).chunks, ((TestBlock) recoveredBlocks[i]).chunks); + } + } + + private void prepareCoders() { + if (encoder == null) { + encoder = createEncoder(); + } + + if (decoder == null) { + decoder = createDecoder(); + } + } + + /** + * Create the raw erasure encoder to test + * @return + */ + protected ErasureCoder createEncoder() { + ErasureCoder encoder; + try { + Constructor<? extends ErasureCoder> constructor = + (Constructor<? extends ErasureCoder>) + encoderClass.getConstructor(int.class, int.class); + encoder = constructor.newInstance(numDataUnits, numParityUnits); + } catch (Exception e) { + throw new RuntimeException("Failed to create encoder", e); + } + + encoder.setConf(getConf()); + return encoder; + } + + /** + * create the raw erasure decoder to test + * @return + */ + protected ErasureCoder createDecoder() { + ErasureCoder decoder; + try { + Constructor<? extends ErasureCoder> constructor = + (Constructor<? extends ErasureCoder>) + decoderClass.getConstructor(int.class, int.class); + decoder = constructor.newInstance(numDataUnits, numParityUnits); + } catch (Exception e) { + throw new RuntimeException("Failed to create decoder", e); + } + + decoder.setConf(getConf()); + return decoder; + } + + /** + * Prepare a block group for encoding. + * @return + */ + protected ECBlockGroup prepareBlockGroupForEncoding() { + ECBlock[] dataBlocks = new TestBlock[numDataUnits]; + ECBlock[] parityBlocks = new TestBlock[numParityUnits]; + + for (int i = 0; i < numDataUnits; i++) { + dataBlocks[i] = generateDataBlock(); + } + + for (int i = 0; i < numParityUnits; i++) { + parityBlocks[i] = allocateOutputBlock(); + } + + return new ECBlockGroup(dataBlocks, parityBlocks); + } + + /** + * Generate random data and return a data block. + * @return + */ + protected ECBlock generateDataBlock() { + ECChunk[] chunks = new ECChunk[numChunksInBlock]; + + for (int i = 0; i < numChunksInBlock; ++i) { + chunks[i] = generateDataChunk(); + } + + return new TestBlock(chunks); + } + + /** + * Erase blocks to test the recovering of them. Before erasure clone them + * first so could return themselves. 
+ * @param dataBlocks + * @return clone of erased dataBlocks + */ + protected TestBlock[] backupAndEraseBlocks(TestBlock[] dataBlocks, + TestBlock[] parityBlocks) { + TestBlock[] toEraseBlocks = new TestBlock[erasedDataIndexes.length + + erasedParityIndexes.length]; + int idx = 0; + TestBlock block; + + for (int i = 0; i < erasedParityIndexes.length; i++) { + block = parityBlocks[erasedParityIndexes[i]]; + toEraseBlocks[idx ++] = cloneBlockWithData(block); + eraseDataFromBlock(block); + } + + for (int i = 0; i < erasedDataIndexes.length; i++) { + block = dataBlocks[erasedDataIndexes[i]]; + toEraseBlocks[idx ++] = cloneBlockWithData(block); + eraseDataFromBlock(block); + } + + return toEraseBlocks; + } + + /** + * Allocate an output block. Note the chunk buffer will be allocated by the + * up caller when performing the coding step. + * @return + */ + protected TestBlock allocateOutputBlock() { + ECChunk[] chunks = new ECChunk[numChunksInBlock]; + + return new TestBlock(chunks); + } + + /** + * Clone blocks with data copied along with, avoiding affecting the original + * blocks. + * @param blocks + * @return + */ + protected TestBlock[] cloneBlocksWithData(TestBlock[] blocks) { + TestBlock[] results = new TestBlock[blocks.length]; + for (int i = 0; i < blocks.length; ++i) { + results[i] = cloneBlockWithData(blocks[i]); + } + + return results; + } + + /** + * Clone exactly a block, avoiding affecting the original block. + * @param block + * @return a new block + */ + protected TestBlock cloneBlockWithData(TestBlock block) { + ECChunk[] newChunks = cloneChunksWithData(block.chunks); + + return new TestBlock(newChunks); + } + + /** + * Erase data from a block. + */ + protected void eraseDataFromBlock(TestBlock theBlock) { + eraseDataFromChunks(theBlock.chunks); + theBlock.setErased(true); + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc2564af/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/coder/TestRSErasureCoder.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/coder/TestRSErasureCoder.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/coder/TestRSErasureCoder.java new file mode 100644 index 0000000..94f77db --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/coder/TestRSErasureCoder.java @@ -0,0 +1,126 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.io.erasurecode.coder; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeys; +import org.apache.hadoop.io.erasurecode.rawcoder.RSRawErasureCoderFactory; +import org.junit.Before; +import org.junit.Test; + +/** + * Test Reed-Solomon encoding and decoding. + */ +public class TestRSErasureCoder extends TestErasureCoderBase { + + @Before + public void setup() { + this.encoderClass = RSErasureEncoder.class; + this.decoderClass = RSErasureDecoder.class; + + this.numDataUnits = 10; + this.numParityUnits = 1; + + this.numChunksInBlock = 10; + } + + @Test + public void testCodingNoDirectBuffer_10x4_erasing_d0_p0() { + prepare(null, 10, 4, new int[] {0}, new int[] {0}); + /** + * Doing twice to test if the coders can be repeatedly reused. This matters + * as the underlying coding buffers are shared, which may have bugs. + */ + testCoding(false); + testCoding(false); + } + + @Test + public void testCodingDirectBufferWithConf_10x4_erasing_d0() { + /** + * This tests if the configuration items work or not. + */ + Configuration conf = new Configuration(); + conf.set(CommonConfigurationKeys.IO_ERASURECODE_CODEC_RS_RAWCODER_KEY, + RSRawErasureCoderFactory.class.getCanonicalName()); + prepare(conf, 10, 4, new int[]{0}, new int[0]); + + testCoding(true); + testCoding(true); + } + + @Test + public void testCodingDirectBuffer_10x4_erasing_p1() { + prepare(null, 10, 4, new int[]{}, new int[]{1}); + testCoding(true); + testCoding(true); + } + + @Test + public void testCodingDirectBuffer_10x4_erasing_d2() { + prepare(null, 10, 4, new int[] {2}, new int[] {}); + testCoding(true); + testCoding(true); + } + + @Test + public void testCodingDirectBuffer_10x4_erasing_d0_p0() { + prepare(null, 10, 4, new int[] {0}, new int[] {0}); + testCoding(true); + testCoding(true); + } + + @Test + public void testCodingBothBuffers_10x4_erasing_d0_p0() { + prepare(null, 10, 4, new int[] {0}, new int[] {0}); + + /** + * Doing in mixed buffer usage model to test if the coders can be repeatedly + * reused with different buffer usage model. This matters as the underlying + * coding buffers are shared, which may have bugs. 
+ */ + testCoding(true); + testCoding(false); + testCoding(true); + testCoding(false); + } + + @Test + public void testCodingDirectBuffer_10x4_erasure_of_d2_d4_p0() { + prepare(null, 10, 4, new int[] {2, 4}, new int[] {0}); + testCoding(true); + } + + @Test + public void testCodingDirectBuffer_10x4_erasing_d0_d1_p0_p1() { + prepare(null, 10, 4, new int[] {0, 1}, new int[] {0, 1}); + testCoding(true); + } + + @Test + public void testCodingNoDirectBuffer_3x3_erasing_d0_p0() { + prepare(null, 3, 3, new int[] {0}, new int[] {0}); + testCoding(false); + } + + @Test + public void testCodingDirectBuffer_3x3_erasing_d0_p0() { + prepare(null, 3, 3, new int[] {0}, new int[] {0}); + testCoding(true); + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc2564af/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/coder/TestXORCoder.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/coder/TestXORCoder.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/coder/TestXORCoder.java new file mode 100644 index 0000000..06e0087 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/coder/TestXORCoder.java @@ -0,0 +1,64 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.erasurecode.coder; + +import org.junit.Before; +import org.junit.Test; + +/** + * Test XOR encoding and decoding. + */ +public class TestXORCoder extends TestErasureCoderBase { + + @Before + public void setup() { + this.encoderClass = XORErasureEncoder.class; + this.decoderClass = XORErasureDecoder.class; + + this.numDataUnits = 10; + this.numParityUnits = 1; + this.numChunksInBlock = 10; + } + + @Test + public void testCodingNoDirectBuffer_erasing_p0() { + prepare(null, 10, 1, new int[0], new int[] {0}); + + /** + * Doing twice to test if the coders can be repeatedly reused. This matters + * as the underlying coding buffers are shared, which may have bugs. + */ + testCoding(false); + testCoding(false); + } + + @Test + public void testCodingBothBuffers_erasing_d5() { + prepare(null, 10, 1, new int[]{5}, new int[0]); + + /** + * Doing in mixed buffer usage model to test if the coders can be repeatedly + * reused with different buffer usage model. This matters as the underlying + * coding buffers are shared, which may have bugs. 
+ */ + testCoding(true); + testCoding(false); + testCoding(true); + testCoding(false); + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc2564af/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestRSRawCoder.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestRSRawCoder.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestRSRawCoder.java new file mode 100644 index 0000000..a35a4dd --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestRSRawCoder.java @@ -0,0 +1,118 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.erasurecode.rawcoder; + +import org.junit.Before; +import org.junit.Test; + +/** + * Test raw Reed-solomon coder implemented in Java. + */ +public class TestRSRawCoder extends TestRSRawCoderBase { + + @Before + public void setup() { + this.encoderClass = RSRawEncoder.class; + this.decoderClass = RSRawDecoder.class; + setAllowDump(false); // Change to true to allow verbose dump for debugging + } + + @Test + public void testCoding_6x3_erasing_all_d() { + prepare(null, 6, 3, new int[]{0, 1, 2}, new int[0], true); + testCodingDoMixAndTwice(); + } + + @Test + public void testCoding_6x3_erasing_d0_d2() { + prepare(null, 6, 3, new int[] {0, 2}, new int[]{}); + testCodingDoMixAndTwice(); + } + + @Test + public void testCoding_6x3_erasing_d0() { + prepare(null, 6, 3, new int[]{0}, new int[0]); + testCodingDoMixAndTwice(); + } + + @Test + public void testCoding_6x3_erasing_d2() { + prepare(null, 6, 3, new int[]{2}, new int[]{}); + testCodingDoMixAndTwice(); + } + + @Test + public void testCoding_6x3_erasing_d0_p0() { + prepare(null, 6, 3, new int[]{0}, new int[]{0}); + testCodingDoMixAndTwice(); + } + + @Test + public void testCoding_6x3_erasing_all_p() { + prepare(null, 6, 3, new int[0], new int[]{0, 1, 2}); + testCodingDoMixAndTwice(); + } + + @Test + public void testCoding_6x3_erasing_p0() { + prepare(null, 6, 3, new int[0], new int[]{0}); + testCodingDoMixAndTwice(); + } + + @Test + public void testCoding_6x3_erasing_p2() { + prepare(null, 6, 3, new int[0], new int[]{2}); + testCodingDoMixAndTwice(); + } + + @Test + public void testCoding_6x3_erasure_p0_p2() { + prepare(null, 6, 3, new int[0], new int[]{0, 2}); + testCodingDoMixAndTwice(); + } + + @Test + public void testCoding_6x3_erasing_d0_p0_p1() { + prepare(null, 6, 3, new int[]{0}, new int[]{0, 1}); + testCodingDoMixAndTwice(); + } + + @Test + public void testCoding_6x3_erasing_d0_d2_p2() { + prepare(null, 6, 3, new int[]{0, 2}, new int[]{2}); + 
testCodingDoMixAndTwice(); + } + + @Test + public void testCodingNegative_6x3_erasing_d2_d4() { + prepare(null, 6, 3, new int[]{2, 4}, new int[0]); + testCodingDoMixAndTwice(); + } + + @Test + public void testCodingNegative_6x3_erasing_too_many() { + prepare(null, 6, 3, new int[]{2, 4}, new int[]{0, 1}); + testCodingWithErasingTooMany(); + } + + @Test + public void testCoding_10x4_erasing_d0_p0() { + prepare(null, 10, 4, new int[] {0}, new int[] {0}); + testCodingDoMixAndTwice(); + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc2564af/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestRSRawCoderBase.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestRSRawCoderBase.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestRSRawCoderBase.java new file mode 100644 index 0000000..efde332 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestRSRawCoderBase.java @@ -0,0 +1,58 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.erasurecode.rawcoder; + +import org.apache.hadoop.io.erasurecode.rawcoder.util.RSUtil; + +/** + * Test base for raw Reed-solomon coders. 
+ */ +public abstract class TestRSRawCoderBase extends TestRawCoderBase { + + private static int symbolSize = 0; + private static int symbolMax = 0; + + private static int RS_FIXED_DATA_GENERATOR = 0; + + static { + symbolSize = (int) Math.round(Math.log( + RSUtil.GF.getFieldSize()) / Math.log(2)); + symbolMax = (int) Math.pow(2, symbolSize); + } + + @Override + protected byte[] generateData(int len) { + byte[] buffer = new byte[len]; + for (int i = 0; i < buffer.length; i++) { + buffer[i] = (byte) RAND.nextInt(symbolMax); + } + return buffer; + } + + @Override + protected byte[] generateFixedData(int len) { + byte[] buffer = new byte[len]; + for (int i = 0; i < buffer.length; i++) { + buffer[i] = (byte) RS_FIXED_DATA_GENERATOR++; + if (RS_FIXED_DATA_GENERATOR == symbolMax) { + RS_FIXED_DATA_GENERATOR = 0; + } + } + return buffer; + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc2564af/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestRawCoderBase.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestRawCoderBase.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestRawCoderBase.java new file mode 100644 index 0000000..2b7a3c4 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestRawCoderBase.java @@ -0,0 +1,232 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.erasurecode.rawcoder; + +import org.apache.hadoop.io.erasurecode.ECChunk; +import org.apache.hadoop.io.erasurecode.TestCoderBase; +import org.junit.Assert; +import org.junit.Test; + +import java.lang.reflect.Constructor; + +/** + * Raw coder test base with utilities. + */ +public abstract class TestRawCoderBase extends TestCoderBase { + protected Class<? extends RawErasureEncoder> encoderClass; + protected Class<? extends RawErasureDecoder> decoderClass; + private RawErasureEncoder encoder; + private RawErasureDecoder decoder; + + /** + * Doing twice to test if the coders can be repeatedly reused. This matters + * as the underlying coding buffers are shared, which may have bugs. + */ + protected void testCodingDoMixAndTwice() { + testCodingDoMixed(); + testCodingDoMixed(); + } + + /** + * Doing in mixed buffer usage model to test if the coders can be repeatedly + * reused with different buffer usage model. This matters as the underlying + * coding buffers are shared, which may have bugs. + */ + protected void testCodingDoMixed() { + testCoding(true); + testCoding(false); + } + + /** + * Generating source data, encoding, recovering and then verifying. 
+ * RawErasureCoder mainly uses ECChunk to pass input and output data buffers, + * it supports two kinds of ByteBuffers, one is array backed, the other is + * direct ByteBuffer. Use usingDirectBuffer indicate which case to test. + * + * @param usingDirectBuffer + */ + protected void testCoding(boolean usingDirectBuffer) { + this.usingDirectBuffer = usingDirectBuffer; + prepareCoders(); + + /** + * The following runs will use 3 different chunkSize for inputs and outputs, + * to verify the same encoder/decoder can process variable width of data. + */ + performTestCoding(baseChunkSize, true, false, false); + performTestCoding(baseChunkSize - 17, false, false, false); + performTestCoding(baseChunkSize + 16, true, false, false); + } + + /** + * Similar to above, but perform negative cases using bad input for encoding. + * @param usingDirectBuffer + */ + protected void testCodingWithBadInput(boolean usingDirectBuffer) { + this.usingDirectBuffer = usingDirectBuffer; + prepareCoders(); + + try { + performTestCoding(baseChunkSize, false, true, false); + Assert.fail("Encoding test with bad input should fail"); + } catch (Exception e) { + // Expected + } + } + + /** + * Similar to above, but perform negative cases using bad output for decoding. + * @param usingDirectBuffer + */ + protected void testCodingWithBadOutput(boolean usingDirectBuffer) { + this.usingDirectBuffer = usingDirectBuffer; + prepareCoders(); + + try { + performTestCoding(baseChunkSize, false, false, true); + Assert.fail("Decoding test with bad output should fail"); + } catch (Exception e) { + // Expected + } + } + + @Test + public void testCodingWithErasingTooMany() { + try { + testCoding(true); + Assert.fail("Decoding test erasing too many should fail"); + } catch (Exception e) { + // Expected + } + + try { + testCoding(false); + Assert.fail("Decoding test erasing too many should fail"); + } catch (Exception e) { + // Expected + } + } + + private void performTestCoding(int chunkSize, boolean usingSlicedBuffer, + boolean useBadInput, boolean useBadOutput) { + setChunkSize(chunkSize); + prepareBufferAllocator(usingSlicedBuffer); + + dumpSetting(); + + // Generate data and encode + ECChunk[] dataChunks = prepareDataChunksForEncoding(); + if (useBadInput) { + corruptSomeChunk(dataChunks); + } + dumpChunks("Testing data chunks", dataChunks); + + ECChunk[] parityChunks = prepareParityChunksForEncoding(); + + // Backup all the source chunks for later recovering because some coders + // may affect the source data. + ECChunk[] clonedDataChunks = cloneChunksWithData(dataChunks); + + encoder.encode(dataChunks, parityChunks); + dumpChunks("Encoded parity chunks", parityChunks); + + // Backup and erase some chunks + ECChunk[] backupChunks = backupAndEraseChunks(clonedDataChunks, parityChunks); + + // Decode + ECChunk[] inputChunks = prepareInputChunksForDecoding( + clonedDataChunks, parityChunks); + + // Remove unnecessary chunks, allowing only least required chunks to be read. 
+ ensureOnlyLeastRequiredChunks(inputChunks); + + ECChunk[] recoveredChunks = prepareOutputChunksForDecoding(); + if (useBadOutput) { + corruptSomeChunk(recoveredChunks); + } + + dumpChunks("Decoding input chunks", inputChunks); + decoder.decode(inputChunks, getErasedIndexesForDecoding(), recoveredChunks); + dumpChunks("Decoded/recovered chunks", recoveredChunks); + + // Compare + compareAndVerify(backupChunks, recoveredChunks); + } + + private void prepareCoders() { + if (encoder == null) { + encoder = createEncoder(); + } + + if (decoder == null) { + decoder = createDecoder(); + } + } + + private void ensureOnlyLeastRequiredChunks(ECChunk[] inputChunks) { + int leastRequiredNum = numDataUnits; + int erasedNum = erasedDataIndexes.length + erasedParityIndexes.length; + int goodNum = inputChunks.length - erasedNum; + int redundantNum = goodNum - leastRequiredNum; + + for (int i = 0; i < inputChunks.length && redundantNum > 0; i++) { + if (inputChunks[i] != null) { + inputChunks[i] = null; // Setting it null, not needing it actually + redundantNum--; + } + } + } + + /** + * Create the raw erasure encoder to test + * @return + */ + protected RawErasureEncoder createEncoder() { + RawErasureEncoder encoder; + try { + Constructor<? extends RawErasureEncoder> constructor = + (Constructor<? extends RawErasureEncoder>) + encoderClass.getConstructor(int.class, int.class); + encoder = constructor.newInstance(numDataUnits, numParityUnits); + } catch (Exception e) { + throw new RuntimeException("Failed to create encoder", e); + } + + encoder.setConf(getConf()); + return encoder; + } + + /** + * create the raw erasure decoder to test + * @return + */ + protected RawErasureDecoder createDecoder() { + RawErasureDecoder decoder; + try { + Constructor<? extends RawErasureDecoder> constructor = + (Constructor<? extends RawErasureDecoder>) + decoderClass.getConstructor(int.class, int.class); + decoder = constructor.newInstance(numDataUnits, numParityUnits); + } catch (Exception e) { + throw new RuntimeException("Failed to create decoder", e); + } + + decoder.setConf(getConf()); + return decoder; + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc2564af/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestXORRawCoder.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestXORRawCoder.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestXORRawCoder.java new file mode 100644 index 0000000..48463ad --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestXORRawCoder.java @@ -0,0 +1,66 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.erasurecode.rawcoder; + +import org.junit.Before; +import org.junit.Test; + +/** + * Test XOR encoding and decoding. + */ +public class TestXORRawCoder extends TestRawCoderBase { + + @Before + public void setup() { + this.encoderClass = XORRawEncoder.class; + this.decoderClass = XORRawDecoder.class; + } + + @Test + public void testCoding_10x1_erasing_d0() { + prepare(null, 10, 1, new int[] {0}, new int[0]); + testCodingDoMixAndTwice(); + } + + @Test + public void testCoding_10x1_erasing_p0() { + prepare(null, 10, 1, new int[0], new int[] {0}); + testCodingDoMixAndTwice(); + } + + @Test + public void testCoding_10x1_erasing_d5() { + prepare(null, 10, 1, new int[]{5}, new int[0]); + testCodingDoMixAndTwice(); + } + + @Test + public void testCodingNegative_10x1_erasing_too_many() { + prepare(null, 10, 1, new int[]{2}, new int[]{0}); + testCodingWithErasingTooMany(); + } + + @Test + public void testCodingNegative_10x1_erasing_d5() { + prepare(null, 10, 1, new int[]{5}, new int[0]); + testCodingWithBadInput(true); + testCodingWithBadOutput(false); + testCodingWithBadInput(true); + testCodingWithBadOutput(false); + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc2564af/hadoop-hdfs-project/hadoop-hdfs-client/pom.xml ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/pom.xml b/hadoop-hdfs-project/hadoop-hdfs-client/pom.xml index aeaa980..03b4a1c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/pom.xml +++ b/hadoop-hdfs-project/hadoop-hdfs-client/pom.xml @@ -91,6 +91,7 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> <include>hdfs.proto</include> <include>encryption.proto</include> <include>inotify.proto</include> + <include>erasurecoding.proto</include> </includes> </source> <output>${project.build.directory}/generated-sources/java</output> http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc2564af/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java index 600c7ca..214e15d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java @@ -179,6 +179,18 @@ public interface HdfsClientConfigKeys { int THREADPOOL_SIZE_DEFAULT = 0; } + /** dfs.client.read.striped configuration properties */ + interface StripedRead { + String PREFIX = Read.PREFIX + "striped."; + + String THREADPOOL_SIZE_KEY = PREFIX + "threadpool.size"; + /** + * With default 6+3 schema, each normal read could span 6 DNs. 
So this + * default value accommodates 3 read streams + */ + int THREADPOOL_SIZE_DEFAULT = 18; + } + /** dfs.http.client configuration properties */ interface HttpClient { String PREFIX = "dfs.http.client."; http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc2564af/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java index 8528999..e37f440 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java @@ -45,6 +45,7 @@ import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSelector; import org.apache.hadoop.hdfs.server.protocol.DatanodeStorageReport; import org.apache.hadoop.io.EnumSetWritable; import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.erasurecode.ECSchema; import org.apache.hadoop.io.retry.AtMostOnce; import org.apache.hadoop.io.retry.Idempotent; import org.apache.hadoop.security.KerberosInfo; @@ -1483,4 +1484,30 @@ public interface ClientProtocol { */ @Idempotent EventBatchList getEditsFromTxid(long txid) throws IOException; + + /** + * Create an erasure coding zone with specified schema, if any, otherwise + * default + */ + @AtMostOnce + void createErasureCodingZone(String src, ECSchema schema, int cellSize) + throws IOException; + + /** + * Gets list of ECSchemas loaded in Namenode + * + * @return Returns the list of ECSchemas loaded at Namenode + * @throws IOException + */ + @Idempotent + ECSchema[] getECSchemas() throws IOException; + + /** + * Get the information about the EC zone for the path + * + * @param src path to get the info for + * @throws IOException + */ + @Idempotent + ErasureCodingZone getErasureCodingZone(String src) throws IOException; } http://git-wip-us.apache.org/repos/asf/hadoop/blob/bc2564af/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ErasureCodingZone.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ErasureCodingZone.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ErasureCodingZone.java new file mode 100644 index 0000000..655def3 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ErasureCodingZone.java @@ -0,0 +1,66 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package org.apache.hadoop.hdfs.protocol; + +import org.apache.hadoop.io.erasurecode.ECSchema; + +/** + * Information about the EC Zone at the specified path. + */ +public class ErasureCodingZone { + + private String dir; + private ECSchema schema; + private int cellSize; + + public ErasureCodingZone(String dir, ECSchema schema, int cellSize) { + this.dir = dir; + this.schema = schema; + this.cellSize = cellSize; + } + + /** + * Get directory of the EC zone. + * + * @return + */ + public String getDir() { + return dir; + } + + /** + * Get the schema for the EC Zone + * + * @return + */ + public ECSchema getSchema() { + return schema; + } + + /** + * Get cellSize for the EC Zone + */ + public int getCellSize() { + return cellSize; + } + + @Override + public String toString() { + return "Dir: " + getDir() + ", Schema: " + schema + ", cellSize: " + + cellSize; + } +}
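The two files above only declare the new client-facing surface: three ClientProtocol methods and the ErasureCodingZone value class. As a rough illustration of how a caller might drive them, here is a hedged sketch; obtaining the ClientProtocol proxy is elided, and the null-handling for a path outside any zone is an assumption, since the patch itself does not specify how a missing zone is reported.

import java.io.IOException;
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.hdfs.protocol.ErasureCodingZone;
import org.apache.hadoop.io.erasurecode.ECSchema;

/**
 * Illustrative only: prints the schemas loaded at the NameNode and the EC
 * zone covering a path, using the ClientProtocol methods added by this patch.
 * Acquiring the ClientProtocol proxy (namenode) is outside the patch's scope.
 */
public class EcZoneInfoSketch {

  static void printEcInfo(ClientProtocol namenode, String path)
      throws IOException {
    // List the ECSchemas the NameNode has loaded.
    for (ECSchema schema : namenode.getECSchemas()) {
      System.out.println("Loaded schema: " + schema);
    }

    // Look up the zone covering the given path. Whether a path outside any
    // zone yields null or an exception is an assumption here, not something
    // the patch states.
    ErasureCodingZone zone = namenode.getErasureCodingZone(path);
    if (zone != null) {
      System.out.println("Zone dir:  " + zone.getDir());
      System.out.println("Schema:    " + zone.getSchema());
      System.out.println("Cell size: " + zone.getCellSize());
    } else {
      System.out.println(path + " is not in an erasure coding zone");
    }
  }
}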

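Tying the test classes in this patch back to what they exercise: TestRawCoderBase encodes a set of data chunks into parity chunks, erases some of them, decodes, and compares the recovered chunks against backups. The following self-contained sketch reproduces that round trip with a plain XOR parity scheme over byte arrays, which is the math behind the 10x1 XOR cases such as testCoding_10x1_erasing_d5. It deliberately does not use the Hadoop ECChunk/RawErasureEncoder API; class name and layout here are illustrative only.

import java.util.Arrays;
import java.util.Random;

public class XorRoundTripSketch {

  public static void main(String[] args) {
    final int numDataUnits = 10;  // mirrors the 10x1 layout in TestXORRawCoder
    final int chunkSize = 1024;
    Random rand = new Random();

    // Generate source data chunks, as generateData() does in the tests.
    byte[][] data = new byte[numDataUnits][chunkSize];
    for (byte[] chunk : data) {
      rand.nextBytes(chunk);
    }

    // Encode: the single XOR parity chunk is the XOR of all data chunks.
    byte[] parity = new byte[chunkSize];
    for (byte[] chunk : data) {
      for (int i = 0; i < chunkSize; i++) {
        parity[i] ^= chunk[i];
      }
    }

    // Back up and erase data chunk d5 (backupAndEraseChunks in the tests).
    final int erased = 5;
    byte[] backup = data[erased].clone();
    data[erased] = null;

    // Decode: XOR of the parity chunk with the surviving data chunks
    // reconstructs the erased chunk.
    byte[] recovered = parity.clone();
    for (int j = 0; j < numDataUnits; j++) {
      if (j == erased) {
        continue;
      }
      for (int i = 0; i < chunkSize; i++) {
        recovered[i] ^= data[j][i];
      }
    }

    // Verify, mirroring compareAndVerify().
    System.out.println("recovered d5 matches backup: "
        + Arrays.equals(backup, recovered));
  }
}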