Repository: systemml
Updated Branches:
  refs/heads/master c69bd7f30 -> 8ca61ae26
[SYSTEMML-1958] Performance sparse conv2d via transposed im2col-mm

This patch improves the performance of sparse conv2d operations, where the input matrix is sparse. The traditional approach performs im2col on Xi to get dXi and computes the partial result with F %*% dXi, which relies on dense-sparse matrix multiplications that cause unnecessary overhead due to scattered writes in the inner loop. Instead, we now perform this operation as t(t(dXi) %*% t(F)), which uses sparse-dense matrix multiplications that exploit sparsity in an outer loop (skipping entire rows in the rhs) and in a cache-conscious manner. The overhead for transpose operations is reduced by piggybacking t(dXi) and t(out) onto the im2col and output copy, as well as by computing t(F) just once for all rows in the batch. In a CNN scoring scenario, with sparsity varying between 0.02 and 0.1, this patch improved matrix multiplication performance by ~2.5x and end-to-end performance over the entire dataset from 1587s to 1086s.

Furthermore, this patch also fixes and extends the existing sparse conv2d tests. So far, the respective scripts used a sparsity of 0.5, which left the data and weights in dense representation; we now use a sparsity of ~0.25. Additionally, this patch includes a hardening of the output format handling of unary and binary operations (i.e., a more accurate decision whether the released inputs cover the memory requirements of a potential format change).

Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/d641c224
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/d641c224
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/d641c224

Branch: refs/heads/master
Commit: d641c22499b1f620d8cf4b6b8975bb1b9e3d1602
Parents: c69bd7f
Author: Matthias Boehm <[email protected]>
Authored: Fri Oct 13 19:29:40 2017 -0700
Committer: Matthias Boehm <[email protected]>
Committed: Sat Oct 14 02:30:36 2017 -0700

----------------------------------------------------------------------
 .../cp/ComputationCPInstruction.java            |  10 +-
 .../cp/MatrixMatrixArithmeticCPInstruction.java |   4 +-
 .../cp/MatrixScalarBuiltinCPInstruction.java    |   2 +-
 .../LibMatrixCuDNNConvolutionAlgorithm.java     |   3 +-
 .../LibMatrixDNNConv2dBackwardFilterHelper.java |   3 +-
 .../matrix/data/LibMatrixDNNConv2dHelper.java   | 103 +++++++++--
 .../runtime/matrix/data/LibMatrixDNNHelper.java |  19 ++-
 .../matrix/data/LibMatrixDNNIm2ColHelper.java   | 161 ++++++++---------
 .../runtime/matrix/data/LibMatrixReorg.java     |   4 +-
 .../sysml/runtime/matrix/data/MatrixBlock.java  |  17 +-
 .../functions/tensor/Conv2DTest.java            | 171 ++++++++++++-------
 src/test/scripts/functions/tensor/Conv2DTest.R  |   4 +-
 .../scripts/functions/tensor/Conv2DTest.dml     |   4 +-
 13 files changed, 321 insertions(+), 184 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/d641c224/src/main/java/org/apache/sysml/runtime/instructions/cp/ComputationCPInstruction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/cp/ComputationCPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/cp/ComputationCPInstruction.java
index 67dc051..572290d 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/cp/ComputationCPInstruction.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/cp/ComputationCPInstruction.java
@@ -19,7 +19,6 @@
 package org.apache.sysml.runtime.instructions.cp;
 
-import org.apache.sysml.hops.OptimizerUtils;
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
 import org.apache.sysml.runtime.matrix.operators.Operator;
 
@@ -54,11 +53,12 @@ public abstract class ComputationCPInstruction extends CPInstruction {
 		return checkGuardedRepresentationChange(in1, null, out);
 	}
 	
-	protected boolean checkGuardedRepresentationChange( MatrixBlock in1, MatrixBlock in2, MatrixBlock out )
-	{
-		double memDense = OptimizerUtils.estimateSize(out.getNumRows(), out.getNumColumns());
+	protected boolean checkGuardedRepresentationChange( MatrixBlock in1, MatrixBlock in2, MatrixBlock out ) {
 		double memIn1 = (in1 != null) ? in1.getInMemorySize() : 0;
 		double memIn2 = (in2 != null) ? in2.getInMemorySize() : 0;
-		return ( memDense < memIn1 + memIn2 );
+		double memReq = out.isInSparseFormat() ?
+			MatrixBlock.estimateSizeDenseInMemory(out.getNumRows(), out.getNumColumns()) :
+			MatrixBlock.estimateSizeSparseInMemory(out.getNumRows(), out.getNumColumns(), out.getSparsity());
+		return ( memReq < memIn1 + memIn2 );
 	}
 }

http://git-wip-us.apache.org/repos/asf/systemml/blob/d641c224/src/main/java/org/apache/sysml/runtime/instructions/cp/MatrixMatrixArithmeticCPInstruction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/cp/MatrixMatrixArithmeticCPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/cp/MatrixMatrixArithmeticCPInstruction.java
index 309fe07..6333f9b 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/cp/MatrixMatrixArithmeticCPInstruction.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/cp/MatrixMatrixArithmeticCPInstruction.java
@@ -37,8 +37,8 @@ public class MatrixMatrixArithmeticCPInstruction extends ArithmeticBinaryCPInstr
 		throws DMLRuntimeException
 	{
 		// Read input matrices
-		MatrixBlock inBlock1 = ec.getMatrixInput(input1.getName(), getExtendedOpcode());
-		MatrixBlock inBlock2 = ec.getMatrixInput(input2.getName(), getExtendedOpcode());
+		MatrixBlock inBlock1 = ec.getMatrixInput(input1.getName(), getExtendedOpcode());
+		MatrixBlock inBlock2 = ec.getMatrixInput(input2.getName(), getExtendedOpcode());
 		
 		// Perform computation using input matrices, and produce the result matrix
 		BinaryOperator bop = (BinaryOperator) _optr;

http://git-wip-us.apache.org/repos/asf/systemml/blob/d641c224/src/main/java/org/apache/sysml/runtime/instructions/cp/MatrixScalarBuiltinCPInstruction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/cp/MatrixScalarBuiltinCPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/cp/MatrixScalarBuiltinCPInstruction.java
index dcacb21..35c5177 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/cp/MatrixScalarBuiltinCPInstruction.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/cp/MatrixScalarBuiltinCPInstruction.java
@@ -43,7 +43,7 @@ public class MatrixScalarBuiltinCPInstruction extends BuiltinBinaryCPInstruction
 		MatrixBlock inBlock = ec.getMatrixInput(mat.getName(), getExtendedOpcode());
 		ScalarObject constant = (ScalarObject) ec.getScalarInput(scalar.getName(), scalar.getValueType(), scalar.isLiteral());
 		
-		ScalarOperator sc_op = (ScalarOperator) _optr;
+		ScalarOperator sc_op = (ScalarOperator) _optr;
 		sc_op.setConstant(constant.getDoubleValue());
 		
 		MatrixBlock retBlock = (MatrixBlock) inBlock.scalarOperations(sc_op, new MatrixBlock());
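The hardened guard can be illustrated in isolation. Below is a minimal stand-alone Java sketch, assuming simple size estimators (8 bytes per dense cell, ~12 bytes per sparse nonzero plus row-pointer overhead), not the exact OptimizerUtils/MatrixBlock estimators: a representation change of the output is only considered safe if the memory of the target representation is covered by the memory of the released inputs.

// Hypothetical, self-contained sketch of the guarded format-change decision.
public class GuardedFormatChangeSketch {
  static double estimateDense(long rows, long cols) {
    return 8d * rows * cols; // assumption: 8 bytes per double cell
  }
  static double estimateSparse(long rows, long cols, double sparsity) {
    // assumption: ~12 bytes per nonzero (4-byte index + 8-byte value) in CSR-like form
    return 12d * Math.ceil(sparsity * rows * cols) + 4d * (rows + 1);
  }
  static boolean checkGuardedRepresentationChange(
      double memIn1, double memIn2, long rows, long cols,
      boolean outSparse, double outSparsity) {
    // if the output is currently sparse, the potential change is to dense (and vice versa),
    // so the guard estimates the memory of the *other* representation
    double memReq = outSparse ?
      estimateDense(rows, cols) :
      estimateSparse(rows, cols, outSparsity);
    return memReq < memIn1 + memIn2;
  }
}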
http://git-wip-us.apache.org/repos/asf/systemml/blob/d641c224/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNConvolutionAlgorithm.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNConvolutionAlgorithm.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNConvolutionAlgorithm.java
index 0378c7a..f49433d 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNConvolutionAlgorithm.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNConvolutionAlgorithm.java
@@ -25,7 +25,6 @@ import org.apache.sysml.runtime.instructions.gpu.context.GPUContext;
 import org.apache.sysml.utils.GPUStatistics;
 
 import jcuda.Pointer;
-import jcuda.jcudnn.cudnnConvolutionBwdDataPreference;
 import jcuda.jcudnn.cudnnConvolutionBwdFilterPreference;
 import jcuda.jcudnn.cudnnConvolutionDescriptor;
 import jcuda.jcudnn.cudnnConvolutionFwdPreference;
@@ -218,7 +217,7 @@ public class LibMatrixCuDNNConvolutionAlgorithm implements java.lang.AutoCloseab
 	public static LibMatrixCuDNNConvolutionAlgorithm cudnnGetConvolutionBackwardDataAlgorithm(
 		GPUContext gCtx, String instName, int N, int C, int H, int W, int K, int R, int S, 
 		int pad_h, int pad_w, int stride_h, int stride_w, int P, int Q, long workspaceLimit) throws DMLRuntimeException {
-		long t1 = GPUStatistics.DISPLAY_STATISTICS ? System.nanoTime() : 0;
+		//long t1 = GPUStatistics.DISPLAY_STATISTICS ? System.nanoTime() : 0;
 		LibMatrixCuDNNConvolutionAlgorithm ret = new LibMatrixCuDNNConvolutionAlgorithm(gCtx, instName, 
 			N, C, H, W, K, R, S, pad_h, pad_w, stride_h, stride_w, P, Q);

http://git-wip-us.apache.org/repos/asf/systemml/blob/d641c224/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNConv2dBackwardFilterHelper.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNConv2dBackwardFilterHelper.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNConv2dBackwardFilterHelper.java
index a135f62..b89be82 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNConv2dBackwardFilterHelper.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNConv2dBackwardFilterHelper.java
@@ -21,6 +21,7 @@ package org.apache.sysml.runtime.matrix.data;
 import java.util.concurrent.Callable;
 
 import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.runtime.matrix.data.LibMatrixDNNIm2ColHelper.Im2colWorker;
 import org.apache.sysml.runtime.util.ConvolutionUtils;
 import org.apache.sysml.utils.NativeHelper;
 
@@ -86,7 +87,7 @@ public class LibMatrixDNNConv2dBackwardFilterHelper {
 			MatrixBlock im2ColOutBlock = new MatrixBlock(CRS, PQ, false);
 			MatrixBlock dout_reshaped = new MatrixBlock(PQ, K, false);
 			dout_reshaped.allocateDenseBlock();
-			LibMatrixDNNIm2ColHelper.Im2colWorker im2ColWorker = LibMatrixDNNIm2ColHelper.Im2colWorker.getWorker( _params.input1, im2ColOutBlock, _params, true);
+			Im2colWorker im2ColWorker = Im2colWorker.getWorker( _params.input1, im2ColOutBlock, _params, true, false);
 			LibMatrixDNNRotate180Helper.Rotate180Worker rotate180Worker = LibMatrixDNNRotate180Helper.Rotate180Worker.getWorker( dout, dout_reshaped.getDenseBlock(), _params, true);
 			double [] partialRetBlock = new double[CRS*_params.K];
http://git-wip-us.apache.org/repos/asf/systemml/blob/d641c224/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNConv2dHelper.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNConv2dHelper.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNConv2dHelper.java
index 876996f..dd44de2 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNConv2dHelper.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNConv2dHelper.java
@@ -22,6 +22,7 @@ import java.util.ArrayList;
 import java.util.concurrent.Callable;
 
 import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.runtime.matrix.data.LibMatrixDNNIm2ColHelper.Im2colWorker;
 import org.apache.sysml.utils.NativeHelper;
 
 /**
@@ -33,11 +34,13 @@ public class LibMatrixDNNConv2dHelper {
 	 * Performs convolution via: partialCopy1(filter %*% im2col(input)) = output.
	 * This operator has less memory pressure than LoopedIm2ColConv2dAllChannels.
	 */
-	public static class LoopedIm2ColConv2dOneChannel implements Callable<Long>
+	public static class LoopedIm2ColConv2dOneChan implements Callable<Long>
 	{
-		public int _rl; public int _ru;
-		private final ConvolutionParameters _params; ArrayList<MatrixBlock> _filters;
-		public LoopedIm2ColConv2dOneChannel(int rl, int ru, ConvolutionParameters params, ArrayList<MatrixBlock> filters) {
+		protected final int _rl, _ru;
+		protected final ConvolutionParameters _params;
+		protected final ArrayList<MatrixBlock> _filters;
+		
+		public LoopedIm2ColConv2dOneChan(int rl, int ru, ConvolutionParameters params, ArrayList<MatrixBlock> filters) {
 			_rl = rl; _ru = ru;
 			_params = params;
 			_filters = filters;
@@ -48,7 +51,7 @@ public class LibMatrixDNNConv2dHelper {
 			int PQ = _params.P*_params.Q; int K = _params.K; int RS = _params.R*_params.S;
 			MatrixBlock im2ColOutBlock = new MatrixBlock(RS, PQ, false);
-			LibMatrixDNNIm2ColHelper.Im2colWorker im2ColWorker = LibMatrixDNNIm2ColHelper.Im2colWorker.getWorker( _params.input1, im2ColOutBlock, _params, false);
+			Im2colWorker im2ColWorker = Im2colWorker.getWorker( _params.input1, im2ColOutBlock, _params, false, false);
 			long time1 = 0; long time2 = 0;
 			for(int n = _rl; n < _ru; n++) {
 				for(int c = 0; c < _params.C; c++) {
@@ -115,22 +118,22 @@ public class LibMatrixDNNConv2dHelper {
 	/**
	 * Performs convolution via: partialCopy1(filter %*% im2col(input)) = output
	 */
-	public static class LoopedIm2ColConv2dAllChannels implements Callable<Long>
+	public static class LoopedIm2ColConv2dAllChan implements Callable<Long>
 	{
-		public int _rl; public int _ru;
-		private final ConvolutionParameters _params;
-		public LoopedIm2ColConv2dAllChannels(int rl, int ru, ConvolutionParameters params) {
+		protected final int _rl, _ru;
+		protected final ConvolutionParameters _params;
+		
+		public LoopedIm2ColConv2dAllChan(int rl, int ru, ConvolutionParameters params) {
 			_rl = rl; _ru = ru;
 			_params = params;
 		}
 		
 		@Override
 		public Long call() throws Exception {
-			int PQ = _params.P*_params.Q; int K = _params.K; int CRS = _params.C*_params.R*_params.S;
+			final int PQ = _params.P*_params.Q, K = _params.K, CRS = _params.C*_params.R*_params.S;
 			MatrixBlock outIm2col = new MatrixBlock(CRS, PQ, false);
 			MatrixBlock outMM = new MatrixBlock(K, PQ, false);
-			LibMatrixDNNIm2ColHelper.Im2colWorker im2ColWorker = 
-				LibMatrixDNNIm2ColHelper.Im2colWorker.getWorker( _params.input1, outIm2col, _params, true);
+			Im2colWorker im2ColWorker = Im2colWorker.getWorker( _params.input1, outIm2col, _params, true, false);
 			long time1 = 0; long time2 = 0;
 			for(int n = _rl; n < _ru; n++) {
 				// im2col(input) => _im2ColOutBlock
@@ -189,6 +192,82 @@ public class LibMatrixDNNConv2dHelper {
 		}
 	}
 	
+	/**
+	 * This implementation is similar to LoopedIm2ColConv2dAllChan, except for using a
+	 * sparse-dense matrix multiplication with t(t(Xi) %*% t(F)) instead of a
+	 * dense-sparse matrix multiplication with Xi %*% F.
+	 * 
+	 * NOTE: this implementation assumes that the filter is passed in transposed form
+	 * in order to share this temporary matrix (and its creation cost) across threads.
+	 */
+	public static class LoopedIm2ColConv2dTransAllChan extends LoopedIm2ColConv2dAllChan
+	{
+		public LoopedIm2ColConv2dTransAllChan(int rl, int ru, ConvolutionParameters params) {
+			super(rl, ru, params);
+		}
+		
+		@Override
+		public Long call() throws Exception {
+			final int PQ = _params.P*_params.Q, K = _params.K, CRS = _params.C*_params.R*_params.S;
+			MatrixBlock outIm2col = new MatrixBlock(PQ, CRS, false);
+			MatrixBlock outMM = new MatrixBlock(PQ, K, false);
+			Im2colWorker im2ColWorker = Im2colWorker.getWorker( _params.input1, outIm2col, _params, true, true);
+			
+			for(int n = _rl; n < _ru; n++) {
+				// im2col(input) => _im2ColOutBlock
+				im2ColWorker.execute(n);
+				
+				// t(_im2ColOutBlock) %*% t(filter) => t(matMultOutBlock)
+				outMM.reset(outMM.rlen, outMM.clen, false);
+				LibMatrixDNNHelper.singleThreadedMatMult(outIm2col, _params.input2, outMM, false, true, _params);
+				
+				// Copy the matrix matMultOutBlock of shape [K X PQ] to params.output.denseBlock + destPos
+				partialCopyTrans(outMM, _params.output, n*K*PQ, K, PQ);
+				
+				// Add bias to current row if necessary, always dense
+				if(_params.bias != null)
+					LibMatrixDNNHelper.addBias(n, _params.output.getDenseBlock(), _params.bias.getDenseBlock(), K, PQ);
+			}
+			
+			//multi-threaded nnz maintenance of current working set
+			return _params.output.recomputeNonZeros(_rl, _ru-1);
+		}
+		
+		private static void partialCopyTrans(MatrixBlock src, MatrixBlock dest, int destPos, int K, int PQ) {
+			if( src.isEmptyBlock() )
+				return;
+			//copy src into its destination row w/ piggybacked transpose
+			//src is [PQ x K] -> [K x PQ] -> [1 x KPQ]
+			if(src.isInSparseFormat()) {
+				SparseBlock sblock = src.sparseBlock;
+				double[] c = dest.denseBlock;
+				for(int i = 0; i < src.getNumRows(); i++) {
+					if( sblock.isEmpty(i) ) continue;
+					int apos = sblock.pos(i);
+					int alen = sblock.size(i);
+					int[] aix = sblock.indexes(i);
+					double[] avals = sblock.values(i);
+					int desPosK = destPos + i;
+					for(int j = apos; j < apos+alen; j++)
+						c[desPosK+aix[j]*PQ] = avals[j];
+				}
+			}
+			else {
+				double[] a = src.denseBlock;
+				double[] c = dest.denseBlock;
+				final int blocksizeIJ = 128; //128KB for L2
+				//cache-conscious blocked execution
+				for( int bi = 0; bi < PQ; bi+=blocksizeIJ )
+					for( int bj = 0; bj < K; bj+=blocksizeIJ ) {
+						int bimin = Math.min(bi+blocksizeIJ, PQ);
+						int bjmin = Math.min(bj+blocksizeIJ, K);
+						//core transpose operation
+						for(int i=bi, aix=bi*K+bj, cix=bj*PQ+bi; i<bimin; i++, aix+=K, cix++)
+							LibMatrixReorg.transposeRow(a, c, aix, destPos+cix, PQ, bjmin-bj);
+					}
+			}
+		}
+	}
	
 	/**
	 * This operator is used only if native is enabled, filter is dense and input is sparse
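The core idea of the new LoopedIm2ColConv2dTransAllChan path can be shown stand-alone. The following is a minimal Java sketch using plain arrays (CSR-style index arrays and row-major dense arrays; all names are illustrative, not the SystemML MatrixBlock API): it computes out = F %*% dXi as t( t(dXi) %*% t(F) ), skipping empty sparse rows in the outer loop and accumulating into dense output rows sequentially instead of scattering writes in the inner loop.

// Hypothetical illustration of the transposed im2col-mm scheme.
public class TransposedIm2colMmSketch {
  public static double[] conv2dViaTrans(int PQ, int CRS, int K,
      int[] rowPtr, int[] colIdx, double[] vals, // CSR of t(dXi), [PQ x CRS]
      double[] tF /* dense t(F), [CRS x K], row-major (pre-transposed filter) */) {
    double[] outMM = new double[PQ * K]; // t(out), [PQ x K]
    for (int i = 0; i < PQ; i++) {       // sparsity exploited in the outer loop:
      if (rowPtr[i] == rowPtr[i + 1]) continue; // skip entire empty rows
      for (int j = rowPtr[i]; j < rowPtr[i + 1]; j++) {
        double v = vals[j];
        int boff = colIdx[j] * K;        // row of t(F) selected by the nonzero
        for (int k = 0; k < K; k++)      // dense, sequential accumulation
          outMM[i * K + k] += v * tF[boff + k];
      }
    }
    // piggybacked transpose: [PQ x K] -> [K x PQ] row-major output
    double[] out = new double[K * PQ];
    for (int i = 0; i < PQ; i++)
      for (int k = 0; k < K; k++)
        out[k * PQ + i] = outMM[i * K + k];
    return out;
  }
}

In the actual patch, the final transpose is not a separate pass as in this sketch but is piggybacked onto the output copy (partialCopyTrans above), and t(F) is computed once per batch and shared across threads.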
http://git-wip-us.apache.org/repos/asf/systemml/blob/d641c224/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNHelper.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNHelper.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNHelper.java
index 276a78e..b80a786 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNHelper.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNHelper.java
@@ -125,15 +125,28 @@ public class LibMatrixDNNHelper {
 			filters = splitFilter(params);
 		}
 		
-		boolean isEmptyDenseInput = !params.input1.isInSparseFormat() && params.input1.denseBlock == null;
+		MatrixBlock in1 = params.input1;
+		boolean isEmptyDenseInput = !in1.isInSparseFormat() && in1.denseBlock == null;
+		boolean isTransPref = in1.sparse && !params.input2.sparse &&
+			MatrixBlock.evalSparseFormatInMemory(in1.clen, in1.rlen, in1.nonZeros);
+		
+		//transpose filter once for efficient sparse-dense multiplies in LoopedIm2ColConv2dTransAllChan
+		//in order to share the temporary object and its creation costs across threads
+		if( !LibMatrixDNN.isEligibleForConv2dSparse(params)
+			&& !isEmptyDenseInput && allChannels && isTransPref ) {
+			params.input2 = LibMatrixReorg.transpose(params.input2,
+				new MatrixBlock(params.input2.clen, params.input2.rlen, false), k);
+		}
 		
 		for(int i = 0; i*taskSize < params.N; i++) {
 			if(LibMatrixDNN.isEligibleForConv2dSparse(params))
 				ret.add(new LibMatrixDNNConv2dHelper.SparseNativeConv2d(i*taskSize, Math.min((i+1)*taskSize, params.N), params));
+			else if(!isEmptyDenseInput && allChannels && isTransPref)
+				ret.add(new LibMatrixDNNConv2dHelper.LoopedIm2ColConv2dTransAllChan(i*taskSize, Math.min((i+1)*taskSize, params.N), params));
 			else if(!isEmptyDenseInput && allChannels)
-				ret.add(new LibMatrixDNNConv2dHelper.LoopedIm2ColConv2dAllChannels(i*taskSize, Math.min((i+1)*taskSize, params.N), params));
+				ret.add(new LibMatrixDNNConv2dHelper.LoopedIm2ColConv2dAllChan(i*taskSize, Math.min((i+1)*taskSize, params.N), params));
 			else if(!isEmptyDenseInput && !allChannels)
-				ret.add(new LibMatrixDNNConv2dHelper.LoopedIm2ColConv2dOneChannel(i*taskSize, Math.min((i+1)*taskSize, params.N), params, filters));
+				ret.add(new LibMatrixDNNConv2dHelper.LoopedIm2ColConv2dOneChan(i*taskSize, Math.min((i+1)*taskSize, params.N), params, filters));
 			else
 				throw new DMLRuntimeException("Unsupported operator");
 		}
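The dispatch above boils down to two checks: the input must be sparse while the filter is dense, and the transposed [clen x rlen] orientation of the input must still favor a sparse representation; only then is the filter transposed a single time and shared by all tasks. A hedged stand-alone sketch of this decision (the threshold is illustrative, not SystemML's actual SPARSITY_TURN_POINT logic):

// Hypothetical sketch of the transposed-path preference check.
public class TransPrefDispatchSketch {
  // assumption: a matrix is kept sparse if its density is below some turn point
  static boolean evalSparseFormatInMemory(long rows, long cols, long nnz) {
    return nnz < 0.4 * rows * cols; // illustrative threshold only
  }
  static boolean preferTransposedPath(boolean inputSparse, boolean filterSparse,
      long inRows, long inCols, long inNnz) {
    // note the swapped dimensions: the im2col output is built in transposed
    // orientation, so the sparsity check uses [inCols x inRows]
    return inputSparse && !filterSparse
      && evalSparseFormatInMemory(inCols, inRows, inNnz);
  }
}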
http://git-wip-us.apache.org/repos/asf/systemml/blob/d641c224/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNIm2ColHelper.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNIm2ColHelper.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNIm2ColHelper.java
index 3296c7f..7b43257 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNIm2ColHelper.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNIm2ColHelper.java
@@ -31,7 +31,7 @@ public class LibMatrixDNNIm2ColHelper {
 	static interface Im2colWorker {
 		public void execute(int n);
 		public void execute(int n, int c);
-		public static Im2colWorker getWorker(MatrixBlock input, MatrixBlock out, ConvolutionParameters params, boolean allChannels) {
+		public static Im2colWorker getWorker(MatrixBlock input, MatrixBlock out, ConvolutionParameters params, boolean allChannels, boolean trans) {
 			if(!input.isInSparseFormat()) {
 				boolean stride1Pad0 = params.stride_h == 1 && params.stride_w == 1 
 					&& params.pad_h == 0 && params.pad_w == 0;
@@ -58,10 +58,11 @@ public class LibMatrixDNNIm2ColHelper {
 				//preallocate sparse-rows
 				double sparsity = Math.min(MatrixBlock.SPARSITY_TURN_POINT, 
 					(input.getNonZeros()*2.0) / (input.getNumRows()*input.getNumColumns()));
+				int estnnz = (int)Math.ceil((trans ? params.C*params.R*params.S : params.P*params.Q)*sparsity);
 				for(int r = 0; r < out.rlen; r++)
-					out.getSparseBlock().allocate(r, (int)Math.ceil(params.P*params.Q*sparsity));
+					out.getSparseBlock().allocate(r, estnnz);
 				
-				return new SparseSparseIm2colWorkerAllChannels(input, out, params);
+				return new SparseSparseIm2colWorkerAllChan(input, out, params, trans);
 			}
 		}
 	}
@@ -203,7 +204,7 @@ public class LibMatrixDNNIm2ColHelper {
 	/**
	 * Performing dense im2col (general case)
	 */
-	static class DenseIm2colWorkerAllChannels implements Im2colWorker {
+	private static class DenseIm2colWorkerAllChannels implements Im2colWorker {
 		double [] inputArray; double [] outputArray; 
 		int CRS; int S; int R; int P; int Q; int CHW; int H; int W;
 		int stride_h; int stride_w; int pad_h; int pad_w;
@@ -252,17 +253,19 @@ public class LibMatrixDNNIm2ColHelper {
 	/**
	 * Performing sparse im2col for all channels for a given row n of the input matrix.
	 */
-	private static class SparseSparseIm2colWorkerAllChannels implements Im2colWorker {
-		final MatrixBlock input, output;
-		final int S, R, P, Q, W, HW;
-		final int stride_h, stride_w, pad_h, pad_w;
-		public SparseSparseIm2colWorkerAllChannels(MatrixBlock input, MatrixBlock im2ColOutBlock, ConvolutionParameters params) {
+	private static class SparseSparseIm2colWorkerAllChan implements Im2colWorker {
+		private final MatrixBlock input, output;
+		private final int S, R, P, Q, W, HW;
+		private final int stride_h, stride_w, pad_h, pad_w;
+		private final boolean trans;
+		public SparseSparseIm2colWorkerAllChan(MatrixBlock input, MatrixBlock im2ColOutBlock, ConvolutionParameters params, boolean trans) {
 			this.input = input;
 			this.output = im2ColOutBlock;
 			this.HW = params.H * params.W;
 			this.W = params.W; this.R = params.R; this.S = params.S; this.P = params.P; this.Q = params.Q;
 			this.stride_h = params.stride_h; this.stride_w = params.stride_w;
 			this.pad_h = params.pad_h; this.pad_w = params.pad_w;
+			this.trans = trans;
 			if(!input.isInSparseFormat()) 
 				throw new RuntimeException("Incorrect operator selection. Expected dense input for SparseIm2colWorkerAllChannels");
 		}
@@ -275,16 +278,13 @@ public class LibMatrixDNNIm2ColHelper {
 		@Override
 		public void execute(int n) {
 			output.reset();
 			SparseBlock sblock = input.sparseBlock;
-			if( sblock.isEmpty(n) ) {
+			if( sblock.isEmpty(n) )
 				return;
-			}
-			
-			int apos = input.sparseBlock.pos(n);
-			int alen = input.sparseBlock.size(n);
-			int[] aix = input.sparseBlock.indexes(n);
-			double[] avals = input.sparseBlock.values(n);
+			int apos = sblock.pos(n);
+			int alen = sblock.size(n);
+			int[] aix = sblock.indexes(n);
+			double[] avals = sblock.values(n);
 			
 			// Iterate over the sparse block
 			for(int j=apos; j<apos+alen; j++) {
@@ -297,9 +297,69 @@ public class LibMatrixDNNIm2ColHelper {
 				int wInput = chw % W;
 				
 				appendInputValueToIm2colOutput(output, cInput, hInput, wInput, avals[j], 
-					R, S, P, Q, stride_h, stride_w, pad_h, pad_w);
+					R, S, P, Q, stride_h, stride_w, pad_h, pad_w, trans);
 			}
 			// Since the chw are appended in sorted order, no need to sort the output rows
+			// unless in trans mode, then sorting is needed
+			if( trans )
+				output.sortSparseRows();
 		}
 	}
 	
+	/**
+	 * Performing sparse im2col for a given channel c and for a given row n of the input matrix.
+	 */
+	@SuppressWarnings("unused")
+	private static class SparseSparseIm2colWorker implements Im2colWorker {
+		private final MatrixBlock input, output;
+		private final int S, R, P, Q, W, HW;
+		private final int stride_h, stride_w, pad_h, pad_w;
+		final boolean trans;
+		
+		public SparseSparseIm2colWorker(MatrixBlock input, MatrixBlock im2ColOutBlock, ConvolutionParameters params, boolean trans) {
+			this.input = input;
+			this.output = im2ColOutBlock;
+			this.HW = params.H*params.W;
+			this.W = params.W; this.R = params.R; this.S = params.S; this.P = params.P; this.Q = params.Q;
+			this.stride_h = params.stride_h; this.stride_w = params.stride_w;
+			this.pad_h = params.pad_h; this.pad_w = params.pad_w;
+			this.trans = trans;
+		}
+		
+		@Override
+		public void execute(int n) {
+			throw new RuntimeException("Not supported");
+		}
+		
+		@Override
+		public void execute(int n, int cInput) {
+			output.reset();
+			SparseBlock sblock = input.sparseBlock;
+			if( sblock.isEmpty(n) )
+				return;
+			int apos = sblock.pos(n);
+			int alen = sblock.size(n);
+			int[] aix = sblock.indexes(n);
+			double[] avals = sblock.values(n);
+			
+			// Iterate over the sparse block
+			for(int j=apos; j<apos+alen; j++) {
+				// Note: the input is of shape [N, CHW]
+				int chw = aix[j];
+				
+				if(cInput == (chw / HW)) {
+					// Get individual zero-based c,h,w indexes from zero-based 'chw'
+					int hInput = (chw - cInput*HW)/W;
+					int wInput = chw % W;
+					
+					appendInputValueToIm2colOutput(output, cInput, hInput, wInput, avals[j], 
+						R, S, P, Q, stride_h, stride_w, pad_h, pad_w, trans);
+				}
+			}
+			// Since the chw are appended in sorted order, no need to sort the output rows
+			// unless in trans mode, then sorting is needed
+			if( trans )
+				output.sortSparseRows();
+		}
+	}
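Why the trans mode needs the final sortSparseRows() can be seen with a tiny stand-alone example (map-based and hypothetical, not the SparseBlock API): appends are ordered so that rows of the normal orientation receive their column indexes in sorted order, but after swapping (row, col) on append, a transposed row can receive a larger column index before a smaller one.

// Minimal array/map-based illustration of the trans-mode sort requirement.
import java.util.*;

public class TransAppendSortSketch {
  public static void main(String[] args) {
    // (crs, pq) appends as the im2col loop might emit them for two input nonzeros
    int[][] appends = { {0, 1}, {2, 4}, {1, 4} };
    Map<Integer, List<Integer>> out = new TreeMap<>();
    boolean trans = true;
    for (int[] a : appends) {
      int r = trans ? a[1] : a[0]; // swapped coordinates in trans mode
      int c = trans ? a[0] : a[1];
      out.computeIfAbsent(r, k -> new ArrayList<>()).add(c);
    }
    System.out.println("before sort: " + out); // row 4 -> [2, 1] (unsorted)
    if (trans)
      out.values().forEach(Collections::sort); // analogue of output.sortSparseRows()
    System.out.println("after sort:  " + out); // row 4 -> [1, 2]
  }
}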
@@ -321,7 +381,7 @@ public class LibMatrixDNNIm2ColHelper {
	 * @param pad_w pad width
	 */
 	private static void appendInputValueToIm2colOutput(MatrixBlock output, int cInput, int hInput, int wInput, double value, 
-			int R, int S, int P, int Q, int stride_h, int stride_w, int pad_h, int pad_w) {
+			int R, int S, int P, int Q, int stride_h, int stride_w, int pad_h, int pad_w, boolean trans) {
 		if(value == 0) 
 			return;
 		int RS = R*S;
@@ -350,67 +410,12 @@ public class LibMatrixDNNIm2ColHelper {
 			for(int s = sMin; s <= sMax; s += stride_w) {
 				int q = (wInput - s + pad_w) / stride_w;
 				// chw -> [crs, pq]
-				output.appendValue(outRowIndex + s, pQ + q, value);
-				// Since the chw are appended in sorted order, no need to sort the output rows
-			}
-		}
-	}
-	
-	/**
-	 * Performing sparse im2col for a given channel c and for a given row n of the input matrix.
-	 */
-	static class SparseSparseIm2colWorker implements Im2colWorker {
-		MatrixBlock input; MatrixBlock output;
-		int CRS; int S; int R; int P; int Q; int H; int W; int HW; int RS;
-		int stride_h; int stride_w; int pad_h; int pad_w;
-		public SparseSparseIm2colWorker(MatrixBlock input, MatrixBlock im2ColOutBlock, ConvolutionParameters params) {
-			this.input = input;
-			this.output = im2ColOutBlock;
-			this.CRS = params.C * params.R * params.S;
-			this.HW = params.H*params.W;
-			this.RS = params.R*params.S;
-			this.H = params.H; this.W = params.W; this.R = params.R; this.S = params.S; this.P = params.P; this.Q = params.Q;
-			this.stride_h = params.stride_h; this.stride_w = params.stride_w;
-			this.pad_h = params.pad_h; this.pad_w = params.pad_w;
-		}
-		
-		@Override
-		public void execute(int n) {
-			throw new RuntimeException("Not supported");
-		}
-		
-		@Override
-		public void execute(int n, int cInput) {
-			if( !input.sparseBlock.isEmpty(n) ) {
-				output.sparseBlock.reset();
-				output.setNonZeros(0);
-				int apos = input.sparseBlock.pos(n);
-				int alen = input.sparseBlock.size(n);
-				int[] aix = input.sparseBlock.indexes(n);
-				double[] avals = input.sparseBlock.values(n);
-				
-				// Iterate over the sparse block
-				for(int j=apos; j<apos+alen; j++) {
-					// Note: the input is of shape [N, CHW]
-					int chw = aix[j];
-					
-					if(cInput == (chw / HW)) {
-						// Get individual zero-based c,h,w indexes from zero-based 'chw'
-						int hInput = (chw - cInput*HW)/W;
-						int wInput = chw % W;
-						
-						appendInputValueToIm2colOutput(output, cInput, hInput, wInput, avals[j], 
-							R, S, P, Q, stride_h, stride_w, pad_h, pad_w);
-					}
-				}
+				if( trans )
+					output.appendValue(pQ + q, outRowIndex + s, value);
+				else
+					output.appendValue(outRowIndex + s, pQ + q, value);
 				// Since the chw are appended in sorted order, no need to sort the output rows
-				// if(meta.sortRows) output.sortSparseRows();
-			}
-			else {
-				output.setNonZeros(0);
 			}
 		}
 	}
-}

http://git-wip-us.apache.org/repos/asf/systemml/blob/d641c224/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
index 76002fe..8d7d4a5 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
@@ -948,7 +948,7 @@ public class LibMatrixReorg
 		
 		//compute rest (not aligned to 8-blocks)
 		for( int j=0; j<bn; j++, aix++, cix+=n2 )
-			c[ cix ] = a[ aix+0 ];	
+			c[ cix ] = a[ aix+0 ];
 		
 		//unrolled 8-blocks
 		for( int j=bn; j<len; j+=8, aix+=8, cix+=8*n2 )
@@ -960,7 +960,7 @@ public class LibMatrixReorg
 			c[ cix + 4*n2 ] = a[ aix+4 ];
 			c[ cix + 5*n2 ] = a[ aix+5 ];
 			c[ cix + 6*n2 ] = a[ aix+6 ];
-			c[ cix + 7*n2 ] = a[ aix+7 ];	
+			c[ cix + 7*n2 ] = a[ aix+7 ];
 		}
 	}
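For reference, the transposeRow kernel touched above (and reused by partialCopyTrans) follows the classic pattern of a strided row-to-column copy with an 8-wide unrolled inner loop. A self-contained sketch under simplified assumptions (plain arrays, no SystemML types):

// Illustrative strided row copy with 8-wide unrolling, in the spirit of
// LibMatrixReorg.transposeRow; not the SystemML implementation itself.
public class BlockedTransposeSketch {
  // copy len values from row-major a (starting at aix, stride 1)
  // into c starting at cix with stride n2 (the transposed row length)
  static void transposeRow(double[] a, double[] c, int aix, int cix, int n2, int len) {
    int bn = len % 8;
    for (int j = 0; j < bn; j++, aix++, cix += n2)           // rest, not aligned to 8-blocks
      c[cix] = a[aix];
    for (int j = bn; j < len; j += 8, aix += 8, cix += 8 * n2) { // unrolled 8-blocks
      c[cix         ] = a[aix    ];
      c[cix +     n2] = a[aix + 1];
      c[cix + 2 * n2] = a[aix + 2];
      c[cix + 3 * n2] = a[aix + 3];
      c[cix + 4 * n2] = a[aix + 4];
      c[cix + 5 * n2] = a[aix + 5];
      c[cix + 6 * n2] = a[aix + 6];
      c[cix + 7 * n2] = a[aix + 7];
    }
  }
  public static void main(String[] args) {
    int m = 3, n = 4;
    double[] a = new double[m * n], c = new double[n * m];
    for (int i = 0; i < a.length; i++) a[i] = i + 1;
    for (int i = 0; i < m; i++)   // transpose [m x n] -> [n x m], one row at a time
      transposeRow(a, c, i * n, i, m, n);
    System.out.println(java.util.Arrays.toString(c));
  }
}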
http://git-wip-us.apache.org/repos/asf/systemml/blob/d641c224/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
index 66a5d9a..8117c04 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
@@ -112,7 +112,7 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 		EMPTY_BLOCK,
 		ULTRA_SPARSE_BLOCK, //ultra sparse representation, in-mem same as sparse
 		SPARSE_BLOCK, //sparse representation, see sparseRows
-		DENSE_BLOCK, //dense representation, see denseBlock	
+		DENSE_BLOCK, //dense representation, see denseBlock
 	}
 	
 	//matrix meta data
@@ -470,6 +474,10 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 		return (nonZeros = nnz);
 	}
 	
+	public double getSparsity() {
+		return OptimizerUtils.getSparsity(rlen, clen, nonZeros);
+	}
+	
 	public boolean isVector() {
 		return (rlen == 1 || clen == 1);
 	}
@@ -776,7 +780,7 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
	 */
 	public void sortSparseRows() {
 		if( !sparse || sparseBlock==null )
-			return;	
+			return;
 		sparseBlock.sort();
 	}
@@ -2413,10 +2417,8 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 	////////
 	// Estimates size and sparsity
 	
-	public long estimateSizeInMemory()
-	{
-		double sp = OptimizerUtils.getSparsity(rlen, clen, nonZeros);
-		return estimateSizeInMemory(rlen, clen, sp);
+	public long estimateSizeInMemory() {
+		return estimateSizeInMemory(rlen, clen, getSparsity());
 	}
 	
 	public static long estimateSizeInMemory(long nrows, long ncols, double sparsity)
@@ -2607,9 +2609,8 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
 		if( !isAllocated() ) 
 			return 44;
 		//in-memory size of dense/sparse representation
-		double sp = OptimizerUtils.getSparsity(rlen, clen, nonZeros);
 		return !sparse ? estimateSizeDenseInMemory(rlen, clen) :
-			estimateSizeSparseInMemory(rlen, clen, sp,
+			estimateSizeSparseInMemory(rlen, clen, getSparsity(),
 				SparseBlockFactory.getSparseBlockType(sparseBlock));
 	}

http://git-wip-us.apache.org/repos/asf/systemml/blob/d641c224/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DTest.java b/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DTest.java
index 0a6bbb1..1341212 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DTest.java
@@ -31,7 +31,6 @@ import org.junit.Test;
 
 public class Conv2DTest extends AutomatedTestBase
 {
-	
 	private final static String TEST_NAME = "Conv2DTest";
 	private final static String TEST_DIR = "functions/tensor/";
 	private final static String TEST_CLASS_DIR = TEST_DIR + Conv2DTest.class.getSimpleName() + "/";
@@ -39,77 +38,134 @@ public class Conv2DTest extends AutomatedTestBase
 	
 	@Override
 	public void setUp() {
-		addTestConfiguration(TEST_NAME, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME, 
-				new String[] {"B"}));
+		TestUtils.clearAssertionInformation();
+		addTestConfiguration(TEST_NAME, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME, new String[] {"B"}));
 	}
 	
 	@Test
-	public void testConv2DDense1() 
-	{
+	public void testConv2DDense1() {
 		int numImg = 5; int imgSize = 3; int numChannels = 3; int numFilters = 6; int filterSize = 2; int stride = 1; int pad = 0;
 		runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, false);
 	}
 	
 	@Test
-	public void testConv2DDense2() 
-	{
+	public void testConv2DDense2() {
 		int numImg = 1; int imgSize = 10; int numChannels = 4; int numFilters = 3; int filterSize = 4; int stride = 2; int pad = 0;
 		runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, false);
 	}
 	
 	@Test
-	public void testConv2DDense3() 
-	{
+	public void testConv2DDense3() {
 		int numImg = 1; int imgSize = 10; int numChannels = 4; int numFilters = 3; int filterSize = 4; int stride = 2; int pad = 1;
 		runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, false);
 	}
 	
 	@Test
-	public void testConv2DDense4() 
-	{
+	public void testConv2DDense4() {
 		int numImg = 3; int imgSize = 10; int numChannels = 1; int numFilters = 3; int filterSize = 2; int stride = 2; int pad = 1;
 		runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, false);
 	}
 	
 	@Test
-	public void testConv2DDense5() 
-	{
+	public void testConv2DDense5() {
 		int numImg = 3; int imgSize = 8; int numChannels = 2; int numFilters = 3; int filterSize = 3; int stride = 1; int pad = 2;
 		runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, false);
 	}
 	
 	@Test
-	public void testConv2DDense6() 
-	{
+	public void testConv2DDense6() {
 		int numImg = 1; int imgSize = 10; int numChannels = 4; int numFilters = 3; int filterSize = 4; int stride = 1; int pad = 0;
 		runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, false);
 	}
 	
 	@Test
-	public void testConv2DDense7() 
-	{
-		int numImg = 3; int imgSize = 10; int numChannels = 1; int numFilters = 3; int filterSize = 2; int stride = 1; int pad = 0;
+	public void testConv2DDense7() {
+		int numImg = 3; int imgSize = 64; int numChannels = 1; int numFilters = 3; int filterSize = 2; int stride = 1; int pad = 0;
 		runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, false);
 	}
 	
 	@Test
-	public void testConv2DSparse1() 
-	{
-		int numImg = 1; int imgSize = 10; int numChannels = 4; int numFilters = 3; int filterSize = 4; int stride = 2; int pad = 0;
-		runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, true);
+	public void testConv2DSparse1a() {
+		int numImg = 64; int imgSize = 16; int numChannels = 3; int numFilters = 6; int filterSize = 2; int stride = 1; int pad = 0;
+		runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true, false);
 	}
 	
 	@Test
-	public void testConv2DSparse2() 
-	{
-		int numImg = 1; int imgSize = 10; int numChannels = 4; int numFilters = 3; int filterSize = 4; int stride = 2; int pad = 1;
+	public void testConv2DSparse2a() {
+		int numImg = 64; int imgSize = 16; int numChannels = 4; int numFilters = 3; int filterSize = 4; int stride = 2; int pad = 0;
 		runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true, false);
 	}
 	
-	public void testConv2DSparse3() 
-	{
-		int numImg = 3; int imgSize = 10; int numChannels = 1; int numFilters = 3; int filterSize = 2; int stride = 2; int pad = 1;
+	@Test
+	public void testConv2DSparse3a() {
+		int numImg = 64; int imgSize = 16; int numChannels = 4; int numFilters = 3; int filterSize = 4; int stride = 2; int pad = 1;
+		runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true, false);
+	}
+	
+	@Test
+	public void testConv2DSparse4a() {
+		int numImg = 64; int imgSize = 16; int numChannels = 1; int numFilters = 3; int filterSize = 2; int stride = 2; int pad = 1;
+		runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true, false);
+	}
+	@Test
+	public void testConv2DSparse5a() {
+		int numImg = 64; int imgSize = 16; int numChannels = 2; int numFilters = 3; int filterSize = 3; int stride = 1; int pad = 2;
+		runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true, false);
+	}
+	
+	@Test
+	public void testConv2DSparse6a() {
+		int numImg = 64; int imgSize = 16; int numChannels = 4; int numFilters = 3; int filterSize = 4; int stride = 1; int pad = 0;
+		runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true, false);
+	}
+	
+	@Test
+	public void testConv2DSparse7a() {
+		int numImg = 64; int imgSize = 16; int numChannels = 1; int numFilters = 3; int filterSize = 2; int stride = 1; int pad = 0;
+		runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true, false);
+	}
+	
+	@Test
+	public void testConv2DSparse1b() {
+		int numImg = 64; int imgSize = 16; int numChannels = 3; int numFilters = 6; int filterSize = 2; int stride = 1; int pad = 0;
+		runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, true);
+	}
+	
+	@Test
+	public void testConv2DSparse2b() {
+		int numImg = 64; int imgSize = 16; int numChannels = 4; int numFilters = 3; int filterSize = 4; int stride = 2; int pad = 0;
+		runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, true);
+	}
+	
+	@Test
+	public void testConv2DSparse3b() {
+		int numImg = 64; int imgSize = 16; int numChannels = 4; int numFilters = 3; int filterSize = 4; int stride = 2; int pad = 1;
+		runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, true);
+	}
+	
+	@Test
+	public void testConv2DSparse4b() {
+		int numImg = 64; int imgSize = 16; int numChannels = 1; int numFilters = 3; int filterSize = 2; int stride = 2; int pad = 1;
+		runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, true);
+	}
+	
+	@Test
+	public void testConv2DSparse5b() {
+		int numImg = 64; int imgSize = 16; int numChannels = 2; int numFilters = 3; int filterSize = 3; int stride = 1; int pad = 2;
+		runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, true);
+	}
+	
+	@Test
+	public void testConv2DSparse6b() {
+		int numImg = 64; int imgSize = 16; int numChannels = 4; int numFilters = 3; int filterSize = 4; int stride = 1; int pad = 0;
+		runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, true);
+	}
+	
+	@Test
+	public void testConv2DSparse7b() {
+		int numImg = 64; int imgSize = 16; int numChannels = 1; int numFilters = 3; int filterSize = 2; int stride = 1; int pad = 0;
 		runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false, true);
 	}
@@ -192,66 +248,49 @@ public class Conv2DTest extends AutomatedTestBase
 		runConv2DTest(ExecType.SPARK, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true, true);
 	}
 	
-	/**
-	 * 
-	 * @param et
-	 * @param sparse
-	 */
 	public void runConv2DTest( ExecType et, int imgSize, int numImg, int numChannels, int numFilters, 
 			int filterSize, int stride, int pad, boolean sparse1, boolean sparse2) 
 	{
-		RUNTIME_PLATFORM oldRTP = rtplatform;
-		
+		RUNTIME_PLATFORM platformOld = rtplatform;
+		switch( et ){
+			case MR: rtplatform = RUNTIME_PLATFORM.HADOOP; break;
+			case SPARK: rtplatform = RUNTIME_PLATFORM.SPARK; break;
+			default: rtplatform = RUNTIME_PLATFORM.HYBRID; break;
+		}
 		boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
+		if( rtplatform == RUNTIME_PLATFORM.SPARK )
+			DMLScript.USE_LOCAL_SPARK_CONFIG = true;
 		
 		try
 		{
-			String sparseVal1 = (""+sparse1).toUpperCase();
-			String sparseVal2 = (""+sparse2).toUpperCase();
-			
-			TestConfiguration config = getTestConfiguration(TEST_NAME);
-			if(et == ExecType.SPARK) {
-				rtplatform = RUNTIME_PLATFORM.SPARK;
-			}
-			else {
-				rtplatform = (et==ExecType.MR)? RUNTIME_PLATFORM.HADOOP : RUNTIME_PLATFORM.SINGLE_NODE;
-			}
-			if( rtplatform == RUNTIME_PLATFORM.SPARK )
-				DMLScript.USE_LOCAL_SPARK_CONFIG = true;
-			
+			String sparseVal1 = String.valueOf(sparse1).toUpperCase();
+			String sparseVal2 = String.valueOf(sparse2).toUpperCase();
+			TestConfiguration config = getTestConfiguration(TEST_NAME);
 			loadTestConfiguration(config);
-			
-			/* This is for running the junit test the new way, i.e., construct the arguments directly */
+			
 			String RI_HOME = SCRIPT_DIR + TEST_DIR;
 			fullDMLScriptName = RI_HOME + TEST_NAME + ".dml";
-			
-			
-			programArgs = new String[]{"-explain", "recompile_runtime", "-args", "" + imgSize, "" + numImg, 
-				"" + numChannels, "" + numFilters, 
-				"" + filterSize, "" + stride, "" + pad, 
+			programArgs = new String[]{"-explain", "recompile_runtime", "-args", 
+				String.valueOf(imgSize), String.valueOf(numImg), 
+				String.valueOf(numChannels), String.valueOf(numFilters), 
+				String.valueOf(filterSize), String.valueOf(stride), String.valueOf(pad), 
 				output("B"), sparseVal1, sparseVal2};
-			
 			fullRScriptName = RI_HOME + TEST_NAME + ".R";
 			rCmd = "Rscript" + " " + fullRScriptName + " " + imgSize + " " + numImg + 
 				" " + numChannels + " " + numFilters + 
 				" " + filterSize + " " + stride + " " + pad + " " + expectedDir() + 
 				" " + sparseVal1 + " " + sparseVal2;
 			
-			boolean exceptionExpected = false;
-			int expectedNumberOfJobs = -1;
-			runTest(true, exceptionExpected, null, expectedNumberOfJobs);
-			
-			// Run comparison R script
+			// Run DML and R scripts
+			runTest(true, false, null, -1);
 			runRScript(true);
 			
-			HashMap<CellIndex, Double> bHM = readRMatrixFromFS("B");
+			HashMap<CellIndex, Double> bHM = readRMatrixFromFS("B");
 			HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromHDFS("B");
 			TestUtils.compareMatrices(dmlfile, bHM, epsilon, "B-DML", "B-R");
 		}
-		finally
-		{
-			rtplatform = oldRTP;
+		finally {
+			rtplatform = platformOld;
 			DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
 		}
 	}

http://git-wip-us.apache.org/repos/asf/systemml/blob/d641c224/src/test/scripts/functions/tensor/Conv2DTest.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/tensor/Conv2DTest.R b/src/test/scripts/functions/tensor/Conv2DTest.R
index 6c6641f..dbe9ea4 100644
--- a/src/test/scripts/functions/tensor/Conv2DTest.R
+++ b/src/test/scripts/functions/tensor/Conv2DTest.R
@@ -33,13 +33,13 @@ x=matrix(seq(1, numImg*numChannels*imgSize*imgSize), numImg, numChannels*imgSize
 w=matrix(seq(1, numFilters*numChannels*filterSize*filterSize), numFilters, numChannels*filterSize*filterSize, byrow=TRUE)
 
 if(as.logical(args[9])) {
-  zero_mask = (x - mean(x)) > 0 
+  zero_mask = (x - mean(x)*1.5) > 0 
   x = x * zero_mask
 } else {
   x = x - mean(x)
 }
 if(as.logical(args[10])) {
-  zero_mask = (w - mean(w)) > 0 
+  zero_mask = (w - mean(w)*1.5) > 0 
   w = w * zero_mask
 } else {
   w = w - mean(w)
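The effect of scaling the mask threshold by 1.5 is easy to verify: for sequence data x = seq(1, n) with mean (n+1)/2, the old mask (x - mean(x)) > 0 keeps roughly half of the entries (hence dense), while the new mask keeps only values above 0.75*(n+1), i.e., roughly a quarter, matching the ~0.25 sparsity mentioned in the commit message. A quick stand-alone check in Java (illustrative sizes):

// Verifies the nonzero fractions produced by the old and new test masks.
public class MaskSparsityCheck {
  public static void main(String[] args) {
    int n = 5 * 3 * 16 * 16; // e.g., numImg * numChannels * imgSize^2
    double mean = (n + 1) / 2.0;
    int nnzOld = 0, nnzNew = 0;
    for (int x = 1; x <= n; x++) {
      if (x - mean > 0) nnzOld++;
      if (x - mean * 1.5 > 0) nnzNew++;
    }
    System.out.printf("old mask: %.3f, new mask: %.3f%n",
      (double) nnzOld / n, (double) nnzNew / n); // ~0.500 vs ~0.250
  }
}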
http://git-wip-us.apache.org/repos/asf/systemml/blob/d641c224/src/test/scripts/functions/tensor/Conv2DTest.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/tensor/Conv2DTest.dml b/src/test/scripts/functions/tensor/Conv2DTest.dml
index 2eedca8..8ba3dcf 100644
--- a/src/test/scripts/functions/tensor/Conv2DTest.dml
+++ b/src/test/scripts/functions/tensor/Conv2DTest.dml
@@ -32,14 +32,14 @@ w=matrix(seq(1, numFilters*numChannels*filterSize*filterSize), rows=numFilters, 
 b=matrix(seq(1, numFilters), rows=numFilters, cols=1)
 
 if($9) {
-  zero_mask = (x - mean(x)) > 0 
+  zero_mask = (x - mean(x)*1.5) > 0 
   x = x * zero_mask
 } else {
   x = x - mean(x)
 }
 if($10) {
-  zero_mask = (w - mean(w)) > 0 
+  zero_mask = (w - mean(w)*1.5) > 0 
   w = w * zero_mask
 } else {
