Repository: incubator-systemml Updated Branches: refs/heads/master 623779912 -> e9aa58414
[SYSTEMML-540] Removed non-performing operators and avoided unnecessary sparse conversions - Removed im2col, col2im, rotate180, reshape_col as instructions - Improved performance of conv2d, conv2d_backward_data, conv2d_backward_filter - Converted sparse filters to dense Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/e9aa5841 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/e9aa5841 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/e9aa5841 Branch: refs/heads/master Commit: e9aa58414fcbcc39b9099e8722ab40e7c60a159f Parents: 6237799 Author: Niketan Pansare <[email protected]> Authored: Sun Aug 14 12:58:54 2016 -0700 Committer: Niketan Pansare <[email protected]> Committed: Sun Aug 14 12:58:54 2016 -0700 ---------------------------------------------------------------------- .../org/apache/sysml/hops/ConvolutionOp.java | 133 +----------- src/main/java/org/apache/sysml/hops/Hop.java | 5 - .../java/org/apache/sysml/hops/ReorgOp.java | 7 - .../apache/sysml/lops/ConvolutionTransform.java | 21 +- .../sysml/parser/BuiltinFunctionExpression.java | 4 +- .../org/apache/sysml/parser/DMLTranslator.java | 41 +--- .../instructions/CPInstructionParser.java | 4 - .../cp/ConvolutionCPInstruction.java | 78 +------ .../sysml/runtime/matrix/data/LibMatrixDNN.java | 202 ++----------------- .../runtime/matrix/data/LibMatrixMult.java | 31 ++- .../sysml/runtime/matrix/data/MatrixBlock.java | 2 +- .../sysml/runtime/util/ConvolutionUtils.java | 201 ------------------ .../functions/tensor/Conv2DBackwardTest.java | 51 +---- .../functions/tensor/Conv2DTest.java | 53 +---- 14 files changed, 74 insertions(+), 759 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e9aa5841/src/main/java/org/apache/sysml/hops/ConvolutionOp.java 
---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/ConvolutionOp.java b/src/main/java/org/apache/sysml/hops/ConvolutionOp.java index fe277d1..8c38a48 100644 --- a/src/main/java/org/apache/sysml/hops/ConvolutionOp.java +++ b/src/main/java/org/apache/sysml/hops/ConvolutionOp.java @@ -33,15 +33,12 @@ import org.apache.sysml.parser.Expression.ValueType; import org.apache.sysml.runtime.DMLRuntimeException; import org.apache.sysml.runtime.matrix.MatrixCharacteristics; import org.apache.sysml.runtime.matrix.data.LibMatrixDNN.ConvolutionParameters; -import org.apache.sysml.runtime.util.ConvolutionUtils; public class ConvolutionOp extends Hop implements MultiThreadedHop { private Hop.ConvOp op; private int _maxNumThreads = -1; //-1 for unlimited - - public static boolean FORCE_NON_IM2COL = false; private ConvolutionOp() { //default constructor for clone @@ -94,41 +91,14 @@ public class ConvolutionOp extends Hop implements MultiThreadedHop ExecType et = optFindExecType(); - Lop ret = ConvolutionUtils.constructConvolutionLops(this, et); - if(ret != null) { - setLops(ret); - return ret; - } - ret = ConvolutionUtils.constructConvolutionBackwardDataLops(this, et); - if(ret != null) { - setLops(ret); - return ret; - } - ArrayList<Hop> inputs = getInput(); switch( op ) { - case IM2COL: - case RESHAPE_COL: - case ROTATE180: - case COL2IM: - { - et = ExecType.CP; // TODO: Since max_backwards and other Convolution Ops only implemented for CP - - if( et == ExecType.CP ) - { - setLops(constructConvolutionLops(et, inputs)); - break; - } - else { - // TODO: Add support for SPARK/MR backends once we are happy with the performance of - // single node Lenet script. 
- throw new HopsException("Unimplemented ConvolutionOp for execution type: " + et.name()); - } - // break; - } case MAX_POOLING: case MAX_POOLING_BACKWARD: + case DIRECT_CONV2D: + case DIRECT_CONV2D_BACKWARD_DATA: + case DIRECT_CONV2D_BACKWARD_FILTER: { //TODO: Fix me. Currently forcing the instruction to GPU if gpu flag is set if(DMLScript.USE_ACCELERATOR) { @@ -147,22 +117,6 @@ public class ConvolutionOp extends Hop implements MultiThreadedHop } // break; } - case DIRECT_CONV2D: - case DIRECT_CONV2D_BACKWARD_DATA: - case DIRECT_CONV2D_BACKWARD_FILTER: - { - if( et == ExecType.GPU ) - { - setLops(constructConvolutionLops(et, inputs)); - break; - } - else { - // TODO: Add support for SPARK/MR backends once we are happy with the performance of - // single node Lenet script. - throw new HopsException("Unimplemented ConvolutionOp for execution type: " + et.name()); - } - // break; - } default: throw new HopsException("Unsupported lops construction for operation type '"+op+"'."); } @@ -261,24 +215,6 @@ public class ConvolutionOp extends Hop implements MultiThreadedHop protected double computeOutputMemEstimate( long dim1, long dim2, long nnz ) { double sparsity = 1.0; - switch(op) - { - case RESHAPE_COL: - case ROTATE180: - { - sparsity = OptimizerUtils.getSparsity(dim1, dim2, nnz); - break; - } - case IM2COL: - case COL2IM: - case MAX_POOLING: - case MAX_POOLING_BACKWARD: - case DIRECT_CONV2D: - case DIRECT_CONV2D_BACKWARD_FILTER: - case DIRECT_CONV2D_BACKWARD_DATA: - sparsity = 1.0; // worst-case estimate - break; - } return OptimizerUtils.estimateSizeExactSparsity(dim1, dim2, sparsity); } @@ -306,38 +242,6 @@ public class ConvolutionOp extends Hop implements MultiThreadedHop switch(op) { - case RESHAPE_COL: - { - ret = new long[3]; - ret[0] = params.N; - ret[1] = getExtractedVal(params.K, params.P, params.Q); - ret[2] = mc.getNonZeros(); // exact estimates - break; - } - case ROTATE180: - { - ret = new long[3]; - ret[0] = getExtractedVal(params.N, params.P, 
params.Q); - ret[1] = params.K; - ret[2] = mc.getNonZeros(); // exact estimates - break; - } - case IM2COL: - { - ret = new long[3]; - ret[0] = getExtractedVal(params.C, params.R, params.S); - ret[1] = getExtractedVal(params.N, params.P, params.Q); - ret[2] = -1; - break; - } - case COL2IM: - { - ret = new long[3]; - ret[0] = params.N; - ret[1] = getExtractedVal(params.C, params.H, params.W); - ret[2] = -1; - break; - } case MAX_POOLING: { ret = new long[3]; @@ -496,8 +400,6 @@ public class ConvolutionOp extends Hop implements MultiThreadedHop @Override public void refreshSizeInformation() { - Hop input1 = getInput().get(0); - ConvolutionParameters params; try { params = parseInput(); @@ -507,35 +409,6 @@ public class ConvolutionOp extends Hop implements MultiThreadedHop switch(op) { - case IM2COL: - { - _dim1 = getExtractedVal(params.C, params.R, params.S); - _dim2 = getExtractedVal(params.N, params.P, params.Q); - _nnz = -1; - break; - } - case COL2IM: - { - // Set _dim1, _dim2 and if possible _nnz (use input1.getNnz()) - _dim1 = params.N; - _dim2 = getExtractedVal(params.C, params.H, params.W); - _nnz = -1; // cannot infer stats - break; - } - case RESHAPE_COL: - { - _dim1 = params.N; - _dim2 = getExtractedVal(params.K, params.P, params.Q); - _nnz = input1.getNnz(); // exact estimates - break; - } - case ROTATE180: - { - _dim1 = getExtractedVal(params.N, params.P, params.Q); - _dim2 = params.K; - _nnz = input1.getNnz(); // exact estimates - break; - } case MAX_POOLING: { _dim1 = params.N; http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e9aa5841/src/main/java/org/apache/sysml/hops/Hop.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/Hop.java b/src/main/java/org/apache/sysml/hops/Hop.java index 7d69940..6afe60e 100644 --- a/src/main/java/org/apache/sysml/hops/Hop.java +++ b/src/main/java/org/apache/sysml/hops/Hop.java @@ -1147,7 +1147,6 @@ public abstract class Hop }; public 
enum ConvOp { - IM2COL, RESHAPE_COL, ROTATE180, COL2IM, MAX_POOLING, MAX_POOLING_BACKWARD, DIRECT_CONV2D, DIRECT_CONV2D_BACKWARD_FILTER, DIRECT_CONV2D_BACKWARD_DATA }; @@ -1220,10 +1219,6 @@ public abstract class Hop protected static final HashMap<ConvOp, org.apache.sysml.lops.ConvolutionTransform.OperationTypes> HopsConv2Lops; static { HopsConv2Lops = new HashMap<ConvOp, org.apache.sysml.lops.ConvolutionTransform.OperationTypes>(); - HopsConv2Lops.put(ConvOp.IM2COL, org.apache.sysml.lops.ConvolutionTransform.OperationTypes.IM2COL); - HopsConv2Lops.put(ConvOp.RESHAPE_COL, org.apache.sysml.lops.ConvolutionTransform.OperationTypes.RESHAPE_COL); - HopsConv2Lops.put(ConvOp.ROTATE180, org.apache.sysml.lops.ConvolutionTransform.OperationTypes.ROTATE180); - HopsConv2Lops.put(ConvOp.COL2IM, org.apache.sysml.lops.ConvolutionTransform.OperationTypes.COL2IM); HopsConv2Lops.put(ConvOp.MAX_POOLING, org.apache.sysml.lops.ConvolutionTransform.OperationTypes.MAX_POOLING); HopsConv2Lops.put(ConvOp.MAX_POOLING_BACKWARD, org.apache.sysml.lops.ConvolutionTransform.OperationTypes.MAX_POOLING_BACKWARD); HopsConv2Lops.put(ConvOp.DIRECT_CONV2D, org.apache.sysml.lops.ConvolutionTransform.OperationTypes.DIRECT_CONV2D); http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e9aa5841/src/main/java/org/apache/sysml/hops/ReorgOp.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/ReorgOp.java b/src/main/java/org/apache/sysml/hops/ReorgOp.java index 5f5138b..7c87a76 100644 --- a/src/main/java/org/apache/sysml/hops/ReorgOp.java +++ b/src/main/java/org/apache/sysml/hops/ReorgOp.java @@ -35,7 +35,6 @@ import org.apache.sysml.lops.Transform.OperationTypes; import org.apache.sysml.parser.Expression.DataType; import org.apache.sysml.parser.Expression.ValueType; import org.apache.sysml.runtime.matrix.MatrixCharacteristics; -import org.apache.sysml.runtime.util.ConvolutionUtils; /** * Reorg (cell) operation: aij @@ -120,12 
+119,6 @@ public class ReorgOp extends Hop implements MultiThreadedHop if( getLops() != null ) return getLops(); - Lop ret = ConvolutionUtils.constructConvolutionBackwardFilterLops(this); - if(ret != null) { - setLops( ret ); - return ret; - } - ExecType et = optFindExecType(); switch( op ) http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e9aa5841/src/main/java/org/apache/sysml/lops/ConvolutionTransform.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/lops/ConvolutionTransform.java b/src/main/java/org/apache/sysml/lops/ConvolutionTransform.java index fdf280d..9164d36 100644 --- a/src/main/java/org/apache/sysml/lops/ConvolutionTransform.java +++ b/src/main/java/org/apache/sysml/lops/ConvolutionTransform.java @@ -30,12 +30,7 @@ public class ConvolutionTransform extends Lop public enum OperationTypes { - IM2COL, - RESHAPE_COL, - ROTATE180, - COL2IM, - MAX_POOLING, - MAX_POOLING_BACKWARD, + MAX_POOLING, MAX_POOLING_BACKWARD, DIRECT_CONV2D, DIRECT_CONV2D_BACKWARD_FILTER, DIRECT_CONV2D_BACKWARD_DATA }; @@ -101,19 +96,7 @@ public class ConvolutionTransform extends Lop private String getOpcode() { switch(operation) { - - case IM2COL: - return "im2col"; - - case RESHAPE_COL: - return "reshape_col"; - - case ROTATE180: - return "rotate180"; - - case COL2IM: - return "col2im"; - + case MAX_POOLING: return "maxpooling"; http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e9aa5841/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java b/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java index 3bb7b0a..bf31347 100644 --- a/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java +++ b/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java @@ -1109,8 +1109,8 @@ public class 
BuiltinFunctionExpression extends DataIdentifier case MAX_POOL_BACKWARD: { // At DML level: - // output = conv2d(input, filter, input_shape=[3, 2, 2], filter_shape=[3, 2, 2], - // strides=[1, 1], border_mode="valid") + // output = conv2d(input, filter, input_shape=[1, 3, 2, 2], filter_shape=[1, 3, 2, 2], + // strides=[1, 1], padding=[1,1]) // // Converted to following in constructor (only supported NCHW): // output = conv2d(input, filter, stride1, stride2, padding1,padding2, http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e9aa5841/src/main/java/org/apache/sysml/parser/DMLTranslator.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/parser/DMLTranslator.java b/src/main/java/org/apache/sysml/parser/DMLTranslator.java index b5bb7c3..f3cb0b1 100644 --- a/src/main/java/org/apache/sysml/parser/DMLTranslator.java +++ b/src/main/java/org/apache/sysml/parser/DMLTranslator.java @@ -2803,18 +2803,9 @@ public class DMLTranslator case CONV2D: { - Hop filter = expr2; - // Step 1: IM2COL Hop image = expr; - ArrayList<Hop> inHops1 = getALHopsForConvOp(image, source, 2, hops); - Hop loweredMat = new ConvolutionOp(image.getName(), image.getDataType(), image.getValueType(), Hop.ConvOp.IM2COL, inHops1); - - // Step 2: Matrix multiplication - Hop temp = new AggBinaryOp("temp" + target.getName(), target.getDataType(), target.getValueType(), OpOp2.MULT, AggOp.SUM, filter, loweredMat); - - // Step 3: Reshape col - ArrayList<Hop> inHops2 = getALHopsForConvOp(temp, source, 2, hops); - currBuiltinOp = new ConvolutionOp(target.getName(), target.getDataType(), target.getValueType(), Hop.ConvOp.RESHAPE_COL, inHops2); + ArrayList<Hop> inHops1 = getALHopsForConvOp(image, source, 1, hops); + currBuiltinOp = new ConvolutionOp(target.getName(), target.getDataType(), target.getValueType(), Hop.ConvOp.DIRECT_CONV2D, inHops1); setBlockSizeAndRefreshSizeInfo(image, currBuiltinOp); break; } @@ -2841,33 +2832,17 @@ public 
class DMLTranslator case CONV2D_BACKWARD_FILTER: { Hop image = expr; - Hop dout = expr2; - - ArrayList<Hop> inHops1 = getALHopsForConvOp(image, source, 2, hops); - Hop x_col = new ConvolutionOp(image.getName(), image.getDataType(), image.getValueType(), Hop.ConvOp.IM2COL, inHops1); - - ArrayList<Hop> inHops2 = getALHopsForConvOp(dout, source, 2, hops); - Hop dout_reshaped = new ConvolutionOp(dout.getName(), dout.getDataType(), dout.getValueType(), Hop.ConvOp.ROTATE180, inHops2); - - Hop dfilter1 = new AggBinaryOp(target.getName(), target.getDataType(), target.getValueType(), OpOp2.MULT, AggOp.SUM, x_col, dout_reshaped); - currBuiltinOp = new ReorgOp("tempTranspose" + image.getName(), image.getDataType(), image.getValueType(), Hop.ReOrgOp.TRANSPOSE, dfilter1); + ArrayList<Hop> inHops1 = getALHopsForConvOp(image, source, 1, hops); + currBuiltinOp = new ConvolutionOp(target.getName(), target.getDataType(), target.getValueType(), Hop.ConvOp.DIRECT_CONV2D_BACKWARD_FILTER, inHops1); setBlockSizeAndRefreshSizeInfo(image, currBuiltinOp); break; } case CONV2D_BACKWARD_DATA: { - Hop filter = expr; - Hop dout = expr2; - - ArrayList<Hop> inHops1 = getALHopsForConvOp(dout, source, 2, hops); - Hop dout_reshaped = new ConvolutionOp(dout.getName(), dout.getDataType(), dout.getValueType(), Hop.ConvOp.ROTATE180, inHops1); - - Hop temp1 = new AggBinaryOp("temp" + target.getName(), target.getDataType(), target.getValueType(), OpOp2.MULT, AggOp.SUM, dout_reshaped, filter); - // Hop temp2 = new ReorgOp("tempTranspose" + target.getName(), target.getDataType(), target.getValueType(), Hop.ReOrgOp.TRANSPOSE, temp1); - - ArrayList<Hop> inHops2 = getALHopsForConvOp(temp1, source, 2, hops); - currBuiltinOp = new ConvolutionOp(target.getName(), target.getDataType(), target.getValueType(), Hop.ConvOp.COL2IM, inHops2); - setBlockSizeAndRefreshSizeInfo(filter, currBuiltinOp); + Hop image = expr; + ArrayList<Hop> inHops1 = getALHopsForConvOp(image, source, 1, hops); + currBuiltinOp = new 
ConvolutionOp(target.getName(), target.getDataType(), target.getValueType(), Hop.ConvOp.DIRECT_CONV2D_BACKWARD_DATA, inHops1); + setBlockSizeAndRefreshSizeInfo(image, currBuiltinOp); break; } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e9aa5841/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java b/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java index ae13d3d..909525f 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java @@ -218,10 +218,6 @@ public class CPInstructionParser extends InstructionParser String2CPInstructionType.put( "rsort" , CPINSTRUCTION_TYPE.Reorg); // Opcodes related to convolutions - String2CPInstructionType.put( "im2col" , CPINSTRUCTION_TYPE.Convolution); - String2CPInstructionType.put( "reshape_col" , CPINSTRUCTION_TYPE.Convolution); - String2CPInstructionType.put( "rotate180" , CPINSTRUCTION_TYPE.Convolution); - String2CPInstructionType.put( "col2im" , CPINSTRUCTION_TYPE.Convolution); String2CPInstructionType.put( "maxpooling" , CPINSTRUCTION_TYPE.Convolution); String2CPInstructionType.put( "maxpooling_backward" , CPINSTRUCTION_TYPE.Convolution); String2CPInstructionType.put( "conv2d" , CPINSTRUCTION_TYPE.Convolution); http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e9aa5841/src/main/java/org/apache/sysml/runtime/instructions/cp/ConvolutionCPInstruction.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/cp/ConvolutionCPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/cp/ConvolutionCPInstruction.java index 4b04eca..5e83ffa 100644 --- 
a/src/main/java/org/apache/sysml/runtime/instructions/cp/ConvolutionCPInstruction.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/cp/ConvolutionCPInstruction.java @@ -80,13 +80,7 @@ public class ConvolutionCPInstruction extends UnaryCPInstruction { String[] parts = InstructionUtils.getInstructionPartsWithValueType(str); String opcode = parts[0]; - if (opcode.equalsIgnoreCase("reshape_col") - || opcode.equalsIgnoreCase("rotate180") - || opcode.equalsIgnoreCase("im2col") - || opcode.equalsIgnoreCase("col2im") - || opcode.equalsIgnoreCase("pooling_pre_reshape") - || opcode.equalsIgnoreCase("pooling_post_reshape") - || opcode.equalsIgnoreCase("maxpooling")) { + if (opcode.equalsIgnoreCase("maxpooling")) { InstructionUtils.checkNumFields(parts, 15); // stride1, stride2, padding1, padding2 // input_shape1, input_shape2, input_shape3, input_shape4, @@ -115,8 +109,7 @@ public class ConvolutionCPInstruction extends UnaryCPInstruction { return new ConvolutionCPInstruction(in, out, opcode, str, stride, padding, input_shape, filter_shape, k); } - else if (opcode.equalsIgnoreCase("pooling_backward_reshape") - || opcode.equalsIgnoreCase("maxpooling_backward") + else if (opcode.equalsIgnoreCase("maxpooling_backward") || opcode.equalsIgnoreCase("conv2d") || opcode.equalsIgnoreCase("conv2d_backward_filter") || opcode.equalsIgnoreCase("conv2d_backward_data")) { @@ -186,38 +179,7 @@ public class ConvolutionCPInstruction extends UnaryCPInstruction { int Q = (int) ConvolutionUtils.getQ(W, S, stride_w, pad_w); ConvolutionParameters params = new ConvolutionParameters(N, C, H, W, K, R, S, stride_h, stride_w, pad_h, pad_w, _numThreads); - - if (instOpcode.equalsIgnoreCase("im2col")) { - checkHeightWidth(ec, params); - checkInputDimensionForIm2col(matBlock, params); - outputBlock = getDenseOutputBlock(ec, C * R * S, N * P * Q, true); - params.setReuseNonZeroedOutput(_reuseNonZeroedOutput); - LibMatrixDNN.im2col(matBlock, outputBlock, params); - } - else if 
(instOpcode.equalsIgnoreCase("reshape_col")) { - checkHeightWidth(ec, params); - // Is eligible for REUSE_NONZEROED_OUTPUT but cannot guarantee that previous output has been rmvar-ed - // without somewhat expensive HashMap checks - outputBlock = getDenseOutputBlock(ec, N, K * P * Q, true); - params.setReuseNonZeroedOutput(_reuseNonZeroedOutput); - LibMatrixDNN.reshape_col(matBlock, outputBlock, params); - } - else if (instOpcode.equalsIgnoreCase("rotate180")) { - checkHeightWidth(ec, params); - // Is eligible for REUSE_NONZEROED_OUTPUT and always an intermediate instruction - outputBlock = getDenseOutputBlock(ec, N * P * Q, K, true); - params.setReuseNonZeroedOutput(_reuseNonZeroedOutput); - LibMatrixDNN.rotate180(matBlock, outputBlock, params); - } - else if (instOpcode.equalsIgnoreCase("col2im")) { - checkHeightWidth(ec, params); - checkInputDimensionForCol2im(matBlock, params); - // needs to be zeroed-out - outputBlock = getDenseOutputBlock(ec, N, C * H * W, false); - params.setReuseNonZeroedOutput(_reuseNonZeroedOutput); - LibMatrixDNN.col2im(matBlock, outputBlock, params); - } - else if (instOpcode.equalsIgnoreCase("maxpooling")) { + if (instOpcode.equalsIgnoreCase("maxpooling")) { // Is eligible for REUSE_NONZEROED_OUTPUT but cannot guarantee that previous output has been rmvar-ed // without somewhat expensive HashMap checks outputBlock = getDenseOutputBlock(ec, N, C*P*Q, true); @@ -284,38 +246,4 @@ public class ConvolutionCPInstruction extends UnaryCPInstruction { Statistics.incrementAllocationTime(System.nanoTime()-start, false); return outputBlock; } - - private void checkHeightWidth(ExecutionContext ec, ConvolutionParameters params) throws DMLRuntimeException { - int numChannelsInFilter = getScalarInput(ec, _filter_shape, 1); - - if (numChannelsInFilter != params.C) { - throw new DMLRuntimeException("The number of channels of input and filter should match"); - } - if((params.W + 2 * params.pad_w - params.S) % params.stride_w != 0) { - throw new 
DMLRuntimeException("The width does not work (Hint: (W + 2 * pad_w - S) % stride_w should be 0 [ ==> (" + params.W + "+" + " 2*" + params.pad_w + "-" + params.S + ") % " + params.stride_w + "!= 0] "); - } - if((params.H + 2 * params.pad_h - params.R) % params.stride_h != 0) { - throw new DMLRuntimeException("The height does not work (Hint: (H + 2 * pad_h - R) % stride_h should be 0 [ ==> (" + params.H + "+" + " 2*" + params.pad_h + "-" + params.R + ") % " + params.stride_h + "!= 0] "); - } - if(params.H <= 0) { - throw new DMLRuntimeException("Height of output patch should be zero"); - } - if(params.Q <= 0) { - throw new DMLRuntimeException("Width of output patch should be zero"); - } - } - - - - private void checkInputDimensionForIm2col(MatrixBlock matBlock, ConvolutionParameters params) throws DMLRuntimeException { - if((params.N != matBlock.getNumRows() || params.C*params.H*params.W != matBlock.getNumColumns())) { - throw new DMLRuntimeException("Incorrect input shape in im2col"); - } - } - - private void checkInputDimensionForCol2im(MatrixBlock matBlock, ConvolutionParameters params) throws DMLRuntimeException { - if((params.N*params.P*params.Q != matBlock.getNumRows() || params.C*params.R*params.S != matBlock.getNumColumns())) { - throw new DMLRuntimeException("Incorrect input shape in col2im"); - } - } } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e9aa5841/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java index 59a6a47..c2b3f7d 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java @@ -74,7 +74,7 @@ public class LibMatrixDNN { } enum TaskType { - ReshapeCol, Rotate180, Im2Col, Col2Im, 
MaxPooling_Forward, MaxPooling_Backward, + MaxPooling_Forward, MaxPooling_Backward, LoopedIm2ColConv2d, LoopedIm2ColConv2dBwdFilter, LoopedIm2ColConv2dBwdData } @@ -250,6 +250,11 @@ public class LibMatrixDNN { throw new DMLRuntimeException("Only positive strides supported"); } + // Convert filter (which is relatively small matrix) to dense + if(params.input1.isInSparseFormat()) { + params.input1.sparseToDense(); + } + if(DMLScript.STATISTICS) { if(filter.isInSparseFormat() || dout.isInSparseFormat()) { conv2dBwdDataSparseCount.addAndGet(1); @@ -375,7 +380,7 @@ public class LibMatrixDNN { MatrixBlock temp = new MatrixBlock(params.P*params.Q, params.C*params.R*params.S, false); long t1 = DMLScript.STATISTICS ? System.nanoTime() : 0; - LibMatrixMult.matrixMult(dout_reshaped, filter, temp); + LibMatrixMult.matrixMult(dout_reshaped, filter, temp, false); long t2 = DMLScript.STATISTICS ? System.nanoTime() : 0 ; doCol2imOverSingleImage(n, temp, params); long t3 = DMLScript.STATISTICS ? System.nanoTime() : 0 ; @@ -400,7 +405,7 @@ public class LibMatrixDNN { MatrixBlock temp = new MatrixBlock(params.C*params.R*params.S, params.K, false); long t3 = DMLScript.STATISTICS ? System.nanoTime() : 0 ; - LibMatrixMult.matrixMult(im2ColOutBlock, dout_reshaped, temp); + LibMatrixMult.matrixMult(im2ColOutBlock, dout_reshaped, temp, false); long t4 = DMLScript.STATISTICS ? 
System.nanoTime() : 0 ; if(DMLScript.STATISTICS) { loopedConvBwdFilterMatMultTime.addAndGet(t4-t3); @@ -427,6 +432,11 @@ public class LibMatrixDNN { throw new DMLRuntimeException("Incorrect input to conv2d"); } + // Convert filter (which is relatively small matrix) to dense + if(params.input2.isInSparseFormat()) { + params.input2.sparseToDense(); + } + if(DMLScript.STATISTICS) { if(input.isInSparseFormat() || filter.isInSparseFormat()) { conv2dSparseCount.addAndGet(1); @@ -461,7 +471,7 @@ public class LibMatrixDNN { im2ColOutBlock.setNonZeros(nnz); MatrixBlock matMultOutBlock = new MatrixBlock(params.K, params.P*params.Q, false); - LibMatrixMult.matrixMult(params.input2, im2ColOutBlock, matMultOutBlock); + LibMatrixMult.matrixMult(params.input2, im2ColOutBlock, matMultOutBlock, false); long t3 = DMLScript.STATISTICS ? System.nanoTime() : 0; if(DMLScript.STATISTICS) { @@ -751,37 +761,6 @@ public class LibMatrixDNN { } params.outputNNZ.addAndGet(tmpNNZ); } - - // Reshape a 4D tensor of dimension (N, K, P, Q) to matrix of dimension (NPQ, K) - public static void rotate180(MatrixBlock input, MatrixBlock outputBlock, ConvolutionParameters params) throws DMLRuntimeException { - params.input1 = input; - params.output = outputBlock; - - if(input.getNumColumns() != params.K*params.P*params.Q || input.getNumRows() != params.N) { - throw new DMLRuntimeException("Incorrect input dimensions in rotate180:" + input.getNumRows() + " " + input.getNumColumns() + " " + params.N + " " + params.K*params.P*params.Q); - } - - int constrainedNumThreads = OptimizerUtils.getConstrainedNumThreads(params.numThreads); - if(!ALLOW_MULTI_THREADED_OPS || constrainedNumThreads <= 1) { - warnSingleThreaded(); - for (int n = 0; n < params.N; n++) { - doRotate180(n, params); - } - } - else { - runConvTask(constrainedNumThreads, 1, TaskType.Rotate180, params); - } - outputBlock.setNonZeros(input.getNonZeros()); // As number of non-zeros doesnot change for rotate180 - } - - private static void 
doRotate180(int n, ConvolutionParameters params) throws DMLRuntimeException { - double [] outputArray = null; - if (!params.output.isInSparseFormat()) - outputArray = params.output.getDenseBlock(); - else - throw new DMLRuntimeException("Sparse output is not supported for rotate180"); - doRotate180(n, n, params.input1, outputArray, params, false); - } private static void doRotate180(int inputN, int outputN, MatrixBlock input, double [] outputArray, ConvolutionParameters params, boolean zeroOutSparseOutput) throws DMLRuntimeException { @@ -818,29 +797,6 @@ public class LibMatrixDNN { } } - - // Reshape a matrix of dimension (K, NPQ) to 4D tensor of dimension (N, K, P, params.Q) - public static void reshape_col(MatrixBlock input, MatrixBlock outputBlock, ConvolutionParameters params) throws DMLRuntimeException { - params.input1 = input; - params.output = outputBlock; - - if(input.getNumColumns() != params.N*params.P*params.Q || input.getNumRows() != params.K) { - throw new DMLRuntimeException("Incorrect input dimensions in reshape_col:" + input.getNumRows() + " " + input.getNumColumns()); - } - - int constrainedNumThreads = OptimizerUtils.getConstrainedNumThreads(params.numThreads); - if(!ALLOW_MULTI_THREADED_OPS || constrainedNumThreads <= 1) { - warnSingleThreaded(); - for (int n = 0; n < params.N; n++) { - doReshapeCol(n, params); - } - } - else { - runConvTask(constrainedNumThreads, 1, TaskType.ReshapeCol, params); - } - outputBlock.setNonZeros(input.getNonZeros()); // As number of non-zeros doesnot change for reshape_col - } - private static int [] getTaskSize(int constrainedNumThreads, int maxNumTaskSize1, int maxNumTaskSize2) { int taskSize1 = 1; int taskSize2 = 1; // Why this heuristics ? 
To reduce the impact of the thread-creation overhead in case of small tasks @@ -939,30 +895,6 @@ public class LibMatrixDNN { @Override public Object call() throws DMLRuntimeException { switch(type) { - case ReshapeCol: - for (int n = n1; n < n2; n++) { - doReshapeCol(n, params); - } - break; - case Rotate180: - for (int n = n1; n < n2; n++) { - doRotate180(n, params); - } - break; - case Im2Col: - long nnz = 0; - for (int n = n1; n < n2; n++) { - for (int z = z1; z < z2; z++) { - nnz += doIm2colOverInputPath_NCHW(n, z, params); - } - } - params.outputNNZ.addAndGet(nnz); - break; - case Col2Im: - for (int n = n1; n < n2; n++) { - doCol2imOverMultipleImages(n, params); - } - break; case MaxPooling_Forward: for (int n = n1; n < n2; n++) { for (int z = z1; z < z2; z++) { @@ -1011,84 +943,6 @@ public class LibMatrixDNN { } } - private static void doReshapeCol(int n, ConvolutionParameters params) { - double [] inputArray = null; - if (!params.input1.isInSparseFormat()) - inputArray = params.input1.getDenseBlock(); - double [] outputArray = null; - if (!params.output.isInSparseFormat()) - outputArray = params.output.getDenseBlock(); - - if(inputArray != null) { - for (int k = 0; k < params.K; k++) { - System.arraycopy(inputArray, k*params.N*params.P*params.Q + n*params.P*params.Q, outputArray, n*params.K*params.P*params.Q + k*params.P*params.Q, params.P*params.Q); - } - } - else { - for (int k = 0; k < params.K; k++) { - for (int p = 0; p < params.P; p++) { - for (int q = 0; q < params.Q; q++) { - outputArray[n*params.K*params.P*params.Q + k*params.P*params.Q + p*params.Q + q] = params.input1.quickGetValue(k, n*params.P*params.Q + p*params.Q + q); - } - } - } - } - } - - // Converts a 4D tensor (N, C, R, S) to a matrix of dimension (CRS, NPQ) - public static void im2col(MatrixBlock input, MatrixBlock outputBlock, ConvolutionParameters params) throws DMLRuntimeException { - params.input1 = input; - params.output = outputBlock; - - params.outputNNZ.set(0); - - 
if(DMLScript.STATISTICS) { - if(input.isInSparseFormat()) { - im2colSparseCount.addAndGet(1); - } - else { - im2colDenseCount.addAndGet(1); - } - } - - int constrainedNumThreads = OptimizerUtils.getConstrainedNumThreads(params.numThreads); - if(!ALLOW_MULTI_THREADED_OPS || constrainedNumThreads <= 1) { - warnSingleThreaded(); - long nnz = 0; - for (int n = 0; n < params.N; n++) { // Do following for all images - for (int c = 0; c < params.C; c++) { // Since format is NCHW - nnz += doIm2colOverInputPath_NCHW(n, c, params); - } - } - outputBlock.setNonZeros(nnz); - } - else { - runConvTask(constrainedNumThreads, params.C, TaskType.Im2Col, params); - outputBlock.setNonZeros(params.outputNNZ.get()); - } - - } - - // Converts a matrix of dimension (CRS, NPQ) to a 4D tensor (N, C, H, W) - public static void col2im(MatrixBlock input, MatrixBlock outputBlock, ConvolutionParameters params) throws DMLRuntimeException { - params.input1 = input; - params.output = outputBlock; - - int constrainedNumThreads = OptimizerUtils.getConstrainedNumThreads(params.numThreads); - if(!ALLOW_MULTI_THREADED_OPS || constrainedNumThreads <= 1) { - warnSingleThreaded(); - // Sequential col2im - for (int n = 0; n < params.N; n++) { // Do following for all images - doCol2imOverMultipleImages(n, params); - } - } - else { - // Parallel col2im - runConvTask(constrainedNumThreads, 1, TaskType.Col2Im, params); - } - } - - // Converts input: PQ X CRS matrix and writes to 1 X CHW private static void doCol2imOverSingleImage(int outputN, MatrixBlock input, ConvolutionParameters params) throws DMLRuntimeException { if(input.rlen != params.P*params.Q || input.clen != params.C*params.R*params.S) { @@ -1169,34 +1023,6 @@ public class LibMatrixDNN { } } - // NPQ X CRS - private static void doCol2imOverMultipleImages(int n, ConvolutionParameters params) throws DMLRuntimeException { - MatrixBlock input = params.input1; - - if(input.rlen != params.N*params.P*params.Q || input.clen != params.C*params.R*params.S) { 
- throw new DMLRuntimeException("Incorrect input dimensions"); - } - - double [] outputArray = null; - if (!params.output.isInSparseFormat()) - outputArray = params.output.getDenseBlock(); - else { - throw new DMLRuntimeException("Only dense output is implemented"); - } - - if(!input.isInSparseFormat()) { - double [] inputArray = input.getDenseBlock(); - doCol2IMDenseInput(n, n, inputArray, outputArray, params); - } - else { - doCol2IMSparseInput(n, n, input.getSparseBlockIterator(n*params.P*params.Q, (n+1)*params.P*params.Q), outputArray, params); - } - } - - private static long doIm2colOverInputPath_NCHW(int n, int c, ConvolutionParameters params) throws DMLRuntimeException { - return doIm2colOverInputPath_NCHW(n, c, null, params); - } - private static long doIm2colOverInputPath_NCHW(int n, int c, MatrixBlock output, ConvolutionParameters params) throws DMLRuntimeException { double [] inputArray = null; if (!params.input1.isInSparseFormat()) http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e9aa5841/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java index 9d878be..6902d40 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java @@ -100,6 +100,31 @@ public class LibMatrixMult } /** + * This method allows one to disable the examSparsity step. This feature is useful if matrixMult is used as an intermediate + * operation (for example: LibMatrixDNN). It makes sense for LibMatrixDNN because the output is internally + * consumed by another dense instruction, which makes repeated conversion to sparse wasteful. 
+ * This should be used in rare cases and if you are unsure, + * use the method 'matrixMult(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret)' instead. + * + * @param m1 + * @param m2 + * @param ret + * @param examSparsity + * @throws DMLRuntimeException + */ + public static void matrixMult(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, boolean examSparsity) + throws DMLRuntimeException + { + matrixMult(m1, m2, ret, 0, m1.rlen, examSparsity); + } + + public static void matrixMult(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, int rl, int ru) + throws DMLRuntimeException + { + matrixMult(m1, m2, ret, rl, ru, true); + } + + /** * * @param m1 * @param m2 @@ -108,7 +133,7 @@ public class LibMatrixMult * @param ru * @throws DMLRuntimeException */ - public static void matrixMult(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, int rl, int ru) + public static void matrixMult(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, int rl, int ru, boolean examSparsity) throws DMLRuntimeException { //check inputs / outputs @@ -146,7 +171,9 @@ public class LibMatrixMult //post-processing: nnz/representation if( !ret.sparse ) ret.recomputeNonZeros(); - ret.examSparsity(); + + if(examSparsity) + ret.examSparsity(); //System.out.println("MM ("+m1.isInSparseFormat()+","+m1.getNumRows()+","+m1.getNumColumns()+","+m1.getNonZeros()+")x" + // "("+m2.isInSparseFormat()+","+m2.getNumRows()+","+m2.getNumColumns()+","+m2.getNonZeros()+") in "+time.stop()); http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e9aa5841/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java index 8f84bd7..1316ad8 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java @@ 
-1224,7 +1224,7 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab * * @throws DMLRuntimeException */ - private void sparseToDense() + void sparseToDense() throws DMLRuntimeException { //set target representation http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e9aa5841/src/main/java/org/apache/sysml/runtime/util/ConvolutionUtils.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/util/ConvolutionUtils.java b/src/main/java/org/apache/sysml/runtime/util/ConvolutionUtils.java index ac19816..80b20cd 100644 --- a/src/main/java/org/apache/sysml/runtime/util/ConvolutionUtils.java +++ b/src/main/java/org/apache/sysml/runtime/util/ConvolutionUtils.java @@ -19,20 +19,6 @@ package org.apache.sysml.runtime.util; -import java.util.ArrayList; - -import org.apache.sysml.api.DMLScript; -import org.apache.sysml.hops.AggBinaryOp; -import org.apache.sysml.hops.ConvolutionOp; -import org.apache.sysml.hops.Hop; -import org.apache.sysml.hops.HopsException; -import org.apache.sysml.hops.ReorgOp; -import org.apache.sysml.hops.Hop.ConvOp; -import org.apache.sysml.hops.Hop.ReOrgOp; -import org.apache.sysml.lops.Lop; -import org.apache.sysml.lops.LopsException; -import org.apache.sysml.lops.LopProperties.ExecType; - public class ConvolutionUtils { @@ -54,191 +40,4 @@ public class ConvolutionUtils { return ret; } - private static boolean isMatMult(Hop hop) { - if(hop != null && hop instanceof AggBinaryOp) { - return true; - } - return false; - } - private static boolean isTranspose(Hop hop) { - if(hop != null && hop instanceof ReorgOp && ((ReorgOp)hop).getOp() == ReOrgOp.TRANSPOSE) { - return true; - } - return false; - } - private static boolean isConvolutionOp(Hop hop, Hop.ConvOp op) { - if(hop != null && hop instanceof ConvolutionOp && ((ConvolutionOp) hop).getOp() == op) { - return true; - } - return false; - } - - // Simple heuristic that prefers im2col for 
non-test/non-validation cases. - private static boolean preferIm2Col(ExecType et, long N, long K, long C, long R, long S, long P, long Q) throws HopsException { - if(et == ExecType.CP && ConvolutionOp.FORCE_NON_IM2COL) { - return false; - } -// else if(et == ExecType.CP && N < 256 ) { -// return true; // Prefer im2col to non-test/non-validation -// } - return false; - } - - public static Lop constructConvolutionBackwardFilterLops(Hop currentHop) throws HopsException, LopsException { - ExecType et = ExecType.CP; // TODO: Check memory estimates - if(DMLScript.USE_ACCELERATOR) - et = ExecType.GPU; // TODO: Add memory estimate checks - else if(et == ExecType.MR || et == ExecType.SPARK) - return null; - - if(currentHop != null && isTranspose(currentHop)) { - Hop matMult = currentHop.getInput().get(0); - if(matMult != null && isMatMult(matMult)) { - Hop x_col = matMult.getInput().get(0); - Hop right = matMult.getInput().get(1); - if(isConvolutionOp(x_col, ConvOp.IM2COL) && isConvolutionOp(right, ConvOp.ROTATE180)) { - Hop image = x_col.getInput().get(0); - Hop dout = right.getInput().get(0); - ArrayList<Hop> inputs = new ArrayList<Hop>(); - inputs.add(image); - inputs.add(dout); - for(int i = 1; i < x_col.getInput().size(); i++) { - inputs.add(x_col.getInput().get(i)); - } - - // K, C * R * S - long N = currentHop.computeSizeInformation(inputs.get(6)); - long C = currentHop.computeSizeInformation(inputs.get(7)); - long H = currentHop.computeSizeInformation(inputs.get(8)); - long W = currentHop.computeSizeInformation(inputs.get(9)); - long K = currentHop.computeSizeInformation(inputs.get(10)); - long R = currentHop.computeSizeInformation(inputs.get(12)); - long S = currentHop.computeSizeInformation(inputs.get(13)); - long stride_h = currentHop.computeSizeInformation(inputs.get(2)); - long stride_w = currentHop.computeSizeInformation(inputs.get(3)); - long pad_h = currentHop.computeSizeInformation(inputs.get(4)); - long pad_w = 
currentHop.computeSizeInformation(inputs.get(5)); - long P = -1; long Q = -1; - if(H > 0 && R > 0 && stride_h > 0 && pad_h > 0) - P = ConvolutionUtils.getP(H, R, stride_h, pad_h); - if(W > 0 && S > 0 && stride_w > 0 && pad_w > 0) - Q = ConvolutionUtils.getQ(W, S, stride_w, pad_w); - - if(preferIm2Col(et, N, K, C, R, S, P, Q)) { - return null; - } - - long rlen = K; - long clen = ConvolutionOp.getExtractedVal(C, R, S); - return ConvolutionOp.constructFusedConvolutionLops(et, inputs, ConvOp.DIRECT_CONV2D_BACKWARD_FILTER, (ConvolutionOp) x_col, rlen, clen); - } - } - } - return null; - } - - public static Lop constructConvolutionLops(Hop currentHop, ExecType et) throws HopsException, LopsException { - if(DMLScript.USE_ACCELERATOR) - et = ExecType.GPU; // TODO: Add memory estimate checks - else if(et == ExecType.MR || et == ExecType.SPARK) - return null; - - if(currentHop != null && isConvolutionOp(currentHop, ConvOp.RESHAPE_COL)) { - Hop matMult = currentHop.getInput().get(0); - if(matMult != null && isMatMult(matMult)) { - Hop filter = matMult.getInput().get(0); - Hop x_col = matMult.getInput().get(1); - if(isConvolutionOp(x_col, ConvOp.IM2COL)) { - Hop image = x_col.getInput().get(0); - ArrayList<Hop> inputs = new ArrayList<Hop>(); - inputs.add(image); - inputs.add(filter); - for(int i = 1; i < x_col.getInput().size(); i++) { - inputs.add(x_col.getInput().get(i)); - } - - // N, K * P * Q - long N = currentHop.computeSizeInformation(inputs.get(6)); - long C = currentHop.computeSizeInformation(inputs.get(7)); - long H = currentHop.computeSizeInformation(inputs.get(8)); - long W = currentHop.computeSizeInformation(inputs.get(9)); - long K = currentHop.computeSizeInformation(inputs.get(10)); - long R = currentHop.computeSizeInformation(inputs.get(12)); - long S = currentHop.computeSizeInformation(inputs.get(13)); - long stride_h = currentHop.computeSizeInformation(inputs.get(2)); - long stride_w = currentHop.computeSizeInformation(inputs.get(3)); - long pad_h = 
currentHop.computeSizeInformation(inputs.get(4)); - long pad_w = currentHop.computeSizeInformation(inputs.get(5)); - long P = -1; long Q = -1; - if(H > 0 && R > 0 && stride_h > 0 && pad_h > 0) - P = ConvolutionUtils.getP(H, R, stride_h, pad_h); - if(W > 0 && S > 0 && stride_w > 0 && pad_w > 0) - Q = ConvolutionUtils.getQ(W, S, stride_w, pad_w); - - if(preferIm2Col(et, N, K, C, R, S, P, Q)) { - return null; - } - - long rlen = N; - long clen = ConvolutionOp.getExtractedVal(K, P, Q); - return ConvolutionOp.constructFusedConvolutionLops(et, inputs, ConvOp.DIRECT_CONV2D, (ConvolutionOp) x_col, rlen, clen); - } - } - } - - return null; - } - - public static Lop constructConvolutionBackwardDataLops(Hop currentHop, ExecType et) throws HopsException, LopsException { - if(DMLScript.USE_ACCELERATOR) - et = ExecType.GPU; // TODO: Add memory estimate checks - else if(et == ExecType.MR || et == ExecType.SPARK) - return null; - - if(currentHop != null && isConvolutionOp(currentHop, ConvOp.COL2IM)) { - Hop matMult = currentHop.getInput().get(0); - if(matMult != null && isMatMult(matMult)) { - Hop rotate180 = matMult.getInput().get(0); - Hop filter = matMult.getInput().get(1); - if(isConvolutionOp(rotate180, ConvOp.ROTATE180)) { - ArrayList<Hop> inputs = new ArrayList<Hop>(); - inputs.add(filter); - inputs.add(rotate180.getInput().get(0)); - for(int i = 1; i < rotate180.getInput().size(); i++) { - inputs.add(rotate180.getInput().get(i)); - } - - // N, C * H * W - long N = currentHop.computeSizeInformation(inputs.get(6)); - long C = currentHop.computeSizeInformation(inputs.get(7)); - long H = currentHop.computeSizeInformation(inputs.get(8)); - long W = currentHop.computeSizeInformation(inputs.get(9)); - long K = currentHop.computeSizeInformation(inputs.get(10)); - long R = currentHop.computeSizeInformation(inputs.get(12)); - long S = currentHop.computeSizeInformation(inputs.get(13)); - long stride_h = currentHop.computeSizeInformation(inputs.get(2)); - long stride_w = 
currentHop.computeSizeInformation(inputs.get(3)); - long pad_h = currentHop.computeSizeInformation(inputs.get(4)); - long pad_w = currentHop.computeSizeInformation(inputs.get(5)); - long P = -1; long Q = -1; - if(H > 0 && R > 0 && stride_h > 0 && pad_h > 0) - P = ConvolutionUtils.getP(H, R, stride_h, pad_h); - if(W > 0 && S > 0 && stride_w > 0 && pad_w > 0) - Q = ConvolutionUtils.getQ(W, S, stride_w, pad_w); - - if(preferIm2Col(et, N, K, C, R, S, P, Q)) { - return null; - } - long rlen = N; - long clen = ConvolutionOp.getExtractedVal(C, H, W); - return ConvolutionOp.constructFusedConvolutionLops(et, inputs, ConvOp.DIRECT_CONV2D_BACKWARD_DATA, (ConvolutionOp) rotate180, rlen, clen); - } - } - - } - - return null; - } - - } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e9aa5841/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DBackwardTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DBackwardTest.java b/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DBackwardTest.java index c213b55..74d3d14 100644 --- a/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DBackwardTest.java +++ b/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DBackwardTest.java @@ -22,7 +22,6 @@ import java.util.HashMap; import org.apache.sysml.api.DMLScript; import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM; -import org.apache.sysml.hops.ConvolutionOp; import org.apache.sysml.lops.LopProperties.ExecType; import org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex; import org.apache.sysml.runtime.util.ConvolutionUtils; @@ -50,70 +49,35 @@ public class Conv2DBackwardTest extends AutomatedTestBase public void testConv2DBackwardFilterDense1() { int numImg = 3; int imgSize = 3; int numChannels = 3; int numFilters = 1; int filterSize = 2; int stride = 1; int pad = 0; - 
runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false); + runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad); } @Test public void testConv2DBackwardFilterDense2() { int numImg = 3; int imgSize = 3; int numChannels = 3; int numFilters = 4; int filterSize = 2; int stride = 1; int pad = 0; - runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false); + runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad); } @Test public void testConv2DBackwardFilterDense3() { int numImg = 3; int imgSize = 10; int numChannels = 4; int numFilters = 3; int filterSize = 2; int stride = 2; int pad = 1; - runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false); + runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad); } @Test public void testConv2DBackwardFilterDense4() { int numImg = 3; int imgSize = 10; int numChannels = 4; int numFilters = 3; int filterSize = 5; int stride = 1; int pad = 1; - runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false); + runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad); } @Test public void testConv2DBackwardFilterDense5() { int numImg = 3; int imgSize = 10; int numChannels = 2; int numFilters = 3; int filterSize = 5; int stride = 3; int pad = 2; - runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false); - } - - @Test - public void testConv2DBackwardFilterDense6() - { - int numImg = 3; int imgSize = 3; int numChannels = 3; int numFilters = 1; int filterSize = 2; int stride = 1; int pad = 0; - runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, 
numChannels, numFilters, filterSize, stride, pad, true); - } - - @Test - public void testConv2DBackwardFilterDense7() - { - int numImg = 3; int imgSize = 3; int numChannels = 3; int numFilters = 4; int filterSize = 2; int stride = 1; int pad = 0; - runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true); - } - - @Test - public void testConv2DBackwardFilterDense8() - { - int numImg = 3; int imgSize = 10; int numChannels = 4; int numFilters = 3; int filterSize = 2; int stride = 2; int pad = 1; - runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true); - } - - @Test - public void testConv2DBackwardFilterDense9() - { - int numImg = 3; int imgSize = 10; int numChannels = 4; int numFilters = 3; int filterSize = 5; int stride = 1; int pad = 1; - runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true); - } - - @Test - public void testConv2DBackwardFilterDense10() - { - int numImg = 3; int imgSize = 10; int numChannels = 2; int numFilters = 3; int filterSize = 5; int stride = 3; int pad = 2; - runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true); + runConv2DBackwardFilterTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad); } /** @@ -122,13 +86,11 @@ public class Conv2DBackwardTest extends AutomatedTestBase * @param sparse */ public void runConv2DBackwardFilterTest( ExecType et, int imgSize, int numImg, int numChannels, int numFilters, - int filterSize, int stride, int pad, boolean forceNonIm2Col) + int filterSize, int stride, int pad) { RUNTIME_PLATFORM oldRTP = rtplatform; boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG; - boolean oldForceNonIm2col = ConvolutionOp.FORCE_NON_IM2COL; - ConvolutionOp.FORCE_NON_IM2COL = forceNonIm2Col; try { TestConfiguration config = getTestConfiguration(TEST_NAME); @@ 
-176,7 +138,6 @@ public class Conv2DBackwardTest extends AutomatedTestBase { rtplatform = oldRTP; DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld; - ConvolutionOp.FORCE_NON_IM2COL = oldForceNonIm2col; } } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/e9aa5841/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DTest.java b/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DTest.java index 8b87372..e247d08 100644 --- a/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DTest.java +++ b/src/test/java/org/apache/sysml/test/integration/functions/tensor/Conv2DTest.java @@ -22,7 +22,6 @@ import java.util.HashMap; import org.apache.sysml.api.DMLScript; import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM; -import org.apache.sysml.hops.ConvolutionOp; import org.apache.sysml.lops.LopProperties.ExecType; import org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex; import org.apache.sysml.test.integration.AutomatedTestBase; @@ -48,88 +47,49 @@ public class Conv2DTest extends AutomatedTestBase public void testConv2DDense1() { int numImg = 5; int imgSize = 3; int numChannels = 3; int numFilters = 6; int filterSize = 2; int stride = 1; int pad = 0; - runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false); + runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad); } @Test public void testConv2DDense2() { int numImg = 1; int imgSize = 10; int numChannels = 4; int numFilters = 3; int filterSize = 4; int stride = 2; int pad = 0; - runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false); + runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad); } @Test public void 
testConv2DDense3() { int numImg = 1; int imgSize = 10; int numChannels = 4; int numFilters = 3; int filterSize = 4; int stride = 2; int pad = 1; - runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false); + runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad); } @Test public void testConv2DDense4() { int numImg = 3; int imgSize = 10; int numChannels = 1; int numFilters = 3; int filterSize = 2; int stride = 2; int pad = 1; - runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false); + runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad); } @Test public void testConv2DDense5() { int numImg = 3; int imgSize = 8; int numChannels = 2; int numFilters = 3; int filterSize = 3; int stride = 1; int pad = 2; - runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, false); + runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad); } - @Test - public void testConv2DDense6() - { - int numImg = 5; int imgSize = 3; int numChannels = 3; int numFilters = 6; int filterSize = 2; int stride = 1; int pad = 0; - runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true); - } - - @Test - public void testConv2DDense7() - { - int numImg = 1; int imgSize = 10; int numChannels = 4; int numFilters = 3; int filterSize = 4; int stride = 2; int pad = 0; - runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true); - } - - @Test - public void testConv2DDense8() - { - int numImg = 1; int imgSize = 10; int numChannels = 4; int numFilters = 3; int filterSize = 4; int stride = 2; int pad = 1; - runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true); - } - - @Test - public void testConv2DDense9() - { - int numImg = 3; int imgSize = 10; 
int numChannels = 1; int numFilters = 3; int filterSize = 2; int stride = 2; int pad = 1; - runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true); - } - - @Test - public void testConv2DDense10() - { - int numImg = 3; int imgSize = 8; int numChannels = 2; int numFilters = 3; int filterSize = 3; int stride = 1; int pad = 2; - runConv2DTest(ExecType.CP, imgSize, numImg, numChannels, numFilters, filterSize, stride, pad, true); - } - - /** * * @param et * @param sparse */ public void runConv2DTest( ExecType et, int imgSize, int numImg, int numChannels, int numFilters, - int filterSize, int stride, int pad, boolean FORCE_NON_IM2COL) + int filterSize, int stride, int pad) { RUNTIME_PLATFORM oldRTP = rtplatform; boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG; - boolean oldForceNonIm2col = ConvolutionOp.FORCE_NON_IM2COL; - ConvolutionOp.FORCE_NON_IM2COL = FORCE_NON_IM2COL; - try { TestConfiguration config = getTestConfiguration(TEST_NAME); @@ -175,7 +135,6 @@ public class Conv2DTest extends AutomatedTestBase { rtplatform = oldRTP; DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld; - ConvolutionOp.FORCE_NON_IM2COL = oldForceNonIm2col; } } }
