Repository: incubator-systemml Updated Branches: refs/heads/master 0ff4f14b6 -> 7af36f80b
[HOTFIX] [SYSTEMML-540] Fixed javadoc errors and added fused CP conv2d + bias_add Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/7af36f80 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/7af36f80 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/7af36f80 Branch: refs/heads/master Commit: 7af36f80b7b2726d3411eb308592dcb3ea00ccc3 Parents: 0ff4f14 Author: Niketan Pansare <[email protected]> Authored: Tue Jan 10 19:04:58 2017 -0800 Committer: Niketan Pansare <[email protected]> Committed: Tue Jan 10 19:04:58 2017 -0800 ---------------------------------------------------------------------- .../org/apache/sysml/hops/ConvolutionOp.java | 27 +++++- .../apache/sysml/lops/ConvolutionTransform.java | 82 ++++++++-------- src/main/java/org/apache/sysml/lops/Lop.java | 8 +- .../java/org/apache/sysml/lops/compile/Dag.java | 20 ++++ .../instructions/CPInstructionParser.java | 1 + .../cp/ConvolutionCPInstruction.java | 74 ++++++++++++++- .../matrix/data/ConvolutionParameters.java | 1 + .../runtime/matrix/data/LibMatrixCUDA.java | 98 ++++++++++---------- .../sysml/runtime/matrix/data/LibMatrixDNN.java | 86 +++++++++++------ 9 files changed, 264 insertions(+), 133 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7af36f80/src/main/java/org/apache/sysml/hops/ConvolutionOp.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/ConvolutionOp.java b/src/main/java/org/apache/sysml/hops/ConvolutionOp.java index 3f9ca7e..9f67968 100644 --- a/src/main/java/org/apache/sysml/hops/ConvolutionOp.java +++ b/src/main/java/org/apache/sysml/hops/ConvolutionOp.java @@ -137,14 +137,27 @@ public class ConvolutionOp extends Hop implements MultiThreadedHop throw new HopsException("Incorrect number of 
inputs for " + op.name()); } - Lop in = null; + Lop in = null; Lop in2 = null; OperationTypes lopOp = HopsConv2Lops.get(op); int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads); + ArrayList<Hop> inputs1 = inputs; if(op == ConvOp.MAX_POOLING && et == ExecType.CP && inputs.get(0) instanceof UnaryOp && ((UnaryOp) inputs.get(0)).getOp() == OpOp1.SELP) { in = inputs.get(0).getInput().get(0).constructLops(); lopOp = OperationTypes.RELU_MAX_POOLING; } + else if(op == ConvOp.BIAS_ADD && et == ExecType.CP && inputs.get(0) instanceof ConvolutionOp + && ((ConvolutionOp) inputs.get(0)).getOp() == ConvOp.DIRECT_CONV2D) { + lopOp = OperationTypes.DIRECT_CONV2D_BIAS_ADD; + + // the first lop is image + in = inputs.get(0).getInput().get(0).constructLops(); + // the second lop is bias + in2 = inputs.get(1).constructLops(); + + // Use the inputs from conv2d rather than bias_add + inputs1 = inputs.get(0).getInput(); + } else { in = inputs.get(0).constructLops(); } @@ -153,15 +166,19 @@ public class ConvolutionOp extends Hop implements MultiThreadedHop setLineNumbers(transform1); in.addOutput(transform1); + if(in2 != null) { + transform1.addInput(in2); + in2.addOutput(transform1); + } + // stride1, stride2, padding1, padding2 // input_shape1, input_shape2, input_shape3, input_shape4, // filter_shape1, filter_shape2, filter_shape3, filter_shape4 - for( int i=1; i < inputs.size(); i++ ) + for( int i=1; i < inputs1.size(); i++ ) { - Lop ltmp = inputs.get(i).constructLops(); + Lop ltmp = inputs1.get(i).constructLops(); transform1.addInput(ltmp); - //if(i == 1 && expectedNumInputs == 14) - ltmp.addOutput(transform1); + ltmp.addOutput(transform1); } transform1.setLevel(); //force order of added lops return transform1; http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7af36f80/src/main/java/org/apache/sysml/lops/ConvolutionTransform.java ---------------------------------------------------------------------- diff --git 
a/src/main/java/org/apache/sysml/lops/ConvolutionTransform.java b/src/main/java/org/apache/sysml/lops/ConvolutionTransform.java index 6f2a20e..558deb3 100644 --- a/src/main/java/org/apache/sysml/lops/ConvolutionTransform.java +++ b/src/main/java/org/apache/sysml/lops/ConvolutionTransform.java @@ -32,7 +32,7 @@ public class ConvolutionTransform extends Lop public enum OperationTypes { MAX_POOLING, MAX_POOLING_BACKWARD, RELU_MAX_POOLING, RELU_BACKWARD, DIRECT_CONV2D, DIRECT_CONV2D_BACKWARD_FILTER, DIRECT_CONV2D_BACKWARD_DATA, - BIAS_ADD + BIAS_ADD, DIRECT_CONV2D_BIAS_ADD }; private OperationTypes operation = null; @@ -121,6 +121,9 @@ public class ConvolutionTransform extends Lop case DIRECT_CONV2D: return "conv2d"; + case DIRECT_CONV2D_BIAS_ADD: + return "conv2d_bias_add"; + case BIAS_ADD: return "bias_add"; @@ -163,66 +166,57 @@ public class ConvolutionTransform extends Lop } } - //CP instructions - // stride1, stride2, padding1, padding2 - // input_shape1, input_shape2, input_shape3, input_shape4, - // filter_shape1, filter_shape2, filter_shape3, filter_shape4, + // Used by maxpool public String getInstructions(String input, String stride1, String stride2, String padding1, String padding2, String input_shape1, String input_shape2, String input_shape3, String input_shape4, String filter_shape1, String filter_shape2, String filter_shape3, String filter_shape4, String output) throws LopsException { - //only used for im2col and col2im StringBuilder sb = new StringBuilder(); - sb.append( getExecType() ); - - sb.append( OPERAND_DELIMITOR ); - sb.append( getOpcode() ); - sb.append( OPERAND_DELIMITOR ); + appendOpcode(sb); sb.append( getInputs().get(0).prepInputOperand(input)); - - //rows, cols, byrow - String[] inputX = new String[]{stride1, stride2, padding1, padding2, - input_shape1, input_shape2, input_shape3, input_shape4, - filter_shape1, filter_shape2, filter_shape3, filter_shape4}; - for( int i=1; i<=(inputX.length); i++ ) { - Lop ltmp = getInputs().get(i); - 
sb.append( OPERAND_DELIMITOR ); - sb.append( ltmp.prepScalarInputOperand(getExecType())); - } - - //output - sb.append( OPERAND_DELIMITOR ); - sb.append( this.prepOutputOperand(output)); - - //append degree of parallelism - if( getExecType()==ExecType.CP ) { - sb.append( OPERAND_DELIMITOR ); - sb.append( numThreads ); - } - + appendOperands(1, 13, output, sb); return sb.toString(); } + // Used by conv2d*, maxpool_bwd public String getInstructions(String input, String dout, String stride1, String stride2, String padding1, String padding2, String input_shape1, String input_shape2, String input_shape3, String input_shape4, String filter_shape1, String filter_shape2, String filter_shape3, String filter_shape4, String output) throws LopsException { - //only used for im2col and col2im StringBuilder sb = new StringBuilder(); + appendOpcode(sb); + sb.append( getInputs().get(0).prepInputOperand(input)); + sb.append( OPERAND_DELIMITOR ); + sb.append( getInputs().get(1).prepInputOperand(dout)); + appendOperands(2, 14, output, sb); + return sb.toString(); + } + + // Used by fused conv2d+bias_add + public String getInstructions(String input, String bias, String filter, String stride1, String stride2, String padding1, String padding2, + String input_shape1, String input_shape2, String input_shape3, String input_shape4, + String filter_shape1, String filter_shape2, String filter_shape3, String filter_shape4, + String output) throws LopsException { + StringBuilder sb = new StringBuilder(); + appendOpcode(sb); + sb.append( getInputs().get(0).prepInputOperand(input)); + sb.append( OPERAND_DELIMITOR ); + sb.append( getInputs().get(1).prepInputOperand(bias)); + sb.append( OPERAND_DELIMITOR ); + sb.append( getInputs().get(2).prepInputOperand(filter)); + appendOperands(3, 15, output, sb); + return sb.toString(); + } + + public void appendOpcode(StringBuilder sb) { sb.append( getExecType() ); - sb.append( OPERAND_DELIMITOR ); sb.append( getOpcode() ); sb.append( OPERAND_DELIMITOR ); - 
sb.append( getInputs().get(0).prepInputOperand(input)); - - sb.append( OPERAND_DELIMITOR ); - sb.append( getInputs().get(1).prepInputOperand(dout)); - - String[] inputX = new String[]{input, dout, stride1, stride2, padding1, padding2, - input_shape1, input_shape2, input_shape3, input_shape4, - filter_shape1, filter_shape2, filter_shape3, filter_shape4}; - for( int i=2; i < inputX.length; i++ ) { + } + + public void appendOperands(int startInputIndex, int endInputIndex, String output, StringBuilder sb) { + for( int i=startInputIndex; i < endInputIndex; i++ ) { Lop ltmp = getInputs().get(i); sb.append( OPERAND_DELIMITOR ); sb.append( ltmp.prepScalarInputOperand(getExecType())); @@ -237,8 +231,6 @@ public class ConvolutionTransform extends Lop sb.append( OPERAND_DELIMITOR ); sb.append( numThreads ); } - - return sb.toString(); } } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7af36f80/src/main/java/org/apache/sysml/lops/Lop.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/lops/Lop.java b/src/main/java/org/apache/sysml/lops/Lop.java index ad25970..567b0be 100644 --- a/src/main/java/org/apache/sysml/lops/Lop.java +++ b/src/main/java/org/apache/sysml/lops/Lop.java @@ -647,7 +647,6 @@ public abstract class Lop throw new LopsException(this.printErrorLocation() + "Should never be invoked in Baseclass"); } - // For pooling backward public String getInstructions(String input, String dout, String stride1, String stride2, String padding1, String padding2, String input_shape1, String input_shape2, String input_shape3, String input_shape4, String filter_shape1, String filter_shape2, String filter_shape3, String filter_shape4, @@ -655,6 +654,13 @@ public abstract class Lop throw new LopsException(this.printErrorLocation() + "Should never be invoked in Baseclass"); } + public String getInstructions(String input, String bias, String dout, String stride1, String stride2, String padding1, String 
padding2, + String input_shape1, String input_shape2, String input_shape3, String input_shape4, + String filter_shape1, String filter_shape2, String filter_shape3, String filter_shape4, + String output) throws LopsException { + throw new LopsException(this.printErrorLocation() + "Should never be invoked in Baseclass"); + } + public String getInstructions(int input, int rowl, int rowu, int coll, int colu, int leftRowDim, int leftColDim, int output) throws LopsException { http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7af36f80/src/main/java/org/apache/sysml/lops/compile/Dag.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/lops/compile/Dag.java b/src/main/java/org/apache/sysml/lops/compile/Dag.java index 8f17b2c..898f4ec 100644 --- a/src/main/java/org/apache/sysml/lops/compile/Dag.java +++ b/src/main/java/org/apache/sysml/lops/compile/Dag.java @@ -1528,6 +1528,26 @@ public class Dag<N extends Lop> node.getInputs().get(13).getOutputParameters().getLabel(), node.getOutputParameters().getLabel()); } + else if (node.getInputs().size() == 15) { + // Used for fused conv2d_bias_add + inst_string = node.getInstructions( + node.getInputs().get(0).getOutputParameters().getLabel(), + node.getInputs().get(1).getOutputParameters().getLabel(), + node.getInputs().get(2).getOutputParameters().getLabel(), + node.getInputs().get(3).getOutputParameters().getLabel(), + node.getInputs().get(4).getOutputParameters().getLabel(), + node.getInputs().get(5).getOutputParameters().getLabel(), + node.getInputs().get(6).getOutputParameters().getLabel(), + node.getInputs().get(7).getOutputParameters().getLabel(), + node.getInputs().get(8).getOutputParameters().getLabel(), + node.getInputs().get(9).getOutputParameters().getLabel(), + node.getInputs().get(10).getOutputParameters().getLabel(), + node.getInputs().get(11).getOutputParameters().getLabel(), + node.getInputs().get(12).getOutputParameters().getLabel(), + 
node.getInputs().get(13).getOutputParameters().getLabel(), + node.getInputs().get(14).getOutputParameters().getLabel(), + node.getOutputParameters().getLabel()); + } else { throw new LopsException(node.printErrorLocation() + "Node with " + node.getInputs().size() + " inputs is not supported in CP yet! \n"); } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7af36f80/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java b/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java index 11d4661..f631527 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java @@ -225,6 +225,7 @@ public class CPInstructionParser extends InstructionParser String2CPInstructionType.put( "maxpooling" , CPINSTRUCTION_TYPE.Convolution); String2CPInstructionType.put( "maxpooling_backward" , CPINSTRUCTION_TYPE.Convolution); String2CPInstructionType.put( "conv2d" , CPINSTRUCTION_TYPE.Convolution); + String2CPInstructionType.put( "conv2d_bias_add" , CPINSTRUCTION_TYPE.Convolution); String2CPInstructionType.put( "conv2d_backward_filter" , CPINSTRUCTION_TYPE.Convolution); String2CPInstructionType.put( "conv2d_backward_data" , CPINSTRUCTION_TYPE.Convolution); String2CPInstructionType.put( "bias_add" , CPINSTRUCTION_TYPE.Convolution); http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7af36f80/src/main/java/org/apache/sysml/runtime/instructions/cp/ConvolutionCPInstruction.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/cp/ConvolutionCPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/cp/ConvolutionCPInstruction.java index 997c79b..ed0b548 100644 --- 
a/src/main/java/org/apache/sysml/runtime/instructions/cp/ConvolutionCPInstruction.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/cp/ConvolutionCPInstruction.java @@ -21,7 +21,6 @@ package org.apache.sysml.runtime.instructions.cp; import java.util.ArrayList; import java.util.Arrays; - import org.apache.sysml.parser.Expression.DataType; import org.apache.sysml.parser.Expression.ValueType; import org.apache.sysml.runtime.DMLRuntimeException; @@ -36,7 +35,8 @@ import org.apache.sysml.runtime.util.ConvolutionUtils; public class ConvolutionCPInstruction extends UnaryCPInstruction { - private CPOperand _in2; // used for pooling backward + private CPOperand _in2; + private CPOperand _in3; private ArrayList<CPOperand> _input_shape; private ArrayList<CPOperand> _filter_shape; private ArrayList<CPOperand> _stride = new ArrayList<CPOperand>(); @@ -82,6 +82,22 @@ public class ConvolutionCPInstruction extends UnaryCPInstruction { _filter_shape = filter_shape; _numThreads = numThreads; } + + public ConvolutionCPInstruction(CPOperand in, CPOperand in2, CPOperand in3, CPOperand out, String opcode, + String istr, ArrayList<CPOperand> stride, + ArrayList<CPOperand> padding, ArrayList<CPOperand> input_shape, + ArrayList<CPOperand> filter_shape, int numThreads) { + super(new ReorgOperator(SwapIndex.getSwapIndexFnObject()), in, out, + opcode, istr); + _in2 = in2; + _in3 = in3; + _cptype = CPINSTRUCTION_TYPE.Convolution; + _stride = stride; + _padding = padding; + _input_shape = input_shape; + _filter_shape = filter_shape; + _numThreads = numThreads; + } public static ConvolutionCPInstruction parseInstruction(String str) throws DMLRuntimeException { @@ -152,7 +168,40 @@ public class ConvolutionCPInstruction extends UnaryCPInstruction { return new ConvolutionCPInstruction(in, in2, out, opcode, str, stride, padding, input_shape, filter_shape, k); - } + } + else if (opcode.equalsIgnoreCase("conv2d_bias_add")) { + InstructionUtils.checkNumFields(parts, 17); + // dout, 
stride1, stride2, padding1, padding2 + // input_shape1, input_shape2, input_shape3, input_shape4, + // filter_shape1, filter_shape2, filter_shape3, filter_shape4, k + in.split(parts[1]); + CPOperand in2 = new CPOperand("", ValueType.UNKNOWN, DataType.UNKNOWN); + in2.split(parts[2]); + CPOperand in3 = new CPOperand("", ValueType.UNKNOWN, DataType.UNKNOWN); + in3.split(parts[3]); + out.split(parts[16]); + + ArrayList<CPOperand> stride = new ArrayList<CPOperand>(); + ArrayList<CPOperand> padding = new ArrayList<CPOperand>(); + ArrayList<CPOperand> input_shape = new ArrayList<CPOperand>(); + ArrayList<CPOperand> filter_shape = new ArrayList<CPOperand>(); + stride.add(new CPOperand(parts[4])); + stride.add(new CPOperand(parts[5])); + padding.add(new CPOperand(parts[6])); + padding.add(new CPOperand(parts[7])); + input_shape.add(new CPOperand(parts[8])); + input_shape.add(new CPOperand(parts[9])); + input_shape.add(new CPOperand(parts[10])); + input_shape.add(new CPOperand(parts[11])); + filter_shape.add(new CPOperand(parts[12])); + filter_shape.add(new CPOperand(parts[13])); + filter_shape.add(new CPOperand(parts[14])); + filter_shape.add(new CPOperand(parts[15])); + int k = Integer.parseInt(parts[17]); + + return new ConvolutionCPInstruction(in, in2, in3, out, opcode, str, stride, + padding, input_shape, filter_shape, k); + } else if (opcode.equalsIgnoreCase("bias_add") || opcode.equals("relu_backward")) { InstructionUtils.checkNumFields(parts, 4); in.split(parts[1]); @@ -194,7 +243,7 @@ public class ConvolutionCPInstruction extends UnaryCPInstruction { ec.setMatrixOutput(getOutputVariableName(), outputBlock); } - public void processBiasInstruction(ExecutionContext ec) throws DMLRuntimeException { + public void processBiasAddInstruction(ExecutionContext ec) throws DMLRuntimeException { MatrixBlock outputBlock = null; MatrixBlock input = ec.getMatrixInput(input1.getName()); MatrixBlock bias = ec.getMatrixInput(_in2.getName()); @@ -227,7 +276,7 @@ public class 
ConvolutionCPInstruction extends UnaryCPInstruction { public void processInstruction(ExecutionContext ec) throws DMLRuntimeException { if (instOpcode.equalsIgnoreCase("bias_add")) { - processBiasInstruction(ec); + processBiasAddInstruction(ec); return; } else if (instOpcode.equalsIgnoreCase("relu_backward")) { @@ -289,6 +338,21 @@ public class ConvolutionCPInstruction extends UnaryCPInstruction { } ec.releaseMatrixInput(_in2.getName()); } + else if (instOpcode.equalsIgnoreCase("conv2d_bias_add")) { + MatrixBlock filter = ec.getMatrixInput(_in3.getName()); + MatrixBlock bias = ec.getMatrixInput(_in2.getName()); + if((filter.isEmptyBlock() || matBlock.isEmptyBlock()) && bias.isEmptyBlock()) { + outputBlock = new MatrixBlock(N, K*P*Q, true, 0); + } + else { + outputBlock = getDenseOutputBlock(ec, N, K*P*Q); + if(!bias.isEmptyBlock()) + params.bias = bias; + LibMatrixDNN.conv2d(matBlock, filter, outputBlock, params); + } + ec.releaseMatrixInput(_in3.getName()); + ec.releaseMatrixInput(_in2.getName()); + } else if (instOpcode.equalsIgnoreCase("conv2d_backward_filter")) { MatrixBlock dout = ec.getMatrixInput(_in2.getName()); if(dout.isEmptyBlock() || matBlock.isEmptyBlock()) { http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7af36f80/src/main/java/org/apache/sysml/runtime/matrix/data/ConvolutionParameters.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/ConvolutionParameters.java b/src/main/java/org/apache/sysml/runtime/matrix/data/ConvolutionParameters.java index cd37c06..9cd187c 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/ConvolutionParameters.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/ConvolutionParameters.java @@ -34,6 +34,7 @@ public class ConvolutionParameters { MatrixBlock input1; MatrixBlock input2; MatrixBlock output; + public MatrixBlock bias; public int [] start_indexes_h, end_indexes_h, start_indexes_w, end_indexes_w; 
private int convertToInt(long val) throws DMLRuntimeException { http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7af36f80/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java index f160bc7..1af2a1d 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java @@ -126,10 +126,6 @@ public class LibMatrixCUDA { dstTensorDesc = allocateTensorDescriptor(N, K, P, Q); filterDesc = allocateFilterDescriptor(K, C, R, S); - // Allocate data - // (Pointer) gpuCtx.prepare(image, true, true); - // (Pointer) gpuCtx.prepare(filter, true, true); - Pointer imagePointer = ((JCudaObject)image.getGPUObject()).jcudaDenseMatrixPtr; Pointer filterPointer = ((JCudaObject)filter.getGPUObject()).jcudaDenseMatrixPtr; Pointer dstPointer = ((JCudaObject)outputBlock.getGPUObject()).jcudaDenseMatrixPtr; @@ -245,10 +241,10 @@ public class LibMatrixCUDA { /** * This method computes the backpropagation errors for previous layer of relu operation * - * @param input - * @param dout - * @param outputBlock - * @throws DMLRuntimeException + * @param input input image + * @param dout next layer error propagation + * @param outputBlock output + * @throws DMLRuntimeException if DMLRuntimeException occurs */ public static void reluBackward(MatrixObject input, MatrixObject dout, MatrixObject outputBlock) throws DMLRuntimeException { if(isInSparseFormat(input)) { @@ -273,10 +269,10 @@ public class LibMatrixCUDA { * output = input + matrix(bias %*% ones, rows=1, cols=F*Hout*Wout) * This operation is often followed by conv2d and hence we have introduced bias_add(input, bias) built-in function * - * @param input - * @param bias - * @param outputBlock - * @throws 
DMLRuntimeException + * @param input input image + * @param bias bias + * @param outputBlock output + * @throws DMLRuntimeException if DMLRuntimeException occurs */ public static void biasAdd(MatrixObject input, MatrixObject bias, MatrixObject outputBlock) throws DMLRuntimeException { if(isInSparseFormat(input)) { @@ -320,7 +316,7 @@ public class LibMatrixCUDA { * @param stride_w stride width * @param P output activation height * @param Q output activation width - * @throws DMLRuntimeException + * @throws DMLRuntimeException if DMLRuntimeException occurs */ public static void conv2dBackwardFilter(MatrixObject image, MatrixObject dout, MatrixObject outputBlock, int N, int C, int H, int W, int K, int R, @@ -501,8 +497,8 @@ public class LibMatrixCUDA { * Hence, we compute only the upper triangular matrix and copy this partial * result down to lower triangular matrix once. * - * @param ret - * @throws DMLRuntimeException + * @param ret upper triangular matrix + * @throws DMLRuntimeException if DMLRuntimeException occurs */ private static void copyUpperToLowerTriangle(MatrixObject ret) throws DMLRuntimeException { if(isInSparseFormat(ret)) { @@ -1185,7 +1181,7 @@ public class LibMatrixCUDA { * @param in {@link Pointer} to matrix in device memory * @param n size of array * @return the reduced value - * @throws DMLRuntimeException + * @throws DMLRuntimeException if DMLRuntimeException occurs */ private static double reduceAll(String kernelFunction, Pointer in, int n) throws DMLRuntimeException { int[] tmp = getKernelParamsForReduceAll(n); @@ -1218,7 +1214,7 @@ public class LibMatrixCUDA { * @param out {@link Pointer} to output matrix in device memory (size - rows * 1) * @param rows number of rows in input matrix * @param cols number of columns in input matrix - * @throws DMLRuntimeException + * @throws DMLRuntimeException if DMLRuntimeException occurs */ private static void reduceRow(String kernelFunction, Pointer in, Pointer out, int rows, int cols) throws 
DMLRuntimeException { int[] tmp = getKernelParamsForReduceByRow(rows, cols); @@ -1236,7 +1232,7 @@ public class LibMatrixCUDA { * @param out {@link Pointer} to output matrix in device memory (size - 1 * cols) * @param rows number of rows in input matrix * @param cols number of columns in input matrix - * @throws DMLRuntimeException + * @throws DMLRuntimeException if DMLRuntimeException occurs */ private static void reduceCol(String kernelFunction, Pointer in, Pointer out, int rows, int cols) throws DMLRuntimeException { int[] tmp = getKernelParamsForReduceByCol(rows, cols); @@ -1328,7 +1324,7 @@ public class LibMatrixCUDA { * @param stride_w stride width * @param P output activation height * @param Q output activation width - * @throws DMLRuntimeException + * @throws DMLRuntimeException if DMLRuntimeException occurs */ public static void conv2dBackwardData(MatrixObject filter, MatrixObject dout, MatrixObject output, int N, int C, int H, int W, int K, int R, @@ -1632,12 +1628,12 @@ public class LibMatrixCUDA { /** * Utility to launch binCellScalarOp kernel * - * @param ec - * @param in - * @param outputName - * @param isInputTransposed - * @param op - * @throws DMLRuntimeException + * @param ec execution context + * @param in input matrix + * @param outputName output variable name + * @param isInputTransposed true if input is transposed + * @param op operator + * @throws DMLRuntimeException if DMLRuntimeException occurs */ private static void launchBinCellOpKernel(ExecutionContext ec, MatrixObject in, String outputName, boolean isInputTransposed, ScalarOperator op) throws DMLRuntimeException { @@ -1665,14 +1661,14 @@ public class LibMatrixCUDA { /** * Utility to launch binCellOp kernel * - * @param ec - * @param in1 - * @param in2 - * @param outputName - * @param isLeftTransposed - * @param isRightTransposed - * @param op - * @throws DMLRuntimeException + * @param ec execution context + * @param in1 left input matrix + * @param in2 right input matrix + * @param 
outputName output variable name + * @param isLeftTransposed true if left matrix is transposed + * @param isRightTransposed true if right matrix is transposed + * @param op operator + * @throws DMLRuntimeException if DMLRuntimeException occurs */ private static void launchBinCellOpKernel(ExecutionContext ec, MatrixObject in1, MatrixObject in2, String outputName, boolean isLeftTransposed, boolean isRightTransposed, BinaryOperator op) throws DMLRuntimeException { @@ -1759,10 +1755,10 @@ public class LibMatrixCUDA { /** * Performs a deep device copy of input matrix * - * @param ec - * @param src - * @param outputName - * @throws DMLRuntimeException + * @param ec execution context + * @param src source matrix + * @param outputName destination variable name + * @throws DMLRuntimeException if DMLRuntimeException occurs */ private static void deviceCopy(ExecutionContext ec, MatrixObject src, String outputName) throws DMLRuntimeException { if(isInSparseFormat(src)) { @@ -1821,11 +1817,11 @@ public class LibMatrixCUDA { /** * Performs a deep copy of input device double pointer corresponding to matrix * - * @param src - * @param dest - * @param rlen - * @param clen - * @throws DMLRuntimeException + * @param src source matrix + * @param dest destination matrix + * @param rlen number of rows + * @param clen number of columns + * @throws DMLRuntimeException if DMLRuntimeException occurs */ private static void deviceCopy(Pointer src, Pointer dest, int rlen, int clen) throws DMLRuntimeException { kernels.launchKernel("dense_matrix_copy", @@ -1933,15 +1929,15 @@ public class LibMatrixCUDA { * C = alpha* op( A ) + beta* op ( B ) * where op = transpose or not (specified by isLeftTransposed and isRightTransposed). 
* - * @param ec - * @param in1 - * @param in2 - * @param outputName - * @param isLeftTransposed - * @param isRightTransposed - * @param alpha - * @param beta - * @throws DMLRuntimeException + * @param ec execution context + * @param in1 left input matrix + * @param in2 right input matrix + * @param outputName output variable name + * @param isLeftTransposed true if left matrix is transposed + * @param isRightTransposed true if right matrix is transposed + * @param alpha alpha + * @param beta beta + * @throws DMLRuntimeException if DMLRuntimeException occurs */ private static void dgeam(ExecutionContext ec, MatrixObject in1, MatrixObject in2, String outputName, boolean isLeftTransposed, boolean isRightTransposed, double alpha, double beta) throws DMLRuntimeException { http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7af36f80/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java index 63571c3..b3198ab 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java @@ -132,8 +132,8 @@ public class LibMatrixDNN { * @param filter filter used in conv2d * @param dout errors from next layer * @param outputBlock output errors - * @param params - * @throws DMLRuntimeException + * @param params convolution parameters + * @throws DMLRuntimeException if DMLRuntimeException occurs */ public static void conv2dBackwardData(MatrixBlock filter, MatrixBlock dout, MatrixBlock outputBlock, ConvolutionParameters params) throws DMLRuntimeException { params.input1 = filter; @@ -162,11 +162,11 @@ public class LibMatrixDNN { /** * This method computes the backpropogation errors for filter of convolution operation * - * @param image input image + * 
@param input input image * @param dout errors from next layer * @param outputBlock output errors - * @param params - * @throws DMLRuntimeException + * @param params convolution parameters + * @throws DMLRuntimeException if DMLRuntimeException occurs */ public static void conv2dBackwardFilter(MatrixBlock input, MatrixBlock dout, MatrixBlock outputBlock, ConvolutionParameters params) throws DMLRuntimeException { params.input1 = input; @@ -194,9 +194,9 @@ public class LibMatrixDNN { /** * Performs the operation: ret += elem - * @param ret - * @param elem - * @throws DMLRuntimeException + * @param ret left and output matrix + * @param elem right matrix + * @throws DMLRuntimeException if DMLRuntimeException occurs */ private static void elementWiseInPlaceAddition(MatrixBlock ret, MatrixBlock elem) throws DMLRuntimeException { if(ret.getNumRows() != elem.getNumRows() || ret.getNumColumns() != elem.getNumColumns()) { @@ -225,9 +225,10 @@ public class LibMatrixDNN { /** * Performs the operation: ret += t(elem) - * @param ret - * @param elem - * @throws DMLRuntimeException + * @param ret left and output matrix + * @param elem right untransposed matrix + * @param params convolution parameters + * @throws DMLRuntimeException if DMLRuntimeException occurs */ private static void elementWiseInPlaceTransposedAddition(MatrixBlock ret, MatrixBlock elem) throws DMLRuntimeException { if(ret.getNumRows() != elem.getNumColumns() || ret.getNumColumns() != elem.getNumRows()) { @@ -376,11 +377,11 @@ public class LibMatrixDNN { /** * This method computes the backpropogation errors for previous layer of maxpooling operation * - * @param input - * @param dout - * @param outputBlock - * @param params - * @throws DMLRuntimeException + * @param input input matrix + * @param dout dout matrix + * @param outputBlock output matrix + * @param params convolution parameters + * @throws DMLRuntimeException if DMLRuntimeException occurs */ public static void maxpoolingBackward(MatrixBlock input, 
MatrixBlock dout, MatrixBlock outputBlock, ConvolutionParameters params) throws DMLRuntimeException { params.input1 = input; @@ -594,11 +595,11 @@ public class LibMatrixDNN { /** * This method computes the backpropagation errors for previous layer of relu operation * - * @param input - * @param dout - * @param outputBlock - * @param numThreads - * @throws DMLRuntimeException + * @param input input matrix + * @param dout errors from next layer + * @param outputBlock output matrix + * @param numThreads number of threads + * @throws DMLRuntimeException if DMLRuntimeException occurs */ public static void reluBackward(MatrixBlock input, MatrixBlock dout, MatrixBlock outputBlock, int numThreads) throws DMLRuntimeException { int N = input.getNumRows(); @@ -668,11 +669,11 @@ public class LibMatrixDNN { * output = input + matrix(bias %*% ones, rows=1, cols=F*Hout*Wout) * This operation is often followed by conv2d and hence we have introduced bias_add(input, bias) built-in function * - * @param input - * @param bias - * @param outputBlock - * @param numThreads - * @throws DMLRuntimeException + * @param input input matrix + * @param bias bias matrix + * @param outputBlock output matrix + * @param numThreads number of threads + * @throws DMLRuntimeException if DMLRuntimeException occurs */ public static void biasAdd(MatrixBlock input, MatrixBlock bias, MatrixBlock outputBlock, int numThreads) throws DMLRuntimeException { int N = input.getNumRows(); @@ -1004,6 +1005,8 @@ public class LibMatrixDNN { for(int n = n1; n < n2; n++) doLoopedIm2ColConv2d(n, im2ColOutBlock, params); im2ColOutBlocks.add(im2ColOutBlock); + if(params.bias != null) + addBias(n1, n2, params); break; } case LoopedIm2ColConv2dBwdFilter: @@ -1032,6 +1035,37 @@ public class LibMatrixDNN { return null; } } + + private static void addBias(int n1, int n2, ConvolutionParameters params) { + int PQ = params.P*params.Q; + int K = params.K; + double [] outputArr = params.output.getDenseBlock(); + 
if(!params.bias.isInSparseFormat()) { + double [] biasArr = params.bias.getDenseBlock(); + int index = n1*K*PQ; + for(int n = n1; n < n2; n++) { + for(int k = 0; k < K; k++) { + for(int pq = 0; pq < PQ; pq++, index++) { + outputArr[index] += biasArr[k]; + } + } + } + } + else { + Iterator<IJV> iter = params.bias.getSparseBlockIterator(); + while(iter.hasNext()) { + IJV ijv = iter.next(); + int k = ijv.getI(); + double val = ijv.getV(); + for(int n = n1; n < n2; n++) { + int index = n*K*PQ + k*PQ; + for(int pq = 0; pq < PQ; pq++, index++) { + outputArr[index] += val; + } + } + } + } + } // Converts input: PQ X CRS matrix and writes to 1 X CHW private static void doCol2imOverSingleImage(int outputN, MatrixBlock input, ConvolutionParameters params) throws DMLRuntimeException {
