Repository: incubator-systemml
Updated Branches:
  refs/heads/master 0ff4f14b6 -> 7af36f80b


[HOTFIX] [SYSTEMML-540] Fixed javadoc errors and added fused CP conv2d + 
bias_add


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/7af36f80
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/7af36f80
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/7af36f80

Branch: refs/heads/master
Commit: 7af36f80b7b2726d3411eb308592dcb3ea00ccc3
Parents: 0ff4f14
Author: Niketan Pansare <[email protected]>
Authored: Tue Jan 10 19:04:58 2017 -0800
Committer: Niketan Pansare <[email protected]>
Committed: Tue Jan 10 19:04:58 2017 -0800

----------------------------------------------------------------------
 .../org/apache/sysml/hops/ConvolutionOp.java    | 27 +++++-
 .../apache/sysml/lops/ConvolutionTransform.java | 82 ++++++++--------
 src/main/java/org/apache/sysml/lops/Lop.java    |  8 +-
 .../java/org/apache/sysml/lops/compile/Dag.java | 20 ++++
 .../instructions/CPInstructionParser.java       |  1 +
 .../cp/ConvolutionCPInstruction.java            | 74 ++++++++++++++-
 .../matrix/data/ConvolutionParameters.java      |  1 +
 .../runtime/matrix/data/LibMatrixCUDA.java      | 98 ++++++++++----------
 .../sysml/runtime/matrix/data/LibMatrixDNN.java | 86 +++++++++++------
 9 files changed, 264 insertions(+), 133 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7af36f80/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/ConvolutionOp.java 
b/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
index 3f9ca7e..9f67968 100644
--- a/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
+++ b/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
@@ -137,14 +137,27 @@ public class ConvolutionOp extends Hop  implements 
MultiThreadedHop
                        throw new HopsException("Incorrect number of inputs for 
" + op.name());
                }
                
-               Lop in = null;
+               Lop in = null; Lop in2 = null;
                OperationTypes lopOp = HopsConv2Lops.get(op);
                int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
+               ArrayList<Hop> inputs1 = inputs;
                if(op == ConvOp.MAX_POOLING && et == ExecType.CP && 
inputs.get(0) instanceof UnaryOp
                                && ((UnaryOp) inputs.get(0)).getOp() == 
OpOp1.SELP) {
                        in = inputs.get(0).getInput().get(0).constructLops();
                        lopOp = OperationTypes.RELU_MAX_POOLING;
                }
+               else if(op == ConvOp.BIAS_ADD && et == ExecType.CP && 
inputs.get(0) instanceof ConvolutionOp
+                               && ((ConvolutionOp) inputs.get(0)).getOp() == 
ConvOp.DIRECT_CONV2D) {
+                       lopOp = OperationTypes.DIRECT_CONV2D_BIAS_ADD;
+                       
+                       // the first lop is image 
+                       in = inputs.get(0).getInput().get(0).constructLops();
+                       // the second lop is bias
+                       in2 = inputs.get(1).constructLops();
+                       
+                       // Use the inputs from conv2d rather than bias_add
+                       inputs1 = inputs.get(0).getInput();
+               }
                else {
                        in = inputs.get(0).constructLops();
                }
@@ -153,15 +166,19 @@ public class ConvolutionOp extends Hop  implements 
MultiThreadedHop
                setLineNumbers(transform1);
                in.addOutput(transform1);
                
+               if(in2 != null) {
+                       transform1.addInput(in2);
+                       in2.addOutput(transform1);
+               }
+               
                // stride1, stride2, padding1, padding2  
                // input_shape1, input_shape2, input_shape3, input_shape4, 
                // filter_shape1, filter_shape2, filter_shape3, filter_shape4
-               for( int i=1; i < inputs.size(); i++ )
+               for( int i=1; i < inputs1.size(); i++ )
                {
-                       Lop ltmp = inputs.get(i).constructLops();
+                       Lop ltmp = inputs1.get(i).constructLops();
                        transform1.addInput(ltmp);
-                       //if(i == 1 && expectedNumInputs == 14)
-                               ltmp.addOutput(transform1);
+                       ltmp.addOutput(transform1);
                }
                transform1.setLevel(); //force order of added lops
                return transform1;

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7af36f80/src/main/java/org/apache/sysml/lops/ConvolutionTransform.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/lops/ConvolutionTransform.java 
b/src/main/java/org/apache/sysml/lops/ConvolutionTransform.java
index 6f2a20e..558deb3 100644
--- a/src/main/java/org/apache/sysml/lops/ConvolutionTransform.java
+++ b/src/main/java/org/apache/sysml/lops/ConvolutionTransform.java
@@ -32,7 +32,7 @@ public class ConvolutionTransform extends Lop
        public enum OperationTypes {
                MAX_POOLING, MAX_POOLING_BACKWARD, RELU_MAX_POOLING, 
RELU_BACKWARD,
                DIRECT_CONV2D, DIRECT_CONV2D_BACKWARD_FILTER, 
DIRECT_CONV2D_BACKWARD_DATA,
-               BIAS_ADD
+               BIAS_ADD, DIRECT_CONV2D_BIAS_ADD
        };
        
        private OperationTypes operation = null;
@@ -121,6 +121,9 @@ public class ConvolutionTransform extends Lop
                case DIRECT_CONV2D:
                        return "conv2d";
                
+               case DIRECT_CONV2D_BIAS_ADD:
+                       return "conv2d_bias_add";
+               
                case BIAS_ADD:
                        return "bias_add";
                        
@@ -163,66 +166,57 @@ public class ConvolutionTransform extends Lop
                }
        }
        
-       //CP instructions
-       // stride1, stride2, padding1, padding2  
-       // input_shape1, input_shape2, input_shape3, input_shape4, 
-       // filter_shape1, filter_shape2, filter_shape3, filter_shape4,
+       // Used by maxpool
        public String getInstructions(String input, String stride1, String 
stride2, String padding1, String padding2, 
                        String input_shape1, String input_shape2, String 
input_shape3, String input_shape4,
                        String filter_shape1, String filter_shape2, String 
filter_shape3, String filter_shape4,
                        String output) throws LopsException {
-               //only used for im2col and col2im
                StringBuilder sb = new StringBuilder();
-               sb.append( getExecType() );
-               
-               sb.append( OPERAND_DELIMITOR );
-               sb.append( getOpcode() );
-               sb.append( OPERAND_DELIMITOR );
+               appendOpcode(sb);
                sb.append( getInputs().get(0).prepInputOperand(input));
-               
-               //rows, cols, byrow
-               String[] inputX = new String[]{stride1, stride2, padding1, 
padding2, 
-                        input_shape1, input_shape2, input_shape3, input_shape4,
-                        filter_shape1, filter_shape2, filter_shape3, 
filter_shape4};
-               for( int i=1; i<=(inputX.length); i++ ) {
-                       Lop ltmp = getInputs().get(i);
-                       sb.append( OPERAND_DELIMITOR );
-                       sb.append( ltmp.prepScalarInputOperand(getExecType()));
-               }
-               
-               //output
-               sb.append( OPERAND_DELIMITOR );
-               sb.append( this.prepOutputOperand(output));
-               
-               //append degree of parallelism
-               if( getExecType()==ExecType.CP ) {
-                       sb.append( OPERAND_DELIMITOR );
-                       sb.append( numThreads );
-               }
-               
+               appendOperands(1, 13, output, sb);
                return sb.toString();
        }
        
+       // Used by conv2d*, maxpool_bwd
        public String getInstructions(String input, String dout, String 
stride1, String stride2, String padding1, String padding2, 
                        String input_shape1, String input_shape2, String 
input_shape3, String input_shape4,
                        String filter_shape1, String filter_shape2, String 
filter_shape3, String filter_shape4,
                        String output) throws LopsException {
-               //only used for im2col and col2im
                StringBuilder sb = new StringBuilder();
+               appendOpcode(sb);
+               sb.append( getInputs().get(0).prepInputOperand(input));
+               sb.append( OPERAND_DELIMITOR );
+               sb.append( getInputs().get(1).prepInputOperand(dout));
+               appendOperands(2, 14, output, sb);
+               return sb.toString();
+       }
+       
+       // Used by fused conv2d+bias_add
+       public String getInstructions(String input, String bias, String filter, 
String stride1, String stride2, String padding1, String padding2, 
+                       String input_shape1, String input_shape2, String 
input_shape3, String input_shape4,
+                       String filter_shape1, String filter_shape2, String 
filter_shape3, String filter_shape4,
+                       String output) throws LopsException {
+               StringBuilder sb = new StringBuilder();
+               appendOpcode(sb);
+               sb.append( getInputs().get(0).prepInputOperand(input));
+               sb.append( OPERAND_DELIMITOR );
+               sb.append( getInputs().get(1).prepInputOperand(bias));
+               sb.append( OPERAND_DELIMITOR );
+               sb.append( getInputs().get(2).prepInputOperand(filter));
+               appendOperands(3, 15, output, sb);
+               return sb.toString();
+       }
+       
+       public void appendOpcode(StringBuilder sb) {
                sb.append( getExecType() );
-               
                sb.append( OPERAND_DELIMITOR );
                sb.append( getOpcode() );
                sb.append( OPERAND_DELIMITOR );
-               sb.append( getInputs().get(0).prepInputOperand(input));
-               
-               sb.append( OPERAND_DELIMITOR );
-               sb.append( getInputs().get(1).prepInputOperand(dout));
-               
-               String[] inputX = new String[]{input, dout, stride1, stride2, 
padding1, padding2, 
-                        input_shape1, input_shape2, input_shape3, input_shape4,
-                        filter_shape1, filter_shape2, filter_shape3, 
filter_shape4};
-               for( int i=2; i < inputX.length; i++ ) {
+       }
+       
+       public void appendOperands(int startInputIndex, int endInputIndex, 
String output, StringBuilder sb) {
+               for( int i=startInputIndex; i < endInputIndex; i++ ) {
                        Lop ltmp = getInputs().get(i);
                        sb.append( OPERAND_DELIMITOR );
                        sb.append( ltmp.prepScalarInputOperand(getExecType()));
@@ -237,8 +231,6 @@ public class ConvolutionTransform extends Lop
                        sb.append( OPERAND_DELIMITOR );
                        sb.append( numThreads );
                }
-               
-               return sb.toString();
        }
 
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7af36f80/src/main/java/org/apache/sysml/lops/Lop.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/lops/Lop.java 
b/src/main/java/org/apache/sysml/lops/Lop.java
index ad25970..567b0be 100644
--- a/src/main/java/org/apache/sysml/lops/Lop.java
+++ b/src/main/java/org/apache/sysml/lops/Lop.java
@@ -647,7 +647,6 @@ public abstract class Lop
                throw new LopsException(this.printErrorLocation() + "Should 
never be invoked in Baseclass");
        }
        
-       // For pooling backward
        public String getInstructions(String input, String dout, String 
stride1, String stride2, String padding1, String padding2, 
                        String input_shape1, String input_shape2, String 
input_shape3, String input_shape4,
                        String filter_shape1, String filter_shape2, String 
filter_shape3, String filter_shape4,
@@ -655,6 +654,13 @@ public abstract class Lop
                throw new LopsException(this.printErrorLocation() + "Should 
never be invoked in Baseclass");
        }
        
+       public String getInstructions(String input, String bias, String dout, 
String stride1, String stride2, String padding1, String padding2, 
+                       String input_shape1, String input_shape2, String 
input_shape3, String input_shape4,
+                       String filter_shape1, String filter_shape2, String 
filter_shape3, String filter_shape4,
+                       String output) throws LopsException {
+               throw new LopsException(this.printErrorLocation() + "Should 
never be invoked in Baseclass");
+       }
+       
        public String getInstructions(int input, int rowl, int rowu,
                        int coll, int colu, int leftRowDim,
                        int leftColDim, int output) throws LopsException {

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7af36f80/src/main/java/org/apache/sysml/lops/compile/Dag.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/lops/compile/Dag.java 
b/src/main/java/org/apache/sysml/lops/compile/Dag.java
index 8f17b2c..898f4ec 100644
--- a/src/main/java/org/apache/sysml/lops/compile/Dag.java
+++ b/src/main/java/org/apache/sysml/lops/compile/Dag.java
@@ -1528,6 +1528,26 @@ public class Dag<N extends Lop>
                                                                
node.getInputs().get(13).getOutputParameters().getLabel(),
                                                                
node.getOutputParameters().getLabel());
                                        }
+                                       else if (node.getInputs().size() == 15) 
{
+                                                // Used for fused 
conv2d_bias_add
+                                                inst_string = 
node.getInstructions(
+                                                               
node.getInputs().get(0).getOutputParameters().getLabel(),
+                                                               
node.getInputs().get(1).getOutputParameters().getLabel(),
+                                                               
node.getInputs().get(2).getOutputParameters().getLabel(),
+                                                               
node.getInputs().get(3).getOutputParameters().getLabel(),
+                                                               
node.getInputs().get(4).getOutputParameters().getLabel(),
+                                                               
node.getInputs().get(5).getOutputParameters().getLabel(),
+                                                               
node.getInputs().get(6).getOutputParameters().getLabel(),
+                                                               
node.getInputs().get(7).getOutputParameters().getLabel(),
+                                                               
node.getInputs().get(8).getOutputParameters().getLabel(),
+                                                               
node.getInputs().get(9).getOutputParameters().getLabel(),
+                                                               
node.getInputs().get(10).getOutputParameters().getLabel(),
+                                                               
node.getInputs().get(11).getOutputParameters().getLabel(),
+                                                               
node.getInputs().get(12).getOutputParameters().getLabel(),
+                                                               
node.getInputs().get(13).getOutputParameters().getLabel(),
+                                                               
node.getInputs().get(14).getOutputParameters().getLabel(),
+                                                               
node.getOutputParameters().getLabel());
+                                       }
                                        else {
                                                throw new 
LopsException(node.printErrorLocation() + "Node with " + 
node.getInputs().size() + " inputs is not supported in CP yet! \n");
                                        }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7af36f80/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java 
b/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java
index 11d4661..f631527 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java
@@ -225,6 +225,7 @@ public class CPInstructionParser extends InstructionParser
                String2CPInstructionType.put( "maxpooling"      , 
CPINSTRUCTION_TYPE.Convolution);
                String2CPInstructionType.put( "maxpooling_backward"      , 
CPINSTRUCTION_TYPE.Convolution);
                String2CPInstructionType.put( "conv2d"      , 
CPINSTRUCTION_TYPE.Convolution);
+               String2CPInstructionType.put( "conv2d_bias_add"      , 
CPINSTRUCTION_TYPE.Convolution);
                String2CPInstructionType.put( "conv2d_backward_filter"      , 
CPINSTRUCTION_TYPE.Convolution);
                String2CPInstructionType.put( "conv2d_backward_data"      , 
CPINSTRUCTION_TYPE.Convolution);
                String2CPInstructionType.put( "bias_add"      , 
CPINSTRUCTION_TYPE.Convolution);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7af36f80/src/main/java/org/apache/sysml/runtime/instructions/cp/ConvolutionCPInstruction.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/cp/ConvolutionCPInstruction.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/cp/ConvolutionCPInstruction.java
index 997c79b..ed0b548 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/cp/ConvolutionCPInstruction.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/cp/ConvolutionCPInstruction.java
@@ -21,7 +21,6 @@ package org.apache.sysml.runtime.instructions.cp;
 
 import java.util.ArrayList;
 import java.util.Arrays;
-
 import org.apache.sysml.parser.Expression.DataType;
 import org.apache.sysml.parser.Expression.ValueType;
 import org.apache.sysml.runtime.DMLRuntimeException;
@@ -36,7 +35,8 @@ import org.apache.sysml.runtime.util.ConvolutionUtils;
 
 public class ConvolutionCPInstruction extends UnaryCPInstruction {
        
-       private CPOperand _in2; // used for pooling backward
+       private CPOperand _in2;
+       private CPOperand _in3; 
        private ArrayList<CPOperand> _input_shape;
        private ArrayList<CPOperand> _filter_shape;
        private ArrayList<CPOperand> _stride = new ArrayList<CPOperand>();
@@ -82,6 +82,22 @@ public class ConvolutionCPInstruction extends 
UnaryCPInstruction {
                _filter_shape = filter_shape;
                _numThreads = numThreads;
        }
+       
+       public ConvolutionCPInstruction(CPOperand in, CPOperand in2, CPOperand 
in3, CPOperand out, String opcode,
+                       String istr, ArrayList<CPOperand> stride,
+                       ArrayList<CPOperand> padding, ArrayList<CPOperand> 
input_shape,
+                       ArrayList<CPOperand> filter_shape, int numThreads) {
+               super(new ReorgOperator(SwapIndex.getSwapIndexFnObject()), in, 
out,
+                               opcode, istr);
+               _in2 = in2;
+               _in3 = in3;
+               _cptype = CPINSTRUCTION_TYPE.Convolution;
+               _stride = stride;
+               _padding = padding;
+               _input_shape = input_shape;
+               _filter_shape = filter_shape;
+               _numThreads = numThreads;
+       }
 
        public static ConvolutionCPInstruction parseInstruction(String str)
                        throws DMLRuntimeException {
@@ -152,7 +168,40 @@ public class ConvolutionCPInstruction extends 
UnaryCPInstruction {
 
                        return new ConvolutionCPInstruction(in, in2, out, 
opcode, str, stride,
                                        padding, input_shape, filter_shape, k);
-               } 
+               }
+               else if (opcode.equalsIgnoreCase("conv2d_bias_add")) {
+                       InstructionUtils.checkNumFields(parts, 17);
+                       // bias, filter, stride1, stride2, padding1, padding2
+                       // input_shape1, input_shape2, input_shape3, 
input_shape4,
+                       // filter_shape1, filter_shape2, filter_shape3, 
filter_shape4, k
+                       in.split(parts[1]);
+                       CPOperand in2 = new CPOperand("", ValueType.UNKNOWN, 
DataType.UNKNOWN);
+                       in2.split(parts[2]);
+                       CPOperand in3 = new CPOperand("", ValueType.UNKNOWN, 
DataType.UNKNOWN);
+                       in3.split(parts[3]);
+                       out.split(parts[16]);
+
+                       ArrayList<CPOperand> stride = new 
ArrayList<CPOperand>();
+                       ArrayList<CPOperand> padding = new 
ArrayList<CPOperand>();
+                       ArrayList<CPOperand> input_shape = new 
ArrayList<CPOperand>();
+                       ArrayList<CPOperand> filter_shape = new 
ArrayList<CPOperand>();
+                       stride.add(new CPOperand(parts[4]));
+                       stride.add(new CPOperand(parts[5]));
+                       padding.add(new CPOperand(parts[6]));
+                       padding.add(new CPOperand(parts[7]));
+                       input_shape.add(new CPOperand(parts[8]));
+                       input_shape.add(new CPOperand(parts[9]));
+                       input_shape.add(new CPOperand(parts[10]));
+                       input_shape.add(new CPOperand(parts[11]));
+                       filter_shape.add(new CPOperand(parts[12]));
+                       filter_shape.add(new CPOperand(parts[13]));
+                       filter_shape.add(new CPOperand(parts[14]));
+                       filter_shape.add(new CPOperand(parts[15]));
+                       int k = Integer.parseInt(parts[17]);
+
+                       return new ConvolutionCPInstruction(in, in2, in3, out, 
opcode, str, stride,
+                                       padding, input_shape, filter_shape, k);
+               }
                else if (opcode.equalsIgnoreCase("bias_add") || 
opcode.equals("relu_backward")) {
                        InstructionUtils.checkNumFields(parts, 4);
                        in.split(parts[1]);
@@ -194,7 +243,7 @@ public class ConvolutionCPInstruction extends 
UnaryCPInstruction {
                ec.setMatrixOutput(getOutputVariableName(), outputBlock);
        }
        
-       public void processBiasInstruction(ExecutionContext ec) throws 
DMLRuntimeException {
+       public void processBiasAddInstruction(ExecutionContext ec) throws 
DMLRuntimeException {
                MatrixBlock outputBlock = null;
                MatrixBlock input = ec.getMatrixInput(input1.getName());
                MatrixBlock bias = ec.getMatrixInput(_in2.getName());
@@ -227,7 +276,7 @@ public class ConvolutionCPInstruction extends 
UnaryCPInstruction {
        public void processInstruction(ExecutionContext ec)
                        throws DMLRuntimeException {
                if (instOpcode.equalsIgnoreCase("bias_add")) {
-                       processBiasInstruction(ec);
+                       processBiasAddInstruction(ec);
                        return;
                }
                else if (instOpcode.equalsIgnoreCase("relu_backward")) {
@@ -289,6 +338,21 @@ public class ConvolutionCPInstruction extends 
UnaryCPInstruction {
                        }
                        ec.releaseMatrixInput(_in2.getName());
                }
+               else if (instOpcode.equalsIgnoreCase("conv2d_bias_add")) {
+                       MatrixBlock filter = ec.getMatrixInput(_in3.getName());
+                       MatrixBlock bias = ec.getMatrixInput(_in2.getName());
+                       if((filter.isEmptyBlock() || matBlock.isEmptyBlock()) 
&& bias.isEmptyBlock()) {
+                               outputBlock = new MatrixBlock(N, K*P*Q, true, 
0);
+                       }
+                       else {
+                               outputBlock = getDenseOutputBlock(ec, N, K*P*Q);
+                               if(!bias.isEmptyBlock())
+                                       params.bias = bias;
+                               LibMatrixDNN.conv2d(matBlock, filter, 
outputBlock, params);
+                       }
+                       ec.releaseMatrixInput(_in3.getName());
+                       ec.releaseMatrixInput(_in2.getName());
+               }
                else if (instOpcode.equalsIgnoreCase("conv2d_backward_filter")) 
{
                        MatrixBlock dout = ec.getMatrixInput(_in2.getName());
                        if(dout.isEmptyBlock() || matBlock.isEmptyBlock()) {

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7af36f80/src/main/java/org/apache/sysml/runtime/matrix/data/ConvolutionParameters.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/ConvolutionParameters.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/ConvolutionParameters.java
index cd37c06..9cd187c 100644
--- 
a/src/main/java/org/apache/sysml/runtime/matrix/data/ConvolutionParameters.java
+++ 
b/src/main/java/org/apache/sysml/runtime/matrix/data/ConvolutionParameters.java
@@ -34,6 +34,7 @@ public class ConvolutionParameters {
        
        MatrixBlock input1; MatrixBlock input2; MatrixBlock output;
        
+       public MatrixBlock bias;
        public int [] start_indexes_h, end_indexes_h, start_indexes_w, 
end_indexes_w; 
        
        private int convertToInt(long val) throws DMLRuntimeException {

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7af36f80/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
index f160bc7..1af2a1d 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
@@ -126,10 +126,6 @@ public class LibMatrixCUDA {
                        dstTensorDesc = allocateTensorDescriptor(N, K, P, Q);
                        filterDesc = allocateFilterDescriptor(K, C, R, S);
 
-                       // Allocate data
-                       // (Pointer) gpuCtx.prepare(image, true, true);
-                       // (Pointer) gpuCtx.prepare(filter, true, true);
-
                        Pointer imagePointer = 
((JCudaObject)image.getGPUObject()).jcudaDenseMatrixPtr;
                        Pointer filterPointer = 
((JCudaObject)filter.getGPUObject()).jcudaDenseMatrixPtr;
                        Pointer dstPointer = 
((JCudaObject)outputBlock.getGPUObject()).jcudaDenseMatrixPtr;
@@ -245,10 +241,10 @@ public class LibMatrixCUDA {
        /**
         * This method computes the backpropagation errors for previous layer 
of relu operation
         * 
-        * @param input
-        * @param dout
-        * @param outputBlock
-        * @throws DMLRuntimeException
+        * @param input input image
+        * @param dout  next layer error propagation
+        * @param outputBlock output
+        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
        public static void reluBackward(MatrixObject input, MatrixObject dout, 
MatrixObject outputBlock) throws DMLRuntimeException {
                if(isInSparseFormat(input)) {
@@ -273,10 +269,10 @@ public class LibMatrixCUDA {
         * output = input + matrix(bias %*% ones, rows=1, cols=F*Hout*Wout)
         * This operation is often followed by conv2d and hence we have 
introduced bias_add(input, bias) built-in function
         * 
-        * @param input
-        * @param bias
-        * @param outputBlock
-        * @throws DMLRuntimeException
+        * @param input input image
+        * @param bias bias
+        * @param outputBlock output
+        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
        public static void biasAdd(MatrixObject input, MatrixObject bias, 
MatrixObject outputBlock) throws DMLRuntimeException {
                if(isInSparseFormat(input)) {
@@ -320,7 +316,7 @@ public class LibMatrixCUDA {
         * @param stride_w stride width
         * @param P output activation height
         * @param Q output activation width
-        * @throws DMLRuntimeException
+        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
        public static void conv2dBackwardFilter(MatrixObject image, 
MatrixObject dout,
                        MatrixObject outputBlock, int N, int C, int H, int W, 
int K, int R,
@@ -501,8 +497,8 @@ public class LibMatrixCUDA {
         * Hence, we compute only the upper triangular matrix and copy this 
partial
         * result down to lower triangular matrix once.
         *
-        * @param ret
-        * @throws DMLRuntimeException
+        * @param ret upper triangular matrix
+        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
        private static void copyUpperToLowerTriangle(MatrixObject ret) throws 
DMLRuntimeException {
                if(isInSparseFormat(ret)) {
@@ -1185,7 +1181,7 @@ public class LibMatrixCUDA {
         * @param in                                                    {@link 
Pointer} to matrix in device memory
         * @param n                                                             
size of array
         * @return      the reduced value
-        * @throws DMLRuntimeException
+        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
        private static double reduceAll(String kernelFunction, Pointer in, int 
n) throws DMLRuntimeException {
                int[] tmp = getKernelParamsForReduceAll(n);
@@ -1218,7 +1214,7 @@ public class LibMatrixCUDA {
         * @param out                                                   {@link 
Pointer} to output matrix in device memory (size - rows * 1)
         * @param rows                                          number of rows 
in input matrix
         * @param cols                                          number of 
columns in input matrix
-        * @throws DMLRuntimeException
+        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
        private static void reduceRow(String kernelFunction, Pointer in, 
Pointer out, int rows, int cols) throws DMLRuntimeException {
                int[] tmp = getKernelParamsForReduceByRow(rows, cols);
@@ -1236,7 +1232,7 @@ public class LibMatrixCUDA {
         * @param out                                                   {@link 
Pointer} to output matrix in device memory (size - 1 * cols)
         * @param rows                                          number of rows 
in input matrix
         * @param cols                                          number of 
columns in input matrix
-        * @throws DMLRuntimeException
+        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
        private static void reduceCol(String kernelFunction, Pointer in, 
Pointer out, int rows, int cols) throws DMLRuntimeException {
                int[] tmp = getKernelParamsForReduceByCol(rows, cols);
@@ -1328,7 +1324,7 @@ public class LibMatrixCUDA {
         * @param stride_w stride width
         * @param P output activation height
         * @param Q output activation width
-        * @throws DMLRuntimeException
+        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
        public static void conv2dBackwardData(MatrixObject filter, MatrixObject 
dout,
                        MatrixObject output, int N, int C, int H, int W, int K, 
int R,
@@ -1632,12 +1628,12 @@ public class LibMatrixCUDA {
        /**
         * Utility to launch binCellScalarOp kernel
         *
-        * @param ec
-        * @param in
-        * @param outputName
-        * @param isInputTransposed
-        * @param op
-        * @throws DMLRuntimeException
+        * @param ec execution context
+        * @param in input matrix
+        * @param outputName output variable name
+        * @param isInputTransposed true if input is transposed
+        * @param op operator
+        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
        private static void launchBinCellOpKernel(ExecutionContext ec, 
MatrixObject in, String outputName, boolean isInputTransposed,
                        ScalarOperator op) throws DMLRuntimeException {
@@ -1665,14 +1661,14 @@ public class LibMatrixCUDA {
        /**
         * Utility to launch binCellOp kernel
         *
-        * @param ec
-        * @param in1
-        * @param in2
-        * @param outputName
-        * @param isLeftTransposed
-        * @param isRightTransposed
-        * @param op
-        * @throws DMLRuntimeException
+        * @param ec execution context
+        * @param in1 left input matrix
+        * @param in2 right input matrix
+        * @param outputName output variable name
+        * @param isLeftTransposed true if left matrix is transposed
+        * @param isRightTransposed true if right matrix is transposed
+        * @param op operator
+        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
        private static void launchBinCellOpKernel(ExecutionContext ec, 
MatrixObject in1, MatrixObject in2,
                        String outputName, boolean isLeftTransposed, boolean 
isRightTransposed, BinaryOperator op) throws DMLRuntimeException {
@@ -1759,10 +1755,10 @@ public class LibMatrixCUDA {
        /**
         * Performs a deep device copy of input matrix
         *
-        * @param ec
-        * @param src
-        * @param outputName
-        * @throws DMLRuntimeException
+        * @param ec execution context
+        * @param src source matrix
+        * @param outputName destination variable name
+        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
        private static void deviceCopy(ExecutionContext ec, MatrixObject src, 
String outputName) throws DMLRuntimeException {
                if(isInSparseFormat(src)) {
@@ -1821,11 +1817,11 @@ public class LibMatrixCUDA {
        /**
         * Performs a deep copy of input device double pointer corresponding to 
matrix
         *
-        * @param src
-        * @param dest
-        * @param rlen
-        * @param clen
-        * @throws DMLRuntimeException
+        * @param src source matrix
+        * @param dest destination matrix
+        * @param rlen number of rows
+        * @param clen number of columns
+        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
        private static void deviceCopy(Pointer src, Pointer dest, int rlen, int 
clen) throws DMLRuntimeException {
                kernels.launchKernel("dense_matrix_copy",
@@ -1933,15 +1929,15 @@ public class LibMatrixCUDA {
         * C = alpha* op( A ) + beta* op ( B )
         * where op = transpose or not (specified by isLeftTransposed and 
isRightTransposed).
         *
-        * @param ec
-        * @param in1
-        * @param in2
-        * @param outputName
-        * @param isLeftTransposed
-        * @param isRightTransposed
-        * @param alpha
-        * @param beta
-        * @throws DMLRuntimeException
+        * @param ec execution context
+        * @param in1 left input matrix
+        * @param in2 right input matrix
+        * @param outputName output variable name
+        * @param isLeftTransposed true if left matrix is transposed
+        * @param isRightTransposed true if right matrix is transposed
+        * @param alpha alpha
+        * @param beta beta
+        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
        private static void dgeam(ExecutionContext ec, MatrixObject in1, 
MatrixObject in2, String outputName,
                        boolean isLeftTransposed, boolean isRightTransposed, 
double alpha, double beta) throws DMLRuntimeException {

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7af36f80/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
index 63571c3..b3198ab 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
@@ -132,8 +132,8 @@ public class LibMatrixDNN {
         * @param filter filter used in conv2d 
         * @param dout errors from next layer
         * @param outputBlock  output errors
-        * @param params
-        * @throws DMLRuntimeException
+        * @param params convolution parameters
+        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
        public static void conv2dBackwardData(MatrixBlock filter, MatrixBlock 
dout, MatrixBlock outputBlock, ConvolutionParameters params) throws 
DMLRuntimeException {
                params.input1 = filter;
@@ -162,11 +162,11 @@ public class LibMatrixDNN {
        /**
         * This method computes the backpropogation errors for filter of 
convolution operation
         * 
-        * @param image input image 
+        * @param input input image 
         * @param dout errors from next layer
         * @param outputBlock  output errors
-        * @param params 
-        * @throws DMLRuntimeException
+        * @param params convolution parameters
+        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
        public static void conv2dBackwardFilter(MatrixBlock input, MatrixBlock 
dout, MatrixBlock outputBlock, ConvolutionParameters params) throws 
DMLRuntimeException {
                params.input1 = input;
@@ -194,9 +194,9 @@ public class LibMatrixDNN {
        
        /**
         * Performs the operation: ret += elem
-        * @param ret
-        * @param elem
-        * @throws DMLRuntimeException
+        * @param ret left and output matrix
+        * @param elem right matrix
+        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
        private static void elementWiseInPlaceAddition(MatrixBlock ret, 
MatrixBlock elem) throws DMLRuntimeException {
                if(ret.getNumRows() != elem.getNumRows() || ret.getNumColumns() 
!= elem.getNumColumns()) {
@@ -225,9 +225,9 @@
        
        /**
         * Performs the operation: ret += t(elem)
-        * @param ret
-        * @param elem
-        * @throws DMLRuntimeException
+        * @param ret left and output matrix
+        * @param elem right untransposed matrix
+        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
        private static void elementWiseInPlaceTransposedAddition(MatrixBlock 
ret, MatrixBlock elem) throws DMLRuntimeException {
                if(ret.getNumRows() != elem.getNumColumns() || 
ret.getNumColumns() != elem.getNumRows()) {
@@ -376,11 +377,11 @@ public class LibMatrixDNN {
        /**
         * This method computes the backpropogation errors for previous layer 
of maxpooling operation
         * 
-        * @param input
-        * @param dout
-        * @param outputBlock
-        * @param params
-        * @throws DMLRuntimeException
+        * @param input input matrix
+        * @param dout dout matrix
+        * @param outputBlock output matrix
+        * @param params convolution parameters
+        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
        public static void maxpoolingBackward(MatrixBlock input, MatrixBlock 
dout, MatrixBlock outputBlock, ConvolutionParameters params) throws 
DMLRuntimeException {
                params.input1 = input;
@@ -594,11 +595,11 @@ public class LibMatrixDNN {
        /**
         * This method computes the backpropagation errors for previous layer 
of relu operation
         * 
-        * @param input
-        * @param dout
-        * @param outputBlock
-        * @param numThreads
-        * @throws DMLRuntimeException
+        * @param input input matrix
+        * @param dout errors from next layer
+        * @param outputBlock output matrix
+        * @param numThreads number of threads
+        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
        public static void reluBackward(MatrixBlock input, MatrixBlock dout, 
MatrixBlock outputBlock, int numThreads) throws DMLRuntimeException {
                int N = input.getNumRows();
@@ -668,11 +669,11 @@ public class LibMatrixDNN {
         * output = input + matrix(bias %*% ones, rows=1, cols=F*Hout*Wout)
         * This operation is often followed by conv2d and hence we have 
introduced bias_add(input, bias) built-in function
         * 
-        * @param input
-        * @param bias
-        * @param outputBlock
-        * @param numThreads
-        * @throws DMLRuntimeException
+        * @param input input matrix
+        * @param bias bias matrix
+        * @param outputBlock output matrix
+        * @param numThreads number of threads
+        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
        public static void biasAdd(MatrixBlock input, MatrixBlock bias, 
MatrixBlock outputBlock, int numThreads) throws DMLRuntimeException {
                int N = input.getNumRows();
@@ -1004,6 +1005,8 @@ public class LibMatrixDNN {
                                        for(int n = n1; n < n2; n++) 
                                                doLoopedIm2ColConv2d(n, 
im2ColOutBlock, params);
                                        im2ColOutBlocks.add(im2ColOutBlock);
+                                       if(params.bias != null)
+                                               addBias(n1, n2, params);
                                        break;
                                }
                                case LoopedIm2ColConv2dBwdFilter:
@@ -1032,6 +1035,37 @@ public class LibMatrixDNN {
                        return null;
                }
        }
+       
+       private static void addBias(int n1, int n2, ConvolutionParameters 
params) {
+               int PQ = params.P*params.Q;
+               int K = params.K;
+               double [] outputArr = params.output.getDenseBlock();
+               if(!params.bias.isInSparseFormat()) {
+                       double [] biasArr = params.bias.getDenseBlock();
+                       int index = n1*K*PQ;
+                       for(int n = n1; n < n2; n++) {
+                               for(int k = 0; k < K; k++) {
+                                       for(int pq = 0; pq < PQ; pq++, index++) 
{
+                                               outputArr[index] += biasArr[k];
+                                       }
+                               }
+                       }
+               }
+               else {
+                       Iterator<IJV> iter = 
params.bias.getSparseBlockIterator();
+                       while(iter.hasNext()) {
+                               IJV ijv = iter.next();
+                               int k = ijv.getI();
+                               double val = ijv.getV();
+                               for(int n = n1; n < n2; n++) {
+                                       int index = n*K*PQ + k*PQ;
+                                       for(int pq = 0; pq < PQ; pq++, index++) 
{
+                                               outputArr[index] += val;
+                                       }
+                               }
+                       }
+               }
+       }
                
        // Converts input: PQ X CRS matrix and writes to 1 X CHW
        private static void doCol2imOverSingleImage(int outputN, MatrixBlock 
input, ConvolutionParameters params) throws DMLRuntimeException {

Reply via email to