[SYSTEMML-2131] Codegen support for DNN biasAdd/biasMult in row/cell

This patch adds codegen support for the DNN operations bias_add and
bias_multiply by incorporating these operations into the row and cell
templates. Furthermore, this includes the necessary codegen templates
and runtime primitives. The vector runtime reuses the existing DNN
operations per row.

In addition, this patch includes various refactorings (e.g., renaming the
BIAS_ADD/BIAS_MULTIPLY operation types to BIASADD/BIASMULT) that allow for
a more seamless compiler integration and make the DNN biasAdd and biasMult
operations more efficient.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/305eb28c
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/305eb28c
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/305eb28c

Branch: refs/heads/master
Commit: 305eb28c4353e6f6dfb1349b34844bd1012effe9
Parents: 9fa5a09
Author: Matthias Boehm <[email protected]>
Authored: Sat Jun 16 16:34:32 2018 -0700
Committer: Matthias Boehm <[email protected]>
Committed: Sat Jun 16 16:34:32 2018 -0700

----------------------------------------------------------------------
 src/main/java/org/apache/sysml/hops/DnnOp.java  | 20 ++++----
 src/main/java/org/apache/sysml/hops/Hop.java    |  6 +--
 .../sysml/hops/codegen/cplan/CNodeBinary.java   | 12 ++++-
 .../sysml/hops/codegen/cplan/CNodeTernary.java  | 13 ++++-
 .../opt/PlanSelectionFuseCostBasedV2.java       | 11 ++++
 .../hops/codegen/template/TemplateCell.java     | 29 +++++++++--
 .../hops/codegen/template/TemplateRow.java      | 19 ++++++-
 .../hops/codegen/template/TemplateUtils.java    |  4 +-
 .../sysml/parser/BuiltinFunctionExpression.java |  8 +--
 .../org/apache/sysml/parser/DMLTranslator.java  |  4 +-
 .../org/apache/sysml/parser/Expression.java     |  2 +-
 .../runtime/codegen/LibSpoofPrimitives.java     | 35 +++++++++++++
 .../sysml/runtime/matrix/data/LibMatrixDNN.java | 53 ++++++++------------
 .../functions/codegen/CellwiseTmplTest.java     | 34 ++++++++++++-
 .../functions/codegen/RowAggTmplTest.java       | 18 ++++++-
 .../scripts/functions/codegen/cellwisetmpl25.R  | 37 ++++++++++++++
 .../functions/codegen/cellwisetmpl25.dml        | 36 +++++++++++++
 .../scripts/functions/codegen/cellwisetmpl26.R  | 37 ++++++++++++++
 .../functions/codegen/cellwisetmpl26.dml        | 36 +++++++++++++
 .../scripts/functions/codegen/rowAggPattern43.R | 41 +++++++++++++++
 .../functions/codegen/rowAggPattern43.dml       | 38 ++++++++++++++
 21 files changed, 431 insertions(+), 62 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/305eb28c/src/main/java/org/apache/sysml/hops/DnnOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/DnnOp.java 
b/src/main/java/org/apache/sysml/hops/DnnOp.java
index 26f8f8c..0a0e50a 100644
--- a/src/main/java/org/apache/sysml/hops/DnnOp.java
+++ b/src/main/java/org/apache/sysml/hops/DnnOp.java
@@ -121,8 +121,8 @@ public class DnnOp extends MultiThreadedHop
                        case CONV2D:
                        case CONV2D_BACKWARD_DATA:
                        case CONV2D_BACKWARD_FILTER:
-                       case BIAS_ADD:
-                       case BIAS_MULTIPLY:
+                       case BIASADD:
+                       case BIASMULT:
                        {       
                                if(et == ExecType.CP || et == ExecType.GPU) {
                                        setLops(constructDnnLops(et, inputs));
@@ -155,8 +155,8 @@ public class DnnOp extends MultiThreadedHop
                        case CONV2D_BACKWARD_FILTER:
                        case CONV2D_BACKWARD_DATA:
                                return 14;
-                       case BIAS_ADD:
-                       case BIAS_MULTIPLY:
+                       case BIASADD:
+                       case BIASMULT:
                                return 2;
                        default:
                                return 13;
@@ -248,7 +248,7 @@ public class DnnOp extends MultiThreadedHop
                        lhsInputLop = parentReLU.constructLops();
                        lopOp = OperationTypes.RELU_MAX_POOLING_BACKWARD;
                }
-               else if(OptimizerUtils.ALLOW_OPERATOR_FUSION && op == 
OpOpDnn.BIAS_ADD && isInputConv2d(inputs.get(0))) {
+               else if(OptimizerUtils.ALLOW_OPERATOR_FUSION && op == 
OpOpDnn.BIASADD && isInputConv2d(inputs.get(0))) {
                        lopOp = OperationTypes.CONV2D_BIAS_ADD;
                        
                        // the first lop is image 
@@ -321,7 +321,7 @@ public class DnnOp extends MultiThreadedHop
        @Override
        protected double computeOutputMemEstimate( long dim1, long dim2, long 
nnz )
        {               
-               if(getOp() == OpOpDnn.BIAS_MULTIPLY) {
+               if(getOp() == OpOpDnn.BIASMULT) {
                        // in non-gpu mode, the worst case size of bias 
multiply operation is same as that of input.
                        if(DMLScript.USE_ACCELERATOR) 
                                return 
OptimizerUtils.estimateSizeExactSparsity(dim1, dim2, 1.0);
@@ -505,7 +505,7 @@ public class DnnOp extends MultiThreadedHop
                // [numRows, numCols, NNZ] 
                long[] ret = new long[3];
                
-               if(op == OpOpDnn.BIAS_ADD || op == OpOpDnn.BIAS_MULTIPLY) {
+               if(op == OpOpDnn.BIASADD || op == OpOpDnn.BIASMULT) {
                        MatrixCharacteristics[] mc = 
memo.getAllInputStats(getInput());
                        ret[0] = mc[0].rowsKnown() ? mc[0].getRows() : -1;
                        ret[1] = mc[0].colsKnown() ? mc[0].getCols() : -1;
@@ -639,7 +639,7 @@ public class DnnOp extends MultiThreadedHop
         * @return true if the given hop is BIAS_ADD
         */
        private static boolean isInputBiasAdd(Hop hop) {
-               return HopRewriteUtils.isDnn(hop, OpOpDnn.BIAS_ADD);
+               return HopRewriteUtils.isDnn(hop, OpOpDnn.BIASADD);
        }
        
        /**
@@ -708,7 +708,7 @@ public class DnnOp extends MultiThreadedHop
        @Override
        public void refreshSizeInformation()
        {
-               if(op == OpOpDnn.BIAS_ADD || op == OpOpDnn.BIAS_MULTIPLY) {
+               if(op == OpOpDnn.BIASADD || op == OpOpDnn.BIASMULT) {
                        Hop input1 = getInput().get(0);
                        setDim1(input1.getDim1());
                        setDim2(input1.getDim2());
@@ -807,7 +807,7 @@ public class DnnOp extends MultiThreadedHop
         * @return either -1 or value associated with the dimString
         */
        private long getDim(String dimString) {
-               if(op == OpOpDnn.BIAS_ADD || op == OpOpDnn.BIAS_MULTIPLY) {
+               if(op == OpOpDnn.BIASADD || op == OpOpDnn.BIASMULT) {
                        throw new RuntimeException("getDim method should not be 
invoked for bias_add and bias_multiply");
                }
                try {

http://git-wip-us.apache.org/repos/asf/systemml/blob/305eb28c/src/main/java/org/apache/sysml/hops/Hop.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/Hop.java 
b/src/main/java/org/apache/sysml/hops/Hop.java
index 53cfdfa..436d45f 100644
--- a/src/main/java/org/apache/sysml/hops/Hop.java
+++ b/src/main/java/org/apache/sysml/hops/Hop.java
@@ -1099,7 +1099,7 @@ public abstract class Hop implements ParseInfo
        public enum OpOpDnn {
                MAX_POOL, MAX_POOL_BACKWARD, AVG_POOL, AVG_POOL_BACKWARD,
                CONV2D, CONV2D_BACKWARD_FILTER, CONV2D_BACKWARD_DATA,
-               BIAS_ADD, BIAS_MULTIPLY
+               BIASADD, BIASMULT
        }
        
        public enum DataGenMethod {
@@ -1168,8 +1168,8 @@ public abstract class Hop implements ParseInfo
                HopsConv2Lops.put(OpOpDnn.AVG_POOL, 
org.apache.sysml.lops.DnnTransform.OperationTypes.AVG_POOL);
                HopsConv2Lops.put(OpOpDnn.AVG_POOL_BACKWARD, 
org.apache.sysml.lops.DnnTransform.OperationTypes.AVG_POOL_BACKWARD);
                HopsConv2Lops.put(OpOpDnn.CONV2D, 
org.apache.sysml.lops.DnnTransform.OperationTypes.CONV2D);
-               HopsConv2Lops.put(OpOpDnn.BIAS_ADD, 
org.apache.sysml.lops.DnnTransform.OperationTypes.BIAS_ADD);
-               HopsConv2Lops.put(OpOpDnn.BIAS_MULTIPLY, 
org.apache.sysml.lops.DnnTransform.OperationTypes.BIAS_MULTIPLY);
+               HopsConv2Lops.put(OpOpDnn.BIASADD, 
org.apache.sysml.lops.DnnTransform.OperationTypes.BIAS_ADD);
+               HopsConv2Lops.put(OpOpDnn.BIASMULT, 
org.apache.sysml.lops.DnnTransform.OperationTypes.BIAS_MULTIPLY);
                HopsConv2Lops.put(OpOpDnn.CONV2D_BACKWARD_FILTER, 
org.apache.sysml.lops.DnnTransform.OperationTypes.CONV2D_BACKWARD_FILTER);
                HopsConv2Lops.put(OpOpDnn.CONV2D_BACKWARD_DATA, 
org.apache.sysml.lops.DnnTransform.OperationTypes.CONV2D_BACKWARD_DATA);
        }

http://git-wip-us.apache.org/repos/asf/systemml/blob/305eb28c/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java 
b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java
index 7ef21c1..a09f810 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java
@@ -48,7 +48,8 @@ public class CNodeBinary extends CNode
                //vector-vector operations
                VECT_MULT, VECT_DIV, VECT_MINUS, VECT_PLUS, VECT_MIN, VECT_MAX, 
VECT_EQUAL, 
                VECT_NOTEQUAL, VECT_LESS, VECT_LESSEQUAL, VECT_GREATER, 
VECT_GREATEREQUAL,
-               VECT_XOR, VECT_BITWAND,
+               VECT_XOR, VECT_BITWAND, 
+               VECT_BIASADD, VECT_BIASMULT,
                //scalar-scalar operations
                MULT, DIV, PLUS, MINUS, MODULUS, INTDIV, 
                LESS, LESSEQUAL, GREATER, GREATEREQUAL, EQUAL,NOTEQUAL,
@@ -153,6 +154,8 @@ public class CNodeBinary extends CNode
                                case VECT_PLUS:
                                case VECT_XOR:
                                case VECT_BITWAND:
+                               case VECT_BIASADD:
+                               case VECT_BIASMULT:
                                case VECT_MIN:
                                case VECT_MAX:
                                case VECT_EQUAL:
@@ -244,7 +247,8 @@ public class CNodeBinary extends CNode
                                || this == VECT_EQUAL || this == VECT_NOTEQUAL
                                || this == VECT_LESS || this == VECT_LESSEQUAL
                                || this == VECT_GREATER || this == 
VECT_GREATEREQUAL
-                               || this == VECT_XOR || this == VECT_BITWAND;
+                               || this == VECT_XOR || this == VECT_BITWAND
+                               || this == VECT_BIASADD || this == 
VECT_BIASMULT;
                }
                public boolean isVectorMatrixPrimitive() {
                        return this == VECT_MATRIXMULT
@@ -394,6 +398,8 @@ public class CNodeBinary extends CNode
                        case VECT_GREATEREQUAL:        return "b(v2gte)";
                        case VECT_GREATER:             return "b(v2gt)";
                        case VECT_CBIND:               return "b(cbind)";
+                       case VECT_BIASADD:             return "b(vbias+)";
+                       case VECT_BIASMULT:            return "b(vbias*)";
                        case MULT:                     return "b(*)";
                        case DIV:                      return "b(/)";
                        case PLUS:                     return "b(+)";
@@ -486,6 +492,8 @@ public class CNodeBinary extends CNode
                        case VECT_LESSEQUAL:
                        case VECT_GREATER:
                        case VECT_GREATEREQUAL:
+                       case VECT_BIASADD:
+                       case VECT_BIASMULT:
                                boolean scalarVector = 
(_inputs.get(0).getDataType()==DataType.SCALAR);
                                _rows = _inputs.get(scalarVector ? 1 : 0)._rows;
                                _cols = _inputs.get(scalarVector ? 1 : 0)._cols;

http://git-wip-us.apache.org/repos/asf/systemml/blob/305eb28c/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeTernary.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeTernary.java 
b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeTernary.java
index 61140b4..ac0a86d 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeTernary.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeTernary.java
@@ -29,6 +29,7 @@ public class CNodeTernary extends CNode
 {
        public enum TernaryType {
                PLUS_MULT, MINUS_MULT,
+               BIASADD, BIASMULT,
                REPLACE, REPLACE_NAN, IFELSE,
                LOOKUP_RC1, LOOKUP_RVECT1;
                
@@ -43,7 +44,13 @@ public class CNodeTernary extends CNode
                                
                                case MINUS_MULT:
                                        return "    double %TMP% = %IN1% - 
%IN2% * %IN3%;\n";
-                                       
+                               
+                               case BIASADD:
+                                       return "    double %TMP% = %IN1% + 
getValue(%IN2%, cix/%IN3%);\n";
+                               
+                               case BIASMULT:
+                                       return "    double %TMP% = %IN1% * 
getValue(%IN2%, cix/%IN3%);\n";
+                               
                                case REPLACE:
                                        return "    double %TMP% = (%IN1% == 
%IN2% || (Double.isNaN(%IN1%) "
                                                        + "&& 
Double.isNaN(%IN2%))) ? %IN3% : %IN1%;\n";
@@ -127,6 +134,8 @@ public class CNodeTernary extends CNode
                switch(_type) {
                        case PLUS_MULT:     return "t(+*)";
                        case MINUS_MULT:    return "t(-*)";
+                       case BIASADD:       return "t(bias+)";
+                       case BIASMULT:      return "t(bias*)";
                        case REPLACE:
                        case REPLACE_NAN:   return "t(rplc)";
                        case IFELSE:        return "t(ifelse)";
@@ -141,6 +150,8 @@ public class CNodeTernary extends CNode
                switch(_type) {
                        case PLUS_MULT: 
                        case MINUS_MULT:
+                       case BIASADD:
+                       case BIASMULT:
                        case REPLACE:
                        case REPLACE_NAN:
                        case IFELSE:

http://git-wip-us.apache.org/repos/asf/systemml/blob/305eb28c/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java
 
b/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java
index e96f3a7..0e0091c 100644
--- 
a/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java
+++ 
b/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java
@@ -42,6 +42,7 @@ import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
 import org.apache.sysml.hops.AggBinaryOp;
 import org.apache.sysml.hops.AggUnaryOp;
 import org.apache.sysml.hops.BinaryOp;
+import org.apache.sysml.hops.DnnOp;
 import org.apache.sysml.hops.Hop;
 import org.apache.sysml.hops.Hop.AggOp;
 import org.apache.sysml.hops.Hop.DataGenMethod;
@@ -1132,6 +1133,16 @@ public class PlanSelectionFuseCostBasedV2 extends 
PlanSelection
                else if( current instanceof ReorgOp ) {
                        costs = 1;
                }
+               else if( current instanceof DnnOp ) { //costs of fused DNN operations
+                       switch( ((DnnOp)current).getOp() ) {
+                               case BIASADD:
+                               case BIASMULT:
+                                       costs = 2; break; //no fall-through into the default warning
+                               default: //all other DNN operations have no cost estimate
+                                       LOG.warn("Cost model not "
+                                               + "implemented yet for: 
"+((DnnOp)current).getOp());
+                       }
+               }
                else if( current instanceof AggBinaryOp ) {
                        //outer product template w/ matrix-matrix 
                        //or row template w/ matrix-vector or matrix-matrix

http://git-wip-us.apache.org/repos/asf/systemml/blob/305eb28c/src/main/java/org/apache/sysml/hops/codegen/template/TemplateCell.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateCell.java 
b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateCell.java
index 0106a59..dda6665 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateCell.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateCell.java
@@ -31,12 +31,15 @@ import org.apache.sysml.hops.AggUnaryOp;
 import org.apache.sysml.hops.BinaryOp;
 import org.apache.sysml.hops.DataGenOp;
 import org.apache.sysml.hops.DataOp;
+import org.apache.sysml.hops.DnnOp;
 import org.apache.sysml.hops.Hop;
+import org.apache.sysml.hops.HopsException;
 import org.apache.sysml.hops.UnaryOp;
 import org.apache.sysml.hops.Hop.AggOp;
 import org.apache.sysml.hops.Hop.DataGenMethod;
 import org.apache.sysml.hops.Hop.OpOp2;
 import org.apache.sysml.hops.Hop.OpOp3;
+import org.apache.sysml.hops.Hop.OpOpDnn;
 import org.apache.sysml.hops.Hop.OpOpN;
 import org.apache.sysml.hops.Hop.ParamBuiltinOp;
 import org.apache.sysml.hops.IndexingOp;
@@ -85,7 +88,9 @@ public class TemplateCell extends TemplateBase
                                && (((IndexingOp)hop).isColLowerEqualsUpper() 
|| hop.getDim2()==1))
                        || (HopRewriteUtils.isDataGenOpWithLiteralInputs(hop, 
DataGenMethod.SEQ)
                                && 
HopRewriteUtils.hasOnlyUnaryBinaryParents(hop, true))
-                       || (HopRewriteUtils.isNary(hop, OpOpN.MIN, OpOpN.MAX) 
&& hop.isMatrix());
+                       || (HopRewriteUtils.isNary(hop, OpOpN.MIN, OpOpN.MAX) 
&& hop.isMatrix())
+                       || (HopRewriteUtils.isDnn(hop, OpOpDnn.BIASADD, 
OpOpDnn.BIASMULT)
+                               && hop.getInput().get(0).dimsKnown() && 
hop.getInput().get(1).dimsKnown());
        }
 
        @Override
@@ -97,7 +102,9 @@ public class TemplateCell extends TemplateBase
                                && 
HopRewriteUtils.isTransposeOperation(hop.getInput().get(0))
                        || (HopRewriteUtils.isTransposeOperation(hop) 
                                && hop.getDim1()==1 && hop.getDim2()>1))
-                       || (HopRewriteUtils.isNary(hop, OpOpN.MIN, OpOpN.MAX) 
&& hop.isMatrix());
+                       || (HopRewriteUtils.isNary(hop, OpOpN.MIN, OpOpN.MAX) 
&& hop.isMatrix())
+                       || (HopRewriteUtils.isDnn(hop, OpOpDnn.BIASADD, 
OpOpDnn.BIASMULT)
+                               && hop.getInput().get(0).dimsKnown() && 
hop.getInput().get(1).dimsKnown());
        }
 
        @Override
@@ -108,7 +115,9 @@ public class TemplateCell extends TemplateBase
                                && 
HopRewriteUtils.isTransposeOperation(input))))
                        || (HopRewriteUtils.isDataGenOpWithLiteralInputs(input, 
DataGenMethod.SEQ)
                                && 
HopRewriteUtils.hasOnlyUnaryBinaryParents(input, false))
-                       || (HopRewriteUtils.isNary(hop, OpOpN.MIN, OpOpN.MAX) 
&& hop.isMatrix());
+                       || (HopRewriteUtils.isNary(hop, OpOpN.MIN, OpOpN.MAX) 
&& hop.isMatrix())
+                       || (HopRewriteUtils.isDnn(hop, OpOpDnn.BIASADD, 
OpOpDnn.BIASMULT)
+                               && hop.getInput().get(0).dimsKnown() && 
hop.getInput().get(1).dimsKnown());
        }
 
        @Override
@@ -226,7 +235,15 @@ public class TemplateCell extends TemplateBase
                        out = new CNodeTernary(cdata1, cdata2, cdata3, 
                                TernaryType.valueOf(top.getOp().name()));
                }
-               else if(HopRewriteUtils.isNary(hop, OpOpN.MIN, OpOpN.MAX)) {
+               else if( HopRewriteUtils.isDnn(hop, OpOpDnn.BIASADD, 
OpOpDnn.BIASMULT) ) {
+                       CNode cdata1 = 
tmp.get(hop.getInput().get(0).getHopID());
+                       CNode cdata2 = 
tmp.get(hop.getInput().get(1).getHopID());
+                       long c = hop.getInput().get(0).getDim2() / 
hop.getInput().get(1).getDim1();
+                       CNode cdata3 = TemplateUtils.createCNodeData(new 
LiteralOp(c), true);
+                       out = new CNodeTernary(cdata1, cdata2, cdata3,
+                               
TernaryType.valueOf(((DnnOp)hop).getOp().name()));
+               }
+               else if( HopRewriteUtils.isNary(hop, OpOpN.MIN, OpOpN.MAX) ) {
                        String op = ((NaryOp)hop).getOp().name();
                        CNode[] inputs = hop.getInput().stream().map(c -> 
                                
TemplateUtils.wrapLookupIfNecessary(tmp.get(c.getHopID()), 
c)).toArray(CNode[]::new);
@@ -298,6 +315,10 @@ public class TemplateCell extends TemplateBase
                                out = new CNodeBinary(cdata1, cdata2, 
BinType.MULT);
                        }
                } 
+       
+               if( out == null ) {
+                       throw new HopsException(hop.getHopID()+" 
"+hop.getOpString());
+               }
                
                tmp.put(hop.getHopID(), out);
        }

http://git-wip-us.apache.org/repos/asf/systemml/blob/305eb28c/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java 
b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
index 55b62ba..15bec59 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
@@ -28,6 +28,7 @@ import org.apache.sysml.hops.AggBinaryOp;
 import org.apache.sysml.hops.AggUnaryOp;
 import org.apache.sysml.hops.BinaryOp;
 import org.apache.sysml.hops.DataGenOp;
+import org.apache.sysml.hops.DnnOp;
 import org.apache.sysml.hops.Hop;
 import org.apache.sysml.hops.IndexingOp;
 import org.apache.sysml.hops.LiteralOp;
@@ -55,6 +56,7 @@ import org.apache.sysml.hops.Hop.Direction;
 import org.apache.sysml.hops.Hop.OpOp1;
 import org.apache.sysml.hops.Hop.OpOp2;
 import org.apache.sysml.hops.Hop.OpOp3;
+import org.apache.sysml.hops.Hop.OpOpDnn;
 import org.apache.sysml.hops.Hop.OpOpN;
 import org.apache.sysml.hops.HopsException;
 import org.apache.sysml.parser.Statement;
@@ -110,7 +112,10 @@ public class TemplateRow extends TemplateBase
                                && HopRewriteUtils.isAggUnaryOp(hop, 
SUPPORTED_ROW_AGG))
                        || (hop instanceof IndexingOp && 
hop.getInput().get(0).getDim1() > 1
                                && hop.getInput().get(0).getDim2() >= 0
-                               && 
HopRewriteUtils.isColumnRangeIndexing((IndexingOp)hop));
+                               && 
HopRewriteUtils.isColumnRangeIndexing((IndexingOp)hop))
+                       || (HopRewriteUtils.isDnn(hop, OpOpDnn.BIASADD, 
OpOpDnn.BIASMULT)
+                               && hop.getInput().get(0).dimsKnown() && 
hop.getInput().get(1).dimsKnown()
+                               && hop.getInput().get(0).getDim2()>1);
        }
 
        @Override
@@ -132,6 +137,9 @@ public class TemplateRow extends TemplateBase
                        || (hop instanceof AggBinaryOp && hop.dimsKnown() && 
isFuseSkinnyMatrixMult(hop) //MM
                                && 
HopRewriteUtils.isTransposeOperation(hop.getInput().get(0))
                                && hop.getInput().get(0).getDim1()>1 && 
hop.getInput().get(0).getDim2()>1)
+                       || (HopRewriteUtils.isDnn(hop, OpOpDnn.BIASADD, 
OpOpDnn.BIASMULT)
+                               && hop.getInput().get(0).dimsKnown() && 
hop.getInput().get(1).dimsKnown()
+                               && hop.getInput().get(0).getDim2()>1)
                        || isPartOfValidCumAggChain(hop) //cum* with transpose
                        || isPartOfValidTransposeMMChain(hop)); //t(f(X))%*%X
        }
@@ -145,6 +153,9 @@ public class TemplateRow extends TemplateBase
                        || (HopRewriteUtils.isBinary(hop, OpOp2.CBIND) && 
hop.getInput().get(0).isMatrix() && hop.dimsKnown())
                        || (HopRewriteUtils.isNary(hop, OpOpN.CBIND) && 
hop.getInput().get(0).isMatrix() && hop.dimsKnown())
                        || (HopRewriteUtils.isNary(hop, OpOpN.MIN, OpOpN.MAX) 
&& hop.isMatrix())
+                       || (HopRewriteUtils.isDnn(hop, OpOpDnn.BIASADD, 
OpOpDnn.BIASMULT)
+                               && hop.getInput().get(0).dimsKnown() && 
hop.getInput().get(1).dimsKnown()
+                               && hop.getInput().get(0).getDim2()>1 )
                        || (HopRewriteUtils.isDataGenOpWithLiteralInputs(input, 
DataGenMethod.SEQ)
                                && 
HopRewriteUtils.hasOnlyUnaryBinaryParents(input, false))
                        || (hop instanceof AggBinaryOp
@@ -459,6 +470,12 @@ public class TemplateRow extends TemplateBase
                                        
TernaryType.valueOf(top.getOp().name()));
                        }
                }
+               else if( HopRewriteUtils.isDnn(hop, OpOpDnn.BIASADD, 
OpOpDnn.BIASMULT) ) {
+                       CNode cdata1 = 
tmp.get(hop.getInput().get(0).getHopID());
+                       CNode cdata2 = 
tmp.get(hop.getInput().get(1).getHopID());
+                       out = new CNodeBinary(cdata1, cdata2,
+                               
BinType.valueOf("VECT_"+((DnnOp)hop).getOp().name()));
+               }
                else if( hop instanceof NaryOp ) {
                        CNode[] inputs = new CNode[hop.getInput().size()];
                        for( int i=0; i<hop.getInput().size(); i++ ) {

http://git-wip-us.apache.org/repos/asf/systemml/blob/305eb28c/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java 
b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
index 232b214..3ca15d3 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
@@ -37,6 +37,7 @@ import org.apache.sysml.hops.TernaryOp;
 import org.apache.sysml.hops.Hop.AggOp;
 import org.apache.sysml.hops.Hop.Direction;
 import org.apache.sysml.hops.Hop.OpOp1;
+import org.apache.sysml.hops.Hop.OpOpDnn;
 import org.apache.sysml.hops.Hop.OpOpN;
 import org.apache.sysml.hops.IndexingOp;
 import org.apache.sysml.hops.UnaryOp;
@@ -347,7 +348,8 @@ public class TemplateUtils
        
        public static boolean isValidSingleOperation(Hop hop) {
                return HopRewriteUtils.isNary(hop, OpOpN.MIN, OpOpN.MAX)
-                       || HopRewriteUtils.isUnary(hop, OpOp1.EXP, OpOp1.LOG);
+                       || HopRewriteUtils.isUnary(hop, OpOp1.EXP, OpOp1.LOG)
+                       || HopRewriteUtils.isDnn(hop, OpOpDnn.BIASADD, 
OpOpDnn.BIASMULT);
        }
        
        public static boolean hasNoOperation(CNodeTpl tpl) {

http://git-wip-us.apache.org/repos/asf/systemml/blob/305eb28c/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java 
b/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java
index ca78106..519bd7d 100644
--- a/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java
+++ b/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java
@@ -1316,8 +1316,8 @@ public class BuiltinFunctionExpression extends 
DataIdentifier
                        output.setBlockDimensions(id.getRowsInBlock(), 
id.getColumnsInBlock());
                        break;
                
-               case BIAS_ADD:
-               case BIAS_MULTIPLY:
+               case BIASADD:
+               case BIASMULT:
                {
                        Expression input = _args[0];
                        Expression bias = _args[1];
@@ -1956,9 +1956,9 @@ public class BuiltinFunctionExpression extends 
DataIdentifier
                else if (functionName.equals("conv2d"))
                         bifop = Expression.BuiltinFunctionOp.CONV2D;
                else if (functionName.equals("bias_add"))
-                        bifop = Expression.BuiltinFunctionOp.BIAS_ADD;
+                        bifop = Expression.BuiltinFunctionOp.BIASADD;
                else if (functionName.equals("bias_multiply"))
-                        bifop = Expression.BuiltinFunctionOp.BIAS_MULTIPLY;
+                        bifop = Expression.BuiltinFunctionOp.BIASMULT;
                else if (functionName.equals("conv2d_backward_filter"))
                         bifop = 
Expression.BuiltinFunctionOp.CONV2D_BACKWARD_FILTER;
                else if (functionName.equals("conv2d_backward_data"))

http://git-wip-us.apache.org/repos/asf/systemml/blob/305eb28c/src/main/java/org/apache/sysml/parser/DMLTranslator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/DMLTranslator.java 
b/src/main/java/org/apache/sysml/parser/DMLTranslator.java
index 08df72f..089edce 100644
--- a/src/main/java/org/apache/sysml/parser/DMLTranslator.java
+++ b/src/main/java/org/apache/sysml/parser/DMLTranslator.java
@@ -2684,8 +2684,8 @@ public class DMLTranslator
                        currBuiltinOp.refreshSizeInformation(); //force size 
reevaluation according to 'outer' flag otherwise danger of incorrect dims
                        break;
                
-               case BIAS_ADD:
-               case BIAS_MULTIPLY: {
+               case BIASADD:
+               case BIASMULT: {
                        ArrayList<Hop> inHops1 = new ArrayList<>();
                        inHops1.add(expr);
                        inHops1.add(expr2);

http://git-wip-us.apache.org/repos/asf/systemml/blob/305eb28c/src/main/java/org/apache/sysml/parser/Expression.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/Expression.java 
b/src/main/java/org/apache/sysml/parser/Expression.java
index 1708ed3..b394372 100644
--- a/src/main/java/org/apache/sysml/parser/Expression.java
+++ b/src/main/java/org/apache/sysml/parser/Expression.java
@@ -90,7 +90,7 @@ public abstract class Expression implements ParseInfo
                EIGEN,
                EVAL,
                EXISTS,
-               CONV2D, CONV2D_BACKWARD_FILTER, CONV2D_BACKWARD_DATA, BIAS_ADD, BIAS_MULTIPLY,
+               CONV2D, CONV2D_BACKWARD_FILTER, CONV2D_BACKWARD_DATA, BIASADD, BIASMULT,
                MAX_POOL, AVG_POOL, MAX_POOL_BACKWARD, AVG_POOL_BACKWARD,
                LSTM, LSTM_BACKWARD, BATCH_NORM2D, BATCH_NORM2D_BACKWARD,
                EXP,

http://git-wip-us.apache.org/repos/asf/systemml/blob/305eb28c/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java b/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java
index bcb2f4d..42d61d3 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java
@@ -25,6 +25,7 @@ import org.apache.commons.math3.util.FastMath;
 import org.apache.sysml.runtime.functionobjects.BitwAnd;
 import org.apache.sysml.runtime.functionobjects.IntegerDivide;
 import org.apache.sysml.runtime.functionobjects.Modulus;
+import org.apache.sysml.runtime.matrix.data.LibMatrixDNN;
 import org.apache.sysml.runtime.matrix.data.LibMatrixMult;
 
 /**
@@ -2009,6 +2010,40 @@ public class LibSpoofPrimitives
        public static double[] vectBitwandWrite(double[] a, double[] b, int ai, int[] aix, int bi, int alen, int len) {
                return vectBitwandWrite(a, b, aix, ai, bi, alen, len);
        }
+       
+       // bias add
+       
+       public static double[] vectBiasaddWrite(double[] a, double[] b, int ai, int bi, int len) {
+               double[] c = allocVector(len, false);
+               System.arraycopy(a, ai, c, 0, len);
+               LibMatrixDNN.addBias(c, b, 1, 1, b.length, len/b.length);
+               return c;
+       }
+       
+       public static double[] vectBiasaddWrite(double[] a, double[] b, int[] aix, int ai, int bi, int alen, int len) {
+               double[] c = allocVector(len, true);
+               for(int k=ai; k<ai+alen; k++)
+                       c[aix[k]] = a[k];
+               LibMatrixDNN.addBias(c, b, 1, 1, b.length, len/b.length);
+               return c;
+       }
+       
+       // bias mult
+       
+       public static double[] vectBiasmultWrite(double[] a, double[] b, int ai, int bi, int len) {
+               double[] c = allocVector(len, false);
+               System.arraycopy(a, ai, c, 0, len);
+               LibMatrixDNN.multBias(c, b, 1, b.length, len/b.length);
+               return c;
+       }
+       
+       public static double[] vectBiasmultWrite(double[] a, double[] b, int[] aix, int ai, int bi, int alen, int len) {
+               double[] c = allocVector(len, true);
+               for(int k=ai; k<ai+alen; k++)
+                       c[aix[k]] = a[k];
+               LibMatrixDNN.multBias(c, b, 1, b.length, len/b.length);
+               return c;
+       }
 
        //complex builtin functions that are not directly generated
        //(included here in order to reduce the number of imports)

http://git-wip-us.apache.org/repos/asf/systemml/blob/305eb28c/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
index 963bfbf..0c6f41a 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
@@ -315,19 +315,11 @@ public class LibMatrixDNN {
                }
                else {
                        // Handles both dense and sparse inputs and copies it to dense output
-                       outputBlock.copy(input, false); 
-                       int index = 0;
+                       outputBlock.copy(input, false);
                        if(bias.isInSparseFormat())
                                bias.sparseToDense(); // Since bias is extremely small array
                        double [] biasArr = bias.getDenseBlockValues();
-                       for(int n = 0; n < N; n++) {
-                               for(int k = 0; k < K; k++) {
-                                       double biasVal = biasArr[k];
-                                       for(int pq = 0; pq < PQ; pq++, index++) {
-                                               outputArray[index] += biasVal;
-                                       }
-                               }
-                       }
+                       addBias(outputArray, biasArr, 1, N, K, PQ);
                }
                
                //post-processing: maintain nnz
@@ -548,8 +540,8 @@ public class LibMatrixDNN {
                }
                else {
                        addBias(retArr, resultSaveMeanArr, -1, N, K, PQ);
-                       multiplyBias(retArr, resultSaveInvVarianceArr, N, K, PQ);
-                       multiplyBias(retArr, scaleArr, N, K, PQ);
+                       multBias(retArr, resultSaveInvVarianceArr, N, K, PQ);
+                       multBias(retArr, scaleArr, N, K, PQ);
                        addBias(retArr, biasArr, 1, N, K, PQ);
                }
                ret.recomputeNonZeros();
@@ -585,33 +577,32 @@ public class LibMatrixDNN {
                }
        }
        
-       private static void addBias(double [] arr, double [] bias, double biasMultiplier, int N, int K, int PQ) {
+       public static void addBias(double[] a, double[] bias, double biasMultiplier, int N, int K, int PQ) {
+               if( bias == null )
+                       return;
                int index = 0;
-               if(bias != null) {
-                       for(int n = 0; n < N; n++) {
-                               for(int k = 0; k < K; k++) {
-                                       for(int pq = 0; pq < PQ; pq++, index++) {
-                                               arr[index] += biasMultiplier*bias[k];
-                                       }
-                               }
+               for(int n = 0; n < N; n++) {
+                       for(int k = 0; k < K; k++) {
+                               double biasVal = biasMultiplier*bias[k];
+                               for(int pq = 0; pq < PQ; pq++, index++)
+                                       a[index] += biasVal;
                        }
                }
        }
        
-       private static void multiplyBias(double [] arr, double [] bias, int N, int K, int PQ) {
+       public static void multBias(double[] a, double[] bias, int N, int K, int PQ) {
+               if( bias == null ) {
+                       Arrays.fill(a, 0);
+                       return;
+               }
                int index = 0;
-               if(bias != null) {
-                       for(int n = 0; n < N; n++) {
-                               for(int k = 0; k < K; k++) {
-                                       for(int pq = 0; pq < PQ; pq++, index++) {
-                                               arr[index] *= bias[k];
-                                       }
-                               }
+               for(int n = 0; n < N; n++) {
+                       for(int k = 0; k < K; k++) {
+                               double biasVal = bias[k];
+                               for(int pq = 0; pq < PQ; pq++, index++)
+                                       a[index] *= biasVal;
                        }
                }
-               else {
-                       Arrays.fill(arr, 0);
-               }
        }
        
        private static void computeBiasSumAndSumSquares(MatrixBlock image, double [] sumArr, double [] sumSquaresArr, int K, int PQ) {

http://git-wip-us.apache.org/repos/asf/systemml/blob/305eb28c/src/test/java/org/apache/sysml/test/integration/functions/codegen/CellwiseTmplTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegen/CellwiseTmplTest.java b/src/test/java/org/apache/sysml/test/integration/functions/codegen/CellwiseTmplTest.java
index 90a6dc8..8d962cc 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/codegen/CellwiseTmplTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/codegen/CellwiseTmplTest.java
@@ -60,6 +60,8 @@ public class CellwiseTmplTest extends AutomatedTestBase
        private static final String TEST_NAME22 = TEST_NAME+22; //sum(X * seq(1,N) + t(seq(M,1)))
        private static final String TEST_NAME23 = TEST_NAME+23; //sum(min(X,Y,Z))
        private static final String TEST_NAME24 = TEST_NAME+24; //min(X, Y, Z, 3, 7)
+       private static final String TEST_NAME25 = TEST_NAME+25; //bias_add
+       private static final String TEST_NAME26 = TEST_NAME+26; //bias_mult
 
        private static final String TEST_DIR = "functions/codegen/";
        private static final String TEST_CLASS_DIR = TEST_DIR + CellwiseTmplTest.class.getSimpleName() + "/";
@@ -72,7 +74,7 @@ public class CellwiseTmplTest extends AutomatedTestBase
        @Override
        public void setUp() {
                TestUtils.clearAssertionInformation();
-               for( int i=1; i<=24; i++ ) {
+               for( int i=1; i<=26; i++ ) {
                        addTestConfiguration( TEST_NAME+i, new TestConfiguration(
                                TEST_CLASS_DIR, TEST_NAME+i, new String[] {String.valueOf(i)}) );
                }
@@ -414,6 +416,36 @@ public class CellwiseTmplTest extends AutomatedTestBase
        public void testCodegenCellwiseRewrite24_sp() {
                testCodegenIntegration( TEST_NAME24, true, ExecType.SPARK );
        }
+       
+       @Test
+       public void testCodegenCellwiseRewrite25() {
+               testCodegenIntegration( TEST_NAME25, true, ExecType.CP );
+       }
+
+       @Test
+       public void testCodegenCellwise25() {
+               testCodegenIntegration( TEST_NAME25, false, ExecType.CP );
+       }
+
+       @Test //TODO handling of global col index
+       public void testCodegenCellwiseRewrite25_sp() {
+               testCodegenIntegration( TEST_NAME25, true, ExecType.SPARK );
+       }
+       
+       @Test
+       public void testCodegenCellwiseRewrite26() {
+               testCodegenIntegration( TEST_NAME26, true, ExecType.CP );
+       }
+
+       @Test
+       public void testCodegenCellwise26() {
+               testCodegenIntegration( TEST_NAME26, false, ExecType.CP );
+       }
+
+       @Test //TODO handling of global col index
+       public void testCodegenCellwiseRewrite26_sp() {
+               testCodegenIntegration( TEST_NAME26, true, ExecType.SPARK );
+       }
 
        private void testCodegenIntegration( String testname, boolean rewrites, ExecType instType )
        {

http://git-wip-us.apache.org/repos/asf/systemml/blob/305eb28c/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java b/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java
index 203e9b1..bec9be2 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java
@@ -79,6 +79,7 @@ public class RowAggTmplTest extends AutomatedTestBase
        private static final String TEST_NAME40 = TEST_NAME+"40"; //relu operation -> (X>0)* dout
        private static final String TEST_NAME41 = TEST_NAME+"41"; //X*rowSums(X/seq(1,N)+t(seq(M,1)))
        private static final String TEST_NAME42 = TEST_NAME+"42"; //X/rowSums(min(X, Y, Z))
+       private static final String TEST_NAME43 = TEST_NAME+"43"; //bias_add(X,B) + bias_mult(X,B)
        
        private static final String TEST_DIR = "functions/codegen/";
        private static final String TEST_CLASS_DIR = TEST_DIR + RowAggTmplTest.class.getSimpleName() + "/";
@@ -90,7 +91,7 @@ public class RowAggTmplTest extends AutomatedTestBase
        @Override
        public void setUp() {
                TestUtils.clearAssertionInformation();
-               for(int i=1; i<=42; i++)
+               for(int i=1; i<=43; i++)
                        addTestConfiguration( TEST_NAME+i, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME+i, new String[] { String.valueOf(i) }) );
        }
        
@@ -723,6 +724,21 @@ public class RowAggTmplTest extends AutomatedTestBase
        public void testCodegenRowAgg42SP() {
                testCodegenIntegration( TEST_NAME42, false, ExecType.SPARK );
        }
+       
+       @Test
+       public void testCodegenRowAggRewrite43CP() {
+               testCodegenIntegration( TEST_NAME43, true, ExecType.CP );
+       }
+
+       @Test
+       public void testCodegenRowAgg43CP() {
+               testCodegenIntegration( TEST_NAME43, false, ExecType.CP );
+       }
+
+       @Test
+       public void testCodegenRowAgg43SP() {
+               testCodegenIntegration( TEST_NAME43, false, ExecType.SPARK );
+       }
 
        private void testCodegenIntegration( String testname, boolean rewrites, ExecType instType )
        {

http://git-wip-us.apache.org/repos/asf/systemml/blob/305eb28c/src/test/scripts/functions/codegen/cellwisetmpl25.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/cellwisetmpl25.R b/src/test/scripts/functions/codegen/cellwisetmpl25.R
new file mode 100644
index 0000000..4a77717
--- /dev/null
+++ b/src/test/scripts/functions/codegen/cellwisetmpl25.R
@@ -0,0 +1,37 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args<-commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+N = 64
+C = 4
+H = 12
+W = 12
+X = matrix(7, N, C*H*W);
+B = as.matrix(seq(1, C))
+ones = matrix(1, 1, H*W)
+
+R = X + (matrix(1,N,1) %*% matrix(t(B%*%ones), 1, C*H*W))
+R = as.matrix(sum(R));
+
+writeMM(as(R,"CsparseMatrix"), paste(args[2], "S", sep=""));

http://git-wip-us.apache.org/repos/asf/systemml/blob/305eb28c/src/test/scripts/functions/codegen/cellwisetmpl25.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/cellwisetmpl25.dml b/src/test/scripts/functions/codegen/cellwisetmpl25.dml
new file mode 100644
index 0000000..fab4088
--- /dev/null
+++ b/src/test/scripts/functions/codegen/cellwisetmpl25.dml
@@ -0,0 +1,36 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+N = 64
+C = 4
+H = 12
+W = 12
+X = matrix(7, N, C*H*W);
+B = seq(1, C)
+
+while(FALSE){}
+
+R = bias_add(X, B);
+
+while(FALSE){}
+R = as.matrix(sum(R));
+
+write(R, $1)

http://git-wip-us.apache.org/repos/asf/systemml/blob/305eb28c/src/test/scripts/functions/codegen/cellwisetmpl26.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/cellwisetmpl26.R b/src/test/scripts/functions/codegen/cellwisetmpl26.R
new file mode 100644
index 0000000..708b7a8
--- /dev/null
+++ b/src/test/scripts/functions/codegen/cellwisetmpl26.R
@@ -0,0 +1,37 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args<-commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+N = 64
+C = 4
+H = 12
+W = 12
+X = matrix(7, N, C*H*W);
+B = as.matrix(seq(1, C))
+ones = matrix(1, 1, H*W)
+
+R = X * (matrix(1,N,1) %*% matrix(t(B%*%ones), 1, C*H*W))
+R = as.matrix(sum(R));
+
+writeMM(as(R,"CsparseMatrix"), paste(args[2], "S", sep=""));

http://git-wip-us.apache.org/repos/asf/systemml/blob/305eb28c/src/test/scripts/functions/codegen/cellwisetmpl26.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/cellwisetmpl26.dml b/src/test/scripts/functions/codegen/cellwisetmpl26.dml
new file mode 100644
index 0000000..d2a1fa9
--- /dev/null
+++ b/src/test/scripts/functions/codegen/cellwisetmpl26.dml
@@ -0,0 +1,36 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+N = 64
+C = 4
+H = 12
+W = 12
+X = matrix(7, N, C*H*W);
+B = seq(1, C)
+
+while(FALSE){}
+
+R = bias_multiply(X, B);
+
+while(FALSE){}
+R = as.matrix(sum(R));
+
+write(R, $1)

http://git-wip-us.apache.org/repos/asf/systemml/blob/305eb28c/src/test/scripts/functions/codegen/rowAggPattern43.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/rowAggPattern43.R b/src/test/scripts/functions/codegen/rowAggPattern43.R
new file mode 100644
index 0000000..bffc850
--- /dev/null
+++ b/src/test/scripts/functions/codegen/rowAggPattern43.R
@@ -0,0 +1,41 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args<-commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+N = 64
+C = 4
+H = 12
+W = 12
+X = matrix(7, N, C*H*W);
+B = as.matrix(seq(1, C))
+ones = matrix(1, 1, H*W)
+
+R1 = X + (matrix(1,N,1) %*% matrix(t(B%*%ones), 1, C*H*W))
+R2 = X * (matrix(1,N,1) %*% matrix(t(B%*%ones), 1, C*H*W))
+R = (R1 + R2) / (rowSums(X) %*% matrix(1,1,ncol(X)));
+
+R = as.matrix(sum(R));
+
+
+writeMM(as(R,"CsparseMatrix"), paste(args[2], "S", sep=""));

http://git-wip-us.apache.org/repos/asf/systemml/blob/305eb28c/src/test/scripts/functions/codegen/rowAggPattern43.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/rowAggPattern43.dml b/src/test/scripts/functions/codegen/rowAggPattern43.dml
new file mode 100644
index 0000000..6f1558c
--- /dev/null
+++ b/src/test/scripts/functions/codegen/rowAggPattern43.dml
@@ -0,0 +1,38 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+N = 64
+C = 4
+H = 12
+W = 12
+X = matrix(7, N, C*H*W);
+B = seq(1, C)
+
+while(FALSE){}
+
+R1 = bias_add(X, B);
+R2 = bias_multiply(X, B);
+R = (R1 + R2) / rowSums(X);
+
+while(FALSE){}
+R = as.matrix(sum(R));
+
+write(R, $1)

Reply via email to