[SYSTEMML-1986] New nary cbind/rbind (compiler/runtime for cp/spark)

This patch generalizes the existing cbind and rbind from binary to nary
operations, which avoids unnecessary materialized intermediates and
simplifies their usage. Depending on the number of inputs, we now compile
either a BinaryOp or a NaryOp, both of which share a common block
runtime. For distributed operations, we support a new physical append
operator that relies on a simple "shift-pad-union-merge" approach and
thus avoids unnecessary repeated shuffling.

Additionally, this patch adds the related tests for rbind and cbind
with three inputs and all combinations of sparse and dense inputs, and
modifies some existing scripts to exploit the new capabilities.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/c0b6ef5c
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/c0b6ef5c
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/c0b6ef5c

Branch: refs/heads/master
Commit: c0b6ef5ca92af4766ff7d758f9307ddbcbafc5b6
Parents: aaa9481
Author: Matthias Boehm <[email protected]>
Authored: Sat Nov 4 18:44:09 2017 -0700
Committer: Matthias Boehm <[email protected]>
Committed: Sat Nov 4 18:56:37 2017 -0700

----------------------------------------------------------------------
 scripts/algorithms/KM.dml                       |   8 +-
 scripts/datagen/genRandData4StratStats.dml      |   6 +-
 scripts/nn/layers/lstm.dml                      |   2 +-
 scripts/utils/image_utils.dml                   |   2 +-
 src/main/java/org/apache/sysml/hops/Hop.java    |  14 +-
 .../java/org/apache/sysml/hops/MultipleOp.java  | 185 ----------------
 src/main/java/org/apache/sysml/hops/NaryOp.java | 219 +++++++++++++++++++
 .../sysml/hops/rewrite/HopRewriteUtils.java     |  17 ++
 .../RewriteRemoveDanglingParentReferences.java  |   6 +-
 src/main/java/org/apache/sysml/lops/Lop.java    |   4 +-
 .../java/org/apache/sysml/lops/MultipleCP.java  | 128 -----------
 src/main/java/org/apache/sysml/lops/Nary.java   | 128 +++++++++++
 .../java/org/apache/sysml/lops/compile/Dag.java |  12 +-
 .../sysml/parser/BuiltinFunctionExpression.java |  61 +++---
 .../org/apache/sysml/parser/DMLTranslator.java  |  33 +--
 .../context/ExecutionContext.java               |  14 +-
 .../instructions/CPInstructionParser.java       |  14 +-
 .../instructions/SPInstructionParser.java       |   8 +-
 .../cp/BuiltinMultipleCPInstruction.java        |  78 -------
 .../runtime/instructions/cp/BuiltinNary.java    |  74 +++++++
 .../runtime/instructions/cp/CPInstruction.java  |   4 +-
 .../cp/MatrixBuiltinNaryCPInstruction.java      |  55 +++++
 .../cp/ScalarBuiltinMultipleCPInstruction.java  | 101 ---------
 .../cp/ScalarBuiltinNaryCPInstruction.java      | 100 +++++++++
 .../spark/BuiltinNarySPInstruction.java         | 144 ++++++++++++
 .../instructions/spark/SPInstruction.java       |   2 +-
 .../sysml/runtime/matrix/data/MatrixBlock.java  |  70 +++---
 .../functions/append/NaryCBindTest.java         | 192 ++++++++++++++++
 .../functions/append/NaryRBindTest.java         | 192 ++++++++++++++++
 src/test/scripts/functions/append/NaryCbind.R   |  32 +++
 src/test/scripts/functions/append/NaryCbind.dml |  26 +++
 src/test/scripts/functions/append/NaryRbind.R   |  32 +++
 src/test/scripts/functions/append/NaryRbind.dml |  26 +++
 .../functions/append/ZPackageSuite.java         |   2 +
 34 files changed, 1392 insertions(+), 599 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/c0b6ef5c/scripts/algorithms/KM.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/KM.dml b/scripts/algorithms/KM.dml
index 26f42d6..8a4b1f7 100644
--- a/scripts/algorithms/KM.dml
+++ b/scripts/algorithms/KM.dml
@@ -167,7 +167,7 @@ if (GI_1_1 == 0 & SI_1_1 == 0) {
 } else if (SI_1_1 == 0) {
        Is = cbind (TE, GI);
 } else {
-       Is = cbind (TE, cbind (GI, SI));
+       Is = cbind (TE, GI, SI);
 }
 X = X %*% table (Is, seq (1, 2 + n_group_cols + n_stratum_cols), ncol (X), 2 + 
n_group_cols + n_stratum_cols); 
 
@@ -201,7 +201,7 @@ if (n_group_cols > 0) {
                Gi = matrix (1, rows = num_records, cols = 1);
        }
        if (n_stratum_cols > 0) {
-               X = cbind (cbind (X[,1:2],Gi), X[,(3 + g):ncol (X)]);
+               X = cbind (X[,1:2], Gi, X[,(3 + g):ncol (X)]);
        } else { # no strata
                X = cbind (X[,1:2],Gi);
        }
@@ -244,7 +244,7 @@ if (n_group_cols == 0 & n_stratum_cols == 0) {
        X = cbind (X, matrix (1, rows = num_records, cols = 2));
        SB = matrix (1, rows = 1, cols = 1);    
 } else if (n_group_cols == 0) {        
-       X = cbind (X[,1:2], cbind (matrix (1, rows = num_records, cols = 1), 
X[,3]));
+       X = cbind (X[,1:2], matrix (1, rows = num_records, cols = 1), X[,3]);
 } else if (n_stratum_cols == 0) {
        X = cbind (X, matrix (1, rows = num_records, cols = 1));
        SB = matrix (1, rows = 1, cols = 1);
@@ -586,7 +586,7 @@ M = replace (target = M, pattern = "Infinity", replacement 
= "NaN");
 
 # pull out non-empty rows from TEST
 if (n_group_cols > 0 & n_stratum_cols > 0) {
-       M = cbind (cbind (G_cols, S_cols), M);
+       M = cbind (G_cols, S_cols, M);
        if (test_type != "none") {
                TEST = cbind (G_cols_original, TEST);
        }

http://git-wip-us.apache.org/repos/asf/systemml/blob/c0b6ef5c/scripts/datagen/genRandData4StratStats.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/genRandData4StratStats.dml 
b/scripts/datagen/genRandData4StratStats.dml
index fb2aae0..6a4c07f 100644
--- a/scripts/datagen/genRandData4StratStats.dml
+++ b/scripts/datagen/genRandData4StratStats.dml
@@ -132,21 +132,21 @@ RY_records = Rand (rows = num_features, cols = 
num_records, pdf = "normal");  #
 
 t_X = RX_records * stdev_X_within_strata + (RX_strata * stdev_X_between_strata 
+ mean_X) %*% Smap;
 t_Y = RY_records * stdev_Y_within_strata + (RY_strata * stdev_Y_between_strata 
+ mean_Y) %*% Smap + (t_X * betas);
-Data = cbind (cbind (min_stratumID - 1 + SID, t(t_X)), t(t_Y));
+Data = cbind (min_stratumID - 1 + SID, t(t_X), t(t_Y));
 
 # Set up the NaNs
 
 RNaNS = Rand  (rows = num_records, cols = 1, min = 1.0, max = 1.0, sparsity = 
prob_NaN_in_stratum);
 RNaNX = Rand  (rows = num_records, cols = num_features, min = 1.0, max = 1.0, 
sparsity = prob_NaN_in_X);
 RNaNY = Rand  (rows = num_records, cols = num_features, min = 1.0, max = 1.0, 
sparsity = prob_NaN_in_Y);
-Mask = (cbind (cbind (RNaNS, RNaNX), RNaNY)) != 0;
+Mask = cbind (RNaNS, RNaNX, RNaNY) != 0;
 Data = Data + (1.0 - Mask) / (1.0 - Mask);
 
 # Output the dataset and the auxiliaries
 
 Xcid = t(seq (2, num_features + 1, 1));
 Ycid = t(seq (num_features + 2, 2 * num_features + 1, 1));
-Aux = cbind (cbind (mean_X, mean_Y), betas);
+Aux = cbind (mean_X, mean_Y, betas);
 
 write (Data, fileData, format=fmt);
 write (Xcid, fileXcid, format=fmt);

http://git-wip-us.apache.org/repos/asf/systemml/blob/c0b6ef5c/scripts/nn/layers/lstm.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/layers/lstm.dml b/scripts/nn/layers/lstm.dml
index 7c21f2b..4e8460c 100644
--- a/scripts/nn/layers/lstm.dml
+++ b/scripts/nn/layers/lstm.dml
@@ -206,7 +206,7 @@ backward = function(matrix[double] dout, matrix[double] dc,
     df_raw = f * (1-f) * df
     do_raw = o * (1-o) * do
     dg_raw = (1-g^2) * dg
-    difog_raw = cbind(di_raw, cbind(df_raw, cbind(do_raw, dg_raw)))  # shape 
(N, 4M)
+    difog_raw = cbind(di_raw, df_raw, do_raw, dg_raw)  # shape (N, 4M)
 
     dW = dW + t(input) %*% difog_raw  # shape (D+M, 4M)
     db = db + colSums(difog_raw)  # shape (1, 4M)

http://git-wip-us.apache.org/repos/asf/systemml/blob/c0b6ef5c/scripts/utils/image_utils.dml
----------------------------------------------------------------------
diff --git a/scripts/utils/image_utils.dml b/scripts/utils/image_utils.dml
index 813bc0e..3802f01 100644
--- a/scripts/utils/image_utils.dml
+++ b/scripts/utils/image_utils.dml
@@ -32,7 +32,7 @@ crop_rgb = function(matrix[double] input, int Hin, int Win, 
int Hout, int Wout)
        temp_mask = matrix(1, rows=Hout, cols=Wout)
        mask[start_h:end_h, start_w:end_w] = temp_mask
        mask = matrix(mask, rows=1, cols=Hin*Win)
-       mask = cbind(cbind(mask, mask), mask)
+       mask = cbind(mask, mask, mask)
        out = removeEmpty(target=(input+1), margin="cols", select=mask) - 1
 }
 

http://git-wip-us.apache.org/repos/asf/systemml/blob/c0b6ef5c/src/main/java/org/apache/sysml/hops/Hop.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/Hop.java 
b/src/main/java/org/apache/sysml/hops/Hop.java
index ec0a80c..350c64a 100644
--- a/src/main/java/org/apache/sysml/hops/Hop.java
+++ b/src/main/java/org/apache/sysml/hops/Hop.java
@@ -38,7 +38,7 @@ import org.apache.sysml.lops.Data;
 import org.apache.sysml.lops.Lop;
 import org.apache.sysml.lops.LopProperties.ExecType;
 import org.apache.sysml.lops.LopsException;
-import org.apache.sysml.lops.MultipleCP;
+import org.apache.sysml.lops.Nary;
 import org.apache.sysml.lops.ReBlock;
 import org.apache.sysml.lops.UnaryCP;
 import org.apache.sysml.parser.Expression.DataType;
@@ -1100,8 +1100,8 @@ public abstract class Hop implements ParseInfo
        }
        
        // Operations that require a variable number of operands
-       public enum MultiInputOp {
-               PRINTF
+       public enum OpOpN {
+               PRINTF, CBIND, RBIND,
        }
        
        public enum AggOp {
@@ -1351,10 +1351,12 @@ public abstract class Hop implements ParseInfo
         * constructLops() method that is used to construct the Lops that 
correspond
         * to a Hop.
         */
-       protected static final HashMap<MultiInputOp, MultipleCP.OperationType> 
MultipleOperandOperationHopTypeToLopType;
+       protected static final HashMap<OpOpN, Nary.OperationType> HopsOpOpNLops;
        static {
-               MultipleOperandOperationHopTypeToLopType = new HashMap<>();
-               
MultipleOperandOperationHopTypeToLopType.put(MultiInputOp.PRINTF, 
MultipleCP.OperationType.PRINTF);
+               HopsOpOpNLops = new HashMap<>();
+               HopsOpOpNLops.put(OpOpN.PRINTF, Nary.OperationType.PRINTF);
+               HopsOpOpNLops.put(OpOpN.CBIND, Nary.OperationType.CBIND);
+               HopsOpOpNLops.put(OpOpN.RBIND, Nary.OperationType.RBIND);
        }
 
        protected static final HashMap<Hop.OpOp1, String> HopsOpOp12String;

http://git-wip-us.apache.org/repos/asf/systemml/blob/c0b6ef5c/src/main/java/org/apache/sysml/hops/MultipleOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/MultipleOp.java 
b/src/main/java/org/apache/sysml/hops/MultipleOp.java
deleted file mode 100644
index 5c178c0..0000000
--- a/src/main/java/org/apache/sysml/hops/MultipleOp.java
+++ /dev/null
@@ -1,185 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.hops;
-
-import java.util.ArrayList;
-
-import org.apache.sysml.lops.Lop;
-import org.apache.sysml.lops.LopProperties.ExecType;
-import org.apache.sysml.lops.LopsException;
-import org.apache.sysml.lops.MultipleCP;
-import org.apache.sysml.parser.Expression.DataType;
-import org.apache.sysml.parser.Expression.ValueType;
-
-/**
- * The MultipleOp Hop allows for a variable number of operands. Functionality
- * such as 'printf' (overloaded into the existing print function) is an example
- * of an operation that potentially takes a variable number of operands.
- *
- */
-public class MultipleOp extends Hop {
-       protected MultiInputOp _op = null;
-
-       protected MultipleOp() {
-       }
-
-       /**
-        * MultipleOp constructor.
-        * 
-        * @param name
-        *            the target name, typically set by the DMLTranslator when
-        *            constructing Hops. (For example, 'parsertemp1'.)
-        * @param dataType
-        *            the target data type (SCALAR for printf)
-        * @param valueType
-        *            the target value type (STRING for printf)
-        * @param multipleOperandOperation
-        *            the operation type (such as PRINTF)
-        * @param inputs
-        *            a variable number of input Hops
-        * @throws HopsException
-        *             thrown if a HopsException occurs
-        */
-       public MultipleOp(String name, DataType dataType, ValueType valueType,
-                       MultiInputOp multipleOperandOperation, Hop... inputs) 
throws HopsException {
-               super(name, dataType, valueType);
-               _op = multipleOperandOperation;
-
-               for (int i = 0; i < inputs.length; i++) {
-                       getInput().add(i, inputs[i]);
-                       inputs[i].getParent().add(this);
-               }
-       }
-
-       /** MultipleOp may have any number of inputs. */
-       @Override
-       public void checkArity() throws HopsException {}
-
-       public MultiInputOp getOp() {
-               return _op;
-       }
-
-       @Override
-       public String getOpString() {
-               return "m(" + _op.name().toLowerCase() + ")";
-       }
-       
-       @Override
-       public boolean isGPUEnabled() {
-               return false;
-       }
-
-       /**
-        * Construct the corresponding Lops for this Hop
-        */
-       @Override
-       public Lop constructLops() throws HopsException, LopsException {
-               // reuse existing lop
-               if (getLops() != null)
-                       return getLops();
-
-               try {
-                       ArrayList<Hop> inHops = getInput();
-                       Lop[] inLops = new Lop[inHops.size()];
-                       for (int i = 0; i < inHops.size(); i++) {
-                               Hop inHop = inHops.get(i);
-                               Lop inLop = inHop.constructLops();
-                               inLops[i] = inLop;
-                       }
-
-                       MultipleCP.OperationType opType = 
MultipleOperandOperationHopTypeToLopType.get(_op);
-                       if (opType == null) {
-                               throw new HopsException("Unknown MultipleCP Lop 
operation type for "
-                                               + "MultipleOperandOperation Hop 
type '" + _op + "'");
-                       }
-
-                       MultipleCP multipleCPLop = new MultipleCP(opType, 
getDataType(), getValueType(), inLops);
-                       setOutputDimensions(multipleCPLop);
-                       setLineNumbers(multipleCPLop);
-                       setLops(multipleCPLop);
-               } catch (Exception e) {
-                       throw new HopsException(this.printErrorLocation() + 
"error constructing Lops for MultipleOp Hop -- \n ", e);
-               }
-
-               // add reblock/checkpoint lops if necessary
-               constructAndSetLopsDataFlowProperties();
-
-               return getLops();
-       }
-
-       @Override
-       protected double computeOutputMemEstimate(long dim1, long dim2, long 
nnz) {
-               double sparsity = OptimizerUtils.getSparsity(dim1, dim2, nnz);
-               return OptimizerUtils.estimateSizeExactSparsity(dim1, dim2, 
sparsity);
-       }
-
-       @Override
-       public boolean allowsAllExecTypes() {
-               return false;
-       }
-
-       @Override
-       protected ExecType optFindExecType() throws HopsException {
-               return ExecType.CP;
-       }
-
-       @Override
-       public void refreshSizeInformation() {
-               // do nothing
-       }
-
-       @Override
-       public Object clone() throws CloneNotSupportedException {
-               MultipleOp multipleOp = new MultipleOp();
-
-               // copy generic attributes
-               multipleOp.clone(this, false);
-
-               // copy specific attributes
-               multipleOp._op = _op;
-
-               return multipleOp;
-       }
-
-       @Override
-       public boolean compare(Hop that) {
-               if (!(that instanceof MultipleOp))
-                       return false;
-
-               if (_op == MultiInputOp.PRINTF) {
-                       return false;
-               }
-
-               // if add new multiple operand types in addition to PRINTF,
-               // probably need to modify this.
-               MultipleOp mo = (MultipleOp) that;
-               return (_op == mo._op);
-       }
-
-       @Override
-       protected double computeIntermediateMemEstimate(long dim1, long dim2, 
long nnz) {
-               return 0;
-       }
-
-       @Override
-       protected long[] inferOutputCharacteristics(MemoTable memo) {
-               return null;
-       }
-}

http://git-wip-us.apache.org/repos/asf/systemml/blob/c0b6ef5c/src/main/java/org/apache/sysml/hops/NaryOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/NaryOp.java 
b/src/main/java/org/apache/sysml/hops/NaryOp.java
new file mode 100644
index 0000000..cc90044
--- /dev/null
+++ b/src/main/java/org/apache/sysml/hops/NaryOp.java
@@ -0,0 +1,219 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.hops;
+
+import org.apache.sysml.hops.rewrite.HopRewriteUtils;
+import org.apache.sysml.lops.Lop;
+import org.apache.sysml.lops.LopProperties.ExecType;
+import org.apache.sysml.lops.LopsException;
+import org.apache.sysml.lops.Nary;
+import org.apache.sysml.parser.Expression.DataType;
+import org.apache.sysml.parser.Expression.ValueType;
+
+/**
+ * The NaryOp Hop allows for a variable number of operands. Functionality
+ * such as 'printf' (overloaded into the existing print function) is an example
+ * of an operation that potentially takes a variable number of operands.
+ *
+ */
+public class NaryOp extends Hop {
+       protected OpOpN _op = null;
+
+       protected NaryOp() {
+       }
+
+       /**
+        * NaryOp constructor.
+        * 
+        * @param name
+        *            the target name, typically set by the DMLTranslator when
+        *            constructing Hops. (For example, 'parsertemp1'.)
+        * @param dataType
+        *            the target data type (SCALAR for printf)
+        * @param valueType
+        *            the target value type (STRING for printf)
+        * @param op
+        *            the operation type (such as PRINTF)
+        * @param inputs
+        *            a variable number of input Hops
+        * @throws HopsException
+        *             thrown if a HopsException occurs
+        */
+       public NaryOp(String name, DataType dataType, ValueType valueType,
+                       OpOpN op, Hop... inputs) throws HopsException {
+               super(name, dataType, valueType);
+               _op = op;
+
+               for (int i = 0; i < inputs.length; i++) {
+                       getInput().add(i, inputs[i]);
+                       inputs[i].getParent().add(this);
+               }
+       }
+
+       /** NaryOp may have any number of inputs. */
+       @Override
+       public void checkArity() throws HopsException {}
+
+       public OpOpN getOp() {
+               return _op;
+       }
+
+       @Override
+       public String getOpString() {
+               return "m(" + _op.name().toLowerCase() + ")";
+       }
+       
+       @Override
+       public boolean isGPUEnabled() {
+               return false;
+       }
+
+       /**
+        * Construct the corresponding Lops for this Hop
+        */
+       @Override
+       public Lop constructLops() throws HopsException, LopsException {
+               // reuse existing lop
+               if (getLops() != null)
+                       return getLops();
+
+               try {
+                       Lop[] inLops = new Lop[getInput().size()];
+                       for (int i = 0; i < getInput().size(); i++)
+                               inLops[i] = getInput().get(i).constructLops();
+                       
+                       Nary.OperationType opType = HopsOpOpNLops.get(_op);
+                       if (opType == null)
+                               throw new HopsException("Unknown Nary Lop type 
for '"+_op+"'");
+                       
+                       ExecType et = optFindExecType();
+                       Nary multipleCPLop = new Nary(opType, getDataType(), 
getValueType(), inLops, et);
+                       setOutputDimensions(multipleCPLop);
+                       setLineNumbers(multipleCPLop);
+                       setLops(multipleCPLop);
+               } 
+               catch (Exception e) {
+                       throw new HopsException(this.printErrorLocation() + 
"error constructing Lops for NaryOp -- \n ", e);
+               }
+
+               // add reblock/checkpoint lops if necessary
+               constructAndSetLopsDataFlowProperties();
+
+               return getLops();
+       }
+       
+       @Override
+       public boolean allowsAllExecTypes() {
+               return false;
+       }
+
+       @Override
+       protected double computeOutputMemEstimate(long dim1, long dim2, long 
nnz) {
+               double sparsity = OptimizerUtils.getSparsity(dim1, dim2, nnz);
+               return OptimizerUtils.estimateSizeExactSparsity(dim1, dim2, 
sparsity);
+       }
+
+       @Override
+       protected ExecType optFindExecType() throws HopsException {
+               
+               checkAndSetForcedPlatform();
+               
+               ExecType REMOTE = OptimizerUtils.isSparkExecutionMode() ? 
ExecType.SPARK : ExecType.MR;
+               
+               //forced / memory-based / threshold-based decision
+               if( _etypeForced != null ) {
+                       _etype = _etypeForced;
+               }
+               else
+               {
+                       if ( OptimizerUtils.isMemoryBasedOptLevel() ) 
+                               _etype = findExecTypeByMemEstimate();
+                       // Choose CP, if the input dimensions are below 
threshold or if the input is a vector
+                       else if ( areDimsBelowThreshold() )
+                               _etype = ExecType.CP;
+                       else 
+                               _etype = REMOTE;
+                       
+                       //check for valid CP dimensions and matrix size
+                       checkAndSetInvalidCPDimsAndSize();
+               }
+               
+               //mark for recompile (forever)
+               setRequiresRecompileIfNecessary();
+               
+               //ensure cp exec type for single-node operations
+               if ( _op == OpOpN.PRINTF )
+                       _etype = ExecType.CP;
+               
+               return _etype;
+       }
+
+       @Override
+       protected double computeIntermediateMemEstimate(long dim1, long dim2, 
long nnz) {
+               return 0;
+       }
+
+       @Override
+       protected long[] inferOutputCharacteristics(MemoTable memo) {
+               return null; //do nothing
+       }
+       
+       /**
+        * Refreshes the output dimensions from the current input dimensions.
+        * CBIND: dim1 (rows) is the max dim1 over all inputs and dim2 (cols)
+        * is the sum of all input dim2 values. RBIND: dim1 is the sum of all
+        * input dim1 values and dim2 is the max dim2 over all inputs. A sum
+        * is -1 unless every input has a positive size in that dimension.
+        * PRINTF: dimensions are left untouched.
+        */
+       @Override
+       public void refreshSizeInformation() {
+               switch( _op ) {
+                       case CBIND:
+                               setDim1(HopRewriteUtils.getMaxInputDim(this, 
true));
+                               
setDim2(HopRewriteUtils.getSumValidInputDims(this, false));
+                               break;
+                       case RBIND:
+                               //rows accumulate over inputs (dim1=true), cols are shared
+                               
setDim1(HopRewriteUtils.getSumValidInputDims(this, true));
+                               setDim2(HopRewriteUtils.getMaxInputDim(this, 
false));
+                               break;
+                       case PRINTF:
+                               //do nothing:
+               }
+       }
+
+       @Override
+       public Object clone() throws CloneNotSupportedException {
+               NaryOp multipleOp = new NaryOp();
+
+               // copy generic attributes
+               multipleOp.clone(this, false);
+
+               // copy specific attributes
+               multipleOp._op = _op;
+
+               return multipleOp;
+       }
+
+       @Override
+       public boolean compare(Hop that) {
+               if (!(that instanceof NaryOp) || _op == OpOpN.PRINTF)
+                       return false;
+               
+               NaryOp that2 = (NaryOp) that;
+               boolean ret = (_op == that2._op
+                       && getInput().size() == that2.getInput().size());
+               for( int i=0; i<getInput().size() && ret; i++ )
+                       ret &= (getInput().get(i) == that2.getInput().get(i));
+               return ret;
+       }
+}

http://git-wip-us.apache.org/repos/asf/systemml/blob/c0b6ef5c/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java 
b/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
index ad2392a..b6db466 100644
--- a/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
+++ b/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
@@ -1214,4 +1214,21 @@ public class HopRewriteUtils
                        || sb instanceof IfStatementBlock
                        || sb instanceof ForStatementBlock); //incl parfor
        }
+       
+       public static long getMaxInputDim(Hop hop, boolean dim1) {
+               return hop.getInput().stream().mapToLong(
+                       h -> (dim1?h.getDim1():h.getDim2())).max().orElse(-1);
+       }
+       
+       public static long getSumValidInputDims(Hop hop, boolean dim1) {
+               if( !hasValidInputDims(hop, dim1) )
+                       return -1;
+               return hop.getInput().stream().mapToLong(
+                       h -> (dim1?h.getDim1():h.getDim2())).sum();
+       }
+       
+       public static boolean hasValidInputDims(Hop hop, boolean dim1) {
+               return hop.getInput().stream().allMatch(
+                       h -> (dim1?h.getDim1()>0:h.getDim2()>0));
+       }
 }

http://git-wip-us.apache.org/repos/asf/systemml/blob/c0b6ef5c/src/main/java/org/apache/sysml/hops/rewrite/RewriteRemoveDanglingParentReferences.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/hops/rewrite/RewriteRemoveDanglingParentReferences.java
 
b/src/main/java/org/apache/sysml/hops/rewrite/RewriteRemoveDanglingParentReferences.java
index ac8fb68..573b9fb 100644
--- 
a/src/main/java/org/apache/sysml/hops/rewrite/RewriteRemoveDanglingParentReferences.java
+++ 
b/src/main/java/org/apache/sysml/hops/rewrite/RewriteRemoveDanglingParentReferences.java
@@ -25,10 +25,10 @@ import org.apache.sysml.hops.DataOp;
 import org.apache.sysml.hops.FunctionOp;
 import org.apache.sysml.hops.Hop;
 import org.apache.sysml.hops.Hop.DataOpTypes;
-import org.apache.sysml.hops.Hop.MultiInputOp;
+import org.apache.sysml.hops.Hop.OpOpN;
 import org.apache.sysml.hops.Hop.OpOp1;
 import org.apache.sysml.hops.HopsException;
-import org.apache.sysml.hops.MultipleOp;
+import org.apache.sysml.hops.NaryOp;
 import org.apache.sysml.hops.UnaryOp;
 
 /**
@@ -108,7 +108,7 @@ public class RewriteRemoveDanglingParentReferences extends 
HopRewriteRule
                return (hop instanceof DataOp && ((DataOp)hop).isWrite())
                        || (hop instanceof UnaryOp && 
((UnaryOp)hop).getOp()==OpOp1.STOP)
                        || (hop instanceof UnaryOp && 
((UnaryOp)hop).getOp()==OpOp1.PRINT)
-                       || (hop instanceof MultipleOp && 
((MultipleOp)hop).getOp()==MultiInputOp.PRINTF)
+                       || (hop instanceof NaryOp && 
((NaryOp)hop).getOp()==OpOpN.PRINTF)
                        || (hop instanceof FunctionOp)
                        || (hop instanceof DataOp && 
((DataOp)hop).getDataOpType()==DataOpTypes.FUNCTIONOUTPUT);
        }

http://git-wip-us.apache.org/repos/asf/systemml/blob/c0b6ef5c/src/main/java/org/apache/sysml/lops/Lop.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/lops/Lop.java 
b/src/main/java/org/apache/sysml/lops/Lop.java
index 499cd59..8909ea4 100644
--- a/src/main/java/org/apache/sysml/lops/Lop.java
+++ b/src/main/java/org/apache/sysml/lops/Lop.java
@@ -41,7 +41,7 @@ public abstract class Lop
                Data, DataGen,                                      //CP/MR 
read/write/datagen 
                ReBlock, CSVReBlock,                                //MR 
reblock operations
                MMCJ, MMRJ, MMTSJ, PMMJ, MapMult, MapMultChain,     //MR matrix 
multiplications
-               UnaryCP, UNARY, BinaryCP, Binary, Ternary,          //CP/MR 
unary/binary/ternary
+               UnaryCP, UNARY, BinaryCP, Binary, Ternary, Nary,    //CP/MR 
unary/binary/ternary
                RightIndex, LeftIndex, ZeroOut,                     //CP/MR 
indexing 
                Aggregate, PartialAggregate,                        //CP/MR 
aggregation
                BinUaggChain, UaggOuterChain,                       //CP/MR 
aggregation
@@ -59,8 +59,6 @@ public abstract class Lop
                Checkpoint,                                         //Spark 
persist into storage level
                PlusMult, MinusMult,                                //CP
                SpoofFused,                                         //CP/SP 
generated fused operator
-               /** CP operation on a variable number of operands */
-               MULTIPLE_CP
        }
 
        /**

http://git-wip-us.apache.org/repos/asf/systemml/blob/c0b6ef5c/src/main/java/org/apache/sysml/lops/MultipleCP.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/lops/MultipleCP.java 
b/src/main/java/org/apache/sysml/lops/MultipleCP.java
deleted file mode 100644
index 2560861..0000000
--- a/src/main/java/org/apache/sysml/lops/MultipleCP.java
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.lops;
-
-import org.apache.sysml.lops.LopProperties.ExecLocation;
-import org.apache.sysml.lops.LopProperties.ExecType;
-import org.apache.sysml.lops.compile.JobType;
-import org.apache.sysml.parser.Expression.DataType;
-import org.apache.sysml.parser.Expression.ValueType;
-
-/**
- * Lop to perform an operation on a variable number of operands.
- * 
- */
-public class MultipleCP extends Lop {
-
-       public enum OperationType {
-               PRINTF
-       }
-
-       OperationType operationType;
-
-       public MultipleCP(OperationType operationType, DataType dt, ValueType vt, Lop... inputLops) {
-               super(Lop.Type.MULTIPLE_CP, dt, vt);
-               this.operationType = operationType;
-               for (Lop inputLop : inputLops) {
-                       addInput(inputLop);
-                       inputLop.addOutput(this);
-               }
-
-               boolean breaksAlignment = false; // ?
-               boolean aligner = false; // ?
-               boolean definesMRJob = false; // ?
-               lps.addCompatibility(JobType.INVALID); // ?
-               this.lps.setProperties(inputs, ExecType.CP, ExecLocation.ControlProgram, breaksAlignment, aligner,
-                               definesMRJob); // ?
-       }
-
-       @Override
-       public String toString() {
-               return "Operation Type: " + operationType;
-       }
-
-       public OperationType getOperationType() {
-               return operationType;
-       }
-
-       /**
-        * Generate the complete instruction string for this Lop. This instruction
-        * string can have a variable number of input operands. It displays the
-        * following:
-        * 
-        * <ul>
-        * <li>Execution type (CP, SPARK, etc.)
-        * <li>Operand delimiter (&deg;)
-        * <li>Opcode (printf, etc.)
-        * <li>Operand delimiter (&deg;)
-        * <li>Variable number of inputs, each followed by an operand delimiter
-        * (&deg;)
-        * <ul>
-        * <li>Input consists of (label &middot; data type &middot; value type
-        * &middot; is literal)
-        * </ul>
-        * <li>Output consisting of (label &middot; data type &middot; value
-        * type)
-        * </ul>
-        *
-        * Example: <br>
-        * The following DML<br>
-        * <code>print('hello %s', 'world')</code><br>
-        * generates the instruction string:<br>
-        * <code>CP&deg;printf&deg;hello %s&middot;SCALAR&middot;STRING&middot;true&deg;world&middot;SCALAR&middot;STRING&middot;true&deg;_Var1&middot;SCALAR&middot;STRING</code><br>
-        * 
-        * Note: This generated instruction string is parsed in the
-        * parseInstruction() method of BuiltinMultipleCPInstruction, which parses
-        * the instruction string to generate an instruction object that is a
-        * subclass of BuiltinMultipleCPInstruction.
-        */
-       @Override
-       public String getInstructions(String output) throws LopsException {
-               String opString = getOpcode();
-
-               StringBuilder sb = new StringBuilder();
-
-               sb.append(getExecType());
-               sb.append(Lop.OPERAND_DELIMITOR);
-
-               sb.append(opString);
-               sb.append(OPERAND_DELIMITOR);
-
-               for (Lop input : inputs) {
-                       sb.append(input.prepScalarInputOperand(getExecType()));
-                       sb.append(OPERAND_DELIMITOR);
-               }
-
-               sb.append(prepOutputOperand(output));
-
-               return sb.toString();
-       }
-
-       private String getOpcode() throws LopsException {
-               switch (operationType) {
-               case PRINTF:
-                       return OperationType.PRINTF.toString().toLowerCase();
-               default:
-                       throw new UnsupportedOperationException(
-                                       "MultipleCP operation type (" + operationType + ") is not defined.");
-               }
-       }
-
-}

http://git-wip-us.apache.org/repos/asf/systemml/blob/c0b6ef5c/src/main/java/org/apache/sysml/lops/Nary.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/lops/Nary.java b/src/main/java/org/apache/sysml/lops/Nary.java
new file mode 100644
index 0000000..90966fa
--- /dev/null
+++ b/src/main/java/org/apache/sysml/lops/Nary.java
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.lops;
+
+import org.apache.sysml.lops.LopProperties.ExecLocation;
+import org.apache.sysml.lops.LopProperties.ExecType;
+import org.apache.sysml.lops.compile.JobType;
+import org.apache.sysml.parser.Expression.DataType;
+import org.apache.sysml.parser.Expression.ValueType;
+
+/**
+ * Lop to perform an operation on a variable number of operands.
+ * 
+ */
+public class Nary extends Lop {
+
+       public enum OperationType {
+               PRINTF, CBIND, RBIND,
+       }
+       
+       private OperationType operationType;
+
+       public Nary(OperationType operationType, DataType dt, ValueType vt, Lop[] inputLops, ExecType et) 
+               throws LopsException 
+       {
+               super(Lop.Type.Nary, dt, vt);
+               this.operationType = operationType;
+               for (Lop inputLop : inputLops) {
+                       addInput(inputLop);
+                       inputLop.addOutput(this);
+               }
+               
+               if( et == ExecType.CP || et == ExecType.SPARK ) {
+                       lps.addCompatibility(JobType.INVALID);
+                       lps.setProperties(inputs, et, ExecLocation.ControlProgram, false, false, false);
+               }
+               else {
+                       throw new LopsException("Unsupported exec type for nary lop:" + et.name());
+               }
+       }
+
+       @Override
+       public String toString() {
+               return "Operation Type: " + operationType;
+       }
+
+       public OperationType getOperationType() {
+               return operationType;
+       }
+
+       /**
+        * Generate the complete instruction string for this Lop. This instruction
+        * string can have a variable number of input operands. It displays the
+        * following:
+        * 
+        * <ul>
+        * <li>Execution type (CP, SPARK, etc.)
+        * <li>Operand delimiter (&deg;)
+        * <li>Opcode (printf, etc.)
+        * <li>Operand delimiter (&deg;)
+        * <li>Variable number of inputs, each followed by an operand delimiter
+        * (&deg;)
+        * <ul>
+        * <li>Input consists of (label &middot; data type &middot; value type
+        * &middot; is literal)
+        * </ul>
+        * <li>Output consisting of (label &middot; data type &middot; value
+        * type)
+        * </ul>
+        *
+        * Example: <br>
+        * The following DML<br>
+        * <code>print('hello %s', 'world')</code><br>
+        * generates the instruction string:<br>
+        * <code>CP&deg;printf&deg;hello %s&middot;SCALAR&middot;STRING&middot;true&deg;world&middot;SCALAR&middot;STRING&middot;true&deg;_Var1&middot;SCALAR&middot;STRING</code><br>
+        * 
+        */
+       @Override
+       public String getInstructions(String[] inputs, String output) throws LopsException {
+               String opString = getOpcode();
+
+               StringBuilder sb = new StringBuilder();
+
+               sb.append(getExecType());
+               sb.append(Lop.OPERAND_DELIMITOR);
+
+               sb.append(opString);
+               sb.append(OPERAND_DELIMITOR);
+
+               for( int i=0; i<inputs.length; i++ ) {
+                       sb.append(getInputs().get(i).prepInputOperand(inputs[i]));
+                       sb.append(OPERAND_DELIMITOR);
+               }
+
+               sb.append(prepOutputOperand(output));
+
+               return sb.toString();
+       }
+       
+       private String getOpcode() throws LopsException {
+               switch (operationType) {
+                       case PRINTF:
+                       case CBIND:
+                       case RBIND:
+                               return operationType.name().toLowerCase();
+                       default:
+                               throw new UnsupportedOperationException(
+                                       "Nary operation type (" + operationType + ") is not defined.");
+               }
+       }
+}

http://git-wip-us.apache.org/repos/asf/systemml/blob/c0b6ef5c/src/main/java/org/apache/sysml/lops/compile/Dag.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/lops/compile/Dag.java b/src/main/java/org/apache/sysml/lops/compile/Dag.java
index 91f49f0..4edf230 100644
--- a/src/main/java/org/apache/sysml/lops/compile/Dag.java
+++ b/src/main/java/org/apache/sysml/lops/compile/Dag.java
@@ -1383,14 +1383,16 @@ public class Dag<N extends Lop>
                                                inputs[count++] = in.getOutputParameters().getLabel();
                                        count = 0;
                                        for( Lop out : node.getOutputs() )
-                                       {
                                                outputs[count++] = out.getOutputParameters().getLabel();
-                                       }
-                                       
                                        inst_string = node.getInstructions(inputs, outputs);
                                }
-                               else if (node.getType() == Lop.Type.MULTIPLE_CP) { // ie, MultipleCP class
-                                       inst_string = node.getInstructions(node.getOutputParameters().getLabel());
+                               else if (node.getType() == Lop.Type.Nary) {
+                                       String[] inputs = new String[node.getInputs().size()];
+                                       int count = 0;
+                                       for( Lop in : node.getInputs() )
+                                               inputs[count++] = in.getOutputParameters().getLabel();
+                                       inst_string = node.getInstructions(inputs, 
+                                               node.getOutputParameters().getLabel());
                                }
                                else {
                                        if ( node.getInputs().isEmpty() ) {

http://git-wip-us.apache.org/repos/asf/systemml/blob/c0b6ef5c/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java b/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java
index 9a8129c..566ddf8 100644
--- a/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java
+++ b/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java
@@ -91,6 +91,10 @@ public class BuiltinFunctionExpression extends DataIdentifier
                return _args;
        }
        
+       public Expression getExpr(int i) {
+               return (_args.length > i ? _args[i] : null);    
+       }
+       
        @Override
        public void validateExpression(MultiAssignmentStatement stmt, HashMap<String, DataIdentifier> ids, HashMap<String, ConstIdentifier> constVars, boolean conditional)
                        throws LanguageException 
@@ -539,50 +543,53 @@ public class BuiltinFunctionExpression extends DataIdentifier
                        break;
                        
                case CBIND:
-               case RBIND:     
-                       checkNumParameters(2);
-                       
+               case RBIND:
                        //scalar string append (string concatenation with \n)
                        if( getFirstExpr().getOutput().getDataType()==DataType.SCALAR ) {
+                               checkNumParameters(2);
                                checkScalarParam(getFirstExpr());
                                checkScalarParam(getSecondExpr());
                                checkValueTypeParam(getFirstExpr(), ValueType.STRING);
                                checkValueTypeParam(getSecondExpr(), ValueType.STRING);
                        }
                        //matrix append (rbind/cbind)
-                       else {                          
-                               checkMatrixFrameParam(getFirstExpr());
-                               checkMatrixFrameParam(getSecondExpr());
+                       else {
+                               if( getAllExpr().length < 2 )
+                                       raiseValidateError("Invalid number of arguments for "+getOpCode(), conditional);
+                               for(int i=0; i<getAllExpr().length; i++)
+                                       checkMatrixFrameParam(getExpr(i));
                        }
                        
                        output.setDataType(id.getDataType());
                        output.setValueType(id.getValueType());
                        
                        // set output dimensions and validate consistency
-                       long appendDim1 = -1, appendDim2 = -1;
                        long m1rlen = getFirstExpr().getOutput().getDim1();
                        long m1clen = getFirstExpr().getOutput().getDim2();
-                       long m2rlen = getSecondExpr().getOutput().getDim1();
-                       long m2clen = getSecondExpr().getOutput().getDim2();
-                       
-                       if( getOpCode() == BuiltinFunctionOp.CBIND ) {
-                               if (m1rlen > 0 && m2rlen > 0 && m1rlen!=m2rlen) {
-                                       raiseValidateError("inputs to cbind must have same number of rows: input 1 rows: " + 
-                                               m1rlen+", input 2 rows: "+m2rlen, conditional, LanguageErrorCodes.INVALID_PARAMETERS);
-                               }                               
-                               appendDim1 = (m1rlen>0) ? m1rlen : m2rlen;
-                               appendDim2 = (m1clen>0 && m2clen>0)? m1clen + m2clen : -1;
-                       }
-                       else if( getOpCode() == BuiltinFunctionOp.RBIND ) {
-                               if (m1clen > 0 && m2clen > 0 && m1clen!=m2clen) {
-                                       raiseValidateError("inputs to rbind must have same number of columns: input 1 columns: " + 
-                                               m1clen+", input 2 columns: "+m2clen, conditional, LanguageErrorCodes.INVALID_PARAMETERS);
-                               }                               
-                               appendDim1 = (m1rlen>0 && m2rlen>0)? m1rlen + m2rlen : -1;
-                               appendDim2 = (m1clen>0) ? m1clen : m2clen;
-                       }
+                       long appendDim1 = m1rlen, appendDim2 = m1clen;
                        
-                       output.setDimensions(appendDim1, appendDim2);           
        
+                       for(int i=1; i<getAllExpr().length; i++) {
+                               long m2rlen = getExpr(i).getOutput().getDim1();
+                               long m2clen = getExpr(i).getOutput().getDim2();
+                               
+                               if( getOpCode() == BuiltinFunctionOp.CBIND ) {
+                                       if (m1rlen > 0 && m2rlen > 0 && m1rlen!=m2rlen) {
+                                               raiseValidateError("inputs to cbind must have same number of rows: input 1 rows: " + 
+                                                       m1rlen+", input 2 rows: "+m2rlen, conditional, LanguageErrorCodes.INVALID_PARAMETERS);
+                                       }
+                                       appendDim1 = (m2rlen>0) ? m2rlen : appendDim1;
+                                       appendDim2 = (appendDim2>0 && m2clen>0) ? appendDim2 + m2clen : -1;
+                               }
+                               else if( getOpCode() == BuiltinFunctionOp.RBIND ) {
+                                       if (m1clen > 0 && m2clen > 0 && m1clen!=m2clen) {
+                                               raiseValidateError("inputs to rbind must have same number of columns: input 1 columns: " + 
+                                                       m1clen+", input 2 columns: "+m2clen, conditional, LanguageErrorCodes.INVALID_PARAMETERS);
+                                       }
+                                       appendDim1 = (appendDim1>0 && m2rlen>0)? appendDim1 + m2rlen : -1;
+                                       appendDim2 = (m2clen>0) ? m2clen : appendDim2;
+                               }
+                       }
+                       output.setDimensions(appendDim1, appendDim2);
                        output.setBlockDimensions (id.getRowsInBlock(), id.getColumnsInBlock());
                        
                        break;

http://git-wip-us.apache.org/repos/asf/systemml/blob/c0b6ef5c/src/main/java/org/apache/sysml/parser/DMLTranslator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/DMLTranslator.java b/src/main/java/org/apache/sysml/parser/DMLTranslator.java
index fb8404f..0d466e2 100644
--- a/src/main/java/org/apache/sysml/parser/DMLTranslator.java
+++ b/src/main/java/org/apache/sysml/parser/DMLTranslator.java
@@ -43,7 +43,7 @@ import org.apache.sysml.hops.Hop.AggOp;
 import org.apache.sysml.hops.Hop.DataGenMethod;
 import org.apache.sysml.hops.Hop.DataOpTypes;
 import org.apache.sysml.hops.Hop.Direction;
-import org.apache.sysml.hops.Hop.MultiInputOp;
+import org.apache.sysml.hops.Hop.OpOpN;
 import org.apache.sysml.hops.Hop.OpOp2;
 import org.apache.sysml.hops.Hop.OpOp3;
 import org.apache.sysml.hops.Hop.ParamBuiltinOp;
@@ -53,7 +53,7 @@ import org.apache.sysml.hops.IndexingOp;
 import org.apache.sysml.hops.LeftIndexingOp;
 import org.apache.sysml.hops.LiteralOp;
 import org.apache.sysml.hops.MemoTable;
-import org.apache.sysml.hops.MultipleOp;
+import org.apache.sysml.hops.NaryOp;
 import org.apache.sysml.hops.OptimizerUtils;
 import org.apache.sysml.hops.ParameterizedBuiltinOp;
 import org.apache.sysml.hops.ReorgOp;
@@ -1185,10 +1185,10 @@ public class DMLTranslator
                HashMap<String, Hop> ids = new HashMap<>();
                ArrayList<Hop> output = new ArrayList<>();
 
-               VariableSet liveIn      = sb.liveIn();
+               VariableSet liveIn  = sb.liveIn();
                VariableSet liveOut = sb.liveOut();
-               VariableSet     updated = sb._updated;
-               VariableSet gen         = sb._gen;
+               VariableSet updated = sb._updated;
+               VariableSet gen     = sb._gen;
                VariableSet updatedLiveOut = new VariableSet();
 
                // handle liveout variables that are updated --> target identifiers for Assignment
@@ -1317,8 +1317,8 @@ public class DMLTranslator
                                                        inHops[j] = inHop;
                                                }
                                                target.setValueType(ValueType.STRING);
-                                               Hop printfHop = new MultipleOp(target.getName(), target.getDataType(), target.getValueType(),
-                                                               MultiInputOp.PRINTF, inHops);
+                                               Hop printfHop = new NaryOp(target.getName(), target.getDataType(), target.getValueType(),
+                                                               OpOpN.PRINTF, inHops);
                                                output.add(printfHop);
                                        }
 
@@ -2663,13 +2663,13 @@ public class DMLTranslator
                        break;
                        
                case CBIND:
-                       currBuiltinOp = new BinaryOp(target.getName(), target.getDataType(), target.getValueType(), 
-                                                                               Hop.OpOp2.CBIND, expr, expr2);
-                       break;
-               
                case RBIND:
-                       currBuiltinOp = new BinaryOp(target.getName(), target.getDataType(), target.getValueType(), 
-                                                                               Hop.OpOp2.RBIND, expr, expr2);
+                       OpOp2 appendOp1 = (source.getOpCode()==BuiltinFunctionOp.CBIND) ? OpOp2.CBIND : OpOp2.RBIND;
+                       OpOpN appendOp2 = (source.getOpCode()==BuiltinFunctionOp.CBIND) ? OpOpN.CBIND : OpOpN.RBIND;
+                       currBuiltinOp = (source.getAllExpr().length == 2) ?
+                                       new BinaryOp(target.getName(), target.getDataType(), target.getValueType(), appendOp1, expr, expr2) :
+                                       new NaryOp(target.getName(), target.getDataType(), target.getValueType(), appendOp2,
+                                                       processAllExpressions(source.getAllExpr(), hops));
                        break;
                
                case DIAG:
@@ -3098,6 +3098,13 @@ public class DMLTranslator
                return currBuiltinOp;
        }
        
+       private Hop[] processAllExpressions(Expression[] expr, HashMap<String, Hop> hops) throws ParseException {
+               Hop[] ret = new Hop[expr.length];
+               for(int i=0; i<expr.length; i++)
+                       ret[i] = processExpression(expr[i], null, hops);
+               return ret;
+       }
+       
        private static void setBlockSizeAndRefreshSizeInfo(Hop in, Hop out) {
                out.setOutputBlocksizes(in.getRowsInBlock(), in.getColsInBlock());
                out.refreshSizeInformation();

http://git-wip-us.apache.org/repos/asf/systemml/blob/c0b6ef5c/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java b/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java
index ecb9629..3b9c075 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java
@@ -267,7 +267,7 @@ public class ExecutionContext {
         * @return matrix block
         * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
-       private MatrixBlock getMatrixInput(String varName) 
+       public MatrixBlock getMatrixInput(String varName) 
                throws DMLRuntimeException 
        {       
                MatrixObject mo = getMatrixObject(varName);
@@ -419,6 +419,11 @@ public class ExecutionContext {
                }
        }
        
+       public void releaseMatrixInput(String varName) throws DMLRuntimeException {
+               MatrixObject mo = getMatrixObject(varName);
+               mo.release(null);
+       }
+       
        public void releaseMatrixInputForGPUInstruction(String varName)
                throws DMLRuntimeException 
        {
@@ -485,10 +490,11 @@ public class ExecutionContext {
                mo.getGPUObject(getGPUContext(0)).releaseOutput();
        }
        
+       public void setMatrixOutput(String varName, MatrixBlock outputData) throws DMLRuntimeException  {
+               setMatrixOutput(varName, outputData, null);
+       }
 
-       public void setMatrixOutput(String varName, MatrixBlock outputData, String opcode) 
-                       throws DMLRuntimeException 
-       {
+       public void setMatrixOutput(String varName, MatrixBlock outputData, String opcode) throws DMLRuntimeException {
                MatrixObject mo = getMatrixObject(varName);
                mo.acquireModify(outputData, opcode);
                mo.release(opcode);

http://git-wip-us.apache.org/repos/asf/systemml/blob/c0b6ef5c/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java b/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java
index d0bc429..97e1bcd 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java
@@ -37,7 +37,7 @@ import org.apache.sysml.runtime.instructions.cp.ArithmeticBinaryCPInstruction;
 import org.apache.sysml.runtime.instructions.cp.BooleanBinaryCPInstruction;
 import org.apache.sysml.runtime.instructions.cp.BooleanUnaryCPInstruction;
 import org.apache.sysml.runtime.instructions.cp.BuiltinBinaryCPInstruction;
-import org.apache.sysml.runtime.instructions.cp.BuiltinMultipleCPInstruction;
+import org.apache.sysml.runtime.instructions.cp.BuiltinNary;
 import org.apache.sysml.runtime.instructions.cp.BuiltinUnaryCPInstruction;
 import org.apache.sysml.runtime.instructions.cp.CPInstruction;
 import org.apache.sysml.runtime.instructions.cp.CPInstruction.CPINSTRUCTION_TYPE;
@@ -181,7 +181,9 @@ public class CPInstructionParser extends InstructionParser
                String2CPInstructionType.put( "sigmoid", CPINSTRUCTION_TYPE.BuiltinUnary);
                String2CPInstructionType.put( "sel+", CPINSTRUCTION_TYPE.BuiltinUnary);
                
-               String2CPInstructionType.put( "printf" , CPINSTRUCTION_TYPE.BuiltinMultiple);
+               String2CPInstructionType.put( "printf" , CPINSTRUCTION_TYPE.BuiltinNary);
+               String2CPInstructionType.put( "cbind" , CPINSTRUCTION_TYPE.BuiltinNary);
+               String2CPInstructionType.put( "rbind" , CPINSTRUCTION_TYPE.BuiltinNary);
                
                // Parameterized Builtin Functions
                String2CPInstructionType.put( "cdf"                     , CPINSTRUCTION_TYPE.ParameterizedBuiltin);
@@ -343,8 +345,10 @@ public class CPInstructionParser extends InstructionParser
                                
                        case BuiltinUnary:
                                return BuiltinUnaryCPInstruction.parseInstruction(str);
-                       case BuiltinMultiple:
-                               return BuiltinMultipleCPInstruction.parseInstruction(str);
+                       
+                       case BuiltinNary:
+                               return BuiltinNary.parseInstruction(str);
+                       
                        case Reorg:
                                return ReorgCPInstruction.parseInstruction(str);
                                
@@ -355,7 +359,7 @@ public class CPInstructionParser extends InstructionParser
                                return UaggOuterChainCPInstruction.parseInstruction(str);
                                
                        case MatrixReshape:
-                               return 
MatrixReshapeCPInstruction.parseInstruction(str);        
+                               return 
MatrixReshapeCPInstruction.parseInstruction(str);
        
                        case Append:
                                return AppendCPInstruction.parseInstruction(str);

http://git-wip-us.apache.org/repos/asf/systemml/blob/c0b6ef5c/src/main/java/org/apache/sysml/runtime/instructions/SPInstructionParser.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/SPInstructionParser.java b/src/main/java/org/apache/sysml/runtime/instructions/SPInstructionParser.java
index bff75d7..1a5033e 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/SPInstructionParser.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/SPInstructionParser.java
@@ -46,6 +46,7 @@ import org.apache.sysml.runtime.instructions.spark.AppendRSPInstruction;
 import org.apache.sysml.runtime.instructions.spark.ArithmeticBinarySPInstruction;
 import org.apache.sysml.runtime.instructions.spark.BinUaggChainSPInstruction;
 import org.apache.sysml.runtime.instructions.spark.BuiltinBinarySPInstruction;
+import org.apache.sysml.runtime.instructions.spark.BuiltinNarySPInstruction;
 import org.apache.sysml.runtime.instructions.spark.BuiltinUnarySPInstruction;
 import org.apache.sysml.runtime.instructions.spark.CSVReblockSPInstruction;
 import org.apache.sysml.runtime.instructions.spark.CastSPInstruction;
@@ -243,6 +244,8 @@ public class SPInstructionParser extends InstructionParser
                String2SPInstructionType.put( "rappend", SPINSTRUCTION_TYPE.RAppend);
                String2SPInstructionType.put( "gappend", SPINSTRUCTION_TYPE.GAppend);
                String2SPInstructionType.put( "galignedappend", SPINSTRUCTION_TYPE.GAlignedAppend);
+               String2SPInstructionType.put( "cbind", SPINSTRUCTION_TYPE.BuiltinNary);
+               String2SPInstructionType.put( "rbind", SPINSTRUCTION_TYPE.BuiltinNary);
                
                String2SPInstructionType.put( DataGen.RAND_OPCODE  , SPINSTRUCTION_TYPE.Rand);
                String2SPInstructionType.put( DataGen.SEQ_OPCODE   , SPINSTRUCTION_TYPE.Rand);
@@ -399,7 +402,10 @@ public class SPInstructionParser extends InstructionParser
                                
                        case BuiltinUnary:
                                return BuiltinUnarySPInstruction.parseInstruction(str);
-                               
+                       
+                       case BuiltinNary:
+                               return BuiltinNarySPInstruction.parseInstruction(str);
+                       
                        case ParameterizedBuiltin:
                                return ParameterizedBuiltinSPInstruction.parseInstruction(str);
                                

http://git-wip-us.apache.org/repos/asf/systemml/blob/c0b6ef5c/src/main/java/org/apache/sysml/runtime/instructions/cp/BuiltinMultipleCPInstruction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/cp/BuiltinMultipleCPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/cp/BuiltinMultipleCPInstruction.java
deleted file mode 100644
index 6da8e48..0000000
--- a/src/main/java/org/apache/sysml/runtime/instructions/cp/BuiltinMultipleCPInstruction.java
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.instructions.cp;
-
-import java.util.Arrays;
-
-import org.apache.sysml.lops.MultipleCP;
-import org.apache.sysml.runtime.DMLRuntimeException;
-import org.apache.sysml.runtime.functionobjects.Builtin;
-import org.apache.sysml.runtime.functionobjects.ValueFunction;
-import org.apache.sysml.runtime.instructions.InstructionUtils;
-import org.apache.sysml.runtime.matrix.operators.Operator;
-import org.apache.sysml.runtime.matrix.operators.SimpleOperator;
-
-/**
- * Instruction to handle a variable number of input operands. It parses an
- * instruction string to generate an object that is a subclass of
- * BuiltinMultipleCPInstruction. Currently the only subclass of
- * BuiltinMultipleCPInstruction is ScalarBuiltinMultipleCPInstruction. The
- * ScalarBuiltinMultipleCPInstruction class is responsible for printf-style
- * Java-based string formatting.
- *
- */
-public abstract class BuiltinMultipleCPInstruction extends CPInstruction {
-
-       public CPOperand output;
-       public CPOperand[] inputs;
-
-       BuiltinMultipleCPInstruction(Operator op, String opcode, String istr, 
CPOperand output, CPOperand... inputs) {
-               super(op, opcode, istr);
-               _cptype = CPINSTRUCTION_TYPE.BuiltinMultiple;
-               this.output = output;
-               this.inputs = inputs;
-       }
-
-       public static BuiltinMultipleCPInstruction parseInstruction(String str) 
throws DMLRuntimeException {
-               String[] parts = 
InstructionUtils.getInstructionPartsWithValueType(str);
-
-               String opcode = parts[0];
-
-               String outputString = parts[parts.length - 1];
-               CPOperand outputOperand = new CPOperand(outputString);
-
-               String[] inputStrings = null;
-               CPOperand[] inputOperands = null;
-               if (parts.length > 2) {
-                       inputStrings = Arrays.copyOfRange(parts, 1, 
parts.length - 1);
-                       inputOperands = new CPOperand[parts.length - 2];
-                       for (int i = 0; i < inputStrings.length; i++) {
-                               inputOperands[i] = new 
CPOperand(inputStrings[i]);
-                       }
-               }
-
-               if 
(MultipleCP.OperationType.PRINTF.toString().equalsIgnoreCase(opcode)) {
-                       ValueFunction func = Builtin.getBuiltinFnObject(opcode);
-                       return new ScalarBuiltinMultipleCPInstruction(new 
SimpleOperator(func), opcode, str, outputOperand,
-                                       inputOperands);
-               }
-               throw new DMLRuntimeException("Opcode (" + opcode + ") not 
recognized in BuiltinMultipleCPInstruction");
-       }
-}

http://git-wip-us.apache.org/repos/asf/systemml/blob/c0b6ef5c/src/main/java/org/apache/sysml/runtime/instructions/cp/BuiltinNary.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/cp/BuiltinNary.java 
b/src/main/java/org/apache/sysml/runtime/instructions/cp/BuiltinNary.java
new file mode 100644
index 0000000..1bacc70
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/instructions/cp/BuiltinNary.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.instructions.cp;
+
+import org.apache.sysml.lops.Nary;
+import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.functionobjects.Builtin;
+import org.apache.sysml.runtime.functionobjects.ValueFunction;
+import org.apache.sysml.runtime.instructions.InstructionUtils;
+import org.apache.sysml.runtime.matrix.operators.Operator;
+import org.apache.sysml.runtime.matrix.operators.SimpleOperator;
+
+/**
+ * Instruction to handle a variable number of input operands. It parses an
+ * instruction string to generate an object that is a subclass of
+ * BuiltinNary. The subclasses are ScalarBuiltinNaryCPInstruction, which is
+ * responsible for printf-style Java-based string formatting, and
+ * MatrixBuiltinNaryCPInstruction, which handles the nary cbind and rbind
+ * operations over matrix inputs.
+ *
+ */
+public abstract class BuiltinNary extends CPInstruction 
+{
+       public CPOperand output;
+       public CPOperand[] inputs;
+
+       public BuiltinNary(Operator op, String opcode, String istr, CPOperand 
output, CPOperand... inputs) {
+               super(op, opcode, istr);
+               _cptype = CPINSTRUCTION_TYPE.BuiltinNary;
+               this.output = output;
+               this.inputs = inputs;
+       }
+
+       public static BuiltinNary parseInstruction(String str) throws 
DMLRuntimeException {
+               String[] parts = 
InstructionUtils.getInstructionPartsWithValueType(str);
+               String opcode = parts[0];
+               CPOperand outputOperand = new CPOperand(parts[parts.length - 
1]);
+               CPOperand[] inputOperands = null;
+               if (parts.length > 2) {
+                       inputOperands = new CPOperand[parts.length - 2];
+                       for (int i = 1; i < parts.length-1; i++)
+                               inputOperands[i-1] = new CPOperand(parts[i]);
+               }
+               
+               if( Nary.OperationType.PRINTF.name().equalsIgnoreCase(opcode) ) 
{
+                       ValueFunction func = Builtin.getBuiltinFnObject(opcode);
+                       return new ScalarBuiltinNaryCPInstruction(new 
SimpleOperator(func), 
+                               opcode, str, outputOperand, inputOperands);
+               }
+               else if( opcode.equals("cbind") || opcode.equals("rbind") ) {
+                       return new MatrixBuiltinNaryCPInstruction(null, 
+                                       opcode, str, outputOperand, 
inputOperands);
+               }
+               
+               throw new DMLRuntimeException("Opcode (" + opcode + ") not 
recognized in BuiltinMultipleCPInstruction");
+       }
+}

http://git-wip-us.apache.org/repos/asf/systemml/blob/c0b6ef5c/src/main/java/org/apache/sysml/runtime/instructions/cp/CPInstruction.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/cp/CPInstruction.java 
b/src/main/java/org/apache/sysml/runtime/instructions/cp/CPInstruction.java
index 46251e9..d7fc623 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/cp/CPInstruction.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/cp/CPInstruction.java
@@ -31,8 +31,8 @@ public abstract class CPInstruction extends Instruction
 {
        public enum CPINSTRUCTION_TYPE { INVALID, 
                AggregateUnary, AggregateBinary, AggregateTernary, 
ArithmeticBinary, 
-               Ternary, Quaternary, BooleanBinary, BooleanUnary, 
BuiltinBinary, BuiltinUnary, 
-               BuiltinMultiple, MultiReturnParameterizedBuiltin, 
ParameterizedBuiltin, MultiReturnBuiltin, 
+               Ternary, Quaternary, BooleanBinary, BooleanUnary, 
BuiltinBinary, BuiltinUnary, BuiltinNary, 
+               MultiReturnParameterizedBuiltin, ParameterizedBuiltin, 
MultiReturnBuiltin, 
                Builtin, Reorg, RelationalBinary, Variable, External, Append, 
Rand, QSort, QPick, 
                MatrixIndexing, MMTSJ, PMMJ, MMChain, MatrixReshape, Partition, 
Compression, SpoofFused,
                StringInit, CentralMoment, Covariance, UaggOuterChain, 
Convolution }

http://git-wip-us.apache.org/repos/asf/systemml/blob/c0b6ef5c/src/main/java/org/apache/sysml/runtime/instructions/cp/MatrixBuiltinNaryCPInstruction.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/cp/MatrixBuiltinNaryCPInstruction.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/cp/MatrixBuiltinNaryCPInstruction.java
new file mode 100644
index 0000000..8ad619b
--- /dev/null
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/cp/MatrixBuiltinNaryCPInstruction.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.instructions.cp;
+
+import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+import org.apache.sysml.runtime.matrix.operators.Operator;
+
+public class MatrixBuiltinNaryCPInstruction extends BuiltinNary {
+
+       protected MatrixBuiltinNaryCPInstruction(Operator op, String opcode, 
String istr, CPOperand output, CPOperand[] inputs) {
+               super(op, opcode, istr, output, inputs);
+       }
+
+       @Override
+       public void processInstruction(ExecutionContext ec) throws 
DMLRuntimeException {
+               //pin input matrix blocks
+               MatrixBlock in1 = ec.getMatrixInput(inputs[0].getName());
+               MatrixBlock[] in2 = new MatrixBlock[inputs.length-1];
+               for( int i=1; i<inputs.length; i++ )
+                       in2[i-1] = ec.getMatrixInput(inputs[i].getName());
+               
+               MatrixBlock outBlock = null;
+               if( "cbind".equals(getOpcode()) || "rbind".equals(getOpcode()) 
) {
+                       boolean cbind = "cbind".equals(getOpcode());
+                       outBlock = in1.appendOperations(in2, new MatrixBlock(), 
cbind);
+               }
+               else {
+                       throw new DMLRuntimeException("Unknown opcode: 
"+getOpcode());
+               }
+               
+               //release inputs and set output
+               for( int i=0; i<inputs.length; i++ )
+                       ec.releaseMatrixInput(inputs[i].getName());
+               ec.setMatrixOutput(output.getName(), outBlock);
+       }
+}

http://git-wip-us.apache.org/repos/asf/systemml/blob/c0b6ef5c/src/main/java/org/apache/sysml/runtime/instructions/cp/ScalarBuiltinMultipleCPInstruction.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/cp/ScalarBuiltinMultipleCPInstruction.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/cp/ScalarBuiltinMultipleCPInstruction.java
deleted file mode 100644
index 8169695..0000000
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/cp/ScalarBuiltinMultipleCPInstruction.java
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.runtime.instructions.cp;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.sysml.api.DMLScript;
-import org.apache.sysml.lops.MultipleCP;
-import org.apache.sysml.parser.Expression;
-import org.apache.sysml.runtime.DMLRuntimeException;
-import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
-import org.apache.sysml.runtime.matrix.operators.Operator;
-
-/**
- * The ScalarBuiltinMultipleCPInstruction class is responsible for printf-style
- * Java-based string formatting. The first input is the format string. The
- * inputs after the first input are the arguments to be formatted in the format
- * string.
- *
- */
-public class ScalarBuiltinMultipleCPInstruction extends 
BuiltinMultipleCPInstruction {
-
-       protected ScalarBuiltinMultipleCPInstruction(Operator op, String 
opcode, String istr, CPOperand output,
-                       CPOperand... inputs) {
-               super(op, opcode, istr, output, inputs);
-       }
-
-       @Override
-       public void processInstruction(ExecutionContext ec) throws 
DMLRuntimeException {
-               if 
(MultipleCP.OperationType.PRINTF.toString().equalsIgnoreCase(getOpcode())) {
-                       List<ScalarObject> scalarObjects = new ArrayList<>();
-                       for (CPOperand input : inputs) {
-                               ScalarObject so = 
ec.getScalarInput(input.getName(), input.getValueType(), input.isLiteral());
-                               scalarObjects.add(so);
-                       }
-
-                       // determine the format string (first argument) to pass 
to String.format
-                       ScalarObject formatStringObject = scalarObjects.get(0);
-                       if (formatStringObject.getValueType() != 
Expression.ValueType.STRING) {
-                               throw new DMLRuntimeException("First parameter 
needs to be a string");
-                       }
-                       String formatString = 
formatStringObject.getStringValue();
-
-                       // determine the arguments after the format string to 
pass to String.format
-                       Object[] objects = null;
-                       if (scalarObjects.size() > 1) {
-                               objects = new Object[scalarObjects.size() - 1];
-                               for (int i = 1; i < scalarObjects.size(); i++) {
-                                       ScalarObject scalarObject = 
scalarObjects.get(i);
-                                       switch (scalarObject.getValueType()) {
-                                       case INT:
-                                               objects[i - 1] = 
scalarObject.getLongValue();
-                                               break;
-                                       case DOUBLE:
-                                               objects[i - 1] = 
scalarObject.getDoubleValue();
-                                               break;
-                                       case BOOLEAN:
-                                               objects[i - 1] = 
scalarObject.getBooleanValue();
-                                               break;
-                                       case STRING:
-                                               objects[i - 1] = 
scalarObject.getStringValue();
-                                               break;
-                                       default:
-                                       }
-                               }
-                       }
-
-                       String result = String.format(formatString, objects);
-                       if (!DMLScript.suppressPrint2Stdout()) {
-                               System.out.println(result);
-                       }
-
-                       // this is necessary so that the remove variable 
operation can be
-                       // performed
-                       ec.setScalarOutput(output.getName(), new 
StringObject(result));
-               } else {
-                       throw new DMLRuntimeException(
-                                       "Opcode (" + getOpcode() + ") not 
recognized in ScalarBuiltinMultipleCPInstruction");
-               }
-
-       }
-
-}

http://git-wip-us.apache.org/repos/asf/systemml/blob/c0b6ef5c/src/main/java/org/apache/sysml/runtime/instructions/cp/ScalarBuiltinNaryCPInstruction.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/cp/ScalarBuiltinNaryCPInstruction.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/cp/ScalarBuiltinNaryCPInstruction.java
new file mode 100644
index 0000000..9b15696
--- /dev/null
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/cp/ScalarBuiltinNaryCPInstruction.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.instructions.cp;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.lops.Nary;
+import org.apache.sysml.parser.Expression;
+import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
+import org.apache.sysml.runtime.matrix.operators.Operator;
+
+/**
+ * The ScalarBuiltinNaryCPInstruction class is responsible for printf-style
+ * Java-based string formatting. The first input is the format string. The
+ * inputs after the first input are the arguments to be formatted in the format
+ * string.
+ *
+ */
+public class ScalarBuiltinNaryCPInstruction extends BuiltinNary {
+
+       protected ScalarBuiltinNaryCPInstruction(Operator op, String opcode, 
String istr, CPOperand output, CPOperand[] inputs) {
+               super(op, opcode, istr, output, inputs);
+       }
+
+       @Override
+       public void processInstruction(ExecutionContext ec) throws 
DMLRuntimeException {
+               if 
(Nary.OperationType.PRINTF.toString().equalsIgnoreCase(getOpcode())) {
+                       List<ScalarObject> scalarObjects = new ArrayList<>();
+                       for (CPOperand input : inputs) {
+                               ScalarObject so = 
ec.getScalarInput(input.getName(), input.getValueType(), input.isLiteral());
+                               scalarObjects.add(so);
+                       }
+
+                       // determine the format string (first argument) to pass 
to String.format
+                       ScalarObject formatStringObject = scalarObjects.get(0);
+                       if (formatStringObject.getValueType() != 
Expression.ValueType.STRING) {
+                               throw new DMLRuntimeException("First parameter 
needs to be a string");
+                       }
+                       String formatString = 
formatStringObject.getStringValue();
+
+                       // determine the arguments after the format string to 
pass to String.format
+                       Object[] objects = null;
+                       if (scalarObjects.size() > 1) {
+                               objects = new Object[scalarObjects.size() - 1];
+                               for (int i = 1; i < scalarObjects.size(); i++) {
+                                       ScalarObject scalarObject = 
scalarObjects.get(i);
+                                       switch (scalarObject.getValueType()) {
+                                       case INT:
+                                               objects[i - 1] = 
scalarObject.getLongValue();
+                                               break;
+                                       case DOUBLE:
+                                               objects[i - 1] = 
scalarObject.getDoubleValue();
+                                               break;
+                                       case BOOLEAN:
+                                               objects[i - 1] = 
scalarObject.getBooleanValue();
+                                               break;
+                                       case STRING:
+                                               objects[i - 1] = 
scalarObject.getStringValue();
+                                               break;
+                                       default:
+                                       }
+                               }
+                       }
+
+                       String result = String.format(formatString, objects);
+                       if (!DMLScript.suppressPrint2Stdout()) {
+                               System.out.println(result);
+                       }
+
+                       // this is necessary so that the remove variable 
operation can be
+                       // performed
+                       ec.setScalarOutput(output.getName(), new 
StringObject(result));
+               } else {
+                       throw new DMLRuntimeException(
+                                       "Opcode (" + getOpcode() + ") not 
recognized in ScalarBuiltinMultipleCPInstruction");
+               }
+
+       }
+
+}

http://git-wip-us.apache.org/repos/asf/systemml/blob/c0b6ef5c/src/main/java/org/apache/sysml/runtime/instructions/spark/BuiltinNarySPInstruction.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/spark/BuiltinNarySPInstruction.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/spark/BuiltinNarySPInstruction.java
new file mode 100644
index 0000000..4edb1cf
--- /dev/null
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/spark/BuiltinNarySPInstruction.java
@@ -0,0 +1,144 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.instructions.spark;
+
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.function.PairFunction;
+import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
+import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
+import org.apache.sysml.runtime.instructions.InstructionUtils;
+import org.apache.sysml.runtime.instructions.cp.CPOperand;
+import 
org.apache.sysml.runtime.instructions.spark.AppendGSPInstruction.ShiftMatrix;
+import org.apache.sysml.runtime.instructions.spark.utils.RDDAggregateUtils;
+import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
+import org.apache.sysml.runtime.matrix.data.MatrixBlock;
+import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
+import org.apache.sysml.runtime.util.UtilFunctions;
+
+import scala.Tuple2;
+
+public class BuiltinNarySPInstruction extends SPInstruction 
+{
+       private CPOperand[] inputs;
+       private CPOperand output;
+       
+       protected BuiltinNarySPInstruction(CPOperand[] in, CPOperand out, 
String opcode, String istr) {
+               super(opcode, istr);
+               _sptype = SPINSTRUCTION_TYPE.BuiltinNary;
+               inputs = in;
+               output = out;
+       }
+
+       public static BuiltinNarySPInstruction parseInstruction ( String str ) 
+                       throws DMLRuntimeException 
+       {
+               String[] parts = 
InstructionUtils.getInstructionPartsWithValueType(str);
+               String opcode = parts[0];
+               CPOperand output = new CPOperand(parts[parts.length - 1]);
+               CPOperand[] inputs = null;
+               inputs = new CPOperand[parts.length - 2];
+               for (int i = 1; i < parts.length-1; i++)
+                       inputs[i-1] = new CPOperand(parts[i]);
+               return new BuiltinNarySPInstruction(inputs, output, opcode, 
str);
+       }
+
+       @Override 
+       public void processInstruction(ExecutionContext ec) 
+               throws DMLRuntimeException 
+       {       
+               SparkExecutionContext sec = (SparkExecutionContext)ec;
+               boolean cbind = getOpcode().equals("cbind");
+               
+               //compute output characteristics
+               MatrixCharacteristics mcOut = 
computeOutputMatrixCharacteristics(sec, inputs, cbind);
+               
+               //get consolidated input via union over shifted and padded 
inputs
+               MatrixCharacteristics off = new MatrixCharacteristics(
+                       0, 0, mcOut.getRowsPerBlock(), mcOut.getColsPerBlock(), 
0);
+               JavaPairRDD<MatrixIndexes,MatrixBlock> out = null;
+               for( CPOperand input : inputs ) {
+                       MatrixCharacteristics mcIn = 
sec.getMatrixCharacteristics(input.getName());
+                       JavaPairRDD<MatrixIndexes,MatrixBlock> in = sec
+                               .getBinaryBlockRDDHandleForVariable( 
input.getName() )
+                               .flatMapToPair(new ShiftMatrix(off, mcIn, 
cbind))
+                               .mapToPair(new PadBlocksFunction(mcOut)); 
//just padding
+                       out = (out != null) ? out.union(in) : in;
+                       updateMatrixCharacteristics(mcIn, off, cbind);
+               }
+               
+               //aggregate partially overlapping blocks w/ single shuffle
+               out = RDDAggregateUtils.mergeByKey(out);
+               
+               //set output RDD and add lineage
+               sec.getMatrixCharacteristics(output.getName()).set(mcOut);
+               sec.setRDDHandleForVariable(output.getName(), out);
+               for( CPOperand input : inputs )
+                       sec.addLineageRDD(output.getName(), input.getName());
+       }
+       
+       private static MatrixCharacteristics 
computeOutputMatrixCharacteristics(SparkExecutionContext sec, CPOperand[] 
inputs, boolean cbind) 
+               throws DMLRuntimeException 
+       {
+               MatrixCharacteristics mcIn1 = 
sec.getMatrixCharacteristics(inputs[0].getName());
+               MatrixCharacteristics mcOut = new MatrixCharacteristics(
+                       0, 0, mcIn1.getRowsPerBlock(), mcIn1.getColsPerBlock(), 
0);
+               for( CPOperand input : inputs ) {
+                       MatrixCharacteristics mcIn = 
sec.getMatrixCharacteristics(input.getName());
+                       updateMatrixCharacteristics(mcIn, mcOut, cbind);
+               }
+               return mcOut;
+       }
+       
+       private static void updateMatrixCharacteristics(MatrixCharacteristics 
in, MatrixCharacteristics out, boolean cbind) {
+               out.setDimension(cbind ? Math.max(out.getRows(), in.getRows()) 
: out.getRows()+in.getRows(),
+                       cbind ? out.getCols()+in.getCols() : 
Math.max(out.getCols(), in.getCols()));
+               out.setNonZeros((out.getNonZeros()!=-1 && in.dimsKnown(true)) ? 
out.getNonZeros()+in.getNonZeros() : -1);
+       }
+       
+       public static class PadBlocksFunction implements 
PairFunction<Tuple2<MatrixIndexes,MatrixBlock>,MatrixIndexes,MatrixBlock> 
+       {
+               private static final long serialVersionUID = 
1291358959908299855L;
+               
+               private final MatrixCharacteristics _mcOut;
+               
+               public PadBlocksFunction(MatrixCharacteristics mcOut) {
+                       _mcOut = mcOut;
+               }
+
+               public Tuple2<MatrixIndexes, MatrixBlock> 
call(Tuple2<MatrixIndexes, MatrixBlock> arg0) throws Exception {
+                       MatrixIndexes ix = arg0._1();
+                       MatrixBlock mb = arg0._2();
+                       int brlen = 
UtilFunctions.computeBlockSize(_mcOut.getRows(), ix.getRowIndex(), 
_mcOut.getRowsPerBlock());
+                       int bclen = 
UtilFunctions.computeBlockSize(_mcOut.getCols(), ix.getColumnIndex(), 
_mcOut.getColsPerBlock());
+                       
+                       //check for pass-through
+                       if( brlen == mb.getNumRows() && bclen == 
mb.getNumColumns() )
+                               return arg0;
+                       
+                       //cbind or rbind to pad to right blocksize
+                       if( brlen > mb.getNumRows() ) //rbind
+                               mb = mb.appendOperations(new 
MatrixBlock(brlen-mb.getNumRows(),bclen,true), new MatrixBlock(), false);
+                       else if( bclen > mb.getNumColumns() ) //cbind
+                               mb = mb.appendOperations(new 
MatrixBlock(brlen,bclen-mb.getNumColumns(),true), new MatrixBlock(), true);
+                       return new Tuple2<>(ix, mb);
+               }
+       }
+}

http://git-wip-us.apache.org/repos/asf/systemml/blob/c0b6ef5c/src/main/java/org/apache/sysml/runtime/instructions/spark/SPInstruction.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/spark/SPInstruction.java 
b/src/main/java/org/apache/sysml/runtime/instructions/spark/SPInstruction.java
index f0ee6e3..bafe4e8 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/spark/SPInstruction.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/spark/SPInstruction.java
@@ -32,7 +32,7 @@ public abstract class SPInstruction extends Instruction {
        public enum SPINSTRUCTION_TYPE { 
                MAPMM, MAPMMCHAIN, CPMM, RMM, TSMM, TSMM2, PMM, ZIPMM, PMAPMM, 
//matrix multiplication instructions  
                MatrixIndexing, Reorg, ArithmeticBinary, RelationalBinary, 
AggregateUnary, AggregateTernary, Reblock, CSVReblock, 
-               Builtin, BuiltinUnary, BuiltinBinary, MultiReturnBuiltin, 
Checkpoint, Compression, Cast,
+               Builtin, BuiltinUnary, BuiltinBinary, BuiltinNary, 
MultiReturnBuiltin, Checkpoint, Compression, Cast,
                CentralMoment, Covariance, QSort, QPick, 
                ParameterizedBuiltin, MAppend, RAppend, GAppend, 
GAlignedAppend, Rand, 
                MatrixReshape, Ternary, Quaternary, CumsumAggregate, 
CumsumOffset, BinUaggChain, UaggOuterChain, 

http://git-wip-us.apache.org/repos/asf/systemml/blob/c0b6ef5c/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
index c023890..48051de 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
@@ -1650,7 +1650,7 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                                nonZeros -= recomputeNonZeros(rl, ru, cl, cu);
                                copyEmptyToDense(rl, ru, cl, cu);
                        }
-                       return;         
+                       return;
                }
                
                //allocate output block
@@ -1661,7 +1661,7 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                        nonZeros = nonZeros - recomputeNonZeros(rl, ru, cl, cu) 
+ src.nonZeros;
                
                //copy values
-               int rowLen = cu-cl+1;                           
+               int rowLen = cu-cl+1;
                if(clen == src.clen) //optimization for equal width
                        System.arraycopy(src.denseBlock, 0, denseBlock, 
rl*clen+cl, src.rlen*src.clen);
                else
@@ -3521,24 +3521,25 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                return result;
        }
 
-       public MatrixBlock appendOperations( MatrixBlock that, MatrixBlock ret 
)        
-               throws DMLRuntimeException 
-       {
-               //default append-cbind
-               return appendOperations(that, ret, true);
+       public MatrixBlock appendOperations( MatrixBlock that, MatrixBlock ret 
) throws DMLRuntimeException {
+               return appendOperations(that, ret, true); //default cbind
        }
 
-       public MatrixBlock appendOperations( MatrixBlock that, MatrixBlock ret, 
boolean cbind )         
+       public MatrixBlock appendOperations( MatrixBlock that, MatrixBlock ret, 
boolean cbind ) throws DMLRuntimeException {
+               return appendOperations(new MatrixBlock[]{that}, ret, cbind);
+       }
+       
+       public MatrixBlock appendOperations( MatrixBlock[] that, MatrixBlock 
ret, boolean cbind )
                throws DMLRuntimeException 
        {
                MatrixBlock result = checkType( ret );
-               final int m = cbind ? rlen : rlen+that.rlen;
-               final int n = cbind ? clen+that.clen : clen;
-               final long nnz = nonZeros+that.nonZeros;                
+               final int m = cbind ? rlen : 
rlen+Arrays.stream(that).mapToInt(mb -> mb.rlen).sum();
+               final int n = cbind ? clen+Arrays.stream(that).mapToInt(mb -> 
mb.clen).sum() : clen;
+               final long nnz = nonZeros+Arrays.stream(that).mapToLong(mb -> 
mb.nonZeros).sum();
                boolean sp = evalSparseFormatInMemory(m, n, nnz);
                
                //init result matrix 
-               if( result == null ) 
+               if( result == null )
                        result = new MatrixBlock(m, n, sp, nnz);
                else
                        result.reset(m, n, sp, nnz);
@@ -3546,54 +3547,67 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                //core append operation
                //copy left and right input into output
                if( !result.sparse ) //DENSE
-               {       
+               {
                        if( cbind ) {
                                result.copy(0, m-1, 0, clen-1, this, false);
-                               result.copy(0, m-1, clen, n-1, that, false);
+                               for(int i=0, off=clen; i<that.length; i++) {
+                                       result.copy(0, m-1, off, 
off+that[i].clen-1, that[i], false);
+                                       off += that[i].clen;
+                               }
                        }
                        else { //rbind
                                result.copy(0, rlen-1, 0, n-1, this, false);
-                               result.copy(rlen, m-1, 0, n-1, that, false);    
+                               for(int i=0, off=rlen; i<that.length; i++) {
+                                       result.copy(off, off+that[i].rlen-1, 0, 
n-1, that[i], false);
+                                       off += that[i].rlen;
+                               }
                        }
                }
                else //SPARSE
                {
                        //adjust sparse rows if required
-                       if( !this.isEmptyBlock(false) || 
!that.isEmptyBlock(false) ) {
+                       if( !this.isEmptyBlock(false) || 
!Arrays.stream(that).allMatch(mb -> mb.isEmptyBlock(false)) ) {
                                result.allocateSparseRowsBlock();
-                       
                                //allocate sparse rows once for cbind
                                if( cbind && result.getSparseBlock() instanceof 
SparseBlockMCSR ) {
                                        SparseBlock sblock = 
result.getSparseBlock();
                                        for( int i=0; i<result.rlen; i++ ) {
-                                               int lnnz = 
(int)(this.recomputeNonZeros(i, i, 0, this.clen-1)
-                                                       + 
that.recomputeNonZeros(i, i, 0, that.clen-1));
+                                               final int row = i; //workaround 
for lambda compile issue
+                                               int lnnz = (int) 
(this.recomputeNonZeros(i, i, 0, this.clen-1) + Arrays.stream(that)
+                                                       .mapToLong(mb -> 
mb.recomputeNonZeros(row, row, 0, mb.clen-1)).sum());
                                                sblock.allocate(i, lnnz);
                                        }
                                }
                        }
                        
                        //core append operation
-                       result.appendToSparse(this, 0, 0);                      
-                       if( cbind )
-                               result.appendToSparse(that, 0, clen);
-                       else //rbind
-                               result.appendToSparse(that, rlen, 0);
-               }               
+                       result.appendToSparse(this, 0, 0);
+                       if( cbind ) {
+                               for(int i=0, off=clen; i<that.length; i++) {
+                                       result.appendToSparse(that[i], 0, off);
+                                       off += that[i].clen;
+                               }
+                       }
+                       else { //rbind
+                               for(int i=0, off=rlen; i<that.length; i++) {
+                                       result.appendToSparse(that[i], off, 0);
+                                       off += that[i].rlen;
+                               }
+                       }
+               }
                
                //update meta data
                result.nonZeros = nnz;
-               
                return result;
        }
 
-       public MatrixBlock transposeSelfMatrixMultOperations( MatrixBlock out, 
MMTSJType tstype )       
+       public MatrixBlock transposeSelfMatrixMultOperations( MatrixBlock out, 
MMTSJType tstype )
                throws DMLRuntimeException 
        {
                return transposeSelfMatrixMultOperations(out, tstype, 1);
        }
 
-       public MatrixBlock transposeSelfMatrixMultOperations( MatrixBlock out, 
MMTSJType tstype, int k )        
+       public MatrixBlock transposeSelfMatrixMultOperations( MatrixBlock out, 
MMTSJType tstype, int k )
                throws DMLRuntimeException 
        {
                //check for transpose type

Reply via email to