[SYSTEMML-2119] Support for seq operations in codegen (local/spark)

This patch extends the code generation framework to support sequence
(seq) operators in the cell, multi-aggregate (magg), and row templates.
Along with the typical extensions of candidate exploration, templates,
and minor optimizer extensions, this also required a runtime extension
for distributed operations to feed the global row index into generated
operators.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/d0b4373f
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/d0b4373f
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/d0b4373f

Branch: refs/heads/master
Commit: d0b4373fa13951d159e0d2443b703f0d1a93bfc6
Parents: 1a37cfa
Author: Matthias Boehm <[email protected]>
Authored: Tue May 8 22:22:59 2018 -0700
Committer: Matthias Boehm <[email protected]>
Committed: Tue May 8 23:07:20 2018 -0700

----------------------------------------------------------------------
 .../java/org/apache/sysml/hops/DataGenOp.java   |   4 +
 .../sysml/hops/codegen/SpoofCompiler.java       |   6 +-
 .../sysml/hops/codegen/cplan/CNodeBinary.java   |  79 ++++---
 .../sysml/hops/codegen/cplan/CNodeCell.java     |   2 +-
 .../sysml/hops/codegen/cplan/CNodeMultiAgg.java |   2 +-
 .../sysml/hops/codegen/cplan/CNodeRow.java      |   4 +-
 .../opt/PlanSelectionFuseCostBasedV2.java       |   4 +-
 .../hops/codegen/template/TemplateCell.java     |  50 ++--
 .../hops/codegen/template/TemplateRow.java      |  42 ++--
 .../hops/codegen/template/TemplateUtils.java    |  13 ++
 .../sysml/hops/rewrite/HopRewriteUtils.java     |  24 +-
 .../sysml/runtime/codegen/SpoofCellwise.java    | 234 ++++++++++---------
 .../runtime/codegen/SpoofMultiAggregate.java    |  46 ++--
 .../sysml/runtime/codegen/SpoofRowwise.java     |  54 +++--
 .../instructions/spark/SpoofSPInstruction.java  |  58 +++--
 .../functions/codegen/CellwiseTmplTest.java     |  20 +-
 .../functions/codegen/RowAggTmplTest.java       |  20 +-
 .../scripts/functions/codegen/cellwisetmpl22.R  |  31 +++
 .../functions/codegen/cellwisetmpl22.dml        |  28 +++
 .../scripts/functions/codegen/rowAggPattern41.R |  31 +++
 .../functions/codegen/rowAggPattern41.dml       |  28 +++
 21 files changed, 518 insertions(+), 262 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/hops/DataGenOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/DataGenOp.java 
b/src/main/java/org/apache/sysml/hops/DataGenOp.java
index 0c6b586..3101721 100644
--- a/src/main/java/org/apache/sysml/hops/DataGenOp.java
+++ b/src/main/java/org/apache/sysml/hops/DataGenOp.java
@@ -366,6 +366,10 @@ public class DataGenOp extends Hop implements 
MultiThreadedHop
                return _paramIndexMap;
        }
        
+       public Hop getParam(String key) {
+               return getInput().get(getParamIndex(key));
+       }
+       
        public int getParamIndex(String key) {
                return _paramIndexMap.get(key);
        }

http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java 
b/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java
index 932b0be..6ef8dc4 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java
@@ -665,8 +665,10 @@ public class SpoofCompiler
                        CNodeTpl tpl = e.getValue().getValue();
                        Hop[] inHops = e.getValue().getKey();
                        
-                       //remove invalid plans with null inputs 
-                       if( Arrays.stream(inHops).anyMatch(h -> (h==null)) )
+                       //remove invalid plans with null, empty, or all scalar 
inputs 
+                       if( inHops == null || inHops.length == 0
+                               || Arrays.stream(inHops).anyMatch(h -> 
(h==null))
+                               || Arrays.stream(inHops).allMatch(h -> 
h.isScalar()))
                                continue;
                        
                        //perform simplifications and cse rewrites

http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java 
b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java
index 4aee712..80a7f83 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java
@@ -52,6 +52,7 @@ public class CNodeBinary extends CNode
                LESS, LESSEQUAL, GREATER, GREATEREQUAL, EQUAL,NOTEQUAL,
                MIN, MAX, AND, OR, XOR, LOG, LOG_NZ, POW,
                BITWAND,
+               SEQ_RIX,
                MINUS1_MULT, MINUS_NZ;
 
                public static boolean contains(String value) {
@@ -214,7 +215,9 @@ public class CNodeBinary extends CNode
                                        return "    double %TMP% = ( (%IN1% != 
0) != (%IN2% != 0) ) ? 1 : 0;\n";
                                case BITWAND:
                                        return "    double %TMP% = 
LibSpoofPrimitives.bwAnd(%IN1%, %IN2%);\n";
-
+                               case SEQ_RIX:
+                                       return "    double %TMP% = %IN1% + grix 
* %IN2%;\n"; //0-based global rix
+                               
                                default: 
                                        throw new RuntimeException("Invalid 
binary type: "+this.toString());
                        }
@@ -409,6 +412,7 @@ public class CNodeBinary extends CNode
                        case AND:                      return "b(&)";
                        case XOR:                      return "b(xor)";
                        case BITWAND:                  return "b(bitwAnd)";
+                       case SEQ_RIX:                  return "b(seqr)";
                        case MINUS1_MULT:              return "b(1-*)";
                        case MINUS_NZ:                 return "b(-nz)";
                        default: return "b("+_type.name().toLowerCase()+")";
@@ -420,7 +424,7 @@ public class CNodeBinary extends CNode
        {
                switch(_type) {
                        //VECT
-                       case VECT_MULT_ADD: 
+                       case VECT_MULT_ADD:
                        case VECT_DIV_ADD:
                        case VECT_MINUS_ADD:
                        case VECT_PLUS_ADD:
@@ -428,10 +432,10 @@ public class CNodeBinary extends CNode
                        case VECT_MIN_ADD:
                        case VECT_MAX_ADD:
                        case VECT_EQUAL_ADD: 
-                       case VECT_NOTEQUAL_ADD: 
-                       case VECT_LESS_ADD: 
-                       case VECT_LESSEQUAL_ADD: 
-                       case VECT_GREATER_ADD: 
+                       case VECT_NOTEQUAL_ADD:
+                       case VECT_LESS_ADD:
+                       case VECT_LESSEQUAL_ADD:
+                       case VECT_GREATER_ADD:
                        case VECT_GREATEREQUAL_ADD:
                        case VECT_CBIND_ADD:
                        case VECT_XOR_ADD:
@@ -452,8 +456,8 @@ public class CNodeBinary extends CNode
                                _cols = _inputs.get(1)._cols;
                                _dataType = DataType.MATRIX;
                                break;
-                               
-                       case VECT_DIV_SCALAR:   
+                       
+                       case VECT_DIV_SCALAR:
                        case VECT_MULT_SCALAR:
                        case VECT_MINUS_SCALAR:
                        case VECT_PLUS_SCALAR:
@@ -463,13 +467,13 @@ public class CNodeBinary extends CNode
                        case VECT_MIN_SCALAR:
                        case VECT_MAX_SCALAR:
                        case VECT_EQUAL_SCALAR: 
-                       case VECT_NOTEQUAL_SCALAR: 
-                       case VECT_LESS_SCALAR: 
-                       case VECT_LESSEQUAL_SCALAR: 
+                       case VECT_NOTEQUAL_SCALAR:
+                       case VECT_LESS_SCALAR:
+                       case VECT_LESSEQUAL_SCALAR:
                        case VECT_GREATER_SCALAR: 
                        case VECT_GREATEREQUAL_SCALAR:
                        
-                       case VECT_DIV:  
+                       case VECT_DIV:
                        case VECT_MULT:
                        case VECT_MINUS:
                        case VECT_PLUS:
@@ -477,12 +481,12 @@ public class CNodeBinary extends CNode
                        case VECT_BITWAND:
                        case VECT_MIN:
                        case VECT_MAX:
-                       case VECT_EQUAL: 
-                       case VECT_NOTEQUAL: 
-                       case VECT_LESS: 
-                       case VECT_LESSEQUAL: 
-                       case VECT_GREATER: 
-                       case VECT_GREATEREQUAL: 
+                       case VECT_EQUAL:
+                       case VECT_NOTEQUAL:
+                       case VECT_LESS:
+                       case VECT_LESSEQUAL:
+                       case VECT_GREATER:
+                       case VECT_GREATEREQUAL:
                                boolean scalarVector = 
(_inputs.get(0).getDataType()==DataType.SCALAR);
                                _rows = _inputs.get(scalarVector ? 1 : 0)._rows;
                                _cols = _inputs.get(scalarVector ? 1 : 0)._cols;
@@ -494,39 +498,40 @@ public class CNodeBinary extends CNode
                                _cols = _inputs.get(1)._cols;
                                _dataType = DataType.MATRIX;
                                break;
-                               
-                       case DOT_PRODUCT: 
+                       
+                       case DOT_PRODUCT:
                        
                        //SCALAR Arithmetic
-                       case MULT: 
-                       case DIV: 
-                       case PLUS: 
-                       case MINUS: 
+                       case MULT:
+                       case DIV:
+                       case PLUS:
+                       case MINUS:
                        case MINUS1_MULT:
                        case MINUS_NZ:
-                       case MODULUS: 
-                       case INTDIV:    
+                       case MODULUS:
+                       case INTDIV:
                        //SCALAR Comparison
-                       case LESS: 
-                       case LESSEQUAL: 
+                       case LESS:
+                       case LESSEQUAL:
                        case GREATER: 
-                       case GREATEREQUAL: 
-                       case EQUAL: 
-                       case NOTEQUAL: 
+                       case GREATEREQUAL:
+                       case EQUAL:
+                       case NOTEQUAL:
                        //SCALAR LOGIC
-                       case MIN: 
-                       case MAX: 
-                       case AND: 
+                       case MIN:
+                       case MAX:
+                       case AND:
                        case OR:
                        case XOR:
                        case BITWAND:
-                       case LOG: 
+                       case LOG:
                        case LOG_NZ:
-                       case POW: 
+                       case POW:
+                       case SEQ_RIX:
                                _rows = 0;
                                _cols = 0;
                                _dataType= DataType.SCALAR;
-                               break;  
+                               break;
                }
        }
        

http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeCell.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeCell.java 
b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeCell.java
index 3b30124..e97a00a 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeCell.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeCell.java
@@ -41,7 +41,7 @@ public class CNodeCell extends CNodeTpl
                        + "  public %TMP%() {\n"
                        + "    super(CellType.%TYPE%, %SPARSE_SAFE%, 
%AGG_OP%);\n"
                        + "  }\n"
-                       + "  protected double genexec(double a, SideInput[] b, 
double[] scalars, int m, int n, int rix, int cix) { \n"
+                       + "  protected double genexec(double a, SideInput[] b, 
double[] scalars, int m, int n, long grix, int rix, int cix) { \n"
                        + "%BODY_dense%"
                        + "    return %OUT%;\n"
                        + "  }\n"

http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeMultiAgg.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeMultiAgg.java 
b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeMultiAgg.java
index fd39c7c..b0bd03a 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeMultiAgg.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeMultiAgg.java
@@ -44,7 +44,7 @@ public class CNodeMultiAgg extends CNodeTpl
                        + "    super(%SPARSE_SAFE%, %AGG_OP%);\n"
                        + "  }\n"
                        + "  protected void genexec(double a, SideInput[] b, 
double[] scalars, double[] c, "
-                                       + "int m, int n, int rix, int cix) { \n"
+                                       + "int m, int n, long grix, int rix, 
int cix) { \n"
                        + "%BODY_dense%"
                        + "  }\n"
                        + "}\n";

http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java 
b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java
index e6aa53a..e21ff2b 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java
@@ -41,10 +41,10 @@ public class CNodeRow extends CNodeTpl
                        + "  public %TMP%() {\n"
                        + "    super(RowType.%TYPE%, %CONST_DIM2%, %TB1%, 
%VECT_MEM%);\n"
                        + "  }\n"
-                       + "  protected void genexec(double[] a, int ai, 
SideInput[] b, double[] scalars, double[] c, int ci, int len, int rix) { \n"
+                       + "  protected void genexec(double[] a, int ai, 
SideInput[] b, double[] scalars, double[] c, int ci, int len, long grix, int 
rix) { \n"
                        + "%BODY_dense%"
                        + "  }\n"
-                       + "  protected void genexec(double[] avals, int[] aix, 
int ai, SideInput[] b, double[] scalars, double[] c, int ci, int alen, int len, 
int rix) { \n"
+                       + "  protected void genexec(double[] avals, int[] aix, 
int ai, SideInput[] b, double[] scalars, double[] c, int ci, int alen, int len, 
long grix, int rix) { \n"
                        + "%BODY_sparse%"
                        + "  }\n"
                        + "}\n";

http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java
 
b/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java
index 1e5bcf3..3db4ce8 100644
--- 
a/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java
+++ 
b/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java
@@ -44,6 +44,7 @@ import org.apache.sysml.hops.AggUnaryOp;
 import org.apache.sysml.hops.BinaryOp;
 import org.apache.sysml.hops.Hop;
 import org.apache.sysml.hops.Hop.AggOp;
+import org.apache.sysml.hops.Hop.DataGenMethod;
 import org.apache.sysml.hops.Hop.DataOpTypes;
 import org.apache.sysml.hops.Hop.Direction;
 import org.apache.sysml.hops.Hop.OpOp2;
@@ -718,7 +719,8 @@ public class PlanSelectionFuseCostBasedV2 extends 
PlanSelection
                        || (hop instanceof AggBinaryOp && (inRow || 
!hop.dimsKnown()
                                || (hop.getDim1()!=1 && hop.getDim2()!=1)))
                        || (HopRewriteUtils.isTransposeOperation(hop)
-                               && (hop.getDim1()!=1 && hop.getDim2()!=1))
+                               && (hop.getDim1()!=1 && hop.getDim2()!=1)
+                               && 
!HopRewriteUtils.isDataGenOp(hop.getInput().get(0),DataGenMethod.SEQ))
                        || (hop instanceof AggUnaryOp && inRow);
        }
        

http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/hops/codegen/template/TemplateCell.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateCell.java 
b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateCell.java
index cfac2d7..8eaffaf 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateCell.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateCell.java
@@ -29,10 +29,12 @@ import java.util.List;
 import org.apache.sysml.hops.AggBinaryOp;
 import org.apache.sysml.hops.AggUnaryOp;
 import org.apache.sysml.hops.BinaryOp;
+import org.apache.sysml.hops.DataGenOp;
 import org.apache.sysml.hops.DataOp;
 import org.apache.sysml.hops.Hop;
 import org.apache.sysml.hops.UnaryOp;
 import org.apache.sysml.hops.Hop.AggOp;
+import org.apache.sysml.hops.Hop.DataGenMethod;
 import org.apache.sysml.hops.Hop.OpOp2;
 import org.apache.sysml.hops.Hop.OpOp3;
 import org.apache.sysml.hops.Hop.ParamBuiltinOp;
@@ -52,6 +54,7 @@ import 
org.apache.sysml.hops.codegen.template.CPlanMemoTable.MemoTableEntry;
 import org.apache.sysml.hops.codegen.cplan.CNodeTernary;
 import org.apache.sysml.hops.codegen.cplan.CNodeTernary.TernaryType;
 import org.apache.sysml.hops.rewrite.HopRewriteUtils;
+import org.apache.sysml.parser.Statement;
 import org.apache.sysml.parser.Expression.DataType;
 import org.apache.sysml.runtime.matrix.data.Pair;
 
@@ -77,7 +80,9 @@ public class TemplateCell extends TemplateBase
                return hop.dimsKnown() && isValidOperation(hop)
                                && !(hop.getDim1()==1 && hop.getDim2()==1) 
                        || (hop instanceof IndexingOp && 
hop.getInput().get(0).getDim2() >= 0
-                               && (((IndexingOp)hop).isColLowerEqualsUpper() 
|| hop.getDim2()==1));
+                               && (((IndexingOp)hop).isColLowerEqualsUpper() 
|| hop.getDim2()==1))
+                       || (HopRewriteUtils.isDataGenOpWithLiteralInputs(hop, 
DataGenMethod.SEQ)
+                               && 
HopRewriteUtils.hasOnlyUnaryBinaryParents(hop, true));
        }
 
        @Override
@@ -96,7 +101,9 @@ public class TemplateCell extends TemplateBase
                //merge of other cell tpl possible
                return (!isClosed() && (isValidOperation(hop) 
                        || (hop instanceof AggBinaryOp && 
hop.getInput().indexOf(input)==0 
-                               && 
HopRewriteUtils.isTransposeOperation(input))));
+                               && 
HopRewriteUtils.isTransposeOperation(input))))
+                       || (HopRewriteUtils.isDataGenOpWithLiteralInputs(input, 
DataGenMethod.SEQ)
+                               && 
HopRewriteUtils.hasOnlyUnaryBinaryParents(input, false));
        }
 
        @Override
@@ -177,16 +184,14 @@ public class TemplateCell extends TemplateBase
                
                //construct cnode for current hop
                CNode out = null;
-               if(hop instanceof UnaryOp)
-               {
+               if(hop instanceof UnaryOp) {
                        CNode cdata1 = 
tmp.get(hop.getInput().get(0).getHopID());
                        cdata1 = TemplateUtils.wrapLookupIfNecessary(cdata1, 
hop.getInput().get(0));
                        
                        String primitiveOpName = ((UnaryOp)hop).getOp().name();
                        out = new CNodeUnary(cdata1, 
UnaryType.valueOf(primitiveOpName));
                }
-               else if(hop instanceof BinaryOp)
-               {
+               else if(hop instanceof BinaryOp) {
                        BinaryOp bop = (BinaryOp) hop;
                        CNode cdata1 = 
tmp.get(hop.getInput().get(0).getHopID());
                        CNode cdata2 = 
tmp.get(hop.getInput().get(1).getHopID());
@@ -200,8 +205,7 @@ public class TemplateCell extends TemplateBase
                        out = new CNodeBinary(cdata1, cdata2, 
                                BinType.valueOf(primitiveOpName));
                }
-               else if(hop instanceof TernaryOp) 
-               {
+               else if(hop instanceof TernaryOp) {
                        TernaryOp top = (TernaryOp) hop;
                        CNode cdata1 = 
tmp.get(hop.getInput().get(0).getHopID());
                        CNode cdata2 = 
tmp.get(hop.getInput().get(1).getHopID());
@@ -216,27 +220,34 @@ public class TemplateCell extends TemplateBase
                        out = new CNodeTernary(cdata1, cdata2, cdata3, 
                                TernaryType.valueOf(top.getOp().name()));
                }
-               else if( hop instanceof ParameterizedBuiltinOp ) 
-               {
+               else if( hop instanceof ParameterizedBuiltinOp ) {
                        CNode cdata1 = 
tmp.get(((ParameterizedBuiltinOp)hop).getTargetHop().getHopID());
                        cdata1 = TemplateUtils.wrapLookupIfNecessary(cdata1, 
hop.getInput().get(0));
                        
                        CNode cdata2 = 
tmp.get(((ParameterizedBuiltinOp)hop).getParameterHop("pattern").getHopID());
                        CNode cdata3 = 
tmp.get(((ParameterizedBuiltinOp)hop).getParameterHop("replacement").getHopID());
                        TernaryType ttype = (cdata2.isLiteral() && 
cdata2.getVarname().equals("Double.NaN")) ? 
-                                       TernaryType.REPLACE_NAN : 
TernaryType.REPLACE;
+                               TernaryType.REPLACE_NAN : TernaryType.REPLACE;
                        out = new CNodeTernary(cdata1, cdata2, cdata3, ttype);
                }
-               else if( hop instanceof IndexingOp ) 
-               {
+               else if( hop instanceof IndexingOp ) {
                        CNode cdata1 = 
tmp.get(hop.getInput().get(0).getHopID());
                        out = new CNodeTernary(cdata1, 
-                                       TemplateUtils.createCNodeData(new 
LiteralOp(hop.getInput().get(0).getDim2()), true), 
-                                       
TemplateUtils.createCNodeData(hop.getInput().get(4), true),
-                                       TernaryType.LOOKUP_RC1);
+                               TemplateUtils.createCNodeData(new 
LiteralOp(hop.getInput().get(0).getDim2()), true), 
+                               
TemplateUtils.createCNodeData(hop.getInput().get(4), true),
+                               TernaryType.LOOKUP_RC1);
                }
-               else if( HopRewriteUtils.isTransposeOperation(hop) ) 
-               {
+               else if( HopRewriteUtils.isDataGenOp(hop, DataGenMethod.SEQ) ) {
+                       CNodeData from = 
TemplateUtils.getLiteral(tmp.get(((DataGenOp)hop).getParam(Statement.SEQ_FROM).getHopID()));
+                       CNodeData to = 
TemplateUtils.getLiteral(tmp.get(((DataGenOp)hop).getParam(Statement.SEQ_TO).getHopID()));
+                       CNodeData incr = 
TemplateUtils.getLiteral(tmp.get(((DataGenOp)hop).getParam(Statement.SEQ_INCR).getHopID()));
+                       if( Double.parseDouble(from.getVarname()) > 
Double.parseDouble(to.getVarname())
+                               && Double.parseDouble(incr.getVarname()) > 0 ) {
+                               incr = TemplateUtils.createCNodeData(new 
LiteralOp("-"+incr.getVarname()), true);
+                       }
+                       out = new CNodeBinary(from, incr, BinType.SEQ_RIX);
+               }
+               else if( HopRewriteUtils.isTransposeOperation(hop) ) {
                        out = 
TemplateUtils.skipTranspose(tmp.get(hop.getHopID()), 
                                hop, tmp, compileLiterals);
                        //correct indexing types of existing lookups
@@ -246,8 +257,7 @@ public class TemplateCell extends TemplateBase
                        if( out instanceof CNodeData && 
!inHops.contains(hop.getInput().get(0)) )
                                inHops.add(hop.getInput().get(0));
                }
-               else if( hop instanceof AggUnaryOp )
-               {
+               else if( hop instanceof AggUnaryOp ) {
                        //aggregation handled in template implementation (note: 
we do not compile 
                        //^2 of SUM_SQ into the operator to simplify the 
detection of single operators)
                        out = tmp.get(hop.getInput().get(0).getHopID());

http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java 
b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
index 1e996d9..fb153de 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
@@ -27,6 +27,7 @@ import java.util.HashSet;
 import org.apache.sysml.hops.AggBinaryOp;
 import org.apache.sysml.hops.AggUnaryOp;
 import org.apache.sysml.hops.BinaryOp;
+import org.apache.sysml.hops.DataGenOp;
 import org.apache.sysml.hops.Hop;
 import org.apache.sysml.hops.IndexingOp;
 import org.apache.sysml.hops.LiteralOp;
@@ -48,10 +49,13 @@ import 
org.apache.sysml.hops.codegen.cplan.CNodeUnary.UnaryType;
 import org.apache.sysml.hops.codegen.template.CPlanMemoTable.MemoTableEntry;
 import org.apache.sysml.hops.rewrite.HopRewriteUtils;
 import org.apache.sysml.hops.Hop.AggOp;
+import org.apache.sysml.hops.Hop.DataGenMethod;
 import org.apache.sysml.hops.Hop.Direction;
 import org.apache.sysml.hops.Hop.OpOp1;
 import org.apache.sysml.hops.Hop.OpOp2;
 import org.apache.sysml.hops.Hop.OpOpN;
+import org.apache.sysml.hops.HopsException;
+import org.apache.sysml.parser.Statement;
 import org.apache.sysml.parser.Expression.DataType;
 import org.apache.sysml.runtime.codegen.SpoofRowwise.RowType;
 import org.apache.sysml.runtime.matrix.data.LibMatrixMult;
@@ -134,6 +138,8 @@ public class TemplateRow extends TemplateBase
                                && hop.getDim1() > 1 && input.getDim1()>1)
                        || (HopRewriteUtils.isBinary(hop, OpOp2.CBIND) && 
hop.getInput().get(0).isMatrix() && hop.dimsKnown())
                        || (HopRewriteUtils.isNary(hop, OpOpN.CBIND) && 
hop.getInput().get(0).isMatrix() && hop.dimsKnown())
+                       || (HopRewriteUtils.isDataGenOpWithLiteralInputs(input, 
DataGenMethod.SEQ)
+                               && 
HopRewriteUtils.hasOnlyUnaryBinaryParents(input, false))
                        || (hop instanceof AggBinaryOp
                                && 
HopRewriteUtils.isTransposeOperation(hop.getInput().get(0))
                                && (input.getDim2()==1 || 
(input==hop.getInput().get(1) 
@@ -328,15 +334,23 @@ public class TemplateRow extends TemplateBase
                                }
                        }
                }
-               else if( HopRewriteUtils.isTransposeOperation(hop) ) 
-               {
+               else if( HopRewriteUtils.isDataGenOp(hop, DataGenMethod.SEQ) ) {
+                       CNodeData from = 
TemplateUtils.getLiteral(tmp.get(((DataGenOp)hop).getParam(Statement.SEQ_FROM).getHopID()));
+                       CNodeData to = 
TemplateUtils.getLiteral(tmp.get(((DataGenOp)hop).getParam(Statement.SEQ_TO).getHopID()));
+                       CNodeData incr = 
TemplateUtils.getLiteral(tmp.get(((DataGenOp)hop).getParam(Statement.SEQ_INCR).getHopID()));
+                       if( Double.parseDouble(from.getVarname()) > 
Double.parseDouble(to.getVarname())
+                               && Double.parseDouble(incr.getVarname()) > 0 ) {
+                               incr = TemplateUtils.createCNodeData(new 
LiteralOp("-"+incr.getVarname()), true);
+                       }
+                       out = new CNodeBinary(from, incr, BinType.SEQ_RIX);
+               }
+               else if( HopRewriteUtils.isTransposeOperation(hop) ) {
                        out = 
TemplateUtils.skipTranspose(tmp.get(hop.getHopID()), 
                                hop, tmp, compileLiterals);
                        if( out instanceof CNodeData && 
!inHops.contains(hop.getInput().get(0)) )
                                inHops.add(hop.getInput().get(0));
                }
-               else if(hop instanceof UnaryOp)
-               {
+               else if(hop instanceof UnaryOp) {
                        CNode cdata1 = 
tmp.get(hop.getInput().get(0).getHopID());
                        
                        // if one input is a matrix then we need to do vector 
by scalar operations
@@ -360,8 +374,7 @@ public class TemplateRow extends TemplateBase
                                out = new CNodeUnary(cdata1, 
UnaryType.valueOf(primitiveOpName));
                        }
                }
-               else if(HopRewriteUtils.isBinary(hop, OpOp2.CBIND)) 
-               {
+               else if(HopRewriteUtils.isBinary(hop, OpOp2.CBIND)) {
                        //special case for cbind with zeros
                        CNode cdata1 = 
tmp.get(hop.getInput().get(0).getHopID());
                        CNode cdata2 = null;
@@ -425,8 +438,7 @@ public class TemplateRow extends TemplateBase
                                out = new CNodeBinary(cdata1, cdata2, 
BinType.valueOf(primitiveOpName));
                        }
                }
-               else if(hop instanceof TernaryOp) 
-               {
+               else if(hop instanceof TernaryOp) {
                        TernaryOp top = (TernaryOp) hop;
                        CNode cdata1 = 
tmp.get(hop.getInput().get(0).getHopID());
                        CNode cdata2 = 
tmp.get(hop.getInput().get(1).getHopID());
@@ -440,8 +452,7 @@ public class TemplateRow extends TemplateBase
                        out = new CNodeTernary(cdata1, cdata2, cdata3, 
                                TernaryType.valueOf(top.getOp().toString()));
                }
-               else if(HopRewriteUtils.isNary(hop, OpOpN.CBIND)) 
-               {
+               else if(HopRewriteUtils.isNary(hop, OpOpN.CBIND)) {
                        CNode[] inputs = new CNode[hop.getInput().size()];
                        for( int i=0; i<hop.getInput().size(); i++ ) {
                                Hop c = hop.getInput().get(i);
@@ -454,19 +465,16 @@ public class TemplateRow extends TemplateBase
                        }
                        out = new CNodeNary(inputs, NaryType.VECT_CBIND);
                }
-               else if( hop instanceof ParameterizedBuiltinOp )
-               {
+               else if( hop instanceof ParameterizedBuiltinOp ) {
                        CNode cdata1 = 
tmp.get(((ParameterizedBuiltinOp)hop).getTargetHop().getHopID());
                        cdata1 = TemplateUtils.wrapLookupIfNecessary(cdata1, 
hop.getInput().get(0));
-                       
                        CNode cdata2 = 
tmp.get(((ParameterizedBuiltinOp)hop).getParameterHop("pattern").getHopID());
                        CNode cdata3 = 
tmp.get(((ParameterizedBuiltinOp)hop).getParameterHop("replacement").getHopID());
                        TernaryType ttype = (cdata2.isLiteral() && 
cdata2.getVarname().equals("Double.NaN")) ?
-                                       TernaryType.REPLACE_NAN : 
TernaryType.REPLACE;
+                               TernaryType.REPLACE_NAN : TernaryType.REPLACE;
                        out = new CNodeTernary(cdata1, cdata2, cdata3, ttype);
                }
-               else if( hop instanceof IndexingOp ) 
-               {
+               else if( hop instanceof IndexingOp ) {
                        CNode cdata1 = 
tmp.get(hop.getInput().get(0).getHopID());
                        out = new CNodeTernary(cdata1, 
                                TemplateUtils.createCNodeData(new 
LiteralOp(hop.getInput().get(0).getDim2()), true),
@@ -475,7 +483,7 @@ public class TemplateRow extends TemplateBase
                }
                
                if( out == null ) {
-                       throw new RuntimeException(hop.getHopID()+" 
"+hop.getOpString());
+                       throw new HopsException(hop.getHopID()+" 
"+hop.getOpString());
                }
                
                if( out.getDataType().isMatrix() ) {

http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java 
b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
index b833876..4a61678 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
@@ -239,6 +239,19 @@ public class TemplateUtils
                throw new RuntimeException("Undefined outer product type for 
hop "+out.getHopID());
        }
        
+       /**
+        * Obtains a literal data node for the given node. A node that already
+        * reports itself as literal is returned unchanged; otherwise a node is
+        * created via createCNodeData from a LiteralOp over the variable name.
+        *
+        * @param node input node, cast to CNodeData without a prior type check
+        * @return the given node if it is a literal, otherwise the created node
+        */
+       public static CNodeData getLiteral(CNode node) {
+               CNodeData data = (CNodeData) node;
+               if( data.isLiteral() )
+                       return data;
+               return createCNodeData(new LiteralOp(node.getVarname()), true);
+       }
+       
+       /**
+        * Checks whether the given node is a CNodeData that reports itself
+        * as a literal.
+        *
+        * @param node node to test
+        * @return true only for literal data nodes
+        */
+       public static boolean isLiteral(CNode node) {
+               if( !(node instanceof CNodeData) )
+                       return false;
+               return ((CNodeData)node).isLiteral();
+       }
+       
+       /**
+        * Checks whether the given node is a literal data node whose variable
+        * name equals the given value.
+        *
+        * @param node node to test
+        * @param val expected variable name of the literal
+        * @return true if the node is a literal with a matching variable name
+        */
+       public static boolean isLiteral(CNode node, String val) {
+               if( !isLiteral(node) )
+                       return false;
+               String name = ((CNodeData)node).getVarname();
+               return name.equals(val);
+       }
+       
        public static boolean isLookup(CNode node, boolean includeRC1) {
                return isUnary(node, UnaryType.LOOKUP_C, UnaryType.LOOKUP_RC)
                        || (includeRC1 && isUnary(node, UnaryType.LOOKUP_R))

http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java 
b/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
index 427b075..3a0c3f2 100644
--- a/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
+++ b/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
@@ -485,6 +485,13 @@ public class HopRewriteUtils
                        && ArrayUtils.contains(ops, ((DataGenOp)hop).getOp()));
        }
        
+       /**
+        * Checks whether the given hop passes isDataGenOp for the given methods
+        * and all of its inputs are literal operators.
+        *
+        * @param hop hop to test
+        * @param ops data generation methods forwarded to isDataGenOp
+        * @return true if the operator check succeeds and every input is a LiteralOp
+        */
+       public static boolean isDataGenOpWithLiteralInputs(Hop hop, DataGenMethod... ops) {
+               //check the operator first: if it fails the result is false anyway,
+               //so the inputs of non-qualifying hops are never dereferenced
+               if( !isDataGenOp(hop, ops) )
+                       return false;
+               //stop at the first non-literal input instead of scanning all inputs
+               for( Hop c : hop.getInput() )
+                       if( !(c instanceof LiteralOp) )
+                               return false;
+               return true;
+       }
+       
        public static boolean isDataGenOpWithConstantValue(Hop hop) {
                return hop instanceof DataGenOp
                        && ((DataGenOp)hop).getOp()==DataGenMethod.RAND
@@ -1167,13 +1174,10 @@ public class HopRewriteUtils
                        || (isColumnRightIndexing(input) && 
size.getInput().get(0)==input.getInput().get(0))));
        }
        
-       public static boolean hasOnlyWriteParents( Hop hop, boolean 
inclTransient, boolean inclPersistent )
-       {
+       public static boolean hasOnlyWriteParents( Hop hop, boolean 
inclTransient, boolean inclPersistent ) {
                boolean ret = true;
-               
                ArrayList<Hop> parents = hop.getParent();
-               for( Hop p : parents )
-               {
+               for( Hop p : parents ) {
                        if( inclTransient && inclPersistent )
                                ret &= ( p instanceof DataOp && 
(((DataOp)p).getDataOpType()==DataOpTypes.TRANSIENTWRITE
                                || 
((DataOp)p).getDataOpType()==DataOpTypes.PERSISTENTWRITE));
@@ -1182,8 +1186,14 @@ public class HopRewriteUtils
                        else if(inclPersistent)
                                ret &= ( p instanceof DataOp && 
((DataOp)p).getDataOpType()==DataOpTypes.PERSISTENTWRITE);
                }
-                       
-                               
+               return ret;
+       }
+       
+       /**
+        * Checks whether every parent of the given hop is a unary or binary
+        * operator. With disallowLhs set, a binary parent only qualifies if the
+        * hop is its second input.
+        *
+        * @param hop hop whose parents are inspected
+        * @param disallowLhs if true, binary parents must consume hop as input 1
+        * @return true if all parents qualify (also true if there are no parents)
+        */
+       public static boolean hasOnlyUnaryBinaryParents(Hop hop, boolean disallowLhs) {
+               boolean ret = true;
+               for( Hop parent : hop.getParent() ) {
+                       //binary parents are subject to the optional position constraint
+                       boolean binaryOk = parent instanceof BinaryOp
+                               && (!disallowLhs || parent.getInput().get(1)==hop);
+                       ret &= (parent instanceof UnaryOp || binaryOk);
+               }
                return ret;
        }
        

http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java 
b/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java
index 20e6a95..5ec18f5 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java
@@ -111,6 +111,10 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
        
        @Override
        public ScalarObject execute(ArrayList<MatrixBlock> inputs, 
ArrayList<ScalarObject> scalarObjects, int k) {
+               return execute(inputs, scalarObjects, k, 0);
+       }
+       
+       public ScalarObject execute(ArrayList<MatrixBlock> inputs, 
ArrayList<ScalarObject> scalarObjects, int k, long rix) {
                //sanity check
                if( inputs==null || inputs.size() < 1  )
                        throw new RuntimeException("Invalid input arguments.");
@@ -136,11 +140,11 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                if( k <= 1 ) //SINGLE-THREADED
                {
                        if( inputs.get(0) instanceof CompressedMatrixBlock )
-                               ret = 
executeCompressedAndAgg((CompressedMatrixBlock)a, b, scalars, m, n, sparseSafe, 
0, m);
+                               ret = 
executeCompressedAndAgg((CompressedMatrixBlock)a, b, scalars, m, n, sparseSafe, 
0, m, rix);
                        else if( !inputs.get(0).isInSparseFormat() )
-                               ret = executeDenseAndAgg(a.getDenseBlock(), b, 
scalars, m, n, sparseSafe, 0, m);
+                               ret = executeDenseAndAgg(a.getDenseBlock(), b, 
scalars, m, n, sparseSafe, 0, m, rix);
                        else
-                               ret = executeSparseAndAgg(a.getSparseBlock(), 
b, scalars, m, n, sparseSafe, 0, m);
+                               ret = executeSparseAndAgg(a.getSparseBlock(), 
b, scalars, m, n, sparseSafe, 0, m, rix);
                }
                else  //MULTI-THREADED
                {
@@ -187,11 +191,15 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
 
        @Override
        public MatrixBlock execute(ArrayList<MatrixBlock> inputs, 
ArrayList<ScalarObject> scalarObjects, MatrixBlock out) {
-               return execute(inputs, scalarObjects, out, 1);
+               return execute(inputs, scalarObjects, out, 1, 0);
        }
        
        @Override
        public MatrixBlock execute(ArrayList<MatrixBlock> inputs, 
ArrayList<ScalarObject> scalarObjects, MatrixBlock out, int k) {
+               return execute(inputs, scalarObjects, out, k, 0);
+       }
+       
+       public MatrixBlock execute(ArrayList<MatrixBlock> inputs, 
ArrayList<ScalarObject> scalarObjects, MatrixBlock out, int k, long rix) {
                //sanity check
                if( inputs==null || inputs.size() < 1 || out==null )
                        throw new RuntimeException("Invalid input arguments.");
@@ -228,11 +236,11 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                if( k <= 1 ) //SINGLE-THREADED
                {
                        if( inputs.get(0) instanceof CompressedMatrixBlock )
-                               lnnz = 
executeCompressed((CompressedMatrixBlock)a, b, scalars, out, m, n, sparseSafe, 
0, m);
+                               lnnz = 
executeCompressed((CompressedMatrixBlock)a, b, scalars, out, m, n, sparseSafe, 
0, m, rix);
                        else if( !inputs.get(0).isInSparseFormat() )
-                               lnnz = executeDense(a.getDenseBlock(), b, 
scalars, out, m, n, sparseSafe, 0, m);
+                               lnnz = executeDense(a.getDenseBlock(), b, 
scalars, out, m, n, sparseSafe, 0, m, rix);
                        else
-                               lnnz = executeSparse(a.getSparseBlock(), b, 
scalars, out, m, n, sparseSafe, 0, m);
+                               lnnz = executeSparse(a.getSparseBlock(), b, 
scalars, out, m, n, sparseSafe, 0, m, rix);
                }
                else  //MULTI-THREADED
                {
@@ -286,42 +294,42 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
        //function dispatch
        
        private long executeDense(DenseBlock a, SideInput[] b, double[] 
scalars, 
-                       MatrixBlock out, int m, int n, boolean sparseSafe, int 
rl, int ru) {
+                       MatrixBlock out, int m, int n, boolean sparseSafe, int 
rl, int ru, long rix) {
                DenseBlock c = out.getDenseBlock();
                SideInput[] lb = createSparseSideInputs(b);
                
                if( _type == CellType.NO_AGG ) {
-                       return executeDenseNoAgg(a, lb, scalars, c, m, n, 
sparseSafe, rl, ru);
+                       return executeDenseNoAgg(a, lb, scalars, c, m, n, 
sparseSafe, rl, ru, rix);
                }
                else if( _type == CellType.ROW_AGG ) {
                        if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ )
-                               return executeDenseRowAggSum(a, lb, scalars, c, 
m, n, sparseSafe, rl, ru);
+                               return executeDenseRowAggSum(a, lb, scalars, c, 
m, n, sparseSafe, rl, ru, rix);
                        else
-                               return executeDenseRowAggMxx(a, lb, scalars, c, 
m, n, sparseSafe, rl, ru);
+                               return executeDenseRowAggMxx(a, lb, scalars, c, 
m, n, sparseSafe, rl, ru, rix);
                }
                else if( _type == CellType.COL_AGG ) {
                        if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ )
-                               return executeDenseColAggSum(a, lb, scalars, c, 
m, n, sparseSafe, rl, ru);
+                               return executeDenseColAggSum(a, lb, scalars, c, 
m, n, sparseSafe, rl, ru, rix);
                        else
-                               return executeDenseColAggMxx(a, lb, scalars, c, 
m, n, sparseSafe, rl, ru);
+                               return executeDenseColAggMxx(a, lb, scalars, c, 
m, n, sparseSafe, rl, ru, rix);
                }
                return -1;
        }
        
        private double executeDenseAndAgg(DenseBlock a, SideInput[] b, double[] 
scalars, 
-                       int m, int n, boolean sparseSafe, int rl, int ru)
+                       int m, int n, boolean sparseSafe, int rl, int ru, long 
rix)
        {
                SideInput[] lb = createSparseSideInputs(b);
                
                //numerically stable aggregation for sum/sum_sq
                if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ )
-                       return executeDenseAggSum(a, lb, scalars, m, n, 
sparseSafe, rl, ru);
+                       return executeDenseAggSum(a, lb, scalars, m, n, 
sparseSafe, rl, ru, rix);
                else
-                       return executeDenseAggMxx(a, lb, scalars, m, n, 
sparseSafe, rl, ru);
+                       return executeDenseAggMxx(a, lb, scalars, m, n, 
sparseSafe, rl, ru, rix);
        }
        
        private long executeSparse(SparseBlock sblock, SideInput[] b, double[] 
scalars,
-                       MatrixBlock out, int m, int n, boolean sparseSafe, int 
rl, int ru)
+                       MatrixBlock out, int m, int n, boolean sparseSafe, int 
rl, int ru, long rix)
        {
                if( sparseSafe && sblock == null )
                        return 0;
@@ -329,47 +337,47 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                SideInput[] lb = createSparseSideInputs(b);
                if( _type == CellType.NO_AGG ) {
                        if( out.isInSparseFormat() )
-                               return executeSparseNoAggSparse(sblock, lb, 
scalars, out, m, n, sparseSafe, rl, ru);
+                               return executeSparseNoAggSparse(sblock, lb, 
scalars, out, m, n, sparseSafe, rl, ru, rix);
                        else
-                               return executeSparseNoAggDense(sblock, lb, 
scalars, out, m, n, sparseSafe, rl, ru);
+                               return executeSparseNoAggDense(sblock, lb, 
scalars, out, m, n, sparseSafe, rl, ru, rix);
                }
                else if( _type == CellType.ROW_AGG ) {
                        if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ )
-                               return executeSparseRowAggSum(sblock, lb, 
scalars, out, m, n, sparseSafe, rl, ru);
+                               return executeSparseRowAggSum(sblock, lb, 
scalars, out, m, n, sparseSafe, rl, ru, rix);
                        else
-                               return executeSparseRowAggMxx(sblock, lb, 
scalars, out, m, n, sparseSafe, rl, ru);
+                               return executeSparseRowAggMxx(sblock, lb, 
scalars, out, m, n, sparseSafe, rl, ru, rix);
                }
                else if( _type == CellType.COL_AGG ) {
                        if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ )
-                               return executeSparseColAggSum(sblock, lb, 
scalars, out, m, n, sparseSafe, rl, ru);
+                               return executeSparseColAggSum(sblock, lb, 
scalars, out, m, n, sparseSafe, rl, ru, rix);
                        else
-                               return executeSparseColAggMxx(sblock, lb, 
scalars, out, m, n, sparseSafe, rl, ru);
+                               return executeSparseColAggMxx(sblock, lb, 
scalars, out, m, n, sparseSafe, rl, ru, rix);
                }
                
                return -1;
        }
        
        private double executeSparseAndAgg(SparseBlock sblock, SideInput[] b, 
double[] scalars,
-                       int m, int n, boolean sparseSafe, int rl, int ru)
+                       int m, int n, boolean sparseSafe, int rl, int ru, long 
rix)
        {
                if( sparseSafe && sblock == null )
                        return 0;
                
                SideInput[] lb = createSparseSideInputs(b);
                if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ )
-                       return executeSparseAggSum(sblock, lb, scalars, m, n, 
sparseSafe, rl, ru);
+                       return executeSparseAggSum(sblock, lb, scalars, m, n, 
sparseSafe, rl, ru, rix);
                else
-                       return executeSparseAggMxx(sblock, lb, scalars, m, n, 
sparseSafe, rl, ru);
+                       return executeSparseAggMxx(sblock, lb, scalars, m, n, 
sparseSafe, rl, ru, rix);
        }
        
        private long executeCompressed(CompressedMatrixBlock a, SideInput[] b, 
double[] scalars,
-                       MatrixBlock out, int m, int n, boolean sparseSafe, int 
rl, int ru)
+                       MatrixBlock out, int m, int n, boolean sparseSafe, int 
rl, int ru, long rix)
        {
                //NOTE: we don't create sparse side inputs w/ row-major cursors 
because 
                //compressed data is access in a column-major order 
                
                if( _type == CellType.NO_AGG ) {
-                       long lnnz = executeCompressedNoAgg(a, b, scalars, out, 
m, n, sparseSafe, rl, ru);
+                       long lnnz = executeCompressedNoAgg(a, b, scalars, out, 
m, n, sparseSafe, rl, ru, rix);
                        if( out.isInSparseFormat() )
                                out.sortSparseRows(rl, ru);
                        return lnnz;
@@ -377,38 +385,38 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                else if( _type == CellType.ROW_AGG ) {
                        double[] c = out.getDenseBlockValues();
                        if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ )
-                               return executeCompressedRowAggSum(a, b, 
scalars, c, m, n, sparseSafe, rl, ru);
+                               return executeCompressedRowAggSum(a, b, 
scalars, c, m, n, sparseSafe, rl, ru, rix);
                        else
-                               return executeCompressedRowAggMxx(a, b, 
scalars, c, m, n, sparseSafe, rl, ru);
+                               return executeCompressedRowAggMxx(a, b, 
scalars, c, m, n, sparseSafe, rl, ru, rix);
                }
                else if( _type == CellType.COL_AGG ) {
                        double[] c = out.getDenseBlockValues();
                        if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ )
-                               return executeCompressedColAggSum(a, b, 
scalars, c, m, n, sparseSafe, rl, ru);
+                               return executeCompressedColAggSum(a, b, 
scalars, c, m, n, sparseSafe, rl, ru, rix);
                        else
-                               return executeCompressedColAggMxx(a, b, 
scalars, c, m, n, sparseSafe, rl, ru);
+                               return executeCompressedColAggMxx(a, b, 
scalars, c, m, n, sparseSafe, rl, ru, rix);
                }
                return -1;
        }
        
        private double executeCompressedAndAgg(CompressedMatrixBlock a, 
SideInput[] b, double[] scalars,
-                       int m, int n, boolean sparseSafe, int rl, int ru)
+                       int m, int n, boolean sparseSafe, int rl, int ru, long 
rix)
        {
                //NOTE: we don't create sparse side inputs w/ row-major cursors 
because 
                //compressed data is access in a column-major order 
                
                //numerically stable aggregation for sum/sum_sq
                if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ )
-                       return executeCompressedAggSum(a, b, scalars, m, n, 
sparseSafe, rl, ru);
+                       return executeCompressedAggSum(a, b, scalars, m, n, 
sparseSafe, rl, ru, rix);
                else
-                       return executeCompressedAggMxx(a, b, scalars, m, n, 
sparseSafe, rl, ru);
+                       return executeCompressedAggMxx(a, b, scalars, m, n, 
sparseSafe, rl, ru, rix);
        }
        
        /////////
        //core operator skeletons for dense, sparse, and compressed
 
        private long executeDenseNoAgg(DenseBlock a, SideInput[] b, double[] 
scalars,
-                       DenseBlock c, int m, int n, boolean sparseSafe, int rl, 
int ru)
+                       DenseBlock c, int m, int n, boolean sparseSafe, int rl, 
int ru, long rix)
        {
                long lnnz = 0;
                if( a == null && !sparseSafe ) {
@@ -416,7 +424,7 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                                double[] cvals = c.values(i);
                                int cix = c.pos(i);
                                for( int j=0; j<n; j++ )
-                                       lnnz += ((cvals[cix+j] = genexec(0, b, 
scalars, m, n, i, j))!=0) ? 1 : 0;
+                                       lnnz += ((cvals[cix+j] = genexec(0, b, 
scalars, m, n, rix+i, i, j))!=0) ? 1 : 0;
                        }
                }
                else if( a != null ) {
@@ -427,7 +435,7 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                                for( int j=0; j<n; j++ ) {
                                        double aval = avals[ix+j];
                                        if( aval != 0 || !sparseSafe)
-                                               lnnz += ((cvals[ix+j] = 
genexec(aval, b, scalars, m, n, i, j))!=0) ? 1 : 0;
+                                               lnnz += ((cvals[ix+j] = 
genexec(aval, b, scalars, m, n, rix+i, i, j))!=0) ? 1 : 0;
                                }
                        }
                }
@@ -436,7 +444,7 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
        }
        
        private long executeDenseRowAggSum(DenseBlock a, SideInput[] b, 
double[] scalars,
-               DenseBlock c, int m, int n, boolean sparseSafe, int rl, int ru)
+               DenseBlock c, int m, int n, boolean sparseSafe, int rl, int ru, 
long rix)
        {
                //note: output always single block
                double[] lc = c.valuesAt(0);
@@ -448,7 +456,7 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                        for( int i=rl; i<ru; i++ ) {
                                kbuff.set(0, 0);
                                for( int j=0; j<n; j++ )
-                                       kplus.execute2(kbuff, genexec(0, b, 
scalars, m, n, i, j));
+                                       kplus.execute2(kbuff, genexec(0, b, 
scalars, m, n, rix+i, i, j));
                                lnnz += ((lc[i] = kbuff._sum)!=0) ? 1 : 0;
                        }
                }
@@ -460,7 +468,7 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                                for( int j=0; j<n; j++ ) {
                                        double aval = avals[aix+j];
                                        if( aval != 0 || !sparseSafe)
-                                               kplus.execute2(kbuff, 
genexec(aval, b, scalars, m, n, i, j));
+                                               kplus.execute2(kbuff, 
genexec(aval, b, scalars, m, n, rix+i, i, j));
                                }
                                lnnz += ((lc[i] = kbuff._sum)!=0) ? 1 : 0;
                        }
@@ -470,7 +478,7 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
        }
        
        private long executeDenseRowAggMxx(DenseBlock a, SideInput[] b, 
double[] scalars,
-                       DenseBlock c, int m, int n, boolean sparseSafe, int rl, 
int ru)
+                       DenseBlock c, int m, int n, boolean sparseSafe, int rl, 
int ru, long rix)
        {
                double[] lc = c.valuesAt(0); //single block
                
@@ -481,7 +489,7 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                        for( int i=rl; i<ru; i++ ) {
                                double tmp = initialVal;
                                for( int j=0; j<n; j++ )
-                                       tmp = vfun.execute(tmp, genexec(0, b, 
scalars, m, n, i, j));
+                                       tmp = vfun.execute(tmp, genexec(0, b, 
scalars, m, n, rix+i, i, j));
                                lnnz += ((lc[i] = tmp)!=0) ? 1 : 0;
                        }
                }
@@ -493,7 +501,7 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                                for( int j=0; j<n; j++ ) {
                                        double aval = avals[aix + j];
                                        if( aval != 0 || !sparseSafe)
-                                               tmp = vfun.execute(tmp, 
genexec(aval, b, scalars, m, n, i, j));
+                                               tmp = vfun.execute(tmp, 
genexec(aval, b, scalars, m, n, rix+i, i, j));
                                }
                                if( sparseSafe && 
UtilFunctions.containsZero(avals, aix, n) )
                                        tmp = vfun.execute(tmp, 0);
@@ -504,7 +512,7 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
        }
        
        private long executeDenseColAggSum(DenseBlock a, SideInput[] b, 
double[] scalars,
-               DenseBlock c, int m, int n, boolean sparseSafe, int rl, int ru)
+               DenseBlock c, int m, int n, boolean sparseSafe, int rl, int ru, 
long rix)
        {
                double[] lc = c.valuesAt(0); //single block
                
@@ -516,7 +524,7 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                        for( int i=rl; i<ru; i++ )
                                for( int j=0; j<n; j++ ) {
                                        kbuff.set(lc[j], corr[j]);
-                                       kplus.execute2(kbuff, genexec(0, b, 
scalars, m, n, i, j));
+                                       kplus.execute2(kbuff, genexec(0, b, 
scalars, m, n, rix+i, i, j));
                                        lc[j] = kbuff._sum;
                                        corr[j] = kbuff._correction;
                                }
@@ -529,7 +537,7 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                                        double aval = avals[aix + j];
                                        if( aval != 0 || !sparseSafe ) {
                                                kbuff.set(lc[j], corr[j]);
-                                               kplus.execute2(kbuff, 
genexec(aval, b, scalars, m, n, i, j));
+                                               kplus.execute2(kbuff, 
genexec(aval, b, scalars, m, n, rix+i, i, j));
                                                lc[j] = kbuff._sum;
                                                corr[j] = kbuff._correction;
                                        }
@@ -541,7 +549,7 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
        }
        
        private long executeDenseColAggMxx(DenseBlock a, SideInput[] b, 
double[] scalars,
-                       DenseBlock c, int m, int n, boolean sparseSafe, int rl, 
int ru)
+                       DenseBlock c, int m, int n, boolean sparseSafe, int rl, 
int ru, long rix)
        {
                double[] lc = c.valuesAt(0); //single block
                
@@ -552,7 +560,7 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                if( a == null && !sparseSafe ) { //empty
                        for( int i=rl; i<ru; i++ )
                                for( int j=0; j<n; j++ )
-                                       lc[j] = vfun.execute(lc[j], genexec(0, 
b, scalars, m, n, i, j));
+                                       lc[j] = vfun.execute(lc[j], genexec(0, 
b, scalars, m, n, rix+i, i, j));
                }
                else if( a != null ) { //general case
                        int[] counts = new int[n];
@@ -562,7 +570,7 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                                for( int j=0; j<n; j++ ) {
                                        double aval = avals[aix + j];
                                        if( aval != 0 || !sparseSafe ) {
-                                               lc[j] = vfun.execute(lc[j], 
genexec(aval, b, scalars, m, n, i, j));
+                                               lc[j] = vfun.execute(lc[j], 
genexec(aval, b, scalars, m, n, rix+i, i, j));
                                                counts[j] ++;
                                        }
                                }
@@ -576,7 +584,7 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
        }
        
        private double executeDenseAggSum(DenseBlock a, SideInput[] b, double[] 
scalars,
-                       int m, int n, boolean sparseSafe, int rl, int ru)
+                       int m, int n, boolean sparseSafe, int rl, int ru, long 
rix)
        {
                KahanFunction kplus = (KahanFunction) getAggFunction();
                KahanObject kbuff = new KahanObject(0, 0);
@@ -584,7 +592,7 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                if( a == null && !sparseSafe ) {
                        for( int i=rl; i<ru; i++ )
                                for( int j=0; j<n; j++ )
-                                       kplus.execute2(kbuff, genexec(0, b, 
scalars, m, n, i, j));
+                                       kplus.execute2(kbuff, genexec(0, b, 
scalars, m, n, rix+i, i, j));
                }
                else if( a != null ) {
                        for( int i=rl; i<ru; i++ ) {
@@ -593,7 +601,7 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                                for( int j=0; j<n; j++ ) {
                                        double aval = avals[aix + j];
                                        if( aval != 0 || !sparseSafe)
-                                               kplus.execute2(kbuff, 
genexec(aval, b, scalars, m, n, i, j));
+                                               kplus.execute2(kbuff, 
genexec(aval, b, scalars, m, n, rix+i, i, j));
                                }
                        }
                }
@@ -602,7 +610,7 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
        }
        
        private double executeDenseAggMxx(DenseBlock a, SideInput[] b, double[] 
scalars,
-                       int m, int n, boolean sparseSafe, int rl, int ru)
+                       int m, int n, boolean sparseSafe, int rl, int ru, long 
rix)
        {
                //safe aggregation for min/max w/ handling of zero entries
                //note: sparse safe with zero value as min/max handled outside
@@ -612,7 +620,7 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                if( a == null && !sparseSafe ) {
                        for( int i=rl; i<ru; i++ )
                                for( int j=0; j<n; j++ )
-                                       ret = vfun.execute(ret, genexec(0, b, 
scalars, m, n, i, j));
+                                       ret = vfun.execute(ret, genexec(0, b, 
scalars, m, n, rix+i, i, j));
                }
                else if( a != null ) {
                        for( int i=rl; i<ru; i++ ) {
@@ -621,7 +629,7 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                                for( int j=0; j<n; j++ ) {
                                        double aval = avals[aix + j];
                                        if( aval != 0 || !sparseSafe)
-                                               ret = vfun.execute(ret, 
genexec(aval, b, scalars, m, n, i, j));
+                                               ret = vfun.execute(ret, 
genexec(aval, b, scalars, m, n, rix+i, i, j));
                                }
                        }
                }
@@ -630,7 +638,7 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
        }
        
        private long executeSparseNoAggSparse(SparseBlock sblock, SideInput[] 
b, double[] scalars,
-                       MatrixBlock out, int m, int n, boolean sparseSafe, int 
rl, int ru)
+                       MatrixBlock out, int m, int n, boolean sparseSafe, int 
rl, int ru, long rix)
        {
                //note: sequential scan algorithm for both sparse-safe and 
-unsafe
                //in order to avoid binary search for sparse-unsafe
@@ -649,16 +657,16 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                                        //process zeros before current non-zero
                                        if( !sparseSafe )
                                                for(int j=lastj+1; j<aix[k]; 
j++)
-                                                       c.append(i, j, 
genexec(0, b, scalars, m, n, i, j));
+                                                       c.append(i, j, 
genexec(0, b, scalars, m, n, rix+i, i, j));
                                        //process current non-zero
                                        lastj = aix[k];
-                                       c.append(i, lastj, genexec(avals[k], b, 
scalars, m, n, i, lastj));
+                                       c.append(i, lastj, genexec(avals[k], b, 
scalars, m, n, rix+i, i, lastj));
                                }
                        }
                        //process empty rows or remaining zeros
                        if( !sparseSafe )
                                for(int j=lastj+1; j<n; j++)
-                                       c.append(i, j, genexec(0, b, scalars, 
m, n, i, j));
+                                       c.append(i, j, genexec(0, b, scalars, 
m, n, rix+i, i, j));
                        lnnz += c.size(i);
                }
                
@@ -666,7 +674,7 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
        }
        
        private long executeSparseNoAggDense(SparseBlock sblock, SideInput[] b, 
double[] scalars,
-                       MatrixBlock out, int m, int n, boolean sparseSafe, int 
rl, int ru)
+                       MatrixBlock out, int m, int n, boolean sparseSafe, int 
rl, int ru, long rix)
        {
                //note: sequential scan algorithm for both sparse-safe and 
-unsafe
                //in order to avoid binary search for sparse-unsafe
@@ -686,10 +694,10 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                                        //process zeros before current non-zero
                                        if( !sparseSafe )
                                                for(int j=lastj+1; j<aix[k]; 
j++)
-                                                       lnnz += 
((cvals[cix+j]=genexec(0, b, scalars, m, n, i, j))!=0)?1:0;
+                                                       lnnz += 
((cvals[cix+j]=genexec(0, b, scalars, m, n, rix+i, i, j))!=0)?1:0;
                                        //process current non-zero
                                        lastj = aix[k];
-                                       lnnz += 
((cvals[cix+lastj]=genexec(avals[k], b, scalars, m, n, i, lastj))!=0)?1:0;
+                                       lnnz += 
((cvals[cix+lastj]=genexec(avals[k], b, scalars, m, n, rix+i, i, 
lastj))!=0)?1:0;
                                }
                        }
                        //process empty rows or remaining zeros
@@ -697,14 +705,14 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                                for(int j=lastj+1; j<n; j++) {
                                        double[] cvals = c.values(i);
                                        int cix = c.pos(i);
-                                       lnnz += ((cvals[cix+j]=genexec(0, b, 
scalars, m, n, i, j))!=0)?1:0;
+                                       lnnz += ((cvals[cix+j]=genexec(0, b, 
scalars, m, n, rix+i, i, j))!=0)?1:0;
                                }
                }
                return lnnz;
        }
        
        private long executeSparseRowAggSum(SparseBlock sblock, SideInput[] b, 
double[] scalars,
-                       MatrixBlock out, int m, int n, boolean sparseSafe, int 
rl, int ru)
+                       MatrixBlock out, int m, int n, boolean sparseSafe, int 
rl, int ru, long rix)
        {
                KahanFunction kplus = (KahanFunction) getAggFunction();
                KahanObject kbuff = new KahanObject(0, 0);
@@ -726,23 +734,23 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                                        //process zeros before current non-zero
                                        if( !sparseSafe )
                                                for(int j=lastj+1; j<aix[k]; 
j++)
-                                                       kplus.execute2(kbuff, 
genexec(0, b, scalars, m, n, i, j));
+                                                       kplus.execute2(kbuff, 
genexec(0, b, scalars, m, n, rix+i, i, j));
                                        //process current non-zero
                                        lastj = aix[k];
-                                       kplus.execute2(kbuff, genexec(avals[k], 
b, scalars, m, n, i, lastj));
+                                       kplus.execute2(kbuff, genexec(avals[k], 
b, scalars, m, n, rix+i, i, lastj));
                                }
                        }
                        //process empty rows or remaining zeros
                        if( !sparseSafe )
                                for(int j=lastj+1; j<n; j++)
-                                       kplus.execute2(kbuff, genexec(0, b, 
scalars, m, n, i, j));
+                                       kplus.execute2(kbuff, genexec(0, b, 
scalars, m, n, rix+i, i, j));
                        lnnz += ((c[i] = kbuff._sum)!=0) ? 1 : 0;
                }
                return lnnz;
        }
        
        private long executeSparseRowAggMxx(SparseBlock sblock, SideInput[] b, 
double[] scalars,
-                       MatrixBlock out, int m, int n, boolean sparseSafe, int 
rl, int ru)
+                       MatrixBlock out, int m, int n, boolean sparseSafe, int 
rl, int ru, long rix)
        {
                double initialVal = (_aggOp==AggOp.MIN) ? 
Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY;
                ValueFunction vfun = getAggFunction();
@@ -764,23 +772,23 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                                        //process zeros before current non-zero
                                        if( !sparseSafe )
                                                for(int j=lastj+1; j<aix[k]; 
j++)
-                                                       tmp = vfun.execute(tmp, 
genexec(0, b, scalars, m, n, i, j));
+                                                       tmp = vfun.execute(tmp, 
genexec(0, b, scalars, m, n, rix+i, i, j));
                                        //process current non-zero
                                        lastj = aix[k];
-                                       tmp = vfun.execute( tmp, 
genexec(avals[k], b, scalars, m, n, i, lastj));
+                                       tmp = vfun.execute( tmp, 
genexec(avals[k], b, scalars, m, n, rix+i, i, lastj));
                                }
                        }
                        //process empty rows or remaining zeros
                        if( !sparseSafe )
                                for(int j=lastj+1; j<n; j++)
-                                       tmp = vfun.execute(tmp, genexec(0, b, 
scalars, m, n, i, j));
+                                       tmp = vfun.execute(tmp, genexec(0, b, 
scalars, m, n, rix+i, i, j));
                        lnnz += ((c[i] = tmp)!=0) ? 1 : 0;
                }
                return lnnz;
        }
 
        private long executeSparseColAggSum(SparseBlock sblock, SideInput[] b, 
double[] scalars,
-                       MatrixBlock out, int m, int n, boolean sparseSafe, int 
rl, int ru)
+                       MatrixBlock out, int m, int n, boolean sparseSafe, int 
rl, int ru, long rix)
        {
                KahanFunction kplus = (KahanFunction) getAggFunction();
                KahanObject kbuff = new KahanObject(0, 0);
@@ -803,14 +811,14 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                                        if( !sparseSafe )
                                                for(int j=lastj+1; j<aix[k]; 
j++) {
                                                        kbuff.set(c[j], 
corr[j]);
-                                                       kplus.execute2(kbuff, 
genexec(0, b, scalars, m, n, i, j));
+                                                       kplus.execute2(kbuff, 
genexec(0, b, scalars, m, n, rix+i, i, j));
                                                        c[j] = kbuff._sum;
                                                        corr[j] = 
kbuff._correction;
                                                }
                                        //process current non-zero
                                        lastj = aix[k];
                                        kbuff.set(c[aix[k]], corr[aix[k]]);
-                                       kplus.execute2(kbuff, genexec(avals[k], 
b, scalars, m, n, i, lastj));
+                                       kplus.execute2(kbuff, genexec(avals[k], 
b, scalars, m, n, rix+i, i, lastj));
                                        c[aix[k]] = kbuff._sum;
                                        corr[aix[k]] = kbuff._correction;
                                }
@@ -819,7 +827,7 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                        if( !sparseSafe )
                                for(int j=lastj+1; j<n; j++) {
                                        kbuff.set(c[j], corr[j]);
-                                       kplus.execute2(kbuff, genexec(0, b, 
scalars, m, n, i, j));
+                                       kplus.execute2(kbuff, genexec(0, b, 
scalars, m, n, rix+i, i, j));
                                        c[j] = kbuff._sum;
                                        corr[j] = kbuff._correction;
                                }
@@ -828,7 +836,7 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
        }
        
        private long executeSparseColAggMxx(SparseBlock sblock, SideInput[] b, 
double[] scalars,
-                       MatrixBlock out, int m, int n, boolean sparseSafe, int 
rl, int ru)
+                       MatrixBlock out, int m, int n, boolean sparseSafe, int 
rl, int ru, long rix)
        {
                double initialVal = (_aggOp==AggOp.MIN) ? 
Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY;
                ValueFunction vfun = getAggFunction();
@@ -850,26 +858,26 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                                        //process zeros before current non-zero
                                        if( !sparseSafe )
                                                for(int j=lastj+1; j<aix[k]; 
j++) {
-                                                       c[j] = 
vfun.execute(c[j], genexec(0, b, scalars, m, n, i, j));
+                                                       c[j] = 
vfun.execute(c[j], genexec(0, b, scalars, m, n, rix+i, i, j));
                                                        count[j] ++;
                                                }
                                        //process current non-zero
                                        lastj = aix[k];
-                                       c[aix[k]] = vfun.execute(c[aix[k]], 
genexec(avals[k], b, scalars, m, n, i, lastj));
+                                       c[aix[k]] = vfun.execute(c[aix[k]], 
genexec(avals[k], b, scalars, m, n, rix+i, i, lastj));
                                        count[aix[k]] ++;
                                }
                        }
                        //process empty rows or remaining zeros
                        if( !sparseSafe )
                                for(int j=lastj+1; j<n; j++)
-                                       c[j] = vfun.execute(c[j], genexec(0, b, 
scalars, m, n, i, j));
+                                       c[j] = vfun.execute(c[j], genexec(0, b, 
scalars, m, n, rix+i, i, j));
                }
                
                return -1;
        }
        
        private double executeSparseAggSum(SparseBlock sblock, SideInput[] b, 
double[] scalars,
-                       int m, int n, boolean sparseSafe, int rl, int ru)
+                       int m, int n, boolean sparseSafe, int rl, int ru, long 
rix)
        {
                KahanFunction kplus = (KahanFunction) getAggFunction();
                KahanObject kbuff = new KahanObject(0, 0);
@@ -888,22 +896,22 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                                        //process zeros before current non-zero
                                        if( !sparseSafe )
                                                for(int j=lastj+1; j<aix[k]; 
j++)
-                                                       kplus.execute2(kbuff, 
genexec(0, b, scalars, m, n, i, j));
+                                                       kplus.execute2(kbuff, 
genexec(0, b, scalars, m, n, rix+i, i, j));
                                        //process current non-zero
                                        lastj = aix[k];
-                                       kplus.execute2(kbuff, genexec(avals[k], 
b, scalars, m, n, i, lastj));
+                                       kplus.execute2(kbuff, genexec(avals[k], 
b, scalars, m, n, rix+i, i, lastj));
                                }
                        }
                        //process empty rows or remaining zeros
                        if( !sparseSafe )
                                for(int j=lastj+1; j<n; j++)
-                                       kplus.execute2(kbuff, genexec(0, b, 
scalars, m, n, i, j));
+                                       kplus.execute2(kbuff, genexec(0, b, 
scalars, m, n, rix+i, i, j));
                }
                return kbuff._sum;
        }
        
        private double executeSparseAggMxx(SparseBlock sblock, SideInput[] b, 
double[] scalars,
-                       int m, int n, boolean sparseSafe, int rl, int ru)
+                       int m, int n, boolean sparseSafe, int rl, int ru, long 
rix)
        {
                double ret = (_aggOp==AggOp.MIN) ? Double.POSITIVE_INFINITY : 
Double.NEGATIVE_INFINITY;
                ret = (sparseSafe && sblock.size() < (long)m*n) ? 0 : ret;
@@ -923,22 +931,22 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                                        //process zeros before current non-zero
                                        if( !sparseSafe )
                                                for(int j=lastj+1; j<aix[k]; 
j++)
-                                                       ret = vfun.execute(ret, 
genexec(0, b, scalars, m, n, i, j));
+                                                       ret = vfun.execute(ret, 
genexec(0, b, scalars, m, n, rix+i, i, j));
                                        //process current non-zero
                                        lastj = aix[k];
-                                       ret = vfun.execute(ret, 
genexec(avals[k], b, scalars, m, n, i, lastj));
+                                       ret = vfun.execute(ret, 
genexec(avals[k], b, scalars, m, n, rix+i, i, lastj));
                                }
                        }
                        //process empty rows or remaining zeros
                        if( !sparseSafe )
                                for(int j=lastj+1; j<n; j++)
-                                       ret = vfun.execute(ret, genexec(0, b, 
scalars, m, n, i, j));
+                                       ret = vfun.execute(ret, genexec(0, b, 
scalars, m, n, rix+i, i, j));
                }
                return ret;
        }
        
        private long executeCompressedNoAgg(CompressedMatrixBlock a, 
SideInput[] b, double[] scalars,
-                       MatrixBlock out, int m, int n, boolean sparseSafe, int 
rl, int ru)
+                       MatrixBlock out, int m, int n, boolean sparseSafe, int 
rl, int ru, long rix)
        {
                double[] c = (out.getDenseBlock() != null) ?
                        out.getDenseBlockValues() : null;
@@ -958,7 +966,7 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                Iterator<IJV> iter = a.getIterator(rl, ru, !sparseSafe);
                while( iter.hasNext() ) {
                        IJV cell = iter.next();
-                       double val = genexec(cell.getV(), b, scalars, m, n, 
cell.getI(), cell.getJ());
+                       double val = genexec(cell.getV(), b, scalars, m, n, 
rix+cell.getI(), cell.getI(), cell.getJ());
                        if( out.isInSparseFormat() ) {
                                csblock.allocate(cell.getI());
                                csblock.append(cell.getI(), cell.getJ(), val);
@@ -971,7 +979,7 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
        }
        
        private long executeCompressedRowAggSum(CompressedMatrixBlock a, 
SideInput[] b, double[] scalars,
-                       double[] c, int m, int n, boolean sparseSafe, int rl, 
int ru)
+                       double[] c, int m, int n, boolean sparseSafe, int rl, 
int ru, long rix)
        {
                KahanFunction kplus = (KahanFunction) getAggFunction();
                KahanObject kbuff = new KahanObject(0, 0);
@@ -979,7 +987,7 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                Iterator<IJV> iter = a.getIterator(rl, ru, !sparseSafe);
                while( iter.hasNext() ) {
                        IJV cell = iter.next();
-                       double val = genexec(cell.getV(), b, scalars, m, n, 
cell.getI(), cell.getJ());
+                       double val = genexec(cell.getV(), b, scalars, m, n, 
rix+cell.getI(), cell.getI(), cell.getJ());
                        kbuff.set(c[cell.getI()], 0);
                        kplus.execute2(kbuff, val);
                        c[cell.getI()] = kbuff._sum;
@@ -990,7 +998,7 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
        }
        
        private long executeCompressedRowAggMxx(CompressedMatrixBlock a, 
SideInput[] b, double[] scalars,
-                       double[] c, int m, int n, boolean sparseSafe, int rl, 
int ru)
+                       double[] c, int m, int n, boolean sparseSafe, int rl, 
int ru, long rix)
        {
                Arrays.fill(c, rl, ru, (_aggOp==AggOp.MIN) ? 
Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY);
                ValueFunction vfun = getAggFunction();
@@ -998,7 +1006,7 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                Iterator<IJV> iter = a.getIterator(rl, ru, !sparseSafe);
                while( iter.hasNext() ) {
                        IJV cell = iter.next();
-                       double val = genexec(cell.getV(), b, scalars, m, n, 
cell.getI(), cell.getJ());
+                       double val = genexec(cell.getV(), b, scalars, m, n, 
rix+cell.getI(), cell.getI(), cell.getJ());
                        c[cell.getI()] = vfun.execute(c[cell.getI()], val);
                }
                for( int i=rl; i<ru; i++ )
@@ -1007,7 +1015,7 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
        }
        
        private long executeCompressedColAggSum(CompressedMatrixBlock a, 
SideInput[] b, double[] scalars,
-               double[] c, int m, int n, boolean sparseSafe, int rl, int ru)
+               double[] c, int m, int n, boolean sparseSafe, int rl, int ru, 
long rix)
        {
                KahanFunction kplus = (KahanFunction) getAggFunction();
                KahanObject kbuff = new KahanObject(0, 0);
@@ -1016,7 +1024,7 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                Iterator<IJV> iter = a.getIterator(rl, ru, !sparseSafe);
                while( iter.hasNext() ) {
                        IJV cell = iter.next();
-                       double val = genexec(cell.getV(), b, scalars, m, n, 
cell.getI(), cell.getJ());
+                       double val = genexec(cell.getV(), b, scalars, m, n, 
rix+cell.getI(), cell.getI(), cell.getJ());
                        kbuff.set(c[cell.getJ()], corr[cell.getJ()]);
                        kplus.execute2(kbuff, val);
                        c[cell.getJ()] = kbuff._sum;
@@ -1026,7 +1034,7 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
        }
        
        private long executeCompressedColAggMxx(CompressedMatrixBlock a, 
SideInput[] b, double[] scalars,
-                       double[] c, int m, int n, boolean sparseSafe, int rl, 
int ru)
+                       double[] c, int m, int n, boolean sparseSafe, int rl, 
int ru, long rix)
        {
                Arrays.fill(c, rl, ru, (_aggOp==AggOp.MIN) ? 
Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY);
                ValueFunction vfun = getAggFunction();
@@ -1034,7 +1042,7 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                Iterator<IJV> iter = a.getIterator(rl, ru, !sparseSafe);
                while( iter.hasNext() ) {
                        IJV cell = iter.next();
-                       double val = genexec(cell.getV(), b, scalars, m, n, 
cell.getI(), cell.getJ());
+                       double val = genexec(cell.getV(), b, scalars, m, n, 
rix+cell.getI(), cell.getI(), cell.getJ());
                        c[cell.getI()] = vfun.execute(c[cell.getI()], val);
                }
                for( int i=rl; i<ru; i++ )
@@ -1043,8 +1051,9 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
        }
        
        private double executeCompressedAggSum(CompressedMatrixBlock a, 
SideInput[] b, double[] scalars,
-                       int m, int n, boolean sparseSafe, int rl, int ru)
+                       int m, int n, boolean sparseSafe, int rl, int ru, long 
rix)
        {
+               //TODO handle sequences in special case summation
                KahanFunction kplus = (KahanFunction) getAggFunction();
                KahanObject kbuff = new KahanObject(0, 0);
                KahanObject kbuff2 = new KahanObject(0, 0);
@@ -1073,7 +1082,7 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                        Iterator<IJV> iter = a.getIterator(rl, ru, !sparseSafe);
                        while( iter.hasNext() ) {
                                IJV cell = iter.next();
-                               double val = genexec(cell.getV(), b, scalars, 
m, n, cell.getI(), cell.getJ());
+                               double val = genexec(cell.getV(), b, scalars, 
m, n, rix+cell.getI(), cell.getI(), cell.getJ());
                                kplus.execute2(kbuff, val);
                        }
                }
@@ -1081,7 +1090,7 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
        }
        
        private double executeCompressedAggMxx(CompressedMatrixBlock a, 
SideInput[] b, double[] scalars,
-                       int m, int n, boolean sparseSafe, int rl, int ru)
+                       int m, int n, boolean sparseSafe, int rl, int ru, long 
rix)
        {
                //safe aggregation for min/max w/ handling of zero entries
                //note: sparse safe with zero value as min/max handled outside
@@ -1091,14 +1100,21 @@ public abstract class SpoofCellwise extends 
SpoofOperator implements Serializabl
                Iterator<IJV> iter = a.getIterator(rl, ru, !sparseSafe);
                while( iter.hasNext() ) {
                        IJV cell = iter.next();
-                       double val = genexec(cell.getV(), b, scalars, m, n, 
cell.getI(), cell.getJ());
+                       double val = genexec(cell.getV(), b, scalars, m, n, 
rix+cell.getI(), cell.getI(), cell.getJ());
                        ret = vfun.execute(ret, val);
                }
                return ret;
        }
        
+       //local execution where grix==rix
+       protected final double genexec( double a, SideInput[] b,
+               double[] scalars, int m, int n, int rix, int cix) {
+               return genexec(a, b, scalars, m, n, rix, rix, cix);
+       }
+       
+       //distributed execution with additional global row index
        protected abstract double genexec( double a, SideInput[] b,
-                       double[] scalars, int m, int n, int rowIndex, int 
colIndex);
+               double[] scalars, int m, int n, long gix, int rix, int cix);
        
        private class ParAggTask implements Callable<Double> 
        {
@@ -1126,11 +1142,11 @@ public abstract class SpoofCellwise extends 
SpoofOperator implements Serializabl
                @Override
                public Double call() {
                        if( _a instanceof CompressedMatrixBlock )
-                               return 
executeCompressedAndAgg((CompressedMatrixBlock)_a, _b, _scalars, _rlen, _clen, 
_safe, _rl, _ru);
+                               return 
executeCompressedAndAgg((CompressedMatrixBlock)_a, _b, _scalars, _rlen, _clen, 
_safe, _rl, _ru, 0);
                        else if (!_a.isInSparseFormat())
-                               return executeDenseAndAgg(_a.getDenseBlock(), 
_b, _scalars, _rlen, _clen, _safe, _rl, _ru);
+                               return executeDenseAndAgg(_a.getDenseBlock(), 
_b, _scalars, _rlen, _clen, _safe, _rl, _ru, 0);
                        else
-                               return executeSparseAndAgg(_a.getSparseBlock(), 
_b, _scalars, _rlen, _clen, _safe, _rl, _ru);
+                               return executeSparseAndAgg(_a.getSparseBlock(), 
_b, _scalars, _rlen, _clen, _safe, _rl, _ru, 0);
                }
        }
 
@@ -1166,11 +1182,11 @@ public abstract class SpoofCellwise extends 
SpoofOperator implements Serializabl
                                _c.allocateDenseBlock();
                        }
                        if( _a instanceof CompressedMatrixBlock )
-                               return 
executeCompressed((CompressedMatrixBlock)_a, _b, _scalars, _c, _rlen, _clen, 
_safe, _rl, _ru);
+                               return 
executeCompressed((CompressedMatrixBlock)_a, _b, _scalars, _c, _rlen, _clen, 
_safe, _rl, _ru, 0);
                        else if( !_a.isInSparseFormat() )
-                               return executeDense(_a.getDenseBlock(), _b, 
_scalars, _c, _rlen, _clen, _safe, _rl, _ru);
+                               return executeDense(_a.getDenseBlock(), _b, 
_scalars, _c, _rlen, _clen, _safe, _rl, _ru, 0);
                        else
-                               return executeSparse(_a.getSparseBlock(), _b, 
_scalars, _c, _rlen, _clen, _safe, _rl, _ru);
+                               return executeSparse(_a.getSparseBlock(), _b, 
_scalars, _c, _rlen, _clen, _safe, _rl, _ru, 0);
                }
                
                public MatrixBlock getResult() {

http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java 
b/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java
index 8db037e..68ca6ff 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java
@@ -73,11 +73,15 @@ public abstract class SpoofMultiAggregate extends 
SpoofOperator implements Seria
        
        @Override
        public MatrixBlock execute(ArrayList<MatrixBlock> inputs, 
ArrayList<ScalarObject> scalarObjects, MatrixBlock out) {
-               return execute(inputs, scalarObjects, out, 1);
+               return execute(inputs, scalarObjects, out, 1, 0);
        }
        
        @Override
        public MatrixBlock execute(ArrayList<MatrixBlock> inputs, 
ArrayList<ScalarObject> scalarObjects, MatrixBlock out, int k) {
+               return execute(inputs, scalarObjects, out, k, 0);
+       }
+       
+       public MatrixBlock execute(ArrayList<MatrixBlock> inputs, 
ArrayList<ScalarObject> scalarObjects, MatrixBlock out, int k, long rix) {
                //sanity check
                if( inputs==null || inputs.size() < 1  )
                        throw new RuntimeException("Invalid input arguments.");
@@ -104,11 +108,11 @@ public abstract class SpoofMultiAggregate extends 
SpoofOperator implements Seria
                if( k <= 1 ) //SINGLE-THREADED
                {
                        if( inputs.get(0) instanceof CompressedMatrixBlock )
-                               
executeCompressed((CompressedMatrixBlock)inputs.get(0), b, scalars, c, m, n, 0, 
m);
+                               
executeCompressed((CompressedMatrixBlock)inputs.get(0), b, scalars, c, m, n, 0, 
m, rix);
                        else if( !inputs.get(0).isInSparseFormat() )
-                               executeDense(inputs.get(0).getDenseBlock(), b, 
scalars, c, m, n, sparseSafe, 0, m);
+                               executeDense(inputs.get(0).getDenseBlock(), b, 
scalars, c, m, n, sparseSafe, 0, m, rix);
                        else
-                               executeSparse(inputs.get(0).getSparseBlock(), 
b, scalars, c, m, n, sparseSafe, 0, m);
+                               executeSparse(inputs.get(0).getSparseBlock(), 
b, scalars, c, m, n, sparseSafe, 0, m, rix);
                }
                else  //MULTI-THREADED
                {
@@ -141,7 +145,7 @@ public abstract class SpoofMultiAggregate extends 
SpoofOperator implements Seria
                return out;
        }
        
-       private void executeDense(DenseBlock a, SideInput[] b, double[] 
scalars, double[] c, int m, int n, boolean sparseSafe, int rl, int ru)
+       private void executeDense(DenseBlock a, SideInput[] b, double[] 
scalars, double[] c, int m, int n, boolean sparseSafe, int rl, int ru, long rix)
        {
                SideInput[] lb = createSparseSideInputs(b);
                
@@ -149,20 +153,20 @@ public abstract class SpoofMultiAggregate extends 
SpoofOperator implements Seria
                if( a == null && !sparseSafe ) {
                        for( int i=rl; i<ru; i++ )
                                for( int j=0; j<n; j++ )
-                                       genexec( 0, lb, scalars, c, m, n, i, j 
);
+                                       genexec( 0, lb, scalars, c, m, n, 
rix+i, i, j );
                }
                else if( a != null ) {
                        for( int i=rl; i<ru; i++ ) { 
                                double[] avals = a.values(i);
                                int aix = a.pos(i);
                                for( int j=0; j<n; j++ )
-                                       genexec( avals[aix+j], lb, scalars, c, 
m, n, i, j );
+                                       genexec( avals[aix+j], lb, scalars, c, 
m, n, rix+i, i, j );
                        }
                }
        }
        
        private void executeSparse(SparseBlock sblock, SideInput[] b, double[] 
scalars,
-                       double[] c, int m, int n, boolean sparseSafe, int rl, 
int ru) 
+                       double[] c, int m, int n, boolean sparseSafe, int rl, 
int ru, long rix) 
        {
                if( sblock == null && sparseSafe )
                        return;
@@ -183,30 +187,38 @@ public abstract class SpoofMultiAggregate extends 
SpoofOperator implements Seria
                                        //process zeros before current non-zero
                                        if( !sparseSafe )
                                                for(int j=lastj+1; j<aix[k]; 
j++)
-                                                       genexec(0, lb, scalars, 
c, m, n, i, j);
+                                                       genexec(0, lb, scalars, 
c, m, n, rix+i, i, j);
                                        //process current non-zero
                                        lastj = aix[k];
-                                       genexec(avals[k], lb, scalars, c, m, n, 
i, lastj);
+                                       genexec(avals[k], lb, scalars, c, m, n, 
rix+i, i, lastj);
                                }
                        }
                        //process empty rows or remaining zeros
                        if( !sparseSafe )
                                for(int j=lastj+1; j<n; j++)
-                                       genexec(0, lb, scalars, c, m, n, i, j);
+                                       genexec(0, lb, scalars, c, m, n, rix+i, 
i, j);
                }
        }
 
-       private void executeCompressed(CompressedMatrixBlock a, SideInput[] b, 
double[] scalars, double[] c, int m, int n, int rl, int ru)
+       private void executeCompressed(CompressedMatrixBlock a, SideInput[] b, 
double[] scalars, double[] c, int m, int n, int rl, int ru, long rix)
        {
                //core compressed aggregation operation
                Iterator<IJV> iter = a.getIterator(rl, ru, true);
                while( iter.hasNext() ) {
                        IJV cell = iter.next();
-                       genexec(cell.getV(), b, scalars, c, m, n, cell.getI(), 
cell.getJ());
+                       genexec(cell.getV(), b, scalars, c, m, n, 
rix+cell.getI(), cell.getI(), cell.getJ());
                }
        }
        
-       protected abstract void genexec( double a, SideInput[] b, double[] 
scalars, double[] c, int m, int n, int rowIndex, int colIndex);
+       //local execution where grix==rix
+       protected final void genexec( double a, SideInput[] b,
+               double[] scalars, double[] c, int m, int n, int rix, int cix) {
+               genexec(a, b, scalars, c, m, n, rix, rix, cix);
+       }
+       
+       //distributed execution with additional global row index
+       protected abstract void genexec( double a, SideInput[] b,
+               double[] scalars, double[] c, int m, int n, long grix, int rix, 
int cix);
        
        
        private void setInitialOutputValues(double[] c) {
@@ -304,11 +316,11 @@ public abstract class SpoofMultiAggregate extends 
SpoofOperator implements Seria
                        double[] c = new double[_aggOps.length];
                        setInitialOutputValues(c);
                        if( _a instanceof CompressedMatrixBlock )
-                               executeCompressed((CompressedMatrixBlock)_a, 
_b, _scalars, c, _rlen, _clen, _rl, _ru);
+                               executeCompressed((CompressedMatrixBlock)_a, 
_b, _scalars, c, _rlen, _clen, _rl, _ru, 0);
                        else if( !_a.isInSparseFormat() )
-                               executeDense(_a.getDenseBlock(), _b, _scalars, 
c, _rlen, _clen, _safe, _rl, _ru);
+                               executeDense(_a.getDenseBlock(), _b, _scalars, 
c, _rlen, _clen, _safe, _rl, _ru, 0);
                        else
-                               executeSparse(_a.getSparseBlock(), _b, 
_scalars, c, _rlen, _clen, _safe, _rl, _ru);
+                               executeSparse(_a.getSparseBlock(), _b, 
_scalars, c, _rlen, _clen, _safe, _rl, _ru, 0);
                        return c;
                }
        }

http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java 
b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
index 7454f78..507bd65 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
@@ -118,10 +118,10 @@ public abstract class SpoofRowwise extends SpoofOperator
        
        @Override
        public MatrixBlock execute(ArrayList<MatrixBlock> inputs, 
ArrayList<ScalarObject> scalarObjects, MatrixBlock out) {
-               return execute(inputs, scalarObjects, out, true, false);
+               return execute(inputs, scalarObjects, out, true, false, 0);
        }
        
-       public MatrixBlock execute(ArrayList<MatrixBlock> inputs, 
ArrayList<ScalarObject> scalarObjects, MatrixBlock out, boolean allocTmp, 
boolean aggIncr) {
+       public MatrixBlock execute(ArrayList<MatrixBlock> inputs, 
ArrayList<ScalarObject> scalarObjects, MatrixBlock out, boolean allocTmp, 
boolean aggIncr, long rix) {
                //sanity check
                if( inputs==null || inputs.size() < 1 || out==null )
                        throw new RuntimeException("Invalid input arguments.");
@@ -149,11 +149,11 @@ public abstract class SpoofRowwise extends SpoofOperator
                //core sequential execute
                MatrixBlock a = inputs.get(0);
                if( a instanceof CompressedMatrixBlock )
-                       executeCompressed((CompressedMatrixBlock)a, b, scalars, 
c, n, 0, m);
+                       executeCompressed((CompressedMatrixBlock)a, b, scalars, 
c, n, 0, m, rix);
                else if( !a.isInSparseFormat() )
-                       executeDense(a.getDenseBlock(), b, scalars, c, n, 0, m);
+                       executeDense(a.getDenseBlock(), b, scalars, c, n, 0, m, 
rix);
                else
-                       executeSparse(a.getSparseBlock(), b, scalars, c, n, 0, 
m);
+                       executeSparse(a.getSparseBlock(), b, scalars, c, n, 0, 
m, rix);
                
                //post-processing
                if( allocTmp &&_reqVectMem > 0 )
@@ -285,39 +285,39 @@ public abstract class SpoofRowwise extends SpoofOperator
                out.setNonZeros(out.getNumRows()*out.getNumColumns());
        }
        
-       private void executeDense(DenseBlock a, SideInput[] b, double[] 
scalars, DenseBlock c, int n, int rl, int ru) {
+       private void executeDense(DenseBlock a, SideInput[] b, double[] 
scalars, DenseBlock c, int n, int rl, int ru, long rix) {
                //forward empty block to sparse
                if( a == null ) {
-                       executeSparse(null, b, scalars, c, n, rl, ru);
+                       executeSparse(null, b, scalars, c, n, rl, ru, rix);
                        return;
                }
                
                SideInput[] lb = createSparseSideInputs(b, true);
                for( int i=rl; i<ru; i++ ) {
                        genexec(a.values(i), a.pos(i), lb, scalars,
-                               c.values(i), c.pos(i), n, i );
+                               c.values(i), c.pos(i), n, rix+i, i );
                }
        }
        
-       private void executeSparse(SparseBlock a, SideInput[] b, double[] 
scalars, DenseBlock c, int n, int rl, int ru) {
+       private void executeSparse(SparseBlock a, SideInput[] b, double[] 
scalars, DenseBlock c, int n, int rl, int ru, long rix) {
                SideInput[] lb = createSparseSideInputs(b, true);
                SparseRow empty = new SparseRowVector(1);
                for( int i=rl; i<ru; i++ ) {
                        if( a!=null && !a.isEmpty(i) ) {
                                //call generated method
                                genexec(a.values(i), a.indexes(i), a.pos(i), 
lb, scalars,
-                                       c.values(i), c.pos(i), a.size(i), n, i);
+                                       c.values(i), c.pos(i), a.size(i), n, 
rix+i, i);
                        }
                        else
                                genexec(empty.values(), empty.indexes(), 0, lb, 
scalars,
-                                       c.values(i), c.pos(i), 0, n, i);
+                                       c.values(i), c.pos(i), 0, n, rix+i, i);
                }
        }
        
-       private void executeCompressed(CompressedMatrixBlock a, SideInput[] b, 
double[] scalars, DenseBlock c, int n, int rl, int ru) {
+       private void executeCompressed(CompressedMatrixBlock a, SideInput[] b, 
double[] scalars, DenseBlock c, int n, int rl, int ru, long rix) {
                //forward empty block to sparse
                if( a.isEmptyBlock(false) ) {
-                       executeSparse(null, b, scalars, c, n, rl, ru);
+                       executeSparse(null, b, scalars, c, n, rl, ru, rix);
                        return;
                }
                
@@ -331,11 +331,23 @@ public abstract class SpoofRowwise extends SpoofOperator
        
        //methods to be implemented by generated operators of type 
SpoofRowAggregate 
        
+       //local execution where grix==rix
+       protected final void genexec(double[] a, int ai, 
+               SideInput[] b, double[] scalars, double[] c, int ci, int len, 
int rix) {
+               genexec(a, ai, b, scalars, c, ci, len, rix, rix);
+       }
+       
+       protected final void genexec(double[] avals, int[] aix, int ai, 
+               SideInput[] b, double[] scalars, double[] c, int ci, int alen, 
int n, int rix) {
+               genexec(avals, aix, ai, b, scalars, c, ci, alen, n, rix, rix);
+       }
+       
+       //distributed execution with additional global row index
        protected abstract void genexec(double[] a, int ai, 
-               SideInput[] b, double[] scalars, double[] c, int ci, int len, 
int rowIndex);
+               SideInput[] b, double[] scalars, double[] c, int ci, int len, 
long grix, int rix);
        
        protected abstract void genexec(double[] avals, int[] aix, int ai, 
-               SideInput[] b, double[] scalars, double[] c, int ci, int alen, 
int n, int rowIndex);
+               SideInput[] b, double[] scalars, double[] c, int ci, int alen, 
int n, long grix, int rix);
 
        
        /**
@@ -369,11 +381,11 @@ public abstract class SpoofRowwise extends SpoofOperator
                        DenseBlock c = DenseBlockFactory.createDenseBlock(1, 
_outLen);
                        
                        if( _a instanceof CompressedMatrixBlock )
-                               executeCompressed((CompressedMatrixBlock)_a, 
_b, _scalars, c, _clen, _rl, _ru);
+                               executeCompressed((CompressedMatrixBlock)_a, 
_b, _scalars, c, _clen, _rl, _ru, 0);
                        else if( !_a.isInSparseFormat() )
-                               executeDense(_a.getDenseBlock(), _b, _scalars, 
c, _clen, _rl, _ru);
+                               executeDense(_a.getDenseBlock(), _b, _scalars, 
c, _clen, _rl, _ru, 0);
                        else
-                               executeSparse(_a.getSparseBlock(), _b, 
_scalars, c, _clen, _rl, _ru);
+                               executeSparse(_a.getSparseBlock(), _b, 
_scalars, c, _clen, _rl, _ru, 0);
                        
                        if( _reqVectMem > 0 )
                                LibSpoofPrimitives.cleanupThreadLocalMemory();
@@ -413,11 +425,11 @@ public abstract class SpoofRowwise extends SpoofOperator
                                
LibSpoofPrimitives.setupThreadLocalMemory(_reqVectMem, _clen, _clen2);
                        
                        if( _a instanceof CompressedMatrixBlock )
-                               executeCompressed((CompressedMatrixBlock)_a, 
_b, _scalars, _c.getDenseBlock(), _clen, _rl, _ru);
+                               executeCompressed((CompressedMatrixBlock)_a, 
_b, _scalars, _c.getDenseBlock(), _clen, _rl, _ru, 0);
                        else if( !_a.isInSparseFormat() )
-                               executeDense(_a.getDenseBlock(), _b, _scalars, 
_c.getDenseBlock(), _clen, _rl, _ru);
+                               executeDense(_a.getDenseBlock(), _b, _scalars, 
_c.getDenseBlock(), _clen, _rl, _ru, 0);
                        else
-                               executeSparse(_a.getSparseBlock(), _b, 
_scalars, _c.getDenseBlock(), _clen, _rl, _ru);
+                               executeSparse(_a.getSparseBlock(), _b, 
_scalars, _c.getDenseBlock(), _clen, _rl, _ru, 0);
                        
                        if( _reqVectMem > 0 )
                                LibSpoofPrimitives.cleanupThreadLocalMemory();

Reply via email to