[SYSTEMML-2119] Support for seq operations in codegen (local/spark). This patch extends the code generation framework to support sequence operators in cell, multi-aggregate (magg), and row templates. Along with the typical extensions of candidate exploration, templates, and minor optimizer extensions, this also required a runtime extension for distributed operations to feed the global row index into generated operators.
Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/d0b4373f Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/d0b4373f Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/d0b4373f Branch: refs/heads/master Commit: d0b4373fa13951d159e0d2443b703f0d1a93bfc6 Parents: 1a37cfa Author: Matthias Boehm <[email protected]> Authored: Tue May 8 22:22:59 2018 -0700 Committer: Matthias Boehm <[email protected]> Committed: Tue May 8 23:07:20 2018 -0700 ---------------------------------------------------------------------- .../java/org/apache/sysml/hops/DataGenOp.java | 4 + .../sysml/hops/codegen/SpoofCompiler.java | 6 +- .../sysml/hops/codegen/cplan/CNodeBinary.java | 79 ++++--- .../sysml/hops/codegen/cplan/CNodeCell.java | 2 +- .../sysml/hops/codegen/cplan/CNodeMultiAgg.java | 2 +- .../sysml/hops/codegen/cplan/CNodeRow.java | 4 +- .../opt/PlanSelectionFuseCostBasedV2.java | 4 +- .../hops/codegen/template/TemplateCell.java | 50 ++-- .../hops/codegen/template/TemplateRow.java | 42 ++-- .../hops/codegen/template/TemplateUtils.java | 13 ++ .../sysml/hops/rewrite/HopRewriteUtils.java | 24 +- .../sysml/runtime/codegen/SpoofCellwise.java | 234 ++++++++++--------- .../runtime/codegen/SpoofMultiAggregate.java | 46 ++-- .../sysml/runtime/codegen/SpoofRowwise.java | 54 +++-- .../instructions/spark/SpoofSPInstruction.java | 58 +++-- .../functions/codegen/CellwiseTmplTest.java | 20 +- .../functions/codegen/RowAggTmplTest.java | 20 +- .../scripts/functions/codegen/cellwisetmpl22.R | 31 +++ .../functions/codegen/cellwisetmpl22.dml | 28 +++ .../scripts/functions/codegen/rowAggPattern41.R | 31 +++ .../functions/codegen/rowAggPattern41.dml | 28 +++ 21 files changed, 518 insertions(+), 262 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/hops/DataGenOp.java 
---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/DataGenOp.java b/src/main/java/org/apache/sysml/hops/DataGenOp.java index 0c6b586..3101721 100644 --- a/src/main/java/org/apache/sysml/hops/DataGenOp.java +++ b/src/main/java/org/apache/sysml/hops/DataGenOp.java @@ -366,6 +366,10 @@ public class DataGenOp extends Hop implements MultiThreadedHop return _paramIndexMap; } + public Hop getParam(String key) { + return getInput().get(getParamIndex(key)); + } + public int getParamIndex(String key) { return _paramIndexMap.get(key); } http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java b/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java index 932b0be..6ef8dc4 100644 --- a/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java +++ b/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java @@ -665,8 +665,10 @@ public class SpoofCompiler CNodeTpl tpl = e.getValue().getValue(); Hop[] inHops = e.getValue().getKey(); - //remove invalid plans with null inputs - if( Arrays.stream(inHops).anyMatch(h -> (h==null)) ) + //remove invalid plans with null, empty, or all scalar inputs + if( inHops == null || inHops.length == 0 + || Arrays.stream(inHops).anyMatch(h -> (h==null)) + || Arrays.stream(inHops).allMatch(h -> h.isScalar())) continue; //perform simplifications and cse rewrites http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java index 4aee712..80a7f83 100644 --- 
a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java +++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java @@ -52,6 +52,7 @@ public class CNodeBinary extends CNode LESS, LESSEQUAL, GREATER, GREATEREQUAL, EQUAL,NOTEQUAL, MIN, MAX, AND, OR, XOR, LOG, LOG_NZ, POW, BITWAND, + SEQ_RIX, MINUS1_MULT, MINUS_NZ; public static boolean contains(String value) { @@ -214,7 +215,9 @@ public class CNodeBinary extends CNode return " double %TMP% = ( (%IN1% != 0) != (%IN2% != 0) ) ? 1 : 0;\n"; case BITWAND: return " double %TMP% = LibSpoofPrimitives.bwAnd(%IN1%, %IN2%);\n"; - + case SEQ_RIX: + return " double %TMP% = %IN1% + grix * %IN2%;\n"; //0-based global rix + default: throw new RuntimeException("Invalid binary type: "+this.toString()); } @@ -409,6 +412,7 @@ public class CNodeBinary extends CNode case AND: return "b(&)"; case XOR: return "b(xor)"; case BITWAND: return "b(bitwAnd)"; + case SEQ_RIX: return "b(seqr)"; case MINUS1_MULT: return "b(1-*)"; case MINUS_NZ: return "b(-nz)"; default: return "b("+_type.name().toLowerCase()+")"; @@ -420,7 +424,7 @@ public class CNodeBinary extends CNode { switch(_type) { //VECT - case VECT_MULT_ADD: + case VECT_MULT_ADD: case VECT_DIV_ADD: case VECT_MINUS_ADD: case VECT_PLUS_ADD: @@ -428,10 +432,10 @@ public class CNodeBinary extends CNode case VECT_MIN_ADD: case VECT_MAX_ADD: case VECT_EQUAL_ADD: - case VECT_NOTEQUAL_ADD: - case VECT_LESS_ADD: - case VECT_LESSEQUAL_ADD: - case VECT_GREATER_ADD: + case VECT_NOTEQUAL_ADD: + case VECT_LESS_ADD: + case VECT_LESSEQUAL_ADD: + case VECT_GREATER_ADD: case VECT_GREATEREQUAL_ADD: case VECT_CBIND_ADD: case VECT_XOR_ADD: @@ -452,8 +456,8 @@ public class CNodeBinary extends CNode _cols = _inputs.get(1)._cols; _dataType = DataType.MATRIX; break; - - case VECT_DIV_SCALAR: + + case VECT_DIV_SCALAR: case VECT_MULT_SCALAR: case VECT_MINUS_SCALAR: case VECT_PLUS_SCALAR: @@ -463,13 +467,13 @@ public class CNodeBinary extends CNode case VECT_MIN_SCALAR: case VECT_MAX_SCALAR: 
case VECT_EQUAL_SCALAR: - case VECT_NOTEQUAL_SCALAR: - case VECT_LESS_SCALAR: - case VECT_LESSEQUAL_SCALAR: + case VECT_NOTEQUAL_SCALAR: + case VECT_LESS_SCALAR: + case VECT_LESSEQUAL_SCALAR: case VECT_GREATER_SCALAR: case VECT_GREATEREQUAL_SCALAR: - case VECT_DIV: + case VECT_DIV: case VECT_MULT: case VECT_MINUS: case VECT_PLUS: @@ -477,12 +481,12 @@ public class CNodeBinary extends CNode case VECT_BITWAND: case VECT_MIN: case VECT_MAX: - case VECT_EQUAL: - case VECT_NOTEQUAL: - case VECT_LESS: - case VECT_LESSEQUAL: - case VECT_GREATER: - case VECT_GREATEREQUAL: + case VECT_EQUAL: + case VECT_NOTEQUAL: + case VECT_LESS: + case VECT_LESSEQUAL: + case VECT_GREATER: + case VECT_GREATEREQUAL: boolean scalarVector = (_inputs.get(0).getDataType()==DataType.SCALAR); _rows = _inputs.get(scalarVector ? 1 : 0)._rows; _cols = _inputs.get(scalarVector ? 1 : 0)._cols; @@ -494,39 +498,40 @@ public class CNodeBinary extends CNode _cols = _inputs.get(1)._cols; _dataType = DataType.MATRIX; break; - - case DOT_PRODUCT: + + case DOT_PRODUCT: //SCALAR Arithmetic - case MULT: - case DIV: - case PLUS: - case MINUS: + case MULT: + case DIV: + case PLUS: + case MINUS: case MINUS1_MULT: case MINUS_NZ: - case MODULUS: - case INTDIV: + case MODULUS: + case INTDIV: //SCALAR Comparison - case LESS: - case LESSEQUAL: + case LESS: + case LESSEQUAL: case GREATER: - case GREATEREQUAL: - case EQUAL: - case NOTEQUAL: + case GREATEREQUAL: + case EQUAL: + case NOTEQUAL: //SCALAR LOGIC - case MIN: - case MAX: - case AND: + case MIN: + case MAX: + case AND: case OR: case XOR: case BITWAND: - case LOG: + case LOG: case LOG_NZ: - case POW: + case POW: + case SEQ_RIX: _rows = 0; _cols = 0; _dataType= DataType.SCALAR; - break; + break; } } http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeCell.java ---------------------------------------------------------------------- diff --git 
a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeCell.java b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeCell.java index 3b30124..e97a00a 100644 --- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeCell.java +++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeCell.java @@ -41,7 +41,7 @@ public class CNodeCell extends CNodeTpl + " public %TMP%() {\n" + " super(CellType.%TYPE%, %SPARSE_SAFE%, %AGG_OP%);\n" + " }\n" - + " protected double genexec(double a, SideInput[] b, double[] scalars, int m, int n, int rix, int cix) { \n" + + " protected double genexec(double a, SideInput[] b, double[] scalars, int m, int n, long grix, int rix, int cix) { \n" + "%BODY_dense%" + " return %OUT%;\n" + " }\n" http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeMultiAgg.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeMultiAgg.java b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeMultiAgg.java index fd39c7c..b0bd03a 100644 --- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeMultiAgg.java +++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeMultiAgg.java @@ -44,7 +44,7 @@ public class CNodeMultiAgg extends CNodeTpl + " super(%SPARSE_SAFE%, %AGG_OP%);\n" + " }\n" + " protected void genexec(double a, SideInput[] b, double[] scalars, double[] c, " - + "int m, int n, int rix, int cix) { \n" + + "int m, int n, long grix, int rix, int cix) { \n" + "%BODY_dense%" + " }\n" + "}\n"; http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java index e6aa53a..e21ff2b 100644 --- 
a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java +++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java @@ -41,10 +41,10 @@ public class CNodeRow extends CNodeTpl + " public %TMP%() {\n" + " super(RowType.%TYPE%, %CONST_DIM2%, %TB1%, %VECT_MEM%);\n" + " }\n" - + " protected void genexec(double[] a, int ai, SideInput[] b, double[] scalars, double[] c, int ci, int len, int rix) { \n" + + " protected void genexec(double[] a, int ai, SideInput[] b, double[] scalars, double[] c, int ci, int len, long grix, int rix) { \n" + "%BODY_dense%" + " }\n" - + " protected void genexec(double[] avals, int[] aix, int ai, SideInput[] b, double[] scalars, double[] c, int ci, int alen, int len, int rix) { \n" + + " protected void genexec(double[] avals, int[] aix, int ai, SideInput[] b, double[] scalars, double[] c, int ci, int alen, int len, long grix, int rix) { \n" + "%BODY_sparse%" + " }\n" + "}\n"; http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java b/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java index 1e5bcf3..3db4ce8 100644 --- a/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java +++ b/src/main/java/org/apache/sysml/hops/codegen/opt/PlanSelectionFuseCostBasedV2.java @@ -44,6 +44,7 @@ import org.apache.sysml.hops.AggUnaryOp; import org.apache.sysml.hops.BinaryOp; import org.apache.sysml.hops.Hop; import org.apache.sysml.hops.Hop.AggOp; +import org.apache.sysml.hops.Hop.DataGenMethod; import org.apache.sysml.hops.Hop.DataOpTypes; import org.apache.sysml.hops.Hop.Direction; import org.apache.sysml.hops.Hop.OpOp2; @@ -718,7 +719,8 @@ public class PlanSelectionFuseCostBasedV2 extends PlanSelection || (hop instanceof AggBinaryOp && 
(inRow || !hop.dimsKnown() || (hop.getDim1()!=1 && hop.getDim2()!=1))) || (HopRewriteUtils.isTransposeOperation(hop) - && (hop.getDim1()!=1 && hop.getDim2()!=1)) + && (hop.getDim1()!=1 && hop.getDim2()!=1) + && !HopRewriteUtils.isDataGenOp(hop.getInput().get(0),DataGenMethod.SEQ)) || (hop instanceof AggUnaryOp && inRow); } http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/hops/codegen/template/TemplateCell.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateCell.java b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateCell.java index cfac2d7..8eaffaf 100644 --- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateCell.java +++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateCell.java @@ -29,10 +29,12 @@ import java.util.List; import org.apache.sysml.hops.AggBinaryOp; import org.apache.sysml.hops.AggUnaryOp; import org.apache.sysml.hops.BinaryOp; +import org.apache.sysml.hops.DataGenOp; import org.apache.sysml.hops.DataOp; import org.apache.sysml.hops.Hop; import org.apache.sysml.hops.UnaryOp; import org.apache.sysml.hops.Hop.AggOp; +import org.apache.sysml.hops.Hop.DataGenMethod; import org.apache.sysml.hops.Hop.OpOp2; import org.apache.sysml.hops.Hop.OpOp3; import org.apache.sysml.hops.Hop.ParamBuiltinOp; @@ -52,6 +54,7 @@ import org.apache.sysml.hops.codegen.template.CPlanMemoTable.MemoTableEntry; import org.apache.sysml.hops.codegen.cplan.CNodeTernary; import org.apache.sysml.hops.codegen.cplan.CNodeTernary.TernaryType; import org.apache.sysml.hops.rewrite.HopRewriteUtils; +import org.apache.sysml.parser.Statement; import org.apache.sysml.parser.Expression.DataType; import org.apache.sysml.runtime.matrix.data.Pair; @@ -77,7 +80,9 @@ public class TemplateCell extends TemplateBase return hop.dimsKnown() && isValidOperation(hop) && !(hop.getDim1()==1 && hop.getDim2()==1) || (hop instanceof 
IndexingOp && hop.getInput().get(0).getDim2() >= 0 - && (((IndexingOp)hop).isColLowerEqualsUpper() || hop.getDim2()==1)); + && (((IndexingOp)hop).isColLowerEqualsUpper() || hop.getDim2()==1)) + || (HopRewriteUtils.isDataGenOpWithLiteralInputs(hop, DataGenMethod.SEQ) + && HopRewriteUtils.hasOnlyUnaryBinaryParents(hop, true)); } @Override @@ -96,7 +101,9 @@ public class TemplateCell extends TemplateBase //merge of other cell tpl possible return (!isClosed() && (isValidOperation(hop) || (hop instanceof AggBinaryOp && hop.getInput().indexOf(input)==0 - && HopRewriteUtils.isTransposeOperation(input)))); + && HopRewriteUtils.isTransposeOperation(input)))) + || (HopRewriteUtils.isDataGenOpWithLiteralInputs(input, DataGenMethod.SEQ) + && HopRewriteUtils.hasOnlyUnaryBinaryParents(input, false)); } @Override @@ -177,16 +184,14 @@ public class TemplateCell extends TemplateBase //construct cnode for current hop CNode out = null; - if(hop instanceof UnaryOp) - { + if(hop instanceof UnaryOp) { CNode cdata1 = tmp.get(hop.getInput().get(0).getHopID()); cdata1 = TemplateUtils.wrapLookupIfNecessary(cdata1, hop.getInput().get(0)); String primitiveOpName = ((UnaryOp)hop).getOp().name(); out = new CNodeUnary(cdata1, UnaryType.valueOf(primitiveOpName)); } - else if(hop instanceof BinaryOp) - { + else if(hop instanceof BinaryOp) { BinaryOp bop = (BinaryOp) hop; CNode cdata1 = tmp.get(hop.getInput().get(0).getHopID()); CNode cdata2 = tmp.get(hop.getInput().get(1).getHopID()); @@ -200,8 +205,7 @@ public class TemplateCell extends TemplateBase out = new CNodeBinary(cdata1, cdata2, BinType.valueOf(primitiveOpName)); } - else if(hop instanceof TernaryOp) - { + else if(hop instanceof TernaryOp) { TernaryOp top = (TernaryOp) hop; CNode cdata1 = tmp.get(hop.getInput().get(0).getHopID()); CNode cdata2 = tmp.get(hop.getInput().get(1).getHopID()); @@ -216,27 +220,34 @@ public class TemplateCell extends TemplateBase out = new CNodeTernary(cdata1, cdata2, cdata3, 
TernaryType.valueOf(top.getOp().name())); } - else if( hop instanceof ParameterizedBuiltinOp ) - { + else if( hop instanceof ParameterizedBuiltinOp ) { CNode cdata1 = tmp.get(((ParameterizedBuiltinOp)hop).getTargetHop().getHopID()); cdata1 = TemplateUtils.wrapLookupIfNecessary(cdata1, hop.getInput().get(0)); CNode cdata2 = tmp.get(((ParameterizedBuiltinOp)hop).getParameterHop("pattern").getHopID()); CNode cdata3 = tmp.get(((ParameterizedBuiltinOp)hop).getParameterHop("replacement").getHopID()); TernaryType ttype = (cdata2.isLiteral() && cdata2.getVarname().equals("Double.NaN")) ? - TernaryType.REPLACE_NAN : TernaryType.REPLACE; + TernaryType.REPLACE_NAN : TernaryType.REPLACE; out = new CNodeTernary(cdata1, cdata2, cdata3, ttype); } - else if( hop instanceof IndexingOp ) - { + else if( hop instanceof IndexingOp ) { CNode cdata1 = tmp.get(hop.getInput().get(0).getHopID()); out = new CNodeTernary(cdata1, - TemplateUtils.createCNodeData(new LiteralOp(hop.getInput().get(0).getDim2()), true), - TemplateUtils.createCNodeData(hop.getInput().get(4), true), - TernaryType.LOOKUP_RC1); + TemplateUtils.createCNodeData(new LiteralOp(hop.getInput().get(0).getDim2()), true), + TemplateUtils.createCNodeData(hop.getInput().get(4), true), + TernaryType.LOOKUP_RC1); } - else if( HopRewriteUtils.isTransposeOperation(hop) ) - { + else if( HopRewriteUtils.isDataGenOp(hop, DataGenMethod.SEQ) ) { + CNodeData from = TemplateUtils.getLiteral(tmp.get(((DataGenOp)hop).getParam(Statement.SEQ_FROM).getHopID())); + CNodeData to = TemplateUtils.getLiteral(tmp.get(((DataGenOp)hop).getParam(Statement.SEQ_TO).getHopID())); + CNodeData incr = TemplateUtils.getLiteral(tmp.get(((DataGenOp)hop).getParam(Statement.SEQ_INCR).getHopID())); + if( Double.parseDouble(from.getVarname()) > Double.parseDouble(to.getVarname()) + && Double.parseDouble(incr.getVarname()) > 0 ) { + incr = TemplateUtils.createCNodeData(new LiteralOp("-"+incr.getVarname()), true); + } + out = new CNodeBinary(from, incr, 
BinType.SEQ_RIX); + } + else if( HopRewriteUtils.isTransposeOperation(hop) ) { out = TemplateUtils.skipTranspose(tmp.get(hop.getHopID()), hop, tmp, compileLiterals); //correct indexing types of existing lookups @@ -246,8 +257,7 @@ public class TemplateCell extends TemplateBase if( out instanceof CNodeData && !inHops.contains(hop.getInput().get(0)) ) inHops.add(hop.getInput().get(0)); } - else if( hop instanceof AggUnaryOp ) - { + else if( hop instanceof AggUnaryOp ) { //aggregation handled in template implementation (note: we do not compile //^2 of SUM_SQ into the operator to simplify the detection of single operators) out = tmp.get(hop.getInput().get(0).getHopID()); http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java index 1e996d9..fb153de 100644 --- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java +++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java @@ -27,6 +27,7 @@ import java.util.HashSet; import org.apache.sysml.hops.AggBinaryOp; import org.apache.sysml.hops.AggUnaryOp; import org.apache.sysml.hops.BinaryOp; +import org.apache.sysml.hops.DataGenOp; import org.apache.sysml.hops.Hop; import org.apache.sysml.hops.IndexingOp; import org.apache.sysml.hops.LiteralOp; @@ -48,10 +49,13 @@ import org.apache.sysml.hops.codegen.cplan.CNodeUnary.UnaryType; import org.apache.sysml.hops.codegen.template.CPlanMemoTable.MemoTableEntry; import org.apache.sysml.hops.rewrite.HopRewriteUtils; import org.apache.sysml.hops.Hop.AggOp; +import org.apache.sysml.hops.Hop.DataGenMethod; import org.apache.sysml.hops.Hop.Direction; import org.apache.sysml.hops.Hop.OpOp1; import org.apache.sysml.hops.Hop.OpOp2; import 
org.apache.sysml.hops.Hop.OpOpN; +import org.apache.sysml.hops.HopsException; +import org.apache.sysml.parser.Statement; import org.apache.sysml.parser.Expression.DataType; import org.apache.sysml.runtime.codegen.SpoofRowwise.RowType; import org.apache.sysml.runtime.matrix.data.LibMatrixMult; @@ -134,6 +138,8 @@ public class TemplateRow extends TemplateBase && hop.getDim1() > 1 && input.getDim1()>1) || (HopRewriteUtils.isBinary(hop, OpOp2.CBIND) && hop.getInput().get(0).isMatrix() && hop.dimsKnown()) || (HopRewriteUtils.isNary(hop, OpOpN.CBIND) && hop.getInput().get(0).isMatrix() && hop.dimsKnown()) + || (HopRewriteUtils.isDataGenOpWithLiteralInputs(input, DataGenMethod.SEQ) + && HopRewriteUtils.hasOnlyUnaryBinaryParents(input, false)) || (hop instanceof AggBinaryOp && HopRewriteUtils.isTransposeOperation(hop.getInput().get(0)) && (input.getDim2()==1 || (input==hop.getInput().get(1) @@ -328,15 +334,23 @@ public class TemplateRow extends TemplateBase } } } - else if( HopRewriteUtils.isTransposeOperation(hop) ) - { + else if( HopRewriteUtils.isDataGenOp(hop, DataGenMethod.SEQ) ) { + CNodeData from = TemplateUtils.getLiteral(tmp.get(((DataGenOp)hop).getParam(Statement.SEQ_FROM).getHopID())); + CNodeData to = TemplateUtils.getLiteral(tmp.get(((DataGenOp)hop).getParam(Statement.SEQ_TO).getHopID())); + CNodeData incr = TemplateUtils.getLiteral(tmp.get(((DataGenOp)hop).getParam(Statement.SEQ_INCR).getHopID())); + if( Double.parseDouble(from.getVarname()) > Double.parseDouble(to.getVarname()) + && Double.parseDouble(incr.getVarname()) > 0 ) { + incr = TemplateUtils.createCNodeData(new LiteralOp("-"+incr.getVarname()), true); + } + out = new CNodeBinary(from, incr, BinType.SEQ_RIX); + } + else if( HopRewriteUtils.isTransposeOperation(hop) ) { out = TemplateUtils.skipTranspose(tmp.get(hop.getHopID()), hop, tmp, compileLiterals); if( out instanceof CNodeData && !inHops.contains(hop.getInput().get(0)) ) inHops.add(hop.getInput().get(0)); } - else if(hop instanceof UnaryOp) - { 
+ else if(hop instanceof UnaryOp) { CNode cdata1 = tmp.get(hop.getInput().get(0).getHopID()); // if one input is a matrix then we need to do vector by scalar operations @@ -360,8 +374,7 @@ public class TemplateRow extends TemplateBase out = new CNodeUnary(cdata1, UnaryType.valueOf(primitiveOpName)); } } - else if(HopRewriteUtils.isBinary(hop, OpOp2.CBIND)) - { + else if(HopRewriteUtils.isBinary(hop, OpOp2.CBIND)) { //special case for cbind with zeros CNode cdata1 = tmp.get(hop.getInput().get(0).getHopID()); CNode cdata2 = null; @@ -425,8 +438,7 @@ public class TemplateRow extends TemplateBase out = new CNodeBinary(cdata1, cdata2, BinType.valueOf(primitiveOpName)); } } - else if(hop instanceof TernaryOp) - { + else if(hop instanceof TernaryOp) { TernaryOp top = (TernaryOp) hop; CNode cdata1 = tmp.get(hop.getInput().get(0).getHopID()); CNode cdata2 = tmp.get(hop.getInput().get(1).getHopID()); @@ -440,8 +452,7 @@ public class TemplateRow extends TemplateBase out = new CNodeTernary(cdata1, cdata2, cdata3, TernaryType.valueOf(top.getOp().toString())); } - else if(HopRewriteUtils.isNary(hop, OpOpN.CBIND)) - { + else if(HopRewriteUtils.isNary(hop, OpOpN.CBIND)) { CNode[] inputs = new CNode[hop.getInput().size()]; for( int i=0; i<hop.getInput().size(); i++ ) { Hop c = hop.getInput().get(i); @@ -454,19 +465,16 @@ public class TemplateRow extends TemplateBase } out = new CNodeNary(inputs, NaryType.VECT_CBIND); } - else if( hop instanceof ParameterizedBuiltinOp ) - { + else if( hop instanceof ParameterizedBuiltinOp ) { CNode cdata1 = tmp.get(((ParameterizedBuiltinOp)hop).getTargetHop().getHopID()); cdata1 = TemplateUtils.wrapLookupIfNecessary(cdata1, hop.getInput().get(0)); - CNode cdata2 = tmp.get(((ParameterizedBuiltinOp)hop).getParameterHop("pattern").getHopID()); CNode cdata3 = tmp.get(((ParameterizedBuiltinOp)hop).getParameterHop("replacement").getHopID()); TernaryType ttype = (cdata2.isLiteral() && cdata2.getVarname().equals("Double.NaN")) ? 
- TernaryType.REPLACE_NAN : TernaryType.REPLACE; + TernaryType.REPLACE_NAN : TernaryType.REPLACE; out = new CNodeTernary(cdata1, cdata2, cdata3, ttype); } - else if( hop instanceof IndexingOp ) - { + else if( hop instanceof IndexingOp ) { CNode cdata1 = tmp.get(hop.getInput().get(0).getHopID()); out = new CNodeTernary(cdata1, TemplateUtils.createCNodeData(new LiteralOp(hop.getInput().get(0).getDim2()), true), @@ -475,7 +483,7 @@ public class TemplateRow extends TemplateBase } if( out == null ) { - throw new RuntimeException(hop.getHopID()+" "+hop.getOpString()); + throw new HopsException(hop.getHopID()+" "+hop.getOpString()); } if( out.getDataType().isMatrix() ) { http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java index b833876..4a61678 100644 --- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java +++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java @@ -239,6 +239,19 @@ public class TemplateUtils throw new RuntimeException("Undefined outer product type for hop "+out.getHopID()); } + public static CNodeData getLiteral(CNode node) { + return ((CNodeData) node).isLiteral() ? 
(CNodeData)node : + createCNodeData(new LiteralOp(node.getVarname()), true); + } + + public static boolean isLiteral(CNode node) { + return node instanceof CNodeData && ((CNodeData)node).isLiteral(); + } + + public static boolean isLiteral(CNode node, String val) { + return isLiteral(node) && ((CNodeData)node).getVarname().equals(val); + } + public static boolean isLookup(CNode node, boolean includeRC1) { return isUnary(node, UnaryType.LOOKUP_C, UnaryType.LOOKUP_RC) || (includeRC1 && isUnary(node, UnaryType.LOOKUP_R)) http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java b/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java index 427b075..3a0c3f2 100644 --- a/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java +++ b/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java @@ -485,6 +485,13 @@ public class HopRewriteUtils && ArrayUtils.contains(ops, ((DataGenOp)hop).getOp())); } + public static boolean isDataGenOpWithLiteralInputs(Hop hop, DataGenMethod... 
ops) { + boolean ret = isDataGenOp(hop, ops); + for( Hop c : hop.getInput() ) + ret &= c instanceof LiteralOp; + return ret; + } + public static boolean isDataGenOpWithConstantValue(Hop hop) { return hop instanceof DataGenOp && ((DataGenOp)hop).getOp()==DataGenMethod.RAND @@ -1167,13 +1174,10 @@ public class HopRewriteUtils || (isColumnRightIndexing(input) && size.getInput().get(0)==input.getInput().get(0)))); } - public static boolean hasOnlyWriteParents( Hop hop, boolean inclTransient, boolean inclPersistent ) - { + public static boolean hasOnlyWriteParents( Hop hop, boolean inclTransient, boolean inclPersistent ) { boolean ret = true; - ArrayList<Hop> parents = hop.getParent(); - for( Hop p : parents ) - { + for( Hop p : parents ) { if( inclTransient && inclPersistent ) ret &= ( p instanceof DataOp && (((DataOp)p).getDataOpType()==DataOpTypes.TRANSIENTWRITE || ((DataOp)p).getDataOpType()==DataOpTypes.PERSISTENTWRITE)); @@ -1182,8 +1186,14 @@ public class HopRewriteUtils else if(inclPersistent) ret &= ( p instanceof DataOp && ((DataOp)p).getDataOpType()==DataOpTypes.PERSISTENTWRITE); } - - + return ret; + } + + public static boolean hasOnlyUnaryBinaryParents(Hop hop, boolean disallowLhs) { + boolean ret = true; + for( Hop p : hop.getParent() ) + ret &= (p instanceof UnaryOp || (p instanceof BinaryOp + && (!disallowLhs || p.getInput().get(1)==hop))); return ret; } http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java index 20e6a95..5ec18f5 100644 --- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java +++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java @@ -111,6 +111,10 @@ public abstract class SpoofCellwise extends SpoofOperator implements 
Serializabl @Override public ScalarObject execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, int k) { + return execute(inputs, scalarObjects, k, 0); + } + + public ScalarObject execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, int k, long rix) { //sanity check if( inputs==null || inputs.size() < 1 ) throw new RuntimeException("Invalid input arguments."); @@ -136,11 +140,11 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl if( k <= 1 ) //SINGLE-THREADED { if( inputs.get(0) instanceof CompressedMatrixBlock ) - ret = executeCompressedAndAgg((CompressedMatrixBlock)a, b, scalars, m, n, sparseSafe, 0, m); + ret = executeCompressedAndAgg((CompressedMatrixBlock)a, b, scalars, m, n, sparseSafe, 0, m, rix); else if( !inputs.get(0).isInSparseFormat() ) - ret = executeDenseAndAgg(a.getDenseBlock(), b, scalars, m, n, sparseSafe, 0, m); + ret = executeDenseAndAgg(a.getDenseBlock(), b, scalars, m, n, sparseSafe, 0, m, rix); else - ret = executeSparseAndAgg(a.getSparseBlock(), b, scalars, m, n, sparseSafe, 0, m); + ret = executeSparseAndAgg(a.getSparseBlock(), b, scalars, m, n, sparseSafe, 0, m, rix); } else //MULTI-THREADED { @@ -187,11 +191,15 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl @Override public MatrixBlock execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, MatrixBlock out) { - return execute(inputs, scalarObjects, out, 1); + return execute(inputs, scalarObjects, out, 1, 0); } @Override public MatrixBlock execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, MatrixBlock out, int k) { + return execute(inputs, scalarObjects, out, k, 0); + } + + public MatrixBlock execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, MatrixBlock out, int k, long rix) { //sanity check if( inputs==null || inputs.size() < 1 || out==null ) throw new RuntimeException("Invalid input arguments."); 
@@ -228,11 +236,11 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl if( k <= 1 ) //SINGLE-THREADED { if( inputs.get(0) instanceof CompressedMatrixBlock ) - lnnz = executeCompressed((CompressedMatrixBlock)a, b, scalars, out, m, n, sparseSafe, 0, m); + lnnz = executeCompressed((CompressedMatrixBlock)a, b, scalars, out, m, n, sparseSafe, 0, m, rix); else if( !inputs.get(0).isInSparseFormat() ) - lnnz = executeDense(a.getDenseBlock(), b, scalars, out, m, n, sparseSafe, 0, m); + lnnz = executeDense(a.getDenseBlock(), b, scalars, out, m, n, sparseSafe, 0, m, rix); else - lnnz = executeSparse(a.getSparseBlock(), b, scalars, out, m, n, sparseSafe, 0, m); + lnnz = executeSparse(a.getSparseBlock(), b, scalars, out, m, n, sparseSafe, 0, m, rix); } else //MULTI-THREADED { @@ -286,42 +294,42 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl //function dispatch private long executeDense(DenseBlock a, SideInput[] b, double[] scalars, - MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru) { + MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru, long rix) { DenseBlock c = out.getDenseBlock(); SideInput[] lb = createSparseSideInputs(b); if( _type == CellType.NO_AGG ) { - return executeDenseNoAgg(a, lb, scalars, c, m, n, sparseSafe, rl, ru); + return executeDenseNoAgg(a, lb, scalars, c, m, n, sparseSafe, rl, ru, rix); } else if( _type == CellType.ROW_AGG ) { if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ ) - return executeDenseRowAggSum(a, lb, scalars, c, m, n, sparseSafe, rl, ru); + return executeDenseRowAggSum(a, lb, scalars, c, m, n, sparseSafe, rl, ru, rix); else - return executeDenseRowAggMxx(a, lb, scalars, c, m, n, sparseSafe, rl, ru); + return executeDenseRowAggMxx(a, lb, scalars, c, m, n, sparseSafe, rl, ru, rix); } else if( _type == CellType.COL_AGG ) { if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ ) - return executeDenseColAggSum(a, lb, scalars, c, m, n, sparseSafe, rl, 
ru); + return executeDenseColAggSum(a, lb, scalars, c, m, n, sparseSafe, rl, ru, rix); else - return executeDenseColAggMxx(a, lb, scalars, c, m, n, sparseSafe, rl, ru); + return executeDenseColAggMxx(a, lb, scalars, c, m, n, sparseSafe, rl, ru, rix); } return -1; } private double executeDenseAndAgg(DenseBlock a, SideInput[] b, double[] scalars, - int m, int n, boolean sparseSafe, int rl, int ru) + int m, int n, boolean sparseSafe, int rl, int ru, long rix) { SideInput[] lb = createSparseSideInputs(b); //numerically stable aggregation for sum/sum_sq if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ ) - return executeDenseAggSum(a, lb, scalars, m, n, sparseSafe, rl, ru); + return executeDenseAggSum(a, lb, scalars, m, n, sparseSafe, rl, ru, rix); else - return executeDenseAggMxx(a, lb, scalars, m, n, sparseSafe, rl, ru); + return executeDenseAggMxx(a, lb, scalars, m, n, sparseSafe, rl, ru, rix); } private long executeSparse(SparseBlock sblock, SideInput[] b, double[] scalars, - MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru) + MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru, long rix) { if( sparseSafe && sblock == null ) return 0; @@ -329,47 +337,47 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl SideInput[] lb = createSparseSideInputs(b); if( _type == CellType.NO_AGG ) { if( out.isInSparseFormat() ) - return executeSparseNoAggSparse(sblock, lb, scalars, out, m, n, sparseSafe, rl, ru); + return executeSparseNoAggSparse(sblock, lb, scalars, out, m, n, sparseSafe, rl, ru, rix); else - return executeSparseNoAggDense(sblock, lb, scalars, out, m, n, sparseSafe, rl, ru); + return executeSparseNoAggDense(sblock, lb, scalars, out, m, n, sparseSafe, rl, ru, rix); } else if( _type == CellType.ROW_AGG ) { if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ ) - return executeSparseRowAggSum(sblock, lb, scalars, out, m, n, sparseSafe, rl, ru); + return executeSparseRowAggSum(sblock, lb, scalars, out, m, n, 
sparseSafe, rl, ru, rix); else - return executeSparseRowAggMxx(sblock, lb, scalars, out, m, n, sparseSafe, rl, ru); + return executeSparseRowAggMxx(sblock, lb, scalars, out, m, n, sparseSafe, rl, ru, rix); } else if( _type == CellType.COL_AGG ) { if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ ) - return executeSparseColAggSum(sblock, lb, scalars, out, m, n, sparseSafe, rl, ru); + return executeSparseColAggSum(sblock, lb, scalars, out, m, n, sparseSafe, rl, ru, rix); else - return executeSparseColAggMxx(sblock, lb, scalars, out, m, n, sparseSafe, rl, ru); + return executeSparseColAggMxx(sblock, lb, scalars, out, m, n, sparseSafe, rl, ru, rix); } return -1; } private double executeSparseAndAgg(SparseBlock sblock, SideInput[] b, double[] scalars, - int m, int n, boolean sparseSafe, int rl, int ru) + int m, int n, boolean sparseSafe, int rl, int ru, long rix) { if( sparseSafe && sblock == null ) return 0; SideInput[] lb = createSparseSideInputs(b); if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ ) - return executeSparseAggSum(sblock, lb, scalars, m, n, sparseSafe, rl, ru); + return executeSparseAggSum(sblock, lb, scalars, m, n, sparseSafe, rl, ru, rix); else - return executeSparseAggMxx(sblock, lb, scalars, m, n, sparseSafe, rl, ru); + return executeSparseAggMxx(sblock, lb, scalars, m, n, sparseSafe, rl, ru, rix); } private long executeCompressed(CompressedMatrixBlock a, SideInput[] b, double[] scalars, - MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru) + MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru, long rix) { //NOTE: we don't create sparse side inputs w/ row-major cursors because //compressed data is access in a column-major order if( _type == CellType.NO_AGG ) { - long lnnz = executeCompressedNoAgg(a, b, scalars, out, m, n, sparseSafe, rl, ru); + long lnnz = executeCompressedNoAgg(a, b, scalars, out, m, n, sparseSafe, rl, ru, rix); if( out.isInSparseFormat() ) out.sortSparseRows(rl, ru); return lnnz; @@ -377,38 +385,38 @@ 
public abstract class SpoofCellwise extends SpoofOperator implements Serializabl else if( _type == CellType.ROW_AGG ) { double[] c = out.getDenseBlockValues(); if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ ) - return executeCompressedRowAggSum(a, b, scalars, c, m, n, sparseSafe, rl, ru); + return executeCompressedRowAggSum(a, b, scalars, c, m, n, sparseSafe, rl, ru, rix); else - return executeCompressedRowAggMxx(a, b, scalars, c, m, n, sparseSafe, rl, ru); + return executeCompressedRowAggMxx(a, b, scalars, c, m, n, sparseSafe, rl, ru, rix); } else if( _type == CellType.COL_AGG ) { double[] c = out.getDenseBlockValues(); if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ ) - return executeCompressedColAggSum(a, b, scalars, c, m, n, sparseSafe, rl, ru); + return executeCompressedColAggSum(a, b, scalars, c, m, n, sparseSafe, rl, ru, rix); else - return executeCompressedColAggMxx(a, b, scalars, c, m, n, sparseSafe, rl, ru); + return executeCompressedColAggMxx(a, b, scalars, c, m, n, sparseSafe, rl, ru, rix); } return -1; } private double executeCompressedAndAgg(CompressedMatrixBlock a, SideInput[] b, double[] scalars, - int m, int n, boolean sparseSafe, int rl, int ru) + int m, int n, boolean sparseSafe, int rl, int ru, long rix) { //NOTE: we don't create sparse side inputs w/ row-major cursors because //compressed data is access in a column-major order //numerically stable aggregation for sum/sum_sq if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ ) - return executeCompressedAggSum(a, b, scalars, m, n, sparseSafe, rl, ru); + return executeCompressedAggSum(a, b, scalars, m, n, sparseSafe, rl, ru, rix); else - return executeCompressedAggMxx(a, b, scalars, m, n, sparseSafe, rl, ru); + return executeCompressedAggMxx(a, b, scalars, m, n, sparseSafe, rl, ru, rix); } ///////// //core operator skeletons for dense, sparse, and compressed private long executeDenseNoAgg(DenseBlock a, SideInput[] b, double[] scalars, - DenseBlock c, int m, int n, boolean sparseSafe, int rl, 
int ru) + DenseBlock c, int m, int n, boolean sparseSafe, int rl, int ru, long rix) { long lnnz = 0; if( a == null && !sparseSafe ) { @@ -416,7 +424,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl double[] cvals = c.values(i); int cix = c.pos(i); for( int j=0; j<n; j++ ) - lnnz += ((cvals[cix+j] = genexec(0, b, scalars, m, n, i, j))!=0) ? 1 : 0; + lnnz += ((cvals[cix+j] = genexec(0, b, scalars, m, n, rix+i, i, j))!=0) ? 1 : 0; } } else if( a != null ) { @@ -427,7 +435,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl for( int j=0; j<n; j++ ) { double aval = avals[ix+j]; if( aval != 0 || !sparseSafe) - lnnz += ((cvals[ix+j] = genexec(aval, b, scalars, m, n, i, j))!=0) ? 1 : 0; + lnnz += ((cvals[ix+j] = genexec(aval, b, scalars, m, n, rix+i, i, j))!=0) ? 1 : 0; } } } @@ -436,7 +444,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl } private long executeDenseRowAggSum(DenseBlock a, SideInput[] b, double[] scalars, - DenseBlock c, int m, int n, boolean sparseSafe, int rl, int ru) + DenseBlock c, int m, int n, boolean sparseSafe, int rl, int ru, long rix) { //note: output always single block double[] lc = c.valuesAt(0); @@ -448,7 +456,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl for( int i=rl; i<ru; i++ ) { kbuff.set(0, 0); for( int j=0; j<n; j++ ) - kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j)); + kplus.execute2(kbuff, genexec(0, b, scalars, m, n, rix+i, i, j)); lnnz += ((lc[i] = kbuff._sum)!=0) ? 1 : 0; } } @@ -460,7 +468,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl for( int j=0; j<n; j++ ) { double aval = avals[aix+j]; if( aval != 0 || !sparseSafe) - kplus.execute2(kbuff, genexec(aval, b, scalars, m, n, i, j)); + kplus.execute2(kbuff, genexec(aval, b, scalars, m, n, rix+i, i, j)); } lnnz += ((lc[i] = kbuff._sum)!=0) ? 
1 : 0; } @@ -470,7 +478,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl } private long executeDenseRowAggMxx(DenseBlock a, SideInput[] b, double[] scalars, - DenseBlock c, int m, int n, boolean sparseSafe, int rl, int ru) + DenseBlock c, int m, int n, boolean sparseSafe, int rl, int ru, long rix) { double[] lc = c.valuesAt(0); //single block @@ -481,7 +489,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl for( int i=rl; i<ru; i++ ) { double tmp = initialVal; for( int j=0; j<n; j++ ) - tmp = vfun.execute(tmp, genexec(0, b, scalars, m, n, i, j)); + tmp = vfun.execute(tmp, genexec(0, b, scalars, m, n, rix+i, i, j)); lnnz += ((lc[i] = tmp)!=0) ? 1 : 0; } } @@ -493,7 +501,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl for( int j=0; j<n; j++ ) { double aval = avals[aix + j]; if( aval != 0 || !sparseSafe) - tmp = vfun.execute(tmp, genexec(aval, b, scalars, m, n, i, j)); + tmp = vfun.execute(tmp, genexec(aval, b, scalars, m, n, rix+i, i, j)); } if( sparseSafe && UtilFunctions.containsZero(avals, aix, n) ) tmp = vfun.execute(tmp, 0); @@ -504,7 +512,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl } private long executeDenseColAggSum(DenseBlock a, SideInput[] b, double[] scalars, - DenseBlock c, int m, int n, boolean sparseSafe, int rl, int ru) + DenseBlock c, int m, int n, boolean sparseSafe, int rl, int ru, long rix) { double[] lc = c.valuesAt(0); //single block @@ -516,7 +524,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl for( int i=rl; i<ru; i++ ) for( int j=0; j<n; j++ ) { kbuff.set(lc[j], corr[j]); - kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j)); + kplus.execute2(kbuff, genexec(0, b, scalars, m, n, rix+i, i, j)); lc[j] = kbuff._sum; corr[j] = kbuff._correction; } @@ -529,7 +537,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl double 
aval = avals[aix + j]; if( aval != 0 || !sparseSafe ) { kbuff.set(lc[j], corr[j]); - kplus.execute2(kbuff, genexec(aval, b, scalars, m, n, i, j)); + kplus.execute2(kbuff, genexec(aval, b, scalars, m, n, rix+i, i, j)); lc[j] = kbuff._sum; corr[j] = kbuff._correction; } @@ -541,7 +549,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl } private long executeDenseColAggMxx(DenseBlock a, SideInput[] b, double[] scalars, - DenseBlock c, int m, int n, boolean sparseSafe, int rl, int ru) + DenseBlock c, int m, int n, boolean sparseSafe, int rl, int ru, long rix) { double[] lc = c.valuesAt(0); //single block @@ -552,7 +560,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl if( a == null && !sparseSafe ) { //empty for( int i=rl; i<ru; i++ ) for( int j=0; j<n; j++ ) - lc[j] = vfun.execute(lc[j], genexec(0, b, scalars, m, n, i, j)); + lc[j] = vfun.execute(lc[j], genexec(0, b, scalars, m, n, rix+i, i, j)); } else if( a != null ) { //general case int[] counts = new int[n]; @@ -562,7 +570,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl for( int j=0; j<n; j++ ) { double aval = avals[aix + j]; if( aval != 0 || !sparseSafe ) { - lc[j] = vfun.execute(lc[j], genexec(aval, b, scalars, m, n, i, j)); + lc[j] = vfun.execute(lc[j], genexec(aval, b, scalars, m, n, rix+i, i, j)); counts[j] ++; } } @@ -576,7 +584,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl } private double executeDenseAggSum(DenseBlock a, SideInput[] b, double[] scalars, - int m, int n, boolean sparseSafe, int rl, int ru) + int m, int n, boolean sparseSafe, int rl, int ru, long rix) { KahanFunction kplus = (KahanFunction) getAggFunction(); KahanObject kbuff = new KahanObject(0, 0); @@ -584,7 +592,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl if( a == null && !sparseSafe ) { for( int i=rl; i<ru; i++ ) for( int j=0; j<n; j++ ) - 
kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j)); + kplus.execute2(kbuff, genexec(0, b, scalars, m, n, rix+i, i, j)); } else if( a != null ) { for( int i=rl; i<ru; i++ ) { @@ -593,7 +601,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl for( int j=0; j<n; j++ ) { double aval = avals[aix + j]; if( aval != 0 || !sparseSafe) - kplus.execute2(kbuff, genexec(aval, b, scalars, m, n, i, j)); + kplus.execute2(kbuff, genexec(aval, b, scalars, m, n, rix+i, i, j)); } } } @@ -602,7 +610,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl } private double executeDenseAggMxx(DenseBlock a, SideInput[] b, double[] scalars, - int m, int n, boolean sparseSafe, int rl, int ru) + int m, int n, boolean sparseSafe, int rl, int ru, long rix) { //safe aggregation for min/max w/ handling of zero entries //note: sparse safe with zero value as min/max handled outside @@ -612,7 +620,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl if( a == null && !sparseSafe ) { for( int i=rl; i<ru; i++ ) for( int j=0; j<n; j++ ) - ret = vfun.execute(ret, genexec(0, b, scalars, m, n, i, j)); + ret = vfun.execute(ret, genexec(0, b, scalars, m, n, rix+i, i, j)); } else if( a != null ) { for( int i=rl; i<ru; i++ ) { @@ -621,7 +629,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl for( int j=0; j<n; j++ ) { double aval = avals[aix + j]; if( aval != 0 || !sparseSafe) - ret = vfun.execute(ret, genexec(aval, b, scalars, m, n, i, j)); + ret = vfun.execute(ret, genexec(aval, b, scalars, m, n, rix+i, i, j)); } } } @@ -630,7 +638,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl } private long executeSparseNoAggSparse(SparseBlock sblock, SideInput[] b, double[] scalars, - MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru) + MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru, long rix) { //note: 
sequential scan algorithm for both sparse-safe and -unsafe //in order to avoid binary search for sparse-unsafe @@ -649,16 +657,16 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl //process zeros before current non-zero if( !sparseSafe ) for(int j=lastj+1; j<aix[k]; j++) - c.append(i, j, genexec(0, b, scalars, m, n, i, j)); + c.append(i, j, genexec(0, b, scalars, m, n, rix+i, i, j)); //process current non-zero lastj = aix[k]; - c.append(i, lastj, genexec(avals[k], b, scalars, m, n, i, lastj)); + c.append(i, lastj, genexec(avals[k], b, scalars, m, n, rix+i, i, lastj)); } } //process empty rows or remaining zeros if( !sparseSafe ) for(int j=lastj+1; j<n; j++) - c.append(i, j, genexec(0, b, scalars, m, n, i, j)); + c.append(i, j, genexec(0, b, scalars, m, n, rix+i, i, j)); lnnz += c.size(i); } @@ -666,7 +674,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl } private long executeSparseNoAggDense(SparseBlock sblock, SideInput[] b, double[] scalars, - MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru) + MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru, long rix) { //note: sequential scan algorithm for both sparse-safe and -unsafe //in order to avoid binary search for sparse-unsafe @@ -686,10 +694,10 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl //process zeros before current non-zero if( !sparseSafe ) for(int j=lastj+1; j<aix[k]; j++) - lnnz += ((cvals[cix+j]=genexec(0, b, scalars, m, n, i, j))!=0)?1:0; + lnnz += ((cvals[cix+j]=genexec(0, b, scalars, m, n, rix+i, i, j))!=0)?1:0; //process current non-zero lastj = aix[k]; - lnnz += ((cvals[cix+lastj]=genexec(avals[k], b, scalars, m, n, i, lastj))!=0)?1:0; + lnnz += ((cvals[cix+lastj]=genexec(avals[k], b, scalars, m, n, rix+i, i, lastj))!=0)?1:0; } } //process empty rows or remaining zeros @@ -697,14 +705,14 @@ public abstract class SpoofCellwise extends SpoofOperator implements 
Serializabl for(int j=lastj+1; j<n; j++) { double[] cvals = c.values(i); int cix = c.pos(i); - lnnz += ((cvals[cix+j]=genexec(0, b, scalars, m, n, i, j))!=0)?1:0; + lnnz += ((cvals[cix+j]=genexec(0, b, scalars, m, n, rix+i, i, j))!=0)?1:0; } } return lnnz; } private long executeSparseRowAggSum(SparseBlock sblock, SideInput[] b, double[] scalars, - MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru) + MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru, long rix) { KahanFunction kplus = (KahanFunction) getAggFunction(); KahanObject kbuff = new KahanObject(0, 0); @@ -726,23 +734,23 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl //process zeros before current non-zero if( !sparseSafe ) for(int j=lastj+1; j<aix[k]; j++) - kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j)); + kplus.execute2(kbuff, genexec(0, b, scalars, m, n, rix+i, i, j)); //process current non-zero lastj = aix[k]; - kplus.execute2(kbuff, genexec(avals[k], b, scalars, m, n, i, lastj)); + kplus.execute2(kbuff, genexec(avals[k], b, scalars, m, n, rix+i, i, lastj)); } } //process empty rows or remaining zeros if( !sparseSafe ) for(int j=lastj+1; j<n; j++) - kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j)); + kplus.execute2(kbuff, genexec(0, b, scalars, m, n, rix+i, i, j)); lnnz += ((c[i] = kbuff._sum)!=0) ? 1 : 0; } return lnnz; } private long executeSparseRowAggMxx(SparseBlock sblock, SideInput[] b, double[] scalars, - MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru) + MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru, long rix) { double initialVal = (_aggOp==AggOp.MIN) ? 
Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY; ValueFunction vfun = getAggFunction(); @@ -764,23 +772,23 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl //process zeros before current non-zero if( !sparseSafe ) for(int j=lastj+1; j<aix[k]; j++) - tmp = vfun.execute(tmp, genexec(0, b, scalars, m, n, i, j)); + tmp = vfun.execute(tmp, genexec(0, b, scalars, m, n, rix+i, i, j)); //process current non-zero lastj = aix[k]; - tmp = vfun.execute( tmp, genexec(avals[k], b, scalars, m, n, i, lastj)); + tmp = vfun.execute( tmp, genexec(avals[k], b, scalars, m, n, rix+i, i, lastj)); } } //process empty rows or remaining zeros if( !sparseSafe ) for(int j=lastj+1; j<n; j++) - tmp = vfun.execute(tmp, genexec(0, b, scalars, m, n, i, j)); + tmp = vfun.execute(tmp, genexec(0, b, scalars, m, n, rix+i, i, j)); lnnz += ((c[i] = tmp)!=0) ? 1 : 0; } return lnnz; } private long executeSparseColAggSum(SparseBlock sblock, SideInput[] b, double[] scalars, - MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru) + MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru, long rix) { KahanFunction kplus = (KahanFunction) getAggFunction(); KahanObject kbuff = new KahanObject(0, 0); @@ -803,14 +811,14 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl if( !sparseSafe ) for(int j=lastj+1; j<aix[k]; j++) { kbuff.set(c[j], corr[j]); - kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j)); + kplus.execute2(kbuff, genexec(0, b, scalars, m, n, rix+i, i, j)); c[j] = kbuff._sum; corr[j] = kbuff._correction; } //process current non-zero lastj = aix[k]; kbuff.set(c[aix[k]], corr[aix[k]]); - kplus.execute2(kbuff, genexec(avals[k], b, scalars, m, n, i, lastj)); + kplus.execute2(kbuff, genexec(avals[k], b, scalars, m, n, rix+i, i, lastj)); c[aix[k]] = kbuff._sum; corr[aix[k]] = kbuff._correction; } @@ -819,7 +827,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl if( 
!sparseSafe ) for(int j=lastj+1; j<n; j++) { kbuff.set(c[j], corr[j]); - kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j)); + kplus.execute2(kbuff, genexec(0, b, scalars, m, n, rix+i, i, j)); c[j] = kbuff._sum; corr[j] = kbuff._correction; } @@ -828,7 +836,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl } private long executeSparseColAggMxx(SparseBlock sblock, SideInput[] b, double[] scalars, - MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru) + MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru, long rix) { double initialVal = (_aggOp==AggOp.MIN) ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY; ValueFunction vfun = getAggFunction(); @@ -850,26 +858,26 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl //process zeros before current non-zero if( !sparseSafe ) for(int j=lastj+1; j<aix[k]; j++) { - c[j] = vfun.execute(c[j], genexec(0, b, scalars, m, n, i, j)); + c[j] = vfun.execute(c[j], genexec(0, b, scalars, m, n, rix+i, i, j)); count[j] ++; } //process current non-zero lastj = aix[k]; - c[aix[k]] = vfun.execute(c[aix[k]], genexec(avals[k], b, scalars, m, n, i, lastj)); + c[aix[k]] = vfun.execute(c[aix[k]], genexec(avals[k], b, scalars, m, n, rix+i, i, lastj)); count[aix[k]] ++; } } //process empty rows or remaining zeros if( !sparseSafe ) for(int j=lastj+1; j<n; j++) - c[j] = vfun.execute(c[j], genexec(0, b, scalars, m, n, i, j)); + c[j] = vfun.execute(c[j], genexec(0, b, scalars, m, n, rix+i, i, j)); } return -1; } private double executeSparseAggSum(SparseBlock sblock, SideInput[] b, double[] scalars, - int m, int n, boolean sparseSafe, int rl, int ru) + int m, int n, boolean sparseSafe, int rl, int ru, long rix) { KahanFunction kplus = (KahanFunction) getAggFunction(); KahanObject kbuff = new KahanObject(0, 0); @@ -888,22 +896,22 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl //process zeros before current 
non-zero if( !sparseSafe ) for(int j=lastj+1; j<aix[k]; j++) - kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j)); + kplus.execute2(kbuff, genexec(0, b, scalars, m, n, rix+i, i, j)); //process current non-zero lastj = aix[k]; - kplus.execute2(kbuff, genexec(avals[k], b, scalars, m, n, i, lastj)); + kplus.execute2(kbuff, genexec(avals[k], b, scalars, m, n, rix+i, i, lastj)); } } //process empty rows or remaining zeros if( !sparseSafe ) for(int j=lastj+1; j<n; j++) - kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j)); + kplus.execute2(kbuff, genexec(0, b, scalars, m, n, rix+i, i, j)); } return kbuff._sum; } private double executeSparseAggMxx(SparseBlock sblock, SideInput[] b, double[] scalars, - int m, int n, boolean sparseSafe, int rl, int ru) + int m, int n, boolean sparseSafe, int rl, int ru, long rix) { double ret = (_aggOp==AggOp.MIN) ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY; ret = (sparseSafe && sblock.size() < (long)m*n) ? 0 : ret; @@ -923,22 +931,22 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl //process zeros before current non-zero if( !sparseSafe ) for(int j=lastj+1; j<aix[k]; j++) - ret = vfun.execute(ret, genexec(0, b, scalars, m, n, i, j)); + ret = vfun.execute(ret, genexec(0, b, scalars, m, n, rix+i, i, j)); //process current non-zero lastj = aix[k]; - ret = vfun.execute(ret, genexec(avals[k], b, scalars, m, n, i, lastj)); + ret = vfun.execute(ret, genexec(avals[k], b, scalars, m, n, rix+i, i, lastj)); } } //process empty rows or remaining zeros if( !sparseSafe ) for(int j=lastj+1; j<n; j++) - ret = vfun.execute(ret, genexec(0, b, scalars, m, n, i, j)); + ret = vfun.execute(ret, genexec(0, b, scalars, m, n, rix+i, i, j)); } return ret; } private long executeCompressedNoAgg(CompressedMatrixBlock a, SideInput[] b, double[] scalars, - MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru) + MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru, long rix) { double[] c 
= (out.getDenseBlock() != null) ? out.getDenseBlockValues() : null; @@ -958,7 +966,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl Iterator<IJV> iter = a.getIterator(rl, ru, !sparseSafe); while( iter.hasNext() ) { IJV cell = iter.next(); - double val = genexec(cell.getV(), b, scalars, m, n, cell.getI(), cell.getJ()); + double val = genexec(cell.getV(), b, scalars, m, n, rix+cell.getI(), cell.getI(), cell.getJ()); if( out.isInSparseFormat() ) { csblock.allocate(cell.getI()); csblock.append(cell.getI(), cell.getJ(), val); @@ -971,7 +979,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl } private long executeCompressedRowAggSum(CompressedMatrixBlock a, SideInput[] b, double[] scalars, - double[] c, int m, int n, boolean sparseSafe, int rl, int ru) + double[] c, int m, int n, boolean sparseSafe, int rl, int ru, long rix) { KahanFunction kplus = (KahanFunction) getAggFunction(); KahanObject kbuff = new KahanObject(0, 0); @@ -979,7 +987,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl Iterator<IJV> iter = a.getIterator(rl, ru, !sparseSafe); while( iter.hasNext() ) { IJV cell = iter.next(); - double val = genexec(cell.getV(), b, scalars, m, n, cell.getI(), cell.getJ()); + double val = genexec(cell.getV(), b, scalars, m, n, rix+cell.getI(), cell.getI(), cell.getJ()); kbuff.set(c[cell.getI()], 0); kplus.execute2(kbuff, val); c[cell.getI()] = kbuff._sum; @@ -990,7 +998,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl } private long executeCompressedRowAggMxx(CompressedMatrixBlock a, SideInput[] b, double[] scalars, - double[] c, int m, int n, boolean sparseSafe, int rl, int ru) + double[] c, int m, int n, boolean sparseSafe, int rl, int ru, long rix) { Arrays.fill(c, rl, ru, (_aggOp==AggOp.MIN) ? 
Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY); ValueFunction vfun = getAggFunction(); @@ -998,7 +1006,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl Iterator<IJV> iter = a.getIterator(rl, ru, !sparseSafe); while( iter.hasNext() ) { IJV cell = iter.next(); - double val = genexec(cell.getV(), b, scalars, m, n, cell.getI(), cell.getJ()); + double val = genexec(cell.getV(), b, scalars, m, n, rix+cell.getI(), cell.getI(), cell.getJ()); c[cell.getI()] = vfun.execute(c[cell.getI()], val); } for( int i=rl; i<ru; i++ ) @@ -1007,7 +1015,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl } private long executeCompressedColAggSum(CompressedMatrixBlock a, SideInput[] b, double[] scalars, - double[] c, int m, int n, boolean sparseSafe, int rl, int ru) + double[] c, int m, int n, boolean sparseSafe, int rl, int ru, long rix) { KahanFunction kplus = (KahanFunction) getAggFunction(); KahanObject kbuff = new KahanObject(0, 0); @@ -1016,7 +1024,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl Iterator<IJV> iter = a.getIterator(rl, ru, !sparseSafe); while( iter.hasNext() ) { IJV cell = iter.next(); - double val = genexec(cell.getV(), b, scalars, m, n, cell.getI(), cell.getJ()); + double val = genexec(cell.getV(), b, scalars, m, n, rix+cell.getI(), cell.getI(), cell.getJ()); kbuff.set(c[cell.getJ()], corr[cell.getJ()]); kplus.execute2(kbuff, val); c[cell.getJ()] = kbuff._sum; @@ -1026,7 +1034,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl } private long executeCompressedColAggMxx(CompressedMatrixBlock a, SideInput[] b, double[] scalars, - double[] c, int m, int n, boolean sparseSafe, int rl, int ru) + double[] c, int m, int n, boolean sparseSafe, int rl, int ru, long rix) { Arrays.fill(c, rl, ru, (_aggOp==AggOp.MIN) ? 
Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY); ValueFunction vfun = getAggFunction(); @@ -1034,7 +1042,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl Iterator<IJV> iter = a.getIterator(rl, ru, !sparseSafe); while( iter.hasNext() ) { IJV cell = iter.next(); - double val = genexec(cell.getV(), b, scalars, m, n, cell.getI(), cell.getJ()); + double val = genexec(cell.getV(), b, scalars, m, n, rix+cell.getI(), cell.getI(), cell.getJ()); c[cell.getI()] = vfun.execute(c[cell.getI()], val); } for( int i=rl; i<ru; i++ ) @@ -1043,8 +1051,9 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl } private double executeCompressedAggSum(CompressedMatrixBlock a, SideInput[] b, double[] scalars, - int m, int n, boolean sparseSafe, int rl, int ru) + int m, int n, boolean sparseSafe, int rl, int ru, long rix) { + //TODO handle sequences in special case summation KahanFunction kplus = (KahanFunction) getAggFunction(); KahanObject kbuff = new KahanObject(0, 0); KahanObject kbuff2 = new KahanObject(0, 0); @@ -1073,7 +1082,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl Iterator<IJV> iter = a.getIterator(rl, ru, !sparseSafe); while( iter.hasNext() ) { IJV cell = iter.next(); - double val = genexec(cell.getV(), b, scalars, m, n, cell.getI(), cell.getJ()); + double val = genexec(cell.getV(), b, scalars, m, n, rix+cell.getI(), cell.getI(), cell.getJ()); kplus.execute2(kbuff, val); } } @@ -1081,7 +1090,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl } private double executeCompressedAggMxx(CompressedMatrixBlock a, SideInput[] b, double[] scalars, - int m, int n, boolean sparseSafe, int rl, int ru) + int m, int n, boolean sparseSafe, int rl, int ru, long rix) { //safe aggregation for min/max w/ handling of zero entries //note: sparse safe with zero value as min/max handled outside @@ -1091,14 +1100,21 @@ public abstract class SpoofCellwise 
extends SpoofOperator implements Serializabl Iterator<IJV> iter = a.getIterator(rl, ru, !sparseSafe); while( iter.hasNext() ) { IJV cell = iter.next(); - double val = genexec(cell.getV(), b, scalars, m, n, cell.getI(), cell.getJ()); + double val = genexec(cell.getV(), b, scalars, m, n, rix+cell.getI(), cell.getI(), cell.getJ()); ret = vfun.execute(ret, val); } return ret; } + //local execution where grix==rix + protected final double genexec( double a, SideInput[] b, + double[] scalars, int m, int n, int rix, int cix) { + return genexec(a, b, scalars, m, n, rix, rix, cix); + } + + //distributed execution with additional global row index protected abstract double genexec( double a, SideInput[] b, - double[] scalars, int m, int n, int rowIndex, int colIndex); + double[] scalars, int m, int n, long gix, int rix, int cix); private class ParAggTask implements Callable<Double> { @@ -1126,11 +1142,11 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl @Override public Double call() { if( _a instanceof CompressedMatrixBlock ) - return executeCompressedAndAgg((CompressedMatrixBlock)_a, _b, _scalars, _rlen, _clen, _safe, _rl, _ru); + return executeCompressedAndAgg((CompressedMatrixBlock)_a, _b, _scalars, _rlen, _clen, _safe, _rl, _ru, 0); else if (!_a.isInSparseFormat()) - return executeDenseAndAgg(_a.getDenseBlock(), _b, _scalars, _rlen, _clen, _safe, _rl, _ru); + return executeDenseAndAgg(_a.getDenseBlock(), _b, _scalars, _rlen, _clen, _safe, _rl, _ru, 0); else - return executeSparseAndAgg(_a.getSparseBlock(), _b, _scalars, _rlen, _clen, _safe, _rl, _ru); + return executeSparseAndAgg(_a.getSparseBlock(), _b, _scalars, _rlen, _clen, _safe, _rl, _ru, 0); } } @@ -1166,11 +1182,11 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl _c.allocateDenseBlock(); } if( _a instanceof CompressedMatrixBlock ) - return executeCompressed((CompressedMatrixBlock)_a, _b, _scalars, _c, _rlen, _clen, _safe, _rl, _ru); + return 
executeCompressed((CompressedMatrixBlock)_a, _b, _scalars, _c, _rlen, _clen, _safe, _rl, _ru, 0); else if( !_a.isInSparseFormat() ) - return executeDense(_a.getDenseBlock(), _b, _scalars, _c, _rlen, _clen, _safe, _rl, _ru); + return executeDense(_a.getDenseBlock(), _b, _scalars, _c, _rlen, _clen, _safe, _rl, _ru, 0); else - return executeSparse(_a.getSparseBlock(), _b, _scalars, _c, _rlen, _clen, _safe, _rl, _ru); + return executeSparse(_a.getSparseBlock(), _b, _scalars, _c, _rlen, _clen, _safe, _rl, _ru, 0); } public MatrixBlock getResult() { http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java index 8db037e..68ca6ff 100644 --- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java +++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java @@ -73,11 +73,15 @@ public abstract class SpoofMultiAggregate extends SpoofOperator implements Seria @Override public MatrixBlock execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, MatrixBlock out) { - return execute(inputs, scalarObjects, out, 1); + return execute(inputs, scalarObjects, out, 1, 0); } @Override public MatrixBlock execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, MatrixBlock out, int k) { + return execute(inputs, scalarObjects, out, k, 0); + } + + public MatrixBlock execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, MatrixBlock out, int k, long rix) { //sanity check if( inputs==null || inputs.size() < 1 ) throw new RuntimeException("Invalid input arguments."); @@ -104,11 +108,11 @@ public abstract class SpoofMultiAggregate extends SpoofOperator implements Seria if( k <= 1 ) 
//SINGLE-THREADED { if( inputs.get(0) instanceof CompressedMatrixBlock ) - executeCompressed((CompressedMatrixBlock)inputs.get(0), b, scalars, c, m, n, 0, m); + executeCompressed((CompressedMatrixBlock)inputs.get(0), b, scalars, c, m, n, 0, m, rix); else if( !inputs.get(0).isInSparseFormat() ) - executeDense(inputs.get(0).getDenseBlock(), b, scalars, c, m, n, sparseSafe, 0, m); + executeDense(inputs.get(0).getDenseBlock(), b, scalars, c, m, n, sparseSafe, 0, m, rix); else - executeSparse(inputs.get(0).getSparseBlock(), b, scalars, c, m, n, sparseSafe, 0, m); + executeSparse(inputs.get(0).getSparseBlock(), b, scalars, c, m, n, sparseSafe, 0, m, rix); } else //MULTI-THREADED { @@ -141,7 +145,7 @@ public abstract class SpoofMultiAggregate extends SpoofOperator implements Seria return out; } - private void executeDense(DenseBlock a, SideInput[] b, double[] scalars, double[] c, int m, int n, boolean sparseSafe, int rl, int ru) + private void executeDense(DenseBlock a, SideInput[] b, double[] scalars, double[] c, int m, int n, boolean sparseSafe, int rl, int ru, long rix) { SideInput[] lb = createSparseSideInputs(b); @@ -149,20 +153,20 @@ public abstract class SpoofMultiAggregate extends SpoofOperator implements Seria if( a == null && !sparseSafe ) { for( int i=rl; i<ru; i++ ) for( int j=0; j<n; j++ ) - genexec( 0, lb, scalars, c, m, n, i, j ); + genexec( 0, lb, scalars, c, m, n, rix+i, i, j ); } else if( a != null ) { for( int i=rl; i<ru; i++ ) { double[] avals = a.values(i); int aix = a.pos(i); for( int j=0; j<n; j++ ) - genexec( avals[aix+j], lb, scalars, c, m, n, i, j ); + genexec( avals[aix+j], lb, scalars, c, m, n, rix+i, i, j ); } } } private void executeSparse(SparseBlock sblock, SideInput[] b, double[] scalars, - double[] c, int m, int n, boolean sparseSafe, int rl, int ru) + double[] c, int m, int n, boolean sparseSafe, int rl, int ru, long rix) { if( sblock == null && sparseSafe ) return; @@ -183,30 +187,38 @@ public abstract class SpoofMultiAggregate extends 
SpoofOperator implements Seria //process zeros before current non-zero if( !sparseSafe ) for(int j=lastj+1; j<aix[k]; j++) - genexec(0, lb, scalars, c, m, n, i, j); + genexec(0, lb, scalars, c, m, n, rix+i, i, j); //process current non-zero lastj = aix[k]; - genexec(avals[k], lb, scalars, c, m, n, i, lastj); + genexec(avals[k], lb, scalars, c, m, n, rix+i, i, lastj); } } //process empty rows or remaining zeros if( !sparseSafe ) for(int j=lastj+1; j<n; j++) - genexec(0, lb, scalars, c, m, n, i, j); + genexec(0, lb, scalars, c, m, n, rix+i, i, j); } } - private void executeCompressed(CompressedMatrixBlock a, SideInput[] b, double[] scalars, double[] c, int m, int n, int rl, int ru) + private void executeCompressed(CompressedMatrixBlock a, SideInput[] b, double[] scalars, double[] c, int m, int n, int rl, int ru, long rix) { //core compressed aggregation operation Iterator<IJV> iter = a.getIterator(rl, ru, true); while( iter.hasNext() ) { IJV cell = iter.next(); - genexec(cell.getV(), b, scalars, c, m, n, cell.getI(), cell.getJ()); + genexec(cell.getV(), b, scalars, c, m, n, rix+cell.getI(), cell.getI(), cell.getJ()); } } - protected abstract void genexec( double a, SideInput[] b, double[] scalars, double[] c, int m, int n, int rowIndex, int colIndex); + //local execution where grix==rix + protected final void genexec( double a, SideInput[] b, + double[] scalars, double[] c, int m, int n, int rix, int cix) { + genexec(a, b, scalars, c, m, n, rix, rix, cix); + } + + //distributed execution with additional global row index + protected abstract void genexec( double a, SideInput[] b, + double[] scalars, double[] c, int m, int n, long grix, int rix, int cix); private void setInitialOutputValues(double[] c) { @@ -304,11 +316,11 @@ public abstract class SpoofMultiAggregate extends SpoofOperator implements Seria double[] c = new double[_aggOps.length]; setInitialOutputValues(c); if( _a instanceof CompressedMatrixBlock ) - executeCompressed((CompressedMatrixBlock)_a, _b, 
_scalars, c, _rlen, _clen, _rl, _ru); + executeCompressed((CompressedMatrixBlock)_a, _b, _scalars, c, _rlen, _clen, _rl, _ru, 0); else if( !_a.isInSparseFormat() ) - executeDense(_a.getDenseBlock(), _b, _scalars, c, _rlen, _clen, _safe, _rl, _ru); + executeDense(_a.getDenseBlock(), _b, _scalars, c, _rlen, _clen, _safe, _rl, _ru, 0); else - executeSparse(_a.getSparseBlock(), _b, _scalars, c, _rlen, _clen, _safe, _rl, _ru); + executeSparse(_a.getSparseBlock(), _b, _scalars, c, _rlen, _clen, _safe, _rl, _ru, 0); return c; } } http://git-wip-us.apache.org/repos/asf/systemml/blob/d0b4373f/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java index 7454f78..507bd65 100644 --- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java +++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java @@ -118,10 +118,10 @@ public abstract class SpoofRowwise extends SpoofOperator @Override public MatrixBlock execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, MatrixBlock out) { - return execute(inputs, scalarObjects, out, true, false); + return execute(inputs, scalarObjects, out, true, false, 0); } - public MatrixBlock execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, MatrixBlock out, boolean allocTmp, boolean aggIncr) { + public MatrixBlock execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, MatrixBlock out, boolean allocTmp, boolean aggIncr, long rix) { //sanity check if( inputs==null || inputs.size() < 1 || out==null ) throw new RuntimeException("Invalid input arguments."); @@ -149,11 +149,11 @@ public abstract class SpoofRowwise extends SpoofOperator //core sequential execute MatrixBlock a = inputs.get(0); if( a instanceof CompressedMatrixBlock ) - 
executeCompressed((CompressedMatrixBlock)a, b, scalars, c, n, 0, m); + executeCompressed((CompressedMatrixBlock)a, b, scalars, c, n, 0, m, rix); else if( !a.isInSparseFormat() ) - executeDense(a.getDenseBlock(), b, scalars, c, n, 0, m); + executeDense(a.getDenseBlock(), b, scalars, c, n, 0, m, rix); else - executeSparse(a.getSparseBlock(), b, scalars, c, n, 0, m); + executeSparse(a.getSparseBlock(), b, scalars, c, n, 0, m, rix); //post-processing if( allocTmp &&_reqVectMem > 0 ) @@ -285,39 +285,39 @@ public abstract class SpoofRowwise extends SpoofOperator out.setNonZeros(out.getNumRows()*out.getNumColumns()); } - private void executeDense(DenseBlock a, SideInput[] b, double[] scalars, DenseBlock c, int n, int rl, int ru) { + private void executeDense(DenseBlock a, SideInput[] b, double[] scalars, DenseBlock c, int n, int rl, int ru, long rix) { //forward empty block to sparse if( a == null ) { - executeSparse(null, b, scalars, c, n, rl, ru); + executeSparse(null, b, scalars, c, n, rl, ru, rix); return; } SideInput[] lb = createSparseSideInputs(b, true); for( int i=rl; i<ru; i++ ) { genexec(a.values(i), a.pos(i), lb, scalars, - c.values(i), c.pos(i), n, i ); + c.values(i), c.pos(i), n, rix+i, i ); } } - private void executeSparse(SparseBlock a, SideInput[] b, double[] scalars, DenseBlock c, int n, int rl, int ru) { + private void executeSparse(SparseBlock a, SideInput[] b, double[] scalars, DenseBlock c, int n, int rl, int ru, long rix) { SideInput[] lb = createSparseSideInputs(b, true); SparseRow empty = new SparseRowVector(1); for( int i=rl; i<ru; i++ ) { if( a!=null && !a.isEmpty(i) ) { //call generated method genexec(a.values(i), a.indexes(i), a.pos(i), lb, scalars, - c.values(i), c.pos(i), a.size(i), n, i); + c.values(i), c.pos(i), a.size(i), n, rix+i, i); } else genexec(empty.values(), empty.indexes(), 0, lb, scalars, - c.values(i), c.pos(i), 0, n, i); + c.values(i), c.pos(i), 0, n, rix+i, i); } } - private void executeCompressed(CompressedMatrixBlock a, 
SideInput[] b, double[] scalars, DenseBlock c, int n, int rl, int ru) { + private void executeCompressed(CompressedMatrixBlock a, SideInput[] b, double[] scalars, DenseBlock c, int n, int rl, int ru, long rix) { //forward empty block to sparse if( a.isEmptyBlock(false) ) { - executeSparse(null, b, scalars, c, n, rl, ru); + executeSparse(null, b, scalars, c, n, rl, ru, rix); return; } @@ -331,11 +331,23 @@ public abstract class SpoofRowwise extends SpoofOperator //methods to be implemented by generated operators of type SpoofRowAggrgate + //local execution where grix==rix + protected final void genexec(double[] a, int ai, + SideInput[] b, double[] scalars, double[] c, int ci, int len, int rix) { + genexec(a, ai, b, scalars, c, ci, len, rix, rix); + } + + protected final void genexec(double[] avals, int[] aix, int ai, + SideInput[] b, double[] scalars, double[] c, int ci, int alen, int n, int rix) { + genexec(avals, aix, ai, b, scalars, c, ci, alen, n, rix, rix); + } + + //distributed execution with additional global row index protected abstract void genexec(double[] a, int ai, - SideInput[] b, double[] scalars, double[] c, int ci, int len, int rowIndex); + SideInput[] b, double[] scalars, double[] c, int ci, int len, long grix, int rix); protected abstract void genexec(double[] avals, int[] aix, int ai, - SideInput[] b, double[] scalars, double[] c, int ci, int alen, int n, int rowIndex); + SideInput[] b, double[] scalars, double[] c, int ci, int alen, int n, long grix, int rix); /** @@ -369,11 +381,11 @@ public abstract class SpoofRowwise extends SpoofOperator DenseBlock c = DenseBlockFactory.createDenseBlock(1, _outLen); if( _a instanceof CompressedMatrixBlock ) - executeCompressed((CompressedMatrixBlock)_a, _b, _scalars, c, _clen, _rl, _ru); + executeCompressed((CompressedMatrixBlock)_a, _b, _scalars, c, _clen, _rl, _ru, 0); else if( !_a.isInSparseFormat() ) - executeDense(_a.getDenseBlock(), _b, _scalars, c, _clen, _rl, _ru); + executeDense(_a.getDenseBlock(), 
_b, _scalars, c, _clen, _rl, _ru, 0); else - executeSparse(_a.getSparseBlock(), _b, _scalars, c, _clen, _rl, _ru); + executeSparse(_a.getSparseBlock(), _b, _scalars, c, _clen, _rl, _ru, 0); if( _reqVectMem > 0 ) LibSpoofPrimitives.cleanupThreadLocalMemory(); @@ -413,11 +425,11 @@ public abstract class SpoofRowwise extends SpoofOperator LibSpoofPrimitives.setupThreadLocalMemory(_reqVectMem, _clen, _clen2); if( _a instanceof CompressedMatrixBlock ) - executeCompressed((CompressedMatrixBlock)_a, _b, _scalars, _c.getDenseBlock(), _clen, _rl, _ru); + executeCompressed((CompressedMatrixBlock)_a, _b, _scalars, _c.getDenseBlock(), _clen, _rl, _ru, 0); else if( !_a.isInSparseFormat() ) - executeDense(_a.getDenseBlock(), _b, _scalars, _c.getDenseBlock(), _clen, _rl, _ru); + executeDense(_a.getDenseBlock(), _b, _scalars, _c.getDenseBlock(), _clen, _rl, _ru, 0); else - executeSparse(_a.getSparseBlock(), _b, _scalars, _c.getDenseBlock(), _clen, _rl, _ru); + executeSparse(_a.getSparseBlock(), _b, _scalars, _c.getDenseBlock(), _clen, _rl, _ru, 0); if( _reqVectMem > 0 ) LibSpoofPrimitives.cleanupThreadLocalMemory();
