[SYSTEMML-1362] Handling of temporary vectors in generated operators This patch adds proper handling and allocation of temporary vectors in codegen row aggregate templates. So far, these scenarios crashed if the vector operations were not condensed into the partial aggregation. For example, colSums(X<=rowSums(X)) can be condensed, but colSums((X<=rowSums(X))==1) cannot because X<=rowSums(X) creates a row intermediate that is used by the subsequent operation.
However, an initial implementation which allocated temporary vector intermediates showed significant performance problems. Hence, this patch also includes a very lean memory management of intermediates for reuse. The basic idea is to (1) count the number of vector intermediates, (2) establish a thread-local ring buffer of pre-allocated row vectors, and (3) reuse these intermediates for sparse and dense vector operations w/ and w/o reset. On a scenario of a 1M x 1K (8GB) input matrix and colSums((X<=rowSums(X))==1), this temporary reuse improved performance from 743ms to 315ms, i.e., it saturated peak memory bandwidth of 25GB/s. Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/7491a680 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/7491a680 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/7491a680 Branch: refs/heads/master Commit: 7491a680b645ea205faf296c924358014c273892 Parents: 70e5aa1 Author: Matthias Boehm <[email protected]> Authored: Tue Mar 28 22:28:53 2017 -0700 Committer: Matthias Boehm <[email protected]> Committed: Tue Mar 28 22:29:53 2017 -0700 ---------------------------------------------------------------------- .../sysml/hops/codegen/cplan/CNodeBinary.java | 38 ++-- .../sysml/hops/codegen/cplan/CNodeRowAgg.java | 23 ++- .../sysml/hops/codegen/cplan/CNodeUnary.java | 4 +- .../codegen/template/TemplateOuterProduct.java | 4 +- .../hops/codegen/template/TemplateRowAgg.java | 6 +- .../hops/codegen/template/TemplateUtils.java | 8 + .../runtime/codegen/LibSpoofPrimitives.java | 196 +++++++++++++------ .../runtime/codegen/SpoofRowAggregate.java | 17 +- .../functions/codegen/RowAggTmplTest.java | 15 +- .../scripts/functions/codegen/rowAggPattern8.R | 29 +++ .../functions/codegen/rowAggPattern8.dml | 26 +++ 11 files changed, 270 insertions(+), 96 deletions(-) 
---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7491a680/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java index 36b0354..41f99c9 100644 --- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java +++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java @@ -56,7 +56,7 @@ public class CNodeBinary extends CNode case DOT_PRODUCT: return sparse ? " double %TMP% = LibSpoofPrimitives.dotProduct(%IN1v%, %IN2%, %IN1i%, %POS1%, %POS2%, %LEN%);\n" : " double %TMP% = LibSpoofPrimitives.dotProduct(%IN1%, %IN2%, %POS1%, %POS2%, %LEN%);\n"; - + case VECT_MULT_ADD: case VECT_DIV_ADD: case VECT_EQUAL_ADD: @@ -66,9 +66,10 @@ public class CNodeBinary extends CNode case VECT_GREATER_ADD: case VECT_GREATEREQUAL_ADD: { String vectName = getVectorPrimitiveName(); - return sparse ? " LibSpoofPrimitives.vect"+vectName+"Add(%IN1%, %IN2v%, %OUT%, %IN2i%, %POS2%, %POSOUT%, %LEN%);\n" : - " LibSpoofPrimitives.vect"+vectName+"Add(%IN1%, %IN2%, %OUT%, %POS2%, %POSOUT%, %LEN%);\n"; + return sparse ? " LibSpoofPrimitives.vect"+vectName+"Add(%IN1v%, %IN2%, %OUT%, %IN1i%, %POS1%, %POSOUT%, %LEN%);\n" : + " LibSpoofPrimitives.vect"+vectName+"Add(%IN1%, %IN2%, %OUT%, %POS1%, %POSOUT%, %LEN%);\n"; } + case VECT_DIV_SCALAR: case VECT_MULT_SCALAR: case VECT_EQUAL_SCALAR: @@ -78,9 +79,10 @@ public class CNodeBinary extends CNode case VECT_GREATER_SCALAR: case VECT_GREATEREQUAL_SCALAR: { String vectName = getVectorPrimitiveName(); - return sparse ? 
" LibSpoofPrimitives.vect"+vectName+"Write(%IN1v%, %IN1i%, %IN2%, %OUT%, %POS1%, %POSOUT%, %LEN%);\n" : - " LibSpoofPrimitives.vect"+vectName+"Write(%IN1%, %IN2%, %OUT%, %POS1%, %POSOUT%, %LEN%);\n"; + return sparse ? " double[] %TMP% = LibSpoofPrimitives.vect"+vectName+"Write(%IN1v%, %IN2%, %IN1i%, %POS1%, %LEN%);\n" : + " double[] %TMP% = LibSpoofPrimitives.vect"+vectName+"Write(%IN1%, %IN2%, %POS1%, %LEN%);\n"; } + /*Can be replaced by function objects*/ case MULT: return " double %TMP% = %IN1% * %IN2%;\n" ; @@ -171,23 +173,25 @@ public class CNodeBinary extends CNode sb.append(_inputs.get(0).codegen(sparse)); sb.append(_inputs.get(1).codegen(sparse)); - //generate binary operation + //generate binary operation (use sparse template, if data input) + boolean lsparse = sparse && (_inputs.get(0) instanceof CNodeData); String var = createVarname(); - String tmp = _type.getTemplate(sparse); + String tmp = _type.getTemplate(lsparse); tmp = tmp.replaceAll("%TMP%", var); + + //replace input references and start indexes for( int j=1; j<=2; j++ ) { String varj = _inputs.get(j-1).getVarname(); - if( sparse && !tmp.contains("%IN"+j+"%") ) { - tmp = tmp.replaceAll("%IN"+j+"v%", varj+"vals"); - tmp = tmp.replaceAll("%IN"+j+"i%", varj+"ix"); - } - else - tmp = tmp.replaceAll("%IN"+j+"%", varj ); - if(varj.startsWith("b") ) //i.e. b.get(index) - tmp = tmp.replaceAll("%POS"+j+"%", "bi"); - else - tmp = tmp.replaceAll("%POS"+j+"%", varj+"i"); + //replace sparse and dense inputs + tmp = tmp.replaceAll("%IN"+j+"v%", varj+"vals"); + tmp = tmp.replaceAll("%IN"+j+"i%", varj+"ix"); + tmp = tmp.replaceAll("%IN"+j+"%", varj ); + + //replace start position of main input + tmp = tmp.replaceAll("%POS"+j+"%", (!varj.startsWith("b") + && _inputs.get(j-1) instanceof CNodeData + && _inputs.get(j-1).getDataType().isMatrix()) ? 
varj+"i" : "0"); } sb.append(tmp); http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7491a680/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRowAgg.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRowAgg.java b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRowAgg.java index 7073141..846c88d 100644 --- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRowAgg.java +++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRowAgg.java @@ -32,7 +32,7 @@ public class CNodeRowAgg extends CNodeTpl + "\n" + "public final class %TMP% extends SpoofRowAggregate { \n" + " public %TMP%() {\n" - + " _colVector = %FLAG%;\n" + + " super(%COL_VECTOR%, %VECT_MEM%);\n" + " }\n" + " protected void genexecRowDense( double[] a, int ai, double[][] b, double[] scalars, double[] c, int len, int rowIndex ) { \n" + "%BODY_dense%" @@ -46,6 +46,16 @@ public class CNodeRowAgg extends CNodeTpl super(inputs, output); } + //number of intermediate vectors + private int _numVectors = -1; + + public void setNumVectorIntermediates(int num) { + _numVectors = num; + } + + public int getNumVectorIntermediates() { + return _numVectors; + } @Override public String codegen(boolean sparse) { @@ -71,9 +81,9 @@ public class CNodeRowAgg extends CNodeTpl //replace size information tmp = tmp.replaceAll("%LEN%", "len"); - //replace colvector information and start position - tmp = tmp.replaceAll("%FLAG%", String.valueOf(_output._cols==1)); - tmp = tmp.replaceAll("bi", "0"); + //replace colvector information and number of vector intermediates + tmp = tmp.replaceAll("%COL_VECTOR%", String.valueOf(_output._cols==1)); + tmp = tmp.replaceAll("%VECT_MEM%", String.valueOf(_numVectors)); return tmp; } @@ -108,6 +118,7 @@ public class CNodeRowAgg extends CNodeTpl CNodeRowAgg that = (CNodeRowAgg)o; return super.equals(o) + && _numVectors == that._numVectors && equalInputReferences( _output, 
that._output, _inputs, that._inputs); } @@ -115,7 +126,9 @@ public class CNodeRowAgg extends CNodeTpl @Override public String getTemplateInfo() { StringBuilder sb = new StringBuilder(); - sb.append("SPOOF ROWAGGREGATE"); + sb.append("SPOOF ROWAGGREGATE [reqVectMem="); + sb.append(_numVectors); + sb.append("]"); return sb.toString(); } } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7491a680/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java index 5d4c63e..5fcb7bc 100644 --- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java +++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java @@ -43,8 +43,8 @@ public class CNodeUnary extends CNode public String getTemplate(boolean sparse) { switch( this ) { case ROW_SUMS: - return sparse ? " double %TMP% = LibSpoofPrimitives.vectSum( %IN1v%, %IN1i%, %POS1%, %LEN%);\n": - " double %TMP% = LibSpoofPrimitives.vectSum( %IN1%, %POS1%, %LEN%);\n"; + return sparse ? 
" double %TMP% = LibSpoofPrimitives.vectSum(%IN1v%, %IN1i%, %POS1%, %LEN%);\n": + " double %TMP% = LibSpoofPrimitives.vectSum(%IN1%, %POS1%, %LEN%);\n"; case EXP: return " double %TMP% = FastMath.exp(%IN1%);\n"; case LOOKUP_R: http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7491a680/src/main/java/org/apache/sysml/hops/codegen/template/TemplateOuterProduct.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateOuterProduct.java b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateOuterProduct.java index 68aaa2a..5918e5e 100644 --- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateOuterProduct.java +++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateOuterProduct.java @@ -193,9 +193,9 @@ public class TemplateOuterProduct extends TemplateBase { //final left/right matrix mult, see close else { if( cdata1.getDataType().isScalar() ) - out = new CNodeBinary(cdata1, cdata2, BinType.VECT_MULT_ADD); - else out = new CNodeBinary(cdata2, cdata1, BinType.VECT_MULT_ADD); + else + out = new CNodeBinary(cdata1, cdata2, BinType.VECT_MULT_ADD); } } else if( HopRewriteUtils.isTransposeOperation(hop) ) http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7491a680/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRowAgg.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRowAgg.java b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRowAgg.java index 31c0f79..b384f52 100644 --- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRowAgg.java +++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRowAgg.java @@ -118,6 +118,8 @@ public class TemplateRowAgg extends TemplateBase inputs.add(tmp.get(in.getHopID())); CNode output = tmp.get(hop.getHopID()); CNodeRowAgg tpl = new CNodeRowAgg(inputs, 
output); + tpl.setNumVectorIntermediates(TemplateUtils + .countVectorIntermediates(output)); // return cplan instance return new Pair<Hop[],CNodeTpl>(sinHops.toArray(new Hop[0]), tpl); @@ -172,7 +174,7 @@ public class TemplateRowAgg extends TemplateBase inHops.remove(hop.getInput().get(0)); inHops.add(hop.getInput().get(0).getInput().get(0)); - out = new CNodeBinary(cdata2, cdata1, BinType.VECT_MULT_ADD); + out = new CNodeBinary(cdata1, cdata2, BinType.VECT_MULT_ADD); } else { @@ -206,7 +208,7 @@ public class TemplateRowAgg extends TemplateBase { if( HopRewriteUtils.isBinary(hop, SUPPORTED_VECT_BINARY) ) { String opname = "VECT_"+((BinaryOp)hop).getOp().name()+"_SCALAR"; - out = new CNodeBinary(cdata2, cdata1, BinType.valueOf(opname)); + out = new CNodeBinary(cdata1, cdata2, BinType.valueOf(opname)); } else throw new RuntimeException("Unsupported binary matrix " http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7491a680/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java index a172af5..adab46c 100644 --- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java +++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java @@ -272,4 +272,12 @@ public class TemplateUtils || ((CNodeUnary)c).getType()==UnaryType.LOOKUP_RC))); return ret; } + + public static int countVectorIntermediates(CNode node) { + int ret = 0; + for( CNode c : node.getInput() ) + ret += countVectorIntermediates(c); + return ret + ((node instanceof CNodeBinary + && ((CNodeBinary)node).getType().isVectorScalarPrimitive()) ? 
1 : 0); + } } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7491a680/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java b/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java index 9b83006..52e9893 100644 --- a/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java +++ b/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java @@ -19,6 +19,9 @@ package org.apache.sysml.runtime.codegen; +import java.util.Arrays; +import java.util.LinkedList; + import org.apache.sysml.runtime.matrix.data.LibMatrixMult; /** @@ -30,6 +33,12 @@ import org.apache.sysml.runtime.matrix.data.LibMatrixMult; */ public class LibSpoofPrimitives { + //global pool of reusable vectors, individual operations set up their own thread-local + //ring buffers of reusable vectors with specific number of vectors and vector sizes + private static ThreadLocal<LinkedList<double[]>> memPool = new ThreadLocal<LinkedList<double[]>>() { + @Override protected LinkedList<double[]> initialValue() { return new LinkedList<double[]>(); } + }; + // forwarded calls to LibMatrixMult public static double dotProduct(double[] a, double[] b, int ai, int bi, int len) { @@ -40,20 +49,24 @@ public class LibSpoofPrimitives return LibMatrixMult.dotProduct(a, b, aix, ai, bi, len); } - public static void vectMultAdd(double aval, double[] b, double[] c, int bi, int ci, int len) { - LibMatrixMult.vectMultiplyAdd(aval, b, c, bi, ci, len); + public static void vectMultAdd(double[] a, double bval, double[] c, int bi, int ci, int len) { + LibMatrixMult.vectMultiplyAdd(bval, a, c, bi, ci, len); } - public static void vectMultAdd(double aval, double[] b, double[] c, int[] bix, int bi, int ci, int len) { - LibMatrixMult.vectMultiplyAdd(aval, b, c, bix, bi, ci, len); + public static void 
vectMultAdd(double[] a, double bval, double[] c, int[] bix, int bi, int ci, int len) { + LibMatrixMult.vectMultiplyAdd(bval, a, c, bix, bi, ci, len); } - public static void vectMultWrite(double aval, double[] b, double[] c, int bi, int ci, int len) { - LibMatrixMult.vectMultiplyWrite(aval, b, c, bi, ci, len); + public static double[] vectMultWrite(double[] a, double bval, int bi, int len) { + double[] c = allocVector(len, false); + LibMatrixMult.vectMultiplyWrite(bval, a, c, bi, 0, len); + return c; } - public static void vectMultWrite(double aval, double[] b, double[] c, int[] bix, int bi, int ci, int len) { - LibMatrixMult.vectMultiplyAdd(aval, b, c, bix, bi, ci, len); + public static double[] vectMultWrite(double[] a, double bval, int[] bix, int bi, int len) { + double[] c = allocVector(len, true); + LibMatrixMult.vectMultiplyAdd(bval, a, c, bix, bi, 0, len); + return c; } // custom vector sums @@ -119,155 +132,214 @@ public class LibSpoofPrimitives //custom vector div - public static void vectDivAdd(double bval, double[] a, double[] c, int ai, int ci, int len) { + public static void vectDivAdd(double[] a, double bval, double[] c, int ai, int ci, int len) { for( int j = ai; j < ai+len; j++, ci++) c[ci] += a[j] / bval; } - public static void vectDivAdd(double bval, double[] a, double[] c, int[] aix, int ai, int ci, int len) { + public static void vectDivAdd(double[] a, double bval, double[] c, int[] aix, int ai, int ci, int len) { for( int j = ai; j < ai+len; j++ ) c[ci + aix[j]] += a[j] / bval; } - public static void vectDivWrite(double bval, double[] a, double[] c, int ai, int ci, int len) { - for( int j = ai; j < ai+len; j++, ci++) - c[ci] = a[j] / bval; + public static double[] vectDivWrite(double[] a, double bval, int ai, int len) { + double[] c = allocVector(len, false); + for( int j = 0; j < len; j++, ai++) + c[j] = a[ai] / bval; + return c; } - public static void vectDivWrite(double bval, double[] a, int[] aix, double[] c, int ai, int ci, int len) { + 
public static double[] vectDivWrite(double[] a, double bval, int[] aix, int ai, int len) { + double[] c = allocVector(len, true); for( int j = ai; j < ai+len; j++ ) - c[ci + aix[j]] = a[j] / bval; + c[aix[j]] = a[j] / bval; + return c; } //custom vector equal - public static void vectEqualAdd(double bval, double[] a, double[] c, int ai, int ci, int len) { + public static void vectEqualAdd(double[] a, double bval, double[] c, int ai, int ci, int len) { for( int j = ai; j < ai+len; j++, ci++) c[ci] += (a[j] == bval) ? 1 : 0; } - public static void vectEqualAdd(double bval, double[] a, double[] c, int[] aix, int ai, int ci, int len) { + public static void vectEqualAdd(double[] a, double bval, double[] c, int[] aix, int ai, int ci, int len) { for( int j = ai; j < ai+len; j++ ) c[ci + aix[j]] += (a[j] == bval) ? 1 : 0; } - public static void vectEqualWrite(double bval, double[] a, double[] c, int ai, int ci, int len) { - for( int j = ai; j < ai+len; j++, ci++) - c[ci] = (a[j] == bval) ? 1 : 0; + public static double[] vectEqualWrite(double[] a, double bval, int ai, int len) { + double[] c = allocVector(len, false); + for( int j = 0; j < len; j++, ai++) + c[j] = (a[ai] == bval) ? 1 : 0; + return c; } - public static void vectEqualWrite(double bval, double[] a, int[] aix, double[] c, int ai, int ci, int len) { + public static double[] vectEqualWrite(double[] a, double bval, int[] aix, int ai, int len) { + double[] c = allocVector(len, true); for( int j = ai; j < ai+len; j++ ) - c[ci + aix[j]] = (a[j] == bval) ? 1 : 0; - } + c[aix[j]] = (a[j] == bval) ? 1 : 0; + return c; + } - //custom vector notequal + //custom vector not equal - public static void vectNotequalAdd(double bval, double[] a, double[] c, int ai, int ci, int len) { + public static void vectNotequalAdd(double[] a, double bval, double[] c, int ai, int ci, int len) { for( int j = ai; j < ai+len; j++, ci++) c[ci] += (a[j] != bval) ? 
1 : 0; } - public static void vectNotequalAdd(double bval, double[] a, double[] c, int[] aix, int ai, int ci, int len) { + public static void vectNotequalAdd(double[] a, double bval, double[] c, int[] aix, int ai, int ci, int len) { for( int j = ai; j < ai+len; j++ ) c[ci + aix[j]] += (a[j] != bval) ? 1 : 0; } - public static void vectNotequalWrite(double bval, double[] a, double[] c, int ai, int ci, int len) { - for( int j = ai; j < ai+len; j++, ci++) - c[ci] = (a[j] != bval) ? 1 : 0; + public static double[] vectNotequalWrite(double[] a, double bval, int ai, int len) { + double[] c = allocVector(len, false); + for( int j = 0; j < len; j++, ai++) + c[j] = (a[j] != bval) ? 1 : 0; + return c; } - public static void vectNotequalWrite(double bval, double[] a, int[] aix, double[] c, int ai, int ci, int len) { + public static double[] vectNotequalWrite(double[] a, double bval, int[] aix, int ai, int len) { + double[] c = allocVector(len, true); for( int j = ai; j < ai+len; j++ ) - c[ci + aix[j]] = (a[j] != bval) ? 1 : 0; + c[aix[j]] = (a[j] != bval) ? 1 : 0; + return c; } //custom vector less - public static void vectLessAdd(double bval, double[] a, double[] c, int ai, int ci, int len) { + public static void vectLessAdd(double[] a, double bval, double[] c, int ai, int ci, int len) { for( int j = ai; j < ai+len; j++, ci++) c[ci] += (a[j] < bval) ? 1 : 0; } - public static void vectLessAdd(double bval, double[] a, double[] c, int[] aix, int ai, int ci, int len) { + public static void vectLessAdd(double[] a, double bval, double[] c, int[] aix, int ai, int ci, int len) { for( int j = ai; j < ai+len; j++ ) c[ci + aix[j]] += (a[j] < bval) ? 1 : 0; } - public static void vectLessWrite(double bval, double[] a, double[] c, int ai, int ci, int len) { - for( int j = ai; j < ai+len; j++, ci++) - c[ci] = (a[j] < bval) ? 
1 : 0; + public static double[] vectLessWrite(double[] a, double bval, int ai, int len) { + double[] c = allocVector(len, false); + for( int j = 0; j < len; j++, ai++) + c[j] = (a[j] < bval) ? 1 : 0; + return c; } - public static void vectLessWrite(double bval, double[] a, int[] aix, double[] c, int ai, int ci, int len) { + public static double[] vectLessWrite(double[] a, double bval, int[] aix, int ai, int len) { + double[] c = allocVector(len, true); for( int j = ai; j < ai+len; j++ ) - c[ci + aix[j]] = (a[j] < bval) ? 1 : 0; + c[aix[j]] = (a[j] < bval) ? 1 : 0; + return c; } - //custom vector lessequal + //custom vector less equal - public static void vectLessequalAdd(double bval, double[] a, double[] c, int ai, int ci, int len) { + public static void vectLessequalAdd(double[] a, double bval, double[] c, int ai, int ci, int len) { for( int j = ai; j < ai+len; j++, ci++) c[ci] += (a[j] <= bval) ? 1 : 0; } - public static void vectLessequalAdd(double bval, double[] a, double[] c, int[] aix, int ai, int ci, int len) { + public static void vectLessequalAdd(double[] a, double bval, double[] c, int[] aix, int ai, int ci, int len) { for( int j = ai; j < ai+len; j++ ) c[ci + aix[j]] += (a[j] <= bval) ? 1 : 0; } - public static void vectLessequalWrite(double bval, double[] a, double[] c, int ai, int ci, int len) { - for( int j = ai; j < ai+len; j++, ci++) - c[ci] = (a[j] <= bval) ? 1 : 0; + public static double[] vectLessequalWrite(double[] a, double bval, int ai, int len) { + double[] c = allocVector(len, false); + for( int j = 0; j < len; j++, ai++) + c[j] = (a[j] <= bval) ? 1 : 0; + return c; } - public static void vectLessequalWrite(double bval, double[] a, int[] aix, double[] c, int ai, int ci, int len) { + public static double[] vectLessequalWrite(double[] a, double bval, int[] aix, int ai, int len) { + double[] c = allocVector(len, true); for( int j = ai; j < ai+len; j++ ) - c[ci + aix[j]] = (a[j] <= bval) ? 1 : 0; + c[aix[j]] = (a[j] <= bval) ? 
1 : 0; + return c; } //custom vector greater - public static void vectGreaterAdd(double bval, double[] a, double[] c, int ai, int ci, int len) { + public static void vectGreaterAdd(double[] a, double bval, double[] c, int ai, int ci, int len) { for( int j = ai; j < ai+len; j++, ci++) c[ci] += (a[j] > bval) ? 1 : 0; } - public static void vectGreaterAdd(double bval, double[] a, double[] c, int[] aix, int ai, int ci, int len) { + public static void vectGreaterAdd(double[] a, double bval, double[] c, int[] aix, int ai, int ci, int len) { for( int j = ai; j < ai+len; j++ ) c[ci + aix[j]] += (a[j] > bval) ? 1 : 0; } - public static void vectGreaterWrite(double bval, double[] a, double[] c, int ai, int ci, int len) { - for( int j = ai; j < ai+len; j++, ci++) - c[ci] = (a[j] > bval) ? 1 : 0; + public static double[] vectGreaterWrite(double[] a, double bval, int ai, int len) { + double[] c = allocVector(len, false); + for( int j = 0; j < len; j++, ai++) + c[j] = (a[j] > bval) ? 1 : 0; + return c; } - public static void vectGreaterWrite(double bval, double[] a, int[] aix, double[] c, int ai, int ci, int len) { + public static double[] vectGreaterWrite(double[] a, double bval, int[] aix, int ai, int len) { + double[] c = allocVector(len, true); for( int j = ai; j < ai+len; j++ ) - c[ci + aix[j]] = (a[j] > bval) ? 1 : 0; - } + c[aix[j]] = (a[j] > bval) ? 1 : 0; + return c; + } - //custom vector greaterequal + //custom vector greater equal - public static void vectGreaterequalAdd(double bval, double[] a, double[] c, int ai, int ci, int len) { + public static void vectGreaterequalAdd(double[] a, double bval, double[] c, int ai, int ci, int len) { for( int j = ai; j < ai+len; j++, ci++) c[ci] += (a[j] >= bval) ? 
1 : 0; } - public static void vectGreaterequalAdd(double bval, double[] a, double[] c, int[] aix, int ai, int ci, int len) { + public static void vectGreaterequalAdd(double[] a, double bval, double[] c, int[] aix, int ai, int ci, int len) { for( int j = ai; j < ai+len; j++ ) c[ci + aix[j]] += (a[j] >= bval) ? 1 : 0; } - public static void vectGreaterequalWrite(double bval, double[] a, double[] c, int ai, int ci, int len) { - for( int j = ai; j < ai+len; j++, ci++) - c[ci] = (a[j] >= bval) ? 1 : 0; + public static double[] vectGreaterequalWrite(double[] a, double bval, int ai, int len) { + double[] c = allocVector(len, false); + for( int j = 0; j < len; j++, ai++) + c[j] = (a[j] >= bval) ? 1 : 0; + return c; } - public static void vectGreaterequalWrite(double bval, double[] a, int[] aix, double[] c, int ai, int ci, int len) { + public static double[] vectGreaterequalWrite(double[] a, double bval, int[] aix, int ai, int len) { + double[] c = allocVector(len, true); for( int j = ai; j < ai+len; j++ ) - c[ci + aix[j]] = (a[j] >= bval) ? 1 : 0; + c[aix[j]] = (a[j] >= bval) ? 
1 : 0; + return c; + } + + //dynamic memory management + + public static void setupThreadLocalMemory(int numVectors, int len) { + LinkedList<double[]> list = new LinkedList<double[]>(); + for( int i=0; i<numVectors; i++ ) + list.addLast(new double[len]); + memPool.set(list); + } + + public static void cleanupThreadLocalMemory() { + memPool.remove(); + } + + private static double[] allocVector(int len, boolean reset) { + LinkedList<double[]> list = memPool.get(); + + //sanity check for missing setup + if( list.isEmpty() ) + return new double[len]; + + //get and re-queue first entry + double[] tmp = list.removeFirst(); + list.addLast(tmp); + + //reset vector if required + if( reset ) + Arrays.fill(tmp, 0); + return tmp; } } + http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7491a680/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowAggregate.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowAggregate.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowAggregate.java index c0b58a8..6df9b67 100644 --- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowAggregate.java +++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowAggregate.java @@ -39,10 +39,12 @@ public abstract class SpoofRowAggregate extends SpoofOperator private static final long serialVersionUID = 6242910797139642998L; private static final long PAR_NUMCELL_THRESHOLD = 1024*1024; //Min 1M elements - protected boolean _colVector = false; + protected final boolean _colVector; + protected final int _reqVectMem; - public SpoofRowAggregate() { - + public SpoofRowAggregate(boolean colVector, int reqVectMem) { + _colVector = colVector; + _reqVectMem = reqVectMem; } @Override @@ -66,12 +68,14 @@ public abstract class SpoofRowAggregate extends SpoofOperator //core sequential execute final int m = inputs.get(0).getNumRows(); final int n = inputs.get(0).getNumColumns(); + 
LibSpoofPrimitives.setupThreadLocalMemory(_reqVectMem, out.getNumColumns()); if( !inputs.get(0).isInSparseFormat() ) executeDense(inputs.get(0).getDenseBlock(), b, scalars, c, n, 0, m); else executeSparse(inputs.get(0).getSparseBlock(), b, scalars, c, n, 0, m); //post-processing + LibSpoofPrimitives.cleanupThreadLocalMemory(); out.recomputeNonZeros(); } @@ -176,12 +180,17 @@ public abstract class SpoofRowAggregate extends SpoofOperator @Override public double[] call() throws DMLRuntimeException { + + //allocate vector intermediates and partial output + LibSpoofPrimitives.setupThreadLocalMemory(_reqVectMem, _clen); double[] c = new double[_clen]; + if( !_a.isInSparseFormat() ) executeDense(_a.getDenseBlock(), _b, _scalars, c, _clen, _rl, _ru); else executeSparse(_a.getSparseBlock(), _b, _scalars, c, _clen, _rl, _ru); - + + LibSpoofPrimitives.cleanupThreadLocalMemory(); return c; } } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7491a680/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java b/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java index 0d9bc1d..c2ae38b 100644 --- a/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java +++ b/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java @@ -43,7 +43,8 @@ public class RowAggTmplTest extends AutomatedTestBase private static final String TEST_NAME5 = TEST_NAME+"5"; private static final String TEST_NAME6 = TEST_NAME+"6"; private static final String TEST_NAME7 = TEST_NAME+"7"; - + private static final String TEST_NAME8 = TEST_NAME+"8"; //colSums((X/rowSums(X))>0.7) + private static final String TEST_DIR = "functions/codegen/"; private static final String TEST_CLASS_DIR = TEST_DIR + 
RowAggTmplTest.class.getSimpleName() + "/"; private final static String TEST_CONF = "SystemML-config-codegen.xml"; @@ -54,7 +55,7 @@ public class RowAggTmplTest extends AutomatedTestBase @Override public void setUp() { TestUtils.clearAssertionInformation(); - for(int i=1; i<=7; i++) + for(int i=1; i<=8; i++) addTestConfiguration( TEST_NAME+i, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME+i, new String[] { String.valueOf(i) }) ); } @@ -93,6 +94,11 @@ public class RowAggTmplTest extends AutomatedTestBase testCodegenIntegration( TEST_NAME7, true, ExecType.CP ); } + @Test + public void testCodegenRowAggRewrite8() { + testCodegenIntegration( TEST_NAME8, true, ExecType.CP ); + } + @Test public void testCodegenRowAgg1() { testCodegenIntegration( TEST_NAME1, false, ExecType.CP ); @@ -128,6 +134,11 @@ public class RowAggTmplTest extends AutomatedTestBase testCodegenIntegration( TEST_NAME7, false, ExecType.CP ); } + @Test + public void testCodegenRowAgg8() { + testCodegenIntegration( TEST_NAME8, false, ExecType.CP ); + } + private void testCodegenIntegration( String testname, boolean rewrites, ExecType instType ) { boolean oldFlag = OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION; http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7491a680/src/test/scripts/functions/codegen/rowAggPattern8.R ---------------------------------------------------------------------- diff --git a/src/test/scripts/functions/codegen/rowAggPattern8.R b/src/test/scripts/functions/codegen/rowAggPattern8.R new file mode 100644 index 0000000..5030636 --- /dev/null +++ b/src/test/scripts/functions/codegen/rowAggPattern8.R @@ -0,0 +1,29 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +args<-commandArgs(TRUE) +options(digits=22) +library("Matrix") + +X = matrix(seq(1,15), 5, 3, byrow=TRUE); +S = colSums((X/rowSums(X))>0.7) + +writeMM(as(S, "CsparseMatrix"), paste(args[2], "S", sep="")); http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7491a680/src/test/scripts/functions/codegen/rowAggPattern8.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/functions/codegen/rowAggPattern8.dml b/src/test/scripts/functions/codegen/rowAggPattern8.dml new file mode 100644 index 0000000..b4ede87 --- /dev/null +++ b/src/test/scripts/functions/codegen/rowAggPattern8.dml @@ -0,0 +1,26 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +X = matrix(seq(1,15), rows=5, cols=3); +S = colSums((X/rowSums(X))>0.7) + +write(S,$1) +
