Repository: systemml Updated Branches: refs/heads/master 4f865b218 -> 40dd894ed
[SYSTEMML-1952] Codegen row operators w/ sparse side inputs This patch improves the performance of codegen row templates with sparse side inputs, which so far have simply been converted to dense inputs for random access. Similar to sparse side inputs in cell templates and compressed main inputs in row templates, we now use thread-local temporary rows and convert sparse rows on demand. This avoids the single-threaded full allocation and conversion of such sparse inputs, which also significantly reduces the GC overhead. On multinomial Mlogreg with 32 classes and a dense 10M x 100 input (8GB), this change improved end-to-end performance for 20/10 outer/inner iterations from 1264s (72s GC) to 1197s (43s GC). Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/b3d09d56 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/b3d09d56 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/b3d09d56 Branch: refs/heads/master Commit: b3d09d5613d5f64099bb0f1d9b6d459a0e3e2a5f Parents: 4f865b2 Author: Matthias Boehm <[email protected]> Authored: Tue Oct 10 17:24:09 2017 -0700 Committer: Matthias Boehm <[email protected]> Committed: Tue Oct 10 17:24:09 2017 -0700 ---------------------------------------------------------------------- .../sysml/hops/codegen/cplan/CNodeBinary.java | 4 +- .../sysml/hops/codegen/cplan/CNodeCell.java | 2 +- .../sysml/hops/codegen/cplan/CNodeMultiAgg.java | 2 +- .../hops/codegen/cplan/CNodeOuterProduct.java | 4 +- .../sysml/hops/codegen/cplan/CNodeRow.java | 8 +-- .../sysml/hops/codegen/cplan/CNodeTernary.java | 4 +- .../sysml/hops/codegen/cplan/CNodeUnary.java | 10 ++-- .../sysml/runtime/codegen/SpoofOperator.java | 59 +++++++++++++++++--- .../sysml/runtime/codegen/SpoofRowwise.java | 21 ++++--- 9 files changed, 81 insertions(+), 33 deletions(-) ---------------------------------------------------------------------- 
http://git-wip-us.apache.org/repos/asf/systemml/blob/b3d09d56/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java index a101647..1ca4aa6 100644 --- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java +++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeBinary.java @@ -287,13 +287,13 @@ public class CNodeBinary extends CNode tmp = tmp.replace("%IN"+(j+1)+"v%", varj+"vals"); tmp = tmp.replace("%IN"+(j+1)+"i%", varj+"ix"); tmp = tmp.replace("%IN"+(j+1)+"%", - varj.startsWith("b") ? varj + ".ddat" : varj ); + varj.startsWith("b") ? varj + ".values(rix)" : varj ); //replace start position of main input tmp = tmp.replace("%POS"+(j+1)+"%", (_inputs.get(j) instanceof CNodeData && _inputs.get(j).getDataType().isMatrix()) ? (!varj.startsWith("b")) ? varj+"i" : (TemplateUtils.isMatrix(_inputs.get(j)) && _type!=BinType.VECT_MATRIXMULT) ? 
- "rowIndex*"+((_type==BinType.VECT_OUTERMULT_ADD)?"%LEN"+(j+1)+"%":"%LEN%") : "0" : "0"); + varj + ".pos(rix)" : "0" : "0"); } //replace length information (e.g., after matrix mult) if( _type == BinType.VECT_OUTERMULT_ADD ) { http://git-wip-us.apache.org/repos/asf/systemml/blob/b3d09d56/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeCell.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeCell.java b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeCell.java index 25c422d..3b30124 100644 --- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeCell.java +++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeCell.java @@ -41,7 +41,7 @@ public class CNodeCell extends CNodeTpl + " public %TMP%() {\n" + " super(CellType.%TYPE%, %SPARSE_SAFE%, %AGG_OP%);\n" + " }\n" - + " protected double genexec(double a, SideInput[] b, double[] scalars, int m, int n, int rowIndex, int colIndex) { \n" + + " protected double genexec(double a, SideInput[] b, double[] scalars, int m, int n, int rix, int cix) { \n" + "%BODY_dense%" + " return %OUT%;\n" + " }\n" http://git-wip-us.apache.org/repos/asf/systemml/blob/b3d09d56/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeMultiAgg.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeMultiAgg.java b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeMultiAgg.java index 8abf907..fd39c7c 100644 --- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeMultiAgg.java +++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeMultiAgg.java @@ -44,7 +44,7 @@ public class CNodeMultiAgg extends CNodeTpl + " super(%SPARSE_SAFE%, %AGG_OP%);\n" + " }\n" + " protected void genexec(double a, SideInput[] b, double[] scalars, double[] c, " - + "int m, int n, int rowIndex, int colIndex) { \n" + + "int m, int n, int rix, int cix) { \n" + 
"%BODY_dense%" + " }\n" + "}\n"; http://git-wip-us.apache.org/repos/asf/systemml/blob/b3d09d56/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeOuterProduct.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeOuterProduct.java b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeOuterProduct.java index b06e9b9..e518246 100644 --- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeOuterProduct.java +++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeOuterProduct.java @@ -39,10 +39,10 @@ public class CNodeOuterProduct extends CNodeTpl + " public %TMP%() {\n" + " super(OutProdType.%TYPE%);\n" + " }\n" - + " protected void genexecDense(double a, double[] a1, int a1i, double[] a2, int a2i, double[][] b, double[] scalars, double[] c, int ci, int m, int n, int len, int rowIndex, int colIndex) { \n" + + " protected void genexecDense(double a, double[] a1, int a1i, double[] a2, int a2i, double[][] b, double[] scalars, double[] c, int ci, int m, int n, int len, int rix, int cix) { \n" + "%BODY_dense%" + " }\n" - + " protected double genexecCellwise(double a, double[] a1, int a1i, double[] a2, int a2i, double[][] b, double[] scalars, int m, int n, int len, int rowIndex, int colIndex) { \n" + + " protected double genexecCellwise(double a, double[] a1, int a1i, double[] a2, int a2i, double[][] b, double[] scalars, int m, int n, int len, int rix, int cix) { \n" + "%BODY_cellwise%" + " return %OUT_cellwise%;\n" + " }\n" http://git-wip-us.apache.org/repos/asf/systemml/blob/b3d09d56/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java index 9235216..68739ad 100644 --- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java +++ 
b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java @@ -41,17 +41,17 @@ public class CNodeRow extends CNodeTpl + " public %TMP%() {\n" + " super(RowType.%TYPE%, %CONST_DIM2%, %TB1%, %VECT_MEM%);\n" + " }\n" - + " protected void genexec(double[] a, int ai, SideInput[] b, double[] scalars, double[] c, int len, int rowIndex) { \n" + + " protected void genexec(double[] a, int ai, SideInput[] b, double[] scalars, double[] c, int len, int rix) { \n" + "%BODY_dense%" + " }\n" - + " protected void genexec(double[] avals, int[] aix, int ai, SideInput[] b, double[] scalars, double[] c, int alen, int len, int rowIndex) { \n" + + " protected void genexec(double[] avals, int[] aix, int ai, SideInput[] b, double[] scalars, double[] c, int alen, int len, int rix) { \n" + "%BODY_sparse%" + " }\n" + "}\n"; - private static final String TEMPLATE_ROWAGG_OUT = " c[rowIndex] = %IN%;\n"; + private static final String TEMPLATE_ROWAGG_OUT = " c[rix] = %IN%;\n"; private static final String TEMPLATE_FULLAGG_OUT = " c[0] += %IN%;\n"; - private static final String TEMPLATE_NOAGG_OUT = " LibSpoofPrimitives.vectWrite(%IN%, c, rowIndex*%LEN%, %LEN%);\n"; + private static final String TEMPLATE_NOAGG_OUT = " LibSpoofPrimitives.vectWrite(%IN%, c, rix*%LEN%, %LEN%);\n"; public CNodeRow(ArrayList<CNode> inputs, CNode output ) { super(inputs, output); http://git-wip-us.apache.org/repos/asf/systemml/blob/b3d09d56/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeTernary.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeTernary.java b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeTernary.java index 6e76816..155cc8b 100644 --- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeTernary.java +++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeTernary.java @@ -56,10 +56,10 @@ public class CNodeTernary extends CNode case LOOKUP_RC1: return sparse ? 
" double %TMP% = getValue(%IN1v%, %IN1i%, ai, alen, %IN3%-1);\n" : - " double %TMP% = getValue(%IN1%, %IN2%, rowIndex, %IN3%-1);\n"; + " double %TMP% = getValue(%IN1%, %IN2%, rix, %IN3%-1);\n"; case LOOKUP_RVECT1: - return " double[] %TMP% = getVector(%IN1%, %IN2%, rowIndex, %IN3%-1);\n"; + return " double[] %TMP% = getVector(%IN1%, %IN2%, rix, %IN3%-1);\n"; default: throw new RuntimeException("Invalid ternary type: "+this.toString()); http://git-wip-us.apache.org/repos/asf/systemml/blob/b3d09d56/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java index 5e3ef83..b66423e 100644 --- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java +++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java @@ -90,11 +90,11 @@ public class CNodeUnary extends CNode case LOOKUP_R: return sparse ? " double %TMP% = getValue(%IN1v%, %IN1i%, ai, alen, 0);\n" : - " double %TMP% = getValue(%IN1%, rowIndex);\n"; + " double %TMP% = getValue(%IN1%, rix);\n"; case LOOKUP_C: - return " double %TMP% = getValue(%IN1%, n, 0, colIndex);\n"; + return " double %TMP% = getValue(%IN1%, n, 0, cix);\n"; case LOOKUP_RC: - return " double %TMP% = getValue(%IN1%, n, rowIndex, colIndex);\n"; + return " double %TMP% = getValue(%IN1%, n, rix, cix);\n"; case LOOKUP0: return " double %TMP% = %IN1%[0];\n" ; case POW2: @@ -216,12 +216,12 @@ public class CNodeUnary extends CNode tmp = tmp.replace("%IN1v%", varj+"vals"); tmp = tmp.replace("%IN1i%", varj+"ix"); tmp = tmp.replace("%IN1%", varj.startsWith("b") && !_type.isScalarLookup() - && TemplateUtils.isMatrix(_inputs.get(0)) ? varj + ".ddat" : varj ); + && TemplateUtils.isMatrix(_inputs.get(0)) ? 
varj + ".values(rix)" : varj ); //replace start position of main input String spos = (_inputs.get(0) instanceof CNodeData && _inputs.get(0).getDataType().isMatrix()) ? !varj.startsWith("b") ? - varj+"i" : TemplateUtils.isMatrix(_inputs.get(0)) ? "rowIndex*%LEN%" : "0" : "0"; + varj+"i" : TemplateUtils.isMatrix(_inputs.get(0)) ? varj + ".pos(rix)" : "0" : "0"; tmp = tmp.replace("%POS1%", spos); tmp = tmp.replace("%POS2%", spos); http://git-wip-us.apache.org/repos/asf/systemml/blob/b3d09d56/src/main/java/org/apache/sysml/runtime/codegen/SpoofOperator.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofOperator.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofOperator.java index bec488b..a614ded 100644 --- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofOperator.java +++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofOperator.java @@ -30,6 +30,7 @@ import org.apache.sysml.runtime.compress.CompressedMatrixBlock; import org.apache.sysml.runtime.instructions.cp.ScalarObject; import org.apache.sysml.runtime.matrix.data.LibMatrixReorg; import org.apache.sysml.runtime.matrix.data.MatrixBlock; +import org.apache.sysml.runtime.matrix.data.SparseBlock; import org.apache.sysml.runtime.util.DataConverter; import org.apache.sysml.runtime.util.UtilFunctions; @@ -119,6 +120,10 @@ public abstract class SpoofOperator implements Serializable } protected static SideInput[] createSparseSideInputs(SideInput[] input) { + return createSparseSideInputs(input, false); + } + + protected static SideInput[] createSparseSideInputs(SideInput[] input, boolean row) { //determine if there are sparse side inputs boolean containsSparse = false; for( int i=0; i<input.length; i++ ) { @@ -133,7 +138,8 @@ public abstract class SpoofOperator implements Serializable SideInput tmp = input[i]; ret[i] = (tmp.mdat != null && tmp.mdat.isInSparseFormat() && !tmp.mdat.isEmptyBlock(false) && tmp.clen > 1) 
? - new SideInputSparse(tmp) : tmp; + (row ? new SideInputSparseRow(tmp) : + new SideInputSparseCell(tmp)) : tmp; } return ret; } @@ -213,7 +219,8 @@ public abstract class SpoofOperator implements Serializable protected static double getValue(SideInput data, int n, int rowIndex, int colIndex) { //note: wrapper sideinput guaranteed to exist return (data.ddat!=null) ? data.ddat[rowIndex*n+colIndex] : - (data instanceof SideInputSparse) ? ((SideInputSparse)data).next(rowIndex, colIndex) : + (data instanceof SideInputSparseCell) ? + ((SideInputSparseCell)data).next(rowIndex, colIndex) : (data.mdat!=null) ? data.mdat.quickGetValue(rowIndex, colIndex) : 0; } @@ -224,9 +231,8 @@ public abstract class SpoofOperator implements Serializable } protected static double[] getVector(SideInput data, int n, int rowIndex, int colIndex) { - //note: wrapper sideinput guaranteed to be in dense format double[] c = LibSpoofPrimitives.allocVector(colIndex+1, false); - System.arraycopy(data.ddat, rowIndex*n, c, 0, colIndex+1); + System.arraycopy(data.values(rowIndex), data.pos(rowIndex), c, 0, colIndex+1); return c; } @@ -234,22 +240,61 @@ public abstract class SpoofOperator implements Serializable public final double[] ddat; public final MatrixBlock mdat; public final int clen; - public SideInput(double[] ddata, MatrixBlock mdata, int clength) { ddat = ddata; mdat = mdata; clen = clength; } + public int pos(int r) { + return r * clen; + } + public double[] values(int r) { + return ddat; + } + } + + public static class SideInputSparseRow extends SideInput { + private final double[] values; + private int currRowIndex = -1; + + public SideInputSparseRow(SideInput in) { + super(in.ddat, in.mdat, in.clen); + values = new double[in.clen]; + } + @Override + public int pos(int r) { + return 0; + } + @Override + public double[] values(int r) { + if( r > currRowIndex ) + nextRow(r); + return values; + } + + private void nextRow(int r) { + currRowIndex = r; + Arrays.fill(values, 0); + SparseBlock 
sblock = mdat.getSparseBlock(); + if( sblock != null && !sblock.isEmpty(r) ) { + int apos = sblock.pos(r); + int alen = sblock.size(r); + int[] aix = sblock.indexes(r); + double[] avals = sblock.values(r); + for(int k=apos; k<apos+alen; k++) + values[aix[k]] = avals[k]; + } + } } - public static class SideInputSparse extends SideInput { + public static class SideInputSparseCell extends SideInput { private int currRowIndex = -1; private int currColPos = 0; private int currLen = 0; private int[] indexes; private double[] values; - public SideInputSparse(SideInput in) { + public SideInputSparseCell(SideInput in) { super(in.ddat, in.mdat, in.clen); } public double next(int rowIndex, int colIndex) { http://git-wip-us.apache.org/repos/asf/systemml/blob/b3d09d56/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java index 311c27f..679ac78 100644 --- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java +++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java @@ -112,7 +112,7 @@ public abstract class SpoofRowwise extends SpoofOperator } @Override - public MatrixBlock execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, MatrixBlock out) + public MatrixBlock execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, MatrixBlock out) throws DMLRuntimeException { return execute(inputs, scalarObjects, out, true, false); @@ -138,7 +138,7 @@ public abstract class SpoofRowwise extends SpoofOperator && LibSpoofPrimitives.isFlipOuter(out.getNumRows(), out.getNumColumns()); //input preparation - SideInput[] b = prepInputMatrices(inputs, 1, inputs.size()-1, true, _tB1); + SideInput[] b = prepInputMatrices(inputs, 1, inputs.size()-1, false, _tB1); double[] scalars = 
prepInputScalars(scalarObjects); //setup thread-local memory if necessary @@ -168,7 +168,7 @@ public abstract class SpoofRowwise extends SpoofOperator } @Override - public MatrixBlock execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, MatrixBlock out, int k) + public MatrixBlock execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, MatrixBlock out, int k) throws DMLRuntimeException { //redirect to serial execution @@ -193,7 +193,7 @@ public abstract class SpoofRowwise extends SpoofOperator //input preparation MatrixBlock a = inputs.get(0); - SideInput[] b = prepInputMatrices(inputs, 1, inputs.size()-1, true, _tB1); + SideInput[] b = prepInputMatrices(inputs, 1, inputs.size()-1, false, _tB1); double[] scalars = prepInputScalars(scalarObjects); //core parallel execute @@ -287,14 +287,16 @@ public abstract class SpoofRowwise extends SpoofOperator if( a == null ) return; + SideInput[] lb = createSparseSideInputs(b, true); for( int i=rl, aix=rl*n; i<ru; i++, aix+=n ) { //call generated method - genexec( a, aix, b, scalars, c, n, i ); + genexec( a, aix, lb, scalars, c, n, i ); } } private void executeSparse(SparseBlock sblock, SideInput[] b, double[] scalars, double[] c, int n, int rl, int ru) { + SideInput[] lb = createSparseSideInputs(b, true); SparseRow empty = new SparseRowVector(1); for( int i=rl; i<ru; i++ ) { if( sblock!=null && !sblock.isEmpty(i) ) { @@ -304,11 +306,11 @@ public abstract class SpoofRowwise extends SpoofOperator int alen = sblock.size(i); //call generated method - genexec(avals, aix, apos, b, scalars, c, alen, n, i); + genexec(avals, aix, apos, lb, scalars, c, alen, n, i); } else - genexec(empty.values(), - empty.indexes(), 0, b, scalars, c, 0, n, i); + genexec(empty.values(), + empty.indexes(), 0, lb, scalars, c, 0, n, i); } } @@ -317,9 +319,10 @@ public abstract class SpoofRowwise extends SpoofOperator if( a.isEmptyBlock(false) ) return; + SideInput[] lb = createSparseSideInputs(b, true); 
Iterator<double[]> iter = a.getDenseRowIterator(rl, ru); for( int i=rl; iter.hasNext(); i++ ) { - genexec(iter.next(), 0, b, scalars, c, n, i); + genexec(iter.next(), 0, lb, scalars, c, n, i); } }
