[SYSTEMML-1971] New codegen vector primitive for counting nnz. This patch adds a new codegen vector primitive for rowSums(X!=0), i.e., for counting the number of non-zeros per row, which avoids unnecessary dense row intermediates and is realized as a pure metadata operation for sparse row inputs.
After recent optimizer changes, we compiled a row template in ALS-CG for rowSums(X!=0), which showed severe performance issues on the Amazon books dataset. This was because Amazon is an ultra-sparse dataset with a huge number of features (2330066), for which the dense row intermediates are more than 7 orders of magnitude larger than the sparse row input. This patch completely removes these performance issues. Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/1191dbfe Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/1191dbfe Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/1191dbfe Branch: refs/heads/master Commit: 1191dbfe2a20d85bf79f0106312f37df210053cf Parents: c70cb11 Author: Matthias Boehm <mboe...@gmail.com> Authored: Sun Oct 22 19:53:52 2017 -0700 Committer: Matthias Boehm <mboe...@gmail.com> Committed: Sun Oct 22 19:53:52 2017 -0700 ---------------------------------------------------------------------- .../org/apache/sysml/hops/codegen/cplan/CNodeUnary.java | 9 ++++++--- .../apache/sysml/hops/codegen/template/TemplateRow.java | 5 +++++ .../sysml/hops/codegen/template/TemplateUtils.java | 3 ++- .../org/apache/sysml/hops/rewrite/HopRewriteUtils.java | 6 ++++++ .../sysml/runtime/codegen/LibSpoofPrimitives.java | 12 ++++++++++++ 5 files changed, 31 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/1191dbfe/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java index b66423e..3a3dc79 100644 --- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java +++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java @@ -30,7 +30,7 
@@ public class CNodeUnary extends CNode { public enum UnaryType { LOOKUP_R, LOOKUP_C, LOOKUP_RC, LOOKUP0, //codegen specific - ROW_SUMS, ROW_MINS, ROW_MAXS, //codegen specific + ROW_SUMS, ROW_MINS, ROW_MAXS, ROW_COUNTNNZS, //codegen specific VECT_EXP, VECT_POW2, VECT_MULT2, VECT_SQRT, VECT_LOG, VECT_ABS, VECT_ROUND, VECT_CEIL, VECT_FLOOR, VECT_SIGN, VECT_SIN, VECT_COS, VECT_TAN, VECT_ASIN, VECT_ACOS, VECT_ATAN, @@ -52,8 +52,9 @@ public class CNodeUnary extends CNode switch( this ) { case ROW_SUMS: case ROW_MINS: - case ROW_MAXS: { - String vectName = StringUtils.capitalize(this.toString().substring(4,7).toLowerCase()); + case ROW_MAXS: + case ROW_COUNTNNZS: { + String vectName = StringUtils.capitalize(name().substring(4, name().length()-1).toLowerCase()); return sparse ? " double %TMP% = LibSpoofPrimitives.vect"+vectName+"(%IN1v%, %IN1i%, %POS1%, alen, len);\n": " double %TMP% = LibSpoofPrimitives.vect"+vectName+"(%IN1%, %POS1%, %LEN%);\n"; } @@ -244,6 +245,7 @@ public class CNodeUnary extends CNode case ROW_SUMS: return "u(R+)"; case ROW_MINS: return "u(Rmin)"; case ROW_MAXS: return "u(Rmax)"; + case ROW_COUNTNNZS: return "u(Rnnz)"; case VECT_EXP: case VECT_POW2: case VECT_MULT2: @@ -308,6 +310,7 @@ public class CNodeUnary extends CNode case ROW_SUMS: case ROW_MINS: case ROW_MAXS: + case ROW_COUNTNNZS: case EXP: case LOOKUP_R: case LOOKUP_C: http://git-wip-us.apache.org/repos/asf/systemml/blob/1191dbfe/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java index e664b9f..9da04dc 100644 --- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java +++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java @@ -240,6 +240,11 @@ public class TemplateRow extends TemplateBase if( 
((AggUnaryOp)hop).getDirection() == Direction.Row && HopRewriteUtils.isAggUnaryOp(hop, SUPPORTED_ROW_AGG) ) { if(hop.getInput().get(0).getDim2()==1) out = (cdata1.getDataType()==DataType.SCALAR) ? cdata1 : new CNodeUnary(cdata1,UnaryType.LOOKUP_R); + else if( HopRewriteUtils.isAggUnaryOp(hop, AggOp.SUM) + && HopRewriteUtils.isBinaryMatrixScalar(hop.getInput().get(0), OpOp2.NOTEQUAL, 0) + && cdata1 instanceof CNodeBinary ) { + out = new CNodeUnary(cdata1.getInput().get(0), UnaryType.ROW_COUNTNNZS); + } else { String opcode = "ROW_"+((AggUnaryOp)hop).getOp().name().toUpperCase()+"S"; out = new CNodeUnary(cdata1, UnaryType.valueOf(opcode)); http://git-wip-us.apache.org/repos/asf/systemml/blob/1191dbfe/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java index 96e15cb..e07c410 100644 --- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java +++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java @@ -312,7 +312,8 @@ public class TemplateUtils public static boolean hasSingleOperation(CNodeTpl tpl) { CNode output = tpl.getOutput(); return ((output instanceof CNodeUnary - && !TemplateUtils.isUnary(output, UnaryType.EXP, UnaryType.LOG)) + && !TemplateUtils.isUnary(output, + UnaryType.EXP, UnaryType.LOG, UnaryType.ROW_COUNTNNZS)) || (output instanceof CNodeBinary && !TemplateUtils.isBinary(output, BinType.VECT_OUTERMULT_ADD))) && hasOnlyDataNodeOrLookupInputs(output); http://git-wip-us.apache.org/repos/asf/systemml/blob/1191dbfe/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java 
b/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java index 7bbfa52..68068eb 100644 --- a/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java +++ b/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java @@ -893,6 +893,12 @@ public class HopRewriteUtils return ret; } + public static boolean isBinaryMatrixScalar(Hop hop, OpOp2 type, double val) { + return isBinary(hop, type) + && (isLiteralOfValue(hop.getInput().get(0), val) + || isLiteralOfValue(hop.getInput().get(1), val)); + } + public static boolean containsInput(Hop current, Hop probe) { return rContainsInput(current, probe, new HashSet<Long>()); } http://git-wip-us.apache.org/repos/asf/systemml/blob/1191dbfe/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java b/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java index 91fde5e..356c729 100644 --- a/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java +++ b/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java @@ -281,6 +281,18 @@ public class LibSpoofPrimitives return (alen<len) ? Math.max(val, 0) : val; } + public static double vectCountnnz(double[] a, int ai, int len) { + int count = 0; + for( int i = ai; i < ai+len; i++ ) + count += (a[i] != 0) ? 1 : 0; + return count; + } + + public static double vectCountnnz(double[] avals, int[] aix, int ai, int alen, int len) { + //pure meta data operation + return alen; + } + //custom vector div public static void vectDivAdd(double[] a, double bval, double[] c, int ai, int ci, int len) {