[SYSTEMML-1971] New codegen vector primitive for counting nnz

This patch adds a new codegen vector primitive for rowSums(X!=0), i.e.
for counting the number of non-zeros, which avoids unnecessary dense row
intermediates and is realized as a pure metadata operation for sparse
row inputs.

After recent optimizer changes, we compiled a row template in ALS-CG for
rowSums(X!=0), which showed severe performance issues on the amazon
books dataset. This was because amazon is an ultra-sparse dataset with a
huge number of features (2330066), for which the dense row intermediates
are more than 7 orders of magnitude larger than the sparse row input.
This patch completely removes these performance issues.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/1191dbfe
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/1191dbfe
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/1191dbfe

Branch: refs/heads/master
Commit: 1191dbfe2a20d85bf79f0106312f37df210053cf
Parents: c70cb11
Author: Matthias Boehm <mboe...@gmail.com>
Authored: Sun Oct 22 19:53:52 2017 -0700
Committer: Matthias Boehm <mboe...@gmail.com>
Committed: Sun Oct 22 19:53:52 2017 -0700

----------------------------------------------------------------------
 .../org/apache/sysml/hops/codegen/cplan/CNodeUnary.java |  9 ++++++---
 .../apache/sysml/hops/codegen/template/TemplateRow.java |  5 +++++
 .../sysml/hops/codegen/template/TemplateUtils.java      |  3 ++-
 .../org/apache/sysml/hops/rewrite/HopRewriteUtils.java  |  6 ++++++
 .../sysml/runtime/codegen/LibSpoofPrimitives.java       | 12 ++++++++++++
 5 files changed, 31 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/1191dbfe/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java 
b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java
index b66423e..3a3dc79 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java
@@ -30,7 +30,7 @@ public class CNodeUnary extends CNode
 {
        public enum UnaryType {
                LOOKUP_R, LOOKUP_C, LOOKUP_RC, LOOKUP0, //codegen specific
-               ROW_SUMS, ROW_MINS, ROW_MAXS, //codegen specific
+               ROW_SUMS, ROW_MINS, ROW_MAXS, ROW_COUNTNNZS, //codegen specific
                VECT_EXP, VECT_POW2, VECT_MULT2, VECT_SQRT, VECT_LOG,
                VECT_ABS, VECT_ROUND, VECT_CEIL, VECT_FLOOR, VECT_SIGN, 
                VECT_SIN, VECT_COS, VECT_TAN, VECT_ASIN, VECT_ACOS, VECT_ATAN, 
@@ -52,8 +52,9 @@ public class CNodeUnary extends CNode
                        switch( this ) {
                                case ROW_SUMS:
                                case ROW_MINS:
-                               case ROW_MAXS: {
-                                       String vectName = 
StringUtils.capitalize(this.toString().substring(4,7).toLowerCase());
+                               case ROW_MAXS:
+                               case ROW_COUNTNNZS: {
+                                       String vectName = 
StringUtils.capitalize(name().substring(4, name().length()-1).toLowerCase());
                                        return sparse ? "    double %TMP% = 
LibSpoofPrimitives.vect"+vectName+"(%IN1v%, %IN1i%, %POS1%, alen, len);\n": 
                                                                        "    
double %TMP% = LibSpoofPrimitives.vect"+vectName+"(%IN1%, %POS1%, %LEN%);\n"; 
                                }
@@ -244,6 +245,7 @@ public class CNodeUnary extends CNode
                        case ROW_SUMS:  return "u(R+)";
                        case ROW_MINS:  return "u(Rmin)";
                        case ROW_MAXS:  return "u(Rmax)";
+                       case ROW_COUNTNNZS: return "u(Rnnz)";
                        case VECT_EXP:
                        case VECT_POW2:
                        case VECT_MULT2:
@@ -308,6 +310,7 @@ public class CNodeUnary extends CNode
                        case ROW_SUMS:
                        case ROW_MINS:
                        case ROW_MAXS:
+                       case ROW_COUNTNNZS:
                        case EXP:
                        case LOOKUP_R:
                        case LOOKUP_C:

http://git-wip-us.apache.org/repos/asf/systemml/blob/1191dbfe/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java 
b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
index e664b9f..9da04dc 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
@@ -240,6 +240,11 @@ public class TemplateRow extends TemplateBase
                        if( ((AggUnaryOp)hop).getDirection() == Direction.Row 
&& HopRewriteUtils.isAggUnaryOp(hop, SUPPORTED_ROW_AGG) ) {
                                if(hop.getInput().get(0).getDim2()==1)
                                        out = 
(cdata1.getDataType()==DataType.SCALAR) ? cdata1 : new 
CNodeUnary(cdata1,UnaryType.LOOKUP_R);
+                               else if( HopRewriteUtils.isAggUnaryOp(hop, 
AggOp.SUM) 
+                                       && 
HopRewriteUtils.isBinaryMatrixScalar(hop.getInput().get(0), OpOp2.NOTEQUAL, 0)
+                                       && cdata1 instanceof CNodeBinary ) {
+                                       out = new 
CNodeUnary(cdata1.getInput().get(0), UnaryType.ROW_COUNTNNZS);
+                               }
                                else {
                                        String opcode = 
"ROW_"+((AggUnaryOp)hop).getOp().name().toUpperCase()+"S";
                                        out = new CNodeUnary(cdata1, 
UnaryType.valueOf(opcode));

http://git-wip-us.apache.org/repos/asf/systemml/blob/1191dbfe/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java 
b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
index 96e15cb..e07c410 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateUtils.java
@@ -312,7 +312,8 @@ public class TemplateUtils
        public static boolean hasSingleOperation(CNodeTpl tpl) {
                CNode output = tpl.getOutput();
                return ((output instanceof CNodeUnary 
-                               && !TemplateUtils.isUnary(output, 
UnaryType.EXP, UnaryType.LOG)) 
+                               && !TemplateUtils.isUnary(output, 
+                                       UnaryType.EXP, UnaryType.LOG, 
UnaryType.ROW_COUNTNNZS)) 
                        || (output instanceof CNodeBinary
                                && !TemplateUtils.isBinary(output, 
BinType.VECT_OUTERMULT_ADD))) 
                        && hasOnlyDataNodeOrLookupInputs(output);

http://git-wip-us.apache.org/repos/asf/systemml/blob/1191dbfe/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java 
b/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
index 7bbfa52..68068eb 100644
--- a/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
+++ b/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
@@ -893,6 +893,12 @@ public class HopRewriteUtils
                return ret;
        }
        
+       public static boolean isBinaryMatrixScalar(Hop hop, OpOp2 type, double 
val) {
+               return isBinary(hop, type)
+                       && (isLiteralOfValue(hop.getInput().get(0), val)
+                       || isLiteralOfValue(hop.getInput().get(1), val));
+       }
+       
        public static boolean containsInput(Hop current, Hop probe) {
                return rContainsInput(current, probe, new HashSet<Long>());     
        }

http://git-wip-us.apache.org/repos/asf/systemml/blob/1191dbfe/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java 
b/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java
index 91fde5e..356c729 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java
@@ -281,6 +281,18 @@ public class LibSpoofPrimitives
                return (alen<len) ? Math.max(val, 0) : val;
        }
        
+       public static double vectCountnnz(double[] a, int ai, int len) { 
+               int count = 0;
+               for( int i = ai; i < ai+len; i++ )
+                       count += (a[i] != 0) ? 1 : 0;
+               return count;
+       } 
+       
+       public static double vectCountnnz(double[] avals, int[] aix, int ai, 
int alen, int len) {
+               //pure meta data operation
+               return alen;
+       }
+       
        //custom vector div
        
        public static void vectDivAdd(double[] a, double bval, double[] c, int 
ai, int ci, int len) {

Reply via email to