Repository: incubator-systemml
Updated Branches:
  refs/heads/master 2ebf885a6 -> ab45af17c


[SYSTEMML-769] Minor improvement for dense-dense conv2d and added
statistics method for performance debugging

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/ab45af17
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/ab45af17
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/ab45af17

Branch: refs/heads/master
Commit: ab45af17c3ff54a77262a318c5d0be084384b8f7
Parents: 2ebf885
Author: Niketan Pansare <[email protected]>
Authored: Sat Jul 9 09:33:52 2016 -0700
Committer: Niketan Pansare <[email protected]>
Committed: Sat Jul 9 09:33:52 2016 -0700

----------------------------------------------------------------------
 .../sysml/runtime/matrix/data/LibMatrixDNN.java | 144 ++++++++++++-------
 1 file changed, 94 insertions(+), 50 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ab45af17/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
index 26e2b8b..3014b49 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
@@ -32,11 +32,11 @@ import java.util.concurrent.atomic.AtomicLong;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.sysml.api.DMLScript;
 import org.apache.sysml.hops.OptimizerUtils;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.util.ConvolutionUtils;
 
-
 public class LibMatrixDNN {
        
        protected static final Log LOG =  
LogFactory.getLog(LibMatrixDNN.class.getName());
@@ -77,6 +77,44 @@ public class LibMatrixDNN {
                int maxCommonIndexS;
        }
        
+       private static AtomicLong conv2dSparseCount = new AtomicLong(0);
+       private static AtomicLong conv2dDenseCount = new AtomicLong(0);
+       private static AtomicLong conv2dBwdFilterSparseCount = new AtomicLong(0);
+       private static AtomicLong conv2dBwdFilterDenseCount = new AtomicLong(0);
+       private static AtomicLong conv2dBwdDataSparseCount = new AtomicLong(0);
+       private static AtomicLong conv2dBwdDataDenseCount = new AtomicLong(0);
+       private static AtomicLong im2colSparseCount = new AtomicLong(0);
+       private static AtomicLong im2colDenseCount = new AtomicLong(0);
+       private static AtomicLong maxPoolBwdSparseCount = new AtomicLong(0);
+       private static AtomicLong maxPoolBwdDenseCount = new AtomicLong(0);
+       public static void appendStatistics(StringBuilder sb) {
+               sb.append("LibMatrixDNN dense count (conv/bwdF/bwdD/im2col/maxBwd):\t" 
+                               + conv2dDenseCount.get() + "/"
+                               + conv2dBwdFilterDenseCount.get() + "/"
+                               + conv2dBwdDataDenseCount.get() + "/"
+                               + im2colDenseCount.get() + "/"
+                               + maxPoolBwdDenseCount.get() + ".\n");
+               sb.append("LibMatrixDNN sparse count (conv/bwdF/bwdD/im2col/maxBwd):\t" 
+                               + conv2dSparseCount.get() + "/"
+                               + conv2dBwdFilterSparseCount.get() + "/"
+                               + conv2dBwdDataSparseCount.get() + "/"
+                               + im2colSparseCount.get() + "/"
+                               + maxPoolBwdSparseCount.get() + ".\n");
+       }
+       public static void resetStatistics() {
+               conv2dDenseCount.set(0);
+               conv2dBwdFilterDenseCount.set(0);
+               conv2dBwdDataDenseCount.set(0);
+               im2colDenseCount.set(0);
+               maxPoolBwdDenseCount.set(0);
+               
+               conv2dSparseCount.set(0);
+               conv2dBwdFilterSparseCount.set(0);
+               conv2dBwdDataSparseCount.set(0);
+               im2colSparseCount.set(0);
+               maxPoolBwdSparseCount.set(0);
+       }
+       
        public static class ConvolutionParameters {
                public int N; public int C; public int H; public int W;
                public int K; public int R; public int S; public int stride_h; 
public int stride_w; public int pad_h; public int pad_w;
@@ -169,6 +207,15 @@ public class LibMatrixDNN {
                        throw new DMLRuntimeException("Only positive strides 
supported");
                }
                
+               if(DMLScript.STATISTICS) {
+                       if(input.isInSparseFormat() || dout.isInSparseFormat()) {
+                               conv2dBwdFilterSparseCount.addAndGet(1);
+                       }
+                       else {
+                               conv2dBwdFilterDenseCount.addAndGet(1);
+                       }
+               }
+               
                int constrainedNumThreads = 
OptimizerUtils.getConstrainedNumThreads(params.numThreads);
                if(!ALLOW_MULTI_THREADED_OPS || constrainedNumThreads <= 1) {
                        for (int c = 0; c < params.C; c++) {
@@ -366,6 +413,15 @@ public class LibMatrixDNN {
                        throw new DMLRuntimeException("Incorrect input to 
conv2d");
                }
                
+               if(DMLScript.STATISTICS) {
+                       if(input.isInSparseFormat() || filter.isInSparseFormat()) {
+                               conv2dSparseCount.addAndGet(1);
+                       }
+                       else {
+                               conv2dDenseCount.addAndGet(1);
+                       }
+               }
+               
                params.tmpData = new TemporaryConvolutionData();
                if(input.isInSparseFormat()) {
                        params.tmpData.minIndexArrR = new int[params.H];
@@ -433,6 +489,15 @@ public class LibMatrixDNN {
                if(dout.getNumColumns() != params.C*params.P*params.Q || 
dout.getNumRows() != params.N) {
                        throw new DMLRuntimeException("Incorrect dout 
dimensions in maxpooling_backward:" + input.getNumRows() + " " + 
input.getNumColumns() + " " + params.N + " " + params.K*params.P*params.Q);
                }
+               
+               if(DMLScript.STATISTICS) {
+                       if(input.isInSparseFormat() || dout.isInSparseFormat()) {
+                               maxPoolBwdSparseCount.addAndGet(1);
+                       }
+                       else {
+                               maxPoolBwdDenseCount.addAndGet(1);
+                       }
+               }
 
                int constrainedNumThreads = 
OptimizerUtils.getConstrainedNumThreads(params.numThreads);
                if(!ALLOW_MULTI_THREADED_OPS || constrainedNumThreads <= 1) {
@@ -455,41 +520,10 @@ public class LibMatrixDNN {
                int [] minIndexArrS = params.tmpData.minIndexArrS;
                int [] maxIndexArrS = params.tmpData.maxIndexArrS;
                
-               int minCommonIndexS = params.tmpData.minCommonIndexS;
-               int maxCommonIndexS = params.tmpData.maxCommonIndexS;
-               
+               final int minCommonIndexS = params.tmpData.minCommonIndexS;
+               final int maxCommonIndexS = params.tmpData.maxCommonIndexS;
                
-               int minS = 0;
-               if(params.S >= 4) {
-                       minS = params.S - params.S % 4;
-                       for (int n = n1; n < n2; n++) {
-                               for (int c = 0; c < params.C; c++) {
-                                       for (int r = 0; r < params.R; r++) {
-                                               final int filterOffset = 
k*params.C*params.R*params.S + c*params.R*params.S + r*params.S;
-                                               for (int p = minIndexArrR[r]; p 
< maxIndexArrR[r]; p++) {
-                                                       final int h = 
p*params.stride_h + r - params.pad_h;
-                                                       final int inputOffSet = 
n*params.C*params.H*params.W + c*params.H*params.W + h*params.W - params.pad_w;
-                                                       final int outputOffset 
= n*params.K*params.P*params.Q + k*params.P*params.Q + p*params.Q;
-                                                       // 
------------------------------------------------------------------------
-                                                       // Efficient striding 
with vectorization
-                                                       for (int q = 
minCommonIndexS; q < maxCommonIndexS; q++) {
-                                                               final int 
wOffset = inputOffSet + q*params.stride_w;
-                                                               final int 
outOffsetWithQ = outputOffset + q;
-                                                               for (int s = 0; 
s < minS; s += 4) {
-                                                                       final 
int inOffsetWithS = wOffset + s;
-                                                                       final 
int filterOffsetWithS = filterOffset + s;
-                                                                       
outputArray[outOffsetWithQ] += 
inputArray[inOffsetWithS]*filterArray[filterOffsetWithS]
-                                                                               
        + inputArray[inOffsetWithS+1]*filterArray[filterOffsetWithS+1]
-                                                                               
        + inputArray[inOffsetWithS+2]*filterArray[filterOffsetWithS+2]
-                                                                               
        + inputArray[inOffsetWithS+3]*filterArray[filterOffsetWithS+3];
-                                                               }
-                                                       }
-                                                       // 
------------------------------------------------------------------------
-                                               }
-                                       }
-                               }
-                       }
-               }
+               final int minS = (params.S >= 4) ? (params.S - params.S % 4) : 0;
                
                for (int n = n1; n < n2; n++) {
                        for (int c = 0; c < params.C; c++) {
@@ -499,28 +533,28 @@ public class LibMatrixDNN {
                                                final int h = p*params.stride_h 
+ r - params.pad_h;
                                                final int inputOffSet = 
n*params.C*params.H*params.W + c*params.H*params.W + h*params.W - params.pad_w;
                                                final int outputOffset = 
n*params.K*params.P*params.Q + k*params.P*params.Q + p*params.Q;
-                                               // 
------------------------------------------------------------------------
-                                               // Efficient striding
+                                               
                                                for (int q = minCommonIndexS; q 
< maxCommonIndexS; q++) {
                                                        final int wOffset = 
inputOffSet + q*params.stride_w;
+                                                       // 
------------------------------------------------------------------------
+                                                       // Efficient striding 
with vectorization
+                                                       final int 
outOffsetWithQ = outputOffset + q;
+                                                       for (int s = 0; s < 
minS; s += 4) {
+                                                               final int 
inOffsetWithS = wOffset + s;
+                                                               final int 
filterOffsetWithS = filterOffset + s;
+                                                               
outputArray[outOffsetWithQ] += 
inputArray[inOffsetWithS]*filterArray[filterOffsetWithS]
+                                                                               
+ inputArray[inOffsetWithS+1]*filterArray[filterOffsetWithS+1]
+                                                                               
+ inputArray[inOffsetWithS+2]*filterArray[filterOffsetWithS+2]
+                                                                               
+ inputArray[inOffsetWithS+3]*filterArray[filterOffsetWithS+3];
+                                                       }
+                                                       // 
------------------------------------------------------------------------
+                                                       // Efficient striding 
without vectorization
                                                        for (int s = minS; s < 
params.S; s++) {
                                                                
outputArray[outputOffset + q] += inputArray[wOffset + 
s]*filterArray[filterOffset + s];
                                                        }
+                                                       // 
------------------------------------------------------------------------
                                                }
                                                // 
------------------------------------------------------------------------
-                                       }
-                               }
-                       }
-                       
-                       
-                       for (int c = 0; c < params.C; c++) {
-                               for (int r = 0; r < params.R; r++) {
-                                       final int filterOffset = 
k*params.C*params.R*params.S + c*params.R*params.S + r*params.S;
-                                       for (int p = minIndexArrR[r]; p < 
maxIndexArrR[r]; p++) {
-                                               final int h = p*params.stride_h 
+ r - params.pad_h;
-                                               final int inputOffSet = 
n*params.C*params.H*params.W + c*params.H*params.W + h*params.W - params.pad_w;
-                                               final int outputOffset = 
n*params.K*params.P*params.Q + k*params.P*params.Q + p*params.Q;
-                                               // 
------------------------------------------------------------------------
                                                // Inefficient striding
                                                for (int s = 0; s < params.S; 
s++) {
                                                        for (int q = 
minIndexArrS[s]; q < minCommonIndexS; q++) {
@@ -1032,6 +1066,16 @@ public class LibMatrixDNN {
                params.output = outputBlock;
                
                params.outputNNZ.set(0);
+               
+               if(DMLScript.STATISTICS) {
+                       if(input.isInSparseFormat()) {
+                               im2colSparseCount.addAndGet(1);
+                       }
+                       else {
+                               im2colDenseCount.addAndGet(1);
+                       }
+               }
+               
                int constrainedNumThreads = 
OptimizerUtils.getConstrainedNumThreads(params.numThreads);
                if(!ALLOW_MULTI_THREADED_OPS || constrainedNumThreads <= 1) {
                        for (int n = 0; n < params.N; n++) { // Do following 
for all images

Reply via email to