Repository: systemml
Updated Branches:
  refs/heads/master 0325da7de -> 4d5a82ecf

[SYSTEMML-540] Avoid unnecessary sparse-to-dense conversion on depthwise convolution layers and probability matrix

```
Network: unet
Setup: 30g driver, K80 GPU (only 1 GPU used)

Performance before this commit:
Total elapsed time: 306.291 sec.
1 leftIndex [106:4-106:42] 139.392 28552 csrlix[114.674s,22649], aqrs[0.021s,23165], rls[0.069s,57104], s2d[8.691s,258], aqrd[14.199s,33939], lixcp[1.351s,5903]

Performance after this commit:
Total elapsed time: 220.712 sec.
6 leftIndex [106:4-106:42] 21.066 28552 rls[0.036s,57104], lixcp[5.375s,28552], aqrd[15.423s,57104]
```

Closes #610.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/4d5a82ec
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/4d5a82ec
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/4d5a82ec

Branch: refs/heads/master
Commit: 4d5a82ecf18c57f3cf614364fdfabaf0680a51fe
Parents: 0325da7
Author: Niketan Pansare <[email protected]>
Authored: Tue Aug 22 10:23:20 2017 -0700
Committer: Niketan Pansare <[email protected]>
Committed: Tue Aug 22 10:24:59 2017 -0700

----------------------------------------------------------------------
 scripts/nn/layers/conv2d_depthwise.dml                 | 12 ++++++++----
 scripts/nn/layers/conv2d_transpose_depthwise.dml       | 12 ++++++++----
 src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala |  2 +-
 3 files changed, 17 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/4d5a82ec/scripts/nn/layers/conv2d_depthwise.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/layers/conv2d_depthwise.dml b/scripts/nn/layers/conv2d_depthwise.dml
index ff36ea1..4e4d3e4 100644
--- a/scripts/nn/layers/conv2d_depthwise.dml
+++ b/scripts/nn/layers/conv2d_depthwise.dml
@@ -79,7 +79,9 @@ forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
   Wout = as.integer(floor((Win + 2*padw - Wf)/stridew + 1))

   # create output volume
-  out = matrix(0, rows=N, cols=C*M*Hout*Wout)
+  # NOTE: We initialize to 1s vs. 0s to avoid conversions between sparse and dense formats.
+  # This is a complete hack until the engine is improved.
+  out = matrix(1, rows=N, cols=C*M*Hout*Wout)

   # depthwise convolution
   # TODO: Explore usage of parfor loops more to determine if they can provide a performance
@@ -137,9 +139,11 @@ backward = function(matrix[double] dout, int Hout, int Wout,
   C = nrow(W)

   # create gradient volumes
-  dX = matrix(0, rows=N, cols=C*Hin*Win)
-  dW = matrix(0, rows=C, cols=M*Hf*Wf)
-  db = matrix(0, rows=C*M, cols=1)
+  # NOTE: We initialize to 1s vs. 0s to avoid conversions between sparse and dense formats.
+  # This is a complete hack until the engine is improved.
+  dX = matrix(1, rows=N, cols=C*Hin*Win)
+  dW = matrix(1, rows=C, cols=M*Hf*Wf)
+  db = matrix(1, rows=C*M, cols=1)

   # partial derivatives for depthwise convolution
   for (c in 1:C) {  # all examples


http://git-wip-us.apache.org/repos/asf/systemml/blob/4d5a82ec/scripts/nn/layers/conv2d_transpose_depthwise.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/layers/conv2d_transpose_depthwise.dml b/scripts/nn/layers/conv2d_transpose_depthwise.dml
index b3e798c..0a9a235 100644
--- a/scripts/nn/layers/conv2d_transpose_depthwise.dml
+++ b/scripts/nn/layers/conv2d_transpose_depthwise.dml
@@ -85,7 +85,9 @@ forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
   Wout = stridew*(Win-1) - 2*padw + Wf + out_padw

   # create output volume
-  out = matrix(0, rows=N, cols=C/M*Hout*Wout)
+  # NOTE: We initialize to 1s vs. 0s to avoid conversions between sparse and dense formats.
+  # This is a complete hack until the engine is improved.
+  out = matrix(1, rows=N, cols=C/M*Hout*Wout)

   # depthwise transpose convolution
   # TODO: Explore usage of parfor loops more to determine if they can provide a performance
@@ -146,9 +148,11 @@ backward = function(matrix[double] dout, int Hout, int Wout,
   F = nrow(W)

   # create gradient volumes
-  dX = matrix(0, rows=N, cols=C*Hin*Win)
-  dW = matrix(0, rows=C/M, cols=M*Hf*Wf)
-  db = matrix(0, rows=C/M, cols=1)
+  # NOTE: We initialize to 1s vs. 0s to avoid conversions between sparse and dense formats.
+  # This is a complete hack until the engine is improved.
+  dX = matrix(1, rows=N, cols=C*Hin*Win)
+  dW = matrix(1, rows=C/M, cols=M*Hf*Wf)
+  db = matrix(1, rows=C/M, cols=1)

   # depthwise transpose convolution
   for (f in 1:F) {


http://git-wip-us.apache.org/repos/asf/systemml/blob/4d5a82ec/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala
----------------------------------------------------------------------
diff --git a/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala b/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala
index 25d19f6..000fe32 100644
--- a/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala
+++ b/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala
@@ -613,7 +613,7 @@ class Caffe2DMLModel(val numClasses:String, val sc: SparkContext, val solver:Caf
     val lossLayers = getLossLayers(net)
     val lastLayerShape = estimator.getOutputShapeOfLastLayer
-    assign(tabDMLScript, "Prob", matrix("0", Caffe2DML.numImages, (lastLayerShape._1*lastLayerShape._2*lastLayerShape._3).toString))
+    assign(tabDMLScript, "Prob", matrix("1", Caffe2DML.numImages, (lastLayerShape._1*lastLayerShape._2*lastLayerShape._3).toString))
     estimator.getTestAlgo.toLowerCase match {
       case "minibatch" => {
         ceilDivide(tabDMLScript(), "num_iters", Caffe2DML.numImages, Caffe2DML.batchSize)
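
For context on the workaround above: the heavy-hitter stats in the commit message show the left-indexing updates (leftIndex) spending most of their time in sparse-specific paths (csrlix, s2d), because an all-0s allocation starts out sparse and each in-place update then pays for conversions. Allocating the result as all-1s keeps the block dense from the start. A minimal DML sketch of the pattern, not part of the commit; the dimensions and the per-channel placeholder `outc` are illustrative only:

```
# Hypothetical dimensions for illustration only
N = 4
C = 2
M = 3
Hout = 5
Wout = 5

# Allocating all-1s (dense) instead of all-0s (sparse) is the hack from the
# patch: the left-indexed writes below then avoid sparse-to-dense conversions.
out = matrix(1, rows=N, cols=C*M*Hout*Wout)

for (c in 1:C) {
  # placeholder per-channel result; in the real layer this comes from the convolution
  outc = matrix(c, rows=N, cols=M*Hout*Wout)
  out[,(c-1)*M*Hout*Wout+1:c*M*Hout*Wout] = outc
}
```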
