Repository: systemml
Updated Branches:
  refs/heads/master b420baa2e -> 48bfc9e30
[MINOR] Reduce code duplication DNN maxpool/avgpool, performance avgpool

This patch removes unnecessarily duplicated code paths for max and avg
pooling. Furthermore, this also includes a minor performance improvement
for avg pooling by avoiding unnecessary multiply operations.

Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/3705e78f
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/3705e78f
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/3705e78f

Branch: refs/heads/master
Commit: 3705e78fbc56356d8762333159a3e00ef51c3d1a
Parents: b420baa
Author: Matthias Boehm <[email protected]>
Authored: Sat Jun 16 18:25:59 2018 -0700
Committer: Matthias Boehm <[email protected]>
Committed: Sat Jun 16 18:25:59 2018 -0700

----------------------------------------------------------------------
 .../matrix/data/LibMatrixDNNPooling.java | 75 +++++++-------------
 1 file changed, 26 insertions(+), 49 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/systemml/blob/3705e78f/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNPooling.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNPooling.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNPooling.java
index 29f949e..4d92377 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNPooling.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNPooling.java
@@ -23,6 +23,7 @@ import java.util.Arrays;
 import java.util.concurrent.Callable;
 import org.apache.sysml.hops.OptimizerUtils;
+import org.apache.sysml.runtime.codegen.LibSpoofPrimitives;
 import org.apache.sysml.runtime.matrix.data.LibMatrixDNN.PoolingType;
 import org.apache.sysml.runtime.matrix.data.LibMatrixDNNHelper.CellIndex3;
@@ -122,6 +123,7 @@ public 
class LibMatrixDNNPooling { double[] out = _params.output.getDenseBlockValues(); double minValForMaxPoolOperations = _poolingType == PoolingType.AVG ? 0 : _params.minValForMaxPoolOperations; + boolean max = (_poolingType == PoolingType.MAX); //thread-local initialization of output block if( !(_params.isStride1Pad0() && _params.isAllOnes(P, Q, W)) ) @@ -131,56 +133,34 @@ public class LibMatrixDNNPooling { //quick-path w/o materialized index arrays and //simplified inner loops for P = 1, Q = 1, W = 1 int lenh = Math.min(R,H); - if(_poolingType == PoolingType.AVG) { - for(int i = _rl, oix=_rl*C; i < _ru; i++, oix+=C) - for (int c = 0, off=i*CHW; c < C; c++, off+=H) - out[oix+c] = avg_pool(minValForMaxPoolOperations, in, off, lenh, _poolingMultiplier); - } - else { - for(int i = _rl, oix=_rl*C; i < _ru; i++, oix+=C) - for (int c = 0, off=i*CHW; c < C; c++, off+=H) - out[oix+c] = max(minValForMaxPoolOperations, in, off, lenh); - } + for(int i = _rl, oix=_rl*C; i < _ru; i++, oix+=C) + for (int c = 0, off=i*CHW; c < C; c++, off+=H) { + out[oix+c] = max ? 
max(minValForMaxPoolOperations, in, off, lenh) : + avg(minValForMaxPoolOperations, in, off, lenh, _poolingMultiplier); + } } else if( _params.isStride1Pad0() ) { - if(_poolingType == PoolingType.AVG) { - //quick-path w/o materialized index arrays - for(int i = _rl; i < _ru; i++) - for (int c = 0, off=i*CHW, oix=i*CPQ; c < C; c++, off+=HW) - for (int p = 0; p < P; p++, oix+=Q) - for (int h = p; h < Math.min(p+R,H); h++) - for (int q = 0, off2=off+h*W; q < Q; q++) - out[oix+q] = avg_pool(out[oix+q], in, off2+q, Math.min(S,W-q), _poolingMultiplier); - } - else { - //quick-path w/o materialized index arrays - for(int i = _rl; i < _ru; i++) - for (int c = 0, off=i*CHW, oix=i*CPQ; c < C; c++, off+=HW) - for (int p = 0; p < P; p++, oix+=Q) - for (int h = p; h < Math.min(p+R,H); h++) - for (int q = 0, off2=off+h*W; q < Q; q++) - out[oix+q] = max(out[oix+q], in, off2+q, Math.min(S,W-q)); - } + //quick-path w/o materialized index arrays + for(int i = _rl; i < _ru; i++) + for (int c = 0, off=i*CHW, oix=i*CPQ; c < C; c++, off+=HW) + for (int p = 0; p < P; p++, oix+=Q) + for (int h = p; h < Math.min(p+R,H); h++) + for (int q = 0, off2=off+h*W; q < Q; q++) { + out[oix+q] = max ? 
max(out[oix+q], in, off2+q, Math.min(S,W-q)) : + avg(out[oix+q], in, off2+q, Math.min(S,W-q), _poolingMultiplier); + } } else { //general case int[] hl = _params.start_indexes_h, hu = _params.end_indexes_h; int[] wl = _params.start_indexes_w, wu = _params.end_indexes_w; - if(_poolingType == PoolingType.AVG) { - for(int i = _rl; i < _ru; i++) - for (int c = 0, off=i*CHW, oix=i*CPQ; c < C; c++, off+=HW) - for (int p = 0; p < P; p++, oix+=Q) - for (int h = hl[p]; h < hu[p]; h++) - for (int q = 0, off2=off+h*W; q < Q; q++) - out[oix+q] = avg_pool(out[oix+q], in, off2+wl[q], wu[q]-wl[q], _poolingMultiplier); - } - else { - for(int i = _rl; i < _ru; i++) - for (int c = 0, off=i*CHW, oix=i*CPQ; c < C; c++, off+=HW) - for (int p = 0; p < P; p++, oix+=Q) - for (int h = hl[p]; h < hu[p]; h++) - for (int q = 0, off2=off+h*W; q < Q; q++) - out[oix+q] = max(out[oix+q], in, off2+wl[q], wu[q]-wl[q]); - } + for(int i = _rl; i < _ru; i++) + for (int c = 0, off=i*CHW, oix=i*CPQ; c < C; c++, off+=HW) + for (int p = 0; p < P; p++, oix+=Q) + for (int h = hl[p]; h < hu[p]; h++) + for (int q = 0, off2=off+h*W; q < Q; q++) { + out[oix+q] = max ? max(out[oix+q], in, off2+wl[q], wu[q]-wl[q]) : + avg(out[oix+q], in, off2+wl[q], wu[q]-wl[q], _poolingMultiplier); + } } //thread-local recomputation of non-zeros @@ -655,11 +635,8 @@ public class LibMatrixDNNPooling { } } - private static double avg_pool(final double aval, double[] b, final int bi, final int len, final double poolingMultiplier) { - double ret = aval; - for( int i = bi; i < bi+len; i++ ) - ret += poolingMultiplier*b[i]; - return ret; + private static double avg(final double aval, double[] b, final int bi, final int len, final double poolingMultiplier) { + return LibSpoofPrimitives.vectSum(b, bi, len) * poolingMultiplier + aval; } private static double max(final double aval, double[] b, final int bi, final int len) {
