Repository: systemml Updated Branches: refs/heads/master 94f1b72ef -> aa537dad4
[SYSTEMML-2133] Performance cp maxpooling operations (load balance) This patch makes a minor performance improvement to maxpooling and maxpooling_backward operations. So far, we used static task partitioning with one task per thread, which led to suboptimal load balance. Since these maxpooling operations do not create intermediates, we now use twice as many tasks as threads. On an end-to-end CNN scoring application, this improved the maxpooling performance by 25%. Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/54570660 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/54570660 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/54570660 Branch: refs/heads/master Commit: 54570660d6e5beddc88d9b6fa44de8f08e855dc8 Parents: 94f1b72 Author: Matthias Boehm <[email protected]> Authored: Tue Feb 6 11:43:26 2018 -0800 Committer: Matthias Boehm <[email protected]> Committed: Tue Feb 6 11:44:47 2018 -0800 ---------------------------------------------------------------------- .../apache/sysml/runtime/matrix/data/LibMatrixDNNPooling.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/54570660/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNPooling.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNPooling.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNPooling.java index 067e9ef..906ef90 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNPooling.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNPooling.java @@ -45,8 +45,9 @@ public class LibMatrixDNNPooling { */ public static ArrayList<Callable<Long>> getMaxPoolingWorkers(ConvolutionParameters params) throws 
DMLRuntimeException { ArrayList<Callable<Long>> ret = new ArrayList<>(); + // Try to create twice as many tasks as threads for improved load balance int k = OptimizerUtils.getConstrainedNumThreads(params.numThreads); - int taskSize = (int)(Math.ceil((double)params.N / k)); + int taskSize = (int)(Math.ceil((double)params.N / k / 2)); for(int i = 0; i*taskSize < params.N; i++) { if(params.input1.isInSparseFormat()) ret.add(new SparseMaxPooling(i*taskSize, Math.min((i+1)*taskSize, params.N), params)); @@ -66,8 +67,9 @@ public class LibMatrixDNNPooling { */ public static ArrayList<Callable<Long>> getMaxPoolingBackwardWorkers(ConvolutionParameters params, boolean performReluBackward) throws DMLRuntimeException { ArrayList<Callable<Long>> ret = new ArrayList<>(); + // Try to create twice as many tasks as threads for improved load balance int k = OptimizerUtils.getConstrainedNumThreads(params.numThreads); - int taskSize = (int)(Math.ceil((double)params.N / k)); + int taskSize = (int)(Math.ceil((double)params.N / k / 2)); boolean sparse1 = params.input1.isInSparseFormat(); boolean sparse2 = params.input2.isInSparseFormat(); for(int i = 0; i*taskSize < params.N; i++) {
