Repository: systemml
Updated Branches:
  refs/heads/master 94f1b72ef -> aa537dad4


[SYSTEMML-2133] Performance cp maxpooling operations (load balance)

This patch makes a minor performance improvement to the maxpooling and
maxpooling_backward operations. So far, we used static task partitioning
with one task per thread, which led to suboptimal load balance. Since
these maxpooling operations do not create intermediates, we now use
twice as many tasks as threads. In an end-to-end CNN scoring
application, this improved the maxpooling performance by 25%.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/54570660
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/54570660
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/54570660

Branch: refs/heads/master
Commit: 54570660d6e5beddc88d9b6fa44de8f08e855dc8
Parents: 94f1b72
Author: Matthias Boehm <[email protected]>
Authored: Tue Feb 6 11:43:26 2018 -0800
Committer: Matthias Boehm <[email protected]>
Committed: Tue Feb 6 11:44:47 2018 -0800

----------------------------------------------------------------------
 .../apache/sysml/runtime/matrix/data/LibMatrixDNNPooling.java  | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/54570660/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNPooling.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNPooling.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNPooling.java
index 067e9ef..906ef90 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNPooling.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNPooling.java
@@ -45,8 +45,9 @@ public class LibMatrixDNNPooling {
         */
        public static ArrayList<Callable<Long>> getMaxPoolingWorkers(ConvolutionParameters params) throws DMLRuntimeException {
                ArrayList<Callable<Long>> ret = new ArrayList<>();
+               // Try to create twice as many tasks as threads for improved load balance
                int k = OptimizerUtils.getConstrainedNumThreads(params.numThreads);
-               int taskSize = (int)(Math.ceil((double)params.N / k));
+               int taskSize = (int)(Math.ceil((double)params.N / k / 2));
                for(int i = 0; i*taskSize < params.N; i++) {
                        if(params.input1.isInSparseFormat())
                                ret.add(new SparseMaxPooling(i*taskSize, Math.min((i+1)*taskSize, params.N), params));
@@ -66,8 +67,9 @@ public class LibMatrixDNNPooling {
         */
        public static ArrayList<Callable<Long>> getMaxPoolingBackwardWorkers(ConvolutionParameters params, boolean performReluBackward) throws DMLRuntimeException {
                ArrayList<Callable<Long>> ret = new ArrayList<>();
+               // Try to create twice as many tasks as threads for improved load balance
                int k = OptimizerUtils.getConstrainedNumThreads(params.numThreads);
-               int taskSize = (int)(Math.ceil((double)params.N / k));
+               int taskSize = (int)(Math.ceil((double)params.N / k / 2));
                boolean sparse1 = params.input1.isInSparseFormat();
                boolean sparse2 = params.input2.isInSparseFormat();
                for(int i = 0; i*taskSize < params.N; i++) {

Reply via email to