Repository: incubator-systemml
Updated Branches:
  refs/heads/master 4cd982917 -> 2b5b12557
[SYSTEMML-540] Added fused relu_maxpooling

- Fused relu_maxpooling avoids the unnecessary dense-to-sparse-to-dense conversion.
  This operator makes relu a "no op".
- Note: fused relu_maxpooling is only supported in CP, not on GPU, as both relu and
  maxpooling invoke CuDNN functions.
- Also, improved the performance of maxpooling by computing indexes a priori.

Closes #329.

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/2b5b1255
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/2b5b1255
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/2b5b1255

Branch: refs/heads/master
Commit: 2b5b12557556d95c499730ae105807d996ad7a47
Parents: 4cd9829
Author: Niketan Pansare <npan...@us.ibm.com>
Authored: Fri Jan 6 10:23:47 2017 -0800
Committer: Niketan Pansare <npan...@us.ibm.com>
Committed: Fri Jan 6 10:23:47 2017 -0800

----------------------------------------------------------------------
 .../org/apache/sysml/hops/ConvolutionOp.java    |  15 ++-
 .../apache/sysml/lops/ConvolutionTransform.java |   5 +-
 .../instructions/CPInstructionParser.java       |   1 +
 .../cp/ConvolutionCPInstruction.java            |   7 +-
 .../matrix/data/ConvolutionParameters.java      |   5 +-
 .../sysml/runtime/matrix/data/LibMatrixDNN.java | 124 +++++++++++--------
 6 files changed, 93 insertions(+), 64 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2b5b1255/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/ConvolutionOp.java b/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
index f1efbb1..3f9ca7e 100644
--- a/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
+++ b/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
@@ -25,6 +25,7 @@ import org.apache.sysml.api.DMLScript;
 import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.hops.Hop.MultiThreadedHop;
 import org.apache.sysml.lops.ConvolutionTransform;
+import org.apache.sysml.lops.ConvolutionTransform.OperationTypes;
 import org.apache.sysml.lops.Lop;
 import org.apache.sysml.lops.LopsException;
 import org.apache.sysml.lops.LopProperties.ExecType;
@@ -136,10 +137,18 @@ public class ConvolutionOp extends Hop implements MultiThreadedHop
 			throw new HopsException("Incorrect number of inputs for " + op.name());
 		}
 		
-		Lop in = inputs.get(0).constructLops();
+		Lop in = null;
+		OperationTypes lopOp = HopsConv2Lops.get(op);
 		int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
-		ConvolutionTransform transform1 = new ConvolutionTransform( in, 
-				HopsConv2Lops.get(op), getDataType(), getValueType(), et, k);
+		if(op == ConvOp.MAX_POOLING && et == ExecType.CP && inputs.get(0) instanceof UnaryOp
+			&& ((UnaryOp) inputs.get(0)).getOp() == OpOp1.SELP) {
+			in = inputs.get(0).getInput().get(0).constructLops();
+			lopOp = OperationTypes.RELU_MAX_POOLING;
+		}
+		else {
+			in = inputs.get(0).constructLops();
+		}
+		ConvolutionTransform transform1 = new ConvolutionTransform( in, lopOp, getDataType(), getValueType(), et, k);
 		setOutputDimensions(transform1);
 		setLineNumbers(transform1);
 		in.addOutput(transform1);
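The HOP-to-LOP change above is what turns relu into a "no op": when a CP max-pooling hop sees a
select-positive (relu) unary op as its input, it skips that input and emits a single
RELU_MAX_POOLING lop over the relu's child. This is sound because relu is monotone, so it commutes
with the window maximum. A self-contained sketch of that identity (illustrative code, not part of
the patch):

// Checks relu(max(window)) == max over the window of relu(x), the identity behind the fusion.
public class ReluMaxIdentity {
	public static void main(String[] args) {
		double[] window = { -3.5, -0.2, 1.7, 0.0, -8.1 };
		double maxOfRelu = Double.NEGATIVE_INFINITY;  // maxpooling applied to relu(X)
		double maxRaw = Double.NEGATIVE_INFINITY;     // maxpooling applied to X
		for (double v : window) {
			maxOfRelu = Math.max(maxOfRelu, Math.max(0, v));
			maxRaw = Math.max(maxRaw, v);
		}
		double reluOfMax = Math.max(0, maxRaw);       // relu applied after maxpooling
		System.out.println(maxOfRelu == reluOfMax);   // prints true
	}
}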
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2b5b1255/src/main/java/org/apache/sysml/lops/ConvolutionTransform.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/lops/ConvolutionTransform.java b/src/main/java/org/apache/sysml/lops/ConvolutionTransform.java
index d4821ac..d69bd93 100644
--- a/src/main/java/org/apache/sysml/lops/ConvolutionTransform.java
+++ b/src/main/java/org/apache/sysml/lops/ConvolutionTransform.java
@@ -30,7 +30,7 @@ public class ConvolutionTransform extends Lop
 	
 	public enum OperationTypes {
-		MAX_POOLING, MAX_POOLING_BACKWARD,
+		MAX_POOLING, MAX_POOLING_BACKWARD, RELU_MAX_POOLING,
 		DIRECT_CONV2D, DIRECT_CONV2D_BACKWARD_FILTER, DIRECT_CONV2D_BACKWARD_DATA,
 		BIAS_ADD
 	};
@@ -99,6 +99,9 @@ public class ConvolutionTransform extends Lop
 		case MAX_POOLING:
 			return "maxpooling";
 		
+		case RELU_MAX_POOLING:
+			return "relu_maxpooling";
+		
 		case MAX_POOLING_BACKWARD:
 			return "maxpooling_backward";


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2b5b1255/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java b/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java
index 344cde4..18e3a48 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java
@@ -218,6 +218,7 @@ public class CPInstructionParser extends InstructionParser
 		String2CPInstructionType.put( "rsort" , CPINSTRUCTION_TYPE.Reorg);
 		
 		// Opcodes related to convolutions
+		String2CPInstructionType.put( "relu_maxpooling" , CPINSTRUCTION_TYPE.Convolution);
 		String2CPInstructionType.put( "maxpooling" , CPINSTRUCTION_TYPE.Convolution);
 		String2CPInstructionType.put( "maxpooling_backward" , CPINSTRUCTION_TYPE.Convolution);
 		String2CPInstructionType.put( "conv2d" , CPINSTRUCTION_TYPE.Convolution);


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2b5b1255/src/main/java/org/apache/sysml/runtime/instructions/cp/ConvolutionCPInstruction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/cp/ConvolutionCPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/cp/ConvolutionCPInstruction.java
index e0238aa..56f1460 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/cp/ConvolutionCPInstruction.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/cp/ConvolutionCPInstruction.java
@@ -20,6 +20,7 @@
 package org.apache.sysml.runtime.instructions.cp;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 
 import org.apache.sysml.parser.Expression.DataType;
 import org.apache.sysml.parser.Expression.ValueType;
@@ -89,7 +90,7 @@ public class ConvolutionCPInstruction extends UnaryCPInstruction {
 		String[] parts = InstructionUtils.getInstructionPartsWithValueType(str);
 		String opcode = parts[0];
-		if (opcode.equalsIgnoreCase("maxpooling")) {
+		if (opcode.equalsIgnoreCase("maxpooling") || opcode.equalsIgnoreCase("relu_maxpooling")) {
 			InstructionUtils.checkNumFields(parts, 15);
 			// stride1, stride2, padding1, padding2
 			// input_shape1, input_shape2, input_shape3, input_shape4,
@@ -231,12 +232,14 @@ public class ConvolutionCPInstruction extends UnaryCPInstruction {
 		int Q = (int) ConvolutionUtils.getQ(W, S, stride_w, pad_w);
 		
 		ConvolutionParameters params = new ConvolutionParameters(N, C, H, W, K, R, S, stride_h, stride_w, pad_h, pad_w, _numThreads);
-		if (instOpcode.equalsIgnoreCase("maxpooling")) {
+		if (instOpcode.equalsIgnoreCase("maxpooling") || instOpcode.equalsIgnoreCase("relu_maxpooling")) {
 			if(matBlock.isEmptyBlock()) {
 				outputBlock = new MatrixBlock(N, C*P*Q, true, 0);
 			}
 			else {
 				outputBlock = getDenseOutputBlock(ec, N, C*P*Q);
+				if(instOpcode.equalsIgnoreCase("maxpooling"))
+					Arrays.fill(outputBlock.getDenseBlock(), -Double.MAX_VALUE);
 				LibMatrixDNN.maxpooling(matBlock, outputBlock, params);
 			}
 		}
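At the instruction level, the fusion comes down to how the dense output is initialized: the patch
relies on the output block starting out zero-filled, and only the plain "maxpooling" opcode
overwrites it with -Double.MAX_VALUE before calling LibMatrixDNN.maxpooling. Because the pooling
kernel only accumulates with Math.max, the zero floor used for relu_maxpooling yields
max(0, window maximum), i.e. relu for free. A small illustrative snippet (not SystemML API) of the
two initializations:

// Illustrative only: pooling one all-negative window under the two initializations used above.
public class PoolInitDemo {
	public static void main(String[] args) {
		double[] window = { -4.0, -1.5, -0.5 };
		double plain = -Double.MAX_VALUE; // "maxpooling": Arrays.fill(..., -Double.MAX_VALUE)
		double fused = 0;                 // "relu_maxpooling": zero-initialized output
		for (double v : window) {
			plain = Math.max(plain, v);
			fused = Math.max(fused, v);
		}
		System.out.println(plain);        // -0.5 -> true maximum, may be negative
		System.out.println(fused);        //  0.0 -> maximum clamped at zero, i.e. relu(max)
	}
}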
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2b5b1255/src/main/java/org/apache/sysml/runtime/matrix/data/ConvolutionParameters.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/ConvolutionParameters.java b/src/main/java/org/apache/sysml/runtime/matrix/data/ConvolutionParameters.java
index 27fcf87..cd37c06 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/ConvolutionParameters.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/ConvolutionParameters.java
@@ -19,8 +19,6 @@
 
 package org.apache.sysml.runtime.matrix.data;
 
-import java.util.concurrent.atomic.AtomicLong;
-
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.util.ConvolutionUtils;
 
@@ -33,10 +31,11 @@ public class ConvolutionParameters {
 	public int K; public int R; public int S; public int stride_h; public int stride_w; public int pad_h; public int pad_w;
 	public int P; public int Q; public int numThreads;
 	
-	public AtomicLong outputNNZ = new AtomicLong(-1);
 	
 	MatrixBlock input1; MatrixBlock input2; MatrixBlock output;
 	
+	public int [] start_indexes_h, end_indexes_h, start_indexes_w, end_indexes_w;
+	
 	private int convertToInt(long val) throws DMLRuntimeException {
 		if( val > Integer.MAX_VALUE ) {
 			throw new DMLRuntimeException("The value for ConvolutionParameters is too large:" + val);


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2b5b1255/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
index 62b1513..1400b31 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
@@ -379,9 +379,31 @@ public class LibMatrixDNN {
 		if (params.output.isInSparseFormat())
 			throw new DMLRuntimeException("Sparse maxpooling_backward is not supported");
 		
+		fillIndexesArray(params);
 		runConvTask(TaskType.MaxPooling_Backward, params);
 	}
 	
+	private static void fillIndexesArray(ConvolutionParameters params) {
+		params.start_indexes_h = new int[params.P];
+		params.end_indexes_h = new int[params.P];
+		params.start_indexes_w = new int[params.Q];
+		params.end_indexes_w = new int[params.Q];
+		for (int p = 0; p < params.P; p++) {
+			int start_index_h = p * params.stride_h - params.pad_h;
+			final int end_index_h = Math.min(start_index_h + params.R, params.H);
+			start_index_h = Math.max(start_index_h, 0);
+			params.start_indexes_h[p] = start_index_h;
+			params.end_indexes_h[p] = end_index_h;
+		}
+		for (int q = 0; q < params.Q; q++) {
+			int start_index_w = Math.max(q * params.stride_w - params.pad_w, 0);
+			int end_index_w = Math.min(start_index_w + params.S, params.W);
+			start_index_w = Math.max(start_index_w, 0);
+			params.start_indexes_w[q] = start_index_w;
+			params.end_indexes_w[q] = end_index_w;
+		}
+	}
+	
 	private static void doPoolingBackward(int n, ConvolutionParameters params) throws DMLRuntimeException {
 		double [] inputArray = null;
 		if (!params.input1.isInSparseFormat())
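fillIndexesArray works because the clamped window boundaries depend only on the output row p
(respectively column q), never on the image n or channel c, so they can be computed once per call
instead of inside every inner loop. A standalone sketch of the same precomputation, using the usual
pooled-output-size formula (illustrative values, not SystemML classes):

// Precompute pooling-window boundaries once and reuse them for every (n, c) pair.
public class PoolIndexDemo {
	public static void main(String[] args) {
		int H = 5, R = 3, stride_h = 2, pad_h = 1;    // illustrative shape
		int P = (H + 2 * pad_h - R) / stride_h + 1;   // pooled output height
		int[] start_h = new int[P], end_h = new int[P];
		for (int p = 0; p < P; p++) {
			int start = p * stride_h - pad_h;
			end_h[p] = Math.min(start + R, H);        // clamp at the bottom edge
			start_h[p] = Math.max(start, 0);          // clamp away the top padding
			System.out.println("p=" + p + " -> [" + start_h[p] + ", " + end_h[p] + ")");
		}
		// Inner loops now read start_h[p]/end_h[p] instead of recomputing Math.min/Math.max per cell.
	}
}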
@@ -419,10 +441,7 @@
 					double inVal = doutArray[n*params.C*params.P*params.Q + c*params.P*params.Q + p * params.Q + q];
 					if(inVal != 0) {
 						final int inputOffset = n*params.C*params.H*params.W + c*params.H*params.W;
-						int start_index_h = p * params.stride_h - params.pad_h;
-						final int end_index_h = Math.min(start_index_h + params.R, params.H);
-						start_index_h = Math.max(start_index_h, 0);
-						int maxIndex = getMaxIndexSparse(start_index_h, end_index_h, q, inputOffset, n, c, params.input1, params);
+						int maxIndex = getMaxIndexSparse(p, q, inputOffset, n, c, params.input1, params);
 						outputArray[maxIndex] += inVal;
 					}
 				}
@@ -446,10 +465,7 @@
 				int q = tensorIndexes[2];
 				
 				final int inputOffset = n*params.C*params.H*params.W + c*params.H*params.W;
-				int start_index_h = p * params.stride_h - params.pad_h;
-				final int end_index_h = Math.min(start_index_h + params.R, params.H);
-				start_index_h = Math.max(start_index_h, 0);
-				int maxIndex = getMaxIndexSparse(start_index_h, end_index_h, q, inputOffset, n, c, params.input1, params);
+				int maxIndex = getMaxIndexSparse(p, q, inputOffset, n, c, params.input1, params);
 				outputArray[maxIndex] += ijv.getV();
 			}
 			
@@ -469,10 +485,7 @@
 					int q = tensorIndexes[2];
 					
 					final int inputOffset = n*params.C*params.H*params.W + c*params.H*params.W;
-					int start_index_h = p * params.stride_h - params.pad_h;
-					final int end_index_h = Math.min(start_index_h + params.R, params.H);
-					start_index_h = Math.max(start_index_h, 0);
-					int maxIndex = getMaxIndex(start_index_h, end_index_h, q, inputOffset, inputArray, params);
+					int maxIndex = getMaxIndex(p, q, inputOffset, inputArray, params);
 					outputArray[maxIndex] += ijv.getV();
 				}
 			}
@@ -484,20 +497,15 @@
 			final int outputOffset = n*params.C*params.P*params.Q + c*params.P*params.Q;
 			
 			for (int p = 0; p < params.P; p++) {
-				int start_index_h = p * params.stride_h - params.pad_h;
-				final int end_index_h = Math.min(start_index_h + params.R, params.H);
-				start_index_h = Math.max(start_index_h, 0);
-				
 				for (int q = 0; q < params.Q; q++) {
-					int maxIndex = getMaxIndex(start_index_h, end_index_h, q, inputOffset, inputArray, params);
+					int maxIndex = getMaxIndex(p, q, inputOffset, inputArray, params);
 					outputArray[maxIndex] += doutArray[outputOffset + p * params.Q + q];
 				}
 			}
 		}
 	}
 	
-	private static int getMaxIndexSparse(int start_index_h, int end_index_h, 
-			int q, int inputOffset, int n, int c, MatrixBlock input, ConvolutionParameters params) throws DMLRuntimeException {
+	private static int getMaxIndexSparse(int p, int q, int inputOffset, int n, int c, MatrixBlock input, ConvolutionParameters params) throws DMLRuntimeException {
 		if(!input.isInSparseFormat())
 			throw new DMLRuntimeException("Incorrect usage: Only sparse format supported");
 		
@@ -505,9 +513,10 @@
 		Iterator<IJV> iter = input.sparseBlock.getIterator(n, n+1);
 		int [] tensorIndexes = new int[3];
 		
-		int start_index_w = Math.max(q * params.stride_w - params.pad_w, 0);
-		int end_index_w = Math.min(start_index_w + params.S, params.W);
-		start_index_w = Math.max(start_index_w, 0);
+		int start_index_h = params.start_indexes_h[p];
+		int end_index_h = params.end_indexes_h[p];
+		int start_index_w = params.start_indexes_w[q];
+		int end_index_w = params.end_indexes_w[q];
 		
 		int maxIndex = inputOffset + start_index_h*params.W + start_index_w;
 		double maxVal = -Double.MAX_VALUE;
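For orientation: maxpooling_backward routes each incoming gradient value to the input position that
produced the window maximum in the forward pass, which is why every variant above reduces to a
getMaxIndex/getMaxIndexSparse lookup followed by a scatter-add. A toy one-dimensional dense version
of that pattern (illustrative names only, not the SystemML implementation):

// Toy 1-D max-pooling backward: scatter each output gradient onto the argmax of its window.
// start[p]/end[p] play the role of the precomputed start_indexes_h/end_indexes_h arrays.
public class PoolBackwardDemo {
	static void poolBackward1D(double[] in, double[] dout, double[] din, int[] start, int[] end) {
		for (int p = 0; p < dout.length; p++) {
			int maxIndex = start[p];
			for (int i = start[p]; i < end[p]; i++)
				if (in[i] > in[maxIndex])
					maxIndex = i;
			din[maxIndex] += dout[p];   // mirrors outputArray[maxIndex] += inVal above
		}
	}

	public static void main(String[] args) {
		double[] in   = { 1, 5, 2, 7 };
		double[] dout = { 0.5, 0.25 };               // gradients for the two pooled cells
		double[] din  = new double[in.length];
		poolBackward1D(in, dout, din, new int[]{0, 2}, new int[]{2, 4});
		System.out.println(java.util.Arrays.toString(din));  // [0.0, 0.5, 0.0, 0.25]
	}
}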
@@ -532,11 +541,11 @@ public class LibMatrixDNN {
 		return maxIndex;
 	}
 	
-	private static int getMaxIndex(int start_index_h, int end_index_h, 
-			int q, int inputOffset, double [] inputArray, ConvolutionParameters params) {
-		int start_index_w = q * params.stride_w - params.pad_w;
-		int end_index_w = Math.min(start_index_w + params.S, params.W);
-		start_index_w = Math.max(start_index_w, 0);
+	private static int getMaxIndex(int p, int q, int inputOffset, double [] inputArray, ConvolutionParameters params) {
+		int start_index_h = params.start_indexes_h[p];
+		int end_index_h = params.end_indexes_h[p];
+		int start_index_w = params.start_indexes_w[q];
+		int end_index_w = params.end_indexes_w[q];
 		
 		int maxIndex = inputOffset + start_index_h*params.W + start_index_w;
 		double maxVal = -Double.MAX_VALUE;
@@ -619,12 +628,11 @@
 			throw new DMLRuntimeException("Incorrect input dimensions in maxpooling:" + input.getNumRows() + " " + input.getNumColumns() + " " + params.N + " " + params.K*params.P*params.Q);
 		}
 		
-		params.outputNNZ.set(0);
+		fillIndexesArray(params);
 		runConvTask(TaskType.MaxPooling_Forward, params);
-		outputBlock.setNonZeros(params.outputNNZ.get());
 	}
 	
-	private static void doPooling(int n, int c, ConvolutionParameters params) throws DMLRuntimeException {
+	private static void doPooling(int n, ConvolutionParameters params) throws DMLRuntimeException {
 		double [] inputArray = null;
 		if (!params.input1.isInSparseFormat())
 			inputArray = params.input1.getDenseBlock();
@@ -634,32 +642,40 @@
 		else
 			throw new DMLRuntimeException("Expected the output to be allocated in dense format");
 		
-		long tmpNNZ = 0;
-		for (int p = 0; p < params.P; p++) {
-			for (int q = 0; q < params.Q; q++) {
-				int start_index_h = p * params.stride_h - params.pad_h;
-				int start_index_w = q * params.stride_w - params.pad_w;
-				int end_index_h = Math.min(start_index_h + params.R, params.H);
-				int end_index_w = Math.min(start_index_w + params.S, params.W);
-				start_index_h = Math.max(start_index_h, 0);
-				start_index_w = Math.max(start_index_w, 0);
-				int out_index = n*params.C*params.P*params.Q + c*params.P*params.Q + p * params.Q + q;
-				outputArray[out_index] = -Double.MAX_VALUE;
-				for (int h = start_index_h; h < end_index_h; h++) {
-					for (int w = start_index_w; w < end_index_w; w++) {
-						double inVal = -1;
-						if(inputArray != null)
-							inVal = inputArray[n*params.C*params.H*params.W + c*params.H*params.W + h*params.W + w];
-						else
-							inVal = params.input1.quickGetValue(n, c*params.H*params.W + h*params.W + w);
-						outputArray[out_index] = Math.max(outputArray[out_index], inVal);
-						if(outputArray[out_index] != 0)
-							tmpNNZ++;
+		final int inOffset = n*params.C*params.H*params.W;
+		int out_index = n*params.C*params.P*params.Q;
+		final int HW = params.H*params.W;
+		
+		if(inputArray != null) {
+			for (int c = 0; c < params.C; c++) {
+				final int inOffset1 = inOffset + c*HW;
+				for (int p = 0; p < params.P; p++) {
+					for (int q = 0; q < params.Q; q++, out_index++) {
+						for (int h = params.start_indexes_h[p]; h < params.end_indexes_h[p]; h++) {
+							for (int w = params.start_indexes_w[q]; w < params.end_indexes_w[q]; w++) {
+								outputArray[out_index] = Math.max(outputArray[out_index], inputArray[inOffset1 + h*params.W + w]);
+							}
+						}
+					}
+				}
+			}
+		}
+		else {
+			// TODO: Optimize sparse maxpooling
+			// Low priority after adding fused relu_maxpooling operator as output of conv2d expected to be dense
+			for (int c = 0; c < params.C; c++) {
+				for (int p = 0; p < params.P; p++) {
+					for (int q = 0; q < params.Q; q++, out_index++) {
+						for (int h = params.start_indexes_h[p]; h < params.end_indexes_h[p]; h++) {
+							for (int w = params.start_indexes_w[q]; w < params.end_indexes_w[q]; w++) {
+								double inVal = params.input1.quickGetValue(n, c*HW + h*params.W + w);
+								outputArray[out_index] = Math.max(outputArray[out_index], inVal);
+							}
+						}
 					}
 				}
 			}
 		}
-		params.outputNNZ.addAndGet(tmpNNZ);
 	}
 	
 	private static void doRotate180(int inputN, int outputN, MatrixBlock input,
@@ -818,9 +834,7 @@
 			case MaxPooling_Forward:
 			{
 				for(int n = n1; n < n2; n++) {
-					for (int c = 0; c < params.C; c++) {
-						doPooling(n, c, params);
-					}
+					doPooling(n, params);
 				}
 				break;
 			}
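Taken together, the patch means a CP relu_maxpooling over an N x (C*H*W) input produces an
N x (C*P*Q) output whose cells are max(0, window maximum). A naive reference implementation of that
semantics can serve as a test oracle for the fused kernel; everything below is illustrative and
independent of the SystemML classes:

// Naive reference for fused relu_maxpooling: out[n][c*P*Q + p*Q + q] = max(0, window maximum).
public class ReluMaxPoolReference {
	static double[][] reluMaxPool(double[][] in, int N, int C, int H, int W,
			int R, int S, int strideH, int strideW, int padH, int padW) {
		int P = (H + 2 * padH - R) / strideH + 1;
		int Q = (W + 2 * padW - S) / strideW + 1;
		double[][] out = new double[N][C * P * Q];           // zero-initialized => implicit relu
		for (int n = 0; n < N; n++)
			for (int c = 0; c < C; c++)
				for (int p = 0; p < P; p++)
					for (int q = 0; q < Q; q++) {
						int hs = Math.max(p * strideH - padH, 0), he = Math.min(p * strideH - padH + R, H);
						int ws = Math.max(q * strideW - padW, 0), we = Math.min(q * strideW - padW + S, W);
						double max = 0;                       // relu floor instead of -Double.MAX_VALUE
						for (int h = hs; h < he; h++)
							for (int w = ws; w < we; w++)
								max = Math.max(max, in[n][c * H * W + h * W + w]);
						out[n][c * P * Q + p * Q + q] = max;
					}
		return out;
	}
}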