Repository: systemml Updated Branches: refs/heads/master e7994b745 -> 0a0a40370
[SYSTEMML-2046] Large dense blocks in rotate180, relu, incl cleanups This patch adds support for large dense blocks in convolution rotate180, and relu backwards operations. Furthermore, this also includes some minor cleanups of unnecessary code duplication and inefficiencies. Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/185ab0e3 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/185ab0e3 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/185ab0e3 Branch: refs/heads/master Commit: 185ab0e3b34ecd8e8281027839a3d53fbacb9e42 Parents: e7994b7 Author: Matthias Boehm <[email protected]> Authored: Mon Jan 15 19:45:48 2018 -0800 Committer: Matthias Boehm <[email protected]> Committed: Mon Jan 15 19:45:48 2018 -0800 ---------------------------------------------------------------------- .../runtime/instructions/InstructionUtils.java | 8 +- .../runtime/matrix/data/LibMatrixDNNRelu.java | 95 ++++++++++++++---- .../matrix/data/LibMatrixDNNRotate180.java | 34 +++---- .../sysml/runtime/util/ConvolutionUtils.java | 100 ------------------- 4 files changed, 93 insertions(+), 144 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/185ab0e3/src/main/java/org/apache/sysml/runtime/instructions/InstructionUtils.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/InstructionUtils.java b/src/main/java/org/apache/sysml/runtime/instructions/InstructionUtils.java index 91a4546..e6affa0 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/InstructionUtils.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/InstructionUtils.java @@ -594,10 +594,8 @@ public class InstructionUtils * @param opcode the opcode * @param arg1IsScalar ? * @return scalar operator - * @throws DMLRuntimeException if DMLRuntimeException occurs */ public static ScalarOperator parseScalarBinaryOperator(String opcode, boolean arg1IsScalar) - throws DMLRuntimeException { //for all runtimes that set constant dynamically (cp/spark) double default_constant = 0; @@ -612,10 +610,8 @@ public class InstructionUtils * @param arg1IsScalar ? * @param constant ? * @return scalar operator - * @throws DMLRuntimeException if DMLRuntimeException occurs */ public static ScalarOperator parseScalarBinaryOperator(String opcode, boolean arg1IsScalar, double constant) - throws DMLRuntimeException { //commutative operators if ( opcode.equalsIgnoreCase("+") ){ @@ -754,8 +750,8 @@ public class InstructionUtils return new LeftScalarOperator(Divide.getDivideFnObject(), constant); } - throw new DMLRuntimeException("Unknown binary opcode " + opcode); - } + throw new RuntimeException("Unknown binary opcode " + opcode); + } public static BinaryOperator parseExtendedBinaryOperator(String opcode) throws DMLRuntimeException http://git-wip-us.apache.org/repos/asf/systemml/blob/185ab0e3/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNRelu.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNRelu.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNRelu.java index c44a032..f1c3ecb 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNRelu.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNRelu.java @@ -23,17 +23,15 @@ import java.util.concurrent.Callable; import org.apache.sysml.hops.OptimizerUtils; import org.apache.sysml.runtime.DMLRuntimeException; -import org.apache.sysml.runtime.functionobjects.Plus; import org.apache.sysml.runtime.instructions.InstructionUtils; -import org.apache.sysml.runtime.matrix.operators.BinaryOperator; -import org.apache.sysml.runtime.util.ConvolutionUtils; +import org.apache.sysml.runtime.matrix.operators.ScalarOperator; /** * This class contains the different implementation of rotate180 operation */ public class LibMatrixDNNRelu { - private static BinaryOperator PLUS = new BinaryOperator(Plus.getPlusFnObject()); + private static ScalarOperator GT0 = InstructionUtils.parseScalarBinaryOperator(">", false, 0); /** @@ -55,35 +53,94 @@ public class LibMatrixDNNRelu /** * Performs the operation: (X gt 0) * dout */ - public static class ReluBackward implements Callable<Long> + public static class ReluBackward implements Callable<Long> { - public int _rl; public int _ru; - private final ConvolutionParameters _params; - double [] outputArray; int numOutCols; + public final int _rl, _ru; + private final ConvolutionParameters _params; public ReluBackward(int rl, int ru, ConvolutionParameters params) { _rl = rl; _ru = ru; _params = params; - outputArray= params.output.getDenseBlockValues(); - numOutCols = params.input1.getNumColumns(); } @Override public Long call() throws Exception { + //note: X (m x n), dout (m x n) -> out (m x n) + DenseBlock out = _params.output.getDenseBlock(); + final int n = _params.input1.getNumColumns(); if(!_params.input1.isInSparseFormat() && !_params.input2.isInSparseFormat()) { - double [] inputArr = _params.input1.getDenseBlockValues(); - double [] doutArr = _params.input2.getDenseBlockValues(); - for(int i = _rl*numOutCols; i < _ru*numOutCols; i++) { - outputArray[i] = inputArr[i] > 0 ? doutArr[i] : 0; + DenseBlock x = _params.input1.getDenseBlock(); + DenseBlock dout = _params.input2.getDenseBlock(); + for(int i = _rl; i < _ru; i++) { + double[] xvals = x.values(i), doutvals = dout.values(i), cvals = out.values(i); + int xpos = x.pos(i), doutpos = dout.pos(i), cpos = out.pos(i); + for(int j=0; j<n; j++) + cvals[cpos+j] = xvals[xpos+j] > 0 ? doutvals[doutpos +j] : 0; } } else { - // Perform (X > 0) - ConvolutionUtils.scalarOperations(_params.input1, outputArray, _rl*numOutCols, numOutCols, _rl, _ru, - InstructionUtils.parseScalarBinaryOperator(">", false, 0)); - // Then perform (X > 0) * dout - ConvolutionUtils.binaryOperationInPlace(_params.input2, outputArray, _rl*numOutCols, numOutCols, _rl, _ru, PLUS); + scalarOperations(_params.input1, out, n, _rl, _ru, GT0); // (X > 0) + binaryOperationInPlacePlus(_params.input2, out, n, _rl, _ru); // (X > 0) * dout } return 0L; } } + + private static void scalarOperations(MatrixBlock src, DenseBlock c, + int destNumCols, int src_rl, int src_ru, ScalarOperator op) + throws DMLRuntimeException + { + if(src.isInSparseFormat()) { + for(int i = src_rl; i < src_ru; i++) { + if( src.getSparseBlock().isEmpty(i) ) continue; + int apos = src.getSparseBlock().pos(i); + int alen = src.getSparseBlock().size(i); + int[] aix = src.getSparseBlock().indexes(i); + double[] avals = src.getSparseBlock().values(i); + double[] cvals = c.values(i); + int cix = c.pos(i); + for(int j = apos; j < apos+alen; j++) + cvals[ cix+aix[j] ] = op.executeScalar(avals[j]); + } + } + else { + DenseBlock a = src.getDenseBlock(); + for(int i = src_rl; i < src_ru; i++) { + double[] avals = a.values(i), cvals = c.values(i); + int aix = a.pos(i), cix = c.pos(i); + for(int j=0; j<destNumCols; j++) + cvals[cix+j] = op.executeScalar(avals[aix+j]); + } + } + } + + private static void binaryOperationInPlacePlus(MatrixBlock src, + DenseBlock c, int destNumCols, int src_rl, int src_ru) + throws DMLRuntimeException + { + if( src.isEmptyBlock(false) ) + return; //do nothing (add 0); + + if(src.isInSparseFormat()) { + for(int i = src_rl; i < src_ru; i++) { + if( src.getSparseBlock().isEmpty(i) ) continue; + int apos = src.getSparseBlock().pos(i); + int alen = src.getSparseBlock().size(i); + int[] aix = src.getSparseBlock().indexes(i); + double[] avals = src.getSparseBlock().values(i); + double[] cvals = c.values(i); + int cix = c.pos(i); + for(int j = apos; j < apos+alen; j++) + cvals[ cix+aix[j] ] += avals[j]; + } + } + else { //DENSE + DenseBlock a = src.getDenseBlock(); + for(int i = src_rl; i < src_ru; i++) { + double[] avals = a.values(i), cvals = c.values(i); + int aix = a.pos(i), cix = c.pos(i); + for(int j=0; j<destNumCols; j++) + cvals[cix+j] += avals[aix+j]; + } + } + } } http://git-wip-us.apache.org/repos/asf/systemml/blob/185ab0e3/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNRotate180.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNRotate180.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNRotate180.java index b463794..f4921c4 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNRotate180.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNRotate180.java @@ -29,10 +29,9 @@ public class LibMatrixDNNRotate180 public void execute(int inputN, int outputN); public static Rotate180Worker getWorker(MatrixBlock in, MatrixBlock out, ConvolutionParameters params, boolean zeroOutSparseOutput, boolean trans) { - if(!in.isInSparseFormat()) - return new DenseRotate180Worker(in, out.getDenseBlockValues(), params); - else - return new SparseRotate180Worker(in, out, params, trans); + return in.isInSparseFormat() ? + new SparseRotate180Worker(in, out, params, trans) : + new DenseRotate180Worker(in, out, params); } } @@ -40,27 +39,24 @@ public class LibMatrixDNNRotate180 * Performing dense rotate180 (general case) */ private static class DenseRotate180Worker implements Rotate180Worker { - private final double[] inputArray, outputArray; + private final DenseBlock in, out; private final ConvolutionParameters params; - public DenseRotate180Worker(MatrixBlock input, double[] outputArray, ConvolutionParameters params) { - this.outputArray = outputArray; + public DenseRotate180Worker(MatrixBlock input, MatrixBlock output, ConvolutionParameters params) { + this.in = input.getDenseBlock(); + this.out = output.getDenseBlock(); this.params = params; - inputArray = input.getDenseBlockValues(); - if(inputArray == null || outputArray == null) - throw new RuntimeException("Incorrect usage: empty inputs"); } @Override public void execute(int inputN, int outputN) { - int outputOffset = outputN*params.K*params.P*params.Q; - for (int k = 0; k < params.K; k++) { - for (int p = 0; p < params.P; p++) { - for (int q = 0; q < params.Q; q++) { - outputArray[outputOffset + p*params.Q*params.K + q*params.K + k] = - inputArray[inputN*params.K*params.P*params.Q + k*params.P*params.Q + p*params.Q + q]; - } - } - } + //note: in (m x KPQ) -> out (m x KPQ) + double[] avals = in.values(inputN), cvals = out.values(outputN); + int aix = in.pos(inputN), cix = out.pos(outputN); + int K = params.K, P = params.P, Q = params.Q; + for (int k = 0; k < K; k++) + for (int p = 0; p < P; p++) + for (int q = 0; q < Q; q++) + cvals[cix + p*Q*K + q*K + k] = avals[aix + k*P*Q + p*Q + q]; } } http://git-wip-us.apache.org/repos/asf/systemml/blob/185ab0e3/src/main/java/org/apache/sysml/runtime/util/ConvolutionUtils.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/util/ConvolutionUtils.java b/src/main/java/org/apache/sysml/runtime/util/ConvolutionUtils.java index 87f1240..9800773 100644 --- a/src/main/java/org/apache/sysml/runtime/util/ConvolutionUtils.java +++ b/src/main/java/org/apache/sysml/runtime/util/ConvolutionUtils.java @@ -22,11 +22,7 @@ package org.apache.sysml.runtime.util; import java.util.Arrays; import org.apache.sysml.runtime.DMLRuntimeException; -import org.apache.sysml.runtime.functionobjects.Multiply; -import org.apache.sysml.runtime.functionobjects.Plus; import org.apache.sysml.runtime.matrix.data.MatrixBlock; -import org.apache.sysml.runtime.matrix.operators.BinaryOperator; -import org.apache.sysml.runtime.matrix.operators.ScalarOperator; public class ConvolutionUtils { @@ -73,102 +69,6 @@ public class ConvolutionUtils { } return ret; } - - - // Performs dest[destPos...] op= thatValue[src_rl:src_ru,] - public static void binaryOperationInPlace(MatrixBlock src, double [] dest, - int destPos, int destNumCols, int src_rl, int src_ru, BinaryOperator op) throws DMLRuntimeException { - if(src.isInSparseFormat()) { - if(src.isEmptyBlock() && op.fn == Plus.getPlusFnObject()) { - // Do nothing: Inplace addition by zero - } - else if(src.isEmptyBlock() && op.fn == Multiply.getMultiplyFnObject()) { - // Inplace multiplication by zero - Arrays.fill(dest, destPos, destPos + (src_ru-src_rl)*destNumCols, 0); - } - else if(op.fn == Plus.getPlusFnObject()) { - for(int i = src_rl, cix = destPos; i < src_ru; i++, cix += destNumCols) { - if( !src.getSparseBlock().isEmpty(i) ) { - int apos = src.getSparseBlock().pos(i); - int alen = src.getSparseBlock().size(i); - int[] aix = src.getSparseBlock().indexes(i); - double[] avals = src.getSparseBlock().values(i); - for(int j = apos; j < apos+alen; j++) { - dest[ cix+aix[j] ] += avals[j]; - } - } - } - } - else if(op.fn == Multiply.getMultiplyFnObject()) { - // Unsafe operation - for(int i = src_rl, cix = destPos; i < src_ru; i++, cix += destNumCols) { - if( !src.getSparseBlock().isEmpty(i) ) { - int apos = src.getSparseBlock().pos(i); - int alen = src.getSparseBlock().size(i); - int[] aix = src.getSparseBlock().indexes(i); - double[] avals = src.getSparseBlock().values(i); - int prevDestIndex = 0; - for(int j = apos; j < apos+alen; j++) { - // Multiplication by zero. Assumption: aix is sorted. - Arrays.fill(dest, cix+prevDestIndex, cix+aix[j], 0); - prevDestIndex = aix[j]+1; - dest[ cix+aix[j] ] *= avals[j]; - } - Arrays.fill(dest, cix+prevDestIndex, cix+destNumCols, 0); - } - else { - Arrays.fill(dest, cix, cix + destNumCols, 0); - } - } - } - else { - // As operation could be safe or unsafe. This will be caught at development time. - throw new DMLRuntimeException("Unimplemented sparse operation"); - } - } - else { - double [] inputArr = src.getDenseBlockValues(); - if(op.fn == Plus.getPlusFnObject()) { - for(int i = destPos; i < src_ru*destNumCols; i++) { - dest[i] += inputArr[i]; - } - } - else if(op.fn == Multiply.getMultiplyFnObject()) { - for(int i = destPos; i < src_ru*destNumCols; i++) { - dest[i] *= inputArr[i]; - } - } - else { - for(int i = destPos; i < src_ru*destNumCols; i++) { - dest[i] = op.fn.execute(dest[i], inputArr[i]); - } - } - } - } - - // Performs dest[destPos...] = src[src_rl:src_ru,] op scalar - public static void scalarOperations(MatrixBlock src, double [] dest, - int destPos, int destNumCols, int src_rl, int src_ru, ScalarOperator scalarOp) throws DMLRuntimeException { - if(src.isInSparseFormat()) { - for(int i = src_rl, cix = destPos; i < src_ru; i++, cix += destNumCols) { - if( !src.getSparseBlock().isEmpty(i) ) { - int apos = src.getSparseBlock().pos(i); - int alen = src.getSparseBlock().size(i); - int[] aix = src.getSparseBlock().indexes(i); - double[] avals = src.getSparseBlock().values(i); - for(int j = apos; j < apos+alen; j++) { - dest[ cix+aix[j] ] = scalarOp.executeScalar(avals[j]); - } - } - } - } - else { - double [] inputArr = src.getDenseBlockValues(); - for(int i = destPos; i < src_ru*destNumCols; i++) { - dest[i] = scalarOp.executeScalar(inputArr[i]); - } - } - } public static void fillBias(MatrixBlock bias, double [] outputArray, int src_rl, int src_ru, int N, int K, int PQ) throws DMLRuntimeException { // bias.getNumColumns() == 1 checked outside
