http://git-wip-us.apache.org/repos/asf/systemml/blob/45eec2d2/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNPoolingBackwardHelper.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNPoolingBackwardHelper.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNPoolingBackwardHelper.java deleted file mode 100644 index 3dfb545..0000000 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNPoolingBackwardHelper.java +++ /dev/null @@ -1,299 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.sysml.runtime.matrix.data; - -import java.util.Arrays; -import java.util.concurrent.Callable; - -import org.apache.sysml.runtime.matrix.data.LibMatrixDNNHelper.CellIndex3; - -/** - * This class contains the set of operators used for performing pooling backward - */ -public class LibMatrixDNNPoolingBackwardHelper { - /** - * Performs the maxpooling backward operation for dense input and dense error (dout) - */ - public static class PoolingBackwardDenseDense implements Callable<Long> - { - public int _rl; public int _ru; - private final ConvolutionParameters _params; - boolean performReluBackward; - double [] inputArray, doutArray; - MatrixBlock output; - int C; int CHW; int P; int Q; int HW; int CPQ; int PQ; - public PoolingBackwardDenseDense(int rl, int ru, ConvolutionParameters params, boolean performReluBackward) { - _rl = rl; _ru = ru; - _params = params; - this.performReluBackward = performReluBackward; - inputArray = params.input1.getDenseBlockValues(); - doutArray = params.input2.getDenseBlockValues(); - output = params.output; - C = params.C; CHW = params.C*params.H*params.W; HW = params.H*params.W; - P = params.P; Q = params.Q; CPQ = params.C*params.P*params.Q; - PQ = params.P*params.Q; - if (inputArray == null || doutArray == null || output.getDenseBlock() == null ) - throw new RuntimeException("Incorrect usage: empty inputs"); - } - - @Override - public Long call() throws Exception { - double[] out = output.getDenseBlockValues(); - for(int n = _rl; n < _ru; n++) { - for (int c = 0; c < C; c++) { - final int inputOffset = n*CHW + c*HW; - final int outputOffset = n*CPQ + c*PQ; - for (int p = 0; p < P; p++) { - for (int q = 0; q < Q; q++) { - int maxIndex = LibMatrixDNNHelper.getMaxIndex(p, q, inputOffset, inputArray, _params, performReluBackward); - if(maxIndex != -1) - out[maxIndex] += doutArray[outputOffset + p * Q + q]; - } - } - } - } - //thread-local nnz maintenance - return output.recomputeNonZeros(_rl, _ru-1); - } - } - - /** 
- * Performs the maxpooling backward operation for dense input and sparse error (dout) - */ - public static class PoolingBackwardDenseSparse implements Callable<Long> - { - public int _rl; public int _ru; - private final ConvolutionParameters _params; - MatrixBlock output; - boolean performReluBackward; - double [] inputArray; MatrixBlock dout; - int C; int CHW; int P; int Q; int HW; - public PoolingBackwardDenseSparse(int rl, int ru, ConvolutionParameters params, boolean performReluBackward) { - _rl = rl; _ru = ru; - _params = params; - this.performReluBackward = performReluBackward; - inputArray = params.input1.getDenseBlockValues(); - dout = params.input2; - output = params.output; - C = params.C; CHW = params.C*params.H*params.W; HW = params.H*params.W; - P = params.P; Q = params.Q; - if (inputArray == null || output.getDenseBlock() == null ) - throw new RuntimeException("Incorrect usage: empty inputs"); - if (!params.input2.isInSparseFormat()) - throw new RuntimeException("Incorrect usage: Call optimized versions"); - } - - @Override - public Long call() throws Exception { - CellIndex3 ix = new CellIndex3(); - double[] out = output.getDenseBlockValues(); - SparseBlock sblock = dout.sparseBlock; - for(int n = _rl; n < _ru; n++) { - if( sblock.isEmpty(n) ) continue; - int apos = sblock.pos(n); - int alen = sblock.size(n); - int[] aix = sblock.indexes(n); - double[] avals = sblock.values(n); - for(int j = apos; j < apos+alen; j++) { - ix = LibMatrixDNNHelper.computeTensorIndexes(aix[j], P, Q, ix); - final int inputOffset = n*CHW + ix.ix1*HW; - int maxIndex = LibMatrixDNNHelper.getMaxIndex(ix.ix2, ix.ix3, - inputOffset, inputArray, _params, performReluBackward); - if(maxIndex != -1) - out[maxIndex] += avals[j]; - } - } - //thread-local nnz maintenance - return output.recomputeNonZeros(_rl, _ru-1); - } - } - - /** - * Performs the maxpooling backward operation for sparse input and dense error (dout) - */ - public static class PoolingBackwardSparseDense implements 
Callable<Long> - { - private final int _rl, _ru; - private final ConvolutionParameters _params; - private final boolean reluBack; - protected final MatrixBlock doutput, output; - - protected PoolingBackwardSparseDense(int rl, int ru, ConvolutionParameters params, boolean relu, MatrixBlock dout, MatrixBlock out) { - _rl = rl; _ru = ru; - _params = params; - reluBack = relu; - doutput = dout; - output = out; - } - - public PoolingBackwardSparseDense(int rl, int ru, ConvolutionParameters params, boolean relu) { - this(rl, ru, params, relu, params.input2, params.output); - if (doutput.getDenseBlock() == null || output.getDenseBlock() == null ) - throw new RuntimeException("Incorrect usage: empty inputs"); - if (!params.input1.isInSparseFormat()) - throw new RuntimeException("Incorrect usage: sparse input1 expected"); - } - - @Override - public Long call() throws Exception - { - final int P = _params.P, Q = _params.Q, W = _params.W; - final int C = _params.C, R = _params.R, S = _params.S; - final int padh = _params.pad_h, padw = _params.pad_w; - final int strideh = _params.stride_h, stridew = _params.stride_w; - final int PQ = _params.P * _params.Q; - final int CPQ = _params.C * _params.P * _params.Q; - final int HW = _params.H * _params.W; - final int CHW = _params.C * _params.H * _params.W; - - //allocate auxiliary data structures - double[] maxVal = new double[PQ]; - int[] maxIx = new int[PQ]; - - for(int n = _rl; n < _ru; n++) { - for (int c = 0; c < C; c++) { - //step 0: basic initializations - final int outOffset = n*CHW + c*HW; - - //step 1: perform maxpooling w/ index maintenance in a - //single, sequential pass over the sparse input matrix - maxpoolingForward(maxVal, maxIx, n, c, - padh, padw, strideh, stridew, C, P, Q, R, S, HW, W); - - //step 2: perform maxpooling backward - maxpoolingBackward(maxIx, outOffset, n, c, C, Q, PQ, CPQ); - } - } - //thread-local nnz maintenance - return output.recomputeNonZeros(_rl, _ru-1); - } - - protected void 
maxpoolingForward(double[] maxVal, int[] maxIx, int n, int c, int padh, int padw, int strideh, int stridew, int C, int P, int Q, int R, int S, int HW, int W) { - SparseBlock sblock = _params.input1.getSparseBlock(); - if( !sblock.isEmpty(n) ) { - Arrays.fill(maxVal, -Double.MAX_VALUE); - int apos = sblock.pos(n); - int alen = sblock.size(n); - int[] aix = sblock.indexes(n); - double[] avals = sblock.values(n); - //find channel start and end, w/ robustness for non-existing entries - int cpos = (c==0) ? 0 : sblock.posFIndexGTE(n, c*HW); - int cpos2 = (c+1==C) ? alen : sblock.posFIndexGTE(n, (c+1)*HW); - cpos = (cpos>=0) ? cpos : alen; - cpos2 = (cpos2>=0) ? cpos2 : alen; - int lastix = c*HW-1; - for(int j=apos+cpos; j<apos+cpos2; j++) { - //handle skipped zero values - update0(lastix+1, aix[j], maxVal, maxIx, padh, padw, strideh, stridew, P, Q, R, S, HW, W); - //handle current non-zero value - int h = (aix[j] % HW) / W; - int w = aix[j] % W; - double val = reluBack && avals[j] < 0 ? 0 : avals[j]; - update(val, maxVal, maxIx, h, w, padh, padw, strideh, stridew, P, Q, R, S, W); - //memoize last seen index - lastix = aix[j]; - } - //handle skipped zero values at end of row - update0(lastix+1, (c+1)*HW, maxVal, maxIx, padh, padw, strideh, stridew, P, Q, R, S, HW, W); - } - else { - //handle empty row - Arrays.fill(maxVal, 0); - for(int p = 0, ix=0; p < P; p++) { - int h = Math.max(-padh+p*strideh, 0); - for(int q = 0; q < Q; q++, ix++) { - int w = Math.max(-padw+q*stridew, 0); - maxIx[ix] = h * W + w; - } - } - } - } - - protected void maxpoolingBackward(int[] maxIx, int outOffset, int n, int c, int C, int Q, int PQ, int CPQ) { - double[] dout = doutput.getDenseBlockValues(); - double[] out = output.getDenseBlockValues(); - final int doutOffset = n*CPQ + c*PQ; - for( int pq = 0; pq < PQ; pq++ ) - out[ outOffset + maxIx[pq] ] += dout[ doutOffset + pq ]; - } - - private static void update0(int lix, int uix, double[] maxVal, int[] maxIx, int padh, int padw, int strideh, int 
stridew, int P, int Q, int R, int S, int HW, int W) { - //TODO exploit constant value and overlap for potential early abort - for(int i = lix; i<uix; i++) - update(0, maxVal, maxIx, (i%HW)/W, i%W, padh, padw, strideh, stridew, P, Q, R, S, W); - } - - private static void update(double val, double[] maxVal, int[] maxIx, int h, int w, int padh, int padw, int strideh, int stridew, int P, int Q, int R, int S, int W) { - //determine lower and upper bounds for p and q - //(see fillIndexesArray, solved for p and q, reversed) - int lp = Math.max((h+padh-R+strideh)/strideh, 0); - int up = Math.min((h+padh+strideh)/strideh, P); - int lq = Math.max((w+padw-S+stridew)/stridew, 0); - int uq = Math.min((w+padw+stridew)/stridew, Q); - - //maintain max index for all relevant p and q - int maxIndex = h * W + w; - for(int p = lp; p < up; p++) - for(int q = lq; q < uq; q++) { - int ix = p * Q + q; - if( maxVal[ix] < val ) { - maxVal[ix] = val; - maxIx[ix] = maxIndex; - } - } - } - } - - /** - * Performs the maxpooling backward operation for sparse input and sparse error (dout) - */ - public static class PoolingBackwardSparseSparse extends PoolingBackwardSparseDense - { - public PoolingBackwardSparseSparse(int rl, int ru, ConvolutionParameters params, boolean relu) { - super(rl, ru, params, relu, params.input2, params.output); - if (output.getDenseBlock() == null ) - throw new RuntimeException("Incorrect usage: empty outputs"); - if (!params.input1.isInSparseFormat() || !params.input2.isInSparseFormat()) - throw new RuntimeException("Incorrect usage: Call optimized versions"); - } - - @Override - protected void maxpoolingBackward(int[] maxIx, int outOffset, int n, int c, int C, int Q, int PQ, int CPQ) { - SparseBlock sblock = doutput.getSparseBlock(); - double[] out = output.getDenseBlockValues(); - if( sblock.isEmpty(n) ) - return; - int apos = sblock.pos(n); - int alen = sblock.size(n); - int[] aix = sblock.indexes(n); - double[] avals = sblock.values(n); - //find channel start and 
end, w/ robustness for non-existing entries - int cpos = (c==0) ? 0 : sblock.posFIndexGTE(n, c*PQ); - int cpos2 = (c+1==C) ? alen : sblock.posFIndexGTE(n, (c+1)*PQ); - cpos = (cpos>=0) ? cpos : alen; - cpos2 = (cpos2>=0) ? cpos2 : alen; - for(int j = apos+cpos; j<apos+cpos2; j++) { - int p = (aix[j] % PQ) / Q; - int q = aix[j] % Q; - int pq = p * Q + q; - out[ outOffset + maxIx[pq] ] += avals[j]; - } - } - } -}
http://git-wip-us.apache.org/repos/asf/systemml/blob/45eec2d2/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNPoolingHelper.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNPoolingHelper.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNPoolingHelper.java deleted file mode 100644 index 0377c50..0000000 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNPoolingHelper.java +++ /dev/null @@ -1,170 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.sysml.runtime.matrix.data; - -import java.util.Arrays; -import java.util.concurrent.Callable; - -/** - * This class contains the set of operators used for performing pooling - */ -public class LibMatrixDNNPoolingHelper { - - /** - * Performs the dense maxpooling - */ - public static class DenseMaxPooling implements Callable<Long> - { - private final int _rl, _ru; - private final ConvolutionParameters _params; - - public DenseMaxPooling(int rl, int ru, ConvolutionParameters params) { - _rl = rl; _ru = ru; - _params = params; - } - - @Override - public Long call() throws Exception { - final int C = _params.C, P = _params.P, Q = _params.Q; - final int R = _params.R, S = _params.S, H = _params.H, W = _params.W; - final int HW = _params.H*_params.W; - final int CHW = _params.C*_params.H*_params.W; - final int CPQ = C*P*Q; - double[] in = _params.input1.getDenseBlockValues(); - double[] out = _params.output.getDenseBlockValues(); - - double minValForMaxPoolOperations = _params.minValForMaxPoolOperations; - - //thread-local initialization of output block - if( !(_params.isStride1Pad0() && _params.isAllOnes(P, Q, W)) ) - Arrays.fill(out, _rl*CPQ, _ru*CPQ, minValForMaxPoolOperations); - - if( _params.isStride1Pad0() && _params.isAllOnes(P, Q, W) ) { - //quick-path w/o materialized index arrays and - //simplified inner loops for P = 1, Q = 1, W = 1 - int lenh = Math.min(R,H); - for(int i = _rl, oix=_rl*C; i < _ru; i++, oix+=C) - for (int c = 0, off=i*CHW; c < C; c++, off+=H) - out[oix+c] = max(minValForMaxPoolOperations, in, off, lenh); - } - else if( _params.isStride1Pad0() ) { - //quick-path w/o materialized index arrays - for(int i = _rl; i < _ru; i++) - for (int c = 0, off=i*CHW, oix=i*CPQ; c < C; c++, off+=HW) - for (int p = 0; p < P; p++, oix+=Q) - for (int h = p; h < Math.min(p+R,H); h++) - for (int q = 0, off2=off+h*W; q < Q; q++) - out[oix+q] = max(out[oix+q], in, off2+q, Math.min(S,W-q)); - } - else { //general case - int[] hl = 
_params.start_indexes_h, hu = _params.end_indexes_h; - int[] wl = _params.start_indexes_w, wu = _params.end_indexes_w; - for(int i = _rl; i < _ru; i++) - for (int c = 0, off=i*CHW, oix=i*CPQ; c < C; c++, off+=HW) - for (int p = 0; p < P; p++, oix+=Q) - for (int h = hl[p]; h < hu[p]; h++) - for (int q = 0, off2=off+h*W; q < Q; q++) - out[oix+q] = max(out[oix+q], in, off2+wl[q], wu[q]-wl[q]); - } - - //thread-local recomputation of non-zeros - return _params.output.recomputeNonZeros(_rl, _ru-1); - } - } - - /** - * Performs the sparse maxpooling - */ - public static class SparseMaxPooling implements Callable<Long> - { - private final int _rl, _ru; - private final ConvolutionParameters _params; - private double [] outputArray; - private final int C, P, Q, W, H, CPQ, PQ; - - public SparseMaxPooling(int rl, int ru, ConvolutionParameters params) { - _rl = rl; _ru = ru; - _params = params; - outputArray = params.output.getDenseBlockValues(); - C = params.C; P = params.P; Q = params.Q; H = params.H; - W = params.W; - CPQ = C*P*Q; - PQ = P*Q; - } - - @Override - public Long call() throws Exception { - //thread-local initialization of output block - Arrays.fill(outputArray, _rl *CPQ, _ru*CPQ, _params.minValForMaxPoolOperations); - - for(int n = _rl; n < _ru; n++) { - if( !_params.input1.sparseBlock.isEmpty(n) ) { - final int apos = _params.input1.sparseBlock.pos(n); - final int alen = _params.input1.sparseBlock.size(n); - final int [] aix = _params.input1.sparseBlock.indexes(n); - final double [] avals = _params.input1.sparseBlock.values(n); - int chw = 0; int index = apos; - for (int c = 0; c < C; c++) { - final int outOffset = n*CPQ + c*PQ; - for(int h = 0; h < H; h++) { - for(int w = 0; w < W; w++, chw++) { - // Take into account zero values as well - double nchwVal = 0; - if(aix[index] == chw) { - nchwVal = avals[index++]; - // Ensure that we satisfy the condition index < apos+alen - if(index >= apos+alen) index--; - } - // Perform maxpooling without binary search :) - 
// Tradeoff as compared to dense maxpooling: - // In dense maxpooling, iteration space CPQHW where H and W iterations are restricted by _params.start_indexes_h[p] - // and are eligible for JIT optimizations. - // In sparse maxpooling, iteration space CHWPQ without HW restrictions. - for (int p = 0; p < P; p++) { - if(h >= _params.start_indexes_h[p] && h < _params.end_indexes_h[p]) { - final int outOffsetWithp = outOffset + p*Q; - for (int q = 0; q < Q; q++) { - if(w >= _params.start_indexes_w[q] && w < _params.end_indexes_w[q]) { - outputArray[outOffsetWithp + q] = Math.max(outputArray[outOffsetWithp + q], nchwVal); - } - } - } - } - } - } - } - } - else { - // Empty input image - Arrays.fill(outputArray, n*CPQ, (n+1)*CPQ, 0); - } - } - - //thread-local recomputation of non-zeros - return _params.output.recomputeNonZeros(_rl, _ru-1); - } - } - - private static double max(final double aval, double[] b, final int bi, final int len) { - double ret = aval; - for( int i = bi; i < bi+len; i++ ) - ret = Math.max(ret, b[i]); - return ret; - } -} http://git-wip-us.apache.org/repos/asf/systemml/blob/45eec2d2/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNRelu.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNRelu.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNRelu.java new file mode 100644 index 0000000..c44a032 --- /dev/null +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNRelu.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.sysml.runtime.matrix.data; + +import java.util.ArrayList; +import java.util.concurrent.Callable; + +import org.apache.sysml.hops.OptimizerUtils; +import org.apache.sysml.runtime.DMLRuntimeException; +import org.apache.sysml.runtime.functionobjects.Plus; +import org.apache.sysml.runtime.instructions.InstructionUtils; +import org.apache.sysml.runtime.matrix.operators.BinaryOperator; +import org.apache.sysml.runtime.util.ConvolutionUtils; + +/** + * This class contains the different implementation of relu backward operation + */ +public class LibMatrixDNNRelu +{ + private static BinaryOperator PLUS = new BinaryOperator(Plus.getPlusFnObject()); + + + /** + * Factory method that returns list of callable tasks for performing relu backward operation + * + * @param params convolution parameters + * @return list of callable tasks for performing relu backward operation + * @throws DMLRuntimeException if error occurs + */ + public static ArrayList<Callable<Long>> getReluBackwardWorkers(ConvolutionParameters params) throws DMLRuntimeException { + ArrayList<Callable<Long>> ret = new ArrayList<>(); + int k = OptimizerUtils.getConstrainedNumThreads(params.numThreads); + int taskSize = (int)(Math.ceil((double)params.N / k)); + for(int i = 0; i*taskSize < params.N; i++) + ret.add(new ReluBackward(i*taskSize, Math.min((i+1)*taskSize, params.N), params)); + return ret; + } + + /** + * Performs the operation: (X gt 0) * dout + */ + public static class ReluBackward implements Callable<Long> + { + public int _rl; public int _ru; + private final 
ConvolutionParameters _params; + double [] outputArray; int numOutCols; + public ReluBackward(int rl, int ru, ConvolutionParameters params) { + _rl = rl; _ru = ru; + _params = params; + outputArray= params.output.getDenseBlockValues(); + numOutCols = params.input1.getNumColumns(); + } + + @Override + public Long call() throws Exception { + if(!_params.input1.isInSparseFormat() && !_params.input2.isInSparseFormat()) { + double [] inputArr = _params.input1.getDenseBlockValues(); + double [] doutArr = _params.input2.getDenseBlockValues(); + for(int i = _rl*numOutCols; i < _ru*numOutCols; i++) { + outputArray[i] = inputArr[i] > 0 ? doutArr[i] : 0; + } + } + else { + // Perform (X > 0) + ConvolutionUtils.scalarOperations(_params.input1, outputArray, _rl*numOutCols, numOutCols, _rl, _ru, + InstructionUtils.parseScalarBinaryOperator(">", false, 0)); + // Then perform (X > 0) * dout + ConvolutionUtils.binaryOperationInPlace(_params.input2, outputArray, _rl*numOutCols, numOutCols, _rl, _ru, PLUS); + } + return 0L; + } + } +} http://git-wip-us.apache.org/repos/asf/systemml/blob/45eec2d2/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNRotate180.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNRotate180.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNRotate180.java new file mode 100644 index 0000000..b463794 --- /dev/null +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNRotate180.java @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.sysml.runtime.matrix.data; + +import org.apache.sysml.runtime.matrix.data.LibMatrixDNNHelper.CellIndex3; + +/** + * This class contains the different implementation of rotate180 operation + */ +public class LibMatrixDNNRotate180 +{ + public static interface Rotate180Worker { + public void execute(int inputN, int outputN); + public static Rotate180Worker getWorker(MatrixBlock in, MatrixBlock out, + ConvolutionParameters params, boolean zeroOutSparseOutput, boolean trans) { + if(!in.isInSparseFormat()) + return new DenseRotate180Worker(in, out.getDenseBlockValues(), params); + else + return new SparseRotate180Worker(in, out, params, trans); + } + } + + /** + * Performing dense rotate180 (general case) + */ + private static class DenseRotate180Worker implements Rotate180Worker { + private final double[] inputArray, outputArray; + private final ConvolutionParameters params; + public DenseRotate180Worker(MatrixBlock input, double[] outputArray, ConvolutionParameters params) { + this.outputArray = outputArray; + this.params = params; + inputArray = input.getDenseBlockValues(); + if(inputArray == null || outputArray == null) + throw new RuntimeException("Incorrect usage: empty inputs"); + } + + @Override + public void execute(int inputN, int outputN) { + int outputOffset = outputN*params.K*params.P*params.Q; + for (int k = 0; k < params.K; k++) { + for (int p = 0; p < params.P; p++) { + for (int q = 0; q < params.Q; q++) { + outputArray[outputOffset + p*params.Q*params.K + q*params.K + k] = + 
inputArray[inputN*params.K*params.P*params.Q + k*params.P*params.Q + p*params.Q + q]; + } + } + } + } + } + + /** + * Performing rotate180 when input is sparse (general case) + * + * Why are we allocating the output of rotate180 in dense format ? + * Because the number of rows of output (i.e. NPQ) is much larger than number of columns (i.e. K) + */ + private static class SparseRotate180Worker implements Rotate180Worker { + private final MatrixBlock in, out; + private final ConvolutionParameters params; + private final boolean trans; + + public SparseRotate180Worker(MatrixBlock input, MatrixBlock output, + ConvolutionParameters params, boolean trans) { + this.in = input; + this.out = output; + this.params = params; + this.trans = trans; + } + + @Override + public void execute(int inputN, int outputN) { + out.reset(); + + SparseBlock sblock = in.sparseBlock; + if( sblock==null || sblock.isEmpty(inputN) ) + return; + + CellIndex3 ix = new CellIndex3(); + int outputOffset = outputN*params.P*params.Q; + int apos = sblock.pos(inputN); + int alen = sblock.size(inputN); + int[] aix = sblock.indexes(inputN); + double[] avals = sblock.values(inputN); + for(int j = apos; j < apos+alen; j++) { + ix = LibMatrixDNNHelper.computeTensorIndexes(aix[j], params.P, params.Q, ix); + if( trans ) + out.appendValue(ix.ix1, outputOffset + ix.ix2*params.Q + ix.ix3, avals[j]); + else + out.appendValue(outputOffset + ix.ix2*params.Q + ix.ix3, ix.ix1, avals[j]); + } + } + } +} http://git-wip-us.apache.org/repos/asf/systemml/blob/45eec2d2/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNRotate180Helper.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNRotate180Helper.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNRotate180Helper.java deleted file mode 100644 index 74e2baa..0000000 --- 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNRotate180Helper.java +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.sysml.runtime.matrix.data; - -import org.apache.sysml.runtime.matrix.data.LibMatrixDNNHelper.CellIndex3; - -/** - * This class contains the different implementation of rotate180 operation - */ -public class LibMatrixDNNRotate180Helper { - - static interface Rotate180Worker { - public void execute(int inputN, int outputN); - public static Rotate180Worker getWorker(MatrixBlock in, MatrixBlock out, - ConvolutionParameters params, boolean zeroOutSparseOutput, boolean trans) { - if(!in.isInSparseFormat()) - return new DenseRotate180Worker(in, out.getDenseBlockValues(), params); - else - return new SparseRotate180Worker(in, out, params, trans); - } - } - - /** - * Performing dense rotate180 (general case) - */ - static class DenseRotate180Worker implements Rotate180Worker { - - double [] inputArray; double [] outputArray; - ConvolutionParameters params; - public DenseRotate180Worker(MatrixBlock input, double [] outputArray, ConvolutionParameters params) { - this.outputArray = outputArray; - this.params = params; - inputArray = 
input.getDenseBlockValues(); - if(inputArray == null || outputArray == null) - throw new RuntimeException("Incorrect usage: empty inputs"); - } - - @Override - public void execute(int inputN, int outputN) { - int outputOffset = outputN*params.K*params.P*params.Q; - for (int k = 0; k < params.K; k++) { - for (int p = 0; p < params.P; p++) { - for (int q = 0; q < params.Q; q++) { - outputArray[outputOffset + p*params.Q*params.K + q*params.K + k] = - inputArray[inputN*params.K*params.P*params.Q + k*params.P*params.Q + p*params.Q + q]; - } - } - } - } - } - - /** - * Performing rotate180 when input is sparse (general case) - * - * Why are we allocating the output of rotate180 in dense format ? - * Because the number of rows of output (i.e. NPQ) is much larger than number of columns (i.e. K) - */ - static class SparseRotate180Worker implements Rotate180Worker { - private final MatrixBlock in, out; - private final ConvolutionParameters params; - private final boolean trans; - - public SparseRotate180Worker(MatrixBlock input, MatrixBlock output, - ConvolutionParameters params, boolean trans) { - this.in = input; - this.out = output; - this.params = params; - this.trans = trans; - } - - @Override - public void execute(int inputN, int outputN) { - out.reset(); - - SparseBlock sblock = in.sparseBlock; - if( sblock==null || sblock.isEmpty(inputN) ) - return; - - CellIndex3 ix = new CellIndex3(); - int outputOffset = outputN*params.P*params.Q; - int apos = sblock.pos(inputN); - int alen = sblock.size(inputN); - int[] aix = sblock.indexes(inputN); - double[] avals = sblock.values(inputN); - for(int j = apos; j < apos+alen; j++) { - ix = LibMatrixDNNHelper.computeTensorIndexes(aix[j], params.P, params.Q, ix); - if( trans ) - out.appendValue(ix.ix1, outputOffset + ix.ix2*params.Q + ix.ix3, avals[j]); - else - out.appendValue(outputOffset + ix.ix2*params.Q + ix.ix3, ix.ix1, avals[j]); - } - } - } -} 
http://git-wip-us.apache.org/repos/asf/systemml/blob/45eec2d2/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java index 36d3dc2..6119e95 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java @@ -1369,7 +1369,7 @@ public class LibMatrixMult int k2 = (ru==cd) ? alen : a.posFIndexGTE(i, ru); k2 = (k2>=0) ? apos+k2 : apos+alen; - if( b.isContiguous(aix[k1], aix[k2-1]) ) { + if( k1==k2 || b.isContiguous(aix[k1], aix[k2-1]) ) { double[] bvals = b.values(aix[k1]); int base = aix[k1]*n - b.pos(aix[k1]); //rest not aligned to blocks of 4 rows
