Repository: systemml
Updated Branches:
  refs/heads/master d16cc7cf8 -> 54a11eed3
http://git-wip-us.apache.org/repos/asf/systemml/blob/54a11eed/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNPooling.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNPooling.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNPooling.java
index 906ef90..5d3a631 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNPooling.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNPooling.java
@@ -24,6 +24,7 @@ import java.util.concurrent.Callable;
 
 import org.apache.sysml.hops.OptimizerUtils;
 import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.matrix.data.LibMatrixDNN.PoolingType;
 import org.apache.sysml.runtime.matrix.data.LibMatrixDNNHelper.CellIndex3;
 
 /**
@@ -37,22 +38,23 @@ public class LibMatrixDNNPooling {
   // Also, they simplify the design of LibMatrixDNN and help in code-maintenance.
   
   /**
-   * Factory method that returns list of callable tasks for performing maxpooling operation
+   * Factory method that returns a list of callable tasks for performing the pooling operation
    * 
    * @param params convolution parameters
-   * @return list of callable tasks for performing maxpooling operation
+   * @param poolType type of pooling
+   * @return list of callable tasks for performing the pooling operation
    * @throws DMLRuntimeException if error occurs
    */
-  public static ArrayList<Callable<Long>> getMaxPoolingWorkers(ConvolutionParameters params) throws DMLRuntimeException {
+  public static ArrayList<Callable<Long>> getPoolingWorkers(ConvolutionParameters params, PoolingType poolType) throws DMLRuntimeException {
     ArrayList<Callable<Long>> ret = new ArrayList<>();
     // Try to create twice as many tasks as threads for improved load balance
     int k = OptimizerUtils.getConstrainedNumThreads(params.numThreads);
     int taskSize = (int)(Math.ceil((double)params.N / k / 2));
     for(int i = 0; i*taskSize < params.N; i++) {
       if(params.input1.isInSparseFormat())
-        ret.add(new SparseMaxPooling(i*taskSize, Math.min((i+1)*taskSize, params.N), params));
+        ret.add(new SparsePooling(i*taskSize, Math.min((i+1)*taskSize, params.N), params, poolType));
       else
-        ret.add(new DenseMaxPooling(i*taskSize, Math.min((i+1)*taskSize, params.N), params));
+        ret.add(new DensePooling(i*taskSize, Math.min((i+1)*taskSize, params.N), params, poolType));
     }
     return ret;
   }
@@ -62,37 +64,54 @@ public class LibMatrixDNNPooling {
    * 
    * @param params convolution parameters
    * @param performReluBackward whether to perform ReLU backward
+   * @param poolType type of pooling operation to perform
    * @return list of callable tasks for performing maxpooling backward operation
    * @throws DMLRuntimeException if error occurs
    */
-  public static ArrayList<Callable<Long>> getMaxPoolingBackwardWorkers(ConvolutionParameters params, boolean performReluBackward) throws DMLRuntimeException {
+  public static ArrayList<Callable<Long>> getPoolingBackwardWorkers(ConvolutionParameters params, boolean performReluBackward, PoolingType poolType) throws DMLRuntimeException {
     ArrayList<Callable<Long>> ret = new ArrayList<>();
     // Try to create twice as many tasks as threads for improved load balance
    int k = OptimizerUtils.getConstrainedNumThreads(params.numThreads);
     int taskSize = (int)(Math.ceil((double)params.N / k / 2));
-    boolean sparse1 = params.input1.isInSparseFormat();
-    boolean sparse2 = params.input2.isInSparseFormat();
-    for(int i = 0; i*taskSize < params.N; i++) {
-      if( !sparse1 && !sparse2 )
-        ret.add(new PoolingBackwardDenseDense(i*taskSize, Math.min((i+1)*taskSize, params.N), params, performReluBackward));
-      else if( !sparse1 && sparse2 )
-        ret.add(new PoolingBackwardDenseSparse(i*taskSize, Math.min((i+1)*taskSize, params.N), params, performReluBackward));
-      else if( sparse1 && !sparse2 )
-        ret.add(new PoolingBackwardSparseDense(i*taskSize, Math.min((i+1)*taskSize, params.N), params, performReluBackward));
-      else if( sparse1 && sparse2 )
-        ret.add(new PoolingBackwardSparseSparse(i*taskSize, Math.min((i+1)*taskSize, params.N), params, performReluBackward));
+    if(poolType == PoolingType.MAX) {
+      boolean sparse1 = params.input1.isInSparseFormat();
+      boolean sparse2 = params.input2.isInSparseFormat();
+      for(int i = 0; i*taskSize < params.N; i++) {
+        if( !sparse1 && !sparse2 )
+          ret.add(new PoolingBackwardDenseDense(i*taskSize, Math.min((i+1)*taskSize, params.N), params, performReluBackward));
+        else if( !sparse1 && sparse2 )
+          ret.add(new PoolingBackwardDenseSparse(i*taskSize, Math.min((i+1)*taskSize, params.N), params, performReluBackward));
+        else if( sparse1 && !sparse2 )
+          ret.add(new PoolingBackwardSparseDense(i*taskSize, Math.min((i+1)*taskSize, params.N), params, performReluBackward));
+        else if( sparse1 && sparse2 )
+          ret.add(new PoolingBackwardSparseSparse(i*taskSize, Math.min((i+1)*taskSize, params.N), params, performReluBackward));
+      }
+    }
+    else {
+      boolean sparse = params.input2.isInSparseFormat();
+      for(int i = 0; i*taskSize < params.N; i++) {
+        if( !sparse )
+          ret.add(new AvgPoolingBackwardDense(i*taskSize, Math.min((i+1)*taskSize, params.N), params));
+        else
+          ret.add(new AvgPoolingBackwardSparse(i*taskSize, Math.min((i+1)*taskSize, params.N), params));
+      }
     }
     return ret;
   }
   
-  private static class DenseMaxPooling implements Callable<Long> 
+  private static class DensePooling implements Callable<Long> 
   {
     private final int _rl, _ru; 
     private final ConvolutionParameters _params;
+    
+    private final PoolingType _poolingType;
+    private final double _poolingMultiplier;
     
-    public DenseMaxPooling(int rl, int ru, ConvolutionParameters params) {
+    public DensePooling(int rl, int ru, ConvolutionParameters params, PoolingType poolingType) {
       _rl = rl; _ru = ru;
       _params = params;
+      _poolingType = poolingType;
+      _poolingMultiplier = Math.pow(params.R*params.S, -1);
     }
     
     @Override
@@ -105,7 +124,7 @@ public class LibMatrixDNNPooling {
       double[] in = _params.input1.getDenseBlockValues();
       double[] out = _params.output.getDenseBlockValues();
       
-      double minValForMaxPoolOperations = _params.minValForMaxPoolOperations;
+      double minValForMaxPoolOperations = _poolingType == PoolingType.AVG ? 0 : _params.minValForMaxPoolOperations;
       
       //thread-local initialization of output block 
       if( !(_params.isStride1Pad0() && _params.isAllOnes(P, Q, W)) )
@@ -115,28 +134,56 @@ public class LibMatrixDNNPooling {
         //quick-path w/o materialized index arrays and 
         //simplified inner loops for P = 1, Q = 1, W = 1
         int lenh = Math.min(R,H);
-        for(int i = _rl, oix=_rl*C; i < _ru; i++, oix+=C)
-          for (int c = 0, off=i*CHW; c < C; c++, off+=H)
-            out[oix+c] = max(minValForMaxPoolOperations, in, off, lenh);
+        if(_poolingType == PoolingType.AVG) {
+          for(int i = _rl, oix=_rl*C; i < _ru; i++, oix+=C)
+            for (int c = 0, off=i*CHW; c < C; c++, off+=H)
+              out[oix+c] = avg_pool(minValForMaxPoolOperations, in, off, lenh, _poolingMultiplier);
+        }
+        else {
+          for(int i = _rl, oix=_rl*C; i < _ru; i++, oix+=C)
+            for (int c = 0, off=i*CHW; c < C; c++, off+=H)
+              out[oix+c] = max(minValForMaxPoolOperations, in, off, lenh);
+        }
       }
       else if( _params.isStride1Pad0() ) {
-        //quick-path w/o materialized index arrays
-        for(int i = _rl; i < _ru; i++)
-          for (int c = 0, off=i*CHW, oix=i*CPQ; c < C; c++, off+=HW)
-            for (int p = 0; p < P; p++, oix+=Q)
-              for (int h = p; h < Math.min(p+R,H); h++)
-                for (int q = 0, off2=off+h*W; q < Q; q++)
-                  out[oix+q] = max(out[oix+q], in, off2+q, Math.min(S,W-q));
+        if(_poolingType == PoolingType.AVG) {
+          //quick-path w/o materialized index arrays
+          for(int i = _rl; i < _ru; i++)
+            for (int c = 0, off=i*CHW, oix=i*CPQ; c < C; c++, off+=HW)
+              for (int p = 0; p < P; p++, oix+=Q)
+                for (int h = p; h < Math.min(p+R,H); h++)
+                  for (int q = 0, off2=off+h*W; q < Q; q++)
+                    out[oix+q] = avg_pool(out[oix+q], in, off2+q, Math.min(S,W-q), _poolingMultiplier);
+        }
+        else {
+          //quick-path w/o materialized index arrays
+          for(int i = _rl; i < _ru; i++)
+            for (int c = 0, off=i*CHW, oix=i*CPQ; c < C; c++, off+=HW)
+              for (int p = 0; p < P; p++, oix+=Q)
+                for (int h = p; h < Math.min(p+R,H); h++)
+                  for (int q = 0, off2=off+h*W; q < Q; q++)
+                    out[oix+q] = max(out[oix+q], in, off2+q, Math.min(S,W-q));
+        }
       }
       else { //general case
         int[] hl = _params.start_indexes_h, hu = _params.end_indexes_h;
         int[] wl = _params.start_indexes_w, wu = _params.end_indexes_w;
-        for(int i = _rl; i < _ru; i++)
-          for (int c = 0, off=i*CHW, oix=i*CPQ; c < C; c++, off+=HW)
-            for (int p = 0; p < P; p++, oix+=Q)
-              for (int h = hl[p]; h < hu[p]; h++)
-                for (int q = 0, off2=off+h*W; q < Q; q++)
-                  out[oix+q] = max(out[oix+q], in, off2+wl[q], wu[q]-wl[q]);
+        if(_poolingType == PoolingType.AVG) {
+          for(int i = _rl; i < _ru; i++)
+            for (int c = 0, off=i*CHW, oix=i*CPQ; c < C; c++, off+=HW)
+              for (int p = 0; p < P; p++, oix+=Q)
+                for (int h = hl[p]; h < hu[p]; h++)
+                  for (int q = 0, off2=off+h*W; q < Q; q++)
+                    out[oix+q] = avg_pool(out[oix+q], in, off2+wl[q], wu[q]-wl[q], _poolingMultiplier);
+        }
+        else {
+          for(int i = _rl; i < _ru; i++)
+            for (int c = 0, off=i*CHW, oix=i*CPQ; c < C; c++, off+=HW)
+              for (int p = 0; p < P; p++, oix+=Q)
+                for (int h = hl[p]; h < hu[p]; h++)
+                  for (int q = 0, off2=off+h*W; q < Q; q++)
+                    out[oix+q] = max(out[oix+q], in, off2+wl[q], wu[q]-wl[q]);
+        }
       }
       
       //thread-local recomputation of non-zeros
@@ -144,14 +191,16 @@ public class LibMatrixDNNPooling {
     }
   }
   
-  private static class SparseMaxPooling implements Callable<Long> 
+  private static class SparsePooling implements Callable<Long> 
   {
     private final int _rl, _ru; 
     private final ConvolutionParameters _params;
     
     private double [] outputArray;
     private final int C, P, Q, W, H, CPQ, PQ;
+    private final PoolingType _poolingType;
+    private final double _poolingMultiplier;
    
-    public SparseMaxPooling(int rl, int ru, ConvolutionParameters params) {
+    public SparsePooling(int rl, int ru, ConvolutionParameters params, PoolingType poolingType) {
       _rl = rl; _ru = ru;
       _params = params;
       outputArray = params.output.getDenseBlockValues();
@@ -159,12 +208,15 @@ public class LibMatrixDNNPooling {
       W = params.W;
       CPQ = C*P*Q;
       PQ = P*Q;
+      _poolingType = poolingType;
+      _poolingMultiplier = Math.pow(params.R*params.S, -1);
     }
     
     @Override
     public Long call() throws Exception {
       //thread-local initialization of output block
-      Arrays.fill(outputArray, _rl*CPQ, _ru*CPQ, _params.minValForMaxPoolOperations);
+      if(_poolingType == PoolingType.MAX)
+        Arrays.fill(outputArray, _rl*CPQ, _ru*CPQ, _params.minValForMaxPoolOperations);
       for(int n = _rl; n < _ru; n++)  {
         if( !_params.input1.sparseBlock.isEmpty(n) ) {
@@ -184,17 +236,31 @@ public class LibMatrixDNNPooling {
             // Ensure that we satisfy the condition index < apos+alen
             if(index >= apos+alen) index--;
           }
-          // Perform maxpooling without binary search :)
-          // Tradeoff as compared to dense maxpooling:
-          // In dense maxpooling, iteration space CPQHW where H and W iterations are restricted by _params.start_indexes_h[p]
-          // and are eligible for JIT optimizations.
-          // In sparse maxpooling, iteration space CHWPQ without HW restrictions.
-          for (int p = 0; p < P; p++) {
-            if(h >= _params.start_indexes_h[p] && h < _params.end_indexes_h[p]) {
-              final int outOffsetWithp = outOffset + p*Q;
-              for (int q = 0; q < Q; q++) {
-                if(w >= _params.start_indexes_w[q] && w < _params.end_indexes_w[q]) {
-                  outputArray[outOffsetWithp + q] = Math.max(outputArray[outOffsetWithp + q], nchwVal);
+          if(_poolingType == PoolingType.MAX) {
+            // Perform maxpooling without binary search :)
+            // Tradeoff as compared to dense maxpooling:
+            // In dense maxpooling, iteration space CPQHW where H and W iterations are restricted by _params.start_indexes_h[p]
+            // and are eligible for JIT optimizations.
+            // In sparse maxpooling, iteration space CHWPQ without HW restrictions.
+            for (int p = 0; p < P; p++) {
+              if(h >= _params.start_indexes_h[p] && h < _params.end_indexes_h[p]) {
+                final int outOffsetWithp = outOffset + p*Q;
+                for (int q = 0; q < Q; q++) {
+                  if(w >= _params.start_indexes_w[q] && w < _params.end_indexes_w[q]) {
+                    outputArray[outOffsetWithp + q] = Math.max(outputArray[outOffsetWithp + q], nchwVal);
+                  }
+                }
+              }
+            }
+          }
+          else {
+            for (int p = 0; p < P; p++) {
+              if(h >= _params.start_indexes_h[p] && h < _params.end_indexes_h[p]) {
+                final int outOffsetWithp = outOffset + p*Q;
+                for (int q = 0; q < Q; q++) {
+                  if(w >= _params.start_indexes_w[q] && w < _params.end_indexes_w[q]) {
+                    outputArray[outOffsetWithp + q] += _poolingMultiplier*nchwVal;
+                  }
                 }
               }
             }
@@ -217,6 +283,57 @@ public class LibMatrixDNNPooling {
   //BACKWARD
   
   /**
+   * Performs the avgpooling backward operation for dense error (dout)
+   */
+  private static class AvgPoolingBackwardDense implements Callable<Long> 
+  {
+    public int _rl; public int _ru; 
+    private final ConvolutionParameters _params; 
+    double [] doutArray;
+    MatrixBlock output;
+    final int C; final int CHW; final int P; final int Q; final int HW; final int CPQ; final int PQ;
+    final double _poolingMultiplier;
+    public AvgPoolingBackwardDense(int rl, int ru, ConvolutionParameters params) {
+      _rl = rl; _ru = ru;
+      _params = params;
+      doutArray = params.input2.getDenseBlockValues();
+      output = params.output;
+      C = params.C; CHW = params.C*params.H*params.W; HW = params.H*params.W;
+      P = params.P; Q = params.Q; CPQ = params.C*params.P*params.Q;
+      PQ = params.P*params.Q;
+      _poolingMultiplier = Math.pow(params.R*params.S, -1);
+      if (doutArray == null || output.getDenseBlock() == null )
+        throw new RuntimeException("Incorrect usage: empty inputs");
+    }
+    
+    @Override
+    public Long call() throws Exception {
+      double[] out = output.getDenseBlockValues();
+      for(int n = _rl; n < _ru; n++) {
+        for (int c = 0; c < C; c++) {
+          final int inputOffset = n*CHW + c*HW;
+          final int outputOffset = n*CPQ + c*PQ;
+          for (int p = 0; p < P; p++) {
+            for (int q = 0; q < Q; q++) {
+              int start_index_h = _params.start_indexes_h[p];
+              int end_index_h = _params.end_indexes_h[p];
+              int start_index_w = _params.start_indexes_w[q];
+              int end_index_w = _params.end_indexes_w[q];
+              for (int h = start_index_h; h < end_index_h; h++) {
+                for (int w = start_index_w; w < end_index_w; w++) {
+                  out[inputOffset + h*_params.W + w] += _poolingMultiplier*doutArray[outputOffset + p*Q + q];
+                }
+              }
+            }
+          }
+        }
+      }
+      //thread-local nnz maintenance
+      return output.recomputeNonZeros(_rl, _ru-1);
+    }
+  }
+  
+  /**
    * Performs the maxpooling backward operation for dense input and dense error (dout)
    */
   private static class PoolingBackwardDenseDense implements Callable<Long> 
@@ -314,6 +431,61 @@ public class LibMatrixDNNPooling {
   }
   
   /**
+   * Performs the avgpooling backward operation for sparse error (dout)
+   */
+  private static class AvgPoolingBackwardSparse implements Callable<Long> 
+  {
+    public int _rl; public int _ru; 
+    private final ConvolutionParameters _params; 
+    MatrixBlock output; 
+    MatrixBlock dout;
+    int CHW; int P; int Q; int HW;
+    final double _poolingMultiplier;
+    public AvgPoolingBackwardSparse(int rl, int ru, ConvolutionParameters params) {
+      _rl = rl; _ru = ru;
+      _params = params;
+      dout = params.input2;
+      output = params.output;
+      CHW = params.C*params.H*params.W; HW = params.H*params.W;
+      P = params.P; Q = params.Q;
+      _poolingMultiplier = Math.pow(params.R*params.S, -1);
+      if (output.getDenseBlock() == null )
+        throw new RuntimeException("Incorrect usage: empty inputs");
+    }
+    
+    @Override
+    public Long call() throws Exception {
+      CellIndex3 ix = new CellIndex3();
+      double[] out = output.getDenseBlockValues();
+      SparseBlock sblock = dout.sparseBlock;
+      for(int n = _rl; n < _ru; n++) {
+        if( sblock.isEmpty(n) ) continue;
+        int apos = sblock.pos(n);
+        int alen = sblock.size(n);
+        int[] aix = sblock.indexes(n);
+        double[] avals = sblock.values(n);
+        for(int j = apos; j < apos+alen; j++) {
+          ix = LibMatrixDNNHelper.computeTensorIndexes(aix[j], P, Q, ix);
+          int c = ix.ix1;
+          int p = ix.ix2;
+          int q = ix.ix3;
+          final int inputOffset = n*CHW + c*HW;
+          int start_index_h = _params.start_indexes_h[p];
+          int end_index_h = _params.end_indexes_h[p];
+          int start_index_w = _params.start_indexes_w[q];
+          int end_index_w = _params.end_indexes_w[q];
+          for (int h = start_index_h; h < end_index_h; h++) {
+            for (int w = start_index_w; w < end_index_w; w++) {
+              out[inputOffset + h*_params.W + w] += _poolingMultiplier*avals[j];
+            }
+          }
+        }
+      }
+      //thread-local nnz maintenance
+      return output.recomputeNonZeros(_rl, _ru-1);
+    }
+  }
+  
+  /**
    * Performs the maxpooling backward operation for sparse input and dense error (dout)
    */
   private static class PoolingBackwardSparseDense implements Callable<Long> 
@@ -486,6 +658,13 @@ public class LibMatrixDNNPooling {
     }
   }
   
+  private static double avg_pool(final double aval, double[] b, final int bi, final int len, final double poolingMultiplier) {
+    double ret = aval;
+    for( int i = bi; i < bi+len; i++ )
+      ret += poolingMultiplier*b[i];
+    return ret;
+  }
+  
   private static double max(final double aval, double[] b, final int bi, final int len) {
     double ret = aval;
     for( int i = bi; i < bi+len; i++ )
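The arithmetic behind the new average-pooling path is worth spelling out: the forward pass scales each window sum by _poolingMultiplier = 1/(R*S) (see avg_pool above), and the backward pass scatters each output gradient uniformly, adding dout[p,q]/(R*S) to every input cell of the corresponding R x S window — exactly the += loops in AvgPoolingBackwardDense and AvgPoolingBackwardSparse. A minimal NumPy sketch of the same computation, assuming a single image, a single channel, stride 1, and no padding; the helper names are illustrative, not SystemML API:

    import numpy as np

    def avg_pool2d(X, R, S):
        # Forward: mean over each RxS window (stride 1, no padding).
        H, W = X.shape
        P, Q = H - R + 1, W - S + 1
        out = np.zeros((P, Q))
        for p in range(P):
            for q in range(Q):
                out[p, q] = X[p:p+R, q:q+S].sum() / (R * S)
        return out

    def avg_pool2d_backward(dout, H, W, R, S):
        # Backward: every dout cell contributes dout/(R*S) to each
        # input cell in its window, mirroring the += loops above.
        dX = np.zeros((H, W))
        P, Q = dout.shape
        for p in range(P):
            for q in range(Q):
                dX[p:p+R, q:q+S] += dout[p, q] / (R * S)
        return dX

    X = np.arange(16, dtype=float).reshape(4, 4)
    out = avg_pool2d(X, 2, 2)                              # shape (3, 3)
    dX = avg_pool2d_backward(np.ones_like(out), 4, 4, 2, 2)

This also explains why the dense path switches its initialization value: max pooling seeds the output with minValForMaxPoolOperations (a large negative sentinel for the running maximum), whereas average pooling must start from 0 so the scaled sums accumulate correctly.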
http://git-wip-us.apache.org/repos/asf/systemml/blob/54a11eed/src/main/proto/caffe/caffe.proto
----------------------------------------------------------------------
diff --git a/src/main/proto/caffe/caffe.proto b/src/main/proto/caffe/caffe.proto
index c5a5799..8d1d796 100644
--- a/src/main/proto/caffe/caffe.proto
+++ b/src/main/proto/caffe/caffe.proto
@@ -405,6 +405,9 @@ message LayerParameter {
   optional ThresholdParameter threshold_param = 128;
   optional TileParameter tile_param = 138;
   optional WindowDataParameter window_data_param = 129;
+
+  // Nike:
+  optional UpsampleParameter upsample_param = 147;
 }
 
 // Message that stores parameters used to apply transformation
@@ -1173,6 +1176,12 @@ message ThresholdParameter {
   optional float threshold = 1 [default = 0]; // Strictly positive values
 }
 
+// Nike:
+message UpsampleParameter {
+  optional uint32 size_h = 6; // upsampling factors for rows
+  optional uint32 size_w = 7; // upsampling factors for columns
+}
+
 message WindowDataParameter {
   // Specify the data source.
   optional string source = 1;

http://git-wip-us.apache.org/repos/asf/systemml/blob/54a11eed/src/main/python/systemml/mllearn/keras2caffe.py
----------------------------------------------------------------------
diff --git a/src/main/python/systemml/mllearn/keras2caffe.py b/src/main/python/systemml/mllearn/keras2caffe.py
index ac3ba80..3f4bddf 100755
--- a/src/main/python/systemml/mllearn/keras2caffe.py
+++ b/src/main/python/systemml/mllearn/keras2caffe.py
@@ -23,7 +23,7 @@
 
 import numpy as np
-import os
+import os, math
 from itertools import chain, imap
 from ..converters import *
 from ..classloader import *
@@ -60,6 +60,7 @@ supportedLayers = {
     keras.layers.Concatenate: 'Concat',
     keras.layers.Conv2DTranspose: 'Deconvolution',
     keras.layers.Conv2D: 'Convolution',
+    keras.layers.UpSampling2D: 'Upsample',
     keras.layers.MaxPooling2D: 'Pooling',
     keras.layers.AveragePooling2D: 'Pooling',
     keras.layers.SimpleRNN: 'RNN',
@@ -163,6 +164,8 @@ def getConvParam(layer):
     config = layer.get_config()
     return {'num_output':layer.filters,'bias_term':str(config['use_bias']).lower(),'kernel_h':layer.kernel_size[0], 'kernel_w':layer.kernel_size[1], 'stride_h':stride[0],'stride_w':stride[1],'pad_h':padding[0], 'pad_w':padding[1]}
 
+def getUpSamplingParam(layer):
+    return { 'size_h':layer.size[0], 'size_w':layer.size[1] }
 
 def getPoolingParam(layer, pool='MAX'):
     stride = (1, 1) if layer.strides is None else layer.strides
@@ -192,12 +195,14 @@ layerParamMapping = {
         {'concat_param': {'axis': _getCompensatedAxis(l)}},
     keras.layers.Conv2DTranspose: lambda l: \
         {'convolution_param': getConvParam(l)},
+    keras.layers.UpSampling2D: lambda l: \
+        {'upsample_param': getUpSamplingParam(l)},
     keras.layers.Conv2D: lambda l: \
         {'convolution_param': getConvParam(l)},
     keras.layers.MaxPooling2D: lambda l: \
        {'pooling_param': getPoolingParam(l, 'MAX')},
     keras.layers.AveragePooling2D: lambda l: \
-        {'pooling_param': getPoolingParam(l, 'MAX')},
+        {'pooling_param': getPoolingParam(l, 'AVE')},
     keras.layers.SimpleRNN: lambda l: \
         {'recurrent_param': getRecurrentParam(l)},
     keras.layers.LSTM: lambda l: \
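Concretely, the new mapping sends a Keras UpSampling2D(size=(2, 3)) layer to the Caffe message upsample_param { size_h: 2 size_w: 3 }, and AveragePooling2D now emits pool = AVE instead of the previous copy-paste MAX. A quick sketch of what getUpSamplingParam returns, assuming a Keras installation like the one used by the tests below:

    from keras.layers import UpSampling2D

    # Mirrors getUpSamplingParam(layer) from the diff above.
    layer = UpSampling2D(size=(2, 3))
    print({'size_h': layer.size[0], 'size_w': layer.size[1]})
    # -> {'size_h': 2, 'size_w': 3}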
http://git-wip-us.apache.org/repos/asf/systemml/blob/54a11eed/src/main/python/tests/test_nn_numpy.py
----------------------------------------------------------------------
diff --git a/src/main/python/tests/test_nn_numpy.py b/src/main/python/tests/test_nn_numpy.py
index 4ecf878..6fd190c 100644
--- a/src/main/python/tests/test_nn_numpy.py
+++ b/src/main/python/tests/test_nn_numpy.py
@@ -38,7 +38,7 @@ import unittest
 
 import numpy as np
 from keras.models import Sequential
-from keras.layers import Input, Dense, Conv2D, MaxPooling2D, Dropout, Flatten, LSTM
+from keras.layers import Input, Dense, Conv2D, MaxPooling2D, Dropout, Flatten, LSTM, UpSampling2D, SimpleRNN
 from keras import backend as K
 from keras.models import Model
 from systemml.mllearn import Keras2DML
@@ -54,15 +54,29 @@ tmp_dir = 'tmp_dir'
 
 spark = SparkSession.builder.getOrCreate()
 
-def are_predictions_all_close(keras_model):
+def are_predictions_all_close(keras_model, rtol=1e-05, atol=1e-08):
     sysml_model = Keras2DML(spark, keras_model, input_shape=input_shape, weights=tmp_dir)
     keras_preds = keras_model.predict(keras_tensor).flatten()
     sysml_preds = sysml_model.predict_proba(sysml_matrix).flatten()
     #print(str(keras_preds))
     #print(str(sysml_preds))
-    return np.allclose(keras_preds, sysml_preds)
+    return np.allclose(keras_preds, sysml_preds, rtol=rtol, atol=atol)
 
 class TestNNLibrary(unittest.TestCase):
+    def test_1layer_upsample_predictions1(self):
+        keras_model = Sequential()
+        keras_model.add(UpSampling2D(size=(2, 2), input_shape=input_shape))
+        keras_model.add(Flatten())
+        keras_model.add(Dense(10, activation='softmax'))
+        self.failUnless(are_predictions_all_close(keras_model, atol=1e-06))
+
+    def test_1layer_upsample_predictions2(self):
+        keras_model = Sequential()
+        keras_model.add(UpSampling2D(size=(2, 3), input_shape=input_shape))
+        keras_model.add(Flatten())
+        keras_model.add(Dense(10, activation='softmax'))
+        self.failUnless(are_predictions_all_close(keras_model, atol=1e-06))
+
     def test_1layer_cnn_predictions(self):
         keras_model = Sequential()
         keras_model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape, padding='valid'))
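The two new tests exercise the upsampling path end-to-end against Keras; the widened atol=1e-06 presumably absorbs small numerical differences between the two stacks. As a point of reference (a sketch, not part of the test suite), Keras' UpSampling2D performs nearest-neighbor upsampling, which per channel is equivalent to repeating rows and columns with np.repeat:

    import numpy as np

    def upsample2d(X, size_h, size_w):
        # Repeat each row size_h times and each column size_w times,
        # matching UpSampling2D's nearest-neighbor behavior per channel.
        return np.repeat(np.repeat(X, size_h, axis=0), size_w, axis=1)

    X = np.array([[1., 2.],
                  [3., 4.]])
    print(upsample2d(X, 2, 3).shape)  # (4, 6)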
http://git-wip-us.apache.org/repos/asf/systemml/blob/54a11eed/src/main/scala/org/apache/sysml/api/dl/CaffeLayer.scala
----------------------------------------------------------------------
diff --git a/src/main/scala/org/apache/sysml/api/dl/CaffeLayer.scala b/src/main/scala/org/apache/sysml/api/dl/CaffeLayer.scala
index 9b4736a..9aad7b3 100644
--- a/src/main/scala/org/apache/sysml/api/dl/CaffeLayer.scala
+++ b/src/main/scala/org/apache/sysml/api/dl/CaffeLayer.scala
@@ -28,6 +28,7 @@ import org.apache.sysml.runtime.util.ConvolutionUtils
 import caffe.Caffe.EltwiseParameter.EltwiseOp
 import org.apache.sysml.runtime.DMLRuntimeException;
 import java.util.ArrayList
+import caffe.Caffe.PoolingParameter.PoolMethod
 
 trait CaffeLayer extends BaseDMLGenerator {
   // -------------------------------------------------
@@ -713,6 +714,54 @@ class ReLU(val param: LayerParameter, val id: Int, val net: CaffeNetwork) extend
   // -------------------------------------------------
 }
 
+class Upsample(val param: LayerParameter, val id: Int, val net: CaffeNetwork) extends CaffeLayer {
+  // -------------------------------------------------
+  override def sourceFileName = "upsample2d"
+  override def init(dmlScript: StringBuilder) = {}
+  /*
+   * Computes the forward pass for an Upsampling layer.
+   *
+   * Inputs:
+   *  - X: Inputs, of shape (any, any).
+   *  - C: Number of input channels (dimensionality of input depth).
+   *  - Hin: Input height.
+   *  - Win: Input width.
+   *  - size_h: upsampling factor for rows.
+   *  - size_w: upsampling factor for columns.
+   *
+   * Outputs:
+   *  - out: Outputs, of shape (any, C * (Hin*size_h) * (Win*size_w)).
+   */
+  override def forward(dmlScript: StringBuilder, isPrediction: Boolean) =
+    invokeForward(dmlScript, List[String](out), X, num_channels, Hin, Win, size_h, size_w)
+  /*
+   * Computes the backward pass for an Upsampling layer.
+   *
+   * Inputs:
+   *  - dout: Gradient wrt `out` from upstream.
+   *  - C: Number of input channels (dimensionality of input depth).
+   *  - Hin: Input height.
+   *  - Win: Input width.
+   *  - size_h: upsampling factor for rows.
+   *  - size_w: upsampling factor for columns.
+   *
+   * Outputs:
+   *  - dX: Gradient wrt `X`, of same shape as `X`.
+   */
+  override def backward(dmlScript: StringBuilder, outSuffix: String) =
+    invokeBackward(dmlScript, outSuffix, List[String]("dOut" + id), dout, num_channels, Hin, Win, size_h, size_w)
+  override def weightShape(): Array[Int] = null
+  override def biasShape(): Array[Int] = null
+  def size_h(): String = param.getUpsampleParam.getSizeH.toString
+  def size_w(): String = param.getUpsampleParam.getSizeW.toString
+  def num_channels(): String = bottomLayerOutputShape._1
+  def Hin(): String = bottomLayerOutputShape._2
+  def Win(): String = bottomLayerOutputShape._3
+  override def outputShape = (num_channels, int_mult(size_h, bottomLayerOutputShape._2), int_mult(size_w, bottomLayerOutputShape._3))
+  // -------------------------------------------------
+}
+
 class Softmax(val param: LayerParameter, val id: Int, val net: CaffeNetwork) extends CaffeLayer {
   // -------------------------------------------------
   override def sourceFileName = "softmax"
@@ -989,7 +1038,7 @@ class LSTM(val param: LayerParameter, val id: Int, val net: CaffeNetwork) extend
 
 class MaxPooling(val param: LayerParameter, val id: Int, val net: CaffeNetwork) extends CaffeLayer {
   // -------------------------------------------------
-  override def sourceFileName = "max_pool2d_builtin"
+  override def sourceFileName = if(param.getPoolingParam.getPool == PoolMethod.AVE) "avg_pool2d_builtin" else "max_pool2d_builtin";
   override def init(dmlScript: StringBuilder) = {}
   /*
    * Computes the forward pass for a 2D spatial max pooling layer.

http://git-wip-us.apache.org/repos/asf/systemml/blob/54a11eed/src/main/scala/org/apache/sysml/api/dl/CaffeNetwork.scala
----------------------------------------------------------------------
diff --git a/src/main/scala/org/apache/sysml/api/dl/CaffeNetwork.scala b/src/main/scala/org/apache/sysml/api/dl/CaffeNetwork.scala
index dd2597c..297176f 100644
--- a/src/main/scala/org/apache/sysml/api/dl/CaffeNetwork.scala
+++ b/src/main/scala/org/apache/sysml/api/dl/CaffeNetwork.scala
@@ -225,13 +225,14 @@ class CaffeNetwork(netFilePath: String, val currentPhase: Phase, var numChannels
     else m.containsKey(key)
 
   private def convertLayerParameterToCaffeLayer(param: LayerParameter): CaffeLayer = {
     id = id + 1
-    param.getType.toLowerCase() match {
+    param.getType.toLowerCase match {
       case "convolution" => new Convolution(param, id, this)
       case "pooling" =>
-        if (param.getPoolingParam.getPool == PoolingParameter.PoolMethod.MAX) new MaxPooling(param, id, this)
-        else throw new LanguageException("Only maxpooling is supported:" + param.getPoolingParam.getPool.name)
+        if (param.getPoolingParam.getPool == PoolingParameter.PoolMethod.MAX || param.getPoolingParam.getPool == PoolingParameter.PoolMethod.AVE) new MaxPooling(param, id, this)
+        else throw new LanguageException("Only max/avg pooling is supported: " + param.getPoolingParam.getPool.name)
       case "innerproduct" => new InnerProduct(param, id, this)
       case "relu" => new ReLU(param, id, this)
+      case "upsample" => new Upsample(param, id, this)
       case "tanh" => new TanH(param, id, this)
       case "sigmoid" => new Sigmoid(param, id, this)
       case "softmaxwithloss" => new SoftmaxWithLoss(param, id, this)
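Because upsample2d copies each input cell to a size_h x size_w block of the output (note outputShape above: Hin*size_h by Win*size_w), the backward pass documented as "dX: Gradient wrt `X`, of same shape as `X`" is the block-wise sum of dout. A sketch of that gradient under the same nearest-neighbor semantics as the upsample2d helper shown earlier:

    import numpy as np

    def upsample2d_backward(dout, size_h, size_w):
        # Gradient of nearest-neighbor upsampling: sum dout over the
        # size_h x size_w block that each input cell was copied to.
        H, W = dout.shape[0] // size_h, dout.shape[1] // size_w
        return dout.reshape(H, size_h, W, size_w).sum(axis=(1, 3))

    dout = np.ones((4, 6))
    print(upsample2d_backward(dout, 2, 3))  # every entry equals 2*3 = 6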
http://git-wip-us.apache.org/repos/asf/systemml/blob/54a11eed/src/main/scala/org/apache/sysml/api/dl/DMLGenerator.scala
----------------------------------------------------------------------
diff --git a/src/main/scala/org/apache/sysml/api/dl/DMLGenerator.scala b/src/main/scala/org/apache/sysml/api/dl/DMLGenerator.scala
index efe5c00..60396f1 100644
--- a/src/main/scala/org/apache/sysml/api/dl/DMLGenerator.scala
+++ b/src/main/scala/org/apache/sysml/api/dl/DMLGenerator.scala
@@ -49,6 +49,8 @@ trait BaseDMLGenerator {
     try { (v1.toDouble + v2.toDouble).toInt.toString } catch { case _: Throwable => "(" + v1 + "+" + v2 + ")" }
   def int_mult(v1: String, v2: String, v3: String): String =
     try { (v1.toDouble * v2.toDouble * v3.toDouble).toInt.toString } catch { case _: Throwable => "(" + v1 + "*" + v2 + "*" + v3 + ")" }
+  def int_mult(v1: String, v2: String): String =
+    try { (v1.toDouble * v2.toDouble).toInt.toString } catch { case _: Throwable => "(" + v1 + "*" + v2 + ")" }
   def isNumber(x: String): Boolean = x forall Character.isDigit
   def transpose(x: String): String = "t(" + x + ")"
   def write(varName: String, fileName: String, format: String): String = "write(" + varName + ", \"" + fileName + "\", format=\"" + format + "\")\n"
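The new two-argument int_mult overload follows the same constant-folding pattern as the neighboring helpers: if both operands parse as numbers, the product is folded into a literal at DML-generation time; otherwise a symbolic expression string is emitted. This is what lets outputShape in the Upsample layer report a concrete dimension when the input shape is known. The same logic as a Python sketch:

    def int_mult(v1, v2):
        # Fold to a literal when both operands are numeric; otherwise
        # emit a symbolic DML expression, like BaseDMLGenerator.int_mult.
        try:
            return str(int(float(v1) * float(v2)))
        except ValueError:
            return "(" + v1 + "*" + v2 + ")"

    print(int_mult("2", "32"))   # "64"      (folded)
    print(int_mult("2", "Hin"))  # "(2*Hin)" (symbolic)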
