Repository: systemml Updated Branches: refs/heads/master ddc3672f1 -> 9e7ee19a4
http://git-wip-us.apache.org/repos/asf/systemml/blob/9e7ee19a/src/main/python/systemml/mllearn/keras2caffe.py ---------------------------------------------------------------------- diff --git a/src/main/python/systemml/mllearn/keras2caffe.py b/src/main/python/systemml/mllearn/keras2caffe.py index 3f4bddf..6e1e9c3 100755 --- a/src/main/python/systemml/mllearn/keras2caffe.py +++ b/src/main/python/systemml/mllearn/keras2caffe.py @@ -23,7 +23,8 @@ import numpy as np -import os, math +import os +import math from itertools import chain, imap from ..converters import * from ..classloader import * @@ -36,7 +37,8 @@ try: from py4j.java_gateway import JavaObject from pyspark import SparkContext except ImportError: - raise ImportError('Unable to import `pyspark`. Hint: Make sure you are running with PySpark.') + raise ImportError( + 'Unable to import `pyspark`. Hint: Make sure you are running with PySpark.') # -------------------------------------------------------------------------------------- # Design Document: @@ -44,14 +46,17 @@ except ImportError: # # Part 1: Keras network to Caffe network conversion: # - Core logic: model.layers.flatMap(layer => _parseJSONObject(_parseKerasLayer(layer))) -# That is, for each layer, we first convert it into JSON format and then convert the JSON object into String +# That is, for each layer, we first convert it into JSON format and then convert the JSON object into String # - This is true for all the layers except the "specialLayers" (given in below hashmap). These are redirected to their custom parse function in _parseKerasLayer. # - To add an activation, simply add the keras type to caffe type in supportedCaffeActivations. # - To add a layer, add the corresponding caffe layer type in supportedLayers. If the layer accepts parameters then update layerParamMapping too. # - The above logic is implemented in the function converKerasToCaffeNetwork # -------------------------------------------------------------------------------------- -supportedCaffeActivations = {'relu':'ReLU', 'softmax':'Softmax', 'sigmoid':'Sigmoid' } +supportedCaffeActivations = { + 'relu': 'ReLU', + 'softmax': 'Softmax', + 'sigmoid': 'Sigmoid'} supportedLayers = { keras.layers.InputLayer: 'Data', keras.layers.Dense: 'InnerProduct', @@ -63,22 +68,26 @@ supportedLayers = { keras.layers.UpSampling2D: 'Upsample', keras.layers.MaxPooling2D: 'Pooling', keras.layers.AveragePooling2D: 'Pooling', - keras.layers.SimpleRNN: 'RNN', + keras.layers.SimpleRNN: 'RNN', keras.layers.LSTM: 'LSTM', - keras.layers.Flatten: 'None', + keras.layers.Flatten: 'None', keras.layers.BatchNormalization: 'None', keras.layers.Activation: 'None' - } +} + def _getInboundLayers(layer): in_names = [] # get inbound nodes to current layer (support newer as well as older APIs) - inbound_nodes = layer.inbound_nodes if hasattr(layer, 'inbound_nodes') else layer._inbound_nodes + inbound_nodes = layer.inbound_nodes if hasattr( + layer, 'inbound_nodes') else layer._inbound_nodes for node in inbound_nodes: node_list = node.inbound_layers # get layers pointing to this node in_names = in_names + node_list # For Caffe2DML to reroute any use of Flatten layers - return list(chain.from_iterable( [ _getInboundLayers(l) if isinstance(l, keras.layers.Flatten) else [ l ] for l in in_names ] )) + return list(chain.from_iterable([_getInboundLayers(l) if isinstance( + l, keras.layers.Flatten) else [l] for l in in_names])) + def _getCompensatedAxis(layer): compensated_axis = layer.axis @@ -91,265 +100,401 @@ def _getCompensatedAxis(layer): compensated_axis = 1 return compensated_axis -str_keys = [ 'name', 'type', 'top', 'bottom' ] + +str_keys = ['name', 'type', 'top', 'bottom'] + + def toKV(key, value): - return str(key) + ': "' + str(value) + '"' if key in str_keys else str(key) + ': ' + str(value) - + return str(key) + ': "' + str(value) + \ + '"' if key in str_keys else str(key) + ': ' + str(value) + def _parseJSONObject(obj): - rootName = obj.keys()[0] - ret = ['\n', rootName, ' {'] - for key in obj[rootName]: - if isinstance(obj[rootName][key], dict): - ret = ret + [ '\n\t', key, ' {' ] - for key1 in obj[rootName][key]: - ret = ret + [ '\n\t\t', toKV(key1, obj[rootName][key][key1]) ] - ret = ret + [ '\n\t', '}' ] - elif isinstance(obj[rootName][key], list): - for v in obj[rootName][key]: - ret = ret + ['\n\t', toKV(key, v) ] - else: - ret = ret + ['\n\t', toKV(key, obj[rootName][key]) ] - return ret + ['\n}' ] - + rootName = obj.keys()[0] + ret = ['\n', rootName, ' {'] + for key in obj[rootName]: + if isinstance(obj[rootName][key], dict): + ret = ret + ['\n\t', key, ' {'] + for key1 in obj[rootName][key]: + ret = ret + ['\n\t\t', toKV(key1, obj[rootName][key][key1])] + ret = ret + ['\n\t', '}'] + elif isinstance(obj[rootName][key], list): + for v in obj[rootName][key]: + ret = ret + ['\n\t', toKV(key, v)] + else: + ret = ret + ['\n\t', toKV(key, obj[rootName][key])] + return ret + ['\n}'] + def _getBottomLayers(layer): - return [ bottomLayer.name for bottomLayer in _getInboundLayers(layer) ] + return [bottomLayer.name for bottomLayer in _getInboundLayers(layer)] def _parseActivation(layer, customLayerName=None): - kerasActivation = keras.activations.serialize(layer.activation) - if kerasActivation not in supportedCaffeActivations: - raise TypeError('Unsupported activation ' + kerasActivation + ' for the layer:' + layer.name) - if customLayerName is not None: - return { 'layer':{'name':customLayerName, 'type':supportedCaffeActivations[kerasActivation], 'top':layer.name, 'bottom':layer.name }} - else: - return { 'layer':{'name':layer.name, 'type':supportedCaffeActivations[kerasActivation], 'top':layer.name, 'bottom':_getBottomLayers(layer) }} + kerasActivation = keras.activations.serialize(layer.activation) + if kerasActivation not in supportedCaffeActivations: + raise TypeError( + 'Unsupported activation ' + + kerasActivation + + ' for the layer:' + + layer.name) + if customLayerName is not None: + return {'layer': {'name': customLayerName, + 'type': supportedCaffeActivations[kerasActivation], 'top': layer.name, 'bottom': layer.name}} + else: + return {'layer': {'name': layer.name, + 'type': supportedCaffeActivations[kerasActivation], 'top': layer.name, 'bottom': _getBottomLayers(layer)}} def _shouldParseActivation(layer): - ignore_activation = [ keras.layers.SimpleRNN , keras.layers.LSTM ] - return hasattr(layer, 'activation') and (type(layer) not in ignore_activation) and keras.activations.serialize(layer.activation) != 'linear' + ignore_activation = [keras.layers.SimpleRNN, keras.layers.LSTM] + return hasattr(layer, 'activation') and (type( + layer) not in ignore_activation) and keras.activations.serialize(layer.activation) != 'linear' + def _parseKerasLayer(layer): - layerType = type(layer) - if layerType in specialLayers: - return specialLayers[layerType](layer) - elif layerType == keras.layers.Activation: - return [ _parseActivation(layer) ] - param = layerParamMapping[layerType](layer) - paramName = param.keys()[0] - if layerType == keras.layers.InputLayer: - ret = { 'layer': { 'name':layer.name, 'type':'Data', paramName:param[paramName], 'top':layer.name, 'top':'label' } } - else: - ret = { 'layer': { 'name':layer.name, 'type':supportedLayers[layerType], 'bottom':_getBottomLayers(layer), 'top':layer.name, paramName:param[paramName] } } - return [ ret, _parseActivation(layer, layer.name + '_activation') ] if _shouldParseActivation(layer) else [ ret ] + layerType = type(layer) + if layerType in specialLayers: + return specialLayers[layerType](layer) + elif layerType == keras.layers.Activation: + return [_parseActivation(layer)] + param = layerParamMapping[layerType](layer) + paramName = param.keys()[0] + if layerType == keras.layers.InputLayer: + ret = { + 'layer': { + 'name': layer.name, + 'type': 'Data', + paramName: param[paramName], + 'top': layer.name, + 'top': 'label'}} + else: + ret = { + 'layer': { + 'name': layer.name, + 'type': supportedLayers[layerType], + 'bottom': _getBottomLayers(layer), + 'top': layer.name, + paramName: param[paramName]}} + return [ret, _parseActivation( + layer, layer.name + '_activation')] if _shouldParseActivation(layer) else [ret] def _parseBatchNorm(layer): - bnName = layer.name + '_1' - config = layer.get_config() - bias_term = 'true' if config['center'] else 'false' - return [ { 'layer': { 'name':bnName, 'type':'BatchNorm', 'bottom':_getBottomLayers(layer), 'top':bnName, 'batch_norm_param':{'moving_average_fraction':layer.momentum, 'eps':layer.epsilon} } }, { 'layer': { 'name':layer.name, 'type':'Scale', 'bottom':bnName, 'top':layer.name, 'scale_param':{'bias_term':bias_term} } } ] + bnName = layer.name + '_1' + config = layer.get_config() + bias_term = 'true' if config['center'] else 'false' + return [{'layer': {'name': bnName, 'type': 'BatchNorm', 'bottom': _getBottomLayers(layer), 'top': bnName, 'batch_norm_param': {'moving_average_fraction': layer.momentum, 'eps': layer.epsilon}}}, { + 'layer': {'name': layer.name, 'type': 'Scale', 'bottom': bnName, 'top': layer.name, 'scale_param': {'bias_term': bias_term}}}] + # The special are redirected to their custom parse function in _parseKerasLayer specialLayers = { keras.layers.Flatten: lambda x: [], keras.layers.BatchNormalization: _parseBatchNorm - } - +} + + def getConvParam(layer): - stride = (1, 1) if layer.strides is None else layer.strides - padding = [layer.kernel_size[0] / 2, layer.kernel_size[1] / 2] if layer.padding == 'same' else [0, 0] - config = layer.get_config() - return {'num_output':layer.filters,'bias_term':str(config['use_bias']).lower(),'kernel_h':layer.kernel_size[0], 'kernel_w':layer.kernel_size[1], 'stride_h':stride[0],'stride_w':stride[1],'pad_h':padding[0], 'pad_w':padding[1]} + stride = (1, 1) if layer.strides is None else layer.strides + padding = [ + layer.kernel_size[0] / + 2, + layer.kernel_size[1] / + 2] if layer.padding == 'same' else [ + 0, + 0] + config = layer.get_config() + return {'num_output': layer.filters, 'bias_term': str(config['use_bias']).lower( + ), 'kernel_h': layer.kernel_size[0], 'kernel_w': layer.kernel_size[1], 'stride_h': stride[0], 'stride_w': stride[1], 'pad_h': padding[0], 'pad_w': padding[1]} + def getUpSamplingParam(layer): - return { 'size_h':layer.size[0], 'size_w':layer.size[1] } + return {'size_h': layer.size[0], 'size_w': layer.size[1]} + def getPoolingParam(layer, pool='MAX'): - stride = (1, 1) if layer.strides is None else layer.strides - padding = [layer.pool_size[0] / 2, layer.pool_size[1] / 2] if layer.padding == 'same' else [0, 0] - return {'pool':pool, 'kernel_h':layer.pool_size[0], 'kernel_w':layer.pool_size[1], 'stride_h':stride[0],'stride_w':stride[1],'pad_h':padding[0], 'pad_w':padding[1]} + stride = (1, 1) if layer.strides is None else layer.strides + padding = [ + layer.pool_size[0] / + 2, + layer.pool_size[1] / + 2] if layer.padding == 'same' else [ + 0, + 0] + return {'pool': pool, 'kernel_h': layer.pool_size[0], 'kernel_w': layer.pool_size[1], + 'stride_h': stride[0], 'stride_w': stride[1], 'pad_h': padding[0], 'pad_w': padding[1]} + def getRecurrentParam(layer): - if(not layer.use_bias): - raise Exception('Only use_bias=True supported for recurrent layers') - if(keras.activations.serialize(layer.activation) != 'tanh'): - raise Exception('Only tanh activation supported for recurrent layers') - if(layer.dropout != 0 or layer.recurrent_dropout != 0): - raise Exception('Only dropout not supported for recurrent layers') - return {'num_output': layer.units, 'return_sequences': str(layer.return_sequences).lower() } - -# TODO: Update AveragePooling2D when we add maxpooling support + if(not layer.use_bias): + raise Exception('Only use_bias=True supported for recurrent layers') + if(keras.activations.serialize(layer.activation) != 'tanh'): + raise Exception('Only tanh activation supported for recurrent layers') + if(layer.dropout != 0 or layer.recurrent_dropout != 0): + raise Exception('Only dropout not supported for recurrent layers') + return {'num_output': layer.units, 'return_sequences': str( + layer.return_sequences).lower()} + + +# TODO: Update AveragePooling2D when we add maxpooling support layerParamMapping = { - keras.layers.InputLayer: lambda l: \ - {'data_param': {'batch_size': l.batch_size}}, - keras.layers.Dense: lambda l: \ + keras.layers.InputLayer: lambda l: + {'data_param': {'batch_size': l.batch_size}}, + keras.layers.Dense: lambda l: {'inner_product_param': {'num_output': l.units}}, - keras.layers.Dropout: lambda l: \ + keras.layers.Dropout: lambda l: {'dropout_param': {'dropout_ratio': l.rate}}, - keras.layers.Add: lambda l: \ + keras.layers.Add: lambda l: {'eltwise_param': {'operation': 'SUM'}}, - keras.layers.Concatenate: lambda l: \ + keras.layers.Concatenate: lambda l: {'concat_param': {'axis': _getCompensatedAxis(l)}}, - keras.layers.Conv2DTranspose: lambda l: \ + keras.layers.Conv2DTranspose: lambda l: {'convolution_param': getConvParam(l)}, - keras.layers.UpSampling2D: lambda l: \ + keras.layers.UpSampling2D: lambda l: {'upsample_param': getUpSamplingParam(l)}, - keras.layers.Conv2D: lambda l: \ + keras.layers.Conv2D: lambda l: {'convolution_param': getConvParam(l)}, - keras.layers.MaxPooling2D: lambda l: \ + keras.layers.MaxPooling2D: lambda l: {'pooling_param': getPoolingParam(l, 'MAX')}, - keras.layers.AveragePooling2D: lambda l: \ + keras.layers.AveragePooling2D: lambda l: {'pooling_param': getPoolingParam(l, 'AVE')}, - keras.layers.SimpleRNN: lambda l: \ + keras.layers.SimpleRNN: lambda l: {'recurrent_param': getRecurrentParam(l)}, - keras.layers.LSTM: lambda l: \ + keras.layers.LSTM: lambda l: {'recurrent_param': getRecurrentParam(l)}, - } +} + def _checkIfValid(myList, fn, errorMessage): - bool_vals = np.array([ fn(elem) for elem in myList]) - unsupported_elems = np.where(bool_vals)[0] - if len(unsupported_elems) != 0: - raise ValueError(errorMessage + str(np.array(myList)[unsupported_elems])) + bool_vals = np.array([fn(elem) for elem in myList]) + unsupported_elems = np.where(bool_vals)[0] + if len(unsupported_elems) != 0: + raise ValueError(errorMessage + + str(np.array(myList)[unsupported_elems])) + def _transformLayer(layer, batch_size): - if type(layer) == keras.layers.InputLayer: - layer.batch_size = batch_size - return [ layer ] + if isinstance(layer, keras.layers.InputLayer): + layer.batch_size = batch_size + return [layer] + def _appendKerasLayers(fileHandle, kerasLayers, batch_size): - if len(kerasLayers) >= 1: - transformedLayers = list(chain.from_iterable(imap(lambda layer: _transformLayer(layer, batch_size), kerasLayers))) - jsonLayers = list(chain.from_iterable(imap(lambda layer: _parseKerasLayer(layer), transformedLayers))) - parsedLayers = list(chain.from_iterable(imap(lambda layer: _parseJSONObject(layer), jsonLayers))) - fileHandle.write(''.join(parsedLayers)) - fileHandle.write('\n') - + if len(kerasLayers) >= 1: + transformedLayers = list( + chain.from_iterable( + imap( + lambda layer: _transformLayer( + layer, + batch_size), + kerasLayers))) + jsonLayers = list( + chain.from_iterable( + imap( + lambda layer: _parseKerasLayer(layer), + transformedLayers))) + parsedLayers = list( + chain.from_iterable( + imap( + lambda layer: _parseJSONObject(layer), + jsonLayers))) + fileHandle.write(''.join(parsedLayers)) + fileHandle.write('\n') + + def lossLayerStr(layerType, bottomLayer): - return 'layer {\n name: "loss"\n type: "' + layerType + '"\n bottom: "' + bottomLayer + '"\n bottom: "label"\n top: "loss"\n}\n' - + return 'layer {\n name: "loss"\n type: "' + layerType + \ + '"\n bottom: "' + bottomLayer + '"\n bottom: "label"\n top: "loss"\n}\n' + + def _appendKerasLayerWithoutActivation(fileHandle, layer, batch_size): - if type(layer) != keras.layers.Activation: - lastLayerActivation = layer.activation - layer.activation = keras.activations.linear - _appendKerasLayers(fileHandle, [layer], batch_size) - layer.activation = lastLayerActivation + if not isinstance(layer, keras.layers.Activation): + lastLayerActivation = layer.activation + layer.activation = keras.activations.linear + _appendKerasLayers(fileHandle, [layer], batch_size) + layer.activation = lastLayerActivation + def _getExactlyOneBottomLayer(layer): - bottomLayers = _getBottomLayers(layer) - if len(bottomLayers) != 1: - raise Exception('Expected only one bottom layer for ' + str(layer.name) + ', but found ' + str(bottomLayers)) - return bottomLayers[0] + bottomLayers = _getBottomLayers(layer) + if len(bottomLayers) != 1: + raise Exception('Expected only one bottom layer for ' + + str(layer.name) + ', but found ' + str(bottomLayers)) + return bottomLayers[0] + def _isMeanSquaredError(loss): - return loss == 'mean_squared_error' or loss == 'mse' or loss == 'MSE' - -def convertKerasToCaffeNetwork(kerasModel, outCaffeNetworkFilePath, batch_size): - _checkIfValid(kerasModel.layers, lambda layer: False if type(layer) in supportedLayers else True, 'Unsupported Layers:') - with open(outCaffeNetworkFilePath, 'w') as f: - # Write the parsed layers for all but the last layer - _appendKerasLayers(f, kerasModel.layers[:-1], batch_size) - # Now process the last layer with loss - lastLayer = kerasModel.layers[-1] - if _isMeanSquaredError(kerasModel.loss): - _appendKerasLayers(f, [ lastLayer ], batch_size) - f.write(lossLayerStr('EuclideanLoss', lastLayer.name)) - elif kerasModel.loss == 'categorical_crossentropy': - _appendKerasLayerWithoutActivation(f, lastLayer, batch_size) - bottomLayer = _getExactlyOneBottomLayer(lastLayer) if type(lastLayer) == keras.layers.Activation else lastLayer.name - lastLayerActivation = str(keras.activations.serialize(lastLayer.activation)) - if lastLayerActivation == 'softmax' and kerasModel.loss == 'categorical_crossentropy': - f.write(lossLayerStr('SoftmaxWithLoss', bottomLayer)) - else: - raise Exception('Unsupported loss layer ' + str(kerasModel.loss) + ' (where last layer activation ' + lastLayerActivation + ').') - else: - raise Exception('Unsupported loss layer ' + str(kerasModel.loss) + ' (where last layer activation ' + lastLayerActivation + ').') + return loss == 'mean_squared_error' or loss == 'mse' or loss == 'MSE' + + +def convertKerasToCaffeNetwork( + kerasModel, outCaffeNetworkFilePath, batch_size): + _checkIfValid(kerasModel.layers, lambda layer: False if type( + layer) in supportedLayers else True, 'Unsupported Layers:') + with open(outCaffeNetworkFilePath, 'w') as f: + # Write the parsed layers for all but the last layer + _appendKerasLayers(f, kerasModel.layers[:-1], batch_size) + # Now process the last layer with loss + lastLayer = kerasModel.layers[-1] + if _isMeanSquaredError(kerasModel.loss): + _appendKerasLayers(f, [lastLayer], batch_size) + f.write(lossLayerStr('EuclideanLoss', lastLayer.name)) + elif kerasModel.loss == 'categorical_crossentropy': + _appendKerasLayerWithoutActivation(f, lastLayer, batch_size) + bottomLayer = _getExactlyOneBottomLayer(lastLayer) if isinstance( + lastLayer, keras.layers.Activation) else lastLayer.name + lastLayerActivation = str( + keras.activations.serialize( + lastLayer.activation)) + if lastLayerActivation == 'softmax' and kerasModel.loss == 'categorical_crossentropy': + f.write(lossLayerStr('SoftmaxWithLoss', bottomLayer)) + else: + raise Exception('Unsupported loss layer ' + + str(kerasModel.loss) + + ' (where last layer activation ' + + lastLayerActivation + + ').') + else: + raise Exception('Unsupported loss layer ' + + str(kerasModel.loss) + + ' (where last layer activation ' + + lastLayerActivation + + ').') def getNumPyMatrixFromKerasWeight(param): - x = np.array(param) - if len(x.shape) > 2: - x = x.transpose(3, 2, 0, 1) - return x.reshape(x.shape[0], -1) - elif len(x.shape) == 1: - return np.matrix(param).transpose() - else: - return x + x = np.array(param) + if len(x.shape) > 2: + x = x.transpose(3, 2, 0, 1) + return x.reshape(x.shape[0], -1) + elif len(x.shape) == 1: + return np.matrix(param).transpose() + else: + return x defaultSolver = """ solver_mode: CPU """ + def evaluateValue(val): - if type(val) == int or type(val) == float: - return float(val) - else: - return K.eval(val) - -def convertKerasToCaffeSolver(kerasModel, caffeNetworkFilePath, outCaffeSolverFilePath, max_iter, test_iter, test_interval, display, lr_policy, weight_decay, regularization_type): - if type(kerasModel.optimizer) == keras.optimizers.SGD: - solver = 'type: "Nesterov"\n' if kerasModel.optimizer.nesterov else 'type: "SGD"\n' - elif type(kerasModel.optimizer) == keras.optimizers.Adagrad: - solver = 'type: "Adagrad"\n' - elif type(kerasModel.optimizer) == keras.optimizers.Adam: - solver = 'type: "Adam"\n' - else: - raise Exception('Only sgd (with/without momentum/nesterov), Adam and Adagrad supported.') - base_lr = evaluateValue(kerasModel.optimizer.lr) if hasattr(kerasModel.optimizer, 'lr') else 0.01 - gamma = evaluateValue(kerasModel.optimizer.decay) if hasattr(kerasModel.optimizer, 'decay') else 0.0 - with open(outCaffeSolverFilePath, 'w') as f: - f.write('net: "' + caffeNetworkFilePath + '"\n') - f.write(defaultSolver) - f.write(solver) - f.write('lr_policy: "' + lr_policy + '"\n') - f.write('regularization_type: "' + str(regularization_type) + '"\n') - f.write('weight_decay: ' + str(weight_decay) + '\n') - f.write('max_iter: ' + str(max_iter) + '\ntest_iter: ' + str(test_iter) + '\ntest_interval: ' + str(test_interval) + '\n') - f.write('display: ' + str(display) + '\n') - f.write('base_lr: ' + str(base_lr) + '\n') - f.write('gamma: ' + str(gamma) + '\n') - if type(kerasModel.optimizer) == keras.optimizers.SGD: - momentum = evaluateValue(kerasModel.optimizer.momentum) if hasattr(kerasModel.optimizer, 'momentum') else 0.0 - f.write('momentum: ' + str(momentum) + '\n') - elif type(kerasModel.optimizer) == keras.optimizers.Adam: - momentum = evaluateValue(kerasModel.optimizer.beta_1) if hasattr(kerasModel.optimizer, 'beta_1') else 0.9 - momentum2 = evaluateValue(kerasModel.optimizer.beta_2) if hasattr(kerasModel.optimizer, 'beta_2') else 0.999 - delta = evaluateValue(kerasModel.optimizer.epsilon) if hasattr(kerasModel.optimizer, 'epsilon') else 1e-8 - f.write('momentum: ' + str(momentum) + '\n') - f.write('momentum2: ' + str(momentum2) + '\n') - f.write('delta: ' + str(delta) + '\n') - elif type(kerasModel.optimizer) == keras.optimizers.Adagrad: - delta = evaluateValue(kerasModel.optimizer.epsilon) if hasattr(kerasModel.optimizer, 'epsilon') else 1e-8 - f.write('delta: ' + str(delta) + '\n') - else: - raise Exception('Only sgd (with/without momentum/nesterov), Adam and Adagrad supported.') + if isinstance(val, int) or isinstance(val, float): + return float(val) + else: + return K.eval(val) + + +def convertKerasToCaffeSolver(kerasModel, caffeNetworkFilePath, outCaffeSolverFilePath, + max_iter, test_iter, test_interval, display, lr_policy, weight_decay, regularization_type): + if isinstance(kerasModel.optimizer, keras.optimizers.SGD): + solver = 'type: "Nesterov"\n' if kerasModel.optimizer.nesterov else 'type: "SGD"\n' + elif isinstance(kerasModel.optimizer, keras.optimizers.Adagrad): + solver = 'type: "Adagrad"\n' + elif isinstance(kerasModel.optimizer, keras.optimizers.Adam): + solver = 'type: "Adam"\n' + else: + raise Exception( + 'Only sgd (with/without momentum/nesterov), Adam and Adagrad supported.') + base_lr = evaluateValue( + kerasModel.optimizer.lr) if hasattr( + kerasModel.optimizer, + 'lr') else 0.01 + gamma = evaluateValue( + kerasModel.optimizer.decay) if hasattr( + kerasModel.optimizer, + 'decay') else 0.0 + with open(outCaffeSolverFilePath, 'w') as f: + f.write('net: "' + caffeNetworkFilePath + '"\n') + f.write(defaultSolver) + f.write(solver) + f.write('lr_policy: "' + lr_policy + '"\n') + f.write('regularization_type: "' + str(regularization_type) + '"\n') + f.write('weight_decay: ' + str(weight_decay) + '\n') + f.write( + 'max_iter: ' + + str(max_iter) + + '\ntest_iter: ' + + str(test_iter) + + '\ntest_interval: ' + + str(test_interval) + + '\n') + f.write('display: ' + str(display) + '\n') + f.write('base_lr: ' + str(base_lr) + '\n') + f.write('gamma: ' + str(gamma) + '\n') + if isinstance(kerasModel.optimizer, keras.optimizers.SGD): + momentum = evaluateValue( + kerasModel.optimizer.momentum) if hasattr( + kerasModel.optimizer, + 'momentum') else 0.0 + f.write('momentum: ' + str(momentum) + '\n') + elif isinstance(kerasModel.optimizer, keras.optimizers.Adam): + momentum = evaluateValue( + kerasModel.optimizer.beta_1) if hasattr( + kerasModel.optimizer, + 'beta_1') else 0.9 + momentum2 = evaluateValue( + kerasModel.optimizer.beta_2) if hasattr( + kerasModel.optimizer, + 'beta_2') else 0.999 + delta = evaluateValue( + kerasModel.optimizer.epsilon) if hasattr( + kerasModel.optimizer, + 'epsilon') else 1e-8 + f.write('momentum: ' + str(momentum) + '\n') + f.write('momentum2: ' + str(momentum2) + '\n') + f.write('delta: ' + str(delta) + '\n') + elif isinstance(kerasModel.optimizer, keras.optimizers.Adagrad): + delta = evaluateValue( + kerasModel.optimizer.epsilon) if hasattr( + kerasModel.optimizer, + 'epsilon') else 1e-8 + f.write('delta: ' + str(delta) + '\n') + else: + raise Exception( + 'Only sgd (with/without momentum/nesterov), Adam and Adagrad supported.') def getInputMatrices(layer): - if type(layer) == keras.layers.LSTM or type(layer) == keras.layers.SimpleRNN: - weights = layer.get_weights() - return [np.vstack((weights[0], weights[1])), np.matrix(weights[2]) ] - else: - return [ getNumPyMatrixFromKerasWeight(param) for param in layer.get_weights() ] + if isinstance(layer, keras.layers.LSTM) or isinstance( + layer, keras.layers.SimpleRNN): + weights = layer.get_weights() + return [np.vstack((weights[0], weights[1])), np.matrix(weights[2])] + else: + return [getNumPyMatrixFromKerasWeight( + param) for param in layer.get_weights()] + def convertKerasToSystemMLModel(spark, kerasModel, outDirectory): - _checkIfValid(kerasModel.layers, lambda layer: False if len(layer.get_weights()) <= 4 or len(layer.get_weights()) != 3 else True, 'Unsupported number of weights:') - layers = [layer for layer in kerasModel.layers if len(layer.get_weights()) > 0] - sc = spark._sc - biasToTranspose = [ keras.layers.Dense ] - dmlLines = [] - script_java = sc._jvm.org.apache.sysml.api.mlcontext.ScriptFactory.dml('') - for layer in layers: - inputMatrices = getInputMatrices(layer) - potentialVar = [ layer.name + '_weight', layer.name + '_bias', layer.name + '_1_weight', layer.name + '_1_bias' ] - for i in range(len(inputMatrices)): - dmlLines = dmlLines + [ 'write(' + potentialVar[i] + ', "' + outDirectory + '/' + potentialVar[i] + '.mtx", format="binary");\n' ] - mat = inputMatrices[i].transpose() if (i == 1 and type(layer) in biasToTranspose) else inputMatrices[i] - py4j.java_gateway.get_method(script_java, "in")(potentialVar[i], convertToMatrixBlock(sc, mat)) - script_java.setScriptString(''.join(dmlLines)) - ml = sc._jvm.org.apache.sysml.api.mlcontext.MLContext(sc._jsc) - ml.execute(script_java) + _checkIfValid( + kerasModel.layers, + lambda layer: False if len( + layer.get_weights()) <= 4 or len( + layer.get_weights()) != 3 else True, + 'Unsupported number of weights:') + layers = [ + layer for layer in kerasModel.layers if len( + layer.get_weights()) > 0] + sc = spark._sc + biasToTranspose = [keras.layers.Dense] + dmlLines = [] + script_java = sc._jvm.org.apache.sysml.api.mlcontext.ScriptFactory.dml('') + for layer in layers: + inputMatrices = getInputMatrices(layer) + potentialVar = [ + layer.name + '_weight', + layer.name + '_bias', + layer.name + '_1_weight', + layer.name + '_1_bias'] + for i in range(len(inputMatrices)): + dmlLines = dmlLines + \ + ['write(' + potentialVar[i] + ', "' + outDirectory + + '/' + potentialVar[i] + '.mtx", format="binary");\n'] + mat = inputMatrices[i].transpose() if ( + i == 1 and type(layer) in biasToTranspose) else inputMatrices[i] + py4j.java_gateway.get_method(script_java, "in")( + potentialVar[i], convertToMatrixBlock(sc, mat)) + script_java.setScriptString(''.join(dmlLines)) + ml = sc._jvm.org.apache.sysml.api.mlcontext.MLContext(sc._jsc) + ml.execute(script_java) http://git-wip-us.apache.org/repos/asf/systemml/blob/9e7ee19a/src/main/python/systemml/random/__init__.py ---------------------------------------------------------------------- diff --git a/src/main/python/systemml/random/__init__.py b/src/main/python/systemml/random/__init__.py index 65f6bbd..68a74b8 100644 --- a/src/main/python/systemml/random/__init__.py +++ b/src/main/python/systemml/random/__init__.py @@ -1,4 +1,4 @@ -#------------------------------------------------------------- +# ------------------------------------------------------------- # # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file @@ -17,7 +17,7 @@ # specific language governing permissions and limitations # under the License. # -#------------------------------------------------------------- +# ------------------------------------------------------------- """ ======================== @@ -38,4 +38,4 @@ uniform Uniform distribution. from .sampling import * -__all__ = sampling.__all__ \ No newline at end of file +__all__ = sampling.__all__ http://git-wip-us.apache.org/repos/asf/systemml/blob/9e7ee19a/src/main/python/systemml/random/sampling.py ---------------------------------------------------------------------- diff --git a/src/main/python/systemml/random/sampling.py b/src/main/python/systemml/random/sampling.py index d320536..ab74250 100644 --- a/src/main/python/systemml/random/sampling.py +++ b/src/main/python/systemml/random/sampling.py @@ -1,4 +1,4 @@ -#------------------------------------------------------------- +# ------------------------------------------------------------- # # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file @@ -17,7 +17,7 @@ # specific language governing permissions and limitations # under the License. # -#------------------------------------------------------------- +# ------------------------------------------------------------- __all__ = ['normal', 'uniform', 'poisson'] @@ -27,6 +27,7 @@ from ..defmatrix import * # This helps to provide dml containing output ID in constructSamplingNode OUTPUT_ID = '$$OutputID$$' + def constructSamplingNode(inputs, dml): """ Convenient utility to create an intermediate of AST. @@ -38,31 +39,35 @@ def constructSamplingNode(inputs, dml): """ dmlOp = DMLOp(inputs) out = matrix(None, op=dmlOp) - dmlOp.dml = [out.ID if x==OUTPUT_ID else x for x in dml] + dmlOp.dml = [out.ID if x == OUTPUT_ID else x for x in dml] return out + INPUTS = [] + + def asStr(arg): """ Internal use only: Convenient utility to update inputs and return appropriate string value """ if isinstance(arg, matrix): - INPUTS = INPUTS + [ arg ] + INPUTS = INPUTS + [arg] return arg.ID else: return str(arg) - -def normal(loc=0.0, scale=1.0, size=(1,1), sparsity=1.0): + + +def normal(loc=0.0, scale=1.0, size=(1, 1), sparsity=1.0): """ Draw random samples from a normal (Gaussian) distribution. - + Parameters ---------- loc: Mean ("centre") of the distribution. scale: Standard deviation (spread or "width") of the distribution. size: Output shape (only tuple of length 2, i.e. (m, n), supported). sparsity: Sparsity (between 0.0 and 1.0). - + Examples -------- @@ -75,7 +80,7 @@ def normal(loc=0.0, scale=1.0, size=(1,1), sparsity=1.0): array([[ 3.48857226, 6.17261819, 2.51167259], [ 3.60506708, -1.90266305, 3.97601633], [ 3.62245706, 5.9430881 , 2.53070413]]) - + """ if len(size) != 2: raise TypeError('Incorrect type for size. Expected tuple of length 2') @@ -86,22 +91,24 @@ def normal(loc=0.0, scale=1.0, size=(1,1), sparsity=1.0): scale = asStr(scale) sparsity = asStr(sparsity) # loc + scale*standard normal - return constructSamplingNode(INPUTS, [OUTPUT_ID, ' = ', loc,' + ', scale,' * random.normal(', rows, ',', cols, ',', sparsity, ')\n']) + return constructSamplingNode(INPUTS, [ + OUTPUT_ID, ' = ', loc, ' + ', scale, ' * random.normal(', rows, ',', cols, ',', sparsity, ')\n']) + -def uniform(low=0.0, high=1.0, size=(1,1), sparsity=1.0): +def uniform(low=0.0, high=1.0, size=(1, 1), sparsity=1.0): """ Draw samples from a uniform distribution. - + Parameters ---------- low: Lower boundary of the output interval. high: Upper boundary of the output interval. - size: Output shape (only tuple of length 2, i.e. (m, n), supported). + size: Output shape (only tuple of length 2, i.e. (m, n), supported). sparsity: Sparsity (between 0.0 and 1.0). Examples -------- - + >>> import systemml as sml >>> import numpy as np >>> sml.setSparkContext(sc) @@ -121,22 +128,24 @@ def uniform(low=0.0, high=1.0, size=(1,1), sparsity=1.0): low = asStr(low) high = asStr(high) sparsity = asStr(sparsity) - return constructSamplingNode(INPUTS, [OUTPUT_ID, ' = random.uniform(', rows, ',', cols, ',', sparsity, ',', low, ',', high, ')\n']) + return constructSamplingNode(INPUTS, [ + OUTPUT_ID, ' = random.uniform(', rows, ',', cols, ',', sparsity, ',', low, ',', high, ')\n']) -def poisson(lam=1.0, size=(1,1), sparsity=1.0): + +def poisson(lam=1.0, size=(1, 1), sparsity=1.0): """ Draw samples from a Poisson distribution. - + Parameters ---------- lam: Expectation of interval, should be > 0. - size: Output shape (only tuple of length 2, i.e. (m, n), supported). + size: Output shape (only tuple of length 2, i.e. (m, n), supported). sparsity: Sparsity (between 0.0 and 1.0). - - + + Examples -------- - + >>> import systemml as sml >>> import numpy as np >>> sml.setSparkContext(sc) @@ -146,7 +155,7 @@ def poisson(lam=1.0, size=(1,1), sparsity=1.0): array([[ 1., 0., 2.], [ 1., 0., 0.], [ 0., 0., 0.]]) - + """ if len(size) != 2: raise TypeError('Incorrect type for size. Expected tuple of length 2') @@ -155,4 +164,5 @@ def poisson(lam=1.0, size=(1,1), sparsity=1.0): cols = asStr(size[1]) lam = asStr(lam) sparsity = asStr(sparsity) - return constructSamplingNode(INPUTS, [OUTPUT_ID, ' = random.poisson(', rows, ',', cols, ',', sparsity, ',', lam, ')\n']) + return constructSamplingNode(INPUTS, [ + OUTPUT_ID, ' = random.poisson(', rows, ',', cols, ',', sparsity, ',', lam, ')\n'])