This is an automated email from the ASF dual-hosted git repository.
okislal pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git
The following commit(s) were added to refs/heads/master by this push:
new 2158ff5 DL: Add alternative options for compile params
2158ff5 is described below
commit 2158ff559fafa440367b3022c4e49fa7617c2690
Author: Orhan Kislal <[email protected]>
AuthorDate: Thu Apr 18 10:35:06 2019 -0700
DL: Add alternative options for compile params
JIRA: MADLIB-1309
Add support for passing the optimizer as a plain string (no constructor
arguments) and the loss as a Keras objective function.
Add unit tests for the Python helper functions.
Closes #368
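
For orientation (a sketch by the editor, not part of the commit): under
the Keras 2.x API this code targets, the two compile_params styles map to
the following model.compile calls. The model is an arbitrary placeholder.

    import keras.optimizers
    import keras.losses as losses
    from keras.models import Sequential
    from keras.layers import Dense

    model = Sequential([Dense(2, input_dim=4, activation='softmax')])

    # Pre-existing style: optimizer constructed with explicit arguments.
    model.compile(optimizer=keras.optimizers.SGD(lr=0.01, decay=1e-6, nesterov=True),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # New alternatives: optimizer as a plain string (Keras defaults apply)
    # and loss as a keras.losses function instead of a string.
    model.compile(optimizer='SGD',
                  loss=losses.categorical_crossentropy,
                  metrics=['accuracy'])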
---
.../modules/deep_learning/madlib_keras.py_in | 8 +--
.../deep_learning/madlib_keras_wrapper.py_in | 73 +++++++++++++++++-----
.../modules/deep_learning/test/madlib_keras.sql_in | 16 +++++
.../test/unit_tests/test_madlib_keras.py_in | 24 +++++++
4 files changed, 97 insertions(+), 24 deletions(-)
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
index 6add8ba..a69cc6e 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
@@ -101,8 +101,6 @@ def fit(schema_madlib, source_table, model, dependent_varname,
validation_set_provided = bool(validation_table)
validation_aggregate_accuracy = []; validation_aggregate_loss = []
- optimizers = get_optimizers()
-
# Compute total buffers on each segment
total_buffers_per_seg = plpy.execute(
""" SELECT gp_segment_id, count(*) AS total_buffers_per_seg
@@ -478,12 +476,8 @@ def internal_keras_evaluate(dependent_var, independent_var, model_architecture,
_, _, _, model_weights = madlib_keras_serializer.deserialize_weights(
model_data, model_shapes)
model.set_weights(model_weights)
- optimizers = get_optimizers()
- (opt_name,final_args,compile_dict) = parse_compile_params(compile_params)
with K.tf.device(device_name):
- model.compile(optimizer=optimizers[opt_name](**final_args),
- loss=compile_dict['loss'],
- metrics=compile_dict['metrics'])
+ compile_model(model, compile_params)
# Since the training data is batched but the validation data isn't, we have
# to make sure that the validation data np array has the same no of dimensions
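
Both call sites in this file now delegate to a single compile_model
helper (added in the wrapper diff below). A minimal standalone sketch of
that fallback pattern, assuming a Keras 2.x environment and using a
hypothetical helper name:

    import keras.optimizers as opt
    from keras.models import Sequential
    from keras.layers import Dense

    # When no constructor arguments were parsed, pass the optimizer name
    # through as a plain string and let Keras apply its defaults.
    def compile_model_sketch(model, opt_name, opt_args, loss, metrics):
        optimizer = getattr(opt, opt_name)(**opt_args) if opt_args else opt_name
        model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

    m = Sequential([Dense(2, input_dim=4, activation='softmax')])
    compile_model_sketch(m, 'SGD', None, 'categorical_crossentropy', ['accuracy'])
    compile_model_sketch(m, 'SGD', {'lr': 0.01}, 'categorical_crossentropy', ['accuracy'])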
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
index 6ebf96e..211488c 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
@@ -28,6 +28,7 @@ from keras import utils as keras_utils
from keras.optimizers import *
import keras.optimizers as opt
+import keras.losses as losses
import madlib_keras_serializer
from utilities.utilities import _assert
@@ -62,13 +63,7 @@ def clear_keras_session():
def compile_and_set_weights(segment_model, compile_params, device_name,
previous_state, model_shapes):
with K.tf.device(device_name):
-
- optimizers = get_optimizers()
- (opt_name,final_args,compile_dict) = parse_compile_params(compile_params)
-
- segment_model.compile(optimizer=optimizers[opt_name](**final_args),
- loss=compile_dict['loss'],
- metrics=compile_dict['metrics'])
+ compile_model(segment_model, compile_params)
_, _, _, model_weights = madlib_keras_serializer.deserialize_weights(
previous_state, model_shapes)
segment_model.set_weights(model_weights)
@@ -111,25 +106,60 @@ def convert_string_of_args_to_dict(str_of_args):
return compile_dict
# Parse the compile parameters and the optimizer.
-# Optimizer name and its arguments are returned in addition to the rest of the
-# compile parameters.
def parse_compile_params(str_of_args):
+ """
+ Args:
+ @param: str_of_args The string of arguments given by the user
+ Returns:
+ opt_name: Name of the optimizer
+ opt_args: Arguments for the optimizer
+ compile_dict: Dictionary of arguments for keras.compile
+ """
compile_dict = convert_string_of_args_to_dict(str_of_args)
+
+ opt_name,opt_args = parse_optimizer(compile_dict)
+ compile_dict['loss'] = parse_loss(compile_dict)
+
compile_dict['metrics'] = ast.literal_eval(compile_dict['metrics']) if 'metrics' in compile_dict.keys() else None
compile_dict['loss_weights'] = ast.literal_eval(compile_dict['loss_weights']) if 'loss_weights' in compile_dict.keys() else None
- opt_name = compile_dict['optimizer'].split('(')[0]
+ return (opt_name,opt_args,compile_dict)
+
+# Parse the optimizer name and params.
+def parse_optimizer(compile_dict):
+ """
+ Args:
+ @param: compile_dict Dictionary of arguments for keras.compile
+ Returns:
+ opt_name: Name of the optimizer
+ opt_args: Arguments for the optimizer
+ """
+ opt_split = compile_dict['optimizer'].split('(')
+ opt_name = opt_split[0]
optimizers = get_optimizers()
_assert(opt_name in optimizers,
"model_keras error: invalid optimizer name: {0}".format(opt_name))
- opt_params = compile_dict['optimizer'].split('(')[1][:-1]
- opt_params_array = opt_params.split(',')
- opt_params_clean = map(split_and_strip, opt_params_array)
- key_value_params = { x[0] : x[1] for x in opt_params_clean}
- final_args = { key: bool(value) if value == 'True' or value == 'False' else float(value) for key,value in key_value_params.iteritems() }
-
- return (opt_name,final_args,compile_dict)
+ if len(opt_split) == 1:
+ final_args = None
+ else:
+ opt_params = opt_split[1][:-1]
+ opt_params_array = opt_params.split(',')
+ opt_params_clean = map(split_and_strip, opt_params_array)
+ key_value_params = { x[0] : x[1] for x in opt_params_clean}
+ final_args = { key: bool(value) if value == 'True' or value == 'False'
+ else float(value)
+ for key,value in key_value_params.iteritems() }
+ return (opt_name,final_args)
+
+# Parse the loss function.
+def parse_loss(compile_dict):
+ loss_split = compile_dict['loss'].split('.')
+ if (len(loss_split) == 2 and
+ loss_split[0] == 'losses' and
+ loss_split[1] in dir(losses)):
+ return eval('losses.'+loss_split[1])
+ return compile_dict['loss']
# Parse the fit parameters into a dictionary.
def parse_fit_params(str_of_args):
@@ -152,3 +182,12 @@ def get_optimizers():
if optimizer.__class__ == type and optimizer.__base__ == opt.Optimizer:
optimizers[n] = optimizer
return optimizers
+
+# Run the keras.compile with the given parameters
+def compile_model(model, compile_params):
+ optimizers = get_optimizers()
+ (opt_name,final_args,compile_dict) = parse_compile_params(compile_params)
+ optimizer = optimizers[opt_name](**final_args) if final_args else opt_name
+ model.compile(optimizer=optimizer,
+ loss=compile_dict['loss'],
+ metrics=compile_dict['metrics'])
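
A standalone sketch of the optimizer-string split parse_optimizer performs
above, simplified: there is no validation against keras.optimizers, and
values are coerced to booleans or floats. (Note that the committed code's
bool(value) evaluates any non-empty string, including 'False', as True;
the sketch compares against 'True' instead.)

    # Simplified stand-in for parse_optimizer: split "Name(args)" into the
    # name and a kwargs dict; the new argument-free form returns None args.
    def parse_optimizer_sketch(optimizer_value):
        name, _, rest = optimizer_value.partition('(')
        if not rest:
            return name, None                  # e.g. optimizer='SGD'
        args = {}
        for pair in rest[:-1].split(','):      # rest[:-1] drops the trailing ')'
            key, _, value = pair.partition('=')
            value = value.strip()
            args[key.strip()] = (value == 'True') if value in ('True', 'False') \
                                else float(value)
        return name, args

    assert parse_optimizer_sketch('SGD') == ('SGD', None)
    assert parse_optimizer_sketch('SGD(lr=0.01, nesterov=True)') == \
           ('SGD', {'lr': 0.01, 'nesterov': True})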
diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in
index 7902db4..8b7bd0c 100644
--- a/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in
+++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in
@@ -205,6 +205,21 @@ select assert(trap_error($TRAP$madlib_keras_predict(
'cifar10_predict');$TRAP$) = 1,
'Passing batched image table to predict should error out.');
+DROP TABLE IF EXISTS keras_out, keras_out_summary;
+SELECT madlib_keras_fit(
+ 'cifar_10_sample_batched',
+ 'keras_out',
+ 'dependent_var',
+ 'independent_var',
+ 'model_arch',
+ 1,
+ $$ optimizer='SGD', loss=losses.categorical_crossentropy, metrics=['accuracy']$$::text,
+ $$ batch_size=2, epochs=1, verbose=0 $$::text,
+ 1,
+ FALSE,
+ NULL,
+ 'model name', 'model desc');
+
-- -- negative test case for passing non numeric y to fit
-- induce failure by passing a non numeric column
create table cifar_10_sample_val_failure as select * from cifar_10_sample_val;
@@ -224,3 +239,4 @@ select assert(trap_error($TRAP$madlib_keras_fit(
FALSE,
'cifar_10_sample_val_failure');$TRAP$) = 1,
'Passing y of type non numeric array to fit should error out.');
+
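
The new fit call above exercises both alternatives at once: an optimizer
given as the bare string 'SGD' and a loss given as the function
losses.categorical_crossentropy. A standalone sketch of the lookup
parse_loss performs, using getattr rather than eval but the same check:

    import keras.losses as losses

    # Map a 'losses.<name>' string to the corresponding keras.losses
    # function; anything else stays a plain string for Keras to resolve.
    def parse_loss_sketch(loss_value):
        loss_split = loss_value.split('.')
        if (len(loss_split) == 2 and loss_split[0] == 'losses'
                and loss_split[1] in dir(losses)):
            return getattr(losses, loss_split[1])
        return loss_value

    assert parse_loss_sketch('losses.categorical_crossentropy') is \
           losses.categorical_crossentropy
    assert parse_loss_sketch('categorical_crossentropy') == 'categorical_crossentropy'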
diff --git a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in
index c152c53..c8b649d 100644
--- a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in
+++ b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in
@@ -175,6 +175,30 @@ class MadlibKerasFitTestCase(unittest.TestCase):
[0,1,2], [3,3,3], 'dummy_model_json', "foo", "bar", False,
'dummy_prev_state', **k))
+ def test_split_and_strip(self):
+ self.assertEqual(('a','b'), self.subject.split_and_strip(' a = b '))
+
+ def test_parse_fit_params(self):
+ result = {'batch_size':2, 'epochs':1, 'verbose':0}
+ self.assertDictEqual(result, self.subject.parse_fit_params('batch_size=2, epochs=1, verbose=0'))
+
+ def test_parse_optimizer(self):
+ opt_name = 'SGD'
+ final_args = {'lr':0.01, 'decay':1e-6, 'nesterov':True}
+ compile_dict = {}
+ compile_dict['optimizer']='SGD(lr=0.01, decay=1e-6, nesterov=True)'
+ result_name, result_params = self.subject.parse_optimizer(compile_dict)
+
+ self.assertEqual(result_name, opt_name)
+ self.assertDictEqual(result_params, final_args)
+
+ def test_parse_compile_params(self):
+
+ test_str = "optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True), loss='categorical_crossentropy', metrics=['accuracy']"
+ compile_dict = {'optimizer':'SGD(lr=0.01, decay=1e-6, nesterov=True)', 'metrics':['accuracy'], 'loss':'categorical_crossentropy', 'loss_weights': None}
+ opt_name,opt_args,result_params = self.subject.parse_compile_params(test_str)
+ self.assertDictEqual(result_params, compile_dict)
+
class MadlibKerasValidatorTestCase(unittest.TestCase):
def setUp(self):
self.plpy_mock = Mock(spec='error')