This is an automated email from the ASF dual-hosted git repository.

okislal pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git


The following commit(s) were added to refs/heads/master by this push:
     new 2158ff5  DL: Add alternative options for compile params
2158ff5 is described below

commit 2158ff559fafa440367b3022c4e49fa7617c2690
Author: Orhan Kislal <[email protected]>
AuthorDate: Thu Apr 18 10:35:06 2019 -0700

    DL: Add alternative options for compile params
    
    JIRA: MADLIB-1309
    
    Add support for optimizer (string) and loss (objective function).
    Add unit tests for Python functions.
    
    Closes #368
---
 .../modules/deep_learning/madlib_keras.py_in       |  8 +--
 .../deep_learning/madlib_keras_wrapper.py_in       | 73 +++++++++++++++++-----
 .../modules/deep_learning/test/madlib_keras.sql_in | 16 +++++
 .../test/unit_tests/test_madlib_keras.py_in        | 24 +++++++
 4 files changed, 97 insertions(+), 24 deletions(-)

diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in 
b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
index 6add8ba..a69cc6e 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
@@ -101,8 +101,6 @@ def fit(schema_madlib, source_table, model, 
dependent_varname,
     validation_set_provided = bool(validation_table)
     validation_aggregate_accuracy = []; validation_aggregate_loss = []
 
-    optimizers = get_optimizers()
-
     # Compute total buffers on each segment
     total_buffers_per_seg = plpy.execute(
         """ SELECT gp_segment_id, count(*) AS total_buffers_per_seg
@@ -478,12 +476,8 @@ def internal_keras_evaluate(dependent_var, 
independent_var, model_architecture,
     _, _, _, model_weights = madlib_keras_serializer.deserialize_weights(
         model_data, model_shapes)
     model.set_weights(model_weights)
-    optimizers = get_optimizers()
-    (opt_name,final_args,compile_dict) = parse_compile_params(compile_params)
     with K.tf.device(device_name):
-        model.compile(optimizer=optimizers[opt_name](**final_args),
-                      loss=compile_dict['loss'],
-                      metrics=compile_dict['metrics'])
+        compile_model(model, compile_params)
 
     # Since the training data is batched but the validation data isn't, we have
     # to make sure that the validation data np array has the same no of 
dimensions
diff --git 
a/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in 
b/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
index 6ebf96e..211488c 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
@@ -28,6 +28,7 @@ from keras import utils as keras_utils
 from keras.optimizers import *
 
 import keras.optimizers as opt
+import keras.losses as losses
 
 import madlib_keras_serializer
 from utilities.utilities import _assert
@@ -62,13 +63,7 @@ def clear_keras_session():
 def compile_and_set_weights(segment_model, compile_params, device_name,
                             previous_state, model_shapes):
     with K.tf.device(device_name):
-
-        optimizers = get_optimizers()
-        (opt_name,final_args,compile_dict) = 
parse_compile_params(compile_params)
-
-        segment_model.compile(optimizer=optimizers[opt_name](**final_args),
-                              loss=compile_dict['loss'],
-                              metrics=compile_dict['metrics'])
+        compile_model(segment_model, compile_params)
         _, _, _, model_weights = madlib_keras_serializer.deserialize_weights(
             previous_state, model_shapes)
         segment_model.set_weights(model_weights)
@@ -111,25 +106,60 @@ def convert_string_of_args_to_dict(str_of_args):
     return compile_dict
 
 # Parse the compile parameters and the optimizer.
-# Optimizer name and its arguments are returned in addition to the rest of the
-# compile parameters.
 def parse_compile_params(str_of_args):
+    """
+    Args:
+        @param: str_of_args     The string of arguments given by the user
+    Returns:
+        opt_name:               Name of the optimizer
+        opt_args:               Arguments for the optimizer
+        compile_dict:           Dictionary of arguments for keras.compile
+    """
 
     compile_dict = convert_string_of_args_to_dict(str_of_args)
+
+    opt_name,opt_args = parse_optimizer(compile_dict)
+    compile_dict['loss'] = parse_loss(compile_dict)
+
     compile_dict['metrics'] = ast.literal_eval(compile_dict['metrics']) if 
'metrics' in compile_dict.keys() else None
     compile_dict['loss_weights'] = 
ast.literal_eval(compile_dict['loss_weights']) if 'loss_weights' in 
compile_dict.keys() else None
 
-    opt_name = compile_dict['optimizer'].split('(')[0]
+    return (opt_name,opt_args,compile_dict)
+
+# Parse the optimizer name and params.
+def parse_optimizer(compile_dict):
+    """
+    Args:
+        @param: compile_dict    Dictionary of arguments for keras.compile
+    Returns:
+        opt_name:               Name of the optimizer
+        opt_args:               Arguments for the optimizer
+    """
+    opt_split = compile_dict['optimizer'].split('(')
+    opt_name = opt_split[0]
     optimizers = get_optimizers()
     _assert(opt_name in optimizers,
             "model_keras error: invalid optimizer name: {0}".format(opt_name))
-    opt_params = compile_dict['optimizer'].split('(')[1][:-1]
-    opt_params_array = opt_params.split(',')
-    opt_params_clean = map(split_and_strip, opt_params_array)
-    key_value_params = { x[0] : x[1] for x in opt_params_clean}
-    final_args = { key: bool(value) if value == 'True' or value == 'False' 
else  float(value) for key,value in key_value_params.iteritems() }
-
-    return (opt_name,final_args,compile_dict)
+    if len(opt_split) == 1:
+        final_args = None
+    else:
+        opt_params = opt_split[1][:-1]
+        opt_params_array = opt_params.split(',')
+        opt_params_clean = map(split_and_strip, opt_params_array)
+        key_value_params = { x[0] : x[1] for x in opt_params_clean}
+        final_args = { key: bool(value) if value == 'True' or value == 'False'
+                       else float(value)
+                       for key,value in key_value_params.iteritems() }
+    return (opt_name,final_args)
+
+# Parse the loss function.
+def parse_loss(compile_dict):
+    loss_split = compile_dict['loss'].split('.')
+    if (len(loss_split) == 2 and
+        loss_split[0] == 'losses' and
+        loss_split[1] in dir(losses)):
+        return eval('losses.'+loss_split[1])
+    return compile_dict['loss']
 
 # Parse the fit parameters into a dictionary.
 def parse_fit_params(str_of_args):
@@ -152,3 +182,12 @@ def get_optimizers():
         if optimizer.__class__ == type and optimizer.__base__ == opt.Optimizer:
             optimizers[n] = optimizer
     return optimizers
+
+# Run the keras.compile with the given parameters
+def compile_model(model, compile_params):
+    optimizers = get_optimizers()
+    (opt_name,final_args,compile_dict) = parse_compile_params(compile_params)
+    optimizer = optimizers[opt_name](**final_args) if final_args else opt_name
+    model.compile(optimizer=optimizer,
+                  loss=compile_dict['loss'],
+                  metrics=compile_dict['metrics'])
diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in 
b/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in
index 7902db4..8b7bd0c 100644
--- a/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in
+++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras.sql_in
@@ -205,6 +205,21 @@ select assert(trap_error($TRAP$madlib_keras_predict(
     'cifar10_predict');$TRAP$) = 1,
     'Passing batched image table to predict should error out.');
 
+DROP TABLE IF EXISTS keras_out, keras_out_summary;
+SELECT madlib_keras_fit(
+    'cifar_10_sample_batched',
+    'keras_out',
+    'dependent_var',
+    'independent_var',
+    'model_arch',
+    1,
+    $$ optimizer='SGD', loss=losses.categorical_crossentropy, 
metrics=['accuracy']$$::text,
+    $$ batch_size=2, epochs=1, verbose=0 $$::text,
+    1,
+    FALSE,
+    NULL,
+    'model name', 'model desc');
+
 -- -- negative test case for passing non numeric y to fit
 -- induce failure by passing a non numeric column
 create table cifar_10_sample_val_failure as select * from cifar_10_sample_val;
@@ -224,3 +239,4 @@ select assert(trap_error($TRAP$madlib_keras_fit(
            FALSE,
           'cifar_10_sample_val_failure');$TRAP$) = 1,
        'Passing y of type non numeric array to fit should error out.');
+
diff --git 
a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in
 
b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in
index c152c53..c8b649d 100644
--- 
a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in
+++ 
b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in
@@ -175,6 +175,30 @@ class MadlibKerasFitTestCase(unittest.TestCase):
             [0,1,2], [3,3,3], 'dummy_model_json', "foo", "bar", False,
             'dummy_prev_state', **k))
 
+    def test_split_and_strip(self):
+        self.assertEqual(('a','b'), self.subject.split_and_strip(' a = b '))
+
+    def test_parse_fit_params(self):
+        result = {'batch_size':2, 'epochs':1, 'verbose':0}
+        self.assertDictEqual(result, 
self.subject.parse_fit_params('batch_size=2, epochs=1, verbose=0'))
+
+    def test_parse_optimizer(self):
+        opt_name = 'SGD'
+        final_args = {'lr':0.01, 'decay':1e-6, 'nesterov':True}
+        compile_dict = {}
+        compile_dict['optimizer']='SGD(lr=0.01, decay=1e-6, nesterov=True)'
+        result_name, result_params = self.subject.parse_optimizer(compile_dict)
+
+        self.assertEqual(result_name, opt_name)
+        self.assertDictEqual(result_params, final_args)
+
+    def test_parse_compile_params(self):
+
+        test_str = "optimizer=SGD(lr=0.01, decay=1e-6, nesterov=True), 
loss='categorical_crossentropy', metrics=['accuracy']"
+        compile_dict = {'optimizer':'SGD(lr=0.01, decay=1e-6, nesterov=True)', 
'metrics':['accuracy'], 'loss':'categorical_crossentropy', 'loss_weights': None}
+        opt_name,opt_args,result_params = 
self.subject.parse_compile_params(test_str)
+        self.assertDictEqual(result_params, compile_dict)
+
 class MadlibKerasValidatorTestCase(unittest.TestCase):
     def setUp(self):
         self.plpy_mock = Mock(spec='error')

Reply via email to