[madlib] 02/03: DL: Add validation for metrics in compile_params

jingyimei Wed, 22 May 2019 15:09:54 -0700

This is an automated email from the ASF dual-hosted git repository.

jingyimei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git


commit ddbdcdd178c6cac179093f0726b29773ff3f0636
Author: Nandish Jayaram <[email protected]>
AuthorDate: Wed May 15 16:25:19 2019 -0700

    DL: Add validation for metrics in compile_params
    
    JIRA: MADLIB-1338
    Add validation so that compile_params accepts at most one metric to
    compute. This validation also includes a check for metrics that are
    not supported in 1.16. This commit also adds the necessary unit
    tests.
    
    Closes #393
    Co-authored-by: Jingyi Mei <[email protected]>
---
 .../modules/deep_learning/madlib_keras.py_in       |  8 ++---
 .../deep_learning/madlib_keras_wrapper.py_in       | 27 ++++++++------
 .../test/unit_tests/test_madlib_keras.py_in        | 42 ++++++++++++++++++----
 3 files changed, 57 insertions(+), 20 deletions(-)

diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in 
b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
index 208cb12..4b1998e 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in
@@ -354,9 +354,9 @@ def compute_loss_and_metrics(schema_madlib, table, 
dependent_varname,
     end_val = time.time()
     plpy.info("Time for evaluation in iteration {0}: {1} sec.". format(
         curr_iter, end_val - start_val))
-    if len(evaluate_result) < 2:
-        plpy.error('Calling evaluate on table {0} returned < 2 '
-                   'metrics. Expected both loss and a metric.'.format(
+    if len(evaluate_result) not in [1, 2]:
+        plpy.error('Calling evaluate on table {0} must return loss '
+                   'and at most one metric value.'.format(
             table))
     loss = evaluate_result[0]
     metric = evaluate_result[1]
@@ -698,5 +698,5 @@ def internal_keras_eval_final(state, **kwargs):
     loss /= image_count
     accuracy /= image_count
 
-    state = loss, accuracy, image_count
+    state = loss, accuracy
     return state
diff --git 
a/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in 
b/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
index e30c9d8..f3b97fd 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in
@@ -169,13 +169,28 @@ def parse_and_validate_compile_params(str_of_args):
     compile_dict = validate_and_literal_eval_keys(compile_dict,
                                                   literal_eval_compile_params,
                                                   accepted_compile_params)
-
     _assert('optimizer' in compile_dict, "optimizer is a required parameter 
for compile")
     opt_name, opt_args = parse_optimizer(compile_dict)
 
     _assert('loss' in compile_dict, "loss is a required parameter for compile")
     validate_compile_param_types(compile_dict)
-    return (opt_name,opt_args,compile_dict)
+    _validate_metrics(compile_dict)
+    return (opt_name, opt_args, compile_dict)
+
+def _validate_metrics(compile_dict):
+    _assert('metrics' not in compile_dict.keys() or
+        compile_dict['metrics'] is None or
+        type(compile_dict['metrics']) is list,
+        "wrong input type for compile parameter metrics: multi-output model"
+        "and user defined metrics are not supported yet, please pass a list")
+    if 'metrics' in compile_dict and compile_dict['metrics']:
+        unsupported_metrics_list = ['sparse_categorical_accuracy',
+            'sparse_categorical_crossentropy', 'top_k_categorical_accuracy',
+            'sparse_top_k_categorical_accuracy']
+        _assert(len(compile_dict['metrics']) == 1,
+            "Only at most one metric is supported.")
+        _assert(compile_dict['metrics'][0] not in unsupported_metrics_list,
+            "Metric {0} is not supported.".format(compile_dict['metrics'][0]))
 
 # Parse the optimizer name and params.
 def parse_optimizer(compile_dict):
@@ -241,7 +256,6 @@ def parse_and_validate_fit_params(fit_param_str):
 # Validate the keys of the given dictionary and run literal_eval on the
 # user-defined subset
 def validate_and_literal_eval_keys(keys_dict, literal_eval_list, 
accepted_list):
-
     for ckey in keys_dict.keys():
         _assert(ckey in accepted_list,
             "{0} is not accepted as a parameter yet. "
@@ -277,13 +291,6 @@ def compile_model(model, compile_params):
     model.compile(**compile_dict)
 
 def validate_compile_param_types(compile_dict):
-
-    _assert('metrics' not in compile_dict.keys() or
-            compile_dict['metrics'] is None or
-            type(compile_dict['metrics']) is list,
-            "wrong input type for compile parameter metrics: multi-output 
model"
-            "and user defined metrics are not supported yet, please pass a 
list")
-
     _assert('loss_weights' not in compile_dict.keys() or
             compile_dict['loss_weights'] is None or
             type(compile_dict['loss_weights']) is list or
diff --git 
a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in
 
b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in
index ea17c79..e3cae8d 100644
--- 
a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in
+++ 
b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in
@@ -42,8 +42,7 @@ class MadlibKerasFitTestCase(unittest.TestCase):
     def setUp(self):
         self.plpy_mock = Mock(spec='error')
         patches = {
-            'plpy': plpy,
-            'utilities.minibatch_preprocessing': Mock()
+            'plpy': plpy
         }
 
         self.plpy_mock_execute = MagicMock()
@@ -678,6 +677,40 @@ class MadlibKerasWrapperTestCase(unittest.TestCase):
         opt_name,opt_args,result_params = 
self.subject.parse_and_validate_compile_params(test_str)
         self.assertDictEqual(result_params, compile_dict)
 
+    def test_validate_metrics_None_pass(self):
+        compile_dict = {'optimizer':'SGD(lr=0.01, decay=1e-6, nesterov=True)',
+                        'metrics':['accuracy'], 
'loss':'categorical_crossentropy'}
+        self.subject._validate_metrics(compile_dict)
+
+    def test_validate_metrics_empty_pass(self):
+        compile_dict = {'optimizer':'SGD(lr=0.01, decay=1e-6, nesterov=True)',
+                        'metrics':[], 'loss':'categorical_crossentropy'}
+        self.subject._validate_metrics(compile_dict)
+
+    def test_validate_metrics_two_params_fail(self):
+        compile_dict = {'optimizer':'SGD(lr=0.01, decay=1e-6, nesterov=True)',
+                        'metrics':['accuracy','mae'], 
'loss':'categorical_crossentropy'}
+        with self.assertRaises(plpy.PLPYException) as error:
+            self.subject._validate_metrics(compile_dict)
+
+    def test_validate_metrics_one_unsupported_fail(self):
+        compile_dict = {'optimizer':'SGD(lr=0.01, decay=1e-6, nesterov=True)',
+                        'metrics':['sparse_categorical_accuracy'], 
'loss':'categorical_crossentropy'}
+        with self.assertRaises(plpy.PLPYException) as error:
+            self.subject._validate_metrics(compile_dict)
+
+    def test_validate_metrics_two_unsupported_fail(self):
+        compile_dict = {'optimizer':'SGD(lr=0.01, decay=1e-6, nesterov=True)',
+                        'metrics':['sparse_categorical_accuracy', 
'sparse_categorical_crossentropy'], 'loss':'categorical_crossentropy'}
+        with self.assertRaises(plpy.PLPYException) as error:
+            self.subject._validate_metrics(compile_dict)
+
+    def test_validate_metrics_one_supported_one_unsupported_fail(self):
+        compile_dict = {'optimizer':'SGD(lr=0.01, decay=1e-6, nesterov=True)',
+                        'metrics':['accuracy', 
'sparse_categorical_crossentropy'], 'loss':'categorical_crossentropy'}
+        with self.assertRaises(plpy.PLPYException) as error:
+            self.subject._validate_metrics(compile_dict)
+
     def test_parse_and_validate_compile_params_default_optimizer_pass(self):
         test_str = "optimizer='SGD', loss='categorical_crossentropy'"
         _,_,result_dict = 
self.subject.parse_and_validate_compile_params(test_str)
@@ -1070,8 +1103,7 @@ class MadlibKerasEvaluationTestCase(unittest.TestCase):
     def setUp(self):
         self.plpy_mock = Mock(spec='error')
         patches = {
-            'plpy': plpy,
-            'utilities.minibatch_preprocessing': Mock()
+            'plpy': plpy
         }
 
         self.plpy_mock_execute = MagicMock()
@@ -1283,9 +1315,7 @@ class MadlibKerasEvaluationTestCase(unittest.TestCase):
         output_state = self.subject.internal_keras_eval_final(input_state)
         agg_loss = output_state[0]
         agg_accuracy = output_state[1]
-        image_count_output = output_state[2]
 
-        self.assertEqual(image_count, image_count_output)
         self.assertAlmostEqual(self.loss, agg_loss,2)
         self.assertAlmostEqual(self.accuracy, agg_accuracy,2)

[madlib] 02/03: DL: Add validation for metrics in compile_params

Reply via email to