This is an automated email from the ASF dual-hosted git repository. jingyimei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/madlib.git
commit ddbdcdd178c6cac179093f0726b29773ff3f0636 Author: Nandish Jayaram <[email protected]> AuthorDate: Wed May 15 16:25:19 2019 -0700 DL: Add validation for metrics in compile_params JIRA: MADLIB-1338 Add validation to take in at most one metric to compute as part of compile_params. This validation also includes a check for metrics that are not supported as part of 1.16. This commit also adds the necessary unit tests. Closes #393 Co-authored-by: Jingyi Mei <[email protected]> --- .../modules/deep_learning/madlib_keras.py_in | 8 ++--- .../deep_learning/madlib_keras_wrapper.py_in | 27 ++++++++------ .../test/unit_tests/test_madlib_keras.py_in | 42 ++++++++++++++++++---- 3 files changed, 57 insertions(+), 20 deletions(-) diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in index 208cb12..4b1998e 100644 --- a/src/ports/postgres/modules/deep_learning/madlib_keras.py_in +++ b/src/ports/postgres/modules/deep_learning/madlib_keras.py_in @@ -354,9 +354,9 @@ def compute_loss_and_metrics(schema_madlib, table, dependent_varname, end_val = time.time() plpy.info("Time for evaluation in iteration {0}: {1} sec.". format( curr_iter, end_val - start_val)) - if len(evaluate_result) < 2: - plpy.error('Calling evaluate on table {0} returned < 2 ' - 'metrics. 
Expected both loss and a metric.'.format( + if len(evaluate_result) not in [1, 2]: + plpy.error('Calling evaluate on table {0} must return loss ' + 'and at most one metric value.'.format( table)) loss = evaluate_result[0] metric = evaluate_result[1] @@ -698,5 +698,5 @@ def internal_keras_eval_final(state, **kwargs): loss /= image_count accuracy /= image_count - state = loss, accuracy, image_count + state = loss, accuracy return state diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in index e30c9d8..f3b97fd 100644 --- a/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in +++ b/src/ports/postgres/modules/deep_learning/madlib_keras_wrapper.py_in @@ -169,13 +169,28 @@ def parse_and_validate_compile_params(str_of_args): compile_dict = validate_and_literal_eval_keys(compile_dict, literal_eval_compile_params, accepted_compile_params) - _assert('optimizer' in compile_dict, "optimizer is a required parameter for compile") opt_name, opt_args = parse_optimizer(compile_dict) _assert('loss' in compile_dict, "loss is a required parameter for compile") validate_compile_param_types(compile_dict) - return (opt_name,opt_args,compile_dict) + _validate_metrics(compile_dict) + return (opt_name, opt_args, compile_dict) + +def _validate_metrics(compile_dict): + _assert('metrics' not in compile_dict.keys() or + compile_dict['metrics'] is None or + type(compile_dict['metrics']) is list, + "wrong input type for compile parameter metrics: multi-output model" + "and user defined metrics are not supported yet, please pass a list") + if 'metrics' in compile_dict and compile_dict['metrics']: + unsupported_metrics_list = ['sparse_categorical_accuracy', + 'sparse_categorical_crossentropy', 'top_k_categorical_accuracy', + 'sparse_top_k_categorical_accuracy'] + _assert(len(compile_dict['metrics']) == 1, + "Only at most one metric is supported.") + _assert(compile_dict['metrics'][0] not 
in unsupported_metrics_list, + "Metric {0} is not supported.".format(compile_dict['metrics'][0])) # Parse the optimizer name and params. def parse_optimizer(compile_dict): @@ -241,7 +256,6 @@ def parse_and_validate_fit_params(fit_param_str): # Validate the keys of the given dictionary and run literal_eval on the # user-defined subset def validate_and_literal_eval_keys(keys_dict, literal_eval_list, accepted_list): - for ckey in keys_dict.keys(): _assert(ckey in accepted_list, "{0} is not accepted as a parameter yet. " @@ -277,13 +291,6 @@ def compile_model(model, compile_params): model.compile(**compile_dict) def validate_compile_param_types(compile_dict): - - _assert('metrics' not in compile_dict.keys() or - compile_dict['metrics'] is None or - type(compile_dict['metrics']) is list, - "wrong input type for compile parameter metrics: multi-output model" - "and user defined metrics are not supported yet, please pass a list") - _assert('loss_weights' not in compile_dict.keys() or compile_dict['loss_weights'] is None or type(compile_dict['loss_weights']) is list or diff --git a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in index ea17c79..e3cae8d 100644 --- a/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in +++ b/src/ports/postgres/modules/deep_learning/test/unit_tests/test_madlib_keras.py_in @@ -42,8 +42,7 @@ class MadlibKerasFitTestCase(unittest.TestCase): def setUp(self): self.plpy_mock = Mock(spec='error') patches = { - 'plpy': plpy, - 'utilities.minibatch_preprocessing': Mock() + 'plpy': plpy } self.plpy_mock_execute = MagicMock() @@ -678,6 +677,40 @@ class MadlibKerasWrapperTestCase(unittest.TestCase): opt_name,opt_args,result_params = self.subject.parse_and_validate_compile_params(test_str) self.assertDictEqual(result_params, compile_dict) + def test_validate_metrics_None_pass(self): + compile_dict = 
{'optimizer':'SGD(lr=0.01, decay=1e-6, nesterov=True)', + 'metrics':['accuracy'], 'loss':'categorical_crossentropy'} + self.subject._validate_metrics(compile_dict) + + def test_validate_metrics_empty_pass(self): + compile_dict = {'optimizer':'SGD(lr=0.01, decay=1e-6, nesterov=True)', + 'metrics':[], 'loss':'categorical_crossentropy'} + self.subject._validate_metrics(compile_dict) + + def test_validate_metrics_two_params_fail(self): + compile_dict = {'optimizer':'SGD(lr=0.01, decay=1e-6, nesterov=True)', + 'metrics':['accuracy','mae'], 'loss':'categorical_crossentropy'} + with self.assertRaises(plpy.PLPYException) as error: + self.subject._validate_metrics(compile_dict) + + def test_validate_metrics_one_unsupported_fail(self): + compile_dict = {'optimizer':'SGD(lr=0.01, decay=1e-6, nesterov=True)', + 'metrics':['sparse_categorical_accuracy'], 'loss':'categorical_crossentropy'} + with self.assertRaises(plpy.PLPYException) as error: + self.subject._validate_metrics(compile_dict) + + def test_validate_metrics_two_unsupported_fail(self): + compile_dict = {'optimizer':'SGD(lr=0.01, decay=1e-6, nesterov=True)', + 'metrics':['sparse_categorical_accuracy', 'sparse_categorical_crossentropy'], 'loss':'categorical_crossentropy'} + with self.assertRaises(plpy.PLPYException) as error: + self.subject._validate_metrics(compile_dict) + + def test_validate_metrics_one_supported_one_unsupported_fail(self): + compile_dict = {'optimizer':'SGD(lr=0.01, decay=1e-6, nesterov=True)', + 'metrics':['accuracy', 'sparse_categorical_crossentropy'], 'loss':'categorical_crossentropy'} + with self.assertRaises(plpy.PLPYException) as error: + self.subject._validate_metrics(compile_dict) + def test_parse_and_validate_compile_params_default_optimizer_pass(self): test_str = "optimizer='SGD', loss='categorical_crossentropy'" _,_,result_dict = self.subject.parse_and_validate_compile_params(test_str) @@ -1070,8 +1103,7 @@ class MadlibKerasEvaluationTestCase(unittest.TestCase): def setUp(self): 
self.plpy_mock = Mock(spec='error') patches = { - 'plpy': plpy, - 'utilities.minibatch_preprocessing': Mock() + 'plpy': plpy } self.plpy_mock_execute = MagicMock() @@ -1283,9 +1315,7 @@ class MadlibKerasEvaluationTestCase(unittest.TestCase): output_state = self.subject.internal_keras_eval_final(input_state) agg_loss = output_state[0] agg_accuracy = output_state[1] - image_count_output = output_state[2] - self.assertEqual(image_count, image_count_output) self.assertAlmostEqual(self.loss, agg_loss,2) self.assertAlmostEqual(self.accuracy, agg_accuracy,2)
