[madlib] branch master updated: Disallow 'deserialization', 'serialization', & 'get' loss and metrics
This is an automated email from the ASF dual-hosted git repository. domino pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/madlib.git The following commit(s) were added to refs/heads/master by this push: new 08acbeb Disallow 'deserialization', 'serialization', & 'get' loss and metrics 08acbeb is described below commit 08acbebff8483821f8377719d0a83760f7a966e3 Author: Domino Valdano AuthorDate: Tue Mar 9 11:56:36 2021 -0500 Disallow 'deserialization', 'serialization', & 'get' loss and metrics Also: - Remove whitelisting of any metrics containing the string "top_k_categorical_accuracy". This is already a builtin metric, and would compromise security if we allowed arbitrary python code containing this string to be passed along to keras. - Remove elements which start with an underscore from list of builtins. - Avoid using metrics[2:-2] which assumes first 2 characters are [' or [" and '] or "]. This prevents sneaky inputs like metrics=[*__builtins__ ] --- .../madlib_keras_custom_function.py_in | 13 +++ .../madlib_keras_fit_multiple_model.py_in | 2 +- .../deep_learning/madlib_keras_validator.py_in | 20 + .../deep_learning/madlib_keras_wrapper.py_in | 25 -- 4 files changed, 43 insertions(+), 17 deletions(-) diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.py_in index 32a5757..f2f06d6 100644 --- a/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.py_in +++ b/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.py_in @@ -168,13 +168,26 @@ def delete_custom_function(schema_madlib, object_table, id=None, name=None, **kw sql = "DROP TABLE {0}".format(object_table) plpy.execute(sql, 0) +dangerous_builtins = set(('serialize', 'deserialize', 'get')) + def update_builtin_metrics(builtin_metrics): builtin_metrics.append('accuracy') builtin_metrics.append('acc') builtin_metrics.append('crossentropy') 
builtin_metrics.append('ce') + +builtin_metrics = [ b for b in builtin_metrics \ +if not b.startswith('_') and \ + b not in dangerous_builtins ] + return builtin_metrics +def update_builtin_losses(builtin_losses): +builtin_losses = [ b for b in builtin_losses \ +if not b.startswith('_') and \ + b not in dangerous_builtins ] +return builtin_losses + @MinWarning("error") def load_top_k_accuracy_function(schema_madlib, object_table, k, **kwargs): diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in index 2db346e..aa88fbe 100644 --- a/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in +++ b/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in @@ -353,7 +353,7 @@ class FitMultipleModel(object): DEBUG.print_timing('eval_model_total') def populate_object_map(self): -builtin_losses = dir(losses) +builtin_losses = update_builtin_losses(dir(losses)) builtin_metrics = update_builtin_metrics(dir(metrics)) # Track distinct custom functions in compile_params diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in index ab8d336..de5c63d 100644 --- a/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in +++ b/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in @@ -49,6 +49,8 @@ from utilities.validate_args import input_tbl_valid from utilities.validate_args import output_tbl_valid from madlib_keras_wrapper import parse_and_validate_fit_params from madlib_keras_wrapper import parse_and_validate_compile_params +from madlib_keras_custom_function import update_builtin_metrics +from madlib_keras_custom_function import update_builtin_losses import tensorflow.keras.losses as losses import tensorflow.keras.metrics as metrics @@ -541,18 +543,18 @@ class MstLoaderInputValidator(): 
""".format(fit_params, str(e))) if not self.compile_params_list: plpy.error( "compile_params_list cannot be NULL") -custom_fn_name = [] -## Initialize builtin loss/metrics functions -builtin_losses = dir(losses) -builtin_metrics = dir(metrics) -# Default metrics, since it is not part of the builtin metrics list -builtin_metrics.append('accuracy') +custom_fn_names = [] + +# Initialize builtin loss/metrics functions +builtin_losses = update_builtin_losses(dir(losses)) +builtin_metri
[madlib] branch master updated: release notes for 1dot18dot0
This is an automated email from the ASF dual-hosted git repository. fmcquillan pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/madlib.git The following commit(s) were added to refs/heads/master by this push: new c6a5883 release notes for 1dot18dot0 c6a5883 is described below commit c6a5883e193a8f89d1b29dd0317f7976e7a969fa Author: Frank McQuillan AuthorDate: Tue Mar 9 11:19:26 2021 -0800 release notes for 1dot18dot0 --- RELEASE_NOTES | 52 1 file changed, 52 insertions(+) diff --git a/RELEASE_NOTES b/RELEASE_NOTES index 030d28c..918cdf4 100644 --- a/RELEASE_NOTES +++ b/RELEASE_NOTES @@ -10,6 +10,58 @@ commit history located at https://github.com/apache/madlib/commits/master. Current list of bugs and issues can be found at https://issues.apache.org/jira/browse/MADLIB. —- +MADlib v1.18.0: + +Release Date: 2021-Mar-16 + +New features +- DL: setup methods for grid search and random search (MADLIB-1439) +- DL: Add support for custom loss functions (MADLIB-1441) +- DL: Hyperband phase 1 - print run schedule (MADLIB-1445) +- DL: Hyperband phase 2 - generate MST table (MADLIB-1446) +- DL: Hyperband phase 3 - logic for diagonal runs (MADLIB-1447) +- DL: Hyperband phase 4 - implement full logic with default params (MADLIB-1448) +- DL: Hyperband phase 5 - implement full logic with optional params (MADLIB-1449) +- AutoML: add Hyperopt for deep learning (MADLIB-1453) +- DL: Add Multiple input/output support to load, fit, and evaluate (MADLIB-1457) +- DL: Add multiple input/output support on advanced features (MADLIB-1458) +- DL: add caching param to autoML interface (MADLIB-1461) +- DL: Add support for TensorBoard (MADLIB-1474) +- DBSCAN clustering algo - phase 1 (MADLIB-1017) + +Improvements: +- DL: cache data to speed training (MADLIB-1427) +- DL: reduce GPU idle time between hops (MADLIB-1428) +- DL: utility to load and delete custom Python functions (MADLIB-1429) +- DL: support custom loss functions (MADLIB-1432) +- DL: support custom 
metrics (MADLIB-1433) +- DL: Fit multiple does not print timing for validation evaluate (MADLIB-1462) +- DL: Fix gpu_memory_fraction for distribution_policy != 'all_segments' (MADLIB-1463) +- DL: add object table info in load MST table utility function (MADLIB-1430) +- DL: improve speed of evaluate for multiple model training (MADLIB-1431) +- DL: improve existing grid search method (MADLIB-1440) +- DL: Remove dependency on keras (MADLIB-1450) +- DL: Improve output of predict (MADLIB-1451) +- DL: Add top n to evaluate() (MADLIB-1452) +- DL - Write best so far to console for autoML methods (MADLIB-1454) +- Do not try to drop output tables (MADLIB-1442) +- Prevent an "integer out of range" exception in linear regression train (MADLIB-1460) + +Bug fixes: +- DL: Fix fit_multiple when output_table or mst_table is passed as NULL (MADLIB-1464) +- DL: Iris predict accuracy has regressed (MADLIB-1465) +- DL: madlib_keras_fit_multiple_model goes down with an IndexError: tuple index out of range (MADLIB-1467) +- DL: Crash in fit_multiple when any model reaches loss=nan (MADLIB-1443) +- DL: BYOM fails at get_num_classes (MADLIB-1472) +- DL: Hyperband cumulative output time is not correct (MADLIB-1456) +- check bigint support for all graph methods (MADLIB-1444) +- MLP: weights param not working (MADLIB-1471) + +Other: +- Create build trigger jobs on cloudbees (MADLIB-1466) + + +--- MADlib v1.17.0: Release Date: 2020-Mar-31
[madlib] branch master updated: DL: Fix num_class parsing from model architecture
This is an automated email from the ASF dual-hosted git repository. okislal pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/madlib.git The following commit(s) were added to refs/heads/master by this push: new 6263347 DL: Fix num_class parsing from model architecture 6263347 is described below commit 626334769fffd3f6ce069f4e646509b12341d698 Author: Orhan Kislal AuthorDate: Mon Mar 8 16:06:22 2021 +0300 DL: Fix num_class parsing from model architecture JIRA: MADLIB-1472 get_num_classes function did not work in certain models that end with activation layers. The regression was caused by the changes from the multi-io commit. --- .../modules/deep_learning/madlib_keras.sql_in | 22 +--- .../modules/deep_learning/model_arch_info.py_in| 13 +++-- .../test/unit_tests/test_madlib_keras.py_in| 66 +- 3 files changed, 87 insertions(+), 14 deletions(-) diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in b/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in index 429c0f0..05edc0e 100644 --- a/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in +++ b/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in @@ -84,7 +84,7 @@ Note that the following MADlib functions are targeting a specific TensorFlow kernel version (1.14). Using a newer or older version may or may not work as intended. MADlib's deep learning methods are designed to use the TensorFlow package and its built in Keras -functions. To ensure consistency, please use tensorflow.keras objects (models, layers, etc.) +functions. To ensure consistency, please use tensorflow.keras objects (models, layers, etc.) instead of importing Keras and using its objects. @note CUDA GPU memory cannot be released until the process holding it is terminated. @@ -165,15 +165,15 @@ madlib_keras_fit( @note - Custom loss functions and custom metrics can be used as defined in Define Custom Functions.
-List the custom function name and provide the name of the table where the +List the custom function name and provide the name of the table where the serialized Python objects reside using the parameter 'object_table' below. - The following loss function is not supported: sparse_categorical_crossentropy. The following metrics are not supported: sparse_categorical_accuracy, sparse_top_k_categorical_accuracy. -- The Keras accuracy parameter top_k_categorical_accuracy returns top 5 accuracy by +- The Keras accuracy parameter top_k_categorical_accuracy returns top 5 accuracy by default. If you want a different top k value, use the helper function -Top k Accuracy Function +Top k Accuracy Function to create a custom Python function to compute the top k accuracy that you want. @@ -609,10 +609,10 @@ madlib_keras_predict( TEXT. Column with independent variables in the test table. If a 'normalizing_const' is specified when preprocessing the training dataset, this same normalization will be applied to - the independent variables used in predict. In the case that there - are multiple independent variables, + the independent variables used in predict. In the case that there + are multiple independent variables, representing a multi-input neural network, - put the columns as a comma + put the columns as a comma separated list, e.g., 'indep_var1, indep_var2, indep_var3' in the same way as was done in the preprocessor step for the training data. @@ -695,7 +695,8 @@ madlib_keras_predict_byom( pred_type, use_gpus, class_values, -normalizing_const +normalizing_const, +dependent_count ) @@ -805,6 +806,11 @@ madlib_keras_predict_byom( array by. For example, you would use 255 for this value if the image data is in the form 0-255. + + dependent_count (optional) + INTEGER, default: 1. + The number of dependent variables in the model. 
+ diff --git a/src/ports/postgres/modules/deep_learning/model_arch_info.py_in b/src/ports/postgres/modules/deep_learning/model_arch_info.py_in index 9c28c43..0081e58 100644 --- a/src/ports/postgres/modules/deep_learning/model_arch_info.py_in +++ b/src/ports/postgres/modules/deep_learning/model_arch_info.py_in @@ -66,12 +66,15 @@ def get_num_classes(model_arch, multi_dep_count): arch_layers = _get_layers(model_arch) num_classes = [] -layer_count = len(arch_layers) - 1 -for i in range(multi_dep_count): -if 'units' in arch_layers[layer_count-i]['config']: -num_classes.append(arch_layers[layer_count-i]['config']['units']) - +i = len(arch_layers) - 1 +dep_counter = 0 +while i >= 0 and dep_counter < multi_dep_count: +if 'units' in arch_layers[i]['config']: +num_classes.append(arch_layers[i]['config']['units']) +dep_counter +=1 +i -= 1 if num_classe