[madlib] branch master updated: Disallow 'deserialization', 'serialization', & 'get' loss and metrics

2021-03-10 Thread domino
This is an automated email from the ASF dual-hosted git repository.

domino pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git


The following commit(s) were added to refs/heads/master by this push:
 new 08acbeb  Disallow 'deserialization', 'serialization', & 'get' loss and 
metrics
08acbeb is described below

commit 08acbebff8483821f8377719d0a83760f7a966e3
Author: Domino Valdano 
AuthorDate: Tue Mar 9 11:56:36 2021 -0500

Disallow 'deserialization', 'serialization', & 'get' loss and metrics

Also:
  - Remove whitelisting of any metrics containing the string
"top_k_categorical_accuracy".  This is already a builtin metric, and
would compromise security if we allowed arbitrary python code
containing this string to be passed along to keras.

  - Remove elements which start with an underscore from list of builtins.

  - Avoid using metrics[2:-2] which assumes first 2 characters are [' or
[" and '] or "].  This prevents sneaky inputs like 
metrics=[*__builtins__ ]
---
 .../madlib_keras_custom_function.py_in | 13 +++
 .../madlib_keras_fit_multiple_model.py_in  |  2 +-
 .../deep_learning/madlib_keras_validator.py_in | 20 +
 .../deep_learning/madlib_keras_wrapper.py_in   | 25 --
 4 files changed, 43 insertions(+), 17 deletions(-)

diff --git 
a/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.py_in 
b/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.py_in
index 32a5757..f2f06d6 100644
--- 
a/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.py_in
+++ 
b/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.py_in
@@ -168,13 +168,26 @@ def delete_custom_function(schema_madlib, object_table, 
id=None, name=None, **kw
 sql = "DROP TABLE {0}".format(object_table)
 plpy.execute(sql, 0)
 
+dangerous_builtins = set(('serialize', 'deserialize', 'get'))
+
 def update_builtin_metrics(builtin_metrics):
 builtin_metrics.append('accuracy')
 builtin_metrics.append('acc')
 builtin_metrics.append('crossentropy')
 builtin_metrics.append('ce')
+
+builtin_metrics = [ b for b in builtin_metrics \
+if not b.startswith('_') and \
+ b not in dangerous_builtins ]
+
 return builtin_metrics
 
+def update_builtin_losses(builtin_losses):
+builtin_losses = [ b for b in builtin_losses \
+if not b.startswith('_') and \
+ b not in dangerous_builtins ]
+return builtin_losses
+
 @MinWarning("error")
 def load_top_k_accuracy_function(schema_madlib, object_table, k, **kwargs):
 
diff --git 
a/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in
 
b/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in
index 2db346e..aa88fbe 100644
--- 
a/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in
+++ 
b/src/ports/postgres/modules/deep_learning/madlib_keras_fit_multiple_model.py_in
@@ -353,7 +353,7 @@ class FitMultipleModel(object):
 DEBUG.print_timing('eval_model_total')
 
 def populate_object_map(self):
-builtin_losses = dir(losses)
+builtin_losses = update_builtin_losses(dir(losses))
 builtin_metrics = update_builtin_metrics(dir(metrics))
 
 # Track distinct custom functions in compile_params
diff --git 
a/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in 
b/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in
index ab8d336..de5c63d 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_validator.py_in
@@ -49,6 +49,8 @@ from utilities.validate_args import input_tbl_valid
 from utilities.validate_args import output_tbl_valid
 from madlib_keras_wrapper import parse_and_validate_fit_params
 from madlib_keras_wrapper import parse_and_validate_compile_params
+from madlib_keras_custom_function import update_builtin_metrics
+from madlib_keras_custom_function import update_builtin_losses
 import tensorflow.keras.losses as losses
 import tensorflow.keras.metrics as metrics
 
@@ -541,18 +543,18 @@ class MstLoaderInputValidator():
 """.format(fit_params, str(e)))
 if not self.compile_params_list:
 plpy.error( "compile_params_list cannot be NULL")
-custom_fn_name = []
-## Initialize builtin loss/metrics functions
-builtin_losses = dir(losses)
-builtin_metrics = dir(metrics)
-# Default metrics, since it is not part of the builtin metrics list
-builtin_metrics.append('accuracy')
+custom_fn_names = []
+
+# Initialize builtin loss/metrics functions
+builtin_losses = update_builtin_losses(dir(losses))
+builtin_metri

[madlib] branch master updated: release notes for 1dot18dot0

2021-03-10 Thread fmcquillan
This is an automated email from the ASF dual-hosted git repository.

fmcquillan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git


The following commit(s) were added to refs/heads/master by this push:
 new c6a5883  release notes for 1dot18dot0
c6a5883 is described below

commit c6a5883e193a8f89d1b29dd0317f7976e7a969fa
Author: Frank McQuillan 
AuthorDate: Tue Mar 9 11:19:26 2021 -0800

release notes for 1dot18dot0
---
 RELEASE_NOTES | 52 
 1 file changed, 52 insertions(+)

diff --git a/RELEASE_NOTES b/RELEASE_NOTES
index 030d28c..918cdf4 100644
--- a/RELEASE_NOTES
+++ b/RELEASE_NOTES
@@ -10,6 +10,58 @@ commit history located at 
https://github.com/apache/madlib/commits/master.
 Current list of bugs and issues can be found at 
https://issues.apache.org/jira/browse/MADLIB.
 
 —-
+MADlib v1.18.0:
+
+Release Date: 2021-Mar-16
+
+New features
+- DL: setup methods for grid search and random search (MADLIB-1439)  
+- DL: Add support for custom loss functions (MADLIB-1441) 
+- DL: Hyperband phase 1 - print run schedule (MADLIB-1445)
+- DL: Hyperband phase 2 - generate MST table (MADLIB-1446)
+- DL: Hyperband phase 3 - logic for diagonal runs (MADLIB-1447)   
+- DL: Hyperband phase 4 - implement full logic with default params 
(MADLIB-1448)  
+- DL: Hyperband phase 5 - implement full logic with optional params 
(MADLIB-1449) 
+- AutoML: add Hyperopt for deep learning (MADLIB-1453)
+- DL: Add Multiple input/output support to load, fit, and evaluate 
(MADLIB-1457)  
+- DL: Add multiple input/output support on advanced features (MADLIB-1458) 
   
+- DL: add caching param to autoML interface (MADLIB-1461) 
+- DL: Add support for TensorBoard (MADLIB-1474)
+- DBSCAN clustering algo - phase 1 (MADLIB-1017)  
+
+Improvements:
+- DL: cache data to speed training (MADLIB-1427) 
+- DL: reduce GPU idle time between hops (MADLIB-1428)
+- DL: utility to load and delete custom Python functions (MADLIB-1429)   
+- DL: support custom loss functions (MADLIB-1432)
+- DL: support custom metrics (MADLIB-1433)   
+- DL: Fit multiple does not print timing for validation evaluate 
(MADLIB-1462)   
+- DL: Fix gpu_memory_fraction for distribution_policy != 'all_segments' 
(MADLIB-1463) 
+- DL: add object table info in load MST table utility function 
(MADLIB-1430) 
+- DL: improve speed of evaluate for multiple model training (MADLIB-1431)  
  
+- DL: improve existing grid search method (MADLIB-1440)
+- DL: Remove dependency on keras (MADLIB-1450)
+- DL: Improve output of predict (MADLIB-1451) 
+- DL: Add top n to evaluate() (MADLIB-1452)
+- DL - Write best so far to console for autoML methods (MADLIB-1454)  
+- Do not try to drop output tables (MADLIB-1442)
+- Prevent an "integer out of range" exception in linear regression train 
(MADLIB-1460)
+
+Bug fixes:
+- DL: Fix fit_multiple when output_table or mst_table is passed as NULL 
(MADLIB-1464) 
+- DL: Iris predict accuracy has regressed (MADLIB-1465)   
+- DL: madlib_keras_fit_multiple_model goes down with an IndexError: tuple 
index out of range (MADLIB-1467)
+- DL: Crash in fit_multiple when any model reaches loss=nan (MADLIB-1443) 
+- DL: BYOM fails at get_num_classes (MADLIB-1472) 
+- DL: Hyperband cumulative output time is not correct (MADLIB-1456)  
+- check bigint support for all graph methods (MADLIB-1444)   
+- MLP: weights param not working (MADLIB-1471)  
+
+Other:
+- Create build trigger jobs on cloudbees (MADLIB-1466)
+
+
+—-
 MADlib v1.17.0:
 
 Release Date: 2020-Mar-31



[madlib] branch master updated: DL: Fix num_class parsing from model architecture

2021-03-10 Thread okislal
This is an automated email from the ASF dual-hosted git repository.

okislal pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git


The following commit(s) were added to refs/heads/master by this push:
 new 6263347  DL: Fix num_class parsing from model architecture
6263347 is described below

commit 626334769fffd3f6ce069f4e646509b12341d698
Author: Orhan Kislal 
AuthorDate: Mon Mar 8 16:06:22 2021 +0300

DL: Fix num_class parsing from model architecture

JIRA: MADLIB-1472
get_num_classes function did not work in certain models that end
with activation layers. The regresion was caused by the changes
from the multi-io commit.
---
 .../modules/deep_learning/madlib_keras.sql_in  | 22 +---
 .../modules/deep_learning/model_arch_info.py_in| 13 +++--
 .../test/unit_tests/test_madlib_keras.py_in| 66 +-
 3 files changed, 87 insertions(+), 14 deletions(-)

diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in 
b/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in
index 429c0f0..05edc0e 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras.sql_in
@@ -84,7 +84,7 @@ Note that the following MADlib functions are targeting a 
specific TensorFlow
 kernel version (1.14). Using a newer or older version may or may not work as 
intended.
 
 MADlib's deep learning methods are designed to use the TensorFlow package and 
its built in Keras
-functions.  To ensure consistency, please use tensorflow.keras objects 
(models, layers, etc.) 
+functions.  To ensure consistency, please use tensorflow.keras objects 
(models, layers, etc.)
 instead of importing Keras and using its objects.
 
 @note CUDA GPU memory cannot be released until the process holding it is 
terminated.
@@ -165,15 +165,15 @@ madlib_keras_fit(
 @note
 - Custom loss functions and custom metrics can be used as defined in
 Define Custom Functions.
-List the custom function name and provide the name of the table where the 
+List the custom function name and provide the name of the table where the
 serialized Python objects reside using the parameter 'object_table' below.
 - The following loss function is
 not supported: sparse_categorical_crossentropy.
 The following metrics are not
 supported: sparse_categorical_accuracy, 
sparse_top_k_categorical_accuracy.
-- The Keras accuracy parameter top_k_categorical_accuracy returns 
top 5 accuracy by 
+- The Keras accuracy parameter top_k_categorical_accuracy returns 
top 5 accuracy by
 default.  If you want a different top k value, use the helper function
-Top k Accuracy 
Function 
+Top k Accuracy 
Function
 to create a custom
 Python function to compute the top k accuracy that you want.
 
@@ -609,10 +609,10 @@ madlib_keras_predict(
   TEXT. Column with independent variables in the test table.
   If a 'normalizing_const' is specified when preprocessing the
   training dataset, this same normalization will be applied to
-  the independent variables used in predict. In the case that there 
-  are multiple independent variables, 
+  the independent variables used in predict. In the case that there
+  are multiple independent variables,
   representing a multi-input neural network,
-  put the columns as a comma 
+  put the columns as a comma
   separated list, e.g., 'indep_var1, indep_var2, indep_var3' in the same
   way as was done in the preprocessor step for the training data.
   
@@ -695,7 +695,8 @@ madlib_keras_predict_byom(
 pred_type,
 use_gpus,
 class_values,
-normalizing_const
+normalizing_const,
+dependent_count
 )
 
 
@@ -805,6 +806,11 @@ madlib_keras_predict_byom(
   array by. For example, you would use 255 for this value if the image data is
   in the form 0-255.
   
+
+  dependent_count (optional)
+  INTEGER, default: 1.
+  The number of dependent variables in the model.
+  
 
 
 
diff --git a/src/ports/postgres/modules/deep_learning/model_arch_info.py_in 
b/src/ports/postgres/modules/deep_learning/model_arch_info.py_in
index 9c28c43..0081e58 100644
--- a/src/ports/postgres/modules/deep_learning/model_arch_info.py_in
+++ b/src/ports/postgres/modules/deep_learning/model_arch_info.py_in
@@ -66,12 +66,15 @@ def get_num_classes(model_arch, multi_dep_count):
 arch_layers = _get_layers(model_arch)
 num_classes = []
 
-layer_count = len(arch_layers) - 1
-for i in range(multi_dep_count):
-if 'units' in arch_layers[layer_count-i]['config']:
-num_classes.append(arch_layers[layer_count-i]['config']['units'])
-
+i = len(arch_layers) - 1
+dep_counter = 0
+while i >= 0 and dep_counter < multi_dep_count:
+if 'units' in arch_layers[i]['config']:
+num_classes.append(arch_layers[i]['config']['units'])
+dep_counter +=1
+i -= 1
 if num_classe