Repository: madlib Updated Branches: refs/heads/master 7ffad0388 -> 467807b54
MLP: Use array_upper to get the last array element JIRA: MADLIB-1209 Changes: - PostgreSQL arrays can be indexed in an arbitrary range (the lower bound need not be 1). Hence, the value returned by array_length is not necessarily the index of the last element of the array. The array_upper function gives the index of the last element. - Added a test in install-check for verbose=True. - Removed asserts from install-check, leading to a significant drop in runtime (about 90% reduction). Closes #238 Project: http://git-wip-us.apache.org/repos/asf/madlib/repo Commit: http://git-wip-us.apache.org/repos/asf/madlib/commit/467807b5 Tree: http://git-wip-us.apache.org/repos/asf/madlib/tree/467807b5 Diff: http://git-wip-us.apache.org/repos/asf/madlib/diff/467807b5 Branch: refs/heads/master Commit: 467807b5495aaa0874ac1b66827c996a36f98e3f Parents: 7ffad03 Author: Rahul Iyer <ri...@apache.org> Authored: Wed Feb 21 15:58:24 2018 -0800 Committer: Rahul Iyer <ri...@apache.org> Committed: Thu Feb 22 21:59:25 2018 -0800 ---------------------------------------------------------------------- src/ports/postgres/modules/convex/mlp_igd.py_in | 5 +- .../postgres/modules/convex/test/mlp.sql_in | 67 ++------------------ 2 files changed, 7 insertions(+), 65 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/madlib/blob/467807b5/src/ports/postgres/modules/convex/mlp_igd.py_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/convex/mlp_igd.py_in b/src/ports/postgres/modules/convex/mlp_igd.py_in index 12e2310..68e5dfd 100644 --- a/src/ports/postgres/modules/convex/mlp_igd.py_in +++ b/src/ports/postgres/modules/convex/mlp_igd.py_in @@ -286,9 +286,8 @@ def mlp(schema_madlib, source_table, output_table, independent_varname, break if verbose and 1 < it.iteration <= n_iterations: # Get loss value from the state. 
- res = it.get_param_value_per_group(""" - _state_current[array_length(_state_current,1)] AS loss - """) + res = it.get_param_value_per_group( + "_state_current[array_upper(_state_current, 1)] AS loss") # Create a list of grouping values if grouping_cols was # used, it will be an empty list if there was not grouping. groups = [t[col_grp_key] for t in res if t[col_grp_key]] http://git-wip-us.apache.org/repos/asf/madlib/blob/467807b5/src/ports/postgres/modules/convex/test/mlp.sql_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/convex/test/mlp.sql_in b/src/ports/postgres/modules/convex/test/mlp.sql_in index bd9b04d..b8713ea 100644 --- a/src/ports/postgres/modules/convex/test/mlp.sql_in +++ b/src/ports/postgres/modules/convex/test/mlp.sql_in @@ -349,7 +349,7 @@ SELECT mlp_classification( ARRAY[5], -- Number of units per layer 'learning_rate_init=0.1, learning_rate_policy=constant, - n_iterations=50, + n_iterations=10, n_tries=3, tolerance=0', 'sigmoid', @@ -366,23 +366,16 @@ SELECT mlp_classification( ARRAY[5], -- Number of units per layer 'learning_rate_init=0.1, learning_rate_policy=constant, - n_iterations=750, + n_iterations=10, tolerance=0', 'sigmoid', '', True, -- Warm start - False, + True, 'g' ); -SELECT assert( - -- Loss will improve much more if more iterations are run - loss < 0.2, - 'MLP: Loss is too high (> 0.2). Wrong result.' -) FROM mlp_class -WHERE g=1; - DROP TABLE IF EXISTS mlp_prediction; DROP TABLE IF EXISTS mlp_prediction_output; -- See prediction accuracy for training data @@ -393,21 +386,6 @@ SELECT mlp_predict( 'mlp_prediction_output', 'output'); --- Sum over the probabilities where the prediction is correct --- If the predictions were perfect, the score would be 150 -SELECT assert( - score > 140, - 'MLP: Score is too low (< 140). Wrong result.' 
-) FROM( -SELECT SUM( -mlp_prediction_output.estimated_prob_1*(iris_data.class=1)::INT+ -mlp_prediction_output.estimated_prob_2*(iris_data.class=2)::INT+ -mlp_prediction_output.estimated_prob_3*(iris_data.class=3)::INT -) AS score -FROM mlp_prediction_output INNER JOIN iris_data ON iris_data.id=mlp_prediction_output.id -WHERE iris_data.g=2)q; - - SELECT mlp_predict( 'mlp_class', 'iris_data', @@ -416,17 +394,6 @@ SELECT mlp_predict( 'response'); SELECT * FROM mlp_prediction; -SELECT assert( - COUNT(*)/150.0 > 0.95, - 'MLP: Accuracy is too low (< 95%). Wrong result.' -) FROM - (SELECT iris_data.class AS actual, mlp_prediction.estimated_class as estimated - FROM mlp_prediction - INNER JOIN iris_data - ON iris_data.id=mlp_prediction.id - WHERE iris_data.g=1 - ) q -WHERE q.actual=q.estimated; DROP TABLE IF EXISTS mlp_prediction; DROP TABLE IF EXISTS mlp_prediction_output; @@ -957,7 +924,7 @@ SELECT mlp_regression( ARRAY[40], -- Number of units per layer 'learning_rate_init=0.015, learning_rate_policy=inv, - n_iterations=20, n_tries=3, + n_iterations=10, n_tries=3, tolerance=0', 'sigmoid', '', @@ -973,7 +940,7 @@ SELECT mlp_regression( ARRAY[40], -- Number of units per layer 'learning_rate_init=0.015, learning_rate_policy=inv, - n_iterations=280, + n_iterations=8, tolerance=0', 'sigmoid', '', @@ -982,12 +949,6 @@ SELECT mlp_regression( 'grp_by_col'); -SELECT assert( - loss < 10, - 'MLP: Loss is too high (> 10). Wrong result.' -) FROM mlp_regress; - - DROP TABLE IF EXISTS mlp_prediction_regress; SELECT mlp_predict( 'mlp_regress', @@ -995,21 +956,3 @@ SELECT mlp_predict( 'id', 'mlp_prediction_regress', 'output'); -SELECT assert( - 0.5*SUM(pow(mlp_prediction_regress.estimated_y-lin_housing_wi.y,2.0) - )/245 < 10, - 'MLP: Predict MSE is too high (> 10) for group 1. 
Wrong result' -) -FROM mlp_prediction_regress JOIN lin_housing_wi -ON mlp_prediction_regress.id = lin_housing_wi.id -WHERE lin_housing_wi.grp_by_col=1; - -SELECT assert( - 0.5*SUM(pow(mlp_prediction_regress.estimated_y-lin_housing_wi.y,2.0) - )/261 < 10, - 'MLP: Predict MSE is too high (> 10) for group 2. Wrong result' -) -FROM mlp_prediction_regress JOIN lin_housing_wi -ON mlp_prediction_regress.id = lin_housing_wi.id -WHERE lin_housing_wi.grp_by_col=2; -