Build: Disable AppendOnly if available (JIRA: MADLIB-1171)
Greenplum provides an Append-optimized table storage that does not allow UPDATE and DELETE. MADlib model tables are small enough that they won't see a big benefit of using AO instead of Heap tables. This commit ensures that APPENDONLY=False during MADlib function call (the GUC is reset back to original value during exit). For cases where we recreate the data table (standardization, redistribution, etc), we have to explicitly add an 'APPENDONLY=true' to see the AO benefits. Closes #316 Project: http://git-wip-us.apache.org/repos/asf/madlib/repo Commit: http://git-wip-us.apache.org/repos/asf/madlib/commit/3db98bab Tree: http://git-wip-us.apache.org/repos/asf/madlib/tree/3db98bab Diff: http://git-wip-us.apache.org/repos/asf/madlib/diff/3db98bab Branch: refs/heads/master Commit: 3db98babe3326fb5e2cd16d0639a2bef264f4b04 Parents: b76a083 Author: Rahul Iyer <[email protected]> Authored: Wed Aug 29 16:23:04 2018 -0700 Committer: Rahul Iyer <[email protected]> Committed: Thu Sep 13 11:24:22 2018 -0700 ---------------------------------------------------------------------- src/ports/postgres/madpack/SQLCommon.m4_in | 15 +- .../modules/assoc_rules/assoc_rules.sql_in | 80 ++++---- src/ports/postgres/modules/convex/mlp.sql_in | 72 ++++--- .../modules/convex/utils_regularization.py_in | 129 ++++++------ .../modules/elastic_net/elastic_net.sql_in | 13 +- src/ports/postgres/modules/knn/knn.py_in | 2 +- src/ports/postgres/modules/knn/knn.sql_in | 36 +--- src/ports/postgres/modules/lda/lda.py_in | 10 +- src/ports/postgres/modules/lda/lda.sql_in | 44 ++-- .../postgres/modules/linalg/matrix_ops.sql_in | 201 +++++++++++-------- src/ports/postgres/modules/linalg/svd.sql_in | 47 +++-- src/ports/postgres/modules/pca/pca.py_in | 10 +- src/ports/postgres/modules/pca/pca.sql_in | 6 +- .../postgres/modules/pca/pca_project.py_in | 4 +- .../recursive_partitioning/decision_tree.sql_in | 50 ++--- .../recursive_partitioning/random_forest.sql_in | 41 ++-- 
.../postgres/modules/stats/correlation.sql_in | 27 ++- .../modules/stats/cox_prop_hazards.sql_in | 49 ++--- .../postgres/modules/stats/pred_metrics.sql_in | 82 +++++--- .../postgres/modules/summary/summary.sql_in | 15 +- src/ports/postgres/modules/tsa/arima.sql_in | 25 ++- .../postgres/modules/utilities/cols2vec.sql_in | 8 +- .../postgres/modules/utilities/control.py_in | 55 +++++ .../utilities/minibatch_preprocessing.py_in | 20 +- .../utilities/minibatch_preprocessing.sql_in | 7 +- .../utilities/test/unit_tests/plpy_mock.py_in | 8 + .../test/unit_tests/test_control.py_in | 81 ++++++++ .../modules/utilities/test/utilities.sql_in | 5 +- .../modules/utilities/text_utilities.sql_in | 5 +- 29 files changed, 684 insertions(+), 463 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/madlib/blob/3db98bab/src/ports/postgres/madpack/SQLCommon.m4_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/madpack/SQLCommon.m4_in b/src/ports/postgres/madpack/SQLCommon.m4_in index afc82d2..ffc0c37 100644 --- a/src/ports/postgres/madpack/SQLCommon.m4_in +++ b/src/ports/postgres/madpack/SQLCommon.m4_in @@ -28,14 +28,14 @@ m4_changequote(<!,!>) * RETURNS DOUBLE PRECISION[] * AS $$PythonFunction(regress, logistic, compute_logregr_coef)$$ * LANGUAGE plpythonu VOLATILE; - */ + */ m4_define(<!PythonFunction!>, <! import sys from inspect import getframeinfo, currentframe sys.path.insert(1, "EXT_PYTHON_LIBDIR") sys.path.insert(1, "PLPYTHON_LIBDIR") from $1 import $2 - + # Retrieve the schema name of the current function # Make it available as variable: schema_madlib fname = getframeinfo(currentframe()).function @@ -50,7 +50,9 @@ m4_define(<!PythonFunction!>, <! 
global schema_madlib schema_madlib = rv[0]['nspname'] - return $2.$3(**globals()) + from utilities.control import AOControl + with AOControl(False): + return $2.$3(**globals()) !>) /* @@ -59,14 +61,14 @@ m4_define(<!PythonFunction!>, <! * @param $1 directory * @param $2 python file (without suffix) * - */ + */ m4_define(<!PythonFunctionBodyOnly!>, <! import sys from inspect import getframeinfo, currentframe sys.path.insert(1, "EXT_PYTHON_LIBDIR") sys.path.insert(1, "PLPYTHON_LIBDIR") from $1 import $2 - + # Retrieve the schema name of the current function # Make it available as variable: schema_madlib fname = getframeinfo(currentframe()).function @@ -79,7 +81,8 @@ m4_define(<!PythonFunctionBodyOnly!>, <! 'WHERE p.oid = %s' % foid, 1) global schema_madlib - schema_madlib = rv[0]['nspname'] + schema_madlib = rv[0]['nspname'] + from utilities.control import AOControl !>) /* http://git-wip-us.apache.org/repos/asf/madlib/blob/3db98bab/src/ports/postgres/modules/assoc_rules/assoc_rules.sql_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/assoc_rules/assoc_rules.sql_in b/src/ports/postgres/modules/assoc_rules/assoc_rules.sql_in index 8ee9fcb..ec3c330 100644 --- a/src/ports/postgres/modules/assoc_rules/assoc_rules.sql_in +++ b/src/ports/postgres/modules/assoc_rules/assoc_rules.sql_in @@ -493,23 +493,19 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.assoc_rules ) RETURNS MADLIB_SCHEMA.assoc_rules_results AS $$ - PythonFunctionBodyOnly(`assoc_rules', `assoc_rules') - - plpy.execute("SET client_min_messages = error;") - - # schema_madlib comes from PythonFunctionBodyOnly - return assoc_rules.assoc_rules( - schema_madlib, - support, - confidence, - tid_col, - item_col, - input_table, - output_schema, - verbose, - max_itemset_size - ); + with AOControl(False): + plpy.execute("SET client_min_messages = error;") + # schema_madlib comes from PythonFunctionBodyOnly + return assoc_rules.assoc_rules(schema_madlib, + 
support, + confidence, + tid_col, + item_col, + input_table, + output_schema, + verbose, + max_itemset_size); $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -533,21 +529,18 @@ RETURNS MADLIB_SCHEMA.assoc_rules_results AS $$ PythonFunctionBodyOnly(`assoc_rules', `assoc_rules') - plpy.execute("SET client_min_messages = error;") - - # schema_madlib comes from PythonFunctionBodyOnly - return assoc_rules.assoc_rules( - schema_madlib, - support, - confidence, - tid_col, - item_col, - input_table, - output_schema, - False, - 'NULL' - ); + with AOControl(False): + # schema_madlib comes from PythonFunctionBodyOnly + return assoc_rules.assoc_rules(schema_madlib, + support, + confidence, + tid_col, + item_col, + input_table, + output_schema, + False, + 'NULL'); $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -565,24 +558,19 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.assoc_rules ) RETURNS MADLIB_SCHEMA.assoc_rules_results AS $$ - PythonFunctionBodyOnly(`assoc_rules', `assoc_rules') - plpy.execute("SET client_min_messages = error;") - - # schema_madlib comes from PythonFunctionBodyOnly - return assoc_rules.assoc_rules( - schema_madlib, - support, - confidence, - tid_col, - item_col, - input_table, - output_schema, - verbose, - 'NULL' - ); - + with AOControl(False): + # schema_madlib comes from PythonFunctionBodyOnly + return assoc_rules.assoc_rules(schema_madlib, + support, + confidence, + tid_col, + item_col, + input_table, + output_schema, + verbose, + 'NULL'); $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); http://git-wip-us.apache.org/repos/asf/madlib/blob/3db98bab/src/ports/postgres/modules/convex/mlp.sql_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/convex/mlp.sql_in b/src/ports/postgres/modules/convex/mlp.sql_in index 07d7dbc..0d06c54 100644 --- 
a/src/ports/postgres/modules/convex/mlp.sql_in +++ b/src/ports/postgres/modules/convex/mlp.sql_in @@ -1559,21 +1559,20 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mlp_classification( grouping_col VARCHAR ) RETURNS VOID AS $$ PythonFunctionBodyOnly(`convex', `mlp_igd') - mlp_igd.mlp( - schema_madlib, - source_table, - output_table, - independent_varname, - dependent_varname, - hidden_layer_sizes, - optimizer_params, - activation, - True, - weights, - warm_start, - verbose, - grouping_col - ) + with AOControl(False): + mlp_igd.mlp(schema_madlib, + source_table, + output_table, + independent_varname, + dependent_varname, + hidden_layer_sizes, + optimizer_params, + activation, + True, + weights, + warm_start, + verbose, + grouping_col) $$ LANGUAGE plpythonu VOLATILE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -1591,21 +1590,20 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mlp_regression( grouping_col VARCHAR ) RETURNS VOID AS $$ PythonFunctionBodyOnly(`convex', `mlp_igd') - mlp_igd.mlp( - schema_madlib, - source_table, - output_table, - independent_varname, - dependent_varname, - hidden_layer_sizes, - optimizer_params, - activation, - False, - weights, - warm_start, - verbose, - grouping_col - ) + with AOControl(False): + mlp_igd.mlp(schema_madlib, + source_table, + output_table, + independent_varname, + dependent_varname, + hidden_layer_sizes, + optimizer_params, + activation, + False, + weights, + warm_start, + verbose, + grouping_col ) $$ LANGUAGE plpythonu VOLATILE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -1793,13 +1791,13 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mlp_predict( pred_type VARCHAR ) RETURNS VOID AS $$ PythonFunctionBodyOnly(`convex', `mlp_igd') - mlp_igd.mlp_predict( - schema_madlib, - model_table, - data_table, - id_col_name, - output_table, - pred_type) + with AOControl(False): + mlp_igd.mlp_predict(schema_madlib, + model_table, + data_table, + id_col_name, + output_table, + pred_type) $$ LANGUAGE 
plpythonu VOLATILE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); http://git-wip-us.apache.org/repos/asf/madlib/blob/3db98bab/src/ports/postgres/modules/convex/utils_regularization.py_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/convex/utils_regularization.py_in b/src/ports/postgres/modules/convex/utils_regularization.py_in index 74ee3d4..6ed98bf 100644 --- a/src/ports/postgres/modules/convex/utils_regularization.py_in +++ b/src/ports/postgres/modules/convex/utils_regularization.py_in @@ -67,9 +67,9 @@ def utils_ind_var_scales(tbl_data, col_ind_var, dimension, schema_madlib, x_scaled_vals["mean"] = mad_vec(x_scales[0]["mean"], text=False) x_scaled_vals["std"] = mad_vec(x_scales[0]["std"], text=False) return x_scaled_vals - # ======================================================================== + def utils_ind_var_scales_grouping(tbl_data, col_ind_var, dimension, schema_madlib, grouping_col, x_mean_table, set_zero_std_to_one=False, @@ -123,6 +123,7 @@ def utils_ind_var_scales_grouping(tbl_data, col_ind_var, dimension, """.format(**locals())) # ======================================================================== + def __utils_dep_var_scale(schema_madlib, tbl_data, col_ind_var, col_dep_var): """ @@ -140,13 +141,15 @@ def __utils_dep_var_scale(schema_madlib, tbl_data, col_ind_var, y_scale = plpy.execute( """ SELECT - avg(CASE WHEN NOT {schema_madlib}.array_contains_null({col_ind_var}) THEN {col_dep_var} END) AS mean, + avg(CASE WHEN NOT {schema_madlib}.array_contains_null({col_ind_var}) + THEN {col_dep_var} END) AS mean, 1 AS std FROM {tbl_data} """.format(**locals()))[0] return y_scale # ======================================================================== + def __utils_dep_var_scale_grouping(y_mean_table, tbl_data, grouping_col, family, schema_madlib=None, col_ind_var=None, col_dep_var=None): @@ -178,8 +181,8 @@ def __utils_dep_var_scale_grouping(y_mean_table, tbl_data, 
grouping_col, # col_dep_var must be passed along. if schema_madlib is None or col_ind_var is None or col_dep_var is None: plpy.error("Schema name, indpendent column and dependent column names required.") - mean_str = ' avg(CASE WHEN NOT {0}.array_contains_null({1}) THEN {2} END) '.format( - schema_madlib, col_ind_var, col_dep_var) + mean_str = (' avg(CASE WHEN NOT {0}.array_contains_null({1}) THEN {2} END) '. + format(schema_madlib, col_ind_var, col_dep_var)) plpy.execute( """ CREATE TEMP TABLE {y_mean_table} AS @@ -191,6 +194,7 @@ def __utils_dep_var_scale_grouping(y_mean_table, tbl_data, grouping_col, """.format(**locals())) # ======================================================================== + def __utils_normalize_data_grouping(y_decenter=True, **kwargs): """ Normalize the independent and dependent variables using the calculated @@ -228,15 +232,17 @@ def __utils_normalize_data_grouping(y_decenter=True, **kwargs): group_where_x = _check_groups(kwargs.get('tbl_data'), '__x__', group_col_list) x_mean_join_clause = "INNER JOIN {0} AS __x__ ON {1}".format( - kwargs.get('x_mean_table'), group_where_x) + kwargs.get('x_mean_table'), group_where_x) if kwargs.get('y_mean_table'): group_where_y = _check_groups(kwargs.get('tbl_data'), '__y__', group_col_list) y_mean_join_clause = "INNER JOIN {0} AS __y__ ON {1}".format( - kwargs.get('y_mean_table'), group_where_y) + kwargs.get('y_mean_table'), group_where_y) ydecenter_str = "- __y__.mean".format(**kwargs) if y_decenter else "" plpy.execute(""" - CREATE TEMP TABLE {tbl_data_scaled} AS + CREATE TEMP TABLE {tbl_data_scaled} + m4_ifdef(`__POSTGRESQL__', `', `WITH (appendonly=true)') + AS SELECT ({schema_madlib}.utils_normalize_data({col_ind_var}, __x__.mean::double precision[], @@ -248,13 +254,14 @@ def __utils_normalize_data_grouping(y_decenter=True, **kwargs): {x_mean_join_clause} {y_mean_join_clause} """.format(ydecenter_str=ydecenter_str, group_col=group_col, - x_mean_join_clause=x_mean_join_clause, - 
y_mean_join_clause=y_mean_join_clause, - select_grouping_cols=select_grouping_cols, - **kwargs)) + x_mean_join_clause=x_mean_join_clause, + y_mean_join_clause=y_mean_join_clause, + select_grouping_cols=select_grouping_cols, + **kwargs)) return None # ======================================================================== + def __utils_normalize_data(y_decenter=True, **kwargs): """ Normalize the independent and dependent variables using the calculated mean's and std's @@ -281,11 +288,14 @@ def __utils_normalize_data(y_decenter=True, **kwargs): ydecenter_str = "- {y_mean}".format(**kwargs) if y_decenter else "" plpy.execute( """ - CREATE TEMP TABLE {tbl_data_scaled} AS + CREATE TEMP TABLE {tbl_data_scaled} + m4_ifdef(`__POSTGRESQL__', `', `WITH (appendonly=true)') + AS SELECT - ({schema_madlib}.utils_normalize_data({col_ind_var}, - '{x_mean_str}'::double precision[], - '{x_std_str}'::double precision[])) + ({schema_madlib}.utils_normalize_data( + {col_ind_var}, + '{x_mean_str}'::double precision[], + '{x_std_str}'::double precision[])) AS {col_ind_var_norm_new}, ({col_dep_var} {ydecenter_str}) AS {col_dep_var_norm_new} FROM {tbl_data} @@ -294,6 +304,7 @@ def __utils_normalize_data(y_decenter=True, **kwargs): return None # ======================================================================== + def __utils_cv_preprocess(kwargs): """ Some common processes used in both ridge and lasso cross validation functions: @@ -350,20 +361,20 @@ def __utils_cv_preprocess(kwargs): col_ind_var_cv_new = unique_string() # new name for independent column due to normalization col_ind_var_norm_new = unique_string() - kwargs.update(dict(tbl_accum_error = tbl_accum_error, - tbl_all_data = tbl_all_data, - tbl_inter = tbl_inter, - tbl_train = tbl_train, - tbl_valid = tbl_valid, - tbl_random_id = tbl_random_id, - col_random_id = col_random_id, - tbl_ind_scales = tbl_ind_scales, - tbl_dep_scale = tbl_dep_scale, - col_ind_var_cv_new = col_ind_var_cv_new, - col_ind_var_norm_new = 
col_ind_var_norm_new, - col_dep_var_cv_new = col_dep_var_cv_new, - col_dep_var_norm_new = col_dep_var_norm_new, - tbl_coef = tbl_coef)) + kwargs.update(dict(tbl_accum_error=tbl_accum_error, + tbl_all_data=tbl_all_data, + tbl_inter=tbl_inter, + tbl_train=tbl_train, + tbl_valid=tbl_valid, + tbl_random_id=tbl_random_id, + col_random_id=col_random_id, + tbl_ind_scales=tbl_ind_scales, + tbl_dep_scale=tbl_dep_scale, + col_ind_var_cv_new=col_ind_var_cv_new, + col_ind_var_norm_new=col_ind_var_norm_new, + col_dep_var_cv_new=col_dep_var_cv_new, + col_dep_var_norm_new=col_dep_var_norm_new, + tbl_coef=tbl_coef)) # data_cols = [col_ind_var, col_dep_var] if data_id is None: @@ -394,7 +405,7 @@ def __utils_cv_preprocess(kwargs): # length of the independent variable array dimension = plpy.execute("select max(array_upper({col_ind_var},1)) as dimension from {data_tbl}".format(**kwargs))[0]["dimension"] - kwargs.update(dict(tbl_used = tbl_used, row_num = row_num, dimension = dimension)) + kwargs.update(dict(tbl_use=tbl_used, row_num=row_num, dimension=dimension)) # table to append all fitting results # which are distinguished by id @@ -403,8 +414,8 @@ def __utils_cv_preprocess(kwargs): create temp table {tbl_coef} (id integer, coef double precision[], intercept double precision) """.format(**kwargs)) return None +# ======================================================================== -## ======================================================================== def __utils_accumulate_error(accum_count, tbl_accum_error, param_value, error): """ @@ -413,12 +424,16 @@ def __utils_accumulate_error(accum_count, tbl_accum_error, param_value, error): accumulate measured errors from each validation for each lambda value. 
""" if accum_count == 1: - plpy.execute("create temp table {tbl_accum_error} (lambda double precision, mean_squared_error double precision)".format(tbl_accum_error = tbl_accum_error)) - plpy.execute("insert into {tbl_accum_error} values ({param_value}, {error})".format( - tbl_accum_error = tbl_accum_error, - param_value = param_value, error = error)) + plpy.execute(""" + CREATE TEMP TABLE {tbl_accum_error} ( + lambda double precision, + mean_squared_error double precision) + """.format(tbl_accum_error=tbl_accum_error)) + plpy.execute("insert into {tbl_accum_error} values ({param_value}, {error})". + format(tbl_accum_error=tbl_accum_error, + param_value=param_value, error=error)) +# ======================================================================== -## ======================================================================== def __utils_cv_produce_col_name_string(col_dep_var, col_dep_var_cv_new, col_ind_var, col_ind_var_cv_new): @@ -436,12 +451,12 @@ def __utils_cv_produce_col_name_string(col_dep_var, col_dep_var_cv_new, """ return col_dep_var + " as " + col_dep_var_cv_new + ", " + col_ind_var + " as " + col_ind_var_cv_new - # ======================================================================== -def __utils_cv_copy_data_with_id (rel_origin, col_dep_var, col_dep_var_cv_new, - col_ind_var, col_ind_var_cv_new, rel_copied, - random_id): + +def __utils_cv_copy_data_with_id(rel_origin, col_dep_var, col_dep_var_cv_new, + col_ind_var, col_ind_var_cv_new, rel_copied, + random_id): """ If the user does not provide a ID column, the data table has to be copied and at the same time create a random ID column with it. @@ -458,15 +473,15 @@ def __utils_cv_copy_data_with_id (rel_origin, col_dep_var, col_dep_var_cv_new, # We want to select only the columns that will be used in the computation. 
col_string = __utils_cv_produce_col_name_string(col_dep_var, col_dep_var_cv_new, col_ind_var, col_ind_var_cv_new) - __cv_copy_data_with_id_compute (rel_origin, col_string, rel_copied, random_id) + __cv_copy_data_with_id_compute(rel_origin, col_string, rel_copied, random_id) return None - # ======================================================================== -def __utils_cv_split_data_using_id_col (rel_source, col_dep_var, col_dep_var_cv_new, - col_ind_var, - col_ind_var_cv_new, col_id, row_num, - rel_train, rel_valid, fold_num, which_fold): + +def __utils_cv_split_data_using_id_col(rel_source, col_dep_var, col_dep_var_cv_new, + col_ind_var, + col_ind_var_cv_new, col_id, row_num, + rel_train, rel_valid, fold_num, which_fold): """ A random ID column exists (originally exists or was created during copying), split the data into training and validation. @@ -484,17 +499,17 @@ def __utils_cv_split_data_using_id_col (rel_source, col_dep_var, col_dep_var_cv_ @param which_fold Which fold will be used as validation part? 
""" col_string = __utils_cv_produce_col_name_string(col_dep_var, col_dep_var_cv_new, col_ind_var, col_ind_var_cv_new) - __cv_split_data_using_id_col_compute (rel_source, col_string, col_id, row_num, - rel_train, rel_valid, fold_num, which_fold) + __cv_split_data_using_id_col_compute(rel_source, col_string, col_id, row_num, + rel_train, rel_valid, fold_num, which_fold) return None - # ======================================================================== -def __utils_cv_split_data_using_id_tbl (rel_origin, col_dep_var, col_dep_var_cv_new, - col_ind_var, - col_ind_var_cv_new, rel_random_id, - random_id, origin_id, row_num, rel_train, - rel_valid, fold_num, which_fold): + +def __utils_cv_split_data_using_id_tbl(rel_origin, col_dep_var, col_dep_var_cv_new, + col_ind_var, + col_ind_var_cv_new, rel_random_id, + random_id, origin_id, row_num, rel_train, + rel_valid, fold_num, which_fold): """ Split the data table using a random ID mapping table @@ -518,7 +533,7 @@ def __utils_cv_split_data_using_id_tbl (rel_origin, col_dep_var, col_dep_var_cv_ """ col_string = __utils_cv_produce_col_name_string(col_dep_var, col_dep_var_cv_new, col_ind_var, col_ind_var_cv_new) - __cv_split_data_using_id_tbl_compute (rel_origin, col_string, rel_random_id, - random_id, origin_id, row_num, rel_train, - rel_valid, fold_num, which_fold) + __cv_split_data_using_id_tbl_compute(rel_origin, col_string, rel_random_id, + random_id, origin_id, row_num, rel_train, + rel_valid, fold_num, which_fold) return None http://git-wip-us.apache.org/repos/asf/madlib/blob/3db98bab/src/ports/postgres/modules/elastic_net/elastic_net.sql_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/elastic_net/elastic_net.sql_in b/src/ports/postgres/modules/elastic_net/elastic_net.sql_in index 55f5b25..e30c98c 100644 --- a/src/ports/postgres/modules/elastic_net/elastic_net.sql_in +++ b/src/ports/postgres/modules/elastic_net/elastic_net.sql_in @@ -898,12 +898,13 @@ 
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.elastic_net_train ( tolerance DOUBLE PRECISION ) RETURNS VOID AS $$ PythonFunctionBodyOnly(`elastic_net', `elastic_net') - return elastic_net.elastic_net_train( - schema_madlib, tbl_source, tbl_result, col_dep_var, - col_ind_var, regress_family, alpha, lambda_value, - standardize, grouping_col, optimizer, optimizer_params, - excluded, max_iter, tolerance) -$$ LANGUAGE plpythonu + with AOControl(False): + return elastic_net.elastic_net_train( + schema_madlib, tbl_source, tbl_result, col_dep_var, + col_ind_var, regress_family, alpha, lambda_value, + standardize, grouping_col, optimizer, optimizer_params, + excluded, max_iter, tolerance) + $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); ------------------------------------------------------------------------ http://git-wip-us.apache.org/repos/asf/madlib/blob/3db98bab/src/ports/postgres/modules/knn/knn.py_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/knn/knn.py_in b/src/ports/postgres/modules/knn/knn.py_in index 04e74d1..249c316 100644 --- a/src/ports/postgres/modules/knn/knn.py_in +++ b/src/ports/postgres/modules/knn/knn.py_in @@ -129,7 +129,7 @@ def knn_validate_src(schema_madlib, point_source, point_column_name, point_id, def knn(schema_madlib, point_source, point_column_name, point_id, label_column_name, test_source, test_column_name, test_id, output_table, - k, output_neighbors, fn_dist, weighted_avg): + k, output_neighbors, fn_dist, weighted_avg, **kwargs): """ KNN function to find the K Nearest neighbours Args: http://git-wip-us.apache.org/repos/asf/madlib/blob/3db98bab/src/ports/postgres/modules/knn/knn.sql_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/knn/knn.sql_in b/src/ports/postgres/modules/knn/knn.sql_in index 49e0c22..6fe1672 100644 --- a/src/ports/postgres/modules/knn/knn.sql_in +++ 
b/src/ports/postgres/modules/knn/knn.sql_in @@ -420,22 +420,11 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__knn_validate_src( test_column_name VARCHAR, test_id VARCHAR, output_table VARCHAR, - operation VARCHAR, - k INTEGER + k INTEGER, + output_neighbors BOOLEAN, + fn_dist VARCHAR ) RETURNS INTEGER AS $$ - PythonFunctionBodyOnly(`knn', `knn') - return knn.knn_validate_src( - schema_madlib, - point_source, - point_column_name, - label_column_name, - test_source, - test_column_name, - test_id, - output_table, - operation, - k - ) + PythonFunctionBody(`knn', `knn', `knn_validate_src') $$ LANGUAGE plpythonu VOLATILE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -453,22 +442,7 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.knn( fn_dist TEXT, weighted_avg BOOLEAN ) RETURNS VARCHAR AS $$ - PythonFunctionBodyOnly(`knn', `knn') - return knn.knn( - schema_madlib, - point_source, - point_column_name, - point_id, - label_column_name, - test_source, - test_column_name, - test_id, - output_table, - k, - output_neighbors, - fn_dist, - weighted_avg - ) + PythonFunction(`knn', `knn', `knn') $$ LANGUAGE plpythonu VOLATILE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); http://git-wip-us.apache.org/repos/asf/madlib/blob/3db98bab/src/ports/postgres/modules/lda/lda.py_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/lda/lda.py_in b/src/ports/postgres/modules/lda/lda.py_in index 67a6d0c..303afee 100644 --- a/src/ports/postgres/modules/lda/lda.py_in +++ b/src/ports/postgres/modules/lda/lda.py_in @@ -914,17 +914,17 @@ def conorm_data(data_table, vocab_table, output_data_table, output_vocab_table): norm_dataset(data_table, output_vocab_table, output_data_table) -def index_sort(vector): +def index_sort(arr, **kwargs): """ @brief Return the index of elements in a sorted order - @param vector The array to be sorted + @param arr The array to be sorted @return The index of elements """ # 
process arrays for GPDB < 4.1 and PG < 9.0 - vector = string_to_array(vector, False) - dim = len(vector) + arr = string_to_array(arr, False) + dim = len(arr) idx = range(dim) - idx.sort(key=lambda r: vector[r]) + idx.sort(key=lambda r: arr[r]) return array_to_string(map(lambda r: r + 1, idx)) http://git-wip-us.apache.org/repos/asf/madlib/blob/3db98bab/src/ports/postgres/modules/lda/lda.sql_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/lda/lda.sql_in b/src/ports/postgres/modules/lda/lda.sql_in index 2355850..16dc3a8 100644 --- a/src/ports/postgres/modules/lda/lda.sql_in +++ b/src/ports/postgres/modules/lda/lda.sql_in @@ -965,10 +965,9 @@ MADLIB_SCHEMA.lda_train ) RETURNS SETOF MADLIB_SCHEMA.lda_result AS $$ PythonFunctionBodyOnly(`lda', `lda') - lda.lda_train( - schema_madlib, data_table, model_table, output_data_table, voc_size, - topic_num, iter_num, alpha, beta - ) + with AOControl(False): + lda.lda_train(schema_madlib, data_table, model_table, output_data_table, + voc_size, topic_num, iter_num, alpha, beta) return [[model_table, 'model table'], [output_data_table, 'output data table']] $$ LANGUAGE plpythonu @@ -995,7 +994,8 @@ MADLIB_SCHEMA.lda_predict ) RETURNS SETOF MADLIB_SCHEMA.lda_result AS $$ PythonFunctionBodyOnly(`lda', `lda') - lda.lda_predict(schema_madlib, data_table, model_table, output_table) + with AOControl(False): + lda.lda_predict(schema_madlib, data_table, model_table, output_table) return [[ output_table, 'per-doc topic distribution and per-word topic assignments']] @@ -1015,8 +1015,8 @@ MADLIB_SCHEMA.lda_predict ) RETURNS SETOF MADLIB_SCHEMA.lda_result AS $$ PythonFunctionBodyOnly(`lda', `lda') - lda.lda_predict( - schema_madlib, data_table, model_table, output_table, iter_num) + with AOControl(False): + lda.lda_predict(schema_madlib, data_table, model_table, output_table, iter_num) return [[ output_table, 'per-doc topic distribution and per-word topic assignments']] @@ 
-1036,7 +1036,8 @@ MADLIB_SCHEMA.lda_get_topic_word_count ) RETURNS SETOF MADLIB_SCHEMA.lda_result AS $$ PythonFunctionBodyOnly(`lda', `lda') - lda.get_topic_word_count(schema_madlib, model_table, output_table) + with AOControl(False): + lda.get_topic_word_count(schema_madlib, model_table, output_table) return [[output_table, 'per-topic word counts']] $$ LANGUAGE plpythonu STRICT m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -1054,7 +1055,8 @@ MADLIB_SCHEMA.lda_get_word_topic_count ) RETURNS SETOF MADLIB_SCHEMA.lda_result AS $$ PythonFunctionBodyOnly(`lda', `lda') - lda.get_word_topic_count(schema_madlib, model_table, output_table) + with AOControl(False): + lda.get_word_topic_count(schema_madlib, model_table, output_table) return [[output_table, 'per-word topic counts']] $$ LANGUAGE plpythonu STRICT m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -1076,8 +1078,8 @@ MADLIB_SCHEMA.lda_get_topic_desc ) RETURNS SETOF MADLIB_SCHEMA.lda_result AS $$ PythonFunctionBodyOnly(`lda', `lda') - lda.get_topic_desc( - schema_madlib, model_table, vocab_table, desc_table, top_k) + with AOControl(False): + lda.get_topic_desc(schema_madlib, model_table, vocab_table, desc_table, top_k) return [[ desc_table, """topic description, use "ORDER BY topicid, prob DESC" to check the @@ -1098,7 +1100,8 @@ MADLIB_SCHEMA.lda_get_word_topic_mapping ) RETURNS SETOF MADLIB_SCHEMA.lda_result AS $$ PythonFunctionBodyOnly(`lda', `lda') - lda.get_word_topic_mapping(schema_madlib, lda_output_table, mapping_table) + with AOControl(False): + lda.get_word_topic_mapping(schema_madlib, lda_output_table, mapping_table) return [[mapping_table, 'wordid - topicid mapping']] $$ LANGUAGE plpythonu STRICT m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -1256,8 +1259,8 @@ MADLIB_SCHEMA.lda_get_perplexity ) RETURNS FLOAT8 AS $$ PythonFunctionBodyOnly(`lda', `lda') - return lda.get_perplexity( - schema_madlib, model_table, output_data_table) + with 
AOControl(False): + return lda.get_perplexity(schema_madlib, model_table, output_data_table) $$ LANGUAGE plpythonu STRICT m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `READS SQL DATA', `'); @@ -1454,8 +1457,7 @@ MADLIB_SCHEMA.__lda_util_index_sort arr FLOAT8[] ) RETURNS INT4[] AS $$ - PythonFunctionBodyOnly(`lda', `lda') - return lda.index_sort(arr) + PythonFunction(`lda', `lda', `index_sort') $$ LANGUAGE plpythonu STRICT m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); @@ -1474,7 +1476,8 @@ MADLIB_SCHEMA.__lda_util_norm_vocab ) RETURNS SETOF MADLIB_SCHEMA.lda_result AS $$ PythonFunctionBodyOnly(`lda', `lda') - lda.norm_vocab(vocab_table, output_vocab_table) + with AOControl(False): + lda.norm_vocab(vocab_table, output_vocab_table) return [[output_vocab_table,'normalized vocbulary table']] $$ LANGUAGE plpythonu STRICT m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -1495,7 +1498,8 @@ MADLIB_SCHEMA.__lda_util_norm_dataset ) RETURNS SETOF MADLIB_SCHEMA.lda_result AS $$ PythonFunctionBodyOnly(`lda', `lda') - lda.norm_dataset(data_table, norm_vocab_table, output_data_table) + with AOControl(False): + lda.norm_dataset(data_table, norm_vocab_table, output_data_table) return [[output_data_table,'normalized data table']] $$ LANGUAGE plpythonu STRICT m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -1520,8 +1524,8 @@ MADLIB_SCHEMA.__lda_util_conorm_data ) RETURNS SETOF MADLIB_SCHEMA.lda_result AS $$ PythonFunctionBodyOnly(`lda', `lda') - lda.conorm_data( - data_table, vocab_table, output_data_table, output_vocab_table) + with AOControl(False): + lda.conorm_data(data_table, vocab_table, output_data_table, output_vocab_table) return [[output_data_table,'normalized data table'], [output_vocab_table,'normalized vocab table']] $$ LANGUAGE plpythonu STRICT http://git-wip-us.apache.org/repos/asf/madlib/blob/3db98bab/src/ports/postgres/modules/linalg/matrix_ops.sql_in ---------------------------------------------------------------------- 
diff --git a/src/ports/postgres/modules/linalg/matrix_ops.sql_in b/src/ports/postgres/modules/linalg/matrix_ops.sql_in index bd59826..6e33f46 100644 --- a/src/ports/postgres/modules/linalg/matrix_ops.sql_in +++ b/src/ports/postgres/modules/linalg/matrix_ops.sql_in @@ -1057,10 +1057,11 @@ MADLIB_SCHEMA.matrix_mult ) RETURNS MADLIB_SCHEMA.matrix_result AS $$ PythonFunctionBodyOnly(`linalg', `matrix_ops') - matrix_ops.matrix_mult(schema_madlib, - matrix_a, a_args, - matrix_b, b_args, - matrix_out, out_args) + with AOControl(False): + matrix_ops.matrix_mult(schema_madlib, + matrix_a, a_args, + matrix_b, b_args, + matrix_out, out_args) return [matrix_out] $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -1116,8 +1117,9 @@ MADLIB_SCHEMA.matrix_ndims ) RETURNS INTEGER[] AS $$ PythonFunctionBodyOnly(`linalg', `matrix_ops') - return matrix_ops.matrix_ndims(schema_madlib, - matrix_in, in_args, is_block) + with AOControl(False): + return matrix_ops.matrix_ndims(schema_madlib, + matrix_in, in_args, is_block) $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -1180,10 +1182,11 @@ MADLIB_SCHEMA.matrix_add ) RETURNS MADLIB_SCHEMA.matrix_result AS $$ PythonFunctionBodyOnly(`linalg', `matrix_ops') - matrix_ops.matrix_add(schema_madlib, - matrix_a, a_args, - matrix_b, b_args, - matrix_out, out_args) + with AOControl(False): + matrix_ops.matrix_add(schema_madlib, + matrix_a, a_args, + matrix_b, b_args, + matrix_out, out_args) return [matrix_out] $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -1249,10 +1252,11 @@ MADLIB_SCHEMA.matrix_sub ) RETURNS MADLIB_SCHEMA.matrix_result AS $$ PythonFunctionBodyOnly(`linalg', `matrix_ops') - matrix_ops.matrix_sub(schema_madlib, - matrix_a, a_args, - matrix_b, b_args, - matrix_out, out_args) + with AOControl(False): + matrix_ops.matrix_sub(schema_madlib, + matrix_a, a_args, + matrix_b, b_args, + matrix_out, out_args) return 
[matrix_out] $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -1328,8 +1332,8 @@ MADLIB_SCHEMA.matrix_extract_row ) RETURNS FLOAT8[] AS $$ PythonFunctionBodyOnly(`linalg', `matrix_ops') - return matrix_ops.matrix_extract( - schema_madlib, matrix_in, in_args, 1, index) + with AOControl(False): + return matrix_ops.matrix_extract(schema_madlib, matrix_in, in_args, 1, index) $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -1373,8 +1377,8 @@ MADLIB_SCHEMA.matrix_extract_col ) RETURNS FLOAT8[] AS $$ PythonFunctionBodyOnly(`linalg', `matrix_ops') - return matrix_ops.matrix_extract( - schema_madlib, matrix_in, in_args, 2, index) + with AOControl(False): + return matrix_ops.matrix_extract(schema_madlib, matrix_in, in_args, 2, index) $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -1411,9 +1415,10 @@ MADLIB_SCHEMA.matrix_zeros ) RETURNS MADLIB_SCHEMA.matrix_result AS $$ PythonFunctionBodyOnly(`linalg', `matrix_ops') - matrix_ops.matrix_zeros(schema_madlib, - row_dim, col_dim, - matrix_out, out_args) + with AOControl(False): + matrix_ops.matrix_zeros(schema_madlib, + row_dim, col_dim, + matrix_out, out_args) return [matrix_out] $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -1474,8 +1479,9 @@ MADLIB_SCHEMA.matrix_max ) RETURNS MADLIB_SCHEMA.matrix_result AS $$ PythonFunctionBodyOnly(`linalg', `matrix_ops') - matrix_ops.matrix_max(schema_madlib, - matrix_in, in_args, dim, matrix_out, fetch_index) + with AOControl(False): + matrix_ops.matrix_max(schema_madlib, + matrix_in, in_args, dim, matrix_out, fetch_index) return [matrix_out] $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -1539,8 +1545,9 @@ MADLIB_SCHEMA.matrix_min ) RETURNS MADLIB_SCHEMA.matrix_result AS $$ PythonFunctionBodyOnly(`linalg', `matrix_ops') - matrix_ops.matrix_min(schema_madlib, - matrix_in, 
in_args, dim, matrix_out, fetch_index) + with AOControl(False): + matrix_ops.matrix_min(schema_madlib, + matrix_in, in_args, dim, matrix_out, fetch_index) return [matrix_out] $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -1594,8 +1601,9 @@ MADLIB_SCHEMA.matrix_sum ) RETURNS FLOAT8[] AS $$ PythonFunctionBodyOnly(`linalg', `matrix_ops') - return matrix_ops.matrix_sum(schema_madlib, - matrix_in, in_args, dim) + with AOControl(False): + return matrix_ops.matrix_sum(schema_madlib, + matrix_in, in_args, dim) $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -1646,8 +1654,9 @@ MADLIB_SCHEMA.matrix_mean ) RETURNS FLOAT8[] AS $$ PythonFunctionBodyOnly(`linalg', `matrix_ops') - return matrix_ops.matrix_mean(schema_madlib, - matrix_in, in_args, dim) + with AOControl(False): + return matrix_ops.matrix_mean(schema_madlib, + matrix_in, in_args, dim) $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -1703,8 +1712,9 @@ MADLIB_SCHEMA.matrix_scalar_mult ) RETURNS MADLIB_SCHEMA.matrix_result AS $$ PythonFunctionBodyOnly(`linalg', `matrix_ops') - matrix_ops.matrix_scalar_mult(schema_madlib, - matrix_in, in_args, scalar, matrix_out, out_args) + with AOControl(False): + matrix_ops.matrix_scalar_mult(schema_madlib, + matrix_in, in_args, scalar, matrix_out, out_args) return [matrix_out] $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -1755,8 +1765,9 @@ MADLIB_SCHEMA.matrix_vec_mult ) RETURNS FLOAT8[] AS $$ PythonFunctionBodyOnly(`linalg', `matrix_ops') - return matrix_ops.matrix_vec_mult(schema_madlib, - matrix_in, in_args, vector) + with AOControl(False): + return matrix_ops.matrix_vec_mult(schema_madlib, + matrix_in, in_args, vector) $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -1806,10 +1817,11 @@ MADLIB_SCHEMA.matrix_elem_mult ) RETURNS MADLIB_SCHEMA.matrix_result AS $$ 
PythonFunctionBodyOnly(`linalg', `matrix_ops') - matrix_ops.matrix_elem_mult(schema_madlib, - matrix_a, a_args, - matrix_b, b_args, - matrix_out, out_args) + with AOControl(False): + matrix_ops.matrix_elem_mult(schema_madlib, + matrix_a, a_args, + matrix_b, b_args, + matrix_out, out_args) return [matrix_out] $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -1868,8 +1880,9 @@ MADLIB_SCHEMA.matrix_trans ) RETURNS MADLIB_SCHEMA.matrix_result AS $$ PythonFunctionBodyOnly(`linalg', `matrix_ops') - matrix_ops.matrix_trans(schema_madlib, - matrix_in, in_args, matrix_out, out_args) + with AOControl(False): + matrix_ops.matrix_trans(schema_madlib, + matrix_in, in_args, matrix_out, out_args) return [matrix_out] $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -1922,8 +1935,9 @@ MADLIB_SCHEMA.matrix_sparsify ) RETURNS MADLIB_SCHEMA.matrix_result AS $$ PythonFunctionBodyOnly(`linalg', `matrix_ops') - matrix_ops.matrix_sparsify(schema_madlib, matrix_in, in_args, - matrix_out, out_args) + with AOControl(False): + matrix_ops.matrix_sparsify(schema_madlib, matrix_in, in_args, + matrix_out, out_args) return [matrix_out] $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -1981,8 +1995,9 @@ MADLIB_SCHEMA.matrix_densify ) RETURNS MADLIB_SCHEMA.matrix_result AS $$ PythonFunctionBodyOnly(`linalg', `matrix_ops') - matrix_ops.matrix_densify(schema_madlib, - matrix_in, in_args, matrix_out, out_args) + with AOControl(False): + matrix_ops.matrix_densify(schema_madlib, + matrix_in, in_args, matrix_out, out_args) return [matrix_out] $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -2095,8 +2110,9 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.matrix_norm( ) RETURNS DOUBLE PRECISION AS $$ PythonFunctionBodyOnly(`linalg', `matrix_ops') - return matrix_ops.matrix_norm(schema_madlib, - matrix_in, in_args, norm_type) + with 
AOControl(False): + return matrix_ops.matrix_norm(schema_madlib, + matrix_in, in_args, norm_type) $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `READS SQL DATA', `'); @@ -2139,8 +2155,9 @@ MADLIB_SCHEMA.matrix_block_mult ) RETURNS MADLIB_SCHEMA.matrix_result AS $$ PythonFunctionBodyOnly(`linalg', `matrix_ops') - matrix_ops.matrix_block_mult(schema_madlib, matrix_a, a_args, - matrix_b, b_args, matrix_out, out_args) + with AOControl(False): + matrix_ops.matrix_block_mult(schema_madlib, matrix_a, a_args, + matrix_b, b_args, matrix_out, out_args) return [matrix_out] $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -2155,8 +2172,9 @@ MADLIB_SCHEMA.matrix_block_square ) RETURNS MADLIB_SCHEMA.matrix_result AS $$ PythonFunctionBodyOnly(`linalg', `matrix_ops') - matrix_ops.matrix_block_square(schema_madlib, matrix_in, in_args, - matrix_out, out_args) + with AOControl(False): + matrix_ops.matrix_block_square(schema_madlib, matrix_in, in_args, + matrix_out, out_args) return [matrix_out] $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -2171,8 +2189,9 @@ MADLIB_SCHEMA.matrix_block_trans ) RETURNS MADLIB_SCHEMA.matrix_result AS $$ PythonFunctionBodyOnly(`linalg', `matrix_ops') - matrix_ops.matrix_block_trans(schema_madlib, matrix_in, in_args, - matrix_out, out_args) + with AOControl(False): + matrix_ops.matrix_block_trans(schema_madlib, matrix_in, in_args, + matrix_out, out_args) return [matrix_out] $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -2198,8 +2217,9 @@ MADLIB_SCHEMA.matrix_square ) RETURNS MADLIB_SCHEMA.matrix_result AS $$ PythonFunctionBodyOnly(`linalg', `matrix_ops') - matrix_ops.matrix_square(schema_madlib, matrix_in, in_args, - matrix_out, out_args) + with AOControl(False): + matrix_ops.matrix_square(schema_madlib, matrix_in, in_args, + matrix_out, out_args) return [matrix_out] $$ LANGUAGE plpythonu 
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -2236,8 +2256,9 @@ MADLIB_SCHEMA.matrix_diag ) RETURNS MADLIB_SCHEMA.matrix_result AS $$ PythonFunctionBodyOnly(`linalg', `matrix_ops') - matrix_ops.matrix_diag(schema_madlib, - diag_elements, matrix_out, out_args) + with AOControl(False): + matrix_ops.matrix_diag(schema_madlib, + diag_elements, matrix_out, out_args) return [matrix_out] $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -2291,7 +2312,8 @@ MADLIB_SCHEMA.matrix_extract_diag ) RETURNS float[] AS $$ PythonFunctionBodyOnly(`linalg', `matrix_ops') - return matrix_ops.matrix_extract_diag(schema_madlib, matrix_in, in_args) + with AOControl(False): + return matrix_ops.matrix_extract_diag(schema_madlib, matrix_in, in_args) $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -2331,8 +2353,9 @@ MADLIB_SCHEMA.matrix_identity ) RETURNS MADLIB_SCHEMA.matrix_result AS $$ PythonFunctionBodyOnly(`linalg', `matrix_ops') - matrix_ops.matrix_identity(schema_madlib, - row_dim, matrix_out, out_args) + with AOControl(False): + matrix_ops.matrix_identity(schema_madlib, + row_dim, matrix_out, out_args) return [matrix_out] $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -2386,8 +2409,9 @@ MADLIB_SCHEMA.matrix_random ) RETURNS MADLIB_SCHEMA.matrix_result AS $$ PythonFunctionBodyOnly(`linalg', `matrix_ops') - matrix_ops.matrix_random(schema_madlib, distribution, - row_dim, col_dim, in_args, matrix_out, out_args) + with AOControl(False): + matrix_ops.matrix_random(schema_madlib, distribution, + row_dim, col_dim, in_args, matrix_out, out_args) return [matrix_out] $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -2435,8 +2459,9 @@ MADLIB_SCHEMA.matrix_ones ) RETURNS MADLIB_SCHEMA.matrix_result AS $$ PythonFunctionBodyOnly(`linalg', `matrix_ops') - matrix_ops.matrix_ones(schema_madlib, - row_dim, 
col_dim, matrix_out, out_args) + with AOControl(False): + matrix_ops.matrix_ones(schema_madlib, + row_dim, col_dim, matrix_out, out_args) return [matrix_out] $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -2489,9 +2514,10 @@ MADLIB_SCHEMA.matrix_zeros ) RETURNS MADLIB_SCHEMA.matrix_result AS $$ PythonFunctionBodyOnly(`linalg', `matrix_ops') - matrix_ops.matrix_zeros(schema_madlib, - row_dim, col_dim, - matrix_out, out_args) + with AOControl(False): + matrix_ops.matrix_zeros(schema_madlib, + row_dim, col_dim, + matrix_out, out_args) return [matrix_out] $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -2544,8 +2570,9 @@ MADLIB_SCHEMA.matrix_inverse ) RETURNS MADLIB_SCHEMA.matrix_result AS $$ PythonFunctionBodyOnly(`linalg', `matrix_ops') - matrix_ops.matrix_inverse(schema_madlib, matrix_in, in_args, - matrix_out, out_args) + with AOControl(False): + matrix_ops.matrix_inverse(schema_madlib, matrix_in, in_args, + matrix_out, out_args) return [matrix_out] $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -2598,8 +2625,9 @@ MADLIB_SCHEMA.matrix_pinv ) RETURNS MADLIB_SCHEMA.matrix_result AS $$ PythonFunctionBodyOnly(`linalg', `matrix_ops') - matrix_ops.matrix_pinv(schema_madlib, matrix_in, in_args, - matrix_out, out_args) + with AOControl(False): + matrix_ops.matrix_pinv(schema_madlib, matrix_in, in_args, + matrix_out, out_args) return [matrix_out] $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -2654,8 +2682,9 @@ MADLIB_SCHEMA.matrix_lu ) RETURNS MADLIB_SCHEMA.matrix_result AS $$ PythonFunctionBodyOnly(`linalg', `matrix_ops') - matrix_ops.matrix_lu(schema_madlib, matrix_in, in_args, - matrix_out_prefix, out_args) + with AOControl(False): + matrix_ops.matrix_lu(schema_madlib, matrix_in, in_args, + matrix_out_prefix, out_args) result = """ For LU decomposition with full pivoting (PAQ = LU), the 
corresponding matrices can be obtained by appending following suffixes @@ -2715,8 +2744,9 @@ MADLIB_SCHEMA.matrix_eigen ) RETURNS MADLIB_SCHEMA.matrix_result AS $$ PythonFunctionBodyOnly(`linalg', `matrix_ops') - matrix_ops.matrix_eigen(schema_madlib, matrix_in, in_args, - matrix_out, out_args) + with AOControl(False): + matrix_ops.matrix_eigen(schema_madlib, matrix_in, in_args, + matrix_out, out_args) return [matrix_out] $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -2773,8 +2803,9 @@ MADLIB_SCHEMA.matrix_cholesky ) RETURNS MADLIB_SCHEMA.matrix_result AS $$ PythonFunctionBodyOnly(`linalg', `matrix_ops') - matrix_ops.matrix_cholesky(schema_madlib, matrix_in, in_args, - matrix_out_prefix, out_args) + with AOControl(False): + matrix_ops.matrix_cholesky(schema_madlib, matrix_in, in_args, + matrix_out_prefix, out_args) result = """ For Cholesky decomposition with left pivoting (PA = LDL*), the corresponding matrices can be obtained by appending following suffixes @@ -2836,8 +2867,9 @@ MADLIB_SCHEMA.matrix_qr ) RETURNS MADLIB_SCHEMA.matrix_result AS $$ PythonFunctionBodyOnly(`linalg', `matrix_ops') - matrix_ops.matrix_qr(schema_madlib, matrix_in, in_args, - matrix_out_prefix, out_args) + with AOControl(False): + matrix_ops.matrix_qr(schema_madlib, matrix_in, in_args, + matrix_out_prefix, out_args) result = """ For QR decomposition (A = QR), the corresponding matrices can be obtained by appending following suffixes @@ -2893,7 +2925,8 @@ MADLIB_SCHEMA.matrix_rank ) RETURNS INT8 AS $$ PythonFunctionBodyOnly(`linalg', `matrix_ops') - return matrix_ops.matrix_rank(schema_madlib, matrix_in, in_args) + with AOControl(False): + return matrix_ops.matrix_rank(schema_madlib, matrix_in, in_args) $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -2929,7 +2962,8 @@ MADLIB_SCHEMA.matrix_nuclear_norm ) RETURNS DOUBLE PRECISION AS $$ PythonFunctionBodyOnly(`linalg', `matrix_ops') - return 
matrix_ops.matrix_nuclear_norm(schema_madlib, matrix_in, in_args) + with AOControl(False): + return matrix_ops.matrix_nuclear_norm(schema_madlib, matrix_in, in_args) $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -3328,7 +3362,8 @@ MADLIB_SCHEMA.__matrix_column_to_array_format ) RETURNS MADLIB_SCHEMA.matrix_result AS $$ PythonFunctionBodyOnly(`linalg', `matrix_ops') - matrix_ops._matrix_column_to_array_format(matrix_in, row_id, matrix_out, istemp) + with AOControl(False): + matrix_ops._matrix_column_to_array_format(matrix_in, row_id, matrix_out, istemp) return [matrix_out] $$ LANGUAGE plpythonu STRICT m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -3346,7 +3381,8 @@ MADLIB_SCHEMA.matrix_blockize ) RETURNS MADLIB_SCHEMA.matrix_result AS $$ PythonFunctionBodyOnly(`linalg', `matrix_ops') - matrix_ops.matrix_blockize(schema_madlib, matrix_in, in_args, rsize, csize, matrix_out, out_args) + with AOControl(False): + matrix_ops.matrix_blockize(schema_madlib, matrix_in, in_args, rsize, csize, matrix_out, out_args) return [matrix_out] $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -3376,7 +3412,8 @@ MADLIB_SCHEMA.matrix_unblockize ) RETURNS MADLIB_SCHEMA.matrix_result AS $$ PythonFunctionBodyOnly(`linalg', `matrix_ops') - matrix_ops.matrix_unblockize(schema_madlib, matrix_in, in_args, + with AOControl(False): + matrix_ops.matrix_unblockize(schema_madlib, matrix_in, in_args, matrix_out, out_args) return [matrix_out] $$ LANGUAGE plpythonu http://git-wip-us.apache.org/repos/asf/madlib/blob/3db98bab/src/ports/postgres/modules/linalg/svd.sql_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/linalg/svd.sql_in b/src/ports/postgres/modules/linalg/svd.sql_in index 070f5e4..50313c5 100644 --- a/src/ports/postgres/modules/linalg/svd.sql_in +++ b/src/ports/postgres/modules/linalg/svd.sql_in @@ -464,9 +464,9 @@ 
MADLIB_SCHEMA.svd( ) RETURNS VOID AS $$ PythonFunctionBodyOnly(`linalg', `svd') - return svd.svd( - schema_madlib, source_table, output_table_prefix, - row_id, k, n_iterations, result_summary_table) + with AOControl(False): + return svd.svd(schema_madlib, source_table, output_table_prefix, + row_id, k, n_iterations, result_summary_table) $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -484,9 +484,9 @@ MADLIB_SCHEMA.svd_block( ) RETURNS VOID AS $$ PythonFunctionBodyOnly(`linalg', `svd') - return svd.svd_block( - schema_madlib, source_table, output_table_prefix, k, - n_iterations, result_summary_table) + with AOControl(False): + return svd.svd_block(schema_madlib, source_table, output_table_prefix, k, + n_iterations, result_summary_table) $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -537,10 +537,10 @@ MADLIB_SCHEMA.svd_sparse( ) RETURNS VOID AS $$ PythonFunctionBodyOnly(`linalg', `svd') - return svd.svd_sparse( - schema_madlib, source_table, output_table_prefix, - row_id, col_id, val_id, row_dim, col_dim, k, - n_iterations, result_summary_table) + with AOControl(False): + return svd.svd_sparse(schema_madlib, source_table, output_table_prefix, + row_id, col_id, val_id, row_dim, col_dim, k, + n_iterations, result_summary_table) $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -560,9 +560,11 @@ MADLIB_SCHEMA.svd_sparse_native( ) RETURNS VOID AS $$ PythonFunctionBodyOnly(`linalg', `svd') - return svd.svd_sparse_native( - schema_madlib, source_table, output_table_prefix, - row_id, col_id, val_id, row_dim, col_dim, k, n_iterations, result_summary_table) + with AOControl(False): + return svd.svd_sparse_native(schema_madlib, source_table, + output_table_prefix, row_id, col_id, + val_id, row_dim, col_dim, k, n_iterations, + result_summary_table) $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -947,9 
+949,9 @@ MADLIB_SCHEMA.__svd_lanczos_bidiagonalize ) RETURNS VOID AS $$ PythonFunctionBodyOnly(`linalg', `svd') - return svd.lanczos_bidiagonalize( - schema_madlib, source_table, - output_table_prefix, n_iterations, k, is_block) + with AOControl(False): + return svd.lanczos_bidiagonalize(schema_madlib, source_table, + output_table_prefix, n_iterations, k, is_block) $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -966,9 +968,11 @@ MADLIB_SCHEMA.__svd_lanczos_bidiagonalize_sparse ) RETURNS VOID AS $$ PythonFunctionBodyOnly(`linalg', `svd') - return svd.lanczos_bidiagonalize_sparse( - schema_madlib, source_table, row_id, col_id, - val, output_table_prefix, n_iterations, k) + with AOControl(False): + return svd.lanczos_bidiagonalize_sparse(schema_madlib, source_table, + row_id, col_id, val, + output_table_prefix, + n_iterations, k) $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -1051,8 +1055,9 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__svd_vec_trans_mult_matrix( ) RETURNS VOID AS $$ PythonFunctionBodyOnly(`linalg', `svd') - return svd.svd_vec_trans_mult_matrix( - schema_madlib, vec_table, mat_table, k, res_table, is_left) + with AOControl(False): + return svd.svd_vec_trans_mult_matrix(schema_madlib, vec_table, + mat_table, k, res_table, is_left) $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); http://git-wip-us.apache.org/repos/asf/madlib/blob/3db98bab/src/ports/postgres/modules/pca/pca.py_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/pca/pca.py_in b/src/ports/postgres/modules/pca/pca.py_in index 286c57e..cab8e7f 100644 --- a/src/ports/postgres/modules/pca/pca.py_in +++ b/src/ports/postgres/modules/pca/pca.py_in @@ -682,9 +682,9 @@ def _recenter_data(schema_madlib, source_table, output_table, row_id, """ # Step 1: Compute column mean values x_scales = 
utils_ind_var_scales(tbl_data=source_table, - col_ind_var=col_name, - dimension=dimension, - schema_madlib=schema_madlib) + col_ind_var=col_name, + dimension=dimension, + schema_madlib=schema_madlib) x_mean_str = _array_to_string(x_scales["mean"]) x_std_str = _array_to_string([1] * dimension) @@ -692,7 +692,9 @@ def _recenter_data(schema_madlib, source_table, output_table, row_id, # Step 2: Rescale the matrices plpy.execute( """ - CREATE TABLE {output_table} AS + CREATE TABLE {output_table} + m4_ifdef(`__POSTGRESQL__', `', `WITH (appendonly=true)') + AS SELECT {row_id} as row_id, ({schema_madlib}.utils_normalize_data( http://git-wip-us.apache.org/repos/asf/madlib/blob/3db98bab/src/ports/postgres/modules/pca/pca.sql_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/pca/pca.sql_in b/src/ports/postgres/modules/pca/pca.sql_in index f7ef305..92b74e2 100644 --- a/src/ports/postgres/modules/pca/pca.sql_in +++ b/src/ports/postgres/modules/pca/pca.sql_in @@ -745,7 +745,8 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.pca_train( ) RETURNS TEXT AS $$ PythonFunctionBodyOnly(`pca', `pca') - return pca.pca_help_message(schema_madlib, usage_string) + with AOControl(False): + return pca.pca_help_message(schema_madlib, usage_string) $$ LANGUAGE plpythonu IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); @@ -982,7 +983,8 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.pca_sparse_train( ) RETURNS TEXT AS $$ PythonFunctionBodyOnly(`pca', `pca') - return pca.pca_sparse_help_message(schema_madlib, usage_string) + with AOControl(False): + return pca.pca_sparse_help_message(schema_madlib, usage_string) $$ LANGUAGE plpythonu IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); http://git-wip-us.apache.org/repos/asf/madlib/blob/3db98bab/src/ports/postgres/modules/pca/pca_project.py_in ---------------------------------------------------------------------- diff --git 
a/src/ports/postgres/modules/pca/pca_project.py_in b/src/ports/postgres/modules/pca/pca_project.py_in index cc7a4ba..d205044 100644 --- a/src/ports/postgres/modules/pca/pca_project.py_in +++ b/src/ports/postgres/modules/pca/pca_project.py_in @@ -630,7 +630,9 @@ def _pca_project_union(schema_madlib, source_table, pc_table, out_table, pc_table_mean = add_postfix(pc_table, "_mean") plpy.execute( """ - CREATE TABLE {scaled_source_table} AS + CREATE TABLE {scaled_source_table} + m4_ifdef(`__POSTGRESQL__', `', `WITH (appendonly=true)') + AS SELECT row_id, ({schema_madlib}.utils_normalize_data( http://git-wip-us.apache.org/repos/asf/madlib/blob/3db98bab/src/ports/postgres/modules/recursive_partitioning/decision_tree.sql_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/recursive_partitioning/decision_tree.sql_in b/src/ports/postgres/modules/recursive_partitioning/decision_tree.sql_in index c015556..8ad7a9d 100644 --- a/src/ports/postgres/modules/recursive_partitioning/decision_tree.sql_in +++ b/src/ports/postgres/modules/recursive_partitioning/decision_tree.sql_in @@ -1573,12 +1573,12 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.tree_train( verbose_mode BOOLEAN ) RETURNS VOID AS $$ PythonFunctionBodyOnly(recursive_partitioning, decision_tree) - decision_tree.tree_train( - schema_madlib, training_table_name, output_table_name, - id_col_name, dependent_variable, list_of_features, - list_of_features_to_exclude, split_criterion, grouping_cols, - weights, max_depth, min_split, min_bucket, n_bins, pruning_params, - null_handling_params, verbose_mode) + with AOControl(False): + decision_tree.tree_train(schema_madlib, training_table_name, output_table_name, + id_col_name, dependent_variable, list_of_features, + list_of_features_to_exclude, split_criterion, grouping_cols, + weights, max_depth, min_split, min_bucket, n_bins, pruning_params, + null_handling_params, verbose_mode) $$ LANGUAGE plpythonu VOLATILE 
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); ------------------------------------------------------------ @@ -1991,8 +1991,9 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.tree_surr_display( model_table TEXT ) RETURNS VARCHAR AS $$ PythonFunctionBodyOnly(recursive_partitioning, decision_tree, tree_display) - return decision_tree.tree_display(schema_madlib, model_table, dot_format=False, - verbose=False, disp_surr=True) + with AOControl(False): + return decision_tree.tree_display(schema_madlib, model_table, dot_format=False, + verbose=False, disp_surr=True) $$ LANGUAGE plpythonu VOLATILE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `READS SQL DATA', `'); @@ -2439,10 +2440,10 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA._tree_rmse( output_table VARCHAR ) RETURNS VOID AS $$ PythonFunctionBodyOnly(recursive_partitioning, decision_tree) - decision_tree._tree_error( - schema_madlib, source_table, dependent_varname, - prediction_table, pred_dep_name, id_col_name, grouping_cols, - output_table, False) + with AOControl(False): + decision_tree._tree_error(schema_madlib, source_table, dependent_varname, + prediction_table, pred_dep_name, id_col_name, grouping_cols, + output_table, False) $$ LANGUAGE plpythonu VOLATILE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -2458,10 +2459,10 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA._tree_rmse( k INTEGER ) RETURNS VOID AS $$ PythonFunctionBodyOnly(recursive_partitioning, decision_tree) - decision_tree._tree_error( - schema_madlib, source_table, dependent_varname, - prediction_table, pred_dep_name, id_col_name, grouping_cols, - output_table, False, use_existing_tables, k) + with AOControl(False): + decision_tree._tree_error(schema_madlib, source_table, dependent_varname, + prediction_table, pred_dep_name, id_col_name, grouping_cols, + output_table, False, use_existing_tables, k) $$ LANGUAGE plpythonu VOLATILE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); 
------------------------------------------------------------------------- @@ -2477,10 +2478,10 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA._tree_misclassified( output_table VARCHAR ) RETURNS VOID AS $$ PythonFunctionBodyOnly(recursive_partitioning, decision_tree) - decision_tree._tree_error( - schema_madlib, source_table, dependent_varname, - prediction_table, pred_dep_name, id_col_name, grouping_cols, - output_table, True) + with AOControl(False): + decision_tree._tree_error(schema_madlib, source_table, dependent_varname, + prediction_table, pred_dep_name, id_col_name, + grouping_cols, output_table, True) $$ LANGUAGE plpythonu VOLATILE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -2496,9 +2497,10 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA._tree_misclassified( k INTEGER ) RETURNS VOID AS $$ PythonFunctionBodyOnly(recursive_partitioning, decision_tree) - decision_tree._tree_error( - schema_madlib, source_table, dependent_varname, - prediction_table, pred_dep_name, id_col_name, grouping_cols, - output_table, True, use_existing_tables, k) + with AOControl(False): + decision_tree._tree_error(schema_madlib, source_table, dependent_varname, + prediction_table, pred_dep_name, id_col_name, + grouping_cols, output_table, True, + use_existing_tables, k) $$ LANGUAGE plpythonu VOLATILE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); http://git-wip-us.apache.org/repos/asf/madlib/blob/3db98bab/src/ports/postgres/modules/recursive_partitioning/random_forest.sql_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/recursive_partitioning/random_forest.sql_in b/src/ports/postgres/modules/recursive_partitioning/random_forest.sql_in index 795cc48..ba0049b 100644 --- a/src/ports/postgres/modules/recursive_partitioning/random_forest.sql_in +++ b/src/ports/postgres/modules/recursive_partitioning/random_forest.sql_in @@ -1506,27 +1506,26 @@ CREATE OR REPLACE FUNCTION 
MADLIB_SCHEMA.forest_train( sample_ratio DOUBLE PRECISION ) RETURNS VOID AS $$ PythonFunctionBodyOnly(`recursive_partitioning', `random_forest') - random_forest.forest_train( - schema_madlib, - training_table_name, - output_table_name, - id_col_name, - dependent_variable, - list_of_features, - list_of_features_to_exclude, - grouping_cols, - num_trees, - num_random_features, - importance, - num_permutations, - max_tree_depth, - min_split, - min_bucket, - num_splits, - null_handling_params, - verbose, - sample_ratio - ) + with AOControl(False): + random_forest.forest_train(schema_madlib, + training_table_name, + output_table_name, + id_col_name, + dependent_variable, + list_of_features, + list_of_features_to_exclude, + grouping_cols, + num_trees, + num_random_features, + importance, + num_permutations, + max_tree_depth, + min_split, + min_bucket, + num_splits, + null_handling_params, + verbose, + sample_ratio) $$ LANGUAGE plpythonu VOLATILE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); http://git-wip-us.apache.org/repos/asf/madlib/blob/3db98bab/src/ports/postgres/modules/stats/correlation.sql_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/stats/correlation.sql_in b/src/ports/postgres/modules/stats/correlation.sql_in index a8a7a7e..1a1bdc5 100644 --- a/src/ports/postgres/modules/stats/correlation.sql_in +++ b/src/ports/postgres/modules/stats/correlation.sql_in @@ -487,16 +487,18 @@ m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.correlation( input_message text ) RETURNS TEXT AS $$ - PythonFunctionBodyOnly(`stats', `correlation') - return correlation.correlation_help_message(schema_madlib, input_message) +PythonFunctionBodyOnly(`stats', `correlation') + with AOControl(False): + return correlation.correlation_help_message(schema_madlib, input_message) $$ LANGUAGE plpythonu IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO 
SQL', `'); ----------------------------------------------------------------------- CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.correlation() RETURNS TEXT AS $$ - PythonFunctionBodyOnly(`stats', `correlation') - return correlation.correlation_help_message(schema_madlib, None) +PythonFunctionBodyOnly(`stats', `correlation') + with AOControl(False): + return correlation.correlation_help_message(schema_madlib, None) $$ LANGUAGE plpythonu IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); ------------------------------------------------------------------------- @@ -526,9 +528,10 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.covariance( verbose boolean, -- flag to determine verbosity grouping_cols varchar -- comma separated column names to be used for grouping ) RETURNS TEXT AS $$ - PythonFunctionBodyOnly(`stats', `correlation') - return correlation.correlation(schema_madlib, source_table, output_table, - target_cols, grouping_cols, True, verbose) +PythonFunctionBodyOnly(`stats', `correlation') + with AOControl(False): + return correlation.correlation(schema_madlib, source_table, output_table, + target_cols, grouping_cols, True, verbose) $$ LANGUAGE plpythonu VOLATILE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -571,16 +574,18 @@ m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.covariance( input_message text ) RETURNS TEXT AS $$ - PythonFunctionBodyOnly(`stats', `correlation') - return correlation.correlation_help_message(schema_madlib, input_message, cov=True) +PythonFunctionBodyOnly(`stats', `correlation') + with AOControl(False): + return correlation.correlation_help_message(schema_madlib, input_message, cov=True) $$ LANGUAGE plpythonu IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); ----------------------------------------------------------------------- CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.covariance() RETURNS TEXT AS $$ - PythonFunctionBodyOnly(`stats', 
`correlation') - return correlation.correlation_help_message(schema_madlib, None, cov=True) +PythonFunctionBodyOnly(`stats', `correlation') + with AOControl(False): + return correlation.correlation_help_message(schema_madlib, None, cov=True) $$ LANGUAGE plpythonu IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); ------------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/madlib/blob/3db98bab/src/ports/postgres/modules/stats/cox_prop_hazards.sql_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/stats/cox_prop_hazards.sql_in b/src/ports/postgres/modules/stats/cox_prop_hazards.sql_in index e5c1d41..e8faa01 100644 --- a/src/ports/postgres/modules/stats/cox_prop_hazards.sql_in +++ b/src/ports/postgres/modules/stats/cox_prop_hazards.sql_in @@ -669,17 +669,18 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.coxph_train( optimizer_params VARCHAR ) RETURNS VOID AS $$ - PythonFunctionBodyOnly(`stats', `cox_prop_hazards') - cox_prop_hazards.coxph( - schema_madlib, - source_table, - output_table, - dependent_varname, - independent_varname, - right_censoring_status, - strata, - optimizer_params - ) +PythonFunctionBodyOnly(`stats', `cox_prop_hazards') + with AOControl(False): + cox_prop_hazards.coxph( + schema_madlib, + source_table, + output_table, + dependent_varname, + independent_varname, + right_censoring_status, + strata, + optimizer_params + ) $$ LANGUAGE plpythonu VOLATILE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -1112,7 +1113,8 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.cox_zph ) RETURNS VOID AS $$ PythonFunctionBodyOnly(`stats', `cox_prop_hazards') - cox_prop_hazards.zph(schema_madlib, coxph_model_table, output_table) + with AOControl(False): + cox_prop_hazards.zph(schema_madlib, coxph_model_table, output_table) $$ LANGUAGE plpythonu VOLATILE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ 
-1414,17 +1416,18 @@ PythonFunctionBodyOnly(`stats', `cox_prop_hazards') temp_string = plpy.execute("SELECT 'pg_temp.' || {schema_madlib}.__unique_string() AS t". format(schema_madlib=schema_madlib))[0]["t"] - plpy.execute(""" - SELECT {schema_madlib}.coxph_train( - '{source}','{temp}', '{dep}', '{indep}', - '{status}', NULL, '{optimizer}') - """.format(schema_madlib=schema_madlib, - source=source, - temp=temp_string, - dep=depColumn, - indep=indepColumn, - status=status, - optimizer=optimizer_params)) + with AOControl(False): + plpy.execute(""" + SELECT {schema_madlib}.coxph_train( + '{source}','{temp}', '{dep}', '{indep}', + '{status}', NULL, '{optimizer}') + """.format(schema_madlib=schema_madlib, + source=source, + temp=temp_string, + dep=depColumn, + indep=indepColumn, + status=status, + optimizer=optimizer_params)) return plpy.execute("SELECT coef, loglikelihood, std_err, z_stats," "p_values, num_iterations FROM {temp}". format(temp=temp_string))[0]
