Repository: madlib Updated Branches: refs/heads/master b76a08344 -> 3db98babe
http://git-wip-us.apache.org/repos/asf/madlib/blob/3db98bab/src/ports/postgres/modules/stats/pred_metrics.sql_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/stats/pred_metrics.sql_in b/src/ports/postgres/modules/stats/pred_metrics.sql_in index 3f62746..32de9a9 100644 --- a/src/ports/postgres/modules/stats/pred_metrics.sql_in +++ b/src/ports/postgres/modules/stats/pred_metrics.sql_in @@ -411,8 +411,9 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mean_abs_error( ) RETURNS VOID AS $$ PythonFunctionBodyOnly(`stats', `pred_metrics') - return pred_metrics.mean_abs_error( - table_in, table_out, prediction_col, observed_col, grouping_cols) + with AOControl(False): + return pred_metrics.mean_abs_error( + table_in, table_out, prediction_col, observed_col, grouping_cols) $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -430,8 +431,9 @@ m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mean_abs_error(message TEXT) RETURNS TEXT AS $$ PythonFunctionBodyOnly(`stats', `pred_metrics') - return pred_metrics.metric_agg_help_msg(schema_madlib, message, - 'mean_abs_error') + with AOControl(False): + return pred_metrics.metric_agg_help_msg(schema_madlib, message, + 'mean_abs_error') $$ language plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); @@ -463,8 +465,9 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mean_abs_perc_error( ) RETURNS VOID AS $$ PythonFunctionBodyOnly(`stats', `pred_metrics') - return pred_metrics.mean_abs_perc_error( - table_in, table_out, prediction_col, observed_col, grouping_cols) + with AOControl(False): + return pred_metrics.mean_abs_perc_error( + table_in, table_out, prediction_col, observed_col, grouping_cols) $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -482,8 +485,9 @@ m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mean_abs_perc_error(message TEXT) RETURNS TEXT AS $$ PythonFunctionBodyOnly(`stats', `pred_metrics') - return pred_metrics.metric_agg_help_msg(schema_madlib, message, - 'mean_abs_perc_error') + with AOControl(False): + return pred_metrics.metric_agg_help_msg(schema_madlib, message, + 'mean_abs_perc_error') $$ language plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); @@ -515,8 +519,9 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mean_perc_error( ) RETURNS VOID AS $$ PythonFunctionBodyOnly(`stats', `pred_metrics') - return pred_metrics.mean_perc_error( - table_in, table_out, prediction_col, observed_col, grouping_cols) + with AOControl(False): + return pred_metrics.mean_perc_error( + table_in, table_out, prediction_col, observed_col, grouping_cols) $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -534,8 +539,9 @@ m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mean_perc_error(message TEXT) RETURNS TEXT AS $$ PythonFunctionBodyOnly(`stats', `pred_metrics') - return pred_metrics.metric_agg_help_msg(schema_madlib, message, - 'mean_perc_error') + with AOControl(False): + return pred_metrics.metric_agg_help_msg(schema_madlib, message, + 'mean_perc_error') $$ language plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); @@ -567,8 +573,9 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mean_squared_error( ) RETURNS VOID AS $$ PythonFunctionBodyOnly(`stats', `pred_metrics') - return pred_metrics.mean_squared_error( - table_in, table_out, prediction_col, observed_col, grouping_cols) + with AOControl(False): + return pred_metrics.mean_squared_error( + table_in, table_out, prediction_col, observed_col, grouping_cols) $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -586,8 +593,9 @@ m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mean_squared_error(message TEXT) RETURNS TEXT AS $$ PythonFunctionBodyOnly(`stats', `pred_metrics') - return pred_metrics.metric_agg_help_msg(schema_madlib, message, - 'mean_squared_error') + with AOControl(False): + return pred_metrics.metric_agg_help_msg(schema_madlib, message, + 'mean_squared_error') $$ language plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); @@ -619,8 +627,9 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.r2_score( ) RETURNS VOID AS $$ PythonFunctionBodyOnly(`stats', `pred_metrics') - return pred_metrics.r2_score( - table_in, table_out, prediction_col, observed_col, grouping_cols) + with AOControl(False): + return pred_metrics.r2_score( + table_in, table_out, prediction_col, observed_col, grouping_cols) $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -638,7 +647,8 @@ m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.r2_score(message TEXT) RETURNS TEXT AS $$ PythonFunctionBodyOnly(`stats', `pred_metrics') - return pred_metrics.r2_score_help(schema_madlib, message) + with AOControl(False): + return pred_metrics.r2_score_help(schema_madlib, message) $$ language plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); @@ -675,9 +685,10 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.adjusted_r2_score( ) RETURNS VOID AS $$ PythonFunctionBodyOnly(`stats', `pred_metrics') - return pred_metrics.adjusted_r2_score( - table_in, table_out, prediction_col, observed_col, - num_predictors, training_size, grouping_cols) + with AOControl(False): + return pred_metrics.adjusted_r2_score( + table_in, table_out, prediction_col, observed_col, + num_predictors, training_size, grouping_cols) $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -697,7 +708,8 @@ m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.adjusted_r2_score(message TEXT) RETURNS TEXT AS $$ PythonFunctionBodyOnly(`stats', `pred_metrics') - return pred_metrics.adjusted_r2_score_help(schema_madlib, message) + with AOControl(False): + return pred_metrics.adjusted_r2_score_help(schema_madlib, message) $$ language plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); @@ -728,8 +740,9 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.binary_classifier( ) RETURNS VOID AS $$ PythonFunctionBodyOnly(`stats', `pred_metrics') - return pred_metrics.binary_classifier( - table_in, table_out, prediction_col, observed_col, grouping_cols) + with AOControl(False): + return pred_metrics.binary_classifier( + table_in, table_out, prediction_col, observed_col, grouping_cols) $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -747,7 +760,8 @@ m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.binary_classifier(message TEXT) RETURNS TEXT AS $$ PythonFunctionBodyOnly(`stats', `pred_metrics') - return pred_metrics.binary_classifier_help(schema_madlib, message) + with AOControl(False): + return pred_metrics.binary_classifier_help(schema_madlib, message) $$ language plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); @@ -778,8 +792,9 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.area_under_roc( ) RETURNS VOID AS $$ PythonFunctionBodyOnly(`stats', `pred_metrics') - return pred_metrics.area_under_roc( - table_in, table_out, prediction_col, observed_col, grouping_cols) + with AOControl(False): + return pred_metrics.area_under_roc( + table_in, table_out, prediction_col, observed_col, grouping_cols) $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -797,7 +812,8 @@ m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.area_under_roc(message TEXT) RETURNS TEXT AS $$ PythonFunctionBodyOnly(`stats', `pred_metrics') - return pred_metrics.area_under_roc_help(schema_madlib, message) + with AOControl(False): + return pred_metrics.area_under_roc_help(schema_madlib, message) $$ language plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); @@ -827,15 +843,17 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.confusion_matrix( ) RETURNS VOID AS $$ PythonFunctionBodyOnly(`stats', `pred_metrics') - return pred_metrics.confusion_matrix( - table_in, table_out, prediction_col, observed_col) + with AOControl(False): + return pred_metrics.confusion_matrix( + table_in, table_out, prediction_col, observed_col) $$ LANGUAGE plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.confusion_matrix(message TEXT) RETURNS TEXT AS $$ PythonFunctionBodyOnly(`stats', `pred_metrics') - return pred_metrics.confusion_matrix_help(schema_madlib, message) + with AOControl(False): + return pred_metrics.confusion_matrix_help(schema_madlib, message) $$ language plpythonu m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); http://git-wip-us.apache.org/repos/asf/madlib/blob/3db98bab/src/ports/postgres/modules/summary/summary.sql_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/summary/summary.sql_in b/src/ports/postgres/modules/summary/summary.sql_in index 73f5a0e..84ab89d 100644 --- a/src/ports/postgres/modules/summary/summary.sql_in +++ b/src/ports/postgres/modules/summary/summary.sql_in @@ -591,10 +591,11 @@ MADLIB_SCHEMA.summary ) RETURNS MADLIB_SCHEMA.summary_result AS $$ PythonFunctionBodyOnly(`summary', `summary') - return summary.summary( - schema_madlib, source_table, output_table, target_cols, grouping_cols, - get_distinct, get_quartiles, ntile_array, how_many_mfv, - get_estimates, n_cols_per_run) + with AOControl(False): + return summary.summary( + schema_madlib, source_table, output_table, target_cols, grouping_cols, + get_distinct, get_quartiles, ntile_array, how_many_mfv, + get_estimates, n_cols_per_run) $$ LANGUAGE plpythonu VOLATILE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -733,7 +734,8 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.summary( ) RETURNS TEXT AS $$ PythonFunctionBodyOnly(`summary', `summary') - return summary.summary_help_message(schema_madlib, input_message) + with AOControl(False): + return summary.summary_help_message(schema_madlib, input_message) $$ LANGUAGE plpythonu IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); @@ -741,6 +743,7 @@ m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.summary() RETURNS TEXT AS $$ PythonFunctionBodyOnly(`summary', `summary') - return summary.summary_help_message(schema_madlib, None) + with AOControl(False): + return summary.summary_help_message(schema_madlib, None) $$ LANGUAGE plpythonu IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); http://git-wip-us.apache.org/repos/asf/madlib/blob/3db98bab/src/ports/postgres/modules/tsa/arima.sql_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/tsa/arima.sql_in b/src/ports/postgres/modules/tsa/arima.sql_in index 6bb6889..48f0abd 100644 --- a/src/ports/postgres/modules/tsa/arima.sql_in +++ b/src/ports/postgres/modules/tsa/arima.sql_in @@ -482,10 +482,11 @@ MADLIB_SCHEMA.arima_train( optimizer_params TEXT -- Control parameters for optimizer ) RETURNS VOID AS $$ - PythonFunctionBodyOnly(`tsa', `arima') - arima.arima_train(schema_madlib, input_table, output_table, - timestamp_column, timeseries_column, grouping_columns, - include_mean, non_seasonal_orders, optimizer_params) +PythonFunctionBodyOnly(`tsa', `arima') + with AOControl(False): + arima.arima_train(schema_madlib, input_table, output_table, + timestamp_column, timeseries_column, grouping_columns, + include_mean, non_seasonal_orders, optimizer_params) $$ LANGUAGE plpythonu VOLATILE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -579,8 +580,9 @@ MADLIB_SCHEMA.arima_forecast( ) RETURNS VOID AS $$ PythonFunctionBodyOnly(`tsa', `arima_forecast') - arima_forecast.arima_forecast(schema_madlib, model_table, output_table, - steps_ahead) + with AOControl(False): + arima_forecast.arima_forecast(schema_madlib, model_table, output_table, + steps_ahead) $$ LANGUAGE plpythonu VOLATILE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); @@ -592,14 +594,15 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.arima_train( ) RETURNS TEXT AS $$ PythonFunctionBodyOnly(`tsa', `arima') - return arima.arima_train_help_message(schema_madlib, message) + with AOControl(False): + return arima.arima_train_help_message(schema_madlib, message) $$ LANGUAGE plpythonu IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.arima_train() RETURNS TEXT AS $$ - PythonFunction(tsa, arima, arima_train_help_message) +PythonFunction(tsa, arima, arima_train_help_message) $$ LANGUAGE plpythonu IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); @@ -608,7 +611,8 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.arima_forecast( ) RETURNS TEXT AS $$ PythonFunctionBodyOnly(`tsa', `arima_forecast') - return arima_forecast.arima_forecast_help_message(schema_madlib, message) + with AOControl(False): + return arima_forecast.arima_forecast_help_message(schema_madlib, message) $$ LANGUAGE plpythonu IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); @@ -616,7 +620,8 @@ m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.arima_forecast() RETURNS TEXT AS $$ PythonFunctionBodyOnly(`tsa', `arima_forecast') - return arima_forecast.arima_forecast_help_message(schema_madlib, None) + with AOControl(False): + return arima_forecast.arima_forecast_help_message(schema_madlib, None) $$ LANGUAGE plpythonu IMMUTABLE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); http://git-wip-us.apache.org/repos/asf/madlib/blob/3db98bab/src/ports/postgres/modules/utilities/cols2vec.sql_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/utilities/cols2vec.sql_in b/src/ports/postgres/modules/utilities/cols2vec.sql_in index bbdcc52..3fdbe70 100644 --- a/src/ports/postgres/modules/utilities/cols2vec.sql_in +++ b/src/ports/postgres/modules/utilities/cols2vec.sql_in @@ -399,8 +399,8 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.cols2vec( with MinWarning('warning'): cols2vec_obj = transform_vec_cols.cols2vec() return cols2vec_obj.cols2vec(**globals()) - $$ LANGUAGE plpythonu VOLATILE - m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); +$$ LANGUAGE plpythonu VOLATILE +m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.cols2vec( source_table VARCHAR, @@ -424,7 +424,9 @@ m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.cols2vec(message TEXT) RETURNS text AS $$ PythonFunctionBodyOnly(utilities, transform_vec_cols) - return transform_vec_cols.cols2vec().cols2vec_help_message(schema_madlib, message) + from utilities.control import MinWarning + with MinWarning('warning'): + return transform_vec_cols.cols2vec().cols2vec_help_message(schema_madlib, message) $$ LANGUAGE plpythonu VOLATILE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); http://git-wip-us.apache.org/repos/asf/madlib/blob/3db98bab/src/ports/postgres/modules/utilities/control.py_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/utilities/control.py_in b/src/ports/postgres/modules/utilities/control.py_in index 255c14e..7900086 100644 --- a/src/ports/postgres/modules/utilities/control.py_in +++ b/src/ports/postgres/modules/utilities/control.py_in @@ -14,6 +14,7 @@ import plpy from distutils.util import strtobool from functools import wraps +from utilities import extract_keyvalue_params from utilities import unique_string HAS_FUNCTION_PROPERTIES = m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `True', `False') @@ -158,6 +159,60 @@ class MinWarning(ContextDecorator): format(oldMsgLevel=self.oldMsgLevel)) +class AOControl(ContextDecorator): + + """ + @brief: A wrapper that enables/disables the AO storage option + """ + + def __init__(self, enable=False): + self.to_enable = enable + self.was_ao_enabled = False + self.guc_exists = True + self.storage_options_dict = dict() + + def _parse_gp_default_storage_options(self, gp_default_storage_options_str): + """ Parse comma separated key=value pairs + + Example: + appendonly=false,blocksize=32768,compresstype=none,checksum=true,orientation=row + """ + self.storage_options_dict = extract_keyvalue_params(gp_default_storage_options_str) + self.storage_options_dict['appendonly'] = bool( + strtobool(self.storage_options_dict['appendonly'])) + + @property + def _gp_default_storage_options(self): + return ','.join(['{0}={1}'.format(k, v) + for k, v in self.storage_options_dict.iteritems()]) + + def __enter__(self): + try: + _storage_options_str = plpy.execute( + "show gp_default_storage_options")[0]["gp_default_storage_options"] + self._parse_gp_default_storage_options(_storage_options_str) + + # Set APPENDONLY=False after backing up existing value + self.was_ao_enabled = self.storage_options_dict['appendonly'] + self.storage_options_dict['appendonly'] = self.to_enable + plpy.execute("set gp_default_storage_options={0}". + format(self._gp_default_storage_options)) + except plpy.SPIError: + self.guc_exists = False + finally: + return self + + def __exit__(self, *args): + if self.guc_exists: + self.storage_options_dict['appendonly'] = self.was_ao_enabled + plpy.execute("set gp_default_storage_options={0}". + format(self._gp_default_storage_options)) + if args and args[0]: + # an exception was raised in code. We return False so that any + # exception is re-raised after exit. + return False + + class IterationController: """ http://git-wip-us.apache.org/repos/asf/madlib/blob/3db98bab/src/ports/postgres/modules/utilities/minibatch_preprocessing.py_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/utilities/minibatch_preprocessing.py_in b/src/ports/postgres/modules/utilities/minibatch_preprocessing.py_in index 1238104..0762a06 100644 --- a/src/ports/postgres/modules/utilities/minibatch_preprocessing.py_in +++ b/src/ports/postgres/modules/utilities/minibatch_preprocessing.py_in @@ -269,8 +269,15 @@ class MiniBatchPreProcessor: grouping_cols_select_col = '' grouping_cols_group_by = '' + if is_platform_pg(): + distributed_by_clause = with_append_only_true = '' + else: + distributed_by_clause= 'DISTRIBUTED RANDOMLY' + with_append_only_true= 'WITH (APPENDONLY=TRUE)' sql = """ - CREATE TABLE {self.output_table} AS + CREATE TABLE {self.output_table} + {with_append_only_true} + AS SELECT {row_id}, {grouping_cols_select_col} {self.schema_madlib}.matrix_agg({dep_colname}) as {dep_colname}, @@ -291,12 +298,9 @@ class MiniBatchPreProcessor: dep_colname=MINIBATCH_OUTPUT_DEPENDENT_COLNAME, ind_colname=MINIBATCH_OUTPUT_INDEPENDENT_COLNAME, row_id=unique_row_id, - distributed_by_clause='' if is_platform_pg() else - 'DISTRIBUTED RANDOMLY', **locals()) plpy.execute(sql) - def create_output_summary_table(self, buffer_size, total_num_rows_processed, @@ -313,7 +317,8 @@ class MiniBatchPreProcessor: class_level_str=py_list_to_sql_string( self.dependent_levels, array_type=self.dependent_vartype, long_format=True) - + grouping_cols_str=("$__madlib__${0}$__madlib__$".format(self.grouping_cols) + if self.grouping_cols else "NULL") query = """ CREATE TABLE {self.output_summary_table} AS SELECT @@ -327,11 +332,10 @@ class MiniBatchPreProcessor: {total_num_rows_processed} AS num_rows_processed, {num_missing_rows_skipped} AS num_missing_rows_skipped, {grouping_cols_str}::TEXT AS grouping_cols - """.format(grouping_cols_str="$__madlib__$" + self.grouping_cols + "$__madlib__$" - if self.grouping_cols else "NULL", - **locals()) + """.format(**locals()) plpy.execute(query) + class MiniBatchStandardizer: """ This class is responsible for http://git-wip-us.apache.org/repos/asf/madlib/blob/3db98bab/src/ports/postgres/modules/utilities/minibatch_preprocessing.sql_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/utilities/minibatch_preprocessing.sql_in b/src/ports/postgres/modules/utilities/minibatch_preprocessing.sql_in index ead43d9..1ac00fb 100644 --- a/src/ports/postgres/modules/utilities/minibatch_preprocessing.sql_in +++ b/src/ports/postgres/modules/utilities/minibatch_preprocessing.sql_in @@ -535,9 +535,10 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.minibatch_preprocessor( ) RETURNS VOID AS $$ PythonFunctionBodyOnly(utilities, minibatch_preprocessing) from utilities.control import MinWarning - with MinWarning('error'): - minibatch_preprocessor_obj = minibatch_preprocessing.MiniBatchPreProcessor(**globals()) - minibatch_preprocessor_obj.minibatch_preprocessor() + with AOControl(False): + with MinWarning('error'): + minibatch_preprocessor_obj = minibatch_preprocessing.MiniBatchPreProcessor(**globals()) + minibatch_preprocessor_obj.minibatch_preprocessor() $$ LANGUAGE plpythonu VOLATILE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); http://git-wip-us.apache.org/repos/asf/madlib/blob/3db98bab/src/ports/postgres/modules/utilities/test/unit_tests/plpy_mock.py_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/utilities/test/unit_tests/plpy_mock.py_in b/src/ports/postgres/modules/utilities/test/unit_tests/plpy_mock.py_in index dd18649..564faf7 100644 --- a/src/ports/postgres/modules/utilities/test/unit_tests/plpy_mock.py_in +++ b/src/ports/postgres/modules/utilities/test/unit_tests/plpy_mock.py_in @@ -34,6 +34,14 @@ def info(query): print query +class SPIError(Exception): + def __init__(self, message): + super(SPIError, self).__init__() + self.message = message + + def __str__(self): + return repr(self.message) + class PLPYException(Exception): def __init__(self, message): super(PLPYException, self).__init__() http://git-wip-us.apache.org/repos/asf/madlib/blob/3db98bab/src/ports/postgres/modules/utilities/test/unit_tests/test_control.py_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/utilities/test/unit_tests/test_control.py_in b/src/ports/postgres/modules/utilities/test/unit_tests/test_control.py_in new file mode 100644 index 0000000..55bbd06 --- /dev/null +++ b/src/ports/postgres/modules/utilities/test/unit_tests/test_control.py_in @@ -0,0 +1,81 @@ +# coding=utf-8 +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import sys +from os import path +# Add utilites module to the pythonpath. +sys.path.append(path.dirname(path.dirname(path.dirname(path.abspath(__file__))))) + + +import unittest +from mock import * +import sys +import plpy_mock as plpy + +m4_changequote(`<!', `!>') + + +class ControlTestCase(unittest.TestCase): + def setUp(self): + patches = { + 'plpy': plpy + } + self.plpy_mock_execute = MagicMock() + plpy.execute = self.plpy_mock_execute + + self.module_patcher = patch.dict('sys.modules', patches) + self.module_patcher.start() + + import control + self.subject = control + + def tearDown(self): + self.module_patcher.stop() + + def test_ao_control_false(self): + option = ('appendonly=true,blocksize=32768,compresstype=none,' + 'checksum=true,orientation=row') + self.plpy_mock_execute.return_value = [{'gp_default_storage_options': option}] + with self.subject.AOControl(False) as C: + self.assertFalse(C.storage_options_dict['appendonly']) + self.plpy_mock_execute.assert_called_with( + "set gp_default_storage_options=compresstype=none,blocksize=32768" + ",appendonly=True,orientation=row,checksum=true") + + def test_ao_control_true(self): + option = ('appendonly=true,blocksize=32768,compresstype=none,' + 'checksum=true,orientation=row') + self.plpy_mock_execute.return_value = [{'gp_default_storage_options': option}] + with self.subject.AOControl(True) as C: + self.assertTrue(C.storage_options_dict['appendonly']) + self.plpy_mock_execute.assert_called_with( + "set gp_default_storage_options=compresstype=none,blocksize=32768" + ",appendonly=True,orientation=row,checksum=true") + + def test_ao_control_missing(self): + option = ('appendonly=true,blocksize=32768,compresstype=none,' + 'checksum=true,orientation=row') + self.plpy_mock_execute.side_effect = plpy.SPIError( + 'Unrecognized configuration parameter "gp_default_storage_options"') + with self.subject.AOControl(True) as C: + self.assertFalse(C.guc_exists) + + +if __name__ == '__main__': + unittest.main() http://git-wip-us.apache.org/repos/asf/madlib/blob/3db98bab/src/ports/postgres/modules/utilities/test/utilities.sql_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/utilities/test/utilities.sql_in b/src/ports/postgres/modules/utilities/test/utilities.sql_in index d3525ce..16e0edc 100644 --- a/src/ports/postgres/modules/utilities/test/utilities.sql_in +++ b/src/ports/postgres/modules/utilities/test/utilities.sql_in @@ -21,6 +21,8 @@ */ /* ----------------------------------------------------------------------- */ +m4_include(`SQLCommon.m4') + CREATE TABLE "__madlib_temp_Quoted"(b varchar); CREATE TABLE __madlib_temp_non_quoted(a text); -- assert that madlib_temp tables are created @@ -50,7 +52,8 @@ CREATE TABLE dt_golf ( cat_features text[], windy boolean, class text -) ; +) +m4_ifdef(`__POSTGRESQL__', `', `WITH (appendonly=true)'); INSERT INTO dt_golf (id,"OUTLOOK",temperature,"len$$'%*()gth","Cont,features",cat_features, windy,class) VALUES (1, 'sunny', 85, 85,ARRAY[85, 85], ARRAY['a', 'b'], false, 'Don''t Play'), http://git-wip-us.apache.org/repos/asf/madlib/blob/3db98bab/src/ports/postgres/modules/utilities/text_utilities.sql_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/utilities/text_utilities.sql_in b/src/ports/postgres/modules/utilities/text_utilities.sql_in index 2ae65cb..2438239 100644 --- a/src/ports/postgres/modules/utilities/text_utilities.sql_in +++ b/src/ports/postgres/modules/utilities/text_utilities.sql_in @@ -324,8 +324,9 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.term_frequency( RETURNS TEXT AS $$ PythonFunctionBodyOnly(`utilities', `text_utilities') - return text_utilities.term_frequency(input_table, doc_id_col, word_vec_col, - output_table, compute_vocab=compute_vocab) + with AOControl(False): + return text_utilities.term_frequency(input_table, doc_id_col, word_vec_col, + output_table, compute_vocab=compute_vocab) $$ LANGUAGE plpythonu VOLATILE
