This is an automated email from the ASF dual-hosted git repository. khannaekta pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/madlib.git
commit 5071d8b94d3fd7a24853bc41ad920dbfe168846e Author: Ekta Khanna <[email protected]> AuthorDate: Wed May 13 15:51:55 2020 -0700 DL: Add utility function for custom functions This commit adds new helper functions for loading and deleting from a given table custom functions that can be passed to keras fit/evaluate. The user passes in the function name along with a valid dill pickled object. This commit also adds dev-check tests for it. --- .../madlib_keras_custom_function.py_in | 245 +++++++++++++++++++++ .../madlib_keras_custom_function.sql_in | 103 +++++++++ .../test/madlib_keras_custom_function.sql_in | 148 +++++++++++++ tool/docker/base/Dockerfile_postgres_10_Jenkins | 2 +- 4 files changed, 497 insertions(+), 1 deletion(-) diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.py_in new file mode 100644 index 0000000..246c72d --- /dev/null +++ b/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.py_in @@ -0,0 +1,245 @@ +# coding=utf-8 +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations + +import dill +import plpy +from plpy import spiexceptions +from utilities.control import MinWarning +from utilities.utilities import _assert +from utilities.utilities import get_col_name_type_sql_string +from utilities.validate_args import columns_missing_from_table +from utilities.validate_args import input_tbl_valid +from utilities.validate_args import quote_ident +from utilities.validate_args import table_exists + +module_name = 'Keras Custom Function' +class CustomFunctionSchema: + """Expected format of custom function table. + Example uses: + + from utilities.validate_args import columns_missing_from_table + from madlib_keras_custom_function import CustomFunctionSchema + + # Validate names in cols list against actual table + missing_cols = columns_missing_from_table('my_custom_fn_table', CustomFunctionSchema.col_names) + + # Get function object from table, without hard coding column names + sql = "SELECT {object} FROM {table} WHERE {id} = {my_id}" + .format(object=CustomFunctionSchema.FN_OBJ, + table='my_custom_fn_table', + id=CustomFunctionSchema.FN_ID, + my_id=1) + object = plpy.execute(sql)[0] + + """ + FN_ID = 'id' + FN_NAME = 'name' + FN_OBJ = 'object' + FN_DESC = 'description' + col_names = (FN_ID, FN_NAME, FN_DESC, FN_OBJ) + col_types = ('SERIAL', 'TEXT', 'TEXT', 'BYTEA') + +def _validate_object(object, **kwargs): + _assert(object is not None, "{0}: function object cannot be NULL!".format(module_name)) + try: + obj=dill.loads(object) + except Exception as e: + plpy.error("{0}: Invalid function object".format(module_name, e)) + +@MinWarning("error") +def load_custom_function(object_table, object, name, description=None, **kwargs): + object_table = quote_ident(object_table) + _validate_object(object) + _assert(name is not None, + "{0}: function name cannot be NULL!".format(module_name)) + if not table_exists(object_table): + col_defs = get_col_name_type_sql_string(CustomFunctionSchema.col_names, + CustomFunctionSchema.col_types) + + sql = "CREATE TABLE {0} ({1}, PRIMARY KEY({2}))" \ + .format(object_table, col_defs, CustomFunctionSchema.FN_NAME) + + plpy.execute(sql, 0) + plpy.info("{0}: Created new custom function table {1}." \ + .format(module_name, object_table)) + else: + missing_cols = columns_missing_from_table(object_table, + CustomFunctionSchema.col_names) + if len(missing_cols) > 0: + plpy.error("{0}: Invalid custom function table {1}," + " missing columns: {2}".format(module_name, + object_table, + missing_cols)) + + insert_query = plpy.prepare("INSERT INTO {0} " + "VALUES(DEFAULT, $1, $2, $3);".format(object_table), + CustomFunctionSchema.col_types[1:]) + try: + plpy.execute(insert_query,[name, description, object], 0) + # spiexceptions.UniqueViolation is only supported for PG>=9.2. For + # GP5(based of PG8.4) it cannot be used. Therefore, checking exception + # message for duplicate key error. + except Exception as e: + if 'duplicate key' in e.message: + plpy.error("Function '{0}' already exists in {1}".format(name, object_table)) + plpy.error(e) + + plpy.info("{0}: Added function {1} to {2} table". + format(module_name, name, object_table)) + +@MinWarning("error") +def delete_custom_function(object_table, id=None, name=None, **kwargs): + object_table = quote_ident(object_table) + input_tbl_valid(object_table, "Keras Custom Funtion") + _assert(id is not None or name is not None, + "{0}: function id/name cannot be NULL! " \ + "Use \"SELECT delete_custom_function('usage')\" for help.".format(module_name)) + + missing_cols = columns_missing_from_table(object_table, CustomFunctionSchema.col_names) + if len(missing_cols) > 0: + plpy.error("{0}: Invalid custom function table {1}," + " missing columns: {2}".format(module_name, object_table, + missing_cols)) + + if id is not None: + sql = """ + DELETE FROM {0} WHERE {1}={2} + """.format(object_table, CustomFunctionSchema.FN_ID, id) + else: + sql = """ + DELETE FROM {0} WHERE {1}=$${2}$$ + """.format(object_table, CustomFunctionSchema.FN_NAME, name) + res = plpy.execute(sql, 0) + + if res.nrows() > 0: + plpy.info("{0}: Object id {1} has been deleted from {2}.". + format(module_name, id, object_table)) + else: + plpy.error("{0}: Object id {1} not found".format(module_name, id)) + + sql = "SELECT {0} FROM {1}".format(CustomFunctionSchema.FN_ID, object_table) + res = plpy.execute(sql, 0) + if not res: + plpy.info("{0}: Dropping empty custom keras function table " \ + "table {1}".format(module_name, object_table)) + sql = "DROP TABLE {0}".format(object_table) + plpy.execute(sql, 0) + +class KerasCustomFunctionDocumentation: + @staticmethod + def _returnHelpMsg(schema_madlib, message, summary, usage, method): + if not message: + return summary + elif message.lower() in ('usage', 'help', '?'): + return usage + return """ + No such option. Use "SELECT {schema_madlib}.{method}()" + for help. + """.format(**locals()) + + @staticmethod + def load_custom_function_help(schema_madlib, message): + method = "load_custom_function" + summary = """ + ---------------------------------------------------------------- + SUMMARY + ---------------------------------------------------------------- + The user can specify custom functions as part of the parameters + passed to madlib_keras_fit()/madlib_keras_fit_multiple(). These + custom function(s) definition must be stored in a table to pass. + This is a helper function to help users insert object(BYTEA) of + the function definitions into a table. + If the output table already exists, the custom function specified + will be added as a new row into the table. The output table could + thus act as a repository of Keras custom functions. + + For more details on function usage: + SELECT {schema_madlib}.{method}('usage') + """.format(**locals()) + + usage = """ + --------------------------------------------------------------------------- + USAGE + --------------------------------------------------------------------------- + SELECT {schema_madlib}.{method}( + object_table, -- VARCHAR. Output table to load custom function. + object, -- BYTEA. dill pickled object of the function definition. + name, -- TEXT. Free text string to identify a name + description -- TEXT. Free text string to provide a description + ); + + + --------------------------------------------------------------------------- + OUTPUT + --------------------------------------------------------------------------- + The output table produced by load_custom_function contains the following columns: + + 'id' -- SERIAL. Function ID. + 'name' -- TEXT PRIMARY KEY. unique function name. + 'description' -- TEXT. function description. + 'object' -- BYTEA. dill pickled function object. + + """.format(**locals()) + + return KerasCustomFunctionDocumentation._returnHelpMsg( + schema_madlib, message, summary, usage, method) + # --------------------------------------------------------------------- + + @staticmethod + def delete_custom_function_help(schema_madlib, message): + method = "delete_custom_function" + summary = """ + ---------------------------------------------------------------- + SUMMARY + ---------------------------------------------------------------- + Delete the custom function corresponding to the provided id + from the custom function repository table (object_table). + + For more details on function usage: + SELECT {schema_madlib}.{method}('usage') + """.format(**locals()) + + usage = """ + --------------------------------------------------------------------------- + USAGE + --------------------------------------------------------------------------- + SELECT {schema_madlib}.{method}( + object_table VARCHAR, -- Table containing keras custom function objects. + id INTEGER -- The id of the keras custom function object + to be deleted. + ); + + SELECT {schema_madlib}.{method}( + object_table VARCHAR, -- Table containing keras custom function objects. + name TEXT -- Function name of the keras custom function + object to be deleted. + ); + + --------------------------------------------------------------------------- + OUTPUT + --------------------------------------------------------------------------- + This method deletes the row corresponding to the given id in the + object_table. This also tries to drop the table if the table is + empty after dropping the id. If there are any views depending on the + table, a warning message is displayed and the table is not dropped. + + --------------------------------------------------------------------------- + """.format(**locals()) + + return KerasCustomFunctionDocumentation._returnHelpMsg( + schema_madlib, message, summary, usage, method) diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.sql_in b/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.sql_in new file mode 100644 index 0000000..62f89a9 --- /dev/null +++ b/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.sql_in @@ -0,0 +1,103 @@ +/* ----------------------------------------------------------------------- *//** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * + * @file madlib_keras_custom_function.sql_in + * + * @brief SQL functions for load/delete keras custom function objects + * @date August 2019 + * + * + *//* ----------------------------------------------------------------------- */ + +m4_include(`SQLCommon.m4') + +CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.load_custom_function( + object_table VARCHAR, + object BYTEA, + name TEXT, + description TEXT +) RETURNS VOID AS $$ + PythonFunctionBodyOnly(`deep_learning', `madlib_keras_custom_function') + with AOControl(False): + madlib_keras_custom_function.load_custom_function(**globals()) +$$ LANGUAGE plpythonu VOLATILE +m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); + +CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.load_custom_function( + object_table VARCHAR, + object BYTEA, + name TEXT +) RETURNS VOID AS $$ + SELECT MADLIB_SCHEMA.load_custom_function($1, $2, $3, NULL) +$$ LANGUAGE sql VOLATILE +m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); + +-- Functions for online help +CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.load_custom_function( + message VARCHAR +) RETURNS VARCHAR AS $$ + PythonFunctionBodyOnly(deep_learning, madlib_keras_custom_function) + return madlib_keras_custom_function.KerasCustomFunctionDocumentation.load_custom_function_help(schema_madlib, message) +$$ LANGUAGE plpythonu VOLATILE +m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); + +CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.load_custom_function() +RETURNS VARCHAR AS $$ + PythonFunctionBodyOnly(deep_learning, madlib_keras_custom_function) + return madlib_keras_custom_function.KerasCustomFunctionDocumentation.load_custom_function_help(schema_madlib, '') +$$ LANGUAGE plpythonu VOLATILE +m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); + +-- Function to delete a keras custom function from object table +CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.delete_custom_function( + object_table VARCHAR, + id INTEGER +) +RETURNS VOID AS $$ + PythonFunctionBodyOnly(`deep_learning',`madlib_keras_custom_function') + with AOControl(False): + madlib_keras_custom_function.delete_custom_function(object_table, id=id) +$$ LANGUAGE plpythonu VOLATILE; + +CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.delete_custom_function( + object_table VARCHAR, + name TEXT +) +RETURNS VOID AS $$ + PythonFunctionBodyOnly(`deep_learning',`madlib_keras_custom_function') + with AOControl(False): + madlib_keras_custom_function.delete_custom_function(object_table, name=name) +$$ LANGUAGE plpythonu VOLATILE; + +-- Functions for online help +CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.delete_custom_function( + message VARCHAR +) RETURNS VARCHAR AS $$ + PythonFunctionBodyOnly(deep_learning, madlib_keras_custom_function) + return madlib_keras_custom_function.KerasCustomFunctionDocumentation.delete_custom_function_help(schema_madlib, message) +$$ LANGUAGE plpythonu VOLATILE +m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); + +CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.delete_custom_function() +RETURNS VARCHAR AS $$ + PythonFunctionBodyOnly(deep_learning, madlib_keras_custom_function) + return madlib_keras_custom_function.KerasCustomFunctionDocumentation.delete_custom_function_help(schema_madlib, '') +$$ LANGUAGE plpythonu VOLATILE +m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); diff --git a/src/ports/postgres/modules/deep_learning/test/madlib_keras_custom_function.sql_in b/src/ports/postgres/modules/deep_learning/test/madlib_keras_custom_function.sql_in new file mode 100644 index 0000000..74f6ba2 --- /dev/null +++ b/src/ports/postgres/modules/deep_learning/test/madlib_keras_custom_function.sql_in @@ -0,0 +1,148 @@ +/* ----------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * ----------------------------------------------------------------------- */ + +/* ----------------------------------------------------------------------------- + * Test load custom function helper functions + * -------------------------------------------------------------------------- */ + +CREATE OR REPLACE FUNCTION custom_function_object() +RETURNS BYTEA AS +$$ +import dill +def test_sum_fn(a, b): + return a+b + +pb=dill.dumps(test_sum_fn) +return pb +$$ language plpythonu; + +CREATE OR REPLACE FUNCTION read_custom_function(pb bytea, arg1 int, arg2 int) +RETURNS INTEGER AS +$$ +import dill +obj=dill.loads(pb) +res=obj(arg1, arg2) +return res +$$ language plpythonu; + +/* Test successful table creation where no table exists */ +DROP TABLE IF EXISTS test_custom_function_table; +SELECT load_custom_function('test_custom_function_table', custom_function_object(), 'sum_fn', 'returns sum'); + +SELECT assert(UPPER(atttypid::regtype::TEXT) = 'INTEGER', 'id column should be INTEGER type') + FROM pg_attribute WHERE attrelid = 'test_custom_function_table'::regclass + AND attname = 'id'; +SELECT assert(UPPER(atttypid::regtype::TEXT) = 'BYTEA', 'object column should be BYTEA type' ) + FROM pg_attribute WHERE attrelid = 'test_custom_function_table'::regclass + AND attname = 'object'; +SELECT assert(UPPER(atttypid::regtype::TEXT) = 'TEXT', + 'name column should be TEXT type') + FROM pg_attribute WHERE attrelid = 'test_custom_function_table'::regclass + AND attname = 'name'; +SELECT assert(UPPER(atttypid::regtype::TEXT) = 'TEXT', + 'description column should be TEXT type') + FROM pg_attribute WHERE attrelid = 'test_custom_function_table'::regclass + AND attname = 'description'; + +/* id should be 1 */ +SELECT assert(id = 1, 'Wrong id written by load_custom_function') + FROM test_custom_function_table; + +/* Validate function object created */ +SELECT assert(read_custom_function(object, 2, 3) = 5, 'Custom function should return sum of args.') + FROM test_custom_function_table; + +/* Test custom function insertion where valid table exists */ +SELECT load_custom_function('test_custom_function_table', custom_function_object(), 'sum_fn1'); +SELECT assert(name = 'sum_fn', 'Custom function sum_fn found in table.') + FROM test_custom_function_table WHERE id = 1; +SELECT assert(name = 'sum_fn1', 'Custom function sum_fn1 found in table.') + FROM test_custom_function_table WHERE id = 2; + +/* Test adding an existing function name should error out */ +SELECT assert(MADLIB_SCHEMA.trap_error($TRAP$ +SELECT load_custom_function('test_custom_function_table', custom_function_object(), 'sum_fn1'); +$TRAP$) = 1, 'Should error out for duplicate function name'); + +/* Test deletion by id where valid table exists */ +/* Assert id exists before deleting */ +SELECT assert(COUNT(id) = 1, 'id 2 should exist before deletion!') + FROM test_custom_function_table WHERE id = 2; +SELECT delete_custom_function('test_custom_function_table', 2); +SELECT assert(COUNT(id) = 0, 'id 2 should have been deleted!') + FROM test_custom_function_table WHERE id = 2; + +/* Test deletion by name where valid table exists */ +SELECT load_custom_function('test_custom_function_table', custom_function_object(), 'sum_fn1'); +/* Assert id exists before deleting */ +SELECT assert(COUNT(id) = 1, 'function name sum_fn1 should exist before deletion!') + FROM test_custom_function_table WHERE name = 'sum_fn1'; +SELECT delete_custom_function('test_custom_function_table', 'sum_fn1'); +SELECT assert(COUNT(id) = 0, 'function name sum_fn1 should have been deleted!') + FROM test_custom_function_table WHERE name = 'sum_fn1'; + +/* Test deleting an already deleted entry should error out */ +SELECT assert(MADLIB_SCHEMA.trap_error($TRAP$ +SELECT delete_custom_function('test_custom_function_table', 2); +$TRAP$) = 1, 'Should error out for trying to delete an entry that does not exist'); + +/* Test delete drops the table after deleting last entry*/ +SELECT delete_custom_function('test_custom_function_table', 1); +SELECT assert(COUNT(relname) = 0, 'Table test_custom_function_table should have been deleted.') + FROM pg_class where relname='test_custom_function_table'; + +/* Test deletion where empty table exists */ +SELECT load_custom_function('test_custom_function_table', custom_function_object(), 'sum_fn', 'returns sum'); +DELETE FROM test_custom_function_table; +SELECT assert(MADLIB_SCHEMA.trap_error($$SELECT delete_custom_function('test_custom_function_table', 1)$$) = 1, + 'Deleting function in an empty table should generate an exception.'); + +/* Test deletion where no table exists */ +DROP TABLE IF EXISTS test_custom_function_table; +SELECT assert(MADLIB_SCHEMA.trap_error($$SELECT delete_custom_function('test_custom_function_table', 1)$$) = 1, + 'Deleting a non-existent table should raise exception.'); + +/* Test where invalid table exists */ +SELECT load_custom_function('test_custom_function_table', custom_function_object(), 'sum_fn', 'returns sum'); +ALTER TABLE test_custom_function_table DROP COLUMN id; +SELECT assert(MADLIB_SCHEMA.trap_error($$SELECT delete_custom_function('test_custom_function_table', 2)$$) = 1, + 'Deleting an invalid table should generate an exception.'); + +SELECT assert(MADLIB_SCHEMA.trap_error($$SELECT load_custom_function('test_custom_function_table', custom_function_object(), 'sum_fn', 'returns sum')$$) = 1, + 'Passing an invalid table to load_custom_function() should raise exception.'); + +/* Test input validation */ +DROP TABLE IF EXISTS test_custom_function_table; +SELECT assert(MADLIB_SCHEMA.trap_error($$ + SELECT load_custom_function('test_custom_function_table', custom_function_object(), NULL, NULL); +$$) = 1, 'Name cannot be NULL'); +SELECT assert(MADLIB_SCHEMA.trap_error($$ + SELECT load_custom_function('test_custom_function_table', NULL, 'sum_fn', NULL); +$$) = 1, 'Function object cannot be NULL'); +SELECT assert(MADLIB_SCHEMA.trap_error($$ + SELECT load_custom_function('test_custom_function_table', 'invalid_obj'::bytea, 'sum_fn', NULL); +$$) = 1, 'Invalid custom function object'); +SELECT load_custom_function('test_custom_function_table', custom_function_object(), 'sum_fn', NULL); +SELECT assert(name IS NOT NULL AND description IS NULL, 'validate name is not NULL.') + FROM test_custom_function_table; +SELECT assert(MADLIB_SCHEMA.trap_error($$ + SELECT delete_custom_function('test_custom_function_table', NULL); +$$) = 1, 'id/name cannot be NULL!'); diff --git a/tool/docker/base/Dockerfile_postgres_10_Jenkins b/tool/docker/base/Dockerfile_postgres_10_Jenkins index a0d882d..851e499 100644 --- a/tool/docker/base/Dockerfile_postgres_10_Jenkins +++ b/tool/docker/base/Dockerfile_postgres_10_Jenkins @@ -33,7 +33,7 @@ RUN apt-get update && apt-get install -y wget \ build-essential \ cmake -RUN pip install tensorflow==1.14 keras==2.2.4 +RUN pip install tensorflow==1.14 keras==2.2.4 dill ## To build an image from this docker file, from madlib folder, run: # docker build -t madlib/postgres_10:jenkins -f tool/docker/base/Dockerfile_postgres_10_Jenkins .
