This is an automated email from the ASF dual-hosted git repository. khannaekta pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/madlib.git
commit 85696551c416c26fa3d4c0e1d5156a2f0b43059a Author: Frank McQuillan <[email protected]> AuthorDate: Wed May 20 17:15:47 2020 -0700 user docs for utility function to add Python objects to a table --- doc/mainpage.dox.in | 1 + .../madlib_keras_custom_function.sql_in | 222 ++++++++++++++++++++- 2 files changed, 221 insertions(+), 2 deletions(-) diff --git a/doc/mainpage.dox.in b/doc/mainpage.dox.in index a02500e..4afb091 100644 --- a/doc/mainpage.dox.in +++ b/doc/mainpage.dox.in @@ -292,6 +292,7 @@ Interface and implementation are subject to change. @{ @defgroup grp_gpu_configuration GPU Configuration @defgroup grp_keras Keras + @defgroup grp_custom_function Load Custom Functions @defgroup grp_keras_model_arch Load Models @defgroup grp_model_selection Model Selection for DL @brief Train multiple deep learning models at the same time for model architecture search and hyperparameter selection. diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.sql_in b/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.sql_in index 62f89a9..c8c2c9c 100644 --- a/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.sql_in +++ b/src/ports/postgres/modules/deep_learning/madlib_keras_custom_function.sql_in @@ -20,13 +20,231 @@ * * @file madlib_keras_custom_function.sql_in * - * @brief SQL functions for load/delete keras custom function objects - * @date August 2019 + * @brief Utility function to load serialized Python objects into a table + * @date May 2020 * * *//* ----------------------------------------------------------------------- */ m4_include(`SQLCommon.m4') +/** +@addtogroup grp_custom_function + +@brief Utility function to load serialized Python objects into a table. + +\warning <em> This MADlib method is still in early stage development. +Interface and implementation are subject to change. </em> + +<div class="toc"><b>Contents</b><ul> +<li class="level1"><a href="#load_function">Load Function</a></li> +<li class="level1"><a href="#delete_function">Delete Function</a></li> +<li class="level1"><a href="#example">Examples</a></li> +<li class="level1"><a href="#literature">Literature</a></li> +<li class="level1"><a href="#related">Related Topics</a></li> +</ul></div> + +This utility function loads custom Python functions +into a table for use by deep learning algorithms. +Custom functions can be useful if, for example, you need loss functions +or metrics that are not built into the standard libraries. +The functions to be loaded must be in the form of serialized Python objects +created using Dill, which extends Python's pickle module to the majority +of the built-in Python types [1]. + +There is also a utility function to delete a function +from the table. + +@anchor load_function +@par Load Function + +<pre class="syntax"> +load_custom_function( + object table, + object, + name, + description + ) +</pre> +\b Arguments +<dl class="arglist"> + <dt>object table</dt> + <dd>VARCHAR. Table to load serialized Python objects. If this table + does not exist, it will be created. If this table already + exists, a new row is inserted into the existing table. + </dd> + + <dt>object</dt> + <dd>BYTEA. PostgreSQL binary data type of the Python object. + Object must be created with the Dill package for serializing + Python objects. + </dd> + + <dt>name</dt> + <dd>TEXT, default: NULL. Name of the object. Must be unique + identifier in the table, since this name is used when passing + the object to Keras. + </dd> + + <dt>description (optional)</dt> + <dd>TEXT, default: NULL. Free text string to provide + a description, if desired. + </dd> + +</dl> + +<b>Output table</b> +<br> + The output table contains the following columns: + <table class="output"> + <tr> + <th>id</th> + <td>SERIAL PRIMARY KEY. Object ID. + </td> + </tr> + <tr> + <th>name</th> + <td>TEXT. Name of model. + </td> + </tr> + <tr> + <th>description</th> + <td>TEXT. Description of model (free text). + </td> + </tr> + <tr> + <th>object</th> + <td>BYTEA. Serialized Python object stored as a PostgreSQL binary data type. + </td> + </tr> + </table> +</br> + +@anchor delete_function +@par Delete Function + +Delete by id: +<pre class="syntax"> +delete_custom_function( + object_table, + id +) +</pre> +Or alternatively by name: +<pre class="syntax"> +delete_custom_function( + object_table, + name +) +</pre> +\b Arguments +<dl class="arglist"> + <dt>object_table</dt> + <dd>VARCHAR. Table containing Python object to be deleted. + </dd> + <dt>id</dt> + <dd>INTEGER. The id of the object to be deleted. + </dd> + <dt>name</dt> + <dd>TEXT. Name of the object to be deleted. + </dd> +</dl> + +@anchor example +@par Examples +-# Load object using psycopg2. Psycopg is a PostgreSQL database +adapter for the Python programming language. Note need to use the +psycopg2.Binary() method to pass as bytes. +<pre class="example"> +\# import database connector psycopg2 and create connection cursor +import psycopg2 as p2 +conn = p2.connect('postgresql://gpadmin@localhost:8000/madlib') +cur = conn.cursor() +\# import Dill and define 2 functions +import dill +def test_sum_fn(a, b): + return a+b +pb_sum=dill.dumps(test_sum_fn) +def test_mult_fn(a, b): + return a*b +pb_mult=dill.dumps(test_mult_fn) +\# call load function +cur.execute("DROP TABLE IF EXISTS test_custom_function_table") +cur.execute("SELECT madlib.load_custom_function('test_custom_function_table', %s,'sum_fn', 'returns sum')", [p2.Binary(pb_sum)]) +cur.execute("SELECT madlib.load_custom_function('test_custom_function_table', %s,'mult_fn', 'returns mult')", [p2.Binary(pb_mult)]) +conn.commit() +</pre> +List table to see objects: +<pre class="example"> +SELECT id, name, description FROM test_custom_function_table ORDER BY id; +</pre> +<pre class="result"> + id | name | description +----+---------+-------------- + 1 | sum_fn | returns sum + 2 | mult_fn | returns mult +</pre> +-# Load object using a PL/Python function. First define the objects: +<pre class="example"> +CREATE OR REPLACE FUNCTION custom_function_object_sum() +RETURNS BYTEA AS +$$ +import dill +def test_sum_fn(a, b): + return a+b +pb_sum=dill.dumps(test_sum_fn) +return pb_sum +$$ language plpythonu; +CREATE OR REPLACE FUNCTION custom_function_object_mult() +RETURNS BYTEA AS +$$ +import dill +def test_mult_fn(a, b): + return a*b +pb_mult=dill.dumps(test_mult_fn) +return pb_mult +$$ language plpythonu; +</pre> +Now call loader: +<pre class="result"> +DROP TABLE IF EXISTS test_custom_function_table; +SELECT madlib.load_custom_function('test_custom_function_table', + custom_function_object_sum(), + 'sum_fn', + 'returns sum' + ); +SELECT madlib.load_custom_function('test_custom_function_table', + custom_function_object_mult(), + 'mult_fn', + 'returns mult' + ); +</pre> +-# Delete an object by id: +<pre class="example"> +SELECT madlib.delete_custom_function( 'test_custom_function_table', 1); +SELECT id, name, description FROM test_custom_function_table ORDER BY id; +</pre> +<pre class="result"> + id | name | description +----+---------+-------------- + 2 | mult_fn | returns mult +</pre> +Delete an object by name: +<pre class="example"> +SELECT madlib.delete_custom_function( 'test_custom_function_table', 'mult_fn'); +</pre> +If all objects are deleted from the table using this function, the table itself will be dropped. + +@anchor literature +@literature + +[1] Dill https://pypi.org/project/dill/ + +@anchor related +@par Related Topics + +See madlib_keras_custom_function.sql_in + +*/ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.load_custom_function( object_table VARCHAR,
