http://git-wip-us.apache.org/repos/asf/madlib/blob/24a11c1e/src/ports/postgres/modules/linalg/matrix_help_message.py_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/linalg/matrix_help_message.py_in b/src/ports/postgres/modules/linalg/matrix_help_message.py_in index 053fd75..62b2535 100644 --- a/src/ports/postgres/modules/linalg/matrix_help_message.py_in +++ b/src/ports/postgres/modules/linalg/matrix_help_message.py_in @@ -47,7 +47,7 @@ The column names in {} are set using the options provided in 'out_args'. def _get_help_message(schema_madlib, message, function_name, functionality_str, - usage_str, example_str, **kwargs): + usage_str, **kwargs): format_dict = dict(locals().items() + globals().items()) if not message: help_string = """ @@ -58,15 +58,11 @@ Functionality: {functionality_str} For more details on the function usage: SELECT {schema_madlib}.{function_name}('usage'); -For an example on using this function: - SELECT {schema_madlib}.{function_name}('example'); For more details on the two input formats (dense or sparse): SELECT {schema_madlib}.matrix_info(); """ elif message.lower().strip() in ['usage', 'help', '?']: help_string = usage_str - elif message.lower().strip() in ['example', 'examples']: - help_string = example_str else: help_string = "No such option. Use {schema_madlib}.{function_name}('usage')" return help_string.format(**format_dict) @@ -82,7 +78,14 @@ def matrix_info_help_message(schema_madlib, message, **kwargs): Returns: STR. """ - dense_format = """ + message = message.lower() + if not message: + help_string = """ + Run "SELECT matrix_info('dense');" or "SELECT matrix_info('sparse');" + for examples of the specific data format. + """ + elif message == 'dense': + help_string = """ A dense matrix is represented as a distributed collection of 1-D arrays. An example 3x10 matrix would be the below table: @@ -95,7 +98,8 @@ An example 3x10 matrix would be the below table: The column names above can be user-defined - the matrix functions provide options to input these column names. The default names expected are 'row_num' and 'val'. """ - sparse_format = """ + elif message == 'sparse': + help_string = """ A sparse matrix is represented using the row and column indices for each non-zero entry of the matrix. This representation is useful for sparse matrices, containing multiple zero elements. Given below is an example of a sparse 4x7 matrix @@ -121,89 +125,6 @@ The column names above can be user-defined - the matrix functions provide option to input these column names. The default names expected are 'row_num', 'col_num' and 'val'. """ - message = message.lower() - if not message: - help_string = dense_format + sparse_format + """ - Run "SELECT matrix_info('dense');" or "SELECT matrix_info('sparse');" - for examples of the specific data format. - """ - elif message == 'dense': - help_string = dense_format + """ - --- Example to create dense matices --- These matrices are used in all the matrix operation help message examples. - -DROP TABLE IF EXISTS "matrix_A"; -CREATE TABLE "matrix_A" ( - row_id integer, - row_vec integer[] -); -INSERT INTO "matrix_A" (row_id, row_vec) VALUES (1, '{{9,6,5,8,5,6,6,3,10,8}}'); -INSERT INTO "matrix_A" (row_id, row_vec) VALUES (2, '{{8,2,2,6,6,10,2,1,9,9}}'); -INSERT INTO "matrix_A" (row_id, row_vec) VALUES (3, '{{3,9,9,9,8,6,3,9,5,6}}'); -INSERT INTO "matrix_A" (row_id, row_vec) VALUES (4, '{{6,4,2,2,2,7,8,8,0,7}}'); -INSERT INTO "matrix_A" (row_id, row_vec) VALUES (5, '{{6,8,9,9,4,6,9,5,7,7}}'); -INSERT INTO "matrix_A" (row_id, row_vec) VALUES (6, '{{4,10,7,3,9,5,9,2,3,4}}'); -INSERT INTO "matrix_A" (row_id, row_vec) VALUES (7, '{{8,10,7,10,1,9,7,9,8,7}}'); -INSERT INTO "matrix_A" (row_id, row_vec) VALUES (8, '{{7,4,5,6,2,8,1,1,4,8}}'); -INSERT INTO "matrix_A" (row_id, row_vec) VALUES (9, '{{8,8,8,5,2,6,9,1,8,3}}'); -INSERT INTO "matrix_A" (row_id, row_vec) VALUES (10, '{{4,6,3,2,6,4,1,2,3,8}}'); - -DROP TABLE IF EXISTS "matrix_B"; -CREATE TABLE "matrix_B" ( - row_id integer, - row_vec integer[] -); -INSERT INTO "matrix_B" (row_id, row_vec) VALUES (1, '{{9,10,2,4,6,5,3,7,5,6}}'); -INSERT INTO "matrix_B" (row_id, row_vec) VALUES (2, '{{5,3,5,2,8,6,9,7,7,6}}'); -INSERT INTO "matrix_B" (row_id, row_vec) VALUES (3, '{{0,1,2,3,2,7,7,3,10,1}}'); -INSERT INTO "matrix_B" (row_id, row_vec) VALUES (4, '{{2,9,0,4,3,6,8,6,3,4}}'); -INSERT INTO "matrix_B" (row_id, row_vec) VALUES (5, '{{3,8,7,7,0,5,3,9,2,10}}'); -INSERT INTO "matrix_B" (row_id, row_vec) VALUES (6, '{{5,3,1,7,6,3,5,3,6,4}}'); -INSERT INTO "matrix_B" (row_id, row_vec) VALUES (7, '{{4,8,4,4,2,7,10,0,3,3}}'); -INSERT INTO "matrix_B" (row_id, row_vec) VALUES (8, '{{4,6,0,1,3,1,6,6,9,8}}'); -INSERT INTO "matrix_B" (row_id, row_vec) VALUES (9, '{{6,5,1,7,2,7,10,6,0,6}}'); -INSERT INTO "matrix_B" (row_id, row_vec) VALUES (10, '{{1,4,4,4,8,5,2,8,5,5}}'); -""" - elif message == 'sparse': - # TODO - help_string = sparse_format + """ - -- Example data for sparse matrices -CREATE TABLE "mat_A_sparse"( - "rowNum" integer, - col_num integer, - entry integer -); -INSERT INTO "mat_A_sparse" ("rowNum", col_num, entry) VALUES (1, 1, 9); -INSERT INTO "mat_A_sparse" ("rowNum", col_num, entry) VALUES (1, 2, 6); -INSERT INTO "mat_A_sparse" ("rowNum", col_num, entry) VALUES (1, 8, 3); -INSERT INTO "mat_A_sparse" ("rowNum", col_num, entry) VALUES (1, 9, 10); -INSERT INTO "mat_A_sparse" ("rowNum", col_num, entry) VALUES (1, 10, 8); -INSERT INTO "mat_A_sparse" ("rowNum", col_num, entry) VALUES (2, 1, 8); -INSERT INTO "mat_A_sparse" ("rowNum", col_num, entry) VALUES (2, 2, 2); -INSERT INTO "mat_A_sparse" ("rowNum", col_num, entry) VALUES (2, 3, 2); -INSERT INTO "mat_A_sparse" ("rowNum", col_num, entry) VALUES (2, 4, 6); -INSERT INTO "mat_A_sparse" ("rowNum", col_num, entry) VALUES (2, 6, 6); -INSERT INTO "mat_A_sparse" ("rowNum", col_num, entry) VALUES (2, 7, 3); -INSERT INTO "mat_A_sparse" ("rowNum", col_num, entry) VALUES (8, 1, 7); -INSERT INTO "mat_A_sparse" ("rowNum", col_num, entry) VALUES (9, 3, 8); -INSERT INTO "mat_A_sparse" ("rowNum", col_num, entry) VALUES (9, 4, 5); -INSERT INTO "mat_A_sparse" ("rowNum", col_num, entry) VALUES (10, 2, 6); -INSERT INTO "mat_A_sparse" ("rowNum", col_num, entry) VALUES (10, 3, 3); - -CREATE TABLE "mat_B_sparse"( - row_id integer, - col_id integer, - val integer -); -INSERT INTO "mat_B_sparse" (row_id, col_id, val) VALUES (1, 1, 9); -INSERT INTO "mat_B_sparse" (row_id, col_id, val) VALUES (1, 8, 3); -INSERT INTO "mat_B_sparse" (row_id, col_id, val) VALUES (2, 2, 2); -INSERT INTO "mat_B_sparse" (row_id, col_id, val) VALUES (2, 3, 2); -INSERT INTO "mat_B_sparse" (row_id, col_id, val) VALUES (2, 4, 6); -INSERT INTO "mat_B_sparse" (row_id, col_id, val) VALUES (10, 2, 6); -INSERT INTO "mat_B_sparse" (row_id, col_id, val) VALUES (10, 3, 3); - """ else: help_string = "No such option. Use {schema_madlib}.matrix_add('usage')" return help_string.format(schema_madlib=schema_madlib) @@ -229,22 +150,8 @@ SELECT {schema_madlib}.matrix_identity( {matrix_arg_str} {output_str} """ - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Below example returns an identity matrix of size 4 x 4 - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_identity(4, 'mat_r', 'row=row_id, val=val, fmt=dense'); -SELECT * FROM mat_r ORDER BY row_id; - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_identity(4, 'mat_r', 'row=row,col=col,val=val, fmt=sparse'); -SELECT * FROM mat_r ORDER BY row; - """ return _get_help_message(schema_madlib, message, "matrix_identity", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ @@ -267,24 +174,9 @@ SELECT {schema_madlib}.matrix_diag( {matrix_arg_str} {output_str} """ - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Below example creates a diagonal matrix. The output by default is a sparse --- matrix. A dense matrix can be obtained by using 'fmt=dense' - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_diag(array[1.0, 2.5, 3.4, 10, 6.8], - 'matrix_r', 'row=row_id, val=val, fmt=dense'); -SELECT * FROM matrix_r ORDER BY row_id; ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_diag(array[1.0, 2.5, 3.4, 10, 6.8], 'matrix_r', 'row=row_id, col=col_id,val=val'); -SELECT * FROM matrix_r ORDER BY row_id; - """ return _get_help_message(schema_madlib, message, "matrix_diag", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------------------------ @@ -309,22 +201,9 @@ SELECT {schema_madlib}.matrix_extract_diag( ------------------------------------------------------------ The output is an array containing the main diagonal. """ - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------ --- Below example extracts the main diagonal. The function call is the same --- for dense and sparse matrices. --- Use `matrix_info()' to get the data/table definitions for below matrices - ------------------------ Dense input format -------------------------------- -SELECT madlib.matrix_extract_diag('"matrix_A"', 'row=row_id, val=row_vec'); ------------------------ Sparse input format -------------------------------- -SELECT madlib.matrix_extract_diag('"mat_B_sparse"', 'row=row_id, col=col_id, val=val'); - """ return _get_help_message(schema_madlib, message, "matrix_extract_diag", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ @@ -352,27 +231,8 @@ SELECT {schema_madlib}.matrix_add( {matrix_arg_str} {output_str} """ - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Below example computes A + B --- Use `matrix_info()' to get the data/table definitions for below matrices - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_add('"matrix_A"', 'row=row_id, val=row_vec', - '"matrix_B"', 'row=row_id, val=vector', - 'mat_r', 'val=vector'); -SELECT * FROM mat_r ORDER BY row_id; - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_add('"mat_A_sparse"', 'row="rowNum", val=entry', - '"mat_B_sparse"', 'row=row_id, col=col_id, val=vector', - 'matrix_r_sparse', 'col=col_out'); -SELECT * FROM matrix_r ORDER BY "rowNum"; - """ return _get_help_message(schema_madlib, message, "matrix_add", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ @@ -395,23 +255,8 @@ SELECT {schema_madlib}.matrix_zeros( {matrix_arg_str} {output_str} """ - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Below example returns a matrix initialized with all zeros. The default output --- format is sparse. - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_zeros(5, 4, 'matrix_r_dense', 'row=row_id, val=val, fmt=dense'); -SELECT * FROM matrix_r_dense ORDER BY row_id; - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_zeros(5, 4, 'matrix_r_sparse', 'row=row_id, col=col_id, val=val'); -SELECT * FROM matrix_r_sparse ORDER BY row_id; - """ return _get_help_message(schema_madlib, message, "matrix_zeros", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ @@ -434,23 +279,8 @@ SELECT {schema_madlib}.matrix_ones( {matrix_arg_str} {output_str} """ - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Below example returns a matrix initialized with all ones. The default --- output format is sparse. - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_ones(5, 4, 'matrix_r_dense', 'row=row_id, val=val, fmt=dense'); -SELECT * FROM mat_r_dense ORDER BY row_id; - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_ones(3, 2, 'matrix_r_sparse', 'row=row_id, col=col_id, val=val'); -SELECT * FROM matrix_r_sparse ORDER BY row_id, col_id; - """ return _get_help_message(schema_madlib, message, "matrix_ones", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ @@ -478,27 +308,8 @@ SELECT {schema_madlib}.matrix_sub( {matrix_arg_str} {output_str} """ - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Below example computes A - B --- Use `matrix_info()' to get the data/table definitions for below matrices - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_sub('"matrix_A"', 'row=row_id, val=row_vec', - '"matrix_B"', 'row=row_id, val=vector', - 'mat_r', 'val=vector'); -SELECT * FROM mat_r ORDER BY row_id; - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_sub('"mat_A_sparse"', 'row="rowNum", val=entry', - '"mat_B_sparse"', 'row=row_id, col=col_id, val=vector', - 'matrix_r_sparse', 'col=col_out'); -SELECT * FROM matrix_r ORDER BY "rowNum"; - """ return _get_help_message(schema_madlib, message, "matrix_sub", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ def matrix_ndims_help_message(schema_madlib, message, **kwargs): @@ -515,8 +326,6 @@ This function provides dimension information of a matrix either in dense or spar For more details on the function usage: SELECT {schema_madlib}.matrix_ndims('usage'); -For an example on using this function: - SELECT {schema_madlib}.matrix_ndims('example'); For more details on the two input formats (dense or sparse): SELECT {schema_madlib}.matrix_info(); """ @@ -555,25 +364,6 @@ If not provided, out_args uses same value as in_args. ------------------------------------------------------------ An array with matrix_in dimension information in format of (number of rows,number of columns) """ - elif message.lower().strip() in ['example', 'examples']: - help_string = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Below example computes B' - ------------------------ Dense format -------------------------------- --- Data for "matrix_B" can be obtained from --- SELECT matrix_info('dense'); - -SELECT madlib.matrix_ndims('"mat_B"', 'row=row_id, val=vector'); - ------------------------ Sparse format -------------------------------- --- Data for "matrix_A_sparse" can be obtained from --- SELECT matrix_info('sparse'); - -SELECT madlib.matrix_ndims('"matrix_A_sparse"', 'row="rowNum", col=col_num, val=entry'); - """ else: help_string = "No such option. Use {schema_madlib}.matrix_trans('usage')" return help_string.format(schema_madlib=schema_madlib) @@ -604,27 +394,8 @@ SELECT {schema_madlib}.matrix_elem_mult( {matrix_arg_str} {output_str} """ - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Below example computes A .* B --- Use `matrix_info()' to get the data/table definitions for below matrices - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_elem_mult('"matrix_A"', 'row=row_id, val=row_vec', - '"matrix_B"', 'row=row_id, val=vector', - 'mat_r', 'val=vector'); -SELECT * FROM mat_r ORDER BY row_id; - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_elem_mult('"mat_A_sparse"', 'row="rowNum", val=entry', - '"mat_B_sparse"', 'row=row_id, col=col_id, val=vector', - 'matrix_r_sparse', 'col=col_out'); -SELECT * FROM matrix_r ORDER BY "rowNum"; - """ return _get_help_message(schema_madlib, message, "matrix_elem_mult", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ @@ -652,27 +423,8 @@ SELECT {schema_madlib}.matrix_mult( {matrix_arg_str} {output_str} """ - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Below example computes A * B --- Use `matrix_info()' to get the data/table definitions for below matrices - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_mult('"matrix_A"', 'row=row_id, val=row_vec', - '"matrix_B"', 'row=row_id, val=vector', - 'mat_r', 'val=vector'); -SELECT * FROM mat_r ORDER BY row_id; - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_mult('"mat_A_sparse"', 'row="rowNum", val=entry', - '"mat_B_sparse"', 'row=row_id, col=col_id, val=vector', - 'matrix_r_sparse', 'col=col_out'); -SELECT * FROM matrix_r ORDER BY "rowNum"; - """ return _get_help_message(schema_madlib, message, "matrix_mult", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ @@ -697,25 +449,8 @@ SELECT {schema_madlib}.matrix_trans( {matrix_arg_str} {output_str} """ - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Below example computes A' --- Use `matrix_info()' to get the data/table definitions for below matrices - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_trans('"matrix_A"', 'row=row_id, val=row_vec', - 'mat_r', 'val=vector'); -SELECT * FROM mat_r ORDER BY row_id; - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_trans('"mat_A_sparse"', 'row="rowNum", val=entry', - 'matrix_r_sparse', 'col=col_out'); -SELECT * FROM matrix_r ORDER BY "rowNum"; - """ return _get_help_message(schema_madlib, message, "matrix_trans", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ @@ -738,20 +473,8 @@ SELECT {schema_madlib}.matrix_extract_row( {matrix_arg_str} {output_str} """ - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Use `matrix_info()' to get the data/table definitions for below matrices - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_extract_row('"mat_A"', 'row=row_id, val=row_vec', 0); - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_extract_row('"mat_A_sparse"', 'row="rowNum", val=entry', 0); - """ return _get_help_message(schema_madlib, message, "matrix_extract_row", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ @@ -774,20 +497,8 @@ SELECT {schema_madlib}.matrix_extract_col( {matrix_arg_str} {output_str} """ - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Use `matrix_info()' to get the data/table definitions for below matrices - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_extract_col('"mat_A"', 'row=row_id, val=row_vec', 0); - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_extract_col('"mat_A_sparse"', 'row="rowNum", val=entry', 0); - """ return _get_help_message(schema_madlib, message, "matrix_extract_col", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ @@ -800,7 +511,7 @@ def _min_max_help_message(schema_madlib, message, suffix, **kwargs): USAGE ------------------------------------------------------------ -SELECT {schema_madlib}.matrix_{0}( +SELECT {1}.matrix_{0}( 'matrix_in', -- Name of the table containing input matrix 'in_args', -- String argument containing matrix_in specific arguments -- (see matrix arguments below for options) @@ -820,23 +531,9 @@ The output table ('matrix_r' above) has the following columns '{0}' -- Vector of ordered {0} values 'index' -- Vector of ordered corresponding indices of {0} values - """.format(suffix) - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Use `matrix_info()' to get the data/table definitions for below matrices - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_{0}('"mat_A"', 'row=row_id, val=row_vec', 1, 'mat_r', true, true); -SELECT madlib.matrix_{0}('"mat_A"', 'row=row_id, val=row_vec', 2, 'mat_r', true, true); - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_{0}('"mat_A_sparse"', 'row="rowNum", val=entry', 1, 'mat_r', true, true); -SELECT madlib.matrix_{0}('"mat_A_sparse"', 'row="rowNum", val=entry', 2, 'mat_r', true, true); - """.format(suffix) + """.format(suffix, schema_madlib) return _get_help_message(schema_madlib, message, "matrix_" + suffix, - functionality_str, usage_str, example_str) + functionality_str, usage_str) def matrix_max_help_message(schema_madlib, message, **kwargs): @@ -848,21 +545,19 @@ def matrix_min_help_message(schema_madlib, message, **kwargs): # ------------------------------------------------------------ def matrix_norm_help_message(schema_madlib, message, **kwargs): - """ Help message for Matrix norm + """ Help message for Matrix norm """ if not message: help_string = """ ------------------------------------------------------------ SUMMARY ------------------------------------------------------------ -Functionality: Matrix norm +Functionality: Matrix norm This function computes matrix norm values either in dense or sparse format. For more details on the function usage: SELECT {schema_madlib}.matrix_norm('usage'); -For an example on using this function: - SELECT {schema_madlib}.matrix_norm('example'); For more details on the two input formats (dense or sparse): SELECT {schema_madlib}.matrix_info(); """ @@ -904,27 +599,6 @@ These string arguments can be NULL if the default values are to be used. ------------------------------------------------------------ The output is a value which computes matrix norm. """ - elif message.lower().strip() in ['example', 'examples']: - help_string = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Below example computes matrix norm - ------------------------ Dense format -------------------------------- --- Data for "matrix_A" can be obtained from --- SELECT matrix_info('dense'); - -SELECT madlib.matrix_norm('"mat_A"', 'row=row_id, val=row_vec', '2'); -SELECT madlib.matrix_norm('"mat_A"', 'row=row_id, val=row_vec', 'inf'); - ------------------------ Sparse format -------------------------------- --- Data for "matrix_A_sparse" can be obtained from --- SELECT matrix_info('sparse'); - -SELECT madlib.matrix_norm('"mat_A_sparse"', 'row="rowNum", val=entry', '2'); -SELECT madlib.matrix_norm('"mat_A_sparse"', 'row="rowNum", val=entry', 'm'); - """ else: help_string = "No such option. Use {schema_madlib}.matrix_norm('usage')" return help_string.format(schema_madlib=schema_madlib) @@ -940,7 +614,7 @@ def _agg_help_message(schema_madlib, message, suffix, **kwargs): USAGE ------------------------------------------------------------ -SELECT {schema_madlib}.matrix_{0}( +SELECT {1}.matrix_{0}( 'matrix_in', -- Name of the table containing input matrix 'in_args', -- String argument containing matrix_in specific arguments -- (see matrix arguments below for options) @@ -955,23 +629,9 @@ SELECT {schema_madlib}.matrix_{0}( OUTPUT ------------------------------------------------------------ The output is a vector containing the {0} along given dimension. - """.format(suffix) - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Use `matrix_info()' to get the data/table definitions for below matrices - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_{0}('"mat_A"', 'row=row_id, val=row_vec', 1); -SELECT madlib.matrix_{0}('"mat_A"', 'row=row_id, val=row_vec', 2); - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_{0}('"mat_A_sparse"', 'row="rowNum", val=entry', 1); -SELECT madlib.matrix_{0}('"mat_A_sparse"', 'row="rowNum", val=entry', 2); - """ + """.format(suffix, schema_madlib) return _get_help_message(schema_madlib, message, "matrix_" + suffix, - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ @@ -1004,24 +664,8 @@ SELECT {schema_madlib}.matrix_scalar_mult( {matrix_arg_str} {output_str} """ - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Use `matrix_info()' to get the data/table definitions for below matrices - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_scalar_mult('"mat_A"', 'row=row_id, val=row_vec', - 10, 'mat_r', 'val=vector'); -SELECT * FROM mat_r ORDER BY row_id; - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_scalar_mult('"mat_A_sparse"', 'row="rowNum", val=entry', - 10, 'matrix_r_sparse', 'col=col_out'); -SELECT * FROM matrix_r ORDER BY "rowNum"; - """ return _get_help_message(schema_madlib, message, "matrix_scalar_mult", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ @@ -1047,29 +691,13 @@ SELECT {schema_madlib}.matrix_vec_mult( ------------------------------------------------------------ The output is an array representing the result of the vector multiplication. """ - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Use `matrix_info()' to get the data/table definitions for below matrices - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_vec_mult('"mat_A"', 'row=row_id, val=row_vec', - array[1,2,3,4,5,6,7,8,9,10]::float8[]); -SELECT * FROM mat_r ORDER BY row_id; - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_scalar_mult('"mat_A_sparse"', 'row="rowNum", val=entry', - array[1,2,3,4,5,6,7]::float8[]); -SELECT * FROM matrix_r ORDER BY "rowNum"; - """ return _get_help_message(schema_madlib, message, "matrix_vec_mult", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ def matrix_eigen_help_message(schema_madlib, message, **kwargs): - """ Help message for Matrix eigen values extraction + """ Help message for Matrix eigen values extraction """ functionality_str = "Extract eigen values of matrix" usage_str = """ @@ -1090,22 +718,8 @@ SELECT {schema_madlib}.matrix_eigen( ------------------------------------------------------------ The output are eigen values of the matrix. """ - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Use `matrix_info()' to get the data/table definitions for below matrices - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_eigen('"mat_A"', 'row=row_id, val=row_vec', 'mat_r'); -SELECT * FROM mat_r ORDER BY row_id; - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_eigen('"mat_A_sparse"', 'row="rowNum", val=entry', 'mat_r'); -SELECT * FROM mat_r ORDER BY "rowNum"; - """ return _get_help_message(schema_madlib, message, "matrix_eigen", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ @@ -1133,22 +747,8 @@ SELECT {schema_madlib}.matrix_pinv( ------------------------------------------------------------ The output is generic inverse of the matrix. """ - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Use `matrix_info()' to get the data/table definitions for below matrices - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_pinv('"mat_A"', 'row=row_id, val=row_vec', 'mat_r'); -SELECT * FROM mat_r ORDER BY row_id; - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_pinv('"mat_A_sparse"', 'row="rowNum", val=entry', 'mat_r'); -SELECT * FROM mat_r ORDER BY "rowNum"; - """ return _get_help_message(schema_madlib, message, "matrix_pinv", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ @@ -1174,26 +774,8 @@ SELECT {schema_madlib}.matrix_cholesky( ------------------------------------------------------------ The output is cholesky decomposition of the matrix. """ - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Use `matrix_info()' to get the data/table definitions for below matrices - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_cholesky('"mat_A"', 'row=row_id, val=row_vec', 'mat_result'); -SELECT * FROM mat_result_p ORDER BY row_id; -SELECT * FROM mat_result_l ORDER BY row_id; -SELECT * FROM mat_result_d ORDER BY row_id; - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_cholesky('"mat_A_sparse"', 'row="rowNum", val=entry', 'mat_result'); -SELECT * FROM mat_result_p ORDER BY "rowNum"; -SELECT * FROM mat_result_l ORDER BY "rowNum"; -SELECT * FROM mat_result_d ORDER BY "rowNum"; - """ return _get_help_message(schema_madlib, message, "matrix_cholesky", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ @@ -1221,24 +803,8 @@ SELECT {schema_madlib}.matrix_qr( ------------------------------------------------------------ The output is QR decomposition of the matrix. """ - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Use `matrix_info()' to get the data/table definitions for below matrices - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_qr('"mat_A"', 'row=row_id, val=row_vec', 'mat_result'); -SELECT * FROM mat_result_q ORDER BY row_id; -SELECT * FROM mat_result_r ORDER BY row_id; - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_qr('"mat_A_sparse"', 'row="rowNum", val=entry', 'mat_q', 'mat_r'); -SELECT * FROM mat_result_q ORDER BY "rowNum"; -SELECT * FROM mat_result_r ORDER BY "rowNum"; - """ return _get_help_message(schema_madlib, message, "matrix_qr", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ @@ -1266,28 +832,8 @@ SELECT {schema_madlib}.matrix_lu( ------------------------------------------------------------ The output is LU decomposition of the matrix. """ - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Use `matrix_info()' to get the data/table definitions for below matrices - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_lu('"mat_A"', 'row=row_id, val=row_vec', 'mat_result'); -SELECT * FROM mat_result_p ORDER BY row_id; -SELECT * FROM mat_result_l ORDER BY row_id; -SELECT * FROM mat_result_u ORDER BY row_id; -SELECT * FROM mat_result_q ORDER BY row_id; - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_lu('"mat_A_sparse"', 'row="rowNum", val=entry', 'mat_result'); -SELECT * FROM mat_result_p ORDER BY "rowNum"; -SELECT * FROM mat_result_l ORDER BY "rowNum"; -SELECT * FROM mat_result_u ORDER BY "rowNum"; -SELECT * FROM mat_result_q ORDER BY "rowNum"; - """ return _get_help_message(schema_madlib, message, "matrix_lu", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ @@ -1312,20 +858,8 @@ SELECT {schema_madlib}.matrix_nuclear_norm( ------------------------------------------------------------ The output is nuclear norm computing of the matrix. """ - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Use `matrix_info()' to get the data/table definitions for below matrices - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_nuclear_norm('"mat_A"', 'row=row_id, val=row_vec'); - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_nuclear_norm('"mat_A_sparse"', 'row="rowNum", val=entry'); - """ return _get_help_message(schema_madlib, message, "matrix_nuclear_norm", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ @@ -1350,20 +884,8 @@ SELECT {schema_madlib}.matrix_rank( ------------------------------------------------------------ The output is rank computing of the matrix. """ - example_str = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Use `matrix_info()' to get the data/table definitions for below matrices - ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_rank('"mat_A"', 'row=row_id, val=row_vec'); - ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_rank('"mat_A_sparse"', 'row="rowNum", val=entry'); - """ return _get_help_message(schema_madlib, message, "matrix_rank", - functionality_str, usage_str, example_str) + functionality_str, usage_str) # ------------------------------------------------------------ @@ -1391,20 +913,62 @@ SELECT {schema_madlib}.matrix_inverse( ------------------------------------------------------------ The output is inverse of the matrix. """ - example_str = """ + return _get_help_message(schema_madlib, message, "matrix_inverse", + functionality_str, usage_str) +# ------------------------------------------------------------ + +def matrix_sparsify_help_message(schema_madlib, message, **kwargs): + """ Help message for sparsifying a matrix + """ + functionality_str = "Matrix sparsify" + usage_str = """ ------------------------------------------------------------ - EXAMPLE + USAGE ------------------------------------------------------------ --- Use `matrix_info()' to get the data/table definitions for below matrices ------------------------ Dense format -------------------------------- -SELECT madlib.matrix_inverse('"mat_A"', 'row=row_id, val=row_vec', 'mat_r'); -SELECT row_vec FROM mat_r ORDER BY row_id; +SELECT {schema_madlib}.matrix_sparsify( + 'matrix_in', -- Name of the table containing input matrix + 'in_args', -- String argument containing matrix_in specific arguments + -- (see matrix arguments below for options) + 'matrix_out' -- Name of the table to store result matrix + 'out_args' -- String argument containing matrix_out specific arguments + -- (see matrix arguments below for options) +); ------------------------ Sparse format -------------------------------- -SELECT madlib.matrix_inverse('"mat_A_sparse"', 'row="rowNum", val=entry', 'mat_r'); -SELECT row_vec FROM mat_r ORDER BY row_id; +{matrix_arg_str} +------------------------------------------------------------ + OUTPUT +------------------------------------------------------------ +The output is the sparse version of the matrix. """ - return _get_help_message(schema_madlib, message, "matrix_inverse", - functionality_str, usage_str, example_str) + return _get_help_message(schema_madlib, message, "matrix_sparsify", + functionality_str, usage_str) +# ------------------------------------------------------------ + +def matrix_densify_help_message(schema_madlib, message, **kwargs): + """ Help message for densifying a matrix + """ + functionality_str = "Matrix densify" + usage_str = """ +------------------------------------------------------------ + USAGE +------------------------------------------------------------ + +SELECT {schema_madlib}.matrix_densify( + 'matrix_in', -- Name of the table containing input matrix + 'in_args', -- String argument containing matrix_in specific arguments + -- (see matrix arguments below for options) + 'matrix_out' -- Name of the table to store result matrix + 'out_args' -- String argument containing matrix_out specific arguments + -- (see matrix arguments below for options) +); + +{matrix_arg_str} +------------------------------------------------------------ + OUTPUT +------------------------------------------------------------ +The output is the dense version of the matrix. + """ + return _get_help_message(schema_madlib, message, "matrix_densify", + functionality_str, usage_str) # ------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/madlib/blob/24a11c1e/src/ports/postgres/modules/linalg/svd.py_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/linalg/svd.py_in b/src/ports/postgres/modules/linalg/svd.py_in index 1ad2ac9..3534479 100644 --- a/src/ports/postgres/modules/linalg/svd.py_in +++ b/src/ports/postgres/modules/linalg/svd.py_in @@ -1244,49 +1244,6 @@ def svd_help_message(schema_madlib, message, **kwargs): recon_error FLOAT8 -- Total quality score (i.e. approximation quality) for this set of orthonormal basis """.format(schema_madlib=schema_madlib) - elif message is not None and message.lower() in ('example', 'examples'): - return """ - CREATE TABLE mat ( - row_id integer, - row_vec double precision[] - ); - - -- example input data - COPY mat (row_id, row_vec) FROM stdin; - 1 {{691,58,899,163,159,533,604,582,269,390}} - 0 {{396,840,353,446,318,886,15,584,159,383}} - 3 {{462,532,787,265,982,306,600,608,212,885}} - 2 {{293,742,298,75,404,857,941,662,846,2}} - 5 {{327,946,368,943,7,516,272,24,591,204}} - 4 {{304,151,337,387,643,753,603,531,459,652}} - 7 {{458,959,774,376,228,354,300,669,718,565}} - 6 {{877,59,260,302,891,498,710,286,864,675}} - 9 {{882,761,398,688,761,405,125,484,222,873}} - 8 {{824,390,818,844,180,943,424,520,65,913}} - 11 {{492,220,576,289,321,261,173,1,44,241}} - 10 {{528,1,860,18,814,242,314,965,935,809}} - 13 {{350,192,211,633,53,783,30,444,176,932}} - 12 {{415,701,221,503,67,393,479,218,219,916}} - 15 {{739,651,678,577,273,935,661,47,373,618}} - 14 {{909,472,871,695,930,455,398,893,693,838}} - \. - - DROP TABLE if exists svd_u; - DROP TABLE if exists svd_v; - DROP TABLE if exists svd_s; - -- SVD for dense matrices - SELECT {schema_madlib}.svd('mat', 'svd', 'row_id', 10); - ---------------------------------------------------------------- - DROP TABLE if exists mat_sparse; - SELECT {schema_madlib}.matrix_sparsify('mat', NULL, 'mat_sparse'); - - DROP TABLE if exists svd_u; - DROP TABLE if exists svd_v; - DROP TABLE if exists svd_s; - -- SVD for sparse matrices - SELECT {schema_madlib}.svd_sparse('mat_sparse', 'svd', 'row_id', - 'col_id', 'value', 10); - """.format(schema_madlib=schema_madlib) else: return """ In linear algebra, the singular value decomposition (SVD) is a @@ -1295,9 +1252,6 @@ def svd_help_message(schema_madlib, message, **kwargs): ------- For an overview on usage, run: SELECT {schema_madlib}.svd('usage'); - ------- - For an example, run: - SELECT {schema_madlib}.svd('example') """.format(schema_madlib=schema_madlib) # ------------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/madlib/blob/24a11c1e/src/ports/postgres/modules/linear_systems/dense_linear_systems.py_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/linear_systems/dense_linear_systems.py_in b/src/ports/postgres/modules/linear_systems/dense_linear_systems.py_in index 7e5b55a..4d8da94 100644 --- a/src/ports/postgres/modules/linear_systems/dense_linear_systems.py_in +++ b/src/ports/postgres/modules/linear_systems/dense_linear_systems.py_in @@ -155,21 +155,6 @@ def linear_solver_dense_help(schema_madlib, input_string=None, **kwargs): residual_norm DOUBLE PRECISION, -- Norm of the residual iters INTEGER -- Iterations of the algorithm - - ---------------------------------------------------------------- - Examples - ---------------------------------------------------------------- - SELECT {schema_madlib}.linear_solver_dense( - 'tbl_input', -- Input table which contains the matrix - 'tbl_result', -- Output table where the results are stored - 'row_id', -- Column name containing the row_id (zero base) - 'LHS', -- Column name containing the LHS - 'RHS', -- Column name containing the RHS - NULL, -- Grouping columns - 'direct', -- Classification of method used (direct) - 'algorithm = householderqr' -- Optional parameters - ); - ---------------------------------------------------------------- Summary ---------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/madlib/blob/24a11c1e/src/ports/postgres/modules/linear_systems/sparse_linear_systems.py_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/linear_systems/sparse_linear_systems.py_in b/src/ports/postgres/modules/linear_systems/sparse_linear_systems.py_in index 95514c7..2145009 100644 --- a/src/ports/postgres/modules/linear_systems/sparse_linear_systems.py_in +++ b/src/ports/postgres/modules/linear_systems/sparse_linear_systems.py_in @@ -210,25 +210,6 @@ def linear_solver_sparse_help(schema_madlib, input_string = None, **kwargs): residual_norm DOUBLE PRECISION, -- Norm of the residual iters INTEGER -- Iterations of the algorithm - - ---------------------------------------------------------------- - Examples - ---------------------------------------------------------------- - SELECT {schema_madlib}.linear_solver_sparse( - 'lhs_tbl_source', -- Data table (A matrix) - 'rhs_tbl_source', -- Data table (b vector) - 'tbl_result', -- Result table - 'lhs_row_id', -- Name of column containing row_id - 'lhs_col_id', -- Name of column containing col_id - 'lhs_value' , -- Name of column containing value - 'rhs_row_id', -- Name of column containing row_id - 'rhs_value' , -- Name of column containing value - 50 , -- Number of variables - 'grouping_cols', -- Grouping columns (default: NULL) - 'direct', -- Method used (direct vs iterative) - 'algorithm = ldlt' -- Optimizer optional parameters - ); - ---------------------------------------------------------------- Summary ---------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/madlib/blob/24a11c1e/src/ports/postgres/modules/pca/pca.py_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/pca/pca.py_in b/src/ports/postgres/modules/pca/pca.py_in index 5c71acc..286c57e 100644 --- a/src/ports/postgres/modules/pca/pca.py_in +++ b/src/ports/postgres/modules/pca/pca.py_in @@ -783,62 +783,7 @@ The result summary table ("rslt_summary_table" above) has the following columns specified in grouping_cols """.format(schema_madlib=schema_madlib) else: - if message is not None and \ - message.lower() in ("example", "examples"): - return """ ----------------------------------------------------------------- - Examples ----------------------------------------------------------------- -DROP TABLE IF EXISTS mat_sparse; -CREATE TABLE mat_sparse ( - row_id integer, - col_id integer, - value double precision -); -INSERT INTO mat_sparse VALUES -(1, 1, 1.0), -(2, 2, 2.0), -(3, 3, 3.0), -(4, 4, 4.0), -(1, 5, 5.0), -(2, 4, 6.0), -(3, 2, 7.0), -(4, 3, 8.0); -\. - -DROP TABLE IF EXISTS result_table_sparse; -DROP TABLE IF EXISTS result_table_sparse_mean; -SELECT {schema_madlib}.pca_sparse_train('mat_sparse', 'result_table_sparse', -'row_id', 'col_id', 'val_id', 4, 5, 3); - -SELECT * FROM result_table_sparse ORDER BY row_id; - -DROP TABLE IF EXISTS mat_sparse_group; -CREATE TABLE mat_sparse_group ( - row_id integer, - col_id integer, - value double precision, - matrix_id integer); -INSERT INTO mat_sparse_group VALUES -(1, 1, 1.0, 1), -(2, 2, 2.0, 1), -(3, 3, 3.0, 1), -(4, 4, 4.0, 1), -(1, 5, 5.0, 1), -(2, 4, 6.0, 2), -(3, 2, 7.0, 2), -(4, 3, 8.0, 2); -\. - -DROP TABLE IF EXISTS result_table_sparsed_grouped; -DROP TABLE IF EXISTS result_table_sparsed_grouped_mean; -SELECT {schema_madlib}.pca_sparse_train('mat_sparse_group', 'result_table_sparsed_grouped', -'row_id', 'col_id', 'val_id', 4, 5, 0.8, 'matrix_id'); - -SELECT * FROM result_table_sparsed_grouped ORDER BY matrix_id, row_id; - """.format(schema_madlib=schema_madlib) - else: - return """ + return """ ---------------------------------------------------------------- Summary: Sparse PCA Training ---------------------------------------------------------------- @@ -928,62 +873,9 @@ The result summary table ("rslt_summary_table" above) has the following columns grouping_cols -- The grouping columns (with their types), if any, specified in grouping_cols """.format(schema_madlib=schema_madlib) + else: - if message is not None and \ - message.lower() in ("example", "examples"): - return """ ----------------------------------------------------------------- - Examples ----------------------------------------------------------------- -DROP TABLE IF EXISTS mat; -CREATE TABLE mat ( - id integer, - row_vec double precision[] -); -COPY mat (id, row_vec) FROM stdin DELIMITER '|'; -1|{{1,2,3}} -2|{{2,1,2}} -3|{{3,2,1}} -\. - -DROP TABLE IF EXISTS result_table; -DROP TABLE IF EXISTS result_table_mean; -SELECT {schema_madlib}.pca_train( 'mat', - 'result_table', - 'id', - 3 - ); - -SELECT * FROM result_table ORDER BY row_id; - -DROP TABLE IF EXISTS mat_group; -CREATE TABLE mat_group ( - id integer, - row_vec double precision[], - matrix_id integer -); -INSERT INTO mat_group VALUES -(1, '{{1,2,3}}', 1), -(2, '{{2,1,2}}', 1), -(3, '{{3,2,1}}', 1), -(4, '{{1,2,3,4,5}}', 2), -(5, '{{2,5,2,4,1}}', 2), -(6, '{{5,4,3,2,1}}', 2); -\. - -DROP TABLE IF EXISTS result_table_grp; -DROP TABLE IF EXISTS result_table_grp_mean; -SELECT {schema_madlib}.pca_train( 'mat_group', - 'result_table_grp', - 'row_id', - 0.9, - 'matrix_id' - ); - -SELECT * FROM result_table_grp ORDER BY matrix_id, row_id; - """.format(schema_madlib=schema_madlib) - else: - return """ + return """ ---------------------------------------------------------------- Summary: PCA Training ---------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/madlib/blob/24a11c1e/src/ports/postgres/modules/pca/pca_project.py_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/pca/pca_project.py_in b/src/ports/postgres/modules/pca/pca_project.py_in index 62bf2b1..cc7a4ba 100644 --- a/src/ports/postgres/modules/pca/pca_project.py_in +++ b/src/ports/postgres/modules/pca/pca_project.py_in @@ -86,83 +86,9 @@ The output is divided into three tables (two of which are optional) grouping_col -- The grouping columns present in the 'pc_table', if any ---------------------------------------------------------------- """.format(schema_madlib=schema_madlib) - else: - if usage_string is not None and \ - usage_string.lower() in ("example", "examples"): - return """ ----------------------------------------------------------------- - Examples ----------------------------------------------------------------- --- Run pca_project() using a model table generated without grouping_cols. --- Create input table for pca_project() -DROP TABLE IF EXISTS mat_proj; -CREATE TABLE mat_proj ( - row_id integer, - row_vec double precision[] -); -COPY mat_proj (row_id, row_vec) FROM stdin DELIMITER '|'; -1|{{1,2,3}} -2|{{2,1,2}} -3|{{3,2,1}} -11|{{1,2,3}} -21|{{2,1,2}} -31|{{3,2,1}} -41|{{1,2,4}} -12|{{1,3,3}} -\. - --- NOTE: Use the 'result_table' created using the example shown in --- {schema_madlib}.pca_train('examples'), as the 'pc_table' parameter here. - -DROP TABLE IF EXISTS mat_proj_out; -SELECT {schema_madlib}.pca_project( - 'mat_proj', - 'result_table', - 'mat_proj_out', - 'row_id' - ); - -SELECT * FROM mat_proj_out; - ------------------------------------------------------------------------ - --- Run pca_project() using a model table generated with grouping_cols. --- Create input table for pca_project(), with grouping - -DROP TABLE IF EXISTS mat_proj_grouped; -CREATE TABLE mat_proj_grouped ( - row_id integer, - row_vec double precision[], - matrix_id integer -); -COPY mat_proj_grouped (row_id, row_vec, matrix_id) FROM stdin DELIMITER '|'; -1|{{1,2,3}}|1 -2|{{2,1,2}}|1 -3|{{3,2,1}}|1 -4|{{1,2,3,4,5}}|2 -5|{{2,1,2,4,5}}|2 -6|{{3,2,1,4,5}}|2 -\. - --- NOTE: Use the 'result_table_grp' created using the example shown --- in {schema_madlib}.pca_train('examples'), as the 'pc_table' parameter --- here. 'result_table_grp' was created with 'matrix_id' as the --- grouping column, and the table 'mat_proj_grouped' should also have the --- 'matrix_id' column in it. - -DROP TABLE IF EXISTS mat_proj_grouped_out; -SELECT {schema_madlib}.pca_project( - 'mat_proj_grouped', - 'result_table_grp', - 'mat_proj_grouped_out', - 'row_id' - ); - -SELECT * FROM mat_proj_grouped_out; - """.format(schema_madlib=schema_madlib) - else: - return """ + else: + return """ ---------------------------------------------------------------- Summary: PCA Projection ---------------------------------------------------------------- @@ -238,88 +164,7 @@ The output is divided into three tables (two of which are optional) ---------------------------------------------------------------- """.format(schema_madlib=schema_madlib) else: - if usage_string is not None and \ - usage_string.lower() in ("example", "examples"): - return """ ----------------------------------------------------------------- - Examples ----------------------------------------------------------------- --- Run pca_sparse_project() using a model table generated without grouping_cols. --- Create input table for pca_sparse_project() - -DROP TABLE IF EXISTS sparse_proj_mat; -CREATE TABLE sparse_proj_mat ( - row_id integer, - col_id integer, - val_id integer -); -COPY sparse_proj_mat (row_id, col_id, val_id) FROM stdin delimiter '|'; -1|2|4 -1|5|6 -3|8|4 -8|1|2 -8|7|2 -9|3|4 -9|8|2 -\. - --- NOTE: Use the 'result_table_sparse' created using the example shown in --- {schema_madlib}.pca_sparse_train('examples'), as the 'pc_table' parameter here. - -SELECT {schema_madlib}.pca_sparse_project( - 'sparse_proj_mat', - 'result_table_sparse', - 'sparse_proj_mat_out', - 'row_id', - 'col_id', - 'val_id', - 10, - 10 - ); - -SELECT * FROM sparse_proj_mat_out; - - --- Run pca_sparse_project() using a model table generated with grouping_cols. --- Create input table for pca_sparse_project(), with grouping - -DROP TABLE IF EXISTS sparse_proj_mat_with_grouping; -CREATE TABLE sparse_proj_mat_with_grouping ( - row_id integer, - col_id integer, - val_id integer, - matrix_id integer -); -COPY sparse_proj_mat_with_grouping (row_id, col_id, val_id, matrix_id) FROM stdin delimiter '|'; -8|7|2|1 -9|3|4|1 -9|8|2|1 -1|2|4|2 -1|5|6|2 -6|6|12|2 -\. - --- NOTE: Use the 'result_table_sparsed_grouped' created using the example shown --- in {schema_madlib}.pca_sparse_train('examples'), as the 'pc_table' parameter --- here. 'result_table_sparsed_grouped' was created with 'matrix_id' as the --- grouping column, and the table 'sparse_proj_mat_with_grouping' should also have --- the 'matrix_id' column in it. - -SELECT {schema_madlib}.pca_sparse_project( - 'sparse_proj_mat_with_grouping', - 'result_table_sparsed_grouped', - 'sparse_proj_mat_with_grouping_out', - 'row_id', - 'col_id', - 'val_id', - 10, - 10 - ); - -SELECT * FROM sparse_proj_mat_with_grouping_out; - """.format(schema_madlib=schema_madlib) - else: - return """ + return """ ---------------------------------------------------------------- Summary: PCA Projection ---------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/madlib/blob/24a11c1e/src/ports/postgres/modules/pmml/table_to_pmml.py_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/pmml/table_to_pmml.py_in b/src/ports/postgres/modules/pmml/table_to_pmml.py_in index 5fae7b9..16d5125 100644 --- a/src/ports/postgres/modules/pmml/table_to_pmml.py_in +++ b/src/ports/postgres/modules/pmml/table_to_pmml.py_in @@ -82,44 +82,6 @@ SELECT {schema_madlib}.pmml( OUTPUT ------------------------------------------------------------------ The output of this function is a standard PMML document. - ------------------------------------------------------------------- - OUTPUT ------------------------------------------------------------------- --- Create data set -CREATE TABLE patients( id integer NOT NULL, - second_attack integer, - treatment integer, - trait_anxiety integer); -INSERT INTO patients(id, second_attack, treatment, trait_anxiety) VALUES -( 1, 1, 1, 70), -( 3, 1, 1, 50), -( 5, 1, 0, 40), -( 7, 1, 0, 75), -( 9, 1, 0, 70), -(11, 0, 1, 65), -(13, 0, 1, 45), -(15, 0, 1, 40), -(17, 0, 0, 55), -(19, 0, 0, 50), -( 2, 1, 1, 80), -( 4, 1, 0, 60), -( 6, 1, 0, 65), -( 8, 1, 0, 80), -(10, 1, 0, 60), -(12, 0, 1, 50), -(14, 0, 1, 35), -(16, 0, 1, 50), -(18, 0, 0, 45), -(20, 0, 0, 60); --- train the model -SELECT madlib.logregr_train( - 'patients', - 'patients_logregr', - 'second_attack', - 'ARRAY[1, treatment, trait_anxiety]'); --- pmml export -SELECT madlib.pmml('patients_logregr'); """ return help_string.format(schema_madlib=schema_madlib) http://git-wip-us.apache.org/repos/asf/madlib/blob/24a11c1e/src/ports/postgres/modules/recursive_partitioning/decision_tree.py_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/recursive_partitioning/decision_tree.py_in b/src/ports/postgres/modules/recursive_partitioning/decision_tree.py_in index f9a1ed9..26b0e1f 100644 --- a/src/ports/postgres/modules/recursive_partitioning/decision_tree.py_in +++ b/src/ports/postgres/modules/recursive_partitioning/decision_tree.py_in @@ -2413,8 +2413,6 @@ predict the value of a target variable based on several input variables. For more details on the function usage: SELECT {schema_madlib}.tree_train('usage'); -For an example on using this function: - SELECT {schema_madlib}.tree_train('example'); """ elif message.lower().strip() in ['usage', 'help', '?']: help_string = """ @@ -2509,54 +2507,6 @@ The output summary table ('output_table_summary') has the following columns: (NULL if null_as_category = False) """ - elif message.lower().strip() in ['example', 'examples']: - help_string = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- -DROP TABLE IF EXISTS dummy_dt_con_src CASCADE; -CREATE TABLE dummy_dt_con_src ( - id INTEGER, - cat INTEGER[], - con FLOAT8[], - y FLOAT8 -); - -INSERT INTO dummy_dt_src VALUES -(1, '{{0}}'::INTEGER[], ARRAY[0], 0.5), -(2, '{{0}}'::INTEGER[], ARRAY[1], 0.5), -(3, '{{0}}'::INTEGER[], ARRAY[4], 0.5), -(4, '{{0}}'::INTEGER[], ARRAY[4], 0.5), -(5, '{{0}}'::INTEGER[], ARRAY[4], 0.5), -(6, '{{0}}'::INTEGER[], ARRAY[5], 0.1), -(7, '{{0}}'::INTEGER[], ARRAY[6], 0.1), -(8, '{{1}}'::INTEGER[], ARRAY[9], 0.1); -(9, '{{1}}'::INTEGER[], ARRAY[9], 0.1); -(10, '{{1}}'::INTEGER[], ARRAY[9], 0.1); -(11, '{{1}}'::INTEGER[], ARRAY[9], 0.1); - -DROP TABLE IF EXISTS tree_out, tree_out_summary; -SELECT madlib.tree_train( - 'dummy_dt_src', - 'tree_out', - 'id', - 'y', - 'cat, con', - '', - 'mse', - NULL::Text, - NULL::Text, - 3, - 2, - 1, - 5); - -SELECT madlib.tree_display('tree_out'); --- View the impurity importance value of each feature -DROP TABLE IF EXISTS var_imp_out; -SELECT madlib.get_var_importance('tree_out', 'var_imp_out'); -SELECT * FROM var_imp_out; - """ else: help_string = "No such option. Use {schema_madlib}.tree_train('usage')" return help_string.format(schema_madlib=schema_madlib) @@ -2613,21 +2563,6 @@ possible value of the response variable. The columns are labeled as 'estimated_prob_<dep value>', where <dep value> represents for each value of the response. """ - elif message.lower().strip() in ['example', 'examples']: - help_string = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Assuming the example of tree_train() has been run -SELECT {schema_madlib}.tree_predict( - 'tree_out', - 'dummy_dt_src', - 'tree_predict_out', - 'response' -); - -SELECT * FROM tree_predict_out; - """ else: help_string = "No such option. Use {schema_madlib}.tree_predict('usage')" return help_string.format(schema_madlib=schema_madlib) http://git-wip-us.apache.org/repos/asf/madlib/blob/24a11c1e/src/ports/postgres/modules/recursive_partitioning/random_forest.py_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/recursive_partitioning/random_forest.py_in b/src/ports/postgres/modules/recursive_partitioning/random_forest.py_in index e10e2ec..fae91bd 100644 --- a/src/ports/postgres/modules/recursive_partitioning/random_forest.py_in +++ b/src/ports/postgres/modules/recursive_partitioning/random_forest.py_in @@ -65,8 +65,6 @@ predict the value of a target variable based on several input variables. For more details on the function usage: SELECT {schema_madlib}.forest_train('usage'); -For an example on using this function: - SELECT {schema_madlib}.forest_train('example'); """ elif message.lower().strip() in ['usage', 'help', '?']: help_string = """ @@ -184,57 +182,6 @@ it has the following columns: features. The order corresponds to the order of the variables as found in con_features in <model_table>_summary. """ - elif message.lower().strip() in ['example', 'examples']: - help_string = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- -DROP TABLE IF EXISTS dt_golf; -CREATE TABLE dt_golf ( - id integer NOT NULL, - "OUTLOOK" text, - temperature double precision, - humidity double precision, - windy text, - class text -); - -INSERT INTO dt_golf (id,"OUTLOOK",temperature,humidity,windy,class) VALUES -(1, 'sunny', 85, 85, 'false', 'Don''t Play'), -(2, 'sunny', 80, 90, 'true', 'Don''t Play'), -(3, 'overcast', 83, 78, 'false', 'Play'), -(4, 'rain', 70, 96, 'false', 'Play'), -(5, 'rain', 68, 80, 'false', 'Play'), -(6, 'rain', 65, 70, 'true', 'Don''t Play'), -(7, 'overcast', 64, 65, 'true', 'Play'), -(8, 'sunny', 72, 95, 'false', 'Don''t Play'), -(9, 'sunny', 69, 70, 'false', 'Play'), -(10, 'rain', 75, 80, 'false', 'Play'), -(11, 'sunny', 75, 70, 'true', 'Play'), -(12, 'overcast', 72, 90, 'true', 'Play'), -(13, 'overcast', 81, 75, 'false', 'Play'), -(14, 'rain', 71, 80, 'true', 'Don''t Play'); - -DROP TABLE IF EXISTS train_output, train_output_group, train_output_summary; -SELECT madlib.forest_train('dt_golf', -- source table - 'train_output', -- output model table - 'id', -- id column - 'class', -- response - '"OUTLOOK", temperature, humidity, windy', -- features - NULL, -- exclude columns - NULL, -- grouping columns - 20::integer, -- number of trees - 2::integer, -- number of random features - TRUE::boolean, -- variable importance - 1::integer, -- num_permutations - 8::integer, -- max depth - 3::integer, -- min split - 1::integer, -- min bucket - 10::integer -- number of splits per continuous variable -); -SELECT madlib.get_tree('train_output',1,2,FALSE); - - """ else: help_string = "No such option. Use {schema_madlib}.forest_train('usage')" return help_string.format(schema_madlib=schema_madlib) @@ -1681,19 +1628,6 @@ of the response. This is only for the classification models, and the value is the fraction of votes in each category. """ - elif message.lower().strip() in ['example', 'examples']: - help_string = """ ------------------------------------------------------------- - EXAMPLE ------------------------------------------------------------- --- Assuming the example of forest_train has been run -SELECT {schema_madlib}.forest_predict( - 'forest_out', - 'dummy_dt_src', - 'forest_predict_out', - 'response' -); - """ else: help_string = "No such option. Use {schema_madlib}.forest_predict('usage')" http://git-wip-us.apache.org/repos/asf/madlib/blob/24a11c1e/src/ports/postgres/modules/regress/linear.py_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/regress/linear.py_in b/src/ports/postgres/modules/regress/linear.py_in index a81b78d..70fe78a 100644 --- a/src/ports/postgres/modules/regress/linear.py_in +++ b/src/ports/postgres/modules/regress/linear.py_in @@ -232,9 +232,6 @@ def linregr_help_message(schema_madlib, message, **kwargs): For more details on function usage: SELECT {schema_madlib}.linregr_train('usage') - - For an example on using the function: - SELECT {schema_madlib}.linregr_train('example') """ elif message in ['usage', 'help', '?']: help_string = """ @@ -276,46 +273,6 @@ def linregr_help_message(schema_madlib, message, **kwargs): 'num_rows_processed' INTEGER, -- total number of rows that are used 'num_missing_rows_skipped' INTEGER -- total number of rows that are skipped because of NULL values """ - elif message in ['example', 'examples']: - help_string = """ - CREATE TABLE houses (id INT, tax INT, - bedroom INT, bath FLOAT, - price INT, size INT, lot INT); - COPY houses FROM STDIN WITH DELIMITER '|'; - 1 | 590 | 2 | 1 | 50000 | 770 | 22100 - 2 | 1050 | 3 | 2 | 85000 | 1410 | 12000 - 3 | 20 | 3 | 1 | 22500 | 1060 | 3500 - 4 | 870 | 2 | 2 | 90000 | 1300 | 17500 - 5 | 1320 | 3 | 2 | 133000 | 1500 | 30000 - 6 | 1350 | 2 | 1 | 90500 | 820 | 25700 - 7 | 2790 | 3 | 2.5 | 260000 | 2130 | 25000 - 8 | 680 | 2 | 1 | 142500 | 1170 | 22000 - 9 | 1840 | 3 | 2 | 160000 | 1500 | 19000 - 10 | 3680 | 4 | 2 | 240000 | 2790 | 20000 - 11 | 1660 | 3 | 1 | 87000 | 1030 | 17500 - 12 | 1620 | 3 | 2 | 118600 | 1250 | 20000 - 13 | 3100 | 3 | 2 | 140000 | 1760 | 38000 - 14 | 2070 | 2 | 3 | 148000 | 1550 | 14000 - 15 | 650 | 3 | 1.5 | 65000 | 1450 | 12000 - \. - - -- Train a regression model. First, single regression for all data. - SELECT {schema_madlib}.linregr_train( 'houses', - 'houses_linregr', - 'price', - 'ARRAY[1, tax, bath, size]' - ); - -- Generate three output models, one for each value of "bedroom". - SELECT {schema_madlib}.linregr_train('houses', - 'houses_linregr_bedroom', - 'price', - 'ARRAY[1, tax, bath, size]', - 'bedroom' - ); - -- Examine the resulting models. - SELECT * FROM houses_linregr; - SELECT * FROM houses_linregr_bedroom; - """ else: help_string = "No such option. Use {schema_madlib}.linregr_train()" http://git-wip-us.apache.org/repos/asf/madlib/blob/24a11c1e/src/ports/postgres/modules/regress/logistic.py_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/regress/logistic.py_in b/src/ports/postgres/modules/regress/logistic.py_in index 77ea465..812d4cd 100644 --- a/src/ports/postgres/modules/regress/logistic.py_in +++ b/src/ports/postgres/modules/regress/logistic.py_in @@ -416,9 +416,6 @@ that can be represented with a Boolean expression. For more details on function usage: SELECT {schema_madlib}.logregr_train('usage') - -For a small example on using the function: - SELECT {schema_madlib}.logregr_train('example') """ elif message in ['usage', 'help', '?']: @@ -467,51 +464,6 @@ A summary table named <out_table>_summary is also created at the same time, whic 'num_missing_rows_skipped' integer, -- total number of rows skipped 'grouping_col' varchar -- grouping columns used in the regression """ - elif message in ['example', 'examples']: - - help_string = """ -CREATE TABLE patients( id INTEGER NOT NULL, - second_attack BOOLEAN, - treatment INTEGER, - trait_anxiety INTEGER); -COPY patients FROM STDIN WITH DELIMITER '|'; - 1 | True | 1 | 70 - 3 | True | 1 | 50 - 5 | True | 0 | 40 - 7 | True | 0 | 75 - 9 | True | 0 | 70 - 11 | False | 1 | 65 - 13 | False | 1 | 45 - 15 | False | 1 | 40 - 17 | False | 0 | 55 - 19 | False | 0 | 50 - 2 | True | 1 | 80 - 4 | True | 0 | 60 - 6 | True | 0 | 65 - 8 | True | 0 | 80 - 10 | True | 0 | 60 - 12 | False | 1 | 50 - 14 | False | 1 | 35 - 16 | False | 1 | 50 - 18 | False | 0 | 45 - 20 | False | 0 | 60 -\. - --- Drop output tables before calling the function -DROP TABLE IF EXISTS patients_logregr; -DROP TABLE IF EXISTS patients_logregr_summary; - -SELECT madlib.logregr_train( 'patients', - 'patients_logregr', - 'second_attack', - 'ARRAY[1, treatment, trait_anxiety]', - NULL, - 20, - 'irls' - ); - -SELECT * from patients_logregr; - """ else: help_string = "No such option. Use {schema_madlib}.logregr_train('help')" http://git-wip-us.apache.org/repos/asf/madlib/blob/24a11c1e/src/ports/postgres/modules/regress/multilogistic.py_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/regress/multilogistic.py_in b/src/ports/postgres/modules/regress/multilogistic.py_in index 66de5ef..51d9190 100644 --- a/src/ports/postgres/modules/regress/multilogistic.py_in +++ b/src/ports/postgres/modules/regress/multilogistic.py_in @@ -560,9 +560,6 @@ coefficients that maximizes the likelihood of the observations. For more details on function usage: SELECT {schema_madlib}.mlogregr_train('usage') - -For an example on using the function: - SELECT {schema_madlib}.mlogregr_train('example') """ elif message in ['usage', 'help', '?']: help_string = """ @@ -614,89 +611,6 @@ The output summary table named as <'output_table'>_summary has the following col vcov -- DOUBLE PRECISION[], Covariance matrix coef -- DOUBLE PRECISION[], Coefficients of regression """ - elif message in ['example', 'examples']: - help_string = """ --- Create sample data set -DROP TABLE IF EXISTS test3; -CREATE TABLE test3 ( - feat1 INTEGER, - feat2 INTEGER, - cat INTEGER -); -INSERT INTO test3(feat1, feat2, cat) VALUES -(1,35,1), -(2,33,0), -(3,39,1), -(1,37,1), -(2,31,1), -(3,36,0), -(2,36,1), -(2,31,1), -(2,41,1), -(2,37,1), -(1,44,1), -(3,33,2), -(1,31,1), -(2,44,1), -(1,35,1), -(1,44,0), -(1,46,0), -(2,46,1), -(2,46,2), -(3,49,1), -(2,39,0), -(2,44,1), -(1,47,1), -(1,44,1), -(1,37,2), -(3,38,2), -(1,49,0), -(2,44,0), -(3,61,2), -(1,65,2), -(3,67,1), -(3,65,2), -(1,65,2), -(2,67,2), -(1,65,2), -(1,62,2), -(3,52,2), -(3,63,2), -(2,59,2), -(3,65,2), -(2,59,0), -(3,67,2), -(3,67,2), -(3,60,2), -(3,67,2), -(3,62,2), -(2,54,2), -(3,65,2), -(3,62,2), -(2,59,2), -(3,60,2), -(3,63,2), -(3,65,2), -(2,63,1), -(2,67,2), -(2,65,2), -(2,62,2), -(NULL,67,2), -(2,NULL,2), -(NULL,NULL,2), -(2,62,NULL); - --- Run the multilogistic regression function. -DROP TABLE IF EXISTS test3_output; -DROP TABLE IF EXISTS test3_output_summary; -SELECT madlib.mlogregr_train('test3', - 'test3_output', - 'cat', - 'ARRAY[1, feat1, feat2]', - 0, - 'max_iter=20, optimizer=irls, precision=0.0001' - ); - """ else: help_string = "No such option. Use {schema_madlib}.mlogregr_train()" http://git-wip-us.apache.org/repos/asf/madlib/blob/24a11c1e/src/ports/postgres/modules/sample/balance_sample.py_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/sample/balance_sample.py_in b/src/ports/postgres/modules/sample/balance_sample.py_in index 28cd11c..391d1b4 100644 --- a/src/ports/postgres/modules/sample/balance_sample.py_in +++ b/src/ports/postgres/modules/sample/balance_sample.py_in @@ -722,7 +722,6 @@ output table size. For more details on function usage: SELECT {schema_madlib}.balance_sample('usage'); - SELECT {schema_madlib}.balance_sample('example'); """ elif message.lower() in ['usage', 'help', '?']: help_string = """ @@ -769,60 +768,6 @@ The output_table would contain the required number of samples, along with a new column named __madlib_id__, that contain unique numbers for all sampled rows. """ - elif message.lower() in ("example", "examples"): - help_string = """ ----------------------------------------------------------------------------- - EXAMPLES ----------------------------------------------------------------------------- - --- Create an input table -DROP TABLE IF EXISTS test; - -CREATE TABLE test( - id1 INTEGER, - id2 INTEGER, - gr1 INTEGER, - gr2 INTEGER -); - -INSERT INTO test VALUES -(1,0,1,1), -(2,0,1,1), -(3,0,1,1), -(4,0,1,1), -(5,0,1,1), -(6,0,1,1), -(7,0,1,1), -(8,0,1,1), -(9,0,1,1), -(9,0,1,1), -(9,0,1,1), -(9,0,1,1), -(0,1,1,2), -(0,2,1,2), -(0,3,1,2), -(0,4,1,2), -(0,5,1,2), -(0,6,1,2), -(10,10,2,2), -(20,20,2,2), -(30,30,2,2), -(40,40,2,2), -(50,50,2,2), -(60,60,2,2), -(70,70,2,2) -; - --- Sample without replacement -DROP TABLE IF EXISTS out; -SELECT balance_sample('test', 'out', 'gr1', 'undersample', NULL, NULL, FALSE); -SELECT * FROM out; - ---- Sample with replacement -DROP TABLE IF EXISTS out_sr2; -SELECT balance_sample('test', 'out', 'gr1', 'undersample', NULL, NULL, TRUE); -SELECT * FROM out; -""" else: help_string = "No such option. Use {schema_madlib}.balance_sample()" http://git-wip-us.apache.org/repos/asf/madlib/blob/24a11c1e/src/ports/postgres/modules/sample/stratified_sample.py_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/sample/stratified_sample.py_in b/src/ports/postgres/modules/sample/stratified_sample.py_in index 0621d61..90d12d5 100644 --- a/src/ports/postgres/modules/sample/stratified_sample.py_in +++ b/src/ports/postgres/modules/sample/stratified_sample.py_in @@ -208,7 +208,6 @@ whole table is a single strata. For more details on function usage: SELECT {schema_madlib}.stratified_sample('usage'); - SELECT {schema_madlib}.stratified_sample('example'); """ elif message.lower() in ['usage', 'help', '?']: help_string = """ @@ -244,63 +243,7 @@ be selected in the sample set more than once). Else (if with_replacement is FALSE), a row can be selected at most once. ); """ - elif message.lower() in ("example", "examples"): - help_string = """ ----------------------------------------------------------------------------- - EXAMPLES ----------------------------------------------------------------------------- - --- Create an input table -DROP TABLE IF EXISTS test; - -CREATE TABLE test( - id1 INTEGER, - id2 INTEGER, - gr1 INTEGER, - gr2 INTEGER -); - -INSERT INTO test VALUES -(1,0,1,1), -(2,0,1,1), -(3,0,1,1), -(4,0,1,1), -(5,0,1,1), -(6,0,1,1), -(7,0,1,1), -(8,0,1,1), -(9,0,1,1), -(9,0,1,1), -(9,0,1,1), -(9,0,1,1), -(0,1,1,2), -(0,2,1,2), -(0,3,1,2), -(0,4,1,2), -(0,5,1,2), -(0,6,1,2), -(10,10,2,2), -(20,20,2,2), -(30,30,2,2), -(40,40,2,2), -(50,50,2,2), -(60,60,2,2), -(70,70,2,2) -; - --- Sample without replacement -DROP TABLE IF EXISTS out; -SELECT madlib.stratified_sample('test', 'out', 0.5, 'gr1,gr2', 'id1,id2', - FALSE); -SELECT * FROM out; - --- Sample with replacement -DROP TABLE IF EXISTS out; -SELECT madlib.stratified_sample('test', 'out', 0.5, 'gr1,gr2', 'id1,id2', - TRUE); -SELECT * FROM out; -""" else: - help_string = "No such option. Use {schema_madlib}.graph_sssp()" + help_string = "No such option. Use {schema_madlib}.stratified_sample()" return help_string.format(schema_madlib=schema_madlib) http://git-wip-us.apache.org/repos/asf/madlib/blob/24a11c1e/src/ports/postgres/modules/sample/stratified_sample.sql_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/sample/stratified_sample.sql_in b/src/ports/postgres/modules/sample/stratified_sample.sql_in index c762115..6f0c233 100644 --- a/src/ports/postgres/modules/sample/stratified_sample.sql_in +++ b/src/ports/postgres/modules/sample/stratified_sample.sql_in @@ -253,7 +253,7 @@ m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `'); ------------------------------------------------------------------------------- -- Online help -CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.stratified_sample_help( +CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.stratified_sample( message VARCHAR ) RETURNS VARCHAR AS $$ PythonFunction(sample, stratified_sample, stratified_sample_help) @@ -262,9 +262,9 @@ m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `'); ------------------------------------------------------------------------------- -CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.stratified_sample_help() +CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.stratified_sample() RETURNS VARCHAR AS $$ - SELECT MADLIB_SCHEMA.stratified_sample_help(''); + SELECT MADLIB_SCHEMA.stratified_sample(''); $$ LANGUAGE sql IMMUTABLE m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `'); ------------------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/madlib/blob/24a11c1e/src/ports/postgres/modules/sample/train_test_split.py_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/sample/train_test_split.py_in b/src/ports/postgres/modules/sample/train_test_split.py_in index 011b14f..17388bb 100644 --- a/src/ports/postgres/modules/sample/train_test_split.py_in +++ b/src/ports/postgres/modules/sample/train_test_split.py_in @@ -199,7 +199,6 @@ performed. For more details on function usage: SELECT {schema_madlib}.train_test_split('usage'); - SELECT {schema_madlib}.train_test_split('example'); """ elif message.lower() in ['usage', 'help', '?']: help_string = """ @@ -243,77 +242,7 @@ be selected in the sample set more than once). Else (if with_replacement is FALSE), a row can be selected at most once. ); """ - elif message.lower() in ("example", "examples"): - help_string = """ ----------------------------------------------------------------------------- - EXAMPLES ----------------------------------------------------------------------------- - --- Create an input table -DROP TABLE IF EXISTS test; - -CREATE TABLE test( - id1 INTEGER, - id2 INTEGER, - gr1 INTEGER, - gr2 INTEGER -); - -INSERT INTO test VALUES -(1,0,1,1), -(2,0,1,1), -(3,0,1,1), -(4,0,1,1), -(5,0,1,1), -(6,0,1,1), -(7,0,1,1), -(8,0,1,1), -(9,0,1,1), -(9,0,1,1), -(9,0,1,1), -(9,0,1,1), -(0,1,1,2), -(0,2,1,2), -(0,3,1,2), -(0,4,1,2), -(0,5,1,2), -(0,6,1,2), -(10,10,2,2), -(20,20,2,2), -(30,30,2,2), -(40,40,2,2), -(50,50,2,2), -(60,60,2,2), -(70,70,2,2) -; - --- Sample without replacement -DROP TABLE IF EXISTS out; -SELECT madlib.train_test_split( - 'test', -- Source table - 'out', -- Output table - 0.5, -- Sample proportion - 0.5, -- Sample proportion - 'gr1,gr2', -- Strata definition - 'id1,id2', -- Columns to output - FALSE, -- Sample without replacement - FALSE); -- Do not separate output tables -SELECT * FROM out ORDER BY split,gr1,gr2,id1,id2; - --- Sample with replacement -DROP TABLE IF EXISTS out_train, out_test; -SELECT madlib.train_test_split( - 'test', -- Source table - 'out', -- Output table - 0.5, -- train_proportion - NULL, -- Default = 1 - train_proportion = 0.5 - 'gr1,gr2', -- Strata definition - 'id1,id2', -- Columns to output - TRUE, -- Sample with replacement - TRUE); -- Separate output tables -SELECT * FROM out_train ORDER BY gr1,gr2,id1,id2; -""" else: - help_string = "No such option. Use {schema_madlib}.graph_sssp()" + help_string = "No such option. Use {schema_madlib}.train_test_split()" return help_string.format(schema_madlib=schema_madlib)
