Pivot: Add support for array output JIRA: MADLIB-1066
When total pivoted columns exceed the Postgresql limit (250 - 1600 depending on the type of columns), an array output becomes essential. This commit adds support to get each pivoted set of columns (all columns related to a particular value-aggregate combination) as an array. There is also support for getting the output as madlib.svec. Closes #108 Project: http://git-wip-us.apache.org/repos/asf/incubator-madlib/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-madlib/commit/bb209bbb Tree: http://git-wip-us.apache.org/repos/asf/incubator-madlib/tree/bb209bbb Diff: http://git-wip-us.apache.org/repos/asf/incubator-madlib/diff/bb209bbb Branch: refs/heads/latest_release Commit: bb209bbb6e081a2838a3f698947529358792e47f Parents: 6b466ea Author: Rahul Iyer <ri...@apache.org> Authored: Fri Mar 31 11:15:58 2017 -0700 Committer: Rahul Iyer <ri...@apache.org> Committed: Fri Mar 31 11:15:58 2017 -0700 ---------------------------------------------------------------------- .../postgres/modules/utilities/pivot.py_in | 238 +++++++++----- .../postgres/modules/utilities/pivot.sql_in | 327 ++++++++----------- .../modules/utilities/test/pivot.sql_in | 74 +++-- .../postgres/modules/utilities/utilities.py_in | 13 +- .../modules/utilities/validate_args.py_in | 4 +- 5 files changed, 354 insertions(+), 302 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/bb209bbb/src/ports/postgres/modules/utilities/pivot.py_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/utilities/pivot.py_in b/src/ports/postgres/modules/utilities/pivot.py_in index 7e342f1..6d0ebae 100644 --- a/src/ports/postgres/modules/utilities/pivot.py_in +++ b/src/ports/postgres/modules/utilities/pivot.py_in @@ -42,6 +42,7 @@ from validate_args import columns_exist_in_table from validate_args import table_is_empty from validate_args import _get_table_schema_names from validate_args import get_first_schema +from validate_args import get_expr_type m4_changequote(`<!', `!>') @@ -49,14 +50,14 @@ m4_changequote(`<!', `!>') def pivot(schema_madlib, source_table, out_table, index, pivot_cols, pivot_values, aggregate_func=None, fill_value=None, keep_null=False, - output_col_dictionary=False, **kwargs): + output_col_dictionary=False, output_type=None, **kwargs): """ Helper function that can be used to pivot tables Args: @param source_table The original data table @param out_table The output table that contains the dummy variable columns - @param index The index columns to group by the records by + @param index The index columns to group the records by @param pivot_cols The columns to pivot the table @param pivot_values The value columns to be summarized in the pivoted table @@ -80,6 +81,16 @@ def pivot(schema_madlib, source_table, out_table, index, pivot_cols, FROM pivset GROUP BY id ORDER BY id) """ + def _fill_value_wrapper(sel_str): + """ Wrap a given SQL SELECT statement with COALESCE using a given fill value. + + No-op if the fill value is not provided + """ + if fill_value is not None: + return " COALESCE({0}, {1}) ".format(sel_str, fill_value) + else: + return sel_str + with MinWarning('warning'): # If there are more than 1000 columns for the output table, we give a @@ -93,6 +104,25 @@ def pivot(schema_madlib, source_table, out_table, index, pivot_cols, indices = split_quoted_delimited_str(index) pcols = split_quoted_delimited_str(pivot_cols) pvals = split_quoted_delimited_str(pivot_values) + + # output type for specific supported types + output_type = 'column' if not output_type else output_type.lower() + all_output_types = sorted(['array', 'column', 'svec']) + try: + # allow user to specify a prefix substring of + # supported output types. This works because the supported + # output types have unique prefixes. + output_type = next(s for s in all_output_types + if s.startswith(output_type)) + except StopIteration: + # next() returns a StopIteration if no element found + plpy.error("Encoding categorical: Output type should be one of {0}". + format(','.join(all_output_types))) + + is_array_output = output_type in ('array', 'svec') + # always build dictionary table if output is array + output_col_dictionary = True if is_array_output else output_col_dictionary + validate_pivot_coding(source_table, out_table, indices, pcols, pvals) # Strip the end quotes for building output columns (this can only be @@ -104,6 +134,8 @@ def pivot(schema_madlib, source_table, out_table, index, pivot_cols, # value column. agg_dict = parse_aggregates(pvals, aggregate_func) + validate_output_types(source_table, agg_dict, is_array_output) + # Find the distinct values of pivot_cols array_agg_str = ', '.join("array_agg(DISTINCT {pcol}) AS {pcol}_values". format(pcol=pcol) for pcol in pcols) @@ -143,7 +175,7 @@ def pivot(schema_madlib, source_table, out_table, index, pivot_cols, for pcol in pcols]))) # Check the max possible length of a output column name - # If it is over 63 (psql upper limit) create table lookup + # If it is over 63 (postgresql upper limit) create dictionary lookup for pval in pvals: agg_func = agg_dict[pval] # Length calculation: value column length + aggregate length + @@ -159,122 +191,157 @@ def pivot(schema_madlib, source_table, out_table, index, pivot_cols, format(**locals())) output_col_dictionary = True - # Create the output dictionary if needed + # Types of pivot columns are needed for building the right columns + # in the dictionary table and to decide if a pivot column value needs to + # be quoted during comparison (will be quoted if it's a text column) + types_str = ', '.join("pg_typeof(\"{pcol}\") as {pcol}". + format(pcol=p) for p in pcols) + pcol_types = plpy.execute("SELECT {0} FROM {1} LIMIT 1". + format(types_str, source_table))[0] if output_col_dictionary: out_dict = out_table + "_dictionary" _assert(not table_exists(out_dict), "Pivot: Output dictionary table already exists!") - - # Collect the types for pivot columns - types_str = ','.join("pg_typeof(\"{pcol}\") as {pcol}_type". - format(pcol=pcol) for pcol in pcols) - pcol_types = plpy.execute("SELECT {0} FROM {1} LIMIT 1". - format(types_str, source_table)) - # Create the empty dictionary table - dict_str = ', '.join(" {pcol} {pcol_type} ". - format(pcol=pcol, pcol_type=pcol_types[0][pcol+"_type"]) - for pcol in pcols) + pcol_names_types = ', '.join(" {pcol} {pcol_type} ". + format(pcol=pcol, + pcol_type=pcol_types[pcol]) + for pcol in pcols) plpy.execute(""" CREATE TABLE {out_dict} ( - __pivot_cid__ VARCHAR, pval VARCHAR, - agg VARCHAR, {dict_str}, col_name VARCHAR) - """.format(**locals())) - - # The holder for rows to insert into output dictionary - insert_str = [] + __pivot_cid__ VARCHAR, + pval VARCHAR, + agg VARCHAR, + {pcol_names_types}, + col_name VARCHAR) + """.format(out_dict=out_dict, pcol_names_types=pcol_names_types)) + + # List of rows to insert into output dictionary + dict_insert_str = [] # Counter for the new output column names - dict_counter = 0 + dict_counter = 1 - pivot_str_sel_list = [] - pivot_str_from_list = [] - # Prepare the wrapper for fill value - if fill_value is not None: - fill_str_begin = " COALESCE(" - fill_str_end = ", " + fill_value + " ) " - else: - fill_str_begin, fill_str_end = "", "" + pivot_sel_list = [] + pivot_from_list = [] for pval in pvals: agg_func = agg_dict[pval] for agg in agg_func: + + # is using array_output, create a new array for each pval-agg combo + if is_array_output: + # we store information in the dictionary table for each + # index in the array. 'index_counter' is the current index + # being updated (resets for each new array) + index_counter = 1 + + sub_pivot_sel_list = [] for comb in pivot_comb: pivot_col_condition = [] - pivot_col_name = ["\"{pval}_{agg}".format(pval=pval, agg=agg)] + # note column name starts with double quotes + pivot_col_name = ['{pval}_{agg}'.format(pval=pval, agg=agg)] if output_col_dictionary: # Prepare the entry for the dictionary - insert_str.append("(\'__p_{dict_counter}__\', \'{pval}\', " - "\'{agg}\' ".format(dict_counter=dict_counter, - pval=pval, agg=agg)) + if not is_array_output: + index_name = ("__p_{dict_counter}__". + format(dict_counter=dict_counter)) + else: + # for arrays, index_name is just the index into each array + index_name = str(index_counter) + index_counter += 1 + dict_insert_str.append( + "(\'{index_name}\', \'{pval}\', \'{agg}\' ". + format(index_name=index_name, pval=pval, agg=agg)) # For every pivot column in a given combination for counter, pcol in enumerate(pcols): + if comb[counter] is None: + quoted_pcol_value = "NULL" + elif pcol_types[pcol] in ("text", "varchar", "character varying"): + quoted_pcol_value = "'" + comb[counter] + "'" + else: + quoted_pcol_value = comb[counter] + # If we encounter a NULL value that means it is not filtered # because of keep_null. Use "IS NULL" for comparison if comb[counter] is None: pivot_col_condition.append(" \"{0}\" IS NULL".format(pcol)) pivot_col_name.append("_{0}_null".format(pcol)) else: - pivot_col_condition.append(" \"{0}\" = '{1}'". - format(pcol, comb[counter])) + pivot_col_condition.append(" \"{0}\" = {1}". + format(pcol, quoted_pcol_value)) pivot_col_name.append("_{0}_{1}".format(pcol, comb[counter])) - # Collect pcol values for the dict if output_col_dictionary: - insert_str.append("{0}".format( - comb[counter] if comb[counter] is not None else "NULL")) - pivot_col_name.append("\"") + dict_insert_str.append("{0}".format(quoted_pcol_value)) if output_col_dictionary: - # Store the whole string in case some user wants it - insert_str.append("\'{column_name}\')". - format(column_name=''.join(pivot_col_name))) - pivot_col_name = ["__p_"+str(dict_counter)+"__"] + # Store the whole string as additional info + dict_insert_str.append("'{0}')".format(''.join(pivot_col_name))) + pivot_col_name = ["__p_" + str(dict_counter) + "__"] dict_counter += 1 + # Collecting the whole sql query # Please refer to the earlier comment for a sample output - # Build the pivot column with NULL values in tuples that don't # satisfy that column's condition - pivot_str_from = ("(CASE WHEN {condition} THEN {pval} END) " - "AS {pivot_col_name}". - format(pval=pval, - condition=' AND '.join(pivot_col_condition), - pivot_col_name=''.join(pivot_col_name))) - pivot_str_from_list.append(pivot_str_from) - # Aggregate over each pivot column, while filtering all NULL values - # created by previous query. - pivot_str_sel = ("{fill_str_begin}" - " {agg} ({pivot_col_name}) " - " FILTER (WHERE {pivot_col_name} IS NOT NULL) " - "{fill_str_end} AS {pivot_col_name}". - format(agg=agg, fill_str_begin=fill_str_begin, - fill_str_end=fill_str_end, - pivot_col_name=''.join(pivot_col_name))) - pivot_str_sel_list.append(pivot_str_sel) + p_name = '"{0}"'.format(''.join(pivot_col_name)) + pivot_str_from = ( + "(CASE WHEN {condition} THEN {pval} END) AS {p_name}". + format(pval=pval, + condition=' AND '.join(pivot_col_condition), + p_name=p_name)) + pivot_from_list.append(pivot_str_from) + + # Aggregate over each pivot column, while filtering all NULL + # values created by previous query. + sub_pivot_str_sel = _fill_value_wrapper( + "{agg}({p_name}) " + " FILTER (WHERE {p_name} IS NOT NULL)". + format(agg=agg, p_name=p_name)) + if not is_array_output: + # keep spaces around the 'AS' + sub_pivot_str_sel += " AS " + p_name + sub_pivot_sel_list.append(sub_pivot_str_sel) + + if sub_pivot_sel_list: + if is_array_output: + if output_type is 'svec': + cast_str = '::FLOAT8[]::{0}.svec'.format(schema_madlib) + else: + cast_str = '::FLOAT8[]' + pivot_sel_list.append( + 'ARRAY[{all_pivot_sel}]{cast_str} AS "{pval}_{agg}"'. + format(all_pivot_sel=', '.join(sub_pivot_sel_list), + cast_str=cast_str, + pval=pval, + agg=agg)) + else: + pivot_sel_list += sub_pivot_sel_list try: plpy.execute(""" CREATE TABLE {out_table} AS SELECT {index}, - {pivot_str_sel_list} + {all_pivot_sel_str} FROM ( SELECT {index}, - {pivot_str_from_list} + {all_pivot_from_str} FROM {source_table} ) x GROUP BY {index} """.format(out_table=out_table, index=index, source_table=source_table, - pivot_str_from_list=', '.join(pivot_str_from_list), - pivot_str_sel_list=', '.join(pivot_str_sel_list))) + all_pivot_from_str=', '.join(pivot_from_list), + all_pivot_sel_str=', '.join(pivot_sel_list) + )) if output_col_dictionary: plpy.execute("INSERT INTO {out_dict} VALUES {insert_sql}". format(out_dict=out_dict, - insert_sql=', '.join(insert_str))) + insert_sql=', '.join(dict_insert_str))) except plpy.SPIError: # Warn user if the number of columns is over the limit with MinWarning("warning"): @@ -314,16 +381,16 @@ def parse_aggregates(pvals, aggregate_func): 5) A partial mapping (eg. 'val2=sum'): Use the default ('avg') for the missing value columns """ - param_types = dict.fromkeys(pvals, list) + param_types = dict.fromkeys(pvals, tuple) agg_dict = extract_keyvalue_params(aggregate_func, param_types) if not agg_dict: - agg_list = split_quoted_delimited_str(aggregate_func) - agg_dict = dict.fromkeys(pvals, (agg_list if agg_list else ['avg'])) + agg_list = tuple(split_quoted_delimited_str(aggregate_func)) + agg_dict = dict.fromkeys(pvals, (agg_list if agg_list else ('avg', ))) else: for pval in pvals: if pval not in agg_dict: - agg_dict[pval] = ['avg'] + agg_dict[pval] = ('avg', ) return agg_dict # ------------------------------------------------------------------------------ @@ -364,6 +431,26 @@ def validate_pivot_coding(source_table, out_table, indices, pivs, vals): # ------------------------------------------------------------------------------ +def validate_output_types(source_table, agg_dict, is_array_output): + """ + Args: + @param source_table: str, Name of table containing data + @param agg_dict: dict, Key-value pair containing aggregates applied for each val column + @param is_array_output: bool, Is the pivot output columnar (False) or array (True) + + Returns: + None + """ + for val, func_iterable in agg_dict.items(): + for func in func_iterable: + func_call_str = '{0}({1})'.format(func, val) + _assert(not ('[]' in get_expr_type(func_call_str, source_table) and + is_array_output), + "Pivot: Aggregate {0} with an array return type cannot be " + "combined with output_type='array' or 'svec'".format(func)) +# ---------------------------------------------------------------------- + + def pivot_help(schema_madlib, message, **kwargs): """ Help function for pivot @@ -401,14 +488,19 @@ For more details on function usage: -- of the output pivot table pivot_cols, -- Comma-separated columns that will form the -- columns of the output pivot table - pivot_values -- Comma-separated columns that contain the values + pivot_values, -- Comma-separated columns that contain the values -- to be summarized in the output pivot table - fill_value -- If specified, determines how to fill NULL values + fill_value, -- If specified, determines how to fill NULL values -- resulting from pivot operation - keep_null -- The flag for determining how to handle NULL + keep_null, -- The flag for determining how to handle NULL -- values in pivot columns - output_col_dictionary -- The flag for enabling the creation of the + output_col_dictionary, -- The flag for enabling the creation of the -- output dictionary for shorter column names + output_type -- This parameter controls the output format + -- of the pivoted variables. + -- If 'column', a column is created for each pivot + -- If 'array', an array is created combining all pivots + -- If 'svec', the array is cast to madlib.svec ); ----------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/bb209bbb/src/ports/postgres/modules/utilities/pivot.sql_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/utilities/pivot.sql_in b/src/ports/postgres/modules/utilities/pivot.sql_in index cb2c223..7cdfbe0 100644 --- a/src/ports/postgres/modules/utilities/pivot.sql_in +++ b/src/ports/postgres/modules/utilities/pivot.sql_in @@ -59,7 +59,8 @@ pivot( aggregate_func, fill_value, keep_null, - output_col_dictionary + output_col_dictionary, + output_type ) </pre> \b Arguments @@ -67,6 +68,7 @@ pivot( <dt>source_table</dt> <dd>VARCHAR. Name of the source table (or view) containing data to pivot.</dd> + <dt>output_table</dt> <dd>VARCHAR. Name of output table that contains the pivoted data. The output table contains all the columns present in @@ -81,18 +83,21 @@ pivot( - aggregate function - name of the pivot column <em>'pivot_cols'</em> - values in the pivot column - </dd> + <dt>index </dt> <dd>VARCHAR. Comma-separated columns that will form the index of the output pivot table. By index we mean the values to group by; these are the rows in the output pivot table.</dd> + <dt>pivot_cols </dt> <dd>VARCHAR. Comma-separated columns that will form the columns of the output pivot table.</dd> + <dt>pivot_values </dt> <dd>VARCHAR. Comma-separated columns that contain the values to be summarized in the output pivot table.</dd> + <dt>aggregate_func (optional)</dt> <dd>VARCHAR. default: 'AVG'. A comma-separated list of aggregates to be applied to values. These can be PostgreSQL built-in aggregates [1] or UDAs. It is @@ -113,10 +118,12 @@ pivot( values resulting from pivot operation. This is a global parameter (not applied per aggregate) and is applied post-aggregation to the output table.</dd> + <dt>keep_null (optional)</dt> <dd>BOOLEAN. default: FALSE. If TRUE, then pivot columns are created corresponding to NULL categories. If FALSE, then no pivot columns will be created for NULL categories.</dd> + <dt>output_col_dictionary (optional)</dt> <dd>BOOLEAN. default: FALSE. This parameter is used to handle auto-generated column names that exceed the PostgreSQL limit of 63 bytes @@ -127,6 +134,19 @@ pivot( a dictionary output file will be created and a message given to the user. </dd> + <dt>output_type (optional)</dt> + <dd>VARCHAR. default: 'column'. This parameter controls the output format + of the pivoted variables. If 'column', a column is created for each pivot + variable. PostgreSQL limits the number of columns in a table + (250 - 1600 depending on column types). + If the total number of output columns exceeds this limit, then make this + parameter either 'array' (to combine the output columns into an array) or + 'svec' (to cast the array output to <em>'madlib.svec'</em> type). + + A dictionary will be created (<em>output_col_dictionary=TRUE</em>) + when 'output_type' is 'array' or 'svec' to define each index into the array. + </dd> + </dl> @anchor notes @@ -138,8 +158,8 @@ allowed so NULLs are ignored. - It is not allowed to set the fill_value parameter without setting the aggregate_func parameter due to possible ambiguity. Set aggregate_func to NULL for the default behavior and use fill_value as desired. -Please note that full_value must be of the same type as the output of the -aggregate_func (or capable of being cast to the same type by PostgreSQL), +Please note that full_value must be of the same type as the output of the +aggregate_func (or capable of being cast to the same type by PostgreSQL), or else an error will result. - It is not allowed to set the output_col_dictionary parameter without setting the keep_null parameter due to possible ambiguity. Set @@ -303,13 +323,9 @@ SELECT * FROM pivout ORDER BY id,id2; | 0 | 8 | | </pre> --# Turn on the extended view for readability: +-# Use multiple pivot columns with columnar output: <pre class="example"> \\x on -</pre> - --# Use multiple pivot columns: -<pre class="example"> DROP TABLE IF EXISTS pivout; SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv, piv2', 'val'); SELECT * FROM pivout ORDER BY id; @@ -346,10 +362,47 @@ val_avg_piv_30_piv2_300 | ... </pre> +-# Use multiple pivot columns (same as above) with an array output: +<pre class="example"> +DROP TABLE IF EXISTS pivout; +SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv, piv2', 'val', + NULL, NULL, FALSE, FALSE, 'array'); +\\x off +SELECT * FROM pivout ORDER BY id; +</pre> +<pre class="result"> + id | val_avg +--------+------------------------------------------------------------ + 0 | {1,2,NULL,NULL,NULL,3,NULL,NULL,NULL,NULL,NULL,NULL} + 1 | {NULL,NULL,7,NULL,NULL,4,NULL,NULL,NULL,NULL,5.5,NULL} + [NULL] | {NULL,NULL,NULL,8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL} +</pre> +<pre class="example"> +-- Use the dictionary to understand what each index of an array corresponds to +SELECT * FROM pivout_dictionary; +</pre> +<pre class="result"> + __pivot_cid__ | pval | agg | piv | piv2 | col_name +---------------+------+-----+-----+------+--------------------------- + 1 | val | avg | 10 | 0 | "val_avg_piv_10_piv2_0" + 2 | val | avg | 10 | 100 | "val_avg_piv_10_piv2_100" + 3 | val | avg | 10 | 200 | "val_avg_piv_10_piv2_200" + 4 | val | avg | 10 | 300 | "val_avg_piv_10_piv2_300" + 5 | val | avg | 20 | 0 | "val_avg_piv_20_piv2_0" + 6 | val | avg | 20 | 100 | "val_avg_piv_20_piv2_100" + 7 | val | avg | 20 | 200 | "val_avg_piv_20_piv2_200" + 8 | val | avg | 20 | 300 | "val_avg_piv_20_piv2_300" + 9 | val | avg | 30 | 0 | "val_avg_piv_30_piv2_0" + 10 | val | avg | 30 | 100 | "val_avg_piv_30_piv2_100" + 11 | val | avg | 30 | 200 | "val_avg_piv_30_piv2_200" + 12 | val | avg | 30 | 300 | "val_avg_piv_30_piv2_300" +</pre> + -# Use multiple value columns: <pre class="example"> DROP TABLE IF EXISTS pivout; SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv', 'val, val2'); +\\x on SELECT * FROM pivout ORDER BY id; </pre> <pre class="result"> @@ -377,6 +430,7 @@ val2_avg_piv_30 | 15.5 <pre class="example"> DROP TABLE IF EXISTS pivout; SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv', 'val', 'avg, sum'); +\\x on SELECT * FROM pivout ORDER BY id; </pre> <pre class="result"> @@ -404,6 +458,7 @@ val_sum_piv_30 | 11 DROP TABLE IF EXISTS pivout; SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv', 'val, val2', 'val=avg, val2=sum'); +\\x on SELECT * FROM pivout ORDER BY id; </pre> <pre class="result"> @@ -431,6 +486,7 @@ val2_sum_piv_30 | 31 DROP TABLE IF EXISTS pivout; SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv', 'val, val2', 'val=avg, val2=[avg,sum]'); +\\x on SELECT * FROM pivout ORDER BY id; </pre> <pre class="result"> @@ -464,6 +520,7 @@ val2_sum_piv_30 | 31 DROP TABLE IF EXISTS pivout; SELECT madlib.pivot('pivset_ext', 'pivout', 'id, id2', 'piv, piv2', 'val, val2', 'val=avg, val2=[avg,sum]', '111', True); +\\x on SELECT * FROM pivout ORDER BY id,id2; </pre> <pre class="result"> @@ -492,32 +549,7 @@ val2_avg_piv_null_piv2_200 | 111 val2_avg_piv_null_piv2_300 | 111 val2_avg_piv_10_piv2_0 | 11 val2_avg_piv_10_piv2_100 | 111 -val2_avg_piv_10_piv2_200 | 111 -val2_avg_piv_10_piv2_300 | 111 -val2_avg_piv_20_piv2_0 | 111 -val2_avg_piv_20_piv2_100 | 111 -val2_avg_piv_20_piv2_200 | 111 -val2_avg_piv_20_piv2_300 | 111 -val2_avg_piv_30_piv2_0 | 111 -val2_avg_piv_30_piv2_100 | 111 -val2_avg_piv_30_piv2_200 | 111 -val2_avg_piv_30_piv2_300 | 111 -val2_sum_piv_null_piv2_0 | 111 -val2_sum_piv_null_piv2_100 | 111 -val2_sum_piv_null_piv2_200 | 111 -val2_sum_piv_null_piv2_300 | 111 -val2_sum_piv_10_piv2_0 | 11 -val2_sum_piv_10_piv2_100 | 111 -val2_sum_piv_10_piv2_200 | 111 -val2_sum_piv_10_piv2_300 | 111 -val2_sum_piv_20_piv2_0 | 111 -val2_sum_piv_20_piv2_100 | 111 -val2_sum_piv_20_piv2_200 | 111 -val2_sum_piv_20_piv2_300 | 111 -val2_sum_piv_30_piv2_0 | 111 -val2_sum_piv_30_piv2_100 | 111 -val2_sum_piv_30_piv2_200 | 111 -val2_sum_piv_30_piv2_300 | 111 +... -[ RECORD 2 ]--------------+----- id | 0 id2 | 1 @@ -541,34 +573,6 @@ val2_avg_piv_null_piv2_0 | 111 val2_avg_piv_null_piv2_100 | 111 val2_avg_piv_null_piv2_200 | 111 val2_avg_piv_null_piv2_300 | 111 -val2_avg_piv_10_piv2_0 | 111 -val2_avg_piv_10_piv2_100 | 12 -val2_avg_piv_10_piv2_200 | 111 -val2_avg_piv_10_piv2_300 | 111 -val2_avg_piv_20_piv2_0 | 111 -val2_avg_piv_20_piv2_100 | 13 -val2_avg_piv_20_piv2_200 | 111 -val2_avg_piv_20_piv2_300 | 111 -val2_avg_piv_30_piv2_0 | 111 -val2_avg_piv_30_piv2_100 | 111 -val2_avg_piv_30_piv2_200 | 111 -val2_avg_piv_30_piv2_300 | 111 -val2_sum_piv_null_piv2_0 | 111 -val2_sum_piv_null_piv2_100 | 111 -val2_sum_piv_null_piv2_200 | 111 -val2_sum_piv_null_piv2_300 | 111 -val2_sum_piv_10_piv2_0 | 111 -val2_sum_piv_10_piv2_100 | 12 -val2_sum_piv_10_piv2_200 | 111 -val2_sum_piv_10_piv2_300 | 111 -val2_sum_piv_20_piv2_0 | 111 -val2_sum_piv_20_piv2_100 | 13 -val2_sum_piv_20_piv2_200 | 111 -val2_sum_piv_20_piv2_300 | 111 -val2_sum_piv_30_piv2_0 | 111 -val2_sum_piv_30_piv2_100 | 111 -val2_sum_piv_30_piv2_200 | 111 -val2_sum_piv_30_piv2_300 | 111 ... </pre> @@ -577,74 +581,49 @@ val2_sum_piv_30_piv2_300 | 111 DROP TABLE IF EXISTS pivout, pivout_dictionary; SELECT madlib.pivot('pivset_ext', 'pivout', 'id, id2', 'piv, piv2', 'val, val2', 'val=avg, val2=[avg,sum]', '111', True, True); -SELECT * FROM pivout_dictionary; +\\x off +SELECT * FROM pivout_dictionary order by __pivot_cid__; </pre> <pre class="result"> - __pivot_cid__ | pval | agg | piv | piv2 | col_name ----------------+------+-----+-----+------+------------------------------ - __p_1__ | val | avg | | 100 | "val_avg_piv_null_piv2_100" - __p_5__ | val | avg | 10 | 100 | "val_avg_piv_10_piv2_100" - __p_9__ | val | avg | 20 | 100 | "val_avg_piv_20_piv2_100" - __p_12__ | val | avg | 30 | 0 | "val_avg_piv_30_piv2_0" - __p_16__ | val2 | avg | | 0 | "val2_avg_piv_null_piv2_0" - __p_23__ | val2 | avg | 10 | 300 | "val2_avg_piv_10_piv2_300" - __p_27__ | val2 | avg | 20 | 300 | "val2_avg_piv_20_piv2_300" - __p_30__ | val2 | avg | 30 | 200 | "val2_avg_piv_30_piv2_200" - __p_34__ | val2 | sum | | 200 | "val2_sum_piv_null_piv2_200" - __p_38__ | val2 | sum | 10 | 200 | "val2_sum_piv_10_piv2_200" - __p_41__ | val2 | sum | 20 | 100 | "val2_sum_piv_20_piv2_100" - __p_45__ | val2 | sum | 30 | 100 | "val2_sum_piv_30_piv2_100" - __p_2__ | val | avg | | 200 | "val_avg_piv_null_piv2_200" - __p_6__ | val | avg | 10 | 200 | "val_avg_piv_10_piv2_200" - __p_11__ | val | avg | 20 | 300 | "val_avg_piv_20_piv2_300" - __p_15__ | val | avg | 30 | 300 | "val_avg_piv_30_piv2_300" - __p_19__ | val2 | avg | | 300 | "val2_avg_piv_null_piv2_300" - __p_20__ | val2 | avg | 10 | 0 | "val2_avg_piv_10_piv2_0" - __p_24__ | val2 | avg | 20 | 0 | "val2_avg_piv_20_piv2_0" - __p_28__ | val2 | avg | 30 | 0 | "val2_avg_piv_30_piv2_0" - __p_33__ | val2 | sum | | 100 | "val2_sum_piv_null_piv2_100" - __p_37__ | val2 | sum | 10 | 100 | "val2_sum_piv_10_piv2_100" - __p_42__ | val2 | sum | 20 | 200 | "val2_sum_piv_20_piv2_200" - __p_46__ | val2 | sum | 30 | 200 | "val2_sum_piv_30_piv2_200" - __p_3__ | val | avg | | 300 | "val_avg_piv_null_piv2_300" - __p_7__ | val | avg | 10 | 300 | "val_avg_piv_10_piv2_300" - __p_10__ | val | avg | 20 | 200 | "val_avg_piv_20_piv2_200" - __p_14__ | val | avg | 30 | 200 | "val_avg_piv_30_piv2_200" - __p_18__ | val2 | avg | | 200 | "val2_avg_piv_null_piv2_200" - __p_21__ | val2 | avg | 10 | 100 | "val2_avg_piv_10_piv2_100" - __p_25__ | val2 | avg | 20 | 100 | "val2_avg_piv_20_piv2_100" - __p_29__ | val2 | avg | 30 | 100 | "val2_avg_piv_30_piv2_100" - __p_32__ | val2 | sum | | 0 | "val2_sum_piv_null_piv2_0" - __p_36__ | val2 | sum | 10 | 0 | "val2_sum_piv_10_piv2_0" - __p_43__ | val2 | sum | 20 | 300 | "val2_sum_piv_20_piv2_300" - __p_47__ | val2 | sum | 30 | 300 | "val2_sum_piv_30_piv2_300" - __p_0__ | val | avg | | 0 | "val_avg_piv_null_piv2_0" - __p_4__ | val | avg | 10 | 0 | "val_avg_piv_10_piv2_0" - __p_8__ | val | avg | 20 | 0 | "val_avg_piv_20_piv2_0" - __p_13__ | val | avg | 30 | 100 | "val_avg_piv_30_piv2_100" - __p_17__ | val2 | avg | | 100 | "val2_avg_piv_null_piv2_100" - __p_22__ | val2 | avg | 10 | 200 | "val2_avg_piv_10_piv2_200" - __p_26__ | val2 | avg | 20 | 200 | "val2_avg_piv_20_piv2_200" - __p_31__ | val2 | avg | 30 | 300 | "val2_avg_piv_30_piv2_300" - __p_35__ | val2 | sum | | 300 | "val2_sum_piv_null_piv2_300" - __p_39__ | val2 | sum | 10 | 300 | "val2_sum_piv_10_piv2_300" - __p_40__ | val2 | sum | 20 | 0 | "val2_sum_piv_20_piv2_0" - __p_44__ | val2 | sum | 30 | 0 | "val2_sum_piv_30_piv2_0" +__pivot_cid__ | pval | agg | piv | piv2 | col_name +---------------+------+-----+--------+------+------------------------------ + __p_1__ | val | avg | [NULL] | 0 | "val_avg_piv_null_piv2_0" + __p_2__ | val | avg | [NULL] | 100 | "val_avg_piv_null_piv2_100" + __p_3__ | val | avg | [NULL] | 200 | "val_avg_piv_null_piv2_200" + __p_4__ | val | avg | [NULL] | 300 | "val_avg_piv_null_piv2_300" + __p_5__ | val | avg | 10 | 0 | "val_avg_piv_10_piv2_0" + __p_6__ | val | avg | 10 | 100 | "val_avg_piv_10_piv2_100" + __p_7__ | val | avg | 10 | 200 | "val_avg_piv_10_piv2_200" + __p_8__ | val | avg | 10 | 300 | "val_avg_piv_10_piv2_300" + __p_9__ | val | avg | 20 | 0 | "val_avg_piv_20_piv2_0" + __p_10__ | val | avg | 20 | 100 | "val_avg_piv_20_piv2_100" + __p_11__ | val | avg | 20 | 200 | "val_avg_piv_20_piv2_200" + __p_12__ | val | avg | 20 | 300 | "val_avg_piv_20_piv2_300" + __p_13__ | val | avg | 30 | 0 | "val_avg_piv_30_piv2_0" + __p_14__ | val | avg | 30 | 100 | "val_avg_piv_30_piv2_100" + __p_15__ | val | avg | 30 | 200 | "val_avg_piv_30_piv2_200" + __p_16__ | val | avg | 30 | 300 | "val_avg_piv_30_piv2_300" + __p_17__ | val2 | avg | [NULL] | 0 | "val2_avg_piv_null_piv2_0" + __p_18__ | val2 | avg | [NULL] | 100 | "val2_avg_piv_null_piv2_100" + __p_19__ | val2 | avg | [NULL] | 200 | "val2_avg_piv_null_piv2_200" + __p_20__ | val2 | avg | [NULL] | 300 | "val2_avg_piv_null_piv2_300" + __p_21__ | val2 | avg | 10 | 0 | "val2_avg_piv_10_piv2_0" +... (48 rows) </pre> <pre class="example"> +\\x on SELECT * FROM pivout ORDER BY id,id2; </pre> <pre class="result"> --[ RECORD 1 ]-- +-[ RECORD 1 ]---- id | 0 id2 | 0 -__p_0__ | 111 __p_1__ | 111 __p_2__ | 111 __p_3__ | 111 -__p_4__ | 1 -__p_5__ | 111 +__p_4__ | 111 +__p_5__ | 1 __p_6__ | 111 __p_7__ | 111 __p_8__ | 111 @@ -653,91 +632,40 @@ __p_10__ | 111 __p_11__ | 111 __p_12__ | 111 __p_13__ | 111 -__p_14__ | 111 -__p_15__ | 111 -__p_16__ | 111 -__p_17__ | 111 -__p_18__ | 111 -__p_19__ | 111 -__p_20__ | 11 -__p_21__ | 111 -__p_22__ | 111 -__p_23__ | 111 -__p_24__ | 111 -__p_25__ | 111 -__p_26__ | 111 -__p_27__ | 111 -__p_28__ | 111 -__p_29__ | 111 -__p_30__ | 111 -__p_31__ | 111 -__p_32__ | 111 -__p_33__ | 111 -__p_34__ | 111 -__p_35__ | 111 -__p_36__ | 11 -__p_37__ | 111 -__p_38__ | 111 -__p_39__ | 111 -__p_40__ | 111 -__p_41__ | 111 -__p_42__ | 111 -__p_43__ | 111 -__p_44__ | 111 -__p_45__ | 111 -__p_46__ | 111 -__p_47__ | 111 --[ RECORD 2 ]-- +... +-[ RECORD 2 ]---- id | 0 id2 | 1 -__p_0__ | 111 __p_1__ | 111 __p_2__ | 111 __p_3__ | 111 __p_4__ | 111 -__p_5__ | 2 +__p_5__ | 111 +__p_6__ | 2 +__p_7__ | 111 +__p_8__ | 111 +__p_9__ | 111 +__p_10__ | 3 +__p_11__ | 111 +__p_12__ | 111 +__p_13__ | 111 +... +-[ RECORD 3 ]---- +id | 1 +id2 | 0 +__p_1__ | 111 +__p_2__ | 111 +__p_3__ | 111 +__p_4__ | 111 +__p_5__ | 111 __p_6__ | 111 __p_7__ | 111 __p_8__ | 111 -__p_9__ | 3 +__p_9__ | 111 __p_10__ | 111 __p_11__ | 111 __p_12__ | 111 __p_13__ | 111 -__p_14__ | 111 -__p_15__ | 111 -__p_16__ | 111 -__p_17__ | 111 -__p_18__ | 111 -__p_19__ | 111 -__p_20__ | 111 -__p_21__ | 12 -__p_22__ | 111 -__p_23__ | 111 -__p_24__ | 111 -__p_25__ | 13 -__p_26__ | 111 -__p_27__ | 111 -__p_28__ | 111 -__p_29__ | 111 -__p_30__ | 111 -__p_31__ | 111 -__p_32__ | 111 -__p_33__ | 111 -__p_34__ | 111 -__p_35__ | 111 -__p_36__ | 111 -__p_37__ | 12 -__p_38__ | 111 -__p_39__ | 111 -__p_40__ | 111 -__p_41__ | 13 -__p_42__ | 111 -__p_43__ | 111 -__p_44__ | 111 -__p_45__ | 111 -__p_46__ | 111 -__p_47__ | 111 ... </pre> @@ -786,7 +714,8 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.pivot( aggregate_func TEXT, fill_value TEXT, keep_null BOOLEAN, - output_col_dictionary BOOLEAN + output_col_dictionary BOOLEAN, + output_type TEXT ) RETURNS VOID AS $$ PythonFunction(utilities, pivot, pivot) @@ -794,6 +723,22 @@ $$ LANGUAGE plpythonu VOLATILE m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `'); CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.pivot( + source_table TEXT, + out_table TEXT, + index TEXT, + pivot_cols TEXT, + pivot_values TEXT, + aggregate_func TEXT, + fill_value TEXT, + keep_null BOOLEAN, + output_col_dictionary BOOLEAN + +) RETURNS VOID AS $$ + SELECT MADLIB_SCHEMA.pivot($1, $2, $3, $4, $5, $6, $7, $8, $9, NULL) +$$ LANGUAGE sql VOLATILE +m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `'); + +CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.pivot( source_table TEXT, out_table TEXT, index TEXT, http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/bb209bbb/src/ports/postgres/modules/utilities/test/pivot.sql_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/utilities/test/pivot.sql_in b/src/ports/postgres/modules/utilities/test/pivot.sql_in index 79bcc57..3dafd18 100644 --- a/src/ports/postgres/modules/utilities/test/pivot.sql_in +++ b/src/ports/postgres/modules/utilities/test/pivot.sql_in @@ -28,16 +28,16 @@ CREATE TABLE pivset( ); INSERT INTO pivset VALUES - (0, 10, 1), - (0, 10, 2), - (0, 20, 3), - (1, 20, 4), - (1, 30, 5), - (1, 30, 6), - (1, 10, 7), - (NULL, 10, 8), - (0, NULL, 9), - (0, 10, NULL); + (0, 10, 1), + (0, 10, 2), + (0, 20, 3), + (1, 20, 4), + (1, 30, 5), + (1, 30, 6), + (1, 10, 7), + (NULL, 10, 8), + (0, NULL, 9), + (0, 10, NULL); DROP TABLE IF EXISTS pivout; SELECT pivot('pivset', 'pivout', 'id', 'piv', 'val'); @@ -47,12 +47,12 @@ SELECT assert(val_avg_piv_20 = 3, 'Wrong output in pivoting') FROM pivout WHERE DROP VIEW IF EXISTS pivset_ext; CREATE VIEW pivset_ext AS - SELECT *, + SELECT *, COALESCE(id + (pivset.val / 3), 0) AS id2, COALESCE(piv + (pivset.val / 3), 0) AS piv2, COALESCE(val + 10, 0) AS val2 FROM pivset; -SELECT id,id2,piv,piv2,val,val2 FROM pivset_ext +SELECT id,id2,piv,piv2,val,val2 FROM pivset_ext ORDER BY id,id2,piv,piv2,val,val2; DROP TABLE IF EXISTS pivout; @@ -60,87 +60,87 @@ SELECT pivot('pivset_ext', 'pivout', 'id,id2', 'piv', 'val'); SELECT * FROM pivout; SELECT assert(val_avg_piv_10 = 1.5, - 'Wrong output in pivoting: index columns') FROM pivout - WHERE id = 0 AND id2 = 0; + 'Wrong output in pivoting: index columns') FROM pivout + WHERE id = 0 AND id2 = 0; DROP TABLE IF EXISTS pivout; SELECT pivot('pivset_ext', 'pivout', 'id', 'piv, piv2', 'val'); SELECT * FROM pivout; SELECT assert(val_avg_piv_10_piv2_10 = 1.5, - 'Wrong output in pivoting: pivot columns') FROM pivout WHERE id = 0; + 'Wrong output in pivoting: pivot columns') FROM pivout WHERE id = 0; DROP TABLE IF EXISTS pivout; SELECT pivot('pivset_ext', 'pivout', 'id', 'piv', 'val, val2'); SELECT * FROM pivout; SELECT assert(val2_avg_piv_20 = 13, - 'Wrong output in pivoting: value columns') FROM pivout WHERE id = 0; + 'Wrong output in pivoting: value columns') FROM pivout WHERE id = 0; DROP TABLE IF EXISTS pivout; SELECT pivot('pivset_ext', 'pivout', 'id', 'piv', 'val', 'sum'); SELECT * FROM pivout; SELECT assert(val_sum_piv_10 = 3, - 'Wrong output in pivoting: aggregate functions') FROM pivout WHERE id = 0; + 'Wrong output in pivoting: aggregate functions') FROM pivout WHERE id = 0; DROP TABLE IF EXISTS pivout; SELECT pivot('pivset_ext', 'pivout', 'id', 'piv', 'val', 'sum', True); SELECT * FROM pivout; SELECT assert(val_sum_piv_null = 9, - 'Wrong output in pivoting: keep null') FROM pivout WHERE id = 0; + 'Wrong output in pivoting: keep null') FROM pivout WHERE id = 0; DROP TABLE IF EXISTS pivout; SELECT pivot('pivset_ext', 'pivout', 'id', 'piv', 'val', 'sum', '111'); SELECT * FROM pivout; SELECT assert(val_sum_piv_30 = 111, - 'Wrong output in pivoting: fill value') FROM pivout WHERE id = 0; + 'Wrong output in pivoting: fill value') FROM pivout WHERE id = 0; DROP TABLE IF EXISTS pivout; SELECT pivot('pivset_ext', 'pivout', 'id', 'piv', 'val', 'sum', '111', True); SELECT * FROM pivout; SELECT assert(val_sum_piv_30 = 111 AND val_sum_piv_null = 9, - 'Wrong output in pivoting: fill value') FROM pivout WHERE id = 0; + 'Wrong output in pivoting: fill value') FROM pivout WHERE id = 0; DROP TABLE IF EXISTS pivout; SELECT pivot('pivset_ext', 'pivout', 'id', 'piv', 'val', 'avg, sum'); SELECT * FROM pivout; SELECT assert(val_avg_piv_10 = 1.5 AND val_sum_piv_10 = 3, - 'Wrong output in pivoting: aggregate functions') FROM pivout WHERE id = 0; + 'Wrong output in pivoting: aggregate functions') FROM pivout WHERE id = 0; DROP TABLE IF EXISTS pivout; SELECT pivot('pivset_ext', 'pivout', 'id', 'piv, piv2', 'val', 'avg, sum'); SELECT * FROM pivout; SELECT assert(val_avg_piv_10_piv2_10 = 1.5 AND val_sum_piv_10_piv2_10 = 3, - 'Wrong output in pivoting: aggregate functions') FROM pivout WHERE id = 0; + 'Wrong output in pivoting: aggregate functions') FROM pivout WHERE id = 0; DROP TABLE IF EXISTS pivout; SELECT pivot('pivset_ext', 'pivout', 'id', 'piv', 'val, val2', 'avg, sum'); SELECT * FROM pivout; SELECT assert(val_sum_piv_10 = 3 AND val2_avg_piv_20 = 13, - 'Wrong output in pivoting: aggregate functions') FROM pivout WHERE id = 0; + 'Wrong output in pivoting: aggregate functions') FROM pivout WHERE id = 0; DROP TABLE IF EXISTS pivout; SELECT pivot('pivset_ext', 'pivout', 'id', 'piv', 'val, val2', - 'val=avg, val2=sum'); + 'val=avg, val2=sum'); SELECT * FROM pivout; SELECT assert(val_avg_piv_10 = 1.5 AND val2_sum_piv_10 = 23, - 'Wrong output in pivoting: aggregate functions') FROM pivout WHERE id = 0; + 'Wrong output in pivoting: aggregate functions') FROM pivout WHERE id = 0; DROP TABLE IF EXISTS pivout; SELECT pivot('pivset_ext', 'pivout', 'id', 'piv', 'val, val2', - 'val=avg, val2=[avg,sum]'); + 'val=avg, val2=[avg,sum]'); SELECT * FROM pivout; SELECT assert(val2_avg_piv_20 = 13 AND val2_sum_piv_10 = 23, - 'Wrong output in pivoting: aggregate functions') FROM pivout WHERE id = 0; + 'Wrong output in pivoting: aggregate functions') FROM pivout WHERE id = 0; DROP TABLE IF EXISTS pivout; DROP TABLE IF EXISTS pivout_dictionary; @@ -148,9 +148,9 @@ SELECT pivot('pivset_ext', 'pivout', 'id, id2', 'piv, piv2', 'val, val2', 'val=avg, val2=[avg,sum]', '111', True, True); SELECT * FROM pivout; -SELECT assert(__p_7__ = 1.5, - 'Wrong output in pivoting: Output dictionary') FROM pivout - WHERE id = 0 AND id2 = 0; +SELECT assert(__p_8__ = 1.5, + 'Wrong output in pivoting: Output dictionary') FROM pivout + WHERE id = 0 AND id2 = 0; DROP FUNCTION IF EXISTS array_add1(ANYARRAY, ANYELEMENT); DROP AGGREGATE IF EXISTS array_accum1 (anyelement); @@ -167,4 +167,18 @@ DROP TABLE IF EXISTS pivout; SELECT pivot('pivset_ext', 'pivout', 'id', 'piv', 'val', 'array_accum1'); SELECT * FROM pivout; +DROP TABLE IF EXISTS pivout; +DROP TABLE IF EXISTS pivout_dictionary; +SELECT pivot('pivset_ext', 'pivout', 'id, id2', 'piv, piv2', 'val, val2', + 'val=avg, val2=[avg,sum]', '111', True, True, 'a'); +SELECT * FROM pivout; +SELECT * FROM pivout_dictionary; + +DROP TABLE IF EXISTS pivout; +DROP TABLE IF EXISTS pivout_dictionary; +SELECT pivot('pivset_ext', 'pivout', 'id, id2', 'piv, piv2', 'val, val2', + 'val=avg, val2=[avg,sum]', '111', True, True, 's'); +SELECT * FROM pivout; +SELECT * FROM pivout_dictionary; + DROP VIEW IF EXISTS pivset_ext; http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/bb209bbb/src/ports/postgres/modules/utilities/utilities.py_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/utilities/utilities.py_in b/src/ports/postgres/modules/utilities/utilities.py_in index 0e47aea..126f4e6 100644 --- a/src/ports/postgres/modules/utilities/utilities.py_in +++ b/src/ports/postgres/modules/utilities/utilities.py_in @@ -1,4 +1,5 @@ +import collections import re import time import random @@ -583,16 +584,16 @@ def extract_keyvalue_params(input_params, else: continue try: - if param_type in (int, str, float): - parameter_dict[param_name] = param_type(param_value) - elif param_type == list: - parameter_dict[param_name] = split_quoted_delimited_str( - param_value.strip('[](){} ')) - elif param_type == bool: + if param_type == bool: # bool is not subclassable # True values are y, yes, t, true, on and 1; # False values are n, no, f, false, off and 0. # Raises ValueError if anything else. parameter_dict[param_name] = bool(strtobool(param_value)) + elif param_type in (int, str, float): + parameter_dict[param_name] = param_type(param_value) + elif issubclass(param_type, collections.Iterable): + parameter_dict[param_name] = split_quoted_delimited_str( + param_value.strip('[](){} ')) else: raise TypeError("Invalid input: {0} has unsupported type " "{1}".format(param_name, usage_str)) http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/bb209bbb/src/ports/postgres/modules/utilities/validate_args.py_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/utilities/validate_args.py_in b/src/ports/postgres/modules/utilities/validate_args.py_in index 91f34b8..5832124 100644 --- a/src/ports/postgres/modules/utilities/validate_args.py_in +++ b/src/ports/postgres/modules/utilities/validate_args.py_in @@ -345,8 +345,9 @@ def get_cols_and_types(tbl): def get_expr_type(expr, tbl): - """ Temporary function to obtain the type of an expression + """ Return the type of an expression run on a given table + Note: this Args: @param expr @@ -356,7 +357,6 @@ def get_expr_type(expr, tbl): expr_type = plpy.execute(""" SELECT pg_typeof({0}) AS type FROM {1} - WHERE ({0}) IS NOT NULL LIMIT 1 """.format(expr, tbl))[0]['type'] return expr_type.upper()