[10/34] incubator-madlib git commit: Pivot: Add support for array output

okislal Fri, 16 Jun 2017 13:58:36 -0700

Pivot: Add support for array output

JIRA: MADLIB-1066


When total pivoted columns exceed the Postgresql limit (250 - 1600
depending on the type of columns), an array output becomes
essential. This commit adds support to get each pivoted set of columns
(all columns related to a particular value-aggregate combination) as an
array. There is also support for getting the output as madlib.svec.

Closes #108


Project: http://git-wip-us.apache.org/repos/asf/incubator-madlib/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-madlib/commit/bb209bbb
Tree: http://git-wip-us.apache.org/repos/asf/incubator-madlib/tree/bb209bbb
Diff: http://git-wip-us.apache.org/repos/asf/incubator-madlib/diff/bb209bbb

Branch: refs/heads/latest_release
Commit: bb209bbb6e081a2838a3f698947529358792e47f
Parents: 6b466ea
Author: Rahul Iyer <ri...@apache.org>
Authored: Fri Mar 31 11:15:58 2017 -0700
Committer: Rahul Iyer <ri...@apache.org>
Committed: Fri Mar 31 11:15:58 2017 -0700

----------------------------------------------------------------------
 .../postgres/modules/utilities/pivot.py_in      | 238 +++++++++-----
 .../postgres/modules/utilities/pivot.sql_in     | 327 ++++++++-----------
 .../modules/utilities/test/pivot.sql_in         |  74 +++--
 .../postgres/modules/utilities/utilities.py_in  |  13 +-
 .../modules/utilities/validate_args.py_in       |   4 +-
 5 files changed, 354 insertions(+), 302 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/bb209bbb/src/ports/postgres/modules/utilities/pivot.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/utilities/pivot.py_in 
b/src/ports/postgres/modules/utilities/pivot.py_in
index 7e342f1..6d0ebae 100644
--- a/src/ports/postgres/modules/utilities/pivot.py_in
+++ b/src/ports/postgres/modules/utilities/pivot.py_in
@@ -42,6 +42,7 @@ from validate_args import columns_exist_in_table
 from validate_args import table_is_empty
 from validate_args import _get_table_schema_names
 from validate_args import get_first_schema
+from validate_args import get_expr_type
 
 
 m4_changequote(`<!', `!>')
@@ -49,14 +50,14 @@ m4_changequote(`<!', `!>')
 
 def pivot(schema_madlib, source_table, out_table, index, pivot_cols,
           pivot_values, aggregate_func=None, fill_value=None, keep_null=False,
-          output_col_dictionary=False, **kwargs):
+          output_col_dictionary=False, output_type=None, **kwargs):
     """
     Helper function that can be used to pivot tables
     Args:
         @param source_table     The original data table
         @param out_table        The output table that contains the dummy
                                 variable columns
-        @param index            The index columns to group by the records by
+        @param index            The index columns to group the records by
         @param pivot_cols       The columns to pivot the table
         @param pivot_values     The value columns to be summarized in the
                                 pivoted table
@@ -80,6 +81,16 @@ def pivot(schema_madlib, source_table, out_table, index, 
pivot_cols,
         FROM pivset GROUP BY id ORDER BY id)
     """
 
+    def _fill_value_wrapper(sel_str):
+        """ Wrap a given SQL SELECT statement with COALESCE using a given fill 
value.
+
+            No-op if the fill value is not provided
+        """
+        if fill_value is not None:
+            return " COALESCE({0}, {1}) ".format(sel_str, fill_value)
+        else:
+            return sel_str
+
     with MinWarning('warning'):
 
         # If there are more than 1000 columns for the output table, we give a
@@ -93,6 +104,25 @@ def pivot(schema_madlib, source_table, out_table, index, 
pivot_cols,
         indices = split_quoted_delimited_str(index)
         pcols = split_quoted_delimited_str(pivot_cols)
         pvals = split_quoted_delimited_str(pivot_values)
+
+        # output type for specific supported types
+        output_type = 'column' if not output_type else output_type.lower()
+        all_output_types = sorted(['array', 'column', 'svec'])
+        try:
+            # allow user to specify a prefix substring of
+            # supported output types. This works because the supported
+            # output types have unique prefixes.
+            output_type = next(s for s in all_output_types
+                               if s.startswith(output_type))
+        except StopIteration:
+            # next() returns a StopIteration if no element found
+            plpy.error("Encoding categorical: Output type should be one of 
{0}".
+                       format(','.join(all_output_types)))
+
+        is_array_output = output_type in ('array', 'svec')
+        # always build dictionary table if output is array
+        output_col_dictionary = True if is_array_output else 
output_col_dictionary
+
         validate_pivot_coding(source_table, out_table, indices, pcols, pvals)
 
         # Strip the end quotes for building output columns (this can only be
@@ -104,6 +134,8 @@ def pivot(schema_madlib, source_table, out_table, index, 
pivot_cols,
         # value column.
         agg_dict = parse_aggregates(pvals, aggregate_func)
 
+        validate_output_types(source_table, agg_dict, is_array_output)
+
         # Find the distinct values of pivot_cols
         array_agg_str = ', '.join("array_agg(DISTINCT {pcol}) AS 
{pcol}_values".
                                   format(pcol=pcol) for pcol in pcols)
@@ -143,7 +175,7 @@ def pivot(schema_madlib, source_table, out_table, index, 
pivot_cols,
                                               for pcol in pcols])))
 
         # Check the max possible length of a output column name
-        # If it is over 63 (psql upper limit) create table lookup
+        # If it is over 63 (postgresql upper limit) create dictionary lookup
         for pval in pvals:
             agg_func = agg_dict[pval]
             # Length calculation: value column length + aggregate length +
@@ -159,122 +191,157 @@ def pivot(schema_madlib, source_table, out_table, 
index, pivot_cols,
                                  format(**locals()))
                 output_col_dictionary = True
 
-        # Create the output dictionary if needed
+        # Types of pivot columns are needed for building the right columns
+        # in the dictionary table and to decide if a pivot column value needs 
to
+        # be quoted during comparison (will be quoted if it's a text column)
+        types_str = ', '.join("pg_typeof(\"{pcol}\") as {pcol}".
+                              format(pcol=p) for p in pcols)
+        pcol_types = plpy.execute("SELECT {0} FROM {1} LIMIT 1".
+                                  format(types_str, source_table))[0]
         if output_col_dictionary:
             out_dict = out_table + "_dictionary"
             _assert(not table_exists(out_dict),
                     "Pivot: Output dictionary table already exists!")
-
-            # Collect the types for pivot columns
-            types_str = ','.join("pg_typeof(\"{pcol}\") as {pcol}_type".
-                                 format(pcol=pcol) for pcol in pcols)
-            pcol_types = plpy.execute("SELECT {0} FROM {1} LIMIT 1".
-                                      format(types_str, source_table))
-
             # Create the empty dictionary table
-            dict_str = ', '.join(" {pcol} {pcol_type} ".
-                                 format(pcol=pcol, 
pcol_type=pcol_types[0][pcol+"_type"])
-                                 for pcol in pcols)
+            pcol_names_types = ', '.join(" {pcol} {pcol_type} ".
+                                         format(pcol=pcol,
+                                                pcol_type=pcol_types[pcol])
+                                         for pcol in pcols)
             plpy.execute("""
                 CREATE TABLE {out_dict} (
-                    __pivot_cid__ VARCHAR, pval VARCHAR,
-                    agg VARCHAR, {dict_str}, col_name VARCHAR)
-                """.format(**locals()))
-
-            # The holder for rows to insert into output dictionary
-            insert_str = []
+                    __pivot_cid__ VARCHAR,
+                    pval VARCHAR,
+                    agg VARCHAR,
+                    {pcol_names_types},
+                    col_name VARCHAR)
+                """.format(out_dict=out_dict, 
pcol_names_types=pcol_names_types))
+
+            # List of rows to insert into output dictionary
+            dict_insert_str = []
             # Counter for the new output column names
-            dict_counter = 0
+            dict_counter = 1
 
-        pivot_str_sel_list = []
-        pivot_str_from_list = []
-        # Prepare the wrapper for fill value
-        if fill_value is not None:
-            fill_str_begin = " COALESCE("
-            fill_str_end = ", " + fill_value + " ) "
-        else:
-            fill_str_begin, fill_str_end = "", ""
+        pivot_sel_list = []
+        pivot_from_list = []
 
         for pval in pvals:
             agg_func = agg_dict[pval]
             for agg in agg_func:
+
+                # is using array_output, create a new array for each pval-agg 
combo
+                if is_array_output:
+                    # we store information in the dictionary table for each
+                    # index in the array. 'index_counter' is the current index
+                    # being updated (resets for each new array)
+                    index_counter = 1
+
+                sub_pivot_sel_list = []
                 for comb in pivot_comb:
                     pivot_col_condition = []
-                    pivot_col_name = ["\"{pval}_{agg}".format(pval=pval, 
agg=agg)]
+                    # note column name starts with double quotes
+                    pivot_col_name = ['{pval}_{agg}'.format(pval=pval, 
agg=agg)]
 
                     if output_col_dictionary:
                         # Prepare the entry for the dictionary
-                        insert_str.append("(\'__p_{dict_counter}__\', 
\'{pval}\', "
-                                          "\'{agg}\' 
".format(dict_counter=dict_counter,
-                                                              pval=pval, 
agg=agg))
+                        if not is_array_output:
+                            index_name = ("__p_{dict_counter}__".
+                                          format(dict_counter=dict_counter))
+                        else:
+                            # for arrays, index_name is just the index into 
each array
+                            index_name = str(index_counter)
+                            index_counter += 1
+                        dict_insert_str.append(
+                            "(\'{index_name}\', \'{pval}\', \'{agg}\' ".
+                            format(index_name=index_name, pval=pval, agg=agg))
 
                     # For every pivot column in a given combination
                     for counter, pcol in enumerate(pcols):
+                        if comb[counter] is None:
+                            quoted_pcol_value = "NULL"
+                        elif pcol_types[pcol] in ("text", "varchar", 
"character varying"):
+                            quoted_pcol_value = "'" + comb[counter] + "'"
+                        else:
+                            quoted_pcol_value = comb[counter]
+
                         # If we encounter a NULL value that means it is not 
filtered
                         # because of keep_null. Use "IS NULL" for comparison
                         if comb[counter] is None:
                             pivot_col_condition.append(" \"{0}\" IS 
NULL".format(pcol))
                             pivot_col_name.append("_{0}_null".format(pcol))
                         else:
-                            pivot_col_condition.append(" \"{0}\" = '{1}'".
-                                                       format(pcol, 
comb[counter]))
+                            pivot_col_condition.append(" \"{0}\" = {1}".
+                                                       format(pcol, 
quoted_pcol_value))
                             pivot_col_name.append("_{0}_{1}".format(pcol, 
comb[counter]))
 
-                        # Collect pcol values for the dict
                         if output_col_dictionary:
-                            insert_str.append("{0}".format(
-                                comb[counter] if comb[counter] is not None 
else "NULL"))
-                    pivot_col_name.append("\"")
+                            
dict_insert_str.append("{0}".format(quoted_pcol_value))
 
                     if output_col_dictionary:
-                        # Store the whole string in case some user wants it
-                        insert_str.append("\'{column_name}\')".
-                                          
format(column_name=''.join(pivot_col_name)))
-                        pivot_col_name = ["__p_"+str(dict_counter)+"__"]
+                        # Store the whole string as additional info
+                        
dict_insert_str.append("'{0}')".format(''.join(pivot_col_name)))
+                        pivot_col_name = ["__p_" + str(dict_counter) + "__"]
                         dict_counter += 1
+
                     # Collecting the whole sql query
                     # Please refer to the earlier comment for a sample output
-
                     # Build the pivot column with NULL values in tuples that 
don't
                     # satisfy that column's condition
-                    pivot_str_from = ("(CASE WHEN {condition} THEN {pval} END) 
"
-                                      "AS {pivot_col_name}".
-                                      format(pval=pval,
-                                             condition=' AND 
'.join(pivot_col_condition),
-                                             
pivot_col_name=''.join(pivot_col_name)))
-                    pivot_str_from_list.append(pivot_str_from)
-                    # Aggregate over each pivot column, while filtering all 
NULL values
-                    # created by previous query.
-                    pivot_str_sel = ("{fill_str_begin}"
-                                     "  {agg} ({pivot_col_name}) "
-                                     "    FILTER (WHERE {pivot_col_name} IS 
NOT NULL) "
-                                     "{fill_str_end} AS {pivot_col_name}".
-                                     format(agg=agg, 
fill_str_begin=fill_str_begin,
-                                            fill_str_end=fill_str_end,
-                                            
pivot_col_name=''.join(pivot_col_name)))
-                    pivot_str_sel_list.append(pivot_str_sel)
+                    p_name = '"{0}"'.format(''.join(pivot_col_name))
+                    pivot_str_from = (
+                        "(CASE WHEN {condition} THEN {pval} END) AS {p_name}".
+                        format(pval=pval,
+                               condition=' AND '.join(pivot_col_condition),
+                               p_name=p_name))
+                    pivot_from_list.append(pivot_str_from)
+
+                    # Aggregate over each pivot column, while filtering all 
NULL
+                    #  values created by previous query.
+                    sub_pivot_str_sel = _fill_value_wrapper(
+                        "{agg}({p_name}) "
+                        "   FILTER (WHERE {p_name} IS NOT NULL)".
+                        format(agg=agg, p_name=p_name))
+                    if not is_array_output:
+                        # keep spaces around the 'AS'
+                        sub_pivot_str_sel += " AS " + p_name
+                    sub_pivot_sel_list.append(sub_pivot_str_sel)
+
+                if sub_pivot_sel_list:
+                    if is_array_output:
+                        if output_type is 'svec':
+                            cast_str = 
'::FLOAT8[]::{0}.svec'.format(schema_madlib)
+                        else:
+                            cast_str = '::FLOAT8[]'
+                        pivot_sel_list.append(
+                            'ARRAY[{all_pivot_sel}]{cast_str} AS 
"{pval}_{agg}"'.
+                            format(all_pivot_sel=', '.join(sub_pivot_sel_list),
+                                   cast_str=cast_str,
+                                   pval=pval,
+                                   agg=agg))
+                    else:
+                        pivot_sel_list += sub_pivot_sel_list
 
         try:
             plpy.execute("""
                 CREATE TABLE {out_table} AS
                     SELECT {index},
-                           {pivot_str_sel_list}
+                           {all_pivot_sel_str}
                     FROM (
                             SELECT {index},
-                                   {pivot_str_from_list}
+                                   {all_pivot_from_str}
                             FROM {source_table}
                         ) x
                     GROUP BY {index}
                 """.format(out_table=out_table,
                            index=index,
                            source_table=source_table,
-                           pivot_str_from_list=', '.join(pivot_str_from_list),
-                           pivot_str_sel_list=', '.join(pivot_str_sel_list)))
+                           all_pivot_from_str=', '.join(pivot_from_list),
+                           all_pivot_sel_str=', '.join(pivot_sel_list)
+                           ))
 
             if output_col_dictionary:
                 plpy.execute("INSERT INTO {out_dict} VALUES {insert_sql}".
                              format(out_dict=out_dict,
-                                    insert_sql=', '.join(insert_str)))
+                                    insert_sql=', '.join(dict_insert_str)))
         except plpy.SPIError:
             # Warn user if the number of columns is over the limit
             with MinWarning("warning"):
@@ -314,16 +381,16 @@ def parse_aggregates(pvals, aggregate_func):
     5) A partial mapping (eg. 'val2=sum'): Use the default ('avg') for the
        missing value columns
     """
-    param_types = dict.fromkeys(pvals, list)
+    param_types = dict.fromkeys(pvals, tuple)
     agg_dict = extract_keyvalue_params(aggregate_func, param_types)
 
     if not agg_dict:
-        agg_list = split_quoted_delimited_str(aggregate_func)
-        agg_dict = dict.fromkeys(pvals, (agg_list if agg_list else ['avg']))
+        agg_list = tuple(split_quoted_delimited_str(aggregate_func))
+        agg_dict = dict.fromkeys(pvals, (agg_list if agg_list else ('avg', )))
     else:
         for pval in pvals:
             if pval not in agg_dict:
-                agg_dict[pval] = ['avg']
+                agg_dict[pval] = ('avg', )
     return agg_dict
 # 
------------------------------------------------------------------------------
 
@@ -364,6 +431,26 @@ def validate_pivot_coding(source_table, out_table, 
indices, pivs, vals):
 # 
------------------------------------------------------------------------------
 
 
+def validate_output_types(source_table, agg_dict, is_array_output):
+    """
+    Args:
+        @param source_table: str, Name of table containing data
+        @param agg_dict: dict, Key-value pair containing aggregates applied 
for each val column
+        @param is_array_output: bool, Is the pivot output columnar (False) or 
array (True)
+
+    Returns:
+        None
+    """
+    for val, func_iterable in agg_dict.items():
+        for func in func_iterable:
+            func_call_str = '{0}({1})'.format(func, val)
+            _assert(not ('[]' in get_expr_type(func_call_str, source_table) and
+                         is_array_output),
+                    "Pivot: Aggregate {0} with an array return type cannot be "
+                    "combined with output_type='array' or 'svec'".format(func))
+# ----------------------------------------------------------------------
+
+
 def pivot_help(schema_madlib, message, **kwargs):
     """
     Help function for pivot
@@ -401,14 +488,19 @@ For more details on function usage:
                             -- of the output pivot table
     pivot_cols,             -- Comma-separated columns that will form the
                             -- columns of the output pivot table
-    pivot_values            -- Comma-separated columns that contain the values
+    pivot_values,            -- Comma-separated columns that contain the values
                             -- to be summarized in the output pivot table
-    fill_value              -- If specified, determines how to fill NULL values
+    fill_value,              -- If specified, determines how to fill NULL 
values
                             -- resulting from pivot operation
-    keep_null               -- The flag for determining how to handle NULL
+    keep_null,               -- The flag for determining how to handle NULL
                             -- values in pivot columns
-    output_col_dictionary   -- The flag for enabling the creation of the
+    output_col_dictionary,   -- The flag for enabling the creation of the
                             -- output dictionary for shorter column names
+    output_type             -- This parameter controls the output format
+                            -- of the pivoted variables.
+                            -- If 'column', a column is created for each pivot
+                            -- If 'array', an array is created combining all 
pivots
+                            -- If 'svec', the array is cast to madlib.svec
  );
 
 -----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/bb209bbb/src/ports/postgres/modules/utilities/pivot.sql_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/utilities/pivot.sql_in 
b/src/ports/postgres/modules/utilities/pivot.sql_in
index cb2c223..7cdfbe0 100644
--- a/src/ports/postgres/modules/utilities/pivot.sql_in
+++ b/src/ports/postgres/modules/utilities/pivot.sql_in
@@ -59,7 +59,8 @@ pivot(
     aggregate_func,
     fill_value,
     keep_null,
-    output_col_dictionary
+    output_col_dictionary,
+    output_type
     )
 </pre>
 \b Arguments
@@ -67,6 +68,7 @@ pivot(
     <dt>source_table</dt>
     <dd>VARCHAR. Name of the source table (or view) containing data to
     pivot.</dd>
+
     <dt>output_table</dt>
     <dd>VARCHAR. Name of output table that contains the pivoted data.
     The output table contains all the columns present in
@@ -81,18 +83,21 @@ pivot(
     - aggregate function
     - name of the pivot column <em>'pivot_cols'</em>
     - values in the pivot column
-
     </dd>
+
     <dt>index </dt>
     <dd>VARCHAR. Comma-separated columns that will form the index of the output
     pivot table.  By index we mean the values to group by; these are the rows
     in the output pivot table.</dd>
+
     <dt>pivot_cols </dt>
     <dd>VARCHAR. Comma-separated columns that will form the columns of the
     output pivot table.</dd>
+
     <dt>pivot_values </dt>
     <dd>VARCHAR. Comma-separated columns that contain the values to be
     summarized in the output pivot table.</dd>
+
     <dt>aggregate_func (optional)</dt>
     <dd>VARCHAR. default: 'AVG'. A comma-separated list of aggregates to be
     applied to values. These can be PostgreSQL built-in aggregates [1] or 
UDAs. It is
@@ -113,10 +118,12 @@ pivot(
     values resulting from pivot operation. This is a global parameter (not
     applied per aggregate) and is applied post-aggregation to the output
     table.</dd>
+
     <dt>keep_null (optional)</dt>
     <dd>BOOLEAN. default: FALSE. If TRUE, then pivot columns are created
     corresponding to NULL categories. If FALSE, then no pivot columns will be
     created for NULL categories.</dd>
+
     <dt>output_col_dictionary (optional)</dt>
     <dd>BOOLEAN. default: FALSE. This parameter is used to handle
     auto-generated column names that exceed the PostgreSQL limit of 63 bytes
@@ -127,6 +134,19 @@ pivot(
     a dictionary output file will be created and a message given to the user.
     </dd>
 
+    <dt>output_type (optional)</dt>
+    <dd>VARCHAR. default: 'column'.  This parameter controls the output format
+    of the pivoted variables. If 'column', a column is created for each pivot
+    variable. PostgreSQL limits the number of columns in a table
+    (250 - 1600 depending on column types).
+    If the total number of output columns exceeds this limit, then make this
+    parameter either 'array' (to combine the output columns into an array) or
+    'svec' (to cast the array output to <em>'madlib.svec'</em> type).
+
+    A dictionary will be created (<em>output_col_dictionary=TRUE</em>)
+    when 'output_type' is 'array' or 'svec' to define each index into the 
array.
+    </dd>
+
 </dl>
 
 @anchor notes
@@ -138,8 +158,8 @@ allowed so NULLs are ignored.
 - It is not allowed to set the fill_value parameter without setting the
 aggregate_func parameter due to possible ambiguity. Set
 aggregate_func to NULL for the default behavior and use fill_value as desired.
-Please note that full_value must be of the same type as the output of the 
-aggregate_func (or capable of being cast to the same type by PostgreSQL), 
+Please note that full_value must be of the same type as the output of the
+aggregate_func (or capable of being cast to the same type by PostgreSQL),
 or else an error will result.
 - It is not allowed to set the output_col_dictionary parameter without setting
 the keep_null parameter due to possible ambiguity. Set
@@ -303,13 +323,9 @@ SELECT * FROM pivout ORDER BY id,id2;
     |   0 |              8 |                |
 </pre>
 
--# Turn on the extended view for readability:
+-# Use multiple pivot columns with columnar output:
 <pre class="example">
 \\x on
-</pre>
-
--# Use multiple pivot columns:
-<pre class="example">
 DROP TABLE IF EXISTS pivout;
 SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv, piv2', 'val');
 SELECT * FROM pivout ORDER BY id;
@@ -346,10 +362,47 @@ val_avg_piv_30_piv2_300 |
 ...
 </pre>
 
+-# Use multiple pivot columns (same as above) with an array output:
+<pre class="example">
+DROP TABLE IF EXISTS pivout;
+SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv, piv2', 'val',
+                    NULL, NULL, FALSE, FALSE, 'array');
+\\x off
+SELECT * FROM pivout ORDER BY id;
+</pre>
+<pre class="result">
+   id   |                          val_avg
+--------+------------------------------------------------------------
+      0 | {1,2,NULL,NULL,NULL,3,NULL,NULL,NULL,NULL,NULL,NULL}
+      1 | {NULL,NULL,7,NULL,NULL,4,NULL,NULL,NULL,NULL,5.5,NULL}
+ [NULL] | {NULL,NULL,NULL,8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL}
+</pre>
+<pre class="example">
+-- Use the dictionary to understand what each index of an array corresponds to
+SELECT * FROM pivout_dictionary;
+</pre>
+<pre class="result">
+ __pivot_cid__ | pval | agg | piv | piv2 |         col_name
+---------------+------+-----+-----+------+---------------------------
+ 1             | val  | avg |  10 |    0 | "val_avg_piv_10_piv2_0"
+ 2             | val  | avg |  10 |  100 | "val_avg_piv_10_piv2_100"
+ 3             | val  | avg |  10 |  200 | "val_avg_piv_10_piv2_200"
+ 4             | val  | avg |  10 |  300 | "val_avg_piv_10_piv2_300"
+ 5             | val  | avg |  20 |    0 | "val_avg_piv_20_piv2_0"
+ 6             | val  | avg |  20 |  100 | "val_avg_piv_20_piv2_100"
+ 7             | val  | avg |  20 |  200 | "val_avg_piv_20_piv2_200"
+ 8             | val  | avg |  20 |  300 | "val_avg_piv_20_piv2_300"
+ 9             | val  | avg |  30 |    0 | "val_avg_piv_30_piv2_0"
+ 10            | val  | avg |  30 |  100 | "val_avg_piv_30_piv2_100"
+ 11            | val  | avg |  30 |  200 | "val_avg_piv_30_piv2_200"
+ 12            | val  | avg |  30 |  300 | "val_avg_piv_30_piv2_300"
+</pre>
+
 -# Use multiple value columns:
 <pre class="example">
 DROP TABLE IF EXISTS pivout;
 SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv', 'val, val2');
+\\x on
 SELECT * FROM pivout ORDER BY id;
 </pre>
 <pre class="result">
@@ -377,6 +430,7 @@ val2_avg_piv_30 | 15.5
 <pre class="example">
 DROP TABLE IF EXISTS pivout;
 SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv', 'val', 'avg, sum');
+\\x on
 SELECT * FROM pivout ORDER BY id;
 </pre>
 <pre class="result">
@@ -404,6 +458,7 @@ val_sum_piv_30 | 11
 DROP TABLE IF EXISTS pivout;
 SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv', 'val, val2',
     'val=avg, val2=sum');
+\\x on
 SELECT * FROM pivout ORDER BY id;
 </pre>
 <pre class="result">
@@ -431,6 +486,7 @@ val2_sum_piv_30 | 31
 DROP TABLE IF EXISTS pivout;
 SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv', 'val, val2',
     'val=avg, val2=[avg,sum]');
+\\x on
 SELECT * FROM pivout ORDER BY id;
 </pre>
 <pre class="result">
@@ -464,6 +520,7 @@ val2_sum_piv_30 | 31
 DROP TABLE IF EXISTS pivout;
 SELECT madlib.pivot('pivset_ext', 'pivout', 'id, id2', 'piv, piv2', 'val, 
val2',
     'val=avg, val2=[avg,sum]', '111', True);
+\\x on
 SELECT * FROM pivout ORDER BY id,id2;
 </pre>
 <pre class="result">
@@ -492,32 +549,7 @@ val2_avg_piv_null_piv2_200 | 111
 val2_avg_piv_null_piv2_300 | 111
 val2_avg_piv_10_piv2_0     | 11
 val2_avg_piv_10_piv2_100   | 111
-val2_avg_piv_10_piv2_200   | 111
-val2_avg_piv_10_piv2_300   | 111
-val2_avg_piv_20_piv2_0     | 111
-val2_avg_piv_20_piv2_100   | 111
-val2_avg_piv_20_piv2_200   | 111
-val2_avg_piv_20_piv2_300   | 111
-val2_avg_piv_30_piv2_0     | 111
-val2_avg_piv_30_piv2_100   | 111
-val2_avg_piv_30_piv2_200   | 111
-val2_avg_piv_30_piv2_300   | 111
-val2_sum_piv_null_piv2_0   | 111
-val2_sum_piv_null_piv2_100 | 111
-val2_sum_piv_null_piv2_200 | 111
-val2_sum_piv_null_piv2_300 | 111
-val2_sum_piv_10_piv2_0     | 11
-val2_sum_piv_10_piv2_100   | 111
-val2_sum_piv_10_piv2_200   | 111
-val2_sum_piv_10_piv2_300   | 111
-val2_sum_piv_20_piv2_0     | 111
-val2_sum_piv_20_piv2_100   | 111
-val2_sum_piv_20_piv2_200   | 111
-val2_sum_piv_20_piv2_300   | 111
-val2_sum_piv_30_piv2_0     | 111
-val2_sum_piv_30_piv2_100   | 111
-val2_sum_piv_30_piv2_200   | 111
-val2_sum_piv_30_piv2_300   | 111
+...
 -[ RECORD 2 ]--------------+-----
 id                         | 0
 id2                        | 1
@@ -541,34 +573,6 @@ val2_avg_piv_null_piv2_0   | 111
 val2_avg_piv_null_piv2_100 | 111
 val2_avg_piv_null_piv2_200 | 111
 val2_avg_piv_null_piv2_300 | 111
-val2_avg_piv_10_piv2_0     | 111
-val2_avg_piv_10_piv2_100   | 12
-val2_avg_piv_10_piv2_200   | 111
-val2_avg_piv_10_piv2_300   | 111
-val2_avg_piv_20_piv2_0     | 111
-val2_avg_piv_20_piv2_100   | 13
-val2_avg_piv_20_piv2_200   | 111
-val2_avg_piv_20_piv2_300   | 111
-val2_avg_piv_30_piv2_0     | 111
-val2_avg_piv_30_piv2_100   | 111
-val2_avg_piv_30_piv2_200   | 111
-val2_avg_piv_30_piv2_300   | 111
-val2_sum_piv_null_piv2_0   | 111
-val2_sum_piv_null_piv2_100 | 111
-val2_sum_piv_null_piv2_200 | 111
-val2_sum_piv_null_piv2_300 | 111
-val2_sum_piv_10_piv2_0     | 111
-val2_sum_piv_10_piv2_100   | 12
-val2_sum_piv_10_piv2_200   | 111
-val2_sum_piv_10_piv2_300   | 111
-val2_sum_piv_20_piv2_0     | 111
-val2_sum_piv_20_piv2_100   | 13
-val2_sum_piv_20_piv2_200   | 111
-val2_sum_piv_20_piv2_300   | 111
-val2_sum_piv_30_piv2_0     | 111
-val2_sum_piv_30_piv2_100   | 111
-val2_sum_piv_30_piv2_200   | 111
-val2_sum_piv_30_piv2_300   | 111
 ...
 </pre>
 
@@ -577,74 +581,49 @@ val2_sum_piv_30_piv2_300   | 111
 DROP TABLE IF EXISTS pivout, pivout_dictionary;
 SELECT madlib.pivot('pivset_ext', 'pivout', 'id, id2', 'piv, piv2', 'val, 
val2',
     'val=avg, val2=[avg,sum]', '111', True, True);
-SELECT * FROM pivout_dictionary;
+\\x off
+SELECT * FROM pivout_dictionary order by __pivot_cid__;
 </pre>
 <pre class="result">
-  __pivot_cid__ | pval | agg | piv | piv2 |           col_name
----------------+------+-----+-----+------+------------------------------
- __p_1__       | val  | avg |     |  100 | "val_avg_piv_null_piv2_100"
- __p_5__       | val  | avg |  10 |  100 | "val_avg_piv_10_piv2_100"
- __p_9__       | val  | avg |  20 |  100 | "val_avg_piv_20_piv2_100"
- __p_12__      | val  | avg |  30 |    0 | "val_avg_piv_30_piv2_0"
- __p_16__      | val2 | avg |     |    0 | "val2_avg_piv_null_piv2_0"
- __p_23__      | val2 | avg |  10 |  300 | "val2_avg_piv_10_piv2_300"
- __p_27__      | val2 | avg |  20 |  300 | "val2_avg_piv_20_piv2_300"
- __p_30__      | val2 | avg |  30 |  200 | "val2_avg_piv_30_piv2_200"
- __p_34__      | val2 | sum |     |  200 | "val2_sum_piv_null_piv2_200"
- __p_38__      | val2 | sum |  10 |  200 | "val2_sum_piv_10_piv2_200"
- __p_41__      | val2 | sum |  20 |  100 | "val2_sum_piv_20_piv2_100"
- __p_45__      | val2 | sum |  30 |  100 | "val2_sum_piv_30_piv2_100"
- __p_2__       | val  | avg |     |  200 | "val_avg_piv_null_piv2_200"
- __p_6__       | val  | avg |  10 |  200 | "val_avg_piv_10_piv2_200"
- __p_11__      | val  | avg |  20 |  300 | "val_avg_piv_20_piv2_300"
- __p_15__      | val  | avg |  30 |  300 | "val_avg_piv_30_piv2_300"
- __p_19__      | val2 | avg |     |  300 | "val2_avg_piv_null_piv2_300"
- __p_20__      | val2 | avg |  10 |    0 | "val2_avg_piv_10_piv2_0"
- __p_24__      | val2 | avg |  20 |    0 | "val2_avg_piv_20_piv2_0"
- __p_28__      | val2 | avg |  30 |    0 | "val2_avg_piv_30_piv2_0"
- __p_33__      | val2 | sum |     |  100 | "val2_sum_piv_null_piv2_100"
- __p_37__      | val2 | sum |  10 |  100 | "val2_sum_piv_10_piv2_100"
- __p_42__      | val2 | sum |  20 |  200 | "val2_sum_piv_20_piv2_200"
- __p_46__      | val2 | sum |  30 |  200 | "val2_sum_piv_30_piv2_200"
- __p_3__       | val  | avg |     |  300 | "val_avg_piv_null_piv2_300"
- __p_7__       | val  | avg |  10 |  300 | "val_avg_piv_10_piv2_300"
- __p_10__      | val  | avg |  20 |  200 | "val_avg_piv_20_piv2_200"
- __p_14__      | val  | avg |  30 |  200 | "val_avg_piv_30_piv2_200"
- __p_18__      | val2 | avg |     |  200 | "val2_avg_piv_null_piv2_200"
- __p_21__      | val2 | avg |  10 |  100 | "val2_avg_piv_10_piv2_100"
- __p_25__      | val2 | avg |  20 |  100 | "val2_avg_piv_20_piv2_100"
- __p_29__      | val2 | avg |  30 |  100 | "val2_avg_piv_30_piv2_100"
- __p_32__      | val2 | sum |     |    0 | "val2_sum_piv_null_piv2_0"
- __p_36__      | val2 | sum |  10 |    0 | "val2_sum_piv_10_piv2_0"
- __p_43__      | val2 | sum |  20 |  300 | "val2_sum_piv_20_piv2_300"
- __p_47__      | val2 | sum |  30 |  300 | "val2_sum_piv_30_piv2_300"
- __p_0__       | val  | avg |     |    0 | "val_avg_piv_null_piv2_0"
- __p_4__       | val  | avg |  10 |    0 | "val_avg_piv_10_piv2_0"
- __p_8__       | val  | avg |  20 |    0 | "val_avg_piv_20_piv2_0"
- __p_13__      | val  | avg |  30 |  100 | "val_avg_piv_30_piv2_100"
- __p_17__      | val2 | avg |     |  100 | "val2_avg_piv_null_piv2_100"
- __p_22__      | val2 | avg |  10 |  200 | "val2_avg_piv_10_piv2_200"
- __p_26__      | val2 | avg |  20 |  200 | "val2_avg_piv_20_piv2_200"
- __p_31__      | val2 | avg |  30 |  300 | "val2_avg_piv_30_piv2_300"
- __p_35__      | val2 | sum |     |  300 | "val2_sum_piv_null_piv2_300"
- __p_39__      | val2 | sum |  10 |  300 | "val2_sum_piv_10_piv2_300"
- __p_40__      | val2 | sum |  20 |    0 | "val2_sum_piv_20_piv2_0"
- __p_44__      | val2 | sum |  30 |    0 | "val2_sum_piv_30_piv2_0"
+__pivot_cid__ | pval | agg |  piv   | piv2 |           col_name
+---------------+------+-----+--------+------+------------------------------
+ __p_1__       | val  | avg | [NULL] |    0 | "val_avg_piv_null_piv2_0"
+ __p_2__       | val  | avg | [NULL] |  100 | "val_avg_piv_null_piv2_100"
+ __p_3__       | val  | avg | [NULL] |  200 | "val_avg_piv_null_piv2_200"
+ __p_4__       | val  | avg | [NULL] |  300 | "val_avg_piv_null_piv2_300"
+ __p_5__       | val  | avg |     10 |    0 | "val_avg_piv_10_piv2_0"
+ __p_6__       | val  | avg |     10 |  100 | "val_avg_piv_10_piv2_100"
+ __p_7__       | val  | avg |     10 |  200 | "val_avg_piv_10_piv2_200"
+ __p_8__       | val  | avg |     10 |  300 | "val_avg_piv_10_piv2_300"
+ __p_9__       | val  | avg |     20 |    0 | "val_avg_piv_20_piv2_0"
+ __p_10__      | val  | avg |     20 |  100 | "val_avg_piv_20_piv2_100"
+ __p_11__      | val  | avg |     20 |  200 | "val_avg_piv_20_piv2_200"
+ __p_12__      | val  | avg |     20 |  300 | "val_avg_piv_20_piv2_300"
+ __p_13__      | val  | avg |     30 |    0 | "val_avg_piv_30_piv2_0"
+ __p_14__      | val  | avg |     30 |  100 | "val_avg_piv_30_piv2_100"
+ __p_15__      | val  | avg |     30 |  200 | "val_avg_piv_30_piv2_200"
+ __p_16__      | val  | avg |     30 |  300 | "val_avg_piv_30_piv2_300"
+ __p_17__      | val2 | avg | [NULL] |    0 | "val2_avg_piv_null_piv2_0"
+ __p_18__      | val2 | avg | [NULL] |  100 | "val2_avg_piv_null_piv2_100"
+ __p_19__      | val2 | avg | [NULL] |  200 | "val2_avg_piv_null_piv2_200"
+ __p_20__      | val2 | avg | [NULL] |  300 | "val2_avg_piv_null_piv2_300"
+ __p_21__      | val2 | avg |     10 |    0 | "val2_avg_piv_10_piv2_0"
+...
 (48 rows)
 </pre>
 <pre class="example">
+\\x on
 SELECT * FROM pivout ORDER BY id,id2;
 </pre>
 <pre class="result">
--[ RECORD 1 ]--
+-[ RECORD 1 ]----
 id       | 0
 id2      | 0
-__p_0__  | 111
 __p_1__  | 111
 __p_2__  | 111
 __p_3__  | 111
-__p_4__  | 1
-__p_5__  | 111
+__p_4__  | 111
+__p_5__  | 1
 __p_6__  | 111
 __p_7__  | 111
 __p_8__  | 111
@@ -653,91 +632,40 @@ __p_10__ | 111
 __p_11__ | 111
 __p_12__ | 111
 __p_13__ | 111
-__p_14__ | 111
-__p_15__ | 111
-__p_16__ | 111
-__p_17__ | 111
-__p_18__ | 111
-__p_19__ | 111
-__p_20__ | 11
-__p_21__ | 111
-__p_22__ | 111
-__p_23__ | 111
-__p_24__ | 111
-__p_25__ | 111
-__p_26__ | 111
-__p_27__ | 111
-__p_28__ | 111
-__p_29__ | 111
-__p_30__ | 111
-__p_31__ | 111
-__p_32__ | 111
-__p_33__ | 111
-__p_34__ | 111
-__p_35__ | 111
-__p_36__ | 11
-__p_37__ | 111
-__p_38__ | 111
-__p_39__ | 111
-__p_40__ | 111
-__p_41__ | 111
-__p_42__ | 111
-__p_43__ | 111
-__p_44__ | 111
-__p_45__ | 111
-__p_46__ | 111
-__p_47__ | 111
--[ RECORD 2 ]--
+...
+-[ RECORD 2 ]----
 id       | 0
 id2      | 1
-__p_0__  | 111
 __p_1__  | 111
 __p_2__  | 111
 __p_3__  | 111
 __p_4__  | 111
-__p_5__  | 2
+__p_5__  | 111
+__p_6__  | 2
+__p_7__  | 111
+__p_8__  | 111
+__p_9__  | 111
+__p_10__ | 3
+__p_11__ | 111
+__p_12__ | 111
+__p_13__ | 111
+...
+-[ RECORD 3 ]----
+id       | 1
+id2      | 0
+__p_1__  | 111
+__p_2__  | 111
+__p_3__  | 111
+__p_4__  | 111
+__p_5__  | 111
 __p_6__  | 111
 __p_7__  | 111
 __p_8__  | 111
-__p_9__  | 3
+__p_9__  | 111
 __p_10__ | 111
 __p_11__ | 111
 __p_12__ | 111
 __p_13__ | 111
-__p_14__ | 111
-__p_15__ | 111
-__p_16__ | 111
-__p_17__ | 111
-__p_18__ | 111
-__p_19__ | 111
-__p_20__ | 111
-__p_21__ | 12
-__p_22__ | 111
-__p_23__ | 111
-__p_24__ | 111
-__p_25__ | 13
-__p_26__ | 111
-__p_27__ | 111
-__p_28__ | 111
-__p_29__ | 111
-__p_30__ | 111
-__p_31__ | 111
-__p_32__ | 111
-__p_33__ | 111
-__p_34__ | 111
-__p_35__ | 111
-__p_36__ | 111
-__p_37__ | 12
-__p_38__ | 111
-__p_39__ | 111
-__p_40__ | 111
-__p_41__ | 13
-__p_42__ | 111
-__p_43__ | 111
-__p_44__ | 111
-__p_45__ | 111
-__p_46__ | 111
-__p_47__ | 111
 ...
 </pre>
 
@@ -786,7 +714,8 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.pivot(
     aggregate_func          TEXT,
     fill_value              TEXT,
     keep_null               BOOLEAN,
-    output_col_dictionary   BOOLEAN
+    output_col_dictionary   BOOLEAN,
+    output_type             TEXT
 
 ) RETURNS VOID AS $$
     PythonFunction(utilities, pivot, pivot)
@@ -794,6 +723,22 @@ $$ LANGUAGE plpythonu VOLATILE
 m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `');
 
 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.pivot(
+    source_table            TEXT,
+    out_table               TEXT,
+    index                   TEXT,
+    pivot_cols              TEXT,
+    pivot_values            TEXT,
+    aggregate_func          TEXT,
+    fill_value              TEXT,
+    keep_null               BOOLEAN,
+    output_col_dictionary   BOOLEAN
+
+) RETURNS VOID AS $$
+    SELECT MADLIB_SCHEMA.pivot($1, $2, $3, $4, $5, $6, $7, $8, $9, NULL)
+$$ LANGUAGE sql VOLATILE
+m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.pivot(
     source_table        TEXT,
     out_table           TEXT,
     index               TEXT,

http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/bb209bbb/src/ports/postgres/modules/utilities/test/pivot.sql_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/utilities/test/pivot.sql_in 
b/src/ports/postgres/modules/utilities/test/pivot.sql_in
index 79bcc57..3dafd18 100644
--- a/src/ports/postgres/modules/utilities/test/pivot.sql_in
+++ b/src/ports/postgres/modules/utilities/test/pivot.sql_in
@@ -28,16 +28,16 @@ CREATE TABLE pivset(
                 );
 
 INSERT INTO pivset VALUES
-       (0, 10, 1),
-       (0, 10, 2),
-       (0, 20, 3),
-       (1, 20, 4),
-       (1, 30, 5),
-       (1, 30, 6),
-       (1, 10, 7),
-       (NULL, 10, 8),
-       (0, NULL, 9),
-       (0, 10, NULL);
+    (0, 10, 1),
+    (0, 10, 2),
+    (0, 20, 3),
+    (1, 20, 4),
+    (1, 30, 5),
+    (1, 30, 6),
+    (1, 10, 7),
+    (NULL, 10, 8),
+    (0, NULL, 9),
+    (0, 10, NULL);
 
 DROP TABLE IF EXISTS pivout;
 SELECT pivot('pivset', 'pivout', 'id', 'piv', 'val');
@@ -47,12 +47,12 @@ SELECT assert(val_avg_piv_20 = 3, 'Wrong output in 
pivoting') FROM pivout WHERE
 
 DROP VIEW IF EXISTS pivset_ext;
 CREATE VIEW pivset_ext AS
-       SELECT *,
+    SELECT *,
     COALESCE(id + (pivset.val / 3), 0) AS id2,
     COALESCE(piv + (pivset.val / 3), 0) AS piv2,
     COALESCE(val + 10, 0) AS val2
    FROM pivset;
-SELECT id,id2,piv,piv2,val,val2 FROM pivset_ext 
+SELECT id,id2,piv,piv2,val,val2 FROM pivset_ext
 ORDER BY id,id2,piv,piv2,val,val2;
 
 DROP TABLE IF EXISTS pivout;
@@ -60,87 +60,87 @@ SELECT pivot('pivset_ext', 'pivout', 'id,id2', 'piv', 
'val');
 SELECT * FROM pivout;
 
 SELECT assert(val_avg_piv_10 = 1.5,
-       'Wrong output in pivoting: index columns') FROM pivout 
-       WHERE id = 0 AND id2 = 0;
+    'Wrong output in pivoting: index columns') FROM pivout
+    WHERE id = 0 AND id2 = 0;
 
 DROP TABLE IF EXISTS pivout;
 SELECT pivot('pivset_ext', 'pivout', 'id', 'piv, piv2', 'val');
 SELECT * FROM pivout;
 
 SELECT assert(val_avg_piv_10_piv2_10 = 1.5,
-       'Wrong output in pivoting: pivot columns') FROM pivout WHERE id = 0;
+    'Wrong output in pivoting: pivot columns') FROM pivout WHERE id = 0;
 
 DROP TABLE IF EXISTS pivout;
 SELECT pivot('pivset_ext', 'pivout', 'id', 'piv', 'val, val2');
 SELECT * FROM pivout;
 
 SELECT assert(val2_avg_piv_20 = 13,
-       'Wrong output in pivoting: value columns') FROM pivout WHERE id = 0;
+    'Wrong output in pivoting: value columns') FROM pivout WHERE id = 0;
 
 DROP TABLE IF EXISTS pivout;
 SELECT pivot('pivset_ext', 'pivout', 'id', 'piv', 'val', 'sum');
 SELECT * FROM pivout;
 
 SELECT assert(val_sum_piv_10 = 3,
-       'Wrong output in pivoting: aggregate functions') FROM pivout WHERE id = 
0;
+    'Wrong output in pivoting: aggregate functions') FROM pivout WHERE id = 0;
 
 DROP TABLE IF EXISTS pivout;
 SELECT pivot('pivset_ext', 'pivout', 'id', 'piv', 'val', 'sum', True);
 SELECT * FROM pivout;
 
 SELECT assert(val_sum_piv_null = 9,
-       'Wrong output in pivoting: keep null') FROM pivout WHERE id = 0;
+    'Wrong output in pivoting: keep null') FROM pivout WHERE id = 0;
 
 DROP TABLE IF EXISTS pivout;
 SELECT pivot('pivset_ext', 'pivout', 'id', 'piv', 'val', 'sum', '111');
 SELECT * FROM pivout;
 
 SELECT assert(val_sum_piv_30 = 111,
-       'Wrong output in pivoting: fill value') FROM pivout WHERE id = 0;
+    'Wrong output in pivoting: fill value') FROM pivout WHERE id = 0;
 
 DROP TABLE IF EXISTS pivout;
 SELECT pivot('pivset_ext', 'pivout', 'id', 'piv', 'val', 'sum', '111', True);
 SELECT * FROM pivout;
 
 SELECT assert(val_sum_piv_30 = 111 AND val_sum_piv_null = 9,
-       'Wrong output in pivoting: fill value') FROM pivout WHERE id = 0;
+    'Wrong output in pivoting: fill value') FROM pivout WHERE id = 0;
 
 DROP TABLE IF EXISTS pivout;
 SELECT pivot('pivset_ext', 'pivout', 'id', 'piv', 'val', 'avg, sum');
 SELECT * FROM pivout;
 
 SELECT assert(val_avg_piv_10 = 1.5 AND val_sum_piv_10 = 3,
-       'Wrong output in pivoting: aggregate functions') FROM pivout WHERE id = 
0;
+    'Wrong output in pivoting: aggregate functions') FROM pivout WHERE id = 0;
 
 DROP TABLE IF EXISTS pivout;
 SELECT pivot('pivset_ext', 'pivout', 'id', 'piv, piv2', 'val', 'avg, sum');
 SELECT * FROM pivout;
 
 SELECT assert(val_avg_piv_10_piv2_10 = 1.5 AND val_sum_piv_10_piv2_10 = 3,
-       'Wrong output in pivoting: aggregate functions') FROM pivout WHERE id = 
0;
+    'Wrong output in pivoting: aggregate functions') FROM pivout WHERE id = 0;
 
 DROP TABLE IF EXISTS pivout;
 SELECT pivot('pivset_ext', 'pivout', 'id', 'piv', 'val, val2', 'avg, sum');
 SELECT * FROM pivout;
 
 SELECT assert(val_sum_piv_10 = 3 AND val2_avg_piv_20 = 13,
-       'Wrong output in pivoting: aggregate functions') FROM pivout WHERE id = 
0;
+    'Wrong output in pivoting: aggregate functions') FROM pivout WHERE id = 0;
 
 DROP TABLE IF EXISTS pivout;
 SELECT pivot('pivset_ext', 'pivout', 'id', 'piv', 'val, val2',
-       'val=avg, val2=sum');
+    'val=avg, val2=sum');
 SELECT * FROM pivout;
 
 SELECT assert(val_avg_piv_10 = 1.5 AND val2_sum_piv_10 = 23,
-       'Wrong output in pivoting: aggregate functions') FROM pivout WHERE id = 
0;
+    'Wrong output in pivoting: aggregate functions') FROM pivout WHERE id = 0;
 
 DROP TABLE IF EXISTS pivout;
 SELECT pivot('pivset_ext', 'pivout', 'id', 'piv', 'val, val2',
-       'val=avg, val2=[avg,sum]');
+    'val=avg, val2=[avg,sum]');
 SELECT * FROM pivout;
 
 SELECT assert(val2_avg_piv_20 = 13 AND val2_sum_piv_10 = 23,
-       'Wrong output in pivoting: aggregate functions') FROM pivout WHERE id = 
0;
+    'Wrong output in pivoting: aggregate functions') FROM pivout WHERE id = 0;
 
 DROP TABLE IF EXISTS pivout;
 DROP TABLE IF EXISTS pivout_dictionary;
@@ -148,9 +148,9 @@ SELECT pivot('pivset_ext', 'pivout', 'id, id2', 'piv, 
piv2', 'val, val2',
     'val=avg, val2=[avg,sum]', '111', True, True);
 SELECT * FROM pivout;
 
-SELECT assert(__p_7__ = 1.5,
-       'Wrong output in pivoting: Output dictionary') FROM pivout 
-       WHERE id = 0 AND id2 = 0;
+SELECT assert(__p_8__ = 1.5,
+             'Wrong output in pivoting: Output dictionary') FROM pivout
+    WHERE id = 0 AND id2 = 0;
 
 DROP FUNCTION IF EXISTS array_add1(ANYARRAY, ANYELEMENT);
 DROP AGGREGATE IF EXISTS array_accum1 (anyelement);
@@ -167,4 +167,18 @@ DROP TABLE IF EXISTS pivout;
 SELECT pivot('pivset_ext', 'pivout', 'id', 'piv', 'val', 'array_accum1');
 SELECT * FROM pivout;
 
+DROP TABLE IF EXISTS pivout;
+DROP TABLE IF EXISTS pivout_dictionary;
+SELECT pivot('pivset_ext', 'pivout', 'id, id2', 'piv, piv2', 'val, val2',
+    'val=avg, val2=[avg,sum]', '111', True, True, 'a');
+SELECT * FROM pivout;
+SELECT * FROM pivout_dictionary;
+
+DROP TABLE IF EXISTS pivout;
+DROP TABLE IF EXISTS pivout_dictionary;
+SELECT pivot('pivset_ext', 'pivout', 'id, id2', 'piv, piv2', 'val, val2',
+    'val=avg, val2=[avg,sum]', '111', True, True, 's');
+SELECT * FROM pivout;
+SELECT * FROM pivout_dictionary;
+
 DROP VIEW IF EXISTS pivset_ext;

http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/bb209bbb/src/ports/postgres/modules/utilities/utilities.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/utilities/utilities.py_in 
b/src/ports/postgres/modules/utilities/utilities.py_in
index 0e47aea..126f4e6 100644
--- a/src/ports/postgres/modules/utilities/utilities.py_in
+++ b/src/ports/postgres/modules/utilities/utilities.py_in
@@ -1,4 +1,5 @@
 
+import collections
 import re
 import time
 import random
@@ -583,16 +584,16 @@ def extract_keyvalue_params(input_params,
                 else:
                     continue
             try:
-                if param_type in (int, str, float):
-                    parameter_dict[param_name] = param_type(param_value)
-                elif param_type == list:
-                    parameter_dict[param_name] = split_quoted_delimited_str(
-                        param_value.strip('[](){} '))
-                elif param_type == bool:
+                if param_type == bool:  # bool is not subclassable
                     #  True values are y, yes, t, true, on and 1;
                     #  False values are n, no, f, false, off and 0.
                     #  Raises ValueError if anything else.
                     parameter_dict[param_name] = bool(strtobool(param_value))
+                elif param_type in (int, str, float):
+                    parameter_dict[param_name] = param_type(param_value)
+                elif issubclass(param_type, collections.Iterable):
+                    parameter_dict[param_name] = split_quoted_delimited_str(
+                        param_value.strip('[](){} '))
                 else:
                     raise TypeError("Invalid input: {0} has unsupported type "
                                     "{1}".format(param_name, usage_str))

http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/bb209bbb/src/ports/postgres/modules/utilities/validate_args.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/utilities/validate_args.py_in 
b/src/ports/postgres/modules/utilities/validate_args.py_in
index 91f34b8..5832124 100644
--- a/src/ports/postgres/modules/utilities/validate_args.py_in
+++ b/src/ports/postgres/modules/utilities/validate_args.py_in
@@ -345,8 +345,9 @@ def get_cols_and_types(tbl):
 
 
 def get_expr_type(expr, tbl):
-    """ Temporary function to obtain the type of an expression
+    """ Return the type of an expression run on a given table
 
+    Note: this
     Args:
         @param expr
 
@@ -356,7 +357,6 @@ def get_expr_type(expr, tbl):
     expr_type = plpy.execute("""
         SELECT pg_typeof({0}) AS type
         FROM {1}
-        WHERE ({0}) IS NOT NULL
         LIMIT 1
         """.format(expr, tbl))[0]['type']
     return expr_type.upper()

[10/34] incubator-madlib git commit: Pivot: Add support for array output

Reply via email to