Hello

I am returning to discussion
http://archives.postgresql.org/pgsql-hackers/2009-12/msg01378.php

I propose a new aggregate function - listagg. This function
concatenate values to string. If this function is used with two
parameters, then second parameter is used as delimiter. NULL input
values are ignored like other aggregates. If all values are NULL, then
result is NULL. When delimiter is omitted, then values are
concatenated without any delimiter.

This function could be replaced with
array_to_string(array_agg(),delimiter). It has same functionality, but
different implementation. listagg should be (and it is) faster,
because hasn't array overhead. In my tests - listagg is about 25%
faster. Mainly, listagg is shorter. Because we cannot well wrap
aggregates, I propose integrate this function. There are precedent -
function generate_sequences. It should be replaced by
generate_series(array_lower(), array_upper()), but it hasn't same
effectiveness.

Using:

postgres=# select * from country ;
    town    | state
------------+-------
 Prague     | cs
 Brno       | cs
 Bratislava | sk
 Kosice     | sk
(4 rows)

postgres=# select listagg(town,',') from country group by state;
      listagg
-------------------
 Bratislava,Kosice
 Prague,Brno
(2 rows)

Comments?

Regards
Pavel Stehule
*** ./doc/src/sgml/func.sgml.orig	2009-12-19 18:49:50.000000000 +0100
--- ./doc/src/sgml/func.sgml	2009-12-25 18:01:13.281367152 +0100
***************
*** 1789,1794 ****
--- 1789,1798 ----
      </tgroup>
     </table>
  
+    <para>
+     See also <xref linkend="functions-aggregate"> about the aggregate
+     function <function>listagg</function>.
+    </para>
  
     <table id="conversion-names">
      <title>Built-in Conversions</title>
***************
*** 9789,9794 ****
--- 9793,9816 ----
       </row>
  
       <row>
+       <entry>
+        <indexterm>
+         <primary>listagg</primary>
+        </indexterm>
+        <function>
+ 	 listagg(<replaceable class="parameter">expression</replaceable> 
+ 	         [, <replaceable class="parameter">expression</replaceable> ] )</function>
+       </entry>
+       <entry>
+        <type>text</type>
+       </entry>
+       <entry>
+        <type>text</type>
+       </entry>
+       <entry>input values concatenated into an string</entry>
+      </row>
+ 
+      <row>
        <entry><function>max(<replaceable class="parameter">expression</replaceable>)</function></entry>
        <entry>any array, numeric, string, or date/time type</entry>
        <entry>same as argument type</entry>
*** ./src/backend/utils/adt/varchar.c.orig	2009-07-11 23:15:32.000000000 +0200
--- ./src/backend/utils/adt/varchar.c	2009-12-25 15:41:42.928370326 +0100
***************
*** 18,27 ****
--- 18,40 ----
  #include "access/hash.h"
  #include "access/tuptoaster.h"
  #include "libpq/pqformat.h"
+ #include "lib/stringinfo.h"
+ #include "nodes/execnodes.h"
  #include "utils/array.h"
  #include "utils/builtins.h"
  #include "mb/pg_wchar.h"
  
+ /* type for state data of listagg function */
+ typedef struct
+ {
+ 	StringInfo	strInfo;
+ 	char	delimiter[1];		/* separator string - one or more chars */
+ } ListAggState;
+ 
+ static ListAggState *accumStringResult(ListAggState *state, 
+ 						    text *elem, 
+ 						    text *delimiter, 
+ 						    MemoryContext aggcontext);
  
  /* common code for bpchartypmodin and varchartypmodin */
  static int32
***************
*** 995,997 ****
--- 1008,1163 ----
  
  	PG_RETURN_INT32(result);
  }
+ 
+ /****************************************************************
+  * listagg
+  *  
+  * Concates values and returns string.
+  *
+  * Syntax:
+  *     FUNCTION listagg(string varchar, delimiter varchar = '')
+  *      RETURNS varchar;
+  *
+  * Note: any NULL value is ignored.
+  *
+  ****************************************************************/
+ static ListAggState *
+ accumStringResult(ListAggState *state, text *elem, text *delimiter, 
+ 						    MemoryContext aggcontext)
+ {
+ 	MemoryContext	oldcontext;
+ 	
+ 	/* 
+ 	 * when state is NULL, create new state value.
+ 	 */
+ 	if (state == NULL)
+ 	{
+ 		if (delimiter != NULL)
+ 		{
+ 			char *dstr = text_to_cstring(delimiter);
+ 			int len = strlen(dstr);
+ 			
+ 			oldcontext = MemoryContextSwitchTo(aggcontext);
+ 			state = palloc(sizeof(ListAggState) + len);
+ 			
+ 			/* copy delimiter to state var */
+ 			memcpy(&state->delimiter, dstr, len + 1);
+ 		}
+ 		else
+ 		{
+ 			oldcontext = MemoryContextSwitchTo(aggcontext);
+ 			state = palloc(sizeof(ListAggState));
+ 			state->delimiter[0] = '\0';
+ 		}
+ 		
+ 		/* Initialise StringInfo */
+ 		state->strInfo = NULL;
+ 		
+ 		MemoryContextSwitchTo(oldcontext);
+ 	}
+ 	
+ 	/* only when element isn't null */
+ 	if (elem != NULL)
+ 	{
+ 		char	*value = text_to_cstring(elem);
+ 
+ 		oldcontext = MemoryContextSwitchTo(aggcontext);
+ 		if (state->strInfo != NULL)
+ 			appendStringInfoString(state->strInfo, state->delimiter);
+ 		else
+ 			state->strInfo = makeStringInfo();
+ 			
+ 		appendStringInfoString(state->strInfo, value);
+ 		MemoryContextSwitchTo(oldcontext);
+ 	}
+ 	
+ 	return state;
+ }
+ 
+ Datum
+ listagg1_transfn(PG_FUNCTION_ARGS)
+ {
+ 	MemoryContext	aggcontext;
+ 	ListAggState *state = NULL;
+ 	text *elem;
+ 
+ 	if (fcinfo->context && IsA(fcinfo->context, AggState))
+ 		aggcontext = ((AggState *) fcinfo->context)->aggcontext;
+ 	else if (fcinfo->context && IsA(fcinfo->context, WindowAggState))
+ 		aggcontext = ((WindowAggState *) fcinfo->context)->wincontext;
+ 	else
+ 	{
+ 		/* cannot be called directly because of internal-type argument */
+ 		elog(ERROR, "listagg2_transfn called in non-aggregate context");
+ 		aggcontext = NULL;		/* keep compiler quiet */
+ 	}
+ 	
+ 	state = PG_ARGISNULL(0) ? NULL : (ListAggState *) PG_GETARG_POINTER(0);
+ 	elem = PG_ARGISNULL(1) ? NULL : PG_GETARG_TEXT_P(1);
+ 	
+ 	state = accumStringResult(state,
+ 					    elem,
+ 					    NULL,
+ 					    aggcontext);
+ 
+ 	/*
+ 	 * The transition type for listagg() is declared to be "internal", which
+ 	 * is a pass-by-value type the same size as a pointer.	
+ 	 */
+ 	PG_RETURN_POINTER(state);
+ }
+ 
+ Datum 
+ listagg2_transfn(PG_FUNCTION_ARGS)
+ {
+ 	MemoryContext	aggcontext;
+ 	ListAggState *state = NULL;
+ 	text *elem;
+ 	text *delimiter = NULL;
+ 
+ 	if (fcinfo->context && IsA(fcinfo->context, AggState))
+ 		aggcontext = ((AggState *) fcinfo->context)->aggcontext;
+ 	else if (fcinfo->context && IsA(fcinfo->context, WindowAggState))
+ 		aggcontext = ((WindowAggState *) fcinfo->context)->wincontext;
+ 	else
+ 	{
+ 		/* cannot be called directly because of internal-type argument */
+ 		elog(ERROR, "listagg2_transfn called in non-aggregate context");
+ 		aggcontext = NULL;		/* keep compiler quiet */
+ 	}
+ 	
+ 	state = PG_ARGISNULL(0) ? NULL : (ListAggState *) PG_GETARG_POINTER(0);
+ 	elem = PG_ARGISNULL(1) ? NULL : PG_GETARG_TEXT_P(1);
+ 	delimiter = PG_ARGISNULL(2) ? NULL : PG_GETARG_TEXT_P(2);
+ 	
+ 	state = accumStringResult(state,
+ 					    elem,
+ 					    delimiter,
+ 					    aggcontext);
+ 
+ 	/*
+ 	 * The transition type for listagg() is declared to be "internal", which
+ 	 * is a pass-by-value type the same size as a pointer.	
+ 	 */
+ 	PG_RETURN_POINTER(state);	
+ }
+ 
+ Datum
+ listagg_finalfn(PG_FUNCTION_ARGS)
+ {
+ 	ListAggState *state = NULL;
+ 
+ 	if (PG_ARGISNULL(0))
+ 		PG_RETURN_NULL();
+ 		
+ 	/* cannot be called directly because of internal-type argument */
+ 	Assert(fcinfo->context &&
+ 		    (IsA(fcinfo->context, AggState) ||
+ 			    IsA(fcinfo->context, WindowAggState)));
+ 	
+ 	state = (ListAggState *) PG_GETARG_POINTER(0);
+ 	if (state->strInfo != NULL)
+ 		PG_RETURN_TEXT_P(cstring_to_text(state->strInfo->data));
+ 	else
+ 		PG_RETURN_NULL();    
+ }
*** ./src/include/catalog/pg_aggregate.h.orig	2009-01-01 18:23:56.000000000 +0100
--- ./src/include/catalog/pg_aggregate.h	2009-12-25 17:16:31.229370902 +0100
***************
*** 223,228 ****
--- 223,232 ----
  /* array */
  DATA(insert ( 2335	array_agg_transfn	array_agg_finalfn		0	2281	_null_ ));
  
+ /* varchar */
+ DATA(insert (3030	listagg1_transfn	listagg_finalfn			0	2281	_null_ ));
+ DATA(insert (3031	listagg2_transfn	listagg_finalfn			0	2281	_null_ ));
+ 
  /*
   * prototypes for functions in pg_aggregate.c
   */
*** ./src/include/catalog/pg_proc.h.orig	2009-12-19 02:32:42.000000000 +0100
--- ./src/include/catalog/pg_proc.h	2009-12-25 17:04:53.430366927 +0100
***************
*** 2250,2255 ****
--- 2250,2266 ----
  DATA(insert OID =  2090 ( to_hex	   PGNSP PGUID 12 1 0 0 f f f t f i 1 0 25 "20" _null_ _null_ _null_ _null_ to_hex64 _null_ _null_ _null_ ));
  DESCR("convert int8 number to hex");
  
+ DATA(insert OID = 2997 (  listagg1_transfn   PGNSP PGUID 12 1 0 0 f f f f f i 2 0 2281 "2281 25" _null_ _null_ _null_ _null_ listagg1_transfn _null_ _null_ _null_ ));
+ DESCR("listagg one param transition function");
+ DATA(insert OID = 2998 (  listagg2_transfn   PGNSP PGUID 12 1 0 0 f f f f f i 3 0 2281 "2281 25 25" _null_ _null_ _null_ _null_ listagg2_transfn _null_ _null_ _null_ ));
+ DESCR("listagg two params transition function");
+ DATA(insert OID = 2999 (  listagg_finalfn   PGNSP PGUID 12 1 0 0 f f f f f i 1 0 25 "2281" _null_ _null_ _null_ _null_ listagg_finalfn _null_ _null_ _null_ ));
+ DESCR("listagg final function");
+ DATA(insert OID = 3030 (  listagg		   PGNSP PGUID 12 1 0 0 t f f f f i 1 0 25 "25" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ ));
+ DESCR("concatenate aggregate input into an string");
+ DATA(insert OID = 3031 (  listagg		   PGNSP PGUID 12 1 0 0 t f f f f i 2 0 25 "25 25" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ ));
+ DESCR("concatenate aggregate input into an string with delimiter");
+ 
  /* for character set encoding support */
  
  /* return database encoding name */
*** ./src/include/utils/builtins.h.orig	2009-12-19 02:32:44.000000000 +0100
--- ./src/include/utils/builtins.h	2009-12-25 15:34:14.347368993 +0100
***************
*** 664,669 ****
--- 664,673 ----
  extern Datum varchartypmodout(PG_FUNCTION_ARGS);
  extern Datum varchar(PG_FUNCTION_ARGS);
  
+ extern Datum listagg1_transfn(PG_FUNCTION_ARGS);
+ extern Datum listagg2_transfn(PG_FUNCTION_ARGS);
+ extern Datum listagg_finalfn(PG_FUNCTION_ARGS);
+ 
  /* varlena.c */
  extern text *cstring_to_text(const char *s);
  extern text *cstring_to_text_with_len(const char *s, int len);
*** ./src/test/regress/expected/aggregates.out.orig	2009-12-15 18:57:47.000000000 +0100
--- ./src/test/regress/expected/aggregates.out	2009-12-25 17:38:53.000000000 +0100
***************
*** 799,801 ****
--- 799,832 ----
  ERROR:  in an aggregate with DISTINCT, ORDER BY expressions must appear in argument list
  LINE 1: select aggfns(distinct a,a,c order by a,b)
                                                  ^
+ -- list agg test
+ select listagg(a) from (values('aaaa'),('bbbb'),('cccc')) g(a);
+    listagg    
+ --------------
+  aaaabbbbcccc
+ (1 row)
+ 
+ select listagg(a,',') from (values('aaaa'),('bbbb'),('cccc')) g(a);
+     listagg     
+ ----------------
+  aaaa,bbbb,cccc
+ (1 row)
+ 
+ select listagg(a,',') from (values('aaaa'),(null),('bbbb'),('cccc')) g(a);
+     listagg     
+ ----------------
+  aaaa,bbbb,cccc
+ (1 row)
+ 
+ select listagg(a,',') from (values(null),(null),('bbbb'),('cccc')) g(a);
+   listagg  
+ -----------
+  bbbb,cccc
+ (1 row)
+ 
+ select listagg(a,',') from (values(null),(null)) g(a);
+  listagg 
+ ---------
+  
+ (1 row)
+ 
*** ./src/test/regress/sql/aggregates.sql.orig	2009-12-25 17:38:04.176369837 +0100
--- ./src/test/regress/sql/aggregates.sql	2009-12-25 17:38:17.211369992 +0100
***************
*** 355,357 ****
--- 355,364 ----
    from (values (1,1,'foo')) v(a,b,c), generate_series(1,2) i;
  select aggfns(distinct a,a,c order by a,b)
    from (values (1,1,'foo')) v(a,b,c), generate_series(1,2) i;
+ 
+ -- list agg test
+ select listagg(a) from (values('aaaa'),('bbbb'),('cccc')) g(a);
+ select listagg(a,',') from (values('aaaa'),('bbbb'),('cccc')) g(a);
+ select listagg(a,',') from (values('aaaa'),(null),('bbbb'),('cccc')) g(a);
+ select listagg(a,',') from (values(null),(null),('bbbb'),('cccc')) g(a);
+ select listagg(a,',') from (values(null),(null)) g(a);
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to