Hello I am returning to discussion http://archives.postgresql.org/pgsql-hackers/2009-12/msg01378.php
I propose a new aggregate function - listagg. This function concatenate values to string. If this function is used with two parameters, then second parameter is used as delimiter. NULL input values are ignored like other aggregates. If all values are NULL, then result is NULL. When delimiter is omitted, then values are concatenated without any delimiter. This function could be replaced with array_to_string(array_agg(),delimiter). It has same functionality, but different implementation. listagg should be (and it is) faster, because hasn't array overhead. In my tests - listagg is about 25% faster. Mainly, listagg is shorter. Because we cannot well wrap aggregates, I propose integrate this function. There are precedent - function generate_sequences. It should be replaced by generate_series(array_lower(), array_upper()), but it hasn't same effectiveness. Using: postgres=# select * from country ; town | state ------------+------- Prague | cs Brno | cs Bratislava | sk Kosice | sk (4 rows) postgres=# select listagg(town,',') from country group by state; listagg ------------------- Bratislava,Kosice Prague,Brno (2 rows) Comments? Regards Pavel Stehule
*** ./doc/src/sgml/func.sgml.orig 2009-12-19 18:49:50.000000000 +0100 --- ./doc/src/sgml/func.sgml 2009-12-25 18:01:13.281367152 +0100 *************** *** 1789,1794 **** --- 1789,1798 ---- </tgroup> </table> + <para> + See also <xref linkend="functions-aggregate"> about the aggregate + function <function>listagg</function>. + </para> <table id="conversion-names"> <title>Built-in Conversions</title> *************** *** 9789,9794 **** --- 9793,9816 ---- </row> <row> + <entry> + <indexterm> + <primary>listagg</primary> + </indexterm> + <function> + listagg(<replaceable class="parameter">expression</replaceable> + [, <replaceable class="parameter">expression</replaceable> ] )</function> + </entry> + <entry> + <type>text</type> + </entry> + <entry> + <type>text</type> + </entry> + <entry>input values concatenated into an string</entry> + </row> + + <row> <entry><function>max(<replaceable class="parameter">expression</replaceable>)</function></entry> <entry>any array, numeric, string, or date/time type</entry> <entry>same as argument type</entry> *** ./src/backend/utils/adt/varchar.c.orig 2009-07-11 23:15:32.000000000 +0200 --- ./src/backend/utils/adt/varchar.c 2009-12-25 15:41:42.928370326 +0100 *************** *** 18,27 **** --- 18,40 ---- #include "access/hash.h" #include "access/tuptoaster.h" #include "libpq/pqformat.h" + #include "lib/stringinfo.h" + #include "nodes/execnodes.h" #include "utils/array.h" #include "utils/builtins.h" #include "mb/pg_wchar.h" + /* type for state data of listagg function */ + typedef struct + { + StringInfo strInfo; + char delimiter[1]; /* separator string - one or more chars */ + } ListAggState; + + static ListAggState *accumStringResult(ListAggState *state, + text *elem, + text *delimiter, + MemoryContext aggcontext); /* common code for bpchartypmodin and varchartypmodin */ static int32 *************** *** 995,997 **** --- 1008,1163 ---- PG_RETURN_INT32(result); } + + /**************************************************************** + * listagg + * + * Concates values and returns string. + * + * Syntax: + * FUNCTION listagg(string varchar, delimiter varchar = '') + * RETURNS varchar; + * + * Note: any NULL value is ignored. + * + ****************************************************************/ + static ListAggState * + accumStringResult(ListAggState *state, text *elem, text *delimiter, + MemoryContext aggcontext) + { + MemoryContext oldcontext; + + /* + * when state is NULL, create new state value. + */ + if (state == NULL) + { + if (delimiter != NULL) + { + char *dstr = text_to_cstring(delimiter); + int len = strlen(dstr); + + oldcontext = MemoryContextSwitchTo(aggcontext); + state = palloc(sizeof(ListAggState) + len); + + /* copy delimiter to state var */ + memcpy(&state->delimiter, dstr, len + 1); + } + else + { + oldcontext = MemoryContextSwitchTo(aggcontext); + state = palloc(sizeof(ListAggState)); + state->delimiter[0] = '\0'; + } + + /* Initialise StringInfo */ + state->strInfo = NULL; + + MemoryContextSwitchTo(oldcontext); + } + + /* only when element isn't null */ + if (elem != NULL) + { + char *value = text_to_cstring(elem); + + oldcontext = MemoryContextSwitchTo(aggcontext); + if (state->strInfo != NULL) + appendStringInfoString(state->strInfo, state->delimiter); + else + state->strInfo = makeStringInfo(); + + appendStringInfoString(state->strInfo, value); + MemoryContextSwitchTo(oldcontext); + } + + return state; + } + + Datum + listagg1_transfn(PG_FUNCTION_ARGS) + { + MemoryContext aggcontext; + ListAggState *state = NULL; + text *elem; + + if (fcinfo->context && IsA(fcinfo->context, AggState)) + aggcontext = ((AggState *) fcinfo->context)->aggcontext; + else if (fcinfo->context && IsA(fcinfo->context, WindowAggState)) + aggcontext = ((WindowAggState *) fcinfo->context)->wincontext; + else + { + /* cannot be called directly because of internal-type argument */ + elog(ERROR, "listagg2_transfn called in non-aggregate context"); + aggcontext = NULL; /* keep compiler quiet */ + } + + state = PG_ARGISNULL(0) ? NULL : (ListAggState *) PG_GETARG_POINTER(0); + elem = PG_ARGISNULL(1) ? NULL : PG_GETARG_TEXT_P(1); + + state = accumStringResult(state, + elem, + NULL, + aggcontext); + + /* + * The transition type for listagg() is declared to be "internal", which + * is a pass-by-value type the same size as a pointer. + */ + PG_RETURN_POINTER(state); + } + + Datum + listagg2_transfn(PG_FUNCTION_ARGS) + { + MemoryContext aggcontext; + ListAggState *state = NULL; + text *elem; + text *delimiter = NULL; + + if (fcinfo->context && IsA(fcinfo->context, AggState)) + aggcontext = ((AggState *) fcinfo->context)->aggcontext; + else if (fcinfo->context && IsA(fcinfo->context, WindowAggState)) + aggcontext = ((WindowAggState *) fcinfo->context)->wincontext; + else + { + /* cannot be called directly because of internal-type argument */ + elog(ERROR, "listagg2_transfn called in non-aggregate context"); + aggcontext = NULL; /* keep compiler quiet */ + } + + state = PG_ARGISNULL(0) ? NULL : (ListAggState *) PG_GETARG_POINTER(0); + elem = PG_ARGISNULL(1) ? NULL : PG_GETARG_TEXT_P(1); + delimiter = PG_ARGISNULL(2) ? NULL : PG_GETARG_TEXT_P(2); + + state = accumStringResult(state, + elem, + delimiter, + aggcontext); + + /* + * The transition type for listagg() is declared to be "internal", which + * is a pass-by-value type the same size as a pointer. + */ + PG_RETURN_POINTER(state); + } + + Datum + listagg_finalfn(PG_FUNCTION_ARGS) + { + ListAggState *state = NULL; + + if (PG_ARGISNULL(0)) + PG_RETURN_NULL(); + + /* cannot be called directly because of internal-type argument */ + Assert(fcinfo->context && + (IsA(fcinfo->context, AggState) || + IsA(fcinfo->context, WindowAggState))); + + state = (ListAggState *) PG_GETARG_POINTER(0); + if (state->strInfo != NULL) + PG_RETURN_TEXT_P(cstring_to_text(state->strInfo->data)); + else + PG_RETURN_NULL(); + } *** ./src/include/catalog/pg_aggregate.h.orig 2009-01-01 18:23:56.000000000 +0100 --- ./src/include/catalog/pg_aggregate.h 2009-12-25 17:16:31.229370902 +0100 *************** *** 223,228 **** --- 223,232 ---- /* array */ DATA(insert ( 2335 array_agg_transfn array_agg_finalfn 0 2281 _null_ )); + /* varchar */ + DATA(insert (3030 listagg1_transfn listagg_finalfn 0 2281 _null_ )); + DATA(insert (3031 listagg2_transfn listagg_finalfn 0 2281 _null_ )); + /* * prototypes for functions in pg_aggregate.c */ *** ./src/include/catalog/pg_proc.h.orig 2009-12-19 02:32:42.000000000 +0100 --- ./src/include/catalog/pg_proc.h 2009-12-25 17:04:53.430366927 +0100 *************** *** 2250,2255 **** --- 2250,2266 ---- DATA(insert OID = 2090 ( to_hex PGNSP PGUID 12 1 0 0 f f f t f i 1 0 25 "20" _null_ _null_ _null_ _null_ to_hex64 _null_ _null_ _null_ )); DESCR("convert int8 number to hex"); + DATA(insert OID = 2997 ( listagg1_transfn PGNSP PGUID 12 1 0 0 f f f f f i 2 0 2281 "2281 25" _null_ _null_ _null_ _null_ listagg1_transfn _null_ _null_ _null_ )); + DESCR("listagg one param transition function"); + DATA(insert OID = 2998 ( listagg2_transfn PGNSP PGUID 12 1 0 0 f f f f f i 3 0 2281 "2281 25 25" _null_ _null_ _null_ _null_ listagg2_transfn _null_ _null_ _null_ )); + DESCR("listagg two params transition function"); + DATA(insert OID = 2999 ( listagg_finalfn PGNSP PGUID 12 1 0 0 f f f f f i 1 0 25 "2281" _null_ _null_ _null_ _null_ listagg_finalfn _null_ _null_ _null_ )); + DESCR("listagg final function"); + DATA(insert OID = 3030 ( listagg PGNSP PGUID 12 1 0 0 t f f f f i 1 0 25 "25" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ )); + DESCR("concatenate aggregate input into an string"); + DATA(insert OID = 3031 ( listagg PGNSP PGUID 12 1 0 0 t f f f f i 2 0 25 "25 25" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ )); + DESCR("concatenate aggregate input into an string with delimiter"); + /* for character set encoding support */ /* return database encoding name */ *** ./src/include/utils/builtins.h.orig 2009-12-19 02:32:44.000000000 +0100 --- ./src/include/utils/builtins.h 2009-12-25 15:34:14.347368993 +0100 *************** *** 664,669 **** --- 664,673 ---- extern Datum varchartypmodout(PG_FUNCTION_ARGS); extern Datum varchar(PG_FUNCTION_ARGS); + extern Datum listagg1_transfn(PG_FUNCTION_ARGS); + extern Datum listagg2_transfn(PG_FUNCTION_ARGS); + extern Datum listagg_finalfn(PG_FUNCTION_ARGS); + /* varlena.c */ extern text *cstring_to_text(const char *s); extern text *cstring_to_text_with_len(const char *s, int len); *** ./src/test/regress/expected/aggregates.out.orig 2009-12-15 18:57:47.000000000 +0100 --- ./src/test/regress/expected/aggregates.out 2009-12-25 17:38:53.000000000 +0100 *************** *** 799,801 **** --- 799,832 ---- ERROR: in an aggregate with DISTINCT, ORDER BY expressions must appear in argument list LINE 1: select aggfns(distinct a,a,c order by a,b) ^ + -- list agg test + select listagg(a) from (values('aaaa'),('bbbb'),('cccc')) g(a); + listagg + -------------- + aaaabbbbcccc + (1 row) + + select listagg(a,',') from (values('aaaa'),('bbbb'),('cccc')) g(a); + listagg + ---------------- + aaaa,bbbb,cccc + (1 row) + + select listagg(a,',') from (values('aaaa'),(null),('bbbb'),('cccc')) g(a); + listagg + ---------------- + aaaa,bbbb,cccc + (1 row) + + select listagg(a,',') from (values(null),(null),('bbbb'),('cccc')) g(a); + listagg + ----------- + bbbb,cccc + (1 row) + + select listagg(a,',') from (values(null),(null)) g(a); + listagg + --------- + + (1 row) + *** ./src/test/regress/sql/aggregates.sql.orig 2009-12-25 17:38:04.176369837 +0100 --- ./src/test/regress/sql/aggregates.sql 2009-12-25 17:38:17.211369992 +0100 *************** *** 355,357 **** --- 355,364 ---- from (values (1,1,'foo')) v(a,b,c), generate_series(1,2) i; select aggfns(distinct a,a,c order by a,b) from (values (1,1,'foo')) v(a,b,c), generate_series(1,2) i; + + -- list agg test + select listagg(a) from (values('aaaa'),('bbbb'),('cccc')) g(a); + select listagg(a,',') from (values('aaaa'),('bbbb'),('cccc')) g(a); + select listagg(a,',') from (values('aaaa'),(null),('bbbb'),('cccc')) g(a); + select listagg(a,',') from (values(null),(null),('bbbb'),('cccc')) g(a); + select listagg(a,',') from (values(null),(null)) g(a);
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers