Hello
I am sending a prototype implementation of functions median and
percentile. This implementation is very simple and I moved it to
contrib for this moment - it is more easy maintainable. Later I'll
move it to core.
These functions are relative simple, there are not barrier for
implementation own specific mutations of this functions - so I propose
move to core only basic and well known form of these to core.
postgres=# select median(v) from generate_series(1,10) g(v);
median
────────
5.5
(1 row)
Time: 1.475 ms
postgres=# select percentile(v,50) from generate_series(1,10) g(v);
percentile
────────────
5
(1 row)
Time: 0.626 ms
This implementation is based on tuplesort and the speed is relative
well - the result from 1000000 rows is less 1 sec.
Regards
Pavel Stehule
*** ./contrib/median/Makefile.orig 2010-08-19 12:38:56.144777253 +0200
--- ./contrib/median/Makefile 2010-08-18 20:23:39.180156339 +0200
***************
*** 0 ****
--- 1,17 ----
+ # $PostgreSQL: pgsql/contrib/median/Makefile,v 1.1 2008/07/29 18:31:20 tgl Exp $
+
+ MODULES = median
+ DATA_built = median.sql
+ DATA = uninstall_median.sql
+ REGRESS = median
+
+ ifdef USE_PGXS
+ PG_CONFIG = pg_config
+ PGXS := $(shell $(PG_CONFIG) --pgxs)
+ include $(PGXS)
+ else
+ subdir = contrib/median
+ top_builddir = ../..
+ include $(top_builddir)/src/Makefile.global
+ include $(top_srcdir)/contrib/contrib-global.mk
+ endif
*** ./contrib/median/median.c.orig 2010-08-19 12:39:01.456650776 +0200
--- ./contrib/median/median.c 2010-08-19 12:35:32.104649418 +0200
***************
*** 0 ****
--- 1,244 ----
+ /*
+ * $PostgreSQL: pgsql/contrib/citext/citext.c,v 1.2 2009/06/11 14:48:50 momjian Exp $
+ */
+ #include "postgres.h"
+
+ #include "funcapi.h"
+ #include "miscadmin.h"
+ #include "catalog/pg_type.h"
+ #include "parser/parse_coerce.h"
+ #include "parser/parse_oper.h"
+ #include "utils/builtins.h"
+ #include "utils/tuplesort.h"
+
+ Datum median_transfn(PG_FUNCTION_ARGS);
+ Datum median_finalfn(PG_FUNCTION_ARGS);
+ Datum percentile_transfn(PG_FUNCTION_ARGS);
+ Datum percentile_finalfn(PG_FUNCTION_ARGS);
+
+
+ #ifdef PG_MODULE_MAGIC
+ PG_MODULE_MAGIC;
+ #endif
+
+ PG_FUNCTION_INFO_V1(median_transfn);
+ PG_FUNCTION_INFO_V1(median_finalfn);
+ PG_FUNCTION_INFO_V1(percentile_transfn);
+ PG_FUNCTION_INFO_V1(percentile_finalfn);
+
+
+ typedef struct
+ {
+ int nelems; /* number of valid entries */
+ Tuplesortstate *sortstate;
+ FmgrInfo cast_func_finfo;
+ int p; /* nth for percentille */
+ } StatAggState;
+
+ static StatAggState *
+ makeStatAggState(FunctionCallInfo fcinfo)
+ {
+ MemoryContext oldctx;
+ MemoryContext aggcontext;
+ StatAggState *aggstate;
+ Oid sortop,
+ castfunc;
+ Oid valtype;
+ CoercionPathType pathtype;
+
+ if (!AggCheckCallContext(fcinfo, &aggcontext))
+ {
+ /* cannot be called directly because of internal-type argument */
+ elog(ERROR, "string_agg_transfn called in non-aggregate context");
+ }
+
+ oldctx = MemoryContextSwitchTo(aggcontext);
+
+ aggstate = (StatAggState *) palloc(sizeof(StatAggState));
+ aggstate->nelems = 0;
+
+ valtype = get_fn_expr_argtype(fcinfo->flinfo, 1);
+ get_sort_group_operators(valtype,
+ true, false, false,
+ &sortop, NULL, NULL);
+
+ aggstate->sortstate = tuplesort_begin_datum(valtype,
+ sortop,
+ SORTBY_NULLS_DEFAULT,
+ work_mem, false);
+
+ MemoryContextSwitchTo(oldctx);
+
+ if (valtype != FLOAT8OID)
+ {
+ /* find a cast function */
+
+ pathtype = find_coercion_pathway(FLOAT8OID, valtype,
+ COERCION_EXPLICIT,
+ &castfunc);
+ if (pathtype == COERCION_PATH_FUNC)
+ {
+ Assert(OidIsValid(castfunc));
+ fmgr_info_cxt(castfunc, &aggstate->cast_func_finfo,
+ aggcontext);
+ }
+ else if (pathtype == COERCION_PATH_RELABELTYPE)
+ {
+ aggstate->cast_func_finfo.fn_oid = InvalidOid;
+ }
+ else
+ elog(ERROR, "no conversion function from %s %s",
+ format_type_be(valtype),
+ format_type_be(FLOAT8OID));
+ }
+
+ return aggstate;
+ }
+
+ /*
+ * append a non NULL value to tuplesort
+ */
+ Datum
+ median_transfn(PG_FUNCTION_ARGS)
+ {
+ StatAggState *aggstate;
+
+ aggstate = PG_ARGISNULL(0) ? NULL : (StatAggState *) PG_GETARG_POINTER(0);
+
+ if (!PG_ARGISNULL(1))
+ {
+ if (aggstate == NULL)
+ aggstate = makeStatAggState(fcinfo);
+
+ tuplesort_putdatum(aggstate->sortstate, PG_GETARG_DATUM(1), false);
+ aggstate->nelems++;
+ }
+
+ PG_RETURN_POINTER(aggstate);
+ }
+
+ static double
+ to_double(Datum value, FmgrInfo *cast_func_finfo)
+ {
+ if (cast_func_finfo->fn_oid != InvalidOid)
+ {
+ return DatumGetFloat8(FunctionCall1(cast_func_finfo, value));
+ }
+ else
+ return DatumGetFloat8(value);
+ }
+
+ Datum
+ median_finalfn(PG_FUNCTION_ARGS)
+ {
+ StatAggState *aggstate;
+
+ Assert(AggCheckCallContext(fcinfo, NULL));
+
+ aggstate = PG_ARGISNULL(0) ? NULL : (StatAggState *) PG_GETARG_POINTER(0);
+
+ if (aggstate != NULL)
+ {
+ int lidx;
+ int hidx;
+ Datum value;
+ bool isNull;
+ int i = 1;
+ double result = 0;
+
+ hidx = aggstate->nelems / 2 + 1;
+ lidx = (aggstate->nelems + 1) / 2;
+
+ tuplesort_performsort(aggstate->sortstate);
+
+ while (tuplesort_getdatum(aggstate->sortstate,
+ true,
+ &value, &isNull))
+ {
+ if (i++ == lidx)
+ {
+ result = to_double(value, &aggstate->cast_func_finfo);
+
+ if (lidx != hidx)
+ {
+ tuplesort_getdatum(aggstate->sortstate,
+ true,
+ &value, &isNull);
+ result = (result + to_double(value, &aggstate->cast_func_finfo)) / 2.0;
+ }
+ break;
+ }
+ }
+
+ tuplesort_end(aggstate->sortstate);
+
+ PG_RETURN_FLOAT8(result);
+ }
+ else
+ PG_RETURN_NULL();
+ }
+
+ /*
+ * append a non NULL value to tuplesort
+ */
+ Datum
+ percentile_transfn(PG_FUNCTION_ARGS)
+ {
+ StatAggState *aggstate;
+
+ aggstate = PG_ARGISNULL(0) ? NULL : (StatAggState *) PG_GETARG_POINTER(0);
+
+ if (!PG_ARGISNULL(1))
+ {
+ if (aggstate == NULL)
+ {
+ aggstate = makeStatAggState(fcinfo);
+ aggstate->p = PG_GETARG_INT32(2);
+ }
+
+ tuplesort_putdatum(aggstate->sortstate, PG_GETARG_DATUM(1), false);
+ aggstate->nelems++;
+ }
+
+ PG_RETURN_POINTER(aggstate);
+ }
+
+ Datum
+ percentile_finalfn(PG_FUNCTION_ARGS)
+ {
+ StatAggState *aggstate;
+
+ Assert(AggCheckCallContext(fcinfo, NULL));
+
+ aggstate = PG_ARGISNULL(0) ? NULL : (StatAggState *) PG_GETARG_POINTER(0);
+
+ if (aggstate != NULL)
+ {
+ Datum value;
+ bool isNull;
+ int i = 1;
+ double result = 0;
+ int n;
+
+ n = ((aggstate->p / 100.0) * (aggstate->nelems - 1)) + 1;
+
+ tuplesort_performsort(aggstate->sortstate);
+
+ while (tuplesort_getdatum(aggstate->sortstate,
+ true,
+ &value, &isNull))
+ {
+ if (i++ == n)
+ {
+ result = to_double(value, &aggstate->cast_func_finfo);
+ break;
+ }
+ }
+
+ tuplesort_end(aggstate->sortstate);
+
+ PG_RETURN_FLOAT8(result);
+ }
+ else
+ PG_RETURN_NULL();
+ }
*** ./contrib/median/median.sql.in.orig 2010-08-19 12:39:06.192775857 +0200
--- ./contrib/median/median.sql.in 2010-08-19 12:28:24.230774219 +0200
***************
*** 0 ****
--- 1,31 ----
+ CREATE OR REPLACE FUNCTION median_transfn(internal, anyelement)
+ RETURNS internal
+ AS 'MODULE_PATHNAME'
+ LANGUAGE C IMMUTABLE;
+
+ CREATE OR REPLACE FUNCTION median_finalfn(internal)
+ RETURNS double precision
+ AS 'MODULE_PATHNAME'
+ LANGUAGE C IMMUTABLE;
+
+ CREATE AGGREGATE median(anyelement) (
+ SFUNC=median_transfn,
+ STYPE=internal,
+ FINALFUNC=median_finalfn
+ );
+
+ CREATE OR REPLACE FUNCTION percentile_transfn(internal, anyelement, p integer)
+ RETURNS internal
+ AS 'MODULE_PATHNAME'
+ LANGUAGE C IMMUTABLE;
+
+ CREATE OR REPLACE FUNCTION percentile_finalfn(internal)
+ RETURNS double precision
+ AS 'MODULE_PATHNAME'
+ LANGUAGE C IMMUTABLE;
+
+ CREATE AGGREGATE percentile(anyelement, int) (
+ SFUNC=percentile_transfn,
+ STYPE=internal,
+ FINALFUNC=percentile_finalfn
+ );
*** ./contrib/median/uninstall_median.sql.orig 2010-08-19 12:39:11.712777158 +0200
--- ./contrib/median/uninstall_median.sql 2010-08-19 12:37:25.800652539 +0200
***************
*** 0 ****
--- 1,6 ----
+ DROP FUNCTION median_transfn(internal, anyelement);
+ DROP FUNCTION median_finalfn(internal);
+ DROP AGGREGATE median(anyelement);
+ DROP FUNCTION percentile_transfn(internal, anyelement, p integer);
+ DROP FUNCTION percentile_finalfn(internal);
+ DROP AGGREGATE percentile(anyelement, int);
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers