Hello

I am sending a prototype implementation of functions median and
percentile. This implementation is very simple and I moved it to
contrib for this moment - it is more easy maintainable. Later I'll
move it to core.

These functions are relative simple, there are not barrier for
implementation own specific mutations of this functions - so I propose
move to core only basic and well known form of these to core.

postgres=# select median(v) from generate_series(1,10) g(v);
 median
────────
    5.5
(1 row)

Time: 1.475 ms
postgres=# select percentile(v,50) from generate_series(1,10) g(v);
 percentile
────────────
          5
(1 row)

Time: 0.626 ms

This implementation is based on tuplesort and the speed is relative
well - the result from 1000000 rows is less 1 sec.

Regards

Pavel Stehule
*** ./contrib/median/Makefile.orig	2010-08-19 12:38:56.144777253 +0200
--- ./contrib/median/Makefile	2010-08-18 20:23:39.180156339 +0200
***************
*** 0 ****
--- 1,17 ----
+ # $PostgreSQL: pgsql/contrib/median/Makefile,v 1.1 2008/07/29 18:31:20 tgl Exp $
+ 
+ MODULES = median
+ DATA_built = median.sql
+ DATA = uninstall_median.sql
+ REGRESS = median
+ 
+ ifdef USE_PGXS
+ PG_CONFIG = pg_config
+ PGXS := $(shell $(PG_CONFIG) --pgxs)
+ include $(PGXS)
+ else
+ subdir = contrib/median
+ top_builddir = ../..
+ include $(top_builddir)/src/Makefile.global
+ include $(top_srcdir)/contrib/contrib-global.mk
+ endif
*** ./contrib/median/median.c.orig	2010-08-19 12:39:01.456650776 +0200
--- ./contrib/median/median.c	2010-08-19 12:35:32.104649418 +0200
***************
*** 0 ****
--- 1,244 ----
+ /*
+  * $PostgreSQL: pgsql/contrib/citext/citext.c,v 1.2 2009/06/11 14:48:50 momjian Exp $
+  */
+ #include "postgres.h"
+ 
+ #include "funcapi.h"
+ #include "miscadmin.h"
+ #include "catalog/pg_type.h"
+ #include "parser/parse_coerce.h"
+ #include "parser/parse_oper.h"
+ #include "utils/builtins.h"
+ #include "utils/tuplesort.h"
+ 
+ Datum median_transfn(PG_FUNCTION_ARGS);
+ Datum median_finalfn(PG_FUNCTION_ARGS);
+ Datum percentile_transfn(PG_FUNCTION_ARGS);
+ Datum percentile_finalfn(PG_FUNCTION_ARGS);
+ 
+ 
+ #ifdef PG_MODULE_MAGIC
+ PG_MODULE_MAGIC;
+ #endif
+ 
+ PG_FUNCTION_INFO_V1(median_transfn);
+ PG_FUNCTION_INFO_V1(median_finalfn);
+ PG_FUNCTION_INFO_V1(percentile_transfn);
+ PG_FUNCTION_INFO_V1(percentile_finalfn);
+ 
+ 
+ typedef struct
+ {
+ 	int	nelems;		/* number of valid entries */
+ 	Tuplesortstate *sortstate;
+ 	FmgrInfo	cast_func_finfo;
+ 	int	p;		/* nth for percentille */
+ } StatAggState;
+ 
+ static StatAggState *
+ makeStatAggState(FunctionCallInfo fcinfo)
+ {
+ 	MemoryContext oldctx;
+ 	MemoryContext aggcontext;
+ 	StatAggState *aggstate;
+ 	Oid	sortop,
+ 			castfunc;
+ 	Oid	   valtype;
+ 	CoercionPathType		pathtype;
+ 
+ 	if (!AggCheckCallContext(fcinfo, &aggcontext))
+ 	{
+ 		/* cannot be called directly because of internal-type argument */
+ 		elog(ERROR, "string_agg_transfn called in non-aggregate context");
+ 	}
+ 
+ 	oldctx = MemoryContextSwitchTo(aggcontext);
+ 	
+ 	aggstate = (StatAggState *) palloc(sizeof(StatAggState));
+ 	aggstate->nelems = 0;
+ 	
+ 	valtype = get_fn_expr_argtype(fcinfo->flinfo, 1);
+ 	get_sort_group_operators(valtype,
+ 						    true, false, false,
+ 						    &sortop, NULL, NULL);
+ 
+ 	aggstate->sortstate = tuplesort_begin_datum(valtype,
+ 							sortop,
+ 							SORTBY_NULLS_DEFAULT,
+ 							work_mem, false);
+ 
+ 	MemoryContextSwitchTo(oldctx);
+ 
+ 	if (valtype != FLOAT8OID)
+ 	{
+ 		/* find a cast function */
+ 	
+ 		pathtype = find_coercion_pathway(FLOAT8OID, valtype,
+ 									COERCION_EXPLICIT,
+ 									&castfunc);
+ 		if (pathtype == COERCION_PATH_FUNC)
+ 		{
+ 			Assert(OidIsValid(castfunc));
+ 			fmgr_info_cxt(castfunc, &aggstate->cast_func_finfo,
+ 									    aggcontext);
+ 		} 
+ 		else if (pathtype == COERCION_PATH_RELABELTYPE)
+ 		{
+ 			aggstate->cast_func_finfo.fn_oid = InvalidOid;
+ 		}
+ 		else 
+ 			elog(ERROR, "no conversion function from %s %s",
+ 					 format_type_be(valtype),
+ 					 format_type_be(FLOAT8OID));
+ 	}
+ 
+ 	return aggstate;
+ }
+ 
+ /*
+  *  append a non NULL value to tuplesort
+  */
+ Datum
+ median_transfn(PG_FUNCTION_ARGS)
+ {
+ 	StatAggState *aggstate;
+ 	
+ 	aggstate = PG_ARGISNULL(0) ? NULL : (StatAggState *) PG_GETARG_POINTER(0);
+ 	
+ 	if (!PG_ARGISNULL(1))
+ 	{
+ 		if (aggstate == NULL)
+ 			aggstate = makeStatAggState(fcinfo);
+ 		
+ 		tuplesort_putdatum(aggstate->sortstate, PG_GETARG_DATUM(1), false);
+ 		aggstate->nelems++;
+ 	}
+ 
+ 	PG_RETURN_POINTER(aggstate);
+ }
+ 
+ static double 
+ to_double(Datum value, FmgrInfo *cast_func_finfo)
+ {
+ 	if (cast_func_finfo->fn_oid != InvalidOid)
+ 	{
+ 		return DatumGetFloat8(FunctionCall1(cast_func_finfo, value));
+ 	}
+ 	else
+ 		return DatumGetFloat8(value);
+ }
+ 
+ Datum
+ median_finalfn(PG_FUNCTION_ARGS)
+ {
+ 	StatAggState *aggstate;
+ 	
+ 	Assert(AggCheckCallContext(fcinfo, NULL));
+ 
+ 	aggstate = PG_ARGISNULL(0) ? NULL : (StatAggState *) PG_GETARG_POINTER(0);
+ 
+ 	if (aggstate != NULL)
+ 	{
+ 		int	lidx;
+ 		int	hidx;
+ 		Datum	   value;
+ 		bool	isNull;
+ 		int		i = 1;
+ 		double	result = 0;
+ 		
+ 		hidx = aggstate->nelems / 2 + 1;
+ 		lidx = (aggstate->nelems + 1) / 2;
+ 
+ 		tuplesort_performsort(aggstate->sortstate);
+ 		
+ 		while (tuplesort_getdatum(aggstate->sortstate, 
+ 								true,
+ 								      &value, &isNull))
+ 		{
+ 			if (i++ == lidx)
+ 			{
+ 				result = to_double(value, &aggstate->cast_func_finfo);
+ 				
+ 				if (lidx != hidx)
+ 				{
+ 					tuplesort_getdatum(aggstate->sortstate,
+ 										true,
+ 										      &value, &isNull);
+ 					result = (result + to_double(value, &aggstate->cast_func_finfo)) / 2.0;
+ 				}
+ 				break;
+ 			}
+ 		}
+ 		
+ 		tuplesort_end(aggstate->sortstate);
+ 
+ 		PG_RETURN_FLOAT8(result);
+ 	}
+ 	else
+ 		PG_RETURN_NULL();
+ }
+ 
+ /*
+  *  append a non NULL value to tuplesort
+  */
+ Datum
+ percentile_transfn(PG_FUNCTION_ARGS)
+ {
+ 	StatAggState *aggstate;
+ 	
+ 	aggstate = PG_ARGISNULL(0) ? NULL : (StatAggState *) PG_GETARG_POINTER(0);
+ 	
+ 	if (!PG_ARGISNULL(1))
+ 	{
+ 		if (aggstate == NULL)
+ 		{
+ 			aggstate = makeStatAggState(fcinfo);
+ 			aggstate->p = PG_GETARG_INT32(2);
+ 		}
+ 		
+ 		tuplesort_putdatum(aggstate->sortstate, PG_GETARG_DATUM(1), false);
+ 		aggstate->nelems++;
+ 	}
+ 
+ 	PG_RETURN_POINTER(aggstate);
+ }
+ 
+ Datum
+ percentile_finalfn(PG_FUNCTION_ARGS)
+ {
+ 	StatAggState *aggstate;
+ 	
+ 	Assert(AggCheckCallContext(fcinfo, NULL));
+ 
+ 	aggstate = PG_ARGISNULL(0) ? NULL : (StatAggState *) PG_GETARG_POINTER(0);
+ 
+ 	if (aggstate != NULL)
+ 	{
+ 		Datum	   value;
+ 		bool	isNull;
+ 		int		i = 1;
+ 		double	result = 0;
+ 		int 		n;
+ 		
+ 		n = ((aggstate->p / 100.0) * (aggstate->nelems - 1)) + 1;
+ 	
+ 		tuplesort_performsort(aggstate->sortstate);
+ 		
+ 		while (tuplesort_getdatum(aggstate->sortstate, 
+ 								true,
+ 								      &value, &isNull))
+ 		{
+ 			if (i++ == n)
+ 			{
+ 				result = to_double(value, &aggstate->cast_func_finfo);
+ 				break;
+ 			}
+ 		}
+ 		
+ 		tuplesort_end(aggstate->sortstate);
+ 
+ 		PG_RETURN_FLOAT8(result);
+ 	}
+ 	else
+ 		PG_RETURN_NULL();
+ }
*** ./contrib/median/median.sql.in.orig	2010-08-19 12:39:06.192775857 +0200
--- ./contrib/median/median.sql.in	2010-08-19 12:28:24.230774219 +0200
***************
*** 0 ****
--- 1,31 ----
+ CREATE OR REPLACE FUNCTION median_transfn(internal, anyelement)
+ RETURNS internal
+ AS 'MODULE_PATHNAME'
+ LANGUAGE C IMMUTABLE;
+ 
+ CREATE OR REPLACE FUNCTION median_finalfn(internal)
+ RETURNS double precision
+ AS 'MODULE_PATHNAME'
+ LANGUAGE C IMMUTABLE;
+ 
+ CREATE AGGREGATE median(anyelement) (
+     SFUNC=median_transfn,
+     STYPE=internal,
+     FINALFUNC=median_finalfn
+ );
+ 
+ CREATE OR REPLACE FUNCTION percentile_transfn(internal, anyelement, p integer)
+ RETURNS internal
+ AS 'MODULE_PATHNAME'
+ LANGUAGE C IMMUTABLE;
+ 
+ CREATE OR REPLACE FUNCTION percentile_finalfn(internal)
+ RETURNS double precision
+ AS 'MODULE_PATHNAME'
+ LANGUAGE C IMMUTABLE;
+ 
+ CREATE AGGREGATE percentile(anyelement, int) (
+     SFUNC=percentile_transfn,
+     STYPE=internal,
+     FINALFUNC=percentile_finalfn
+ );
*** ./contrib/median/uninstall_median.sql.orig	2010-08-19 12:39:11.712777158 +0200
--- ./contrib/median/uninstall_median.sql	2010-08-19 12:37:25.800652539 +0200
***************
*** 0 ****
--- 1,6 ----
+ DROP FUNCTION median_transfn(internal, anyelement);
+ DROP FUNCTION median_finalfn(internal);
+ DROP AGGREGATE median(anyelement);
+ DROP FUNCTION percentile_transfn(internal, anyelement, p integer);
+ DROP FUNCTION percentile_finalfn(internal);
+ DROP AGGREGATE percentile(anyelement, int);
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to