sorry
little bit fixed patch
Pavel
2010/9/23 Pavel Stehule <[email protected]>:
> Hello
>
> I moved a "median" function to core.
>
> + doc part
> + regress test
>
> Regards
>
> Pavel Stehule
>
>
> 2010/9/20 Hitoshi Harada <[email protected]>:
>> 2010/8/19 Pavel Stehule <[email protected]>:
>>> Hello
>>>
>>> I am sending a prototype implementation of functions median and
>>> percentile. This implementation is very simple and I moved it to
>>> contrib for this moment - it is more easy maintainable. Later I'll
>>> move it to core.
>>
>> I've reviewed this patch.
>>
>> * The patch can apply cleanly and make doesn't print any errors nor
>> warnings. But it doesn't touch contrib/Makefile so I had to make by
>> changing dir to contrib/median.
>>
>> * Cosmetic coding style should be more appropriate, including trailing
>> white spaces and indentation positions.
>>
>> * Since these two aggregates use tuplesort inside it, there're
>> possible risk to cause out of memory in normal use case. I don't think
>> this fact is critical, but at least some notation should be referred
>> in docs.
>>
>> * It doesn't contain any document nor regression tests.
>>
>> * They should be callable in window function context; for example
>>
>> contrib_regression=# select median(a) over (order by a) from t1;
>> ERROR: invalid tuplesort state
>>
>> or at least user-friend message should be printed.
>>
>> * The returned type is controversy. median(int) returns float8 as the
>> patch intended, but avg(int) returns numeric. AFAIK only avg(float8)
>> returns float8.
>>
>> * percentile() is more problematic; first, the second argument for the
>> aggregate takes N of "N%ile" as int, like 50 if you want 50%ile, but
>> it doesn't care about negative values or more than 100. In addition,
>> the second argument is taken at the first non-NULL value of the first
>> argument, but the second argument is semantically constant. For
>> example, for (1.. 10) value of a in table t1,
>>
>> contrib_regression=# select percentile(a, a * 10 order by a) from t1;
>> percentile
>> ------------
>> 1
>> (1 row)
>>
>> contrib_regression=# select percentile(a, a * 10 order by a desc) from t1;
>> percentile
>> ------------
>> 10
>> (1 row)
>>
>> and if the argument comes from the subquery which doesn't contain
>> ORDER BY clause, you cannot predict the result.
>>
>> That's all of my quick review up to now.
>>
>> Regards,
>>
>> --
>> Hitoshi Harada
>>
>
*** ./doc/src/sgml/func.sgml.orig 2010-08-17 06:37:20.000000000 +0200
--- ./doc/src/sgml/func.sgml 2010-09-23 15:03:10.021576906 +0200
***************
*** 10304,10309 ****
--- 10304,10329 ----
<row>
<entry>
+ <indexterm>
+ <primary>Arithmetic median</primary>
+ <secondary>median</secondary>
+ </indexterm>
+ <function>median(<replaceable class="parameter">expression</replaceable>)</function>
+ </entry>
+ <entry>
+ <type>smallint</type>, <type>int</type>,
+ <type>bigint</type>, <type>real</type>, <type>double
+ precision</type>, or <type>numeric</type>
+ </entry>
+ <entry>
+ <type>double precision</type> for floating-point arguments,
+ otherwise <type>numeric</type>
+ </entry>
+ <entry>arithmetic median</entry>
+ </row>
+
+ <row>
+ <entry>
<function>regr_avgx(<replaceable class="parameter">Y</replaceable>, <replaceable class="parameter">X</replaceable>)</function>
</entry>
<entry>
*** ./src/backend/utils/adt/numeric.c.orig 2010-08-04 19:33:09.000000000 +0200
--- ./src/backend/utils/adt/numeric.c 2010-09-23 15:24:04.025453940 +0200
***************
*** 30,39 ****
--- 30,42 ----
#include "catalog/pg_type.h"
#include "libpq/pqformat.h"
#include "miscadmin.h"
+ #include "parser/parse_coerce.h"
+ #include "parser/parse_oper.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/int8.h"
#include "utils/numeric.h"
+ #include "utils/tuplesort.h"
/* ----------
* Uncomment the following to enable compilation of dump_numeric()
***************
*** 144,149 ****
--- 147,161 ----
union NumericChoice choice; /* choice of format */
};
+ /*
+ * used as type of state variable median's function
+ */
+ typedef struct
+ {
+ int nelems; /* number of valid entries */
+ Tuplesortstate *sortstate;
+ FmgrInfo cast_func_finfo;
+ } MedianAggState;
/*
* Interpretation of high bits.
***************
*** 6173,6175 ****
--- 6185,6467 ----
var->digits = digits;
var->ndigits = ndigits;
}
+
+ static MedianAggState *
+ makeMedianAggState(FunctionCallInfo fcinfo, Oid valtype, Oid targettype)
+ {
+ MemoryContext oldctx;
+ MemoryContext aggcontext;
+ MedianAggState *aggstate;
+ Oid sortop,
+ castfunc;
+ CoercionPathType pathtype;
+
+ /*
+ * We cannot to allow a median function under WindowAgg State.
+ * This content needs a repetetive a calling of final function,
+ * what isn't possible now with using a tuplestore.
+ * tuplesort_performsort can be called only once, and some has
+ * to call a tuplesort_end.
+ */
+ if (fcinfo->context && IsA(fcinfo->context, WindowAggState))
+ {
+ /* cannot be called inside like windowAggregates */
+ elog(ERROR, "median_transfn called as windows aggregates");
+ }
+
+ if (!AggCheckCallContext(fcinfo, &aggcontext))
+ {
+ /* cannot be called directly because of internal-type argument */
+ elog(ERROR, "median_transfn called in non-aggregate context");
+ }
+
+ oldctx = MemoryContextSwitchTo(aggcontext);
+
+ aggstate = (MedianAggState *) palloc0(sizeof(MedianAggState));
+
+ valtype = get_fn_expr_argtype(fcinfo->flinfo, 1);
+ get_sort_group_operators(valtype,
+ true, false, false,
+ &sortop, NULL, NULL);
+
+ aggstate->sortstate = tuplesort_begin_datum(valtype,
+ sortop,
+ SORTBY_NULLS_DEFAULT,
+ work_mem, true);
+
+ MemoryContextSwitchTo(oldctx);
+
+ if (valtype != targettype)
+ {
+ /* find a cast function */
+ pathtype = find_coercion_pathway(targettype, valtype,
+ COERCION_EXPLICIT,
+ &castfunc);
+ if (pathtype == COERCION_PATH_FUNC)
+ {
+ Assert(OidIsValid(castfunc));
+ fmgr_info_cxt(castfunc, &aggstate->cast_func_finfo,
+ aggcontext);
+ }
+ else if (pathtype == COERCION_PATH_RELABELTYPE)
+ {
+ aggstate->cast_func_finfo.fn_oid = InvalidOid;
+ }
+ else
+ elog(ERROR, "no conversion function from %s %s",
+ format_type_be(valtype),
+ format_type_be(targettype));
+ }
+
+ return aggstate;
+ }
+
+ /*
+ * append a non NULL value to tuplesort
+ */
+ static Datum
+ common_median_transfn(FunctionCallInfo fcinfo, Oid typoid, Oid targetoid)
+ {
+ MedianAggState *aggstate;
+
+ aggstate = PG_ARGISNULL(0) ? NULL : (MedianAggState *) PG_GETARG_POINTER(0);
+
+ if (!PG_ARGISNULL(1))
+ {
+ if (aggstate == NULL)
+ aggstate = makeMedianAggState(fcinfo, typoid, targetoid);
+
+ tuplesort_putdatum(aggstate->sortstate, PG_GETARG_DATUM(1), false);
+ aggstate->nelems++;
+ }
+
+ PG_RETURN_POINTER(aggstate);
+ }
+
+ /*
+ * just wrappers to be opr sanity checks happy
+ */
+ Datum
+ median_numeric_transfn(PG_FUNCTION_ARGS)
+ {
+ return common_median_transfn(fcinfo,
+ NUMERICOID, NUMERICOID);
+ }
+
+ Datum
+ median_int8_transfn(PG_FUNCTION_ARGS)
+ {
+ return common_median_transfn(fcinfo,
+ INT8OID, NUMERICOID);
+ }
+
+ Datum
+ median_int4_transfn(PG_FUNCTION_ARGS)
+ {
+ return common_median_transfn(fcinfo,
+ INT4OID, NUMERICOID);
+ }
+
+ Datum
+ median_int2_transfn(PG_FUNCTION_ARGS)
+ {
+ return common_median_transfn(fcinfo,
+ INT2OID, NUMERICOID);
+ }
+
+ Datum
+ median_double_transfn(PG_FUNCTION_ARGS)
+ {
+ return common_median_transfn(fcinfo,
+ FLOAT8OID, FLOAT8OID);
+ }
+
+ Datum
+ median_float_transfn(PG_FUNCTION_ARGS)
+ {
+ return common_median_transfn(fcinfo,
+ FLOAT4OID, FLOAT8OID);
+ }
+
+ /*
+ * Used for reading values from tuplesort. The value has to be
+ * double or cast function is defined (and used).
+ */
+ static double
+ to_double(Datum value, FmgrInfo *cast_func_finfo)
+ {
+ /* when valtype is same as target type, returns directly */
+ if (cast_func_finfo->fn_oid == InvalidOid)
+ return DatumGetFloat8(value);
+
+ return DatumGetFloat8(FunctionCall1(cast_func_finfo, value));
+ }
+
+ /*
+ * Used as final function for median when result is double.
+ */
+ Datum
+ median_finalfn_double(PG_FUNCTION_ARGS)
+ {
+ MedianAggState *aggstate;
+
+ Assert(AggCheckCallContext(fcinfo, NULL));
+
+ aggstate = PG_ARGISNULL(0) ? NULL : (MedianAggState *) PG_GETARG_POINTER(0);
+
+ if (aggstate != NULL)
+ {
+ int lidx;
+ int hidx;
+ Datum value;
+ bool isNull;
+ int i = 1;
+ double result = 0;
+
+ hidx = aggstate->nelems / 2 + 1;
+ lidx = (aggstate->nelems + 1) / 2;
+
+ tuplesort_performsort(aggstate->sortstate);
+
+ while (tuplesort_getdatum(aggstate->sortstate,
+ true,
+ &value, &isNull))
+ {
+ if (i++ == lidx)
+ {
+ result = to_double(value, &aggstate->cast_func_finfo);
+
+ if (lidx != hidx)
+ {
+ tuplesort_getdatum(aggstate->sortstate,
+ true,
+ &value, &isNull);
+ result = (result + to_double(value, &aggstate->cast_func_finfo)) / 2.0;
+ }
+ break;
+ }
+ }
+
+ tuplesort_end(aggstate->sortstate);
+
+ PG_RETURN_FLOAT8(result);
+ }
+ else
+ PG_RETURN_NULL();
+ }
+
+ /*
+ * Used for reading values from tuplesort. The value has to be
+ * Numeric or cast function is defined (and used).
+ */
+ static Numeric
+ to_numeric(Datum value, FmgrInfo *cast_func_finfo)
+ {
+ /* when valtype is same as target type, returns directly */
+ if (cast_func_finfo->fn_oid == InvalidOid)
+ return DatumGetNumeric(value);
+
+ return DatumGetNumeric(FunctionCall1(cast_func_finfo, value));
+ }
+
+ /*
+ * Used as final function for median when result is numeric.
+ */
+ Datum
+ median_finalfn_numeric(PG_FUNCTION_ARGS)
+ {
+ MedianAggState *aggstate;
+
+ Assert(AggCheckCallContext(fcinfo, NULL));
+
+ aggstate = PG_ARGISNULL(0) ? NULL : (MedianAggState *) PG_GETARG_POINTER(0);
+
+ if (aggstate != NULL)
+ {
+ int lidx;
+ int hidx;
+ Datum value;
+ bool isNull;
+ int i = 1;
+ Numeric result = NULL; /* be compiler quiet */
+
+ hidx = aggstate->nelems / 2 + 1;
+ lidx = (aggstate->nelems + 1) / 2;
+
+ tuplesort_performsort(aggstate->sortstate);
+
+ while (tuplesort_getdatum(aggstate->sortstate,
+ true,
+ &value, &isNull))
+ {
+ if (i++ == lidx)
+ {
+ result = to_numeric(value, &aggstate->cast_func_finfo);
+
+ if (lidx != hidx)
+ {
+ Numeric stack;
+
+ tuplesort_getdatum(aggstate->sortstate,
+ true,
+ &value, &isNull);
+ stack = to_numeric(value, &aggstate->cast_func_finfo);
+ stack = DatumGetNumeric(DirectFunctionCall2(numeric_add,
+ NumericGetDatum(stack),
+ NumericGetDatum(result)));
+ result = DatumGetNumeric(DirectFunctionCall2(numeric_div,
+ NumericGetDatum(stack),
+ DirectFunctionCall1(float4_numeric,
+ Float4GetDatum(2.0))));
+ }
+ break;
+ }
+ }
+
+ tuplesort_end(aggstate->sortstate);
+
+ PG_RETURN_NUMERIC(result);
+ }
+ else
+ PG_RETURN_NULL();
+ }
*** ./src/include/catalog/pg_aggregate.h.orig 2010-08-05 20:21:17.000000000 +0200
--- ./src/include/catalog/pg_aggregate.h 2010-09-23 14:32:25.572576527 +0200
***************
*** 226,231 ****
--- 226,239 ----
/* text */
DATA(insert ( 3538 string_agg_transfn string_agg_finalfn 0 2281 _null_ ));
+ /* median */
+ DATA(insert ( 3123 median_double_transfn median_finalfn_double 0 2281 _null_ ));
+ DATA(insert ( 3124 median_float_transfn median_finalfn_double 0 2281 _null_ ));
+ DATA(insert ( 3125 median_numeric_transfn median_finalfn_numeric 0 2281 _null_ ));
+ DATA(insert ( 3126 median_int8_transfn median_finalfn_numeric 0 2281 _null_ ));
+ DATA(insert ( 3127 median_int4_transfn median_finalfn_numeric 0 2281 _null_ ));
+ DATA(insert ( 3128 median_int2_transfn median_finalfn_numeric 0 2281 _null_ ));
+
/*
* prototypes for functions in pg_aggregate.c
*/
*** ./src/include/catalog/pg_proc.h.orig 2010-08-13 20:36:25.000000000 +0200
--- ./src/include/catalog/pg_proc.h 2010-09-23 14:36:55.381451421 +0200
***************
*** 4823,4828 ****
--- 4823,4857 ----
DATA(insert OID = 3114 ( nth_value PGNSP PGUID 12 1 0 0 f t f t f i 2 0 2283 "2283 23" _null_ _null_ _null_ _null_ window_nth_value _null_ _null_ _null_ ));
DESCR("fetch the Nth row value");
+ /* median aggregate */
+ DATA(insert OID = 3115 ( median_numeric_transfn PGNSP PGUID 12 1 0 0 f f f f f i 2 0 2281 "2281 1700" _null_ _null_ _null_ _null_ median_numeric_transfn _null_ _null_ _null_ ));
+ DESCR("median transident function for numeric");
+ DATA(insert OID = 3116 ( median_int8_transfn PGNSP PGUID 12 1 0 0 f f f f f i 2 0 2281 "2281 20" _null_ _null_ _null_ _null_ median_int8_transfn _null_ _null_ _null_ ));
+ DESCR("median transident function for bigint");
+ DATA(insert OID = 3117 ( median_int4_transfn PGNSP PGUID 12 1 0 0 f f f f f i 2 0 2281 "2281 23" _null_ _null_ _null_ _null_ median_int4_transfn _null_ _null_ _null_ ));
+ DESCR("median transident function for int");
+ DATA(insert OID = 3118 ( median_int2_transfn PGNSP PGUID 12 1 0 0 f f f f f i 2 0 2281 "2281 21" _null_ _null_ _null_ _null_ median_int2_transfn _null_ _null_ _null_ ));
+ DESCR("median transident function for smallint");
+ DATA(insert OID = 3119 ( median_double_transfn PGNSP PGUID 12 1 0 0 f f f f f i 2 0 2281 "2281 701" _null_ _null_ _null_ _null_ median_double_transfn _null_ _null_ _null_ ));
+ DESCR("median transident function for double precision");
+ DATA(insert OID = 3120 ( median_float_transfn PGNSP PGUID 12 1 0 0 f f f f f i 2 0 2281 "2281 700" _null_ _null_ _null_ _null_ median_float_transfn _null_ _null_ _null_ ));
+ DESCR("median transident function for real");
+ DATA(insert OID = 3121 ( median_finalfn_double PGNSP PGUID 12 1 0 0 f f f f f i 1 0 701 "2281" _null_ _null_ _null_ _null_ median_finalfn_double _null_ _null_ _null_ ));
+ DESCR("median final function with double precision result");
+ DATA(insert OID = 3122 ( median_finalfn_numeric PGNSP PGUID 12 1 0 0 f f f f f i 1 0 1700 "2281" _null_ _null_ _null_ _null_ median_finalfn_numeric _null_ _null_ _null_ ));
+ DESCR("median final function with numeric result");
+ DATA(insert OID = 3123 ( median PGNSP PGUID 12 1 0 0 t f f f f i 1 0 701 "701" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ ));
+ DESCR("median aggregate");
+ DATA(insert OID = 3124 ( median PGNSP PGUID 12 1 0 0 t f f f f i 1 0 701 "700" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ ));
+ DESCR("median aggregate");
+ DATA(insert OID = 3125 ( median PGNSP PGUID 12 1 0 0 t f f f f i 1 0 1700 "1700" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ ));
+ DESCR("median aggregate");
+ DATA(insert OID = 3126 ( median PGNSP PGUID 12 1 0 0 t f f f f i 1 0 1700 "20" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ ));
+ DESCR("median aggregate");
+ DATA(insert OID = 3127 ( median PGNSP PGUID 12 1 0 0 t f f f f i 1 0 1700 "23" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ ));
+ DESCR("median aggregate");
+ DATA(insert OID = 3128 ( median PGNSP PGUID 12 1 0 0 t f f f f i 1 0 1700 "21" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ ));
+ DESCR("median aggregate");
/*
* Symbolic values for provolatile column: these indicate whether the result
*** ./src/include/utils/builtins.h.orig 2010-08-10 23:51:00.000000000 +0200
--- ./src/include/utils/builtins.h 2010-09-23 14:22:16.929453299 +0200
***************
*** 924,929 ****
--- 924,937 ----
extern Datum int8_avg(PG_FUNCTION_ARGS);
extern Datum width_bucket_numeric(PG_FUNCTION_ARGS);
extern Datum hash_numeric(PG_FUNCTION_ARGS);
+ extern Datum median_numeric_transfn(PG_FUNCTION_ARGS);
+ extern Datum median_int8_transfn(PG_FUNCTION_ARGS);
+ extern Datum median_int4_transfn(PG_FUNCTION_ARGS);
+ extern Datum median_int2_transfn(PG_FUNCTION_ARGS);
+ extern Datum median_double_transfn(PG_FUNCTION_ARGS);
+ extern Datum median_float_transfn(PG_FUNCTION_ARGS);
+ extern Datum median_finalfn_double(PG_FUNCTION_ARGS);
+ extern Datum median_finalfn_numeric(PG_FUNCTION_ARGS);
/* ri_triggers.c */
extern Datum RI_FKey_check_ins(PG_FUNCTION_ARGS);
*** ./src/test/regress/expected/numeric.out.orig 2009-08-10 20:29:27.000000000 +0200
--- ./src/test/regress/expected/numeric.out 2010-09-23 14:44:18.000000000 +0200
***************
*** 1372,1374 ****
--- 1372,1402 ----
12345678901234567890
(1 row)
+ -- median test
+ create table median_test (a int, b bigint, c smallint, d numeric, e double precision, f real);
+ insert into median_test select i,i,i,i,i,i from generate_series(1,10) g(i);
+ select median(a),
+ median(b),
+ median(c),
+ median(d),
+ median(e),
+ median(f) from median_test;
+ median | median | median | median | median | median
+ --------------------+--------------------+--------------------+--------------------+--------+--------
+ 5.5000000000000000 | 5.5000000000000000 | 5.5000000000000000 | 5.5000000000000000 | 5.5 | 5.5
+ (1 row)
+
+ truncate table median_test;
+ insert into median_test select i,i,i,i,i,i from generate_series(1,11) g(i);
+ select median(a),
+ median(b),
+ median(c),
+ median(d),
+ median(e),
+ median(f) from median_test;
+ median | median | median | median | median | median
+ --------+--------+--------+--------+--------+--------
+ 6 | 6 | 6 | 6 | 6 | 6
+ (1 row)
+
+ drop table median_test;
*** ./src/test/regress/sql/numeric.sql.orig 2009-08-10 20:29:27.000000000 +0200
--- ./src/test/regress/sql/numeric.sql 2010-09-23 14:49:45.480455045 +0200
***************
*** 824,826 ****
--- 824,845 ----
select 12345678901234567890 / 123;
select div(12345678901234567890, 123);
select div(12345678901234567890, 123) * 123 + 12345678901234567890 % 123;
+
+ -- median test
+ create table median_test (a int, b bigint, c smallint, d numeric, e double precision, f real);
+ insert into median_test select i,i,i,i,i,i from generate_series(1,10) g(i);
+ select median(a),
+ median(b),
+ median(c),
+ median(d),
+ median(e),
+ median(f) from median_test;
+ truncate table median_test;
+ insert into median_test select i,i,i,i,i,i from generate_series(1,11) g(i);
+ select median(a),
+ median(b),
+ median(c),
+ median(d),
+ median(e),
+ median(f) from median_test;
+ drop table median_test;
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers