Just got a report on IRC of a bug in the array version of
percentile_cont; if two of the requested percentiles were between the
same pair of input rows, the result could be wrong or an error would
be generated.
e.g.
select percentile_cont(array[0.4,0.6]) within group (order by gs)
from generate_series(1,2) gs;
ERROR: missing row in percentile_cont
Proposed patch (against current master) attached.
--
Andrew (irc:RhodiumToad)
diff --git a/src/backend/utils/adt/orderedsetaggs.c b/src/backend/utils/adt/orderedsetaggs.c
index 135a14b..1efbf07 100644
--- a/src/backend/utils/adt/orderedsetaggs.c
+++ b/src/backend/utils/adt/orderedsetaggs.c
@@ -919,7 +919,7 @@ percentile_cont_multi_final_common(FunctionCallInfo fcinfo,
rownum = target_row;
}
- else
+ else if (target_row == rownum)
{
/*
* We are already at the target row, so we must previously
@@ -928,15 +928,23 @@ percentile_cont_multi_final_common(FunctionCallInfo fcinfo,
first_val = second_val;
}
- /* Fetch second_row if needed */
- if (need_lerp)
+ /*
+ * We might be lerping between the same two rows of input as the
+ * previous value. If so, rownum will already be ahead of
+ * target_row, and we skip any attempt to read more.
+ */
+ if (target_row == rownum)
{
-if (!tuplesort_getdatum(osastate-sortstate, true, second_val, isnull) || isnull)
- elog(ERROR, missing row in percentile_cont);
-rownum++;
+/* Fetch second_row if needed */
+if (need_lerp)
+{
+ if (!tuplesort_getdatum(osastate-sortstate, true, second_val, isnull) || isnull)
+ elog(ERROR, missing row in percentile_cont);
+ rownum++;
+}
+else
+ second_val = first_val;
}
- else
-second_val = first_val;
/* Compute appropriate result */
if (need_lerp)
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 58df854..40f5398 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1423,11 +1423,11 @@ from tenk1;
{{NULL,999,499},{749,249,NULL}}
(1 row)
-select percentile_cont(array[0,1,0.25,0.75,0.5,1]) within group (order by x)
+select percentile_cont(array[0,1,0.25,0.75,0.5,1,0.3,0.32,0.35,0.38,0.4]) within group (order by x)
from generate_series(1,6) x;
-percentile_cont
- {1,6,2.25,4.75,3.5,6}
+ percentile_cont
+--
+ {1,6,2.25,4.75,3.5,6,2.5,2.6,2.75,2.9,3}
(1 row)
select ten, mode() within group (order by string4) from tenk1 group by ten;
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 8096a6f..a84327d 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -533,7 +533,7 @@ select percentile_cont(array[0,0.25,0.5,0.75,1]) within group (order by thousand
from tenk1;
select percentile_disc(array[[null,1,0.5],[0.75,0.25,null]]) within group (order by thousand)
from tenk1;
-select percentile_cont(array[0,1,0.25,0.75,0.5,1]) within group (order by x)
+select percentile_cont(array[0,1,0.25,0.75,0.5,1,0.3,0.32,0.35,0.38,0.4]) within group (order by x)
from generate_series(1,6) x;
select ten, mode() within group (order by string4) from tenk1 group by ten;
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers