Changeset: e42d4c46818c for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/e42d4c46818c
Modified Files:
clients/Tests/MAL-signatures-hge.test
clients/Tests/MAL-signatures.test
monetdb5/modules/kernel/aggr.c
sql/backends/monet5/rel_bin.c
sql/backends/monet5/rel_physical.c
sql/server/rel_select.c
sql/server/sql_semantic.c
sql/test/BugTracker-2015/Tests/quantile_function_resolution.Bug-3773.test
Branch: default
Log Message:
- created new quantile variants with only a single argument precentage
- make sure these new quantiles get used, ie no project with const.
- changed aggregate time resolution, ie when we find an aggregate function with
the first argument type matching, we try to upgrade the second
This reduces (auto) casts. Also solved bug in return types of _dup_subaggr.
- in the rel_add_orderby, properly order (using nils first and ascending).
and reuse ordered columns as much as possible
diffs (truncated from 369 to 300 lines):
diff --git a/clients/Tests/MAL-signatures-hge.test
b/clients/Tests/MAL-signatures-hge.test
--- a/clients/Tests/MAL-signatures-hge.test
+++ b/clients/Tests/MAL-signatures-hge.test
@@ -1019,11 +1019,21 @@ command aggr.quantile(X_0:bat[:any_1], X
AGGRquantile;
Quantile aggregate
aggr
+quantile
+command aggr.quantile(X_0:bat[:any_1], X_1:dbl):any_1
+AGGRquantile_cst;
+Quantile aggregate
+aggr
quantile_avg
command aggr.quantile_avg(X_0:bat[:any_1], X_1:bat[:dbl]):dbl
AGGRquantile_avg;
Quantile aggregate
aggr
+quantile_avg
+command aggr.quantile_avg(X_0:bat[:any_1], X_1:dbl):dbl
+AGGRquantile_avg_cst;
+Quantile aggregate
+aggr
stdev
command aggr.stdev(X_0:bat[:bte], X_1:bat[:oid], X_2:bat[:any_1]):bat[:dbl]
AGGRstdev3_dbl;
diff --git a/clients/Tests/MAL-signatures.test
b/clients/Tests/MAL-signatures.test
--- a/clients/Tests/MAL-signatures.test
+++ b/clients/Tests/MAL-signatures.test
@@ -839,11 +839,21 @@ command aggr.quantile(X_0:bat[:any_1], X
AGGRquantile;
Quantile aggregate
aggr
+quantile
+command aggr.quantile(X_0:bat[:any_1], X_1:dbl):any_1
+AGGRquantile_cst;
+Quantile aggregate
+aggr
quantile_avg
command aggr.quantile_avg(X_0:bat[:any_1], X_1:bat[:dbl]):dbl
AGGRquantile_avg;
Quantile aggregate
aggr
+quantile_avg
+command aggr.quantile_avg(X_0:bat[:any_1], X_1:dbl):dbl
+AGGRquantile_avg_cst;
+Quantile aggregate
+aggr
stdev
command aggr.stdev(X_0:bat[:bte], X_1:bat[:oid], X_2:bat[:any_1]):bat[:dbl]
AGGRstdev3_dbl;
diff --git a/monetdb5/modules/kernel/aggr.c b/monetdb5/modules/kernel/aggr.c
--- a/monetdb5/modules/kernel/aggr.c
+++ b/monetdb5/modules/kernel/aggr.c
@@ -17,12 +17,12 @@
* grouped aggregates
*/
static str
-AGGRgrouped(bat *retval1, bat *retval2, const bat *bid, const bat *gid, const
bat *eid, const bat *sid,
+AGGRgrouped_bat_or_val(bat *retval1, bat *retval2, const bat *bid, const bat
*gid, const bat *eid, const bat *sid,
bool skip_nils, int scale, int tp,
BAT *(*grpfunc1)(BAT *, BAT *, BAT *, BAT *, int, bool),
gdk_return (*grpfunc2)(BAT **, BAT **, BAT *, BAT *,
BAT *, BAT *, int, bool, int),
BAT *(*quantilefunc)(BAT *, BAT *, BAT *, BAT *, int,
double, bool),
- const bat *quantile,
+ const bat *quantile, const double *quantile_val,
const char *malfunc)
{
BAT *b, *g, *e, *s, *bn = NULL, *cnts = NULL, *q = NULL;
@@ -34,7 +34,7 @@ AGGRgrouped(bat *retval1, bat *retval2,
/* if retval2 is non-NULL, we must have grpfunc2 */
assert(retval2 == NULL || grpfunc2 != NULL);
/* only quantiles need a quantile BAT */
- assert((quantilefunc == NULL) == (quantile == NULL));
+ assert((quantilefunc == NULL) == (quantile == NULL && quantile_val ==
NULL));
b = BATdescriptor(*bid);
g = gid ? BATdescriptor(*gid) : NULL;
@@ -46,7 +46,7 @@ AGGRgrouped(bat *retval1, bat *retval2,
(gid != NULL && g == NULL) ||
(eid != NULL && e == NULL) ||
(sid != NULL && s == NULL) ||
- (quantile != NULL && q == NULL)) {
+ ((quantile != NULL && quantile_val != NULL) && q == NULL)) {
BBPreclaim(b);
BBPreclaim(g);
BBPreclaim(e);
@@ -64,25 +64,29 @@ AGGRgrouped(bat *retval1, bat *retval2,
if (grpfunc1) {
bn = (*grpfunc1)(b, g, e, s, tp, skip_nils);
} else if (quantilefunc) {
- assert(BATcount(q) > 0 || BATcount(b) == 0);
- assert(q->ttype == TYPE_dbl);
- if (BATcount(q) == 0) {
- qvalue = 0.5;
+ if (!quantile_val) {
+ assert(BATcount(q) > 0 || BATcount(b) == 0);
+ assert(q->ttype == TYPE_dbl);
+ if (BATcount(q) == 0) {
+ qvalue = 0.5;
+ } else {
+ MT_lock_set(&q->theaplock);
+ qvalue = ((const dbl *)Tloc(q, 0))[0];
+ MT_lock_unset(&q->theaplock);
+ if (qvalue < 0 || qvalue > 1) {
+ BBPunfix(b->batCacheid);
+ BBPreclaim(g);
+ BBPreclaim(e);
+ BBPreclaim(s);
+ BBPunfix(q->batCacheid);
+ throw(MAL, malfunc,
+ "quantile value of %f is not in
range [0,1]", qvalue);
+ }
+ }
+ BBPunfix(q->batCacheid);
} else {
- MT_lock_set(&q->theaplock);
- qvalue = ((const dbl *)Tloc(q, 0))[0];
- MT_lock_unset(&q->theaplock);
- if (qvalue < 0 || qvalue > 1) {
- BBPunfix(b->batCacheid);
- BBPreclaim(g);
- BBPreclaim(e);
- BBPreclaim(s);
- BBPunfix(q->batCacheid);
- throw(MAL, malfunc,
- "quantile value of %f is not in range
[0,1]", qvalue);
- }
+ qvalue = *(quantile_val);
}
- BBPunfix(q->batCacheid);
bn = (*quantilefunc)(b, g, e, s, tp, qvalue, skip_nils);
} else if ((*grpfunc2)(&bn, retval2 ? &cnts : NULL, b, g, e, s, tp,
skip_nils, scale) !=
GDK_SUCCEED) {
@@ -105,6 +109,18 @@ AGGRgrouped(bat *retval1, bat *retval2,
}
static str
+AGGRgrouped(bat *retval1, bat *retval2, const bat *bid, const bat *gid, const
bat *eid, const bat *sid,
+ bool skip_nils, int scale, int tp,
+ BAT *(*grpfunc1)(BAT *, BAT *, BAT *, BAT *, int, bool),
+ gdk_return (*grpfunc2)(BAT **, BAT **, BAT *, BAT *,
BAT *, BAT *, int, bool, int),
+ BAT *(*quantilefunc)(BAT *, BAT *, BAT *, BAT *, int,
double, bool),
+ const bat *quantile,
+ const char *malfunc)
+{
+ return AGGRgrouped_bat_or_val(retval1, retval2, bid, gid, eid, sid,
skip_nils, scale, tp, grpfunc1, grpfunc2, quantilefunc, quantile, NULL,
malfunc);
+}
+
+static str
AGGRsum3_bte(bat *retval, const bat *bid, const bat *gid, const bat *eid)
{
return AGGRgrouped(retval, NULL, bid, gid, eid, NULL, true, 0, TYPE_bte,
@@ -893,6 +909,21 @@ AGGRquantile(void *retval, const bat *bi
}
static str
+AGGRquantile_cst(void *retval, const bat *bid, const dbl *q)
+{
+ str err;
+ bat rval;
+ if ((err = AGGRgrouped_bat_or_val(&rval, NULL, bid, NULL, NULL, NULL,
true,
+ 0, TYPE_any, NULL, NULL,
BATgroupquantile,
+ NULL, q,
"aggr.subquantile")) == MAL_SUCCEED) {
+ oid pos = 0;
+ err = ALGfetchoid(retval, &rval, &pos);
+ BBPrelease(rval);
+ }
+ return err;
+}
+
+static str
AGGRsubquantile(bat *retval, const bat *bid, const bat *quantile, const bat
*gid, const bat *eid, const bit *skip_nils)
{
return AGGRgrouped(retval, NULL, bid, gid, eid, NULL, *skip_nils,
@@ -956,6 +987,21 @@ AGGRquantile_avg(dbl *retval, const bat
}
static str
+AGGRquantile_avg_cst(dbl *retval, const bat *bid, const dbl *q)
+{
+ str err;
+ bat rval;
+ if ((err = AGGRgrouped_bat_or_val(&rval, NULL, bid, NULL, NULL, NULL,
true,
+ 0, TYPE_any, NULL, NULL,
BATgroupquantile_avg,
+ NULL, q,
"aggr.subquantile_avg")) == MAL_SUCCEED) {
+ oid pos = 0;
+ err = ALGfetchoid(retval, &rval, &pos);
+ BBPrelease(rval);
+ }
+ return err;
+}
+
+static str
AGGRsubquantile_avg(bat *retval, const bat *bid, const bat *quantile, const
bat *gid, const bat *eid, const bit *skip_nils)
{
return AGGRgrouped(retval, NULL, bid, gid, eid, NULL, *skip_nils,
@@ -1447,12 +1493,14 @@ mel_func aggr_init_funcs[] = {
command("aggr", "submedian", AGGRsubmedian, false, "Grouped median
aggregate", args(1,5,
batargany("",1),batargany("b",1),batarg("g",oid),batargany("e",2),arg("skip_nils",bit))),
command("aggr", "submedian", AGGRsubmediancand, false, "Grouped median
aggregate with candidate list", args(1,6,
batargany("",1),batargany("b",1),batarg("g",oid),batargany("e",2),batarg("s",oid),arg("skip_nils",bit))),
command("aggr", "quantile", AGGRquantile, false, "Quantile aggregate",
args(1,3, argany("",1),batargany("b",1),batarg("q",dbl))),
+ command("aggr", "quantile", AGGRquantile_cst, false, "Quantile aggregate",
args(1,3, argany("",1),batargany("b",1),arg("q",dbl))),
command("aggr", "subquantile", AGGRsubquantile, false, "Grouped quantile
aggregate", args(1,6,
batargany("",1),batargany("b",1),batarg("q",dbl),batarg("g",oid),batargany("e",2),arg("skip_nils",bit))),
command("aggr", "subquantile", AGGRsubquantilecand, false, "Grouped quantile
aggregate with candidate list", args(1,7,
batargany("",1),batargany("b",1),batarg("q",dbl),batarg("g",oid),batargany("e",2),batarg("s",oid),arg("skip_nils",bit))),
command("aggr", "median_avg", AGGRmedian_avg, false, "Median aggregate",
args(1,2, arg("",dbl),batargany("b",1))),
command("aggr", "submedian_avg", AGGRsubmedian_avg, false, "Grouped median
aggregate", args(1,5,
batarg("",dbl),batargany("b",1),batarg("g",oid),batargany("e",2),arg("skip_nils",bit))),
command("aggr", "submedian_avg", AGGRsubmediancand_avg, false, "Grouped
median aggregate with candidate list", args(1,6,
batarg("",dbl),batargany("b",1),batarg("g",oid),batargany("e",2),batarg("s",oid),arg("skip_nils",bit))),
command("aggr", "quantile_avg", AGGRquantile_avg, false, "Quantile
aggregate", args(1,3, arg("",dbl),batargany("b",1),batarg("q",dbl))),
+ command("aggr", "quantile_avg", AGGRquantile_avg_cst, false, "Quantile
aggregate", args(1,3, arg("",dbl),batargany("b",1),arg("q",dbl))),
command("aggr", "subquantile_avg", AGGRsubquantile_avg, false, "Grouped
quantile aggregate", args(1,6,
batarg("",dbl),batargany("b",1),batarg("q",dbl),batarg("g",oid),batargany("e",2),arg("skip_nils",bit))),
command("aggr", "subquantile_avg", AGGRsubquantilecand_avg, false, "Grouped
quantile aggregate with candidate list", args(1,7,
batarg("",dbl),batargany("b",1),batarg("q",dbl),batarg("g",oid),batargany("e",2),batarg("s",oid),arg("skip_nils",bit))),
command("aggr", "str_group_concat", AGGRstr_group_concat, false, "Grouped
string tail concat", args(1,4,
batarg("",str),batarg("b",str),batarg("g",oid),batargany("e",1))),
diff --git a/sql/backends/monet5/rel_bin.c b/sql/backends/monet5/rel_bin.c
--- a/sql/backends/monet5/rel_bin.c
+++ b/sql/backends/monet5/rel_bin.c
@@ -1254,6 +1254,17 @@ exp2bin_copyfrombinary(backend *be, sql_
return stmt_list(be, columns);
}
+static bool
+is_const_func(sql_subfunc *f, list *attr)
+{
+ if (list_length(attr) != 2)
+ return false;
+ if (strcmp(f->func->base.name, "quantile") == 0 ||
+ strcmp(f->func->base.name, "quantile_avg") == 0)
+ return true;
+ return false;
+}
+
stmt *
exp_bin(backend *be, sql_exp *e, stmt *left, stmt *right, stmt *grp, stmt
*ext, stmt *cnt, stmt *sel, int depth, int reduce, int push)
{
@@ -1465,7 +1476,7 @@ exp_bin(backend *be, sql_exp *e, stmt *l
as = exp_bin(be, at, left, right, NULL, NULL,
NULL, sel, depth+1, 0, push);
- if (as && as->nrcols <= 0 && left)
+ if (as && as->nrcols <= 0 && left &&
(!is_const_func(a, attr) || grp))
as = stmt_const(be,
bin_find_smallest_column(be, left), as);
if (en == attr->h && !en->next &&
exp_aggr_is_count(e))
as = exp_count_no_nil_arg(e, ext, at,
as);
diff --git a/sql/backends/monet5/rel_physical.c
b/sql/backends/monet5/rel_physical.c
--- a/sql/backends/monet5/rel_physical.c
+++ b/sql/backends/monet5/rel_physical.c
@@ -21,6 +21,7 @@ rel_add_orderby(visitor *v, sql_rel *rel
{
if (is_groupby(rel->op)) {
if (rel->exps && !rel->r) { /* find quantiles */
+ sql_exp *obe = NULL, *oberef = NULL;
for(node *n = rel->exps->h; n; n = n->next) {
sql_exp *e = n->data;
@@ -30,24 +31,30 @@ rel_add_orderby(visitor *v, sql_rel *rel
/* for now we only handle one sort
order */
if
(IS_ORDER_BASED_AGGR(af->func->base.name) && aa && list_length(aa) == 2) {
- sql_exp *obe = aa->h->data;
- if (obe) {
+ sql_exp *nobe = aa->h->data;
+ if (nobe && !obe) {
sql_rel *l = rel->l =
rel_project(v->sql->sa, rel->l, rel_projections(v->sql, rel->l, NULL, 1, 1));
+ obe = nobe;
+ oberef = nobe;
if (l) {
- if
(!is_alias(obe->type)) {
-
append(l->exps, obe);
- obe =
exp_label(v->sql->sa, obe, ++v->sql->label);
-
aa->h->data = exp_ref(v->sql, obe);
+ if
(!is_alias(nobe->type)) {
+ oberef
= nobe = exp_label(v->sql->sa, exp_copy(v->sql, nobe), ++v->sql->label);
+
append(l->exps, nobe);
}
+
set_nulls_first(nobe);
+
set_ascending(nobe);
+ aa->h->data =
exp_ref(v->sql, nobe);
list *o = l->r
= sa_list(v->sql->sa);
if (o)
-
append(o, obe);
+
append(o, nobe);
}
+ } else if (exp_match_exp(nobe,
obe)) {
+ aa->h->data =
exp_ref(v->sql, oberef);
}
- return rel;
}
}
}
+ return rel;
}
}
return rel;
diff --git a/sql/server/rel_select.c b/sql/server/rel_select.c
--- a/sql/server/rel_select.c
+++ b/sql/server/rel_select.c
@@ -3787,10 +3787,11 @@ static sql_exp *
if (!a && list_length(exps) > 1) {
sql_subtype *t1 = exp_subtype(exps->h->data);
a = sql_bind_member(sql, sname, aname,
exp_subtype(exps->h->data), F_AGGR, list_length(exps), false, NULL);
-
- if (list_length(exps) != 2 || (!EC_NUMBER(t1->type->eclass) ||
!a || subtype_cmp(
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]