Changeset: 8e4fa53e2f76 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/8e4fa53e2f76
Branch: default
Log Message:
Merge ordered-set-aggregates into default.
diffs (truncated from 9159 to 300 lines):
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -1035,6 +1035,7 @@ const char grant_rolesRef[];
const char groupRef[];
const char groupbyRef[];
const char groupdoneRef[];
+const char groupedfirstnRef[];
const char growRef[];
int hasSideEffects(MalBlkPtr mb, InstrPtr p, int strict);
const char hgeRef[];
diff --git a/monetdb5/mal/mal_namespace.c b/monetdb5/mal/mal_namespace.c
--- a/monetdb5/mal/mal_namespace.c
+++ b/monetdb5/mal/mal_namespace.c
@@ -197,6 +197,7 @@ const char grantRef[] = "grant";
const char grant_rolesRef[] = "grant_roles";
const char groupbyRef[] = "groupby";
const char groupdoneRef[] = "groupdone";
+const char groupedfirstnRef[] = "groupedfirstn";
const char groupRef[] = "group";
const char growRef[] = "grow";
const char hgeRef[] = "hge";
@@ -462,6 +463,7 @@ initNamespace(void)
fixName(grant_rolesRef);
fixName(groupbyRef);
fixName(groupdoneRef);
+ fixName(groupedfirstnRef);
fixName(groupRef);
fixName(growRef);
fixName(hgeRef);
diff --git a/monetdb5/mal/mal_namespace.h b/monetdb5/mal/mal_namespace.h
--- a/monetdb5/mal/mal_namespace.h
+++ b/monetdb5/mal/mal_namespace.h
@@ -123,6 +123,7 @@ mal_export const char grantRef[];
mal_export const char grant_rolesRef[];
mal_export const char groupbyRef[];
mal_export const char groupdoneRef[];
+mal_export const char groupedfirstnRef[];
mal_export const char groupRef[];
mal_export const char growRef[];
mal_export const char hgeRef[];
diff --git a/monetdb5/optimizer/opt_mergetable.c b/monetdb5/optimizer/opt_mergetable.c
--- a/monetdb5/optimizer/opt_mergetable.c
+++ b/monetdb5/optimizer/opt_mergetable.c
@@ -2317,6 +2317,8 @@ OPTmergetableImplementation(Client cntxt
}
/* pack if there is a group statement following a groupdone (ie
aggr(distinct)) */
+ if (getModuleId(p) == algebraRef && getFunctionId(p) ==
groupedfirstnRef)
+ groupdone = 1;
if (getModuleId(p) == groupRef && p->argc == 5
&& (getFunctionId(p) == subgroupRef
|| getFunctionId(p) == subgroupdoneRef
diff --git a/monetdb5/optimizer/opt_mitosis.c b/monetdb5/optimizer/opt_mitosis.c
--- a/monetdb5/optimizer/opt_mitosis.c
+++ b/monetdb5/optimizer/opt_mitosis.c
@@ -67,13 +67,15 @@ OPTmitosisImplementation(Client cntxt, M
nr_aggrs += (p->argc > 2 && getModuleId(p) == aggrRef);
nr_maps += (isMapOp(p));
- if (p->argc > 2 && getModuleId(p) == aggrRef
+ if ((getModuleId(p) == algebraRef &&
+ getFunctionId(p) == groupedfirstnRef) ||
+ (p->argc > 2 && getModuleId(p) == aggrRef
&& getFunctionId(p) != subcountRef && getFunctionId(p)
!= subminRef
&& getFunctionId(p) != submaxRef && getFunctionId(p) !=
subavgRef
&& getFunctionId(p) != subsumRef && getFunctionId(p) !=
subprodRef
&& getFunctionId(p) != countRef && getFunctionId(p) !=
minRef
&& getFunctionId(p) != maxRef && getFunctionId(p) !=
avgRef
- && getFunctionId(p) != sumRef && getFunctionId(p) !=
prodRef) {
+ && getFunctionId(p) != sumRef && getFunctionId(p) !=
prodRef)) {
pieces = 0;
goto bailout;
}
diff --git a/sql/backends/monet5/UDF/pyapi3/Tests/pyloader3_01.test b/sql/backends/monet5/UDF/pyapi3/Tests/pyloader3_01.test
--- a/sql/backends/monet5/UDF/pyapi3/Tests/pyloader3_01.test
+++ b/sql/backends/monet5/UDF/pyapi3/Tests/pyloader3_01.test
@@ -86,7 +86,7 @@ DROP LOADER myfunc2
statement ok
DROP LOADER myfunc3
-query ITTTIIIIIIII rowsort
+query ITTTIIIIIIIII rowsort
SELECT * FROM functions WHERE name='myfunc'
----
diff --git a/sql/backends/monet5/rel_bin.c b/sql/backends/monet5/rel_bin.c
--- a/sql/backends/monet5/rel_bin.c
+++ b/sql/backends/monet5/rel_bin.c
@@ -1686,10 +1686,17 @@ exp_bin(backend *be, sql_exp *e, stmt *l
} break;
case e_aggr: {
list *attr = e->l;
+ list *r = e->r;
stmt *as = NULL;
sql_subfunc *a = e->f;
assert(sel == NULL);
+ /* cases
+ * 0) count(*)
+ * 1) general aggregation
+ * 2) aggregation with required order (quantile etc)
+ * 3) aggregation with optional order by, group_concat,
xml_agg
+ * */
if (attr && attr->h) {
node *en;
list *l = sa_list(sql->sa);
@@ -1742,6 +1749,37 @@ exp_bin(backend *be, sql_exp *e, stmt *l
return NULL;
append(l, stmt_project(be, u, a));
}
+ if (r) {
+ list *obe = r->h->data;
+ if (obe && obe->h) {
+ stmt *orderby = NULL, *orderby_vals,
*orderby_ids, *orderby_grp;
+ /* order by */
+ if (grp) {
+ orderby = stmt_order(be, grp,
true, true);
+
+ orderby_vals = stmt_result(be,
orderby, 0);
+ orderby_ids = stmt_result(be,
orderby, 1);
+ orderby_grp = stmt_result(be,
orderby, 2);
+ }
+ for (node *n = obe->h; n; n = n->next) {
+ sql_exp *oe = n->data;
+ stmt *os = exp_bin(be, oe,
left, right, NULL, NULL, NULL, sel, depth+1, 0, push);
+ if (orderby)
+ orderby =
stmt_reorder(be, os, is_ascending(oe), nulls_last(oe), orderby_ids,
orderby_grp);
+ else
+ orderby =
stmt_order(be, os, is_ascending(oe), nulls_last(oe));
+ orderby_vals = stmt_result(be,
orderby, 0);
+ orderby_ids = stmt_result(be,
orderby, 1);
+ orderby_grp = stmt_result(be,
orderby, 2);
+ }
+ /* depending on type of aggr project
input or ordered column */
+ stmt *h = l->h->data;
+ l->h->data = h = stmt_project(be,
orderby_ids, h);
+ if (grp)
+ grp = stmt_project(be,
orderby_ids, grp);
+ (void)orderby_vals;
+ }
+ }
as = stmt_list(be, l);
} else {
/* count(*) may need the default group (relation) and
@@ -4424,10 +4462,36 @@ rel2bin_project(backend *be, sql_rel *re
/* distinct, topn returns at least N (unique groups) */
int distinct = need_distinct(rel);
stmt *limit = NULL, *lpiv = NULL, *lgid = NULL;
-
- for (n=oexps->h; n; n = n->next) {
+ int nr_obe = list_length(oexps);
+
+ /* check for partition columns */
+ stmt *grp = NULL, *ext = NULL, *cnt = NULL;
+ for (n=oexps->h; n; n = n->next, nr_obe--) {
+ sql_exp *gbe = n->data;
+ bool last = (!n->next ||
!is_partitioning((sql_exp*)n->next->data));
+
+ if (!topn->grouped || !is_partitioning(gbe))
+ break;
+ /* create group by */
+ stmt *gbcol = exp_bin(be, gbe, sub, NULL, NULL, NULL,
NULL, NULL, 0, 0, 0);
+
+ if (!gbcol) {
+ assert(sql->session->status == -10); /* Stack
overflow errors shouldn't terminate the server */
+ return NULL;
+ }
+ if (!gbcol->nrcols)
+ gbcol = stmt_const(be,
bin_find_smallest_column(be, sub), gbcol);
+ stmt *groupby = stmt_group(be, gbcol, grp, ext, cnt,
last);
+ grp = stmt_result(be, groupby, 0);
+ ext = stmt_result(be, groupby, 1);
+ cnt = stmt_result(be, groupby, 2);
+ gbcol = stmt_alias(be, gbcol, gbe->alias.label,
exp_find_rel_name(gbe), exp_name(gbe));
+ }
+
+ if (grp)
+ lgid = grp;
+ for (; n; n = n->next, nr_obe--) {
sql_exp *orderbycole = n->data;
- int last = (n->next == NULL);
stmt *orderbycolstmt = exp_bin(be, orderbycole, sub,
psub, NULL, NULL, NULL, NULL, 0, 0, 0);
@@ -4435,18 +4499,18 @@ rel2bin_project(backend *be, sql_rel *re
return NULL;
/* handle constants */
- if (orderbycolstmt->nrcols == 0 && !last) /* no need to
sort on constant */
+ if (orderbycolstmt->nrcols == 0 && n->next) /* no need
to sort on constant */
continue;
orderbycolstmt = column(be, orderbycolstmt);
if (!limit) { /* topn based on a single column */
- limit = stmt_limit(be, orderbycolstmt, NULL,
NULL, stmt_atom_lng(be, 0), l, distinct, is_ascending(orderbycole),
nulls_last(orderbycole), last, 1);
+ limit = stmt_limit(be, orderbycolstmt, NULL,
grp, stmt_atom_lng(be, 0), l, distinct, is_ascending(orderbycole),
nulls_last(orderbycole), nr_obe, 1);
} else { /* topn based on 2 columns */
- limit = stmt_limit(be, orderbycolstmt, lpiv,
lgid, stmt_atom_lng(be, 0), l, distinct, is_ascending(orderbycole),
nulls_last(orderbycole), last, 1);
+ limit = stmt_limit(be, orderbycolstmt, lpiv,
lgid, stmt_atom_lng(be, 0), l, distinct, is_ascending(orderbycole),
nulls_last(orderbycole), nr_obe, 1);
}
if (!limit)
return NULL;
lpiv = limit;
- if (!last) {
+ if (!grp && nr_obe > 1) {
lpiv = stmt_result(be, limit, 0);
lgid = stmt_result(be, limit, 1);
if (lpiv == NULL || lgid == NULL)
@@ -4455,6 +4519,8 @@ rel2bin_project(backend *be, sql_rel *re
}
limit = lpiv;
+ if (limit && grp)
+ limit = stmt_project(be, stmt_selectnonil(be, limit,
NULL), limit);
stmt *s;
for (n=pl->h ; n; n = n->next) {
stmt *os = n->data;
@@ -4719,6 +4785,17 @@ rel2bin_groupby(backend *be, sql_rel *re
return cursub;
}
+static bool
+has_partitioning( list *exps )
+{
+ for(node *n = exps->h; n; n = n->next){
+ sql_exp *gbe = n->data;
+ if (is_partitioning(gbe))
+ return true;
+ }
+ return false;
+}
+
static stmt *
rel2bin_topn(backend *be, sql_rel *rel, list *refs)
{
@@ -4737,6 +4814,8 @@ rel2bin_topn(backend *be, sql_rel *rel,
sub = rel2bin_project(be, rl, refs,
rel);
} else
sub = rel2bin_project(be, rl, refs, rel);
+ if (rel->grouped && rl->r && has_partitioning(rl->r))
+ return sub;
} else {
sub = subrel_bin(be, rl, refs);
}
@@ -4774,8 +4853,9 @@ rel2bin_topn(backend *be, sql_rel *rel,
if (!l || !o)
return NULL;
+
sc = column(be, sc);
- limit = stmt_limit(be, sc /*stmt_alias(be, sc, 0, tname,
cname)*/, NULL, NULL, o, l, 0,0,0,0,0);
+ limit = stmt_limit(be, sc, NULL, NULL, o, l, 0,0,0,0,0);
for ( ; n; n = n->next) {
stmt *sc = n->data;
diff --git a/sql/backends/monet5/rel_physical.c b/sql/backends/monet5/rel_physical.c
--- a/sql/backends/monet5/rel_physical.c
+++ b/sql/backends/monet5/rel_physical.c
@@ -17,8 +17,9 @@
#include "rel_exp.h"
#include "rel_rel.h"
-#define IS_ORDER_BASED_AGGR(name) (strcmp((name), "quantile") == 0 ||
strcmp((name), "quantile_avg") == 0 || \
- strcmp((name), "median") == 0 ||
strcmp((name), "median_avg") == 0)
+#define IS_ORDER_BASED_AGGR(fname, argc) (\
+ (argc == 2 && (strcmp((fname), "quantile") == 0
|| strcmp((fname), "quantile_avg") == 0)) || \
+ (argc == 1 && (strcmp((fname), "median") == 0
|| strcmp((fname), "median_avg") == 0)))
static sql_rel *
rel_add_orderby(visitor *v, sql_rel *rel)
@@ -31,10 +32,10 @@ rel_add_orderby(visitor *v, sql_rel *rel
if (is_aggr(e->type)) {
sql_subfunc *af = e->f;
- list *aa = e->l;
+ list *aa = e->l;
/* for now we only handle one sort
order */
- if
(IS_ORDER_BASED_AGGR(af->func->base.name) && aa && list_length(aa) == 2) {
+ if (aa &&
IS_ORDER_BASED_AGGR(af->func->base.name, list_length(aa))) {
sql_exp *nobe = aa->h->data;
if (nobe && !obe) {
sql_rel *l = rel->l =
rel_project(v->sql->sa, rel->l, rel_projections(v->sql, rel->l, NULL, 1, 1));
diff --git a/sql/backends/monet5/sql_cat.c b/sql/backends/monet5/sql_cat.c
--- a/sql/backends/monet5/sql_cat.c
+++ b/sql/backends/monet5/sql_cat.c
@@ -1070,7 +1070,7 @@ create_func(mvc *sql, char *sname, char
sql->errstr[0] = '\0';
}
}
- switch (mvc_create_func(&nf, sql, NULL, s, f->base.name, f->ops,
f->res, f->type, f->lang, f->mod, f->imp, f->query, f->varres, f->vararg,
f->system, f->side_effect)) {
+ switch (mvc_create_func(&nf, sql, NULL, s, f->base.name, f->ops,
f->res, f->type, f->lang, f->mod, f->imp, f->query, f->varres, f->vararg,
f->system, f->side_effect, f->order_required, f->opt_order)) {
case -1:
throw(SQL,"sql.create_func", SQLSTATE(HY013)
MAL_MALLOC_FAIL);
case -2:
diff --git a/sql/backends/monet5/sql_statement.c b/sql/backends/monet5/sql_statement.c
--- a/sql/backends/monet5/sql_statement.c
+++ b/sql/backends/monet5/sql_statement.c
@@ -1204,19 +1204,18 @@ stmt_result(backend *be, stmt *s, int nr
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-leave@monetdb.org