Update of /cvsroot/monetdb/pathfinder/compiler/mil
In directory 23jxhf1.ch3.sourceforge.com:/tmp/cvs-serv11514/mil
Modified Files:
milgen.brg
Log Message:
-- Replaced the aggregate operators count, min, max, avg, sum, prod, seqty1,
and all in the algebra with a single aggregate operator ``aggr''
that can handle multiple aggregates. Each aggregate entry
is of kind count, min, max, avg, sum, prod, seqty1, all, or dist.
-- Added a new aggregate kind ``dist'' that makes it possible to represent
group-by columns that functionally depend on the partitioning criterion
in the result of the grouping aggregate.
-- Added rewrite that merges aggregates.
-- Added rewrite that removes superfluous aggregates.
-- Added rewrite that pushes a rank operator through an aggregate.
-- Extended the XML import to cope with the old
as well as the new representation of aggregates.
U milgen.brg
Index: milgen.brg
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/mil/milgen.brg,v
retrieving revision 1.218
retrieving revision 1.219
diff -u -d -r1.218 -r1.219
--- milgen.brg 20 May 2009 16:00:46 -0000 1.218
+++ milgen.brg 12 Jun 2009 13:06:16 -0000 1.219
@@ -173,20 +173,13 @@
%term bool_or = 47
%term to = 50
%term count_ext = 54
-%term count = 55
-%term avg = 56
-%term max_ = 57
-%term min_ = 58
-%term sum = 59
+%term aggr = 55
%term mark = 60
%term rank = 61
%term mark_grp = 62
%term type = 63
%term type_assert = 64
%term cast = 65
-%term seqty1 = 66
-%term all = 67
-%term prod = 68
%term llscjoin = 100
%term llscjoin_dup = 101
%term doc_tbl = 120
@@ -261,11 +254,7 @@
Rel: bool_or (Rel) = 62 (10);
Rel: to (Rel) = 64 (10);
Rel: count_ext (Rel, Rel) = 65 (10);
-Rel: count (Rel) = 66 (10);
-Rel: avg (Rel) = 67 (10);
-Rel: max_ (Rel) = 68 (10);
-Rel: min_ (Rel) = 69 (10);
-Rel: sum (Rel) = 70 (10);
+Rel: aggr (Rel) = 66 (10);
Rel: mark (Rel) = 71 (10);
Rel: rank (Rel) = 72 (10);
Rel: rank (std_sort (Rel)) = 73 (10);
@@ -274,9 +263,6 @@
Rel: type (Rel) = 76 (10);
Rel: type_assert (Rel) = 77 (10);
Rel: cast (Rel) = 78 (10);
-Rel: seqty1 (Rel) = 79 (10);
-Rel: all (Rel) = 80 (10);
-Rel: prod (Rel) = 81 (10);
Rel: llscjoin (Rel) = 90 (10);
Rel: llscjoin_dup (Rel) = 91 (10);
@@ -1743,74 +1729,6 @@
} /* fold) */
/**
- * @brief Generic handling of aggregation functions (avg, max, min and sum).
- *
- * @param op A MIL operation that implements the function
- * of interest (as a function pointer to the
- * constructor function).
- * @param gop A MIL operation that implements the function
- * of interest (as a function pointer to the
- * constructor function), grouped version.
- * @param p The physical algebra tree node that we are to translate.
- * This function will actually fill @a p's environment
- * <code>p->env</code>.
- */
-static void
-aggr_function (PFmil_t * (*op) (const PFmil_t *),
- PFmil_t * (*gop) (const PFmil_t *),
- PFpa_op_t *p)
-{ /* fold( */
- PFalg_col_t part = p->sem.aggr.part,
- col = p->sem.aggr.col,
- res = p->sem.aggr.res;
- PFalg_simple_type_t part_ty,
- col_ty = type_of (L(p), col),
- res_ty = type_of (p, res);
-
- assert (col_ty == res_ty);
-
- if (part != col_NULL) {
- mvar_t *v = new_var (1);
- mvar_t *res_var = new_var (p->refctr);
- mvar_t *part_var = new_var (p->refctr);
-
- part_ty = type_of (L(p), part);
-
- execute (
- /* v :=
- {gop}(col.reverse().join(part).reverse()) */
- assgn (var (v->name),
- gop (reverse (
- join (reverse (VAR (L(p)->env, col, col_ty)),
- VAR (L(p)->env, part, part_ty))))),
- /* res := v.reverse ().mark (0...@0).reverse (); */
- assgn (var (res_var->name),
- reverse (mark (reverse (var (v->name)), lit_oid (0)))),
- /* part := v.mark (0...@0).reverse (); */
- assgn (var (part_var->name),
- reverse (mark (var (v->name), lit_oid (0)))));
-
- env_add (p->env, res, res_ty, res_var);
- env_add (p->env, part, part_ty, part_var);
-
- unpin (v, 1);
- }
- else {
- mvar_t *v = new_var (p->refctr);
- execute (
- assgn (var (v->name),
- op (VAR (L(p)->env, col, col_ty))),
- assgn (var (v->name),
- append (
- seqbase (
- new (type (mty_void), implty_ (res_ty)),
- lit_oid (0)),
- var (v->name))));
- env_add (p->env, res, res_ty, v);
- }
-} /* fold) */
-
-/**
* @brief Translation of the cross product.
*
* @param p The physical algebra tree node that we are to translate.
@@ -6767,90 +6685,270 @@
unpin (v, 1);
} break; /* fold) */
- /* Rel: count (Rel) */
+ /* Rel: aggr (Rel) */
case 66: /* fold( */
- if (p->sem.count.part != col_NULL) {
- mvar_t *res = new_var (p->refctr);
- mvar_t *part = new_var (p->refctr);
- PFalg_simple_type_t ty = type_of (L(p), p->sem.count.part);
+ /**
+ * We have two completely different implementations:
+ * - a partitioned aggregate (that returns for every group a row
+ * and an empty result for an empty input), and
+ * - an unpartitioned aggregate (that always return a single row).
+ */
+
+ /* reserve temporary variable */
+ v = new_var (1);
- v = new_var (1);
+ /* Handle partitioned aggregate. */
+ if (p->sem.aggr.part != col_NULL) {
+ PFalg_col_t part = p->sem.aggr.part;
+ PFalg_simple_type_t part_ty = type_of (L(p), part);
+ mvar_t *part_var = new_var (p->refctr);
+ PFmil_t *PART_VAR = VAR (L(p)->env, part, part_ty);
+ bool dist = false;
- if (!type_bit_check (ty))
+ if (!type_bit_check (part_ty))
PFoops (OOPS_FATAL,
- "HashCount not implemented for polymorphic
groups");
+ "Aggregates not implemented "
+ "for polymorphic columns");
+
+ /* add the part MIL variable to the environment */
+ env_add (p->env, part, part_ty, part_var);
+
+ /* 1st run: cope with all non-distinct aggregates */
+ for (unsigned int i = 0; i < p->sem.aggr.count; i++) {
+ PFalg_aggr_kind_t kind = p->sem.aggr.aggr[i].kind;
+ PFalg_col_t res = p->sem.aggr.aggr[i].res,
+ col = p->sem.aggr.aggr[i].col;
+ PFalg_simple_type_t col_ty = type_of (p, res);
+ mvar_t *res_var;
+
+ /* skip distinct aggregate for now */
+ if (kind == alg_aggr_dist) {
+ dist = true;
+ continue;
+ }
+
+ if (!type_bit_check (col_ty))
+ PFoops (OOPS_FATAL,
+ "Aggregates not implemented "
+ "for polymorphic columns");
+
+ /* create a new result MIL variable
+ and add it to the environment */
+ res_var = new_var (p->refctr);
+ env_add (p->env, res, col_ty, res_var);
+
+ /* prepare the input for the aggregate */
+ if (col) {
+ assert (type_of (L(p), col) == col_ty);
+ execute (assgn (var (v->name),
+ leftfetchjoin (
+ reverse (PART_VAR),
+ VAR (L(p)->env, col, col_ty))));
+ }
+ else
+ execute (assgn (var (v->name),
+ reverse (PART_VAR)));
+ switch (kind) {
+ case alg_aggr_dist:
+ assert (0);
+ break;
+ case alg_aggr_min:
+ execute (assgn (var (v->name),
+ PFmil_gmin (var (v->name))));
+ break;
+ case alg_aggr_max:
+ execute (assgn (var (v->name),
+ PFmil_gmax (var (v->name))));
+ break;
+ case alg_aggr_all:
+ /* v := [=]({sum}(v),{count}(v)); */
+ execute (assgn (var (v->name),
+ meq (PFmil_gsum (
+ mcast (type (mty_int),
+ var (v->name))),
+ gcount (var (v->name)))));
+ break;
+ case alg_aggr_count:
+ /* align with integer representation (lng) */
+ execute (assgn (var (v->name),
+ mcast (type (mty_lng),
+ gcount (var (v->name)))));
+ break;
+ case alg_aggr_avg:
+ execute (assgn (var (v->name),
+ PFmil_gavg (var (v->name))));
+ break;
+ case alg_aggr_sum:
+ execute (assgn (var (v->name),
+ PFmil_gsum (var (v->name))));
+ break;
+ case alg_aggr_seqty1:
+ /* v := [and]([=](1,{sum}(v)),[=](1,{count}(v)));
*/
+ execute (assgn (var (v->name),
+ PFmil_mand (
+ meq (lit_int (1),
+ PFmil_gsum (
+ mcast (
+ type (mty_int),
+ var (v->name)))),
+ meq (lit_int (1),
+ gcount (var
(v->name))))));
+ break;
+ case alg_aggr_prod:
+ execute (assgn (var (v->name),
+ PFmil_gprod (var (v->name))));
+ break;
+ }
+ /* bind the aggregate result to the result MIL variable */
+ execute (
+ assgn (var (res_var->name),
+ tmark (var (v->name), lit_oid (0))));
+ }
+
+ /* generate a relation that can be used to adjust the
+ distinct aggregate values */
+ if (dist)
+ execute (assgn (var (v->name),
+ PFmil_gmin (reverse (PART_VAR))));
+
+ /* bind the result partition to the partition MIL variable */
execute (
- /* v := {count}(p_in.reverse ()) */
- assgn (
- var (v->name),
- gcount (
- reverse (VAR (L(p)->env,
- p->sem.count.part, ty)))),
- /* align with integer representation (lng) */
- assgn (var (v->name),
- mcast (type (mty_lng), var (v->name))),
- /* res := v.reverse ().mark (0...@0).reverse (); */
- assgn (
- var (res->name),
- reverse (
- mark (
- reverse (var (v->name)),
- lit_oid (0)))),
- /* part := v.mark (0...@0).reverse (); */
- assgn (
- var (part->name),
- reverse (
- mark (var (v->name), lit_oid (0)))));
+ assgn (var (part_var->name),
+ hmark (var (v->name), lit_oid (0))));
- env_add (p->env, p->sem.count.res, aat_int, res);
- env_add (p->env, p->sem.count.part, ty, part);
+ /* 2nd run: cope with all distinct aggregates */
+ for (unsigned int i = 0; i < p->sem.aggr.count; i++) {
+ PFalg_aggr_kind_t kind = p->sem.aggr.aggr[i].kind;
+ PFalg_col_t res = p->sem.aggr.aggr[i].res,
+ col = p->sem.aggr.aggr[i].col;
+ PFalg_simple_type_t ty;
+ mvar_t *col_var,
+ *res_var;
+
+ /* only treat distinct aggregate */
+ if (kind != alg_aggr_dist)
+ continue;
+
+ /* map all variables related with this column */
+ for (unsigned int i = 0; i < env_count (L(p)->env); i++)
+ if (env_at (L(p)->env, i).col == col) {
+ ty = env_at (L(p)->env, i).ty;
+ col_var = env_at (L(p)->env, i).mvar;
+ res_var = new_var (p->refctr);
- unpin (v, 1);
+ /* map variables */
+ execute (assgn (var (res_var->name),
+ tmark (
+ leftjoin (var (v->name),
+ var (col_var->name)),
+ lit_oid (0))));
+
+ env_add (p->env, res, ty, res_var);
+ }
+ }
}
+ /* Handle unpartitioned aggregate.
+
+ NOTE: The current implementation only generates unpartitioned
+ count aggregates. In case other aggregates are also used
+ the result for empty input sequences (namely the value nil)
+ requires additional care. */
else {
- v = new_var (p->refctr);
- execute (
- assgn (var (v->name),
- count (var (env_at (L(p)->env, 0).mvar->name))),
- /* align with integer representation (lng) */
- assgn (var (v->name),
- cast (type (mty_lng), var (v->name))),
- assgn (var (v->name),
- append (
- seqbase (
- new (type (mty_void), type (mty_lng)),
- lit_oid (0)),
- var (v->name))));
- env_add (p->env, p->sem.count.res, aat_int, v);
- }
- break; /* fold) */
- /* Rel: avg (Rel) */
- case 67:
- aggr_function (PFmil_avg, PFmil_gavg, p);
- break;
+ for (unsigned int i = 0; i < p->sem.aggr.count; i++) {
+ PFalg_aggr_kind_t kind = p->sem.aggr.aggr[i].kind;
+ PFalg_col_t res = p->sem.aggr.aggr[i].res,
+ col = p->sem.aggr.aggr[i].col;
+ PFalg_simple_type_t col_ty = type_of (p, res);
+ mvar_t *res_var;
+
+ if (!type_bit_check (col_ty))
+ PFoops (OOPS_FATAL,
+ "Aggregates not implemented "
+ "for polymorphic columns");
- /* Rel: max_ (Rel) */
- case 68:
- aggr_function (PFmil_max, PFmil_gmax, p);
- break;
+ /* create a new result MIL variable
+ and add it to the environment */
+ res_var = new_var (p->refctr);
+ env_add (p->env, res, col_ty, res_var);
- /* Rel: min_ (Rel) */
- case 69:
- aggr_function (PFmil_min, PFmil_gmin, p);
- break;
+ /* prepare the input for the aggregate */
+ if (col) {
+ assert (type_of (L(p), col) == col_ty);
+ execute (assgn (var (v->name),
+ VAR (L(p)->env, col, col_ty)));
+ }
+ else
+ execute (assgn (var (v->name),
+ ANY_VAR (L(p)->env)));
- /* Rel: sum (Rel) */
- case 70:
- aggr_function (PFmil_sum, PFmil_gsum, p);
- break;
+ switch (kind) {
+ case alg_aggr_dist:
+ /* This case doesn't make sense -- so we bail out
*/
+ PFoops (OOPS_FATAL,
+ "Unpartitioned distinct aggregate"
+ " is not implemented");
+ break;
+ case alg_aggr_min:
+ execute (assgn (var (v->name),
+ PFmil_min (var (v->name))));
+ break;
+ case alg_aggr_max:
+ execute (assgn (var (v->name),
+ PFmil_max (var (v->name))));
+ break;
+ case alg_aggr_all:
+ execute (assgn (var (v->name),
+ eq (PFmil_sum (
+ mcast (type (mty_int),
+ var (v->name))),
+ count (var (v->name)))));
+ break;
+ case alg_aggr_count:
+ /* align with integer representation (lng) */
+ execute (assgn (var (v->name),
+ cast (type (mty_lng),
+ count (var (v->name)))));
+ break;
+ case alg_aggr_avg:
+ execute (assgn (var (v->name),
+ PFmil_avg (var (v->name))));
+ break;
+ case alg_aggr_sum:
+ execute (assgn (var (v->name),
+ PFmil_sum (var (v->name))));
+ break;
+ case alg_aggr_seqty1:
+ execute (assgn (var (v->name),
+ PFmil_and (
+ eq (lit_int (1),
+ PFmil_sum (
+ mcast (
+ type (mty_int),
+ var (v->name)))),
+ eq (lit_int (1),
+ count (var (v->name))))));
+ break;
+ case alg_aggr_prod:
+ execute (assgn (var (v->name),
+ PFmil_prod (var (v->name))));
+ break;
+ }
+ /* bind the aggregate result to the result MIL variable */
+ execute (
+ assgn (var (res_var->name),
+ append (
+ seqbase (
+ new (type (mty_void), implty_ (col_ty)),
+ lit_oid (0)),
+ var (v->name))));
+ }
+ }
+ /* release temporary variable */
+ unpin (v, 1);
+ break; /* fold) */
- /* Rel: prod (Rel) */
- case 81:
- aggr_function (PFmil_prod, PFmil_gprod, p);
- break;
-
/* Rel: mark (Rel) */
case 71: /* fold( */
{
@@ -7288,133 +7386,6 @@
"cast to polymorphic type not allowed.");
} break; /* fold) */
- /* Rel: seqty1 (Rel) */
- case 79: /* fold( */
- {
- PFalg_col_t part = p->sem.aggr.part,
- col = p->sem.aggr.col,
- res = p->sem.aggr.res;
- PFalg_simple_type_t part_ty;
-
- assert (type_of (L(p), col) == aat_bln &&
- type_of (p, res) == aat_bln);
-
- if (part != col_NULL) {
- mvar_t *res_var = new_var (p->refctr);
- mvar_t *part_var = new_var (p->refctr);
-
- v = new_var (1);
-
- part_ty = type_of (L(p), part);
-
- execute (
- /* v :=
- (col.reverse().join(part).reverse()) */
- assgn (var (v->name),
- reverse (
- join (reverse (VAR (L(p)->env, col, aat_bln)),
- VAR (L(p)->env, part, part_ty)))),
- /* v := [and]([=](1,{sum}(v)),[=](1,{count}(v))); */
- assgn (var (v->name),
- PFmil_mand (
- meq (lit_int (1),
- PFmil_gsum (mcast (type (mty_int),
- var (v->name)))),
- meq (lit_int (1), gcount (var (v->name))))),
- /* res := v.reverse ().mark (0...@0).reverse (); */
- assgn (var (res_var->name),
- reverse (mark (reverse (var (v->name)), lit_oid
(0)))),
- /* part := v.mark (0...@0).reverse (); */
- assgn (var (part_var->name),
- reverse (mark (var (v->name), lit_oid (0)))));
-
- env_add (p->env, res, aat_bln, res_var);
- env_add (p->env, part, part_ty, part_var);
-
- unpin (v, 1);
- }
- else {
- v = new_var (p->refctr);
- execute (
- assgn (var (v->name), VAR (L(p)->env, col, aat_bln)),
- assgn (var (v->name),
- PFmil_and (
- eq (lit_int (1),
- PFmil_sum (mcast (type (mty_int),
- var (v->name)))),
- eq (lit_int (1), count (var (v->name))))),
- assgn (var (v->name),
- append (
- seqbase (
- new (type (mty_void), implty_
(aat_bln)),
- lit_oid (0)),
- var (v->name))));
- env_add (p->env, res, aat_bln, v);
- }
- } break; /* fold) */
-
- /* Rel: all (Rel) */
- case 80: /* fold( */
- {
- PFalg_col_t part = p->sem.aggr.part,
- col = p->sem.aggr.col,
- res = p->sem.aggr.res;
- PFalg_simple_type_t part_ty;
-
- assert (type_of (L(p), col) == aat_bln &&
- type_of (p, res) == aat_bln);
-
- if (part != col_NULL) {
- mvar_t *res_var = new_var (p->refctr);
- mvar_t *part_var = new_var (p->refctr);
-
- v = new_var (1);
-
- part_ty = type_of (L(p), part);
-
- execute (
- /* v :=
- (col.reverse().join(part).reverse()) */
- assgn (var (v->name),
- reverse (
- join (reverse (VAR (L(p)->env, col, aat_bln)),
- VAR (L(p)->env, part, part_ty)))),
- /* v := [=]({sum}(v),{count}(v)); */
- assgn (var (v->name),
- meq (PFmil_gsum (mcast (type (mty_int),
- var (v->name))),
- gcount (var (v->name)))),
- /* res := v.reverse ().mark (0...@0).reverse (); */
- assgn (var (res_var->name),
- reverse (mark (reverse (var (v->name)),
- lit_oid (0)))),
- /* part := v.mark (0...@0).reverse (); */
- assgn (var (part_var->name),
- reverse (mark (var (v->name), lit_oid (0)))));
-
- env_add (p->env, res, aat_bln, res_var);
- env_add (p->env, part, part_ty, part_var);
-
- unpin (v, 1);
- }
- else {
- v = new_var (p->refctr);
- execute (
- assgn (var (v->name), VAR (L(p)->env, col, aat_bln)),
- assgn (var (v->name),
- eq (PFmil_sum (mcast (type (mty_int),
- var (v->name))),
- count (var (v->name)))),
- assgn (var (v->name),
- append (
- seqbase (
- new (type (mty_void), implty_
(aat_bln)),
- lit_oid (0)),
- var (v->name))));
- env_add (p->env, res, aat_bln, v);
- }
- } break; /* fold) */
-
/* Rel: llscjoin (Rel) */
case 90: /* fold( */
llscj (p);
------------------------------------------------------------------------------
Crystal Reports - New Free Runtime and 30 Day Trial
Check out the new simplified licensing option that enables unlimited
royalty-free distribution of the report engine for externally facing
server and web deployment.
http://p.sf.net/sfu/businessobjects
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins