Update of /cvsroot/monetdb/pathfinder/compiler/mil
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv29987/mil
Modified Files:
milgen.brg
Log Message:
-- Re-organized our set of numbering operators. We have now 4 different
operators with consistent semantics that cope with sorting and numbering:
- la_rownum behaves exactly like SQLs ROW_NUMBER. It is used to generate
position values.
- la_rowrank behaves exactly like SQLs DENSE_RANK. It is used to generate
the group by semantics of our functional source language. Up til now
we only need the unpartitioned variant. (In MIL it is implemented
using the sort extend.)
- la_rank -- beside one exception -- behaves like la_rowrank. It is also
implemented in our SQL compilation with a DENSE_RANK operation. la_rank's
important difference to la_rowrank is that its resulting values are used
solely for ordering. No operation should ever look at the generated
values.
While this difference is uninteresting in the resulting code it
simplifies
the algebraic optimizer a lot. Instead of repeatedly inferring a property
that checks for column usage we can optimize based on the operator kind.
- la_rowid generates unrepeatable unique numbers (as 'ROW_NUMBER() OVER ()'
does in SQL or 'mark()' does in MIL). It is used to generate a new key
column for mapping joins.
In comparison to the old version we introduced a new operator la_rowrank,
changed the semantic of la_rank from ROW_NUMBER to DENSE_RANK, and renamed
the formular la_number operator into la_rowid.
To implement positions in our Core to Algebra translation consistently we
now use only la_rownum (to generate real position values),
la_rank (to represent intermediate position order), and constant values
(to represent unordered sequences).
-- Introduced new SQL operator DENSE_RANK.
-- Splitted up the physical pa_number operator into the 3 operators:
pa_mark, pa_rank, and pa_mark_grp. The first and the last operator correspond
to the respective MIL primitives. The result column of pa_rank is generated
by the extend column of a CTrefine operation.
-- Added check for environment variable PF_DEBUG_PRINT_FRAG to disable the
fragment printing in the AT&T dot output of the logical algebra.
Index: milgen.brg
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/mil/milgen.brg,v
retrieving revision 1.90
retrieving revision 1.91
diff -u -d -r1.90 -r1.91
--- milgen.brg 27 Nov 2007 17:13:00 -0000 1.90
+++ milgen.brg 6 Dec 2007 08:42:39 -0000 1.91
@@ -114,7 +114,9 @@
%term max_ = 57
%term min_ = 58
%term sum = 59
-%term number = 60
+%term mark = 60
+%term rank = 61
+%term mark_grp = 62
%term type = 63
%term type_assert = 64
%term cast = 65
@@ -203,10 +205,14 @@
Rel: max_ (Rel) = 68 (10);
Rel: min_ (Rel) = 69 (10);
Rel: sum (Rel) = 70 (10);
-Rel: number (Rel) = 73 (10);
-Rel: type (Rel) = 74 (10);
-Rel: type_assert (Rel) = 75 (10);
-Rel: cast (Rel) = 76 (10);
+Rel: mark (Rel) = 71 (10);
+Rel: rank (Rel) = 72 (10);
+Rel: rank (std_sort (Rel)) = 73 (10);
+Rel: rank (refine_sort (Rel)) = 74 (10);
+Rel: mark_grp (Rel) = 75 (10);
+Rel: type (Rel) = 76 (10);
+Rel: type_assert (Rel) = 77 (10);
+Rel: cast (Rel) = 78 (10);
Rel: llscj_anc (FragList, Rel) = 80 (10);
Rel: llscj_anc_self (FragList, Rel) = 81 (10);
@@ -4392,8 +4398,8 @@
aggr_function(PFmil_sum, PFmil_gsum, p);
break;
- /* Rel: number (Rel) */
- case 73:
+ /* Rel: mark (Rel) */
+ case 71:
{
mvar_t *res = new_var (p->refctr);
@@ -4408,25 +4414,9 @@
env_at (L(p)->env, i).mvar);
}
- /* and add the newly numbered one */
- if (p->sem.number.part != att_NULL)
- execute (
- assgn (var (res->name),
- mark_grp (
- VAR (p->env,
- p->sem.number.part,
- type_of (p, p->sem.number.part)),
- project (
- kunique (
- reverse (
- VAR (p->env,
- p->sem.number.part,
- type_of (p,
p->sem.number.part))
- )),
- lit_oid (1)))));
- else if (env_at (L(p)->env, 0).ty == aat_pfrag)
+ if (env_at (L(p)->env, 0).ty == aat_pfrag)
/* As in many cases two adjacent path steps are separated
- by a number operator and the fragment is constant we
+ by a mark operator and the fragment is constant we
try to avoid an additional dependency on the fragment
to ease 'materialize' operator removal. */
execute (
@@ -4442,11 +4432,178 @@
lit_oid (1))));
/* put the result into p's environment */
- env_add (p->env, p->sem.number.attname, aat_nat, res);
+ env_add (p->env, p->sem.mark.res, aat_nat, res);
+ } break;
+
+ /* Rel: rank (Rel) */
+ case 72:
+ /* Rel: rank (std_sort (Rel)) */
+ case 73:
+ /* Rel: rank (refine_sort (Rel)) */
+ case 74:
+ /* as we have to sort anyway we can also skip the sort operator */
+ {
+ /*
+ * Derive a single BAT from the multi-column grouping
+ * (using functions from the xtables module).
+ */
+ mvar_t *res = new_var (p->refctr),
+ *v = new_var (1),
+ *first_sort_var = NULL;
+ bool initialized = false;
+ PFpa_op_t *rel;
+
+ if (L(p)->kind == pa_std_sort || L(p)->kind == pa_refine_sort)
+ rel = LL(p);
+ else
+ rel = L(p);
+
+ assert (PFord_count (p->sem.rank.ord));
+
+ for (unsigned int i = 0; i < PFord_count (p->sem.rank.ord); i++) {
+ for (PFalg_simple_type_t t = 1; t; t <<= 1) {
+ if (t & type_of (rel,
+ PFord_order_col_at (p->sem.rank.ord, i)))
{
+ if (!initialized) {
+ /* we need a hit for the first order criterion */
+ assert (!i);
+
+ first_sort_var = env_mvar (
+ rel->env,
+ PFord_order_col_at (
+ p->sem.rank.ord,
+ i),
+ t);
+ execute (
+ assgn (
+ var (v->name),
+ reverse (
+ sort (
+ reverse (
+ var (first_sort_var->name)),
+ PFord_order_dir_at (
+ p->sem.rank.ord,
+ i)))));
+
+ initialized = true;
+ }
+ else {
+ execute (
+ assgn (
+ var (v->name),
+ ctrefine (
+ var (v->name),
+ VAR (rel->env,
+ PFord_order_col_at (
+ p->sem.rank.ord,
+ i),
+ t),
+ PFord_order_dir_at (
+ p->sem.rank.ord,
+ i))));
+ }
+ }
+ }
+ }
+
+ execute (
+ /* Extend the sorting with a dummy sort criterion to
+ ensure that the result column @a res consists of oids. */
+ assgn (var (res->name),
+ reverse (mark (reverse (
+ ctrefine (var (v->name),
+ project (var (v->name),
+ lit_oid (1)),
+ false)),
+ lit_oid (0)))),
+ /* We know that the extent certainly is sorted. */
+ assgn (var (res->name),
+ assert_order (var (res->name))),
+ /* Create a new mapping relation. */
+ assgn (var (v->name),
+ reverse (mark (var (v->name), lit_oid (0)))));
+
+ /* put the result into p's environment */
+ env_add (p->env, p->sem.rank.res, aat_nat, res);
+
+ /*
+ * The join with each input relation:
+ *
+ * out := v.join (in);
+ */
+ for (unsigned int i = 0; i < env_count (rel->env); i++) {
+ mvar_t *out = new_var (p->refctr);
+
+ execute (
+ assgn (var (out->name),
+ leftjoin (var (v->name),
+ var (env_at (rel->env,
+ i).mvar->name))));
+
+ /* we know that the first sort criterion certainly is sorted
+ -- assert_order however only copes with ascending order */
+ if (first_sort_var == env_at (rel->env,i).mvar &&
+ PFord_order_dir_at (p->sem.rank.ord, 0)
+ == DIR_ASC)
+ execute (
+ assgn (var (out->name),
+ assert_order (var (out->name))));
+
+ /* because leftjoin does not know that we have
+ exactly one match for each tuple in v,
+ we need to make the heads void ourselves */
+ execute (
+ assgn (var (out->name),
+ reverse (mark (reverse (var (out->name)),
+ lit_oid (0)))));
+
+ env_add (p->env,
+ env_at (rel->env, i).att,
+ env_at (rel->env, i).ty,
+ out);
+ }
+
+ /* release our temporary variable */
+ unpin (v, 1);
+
+ } break;
+ /* Rel: mark_grp (Rel) */
+ case 75:
+ {
+ mvar_t *res = new_var (p->refctr);
+
+ assert (env_count (L(p)->env));
+ assert (p->sem.mark.part);
+
+ /* copy all the attributes */
+ for (unsigned int i = 0; i < env_count (L(p)->env); i++) {
+ pin (env_at (L(p)->env, i).mvar, p->refctr);
+ env_add (p->env,
+ env_at (L(p)->env, i).att,
+ env_at (L(p)->env, i).ty,
+ env_at (L(p)->env, i).mvar);
+ }
+
+ execute (
+ assgn (var (res->name),
+ mark_grp (
+ VAR (p->env,
+ p->sem.mark.part,
+ type_of (p, p->sem.mark.part)),
+ project (
+ kunique (
+ reverse (
+ VAR (p->env,
+ p->sem.mark.part,
+ type_of (p, p->sem.mark.part)))),
+ lit_oid (1)))));
+
+ /* put the result into p's environment */
+ env_add (p->env, p->sem.mark.res, aat_nat, res);
} break;
/* Rel: type (Rel) */
- case 74:
+ case 76:
{
mvar_t *res = new_var (p->refctr);
PFalg_simple_type_t input_ty = 0;
@@ -4522,7 +4679,7 @@
break;
/* Rel: type_assert (Rel) */
- case 75:
+ case 77:
/* copy all the attributes */
for (unsigned int i = 0; i < env_count (L(p)->env); i++) {
pin (env_at (L(p)->env, i).mvar, p->refctr);
@@ -4534,7 +4691,7 @@
break;
/* Rel: cast (Rel) */
- case 76:
+ case 78:
for (unsigned int i = 0; i < L(p)->schema.count; i++)
{
bool att_needed = true;
-------------------------------------------------------------------------
SF.Net email is sponsored by: The Future of Linux Business White Paper
from Novell. From the desktop to the data center, Linux is going
mainstream. Let it simplify your IT future.
http://altfarm.mediaplex.com/ad/ck/8857-50307-18918-4
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins