Update of /cvsroot/monetdb/pathfinder/compiler/mil
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv29987/mil

Modified Files:
        milgen.brg 
Log Message:
-- Re-organized our set of numbering operators. We have now 4 different
   operators with consistent semantics that cope with sorting and numbering:

     - la_rownum behaves exactly like SQLs ROW_NUMBER. It is used to generate
       position values.

     - la_rowrank behaves exactly like SQLs DENSE_RANK. It is used to generate
       the group by semantics of our functional source language. Up til now
       we only need the unpartitioned variant. (In MIL it is implemented
       using the sort extend.)

     - la_rank -- beside one exception -- behaves like la_rowrank. It is also
       implemented in our SQL compilation with a DENSE_RANK operation. la_rank's
       important difference to la_rowrank is that its resulting values are used
       solely for ordering. No operation should ever look at the generated 
values.
       While this difference is uninteresting in the resulting code it 
simplifies
       the algebraic optimizer a lot. Instead of repeatedly inferring a property
       that checks for column usage we can optimize based on the operator kind.

     - la_rowid generates unrepeatable unique numbers (as 'ROW_NUMBER() OVER ()'
       does in SQL or 'mark()' does in MIL). It is used to generate a new key
       column for mapping joins.

   In comparison to the old version we introduced a new operator la_rowrank,
   changed the semantic of la_rank from ROW_NUMBER to DENSE_RANK, and renamed
   the formular la_number operator into la_rowid.

   To implement positions in our Core to Algebra translation consistently we
   now use only la_rownum (to generate real position values),
   la_rank (to represent intermediate position order), and constant values
   (to represent unordered sequences).

-- Introduced new SQL operator DENSE_RANK.

-- Splitted up the physical pa_number operator into the 3 operators:
   pa_mark, pa_rank, and pa_mark_grp. The first and the last operator correspond
   to the respective MIL primitives. The result column of pa_rank is generated
   by the extend column of a CTrefine operation.

-- Added check for environment variable PF_DEBUG_PRINT_FRAG to disable the
   fragment printing in the AT&T dot output of the logical algebra.



Index: milgen.brg
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/mil/milgen.brg,v
retrieving revision 1.90
retrieving revision 1.91
diff -u -d -r1.90 -r1.91
--- milgen.brg  27 Nov 2007 17:13:00 -0000      1.90
+++ milgen.brg  6 Dec 2007 08:42:39 -0000       1.91
@@ -114,7 +114,9 @@
 %term max_           =  57
 %term min_           =  58
 %term sum            =  59
-%term number         =  60
+%term mark           =  60
+%term rank           =  61
+%term mark_grp       =  62
 %term type           =  63
 %term type_assert    =  64
 %term cast           =  65
@@ -203,10 +205,14 @@
 Rel:      max_ (Rel)                                               =  68 (10);
 Rel:      min_ (Rel)                                               =  69 (10);
 Rel:      sum (Rel)                                                =  70 (10);
-Rel:      number (Rel)                                             =  73 (10);
-Rel:      type (Rel)                                               =  74 (10);
-Rel:      type_assert (Rel)                                        =  75 (10);
-Rel:      cast (Rel)                                               =  76 (10);
+Rel:      mark (Rel)                                               =  71 (10);
+Rel:      rank (Rel)                                               =  72 (10);
+Rel:      rank (std_sort (Rel))                                    =  73 (10);
+Rel:      rank (refine_sort (Rel))                                 =  74 (10);
+Rel:      mark_grp (Rel)                                           =  75 (10);
+Rel:      type (Rel)                                               =  76 (10);
+Rel:      type_assert (Rel)                                        =  77 (10);
+Rel:      cast (Rel)                                               =  78 (10);
 
 Rel:      llscj_anc (FragList, Rel)                                =  80 (10);
 Rel:      llscj_anc_self (FragList, Rel)                           =  81 (10);
@@ -4392,8 +4398,8 @@
              aggr_function(PFmil_sum, PFmil_gsum, p);
              break;
 
-        /* Rel:      number (Rel) */
-        case 73:
+        /* Rel:      mark (Rel) */
+        case 71:
         {
             mvar_t *res = new_var (p->refctr);
 
@@ -4408,25 +4414,9 @@
                          env_at (L(p)->env, i).mvar);
             }
 
-            /* and add the newly numbered one */
-            if (p->sem.number.part != att_NULL)
-                execute (
-                    assgn (var (res->name),
-                           mark_grp (
-                               VAR (p->env,
-                                    p->sem.number.part,
-                                    type_of (p, p->sem.number.part)),
-                               project (
-                                   kunique (
-                                       reverse (
-                                           VAR (p->env,
-                                                p->sem.number.part,
-                                                type_of (p, 
p->sem.number.part))
-                                           )),
-                                   lit_oid (1)))));
-            else if (env_at (L(p)->env, 0).ty == aat_pfrag)
+            if (env_at (L(p)->env, 0).ty == aat_pfrag)
                 /* As in many cases two adjacent path steps are separated
-                   by a number operator and the fragment is constant we
+                   by a mark operator and the fragment is constant we
                    try to avoid an additional dependency on the fragment
                    to ease 'materialize' operator removal. */
                 execute (
@@ -4442,11 +4432,178 @@
                                  lit_oid (1))));
 
             /* put the result into p's environment */
-            env_add (p->env, p->sem.number.attname, aat_nat, res);
+            env_add (p->env, p->sem.mark.res, aat_nat, res);
+        } break;
+        
+        /* Rel:      rank (Rel) */
+        case 72:
+        /* Rel:      rank (std_sort (Rel)) */
+        case 73:
+        /* Rel:      rank (refine_sort (Rel)) */
+        case 74:
+            /* as we have to sort anyway we can also skip the sort operator */
+        {
+            /*
+             * Derive a single BAT from the multi-column grouping
+             * (using functions from the xtables module).
+             */
+            mvar_t    *res            = new_var (p->refctr),
+                      *v              = new_var (1),
+                      *first_sort_var = NULL;
+            bool       initialized = false;
+            PFpa_op_t *rel;
+
+            if (L(p)->kind == pa_std_sort || L(p)->kind == pa_refine_sort)
+                rel = LL(p);
+            else
+                rel = L(p);
+
+            assert (PFord_count (p->sem.rank.ord));
+
+            for (unsigned int i = 0; i < PFord_count (p->sem.rank.ord); i++) {
+                for (PFalg_simple_type_t t = 1; t; t <<= 1) {
+                    if (t & type_of (rel,
+                                     PFord_order_col_at (p->sem.rank.ord, i))) 
{
+                        if (!initialized) {
+                            /* we need a hit for the first order criterion */
+                            assert (!i);
+
+                            first_sort_var = env_mvar (
+                                                 rel->env,
+                                                 PFord_order_col_at (
+                                                     p->sem.rank.ord,
+                                                     i),
+                                                 t);
+                            execute (
+                                assgn (
+                                    var (v->name),
+                                    reverse (
+                                        sort (
+                                            reverse (
+                                                var (first_sort_var->name)),
+                                            PFord_order_dir_at (
+                                                p->sem.rank.ord,
+                                                i)))));
+
+                            initialized = true;
+                        }
+                        else {
+                            execute (
+                                assgn (
+                                    var (v->name),
+                                    ctrefine (
+                                        var (v->name),
+                                        VAR (rel->env,
+                                             PFord_order_col_at (
+                                                 p->sem.rank.ord,
+                                                 i),
+                                             t),
+                                        PFord_order_dir_at (
+                                            p->sem.rank.ord,
+                                            i))));
+                        }
+                    }
+                }
+            }
+
+            execute (
+                /* Extend the sorting with a dummy sort criterion to
+                   ensure that the result column @a res consists of oids. */
+                assgn (var (res->name),
+                       reverse (mark (reverse (
+                                          ctrefine (var (v->name),
+                                                    project (var (v->name),
+                                                             lit_oid (1)),
+                                                    false)),
+                                      lit_oid (0)))),
+                 /* We know that the extent certainly is sorted. */
+                assgn (var (res->name),
+                       assert_order (var (res->name))),
+                /* Create a new mapping relation. */
+                assgn (var (v->name),
+                       reverse (mark (var (v->name), lit_oid (0)))));
+
+            /* put the result into p's environment */
+            env_add (p->env, p->sem.rank.res, aat_nat, res);
+            
+            /*
+             * The join with each input relation:
+             *
+             * out := v.join (in);
+             */
+            for (unsigned int i = 0; i < env_count (rel->env); i++) {
+                 mvar_t *out = new_var (p->refctr);
+
+                 execute (
+                      assgn (var (out->name),
+                             leftjoin (var (v->name),
+                                       var (env_at (rel->env,
+                                                    i).mvar->name))));
+
+                 /* we know that the first sort criterion certainly is sorted
+                    -- assert_order however only copes with ascending order */
+                 if (first_sort_var == env_at (rel->env,i).mvar &&
+                     PFord_order_dir_at (p->sem.rank.ord, 0)
+                     == DIR_ASC)
+                     execute (
+                          assgn (var (out->name),
+                                 assert_order (var (out->name))));
+
+                 /* because leftjoin does not know that we have
+                    exactly one match for each tuple in v,
+                    we need to make the heads void ourselves */
+                 execute (
+                     assgn (var (out->name),
+                            reverse (mark (reverse (var (out->name)),
+                                           lit_oid (0)))));
+
+                 env_add (p->env,
+                          env_at (rel->env, i).att,
+                          env_at (rel->env, i).ty,
+                          out);
+            }
+
+            /* release our temporary variable */
+            unpin (v, 1);
+
+        } break;
+        /* Rel:      mark_grp (Rel) */
+        case 75:
+        {
+            mvar_t *res = new_var (p->refctr);
+
+            assert (env_count (L(p)->env));
+            assert (p->sem.mark.part);
+
+            /* copy all the attributes */
+            for (unsigned int i = 0; i < env_count (L(p)->env); i++) {
+                pin (env_at (L(p)->env, i).mvar, p->refctr);
+                env_add (p->env,
+                         env_at (L(p)->env, i).att,
+                         env_at (L(p)->env, i).ty,
+                         env_at (L(p)->env, i).mvar);
+            }
+
+            execute (
+                assgn (var (res->name),
+                       mark_grp (
+                           VAR (p->env,
+                                p->sem.mark.part,
+                                type_of (p, p->sem.mark.part)),
+                           project (
+                               kunique (
+                                   reverse (
+                                       VAR (p->env,
+                                            p->sem.mark.part,
+                                            type_of (p, p->sem.mark.part)))),
+                               lit_oid (1)))));
+
+            /* put the result into p's environment */
+            env_add (p->env, p->sem.mark.res, aat_nat, res);
         } break;
 
         /* Rel:      type (Rel) */
-        case 74:
+        case 76:
         {
             mvar_t             *res = new_var (p->refctr);
             PFalg_simple_type_t input_ty = 0;
@@ -4522,7 +4679,7 @@
             break;
 
         /* Rel:      type_assert (Rel) */
-        case 75:
+        case 77:
             /* copy all the attributes */
             for (unsigned int i = 0; i < env_count (L(p)->env); i++) {
                 pin (env_at (L(p)->env, i).mvar, p->refctr);
@@ -4534,7 +4691,7 @@
             break;
 
         /* Rel:      cast (Rel) */
-        case 76:
+        case 78:
             for (unsigned int i = 0; i < L(p)->schema.count; i++)
             {
                 bool att_needed = true;


-------------------------------------------------------------------------
SF.Net email is sponsored by: The Future of Linux Business White Paper
from Novell.  From the desktop to the data center, Linux is going
mainstream.  Let it simplify your IT future.
http://altfarm.mediaplex.com/ad/ck/8857-50307-18918-4
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins

Reply via email to