Update of /cvsroot/monetdb/pathfinder/compiler/algebra
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv29987/algebra

Modified Files:
        algebra_cse.c algopt.c builtins.c core2alg.brg logical.c 
        physical.c planner.c 
Log Message:
-- Re-organized our set of numbering operators. We have now 4 different
   operators with consistent semantics that cope with sorting and numbering:

     - la_rownum behaves exactly like SQLs ROW_NUMBER. It is used to generate
       position values.

     - la_rowrank behaves exactly like SQLs DENSE_RANK. It is used to generate
       the group by semantics of our functional source language. Up til now
       we only need the unpartitioned variant. (In MIL it is implemented
       using the sort extend.)

     - la_rank -- beside one exception -- behaves like la_rowrank. It is also
       implemented in our SQL compilation with a DENSE_RANK operation. la_rank's
       important difference to la_rowrank is that its resulting values are used
       solely for ordering. No operation should ever look at the generated 
values.
       While this difference is uninteresting in the resulting code it 
simplifies
       the algebraic optimizer a lot. Instead of repeatedly inferring a property
       that checks for column usage we can optimize based on the operator kind.

     - la_rowid generates unrepeatable unique numbers (as 'ROW_NUMBER() OVER ()'
       does in SQL or 'mark()' does in MIL). It is used to generate a new key
       column for mapping joins.

   In comparison to the old version we introduced a new operator la_rowrank,
   changed the semantic of la_rank from ROW_NUMBER to DENSE_RANK, and renamed
   the formular la_number operator into la_rowid.

   To implement positions in our Core to Algebra translation consistently we
   now use only la_rownum (to generate real position values),
   la_rank (to represent intermediate position order), and constant values
   (to represent unordered sequences).

-- Introduced new SQL operator DENSE_RANK.

-- Splitted up the physical pa_number operator into the 3 operators:
   pa_mark, pa_rank, and pa_mark_grp. The first and the last operator correspond
   to the respective MIL primitives. The result column of pa_rank is generated
   by the extend column of a CTrefine operation.

-- Added check for environment variable PF_DEBUG_PRINT_FRAG to disable the
   fragment printing in the AT&T dot output of the logical algebra.



Index: core2alg.brg
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/core2alg.brg,v
retrieving revision 1.57
retrieving revision 1.58
diff -u -d -r1.57 -r1.58
--- core2alg.brg        16 Oct 2007 15:46:41 -0000      1.57
+++ core2alg.brg        6 Dec 2007 08:42:12 -0000       1.58
@@ -310,6 +310,8 @@
 /** mnemonic algebra constructors */
 #include "logical_mnemonic.h"
 
+#define RANK_DUMMY 42
+
 /**
  * Variable environment (Gamma in our papers)
  */
@@ -805,7 +807,7 @@
                 else {
                     /**
                      * If an order by clause is present we need
-                     * the iter column as a row number criterion
+                     * the iter column as a row ranking criterion
                      * (sort_0, sort_1, ..., sort_n, iter, pos).
                      *
                      * Without an order by the combination of sort
@@ -914,7 +916,7 @@
             PFla_op_t   *var;
             PFla_op_t   *opt_var;
             PFla_op_t   *var_map;
-            PFla_op_t   *number;
+            PFla_op_t   *rowid;
             unsigned int  i;
             PFla_env_t   e;
             PFla_op_t   *new_map;
@@ -924,10 +926,10 @@
             /* initiate translation of e1 */
             reduce (kids[1], nts[1]);
 
-            number = number (A(LR(p)).rel, att_inner);
+            rowid = rowid (A(LR(p)).rel, att_inner);
 
             /* translate $v */
-            var = attach (project (number,
+            var = attach (project (rowid,
                                    proj (att_iter, att_inner),
                                    proj (att_item, att_item)),
                           att_pos, lit_nat (1));
@@ -943,7 +945,7 @@
             loop = project (var, proj (att_iter, att_iter));
 
             /* create var_map relation. */
-            var_map = project (number,
+            var_map = project (rowid,
                                proj (att_outer, att_iter),
                                proj (att_sort, att_pos),
                                proj (att_inner, att_inner));
@@ -1028,14 +1030,15 @@
                 atts[MAP_COUNT -1].new = att_sort << (MAP_COUNT - 3);
                 atts[MAP_COUNT -1].old = att_pos;
 
-                MAP = PFla_project_ (eqjoin (MAP,
-                                             project (number,
-                                                      proj (att_iter1, 
att_inner),
-                                                      proj (att_iter, 
att_iter),
-                                                      proj (att_pos, att_pos)),
-                                             att_inner,
-                                             att_iter),
-                                     MAP_COUNT, atts);
+                MAP = PFla_project_ (
+                          eqjoin (MAP,
+                                  project (rowid,
+                                           proj (att_iter1, att_inner),
+                                           proj (att_iter, att_iter),
+                                           proj (att_pos, att_pos)),
+                                  att_inner,
+                                  att_iter),
+                          MAP_COUNT, atts);
             }
 
             /* translate e2 under the specified conditions (updated
@@ -2626,7 +2629,7 @@
                    .frag = p.frag };
     else
         return (struct  PFla_pair_t) {
-                   .rel = number (step, att_pos),
+                   .rel = attach (step, att_pos, lit_nat (RANK_DUMMY)),
                    .frag = p.frag };
 }
 

Index: algopt.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/algopt.c,v
retrieving revision 1.27
retrieving revision 1.28
diff -u -d -r1.27 -r1.28
--- algopt.c    29 Nov 2007 15:39:06 -0000      1.27
+++ algopt.c    6 Dec 2007 08:42:11 -0000       1.28
@@ -90,7 +90,7 @@
 PFla_op_t *
 PFalgopt (PFla_op_t *root, bool timing, PFguide_tree_t* guide_tree)
 {
-    bool debug_opt = getenv("PF_DEBUG_OPTIMIZATIONS");
+    bool debug_opt = getenv("PF_DEBUG_OPTIMIZATIONS") != NULL;
     assert (PFstate.opt_alg);
     long tm;
     bool const_no_attach = false;

Index: physical.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/physical.c,v
retrieving revision 1.43
retrieving revision 1.44
diff -u -d -r1.43 -r1.44
--- physical.c  20 Nov 2007 16:57:24 -0000      1.43
+++ physical.c  6 Dec 2007 08:42:14 -0000       1.44
@@ -2146,16 +2146,96 @@
 }
 
 PFpa_op_t *
-PFpa_number (const PFpa_op_t *n,
-             PFalg_att_t new_att,
-             PFalg_att_t part)
+PFpa_mark (const PFpa_op_t *n, PFalg_att_t new_att)
 {
-    PFpa_op_t *ret      = wire1 (pa_number, n);
+    PFpa_op_t *ret = wire1 (pa_mark, n);
+
+    ret->sem.mark.res  = new_att;
+    ret->sem.mark.part = att_NULL;
+
+    /* allocate memory for the result schema */
+    ret->schema.count = n->schema.count + 1;
+    ret->schema.items
+        = PFmalloc (ret->schema.count * sizeof (*(ret->schema.items)));
+
+    /* copy schema from n */
+    for (unsigned int i = 0; i < n->schema.count; i++)
+        ret->schema.items[i] = n->schema.items[i];
+
+    ret->schema.items[ret->schema.count - 1]
+        = (PFalg_schm_item_t) { .name = new_att, .type = aat_nat };
+
+    /* ---- orderings ---- */
+    for (unsigned int i = 0; i < PFord_set_count (n->orderings); i++) {
+        PFord_set_add (ret->orderings, PFord_set_at (n->orderings, i));
+
+        /* if we have already an ordering we can also add the new
+           generated column at the end. It won't break the ordering. */
+        PFord_set_add (ret->orderings,
+                       PFord_refine (
+                           PFord_set_at (n->orderings, i),
+                           new_att, DIR_ASC));
+    }
+
+    PFord_set_add (ret->orderings, sortby (new_att));
+
+    /* ---- costs ---- */
+    ret->cost = DEFAULT_COST + n->cost;
+
+    return ret;
+}
+
+PFpa_op_t *
+PFpa_rank (const PFpa_op_t *n, PFalg_att_t new_att, PFord_ordering_t ord)
+{
+    PFpa_op_t *ret = wire1 (pa_rank, n);
+
+    ret->sem.rank.res = new_att;
+    ret->sem.rank.ord = ord;
+
+    /* allocate memory for the result schema */
+    ret->schema.count = n->schema.count + 1;
+    ret->schema.items
+        = PFmalloc (ret->schema.count * sizeof (*(ret->schema.items)));
+
+    /* copy schema from n */
+    for (unsigned int i = 0; i < n->schema.count; i++)
+        ret->schema.items[i] = n->schema.items[i];
+
+    ret->schema.items[ret->schema.count - 1]
+        = (PFalg_schm_item_t) { .name = new_att, .type = aat_nat };
+
+    /* ---- orderings ---- */
+    for (unsigned int i = 0; i < PFord_set_count (n->orderings); i++) {
+        PFord_set_add (ret->orderings, PFord_set_at (n->orderings, i));
+
+        /* if we have already an ordering we can also add the new
+           generated column at the end. It won't break the ordering. */
+        PFord_set_add (ret->orderings,
+                       PFord_refine (
+                           PFord_set_at (n->orderings, i),
+                           new_att, DIR_ASC));
+    }
+
+    PFord_set_add (ret->orderings, sortby (new_att));
+
+    /* ---- costs ---- */
+    ret->cost = DEFAULT_COST + n->cost;
+
+    return ret;
+}
+
+PFpa_op_t *
+PFpa_mark_grp (const PFpa_op_t *n, PFalg_att_t new_att, PFalg_att_t part)
+{
+    PFpa_op_t *ret      = wire1 (pa_mark_grp, n);
     bool       dir_asc  = false,
                dir_desc = false;
 
-    ret->sem.number.attname = new_att;
-    ret->sem.number.part = part;
+    assert (part);
+
+    ret->sem.mark.res  = new_att;
+    ret->sem.mark.part = part;
 
     /* allocate memory for the result schema */
     ret->schema.count = n->schema.count + 1;

Index: planner.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/planner.c,v
retrieving revision 1.41
retrieving revision 1.42
diff -u -d -r1.41 -r1.42
--- planner.c   17 Oct 2007 12:25:34 -0000      1.41
+++ planner.c   6 Dec 2007 08:42:15 -0000       1.42
@@ -849,9 +849,11 @@
                   project (
                       val_select (
                           cast (
-                              number (*(plan_t **) PFarray_at (sorted, i),
-                                      num,
-                                      n->sem.pos_sel.part),
+                              n->sem.pos_sel.part
+                              ? mark_grp (*(plan_t **) PFarray_at (sorted, i),
+                                          num,
+                                          n->sem.pos_sel.part)
+                              : mark (*(plan_t **) PFarray_at (sorted, i), 
num),
                               cast,
                               num,
                               aat_int),
@@ -1283,27 +1285,27 @@
     ord_wo_part = PFordering ();
 
     /* the partitioning attribute must be the primary ordering */
-    if (n->sem.rownum.part) {
-        ord_asc  = PFord_refine (ord_asc, n->sem.rownum.part, DIR_ASC);
-        ord_desc = PFord_refine (ord_desc, n->sem.rownum.part, DIR_DESC);
+    if (n->sem.sort.part) {
+        ord_asc  = PFord_refine (ord_asc, n->sem.sort.part, DIR_ASC);
+        ord_desc = PFord_refine (ord_desc, n->sem.sort.part, DIR_DESC);
     }
 
     /* then we refine by all the attributes in the sortby parameter */
     for (unsigned int i = 0;
-         i < PFord_count (n->sem.rownum.sortby);
+         i < PFord_count (n->sem.sort.sortby);
          i++) {
         ord_asc     = PFord_refine (
                           ord_asc,
-                          PFord_order_col_at (n->sem.rownum.sortby, i),
-                          PFord_order_dir_at (n->sem.rownum.sortby, i));
+                          PFord_order_col_at (n->sem.sort.sortby, i),
+                          PFord_order_dir_at (n->sem.sort.sortby, i));
         ord_desc    = PFord_refine (
                           ord_desc,
-                          PFord_order_col_at (n->sem.rownum.sortby, i),
-                          PFord_order_dir_at (n->sem.rownum.sortby, i));
+                          PFord_order_col_at (n->sem.sort.sortby, i),
+                          PFord_order_dir_at (n->sem.sort.sortby, i));
         ord_wo_part = PFord_refine (
                           ord_wo_part,
-                          PFord_order_col_at (n->sem.rownum.sortby, i),
-                          PFord_order_dir_at (n->sem.rownum.sortby, i));
+                          PFord_order_col_at (n->sem.sort.sortby, i),
+                          PFord_order_dir_at (n->sem.sort.sortby, i));
     }
 
     /* ensure correct input ordering for MergeRowNumber */
@@ -1325,9 +1327,12 @@
     /* for each remaining plan, generate a MergeRowNumber operator */
     for (unsigned int i = 0; i < PFarray_last (sorted); i++)
         add_plan (ret,
-                  number (*(plan_t **) PFarray_at (sorted, i),
-                          n->sem.rownum.res,
-                          n->sem.rownum.part));
+                  n->sem.sort.part
+                  ? mark_grp (*(plan_t **) PFarray_at (sorted, i),
+                              n->sem.sort.res,
+                              n->sem.sort.part)
+                  : mark (*(plan_t **) PFarray_at (sorted, i),
+                          n->sem.sort.res));
 
     return ret;
 }
@@ -1338,30 +1343,30 @@
     PFplanlist_t *ret    = new_planlist ();
     PFplanlist_t *sorted = new_planlist ();
 
-    assert (n); assert (n->kind == la_rank);
+    assert (n); assert (n->kind == la_rank || n->kind == la_rowrank);
     assert (L(n)); assert (L(n)->plans);
+    assert (!n->sem.sort.part);
 
     for (unsigned int i = 0; i < PFarray_last (L(n)->plans); i++)
         add_plans (sorted,
                    ensure_ordering (
                        *(plan_t **) PFarray_at (L(n)->plans, i),
-                       n->sem.rank.sortby));
+                       n->sem.sort.sortby));
 
     for (unsigned int i = 0; i < PFarray_last (sorted); i++)
         add_plan (ret,
-                  number (*(plan_t **) PFarray_at (sorted, i),
-                          n->sem.rank.res,
-                          att_NULL));
+                  rank (*(plan_t **) PFarray_at (sorted, i),
+                        n->sem.sort.res, n->sem.sort.sortby));
     return ret;
 }
 
 static PFplanlist_t *
-plan_number (const PFla_op_t *n)
+plan_rowid (const PFla_op_t *n)
 {
     PFplanlist_t *ret     = new_planlist ();
     plan_t        *cheapest_unordered = NULL;
 
-    assert (n); assert (n->kind == la_number);
+    assert (n); assert (n->kind == la_rowid);
     assert (L(n)); assert (L(n)->plans);
 
 
@@ -1373,9 +1378,7 @@
             cheapest_unordered
                 = *(plan_t **) PFarray_at (L(n)->plans, i);
 
-    add_plan (ret, number (cheapest_unordered,
-                           n->sem.number.res,
-                           att_NULL));
+    add_plan (ret, mark (cheapest_unordered, n->sem.rowid.res));
 
     return ret;
 }
@@ -2728,8 +2731,13 @@
         case la_count:          plans = plan_count (n);        break;
 
         case la_rownum:         plans = plan_rownum (n);       break;
+        case la_rowrank:
+            PFinfo (OOPS_WARNING,
+                    "The column generated by the rowrank operator"
+                    " will not start with value 1");
+            /* fall through */
         case la_rank:           plans = plan_rank (n);         break;
-        case la_number:         plans = plan_number (n);       break;
+        case la_rowid:          plans = plan_rowid (n);       break;
         case la_type:           plans = plan_type (n);         break;
         case la_type_assert:    plans = plan_type_assert (n);  break;
         case la_cast:           plans = plan_cast (n);         break;

Index: algebra_cse.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/algebra_cse.c,v
retrieving revision 1.59
retrieving revision 1.60
diff -u -d -r1.59 -r1.60
--- algebra_cse.c       27 Nov 2007 21:27:27 -0000      1.59
+++ algebra_cse.c       6 Dec 2007 08:42:09 -0000       1.60
@@ -311,44 +311,28 @@
             return true;
 
         case la_rownum:
-            if (a->sem.rownum.res != b->sem.rownum.res)
+        case la_rowrank:
+        case la_rank:
+            if (a->sem.sort.res != b->sem.sort.res)
                 return false;
 
-            for (unsigned int i = 0;
-                 i < PFord_count (a->sem.rownum.sortby);
-                 i++)
-                if (PFord_order_col_at (a->sem.rownum.sortby, i) !=
-                    PFord_order_col_at (b->sem.rownum.sortby, i) ||
-                    PFord_order_dir_at (a->sem.rownum.sortby, i) !=
-                    PFord_order_dir_at (b->sem.rownum.sortby, i))
+            for (unsigned int i = 0; i < PFord_count (a->sem.sort.sortby); i++)
+                if (PFord_order_col_at (a->sem.sort.sortby, i) !=
+                    PFord_order_col_at (b->sem.sort.sortby, i) ||
+                    PFord_order_dir_at (a->sem.sort.sortby, i) !=
+                    PFord_order_dir_at (b->sem.sort.sortby, i))
                     return false;
 
-            /* either both rownums are partitioned or none */
+            /* either both operators are partitioned or none */
             /* partitioning attribute must be equal (if available) */
-            if (a->sem.rownum.part != b->sem.rownum.part)
-                return false;
-
-            return true;
-            break;
-
-        case la_rank:
-            if (a->sem.rank.res != b->sem.rank.res)
+            if (a->sem.sort.part != b->sem.sort.part)
                 return false;
 
-            for (unsigned int i = 0;
-                 i < PFord_count (a->sem.rank.sortby);
-                 i++)
-                if (PFord_order_col_at (a->sem.rank.sortby, i) !=
-                    PFord_order_col_at (b->sem.rank.sortby, i) ||
-                    PFord_order_dir_at (a->sem.rank.sortby, i) !=
-                    PFord_order_dir_at (b->sem.rank.sortby, i))
-                    return false;
-
             return true;
             break;
 
-        case la_number:
-            if (a->sem.number.res != b->sem.number.res)
+        case la_rowid:
+            if (a->sem.rowid.res != b->sem.rowid.res)
                 return false;
 
             return true;

Index: logical.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/logical.c,v
retrieving revision 1.67
retrieving revision 1.68
diff -u -d -r1.67 -r1.68
--- logical.c   28 Nov 2007 17:08:57 -0000      1.67
+++ logical.c   6 Dec 2007 08:42:13 -0000       1.68
@@ -2042,23 +2042,22 @@
 
 
 /**
- * The `rownum' operator, a Pathfinder-specific extension to the
- * relational algebra.
+ * Worker for PFla_rownum, PFla_rowrank, and PFla_rank.
  */
-PFla_op_t *
-PFla_rownum (const PFla_op_t *n,
-             PFalg_att_t a, PFord_ordering_t s, PFalg_att_t p)
+static PFla_op_t *
+sort_col (const PFla_op_t *n, PFla_op_kind_t kind, char *name, 
+          PFalg_att_t a, PFord_ordering_t s, PFalg_att_t p)
 {
-    PFla_op_t    *ret = la_op_wire1 (la_rownum, n);
+    PFla_op_t    *ret = la_op_wire1 (kind, n);
     unsigned int  i;
     unsigned int  j;
 
     assert (s);
 
     /* copy parameters into semantic content of return node */
-    ret->sem.rownum.res = a;
-    ret->sem.rownum.sortby = s;
-    ret->sem.rownum.part = p;
+    ret->sem.sort.res = a;
+    ret->sem.sort.sortby = s;
+    ret->sem.sort.part = p;
 
     /* result schema is input schema plus the new attribute */
     ret->schema.count = n->schema.count + 1;
@@ -2066,12 +2065,11 @@
         = PFmalloc (ret->schema.count * sizeof (*(ret->schema.items)));
 
     for (i = 0; i < n->schema.count; i++) {
-
         /* there must not be an argument attribute named like the new one */
         if (n->schema.items[i].name == a)
             PFoops (OOPS_FATAL,
-                    "rownum operator would result in duplicate attribute `%s'",
-                    PFatt_str (a));
+                    "%s operator would result in duplicate attribute `%s'",
+                    name, PFatt_str (a));
 
         /* copy this attribute specification */
         ret->schema.items[i] = n->schema.items[i];
@@ -2081,29 +2079,18 @@
         = (struct PFalg_schm_item_t) { .name = a, .type = aat_nat };
 
     /* see if we can find all sort specifications */
-    for (i = 0; i < PFord_count (ret->sem.rownum.sortby); i++) {
-
+    for (i = 0; i < PFord_count (s); i++) {
         for (j = 0; j < n->schema.count; j++)
-            if (n->schema.items[j].name ==
-                         PFord_order_col_at (ret->sem.rownum.sortby, i))
+            if (n->schema.items[j].name == PFord_order_col_at (s, i))
                 break;
 
         if (j == n->schema.count)
             PFoops (OOPS_FATAL,
                     "could not find sort attribute `%s'",
-                    PFatt_str (PFord_order_col_at (
-                                   ret->sem.rownum.sortby, i)));
-
-        if (!monomorphic (n->schema.items[j].type))
-            PFoops (OOPS_FATAL,
-                    "sort criterion for rownum must be monomorphic, "
-                    "type: %i, name: %s",
-                    n->schema.items[j].type,
-                    PFatt_str (n->schema.items[j].name));
+                    PFatt_str (PFord_order_col_at (s, i)));
 
-        if (ret->sem.rownum.part
-            && PFord_order_col_at (ret->sem.rownum.sortby, i)
-               == ret->sem.rownum.part)
+        if (ret->sem.sort.part &&
+            PFord_order_col_at (s, i) == ret->sem.sort.part)
             PFoops (OOPS_FATAL,
                     "partitioning attribute must not appear in sort clause");
     }
@@ -2113,91 +2100,61 @@
 
 
 /**
- * The `rank' operator, a Pathfinder-specific extension to the
- * relational algebra.
+ * The `rownum' operator, a Pathfinder-specific extension to the
+ * relational algebra (behaves like SQL ROW_NUMBER()).
  */
 PFla_op_t *
-PFla_rank (const PFla_op_t *n, PFalg_att_t a, PFord_ordering_t s)
+PFla_rownum (const PFla_op_t *n,
+             PFalg_att_t a, PFord_ordering_t s, PFalg_att_t p)
 {
-    PFla_op_t    *ret = la_op_wire1 (la_rank, n);
-    unsigned int  i;
-    unsigned int  j;
-
-    assert (s);
-
-    /* copy parameters into semantic content of return node */
-    ret->sem.rank.res = a;
-    ret->sem.rank.sortby = s;
-
-    /* result schema is input schema plus the new attribute */
-    ret->schema.count = n->schema.count + 1;
-    ret->schema.items
-        = PFmalloc (ret->schema.count * sizeof (*(ret->schema.items)));
-
-    for (i = 0; i < n->schema.count; i++) {
-
-        /* there must not be an argument attribute named like the new one */
-        if (n->schema.items[i].name == a)
-            PFoops (OOPS_FATAL,
-                    "rank operator would result in duplicate attribute `%s'",
-                    PFatt_str (a));
+    return sort_col (n, la_rownum, "rownum", a, s, p);
+}
 
-        /* copy this attribute specification */
-        ret->schema.items[i] = n->schema.items[i];
-    }
-    /* append new attribute, named as given in a, with type nat */
-    ret->schema.items[ret->schema.count - 1]
-        = (struct PFalg_schm_item_t) { .name = a, .type = aat_nat };
 
-    /* sanity checks below */
+/**
+ * The `rowrank' operator, a Pathfinder-specific extension to the
+ * relational algebra (behaves like SQL DENSE_RANK()).
+ */
+PFla_op_t *
+PFla_rowrank (const PFla_op_t *n, PFalg_att_t a, PFord_ordering_t s)
+{
     if (PFord_count (s) == 0)
-        PFinfo (OOPS_WARNING,
+        PFinfo (OOPS_FATAL,
                 "applying rank operator without sort specifier");
 
-    /* see if we can find all sort specifications */
-    for (i = 0; i < PFord_count (ret->sem.rank.sortby); i++) {
+    return sort_col (n, la_rowrank, "rowrank", a, s, att_NULL);
+}
 
-        for (j = 0; j < n->schema.count; j++)
-            if (n->schema.items[j].name ==
-                         PFord_order_col_at (ret->sem.rank.sortby, i))
-                break;
 
-        if (j == n->schema.count)
-            PFoops (OOPS_FATAL,
-                    "could not find sort attribute `%s'",
-                    PFatt_str (PFord_order_col_at (
-                                   ret->sem.rank.sortby, i)));
-
-        /* FIXME: we do allow polymorphic sequences as these
-           are only introduced by a combination of sequence
-           construction and optimizing rank operators. We only
-           ensure that these polymorphic columns are not the first
-           order constraint. The order of applying the different
-           columns independentely thus does not matter. */
-        if (!i && !monomorphic (n->schema.items[j].type))
-            PFoops (OOPS_FATAL,
-                    "sort criterion for rank must be monomorphic, "
-                    "type: %i, name: %s",
-                    n->schema.items[j].type,
-                    PFatt_str (n->schema.items[j].name));
-    }
+/**
+ * The `rank' operator, a Pathfinder-specific extension to the
+ * relational algebra (behaves like SQL DENSE_RANK()). In comparison
+ * to rowrank we do not care about the generated values and can
+ * therefore apply more rewrites.
+ */
+PFla_op_t *
+PFla_rank (const PFla_op_t *n, PFalg_att_t a, PFord_ordering_t s)
+{
+    if (PFord_count (s) == 0)
+        PFinfo (OOPS_FATAL,
+                "applying rank operator without sort specifier");
 
-    return ret;
+    return sort_col (n, la_rank, "rank", a, s, att_NULL);
 }
 
 
 /**
- * The `number' operator, a Pathfinder-specific extension to the
+ * The `rowid' operator, a Pathfinder-specific extension to the
  * relational algebra.
  */
 PFla_op_t *
-PFla_number (const PFla_op_t *n, PFalg_att_t a)
+PFla_rowid (const PFla_op_t *n, PFalg_att_t a)
 {
-    PFla_op_t    *ret = la_op_wire1 (la_number, n);
+    PFla_op_t    *ret = la_op_wire1 (la_rowid, n);
     unsigned int  i;
 
     /* copy parameters into semantic content of return node */
-    ret->sem.number.res  = a;
+    ret->sem.rowid.res  = a;
 
     /* result schema is input schema plus the new attribute */
     ret->schema.count = n->schema.count + 1;
@@ -2209,7 +2166,7 @@
         /* there must not be an argument attribute named like the new one */
         if (n->schema.items[i].name == a)
             PFoops (OOPS_FATAL,
-                    "number operator would result in duplicate attribute `%s'",
+                    "rowid operator would result in duplicate attribute `%s'",
                     PFatt_str (a));
 
         /* copy this attribute specification */
@@ -4107,18 +4064,23 @@
 
         case la_rownum:
             return PFla_rownum (left,
-                                n->sem.rownum.res,
-                                n->sem.rownum.sortby,
-                                n->sem.rownum.part);
+                                n->sem.sort.res,
+                                n->sem.sort.sortby,
+                                n->sem.sort.part);
+
+        case la_rowrank:
+            return PFla_rowrank (left,
+                                 n->sem.sort.res,
+                                 n->sem.sort.sortby);
 
         case la_rank:
             return PFla_rank (left,
-                              n->sem.rank.res,
-                              n->sem.rank.sortby);
+                              n->sem.sort.res,
+                              n->sem.sort.sortby);
 
-        case la_number:
-            return PFla_number (left,
-                                n->sem.number.res);
+        case la_rowid:
+            return PFla_rowid (left,
+                               n->sem.rowid.res);
 
         case la_type:
             return PFla_type (left,

Index: builtins.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/builtins.c,v
retrieving revision 1.64
retrieving revision 1.65
diff -u -d -r1.64 -r1.65
--- builtins.c  27 Nov 2007 17:12:57 -0000      1.64
+++ builtins.c  6 Dec 2007 08:42:12 -0000       1.65
@@ -49,6 +49,8 @@
 #include "logical.h"
 #include "logical_mnemonic.h"
 
+#define RANK_DUMMY 42
+
 /**
  * Assign correct row numbers in case we need the real values.
  */
@@ -507,7 +509,7 @@
                                   proj (att_item, att_item));
 
     /* renumber */
-    PFla_op_t *q = number (nodes, att_inner);
+    PFla_op_t *q = rowid (nodes, att_inner);
 
     PFla_op_t *map = project (q,
                               proj (att_outer, att_iter),
@@ -2712,12 +2714,12 @@
     (void) loop; (void) ordering;
 
     return (struct PFla_pair_t) {
-                  .rel = number (
+                  .rel = attach (
                              distinct (
                                  project (args[0].rel,
                                       proj (att_iter, att_iter),
                                       proj (att_item, att_item))),
-                             att_pos),
+                             att_pos, lit_nat (RANK_DUMMY)),
                   .frag = args[0].frag };
 }
 
@@ -2974,11 +2976,11 @@
      * project away pos column
      */
     return (struct PFla_pair_t) {
-        .rel  = number (
+        .rel  = attach (
                     project (args[0].rel,
                              proj (att_iter, att_iter),
                              proj (att_item, att_item)),
-                    att_pos),
+                    att_pos, lit_nat (RANK_DUMMY)),
         .frag = args[0].frag };
 }
 
@@ -3076,7 +3078,7 @@
             .frag = PFla_set_union (args[0].frag, args[1].frag) };
     else
         return (struct  PFla_pair_t) {
-            .rel = number (distinct, att_pos),
+            .rel = attach (distinct, att_pos, lit_nat (RANK_DUMMY)),
             .frag = PFla_set_union (args[0].frag, args[1].frag) };
 }
 
@@ -3112,7 +3114,7 @@
             .frag = PFla_set_union (args[0].frag, args[1].frag) };
     else
         return (struct  PFla_pair_t) {
-            .rel = number (distinct, att_pos),
+            .rel = attach (distinct, att_pos, lit_nat (RANK_DUMMY)),
             .frag = PFla_set_union (args[0].frag, args[1].frag) };
 }
 
@@ -3151,7 +3153,7 @@
             .frag = args[0].frag };
     else
         return (struct  PFla_pair_t) {
-            .rel = number (difference, att_pos),
+            .rel = attach (difference, att_pos, lit_nat (RANK_DUMMY)),
             /* result nodes can only originate from first argument */
             .frag = args[0].frag };
 }
@@ -3547,7 +3549,7 @@
             .frag = args[1].frag };
     else
         return (struct PFla_pair_t) {
-            .rel = number (distinct (op), att_pos),
+            .rel = attach (distinct (op), att_pos, lit_nat (RANK_DUMMY)),
             .frag = args[1].frag };
 }
 
@@ -3677,7 +3679,7 @@
             .frag = args[0].frag };
     else
         return (struct  PFla_pair_t) {
-            .rel = number (distinct, att_pos),
+            .rel = attach (distinct, att_pos, lit_nat (RANK_DUMMY)),
             .frag = args[0].frag };
 }
 
@@ -3958,7 +3960,7 @@
      * create textnodes for each string
      * (casted atomic values and whitespace strings)
      */
-    PFla_op_t *unq_strings = number (
+    PFla_op_t *unq_strings = rowid (
                                  disjunion (
                                      attach (project (strings,
                                                      proj (att_iter, att_iter),


-------------------------------------------------------------------------
SF.Net email is sponsored by: The Future of Linux Business White Paper
from Novell.  From the desktop to the data center, Linux is going
mainstream.  Let it simplify your IT future.
http://altfarm.mediaplex.com/ad/ck/8857-50307-18918-4
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins

Reply via email to