Update of /cvsroot/monetdb/pathfinder/compiler/sql
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv29987/sql

Modified Files:
        lalg2sql.brg sql.c sqlprint.c 
Log Message:
-- Re-organized our set of numbering operators. We now have 4 different
   operators with consistent semantics that cope with sorting and numbering:

     - la_rownum behaves exactly like SQL's ROW_NUMBER. It is used to generate
       position values.

     - la_rowrank behaves exactly like SQL's DENSE_RANK. It is used to generate
       the group-by semantics of our functional source language. Until now
       we have only needed the unpartitioned variant. (In MIL it is implemented
       using the sort extend.)

     - la_rank -- with one exception -- behaves like la_rowrank. It is also
       implemented in our SQL compilation with a DENSE_RANK operation. la_rank's
       important difference to la_rowrank is that its resulting values are used
       solely for ordering. No operation should ever look at the generated
       values.
       While this difference is uninteresting in the resulting code, it
       simplifies the algebraic optimizer a lot. Instead of repeatedly inferring
       a property that checks for column usage we can optimize based on the
       operator kind.

     - la_rowid generates unrepeatable unique numbers (as 'ROW_NUMBER() OVER ()'
       does in SQL or 'mark()' does in MIL). It is used to generate a new key
       column for mapping joins.

   In comparison to the old version we introduced a new operator la_rowrank,
   changed the semantics of la_rank from ROW_NUMBER to DENSE_RANK, and renamed
   the former la_number operator to la_rowid.

   To implement positions in our Core to Algebra translation consistently we
   now use only la_rownum (to generate real position values),
   la_rank (to represent intermediate position order), and constant values
   (to represent unordered sequences).

-- Introduced new SQL operator DENSE_RANK.

-- Split the physical pa_number operator into 3 operators:
   pa_mark, pa_rank, and pa_mark_grp. The first and the last operator correspond
   to the respective MIL primitives. The result column of pa_rank is generated
   by the extend column of a CTrefine operation.

-- Added check for environment variable PF_DEBUG_PRINT_FRAG to disable the
   fragment printing in the AT&T dot output of the logical algebra.



Index: sql.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/sql/sql.c,v
retrieving revision 1.42
retrieving revision 1.43
diff -u -d -r1.42 -r1.43
--- sql.c       27 Nov 2007 21:26:45 -0000      1.42
+++ sql.c       6 Dec 2007 08:42:43 -0000       1.43
@@ -973,12 +973,21 @@
 }
 
 /**
- * Create a SQL tree node representing SQL `ROWNUMBER()' function.
+ * Create a SQL tree node representing SQL `ROW_NUMBER()' function.
  */
 PFsql_t *
-PFsql_rownumber (void)
+PFsql_row_number (void)
 {
-    return leaf (sql_rownumber);
+    return leaf (sql_row_number);
+}
+
+/**
+ * Create a SQL tree node representing SQL `DENSE_RANK()' function.
+ */
+PFsql_t *
+PFsql_dense_rank (void)
+{
+    return leaf (sql_dense_rank);
 }
 
 /**

Index: lalg2sql.brg
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/sql/lalg2sql.brg,v
retrieving revision 1.94
retrieving revision 1.95
diff -u -d -r1.94 -r1.95
--- lalg2sql.brg        27 Nov 2007 21:26:45 -0000      1.94
+++ lalg2sql.brg        6 Dec 2007 08:42:42 -0000       1.95
@@ -111,16 +111,17 @@
 %term min_            = 34 /**< operator for (partitioned) min of a column */
 %term sum             = 35 /**< operator for (partitioned) sum of a column */
 %term count           = 36 /**< (partitioned) row counting operator */
-%term rownum          = 37 /**< consecutive number generation */
-%term rank            = 38 /**< arbitrary but ordered number generation */
-%term number          = 39 /**< arbitrary, unordered number generation */
-%term type            = 40 /**< selection of rows where a column is of a
+%term rownum          = 37 /**< consecutive number generation (ROW_NUMBER) */
+%term rowrank         = 38 /**< consecutive number generation (DENSE_RANK) */
+%term rank            = 39 /**< arbitrary but ordered number generation */
+%term rowid           = 40 /**< arbitrary, unordered number generation */
+%term type            = 41 /**< selection of rows where a column is of a
                                 certain type */
-%term type_assert     = 41 /**< restricts the type of a relation */
-%term cast            = 42 /**< type cast of an attribute */
-%term seqty1          = 43 /**< test for exactly one type occurrence in one
+%term type_assert     = 42 /**< restricts the type of a relation */
+%term cast            = 43 /**< type cast of an attribute */
+%term seqty1          = 44 /**< test for exactly one type occurrence in one
                                 iteration (Pathfinder extension) */
-%term all             = 44 /**< test if all items in an iteration are true */
+%term all             = 45 /**< test if all items in an iteration are true */
 %term step            = 50 /**< XPath location step */
 %term step_join       = 51 /**< duplicate generating path step */
 %term guide_step      = 52 /**< XPath location step
@@ -231,8 +232,9 @@
 Rel:    string_join (Rel, Rel)                       =  43 (10);
 
 Rel:    rownum (Rel)                                 =  50 (10);
-Rel:    number (Rel)                                 =  51 (10);
+Rel:    rowrank (Rel)                                =  51 (10);
 Rel:    rank (Rel)                                   =  52 (10);
+Rel:    rowid (Rel)                                  =  53 (10);
 
 Rel:    step (Frag, Rel)                             =  55 (10);
 Rel:    guide_step (Frag, Rel)                       =  56 (10);
@@ -1319,7 +1321,7 @@
                                column_name (iter_col)),
                            column_assign (
                                add (MAX(max),
-                                    over (rownumber (),
+                                    over (row_number (),
                                           window_clause (NULL, NULL))),
                                PRE_),
                            column_assign (lit_int (0), SIZE_),
@@ -1342,7 +1344,7 @@
                                column_name (iter_col)),
                            column_assign (
                                add (MAX(max),
-                                    over (rownumber (),
+                                    over (row_number (),
                                           window_clause (
                                               NULL,
                                               order_by (sortkey_list)))),
@@ -1764,9 +1766,9 @@
 
                 assert (R(p)->kind == la_project &&
                         RL(p)->kind == la_rank &&
-                        pos == RL(p)->sem.rank.res);
+                        pos == RL(p)->sem.sort.res);
                 
-                sortby = RL(p)->sem.rank.sortby;
+                sortby = RL(p)->sem.sort.sortby;
                 for (int i = PFord_count (sortby) - 1; i >= 0; i--) {
                     ord = PFord_order_col_at (sortby, i);
                     asc = PFord_order_dir_at (sortby, i) == DIR_ASC;
@@ -2121,7 +2123,7 @@
                               false,
                               select_list (
                                   column_assign (
-                                      over (rownumber (),
+                                      over (row_number (),
                                             window_clause (NULL, NULL)),
                                       PRE_),
                                   SIZE(content),
@@ -2762,7 +2764,7 @@
                 col_env_add (COLMAP(p),
                              num,
                              aat_int,
-                             over (rownumber (),
+                             over (row_number (),
                                    window_clause (
                                        partlist,
                                        PFord_count (sortby)
@@ -3447,9 +3449,9 @@
             copy_cols_from_where (p, L(p));
 
             /* collect all sorting criteria */
-            for (int i = PFord_count (p->sem.rownum.sortby) - 1; i >= 0; i--) {
-                ord = PFord_order_col_at (p->sem.rownum.sortby, i);
-                asc = PFord_order_dir_at (p->sem.rownum.sortby, i) == DIR_ASC;
+            for (int i = PFord_count (p->sem.sort.sortby) - 1; i >= 0; i--) {
+                ord = PFord_order_col_at (p->sem.sort.sortby, i);
+                asc = PFord_order_dir_at (p->sem.sort.sortby, i) == DIR_ASC;
                 srtbylist = sortkey_list (
                                 sortkey_item (
                                     col_env_lookup (
@@ -3461,43 +3463,30 @@
             }
 
             /* create partition criterion if present */
-            if (p->sem.rownum.part)
+            if (p->sem.sort.part)
                 partlist = partition (
                                column_list (
                                    col_env_lookup (
                                        COLMAP(L(p)),
-                                       p->sem.rownum.part,
-                                       type_of (L(p), p->sem.rownum.part))));
+                                       p->sem.sort.part,
+                                       type_of (L(p), p->sem.sort.part))));
 
             orderby = (srtbylist->kind == sql_nil)?NULL:order_by(srtbylist);
 
             col_env_add (COLMAP(p),
-                         p->sem.rownum.res,
+                         p->sem.sort.res,
                          aat_nat,
-                         over (rownumber (),
+                         over (row_number (),
                                window_clause (
                                    partlist,
                                    orderby)));
 
             bind = true;
-            execute (comment ("binding due to rownumber operator"));
+            execute (comment ("binding due to rownum operator"));
         }   break;
 
-        /* Rel:    number (Rel) */
+        /* Rel:    rowrank (Rel) */
         case 51:
-            /* copy all existing column, from, and where lists */
-            copy_cols_from_where (p, L(p));
-
-            col_env_add (COLMAP(p),
-                         p->sem.number.res,
-                         aat_nat,
-                         over (rownumber (),
-                               window_clause (NULL, NULL)));
-
-            bind = true;
-            execute (comment ("binding due to number operator"));
-            break;
-
         /* Rel:    rank (Rel) */
         case 52:
             /* copy all existing column, from, and where lists */
@@ -3507,7 +3496,7 @@
                 /* Ignore the result of the rank operator in the normal
                    translation process and only add it to the special
                    rank map environment. */
-                PFord_ordering_t sortby = p->sem.rank.sortby;
+                PFord_ordering_t sortby = p->sem.sort.sortby;
                 unsigned int     i, j, k;
                 PFalg_att_t      ord;
                 PFarray_t       *srtbylist = PFarray (sizeof (PFsql_t *));
@@ -3552,19 +3541,19 @@
 
                 /* add new entry to the rank map */
                 rank_map = PFmalloc (sizeof (rank_map_t));
-                *rank_map = (rank_map_t) { .name = p->sem.rank.res,
+                *rank_map = (rank_map_t) { .name = p->sem.sort.res,
                                            .sort_list = srtbylist };
                 *(rank_map_t **) PFarray_add (RANK_MAP(p)) = rank_map;
 
                 /* add a dummy for the rank operator */
                 col_env_add (COLMAP(p),
-                             p->sem.rank.res,
+                             p->sem.sort.res,
                              aat_nat,
                              lit_int(0));
             } else {
-                /* Normal translation of rank operator (using a ROW_NUMBER
+                /* Normal translation of rank operator (using a DENSE_RANK
                    operator). */
-                PFord_ordering_t sortby = p->sem.rank.sortby;
+                PFord_ordering_t sortby = p->sem.sort.sortby;
                 PFsql_t *srtbylist = NULL;
                 PFalg_att_t ord;
                 bool asc;
@@ -3584,9 +3573,9 @@
 
                 }
                 col_env_add (COLMAP(p),
-                             p->sem.rank.res,
+                             p->sem.sort.res,
                              aat_nat,
-                             over (rownumber (),
+                             over (dense_rank (),
                                    window_clause (NULL, order_by (srtbylist))));
 
                 bind = true;
@@ -3594,6 +3583,21 @@
             }
             break;
 
+        /* Rel:    rowid (Rel) */
+        case 53:
+            /* copy all existing column, from, and where lists */
+            copy_cols_from_where (p, L(p));
+
+            col_env_add (COLMAP(p),
+                         p->sem.rowid.res,
+                         aat_nat,
+                         over (row_number (),
+                               window_clause (NULL, NULL)));
+
+            bind = true;
+            execute (comment ("binding due to rowid operator"));
+            break;
+
         /* Rel:    step (Frag, Rel) */
         case 55:
         /* Rel:    guide_step (Frag, Rel) */
@@ -4590,7 +4594,7 @@
         }   break;
 
         case la_rank:
-            RANK_LIST(p) = p->sem.rank.res;
+            RANK_LIST(p) = p->sem.sort.res;
         case la_roots:
         case la_attach:
             ITEM_LIST(p) = ITEM_LIST(L(p));
@@ -4654,8 +4658,8 @@
              * the rank is not save.
              * Except the case where item is the last sort criterion
              * but we don't handle this. */
-            for (int i = PFord_count (p->sem.rank.sortby) - 1; i >= 0; i--) {
-                ord = PFord_order_col_at (p->sem.rank.sortby, i);
+            for (int i = PFord_count (p->sem.sort.sortby) - 1; i >= 0; i--) {
+                ord = PFord_order_col_at (p->sem.sort.sortby, i);
                 if (ITEM_LIST(p) & ord) {
                     SER_REPORT(p) = ser_no;
                     return;

Index: sqlprint.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/sql/sqlprint.c,v
retrieving revision 1.41
retrieving revision 1.42
diff -u -d -r1.41 -r1.42
--- sqlprint.c  27 Nov 2007 21:26:45 -0000      1.41
+++ sqlprint.c  6 Dec 2007 08:42:44 -0000       1.42
@@ -128,7 +128,8 @@
       [sql_min]    /* used */ = "MIN",
       [sql_avg]    /* used */ = "AVG",
       [sql_over]              = "over",
-      [sql_rownumber]         = "rownumber",
+      [sql_row_number]        = "ROW_NUMBER",
+      [sql_dense_rank]        = "DENSE_RANK",
       [sql_wnd_clause]        = "wnd_clause",
       [sql_order_by]          = "order_by",
       [sql_sortkey_list]      = "sortkey_list",
@@ -571,9 +572,10 @@
             break;
 
         case sql_over:
-            assert (L(n)->kind == sql_rownumber);
+            assert (L(n)->kind == sql_row_number ||
+                    L(n)->kind == sql_dense_rank);
             
-            PFprettyprintf ("ROW_NUMBER () OVER (%c", START_BLOCK );
+            PFprettyprintf ("%s () OVER (%c", ID[L(n)->kind], START_BLOCK);
             print_window_clause (R(n));
             PFprettyprintf ("%c)", END_BLOCK);
             break;


-------------------------------------------------------------------------
SF.Net email is sponsored by: The Future of Linux Business White Paper
from Novell.  From the desktop to the data center, Linux is going
mainstream.  Let it simplify your IT future.
http://altfarm.mediaplex.com/ad/ck/8857-50307-18918-4
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins

Reply via email to