Update of /cvsroot/monetdb/pathfinder/compiler/algebra/opt
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv11089/algebra/opt

Modified Files:
        opt_algebra_cse.c opt_join_graph.c opt_join_pd.c opt_mvd.c 
        opt_req_node.c opt_set.c opt_thetajoin.c 
Log Message:
-- First steps to get rid of bit-encoded column names:

   o The planner and the MIL generation do not rely
     on bit-encoded columns anymore.

   o A global counter allows to assign new column names
     without the need to analyze the plan before.

   o Operator la_eqjoin_unq is only introduced in the
     join pushdown phase and a normal eqjoin is available
     for all other phases working on unique column names.

   o Changed the required node optimization phase to cope
     with unique column names.


U opt_algebra_cse.c
Index: opt_algebra_cse.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/opt/opt_algebra_cse.c,v
retrieving revision 1.32
retrieving revision 1.33
diff -u -d -r1.32 -r1.33
--- opt_algebra_cse.c   2 Jun 2008 13:54:31 -0000       1.32
+++ opt_algebra_cse.c   18 Jun 2008 11:24:23 -0000      1.33
@@ -488,7 +488,7 @@
     }
 
     if (name_conflict) {
-        new_col = PFalg_ori_name (PFalg_unq_name (att, 0),
+        new_col = PFalg_ori_name (PFalg_unq_name (att),
                             ~used_cols);
     }
 
@@ -1402,7 +1402,7 @@
                        PFalg_att_t t =
                               PFalg_ori_name (
                                   PFalg_unq_name (
-                                      CSE(R(n))->schema.items[i].name, 0),
+                                      CSE(R(n))->schema.items[i].name),
                                       ~used_cols);
 
                        /* create new entry for projection */

U opt_join_pd.c
Index: opt_join_pd.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/opt/opt_join_pd.c,v
retrieving revision 1.48
retrieving revision 1.49
diff -u -d -r1.48 -r1.49
--- opt_join_pd.c       5 Jun 2008 09:19:30 -0000       1.48
+++ opt_join_pd.c       18 Jun 2008 11:24:26 -0000      1.49
@@ -88,19 +88,6 @@
 #define ratt(p)  (proj_at(rproj(p),0).old)
 #define res(p)   (proj_at(lproj(p),0).new)
 
-/* highest free id value */
-static unsigned int highest_id;
-
-/* increase the id of the input column name */
-static PFalg_att_t
-free_col (PFalg_att_t att)
-{
-    assert (PFalg_is_unq_name (att));
-
-    return PFalg_unq_name (PFalg_ori_name (att, ~att_NULL),
-                           highest_id++);
-}
-        
 /* abbreviation for attribute dependency test */
 static bool
 is_join_att (PFla_op_t *p, PFalg_att_t att)
@@ -664,7 +651,7 @@
                     /* any new column will be added
                        with an unused column name */
                     if (j == lproj_old_size)
-                        proj_add (lproj) = proj (free_col (cur), cur);
+                        proj_add (lproj) = proj (PFalg_new_name (cur), cur);
                 }
 
                 *p = *(PFla_project_ (eqjoin_unq (L(lp), rp, lproj, rproj),
@@ -1685,26 +1672,133 @@
 }
 
 /**
- * find the highest column name id
+ * Introduce the special eqjoin operator with implicit
+ * projection list that is pushed down in the optimization
+ * phase.
  */
-static unsigned int
-get_highest_id (PFla_op_t *p, unsigned int id)
+static void
+introduce_eqjoin_unq (PFla_op_t *p)
 {
-    unsigned int col_id;
+    if (SEEN(p))
+        return;
+    else
+        SEEN(p) = true;
+    
+    for (unsigned int i = 0; i < PFLA_OP_MAXCHILD && p->child[i]; i++)
+        introduce_eqjoin_unq (p->child[i]);
 
+    /* introduce the special eqjoin operator */
+    if (p->kind == la_eqjoin) {
+        unsigned int  lcount = L(p)->schema.count,
+                      rcount = R(p)->schema.count,
+                      i;
+        PFarray_t    *lproj  = PFarray (sizeof (PFalg_proj_t), lcount),
+                     *rproj  = PFarray (sizeof (PFalg_proj_t), rcount);
+        PFalg_proj_t *projlist;
+        PFalg_att_t   att1   = p->sem.eqjoin.att1,
+                      att2   = p->sem.eqjoin.att2,
+                      res    = att1 < att2 ? att1 : att2,
+                      att;
+        
+        projlist  = PFmalloc (p->schema.count * sizeof (PFalg_proj_t));
+        
+        /* add the join columns as first arguments
+           to the projection lists */
+        proj_add (lproj) = proj (res, att1);
+        proj_add (rproj) = proj (res, att2);
+        
+        /* fill the projection lists */
+        for (i = 0; i < lcount; i++) {
+            att = L(p)->schema.items[i].name;
+            if (att == att1) {
+                projlist[i] = proj (att1, res);
+            }
+            else {
+                projlist[i] = proj (att, att);
+                proj_add (lproj) = proj (att, att);
+            }
+        }
+        for (i = 0; i < rcount; i++) {
+            att = R(p)->schema.items[i].name;
+            if (att == att2) {
+                projlist[i+lcount] = proj (att2, res);
+            }
+            else {
+                projlist[i+lcount] = proj (att, att);
+                proj_add (rproj) = proj (att, att);
+            }
+        }
+            
+        *p = *PFla_project_ (
+                  eqjoin_unq (L(p), R(p), lproj, rproj),
+                  p->schema.count, projlist);
+    }
+}
+
+/**
+ * Replace the special eqjoin operator with implicit
+ * projection list with the normal eqjoin operator.
+ */
+static void
+remove_eqjoin_unq (PFla_op_t *p)
+{
     if (SEEN(p))
-        return id;
+        return;
     else
         SEEN(p) = true;
     
-    for (unsigned int i = 0; i < p->schema.count; i++) {
-        col_id = PFalg_unq_name_id (p->schema.items[i].name);
-        if (id <= col_id)
-            id = col_id;
+    for (unsigned int i = 0; i < PFLA_OP_MAXCHILD && p->child[i]; i++)
+        remove_eqjoin_unq (p->child[i]);
+
+    /* remove the special eqjoin operator */
+    if (p->kind == la_eqjoin_unq) {
+        PFarray_t    *lproj  = p->sem.eqjoin_unq.lproj,
+                     *rproj  = p->sem.eqjoin_unq.rproj;
+        PFalg_proj_t *projlist,
+                     *lprojlist,
+                     *rprojlist;
+        unsigned int  lcount = PFarray_last (lproj),
+                      rcount = PFarray_last (rproj),
+                      i;
+        PFalg_att_t   att1_new,
+                      att2_old,
+                      att2_new,
+                      att;
+        
+        /* look up the join column names */
+        att1_new = proj_at (lproj, 0).new;
+        att2_old = proj_at (rproj, 0).old;
+        /* get a new column name */
+        att2_new = PFalg_new_name (att2_old);
+
+        projlist  = PFmalloc (p->schema.count * sizeof (PFalg_proj_t));
+        lprojlist = PFmalloc (lcount * sizeof (PFalg_proj_t));
+        rprojlist = PFmalloc (rcount * sizeof (PFalg_proj_t));
+        
+        /* create the projection list for the left operand */
+        for (i = 0; i < PFarray_last (lproj); i++)
+            lprojlist[i] = proj_at (lproj, i);
+
+        /* create the projection list for the right operand */
+        rprojlist[0] = proj (att2_new, att2_old);
+        for (i = 1; i < PFarray_last (rproj); i++)
+            rprojlist[i] = proj_at (rproj, i);
+
+        /* As some operators rely on the schema of its operands
+           we introduce a projection that removes the second join
+           attribute thus maintaining the schema of the duplicate
+           aware eqjoin operator. */
+        for (unsigned int i = 0; i < p->schema.count; i++) {
+            att = p->schema.items[i].name;
+            projlist[i] = proj (att, att);
+        }
+        
+        *p = *PFla_project_ (
+                  eqjoin (PFla_project_ (L(p), lcount, lprojlist),
+                          PFla_project_ (R(p), rcount, rprojlist),
+                          att1_new, att2_new),
+                  p->schema.count, projlist);
     }
-    if (L(p)) id = get_highest_id (L(p), id);
-    if (R(p)) id = get_highest_id (R(p), id);
-    return id;
 }
 
 /**
@@ -1717,7 +1811,9 @@
     unsigned int tries = 0, max_tries = 1;
     bool modified = true;
 
-    highest_id = get_highest_id (root, 0) + 1;
+    /* replace la_eqjoin by la_eqjoin_unq operators */
+    introduce_eqjoin_unq (root);
+    PFla_dag_reset (root);
 
     /* Optimize algebra tree */
     while (modified || tries <= max_tries) {
@@ -1733,6 +1829,11 @@
         PFla_dag_reset (root);
         if (!modified) tries++;
     }
+
+    /* replace la_eqjoin_unq by la_eqjoin operators */
+    remove_eqjoin_unq (root);
+    PFla_dag_reset (root);
+
     return root;
 }
 

U opt_set.c
Index: opt_set.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/opt/opt_set.c,v
retrieving revision 1.11
retrieving revision 1.12
diff -u -d -r1.11 -r1.12
--- opt_set.c   5 Jun 2008 09:19:30 -0000       1.11
+++ opt_set.c   18 Jun 2008 11:24:27 -0000      1.12
@@ -75,7 +75,7 @@
                 PFprop_set (L(p)->prop)) {
                 PFalg_att_t item_res;
                 item_res = PFalg_ori_name (
-                               PFalg_unq_name (att_item, 0),
+                               PFalg_unq_name (att_item),
                                ~(L(p)->sem.step.item |
                                  L(p)->sem.step.iter));
                 *L(p) = *PFla_project (
@@ -97,7 +97,7 @@
                 PFprop_set (R(p)->prop)) {
                 PFalg_att_t item_res;
                 item_res = PFalg_ori_name (
-                               PFalg_unq_name (att_item, 0),
+                               PFalg_unq_name (att_item),
                                ~(R(p)->sem.step.item |
                                  R(p)->sem.step.iter));
                 *R(p) = *PFla_project (

U opt_thetajoin.c
Index: opt_thetajoin.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/opt/opt_thetajoin.c,v
retrieving revision 1.24
retrieving revision 1.25
diff -u -d -r1.24 -r1.25
--- opt_thetajoin.c     2 Jun 2008 13:54:32 -0000       1.24
+++ opt_thetajoin.c     18 Jun 2008 11:24:28 -0000      1.25
@@ -239,7 +239,7 @@
         PFalg_att_t new_col, cur_col;
 
         /* generate new column name */
-        new_col = PFalg_ori_name (PFalg_unq_name (att, 0),
+        new_col = PFalg_ori_name (PFalg_unq_name (att),
                                   ~used_cols);
         used_cols = used_cols | new_col;
 
@@ -270,7 +270,7 @@
         PFalg_att_t new_col, cur_col;
 
         /* generate new column name */
-        new_col = PFalg_ori_name (PFalg_unq_name (att, 0),
+        new_col = PFalg_ori_name (PFalg_unq_name (att),
                                   ~used_cols);
         used_cols = used_cols | new_col;
 
@@ -356,7 +356,7 @@
 
             if (cur_col & conf_cols) {
                 /* generate new column name */
-                new_col = PFalg_ori_name (PFalg_unq_name (cur_col, 0),
+                new_col = PFalg_ori_name (PFalg_unq_name (cur_col),
                                           ~used_cols);
                 used_cols = used_cols | new_col;
 
@@ -388,7 +388,7 @@
 
             if (cur_col & conf_cols) {
                 /* generate new column name */
-                new_col = PFalg_ori_name (PFalg_unq_name (cur_col, 0),
+                new_col = PFalg_ori_name (PFalg_unq_name (cur_col),
                                           ~used_cols);
                 used_cols = used_cols | new_col;
 
@@ -899,7 +899,7 @@
                             /* introduce a new column name ... */
                             PFalg_att_t new_col;
                             new_col = PFalg_ori_name (
-                                          PFalg_unq_name (LEFT_AT(pred, i), 0),
+                                          PFalg_unq_name (LEFT_AT(pred, i)),
                                           ~used_cols);
                             used_cols = used_cols | new_col;
 
@@ -923,7 +923,7 @@
                             /* introduce a new column name ... */
                             PFalg_att_t new_col;
                             new_col = PFalg_ori_name (
-                                          PFalg_unq_name (RIGHT_AT(pred, i), 
0),
+                                          PFalg_unq_name (RIGHT_AT(pred, i)),
                                           ~used_cols);
                             used_cols = used_cols | new_col;
 
@@ -2129,7 +2129,7 @@
                                 PFalg_att_t new_col;
                                 /* get a new column name */
                                 new_col = PFalg_ori_name (
-                                              PFalg_unq_name (cur_col, 0),
+                                              PFalg_unq_name (cur_col),
                                               ~used_cols);
                                 used_cols = used_cols | new_col;
                                 /* introduce a renaming */
@@ -2155,7 +2155,7 @@
                                 PFalg_att_t new_col;
                                 /* get a new column name */
                                 new_col = PFalg_ori_name (
-                                              PFalg_unq_name (cur_col, 0),
+                                              PFalg_unq_name (cur_col),
                                               ~used_cols);
                                 used_cols = used_cols | new_col;
                                 /* introduce a renaming */
@@ -2198,7 +2198,7 @@
                                 if (cur_col & used_cols) {
                                     /* get a new column name */
                                     new_col = PFalg_ori_name (
-                                                  PFalg_unq_name (cur_col, 0),
+                                                  PFalg_unq_name (cur_col),
                                                   ~(used_cols |
                                                     used_invisible_cols));
                                     used_cols = used_cols | new_col;
@@ -2224,7 +2224,7 @@
                                 if (cur_col & used_cols) {
                                     /* get a new column name */
                                     new_col = PFalg_ori_name (
-                                                  PFalg_unq_name (cur_col, 0),
+                                                  PFalg_unq_name (cur_col),
                                                   ~(used_cols |
                                                     used_invisible_cols));
                                     used_cols = used_cols | new_col;
@@ -2489,7 +2489,7 @@
                         PFalg_att_t new_col;
                         /* get a new column name */
                         new_col = PFalg_ori_name (
-                                      PFalg_unq_name (RES_AT(pred, i), 0),
+                                      PFalg_unq_name (RES_AT(pred, i)),
                                       ~used_cols);
                         used_cols = used_cols | new_col;
 
@@ -2516,7 +2516,7 @@
                         PFalg_att_t new_col;
                         /* get a new column name */
                         new_col = PFalg_ori_name (
-                                      PFalg_unq_name (RES_AT(pred, i), 0),
+                                      PFalg_unq_name (RES_AT(pred, i)),
                                       ~used_cols);
                         used_cols = used_cols | new_col;
 
@@ -2536,7 +2536,7 @@
                         PFalg_att_t new_col;
                         /* get a new column name */
                         new_col = PFalg_ori_name (
-                                      PFalg_unq_name (RES_AT(pred, i), 0),
+                                      PFalg_unq_name (RES_AT(pred, i)),
                                       ~used_cols);
                         used_cols = used_cols | new_col;
 

U opt_req_node.c
Index: opt_req_node.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/opt/opt_req_node.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- opt_req_node.c      4 Apr 2008 09:45:15 -0000       1.1
+++ opt_req_node.c      18 Jun 2008 11:24:27 -0000      1.2
@@ -135,61 +135,63 @@
  * @brief Copy a constant fragment using a different loop relation (@a loop).
  */
 static PFla_op_t *
-build_constant_frag (PFla_op_t *p, PFla_op_t *loop)
+build_constant_frag (PFla_op_t *p, PFla_op_t *loop, PFalg_att_t iter, bool unq)
 {
+#define name_item  (unq ? PFalg_unq_fixed_name (att_item, 1) : att_item)
+#define name_item1 (unq ? PFalg_unq_fixed_name (att_item, 2) : att_item1)
     switch (p->kind) {
         case la_fcns:
-            return fcns (build_constant_frag (L(p), loop),
-                         build_constant_frag (R(p), loop));
+            return fcns (build_constant_frag (L(p), loop, iter, unq),
+                         build_constant_frag (R(p), loop, iter, unq));
         case la_docnode:
             return docnode (loop,
-                            build_constant_frag (R(p), loop), att_iter);
+                            build_constant_frag (R(p), loop, iter, unq), iter);
         case la_element:
             return element (attach (loop,
-                                    att_item,
+                                    name_item,
                                     PFprop_const_val_left (
                                         p->prop,
                                         p->sem.iter_item.item)),
-                            build_constant_frag (R(p), loop),
-                            att_iter, att_item);
+                            build_constant_frag (R(p), loop, iter, unq),
+                            iter, name_item);
         case la_textnode:
             return textnode (attach (loop,
-                                     att_item,
+                                     name_item,
                                      PFprop_const_val_left (
                                          p->prop,
                                          p->sem.iter_item.item)),
-                             att_iter, att_item);
+                             iter, name_item);
         case la_comment:
             return comment (attach (loop,
-                                    att_item,
+                                    name_item,
                                     PFprop_const_val_left (
                                         p->prop,
                                         p->sem.iter_item.item)),
-                            att_iter, att_item);
+                            iter, name_item);
         case la_attribute:
             return attribute (attach (
                                   attach (loop,
-                                          att_item,
+                                          name_item,
                                           PFprop_const_val_left (
                                               p->prop,
                                               p->sem.iter_item1_item2.item1)),
-                                  att_item1,
+                                  name_item1,
                                   PFprop_const_val_left (
                                       p->prop,
                                       p->sem.iter_item1_item2.item2)),
-                              att_iter, att_item, att_item1);
+                              iter, name_item, name_item1);
         case la_processi:
             return processi (attach (
                                  attach (loop,
-                                         att_item,
+                                         name_item,
                                          PFprop_const_val_left (
                                              p->prop,
                                              p->sem.iter_item1_item2.item1)),
-                                 att_item1,
+                                 name_item1,
                                  PFprop_const_val_left (
                                      p->prop,
                                      p->sem.iter_item1_item2.item2)),
-                             att_iter, att_item, att_item1);
+                             iter, name_item, name_item1);
         case la_content:
             PFoops (OOPS_FATAL, "constructor not constant");
         case la_nil:
@@ -266,16 +268,19 @@
              twig_constant (LL(p))) {
         /* Save the old item column name as well as the old loop relation
            before overwriting the references. */
-        PFalg_att_t item = L(p)->sem.iter_item.item;
+        PFalg_att_t iter = L(p)->sem.iter_item.iter,
+                    item = L(p)->sem.iter_item.item;
         PFla_op_t  *loop = get_loop_relation (LL(p), L(p)->sem.iter_item.iter);
 
         /* relink the fragment operator */
         *L(p) = *twig (build_constant_frag (
                            LL(p),
-                           lit_tbl (attlist (att_iter), tuple (lit_nat (1)))),
-                       att_iter, att_item);
+                           lit_tbl (attlist (iter), tuple (lit_nat (1))),
+                           iter,
+                           PFalg_is_unq_name (item)),
+                       iter, item);
 
-        *p    = *cross (project (roots (L(p)), proj (item, att_item)),
+        *p    = *cross (project (roots (L(p)), proj (item, item)),
                         loop);
     }
 

U opt_mvd.c
Index: opt_mvd.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/opt/opt_mvd.c,v
retrieving revision 1.38
retrieving revision 1.39
diff -u -d -r1.38 -r1.39
--- opt_mvd.c   3 Apr 2008 15:23:06 -0000       1.38
+++ opt_mvd.c   18 Jun 2008 11:24:27 -0000      1.39
@@ -1681,12 +1681,12 @@
                    to ensure that at least one column remains as input
                    for the cross product. */
                 if (!lcount) {
-                    cur = PFalg_ori_name (PFalg_unq_name (att_iter, 0),
+                    cur = PFalg_ori_name (PFalg_unq_name (att_iter),
                                           ~used_cols);
                     proj_lcross[0] = proj (cur, p->schema.items[0].name);
                     lcount++;
                 } else if (!rcount) {
-                    cur = PFalg_ori_name (PFalg_unq_name (att_iter, 0),
+                    cur = PFalg_ori_name (PFalg_unq_name (att_iter),
                                           ~used_cols);
                     proj_rcross[0] = proj (cur, rcross->schema.items[0].name);
                     rcount++;

U opt_join_graph.c
Index: opt_join_graph.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/opt/opt_join_graph.c,v
retrieving revision 1.14
retrieving revision 1.15
diff -u -d -r1.14 -r1.15
--- opt_join_graph.c    8 May 2008 20:13:21 -0000       1.14
+++ opt_join_graph.c    18 Jun 2008 11:24:25 -0000      1.15
@@ -154,7 +154,7 @@
 
                 PFalg_att_t item_res;
                 item_res = PFalg_ori_name (
-                               PFalg_unq_name (att_item, 0),
+                               PFalg_unq_name (att_item),
                                ~(p->sem.step.item |
                                  p->sem.step.iter));
 
@@ -191,7 +191,7 @@
 
                 PFalg_att_t item_res;
                 item_res = PFalg_ori_name (
-                               PFalg_unq_name (att_item, 0),
+                               PFalg_unq_name (att_item),
                                ~(p->sem.step.item |
                                  p->sem.step.iter));
 
@@ -270,7 +270,7 @@
                 /* rename the join argument in case a name conflict occurs */
                 if (used_cols & new_col)
                     new_col = PFalg_ori_name (
-                                  PFalg_unq_name (new_col, 0),
+                                  PFalg_unq_name (new_col),
                                   ~used_cols);
 
                 /* replace the semijoin */
@@ -306,7 +306,7 @@
 
                 PFalg_att_t item_res;
                 item_res = PFalg_ori_name (
-                               PFalg_unq_name (att_item, 0),
+                               PFalg_unq_name (att_item),
                                ~(p->sem.step.item |
                                  p->sem.step.iter));
 
@@ -334,7 +334,7 @@
 
                 PFalg_att_t item_res;
                 item_res = PFalg_ori_name (
-                               PFalg_unq_name (att_item, 0),
+                               PFalg_unq_name (att_item),
                                ~(p->sem.step.item |
                                  p->sem.step.iter));
 


-------------------------------------------------------------------------
Check out the new SourceForge.net Marketplace.
It's the best place to buy or sell services for
just about anything Open Source.
http://sourceforge.net/services/buy/index.php
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins

Reply via email to