Update of /cvsroot/monetdb/pathfinder/compiler/algebra
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv26788/algebra

Modified Files:
      Tag: XQuery_0-24
        algebra.c algebra_cse.c algopt.c logical.c 
Log Message:
-- Re-implemented the join pushdown optimization phase
   (due to incorrect rewrites).

   o The special equi-join operator working on unique column
     names now incorporates for each operand a projection.

     Based on this projection we can now always maintain the
     correct schema and push the join even through renaming
     projections.

     The old variant did in some cases 'forget' from which
     operand of an equi-join columns with the same name stem
     from.

   o The new equi-join pushdown phase is a lot more effective
     than the old one and thus generates very wide relations
     (where a large number of columns can be pruned afterwards).

     Placing a projection pushdown phase (icols optimization)
     afterwards allows us to map the resulting plans back to
     bit-encoded column names without running out of column
     bits.


U algopt.c
Index: algopt.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/algopt.c,v
retrieving revision 1.35
retrieving revision 1.35.2.1
diff -u -d -r1.35 -r1.35.2.1
--- algopt.c    8 May 2008 20:13:21 -0000       1.35
+++ algopt.c    28 May 2008 11:37:18 -0000      1.35.2.1
@@ -197,8 +197,6 @@
                 break;
 
             case 'I':
-                MAP_ORI_NAMES("icol optimization")
-
                 tm = PFtimer_start ();
 
                 root = PFalgopt_icol (root);
@@ -286,10 +284,6 @@
                 break;
 
             case 'S':
-                /* we need the icols property and thus cannot
-                   apply the optimization if our names are unique */
-                MAP_ORI_NAMES("set optimization")
-
                 tm = PFtimer_start ();
 
                 root = PFalgopt_set (root);
@@ -348,7 +342,7 @@
                                   true  /* const */,
                                   true  /* set */,
                                   true  /* dom */,
-                                  false /* icol */,
+                                  true  /* icol */,
                                   false /* composite key */,
                                   true  /* key */,
                                   false /* ocols */,

U algebra.c
Index: algebra.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/algebra.c,v
retrieving revision 1.84
retrieving revision 1.84.2.1
diff -u -d -r1.84 -r1.84.2.1
--- algebra.c   9 May 2008 19:24:55 -0000       1.84
+++ algebra.c   28 May 2008 11:37:16 -0000      1.84.2.1
@@ -786,12 +786,25 @@
  * Checks whether a name is unique or not.
  */
 bool
-PFalg_is_unq_name(PFalg_att_t att)
+PFalg_is_unq_name (PFalg_att_t att)
 {
     return ((1 << 3) & att) && (att & 7);
 }
 
 /**
+ * Return the id of a unique name
+ */
+unsigned int
+PFalg_unq_name_id (PFalg_att_t att)
+{
+    if (!PFalg_is_unq_name(att))
+        PFoops (OOPS_FATAL,
+                "unique column name expected");
+    
+    return att >> 4;
+}
+    
+/**
  * Create an unique name based on an id @a id and
  * an original name @a ori that retains the usage information
  * of the new variable (iter, pos or item).

U algebra_cse.c
Index: algebra_cse.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/algebra_cse.c,v
retrieving revision 1.74
retrieving revision 1.74.2.1
diff -u -d -r1.74 -r1.74.2.1
--- algebra_cse.c       12 May 2008 09:57:44 -0000      1.74
+++ algebra_cse.c       28 May 2008 11:37:18 -0000      1.74.2.1
@@ -213,10 +213,27 @@
             break;
 
         case la_eqjoin_unq:
-            return (a->sem.eqjoin_unq.att1 == b->sem.eqjoin_unq.att1
-                    && a->sem.eqjoin_unq.att2 == b->sem.eqjoin_unq.att2
-                    && a->sem.eqjoin_unq.res == b->sem.eqjoin_unq.res);
-            break;
+        {
+            PFalg_proj_t aproj,
+                         bproj;
+            unsigned int i      = 0,
+                         lcount = PFarray_last (a->sem.eqjoin_unq.lproj),
+                         rcount = PFarray_last (a->sem.eqjoin_unq.rproj);
+
+            if (lcount != PFarray_last (b->sem.eqjoin_unq.lproj) ||
+                rcount != PFarray_last (b->sem.eqjoin_unq.rproj))
+                return false;
+            
+            for (i = 0; i < lcount; i++) {
+#define proj_at(l,i) (*(PFalg_proj_t *) PFarray_at ((l),(i)))
+                aproj = proj_at(a->sem.eqjoin_unq.lproj, i),
+                bproj = proj_at(b->sem.eqjoin_unq.lproj, i);
+                if (aproj.old != bproj.old || aproj.new != bproj.new)
+                    return false;
+            }
+
+            return true;
+        }   break;
 
         case la_project:
             if (a->sem.proj.count != b->sem.proj.count)

U logical.c
Index: logical.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/logical.c,v
retrieving revision 1.98.2.1
retrieving revision 1.98.2.2
diff -u -d -r1.98.2.1 -r1.98.2.2
--- logical.c   28 May 2008 11:08:11 -0000      1.98.2.1
+++ logical.c   28 May 2008 11:37:20 -0000      1.98.2.2
@@ -687,84 +687,131 @@
  *
  * Assert that @a att1 is an attribute of @a n1 and @a att2 is an attribute
  * of @a n2. The schema of the result is:
- * schema(@a n1) + schema(@a n2) - duplicate columns.
+ * schema(@a n1) + schema(@a n2) - duplicate join column.
  */
 PFla_op_t *
 PFla_eqjoin_clone (const PFla_op_t *n1, const PFla_op_t *n2,
-                   PFalg_att_t att1, PFalg_att_t att2, PFalg_att_t res)
+                   PFarray_t *lproj, PFarray_t *rproj)
 {
-    PFla_op_t    *ret;
-    unsigned int  i;
-    unsigned int  count;
+#define proj_at(l,i) (*(PFalg_proj_t *) PFarray_at ((l),(i)))
+    PFalg_simple_type_t res_ty;
+    PFla_op_t          *ret;
+    unsigned int        i,
+                        j,
+                        count;
 
     assert (n1); assert (n2);
+    assert (lproj && PFarray_last (lproj));
+    assert (rproj && PFarray_last (rproj));
+
+#ifndef NDEBUG
+    if (proj_at(lproj,0).new != proj_at(rproj,0).new)
+        PFoops (OOPS_FATAL,
+                "common result column name for join columns expected");
+#endif
 
     /* verify that att1 is attribute of n1 ... */
     for (i = 0; i < n1->schema.count; i++)
-        if (att1 == n1->schema.items[i].name)
+        if (proj_at(lproj,0).old == n1->schema.items[i].name) {
+            res_ty = n1->schema.items[i].type;
             break;
+        }
 
     /* did we find attribute att1? */
     if (i >= n1->schema.count)
         PFoops (OOPS_FATAL,
                 "attribute `%s' referenced in join not found",
-                PFatt_str(att1));
+                PFatt_str(proj_at(lproj,0).old));
 
     /* ... and att2 is attribute of n2 */
     for (i = 0; i < n2->schema.count; i++)
-        if (att2 == n2->schema.items[i].name)
+        if (proj_at(rproj,0).old == n2->schema.items[i].name) {
+            if (n2->schema.items[i].type != res_ty)
+                PFoops (OOPS_FATAL,
+                        "attribute types in join do not match");
             break;
+        }
 
     /* did we find attribute att2? */
     if (i >= n2->schema.count)
         PFoops (OOPS_FATAL,
                 "attribute `%s' referenced in join not found",
-                PFatt_str (att2));
+                PFatt_str(proj_at(rproj,0).old));
 
     /* build new equi-join node */
     ret = la_op_wire2 (la_eqjoin_unq, n1, n2);
 
-    /* insert semantic value (join attributes) into the result */
-    ret->sem.eqjoin_unq.att1 = att1;
-    ret->sem.eqjoin_unq.att2 = att2;
-    ret->sem.eqjoin_unq.res  = res;
+    /* insert a copy of the semantic value (join attributes) into the result */
+    lproj = PFarray_copy (lproj);
+    rproj = PFarray_copy (rproj);
+    ret->sem.eqjoin_unq.lproj = lproj;
+    ret->sem.eqjoin_unq.rproj = rproj;
 
     /* allocate memory for the result schema (schema(n1) + schema(n2)) */
-    ret->schema.count = n1->schema.count + n2->schema.count;
+    ret->schema.count = PFarray_last (lproj) + PFarray_last (rproj);
     ret->schema.items
         = PFmalloc (ret->schema.count * sizeof (*(ret->schema.items)));
 
-    count = 0;
     /* add join attribute to the result */
-    for (i = 0; i < n1->schema.count; i++)
-        if (n1->schema.items[i].name == att1) {
-            ret->schema.items[count] = n1->schema.items[i];
-            ret->schema.items[count++].name = res;
-            break;
-        }
-
-    /* copy schema from argument 'n1' */
-    for (i = 0; i < n1->schema.count; i++)
-        /* discard join columns - they are already added */
-        if (n1->schema.items[i].name != att1 &&
-            n1->schema.items[i].name != att2)
-            ret->schema.items[count++] = n1->schema.items[i];
-
-    /* copy schema from argument 'n2' */
-    for (i = 0; i < n2->schema.count; i++)
-        /* discard join columns - they are already added */
-        if (n2->schema.items[i].name != att1 &&
-            n2->schema.items[i].name != att2) {
-            /* only include new columns */
-            unsigned int j;
-            for (j = 0; j < count; j++)
-                if (ret->schema.items[j].name
-                    == n2->schema.items[i].name)
-                    break;
+    ret->schema.items[0].name = proj_at(lproj,0).new;
+    ret->schema.items[0].type = res_ty;
+    count = 1;
 
-            if (j == count)
-                ret->schema.items[count++] = n2->schema.items[i];
+    /* copy schema from projection list 'lproj' */
+    /* discard join columns - they are already added */
+    for (i = 1; i < PFarray_last (lproj); i++) {
+        PFalg_proj_t proj_item = proj_at(lproj, i);
+#ifndef NDEBUG
+        /* check that we do not add duplicate names */
+        for (j = 0; j < count; j++)
+            if (ret->schema.items[j].name == proj_item.new)
+                PFoops (OOPS_FATAL,
+                        "duplicate attribute `%s' in join operator",
+                        PFatt_str (proj_item.new));
+#endif
+        for (j = 0; j < n1->schema.count; j++)
+            if (proj_item.old == n1->schema.items[j].name) {
+                ret->schema.items[count].name = proj_item.new;
+                ret->schema.items[count].type = n1->schema.items[j].type;
+                count++;
+                break;
+            }
+        if (j == n1->schema.count) {
+            /* projection column is missing --
+               remove it from the projection list */
+            proj_at(lproj, i) = *(PFalg_proj_t *) PFarray_top (lproj);
+            PFarray_last (lproj)--;
+            i--;
+        }
+    }
+    
+    /* copy schema from projection list 'rproj' */
+    /* discard join columns - they are already added */
+    for (i = 1; i < PFarray_last (rproj); i++) {
+        PFalg_proj_t proj_item = proj_at(rproj, i);
+#ifndef NDEBUG
+        /* check that we do not add duplicate names */
+        for (j = 0; j < count; j++)
+            if (ret->schema.items[j].name == proj_item.new)
+                PFoops (OOPS_FATAL,
+                        "duplicate attribute `%s' in join operator",
+                        PFatt_str (proj_item.new));
+#endif
+        for (j = 0; j < n2->schema.count; j++)
+            if (proj_item.old == n2->schema.items[j].name) {
+                ret->schema.items[count].name = proj_item.new;
+                ret->schema.items[count].type = n2->schema.items[j].type;
+                count++;
+                break;
+            }
+        if (j == n2->schema.count) {
+            /* projection column is missing --
+               remove it from the projection list */
+            proj_at(rproj, i) = *(PFalg_proj_t *) PFarray_top (rproj);
+            PFarray_last (rproj)--;
+            i--;
         }
+    }
 
     /* adjust schema size */
     ret->schema.count = count;


-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins

Reply via email to