Update of /cvsroot/monetdb/pathfinder/compiler/algebra/opt
In directory sc8-pr-cvs16:/tmp/cvs-serv26652/algebra/opt

Modified Files:
        Makefile.ag opt_general.brg opt_join_pd.c opt_mvd.c 
        opt_reqval.c 
Added Files:
        opt_thetajoin.c 
Log Message:
Thetajoin operator:

    Introduced a thetajoin operator in the logical algebra (and the physical
    algebra and the MIL generation). This new thetajoin can handle a list of
    conjunctive predicates where each predicate represents an (in-)equality
    condition.

Thetajoin introduction:

    A thetajoin operator is introduced either by a small pattern
    (select-comparison-cross) in opt_general.brg or by a new introduction
    phase in intro_thetajoin.c. The latter introduction phase uses a
    selection as a basis to find the *correct* equi-join and transforms
    this equi-join into a thetajoin (for more details please look into
    the code).

Thetajoin optimization:

    Similar to the MVD optimization phase that pushes cross product
    operators up in the DAG structure this checkin provides a new
    optimization phase that pushes thetajoin operators up in the DAG
    (opt_thetajoin.c). These rewrites result in thetajoin operators
    that contain multiple predicates. (E.g. the transformed equi-joins
    eventually hit the selection that triggered the rewrite and thus form
    a new scope-dependent value-based join -- see XMark Q3 and Q4.)

Additional changes:

    opt_general.brg
     * Extended thetajoin pattern to cope with inequality comparisons
     * Rewrite the pattern: semijoin (distinct (Rel), Rel) into
       distinct (semijoin (Rel, Rel) as the semijoin hopefully reduces
       the cardinality more than the distinct operator. (This speeds
       up XMark Q12 by a factor of 2.5.)

    intro_proxy.c
     * Added a preceding phase that removes all semijoin operators.
       (Using only simply checks more proxies are now detected.)
     * Removed unnest optimization phase (thetajoin optimization
       does the same in a more general scenario).

    physical.c
     * Toyed around with the cost model
     * Fixed 'PFord_order_dir_at' bug

    planner.c
     * Generated more ordered alternatives (even if an ordering is
       not required) after an operator is mapped to physical algebra.
       In some situations this avoids in a later step planning the same
       sort operator multiple times. (We only introduce orderings for
       iter-like columns and sort at most two columns. Otherwise the
       plan space explodes for some sample queries.)
     * Extended list of possible orderings for the rownum operator



Index: opt_join_pd.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/opt/opt_join_pd.c,v
retrieving revision 1.24
retrieving revision 1.25
diff -u -d -r1.24 -r1.25
--- opt_join_pd.c       2 Apr 2007 20:14:54 -0000       1.24
+++ opt_join_pd.c       7 May 2007 10:17:14 -0000       1.25
@@ -270,7 +270,7 @@
                            PFprop_dom (rp->prop, ratt),
                            PFprop_dom (lp->prop, latt)) &&
             ratt == p->sem.eqjoin_unq.res) {
-            *p = *rp;
+            *p = *dummy (rp);
             return true;
         }
         
@@ -292,6 +292,7 @@
             case la_eqjoin:
             case la_eqjoin_unq:
             case la_semijoin:
+            case la_thetajoin:
             case la_project:
             case la_select:
             case la_disjunion:
@@ -332,7 +333,6 @@
             case la_proxy_base:
             case la_cross_mvd:
             case la_string_join:
-            case la_dummy:
                 /* nothing to do -- can't throw away lp */
                 break;
 
@@ -421,6 +421,11 @@
                 if (L(lp)->kind == la_textnode &&
                     (modified = GEN(L(lp)->sem.textnode.res))) LP = LL(lp);
                 break;
+
+            case la_dummy:
+                LP = L(lp);
+                next_join = p;
+                break;
         }
         /* If a child of p was replaced ... */
         if (modified) {
@@ -917,6 +922,49 @@
                 *(PFla_op_t **) PFarray_add (clean_up_list) = R(p);
                 break;
                 
+            case la_thetajoin:
+                /* choose the correct operand of the theta-join
+                   (the one with the join argument) to push down
+                   the equi-join */
+                if (PFprop_ocol (L(lp), latt)) {
+                    PFalg_sel_t *pred = PFmalloc (lp->sem.thetajoin.count *
+                                                  sizeof (PFalg_sel_t));
+                    /* push join below the left side */
+                    for (unsigned int i = 0; i < lp->sem.thetajoin.count; i++) 
{
+                        pred[i] = lp->sem.thetajoin.pred[i];
+                        if (pred[i].left == latt)
+                            pred[i].left = p->sem.eqjoin_unq.res;
+                    }
+
+                    *p = *(thetajoin (eqjoin_unq (L(lp), rp, latt, ratt,
+                                                  p->sem.eqjoin_unq.res),
+                                      R(lp),
+                                      lp->sem.thetajoin.count,
+                                      pred));
+
+                    next_join = L(p);
+                    *(PFla_op_t **) PFarray_add (clean_up_list) = R(p);
+                } else {
+                    PFalg_sel_t *pred = PFmalloc (lp->sem.thetajoin.count *
+                                                  sizeof (PFalg_sel_t));
+                    /* push join below the right side */
+                    for (unsigned int i = 0; i < lp->sem.thetajoin.count; i++) 
{
+                        pred[i] = lp->sem.thetajoin.pred[i];
+                        if (pred[i].right == latt)
+                            pred[i].right = p->sem.eqjoin_unq.res;
+                    }
+
+                    *p = *(thetajoin (L(lp),
+                                      eqjoin_unq (R(lp), rp, latt, ratt,
+                                                  p->sem.eqjoin_unq.res),
+                                      lp->sem.thetajoin.count,
+                                      pred));
+
+                    next_join = R(p);
+                    *(PFla_op_t **) PFarray_add (clean_up_list) = L(p);
+                }
+                break;
+                
             case la_project:
                 /* Here we apply transformations in two different cases.
                    First we rewrite equi-joins followed by a project that
@@ -1550,7 +1598,6 @@
                 PFoops (OOPS_FATAL,
                         "cannot cope with proxy nodes");
                 break;
-
         }
         
         if (next_join)
@@ -1623,6 +1670,7 @@
         case la_disjunion:
         case la_eqjoin_unq:
         case la_semijoin:
+        case la_thetajoin:
         case la_roots:
         case la_trace:
         case la_cond_err:

Index: opt_reqval.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/opt/opt_reqval.c,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -d -r1.8 -r1.9
--- opt_reqval.c        21 Feb 2007 14:25:02 -0000      1.8
+++ opt_reqval.c        7 May 2007 10:17:14 -0000       1.9
@@ -93,6 +93,12 @@
             
     }
 
+    /* if we look for exactly one required value and 
+       a child of the current operator is a union operator
+       that produces two different constant values in
+       the required value column in its children then
+       we can directly link to the operator that produces
+       the required value. */
     if (count == 1) {
         if (p->kind == la_project)
             for (unsigned int i = 0; i < p->sem.proj.count; i++)

Index: opt_general.brg
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/opt/opt_general.brg,v
retrieving revision 1.25
retrieving revision 1.26
diff -u -d -r1.25 -r1.26
--- opt_general.brg     15 Mar 2007 14:12:55 -0000      1.25
+++ opt_general.brg     7 May 2007 10:17:13 -0000       1.26
@@ -82,12 +82,13 @@
 %term  cross          =   5
 %term  eqjoin         =   6
 %term  semijoin       =   7
-%term  project        =   8
-%term  select_        =   9
-%term  disjunion      =  10
-%term  intersect      =  11
-%term  difference     =  12
-%term  distinct       =  13
+%term  thetajoin      =   8
+%term  project        =   9
+%term  select_        =  10
+%term  disjunion      =  11
+%term  intersect      =  12
+%term  difference     =  13
+%term  distinct       =  14
 %term  fun_1to1       =  20
 %term  num_eq         =  25
 %term  num_gt         =  26
@@ -145,9 +146,11 @@
 Rel:    cross (lit_tbl, Rel)                      =   6 (10);
 Rel:    eqjoin (Rel, Rel)                         =   8 (10);
 Rel:    semijoin (Rel, Rel)                       =   9 (10);
-Rel:    project (Rel)                             =  10 (10);
-Rel:    project (attach (Rel))                    =  11 (10);
-Rel:    project (project (Rel))                   =  13  (1);
+Rel:    semijoin (distinct (Rel), Rel)            =  10 (10);
+Rel:    thetajoin (Rel, Rel)                      =  11 (10);
+Rel:    project (Rel)                             =  12 (10);
+Rel:    project (attach (Rel))                    =  13 (10);
+Rel:    project (project (Rel))                   =  14  (1);
 Rel:    select_ (Rel)                             =  19 (10);
 Rel:    disjunion (Rel, Rel)                      =  20 (10);
 Rel:    disjunion (EmptyRel, Rel)                 =  21  (5);
@@ -436,8 +439,26 @@
             } 
             break;
 
-        /* Rel:    project (Rel) */
+        /* Rel:    semijoin (distinct (Rel), Rel) */
         case 10:
+            /* push down semijoin operator 
+               underneath the distinct operator as 
+               the semijoin hopefully is more selective
+               than the distinct operator */
+            *p = *PFla_distinct (
+                      PFla_semijoin (
+                          LL(p), R(p),
+                          p->sem.eqjoin.att1,
+                          p->sem.eqjoin.att2));
+            SEEN(L(p)) = false;
+            SEEN(p) = false;
+            /* relabeling will be expensive 
+               if there is no real dummy node */
+            relabel (p, kids);
+            break;
+
+        /* Rel:    project (Rel) */
+        case 12:
         {   /**
              * we can skip every project operator that is not needed.
              * Therefore it has to fulfill two conditions:
@@ -465,7 +486,7 @@
         }   break;
 
         /* Rel:    project (attach (Rel)) */
-        case 11:
+        case 13:
             /* only rewrite if there are more than one columns */
             if (p->schema.count > 1) {
                 bool found = false, rewrite = true;
@@ -501,7 +522,7 @@
             break;
             
         /* Rel:    project (project (Rel)) */
-        case 13:
+        case 14:
         {   /* combine two projections */            
             PFalg_proj_t *proj = PFmalloc (p->schema.count *
                                               sizeof (PFalg_proj_t));
@@ -580,6 +601,7 @@
 
             PFalg_att_t sel = p->sem.select.att;
             PFalg_att_t item1, item2;
+            PFalg_comp_t comp;
             unsigned int i;
 
             node = L(p);
@@ -597,10 +619,14 @@
             
             /* the selection column has to match the
                result column of the comparison operator */
-            if (node->kind != la_num_eq ||
+            if ((node->kind != la_num_eq && node->kind != la_num_gt) ||
                 node->sem.binary.res != sel)
                 break;
 
+            if (node->kind == la_num_eq)
+                comp = alg_comp_eq;
+            else
+                comp = alg_comp_gt;
             item1 = node->sem.binary.att1;
             item2 = node->sem.binary.att2;
             node = L(node);
@@ -645,8 +671,10 @@
                can be replaced by an theta-join and an attach operator
                that adds the missing res column */
             if (!proj && !proj2) {
+                PFalg_sel_t *pred = PFmalloc (sizeof (PFalg_sel_t));
+                pred[0] = PFalg_sel (comp, item1, item2);
                 *p = *PFla_attach (
-                          PFla_eqjoin (left, right, item1, item2),
+                          PFla_thetajoin (left, right, 1, pred),
                           p->sem.select.att, PFalg_lit_bln (true));
                 SEEN(p) = false;
                 SEEN(L(p)) = false;
@@ -664,6 +692,8 @@
                    and attach operator */
                 PFalg_proj_t *proj_list;
                 unsigned int j, count = 0;
+                PFalg_sel_t *pred = PFmalloc (sizeof (PFalg_sel_t));
+                pred[0] = PFalg_sel (comp, item1, item2);
                 
                 /* create projection list */
                 proj_list = PFmalloc ((p->schema.count - 1)*
@@ -700,7 +730,7 @@
                                                    
                 *p = *PFla_attach (
                           PFla_project_ (
-                              PFla_eqjoin (left, right, item1, item2),
+                              PFla_thetajoin (left, right, 1, pred),
                               p->schema.count - 1, proj_list),
                           p->sem.select.att, PFalg_lit_bln (true));
                 SEEN(p) = false;

--- NEW FILE: opt_thetajoin.c ---
/**
 * @file
 *
 * Optimize relational algebra expression DAG based on multivalued
 * dependencies. We make use of the fact that a large number of operators
 * does not rely on both sides of a thetajoin operator. We push down 
 * as many operators underneath the thetajoin operator and thus replace
 * multivalued dependencies by an explicit join operator.
 *
 * We replace all thetajoin operators by an internal variant that
 * can cope with identical columns, can hide its input columns, and
 * is able to represent partial predicates (comparisons where a
 * selection has not been applied yet). With this enhanced thetajoin
 * operator we can push down operators into both operands whenever we
 * do not know which operand really requires the operators. Furthermore
 * we can collect the predicates step by step without looking at a bigger
 * pattern.
 * A cleaning phase then replaces the internal thetajoin operators by 
 * normal ones and adds additional operators for all partial predicates
[...2196 lines suppressed...]
    PFla_dag_reset (root);

    /* Traverse the DAG bottom up and look for op-thetajoin 
       operator pairs. As long as we find a rewrite we start
       a new traversal. */
    while (opt_mvd (root))
        PFla_dag_reset (root);
    PFla_dag_reset (root);

    /* replace the internal thetajoin representation by
       normal thetajoins and generate operators for all
       operators that cound not be integrated in the normal
       thetajoins. */
    remove_thetajoin_opt (root);
    PFla_dag_reset (root);

    return root;
}

/* vim:set shiftwidth=4 expandtab filetype=c: */

Index: Makefile.ag
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/opt/Makefile.ag,v
retrieving revision 1.9
retrieving revision 1.10
diff -u -d -r1.9 -r1.10
--- Makefile.ag 3 Jan 2007 12:32:35 -0000       1.9
+++ Makefile.ag 7 May 2007 10:17:12 -0000       1.10
@@ -42,5 +42,6 @@
                 opt_key.c \
                 opt_mvd.c \
                 opt_reqval.c \
-                opt_set.c
+                opt_set.c \
+                opt_thetajoin.c
 }

Index: opt_mvd.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/opt/opt_mvd.c,v
retrieving revision 1.19
retrieving revision 1.20
diff -u -d -r1.19 -r1.20
--- opt_mvd.c   2 Apr 2007 19:38:43 -0000       1.19
+++ opt_mvd.c   7 May 2007 10:17:14 -0000       1.20
@@ -423,6 +423,63 @@
         }
         break;
         
+    case la_thetajoin:
+        /* Move the independent expression (the one without any
+           join attributes) up the DAG. */
+        if (is_cross (L(p))) {
+            bool all_left = true,
+                 all_right = true;
+            
+            for (unsigned int i = 0; i < p->sem.thetajoin.count; i++) {
+                all_left  = all_left &&
+                            att_present (LL(p), p->sem.thetajoin.pred[i].left);
+                all_right = all_right &&
+                            att_present (LR(p), p->sem.thetajoin.pred[i].left);
+            }
+
+            if (all_left)
+                *p = *(cross_can (LR(p), thetajoin (LL(p), R(p), 
+                                                    p->sem.thetajoin.count,
+                                                    p->sem.thetajoin.pred)));
+            else if (all_right)
+                *p = *(cross_can (LL(p), thetajoin (LR(p), R(p),
+                                                    p->sem.thetajoin.count,
+                                                    p->sem.thetajoin.pred)));
+            else
+                /* do not rewrite */
+                break;
+
+            modified = true;
+            break;
+        }
+        if (is_cross (R(p))) {
+            bool all_left = true,
+                 all_right = true;
+            
+            for (unsigned int i = 0; i < p->sem.thetajoin.count; i++) {
+                all_left  = all_left &&
+                            att_present (RL(p), 
p->sem.thetajoin.pred[i].right);
+                all_right = all_right &&
+                            att_present (RR(p), 
p->sem.thetajoin.pred[i].right);
+            }
+
+            if (all_left)
+                *p = *(cross_can (RR(p), thetajoin (L(p), RL(p), 
+                                                    p->sem.thetajoin.count,
+                                                    p->sem.thetajoin.pred)));
+            else if (all_right)
+                *p = *(cross_can (RL(p), thetajoin (L(p), RR(p),
+                                                    p->sem.thetajoin.count,
+                                                    p->sem.thetajoin.pred)));
+            else
+                /* do not rewrite */
+                break;
+
+            modified = true;
+            break;
+        }
+        break;
+        
     case la_project:
         /* Split project operator and push it beyond the cross product. */
         if (is_cross (L(p))) {


-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins

Reply via email to