Update of /cvsroot/monetdb/pathfinder/compiler/debug
In directory sc8-pr-cvs16:/tmp/cvs-serv26652/debug

Modified Files:
        logdebug.c physdebug.c 
Log Message:
Thetajoin operator:

    Introduced a thetajoin operator in the logical algebra (and the physical
    algebra and the MIL generation). This new thetajoin can handle a list of
    conjunctive predicates where each predicate represents an (in-)equality
    condition.

Thetajoin introduction:

    A thetajoin operator is introduced either by a small pattern
    (select-comparison-cross) in opt_general.brg or by a new introduction
    phase in intro_thetajoin.c. The latter introduction phase uses a
    selection as a basis to find the *correct* equi-join and transforms
    this equi-join into a thetajoin (for more details please look into
    the code).

Thetajoin optimization:

    Similar to the MVD optimization phase that pushes cross product
    operators up in the DAG structure this checkin provides a new
    optimization phase that pushes thetajoin operators up in the DAG
    (opt_thetajoin.c). These rewrites result in thetajoin operators
    that contain multiple predicates. (E.g. the transformed equi-joins
    eventually hit the selection that triggered the rewrite and thus form
    a new scope-dependent value-based join -- see XMark Q3 and Q4.)

Additional changes:

    opt_general.brg
     * Extended thetajoin pattern to cope with inequality comparisons
     * Rewrite the pattern: semijoin (distinct (Rel), Rel) into
       distinct (semijoin (Rel, Rel) as the semijoin hopefully reduces
       the cardinality more than the distinct operator. (This speeds
       up XMark Q12 by a factor of 2.5.)

    intro_proxy.c
     * Added a preceding phase that removes all semijoin operators.
       (Using only simply checks more proxies are now detected.)
     * Removed unnest optimization phase (thetajoin optimization
       does the same in a more general scenario).

    physical.c
     * Toyed around with the cost model
     * Fixed 'PFord_order_dir_at' bug

    planner.c
     * Generated more ordered alternatives (even if an ordering is
       not required) after an operator is mapped to physical algebra.
       In some situations this avoids in a later step planning the same
       sort operator multiple times. (We only introduce orderings for
       iter-like columns and sort at most two columns. Otherwise the
       plan space explodes for some sample queries.)
     * Extended list of possible orderings for the rownum operator



Index: logdebug.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/debug/logdebug.c,v
retrieving revision 1.60
retrieving revision 1.61
diff -u -d -r1.60 -r1.61
--- logdebug.c  29 Mar 2007 09:48:15 -0000      1.60
+++ logdebug.c  7 May 2007 10:17:25 -0000       1.61
@@ -56,6 +56,7 @@
     , [la_eqjoin]           = "Join"              /* \"#00FF00\" */
     , [la_eqjoin_unq]       = "Join"              /* \"#00FF00\" */
     , [la_semijoin]         = "SemiJoin"          /* \"#00FF00\" */
+    , [la_thetajoin]        = "ThetaJoin"         /* \"#00FF00\" */
     , [la_project]          = "Project"
     , [la_select]           = "Select"
     , [la_disjunion]        = "UNION"
@@ -121,6 +122,7 @@
     , [la_eqjoin]           = "eqjoin"
     , [la_eqjoin_unq]       = "eqjoin_unq"
     , [la_semijoin]         = "semijoin"
+    , [la_thetajoin]        = "thetajoin"
     , [la_project]          = "project"
     , [la_select]           = "select"
     , [la_disjunion]        = "union"
@@ -267,6 +269,20 @@
     return (char *) s->base;
 }
 
+static char *
+comp_str (PFalg_comp_t comp) {
+    switch (comp) {
+        case alg_comp_eq: return "eq";
+        case alg_comp_gt: return "gt";
+        case alg_comp_ge: return "ge";
+        case alg_comp_lt: return "lt";
+        case alg_comp_le: return "le";
+        case alg_comp_ne: return "ne";
+    }
+    assert (0);
+    return NULL;
+}
+
 /**
  * Print algebra tree in AT&T dot notation.
  * @param dot Array into which we print
@@ -287,6 +303,7 @@
         , [la_eqjoin]         = "\"#00FF00\""
         , [la_eqjoin_unq]     = "\"#00CC00\""
         , [la_semijoin]       = "\"#009900\""
+        , [la_thetajoin]      = "\"#00AA00\""
         , [la_project]        = "\"#EEEEEE\""
         , [la_select]         = "\"#00DDDD\""
         , [la_disjunion]      = "\"#909090\""
@@ -405,6 +422,19 @@
                             PFatt_str (n->sem.eqjoin.att2));
             break;
 
+        case la_thetajoin:
+            /* overwrite standard node layout */
+            PFarray_printf (dot, "\", shape=polygon peripheries=2, label=\"");
+            
+            PFarray_printf (dot, "%s", a_id[n->kind]);
+
+            for (c = 0; c < n->sem.thetajoin.count; c++)
+                PFarray_printf (dot, "\\n(%s %s %s)",
+                                PFatt_str (n->sem.thetajoin.pred[c].left),
+                                comp_str (n->sem.thetajoin.pred[c].comp),
+                                PFatt_str (n->sem.thetajoin.pred[c].right));
+            break;
+            
         case la_eqjoin_unq:
             PFarray_printf (dot, "%s (%s:%s = %s)",
                             a_id[n->kind], 
@@ -1200,6 +1230,22 @@
                             PFatt_str (n->sem.eqjoin.att2));
             break;
 
+        case la_thetajoin:
+            PFarray_printf (xml, "    <content>\n");
+            for (c = 0; c < n->sem.thetajoin.count; c++)
+                PFarray_printf (xml,
+                                "      <comparison kind=\"%s\">\n"
+                                "        <column name=\"%s\" new=\"false\""
+                                               " position=\"1\"/>\n"
+                                "        <column name=\"%s\" new=\"false\""
+                                               " position=\"2\"/>\n"
+                                "      </comparison>\n",
+                                comp_str (n->sem.thetajoin.pred[c].comp),
+                                PFatt_str (n->sem.thetajoin.pred[c].left),
+                                PFatt_str (n->sem.thetajoin.pred[c].right));
+            PFarray_printf (xml, "    </content>\n");
+            break;
+
         case la_eqjoin_unq:
             PFarray_printf (xml, 
                             "    <content>\n"

Index: physdebug.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/debug/physdebug.c,v
retrieving revision 1.39
retrieving revision 1.40
diff -u -d -r1.39 -r1.40
--- physdebug.c 22 Mar 2007 09:39:05 -0000      1.39
+++ physdebug.c 7 May 2007 10:17:27 -0000       1.40
@@ -60,6 +60,9 @@
     , [pa_leftjoin]        = "LEFTJOIN"         /* \"#00FF00\" */
     , [pa_eqjoin]          = "EQJOIN"           /* \"#00FF00\" */
     , [pa_semijoin]        = "SEMIJOIN"         /* \"#00FF00\" */
+    , [pa_thetajoin]       = "thetajoin"
+    , [pa_unq2_thetajoin]  = "unique_thetajoin"
+    , [pa_unq1_thetajoin]  = "dep_unique_thetajoin"
     , [pa_project]         = "ΒΆ "
     , [pa_select]          = "SEL"
     , [pa_append_union]    = "APPEND_UNION"
@@ -136,6 +139,9 @@
     , [pa_leftjoin]        = "leftjoin"
     , [pa_eqjoin]          = "eqjoin"
     , [pa_semijoin]        = "semijoin"
+    , [pa_thetajoin]       = "thetajoin"
+    , [pa_unq2_thetajoin]  = "unique_thetajoin"
+    , [pa_unq1_thetajoin]  = "dep_unique_thetajoin"
     , [pa_project]         = "project"
     , [pa_select]          = "select"
     , [pa_append_union]    = "append_union"
@@ -291,6 +297,20 @@
     return (char *) s->base;
 }
 
+static char *
+comp_str (PFalg_comp_t comp) {
+    switch (comp) {
+        case alg_comp_eq: return "eq";
+        case alg_comp_gt: return "gt";
+        case alg_comp_ge: return "ge";
+        case alg_comp_lt: return "lt";
+        case alg_comp_le: return "le";
+        case alg_comp_ne: return "ne";
+    }
+    assert (0);
+    return NULL;
+}
+
 /**
  * Print algebra tree in AT&T dot notation.
  * @param dot Array into which we print
@@ -312,6 +332,9 @@
         , [pa_leftjoin]        = "\"#00FF00\""
         , [pa_eqjoin]          = "\"#00FF00\""
         , [pa_semijoin]        = "\"#00FF00\""
+        , [pa_thetajoin]       = "\"#00FF00\""
+        , [pa_unq2_thetajoin]  = "\"#00FF00\""
+        , [pa_unq1_thetajoin]  = "\"#00FF00\""
         , [pa_project]         = "\"#EEEEEE\""
         , [pa_select]          = "\"#00DDDD\""
         , [pa_append_union]    = "\"#909090\""
@@ -438,6 +461,38 @@
                             PFatt_str (n->sem.eqjoin.att2));
             break;
             
+        case pa_thetajoin:
+            PFarray_printf (dot, "%s", a_id[n->kind]);
+
+            for (c = 0; c < n->sem.thetajoin.count; c++)
+                PFarray_printf (dot, "\\n(%s %s %s)",
+                                PFatt_str (n->sem.thetajoin.pred[c].left),
+                                comp_str (n->sem.thetajoin.pred[c].comp),
+                                PFatt_str (n->sem.thetajoin.pred[c].right));
+            break;
+            
+        case pa_unq2_thetajoin:
+            PFarray_printf (dot, "%s:\\n(%s %s %s)\\ndist (%s, %s)",
+                            a_id[n->kind],
+                            PFatt_str (n->sem.unq_thetajoin.left),
+                            comp_str (n->sem.unq_thetajoin.comp),
+                            PFatt_str (n->sem.unq_thetajoin.right),
+                            PFatt_str (n->sem.unq_thetajoin.ldist),
+                            PFatt_str (n->sem.unq_thetajoin.ldist));
+            break;
+
+        case pa_unq1_thetajoin:
+            PFarray_printf (dot, "%s:\\n(%s = %s)\\n"
+                            "(%s %s %s)\\ndist (%s)",
+                            a_id[n->kind],
+                            PFatt_str (n->sem.unq_thetajoin.ldist),
+                            PFatt_str (n->sem.unq_thetajoin.rdist),
+                            PFatt_str (n->sem.unq_thetajoin.left),
+                            comp_str (n->sem.unq_thetajoin.comp),
+                            PFatt_str (n->sem.unq_thetajoin.right),
+                            PFatt_str (n->sem.unq_thetajoin.ldist));
+            break;
+            
         case pa_project:
             if (n->sem.proj.items[0].new != n->sem.proj.items[0].old)
                 PFarray_printf (dot, "%s (%s:%s", a_id[n->kind],
@@ -475,11 +530,17 @@
             break;
 
         case pa_std_sort:
-        case pa_refine_sort:
             PFarray_printf (dot, "%s: (%s)", a_id[n->kind],
                             PFord_str (n->sem.sortby.required));
             break;
 
+        case pa_refine_sort:
+            PFarray_printf (dot, "%s: (%s)\\nprovided (%s)",
+                            a_id[n->kind],
+                            PFord_str (n->sem.sortby.required),
+                            PFord_str (n->sem.sortby.existing));
+            break;
+
         case pa_fun_1to1:
             PFarray_printf (dot, "%s [%s] (%s:<", a_id[n->kind],
                             PFalg_fun_str (n->sem.fun_1to1.kind),
@@ -908,6 +969,72 @@
                             PFatt_str (n->sem.eqjoin.att2));
             break;
 
+        case pa_thetajoin:
+            PFarray_printf (xml, "    <content>\n");
+            for (c = 0; c < n->sem.thetajoin.count; c++)
+                PFarray_printf (xml,
+                                "      <comparison kind=\"%s\">\n"
+                                "        <column name=\"%s\" new=\"false\""
+                                               " position=\"1\"/>\n"
+                                "        <column name=\"%s\" new=\"false\""
+                                               " position=\"2\"/>\n"
+                                "      </comparison>\n",
+                                comp_str (n->sem.thetajoin.pred[c].comp),
+                                PFatt_str (n->sem.thetajoin.pred[c].left),
+                                PFatt_str (n->sem.thetajoin.pred[c].right));
+            PFarray_printf (xml, "    </content>\n");
+            break;
+
+        case pa_unq2_thetajoin:
+            PFarray_printf (xml,
+                            "    <content>\n"
+                            "      <comparison kind=\"%s\">\n"
+                            "        <column name=\"%s\" new=\"false\""
+                                           " position=\"1\"/>\n"
+                            "        <column name=\"%s\" new=\"false\""
+                                           " position=\"2\"/>\n"
+                            "      </comparison>\n"
+                            "      <distinct>\n"
+                            "        <column name=\"%s\" new=\"false\""
+                                           " position=\"1\"/>\n"
+                            "        <column name=\"%s\" new=\"false\""
+                                           " position=\"2\"/>\n"
+                            "      </distinct>\n"
+                            "    </content>\n",
+                            comp_str (n->sem.unq_thetajoin.comp),
+                            PFatt_str (n->sem.unq_thetajoin.left),
+                            PFatt_str (n->sem.unq_thetajoin.right),
+                            PFatt_str (n->sem.unq_thetajoin.ldist),
+                            PFatt_str (n->sem.unq_thetajoin.rdist));
+            break;
+
+        case pa_unq1_thetajoin:
+            PFarray_printf (xml,
+                            "    <content>\n"
+                            "      <comparison kind=\"=\">\n"
+                            "        <column name=\"%s\" new=\"false\""
+                                           " position=\"1\"/>\n"
+                            "        <column name=\"%s\" new=\"false\""
+                                           " position=\"2\"/>\n"
+                            "      </comparison>\n"
+                            "      <comparison kind=\"%s\">\n"
+                            "        <column name=\"%s\" new=\"false\""
+                                           " position=\"1\"/>\n"
+                            "        <column name=\"%s\" new=\"false\""
+                                           " position=\"2\"/>\n"
+                            "      </comparison>\n"
+                            "      <distinct>\n"
+                            "        <column name=\"%s\" new=\"false\"/>\n"
+                            "      </distinct>\n"
+                            "    </content>\n",
+                            PFatt_str (n->sem.unq_thetajoin.ldist),
+                            PFatt_str (n->sem.unq_thetajoin.rdist),
+                            comp_str (n->sem.unq_thetajoin.comp),
+                            PFatt_str (n->sem.unq_thetajoin.left),
+                            PFatt_str (n->sem.unq_thetajoin.right),
+                            PFatt_str (n->sem.unq_thetajoin.ldist));
+            break;
+
         case pa_project:
             PFarray_printf (xml, "    <content>\n");
             for (c = 0; c < n->sem.proj.count; c++)
@@ -950,7 +1077,7 @@
             for (c = 0; c < PFord_count (n->sem.sortby.existing); c++)
                 PFarray_printf (xml, 
                                 "      <column name=\"%s\" direction=\"%s\""
-                                        "function=\"existing sort\""
+                                        " function=\"existing sort\""
                                         " position=\"%u\" new=\"false\">\n"
                                 "        <annotation>%u. existing sort "
                                         "criterion</annotation>\n"
@@ -969,7 +1096,7 @@
             for (c = 0; c < PFord_count (n->sem.sortby.required); c++)
                 PFarray_printf (xml, 
                                 "      <column name=\"%s\" direction=\"%s\""
-                                        "function=\"new sort\""
+                                        " function=\"new sort\""
                                         " position=\"%u\" new=\"false\">\n"
                                 "        <annotation>%u. sort argument"
                                         "</annotation>\n"


-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins

Reply via email to