Update of /cvsroot/monetdb/pathfinder/compiler/debug
In directory sc8-pr-cvs16:/tmp/cvs-serv26652/debug
Modified Files:
logdebug.c physdebug.c
Log Message:
Thetajoin operator:
Introduced a thetajoin operator in the logical algebra (and the physical
algebra and the MIL generation). This new thetajoin can handle a list of
conjunctive predicates where each predicate represents an (in-)equality
condition.
Thetajoin introduction:
A thetajoin operator is introduced either by a small pattern
(select-comparison-cross) in opt_general.brg or by a new introduction
phase in intro_thetajoin.c. The latter introduction phase uses a
selection as a basis to find the *correct* equi-join and transforms
this equi-join into a thetajoin (for more details please look into
the code).
Thetajoin optimization:
Similar to the MVD optimization phase that pushes cross product
operators up in the DAG structure this checkin provides a new
optimization phase that pushes thetajoin operators up in the DAG
(opt_thetajoin.c). These rewrites result in thetajoin operators
that contain multiple predicates. (E.g. the transformed equi-joins
eventually hit the selection that triggered the rewrite and thus form
a new scope-dependent value-based join -- see XMark Q3 and Q4.)
Additional changes:
opt_general.brg
* Extended thetajoin pattern to cope with inequality comparisons
* Rewrite the pattern: semijoin (distinct (Rel), Rel) into
distinct (semijoin (Rel, Rel) as the semijoin hopefully reduces
the cardinality more than the distinct operator. (This speeds
up XMark Q12 by a factor of 2.5.)
intro_proxy.c
* Added a preceding phase that removes all semijoin operators.
(Using only simply checks more proxies are now detected.)
* Removed unnest optimization phase (thetajoin optimization
does the same in a more general scenario).
physical.c
* Toyed around with the cost model
* Fixed 'PFord_order_dir_at' bug
planner.c
* Generated more ordered alternatives (even if an ordering is
not required) after an operator is mapped to physical algebra.
In some situations this avoids in a later step planning the same
sort operator multiple times. (We only introduce orderings for
iter-like columns and sort at most two columns. Otherwise the
plan space explodes for some sample queries.)
* Extended list of possible orderings for the rownum operator
Index: logdebug.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/debug/logdebug.c,v
retrieving revision 1.60
retrieving revision 1.61
diff -u -d -r1.60 -r1.61
--- logdebug.c 29 Mar 2007 09:48:15 -0000 1.60
+++ logdebug.c 7 May 2007 10:17:25 -0000 1.61
@@ -56,6 +56,7 @@
, [la_eqjoin] = "Join" /* \"#00FF00\" */
, [la_eqjoin_unq] = "Join" /* \"#00FF00\" */
, [la_semijoin] = "SemiJoin" /* \"#00FF00\" */
+ , [la_thetajoin] = "ThetaJoin" /* \"#00FF00\" */
, [la_project] = "Project"
, [la_select] = "Select"
, [la_disjunion] = "UNION"
@@ -121,6 +122,7 @@
, [la_eqjoin] = "eqjoin"
, [la_eqjoin_unq] = "eqjoin_unq"
, [la_semijoin] = "semijoin"
+ , [la_thetajoin] = "thetajoin"
, [la_project] = "project"
, [la_select] = "select"
, [la_disjunion] = "union"
@@ -267,6 +269,20 @@
return (char *) s->base;
}
+static char *
+comp_str (PFalg_comp_t comp) {
+ switch (comp) {
+ case alg_comp_eq: return "eq";
+ case alg_comp_gt: return "gt";
+ case alg_comp_ge: return "ge";
+ case alg_comp_lt: return "lt";
+ case alg_comp_le: return "le";
+ case alg_comp_ne: return "ne";
+ }
+ assert (0);
+ return NULL;
+}
+
/**
* Print algebra tree in AT&T dot notation.
* @param dot Array into which we print
@@ -287,6 +303,7 @@
, [la_eqjoin] = "\"#00FF00\""
, [la_eqjoin_unq] = "\"#00CC00\""
, [la_semijoin] = "\"#009900\""
+ , [la_thetajoin] = "\"#00AA00\""
, [la_project] = "\"#EEEEEE\""
, [la_select] = "\"#00DDDD\""
, [la_disjunion] = "\"#909090\""
@@ -405,6 +422,19 @@
PFatt_str (n->sem.eqjoin.att2));
break;
+ case la_thetajoin:
+ /* overwrite standard node layout */
+ PFarray_printf (dot, "\", shape=polygon peripheries=2, label=\"");
+
+ PFarray_printf (dot, "%s", a_id[n->kind]);
+
+ for (c = 0; c < n->sem.thetajoin.count; c++)
+ PFarray_printf (dot, "\\n(%s %s %s)",
+ PFatt_str (n->sem.thetajoin.pred[c].left),
+ comp_str (n->sem.thetajoin.pred[c].comp),
+ PFatt_str (n->sem.thetajoin.pred[c].right));
+ break;
+
case la_eqjoin_unq:
PFarray_printf (dot, "%s (%s:%s = %s)",
a_id[n->kind],
@@ -1200,6 +1230,22 @@
PFatt_str (n->sem.eqjoin.att2));
break;
+ case la_thetajoin:
+ PFarray_printf (xml, " <content>\n");
+ for (c = 0; c < n->sem.thetajoin.count; c++)
+ PFarray_printf (xml,
+ " <comparison kind=\"%s\">\n"
+ " <column name=\"%s\" new=\"false\""
+ " position=\"1\"/>\n"
+ " <column name=\"%s\" new=\"false\""
+ " position=\"2\"/>\n"
+ " </comparison>\n",
+ comp_str (n->sem.thetajoin.pred[c].comp),
+ PFatt_str (n->sem.thetajoin.pred[c].left),
+ PFatt_str (n->sem.thetajoin.pred[c].right));
+ PFarray_printf (xml, " </content>\n");
+ break;
+
case la_eqjoin_unq:
PFarray_printf (xml,
" <content>\n"
Index: physdebug.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/debug/physdebug.c,v
retrieving revision 1.39
retrieving revision 1.40
diff -u -d -r1.39 -r1.40
--- physdebug.c 22 Mar 2007 09:39:05 -0000 1.39
+++ physdebug.c 7 May 2007 10:17:27 -0000 1.40
@@ -60,6 +60,9 @@
, [pa_leftjoin] = "LEFTJOIN" /* \"#00FF00\" */
, [pa_eqjoin] = "EQJOIN" /* \"#00FF00\" */
, [pa_semijoin] = "SEMIJOIN" /* \"#00FF00\" */
+ , [pa_thetajoin] = "thetajoin"
+ , [pa_unq2_thetajoin] = "unique_thetajoin"
+ , [pa_unq1_thetajoin] = "dep_unique_thetajoin"
, [pa_project] = "ΒΆ "
, [pa_select] = "SEL"
, [pa_append_union] = "APPEND_UNION"
@@ -136,6 +139,9 @@
, [pa_leftjoin] = "leftjoin"
, [pa_eqjoin] = "eqjoin"
, [pa_semijoin] = "semijoin"
+ , [pa_thetajoin] = "thetajoin"
+ , [pa_unq2_thetajoin] = "unique_thetajoin"
+ , [pa_unq1_thetajoin] = "dep_unique_thetajoin"
, [pa_project] = "project"
, [pa_select] = "select"
, [pa_append_union] = "append_union"
@@ -291,6 +297,20 @@
return (char *) s->base;
}
+static char *
+comp_str (PFalg_comp_t comp) {
+ switch (comp) {
+ case alg_comp_eq: return "eq";
+ case alg_comp_gt: return "gt";
+ case alg_comp_ge: return "ge";
+ case alg_comp_lt: return "lt";
+ case alg_comp_le: return "le";
+ case alg_comp_ne: return "ne";
+ }
+ assert (0);
+ return NULL;
+}
+
/**
* Print algebra tree in AT&T dot notation.
* @param dot Array into which we print
@@ -312,6 +332,9 @@
, [pa_leftjoin] = "\"#00FF00\""
, [pa_eqjoin] = "\"#00FF00\""
, [pa_semijoin] = "\"#00FF00\""
+ , [pa_thetajoin] = "\"#00FF00\""
+ , [pa_unq2_thetajoin] = "\"#00FF00\""
+ , [pa_unq1_thetajoin] = "\"#00FF00\""
, [pa_project] = "\"#EEEEEE\""
, [pa_select] = "\"#00DDDD\""
, [pa_append_union] = "\"#909090\""
@@ -438,6 +461,38 @@
PFatt_str (n->sem.eqjoin.att2));
break;
+ case pa_thetajoin:
+ PFarray_printf (dot, "%s", a_id[n->kind]);
+
+ for (c = 0; c < n->sem.thetajoin.count; c++)
+ PFarray_printf (dot, "\\n(%s %s %s)",
+ PFatt_str (n->sem.thetajoin.pred[c].left),
+ comp_str (n->sem.thetajoin.pred[c].comp),
+ PFatt_str (n->sem.thetajoin.pred[c].right));
+ break;
+
+ case pa_unq2_thetajoin:
+ PFarray_printf (dot, "%s:\\n(%s %s %s)\\ndist (%s, %s)",
+ a_id[n->kind],
+ PFatt_str (n->sem.unq_thetajoin.left),
+ comp_str (n->sem.unq_thetajoin.comp),
+ PFatt_str (n->sem.unq_thetajoin.right),
+ PFatt_str (n->sem.unq_thetajoin.ldist),
+ PFatt_str (n->sem.unq_thetajoin.ldist));
+ break;
+
+ case pa_unq1_thetajoin:
+ PFarray_printf (dot, "%s:\\n(%s = %s)\\n"
+ "(%s %s %s)\\ndist (%s)",
+ a_id[n->kind],
+ PFatt_str (n->sem.unq_thetajoin.ldist),
+ PFatt_str (n->sem.unq_thetajoin.rdist),
+ PFatt_str (n->sem.unq_thetajoin.left),
+ comp_str (n->sem.unq_thetajoin.comp),
+ PFatt_str (n->sem.unq_thetajoin.right),
+ PFatt_str (n->sem.unq_thetajoin.ldist));
+ break;
+
case pa_project:
if (n->sem.proj.items[0].new != n->sem.proj.items[0].old)
PFarray_printf (dot, "%s (%s:%s", a_id[n->kind],
@@ -475,11 +530,17 @@
break;
case pa_std_sort:
- case pa_refine_sort:
PFarray_printf (dot, "%s: (%s)", a_id[n->kind],
PFord_str (n->sem.sortby.required));
break;
+ case pa_refine_sort:
+ PFarray_printf (dot, "%s: (%s)\\nprovided (%s)",
+ a_id[n->kind],
+ PFord_str (n->sem.sortby.required),
+ PFord_str (n->sem.sortby.existing));
+ break;
+
case pa_fun_1to1:
PFarray_printf (dot, "%s [%s] (%s:<", a_id[n->kind],
PFalg_fun_str (n->sem.fun_1to1.kind),
@@ -908,6 +969,72 @@
PFatt_str (n->sem.eqjoin.att2));
break;
+ case pa_thetajoin:
+ PFarray_printf (xml, " <content>\n");
+ for (c = 0; c < n->sem.thetajoin.count; c++)
+ PFarray_printf (xml,
+ " <comparison kind=\"%s\">\n"
+ " <column name=\"%s\" new=\"false\""
+ " position=\"1\"/>\n"
+ " <column name=\"%s\" new=\"false\""
+ " position=\"2\"/>\n"
+ " </comparison>\n",
+ comp_str (n->sem.thetajoin.pred[c].comp),
+ PFatt_str (n->sem.thetajoin.pred[c].left),
+ PFatt_str (n->sem.thetajoin.pred[c].right));
+ PFarray_printf (xml, " </content>\n");
+ break;
+
+ case pa_unq2_thetajoin:
+ PFarray_printf (xml,
+ " <content>\n"
+ " <comparison kind=\"%s\">\n"
+ " <column name=\"%s\" new=\"false\""
+ " position=\"1\"/>\n"
+ " <column name=\"%s\" new=\"false\""
+ " position=\"2\"/>\n"
+ " </comparison>\n"
+ " <distinct>\n"
+ " <column name=\"%s\" new=\"false\""
+ " position=\"1\"/>\n"
+ " <column name=\"%s\" new=\"false\""
+ " position=\"2\"/>\n"
+ " </distinct>\n"
+ " </content>\n",
+ comp_str (n->sem.unq_thetajoin.comp),
+ PFatt_str (n->sem.unq_thetajoin.left),
+ PFatt_str (n->sem.unq_thetajoin.right),
+ PFatt_str (n->sem.unq_thetajoin.ldist),
+ PFatt_str (n->sem.unq_thetajoin.rdist));
+ break;
+
+ case pa_unq1_thetajoin:
+ PFarray_printf (xml,
+ " <content>\n"
+ " <comparison kind=\"=\">\n"
+ " <column name=\"%s\" new=\"false\""
+ " position=\"1\"/>\n"
+ " <column name=\"%s\" new=\"false\""
+ " position=\"2\"/>\n"
+ " </comparison>\n"
+ " <comparison kind=\"%s\">\n"
+ " <column name=\"%s\" new=\"false\""
+ " position=\"1\"/>\n"
+ " <column name=\"%s\" new=\"false\""
+ " position=\"2\"/>\n"
+ " </comparison>\n"
+ " <distinct>\n"
+ " <column name=\"%s\" new=\"false\"/>\n"
+ " </distinct>\n"
+ " </content>\n",
+ PFatt_str (n->sem.unq_thetajoin.ldist),
+ PFatt_str (n->sem.unq_thetajoin.rdist),
+ comp_str (n->sem.unq_thetajoin.comp),
+ PFatt_str (n->sem.unq_thetajoin.left),
+ PFatt_str (n->sem.unq_thetajoin.right),
+ PFatt_str (n->sem.unq_thetajoin.ldist));
+ break;
+
case pa_project:
PFarray_printf (xml, " <content>\n");
for (c = 0; c < n->sem.proj.count; c++)
@@ -950,7 +1077,7 @@
for (c = 0; c < PFord_count (n->sem.sortby.existing); c++)
PFarray_printf (xml,
" <column name=\"%s\" direction=\"%s\""
- "function=\"existing sort\""
+ " function=\"existing sort\""
" position=\"%u\" new=\"false\">\n"
" <annotation>%u. existing sort "
"criterion</annotation>\n"
@@ -969,7 +1096,7 @@
for (c = 0; c < PFord_count (n->sem.sortby.required); c++)
PFarray_printf (xml,
" <column name=\"%s\" direction=\"%s\""
- "function=\"new sort\""
+ " function=\"new sort\""
" position=\"%u\" new=\"false\">\n"
" <annotation>%u. sort argument"
"</annotation>\n"
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins