Update of /cvsroot/monetdb/pathfinder/compiler/algebra/opt
In directory sc8-pr-cvs16:/tmp/cvs-serv26652/algebra/opt
Modified Files:
Makefile.ag opt_general.brg opt_join_pd.c opt_mvd.c
opt_reqval.c
Added Files:
opt_thetajoin.c
Log Message:
Thetajoin operator:
Introduced a thetajoin operator in the logical algebra (and the physical
algebra and the MIL generation). This new thetajoin can handle a list of
conjunctive predicates where each predicate represents an (in-)equality
condition.
Thetajoin introduction:
A thetajoin operator is introduced either by a small pattern
(select-comparison-cross) in opt_general.brg or by a new introduction
phase in intro_thetajoin.c. The latter introduction phase uses a
selection as a basis to find the *correct* equi-join and transforms
this equi-join into a thetajoin (for more details please look into
the code).
Thetajoin optimization:
Similar to the MVD optimization phase that pushes cross product
operators up in the DAG structure this checkin provides a new
optimization phase that pushes thetajoin operators up in the DAG
(opt_thetajoin.c). These rewrites result in thetajoin operators
that contain multiple predicates. (E.g. the transformed equi-joins
eventually hit the selection that triggered the rewrite and thus form
a new scope-dependent value-based join -- see XMark Q3 and Q4.)
Additional changes:
opt_general.brg
* Extended thetajoin pattern to cope with inequality comparisons
* Rewrite the pattern: semijoin (distinct (Rel), Rel) into
distinct (semijoin (Rel, Rel) as the semijoin hopefully reduces
the cardinality more than the distinct operator. (This speeds
up XMark Q12 by a factor of 2.5.)
intro_proxy.c
* Added a preceding phase that removes all semijoin operators.
(Using only simply checks more proxies are now detected.)
* Removed unnest optimization phase (thetajoin optimization
does the same in a more general scenario).
physical.c
* Toyed around with the cost model
* Fixed 'PFord_order_dir_at' bug
planner.c
* Generated more ordered alternatives (even if an ordering is
not required) after an operator is mapped to physical algebra.
In some situations this avoids in a later step planning the same
sort operator multiple times. (We only introduce orderings for
iter-like columns and sort at most two columns. Otherwise the
plan space explodes for some sample queries.)
* Extended list of possible orderings for the rownum operator
Index: opt_join_pd.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/opt/opt_join_pd.c,v
retrieving revision 1.24
retrieving revision 1.25
diff -u -d -r1.24 -r1.25
--- opt_join_pd.c 2 Apr 2007 20:14:54 -0000 1.24
+++ opt_join_pd.c 7 May 2007 10:17:14 -0000 1.25
@@ -270,7 +270,7 @@
PFprop_dom (rp->prop, ratt),
PFprop_dom (lp->prop, latt)) &&
ratt == p->sem.eqjoin_unq.res) {
- *p = *rp;
+ *p = *dummy (rp);
return true;
}
@@ -292,6 +292,7 @@
case la_eqjoin:
case la_eqjoin_unq:
case la_semijoin:
+ case la_thetajoin:
case la_project:
case la_select:
case la_disjunion:
@@ -332,7 +333,6 @@
case la_proxy_base:
case la_cross_mvd:
case la_string_join:
- case la_dummy:
/* nothing to do -- can't throw away lp */
break;
@@ -421,6 +421,11 @@
if (L(lp)->kind == la_textnode &&
(modified = GEN(L(lp)->sem.textnode.res))) LP = LL(lp);
break;
+
+ case la_dummy:
+ LP = L(lp);
+ next_join = p;
+ break;
}
/* If a child of p was replaced ... */
if (modified) {
@@ -917,6 +922,49 @@
*(PFla_op_t **) PFarray_add (clean_up_list) = R(p);
break;
+ case la_thetajoin:
+ /* choose the correct operand of the theta-join
+ (the one with the join argument) to push down
+ the equi-join */
+ if (PFprop_ocol (L(lp), latt)) {
+ PFalg_sel_t *pred = PFmalloc (lp->sem.thetajoin.count *
+ sizeof (PFalg_sel_t));
+ /* push join below the left side */
+ for (unsigned int i = 0; i < lp->sem.thetajoin.count; i++)
{
+ pred[i] = lp->sem.thetajoin.pred[i];
+ if (pred[i].left == latt)
+ pred[i].left = p->sem.eqjoin_unq.res;
+ }
+
+ *p = *(thetajoin (eqjoin_unq (L(lp), rp, latt, ratt,
+ p->sem.eqjoin_unq.res),
+ R(lp),
+ lp->sem.thetajoin.count,
+ pred));
+
+ next_join = L(p);
+ *(PFla_op_t **) PFarray_add (clean_up_list) = R(p);
+ } else {
+ PFalg_sel_t *pred = PFmalloc (lp->sem.thetajoin.count *
+ sizeof (PFalg_sel_t));
+ /* push join below the right side */
+ for (unsigned int i = 0; i < lp->sem.thetajoin.count; i++)
{
+ pred[i] = lp->sem.thetajoin.pred[i];
+ if (pred[i].right == latt)
+ pred[i].right = p->sem.eqjoin_unq.res;
+ }
+
+ *p = *(thetajoin (L(lp),
+ eqjoin_unq (R(lp), rp, latt, ratt,
+ p->sem.eqjoin_unq.res),
+ lp->sem.thetajoin.count,
+ pred));
+
+ next_join = R(p);
+ *(PFla_op_t **) PFarray_add (clean_up_list) = L(p);
+ }
+ break;
+
case la_project:
/* Here we apply transformations in two different cases.
First we rewrite equi-joins followed by a project that
@@ -1550,7 +1598,6 @@
PFoops (OOPS_FATAL,
"cannot cope with proxy nodes");
break;
-
}
if (next_join)
@@ -1623,6 +1670,7 @@
case la_disjunion:
case la_eqjoin_unq:
case la_semijoin:
+ case la_thetajoin:
case la_roots:
case la_trace:
case la_cond_err:
Index: opt_reqval.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/opt/opt_reqval.c,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -d -r1.8 -r1.9
--- opt_reqval.c 21 Feb 2007 14:25:02 -0000 1.8
+++ opt_reqval.c 7 May 2007 10:17:14 -0000 1.9
@@ -93,6 +93,12 @@
}
+ /* if we look for exactly one required value and
+ a child of the current operator is a union operator
+ that produces two different constant values in
+ the required value column in its children then
+ we can directly link to the operator that produces
+ the required value. */
if (count == 1) {
if (p->kind == la_project)
for (unsigned int i = 0; i < p->sem.proj.count; i++)
Index: opt_general.brg
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/opt/opt_general.brg,v
retrieving revision 1.25
retrieving revision 1.26
diff -u -d -r1.25 -r1.26
--- opt_general.brg 15 Mar 2007 14:12:55 -0000 1.25
+++ opt_general.brg 7 May 2007 10:17:13 -0000 1.26
@@ -82,12 +82,13 @@
%term cross = 5
%term eqjoin = 6
%term semijoin = 7
-%term project = 8
-%term select_ = 9
-%term disjunion = 10
-%term intersect = 11
-%term difference = 12
-%term distinct = 13
+%term thetajoin = 8
+%term project = 9
+%term select_ = 10
+%term disjunion = 11
+%term intersect = 12
+%term difference = 13
+%term distinct = 14
%term fun_1to1 = 20
%term num_eq = 25
%term num_gt = 26
@@ -145,9 +146,11 @@
Rel: cross (lit_tbl, Rel) = 6 (10);
Rel: eqjoin (Rel, Rel) = 8 (10);
Rel: semijoin (Rel, Rel) = 9 (10);
-Rel: project (Rel) = 10 (10);
-Rel: project (attach (Rel)) = 11 (10);
-Rel: project (project (Rel)) = 13 (1);
+Rel: semijoin (distinct (Rel), Rel) = 10 (10);
+Rel: thetajoin (Rel, Rel) = 11 (10);
+Rel: project (Rel) = 12 (10);
+Rel: project (attach (Rel)) = 13 (10);
+Rel: project (project (Rel)) = 14 (1);
Rel: select_ (Rel) = 19 (10);
Rel: disjunion (Rel, Rel) = 20 (10);
Rel: disjunion (EmptyRel, Rel) = 21 (5);
@@ -436,8 +439,26 @@
}
break;
- /* Rel: project (Rel) */
+ /* Rel: semijoin (distinct (Rel), Rel) */
case 10:
+ /* push down semijoin operator
+ underneath the distinct operator as
+ the semijoin hopefully is more selective
+ than the distinct operator */
+ *p = *PFla_distinct (
+ PFla_semijoin (
+ LL(p), R(p),
+ p->sem.eqjoin.att1,
+ p->sem.eqjoin.att2));
+ SEEN(L(p)) = false;
+ SEEN(p) = false;
+ /* relabeling will be expensive
+ if there is no real dummy node */
+ relabel (p, kids);
+ break;
+
+ /* Rel: project (Rel) */
+ case 12:
{ /**
* we can skip every project operator that is not needed.
* Therefore it has to fulfill two conditions:
@@ -465,7 +486,7 @@
} break;
/* Rel: project (attach (Rel)) */
- case 11:
+ case 13:
/* only rewrite if there are more than one columns */
if (p->schema.count > 1) {
bool found = false, rewrite = true;
@@ -501,7 +522,7 @@
break;
/* Rel: project (project (Rel)) */
- case 13:
+ case 14:
{ /* combine two projections */
PFalg_proj_t *proj = PFmalloc (p->schema.count *
sizeof (PFalg_proj_t));
@@ -580,6 +601,7 @@
PFalg_att_t sel = p->sem.select.att;
PFalg_att_t item1, item2;
+ PFalg_comp_t comp;
unsigned int i;
node = L(p);
@@ -597,10 +619,14 @@
/* the selection column has to match the
result column of the comparison operator */
- if (node->kind != la_num_eq ||
+ if ((node->kind != la_num_eq && node->kind != la_num_gt) ||
node->sem.binary.res != sel)
break;
+ if (node->kind == la_num_eq)
+ comp = alg_comp_eq;
+ else
+ comp = alg_comp_gt;
item1 = node->sem.binary.att1;
item2 = node->sem.binary.att2;
node = L(node);
@@ -645,8 +671,10 @@
can be replaced by an theta-join and an attach operator
that adds the missing res column */
if (!proj && !proj2) {
+ PFalg_sel_t *pred = PFmalloc (sizeof (PFalg_sel_t));
+ pred[0] = PFalg_sel (comp, item1, item2);
*p = *PFla_attach (
- PFla_eqjoin (left, right, item1, item2),
+ PFla_thetajoin (left, right, 1, pred),
p->sem.select.att, PFalg_lit_bln (true));
SEEN(p) = false;
SEEN(L(p)) = false;
@@ -664,6 +692,8 @@
and attach operator */
PFalg_proj_t *proj_list;
unsigned int j, count = 0;
+ PFalg_sel_t *pred = PFmalloc (sizeof (PFalg_sel_t));
+ pred[0] = PFalg_sel (comp, item1, item2);
/* create projection list */
proj_list = PFmalloc ((p->schema.count - 1)*
@@ -700,7 +730,7 @@
*p = *PFla_attach (
PFla_project_ (
- PFla_eqjoin (left, right, item1, item2),
+ PFla_thetajoin (left, right, 1, pred),
p->schema.count - 1, proj_list),
p->sem.select.att, PFalg_lit_bln (true));
SEEN(p) = false;
--- NEW FILE: opt_thetajoin.c ---
/**
* @file
*
* Optimize relational algebra expression DAG based on multivalued
* dependencies. We make use of the fact that a large number of operators
* does not rely on both sides of a thetajoin operator. We push down
* as many operators underneath the thetajoin operator and thus replace
* multivalued dependencies by an explicit join operator.
*
* We replace all thetajoin operators by an internal variant that
* can cope with identical columns, can hide its input columns, and
* is able to represent partial predicates (comparisons where a
* selection has not been applied yet). With this enhanced thetajoin
* operator we can push down operators into both operands whenever we
* do not know which operand really requires the operators. Furthermore
* we can collect the predicates step by step without looking at a bigger
* pattern.
* A cleaning phase then replaces the internal thetajoin operators by
* normal ones and adds additional operators for all partial predicates
[...2196 lines suppressed...]
PFla_dag_reset (root);
/* Traverse the DAG bottom up and look for op-thetajoin
operator pairs. As long as we find a rewrite we start
a new traversal. */
while (opt_mvd (root))
PFla_dag_reset (root);
PFla_dag_reset (root);
/* replace the internal thetajoin representation by
normal thetajoins and generate operators for all
operators that cound not be integrated in the normal
thetajoins. */
remove_thetajoin_opt (root);
PFla_dag_reset (root);
return root;
}
/* vim:set shiftwidth=4 expandtab filetype=c: */
Index: Makefile.ag
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/opt/Makefile.ag,v
retrieving revision 1.9
retrieving revision 1.10
diff -u -d -r1.9 -r1.10
--- Makefile.ag 3 Jan 2007 12:32:35 -0000 1.9
+++ Makefile.ag 7 May 2007 10:17:12 -0000 1.10
@@ -42,5 +42,6 @@
opt_key.c \
opt_mvd.c \
opt_reqval.c \
- opt_set.c
+ opt_set.c \
+ opt_thetajoin.c
}
Index: opt_mvd.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/opt/opt_mvd.c,v
retrieving revision 1.19
retrieving revision 1.20
diff -u -d -r1.19 -r1.20
--- opt_mvd.c 2 Apr 2007 19:38:43 -0000 1.19
+++ opt_mvd.c 7 May 2007 10:17:14 -0000 1.20
@@ -423,6 +423,63 @@
}
break;
+ case la_thetajoin:
+ /* Move the independent expression (the one without any
+ join attributes) up the DAG. */
+ if (is_cross (L(p))) {
+ bool all_left = true,
+ all_right = true;
+
+ for (unsigned int i = 0; i < p->sem.thetajoin.count; i++) {
+ all_left = all_left &&
+ att_present (LL(p), p->sem.thetajoin.pred[i].left);
+ all_right = all_right &&
+ att_present (LR(p), p->sem.thetajoin.pred[i].left);
+ }
+
+ if (all_left)
+ *p = *(cross_can (LR(p), thetajoin (LL(p), R(p),
+ p->sem.thetajoin.count,
+ p->sem.thetajoin.pred)));
+ else if (all_right)
+ *p = *(cross_can (LL(p), thetajoin (LR(p), R(p),
+ p->sem.thetajoin.count,
+ p->sem.thetajoin.pred)));
+ else
+ /* do not rewrite */
+ break;
+
+ modified = true;
+ break;
+ }
+ if (is_cross (R(p))) {
+ bool all_left = true,
+ all_right = true;
+
+ for (unsigned int i = 0; i < p->sem.thetajoin.count; i++) {
+ all_left = all_left &&
+ att_present (RL(p),
p->sem.thetajoin.pred[i].right);
+ all_right = all_right &&
+ att_present (RR(p),
p->sem.thetajoin.pred[i].right);
+ }
+
+ if (all_left)
+ *p = *(cross_can (RR(p), thetajoin (L(p), RL(p),
+ p->sem.thetajoin.count,
+ p->sem.thetajoin.pred)));
+ else if (all_right)
+ *p = *(cross_can (RL(p), thetajoin (L(p), RR(p),
+ p->sem.thetajoin.count,
+ p->sem.thetajoin.pred)));
+ else
+ /* do not rewrite */
+ break;
+
+ modified = true;
+ break;
+ }
+ break;
+
case la_project:
/* Split project operator and push it beyond the cross product. */
if (is_cross (L(p))) {
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins