Update of /cvsroot/monetdb/pathfinder/compiler/algebra/prop
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv26788/algebra/prop
Modified Files:
Tag: XQuery_0-24
prop_dom.c prop_dom_nat.c prop_key.c prop_ori_names.c
Log Message:
-- Re-implemented the join pushdown optimization phase
(due to incorrect rewrites).
o The special equi-join operator working on unique column
names now incorporates for each operand a projection.
Based on this projection we can now always maintain the
correct schema and push the join even through renaming
projections.
The old variant did in some cases 'forget' from which
operand of an equi-join columns with the same name stem
from.
o The new equi-join pushdown phase is a lot more effective
than the old one and thus generates very wide relations
(where a large number of columns can be pruned afterwards).
Placing a projection pushdown phase (icols optimization)
afterwards allows us to map the resulting plans back to
bit-encoded column names without running out of column
bits.
U prop_dom_nat.c
Index: prop_dom_nat.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/prop/prop_dom_nat.c,v
retrieving revision 1.21
retrieving revision 1.21.2.1
diff -u -d -r1.21 -r1.21.2.1
--- prop_dom_nat.c 3 Apr 2008 11:03:29 -0000 1.21
+++ prop_dom_nat.c 28 May 2008 11:37:30 -0000 1.21.2.1
@@ -283,9 +283,13 @@
break;
case la_cross:
- {
/* we have to make sure to assign subdomains as otherwise
dynamic empty relations might be ignored */
+ case la_thetajoin:
+ /* As we do not know how multiple predicates interact
+ we assign subdomains for all attributes. */
+ {
+
dom_t cur_dom;
/* create new subdomains for all attributes */
@@ -307,7 +311,6 @@
} break;
case la_eqjoin:
- case la_eqjoin_unq:
{ /**
* Infering the domains of the join attributes results
* in a common schema, that is either the more general
@@ -369,6 +372,74 @@
}
} break;
+ case la_eqjoin_unq:
+ /* do the same as for normal joins and
+ correctly update the columns names */
+#define proj_at(l,i) (*(PFalg_proj_t *) PFarray_at ((l),(i)))
+ { /**
+ * Infering the domains of the join attributes results
+ * in a common domain, that is either the more general
+ * domain if the are in a subdomain relationship or a
+ * new subdomain. The domains of all other columns
+ * (whose domain is different from the domains of the
+ * join arguments) remain unchanged.
+ */
+ PFarray_t *lproj = n->sem.eqjoin_unq.lproj,
+ *rproj = n->sem.eqjoin_unq.rproj;
+ PFalg_att_t att1 = proj_at(lproj, 0).old,
+ att2 = proj_at(rproj, 0).old,
+ res = proj_at(lproj, 0).new;
+
+ dom_t att1_dom = PFprop_dom (L(n)->prop, att1),
+ att2_dom = PFprop_dom (R(n)->prop, att2),
+ join_dom,
+ cur_dom;
+
+ if (att1_dom == att2_dom)
+ join_dom = att1_dom;
+ else if (PFprop_subdom (n->prop, att1_dom, att2_dom))
+ join_dom = att1_dom;
+ else if (PFprop_subdom (n->prop, att2_dom, att1_dom))
+ join_dom = att2_dom;
+ else {
+ join_dom = id++;
+ add_subdom (n->prop, att1_dom, join_dom);
+ add_subdom (n->prop, att2_dom, join_dom);
+ }
+ add_dom (n->prop, res, join_dom);
+
+ /* copy domains and update domains of join arguments */
+ for (unsigned int i = 1; i < PFarray_last (lproj); i++)
+ if (PFprop_type_of (L(n), proj_at (lproj, i).old) == aat_nat) {
+ if ((cur_dom = PFprop_dom (
+ L(n)->prop,
+ proj_at (lproj, i).old))
+ == att1_dom)
+ add_dom (n->prop, proj_at (lproj, i).new, join_dom);
+ else if (join_dom == att1_dom)
+ add_dom (n->prop, proj_at (lproj, i).new, cur_dom);
+ else {
+ add_subdom (n->prop, cur_dom, id);
+ add_dom (n->prop, proj_at (lproj, i).new, id++);
+ }
+ }
+
+ for (unsigned int i = 1; i < PFarray_last (rproj); i++)
+ if (PFprop_type_of (R(n), proj_at (rproj, i).old) == aat_nat) {
+ if ((cur_dom = PFprop_dom (
+ R(n)->prop,
+ proj_at (rproj, i).old))
+ == att2_dom)
+ add_dom (n->prop, proj_at (rproj, i).new, join_dom);
+ else if (join_dom == att2_dom)
+ add_dom (n->prop, proj_at (rproj, i).new, cur_dom);
+ else {
+ add_subdom (n->prop, cur_dom, id);
+ add_dom (n->prop, proj_at (rproj, i).new, id++);
+ }
+ }
+ } break;
+
case la_semijoin:
{ /**
* Infering the domains of the join attributes results
@@ -416,85 +487,6 @@
}
} break;
- case la_thetajoin:
- { /**
- * Infering the domains of the equi-join attributes results
- * in a common domain, that is either the more general
- * domain if the are in a subdomain relationship or a
- * new subdomain. A new subdomain is created for all other
- * columns.
- */
- dom_t att1_dom, att2_dom, join_dom;
- for (unsigned int i = 0; i < n->sem.thetajoin.count; i++)
- if (n->sem.thetajoin.pred[i].comp == alg_comp_eq) {
- att1_dom = PFprop_dom (L(n)->prop,
- n->sem.thetajoin.pred[i].left);
- att2_dom = PFprop_dom (R(n)->prop,
- n->sem.thetajoin.pred[i].right);
-
- if (!att1_dom || !att2_dom)
- continue;
-
- if (att1_dom == att2_dom)
- join_dom = att1_dom;
- else if (PFprop_subdom (n->prop, att1_dom, att2_dom))
- join_dom = att1_dom;
- else if (PFprop_subdom (n->prop, att2_dom, att1_dom))
- join_dom = att2_dom;
- else {
- join_dom = id++;
- add_subdom (n->prop, att1_dom, join_dom);
- add_subdom (n->prop, att2_dom, join_dom);
- }
- add_dom (n->prop,
- n->sem.thetajoin.pred[i].left,
- join_dom);
- add_dom (n->prop,
- n->sem.thetajoin.pred[i].right,
- join_dom);
- }
-
- /* copy domains and update domains of join arguments */
- for (unsigned int i = 0; i < L(n)->schema.count; i++)
- if (L(n)->schema.items[i].type == aat_nat) {
- unsigned int j;
- /* filter out the equi-join columns */
- for (j = 0; j < n->sem.thetajoin.count; j++)
- if (n->sem.thetajoin.pred[i].comp == alg_comp_eq &&
- L(n)->schema.items[i].name ==
- n->sem.thetajoin.pred[j].left)
- break;
- if (j == n->sem.thetajoin.count) {
- add_subdom (n->prop,
- PFprop_dom (
- L(n)->prop,
- L(n)->schema.items[i].name),
- id);
- add_dom (n->prop, L(n)->schema.items[i].name, id++);
- }
- }
-
- for (unsigned int i = 0; i < R(n)->schema.count; i++)
- if (R(n)->schema.items[i].type == aat_nat) {
- unsigned int j;
- /* filter out the equi-join columns */
- for (j = 0; j < n->sem.thetajoin.count; j++)
- if (n->sem.thetajoin.pred[i].comp == alg_comp_eq &&
- R(n)->schema.items[i].name ==
- n->sem.thetajoin.pred[j].right)
- break;
- if (j == n->sem.thetajoin.count) {
- add_subdom (n->prop,
- PFprop_dom (
- R(n)->prop,
- R(n)->schema.items[i].name),
- id);
- add_dom (n->prop, R(n)->schema.items[i].name, id++);
- }
- }
-
- } break;
-
case la_project:
/* bind all existing domains to the possibly new names */
for (unsigned int i = 0; i < n->schema.count; i++)
U prop_ori_names.c
Index: prop_ori_names.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/prop/prop_ori_names.c,v
retrieving revision 1.33
retrieving revision 1.33.2.1
diff -u -d -r1.33 -r1.33.2.1
--- prop_ori_names.c 19 Mar 2008 13:50:12 -0000 1.33
+++ prop_ori_names.c 28 May 2008 11:37:32 -0000 1.33.2.1
@@ -56,7 +56,7 @@
#define ARRAY_SIZE(n) ((n)->schema.count > 10 ? (n)->schema.count : 10)
/* reuse the icols field to maintain the bitlist of free variables */
-#define FREE(n) ((n)->prop->l_icols)
+#define FREE(n) ((n)->prop->free_cols)
/* initial value for lists that encode free variables */
#define ALL (~att_NULL)
@@ -268,67 +268,37 @@
break;
case la_eqjoin_unq:
+#define proj_at(l,i) (*(PFalg_proj_t *) PFarray_at ((l),(i)))
{
- PFalg_att_t att1 = n->sem.eqjoin_unq.att1;
- PFalg_att_t att2 = n->sem.eqjoin_unq.att2;
-
- /* only one join column is in the name pair list
- (as only one is in the schema). Therefore we
- add the corresponding other one (if no name
- conflict arises). */
- if (att1 != att2) {
- if (att1 != n->sem.eqjoin_unq.res)
- unq = att1;
- else
- unq = att2;
-
- /* add the missing attribute to the name pair list */
- ori = PFalg_ori_name (unq, FREE(n));
- FREE(n) = diff (FREE(n), ori);
- add_name_pair (np_list, ori, unq);
- }
+ PFarray_t *lproj = n->sem.eqjoin_unq.lproj,
+ *rproj = n->sem.eqjoin_unq.rproj;
+ unsigned int i;
+ PFalg_att_t unq_old,
+ unq_new,
+ ori_new,
+ unq_att2 = proj_at (rproj, 0).old,
+ ori_att2 = PFalg_ori_name (unq_att2, FREE(n));
+ FREE(n) = diff (FREE(n), ori_att2);
/* create name pair list for the left operand */
- for (unsigned int i = 0; i < L(n)->schema.count; i++) {
- unq = L(n)->schema.items[i].name;
- ori = find_ori_name (np_list, unq);
- add_name_pair (n->prop->l_name_pairs, ori, unq);
+ for (i = 0; i < PFarray_last (lproj); i++) {
+ unq_new = proj_at (lproj, i).new;
+ unq_old = proj_at (lproj, i).old;
+ ori_new = find_ori_name (np_list, unq_new);
+ add_name_pair (n->prop->l_name_pairs, ori_new, unq_old);
}
- if (att1 == att2) {
- /* The right join argument needs a new bit-encoded column
- name. As the unique names of the two join arguments
- conflicts we add the right join argument with a special
- unused unique name to the name pair list. (The name pair
- list for the right operand however contains the correct
- unique name to support the correct reference.) The
- matching then exploits this special column if both
- join arguments have the same unique name. */
- ori = PFalg_ori_name (att2, FREE(n));
- FREE(n) = diff (FREE(n), ori);
- /* create special name */
- unq = PFalg_unq_name (att_item, 0);
- assert (!find_ori_name (np_list, unq));
- add_name_pair (np_list, ori, unq);
- /* make new original name available to right operand */
- add_name_pair (n->prop->r_name_pairs, ori, att2);
- /* copy all remaining name pairs */
- for (unsigned int i = 0; i < R(n)->schema.count; i++) {
- unq = R(n)->schema.items[i].name;
- /* discard left join column */
- if (unq != att1) {
- ori = find_ori_name (np_list, unq);
- add_name_pair (n->prop->r_name_pairs, ori, unq);
- }
- }
+ /* create name pair list for the right operand */
+ /* add the join column separately ... */
+ add_name_pair (n->prop->r_name_pairs, ori_att2, unq_att2);
+ /* ... and discard the join column in the iteration */
+ for (i = 1; i < PFarray_last (rproj); i++) {
+ unq_new = proj_at (rproj, i).new;
+ unq_old = proj_at (rproj, i).old;
+ ori_new = find_ori_name (np_list, unq_new);
+ add_name_pair (n->prop->r_name_pairs, ori_new, unq_old);
}
- else
- /* create name pair list for the right operand */
- for (unsigned int i = 0; i < R(n)->schema.count; i++) {
- unq = R(n)->schema.items[i].name;
- ori = find_ori_name (np_list, unq);
- add_name_pair (n->prop->r_name_pairs, ori, unq);
- }
+
} break;
case la_semijoin:
U prop_key.c
Index: prop_key.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/prop/prop_key.c,v
retrieving revision 1.47
retrieving revision 1.47.2.1
diff -u -d -r1.47 -r1.47.2.1
--- prop_key.c 23 May 2008 13:01:27 -0000 1.47
+++ prop_key.c 28 May 2008 11:37:31 -0000 1.47.2.1
@@ -362,7 +362,6 @@
break;
case la_eqjoin:
- case la_eqjoin_unq:
/* only a key-join retains all key properties */
if (PFprop_key (L(n)->prop, n->sem.eqjoin.att1) &&
PFprop_key (R(n)->prop, n->sem.eqjoin.att2)) {
@@ -375,6 +374,38 @@
copy (n->prop->keys, L(n)->prop->keys);
break;
+ case la_eqjoin_unq:
+ /* do the same as for normal joins and
+ correctly update the columns names */
+#define proj_at(l,i) (*(PFalg_proj_t *) PFarray_at ((l),(i)))
+ {
+ PFarray_t *lproj = n->sem.eqjoin_unq.lproj,
+ *rproj = n->sem.eqjoin_unq.rproj;
+ PFalg_att_t att1 = proj_at(lproj, 0).old,
+ att2 = proj_at(rproj, 0).old,
+ res = proj_at(lproj, 0).new;
+
+ /* only a key-join retains all key properties */
+ if (PFprop_key (L(n)->prop, att1) &&
+ PFprop_key (R(n)->prop, att2)) {
+ union_ (n->prop->keys, res);
+ for (unsigned int i = 1; i < PFarray_last (lproj); i++)
+ if (PFprop_key (L(n)->prop, proj_at(lproj, i).old))
+ union_ (n->prop->keys, proj_at(lproj, i).new);
+ for (unsigned int i = 1; i < PFarray_last (rproj); i++)
+ if (PFprop_key (R(n)->prop, proj_at(rproj, i).old))
+ union_ (n->prop->keys, proj_at(rproj, i).new);
+ }
+ else if (PFprop_key (L(n)->prop, att1))
+ for (unsigned int i = 1; i < PFarray_last (rproj); i++)
+ if (PFprop_key (R(n)->prop, proj_at(rproj, i).old))
+ union_ (n->prop->keys, proj_at(rproj, i).new);
+ else if (PFprop_key (R(n)->prop, att2))
+ for (unsigned int i = 1; i < PFarray_last (lproj); i++)
+ if (PFprop_key (L(n)->prop, proj_at(lproj, i).old))
+ union_ (n->prop->keys, proj_at(lproj, i).new);
+ } break;
+
case la_semijoin:
copy (n->prop->keys, L(n)->prop->keys);
break;
@@ -768,7 +799,7 @@
if (n->prop->keys)
PFarray_last (n->prop->keys) = 0;
else
- n->prop->keys = PFarray (sizeof (PFalg_att_t), 10);
+ n->prop->keys = PFarray (sizeof (PFalg_att_t), 10);
if (L(n)) {
if (n->prop->l_keys)
U prop_dom.c
Index: prop_dom.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/prop/prop_dom.c,v
retrieving revision 1.54
retrieving revision 1.54.2.1
diff -u -d -r1.54 -r1.54.2.1
--- prop_dom.c 3 Apr 2008 11:03:28 -0000 1.54
+++ prop_dom.c 28 May 2008 11:37:29 -0000 1.54.2.1
@@ -548,6 +548,9 @@
case la_cross:
/* we have to make sure to assign subdomains as otherwise
dynamic empty relations might be ignored */
+ case la_thetajoin:
+ /* As we do not know how multiple predicates interact
+ we assign subdomains for all attributes. */
/* create new subdomains for all attributes */
for (unsigned int i = 0; i < L(n)->schema.count; i++) {
@@ -570,7 +573,6 @@
break;
case la_eqjoin:
- case la_eqjoin_unq:
{ /**
* Infering the domains of the join attributes results
* in a common domain, that is either the more general
@@ -626,6 +628,70 @@
}
} break;
+ case la_eqjoin_unq:
+ /* do the same as for normal joins and
+ correctly update the columns names */
+#define proj_at(l,i) (*(PFalg_proj_t *) PFarray_at ((l),(i)))
+ { /**
+ * Infering the domains of the join attributes results
+ * in a common domain, that is either the more general
+ * domain if the are in a subdomain relationship or a
+ * new subdomain. The domains of all other columns
+ * (whose domain is different from the domains of the
+ * join arguments) remain unchanged.
+ */
+ PFarray_t *lproj = n->sem.eqjoin_unq.lproj,
+ *rproj = n->sem.eqjoin_unq.rproj;
+ PFalg_att_t att1 = proj_at(lproj, 0).old,
+ att2 = proj_at(rproj, 0).old,
+ res = proj_at(lproj, 0).new;
+
+ dom_t att1_dom = PFprop_dom (L(n)->prop, att1),
+ att2_dom = PFprop_dom (R(n)->prop, att2),
+ join_dom,
+ cur_dom;
+
+ if (att1_dom == att2_dom)
+ join_dom = att1_dom;
+ else if (PFprop_subdom (n->prop, att1_dom, att2_dom))
+ join_dom = att1_dom;
+ else if (PFprop_subdom (n->prop, att2_dom, att1_dom))
+ join_dom = att2_dom;
+ else {
+ join_dom = id++;
+ add_subdom (n->prop, att1_dom, join_dom);
+ add_subdom (n->prop, att2_dom, join_dom);
+ }
+ add_dom (n->prop, res, join_dom);
+
+ /* copy domains and update domains of join arguments */
+ for (unsigned int i = 1; i < PFarray_last (lproj); i++)
+ if ((cur_dom = PFprop_dom (
+ L(n)->prop,
+ proj_at (lproj, i).old))
+ == att1_dom)
+ add_dom (n->prop, proj_at (lproj, i).new, join_dom);
+ else if (join_dom == att1_dom)
+ add_dom (n->prop, proj_at (lproj, i).new, cur_dom);
+ else {
+ add_subdom (n->prop, cur_dom, id);
+ add_dom (n->prop, proj_at (lproj, i).new, id++);
+ }
+
+ for (unsigned int i = 1; i < PFarray_last (rproj); i++)
+ if ((cur_dom = PFprop_dom (
+ R(n)->prop,
+ proj_at (rproj, i).old))
+ == att2_dom)
+ add_dom (n->prop, proj_at (rproj, i).new, join_dom);
+ else if (join_dom == att2_dom)
+ add_dom (n->prop, proj_at (rproj, i).new, cur_dom);
+ else {
+ add_subdom (n->prop, cur_dom, id);
+ add_dom (n->prop, proj_at (rproj, i).new, id++);
+ }
+ } break;
+
case la_semijoin:
{ /**
* Infering the domains of the join attributes results
@@ -669,80 +735,6 @@
}
} break;
- case la_thetajoin:
- { /**
- * Infering the domains of the equi-join attributes results
- * in a common domain, that is either the more general
- * domain if the are in a subdomain relationship or a
- * new subdomain. A new subdomain is created for all other
- * columns.
- */
- dom_t att1_dom, att2_dom, join_dom;
- for (unsigned int i = 0; i < n->sem.thetajoin.count; i++)
- if (n->sem.thetajoin.pred[i].comp == alg_comp_eq) {
- att1_dom = PFprop_dom (L(n)->prop,
- n->sem.thetajoin.pred[i].left);
- att2_dom = PFprop_dom (R(n)->prop,
- n->sem.thetajoin.pred[i].right);
-
- if (att1_dom == att2_dom)
- join_dom = att1_dom;
- else if (PFprop_subdom (n->prop, att1_dom, att2_dom))
- join_dom = att1_dom;
- else if (PFprop_subdom (n->prop, att2_dom, att1_dom))
- join_dom = att2_dom;
- else {
- join_dom = id++;
- add_subdom (n->prop, att1_dom, join_dom);
- add_subdom (n->prop, att2_dom, join_dom);
- }
- add_dom (n->prop,
- n->sem.thetajoin.pred[i].left,
- join_dom);
- add_dom (n->prop,
- n->sem.thetajoin.pred[i].right,
- join_dom);
- }
-
- /* copy domains and update domains of join arguments */
- for (unsigned int i = 0; i < L(n)->schema.count; i++) {
- unsigned int j;
- /* filter out the equi-join columns */
- for (j = 0; j < n->sem.thetajoin.count; j++)
- if (n->sem.thetajoin.pred[i].comp == alg_comp_eq &&
- L(n)->schema.items[i].name ==
- n->sem.thetajoin.pred[j].left)
- break;
- if (j == n->sem.thetajoin.count) {
- add_subdom (n->prop,
- PFprop_dom (
- L(n)->prop,
- L(n)->schema.items[i].name),
- id);
- add_dom (n->prop, L(n)->schema.items[i].name, id++);
- }
- }
-
- for (unsigned int i = 0; i < R(n)->schema.count; i++) {
- unsigned int j;
- /* filter out the equi-join columns */
- for (j = 0; j < n->sem.thetajoin.count; j++)
- if (n->sem.thetajoin.pred[i].comp == alg_comp_eq &&
- R(n)->schema.items[i].name ==
- n->sem.thetajoin.pred[j].right)
- break;
- if (j == n->sem.thetajoin.count) {
- add_subdom (n->prop,
- PFprop_dom (
- R(n)->prop,
- R(n)->schema.items[i].name),
- id);
- add_dom (n->prop, R(n)->schema.items[i].name, id++);
- }
- }
-
- } break;
-
case la_project:
/* bind all existing domains to the possibly new names */
for (unsigned int i = 0; i < n->schema.count; i++)
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins