Update of /cvsroot/monetdb/pathfinder/compiler/algebra/map
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv26788/algebra/map
Modified Files:
Tag: XQuery_0-24
map_ori_names.c map_unq_names.c
Log Message:
-- Re-implemented the join pushdown optimization phase
(due to incorrect rewrites).
o The special equi-join operator working on unique column
names now incorporates for each operand a projection.
Based on this projection we can now always maintain the
correct schema and push the join even through renaming
projections.
The old variant did in some cases 'forget' from which
operand of an equi-join columns with the same name stem
from.
o The new equi-join pushdown phase is a lot more effective
than the old one and thus generates very wide relations
(where a large number of columns can be pruned afterwards).
Placing a projection pushdown phase (icols optimization)
afterwards allows us to map the resulting plans back to
bit-encoded column names without running out of column
bits.
U map_ori_names.c
Index: map_ori_names.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/map/map_ori_names.c,v
retrieving revision 1.33
retrieving revision 1.33.2.1
diff -u -d -r1.33 -r1.33.2.1
--- map_ori_names.c 3 Apr 2008 11:03:13 -0000 1.33
+++ map_ori_names.c 28 May 2008 11:37:21 -0000 1.33.2.1
@@ -279,69 +279,72 @@
break;
case la_eqjoin_unq:
+ /* We need to turn the implicit projections
+ into explicit ones to use the la_eqjoin
+ operator again. */
+#define proj_at(l,i) (*(PFalg_proj_t *) PFarray_at ((l),(i)))
{
- PFalg_proj_t *projlist;
- PFalg_att_t ori;
- unsigned int count;
-
- /* cope with special case where unique names
- of the join arguments conflict */
- if (p->sem.eqjoin_unq.att1 ==
- p->sem.eqjoin_unq.att2) {
- /* translation is almost identical to function
- add_renaming_projection(). The only difference
- is the special unique name used for the right
- join argument. */
- PFalg_att_t ori_new, ori_old, unq, ori_join = att_NULL;
- PFla_op_t *right = O(R(p));
- bool renamed = false;
-
- projlist = PFmalloc (right->schema.count *
- sizeof (PFalg_proj_t));
- count = 0;
-
- for (unsigned int i = 0; i < right->schema.count; i++) {
- ori_old = right->schema.items[i].name;
-
- /* lookup unique name for column @a ori_old */
- unq = C_UNAME (p, ori_old, RIGHT);
-
- /* column ori_old is not referenced by operator @a p
- and thus does not appear in the projection */
- if (!unq) continue;
-
- if (unq == p->sem.eqjoin_unq.att2) {
- /* use special name to lookup original
- attribute name proposed for the join
- argument */
- unq = PFalg_unq_name (att_item, 0);
- ori_join = PFprop_ori_name (p->prop, unq);
- ori_new = ori_join;
- }
- else
- /* lookup corresponding new name for column @a ori_old
*/
- ori_new = ONAME(p, unq);
-
- /* don't allow missing matches */
- assert (ori_new);
+ PFarray_t *lproj = p->sem.eqjoin_unq.lproj,
+ *rproj = p->sem.eqjoin_unq.rproj;
+ PFalg_proj_t *projlist,
+ *lprojlist,
+ *rprojlist;
+ PFla_op_t *left = O(L(p)),
+ *right = O(R(p));
+ unsigned int lcount = 0,
+ rcount = 0,
+ i;
+ PFalg_att_t att1_new,
+ att2_new,
+ att2_old,
+ att2_unq,
+ used_cols = 0,
+ ori,
+ unq_new,
+ unq_old,
+ ori_new,
+ ori_old;
+
+ /* look up the join column names */
+ att1_new = ONAME(p, proj_at (lproj, 0).new);
+ att2_unq = proj_at (rproj, 0).old;
+ att2_old = PFprop_ori_name_right (p->prop, att2_unq);
- projlist[count++] = proj (ori_new, ori_old);
- renamed = renamed || (ori_new != ori_old);
- }
- if (renamed)
- right = PFla_project_ (right, count, projlist);
+ projlist = PFmalloc (p->schema.count * sizeof (PFalg_proj_t));
+ lprojlist = PFmalloc (p->schema.count * sizeof (PFalg_proj_t));
+ rprojlist = PFmalloc (p->schema.count * sizeof (PFalg_proj_t));
+
+ /* create the projection list for the left operand */
+ for (i = 0; i < PFarray_last (lproj); i++) {
+ unq_new = proj_at (lproj, i).new;
+ unq_old = proj_at (lproj, i).old;
+ ori_new = ONAME (p, unq_new);
+ ori_old = ONAME (L(p), unq_old);
+ lprojlist[lcount++] = proj (ori_new, ori_old);
+ }
- res = eqjoin (PROJ(LEFT, p), right,
- ONAME(p, p->sem.eqjoin_unq.att1),
- ori_join);
+ /* Create a new unused column name for the right join argument
+ that is projected away again. This way name conflicts are
+ avoided. */
+ for (i = 0; i < p->schema.count; i++)
+ used_cols |= ONAME(p, p->schema.items[i].name);
+ att2_new = PFalg_ori_name (att2_unq, ~used_cols);
+ rprojlist[rcount++] = proj (att2_new, att2_old);
+ /* create the projection list for the right operand */
+ for (i = 1; i < PFarray_last (rproj); i++) {
+ unq_new = proj_at (rproj, i).new;
+ unq_old = proj_at (rproj, i).old;
+ ori_new = ONAME (p, unq_new);
+ ori_old = ONAME (R(p), unq_old);
+ rprojlist[rcount++] = proj (ori_new, ori_old);
}
- else
- res = eqjoin (PROJ(LEFT, p), PROJ(RIGHT, p),
- ONAME(p, p->sem.eqjoin_unq.att1),
- ONAME(p, p->sem.eqjoin_unq.att2));
- /* As some operators may rely on the schema of its operands
+ res = eqjoin (PFla_project_ (left, lcount, lprojlist),
+ PFla_project_ (right, rcount, rprojlist),
+ att1_new, att2_new);
+
+ /* As some operators rely on the schema of its operands
we introduce a projection that removes the second join
attribute thus maintaining the schema of the duplicate
aware eqjoin operator. */
U map_unq_names.c
Index: map_unq_names.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/map/map_unq_names.c,v
retrieving revision 1.33
retrieving revision 1.33.2.1
diff -u -d -r1.33 -r1.33.2.1
--- map_unq_names.c 3 Apr 2008 12:39:29 -0000 1.33
+++ map_unq_names.c 28 May 2008 11:37:22 -0000 1.33.2.1
@@ -268,62 +268,56 @@
case la_eqjoin:
{
- /* Add a projection for each operand to ensure
- that all columns are present. */
- PFla_op_t *left, *right;
- PFalg_proj_t *projlist, *projlist2;
- PFalg_att_t ori, unq, l_unq, r_unq, att1_unq, att2_unq;
- unsigned int count, count2;
- bool renamed;
-
- left = U(L(p));
- right = U(R(p));
+ /* Prepare a projection list for both operands
+ to ensure the correct names of all columns. */
+ PFla_op_t *left,
+ *right;
+ PFarray_t *lproj,
+ *rproj;
+ PFalg_att_t ori,
+ unq,
+ l_unq,
+ r_unq,
+ att1_unq,
+ att2_unq;
+ left = U(L(p));
+ right = U(R(p));
+
att1_unq = PFprop_unq_name_left (p->prop, p->sem.eqjoin.att1);
att2_unq = PFprop_unq_name_right (p->prop, p->sem.eqjoin.att2);
+
+ lproj = PFarray (sizeof (PFalg_proj_t), left->schema.count);
+ rproj = PFarray (sizeof (PFalg_proj_t), right->schema.count);
+
+ *(PFalg_proj_t *) PFarray_add (lproj)
+ = proj (UNAME(p, p->sem.eqjoin.att1), att1_unq);
+ *(PFalg_proj_t *) PFarray_add (rproj)
+ = proj (UNAME(p, p->sem.eqjoin.att2), att2_unq);
- projlist = PFmalloc (left->schema.count *
- sizeof (PFalg_proj_t));
- projlist2 = PFmalloc (right->schema.count *
- sizeof (PFalg_proj_t));
- count = 0;
- count2 = 0;
- projlist [count++] = proj (att1_unq, att1_unq);
- projlist2[count2++] = proj (att2_unq, att2_unq);
-
- renamed = false;
for (unsigned int i = 0; i < left->schema.count; i++) {
l_unq = left->schema.items[i].name;
ori = PFprop_ori_name_left (p->prop, l_unq);
assert (ori);
- unq = PFprop_unq_name (p->prop, ori);
- if (l_unq != att1_unq) {
- projlist[count++] = proj (unq, l_unq);
- renamed = renamed || (unq != l_unq);
- }
+ unq = UNAME(p, ori);
+ if (l_unq != att1_unq)
+ *(PFalg_proj_t *) PFarray_add (lproj) = proj (unq, l_unq);
}
- if (renamed)
- left = PFla_project_ (left, count, projlist);
- renamed = false;
for (unsigned int i = 0; i < right->schema.count; i++) {
r_unq = right->schema.items[i].name;
ori = PFprop_ori_name_right (p->prop, r_unq);
assert (ori);
- unq = PFprop_unq_name (p->prop, ori);
- if (r_unq != att2_unq) {
- projlist2[count2++] = proj (unq, r_unq);
- renamed = renamed || (unq != r_unq);
- }
+ unq = UNAME(p, ori);
+ if (r_unq != att2_unq)
+ *(PFalg_proj_t *) PFarray_add (rproj) = proj (unq, r_unq);
}
- if (renamed)
- right = PFla_project_ (right, count2, projlist2);
- res = PFla_eqjoin_clone (left, right,
- att1_unq, att2_unq,
- UNAME(p, p->sem.eqjoin.att1));
+ /* create a join operator that comes with two internal
+ projection lists */
+ res = PFla_eqjoin_clone (left, right, lproj, rproj);
} break;
case la_eqjoin_unq:
@@ -335,6 +329,14 @@
/* Transform semi-join operations into equi-joins
as these semi-joins might be superfluous as well. */
if (PFprop_set (p->prop)) {
+ PFarray_t *lproj,
+ *rproj;
+ PFalg_att_t ori,
+ unq,
+ l_unq,
+ att1_unq,
+ att2_unq;
+
/* we have to make sure that only the columns from
the left side are visible */
PFalg_proj_t *projlist;
@@ -345,14 +347,30 @@
for (unsigned int i = 0; i < left->schema.count; i++)
projlist[i] = proj (left->schema.items[i].name,
left->schema.items[i].name);
+
+ att1_unq = PFprop_unq_name_left (p->prop, p->sem.eqjoin.att1);
+ att2_unq = PFprop_unq_name_right (p->prop, p->sem.eqjoin.att2);
+
+ lproj = PFarray (sizeof (PFalg_proj_t), left->schema.count);
+ rproj = PFarray (sizeof (PFalg_proj_t), 1);
+
+ *(PFalg_proj_t *) PFarray_add (lproj)
+ = proj (att1_unq, att1_unq);
+ *(PFalg_proj_t *) PFarray_add (rproj)
+ = proj (att1_unq, att2_unq);
+
+ for (unsigned int i = 0; i < left->schema.count; i++) {
+ l_unq = left->schema.items[i].name;
+ ori = PFprop_ori_name_left (p->prop, l_unq);
+ assert (ori);
+
+ unq = UNAME(p, ori);
+ if (l_unq != att1_unq)
+ *(PFalg_proj_t *) PFarray_add (lproj) = proj (unq,
l_unq);
+ }
+
res = PFla_project_ (
- PFla_eqjoin_clone (
- left,
- U(R(p)),
- UNAME (p, p->sem.eqjoin.att1),
- PFprop_unq_name_right (p->prop,
- p->sem.eqjoin.att2),
- UNAME (p, p->sem.eqjoin.att1)),
+ PFla_eqjoin_clone (left, U(R(p)), lproj, rproj),
left->schema.count,
projlist);
} else {
@@ -373,6 +391,9 @@
left = U(L(p));
+ /* we ensure that a column split like proj_(item2:item1, item1)
+ is avoided in the unique name encoding by keeping the column
+ only once (proj_(item23)) */
for (unsigned int i = 0; i < left->schema.count; i++)
for (unsigned int j = 0; j < p->sem.proj.count; j++)
if ((unq = UNAME(p, p->sem.proj.items[j].new)) ==
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins