Update of /cvsroot/monetdb/pathfinder/compiler/algebra/opt
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv26646/algebra/opt
Modified Files:
opt_join_graph.c
Log Message:
-- Fixed bug: Optimizations based on key and set properties may not be mixed
in a single DAG traversal as duplicate elimination may get lost.
The solution is to split up the optimizations into two phases.
-- Introduced rewrite that introduces a distinct operator on top of the plan
if the result is key. This allows us to get rid of distinct operators
in the remainder of the plan.
We furthermore ensure that the distinct operator is removed afterwards if
it the result produces key values (even without duplicate elimination).
This is ensured by
1.) introducing distinct operator on top,
2.) applying optimizations based on the set property, and
3.) applying optimizations based on the key property.
Index: opt_join_graph.c
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/algebra/opt/opt_join_graph.c,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -d -r1.6 -r1.7
--- opt_join_graph.c 16 Oct 2007 15:46:45 -0000 1.6
+++ opt_join_graph.c 26 Nov 2007 08:54:47 -0000 1.7
@@ -62,7 +62,7 @@
#define SEEN(p) ((p)->bit_dag)
-/* worker for PFalgopt_join_graph */
+/* Bottom-up worker for PFalgopt_join_graph */
static void
opt_join_graph (PFla_op_t *p)
{
@@ -80,59 +80,11 @@
/* action code */
switch (p->kind) {
- /* Replace semijoin operators by normal equi-joins. (A following
- optimization phase might then try to remove the equi-joins.) */
- case la_semijoin:
- if (PFprop_set (p->prop)) {
- /* we need to additional projections:
- top to remove the right join argument from the result
list
- right to avoid name conflicts with the left input columns
- and to ensure that distinct is applied
- only on the join column */
- PFalg_proj_t *top = PFmalloc (p->schema.count *
- sizeof (PFalg_proj_t));
- PFalg_att_t used_cols = 0,
- new_col = p->sem.eqjoin.att2,
- cur_col;
-
- /* fill the 'top' projection and collect all used columns
- of the left input relation (equals the semijoin output
- schema) */
- for (unsigned int i = 0; i < p->schema.count; i++) {
- cur_col = p->schema.items[i].name;
-
- top[i] = PFalg_proj (cur_col, cur_col);
- used_cols |= cur_col;
- }
- /* rename the join argument in case a name conflict occurs */
- if (used_cols & new_col)
- new_col = PFalg_ori_name (
- PFalg_unq_name (new_col, 0),
- ~used_cols);
-
- /* replace the semijoin */
- *p = *PFla_project_ (
- PFla_eqjoin (L(p),
- /* project away all columns except
- for the join column */
- PFla_project (
- R(p),
- PFalg_proj (
- new_col,
- p->sem.eqjoin.att2)),
- p->sem.eqjoin.att1,
- new_col),
- p->schema.count,
- top);
- }
- break;
-
/* Remove unnecessary distinct operators and narrow the schema
of a distinct operator if some columns are not required
while the remaining columns still provide a composite key. */
case la_distinct:
- if (PFprop_set (p->prop) ||
- PFprop_ckey (L(p)->prop, p->schema))
+ if (PFprop_ckey (L(p)->prop, p->schema))
*p = *PFla_dummy (L(p));
else {
PFalg_schema_t schema;
@@ -233,8 +185,7 @@
to allow a following optimization phase to get rid
of unnecessary eqjoin and number operators */
case la_step:
- if ((PFprop_set (p->prop)) ||
- (PFprop_key_right (p->prop, p->sem.step.item) &&
+ if ((PFprop_key_right (p->prop, p->sem.step.item) &&
(p->sem.step.axis == alg_attr ||
p->sem.step.axis == alg_chld ||
p->sem.step.axis == alg_self)) ||
@@ -270,8 +221,7 @@
operators to allow a following optimization phase
to get rid of unnecessary eqjoin and number operators */
case la_guide_step:
- if (PFprop_set (p->prop) ||
- ((PFprop_key_right (p->prop, p->sem.step.item) ||
+ if (((PFprop_key_right (p->prop, p->sem.step.item) ||
PFprop_ckey (R(p)->prop, p->schema)) &&
(p->sem.step.axis == alg_attr ||
p->sem.step.axis == alg_chld ||
@@ -319,12 +269,215 @@
}
}
+/* Top-down property worker for PFalgopt_join_graph */
+static void
+opt_set (PFla_op_t *p)
+{
+ assert (p);
+
+ /* rewrite each node only once */
+ if (SEEN(p))
+ return;
+ else
+ SEEN(p) = true;
+
+ /* apply join-graph specific optimization for children */
+ for (unsigned int i = 0; i < PFLA_OP_MAXCHILD && p->child[i]; i++)
+ opt_set (p->child[i]);
+
+ /* action code */
+ switch (p->kind) {
+ /* Replace semijoin operators by normal equi-joins. (A following
+ optimization phase might then try to remove the equi-joins.) */
+ case la_semijoin:
+ if (PFprop_set (p->prop)) {
+ /* we need to additional projections:
+ top to remove the right join argument from the result
list
+ right to avoid name conflicts with the left input columns
+ and to ensure that distinct is applied
+ only on the join column */
+ PFalg_proj_t *top = PFmalloc (p->schema.count *
+ sizeof (PFalg_proj_t));
+ PFalg_att_t used_cols = 0,
+ new_col = p->sem.eqjoin.att2,
+ cur_col;
+
+ /* fill the 'top' projection and collect all used columns
+ of the left input relation (equals the semijoin output
+ schema) */
+ for (unsigned int i = 0; i < p->schema.count; i++) {
+ cur_col = p->schema.items[i].name;
+
+ top[i] = PFalg_proj (cur_col, cur_col);
+ used_cols |= cur_col;
+ }
+ /* rename the join argument in case a name conflict occurs */
+ if (used_cols & new_col)
+ new_col = PFalg_ori_name (
+ PFalg_unq_name (new_col, 0),
+ ~used_cols);
+
+ /* replace the semijoin */
+ *p = *PFla_project_ (
+ PFla_eqjoin (L(p),
+ /* project away all columns except
+ for the join column */
+ PFla_project (
+ R(p),
+ PFalg_proj (
+ new_col,
+ p->sem.eqjoin.att2)),
+ p->sem.eqjoin.att1,
+ new_col),
+ p->schema.count,
+ top);
+ }
+ break;
+
+ /* Remove unnecessary distinct operators and narrow the schema
+ of a distinct operator if some columns are not required
+ while the remaining columns still provide a composite key. */
+ case la_distinct:
+ if (PFprop_set (p->prop))
+ *p = *PFla_dummy (L(p));
+ break;
+
+ /* Replace all step operators by step_join operators
+ to allow a following optimization phase to get rid
+ of unnecessary eqjoin and number operators */
+ case la_step:
+ if (PFprop_set (p->prop)) {
+
+ PFalg_att_t item_res;
+ item_res = PFalg_ori_name (
+ PFalg_unq_name (att_item, 0),
+ ~(p->sem.step.item |
+ p->sem.step.iter));
+
+ *p = *PFla_project (
+ PFla_step_join (
+ L(p),
+ R(p),
+ p->sem.step.axis,
+ p->sem.step.ty,
+ p->sem.step.level,
+ p->sem.step.item,
+ item_res),
+ PFalg_proj (p->sem.step.iter,
+ p->sem.step.iter),
+ PFalg_proj (p->sem.step.item_res,
+ item_res));
+ break;
+ }
+ break;
+
+ /* Replace all guide_step operators by guide_step_join
+ operators to allow a following optimization phase
+ to get rid of unnecessary eqjoin and number operators */
+ case la_guide_step:
+ if (PFprop_set (p->prop)) {
+
+ PFalg_att_t item_res;
+ item_res = PFalg_ori_name (
+ PFalg_unq_name (att_item, 0),
+ ~(p->sem.step.item |
+ p->sem.step.iter));
+
+ *p = *PFla_project (
+ PFla_guide_step_join (
+ L(p),
+ R(p),
+ p->sem.step.axis,
+ p->sem.step.ty,
+ p->sem.step.guide_count,
+ p->sem.step.guides,
+ p->sem.step.level,
+ p->sem.step.item,
+ item_res),
+ PFalg_proj (p->sem.step.iter,
+ p->sem.step.iter),
+ PFalg_proj (p->sem.step.item_res,
+ item_res));
+ break;
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
/**
* Invoke algebra optimization.
+ *
+ * NOTE: make sure that top-down and bottom-up properties (especially
+ * property key and set) are not mixed as this might result in
+ * inconsistencies (as e.g., two distinct operators are removed
+ * while one still has to remain).
*/
PFla_op_t *
PFalgopt_join_graph (PFla_op_t *root, PFguide_tree_t *guide_tree)
{
+ /* PHASE 1: Seed a distinct operator on top of the query plan
+ if possible to remove all other distinct operators. */
+
+ /* Infer key property first */
+ PFprop_infer_key (root);
+
+ /* Place a distinct operator on top if the query represents a join
+ graph whose sort criterion is also the output (check for keyness
+ of column item only). Here we apply this rewrite first to remove all
+ distinct operators in the following phase and to remove the distinct
+ operator by key property analysis at the end (if it is not needed). */
+ if (root->kind == la_serialize_seq) {
+ PFla_op_t *p = root;
+ PFalg_att_t item = p->sem.ser_seq.item;
+
+ /* introduce a new distinct operator on top of the plan if the
+ input already forms a key, ... */
+ if (PFprop_key (p->prop, item) &&
+ /* ... the query is a join-graph query, ... */
+ !(PFprop_type_of (p, item) & ~aat_node) &&
+ /* ... and a distinct operator does not occur in the near
+ surrounding. */
+ R(p)->kind != la_distinct &&
+ !(R(p)->kind == la_project && RL(p)->kind == la_distinct)) {
+ /* once again check for the join graph
+ this time ensuring that the result does
+ not produce new nodes */
+ PFla_op_t *frag = L(p);
+ while (frag->kind == la_frag_union) {
+ assert (R(frag)->kind == la_fragment);
+ if (RL(frag)->kind != la_doc_tbl)
+ break;
+ frag = L(frag);
+ }
+ if (frag->kind != la_frag_union) {
+ assert (frag->kind == la_empty_frag);
+
+ /* introduce distinct */
+ R(p) = PFla_distinct (R(p));
+ }
+ }
+ }
+
+ /* PHASE 2: optimize based on top-down set property */
+
+ /* Infer set property once more */
+ PFprop_infer_set (root);
+ /* and evaluate all optimization on the top-down
+ set property separately */
+ opt_set (root);
+ PFla_dag_reset (root);
+
+ /* In addition optimize the resulting DAG using the icols property
+ to remove inconsistencies introduced by changing the types
+ of unreferenced columns (rule distinct). The icols optimization
+ will ensure that these columns are 'really' never used. */
+ root = PFalgopt_icol (root);
+
+ /* PHASE 3: optimize based on bottom-up properties */
+
/* Infer key, icols, set, and composite key
properties first */
PFprop_infer_composite_key (root);
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins