Update of /cvsroot/monetdb/pathfinder/compiler/mil
In directory 23jxhf1.ch3.sourceforge.com:/tmp/cvs-serv22242/compiler/mil
Modified Files:
milgen.brg
Log Message:
propagated changes of Monday Feb 16 2009 - Tuesday Feb 17 2009
from the Feb2009 branch to the development trunk
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2009/02/16 - tsheyar: compiler/mil/milgen.brg,1.201.2.6
-- For constructed element and document nodes with empty content allow
all nodes defined in the grammar.
-- Rewrite the content of empty element or document constructors
into a unified variant ('fcns (nil, nil)' -> 'nil').
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2009/02/16 - tsheyar: compiler/mil/milgen.brg,1.201.2.7
-- Performance fix for QName lookup.
This rewrite improves the following query (a typical one in
the XIRAF context) by more than an order of magnitude:
for $a in (distinct-values (doc("auction100MB.xml")//*/name()))
order by $a
return $a
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
U milgen.brg
Index: milgen.brg
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/compiler/mil/milgen.brg,v
retrieving revision 1.206
retrieving revision 1.207
diff -u -d -r1.206 -r1.207
--- milgen.brg 9 Feb 2009 08:30:31 -0000 1.206
+++ milgen.brg 17 Feb 2009 00:54:02 -0000 1.207
@@ -280,9 +280,9 @@
Rel: twig (textnode (Rel)) = 104 (10);
Fcns: fcns (Twig, Fcns) = 105 (10);
Fcns: fcns (Twig, nil) = 106 (10);
-Twig: docnode (Rel, fcns (nil, nil)) = 107 (10);
+Twig: docnode (Rel, nil) = 107 (10);
Twig: docnode (Rel, Fcns) = 108 (10);
-Twig: element (Rel, fcns (nil, nil)) = 109 (10);
+Twig: element (Rel, nil) = 109 (10);
Twig: element (Rel, Fcns) = 110 (10);
Twig: attribute (Rel) = 111 (10);
Twig: textnode (Rel) = 112 (10);
@@ -1344,11 +1344,15 @@
* entries are assigned to
* @param[out] cont the MIL variable the containers to the respective
* QName references are assigned to
+ *
+ * @return indicate if all rows provide a QName
*/
-static void
-fn_node_name (PFpa_op_t *p, PFalg_col_t col, mvar_t *id, mvar_t *cont)
+static bool
+fn_node_name (PFpa_op_t *p, PFalg_col_t col, mvar_t *id, mvar_t *cont, bool set)
{ /* fold( */
- PFalg_simple_type_t ty = type_of (p, col);
+ PFla_op_t *origin = PFprop_lineage (p->prop, col);
+ bool names_only = false;
+ PFalg_simple_type_t ty = type_of (p, col);
if (ty == aat_pnode) {
/* find all element nodes and extract their QName references */
@@ -1359,35 +1363,51 @@
PFmil_t *pre = VAR (p->env, col, aat_pre),
*pre_cont = VAR (p->env, col, aat_frag);
- execute (
- assgn (var (kind->name),
- mposjoin (
- pre,
- pre_cont,
- fetch (var (PF_MIL_VAR_WS),
- var (PF_MIL_VAR_PRE_KIND)))),
- assgn (var (map->name),
- hmark (select_ (var (kind->name),
- var (PF_MIL_VAR_KIND_ELEM)),
- lit_oid (0))),
- assgn (var (elem->name),
- leftfetchjoin (var (map->name), pre)),
- assgn (var (elem_cont->name),
- leftfetchjoin (var (map->name), pre_cont)),
- assgn (var (id->name),
- leftfetchjoin (
- reverse (var (map->name)),
- mposjoin (var (elem->name),
- var (elem_cont->name),
+ /* check if the input stems from an element step */
+ if (origin && origin->kind == la_step_join &&
+ origin->sem.step.item_res == PFprop_lineage_col (p->prop, col) &&
+ origin->sem.step.spec.kind == node_kind_elem) {
+ execute (
+ assgn (var (id->name),
+ mposjoin (pre, pre_cont,
fetch (var (PF_MIL_VAR_WS),
- var (PF_MIL_VAR_PRE_PROP))))),
- assgn (var (cont->name),
- leftfetchjoin (
- reverse (var (map->name)),
- mposjoin (var (elem->name),
- var (elem_cont->name),
+ var (PF_MIL_VAR_PRE_PROP)))),
+ assgn (var (cont->name),
+ mposjoin (pre, pre_cont,
fetch (var (PF_MIL_VAR_WS),
- var (PF_MIL_VAR_PRE_CONT))))));
+ var (PF_MIL_VAR_PRE_CONT)))));
+ names_only = true;
+ }
+ else
+ execute (
+ assgn (var (kind->name),
+ mposjoin (
+ pre,
+ pre_cont,
+ fetch (var (PF_MIL_VAR_WS),
+ var (PF_MIL_VAR_PRE_KIND)))),
+ assgn (var (map->name),
+ hmark (select_ (var (kind->name),
+ var (PF_MIL_VAR_KIND_ELEM)),
+ lit_oid (0))),
+ assgn (var (elem->name),
+ leftfetchjoin (var (map->name), pre)),
+ assgn (var (elem_cont->name),
+ leftfetchjoin (var (map->name), pre_cont)),
+ assgn (var (id->name),
+ leftfetchjoin (
+ reverse (var (map->name)),
+ mposjoin (var (elem->name),
+ var (elem_cont->name),
+ fetch (var (PF_MIL_VAR_WS),
+ var (PF_MIL_VAR_PRE_PROP))))),
+ assgn (var (cont->name),
+ leftfetchjoin (
+ reverse (var (map->name)),
+ mposjoin (var (elem->name),
+ var (elem_cont->name),
+ fetch (var (PF_MIL_VAR_WS),
+ var (PF_MIL_VAR_PRE_CONT))))));
unpin (kind, 1);
unpin (elem, 1);
@@ -1410,6 +1430,7 @@
attr_cont,
fetch (var (PF_MIL_VAR_WS),
var (PF_MIL_VAR_ATTR_CONT)))));
+ names_only = true;
}
else {
assert (ty == aat_node);
@@ -1517,6 +1538,43 @@
unpin (pcont, 1);
unpin (res, 1);
}
+
+ /* remove the duplicates early -- before
+ duplicates are removed based on strings */
+ if (set &&
+ /* avoid rewrite if we are not done afterwards */
+ names_only &&
+ /* test for a constant fragment */
+ origin &&
+ origin->kind == la_step_join &&
+ origin->sem.step.item_res == PFprop_lineage_col (p->prop, col) &&
+ L(origin)->kind == la_frag_union &&
+ LL(origin)->kind == la_empty_frag &&
+ LR(origin)->kind == la_fragment &&
+ LRL(origin)->kind == la_doc_tbl &&
+ PFprop_const (LRL(origin)->prop,
+ LRL(origin)->sem.doc_tbl.col)) {
+ /* we assign new head values here -- as we are sure that
+ this logical column (BATs id & cont) is the only column
+ that is used lateron */
+ mvar_t *min = new_var (1);
+ execute (
+ assgn (var (min->name),
+ PFmil_gmin (reverse (var (id->name)))),
+ /* make sure that the output is sorted
+ in the input order again */
+ assgn (var (min->name),
+ reverse (sort (reverse (var (min->name)), DIR_ASC))),
+ assgn (var (id->name),
+ hmark (var (min->name), lit_oid (0))),
+ assgn (var (cont->name),
+ tmark (
+ leftfetchjoin (var (min->name), var (cont->name)),
+ lit_oid (0))));
+ unpin (min, 1);
+ }
+
+ return names_only;
} /* fold) */
/**
@@ -5631,13 +5689,19 @@
*empty_str = new_var (1),
*mu = new_var (1),
*prefix = new_var (1),
- *name = new_var (1),
*prefix_bool = new_var (1),
*true_oid = new_var (1),
*false_oid = new_var (1);
+ bool set,
+ names_only;
+
+ /* check whether we are allowed to remove duplicates */
+ set = PFprop_set (p->prop) &&
+ PFprop_icols_count (p->prop) == 1 &&
+ PFprop_icol (p->prop, p->sem.fun_1to1.res);
/* look up the correct QName references */
- fn_node_name (L(p), col, id, cont);
+ names_only = fn_node_name (L(p), col, id, cont, set);
/* get all prefixes */
execute (
@@ -5673,7 +5737,7 @@
tmark (
fetch (var (mu->name), lit_int (1)),
lit_oid (0))),
- assgn (var (name->name),
+ assgn (var (res->name),
madd (var (prefix->name),
mposjoin (tmark (var (id->name),
lit_oid (0)),
@@ -5681,30 +5745,31 @@
lit_oid (0)),
fetch (
var (PF_MIL_VAR_WS),
- var (PF_MIL_VAR_QN_LOC))))),
- /* empty names for nodes that have no QName */
- assgn (var (empty_str->name),
- project (kdiff (ANY_VAR(L(p)->env),
- var (id->name)),
- lit_str (""))),
- assgn (var (mu->name),
- merged_union (
- arg (hmark (var (id->name), lit_oid (0)),
- arg (hmark (var (empty_str->name),
- lit_oid (0)),
- arg (var (name->name),
- tmark (var (empty_str->name),
- lit_oid (0))))))),
- assgn (var (res->name),
- tmark (fetch (var (mu->name), lit_int (1)),
- lit_oid (0))));
+ var (PF_MIL_VAR_QN_LOC))))));
+ if (!names_only)
+ execute (
+ /* empty names for nodes that have no QName */
+ assgn (var (empty_str->name),
+ project (kdiff (ANY_VAR(L(p)->env),
+ var (id->name)),
+ lit_str (""))),
+ assgn (var (mu->name),
+ merged_union (
+ arg (hmark (var (id->name), lit_oid (0)),
+ arg (hmark (var (empty_str->name),
+ lit_oid (0)),
+ arg (var (res->name),
+ tmark (var (empty_str->name),
+ lit_oid (0))))))),
+ assgn (var (res->name),
+ tmark (fetch (var (mu->name), lit_int (1)),
+ lit_oid (0))));
unpin (id, 1);
unpin (cont, 1);
unpin (empty_str, 1);
unpin (mu, 1);
unpin (prefix, 1);
- unpin (name, 1);
unpin (prefix_bool, 1);
unpin (true_oid, 1);
unpin (false_oid, 1);
@@ -5717,10 +5782,11 @@
PFalg_col_t col = clat (p->sem.fun_1to1.refs, 0);
mvar_t *id = new_var (1),
*cont = new_var (1),
- *name = new_var (1),
*empty_str = new_var (1),
*mu = new_var (1);
PFmil_ident_t uri_loc = PF_MIL_VAR_UNUSED;
+ bool set,
+ names_only;
switch (p->sem.fun_1to1.kind) {
case alg_fun_fn_local_name:
@@ -5731,36 +5797,42 @@
assert(!"should never reach here"); break;
}
+ /* check whether we are allowed to remove duplicates */
+ set = PFprop_set (p->prop) &&
+ PFprop_icols_count (p->prop) == 1 &&
+ PFprop_icol (p->prop, p->sem.fun_1to1.res);
+
/* look up the correct QName references */
- fn_node_name (L(p), col, id, cont);
+ names_only = fn_node_name (L(p), col, id, cont, set);
/* get all prefixes */
execute (
- assgn (var (name->name),
+ assgn (var (res->name),
mposjoin (tmark (var (id->name), lit_oid (0)),
tmark (var (cont->name), lit_oid (0)),
fetch (var (PF_MIL_VAR_WS),
- var (uri_loc)))),
- /* empty names for nodes that have no QName */
- assgn (var (empty_str->name),
- project (kdiff (ANY_VAR(L(p)->env),
- var (id->name)),
- lit_str (""))),
- assgn (var (mu->name),
- merged_union (
- arg (hmark (var (id->name), lit_oid (0)),
- arg (hmark (var (empty_str->name),
- lit_oid (0)),
- arg (var (name->name),
- tmark (var (empty_str->name),
- lit_oid (0))))))),
- assgn (var (res->name),
- tmark (fetch (var (mu->name), lit_int (1)),
- lit_oid (0))));
+ var (uri_loc)))));
+ if (!names_only)
+ execute (
+ /* empty names for nodes that have no QName */
+ assgn (var (empty_str->name),
+ project (kdiff (ANY_VAR(L(p)->env),
+ var (id->name)),
+ lit_str (""))),
+ assgn (var (mu->name),
+ merged_union (
+ arg (hmark (var (id->name), lit_oid (0)),
+ arg (hmark (var (empty_str->name),
+ lit_oid (0)),
+ arg (var (res->name),
+ tmark (var (empty_str->name),
+ lit_oid (0))))))),
+ assgn (var (res->name),
+ tmark (fetch (var (mu->name), lit_int (1)),
+ lit_oid (0))));
unpin (id, 1);
unpin (cont, 1);
- unpin (name, 1);
unpin (empty_str, 1);
unpin (mu, 1);
@@ -8289,7 +8361,7 @@
}
break; /* fold) */
- /* Twig: docnode (Rel, fcns (nil, nil)) */
+ /* Twig: docnode (Rel, nil) */
case 107: /* fold( */
assert (type_of (L(p), p->sem.ii.iter) == aat_nat);
{
@@ -8460,7 +8532,7 @@
}
break; /* fold) */
- /* Twig: element (Rel, fcns (nil, nil)) */
+ /* Twig: element (Rel, nil) */
case 109: /* fold( */
assert (type_of (L(p), p->sem.ii.iter) == aat_nat);
assert (type_of (L(p), p->sem.ii.item) == aat_qname);
------------------------------------------------------------------------------
Open Source Business Conference (OSBC), March 24-25, 2009, San Francisco, CA
-OSBC tackles the biggest issue in open source: Open Sourcing the Enterprise
-Strategies to boost innovation and cut costs with open source participation
-Receive a $600 discount off the registration fee with the source code: SFAD
http://p.sf.net/sfu/XcvMzF8H
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins