Update of /cvsroot/monetdb/pathfinder/runtime
In directory sc8-pr-cvs16:/tmp/cvs-serv3022
Modified Files:
pf_support.mx
Log Message:
-- Extended merge_adjacent_text_node PROC with an early-out semantic.
-- Added alternative evaluation for PROC ll_htordered_unique_thetajoin:
In case the iteration values are almost key we do a join on the iteration
values first before filtering out the matching tuples of the nested
value-based join.
-- Added helper PROC combine_node_info that squeezes all node information
(fragment + attribute value + pre value) into a single long to support
inequality comparisons/joins on nodes.
Index: pf_support.mx
===================================================================
RCS file: /cvsroot/monetdb/pathfinder/runtime/pf_support.mx,v
retrieving revision 1.217
retrieving revision 1.218
diff -u -d -r1.217 -r1.218
--- pf_support.mx 25 Apr 2007 12:52:39 -0000 1.217
+++ pf_support.mx 27 Apr 2007 14:20:31 -0000 1.218
@@ -1187,6 +1187,43 @@
bat[void,oid] lo, bat[void,oid] ro,
any lx, any rx) : bat[any,any]
{
+ if (not(reverse(lo).ordered()) or not(reverse(ro).ordered()))
+ ERROR("htordered_unique_thetajoin(): ordered left and right outer
columns (iters) expected.\n");
+
+ # in case the equality predicate (between lo and ro) is restrictive enough
+ # (at most 3*3 hits) we apply it in one go and filter out the false
positives
+ # using the second criterion (between l and r).
+ if ((lo.slice(0,1000).count() < 3 * lo.slice(0,1000).tunique().count())
+ and
+ (ro.slice(0,1000).count() < 3 * ro.slice(0,1000).tunique().count())) {
+ # join on the tails of lo and ro
+ var join_res := lo.tmark([EMAIL PROTECTED]).leftjoin
(ro.reverse().mark([EMAIL PROTECTED]));
+ # select for all result tuples the tail values of l
+ var l_map := join_res.hmark([EMAIL
PROTECTED]).leftfetchjoin(l.tmark([EMAIL PROTECTED]));
+ # select for all result tuples the tail values of r
+ var r_map := join_res.tmark([EMAIL
PROTECTED]).leftfetchjoin(r.tmark([EMAIL PROTECTED]));
+ # compare on the second condition 'mode(l, r)'
+ var res;
+ if (mode = EQ) { res := [=](l_map, r_map); }
+ else { if (mode = GT) { res := [>](l_map, r_map); }
+ else { if (mode = GE) { res := [>=](l_map, r_map); }
+ else { if (mode = LT) { res := [<](l_map, r_map); }
+ else { if (mode = LE) { res := [<=](l_map, r_map); } }}}}
+ # select all matches
+ res := res.select(true);
+ # map back to the keys of l and lo
+ res := res.reverse()
+ .leftfetchjoin(join_res.hmark([EMAIL PROTECTED]))
+ .reverse().mirror();
+ # attach lo
+ res := res.leftfetchjoin(lo.tmark([EMAIL PROTECTED])).reverse();
+ # attach the head of l
+ res := res.leftfetchjoin(l.hmark([EMAIL PROTECTED]));
+ # remove duplicates and sort
+ res := res.sunique().sort();
+ return res;
+ }
+
var lo_histo := histogram(lo), lr_histo := reverse(lo_histo);
var ro_histo := histogram(ro), rr_histo := reverse(ro_histo);
var li := 0LL, lp := 0LL, lc := count(int(lo_histo));
@@ -1194,9 +1231,6 @@
var b := new(void,bat,min(lc,rc));
var tpe := ttype(l);
- if (not(reverse(lo).ordered()) or not(reverse(ro).ordered()))
- ERROR("htordered_unique_thetajoin(): ordered left and right outer
columns (iters) expected.\n");
-
# trivial case; not handled below as log2() cannot cope with max() of an
empty BAT being NIL
if ((lc = 0LL) or (rc = 0LL)) {
return bat(oid, oid, 0).access(BAT_READ);
@@ -1243,6 +1277,107 @@
return bn.access(BAT_READ);
}
+# procedure that squeezes two node sequences # (encoded in 3 bats: pre,
+# frag, and attr) into two bats of type lng by using bit-shifting.
+# The procedure needs both node sequences to align the bit-shifting
+# and thus to guarantee that the lng values can be compared instead
+# of the node representation.
+PROC combine_node_info (any frag1, bat[void,oid] pre1, any attr1,
+ any frag2, bat[void,oid] pre2, any attr2) :
bat[void,bat]
+{
+ var flen, plen, alen, maxlen;
+ var res := bat (void, bat, 2).seqbase([EMAIL PROTECTED]);
+
+ # nothing to do if we have no nodes
+ if (pre1.count() = 0) return res.append(pre1).append(pre1);
+ if (pre2.count() = 0) return res.append(pre2).append(pre2);
+
+ plen := max (log2 (lng (max(pre1))), log2 (lng (max(pre2))));
+
+ if (type(frag1) = bat) {
+ if ((frag1.count() != pre1.count()) or
+ (frag1.seqbase() != pre1.seqbase()))
+ ERROR("combine_node_info(): pre and frag (of the first input) are
not aligned.\n");
+ flen := log2 (lng (max(frag1)));
+ } else {
+ if (type(frag1) = oid) {
+ flen := log2 (lng (frag1));
+ } else {
+ ERROR("combine_node_info(): frag value(s) expected.\n");
+ }
+ }
+
+ if (type(attr1) = bat) {
+ if ((pre1.count() != attr1.count()) or
+ (pre1.seqbase() != attr1.seqbase()))
+ ERROR("combine_node_info(): pre and attr (of the first input) are
not aligned.\n");
+ alen := log2 (lng (max(attr1)));
+ } else {
+ if (type(attr1) = oid) {
+ alen := log2 (lng (attr1));
+ } else {
+ alen := 0;
+ }
+ }
+
+ if (type(frag2) = bat) {
+ if ((frag2.count() != pre2.count()) or
+ (frag2.seqbase() != pre2.seqbase()))
+ ERROR("combine_node_info(): pre and frag (of the second input) are
not aligned.\n");
+ flen := max (flen, log2 (lng (max(frag2))));
+ } else {
+ if (type(frag2) = oid) {
+ flen := max (flen, log2 (lng (frag2)));
+ } else {
+ ERROR("combine_node_info(): frag value(s) expected.\n");
+ }
+ }
+
+ if (type(attr2) = bat) {
+ if ((pre2.count() != attr2.count()) or
+ (pre2.seqbase() != attr2.seqbase()))
+ ERROR("combine_node_info(): pre and attr (of the second input) are
not aligned.\n");
+ alen := max (alen, log2 (lng (max(attr2))));
+ } else {
+ if (type(attr2) = oid) {
+ alen := max (alen, log2 (lng (attr2)));
+ } else {
+ alen := max (alen, 0);
+ }
+ }
+
+ maxlen := flen + plen + alen;
+ if (maxlen > 64)
+ ERROR("combine_node_info(): node compression failed.\n");
+
+ var res1 := [<<]([lng](pre1), alen).access(BAT_WRITE);
+ var res2 := [<<]([lng](pre2), alen).access(BAT_WRITE);
+ if (alen > 0) {
+ if (type(attr1) = bat) {
+ [:+=](res1, [lng](attr1.tmark([EMAIL PROTECTED])));
+ } else {
+ [:+=](res1, const lng(attr1));
+ }
+ if (type(attr2) = bat) {
+ [:+=](res2, [lng](attr2.tmark([EMAIL PROTECTED])));
+ } else {
+ [:+=](res2, const lng(attr2));
+ }
+ }
+ if (type(frag1) = bat) {
+ [:+=](res1, [<<]([lng](frag1.tmark([EMAIL PROTECTED])), alen + plen));
+ } else {
+ [:+=](res1, const << (lng(frag1), alen + plen));
+ }
+ if (type(frag2) = bat) {
+ [:+=](res2, [<<]([lng](frag2.tmark([EMAIL PROTECTED])), alen + plen));
+ } else {
+ [:+=](res2, const << (lng(frag2), alen + plen));
+ }
+ return res.append (res1.access(BAT_READ))
+ .append (res2.access(BAT_READ));
+}
+
@- loop-lifted staircase join
@mil
#############################################
@@ -7081,6 +7216,13 @@
BAT[void,oid] pcont,
BAT[void,BAT] ws) : BAT[void,oid]
{
+ # nothing to do if our item sequences are all of length 1
+ if (iter.count() = iter.tunique().count())
+ return new (void, BAT).append (ws)
+ .append (pre)
+ .append (pcont)
+ .seqbase ([EMAIL PROTECTED]);
+
var map := pre.ord_uselect(oid_nil,oid_nil).hmark([EMAIL PROTECTED]);
iter := map.leftfetchjoin(iter);
pre := map.leftfetchjoin(pre);
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
_______________________________________________
Monetdb-pf-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins