Update of /cvsroot/monetdb/MonetDB/src/gdk
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv30674/src/gdk

Modified Files:
        gdk_setop.mx 
Log Message:
made kintersect/kdiff faster.
There is now atleast one case worked out using pointer acces. Special case
for void. And new implementation for small right bats (a very common case),
we first check if the hash doens have collisions. If not we can use direct
hash (+value check), ie no need to follow the chain list. Also a very small
in memory lookup table can be used.


Index: gdk_setop.mx
===================================================================
RCS file: /cvsroot/monetdb/MonetDB/src/gdk/gdk_setop.mx,v
retrieving revision 1.57
retrieving revision 1.58
diff -u -d -r1.57 -r1.58
--- gdk_setop.mx        14 Nov 2007 12:40:55 -0000      1.57
+++ gdk_setop.mx        11 Dec 2007 17:39:28 -0000      1.58
@@ -73,6 +73,17 @@
 #define HITdiff(h,t)
 #define MISSintersect(h,t)
 #define MISSdiff(h,t)           bunfastins(bn,h,t)
+
+#define HITintersect_nocheck(h,t)       
bunfastins_nocheck(bn,BUNlast(bn),h,t,Hsize(bn),Tsize(bn))
+#define HITdiff_nocheck(h,t)
+#define MISSintersect_nocheck(h,t)
+#define MISSdiff_nocheck(h,t)           
bunfastins_nocheck(bn,BUNlast(bn),h,t,Hsize(bn),Tsize(bn))
+
+#define DHITintersect(h,t)       bnh[o] = *(h); bnt[o++] = t;
+#define DHITdiff(h,t)
+#define DMISSintersect(h,t)
+#define DMISSdiff(h,t)           bnh[o] = *(h); bnt[o++] = t;
+
 #define ENDintersect(h,t)
 #define ENDdiff(h,t)            for(;p1<q1;p1++) bunfastins(bn,h,t)
 #define RALIGNdiff(bn,l,r)      FALSE
@@ -84,6 +95,7 @@
 #include "monetdb_config.h"
 #include "gdk.h"
 #include "gdk_setop.h"
+#include "gdk_search.h"
 
 @+ Double Elimination
 Comes in two flavors: looking at one column, or at two at-a-time.
@@ -428,7 +440,7 @@
        BUN p1, q1;
        int ins;
        hash_t s2;
-       ptr h, t, t2 = NULL, h2 = hnil;
+       ptr h, t, h2 = hnil;
        BATiter li = bat_iterator(l);
        BATiter ri = bat_iterator(r);
 
@@ -443,8 +455,7 @@
                ins = TRUE;
                if (@6) /* check for not-nil (nils don't match anyway) */
                        [EMAIL PROTECTED](ri, r->H->hash, s2, h) {
-                               t2 = BUNtail(ri, s2);
-                               if ([EMAIL PROTECTED](t, t2)) {
+                               if ([EMAIL PROTECTED](t, BUNtail(ri, s2))) {
                                        [EMAIL PROTECTED](h, t);
                                        ins = FALSE;
                                        break;
@@ -454,10 +465,114 @@
                        continue;
                [EMAIL PROTECTED](h, t);
        }
-       if (t2 || h2)
-               ins = 0;               /* dummy action for the compiler */
+       (void)h2; /* in some cases the @6 check doesn't use the h2 */
        BATmmap_unpin(r);
 
+#define DIRECT_MAX 256
+
+#define chr_EQ(x,y) simple_EQ(x,y,chr)
+#define bte_EQ(x,y) simple_EQ(x,y,bte)
+#define sht_EQ(x,y) simple_EQ(x,y,sht)
+#define int_EQ(x,y) simple_EQ(x,y,int)
+#define lng_EQ(x,y) simple_EQ(x,y,lng)
+#define flt_EQ(x,y) simple_EQ(x,y,flt)
+#define dbl_EQ(x,y) simple_EQ(x,y,dbl)
+
+#define hashvar(h,v) hash_any(h,v)
+#define hashloc(h,v) hash_any(h,v)
+
+/* later add version for l void tail, remove general tail values then */
[EMAIL PROTECTED] directcheck
+       BUN p1, q1;
+       int i;
+       ptr h, h2 = hnil;
+       BATiter li = bat_iterator(l);
+       BATiter ri = bat_iterator(r);
+       sht d[DIRECT_MAX];
+       Hash hs, *H = &hs;
+       int collision = 0;
+
+       H -> mask = DIRECT_MAX-1;
+       H -> type = BAThtype(l);
+
+       ALGODEBUG THRprintf(GDKout, "[EMAIL PROTECTED]@2: [EMAIL PROTECTED], 
@2, @3, @4, @5];\n");
+
+       assert(l->htype == r->htype && r->htype != TYPE_void);
+
+       memset(d, 0, sizeof(d));
+       BATloop(r, p1, q1) {
+               h = [EMAIL PROTECTED](ri,p1);
+               i = [EMAIL PROTECTED](H, h);
+               /* collision or check for not-nil (nils don't match anyway) */
+               if (d[i] != 0 || !(@6)) {
+                       collision = 1;
+                       break;
+               }
+               d[i] = ((sht)p1)+1;
+       }
+       if (collision) {
+               @:hashcheck(@1,@2,@3,[EMAIL PROTECTED],@5,@6)@
+       } else {
+               if (!l->ttype && l->tseqbase != oid_nil) {
+                       oid b = l->tseqbase, *t = &b;
+                       @4 *h = (@4*)BUNhloc(li, BUNfirst(l));
+                       @4 *rh = (@4*)BUNhloc(ri, 0);
+                       @4 *bnh;
+                       oid *bnt;
+                       BUN o = BUNfirst(bn);
+
+                       ALGODEBUG THRprintf(GDKout, "[EMAIL PROTECTED]@2: 
[EMAIL PROTECTED], @2, @3, [EMAIL PROTECTED], @5][void tail]; %d %d\n", 
BATcount(l), BATcount(r));
+                       p1 = 0;
+                       q1 = BATcount(l);
+                       while(p1 < q1) {
+                               BUN r1 = p1 + 65536;
+                               if (r1 > q1) r1 = q1;
+                               if (o + 65536 > BATcapacity(bn)){       
+                                       if (BATextend(bn, 
BATcapacity(bn)+65536) == NULL)
+                                               goto bunins_failed; 
+                               }
+                               bnh = (@4*)Hloc(bn,0);
+                               bnt = (oid*)Tloc(bn,0);
+                               for (; p1<r1; p1++, b++){ 
+                                       i = [EMAIL PROTECTED](H, h+p1);
+                                       if (d[i] != 0 && @7(h+p1, rh+d[i]-1) &&
+                                           [EMAIL PROTECTED](t, BUNtail(ri, 
d[i]-1))) {
+                                               [EMAIL PROTECTED](h+p1, b);
+                                       } else {
+                                               [EMAIL PROTECTED](h+p1, b);
+                                       }
+                               }
+                       }
+                       BATsetcount(bn, o);
+                       (void)t;
+               } else { 
+                       @4 *h = (@4*)BUNhloc(li, 0);
+                       @4 *rh = (@4*)BUNhloc(ri, 0);
+
+                       ALGODEBUG THRprintf(GDKout, "[EMAIL PROTECTED]@2: 
[EMAIL PROTECTED], @2, @3, [EMAIL PROTECTED], @5]; %d %d\n", BATcount(l), 
BATcount(r));
+                       p1 = BUNfirst(l);
+                       q1 = BUNlast(l);
+                       while(p1 < q1) {
+                               BUN r1 = p1 + 65536;
+                               if (r1 > q1) r1 = q1;
+                               if (BUNlast(bn) + 65536 > BATcapacity(bn)){     
+                                       if (BATextend(bn, 
BATcapacity(bn)+65536) == NULL)
+                                               goto bunins_failed; 
+                               }
+                               for (; p1<r1; p1++) { 
+                                       i = [EMAIL PROTECTED](H, h+p1);
+                                       if (d[i] != 0 && @7(h+p1, rh+d[i]-1) &&
+                                       [EMAIL PROTECTED](BUNtail(li,p1), 
BUNtail(ri, d[i]-1))) {
+                                               [EMAIL PROTECTED](h+p1, 
BUNtail(li, p1));
+                                       } else {
+                                               [EMAIL PROTECTED](h+p1, 
BUNtail(li, p1));
+                                       }
+                               }
+                       }
+               }
+       }
+       (void)h2; /* in some cases the @6 check doesn't use the h2 */
+
 @= voidcheck
        {
                BATiter li = bat_iterator(l);
@@ -534,7 +649,7 @@
                }
        }
 
[EMAIL PROTECTED] check
[EMAIL PROTECTED] checkall
        if (BAThdense(l)) {
                @:hashcheck(@1,pos,@2,@3,@5,TRUE)@
        } else if (hash) {
@@ -544,6 +659,20 @@
        }
        break;
 
[EMAIL PROTECTED] check
+       if (BAThdense(l)) {
+               @:hashcheck(@1,pos,@2,[EMAIL PROTECTED],@5,TRUE)@
+       } else if (hash) {
+               if (BATcount(r) < DIRECT_MAX && (GDKdebug&65536)) {
+                       @:directcheck(@1,@2,@2,@3,@5,@4,@6)@
+               } else {
+                       @:hashcheck(@1,@2,@2,[EMAIL PROTECTED],@5,@4)@
+               }
+       } else {
+               @:mergecheck(@1,@2,[EMAIL PROTECTED],@4,@5)@
+       }
+       break;
+
 @= batcheck
 static BAT*
 [EMAIL PROTECTED]@2(BAT *bn, BAT *l, BAT *r)
@@ -583,37 +712,37 @@
                switch(ATOMstorage(r->htype)) {
 #ifndef NOEXPAND_CHR
                case TYPE_chr:
-                       @:check(@2,loc,_chr,simple_CMP(h,h2,chr),@1)@
+                       @:check(@2,loc,chr,simple_CMP(h,h2,chr),@1,chr_EQ)@
 #endif
 #ifndef NOEXPAND_BTE
                case TYPE_bte:
-                       @:check(@2,loc,_bte,simple_CMP(h,h2,bte),@1)@
+                       @:check(@2,loc,bte,simple_CMP(h,h2,bte),@1,bte_EQ)@
 #endif
 #ifndef NOEXPAND_SHT
                case TYPE_sht:
-                       @:check(@2,loc,_sht,simple_CMP(h,h2,sht),@1)@
+                       @:check(@2,loc,sht,simple_CMP(h,h2,sht),@1,sht_EQ)@
 #endif
 #ifndef NOEXPAND_INT
                case TYPE_int:
-                       @:check(@2,loc,_int,simple_CMP(h,h2,int),@1)@
+                       @:check(@2,loc,int,simple_CMP(h,h2,int),@1,int_EQ)@
 #endif
 #ifndef NOEXPAND_FLT
                case TYPE_flt:
-                       @:check(@2,loc,_flt,simple_CMP(h,h2,flt),@1)@
+                       @:check(@2,loc,flt,simple_CMP(h,h2,flt),@1,flt_EQ)@
 #endif
 #ifndef NOEXPAND_DBL
                case TYPE_dbl:
-                       @:check(@2,loc,_dbl,simple_CMP(h,h2,dbl),@1)@
+                       @:check(@2,loc,dbl,simple_CMP(h,h2,dbl),@1,dbl_EQ)@
 #endif
 #ifndef NOEXPAND_LNG
                case TYPE_lng:
-                       @:check(@2,loc,_lng,simple_CMP(h,h2,lng),@1)@
+                       @:check(@2,loc,lng,simple_CMP(h,h2,lng),@1,lng_EQ)@
 #endif
                default:
                        if (r->hvarsized) {
-                               @:check(@2,var,var,((*merge)(h,h2)),@1)@
+                               
@:checkall(@2,var,var,((*merge)(h,h2)),@1,0==cmp)@
                        } else {
-                               @:check(@2,loc,loc,((*merge)(h,h2)),@1)@
+                               
@:checkall(@2,loc,loc,((*merge)(h,h2)),@1,0==cmp)@
                        }
                }
        }


-------------------------------------------------------------------------
SF.Net email is sponsored by: 
Check out the new SourceForge.net Marketplace.
It's the best place to buy or sell services for
just about anything Open Source.
http://sourceforge.net/services/buy/index.php
_______________________________________________
Monetdb-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-checkins

Reply via email to