Update of /cvsroot/monetdb/MonetDB4/src/modules/plain
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv21408/src/modules/plain

Modified Files:
      Tag: MonetDB_4-20
        xtables.mx 
Log Message:
Made a more advanced version of the sample trick. This was needed because the
old solution could overestimate (leading to failed hash table creation).



Index: xtables.mx
===================================================================
RCS file: /cvsroot/monetdb/MonetDB4/src/modules/plain/xtables.mx,v
retrieving revision 1.4.2.1
retrieving revision 1.4.2.2
diff -u -d -r1.4.2.1 -r1.4.2.2
--- xtables.mx  22 Oct 2007 19:55:13 -0000      1.4.2.1
+++ xtables.mx  23 Oct 2007 17:29:12 -0000      1.4.2.2
@@ -703,22 +703,35 @@
 @c
 #define SAMPLE_SIZE    1024
 hash_t
-derive_mask( BAT *ct_histo, BAT *b)
+derive_mask( BAT *ct_map, BAT *ct_histo, BAT *b)
 {
+       size_t cnt = BATcount(b);
        int n = bits(BATcount(ct_histo)), *N = &n;
 
        if (BATcount(b) > (SAMPLE_SIZE<<3)) {
                BAT *s = BATsample(b, SAMPLE_SIZE);
-               BAT *u = BATkunique(BATmirror(s)); 
-               size_t uc = BATcount(u);
+               grp d,o;
+               BUN p, q;
+               BAT *histo;
+               hash_t r = 0;
 
+               o.map = ct_map->batCacheid;
+               o.histo = ct_histo->batCacheid;
+               if (CTderive(&d, &o, s) != GDK_SUCCEED) {
+                       BBPunfix(s->batCacheid);
+                       return GDK_FAIL;
+               }
                BBPunfix(s->batCacheid);
-               BBPunfix(u->batCacheid);
-               if (uc > 0) {
-                       size_t ratio = (((dbl)BATcount(b)/SAMPLE_SIZE) * uc);
-                       if (ratio > 0)
-                               return (1<<bits(*N * ratio)) - 1;
+               assert (d.histo);
+               histo = BATdescriptor(d.histo);
+               BATloop(histo, p, q) {
+                       /* + 4 for a average chain list of 4 */
+                       r += ((dbl)cnt/SAMPLE_SIZE)/(*(int*)BUNtloc(histo,p)+4);
                }
+               BBPunfix(d.histo);
+               grp_unfix(&d);
+               if (bits(r) > *N)
+                       return (1<<bits(r))-1;
        }
        /* default to */
        return (1<<*N) - 1;
@@ -735,7 +748,7 @@
        mapentry_t entry, *e;
        BAT *map;
        [EMAIL PROTECTED]
-       hash_t mask = derive_mask(ct_histo, b); 
+       hash_t mask = derive_mask(ct_map, ct_histo, b); 
        int custom_rng = BATcount(ct_histo); /* expected number of groups */
        hash_t custom_MASK = mask;
 


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/
_______________________________________________
Monetdb-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-checkins

Reply via email to