Update of /cvsroot/monetdb/MonetDB5/src/modules/kernel
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv18951/src/modules/kernel

Modified Files:
      Tag: MonetDB_5-2
        group.mx 
Log Message:
Made a more advanced version of the sample trick. This was needed because
the old solution could overestimate (leading to failed hash table creation).


Index: group.mx
===================================================================
RCS file: /cvsroot/monetdb/MonetDB5/src/modules/kernel/group.mx,v
retrieving revision 1.93.2.1
retrieving revision 1.93.2.2
diff -u -d -r1.93.2.1 -r1.93.2.2
--- group.mx    22 Oct 2007 19:55:59 -0000      1.93.2.1
+++ group.mx    23 Oct 2007 17:28:44 -0000      1.93.2.2
@@ -649,22 +649,30 @@
 @c
 #define SAMPLE_SIZE    1024
 hash_t
-derive_mask( BAT *ct_histo, BAT *b)
+derive_mask( BAT *ct_map, BAT *ct_histo, BAT *b)
 {
+       size_t cnt = BATcount(b);
        int n = bits(BATcount(ct_histo)), *N = &n;
 
        if (BATcount(b) > (SAMPLE_SIZE<<3)) {
                BAT *s = BATsample(b, SAMPLE_SIZE);
-               BAT *u = BATkunique(BATmirror(s)); 
-               size_t uc = BATcount(u);
+               BUN p, q;
+               BAT *histo, *map;
+               hash_t r = 0;
 
+               if (CTderive(&map, &histo, ct_histo, ct_map, s) != GDK_SUCCEED){
+                       BBPunfix(s->batCacheid);
+                       return GDK_FAIL;
+               }
                BBPunfix(s->batCacheid);
-               BBPunfix(u->batCacheid);
-               if (uc > 0) {
-                       size_t ratio = (((dbl)BATcount(b)/SAMPLE_SIZE) * uc);
-                       if (ratio > 0)
-                               return (1<<bits(*N * ratio)) - 1;
+               BATloop(histo, p, q) {
+                       /* + 4 for a average chain list of 4 */
+                       r += ((dbl)cnt/SAMPLE_SIZE)/(*(int*)BUNtloc(histo,p)+4);
                }
+               BBPunfix(histo->batCacheid);
+               BBPunfix(map->batCacheid);
+               if (bits(r) > *N)
+                       return (1<<bits(r))-1;
        }
        /* default to */
        return (1<<*N) - 1;
@@ -681,7 +689,7 @@
        mapentry_t entry, *e;
        BAT *map;
        [EMAIL PROTECTED]
-       hash_t mask = derive_mask(ct_histo, b); 
+       hash_t mask = derive_mask(ct_map, ct_histo, b); 
        int custom_rng = BATcount(ct_histo); /* expected number of groups */
        hash_t custom_MASK = mask;
 


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/
_______________________________________________
Monetdb-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-checkins

Reply via email to