Update of /cvsroot/monetdb/MonetDB5/src/modules/kernel
In directory sc8-pr-cvs16.sourceforge.net:/tmp/cvs-serv29764/src/modules/kernel

Modified Files:
        group.mx 
Log Message:
added an optimized group for ordered data.


Index: group.mx
===================================================================
RCS file: /cvsroot/monetdb/MonetDB5/src/modules/kernel/group.mx,v
retrieving revision 1.106
retrieving revision 1.107
diff -u -d -r1.106 -r1.107
--- group.mx    8 Feb 2008 22:36:33 -0000       1.106
+++ group.mx    13 Feb 2008 09:10:17 -0000      1.107
@@ -244,7 +244,6 @@
 grp_new(BAT *b, BAT *h)
 {
        if (h) {
-               if (!(h->batDirty&2)) h = BATsetaccess(h, BAT_READ); 
                BATkey(h, TRUE);
                h->tsorted = 0;
                if ((h->hsorted = BAThordered(b)) & 1) {
@@ -255,6 +254,7 @@
                        BBPreclaim(h);
                        return GDK_FAIL;
                }
+               if (!(h->batDirty&2)) h = BATsetaccess(h, BAT_READ); 
                BBPkeepref(h->batCacheid);
        } else {
                assert(h);
@@ -492,6 +492,7 @@
                GDKfree(hash);
        return NULL;
 }
+
 @c
 static int
 tailtype(BAT *b, int str_trick)
@@ -1244,6 +1245,76 @@
        return MAL_SUCCEED;
 }
 
+@= group_ordered
+BAT *
+CTgroup_@1_ordered(BAT *b, BAT **res) 
+{
+       BAT *bn = BATnew(TYPE_void, TYPE_oid, BATcount(b));
+       BAT *histo = BATnew(TYPE_oid, TYPE_int, 1024); 
+       @1 *t = (@1*)Tloc(b, BUNfirst(b)), *e = t + BATcount(b), cur;
+       int cnt = 0;
+       oid *rt = (oid*)Tloc(bn, BUNfirst(bn)), curid;
+
+       if (bn == NULL || histo == NULL) 
+               return NULL;
+
+       bn->hseqbase = b->hseqbase;
+       cur = *t;
+       if (b->htype == TYPE_void) {
+               oid h = b->hseqbase;
+
+               curid = h;
+               for(; t < e; t++, h++) {
+                       if (*t != cur) {
+                               BUNins(histo, &curid, &cnt, FALSE);
+                               cur = *t;
+                               curid = h;
+                               cnt = 0;
+                       }
+                       cnt++;
+                       *rt++ = curid;
+               }
+       } else { /* TYPE_oid */
+               oid *h = (oid*)Hloc(b, BUNfirst(b));
+       
+               curid = *h;
+               for(; t < e; t++, h++) {
+                       if (*t != cur) {
+                               BUNins(histo, &curid, &cnt, FALSE);
+                               cur = *t;
+                               curid = *h;
+                               cnt = 0;
+                       }
+                       cnt++;
+                       *rt++ = curid;
+               }
+       }
+       BATsetcount(bn, BATcount(b));
+       if (BATcount(b))
+               BUNins(histo, &curid, &cnt, FALSE);
+       if (b->htype != bn->htype) {
+               BAT *r = VIEWcreate(b,bn);
+
+               BBPreleaseref(bn->batCacheid);
+               bn = r;
+       }
+       if (BAThordered(b)) {
+               bn->tsorted = b->tsorted;
+               histo->hsorted = b->hsorted;
+               histo->tsorted = b->tsorted;
+               BATkey(histo, TRUE);
+       }
+       if (b->hkey)
+               BATkey(bn, TRUE);
+       *res = bn;
+       return histo;
+}
+@c
+@:group_ordered(bte)@
+@:group_ordered(sht)@
+@:group_ordered(int)@
+@:group_ordered(lng)@
+
 group_export str GRPgroup_custom(int *rethisto, int *retbid, int *bid, int *N, int *rng);
 group_export str GRPgroup(int *rethisto, int *retbid, int *bid);
 str
@@ -1254,6 +1325,33 @@
        if ((b = BATdescriptor(*bid)) == NULL) {
                throw(MAL, "group.group", "Cannot access descriptor");
        }
+       if (BATcount(b) > 1 &&
+           BATtordered(b)&1 && 
+           ATOMtype(b->htype) == TYPE_oid && 
+           b->ttype <= TYPE_str) {
+               switch(tailtype(b,TRUE)) {
+               case TYPE_bte:
+                       histo = CTgroup_bte_ordered(b, &bn);
+                       break;
+               case TYPE_sht:
+                       histo = CTgroup_sht_ordered(b, &bn);
+                       break;
+               case TYPE_int:
+                       histo = CTgroup_int_ordered(b, &bn);
+                       break;
+               case TYPE_lng:
+                       histo = CTgroup_lng_ordered(b, &bn);
+                       break;
+               default:
+                       throw(MAL, "group.group", "Impossible type");
+               }
+               if (!histo)
+                       throw(MAL, "group.group", "ordered group failed");
+               BBPkeepref(*rethisto = histo->batCacheid);
+               BBPkeepref(*retbid = bn->batCacheid);
+               BBPreleaseref(b->batCacheid);
+               return MAL_SUCCEED;
+       }
        if (BATcount(b) > 1024*1024 &&
            (ATOMstorage(b->ttype) == TYPE_int || 
             ATOMstorage(b->ttype) == TYPE_lng)) { 


-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
_______________________________________________
Monetdb-checkins mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-checkins

Reply via email to