Changeset: 9a8bf76a4ca2 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=9a8bf76a4ca2
Modified Files:
        clients/Tests/exports.stable.out
        monetdb5/modules/mal/cluster.c
        monetdb5/modules/mal/cluster.h
        monetdb5/modules/mal/cluster.mal
Branch: int128
Log Message:

added int128 / "hge" support in monetdb5/modules/mal/cluster.*


diffs (truncated from 397 to 300 lines):

diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -1098,6 +1098,7 @@ str CLRycc(color *c, int *y, int *cr, in
 str CLS_create2_bte(bat *rpsum, bat *rcmap, bat *b, unsigned int *bits, 
unsigned int *offset, bit *order);
 str CLS_create2_dbl(bat *rpsum, bat *rcmap, bat *b, unsigned int *bits, 
unsigned int *offset, bit *order);
 str CLS_create2_flt(bat *rpsum, bat *rcmap, bat *b, unsigned int *bits, 
unsigned int *offset, bit *order);
+str CLS_create2_hge(bat *rpsum, bat *rcmap, bat *b, unsigned int *bits, 
unsigned int *offset, bit *order);
 str CLS_create2_int(bat *rpsum, bat *rcmap, bat *b, unsigned int *bits, 
unsigned int *offset, bit *order);
 str CLS_create2_lng(bat *rpsum, bat *rcmap, bat *b, unsigned int *bits, 
unsigned int *offset, bit *order);
 str CLS_create2_sht(bat *rpsum, bat *rcmap, bat *b, unsigned int *bits, 
unsigned int *offset, bit *order);
@@ -1105,6 +1106,7 @@ str CLS_create2_wrd(bat *rpsum, bat *rcm
 str CLS_create_bte(bat *rpsum, bat *rcmap, bat *b, unsigned int *bits, 
unsigned int *offset);
 str CLS_create_dbl(bat *rpsum, bat *rcmap, bat *b, unsigned int *bits, 
unsigned int *offset);
 str CLS_create_flt(bat *rpsum, bat *rcmap, bat *b, unsigned int *bits, 
unsigned int *offset);
+str CLS_create_hge(bat *rpsum, bat *rcmap, bat *b, unsigned int *bits, 
unsigned int *offset);
 str CLS_create_int(bat *rpsum, bat *rcmap, bat *b, unsigned int *bits, 
unsigned int *offset);
 str CLS_create_lng(bat *rpsum, bat *rcmap, bat *b, unsigned int *bits, 
unsigned int *offset);
 str CLS_create_sht(bat *rpsum, bat *rcmap, bat *b, unsigned int *bits, 
unsigned int *offset);
diff --git a/monetdb5/modules/mal/cluster.c b/monetdb5/modules/mal/cluster.c
--- a/monetdb5/modules/mal/cluster.c
+++ b/monetdb5/modules/mal/cluster.c
@@ -166,6 +166,25 @@ CLUSTER_key_lng( BAT *map, BAT *b)
        }
 }
 
+#ifdef HAVE_HGE
+static void
+CLUSTER_key_hge( BAT *map, BAT *b) /* hge variant: stores one hash value in the tail of map for every hge in the tail of b */
+{
+       hge *bt, *be;
+       oid *o;
+
+       assert(BUNfirst(map) == 0); /* the Tloc(.., 0) calls below address both BATs from BUN 0 */
+       assert(BUNfirst(b) == 0);
+       o = (oid*)Tloc(map, 0); /* output cursor */
+       bt = (hge*)Tloc(b, 0); /* input cursor */
+       be = bt + BATcount(b); /* one past the last input value */
+       for ( ; bt < be; bt++){
+               BUN h = hash_hge(b->T->hash,bt); /* NOTE(review): reads b->T->hash; presumably the caller has set it up - confirm */
+               *o++= h; /* NOTE(review): no capacity check on map here; presumably sized by the caller - confirm */
+       }
+}
+#endif
+
 static void
 CLUSTER_key_flt( BAT *map, BAT *b)
 {
@@ -300,6 +319,9 @@ CLUSTER_key( bat *M, bat *B){
                case TYPE_wrd: CLUSTER_key_wrd(map,b); break;
                case TYPE_int: CLUSTER_key_int(map,b); break;
                case TYPE_lng: CLUSTER_key_lng(map,b); break;
+#ifdef HAVE_HGE
+               case TYPE_hge: CLUSTER_key_hge(map,b); break;
+#endif
                case TYPE_flt: CLUSTER_key_flt(map,b); break;
                case TYPE_dbl: CLUSTER_key_dbl(map,b); break;
                case TYPE_str: CLUSTER_key_str(map,b); break;
@@ -464,6 +486,9 @@ CLUSTER_apply(bat *bid, BAT *b, BAT *cma
        case TYPE_wrd: CLUSTER_column_wrd(nb, b, cmap);break;
        case TYPE_int: CLUSTER_column_int(nb, b, cmap);break;
        case TYPE_lng: CLUSTER_column_lng(nb, b, cmap);break;
+#ifdef HAVE_HGE
+       case TYPE_hge: CLUSTER_column_hge(nb, b, cmap);break;
+#endif
        case TYPE_flt: CLUSTER_column_flt(nb, b, cmap);break;
        case TYPE_dbl: CLUSTER_column_dbl(nb, b, cmap);break;
 */
@@ -935,6 +960,88 @@ CLS_create_lng( bat *rpsum, bat *rcmap, 
        return MAL_SUCCEED;
 }
 
+#ifdef HAVE_HGE
+str
+CLS_create_hge( bat *rpsum, bat *rcmap, bat *B, unsigned int *Bits, unsigned 
int *offset) /* hge variant: builds a per-bucket prefix sum (id returned via rpsum) and a cluster map (id returned via rcmap) for BAT *B */
+{
+       BAT *psum, *cmap, *b;
+       int i, mask = 0, off = *offset; /* off is the right shift applied before masking; the unsigned *offset is converted to int here */
+       unsigned int bits = *Bits;
+       hge *bt, *be; 
+       wrd *cnt, *pos, sum, *m;
+
+       if (off < 0) /* an offset that turned negative in the conversion is treated as 0 */
+               off = 0;
+       if (bits >= sizeof(int)*8) /* keeps the 1<<bits below within the width of int */
+               throw(MAL, "cluster.new", TOO_MANY_BITS);
+
+       if ((bits) != 0)
+               bits--; /* a request for k > 0 bits therefore yields 2^(k-1) buckets; k == 0 yields one bucket */
+       mask = (1<<bits) - 1;
+       if ((b = BATdescriptor(*B)) == NULL)
+               throw(MAL, "cluster.new", INTERNAL_BAT_ACCESS);
+
+       if ((psum = BATnew(TYPE_void, TYPE_wrd, mask+1)) == NULL) { /* one wrd counter per bucket */
+               BBPunfix(*B);
+               throw(MAL, "cluster.new", MAL_MALLOC_FAIL);
+       }
+       BATsetcount(psum, mask+1);
+       BATseqbase(psum,0);
+       psum->tsorted= TRUE;
+       psum->trevsorted= FALSE;
+       psum->tdense= FALSE;
+       cnt = (wrd*)Tloc(psum, BUNfirst(psum));
+       for (i=0 ; i <= mask; i++) /* clear all counters */
+               cnt[i] = 0;
+
+       bt = (hge*)Tloc(b, BUNfirst(b));
+       be = bt + BATcount(b);
+       /* First make a histogram */
+       for ( ; bt < be; bt++) {
+               int h = (((int)(*bt)) >> off) & mask; /* NOTE(review): the hge value is converted to int before the shift, so bits above the int width cannot select a bucket; off is not range-checked against the int width - confirm intended */
+               cnt[h]++;
+       }
+
+       /* convert histogram into prefix sum */
+       pos = (wrd*)GDKzalloc(sizeof(wrd) * (mask+1)); 
+       for (sum = 0, i=0 ; i <= mask; i++) { /* NOTE(review): pos is written in this loop without a NULL check on the GDKzalloc result - confirm */
+               wrd psum = sum; /* local wrd that shadows the BAT pointer psum inside this loop */
+
+               sum += cnt[i];
+               pos[i] = cnt[i] = psum; /* both arrays receive the start position of bucket i */
+       }
+       
+       /* time to create the cluster map */
+       if ((cmap = BATnew(TYPE_void, TYPE_wrd, BATcount(b))) == NULL) {
+               BBPunfix(*B); /* failure path releases b, psum and the pos scratch array */
+               BBPunfix(psum->batCacheid);
+               GDKfree(pos);
+               throw(MAL, "cluster.new", MAL_MALLOC_FAIL);
+       }
+       BATsetcount(cmap, BATcount(b));
+       BATseqbase(cmap, b->H->seq); /* cmap takes over the head sequence base of b */
+       cmap->tsorted= FALSE;
+       cmap->trevsorted= FALSE;
+       cmap->tdense= FALSE;
+       m = (wrd*)Tloc(cmap, BUNfirst(cmap));
+
+       bt = (hge*)Tloc(b, BUNfirst(b)); /* second pass over the input, restarting at the first value */
+       be = bt + BATcount(b);
+       for ( ; bt < be; ) {
+               int h = (((int)(*bt++)) >> off) & mask; /* same bucket function as in the histogram pass */
+               *m++ = pos[h]++; /* hand out the next free slot of bucket h */
+       }
+
+       GDKfree(pos);
+       BBPunfix(*B);
+       BBPkeepref(*rpsum = psum->batCacheid); /* the BAT ids are stored in the output parameters */
+       BBPkeepref(*rcmap = cmap->batCacheid);
+       psum = BATsetaccess(psum, BAT_READ);
+       cmap = BATsetaccess(cmap, BAT_READ);
+       return MAL_SUCCEED;
+}
+#endif
+
 str
 CLS_create_dbl( bat *rpsum, bat *rcmap, bat *B, unsigned int *Bits, unsigned 
int *offset)
 {
@@ -1699,6 +1806,129 @@ CLS_create2_lng( bat *rpsum, bat *rcmap,
        return MAL_SUCCEED;
 }
 
+#ifdef HAVE_HGE
+str
+CLS_create2_hge( bat *rpsum, bat *rcmap, bat *B, unsigned int *Bits, unsigned 
int *offset, bit *order) /* hge variant: builds a per-bucket prefix sum (id returned via rpsum) and a bte cluster map (id returned via rcmap) for BAT *B; *order selects the range-partitioning path for sorted input */
+{
+       BAT *psum, *cmap, *b;
+       int i, mask = 0, off = *offset; /* off is the right shift applied before masking; the unsigned *offset is converted to int here */
+       unsigned int bits = *Bits;
+       hge *bt, *be, *bs; 
+       wrd *cnt, sum;
+
+       if (off < 0) /* an offset that turned negative in the conversion is treated as 0 */
+               off = 0;
+       if (bits >= sizeof(int)*8) /* keeps the 1<<bits below within the width of int */
+               throw(MAL, "cluster.new", TOO_MANY_BITS);
+
+       if ((bits) != 0)
+               bits--; /* a request for k > 0 bits therefore yields 2^(k-1) buckets; k == 0 yields one bucket */
+       mask = (1<<bits) - 1;
+       if ((b = BATdescriptor(*B)) == NULL)
+               throw(MAL, "cluster.new", INTERNAL_BAT_ACCESS);
+
+       if ((psum = BATnew(TYPE_void, TYPE_wrd, mask+1)) == NULL) { /* one wrd counter per bucket */
+               BBPunfix(*B);
+               throw(MAL, "cluster.new", MAL_MALLOC_FAIL);
+       }
+       BATsetcount(psum, mask+1);
+       BATseqbase(psum,0);
+       psum->tsorted= TRUE;
+       psum->trevsorted= FALSE;
+       psum->tdense= FALSE;
+       cnt = (wrd*)Tloc(psum, BUNfirst(psum));
+       for (i=0 ; i <= mask; i++) /* clear all counters */
+               cnt[i] = 0;
+
+       bs = bt = (hge*)Tloc(b, BUNfirst(b)); /* bs keeps the start of the data for position arithmetic below */
+       be = bt + BATcount(b);
+
+       /* Make a histogram and fill the cluster map */
+       if (b->tsorted) {
+               bte *mb, *m, h;
+
+               /* time to create the cluster map */
+               if ((cmap = BATnew((!*order)?TYPE_void:TYPE_oid, TYPE_bte, 
BATcount(b))) == NULL) { /* the head is materialized as oid only when *order is set */
+                       BBPunfix(*B);
+                       BBPunfix(psum->batCacheid);
+                       throw(MAL, "cluster.new", MAL_MALLOC_FAIL);
+               }
+               BATseqbase(cmap, b->H->seq);
+               cmap->tdense = FALSE;
+               mb = m = (bte*)Tloc(cmap, BUNfirst(cmap)); /* mb marks the start so the entry count can be derived afterwards */
+
+               if (!*order) {
+                       cmap->tsorted = FALSE;
+                       cmap->trevsorted = FALSE;
+                       for ( ; bt < be; bt++) {
+                               int h = (((int)(*bt)) >> off) & mask; /* this int h shadows the bte h declared above; the hge value is converted to int before the shift */
+                               *m++ = h; /* NOTE(review): h is stored into a bte slot while mask is only bounded by the int width - confirm larger bucket numbers cannot occur */
+                               cnt[h]++;
+                       }
+               } else { /* try an optimized distribution, 1/Nth in each part */
+                       oid *o, base;
+                       lng sz = 0, parts = mask+1, psz = BATcount(b)/parts; /* psz is the target number of values per part */
+                       hge prev = *bt - 1; /* NOTE(review): *bt is read before any bt < be check, which matters for an empty b - confirm callers exclude that case */
+                       h = -1; /* no part opened yet; this is the bte h declared above */
+
+                       cmap->hdense= FALSE;
+                       base = b->hseqbase;
+                       o = (oid*)Hloc(cmap, BUNfirst(cmap));
+                       for ( ; bt < be; bt++, sz++) {
+                               if (prev != *bt && sz >= (h+1)*psz && h < 
(parts-1)) { /* open the next part only on a value change, once the quota so far is reached and parts remain */
+                                       h++;
+                                       assert(base + bt - bs >= 0);
+                                       assert(base + bt - bs <= (ptrdiff_t) 
GDK_oid_max);
+                                       *o++ = (oid) (base + bt - bs); /* head: oid at which this part starts */
+                                       *m++ = h; /* tail: part number */
+                               }
+                               cnt[h]++;
+                               prev = *bt;
+                       }
+               }
+               assert(m - mb >= 0);
+               assert((lng) (m - mb) <= (lng) BUN_MAX);
+               BATsetcount(cmap, (BUN) (m - mb)); /* count equals the number of tail entries actually written */
+       } else {
+               bte *m;
+
+               /* time to create the cluster map */
+               if ((cmap = BATnew(TYPE_void, TYPE_bte, BATcount(b))) == NULL) {
+                       BBPunfix(*B);
+                       BBPunfix(psum->batCacheid);
+                       throw(MAL, "cluster.new", MAL_MALLOC_FAIL);
+               }
+               BATsetcount(cmap, BATcount(b));
+               BATseqbase(cmap, b->H->seq);
+               cmap->tsorted = FALSE;
+               cmap->trevsorted = FALSE;
+               cmap->tdense = FALSE;
+               m = (bte*)Tloc(cmap, BUNfirst(cmap));
+
+               for ( ; bt < be; bt++) {
+                       int h = (((int)(*bt)) >> off) & mask; /* the hge value is converted to int before the shift */
+                       cnt[h]++;
+                       *m++ = h; /* NOTE(review): stored into a bte slot while mask is only bounded by the int width - confirm */
+               }
+       }
+
+       /* convert histogram into prefix sum */
+       for (sum = 0, i=0 ; i <= mask; i++) {
+               wrd psum = sum; /* local wrd that shadows the BAT pointer psum inside this loop */
+
+               sum += cnt[i];
+               cnt[i] = psum; /* counter i is replaced by the start position of bucket i */
+       }
+       
+       BBPunfix(*B);
+       BBPkeepref(*rpsum = psum->batCacheid); /* the BAT ids are stored in the output parameters */
+       BBPkeepref(*rcmap = cmap->batCacheid);
+       psum = BATsetaccess(psum, BAT_READ);
+       cmap = BATsetaccess(cmap, BAT_READ);
+       return MAL_SUCCEED;
+}
+#endif
+
 str
 CLS_create2_flt( bat *rpsum, bat *rcmap, bat *B, unsigned int *Bits, unsigned 
int *offset, bit *order)
 {
@@ -2017,6 +2247,27 @@ CLS_map_lng(BAT *rb, BAT *cmap, BAT *b)
        return MAL_SUCCEED;
 }
 
+#ifdef HAVE_HGE
+static str  
+CLS_map_hge(BAT *rb, BAT *cmap, BAT *b)
+{
+       wrd *m;
+       hge *r, *bt, *be;
+
+       r = (hge*)Tloc(rb, BUNfirst(rb));
+       m = (wrd*)Tloc(cmap, BUNfirst(cmap));
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to