Changeset: 9a8bf76a4ca2 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=9a8bf76a4ca2
Modified Files:
clients/Tests/exports.stable.out
monetdb5/modules/mal/cluster.c
monetdb5/modules/mal/cluster.h
monetdb5/modules/mal/cluster.mal
Branch: int128
Log Message:
added int128 / "hge" support in monetdb5/modules/mal/cluster.*
diffs (truncated from 397 to 300 lines):
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -1098,6 +1098,7 @@ str CLRycc(color *c, int *y, int *cr, in
str CLS_create2_bte(bat *rpsum, bat *rcmap, bat *b, unsigned int *bits,
unsigned int *offset, bit *order);
str CLS_create2_dbl(bat *rpsum, bat *rcmap, bat *b, unsigned int *bits,
unsigned int *offset, bit *order);
str CLS_create2_flt(bat *rpsum, bat *rcmap, bat *b, unsigned int *bits,
unsigned int *offset, bit *order);
+str CLS_create2_hge(bat *rpsum, bat *rcmap, bat *b, unsigned int *bits,
unsigned int *offset, bit *order);
str CLS_create2_int(bat *rpsum, bat *rcmap, bat *b, unsigned int *bits,
unsigned int *offset, bit *order);
str CLS_create2_lng(bat *rpsum, bat *rcmap, bat *b, unsigned int *bits,
unsigned int *offset, bit *order);
str CLS_create2_sht(bat *rpsum, bat *rcmap, bat *b, unsigned int *bits,
unsigned int *offset, bit *order);
@@ -1105,6 +1106,7 @@ str CLS_create2_wrd(bat *rpsum, bat *rcm
str CLS_create_bte(bat *rpsum, bat *rcmap, bat *b, unsigned int *bits,
unsigned int *offset);
str CLS_create_dbl(bat *rpsum, bat *rcmap, bat *b, unsigned int *bits,
unsigned int *offset);
str CLS_create_flt(bat *rpsum, bat *rcmap, bat *b, unsigned int *bits,
unsigned int *offset);
+str CLS_create_hge(bat *rpsum, bat *rcmap, bat *b, unsigned int *bits,
unsigned int *offset);
str CLS_create_int(bat *rpsum, bat *rcmap, bat *b, unsigned int *bits,
unsigned int *offset);
str CLS_create_lng(bat *rpsum, bat *rcmap, bat *b, unsigned int *bits,
unsigned int *offset);
str CLS_create_sht(bat *rpsum, bat *rcmap, bat *b, unsigned int *bits,
unsigned int *offset);
diff --git a/monetdb5/modules/mal/cluster.c b/monetdb5/modules/mal/cluster.c
--- a/monetdb5/modules/mal/cluster.c
+++ b/monetdb5/modules/mal/cluster.c
@@ -166,6 +166,25 @@ CLUSTER_key_lng( BAT *map, BAT *b)
}
}
+#ifdef HAVE_HGE
+static void
+CLUSTER_key_hge( BAT *map, BAT *b)
+{
+ hge *bt, *be;
+ oid *o;
+
+ assert(BUNfirst(map) == 0);
+ assert(BUNfirst(b) == 0);
+ o = (oid*)Tloc(map, 0);
+ bt = (hge*)Tloc(b, 0);
+ be = bt + BATcount(b);
+ for ( ; bt < be; bt++){
+ BUN h = hash_hge(b->T->hash,bt);
+ *o++= h;
+ }
+}
+#endif
+
static void
CLUSTER_key_flt( BAT *map, BAT *b)
{
@@ -300,6 +319,9 @@ CLUSTER_key( bat *M, bat *B){
case TYPE_wrd: CLUSTER_key_wrd(map,b); break;
case TYPE_int: CLUSTER_key_int(map,b); break;
case TYPE_lng: CLUSTER_key_lng(map,b); break;
+#ifdef HAVE_HGE
+ case TYPE_hge: CLUSTER_key_hge(map,b); break;
+#endif
case TYPE_flt: CLUSTER_key_flt(map,b); break;
case TYPE_dbl: CLUSTER_key_dbl(map,b); break;
case TYPE_str: CLUSTER_key_str(map,b); break;
@@ -464,6 +486,9 @@ CLUSTER_apply(bat *bid, BAT *b, BAT *cma
case TYPE_wrd: CLUSTER_column_wrd(nb, b, cmap);break;
case TYPE_int: CLUSTER_column_int(nb, b, cmap);break;
case TYPE_lng: CLUSTER_column_lng(nb, b, cmap);break;
+#ifdef HAVE_HGE
+ case TYPE_hge: CLUSTER_column_hge(nb, b, cmap);break;
+#endif
case TYPE_flt: CLUSTER_column_flt(nb, b, cmap);break;
case TYPE_dbl: CLUSTER_column_dbl(nb, b, cmap);break;
*/
@@ -935,6 +960,88 @@ CLS_create_lng( bat *rpsum, bat *rcmap,
return MAL_SUCCEED;
}
+#ifdef HAVE_HGE
+str
+CLS_create_hge( bat *rpsum, bat *rcmap, bat *B, unsigned int *Bits, unsigned
int *offset)
+{
+ BAT *psum, *cmap, *b;
+ int i, mask = 0, off = *offset;
+ unsigned int bits = *Bits;
+ hge *bt, *be;
+ wrd *cnt, *pos, sum, *m;
+
+ if (off < 0)
+ off = 0;
+ if (bits >= sizeof(int)*8)
+ throw(MAL, "cluster.new", TOO_MANY_BITS);
+
+ if ((bits) != 0)
+ bits--;
+ mask = (1<<bits) - 1;
+ if ((b = BATdescriptor(*B)) == NULL)
+ throw(MAL, "cluster.new", INTERNAL_BAT_ACCESS);
+
+ if ((psum = BATnew(TYPE_void, TYPE_wrd, mask+1)) == NULL) {
+ BBPunfix(*B);
+ throw(MAL, "cluster.new", MAL_MALLOC_FAIL);
+ }
+ BATsetcount(psum, mask+1);
+ BATseqbase(psum,0);
+ psum->tsorted= TRUE;
+ psum->trevsorted= FALSE;
+ psum->tdense= FALSE;
+ cnt = (wrd*)Tloc(psum, BUNfirst(psum));
+ for (i=0 ; i <= mask; i++)
+ cnt[i] = 0;
+
+ bt = (hge*)Tloc(b, BUNfirst(b));
+ be = bt + BATcount(b);
+ /* First make a histogram */
+ for ( ; bt < be; bt++) {
+ int h = (((int)(*bt)) >> off) & mask;
+ cnt[h]++;
+ }
+
+ /* convert histogram into prefix sum */
+ pos = (wrd*)GDKzalloc(sizeof(wrd) * (mask+1));
+ for (sum = 0, i=0 ; i <= mask; i++) {
+ wrd psum = sum;
+
+ sum += cnt[i];
+ pos[i] = cnt[i] = psum;
+ }
+
+ /* time to create the cluster map */
+ if ((cmap = BATnew(TYPE_void, TYPE_wrd, BATcount(b))) == NULL) {
+ BBPunfix(*B);
+ BBPunfix(psum->batCacheid);
+ GDKfree(pos);
+ throw(MAL, "cluster.new", MAL_MALLOC_FAIL);
+ }
+ BATsetcount(cmap, BATcount(b));
+ BATseqbase(cmap, b->H->seq);
+ cmap->tsorted= FALSE;
+ cmap->trevsorted= FALSE;
+ cmap->tdense= FALSE;
+ m = (wrd*)Tloc(cmap, BUNfirst(cmap));
+
+ bt = (hge*)Tloc(b, BUNfirst(b));
+ be = bt + BATcount(b);
+ for ( ; bt < be; ) {
+ int h = (((int)(*bt++)) >> off) & mask;
+ *m++ = pos[h]++;
+ }
+
+ GDKfree(pos);
+ BBPunfix(*B);
+ BBPkeepref(*rpsum = psum->batCacheid);
+ BBPkeepref(*rcmap = cmap->batCacheid);
+ psum = BATsetaccess(psum, BAT_READ);
+ cmap = BATsetaccess(cmap, BAT_READ);
+ return MAL_SUCCEED;
+}
+#endif
+
str
CLS_create_dbl( bat *rpsum, bat *rcmap, bat *B, unsigned int *Bits, unsigned
int *offset)
{
@@ -1699,6 +1806,129 @@ CLS_create2_lng( bat *rpsum, bat *rcmap,
return MAL_SUCCEED;
}
+#ifdef HAVE_HGE
+str
+CLS_create2_hge( bat *rpsum, bat *rcmap, bat *B, unsigned int *Bits, unsigned
int *offset, bit *order)
+{
+ BAT *psum, *cmap, *b;
+ int i, mask = 0, off = *offset;
+ unsigned int bits = *Bits;
+ hge *bt, *be, *bs;
+ wrd *cnt, sum;
+
+ if (off < 0)
+ off = 0;
+ if (bits >= sizeof(int)*8)
+ throw(MAL, "cluster.new", TOO_MANY_BITS);
+
+ if ((bits) != 0)
+ bits--;
+ mask = (1<<bits) - 1;
+ if ((b = BATdescriptor(*B)) == NULL)
+ throw(MAL, "cluster.new", INTERNAL_BAT_ACCESS);
+
+ if ((psum = BATnew(TYPE_void, TYPE_wrd, mask+1)) == NULL) {
+ BBPunfix(*B);
+ throw(MAL, "cluster.new", MAL_MALLOC_FAIL);
+ }
+ BATsetcount(psum, mask+1);
+ BATseqbase(psum,0);
+ psum->tsorted= TRUE;
+ psum->trevsorted= FALSE;
+ psum->tdense= FALSE;
+ cnt = (wrd*)Tloc(psum, BUNfirst(psum));
+ for (i=0 ; i <= mask; i++)
+ cnt[i] = 0;
+
+ bs = bt = (hge*)Tloc(b, BUNfirst(b));
+ be = bt + BATcount(b);
+
+ /* Make a histogram and fill the cluster map */
+ if (b->tsorted) {
+ bte *mb, *m, h;
+
+ /* time to create the cluster map */
+ if ((cmap = BATnew((!*order)?TYPE_void:TYPE_oid, TYPE_bte,
BATcount(b))) == NULL) {
+ BBPunfix(*B);
+ BBPunfix(psum->batCacheid);
+ throw(MAL, "cluster.new", MAL_MALLOC_FAIL);
+ }
+ BATseqbase(cmap, b->H->seq);
+ cmap->tdense = FALSE;
+ mb = m = (bte*)Tloc(cmap, BUNfirst(cmap));
+
+ if (!*order) {
+ cmap->tsorted = FALSE;
+ cmap->trevsorted = FALSE;
+ for ( ; bt < be; bt++) {
+ int h = (((int)(*bt)) >> off) & mask;
+ *m++ = h;
+ cnt[h]++;
+ }
+ } else { /* try an optimized distribution, 1/Nth in each part */
+ oid *o, base;
+ lng sz = 0, parts = mask+1, psz = BATcount(b)/parts;
+ hge prev = *bt - 1;
+ h = -1;
+
+ cmap->hdense= FALSE;
+ base = b->hseqbase;
+ o = (oid*)Hloc(cmap, BUNfirst(cmap));
+ for ( ; bt < be; bt++, sz++) {
+ if (prev != *bt && sz >= (h+1)*psz && h <
(parts-1)) {
+ h++;
+ assert(base + bt - bs >= 0);
+ assert(base + bt - bs <= (ptrdiff_t)
GDK_oid_max);
+ *o++ = (oid) (base + bt - bs);
+ *m++ = h;
+ }
+ cnt[h]++;
+ prev = *bt;
+ }
+ }
+ assert(m - mb >= 0);
+ assert((lng) (m - mb) <= (lng) BUN_MAX);
+ BATsetcount(cmap, (BUN) (m - mb));
+ } else {
+ bte *m;
+
+ /* time to create the cluster map */
+ if ((cmap = BATnew(TYPE_void, TYPE_bte, BATcount(b))) == NULL) {
+ BBPunfix(*B);
+ BBPunfix(psum->batCacheid);
+ throw(MAL, "cluster.new", MAL_MALLOC_FAIL);
+ }
+ BATsetcount(cmap, BATcount(b));
+ BATseqbase(cmap, b->H->seq);
+ cmap->tsorted = FALSE;
+ cmap->trevsorted = FALSE;
+ cmap->tdense = FALSE;
+ m = (bte*)Tloc(cmap, BUNfirst(cmap));
+
+ for ( ; bt < be; bt++) {
+ int h = (((int)(*bt)) >> off) & mask;
+ cnt[h]++;
+ *m++ = h;
+ }
+ }
+
+ /* convert histogram into prefix sum */
+ for (sum = 0, i=0 ; i <= mask; i++) {
+ wrd psum = sum;
+
+ sum += cnt[i];
+ cnt[i] = psum;
+ }
+
+ BBPunfix(*B);
+ BBPkeepref(*rpsum = psum->batCacheid);
+ BBPkeepref(*rcmap = cmap->batCacheid);
+ psum = BATsetaccess(psum, BAT_READ);
+ cmap = BATsetaccess(cmap, BAT_READ);
+ return MAL_SUCCEED;
+}
+#endif
+
str
CLS_create2_flt( bat *rpsum, bat *rcmap, bat *B, unsigned int *Bits, unsigned
int *offset, bit *order)
{
@@ -2017,6 +2247,27 @@ CLS_map_lng(BAT *rb, BAT *cmap, BAT *b)
return MAL_SUCCEED;
}
+#ifdef HAVE_HGE
+static str
+CLS_map_hge(BAT *rb, BAT *cmap, BAT *b)
+{
+ wrd *m;
+ hge *r, *bt, *be;
+
+ r = (hge*)Tloc(rb, BUNfirst(rb));
+ m = (wrd*)Tloc(cmap, BUNfirst(cmap));
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list