Changeset: 54adcef69551 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/54adcef69551
Modified Files:
sql/backends/monet5/dict.c
sql/storage/bat/bat_storage.c
sql/storage/sql_storage.h
Branch: default
Log Message:
add support for dicts using int offsets
diffs (truncated from 630 to 300 lines):
diff --git a/sql/backends/monet5/dict.c b/sql/backends/monet5/dict.c
--- a/sql/backends/monet5/dict.c
+++ b/sql/backends/monet5/dict.c
@@ -33,7 +33,6 @@ BATmaxminpos_bte(BAT *o, bte m)
bte minval = m<0?GDK_bte_min:0; /* Later once nils use a bitmask we can
include -128 in the range */
bte maxval = m<0?GDK_bte_max:m;
- assert(o->ttype == TYPE_bte);
o->tnil = m<0?true:false;
o->tnonil = m<=0?false:true;
bte *op = (bte*)Tloc(o, 0);
@@ -80,6 +79,33 @@ BATmaxminpos_sht(BAT *o, sht m)
o->tmaxpos = maxpos;
}
+static void /* Set the tnil/tnonil flags and the tminpos/tmaxpos positions of the int BAT o; m is the largest value expected in o (the caller visible in this patch passes BATcount(u)-1). */
+BATmaxminpos_int(BAT *o, int m)
+{
+ BUN minpos = BUN_NONE, maxpos = BUN_NONE, p, q; /* BUN_NONE is kept when the searched value never occurs */
+ int minval = m<0?GDK_int_min:0; /* Later once nils use a bitmask we can
include INT_MIN in the range (NOTE(review): this said -32768, which is the sht bound; presumably INT_MIN is the int nil -- confirm) */
+ int maxval = m<0?GDK_int_max:m; /* a negative m selects the full GDK_int_min..GDK_int_max range */
+
+ assert(o->ttype == TYPE_int);
+ o->tnil = m<0?true:false; /* tnil is set only for a negative m */
+ o->tnonil = m<=0?false:true; /* tnonil is set only for a strictly positive m */
+ int *op = (int*)Tloc(o, 0);
+ BATloop(o, p, q) { /* first position whose value equals minval */
+ if (op[p] == minval) {
+ minpos = p;
+ break;
+ }
+ }
+ BATloop(o, p, q) { /* first position whose value equals maxval */
+ if (op[p] == maxval) {
+ maxpos = p;
+ break;
+ }
+ }
+ o->tminpos = minpos;
+ o->tmaxpos = maxpos;
+}
+
static str
DICTcompress_intern(BAT **O, BAT **U, BAT *b, bool ordered, bool persists,
bool smallest_type)
{
@@ -91,12 +117,12 @@ DICTcompress_intern(BAT **O, BAT **U, BA
BUN cnt = BATcount(u);
/* create hash on u */
- int tt = (cnt<256)?TYPE_bte:TYPE_sht;
+ int tt = (cnt<256)?TYPE_bte:(cnt<65536)?TYPE_sht:TYPE_int;
if (!smallest_type) {
BUN cnt = BATcount(b);
- tt = (cnt<256)?TYPE_bte:TYPE_sht;
+ tt = (cnt<256)?TYPE_bte:(cnt<65536)?TYPE_sht:TYPE_int;
}
- if (cnt >= 64*1024) {
+ if (cnt >= INT_MAX) {
bat_destroy(u);
throw(SQL, "dict.compress", SQLSTATE(3F000) "dict compress: too
many values");
}
@@ -153,7 +179,7 @@ DICTcompress_intern(BAT **O, BAT **U, BA
if (BATcount(u) > 0)
BATmaxminpos_bte(o, (bte) (BATcount(u)-1));
- } else {
+ } else if (tt == TYPE_sht) {
sht *op = (sht*)Tloc(o, 0);
bool havenil = false;
BATloop(b, p, q) {
@@ -172,6 +198,25 @@ DICTcompress_intern(BAT **O, BAT **U, BA
if (BATcount(u) > 0)
BATmaxminpos_sht(o, (sht) (BATcount(u)-1));
+ } else {
+ int *op = (int*)Tloc(o, 0);
+ bool havenil = false;
+ BATloop(b, p, q) {
+ BUN up = 0;
+ HASHloop(ui, ui.b->thash, up, BUNtail(bi, p)) {
+ op[p] = (int)up;
+ havenil |= is_int_nil(op[p]);
+ }
+ }
+ BATsetcount(o, BATcount(b));
+ o->tsorted = (u->tsorted && bi.sorted);
+ o->trevsorted = false;
+ o->tnil = havenil;
+ o->tnonil = !havenil;
+ o->tkey = bi.key;
+
+ if (BATcount(u) > 0)
+ BATmaxminpos_int(o, (int) (BATcount(u)-1));
}
bat_iterator_end(&bi);
*O = o;
@@ -339,8 +384,7 @@ DICTdecompress_(BAT *o, BAT *u, role_t r
}
}
}
- } else {
- assert(o->ttype == TYPE_sht);
+ } else if (o->ttype == TYPE_sht) {
unsigned short *op = Tloc(o, 0);
switch (ATOMbasetype(u->ttype)) {
@@ -365,6 +409,35 @@ DICTdecompress_(BAT *o, BAT *u, role_t r
}
}
}
+ } else if (o->ttype == TYPE_int) {
+ unsigned int *op = Tloc(o, 0);
+
+ switch (ATOMbasetype(u->ttype)) {
+ case TYPE_int:
+ decompress_loop(int);
+ break;
+ case TYPE_lng:
+ decompress_loop(lng);
+ break;
+#ifdef HAVE_HGE
+ case TYPE_hge:
+ decompress_loop(hge);
+ break;
+#endif
+ default:
+ BATloop(o, p, q) {
+ BUN up = op[p];
+ if (BUNappend(b, BUNtail(ui, up), false) !=
GDK_SUCCEED) {
+ bat_iterator_end(&oi);
+ bat_destroy(b);
+ return NULL;
+ }
+ }
+ }
+ } else {
+ bat_iterator_end(&oi);
+ bat_destroy(b);
+ return NULL;
}
bat_iterator_end(&oi);
return b;
@@ -440,6 +513,22 @@ convert_oid( BAT *o, int rt)
nil |= ((short)rp[p] == sht_nil);
}
}
+ } else if (rt == TYPE_int) {
+ unsigned int *rp = Tloc(b, 0); /* rt == TYPE_int: write int-wide values (the unsigned short pointer copied from the sht branch truncated every value and filled only half the buffer); assumes b was created with type rt -- confirm against the allocation above this hunk */
+ if (oi.type == TYPE_void) { /* void input: the value at position p is p + o->tseqbase */
+ BATloop(o, p, q) {
+ rp[p] = (unsigned int) (p+o->tseqbase);
+ brokenrange |= ((int)rp[p] < 0); /* value does not fit the non-negative int range */
+ nil |= ((int)rp[p] == int_nil); /* compare at int width; a (short) operand could never equal int_nil */
+ }
+ } else { /* input values are read from o's tail as oids */
+ oid *op = Tloc(o, 0);
+ BATloop(o, p, q) {
+ rp[p] = (unsigned int) op[p];
+ brokenrange |= ((int)rp[p] < 0); /* value does not fit the non-negative int range */
+ nil |= ((int)rp[p] == int_nil); /* compare at int width; a (short) operand could never equal int_nil */
+ }
+ }
} else {
assert(0);
}
@@ -583,6 +672,17 @@ DICTrenumber_intern( BAT *o, BAT *lc, BA
BATsetcount(no, cnt);
BATnegateprops(no);
no->tkey = oi.key;
+ } else if (oi.type == TYPE_int) {
+ int *op = Tloc(no, 0); /* destination: renumbered int offsets */
+ oid *c = Tloc(rc, 0); /* c[old offset] gives the new offset */
+ unsigned int *ip = (unsigned int *) oi.base; /* oi.type == TYPE_int, so the source must be read at int width (unsigned short here read half-words and walked only half of the buffer) */
+
+ for(BUN i = 0; i<cnt; i++) {
+ op[i] = (int) ((BUN)ip[i]==offcnt?offcnt:c[ip[i]]); /* the value offcnt is passed through unchanged; every other offset is looked up in c */
+ }
+ BATsetcount(no, cnt);
+ BATnegateprops(no);
+ no->tkey = oi.key;
} else {
assert(0);
}
@@ -741,6 +841,9 @@ DICTthetaselect(Client cntxt, MalBlkPtr
} else if (loi.type == TYPE_sht) {
sht val = (sht)p;
bn = BATthetaselect(lo, lc, &val, op);
+ } else if (loi.type == TYPE_int) {
+ int val = (int)p;
+ bn = BATthetaselect(lo, lc, &val, op);
} else
assert(0);
if (bn && (op[0] == '<' || op[0] == '>' ||
op[0] == '!') && (!lvi.nonil || lvi.nil)) { /* filter the NULL value out */
@@ -753,6 +856,9 @@ DICTthetaselect(Client cntxt, MalBlkPtr
} else if (loi.type ==
TYPE_sht) {
sht val = (sht)p;
nbn =
BATthetaselect(lo, bn, &val, "<>");
+ } else if (loi.type ==
TYPE_int) {
+ int val = (int)p;
+ nbn =
BATthetaselect(lo, bn, &val, "<>");
} else
assert(0);
BBPreclaim(bn);
@@ -773,6 +879,9 @@ DICTthetaselect(Client cntxt, MalBlkPtr
} else if (loi.type == TYPE_sht) {
sht val = (sht)p;
bn = BATthetaselect(lo, lc,
&val, op);
+ } else if (loi.type == TYPE_int) {
+ int val = (int)p;
+ bn = BATthetaselect(lo, lc,
&val, op);
} else
assert(0);
} else if (lc) { /* all rows pass, use input
candidate list */
@@ -891,6 +1000,10 @@ DICTselect(Client cntxt, MalBlkPtr mb, M
sht lpos = (sht)p;
sht hpos = (sht)q;
bn = BATselect(lo, lc, &lpos, &hpos, true, hi,
anti, false);
+ } else if (loi.type == TYPE_int) {
+ int lpos = (int)p;
+ int hpos = (int)q;
+ bn = BATselect(lo, lc, &lpos, &hpos, true, hi,
anti, false);
} else
assert(0);
} else {
@@ -925,16 +1038,43 @@ DICTselect(Client cntxt, MalBlkPtr mb, M
BAT *
-DICTenlarge(BAT *offsets, BUN cnt, BUN sz, role_t role)
+DICTenlarge(BAT *offsets, BUN cnt, BUN sz, int type, role_t role)
{
- BAT *n = COLnew(offsets->hseqbase, TYPE_sht, sz, role);
+ BAT *n = NULL;
+ if (type == TYPE_sht) {
+ if (offsets->ttype != TYPE_bte)
+ return NULL;
+ n = COLnew(offsets->hseqbase, TYPE_sht, sz, role);
- if (!n)
+ if (!n)
+ return NULL;
+ unsigned char *o = Tloc(offsets, 0);
+ unsigned short *no = Tloc(n, 0);
+ for(BUN i = 0; i<cnt; i++) {
+ no[i] = o[i];
+ }
+ } else if (type == TYPE_int) {
+ if (offsets->ttype != TYPE_bte && offsets->ttype != TYPE_sht)
+ return NULL;
+ n = COLnew(offsets->hseqbase, TYPE_int, sz, role);
+ if (!n)
+ return NULL;
+ if (offsets->ttype == TYPE_bte) { /* 1-byte source offsets are read as unsigned char (this test was TYPE_sht, which swapped the two read widths and made the bte case read past the end of the buffer) */
+ unsigned char *o = Tloc(offsets, 0);
+ unsigned int *no = Tloc(n, 0);
+ for(BUN i = 0; i<cnt; i++) {
+ no[i] = o[i]; /* zero-extend into the int column */
+ }
+ } else { /* only TYPE_sht remains after the ttype check above: 2-byte source offsets */
+ unsigned short *o = Tloc(offsets, 0);
+ unsigned int *no = Tloc(n, 0);
+ for(BUN i = 0; i<cnt; i++) {
+ no[i] = o[i]; /* zero-extend into the int column */
+ }
+ }
+
+ } else {
return NULL;
- unsigned char *o = Tloc(offsets, 0);
- unsigned short *no = Tloc(n, 0);
- for(BUN i = 0; i<cnt; i++) {
- no[i] = o[i];
}
BATnegateprops(n);
n->tnil = offsets->tnil;
@@ -970,7 +1110,7 @@ DICTrenumber(Client cntxt, MalBlkPtr mb,
bat_destroy(m);
throw(SQL, "dict.renumber", SQLSTATE(HY013) MAL_MALLOC_FAIL);
}
- assert(o->ttype == TYPE_bte || o->ttype == TYPE_sht);
+ assert(o->ttype == TYPE_bte || o->ttype == TYPE_sht || o->ttype ==
TYPE_int);
bool havenil = false;
if (o->ttype == TYPE_bte) {
unsigned char *np = Tloc(n, 0);
@@ -980,7 +1120,7 @@ DICTrenumber(Client cntxt, MalBlkPtr mb,
np[i] = mp[op[i]];
havenil |= np[i] == 128;
}
- } else {
+ } else if (o->ttype == TYPE_sht) {
unsigned short *np = Tloc(n, 0);
unsigned short *op = Tloc(o, 0);
unsigned short *mp = Tloc(m, 0);
@@ -988,6 +1128,14 @@ DICTrenumber(Client cntxt, MalBlkPtr mb,
np[i] = mp[op[i]];
havenil |= np[i] == 32768;
}
+ } else { /* int case */
+ unsigned int *np = Tloc(n, 0);
+ unsigned int *op = Tloc(o, 0);
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]