Changeset: bb0c354be880 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/bb0c354be880
Modified Files:
gdk/gdk_strimps.c
Branch: default
Log Message:
Various fixes to strimps code.
Added a few const qualifiers to indicate read-only access.
Added a few inline specifiers to small static functions.
Cast input to tolower to unsigned char.
Removed some superfluous (unused or unnecessary) members and variables.
Fixes a shift with a shift value that was not within range.
diffs (196 lines):
diff --git a/gdk/gdk_strimps.c b/gdk/gdk_strimps.c
--- a/gdk/gdk_strimps.c
+++ b/gdk/gdk_strimps.c
@@ -114,8 +114,6 @@ typedef struct {
typedef struct {
uint64_t cnt;
- uint64_t mask;
- size_t idx;
} PairHistogramElem;
@@ -128,12 +126,13 @@ typedef struct {
#undef UTF8STRIMPS /* Not using utf8 for now */
#ifdef UTF8STRIMPS
static bool
-pair_equal(CharPair *p1, CharPair *p2) {
+pair_equal(const CharPair *p1, const CharPair *p2)
+{
if(p1->psize != p2->psize)
return false;
for(size_t i = 0; i < p1->psize; i++)
- if (*(p1->pbytes + i) != *(p2->pbytes + i))
+ if (p1->pbytes[i] != p2->pbytes[i])
return false;
return true;
@@ -154,19 +153,20 @@ pair_equal(CharPair *p1, CharPair *p2) {
#define pairToIndex(b1, b2) (size_t)(((uint16_t)b2)<<8 | ((uint16_t)b1))
inline static size_t
-bytes2histindex(uint8_t *bytes, uint8_t psize) {
+bytes2histindex(uint8_t *bytes, uint8_t psize)
+{
(void)psize;
return pairToIndex(bytes[0], bytes[1]);
}
inline static bool
-pair_at(PairIterator *pi, CharPair *p)
+pair_at(const PairIterator *pi, CharPair *p)
{
if (pi->pos >= pi->lim - 1)
return false;
- p->pbytes[0] = (uint8_t)tolower(*(pi->s + pi->pos));
- p->pbytes[1] = (uint8_t)tolower(*(pi->s + pi->pos + 1));
+ p->pbytes[0] = (uint8_t)tolower((unsigned char) pi->s[pi->pos]);
+ p->pbytes[1] = (uint8_t)tolower((unsigned char) pi->s[pi->pos + 1]);
p->psize = 2;
p->idx = pairToIndex(p->pbytes[0], p->pbytes[1]);
@@ -185,19 +185,19 @@ next_pair(PairIterator *pi)
/* Returns true if the specified char is ignored.
*/
inline static bool
-ignored(CharPair *p, uint8_t elm)
+ignored(const CharPair *p, uint8_t elm)
{
assert(elm == 0 || elm == 1);
return isIgnored(p->pbytes[elm]);
}
-static strimp_masks_t
-STRMPget_mask(Strimps *r, uint64_t idx)
+inline static strimp_masks_t
+STRMPget_mask(const Strimps *r, uint64_t idx)
{
return r->masks[idx];
}
-static void
+inline static void
STRMPset_mask(Strimps *r, uint64_t idx, strimp_masks_t val)
{
r->masks[idx] = val;
@@ -208,10 +208,10 @@ STRMPset_mask(Strimps *r, uint64_t idx,
* corresponding to the index of the pair in the strimp header, or is 0
* if the pair does not occur.
*/
-static uint64_t
-STRMPpairLookup(Strimps *s, CharPair *p)
+inline static uint64_t
+STRMPpairLookup(const Strimps *s, const CharPair *p)
{
- return STRMPget_mask(s, pairToIndex(p->pbytes[0], p->pbytes[1]));
+ return STRMPget_mask(s, p->idx);
}
@@ -255,7 +255,7 @@ STRMPmakebitstring(const char *s, Strimp
static void
-STRMPchoosePairs(PairHistogramElem *hist, size_t hist_size, CharPair *cp)
+STRMPchoosePairs(const PairHistogramElem *hist, size_t hist_size, CharPair *cp)
{
lng t0 = 0;
size_t i;
@@ -277,10 +277,10 @@ STRMPchoosePairs(PairHistogramElem *hist
}
}
- for(i = 0; i < STRIMP_HEADER_SIZE; i++) {
- cp[i].pbytes[1] = (uint8_t)(hist[indices[i]].idx & 0xFF);
- cp[i].pbytes[0] = (uint8_t)((hist[indices[i]].idx >> 8) & 0xFF);
- cp[i].idx = hist[indices[i]].idx;
+ for(i = 0; i < STRIMP_PAIRS; i++) {
+ cp[i].pbytes[1] = (uint8_t)(indices[i] & 0xFF);
+ cp[i].pbytes[0] = (uint8_t)((indices[i] >> 8) & 0xFF);
+ cp[i].idx = indices[i];
cp[i].psize = 2;
cp[i].mask = ((uint64_t)0x1) << (STRIMP_PAIRS - i - 1);
}
@@ -304,8 +304,8 @@ STRMPbuildHeader(BAT *b, BAT *s, CharPai
oid x;
size_t hlen;
PairHistogramElem *hist;
- PairIterator pi, *pip;
- CharPair cp, *cpp;
+ PairIterator pi;
+ CharPair cp;
struct canditer ci;
size_t values = 0;
bool res;
@@ -319,20 +319,12 @@ STRMPbuildHeader(BAT *b, BAT *s, CharPai
}
hlen = STRIMP_HISTSIZE;
- if ((hist = (PairHistogramElem
*)GDKmalloc(hlen*sizeof(PairHistogramElem))) == NULL) {
+ if ((hist = (PairHistogramElem
*)GDKzalloc(hlen*sizeof(PairHistogramElem))) == NULL) {
return false;
}
- for(hidx = 0; hidx < hlen; hidx++) {
- hist[hidx].cnt = 0;
- hist[hidx].mask = 0;
- hist[hidx].idx = hidx;
- }
-
// Create Histogram
bi = bat_iterator(b);
- pip = π
- cpp = &cp;
for (i = 0; i < ci.ncand; i++) {
x = canditer_next(&ci) - b->hseqbase;
const char *cs = BUNtvar(bi, x);
@@ -343,14 +335,14 @@ STRMPbuildHeader(BAT *b, BAT *s, CharPai
if (pi.lim < 2) {
continue;
}
- while (pair_at(pip, cpp)) {
- if(ignored(cpp, 1)) {
+ while (pair_at(&pi, &cp)) {
+ if(ignored(&cp, 1)) {
/* Skip this AND the next pair
* if the second char of the
* pair is ignored.
*/
- next_pair(pip);
- } else if (ignored(cpp, 0)) {
+ next_pair(&pi);
+ } else if (ignored(&cp, 0)) {
/* Skip this pair if the first
* char is ignored. This should
* only happen at the beginnig
@@ -361,8 +353,8 @@ STRMPbuildHeader(BAT *b, BAT *s, CharPai
;
} else {
- /* hidx = histogram_index(hist, hlen,
cpp); */
- hidx = cpp->idx;
+ /* hidx = histogram_index(hist, hlen,
&cp); */
+ hidx = cp.idx;
#ifndef UTF8STRINGS
assert(hidx < hlen);
#else
@@ -375,7 +367,7 @@ STRMPbuildHeader(BAT *b, BAT *s, CharPai
values++;
hist[hidx].cnt++;
}
- next_pair(pip);
+ next_pair(&pi);
}
}
}
@@ -754,7 +746,7 @@ STRMPcreateStrimpHeap(BAT *b, BAT *s)
return NULL;
}
- for (size_t i = 0; i < STRIMP_HEADER_SIZE - 1; i++) {
+ for (size_t i = 0; i < STRIMP_PAIRS; i++) {
r->masks[hpairs[i].idx] = hpairs[i].mask;
}
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]