Changeset: 5b54f94087b3 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=5b54f94087b3
Modified Files:
gdk/gdk_hash.c
gdk/gdk_hash.h
gdk/gdk_imprints.c
gdk/gdk_imprints.h
gdk/gdk_join.c
Branch: imprints-join
Log Message:
varying # of scans over probe column
diffs (truncated from 780 to 300 lines):
diff --git a/gdk/gdk_hash.c b/gdk/gdk_hash.c
--- a/gdk/gdk_hash.c
+++ b/gdk/gdk_hash.c
@@ -134,6 +134,15 @@ HASHnew(Heap *hp, int tpe, BUN size, BUN
return h;
}
+#define GETBIN_IMPS_MERGE(Z,X,B) \
+do { \
+ int _i; \
+ Z = 0; \
+ for (_i = 1; _i < B; _i++) \
+ Z += ((X) >= bins[_i]); \
+ Z = Z >> bin_merge_bits;\
+} while (0)
+/*
#define GETBIN_IMPS(Z,X,B) \
do { \
int _i; \
@@ -142,6 +151,11 @@ do { \
Z += ((X) >= bins[_i]); \
} while (0)
+#define GETBIN_IMPS(Z,X) \
+do { \
+ Z = X >> 17; \
+} while (0)
+*/
#define starthash(TYPE)
\
do { \
TYPE *v = (TYPE *) BUNtloc(bi, 0); \
@@ -165,20 +179,18 @@ do { \
} \
} while (0)
+/*
#define starthash_imps(TYPE)
\
do { \
TYPE *v = (TYPE *) BUNtloc(bi, 0); \
int bin;
\
- Imprints *imprints = (VIEWtparent(b) ?
BBPdescriptor(VIEWtparent(b)): b)->timprints; \
- const TYPE *restrict bins = (TYPE *) imprints->bins;
\
- const int B = imprints->bits; \
for (; r < p; r++) { \
BUN c;
\
- GETBIN_IMPS(bin, *(v + r), B); \
+ GETBIN_IMPS(bin, *(v + r)); \
c = (BUN) hash_imps_##TYPE(h, v+r, bin);
\
\
if (HASHget(h, c) == HASHnil(h) && nslots-- == 0) \
- break; /* mask too full */ \
+ break; \
HASHputlink(h, r, HASHget(h, c)); \
HASHput(h, c, r); \
} \
@@ -187,18 +199,61 @@ do { \
do { \
TYPE *v = (TYPE *) BUNtloc(bi, 0); \
int bin;
\
- Imprints *imprints = (VIEWtparent(b) ?
BBPdescriptor(VIEWtparent(b)): b)->timprints; \
- const TYPE *restrict bins = (TYPE *) imprints->bins;
\
- const int B = imprints->bits; \
for (; p < q; p++) { \
BUN c;
\
- GETBIN_IMPS(bin, *(v + p), B); \
+ GETBIN_IMPS(bin, *(v + p)); \
c = (BUN) hash_imps_##TYPE(h, v + p, bin); \
\
HASHputlink(h, p, HASHget(h, c)); \
HASHput(h, c, p); \
} \
} while (0)
+*/
+
+
+#define starthash_imps(TYPE)
\
+ do { \
+ TYPE *v = (TYPE *) BUNtloc(bi, 0); \
+ unsigned int bin;
\
+ Imprints *imprints = (VIEWtparent(b) ?
BBPdescriptor(VIEWtparent(b)): b)->timprints; \
+ const TYPE *restrict bins = (TYPE *) imprints->bins;
\
+ const int B = imprints->bits; \
+ BUN lowermask = (h->mask) >> imps_bits; \
+ unsigned int left_shift = __builtin_popcount(h->mask) -
imps_bits;\
+ BUN highermask = 0;\
+ for (; r < p; r++) { \
+ BUN c;
\
+ GETBIN_IMPS_MERGE(bin, *(v + r), B); \
+ highermask = bin << left_shift; \
+ c = (BUN) hash_imps_##TYPE(lowermask, v+r, highermask);
\
+ \
+ if (HASHget(h, c) == HASHnil(h) && nslots-- == 0) \
+ break; \
+ HASHputlink(h, r, HASHget(h, c)); \
+ HASHput(h, c, r); \
+ } \
+ } while (0)
+#define finishhash_imps(TYPE) \
+ do { \
+ TYPE *v = (TYPE *) BUNtloc(bi, 0); \
+ unsigned int bin;
\
+ Imprints *imprints = (VIEWtparent(b) ?
BBPdescriptor(VIEWtparent(b)): b)->timprints; \
+ const TYPE *restrict bins = (TYPE *) imprints->bins;
\
+ const int B = imprints->bits; \
+ BUN lowermask = (h->mask) >> imps_bits; \
+ unsigned int left_shift = __builtin_popcount(h->mask) -
imps_bits;\
+ BUN highermask = 0;\
+ for (; p < q; p++) { \
+ BUN c;
\
+ GETBIN_IMPS_MERGE(bin, *(v + p), B); \
+ highermask = bin << left_shift; \
+ c = (BUN) hash_imps_##TYPE(lowermask, v + p,
highermask); \
+ \
+ HASHputlink(h, p, HASHget(h, c)); \
+ HASHput(h, c, p); \
+ } \
+ } while (0)
+
/* collect HASH statistics for analysis */
static void
@@ -580,6 +635,8 @@ gdk_return
BAThash_imps(BAT *b, BUN masksize)
{
lng t0 = 0, t1 = 0;
+ unsigned int imps_bits = 0;
+ unsigned int bin_merge_bits = 6 - imps_bits;
assert(b->batCacheid > 0);
if (BATcheckhash(b)) {
diff --git a/gdk/gdk_hash.h b/gdk/gdk_hash.h
--- a/gdk/gdk_hash.h
+++ b/gdk/gdk_hash.h
@@ -146,6 +146,7 @@ gdk_export BUN HASHlist(Hash *h, BUN i);
#define hash_bte(H,V) (assert(((H)->mask & 0xFF) == 0xFF), (BUN)
mix_bte(*(const unsigned char*) (V)))
#define hash_sht(H,V) (assert(((H)->mask & 0xFFFF) == 0xFFFF), (BUN)
mix_sht(*(const unsigned short*) (V)))
#define hash_int(H,V) ((BUN) mix_int(*(const unsigned int *) (V)) & (H)->mask)
+//#define hash_int(H,V) ((BUN)(*(const unsigned int *) (V)) & (H)->mask)
/* XXX return size_t-sized value for 8-byte oid? */
#define hash_lng(H,V) ((BUN) mix_lng(*(const ulng *) (V)) & (H)->mask)
#ifdef HAVE_HGE
@@ -157,8 +158,16 @@ gdk_export BUN HASHlist(Hash *h, BUN i);
#define hash_oid(H,V) hash_lng(H,V)
#endif
-#define hash_imps_int(H,V,B) (((BUN) mix_int(*(const unsigned int *) (V)) &
((H)->mask >> 6)) | ((B) << 6))
-#define hash_imps_lng(H,V,B) (((BUN) mix_lng(*(const ulng *) (V)) &
((H)->mask >> 6)) | ((B) << 6))
+//#define hash_imps_int(H,V,B) (((BUN) mix_int(*(const unsigned int *) (V)) &
((H)->mask >> 6)) | ((B) << (__builtin_popcount((H)->mask) - 6)))
+//#define hash_imps_lng(H,V,B) (((BUN) mix_lng(*(const ulng *) (V)) &
((H)->mask >> 6)) | ((B) << (__builtin_popcount((H)->mask) - 6)))
+
+#define hash_imps_int(lmask,V,hmask) (((BUN) mix_int(*(const unsigned int *)
(V)) & lmask) | hmask)
+#define hash_imps_lng(lmask,V,hmask) (((BUN) mix_lng(*(const ulng *) (V)) &
lmask) | hmask)
+
+
+//#define hash_imps_int(H,V,B) (((BUN) (*(const unsigned int *) (V)) &
((H)->mask >> 6)) | ((B) << (__builtin_popcount((H)->mask) - 6)))
+
+
#define hash_flt(H,V) hash_int(H,V)
#define hash_dbl(H,V) hash_lng(H,V)
diff --git a/gdk/gdk_imprints.c b/gdk/gdk_imprints.c
--- a/gdk/gdk_imprints.c
+++ b/gdk/gdk_imprints.c
@@ -287,7 +287,7 @@ BATcheckimprints(BAT *b)
gdk_return
BATsubimprints(BAT *b, BAT *g) {
//return IMPSinternal(b, g);
- return IMPSinternal(b, (VIEWtparent(g) ?
BBPdescriptor(VIEWtparent(g)): g));
+ return IMPSinternal((VIEWtparent(b) ?
BBPdescriptor(VIEWtparent(b)): b), (VIEWtparent(g) ?
BBPdescriptor(VIEWtparent(g)): g));
}
gdk_return
diff --git a/gdk/gdk_imprints.h b/gdk/gdk_imprints.h
--- a/gdk/gdk_imprints.h
+++ b/gdk/gdk_imprints.h
@@ -27,4 +27,5 @@ typedef struct {
#define IMPSunsetBit(B, X, Y) ((X) & ~((uint##B##_t) 1 << (Y)))
#define IMPSisSet(B, X, Y) (((X) & ((uint##B##_t) 1 << (Y))) != 0)
+
#endif /* GDK_IMPS_H */
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -10,6 +10,7 @@
#include "gdk.h"
#include "gdk_private.h"
#include "gdk_calc_private.h"
+#include "gdk_imprints.h"
/*
* All join variants produce some sort of join on two input BATs,
@@ -2580,6 +2581,14 @@ binsearchcand(const oid *cand, BUN lo, B
if (hb >= (lo) && hb < (hi) && \
simple_EQ(v, BUNtloc(bi, hb), TYPE))
+#define GETBIN_RANGE(Z,X,LB,UB) \
+do { \
+ unsigned int _i; \
+ Z = LB; \
+ for (_i = LB+1; _i <= UB; _i++) \
+ Z += ((X) >= bins[_i]); \
+} while (0)
+
#define GETBIN_(Z,X,B) \
do { \
int _i; \
@@ -2587,6 +2596,12 @@ do { \
for (_i = 1; _i < B; _i++) \
Z += ((X) >= bins[_i]); \
} while (0)
+/*
+#define GETBIN_(Z,X) \
+do { \
+ Z = X >> 17; \
+} while (0)
+*/
#define HASHJOIN(TYPE, WIDTH) \
do { \
@@ -2623,14 +2638,18 @@ do { \
}
\
} while (0)
-
+/** just change the hash function compared to the default probing **/
+/*
#define HASHJOIN_IMPS(TYPE, WIDTH)
\
do { \
BUN hashnil = HASHnil(hsh); \
- int bin;
\
+ unsigned int bin = 0;
\
Imprints *imprints = (VIEWtparent(l) ?
BBPdescriptor(VIEWtparent(l)): l)->timprints; \
- const TYPE *restrict bins = (TYPE *) imprints->bins;
\
- const int B = imprints->bits; \
+ const TYPE *restrict bins = (TYPE *) imprints->bins; \
+ BUN lowermask = (hsh->mask) >> 6; \
+ unsigned int left_shift = __builtin_popcount(hsh->mask) - 6;\
+ BUN highermask = 0;\
+ \
for (lo = lstart + l->hseqbase; \
lstart < lend; \
lo++) { \
@@ -2638,8 +2657,9 @@ do { \
lstart++; \
nr = 0; \
if (*(const TYPE*)v != TYPE##_nil) { \
- GETBIN_(bin, *(const TYPE*)v, B); \
- for (rb = HASHget##WIDTH(hsh,
hash_imps_##TYPE(hsh, v, bin)); \
+ GETBIN_(bin, *(const TYPE*)v, 64); \
+ highermask = bin << left_shift; \
+ for (rb = HASHget##WIDTH(hsh,
hash_imps_##TYPE(lowermask, v, highermask)); \
rb != hashnil; \
rb = HASHgetlink##WIDTH(hsh, rb)) \
if (rb >= rl && rb < rh && \
@@ -2663,6 +2683,320 @@ do { \
} \
} \
} while (0)
+*/
+
+//const TYPE *restrict bins = (TYPE *) imprints->bins;
+//const int B = imprints->bits;
+//fprintf(stderr, "dct[dentry].cnt:%d\n", dct[dentry].cnt);
+//fprintf(stderr, "checking imprints #:%ld\n", impsid);
+//bin = partid;
+//fprintf(stderr, "imprints->dictcnt:%ld\n", imprints->dictcnt);
+
+/** call GETBIN when checking the values of cacheline **/
+/*
+#define PROBING(TYPE, WIDTH)\
+ do { \
+ lcur = lstart + valueid;\
+ lo = lcur + l->hseqbase;\
+ v = FVALUE(l, lcur); \
+ nr = 0; \
+ if (*(const TYPE*)v != TYPE##_nil) { \
+ GETBIN_(bin, *(const TYPE*)v, B); \
+ highermask = bin << left_shift; \
+ if (bin == partid) { \
+ unprobed_num++; \
+ for (rb = HASHget##WIDTH(hsh,
hash_imps_##TYPE(lowermask, v, highermask)); \
+ rb != hashnil; \
+ rb = HASHgetlink##WIDTH(hsh, rb)) \
+ if (rb >= rl && rb < rh && \
+ * (const TYPE *) v == ((const TYPE *)
base)[rb]) { \
+ ro = (oid) (rb - rl + rseq); \
+ HASHLOOPBODY(); \
+ result_num++; \
+ } \
+ }\
+ } \
+ if (nr == 0) { \
+ lskipped = BATcount(r1) > 0; \
+ } else { \
+ if (lskipped) { \
+ r1->tdense = 0; \
+ } \
+ if (nr > 1) { \
+ r1->tkey = 0; \
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list