Changeset: f4667483d825 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=f4667483d825 Modified Files: gdk/gdk.h gdk/gdk_hash.c gdk/gdk_join.c gdk/gdk_private.h gdk/gdk_utils.c Branch: imprints-join Log Message:
add visit list optimization
diffs (truncated from 658 to 300 lines):
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -656,6 +656,15 @@ typedef struct {
typedef struct Imprints Imprints;
+
+/* added by Xu: to implement a linked list for imps-hashjoin's visit list
optimization*/
+/*
+typedef struct node {
+ uint32_t key;
+ struct node *next;
+} node;
+*/
+
/*
* @+ Binary Association Tables
* Having gone to the previous preliminary definitions, we will now
diff --git a/gdk/gdk_hash.c b/gdk/gdk_hash.c
--- a/gdk/gdk_hash.c
+++ b/gdk/gdk_hash.c
@@ -134,6 +134,14 @@ HASHnew(Heap *hp, int tpe, BUN size, BUN
return h;
}
+#define GETBIN_IMPS_STEP(Z,X,B,S) \
+do { \
+ int _i; \
+ Z = 0; \
+ for (_i = S; _i < B; _i += S) \
+ Z += ((X) >= bins[_i]); \
+} while (0)
+
#define GETBIN_IMPS_MERGE(Z,X,B) \
do { \
int _i; \
@@ -211,7 +219,7 @@ do { \
*/
-#define starthash_imps(TYPE)
\
+#define starthash_imps(TYPE, STEP)
\
do { \
TYPE *v = (TYPE *) BUNtloc(bi, 0); \
unsigned int bin;
\
@@ -223,7 +231,8 @@ do { \
BUN highermask = 0;\
for (; r < p; r++) { \
BUN c;
\
- GETBIN_IMPS_MERGE(bin, *(v + r), B); \
+ /*GETBIN_IMPS_MERGE(bin, *(v + r), B);*/
\
+ GETBIN_IMPS_STEP(bin, *(v + p), B, STEP); \
highermask = bin << left_shift; \
c = (BUN) hash_imps_##TYPE(lowermask, v+r, highermask);
\
\
@@ -233,7 +242,7 @@ do { \
HASHput(h, c, r); \
} \
} while (0)
-#define finishhash_imps(TYPE) \
+#define finishhash_imps(TYPE, STEP) \
do { \
TYPE *v = (TYPE *) BUNtloc(bi, 0); \
unsigned int bin;
\
@@ -245,7 +254,8 @@ do { \
BUN highermask = 0;\
for (; p < q; p++) { \
BUN c;
\
- GETBIN_IMPS_MERGE(bin, *(v + p), B); \
+ /*GETBIN_IMPS_MERGE(bin, *(v + p), B);*/ \
+ GETBIN_IMPS_STEP(bin, *(v + p), B, STEP); \
highermask = bin << left_shift; \
c = (BUN) hash_imps_##TYPE(lowermask, v + p,
highermask); \
\
@@ -425,6 +435,7 @@ BAThash(BAT *b, BUN masksize)
assert(b->batCacheid > 0);
if (BATcheckhash(b)) {
+ //fprintf(stderr, "---------------hash already exists\n");
return GDK_SUCCEED;
}
MT_lock_set(&GDKhashLock(b->batCacheid));
@@ -625,6 +636,7 @@ BAThash(BAT *b, BUN masksize)
HASHcollisions(b, b->thash);
}
}
+
MT_lock_unset(&GDKhashLock(b->batCacheid));
return GDK_SUCCEED;
}
@@ -635,7 +647,7 @@ gdk_return
BAThash_imps(BAT *b, BUN masksize)
{
lng t0 = 0, t1 = 0;
- unsigned int imps_bits = 0;
+ unsigned int imps_bits = GDK_imps_hashjoin_bits;
unsigned int bin_merge_bits = 6 - imps_bits;
assert(b->batCacheid > 0);
@@ -746,13 +758,14 @@ BAThash_imps(BAT *b, BUN masksize)
starthash(flt);
break;
case TYPE_int:
- starthash_imps(int);
+ starthash_imps(int, (unsigned int)1 <<
bin_merge_bits);
break;
case TYPE_dbl:
starthash(dbl);
break;
case TYPE_lng:
- starthash(lng);
+ fprintf(stderr, "enter starthash_imps(lng)\n");
+ starthash_imps(lng, (unsigned int)1 <<
bin_merge_bits);
break;
#ifdef HAVE_HGE
case TYPE_hge:
@@ -784,7 +797,7 @@ BAThash_imps(BAT *b, BUN masksize)
finishhash(sht);
break;
case TYPE_int:
- finishhash_imps(int);
+ finishhash_imps(int, (unsigned int)1 << bin_merge_bits);
break;
case TYPE_flt:
finishhash(flt);
@@ -793,7 +806,8 @@ BAThash_imps(BAT *b, BUN masksize)
finishhash(dbl);
break;
case TYPE_lng:
- finishhash(lng);
+ fprintf(stderr, "enter finishhash_imps(lng)\n");
+ finishhash_imps(lng, (unsigned int)1 << bin_merge_bits);
break;
#ifdef HAVE_HGE
case TYPE_hge:
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -2596,6 +2596,14 @@ do { \
for (_i = 1; _i < B; _i++) \
Z += ((X) >= bins[_i]); \
} while (0)
+
+#define GETBIN_STEP(Z,X,S) \
+do { \
+ int _i; \
+ Z = 0; \
+ for (_i = S; _i < 64; _i += S) \
+ Z += ((X) >= bins[_i]); \
+} while (0)
/*
#define GETBIN_(Z,X) \
do { \
@@ -2603,6 +2611,7 @@ do { \
} while (0)
*/
+/* original HASHJOIN */
#define HASHJOIN(TYPE, WIDTH) \
do { \
BUN hashnil = HASHnil(hsh);
\
@@ -2638,6 +2647,50 @@ do { \
}
\
} while (0)
+/*fprintf(stderr, "matched; cur result #:%ld; cur probing element #:%ld\n",
counter, element);*/
+/*fprintf(stderr, "probing element #:%ld\n", element++); element++;
*/
+/*
+#define HASHJOIN(TYPE, WIDTH) \
+ do { \
+ BUN hashnil = HASHnil(hsh);
\
+ BUN counter = 0;
\
+ BUN element = 0;
\
+ for (lo = lstart + l->hseqbase;
\
+ lstart < lend;
\
+ lo++) {
\
+ v = FVALUE(l, lstart);
\
+ lstart++;
\
+
+ nr = 0;
\
+ if (*(const TYPE*)v != TYPE##_nil) {
\
+ for (rb =
HASHget##WIDTH(hsh, hash_##TYPE(hsh, v)); \
+ rb != hashnil;
\
+ rb =
HASHgetlink##WIDTH(hsh, rb)) \
+ if (rb
>= rl && rb < rh && \
+
* (const TYPE *) v == ((const TYPE *) base)[rb]) { \
+
ro = (oid) (rb - rl + rseq); \
+
HASHLOOPBODY(); \
+
counter++; \
+ }
\
+ }
\
+ if (nr == 0) {
\
+ lskipped = BATcount(r1)
> 0; \
+ } else {
\
+ if (lskipped) {
\
+
r1->tdense = 0; \
+ }
\
+ if (nr > 1) {
\
+
r1->tkey = 0; \
+
r1->tdense = 0; \
+ }
\
+ if (BATcount(r1) > nr)
\
+
r1->trevsorted = 0; \
+ }
\
+ if (element == 100000) {fprintf(stderr,
"cur probing element #:%ld\n", element); break;}
\
+ }
\
+ } while (0)
+*/
+
/** just change the hash function compared to the default probing **/
/*
#define HASHJOIN_IMPS(TYPE, WIDTH)
\
@@ -2813,7 +2866,7 @@ do { \
/** use the boundary of target bin to void false probing **/
/** the 32/16/8/4/2-scan **/
-#define PROBING(TYPE, WIDTH)\
+#define PROBING_(TYPE, WIDTH)\
do { \
lcur = lstart + valueid;\
lo = lcur + l->hseqbase;\
@@ -2825,7 +2878,7 @@ do { \
/*GETBIN_RANGE(bin, *(const TYPE*)v, partid_lb,
partid_ub);*/ \
bin = super_partid;
\
highermask = bin << left_shift; \
- unprobed_num++; \
+ /*unprobed_num++;*/ \
for (rb = HASHget##WIDTH(hsh,
hash_imps_##TYPE(lowermask, v, highermask)); \
rb != hashnil; \
rb = HASHgetlink##WIDTH(hsh, rb)) \
@@ -2833,7 +2886,7 @@ do { \
* (const TYPE *) v == ((const TYPE *)
base)[rb]) { \
ro = (oid) (rb - rl + rseq); \
HASHLOOPBODY(); \
- result_num++; \
+ /*result_num++; */ \
} \
} \
if (nr == 0) { \
@@ -2851,6 +2904,79 @@ do { \
} \
} while (0)
+
+/* the version matching with visit list optimization */
+#define PROBING_PART(TYPE, WIDTH, PART) \
+ do { \
+ lcur = lstart + valueid;\
+ lo = lcur + l->hseqbase;\
+ v = FVALUE(l, lcur); \
+ nr = 0; \
+ if ((*(const TYPE*)v != TYPE##_nil)) { \
+ bin = PART; \
+ highermask = bin << left_shift; \
+ for (rb = HASHget##WIDTH(hsh,
hash_imps_##TYPE(lowermask, v, highermask)); \
+ rb != hashnil; \
+ rb = HASHgetlink##WIDTH(hsh, rb)) \
+ if (rb >= rl && rb < rh && \
+ * (const TYPE *) v == ((const TYPE *)
base)[rb]) { \
+ ro = (oid) (rb - rl + rseq); \
+ HASHLOOPBODY(); \
+ /*result_num++; */ \
+ } \
+ } \
+ if (nr == 0) { \
+ lskipped = BATcount(r1) > 0; \
+ } else { \
+ if (lskipped) { \
+ r1->tdense = 0; \
+ } \
+ if (nr > 1) { \
+ r1->tkey = 0; \
+ r1->tdense = 0; \
+ } \
+ if (BATcount(r1) > nr) \
+ r1->trevsorted = 0; \
+ } \
+ } while (0)
+
+#define PROBING_UNORDERED(TYPE, WIDTH, STEP)\
+ do { \
+ lcur = lstart + valueid;\
+ lo = lcur + l->hseqbase;\
+ v = FVALUE(l, lcur); \
+ nr = 0; \
+ if ((*(const TYPE*)v != TYPE##_nil)) { \
+ GETBIN_STEP(bin, *(const TYPE*)v, STEP); \
+ /*bin = super_partid;*/
\
+ highermask = bin << left_shift; \
+ /*unprobed_num++;*/ \
+ for (rb = HASHget##WIDTH(hsh,
hash_imps_##TYPE(lowermask, v, highermask)); \
+ rb != hashnil; \
+ rb = HASHgetlink##WIDTH(hsh, rb)) \
+ if (rb >= rl && rb < rh && \
+ * (const TYPE *) v == ((const TYPE *)
base)[rb]) { \
+ ro = (oid) (rb - rl + rseq); \
+ HASHLOOPBODY(); \
+ /*result_num++; */ \
+ } \
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list
