Changeset: 5b54f94087b3 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=5b54f94087b3
Modified Files:
        gdk/gdk_hash.c
        gdk/gdk_hash.h
        gdk/gdk_imprints.c
        gdk/gdk_imprints.h
        gdk/gdk_join.c
Branch: imprints-join
Log Message:

varying # of scans over probe column


diffs (truncated from 780 to 300 lines):

diff --git a/gdk/gdk_hash.c b/gdk/gdk_hash.c
--- a/gdk/gdk_hash.c
+++ b/gdk/gdk_hash.c
@@ -134,6 +134,15 @@ HASHnew(Heap *hp, int tpe, BUN size, BUN
        return h;
 }
 
+#define GETBIN_IMPS_MERGE(Z,X,B)                       \
+do {                                   \
+       int _i;                         \
+       Z = 0;                          \
+       for (_i = 1; _i < B; _i++)      \
+               Z += ((X) >= bins[_i]); \
+       Z = Z >> bin_merge_bits;\
+} while (0)
+/*
 #define GETBIN_IMPS(Z,X,B)                     \
 do {                                   \
        int _i;                         \
@@ -142,6 +151,11 @@ do {                                       \
                Z += ((X) >= bins[_i]); \
 } while (0)
 
+#define GETBIN_IMPS(Z,X)       \
+do {                           \
+       Z = X >> 17;    \
+} while (0)
+*/
 #define starthash(TYPE)                                                        \
        do {                                                            \
                TYPE *v = (TYPE *) BUNtloc(bi, 0);                      \
@@ -165,20 +179,18 @@ do {                                      \
                }                                               \
        } while (0)
 
+/*
 #define starthash_imps(TYPE)                                                   \
        do {                                                            \
                TYPE *v = (TYPE *) BUNtloc(bi, 0);                      \
                int bin;                                                        \
-               Imprints *imprints = (VIEWtparent(b) ? BBPdescriptor(VIEWtparent(b)): b)->timprints;            \
-               const TYPE *restrict bins = (TYPE *) imprints->bins;                    \
-               const int B = imprints->bits;                   \
                for (; r < p; r++) {                                    \
                        BUN c;                                                          \
-                       GETBIN_IMPS(bin, *(v + r), B);          \
+                       GETBIN_IMPS(bin, *(v + r));             \
                        c = (BUN) hash_imps_##TYPE(h, v+r, bin);                \
                                                                        \
                        if (HASHget(h, c) == HASHnil(h) && nslots-- == 0) \
-                               break; /* mask too full */              \
+                               break;          \
                        HASHputlink(h, r, HASHget(h, c));               \
                        HASHput(h, c, r);                               \
                }                                                       \
@@ -187,18 +199,61 @@ do {                                      \
        do {                                                    \
                TYPE *v = (TYPE *) BUNtloc(bi, 0);              \
                int bin;                                                        \
-               Imprints *imprints = (VIEWtparent(b) ? BBPdescriptor(VIEWtparent(b)): b)->timprints;            \
-               const TYPE *restrict bins = (TYPE *) imprints->bins;                    \
-               const int B = imprints->bits;                   \
                for (; p < q; p++) {                                    \
                        BUN c;                                                          \
-                       GETBIN_IMPS(bin, *(v + p), B);  \
+                       GETBIN_IMPS(bin, *(v + p));     \
                        c = (BUN) hash_imps_##TYPE(h, v + p, bin);      \
                                                                \
                        HASHputlink(h, p, HASHget(h, c));       \
                        HASHput(h, c, p);                       \
                }                                               \
        } while (0)
+*/
+
+
+#define starthash_imps(TYPE)                                                   \
+       do {                                                            \
+               TYPE *v = (TYPE *) BUNtloc(bi, 0);                      \
+               unsigned int bin;                                                       \
+               Imprints *imprints = (VIEWtparent(b) ? BBPdescriptor(VIEWtparent(b)): b)->timprints;            \
+               const TYPE *restrict bins = (TYPE *) imprints->bins;                    \
+               const int B = imprints->bits;                   \
+               BUN lowermask = (h->mask) >> imps_bits;                 \
+               unsigned int left_shift = __builtin_popcount(h->mask) - imps_bits;\
+               BUN highermask = 0;\
+               for (; r < p; r++) {                                    \
+                       BUN c;                                                          \
+                       GETBIN_IMPS_MERGE(bin, *(v + r), B);            \
+                       highermask = bin << left_shift;         \
+                       c = (BUN) hash_imps_##TYPE(lowermask, v+r, highermask);         \
+                                                                       \
+                       if (HASHget(h, c) == HASHnil(h) && nslots-- == 0) \
+                               break;          \
+                       HASHputlink(h, r, HASHget(h, c));               \
+                       HASHput(h, c, r);                               \
+               }                                                       \
+       } while (0)
+#define finishhash_imps(TYPE)                                  \
+       do {                                                    \
+               TYPE *v = (TYPE *) BUNtloc(bi, 0);              \
+               unsigned int bin;                                                       \
+               Imprints *imprints = (VIEWtparent(b) ? BBPdescriptor(VIEWtparent(b)): b)->timprints;            \
+               const TYPE *restrict bins = (TYPE *) imprints->bins;                    \
+               const int B = imprints->bits;                   \
+               BUN lowermask = (h->mask) >> imps_bits;                 \
+               unsigned int left_shift = __builtin_popcount(h->mask) - imps_bits;\
+               BUN highermask = 0;\
+               for (; p < q; p++) {                                    \
+                       BUN c;                                                          \
+                       GETBIN_IMPS_MERGE(bin, *(v + p), B);    \
+                       highermask = bin << left_shift;         \
+                       c = (BUN) hash_imps_##TYPE(lowermask, v + p, highermask);       \
+                                                               \
+                       HASHputlink(h, p, HASHget(h, c));       \
+                       HASHput(h, c, p);                       \
+               }                                               \
+       } while (0)
+
 
 /* collect HASH statistics for analysis */
 static void
@@ -580,6 +635,8 @@ gdk_return
 BAThash_imps(BAT *b, BUN masksize)
 {
        lng t0 = 0, t1 = 0;
+       unsigned int imps_bits = 0;
+       unsigned int bin_merge_bits = 6 - imps_bits;
 
        assert(b->batCacheid > 0);
        if (BATcheckhash(b)) {
diff --git a/gdk/gdk_hash.h b/gdk/gdk_hash.h
--- a/gdk/gdk_hash.h
+++ b/gdk/gdk_hash.h
@@ -146,6 +146,7 @@ gdk_export BUN HASHlist(Hash *h, BUN i);
 #define hash_bte(H,V)  (assert(((H)->mask & 0xFF) == 0xFF), (BUN) mix_bte(*(const unsigned char*) (V)))
 #define hash_sht(H,V)  (assert(((H)->mask & 0xFFFF) == 0xFFFF), (BUN) mix_sht(*(const unsigned short*) (V)))
 #define hash_int(H,V)  ((BUN) mix_int(*(const unsigned int *) (V)) & (H)->mask)
+//#define hash_int(H,V) ((BUN)(*(const unsigned int *) (V)) & (H)->mask)
 /* XXX return size_t-sized value for 8-byte oid? */
 #define hash_lng(H,V)  ((BUN) mix_lng(*(const ulng *) (V)) & (H)->mask)
 #ifdef HAVE_HGE
@@ -157,8 +158,16 @@ gdk_export BUN HASHlist(Hash *h, BUN i);
 #define hash_oid(H,V)  hash_lng(H,V)
 #endif
 
-#define hash_imps_int(H,V,B)   (((BUN) mix_int(*(const unsigned int *) (V)) & ((H)->mask >> 6)) | ((B) << 6))
-#define hash_imps_lng(H,V,B)   (((BUN) mix_lng(*(const ulng *) (V)) & ((H)->mask >> 6)) | ((B) << 6))
+//#define hash_imps_int(H,V,B) (((BUN) mix_int(*(const unsigned int *) (V)) & ((H)->mask >> 6)) | ((B) << (__builtin_popcount((H)->mask) - 6)))
+//#define hash_imps_lng(H,V,B) (((BUN) mix_lng(*(const ulng *) (V)) & ((H)->mask >> 6)) | ((B) << (__builtin_popcount((H)->mask) - 6)))
+
+#define hash_imps_int(lmask,V,hmask) (((BUN) mix_int(*(const unsigned int *) (V)) & lmask) | hmask)
+#define hash_imps_lng(lmask,V,hmask) (((BUN) mix_lng(*(const ulng *) (V)) & lmask) | hmask)
+
+
+//#define hash_imps_int(H,V,B) (((BUN) (*(const unsigned int *) (V)) & ((H)->mask >> 6)) | ((B) << (__builtin_popcount((H)->mask) - 6)))
+
+
 
 #define hash_flt(H,V)  hash_int(H,V)
 #define hash_dbl(H,V)  hash_lng(H,V)
diff --git a/gdk/gdk_imprints.c b/gdk/gdk_imprints.c
--- a/gdk/gdk_imprints.c
+++ b/gdk/gdk_imprints.c
@@ -287,7 +287,7 @@ BATcheckimprints(BAT *b)
 gdk_return
 BATsubimprints(BAT *b, BAT *g) {
         //return IMPSinternal(b, g);
-               return IMPSinternal(b, (VIEWtparent(g) ? BBPdescriptor(VIEWtparent(g)): g));
+               return IMPSinternal((VIEWtparent(b) ? BBPdescriptor(VIEWtparent(b)): b), (VIEWtparent(g) ? BBPdescriptor(VIEWtparent(g)): g));
 }
 
 gdk_return
diff --git a/gdk/gdk_imprints.h b/gdk/gdk_imprints.h
--- a/gdk/gdk_imprints.h
+++ b/gdk/gdk_imprints.h
@@ -27,4 +27,5 @@ typedef struct {
 #define IMPSunsetBit(B, X, Y)  ((X) & ~((uint##B##_t) 1 << (Y)))
 #define IMPSisSet(B, X, Y)     (((X) & ((uint##B##_t) 1 << (Y))) != 0)
 
+
 #endif /* GDK_IMPS_H */
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -10,6 +10,7 @@
 #include "gdk.h"
 #include "gdk_private.h"
 #include "gdk_calc_private.h"
+#include "gdk_imprints.h"
 
 /*
  * All join variants produce some sort of join on two input BATs,
@@ -2580,6 +2581,14 @@ binsearchcand(const oid *cand, BUN lo, B
                if (hb >= (lo) && hb < (hi) &&                  \
                    simple_EQ(v, BUNtloc(bi, hb), TYPE))
 
+#define GETBIN_RANGE(Z,X,LB,UB)                        \
+do {                                   \
+       unsigned int _i;                                \
+       Z = LB;                         \
+       for (_i = LB+1; _i <= UB; _i++) \
+               Z += ((X) >= bins[_i]); \
+} while (0)
+
 #define GETBIN_(Z,X,B)                 \
 do {                                   \
        int _i;                         \
@@ -2587,6 +2596,12 @@ do {                                     \
        for (_i = 1; _i < B; _i++)      \
                Z += ((X) >= bins[_i]); \
 } while (0)
+/*
+#define GETBIN_(Z,X)                   \
+do {                                   \
+       Z = X >> 17;            \
+} while (0)
+*/
 
 #define HASHJOIN(TYPE, WIDTH)                                       \
        do {                                                            \
@@ -2623,14 +2638,18 @@ do {                                    \
                        }                                                       \
        } while (0)
 
-
+/** just change the hash function compared to the default probing **/
+/*
 #define HASHJOIN_IMPS(TYPE, WIDTH)                                             \
        do {                                                            \
                BUN hashnil = HASHnil(hsh);                             \
-               int bin;                                                                \
+               unsigned int bin = 0;                                                           \
                Imprints *imprints = (VIEWtparent(l) ? BBPdescriptor(VIEWtparent(l)): l)->timprints;            \
-               const TYPE *restrict bins = (TYPE *) imprints->bins;                    \
-               const int B = imprints->bits;                   \
+               const TYPE *restrict bins = (TYPE *) imprints->bins;    \
+               BUN lowermask = (hsh->mask) >> 6;                       \
+               unsigned int left_shift = __builtin_popcount(hsh->mask) - 6;\
+               BUN highermask = 0;\
+               \
                for (lo = lstart + l->hseqbase;                         \
                     lstart < lend;                                     \
                     lo++) {                                            \
@@ -2638,8 +2657,9 @@ do {                                      \
                        lstart++;                                       \
                        nr = 0;                                         \
                        if (*(const TYPE*)v != TYPE##_nil) {            \
-                               GETBIN_(bin, *(const TYPE*)v, B);       \
-                               for (rb = HASHget##WIDTH(hsh, hash_imps_##TYPE(hsh, v, bin)); \
+                               GETBIN_(bin, *(const TYPE*)v, 64);      \
+                               highermask = bin << left_shift;         \
+                               for (rb = HASHget##WIDTH(hsh, hash_imps_##TYPE(lowermask, v, highermask)); \
                                     rb != hashnil;                     \
                                     rb = HASHgetlink##WIDTH(hsh, rb))  \
                                        if (rb >= rl && rb < rh &&      \
@@ -2663,6 +2683,320 @@ do {                                    \
                        }                                               \
                }                                                       \
        } while (0)
+*/
+
+//const TYPE *restrict bins = (TYPE *) imprints->bins;
+//const int B = imprints->bits;
+//fprintf(stderr, "dct[dentry].cnt:%d\n", dct[dentry].cnt);
+//fprintf(stderr, "checking imprints #:%ld\n", impsid);
+//bin = partid;
+//fprintf(stderr, "imprints->dictcnt:%ld\n", imprints->dictcnt);
+
+/** call GETBIN when checking the values of cacheline **/
+/*
+#define PROBING(TYPE, WIDTH)\
+       do {                                                            \
+               lcur = lstart + valueid;\
+               lo = lcur + l->hseqbase;\
+               v = FVALUE(l, lcur);                            \
+               nr = 0;                                         \
+               if (*(const TYPE*)v != TYPE##_nil) {            \
+                       GETBIN_(bin, *(const TYPE*)v, B);       \
+                       highermask = bin << left_shift;         \
+                       if (bin == partid) {    \
+                       unprobed_num++; \
+                       for (rb = HASHget##WIDTH(hsh, hash_imps_##TYPE(lowermask, v, highermask)); \
+                            rb != hashnil;                     \
+                            rb = HASHgetlink##WIDTH(hsh, rb))  \
+                               if (rb >= rl && rb < rh &&      \
+                                   * (const TYPE *) v == ((const TYPE *) base)[rb]) { \
+                                       ro = (oid) (rb - rl + rseq); \
+                                       HASHLOOPBODY();         \
+                                       result_num++;           \
+                               }                               \
+                       }\
+               }                                               \
+               if (nr == 0) {                                  \
+                       lskipped = BATcount(r1) > 0;            \
+               } else {                                        \
+                       if (lskipped) {                         \
+                               r1->tdense = 0;                 \
+                       }                                       \
+                       if (nr > 1) {                           \
+                               r1->tkey = 0;                   \
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to