Changeset: f4667483d825 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=f4667483d825
Modified Files:
        gdk/gdk.h
        gdk/gdk_hash.c
        gdk/gdk_join.c
        gdk/gdk_private.h
        gdk/gdk_utils.c
Branch: imprints-join
Log Message:

add visit list optimization


diffs (truncated from 658 to 300 lines):

diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -656,6 +656,15 @@ typedef struct {
 
 typedef struct Imprints Imprints;
 
+
+/* added by Xu: to implement a linked list for imps-hashjoin's visit list 
optimization*/
+/*
+typedef struct node {
+       uint32_t key;
+       struct node *next;
+} node;
+*/
+
 /*
  * @+ Binary Association Tables
  * Having gone to the previous preliminary definitions, we will now
diff --git a/gdk/gdk_hash.c b/gdk/gdk_hash.c
--- a/gdk/gdk_hash.c
+++ b/gdk/gdk_hash.c
@@ -134,6 +134,14 @@ HASHnew(Heap *hp, int tpe, BUN size, BUN
        return h;
 }
 
+#define GETBIN_IMPS_STEP(Z,X,B,S)                      \
+do {                                   \
+       int _i;                         \
+       Z = 0;                          \
+       for (_i = S; _i < B; _i += S)   \
+               Z += ((X) >= bins[_i]); \
+} while (0)
+
 #define GETBIN_IMPS_MERGE(Z,X,B)                       \
 do {                                   \
        int _i;                         \
@@ -211,7 +219,7 @@ do {                                \
 */
 
 
-#define starthash_imps(TYPE)                                                   
\
+#define starthash_imps(TYPE, STEP)                                             
        \
        do {                                                            \
                TYPE *v = (TYPE *) BUNtloc(bi, 0);                      \
                unsigned int bin;                                               
        \
@@ -223,7 +231,8 @@ do {                                \
                BUN highermask = 0;\
                for (; r < p; r++) {                                    \
                        BUN c;                                                  
        \
-                       GETBIN_IMPS_MERGE(bin, *(v + r), B);            \
+                       /*GETBIN_IMPS_MERGE(bin, *(v + r), B);*/                
\
+                       GETBIN_IMPS_STEP(bin, *(v + p), B, STEP);       \
                        highermask = bin << left_shift;         \
                        c = (BUN) hash_imps_##TYPE(lowermask, v+r, highermask); 
        \
                                                                        \
@@ -233,7 +242,7 @@ do {                                \
                        HASHput(h, c, r);                               \
                }                                                       \
        } while (0)
-#define finishhash_imps(TYPE)                                  \
+#define finishhash_imps(TYPE, STEP)                                    \
        do {                                                    \
                TYPE *v = (TYPE *) BUNtloc(bi, 0);              \
                unsigned int bin;                                               
        \
@@ -245,7 +254,8 @@ do {                                \
                BUN highermask = 0;\
                for (; p < q; p++) {                                    \
                        BUN c;                                                  
        \
-                       GETBIN_IMPS_MERGE(bin, *(v + p), B);    \
+                       /*GETBIN_IMPS_MERGE(bin, *(v + p), B);*/        \
+                       GETBIN_IMPS_STEP(bin, *(v + p), B, STEP);       \
                        highermask = bin << left_shift;         \
                        c = (BUN) hash_imps_##TYPE(lowermask, v + p, 
highermask);       \
                                                                \
@@ -425,6 +435,7 @@ BAThash(BAT *b, BUN masksize)
 
        assert(b->batCacheid > 0);
        if (BATcheckhash(b)) {
+               //fprintf(stderr, "---------------hash already exists\n");
                return GDK_SUCCEED;
        }
        MT_lock_set(&GDKhashLock(b->batCacheid));
@@ -625,6 +636,7 @@ BAThash(BAT *b, BUN masksize)
                        HASHcollisions(b, b->thash);
                }
        }
+
        MT_lock_unset(&GDKhashLock(b->batCacheid));
        return GDK_SUCCEED;
 }
@@ -635,7 +647,7 @@ gdk_return
 BAThash_imps(BAT *b, BUN masksize)
 {
        lng t0 = 0, t1 = 0;
-       unsigned int imps_bits = 0;
+       unsigned int imps_bits = GDK_imps_hashjoin_bits;
        unsigned int bin_merge_bits = 6 - imps_bits;
 
        assert(b->batCacheid > 0);
@@ -746,13 +758,14 @@ BAThash_imps(BAT *b, BUN masksize)
                                starthash(flt);
                                break;
                        case TYPE_int:
-                               starthash_imps(int);
+                               starthash_imps(int, (unsigned int)1 << 
bin_merge_bits);
                                break;
                        case TYPE_dbl:
                                starthash(dbl);
                                break;
                        case TYPE_lng:
-                               starthash(lng);
+                               fprintf(stderr, "enter starthash_imps(lng)\n");
+                               starthash_imps(lng, (unsigned int)1 << 
bin_merge_bits);
                                break;
 #ifdef HAVE_HGE
                        case TYPE_hge:
@@ -784,7 +797,7 @@ BAThash_imps(BAT *b, BUN masksize)
                        finishhash(sht);
                        break;
                case TYPE_int:
-                       finishhash_imps(int);
+                       finishhash_imps(int, (unsigned int)1 << bin_merge_bits);
                        break;
                case TYPE_flt:
                        finishhash(flt);
@@ -793,7 +806,8 @@ BAThash_imps(BAT *b, BUN masksize)
                        finishhash(dbl);
                        break;
                case TYPE_lng:
-                       finishhash(lng);
+                       fprintf(stderr, "enter finishhash_imps(lng)\n");
+                       finishhash_imps(lng, (unsigned int)1 << bin_merge_bits);
                        break;
 #ifdef HAVE_HGE
                case TYPE_hge:
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -2596,6 +2596,14 @@ do {                                     \
        for (_i = 1; _i < B; _i++)      \
                Z += ((X) >= bins[_i]); \
 } while (0)
+
+#define GETBIN_STEP(Z,X,S)                     \
+do {                                   \
+       int _i;                         \
+       Z = 0;                          \
+       for (_i = S; _i < 64; _i += S)  \
+               Z += ((X) >= bins[_i]); \
+} while (0)
 /*
 #define GETBIN_(Z,X)                   \
 do {                                   \
@@ -2603,6 +2611,7 @@ do {                                      \
 } while (0)
 */
 
+/* original HASHJOIN */
 #define HASHJOIN(TYPE, WIDTH)                                       \
        do {                                                            \
                        BUN hashnil = HASHnil(hsh);                             
\
@@ -2638,6 +2647,50 @@ do {                                     \
                        }                                                       
\
        } while (0)
 
+/*fprintf(stderr, "matched; cur result #:%ld; cur probing element #:%ld\n", 
counter, element);*/
+/*fprintf(stderr, "probing element #:%ld\n", element++);       element++;      
                */
+/*
+#define HASHJOIN(TYPE, WIDTH)                                       \
+       do {                                                            \
+                       BUN hashnil = HASHnil(hsh);                             
\
+                       BUN counter = 0;                                        
                                        \
+                       BUN element = 0;                                        
                                        \
+                       for (lo = lstart + l->hseqbase;                         
\
+                                lstart < lend;                                 
    \
+                                lo++) {                                        
    \
+                                       v = FVALUE(l, lstart);                  
        \
+                                       lstart++;                               
        \
+
+                                       nr = 0;                                 
        \
+                                       if (*(const TYPE*)v != TYPE##_nil) {    
        \
+                                                       for (rb = 
HASHget##WIDTH(hsh, hash_##TYPE(hsh, v)); \
+                                                                rb != hashnil; 
                    \
+                                                                rb = 
HASHgetlink##WIDTH(hsh, rb))  \
+                                                                       if (rb 
>= rl && rb < rh &&      \
+                                                                               
* (const TYPE *) v == ((const TYPE *) base)[rb]) { \
+                                                                               
        ro = (oid) (rb - rl + rseq); \
+                                                                               
        HASHLOOPBODY();         \
+                                                                               
        counter++;                      \
+                                                                       }       
                        \
+                                       }                                       
        \
+                                       if (nr == 0) {                          
        \
+                                                       lskipped = BATcount(r1) 
> 0;            \
+                                       } else {                                
        \
+                                                       if (lskipped) {         
                \
+                                                                       
r1->tdense = 0;                 \
+                                                       }                       
                \
+                                                       if (nr > 1) {           
                \
+                                                                       
r1->tkey = 0;                   \
+                                                                       
r1->tdense = 0;                 \
+                                                       }                       
                \
+                                                       if (BATcount(r1) > nr)  
                \
+                                                                       
r1->trevsorted = 0;             \
+                                       }                                       
        \
+                                       if (element == 100000) {fprintf(stderr, 
"cur probing element #:%ld\n", element); break;}                                
                \
+                       }                                                       
\
+       } while (0)
+*/
+
 /** just change the hash function compared to the default probing **/
 /*
 #define HASHJOIN_IMPS(TYPE, WIDTH)                                             
\
@@ -2813,7 +2866,7 @@ do {                                      \
 
 /** use the boundary of target bin to void false probing **/
 /** the 32/16/8/4/2-scan **/
-#define PROBING(TYPE, WIDTH)\
+#define PROBING_(TYPE, WIDTH)\
        do {                                                            \
                lcur = lstart + valueid;\
                lo = lcur + l->hseqbase;\
@@ -2825,7 +2878,7 @@ do {                                      \
                        /*GETBIN_RANGE(bin, *(const TYPE*)v, partid_lb, 
partid_ub);*/   \
                        bin = super_partid;                                     
\
                        highermask = bin << left_shift;         \
-                       unprobed_num++; \
+                       /*unprobed_num++;*/     \
                        for (rb = HASHget##WIDTH(hsh, 
hash_imps_##TYPE(lowermask, v, highermask)); \
                                 rb != hashnil;                 \
                                 rb = HASHgetlink##WIDTH(hsh, rb))      \
@@ -2833,7 +2886,7 @@ do {                                      \
                                        * (const TYPE *) v == ((const TYPE *) 
base)[rb]) { \
                                        ro = (oid) (rb - rl + rseq); \
                                        HASHLOOPBODY();         \
-                                       result_num++;           \
+                                       /*result_num++; */      \
                                }                               \
                }                                               \
                if (nr == 0) {                                  \
@@ -2851,6 +2904,79 @@ do {                                     \
                }                                               \
        } while (0)
 
+
+/* the version matching with visit list optimization */
+#define PROBING_PART(TYPE, WIDTH, PART)                \
+       do {                                                            \
+               lcur = lstart + valueid;\
+               lo = lcur + l->hseqbase;\
+               v = FVALUE(l, lcur);                            \
+               nr = 0;                                         \
+               if ((*(const TYPE*)v != TYPE##_nil)) {          \
+                       bin = PART;                                     \
+                       highermask = bin << left_shift;         \
+                       for (rb = HASHget##WIDTH(hsh, 
hash_imps_##TYPE(lowermask, v, highermask)); \
+                                rb != hashnil;                 \
+                                rb = HASHgetlink##WIDTH(hsh, rb))      \
+                               if (rb >= rl && rb < rh &&      \
+                                       * (const TYPE *) v == ((const TYPE *) 
base)[rb]) { \
+                                       ro = (oid) (rb - rl + rseq); \
+                                       HASHLOOPBODY();         \
+                                       /*result_num++; */      \
+                               }                               \
+               }                                               \
+               if (nr == 0) {                                  \
+                       lskipped = BATcount(r1) > 0;            \
+               } else {                                        \
+                       if (lskipped) {                         \
+                               r1->tdense = 0;                 \
+                       }                                       \
+                       if (nr > 1) {                           \
+                               r1->tkey = 0;                   \
+                               r1->tdense = 0;                 \
+                       }                                       \
+                       if (BATcount(r1) > nr)                  \
+                               r1->trevsorted = 0;             \
+               }                                               \
+       } while (0)
+
+#define PROBING_UNORDERED(TYPE, WIDTH, STEP)\
+       do {                                                            \
+               lcur = lstart + valueid;\
+               lo = lcur + l->hseqbase;\
+               v = FVALUE(l, lcur);                            \
+               nr = 0;                                         \
+               if ((*(const TYPE*)v != TYPE##_nil)) {          \
+                       GETBIN_STEP(bin, *(const TYPE*)v, STEP);        \
+                       /*bin = super_partid;*/                                 
\
+                       highermask = bin << left_shift;         \
+                       /*unprobed_num++;*/     \
+                       for (rb = HASHget##WIDTH(hsh, 
hash_imps_##TYPE(lowermask, v, highermask)); \
+                                rb != hashnil;                 \
+                                rb = HASHgetlink##WIDTH(hsh, rb))      \
+                               if (rb >= rl && rb < rh &&      \
+                                       * (const TYPE *) v == ((const TYPE *) 
base)[rb]) { \
+                                       ro = (oid) (rb - rl + rseq); \
+                                       HASHLOOPBODY();         \
+                                       /*result_num++; */      \
+                               }                               \
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to