Changeset: 3ee75bc0bf52 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=3ee75bc0bf52
Modified Files:
        gdk/gdk_join.c
Branch: imprints-join
Log Message:

make the branch compile


diffs (truncated from 478 to 300 lines):

diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -2616,6 +2616,462 @@ binsearchcand(const oid *cand, BUN lo, B
        } while (0)
 
 static gdk_return
+imps_hashjoin(BAT *r1, BAT *r2, BAT *l, BAT *r, BAT *sl, BAT *sr, int 
nil_matches,
+        int nil_on_miss, int semi, int only_misses, BUN maxsize, lng t0,
+        int swapped, const char *reason)
+{
+       BUN lstart, lend, lcnt;
+       const oid *lcand = NULL, *lcandend = NULL;
+       BUN rstart, rend, rcnt;
+       const oid *rcand = NULL, *rcandend = NULL;
+       oid lo, ro;
+       BATiter ri;
+       BUN rb;
+       BUN rl, rh;
+       oid rseq;
+       BUN nr, nrcand, newcap;
+       const char *lvals;
+       const char *lvars;
+       int lwidth;
+       const void *nil = ATOMnilptr(l->ttype);
+       int (*cmp)(const void *, const void *) = ATOMcompare(l->ttype);
+       oid lval = oid_nil;     /* hold value if l has dense tail */
+       const char *v = (const char *) &lval;
+       int lskipped = 0;       /* whether we skipped values in l */
+       const Hash *restrict hsh;
+       int t;
+
+       ALGODEBUG fprintf(stderr, "#imps_hashjoin(l=%s#" BUNFMT "[%s]%s%s%s%s,"
+                         "r=%s#" BUNFMT "[%s]%s%s%s%s,sl=%s#" BUNFMT "%s%s%s,"
+                         "sr=%s#" BUNFMT "%s%s%s,nil_matches=%d,"
+                         "nil_on_miss=%d,semi=%d)%s%s%s\n",
+                         BATgetId(l), BATcount(l), ATOMname(l->ttype),
+                         l->tsorted ? "-sorted" : "",
+                         l->trevsorted ? "-revsorted" : "",
+                         l->tkey ? "-key" : "",
+                         l->timprints ? "-limprints": "",
+                         BATgetId(r), BATcount(r), ATOMname(r->ttype),
+                         r->tsorted ? "-sorted" : "",
+                         r->trevsorted ? "-revsorted" : "",
+                         r->tkey ? "-key" : "",
+                     r->timprints ? "-rimprints": "",
+                         sl ? BATgetId(sl) : "NULL", sl ? BATcount(sl) : 0,
+                         sl && sl->tsorted ? "-sorted" : "",
+                         sl && sl->trevsorted ? "-revsorted" : "",
+                         sl && sl->tkey ? "-key" : "",
+                         sr ? BATgetId(sr) : "NULL", sr ? BATcount(sr) : 0,
+                         sr && sr->tsorted ? "-sorted" : "",
+                         sr && sr->trevsorted ? "-revsorted" : "",
+                         sr && sr->tkey ? "-key" : "",
+                         nil_matches, nil_on_miss, semi,
+                         swapped ? " swapped" : "",
+                         *reason ? " " : "", reason);
+
+       if (l->timprints == NULL || r->timprints == NULL) {
+               fprintf(stderr, "both columns require imprints\n");
+               return GDK_FAIL;
+       }
+
+       /* Ignore the candidate list at this moment */
+       if (sl != NULL || sr != NULL) {
+               fprintf(stderr, "do not deal with candidate list first");
+               return GDK_FAIL;
+       }
+
+       assert(r->ttype != TYPE_void);
+       assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
+       assert(sl == NULL || sl->tsorted);
+       assert(sr == NULL || sr->tsorted);
+
+       CANDINIT(l, sl, lstart, lend, lcnt, lcand, lcandend);
+       CANDINIT(r, sr, rstart, rend, rcnt, rcand, rcandend);
+       lwidth = l->twidth;
+       lvals = (const char *) Tloc(l, 0);
+       if (l->tvarsized && l->ttype) {
+               assert(r->tvarsized && r->ttype);
+               lvars = l->tvheap->base;
+       } else {
+               assert(!r->tvarsized || !r->ttype);
+               lvars = NULL;
+       }
+       /* offset to convert BUN for value in right tail column to OID
+        * in right head column */
+       rseq = r->hseqbase;
+
+       /* basic properties will be adjusted if necessary later on,
+        * they were initially set by joininitresults() */
+
+       /* if an input columns is key, the opposite output column will
+        * be key, and if semi or only_misses is set, the left output
+        * will also be key */
+       r1->tkey = (r->tkey != 0) | semi | only_misses;
+       if (r2) {
+               r2->tkey = l->tkey != 0;
+               /* r2 is not likely to be sorted (although it is
+                * certainly possible) */
+               r2->tsorted = 0;
+               r2->trevsorted = 0;
+               r2->tdense = 0;
+       }
+
+       if (sl)
+               r1->tdense = sl->tdense;
+
+       if (lstart == lend || rstart == rend)
+               return nomatch(r1, r2, l, r, lstart, lend, lcand, lcandend,
+                                  nil_on_miss, only_misses, "imps_hashjoin", 
t0);
+
+       rl = 0;
+#ifndef DISABLE_PARENT_HASH
+       if (VIEWtparent(r)) {
+               BAT *b = BBPdescriptor(VIEWtparent(r));
+               if (b->batPersistence == PERSISTENT || BATcheckhash(b)) {
+                       /* only use parent's hash if it is persistent
+                        * or already has a hash */
+                       ALGODEBUG
+                               fprintf(stderr, "#hashjoin(%s#"BUNFMT"): "
+                                       "using parent(%s#"BUNFMT") for hash\n",
+                                       BATgetId(r), BATcount(r),
+                                       BATgetId(b), BATcount(b));
+                       rl = (BUN) ((r->theap.base - b->theap.base) >> 
r->tshift);
+                       r = b;
+               } else {
+                       ALGODEBUG
+                               fprintf(stderr, "#hashjoin(%s#"BUNFMT"): not "
+                                       "using parent(%s#"BUNFMT") for hash\n",
+                                       BATgetId(r), BATcount(r),
+                                       BATgetId(b), BATcount(b));
+               }
+       }
+#endif
+       rh = rl + rend;
+       rl += rstart;
+       rseq += rstart;
+
+       if (BAThash(r, 0) != GDK_SUCCEED)
+               goto bailout;
+       ri = bat_iterator(r);
+       nrcand = (BUN) (rcandend - rcand);
+       hsh = r->thash;
+       t = ATOMbasetype(r->ttype);
+
+       if (lcand == NULL && rcand == NULL && lvars == NULL &&
+           !nil_matches && !nil_on_miss && !semi && !only_misses &&
+           l->ttype != TYPE_void && (t == TYPE_int || t == TYPE_lng)) {
+               /* special case for a common way of calling this
+                * function */
+               const void *restrict base = Tloc(r, 0);
+
+               if (t == TYPE_int) {
+                       switch (hsh->width) {
+                       case BUN2:
+                               HASHJOIN(int, 2);
+                               break;
+                       case BUN4:
+                               HASHJOIN(int, 4);
+                               break;
+#ifdef BUN8
+                       case BUN8:
+                               HASHJOIN(int, 8);
+                               break;
+#endif
+                       }
+               } else {
+                       /* t == TYPE_lng */
+                       switch (hsh->width) {
+                       case BUN2:
+                               HASHJOIN(lng, 2);
+                               break;
+                       case BUN4:
+                               HASHJOIN(lng, 4);
+                               break;
+#ifdef BUN8
+                       case BUN8:
+                               HASHJOIN(lng, 8);
+                               break;
+#endif
+                       }
+               }
+       } else if (lcand) {
+               while (lcand < lcandend) {
+                       lo = *lcand++;
+                       if (l->ttype == TYPE_void) {
+                               if (l->tseqbase != oid_nil)
+                                       lval = lo - l->hseqbase + l->tseqbase;
+                       } else {
+                               v = VALUE(l, lo - l->hseqbase);
+                       }
+                       nr = 0;
+                       if (!nil_matches && cmp(v, nil) == 0) {
+                               /* no match */
+                       } else if (rcand) {
+                               HASHloop_bound(ri, hsh, rb, v, rl, rh) {
+                                       ro = (oid) (rb - rl + rseq);
+                                       if (!binsearchcand(rcand, 0, nrcand, 
ro))
+                                               continue;
+                                       if (only_misses) {
+                                               nr++;
+                                               break;
+                                       }
+                                       HASHLOOPBODY();
+                                       if (semi)
+                                               break;
+                               }
+                       } else {
+                               HASHloop_bound(ri, hsh, rb, v, rl, rh) {
+                                       ro = (oid) (rb - rl + rseq);
+                                       if (only_misses) {
+                                               nr++;
+                                               break;
+                                       }
+                                       HASHLOOPBODY();
+                                       if (semi)
+                                               break;
+                               }
+                       }
+                       if (nr == 0) {
+                               if (only_misses) {
+                                       nr = 1;
+                                       if (BUNlast(r1) == BATcapacity(r1)) {
+                                               newcap = BATgrows(r1);
+                                               if (newcap > maxsize)
+                                                       newcap = maxsize;
+                                               BATsetcount(r1, BATcount(r1));
+                                               if (BATextend(r1, newcap) != 
GDK_SUCCEED)
+                                                       goto bailout;
+                                       }
+                                       APPEND(r1, lo);
+                                       if (lskipped)
+                                               r1->tdense = 0;
+                               } else if (nil_on_miss) {
+                                       nr = 1;
+                                       r2->tnil = 1;
+                                       r2->tnonil = 0;
+                                       r2->tkey = 0;
+                                       if (BUNlast(r1) == BATcapacity(r1)) {
+                                               newcap = BATgrows(r1);
+                                               if (newcap > maxsize)
+                                                       newcap = maxsize;
+                                               BATsetcount(r1, BATcount(r1));
+                                               BATsetcount(r2, BATcount(r2));
+                                               if (BATextend(r1, newcap) != 
GDK_SUCCEED ||
+                                                   BATextend(r2, newcap) != 
GDK_SUCCEED)
+                                                       goto bailout;
+                                               assert(BATcapacity(r1) == 
BATcapacity(r2));
+                                       }
+                                       APPEND(r1, lo);
+                                       APPEND(r2, oid_nil);
+                               } else {
+                                       lskipped = BATcount(r1) > 0;
+                               }
+                       } else if (only_misses) {
+                               lskipped = BATcount(r1) > 0;
+                       } else {
+                               if (lskipped) {
+                                       /* note, we only get here in
+                                        * an iteration *after*
+                                        * lskipped was first set to
+                                        * 1, i.e. we did indeed skip
+                                        * values in l */
+                                       r1->tdense = 0;
+                               }
+                               if (nr > 1) {
+                                       r1->tkey = 0;
+                                       r1->tdense = 0;
+                               }
+                       }
+                       if (nr > 0 && BATcount(r1) > nr)
+                               r1->trevsorted = 0;
+               }
+       } else {
+               for (lo = lstart + l->hseqbase; lstart < lend; lo++) {
+                       if (l->ttype == TYPE_void) {
+                               if (l->tseqbase != oid_nil)
+                                       lval = lo - l->hseqbase + l->tseqbase;
+                       } else {
+                               v = VALUE(l, lstart);
+                       }
+                       lstart++;
+                       nr = 0;
+                       if (rcand) {
+                               if (nil_matches || cmp(v, nil) != 0) {
+                                       HASHloop_bound(ri, hsh, rb, v, rl, rh) {
+                                               ro = (oid) (rb - rl + rseq);
+                                               if (!binsearchcand(rcand, 0, 
nrcand, ro))
+                                                       continue;
+                                               if (only_misses) {
+                                                       nr++;
+                                                       break;
+                                               }
+                                               HASHLOOPBODY();
+                                               if (semi)
+                                                       break;
+                                       }
+                               }
+                       } else {
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to