Changeset: f3cf07aa9cdd for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=f3cf07aa9cdd
Modified Files:
        clients/Tests/exports.stable.out
        gdk/gdk.h
        gdk/gdk_join.c
        gdk/gdk_storage.c
        monetdb5/modules/kernel/algebra.c
        monetdb5/modules/kernel/algebra.h
        monetdb5/modules/kernel/algebra.mal
Branch: default
Log Message:

The sub*join family of functions (both C and MAL) now takes an extra parameter.
The extra parameter is nil_matches (for MAL: :bit, for C: int), which
indicates whether nils match each other or not.


diffs (truncated from 386 to 300 lines):

diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -196,14 +196,14 @@ BAT *BATsort_rev(BAT *b);
 BAT *BATssort(BAT *b);
 BAT *BATssort_rev(BAT *b);
 gdk_return BATsubcross(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr);
-gdk_return BATsubjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, 
BUN estimate);
-gdk_return BATsubleftfetchjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, 
BAT *sr, BUN estimate);
-gdk_return BATsubleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT 
*sr, BUN estimate);
-gdk_return BATsubouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT 
*sr, BUN estimate);
+gdk_return BATsubjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, 
int nil_matches, BUN estimate);
+gdk_return BATsubleftfetchjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, 
BAT *sr, int nil_matches, BUN estimate);
+gdk_return BATsubleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT 
*sr, int nil_matches, BUN estimate);
+gdk_return BATsubouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT 
*sr, int nil_matches, BUN estimate);
 BAT *BATsubselect(BAT *b, BAT *s, const void *tl, const void *th, int li, int 
hi, int anti);
-gdk_return BATsubsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT 
*sr, BUN estimate);
+gdk_return BATsubsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT 
*sr, int nil_matches, BUN estimate);
 gdk_return BATsubsort(BAT **sorted, BAT **order, BAT **groups, BAT *b, BAT *o, 
BAT *g, int reverse, int stable);
-gdk_return BATsubthetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT 
*sr, const char *op, BUN estimate);
+gdk_return BATsubthetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT 
*sr, const char *op, int nil_matches, BUN estimate);
 gdk_return BATsum(void *res, int tp, BAT *b, BAT *s, int skip_nils, int 
abort_on_error, int nil_if_empty);
 BAT *BATsunion(BAT *b, BAT *c);
 BAT *BATsunique(BAT *b);
@@ -813,9 +813,9 @@ str ALGssort(int *result, int *bid);
 str ALGssort_rev(int *result, int *bid);
 str ALGstdev(dbl *res, int *bid);
 str ALGstdevp(dbl *res, int *bid);
-str ALGsubjoin(bat *r1, bat *r2, bat *l, bat *r, bat *sl, bat *sr, lng 
*estimate);
-str ALGsubleftjoin(bat *r1, bat *r2, bat *l, bat *r, bat *sl, bat *sr, lng 
*estimate);
-str ALGsubouterjoin(bat *r1, bat *r2, bat *l, bat *r, bat *sl, bat *sr, lng 
*estimate);
+str ALGsubjoin(bat *r1, bat *r2, bat *l, bat *r, bat *sl, bat *sr, bit 
*nil_matches, lng *estimate);
+str ALGsubleftjoin(bat *r1, bat *r2, bat *l, bat *r, bat *sl, bat *sr, bit 
*nil_matches, lng *estimate);
+str ALGsubouterjoin(bat *r1, bat *r2, bat *l, bat *r, bat *sl, bat *sr, bit 
*nil_matches, lng *estimate);
 str ALGsubsample(int *result, int *bid, int *param);
 str ALGsubselect1(bat *result, bat *bid, const void *low, const void *high, 
const bit *li, const bit *hi, const bit *anti);
 str ALGsubselect2(bat *result, bat *bid, bat *sid, const void *low, const void 
*high, const bit *li, const bit *hi, const bit *anti);
@@ -829,7 +829,7 @@ str ALGsubsort23(bat *result, bat *norde
 str ALGsubsort31(bat *result, bat *bid, bat *order, bat *group, bit *reverse, 
bit *stable);
 str ALGsubsort32(bat *result, bat *norder, bat *bid, bat *order, bat *group, 
bit *reverse, bit *stable);
 str ALGsubsort33(bat *result, bat *norder, bat *ngroup, bat *bid, bat *order, 
bat *group, bit *reverse, bit *stable);
-str ALGsubthetajoin(bat *r1, bat *r2, bat *l, bat *r, bat *sl, bat *sr, str 
*op, lng *estimate);
+str ALGsubthetajoin(bat *r1, bat *r2, bat *l, bat *r, bat *sl, bat *sr, str 
*op, bit *nil_matches, lng *estimate);
 str ALGsunion(int *result, int *lid, int *rid);
 str ALGsunique(int *result, int *bid);
 str ALGtdiff(int *result, int *lid, int *rid);
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -3156,12 +3156,12 @@ gdk_export gdk_return BATcross1(BAT **r1
 gdk_export gdk_return BATsubcross(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT 
*sl, BAT *sr);
 gdk_export BAT *BATcross(BAT *l, BAT *r);
 
-gdk_export gdk_return BATsubleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT 
*sl, BAT *sr, BUN estimate);
-gdk_export gdk_return BATsubouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, 
BAT *sl, BAT *sr, BUN estimate);
-gdk_export gdk_return BATsubthetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, 
BAT *sl, BAT *sr, const char *op, BUN estimate);
-gdk_export gdk_return BATsubsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT 
*sl, BAT *sr, BUN estimate);
-gdk_export gdk_return BATsubjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT 
*sl, BAT *sr, BUN estimate);
-gdk_export gdk_return BATsubleftfetchjoin(BAT **r1p, BAT **r2p, BAT *l, BAT 
*r, BAT *sl, BAT *sr, BUN estimate);
+gdk_export gdk_return BATsubleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT 
*sl, BAT *sr, int nil_matches, BUN estimate);
+gdk_export gdk_return BATsubouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, 
BAT *sl, BAT *sr, int nil_matches, BUN estimate);
+gdk_export gdk_return BATsubthetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, 
BAT *sl, BAT *sr, const char *op, int nil_matches, BUN estimate);
+gdk_export gdk_return BATsubsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT 
*sl, BAT *sr, int nil_matches, BUN estimate);
+gdk_export gdk_return BATsubjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT 
*sl, BAT *sr, int nil_matches, BUN estimate);
+gdk_export gdk_return BATsubleftfetchjoin(BAT **r1p, BAT **r2p, BAT *l, BAT 
*r, BAT *sl, BAT *sr, int nil_matches, BUN estimate);
 gdk_export BAT *BATproject(BAT *l, BAT *r);
 
 gdk_export BAT *BATslice(BAT *b, BUN low, BUN high);
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -1607,7 +1607,7 @@ thetajoin(BAT *r1, BAT *r2, BAT *l, BAT 
  * matching tuples.  The result is in the same order as l (i.e. r1 is
  * sorted). */
 gdk_return
-BATsubleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, BUN 
estimate)
+BATsubleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, int 
nil_matches, BUN estimate)
 {
        BAT *r1, *r2;
 
@@ -1620,8 +1620,8 @@ BATsubleftjoin(BAT **r1p, BAT **r2p, BAT
        *r1p = r1;
        *r2p = r2;
        if (r->tsorted || r->trevsorted)
-               return mergejoin(r1, r2, l, r, sl, sr, 0, 0, 0, 0);
-       return hashjoin(r1, r2, l, r, sl, sr, 0, 0, 0, 0);
+               return mergejoin(r1, r2, l, r, sl, sr, nil_matches, 0, 0, 0);
+       return hashjoin(r1, r2, l, r, sl, sr, nil_matches, 0, 0, 0);
 }
 
 /* Perform an equi-join over l and r.  Returns two new, aligned,
@@ -1629,7 +1629,7 @@ BATsubleftjoin(BAT **r1p, BAT **r2p, BAT
  * matching tuples.  The result is in the same order as l (i.e. r1 is
  * sorted).  All values in l must match at least one value in r. */
 gdk_return
-BATsubleftfetchjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, 
BUN estimate)
+BATsubleftfetchjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, 
int nil_matches, BUN estimate)
 {
        BAT *r1, *r2;
 
@@ -1642,8 +1642,8 @@ BATsubleftfetchjoin(BAT **r1p, BAT **r2p
        *r1p = r1;
        *r2p = r2;
        if (r->tsorted || r->trevsorted)
-               return mergejoin(r1, r2, l, r, sl, sr, 0, 0, 0, 1);
-       return hashjoin(r1, r2, l, r, sl, sr, 0, 0, 0, 1);
+               return mergejoin(r1, r2, l, r, sl, sr, nil_matches, 0, 0, 1);
+       return hashjoin(r1, r2, l, r, sl, sr, nil_matches, 0, 0, 1);
 }
 
 /* Performs a left outer join over l and r.  Returns two new, aligned,
@@ -1652,7 +1652,7 @@ BATsubleftfetchjoin(BAT **r1p, BAT **r2p
  * second output bat if the value in l does not occur in r.  The
  * result is in the same order as l (i.e. r1 is sorted). */
 gdk_return
-BATsubouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, BUN 
estimate)
+BATsubouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, int 
nil_matches, BUN estimate)
 {
        BAT *r1, *r2;
 
@@ -1665,8 +1665,8 @@ BATsubouterjoin(BAT **r1p, BAT **r2p, BA
        *r1p = r1;
        *r2p = r2;
        if (r->tsorted || r->trevsorted)
-               return mergejoin(r1, r2, l, r, sl, sr, 0, 1, 0, 0);
-       return hashjoin(r1, r2, l, r, sl, sr, 0, 1, 0, 0);
+               return mergejoin(r1, r2, l, r, sl, sr, nil_matches, 1, 0, 0);
+       return hashjoin(r1, r2, l, r, sl, sr, nil_matches, 1, 0, 0);
 }
 
 /* Perform a semi-join over l and r.  Returns two new, aligned,
@@ -1674,7 +1674,7 @@ BATsubouterjoin(BAT **r1p, BAT **r2p, BA
  * matching tuples.  The result is in the same order as l (i.e. r1 is
  * sorted). */
 gdk_return
-BATsubsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, BUN 
estimate)
+BATsubsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, int 
nil_matches, BUN estimate)
 {
        BAT *r1, *r2;
 
@@ -1687,18 +1687,18 @@ BATsubsemijoin(BAT **r1p, BAT **r2p, BAT
        *r1p = r1;
        *r2p = r2;
        if (r->tsorted || r->trevsorted)
-               return mergejoin(r1, r2, l, r, sl, sr, 0, 0, 1, 0);
-       return hashjoin(r1, r2, l, r, sl, sr, 0, 0, 1, 0);
+               return mergejoin(r1, r2, l, r, sl, sr, nil_matches, 0, 1, 0);
+       return hashjoin(r1, r2, l, r, sl, sr, nil_matches, 0, 1, 0);
 }
 
 gdk_return
-BATsubthetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, const 
char *op, BUN estimate)
+BATsubthetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, const 
char *op, int nil_matches, BUN estimate)
 {
        BAT *r1, *r2;
        int opcode = 0;
 
        if (op[0] == '=' && ((op[1] == '=' && op[2] == 0) || op[1] == 0))
-               return BATsubjoin(r1p, r2p, l, r, sl, sr, estimate);
+               return BATsubjoin(r1p, r2p, l, r, sl, sr, nil_matches, 
estimate);
 
        /* encode operator as a bit mask into opcode */
        if (op[0] == '=' && ((op[1] == '=' && op[2] == 0) || op[1] == 0)) {
@@ -1748,7 +1748,7 @@ BATsubthetajoin(BAT **r1p, BAT **r2p, BA
 }
 
 gdk_return
-BATsubjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, BUN 
estimate)
+BATsubjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, int 
nil_matches, BUN estimate)
 {
        BAT *r1, *r2;
        BUN lcount, rcount;
@@ -1782,7 +1782,7 @@ BATsubjoin(BAT **r1p, BAT **r2p, BAT *l,
        swap = 0;
        if ((l->tsorted || l->trevsorted) && (r->tsorted || r->trevsorted)) {
                /* both sorted, don't swap */
-               return mergejoin(r1, r2, l, r, sl, sr, 0, 0, 0, 0);
+               return mergejoin(r1, r2, l, r, sl, sr, nil_matches, 0, 0, 0);
        } else if (l->T->hash && r->T->hash) {
                /* both have hash, smallest on right */
                if (lcount < rcount)
@@ -1795,18 +1795,18 @@ BATsubjoin(BAT **r1p, BAT **r2p, BAT *l,
                swap = 0;
        } else if (l->tsorted || l->trevsorted) {
                /* left is sorted, swap */
-               return mergejoin(r2, r1, r, l, sr, sl, 0, 0, 0, 0);
+               return mergejoin(r2, r1, r, l, sr, sl, nil_matches, 0, 0, 0);
        } else if (r->tsorted || r->trevsorted) {
                /* right is sorted, don't swap */
-               return mergejoin(r1, r2, l, r, sl, sr, 0, 0, 0, 0);
+               return mergejoin(r1, r2, l, r, sl, sr, nil_matches, 0, 0, 0);
        } else if (BATcount(l) < BATcount(r)) {
                /* no hashes, not sorted, create hash on smallest BAT */
                swap = 1;
        }
        if (swap) {
-               return hashjoin(r2, r1, r, l, sr, sl, 0, 0, 0, 0);
+               return hashjoin(r2, r1, r, l, sr, sl, nil_matches, 0, 0, 0);
        } else {
-               return hashjoin(r1, r2, l, r, sl, sr, 0, 0, 0, 0);
+               return hashjoin(r1, r2, l, r, sl, sr, nil_matches, 0, 0, 0);
        }
 }
 
@@ -1985,7 +1985,7 @@ BATsemijoin(BAT *l, BAT *r)
                /* r is [dense2,any_1] */
                BBPfix(r->batCacheid);
        }
-       if (BATsubsemijoin(&res1, &res2, l, r, NULL, NULL, BATcount(l)) == 
GDK_FAIL) {
+       if (BATsubsemijoin(&res1, &res2, l, r, NULL, NULL, 0, BATcount(l)) == 
GDK_FAIL) {
                if (lmap)
                        BBPunfix(lmap->batCacheid);
                BBPunfix(l->batCacheid);
@@ -2022,8 +2022,8 @@ BATsemijoin(BAT *l, BAT *r)
 
 static BAT *
 do_batjoin(BAT *l, BAT *r, const char *op, BUN estimate,
-          gdk_return (*joinfunc)(BAT **, BAT **, BAT *, BAT *, BAT *, BAT *, 
BUN),
-          gdk_return (*joinfunc2)(BAT **, BAT **, BAT *, BAT *, BAT *, BAT *, 
const char *, BUN))
+          gdk_return (*joinfunc)(BAT **, BAT **, BAT *, BAT *, BAT *, BAT *, 
int, BUN),
+          gdk_return (*joinfunc2)(BAT **, BAT **, BAT *, BAT *, BAT *, BAT *, 
const char *, int, BUN))
 {
        BAT *lmap, *rmap;
        BAT *res1, *res2;
@@ -2056,7 +2056,7 @@ do_batjoin(BAT *l, BAT *r, const char *o
                rmap = NULL;
                BBPfix(r->batCacheid);
        }
-       if ((joinfunc ? (*joinfunc)(&res1, &res2, l, r, NULL, NULL, estimate) : 
(*joinfunc2)(&res1, &res2, l, r, NULL, NULL, op, estimate)) == GDK_FAIL) {
+       if ((joinfunc ? (*joinfunc)(&res1, &res2, l, r, NULL, NULL, 0, 
estimate) : (*joinfunc2)(&res1, &res2, l, r, NULL, NULL, op, 0, estimate)) == 
GDK_FAIL) {
                BBPunfix(l->batCacheid);
                BBPunfix(r->batCacheid);
                if (lmap)
diff --git a/gdk/gdk_storage.c b/gdk/gdk_storage.c
--- a/gdk/gdk_storage.c
+++ b/gdk/gdk_storage.c
@@ -947,7 +947,7 @@ BATmultiprintf(stream *s, int argc, BAT 
 
                if ((r = BATmirror(BATmark(argv[i], 0))) == NULL)
                        goto bailout;
-               ret = BATsubleftjoin(&a, &b, bats[0], r, NULL, NULL, BUN_NONE);
+               ret = BATsubleftjoin(&a, &b, bats[0], r, NULL, NULL, 0, 
BUN_NONE);
                BBPunfix(r->batCacheid);
                if (ret == GDK_FAIL)
                        goto bailout;
diff --git a/monetdb5/modules/kernel/algebra.c 
b/monetdb5/modules/kernel/algebra.c
--- a/monetdb5/modules/kernel/algebra.c
+++ b/monetdb5/modules/kernel/algebra.c
@@ -1051,9 +1051,9 @@ ALGrangejoin(int *result, int *lid, int 
 
 static str
 do_join(bat *r1, bat *r2, bat *lid, bat *rid, bat *slid, bat *srid,
-               const char *op, lng *estimate,
-               gdk_return (*joinfunc)(BAT **, BAT **, BAT *, BAT *, BAT *, BAT 
*, BUN),
-               gdk_return (*thetafunc)(BAT **, BAT **, BAT *, BAT *, BAT *, 
BAT *, const char *, BUN),
+               const char *op, bit *nil_matches, lng *estimate,
+               gdk_return (*joinfunc)(BAT **, BAT **, BAT *, BAT *, BAT *, BAT 
*, int, BUN),
+               gdk_return (*thetafunc)(BAT **, BAT **, BAT *, BAT *, BAT *, 
BAT *, const char *, int, BUN),
                const char *funcname)
 {
        BAT *left = NULL, *right = NULL, *candleft = NULL, *candright = NULL;
@@ -1076,11 +1076,11 @@ do_join(bat *r1, bat *r2, bat *lid, bat 
        if (thetafunc) {
                assert(op != NULL);
                assert(joinfunc == NULL);
-               if ((*thetafunc)(&result1, &result2, left, right, candleft, 
candright, op, est) == GDK_FAIL)
+               if ((*thetafunc)(&result1, &result2, left, right, candleft, 
candright, op, *nil_matches, est) == GDK_FAIL)
                        goto fail;
        } else {
                assert(op == NULL);
-               if ((*joinfunc)(&result1, &result2, left, right, candleft, 
candright, est) == GDK_FAIL)
+               if ((*joinfunc)(&result1, &result2, left, right, candleft, 
candright, *nil_matches, est) == GDK_FAIL)
                        goto fail;
        }
        *r1 = result1->batCacheid;
@@ -1108,30 +1108,30 @@ do_join(bat *r1, bat *r2, bat *lid, bat 
 }
 
 str
-ALGsubjoin(bat *r1, bat *r2, bat *lid, bat *rid, bat *slid, bat *srid, lng 
*estimate)
+ALGsubjoin(bat *r1, bat *r2, bat *lid, bat *rid, bat *slid, bat *srid, bit 
*nil_matches, lng *estimate)
 {
-       return do_join(r1, r2, lid, rid, slid, srid, NULL, estimate,
+       return do_join(r1, r2, lid, rid, slid, srid, NULL, nil_matches, 
estimate,
                                   BATsubjoin, NULL, "algebra.subjoin");
 }
 
 str
-ALGsubleftjoin(bat *r1, bat *r2, bat *lid, bat *rid, bat *slid, bat *srid, lng 
*estimate)
+ALGsubleftjoin(bat *r1, bat *r2, bat *lid, bat *rid, bat *slid, bat *srid, bit 
*nil_matches, lng *estimate)
 {
-       return do_join(r1, r2, lid, rid, slid, srid, NULL, estimate,
+       return do_join(r1, r2, lid, rid, slid, srid, NULL, nil_matches, 
estimate,
                                   BATsubleftjoin, NULL, "algebra.subleftjoin");
 }
 
 str
-ALGsubouterjoin(bat *r1, bat *r2, bat *lid, bat *rid, bat *slid, bat *srid, 
lng *estimate)
+ALGsubouterjoin(bat *r1, bat *r2, bat *lid, bat *rid, bat *slid, bat *srid, 
bit *nil_matches, lng *estimate)
 {
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to