Changeset: f3cf07aa9cdd for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=f3cf07aa9cdd
Modified Files:
clients/Tests/exports.stable.out
gdk/gdk.h
gdk/gdk_join.c
gdk/gdk_storage.c
monetdb5/modules/kernel/algebra.c
monetdb5/modules/kernel/algebra.h
monetdb5/modules/kernel/algebra.mal
Branch: default
Log Message:
The sub*join family of functions (both C and MAL) now takes an extra parameter.
The extra parameter is nil_matches (for MAL: :bit, for C: int), which
indicates whether nils match each other or not.
diffs (truncated from 386 to 300 lines):
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -196,14 +196,14 @@ BAT *BATsort_rev(BAT *b);
BAT *BATssort(BAT *b);
BAT *BATssort_rev(BAT *b);
gdk_return BATsubcross(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr);
-gdk_return BATsubjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
BUN estimate);
-gdk_return BATsubleftfetchjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl,
BAT *sr, BUN estimate);
-gdk_return BATsubleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT
*sr, BUN estimate);
-gdk_return BATsubouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT
*sr, BUN estimate);
+gdk_return BATsubjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
int nil_matches, BUN estimate);
+gdk_return BATsubleftfetchjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl,
BAT *sr, int nil_matches, BUN estimate);
+gdk_return BATsubleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT
*sr, int nil_matches, BUN estimate);
+gdk_return BATsubouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT
*sr, int nil_matches, BUN estimate);
BAT *BATsubselect(BAT *b, BAT *s, const void *tl, const void *th, int li, int
hi, int anti);
-gdk_return BATsubsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT
*sr, BUN estimate);
+gdk_return BATsubsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT
*sr, int nil_matches, BUN estimate);
gdk_return BATsubsort(BAT **sorted, BAT **order, BAT **groups, BAT *b, BAT *o,
BAT *g, int reverse, int stable);
-gdk_return BATsubthetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT
*sr, const char *op, BUN estimate);
+gdk_return BATsubthetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT
*sr, const char *op, int nil_matches, BUN estimate);
gdk_return BATsum(void *res, int tp, BAT *b, BAT *s, int skip_nils, int
abort_on_error, int nil_if_empty);
BAT *BATsunion(BAT *b, BAT *c);
BAT *BATsunique(BAT *b);
@@ -813,9 +813,9 @@ str ALGssort(int *result, int *bid);
str ALGssort_rev(int *result, int *bid);
str ALGstdev(dbl *res, int *bid);
str ALGstdevp(dbl *res, int *bid);
-str ALGsubjoin(bat *r1, bat *r2, bat *l, bat *r, bat *sl, bat *sr, lng
*estimate);
-str ALGsubleftjoin(bat *r1, bat *r2, bat *l, bat *r, bat *sl, bat *sr, lng
*estimate);
-str ALGsubouterjoin(bat *r1, bat *r2, bat *l, bat *r, bat *sl, bat *sr, lng
*estimate);
+str ALGsubjoin(bat *r1, bat *r2, bat *l, bat *r, bat *sl, bat *sr, bit
*nil_matches, lng *estimate);
+str ALGsubleftjoin(bat *r1, bat *r2, bat *l, bat *r, bat *sl, bat *sr, bit
*nil_matches, lng *estimate);
+str ALGsubouterjoin(bat *r1, bat *r2, bat *l, bat *r, bat *sl, bat *sr, bit
*nil_matches, lng *estimate);
str ALGsubsample(int *result, int *bid, int *param);
str ALGsubselect1(bat *result, bat *bid, const void *low, const void *high,
const bit *li, const bit *hi, const bit *anti);
str ALGsubselect2(bat *result, bat *bid, bat *sid, const void *low, const void
*high, const bit *li, const bit *hi, const bit *anti);
@@ -829,7 +829,7 @@ str ALGsubsort23(bat *result, bat *norde
str ALGsubsort31(bat *result, bat *bid, bat *order, bat *group, bit *reverse,
bit *stable);
str ALGsubsort32(bat *result, bat *norder, bat *bid, bat *order, bat *group,
bit *reverse, bit *stable);
str ALGsubsort33(bat *result, bat *norder, bat *ngroup, bat *bid, bat *order,
bat *group, bit *reverse, bit *stable);
-str ALGsubthetajoin(bat *r1, bat *r2, bat *l, bat *r, bat *sl, bat *sr, str
*op, lng *estimate);
+str ALGsubthetajoin(bat *r1, bat *r2, bat *l, bat *r, bat *sl, bat *sr, str
*op, bit *nil_matches, lng *estimate);
str ALGsunion(int *result, int *lid, int *rid);
str ALGsunique(int *result, int *bid);
str ALGtdiff(int *result, int *lid, int *rid);
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -3156,12 +3156,12 @@ gdk_export gdk_return BATcross1(BAT **r1
gdk_export gdk_return BATsubcross(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT
*sl, BAT *sr);
gdk_export BAT *BATcross(BAT *l, BAT *r);
-gdk_export gdk_return BATsubleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT
*sl, BAT *sr, BUN estimate);
-gdk_export gdk_return BATsubouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
BAT *sl, BAT *sr, BUN estimate);
-gdk_export gdk_return BATsubthetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
BAT *sl, BAT *sr, const char *op, BUN estimate);
-gdk_export gdk_return BATsubsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT
*sl, BAT *sr, BUN estimate);
-gdk_export gdk_return BATsubjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT
*sl, BAT *sr, BUN estimate);
-gdk_export gdk_return BATsubleftfetchjoin(BAT **r1p, BAT **r2p, BAT *l, BAT
*r, BAT *sl, BAT *sr, BUN estimate);
+gdk_export gdk_return BATsubleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT
*sl, BAT *sr, int nil_matches, BUN estimate);
+gdk_export gdk_return BATsubouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
BAT *sl, BAT *sr, int nil_matches, BUN estimate);
+gdk_export gdk_return BATsubthetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
BAT *sl, BAT *sr, const char *op, int nil_matches, BUN estimate);
+gdk_export gdk_return BATsubsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT
*sl, BAT *sr, int nil_matches, BUN estimate);
+gdk_export gdk_return BATsubjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT
*sl, BAT *sr, int nil_matches, BUN estimate);
+gdk_export gdk_return BATsubleftfetchjoin(BAT **r1p, BAT **r2p, BAT *l, BAT
*r, BAT *sl, BAT *sr, int nil_matches, BUN estimate);
gdk_export BAT *BATproject(BAT *l, BAT *r);
gdk_export BAT *BATslice(BAT *b, BUN low, BUN high);
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -1607,7 +1607,7 @@ thetajoin(BAT *r1, BAT *r2, BAT *l, BAT
* matching tuples. The result is in the same order as l (i.e. r1 is
* sorted). */
gdk_return
-BATsubleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, BUN
estimate)
+BATsubleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, int
nil_matches, BUN estimate)
{
BAT *r1, *r2;
@@ -1620,8 +1620,8 @@ BATsubleftjoin(BAT **r1p, BAT **r2p, BAT
*r1p = r1;
*r2p = r2;
if (r->tsorted || r->trevsorted)
- return mergejoin(r1, r2, l, r, sl, sr, 0, 0, 0, 0);
- return hashjoin(r1, r2, l, r, sl, sr, 0, 0, 0, 0);
+ return mergejoin(r1, r2, l, r, sl, sr, nil_matches, 0, 0, 0);
+ return hashjoin(r1, r2, l, r, sl, sr, nil_matches, 0, 0, 0);
}
/* Perform an equi-join over l and r. Returns two new, aligned,
@@ -1629,7 +1629,7 @@ BATsubleftjoin(BAT **r1p, BAT **r2p, BAT
* matching tuples. The result is in the same order as l (i.e. r1 is
* sorted). All values in l must match at least one value in r. */
gdk_return
-BATsubleftfetchjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
BUN estimate)
+BATsubleftfetchjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
int nil_matches, BUN estimate)
{
BAT *r1, *r2;
@@ -1642,8 +1642,8 @@ BATsubleftfetchjoin(BAT **r1p, BAT **r2p
*r1p = r1;
*r2p = r2;
if (r->tsorted || r->trevsorted)
- return mergejoin(r1, r2, l, r, sl, sr, 0, 0, 0, 1);
- return hashjoin(r1, r2, l, r, sl, sr, 0, 0, 0, 1);
+ return mergejoin(r1, r2, l, r, sl, sr, nil_matches, 0, 0, 1);
+ return hashjoin(r1, r2, l, r, sl, sr, nil_matches, 0, 0, 1);
}
/* Performs a left outer join over l and r. Returns two new, aligned,
@@ -1652,7 +1652,7 @@ BATsubleftfetchjoin(BAT **r1p, BAT **r2p
* second output bat if the value in l does not occur in r. The
* result is in the same order as l (i.e. r1 is sorted). */
gdk_return
-BATsubouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, BUN
estimate)
+BATsubouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, int
nil_matches, BUN estimate)
{
BAT *r1, *r2;
@@ -1665,8 +1665,8 @@ BATsubouterjoin(BAT **r1p, BAT **r2p, BA
*r1p = r1;
*r2p = r2;
if (r->tsorted || r->trevsorted)
- return mergejoin(r1, r2, l, r, sl, sr, 0, 1, 0, 0);
- return hashjoin(r1, r2, l, r, sl, sr, 0, 1, 0, 0);
+ return mergejoin(r1, r2, l, r, sl, sr, nil_matches, 1, 0, 0);
+ return hashjoin(r1, r2, l, r, sl, sr, nil_matches, 1, 0, 0);
}
/* Perform a semi-join over l and r. Returns two new, aligned,
@@ -1674,7 +1674,7 @@ BATsubouterjoin(BAT **r1p, BAT **r2p, BA
* matching tuples. The result is in the same order as l (i.e. r1 is
* sorted). */
gdk_return
-BATsubsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, BUN
estimate)
+BATsubsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, int
nil_matches, BUN estimate)
{
BAT *r1, *r2;
@@ -1687,18 +1687,18 @@ BATsubsemijoin(BAT **r1p, BAT **r2p, BAT
*r1p = r1;
*r2p = r2;
if (r->tsorted || r->trevsorted)
- return mergejoin(r1, r2, l, r, sl, sr, 0, 0, 1, 0);
- return hashjoin(r1, r2, l, r, sl, sr, 0, 0, 1, 0);
+ return mergejoin(r1, r2, l, r, sl, sr, nil_matches, 0, 1, 0);
+ return hashjoin(r1, r2, l, r, sl, sr, nil_matches, 0, 1, 0);
}
gdk_return
-BATsubthetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, const
char *op, BUN estimate)
+BATsubthetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, const
char *op, int nil_matches, BUN estimate)
{
BAT *r1, *r2;
int opcode = 0;
if (op[0] == '=' && ((op[1] == '=' && op[2] == 0) || op[1] == 0))
- return BATsubjoin(r1p, r2p, l, r, sl, sr, estimate);
+ return BATsubjoin(r1p, r2p, l, r, sl, sr, nil_matches,
estimate);
/* encode operator as a bit mask into opcode */
if (op[0] == '=' && ((op[1] == '=' && op[2] == 0) || op[1] == 0)) {
@@ -1748,7 +1748,7 @@ BATsubthetajoin(BAT **r1p, BAT **r2p, BA
}
gdk_return
-BATsubjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, BUN
estimate)
+BATsubjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, int
nil_matches, BUN estimate)
{
BAT *r1, *r2;
BUN lcount, rcount;
@@ -1782,7 +1782,7 @@ BATsubjoin(BAT **r1p, BAT **r2p, BAT *l,
swap = 0;
if ((l->tsorted || l->trevsorted) && (r->tsorted || r->trevsorted)) {
/* both sorted, don't swap */
- return mergejoin(r1, r2, l, r, sl, sr, 0, 0, 0, 0);
+ return mergejoin(r1, r2, l, r, sl, sr, nil_matches, 0, 0, 0);
} else if (l->T->hash && r->T->hash) {
/* both have hash, smallest on right */
if (lcount < rcount)
@@ -1795,18 +1795,18 @@ BATsubjoin(BAT **r1p, BAT **r2p, BAT *l,
swap = 0;
} else if (l->tsorted || l->trevsorted) {
/* left is sorted, swap */
- return mergejoin(r2, r1, r, l, sr, sl, 0, 0, 0, 0);
+ return mergejoin(r2, r1, r, l, sr, sl, nil_matches, 0, 0, 0);
} else if (r->tsorted || r->trevsorted) {
/* right is sorted, don't swap */
- return mergejoin(r1, r2, l, r, sl, sr, 0, 0, 0, 0);
+ return mergejoin(r1, r2, l, r, sl, sr, nil_matches, 0, 0, 0);
} else if (BATcount(l) < BATcount(r)) {
/* no hashes, not sorted, create hash on smallest BAT */
swap = 1;
}
if (swap) {
- return hashjoin(r2, r1, r, l, sr, sl, 0, 0, 0, 0);
+ return hashjoin(r2, r1, r, l, sr, sl, nil_matches, 0, 0, 0);
} else {
- return hashjoin(r1, r2, l, r, sl, sr, 0, 0, 0, 0);
+ return hashjoin(r1, r2, l, r, sl, sr, nil_matches, 0, 0, 0);
}
}
@@ -1985,7 +1985,7 @@ BATsemijoin(BAT *l, BAT *r)
/* r is [dense2,any_1] */
BBPfix(r->batCacheid);
}
- if (BATsubsemijoin(&res1, &res2, l, r, NULL, NULL, BATcount(l)) ==
GDK_FAIL) {
+ if (BATsubsemijoin(&res1, &res2, l, r, NULL, NULL, 0, BATcount(l)) ==
GDK_FAIL) {
if (lmap)
BBPunfix(lmap->batCacheid);
BBPunfix(l->batCacheid);
@@ -2022,8 +2022,8 @@ BATsemijoin(BAT *l, BAT *r)
static BAT *
do_batjoin(BAT *l, BAT *r, const char *op, BUN estimate,
- gdk_return (*joinfunc)(BAT **, BAT **, BAT *, BAT *, BAT *, BAT *,
BUN),
- gdk_return (*joinfunc2)(BAT **, BAT **, BAT *, BAT *, BAT *, BAT *,
const char *, BUN))
+ gdk_return (*joinfunc)(BAT **, BAT **, BAT *, BAT *, BAT *, BAT *,
int, BUN),
+ gdk_return (*joinfunc2)(BAT **, BAT **, BAT *, BAT *, BAT *, BAT *,
const char *, int, BUN))
{
BAT *lmap, *rmap;
BAT *res1, *res2;
@@ -2056,7 +2056,7 @@ do_batjoin(BAT *l, BAT *r, const char *o
rmap = NULL;
BBPfix(r->batCacheid);
}
- if ((joinfunc ? (*joinfunc)(&res1, &res2, l, r, NULL, NULL, estimate) :
(*joinfunc2)(&res1, &res2, l, r, NULL, NULL, op, estimate)) == GDK_FAIL) {
+ if ((joinfunc ? (*joinfunc)(&res1, &res2, l, r, NULL, NULL, 0,
estimate) : (*joinfunc2)(&res1, &res2, l, r, NULL, NULL, op, 0, estimate)) ==
GDK_FAIL) {
BBPunfix(l->batCacheid);
BBPunfix(r->batCacheid);
if (lmap)
diff --git a/gdk/gdk_storage.c b/gdk/gdk_storage.c
--- a/gdk/gdk_storage.c
+++ b/gdk/gdk_storage.c
@@ -947,7 +947,7 @@ BATmultiprintf(stream *s, int argc, BAT
if ((r = BATmirror(BATmark(argv[i], 0))) == NULL)
goto bailout;
- ret = BATsubleftjoin(&a, &b, bats[0], r, NULL, NULL, BUN_NONE);
+ ret = BATsubleftjoin(&a, &b, bats[0], r, NULL, NULL, 0,
BUN_NONE);
BBPunfix(r->batCacheid);
if (ret == GDK_FAIL)
goto bailout;
diff --git a/monetdb5/modules/kernel/algebra.c
b/monetdb5/modules/kernel/algebra.c
--- a/monetdb5/modules/kernel/algebra.c
+++ b/monetdb5/modules/kernel/algebra.c
@@ -1051,9 +1051,9 @@ ALGrangejoin(int *result, int *lid, int
static str
do_join(bat *r1, bat *r2, bat *lid, bat *rid, bat *slid, bat *srid,
- const char *op, lng *estimate,
- gdk_return (*joinfunc)(BAT **, BAT **, BAT *, BAT *, BAT *, BAT
*, BUN),
- gdk_return (*thetafunc)(BAT **, BAT **, BAT *, BAT *, BAT *,
BAT *, const char *, BUN),
+ const char *op, bit *nil_matches, lng *estimate,
+ gdk_return (*joinfunc)(BAT **, BAT **, BAT *, BAT *, BAT *, BAT
*, int, BUN),
+ gdk_return (*thetafunc)(BAT **, BAT **, BAT *, BAT *, BAT *,
BAT *, const char *, int, BUN),
const char *funcname)
{
BAT *left = NULL, *right = NULL, *candleft = NULL, *candright = NULL;
@@ -1076,11 +1076,11 @@ do_join(bat *r1, bat *r2, bat *lid, bat
if (thetafunc) {
assert(op != NULL);
assert(joinfunc == NULL);
- if ((*thetafunc)(&result1, &result2, left, right, candleft,
candright, op, est) == GDK_FAIL)
+ if ((*thetafunc)(&result1, &result2, left, right, candleft,
candright, op, *nil_matches, est) == GDK_FAIL)
goto fail;
} else {
assert(op == NULL);
- if ((*joinfunc)(&result1, &result2, left, right, candleft,
candright, est) == GDK_FAIL)
+ if ((*joinfunc)(&result1, &result2, left, right, candleft,
candright, *nil_matches, est) == GDK_FAIL)
goto fail;
}
*r1 = result1->batCacheid;
@@ -1108,30 +1108,30 @@ do_join(bat *r1, bat *r2, bat *lid, bat
}
str
-ALGsubjoin(bat *r1, bat *r2, bat *lid, bat *rid, bat *slid, bat *srid, lng
*estimate)
+ALGsubjoin(bat *r1, bat *r2, bat *lid, bat *rid, bat *slid, bat *srid, bit
*nil_matches, lng *estimate)
{
- return do_join(r1, r2, lid, rid, slid, srid, NULL, estimate,
+ return do_join(r1, r2, lid, rid, slid, srid, NULL, nil_matches,
estimate,
BATsubjoin, NULL, "algebra.subjoin");
}
str
-ALGsubleftjoin(bat *r1, bat *r2, bat *lid, bat *rid, bat *slid, bat *srid, lng
*estimate)
+ALGsubleftjoin(bat *r1, bat *r2, bat *lid, bat *rid, bat *slid, bat *srid, bit
*nil_matches, lng *estimate)
{
- return do_join(r1, r2, lid, rid, slid, srid, NULL, estimate,
+ return do_join(r1, r2, lid, rid, slid, srid, NULL, nil_matches,
estimate,
BATsubleftjoin, NULL, "algebra.subleftjoin");
}
str
-ALGsubouterjoin(bat *r1, bat *r2, bat *lid, bat *rid, bat *slid, bat *srid,
lng *estimate)
+ALGsubouterjoin(bat *r1, bat *r2, bat *lid, bat *rid, bat *slid, bat *srid,
bit *nil_matches, lng *estimate)
{
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list