Changeset: a3e343de8dc2 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=a3e343de8dc2
Modified Files:
clients/Tests/MAL-signatures.stable.out
clients/Tests/exports.stable.out
gdk/gdk.h
gdk/gdk_join.c
gdk/gdk_rangejoin.mx
monetdb5/modules/kernel/algebra.c
monetdb5/modules/kernel/algebra.h
monetdb5/modules/kernel/algebra.mal
Branch: default
Log Message:
Change subthetajoin interface: operator is integer instead of string.
Also added an opcode JOIN_NE so that antijoin can be done by using
thetajoin.
diffs (truncated from 357 to 300 lines):
diff --git a/clients/Tests/MAL-signatures.stable.out b/clients/Tests/MAL-signatures.stable.out
--- a/clients/Tests/MAL-signatures.stable.out
+++ b/clients/Tests/MAL-signatures.stable.out
@@ -2582,7 +2582,7 @@ command algebra.split(b:bat[:lng,:oid]):
address ALGsplit;
comment Split head into two values
-command
algebra.subthetajoin(l:bat[:oid,:any_1],r:bat[:oid,:any_1],sl:bat[:oid,:oid],sr:bat[:oid,:oid],op:str,nil_matches:bit,estimate:lng)
(X_8:bat[:oid,:oid],X_9:bat[:oid,:oid])
+command
algebra.subthetajoin(l:bat[:oid,:any_1],r:bat[:oid,:any_1],sl:bat[:oid,:oid],sr:bat[:oid,:oid],op:int,nil_matches:bit,estimate:lng)
(X_8:bat[:oid,:oid],X_9:bat[:oid,:oid])
address ALGsubthetajoin;
comment Theta join with candidate lists
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -203,7 +203,7 @@ gdk_return BATsubouterjoin(BAT **r1p, BA
BAT *BATsubselect(BAT *b, BAT *s, const void *tl, const void *th, int li, int
hi, int anti);
gdk_return BATsubsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT
*sr, int nil_matches, BUN estimate);
gdk_return BATsubsort(BAT **sorted, BAT **order, BAT **groups, BAT *b, BAT *o,
BAT *g, int reverse, int stable);
-gdk_return BATsubthetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT
*sr, const char *op, int nil_matches, BUN estimate);
+gdk_return BATsubthetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT
*sr, int op, int nil_matches, BUN estimate);
gdk_return BATsum(void *res, int tp, BAT *b, BAT *s, int skip_nils, int
abort_on_error, int nil_if_empty);
BAT *BATsunion(BAT *b, BAT *c);
BAT *BATsunique(BAT *b);
@@ -828,7 +828,7 @@ str ALGsubsort23(bat *result, bat *norde
str ALGsubsort31(bat *result, bat *bid, bat *order, bat *group, bit *reverse,
bit *stable);
str ALGsubsort32(bat *result, bat *norder, bat *bid, bat *order, bat *group,
bit *reverse, bit *stable);
str ALGsubsort33(bat *result, bat *norder, bat *ngroup, bat *bid, bat *order,
bat *group, bit *reverse, bit *stable);
-str ALGsubthetajoin(bat *r1, bat *r2, bat *l, bat *r, bat *sl, bat *sr, str
*op, bit *nil_matches, lng *estimate);
+str ALGsubthetajoin(bat *r1, bat *r2, bat *l, bat *r, bat *sl, bat *sr, int
*op, bit *nil_matches, lng *estimate);
str ALGsunion(int *result, int *lid, int *rid);
str ALGsunique(int *result, int *bid);
str ALGtdiff(int *result, int *lid, int *rid);
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -3184,6 +3184,7 @@ gdk_export int BATtopN(BAT *b, BUN topN)
#define JOIN_GT 1
#define JOIN_GE 2
#define JOIN_BAND 3
+#define JOIN_NE (-3)
gdk_export BAT *BATsubselect(BAT *b, BAT *s, const void *tl, const void *th,
int li, int hi, int anti);
gdk_export BAT *BATthetasubselect(BAT *b, BAT *s, const void *val, const char
*op);
@@ -3208,7 +3209,7 @@ gdk_export BAT *BATcross(BAT *l, BAT *r)
gdk_export gdk_return BATsubleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT
*sl, BAT *sr, int nil_matches, BUN estimate);
gdk_export gdk_return BATsubouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
BAT *sl, BAT *sr, int nil_matches, BUN estimate);
-gdk_export gdk_return BATsubthetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
BAT *sl, BAT *sr, const char *op, int nil_matches, BUN estimate);
+gdk_export gdk_return BATsubthetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
BAT *sl, BAT *sr, int op, int nil_matches, BUN estimate);
gdk_export gdk_return BATsubsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT
*sl, BAT *sr, int nil_matches, BUN estimate);
gdk_export gdk_return BATsubjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT
*sl, BAT *sr, int nil_matches, BUN estimate);
gdk_export gdk_return BATsubleftfetchjoin(BAT **r1p, BAT **r2p, BAT *l, BAT
*r, BAT *sl, BAT *sr, int nil_matches, BUN estimate);
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -1823,43 +1823,32 @@ BATsubsemijoin(BAT **r1p, BAT **r2p, BAT
}
gdk_return
-BATsubthetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, const
char *op, int nil_matches, BUN estimate)
+BATsubthetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, int
op, int nil_matches, BUN estimate)
{
BAT *r1, *r2;
int opcode = 0;
- if (op[0] == '=' && ((op[1] == '=' && op[2] == 0) || op[1] == 0))
+ /* encode operator as a bit mask into opcode */
+ switch (op) {
+ case JOIN_EQ:
return BATsubjoin(r1p, r2p, l, r, sl, sr, nil_matches,
estimate);
-
- /* encode operator as a bit mask into opcode */
- if (op[0] == '=' && ((op[1] == '=' && op[2] == 0) || op[1] == 0)) {
- /* "=" or "==" */
- opcode |= MASK_EQ;
- } else if (op[0] == '!' && op[1] == '=' && op[2] == 0) {
- /* "!=" (equivalent to "<>") */
- opcode |= MASK_NE;
- } else if (op[0] == '<') {
- if (op[1] == 0) {
- /* "<" */
- opcode |= MASK_LT;
- } else if (op[1] == '=' && op[2] == 0) {
- /* "<=" */
- opcode |= MASK_LE;
- } else if (op[1] == '>' && op[2] == 0) {
- /* "<>" (equivalent to "!=") */
- opcode |= MASK_NE;
- }
- } else if (op[0] == '>') {
- if (op[1] == 0) {
- /* ">" */
- opcode |= MASK_GT;
- } else if (op[1] == '=' && op[2] == 0) {
- /* ">=" */
- opcode |= MASK_GE;
- }
- }
- if (opcode == 0) {
- GDKerror("BATsubthetajoin: unknown operator \"%s\".\n", op);
+ case JOIN_NE:
+ opcode = MASK_NE;
+ break;
+ case JOIN_LT:
+ opcode = MASK_LT;
+ break;
+ case JOIN_LE:
+ opcode = MASK_LE;
+ break;
+ case JOIN_GT:
+ opcode = MASK_GT;
+ break;
+ case JOIN_GE:
+ opcode = MASK_GE;
+ break;
+ default:
+ GDKerror("BATsubthetajoin: unknown operator %d.\n", op);
return GDK_FAIL;
}
@@ -2394,9 +2383,9 @@ BATsemijoin(BAT *l, BAT *r)
}
static BAT *
-do_batjoin(BAT *l, BAT *r, const char *op, BUN estimate,
+do_batjoin(BAT *l, BAT *r, int op, BUN estimate,
gdk_return (*joinfunc)(BAT **, BAT **, BAT *, BAT *, BAT *, BAT *,
int, BUN),
- gdk_return (*joinfunc2)(BAT **, BAT **, BAT *, BAT *, BAT *, BAT *,
const char *, int, BUN))
+ gdk_return (*joinfunc2)(BAT **, BAT **, BAT *, BAT *, BAT *, BAT *,
int, int, BUN))
{
BAT *lmap, *rmap;
BAT *res1, *res2;
@@ -2405,8 +2394,6 @@ do_batjoin(BAT *l, BAT *r, const char *o
/* exactly one of joinfunc and joinfunc2 is not NULL */
assert(joinfunc == NULL || joinfunc2 == NULL);
assert(joinfunc != NULL || joinfunc2 != NULL);
- /* op is only set if joinfunc2 is */
- assert((joinfunc2 == NULL) == (op == NULL));
r = BATmirror(r);
/* r is [any_3,any_2] */
@@ -2465,7 +2452,7 @@ do_batjoin(BAT *l, BAT *r, const char *o
BAT *
BATjoin(BAT *l, BAT *r, BUN estimate)
{
- return do_batjoin(l, r, NULL, estimate, BATsubjoin, NULL);
+ return do_batjoin(l, r, 0, estimate, BATsubjoin, NULL);
}
/* join [any_1,any_2] with [any_2,any_3], return [any_1,any_3];
@@ -2473,35 +2460,16 @@ BATjoin(BAT *l, BAT *r, BUN estimate)
BAT *
BATleftjoin(BAT *l, BAT *r, BUN estimate)
{
- return do_batjoin(l, r, NULL, estimate, BATsubleftjoin, NULL);
+ return do_batjoin(l, r, 0, estimate, BATsubleftjoin, NULL);
}
/* join [any_1,any_2] with [any_2,any_3], return [any_1,any_3] */
BAT *
BATthetajoin(BAT *l, BAT *r, int op, BUN estimate)
{
- const char *ops;
-
- switch (op) {
- case JOIN_EQ:
- return do_batjoin(l, r, NULL, estimate, BATsubjoin, NULL);
- case JOIN_LT:
- ops = "<";
- break;
- case JOIN_LE:
- ops = "<=";
- break;
- case JOIN_GT:
- ops = ">";
- break;
- case JOIN_GE:
- ops = ">=";
- break;
- default:
- assert(0);
- return NULL;
- }
- return do_batjoin(l, r, ops, estimate, NULL, BATsubthetajoin);
+ if (op == JOIN_EQ)
+ return do_batjoin(l, r, 0, estimate, BATsubjoin, NULL);
+ return do_batjoin(l, r, op, estimate, NULL, BATsubthetajoin);
}
/* join [any_1,any_2] with [any_2,any_3], return [any_1,any_3];
@@ -2509,7 +2477,7 @@ BATthetajoin(BAT *l, BAT *r, int op, BUN
BAT *
BATouterjoin(BAT *l, BAT *r, BUN estimate)
{
- return do_batjoin(l, r, NULL, estimate, BATsubouterjoin, NULL);
+ return do_batjoin(l, r, 0, estimate, BATsubouterjoin, NULL);
}
/* join [any_1,any_2] with [any_2,any_3], return [any_1,any_3];
@@ -2517,13 +2485,13 @@ BATouterjoin(BAT *l, BAT *r, BUN estimat
BAT *
BATleftfetchjoin(BAT *l, BAT *r, BUN estimate)
{
- return do_batjoin(l, r, NULL, estimate, BATsubleftfetchjoin, NULL);
+ return do_batjoin(l, r, 0, estimate, BATsubleftfetchjoin, NULL);
}
BAT *
BATantijoin(BAT *l, BAT *r)
{
- return do_batjoin(l, r, "!=",
+ return do_batjoin(l, r, JOIN_NE,
(BUN) MIN((lng) BATcount(l) * BATcount(r), BUN_MAX),
NULL, BATsubthetajoin);
}
diff --git a/gdk/gdk_rangejoin.mx b/gdk/gdk_rangejoin.mx
--- a/gdk/gdk_rangejoin.mx
+++ b/gdk/gdk_rangejoin.mx
@@ -24,7 +24,7 @@ All Rights Reserved.
* @a N. J. Nes
*
* @* Range Join Operators
- * The sql statement b.x <= a.z <= b.y, could be implemented using too
thetajoins.
+ * The sql statement b.x <= a.z <= b.y, could be implemented using two
thetajoins.
* But that results in very large intermediates.
*/
@h
diff --git a/monetdb5/modules/kernel/algebra.c b/monetdb5/modules/kernel/algebra.c
--- a/monetdb5/modules/kernel/algebra.c
+++ b/monetdb5/modules/kernel/algebra.c
@@ -1051,9 +1051,9 @@ ALGrangejoin(int *result, int *lid, int
static str
do_join(bat *r1, bat *r2, bat *lid, bat *rid, bat *slid, bat *srid,
- const char *op, bit *nil_matches, lng *estimate,
+ int op, bit *nil_matches, lng *estimate,
gdk_return (*joinfunc)(BAT **, BAT **, BAT *, BAT *, BAT *, BAT
*, int, BUN),
- gdk_return (*thetafunc)(BAT **, BAT **, BAT *, BAT *, BAT *,
BAT *, const char *, int, BUN),
+ gdk_return (*thetafunc)(BAT **, BAT **, BAT *, BAT *, BAT *,
BAT *, int, int, BUN),
const char *funcname)
{
BAT *left = NULL, *right = NULL, *candleft = NULL, *candright = NULL;
@@ -1074,12 +1074,10 @@ do_join(bat *r1, bat *r2, bat *lid, bat
est = (BUN) *estimate;
if (thetafunc) {
- assert(op != NULL);
assert(joinfunc == NULL);
if ((*thetafunc)(&result1, &result2, left, right, candleft,
candright, op, *nil_matches, est) == GDK_FAIL)
goto fail;
} else {
- assert(op == NULL);
if ((*joinfunc)(&result1, &result2, left, right, candleft,
candright, *nil_matches, est) == GDK_FAIL)
goto fail;
}
@@ -1110,26 +1108,26 @@ do_join(bat *r1, bat *r2, bat *lid, bat
str
ALGsubjoin(bat *r1, bat *r2, bat *lid, bat *rid, bat *slid, bat *srid, bit
*nil_matches, lng *estimate)
{
- return do_join(r1, r2, lid, rid, slid, srid, NULL, nil_matches,
estimate,
+ return do_join(r1, r2, lid, rid, slid, srid, 0, nil_matches, estimate,
BATsubjoin, NULL, "algebra.subjoin");
}
str
ALGsubleftjoin(bat *r1, bat *r2, bat *lid, bat *rid, bat *slid, bat *srid, bit
*nil_matches, lng *estimate)
{
- return do_join(r1, r2, lid, rid, slid, srid, NULL, nil_matches,
estimate,
+ return do_join(r1, r2, lid, rid, slid, srid, 0, nil_matches, estimate,
BATsubleftjoin, NULL, "algebra.subleftjoin");
}
str
ALGsubouterjoin(bat *r1, bat *r2, bat *lid, bat *rid, bat *slid, bat *srid,
bit *nil_matches, lng *estimate)
{
- return do_join(r1, r2, lid, rid, slid, srid, NULL, nil_matches,
estimate,
+ return do_join(r1, r2, lid, rid, slid, srid, 0, nil_matches, estimate,
BATsubouterjoin, NULL,
"algebra.subouterjoin");
}
str
-ALGsubthetajoin(bat *r1, bat *r2, bat *lid, bat *rid, bat *slid, bat *srid,
str *op, bit *nil_matches, lng *estimate)
+ALGsubthetajoin(bat *r1, bat *r2, bat *lid, bat *rid, bat *slid, bat *srid,
int *op, bit *nil_matches, lng *estimate)
{
return do_join(r1, r2, lid, rid, slid, srid, *op, nil_matches, estimate,
NULL, BATsubthetajoin,
"algebra.subthetajoin");
@@ -1290,7 +1288,7 @@ ALGantijoin2( bat *l, bat *r, bat *left,
throw(MAL, "algebra.antijoin", RUNTIME_OBJECT_MISSING);
}
- ret = BATsubthetajoin(&j1, &j2, L, R, NULL, NULL, "!=", 0, BUN_NONE);
+ ret = BATsubthetajoin(&j1, &j2, L, R, NULL, NULL, JOIN_NE, 0, BUN_NONE);
BBPunfix(L->batCacheid);
BBPunfix(R->batCacheid);
if (ret == GDK_FAIL)
@@ -1359,27 +1357,6 @@ ALGthetajoin2( bat *l, bat *r, bat *left
{
BAT *L, *R, *j1, *j2;
gdk_return ret;
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list