Changeset: 9e6df0555030 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=9e6df0555030
Modified Files:
clients/Tests/MAL-signatures.stable.out
clients/Tests/MAL-signatures.stable.out.int128
clients/Tests/exports.stable.out
gdk/gdk.h
gdk/gdk_batop.c
gdk/gdk_join.c
gdk/gdk_logger.c
monetdb5/mal/mal_authorize.c
monetdb5/modules/kernel/algebra.c
monetdb5/modules/kernel/algebra.h
monetdb5/modules/kernel/algebra.mal
sql/backends/monet5/sql.c
sql/backends/monet5/sql_statement.c
sql/storage/bat/bat_table.c
sql/test/FeatureRequests/Tests/foreign_key_outer_join_dead_code_elimination-explain-1join-query.stable.out
sql/test/FeatureRequests/Tests/foreign_key_outer_join_dead_code_elimination-explain-2join-query.stable.out
Branch: subquery
Log Message:
Added an extra :bit argument (nil_clears) to algebra.difference to support NOT IN queries.
diffs (truncated from 713 to 300 lines):
diff --git a/clients/Tests/MAL-signatures.stable.out
b/clients/Tests/MAL-signatures.stable.out
--- a/clients/Tests/MAL-signatures.stable.out
+++ b/clients/Tests/MAL-signatures.stable.out
@@ -524,7 +524,7 @@ Ready.
[ "algebra", "bandjoin", "command algebra.bandjoin(l:bat[:any_1],
r:bat[:any_1], sl:bat[:oid], sr:bat[:oid], c1:any_1, c2:any_1, li:bit, hi:bit,
estimate:lng) (X_0:bat[:oid], X_1:bat[:oid]) ", "ALGbandjoin;", "Band join:
values in l and r match if r - c1 <[=] l <[=] r + c2" ]
[ "algebra", "copy", "command algebra.copy(b:bat[:any_1]):bat[:any_1] ",
"ALGcopy;", "Returns physical copy of a BAT." ]
[ "algebra", "crossproduct", "command algebra.crossproduct(left:bat[:any_1],
right:bat[:any_2]) (l:bat[:oid], r:bat[:oid]) ", "ALGcrossproduct2;",
"Returns 2 columns with all BUNs, consisting of the head-oids\n\t from 'left'
and 'right' for which there are BUNs in 'left'\n\t and 'right' with equal
tails" ]
-[ "algebra", "difference", "command algebra.difference(l:bat[:any_1],
r:bat[:any_1], sl:bat[:oid], sr:bat[:oid], nil_matches:bit,
estimate:lng):bat[:oid] ", "ALGdifference;", "Difference of l and r
with candidate lists" ]
+[ "algebra", "difference", "command algebra.difference(l:bat[:any_1],
r:bat[:any_1], sl:bat[:oid], sr:bat[:oid], nil_matches:bit, nil_clears:bit,
estimate:lng):bat[:oid] ", "ALGdifference;", "Difference of l and r
with candidate lists" ]
[ "algebra", "exist", "command algebra.exist(b:bat[:any_1],
val:any_1):bit ", "ALGexist;", "Returns whether 'val' occurs in b." ]
[ "algebra", "fetch", "command algebra.fetch(b:bat[:any_1],
x:oid):any_1 ", "ALGfetchoid;", "Returns the value of the BUN at x-th
position with 0 <= x < b.count" ]
[ "algebra", "find", "command algebra.find(b:bat[:any_1], t:any_1):oid ",
"ALGfind;", "Returns the index position of a value. If no such BUN exists
return OID-nil." ]
diff --git a/clients/Tests/MAL-signatures.stable.out.int128
b/clients/Tests/MAL-signatures.stable.out.int128
--- a/clients/Tests/MAL-signatures.stable.out.int128
+++ b/clients/Tests/MAL-signatures.stable.out.int128
@@ -628,7 +628,7 @@ Ready.
[ "algebra", "bandjoin", "command algebra.bandjoin(l:bat[:any_1],
r:bat[:any_1], sl:bat[:oid], sr:bat[:oid], c1:any_1, c2:any_1, li:bit, hi:bit,
estimate:lng) (X_0:bat[:oid], X_1:bat[:oid]) ", "ALGbandjoin;", "Band join:
values in l and r match if r - c1 <[=] l <[=] r + c2" ]
[ "algebra", "copy", "command algebra.copy(b:bat[:any_1]):bat[:any_1] ",
"ALGcopy;", "Returns physical copy of a BAT." ]
[ "algebra", "crossproduct", "command algebra.crossproduct(left:bat[:any_1],
right:bat[:any_2]) (l:bat[:oid], r:bat[:oid]) ", "ALGcrossproduct2;",
"Returns 2 columns with all BUNs, consisting of the head-oids\n\t from 'left'
and 'right' for which there are BUNs in 'left'\n\t and 'right' with equal
tails" ]
-[ "algebra", "difference", "command algebra.difference(l:bat[:any_1],
r:bat[:any_1], sl:bat[:oid], sr:bat[:oid], nil_matches:bit,
estimate:lng):bat[:oid] ", "ALGdifference;", "Difference of l and r
with candidate lists" ]
+[ "algebra", "difference", "command algebra.difference(l:bat[:any_1],
r:bat[:any_1], sl:bat[:oid], sr:bat[:oid], nil_matches:bit, nil_clears:bit,
estimate:lng):bat[:oid] ", "ALGdifference;", "Difference of l and r
with candidate lists" ]
[ "algebra", "exist", "command algebra.exist(b:bat[:any_1],
val:any_1):bit ", "ALGexist;", "Returns whether 'val' occurs in b." ]
[ "algebra", "fetch", "command algebra.fetch(b:bat[:any_1],
x:oid):any_1 ", "ALGfetchoid;", "Returns the value of the BUN at x-th
position with 0 <= x < b.count" ]
[ "algebra", "find", "command algebra.find(b:bat[:any_1], t:any_1):oid ",
"ALGfind;", "Returns the index position of a value. If no such BUN exists
return OID-nil." ]
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -113,7 +113,7 @@ BAT *BATconvert(BAT *b, BAT *s, int tp,
BUN BATcount_no_nil(BAT *b);
gdk_return BATdel(BAT *b, BAT *d) __attribute__((__warn_unused_result__));
BAT *BATdense(oid hseq, oid tseq, BUN cnt)
__attribute__((__warn_unused_result__));
-BAT *BATdiff(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate);
+BAT *BATdiff(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool
nil_clears, BUN estimate);
gdk_return BATextend(BAT *b, BUN newcap)
__attribute__((__warn_unused_result__));
void BATfakeCommit(BAT *b);
gdk_return BATfirstn(BAT **topn, BAT **gids, BAT *b, BAT *cands, BAT *grps,
BUN n, bool asc, bool nilslast, bool distinct)
__attribute__((__warn_unused_result__));
@@ -703,7 +703,7 @@ str ALGcount_bat(lng *result, const bat
str ALGcount_nil(lng *result, const bat *bid, const bit *ignore_nils);
str ALGcount_no_nil(lng *result, const bat *bid);
str ALGcrossproduct2(bat *l, bat *r, const bat *lid, const bat *rid);
-str ALGdifference(bat *r1, const bat *lid, const bat *rid, const bat *slid,
const bat *srid, const bit *nil_matches, const lng *estimate);
+str ALGdifference(bat *r1, const bat *lid, const bat *rid, const bat *slid,
const bat *srid, const bit *nil_matches, const bit *nil_clears, const lng
*estimate);
str ALGexist(bit *ret, const bat *bid, const void *val);
str ALGfetchoid(ptr ret, const bat *bid, const oid *pos);
str ALGfind(oid *ret, const bat *bid, ptr val);
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -2719,7 +2719,7 @@ gdk_export gdk_return BATthetajoin(BAT *
gdk_export gdk_return BATsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT
*sl, BAT *sr, bool nil_matches, BUN estimate)
__attribute__((__warn_unused_result__));
gdk_export BAT *BATintersect(BAT *l, BAT *r, BAT *sl, BAT *sr, bool
nil_matches, BUN estimate);
-gdk_export BAT *BATdiff(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches,
BUN estimate);
+gdk_export BAT *BATdiff(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches,
bool not_in, BUN estimate);
gdk_export gdk_return BATjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl,
BAT *sr, bool nil_matches, BUN estimate)
__attribute__((__warn_unused_result__));
gdk_export gdk_return BATbandjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT
*sl, BAT *sr, const void *c1, const void *c2, bool li, bool hi, BUN estimate)
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -558,7 +558,7 @@ BATappend(BAT *b, BAT *n, BAT *s, bool f
* this */
BAT *d;
- d = BATdiff(n, b, s, NULL, true, BUN_NONE);
+ d = BATdiff(n, b, s, NULL, true, false, BUN_NONE);
if (d == NULL)
return GDK_FAIL;
s = BATunique(n, d);
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -868,8 +868,8 @@ mergejoin_int(BAT **r1p, BAT **r2p, BAT
BUN i;
ALGODEBUG fprintf(stderr, "#mergejoin_int(l=" ALGOBATFMT ","
- "r=" ALGOBATFMT ")%s\n",
- ALGOBATPAR(l), ALGOBATPAR(r),
+ "r=" ALGOBATFMT ",nil_matches=%d)%s\n",
+ ALGOBATPAR(l), ALGOBATPAR(r), nil_matches,
swapped ? " swapped" : "");
assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
@@ -1145,8 +1145,8 @@ mergejoin_lng(BAT **r1p, BAT **r2p, BAT
BUN i;
ALGODEBUG fprintf(stderr, "#mergejoin_lng(l=" ALGOBATFMT ","
- "r=" ALGOBATFMT ")%s\n",
- ALGOBATPAR(l), ALGOBATPAR(r),
+ "r=" ALGOBATFMT ",nil_matches=%d)%s\n",
+ ALGOBATPAR(l), ALGOBATPAR(r), nil_matches,
swapped ? " swapped" : "");
assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
@@ -1428,7 +1428,7 @@ mergejoin(BAT **r1p, BAT **r2p, BAT *l,
BUN rstart, BUN rend, BUN rcnt,
const oid *restrict rcand, const oid *rcandend,
bool nil_matches, bool nil_on_miss, bool semi, bool only_misses,
- BUN estimate, lng t0, bool swapped)
+ bool not_in, BUN estimate, lng t0, bool swapped)
{
BUN rstartorig;
const oid *rcandorig;
@@ -1460,7 +1460,8 @@ mergejoin(BAT **r1p, BAT **r2p, BAT *l,
oid lval = oid_nil, rval = oid_nil; /* temporary space to point v to */
if (sl == NULL && sr == NULL && !nil_on_miss &&
- !semi && !only_misses && l->tsorted && r->tsorted && r2p != NULL) {
+ !semi && !only_misses && !not_in &&
+ l->tsorted && r->tsorted && r2p != NULL) {
/* special cases with far fewer options */
switch (ATOMbasetype(l->ttype)) {
case TYPE_int:
@@ -1475,9 +1476,10 @@ mergejoin(BAT **r1p, BAT **r2p, BAT *l,
ALGODEBUG fprintf(stderr, "#mergejoin(l=" ALGOBATFMT ","
"r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
"sr=" ALGOOPTBATFMT ",nil_matches=%d,"
- "nil_on_miss=%d,semi=%d)%s\n",
+ "nil_on_miss=%d,semi=%d,only_misses=%d,"
+ "not_in=%d)%s\n",
ALGOBATPAR(l), ALGOBATPAR(r), ALGOOPTBATPAR(sl),
ALGOOPTBATPAR(sr),
- nil_matches, nil_on_miss, semi,
+ nil_matches, nil_on_miss, semi, only_misses, not_in,
swapped ? " swapped" : "");
assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
@@ -1501,6 +1503,12 @@ mergejoin(BAT **r1p, BAT **r2p, BAT *l,
/* basic properties will be adjusted if necessary later on,
* they were initially set by joininitresults() */
+ if (not_in && rstart < rend && !r->tnonil &&
+ ((BATtvoid(r) && r->tseqbase == oid_nil) ||
+ (rvals && cmp(nil, VALUE(r, r->tsorted ? rcand ? rcand[0] : rstart
: rcand ? rcandend[-1] : rend -1)) == 0)))
+ return nomatch(r1p, r2p, l, r, 0, 0, NULL, NULL, false, false,
+ "mergejoin", t0);
+
if (lstart == lend ||
rstart == rend ||
(!nil_matches &&
@@ -2463,6 +2471,7 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
BUN rstart, BUN rend, BUN rcnt,
const oid *restrict rcand, const oid *rcandend,
bool nil_matches, bool nil_on_miss, bool semi, bool only_misses,
+ bool not_in,
BUN estimate, lng t0, bool swapped, bool phash, const char *reason)
{
oid lo, ro;
@@ -2486,10 +2495,11 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
ALGODEBUG fprintf(stderr, "#hashjoin(l=" ALGOBATFMT ","
"r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
"sr=" ALGOOPTBATFMT ",nil_matches=%d,"
- "nil_on_miss=%d,semi=%d,only_misses=%d)%s%s%s\n",
+ "nil_on_miss=%d,semi=%d,only_misses=%d,"
+ "not_in=%d)%s%s%s\n",
ALGOBATPAR(l), ALGOBATPAR(r),
ALGOOPTBATPAR(sl), ALGOOPTBATPAR(sr),
- nil_matches, nil_on_miss, semi, only_misses,
+ nil_matches, nil_on_miss, semi, only_misses, not_in,
swapped ? " swapped" : "",
*reason ? " " : "", reason);
@@ -2519,23 +2529,6 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
only_misses, estimate);
if (maxsize == BUN_NONE)
return GDK_FAIL;
- BAT *r1 = *r1p;
- BAT *r2 = r2p ? *r2p : NULL;
-
- /* basic properties will be adjusted if necessary later on,
- * they were initially set by joininitresults() */
-
- if (r2) {
- r2->tkey = l->tkey;
- /* r2 is not likely to be sorted (although it is
- * certainly possible) */
- r2->tsorted = false;
- r2->trevsorted = false;
- r2->tseqbase = oid_nil;
- }
-
- if (sl && !BATtdense(sl))
- r1->tseqbase = oid_nil;
rl = 0;
if (phash) {
@@ -2568,16 +2561,47 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
BATgetId(r));
snprintf(ext, sizeof(ext), "thash%x", sr->batCacheid);
if ((hsh = BAThash_impl(r, sr, ext)) == NULL)
- goto bailout;
+ return GDK_FAIL;
}
} else {
if (BAThash(r) != GDK_SUCCEED)
- goto bailout;
+ return GDK_FAIL;
hsh = r->thash;
}
ri = bat_iterator(r);
t = ATOMbasetype(r->ttype);
+ if (not_in && !r->tnonil) {
+ for (rb = HASHget(hsh, HASHprobe(hsh, nil));
+ rb != HASHnil(hsh);
+ rb = HASHgetlink(hsh, rb)) {
+ ro = BUNtoid(sr, rb);
+ if ((*cmp)(nil, BUNtail(ri, ro - r->hseqbase)) == 0) {
+ return nomatch(r1p, r2p, l, r, 0, 0, NULL, NULL,
+ false, false, "hashjoin", t0);
+ }
+ }
+
+ }
+
+ BAT *r1 = *r1p;
+ BAT *r2 = r2p ? *r2p : NULL;
+
+ /* basic properties will be adjusted if necessary later on,
+ * they were initially set by joininitresults() */
+
+ if (r2) {
+ r2->tkey = l->tkey;
+ /* r2 is not likely to be sorted (although it is
+ * certainly possible) */
+ r2->tsorted = false;
+ r2->trevsorted = false;
+ r2->tseqbase = oid_nil;
+ }
+
+ if (sl && !BATtdense(sl))
+ r1->tseqbase = oid_nil;
+
if (lcand) {
while (lcand < lcandend) {
lo = *lcand++;
@@ -3549,7 +3573,7 @@ fetchjoin(BAT **r1p, BAT **r2p, BAT *l,
static gdk_return
leftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
bool nil_matches, bool nil_on_miss, bool semi, bool only_misses,
- BUN estimate, const char *func, lng t0)
+ bool not_in, BUN estimate, const char *func, lng t0)
{
BUN lstart, lend, lcnt;
const oid *lcand, *lcandend;
@@ -3563,6 +3587,8 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B
assert(r2p != NULL || (semi | only_misses));
/* if nil_on_miss is set, we really need a right output */
assert(!nil_on_miss || r2p != NULL);
+ /* if not_in is set, then so is only_misses */
+ assert(!not_in || only_misses);
*r1p = NULL;
if (r2p)
*r2p = NULL;
@@ -3578,16 +3604,18 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B
ALGODEBUG fprintf(stderr, "#%s(l=" ALGOBATFMT ","
"r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
"sr=" ALGOOPTBATFMT ",nil_matches=%d,"
- "nil_on_miss=%d,semi=%d,only_misses=%d)\n",
+ "nil_on_miss=%d,semi=%d,only_misses=%d,"
+ "not_in=%d)\n",
func,
ALGOBATPAR(l), ALGOBATPAR(r),
ALGOOPTBATPAR(sl), ALGOOPTBATPAR(sr),
- nil_matches, nil_on_miss, semi, only_misses);
+ nil_matches, nil_on_miss, semi, only_misses,
+ not_in);
return nomatch(r1p, r2p, l, r, lstart, lend, lcand, lcandend,
nil_on_miss, only_misses, func, t0);
}
- if (!nil_on_miss && !semi && !only_misses &&
+ if (!nil_on_miss && !semi && !only_misses && !not_in &&
(lcnt == 1 || (BATordered(l) && BATordered_rev(l)))) {
/* single value to join, use select */
return selectjoin(r1p, r2p, l, r, sl, sr,
@@ -3605,6 +3633,7 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B
&& !semi
&& !nil_matches
&& !only_misses
+ && !not_in
/* && (rcnt * 1024) < lcnt */
&& (BATordered(r) || BATordered_rev(r))) {
assert(ATOMtype(l->ttype) == TYPE_oid); /* tdense */
@@ -3620,7 +3649,7 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B
lstart, lend, lcnt, lcand, lcandend,
rstart, rend, rcnt, rcand, rcandend,
nil_matches, nil_on_miss, semi, only_misses,
- estimate, t0, false);
+ not_in, estimate, t0, false);
}
phash = sr == NULL &&
VIEWtparent(r) != 0 &&
@@ -3629,7 +3658,7 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B
lstart, lend, lcnt, lcand, lcandend,
rstart, rend, rcnt, rcand, rcandend,
nil_matches, nil_on_miss, semi, only_misses,
- estimate, t0, false, phash, func);
+ not_in, estimate, t0, false, phash, func);
}
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list