Changeset: 626f65a1893e for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=626f65a1893e
Modified Files:
clients/Tests/MAL-signatures.stable.out
clients/Tests/MAL-signatures.stable.out.int128
clients/Tests/exports.stable.out
gdk/gdk.h
gdk/gdk_join.c
monetdb5/modules/kernel/algebra.c
Branch: unlock
Log Message:
Implemented an extra flag for algebra.outerjoin (match_one in BATouterjoin) to match exactly one.
diffs (truncated from 538 to 300 lines):
diff --git a/clients/Tests/MAL-signatures.stable.out
b/clients/Tests/MAL-signatures.stable.out
--- a/clients/Tests/MAL-signatures.stable.out
+++ b/clients/Tests/MAL-signatures.stable.out
@@ -676,12 +676,12 @@ stdout of test 'MAL-signatures` in direc
[ "algebra", "not_like", "command algebra.not_like(X_1:str, X_2:str):bit
", "PCREnotlike2;", "" ]
[ "algebra", "not_like", "command algebra.not_like(X_1:str, X_2:str,
X_3:str):bit ", "PCREnotlike3;", "" ]
[ "algebra", "orderidx", "command algebra.orderidx(X_1:bat[:any_1],
X_2:bit):bat[:any_1] ", "OIDXorderidx;", "" ]
-[ "algebra", "outerjoin", "command algebra.outerjoin(X_1:bat[:any_1],
X_2:bat[:any_1], X_3:bat[:cnd], X_4:bat[:cnd], X_5:bit, X_6:lng):bat[:oid] ",
"ALGouterjoin1;", "" ]
-[ "algebra", "outerjoin", "command algebra.outerjoin(X_1:bat[:any_1],
X_2:bat[:msk], X_3:bat[:cnd], X_4:bat[:cnd], X_5:bit, X_6:lng):bat[:oid] ",
"ALGouterjoin1;", "" ]
-[ "algebra", "outerjoin", "command algebra.outerjoin(X_1:bat[:msk],
X_2:bat[:any_1], X_3:bat[:cnd], X_4:bat[:cnd], X_5:bit, X_6:lng):bat[:oid] ",
"ALGouterjoin1;", "" ]
-[ "algebra", "outerjoin", "command algebra.outerjoin(X_2:bat[:any_1],
X_3:bat[:any_1], X_4:bat[:cnd], X_5:bat[:cnd], X_6:bit, X_7:lng)
(X_0:bat[:oid], X_1:bat[:oid]) ", "ALGouterjoin;", "" ]
-[ "algebra", "outerjoin", "command algebra.outerjoin(X_2:bat[:any_1],
X_3:bat[:msk], X_4:bat[:cnd], X_5:bat[:cnd], X_6:bit, X_7:lng) (X_0:bat[:oid],
X_1:bat[:oid]) ", "ALGouterjoin;", "" ]
-[ "algebra", "outerjoin", "command algebra.outerjoin(X_2:bat[:msk],
X_3:bat[:any_1], X_4:bat[:cnd], X_5:bat[:cnd], X_6:bit, X_7:lng)
(X_0:bat[:oid], X_1:bat[:oid]) ", "ALGouterjoin;", "" ]
+[ "algebra", "outerjoin", "command algebra.outerjoin(X_1:bat[:any_1],
X_2:bat[:any_1], X_3:bat[:cnd], X_4:bat[:cnd], X_5:bit, X_6:bit,
X_7:lng):bat[:oid] ", "ALGouterjoin1;", "" ]
+[ "algebra", "outerjoin", "command algebra.outerjoin(X_1:bat[:any_1],
X_2:bat[:msk], X_3:bat[:cnd], X_4:bat[:cnd], X_5:bit, X_6:bit,
X_7:lng):bat[:oid] ", "ALGouterjoin1;", "" ]
+[ "algebra", "outerjoin", "command algebra.outerjoin(X_1:bat[:msk],
X_2:bat[:any_1], X_3:bat[:cnd], X_4:bat[:cnd], X_5:bit, X_6:bit,
X_7:lng):bat[:oid] ", "ALGouterjoin1;", "" ]
+[ "algebra", "outerjoin", "command algebra.outerjoin(X_2:bat[:any_1],
X_3:bat[:any_1], X_4:bat[:cnd], X_5:bat[:cnd], X_6:bit, X_7:bit, X_8:lng)
(X_0:bat[:oid], X_1:bat[:oid]) ", "ALGouterjoin;", "" ]
+[ "algebra", "outerjoin", "command algebra.outerjoin(X_2:bat[:any_1],
X_3:bat[:msk], X_4:bat[:cnd], X_5:bat[:cnd], X_6:bit, X_7:bit, X_8:lng)
(X_0:bat[:oid], X_1:bat[:oid]) ", "ALGouterjoin;", "" ]
+[ "algebra", "outerjoin", "command algebra.outerjoin(X_2:bat[:msk],
X_3:bat[:any_1], X_4:bat[:cnd], X_5:bat[:cnd], X_6:bit, X_7:bit, X_8:lng)
(X_0:bat[:oid], X_1:bat[:oid]) ", "ALGouterjoin;", "" ]
[ "algebra", "project", "pattern algebra.project(X_1:bat[:any_1],
X_2:any_3):bat[:any_3] ", "ALGprojecttail;", "" ]
[ "algebra", "projection", "command algebra.projection(X_1:bat[:msk],
X_2:bat[:any_3], X_3:bat[:any_3]):bat[:any_3] ", "ALGprojection2;", ""
]
[ "algebra", "projection", "command algebra.projection(X_1:bat[:oid],
X_2:bat[:any_3], X_3:bat[:any_3]):bat[:any_3] ", "ALGprojection2;", ""
]
diff --git a/clients/Tests/MAL-signatures.stable.out.int128
b/clients/Tests/MAL-signatures.stable.out.int128
--- a/clients/Tests/MAL-signatures.stable.out.int128
+++ b/clients/Tests/MAL-signatures.stable.out.int128
@@ -793,12 +793,12 @@ stdout of test 'MAL-signatures` in direc
[ "algebra", "not_like", "command algebra.not_like(X_1:str, X_2:str):bit
", "PCREnotlike2;", "" ]
[ "algebra", "not_like", "command algebra.not_like(X_1:str, X_2:str,
X_3:str):bit ", "PCREnotlike3;", "" ]
[ "algebra", "orderidx", "command algebra.orderidx(X_1:bat[:any_1],
X_2:bit):bat[:any_1] ", "OIDXorderidx;", "" ]
-[ "algebra", "outerjoin", "command algebra.outerjoin(X_1:bat[:any_1],
X_2:bat[:any_1], X_3:bat[:cnd], X_4:bat[:cnd], X_5:bit, X_6:lng):bat[:oid] ",
"ALGouterjoin1;", "" ]
-[ "algebra", "outerjoin", "command algebra.outerjoin(X_1:bat[:any_1],
X_2:bat[:msk], X_3:bat[:cnd], X_4:bat[:cnd], X_5:bit, X_6:lng):bat[:oid] ",
"ALGouterjoin1;", "" ]
-[ "algebra", "outerjoin", "command algebra.outerjoin(X_1:bat[:msk],
X_2:bat[:any_1], X_3:bat[:cnd], X_4:bat[:cnd], X_5:bit, X_6:lng):bat[:oid] ",
"ALGouterjoin1;", "" ]
-[ "algebra", "outerjoin", "command algebra.outerjoin(X_2:bat[:any_1],
X_3:bat[:any_1], X_4:bat[:cnd], X_5:bat[:cnd], X_6:bit, X_7:lng)
(X_0:bat[:oid], X_1:bat[:oid]) ", "ALGouterjoin;", "" ]
-[ "algebra", "outerjoin", "command algebra.outerjoin(X_2:bat[:any_1],
X_3:bat[:msk], X_4:bat[:cnd], X_5:bat[:cnd], X_6:bit, X_7:lng) (X_0:bat[:oid],
X_1:bat[:oid]) ", "ALGouterjoin;", "" ]
-[ "algebra", "outerjoin", "command algebra.outerjoin(X_2:bat[:msk],
X_3:bat[:any_1], X_4:bat[:cnd], X_5:bat[:cnd], X_6:bit, X_7:lng)
(X_0:bat[:oid], X_1:bat[:oid]) ", "ALGouterjoin;", "" ]
+[ "algebra", "outerjoin", "command algebra.outerjoin(X_1:bat[:any_1],
X_2:bat[:any_1], X_3:bat[:cnd], X_4:bat[:cnd], X_5:bit, X_6:bit,
X_7:lng):bat[:oid] ", "ALGouterjoin1;", "" ]
+[ "algebra", "outerjoin", "command algebra.outerjoin(X_1:bat[:any_1],
X_2:bat[:msk], X_3:bat[:cnd], X_4:bat[:cnd], X_5:bit, X_6:bit,
X_7:lng):bat[:oid] ", "ALGouterjoin1;", "" ]
+[ "algebra", "outerjoin", "command algebra.outerjoin(X_1:bat[:msk],
X_2:bat[:any_1], X_3:bat[:cnd], X_4:bat[:cnd], X_5:bit, X_6:bit,
X_7:lng):bat[:oid] ", "ALGouterjoin1;", "" ]
+[ "algebra", "outerjoin", "command algebra.outerjoin(X_2:bat[:any_1],
X_3:bat[:any_1], X_4:bat[:cnd], X_5:bat[:cnd], X_6:bit, X_7:bit, X_8:lng)
(X_0:bat[:oid], X_1:bat[:oid]) ", "ALGouterjoin;", "" ]
+[ "algebra", "outerjoin", "command algebra.outerjoin(X_2:bat[:any_1],
X_3:bat[:msk], X_4:bat[:cnd], X_5:bat[:cnd], X_6:bit, X_7:bit, X_8:lng)
(X_0:bat[:oid], X_1:bat[:oid]) ", "ALGouterjoin;", "" ]
+[ "algebra", "outerjoin", "command algebra.outerjoin(X_2:bat[:msk],
X_3:bat[:any_1], X_4:bat[:cnd], X_5:bat[:cnd], X_6:bit, X_7:bit, X_8:lng)
(X_0:bat[:oid], X_1:bat[:oid]) ", "ALGouterjoin;", "" ]
[ "algebra", "project", "pattern algebra.project(X_1:bat[:any_1],
X_2:any_3):bat[:any_3] ", "ALGprojecttail;", "" ]
[ "algebra", "projection", "command algebra.projection(X_1:bat[:msk],
X_2:bat[:any_3], X_3:bat[:any_3]):bat[:any_3] ", "ALGprojection2;", ""
]
[ "algebra", "projection", "command algebra.projection(X_1:bat[:oid],
X_2:bat[:any_3], X_3:bat[:any_3]):bat[:any_3] ", "ALGprojection2;", ""
]
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -173,7 +173,7 @@ BAT *BATnil_grp(BAT *l, BAT *g, BAT *e,
bool BATordered(BAT *b);
bool BATordered_rev(BAT *b);
gdk_return BATorderidx(BAT *b, bool stable);
-gdk_return BATouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT
*sr, bool nil_matches, BUN estimate) __attribute__((__warn_unused_result__));
+gdk_return BATouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT
*sr, bool nil_matches, bool match_one, BUN estimate)
__attribute__((__warn_unused_result__));
gdk_return BATprint(stream *s, BAT *b);
gdk_return BATprintcolumns(stream *s, int argc, BAT *argv[]);
gdk_return BATprod(void *res, int tp, BAT *b, BAT *s, bool skip_nils, bool
abort_on_error, bool nil_if_empty);
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -2116,7 +2116,7 @@ gdk_export gdk_return BATsubcross(BAT **
gdk_export gdk_return BATleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT
*sl, BAT *sr, bool nil_matches, BUN estimate)
__attribute__((__warn_unused_result__));
-gdk_export gdk_return BATouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT
*sl, BAT *sr, bool nil_matches, BUN estimate)
+gdk_export gdk_return BATouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT
*sl, BAT *sr, bool nil_matches, bool match_one, BUN estimate)
__attribute__((__warn_unused_result__));
gdk_export gdk_return BATthetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT
*sl, BAT *sr, int op, bool nil_matches, BUN estimate)
__attribute__((__warn_unused_result__));
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -95,7 +95,8 @@ joinparamcheck(BAT *l, BAT *r1, BAT *r2,
* number of outputs that could possibly be generated. */
static BUN
joininitresults(BAT **r1p, BAT **r2p, BUN lcnt, BUN rcnt, bool lkey, bool rkey,
- bool semi, bool nil_on_miss, bool only_misses, BUN estimate)
+ bool semi, bool nil_on_miss, bool only_misses, bool min_one,
+ BUN estimate)
{
BAT *r1, *r2;
BUN maxsize, size;
@@ -151,6 +152,8 @@ joininitresults(BAT **r1p, BAT **r2p, BU
* once */
size = maxsize;
}
+ if (min_one && size < lcnt)
+ size = lcnt;
if (maxsize == 0) {
r1 = BATdense(0, 0, 0);
@@ -790,7 +793,7 @@ mergejoin_int(BAT **r1p, BAT **r2p, BAT
if ((maxsize = joininitresults(r1p, r2p, BATcount(l), BATcount(r),
l->tkey, r->tkey, false, false,
- false, estimate)) == BUN_NONE)
+ false, false, estimate)) == BUN_NONE)
return GDK_FAIL;
r1 = *r1p;
r2 = r2p ? *r2p : NULL;
@@ -1089,7 +1092,7 @@ mergejoin_lng(BAT **r1p, BAT **r2p, BAT
if ((maxsize = joininitresults(r1p, r2p, BATcount(l), BATcount(r),
l->tkey, r->tkey, false, false,
- false, estimate)) == BUN_NONE)
+ false, false, estimate)) == BUN_NONE)
return GDK_FAIL;
r1 = *r1p;
r2 = r2p ? *r2p : NULL;
@@ -1396,7 +1399,7 @@ mergejoin_cand(BAT **r1p, BAT **r2p, BAT
if ((maxsize = joininitresults(r1p, r2p, BATcount(l), BATcount(r),
l->tkey, r->tkey, false, false,
- false, estimate)) == BUN_NONE)
+ false, false, estimate)) == BUN_NONE)
return GDK_FAIL;
r1 = *r1p;
r2 = r2p ? *r2p : NULL;
@@ -1648,7 +1651,8 @@ static gdk_return
mergejoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
struct canditer *restrict lci, struct canditer *restrict rci,
bool nil_matches, bool nil_on_miss, bool semi, bool only_misses,
- bool not_in, bool max_one, BUN estimate, lng t0, bool swapped,
+ bool not_in, bool max_one, bool min_one, BUN estimate,
+ lng t0, bool swapped,
const char *reason)
{
/* [lr]scan determine how far we look ahead in l/r in order to
@@ -1680,7 +1684,8 @@ mergejoin(BAT **r1p, BAT **r2p, BAT *l,
if (lci->tpe == cand_dense && lci->ncand == BATcount(l) &&
rci->tpe == cand_dense && rci->ncand == BATcount(r) &&
- !nil_on_miss && !semi && !max_one && !only_misses && !not_in &&
+ !nil_on_miss && !semi && !max_one && !min_one && !only_misses &&
+ !not_in &&
l->tsorted && r->tsorted) {
/* special cases with far fewer options */
if (r->ttype == TYPE_void && r->tvheap)
@@ -1757,7 +1762,8 @@ mergejoin(BAT **r1p, BAT **r2p, BAT *l,
BUN maxsize = joininitresults(r1p, r2p, lci->ncand, rci->ncand,
l->tkey, r->tkey, semi | max_one,
- nil_on_miss, only_misses, estimate);
+ nil_on_miss, only_misses, min_one,
+ estimate);
if (maxsize == BUN_NONE)
return GDK_FAIL;
BAT *r1 = *r1p;
@@ -2138,6 +2144,10 @@ mergejoin(BAT **r1p, BAT **r2p, BAT *l,
if (nr == 0) {
/* no entries in r found */
if (!(nil_on_miss | only_misses)) {
+ if (min_one) {
+ GDKerror("not enough matches");
+ goto bailout;
+ }
if (lscan > 0 &&
(equal_order ? rci->next == rci->ncand :
rci->next == 0)) {
/* nothing more left to match
@@ -2450,6 +2460,9 @@ mergejoin(BAT **r1p, BAT **r2p, BAT *l,
goto bailout; \
APPEND(r1, lo); \
APPEND(r2, oid_nil); \
+ } else if (min_one) { \
+ GDKerror("not enough matches"); \
+ goto bailout; \
} else { \
lskipped = BATcount(r1) > 0; \
} \
@@ -2482,7 +2495,7 @@ static gdk_return
hashjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
struct canditer *restrict lci, struct canditer *restrict rci,
bool nil_matches, bool nil_on_miss, bool semi, bool only_misses,
- bool not_in, bool max_one,
+ bool not_in, bool max_one, bool min_one,
BUN estimate, lng t0, bool swapped, bool hash, bool phash,
const char *reason)
{
@@ -2530,7 +2543,8 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
BUN maxsize = joininitresults(r1p, r2p, lci->ncand, rci->ncand,
l->tkey, r->tkey, semi | max_one,
- nil_on_miss, only_misses, estimate);
+ nil_on_miss, only_misses, min_one,
+ estimate);
if (maxsize == BUN_NONE)
return GDK_FAIL;
@@ -2721,6 +2735,9 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
goto bailout;
APPEND(r1, lo);
APPEND(r2, oid_nil);
+ } else if (min_one) {
+ GDKerror("not enough matches");
+ goto bailout;
} else {
lskipped = BATcount(r1) > 0;
}
@@ -2784,7 +2801,7 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
TRC_DEBUG(ALGO, "l=" ALGOBATFMT "," "r=" ALGOBATFMT
",sl=" ALGOOPTBATFMT "," "sr=" ALGOOPTBATFMT ","
"nil_matches=%s,nil_on_miss=%s,semi=%s,only_misses=%s,"
- "not_in=%s,max_one=%s;%s %s -> " ALGOBATFMT "," ALGOOPTBATFMT
+ "not_in=%s,max_one=%s,min_one=%s;%s %s -> " ALGOBATFMT ","
ALGOOPTBATFMT
" (" LLFMT "usec)\n",
ALGOBATPAR(l), ALGOBATPAR(r),
ALGOOPTBATPAR(lci->s), ALGOOPTBATPAR(rci->s),
@@ -2794,6 +2811,7 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
only_misses ? "true" : "false",
not_in ? "true" : "false",
max_one ? "true" : "false",
+ min_one ? "true" : "false",
swapped ? " swapped" : "", reason,
ALGOBATPAR(r1), ALGOOPTBATPAR(r2),
GDKusec() - t0);
@@ -2874,7 +2892,7 @@ thetajoin(BAT **r1p, BAT **r2p, BAT *l,
}
BUN maxsize = joininitresults(r1p, r2p, lcnt, rcnt, false, false,
- false, false, false, estimate);
+ false, false, false, false, estimate);
if (maxsize == BUN_NONE)
return GDK_FAIL;
BAT *r1 = *r1p;
@@ -3146,7 +3164,8 @@ bitmaskjoin(BAT *l, BAT *r,
static gdk_return
leftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
bool nil_matches, bool nil_on_miss, bool semi, bool only_misses,
- bool not_in, bool max_one, BUN estimate, const char *func, lng t0)
+ bool not_in, bool max_one, bool min_one, BUN estimate,
+ const char *func, lng t0)
{
BUN lcnt, rcnt;
struct canditer lci, rci;
@@ -3208,18 +3227,18 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B
"r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
"sr=" ALGOOPTBATFMT ",nil_matches=%d,"
"nil_on_miss=%d,semi=%d,only_misses=%d,"
- "not_in=%d,max_one=%d)\n",
+ "not_in=%d,max_one=%d,min_one=%d)\n",
func,
ALGOBATPAR(l), ALGOBATPAR(r),
ALGOOPTBATPAR(sl), ALGOOPTBATPAR(sr),
nil_matches, nil_on_miss, semi, only_misses,
- not_in, max_one);
+ not_in, max_one, min_one);
rc = nomatch(r1p, r2p, l, r, &lci,
nil_on_miss, only_misses, func, t0);
goto doreturn;
}
- if (!nil_on_miss && !semi && !max_one && !only_misses && !not_in &&
+ if (!nil_on_miss && !semi && !max_one && !min_one && !only_misses &&
!not_in &&
(lcnt == 1 || (BATordered(l) && BATordered_rev(l)) ||
(l->ttype == TYPE_void && is_oid_nil(l->tseqbase)))) {
/* single value to join, use select */
@@ -3238,6 +3257,7 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B
&& rci.tpe == cand_dense
&& !semi
&& !max_one
+ && !min_one
&& !nil_matches
&& !only_misses
&& !not_in
@@ -3252,7 +3272,8 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B
&& (semi || only_misses)
&& !nil_on_miss
&& !not_in
- && !max_one) {
+ && !max_one
+ && !min_one) {
*r1p = bitmaskjoin(l, r, &lci, &rci, only_misses, func, t0);
rc = *r1p == NULL ? GDK_FAIL : GDK_SUCCEED;
goto doreturn;
@@ -3264,7 +3285,7 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B
|| BATcount(r) * (Tsize(r) + (r->tvheap ?
r->tvheap->size : 0) + 2 * sizeof(BUN)) > GDK_mem_maxsize / (GDKnr_threads ?
GDKnr_threads : 1))) {
rc = mergejoin(r1p, r2p, l, r, &lci, &rci,
nil_matches, nil_on_miss, semi, only_misses,
- not_in, max_one, estimate, t0, false, func);
+ not_in, max_one, min_one, estimate, t0, false,
func);
goto doreturn;
}
rhash = BATcheckhash(r);
@@ -3310,7 +3331,7 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B
}
}
- if (!nil_on_miss && !only_misses && !not_in && !max_one) {
+ if (!nil_on_miss && !only_misses && !not_in && !max_one && !min_one) {
/* maybe do a hash join on the swapped operands; if we
* do, we need to sort the output, so we take that into
* account as well */
@@ -3376,7 +3397,7 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B
canditer_init(&rci, r, sr);
}
rc = hashjoin(&r2, &r1, r, l, &rci, &lci, nil_matches,
- false, false, false, false, false,
estimate,
+ false, false, false, false, false, false,
estimate,
t0, true, lhash, plhash, func);
if (semi)
BBPunfix(sr->batCacheid);
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list