Changeset: 8a6820190a59 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/8a6820190a59
Modified Files:
clients/Tests/MAL-signatures-hge.test
clients/Tests/MAL-signatures.test
clients/Tests/exports.stable.out
gdk/gdk_subquery.c
monetdb5/modules/kernel/algebra.c
monetdb5/optimizer/opt_mergetable.c
monetdb5/optimizer/opt_prelude.c
monetdb5/optimizer/opt_prelude.h
monetdb5/optimizer/opt_support.c
sql/backends/monet5/rel_bin.c
sql/backends/monet5/sql_scenario.c
sql/backends/monet5/sql_statement.c
sql/backends/monet5/sql_statement.h
sql/server/rel_dump.c
sql/server/rel_optimize_sel.c
sql/server/rel_optimizer.c
sql/server/rel_unnest.c
sql/server/sql_atom.c
sql/server/sql_parser.y
sql/server/sql_scan.c
sql/test/bugs/Tests/rtrim_bug.test
Branch: groupjoin
Log Message:
groupjoin
diffs (truncated from 1287 to 300 lines):
diff --git a/clients/Tests/MAL-signatures-hge.test
b/clients/Tests/MAL-signatures-hge.test
--- a/clients/Tests/MAL-signatures-hge.test
+++ b/clients/Tests/MAL-signatures-hge.test
@@ -3494,6 +3494,11 @@ command algebra.likeselect(X_0:bat[:str]
PCRElikeselect;
Select all head values of the first input BAT for which the@tail value is
"like" the given (SQL-style) pattern and for@which the head value occurs in the
tail of the second input@BAT.@Input is a dense-headed BAT, output is a
dense-headed BAT with in@the tail the head value of the input BAT for which
the@relationship holds. The output BAT is sorted on the tail value.
algebra
+markjoin
+command algebra.markjoin(X_0:bat[:any_1], X_1:bat[:any_1], X_2:bat[:oid],
X_3:bat[:oid], X_4:bit, X_5:lng) (X_6:bat[:oid], X_7:bat[:bit])
+ALGmarkjoin;
+Left mark join with candidate lists, produces left output and mark flag;
+algebra
not_like
command algebra.not_like(X_0:str, X_1:str, X_2:str, X_3:bit):bit
PCREnotlike;
diff --git a/clients/Tests/MAL-signatures.test
b/clients/Tests/MAL-signatures.test
--- a/clients/Tests/MAL-signatures.test
+++ b/clients/Tests/MAL-signatures.test
@@ -2929,6 +2929,11 @@ command algebra.likeselect(X_0:bat[:str]
PCRElikeselect;
Select all head values of the first input BAT for which the@tail value is
"like" the given (SQL-style) pattern and for@which the head value occurs in the
tail of the second input@BAT.@Input is a dense-headed BAT, output is a
dense-headed BAT with in@the tail the head value of the input BAT for which
the@relationship holds. The output BAT is sorted on the tail value.
algebra
+markjoin
+command algebra.markjoin(X_0:bat[:any_1], X_1:bat[:any_1], X_2:bat[:oid],
X_3:bat[:oid], X_4:bit, X_5:lng) (X_6:bat[:oid], X_7:bat[:bit])
+ALGmarkjoin;
+Left mark join with candidate lists, produces left output and mark flag;
+algebra
not_like
command algebra.not_like(X_0:str, X_1:str, X_2:str, X_3:bit):bit
PCREnotlike;
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -1051,6 +1051,8 @@ const char *mal_version(void);
stream *maleventstream;
const char *manifoldRef;
const char *mapiRef;
+const char *markjoinRef;
+const char *markselectRef;
const char *maskRef;
const char *matRef;
const char *maxRef;
@@ -1127,6 +1129,7 @@ void opt_pipes_reset(void);
str optimizeMALBlock(Client cntxt, MalBlkPtr mb);
const char *optimizerRef;
const char *outerjoinRef;
+const char *outerselectRef;
const char *packIncrementRef;
const char *packRef;
const char *parametersRef;
diff --git a/gdk/gdk_subquery.c b/gdk/gdk_subquery.c
--- a/gdk/gdk_subquery.c
+++ b/gdk/gdk_subquery.c
@@ -448,8 +448,9 @@ BATanyequal_grp(BAT *l, BAT *r, BAT *g,
res->tkey = BATcount(res) <= 1;
res->tsorted = BATcount(res) <= 1;
res->trevsorted = BATcount(res) <= 1;
- res->tnil = hasnil != 0;
- res->tnonil = hasnil == 0;
+ (void)hasnil;
+ res->tnil = false ;//hasnil != 0;
+ res->tnonil = false ;//hasnil == 0;
}
TRC_DEBUG(ALGO, "l=" ALGOBATFMT ",r=" ALGOBATFMT ",g=" ALGOBATFMT
@@ -500,7 +501,7 @@ BATallnotequal_grp(BAT *l, BAT *r, BAT *
if ((res = COLnew(min, TYPE_bit, ngrp, TRANSIENT)) == NULL)
goto alloc_fail;
ret = (bit *) Tloc(res, 0);
- memset(ret, FALSE, ngrp * sizeof(bit));
+ memset(ret, TRUE, ngrp * sizeof(bit));
if (!g || BATtdense(g))
gids = NULL;
diff --git a/monetdb5/modules/kernel/algebra.c
b/monetdb5/modules/kernel/algebra.c
--- a/monetdb5/modules/kernel/algebra.c
+++ b/monetdb5/modules/kernel/algebra.c
@@ -377,6 +377,180 @@ ALGthetaselect2(bat *result, const bat *
return MAL_SUCCEED;
}
+#include <gdk_subquery.h>
+/*
+ * algebra.markselect
+ *
+ * Computes, following the original sketch:
+ *	g, e = group.done(gid)
+ *	m    = anyequal(l, r, g, e, NULL)	(when *any is true)
+ *	       allnotequal(l, r, g, e, NULL)	(when *any is false)
+ *	li   = project(e, gid)
+ *	return li, m
+ *
+ * r1	receives the id of project(e, gid)
+ * r2	receives the id of the per-group result BAT m
+ * gid	BAT that is grouped with BATgroup
+ * mid	optional bit BAT (may be NULL or bat_nil); when given, it is
+ *	converted into a temporary oid BAT (nil bit -> oid_nil, anything
+ *	else -> 0) and the *_grp2 variants are used with it as extra
+ *	argument
+ * lid, rid	the two value BATs handed to the grouped comparison
+ * any	selects BATanyequal_grp(2) (true) or BATallnotequal_grp(2) (false)
+ *
+ * Returns MAL_SUCCEED or an exception; on every failure path all
+ * intermediate BATs are released and r1/r2 are left untouched.
+ */
+static str
+ALGmarkselect(bat *r1, bat *r2, const bat *gid, const bat *mid, const bat *lid, const bat *rid, const bit *any)
+{
+	BAT *li = NULL, *mask = NULL, *l = NULL, *r = NULL;
+	BAT *g = NULL, *e = NULL, *nils = NULL, *m = NULL, *res = NULL;
+	str msg = MAL_SUCCEED;
+
+	if ((li = BATdescriptor(*gid)) == NULL)
+		throw(MAL, "algebra.markselect", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
+	if (mid && !is_bat_nil(*mid) && (mask = BATdescriptor(*mid)) == NULL) {
+		msg = createException(MAL, "algebra.markselect", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
+		goto cleanup;
+	}
+	if (BATgroup(&g, &e, NULL, li, NULL, NULL, NULL, NULL) != GDK_SUCCEED) {
+		msg = createException(MAL, "algebra.markselect", GDK_EXCEPTION);
+		goto cleanup;
+	}
+
+	l = BATdescriptor(*lid);
+	r = BATdescriptor(*rid);
+	if (l == NULL || r == NULL) {
+		msg = createException(MAL, "algebra.markselect", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
+		goto cleanup;
+	}
+
+	if (mask) {
+		/* ugh temp convert into oid bat ! */
+		BUN nr = BATcount(mask);
+
+		nils = COLnew(0, TYPE_oid, nr, TRANSIENT);
+		if (nils == NULL) {
+			msg = createException(MAL, "algebra.markselect", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+			goto cleanup;
+		}
+		oid *o = Tloc(nils, 0);
+		const bit *ma = Tloc(mask, 0);
+		for (BUN i = 0; i < nr; i++)
+			o[i] = (ma[i] == bit_nil) ? oid_nil : 0;
+		BATsetcount(nils, nr);
+		/* nothing is known about the ordering or nil-content of the
+		 * converted column, so clear every property claim */
+		nils->tsorted = false;
+		nils->trevsorted = false;
+		nils->tkey = false;
+		nils->tnil = false;
+		nils->tnonil = false;
+		if (*any)
+			m = BATanyequal_grp2(l, r, nils, g, e, NULL);
+		else
+			m = BATallnotequal_grp2(l, r, nils, g, e, NULL);
+	} else {
+		if (*any)
+			m = BATanyequal_grp(l, r, g, e, NULL);
+		else
+			m = BATallnotequal_grp(l, r, g, e, NULL);
+	}
+	if (m == NULL) {
+		msg = createException(MAL, "algebra.markselect", GDK_EXCEPTION);
+		goto cleanup;
+	}
+
+	res = BATproject(e, li);
+	if (res == NULL) {
+		msg = createException(MAL, "algebra.markselect", GDK_EXCEPTION);
+		goto cleanup;
+	}
+
+	/* publish the ids before handing the references over */
+	*r1 = res->batCacheid;
+	*r2 = m->batCacheid;
+	BBPkeepref(res);
+	BBPkeepref(m);
+	res = NULL;
+	m = NULL;
+
+cleanup:
+	/* BBPreclaim is already relied upon to accept NULL in this file */
+	BBPreclaim(res);
+	BBPreclaim(m);
+	BBPreclaim(nils);
+	BBPreclaim(mask);
+	BBPreclaim(l);
+	BBPreclaim(r);
+	BBPreclaim(g);
+	BBPreclaim(e);
+	BBPreclaim(li);
+	return msg;
+}
+
+/*
+ * algebra.outerselect
+ *
+ * For each l-cand in lid, return at least one row: if the rid value is
+ * nil, return nil, else the pid value.
+ *
+ * r1	receives the id of the oid result (row positions, counted from
+ *	l's hseqbase)
+ * r2	receives the id of the bit result (TRUE or nil per emitted row)
+ * lid	oid BAT, asserted to be sorted; equal consecutive values form one
+ *	group
+ * rid	bit BAT, nil for empty
+ * pid	bit BAT
+ *
+ * NOTE(review): rid and pid are read per row here (ri[n] / pi[n]); the
+ * previous revision dereferenced only their first element inside the
+ * loops, which looked unintended -- confirm against the callers.
+ * rid and pid must therefore hold at least as many values as lid.
+ *
+ * Returns MAL_SUCCEED or an exception; on failure no result is set.
+ */
+static str
+ALGouterselect(bat *r1, bat *r2, const bat *lid, const bat *rid, const bat *pid)
+{
+	BAT *l = BATdescriptor(*lid);	/* oid */
+	BAT *r = BATdescriptor(*rid);	/* bit, nil for empty */
+	BAT *p = BATdescriptor(*pid);	/* bit */
+	BAT *res1 = NULL, *res2 = NULL;
+
+	if (!l || !r || !p) {
+		BBPreclaim(l);
+		BBPreclaim(r);
+		BBPreclaim(p);
+		throw(MAL, "algebra.outerselect", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
+	}
+	BUN nr = BATcount(l), q = 0;
+
+	/* the loops below index r and p by row of l */
+	if (BATcount(r) < nr || BATcount(p) < nr) {
+		BBPreclaim(l);
+		BBPreclaim(r);
+		BBPreclaim(p);
+		throw(MAL, "algebra.outerselect", ILLEGAL_ARGUMENT " inputs are not aligned");
+	}
+
+	if ((res1 = COLnew(0, TYPE_oid, nr, TRANSIENT)) == NULL ||
+	    (res2 = COLnew(0, TYPE_bit, nr, TRANSIENT)) == NULL) {
+		BBPreclaim(l);
+		BBPreclaim(r);
+		BBPreclaim(p);
+		BBPreclaim(res1);
+		throw(MAL, "algebra.outerselect", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+	}
+	assert(l->tsorted);
+	/* TODO handle void cases */
+	oid *ri1 = Tloc(res1, 0);
+	bit *ri2 = Tloc(res2, 0);
+	const oid *li = Tloc(l, 0);
+	const bit *ri = Tloc(r, 0);
+	const bit *pi = Tloc(p, 0);
+
+	if (!li) {	/* void case ? */
+		oid c = l->hseqbase;
+
+		/* every row is emitted exactly once */
+		for (BUN n = 0; n < nr; n++, c++) {
+			ri1[q] = c;
+			ri2[q] = (ri[n] != bit_nil && pi[n] == TRUE) ? TRUE : bit_nil;
+			q++;
+		}
+	} else {
+		oid c = l->hseqbase;
+		oid cur = nr ? li[0] : oid_nil;
+		bool used = false;	/* did the current group emit a row? */
+
+		for (BUN n = 0; n < nr; n++, c++) {
+			if (c && cur != li[n]) {
+				/* group boundary: a group that emitted nothing
+				 * still gets one nil row, placed at its last
+				 * position */
+				if (!used) {
+					ri1[q] = c - 1;
+					ri2[q] = bit_nil;
+					q++;
+				}
+				used = false;
+				cur = li[n];
+			}
+			if (ri[n] == bit_nil || pi[n] == TRUE) {
+				ri1[q] = c;
+				ri2[q] = ri[n] == bit_nil ? bit_nil : TRUE;
+				cur = li[n];
+				used = true;
+				q++;
+			}
+		}
+		/* same treatment for the final group */
+		if (nr && !used) {
+			ri1[q] = c - 1;
+			ri2[q] = bit_nil;
+			q++;
+		}
+	}
+	BATsetcount(res1, q);
+	BATsetcount(res2, q);
+	/* positions are emitted in strictly increasing order */
+	res1->tsorted = true;
+	res1->tkey = true;
+	res1->trevsorted = false;
+	res1->tnil = false;
+	res1->tnonil = true;
+	/* the flag column may repeat values and may hold nils: claim nothing */
+	res2->tsorted = false;
+	res2->trevsorted = false;
+	res2->tkey = false;
+	res2->tnil = false;
+	res2->tnonil = false;
+
+	BBPreclaim(l);
+	BBPreclaim(r);
+	BBPreclaim(p);
+
+	/* publish the ids before handing the references over */
+	*r1 = res1->batCacheid;
+	*r2 = res2->batCacheid;
+	BBPkeepref(res1);
+	BBPkeepref(res2);
+	return MAL_SUCCEED;
+}
+
+
static str
ALGselectNotNil(bat *result, const bat *bid)
{
@@ -567,6 +741,86 @@ ALGleftjoin1(bat *r1, const bat *lid, co
BATleftjoin, NULL, NULL, NULL, NULL, NULL,
NULL, "algebra.leftjoin");
}
+static str ALGcrossproduct2(bat *l, bat *r, const bat *left, const bat *right,
const bit *max_one);
+static str
+ALGmarkjoin(bat *r1, bat *r2, const bat *lid, const bat *rid, const bat *slid,
const bat *srid,
+ const bit *any, const lng *estimate)
+{
+ str res = NULL;
+ bit max_one = false;
+ *r1 = *r2 = 0;
+ (void)estimate;
+ /* for now: (left) cross aggr (any-equal) */
+ BAT *rr = BATdescriptor(is_bat_nil(*srid)?*rid:*srid);
+ if (!BATcount(rr)) {
+ BAT *l = NULL;
+ if (is_bat_nil(*slid)) {
+ BAT *lp = BATdescriptor(*lid);
+
+ if (lp) {
+ l = BATdense(lp->hseqbase, lp->hseqbase,
BATcount(lp));
+ BBPunfix(lp->batCacheid);
+ }
+ } else {
+ l = BATdescriptor(*slid);
+ }
+ bit v = *any?false:true;
+ BAT *m = BATconstant( l->hseqbase, TYPE_bit, &v, BATcount(l),
TRANSIENT);
+ BBPkeepref(l);
+ BBPkeepref(m);
+ *r1 = l->batCacheid;
+ *r2 = m->batCacheid;
+ BBPunfix(rr->batCacheid);
+ return MAL_SUCCEED;
+ }
+ BBPunfix(rr->batCacheid);
+ if ((res = ALGcrossproduct2(r1, r2, is_bat_nil(*slid)?lid:slid,
(*srid)?rid:srid, &max_one)) == MAL_SUCCEED) {
+ BAT *li = BATdescriptor(*r1), *g = NULL, *e = NULL;
+ if (!li) {
+ BBPrelease(*r1);
+ BBPrelease(*r2);
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]