Changeset: f9a7a7e5b087 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=f9a7a7e5b087
Modified Files:
gdk/gdk_cand.h
gdk/gdk_cross.c
gdk/gdk_join.c
monetdb5/modules/kernel/algebra.c
monetdb5/modules/kernel/algebra.h
monetdb5/modules/kernel/algebra.mal
monetdb5/optimizer/opt_mergetable.c
sql/backends/monet5/rel_bin.c
sql/backends/monet5/sql_statement.c
sql/backends/monet5/sql_statement.h
Branch: subjoin
Log Message:
initial work for pushing down selection vectors into the
join operators.
diffs (truncated from 1633 to 300 lines):
diff --git a/gdk/gdk_cand.h b/gdk/gdk_cand.h
--- a/gdk/gdk_cand.h
+++ b/gdk/gdk_cand.h
@@ -23,6 +23,7 @@
assert(BATttype(s) == TYPE_oid); \
if (BATcount(s) == 0) { \
start = end = 0; \
+ cnt = 0; \
} else { \
if (BATtdense(s)) { \
start = (s)->tseqbase; \
diff --git a/gdk/gdk_cross.c b/gdk/gdk_cross.c
--- a/gdk/gdk_cross.c
+++ b/gdk/gdk_cross.c
@@ -32,8 +32,12 @@ BATsubcross(BAT **r1p, BAT **r2p, BAT *l
CANDINIT(r, sr, start2, end2, cnt2, rcand, rcandend);
if (lcand)
cnt1 = lcandend - lcand;
+ else
+ cnt1 = end1 - start1;
if (rcand)
cnt2 = rcandend - rcand;
+ else
+ cnt2 = end2 - start2;
bn1 = COLnew(0, TYPE_oid, cnt1 * cnt2, TRANSIENT);
if (bn1 == NULL)
@@ -51,13 +55,13 @@ BATsubcross(BAT **r1p, BAT **r2p, BAT *l
*p++ = lcand[i];
bn1->tdense = 0;
} else {
- seq = l->hseqbase + start1;
- for (i = 0; i < cnt1; i++)
- for (j = 0; j < cnt2; j++)
+ seq = l->hseqbase;
+ for (i = start1; i < end1; i++)
+ for (j = start2; j < end2; j++)
*p++ = i + seq;
bn1->tdense = bn1->tkey != 0;
if (bn1->tdense)
- BATtseqbase(bn1, seq);
+ BATtseqbase(bn1, seq+start1);
}
bn2 = COLnew(0, TYPE_oid, cnt1 * cnt2, TRANSIENT);
@@ -78,13 +82,13 @@ BATsubcross(BAT **r1p, BAT **r2p, BAT *l
*p++ = rcand[j];
bn2->tdense = 0;
} else {
- seq = r->hseqbase + start2;
- for (i = 0; i < cnt1; i++)
- for (j = 0; j < cnt2; j++)
+ seq = r->hseqbase;
+ for (i = start1; i < end1; i++)
+ for (j = start2; j < end2; j++)
*p++ = j + seq;
bn2->tdense = bn2->tkey != 0;
if (bn2->tdense)
- BATtseqbase(bn2, seq);
+ BATtseqbase(bn2, seq+start2);
}
*r1p = bn1;
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -595,7 +595,7 @@ mergejoin_void(BAT *r1, BAT *r2, BAT *l,
cnt -= i;
o = l->hseqbase + BATcount(l);
i = binsearch_oid(NULL, 0, lcand, 0, cnt - 1, o, 1, 0);
- cnt -= i;
+ //cnt -= i;
if (BATextend(r1, cnt) != GDK_SUCCEED)
goto bailout;
diff --git a/monetdb5/modules/kernel/algebra.c
b/monetdb5/modules/kernel/algebra.c
--- a/monetdb5/modules/kernel/algebra.c
+++ b/monetdb5/modules/kernel/algebra.c
@@ -676,6 +676,42 @@ ALGcrossproduct2( bat *l, bat *r, const
}
str
+ALGcrossproduct( bat *l, bat *r, const bat *left, const bat *right, const bat
*sl, const bat *sr)
+{
+ BAT *L, *R, *SL = NULL, *SR = NULL, *bn1, *bn2;
+ gdk_return ret;
+
+ if ((L = BATdescriptor(*left)) == NULL) {
+ throw(MAL, "algebra.crossproduct", RUNTIME_OBJECT_MISSING);
+ }
+ if ((R = BATdescriptor(*right)) == NULL) {
+ BBPunfix(L->batCacheid);
+ throw(MAL, "algebra.crossproduct", RUNTIME_OBJECT_MISSING);
+ }
+ if (sl && *sl != bat_nil && (SL = BATdescriptor(*sl)) == NULL) {
+ BBPunfix(L->batCacheid);
+ BBPunfix(R->batCacheid);
+ throw(MAL, "algebra.crossproduct", RUNTIME_OBJECT_MISSING);
+ }
+ if (sr && *sr != bat_nil && (SR = BATdescriptor(*sr)) == NULL) {
+ BBPunfix(L->batCacheid);
+ BBPunfix(R->batCacheid);
+ BBPunfix(SL->batCacheid);
+ throw(MAL, "algebra.crossproduct", RUNTIME_OBJECT_MISSING);
+ }
+ ret = BATsubcross(&bn1, &bn2, L, R, SL, SR);
+ BBPunfix(L->batCacheid);
+ BBPunfix(R->batCacheid);
+ if (SL) BBPunfix(SL->batCacheid);
+ if (SR) BBPunfix(SR->batCacheid);
+ if (ret != GDK_SUCCEED)
+ throw(MAL, "algebra.crossproduct", GDK_EXCEPTION);
+ BBPkeepref(*l = bn1->batCacheid);
+ BBPkeepref(*r = bn2->batCacheid);
+ return MAL_SUCCEED;
+}
+
+str
ALGprojection(bat *result, const bat *lid, const bat *rid)
{
return ALGbinary(result, lid, rid, BATproject, "algebra.projection");
diff --git a/monetdb5/modules/kernel/algebra.h
b/monetdb5/modules/kernel/algebra.h
--- a/monetdb5/modules/kernel/algebra.h
+++ b/monetdb5/modules/kernel/algebra.h
@@ -39,6 +39,7 @@ mal_export str ALGintersect(bat *r1, con
/* legacy join functions */
mal_export str ALGcrossproduct2(bat *l, bat *r, const bat *lid, const bat
*rid);
+mal_export str ALGcrossproduct(bat *l, bat *r, const bat *lid, const bat *rid,
const bat *slid, const bat *srid);
/* end legacy join functions */
mal_export str ALGfirstn(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr
pci);
diff --git a/monetdb5/modules/kernel/algebra.mal
b/monetdb5/modules/kernel/algebra.mal
--- a/monetdb5/modules/kernel/algebra.mal
+++ b/monetdb5/modules/kernel/algebra.mal
@@ -179,6 +179,13 @@ comment "Returns 2 columns with all BUNs
from 'left' and 'right' for which there are BUNs in 'left'
and 'right' with equal tails";
+command crossproduct( left:bat[:any_1], right:bat[:any_2], sl:bat[:oid], sr:bat[:oid])
+		(l:bat[:oid],r:bat[:oid])
+address ALGcrossproduct
+comment "Returns 2 columns with all combinations of the head-oids
+	from 'left' and 'right', restricted to the candidate
+	lists 'sl' and 'sr'";
+
+
command
join(l:bat[:any_1],r:bat[:any_1],sl:bat[:oid],sr:bat[:oid],nil_matches:bit,estimate:lng)
(:bat[:oid],:bat[:oid])
address ALGjoin
comment "Join";
diff --git a/monetdb5/optimizer/opt_mergetable.c
b/monetdb5/optimizer/opt_mergetable.c
--- a/monetdb5/optimizer/opt_mergetable.c
+++ b/monetdb5/optimizer/opt_mergetable.c
@@ -87,6 +87,19 @@ nr_of_bats(MalBlkPtr mb, InstrPtr p)
return cnt;
}
+/*
+ * Count the arguments of instruction p, starting at position retc+2,
+ * whose declared type is a bat with oid elements.
+ */
+static int
+nr_of_cands(MalBlkPtr mb, InstrPtr p)
+{
+	int total = 0;
+	int pos = p->retc + 2;
+
+	while (pos < p->argc) {
+		int tpe = getArgType(mb, p, pos);
+
+		pos++;
+		/* only bat-typed arguments are looked at any further */
+		if (!isaBatType(tpe))
+			continue;
+		if (getBatType(tpe) == TYPE_oid)
+			total++;
+	}
+	return total;
+}
+
+
/* some mat's have intermediates (with intermediate result variables), therefor
* we pass the old output mat variable */
inline static void
@@ -569,7 +582,7 @@ mat_projection(MalBlkPtr mb, InstrPtr p,
}
static void
-mat_join2(MalBlkPtr mb, InstrPtr p, matlist_t *ml, int m, int n)
+mat_join2(MalBlkPtr mb, InstrPtr p, matlist_t *ml, int m, int n, int cm, int
cn)
{
int tpe = getArgType(mb,p, 0), j,k, nr = 1;
InstrPtr l = newInstruction(mb, matRef, packRef);
@@ -591,6 +604,8 @@ mat_join2(MalBlkPtr mb, InstrPtr p, matl
getArg(q,1) = newTmpVariable(mb, tpe);
getArg(q,2) = getArg(mat[m].mi,k);
getArg(q,3) = getArg(mat[n].mi,j);
+ if (cm >= 0) getArg(q,4) = getArg(mat[cm].mi,k);
+ if (cn >= 0) getArg(q,5) = getArg(mat[cn].mi,j);
pushInstruction(mb,q);
propagatePartnr(ml, getArg(mat[m].mi, k),
getArg(q,0), nr);
@@ -606,6 +621,7 @@ mat_join2(MalBlkPtr mb, InstrPtr p, matl
int mv = (m>=0)?m:n;
int av = (m<0);
int bv = (m>=0);
+ int cv = (m>=0 && cm>=0)?cm:cn;
for(k=1; k<mat[mv].mi->argc; k++) {
InstrPtr q = copyInstruction(p);
@@ -613,6 +629,7 @@ mat_join2(MalBlkPtr mb, InstrPtr p, matl
getArg(q,0) = newTmpVariable(mb, tpe);
getArg(q,1) = newTmpVariable(mb, tpe);
getArg(q,p->retc+av) = getArg(mat[mv].mi, k);
+ if (cv >= 0) getArg(q,p->retc+av+2) =
getArg(mat[cv].mi,k);
pushInstruction(mb,q);
propagatePartnr(ml, getArg(mat[mv].mi, k),
getArg(q,av), k);
@@ -1564,7 +1581,7 @@ OPTmergetableImplementation(Client cntxt
mb->stop = 0;
for( i=0; i<oldtop; i++){
- int bats = 0;
+ int bats = 0, cands = 0, cm = -1, cn = -1;
InstrPtr r;
p = old[i];
@@ -1584,6 +1601,7 @@ OPTmergetableImplementation(Client cntxt
continue;
}
bats = nr_of_bats(mb, p);
+ cands = nr_of_cands(mb, p);
/* (l,r) Join (L, R, ..)
* 2 -> (l,r) equi/theta joins (l,r)
@@ -1591,24 +1609,28 @@ OPTmergetableImplementation(Client cntxt
* NxM -> (l,r) filter-joins (l1,..,ln,r1,..,rm)
*/
if (match > 0 && isMatJoinOp(p) &&
- p->argc >= 3 && p->retc == 2 && bats >= 2) {
- if (bats == 2) {
+ p->argc >= 3 && p->retc == 2 && (bats-cands) >= 2) {
+ if ((bats-cands) == 2) {
m = is_a_mat(getArg(p,p->retc), &ml);
n = is_a_mat(getArg(p,p->retc+1), &ml);
- mat_join2(mb, p, &ml, m, n);
+ cm = is_a_mat(getArg(p,p->retc+2), &ml);
+ cn = is_a_mat(getArg(p,p->retc+3), &ml);
+ mat_join2(mb, p, &ml, m, n, cm, cn);
} else {
+ assert(cands == 0);
mat_joinNxM(cntxt, mb, p, &ml, bats);
}
actions++;
continue;
}
if (match > 0 && isMatLeftJoinOp(p) && p->argc >= 3 && p->retc
== 2 &&
- match == 1 && bats == 2) {
+ match == 1 && (bats-cands) == 2) {
m = is_a_mat(getArg(p,p->retc), &ml);
n = -1;
if (m >= 0) {
- mat_join2(mb, p, &ml, m, n);
+ cm = is_a_mat(getArg(p,p->retc+2), &ml);
+ mat_join2(mb, p, &ml, m, n, cm, -1);
actions++;
continue;
}
diff --git a/sql/backends/monet5/rel_bin.c b/sql/backends/monet5/rel_bin.c
--- a/sql/backends/monet5/rel_bin.c
+++ b/sql/backends/monet5/rel_bin.c
@@ -20,7 +20,7 @@
#define OUTER_ZERO 64
-static stmt * exp_bin(backend *be, sql_exp *e, stmt *left, stmt *right, stmt
*grp, stmt *ext, stmt *cnt, stmt *sel);
+static stmt * exp_bin(backend *be, sql_exp *e, stmt *left, stmt *right, stmt
*grp, stmt *ext, stmt *cnt, stmt *sel, stmt *rsel);
static stmt * rel_bin(backend *be, sql_rel *rel);
static stmt * subrel_bin(backend *be, sql_rel *rel, list *refs);
@@ -211,7 +211,7 @@ handle_in_exps(backend *be, sql_exp *ce,
{
mvc *sql = be->mvc;
node *n;
- stmt *s = NULL, *c = exp_bin(be, ce, left, right, grp, ext, cnt, NULL);
+ stmt *s = NULL, *c = exp_bin(be, ce, left, right, grp, ext, cnt, NULL,
NULL);
if (c->nrcols == 0) {
sql_subtype *bt = sql_bind_localtype("bit");
@@ -223,7 +223,7 @@ handle_in_exps(backend *be, sql_exp *ce,
for( n = nl->h; n; n = n->next) {
sql_exp *e = n->data;
- stmt *i = exp_bin(be, use_r?e->r:e, left, right, grp,
ext, cnt, NULL);
+ stmt *i = exp_bin(be, use_r?e->r:e, left, right, grp,
ext, cnt, NULL, NULL);
i = stmt_binop(be, c, i, cmp);
if (s)
@@ -243,7 +243,7 @@ handle_in_exps(backend *be, sql_exp *ce,
s = sel;
for( n = nl->h; n; n = n->next) {
sql_exp *e = n->data;
- stmt *i = exp_bin(be, use_r?e->r:e, left, right, grp,
ext, cnt, NULL);
+ stmt *i = exp_bin(be, use_r?e->r:e, left, right, grp,
ext, cnt, NULL, NULL);
if (in) {
i = stmt_uselect(be, c, i, cmp, sel, 0);
@@ -269,7 +269,7 @@ value_list(backend *be, list *vals)
s = stmt_temp(be, exp_subtype(vals->h->data));
for( n = vals->h; n; n = n->next) {
sql_exp *e = n->data;
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list