Changeset: f9a7a7e5b087 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=f9a7a7e5b087
Modified Files:
        gdk/gdk_cand.h
        gdk/gdk_cross.c
        gdk/gdk_join.c
        monetdb5/modules/kernel/algebra.c
        monetdb5/modules/kernel/algebra.h
        monetdb5/modules/kernel/algebra.mal
        monetdb5/optimizer/opt_mergetable.c
        sql/backends/monet5/rel_bin.c
        sql/backends/monet5/sql_statement.c
        sql/backends/monet5/sql_statement.h
Branch: subjoin
Log Message:

Initial work on pushing selection vectors (candidate lists) down into
the join operators.


diffs (truncated from 1633 to 300 lines):

diff --git a/gdk/gdk_cand.h b/gdk/gdk_cand.h
--- a/gdk/gdk_cand.h
+++ b/gdk/gdk_cand.h
@@ -23,6 +23,7 @@
                        assert(BATttype(s) == TYPE_oid);                \
                        if (BATcount(s) == 0) {                         \
                                start = end = 0;                        \
+                               cnt = 0;                                \
                        } else {                                        \
                                if (BATtdense(s)) {                     \
                                        start = (s)->tseqbase;          \
diff --git a/gdk/gdk_cross.c b/gdk/gdk_cross.c
--- a/gdk/gdk_cross.c
+++ b/gdk/gdk_cross.c
@@ -32,8 +32,12 @@ BATsubcross(BAT **r1p, BAT **r2p, BAT *l
        CANDINIT(r, sr, start2, end2, cnt2, rcand, rcandend);
        if (lcand)
                cnt1 = lcandend - lcand;
+       else
+               cnt1 = end1 - start1;
        if (rcand)
                cnt2 = rcandend - rcand;
+       else
+               cnt2 = end2 - start2;
 
        bn1 = COLnew(0, TYPE_oid, cnt1 * cnt2, TRANSIENT);
        if (bn1 == NULL)
@@ -51,13 +55,13 @@ BATsubcross(BAT **r1p, BAT **r2p, BAT *l
                                *p++ = lcand[i];
                bn1->tdense = 0;
        } else {
-               seq = l->hseqbase + start1;
-               for (i = 0; i < cnt1; i++)
-                       for (j = 0; j < cnt2; j++)
+               seq = l->hseqbase;
+               for (i = start1; i < end1; i++)
+                       for (j = start2; j < end2; j++)
                                *p++ = i + seq;
                bn1->tdense = bn1->tkey != 0;
                if (bn1->tdense)
-                       BATtseqbase(bn1, seq);
+                       BATtseqbase(bn1, seq+start1);
        }
 
        bn2 = COLnew(0, TYPE_oid, cnt1 * cnt2, TRANSIENT);
@@ -78,13 +82,13 @@ BATsubcross(BAT **r1p, BAT **r2p, BAT *l
                                *p++ = rcand[j];
                bn2->tdense = 0;
        } else {
-               seq = r->hseqbase + start2;
-               for (i = 0; i < cnt1; i++)
-                       for (j = 0; j < cnt2; j++)
+               seq = r->hseqbase;
+               for (i = start1; i < end1; i++)
+                       for (j = start2; j < end2; j++)
                                *p++ = j + seq;
                bn2->tdense = bn2->tkey != 0;
                if (bn2->tdense)
-                       BATtseqbase(bn2, seq);
+                       BATtseqbase(bn2, seq+start2);
        }
 
        *r1p = bn1;
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -595,7 +595,7 @@ mergejoin_void(BAT *r1, BAT *r2, BAT *l,
                        cnt -= i;
                        o = l->hseqbase + BATcount(l);
                        i = binsearch_oid(NULL, 0, lcand, 0, cnt - 1, o, 1, 0);
-                       cnt -= i;
+                       //cnt -= i;
 
                        if (BATextend(r1, cnt) != GDK_SUCCEED)
                                goto bailout;
diff --git a/monetdb5/modules/kernel/algebra.c b/monetdb5/modules/kernel/algebra.c
--- a/monetdb5/modules/kernel/algebra.c
+++ b/monetdb5/modules/kernel/algebra.c
@@ -676,6 +676,49 @@ ALGcrossproduct2( bat *l, bat *r, const 
 }
 
+/*
+ * MAL wrapper around BATsubcross for 'left' and 'right', with optional
+ * candidate BATs 'sl' and 'sr'.  A candidate argument that is NULL or
+ * bat_nil is not looked up and NULL is passed to BATsubcross instead.
+ * On success the two result BATs are returned through 'l' and 'r'.
+ */
 str
+ALGcrossproduct( bat *l, bat *r, const bat *left, const bat *right, const bat *sl, const bat *sr)
+{
+       BAT *L, *R, *SL = NULL, *SR = NULL, *bn1, *bn2;
+       gdk_return ret;
+
+       if ((L = BATdescriptor(*left)) == NULL) {
+               throw(MAL, "algebra.crossproduct", RUNTIME_OBJECT_MISSING);
+       }
+       if ((R = BATdescriptor(*right)) == NULL) {
+               BBPunfix(L->batCacheid);
+               throw(MAL, "algebra.crossproduct", RUNTIME_OBJECT_MISSING);
+       }
+       if (sl && *sl != bat_nil && (SL = BATdescriptor(*sl)) == NULL) {
+               BBPunfix(L->batCacheid);
+               BBPunfix(R->batCacheid);
+               throw(MAL, "algebra.crossproduct", RUNTIME_OBJECT_MISSING);
+       }
+       if (sr && *sr != bat_nil && (SR = BATdescriptor(*sr)) == NULL) {
+               BBPunfix(L->batCacheid);
+               BBPunfix(R->batCacheid);
+               /* SL is still NULL when no left candidate BAT was given */
+               if (SL) BBPunfix(SL->batCacheid);
+               throw(MAL, "algebra.crossproduct", RUNTIME_OBJECT_MISSING);
+       }
+       ret = BATsubcross(&bn1, &bn2, L, R, SL, SR);
+       BBPunfix(L->batCacheid);
+       BBPunfix(R->batCacheid);
+       if (SL) BBPunfix(SL->batCacheid);
+       if (SR) BBPunfix(SR->batCacheid);
+       if (ret != GDK_SUCCEED)
+               throw(MAL, "algebra.crossproduct", GDK_EXCEPTION);
+       BBPkeepref(*l = bn1->batCacheid);
+       BBPkeepref(*r = bn2->batCacheid);
+       return MAL_SUCCEED;
+}
+
+str
 ALGprojection(bat *result, const bat *lid, const bat *rid)
 {
        return ALGbinary(result, lid, rid, BATproject, "algebra.projection");
diff --git a/monetdb5/modules/kernel/algebra.h b/monetdb5/modules/kernel/algebra.h
--- a/monetdb5/modules/kernel/algebra.h
+++ b/monetdb5/modules/kernel/algebra.h
@@ -39,6 +39,7 @@ mal_export str ALGintersect(bat *r1, con
 
 /* legacy join functions */
 mal_export str ALGcrossproduct2(bat *l, bat *r, const bat *lid, const bat 
*rid);
+mal_export str ALGcrossproduct(bat *l, bat *r, const bat *lid, const bat *rid, 
const bat *slid, const bat *srid);
 /* end legacy join functions */
 
 mal_export str ALGfirstn(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr 
pci);
diff --git a/monetdb5/modules/kernel/algebra.mal b/monetdb5/modules/kernel/algebra.mal
--- a/monetdb5/modules/kernel/algebra.mal
+++ b/monetdb5/modules/kernel/algebra.mal
@@ -179,6 +179,13 @@ comment "Returns 2 columns with all BUNs
          from 'left' and 'right' for which there are BUNs in 'left'
          and 'right' with equal tails";
 
+command crossproduct( left:bat[:any_1], right:bat[:any_2], sl:bat[:oid], 
sr:bat[:oid])
+               (l:bat[:oid],r:bat[:oid])
+address ALGcrossproduct
+comment "Returns 2 columns with all BUNs, consisting of the head-oids
+         from 'left' and 'right' for which there are BUNs in 'left'
+         and 'right' with equal tails";
+
 command 
join(l:bat[:any_1],r:bat[:any_1],sl:bat[:oid],sr:bat[:oid],nil_matches:bit,estimate:lng)
 (:bat[:oid],:bat[:oid])
 address ALGjoin
 comment "Join";
diff --git a/monetdb5/optimizer/opt_mergetable.c b/monetdb5/optimizer/opt_mergetable.c
--- a/monetdb5/optimizer/opt_mergetable.c
+++ b/monetdb5/optimizer/opt_mergetable.c
@@ -87,6 +87,27 @@ nr_of_bats(MalBlkPtr mb, InstrPtr p)
        return cnt;
 }
 
+/*
+ * Count the arguments of instruction p whose type is a BAT of oids,
+ * looking only at argument positions p->retc+2 and beyond.
+ */
+static int
+nr_of_cands(MalBlkPtr mb, InstrPtr p)
+{
+       int arg, ncands = 0;
+
+       for (arg = p->retc + 2; arg < p->argc; arg++) {
+               int tpe = getArgType(mb, p, arg);
+
+               if (!isaBatType(tpe))
+                       continue;
+               if (getBatType(tpe) == TYPE_oid)
+                       ncands++;
+       }
+       return ncands;
+}
+
+
 /* some mat's have intermediates (with intermediate result variables), therefor
  * we pass the old output mat variable */
 inline static void
@@ -569,7 +582,7 @@ mat_projection(MalBlkPtr mb, InstrPtr p,
 }
 
 static void
-mat_join2(MalBlkPtr mb, InstrPtr p, matlist_t *ml, int m, int n)
+mat_join2(MalBlkPtr mb, InstrPtr p, matlist_t *ml, int m, int n, int cm, int 
cn)
 {
        int tpe = getArgType(mb,p, 0), j,k, nr = 1;
        InstrPtr l = newInstruction(mb, matRef, packRef);
@@ -591,6 +604,8 @@ mat_join2(MalBlkPtr mb, InstrPtr p, matl
                                getArg(q,1) = newTmpVariable(mb, tpe);
                                getArg(q,2) = getArg(mat[m].mi,k);
                                getArg(q,3) = getArg(mat[n].mi,j);
+                               if (cm >= 0) getArg(q,4) = getArg(mat[cm].mi,k);
+                               if (cn >= 0) getArg(q,5) = getArg(mat[cn].mi,j);
                                pushInstruction(mb,q);
        
                                propagatePartnr(ml, getArg(mat[m].mi, k), 
getArg(q,0), nr);
@@ -606,6 +621,7 @@ mat_join2(MalBlkPtr mb, InstrPtr p, matl
                int mv = (m>=0)?m:n;
                int av = (m<0);
                int bv = (m>=0);
+               int cv = (m>=0 && cm>=0)?cm:cn;
 
                for(k=1; k<mat[mv].mi->argc; k++) {
                        InstrPtr q = copyInstruction(p);
@@ -613,6 +629,7 @@ mat_join2(MalBlkPtr mb, InstrPtr p, matl
                        getArg(q,0) = newTmpVariable(mb, tpe);
                        getArg(q,1) = newTmpVariable(mb, tpe);
                        getArg(q,p->retc+av) = getArg(mat[mv].mi, k);
+                       if (cv >= 0) getArg(q,p->retc+av+2) = 
getArg(mat[cv].mi,k);
                        pushInstruction(mb,q);
 
                        propagatePartnr(ml, getArg(mat[mv].mi, k), 
getArg(q,av), k);
@@ -1564,7 +1581,7 @@ OPTmergetableImplementation(Client cntxt
        mb->stop = 0;
 
        for( i=0; i<oldtop; i++){
-               int bats = 0;
+               int bats = 0, cands = 0, cm = -1, cn = -1;
                InstrPtr r;
 
                p = old[i];
@@ -1584,6 +1601,7 @@ OPTmergetableImplementation(Client cntxt
                        continue;
                }
                bats = nr_of_bats(mb, p);
+               cands = nr_of_cands(mb, p);
 
                /* (l,r) Join (L, R, ..)
                 * 2 -> (l,r) equi/theta joins (l,r)
@@ -1591,24 +1609,28 @@ OPTmergetableImplementation(Client cntxt
                 * NxM -> (l,r) filter-joins (l1,..,ln,r1,..,rm)
                 */
                if (match > 0 && isMatJoinOp(p) && 
-                   p->argc >= 3 && p->retc == 2 && bats >= 2) {
-                       if (bats == 2) {
+                   p->argc >= 3 && p->retc == 2 && (bats-cands) >= 2) {
+                       if ((bats-cands) == 2) {
                                m = is_a_mat(getArg(p,p->retc), &ml);
                                n = is_a_mat(getArg(p,p->retc+1), &ml);
-                               mat_join2(mb, p, &ml, m, n);
+                               cm = is_a_mat(getArg(p,p->retc+2), &ml);
+                               cn = is_a_mat(getArg(p,p->retc+3), &ml);
+                               mat_join2(mb, p, &ml, m, n, cm, cn);
                        } else {
+                               assert(cands == 0);
                                mat_joinNxM(cntxt, mb, p, &ml, bats);
                        }
                        actions++;
                        continue;
                }
                if (match > 0 && isMatLeftJoinOp(p) && p->argc >= 3 && p->retc 
== 2 &&
-                               match == 1 && bats == 2) {
+                               match == 1 && (bats-cands) == 2) {
                        m = is_a_mat(getArg(p,p->retc), &ml);
                        n = -1;
 
                        if (m >= 0) {
-                               mat_join2(mb, p, &ml, m, n);
+                               cm = is_a_mat(getArg(p,p->retc+2), &ml);
+                               mat_join2(mb, p, &ml, m, n, cm, -1);
                                actions++;
                                continue;
                        }
diff --git a/sql/backends/monet5/rel_bin.c b/sql/backends/monet5/rel_bin.c
--- a/sql/backends/monet5/rel_bin.c
+++ b/sql/backends/monet5/rel_bin.c
@@ -20,7 +20,7 @@
 
 #define OUTER_ZERO 64
 
-static stmt * exp_bin(backend *be, sql_exp *e, stmt *left, stmt *right, stmt 
*grp, stmt *ext, stmt *cnt, stmt *sel);
+static stmt * exp_bin(backend *be, sql_exp *e, stmt *left, stmt *right, stmt 
*grp, stmt *ext, stmt *cnt, stmt *sel, stmt *rsel);
 static stmt * rel_bin(backend *be, sql_rel *rel);
 static stmt * subrel_bin(backend *be, sql_rel *rel, list *refs);
 
@@ -211,7 +211,7 @@ handle_in_exps(backend *be, sql_exp *ce,
 {
        mvc *sql = be->mvc;
        node *n;
-       stmt *s = NULL, *c = exp_bin(be, ce, left, right, grp, ext, cnt, NULL);
+       stmt *s = NULL, *c = exp_bin(be, ce, left, right, grp, ext, cnt, NULL, 
NULL);
 
        if (c->nrcols == 0) {
                sql_subtype *bt = sql_bind_localtype("bit");
@@ -223,7 +223,7 @@ handle_in_exps(backend *be, sql_exp *ce,
 
                for( n = nl->h; n; n = n->next) {
                        sql_exp *e = n->data;
-                       stmt *i = exp_bin(be, use_r?e->r:e, left, right, grp, 
ext, cnt, NULL);
+                       stmt *i = exp_bin(be, use_r?e->r:e, left, right, grp, 
ext, cnt, NULL, NULL);
                        
                        i = stmt_binop(be, c, i, cmp); 
                        if (s)
@@ -243,7 +243,7 @@ handle_in_exps(backend *be, sql_exp *ce,
                        s = sel;
                for( n = nl->h; n; n = n->next) {
                        sql_exp *e = n->data;
-                       stmt *i = exp_bin(be, use_r?e->r:e, left, right, grp, 
ext, cnt, NULL);
+                       stmt *i = exp_bin(be, use_r?e->r:e, left, right, grp, 
ext, cnt, NULL, NULL);
                        
                        if (in) { 
                                i = stmt_uselect(be, c, i, cmp, sel, 0); 
@@ -269,7 +269,7 @@ value_list(backend *be, list *vals)
        s = stmt_temp(be, exp_subtype(vals->h->data));
        for( n = vals->h; n; n = n->next) {
                sql_exp *e = n->data;
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to