Changeset: f9e92f9706cb for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=f9e92f9706cb
Modified Files:
        clients/Tests/exports.stable.out
        gdk/ChangeLog
        gdk/gdk.h
        gdk/gdk_bat.c
        gdk/gdk_join.c
        gdk/gdk_logger.c
        gdk/gdk_setop.c
        sql/backends/monet5/sql.c
        sql/storage/bat/bat_table.c
        sql/test/leaks/Tests/check1.stable.out
        sql/test/leaks/Tests/check1.stable.out.int128
        sql/test/leaks/Tests/check2.stable.out
        sql/test/leaks/Tests/check2.stable.out.int128
        sql/test/leaks/Tests/check3.stable.out
        sql/test/leaks/Tests/check3.stable.out.int128
        sql/test/leaks/Tests/check4.stable.out
        sql/test/leaks/Tests/check4.stable.out.int128
        sql/test/leaks/Tests/check5.stable.out
        sql/test/leaks/Tests/check5.stable.out.int128
        sql/test/leaks/Tests/drop3.stable.out
        sql/test/leaks/Tests/drop3.stable.out.int128
        sql/test/leaks/Tests/select1.stable.out
        sql/test/leaks/Tests/select1.stable.out.int128
        sql/test/leaks/Tests/select2.stable.out
        sql/test/leaks/Tests/select2.stable.out.int128
        sql/test/leaks/Tests/temp1.stable.out
        sql/test/leaks/Tests/temp1.stable.out.int128
        sql/test/leaks/Tests/temp2.stable.out
        sql/test/leaks/Tests/temp2.stable.out.int128
        sql/test/leaks/Tests/temp3.stable.out
        sql/test/leaks/Tests/temp3.stable.out.int128
Branch: default
Log Message:

Implemented BATsubdiff, replacing BATkdiff.


diffs (truncated from 2621 to 300 lines):

diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -188,6 +188,7 @@ BAT *BATssort(BAT *b);
 BAT *BATssort_rev(BAT *b);
 gdk_return BATsubbandjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT 
*sr, const void *c1, const void *c2, int li, int hi, BUN estimate);
 gdk_return BATsubcross(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr);
+BAT *BATsubdiff(BAT *l, BAT *r, BAT *sl, BAT *sr, int nil_matches, BUN 
estimate);
 gdk_return BATsubjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, 
int nil_matches, BUN estimate);
 gdk_return BATsubleftfetchjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, 
BAT *sr, int nil_matches, BUN estimate);
 gdk_return BATsubleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT 
*sr, int nil_matches, BUN estimate);
diff --git a/gdk/ChangeLog b/gdk/ChangeLog
--- a/gdk/ChangeLog
+++ b/gdk/ChangeLog
@@ -1,6 +1,12 @@
 # ChangeLog file for MonetDB
 # This file is updated with Maddlog
 
+* Sat Sep  5 2015 Sjoerd Mullender <[email protected]>
+- Implemented BATsubdiff which returns a list of OIDs (sorted, i.e. usable
+  as candidate list) of tuples in the left input whose value does not
+  occur in the right input.  Reimplemented BATkdiff (to be removed later)
+  using this new function.
+
 * Fri Sep  4 2015 Sjoerd Mullender <[email protected]>
 - Removed function BATkintersect.  It wasn't used anymore.  It's
   functionality can be obtained by using BATsubsemijoin.
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -3176,6 +3176,7 @@ gdk_export gdk_return BATsubleftjoin(BAT
 gdk_export gdk_return BATsubouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, 
BAT *sl, BAT *sr, int nil_matches, BUN estimate);
 gdk_export gdk_return BATsubthetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, 
BAT *sl, BAT *sr, int op, int nil_matches, BUN estimate);
 gdk_export gdk_return BATsubsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT 
*sl, BAT *sr, int nil_matches, BUN estimate);
+gdk_export BAT *BATsubdiff(BAT *l, BAT *r, BAT *sl, BAT *sr, int nil_matches, 
BUN estimate);
 gdk_export gdk_return BATsubjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT 
*sl, BAT *sr, int nil_matches, BUN estimate);
 gdk_export gdk_return BATsubleftfetchjoin(BAT **r1p, BAT **r2p, BAT *l, BAT 
*r, BAT *sl, BAT *sr, int nil_matches, BUN estimate);
 gdk_export gdk_return BATsubbandjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT 
*sl, BAT *sr, const void *c1, const void *c2, int li, int hi, BUN estimate);
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -2101,6 +2101,34 @@ BATsetcount(BAT *b, BUN cnt)
                b->hsorted = b->hrevsorted = ATOMlinear(b->htype) != 0;
                b->tsorted = b->trevsorted = ATOMlinear(b->ttype) != 0;
        }
+       if (b->htype == TYPE_void) {
+               b->hsorted = 1;
+               if (b->hseqbase == oid_nil) { /* unlikely */
+                       b->hkey = cnt <= 1;
+                       b->hrevsorted = 1;
+                       b->H->nil = 1;
+                       b->H->nonil = 0;
+               } else {
+                       b->hkey = 1;
+                       b->hrevsorted = cnt <= 1;
+                       b->H->nil = 0;
+                       b->H->nonil = 1;
+               }
+       }
+       if (b->ttype == TYPE_void) {
+               b->tsorted = 1;
+               if (b->tseqbase == oid_nil) {
+                       b->tkey = cnt <= 1;
+                       b->trevsorted = 1;
+                       b->T->nil = 1;
+                       b->T->nonil = 0;
+               } else {
+                       b->tkey = 1;
+                       b->trevsorted = cnt <= 1;
+                       b->T->nil = 0;
+                       b->T->nonil = 1;
+               }
+       }
        assert(b->batCapacity >= cnt);
 }
 
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -65,6 +65,17 @@
  *     values match if the left value is between two corresponding
  *     right values; two extra Boolean parameters, li and hi,
  *     indicate whether equal values match
+ *
+ * In addition to these functions, there are two more functions that
+ * are closely related:
+ * BATsubdiff
+ *     difference: return a candidate list compatible list of OIDs of
+ *     tuples in the left input whose value does not occur in the
+ *     right input
+ * BATproject
+ *     projection: return a BAT aligned with the left input whose
+ *     values are the values from the right input that were referred
+ *     to by the OIDs in the tail of the left input
  */
 
 /* Perform a bunch of sanity checks on the inputs to a join. */
@@ -107,9 +118,12 @@ joinparamcheck(BAT *l, BAT *r1, BAT *r2,
        return GDK_SUCCEED;
 }
 
-/* Create the result bats for a join. */
+/* Create the result bats for a join, returns the absolute maximum
+ * number of outputs that could possibly be generated. */
 static BUN
-joininitresults(BAT **r1p, BAT **r2p, BUN lcnt, BUN rcnt, int lkey, int rkey, 
int semi, int nil_on_miss, BUN estimate, const char *func)
+joininitresults(BAT **r1p, BAT **r2p, BUN lcnt, BUN rcnt, int lkey, int rkey,
+               int semi, int nil_on_miss, int only_misses, BUN estimate,
+               const char *func)
 {
        BAT *r1, *r2;
        BUN maxsize, size;
@@ -117,7 +131,10 @@ joininitresults(BAT **r1p, BAT **r2p, BU
        lkey |= lcnt <= 1;
        rkey |= rcnt <= 1;
 
-       if (rkey | semi) {
+       *r1p = NULL;
+       if (r2p)
+               *r2p = NULL;
+       if (rkey | semi | only_misses) {
                /* each entry left matches at most one on right, in
                 * case nil_on_miss is also set, each entry matches
                 * exactly one */
@@ -143,32 +160,41 @@ joininitresults(BAT **r1p, BAT **r2p, BU
        size = estimate == BUN_NONE ? lcnt : estimate;
        if (size > maxsize)
                size = maxsize;
+       if ((rkey | semi | only_misses) & nil_on_miss) {
+               /* see comment above: each entry left matches exactly
+                * once */
+               size = maxsize;
+       }
 
        r1 = BATnew(TYPE_void, TYPE_oid, size, TRANSIENT);
-       r2 = BATnew(TYPE_void, TYPE_oid, size, TRANSIENT);
-       if (r1 == NULL || r2 == NULL) {
-               BBPreclaim(r1);
-               BBPreclaim(r2);
-               *r1p = *r2p = NULL;
+       if (r1 == NULL) {
                GDKerror("%s: cannot create output BATs.\n", func);
                return BUN_NONE;
        }
        BATseqbase(r1, 0);
-       BATseqbase(r2, 0);
        r1->T->nil = 0;
        r1->T->nonil = 1;
        r1->tkey = 1;
        r1->tsorted = 1;
        r1->trevsorted = 1;
        r1->tdense = 1;
-       r2->T->nil = 0;
-       r2->T->nonil = 1;
-       r2->tkey = 1;
-       r2->tsorted = 1;
-       r2->trevsorted = 1;
-       r2->tdense = 1;
        *r1p = r1;
-       *r2p = r2;
+       if (r2p) {
+               r2 = BATnew(TYPE_void, TYPE_oid, size, TRANSIENT);
+               if (r2 == NULL) {
+                       BBPreclaim(r1);
+                       GDKerror("%s: cannot create output BATs.\n", func);
+                       return BUN_NONE;
+               }
+               BATseqbase(r2, 0);
+               r2->T->nil = 0;
+               r2->T->nonil = 1;
+               r2->tkey = 1;
+               r2->tsorted = 1;
+               r2->trevsorted = 1;
+               r2->tdense = 1;
+               *r2p = r2;
+       }
        return maxsize;
 }
 
@@ -435,11 +461,11 @@ binsearch(const oid *rcand, oid offset,
 static gdk_return
 nomatch(BAT *r1, BAT *r2, BAT *l, BAT *r, BUN lstart, BUN lend,
        const oid *lcand, const oid *lcandend,
-       int nil_on_miss, int must_match, const char *func)
+       int nil_on_miss, int only_misses, int must_match, const char *func)
 {
        BUN cnt;
 
-       if (lstart == lend || (!must_match && !nil_on_miss)) {
+       if (lstart == lend || (!must_match && !(nil_on_miss | only_misses))) {
                virtualize(r1);
                virtualize(r2);
                return GDK_SUCCEED;
@@ -474,15 +500,19 @@ nomatch(BAT *r1, BAT *r2, BAT *l, BAT *r
                BATsetcount(r1, cnt);
                BATseqbase(BATmirror(r1), lstart + l->hseqbase);
        }
-       HEAPfree(&r2->T->heap, 1);
-       r2->ttype = TYPE_void;
-       r2->tvarsized = 1;
-       r2->T->width = 0;
-       r2->T->shift = 0;
-       r2->tdense = 0;
-       BATextend(r2, cnt);
-       BATsetcount(r2, cnt);
-       BATseqbase(BATmirror(r2), oid_nil);
+       BATseqbase(r1, 0);
+       if (r2) {
+               HEAPfree(&r2->T->heap, 1);
+               r2->ttype = TYPE_void;
+               r2->tvarsized = 1;
+               r2->T->width = 0;
+               r2->T->shift = 0;
+               r2->tdense = 0;
+               BATextend(r2, cnt);
+               BATsetcount(r2, cnt);
+               BATseqbase(BATmirror(r2), oid_nil);
+               BATseqbase(r2, 0);
+       }
        ALGODEBUG fprintf(stderr,
                          "#%s(l=%s,r=%s)=(%s#"BUNFMT"%s%s,%s#"BUNFMT"%s%s) -- 
nomatch\n",
                          func,
@@ -490,15 +520,15 @@ nomatch(BAT *r1, BAT *r2, BAT *l, BAT *r
                          BATgetId(r1), BATcount(r1),
                          r1->tsorted ? "-sorted" : "",
                          r1->trevsorted ? "-revsorted" : "",
-                         BATgetId(r2), BATcount(r2),
-                         r2->tsorted ? "-sorted" : "",
-                         r2->trevsorted ? "-revsorted" : "");
+                         r2 ? BATgetId(r2) : "--", r2 ? BATcount(r2) : 0,
+                         r2 && r2->tsorted ? "-sorted" : "",
+                         r2 && r2->trevsorted ? "-revsorted" : "");
        return GDK_SUCCEED;
 }
 
 static gdk_return
 mergejoin_void(BAT *r1, BAT *r2, BAT *l, BAT *r, BAT *sl, BAT *sr,
-              int nil_on_miss, int must_match)
+              int nil_on_miss, int only_misses, int must_match)
 {
        oid lo, hi;
        BUN cnt, i;
@@ -575,10 +605,10 @@ mergejoin_void(BAT *r1, BAT *r2, BAT *l,
                                               seq - l->hseqbase,
                                               seq + cnt - l->hseqbase,
                                               NULL, NULL, nil_on_miss,
-                                              must_match,
+                                              only_misses, must_match,
                                               "mergejoin_void");
                        if (must_match && hi - lo < cnt) {
-                               GDKerror("mergejoin(%s,%s) does not hit always 
=> can't use fetchjoin.\n", BATgetId(l), BATgetId(r));
+                               GDKerror("mergejoin_void(%s,%s) does not hit 
always => can't use fetchjoin.\n", BATgetId(l), BATgetId(r));
                                goto bailout;
                        }
 
@@ -590,6 +620,48 @@ mergejoin_void(BAT *r1, BAT *r2, BAT *l,
                         * tail since all values in l will match
                         * something (even if nil if nil_on_miss is
                         * set) */
+                       if (only_misses) {
+                               /* the return values are
+                                * [seq..lo') + [hi'..seq+cnt)
+                                * where lo' and hi' are lo and hi
+                                * translated back to l's head
+                                * values */
+                               lo = lo + l->hseqbase - l->tseqbase; /* lo' */
+                               hi = hi + l->hseqbase - l->tseqbase; /* hi' */
+                               assert(lo >= seq);
+                               assert(hi <= seq + cnt);
+                               if (lo == seq || hi == seq + cnt) {
+                                       /* one of [seq..lo') and
+                                        * [hi'..seq+cnt) is empty, so
+                                        * the result is the other
+                                        * range and thus dense */
+                                       HEAPfree(&r1->T->heap, 1);
+                                       r1->ttype = TYPE_void;
+                                       r1->tvarsized = 1;
+                                       r1->T->width = 0;
+                                       r1->T->shift = 0;
+                                       r1->tdense = 0;
+                                       BATextend(r1, cnt - (hi - lo));
+                                       BATsetcount(r1, cnt - (hi - lo));
+                                       BATseqbase(BATmirror(r1), lo == seq ? 
hi : seq);
+                               } else {
+                                       if (BATextend(r1, cnt - (hi - lo)) != 
GDK_SUCCEED)
+                                               goto bailout;
+                                       for (o = seq; o < lo; o++)
+                                               APPEND(r1, o);
+                                       seq += cnt;
+                                       for (o = hi; o < seq; o++)
+                                               APPEND(r1, o);
+                                       BATsetcount(r1, cnt - (hi - lo));
+                                       r1->tsorted = 1;
+                                       r1->trevsorted = 0;
+                                       r1->tdense = 0;
+                                       r1->tkey = 1;
+                                       r1->T->nil = 0;
+                                       r1->T->nonil = 1;
+                               }
+                               goto doreturn;
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to