Changeset: 52d2fac8d5cf for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=52d2fac8d5cf
Added Files:
        monetdb5/optimizer/Tests/dataflow4.mal
        monetdb5/optimizer/Tests/dataflow4.stable.err
        monetdb5/optimizer/Tests/dataflow4.stable.out
        monetdb5/optimizer/Tests/dataflow5.mal
        monetdb5/optimizer/Tests/dataflow5.stable.err
        monetdb5/optimizer/Tests/dataflow5.stable.out
        monetdb5/optimizer/Tests/inlineFunction1.mal
        monetdb5/optimizer/Tests/inlineFunction1.stable.err
        monetdb5/optimizer/Tests/inlineFunction1.stable.out
        monetdb5/optimizer/Tests/inlineFunction3.mal
        monetdb5/optimizer/Tests/inlineFunction3.stable.err
        monetdb5/optimizer/Tests/inlineFunction3.stable.out
        monetdb5/optimizer/Tests/inlineFunction4.mal
        monetdb5/optimizer/Tests/inlineFunction4.stable.err
        monetdb5/optimizer/Tests/inlineFunction4.stable.out
        sql/test/BugTracker-2009/Tests/overflow.SF-2853458.stable.err.Solaris
        sql/test/BugTracker-2009/Tests/overflow.SF-2853458.stable.out.Solaris
Removed Files:
        sql/test/BugTracker-2009/Tests/overflow.SF-2853458.stable.err.SunOS
        sql/test/BugTracker-2009/Tests/overflow.SF-2853458.stable.out.SunOS
Modified Files:
        clients/Tests/exports.stable.out
        gdk/gdk.h
        gdk/gdk_calc_private.h
        gdk/gdk_heap.c
        gdk/gdk_join.c
        gdk/gdk_private.h
        gdk/gdk_storage.c
        monetdb5/extras/jaql/parser/jaql.l
        monetdb5/mal/Tests/tst019.stable.out
        monetdb5/mal/mal.c
        monetdb5/mal/mal.h
        monetdb5/mal/mal_parser.c
        monetdb5/mal/mal_profiler.c
        monetdb5/mal/mal_profiler.h
        monetdb5/modules/kernel/bat5.c
        monetdb5/optimizer/Tests/All
        monetdb5/optimizer/Tests/dataflow3.mal
        monetdb5/optimizer/Tests/dataflow3.stable.out
        monetdb5/optimizer/Tests/ifthencst.stable.out
        monetdb5/optimizer/Tests/inlineFunction.mal
        monetdb5/optimizer/Tests/inlineFunction.stable.out
        monetdb5/optimizer/Tests/inlineFunction2.mal
        monetdb5/optimizer/Tests/inlineFunction2.stable.out
        monetdb5/optimizer/opt_dataflow.c
        monetdb5/optimizer/opt_evaluate.c
        sql/backends/monet5/sql.mx
        sql/test/BugTracker-2012/Tests/predicate_select.Bug-3090.stable.out
        sql/test/Tests/systemfunctions_nogeom.stable.out
        testing/Mtest.py.in
        tools/merovingian/utils/control.c
        tools/mserver/mserver5.c
Branch: holindex
Log Message:

Merge with default.


diffs (truncated from 2880 to 300 lines):

diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -199,6 +199,7 @@ gdk_return BATsubjoin(BAT **r1p, BAT **r
 gdk_return BATsubleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, BUN estimate);
 gdk_return BATsubouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, BUN estimate);
 BAT *BATsubselect(BAT *b, BAT *s, const void *tl, const void *th, int li, int hi, int anti);
+gdk_return BATsubsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, BUN estimate);
 gdk_return BATsubsort(BAT **sorted, BAT **order, BAT **groups, BAT *b, BAT *o, BAT *g, int reverse, int stable);
 gdk_return BATsubthetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, const char *op, BUN estimate);
 gdk_return BATsum(void *res, int tp, BAT *b, BAT *s, int skip_nils, int abort_on_error, int nil_if_empty);
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -3180,6 +3180,7 @@ gdk_export BAT *BATcross(BAT *l, BAT *r)
 gdk_export gdk_return BATsubleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, BUN estimate);
 gdk_export gdk_return BATsubouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, BUN estimate);
 gdk_export gdk_return BATsubthetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, const char *op, BUN estimate);
+gdk_export gdk_return BATsubsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, BUN estimate);
 gdk_export gdk_return BATsubjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, BUN estimate);
 gdk_export BAT *BATproject(BAT *l, BAT *r);
 
diff --git a/gdk/gdk_calc_private.h b/gdk/gdk_calc_private.h
--- a/gdk/gdk_calc_private.h
+++ b/gdk/gdk_calc_private.h
@@ -46,6 +46,7 @@ typedef unsigned __int64 ulng;
        do {                                                            \
                start = 0;                                              \
                end = cnt = BATcount(b);                                \
+               cand = candend = NULL;                                  \
                if (s) {                                                \
                        assert(BATttype(s) == TYPE_oid);                \
                        if (BATcount(s) == 0) {                         \
diff --git a/gdk/gdk_heap.c b/gdk/gdk_heap.c
--- a/gdk/gdk_heap.c
+++ b/gdk/gdk_heap.c
@@ -174,43 +174,17 @@ HEAPcacheFind(size_t *maxsz, char *fn, s
                        }
                        if (e != NULL && e->maxsz < *maxsz) {
                                /* resize file ? */
-                               FILE *fp;
                                long_str fn;
 
                                GDKfilepath(fn, HCDIR, e->fn, NULL);
-
-                               if ((fp = fopen(fn, "rb+")) != NULL &&
-#ifdef _WIN64
-                                   _fseeki64(fp, (ssize_t) *maxsz - 1, 
SEEK_SET) >= 0 &&
-#else
-#ifdef HAVE_FSEEKO
-                                   fseeko(fp, (off_t) *maxsz - 1, SEEK_SET) >= 
0 &&
-#else
-                                   fseek(fp, (long) *maxsz - 1, SEEK_SET) >= 0 
&&
-#endif
-#endif
-                                   fputc('\n', fp) >= 0 &&
-                                   fflush(fp) >= 0) {
-                                       if (fclose(fp) >= 0) {
-                                               void *base = GDKload(fn, NULL, 
*maxsz, *maxsz, STORE_MMAP);
-                                               GDKmunmap(e->base, e->maxsz);
-                                               e->base = base;
-                                               e->maxsz = *maxsz;
-                                       } else {
-                                               /* extending may have
-                                                * failed since fclose
-                                                * failed */
-                                               e = NULL;
-                                       }
-                                       /* after fclose, successful or
-                                        * not, we can't call fclose
-                                        * again */
-                                       fp = NULL;
-                               }
-                               if (fp) {
-                                       /* if set, extending the file
+                               if (GDKextend(fn, *maxsz) == 0) {
+                                       void *base = GDKload(fn, NULL, *maxsz, 
*maxsz, STORE_MMAP);
+                                       GDKmunmap(e->base, e->maxsz);
+                                       e->base = base;
+                                       e->maxsz = *maxsz;
+                               } else {
+                                       /* extending may have
                                         * failed */
-                                       fclose(fp);
                                        e = NULL;
                                }
                        }
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -30,7 +30,11 @@ joinparamcheck(BAT *l, BAT *r, BAT *sl, 
                GDKerror("%s: inputs must have dense head.\n", func);
                return GDK_FAIL;
        }
-       if (ATOMtype(l->ttype) != ATOMtype(r->ttype)) {
+       if (l->ttype == TYPE_void || r->ttype == TYPE_void) {
+               GDKerror("%s: tail type must not be VOID.\n", func);
+               return GDK_FAIL;
+       }
+       if (l->ttype != r->ttype) {
                GDKerror("%s: inputs not compatible.\n", func);
                return GDK_FAIL;
        }
@@ -86,7 +90,9 @@ joininitresults(BAT **r1p, BAT **r2p, BU
        return GDK_SUCCEED;
 }
 
-#define VALUE(side, x)         (side##vars ? side##vars + 
VarHeapVal(side##vals, (x), side##width) : side##vals + ((x) * side##width))
+#define VALUE(s, x)    (s##vars ? \
+                        s##vars + VarHeapVal(s##vals, (x), s##width) : \
+                        s##vals + ((x) * s##width))
 
 /* Do a binary search for the first/last occurrence of v between lo and hi
  * (lo inclusive, hi not inclusive) in rvals/rvars.
@@ -138,9 +144,9 @@ static gdk_return
 mergejoin(BAT *r1, BAT *r2, BAT *l, BAT *r, BAT *sl, BAT *sr, int nil_matches, 
int nil_on_miss, int semi)
 {
        BUN lstart, lend, lcnt;
-       const oid *lcand = NULL, *lcandend = NULL;
+       const oid *lcand, *lcandend;
        BUN rstart, rend, rcnt, rstartorig;
-       const oid *rcand = NULL, *rcandend = NULL, *rcandorig;
+       const oid *rcand, *rcandend, *rcandorig;
        BUN lscan, rscan;
        const char *lvals, *rvals;
        const char *lvars, *rvars;
@@ -155,8 +161,28 @@ mergejoin(BAT *r1, BAT *r2, BAT *l, BAT 
        oid lv;
        BUN i;
 
+       ALGODEBUG fprintf(stderr, "#mergejoin(l=%s#" BUNFMT "[%s]%s%s,"
+                         "r=%s#" BUNFMT "[%s]%s%s,sl=%s#" BUNFMT "%s%s,"
+                         "sr=%s#" BUNFMT "%s%s,nil_matches=%d,"
+                         "nil_on_miss=%d,semi=%d)\n",
+                         BATgetId(l), BATcount(l), ATOMname(l->ttype),
+                         l->tsorted ? "-sorted" : "",
+                         l->trevsorted ? "-revsorted" : "",
+                         BATgetId(r), BATcount(r), ATOMname(r->ttype),
+                         r->tsorted ? "-sorted" : "",
+                         r->trevsorted ? "-revsorted" : "",
+                         sl ? BATgetId(sl) : "NULL", sl ? BATcount(sl) : 0,
+                         sl && sl->tsorted ? "-sorted" : "",
+                         sl && sl->trevsorted ? "-revsorted" : "",
+                         sr ? BATgetId(sr) : "NULL", sr ? BATcount(sr) : 0,
+                         sr && sr->tsorted ? "-sorted" : "",
+                         sr && sr->trevsorted ? "-revsorted" : "",
+                         nil_matches, nil_on_miss, semi);
+
        assert(BAThdense(l));
        assert(BAThdense(r));
+       assert(l->ttype != TYPE_void);
+       assert(r->ttype != TYPE_void);
        assert(l->ttype == r->ttype);
        assert(r->tsorted || r->trevsorted);
        assert(sl == NULL || sl->tsorted);
@@ -178,14 +204,6 @@ mergejoin(BAT *r1, BAT *r2, BAT *l, BAT 
        }
        lwidth = l->T->width;
        rwidth = r->T->width;
-       /* equal_order is set if we can scan both BATs in the same
-        * order, so when both are sorted or both are reverse
-        * sorted */
-       equal_order = l->tsorted == r->tsorted || l->trevsorted == 
r->trevsorted;
-       /* [lr]reverse is either 1 or -1 depending on the order of
-        * l/r: it determines the comparison function used */
-       lreverse = l->tsorted ? 1 : -1;
-       rreverse = r->tsorted ? 1 : -1;
 
        /* set basic properties, they will be adjusted if necessary
         * later on */
@@ -213,12 +231,23 @@ mergejoin(BAT *r1, BAT *r2, BAT *l, BAT 
                     nl > 0;
                     rscan++)
                        nl >>= 1;
+
+               /* equal_order is set if we can scan both BATs in the
+                * same order, so when both are sorted or both are
+                * reverse sorted */
+               equal_order = l->tsorted == r->tsorted || l->trevsorted == 
r->trevsorted;
+               /* [lr]reverse is either 1 or -1 depending on the
+                * order of l/r: it determines the comparison function
+                * used */
+               lreverse = l->tsorted ? 1 : -1;
        } else {
                /* if l not sorted, we will always use binary search
                 * on r */
                lscan = rscan = 0;
                equal_order = 1;
+               lreverse = 1;
        }
+       rreverse = r->tsorted ? 1 : -1;
 
        while (lcand ? lcand < lcandend : lstart < lend) {
                if (!nil_on_miss && lscan > 0) {
@@ -358,7 +387,8 @@ mergejoin(BAT *r1, BAT *r2, BAT *l, BAT 
                if (nr == 0) {
                        /* no entries in r found */
                        if (!nil_on_miss) {
-                               if (rcand ? rcand == rcandend : rstart == rend) 
{
+                               if (lscan > 0 &&
+                                   (rcand ? rcand == rcandend : rstart == 
rend)) {
                                        /* nothing more left to match
                                         * in r */
                                        break;
@@ -531,8 +561,28 @@ hashjoin(BAT *r1, BAT *r2, BAT *l, BAT *
        int (*cmp)(const void *, const void *) = BATatoms[l->ttype].atomCmp;
        const char *v;
 
+       ALGODEBUG fprintf(stderr, "#hashjoin(l=%s#" BUNFMT "[%s]%s%s,"
+                         "r=%s#" BUNFMT "[%s]%s%s,sl=%s#" BUNFMT "%s%s,"
+                         "sr=%s#" BUNFMT "%s%s,nil_matches=%d,"
+                         "nil_on_miss=%d,semi=%d)\n",
+                         BATgetId(l), BATcount(l), ATOMname(l->ttype),
+                         l->tsorted ? "-sorted" : "",
+                         l->trevsorted ? "-revsorted" : "",
+                         BATgetId(r), BATcount(r), ATOMname(r->ttype),
+                         r->tsorted ? "-sorted" : "",
+                         r->trevsorted ? "-revsorted" : "",
+                         sl ? BATgetId(sl) : "NULL", sl ? BATcount(sl) : 0,
+                         sl && sl->tsorted ? "-sorted" : "",
+                         sl && sl->trevsorted ? "-revsorted" : "",
+                         sr ? BATgetId(sr) : "NULL", sr ? BATcount(sr) : 0,
+                         sr && sr->tsorted ? "-sorted" : "",
+                         sr && sr->trevsorted ? "-revsorted" : "",
+                         nil_matches, nil_on_miss, semi);
+
        assert(BAThdense(l));
        assert(BAThdense(r));
+       assert(l->ttype != TYPE_void);
+       assert(r->ttype != TYPE_void);
        assert(l->ttype == r->ttype);
        assert(sl == NULL || sl->tsorted);
        assert(sr == NULL || sr->tsorted);
@@ -760,6 +810,8 @@ thetajoin(BAT *r1, BAT *r2, BAT *l, BAT 
 
        assert(BAThdense(l));
        assert(BAThdense(r));
+       assert(l->ttype != TYPE_void);
+       assert(r->ttype != TYPE_void);
        assert(l->ttype == r->ttype);
        assert(sl == NULL || sl->tsorted);
        assert(sr == NULL || sr->tsorted);
@@ -938,6 +990,28 @@ BATsubouterjoin(BAT **r1p, BAT **r2p, BA
        return hashjoin(r1, r2, l, r, sl, sr, 0, 1, 0);
 }
 
+/* Perform a semi-join over l and r.  Returns two new, aligned,
+ * dense-headed bats whose tails hold the oids (head column values)
+ * of the matching tuples.  The result is in the same order as l
+ * (i.e. r1 is sorted). */
+gdk_return
+BATsubsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, BUN 
estimate)
+{
+       BAT *r1, *r2;
+
+       *r1p = NULL;
+       *r2p = NULL;
+       if (joinparamcheck(l, r, sl, sr, "BATsubsemijoin") == GDK_FAIL)
+               return GDK_FAIL;
+       if (joininitresults(&r1, &r2, estimate != BUN_NONE ? estimate : sl ? 
BATcount(sl) : BATcount(l), "BATsubsemijoin") == GDK_FAIL)
+               return GDK_FAIL;
+       *r1p = r1;
+       *r2p = r2;
+       if (r->tsorted || r->trevsorted)
+               return mergejoin(r1, r2, l, r, sl, sr, 0, 0, 1);
+       return hashjoin(r1, r2, l, r, sl, sr, 0, 0, 1);
+}
+
 gdk_return
 BATsubthetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, const 
char *op, BUN estimate)
 {
@@ -1024,15 +1098,24 @@ BATproject(BAT *l, BAT *r)
        const oid *o;
        const void *nil = ATOMnilptr(r->ttype);
        const void *v, *prev;
-       BATiter ri;
+       BATiter ri, bni;
        oid lo, hi;
        BUN n;
        int (*cmp)(const void *, const void *) = BATatoms[r->ttype].atomCmp;
        int c;
 
+       ALGODEBUG fprintf(stderr, "#BATproject(l=%s#" BUNFMT "%s%s,"
+                         "r=%s#" BUNFMT "[%s]%s%s)\n",
+                         BATgetId(l), BATcount(l),
+                         l->tsorted ? "-sorted" : "",
+                         l->trevsorted ? "-revsorted" : "",
+                         BATgetId(r), BATcount(r), ATOMname(r->ttype),
+                         r->tsorted ? "-sorted" : "",
+                         r->trevsorted ? "-revsorted" : "");
+
        assert(BAThdense(l));
        assert(BAThdense(r));
-       assert(l->ttype == TYPE_void || l->ttype == TYPE_oid);
+       assert(ATOMtype(l->ttype) == TYPE_oid);
 
        if (BATtdense(l) && BATcount(l) > 0) {
                lo = l->tseqbase;
@@ -1054,12 +1137,13 @@ BATproject(BAT *l, BAT *r)
                return bn;
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to