Changeset: 7fdfe8dbc0ad for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=7fdfe8dbc0ad
Modified Files:
clients/Tests/exports.stable.out
gdk/gdk.h
gdk/gdk_calc_private.h
gdk/gdk_join.c
monetdb5/mal/mal.c
monetdb5/mal/mal.h
monetdb5/mal/mal_profiler.c
monetdb5/mal/mal_profiler.h
testing/Mtest.py.in
tools/merovingian/utils/control.c
tools/mserver/mserver5.c
Branch: sciql
Log Message:
merged from default
diffs (truncated from 599 to 300 lines):
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -199,6 +199,7 @@ gdk_return BATsubjoin(BAT **r1p, BAT **r
gdk_return BATsubleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT
*sr, BUN estimate);
gdk_return BATsubouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT
*sr, BUN estimate);
BAT *BATsubselect(BAT *b, BAT *s, const void *tl, const void *th, int li, int
hi, int anti);
+gdk_return BATsubsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, BUN estimate);
gdk_return BATsubsort(BAT **sorted, BAT **order, BAT **groups, BAT *b, BAT *o,
BAT *g, int reverse, int stable);
gdk_return BATsubthetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT
*sr, const char *op, BUN estimate);
gdk_return BATsum(void *res, int tp, BAT *b, BAT *s, int skip_nils, int
abort_on_error, int nil_if_empty);
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -3180,6 +3180,7 @@ gdk_export BAT *BATcross(BAT *l, BAT *r)
gdk_export gdk_return BATsubleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT
*sl, BAT *sr, BUN estimate);
gdk_export gdk_return BATsubouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
BAT *sl, BAT *sr, BUN estimate);
gdk_export gdk_return BATsubthetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
BAT *sl, BAT *sr, const char *op, BUN estimate);
+gdk_export gdk_return BATsubsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, BUN estimate);
gdk_export gdk_return BATsubjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT
*sl, BAT *sr, BUN estimate);
gdk_export BAT *BATproject(BAT *l, BAT *r);
diff --git a/gdk/gdk_calc_private.h b/gdk/gdk_calc_private.h
--- a/gdk/gdk_calc_private.h
+++ b/gdk/gdk_calc_private.h
@@ -46,6 +46,7 @@ typedef unsigned __int64 ulng;
do { \
start = 0; \
end = cnt = BATcount(b); \
+ cand = candend = NULL; \
if (s) { \
assert(BATttype(s) == TYPE_oid); \
if (BATcount(s) == 0) { \
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -30,7 +30,11 @@ joinparamcheck(BAT *l, BAT *r, BAT *sl,
GDKerror("%s: inputs must have dense head.\n", func);
return GDK_FAIL;
}
- if (ATOMtype(l->ttype) != ATOMtype(r->ttype)) {
+ if (l->ttype == TYPE_void || r->ttype == TYPE_void) {
+ GDKerror("%s: tail type must not be VOID.\n", func);
+ return GDK_FAIL;
+ }
+ if (l->ttype != r->ttype) {
GDKerror("%s: inputs not compatible.\n", func);
return GDK_FAIL;
}
@@ -86,7 +90,9 @@ joininitresults(BAT **r1p, BAT **r2p, BU
return GDK_SUCCEED;
}
-#define VALUE(side, x) (side##vars ? side##vars + VarHeapVal(side##vals, (x), side##width) : side##vals + ((x) * side##width))
+#define VALUE(s, x) (s##vars ? \
+ s##vars + VarHeapVal(s##vals, (x), s##width) : \
+ s##vals + ((x) * s##width))
/* Do a binary search for the first/last occurrence of v between lo and hi
* (lo inclusive, hi not inclusive) in rvals/rvars.
@@ -138,9 +144,9 @@ static gdk_return
mergejoin(BAT *r1, BAT *r2, BAT *l, BAT *r, BAT *sl, BAT *sr, int nil_matches,
int nil_on_miss, int semi)
{
BUN lstart, lend, lcnt;
- const oid *lcand = NULL, *lcandend = NULL;
+ const oid *lcand, *lcandend;
BUN rstart, rend, rcnt, rstartorig;
- const oid *rcand = NULL, *rcandend = NULL, *rcandorig;
+ const oid *rcand, *rcandend, *rcandorig;
BUN lscan, rscan;
const char *lvals, *rvals;
const char *lvars, *rvars;
@@ -155,8 +161,28 @@ mergejoin(BAT *r1, BAT *r2, BAT *l, BAT
oid lv;
BUN i;
+ ALGODEBUG fprintf(stderr, "#mergejoin(l=%s#" BUNFMT "[%s]%s%s,"
+ "r=%s#" BUNFMT "[%s]%s%s,sl=%s#" BUNFMT "%s%s,"
+ "sr=%s#" BUNFMT "%s%s,nil_matches=%d,"
+ "nil_on_miss=%d,semi=%d)\n",
+ BATgetId(l), BATcount(l), ATOMname(l->ttype),
+ l->tsorted ? "-sorted" : "",
+ l->trevsorted ? "-revsorted" : "",
+ BATgetId(r), BATcount(r), ATOMname(r->ttype),
+ r->tsorted ? "-sorted" : "",
+ r->trevsorted ? "-revsorted" : "",
+ sl ? BATgetId(sl) : "NULL", sl ? BATcount(sl) : 0,
+ sl && sl->tsorted ? "-sorted" : "",
+ sl && sl->trevsorted ? "-revsorted" : "",
+ sr ? BATgetId(sr) : "NULL", sr ? BATcount(sr) : 0,
+ sr && sr->tsorted ? "-sorted" : "",
+ sr && sr->trevsorted ? "-revsorted" : "",
+ nil_matches, nil_on_miss, semi);
+
assert(BAThdense(l));
assert(BAThdense(r));
+ assert(l->ttype != TYPE_void);
+ assert(r->ttype != TYPE_void);
assert(l->ttype == r->ttype);
assert(r->tsorted || r->trevsorted);
assert(sl == NULL || sl->tsorted);
@@ -178,14 +204,6 @@ mergejoin(BAT *r1, BAT *r2, BAT *l, BAT
}
lwidth = l->T->width;
rwidth = r->T->width;
- /* equal_order is set if we can scan both BATs in the same
- * order, so when both are sorted or both are reverse
- * sorted */
- equal_order = l->tsorted == r->tsorted || l->trevsorted == r->trevsorted;
- /* [lr]reverse is either 1 or -1 depending on the order of
- * l/r: it determines the comparison function used */
- lreverse = l->tsorted ? 1 : -1;
- rreverse = r->tsorted ? 1 : -1;
/* set basic properties, they will be adjusted if necessary
* later on */
@@ -213,12 +231,23 @@ mergejoin(BAT *r1, BAT *r2, BAT *l, BAT
nl > 0;
rscan++)
nl >>= 1;
+
+ /* equal_order is set if we can scan both BATs in the
+ * same order, so when both are sorted or both are
+ * reverse sorted */
+ equal_order = l->tsorted == r->tsorted || l->trevsorted == r->trevsorted;
+ /* [lr]reverse is either 1 or -1 depending on the
+ * order of l/r: it determines the comparison function
+ * used */
+ lreverse = l->tsorted ? 1 : -1;
} else {
/* if l not sorted, we will always use binary search
* on r */
lscan = rscan = 0;
equal_order = 1;
+ lreverse = 1;
}
+ rreverse = r->tsorted ? 1 : -1;
while (lcand ? lcand < lcandend : lstart < lend) {
if (!nil_on_miss && lscan > 0) {
@@ -358,7 +387,8 @@ mergejoin(BAT *r1, BAT *r2, BAT *l, BAT
if (nr == 0) {
/* no entries in r found */
if (!nil_on_miss) {
- if (rcand ? rcand == rcandend : rstart == rend) {
+ if (lscan > 0 &&
+ (rcand ? rcand == rcandend : rstart == rend)) {
/* nothing more left to match
* in r */
break;
@@ -531,8 +561,28 @@ hashjoin(BAT *r1, BAT *r2, BAT *l, BAT *
int (*cmp)(const void *, const void *) = BATatoms[l->ttype].atomCmp;
const char *v;
+ ALGODEBUG fprintf(stderr, "#hashjoin(l=%s#" BUNFMT "[%s]%s%s,"
+ "r=%s#" BUNFMT "[%s]%s%s,sl=%s#" BUNFMT "%s%s,"
+ "sr=%s#" BUNFMT "%s%s,nil_matches=%d,"
+ "nil_on_miss=%d,semi=%d)\n",
+ BATgetId(l), BATcount(l), ATOMname(l->ttype),
+ l->tsorted ? "-sorted" : "",
+ l->trevsorted ? "-revsorted" : "",
+ BATgetId(r), BATcount(r), ATOMname(r->ttype),
+ r->tsorted ? "-sorted" : "",
+ r->trevsorted ? "-revsorted" : "",
+ sl ? BATgetId(sl) : "NULL", sl ? BATcount(sl) : 0,
+ sl && sl->tsorted ? "-sorted" : "",
+ sl && sl->trevsorted ? "-revsorted" : "",
+ sr ? BATgetId(sr) : "NULL", sr ? BATcount(sr) : 0,
+ sr && sr->tsorted ? "-sorted" : "",
+ sr && sr->trevsorted ? "-revsorted" : "",
+ nil_matches, nil_on_miss, semi);
+
assert(BAThdense(l));
assert(BAThdense(r));
+ assert(l->ttype != TYPE_void);
+ assert(r->ttype != TYPE_void);
assert(l->ttype == r->ttype);
assert(sl == NULL || sl->tsorted);
assert(sr == NULL || sr->tsorted);
@@ -760,6 +810,8 @@ thetajoin(BAT *r1, BAT *r2, BAT *l, BAT
assert(BAThdense(l));
assert(BAThdense(r));
+ assert(l->ttype != TYPE_void);
+ assert(r->ttype != TYPE_void);
assert(l->ttype == r->ttype);
assert(sl == NULL || sl->tsorted);
assert(sr == NULL || sr->tsorted);
@@ -938,6 +990,28 @@ BATsubouterjoin(BAT **r1p, BAT **r2p, BA
return hashjoin(r1, r2, l, r, sl, sr, 0, 1, 0);
}
+/* Perform a semi-join over l and r. Returns two new, aligned,
+ * dense-headed bats whose tails contain the oids (head column
+ * values) of the matching tuples. The result is in the same order
+ * as l (i.e. r1 is sorted). */
+gdk_return
+BATsubsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, BUN estimate)
+{
+ BAT *r1, *r2;
+
+ *r1p = NULL;
+ *r2p = NULL;
+ if (joinparamcheck(l, r, sl, sr, "BATsubsemijoin") == GDK_FAIL)
+ return GDK_FAIL;
+ if (joininitresults(&r1, &r2, estimate != BUN_NONE ? estimate : sl ? BATcount(sl) : BATcount(l), "BATsubsemijoin") == GDK_FAIL)
+ return GDK_FAIL;
+ *r1p = r1;
+ *r2p = r2;
+ if (r->tsorted || r->trevsorted)
+ return mergejoin(r1, r2, l, r, sl, sr, 0, 0, 1);
+ return hashjoin(r1, r2, l, r, sl, sr, 0, 0, 1);
+}
+
gdk_return
BATsubthetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, const
char *op, BUN estimate)
{
@@ -1024,15 +1098,24 @@ BATproject(BAT *l, BAT *r)
const oid *o;
const void *nil = ATOMnilptr(r->ttype);
const void *v, *prev;
- BATiter ri;
+ BATiter ri, bni;
oid lo, hi;
BUN n;
int (*cmp)(const void *, const void *) = BATatoms[r->ttype].atomCmp;
int c;
+ ALGODEBUG fprintf(stderr, "#BATproject(l=%s#" BUNFMT "%s%s,"
+ "r=%s#" BUNFMT "[%s]%s%s)\n",
+ BATgetId(l), BATcount(l),
+ l->tsorted ? "-sorted" : "",
+ l->trevsorted ? "-revsorted" : "",
+ BATgetId(r), BATcount(r), ATOMname(r->ttype),
+ r->tsorted ? "-sorted" : "",
+ r->trevsorted ? "-revsorted" : "");
+
assert(BAThdense(l));
assert(BAThdense(r));
- assert(l->ttype == TYPE_void || l->ttype == TYPE_oid);
+ assert(ATOMtype(l->ttype) == TYPE_oid);
if (BATtdense(l) && BATcount(l) > 0) {
lo = l->tseqbase;
@@ -1054,12 +1137,13 @@ BATproject(BAT *l, BAT *r)
return bn;
}
assert(l->ttype == TYPE_oid);
- bn = BATnew(TYPE_void, r->ttype, BATcount(l));
+ bn = BATnew(TYPE_void, ATOMtype(r->ttype), BATcount(l));
if (bn == NULL)
return NULL;
o = (const oid *) Tloc(l, BUNfirst(l));
n = BUNfirst(bn);
ri = bat_iterator(r);
+ bni = bat_iterator(bn);
/* be optimistic, we'll change this as needed */
bn->T->nonil = 1;
bn->T->nil = 0;
@@ -1092,16 +1176,15 @@ BATproject(BAT *l, BAT *r)
bn->trevsorted = 0;
if (!bn->tsorted)
bn->tkey = 0; /* can't be sure
*/
- }
- if (c > 0) {
+ } else if (c > 0) {
bn->tsorted = 0;
if (!bn->trevsorted)
bn->tkey = 0; /* can't be sure
*/
+ } else {
+ bn->tkey = 0; /* definitely */
}
- if (c == 0)
- bn->tkey = 0; /* definitely */
}
- prev = v;
+ prev = BUNtail(bni, n);
}
}
assert(n == BATcount(l));
diff --git a/monetdb5/mal/mal.c b/monetdb5/mal/mal.c
--- a/monetdb5/mal/mal.c
+++ b/monetdb5/mal/mal.c
@@ -177,6 +177,7 @@
char monet_cwd[PATHLENGTH] = { 0 };
size_t monet_memory;
+char *mal_trace; /* enable profile events on console */
#include "mal_stack.h"
#include "mal_linker.h"
@@ -189,6 +190,7 @@ size_t monet_memory;
#include "mal_sabaoth.h"
#include "mal_recycle.h"
#include "mal_dataflow.h"
+#include "mal_profiler.h"
MT_Lock mal_contextLock MT_LOCK_INITIALIZER("mal_contextLock");
MT_Lock mal_namespaceLock MT_LOCK_INITIALIZER("mal_namespaceLock");
@@ -254,6 +256,36 @@ int mal_init(void){
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list