Changeset: 404f0a7d5c99 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=404f0a7d5c99
Modified Files:
clients/Tests/exports.stable.out
gdk/gdk.h
gdk/gdk_private.h
gdk/gdk_relop.mx
gdk/gdk_storage.c
Branch: default
Log Message:
Removed BATmultijoin, replaced BATmultiprintf.
diffs (truncated from 1094 to 300 lines):
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -154,7 +154,7 @@ size_t BATmemsize(BAT *b, int dirty);
BAT *BATmergecand(BAT *a, BAT *b);
int BATmmap(BAT *b, int hb, int tb, int hh, int th, int force);
BAT *BATmode(BAT *b, int onoff);
-int BATmultiprintf(stream *f, int argc, BAT *argv[], int printoid, int order,
int printorderby);
+gdk_return BATmultiprintf(stream *f, int argc, BAT *argv[], int printoid, int
order, int printorderby);
int BATname(BAT *b, const char *nme);
BAT *BATnew(int hdtype, int tltype, BUN capacity);
BAT *BATorder(BAT *b);
@@ -163,8 +163,9 @@ int BATordered(BAT *b);
int BATordered_rev(BAT *b);
BAT *BATouterjoin(BAT *l, BAT *r, BUN estimate);
BAT *BATprev(BAT *b);
-int BATprint(BAT *b);
-int BATprintf(stream *f, BAT *b);
+gdk_return BATprint(BAT *b);
+gdk_return BATprintcols(stream *s, int argc, BAT *argv[]);
+gdk_return BATprintf(stream *f, BAT *b);
gdk_return BATprod(void *res, int tp, BAT *b, BAT *s, int skip_nils, int
abort_on_error, int nil_if_empty);
BAT *BATproject(BAT *l, BAT *r);
BAT *BATrangejoin(BAT *l, BAT *rl, BAT *rh, bit li, bit hi);
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -1650,9 +1650,10 @@ gdk_export int GDKcreatedir(const char *
* oid-s in the head columns. It performs the multijoin over them, and
* prints the multi-column result on the file.
*/
-gdk_export int BATprint(BAT *b);
-gdk_export int BATprintf(stream *f, BAT *b);
-gdk_export int BATmultiprintf(stream *f, int argc, BAT *argv[], int printoid,
int order, int printorderby);
+gdk_export gdk_return BATprintcols(stream *s, int argc, BAT *argv[]);
+gdk_export gdk_return BATprint(BAT *b);
+gdk_export gdk_return BATprintf(stream *f, BAT *b);
+gdk_export gdk_return BATmultiprintf(stream *f, int argc, BAT *argv[], int
printoid, int order, int printorderby);
/*
* @- BAT clustering
@@ -3191,14 +3192,6 @@ gdk_export BAT *BATintersectcand(BAT *a,
gdk_export BAT *BATsample(BAT *b, BUN n);
gdk_export BAT *BATsample_(BAT *b, BUN n); /* version that expects void head
and returns oids */
-/* generic n-ary multijoin beast, with defines to interpret retval */
-#define MULTIJOIN_SORTED(r) ((char*) &r)[0]
-#define MULTIJOIN_KEY(r) ((char*) &r)[1]
-#define MULTIJOIN_SYNCED(r) ((char*) &r)[2]
-#define MULTIJOIN_LEAD(r) ((char*) &r)[3]
-
-typedef void (*ColFcn) (ptr, const void *);
-typedef void (*RowFcn) (ptr, ptr *);
/*
*
*/
diff --git a/gdk/gdk_private.h b/gdk/gdk_private.h
--- a/gdk/gdk_private.h
+++ b/gdk/gdk_private.h
@@ -37,7 +37,6 @@ BUN BATguess(BAT *b);
void BATinit_idents(BAT *bn);
BAT *BATload_intern(bat bid, int lock);
BAT *BATmaterializet(BAT *b);
-int BATmultijoin(int argc, BAT *argv[], RowFcn tuple_fcn, ptr tuple_data,
ColFcn value_fcn[], ptr value_data[], int orderspec);
void BATpropagate(BAT *dst, BAT *src, int idx);
str BATrename(BAT *b, const char *nme);
void BATsetdims(BAT *b);
diff --git a/gdk/gdk_relop.mx b/gdk/gdk_relop.mx
--- a/gdk/gdk_relop.mx
+++ b/gdk/gdk_relop.mx
@@ -193,6 +193,23 @@ bunins_failed:
}
@
@c
+/*
+ * @+ Cartesian product
+ * The matching algorithm tries to construct non-empty matches on all head
+ * columns. Each time this succeeds, it calls the Cartesian routine to
+ * construct a join result that consists of the Cartesian product of these
+ * matches.
+ *
+ * The matching tuples can be encoded in two ways:
+ * @table @samp
+ * @item clustered
+ * here we have two BUN pointers 'hi' and 'lo' that point
+ * to a consecutive range of BUNs in a BAT that match.
+ * @item nonclustered here we have a hit pointer that points to an array
+ * of BUN pointers that match.
+ * @end table
+ * The below structures are used for keeping track of the matching process.
+ */
BAT *
BATcross(BAT *l, BAT *r)
{
@@ -242,492 +259,3 @@ BATcross(BAT *l, BAT *r)
return bn;
}
-
-/*
- * @+ Cartesian product
- * The matching algorithm tries to construct non-empty matches on all head
- * columns. Each time this succeeds, it calls the Cartesian routine to
- * construct a join result that consists of the Cartesian product of these
- * matches.
- *
- * The matching tuples can be encoded in two ways:
- * @table @samp
- * @item clustered
- * here we have two BUN pointers 'hi' and 'lo' that point
- * to a consecutive range of BUNs in a BAT that match.
- * @item nonclustered here we have a hit pointer that points to an array
- * of BUN pointers that match.
- * @end table
- * The below structures are used for keeping track of the matching process.
- */
-typedef struct _column_t {
- BAT *b; /* BAT of this column */
- BATiter bi;
- BUN cur; /* current BUN in b */
- BUN nhits; /* number of matched BUNs */
-
- /* clustered */
- BUN lo; /* first BUN that matches */
- BUN hi; /* past last BUN that matches */
- /* nonclustered */
- BUN *hit; /* BUN array pointer */
- size_t hitsize; /* size of hit array */
-
- /* properties */
-/* I'm not sure whether offset can become negative, so to be on the
- * safe side, use a signed type. However the magnitude should be
- * within the range allowed by BUN, so the casts associated with this
- * value should be OK. */
- ssize_t offset; /* BUNindex of BUNfirst */
- struct _column_t *sync; /* iff > 0: column with synchronous BAT */
- BUN size; /* size of the BAT */
- char binsearch; /* sparse matching expected? */
- char ordered; /* merge matching */
-} column_t;
-
-typedef struct {
- RowFcn tuple_fcn; /* function to invoke per match */
- ptr tuple_data; /* application-specific data */
- ColFcn *value_fcn; /* for each col: function to invoke per value */
- ptr *value_data; /* for each col: application-specific data */
- column_t *c; /* array of columns */
- int argc; /* size of c */
-} table_t;
-
-static void
-column_result(table_t *t, int i)
-{
- if (++i > t->argc) {
- /* end of recursion: invoke tuple-match routine */
- t->tuple_fcn(t->tuple_data, t->value_data);
- } else {
- /* recurse over all matches on this column */
- column_t *c = t->c + (i - 1);
- BUN q, *p = c->hit;
- BUN j;
-
- if (p == NULL) { /* clustered */
- for (q = c->lo; q < c->hi; q++) {
- t->value_fcn[i] (t->value_data[i],
BUNtail(c->bi, q));
- column_result(t, i);
- }
- } else {
- for (j = 0; j < c->nhits; j++, p++) {
- t->value_fcn[i] (t->value_data[i],
BUNtail(c->bi, *p));
- column_result(t, i);
- }
- }
- }
-}
-
-/*
- * @* MultiColumn Joins
- * Computes the n-ary equijoin over the head columns of multiple BATs.
- * This function is complex, and uses nested functions calls,
- * for the specific stuff, it uses the stack for generating the
- * Cartesian product on each hit tuple. Most of all, it emits tuples one
- * at a time, in a pipeline rather than bulk fashion. For all these reasons,
- * it is not main-memory efficient. It does things that MonetDB actually
- * specifically was designed to avoid.
- *
- * USE THIS FUNCTION ONLY WHEN YOU REALLY REALLY HAVE TO:
- * @table @code
- * @item -
- * printing a multicolumn table to a watching end-user is one such example
- * @end table
- * @+ multijoin entry routine
- * The multijoin will cause a cascade of value_fcn() calls putting
- * values in to place, rounded off each time by a tuple_fcn() that is
- * executed on each produced tuple. If this corresponds 1-1 with
- * the elements of one of the parameter BAT, the 'result' of the
- * operation would be aligned with it.
- *
- * The return value of this operation contains this status information.
- * It is an integer, of which all 4 bytes are used:
- * @table @code
- * @item ret[0] == 1,
- * if a mergejoin was used, 0 otherwise
- * @item ret[1] == 1,
- * if all bats had the key property set, 0 otherwise
- * @item ret[2] == 1
- * if there was a 1-1 join, 0 otherwise
- * @item ret[3] ==
- * the parameter number of the BAT which was used as leader
- * @end table
- */
-#define COLSIZE(c)\
- (((c)->b->htype!=TYPE_void || (c)->b->hseqbase!=oid_nil)?(c)->size:0)
-#define REALLOCBUNS(c,n) if (c->hitsize <= n)\
- c->hit = (BUN*) GDKrealloc(c->hit, (c->hitsize+=n)*sizeof(BUN))
-
-#define LEAD_INTERRUPT_END 1
-#define LEAD_TRAVERSE_SSK 3 /* seq, sorted, key */
-#define LEAD_TRAVERSE_SNK 4 /* seq, nonsorted, key */
-#define LEAD_TRAVERSE_SEQ 6 /* enforced seq (for order purposes) */
-#define LEAD_TRAVERSE_SRT 9 /* traverse by sorted chunk */
-
-int
-BATmultijoin(int argc, BAT *argv[], RowFcn tuple_fcn, ptr tuple_data, ColFcn
value_fcn[], ptr value_data[], int orderby)
-{
- column_t *lead_col, *c = (column_t *) GDKzalloc(argc * (int)
sizeof(column_t));
- column_t **reorder = (column_t **) GDKmalloc(argc * (int)
sizeof(column_t *));
- int status = 0, algo = LEAD_TRAVERSE_SEQ;
- int i, k;
- BUN p, q;
- table_t t;
-
- /*
- * Init the table descriptor.
- */
- if (c == NULL || reorder == NULL) {
- GDKfree(c);
- GDKfree(reorder);
- return 0;
- }
- t.tuple_data = tuple_data;
- t.value_data = value_data;
- t.tuple_fcn = tuple_fcn;
- t.value_fcn = value_fcn;
- t.argc = argc;
- t.c = c;
- /*
- * order columns by their size (smallest first)
- */
- for (i = 0; i < argc; i++) {
- int j;
-
- c[i].b = argv[i];
- c[i].bi = bat_iterator(c[i].b);
- c[i].nhits = 1; /* default value */
- c[i].offset = (ssize_t) BUNfirst(c[i].b);
- c[i].size = BATcount(c[i].b);
-
- /* positional lookup possible => ignore other alternatives */
- if (!BAThdense(c[i].b))
- c[i].ordered = BAThordered(c[i].b);
-
- /* insertion sort on size */
- for (j = 0; j < i; j++) {
- if (COLSIZE(reorder[j]) > COLSIZE(c + i) ||
- /* in case of equal size, we prefer dense over
non-dense */
- (COLSIZE(reorder[j]) == COLSIZE(c + i) &&
!BAThdense(reorder[j]->b) && BAThdense((c + i)->b))) {
- for (k = i; k > j; k--) {
- reorder[k] = reorder[k - 1];
- }
- break;
- }
- }
- reorder[j] = c + i;
- }
- /*
- * @- handle explicit ordering requests
- * An 'orderby' specification tells that the multijoin should
- * match in the order of one specific BAT parameter.
- *
- * Notice that we *respect* the ordering of the orderby column
- * rather than we sort it explicitly (ie; you should order the
- * most significant column beforehand). This allows for both
- * for join results ordered on some tail column as results
- * ordered on head column, or even 'reverse' or other specific
- * orderings. One such specific ordering is the SQL ORDER BY
- * multi-column ordering that can be obtained with the
- * CTorderby command from the xtables module.
- */
- if (orderby) { /* order on tail of some column */
- int lead = orderby - 1;
-
- for (i = 0; i < argc; i++)
- if (reorder[i] == c + lead)
- break;
- while (--i >= 0) {
- reorder[i + 1] = reorder[i];
- }
- reorder[0] = c + lead;
- }
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list