MonetDB: default - Removed BATmultijoin, replaced BATmultiprintf.

Sjoerd Mullender Fri, 28 Jun 2013 06:14:00 -0700

Changeset: 404f0a7d5c99 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=404f0a7d5c99
Modified Files:
        clients/Tests/exports.stable.out
        gdk/gdk.h
        gdk/gdk_private.h
        gdk/gdk_relop.mx
        gdk/gdk_storage.c
Branch: default
Log Message:


Removed BATmultijoin, replaced BATmultiprintf.


diffs (truncated from 1094 to 300 lines):

diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -154,7 +154,7 @@ size_t BATmemsize(BAT *b, int dirty);
 BAT *BATmergecand(BAT *a, BAT *b);
 int BATmmap(BAT *b, int hb, int tb, int hh, int th, int force);
 BAT *BATmode(BAT *b, int onoff);
-int BATmultiprintf(stream *f, int argc, BAT *argv[], int printoid, int order, int printorderby);
+gdk_return BATmultiprintf(stream *f, int argc, BAT *argv[], int printoid, int order, int printorderby);
 int BATname(BAT *b, const char *nme);
 BAT *BATnew(int hdtype, int tltype, BUN capacity);
 BAT *BATorder(BAT *b);
@@ -163,8 +163,9 @@ int BATordered(BAT *b);
 int BATordered_rev(BAT *b);
 BAT *BATouterjoin(BAT *l, BAT *r, BUN estimate);
 BAT *BATprev(BAT *b);
-int BATprint(BAT *b);
-int BATprintf(stream *f, BAT *b);
+gdk_return BATprint(BAT *b);
+gdk_return BATprintcols(stream *s, int argc, BAT *argv[]);
+gdk_return BATprintf(stream *f, BAT *b);
 gdk_return BATprod(void *res, int tp, BAT *b, BAT *s, int skip_nils, int abort_on_error, int nil_if_empty);
 BAT *BATproject(BAT *l, BAT *r);
 BAT *BATrangejoin(BAT *l, BAT *rl, BAT *rh, bit li, bit hi);
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -1650,9 +1650,10 @@ gdk_export int GDKcreatedir(const char *
  * oid-s in the head columns. It performs the multijoin over them, and
  * prints the multi-column result on the file.
  */
-gdk_export int BATprint(BAT *b);
-gdk_export int BATprintf(stream *f, BAT *b);
-gdk_export int BATmultiprintf(stream *f, int argc, BAT *argv[], int printoid, int order, int printorderby);
+gdk_export gdk_return BATprintcols(stream *s, int argc, BAT *argv[]);
+gdk_export gdk_return BATprint(BAT *b);
+gdk_export gdk_return BATprintf(stream *f, BAT *b);
+gdk_export gdk_return BATmultiprintf(stream *f, int argc, BAT *argv[], int printoid, int order, int printorderby);
 
 /*
  * @- BAT clustering
@@ -3191,14 +3192,6 @@ gdk_export BAT *BATintersectcand(BAT *a,
 gdk_export BAT *BATsample(BAT *b, BUN n);
 gdk_export BAT *BATsample_(BAT *b, BUN n); /* version that expects void head and returns oids */
 
-/* generic n-ary multijoin beast, with defines to interpret retval */
-#define MULTIJOIN_SORTED(r)    ((char*) &r)[0]
-#define MULTIJOIN_KEY(r)       ((char*) &r)[1]
-#define MULTIJOIN_SYNCED(r)    ((char*) &r)[2]
-#define MULTIJOIN_LEAD(r)      ((char*) &r)[3]
-
-typedef void (*ColFcn) (ptr, const void *);
-typedef void (*RowFcn) (ptr, ptr *);
 /*
  *
  */
diff --git a/gdk/gdk_private.h b/gdk/gdk_private.h
--- a/gdk/gdk_private.h
+++ b/gdk/gdk_private.h
@@ -37,7 +37,6 @@ BUN BATguess(BAT *b);
 void BATinit_idents(BAT *bn);
 BAT *BATload_intern(bat bid, int lock);
 BAT *BATmaterializet(BAT *b);
-int BATmultijoin(int argc, BAT *argv[], RowFcn tuple_fcn, ptr tuple_data, ColFcn value_fcn[], ptr value_data[], int orderspec);
 void BATpropagate(BAT *dst, BAT *src, int idx);
 str BATrename(BAT *b, const char *nme);
 void BATsetdims(BAT *b);
diff --git a/gdk/gdk_relop.mx b/gdk/gdk_relop.mx
--- a/gdk/gdk_relop.mx
+++ b/gdk/gdk_relop.mx
@@ -193,6 +193,23 @@ bunins_failed:
 }
 @
 @c
+/*
+ * @+ Cartesian product
+ * The matching algorithms tries to construct non-empty matches on all head
+ * columns. Each time this succeeds, it calls the Cartesian routine to
+ * construct a join result that consists of the Cartesian product of these
+ * matches.
+ *
+ * The matching tuples can be encoded in two ways:
+ * @table @samp
+ * @item clustered
+ *  here we have two BUN pointers 'hi' and 'lo' that point
+ * to a consecutive range of BUNs in a BAT that match.
+ * @item nonclustered here we have a hit pointer that points to an array
+ * of BUN  pointers that match.
+ * @end table
+ * The below structures are used for keeping track of the matching process.
+ */
 BAT *
 BATcross(BAT *l, BAT *r)
 {
@@ -242,492 +259,3 @@ BATcross(BAT *l, BAT *r)
 
        return bn;
 }
-
-/*
- * @+ Cartesian product
- * The matching algorithms tries to construct non-empty matches on all head
- * columns. Each time this succeeds, it calls the Cartesian routine to
- * construct a join result that consists of the Cartesian product of these
- * matches.
- *
- * The matching tuples can be encoded in two ways:
- * @table @samp
- * @item clustered
- *  here we have two BUN pointers 'hi' and 'lo' that point
- * to a consecutive range of BUNs in a BAT that match.
- * @item nonclustered here we have a hit pointer that points to an array
- * of BUN  pointers that match.
- * @end table
- * The below structures are used for keeping track of the matching process.
- */
-typedef struct _column_t {
-       BAT *b;                 /* BAT of this column */
-       BATiter bi;
-       BUN cur;                /* current BUN in b */
-       BUN nhits;              /* number of matched BUNs */
-
-       /* clustered */
-       BUN lo;                 /* first BUN that matches */
-       BUN hi;                 /* past last BUN that matches */
-       /* nonclustered */
-       BUN *hit;               /* BUN array pointer */
-       size_t hitsize;         /* size of hit array */
-
-       /* properties */
-/* I'm not sure whether offset can become negative, so to be on the
- * save side, use a signed type.  However the magnitude should be
- * within the range allowed by BUN, so the casts associated with this
- * value should be OK. */
-       ssize_t offset;         /* BUNindex of BUNfirst  */
-       struct _column_t *sync; /* iff > 0: column with synchronous BAT */
-       BUN size;               /* size of the BAT */
-       char binsearch;         /* sparse matching expected? */
-       char ordered;           /* merge matching */
-} column_t;
-
-typedef struct {
-       RowFcn tuple_fcn;       /* function to invoke per match */
-       ptr tuple_data;         /* application-specific data */
-       ColFcn *value_fcn;      /* for each col: function to invoke per value */
-       ptr *value_data;        /* for each col: application-specific data */
-       column_t *c;            /* array of columns */
-       int argc;               /* size of c */
-} table_t;
-
-static void
-column_result(table_t *t, int i)
-{
-       if (++i > t->argc) {
-               /* end of recursion: invoke tuple-match routine */
-               t->tuple_fcn(t->tuple_data, t->value_data);
-       } else {
-               /* recurse over all matches on this column */
-               column_t *c = t->c + (i - 1);
-               BUN q, *p = c->hit;
-               BUN j;
-
-               if (p == NULL) {        /* clustered */
-                       for (q = c->lo; q < c->hi; q++) {
-                               t->value_fcn[i] (t->value_data[i], BUNtail(c->bi, q));
-                               column_result(t, i);
-                       }
-               } else {
-                       for (j = 0; j < c->nhits; j++, p++) {
-                               t->value_fcn[i] (t->value_data[i], BUNtail(c->bi, *p));
-                               column_result(t, i);
-                       }
-               }
-       }
-}
-
-/*
- * @* MultiColumn Joins
- * Computes the n-ary equijoin over the head columns of multiple BATs.
- * This function is complex, and uses nested functions calls,
- * for the specific stuff, it uses the stack for generating the
- * Cartesian product on each hit tuple. Most of all, it emits tuples one
- * at a time, in a pipeline rather than bulk fashion. For all these reasons,
- * it is not main-memory efficient. It does things that MonetDB actually
- * specifically was designed to avoid.
- *
- * USE THIS FUNCTION ONLY WHEN YOU REALLY REALLY HAVE TO:
- * @table @code
- * @item  -
- * printing a multicolumn table to a watching end-user is one such example
- * @end table
- * @+ multijoin entry routine
- * The multijoin will cause a cascade of value_fcn() calls putting
- * values in to place, rounded off each time by a tuple_fcn() that is
- * executed on each produced tuple. If this corresponds 1-1 with
- * the elements of one of the parameter BAT, the 'result' of the
- * operation would be aligned with it.
- *
- * The return value of this operation contains this status information.
- * It is an integer, of which all 4 bytes are used:
- * @table @code
- * @item ret[0] == 1,
- * if a mergejoin was used, 0 otherwise
- * @item ret[1] == 1,
- * if all bats had the key property set, 0 otherwise
- * @item ret[2] == 1
- * if there was a 1-1 join, 0 otherwise
- * @item ret[3] ==
- * the parameter number of the BAT which was used as leader
- * @end table
- */
-#define COLSIZE(c)\
-       (((c)->b->htype!=TYPE_void || (c)->b->hseqbase!=oid_nil)?(c)->size:0)
-#define REALLOCBUNS(c,n) if (c->hitsize <= n)\
-       c->hit = (BUN*) GDKrealloc(c->hit, (c->hitsize+=n)*sizeof(BUN))
-
-#define LEAD_INTERRUPT_END  1
-#define LEAD_TRAVERSE_SSK   3  /* seq, sorted, key */
-#define LEAD_TRAVERSE_SNK   4  /* seq, nonsorted, key */
-#define LEAD_TRAVERSE_SEQ   6  /* enforced seq (for order purposes) */
-#define LEAD_TRAVERSE_SRT   9  /* traverse by sorted chunk */
-
-int
-BATmultijoin(int argc, BAT *argv[], RowFcn tuple_fcn, ptr tuple_data, ColFcn value_fcn[], ptr value_data[], int orderby)
-{
-       column_t *lead_col, *c = (column_t *) GDKzalloc(argc * (int) sizeof(column_t));
-       column_t **reorder = (column_t **) GDKmalloc(argc * (int) sizeof(column_t *));
-       int status = 0, algo = LEAD_TRAVERSE_SEQ;
-       int i, k;
-       BUN p, q;
-       table_t t;
-
-       /*
-        * Init the table descriptor.
-        */
-       if (c == NULL || reorder == NULL) {
-               GDKfree(c);
-               GDKfree(reorder);
-               return 0;
-       }
-       t.tuple_data = tuple_data;
-       t.value_data = value_data;
-       t.tuple_fcn = tuple_fcn;
-       t.value_fcn = value_fcn;
-       t.argc = argc;
-       t.c = c;
-       /*
-        * order columns by their size (smallest first)
-        */
-       for (i = 0; i < argc; i++) {
-               int j;
-
-               c[i].b = argv[i];
-               c[i].bi = bat_iterator(c[i].b);
-               c[i].nhits = 1; /* default value */
-               c[i].offset = (ssize_t) BUNfirst(c[i].b);
-               c[i].size = BATcount(c[i].b);
-
-               /* positional lookup possible => ignore other alternatives */
-               if (!BAThdense(c[i].b))
-                       c[i].ordered = BAThordered(c[i].b);
-
-               /* insertion sort on size */
-               for (j = 0; j < i; j++) {
-                       if (COLSIZE(reorder[j]) > COLSIZE(c + i) ||
-                           /* in case of equal size, we prefer dense over non-dense */
-                           (COLSIZE(reorder[j]) == COLSIZE(c + i) && !BAThdense(reorder[j]->b) && BAThdense((c + i)->b))) {
-                               for (k = i; k > j; k--) {
-                                       reorder[k] = reorder[k - 1];
-                               }
-                               break;
-                       }
-               }
-               reorder[j] = c + i;
-       }
-       /*
-        * @- handle explicit ordering requests
-        * An 'orderby' specification tells that the multijoin should
-        * match in the order of one specific BAT parameter.
-        *
-        * Notice that we *respect* the ordering of the orderby column
-        * rather than we sort it explicitly (ie; you should order the
-        * most significant column beforehand).  This allows for both
-        * for join results ordered on some tail column as results
-        * ordered on head column, or even 'reverse' or other specific
-        * orderings.  One such specific ordering is the SQL ORDER BY
-        * multi-column ordering that can be obtained with the
-        * CTorderby command from the xtables module.
-        */
-       if (orderby) {          /* order on tail of some column */
-               int lead = orderby - 1;
-
-               for (i = 0; i < argc; i++)
-                       if (reorder[i] == c + lead)
-                               break;
-               while (--i >= 0) {
-                       reorder[i + 1] = reorder[i];
-               }
-               reorder[0] = c + lead;
-       }
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list

MonetDB: default - Removed BATmultijoin, replaced BATmultiprintf.

Reply via email to