Changeset: 934ba3d429a6 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=934ba3d429a6
Modified Files:
gdk/gdk_firstn.c
Branch: Jul2017
Log Message:
Backed out changeset f65ce66374ad: premature and accidental commit.
diffs (truncated from 363 to 300 lines):
diff --git a/gdk/gdk_firstn.c b/gdk/gdk_firstn.c
--- a/gdk/gdk_firstn.c
+++ b/gdk/gdk_firstn.c
@@ -102,7 +102,7 @@
#define shuffle_unique(TYPE, OP) \
do { \
- const TYPE *restrict vals = (const TYPE *) Tloc(b, 0); \
+ const TYPE *restrict vals = (const TYPE *) Tloc(b, 0); \
heapify(OP##fix, SWAP1); \
while (cand ? cand < candend : start < end) { \
i = cand ? *cand++ : start++ + b->hseqbase; \
@@ -121,7 +121,7 @@
* multiple equal values to take us past N, we return a subset of those.
*/
static BAT *
-BATfirstn_unique(BAT *b, BAT *s, BUN n, int asc, oid *lastp)
+BATfirstn_unique(BAT *b, BAT *s, BUN n, int asc)
{
BAT *bn;
BATiter bi = bat_iterator(b);
@@ -293,8 +293,6 @@ BATfirstn_unique(BAT *b, BAT *s, BUN n,
break;
}
}
- if (lastp)
- *lastp = oids[0]; /* store id of largest value */
/* output must be sorted since it's a candidate list */
GDKqsort(oids, NULL, NULL, (size_t) n, sizeof(oid), 0, TYPE_oid);
bn->tsorted = 1;
@@ -547,61 +545,288 @@ BATfirstn_unique_with_groups(BAT *b, BAT
return bn;
}
+#define shuffle_grouped1_body(COMPARE, EQUAL) \
+ do { \
+ for (i = cand ? *cand++ - b->hseqbase : start; \
+ i < end; \
+ cand < candend ? (i = *cand++ - b->hseqbase) : i++) { \
+ for (j = 0; j < n; j++) { \
+ if (j == top) { \
+ assert(top < n); \
+ groups[top].cnt = 1; \
+ groups[top++].bun = i; \
+ break; \
+ } else { \
+ assert(j < top); \
+ assert(groups[j].bun < i); \
+ if (COMPARE) { \
+ if (top < n) \
+ top++; \
+ for (k = top - 1; k > j; k--) {
\
+ groups[k] = groups[k -
1]; \
+ } \
+ groups[j].bun = i; \
+ groups[j].cnt = 1; \
+ break; \
+ } else if (EQUAL) { \
+ groups[j].cnt++; \
+ break; \
+ } \
+ } \
+ } \
+ } \
+ } while (0)
+
+#define shuffle_grouped1(TYPE, OPER) \
+ do { \
+ const TYPE *restrict v = (const TYPE *) Tloc(b, 0); \
+ shuffle_grouped1_body(OPER(v[i], v[groups[j].bun]), \
+ v[i] == v[groups[j].bun]); \
+ } while (0)
+
+#define shuffle_grouped2(TYPE) \
+ do { \
+ const TYPE *restrict v = (const TYPE *) Tloc(b, 0); \
+ TYPE lastval = v[groups[top - 1].bun]; \
+ for (i = cand ? *cand++ - b->hseqbase : start; \
+ i < end; \
+ cand < candend ? (i = *cand++ - b->hseqbase) : i++) { \
+ if (asc ? v[i] > lastval : v[i] < lastval) \
+ continue; \
+ for (j = 0; j < top; j++) { \
+ if (v[i] == v[groups[j].bun]) { \
+ if (bp) \
+ *bp++ = i + b->hseqbase; \
+ *gp++ = j; \
+ break; \
+ } \
+ } \
+ } \
+ } while (0)
+
static gdk_return
BATfirstn_grouped(BAT **topn, BAT **gids, BAT *b, BAT *s, BUN n, int asc, int
distinct)
{
- BAT *bn1, *bn2, *bn, *gn;
- oid last;
+ BAT *bn, *gn;
+ BATiter bi = bat_iterator(b);
+ oid *restrict bp, *restrict gp;
+ BUN top, i, j, k, cnt, start, end;
+ const oid *restrict cand, *candend, *oldcand;
+ int tpe = b->ttype;
+ int c;
+ int (*cmp)(const void *, const void *);
+ BUN ncnt;
+ struct group {
+ BUN bun;
+ BUN cnt;
+ } *restrict groups;
- if (distinct) {
- s = BATunique(b, s);
- if (s == NULL)
+ assert(topn);
+
+ CANDINIT(b, s, start, end, cnt, cand, candend);
+
+ if (n > cnt)
+ n = cnt;
+ if (cand && n > (BUN) (candend - cand))
+ n = (BUN) (candend - cand);
+
+ if (n == 0) {
+ /* candidate list might refer only to values outside
+ * of the bat and hence be effectively empty */
+ bn = COLnew(0, TYPE_void, 0, TRANSIENT);
+ if (bn == NULL)
return GDK_FAIL;
- }
- bn1 = BATfirstn_unique(b, s, n, asc, &last);
- if (bn1 == NULL)
- return GDK_FAIL;
- if (BATcount(bn1) == 0) {
+ BATtseqbase(bn, 0);
if (gids) {
gn = COLnew(0, TYPE_void, 0, TRANSIENT);
if (gn == NULL) {
- BBPunfix(bn1->batCacheid);
+ BBPreclaim(bn);
return GDK_FAIL;
}
+ BATtseqbase(gn, 0);
*gids = gn;
}
- *topn = bn1;
+ *topn = bn;
return GDK_SUCCEED;
}
- if (b->tkey) {
- bn = bn1;
- } else if (distinct) {
- BBPunfix(s->batCacheid);
- if (BATsemijoin(&bn, NULL, b, b, NULL, bn1, 1, BUN_NONE) !=
GDK_SUCCEED) {
- BBPunfix(bn1->batCacheid);
- return GDK_FAIL;
+
+ top = 0;
+ cmp = ATOMcompare(b->ttype);
+ /* if base type has same comparison function as type itself, we
+ * can use the base type */
+ tpe = ATOMbasetype(tpe); /* takes care of oid */
+ groups = GDKmalloc(sizeof(*groups) * n);
+ if (groups == NULL)
+ return GDK_FAIL;
+ oldcand = cand;
+ if (asc) {
+ switch (tpe) {
+ case TYPE_void:
+ shuffle_grouped1_body(i < groups[j].bun,
+ i == groups[j].bun);
+ break;
+ case TYPE_bte:
+ shuffle_grouped1(bte, LT);
+ break;
+ case TYPE_sht:
+ shuffle_grouped1(sht, LT);
+ break;
+ case TYPE_int:
+ shuffle_grouped1(int, LT);
+ break;
+ case TYPE_lng:
+ shuffle_grouped1(lng, LT);
+ break;
+#ifdef HAVE_HGE
+ case TYPE_hge:
+ shuffle_grouped1(hge, LT);
+ break;
+#endif
+ case TYPE_flt:
+ shuffle_grouped1(flt, LT);
+ break;
+ case TYPE_dbl:
+ shuffle_grouped1(dbl, LT);
+ break;
+ default:
+ shuffle_grouped1_body(
+ (c = cmp(BUNtail(bi, i),
+ BUNtail(bi, groups[j].bun))) < 0,
+ c == 0);
+ break;
}
- BBPunfix(bn1->batCacheid);
} else {
- BATiter bi = bat_iterator(b);
- bn2 = BATselect(b, s, BUNtail(bi, last - b->hseqbase), NULL, 1,
0, 0);
- if (bn2 == NULL) {
- BBPunfix(bn1->batCacheid);
- return GDK_FAIL;
+ switch (tpe) {
+ case TYPE_void:
+ shuffle_grouped1_body(i > groups[j].bun,
+ i == groups[j].bun);
+ break;
+ case TYPE_bte:
+ shuffle_grouped1(bte, GT);
+ break;
+ case TYPE_sht:
+ shuffle_grouped1(sht, GT);
+ break;
+ case TYPE_int:
+ shuffle_grouped1(int, GT);
+ break;
+ case TYPE_lng:
+ shuffle_grouped1(lng, GT);
+ break;
+#ifdef HAVE_HGE
+ case TYPE_hge:
+ shuffle_grouped1(hge, GT);
+ break;
+#endif
+ case TYPE_flt:
+ shuffle_grouped1(flt, GT);
+ break;
+ case TYPE_dbl:
+ shuffle_grouped1(dbl, GT);
+ break;
+ default:
+ shuffle_grouped1_body(
+ (c = cmp(BUNtail(bi, i),
+ BUNtail(bi, groups[j].bun))) > 0,
+ c == 0);
+ break;
}
- bn = BATmergecand(bn1, bn2);
- BBPunfix(bn1->batCacheid);
- BBPunfix(bn2->batCacheid);
- if (bn == NULL)
- return GDK_FAIL;
+ }
+ cand = oldcand;
+ for (i = 0, ncnt = 0; i < top && (distinct || ncnt < n); i++)
+ ncnt += groups[i].cnt;
+ top = i;
+ assert(ncnt <= cnt);
+ if (ncnt == cnt)
+ bn = COLnew(0, TYPE_void, ncnt, TRANSIENT);
+ else
+ bn = COLnew(0, TYPE_oid, ncnt, TRANSIENT);
+ gn = COLnew(0, TYPE_oid, ncnt, TRANSIENT);
+ if (bn == NULL || gn == NULL) {
+ GDKfree(groups);
+ BBPreclaim(bn);
+ BBPreclaim(gn);
+ return GDK_FAIL;
+ }
+ if (ncnt == cnt)
+ bp = NULL;
+ else
+ bp = (oid *) Tloc(bn, 0);
+ gp = (oid *) Tloc(gn, 0);
+ switch (tpe) {
+ case TYPE_void:
+ for (i = cand ? *cand++ - b->hseqbase : start;
+ i < end;
+ cand < candend ? (i = *cand++ - b->hseqbase) : i++) {
+ for (j = 0; j < top; j++) {
+ if (i == groups[j].bun) {
+ if (bp)
+ *bp++ = i + b->hseqbase;
+ *gp++ = j;
+ break;
+ }
+ }
+ }
+ break;
+ case TYPE_bte:
+ shuffle_grouped2(bte);
+ break;
+ case TYPE_sht:
+ shuffle_grouped2(sht);
+ break;
+ case TYPE_int:
+ shuffle_grouped2(int);
+ break;
+ case TYPE_lng:
+ shuffle_grouped2(lng);
+ break;
+#ifdef HAVE_HGE
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list