Changeset: d51edc6055ce for MonetDB URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=d51edc6055ce Modified Files: clients/Tests/exports.stable.out gdk/ChangeLog gdk/gdk.h gdk/gdk_aggr.c gdk/gdk_batop.c gdk/gdk_cand.h gdk/gdk_firstn.c gdk/gdk_imprints.c gdk/gdk_orderidx.c gdk/gdk_private.h gdk/gdk_qsort.c gdk/gdk_qsort_impl.h gdk/gdk_search.c gdk/gdk_select.c gdk/gdk_ssort.c gdk/gdk_ssort_impl.h gdk/gdk_tm.c monetdb5/modules/atoms/batxml.c monetdb5/modules/atoms/json.c monetdb5/modules/kernel/algebra.c monetdb5/modules/kernel/bat5.c monetdb5/optimizer/opt_costModel.c monetdb5/optimizer/opt_prelude.c monetdb5/optimizer/opt_prelude.h monetdb5/optimizer/opt_support.c sql/backends/monet5/sql.c sql/backends/monet5/vaults/bam/Tests/query2.2.sql sql/backends/monet5/vaults/bam/Tests/query2.2.stable.out sql/backends/monet5/vaults/bam/Tests/query2.2.stable.out.int128 sql/backends/monet5/vaults/bam/Tests/query2.6.sql sql/backends/monet5/vaults/bam/Tests/query2.6.stable.out sql/common/sql_list.c sql/server/rel_optimizer.c sql/storage/bat/bat_table.c sql/storage/store.c sql/test/SQLite_regress/sqllogictest/Tests/select3.test.stable.out sql/test/SQLite_regress/sqllogictest/Tests/select3.test.stable.out.int128 sql/test/Tests/rank.stable.out sql/test/rank.sql Branch: default Log Message:
merged diffs (truncated from 6632 to 300 lines): diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out --- a/clients/Tests/exports.stable.out +++ b/clients/Tests/exports.stable.out @@ -116,7 +116,7 @@ BAT *BATdense(oid hseq, oid tseq, BUN cn BAT *BATdiff(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate); gdk_return BATextend(BAT *b, BUN newcap) __attribute__((__warn_unused_result__)); void BATfakeCommit(BAT *b); -gdk_return BATfirstn(BAT **topn, BAT **gids, BAT *b, BAT *cands, BAT *grps, BUN n, bool asc, bool distinct) __attribute__((__warn_unused_result__)); +gdk_return BATfirstn(BAT **topn, BAT **gids, BAT *b, BAT *cands, BAT *grps, BUN n, bool asc, bool nilslast, bool distinct) __attribute__((__warn_unused_result__)); int BATgetaccess(BAT *b); PROPrec *BATgetprop(BAT *b, enum prop_t idx); gdk_return BATgroup(BAT **groups, BAT **extents, BAT **histo, BAT *b, BAT *s, BAT *g, BAT *e, BAT *h) __attribute__((__warn_unused_result__)); @@ -172,7 +172,7 @@ void BATsetcapacity(BAT *b, BUN cnt); void BATsetcount(BAT *b, BUN cnt); void BATsetprop(BAT *b, enum prop_t idx, int type, const void *v); BAT *BATslice(BAT *b, BUN low, BUN high); -gdk_return BATsort(BAT **sorted, BAT **order, BAT **groups, BAT *b, BAT *o, BAT *g, bool reverse, bool stable) __attribute__((__warn_unused_result__)); +gdk_return BATsort(BAT **sorted, BAT **order, BAT **groups, BAT *b, BAT *o, BAT *g, bool reverse, bool nilslast, bool stable) __attribute__((__warn_unused_result__)); gdk_return BATstr_group_concat(ValPtr res, BAT *b, BAT *s, bool skip_nils, bool abort_on_error, bool nil_if_empty, const char *separator); gdk_return BATsubcross(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr) __attribute__((__warn_unused_result__)); gdk_return BATsum(void *res, int tp, BAT *b, BAT *s, bool skip_nils, bool abort_on_error, bool nil_if_empty); @@ -258,8 +258,7 @@ gdk_return GDKmmapfile(str buffer, size_ int GDKms(void); int GDKnr_threads; void GDKprepareExit(void); -void GDKqsort(void *restrict h, void *restrict t, const void *restrict base, size_t n, int hs, int ts, int tpe); -void GDKqsort_rev(void *restrict h, void *restrict t, const void *restrict base, size_t n, int hs, int ts, int tpe); +void GDKqsort(void *restrict h, void *restrict t, const void *restrict base, size_t n, int hs, int ts, int tpe, bool reverse, bool nilslast); void *GDKrealloc(void *pold, size_t size) __attribute__((__alloc_size__(2))) __attribute__((__warn_unused_result__)); void GDKregister(MT_Id pid); gdk_return GDKreleasemmap(void *ptr, size_t size, size_t id, str *msg); @@ -2038,10 +2037,7 @@ str batmalRef; str batmmathRef; str batmtimeRef; str batpyapi3Ref; -str batpyapi3Ref; str batpyapiRef; -str batpyapiRef; -str batrapiRef; str batrapiRef; str batsqlRef; str batstrRef; @@ -2134,7 +2130,6 @@ str deleteRef; void deleteSymbol(Module scope, Symbol prg); str deltaRef; str dense_rankRef; -str dense_rankRef; malType destinationType(MalBlkPtr mb, InstrPtr p); str diffRef; str differenceRef; @@ -2524,12 +2519,8 @@ str putName(const char *nme); str putNameLen(const char *nme, size_t len); str putRef; str pyapi3Ref; -str pyapi3Ref; -str pyapi3mapRef; str pyapi3mapRef; str pyapiRef; -str pyapiRef; -str pyapimapRef; str pyapimapRef; int qtop; str queryRef; @@ -2537,8 +2528,6 @@ str querylogRef; str raiseRef; str rangejoinRef; str rankRef; -str rankRef; -str rapiRef; str rapiRef; int readConsole(Client cntxt); MalStkPtr reallocGlobalStack(MalStkPtr s, int cnt); @@ -2620,7 +2609,6 @@ str sinkRef; void slash_2_dir_sep(str fname); str sliceRef; str sortRef; -str sortReverseRef; str soundex_impl(str *res, str *Name); str sqlRef; str sqlcatalogRef; @@ -2644,7 +2632,6 @@ str subcountRef; str subdeltaRef; str subdiffRef; str subeval_aggrRef; -str subeval_aggrRef; str subgroupRef; str subgroupdoneRef; str subinterRef; diff --git a/gdk/ChangeLog b/gdk/ChangeLog --- a/gdk/ChangeLog +++ b/gdk/ChangeLog @@ -1,6 +1,20 @@ # ChangeLog file for MonetDB # This file is updated with Maddlog +* Wed Nov 7 2018 Sjoerd Mullender <[email protected]> +- Implemented a nilslast option for BATfirstn. If set, NILs come + last in the ordering that BATfirstn simulates, so non-NIL values are + preferentially returned. The old behavior can be obtained by setting + nilslast to !asc(ending). + +* Tue Nov 6 2018 Sjoerd Mullender <[email protected]> +- Implemented a nilslast option for BATsort. This option should be + equal to the reverse option for stable sort (it is not implemented for + stable sort), but can be different from reverse for non-stable sort. + The functions BATsort and GDKqsort have extra parameters, the function + GDKqsort_rev has been removed (superseded by GDKqsort with the new + `reverse' parameter). + * Tue Oct 30 2018 Sjoerd Mullender <[email protected]> - The BUNtail, BUNtvar, BUNtloc, and BUNtpos macros (and Tloc and Tpos) now return a `void *' instead of a `char *'. diff --git a/gdk/gdk.h b/gdk/gdk.h --- a/gdk/gdk.h +++ b/gdk/gdk.h @@ -1422,12 +1422,11 @@ gdk_export gdk_return BATprint(BAT *b); gdk_export bool BATkeyed(BAT *b); gdk_export bool BATordered(BAT *b); gdk_export bool BATordered_rev(BAT *b); -gdk_export gdk_return BATsort(BAT **sorted, BAT **order, BAT **groups, BAT *b, BAT *o, BAT *g, bool reverse, bool stable) +gdk_export gdk_return BATsort(BAT **sorted, BAT **order, BAT **groups, BAT *b, BAT *o, BAT *g, bool reverse, bool nilslast, bool stable) __attribute__((__warn_unused_result__)); -gdk_export void GDKqsort(void *restrict h, void *restrict t, const void *restrict base, size_t n, int hs, int ts, int tpe); -gdk_export void GDKqsort_rev(void *restrict h, void *restrict t, const void *restrict base, size_t n, int hs, int ts, int tpe); +gdk_export void GDKqsort(void *restrict h, void *restrict t, const void *restrict base, size_t n, int hs, int ts, int tpe, bool reverse, bool nilslast); #define BATtordered(b) ((b)->tsorted) #define BATtrevordered(b) ((b)->trevsorted) @@ -2757,7 +2756,7 @@ gdk_export BAT *BATunique(BAT *b, BAT *s gdk_export BAT *BATmergecand(BAT *a, BAT *b); gdk_export BAT *BATintersectcand(BAT *a, BAT *b); -gdk_export gdk_return BATfirstn(BAT **topn, BAT **gids, BAT *b, BAT *cands, BAT *grps, BUN n, bool asc, bool distinct) +gdk_export gdk_return BATfirstn(BAT **topn, BAT **gids, BAT *b, BAT *cands, BAT *grps, BUN n, bool asc, bool nilslast, bool distinct) __attribute__((__warn_unused_result__)); #include "gdk_calc.h" diff --git a/gdk/gdk_aggr.c b/gdk/gdk_aggr.c --- a/gdk/gdk_aggr.c +++ b/gdk/gdk_aggr.c @@ -2867,14 +2867,14 @@ BATgroupquantile(BAT *b, BAT *g, BAT *e, BBPunfix(g->batCacheid); return bn; } - if (BATsort(&t1, &t2, NULL, g, NULL, NULL, false, false) != GDK_SUCCEED) + if (BATsort(&t1, &t2, NULL, g, NULL, NULL, false, false, false) != GDK_SUCCEED) goto bunins_failed; if (freeg) BBPunfix(g->batCacheid); g = t1; freeg = true; - if (BATsort(&t1, NULL, NULL, b, t2, g, false, false) != GDK_SUCCEED) { + if (BATsort(&t1, NULL, NULL, b, t2, g, false, false, false) != GDK_SUCCEED) { BBPunfix(t2->batCacheid); goto bunins_failed; } @@ -2948,7 +2948,7 @@ BATgroupquantile(BAT *b, BAT *g, BAT *e, BATcheckorderidx(pb))) { ords = (const oid *) (pb ? pb->torderidx->base : b->torderidx->base) + ORDERIDXOFF; } else { - if (BATsort(NULL, &t1, NULL, b, NULL, g, false, false) != GDK_SUCCEED) + if (BATsort(NULL, &t1, NULL, b, NULL, g, false, false, false) != GDK_SUCCEED) goto bunins_failed; if (BATtdense(t1)) ords = NULL; diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c --- a/gdk/gdk_batop.c +++ b/gdk/gdk_batop.c @@ -1326,22 +1326,18 @@ BATordered_rev(BAT *b) * "quick" sort does not produce errors */ static gdk_return do_sort(void *restrict h, void *restrict t, const void *restrict base, - size_t n, int hs, int ts, int tpe, bool reverse, bool stable) + size_t n, int hs, int ts, int tpe, bool reverse, bool nilslast, + bool stable) { if (n <= 1) /* trivially sorted */ return GDK_SUCCEED; - if (reverse) { - if (stable) { + if (stable) { + if (reverse) return GDKssort_rev(h, t, base, n, hs, ts, tpe); - } else { - GDKqsort_rev(h, t, base, n, hs, ts, tpe); - } + else + return GDKssort(h, t, base, n, hs, ts, tpe); } else { - if (stable) { - return GDKssort(h, t, base, n, hs, ts, tpe); - } else { - GDKqsort(h, t, base, n, hs, ts, tpe); - } + GDKqsort(h, t, base, n, hs, ts, tpe, reverse, nilslast); } return GDK_SUCCEED; } @@ -1376,14 +1372,14 @@ do_sort(void *restrict h, void *restrict * Apart from error checking and maintaining reference counts, sorting * three columns (col1, col2, col3) could look like this with the * sorted results in (col1s, col2s, col3s): - * BATsort(&col1s, &ord1, &grp1, col1, NULL, NULL, false, false); - * BATsort(&col2s, &ord2, &grp2, col2, ord1, grp1, false, false); - * BATsort(&col3s, NULL, NULL, col3, ord2, grp2, false, false); + * BATsort(&col1s, &ord1, &grp1, col1, NULL, NULL, false, false, false); + * BATsort(&col2s, &ord2, &grp2, col2, ord1, grp1, false, false, false); + * BATsort(&col3s, NULL, NULL, col3, ord2, grp2, false, false, false); * Note that the "reverse" parameter can be different for each call. */ gdk_return BATsort(BAT **sorted, BAT **order, BAT **groups, - BAT *b, BAT *o, BAT *g, bool reverse, bool stable) + BAT *b, BAT *o, BAT *g, bool reverse, bool nilslast, bool stable) { BAT *bn = NULL, *on = NULL, *gn = NULL, *pb = NULL; oid *restrict grps, *restrict ords, prev; @@ -1392,10 +1388,20 @@ BATsort(BAT **sorted, BAT **order, BAT * ALGODEBUG t0 = GDKusec(); + /* we haven't implemented NILs as largest value for stable + * sort, so NILs come first for ascending and last for + * descending */ + assert(!stable || reverse == nilslast); + if (b == NULL) { GDKerror("BATsort: b must exist\n"); return GDK_FAIL; } + if (stable && reverse != nilslast) { + GDKerror("BATsort: stable sort cannot have " + "reverse != nilslast\n"); + return GDK_FAIL; + } if (!ATOMlinear(b->ttype)) { GDKerror("BATsort: type %s cannot be sorted\n", ATOMname(b->ttype)); @@ -1436,7 +1442,8 @@ BATsort(BAT **sorted, BAT **order, BAT * (g->ttype == TYPE_void && /* no nil tail */ BATcount(g) != 0 && is_oid_nil(g->tseqbase)))) { - GDKerror("BATsort: g must have type oid, sorted on the tail, and same size as b\n"); + GDKerror("BATsort: g must have type oid, sorted on the tail, " + "and same size as b\n"); return GDK_FAIL; } if (sorted == NULL && order == NULL) { @@ -1449,8 +1456,15 @@ BATsort(BAT **sorted, BAT **order, BAT * * subsorting and the sort is not stable */ o = NULL; } + if (b->tnonil) { + /* if there are no nils, placement of nils doesn't + * matter, so set nilslast such that ordered bits can + * be used */ + nilslast = reverse; + } if (BATcount(b) <= 1 || - ((reverse ? BATtrevordered(b) : BATtordered(b)) && + (reverse == nilslast && + (reverse ? BATtrevordered(b) : BATtordered(b)) && o == NULL && g == NULL && (groups == NULL || BATtkey(b) || (reverse ? BATtordered(b) : BATtrevordered(b))))) { @@ -1488,11 +1502,12 @@ BATsort(BAT **sorted, BAT **order, BAT * } ALGODEBUG fprintf(stderr, "#BATsort(b=" ALGOBATFMT ",o=" ALGOOPTBATFMT ",g=" ALGOOPTBATFMT - ",reverse=%d,stable=%d) = (" ALGOOPTBATFMT - "," ALGOOPTBATFMT "," ALGOOPTBATFMT - ") -- trivial (" LLFMT " usec)\n", + ",reverse=%d,nilslast=%d,stable=%d) = (" + ALGOOPTBATFMT "," ALGOOPTBATFMT "," + ALGOOPTBATFMT ") -- trivial (" LLFMT + " usec)\n", ALGOBATPAR(b), ALGOOPTBATPAR(o), - ALGOOPTBATPAR(g), reverse, stable, + ALGOOPTBATPAR(g), reverse, nilslast, stable, ALGOOPTBATPAR(bn), ALGOOPTBATPAR(gn), ALGOOPTBATPAR(on), GDKusec() - t0); return GDK_SUCCEED; @@ -1507,7 +1522,7 @@ BATsort(BAT **sorted, BAT **order, BAT * } else { pb = b; } - if (g == NULL && o == NULL && !reverse && + if (g == NULL && o == NULL && !reverse && !nilslast && pb != NULL && BATcheckorderidx(pb) && /* if we want a stable sort, the order index must be * stable, if we don't want stable, we don't care */ @@ -1556,11 +1571,12 @@ BATsort(BAT **sorted, BAT **order, BAT * } ALGODEBUG fprintf(stderr, "#BATsort(b=" ALGOBATFMT ",o=" ALGOOPTBATFMT ",g=" ALGOOPTBATFMT - ",reverse=%d,stable=%d) = (" ALGOOPTBATFMT - "," ALGOOPTBATFMT "," ALGOOPTBATFMT _______________________________________________ checkin-list mailing list [email protected] https://www.monetdb.org/mailman/listinfo/checkin-list
