Changeset: d51edc6055ce for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=d51edc6055ce
Modified Files:
        clients/Tests/exports.stable.out
        gdk/ChangeLog
        gdk/gdk.h
        gdk/gdk_aggr.c
        gdk/gdk_batop.c
        gdk/gdk_cand.h
        gdk/gdk_firstn.c
        gdk/gdk_imprints.c
        gdk/gdk_orderidx.c
        gdk/gdk_private.h
        gdk/gdk_qsort.c
        gdk/gdk_qsort_impl.h
        gdk/gdk_search.c
        gdk/gdk_select.c
        gdk/gdk_ssort.c
        gdk/gdk_ssort_impl.h
        gdk/gdk_tm.c
        monetdb5/modules/atoms/batxml.c
        monetdb5/modules/atoms/json.c
        monetdb5/modules/kernel/algebra.c
        monetdb5/modules/kernel/bat5.c
        monetdb5/optimizer/opt_costModel.c
        monetdb5/optimizer/opt_prelude.c
        monetdb5/optimizer/opt_prelude.h
        monetdb5/optimizer/opt_support.c
        sql/backends/monet5/sql.c
        sql/backends/monet5/vaults/bam/Tests/query2.2.sql
        sql/backends/monet5/vaults/bam/Tests/query2.2.stable.out
        sql/backends/monet5/vaults/bam/Tests/query2.2.stable.out.int128
        sql/backends/monet5/vaults/bam/Tests/query2.6.sql
        sql/backends/monet5/vaults/bam/Tests/query2.6.stable.out
        sql/common/sql_list.c
        sql/server/rel_optimizer.c
        sql/storage/bat/bat_table.c
        sql/storage/store.c
        sql/test/SQLite_regress/sqllogictest/Tests/select3.test.stable.out
        
sql/test/SQLite_regress/sqllogictest/Tests/select3.test.stable.out.int128
        sql/test/Tests/rank.stable.out
        sql/test/rank.sql
Branch: default
Log Message:

merged


diffs (truncated from 6632 to 300 lines):

diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -116,7 +116,7 @@ BAT *BATdense(oid hseq, oid tseq, BUN cn
 BAT *BATdiff(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate);
 gdk_return BATextend(BAT *b, BUN newcap) 
__attribute__((__warn_unused_result__));
 void BATfakeCommit(BAT *b);
-gdk_return BATfirstn(BAT **topn, BAT **gids, BAT *b, BAT *cands, BAT *grps, 
BUN n, bool asc, bool distinct) __attribute__((__warn_unused_result__));
+gdk_return BATfirstn(BAT **topn, BAT **gids, BAT *b, BAT *cands, BAT *grps, 
BUN n, bool asc, bool nilslast, bool distinct) 
__attribute__((__warn_unused_result__));
 int BATgetaccess(BAT *b);
 PROPrec *BATgetprop(BAT *b, enum prop_t idx);
 gdk_return BATgroup(BAT **groups, BAT **extents, BAT **histo, BAT *b, BAT *s, 
BAT *g, BAT *e, BAT *h) __attribute__((__warn_unused_result__));
@@ -172,7 +172,7 @@ void BATsetcapacity(BAT *b, BUN cnt);
 void BATsetcount(BAT *b, BUN cnt);
 void BATsetprop(BAT *b, enum prop_t idx, int type, const void *v);
 BAT *BATslice(BAT *b, BUN low, BUN high);
-gdk_return BATsort(BAT **sorted, BAT **order, BAT **groups, BAT *b, BAT *o, 
BAT *g, bool reverse, bool stable) __attribute__((__warn_unused_result__));
+gdk_return BATsort(BAT **sorted, BAT **order, BAT **groups, BAT *b, BAT *o, 
BAT *g, bool reverse, bool nilslast, bool stable) 
__attribute__((__warn_unused_result__));
 gdk_return BATstr_group_concat(ValPtr res, BAT *b, BAT *s, bool skip_nils, 
bool abort_on_error, bool nil_if_empty, const char *separator);
 gdk_return BATsubcross(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr) 
__attribute__((__warn_unused_result__));
 gdk_return BATsum(void *res, int tp, BAT *b, BAT *s, bool skip_nils, bool 
abort_on_error, bool nil_if_empty);
@@ -258,8 +258,7 @@ gdk_return GDKmmapfile(str buffer, size_
 int GDKms(void);
 int GDKnr_threads;
 void GDKprepareExit(void);
-void GDKqsort(void *restrict h, void *restrict t, const void *restrict base, 
size_t n, int hs, int ts, int tpe);
-void GDKqsort_rev(void *restrict h, void *restrict t, const void *restrict 
base, size_t n, int hs, int ts, int tpe);
+void GDKqsort(void *restrict h, void *restrict t, const void *restrict base, 
size_t n, int hs, int ts, int tpe, bool reverse, bool nilslast);
 void *GDKrealloc(void *pold, size_t size) __attribute__((__alloc_size__(2))) 
__attribute__((__warn_unused_result__));
 void GDKregister(MT_Id pid);
 gdk_return GDKreleasemmap(void *ptr, size_t size, size_t id, str *msg);
@@ -2038,10 +2037,7 @@ str batmalRef;
 str batmmathRef;
 str batmtimeRef;
 str batpyapi3Ref;
-str batpyapi3Ref;
 str batpyapiRef;
-str batpyapiRef;
-str batrapiRef;
 str batrapiRef;
 str batsqlRef;
 str batstrRef;
@@ -2134,7 +2130,6 @@ str deleteRef;
 void deleteSymbol(Module scope, Symbol prg);
 str deltaRef;
 str dense_rankRef;
-str dense_rankRef;
 malType destinationType(MalBlkPtr mb, InstrPtr p);
 str diffRef;
 str differenceRef;
@@ -2524,12 +2519,8 @@ str putName(const char *nme);
 str putNameLen(const char *nme, size_t len);
 str putRef;
 str pyapi3Ref;
-str pyapi3Ref;
-str pyapi3mapRef;
 str pyapi3mapRef;
 str pyapiRef;
-str pyapiRef;
-str pyapimapRef;
 str pyapimapRef;
 int qtop;
 str queryRef;
@@ -2537,8 +2528,6 @@ str querylogRef;
 str raiseRef;
 str rangejoinRef;
 str rankRef;
-str rankRef;
-str rapiRef;
 str rapiRef;
 int readConsole(Client cntxt);
 MalStkPtr reallocGlobalStack(MalStkPtr s, int cnt);
@@ -2620,7 +2609,6 @@ str sinkRef;
 void slash_2_dir_sep(str fname);
 str sliceRef;
 str sortRef;
-str sortReverseRef;
 str soundex_impl(str *res, str *Name);
 str sqlRef;
 str sqlcatalogRef;
@@ -2644,7 +2632,6 @@ str subcountRef;
 str subdeltaRef;
 str subdiffRef;
 str subeval_aggrRef;
-str subeval_aggrRef;
 str subgroupRef;
 str subgroupdoneRef;
 str subinterRef;
diff --git a/gdk/ChangeLog b/gdk/ChangeLog
--- a/gdk/ChangeLog
+++ b/gdk/ChangeLog
@@ -1,6 +1,20 @@
 # ChangeLog file for MonetDB
 # This file is updated with Maddlog
 
+* Wed Nov  7 2018 Sjoerd Mullender <[email protected]>
+- Implemented a nilslast option for BATfirstn.  If set, NILs come
+  last in the ordering that BATfirstn simulates, so non-NIL values are
+  preferentially returned.  The old behavior can be obtained by setting
+  nilslast to !asc(ending).
+
+* Tue Nov  6 2018 Sjoerd Mullender <[email protected]>
+- Implemented a nilslast option for BATsort.  This option should be
+  equal to the reverse option for stable sort (it is not implemented for
+  stable sort), but can be different from reverse for non-stable sort.
+  The functions BATsort and GDKqsort have extra parameters, the function
+  GDKqsort_rev has been removed (superseded by GDKqsort with the new
+  `reverse' parameter).
+
 * Tue Oct 30 2018 Sjoerd Mullender <[email protected]>
 - The BUNtail, BUNtvar, BUNtloc, and BUNtpos macros (and Tloc and Tpos)
   now return a `void *' instead of a `char *'.
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -1422,12 +1422,11 @@ gdk_export gdk_return BATprint(BAT *b);
 gdk_export bool BATkeyed(BAT *b);
 gdk_export bool BATordered(BAT *b);
 gdk_export bool BATordered_rev(BAT *b);
-gdk_export gdk_return BATsort(BAT **sorted, BAT **order, BAT **groups, BAT *b, 
BAT *o, BAT *g, bool reverse, bool stable)
+gdk_export gdk_return BATsort(BAT **sorted, BAT **order, BAT **groups, BAT *b, 
BAT *o, BAT *g, bool reverse, bool nilslast, bool stable)
        __attribute__((__warn_unused_result__));
 
 
-gdk_export void GDKqsort(void *restrict h, void *restrict t, const void 
*restrict base, size_t n, int hs, int ts, int tpe);
-gdk_export void GDKqsort_rev(void *restrict h, void *restrict t, const void 
*restrict base, size_t n, int hs, int ts, int tpe);
+gdk_export void GDKqsort(void *restrict h, void *restrict t, const void 
*restrict base, size_t n, int hs, int ts, int tpe, bool reverse, bool nilslast);
 
 #define BATtordered(b) ((b)->tsorted)
 #define BATtrevordered(b) ((b)->trevsorted)
@@ -2757,7 +2756,7 @@ gdk_export BAT *BATunique(BAT *b, BAT *s
 gdk_export BAT *BATmergecand(BAT *a, BAT *b);
 gdk_export BAT *BATintersectcand(BAT *a, BAT *b);
 
-gdk_export gdk_return BATfirstn(BAT **topn, BAT **gids, BAT *b, BAT *cands, 
BAT *grps, BUN n, bool asc, bool distinct)
+gdk_export gdk_return BATfirstn(BAT **topn, BAT **gids, BAT *b, BAT *cands, 
BAT *grps, BUN n, bool asc, bool nilslast, bool distinct)
        __attribute__((__warn_unused_result__));
 
 #include "gdk_calc.h"
diff --git a/gdk/gdk_aggr.c b/gdk/gdk_aggr.c
--- a/gdk/gdk_aggr.c
+++ b/gdk/gdk_aggr.c
@@ -2867,14 +2867,14 @@ BATgroupquantile(BAT *b, BAT *g, BAT *e,
                                BBPunfix(g->batCacheid);
                        return bn;
                }
-               if (BATsort(&t1, &t2, NULL, g, NULL, NULL, false, false) != 
GDK_SUCCEED)
+               if (BATsort(&t1, &t2, NULL, g, NULL, NULL, false, false, false) 
!= GDK_SUCCEED)
                        goto bunins_failed;
                if (freeg)
                        BBPunfix(g->batCacheid);
                g = t1;
                freeg = true;
 
-               if (BATsort(&t1, NULL, NULL, b, t2, g, false, false) != 
GDK_SUCCEED) {
+               if (BATsort(&t1, NULL, NULL, b, t2, g, false, false, false) != 
GDK_SUCCEED) {
                        BBPunfix(t2->batCacheid);
                        goto bunins_failed;
                }
@@ -2948,7 +2948,7 @@ BATgroupquantile(BAT *b, BAT *g, BAT *e,
                     BATcheckorderidx(pb))) {
                        ords = (const oid *) (pb ? pb->torderidx->base : 
b->torderidx->base) + ORDERIDXOFF;
                } else {
-                       if (BATsort(NULL, &t1, NULL, b, NULL, g, false, false) 
!= GDK_SUCCEED)
+                       if (BATsort(NULL, &t1, NULL, b, NULL, g, false, false, 
false) != GDK_SUCCEED)
                                goto bunins_failed;
                        if (BATtdense(t1))
                                ords = NULL;
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -1326,22 +1326,18 @@ BATordered_rev(BAT *b)
  * "quick" sort does not produce errors */
 static gdk_return
 do_sort(void *restrict h, void *restrict t, const void *restrict base,
-       size_t n, int hs, int ts, int tpe, bool reverse, bool stable)
+       size_t n, int hs, int ts, int tpe, bool reverse, bool nilslast,
+       bool stable)
 {
        if (n <= 1)             /* trivially sorted */
                return GDK_SUCCEED;
-       if (reverse) {
-               if (stable) {
+       if (stable) {
+               if (reverse)
                        return GDKssort_rev(h, t, base, n, hs, ts, tpe);
-               } else {
-                       GDKqsort_rev(h, t, base, n, hs, ts, tpe);
-               }
+               else
+                       return GDKssort(h, t, base, n, hs, ts, tpe);
        } else {
-               if (stable) {
-                       return GDKssort(h, t, base, n, hs, ts, tpe);
-               } else {
-                       GDKqsort(h, t, base, n, hs, ts, tpe);
-               }
+               GDKqsort(h, t, base, n, hs, ts, tpe, reverse, nilslast);
        }
        return GDK_SUCCEED;
 }
@@ -1376,14 +1372,14 @@ do_sort(void *restrict h, void *restrict
  * Apart from error checking and maintaining reference counts, sorting
  * three columns (col1, col2, col3) could look like this with the
  * sorted results in (col1s, col2s, col3s):
- *     BATsort(&col1s, &ord1, &grp1, col1, NULL, NULL, false, false);
- *     BATsort(&col2s, &ord2, &grp2, col2, ord1, grp1, false, false);
- *     BATsort(&col3s,  NULL,  NULL, col3, ord2, grp2, false, false);
+ *     BATsort(&col1s, &ord1, &grp1, col1, NULL, NULL, false, false, false);
+ *     BATsort(&col2s, &ord2, &grp2, col2, ord1, grp1, false, false, false);
+ *     BATsort(&col3s,  NULL,  NULL, col3, ord2, grp2, false, false, false);
  * Note that the "reverse" parameter can be different for each call.
  */
 gdk_return
 BATsort(BAT **sorted, BAT **order, BAT **groups,
-          BAT *b, BAT *o, BAT *g, bool reverse, bool stable)
+       BAT *b, BAT *o, BAT *g, bool reverse, bool nilslast, bool stable)
 {
        BAT *bn = NULL, *on = NULL, *gn = NULL, *pb = NULL;
        oid *restrict grps, *restrict ords, prev;
@@ -1392,10 +1388,20 @@ BATsort(BAT **sorted, BAT **order, BAT *
 
        ALGODEBUG t0 = GDKusec();
 
+       /* we haven't implemented NILs as largest value for stable
+        * sort, so NILs come first for ascending and last for
+        * descending */
+       assert(!stable || reverse == nilslast);
+
        if (b == NULL) {
                GDKerror("BATsort: b must exist\n");
                return GDK_FAIL;
        }
+       if (stable && reverse != nilslast) {
+               GDKerror("BATsort: stable sort cannot have "
+                        "reverse != nilslast\n");
+               return GDK_FAIL;
+       }
        if (!ATOMlinear(b->ttype)) {
                GDKerror("BATsort: type %s cannot be sorted\n",
                         ATOMname(b->ttype));
@@ -1436,7 +1442,8 @@ BATsort(BAT **sorted, BAT **order, BAT *
             (g->ttype == TYPE_void &&         /* no nil tail */
              BATcount(g) != 0 &&
              is_oid_nil(g->tseqbase)))) {
-               GDKerror("BATsort: g must have type oid, sorted on the tail, 
and same size as b\n");
+               GDKerror("BATsort: g must have type oid, sorted on the tail, "
+                        "and same size as b\n");
                return GDK_FAIL;
        }
        if (sorted == NULL && order == NULL) {
@@ -1449,8 +1456,15 @@ BATsort(BAT **sorted, BAT **order, BAT *
                 * subsorting and the sort is not stable */
                o = NULL;
        }
+       if (b->tnonil) {
+               /* if there are no nils, placement of nils doesn't
+                * matter, so set nilslast such that ordered bits can
+                * be used */
+               nilslast = reverse;
+       }
        if (BATcount(b) <= 1 ||
-           ((reverse ? BATtrevordered(b) : BATtordered(b)) &&
+           (reverse == nilslast &&
+            (reverse ? BATtrevordered(b) : BATtordered(b)) &&
             o == NULL && g == NULL &&
             (groups == NULL || BATtkey(b) ||
              (reverse ? BATtordered(b) : BATtrevordered(b))))) {
@@ -1488,11 +1502,12 @@ BATsort(BAT **sorted, BAT **order, BAT *
                }
                ALGODEBUG fprintf(stderr, "#BATsort(b=" ALGOBATFMT ",o="
                                  ALGOOPTBATFMT ",g=" ALGOOPTBATFMT
-                                 ",reverse=%d,stable=%d) = (" ALGOOPTBATFMT
-                                 "," ALGOOPTBATFMT "," ALGOOPTBATFMT
-                                 ") -- trivial (" LLFMT " usec)\n",
+                                 ",reverse=%d,nilslast=%d,stable=%d) = ("
+                                 ALGOOPTBATFMT "," ALGOOPTBATFMT ","
+                                 ALGOOPTBATFMT ") -- trivial (" LLFMT
+                                 " usec)\n",
                                  ALGOBATPAR(b), ALGOOPTBATPAR(o),
-                                 ALGOOPTBATPAR(g), reverse, stable,
+                                 ALGOOPTBATPAR(g), reverse, nilslast, stable,
                                  ALGOOPTBATPAR(bn), ALGOOPTBATPAR(gn),
                                  ALGOOPTBATPAR(on), GDKusec() - t0);
                return GDK_SUCCEED;
@@ -1507,7 +1522,7 @@ BATsort(BAT **sorted, BAT **order, BAT *
        } else {
                pb = b;
        }
-       if (g == NULL && o == NULL && !reverse &&
+       if (g == NULL && o == NULL && !reverse && !nilslast &&
            pb != NULL && BATcheckorderidx(pb) &&
            /* if we want a stable sort, the order index must be
             * stable, if we don't want stable, we don't care */
@@ -1556,11 +1571,12 @@ BATsort(BAT **sorted, BAT **order, BAT *
                }
                ALGODEBUG fprintf(stderr, "#BATsort(b=" ALGOBATFMT ",o="
                                  ALGOOPTBATFMT ",g=" ALGOOPTBATFMT
-                                 ",reverse=%d,stable=%d) = (" ALGOOPTBATFMT
-                                 "," ALGOOPTBATFMT "," ALGOOPTBATFMT
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to