Changeset: 3bfb8bcf5240 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/3bfb8bcf5240
Modified Files:
gdk/gdk_batop.c
gdk/gdk_private.h
gdk/gdk_rsort.c
Branch: default
Log Message:
Back to sorting UUID with radix sort, but now like big-endian.
Also some cleanup.
diffs (167 lines):
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -2284,16 +2284,11 @@ do_sort(void *restrict h, void *restrict
if (nilslast == reverse && (stable || n > 100))
return GDKrsort(h, t, n, hs, ts, reverse, false);
break;
-#ifdef WORDS_BIGENDIAN
- /* only use radix sort for UUID on big-endian architectures since
- * the bytes need to be sorted in the opposite order from
- * little-endian */
case TYPE_uuid:
assert(base == NULL);
if (nilslast == reverse && (stable || n > 100))
return GDKrsort(h, t, n, hs, ts, reverse, true);
break;
-#endif
default:
break;
}
diff --git a/gdk/gdk_private.h b/gdk/gdk_private.h
--- a/gdk/gdk_private.h
+++ b/gdk/gdk_private.h
@@ -151,7 +151,7 @@ gdk_return GDKremovedir(int farmid, cons
gdk_return GDKsave(int farmid, const char *nme, const char *ext, void *buf,
size_t size, storage_t mode, bool dosync)
__attribute__((__warn_unused_result__))
__attribute__((__visibility__("hidden")));
-gdk_return GDKrsort(void *restrict h, void *restrict t, size_t n, size_t hs,
size_t ts, bool reverse, bool nosign)
+gdk_return GDKrsort(void *restrict h, void *restrict t, size_t n, size_t hs,
size_t ts, bool reverse, bool isuuid)
__attribute__((__warn_unused_result__))
__attribute__((__visibility__("hidden")));
gdk_return GDKssort_rev(void *restrict h, void *restrict t, const void
*restrict base, size_t n, int hs, int ts, int tpe)
diff --git a/gdk/gdk_rsort.c b/gdk/gdk_rsort.c
--- a/gdk/gdk_rsort.c
+++ b/gdk/gdk_rsort.c
@@ -18,10 +18,9 @@
#define NBUCKETS (1 << RADIX)
gdk_return
-GDKrsort(void *restrict h, void *restrict t, size_t n, size_t hs, size_t ts,
bool reverse, bool nosign)
+GDKrsort(void *restrict h, void *restrict t, size_t n, size_t hs, size_t ts,
bool reverse, bool isuuid)
{
- const size_t niters = (8 * hs + RADIX - 1) / RADIX;
- size_t *counts = GDKmalloc(niters * NBUCKETS * sizeof(size_t));
+ size_t *counts = GDKmalloc(hs * NBUCKETS * sizeof(size_t));
size_t pos[NBUCKETS];
uint8_t *h1 = h;
uint8_t *h2;
@@ -58,25 +57,32 @@ GDKrsort(void *restrict h, void *restric
ts = 0;
}
- memset(counts, 0, niters * NBUCKETS * sizeof(size_t));
- for (size_t i = 0, o = 0; i < n; i++, o += hs) {
- for (size_t j = 0; j < niters; j++) {
-#ifdef WORDS_BIGENDIAN
- uint8_t v = h1[o + hs - j - 1];
-#else
- uint8_t v = h1[o + j];
+ memset(counts, 0, hs * NBUCKETS * sizeof(size_t));
+#ifndef WORDS_BIGENDIAN
+ if (isuuid /* UUID, treat like big-endian */)
#endif
- counts[(j << RADIX) + v]++;
+ for (size_t i = 0, o = 0; i < n; i++, o += hs) {
+ for (size_t j = 0, k = hs - 1; j < hs; j++, k--) {
+ uint8_t v = h1[o + k];
+ counts[(j << RADIX) + v]++;
+ }
}
- }
-
+#ifndef WORDS_BIGENDIAN
+ else
+ for (size_t i = 0, o = 0; i < n; i++, o += hs) {
+ for (size_t j = 0; j < hs; j++) {
+ uint8_t v = h1[o + j];
+ counts[(j << RADIX) + v]++;
+ }
+ }
+#endif
/* When sorting in ascending order, the negative numbers occupy
* the second half of the buckets in the last iteration; when
* sorting in descending order, the negative numbers occupy the
* first half. In either case, at the end we need to put the
* second half first and the first half after. */
size_t negpos = 0;
- for (size_t j = 0, b = 0; j < niters; j++, b += NBUCKETS) {
+ for (size_t j = 0, b = 0, k = hs - 1; j < hs; j++, b += NBUCKETS, k--) {
size_t nb = counts[b] > 0;
if (reverse) {
pos[NBUCKETS - 1] = 0;
@@ -100,16 +106,24 @@ GDKrsort(void *restrict h, void *restric
continue;
}
/* note, this loop changes the pos array */
- for (size_t i = 0, ho = 0, to = 0; i < n; i++, ho += hs, to +=
ts) {
-#ifdef WORDS_BIGENDIAN
- uint8_t v = h1[ho + hs - j - 1];
-#else
- uint8_t v = h1[ho + j];
+#ifndef WORDS_BIGENDIAN
+ if (isuuid /* UUID, treat like big-endian */)
#endif
- if (t)
- memcpy(t2 + ts * pos[v], t1 + to, ts);
- memcpy(h2 + hs * pos[v]++, h1 + ho, hs);
- }
+ for (size_t i = 0, ho = 0, to = 0; i < n; i++, ho +=
hs, to += ts) {
+ uint8_t v = h1[ho + k];
+ if (t)
+ memcpy(t2 + ts * pos[v], t1 + to, ts);
+ memcpy(h2 + hs * pos[v]++, h1 + ho, hs);
+ }
+#ifndef WORDS_BIGENDIAN
+ else
+ for (size_t i = 0, ho = 0, to = 0; i < n; i++, ho +=
hs, to += ts) {
+ uint8_t v = h1[ho + j];
+ if (t)
+ memcpy(t2 + ts * pos[v], t1 + to, ts);
+ memcpy(h2 + hs * pos[v]++, h1 + ho, hs);
+ }
+#endif
uint8_t *t = h1;
h1 = h2;
h2 = t;
@@ -120,7 +134,9 @@ GDKrsort(void *restrict h, void *restric
GDKfree(counts);
if (h1 != (uint8_t *) h) {
- if (nosign) {
+ /* we need to copy the data back to the correct heap */
+ if (isuuid) {
+ /* no negative values in uuid, so no shuffling */
memcpy(h2, h1, n * hs);
if (t)
memcpy(t2, t1, n * ts);
@@ -137,21 +153,16 @@ GDKrsort(void *restrict h, void *restric
memcpy(t2 + ts * (n - negpos), t1,
negpos * ts);
}
}
- } else if (negpos != 0 && negpos != n && !nosign) {
+ } else if (negpos > 0 && negpos < n && !isuuid) {
/* copy the negative integers to the start, copy positive after
*/
- if (negpos < n) {
- memcpy(h2, h1 + hs * negpos, (n - negpos) * hs);
- if (t)
- memcpy(t2, t1 + ts * negpos, (n - negpos) * ts);
+ memcpy(h2, h1 + hs * negpos, (n - negpos) * hs);
+ memcpy(h2 + hs * (n - negpos), h1, negpos * hs);
+ memcpy(h, h2, n * hs);
+ if (t) {
+ memcpy(t2, t1 + ts * negpos, (n - negpos) * ts);
+ memcpy(t2 + ts * (n - negpos), t1, negpos * ts);
+ memcpy(t, t2, n * ts);
}
- if (negpos > 0) {
- memcpy(h2 + hs * (n - negpos), h1, negpos * hs);
- if (t)
- memcpy(t2 + ts * (n - negpos), t1, negpos * ts);
- }
- memcpy(h, h2, n * hs);
- if (t)
- memcpy(t, t2, n * ts);
} /* else, everything is already in the correct place */
HEAPfree(&tmph, true);
if (t)
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]