commit da46b2648d2846dc23e310b7ac0cc3ddebb7ccd3
Author:     Laslo Hunhold <[email protected]>
AuthorDate: Sun Jan 9 17:30:53 2022 +0100
Commit:     Laslo Hunhold <[email protected]>
CommitDate: Sun Jan 9 17:30:53 2022 +0100

    Refactor benchmark code
    
    Rename some variables for more consistent naming, add a function
    to explicitly generate a UTF-8-test-buffer and move some things into
    benchmark/util.
    
    Signed-off-by: Laslo Hunhold <[email protected]>

diff --git a/benchmark/character.c b/benchmark/character.c
index 626733d..2ef0d1c 100644
--- a/benchmark/character.c
+++ b/benchmark/character.c
@@ -14,27 +14,20 @@
 
 #define NUM_ITERATIONS 1000000
 
-#ifdef __has_attribute
-       #if __has_attribute(optnone)
-               void libgrapheme(const void *) __attribute__((optnone));
-               void libutf8proc(const void *) __attribute__((optnone));
-       #endif
-#endif
-
-struct payload {
+struct break_benchmark_payload {
        uint_least32_t *buf;
-       utf8proc_int32_t *buf_int32;
-       size_t bufsiz;
+       utf8proc_int32_t *buf_utf8proc;
+       size_t buflen;
 };
 
 void
 libgrapheme(const void *payload)
 {
        GRAPHEME_STATE state = { 0 };
-       const struct payload *p = payload;
+       const struct break_benchmark_payload *p = payload;
        size_t i;
 
-       for (i = 0; i + 1 < p->bufsiz; i++) {
+       for (i = 0; i + 1 < p->buflen; i++) {
                (void)grapheme_is_character_break(p->buf[i], p->buf[i+1],
                                                  &state);
        }
@@ -44,12 +37,12 @@ void
 libutf8proc(const void *payload)
 {
        utf8proc_int32_t state = 0;
-       const struct payload *p = payload;
+       const struct break_benchmark_payload *p = payload;
        size_t i;
 
-       for (i = 0; i + 1 < p->bufsiz; i++) {
-               (void)utf8proc_grapheme_break_stateful(p->buf_int32[i],
-                                                      p->buf_int32[i+1],
+       for (i = 0; i + 1 < p->buflen; i++) {
+               (void)utf8proc_grapheme_break_stateful(p->buf_utf8proc[i],
+                                                      p->buf_utf8proc[i+1],
                                                       &state);
        }
 }
@@ -57,33 +50,33 @@ libutf8proc(const void *payload)
 int
 main(int argc, char *argv[])
 {
-       struct payload p;
+       struct break_benchmark_payload p;
        double baseline = (double)NAN;
        size_t i;
 
        (void)argc;
 
-       if ((p.buf = generate_test_buffer(character_test, LEN(character_test),
-                                         &(p.bufsiz))) == NULL) {
+       if ((p.buf = generate_cp_test_buffer(character_test, LEN(character_test),
+                                            &(p.buflen))) == NULL) {
                return 1;
        }
-       if ((p.buf_int32 = malloc(p.bufsiz * sizeof(*(p.buf_int32)))) == NULL) {
+       if ((p.buf_utf8proc = malloc(p.buflen * sizeof(*(p.buf_utf8proc)))) == NULL) {
                fprintf(stderr, "malloc: %s\n", strerror(errno));
                exit(1);
        }
-       for (i = 0; i < p.bufsiz; i++) {
+       for (i = 0; i < p.buflen; i++) {
                /*
                 * there is no overflow, as we know that the maximum
                 * codepoint is 0x10FFFF, which is way below 2^31
                 */
-               p.buf_int32[i] = (utf8proc_int32_t)p.buf[i];
+               p.buf_utf8proc[i] = (utf8proc_int32_t)p.buf[i];
        }
 
        printf("%s\n", argv[0]);
        run_benchmark(libgrapheme, &p, "libgrapheme ", NULL, "comparison",
-                     &baseline, NUM_ITERATIONS, p.bufsiz - 1);
+                     &baseline, NUM_ITERATIONS, p.buflen - 1);
        run_benchmark(libutf8proc, &p, "libutf8proc ", NULL, "comparison",
-                     &baseline, NUM_ITERATIONS, p.bufsiz - 1);
+                     &baseline, NUM_ITERATIONS, p.buflen - 1);
 
        free(p.buf);
 
diff --git a/benchmark/utf8-decode.c b/benchmark/utf8-decode.c
index 5dc0321..68d28fe 100644
--- a/benchmark/utf8-decode.c
+++ b/benchmark/utf8-decode.c
@@ -14,30 +14,23 @@
 
 #define NUM_ITERATIONS 100000
 
-#ifdef __has_attribute
-       #if __has_attribute(optnone)
-               void libgrapheme(const void *) __attribute__((optnone));
-               void libutf8proc(const void *) __attribute__((optnone));
-       #endif
-#endif
-
-struct payload {
-       char *buf_char;
-       utf8proc_uint8_t *buf_uint8;
-       size_t bufsiz;
+struct utf8_benchmark_payload {
+       char *buf;
+       utf8proc_uint8_t *buf_utf8proc;
+       size_t buflen;
 };
 
 void
 libgrapheme(const void *payload)
 {
-       const struct payload *p = payload;
+       const struct utf8_benchmark_payload *p = payload;
        uint_least32_t cp;
        size_t ret, off;
 
-       for (off = 0; off < p->bufsiz; off += ret) {
-               if ((ret = grapheme_decode_utf8(p->buf_char + off,
-                                               p->bufsiz - off, &cp)) >
-                   (p->bufsiz - off)) {
+       for (off = 0; off < p->buflen; off += ret) {
+               if ((ret = grapheme_decode_utf8(p->buf + off,
+                                               p->buflen - off, &cp)) >
+                   (p->buflen - off)) {
                        break;
                }
                (void)cp;
@@ -47,14 +40,14 @@ libgrapheme(const void *payload)
 void
 libutf8proc(const void *payload)
 {
-       const struct payload *p = payload;
+       const struct utf8_benchmark_payload *p = payload;
        utf8proc_int32_t cp;
        utf8proc_ssize_t ret;
        size_t off;
 
-       for (off = 0; off < p->bufsiz; off += (size_t)ret) {
-               if ((ret = utf8proc_iterate(p->buf_uint8 + off,
-                                           (utf8proc_ssize_t)(p->bufsiz - off),
+       for (off = 0; off < p->buflen; off += (size_t)ret) {
+               if ((ret = utf8proc_iterate(p->buf_utf8proc + off,
+                                           (utf8proc_ssize_t)(p->buflen - off),
                                            &cp)) < 0) {
                        break;
                }
@@ -65,57 +58,38 @@ libutf8proc(const void *payload)
 int
 main(int argc, char *argv[])
 {
-       struct payload p;
-       size_t cpbufsiz, i, off, ret;
-       uint_least32_t *cpbuf;
+       struct utf8_benchmark_payload p;
+       size_t i;
        double baseline = (double)NAN;
 
        (void)argc;
 
-       if ((cpbuf = generate_test_buffer(character_test, LEN(character_test),
-                                         &cpbufsiz)) == NULL) {
-               return 1;
-       }
+       p.buf = generate_utf8_test_buffer(character_test,
+                                         LEN(character_test),
+                                         &(p.buflen));
 
-       /* convert cp-buffer to utf8-data (both as char and custom uint8-type) */
-       for (i = 0, p.bufsiz = 0; i < cpbufsiz; i++) {
-               p.bufsiz += grapheme_encode_utf8(cpbuf[i], NULL, 0);
-       }
-       if ((p.buf_char = malloc(p.bufsiz)) == NULL) {
-               fprintf(stderr, "malloc: %s\n", strerror(errno));
-               exit(1);
-       }
-       for (i = 0, off = 0; i < cpbufsiz; i++, off += ret) {
-               if ((ret = grapheme_encode_utf8(cpbuf[i], p.buf_char + off,
-                                               p.bufsiz - off)) >
-                   (p.bufsiz - off)) {
-                       /* shouldn't happen */
-                       fprintf(stderr, "Error while converting buffer.\n");
-                       exit(1);
-               }
-       }
-       if ((p.buf_uint8 = malloc(p.bufsiz)) == NULL) { 
+       /* convert cp-buffer to stupid custom libutf8proc-uint8-type */
+       if ((p.buf_utf8proc = malloc(p.buflen)) == NULL) {
                fprintf(stderr, "malloc: %s\n", strerror(errno));
                exit(1);
        }
-       for (i = 0; i < p.bufsiz; i++) {
+       for (i = 0; i < p.buflen; i++) {
                /* 
                 * even if char is larger than 8 bit, it will only have
                 * any of the first 8 bits set (by construction).
                 */
-               p.buf_uint8[i] = (utf8proc_uint8_t)p.buf_char[i];
+               p.buf_utf8proc[i] = (utf8proc_uint8_t)p.buf[i];
        }
 
        printf("%s\n", argv[0]);
        run_benchmark(libgrapheme, &p, "libgrapheme ", NULL,
-                     "byte", &baseline, NUM_ITERATIONS, p.bufsiz);
+                     "byte", &baseline, NUM_ITERATIONS, p.buflen);
        run_benchmark(libutf8proc, &p, "libutf8proc ",
                      "but unsafe (does not detect overlong encodings)",
-                     "byte", &baseline, NUM_ITERATIONS, p.bufsiz);
+                     "byte", &baseline, NUM_ITERATIONS, p.buflen);
 
-       free(cpbuf);
-       free(p.buf_char);
-       free(p.buf_uint8);
+       free(p.buf);
+       free(p.buf_utf8proc);
 
        return 0;
 }
diff --git a/benchmark/util.c b/benchmark/util.c
index b5d7e23..5f85874 100644
--- a/benchmark/util.c
+++ b/benchmark/util.c
@@ -5,22 +5,23 @@
 #include <time.h>
 
 #include "../gen/types.h"
+#include "../grapheme.h"
 #include "util.h"
 
 uint_least32_t *
-generate_test_buffer(const struct break_test *test, size_t testlen,
-                     size_t *bufsiz)
+generate_cp_test_buffer(const struct break_test *test, size_t testlen,
+                        size_t *buflen)
 {
        size_t i, j, off;
        uint_least32_t *buf;
 
        /* allocate and generate buffer */
-       for (i = 0, *bufsiz = 0; i < testlen; i++) {
-               *bufsiz += test[i].cplen;
+       for (i = 0, *buflen = 0; i < testlen; i++) {
+               *buflen += test[i].cplen;
        }
-       if (!(buf = calloc(*bufsiz, sizeof(*buf)))) {
+       if (!(buf = calloc(*buflen, sizeof(*buf)))) {
                fprintf(stderr, "generate_test_buffer: calloc: Out of memory.\n");
-               return NULL;
+               exit(1);
        }
        for (i = 0, off = 0; i < testlen; i++) {
                for (j = 0; j < test[i].cplen; j++) {
@@ -32,6 +33,42 @@ generate_test_buffer(const struct break_test *test, size_t testlen,
        return buf;
 }
 
+char *
+generate_utf8_test_buffer(const struct break_test *test, size_t testlen,
+                          size_t *buflen)
+{
+       size_t i, j, off, ret;
+       char *buf;
+
+       /* allocate and generate buffer */
+       for (i = 0, *buflen = 0; i < testlen; i++) {
+               for (j = 0; j < test[i].cplen; j++) {
+                       *buflen += grapheme_encode_utf8(test[i].cp[j], NULL, 0);
+               }
+       }
+       (*buflen)++; /* terminating NUL-byte */
+       if (!(buf = malloc(*buflen))) {
+               fprintf(stderr, "generate_test_buffer: malloc: Out of memory.\n");
+               exit(1);
+       }
+       for (i = 0, off = 0; i < testlen; i++) {
+               for (j = 0; j < test[i].cplen; j++, off += ret) {
+                       if ((ret = grapheme_encode_utf8(test[i].cp[j],
+                                                       buf + off,
+                                                       *buflen - off)) >
+                           (*buflen - off)) {
+                               /* shouldn't happen */
+                               fprintf(stderr, "generate_utf8_test_buffer: "
+                                       "Buffer too small.\n");
+                               exit(1);
+                       }
+               }
+       }
+       buf[*buflen - 1] = '\0';
+
+       return buf;
+}
+
 static double
 time_diff(struct timespec *a, struct timespec *b)
 {
diff --git a/benchmark/util.h b/benchmark/util.h
index 7451290..653d9da 100644
--- a/benchmark/util.h
+++ b/benchmark/util.h
@@ -6,8 +6,17 @@
 
 #define LEN(x) (sizeof(x) / sizeof(*(x)))
 
-uint_least32_t *generate_test_buffer(const struct break_test *, size_t,
-                                     size_t *);
+#ifdef __has_attribute
+       #if __has_attribute(optnone)
+               void libgrapheme(const void *) __attribute__((optnone));
+               void libutf8proc(const void *) __attribute__((optnone));
+       #endif
+#endif
+
+uint_least32_t *generate_cp_test_buffer(const struct break_test *, size_t,
+                                        size_t *);
+char *generate_utf8_test_buffer(const struct break_test *, size_t, size_t *);
+
 void run_benchmark(void (*func)(const void *), const void *, const char *,
                    const char *, const char *, double *, size_t, size_t);
 

Reply via email to