commit 602ae9b2041df6c7e2b1d9f9da2b5ae57eb94b64
Author:     Laslo Hunhold <[email protected]>
AuthorDate: Tue Jan 4 18:29:30 2022 +0100
Commit:     Laslo Hunhold <[email protected]>
CommitDate: Tue Jan 4 18:33:00 2022 +0100

    Generate separate utf8proc_int32_t buffer to preserve strict aliasing
    
    This clearly shows why it's never a good idea to roll your own types,
    and why it is better to stick with those from the standard library.
    
    Even if the custom type in libutf8proc were defined as an unsigned
    32-bit integer type, it could be changed at any point (e.g. to
    uint_fast32_t, which might default to an unsigned 64-bit type). So we
    can't simply cast between the pointers anyway, even if we didn't care
    about strict aliasing.
    
    Signed-off-by: Laslo Hunhold <[email protected]>

diff --git a/benchmark/character.c b/benchmark/character.c
index 57d7990..53bb30a 100644
--- a/benchmark/character.c
+++ b/benchmark/character.c
@@ -1,8 +1,10 @@
 /* See LICENSE file for copyright and license details. */
+#include <errno.h>
 #include <math.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
 
 #include "../grapheme.h"
 #include "../gen/character-test.h"
@@ -21,6 +23,7 @@
 
 struct payload {
        uint_least32_t *buf;
+       utf8proc_int32_t *buf_int32;
        size_t bufsiz;
 };
 
@@ -45,7 +48,8 @@ libutf8proc(const void *payload)
        size_t i;
 
        for (i = 0; i + 1 < p->bufsiz; i++) {
-               (void)utf8proc_grapheme_break_stateful(p->buf[i], p->buf[i+1],
+               (void)utf8proc_grapheme_break_stateful(p->buf_int32[i],
+                                                      p->buf_int32[i+1],
                                                       &state);
        }
 }
@@ -54,7 +58,8 @@ int
 main(int argc, char *argv[])
 {
        struct payload p;
-       double baseline = NAN;
+       double baseline = (double)NAN;
+       size_t i;
 
        (void)argc;
 
@@ -62,6 +67,17 @@ main(int argc, char *argv[])
                                          &(p.bufsiz))) == NULL) {
                return 1;
        }
+       if ((p.buf_int32 = malloc(p.bufsiz * sizeof(*(p.buf_int32)))) == NULL) {
+               fprintf(stderr, "malloc: %s\n", strerror(errno));
+               exit(1);
+       }
+       for (i = 0; i < p.bufsiz; i++) {
+               /*
+                * there is no overflow, as we know that the maximum
+                * codepoint is 0x10FFFF, which is way below 2^31
+                */
+               p.buf_int32[i] = (utf8proc_int32_t)p.buf[i];
+       }
 
        printf("%s\n", argv[0]);
        run_benchmark(libgrapheme, &p, "libgrapheme ", &baseline,

Reply via email to