On 2026-03-13 Bruno Haible via Gnulib discussion list wrote:
> These two modules implement the stdc_store8_* functions from the ISO
> C2y draft.

Implementing the aligned loads and stores with type punning results in
strict aliasing violations and undefined behavior. I attached a demo
program. When built with GCC 15.2.1 on x86-64, the output differs
depending on the optimization level (-O0 vs. -O2). With -O2, the 16-bit
store isn't seen by the 32-bit loads (v0 = 0x13121110 instead of
0xF3F21110). Adding -fno-strict-aliasing fixes it but that isn't
standard C anymore.

It's great that C2y will standardize this functionality because
currently they can be difficult to implement in a portable manner
*while keeping them very fast*. I have written some notes on lines
202-323 and lines 621-644:

https://github.com/tukaani-project/xz/blob/bfc5f12a84a2a9df774ed16cd6eb58fd5ab24646/src/common/tuklib_integer.h#L202

The above needs to know if unaligned access is fast or not. For some
archs it's simple, but for ARM64 and LoongArch I couldn't figure out
anything better than building a test program and looking at objdump's
output. The code and relevant notes are here:

https://github.com/tukaani-project/xz/blob/bfc5f12a84a2a9df774ed16cd6eb58fd5ab24646/m4/tuklib_integer.m4#L65

-- 
Lasse Collin
#include <inttypes.h>
#include <stdlib.h>
#include <stdio.h>
#include <byteswap.h>

static inline uint_least16_t
stdc_load8_aligned_leu16 (const unsigned char ptr[2])
{
  uint16_t value = *(const uint16_t *)ptr;
# ifdef WORDS_BIGENDIAN
  return bswap_16 (value);
# else
  return value;
# endif
}

static inline uint_least32_t
stdc_load8_aligned_leu32 (const unsigned char ptr[4])
{
  uint32_t value = *(const uint32_t *)ptr;
# ifdef WORDS_BIGENDIAN
  return bswap_32 (value);
# else
  return value;
# endif
}

static inline void
stdc_store8_aligned_leu16 (uint_least16_t value, unsigned char ptr[2])
{
# ifdef WORDS_BIGENDIAN
  *(uint16_t *)ptr = bswap_16 (value);
# else
  *(uint16_t *)ptr = value;
# endif
}

static inline void
stdc_store8_aligned_leu32 (uint_least32_t value, unsigned char ptr[4])
{
# ifdef WORDS_BIGENDIAN
  *(uint32_t *)ptr = bswap_32 (value);
# else
  *(uint32_t *)ptr = value;
# endif
}

int
main (void)
{
  unsigned char *buf = malloc(8);
  if (buf == NULL)
    return 1;

  stdc_store8_aligned_leu32 (0x13121110, buf);
  stdc_store8_aligned_leu32 (0x17161514, buf + 4);

  stdc_store8_aligned_leu16 (0xF3F2, buf + 2);

  uint32_t v0 = stdc_load8_aligned_leu32 (buf);
  uint32_t v4 = stdc_load8_aligned_leu32 (buf + 4);

  printf ("v0 = 0x%08" PRIX32 "\n", v0);
  printf ("v4 = 0x%08" PRIX32 "\n", v4);

  for (size_t i = 0; i < 8; ++i)
    printf ("0x%X\n", buf[i]);

  free (buf);

  return 0;
}

Reply via email to