Hi Paul,
> Also, it's better to keep code simple when possible. I came up with a
> simpler fix for the -fstrict-align bug that generates identical aligned
> code for GCC 11 (didn't bother going back to GCC 10) and installed the
> attached set of patches to do that.
I vehemently disagree with this simplification.
I just spent a whole day creating working, optimized *_aligned_* functions.
(The purpose of these functions is to be optimized, otherwise the
load8_* / store8_* functions without _aligned_ could be used.)
And your patch undoes my optimizations.
No, your simplified code does *NOT* generate the same code. If you got that
impression, you probably tested only x86_64, x86, and arm64.
Try testing with some new arch (riscv64) or some older arch (alpha or sparc64).
I attach the code of the *_aligned_* functions, from my stdbit.in.h and from
yours.
$ riscv64-linux-gnu-gcc -O2 -S -fomit-frame-pointer loadstore8-bruno.c
$ riscv64-linux-gnu-gcc -O2 -S -fomit-frame-pointer loadstore8-paul.c
Take the stdc_load8_aligned_leu32 function as an example.
In loadstore8-bruno.s:
stdc_load8_aligned_leu32:
lw a0,0(a0)
ret
In loadstore8-paul.s:
stdc_load8_aligned_leu32:
.LFB14:
lbu a4,1(a0)
lbu a3,0(a0)
lbu a5,2(a0)
lbu a0,3(a0)
slli a4,a4,8
or a4,a4,a3
slli a5,a5,16
or a5,a5,a4
slli a0,a0,24
or a0,a0,a5
sext.w a0,a0
ret
You don't need to benchmark these in order to see which is faster.
It is obvious: 2 instructions vs. 12 instructions.
Please revert this major de-optimization.
> These patches also remove casts that
> aren't needed (some of which confused me a bit).
The casts to signed intN_t types were there for clarity.
The casts to uint_fast16_t were there for speed. On some architectures,
such as sparc, it is more efficient to work with 32-bit integers than with
16-bit integers, and the definition of uint_fast16_t as uint32_t embodies
this knowledge. Removing these casts is also a de-optimization that no one
has asked for.
Bruno
#include <stdint.h>
#include <string.h>
#include <byteswap.h>
/* Choose the implementation strategy used by the *_aligned_* functions
   in this file.
   VARIANT_A: clang >= 4, GCC >= 4.9 when compiling C++, or GCC >= 4.7
   when compiling C.  Those functions use memcpy together with
   __builtin_assume_aligned.
   VARIANT_E: MSVC.  Those functions use a plain memcpy.
   VARIANT_F: any other compiler.  Those functions combine or split the
   bytes with shifts.  */
#if (defined __clang__ ? __clang_major__ >= 4 : \
(defined __GNUC__ \
&& (defined __cplusplus \
? __GNUC__ + (__GNUC_MINOR__ >= 9) > 4 \
: __GNUC__ + (__GNUC_MINOR__ >= 7) > 4)))
# define _GL_LOADSTORE8_VARIANT_A 1
#elif defined _MSC_VER
# define _GL_LOADSTORE8_VARIANT_E 1
#else
# define _GL_LOADSTORE8_VARIANT_F 1
#endif
/* Return the single byte at PTR.  With only one byte there is nothing
   to reorder.  */
uint_least8_t
stdc_load8_aligned_beu8 (const unsigned char ptr[1])
{
  return *ptr;
}
/* Read a 16-bit value whose bytes are stored most-significant first at
   PTR.  Variant F assembles it from the two bytes; the other variants
   copy both bytes at once (variant A tells the compiler that PTR is
   2-byte aligned) and byte-swap unless WORDS_BIGENDIAN is defined.  */
uint_least16_t
stdc_load8_aligned_beu16 (const unsigned char ptr[2])
{
# if _GL_LOADSTORE8_VARIANT_F
  uint_fast16_t hi = ptr[0];
  uint_fast16_t lo = ptr[1];
  return (hi << 8) | lo;
# else
#  if _GL_LOADSTORE8_VARIANT_A
  const void *src = __builtin_assume_aligned (ptr, 2);
#  else /* _GL_LOADSTORE8_VARIANT_E */
  const void *src = ptr;
#  endif
  uint16_t raw;
  memcpy (&raw, src, sizeof raw);
#  ifdef WORDS_BIGENDIAN
  return raw;
#  else
  return bswap_16 (raw);
#  endif
# endif
}
/* Read a 32-bit value whose bytes are stored most-significant first at
   PTR.  Variant F accumulates the four bytes; the other variants copy
   all four at once (variant A tells the compiler that PTR is 4-byte
   aligned) and byte-swap unless WORDS_BIGENDIAN is defined.  */
uint_least32_t
stdc_load8_aligned_beu32 (const unsigned char ptr[4])
{
# if _GL_LOADSTORE8_VARIANT_F
  uint_fast32_t acc = 0;
  for (int i = 0; i < 4; i++)
    acc = (acc << 8) | ptr[i];
  return acc;
# else
#  if _GL_LOADSTORE8_VARIANT_A
  const void *src = __builtin_assume_aligned (ptr, 4);
#  else /* _GL_LOADSTORE8_VARIANT_E */
  const void *src = ptr;
#  endif
  uint32_t raw;
  memcpy (&raw, src, sizeof raw);
#  ifdef WORDS_BIGENDIAN
  return raw;
#  else
  return bswap_32 (raw);
#  endif
# endif
}
/* Read a 64-bit value whose bytes are stored most-significant first at
   PTR.  Variant F accumulates the eight bytes; the other variants copy
   all eight at once (variant A tells the compiler that PTR is 8-byte
   aligned) and byte-swap unless WORDS_BIGENDIAN is defined.  */
uint_least64_t
stdc_load8_aligned_beu64 (const unsigned char ptr[8])
{
# if _GL_LOADSTORE8_VARIANT_F
  uint_fast64_t acc = 0;
  for (int i = 0; i < 8; i++)
    acc = (acc << 8) | ptr[i];
  return acc;
# else
#  if _GL_LOADSTORE8_VARIANT_A
  const void *src = __builtin_assume_aligned (ptr, 8);
#  else /* _GL_LOADSTORE8_VARIANT_E */
  const void *src = ptr;
#  endif
  uint64_t raw;
  memcpy (&raw, src, sizeof raw);
#  ifdef WORDS_BIGENDIAN
  return raw;
#  else
  return bswap_64 (raw);
#  endif
# endif
}
/* Return the single byte at PTR.  With only one byte there is nothing
   to reorder.  */
uint_least8_t
stdc_load8_aligned_leu8 (const unsigned char ptr[1])
{
  return *ptr;
}
/* Read a 16-bit value whose bytes are stored least-significant first at
   PTR.  Variant F assembles it from the two bytes; the other variants
   copy both bytes at once (variant A tells the compiler that PTR is
   2-byte aligned) and byte-swap only when WORDS_BIGENDIAN is defined.  */
uint_least16_t
stdc_load8_aligned_leu16 (const unsigned char ptr[2])
{
# if _GL_LOADSTORE8_VARIANT_F
  uint_fast16_t lo = ptr[0];
  uint_fast16_t hi = ptr[1];
  return lo | (hi << 8);
# else
#  if _GL_LOADSTORE8_VARIANT_A
  const void *src = __builtin_assume_aligned (ptr, 2);
#  else /* _GL_LOADSTORE8_VARIANT_E */
  const void *src = ptr;
#  endif
  uint16_t raw;
  memcpy (&raw, src, sizeof raw);
#  ifdef WORDS_BIGENDIAN
  return bswap_16 (raw);
#  else
  return raw;
#  endif
# endif
}
/* Read a 32-bit value whose bytes are stored least-significant first at
   PTR.  Variant F accumulates the four bytes; the other variants copy
   all four at once (variant A tells the compiler that PTR is 4-byte
   aligned) and byte-swap only when WORDS_BIGENDIAN is defined.  */
uint_least32_t
stdc_load8_aligned_leu32 (const unsigned char ptr[4])
{
# if _GL_LOADSTORE8_VARIANT_F
  uint_fast32_t acc = 0;
  for (int i = 4; i-- > 0; )
    acc = (acc << 8) | ptr[i];
  return acc;
# else
#  if _GL_LOADSTORE8_VARIANT_A
  const void *src = __builtin_assume_aligned (ptr, 4);
#  else /* _GL_LOADSTORE8_VARIANT_E */
  const void *src = ptr;
#  endif
  uint32_t raw;
  memcpy (&raw, src, sizeof raw);
#  ifdef WORDS_BIGENDIAN
  return bswap_32 (raw);
#  else
  return raw;
#  endif
# endif
}
/* Read a 64-bit value whose bytes are stored least-significant first at
   PTR.  Variant F accumulates the eight bytes; the other variants copy
   all eight at once (variant A tells the compiler that PTR is 8-byte
   aligned) and byte-swap only when WORDS_BIGENDIAN is defined.  */
uint_least64_t
stdc_load8_aligned_leu64 (const unsigned char ptr[8])
{
# if _GL_LOADSTORE8_VARIANT_F
  uint_fast64_t acc = 0;
  for (int i = 8; i-- > 0; )
    acc = (acc << 8) | ptr[i];
  return acc;
# else
#  if _GL_LOADSTORE8_VARIANT_A
  const void *src = __builtin_assume_aligned (ptr, 8);
#  else /* _GL_LOADSTORE8_VARIANT_E */
  const void *src = ptr;
#  endif
  uint64_t raw;
  memcpy (&raw, src, sizeof raw);
#  ifdef WORDS_BIGENDIAN
  return bswap_64 (raw);
#  else
  return raw;
#  endif
# endif
}
/* Return the single byte at PTR.  */
static inline uint_least8_t
stdc_load8_beu8 (const unsigned char ptr[1])
{
  return *ptr;
}
/* Assemble a 16-bit value from two bytes stored most-significant first.
   The bytes are widened to uint_fast16_t before being combined.  */
static inline uint_least16_t
stdc_load8_beu16 (const unsigned char ptr[2])
{
  uint_fast16_t hi = ptr[0];
  uint_fast16_t lo = ptr[1];
  return (hi << 8) | lo;
}
/* Assemble a 32-bit value from four bytes stored most-significant first,
   one byte at a time, accumulating in uint_fast32_t.  */
static inline uint_least32_t
stdc_load8_beu32 (const unsigned char ptr[4])
{
  uint_fast32_t acc = 0;
  for (int i = 0; i < 4; i++)
    acc = (acc << 8) | ptr[i];
  return acc;
}
/* Assemble a 64-bit value from eight bytes stored most-significant
   first, one byte at a time, accumulating in uint_fast64_t.  */
static inline uint_least64_t
stdc_load8_beu64 (const unsigned char ptr[8])
{
  uint_fast64_t acc = 0;
  for (int i = 0; i < 8; i++)
    acc = (acc << 8) | ptr[i];
  return acc;
}
/* Return the single byte at PTR.  */
static inline uint_least8_t
stdc_load8_leu8 (const unsigned char ptr[1])
{
  return *ptr;
}
/* Assemble a 16-bit value from two bytes stored least-significant first.
   The bytes are widened to uint_fast16_t before being combined.  */
static inline uint_least16_t
stdc_load8_leu16 (const unsigned char ptr[2])
{
  uint_fast16_t lo = ptr[0];
  uint_fast16_t hi = ptr[1];
  return lo | (hi << 8);
}
/* Assemble a 32-bit value from four bytes stored least-significant
   first.  The bytes are visited from last to first so that each shift
   moves the already-collected, more significant bytes upward.  */
static inline uint_least32_t
stdc_load8_leu32 (const unsigned char ptr[4])
{
  uint_fast32_t acc = 0;
  for (int i = 4; i-- > 0; )
    acc = (acc << 8) | ptr[i];
  return acc;
}
/* Assemble a 64-bit value from eight bytes stored least-significant
   first.  The bytes are visited from last to first so that each shift
   moves the already-collected, more significant bytes upward.  */
static inline uint_least64_t
stdc_load8_leu64 (const unsigned char ptr[8])
{
  uint_fast64_t acc = 0;
  for (int i = 8; i-- > 0; )
    acc = (acc << 8) | ptr[i];
  return acc;
}
/* Store VALUE into the single byte at PTR.  */
void
stdc_store8_aligned_beu8 (uint_least8_t value, unsigned char ptr[1])
{
  *ptr = value;
}
/* Write the low 16 bits of VALUE to PTR, most-significant byte first.
   Variant F stores the two bytes individually; the other variants build
   the in-memory image (byte-swapped unless WORDS_BIGENDIAN is defined)
   and copy it in one go, variant A additionally telling the compiler
   that PTR is 2-byte aligned.  */
void
stdc_store8_aligned_beu16 (uint_least16_t value, unsigned char ptr[2])
{
# if _GL_LOADSTORE8_VARIANT_F
  ptr[0] = (unsigned char) ((value >> 8) & 0xFFU);
  ptr[1] = (unsigned char) (value & 0xFFU);
# else
#  ifdef WORDS_BIGENDIAN
  const uint16_t image = value;
#  else
  const uint16_t image = bswap_16 (value);
#  endif
#  if _GL_LOADSTORE8_VARIANT_A
  void *dst = __builtin_assume_aligned (ptr, 2);
#  else /* _GL_LOADSTORE8_VARIANT_E */
  void *dst = ptr;
#  endif
  memcpy (dst, &image, sizeof image);
# endif
}
/* Write the low 32 bits of VALUE to PTR, most-significant byte first.
   Variant F stores the four bytes individually; the other variants
   build the in-memory image (byte-swapped unless WORDS_BIGENDIAN is
   defined) and copy it in one go, variant A additionally telling the
   compiler that PTR is 4-byte aligned.  */
void
stdc_store8_aligned_beu32 (uint_least32_t value, unsigned char ptr[4])
{
# if _GL_LOADSTORE8_VARIANT_F
  for (int i = 0; i < 4; i++)
    ptr[i] = (unsigned char) (value >> (24 - 8 * i)) & 0xFFU;
# else
#  ifdef WORDS_BIGENDIAN
  const uint32_t image = value;
#  else
  const uint32_t image = bswap_32 (value);
#  endif
#  if _GL_LOADSTORE8_VARIANT_A
  void *dst = __builtin_assume_aligned (ptr, 4);
#  else /* _GL_LOADSTORE8_VARIANT_E */
  void *dst = ptr;
#  endif
  memcpy (dst, &image, sizeof image);
# endif
}
/* Write the low 64 bits of VALUE to PTR, most-significant byte first.
   Variant F stores the eight bytes individually; the other variants
   build the in-memory image (byte-swapped unless WORDS_BIGENDIAN is
   defined) and copy it in one go, variant A additionally telling the
   compiler that PTR is 8-byte aligned.  */
void
stdc_store8_aligned_beu64 (uint_least64_t value, unsigned char ptr[8])
{
# if _GL_LOADSTORE8_VARIANT_F
  for (int i = 0; i < 8; i++)
    ptr[i] = (unsigned char) (value >> (56 - 8 * i)) & 0xFFU;
# else
#  ifdef WORDS_BIGENDIAN
  const uint64_t image = value;
#  else
  const uint64_t image = bswap_64 (value);
#  endif
#  if _GL_LOADSTORE8_VARIANT_A
  void *dst = __builtin_assume_aligned (ptr, 8);
#  else /* _GL_LOADSTORE8_VARIANT_E */
  void *dst = ptr;
#  endif
  memcpy (dst, &image, sizeof image);
# endif
}
/* Store VALUE into the single byte at PTR.  */
void
stdc_store8_aligned_leu8 (uint_least8_t value, unsigned char ptr[1])
{
  *ptr = value;
}
/* Write the low 16 bits of VALUE to PTR, least-significant byte first.
   Variant F stores the two bytes individually; the other variants build
   the in-memory image (byte-swapped only when WORDS_BIGENDIAN is
   defined) and copy it in one go, variant A additionally telling the
   compiler that PTR is 2-byte aligned.  */
void
stdc_store8_aligned_leu16 (uint_least16_t value, unsigned char ptr[2])
{
# if _GL_LOADSTORE8_VARIANT_F
  ptr[0] = (unsigned char) (value & 0xFFU);
  ptr[1] = (unsigned char) ((value >> 8) & 0xFFU);
# else
#  ifdef WORDS_BIGENDIAN
  const uint16_t image = bswap_16 (value);
#  else
  const uint16_t image = value;
#  endif
#  if _GL_LOADSTORE8_VARIANT_A
  void *dst = __builtin_assume_aligned (ptr, 2);
#  else /* _GL_LOADSTORE8_VARIANT_E */
  void *dst = ptr;
#  endif
  memcpy (dst, &image, sizeof image);
# endif
}
/* Write the low 32 bits of VALUE to PTR, least-significant byte first.
   Variant F stores the four bytes individually; the other variants
   build the in-memory image (byte-swapped only when WORDS_BIGENDIAN is
   defined) and copy it in one go, variant A additionally telling the
   compiler that PTR is 4-byte aligned.  */
void
stdc_store8_aligned_leu32 (uint_least32_t value, unsigned char ptr[4])
{
# if _GL_LOADSTORE8_VARIANT_F
  for (int i = 0; i < 4; i++)
    ptr[i] = (unsigned char) (value >> (8 * i)) & 0xFFU;
# else
#  ifdef WORDS_BIGENDIAN
  const uint32_t image = bswap_32 (value);
#  else
  const uint32_t image = value;
#  endif
#  if _GL_LOADSTORE8_VARIANT_A
  void *dst = __builtin_assume_aligned (ptr, 4);
#  else /* _GL_LOADSTORE8_VARIANT_E */
  void *dst = ptr;
#  endif
  memcpy (dst, &image, sizeof image);
# endif
}
/* Write the low 64 bits of VALUE to PTR, least-significant byte first.
   Variant F stores the eight bytes individually; the other variants
   build the in-memory image (byte-swapped only when WORDS_BIGENDIAN is
   defined) and copy it in one go, variant A additionally telling the
   compiler that PTR is 8-byte aligned.  */
void
stdc_store8_aligned_leu64 (uint_least64_t value, unsigned char ptr[8])
{
# if _GL_LOADSTORE8_VARIANT_F
  for (int i = 0; i < 8; i++)
    ptr[i] = (unsigned char) (value >> (8 * i)) & 0xFFU;
# else
#  ifdef WORDS_BIGENDIAN
  const uint64_t image = bswap_64 (value);
#  else
  const uint64_t image = value;
#  endif
#  if _GL_LOADSTORE8_VARIANT_A
  void *dst = __builtin_assume_aligned (ptr, 8);
#  else /* _GL_LOADSTORE8_VARIANT_E */
  void *dst = ptr;
#  endif
  memcpy (dst, &image, sizeof image);
# endif
}
/* Store VALUE into the single byte at PTR.  */
static inline void
stdc_store8_beu8 (uint_least8_t value, unsigned char ptr[1])
{
  *ptr = value;
}
/* Write the low 16 bits of VALUE to PTR as two bytes, most-significant
   byte first.  */
static inline void
stdc_store8_beu16 (uint_least16_t value, unsigned char ptr[2])
{
  ptr[0] = (unsigned char) ((value >> 8) & 0xFFU);
  ptr[1] = (unsigned char) (value & 0xFFU);
}
/* Write the low 32 bits of VALUE to PTR as four bytes, most-significant
   byte first.  Byte I receives bits (24 - 8*I) and up, masked to 8 bits.  */
static inline void
stdc_store8_beu32 (uint_least32_t value, unsigned char ptr[4])
{
  for (int i = 0; i < 4; i++)
    ptr[i] = (unsigned char) (value >> (24 - 8 * i)) & 0xFFU;
}
/* Write the low 64 bits of VALUE to PTR as eight bytes, most-significant
   byte first.  Byte I receives bits (56 - 8*I) and up, masked to 8 bits.  */
static inline void
stdc_store8_beu64 (uint_least64_t value, unsigned char ptr[8])
{
  for (int i = 0; i < 8; i++)
    ptr[i] = (unsigned char) (value >> (56 - 8 * i)) & 0xFFU;
}
/* Store VALUE into the single byte at PTR.  */
static inline void
stdc_store8_leu8 (uint_least8_t value, unsigned char ptr[1])
{
  *ptr = value;
}
/* Write the low 16 bits of VALUE to PTR as two bytes, least-significant
   byte first.  */
static inline void
stdc_store8_leu16 (uint_least16_t value, unsigned char ptr[2])
{
  ptr[0] = (unsigned char) (value & 0xFFU);
  ptr[1] = (unsigned char) ((value >> 8) & 0xFFU);
}
/* Write the low 32 bits of VALUE to PTR as four bytes, least-significant
   byte first.  Byte I receives bits 8*I and up, masked to 8 bits.  */
static inline void
stdc_store8_leu32 (uint_least32_t value, unsigned char ptr[4])
{
  for (int i = 0; i < 4; i++)
    ptr[i] = (unsigned char) (value >> (8 * i)) & 0xFFU;
}
/* Write the low 64 bits of VALUE to PTR as eight bytes, least-significant
   byte first.  Byte I receives bits 8*I and up, masked to 8 bits.  */
static inline void
stdc_store8_leu64 (uint_least64_t value, unsigned char ptr[8])
{
  for (int i = 0; i < 8; i++)
    ptr[i] = (unsigned char) (value >> (8 * i)) & 0xFFU;
}
#include <stdint.h>
/* Return the single byte at PTR.  */
static inline uint_least8_t
stdc_load8_beu8 (const unsigned char ptr[1])
{
  return *ptr;
}
/* Assemble a 16-bit value from two bytes stored most-significant first.
   The high byte is widened to uint_fast16_t before shifting, so the
   shift is done in an unsigned type; shifting the promoted int instead
   would overflow when int is only 16 bits wide and the byte is >= 0x80.
   This also matches the 32- and 64-bit loaders, which widen to
   uint_fastN_t before shifting.  */
static inline uint_least16_t
stdc_load8_beu16 (const unsigned char ptr[2])
{
  return ((uint_fast16_t) ptr[0] << 8) | (uint_fast16_t) ptr[1];
}
/* Assemble a 32-bit value from four bytes stored most-significant first,
   one byte at a time, accumulating in uint_fast32_t.  */
static inline uint_least32_t
stdc_load8_beu32 (const unsigned char ptr[4])
{
  uint_fast32_t acc = 0;
  for (int i = 0; i < 4; i++)
    acc = (acc << 8) | ptr[i];
  return acc;
}
/* Assemble a 64-bit value from eight bytes stored most-significant
   first, one byte at a time, accumulating in uint_fast64_t.  */
static inline uint_least64_t
stdc_load8_beu64 (const unsigned char ptr[8])
{
  uint_fast64_t acc = 0;
  for (int i = 0; i < 8; i++)
    acc = (acc << 8) | ptr[i];
  return acc;
}
/* Return the single byte at PTR.  */
static inline uint_least8_t
stdc_load8_leu8 (const unsigned char ptr[1])
{
  return *ptr;
}
/* Assemble a 16-bit value from two bytes stored least-significant first.
   The high byte is widened to uint_fast16_t before shifting, so the
   shift is done in an unsigned type; shifting the promoted int instead
   would overflow when int is only 16 bits wide and the byte is >= 0x80.
   This also matches the 32- and 64-bit loaders, which widen to
   uint_fastN_t before shifting.  */
static inline uint_least16_t
stdc_load8_leu16 (const unsigned char ptr[2])
{
  return (uint_fast16_t) ptr[0] | ((uint_fast16_t) ptr[1] << 8);
}
/* Assemble a 32-bit value from four bytes stored least-significant
   first.  The bytes are visited from last to first so that each shift
   moves the already-collected, more significant bytes upward.  */
static inline uint_least32_t
stdc_load8_leu32 (const unsigned char ptr[4])
{
  uint_fast32_t acc = 0;
  for (int i = 4; i-- > 0; )
    acc = (acc << 8) | ptr[i];
  return acc;
}
/* Assemble a 64-bit value from eight bytes stored least-significant
   first.  The bytes are visited from last to first so that each shift
   moves the already-collected, more significant bytes upward.  */
static inline uint_least64_t
stdc_load8_leu64 (const unsigned char ptr[8])
{
  uint_fast64_t acc = 0;
  for (int i = 8; i-- > 0; )
    acc = (acc << 8) | ptr[i];
  return acc;
}
/* Aligned-pointer entry point.  It has no alignment-specific path of its
   own: it forwards PTR to stdc_load8_beu8 and returns that result.  */
uint_least8_t
stdc_load8_aligned_beu8 (const unsigned char ptr[1])
{
  const uint_least8_t loaded = stdc_load8_beu8 (ptr);
  return loaded;
}
/* Aligned-pointer entry point.  It has no alignment-specific path of its
   own: it forwards PTR to stdc_load8_beu16 and returns that result.  */
uint_least16_t
stdc_load8_aligned_beu16 (const unsigned char ptr[2])
{
  const uint_least16_t loaded = stdc_load8_beu16 (ptr);
  return loaded;
}
/* Aligned-pointer entry point.  It has no alignment-specific path of its
   own: it forwards PTR to stdc_load8_beu32 and returns that result.  */
uint_least32_t
stdc_load8_aligned_beu32 (const unsigned char ptr[4])
{
  const uint_least32_t loaded = stdc_load8_beu32 (ptr);
  return loaded;
}
/* Aligned-pointer entry point.  It has no alignment-specific path of its
   own: it forwards PTR to stdc_load8_beu64 and returns that result.  */
uint_least64_t
stdc_load8_aligned_beu64 (const unsigned char ptr[8])
{
  const uint_least64_t loaded = stdc_load8_beu64 (ptr);
  return loaded;
}
/* Aligned-pointer entry point.  It has no alignment-specific path of its
   own: it forwards PTR to stdc_load8_leu8 and returns that result.  */
uint_least8_t
stdc_load8_aligned_leu8 (const unsigned char ptr[1])
{
  const uint_least8_t loaded = stdc_load8_leu8 (ptr);
  return loaded;
}
/* Aligned-pointer entry point.  It has no alignment-specific path of its
   own: it forwards PTR to stdc_load8_leu16 and returns that result.  */
uint_least16_t
stdc_load8_aligned_leu16 (const unsigned char ptr[2])
{
  const uint_least16_t loaded = stdc_load8_leu16 (ptr);
  return loaded;
}
/* Aligned-pointer entry point.  It has no alignment-specific path of its
   own: it forwards PTR to stdc_load8_leu32 and returns that result.  */
uint_least32_t
stdc_load8_aligned_leu32 (const unsigned char ptr[4])
{
  const uint_least32_t loaded = stdc_load8_leu32 (ptr);
  return loaded;
}
/* Aligned-pointer entry point.  It has no alignment-specific path of its
   own: it forwards PTR to stdc_load8_leu64 and returns that result.  */
uint_least64_t
stdc_load8_aligned_leu64 (const unsigned char ptr[8])
{
  const uint_least64_t loaded = stdc_load8_leu64 (ptr);
  return loaded;
}
/* Store VALUE into the single byte at PTR.  */
static inline void
stdc_store8_beu8 (uint_least8_t value, unsigned char ptr[1])
{
  *ptr = value;
}
/* Write the low 16 bits of VALUE to PTR as two bytes, most-significant
   byte first.  */
static inline void
stdc_store8_beu16 (uint_least16_t value, unsigned char ptr[2])
{
  const uint_least16_t high = value >> 8;
  const uint_least16_t low = value;
  ptr[0] = high & 0xFFU;
  ptr[1] = low & 0xFFU;
}
/* Write the low 32 bits of VALUE to PTR as four bytes, most-significant
   byte first.  The value is peeled off eight bits at a time from the
   low end, so the buffer is filled from its last byte to its first.  */
static inline void
stdc_store8_beu32 (uint_least32_t value, unsigned char ptr[4])
{
  uint_least32_t rest = value;
  for (int i = 3; i >= 0; i--)
    {
      ptr[i] = rest & 0xFFU;
      rest >>= 8;
    }
}
/* Write the low 64 bits of VALUE to PTR as eight bytes, most-significant
   byte first.  The value is peeled off eight bits at a time from the
   low end, so the buffer is filled from its last byte to its first.  */
static inline void
stdc_store8_beu64 (uint_least64_t value, unsigned char ptr[8])
{
  uint_least64_t rest = value;
  for (int i = 7; i >= 0; i--)
    {
      ptr[i] = rest & 0xFFU;
      rest >>= 8;
    }
}
/* Store VALUE into the single byte at PTR.  */
static inline void
stdc_store8_leu8 (uint_least8_t value, unsigned char ptr[1])
{
  *ptr = value;
}
/* Write the low 16 bits of VALUE to PTR as two bytes, least-significant
   byte first.  */
static inline void
stdc_store8_leu16 (uint_least16_t value, unsigned char ptr[2])
{
  const uint_least16_t low = value;
  const uint_least16_t high = value >> 8;
  ptr[0] = low & 0xFFU;
  ptr[1] = high & 0xFFU;
}
/* Write the low 32 bits of VALUE to PTR as four bytes, least-significant
   byte first, peeling eight bits off the low end on each step.  */
static inline void
stdc_store8_leu32 (uint_least32_t value, unsigned char ptr[4])
{
  uint_least32_t rest = value;
  for (int i = 0; i < 4; i++)
    {
      ptr[i] = rest & 0xFFU;
      rest >>= 8;
    }
}
/* Write the low 64 bits of VALUE to PTR as eight bytes, least-significant
   byte first, peeling eight bits off the low end on each step.  */
static inline void
stdc_store8_leu64 (uint_least64_t value, unsigned char ptr[8])
{
  uint_least64_t rest = value;
  for (int i = 0; i < 8; i++)
    {
      ptr[i] = rest & 0xFFU;
      rest >>= 8;
    }
}
/* Aligned-pointer entry point.  It has no alignment-specific path of its
   own: it hands VALUE and PTR to stdc_store8_beu8.  */
void
stdc_store8_aligned_beu8 (uint_least8_t value, unsigned char ptr[1])
{
  unsigned char *const dest = ptr;
  stdc_store8_beu8 (value, dest);
}
/* Aligned-pointer entry point.  It has no alignment-specific path of its
   own: it hands VALUE and PTR to stdc_store8_beu16.  */
void
stdc_store8_aligned_beu16 (uint_least16_t value, unsigned char ptr[2])
{
  unsigned char *const dest = ptr;
  stdc_store8_beu16 (value, dest);
}
/* Aligned-pointer entry point.  It has no alignment-specific path of its
   own: it hands VALUE and PTR to stdc_store8_beu32.  */
void
stdc_store8_aligned_beu32 (uint_least32_t value, unsigned char ptr[4])
{
  unsigned char *const dest = ptr;
  stdc_store8_beu32 (value, dest);
}
/* Aligned-pointer entry point.  It has no alignment-specific path of its
   own: it hands VALUE and PTR to stdc_store8_beu64.  */
void
stdc_store8_aligned_beu64 (uint_least64_t value, unsigned char ptr[8])
{
  unsigned char *const dest = ptr;
  stdc_store8_beu64 (value, dest);
}
/* Aligned-pointer entry point.  It has no alignment-specific path of its
   own: it hands VALUE and PTR to stdc_store8_leu8.  */
void
stdc_store8_aligned_leu8 (uint_least8_t value, unsigned char ptr[1])
{
  unsigned char *const dest = ptr;
  stdc_store8_leu8 (value, dest);
}
/* Aligned-pointer entry point.  It has no alignment-specific path of its
   own: it hands VALUE and PTR to stdc_store8_leu16.  */
void
stdc_store8_aligned_leu16 (uint_least16_t value, unsigned char ptr[2])
{
  unsigned char *const dest = ptr;
  stdc_store8_leu16 (value, dest);
}
/* Aligned-pointer entry point.  It has no alignment-specific path of its
   own: it hands VALUE and PTR to stdc_store8_leu32.  */
void
stdc_store8_aligned_leu32 (uint_least32_t value, unsigned char ptr[4])
{
  unsigned char *const dest = ptr;
  stdc_store8_leu32 (value, dest);
}
/* Aligned-pointer entry point.  It has no alignment-specific path of its
   own: it hands VALUE and PTR to stdc_store8_leu64.  */
void
stdc_store8_aligned_leu64 (uint_least64_t value, unsigned char ptr[8])
{
  unsigned char *const dest = ptr;
  stdc_store8_leu64 (value, dest);
}
.file "loadstore8-bruno.c"
.option pic
.attribute arch,
"rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0_zifencei2p0"
.attribute unaligned_access, 0
.attribute stack_align, 16
.text
.align 1
.globl stdc_load8_aligned_beu8
.type stdc_load8_aligned_beu8, @function
stdc_load8_aligned_beu8:
.LFB17:
.cfi_startproc
lbu a0,0(a0)
ret
.cfi_endproc
.LFE17:
.size stdc_load8_aligned_beu8, .-stdc_load8_aligned_beu8
.align 1
.globl stdc_load8_aligned_beu16
.type stdc_load8_aligned_beu16, @function
stdc_load8_aligned_beu16:
.LFB18:
.cfi_startproc
lhu a5,0(a0)
lhu a4,0(a0)
slliw a0,a5,8
srliw a5,a4,8
or a0,a0,a5
slli a0,a0,48
srli a0,a0,48
ret
.cfi_endproc
.LFE18:
.size stdc_load8_aligned_beu16, .-stdc_load8_aligned_beu16
.globl __bswapsi2
.align 1
.globl stdc_load8_aligned_beu32
.type stdc_load8_aligned_beu32, @function
stdc_load8_aligned_beu32:
.LFB19:
.cfi_startproc
addi sp,sp,-16
.cfi_def_cfa_offset 16
sd ra,8(sp)
.cfi_offset 1, -8
lw a0,0(a0)
call __bswapsi2@plt
ld ra,8(sp)
.cfi_restore 1
sext.w a0,a0
addi sp,sp,16
.cfi_def_cfa_offset 0
jr ra
.cfi_endproc
.LFE19:
.size stdc_load8_aligned_beu32, .-stdc_load8_aligned_beu32
.globl __bswapdi2
.align 1
.globl stdc_load8_aligned_beu64
.type stdc_load8_aligned_beu64, @function
stdc_load8_aligned_beu64:
.LFB20:
.cfi_startproc
addi sp,sp,-16
.cfi_def_cfa_offset 16
sd ra,8(sp)
.cfi_offset 1, -8
ld a0,0(a0)
call __bswapdi2@plt
ld ra,8(sp)
.cfi_restore 1
addi sp,sp,16
.cfi_def_cfa_offset 0
jr ra
.cfi_endproc
.LFE20:
.size stdc_load8_aligned_beu64, .-stdc_load8_aligned_beu64
.align 1
.globl stdc_load8_aligned_leu8
.type stdc_load8_aligned_leu8, @function
stdc_load8_aligned_leu8:
.LFB50:
.cfi_startproc
lbu a0,0(a0)
ret
.cfi_endproc
.LFE50:
.size stdc_load8_aligned_leu8, .-stdc_load8_aligned_leu8
.align 1
.globl stdc_load8_aligned_leu16
.type stdc_load8_aligned_leu16, @function
stdc_load8_aligned_leu16:
.LFB22:
.cfi_startproc
lhu a0,0(a0)
ret
.cfi_endproc
.LFE22:
.size stdc_load8_aligned_leu16, .-stdc_load8_aligned_leu16
.align 1
.globl stdc_load8_aligned_leu32
.type stdc_load8_aligned_leu32, @function
stdc_load8_aligned_leu32:
.LFB23:
.cfi_startproc
lw a0,0(a0)
ret
.cfi_endproc
.LFE23:
.size stdc_load8_aligned_leu32, .-stdc_load8_aligned_leu32
.align 1
.globl stdc_load8_aligned_leu64
.type stdc_load8_aligned_leu64, @function
stdc_load8_aligned_leu64:
.LFB24:
.cfi_startproc
ld a0,0(a0)
ret
.cfi_endproc
.LFE24:
.size stdc_load8_aligned_leu64, .-stdc_load8_aligned_leu64
.align 1
.globl stdc_store8_aligned_beu8
.type stdc_store8_aligned_beu8, @function
stdc_store8_aligned_beu8:
.LFB33:
.cfi_startproc
sb a0,0(a1)
ret
.cfi_endproc
.LFE33:
.size stdc_store8_aligned_beu8, .-stdc_store8_aligned_beu8
.align 1
.globl stdc_store8_aligned_beu16
.type stdc_store8_aligned_beu16, @function
stdc_store8_aligned_beu16:
.LFB34:
.cfi_startproc
slliw a5,a0,8
srliw a0,a0,8
or a5,a5,a0
sh a5,0(a1)
ret
.cfi_endproc
.LFE34:
.size stdc_store8_aligned_beu16, .-stdc_store8_aligned_beu16
.align 1
.globl stdc_store8_aligned_beu32
.type stdc_store8_aligned_beu32, @function
stdc_store8_aligned_beu32:
.LFB35:
.cfi_startproc
addi sp,sp,-16
.cfi_def_cfa_offset 16
sd s0,0(sp)
sd ra,8(sp)
.cfi_offset 8, -16
.cfi_offset 1, -8
mv s0,a1
call __bswapsi2@plt
sw a0,0(s0)
ld ra,8(sp)
.cfi_restore 1
ld s0,0(sp)
.cfi_restore 8
addi sp,sp,16
.cfi_def_cfa_offset 0
jr ra
.cfi_endproc
.LFE35:
.size stdc_store8_aligned_beu32, .-stdc_store8_aligned_beu32
.align 1
.globl stdc_store8_aligned_beu64
.type stdc_store8_aligned_beu64, @function
stdc_store8_aligned_beu64:
.LFB36:
.cfi_startproc
addi sp,sp,-16
.cfi_def_cfa_offset 16
sd s0,0(sp)
sd ra,8(sp)
.cfi_offset 8, -16
.cfi_offset 1, -8
mv s0,a1
call __bswapdi2@plt
sd a0,0(s0)
ld ra,8(sp)
.cfi_restore 1
ld s0,0(sp)
.cfi_restore 8
addi sp,sp,16
.cfi_def_cfa_offset 0
jr ra
.cfi_endproc
.LFE36:
.size stdc_store8_aligned_beu64, .-stdc_store8_aligned_beu64
.align 1
.globl stdc_store8_aligned_leu8
.type stdc_store8_aligned_leu8, @function
stdc_store8_aligned_leu8:
.LFB52:
.cfi_startproc
sb a0,0(a1)
ret
.cfi_endproc
.LFE52:
.size stdc_store8_aligned_leu8, .-stdc_store8_aligned_leu8
.align 1
.globl stdc_store8_aligned_leu16
.type stdc_store8_aligned_leu16, @function
stdc_store8_aligned_leu16:
.LFB38:
.cfi_startproc
sh a0,0(a1)
ret
.cfi_endproc
.LFE38:
.size stdc_store8_aligned_leu16, .-stdc_store8_aligned_leu16
.align 1
.globl stdc_store8_aligned_leu32
.type stdc_store8_aligned_leu32, @function
stdc_store8_aligned_leu32:
.LFB39:
.cfi_startproc
sw a0,0(a1)
ret
.cfi_endproc
.LFE39:
.size stdc_store8_aligned_leu32, .-stdc_store8_aligned_leu32
.align 1
.globl stdc_store8_aligned_leu64
.type stdc_store8_aligned_leu64, @function
stdc_store8_aligned_leu64:
.LFB40:
.cfi_startproc
sd a0,0(a1)
ret
.cfi_endproc
.LFE40:
.size stdc_store8_aligned_leu64, .-stdc_store8_aligned_leu64
.ident "GCC: (Ubuntu 13.3.0-6ubuntu2~24.04) 13.3.0"
.section .note.GNU-stack,"",@progbits
.file "loadstore8-paul.c"
.option pic
.attribute arch,
"rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0_zifencei2p0"
.attribute unaligned_access, 0
.attribute stack_align, 16
.text
.align 1
.globl stdc_load8_aligned_beu8
.type stdc_load8_aligned_beu8, @function
stdc_load8_aligned_beu8:
.LFB8:
.cfi_startproc
lbu a0,0(a0)
ret
.cfi_endproc
.LFE8:
.size stdc_load8_aligned_beu8, .-stdc_load8_aligned_beu8
.align 1
.globl stdc_load8_aligned_beu16
.type stdc_load8_aligned_beu16, @function
stdc_load8_aligned_beu16:
.LFB9:
.cfi_startproc
lbu a5,1(a0)
lbu a4,0(a0)
slli a0,a5,8
or a0,a0,a4
slliw a5,a0,8
srli a0,a0,8
or a0,a0,a5
slli a0,a0,48
srli a0,a0,48
ret
.cfi_endproc
.LFE9:
.size stdc_load8_aligned_beu16, .-stdc_load8_aligned_beu16
.align 1
.globl stdc_load8_aligned_beu32
.type stdc_load8_aligned_beu32, @function
stdc_load8_aligned_beu32:
.LFB10:
.cfi_startproc
lbu a5,0(a0)
lbu a3,1(a0)
lbu a2,3(a0)
lbu a4,2(a0)
slliw a0,a5,24
slliw a5,a3,16
or a0,a0,a5
or a0,a0,a2
slliw a5,a4,8
or a0,a0,a5
sext.w a0,a0
ret
.cfi_endproc
.LFE10:
.size stdc_load8_aligned_beu32, .-stdc_load8_aligned_beu32
.align 1
.globl stdc_load8_aligned_beu64
.type stdc_load8_aligned_beu64, @function
stdc_load8_aligned_beu64:
.LFB11:
.cfi_startproc
lbu a5,0(a0)
lbu a4,1(a0)
lbu a6,7(a0)
lbu a1,2(a0)
lbu a2,3(a0)
slli a4,a4,48
slli a5,a5,56
lbu a3,4(a0)
or a5,a5,a4
or a5,a5,a6
lbu a4,5(a0)
slli a1,a1,40
lbu a0,6(a0)
or a5,a5,a1
slli a2,a2,32
or a5,a5,a2
slli a3,a3,24
or a5,a5,a3
slli a4,a4,16
or a5,a5,a4
slli a0,a0,8
or a0,a5,a0
ret
.cfi_endproc
.LFE11:
.size stdc_load8_aligned_beu64, .-stdc_load8_aligned_beu64
.align 1
.globl stdc_load8_aligned_leu8
.type stdc_load8_aligned_leu8, @function
stdc_load8_aligned_leu8:
.LFB33:
.cfi_startproc
lbu a0,0(a0)
ret
.cfi_endproc
.LFE33:
.size stdc_load8_aligned_leu8, .-stdc_load8_aligned_leu8
.align 1
.globl stdc_load8_aligned_leu16
.type stdc_load8_aligned_leu16, @function
stdc_load8_aligned_leu16:
.LFB13:
.cfi_startproc
lbu a5,1(a0)
lbu a0,0(a0)
slli a5,a5,8
or a0,a5,a0
ret
.cfi_endproc
.LFE13:
.size stdc_load8_aligned_leu16, .-stdc_load8_aligned_leu16
.align 1
.globl stdc_load8_aligned_leu32
.type stdc_load8_aligned_leu32, @function
stdc_load8_aligned_leu32:
.LFB14:
.cfi_startproc
lbu a4,1(a0)
lbu a3,0(a0)
lbu a5,2(a0)
lbu a0,3(a0)
slli a4,a4,8
or a4,a4,a3
slli a5,a5,16
or a5,a5,a4
slli a0,a0,24
or a0,a0,a5
sext.w a0,a0
ret
.cfi_endproc
.LFE14:
.size stdc_load8_aligned_leu32, .-stdc_load8_aligned_leu32
.align 1
.globl stdc_load8_aligned_leu64
.type stdc_load8_aligned_leu64, @function
stdc_load8_aligned_leu64:
.LFB15:
.cfi_startproc
lbu a6,1(a0)
lbu a5,0(a0)
lbu a1,2(a0)
lbu a2,3(a0)
lbu a3,4(a0)
slli a6,a6,8
lbu a4,5(a0)
or a6,a6,a5
slli a1,a1,16
lbu a5,6(a0)
or a1,a1,a6
slli a2,a2,24
lbu a0,7(a0)
or a2,a2,a1
slli a3,a3,32
or a3,a3,a2
slli a4,a4,40
or a4,a4,a3
slli a5,a5,48
or a5,a5,a4
slli a0,a0,56
or a0,a0,a5
ret
.cfi_endproc
.LFE15:
.size stdc_load8_aligned_leu64, .-stdc_load8_aligned_leu64
.align 1
.globl stdc_store8_aligned_beu8
.type stdc_store8_aligned_beu8, @function
stdc_store8_aligned_beu8:
.LFB24:
.cfi_startproc
sb a0,0(a1)
ret
.cfi_endproc
.LFE24:
.size stdc_store8_aligned_beu8, .-stdc_store8_aligned_beu8
.align 1
.globl stdc_store8_aligned_beu16
.type stdc_store8_aligned_beu16, @function
stdc_store8_aligned_beu16:
.LFB25:
.cfi_startproc
srliw a5,a0,8
sb a5,0(a1)
sb a0,1(a1)
ret
.cfi_endproc
.LFE25:
.size stdc_store8_aligned_beu16, .-stdc_store8_aligned_beu16
.align 1
.globl stdc_store8_aligned_beu32
.type stdc_store8_aligned_beu32, @function
stdc_store8_aligned_beu32:
.LFB26:
.cfi_startproc
srliw a3,a0,24
srliw a4,a0,16
srliw a5,a0,8
sb a3,0(a1)
sb a4,1(a1)
sb a5,2(a1)
sb a0,3(a1)
ret
.cfi_endproc
.LFE26:
.size stdc_store8_aligned_beu32, .-stdc_store8_aligned_beu32
.align 1
.globl stdc_store8_aligned_beu64
.type stdc_store8_aligned_beu64, @function
stdc_store8_aligned_beu64:
.LFB27:
.cfi_startproc
srli t1,a0,56
srli a7,a0,48
srli a6,a0,40
srli a2,a0,32
srli a3,a0,24
srli a4,a0,16
srli a5,a0,8
sb t1,0(a1)
sb a7,1(a1)
sb a6,2(a1)
sb a2,3(a1)
sb a3,4(a1)
sb a4,5(a1)
sb a5,6(a1)
sb a0,7(a1)
ret
.cfi_endproc
.LFE27:
.size stdc_store8_aligned_beu64, .-stdc_store8_aligned_beu64
.align 1
.globl stdc_store8_aligned_leu8
.type stdc_store8_aligned_leu8, @function
stdc_store8_aligned_leu8:
.LFB35:
.cfi_startproc
sb a0,0(a1)
ret
.cfi_endproc
.LFE35:
.size stdc_store8_aligned_leu8, .-stdc_store8_aligned_leu8
.align 1
.globl stdc_store8_aligned_leu16
.type stdc_store8_aligned_leu16, @function
stdc_store8_aligned_leu16:
.LFB29:
.cfi_startproc
srliw a5,a0,8
sb a0,0(a1)
sb a5,1(a1)
ret
.cfi_endproc
.LFE29:
.size stdc_store8_aligned_leu16, .-stdc_store8_aligned_leu16
.align 1
.globl stdc_store8_aligned_leu32
.type stdc_store8_aligned_leu32, @function
stdc_store8_aligned_leu32:
.LFB30:
.cfi_startproc
srliw a3,a0,8
srliw a4,a0,16
srliw a5,a0,24
sb a0,0(a1)
sb a3,1(a1)
sb a4,2(a1)
sb a5,3(a1)
ret
.cfi_endproc
.LFE30:
.size stdc_store8_aligned_leu32, .-stdc_store8_aligned_leu32
.align 1
.globl stdc_store8_aligned_leu64
.type stdc_store8_aligned_leu64, @function
stdc_store8_aligned_leu64:
.LFB31:
.cfi_startproc
srli t1,a0,8
srli a7,a0,16
srli a6,a0,24
srli a2,a0,32
srli a3,a0,40
srli a4,a0,48
srli a5,a0,56
sb a0,0(a1)
sb t1,1(a1)
sb a7,2(a1)
sb a6,3(a1)
sb a2,4(a1)
sb a3,5(a1)
sb a4,6(a1)
sb a5,7(a1)
ret
.cfi_endproc
.LFE31:
.size stdc_store8_aligned_leu64, .-stdc_store8_aligned_leu64
.ident "GCC: (Ubuntu 13.3.0-6ubuntu2~24.04) 13.3.0"
.section .note.GNU-stack,"",@progbits