The virtio ring buffer uses a lockless ring buffer scheme. While a guest vcpu is reading the memory, the qemu io thread may be writing to the same address. This requires atomic operations on the qemu side, but __builtin_memcpy may access the memory byte-by-byte.
This patch uses direct pointer loads and stores to fix this; however, it may have a negative performance impact on systems that do not support unaligned memory access in hardware. Signed-off-by: Bibo Mao <maob...@loongson.cn> --- include/qemu/bswap.h | 34 ++++++++++++---------------------- 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/include/qemu/bswap.h b/include/qemu/bswap.h index 2d3bb8b..b914d33 100644 --- a/include/qemu/bswap.h +++ b/include/qemu/bswap.h @@ -327,56 +327,46 @@ static inline void stb_p(void *ptr, uint8_t v) } /* - * Any compiler worth its salt will turn these memcpy into native unaligned - * operations. Thus we don't need to play games with packed attributes, or - * inline byte-by-byte stores. - * Some compilation environments (eg some fortify-source implementations) - * may intercept memcpy() in a way that defeats the compiler optimization, - * though, so we use __builtin_memcpy() to give ourselves the best chance - * of good performance. + * Some driver using lockless ring buffer like virtio ring requires that + * it should be atomic, since guest vcpu thread is reading the memory. + * It may bring out negative performance effect for architectures which + * do not support hw memory aligned access like mips, if ptr is not word + * alignment. 
*/ static inline int lduw_he_p(const void *ptr) { - uint16_t r; - __builtin_memcpy(&r, ptr, sizeof(r)); - return r; + return *(uint16_t *)ptr; } static inline int ldsw_he_p(const void *ptr) { - int16_t r; - __builtin_memcpy(&r, ptr, sizeof(r)); - return r; + return *(int16_t *)ptr; } static inline void stw_he_p(void *ptr, uint16_t v) { - __builtin_memcpy(ptr, &v, sizeof(v)); + *(uint16_t *)ptr = v; } static inline int ldl_he_p(const void *ptr) { - int32_t r; - __builtin_memcpy(&r, ptr, sizeof(r)); - return r; + return *(int32_t *)ptr; } static inline void stl_he_p(void *ptr, uint32_t v) { - __builtin_memcpy(ptr, &v, sizeof(v)); + *(uint32_t *)ptr = v; } static inline uint64_t ldq_he_p(const void *ptr) { - uint64_t r; - __builtin_memcpy(&r, ptr, sizeof(r)); - return r; + return *(uint64_t *)ptr; } static inline void stq_he_p(void *ptr, uint64_t v) { - __builtin_memcpy(ptr, &v, sizeof(v)); + *(uint64_t *)ptr = v; } static inline int lduw_le_p(const void *ptr) -- 1.8.3.1