Introduce a generic memcpy_streaming() interface for write-once copy
sites. The interface falls back to memcpy() when no architecture-specific
optimization is available, or when an architecture-specific backend
cannot safely handle a given transfer.
Add memcpy_streaming_drain() alongside it so callers can separate the
copy primitive from any required ordering point.

On x86, use memcpy_flushcache() and sfence only for aligned transfers
that can stay entirely on the non-temporal store path; otherwise fall
back to memcpy() so the generic API does not expose flushcache semantics
on cached head/tail fragments.

Callers are responsible for invoking memcpy_streaming_drain() before
later normal stores that must be ordered after the streaming copy.

Signed-off-by: Li Zhe <[email protected]>
---
 arch/x86/include/asm/string_64.h | 40 ++++++++++++++++++++++++++++++++
 include/linux/string.h           | 20 ++++++++++++++++
 2 files changed, 60 insertions(+)

diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 4635616863f5..0b57e9e6f3db 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -100,6 +100,46 @@ static __always_inline void memcpy_flushcache(void *dst, const void *src, size_t
 	}
 	__memcpy_flushcache(dst, src, cnt);
 }
+
+/*
+ * Only reuse memcpy_flushcache() for transfers that can stay entirely
+ * on its non-temporal store path. Fall back to memcpy() for zero-length
+ * copies and for unaligned transfers so the generic streaming API does
+ * not expose flushcache semantics on cached head/tail fragments.
+ */
+static __always_inline int memcpy_flushcache_nt_safe(const void *dst,
+						     const void *src,
+						     size_t cnt)
+{
+	unsigned long d = (unsigned long)dst;
+	unsigned long s = (unsigned long)src;
+
+	if (!cnt)
+		return 0;
+
+	if (cnt >= 8)
+		return !(d & 7) && !(s & 7) && !(cnt & 7);
+
+	return cnt == 4 && !(d & 3) && !(s & 3);
+}
+
+#define __HAVE_ARCH_MEMCPY_STREAMING 1
+static __always_inline void memcpy_streaming(void *dst, const void *src,
+					     size_t cnt)
+{
+	if (!cnt)
+		return;
+
+	if (memcpy_flushcache_nt_safe(dst, src, cnt))
+		memcpy_flushcache(dst, src, cnt);
+	else
+		memcpy(dst, src, cnt);
+}
+
+static __always_inline void memcpy_streaming_drain(void)
+{
+	asm volatile("sfence" : : : "memory");
+}
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/include/linux/string.h b/include/linux/string.h
index b850bd91b3d8..a4c2d4347f58 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -281,6 +281,26 @@ static inline void memcpy_flushcache(void *dst, const void *src, size_t cnt)
 }
 #endif
 
+#ifndef __HAVE_ARCH_MEMCPY_STREAMING
+/*
+ * memcpy_streaming() is for write-once copy sites that may use
+ * non-temporal stores on some architectures. Callers must follow it
+ * with memcpy_streaming_drain() before later normal stores that need to
+ * be ordered after the streaming copy. Implementations may fall back to
+ * memcpy() when a specialized backend cannot safely handle the given
+ * transfer, and backends that use regular cached stores can make the
+ * drain a no-op.
+ */
+static inline void memcpy_streaming(void *dst, const void *src, size_t cnt)
+{
+	memcpy(dst, src, cnt);
+}
+
+static inline void memcpy_streaming_drain(void)
+{
+}
+#endif
+
 void *memchr_inv(const void *s, int c, size_t n);
 char *strreplace(char *str, char old, char new);
 
-- 
2.20.1

