Introduce memcpy_nt() and memcpy_nt_drain() for write-once copy sites that want a named non-temporal copy primitive plus an explicit ordering point. On x86, place the arch-visible wrappers in arch/x86/include/asm/string_64.h, mapping memcpy_nt() to the existing memcpy_flushcache() backend and memcpy_nt_drain() to sfence. Architectures that do not override the helpers fall back to memcpy() and a no-op drain in include/linux/string.h.
The immediate user is the ZONE_DEVICE template-copy path. That path populates struct page descriptors in a write-once pattern, so most destination cachelines are not expected to be reused immediately after the copy. A regular cached memcpy() can therefore incur avoidable write-allocate traffic and pollute the cache with data that has little near-term reuse. This interface lets callers request non-temporal-copy semantics directly, while x86 simply reuses the existing memcpy_flushcache() backend instead of adding another generic memcpy-like wrapper with extra selection policy above it. Signed-off-by: Li Zhe <[email protected]> --- arch/x86/include/asm/string_64.h | 16 ++++++++++++++++ include/linux/string.h | 18 ++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index 4635616863f5..6f36abedc56a 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -100,6 +100,22 @@ static __always_inline void memcpy_flushcache(void *dst, const void *src, size_t } __memcpy_flushcache(dst, src, cnt); } + +#define __HAVE_ARCH_MEMCPY_NT 1 +/* + * Reuse the existing x86 flushcache backend as the nt copy primitive. + * Callers pair it with memcpy_nt_drain() when later stores must be + * ordered after the copy. + */ +static __always_inline void memcpy_nt(void *dst, const void *src, size_t cnt) +{ + memcpy_flushcache(dst, src, cnt); +} + +static __always_inline void memcpy_nt_drain(void) +{ + asm volatile("sfence" : : : "memory"); +} #endif #endif /* __KERNEL__ */ diff --git a/include/linux/string.h b/include/linux/string.h index 5702daca4326..5165763ab812 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -278,6 +278,24 @@ static inline void memcpy_flushcache(void *dst, const void *src, size_t cnt) } #endif +#ifndef __HAVE_ARCH_MEMCPY_NT +/* + * memcpy_nt() requests a non-temporal copy when the architecture has a + * suitable backend. 
Callers must follow it with memcpy_nt_drain() + * before later normal stores that need to be ordered after the copy. + * Architectures that do not override it fall back to memcpy() and a + * no-op drain. + */ +static inline void memcpy_nt(void *dst, const void *src, size_t cnt) +{ + memcpy(dst, src, cnt); +} + +static inline void memcpy_nt_drain(void) +{ +} +#endif + void *memchr_inv(const void *s, int c, size_t n); char *strreplace(char *str, char old, char new); -- 2.20.1

