Introduce memcpy_nt() and memcpy_nt_drain() for write-once copy sites
that want a named non-temporal copy primitive plus an explicit ordering
point. On x86, place the arch-visible wrappers in
arch/x86/include/asm/string_64.h, mapping memcpy_nt() to the existing
memcpy_flushcache() backend and memcpy_nt_drain() to sfence.
Architectures that do not override the helpers fall back to memcpy()
and a no-op drain in include/linux/string.h.

The immediate user is the ZONE_DEVICE template-copy path. That path
populates struct page descriptors in a write-once pattern, so most
destination cachelines are not expected to be reused immediately after
the copy. A regular cached memcpy() can therefore incur avoidable
write-allocate traffic and pollute the cache with data that has little
near-term reuse.

This interface lets callers request non-temporal-copy semantics
directly, while x86 simply reuses the existing memcpy_flushcache()
backend instead of adding another generic memcpy-like wrapper with
extra selection policy above it.

Signed-off-by: Li Zhe <[email protected]>
---
 arch/x86/include/asm/string_64.h | 16 ++++++++++++++++
 include/linux/string.h           | 18 ++++++++++++++++++
 2 files changed, 34 insertions(+)

diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 4635616863f5..6f36abedc56a 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -100,6 +100,22 @@ static __always_inline void memcpy_flushcache(void *dst, const void *src, size_t
        }
        __memcpy_flushcache(dst, src, cnt);
 }
+
+#define __HAVE_ARCH_MEMCPY_NT 1
+/*
+ * memcpy_nt(): copy @cnt bytes from @src to @dst via memcpy_flushcache(),
+ * the existing x86 backend, reused here as the nt copy primitive. Callers
+ * pair it with memcpy_nt_drain() when later stores must be ordered after it.
+ */
+static __always_inline void memcpy_nt(void *dst, const void *src, size_t cnt)
+{
+       memcpy_flushcache(dst, src, cnt);
+}
+
+static __always_inline void memcpy_nt_drain(void)
+{
+       asm volatile("sfence" : : : "memory"); /* SFENCE + compiler barrier */
+}
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/include/linux/string.h b/include/linux/string.h
index 5702daca4326..5165763ab812 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -278,6 +278,24 @@ static inline void memcpy_flushcache(void *dst, const void *src, size_t cnt)
 }
 #endif
 
+#ifndef __HAVE_ARCH_MEMCPY_NT
+/*
+ * memcpy_nt() requests a non-temporal copy of @cnt bytes from @src to
+ * @dst when the architecture has a suitable backend. Callers must call
+ * memcpy_nt_drain() before later normal stores that need to be ordered
+ * after the copy. Architectures that do not define
+ * __HAVE_ARCH_MEMCPY_NT get this memcpy() fallback and a no-op drain.
+ */
+static inline void memcpy_nt(void *dst, const void *src, size_t cnt)
+{
+       memcpy(dst, src, cnt);
+}
+
+static inline void memcpy_nt_drain(void)
+{ /* intentionally empty: the generic fallback drain is a no-op */
+}
+#endif
+
 void *memchr_inv(const void *s, int c, size_t n);
 char *strreplace(char *str, char old, char new);
 
-- 
2.20.1

Reply via email to