The present copy_page_sse2() is useful when the destination page isn't going to be touched again soon, or when we want to limit churn on the caches. Just rename it to copy_page_cold(), to fit the corresponding {clear,scrub}_page_*() naming scheme.
For cases where latency is the most important aspect, or when it is expected that sufficiently large parts of a destination page will get accessed again soon after the copying, introduce a "hot" alternative. Again use alternatives patching to select between a "legacy" and an ERMS variant. Don't switch any callers just yet - this will be the subject of subsequent changes. Signed-off-by: Jan Beulich <jbeul...@suse.com> --- Of course both pages can be independently hot/cold, and hence more flavors may be necessary here. To avoid the NOP padding (also in clear_page_hot()) we could use a double REP prefix in the replacement code (accounting for the REX one in the code being replaced). --- a/xen/arch/x86/copy_page.S +++ b/xen/arch/x86/copy_page.S @@ -13,7 +13,7 @@ #define tmp3_reg %r10 #define tmp4_reg %r11 -FUNC(copy_page_sse2) +FUNC(copy_page_cold) mov $PAGE_SIZE/(4*WORD_SIZE)-3, %ecx prefetchnta 2*4*WORD_SIZE(src_reg) @@ -44,4 +44,19 @@ FUNC(copy_page_sse2) sfence RET -END(copy_page_sse2) +END(copy_page_cold) + + .macro copy_page_movsb + mov $PAGE_SIZE, %ecx + rep movsb + .endm + + .macro copy_page_movsq + mov $PAGE_SIZE/8, %ecx + rep movsq + .endm + +FUNC(copy_page_hot) + ALTERNATIVE copy_page_movsq, copy_page_movsb, X86_FEATURE_ERMS + RET +END(copy_page_hot) --- a/xen/arch/x86/include/asm/page.h +++ b/xen/arch/x86/include/asm/page.h @@ -221,10 +221,11 @@ typedef struct { u64 pfn; } pagetable_t; void clear_page_hot(void *pg); void clear_page_cold(void *pg); -void copy_page_sse2(void *to, const void *from); +void copy_page_hot(void *to, const void *from); +void copy_page_cold(void *to, const void *from); #define clear_page(_p) clear_page_cold(_p) -#define copy_page(_t, _f) copy_page_sse2(_t, _f) +#define copy_page(_t, _f) copy_page_cold(_t, _f) #ifdef CONFIG_DEBUG void scrub_page_hot(void *ptr);