A microbenchmark shows that the "REP MOVSB" version of copy_page() is faster than the "REP MOVSQ" version on an Intel i5-something Haswell CPU that is REP_GOOD/ERMS capable.

	N=1<<27
	rep movsq:	6.758841901 ± 0.04%
	rep movsb:	6.253927309 ± 0.02%
	-----------------------------------
				-7.5%

Signed-off-by: Alexey Dobriyan <adobri...@gmail.com>
---

 arch/x86/lib/copy_page_64.S | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -12,12 +12,21 @@
  */
 	ALIGN
 ENTRY(copy_page)
-	ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
+	ALTERNATIVE_2 "jmp copy_page_regs", \
+		"", X86_FEATURE_REP_GOOD, \
+		"jmp copy_page_rep_movsb", X86_FEATURE_ERMS
+
 	movl $4096/8, %ecx
 	rep movsq
 	ret
 ENDPROC(copy_page)
 
+ENTRY(copy_page_rep_movsb)
+	mov $4096, %ecx
+	rep movsb
+	ret
+ENDPROC(copy_page_rep_movsb)
+
 ENTRY(copy_page_regs)
 	subq $2*8, %rsp
 	movq %rbx, (%rsp)