A microbenchmark shows that the "REP MOVSB" version of copy_page() is
faster than the "REP MOVSQ" version on an Intel i5-something Haswell
CPU that supports both REP_GOOD and ERMS.

N=1<<27
rep movsq:      6.758841901 ± 0.04%
rep movsb:      6.253927309 ± 0.02%
-----------------------------------
                        -7.5%

Signed-off-by: Alexey Dobriyan <adobri...@gmail.com>
---

 arch/x86/lib/copy_page_64.S |   11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -12,12 +12,21 @@
  */
        ALIGN
 ENTRY(copy_page)
-       ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
+       ALTERNATIVE_2 "jmp copy_page_regs",     \
+               "", X86_FEATURE_REP_GOOD,       \
+               "jmp copy_page_rep_movsb", X86_FEATURE_ERMS
+
        movl    $4096/8, %ecx
        rep     movsq
        ret
 ENDPROC(copy_page)
 
+ENTRY(copy_page_rep_movsb)	/* byte-granular page copy; reached from copy_page when X86_FEATURE_ERMS is set */
+       mov     $4096, %ecx	/* count = 4096 bytes (8x the $4096/8 quadword count in copy_page); 32-bit write zero-extends into %rcx */
+       rep movsb		/* copy %rcx bytes from (%rsi) to (%rdi) */
+       ret
+ENDPROC(copy_page_rep_movsb)
+
 ENTRY(copy_page_regs)
        subq    $2*8,   %rsp
        movq    %rbx,   (%rsp)

Reply via email to