3.2.79-rc1 review patch. If anyone has any objections, please let me know.
------------------ From: Toshi Kani <[email protected]> commit a82eee7424525e34e98d821dd059ce14560a1e35 upstream. Data corruption issues were observed in tests which initiated a system crash/reset while accessing BTT devices. This problem is reproducible. The BTT driver calls pmem_rw_bytes() to update data in pmem devices. This interface calls __copy_user_nocache(), which uses non-temporal stores so that the stores to pmem are persistent. __copy_user_nocache() uses non-temporal stores when a request size is 8 bytes or larger (and is aligned by 8 bytes). The BTT driver updates the BTT map table, whose entry size is 4 bytes. Therefore, updates to the map table entries remain cached, and are not written to pmem after a crash. Change __copy_user_nocache() to use non-temporal store when a request size is 4 bytes. The change extends the current byte-copy path for a less-than-8-bytes request, and does not add any overhead to the regular path. Reported-and-tested-by: Micah Parrish <[email protected]> Reported-and-tested-by: Brian Boylston <[email protected]> Signed-off-by: Toshi Kani <[email protected]> Cc: Andrew Morton <[email protected]> Cc: Andy Lutomirski <[email protected]> Cc: Borislav Petkov <[email protected]> Cc: Borislav Petkov <[email protected]> Cc: Brian Gerst <[email protected]> Cc: Dan Williams <[email protected]> Cc: Denys Vlasenko <[email protected]> Cc: H. Peter Anvin <[email protected]> Cc: Linus Torvalds <[email protected]> Cc: Luis R. Rodriguez <[email protected]> Cc: Peter Zijlstra <[email protected]> Cc: Ross Zwisler <[email protected]> Cc: Thomas Gleixner <[email protected]> Cc: Toshi Kani <[email protected]> Cc: Vishal Verma <[email protected]> Cc: [email protected] Link: http://lkml.kernel.org/r/[email protected] [ Small readability edits. 
] Signed-off-by: Ingo Molnar <[email protected]> [bwh: Backported to 3.2: adjust filename, context] Signed-off-by: Ben Hutchings <[email protected]> --- arch/x86/lib/copy_user_nocache_64.S | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) --- a/arch/x86/lib/copy_user_nocache_64.S +++ b/arch/x86/lib/copy_user_nocache_64.S @@ -49,13 +49,14 @@ * Note: Cached memory copy is used when destination or size is not * naturally aligned. That is: * - Require 8-byte alignment when size is 8 bytes or larger. + * - Require 4-byte alignment when size is 4 bytes. */ ENTRY(__copy_user_nocache) CFI_STARTPROC - /* If size is less than 8 bytes, go to byte copy */ + /* If size is less than 8 bytes, go to 4-byte copy */ cmpl $8,%edx - jb .L_1b_cache_copy_entry + jb .L_4b_nocache_copy_entry /* If destination is not 8-byte aligned, "cache" copy to align it */ ALIGN_DESTINATION @@ -94,7 +95,7 @@ ENTRY(__copy_user_nocache) movl %edx,%ecx andl $7,%edx shrl $3,%ecx - jz .L_1b_cache_copy_entry /* jump if count is 0 */ + jz .L_4b_nocache_copy_entry /* jump if count is 0 */ /* Perform 8-byte nocache loop-copy */ .L_8b_nocache_copy_loop: @@ -106,11 +107,33 @@ ENTRY(__copy_user_nocache) jnz .L_8b_nocache_copy_loop /* If no byte left, we're done */ -.L_1b_cache_copy_entry: +.L_4b_nocache_copy_entry: + andl %edx,%edx + jz .L_finish_copy + + /* If destination is not 4-byte aligned, go to byte copy: */ + movl %edi,%ecx + andl $3,%ecx + jnz .L_1b_cache_copy_entry + + /* Set 4-byte copy count (1 or 0) and remainder */ + movl %edx,%ecx + andl $3,%edx + shrl $2,%ecx + jz .L_1b_cache_copy_entry /* jump if count is 0 */ + + /* Perform 4-byte nocache copy: */ +30: movl (%rsi),%r8d +31: movnti %r8d,(%rdi) + leaq 4(%rsi),%rsi + leaq 4(%rdi),%rdi + + /* If no bytes left, we're done: */ andl %edx,%edx jz .L_finish_copy /* Perform byte "cache" loop-copy for the remainder */ +.L_1b_cache_copy_entry: movl %edx,%ecx .L_1b_cache_copy_loop: 40: movb (%rsi),%al @@ -134,6 +157,9 
@@ ENTRY(__copy_user_nocache) .L_fixup_8b_copy: lea (%rdx,%rcx,8),%rdx jmp .L_fixup_handle_tail +.L_fixup_4b_copy: + lea (%rdx,%rcx,4),%rdx + jmp .L_fixup_handle_tail .L_fixup_1b_copy: movl %ecx,%edx .L_fixup_handle_tail: @@ -159,6 +185,8 @@ ENTRY(__copy_user_nocache) _ASM_EXTABLE(16b,.L_fixup_4x8b_copy) _ASM_EXTABLE(20b,.L_fixup_8b_copy) _ASM_EXTABLE(21b,.L_fixup_8b_copy) + _ASM_EXTABLE(30b,.L_fixup_4b_copy) + _ASM_EXTABLE(31b,.L_fixup_4b_copy) _ASM_EXTABLE(40b,.L_fixup_1b_copy) _ASM_EXTABLE(41b,.L_fixup_1b_copy) CFI_ENDPROC

