[PATCH v4 4/5] sparc64: SPARC optimized __fls function
Defined SPARC optimized __fls using lzcnt opcode. Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com> --- arch/sparc/lib/NG4fls.S | 10 ++ 1 files changed, 10 insertions(+), 0 deletions(-) diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S index bc17b65..2d0991e 100644 --- a/arch/sparc/lib/NG4fls.S +++ b/arch/sparc/lib/NG4fls.S @@ -18,3 +18,13 @@ ENTRY(NG4fls) retl sub%g3, %g2, %o0 ENDPROC(NG4fls) + +ENTRY(__NG4fls) + brz,pn %o0, 1f + LZCNT_O0_G2 !lzcnt %o0, %g2 + mov 63, %g3 + sub %g3, %g2, %o0 +1: + retl +nop +ENDPROC(__NG4fls) -- 1.7.1
[PATCH v4 4/5] sparc64: SPARC optimized __fls function
Defined SPARC optimized __fls using lzcnt opcode. Signed-off-by: Vijay Kumar --- arch/sparc/lib/NG4fls.S | 10 ++ 1 files changed, 10 insertions(+), 0 deletions(-) diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S index bc17b65..2d0991e 100644 --- a/arch/sparc/lib/NG4fls.S +++ b/arch/sparc/lib/NG4fls.S @@ -18,3 +18,13 @@ ENTRY(NG4fls) retl sub%g3, %g2, %o0 ENDPROC(NG4fls) + +ENTRY(__NG4fls) + brz,pn %o0, 1f + LZCNT_O0_G2 !lzcnt %o0, %g2 + mov 63, %g3 + sub %g3, %g2, %o0 +1: + retl +nop +ENDPROC(__NG4fls) -- 1.7.1
[PATCH v4 0/5] sparc64: Optimize fls and __fls
SPARC provides the lzcnt instruction (with VIS3), which can be used to optimize the fls, __fls and fls64 functions. For systems that support the lzcnt instruction, we now do boot-time patching to use the SPARC-optimized fls, __fls and fls64 functions. v3->v4: - Fixed a typo. v2->v3: - Using ENTRY(), ENDPROC() for assembler functions. - Removed BITS_PER_LONG from __fls. - Using generic fls64(). - Replaced lzcnt instruction with .word directive. v1->v2: - Fixed delay slot issue. Vijay Kumar (5): sparc64: Define SPARC default fls function sparc64: Define SPARC default __fls function sparc64: SPARC optimized fls function sparc64: SPARC optimized __fls function sparc64: Use sparc optimized fls and __fls for T4 and above arch/sparc/include/asm/bitops_64.h |5 ++- arch/sparc/kernel/head_64.S|2 + arch/sparc/lib/Makefile|3 ++ arch/sparc/lib/NG4fls.S| 30 arch/sparc/lib/NG4patch.S |9 + arch/sparc/lib/fls.S | 67 arch/sparc/lib/fls64.S | 61 7 files changed, 175 insertions(+), 2 deletions(-) create mode 100644 arch/sparc/lib/NG4fls.S create mode 100644 arch/sparc/lib/fls.S create mode 100644 arch/sparc/lib/fls64.S
[PATCH v4 0/5] sparc64: Optimize fls and __fls
SPARC provides the lzcnt instruction (with VIS3), which can be used to optimize the fls, __fls and fls64 functions. For systems that support the lzcnt instruction, we now do boot-time patching to use the SPARC-optimized fls, __fls and fls64 functions. v3->v4: - Fixed a typo. v2->v3: - Using ENTRY(), ENDPROC() for assembler functions. - Removed BITS_PER_LONG from __fls. - Using generic fls64(). - Replaced lzcnt instruction with .word directive. v1->v2: - Fixed delay slot issue. Vijay Kumar (5): sparc64: Define SPARC default fls function sparc64: Define SPARC default __fls function sparc64: SPARC optimized fls function sparc64: SPARC optimized __fls function sparc64: Use sparc optimized fls and __fls for T4 and above arch/sparc/include/asm/bitops_64.h |5 ++- arch/sparc/kernel/head_64.S|2 + arch/sparc/lib/Makefile|3 ++ arch/sparc/lib/NG4fls.S| 30 arch/sparc/lib/NG4patch.S |9 + arch/sparc/lib/fls.S | 67 arch/sparc/lib/fls64.S | 61 7 files changed, 175 insertions(+), 2 deletions(-) create mode 100644 arch/sparc/lib/NG4fls.S create mode 100644 arch/sparc/lib/fls.S create mode 100644 arch/sparc/lib/fls64.S
[PATCH v4 1/5] sparc64: Define SPARC default fls function
fls will now require a boot time patching on T4 and above. Redefining it under arch/sparc/lib. Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com> --- arch/sparc/include/asm/bitops_64.h |3 +- arch/sparc/lib/Makefile|1 + arch/sparc/lib/fls.S | 67 3 files changed, 70 insertions(+), 1 deletions(-) diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h index 2d52240..30aea56 100644 --- a/arch/sparc/include/asm/bitops_64.h +++ b/arch/sparc/include/asm/bitops_64.h @@ -22,9 +22,10 @@ void clear_bit(unsigned long nr, volatile unsigned long *addr); void change_bit(unsigned long nr, volatile unsigned long *addr); +int fls(unsigned int word); + #include -#include #include #include diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index a1a2d39..3b9f5e0 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -16,6 +16,7 @@ lib-$(CONFIG_SPARC64) += atomic_64.o lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o lib-$(CONFIG_SPARC64) += multi3.o +lib-$(CONFIG_SPARC64) += fls.o lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o diff --git a/arch/sparc/lib/fls.S b/arch/sparc/lib/fls.S new file mode 100644 index 000..06b8d30 --- /dev/null +++ b/arch/sparc/lib/fls.S @@ -0,0 +1,67 @@ +/* fls.S: SPARC default fls definition. + * + * SPARC default fls definition, which follows the same algorithm as + * in generic fls(). This function will be boot time patched on T4 + * and onward. 
+ */ + +#include +#include + + .text + .register %g2, #scratch + .register %g3, #scratch +ENTRY(fls) + brz,pn %o0, 6f +mov0, %o1 + sethi %hi(0x), %g3 + mov %o0, %g2 + andcc %o0, %g3, %g0 + be,pt %icc, 8f +mov32, %o1 + sethi %hi(0xff00), %g3 + andcc %g2, %g3, %g0 + bne,pt %icc, 3f +sethi %hi(0xf000), %g3 + sll %o0, 8, %o0 +1: + add %o1, -8, %o1 + sra %o0, 0, %o0 + mov %o0, %g2 +2: + sethi %hi(0xf000), %g3 +3: + andcc %g2, %g3, %g0 + bne,pt %icc, 4f +sethi %hi(0xc000), %g3 + sll %o0, 4, %o0 + add %o1, -4, %o1 + sra %o0, 0, %o0 + mov %o0, %g2 +4: + andcc %g2, %g3, %g0 + be,a,pt %icc, 7f +sll%o0, 2, %o0 +5: + xnor%g0, %o0, %o0 + srl %o0, 31, %o0 + sub %o1, %o0, %o1 +6: + jmp %o7 + 8 +sra%o1, 0, %o0 +7: + add %o1, -2, %o1 + ba,pt %xcc, 5b +sra%o0, 0, %o0 +8: + sll %o0, 16, %o0 + sethi %hi(0xff00), %g3 + sra %o0, 0, %o0 + mov %o0, %g2 + andcc %g2, %g3, %g0 + bne,pt %icc, 2b +mov16, %o1 + ba,pt %xcc, 1b +sll%o0, 8, %o0 +ENDPROC(fls) +EXPORT_SYMBOL(fls) -- 1.7.1
[PATCH v4 1/5] sparc64: Define SPARC default fls function
fls will now require a boot time patching on T4 and above. Redefining it under arch/sparc/lib. Signed-off-by: Vijay Kumar --- arch/sparc/include/asm/bitops_64.h |3 +- arch/sparc/lib/Makefile|1 + arch/sparc/lib/fls.S | 67 3 files changed, 70 insertions(+), 1 deletions(-) diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h index 2d52240..30aea56 100644 --- a/arch/sparc/include/asm/bitops_64.h +++ b/arch/sparc/include/asm/bitops_64.h @@ -22,9 +22,10 @@ void clear_bit(unsigned long nr, volatile unsigned long *addr); void change_bit(unsigned long nr, volatile unsigned long *addr); +int fls(unsigned int word); + #include -#include #include #include diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index a1a2d39..3b9f5e0 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -16,6 +16,7 @@ lib-$(CONFIG_SPARC64) += atomic_64.o lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o lib-$(CONFIG_SPARC64) += multi3.o +lib-$(CONFIG_SPARC64) += fls.o lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o diff --git a/arch/sparc/lib/fls.S b/arch/sparc/lib/fls.S new file mode 100644 index 000..06b8d30 --- /dev/null +++ b/arch/sparc/lib/fls.S @@ -0,0 +1,67 @@ +/* fls.S: SPARC default fls definition. + * + * SPARC default fls definition, which follows the same algorithm as + * in generic fls(). This function will be boot time patched on T4 + * and onward. 
+ */ + +#include +#include + + .text + .register %g2, #scratch + .register %g3, #scratch +ENTRY(fls) + brz,pn %o0, 6f +mov0, %o1 + sethi %hi(0x), %g3 + mov %o0, %g2 + andcc %o0, %g3, %g0 + be,pt %icc, 8f +mov32, %o1 + sethi %hi(0xff00), %g3 + andcc %g2, %g3, %g0 + bne,pt %icc, 3f +sethi %hi(0xf000), %g3 + sll %o0, 8, %o0 +1: + add %o1, -8, %o1 + sra %o0, 0, %o0 + mov %o0, %g2 +2: + sethi %hi(0xf000), %g3 +3: + andcc %g2, %g3, %g0 + bne,pt %icc, 4f +sethi %hi(0xc000), %g3 + sll %o0, 4, %o0 + add %o1, -4, %o1 + sra %o0, 0, %o0 + mov %o0, %g2 +4: + andcc %g2, %g3, %g0 + be,a,pt %icc, 7f +sll%o0, 2, %o0 +5: + xnor%g0, %o0, %o0 + srl %o0, 31, %o0 + sub %o1, %o0, %o1 +6: + jmp %o7 + 8 +sra%o1, 0, %o0 +7: + add %o1, -2, %o1 + ba,pt %xcc, 5b +sra%o0, 0, %o0 +8: + sll %o0, 16, %o0 + sethi %hi(0xff00), %g3 + sra %o0, 0, %o0 + mov %o0, %g2 + andcc %g2, %g3, %g0 + bne,pt %icc, 2b +mov16, %o1 + ba,pt %xcc, 1b +sll%o0, 8, %o0 +ENDPROC(fls) +EXPORT_SYMBOL(fls) -- 1.7.1
[PATCH v4 2/5] sparc64: Define SPARC default __fls function
__fls will now require a boot time patching on T4 and above. Redefining it under arch/sparc/lib. Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com> --- arch/sparc/include/asm/bitops_64.h |2 +- arch/sparc/lib/Makefile|1 + arch/sparc/lib/fls64.S | 61 3 files changed, 63 insertions(+), 1 deletions(-) diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h index 30aea56..d7a46e2 100644 --- a/arch/sparc/include/asm/bitops_64.h +++ b/arch/sparc/include/asm/bitops_64.h @@ -23,10 +23,10 @@ void change_bit(unsigned long nr, volatile unsigned long *addr); int fls(unsigned int word); +int __fls(unsigned long word); #include -#include #include #ifdef __KERNEL__ diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index 3b9f5e0..5380c59 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -17,6 +17,7 @@ lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o lib-$(CONFIG_SPARC64) += multi3.o lib-$(CONFIG_SPARC64) += fls.o +lib-$(CONFIG_SPARC64) += fls64.o lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o diff --git a/arch/sparc/lib/fls64.S b/arch/sparc/lib/fls64.S new file mode 100644 index 000..c83e22a --- /dev/null +++ b/arch/sparc/lib/fls64.S @@ -0,0 +1,61 @@ +/* fls64.S: SPARC default __fls definition. + * + * SPARC default __fls definition, which follows the same algorithm as + * in generic __fls(). This function will be boot time patched on T4 + * and onward. 
+ */ + +#include +#include + + .text + .register %g2, #scratch + .register %g3, #scratch +ENTRY(__fls) + mov -1, %g2 + sllx%g2, 32, %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 1f +mov63, %g1 + sllx%o0, 32, %o0 + mov 31, %g1 +1: + mov -1, %g2 + sllx%g2, 48, %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 2f +mov-1, %g2 + sllx%o0, 16, %o0 + add %g1, -16, %g1 +2: + mov -1, %g2 + sllx%g2, 56, %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 3f +mov-1, %g2 + sllx%o0, 8, %o0 + add %g1, -8, %g1 +3: + sllx%g2, 60, %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 4f +mov-1, %g2 + sllx%o0, 4, %o0 + add %g1, -4, %g1 +4: + sllx%g2, 62, %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 5f +mov-1, %g3 + sllx%o0, 2, %o0 + add %g1, -2, %g1 +5: + mov 0, %g2 + sllx%g3, 63, %g3 + and %o0, %g3, %o0 + movre %o0, 1, %g2 + sub %g1, %g2, %g1 + jmp %o7+8 +sra%g1, 0, %o0 +ENDPROC(__fls) +EXPORT_SYMBOL(__fls) -- 1.7.1
[PATCH v4 2/5] sparc64: Define SPARC default __fls function
__fls will now require a boot time patching on T4 and above. Redefining it under arch/sparc/lib. Signed-off-by: Vijay Kumar --- arch/sparc/include/asm/bitops_64.h |2 +- arch/sparc/lib/Makefile|1 + arch/sparc/lib/fls64.S | 61 3 files changed, 63 insertions(+), 1 deletions(-) diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h index 30aea56..d7a46e2 100644 --- a/arch/sparc/include/asm/bitops_64.h +++ b/arch/sparc/include/asm/bitops_64.h @@ -23,10 +23,10 @@ void change_bit(unsigned long nr, volatile unsigned long *addr); int fls(unsigned int word); +int __fls(unsigned long word); #include -#include #include #ifdef __KERNEL__ diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index 3b9f5e0..5380c59 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -17,6 +17,7 @@ lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o lib-$(CONFIG_SPARC64) += multi3.o lib-$(CONFIG_SPARC64) += fls.o +lib-$(CONFIG_SPARC64) += fls64.o lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o diff --git a/arch/sparc/lib/fls64.S b/arch/sparc/lib/fls64.S new file mode 100644 index 000..c83e22a --- /dev/null +++ b/arch/sparc/lib/fls64.S @@ -0,0 +1,61 @@ +/* fls64.S: SPARC default __fls definition. + * + * SPARC default __fls definition, which follows the same algorithm as + * in generic __fls(). This function will be boot time patched on T4 + * and onward. 
+ */ + +#include +#include + + .text + .register %g2, #scratch + .register %g3, #scratch +ENTRY(__fls) + mov -1, %g2 + sllx%g2, 32, %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 1f +mov63, %g1 + sllx%o0, 32, %o0 + mov 31, %g1 +1: + mov -1, %g2 + sllx%g2, 48, %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 2f +mov-1, %g2 + sllx%o0, 16, %o0 + add %g1, -16, %g1 +2: + mov -1, %g2 + sllx%g2, 56, %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 3f +mov-1, %g2 + sllx%o0, 8, %o0 + add %g1, -8, %g1 +3: + sllx%g2, 60, %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 4f +mov-1, %g2 + sllx%o0, 4, %o0 + add %g1, -4, %g1 +4: + sllx%g2, 62, %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 5f +mov-1, %g3 + sllx%o0, 2, %o0 + add %g1, -2, %g1 +5: + mov 0, %g2 + sllx%g3, 63, %g3 + and %o0, %g3, %o0 + movre %o0, 1, %g2 + sub %g1, %g2, %g1 + jmp %o7+8 +sra%g1, 0, %o0 +ENDPROC(__fls) +EXPORT_SYMBOL(__fls) -- 1.7.1
[PATCH v4 5/5] sparc64: Use sparc optimized fls and __fls for T4 and above
For T4 and above, patch fls and __fls functions at the boot time to use lzcnt instruction. Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com> --- arch/sparc/kernel/head_64.S |2 ++ arch/sparc/lib/NG4patch.S |9 + 2 files changed, 11 insertions(+), 0 deletions(-) diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 4de9fbd..f362ecb 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -640,6 +640,8 @@ niagara4_patch: nop callniagara4_patch_pageops nop + callniagara4_patch_fls +nop ba,a,pt %xcc, 80f nop diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S index 3cc0f8c..da65a3e 100644 --- a/arch/sparc/lib/NG4patch.S +++ b/arch/sparc/lib/NG4patch.S @@ -3,6 +3,8 @@ * Copyright (C) 2012 David S. Miller <da...@davemloft.net> */ +#include + #define BRANCH_ALWAYS 0x1068 #define NOP0x0100 #define NG_DO_PATCH(OLD, NEW) \ @@ -52,3 +54,10 @@ niagara4_patch_pageops: retl nop .size niagara4_patch_pageops,.-niagara4_patch_pageops + +ENTRY(niagara4_patch_fls) + NG_DO_PATCH(fls, NG4fls) + NG_DO_PATCH(__fls, __NG4fls) + retl +nop +ENDPROC(niagara4_patch_fls) -- 1.7.1
[PATCH v4 5/5] sparc64: Use sparc optimized fls and __fls for T4 and above
For T4 and above, patch fls and __fls functions at the boot time to use lzcnt instruction. Signed-off-by: Vijay Kumar --- arch/sparc/kernel/head_64.S |2 ++ arch/sparc/lib/NG4patch.S |9 + 2 files changed, 11 insertions(+), 0 deletions(-) diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 4de9fbd..f362ecb 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -640,6 +640,8 @@ niagara4_patch: nop callniagara4_patch_pageops nop + callniagara4_patch_fls +nop ba,a,pt %xcc, 80f nop diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S index 3cc0f8c..da65a3e 100644 --- a/arch/sparc/lib/NG4patch.S +++ b/arch/sparc/lib/NG4patch.S @@ -3,6 +3,8 @@ * Copyright (C) 2012 David S. Miller */ +#include + #define BRANCH_ALWAYS 0x1068 #define NOP0x0100 #define NG_DO_PATCH(OLD, NEW) \ @@ -52,3 +54,10 @@ niagara4_patch_pageops: retl nop .size niagara4_patch_pageops,.-niagara4_patch_pageops + +ENTRY(niagara4_patch_fls) + NG_DO_PATCH(fls, NG4fls) + NG_DO_PATCH(__fls, __NG4fls) + retl +nop +ENDPROC(niagara4_patch_fls) -- 1.7.1
[PATCH v4 3/5] sparc64: SPARC optimized fls function
Defined SPARC optimized fls using lzcnt opcode. Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com> --- arch/sparc/lib/Makefile |1 + arch/sparc/lib/NG4fls.S | 20 2 files changed, 21 insertions(+), 0 deletions(-) diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index 5380c59..2823b8e 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -18,6 +18,7 @@ lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o lib-$(CONFIG_SPARC64) += multi3.o lib-$(CONFIG_SPARC64) += fls.o lib-$(CONFIG_SPARC64) += fls64.o +obj-$(CONFIG_SPARC64) += NG4fls.o lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S new file mode 100644 index 000..bc17b65 --- /dev/null +++ b/arch/sparc/lib/NG4fls.S @@ -0,0 +1,20 @@ +/* NG4fls.S: SPARC optimized fls and __fls for T4 and above. + * + * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. + */ + +#include + +#define LZCNT_O0_G2\ + .word 0x85b002e8 + + .text + .register %g2, #scratch + .register %g3, #scratch + +ENTRY(NG4fls) + LZCNT_O0_G2 !lzcnt %o0, %g2 + mov 64, %g3 + retl +sub%g3, %g2, %o0 +ENDPROC(NG4fls) -- 1.7.1
[PATCH v4 3/5] sparc64: SPARC optimized fls function
Defined SPARC optimized fls using lzcnt opcode. Signed-off-by: Vijay Kumar --- arch/sparc/lib/Makefile |1 + arch/sparc/lib/NG4fls.S | 20 2 files changed, 21 insertions(+), 0 deletions(-) diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index 5380c59..2823b8e 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -18,6 +18,7 @@ lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o lib-$(CONFIG_SPARC64) += multi3.o lib-$(CONFIG_SPARC64) += fls.o lib-$(CONFIG_SPARC64) += fls64.o +obj-$(CONFIG_SPARC64) += NG4fls.o lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S new file mode 100644 index 000..bc17b65 --- /dev/null +++ b/arch/sparc/lib/NG4fls.S @@ -0,0 +1,20 @@ +/* NG4fls.S: SPARC optimized fls and __fls for T4 and above. + * + * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. + */ + +#include + +#define LZCNT_O0_G2\ + .word 0x85b002e8 + + .text + .register %g2, #scratch + .register %g3, #scratch + +ENTRY(NG4fls) + LZCNT_O0_G2 !lzcnt %o0, %g2 + mov 64, %g3 + retl +sub%g3, %g2, %o0 +ENDPROC(NG4fls) -- 1.7.1
Re: [PATCH v3 3/5] sparc64: SPARC optimised fls function
On 10/8/2017 11:39 PM, David Miller wrote: From: Vijay Kumar <vijay.ac.ku...@oracle.com> Date: Fri, 6 Oct 2017 10:54:51 -0600 +#define LXCNT_O0_G2\ + .word 0x85b002e8 + + .text + .register %g2, #scratch + .register %g3, #scratch + +ENTRY(NG4fls) + LXCNT_O0_G2 !lzcnt %o0, %g2 Agreed with others that you should name this LZCNT_* instead of LXCNT_*. Yes, that's a typo. I will fix it. Vijay
Re: [PATCH v3 3/5] sparc64: SPARC optimised fls function
On 10/8/2017 11:39 PM, David Miller wrote: From: Vijay Kumar Date: Fri, 6 Oct 2017 10:54:51 -0600 +#define LXCNT_O0_G2\ + .word 0x85b002e8 + + .text + .register %g2, #scratch + .register %g3, #scratch + +ENTRY(NG4fls) + LXCNT_O0_G2 !lzcnt %o0, %g2 Agreed with others that you should name this LZCNT_* instead of LXCNT_*. Yes, that's a typo. I will fix it. Vijay
[PATCH v3 5/5] sparc64: Use sparc optimised fls and __fls for T4 and above
For T4 and above, patch fls and __fls functions at the boot time to use lzcnt instruction. Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com> --- arch/sparc/kernel/head_64.S |2 ++ arch/sparc/lib/NG4patch.S |9 + 2 files changed, 11 insertions(+), 0 deletions(-) diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 4de9fbd..f362ecb 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -640,6 +640,8 @@ niagara4_patch: nop callniagara4_patch_pageops nop + callniagara4_patch_fls +nop ba,a,pt %xcc, 80f nop diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S index 3cc0f8c..da65a3e 100644 --- a/arch/sparc/lib/NG4patch.S +++ b/arch/sparc/lib/NG4patch.S @@ -3,6 +3,8 @@ * Copyright (C) 2012 David S. Miller <da...@davemloft.net> */ +#include + #define BRANCH_ALWAYS 0x1068 #define NOP0x0100 #define NG_DO_PATCH(OLD, NEW) \ @@ -52,3 +54,10 @@ niagara4_patch_pageops: retl nop .size niagara4_patch_pageops,.-niagara4_patch_pageops + +ENTRY(niagara4_patch_fls) + NG_DO_PATCH(fls, NG4fls) + NG_DO_PATCH(__fls, __NG4fls) + retl +nop +ENDPROC(niagara4_patch_fls) -- 1.7.1
[PATCH v3 5/5] sparc64: Use sparc optimised fls and __fls for T4 and above
For T4 and above, patch fls and __fls functions at the boot time to use lzcnt instruction. Signed-off-by: Vijay Kumar --- arch/sparc/kernel/head_64.S |2 ++ arch/sparc/lib/NG4patch.S |9 + 2 files changed, 11 insertions(+), 0 deletions(-) diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 4de9fbd..f362ecb 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -640,6 +640,8 @@ niagara4_patch: nop callniagara4_patch_pageops nop + callniagara4_patch_fls +nop ba,a,pt %xcc, 80f nop diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S index 3cc0f8c..da65a3e 100644 --- a/arch/sparc/lib/NG4patch.S +++ b/arch/sparc/lib/NG4patch.S @@ -3,6 +3,8 @@ * Copyright (C) 2012 David S. Miller */ +#include + #define BRANCH_ALWAYS 0x1068 #define NOP0x0100 #define NG_DO_PATCH(OLD, NEW) \ @@ -52,3 +54,10 @@ niagara4_patch_pageops: retl nop .size niagara4_patch_pageops,.-niagara4_patch_pageops + +ENTRY(niagara4_patch_fls) + NG_DO_PATCH(fls, NG4fls) + NG_DO_PATCH(__fls, __NG4fls) + retl +nop +ENDPROC(niagara4_patch_fls) -- 1.7.1
[PATCH v3 0/5] sparc64: Optimize fls and __fls
SPARC provides the lzcnt instruction (with VIS3), which can be used to optimize the fls, __fls and fls64 functions. For systems that support the lzcnt instruction, we now do boot-time patching to use the SPARC-optimized fls, __fls and fls64 functions. v2->v3: - Using ENTRY(), ENDPROC() for assembler functions. - Removed BITS_PER_LONG from __fls. - Using generic fls64(). - Replaced lzcnt instruction with .word directive. v1->v2: - Fixed delay slot issue. Vijay Kumar (2): sparc64: Define SPARC default fls and __fls sparc64: Use lzcnt instruction for fls and __fls arch/sparc/Makefile|1 + arch/sparc/include/asm/bitops_64.h |7 +- arch/sparc/kernel/head_64.S|2 + arch/sparc/lib/Makefile|4 + arch/sparc/lib/NG4fls.S| 31 + arch/sparc/lib/NG4patch.S |9 +++ arch/sparc/lib/fls.S | 126 7 files changed, 177 insertions(+), 3 deletions(-) create mode 100644 arch/sparc/lib/NG4fls.S create mode 100644 arch/sparc/lib/fls.S
[PATCH v3 0/5] sparc64: Optimize fls and __fls
SPARC provides the lzcnt instruction (with VIS3), which can be used to optimize the fls, __fls and fls64 functions. For systems that support the lzcnt instruction, we now do boot-time patching to use the SPARC-optimized fls, __fls and fls64 functions. v2->v3: - Using ENTRY(), ENDPROC() for assembler functions. - Removed BITS_PER_LONG from __fls. - Using generic fls64(). - Replaced lzcnt instruction with .word directive. v1->v2: - Fixed delay slot issue. Vijay Kumar (2): sparc64: Define SPARC default fls and __fls sparc64: Use lzcnt instruction for fls and __fls arch/sparc/Makefile|1 + arch/sparc/include/asm/bitops_64.h |7 +- arch/sparc/kernel/head_64.S|2 + arch/sparc/lib/Makefile|4 + arch/sparc/lib/NG4fls.S| 31 + arch/sparc/lib/NG4patch.S |9 +++ arch/sparc/lib/fls.S | 126 7 files changed, 177 insertions(+), 3 deletions(-) create mode 100644 arch/sparc/lib/NG4fls.S create mode 100644 arch/sparc/lib/fls.S
[PATCH v3 2/5] sparc64: Define SPARC default __fls function
__fls will now require a boot time patching on T4 and above. Redefining it under arch/sparc/lib. Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com> --- arch/sparc/include/asm/bitops_64.h |2 +- arch/sparc/lib/Makefile|1 + arch/sparc/lib/fls64.S | 61 3 files changed, 63 insertions(+), 1 deletions(-) diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h index 30aea56..d7a46e2 100644 --- a/arch/sparc/include/asm/bitops_64.h +++ b/arch/sparc/include/asm/bitops_64.h @@ -23,10 +23,10 @@ void change_bit(unsigned long nr, volatile unsigned long *addr); int fls(unsigned int word); +int __fls(unsigned long word); #include -#include #include #ifdef __KERNEL__ diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index 3b9f5e0..5380c59 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -17,6 +17,7 @@ lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o lib-$(CONFIG_SPARC64) += multi3.o lib-$(CONFIG_SPARC64) += fls.o +lib-$(CONFIG_SPARC64) += fls64.o lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o diff --git a/arch/sparc/lib/fls64.S b/arch/sparc/lib/fls64.S new file mode 100644 index 000..c83e22a --- /dev/null +++ b/arch/sparc/lib/fls64.S @@ -0,0 +1,61 @@ +/* fls64.S: SPARC default __fls definition. + * + * SPARC default __fls definition, which follows the same algorithm as + * in generic __fls(). This function will be boot time patched on T4 + * and onward. 
+ */ + +#include +#include + + .text + .register %g2, #scratch + .register %g3, #scratch +ENTRY(__fls) + mov -1, %g2 + sllx%g2, 32, %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 1f +mov63, %g1 + sllx%o0, 32, %o0 + mov 31, %g1 +1: + mov -1, %g2 + sllx%g2, 48, %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 2f +mov-1, %g2 + sllx%o0, 16, %o0 + add %g1, -16, %g1 +2: + mov -1, %g2 + sllx%g2, 56, %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 3f +mov-1, %g2 + sllx%o0, 8, %o0 + add %g1, -8, %g1 +3: + sllx%g2, 60, %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 4f +mov-1, %g2 + sllx%o0, 4, %o0 + add %g1, -4, %g1 +4: + sllx%g2, 62, %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 5f +mov-1, %g3 + sllx%o0, 2, %o0 + add %g1, -2, %g1 +5: + mov 0, %g2 + sllx%g3, 63, %g3 + and %o0, %g3, %o0 + movre %o0, 1, %g2 + sub %g1, %g2, %g1 + jmp %o7+8 +sra%g1, 0, %o0 +ENDPROC(__fls) +EXPORT_SYMBOL(__fls) -- 1.7.1
[PATCH v3 2/5] sparc64: Define SPARC default __fls function
__fls will now require a boot time patching on T4 and above. Redefining it under arch/sparc/lib. Signed-off-by: Vijay Kumar --- arch/sparc/include/asm/bitops_64.h |2 +- arch/sparc/lib/Makefile|1 + arch/sparc/lib/fls64.S | 61 3 files changed, 63 insertions(+), 1 deletions(-) diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h index 30aea56..d7a46e2 100644 --- a/arch/sparc/include/asm/bitops_64.h +++ b/arch/sparc/include/asm/bitops_64.h @@ -23,10 +23,10 @@ void change_bit(unsigned long nr, volatile unsigned long *addr); int fls(unsigned int word); +int __fls(unsigned long word); #include -#include #include #ifdef __KERNEL__ diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index 3b9f5e0..5380c59 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -17,6 +17,7 @@ lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o lib-$(CONFIG_SPARC64) += multi3.o lib-$(CONFIG_SPARC64) += fls.o +lib-$(CONFIG_SPARC64) += fls64.o lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o diff --git a/arch/sparc/lib/fls64.S b/arch/sparc/lib/fls64.S new file mode 100644 index 000..c83e22a --- /dev/null +++ b/arch/sparc/lib/fls64.S @@ -0,0 +1,61 @@ +/* fls64.S: SPARC default __fls definition. + * + * SPARC default __fls definition, which follows the same algorithm as + * in generic __fls(). This function will be boot time patched on T4 + * and onward. 
+ */ + +#include +#include + + .text + .register %g2, #scratch + .register %g3, #scratch +ENTRY(__fls) + mov -1, %g2 + sllx%g2, 32, %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 1f +mov63, %g1 + sllx%o0, 32, %o0 + mov 31, %g1 +1: + mov -1, %g2 + sllx%g2, 48, %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 2f +mov-1, %g2 + sllx%o0, 16, %o0 + add %g1, -16, %g1 +2: + mov -1, %g2 + sllx%g2, 56, %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 3f +mov-1, %g2 + sllx%o0, 8, %o0 + add %g1, -8, %g1 +3: + sllx%g2, 60, %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 4f +mov-1, %g2 + sllx%o0, 4, %o0 + add %g1, -4, %g1 +4: + sllx%g2, 62, %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 5f +mov-1, %g3 + sllx%o0, 2, %o0 + add %g1, -2, %g1 +5: + mov 0, %g2 + sllx%g3, 63, %g3 + and %o0, %g3, %o0 + movre %o0, 1, %g2 + sub %g1, %g2, %g1 + jmp %o7+8 +sra%g1, 0, %o0 +ENDPROC(__fls) +EXPORT_SYMBOL(__fls) -- 1.7.1
[PATCH v3 3/5] sparc64: SPARC optimised fls function
Defined SPARC optimised fls using lzcnt opcode. Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com> --- arch/sparc/lib/Makefile |1 + arch/sparc/lib/NG4fls.S | 20 2 files changed, 21 insertions(+), 0 deletions(-) diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index 5380c59..2823b8e 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -18,6 +18,7 @@ lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o lib-$(CONFIG_SPARC64) += multi3.o lib-$(CONFIG_SPARC64) += fls.o lib-$(CONFIG_SPARC64) += fls64.o +obj-$(CONFIG_SPARC64) += NG4fls.o lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S new file mode 100644 index 000..5ed7da9 --- /dev/null +++ b/arch/sparc/lib/NG4fls.S @@ -0,0 +1,20 @@ +/* NG4fls.S: SPARC optimized fls and __fls for T4 and above. + * + * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. + */ + +#include + +#define LXCNT_O0_G2\ + .word 0x85b002e8 + + .text + .register %g2, #scratch + .register %g3, #scratch + +ENTRY(NG4fls) + LXCNT_O0_G2 !lzcnt %o0, %g2 + mov 64, %g3 + retl +sub%g3, %g2, %o0 +ENDPROC(NG4fls) -- 1.7.1
[PATCH v3 3/5] sparc64: SPARC optimised fls function
Defined SPARC optimised fls using lzcnt opcode. Signed-off-by: Vijay Kumar --- arch/sparc/lib/Makefile |1 + arch/sparc/lib/NG4fls.S | 20 2 files changed, 21 insertions(+), 0 deletions(-) diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index 5380c59..2823b8e 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -18,6 +18,7 @@ lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o lib-$(CONFIG_SPARC64) += multi3.o lib-$(CONFIG_SPARC64) += fls.o lib-$(CONFIG_SPARC64) += fls64.o +obj-$(CONFIG_SPARC64) += NG4fls.o lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S new file mode 100644 index 000..5ed7da9 --- /dev/null +++ b/arch/sparc/lib/NG4fls.S @@ -0,0 +1,20 @@ +/* NG4fls.S: SPARC optimized fls and __fls for T4 and above. + * + * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. + */ + +#include + +#define LXCNT_O0_G2\ + .word 0x85b002e8 + + .text + .register %g2, #scratch + .register %g3, #scratch + +ENTRY(NG4fls) + LXCNT_O0_G2 !lzcnt %o0, %g2 + mov 64, %g3 + retl +sub%g3, %g2, %o0 +ENDPROC(NG4fls) -- 1.7.1
[PATCH v3 4/5] sparc64: SPARC optimised __fls function
Defined SPARC optimised __fls using lzcnt opcode. Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com> --- arch/sparc/lib/NG4fls.S | 10 ++ 1 files changed, 10 insertions(+), 0 deletions(-) diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S index 5ed7da9..34ad371 100644 --- a/arch/sparc/lib/NG4fls.S +++ b/arch/sparc/lib/NG4fls.S @@ -18,3 +18,13 @@ ENTRY(NG4fls) retl sub%g3, %g2, %o0 ENDPROC(NG4fls) + +ENTRY(__NG4fls) + brz,pn %o0, 1f + LXCNT_O0_G2 !lzcnt %o0, %g2 + mov 63, %g3 + sub %g3, %g2, %o0 +1: + retl +nop +ENDPROC(__NG4fls) -- 1.7.1
[PATCH v3 4/5] sparc64: SPARC optimised __fls function
Defined SPARC optimised __fls using lzcnt opcode. Signed-off-by: Vijay Kumar --- arch/sparc/lib/NG4fls.S | 10 ++ 1 files changed, 10 insertions(+), 0 deletions(-) diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S index 5ed7da9..34ad371 100644 --- a/arch/sparc/lib/NG4fls.S +++ b/arch/sparc/lib/NG4fls.S @@ -18,3 +18,13 @@ ENTRY(NG4fls) retl sub%g3, %g2, %o0 ENDPROC(NG4fls) + +ENTRY(__NG4fls) + brz,pn %o0, 1f + LXCNT_O0_G2 !lzcnt %o0, %g2 + mov 63, %g3 + sub %g3, %g2, %o0 +1: + retl +nop +ENDPROC(__NG4fls) -- 1.7.1
[PATCH v3 1/5] sparc64: Define SPARC default fls function
fls will now require a boot time patching on T4 and above. Redefining it under arch/sparc/lib. Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com> --- arch/sparc/include/asm/bitops_64.h |3 +- arch/sparc/lib/Makefile|1 + arch/sparc/lib/fls.S | 67 3 files changed, 70 insertions(+), 1 deletions(-) diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h index 2d52240..30aea56 100644 --- a/arch/sparc/include/asm/bitops_64.h +++ b/arch/sparc/include/asm/bitops_64.h @@ -22,9 +22,10 @@ void clear_bit(unsigned long nr, volatile unsigned long *addr); void change_bit(unsigned long nr, volatile unsigned long *addr); +int fls(unsigned int word); + #include -#include #include #include diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index a1a2d39..3b9f5e0 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -16,6 +16,7 @@ lib-$(CONFIG_SPARC64) += atomic_64.o lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o lib-$(CONFIG_SPARC64) += multi3.o +lib-$(CONFIG_SPARC64) += fls.o lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o diff --git a/arch/sparc/lib/fls.S b/arch/sparc/lib/fls.S new file mode 100644 index 000..06b8d30 --- /dev/null +++ b/arch/sparc/lib/fls.S @@ -0,0 +1,67 @@ +/* fls.S: SPARC default fls definition. + * + * SPARC default fls definition, which follows the same algorithm as + * in generic fls(). This function will be boot time patched on T4 + * and onward. 
+ */ + +#include +#include + + .text + .register %g2, #scratch + .register %g3, #scratch +ENTRY(fls) + brz,pn %o0, 6f +mov0, %o1 + sethi %hi(0x), %g3 + mov %o0, %g2 + andcc %o0, %g3, %g0 + be,pt %icc, 8f +mov32, %o1 + sethi %hi(0xff00), %g3 + andcc %g2, %g3, %g0 + bne,pt %icc, 3f +sethi %hi(0xf000), %g3 + sll %o0, 8, %o0 +1: + add %o1, -8, %o1 + sra %o0, 0, %o0 + mov %o0, %g2 +2: + sethi %hi(0xf000), %g3 +3: + andcc %g2, %g3, %g0 + bne,pt %icc, 4f +sethi %hi(0xc000), %g3 + sll %o0, 4, %o0 + add %o1, -4, %o1 + sra %o0, 0, %o0 + mov %o0, %g2 +4: + andcc %g2, %g3, %g0 + be,a,pt %icc, 7f +sll%o0, 2, %o0 +5: + xnor%g0, %o0, %o0 + srl %o0, 31, %o0 + sub %o1, %o0, %o1 +6: + jmp %o7 + 8 +sra%o1, 0, %o0 +7: + add %o1, -2, %o1 + ba,pt %xcc, 5b +sra%o0, 0, %o0 +8: + sll %o0, 16, %o0 + sethi %hi(0xff00), %g3 + sra %o0, 0, %o0 + mov %o0, %g2 + andcc %g2, %g3, %g0 + bne,pt %icc, 2b +mov16, %o1 + ba,pt %xcc, 1b +sll%o0, 8, %o0 +ENDPROC(fls) +EXPORT_SYMBOL(fls) -- 1.7.1
[PATCH v3 1/5] sparc64: Define SPARC default fls function
fls will now require a boot time patching on T4 and above. Redefining it under arch/sparc/lib. Signed-off-by: Vijay Kumar --- arch/sparc/include/asm/bitops_64.h |3 +- arch/sparc/lib/Makefile|1 + arch/sparc/lib/fls.S | 67 3 files changed, 70 insertions(+), 1 deletions(-) diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h index 2d52240..30aea56 100644 --- a/arch/sparc/include/asm/bitops_64.h +++ b/arch/sparc/include/asm/bitops_64.h @@ -22,9 +22,10 @@ void clear_bit(unsigned long nr, volatile unsigned long *addr); void change_bit(unsigned long nr, volatile unsigned long *addr); +int fls(unsigned int word); + #include -#include #include #include diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index a1a2d39..3b9f5e0 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -16,6 +16,7 @@ lib-$(CONFIG_SPARC64) += atomic_64.o lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o lib-$(CONFIG_SPARC64) += multi3.o +lib-$(CONFIG_SPARC64) += fls.o lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o diff --git a/arch/sparc/lib/fls.S b/arch/sparc/lib/fls.S new file mode 100644 index 000..06b8d30 --- /dev/null +++ b/arch/sparc/lib/fls.S @@ -0,0 +1,67 @@ +/* fls.S: SPARC default fls definition. + * + * SPARC default fls definition, which follows the same algorithm as + * in generic fls(). This function will be boot time patched on T4 + * and onward. 
+ */ + +#include +#include + + .text + .register %g2, #scratch + .register %g3, #scratch +ENTRY(fls) + brz,pn %o0, 6f +mov0, %o1 + sethi %hi(0x), %g3 + mov %o0, %g2 + andcc %o0, %g3, %g0 + be,pt %icc, 8f +mov32, %o1 + sethi %hi(0xff00), %g3 + andcc %g2, %g3, %g0 + bne,pt %icc, 3f +sethi %hi(0xf000), %g3 + sll %o0, 8, %o0 +1: + add %o1, -8, %o1 + sra %o0, 0, %o0 + mov %o0, %g2 +2: + sethi %hi(0xf000), %g3 +3: + andcc %g2, %g3, %g0 + bne,pt %icc, 4f +sethi %hi(0xc000), %g3 + sll %o0, 4, %o0 + add %o1, -4, %o1 + sra %o0, 0, %o0 + mov %o0, %g2 +4: + andcc %g2, %g3, %g0 + be,a,pt %icc, 7f +sll%o0, 2, %o0 +5: + xnor%g0, %o0, %o0 + srl %o0, 31, %o0 + sub %o1, %o0, %o1 +6: + jmp %o7 + 8 +sra%o1, 0, %o0 +7: + add %o1, -2, %o1 + ba,pt %xcc, 5b +sra%o0, 0, %o0 +8: + sll %o0, 16, %o0 + sethi %hi(0xff00), %g3 + sra %o0, 0, %o0 + mov %o0, %g2 + andcc %g2, %g3, %g0 + bne,pt %icc, 2b +mov16, %o1 + ba,pt %xcc, 1b +sll%o0, 8, %o0 +ENDPROC(fls) +EXPORT_SYMBOL(fls) -- 1.7.1
Re: [PATCH v2 2/2] sparc64: Use lzcnt instruction for fls and __fls
Hi Sam, On 9/27/2017 2:56 PM, Sam Ravnborg wrote: .size niagara4_patch_pageops,.-niagara4_patch_pageops + + .globl niagara4_patch_fls + .type niagara4_patch_fls,#function +niagara4_patch_fls: + NG_DO_PATCH(fls, NG4fls) + NG_DO_PATCH(__fls, __NG4fls) + retl +nop + .size niagara4_patch_fls,.-niagara4_patch_fls Please patch the remaining functions in this file with ENTRY(), ENDPROC() - in a separate patch. Thanks for your comment. Sure, I will make the changes and address other comments as well in my revised version. - Vijay
Re: [PATCH v2 2/2] sparc64: Use lzcnt instruction for fls and __fls
Hi Sam, On 9/27/2017 2:56 PM, Sam Ravnborg wrote: .size niagara4_patch_pageops,.-niagara4_patch_pageops + + .globl niagara4_patch_fls + .type niagara4_patch_fls,#function +niagara4_patch_fls: + NG_DO_PATCH(fls, NG4fls) + NG_DO_PATCH(__fls, __NG4fls) + retl +nop + .size niagara4_patch_fls,.-niagara4_patch_fls Please patch the remaining functions in this file with ENTRY(), ENDPROC() - in a separate patch. Thanks for your comment. Sure, I will make the changes and address other comments as well in my revised version. - Vijay
[PATCH v2 2/2] sparc64: Use lzcnt instruction for fls and __fls
For T4 and above, patch fls and __fls functions at the boot time to use lzcnt instruction. Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com> Reviewed-by: Babu Moger <babu.mo...@oracle.com> --- arch/sparc/Makefile |1 + arch/sparc/kernel/head_64.S |2 ++ arch/sparc/lib/Makefile |3 +++ arch/sparc/lib/NG4fls.S | 30 ++ arch/sparc/lib/NG4patch.S |9 + 5 files changed, 45 insertions(+), 0 deletions(-) diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile index 8496a07..0763cd8 100644 --- a/arch/sparc/Makefile +++ b/arch/sparc/Makefile @@ -48,6 +48,7 @@ KBUILD_CFLAGS += -ffixed-g4 -ffixed-g5 -fcall-used-g7 -Wno-sign-compare KBUILD_CFLAGS += -Wa,--undeclared-regs KBUILD_CFLAGS += $(call cc-option,-mtune=ultrasparc3) KBUILD_AFLAGS += -m64 -mcpu=ultrasparc -Wa,--undeclared-regs +KBUILD_AFLAGS += -Wa,-Asparc4 ifeq ($(CONFIG_MCOUNT),y) KBUILD_CFLAGS += -pg diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 78e0211..1165254 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -628,6 +628,8 @@ niagara4_patch: nop callniagara4_patch_pageops nop + callniagara4_patch_fls +nop ba,a,pt %xcc, 80f nop diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index eefbb9c..72d2d8c 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -46,3 +46,6 @@ lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o obj-$(CONFIG_SPARC64) += iomap.o obj-$(CONFIG_SPARC32) += atomic32.o ucmpdi2.o obj-$(CONFIG_SPARC64) += PeeCeeI.o + +obj-$(CONFIG_SPARC64) += fls.o +obj-$(CONFIG_SPARC64) += NG4fls.o diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S new file mode 100644 index 000..eb239aa --- /dev/null +++ b/arch/sparc/lib/NG4fls.S @@ -0,0 +1,30 @@ +/* NG4fls.S: SPARC optimized fls and __fls for T4 and above. + * + * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. 
+ */ + + .text + .align 32 + + .globl NG4fls + .globl __NG4fls + .type NG4fls, #function + .type __NG4fls, #function + +NG4fls: + lzcnt %o0, %o1 + mov 64, %o2 + retl +sub %o2, %o1, %o0 + .size NG4fls, .-NG4fls + +__NG4fls: + brz,pn %o0, 1f +mov%o0, %o1 + lzcnt %o1, %o0 + mov 63, %o2 + sub %o2, %o0, %o0 +1: + retl +nop + .size __NG4fls, .-__NG4fls diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S index 3cc0f8c..1010d53 100644 --- a/arch/sparc/lib/NG4patch.S +++ b/arch/sparc/lib/NG4patch.S @@ -52,3 +52,12 @@ niagara4_patch_pageops: retl nop .size niagara4_patch_pageops,.-niagara4_patch_pageops + + .globl niagara4_patch_fls + .type niagara4_patch_fls,#function +niagara4_patch_fls: + NG_DO_PATCH(fls, NG4fls) + NG_DO_PATCH(__fls, __NG4fls) + retl +nop + .size niagara4_patch_fls,.-niagara4_patch_fls -- 1.7.1
[PATCH v2 2/2] sparc64: Use lzcnt instruction for fls and __fls
For T4 and above, patch fls and __fls functions at the boot time to use lzcnt instruction. Signed-off-by: Vijay Kumar Reviewed-by: Babu Moger --- arch/sparc/Makefile |1 + arch/sparc/kernel/head_64.S |2 ++ arch/sparc/lib/Makefile |3 +++ arch/sparc/lib/NG4fls.S | 30 ++ arch/sparc/lib/NG4patch.S |9 + 5 files changed, 45 insertions(+), 0 deletions(-) diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile index 8496a07..0763cd8 100644 --- a/arch/sparc/Makefile +++ b/arch/sparc/Makefile @@ -48,6 +48,7 @@ KBUILD_CFLAGS += -ffixed-g4 -ffixed-g5 -fcall-used-g7 -Wno-sign-compare KBUILD_CFLAGS += -Wa,--undeclared-regs KBUILD_CFLAGS += $(call cc-option,-mtune=ultrasparc3) KBUILD_AFLAGS += -m64 -mcpu=ultrasparc -Wa,--undeclared-regs +KBUILD_AFLAGS += -Wa,-Asparc4 ifeq ($(CONFIG_MCOUNT),y) KBUILD_CFLAGS += -pg diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 78e0211..1165254 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -628,6 +628,8 @@ niagara4_patch: nop callniagara4_patch_pageops nop + callniagara4_patch_fls +nop ba,a,pt %xcc, 80f nop diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index eefbb9c..72d2d8c 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -46,3 +46,6 @@ lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o obj-$(CONFIG_SPARC64) += iomap.o obj-$(CONFIG_SPARC32) += atomic32.o ucmpdi2.o obj-$(CONFIG_SPARC64) += PeeCeeI.o + +obj-$(CONFIG_SPARC64) += fls.o +obj-$(CONFIG_SPARC64) += NG4fls.o diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S new file mode 100644 index 000..eb239aa --- /dev/null +++ b/arch/sparc/lib/NG4fls.S @@ -0,0 +1,30 @@ +/* NG4fls.S: SPARC optimized fls and __fls for T4 and above. + * + * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. 
+ */ + + .text + .align 32 + + .globl NG4fls + .globl __NG4fls + .type NG4fls, #function + .type __NG4fls, #function + +NG4fls: + lzcnt %o0, %o1 + mov 64, %o2 + retl +sub %o2, %o1, %o0 + .size NG4fls, .-NG4fls + +__NG4fls: + brz,pn %o0, 1f +mov%o0, %o1 + lzcnt %o1, %o0 + mov 63, %o2 + sub %o2, %o0, %o0 +1: + retl +nop + .size __NG4fls, .-__NG4fls diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S index 3cc0f8c..1010d53 100644 --- a/arch/sparc/lib/NG4patch.S +++ b/arch/sparc/lib/NG4patch.S @@ -52,3 +52,12 @@ niagara4_patch_pageops: retl nop .size niagara4_patch_pageops,.-niagara4_patch_pageops + + .globl niagara4_patch_fls + .type niagara4_patch_fls,#function +niagara4_patch_fls: + NG_DO_PATCH(fls, NG4fls) + NG_DO_PATCH(__fls, __NG4fls) + retl +nop + .size niagara4_patch_fls,.-niagara4_patch_fls -- 1.7.1
[PATCH v2 0/2] sparc64: Optimize fls, fls64 and __fls
SPARC provides the lzcnt instruction (with VIS3), which can be used to optimize the fls, fls64 and __fls functions. For systems that support the lzcnt instruction, we now do boot-time patching to use the SPARC-optimized fls, fls64 and __fls functions. v1->v2: - Fixed delay slot issue pointed out by Rob Gardner in patch 2/2. Vijay Kumar (2): sparc64: Define SPARC default fls and __fls sparc64: Use lzcnt instruction for fls and __fls arch/sparc/Makefile|1 + arch/sparc/include/asm/bitops_64.h |7 +- arch/sparc/kernel/head_64.S|2 + arch/sparc/lib/Makefile|4 + arch/sparc/lib/NG4fls.S| 31 + arch/sparc/lib/NG4patch.S |9 +++ arch/sparc/lib/fls.S | 126 7 files changed, 177 insertions(+), 3 deletions(-) create mode 100644 arch/sparc/lib/NG4fls.S create mode 100644 arch/sparc/lib/fls.S
[PATCH v2 0/2] sparc64: Optimize fls, fls64 and __fls
SPARC provides the lzcnt instruction (with VIS3), which can be used to optimize the fls, fls64 and __fls functions. For systems that support the lzcnt instruction, we now do boot-time patching to use the SPARC-optimized fls, fls64 and __fls functions. v1->v2: - Fixed delay slot issue pointed out by Rob Gardner in patch 2/2. Vijay Kumar (2): sparc64: Define SPARC default fls and __fls sparc64: Use lzcnt instruction for fls and __fls arch/sparc/Makefile|1 + arch/sparc/include/asm/bitops_64.h |7 +- arch/sparc/kernel/head_64.S|2 + arch/sparc/lib/Makefile|4 + arch/sparc/lib/NG4fls.S| 31 + arch/sparc/lib/NG4patch.S |9 +++ arch/sparc/lib/fls.S | 126 7 files changed, 177 insertions(+), 3 deletions(-) create mode 100644 arch/sparc/lib/NG4fls.S create mode 100644 arch/sparc/lib/fls.S
[PATCH v2 1/2] sparc64: Define SPARC default fls and __fls
fls and __fls will now require boot time patching on T4 and above. Redefining these functions under arch/sparc/lib. Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com> Reviewed-by: Babu Moger <babu.mo...@oracle.com> --- arch/sparc/include/asm/bitops_64.h |7 +- arch/sparc/lib/Makefile|1 + arch/sparc/lib/fls.S | 126 3 files changed, 131 insertions(+), 3 deletions(-) diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h index 2d52240..946c236 100644 --- a/arch/sparc/include/asm/bitops_64.h +++ b/arch/sparc/include/asm/bitops_64.h @@ -22,11 +22,12 @@ void clear_bit(unsigned long nr, volatile unsigned long *addr); void change_bit(unsigned long nr, volatile unsigned long *addr); +#define fls64(word) (((word)?(__fls(word) + 1):0)) +int fls(unsigned int word); +int __fls(unsigned long word); + #include -#include -#include -#include #ifdef __KERNEL__ diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index 07c03e7..eefbb9c 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -16,6 +16,7 @@ lib-$(CONFIG_SPARC64) += atomic_64.o lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o lib-$(CONFIG_SPARC64) += multi3.o +lib-$(CONFIG_SPARC64) += fls.o lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o diff --git a/arch/sparc/lib/fls.S b/arch/sparc/lib/fls.S new file mode 100644 index 000..a19bff2 --- /dev/null +++ b/arch/sparc/lib/fls.S @@ -0,0 +1,126 @@ +/* fls.S: SPARC default fls and __fls definitions. + * + * SPARC default fls and __fls definitions, which follows the same + * algorithm as in generic fls() and __fls(). These functions will + * be boot time patched on T4 and onward. 
+ */ + +#include +#include + + .text + .align 32 + + .global fls, __fls + .type fls,#function + .type __fls, #function + + .register %g2, #scratch + .register %g3, #scratch + +EXPORT_SYMBOL(__fls) +EXPORT_SYMBOL(fls) + +fls: + brz,pn %o0, 6f +mov0, %o1 + sethi %hi(0x), %g3 + mov %o0, %g2 + andcc %o0, %g3, %g0 + be,pt %icc, 8f +mov32, %o1 + sethi %hi(0xff00), %g3 + andcc %g2, %g3, %g0 + bne,pt %icc, 3f +sethi %hi(0xf000), %g3 + sll %o0, 8, %o0 +1: + add %o1, -8, %o1 + sra %o0, 0, %o0 + mov %o0, %g2 +2: + sethi %hi(0xf000), %g3 +3: + andcc %g2, %g3, %g0 + bne,pt %icc, 4f +sethi %hi(0xc000), %g3 + sll %o0, 4, %o0 + add %o1, -4, %o1 + sra %o0, 0, %o0 + mov %o0, %g2 +4: + andcc %g2, %g3, %g0 + be,a,pt %icc, 7f +sll%o0, 2, %o0 +5: + xnor%g0, %o0, %o0 + srl %o0, 31, %o0 + sub %o1, %o0, %o1 +6: + jmp %o7 + 8 +sra%o1, 0, %o0 +7: + add %o1, -2, %o1 + ba,pt %xcc, 5b +sra%o0, 0, %o0 +8: + sll %o0, 16, %o0 + sethi %hi(0xff00), %g3 + sra %o0, 0, %o0 + mov %o0, %g2 + andcc %g2, %g3, %g0 + bne,pt %icc, 2b +mov16, %o1 + ba,pt %xcc, 1b +sll%o0, 8, %o0 + .size fls, .-fls + +__fls: +#if BITS_PER_LONG == 64 + mov -1, %g2 + sllx%g2, 32, %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 1f +mov63, %g1 + sllx%o0, 32, %o0 +#endif + mov 31, %g1 +1: + mov -1, %g2 + sllx%g2, (BITS_PER_LONG-16), %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 2f +mov-1, %g2 + sllx%o0, 16, %o0 + add %g1, -16, %g1 +2: + mov -1, %g2 + sllx%g2, (BITS_PER_LONG-8), %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 3f +mov-1, %g2 + sllx%o0, 8, %o0 + add %g1, -8, %g1 +3: + sllx%g2, (BITS_PER_LONG-4), %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 4f +mov-1, %g2 + sllx%o0, 4, %o0 + add %g1, -4, %g1 +4: + sllx%g2, (BITS_PER_LONG-2), %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 5f +mov-1, %g3 + sllx%o0, 2, %o0 + add %g1, -2, %g1 +5: + mov 0, %g2 + sllx%g3, (BITS_PER_LONG-1), %g3 + and %o0, %g3, %o0 + movre %o0, 1, %g2 + sub %g1, %g2, %g1 + jmp %o7+8 +sra%g1, 0, %o0 + .size __fls, .-__fls -- 1.7.1
[PATCH v2 1/2] sparc64: Define SPARC default fls and __fls
fls and __fls will now require boot time patching on T4 and above. Redefining these functions under arch/sparc/lib. Signed-off-by: Vijay Kumar Reviewed-by: Babu Moger --- arch/sparc/include/asm/bitops_64.h |7 +- arch/sparc/lib/Makefile|1 + arch/sparc/lib/fls.S | 126 3 files changed, 131 insertions(+), 3 deletions(-) diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h index 2d52240..946c236 100644 --- a/arch/sparc/include/asm/bitops_64.h +++ b/arch/sparc/include/asm/bitops_64.h @@ -22,11 +22,12 @@ void clear_bit(unsigned long nr, volatile unsigned long *addr); void change_bit(unsigned long nr, volatile unsigned long *addr); +#define fls64(word) (((word)?(__fls(word) + 1):0)) +int fls(unsigned int word); +int __fls(unsigned long word); + #include -#include -#include -#include #ifdef __KERNEL__ diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index 07c03e7..eefbb9c 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -16,6 +16,7 @@ lib-$(CONFIG_SPARC64) += atomic_64.o lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o lib-$(CONFIG_SPARC64) += multi3.o +lib-$(CONFIG_SPARC64) += fls.o lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o diff --git a/arch/sparc/lib/fls.S b/arch/sparc/lib/fls.S new file mode 100644 index 000..a19bff2 --- /dev/null +++ b/arch/sparc/lib/fls.S @@ -0,0 +1,126 @@ +/* fls.S: SPARC default fls and __fls definitions. + * + * SPARC default fls and __fls definitions, which follows the same + * algorithm as in generic fls() and __fls(). These functions will + * be boot time patched on T4 and onward. 
+ */ + +#include +#include + + .text + .align 32 + + .global fls, __fls + .type fls,#function + .type __fls, #function + + .register %g2, #scratch + .register %g3, #scratch + +EXPORT_SYMBOL(__fls) +EXPORT_SYMBOL(fls) + +fls: + brz,pn %o0, 6f +mov0, %o1 + sethi %hi(0x), %g3 + mov %o0, %g2 + andcc %o0, %g3, %g0 + be,pt %icc, 8f +mov32, %o1 + sethi %hi(0xff00), %g3 + andcc %g2, %g3, %g0 + bne,pt %icc, 3f +sethi %hi(0xf000), %g3 + sll %o0, 8, %o0 +1: + add %o1, -8, %o1 + sra %o0, 0, %o0 + mov %o0, %g2 +2: + sethi %hi(0xf000), %g3 +3: + andcc %g2, %g3, %g0 + bne,pt %icc, 4f +sethi %hi(0xc000), %g3 + sll %o0, 4, %o0 + add %o1, -4, %o1 + sra %o0, 0, %o0 + mov %o0, %g2 +4: + andcc %g2, %g3, %g0 + be,a,pt %icc, 7f +sll%o0, 2, %o0 +5: + xnor%g0, %o0, %o0 + srl %o0, 31, %o0 + sub %o1, %o0, %o1 +6: + jmp %o7 + 8 +sra%o1, 0, %o0 +7: + add %o1, -2, %o1 + ba,pt %xcc, 5b +sra%o0, 0, %o0 +8: + sll %o0, 16, %o0 + sethi %hi(0xff00), %g3 + sra %o0, 0, %o0 + mov %o0, %g2 + andcc %g2, %g3, %g0 + bne,pt %icc, 2b +mov16, %o1 + ba,pt %xcc, 1b +sll%o0, 8, %o0 + .size fls, .-fls + +__fls: +#if BITS_PER_LONG == 64 + mov -1, %g2 + sllx%g2, 32, %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 1f +mov63, %g1 + sllx%o0, 32, %o0 +#endif + mov 31, %g1 +1: + mov -1, %g2 + sllx%g2, (BITS_PER_LONG-16), %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 2f +mov-1, %g2 + sllx%o0, 16, %o0 + add %g1, -16, %g1 +2: + mov -1, %g2 + sllx%g2, (BITS_PER_LONG-8), %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 3f +mov-1, %g2 + sllx%o0, 8, %o0 + add %g1, -8, %g1 +3: + sllx%g2, (BITS_PER_LONG-4), %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 4f +mov-1, %g2 + sllx%o0, 4, %o0 + add %g1, -4, %g1 +4: + sllx%g2, (BITS_PER_LONG-2), %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 5f +mov-1, %g3 + sllx%o0, 2, %o0 + add %g1, -2, %g1 +5: + mov 0, %g2 + sllx%g3, (BITS_PER_LONG-1), %g3 + and %o0, %g3, %o0 + movre %o0, 1, %g2 + sub %g1, %g2, %g1 + jmp %o7+8 +sra%g1, 0, %o0 + .size __fls, .-__fls -- 1.7.1
[PATCH 1/2] sparc64: Define SPARC default fls and __fls
fls and __fls will now require boot time patching on T4 and above. Redefining these functions under arch/sparc/lib. Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com> Reviewed-by: Babu Moger <babu.mo...@oracle.com> --- arch/sparc/include/asm/bitops_64.h |7 +- arch/sparc/lib/Makefile|1 + arch/sparc/lib/fls.S | 126 3 files changed, 131 insertions(+), 3 deletions(-) diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h index 2d52240..946c236 100644 --- a/arch/sparc/include/asm/bitops_64.h +++ b/arch/sparc/include/asm/bitops_64.h @@ -22,11 +22,12 @@ void clear_bit(unsigned long nr, volatile unsigned long *addr); void change_bit(unsigned long nr, volatile unsigned long *addr); +#define fls64(word) (((word)?(__fls(word) + 1):0)) +int fls(unsigned int word); +int __fls(unsigned long word); + #include -#include -#include -#include #ifdef __KERNEL__ diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index 07c03e7..eefbb9c 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -16,6 +16,7 @@ lib-$(CONFIG_SPARC64) += atomic_64.o lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o lib-$(CONFIG_SPARC64) += multi3.o +lib-$(CONFIG_SPARC64) += fls.o lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o diff --git a/arch/sparc/lib/fls.S b/arch/sparc/lib/fls.S new file mode 100644 index 000..a19bff2 --- /dev/null +++ b/arch/sparc/lib/fls.S @@ -0,0 +1,126 @@ +/* fls.S: SPARC default fls and __fls definitions. + * + * SPARC default fls and __fls definitions, which follows the same + * algorithm as in generic fls() and __fls(). These functions will + * be boot time patched on T4 and onward. 
+ */ + +#include +#include + + .text + .align 32 + + .global fls, __fls + .type fls,#function + .type __fls, #function + + .register %g2, #scratch + .register %g3, #scratch + +EXPORT_SYMBOL(__fls) +EXPORT_SYMBOL(fls) + +fls: + brz,pn %o0, 6f +mov0, %o1 + sethi %hi(0x), %g3 + mov %o0, %g2 + andcc %o0, %g3, %g0 + be,pt %icc, 8f +mov32, %o1 + sethi %hi(0xff00), %g3 + andcc %g2, %g3, %g0 + bne,pt %icc, 3f +sethi %hi(0xf000), %g3 + sll %o0, 8, %o0 +1: + add %o1, -8, %o1 + sra %o0, 0, %o0 + mov %o0, %g2 +2: + sethi %hi(0xf000), %g3 +3: + andcc %g2, %g3, %g0 + bne,pt %icc, 4f +sethi %hi(0xc000), %g3 + sll %o0, 4, %o0 + add %o1, -4, %o1 + sra %o0, 0, %o0 + mov %o0, %g2 +4: + andcc %g2, %g3, %g0 + be,a,pt %icc, 7f +sll%o0, 2, %o0 +5: + xnor%g0, %o0, %o0 + srl %o0, 31, %o0 + sub %o1, %o0, %o1 +6: + jmp %o7 + 8 +sra%o1, 0, %o0 +7: + add %o1, -2, %o1 + ba,pt %xcc, 5b +sra%o0, 0, %o0 +8: + sll %o0, 16, %o0 + sethi %hi(0xff00), %g3 + sra %o0, 0, %o0 + mov %o0, %g2 + andcc %g2, %g3, %g0 + bne,pt %icc, 2b +mov16, %o1 + ba,pt %xcc, 1b +sll%o0, 8, %o0 + .size fls, .-fls + +__fls: +#if BITS_PER_LONG == 64 + mov -1, %g2 + sllx%g2, 32, %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 1f +mov63, %g1 + sllx%o0, 32, %o0 +#endif + mov 31, %g1 +1: + mov -1, %g2 + sllx%g2, (BITS_PER_LONG-16), %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 2f +mov-1, %g2 + sllx%o0, 16, %o0 + add %g1, -16, %g1 +2: + mov -1, %g2 + sllx%g2, (BITS_PER_LONG-8), %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 3f +mov-1, %g2 + sllx%o0, 8, %o0 + add %g1, -8, %g1 +3: + sllx%g2, (BITS_PER_LONG-4), %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 4f +mov-1, %g2 + sllx%o0, 4, %o0 + add %g1, -4, %g1 +4: + sllx%g2, (BITS_PER_LONG-2), %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 5f +mov-1, %g3 + sllx%o0, 2, %o0 + add %g1, -2, %g1 +5: + mov 0, %g2 + sllx%g3, (BITS_PER_LONG-1), %g3 + and %o0, %g3, %o0 + movre %o0, 1, %g2 + sub %g1, %g2, %g1 + jmp %o7+8 +sra%g1, 0, %o0 + .size __fls, .-__fls -- 1.7.1
[PATCH 1/2] sparc64: Define SPARC default fls and __fls
fls and __fls will now require boot time patching on T4 and above. Redefining these functions under arch/sparc/lib. Signed-off-by: Vijay Kumar Reviewed-by: Babu Moger --- arch/sparc/include/asm/bitops_64.h |7 +- arch/sparc/lib/Makefile|1 + arch/sparc/lib/fls.S | 126 3 files changed, 131 insertions(+), 3 deletions(-) diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h index 2d52240..946c236 100644 --- a/arch/sparc/include/asm/bitops_64.h +++ b/arch/sparc/include/asm/bitops_64.h @@ -22,11 +22,12 @@ void clear_bit(unsigned long nr, volatile unsigned long *addr); void change_bit(unsigned long nr, volatile unsigned long *addr); +#define fls64(word) (((word)?(__fls(word) + 1):0)) +int fls(unsigned int word); +int __fls(unsigned long word); + #include -#include -#include -#include #ifdef __KERNEL__ diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index 07c03e7..eefbb9c 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -16,6 +16,7 @@ lib-$(CONFIG_SPARC64) += atomic_64.o lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o lib-$(CONFIG_SPARC64) += multi3.o +lib-$(CONFIG_SPARC64) += fls.o lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o diff --git a/arch/sparc/lib/fls.S b/arch/sparc/lib/fls.S new file mode 100644 index 000..a19bff2 --- /dev/null +++ b/arch/sparc/lib/fls.S @@ -0,0 +1,126 @@ +/* fls.S: SPARC default fls and __fls definitions. + * + * SPARC default fls and __fls definitions, which follows the same + * algorithm as in generic fls() and __fls(). These functions will + * be boot time patched on T4 and onward. 
+ */ + +#include +#include + + .text + .align 32 + + .global fls, __fls + .type fls,#function + .type __fls, #function + + .register %g2, #scratch + .register %g3, #scratch + +EXPORT_SYMBOL(__fls) +EXPORT_SYMBOL(fls) + +fls: + brz,pn %o0, 6f +mov0, %o1 + sethi %hi(0x), %g3 + mov %o0, %g2 + andcc %o0, %g3, %g0 + be,pt %icc, 8f +mov32, %o1 + sethi %hi(0xff00), %g3 + andcc %g2, %g3, %g0 + bne,pt %icc, 3f +sethi %hi(0xf000), %g3 + sll %o0, 8, %o0 +1: + add %o1, -8, %o1 + sra %o0, 0, %o0 + mov %o0, %g2 +2: + sethi %hi(0xf000), %g3 +3: + andcc %g2, %g3, %g0 + bne,pt %icc, 4f +sethi %hi(0xc000), %g3 + sll %o0, 4, %o0 + add %o1, -4, %o1 + sra %o0, 0, %o0 + mov %o0, %g2 +4: + andcc %g2, %g3, %g0 + be,a,pt %icc, 7f +sll%o0, 2, %o0 +5: + xnor%g0, %o0, %o0 + srl %o0, 31, %o0 + sub %o1, %o0, %o1 +6: + jmp %o7 + 8 +sra%o1, 0, %o0 +7: + add %o1, -2, %o1 + ba,pt %xcc, 5b +sra%o0, 0, %o0 +8: + sll %o0, 16, %o0 + sethi %hi(0xff00), %g3 + sra %o0, 0, %o0 + mov %o0, %g2 + andcc %g2, %g3, %g0 + bne,pt %icc, 2b +mov16, %o1 + ba,pt %xcc, 1b +sll%o0, 8, %o0 + .size fls, .-fls + +__fls: +#if BITS_PER_LONG == 64 + mov -1, %g2 + sllx%g2, 32, %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 1f +mov63, %g1 + sllx%o0, 32, %o0 +#endif + mov 31, %g1 +1: + mov -1, %g2 + sllx%g2, (BITS_PER_LONG-16), %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 2f +mov-1, %g2 + sllx%o0, 16, %o0 + add %g1, -16, %g1 +2: + mov -1, %g2 + sllx%g2, (BITS_PER_LONG-8), %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 3f +mov-1, %g2 + sllx%o0, 8, %o0 + add %g1, -8, %g1 +3: + sllx%g2, (BITS_PER_LONG-4), %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 4f +mov-1, %g2 + sllx%o0, 4, %o0 + add %g1, -4, %g1 +4: + sllx%g2, (BITS_PER_LONG-2), %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 5f +mov-1, %g3 + sllx%o0, 2, %o0 + add %g1, -2, %g1 +5: + mov 0, %g2 + sllx%g3, (BITS_PER_LONG-1), %g3 + and %o0, %g3, %o0 + movre %o0, 1, %g2 + sub %g1, %g2, %g1 + jmp %o7+8 +sra%g1, 0, %o0 + .size __fls, .-__fls -- 1.7.1
[PATCH 2/2] sparc64: Use lzcnt instruction for fls and __fls
For T4 and above, patch fls and __fls functions at the boot time to use lzcnt instruction. Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com> Reviewed-by: Babu Moger <babu.mo...@oracle.com> --- arch/sparc/Makefile |1 + arch/sparc/kernel/head_64.S |2 ++ arch/sparc/lib/Makefile |3 +++ arch/sparc/lib/NG4fls.S | 31 +++ arch/sparc/lib/NG4patch.S |9 + 5 files changed, 46 insertions(+), 0 deletions(-) diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile index 8496a07..0763cd8 100644 --- a/arch/sparc/Makefile +++ b/arch/sparc/Makefile @@ -48,6 +48,7 @@ KBUILD_CFLAGS += -ffixed-g4 -ffixed-g5 -fcall-used-g7 -Wno-sign-compare KBUILD_CFLAGS += -Wa,--undeclared-regs KBUILD_CFLAGS += $(call cc-option,-mtune=ultrasparc3) KBUILD_AFLAGS += -m64 -mcpu=ultrasparc -Wa,--undeclared-regs +KBUILD_AFLAGS += -Wa,-Asparc4 ifeq ($(CONFIG_MCOUNT),y) KBUILD_CFLAGS += -pg diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 78e0211..1165254 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -628,6 +628,8 @@ niagara4_patch: nop callniagara4_patch_pageops nop + callniagara4_patch_fls +nop ba,a,pt %xcc, 80f nop diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index eefbb9c..72d2d8c 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -46,3 +46,6 @@ lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o obj-$(CONFIG_SPARC64) += iomap.o obj-$(CONFIG_SPARC32) += atomic32.o ucmpdi2.o obj-$(CONFIG_SPARC64) += PeeCeeI.o + +obj-$(CONFIG_SPARC64) += fls.o +obj-$(CONFIG_SPARC64) += NG4fls.o diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S new file mode 100644 index 000..7c2cfb3 --- /dev/null +++ b/arch/sparc/lib/NG4fls.S @@ -0,0 +1,31 @@ +/* NG4fls.S: SPARC optimized fls and __fls for T4 and above. + * + * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. 
+ */ + + .text + .align 32 + + .globl NG4fls + .globl __NG4fls + .type NG4fls, #function + .type __NG4fls, #function + +NG4fls: + lzcnt %o0, %o1 + mov 64, %o2 + sub %o2, %o1, %o0 + retl + .size NG4fls, .-NG4fls + +__NG4fls: + brz,pn %o0, 1f +mov%o0, %o1 + lzcnt %o1, %o0 + mov 63, %o2 + sub %o2, %o0, %o0 +1: + retl + nop + nop + .size __NG4fls, .-__NG4fls diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S index 3cc0f8c..1010d53 100644 --- a/arch/sparc/lib/NG4patch.S +++ b/arch/sparc/lib/NG4patch.S @@ -52,3 +52,12 @@ niagara4_patch_pageops: retl nop .size niagara4_patch_pageops,.-niagara4_patch_pageops + + .globl niagara4_patch_fls + .type niagara4_patch_fls,#function +niagara4_patch_fls: + NG_DO_PATCH(fls, NG4fls) + NG_DO_PATCH(__fls, __NG4fls) + retl +nop + .size niagara4_patch_fls,.-niagara4_patch_fls -- 1.7.1
[PATCH 2/2] sparc64: Use lzcnt instruction for fls and __fls
For T4 and above, patch fls and __fls functions at the boot time to use lzcnt instruction. Signed-off-by: Vijay Kumar Reviewed-by: Babu Moger --- arch/sparc/Makefile |1 + arch/sparc/kernel/head_64.S |2 ++ arch/sparc/lib/Makefile |3 +++ arch/sparc/lib/NG4fls.S | 31 +++ arch/sparc/lib/NG4patch.S |9 + 5 files changed, 46 insertions(+), 0 deletions(-) diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile index 8496a07..0763cd8 100644 --- a/arch/sparc/Makefile +++ b/arch/sparc/Makefile @@ -48,6 +48,7 @@ KBUILD_CFLAGS += -ffixed-g4 -ffixed-g5 -fcall-used-g7 -Wno-sign-compare KBUILD_CFLAGS += -Wa,--undeclared-regs KBUILD_CFLAGS += $(call cc-option,-mtune=ultrasparc3) KBUILD_AFLAGS += -m64 -mcpu=ultrasparc -Wa,--undeclared-regs +KBUILD_AFLAGS += -Wa,-Asparc4 ifeq ($(CONFIG_MCOUNT),y) KBUILD_CFLAGS += -pg diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 78e0211..1165254 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -628,6 +628,8 @@ niagara4_patch: nop callniagara4_patch_pageops nop + callniagara4_patch_fls +nop ba,a,pt %xcc, 80f nop diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index eefbb9c..72d2d8c 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -46,3 +46,6 @@ lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o obj-$(CONFIG_SPARC64) += iomap.o obj-$(CONFIG_SPARC32) += atomic32.o ucmpdi2.o obj-$(CONFIG_SPARC64) += PeeCeeI.o + +obj-$(CONFIG_SPARC64) += fls.o +obj-$(CONFIG_SPARC64) += NG4fls.o diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S new file mode 100644 index 000..7c2cfb3 --- /dev/null +++ b/arch/sparc/lib/NG4fls.S @@ -0,0 +1,31 @@ +/* NG4fls.S: SPARC optimized fls and __fls for T4 and above. + * + * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. 
+ */ + + .text + .align 32 + + .globl NG4fls + .globl __NG4fls + .type NG4fls, #function + .type __NG4fls, #function + +NG4fls: + lzcnt %o0, %o1 + mov 64, %o2 + sub %o2, %o1, %o0 + retl + .size NG4fls, .-NG4fls + +__NG4fls: + brz,pn %o0, 1f +mov%o0, %o1 + lzcnt %o1, %o0 + mov 63, %o2 + sub %o2, %o0, %o0 +1: + retl + nop + nop + .size __NG4fls, .-__NG4fls diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S index 3cc0f8c..1010d53 100644 --- a/arch/sparc/lib/NG4patch.S +++ b/arch/sparc/lib/NG4patch.S @@ -52,3 +52,12 @@ niagara4_patch_pageops: retl nop .size niagara4_patch_pageops,.-niagara4_patch_pageops + + .globl niagara4_patch_fls + .type niagara4_patch_fls,#function +niagara4_patch_fls: + NG_DO_PATCH(fls, NG4fls) + NG_DO_PATCH(__fls, __NG4fls) + retl +nop + .size niagara4_patch_fls,.-niagara4_patch_fls -- 1.7.1
[PATCH 0/2] sparc64: Optimize fls, fls64 and __fls
SPARC provides the lzcnt instruction (with VIS3), which can be used to optimize the fls, fls64 and __fls functions. For systems that support the lzcnt instruction, we now do boot-time patching to use the SPARC-optimized fls, fls64 and __fls functions. Vijay Kumar (2): sparc64: Define SPARC default fls and __fls sparc64: Use lzcnt instruction for fls and __fls arch/sparc/Makefile | 1 + arch/sparc/include/asm/bitops_64.h | 7 +- arch/sparc/kernel/head_64.S | 2 + arch/sparc/lib/Makefile | 4 + arch/sparc/lib/NG4fls.S | 31 + arch/sparc/lib/NG4patch.S | 9 +++ arch/sparc/lib/fls.S | 126 7 files changed, 177 insertions(+), 3 deletions(-) create mode 100644 arch/sparc/lib/NG4fls.S create mode 100644 arch/sparc/lib/fls.S
[PATCH 0/2] sparc64: Optimize fls, fls64 and __fls
SPARC provides the lzcnt instruction (with VIS3), which can be used to optimize the fls, fls64 and __fls functions. For systems that support the lzcnt instruction, we now do boot-time patching to use the SPARC-optimized fls, fls64 and __fls functions. Vijay Kumar (2): sparc64: Define SPARC default fls and __fls sparc64: Use lzcnt instruction for fls and __fls arch/sparc/Makefile | 1 + arch/sparc/include/asm/bitops_64.h | 7 +- arch/sparc/kernel/head_64.S | 2 + arch/sparc/lib/Makefile | 4 + arch/sparc/lib/NG4fls.S | 31 + arch/sparc/lib/NG4patch.S | 9 +++ arch/sparc/lib/fls.S | 126 7 files changed, 177 insertions(+), 3 deletions(-) create mode 100644 arch/sparc/lib/NG4fls.S create mode 100644 arch/sparc/lib/fls.S
Re: [PATCH 0/2] sparc64: Use low latency path to resume idle cpu
On 7/20/2017 10:45 PM, David Miller wrote: From: Vijay Kumar <vijay.ac.ku...@oracle.com> Date: Thu, 20 Jul 2017 22:36:42 -0500 I can give a try :). But looks to me one thing that will go wrong is irq accounting done in __irq_enter() and rcu_irq_enter(). Actually, the bigger problem is that scheduler_ipi() can raise a software interrupt, and nothing will invoke it. Yes, I see your point. It's turning quite ugly to avoid the IRQ overhead, I must admit. So ignore this for now. In the longer term a probably cleaner way to do this is to have a special direct version of scheduler_ipi() that invokes all the necessary work, even the rebalance softirq, directly rather than indirectly. Sure. Thanks.
Re: [PATCH 0/2] sparc64: Use low latency path to resume idle cpu
On 7/20/2017 10:45 PM, David Miller wrote: From: Vijay Kumar Date: Thu, 20 Jul 2017 22:36:42 -0500 I can give a try :). But looks to me one thing that will go wrong is irq accounting done in __irq_enter() and rcu_irq_enter(). Actually, the bigger problem is that scheduler_ipi() can raise a software interrupt, and nothing will invoke it. Yes, I see your point. It's turning quite ugly to avoid the IRQ overhead, I must admit. So ignore this for now. In the longer term a probably cleaner way to do this is to have a special direct version of scheduler_ipi() that invokes all the necessary work, even the rebalance softirq, directly rather than indirectly. Sure. Thanks.
Re: [PATCH 0/2] sparc64: Use low latency path to resume idle cpu
On 7/20/2017 9:55 PM, David Miller wrote: From: Vijay Kumar <vijay.ac.ku...@oracle.com> Date: Thu, 20 Jul 2017 21:44:24 -0500 I had same thoughts initially but I had to go with this approach as scheduler_ipi is wrapped with irq_enter() and irq_exit(). Whereas POKE resumes the cpu in process context. Comments in scheduler_ipi(): * Not all reschedule IPI handlers call irq_enter/irq_exit, since * traditionally all their work was done from the interrupt return * path. Now that we actually do some work, we need to make sure * we do call them. * * Some archs already do call them, luckily irq_enter/exit nest * properly. * * Arguably we should visit all archs and update all handlers, * however a fair share of IPIs are still resched only so this would * somewhat pessimize the simple resched case. */ irq_enter(); I still think we should be able to fake the state such that this direct schedule_ipi() call will work. I could be wrong :) I can give a try :). But looks to me one thing that will go wrong is irq accounting done in __irq_enter() and rcu_irq_enter(). Thanks, Vijay
Re: [PATCH 0/2] sparc64: Use low latency path to resume idle cpu
On 7/20/2017 9:55 PM, David Miller wrote: From: Vijay Kumar Date: Thu, 20 Jul 2017 21:44:24 -0500 I had same thoughts initially but I had to go with this approach as scheduler_ipi is wrapped with irq_enter() and irq_exit(). Whereas POKE resumes the cpu in process context. Comments in scheduler_ipi(): * Not all reschedule IPI handlers call irq_enter/irq_exit, since * traditionally all their work was done from the interrupt return * path. Now that we actually do some work, we need to make sure * we do call them. * * Some archs already do call them, luckily irq_enter/exit nest * properly. * * Arguably we should visit all archs and update all handlers, * however a fair share of IPIs are still resched only so this would * somewhat pessimize the simple resched case. */ irq_enter(); I still think we should be able to fake the state such that this direct schedule_ipi() call will work. I could be wrong :) I can give a try :). But looks to me one thing that will go wrong is irq accounting done in __irq_enter() and rcu_irq_enter(). Thanks, Vijay
Re: [PATCH 2/2] sparc64: Use cpu_poke to resume idle cpu
On 7/20/2017 2:58 PM, David Miller wrote: From: Vijay Kumar <vijay.ac.ku...@oracle.com> Date: Sat, 8 Jul 2017 14:23:44 -0600 diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c index 2677312..0b070d5 100644 --- a/arch/sparc/kernel/hvapi.c +++ b/arch/sparc/kernel/hvapi.c @@ -189,7 +189,7 @@ void __init sun4v_hvapi_init(void) group = HV_GRP_CORE; major = 1; - minor = 1; + minor = 6; /* CPU POKE */ if (sun4v_hvapi_register(group, major, &minor)) goto bad; That CPU POKE comment will not stand the test of time, please remove it. + /* Use cpu poke to resume idle cpu if supported*/ Please put a space at the end of the comment and before the "*/" + /*cpu poke is registered. */ Please put a space at the beginning of the comment. And you should decide which way you want to consistently write. Either capitalize the first word and finish the sentence with a '.', or don't. Do it the same way each time. Thanks. Sure, I will fix these in v2. Thanks, -Vijay
Re: [PATCH 2/2] sparc64: Use cpu_poke to resume idle cpu
On 7/20/2017 2:58 PM, David Miller wrote: From: Vijay Kumar Date: Sat, 8 Jul 2017 14:23:44 -0600 diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c index 2677312..0b070d5 100644 --- a/arch/sparc/kernel/hvapi.c +++ b/arch/sparc/kernel/hvapi.c @@ -189,7 +189,7 @@ void __init sun4v_hvapi_init(void) group = HV_GRP_CORE; major = 1; - minor = 1; + minor = 6; /* CPU POKE */ if (sun4v_hvapi_register(group, major, &minor)) goto bad; That CPU POKE comment will not stand the test of time, please remove it. + /* Use cpu poke to resume idle cpu if supported*/ Please put a space at the end of the comment and before the "*/" + /*cpu poke is registered. */ Please put a space at the beginning of the comment. And you should decide which way you want to consistently write. Either capitalize the first word and finish the sentence with a '.', or don't. Do it the same way each time. Thanks. Sure, I will fix these in v2. Thanks, -Vijay
Re: [PATCH 0/2] sparc64: Use low latency path to resume idle cpu
On 7/20/2017 2:57 PM, David Miller wrote: From: Vijay Kumar <vijay.ac.ku...@oracle.com> Date: Sat, 8 Jul 2017 14:23:42 -0600 cpu_poke is a low latency path to resume the target cpu if suspended using cpu_yield. Use cpu poke to resume cpu if supported by hypervisor. hackbench results (lower is better), Number of Process: w/o fix / with fix -- 1: 0.012 / 0.010; 10: 0.021 / 0.019; 100: 0.151 / 0.148. So this only works for a cpu which has yielded. The kernel sends reschedule events to both idle and non-idle cpus. That's why you have to have that fallback code to still send the mondo IPI right? That is correct. For the case where POKE works, it seems like completely unnecessary overhead to set the PIL interrupt. Just disable local cpu interrupts and call schedule_ipi() directly. I bet that improves your benchmark even more. I had same thoughts initially but I had to go with this approach as scheduler_ipi is wrapped with irq_enter() and irq_exit(). Whereas POKE resumes the cpu in process context. Comments in scheduler_ipi(): * Not all reschedule IPI handlers call irq_enter/irq_exit, since * traditionally all their work was done from the interrupt return * path. Now that we actually do some work, we need to make sure * we do call them. * * Some archs already do call them, luckily irq_enter/exit nest * properly. * * Arguably we should visit all archs and update all handlers, * however a fair share of IPIs are still resched only so this would * somewhat pessimize the simple resched case. */ irq_enter(); -Vijay
Re: [PATCH 0/2] sparc64: Use low latency path to resume idle cpu
On 7/20/2017 2:57 PM, David Miller wrote: From: Vijay Kumar Date: Sat, 8 Jul 2017 14:23:42 -0600 cpu_poke is a low latency path to resume the target cpu if suspended using cpu_yield. Use cpu poke to resume cpu if supported by hypervisor. hackbench results (lower is better), Number of Process: w/o fix / with fix -- 1: 0.012 / 0.010; 10: 0.021 / 0.019; 100: 0.151 / 0.148. So this only works for a cpu which has yielded. The kernel sends reschedule events to both idle and non-idle cpus. That's why you have to have that fallback code to still send the mondo IPI right? That is correct. For the case where POKE works, it seems like completely unnecessary overhead to set the PIL interrupt. Just disable local cpu interrupts and call schedule_ipi() directly. I bet that improves your benchmark even more. I had same thoughts initially but I had to go with this approach as scheduler_ipi is wrapped with irq_enter() and irq_exit(). Whereas POKE resumes the cpu in process context. Comments in scheduler_ipi(): * Not all reschedule IPI handlers call irq_enter/irq_exit, since * traditionally all their work was done from the interrupt return * path. Now that we actually do some work, we need to make sure * we do call them. * * Some archs already do call them, luckily irq_enter/exit nest * properly. * * Arguably we should visit all archs and update all handlers, * however a fair share of IPIs are still resched only so this would * somewhat pessimize the simple resched case. */ irq_enter(); -Vijay
[PATCH 0/2] sparc64: Use low latency path to resume idle cpu
cpu_poke is a low latency path to resume the target cpu if suspended using cpu_yield. Use cpu poke to resume cpu if supported by hypervisor. hackbench results (lower is better), Number of Process: w/o fix / with fix -- 1: 0.012 / 0.010; 10: 0.021 / 0.019; 100: 0.151 / 0.148. Vijay Kumar (2): sparc64: Add a new hypercall CPU_POKE sparc64: Use cpu_poke to resume idle cpu arch/sparc/include/asm/hypervisor.h | 17 arch/sparc/include/asm/smp_64.h | 5 ++ arch/sparc/kernel/hvapi.c | 9 arch/sparc/kernel/hvcalls.S | 11 + arch/sparc/kernel/process_64.c | 7 +++- arch/sparc/kernel/setup_64.c | 1 + arch/sparc/kernel/smp_64.c | 75 +- 7 files changed, 121 insertions(+), 4 deletions(-) --
[PATCH 0/2] sparc64: Use low latency path to resume idle cpu
cpu_poke is a low latency path to resume the target cpu if suspended using cpu_yield. Use cpu poke to resume cpu if supported by hypervisor. hackbench results (lower is better), Number of Process: w/o fix / with fix -- 1: 0.012 / 0.010; 10: 0.021 / 0.019; 100: 0.151 / 0.148. Vijay Kumar (2): sparc64: Add a new hypercall CPU_POKE sparc64: Use cpu_poke to resume idle cpu arch/sparc/include/asm/hypervisor.h | 17 arch/sparc/include/asm/smp_64.h | 5 ++ arch/sparc/kernel/hvapi.c | 9 arch/sparc/kernel/hvcalls.S | 11 + arch/sparc/kernel/process_64.c | 7 +++- arch/sparc/kernel/setup_64.c | 1 + arch/sparc/kernel/smp_64.c | 75 +- 7 files changed, 121 insertions(+), 4 deletions(-) --
[PATCH 2/2] sparc64: Use cpu_poke to resume idle cpu
Use cpu_poke hypervisor call to resume idle cpu if supported. Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com> Reviewed-by: Anthony Yznaga <anthony.yzn...@oracle.com> --- arch/sparc/include/asm/smp_64.h |5 ++ arch/sparc/kernel/hvapi.c |2 +- arch/sparc/kernel/process_64.c |7 +++- arch/sparc/kernel/setup_64.c|1 + arch/sparc/kernel/smp_64.c | 80 +- 5 files changed, 90 insertions(+), 5 deletions(-) diff --git a/arch/sparc/include/asm/smp_64.h b/arch/sparc/include/asm/smp_64.h index ce2233f..a750892 100644 --- a/arch/sparc/include/asm/smp_64.h +++ b/arch/sparc/include/asm/smp_64.h @@ -33,6 +33,9 @@ DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); extern cpumask_t cpu_core_map[NR_CPUS]; +void smp_init_cpu_poke(void); +void scheduler_poke(void); + void arch_send_call_function_single_ipi(int cpu); void arch_send_call_function_ipi_mask(const struct cpumask *mask); @@ -74,6 +77,8 @@ #define smp_fetch_global_regs() do { } while (0) #define smp_fetch_global_pmu() do { } while (0) #define smp_fill_in_cpu_possible_map() do { } while (0) +#define smp_init_cpu_poke() do { } while (0) +#define scheduler_poke() do { } while (0) #endif /* !(CONFIG_SMP) */ diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c index 2677312..0b070d5 100644 --- a/arch/sparc/kernel/hvapi.c +++ b/arch/sparc/kernel/hvapi.c @@ -189,7 +189,7 @@ void __init sun4v_hvapi_init(void) group = HV_GRP_CORE; major = 1; - minor = 1; + minor = 6; /* CPU POKE */ if (sun4v_hvapi_register(group, major, )) goto bad; diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 1badc49..92448af 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -77,8 +77,13 @@ void arch_cpu_idle(void) : "=" (pstate) : "i" (PSTATE_IE)); - if (!need_resched() && !cpu_is_offline(smp_processor_id())) + if (!need_resched() && !cpu_is_offline(smp_processor_id())) { sun4v_cpu_yield(); + /* If resumed by cpu_poke then we need to explicitly +* call scheduler_ipi(). 
+*/ + scheduler_poke(); + } /* Re-enable interrupts. */ __asm__ __volatile__( diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index 422b178..4ff9fd8 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -356,6 +356,7 @@ void __init start_early_boot(void) check_if_starfire(); per_cpu_patch(); sun4v_patch(); + smp_init_cpu_poke(); cpu = hard_smp_processor_id(); if (cpu >= NR_CPUS) { diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index fdf3104..9c3131b 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -74,6 +74,9 @@ static cpumask_t smp_commenced_mask; +static DEFINE_PER_CPU(bool, poke); +static bool cpu_poke; + void smp_info(struct seq_file *m) { int i; @@ -1394,15 +1397,86 @@ void __init smp_cpus_done(unsigned int max_cpus) { } +static void send_cpu_ipi(int cpu) +{ + xcall_deliver((u64) _receive_signal, + 0, 0, cpumask_of(cpu)); +} + +void scheduler_poke(void) +{ + if (!cpu_poke) + return; + + if (!__this_cpu_read(poke)) + return; + + __this_cpu_write(poke, false); + set_softint(1 << PIL_SMP_RECEIVE_SIGNAL); +} + +static unsigned long send_cpu_poke(int cpu) +{ + unsigned long hv_err; + + per_cpu(poke, cpu) = true; + hv_err = sun4v_cpu_poke(cpu); + if (hv_err != HV_EOK) { + per_cpu(poke, cpu) = false; + pr_err_ratelimited("%s: sun4v_cpu_poke() fails err=%lu\n", + __func__, hv_err); + } + + return hv_err; +} + void smp_send_reschedule(int cpu) { if (cpu == smp_processor_id()) { WARN_ON_ONCE(preemptible()); set_softint(1 << PIL_SMP_RECEIVE_SIGNAL); - } else { - xcall_deliver((u64) _receive_signal, - 0, 0, cpumask_of(cpu)); + return; + } + + /* Use cpu poke to resume idle cpu if supported*/ + if (cpu_poke && idle_cpu(cpu)) { + unsigned long ret; + + ret = send_cpu_poke(cpu); + if (ret == HV_EOK) + return; } + + /* Use IPI in following cases: +* - cpu poke not supported +* - cpu not idle +* - send_cpu_poke() returns with error. 
+*/ + send_cpu_ipi(cpu); +} + +void smp_init_cpu_poke(void) +{ + unsigned long major; +
[PATCH 1/2] sparc64: Add a new hypercall CPU_POKE
This adds a new hypercall CPU_POKE for quickly waking up an idle CPU. CPU POKE should only be sent to valid non-local CPUs. Signed-off-by: Rob Gardner <rob.gard...@oracle.com> Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com> Reviewed-by: Anthony Yznaga <anthony.yzn...@oracle.com> --- arch/sparc/include/asm/hypervisor.h | 18 ++ arch/sparc/kernel/hvcalls.S | 11 +++ 2 files changed, 29 insertions(+), 0 deletions(-) diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h index 73cb897..3dc9215 100644 --- a/arch/sparc/include/asm/hypervisor.h +++ b/arch/sparc/include/asm/hypervisor.h @@ -298,6 +298,24 @@ unsigned long sun4v_cpu_start(unsigned long cpuid, unsigned long sun4v_cpu_yield(void); #endif +/* cpu_poke() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_CPU_POKE + * RET0: status + * ERRORS: ENOCPU cpuid refers to a CPU that does not exist + * EINVAL cpuid is current CPU + * + * Poke CPU cpuid. If the target CPU is currently suspended having + * invoked the cpu-yield service, that vCPU will be resumed. + * Poke interrupts may only be sent to valid, non-local CPUs. + * It is not legal to poke the current vCPU. + */ +#define HV_FAST_CPU_POKE0x13 + +#ifndef __ASSEMBLY__ +unsigned long sun4v_cpu_poke(unsigned long cpuid); +#endif + /* cpu_qconf() * TRAP: HV_FAST_TRAP * FUNCTION: HV_FAST_CPU_QCONF diff --git a/arch/sparc/kernel/hvcalls.S b/arch/sparc/kernel/hvcalls.S index 4116ee5..e57007f 100644 --- a/arch/sparc/kernel/hvcalls.S +++ b/arch/sparc/kernel/hvcalls.S @@ -106,6 +106,17 @@ ENTRY(sun4v_cpu_yield) nop ENDPROC(sun4v_cpu_yield) + /* %o0: cpuid +* +* returns %o0: status +*/ +ENTRY(sun4v_cpu_poke) + mov HV_FAST_CPU_POKE, %o5 + ta HV_FAST_TRAP + retl +nop +ENDPROC(sun4v_cpu_poke) + /* %o0: type * %o1: queue paddr * %o2: num queue entries -- 1.7.1
[PATCH 2/2] sparc64: Use cpu_poke to resume idle cpu
Use cpu_poke hypervisor call to resume idle cpu if supported. Signed-off-by: Vijay Kumar Reviewed-by: Anthony Yznaga --- arch/sparc/include/asm/smp_64.h |5 ++ arch/sparc/kernel/hvapi.c |2 +- arch/sparc/kernel/process_64.c |7 +++- arch/sparc/kernel/setup_64.c|1 + arch/sparc/kernel/smp_64.c | 80 +- 5 files changed, 90 insertions(+), 5 deletions(-) diff --git a/arch/sparc/include/asm/smp_64.h b/arch/sparc/include/asm/smp_64.h index ce2233f..a750892 100644 --- a/arch/sparc/include/asm/smp_64.h +++ b/arch/sparc/include/asm/smp_64.h @@ -33,6 +33,9 @@ DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); extern cpumask_t cpu_core_map[NR_CPUS]; +void smp_init_cpu_poke(void); +void scheduler_poke(void); + void arch_send_call_function_single_ipi(int cpu); void arch_send_call_function_ipi_mask(const struct cpumask *mask); @@ -74,6 +77,8 @@ #define smp_fetch_global_regs() do { } while (0) #define smp_fetch_global_pmu() do { } while (0) #define smp_fill_in_cpu_possible_map() do { } while (0) +#define smp_init_cpu_poke() do { } while (0) +#define scheduler_poke() do { } while (0) #endif /* !(CONFIG_SMP) */ diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c index 2677312..0b070d5 100644 --- a/arch/sparc/kernel/hvapi.c +++ b/arch/sparc/kernel/hvapi.c @@ -189,7 +189,7 @@ void __init sun4v_hvapi_init(void) group = HV_GRP_CORE; major = 1; - minor = 1; + minor = 6; /* CPU POKE */ if (sun4v_hvapi_register(group, major, )) goto bad; diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 1badc49..92448af 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -77,8 +77,13 @@ void arch_cpu_idle(void) : "=" (pstate) : "i" (PSTATE_IE)); - if (!need_resched() && !cpu_is_offline(smp_processor_id())) + if (!need_resched() && !cpu_is_offline(smp_processor_id())) { sun4v_cpu_yield(); + /* If resumed by cpu_poke then we need to explicitly +* call scheduler_ipi(). +*/ + scheduler_poke(); + } /* Re-enable interrupts. 
*/ __asm__ __volatile__( diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index 422b178..4ff9fd8 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -356,6 +356,7 @@ void __init start_early_boot(void) check_if_starfire(); per_cpu_patch(); sun4v_patch(); + smp_init_cpu_poke(); cpu = hard_smp_processor_id(); if (cpu >= NR_CPUS) { diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index fdf3104..9c3131b 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -74,6 +74,9 @@ static cpumask_t smp_commenced_mask; +static DEFINE_PER_CPU(bool, poke); +static bool cpu_poke; + void smp_info(struct seq_file *m) { int i; @@ -1394,15 +1397,86 @@ void __init smp_cpus_done(unsigned int max_cpus) { } +static void send_cpu_ipi(int cpu) +{ + xcall_deliver((u64) _receive_signal, + 0, 0, cpumask_of(cpu)); +} + +void scheduler_poke(void) +{ + if (!cpu_poke) + return; + + if (!__this_cpu_read(poke)) + return; + + __this_cpu_write(poke, false); + set_softint(1 << PIL_SMP_RECEIVE_SIGNAL); +} + +static unsigned long send_cpu_poke(int cpu) +{ + unsigned long hv_err; + + per_cpu(poke, cpu) = true; + hv_err = sun4v_cpu_poke(cpu); + if (hv_err != HV_EOK) { + per_cpu(poke, cpu) = false; + pr_err_ratelimited("%s: sun4v_cpu_poke() fails err=%lu\n", + __func__, hv_err); + } + + return hv_err; +} + void smp_send_reschedule(int cpu) { if (cpu == smp_processor_id()) { WARN_ON_ONCE(preemptible()); set_softint(1 << PIL_SMP_RECEIVE_SIGNAL); - } else { - xcall_deliver((u64) _receive_signal, - 0, 0, cpumask_of(cpu)); + return; + } + + /* Use cpu poke to resume idle cpu if supported*/ + if (cpu_poke && idle_cpu(cpu)) { + unsigned long ret; + + ret = send_cpu_poke(cpu); + if (ret == HV_EOK) + return; } + + /* Use IPI in following cases: +* - cpu poke not supported +* - cpu not idle +* - send_cpu_poke() returns with error. 
+*/ + send_cpu_ipi(cpu); +} + +void smp_init_cpu_poke(void) +{ + unsigned long major; + unsigned long minor; + int ret; + + if (tlb_type !
[PATCH 1/2] sparc64: Add a new hypercall CPU_POKE
This adds a new hypercall CPU_POKE for quickly waking up an idle CPU. CPU POKE should only be sent to valid non-local CPUs. Signed-off-by: Rob Gardner Signed-off-by: Vijay Kumar Reviewed-by: Anthony Yznaga --- arch/sparc/include/asm/hypervisor.h | 18 ++ arch/sparc/kernel/hvcalls.S | 11 +++ 2 files changed, 29 insertions(+), 0 deletions(-) diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h index 73cb897..3dc9215 100644 --- a/arch/sparc/include/asm/hypervisor.h +++ b/arch/sparc/include/asm/hypervisor.h @@ -298,6 +298,24 @@ unsigned long sun4v_cpu_start(unsigned long cpuid, unsigned long sun4v_cpu_yield(void); #endif +/* cpu_poke() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_CPU_POKE + * RET0: status + * ERRORS: ENOCPU cpuid refers to a CPU that does not exist + * EINVAL cpuid is current CPU + * + * Poke CPU cpuid. If the target CPU is currently suspended having + * invoked the cpu-yield service, that vCPU will be resumed. + * Poke interrupts may only be sent to valid, non-local CPUs. + * It is not legal to poke the current vCPU. + */ +#define HV_FAST_CPU_POKE0x13 + +#ifndef __ASSEMBLY__ +unsigned long sun4v_cpu_poke(unsigned long cpuid); +#endif + /* cpu_qconf() * TRAP: HV_FAST_TRAP * FUNCTION: HV_FAST_CPU_QCONF diff --git a/arch/sparc/kernel/hvcalls.S b/arch/sparc/kernel/hvcalls.S index 4116ee5..e57007f 100644 --- a/arch/sparc/kernel/hvcalls.S +++ b/arch/sparc/kernel/hvcalls.S @@ -106,6 +106,17 @@ ENTRY(sun4v_cpu_yield) nop ENDPROC(sun4v_cpu_yield) + /* %o0: cpuid +* +* returns %o0: status +*/ +ENTRY(sun4v_cpu_poke) + mov HV_FAST_CPU_POKE, %o5 + ta HV_FAST_TRAP + retl +nop +ENDPROC(sun4v_cpu_poke) + /* %o0: type * %o1: queue paddr * %o2: num queue entries -- 1.7.1
Re: [PATCH v3 0/4] sparc64: Jump to boot prom from console on panic
On 2/1/2017 1:50 PM, David Miller wrote: From: Vijay Kumar <vijay.ac.ku...@oracle.com> Date: Wed, 1 Feb 2017 11:34:36 -0800 Currently Stop-A (L1A) does not make the kernel switch to OBP on panic. This is intentional, the kernel prints a message telling the user to press break (L1-A) if they want to drop out of the kernel and we force the break to be allowed by setting stop_a_enabled. The problem is that pressing BRK after panic does not drop to OK prompt (when stop_a_enabled is set). So the kernel message to press Stop-A to return to boot prom is misleading in this case. I'm wondering why there is so much effort being directed into BRK behavior. User can drop into ok prompt from the running kernel and as well as from the panicked kernel. Pressing single break to jump to ok prompt conflicts with sysrq key combination (from console, BRK + sysrq_key). To be consistent across both the cases, user will have to send BRK twice in order to drop to ok prompt. Does this sound reasonable? If you want to break into the OK prompt, have the reboot-cmd environment variable set appropriately, and simply hit BRK and it will work in both ldom and non-ldom environments. Kernel does not print message "Press Stop-A (L1-A) to ..." for the case when it is expected to reboot on panic. Rather, it goes through different path in panic() when kernel.panic is _not_ set to 0. Here, patch is addressing the case when kernel.panic=0 (i.e not to reboot on panic). Thanks, Vijay
Re: [PATCH v3 0/4] sparc64: Jump to boot prom from console on panic
On 2/1/2017 1:50 PM, David Miller wrote: From: Vijay Kumar Date: Wed, 1 Feb 2017 11:34:36 -0800 Currently Stop-A (L1A) does not make the kernel switch to OBP on panic. This is intentional, the kernel prints a message telling the user to press break (L1-A) if they want to drop out of the kernel and we force the break to be allowed by setting stop_a_enabled. The problem is that pressing BRK after panic does not drop to OK prompt (when stop_a_enabled is set). So the kernel message to press Stop-A to return to boot prom is misleading in this case. I'm wondering why there is so much effort being directed into BRK behavior. User can drop into ok prompt from the running kernel and as well as from the panicked kernel. Pressing single break to jump to ok prompt conflicts with sysrq key combination (from console, BRK + sysrq_key). To be consistent across both the cases, user will have to send BRK twice in order to drop to ok prompt. Does this sound reasonable? If you want to break into the OK prompt, have the reboot-cmd environment variable set appropriately, and simply hit BRK and it will work in both ldom and non-ldom environments. Kernel does not print message "Press Stop-A (L1-A) to ..." for the case when it is expected to reboot on panic. Rather, it goes through different path in panic() when kernel.panic is _not_ set to 0. Here, patch is addressing the case when kernel.panic=0 (i.e not to reboot on panic). Thanks, Vijay
[PATCH v3 1/4] sparc64: Set cpu state to offline when stopped
CPU needs to be marked offline before stopping it. When not marked offline, the xcall receives HV_EWOULDBLOCK and so assumes that not all CPUs received the message, and retries. After 10000 retries, it finally fails with fatal mondo timeout. Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com> --- arch/sparc/kernel/smp_64.c | 3 +++ 1 files changed, 3 insertions(+), 0 deletions(-) diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 0ce347f..712bf1b 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1443,6 +1443,7 @@ void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs) static void stop_this_cpu(void *dummy) { + set_cpu_online(smp_processor_id(), false); prom_stopself(); } @@ -1454,6 +1455,8 @@ void smp_send_stop(void) for_each_online_cpu(cpu) { if (cpu == smp_processor_id()) continue; + + set_cpu_online(cpu, false); #ifdef CONFIG_SUN_LDOMS if (ldom_domaining_enabled) { unsigned long hv_err; -- 1.7.1
[PATCH v3 4/4] Documentation/sparc: Steps for sending break on sunhv console
Documented the steps for sending break on sunhv console. Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com> --- Documentation/sparc/console.txt | 9 + 1 files changed, 9 insertions(+), 0 deletions(-) diff --git a/Documentation/sparc/console.txt b/Documentation/sparc/console.txt new file mode 100644 index 0000000..5aa735a --- /dev/null +++ b/Documentation/sparc/console.txt @@ -0,0 +1,9 @@ +Steps for sending 'break' on sunhv console: +=== + +On Baremetal: + 1. press Esc + 'B' + +On LDOM: + 1. press Ctrl + ']' + 2. telnet> send break -- 1.7.1
[PATCH v3 2/4] sparc64: Migrate hvcons irq to panicked cpu
On panic, all other CPUs are stopped except the one which had hit panic. To keep console alive, we need to migrate hvcons irq to panicked CPU. Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com> --- v2->v3: Added SERIAL_SUNHV conditional group for sunhv_migrate_hvcons_irq(). --- arch/sparc/include/asm/setup.h |5 - arch/sparc/kernel/smp_64.c |6 +- drivers/tty/serial/sunhv.c |6 ++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/arch/sparc/include/asm/setup.h b/arch/sparc/include/asm/setup.h index 29d64b1..478bf6b 100644 --- a/arch/sparc/include/asm/setup.h +++ b/arch/sparc/include/asm/setup.h @@ -59,8 +59,11 @@ static inline int con_is_present(void) extern atomic_t dcpage_flushes_xcall; extern int sysctl_tsb_ratio; -#endif +#ifdef CONFIG_SERIAL_SUNHV +void sunhv_migrate_hvcons_irq(int cpu); +#endif +#endif void sun_do_break(void); extern int stop_a_enabled; extern int scons_pwroff; diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 712bf1b..90a02cb 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1452,8 +1452,12 @@ void smp_send_stop(void) int cpu; if (tlb_type == hypervisor) { + int this_cpu = smp_processor_id(); +#ifdef CONFIG_SERIAL_SUNHV + sunhv_migrate_hvcons_irq(this_cpu); +#endif for_each_online_cpu(cpu) { - if (cpu == smp_processor_id()) + if (cpu == this_cpu) continue; set_cpu_online(cpu, false); diff --git a/drivers/tty/serial/sunhv.c b/drivers/tty/serial/sunhv.c index 99ef5c6..039ae05 100644 --- a/drivers/tty/serial/sunhv.c +++ b/drivers/tty/serial/sunhv.c @@ -398,6 +398,12 @@ static int sunhv_verify_port(struct uart_port *port, struct serial_struct *ser) static struct uart_port *sunhv_port; +void sunhv_migrate_hvcons_irq(int cpu) +{ + /* Migrate hvcons irq to param cpu */ + irq_force_affinity(sunhv_port->irq, cpumask_of(cpu)); +} + /* Copy 's' into the con_write_page, decoding "\n" into * "\r\n" along the way. 
We have to return two lengths * because the caller needs to know how much to advance -- 1.7.1
[PATCH v3 1/4] sparc64: Set cpu state to offline when stopped
CPU needs to be marked offline before stopping it. When not marked offline, the xcall receives HV_EWOULDBLOCK and so assumes that not all CPUs received the message, and retries. After 10000 retries, it finally fails with fatal mondo timeout. Signed-off-by: Vijay Kumar --- arch/sparc/kernel/smp_64.c | 3 +++ 1 files changed, 3 insertions(+), 0 deletions(-) diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 0ce347f..712bf1b 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1443,6 +1443,7 @@ void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs) static void stop_this_cpu(void *dummy) { + set_cpu_online(smp_processor_id(), false); prom_stopself(); } @@ -1454,6 +1455,8 @@ void smp_send_stop(void) for_each_online_cpu(cpu) { if (cpu == smp_processor_id()) continue; + + set_cpu_online(cpu, false); #ifdef CONFIG_SUN_LDOMS if (ldom_domaining_enabled) { unsigned long hv_err; -- 1.7.1
[PATCH v3 4/4] Documentation/sparc: Steps for sending break on sunhv console
Documented the steps for sending break on sunhv console. Signed-off-by: Vijay Kumar --- Documentation/sparc/console.txt |9 + 1 files changed, 9 insertions(+), 0 deletions(-) diff --git a/Documentation/sparc/console.txt b/Documentation/sparc/console.txt new file mode 100644 index 000..5aa735a --- /dev/null +++ b/Documentation/sparc/console.txt @@ -0,0 +1,9 @@ +Steps for sending 'break' on sunhv console: +=== + +On Baremetal: + 1. press Esc + 'B' + +On LDOM: + 1. pressCtrl + ']' + 2. telnet> send break -- 1.7.1
[PATCH v3 2/4] sparc64: Migrate hvcons irq to panicked cpu
On panic, all other CPUs are stopped except the one which had hit panic. To keep console alive, we need to migrate hvcons irq to panicked CPU. Signed-off-by: Vijay Kumar --- v2->v3: Added SERIAL_SUNHV conditional group for sunhv_migrate_hvcons_irq(). --- arch/sparc/include/asm/setup.h |5 - arch/sparc/kernel/smp_64.c |6 +- drivers/tty/serial/sunhv.c |6 ++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/arch/sparc/include/asm/setup.h b/arch/sparc/include/asm/setup.h index 29d64b1..478bf6b 100644 --- a/arch/sparc/include/asm/setup.h +++ b/arch/sparc/include/asm/setup.h @@ -59,8 +59,11 @@ static inline int con_is_present(void) extern atomic_t dcpage_flushes_xcall; extern int sysctl_tsb_ratio; -#endif +#ifdef CONFIG_SERIAL_SUNHV +void sunhv_migrate_hvcons_irq(int cpu); +#endif +#endif void sun_do_break(void); extern int stop_a_enabled; extern int scons_pwroff; diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 712bf1b..90a02cb 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1452,8 +1452,12 @@ void smp_send_stop(void) int cpu; if (tlb_type == hypervisor) { + int this_cpu = smp_processor_id(); +#ifdef CONFIG_SERIAL_SUNHV + sunhv_migrate_hvcons_irq(this_cpu); +#endif for_each_online_cpu(cpu) { - if (cpu == smp_processor_id()) + if (cpu == this_cpu) continue; set_cpu_online(cpu, false); diff --git a/drivers/tty/serial/sunhv.c b/drivers/tty/serial/sunhv.c index 99ef5c6..039ae05 100644 --- a/drivers/tty/serial/sunhv.c +++ b/drivers/tty/serial/sunhv.c @@ -398,6 +398,12 @@ static int sunhv_verify_port(struct uart_port *port, struct serial_struct *ser) static struct uart_port *sunhv_port; +void sunhv_migrate_hvcons_irq(int cpu) +{ + /* Migrate hvcons irq to param cpu */ + irq_force_affinity(sunhv_port->irq, cpumask_of(cpu)); +} + /* Copy 's' into the con_write_page, decoding "\n" into * "\r\n" along the way. We have to return two lengths * because the caller needs to know how much to advance -- 1.7.1
[PATCH v3 3/4] sparc64: Send break twice from console to return to boot prom
Now we can also jump to boot prom from sunhv console by sending break twice on console for both running and panicked kernel cases. Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com> --- drivers/tty/serial/sunhv.c |6 +- kernel/panic.c |3 ++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/sunhv.c b/drivers/tty/serial/sunhv.c index 039ae05..8975d9c 100644 --- a/drivers/tty/serial/sunhv.c +++ b/drivers/tty/serial/sunhv.c @@ -116,7 +116,7 @@ static int receive_chars_getchar(struct uart_port *port) static int receive_chars_read(struct uart_port *port) { - int saw_console_brk = 0; + static int saw_console_brk; int limit = 1; while (limit-- > 0) { @@ -128,6 +128,9 @@ static int receive_chars_read(struct uart_port *port) bytes_read = 0; if (stat == CON_BREAK) { + if (saw_console_brk) + sun_do_break(); + if (uart_handle_break(port)) continue; saw_console_brk = 1; @@ -151,6 +154,7 @@ static int receive_chars_read(struct uart_port *port) if (port->sysrq != 0 && *con_read_page) { for (i = 0; i < bytes_read; i++) uart_handle_sysrq_char(port, con_read_page[i]); + saw_console_brk = 0; } if (port->state == NULL) diff --git a/kernel/panic.c b/kernel/panic.c index 08aa88d..70f799d 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -273,7 +273,8 @@ void panic(const char *fmt, ...) extern int stop_a_enabled; /* Make sure the user can actually press Stop-A (L1-A) */ stop_a_enabled = 1; - pr_emerg("Press Stop-A (L1-A) to return to the boot prom\n"); + pr_emerg("Press Stop-A (L1-A) from sun keyboard or send break\n" +"twice on console to return to the boot prom\n"); } #endif #if defined(CONFIG_S390) -- 1.7.1
[PATCH v3 3/4] sparc64: Send break twice from console to return to boot prom
Now we can also jump to boot prom from sunhv console by sending break twice on console for both running and panicked kernel cases. Signed-off-by: Vijay Kumar --- drivers/tty/serial/sunhv.c |6 +- kernel/panic.c |3 ++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/sunhv.c b/drivers/tty/serial/sunhv.c index 039ae05..8975d9c 100644 --- a/drivers/tty/serial/sunhv.c +++ b/drivers/tty/serial/sunhv.c @@ -116,7 +116,7 @@ static int receive_chars_getchar(struct uart_port *port) static int receive_chars_read(struct uart_port *port) { - int saw_console_brk = 0; + static int saw_console_brk; int limit = 1; while (limit-- > 0) { @@ -128,6 +128,9 @@ static int receive_chars_read(struct uart_port *port) bytes_read = 0; if (stat == CON_BREAK) { + if (saw_console_brk) + sun_do_break(); + if (uart_handle_break(port)) continue; saw_console_brk = 1; @@ -151,6 +154,7 @@ static int receive_chars_read(struct uart_port *port) if (port->sysrq != 0 && *con_read_page) { for (i = 0; i < bytes_read; i++) uart_handle_sysrq_char(port, con_read_page[i]); + saw_console_brk = 0; } if (port->state == NULL) diff --git a/kernel/panic.c b/kernel/panic.c index 08aa88d..70f799d 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -273,7 +273,8 @@ void panic(const char *fmt, ...) extern int stop_a_enabled; /* Make sure the user can actually press Stop-A (L1-A) */ stop_a_enabled = 1; - pr_emerg("Press Stop-A (L1-A) to return to the boot prom\n"); + pr_emerg("Press Stop-A (L1-A) from sun keyboard or send break\n" +"twice on console to return to the boot prom\n"); } #endif #if defined(CONFIG_S390) -- 1.7.1
[PATCH v3 0/4] sparc64: Jump to boot prom from console on panic
V3 changes: - patch 02/04: Added SERIAL_SUNHV conditional group for sunhv_migrate_hvcons_irq in smp_send_stop(). V2 changes: - Added cover letter patch Hi, Currently Stop-A (L1A) does not make the kernel switch to OBP on panic. This patchset addresses this issue. Also, now we can cause a jump to OBP by sending 'break' twice from sunhv console. On bare metal, one can send a break by typing Esc + 'B' + Sysrq (or whatever). On LDOM, press Ctrl + ] in telnet, and then "send break" at the telnet prompt. Thanks. sparc64: Set cpu state to offline when stopped sparc64: Migrate hvcons irq to panicked cpu sparc64: Send break twice from console to return to boot prom Documentation/sparc: Steps for sending break on sunhv console Documentation/sparc/console.txt |9 + arch/sparc/include/asm/setup.h |1 + arch/sparc/kernel/smp_64.c |9 - drivers/tty/serial/sunhv.c | 12 +++- kernel/panic.c |3 ++- 5 files changed, 31 insertions(+), 3 deletions(-) create mode 100644 Documentation/sparc/console.txt -- 1.7.1
[PATCH v3 0/4] sparc64: Jump to boot prom from console on panic
V3 changes: - patch 02/04: Added SERIAL_SUNHV conditional group for sunhv_migrate_hvcons_irq in smp_send_stop(). V2 changes: - Added cover letter patch Hi, Currently Stop-A (L1A) does not make the kernel switch to OBP on panic. This patchset addresses this issue. Also, now we can cause a jump to OBP by sending 'break' twice from sunhv console. On bare metal, one can send a break by typing Esc + 'B' + Sysrq (or whatever). On LDOM, press Ctrl + ] in telnet, and then "send break" at the telnet prompt. Thanks. sparc64: Set cpu state to offline when stopped sparc64: Migrate hvcons irq to panicked cpu sparc64: Send break twice from console to return to boot prom Documentation/sparc: Steps for sending break on sunhv console Documentation/sparc/console.txt |9 + arch/sparc/include/asm/setup.h |1 + arch/sparc/kernel/smp_64.c |9 - drivers/tty/serial/sunhv.c | 12 +++- kernel/panic.c |3 ++- 5 files changed, 31 insertions(+), 3 deletions(-) create mode 100644 Documentation/sparc/console.txt -- 1.7.1
Re: [PATCH v2 2/4] sparc64: Migrate hvcons irq to panicked cpu
On 11/19/2016 9:48 AM, David Miller wrote: From: Vijay Kumar <vijay.ac.ku...@oracle.com> Date: Fri, 11 Nov 2016 10:11:57 -0800 @@ -1444,8 +1444,12 @@ void smp_send_stop(void) int cpu; if (tlb_type == hypervisor) { + int this_cpu = smp_processor_id(); + + sunhv_migrate_hvcons_irq(this_cpu); + You can't unconditionally call into code that might be configured out by a Kconfig option, as you are doing here. If SERIAL_SUNHV=n this change will thus result in a build error. Thanks for your comment. I will fix this and send revised version of patch. Thanks, Vijay
Re: [PATCH v2 2/4] sparc64: Migrate hvcons irq to panicked cpu
On 11/19/2016 9:48 AM, David Miller wrote: From: Vijay Kumar Date: Fri, 11 Nov 2016 10:11:57 -0800 @@ -1444,8 +1444,12 @@ void smp_send_stop(void) int cpu; if (tlb_type == hypervisor) { + int this_cpu = smp_processor_id(); + + sunhv_migrate_hvcons_irq(this_cpu); + You can't unconditionally call into code that might be configured out by a Kconfig option, as you are doing here. If SERIAL_SUNHV=n this change will thus result in a build error. Thanks for your comment. I will fix this and send revised version of patch. Thanks, Vijay
[PATCH v2 3/4] sparc64: Send break twice from console to return to boot prom
Now we can also jump to boot prom from sunhv console by sending break twice on console for both running and panicked kernel cases. Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com> --- drivers/tty/serial/sunhv.c |6 +- kernel/panic.c |3 ++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/sunhv.c b/drivers/tty/serial/sunhv.c index 59828d8..33c35b4 100644 --- a/drivers/tty/serial/sunhv.c +++ b/drivers/tty/serial/sunhv.c @@ -116,7 +116,7 @@ static int receive_chars_getchar(struct uart_port *port) static int receive_chars_read(struct uart_port *port) { - int saw_console_brk = 0; + static int saw_console_brk; int limit = 1; while (limit-- > 0) { @@ -128,6 +128,9 @@ static int receive_chars_read(struct uart_port *port) bytes_read = 0; if (stat == CON_BREAK) { + if (saw_console_brk) + sun_do_break(); + if (uart_handle_break(port)) continue; saw_console_brk = 1; @@ -151,6 +154,7 @@ static int receive_chars_read(struct uart_port *port) if (port->sysrq != 0 && *con_read_page) { for (i = 0; i < bytes_read; i++) uart_handle_sysrq_char(port, con_read_page[i]); + saw_console_brk = 0; } if (port->state == NULL) diff --git a/kernel/panic.c b/kernel/panic.c index ca8cea1..4fe3b28 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -240,7 +240,8 @@ void panic(const char *fmt, ...) extern int stop_a_enabled; /* Make sure the user can actually press Stop-A (L1-A) */ stop_a_enabled = 1; - pr_emerg("Press Stop-A (L1-A) to return to the boot prom\n"); + pr_emerg("Press Stop-A (L1-A) from sun keyboard or send break\n" +"twice on console to return to the boot prom\n"); } #endif #if defined(CONFIG_S390) -- 1.7.1
[PATCH v2 3/4] sparc64: Send break twice from console to return to boot prom
Now we can also jump to boot prom from sunhv console by sending break twice on console for both running and panicked kernel cases. Signed-off-by: Vijay Kumar --- drivers/tty/serial/sunhv.c |6 +- kernel/panic.c |3 ++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/sunhv.c b/drivers/tty/serial/sunhv.c index 59828d8..33c35b4 100644 --- a/drivers/tty/serial/sunhv.c +++ b/drivers/tty/serial/sunhv.c @@ -116,7 +116,7 @@ static int receive_chars_getchar(struct uart_port *port) static int receive_chars_read(struct uart_port *port) { - int saw_console_brk = 0; + static int saw_console_brk; int limit = 1; while (limit-- > 0) { @@ -128,6 +128,9 @@ static int receive_chars_read(struct uart_port *port) bytes_read = 0; if (stat == CON_BREAK) { + if (saw_console_brk) + sun_do_break(); + if (uart_handle_break(port)) continue; saw_console_brk = 1; @@ -151,6 +154,7 @@ static int receive_chars_read(struct uart_port *port) if (port->sysrq != 0 && *con_read_page) { for (i = 0; i < bytes_read; i++) uart_handle_sysrq_char(port, con_read_page[i]); + saw_console_brk = 0; } if (port->state == NULL) diff --git a/kernel/panic.c b/kernel/panic.c index ca8cea1..4fe3b28 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -240,7 +240,8 @@ void panic(const char *fmt, ...) extern int stop_a_enabled; /* Make sure the user can actually press Stop-A (L1-A) */ stop_a_enabled = 1; - pr_emerg("Press Stop-A (L1-A) to return to the boot prom\n"); + pr_emerg("Press Stop-A (L1-A) from sun keyboard or send break\n" +"twice on console to return to the boot prom\n"); } #endif #if defined(CONFIG_S390) -- 1.7.1
[PATCH v2 1/4] sparc64: Set cpu state to offline when stopped
CPU needs to be marked offline before stopping it. When not marked offline, the xcall receives HV_EWOULDBLOCK and so assumes that not all CPUs received the message, and retries. After 10000 retries, it finally fails with fatal mondo timeout. Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com> --- arch/sparc/kernel/smp_64.c |3 +++ 1 files changed, 3 insertions(+), 0 deletions(-) diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index d3035ba..14138ad 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1435,6 +1435,7 @@ void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs) static void stop_this_cpu(void *dummy) { + set_cpu_online(smp_processor_id(), false); prom_stopself(); } @@ -1446,6 +1447,8 @@ void smp_send_stop(void) for_each_online_cpu(cpu) { if (cpu == smp_processor_id()) continue; + + set_cpu_online(cpu, false); #ifdef CONFIG_SUN_LDOMS if (ldom_domaining_enabled) { unsigned long hv_err; -- 1.7.1
[PATCH v2 0/4] sparc64: Jump to boot prom from console on panic
Currently Stop-A (L1A) does not make the kernel switch to OBP on panic. This patchset addresses this issue. Also, now we can cause a jump to OBP by sending 'break' twice from sunhv console. On bare metal, one can send a break by typing Esc + 'B' + Sysrq (or whatever). On LDOM, press Ctrl + ] in telnet, and then "send break" at the telnet prompt. sparc64: Set cpu state to offline when stopped sparc64: Migrate hvcons irq to panicked cpu sparc64: Send break twice from console to return to boot prom Documentation/sparc: Steps for sending break on sunhv console Documentation/sparc/console.txt |9 + arch/sparc/include/asm/setup.h |1 + arch/sparc/kernel/smp_64.c |9 - drivers/tty/serial/sunhv.c | 12 +++- kernel/panic.c |3 ++- 5 files changed, 31 insertions(+), 3 deletions(-) create mode 100644 Documentation/sparc/console.txt -- 1.7.1
[PATCH v2 1/4] sparc64: Set cpu state to offline when stopped
CPU needs to be marked offline before stopping it. When not marked offline, the xcall receives HV_EWOULDBLOCK and so assumes that not all CPUs received the message, and retries. After 10000 retries, it finally fails with fatal mondo timeout. Signed-off-by: Vijay Kumar --- arch/sparc/kernel/smp_64.c |3 +++ 1 files changed, 3 insertions(+), 0 deletions(-) diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index d3035ba..14138ad 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1435,6 +1435,7 @@ void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs) static void stop_this_cpu(void *dummy) { + set_cpu_online(smp_processor_id(), false); prom_stopself(); } @@ -1446,6 +1447,8 @@ void smp_send_stop(void) for_each_online_cpu(cpu) { if (cpu == smp_processor_id()) continue; + + set_cpu_online(cpu, false); #ifdef CONFIG_SUN_LDOMS if (ldom_domaining_enabled) { unsigned long hv_err; -- 1.7.1
[PATCH v2 0/4] sparc64: Jump to boot prom from console on panic
Currently Stop-A (L1A) does not make the kernel switch to OBP on panic. This patchset addresses this issue. Also, now we can cause a jump to OBP by sending 'break' twice from sunhv console. On bare metal, one can send a break by typing Esc + 'B' + Sysrq (or whatever). On LDOM, press Ctrl + ] in telnet, and then "send break" at the telnet prompt. sparc64: Set cpu state to offline when stopped sparc64: Migrate hvcons irq to panicked cpu sparc64: Send break twice from console to return to boot prom Documentation/sparc: Steps for sending break on sunhv console Documentation/sparc/console.txt |9 + arch/sparc/include/asm/setup.h |1 + arch/sparc/kernel/smp_64.c |9 - drivers/tty/serial/sunhv.c | 12 +++- kernel/panic.c |3 ++- 5 files changed, 31 insertions(+), 3 deletions(-) create mode 100644 Documentation/sparc/console.txt -- 1.7.1
[PATCH v2 4/4] Documentation/sparc: Steps for sending break on sunhv console
Documented the steps for sending break on sunhv console. Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com> --- Documentation/sparc/console.txt |9 + 1 files changed, 9 insertions(+), 0 deletions(-) diff --git a/Documentation/sparc/console.txt b/Documentation/sparc/console.txt new file mode 100644 index 000..ab55353 --- /dev/null +++ b/Documentation/sparc/console.txt @@ -0,0 +1,9 @@ +Steps for sending 'break' on sunhv console: +=== + +On Baremetal: + 1. press Esc + 'B' + +On LDOM: + 1. pressCtrl + ']' + 2. telnet> send break -- 1.7.1
[PATCH v2 4/4] Documentation/sparc: Steps for sending break on sunhv console
Documented the steps for sending break on sunhv console. Signed-off-by: Vijay Kumar --- Documentation/sparc/console.txt |9 + 1 files changed, 9 insertions(+), 0 deletions(-) diff --git a/Documentation/sparc/console.txt b/Documentation/sparc/console.txt new file mode 100644 index 000..ab55353 --- /dev/null +++ b/Documentation/sparc/console.txt @@ -0,0 +1,9 @@ +Steps for sending 'break' on sunhv console: +=== + +On Baremetal: + 1. press Esc + 'B' + +On LDOM: + 1. pressCtrl + ']' + 2. telnet> send break -- 1.7.1
[PATCH v2 2/4] sparc64: Migrate hvcons irq to panicked cpu
On panic, all other CPUs are stopped except the one which had hit panic. To keep console alive, we need to migrate hvcons irq to panicked CPU. Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com> --- arch/sparc/include/asm/setup.h |1 + arch/sparc/kernel/smp_64.c |6 +- drivers/tty/serial/sunhv.c |6 ++ 3 files changed, 12 insertions(+), 1 deletions(-) diff --git a/arch/sparc/include/asm/setup.h b/arch/sparc/include/asm/setup.h index 29d64b1..41691a5 100644 --- a/arch/sparc/include/asm/setup.h +++ b/arch/sparc/include/asm/setup.h @@ -61,6 +61,7 @@ extern atomic_t dcpage_flushes_xcall; extern int sysctl_tsb_ratio; #endif +void sunhv_migrate_hvcons_irq(int cpu); void sun_do_break(void); extern int stop_a_enabled; extern int scons_pwroff; diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 14138ad..52dc4b7 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1444,8 +1444,12 @@ void smp_send_stop(void) int cpu; if (tlb_type == hypervisor) { + int this_cpu = smp_processor_id(); + + sunhv_migrate_hvcons_irq(this_cpu); + for_each_online_cpu(cpu) { - if (cpu == smp_processor_id()) + if (cpu == this_cpu) continue; set_cpu_online(cpu, false); diff --git a/drivers/tty/serial/sunhv.c b/drivers/tty/serial/sunhv.c index 4e603d0..59828d8 100644 --- a/drivers/tty/serial/sunhv.c +++ b/drivers/tty/serial/sunhv.c @@ -398,6 +398,12 @@ static struct uart_driver sunhv_reg = { static struct uart_port *sunhv_port; +void sunhv_migrate_hvcons_irq(int cpu) +{ + /* Migrate hvcons irq to param cpu */ + irq_force_affinity(sunhv_port->irq, cpumask_of(cpu)); +} + /* Copy 's' into the con_write_page, decoding "\n" into * "\r\n" along the way. We have to return two lengths * because the caller needs to know how much to advance -- 1.7.1
[PATCH v2 2/4] sparc64: Migrate hvcons irq to panicked cpu
On panic, all other CPUs are stopped except the one which had hit panic. To keep console alive, we need to migrate hvcons irq to panicked CPU. Signed-off-by: Vijay Kumar --- arch/sparc/include/asm/setup.h |1 + arch/sparc/kernel/smp_64.c |6 +- drivers/tty/serial/sunhv.c |6 ++ 3 files changed, 12 insertions(+), 1 deletions(-) diff --git a/arch/sparc/include/asm/setup.h b/arch/sparc/include/asm/setup.h index 29d64b1..41691a5 100644 --- a/arch/sparc/include/asm/setup.h +++ b/arch/sparc/include/asm/setup.h @@ -61,6 +61,7 @@ extern atomic_t dcpage_flushes_xcall; extern int sysctl_tsb_ratio; #endif +void sunhv_migrate_hvcons_irq(int cpu); void sun_do_break(void); extern int stop_a_enabled; extern int scons_pwroff; diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 14138ad..52dc4b7 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1444,8 +1444,12 @@ void smp_send_stop(void) int cpu; if (tlb_type == hypervisor) { + int this_cpu = smp_processor_id(); + + sunhv_migrate_hvcons_irq(this_cpu); + for_each_online_cpu(cpu) { - if (cpu == smp_processor_id()) + if (cpu == this_cpu) continue; set_cpu_online(cpu, false); diff --git a/drivers/tty/serial/sunhv.c b/drivers/tty/serial/sunhv.c index 4e603d0..59828d8 100644 --- a/drivers/tty/serial/sunhv.c +++ b/drivers/tty/serial/sunhv.c @@ -398,6 +398,12 @@ static struct uart_driver sunhv_reg = { static struct uart_port *sunhv_port; +void sunhv_migrate_hvcons_irq(int cpu) +{ + /* Migrate hvcons irq to param cpu */ + irq_force_affinity(sunhv_port->irq, cpumask_of(cpu)); +} + /* Copy 's' into the con_write_page, decoding "\n" into * "\r\n" along the way. We have to return two lengths * because the caller needs to know how much to advance -- 1.7.1
[PATCH 0/4] sparc64: Jump to boot prom from console on panic
Here is the cover posting for the patches. I did not send a cover letter initially, as I thought that the patch descriptions were self-explanatory. But I agree, this would help in overall understanding of the patch set. Currently Stop-A (L1A) does not make the kernel switch to OBP on panic. This patchset addresses this issue. Also, now we can cause a jump to OBP by sending 'break' twice from sunhv console. On bare metal, one can send a break by typing Esc + 'B' + Sysrq (or whatever). On LDOM, press Ctrl + ] in telnet, and then "send break" at the telnet prompt. sparc64: Set cpu state to offline when stopped sparc64: Migrate hvcons irq to panicked cpu sparc64: Send break twice from console to return to boot prom Documentation/sparc: Steps for sending break on sunhv console Documentation/sparc/console.txt |9 + arch/sparc/include/asm/setup.h |1 + arch/sparc/kernel/smp_64.c |9 - drivers/tty/serial/sunhv.c | 12 +++- kernel/panic.c |3 ++- 5 files changed, 31 insertions(+), 3 deletions(-) create mode 100644 Documentation/sparc/console.txt -- 1.7.1
[PATCH 0/4] sparc64: Jump to boot prom from console on panic
Here is the cover posting for the patches. I did not send a cover letter initially, as I thought that the patch descriptions were self-explanatory. But I agree, this would help in overall understanding of the patch set. Currently Stop-A (L1A) does not make the kernel switch to OBP on panic. This patchset addresses this issue. Also, now we can cause a jump to OBP by sending 'break' twice from sunhv console. On bare metal, one can send a break by typing Esc + 'B' + Sysrq (or whatever). On LDOM, press Ctrl + ] in telnet, and then "send break" at the telnet prompt. sparc64: Set cpu state to offline when stopped sparc64: Migrate hvcons irq to panicked cpu sparc64: Send break twice from console to return to boot prom Documentation/sparc: Steps for sending break on sunhv console Documentation/sparc/console.txt |9 + arch/sparc/include/asm/setup.h |1 + arch/sparc/kernel/smp_64.c |9 - drivers/tty/serial/sunhv.c | 12 +++- kernel/panic.c |3 ++- 5 files changed, 31 insertions(+), 3 deletions(-) create mode 100644 Documentation/sparc/console.txt -- 1.7.1
[PATCH 2/4] sparc64: Migrate hvcons irq to panicked cpu
On panic, all other CPUs are stopped except the one which had hit panic. To keep console alive, we need to migrate hvcons irq to panicked CPU. Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com> --- arch/sparc/include/asm/setup.h |1 + arch/sparc/kernel/smp_64.c |6 +- drivers/tty/serial/sunhv.c |6 ++ 3 files changed, 12 insertions(+), 1 deletions(-) diff --git a/arch/sparc/include/asm/setup.h b/arch/sparc/include/asm/setup.h index 29d64b1..41691a5 100644 --- a/arch/sparc/include/asm/setup.h +++ b/arch/sparc/include/asm/setup.h @@ -61,6 +61,7 @@ extern atomic_t dcpage_flushes_xcall; extern int sysctl_tsb_ratio; #endif +void sunhv_migrate_hvcons_irq(int cpu); void sun_do_break(void); extern int stop_a_enabled; extern int scons_pwroff; diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 14138ad..52dc4b7 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1444,8 +1444,12 @@ void smp_send_stop(void) int cpu; if (tlb_type == hypervisor) { + int this_cpu = smp_processor_id(); + + sunhv_migrate_hvcons_irq(this_cpu); + for_each_online_cpu(cpu) { - if (cpu == smp_processor_id()) + if (cpu == this_cpu) continue; set_cpu_online(cpu, false); diff --git a/drivers/tty/serial/sunhv.c b/drivers/tty/serial/sunhv.c index 4e603d0..59828d8 100644 --- a/drivers/tty/serial/sunhv.c +++ b/drivers/tty/serial/sunhv.c @@ -398,6 +398,12 @@ static struct uart_driver sunhv_reg = { static struct uart_port *sunhv_port; +void sunhv_migrate_hvcons_irq(int cpu) +{ + /* Migrate hvcons irq to param cpu */ + irq_force_affinity(sunhv_port->irq, cpumask_of(cpu)); +} + /* Copy 's' into the con_write_page, decoding "\n" into * "\r\n" along the way. We have to return two lengths * because the caller needs to know how much to advance -- 1.7.1
[PATCH 2/4] sparc64: Migrate hvcons irq to panicked cpu
On panic, all other CPUs are stopped except the one which had hit panic. To keep console alive, we need to migrate hvcons irq to panicked CPU. Signed-off-by: Vijay Kumar --- arch/sparc/include/asm/setup.h |1 + arch/sparc/kernel/smp_64.c |6 +- drivers/tty/serial/sunhv.c |6 ++ 3 files changed, 12 insertions(+), 1 deletions(-) diff --git a/arch/sparc/include/asm/setup.h b/arch/sparc/include/asm/setup.h index 29d64b1..41691a5 100644 --- a/arch/sparc/include/asm/setup.h +++ b/arch/sparc/include/asm/setup.h @@ -61,6 +61,7 @@ extern atomic_t dcpage_flushes_xcall; extern int sysctl_tsb_ratio; #endif +void sunhv_migrate_hvcons_irq(int cpu); void sun_do_break(void); extern int stop_a_enabled; extern int scons_pwroff; diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 14138ad..52dc4b7 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1444,8 +1444,12 @@ void smp_send_stop(void) int cpu; if (tlb_type == hypervisor) { + int this_cpu = smp_processor_id(); + + sunhv_migrate_hvcons_irq(this_cpu); + for_each_online_cpu(cpu) { - if (cpu == smp_processor_id()) + if (cpu == this_cpu) continue; set_cpu_online(cpu, false); diff --git a/drivers/tty/serial/sunhv.c b/drivers/tty/serial/sunhv.c index 4e603d0..59828d8 100644 --- a/drivers/tty/serial/sunhv.c +++ b/drivers/tty/serial/sunhv.c @@ -398,6 +398,12 @@ static struct uart_driver sunhv_reg = { static struct uart_port *sunhv_port; +void sunhv_migrate_hvcons_irq(int cpu) +{ + /* Migrate hvcons irq to param cpu */ + irq_force_affinity(sunhv_port->irq, cpumask_of(cpu)); +} + /* Copy 's' into the con_write_page, decoding "\n" into * "\r\n" along the way. We have to return two lengths * because the caller needs to know how much to advance -- 1.7.1
[PATCH 3/4] sparc64: Send break twice from console to return to boot prom
Now we can also jump to boot prom from sunhv console by sending break twice on console for both running and panicked kernel cases. Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com> --- drivers/tty/serial/sunhv.c |6 +- kernel/panic.c |3 ++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/sunhv.c b/drivers/tty/serial/sunhv.c index 59828d8..33c35b4 100644 --- a/drivers/tty/serial/sunhv.c +++ b/drivers/tty/serial/sunhv.c @@ -116,7 +116,7 @@ static int receive_chars_getchar(struct uart_port *port) static int receive_chars_read(struct uart_port *port) { - int saw_console_brk = 0; + static int saw_console_brk; int limit = 1; while (limit-- > 0) { @@ -128,6 +128,9 @@ static int receive_chars_read(struct uart_port *port) bytes_read = 0; if (stat == CON_BREAK) { + if (saw_console_brk) + sun_do_break(); + if (uart_handle_break(port)) continue; saw_console_brk = 1; @@ -151,6 +154,7 @@ static int receive_chars_read(struct uart_port *port) if (port->sysrq != 0 && *con_read_page) { for (i = 0; i < bytes_read; i++) uart_handle_sysrq_char(port, con_read_page[i]); + saw_console_brk = 0; } if (port->state == NULL) diff --git a/kernel/panic.c b/kernel/panic.c index ca8cea1..4fe3b28 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -240,7 +240,8 @@ void panic(const char *fmt, ...) extern int stop_a_enabled; /* Make sure the user can actually press Stop-A (L1-A) */ stop_a_enabled = 1; - pr_emerg("Press Stop-A (L1-A) to return to the boot prom\n"); + pr_emerg("Press Stop-A (L1-A) from sun keyboard or send break\n" +"twice on console to return to the boot prom\n"); } #endif #if defined(CONFIG_S390) -- 1.7.1
[PATCH 3/4] sparc64: Send break twice from console to return to boot prom
Now we can also jump to boot prom from sunhv console by sending break twice on console for both running and panicked kernel cases. Signed-off-by: Vijay Kumar --- drivers/tty/serial/sunhv.c |6 +- kernel/panic.c |3 ++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/sunhv.c b/drivers/tty/serial/sunhv.c index 59828d8..33c35b4 100644 --- a/drivers/tty/serial/sunhv.c +++ b/drivers/tty/serial/sunhv.c @@ -116,7 +116,7 @@ static int receive_chars_getchar(struct uart_port *port) static int receive_chars_read(struct uart_port *port) { - int saw_console_brk = 0; + static int saw_console_brk; int limit = 1; while (limit-- > 0) { @@ -128,6 +128,9 @@ static int receive_chars_read(struct uart_port *port) bytes_read = 0; if (stat == CON_BREAK) { + if (saw_console_brk) + sun_do_break(); + if (uart_handle_break(port)) continue; saw_console_brk = 1; @@ -151,6 +154,7 @@ static int receive_chars_read(struct uart_port *port) if (port->sysrq != 0 && *con_read_page) { for (i = 0; i < bytes_read; i++) uart_handle_sysrq_char(port, con_read_page[i]); + saw_console_brk = 0; } if (port->state == NULL) diff --git a/kernel/panic.c b/kernel/panic.c index ca8cea1..4fe3b28 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -240,7 +240,8 @@ void panic(const char *fmt, ...) extern int stop_a_enabled; /* Make sure the user can actually press Stop-A (L1-A) */ stop_a_enabled = 1; - pr_emerg("Press Stop-A (L1-A) to return to the boot prom\n"); + pr_emerg("Press Stop-A (L1-A) from sun keyboard or send break\n" +"twice on console to return to the boot prom\n"); } #endif #if defined(CONFIG_S390) -- 1.7.1
[PATCH 4/4] Documentation/sparc: Steps for sending break on sunhv console
Documented the steps for sending break on sunhv console. Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com> --- Documentation/sparc/console.txt |9 + 1 files changed, 9 insertions(+), 0 deletions(-) diff --git a/Documentation/sparc/console.txt b/Documentation/sparc/console.txt new file mode 100644 index 000..ab55353 --- /dev/null +++ b/Documentation/sparc/console.txt @@ -0,0 +1,9 @@ +Steps for sending 'break' on sunhv console: +=== + +On Baremetal: + 1. press Esc + 'B' + +On LDOM: + 1. pressCtrl + ']' + 2. telnet> send break -- 1.7.1
[PATCH 4/4] Documentation/sparc: Steps for sending break on sunhv console
Documented the steps for sending break on sunhv console. Signed-off-by: Vijay Kumar --- Documentation/sparc/console.txt |9 + 1 files changed, 9 insertions(+), 0 deletions(-) diff --git a/Documentation/sparc/console.txt b/Documentation/sparc/console.txt new file mode 100644 index 000..ab55353 --- /dev/null +++ b/Documentation/sparc/console.txt @@ -0,0 +1,9 @@ +Steps for sending 'break' on sunhv console: +=== + +On Baremetal: + 1. press Esc + 'B' + +On LDOM: + 1. pressCtrl + ']' + 2. telnet> send break -- 1.7.1
[PATCH 1/4] sparc64: Set cpu state to offline when stopped
CPU needs to be marked offline before stopping it. When not marked offline, the xcall receives HV_EWOULDBLOCK and so assumes that not all CPUs received the message, and retries. After 10000 retries, it finally fails with fatal mondo timeout. Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com> --- arch/sparc/kernel/smp_64.c |3 +++ 1 files changed, 3 insertions(+), 0 deletions(-) diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index d3035ba..14138ad 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1435,6 +1435,7 @@ void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs) static void stop_this_cpu(void *dummy) { + set_cpu_online(smp_processor_id(), false); prom_stopself(); } @@ -1446,6 +1447,8 @@ void smp_send_stop(void) for_each_online_cpu(cpu) { if (cpu == smp_processor_id()) continue; + + set_cpu_online(cpu, false); #ifdef CONFIG_SUN_LDOMS if (ldom_domaining_enabled) { unsigned long hv_err; -- 1.7.1
[PATCH 1/4] sparc64: Set cpu state to offline when stopped
CPU needs to be marked offline before stopping it. When not marked offline, the xcall receives HV_EWOULDBLOCK and so assumes that not all CPUs received the message, and retries. After 10000 retries, it finally fails with fatal mondo timeout. Signed-off-by: Vijay Kumar --- arch/sparc/kernel/smp_64.c |3 +++ 1 files changed, 3 insertions(+), 0 deletions(-) diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index d3035ba..14138ad 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1435,6 +1435,7 @@ void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs) static void stop_this_cpu(void *dummy) { + set_cpu_online(smp_processor_id(), false); prom_stopself(); } @@ -1446,6 +1447,8 @@ void smp_send_stop(void) for_each_online_cpu(cpu) { if (cpu == smp_processor_id()) continue; + + set_cpu_online(cpu, false); #ifdef CONFIG_SUN_LDOMS if (ldom_domaining_enabled) { unsigned long hv_err; -- 1.7.1
[PATCH v2 2/2] Documentation/ABI: Added ABI information for devspec and obppath.
Updated Documentation/ABI for devspec and obppath sysfs entries. Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com> --- Documentation/ABI/stable/sysfs-devices | 14 ++ 1 files changed, 14 insertions(+), 0 deletions(-) diff --git a/Documentation/ABI/stable/sysfs-devices b/Documentation/ABI/stable/sysfs-devices index df449d7..35c457f 100644 --- a/Documentation/ABI/stable/sysfs-devices +++ b/Documentation/ABI/stable/sysfs-devices @@ -8,3 +8,17 @@ Description: Any device associated with a device-tree node will have an of_path symlink pointing to the corresponding device node in /sys/firmware/devicetree/ + +What: /sys/devices/*/devspec +Date: October 2016 +Contact: Device Tree mailing list <devicet...@vger.kernel.org> +Description: + If CONFIG_OF is enabled, then this file is present. When + read, it returns full name of the device node. + +What: /sys/devices/*/obppath +Date: October 2016 +Contact: Device Tree mailing list <devicet...@vger.kernel.org> +Description: + If CONFIG_OF is enabled, then this file is present. When + read, it returns full name of the device node. -- 1.7.1
[PATCH v2 1/2] usb/core: Added devspec sysfs entry for devices behind the usb hub
Grub finds incorrect of_node path for devices behind usb hub. Added devspec sysfs entry for devices behind usb hub so that right of_node path is returned during grub sysfs walk for these devices. Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com> --- drivers/usb/core/sysfs.c | 15 +++ 1 files changed, 15 insertions(+), 0 deletions(-) diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c index c953a0f..84d66d5 100644 --- a/drivers/usb/core/sysfs.c +++ b/drivers/usb/core/sysfs.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "usb.h" /* Active configuration fields */ @@ -104,6 +105,17 @@ static ssize_t bConfigurationValue_store(struct device *dev, static DEVICE_ATTR_IGNORE_LOCKDEP(bConfigurationValue, S_IRUGO | S_IWUSR, bConfigurationValue_show, bConfigurationValue_store); +#ifdef CONFIG_OF +static ssize_t devspec_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct device_node *of_node = dev->of_node; + + return sprintf(buf, "%s\n", of_node_full_name(of_node)); +} +static DEVICE_ATTR_RO(devspec); +#endif + /* String fields */ #define usb_string_attr(name) \ static ssize_t name##_show(struct device *dev, \ @@ -786,6 +798,9 @@ static struct attribute *dev_attrs[] = { _attr_remove.attr, _attr_removable.attr, _attr_ltm_capable.attr, +#ifdef CONFIG_OF + _attr_devspec.attr, +#endif NULL, }; static struct attribute_group dev_attr_grp = { -- 1.7.1
[PATCH v2 2/2] Documentation/ABI: Added ABI information for devspec and obppath.
Updated Documentation/ABI for devspec and obppath sysfs entries. Signed-off-by: Vijay Kumar --- Documentation/ABI/stable/sysfs-devices | 14 ++ 1 files changed, 14 insertions(+), 0 deletions(-) diff --git a/Documentation/ABI/stable/sysfs-devices b/Documentation/ABI/stable/sysfs-devices index df449d7..35c457f 100644 --- a/Documentation/ABI/stable/sysfs-devices +++ b/Documentation/ABI/stable/sysfs-devices @@ -8,3 +8,17 @@ Description: Any device associated with a device-tree node will have an of_path symlink pointing to the corresponding device node in /sys/firmware/devicetree/ + +What: /sys/devices/*/devspec +Date: October 2016 +Contact: Device Tree mailing list +Description: + If CONFIG_OF is enabled, then this file is present. When + read, it returns full name of the device node. + +What: /sys/devices/*/obppath +Date: October 2016 +Contact: Device Tree mailing list +Description: + If CONFIG_OF is enabled, then this file is present. When + read, it returns full name of the device node. -- 1.7.1
[PATCH v2 1/2] usb/core: Added devspec sysfs entry for devices behind the usb hub
Grub finds incorrect of_node path for devices behind usb hub. Added devspec sysfs entry for devices behind usb hub so that right of_node path is returned during grub sysfs walk for these devices. Signed-off-by: Vijay Kumar --- drivers/usb/core/sysfs.c | 15 +++ 1 files changed, 15 insertions(+), 0 deletions(-) diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c index c953a0f..84d66d5 100644 --- a/drivers/usb/core/sysfs.c +++ b/drivers/usb/core/sysfs.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "usb.h" /* Active configuration fields */ @@ -104,6 +105,17 @@ static ssize_t bConfigurationValue_store(struct device *dev, static DEVICE_ATTR_IGNORE_LOCKDEP(bConfigurationValue, S_IRUGO | S_IWUSR, bConfigurationValue_show, bConfigurationValue_store); +#ifdef CONFIG_OF +static ssize_t devspec_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct device_node *of_node = dev->of_node; + + return sprintf(buf, "%s\n", of_node_full_name(of_node)); +} +static DEVICE_ATTR_RO(devspec); +#endif + /* String fields */ #define usb_string_attr(name) \ static ssize_t name##_show(struct device *dev, \ @@ -786,6 +798,9 @@ static struct attribute *dev_attrs[] = { _attr_remove.attr, _attr_removable.attr, _attr_ltm_capable.attr, +#ifdef CONFIG_OF + _attr_devspec.attr, +#endif NULL, }; static struct attribute_group dev_attr_grp = { -- 1.7.1
Re: [PATCH] usb/core: Added devspec sysfs entry for devices behind usb hub
On 10/4/2016 2:49 PM, Greg KH wrote: On Tue, Oct 04, 2016 at 12:04:40PM -0700, Vijay Kumar wrote: Grub finds incorrect of_node path for devices behind usb hub. Added devspec sysfs entry for devices behind usb hub so that right of_node path is returned during grub sysfs walk for these devices. Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com> --- drivers/usb/core/sysfs.c | 15 +++ 1 files changed, 15 insertions(+), 0 deletions(-) diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c index c953a0f..84d66d5 100644 --- a/drivers/usb/core/sysfs.c +++ b/drivers/usb/core/sysfs.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "usb.h" /* Active configuration fields */ @@ -104,6 +105,17 @@ static ssize_t bConfigurationValue_store(struct device *dev, static DEVICE_ATTR_IGNORE_LOCKDEP(bConfigurationValue, S_IRUGO | S_IWUSR, bConfigurationValue_show, bConfigurationValue_store); +#ifdef CONFIG_OF +static ssize_t devspec_show(struct device *dev, struct device_attribute *attr, +char *buf) +{ + struct device_node *of_node = dev->of_node; + + return sprintf(buf, "%s\n", of_node_full_name(of_node)); +} +static DEVICE_ATTR_RO(devspec); +#endif Any way to do this without the #ifdef? Thanks for your comment. I looked into it again and found that grub would report ofpath incorrectly if CONFIG_OF is not defined but the devspec sysfs file exists. I see pci-sysfs.c also defines devspec in the same way. And you need to also update Documentation/ABI if you add a new sysfs file. Sure, if you agree with my above comment then should I make the Documentation/ABI changes in a separate patch? Thanks, Vijay
Re: [PATCH] usb/core: Added devspec sysfs entry for devices behind usb hub
On 10/4/2016 2:49 PM, Greg KH wrote: On Tue, Oct 04, 2016 at 12:04:40PM -0700, Vijay Kumar wrote: Grub finds incorrect of_node path for devices behind usb hub. Added devspec sysfs entry for devices behind usb hub so that right of_node path is returned during grub sysfs walk for these devices. Signed-off-by: Vijay Kumar --- drivers/usb/core/sysfs.c | 15 +++ 1 files changed, 15 insertions(+), 0 deletions(-) diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c index c953a0f..84d66d5 100644 --- a/drivers/usb/core/sysfs.c +++ b/drivers/usb/core/sysfs.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "usb.h" /* Active configuration fields */ @@ -104,6 +105,17 @@ static ssize_t bConfigurationValue_store(struct device *dev, static DEVICE_ATTR_IGNORE_LOCKDEP(bConfigurationValue, S_IRUGO | S_IWUSR, bConfigurationValue_show, bConfigurationValue_store); +#ifdef CONFIG_OF +static ssize_t devspec_show(struct device *dev, struct device_attribute *attr, +char *buf) +{ + struct device_node *of_node = dev->of_node; + + return sprintf(buf, "%s\n", of_node_full_name(of_node)); +} +static DEVICE_ATTR_RO(devspec); +#endif Any way to do this without the #ifdef? Thanks for your comment. I looked into it again and found that grub would report ofpath incorrectly if CONFIG_OF is not defined but the devspec sysfs file exists. I see pci-sysfs.c also defines devspec in the same way. And you need to also update Documentation/ABI if you add a new sysfs file. Sure, if you agree with my above comment then should I make the Documentation/ABI changes in a separate patch? Thanks, Vijay
[PATCH] usb/core: Added devspec sysfs entry for devices behind usb hub
Grub finds incorrect of_node path for devices behind usb hub. Added devspec sysfs entry for devices behind usb hub so that right of_node path is returned during grub sysfs walk for these devices. Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com> --- drivers/usb/core/sysfs.c | 15 +++ 1 files changed, 15 insertions(+), 0 deletions(-) diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c index c953a0f..84d66d5 100644 --- a/drivers/usb/core/sysfs.c +++ b/drivers/usb/core/sysfs.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "usb.h" /* Active configuration fields */ @@ -104,6 +105,17 @@ static ssize_t bConfigurationValue_store(struct device *dev, static DEVICE_ATTR_IGNORE_LOCKDEP(bConfigurationValue, S_IRUGO | S_IWUSR, bConfigurationValue_show, bConfigurationValue_store); +#ifdef CONFIG_OF +static ssize_t devspec_show(struct device *dev, struct device_attribute *attr, +char *buf) +{ + struct device_node *of_node = dev->of_node; + + return sprintf(buf, "%s\n", of_node_full_name(of_node)); +} +static DEVICE_ATTR_RO(devspec); +#endif + /* String fields */ #define usb_string_attr(name) \ static ssize_t name##_show(struct device *dev, \ @@ -786,6 +798,9 @@ static struct attribute *dev_attrs[] = { _attr_remove.attr, _attr_removable.attr, _attr_ltm_capable.attr, +#ifdef CONFIG_OF + _attr_devspec.attr, +#endif NULL, }; static struct attribute_group dev_attr_grp = { -- 1.7.1
[PATCH] usb/core: Added devspec sysfs entry for devices behind usb hub
Grub finds incorrect of_node path for devices behind usb hub. Added devspec sysfs entry for devices behind usb hub so that right of_node path is returned during grub sysfs walk for these devices. Signed-off-by: Vijay Kumar --- drivers/usb/core/sysfs.c | 15 +++ 1 files changed, 15 insertions(+), 0 deletions(-) diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c index c953a0f..84d66d5 100644 --- a/drivers/usb/core/sysfs.c +++ b/drivers/usb/core/sysfs.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "usb.h" /* Active configuration fields */ @@ -104,6 +105,17 @@ static ssize_t bConfigurationValue_store(struct device *dev, static DEVICE_ATTR_IGNORE_LOCKDEP(bConfigurationValue, S_IRUGO | S_IWUSR, bConfigurationValue_show, bConfigurationValue_store); +#ifdef CONFIG_OF +static ssize_t devspec_show(struct device *dev, struct device_attribute *attr, +char *buf) +{ + struct device_node *of_node = dev->of_node; + + return sprintf(buf, "%s\n", of_node_full_name(of_node)); +} +static DEVICE_ATTR_RO(devspec); +#endif + /* String fields */ #define usb_string_attr(name) \ static ssize_t name##_show(struct device *dev, \ @@ -786,6 +798,9 @@ static struct attribute *dev_attrs[] = { _attr_remove.attr, _attr_removable.attr, _attr_ltm_capable.attr, +#ifdef CONFIG_OF + _attr_devspec.attr, +#endif NULL, }; static struct attribute_group dev_attr_grp = { -- 1.7.1
[PATCH] Specify all interrupts for the GPIO controller.
The PXA GPIO controller has 3 interrupt outputs, this needs to be indicated in the DTS file. Without this mainstone's CPLD interrupt 0 will not be raised to the processor. Signed-off-by: Vijay Kumar B. <vijayku...@zilogic.com> Reviewed-by: Deepak S. <dee...@zilogic.com> --- arch/arm/boot/dts/pxa2xx.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/pxa2xx.dtsi b/arch/arm/boot/dts/pxa2xx.dtsi index 5e5af07..9ca2e5b 100644 --- a/arch/arm/boot/dts/pxa2xx.dtsi +++ b/arch/arm/boot/dts/pxa2xx.dtsi @@ -54,8 +54,8 @@ reg = <0x40e0 0x1>; gpio-controller; #gpio-cells = <0x2>; - interrupts = <10>; - interrupt-names = "gpio_mux"; + interrupts = <8 9 10>; + interrupt-names = "gpio0", "gpio1", "gpio_mux"; interrupt-controller; #interrupt-cells = <0x2>; ranges; -- 2.1.4
[PATCH] Specify all interrupts for the GPIO controller.
The PXA GPIO controller has 3 interrupt outputs, this needs to be indicated in the DTS file. Without this mainstone's CPLD interrupt 0 will not be raised to the processor. Signed-off-by: Vijay Kumar B. Reviewed-by: Deepak S. --- arch/arm/boot/dts/pxa2xx.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/pxa2xx.dtsi b/arch/arm/boot/dts/pxa2xx.dtsi index 5e5af07..9ca2e5b 100644 --- a/arch/arm/boot/dts/pxa2xx.dtsi +++ b/arch/arm/boot/dts/pxa2xx.dtsi @@ -54,8 +54,8 @@ reg = <0x40e0 0x1>; gpio-controller; #gpio-cells = <0x2>; - interrupts = <10>; - interrupt-names = "gpio_mux"; + interrupts = <8 9 10>; + interrupt-names = "gpio0", "gpio1", "gpio_mux"; interrupt-controller; #interrupt-cells = <0x2>; ranges; -- 2.1.4
[RESEND PATCH] ARM: dts: pxa2xx: Specify all interrupts for the GPIO controller.
The PXA GPIO controller has 3 interrupt outputs, this needs to be indicated in the DTS file. Without this mainstone's CPLD interrupt 0 will not be raised to the processor. Signed-off-by: Vijay Kumar B. <vijayku...@zilogic.com> Reviewed-by: Deepak S. <dee...@zilogic.com> --- arch/arm/boot/dts/pxa2xx.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/pxa2xx.dtsi b/arch/arm/boot/dts/pxa2xx.dtsi index 5e5af07..9ca2e5b 100644 --- a/arch/arm/boot/dts/pxa2xx.dtsi +++ b/arch/arm/boot/dts/pxa2xx.dtsi @@ -54,8 +54,8 @@ reg = <0x40e0 0x1>; gpio-controller; #gpio-cells = <0x2>; - interrupts = <10>; - interrupt-names = "gpio_mux"; + interrupts = <8 9 10>; + interrupt-names = "gpio0", "gpio1", "gpio_mux"; interrupt-controller; #interrupt-cells = <0x2>; ranges; -- 2.1.4
[RESEND PATCH] ARM: dts: pxa2xx: Specify all interrupts for the GPIO controller.
The PXA GPIO controller has 3 interrupt outputs, this needs to be indicated in the DTS file. Without this mainstone's CPLD interrupt 0 will not be raised to the processor. Signed-off-by: Vijay Kumar B. Reviewed-by: Deepak S. --- arch/arm/boot/dts/pxa2xx.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/pxa2xx.dtsi b/arch/arm/boot/dts/pxa2xx.dtsi index 5e5af07..9ca2e5b 100644 --- a/arch/arm/boot/dts/pxa2xx.dtsi +++ b/arch/arm/boot/dts/pxa2xx.dtsi @@ -54,8 +54,8 @@ reg = <0x40e0 0x1>; gpio-controller; #gpio-cells = <0x2>; - interrupts = <10>; - interrupt-names = "gpio_mux"; + interrupts = <8 9 10>; + interrupt-names = "gpio0", "gpio1", "gpio_mux"; interrupt-controller; #interrupt-cells = <0x2>; ranges; -- 2.1.4