[PATCH v4 4/5] sparc64: SPARC optimized __fls function

2017-10-11 Thread Vijay Kumar
Defined SPARC optimized __fls using lzcnt opcode.

Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com>
---
 arch/sparc/lib/NG4fls.S |   10 ++
 1 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S
index bc17b65..2d0991e 100644
--- a/arch/sparc/lib/NG4fls.S
+++ b/arch/sparc/lib/NG4fls.S
@@ -18,3 +18,13 @@ ENTRY(NG4fls)
retl
 sub%g3, %g2, %o0
 ENDPROC(NG4fls)
+
+ENTRY(__NG4fls)    /* lzcnt-based __fls: %o0 = 63 - %g2, where %g2 is the lzcnt result for %o0; a zero %o0 is returned unchanged */
+   brz,pn  %o0, 1f     /* zero input: skip the subtraction below */
+   LZCNT_O0_G2 !lzcnt  %o0, %g2 /* sits in the brz delay slot (no annul bit), so it runs on both paths */
+   mov 63, %g3
+   sub %g3, %g2, %o0   /* %o0 = 63 - %g2 */
+1:
+   retl
+nop                    /* retl delay slot */
+ENDPROC(__NG4fls)
-- 
1.7.1



[PATCH v4 4/5] sparc64: SPARC optimized __fls function

2017-10-11 Thread Vijay Kumar
Defined SPARC optimized __fls using lzcnt opcode.

Signed-off-by: Vijay Kumar 
---
 arch/sparc/lib/NG4fls.S |   10 ++
 1 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S
index bc17b65..2d0991e 100644
--- a/arch/sparc/lib/NG4fls.S
+++ b/arch/sparc/lib/NG4fls.S
@@ -18,3 +18,13 @@ ENTRY(NG4fls)
retl
 sub%g3, %g2, %o0
 ENDPROC(NG4fls)
+
+ENTRY(__NG4fls)    /* lzcnt-based __fls: %o0 = 63 - %g2, where %g2 is the lzcnt result for %o0; a zero %o0 is returned unchanged */
+   brz,pn  %o0, 1f     /* zero input: skip the subtraction below */
+   LZCNT_O0_G2 !lzcnt  %o0, %g2 /* sits in the brz delay slot (no annul bit), so it runs on both paths */
+   mov 63, %g3
+   sub %g3, %g2, %o0   /* %o0 = 63 - %g2 */
+1:
+   retl
+nop                    /* retl delay slot */
+ENDPROC(__NG4fls)
-- 
1.7.1



[PATCH v4 0/5] sparc64: Optimize fls and __fls

2017-10-11 Thread Vijay Kumar
SPARC provides the lzcnt instruction (with VIS3), which can be used to
optimize the fls, __fls and fls64 functions. On systems that support
the lzcnt instruction, we now do boot-time patching to use the
SPARC-optimized fls, __fls and fls64 functions.

v3->v4:
 -  Fixed a typo.
v2->v3:
 -  Using ENTRY(), ENDPROC() for assembler functions.
 -  Removed BITS_PER_LONG from __fls.
 -  Using generic fls64().
 -  Replaced lzcnt instruction with .word directive.
v1->v2:
 - Fixed delay slot issue.

Vijay Kumar (5):
  sparc64: Define SPARC default fls function
  sparc64: Define SPARC default __fls function
  sparc64: SPARC optimized fls function
  sparc64: SPARC optimized __fls function
  sparc64: Use sparc optimized fls and __fls for T4 and above

 arch/sparc/include/asm/bitops_64.h |5 ++-
 arch/sparc/kernel/head_64.S|2 +
 arch/sparc/lib/Makefile|3 ++
 arch/sparc/lib/NG4fls.S|   30 
 arch/sparc/lib/NG4patch.S  |9 +
 arch/sparc/lib/fls.S   |   67 
 arch/sparc/lib/fls64.S |   61 
 7 files changed, 175 insertions(+), 2 deletions(-)
 create mode 100644 arch/sparc/lib/NG4fls.S
 create mode 100644 arch/sparc/lib/fls.S
 create mode 100644 arch/sparc/lib/fls64.S



[PATCH v4 0/5] sparc64: Optimize fls and __fls

2017-10-11 Thread Vijay Kumar
SPARC provides the lzcnt instruction (with VIS3), which can be used to
optimize the fls, __fls and fls64 functions. On systems that support
the lzcnt instruction, we now do boot-time patching to use the
SPARC-optimized fls, __fls and fls64 functions.

v3->v4:
 -  Fixed a typo.
v2->v3:
 -  Using ENTRY(), ENDPROC() for assembler functions.
 -  Removed BITS_PER_LONG from __fls.
 -  Using generic fls64().
 -  Replaced lzcnt instruction with .word directive.
v1->v2:
 - Fixed delay slot issue.

Vijay Kumar (5):
  sparc64: Define SPARC default fls function
  sparc64: Define SPARC default __fls function
  sparc64: SPARC optimized fls function
  sparc64: SPARC optimized __fls function
  sparc64: Use sparc optimized fls and __fls for T4 and above

 arch/sparc/include/asm/bitops_64.h |5 ++-
 arch/sparc/kernel/head_64.S|2 +
 arch/sparc/lib/Makefile|3 ++
 arch/sparc/lib/NG4fls.S|   30 
 arch/sparc/lib/NG4patch.S  |9 +
 arch/sparc/lib/fls.S   |   67 
 arch/sparc/lib/fls64.S |   61 
 7 files changed, 175 insertions(+), 2 deletions(-)
 create mode 100644 arch/sparc/lib/NG4fls.S
 create mode 100644 arch/sparc/lib/fls.S
 create mode 100644 arch/sparc/lib/fls64.S



[PATCH v4 1/5] sparc64: Define SPARC default fls function

2017-10-11 Thread Vijay Kumar
fls will now require a boot time patching on T4 and above.
Redefining it under arch/sparc/lib.

Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com>
---
 arch/sparc/include/asm/bitops_64.h |3 +-
 arch/sparc/lib/Makefile|1 +
 arch/sparc/lib/fls.S   |   67 
 3 files changed, 70 insertions(+), 1 deletions(-)

diff --git a/arch/sparc/include/asm/bitops_64.h 
b/arch/sparc/include/asm/bitops_64.h
index 2d52240..30aea56 100644
--- a/arch/sparc/include/asm/bitops_64.h
+++ b/arch/sparc/include/asm/bitops_64.h
@@ -22,9 +22,10 @@
 void clear_bit(unsigned long nr, volatile unsigned long *addr);
 void change_bit(unsigned long nr, volatile unsigned long *addr);
 
+int fls(unsigned int word);
+
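 #include <asm-generic/bitops/non-atomic.h>
 
-#include <asm-generic/bitops/fls.h>
 #include <asm-generic/bitops/__fls.h>
 #include <asm-generic/bitops/fls64.h>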
 
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index a1a2d39..3b9f5e0 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -16,6 +16,7 @@ lib-$(CONFIG_SPARC64) += atomic_64.o
 lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
 lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
 lib-$(CONFIG_SPARC64) += multi3.o
+lib-$(CONFIG_SPARC64) += fls.o
 
 lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
 lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
diff --git a/arch/sparc/lib/fls.S b/arch/sparc/lib/fls.S
new file mode 100644
index 000..06b8d30
--- /dev/null
+++ b/arch/sparc/lib/fls.S
@@ -0,0 +1,67 @@
+/* fls.S: SPARC default fls definition.
+ *
+ * SPARC default fls definition, which follows the same algorithm as
+ * in generic fls(). This function will be boot time patched on T4
+ * and onward.
+ */
+
+#include <linux/linkage.h>
+#include <asm/export.h>
+
+   .text
+   .register   %g2, #scratch
+   .register   %g3, #scratch
+ENTRY(fls)                         /* int fls(unsigned int word): %o0 = word in; %o0 = 1-based index of highest set bit out, 0 if word == 0. Scratch: %o1 (result), %g2, %g3 */
+   brz,pn  %o0, 6f                 /* word == 0: return 0 */
+    mov    0, %o1                  /* delay slot: result = 0 */
+   sethi   %hi(0xffff0000), %g3
+   mov     %o0, %g2
+   andcc   %o0, %g3, %g0           /* any of bits 31..16 set? */
+   be,pt   %icc, 8f                /* no: go shift the low half up */
+    mov    32, %o1                 /* delay slot: start from result = 32 */
+   sethi   %hi(0xff000000), %g3
+   andcc   %g2, %g3, %g0           /* any of bits 31..24 set? */
+   bne,pt  %icc, 3f
+    sethi  %hi(0xf0000000), %g3    /* delay slot: mask for the next step */
+   sll     %o0, 8, %o0
+1:
+   add     %o1, -8, %o1            /* top byte was clear: result -= 8 */
+   sra     %o0, 0, %o0             /* sign-extend the low 32 bits */
+   mov     %o0, %g2
+2:
+   sethi   %hi(0xf0000000), %g3
+3:
+   andcc   %g2, %g3, %g0           /* any of bits 31..28 set? */
+   bne,pt  %icc, 4f
+    sethi  %hi(0xc0000000), %g3    /* delay slot: mask for the next step */
+   sll     %o0, 4, %o0
+   add     %o1, -4, %o1
+   sra     %o0, 0, %o0
+   mov     %o0, %g2
+4:
+   andcc   %g2, %g3, %g0           /* any of bits 31..30 set? */
+   be,a,pt %icc, 7f
+    sll    %o0, 2, %o0             /* annulled delay slot: executes only when the branch is taken */
+5:
+   xnor    %g0, %o0, %o0           /* %o0 = ~%o0 */
+   srl     %o0, 31, %o0            /* 1 if bit 31 was clear, else 0 */
+   sub     %o1, %o0, %o1
+6:
+   jmp     %o7 + 8                 /* return to caller */
+    sra    %o1, 0, %o0             /* delay slot: return value = result */
+7:
+   add     %o1, -2, %o1
+   ba,pt   %xcc, 5b
+    sra    %o0, 0, %o0
+8:
+   sll     %o0, 16, %o0            /* upper half was clear: move low half up */
+   sethi   %hi(0xff000000), %g3
+   sra     %o0, 0, %o0
+   mov     %o0, %g2
+   andcc   %g2, %g3, %g0
+   bne,pt  %icc, 2b
+    mov    16, %o1                 /* delay slot: result = 16 */
+   ba,pt   %xcc, 1b
+    sll    %o0, 8, %o0             /* delay slot: top byte clear, shift by 8 */
+ENDPROC(fls)
+EXPORT_SYMBOL(fls)
-- 
1.7.1



[PATCH v4 1/5] sparc64: Define SPARC default fls function

2017-10-11 Thread Vijay Kumar
fls will now require a boot time patching on T4 and above.
Redefining it under arch/sparc/lib.

Signed-off-by: Vijay Kumar 
---
 arch/sparc/include/asm/bitops_64.h |3 +-
 arch/sparc/lib/Makefile|1 +
 arch/sparc/lib/fls.S   |   67 
 3 files changed, 70 insertions(+), 1 deletions(-)

diff --git a/arch/sparc/include/asm/bitops_64.h 
b/arch/sparc/include/asm/bitops_64.h
index 2d52240..30aea56 100644
--- a/arch/sparc/include/asm/bitops_64.h
+++ b/arch/sparc/include/asm/bitops_64.h
@@ -22,9 +22,10 @@
 void clear_bit(unsigned long nr, volatile unsigned long *addr);
 void change_bit(unsigned long nr, volatile unsigned long *addr);
 
+int fls(unsigned int word);
+
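 #include <asm-generic/bitops/non-atomic.h>
 
-#include <asm-generic/bitops/fls.h>
 #include <asm-generic/bitops/__fls.h>
 #include <asm-generic/bitops/fls64.h>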
 
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index a1a2d39..3b9f5e0 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -16,6 +16,7 @@ lib-$(CONFIG_SPARC64) += atomic_64.o
 lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
 lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
 lib-$(CONFIG_SPARC64) += multi3.o
+lib-$(CONFIG_SPARC64) += fls.o
 
 lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
 lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
diff --git a/arch/sparc/lib/fls.S b/arch/sparc/lib/fls.S
new file mode 100644
index 000..06b8d30
--- /dev/null
+++ b/arch/sparc/lib/fls.S
@@ -0,0 +1,67 @@
+/* fls.S: SPARC default fls definition.
+ *
+ * SPARC default fls definition, which follows the same algorithm as
+ * in generic fls(). This function will be boot time patched on T4
+ * and onward.
+ */
+
+#include <linux/linkage.h>
+#include <asm/export.h>
+
+   .text
+   .register   %g2, #scratch
+   .register   %g3, #scratch
+ENTRY(fls)                         /* int fls(unsigned int word): %o0 = word in; %o0 = 1-based index of highest set bit out, 0 if word == 0. Scratch: %o1 (result), %g2, %g3 */
+   brz,pn  %o0, 6f                 /* word == 0: return 0 */
+    mov    0, %o1                  /* delay slot: result = 0 */
+   sethi   %hi(0xffff0000), %g3
+   mov     %o0, %g2
+   andcc   %o0, %g3, %g0           /* any of bits 31..16 set? */
+   be,pt   %icc, 8f                /* no: go shift the low half up */
+    mov    32, %o1                 /* delay slot: start from result = 32 */
+   sethi   %hi(0xff000000), %g3
+   andcc   %g2, %g3, %g0           /* any of bits 31..24 set? */
+   bne,pt  %icc, 3f
+    sethi  %hi(0xf0000000), %g3    /* delay slot: mask for the next step */
+   sll     %o0, 8, %o0
+1:
+   add     %o1, -8, %o1            /* top byte was clear: result -= 8 */
+   sra     %o0, 0, %o0             /* sign-extend the low 32 bits */
+   mov     %o0, %g2
+2:
+   sethi   %hi(0xf0000000), %g3
+3:
+   andcc   %g2, %g3, %g0           /* any of bits 31..28 set? */
+   bne,pt  %icc, 4f
+    sethi  %hi(0xc0000000), %g3    /* delay slot: mask for the next step */
+   sll     %o0, 4, %o0
+   add     %o1, -4, %o1
+   sra     %o0, 0, %o0
+   mov     %o0, %g2
+4:
+   andcc   %g2, %g3, %g0           /* any of bits 31..30 set? */
+   be,a,pt %icc, 7f
+    sll    %o0, 2, %o0             /* annulled delay slot: executes only when the branch is taken */
+5:
+   xnor    %g0, %o0, %o0           /* %o0 = ~%o0 */
+   srl     %o0, 31, %o0            /* 1 if bit 31 was clear, else 0 */
+   sub     %o1, %o0, %o1
+6:
+   jmp     %o7 + 8                 /* return to caller */
+    sra    %o1, 0, %o0             /* delay slot: return value = result */
+7:
+   add     %o1, -2, %o1
+   ba,pt   %xcc, 5b
+    sra    %o0, 0, %o0
+8:
+   sll     %o0, 16, %o0            /* upper half was clear: move low half up */
+   sethi   %hi(0xff000000), %g3
+   sra     %o0, 0, %o0
+   mov     %o0, %g2
+   andcc   %g2, %g3, %g0
+   bne,pt  %icc, 2b
+    mov    16, %o1                 /* delay slot: result = 16 */
+   ba,pt   %xcc, 1b
+    sll    %o0, 8, %o0             /* delay slot: top byte clear, shift by 8 */
+ENDPROC(fls)
+EXPORT_SYMBOL(fls)
-- 
1.7.1



[PATCH v4 2/5] sparc64: Define SPARC default __fls function

2017-10-11 Thread Vijay Kumar
__fls will now require a boot time patching on T4 and above.
Redefining it under arch/sparc/lib.

Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com>
---
 arch/sparc/include/asm/bitops_64.h |2 +-
 arch/sparc/lib/Makefile|1 +
 arch/sparc/lib/fls64.S |   61 
 3 files changed, 63 insertions(+), 1 deletions(-)

diff --git a/arch/sparc/include/asm/bitops_64.h 
b/arch/sparc/include/asm/bitops_64.h
index 30aea56..d7a46e2 100644
--- a/arch/sparc/include/asm/bitops_64.h
+++ b/arch/sparc/include/asm/bitops_64.h
@@ -23,10 +23,10 @@
 void change_bit(unsigned long nr, volatile unsigned long *addr);
 
 int fls(unsigned int word);
+int __fls(unsigned long word);
 
 #include <asm-generic/bitops/non-atomic.h>
 
-#include <asm-generic/bitops/__fls.h>
 #include <asm-generic/bitops/fls64.h>
 
 #ifdef __KERNEL__
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 3b9f5e0..5380c59 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -17,6 +17,7 @@ lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
 lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
 lib-$(CONFIG_SPARC64) += multi3.o
 lib-$(CONFIG_SPARC64) += fls.o
+lib-$(CONFIG_SPARC64) += fls64.o
 
 lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
 lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
diff --git a/arch/sparc/lib/fls64.S b/arch/sparc/lib/fls64.S
new file mode 100644
index 000..c83e22a
--- /dev/null
+++ b/arch/sparc/lib/fls64.S
@@ -0,0 +1,61 @@
+/* fls64.S: SPARC default __fls definition.
+ *
+ * SPARC default __fls definition, which follows the same algorithm as
+ * in generic __fls(). This function will be boot time patched on T4
+ * and onward.
+ */
+
+#include <linux/linkage.h>
+#include <asm/export.h>
+
+   .text
+   .register   %g2, #scratch
+   .register   %g3, #scratch
+ENTRY(__fls)                       /* int __fls(unsigned long word): %o0 = word in; %o0 = 0-based index of highest set bit out. Scratch: %g1 (index), %g2, %g3 */
+   mov     -1, %g2
+   sllx    %g2, 32, %g2            /* mask = bits 63..32 */
+   and     %o0, %g2, %g2
+   brnz,pt %g2, 1f
+    mov    63, %g1                 /* delay slot: index = 63 */
+   sllx    %o0, 32, %o0            /* upper half clear: move low half up */
+   mov     31, %g1                 /* ...and restart from index 31 */
+1:
+   mov     -1, %g2
+   sllx    %g2, 48, %g2            /* mask = bits 63..48 */
+   and     %o0, %g2, %g2
+   brnz,pt %g2, 2f
+    mov    -1, %g2                 /* delay slot: reload all-ones for the next mask */
+   sllx    %o0, 16, %o0
+   add     %g1, -16, %g1
+2:
+   mov     -1, %g2
+   sllx    %g2, 56, %g2            /* mask = bits 63..56 */
+   and     %o0, %g2, %g2
+   brnz,pt %g2, 3f
+    mov    -1, %g2                 /* delay slot: reload all-ones for the next mask */
+   sllx    %o0, 8, %o0
+   add     %g1, -8, %g1
+3:
+   sllx    %g2, 60, %g2            /* mask = bits 63..60 (%g2 is -1 from the delay slot above) */
+   and     %o0, %g2, %g2
+   brnz,pt %g2, 4f
+    mov    -1, %g2                 /* delay slot: reload all-ones for the next mask */
+   sllx    %o0, 4, %o0
+   add     %g1, -4, %g1
+4:
+   sllx    %g2, 62, %g2            /* mask = bits 63..62 */
+   and     %o0, %g2, %g2
+   brnz,pt %g2, 5f
+    mov    -1, %g3                 /* delay slot: all-ones for the final bit-63 mask */
+   sllx    %o0, 2, %o0
+   add     %g1, -2, %g1
+5:
+   mov     0, %g2
+   sllx    %g3, 63, %g3            /* mask = bit 63 */
+   and     %o0, %g3, %o0
+   movre   %o0, 1, %g2             /* %g2 = 1 if bit 63 is clear */
+   sub     %g1, %g2, %g1
+   jmp     %o7+8                   /* return to caller */
+    sra    %g1, 0, %o0             /* delay slot: return value = index */
+ENDPROC(__fls)
+EXPORT_SYMBOL(__fls)
-- 
1.7.1



[PATCH v4 2/5] sparc64: Define SPARC default __fls function

2017-10-11 Thread Vijay Kumar
__fls will now require a boot time patching on T4 and above.
Redefining it under arch/sparc/lib.

Signed-off-by: Vijay Kumar 
---
 arch/sparc/include/asm/bitops_64.h |2 +-
 arch/sparc/lib/Makefile|1 +
 arch/sparc/lib/fls64.S |   61 
 3 files changed, 63 insertions(+), 1 deletions(-)

diff --git a/arch/sparc/include/asm/bitops_64.h 
b/arch/sparc/include/asm/bitops_64.h
index 30aea56..d7a46e2 100644
--- a/arch/sparc/include/asm/bitops_64.h
+++ b/arch/sparc/include/asm/bitops_64.h
@@ -23,10 +23,10 @@
 void change_bit(unsigned long nr, volatile unsigned long *addr);
 
 int fls(unsigned int word);
+int __fls(unsigned long word);
 
 #include <asm-generic/bitops/non-atomic.h>
 
-#include <asm-generic/bitops/__fls.h>
 #include <asm-generic/bitops/fls64.h>
 
 #ifdef __KERNEL__
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 3b9f5e0..5380c59 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -17,6 +17,7 @@ lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
 lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
 lib-$(CONFIG_SPARC64) += multi3.o
 lib-$(CONFIG_SPARC64) += fls.o
+lib-$(CONFIG_SPARC64) += fls64.o
 
 lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
 lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
diff --git a/arch/sparc/lib/fls64.S b/arch/sparc/lib/fls64.S
new file mode 100644
index 000..c83e22a
--- /dev/null
+++ b/arch/sparc/lib/fls64.S
@@ -0,0 +1,61 @@
+/* fls64.S: SPARC default __fls definition.
+ *
+ * SPARC default __fls definition, which follows the same algorithm as
+ * in generic __fls(). This function will be boot time patched on T4
+ * and onward.
+ */
+
+#include <linux/linkage.h>
+#include <asm/export.h>
+
+   .text
+   .register   %g2, #scratch
+   .register   %g3, #scratch
+ENTRY(__fls)                       /* int __fls(unsigned long word): %o0 = word in; %o0 = 0-based index of highest set bit out. Scratch: %g1 (index), %g2, %g3 */
+   mov     -1, %g2
+   sllx    %g2, 32, %g2            /* mask = bits 63..32 */
+   and     %o0, %g2, %g2
+   brnz,pt %g2, 1f
+    mov    63, %g1                 /* delay slot: index = 63 */
+   sllx    %o0, 32, %o0            /* upper half clear: move low half up */
+   mov     31, %g1                 /* ...and restart from index 31 */
+1:
+   mov     -1, %g2
+   sllx    %g2, 48, %g2            /* mask = bits 63..48 */
+   and     %o0, %g2, %g2
+   brnz,pt %g2, 2f
+    mov    -1, %g2                 /* delay slot: reload all-ones for the next mask */
+   sllx    %o0, 16, %o0
+   add     %g1, -16, %g1
+2:
+   mov     -1, %g2
+   sllx    %g2, 56, %g2            /* mask = bits 63..56 */
+   and     %o0, %g2, %g2
+   brnz,pt %g2, 3f
+    mov    -1, %g2                 /* delay slot: reload all-ones for the next mask */
+   sllx    %o0, 8, %o0
+   add     %g1, -8, %g1
+3:
+   sllx    %g2, 60, %g2            /* mask = bits 63..60 (%g2 is -1 from the delay slot above) */
+   and     %o0, %g2, %g2
+   brnz,pt %g2, 4f
+    mov    -1, %g2                 /* delay slot: reload all-ones for the next mask */
+   sllx    %o0, 4, %o0
+   add     %g1, -4, %g1
+4:
+   sllx    %g2, 62, %g2            /* mask = bits 63..62 */
+   and     %o0, %g2, %g2
+   brnz,pt %g2, 5f
+    mov    -1, %g3                 /* delay slot: all-ones for the final bit-63 mask */
+   sllx    %o0, 2, %o0
+   add     %g1, -2, %g1
+5:
+   mov     0, %g2
+   sllx    %g3, 63, %g3            /* mask = bit 63 */
+   and     %o0, %g3, %o0
+   movre   %o0, 1, %g2             /* %g2 = 1 if bit 63 is clear */
+   sub     %g1, %g2, %g1
+   jmp     %o7+8                   /* return to caller */
+    sra    %g1, 0, %o0             /* delay slot: return value = index */
+ENDPROC(__fls)
+EXPORT_SYMBOL(__fls)
-- 
1.7.1



[PATCH v4 5/5] sparc64: Use sparc optimized fls and __fls for T4 and above

2017-10-11 Thread Vijay Kumar
For T4 and above, patch fls and __fls functions
at the boot time to use lzcnt instruction.

Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com>
---
 arch/sparc/kernel/head_64.S |2 ++
 arch/sparc/lib/NG4patch.S   |9 +
 2 files changed, 11 insertions(+), 0 deletions(-)

diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index 4de9fbd..f362ecb 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -640,6 +640,8 @@ niagara4_patch:
 nop
callniagara4_patch_pageops
 nop
+   callniagara4_patch_fls
+nop
 
ba,a,pt %xcc, 80f
 nop
diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S
index 3cc0f8c..da65a3e 100644
--- a/arch/sparc/lib/NG4patch.S
+++ b/arch/sparc/lib/NG4patch.S
@@ -3,6 +3,8 @@
  * Copyright (C) 2012 David S. Miller <da...@davemloft.net>
  */
 
+#include <linux/linkage.h>
+
 #define BRANCH_ALWAYS  0x10680000
 #define NOP            0x01000000
 #define NG_DO_PATCH(OLD, NEW)  \
@@ -52,3 +54,10 @@ niagara4_patch_pageops:
retl
 nop
.size   niagara4_patch_pageops,.-niagara4_patch_pageops
+
+ENTRY(niagara4_patch_fls)  /* boot-time hook called from niagara4_patch in head_64.S; applies NG_DO_PATCH to fls and __fls */
+   NG_DO_PATCH(fls, NG4fls)        /* OLD = fls, NEW = NG4fls */
+   NG_DO_PATCH(__fls, __NG4fls)    /* OLD = __fls, NEW = __NG4fls */
+   retl
+nop                                /* retl delay slot */
+ENDPROC(niagara4_patch_fls)
-- 
1.7.1



[PATCH v4 5/5] sparc64: Use sparc optimized fls and __fls for T4 and above

2017-10-11 Thread Vijay Kumar
For T4 and above, patch fls and __fls functions
at the boot time to use lzcnt instruction.

Signed-off-by: Vijay Kumar 
---
 arch/sparc/kernel/head_64.S |2 ++
 arch/sparc/lib/NG4patch.S   |9 +
 2 files changed, 11 insertions(+), 0 deletions(-)

diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index 4de9fbd..f362ecb 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -640,6 +640,8 @@ niagara4_patch:
 nop
callniagara4_patch_pageops
 nop
+   callniagara4_patch_fls
+nop
 
ba,a,pt %xcc, 80f
 nop
diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S
index 3cc0f8c..da65a3e 100644
--- a/arch/sparc/lib/NG4patch.S
+++ b/arch/sparc/lib/NG4patch.S
@@ -3,6 +3,8 @@
  * Copyright (C) 2012 David S. Miller 
  */
 
+#include <linux/linkage.h>
+
 #define BRANCH_ALWAYS  0x10680000
 #define NOP            0x01000000
 #define NG_DO_PATCH(OLD, NEW)  \
@@ -52,3 +54,10 @@ niagara4_patch_pageops:
retl
 nop
.size   niagara4_patch_pageops,.-niagara4_patch_pageops
+
+ENTRY(niagara4_patch_fls)  /* boot-time hook called from niagara4_patch in head_64.S; applies NG_DO_PATCH to fls and __fls */
+   NG_DO_PATCH(fls, NG4fls)        /* OLD = fls, NEW = NG4fls */
+   NG_DO_PATCH(__fls, __NG4fls)    /* OLD = __fls, NEW = __NG4fls */
+   retl
+nop                                /* retl delay slot */
+ENDPROC(niagara4_patch_fls)
-- 
1.7.1



[PATCH v4 3/5] sparc64: SPARC optimized fls function

2017-10-11 Thread Vijay Kumar
Defined SPARC optimized fls using lzcnt opcode.

Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com>
---
 arch/sparc/lib/Makefile |1 +
 arch/sparc/lib/NG4fls.S |   20 
 2 files changed, 21 insertions(+), 0 deletions(-)

diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 5380c59..2823b8e 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -18,6 +18,7 @@ lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
 lib-$(CONFIG_SPARC64) += multi3.o
 lib-$(CONFIG_SPARC64) += fls.o
 lib-$(CONFIG_SPARC64) += fls64.o
+obj-$(CONFIG_SPARC64) += NG4fls.o
 
 lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
 lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S
new file mode 100644
index 000..bc17b65
--- /dev/null
+++ b/arch/sparc/lib/NG4fls.S
@@ -0,0 +1,20 @@
+/* NG4fls.S: SPARC optimized fls and __fls for T4 and above.
+ *
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <linux/linkage.h>
+
+#define LZCNT_O0_G2 \
+   .word   0x85b002e8              /* hand-encoded "lzcnt %o0, %g2"; emitted as .word instead of the mnemonic */
+
+   .text
+   .register   %g2, #scratch
+   .register   %g3, #scratch
+
+ENTRY(NG4fls)                      /* lzcnt-based fls: %o0 = 64 - (lzcnt of %o0); clobbers %g2, %g3 */
+   LZCNT_O0_G2 !lzcnt  %o0, %g2
+   mov     64, %g3
+   retl
+    sub    %g3, %g2, %o0           /* retl delay slot: return value = 64 - %g2 */
+ENDPROC(NG4fls)
-- 
1.7.1



[PATCH v4 3/5] sparc64: SPARC optimized fls function

2017-10-11 Thread Vijay Kumar
Defined SPARC optimized fls using lzcnt opcode.

Signed-off-by: Vijay Kumar 
---
 arch/sparc/lib/Makefile |1 +
 arch/sparc/lib/NG4fls.S |   20 
 2 files changed, 21 insertions(+), 0 deletions(-)

diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 5380c59..2823b8e 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -18,6 +18,7 @@ lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
 lib-$(CONFIG_SPARC64) += multi3.o
 lib-$(CONFIG_SPARC64) += fls.o
 lib-$(CONFIG_SPARC64) += fls64.o
+obj-$(CONFIG_SPARC64) += NG4fls.o
 
 lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
 lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S
new file mode 100644
index 000..bc17b65
--- /dev/null
+++ b/arch/sparc/lib/NG4fls.S
@@ -0,0 +1,20 @@
+/* NG4fls.S: SPARC optimized fls and __fls for T4 and above.
+ *
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <linux/linkage.h>
+
+#define LZCNT_O0_G2 \
+   .word   0x85b002e8              /* hand-encoded "lzcnt %o0, %g2"; emitted as .word instead of the mnemonic */
+
+   .text
+   .register   %g2, #scratch
+   .register   %g3, #scratch
+
+ENTRY(NG4fls)                      /* lzcnt-based fls: %o0 = 64 - (lzcnt of %o0); clobbers %g2, %g3 */
+   LZCNT_O0_G2 !lzcnt  %o0, %g2
+   mov     64, %g3
+   retl
+    sub    %g3, %g2, %o0           /* retl delay slot: return value = 64 - %g2 */
+ENDPROC(NG4fls)
-- 
1.7.1



Re: [PATCH v3 3/5] sparc64: SPARC optimised fls function

2017-10-09 Thread Vijay Kumar



On 10/8/2017 11:39 PM, David Miller wrote:

From: Vijay Kumar <vijay.ac.ku...@oracle.com>
Date: Fri,  6 Oct 2017 10:54:51 -0600


+#define LXCNT_O0_G2\
+   .word   0x85b002e8
+
+   .text
+   .register   %g2, #scratch
+   .register   %g3, #scratch
+
+ENTRY(NG4fls)
+   LXCNT_O0_G2 !lzcnt  %o0, %g2


Agreed with others that you should name this LZCNT_* instead of
LXCNT_*.

Yes, That's typo. I will fix it.

Vijay


Re: [PATCH v3 3/5] sparc64: SPARC optimised fls function

2017-10-09 Thread Vijay Kumar



On 10/8/2017 11:39 PM, David Miller wrote:

From: Vijay Kumar 
Date: Fri,  6 Oct 2017 10:54:51 -0600


+#define LXCNT_O0_G2\
+   .word   0x85b002e8
+
+   .text
+   .register   %g2, #scratch
+   .register   %g3, #scratch
+
+ENTRY(NG4fls)
+   LXCNT_O0_G2 !lzcnt  %o0, %g2


Agreed with others that you should name this LZCNT_* instead of
LXCNT_*.

Yes, That's typo. I will fix it.

Vijay


[PATCH v3 5/5] sparc64: Use sparc optimised fls and __fls for T4 and above

2017-10-06 Thread Vijay Kumar
For T4 and above, patch fls and __fls functions
at the boot time to use lzcnt instruction.

Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com>
---
 arch/sparc/kernel/head_64.S |2 ++
 arch/sparc/lib/NG4patch.S   |9 +
 2 files changed, 11 insertions(+), 0 deletions(-)

diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index 4de9fbd..f362ecb 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -640,6 +640,8 @@ niagara4_patch:
 nop
callniagara4_patch_pageops
 nop
+   callniagara4_patch_fls
+nop
 
ba,a,pt %xcc, 80f
 nop
diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S
index 3cc0f8c..da65a3e 100644
--- a/arch/sparc/lib/NG4patch.S
+++ b/arch/sparc/lib/NG4patch.S
@@ -3,6 +3,8 @@
  * Copyright (C) 2012 David S. Miller <da...@davemloft.net>
  */
 
+#include <linux/linkage.h>
+
 #define BRANCH_ALWAYS  0x10680000
 #define NOP            0x01000000
 #define NG_DO_PATCH(OLD, NEW)  \
@@ -52,3 +54,10 @@ niagara4_patch_pageops:
retl
 nop
.size   niagara4_patch_pageops,.-niagara4_patch_pageops
+
+ENTRY(niagara4_patch_fls)  /* boot-time hook called from niagara4_patch in head_64.S; applies NG_DO_PATCH to fls and __fls */
+   NG_DO_PATCH(fls, NG4fls)        /* OLD = fls, NEW = NG4fls */
+   NG_DO_PATCH(__fls, __NG4fls)    /* OLD = __fls, NEW = __NG4fls */
+   retl
+nop                                /* retl delay slot */
+ENDPROC(niagara4_patch_fls)
-- 
1.7.1



[PATCH v3 5/5] sparc64: Use sparc optimised fls and __fls for T4 and above

2017-10-06 Thread Vijay Kumar
For T4 and above, patch fls and __fls functions
at the boot time to use lzcnt instruction.

Signed-off-by: Vijay Kumar 
---
 arch/sparc/kernel/head_64.S |2 ++
 arch/sparc/lib/NG4patch.S   |9 +
 2 files changed, 11 insertions(+), 0 deletions(-)

diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index 4de9fbd..f362ecb 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -640,6 +640,8 @@ niagara4_patch:
 nop
callniagara4_patch_pageops
 nop
+   callniagara4_patch_fls
+nop
 
ba,a,pt %xcc, 80f
 nop
diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S
index 3cc0f8c..da65a3e 100644
--- a/arch/sparc/lib/NG4patch.S
+++ b/arch/sparc/lib/NG4patch.S
@@ -3,6 +3,8 @@
  * Copyright (C) 2012 David S. Miller 
  */
 
+#include <linux/linkage.h>
+
 #define BRANCH_ALWAYS  0x10680000
 #define NOP            0x01000000
 #define NG_DO_PATCH(OLD, NEW)  \
@@ -52,3 +54,10 @@ niagara4_patch_pageops:
retl
 nop
.size   niagara4_patch_pageops,.-niagara4_patch_pageops
+
+ENTRY(niagara4_patch_fls)  /* boot-time hook called from niagara4_patch in head_64.S; applies NG_DO_PATCH to fls and __fls */
+   NG_DO_PATCH(fls, NG4fls)        /* OLD = fls, NEW = NG4fls */
+   NG_DO_PATCH(__fls, __NG4fls)    /* OLD = __fls, NEW = __NG4fls */
+   retl
+nop                                /* retl delay slot */
+ENDPROC(niagara4_patch_fls)
-- 
1.7.1



[PATCH v3 0/5] sparc64: Optimize fls and __fls

2017-10-06 Thread Vijay Kumar
SPARC provides the lzcnt instruction (with VIS3), which can be used to
optimize the fls, __fls and fls64 functions. On systems that support
the lzcnt instruction, we now do boot-time patching to use the
SPARC-optimized fls, __fls and fls64 functions.

v2->v3:
 -  Using ENTRY(), ENDPROC() for assembler functions.
 -  Removed BITS_PER_LONG from __fls.
 -  Using generic fls64().
 -  Replaced lzcnt instruction with .word directive.
v1->v2: 
 - Fixed delay slot issue.

Vijay Kumar (2):
  sparc64: Define SPARC default fls and __fls
  sparc64: Use lzcnt instruction for fls and __fls

 arch/sparc/Makefile|1 +
 arch/sparc/include/asm/bitops_64.h |7 +-
 arch/sparc/kernel/head_64.S|2 +
 arch/sparc/lib/Makefile|4 +
 arch/sparc/lib/NG4fls.S|   31 +
 arch/sparc/lib/NG4patch.S  |9 +++
 arch/sparc/lib/fls.S   |  126 
 7 files changed, 177 insertions(+), 3 deletions(-)
 create mode 100644 arch/sparc/lib/NG4fls.S
 create mode 100644 arch/sparc/lib/fls.S



[PATCH v3 0/5] sparc64: Optimize fls and __fls

2017-10-06 Thread Vijay Kumar
SPARC provides the lzcnt instruction (with VIS3), which can be used to
optimize the fls, __fls and fls64 functions. On systems that support
the lzcnt instruction, we now do boot-time patching to use the
SPARC-optimized fls, __fls and fls64 functions.

v2->v3:
 -  Using ENTRY(), ENDPROC() for assembler functions.
 -  Removed BITS_PER_LONG from __fls.
 -  Using generic fls64().
 -  Replaced lzcnt instruction with .word directive.
v1->v2: 
 - Fixed delay slot issue.

Vijay Kumar (2):
  sparc64: Define SPARC default fls and __fls
  sparc64: Use lzcnt instruction for fls and __fls

 arch/sparc/Makefile|1 +
 arch/sparc/include/asm/bitops_64.h |7 +-
 arch/sparc/kernel/head_64.S|2 +
 arch/sparc/lib/Makefile|4 +
 arch/sparc/lib/NG4fls.S|   31 +
 arch/sparc/lib/NG4patch.S  |9 +++
 arch/sparc/lib/fls.S   |  126 
 7 files changed, 177 insertions(+), 3 deletions(-)
 create mode 100644 arch/sparc/lib/NG4fls.S
 create mode 100644 arch/sparc/lib/fls.S



[PATCH v3 2/5] sparc64: Define SPARC default __fls function

2017-10-06 Thread Vijay Kumar
__fls will now require a boot time patching on T4 and above.
Redefining it under arch/sparc/lib.

Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com>
---
 arch/sparc/include/asm/bitops_64.h |2 +-
 arch/sparc/lib/Makefile|1 +
 arch/sparc/lib/fls64.S |   61 
 3 files changed, 63 insertions(+), 1 deletions(-)

diff --git a/arch/sparc/include/asm/bitops_64.h 
b/arch/sparc/include/asm/bitops_64.h
index 30aea56..d7a46e2 100644
--- a/arch/sparc/include/asm/bitops_64.h
+++ b/arch/sparc/include/asm/bitops_64.h
@@ -23,10 +23,10 @@
 void change_bit(unsigned long nr, volatile unsigned long *addr);
 
 int fls(unsigned int word);
+int __fls(unsigned long word);
 
 #include <asm-generic/bitops/non-atomic.h>
 
-#include <asm-generic/bitops/__fls.h>
 #include <asm-generic/bitops/fls64.h>
 
 #ifdef __KERNEL__
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 3b9f5e0..5380c59 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -17,6 +17,7 @@ lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
 lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
 lib-$(CONFIG_SPARC64) += multi3.o
 lib-$(CONFIG_SPARC64) += fls.o
+lib-$(CONFIG_SPARC64) += fls64.o
 
 lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
 lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
diff --git a/arch/sparc/lib/fls64.S b/arch/sparc/lib/fls64.S
new file mode 100644
index 000..c83e22a
--- /dev/null
+++ b/arch/sparc/lib/fls64.S
@@ -0,0 +1,61 @@
+/* fls64.S: SPARC default __fls definition.
+ *
+ * SPARC default __fls definition, which follows the same algorithm as
+ * in generic __fls(). This function will be boot time patched on T4
+ * and onward.
+ */
+
+#include <linux/linkage.h>
+#include <asm/export.h>
+
+   .text
+   .register   %g2, #scratch
+   .register   %g3, #scratch
+ENTRY(__fls)                       /* int __fls(unsigned long word): %o0 = word in; %o0 = 0-based index of highest set bit out. Scratch: %g1 (index), %g2, %g3 */
+   mov     -1, %g2
+   sllx    %g2, 32, %g2            /* mask = bits 63..32 */
+   and     %o0, %g2, %g2
+   brnz,pt %g2, 1f
+    mov    63, %g1                 /* delay slot: index = 63 */
+   sllx    %o0, 32, %o0            /* upper half clear: move low half up */
+   mov     31, %g1                 /* ...and restart from index 31 */
+1:
+   mov     -1, %g2
+   sllx    %g2, 48, %g2            /* mask = bits 63..48 */
+   and     %o0, %g2, %g2
+   brnz,pt %g2, 2f
+    mov    -1, %g2                 /* delay slot: reload all-ones for the next mask */
+   sllx    %o0, 16, %o0
+   add     %g1, -16, %g1
+2:
+   mov     -1, %g2
+   sllx    %g2, 56, %g2            /* mask = bits 63..56 */
+   and     %o0, %g2, %g2
+   brnz,pt %g2, 3f
+    mov    -1, %g2                 /* delay slot: reload all-ones for the next mask */
+   sllx    %o0, 8, %o0
+   add     %g1, -8, %g1
+3:
+   sllx    %g2, 60, %g2            /* mask = bits 63..60 (%g2 is -1 from the delay slot above) */
+   and     %o0, %g2, %g2
+   brnz,pt %g2, 4f
+    mov    -1, %g2                 /* delay slot: reload all-ones for the next mask */
+   sllx    %o0, 4, %o0
+   add     %g1, -4, %g1
+4:
+   sllx    %g2, 62, %g2            /* mask = bits 63..62 */
+   and     %o0, %g2, %g2
+   brnz,pt %g2, 5f
+    mov    -1, %g3                 /* delay slot: all-ones for the final bit-63 mask */
+   sllx    %o0, 2, %o0
+   add     %g1, -2, %g1
+5:
+   mov     0, %g2
+   sllx    %g3, 63, %g3            /* mask = bit 63 */
+   and     %o0, %g3, %o0
+   movre   %o0, 1, %g2             /* %g2 = 1 if bit 63 is clear */
+   sub     %g1, %g2, %g1
+   jmp     %o7+8                   /* return to caller */
+    sra    %g1, 0, %o0             /* delay slot: return value = index */
+ENDPROC(__fls)
+EXPORT_SYMBOL(__fls)
-- 
1.7.1



[PATCH v3 2/5] sparc64: Define SPARC default __fls function

2017-10-06 Thread Vijay Kumar
__fls will now require a boot time patching on T4 and above.
Redefining it under arch/sparc/lib.

Signed-off-by: Vijay Kumar 
---
 arch/sparc/include/asm/bitops_64.h |2 +-
 arch/sparc/lib/Makefile|1 +
 arch/sparc/lib/fls64.S |   61 
 3 files changed, 63 insertions(+), 1 deletions(-)

diff --git a/arch/sparc/include/asm/bitops_64.h 
b/arch/sparc/include/asm/bitops_64.h
index 30aea56..d7a46e2 100644
--- a/arch/sparc/include/asm/bitops_64.h
+++ b/arch/sparc/include/asm/bitops_64.h
@@ -23,10 +23,10 @@
 void change_bit(unsigned long nr, volatile unsigned long *addr);
 
 int fls(unsigned int word);
+int __fls(unsigned long word);
 
 #include <asm-generic/bitops/non-atomic.h>
 
-#include <asm-generic/bitops/__fls.h>
 #include <asm-generic/bitops/fls64.h>
 
 #ifdef __KERNEL__
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 3b9f5e0..5380c59 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -17,6 +17,7 @@ lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
 lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
 lib-$(CONFIG_SPARC64) += multi3.o
 lib-$(CONFIG_SPARC64) += fls.o
+lib-$(CONFIG_SPARC64) += fls64.o
 
 lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
 lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
diff --git a/arch/sparc/lib/fls64.S b/arch/sparc/lib/fls64.S
new file mode 100644
index 000..c83e22a
--- /dev/null
+++ b/arch/sparc/lib/fls64.S
@@ -0,0 +1,61 @@
+/* fls64.S: SPARC default __fls definition.
+ *
+ * SPARC default __fls definition, which follows the same algorithm as
+ * in generic __fls(). This function will be boot time patched on T4
+ * and onward.
+ */
+
+#include <linux/linkage.h>
+#include <asm/export.h>
+
+   .text
+   .register   %g2, #scratch
+   .register   %g3, #scratch
+ENTRY(__fls)                       /* int __fls(unsigned long word): %o0 = word in; %o0 = 0-based index of highest set bit out. Scratch: %g1 (index), %g2, %g3 */
+   mov     -1, %g2
+   sllx    %g2, 32, %g2            /* mask = bits 63..32 */
+   and     %o0, %g2, %g2
+   brnz,pt %g2, 1f
+    mov    63, %g1                 /* delay slot: index = 63 */
+   sllx    %o0, 32, %o0            /* upper half clear: move low half up */
+   mov     31, %g1                 /* ...and restart from index 31 */
+1:
+   mov     -1, %g2
+   sllx    %g2, 48, %g2            /* mask = bits 63..48 */
+   and     %o0, %g2, %g2
+   brnz,pt %g2, 2f
+    mov    -1, %g2                 /* delay slot: reload all-ones for the next mask */
+   sllx    %o0, 16, %o0
+   add     %g1, -16, %g1
+2:
+   mov     -1, %g2
+   sllx    %g2, 56, %g2            /* mask = bits 63..56 */
+   and     %o0, %g2, %g2
+   brnz,pt %g2, 3f
+    mov    -1, %g2                 /* delay slot: reload all-ones for the next mask */
+   sllx    %o0, 8, %o0
+   add     %g1, -8, %g1
+3:
+   sllx    %g2, 60, %g2            /* mask = bits 63..60 (%g2 is -1 from the delay slot above) */
+   and     %o0, %g2, %g2
+   brnz,pt %g2, 4f
+    mov    -1, %g2                 /* delay slot: reload all-ones for the next mask */
+   sllx    %o0, 4, %o0
+   add     %g1, -4, %g1
+4:
+   sllx    %g2, 62, %g2            /* mask = bits 63..62 */
+   and     %o0, %g2, %g2
+   brnz,pt %g2, 5f
+    mov    -1, %g3                 /* delay slot: all-ones for the final bit-63 mask */
+   sllx    %o0, 2, %o0
+   add     %g1, -2, %g1
+5:
+   mov     0, %g2
+   sllx    %g3, 63, %g3            /* mask = bit 63 */
+   and     %o0, %g3, %o0
+   movre   %o0, 1, %g2             /* %g2 = 1 if bit 63 is clear */
+   sub     %g1, %g2, %g1
+   jmp     %o7+8                   /* return to caller */
+    sra    %g1, 0, %o0             /* delay slot: return value = index */
+ENDPROC(__fls)
+EXPORT_SYMBOL(__fls)
-- 
1.7.1



[PATCH v3 3/5] sparc64: SPARC optimised fls function

2017-10-06 Thread Vijay Kumar
Defined SPARC optimised fls using lzcnt opcode.

Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com>
---
 arch/sparc/lib/Makefile |1 +
 arch/sparc/lib/NG4fls.S |   20 
 2 files changed, 21 insertions(+), 0 deletions(-)

diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 5380c59..2823b8e 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -18,6 +18,7 @@ lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
 lib-$(CONFIG_SPARC64) += multi3.o
 lib-$(CONFIG_SPARC64) += fls.o
 lib-$(CONFIG_SPARC64) += fls64.o
+obj-$(CONFIG_SPARC64) += NG4fls.o
 
 lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
 lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S
new file mode 100644
index 000..5ed7da9
--- /dev/null
+++ b/arch/sparc/lib/NG4fls.S
@@ -0,0 +1,20 @@
+/* NG4fls.S: SPARC optimized fls and __fls for T4 and above.
+ *
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <linux/linkage.h>
+
+#define LXCNT_O0_G2 \
+   .word   0x85b002e8              /* hand-encoded "lzcnt %o0, %g2"; NOTE(review): macro name is a typo for LZCNT_O0_G2, kept because v3 4/5 references it */
+
+   .text
+   .register   %g2, #scratch
+   .register   %g3, #scratch
+
+ENTRY(NG4fls)                      /* lzcnt-based fls: %o0 = 64 - (lzcnt of %o0); clobbers %g2, %g3 */
+   LXCNT_O0_G2 !lzcnt  %o0, %g2
+   mov     64, %g3
+   retl
+    sub    %g3, %g2, %o0           /* retl delay slot: return value = 64 - %g2 */
+ENDPROC(NG4fls)
-- 
1.7.1



[PATCH v3 3/5] sparc64: SPARC optimised fls function

2017-10-06 Thread Vijay Kumar
Defined SPARC optimised fls using lzcnt opcode.

Signed-off-by: Vijay Kumar 
---
 arch/sparc/lib/Makefile |1 +
 arch/sparc/lib/NG4fls.S |   20 
 2 files changed, 21 insertions(+), 0 deletions(-)

diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 5380c59..2823b8e 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -18,6 +18,7 @@ lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
 lib-$(CONFIG_SPARC64) += multi3.o
 lib-$(CONFIG_SPARC64) += fls.o
 lib-$(CONFIG_SPARC64) += fls64.o
+obj-$(CONFIG_SPARC64) += NG4fls.o
 
 lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
 lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S
new file mode 100644
index 000..5ed7da9
--- /dev/null
+++ b/arch/sparc/lib/NG4fls.S
@@ -0,0 +1,20 @@
+/* NG4fls.S: SPARC optimized fls and __fls for T4 and above.
+ *
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include 
+
+#define LXCNT_O0_G2\
+   .word   0x85b002e8
+
+   .text
+   .register   %g2, #scratch
+   .register   %g3, #scratch
+
+ENTRY(NG4fls)                       ! fls replacement built on lzcnt
+   LXCNT_O0_G2 !lzcnt  %o0, %g2
+   mov 64, %g3
+   retl
+    sub    %g3, %g2, %o0           ! (delay slot) return 64 - leading-zero count
+ENDPROC(NG4fls)
-- 
1.7.1



[PATCH v3 4/5] sparc64: SPARC optimised __fls function

2017-10-06 Thread Vijay Kumar
Defined SPARC optimised __fls using lzcnt opcode.

Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com>
---
 arch/sparc/lib/NG4fls.S |   10 ++
 1 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S
index 5ed7da9..34ad371 100644
--- a/arch/sparc/lib/NG4fls.S
+++ b/arch/sparc/lib/NG4fls.S
@@ -18,3 +18,13 @@ ENTRY(NG4fls)
retl
 sub%g3, %g2, %o0
 ENDPROC(NG4fls)
+
+ENTRY(__NG4fls)                     ! __fls replacement built on lzcnt
+   brz,pn  %o0, 1f                 ! word == 0: return with o0 still 0
+   LXCNT_O0_G2 !lzcnt  %o0, %g2 (in the brz delay slot, so it runs on both paths)
+   mov 63, %g3
+   sub %g3, %g2, %o0               ! result = 63 - leading-zero count
+1:
+   retl
+nop                                 ! delay slot of retl
+ENDPROC(__NG4fls)
-- 
1.7.1



[PATCH v3 4/5] sparc64: SPARC optimised __fls function

2017-10-06 Thread Vijay Kumar
Defined SPARC optimised __fls using lzcnt opcode.

Signed-off-by: Vijay Kumar 
---
 arch/sparc/lib/NG4fls.S |   10 ++
 1 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S
index 5ed7da9..34ad371 100644
--- a/arch/sparc/lib/NG4fls.S
+++ b/arch/sparc/lib/NG4fls.S
@@ -18,3 +18,13 @@ ENTRY(NG4fls)
retl
 sub%g3, %g2, %o0
 ENDPROC(NG4fls)
+
+ENTRY(__NG4fls)                     ! __fls replacement built on lzcnt
+   brz,pn  %o0, 1f                 ! word == 0: return with o0 still 0
+   LXCNT_O0_G2 !lzcnt  %o0, %g2 (in the brz delay slot, so it runs on both paths)
+   mov 63, %g3
+   sub %g3, %g2, %o0               ! result = 63 - leading-zero count
+1:
+   retl
+nop                                 ! delay slot of retl
+ENDPROC(__NG4fls)
-- 
1.7.1



[PATCH v3 1/5] sparc64: Define SPARC default fls function

2017-10-06 Thread Vijay Kumar
fls will now require boot-time patching on T4 and above.
Redefining it under arch/sparc/lib.

Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com>
---
 arch/sparc/include/asm/bitops_64.h |3 +-
 arch/sparc/lib/Makefile|1 +
 arch/sparc/lib/fls.S   |   67 
 3 files changed, 70 insertions(+), 1 deletions(-)

diff --git a/arch/sparc/include/asm/bitops_64.h 
b/arch/sparc/include/asm/bitops_64.h
index 2d52240..30aea56 100644
--- a/arch/sparc/include/asm/bitops_64.h
+++ b/arch/sparc/include/asm/bitops_64.h
@@ -22,9 +22,10 @@
 void clear_bit(unsigned long nr, volatile unsigned long *addr);
 void change_bit(unsigned long nr, volatile unsigned long *addr);
 
+int fls(unsigned int word);
+
 #include 
 
-#include 
 #include 
 #include 
 
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index a1a2d39..3b9f5e0 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -16,6 +16,7 @@ lib-$(CONFIG_SPARC64) += atomic_64.o
 lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
 lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
 lib-$(CONFIG_SPARC64) += multi3.o
+lib-$(CONFIG_SPARC64) += fls.o
 
 lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
 lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
diff --git a/arch/sparc/lib/fls.S b/arch/sparc/lib/fls.S
new file mode 100644
index 000..06b8d30
--- /dev/null
+++ b/arch/sparc/lib/fls.S
@@ -0,0 +1,67 @@
+/* fls.S: SPARC default fls definition.
+ *
+ * SPARC default fls definition, which follows the same algorithm as
+ * in generic fls(). This function will be boot time patched on T4
+ * and onward.
+ */
+
+#include 
+#include 
+
+   .text
+   .register   %g2, #scratch
+   .register   %g3, #scratch
+ENTRY(fls)                          ! int fls(unsigned int word): 1-based index of the highest set bit, 0 if word is 0
+   brz,pn  %o0, 6f                 ! word == 0: return 0
+    mov    0, %o1                  ! (delay slot) result = 0
+   sethi   %hi(0xffff0000), %g3    ! mask for bits 31..16
+   mov %o0, %g2
+   andcc   %o0, %g3, %g0
+   be,pt   %icc, 8f                ! upper 16 bits clear: handle at 8
+    mov    32, %o1                 ! (delay slot) result = 32
+   sethi   %hi(0xff000000), %g3    ! mask for bits 31..24
+   andcc   %g2, %g3, %g0
+   bne,pt  %icc, 3f                ! top byte set: skip the shift by 8
+    sethi  %hi(0xf0000000), %g3    ! (delay slot) mask for bits 31..28
+   sll %o0, 8, %o0                 ! word <<= 8
+1:
+   add %o1, -8, %o1                ! result -= 8
+   sra %o0, 0, %o0                 ! sign-extend the low 32 bits
+   mov %o0, %g2
+2:
+   sethi   %hi(0xf0000000), %g3
+3:
+   andcc   %g2, %g3, %g0
+   bne,pt  %icc, 4f                ! top nibble set: skip the shift by 4
+    sethi  %hi(0xc0000000), %g3    ! (delay slot) mask for bits 31..30
+   sll %o0, 4, %o0                 ! word <<= 4
+   add %o1, -4, %o1                ! result -= 4
+   sra %o0, 0, %o0
+   mov %o0, %g2
+4:
+   andcc   %g2, %g3, %g0
+   be,a,pt %icc, 7f                ! top two bits clear: shift by 2 at 7
+    sll    %o0, 2, %o0             ! (annulled delay slot) runs only when the branch is taken
+5:
+   xnor    %g0, %o0, %o0           ! o0 = ~word
+   srl %o0, 31, %o0                ! o0 = 1 when bit 31 of word is clear
+   sub %o1, %o0, %o1               ! final adjustment of result
+6:
+   jmp %o7 + 8                     ! return
+    sra    %o1, 0, %o0             ! (delay slot) return result as a 32-bit int
+7:
+   add %o1, -2, %o1                ! result -= 2
+   ba,pt   %xcc, 5b
+    sra    %o0, 0, %o0             ! (delay slot) sign-extend the low 32 bits
+8:
+   sll %o0, 16, %o0                ! word <<= 16
+   sethi   %hi(0xff000000), %g3
+   sra %o0, 0, %o0
+   mov %o0, %g2
+   andcc   %g2, %g3, %g0
+   bne,pt  %icc, 2b                ! top byte set: continue at 2 with result = 16
+    mov    16, %o1                 ! (delay slot) result = 16
+   ba,pt   %xcc, 1b                ! otherwise shift by 8 and continue at 1
+    sll    %o0, 8, %o0             ! (delay slot) word <<= 8
+ENDPROC(fls)
+EXPORT_SYMBOL(fls)
-- 
1.7.1



[PATCH v3 1/5] sparc64: Define SPARC default fls function

2017-10-06 Thread Vijay Kumar
fls will now require boot-time patching on T4 and above.
Redefining it under arch/sparc/lib.

Signed-off-by: Vijay Kumar 
---
 arch/sparc/include/asm/bitops_64.h |3 +-
 arch/sparc/lib/Makefile|1 +
 arch/sparc/lib/fls.S   |   67 
 3 files changed, 70 insertions(+), 1 deletions(-)

diff --git a/arch/sparc/include/asm/bitops_64.h 
b/arch/sparc/include/asm/bitops_64.h
index 2d52240..30aea56 100644
--- a/arch/sparc/include/asm/bitops_64.h
+++ b/arch/sparc/include/asm/bitops_64.h
@@ -22,9 +22,10 @@
 void clear_bit(unsigned long nr, volatile unsigned long *addr);
 void change_bit(unsigned long nr, volatile unsigned long *addr);
 
+int fls(unsigned int word);
+
 #include 
 
-#include 
 #include 
 #include 
 
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index a1a2d39..3b9f5e0 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -16,6 +16,7 @@ lib-$(CONFIG_SPARC64) += atomic_64.o
 lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
 lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
 lib-$(CONFIG_SPARC64) += multi3.o
+lib-$(CONFIG_SPARC64) += fls.o
 
 lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
 lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
diff --git a/arch/sparc/lib/fls.S b/arch/sparc/lib/fls.S
new file mode 100644
index 000..06b8d30
--- /dev/null
+++ b/arch/sparc/lib/fls.S
@@ -0,0 +1,67 @@
+/* fls.S: SPARC default fls definition.
+ *
+ * SPARC default fls definition, which follows the same algorithm as
+ * in generic fls(). This function will be boot time patched on T4
+ * and onward.
+ */
+
+#include 
+#include 
+
+   .text
+   .register   %g2, #scratch
+   .register   %g3, #scratch
+ENTRY(fls)                          ! int fls(unsigned int word): 1-based index of the highest set bit, 0 if word is 0
+   brz,pn  %o0, 6f                 ! word == 0: return 0
+    mov    0, %o1                  ! (delay slot) result = 0
+   sethi   %hi(0xffff0000), %g3    ! mask for bits 31..16
+   mov %o0, %g2
+   andcc   %o0, %g3, %g0
+   be,pt   %icc, 8f                ! upper 16 bits clear: handle at 8
+    mov    32, %o1                 ! (delay slot) result = 32
+   sethi   %hi(0xff000000), %g3    ! mask for bits 31..24
+   andcc   %g2, %g3, %g0
+   bne,pt  %icc, 3f                ! top byte set: skip the shift by 8
+    sethi  %hi(0xf0000000), %g3    ! (delay slot) mask for bits 31..28
+   sll %o0, 8, %o0                 ! word <<= 8
+1:
+   add %o1, -8, %o1                ! result -= 8
+   sra %o0, 0, %o0                 ! sign-extend the low 32 bits
+   mov %o0, %g2
+2:
+   sethi   %hi(0xf0000000), %g3
+3:
+   andcc   %g2, %g3, %g0
+   bne,pt  %icc, 4f                ! top nibble set: skip the shift by 4
+    sethi  %hi(0xc0000000), %g3    ! (delay slot) mask for bits 31..30
+   sll %o0, 4, %o0                 ! word <<= 4
+   add %o1, -4, %o1                ! result -= 4
+   sra %o0, 0, %o0
+   mov %o0, %g2
+4:
+   andcc   %g2, %g3, %g0
+   be,a,pt %icc, 7f                ! top two bits clear: shift by 2 at 7
+    sll    %o0, 2, %o0             ! (annulled delay slot) runs only when the branch is taken
+5:
+   xnor    %g0, %o0, %o0           ! o0 = ~word
+   srl %o0, 31, %o0                ! o0 = 1 when bit 31 of word is clear
+   sub %o1, %o0, %o1               ! final adjustment of result
+6:
+   jmp %o7 + 8                     ! return
+    sra    %o1, 0, %o0             ! (delay slot) return result as a 32-bit int
+7:
+   add %o1, -2, %o1                ! result -= 2
+   ba,pt   %xcc, 5b
+    sra    %o0, 0, %o0             ! (delay slot) sign-extend the low 32 bits
+8:
+   sll %o0, 16, %o0                ! word <<= 16
+   sethi   %hi(0xff000000), %g3
+   sra %o0, 0, %o0
+   mov %o0, %g2
+   andcc   %g2, %g3, %g0
+   bne,pt  %icc, 2b                ! top byte set: continue at 2 with result = 16
+    mov    16, %o1                 ! (delay slot) result = 16
+   ba,pt   %xcc, 1b                ! otherwise shift by 8 and continue at 1
+    sll    %o0, 8, %o0             ! (delay slot) word <<= 8
+ENDPROC(fls)
+EXPORT_SYMBOL(fls)
-- 
1.7.1



Re: [PATCH v2 2/2] sparc64: Use lzcnt instruction for fls and __fls

2017-09-27 Thread Vijay Kumar

Hi Sam,

On 9/27/2017 2:56 PM, Sam Ravnborg wrote:

.size   niagara4_patch_pageops,.-niagara4_patch_pageops
+
+   .globl  niagara4_patch_fls
+   .type   niagara4_patch_fls,#function
+niagara4_patch_fls:
+   NG_DO_PATCH(fls, NG4fls)
+   NG_DO_PATCH(__fls, __NG4fls)
+   retl
+nop
+   .size   niagara4_patch_fls,.-niagara4_patch_fls

Please patch the remaining functions in this file with ENTRY(), ENDPROC() - in a 
separate patch.


Thanks for your comment. Sure, I will make the changes and address other 
comments as well in my revised version.


- Vijay


Re: [PATCH v2 2/2] sparc64: Use lzcnt instruction for fls and __fls

2017-09-27 Thread Vijay Kumar

Hi Sam,

On 9/27/2017 2:56 PM, Sam Ravnborg wrote:

.size   niagara4_patch_pageops,.-niagara4_patch_pageops
+
+   .globl  niagara4_patch_fls
+   .type   niagara4_patch_fls,#function
+niagara4_patch_fls:
+   NG_DO_PATCH(fls, NG4fls)
+   NG_DO_PATCH(__fls, __NG4fls)
+   retl
+nop
+   .size   niagara4_patch_fls,.-niagara4_patch_fls

Please patch the remaining functions in this file with ENTRY(), ENDPROC() - in a 
separate patch.


Thanks for your comment. Sure, I will make the changes and address other 
comments as well in my revised version.


- Vijay


[PATCH v2 2/2] sparc64: Use lzcnt instruction for fls and __fls

2017-09-27 Thread Vijay Kumar
For T4 and above, patch the fls and __fls functions
at boot time to use the lzcnt instruction.

Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com>
Reviewed-by: Babu Moger <babu.mo...@oracle.com>
---
 arch/sparc/Makefile |1 +
 arch/sparc/kernel/head_64.S |2 ++
 arch/sparc/lib/Makefile |3 +++
 arch/sparc/lib/NG4fls.S |   30 ++
 arch/sparc/lib/NG4patch.S   |9 +
 5 files changed, 45 insertions(+), 0 deletions(-)

diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
index 8496a07..0763cd8 100644
--- a/arch/sparc/Makefile
+++ b/arch/sparc/Makefile
@@ -48,6 +48,7 @@ KBUILD_CFLAGS += -ffixed-g4 -ffixed-g5 -fcall-used-g7 
-Wno-sign-compare
 KBUILD_CFLAGS += -Wa,--undeclared-regs
 KBUILD_CFLAGS += $(call cc-option,-mtune=ultrasparc3)
 KBUILD_AFLAGS += -m64 -mcpu=ultrasparc -Wa,--undeclared-regs
+KBUILD_AFLAGS += -Wa,-Asparc4
 
 ifeq ($(CONFIG_MCOUNT),y)
   KBUILD_CFLAGS += -pg
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index 78e0211..1165254 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -628,6 +628,8 @@ niagara4_patch:
 nop
callniagara4_patch_pageops
 nop
+   callniagara4_patch_fls
+nop
 
ba,a,pt %xcc, 80f
 nop
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index eefbb9c..72d2d8c 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -46,3 +46,6 @@ lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o 
ffs.o
 obj-$(CONFIG_SPARC64) += iomap.o
 obj-$(CONFIG_SPARC32) += atomic32.o ucmpdi2.o
 obj-$(CONFIG_SPARC64) += PeeCeeI.o
+
+obj-$(CONFIG_SPARC64) += fls.o
+obj-$(CONFIG_SPARC64) += NG4fls.o
diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S
new file mode 100644
index 000..eb239aa
--- /dev/null
+++ b/arch/sparc/lib/NG4fls.S
@@ -0,0 +1,30 @@
+/* NG4fls.S: SPARC optimized fls and __fls for T4 and above.
+ *
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ */
+
+   .text
+   .align 32
+
+   .globl NG4fls
+   .globl __NG4fls
+   .type  NG4fls, #function
+   .type  __NG4fls, #function
+
+NG4fls:                             ! fls replacement using lzcnt
+   lzcnt   %o0, %o1                ! o1 = leading-zero count of o0
+   mov 64, %o2
+   retl
+sub %o2, %o1, %o0                   ! (delay slot of retl) return 64 - leading-zero count
+   .size   NG4fls, .-NG4fls
+
+__NG4fls:                           ! __fls replacement using lzcnt
+   brz,pn  %o0, 1f                 ! word == 0: return with o0 still 0
+    mov    %o0, %o1                ! (delay slot) copy word so o0 can receive the count
+   lzcnt   %o1, %o0
+   mov 63, %o2
+   sub %o2, %o0, %o0               ! result = 63 - leading-zero count
+1:
+   retl
+    nop                            ! delay slot of retl
+   .size   __NG4fls, .-__NG4fls
diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S
index 3cc0f8c..1010d53 100644
--- a/arch/sparc/lib/NG4patch.S
+++ b/arch/sparc/lib/NG4patch.S
@@ -52,3 +52,12 @@ niagara4_patch_pageops:
retl
 nop
.size   niagara4_patch_pageops,.-niagara4_patch_pageops
+
+   .globl  niagara4_patch_fls
+   .type   niagara4_patch_fls,#function
+niagara4_patch_fls:                 ! called from niagara4_patch in head_64.S (see the head_64.S hunk of this patch)
+   NG_DO_PATCH(fls, NG4fls)        ! presumably redirects fls to NG4fls; the macro is defined outside this hunk
+   NG_DO_PATCH(__fls, __NG4fls)    ! same for __fls and __NG4fls
+   retl
+nop                                 ! delay slot of retl
+   .size   niagara4_patch_fls,.-niagara4_patch_fls
-- 
1.7.1



[PATCH v2 2/2] sparc64: Use lzcnt instruction for fls and __fls

2017-09-27 Thread Vijay Kumar
For T4 and above, patch the fls and __fls functions
at boot time to use the lzcnt instruction.

Signed-off-by: Vijay Kumar 
Reviewed-by: Babu Moger 
---
 arch/sparc/Makefile |1 +
 arch/sparc/kernel/head_64.S |2 ++
 arch/sparc/lib/Makefile |3 +++
 arch/sparc/lib/NG4fls.S |   30 ++
 arch/sparc/lib/NG4patch.S   |9 +
 5 files changed, 45 insertions(+), 0 deletions(-)

diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
index 8496a07..0763cd8 100644
--- a/arch/sparc/Makefile
+++ b/arch/sparc/Makefile
@@ -48,6 +48,7 @@ KBUILD_CFLAGS += -ffixed-g4 -ffixed-g5 -fcall-used-g7 
-Wno-sign-compare
 KBUILD_CFLAGS += -Wa,--undeclared-regs
 KBUILD_CFLAGS += $(call cc-option,-mtune=ultrasparc3)
 KBUILD_AFLAGS += -m64 -mcpu=ultrasparc -Wa,--undeclared-regs
+KBUILD_AFLAGS += -Wa,-Asparc4
 
 ifeq ($(CONFIG_MCOUNT),y)
   KBUILD_CFLAGS += -pg
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index 78e0211..1165254 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -628,6 +628,8 @@ niagara4_patch:
 nop
callniagara4_patch_pageops
 nop
+   callniagara4_patch_fls
+nop
 
ba,a,pt %xcc, 80f
 nop
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index eefbb9c..72d2d8c 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -46,3 +46,6 @@ lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o 
ffs.o
 obj-$(CONFIG_SPARC64) += iomap.o
 obj-$(CONFIG_SPARC32) += atomic32.o ucmpdi2.o
 obj-$(CONFIG_SPARC64) += PeeCeeI.o
+
+obj-$(CONFIG_SPARC64) += fls.o
+obj-$(CONFIG_SPARC64) += NG4fls.o
diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S
new file mode 100644
index 000..eb239aa
--- /dev/null
+++ b/arch/sparc/lib/NG4fls.S
@@ -0,0 +1,30 @@
+/* NG4fls.S: SPARC optimized fls and __fls for T4 and above.
+ *
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ */
+
+   .text
+   .align 32
+
+   .globl NG4fls
+   .globl __NG4fls
+   .type  NG4fls, #function
+   .type  __NG4fls, #function
+
+NG4fls:                             ! fls replacement using lzcnt
+   lzcnt   %o0, %o1                ! o1 = leading-zero count of o0
+   mov 64, %o2
+   retl
+sub %o2, %o1, %o0                   ! (delay slot of retl) return 64 - leading-zero count
+   .size   NG4fls, .-NG4fls
+
+__NG4fls:                           ! __fls replacement using lzcnt
+   brz,pn  %o0, 1f                 ! word == 0: return with o0 still 0
+    mov    %o0, %o1                ! (delay slot) copy word so o0 can receive the count
+   lzcnt   %o1, %o0
+   mov 63, %o2
+   sub %o2, %o0, %o0               ! result = 63 - leading-zero count
+1:
+   retl
+    nop                            ! delay slot of retl
+   .size   __NG4fls, .-__NG4fls
diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S
index 3cc0f8c..1010d53 100644
--- a/arch/sparc/lib/NG4patch.S
+++ b/arch/sparc/lib/NG4patch.S
@@ -52,3 +52,12 @@ niagara4_patch_pageops:
retl
 nop
.size   niagara4_patch_pageops,.-niagara4_patch_pageops
+
+   .globl  niagara4_patch_fls
+   .type   niagara4_patch_fls,#function
+niagara4_patch_fls:                 ! called from niagara4_patch in head_64.S (see the head_64.S hunk of this patch)
+   NG_DO_PATCH(fls, NG4fls)        ! presumably redirects fls to NG4fls; the macro is defined outside this hunk
+   NG_DO_PATCH(__fls, __NG4fls)    ! same for __fls and __NG4fls
+   retl
+nop                                 ! delay slot of retl
+   .size   niagara4_patch_fls,.-niagara4_patch_fls
-- 
1.7.1



[PATCH v2 0/2] sparc64: Optimize fls, fls64 and __fls

2017-09-27 Thread Vijay Kumar
SPARC provides lzcnt instruction (with VIS3) which can be used to
optimize fls, fls64 and __fls functions. For systems that support the
lzcnt instruction, we now do boot time patching to use sparc
optimized fls, fls64 and __fls functions.

v1->v2: 
 - Fixed delay slot issue pointed out by Rob Gardner in patch 2/2.

Vijay Kumar (2):
  sparc64: Define SPARC default fls and __fls
  sparc64: Use lzcnt instruction for fls and __fls

 arch/sparc/Makefile|1 +
 arch/sparc/include/asm/bitops_64.h |7 +-
 arch/sparc/kernel/head_64.S|2 +
 arch/sparc/lib/Makefile|4 +
 arch/sparc/lib/NG4fls.S|   31 +
 arch/sparc/lib/NG4patch.S  |9 +++
 arch/sparc/lib/fls.S   |  126 
 7 files changed, 177 insertions(+), 3 deletions(-)
 create mode 100644 arch/sparc/lib/NG4fls.S
 create mode 100644 arch/sparc/lib/fls.S



[PATCH v2 0/2] sparc64: Optimize fls, fls64 and __fls

2017-09-27 Thread Vijay Kumar
SPARC provides lzcnt instruction (with VIS3) which can be used to
optimize fls, fls64 and __fls functions. For systems that support the
lzcnt instruction, we now do boot time patching to use sparc
optimized fls, fls64 and __fls functions.

v1->v2: 
 - Fixed delay slot issue pointed out by Rob Gardner in patch 2/2.

Vijay Kumar (2):
  sparc64: Define SPARC default fls and __fls
  sparc64: Use lzcnt instruction for fls and __fls

 arch/sparc/Makefile|1 +
 arch/sparc/include/asm/bitops_64.h |7 +-
 arch/sparc/kernel/head_64.S|2 +
 arch/sparc/lib/Makefile|4 +
 arch/sparc/lib/NG4fls.S|   31 +
 arch/sparc/lib/NG4patch.S  |9 +++
 arch/sparc/lib/fls.S   |  126 
 7 files changed, 177 insertions(+), 3 deletions(-)
 create mode 100644 arch/sparc/lib/NG4fls.S
 create mode 100644 arch/sparc/lib/fls.S



[PATCH v2 1/2] sparc64: Define SPARC default fls and __fls

2017-09-27 Thread Vijay Kumar
fls and __fls will now require boot time patching on T4
and above. Redefining these functions under arch/sparc/lib.

Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com>
Reviewed-by: Babu Moger <babu.mo...@oracle.com>
---
 arch/sparc/include/asm/bitops_64.h |7 +-
 arch/sparc/lib/Makefile|1 +
 arch/sparc/lib/fls.S   |  126 
 3 files changed, 131 insertions(+), 3 deletions(-)

diff --git a/arch/sparc/include/asm/bitops_64.h 
b/arch/sparc/include/asm/bitops_64.h
index 2d52240..946c236 100644
--- a/arch/sparc/include/asm/bitops_64.h
+++ b/arch/sparc/include/asm/bitops_64.h
@@ -22,11 +22,12 @@
 void clear_bit(unsigned long nr, volatile unsigned long *addr);
 void change_bit(unsigned long nr, volatile unsigned long *addr);
 
+#define fls64(word)  (((word)?(__fls(word) + 1):0))
+int fls(unsigned int word);
+int __fls(unsigned long word);
+
 #include 
 
-#include 
-#include 
-#include 
 
 #ifdef __KERNEL__
 
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 07c03e7..eefbb9c 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -16,6 +16,7 @@ lib-$(CONFIG_SPARC64) += atomic_64.o
 lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
 lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
 lib-$(CONFIG_SPARC64) += multi3.o
+lib-$(CONFIG_SPARC64) += fls.o
 
 lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
 lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
diff --git a/arch/sparc/lib/fls.S b/arch/sparc/lib/fls.S
new file mode 100644
index 000..a19bff2
--- /dev/null
+++ b/arch/sparc/lib/fls.S
@@ -0,0 +1,126 @@
+/* fls.S: SPARC default fls and __fls definitions.
+ *
+ * SPARC default fls and __fls definitions, which follows the same
+ * algorithm as in generic fls() and __fls(). These functions will
+ * be boot time patched on T4 and onward.
+ */
+
+#include 
+#include 
+
+   .text
+   .align  32
+
+   .global fls, __fls
+   .type   fls,#function
+   .type   __fls,  #function
+
+   .register   %g2, #scratch
+   .register   %g3, #scratch
+
+EXPORT_SYMBOL(__fls)
+EXPORT_SYMBOL(fls)
+
+fls:                                ! int fls(unsigned int word): 1-based index of the highest set bit, 0 if word is 0
+   brz,pn  %o0, 6f                 ! word == 0: return 0
+    mov    0, %o1                  ! (delay slot) result = 0
+   sethi   %hi(0xffff0000), %g3    ! mask for bits 31..16
+   mov %o0, %g2
+   andcc   %o0, %g3, %g0
+   be,pt   %icc, 8f                ! upper 16 bits clear: handle at 8
+    mov    32, %o1                 ! (delay slot) result = 32
+   sethi   %hi(0xff000000), %g3    ! mask for bits 31..24
+   andcc   %g2, %g3, %g0
+   bne,pt  %icc, 3f                ! top byte set: skip the shift by 8
+    sethi  %hi(0xf0000000), %g3    ! (delay slot) mask for bits 31..28
+   sll %o0, 8, %o0                 ! word <<= 8
+1:
+   add %o1, -8, %o1                ! result -= 8
+   sra %o0, 0, %o0                 ! sign-extend the low 32 bits
+   mov %o0, %g2
+2:
+   sethi   %hi(0xf0000000), %g3
+3:
+   andcc   %g2, %g3, %g0
+   bne,pt  %icc, 4f                ! top nibble set: skip the shift by 4
+    sethi  %hi(0xc0000000), %g3    ! (delay slot) mask for bits 31..30
+   sll %o0, 4, %o0                 ! word <<= 4
+   add %o1, -4, %o1                ! result -= 4
+   sra %o0, 0, %o0
+   mov %o0, %g2
+4:
+   andcc   %g2, %g3, %g0
+   be,a,pt %icc, 7f                ! top two bits clear: shift by 2 at 7
+    sll    %o0, 2, %o0             ! (annulled delay slot) runs only when the branch is taken
+5:
+   xnor    %g0, %o0, %o0           ! o0 = ~word
+   srl %o0, 31, %o0                ! o0 = 1 when bit 31 of word is clear
+   sub %o1, %o0, %o1               ! final adjustment of result
+6:
+   jmp %o7 + 8                     ! return
+    sra    %o1, 0, %o0             ! (delay slot) return result as a 32-bit int
+7:
+   add %o1, -2, %o1                ! result -= 2
+   ba,pt   %xcc, 5b
+    sra    %o0, 0, %o0             ! (delay slot) sign-extend the low 32 bits
+8:
+   sll %o0, 16, %o0                ! word <<= 16
+   sethi   %hi(0xff000000), %g3
+   sra %o0, 0, %o0
+   mov %o0, %g2
+   andcc   %g2, %g3, %g0
+   bne,pt  %icc, 2b                ! top byte set: continue at 2 with result = 16
+    mov    16, %o1                 ! (delay slot) result = 16
+   ba,pt   %xcc, 1b                ! otherwise shift by 8 and continue at 1
+    sll    %o0, 8, %o0             ! (delay slot) word <<= 8
+   .size   fls, .-fls
+
+__fls:                              ! int __fls(unsigned long word): 0-based index of the highest set bit
+#if BITS_PER_LONG == 64
+   mov -1, %g2
+   sllx    %g2, 32, %g2            ! mask = upper 32 bits
+   and %o0, %g2, %g2
+   brnz,pt %g2, 1f                 ! upper half non-zero: keep word as is
+    mov    63, %g1                 ! (delay slot) num = 63
+   sllx    %o0, 32, %o0            ! upper half empty: word <<= 32
+#endif
+   mov 31, %g1                     ! num = 31 when the branch above was not taken
+1:
+   mov -1, %g2
+   sllx    %g2, (BITS_PER_LONG-16), %g2    ! mask = top 16 bits
+   and %o0, %g2, %g2
+   brnz,pt %g2, 2f
+    mov    -1, %g2                 ! (delay slot) reload all-ones
+   sllx    %o0, 16, %o0            ! top 16 bits clear: word <<= 16
+   add %g1, -16, %g1               ! num -= 16
+2:
+   mov -1, %g2
+   sllx    %g2, (BITS_PER_LONG-8), %g2     ! mask = top 8 bits
+   and %o0, %g2, %g2
+   brnz,pt %g2, 3f
+    mov    -1, %g2                 ! (delay slot) all-ones for the mask built at 3
+   sllx    %o0, 8, %o0             ! top 8 bits clear: word <<= 8
+   add %g1, -8, %g1                ! num -= 8
+3:
+   sllx    %g2, (BITS_PER_LONG-4), %g2     ! mask = top 4 bits
+   and %o0, %g2, %g2
+   brnz,pt %g2, 4f
+    mov    -1, %g2                 ! (delay slot) all-ones for the mask built at 4
+   sllx    %o0, 4, %o0             ! top 4 bits clear: word <<= 4
+   add %g1, -4, %g1                ! num -= 4
+4:
+   sllx    %g2, (BITS_PER_LONG-2), %g2     ! mask = top 2 bits
+   and %o0, %g2, %g2
+   brnz,pt %g2, 5f
+    mov    -1, %g3                 ! (delay slot) all-ones for the mask built at 5
+   sllx    %o0, 2, %o0             ! top 2 bits clear: word <<= 2
+   add %g1, -2, %g1                ! num -= 2
+5:
+   mov 0, %g2
+   sllx    %g3, (BITS_PER_LONG-1), %g3     ! mask = top bit
+   and %o0, %g3, %o0
+   movre   %o0, 1, %g2             ! g2 = 1 when the top bit is clear
+   sub %g1, %g2, %g1               ! num -= g2
+   jmp %o7+8                       ! return
+    sra    %g1, 0, %o0             ! (delay slot) return num as a 32-bit int
+   .size   __fls, .-__fls
-- 
1.7.1



[PATCH v2 1/2] sparc64: Define SPARC default fls and __fls

2017-09-27 Thread Vijay Kumar
fls and __fls will now require boot time patching on T4
and above. Redefining these functions under arch/sparc/lib.

Signed-off-by: Vijay Kumar 
Reviewed-by: Babu Moger 
---
 arch/sparc/include/asm/bitops_64.h |7 +-
 arch/sparc/lib/Makefile|1 +
 arch/sparc/lib/fls.S   |  126 
 3 files changed, 131 insertions(+), 3 deletions(-)

diff --git a/arch/sparc/include/asm/bitops_64.h 
b/arch/sparc/include/asm/bitops_64.h
index 2d52240..946c236 100644
--- a/arch/sparc/include/asm/bitops_64.h
+++ b/arch/sparc/include/asm/bitops_64.h
@@ -22,11 +22,12 @@
 void clear_bit(unsigned long nr, volatile unsigned long *addr);
 void change_bit(unsigned long nr, volatile unsigned long *addr);
 
+#define fls64(word)  (((word)?(__fls(word) + 1):0))
+int fls(unsigned int word);
+int __fls(unsigned long word);
+
 #include 
 
-#include 
-#include 
-#include 
 
 #ifdef __KERNEL__
 
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 07c03e7..eefbb9c 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -16,6 +16,7 @@ lib-$(CONFIG_SPARC64) += atomic_64.o
 lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
 lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
 lib-$(CONFIG_SPARC64) += multi3.o
+lib-$(CONFIG_SPARC64) += fls.o
 
 lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
 lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
diff --git a/arch/sparc/lib/fls.S b/arch/sparc/lib/fls.S
new file mode 100644
index 000..a19bff2
--- /dev/null
+++ b/arch/sparc/lib/fls.S
@@ -0,0 +1,126 @@
+/* fls.S: SPARC default fls and __fls definitions.
+ *
+ * SPARC default fls and __fls definitions, which follows the same
+ * algorithm as in generic fls() and __fls(). These functions will
+ * be boot time patched on T4 and onward.
+ */
+
+#include 
+#include 
+
+   .text
+   .align  32
+
+   .global fls, __fls
+   .type   fls,#function
+   .type   __fls,  #function
+
+   .register   %g2, #scratch
+   .register   %g3, #scratch
+
+EXPORT_SYMBOL(__fls)
+EXPORT_SYMBOL(fls)
+
+fls:                                ! int fls(unsigned int word): 1-based index of the highest set bit, 0 if word is 0
+   brz,pn  %o0, 6f                 ! word == 0: return 0
+    mov    0, %o1                  ! (delay slot) result = 0
+   sethi   %hi(0xffff0000), %g3    ! mask for bits 31..16
+   mov %o0, %g2
+   andcc   %o0, %g3, %g0
+   be,pt   %icc, 8f                ! upper 16 bits clear: handle at 8
+    mov    32, %o1                 ! (delay slot) result = 32
+   sethi   %hi(0xff000000), %g3    ! mask for bits 31..24
+   andcc   %g2, %g3, %g0
+   bne,pt  %icc, 3f                ! top byte set: skip the shift by 8
+    sethi  %hi(0xf0000000), %g3    ! (delay slot) mask for bits 31..28
+   sll %o0, 8, %o0                 ! word <<= 8
+1:
+   add %o1, -8, %o1                ! result -= 8
+   sra %o0, 0, %o0                 ! sign-extend the low 32 bits
+   mov %o0, %g2
+2:
+   sethi   %hi(0xf0000000), %g3
+3:
+   andcc   %g2, %g3, %g0
+   bne,pt  %icc, 4f                ! top nibble set: skip the shift by 4
+    sethi  %hi(0xc0000000), %g3    ! (delay slot) mask for bits 31..30
+   sll %o0, 4, %o0                 ! word <<= 4
+   add %o1, -4, %o1                ! result -= 4
+   sra %o0, 0, %o0
+   mov %o0, %g2
+4:
+   andcc   %g2, %g3, %g0
+   be,a,pt %icc, 7f                ! top two bits clear: shift by 2 at 7
+    sll    %o0, 2, %o0             ! (annulled delay slot) runs only when the branch is taken
+5:
+   xnor    %g0, %o0, %o0           ! o0 = ~word
+   srl %o0, 31, %o0                ! o0 = 1 when bit 31 of word is clear
+   sub %o1, %o0, %o1               ! final adjustment of result
+6:
+   jmp %o7 + 8                     ! return
+    sra    %o1, 0, %o0             ! (delay slot) return result as a 32-bit int
+7:
+   add %o1, -2, %o1                ! result -= 2
+   ba,pt   %xcc, 5b
+    sra    %o0, 0, %o0             ! (delay slot) sign-extend the low 32 bits
+8:
+   sll %o0, 16, %o0                ! word <<= 16
+   sethi   %hi(0xff000000), %g3
+   sra %o0, 0, %o0
+   mov %o0, %g2
+   andcc   %g2, %g3, %g0
+   bne,pt  %icc, 2b                ! top byte set: continue at 2 with result = 16
+    mov    16, %o1                 ! (delay slot) result = 16
+   ba,pt   %xcc, 1b                ! otherwise shift by 8 and continue at 1
+    sll    %o0, 8, %o0             ! (delay slot) word <<= 8
+   .size   fls, .-fls
+
+__fls:                              ! int __fls(unsigned long word): 0-based index of the highest set bit
+#if BITS_PER_LONG == 64
+   mov -1, %g2
+   sllx    %g2, 32, %g2            ! mask = upper 32 bits
+   and %o0, %g2, %g2
+   brnz,pt %g2, 1f                 ! upper half non-zero: keep word as is
+    mov    63, %g1                 ! (delay slot) num = 63
+   sllx    %o0, 32, %o0            ! upper half empty: word <<= 32
+#endif
+   mov 31, %g1                     ! num = 31 when the branch above was not taken
+1:
+   mov -1, %g2
+   sllx    %g2, (BITS_PER_LONG-16), %g2    ! mask = top 16 bits
+   and %o0, %g2, %g2
+   brnz,pt %g2, 2f
+    mov    -1, %g2                 ! (delay slot) reload all-ones
+   sllx    %o0, 16, %o0            ! top 16 bits clear: word <<= 16
+   add %g1, -16, %g1               ! num -= 16
+2:
+   mov -1, %g2
+   sllx    %g2, (BITS_PER_LONG-8), %g2     ! mask = top 8 bits
+   and %o0, %g2, %g2
+   brnz,pt %g2, 3f
+    mov    -1, %g2                 ! (delay slot) all-ones for the mask built at 3
+   sllx    %o0, 8, %o0             ! top 8 bits clear: word <<= 8
+   add %g1, -8, %g1                ! num -= 8
+3:
+   sllx    %g2, (BITS_PER_LONG-4), %g2     ! mask = top 4 bits
+   and %o0, %g2, %g2
+   brnz,pt %g2, 4f
+    mov    -1, %g2                 ! (delay slot) all-ones for the mask built at 4
+   sllx    %o0, 4, %o0             ! top 4 bits clear: word <<= 4
+   add %g1, -4, %g1                ! num -= 4
+4:
+   sllx    %g2, (BITS_PER_LONG-2), %g2     ! mask = top 2 bits
+   and %o0, %g2, %g2
+   brnz,pt %g2, 5f
+    mov    -1, %g3                 ! (delay slot) all-ones for the mask built at 5
+   sllx    %o0, 2, %o0             ! top 2 bits clear: word <<= 2
+   add %g1, -2, %g1                ! num -= 2
+5:
+   mov 0, %g2
+   sllx    %g3, (BITS_PER_LONG-1), %g3     ! mask = top bit
+   and %o0, %g3, %o0
+   movre   %o0, 1, %g2             ! g2 = 1 when the top bit is clear
+   sub %g1, %g2, %g1               ! num -= g2
+   jmp %o7+8                       ! return
+    sra    %g1, 0, %o0             ! (delay slot) return num as a 32-bit int
+   .size   __fls, .-__fls
-- 
1.7.1



[PATCH 1/2] sparc64: Define SPARC default fls and __fls

2017-09-27 Thread Vijay Kumar
fls and __fls will now require boot time patching on T4
and above. Redefining these functions under arch/sparc/lib.

Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com>
Reviewed-by: Babu Moger <babu.mo...@oracle.com>
---
 arch/sparc/include/asm/bitops_64.h |7 +-
 arch/sparc/lib/Makefile|1 +
 arch/sparc/lib/fls.S   |  126 
 3 files changed, 131 insertions(+), 3 deletions(-)

diff --git a/arch/sparc/include/asm/bitops_64.h 
b/arch/sparc/include/asm/bitops_64.h
index 2d52240..946c236 100644
--- a/arch/sparc/include/asm/bitops_64.h
+++ b/arch/sparc/include/asm/bitops_64.h
@@ -22,11 +22,12 @@
 void clear_bit(unsigned long nr, volatile unsigned long *addr);
 void change_bit(unsigned long nr, volatile unsigned long *addr);
 
+#define fls64(word)  (((word)?(__fls(word) + 1):0))
+int fls(unsigned int word);
+int __fls(unsigned long word);
+
 #include 
 
-#include 
-#include 
-#include 
 
 #ifdef __KERNEL__
 
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 07c03e7..eefbb9c 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -16,6 +16,7 @@ lib-$(CONFIG_SPARC64) += atomic_64.o
 lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
 lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
 lib-$(CONFIG_SPARC64) += multi3.o
+lib-$(CONFIG_SPARC64) += fls.o
 
 lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
 lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
diff --git a/arch/sparc/lib/fls.S b/arch/sparc/lib/fls.S
new file mode 100644
index 000..a19bff2
--- /dev/null
+++ b/arch/sparc/lib/fls.S
@@ -0,0 +1,126 @@
+/* fls.S: SPARC default fls and __fls definitions.
+ *
+ * SPARC default fls and __fls definitions, which follows the same
+ * algorithm as in generic fls() and __fls(). These functions will
+ * be boot time patched on T4 and onward.
+ */
+
+#include 
+#include 
+
+   .text
+   .align  32
+
+   .global fls, __fls
+   .type   fls,#function
+   .type   __fls,  #function
+
+   .register   %g2, #scratch
+   .register   %g3, #scratch
+
+EXPORT_SYMBOL(__fls)
+EXPORT_SYMBOL(fls)
+
+fls:                                ! int fls(unsigned int word): 1-based index of the highest set bit, 0 if word is 0
+   brz,pn  %o0, 6f                 ! word == 0: return 0
+    mov    0, %o1                  ! (delay slot) result = 0
+   sethi   %hi(0xffff0000), %g3    ! mask for bits 31..16
+   mov %o0, %g2
+   andcc   %o0, %g3, %g0
+   be,pt   %icc, 8f                ! upper 16 bits clear: handle at 8
+    mov    32, %o1                 ! (delay slot) result = 32
+   sethi   %hi(0xff000000), %g3    ! mask for bits 31..24
+   andcc   %g2, %g3, %g0
+   bne,pt  %icc, 3f                ! top byte set: skip the shift by 8
+    sethi  %hi(0xf0000000), %g3    ! (delay slot) mask for bits 31..28
+   sll %o0, 8, %o0                 ! word <<= 8
+1:
+   add %o1, -8, %o1                ! result -= 8
+   sra %o0, 0, %o0                 ! sign-extend the low 32 bits
+   mov %o0, %g2
+2:
+   sethi   %hi(0xf0000000), %g3
+3:
+   andcc   %g2, %g3, %g0
+   bne,pt  %icc, 4f                ! top nibble set: skip the shift by 4
+    sethi  %hi(0xc0000000), %g3    ! (delay slot) mask for bits 31..30
+   sll %o0, 4, %o0                 ! word <<= 4
+   add %o1, -4, %o1                ! result -= 4
+   sra %o0, 0, %o0
+   mov %o0, %g2
+4:
+   andcc   %g2, %g3, %g0
+   be,a,pt %icc, 7f                ! top two bits clear: shift by 2 at 7
+    sll    %o0, 2, %o0             ! (annulled delay slot) runs only when the branch is taken
+5:
+   xnor    %g0, %o0, %o0           ! o0 = ~word
+   srl %o0, 31, %o0                ! o0 = 1 when bit 31 of word is clear
+   sub %o1, %o0, %o1               ! final adjustment of result
+6:
+   jmp %o7 + 8                     ! return
+    sra    %o1, 0, %o0             ! (delay slot) return result as a 32-bit int
+7:
+   add %o1, -2, %o1                ! result -= 2
+   ba,pt   %xcc, 5b
+    sra    %o0, 0, %o0             ! (delay slot) sign-extend the low 32 bits
+8:
+   sll %o0, 16, %o0                ! word <<= 16
+   sethi   %hi(0xff000000), %g3
+   sra %o0, 0, %o0
+   mov %o0, %g2
+   andcc   %g2, %g3, %g0
+   bne,pt  %icc, 2b                ! top byte set: continue at 2 with result = 16
+    mov    16, %o1                 ! (delay slot) result = 16
+   ba,pt   %xcc, 1b                ! otherwise shift by 8 and continue at 1
+    sll    %o0, 8, %o0             ! (delay slot) word <<= 8
+   .size   fls, .-fls
+
+__fls:                              ! int __fls(unsigned long word): 0-based index of the highest set bit
+#if BITS_PER_LONG == 64
+   mov -1, %g2
+   sllx    %g2, 32, %g2            ! mask = upper 32 bits
+   and %o0, %g2, %g2
+   brnz,pt %g2, 1f                 ! upper half non-zero: keep word as is
+    mov    63, %g1                 ! (delay slot) num = 63
+   sllx    %o0, 32, %o0            ! upper half empty: word <<= 32
+#endif
+   mov 31, %g1                     ! num = 31 when the branch above was not taken
+1:
+   mov -1, %g2
+   sllx    %g2, (BITS_PER_LONG-16), %g2    ! mask = top 16 bits
+   and %o0, %g2, %g2
+   brnz,pt %g2, 2f
+    mov    -1, %g2                 ! (delay slot) reload all-ones
+   sllx    %o0, 16, %o0            ! top 16 bits clear: word <<= 16
+   add %g1, -16, %g1               ! num -= 16
+2:
+   mov -1, %g2
+   sllx    %g2, (BITS_PER_LONG-8), %g2     ! mask = top 8 bits
+   and %o0, %g2, %g2
+   brnz,pt %g2, 3f
+    mov    -1, %g2                 ! (delay slot) all-ones for the mask built at 3
+   sllx    %o0, 8, %o0             ! top 8 bits clear: word <<= 8
+   add %g1, -8, %g1                ! num -= 8
+3:
+   sllx    %g2, (BITS_PER_LONG-4), %g2     ! mask = top 4 bits
+   and %o0, %g2, %g2
+   brnz,pt %g2, 4f
+    mov    -1, %g2                 ! (delay slot) all-ones for the mask built at 4
+   sllx    %o0, 4, %o0             ! top 4 bits clear: word <<= 4
+   add %g1, -4, %g1                ! num -= 4
+4:
+   sllx    %g2, (BITS_PER_LONG-2), %g2     ! mask = top 2 bits
+   and %o0, %g2, %g2
+   brnz,pt %g2, 5f
+    mov    -1, %g3                 ! (delay slot) all-ones for the mask built at 5
+   sllx    %o0, 2, %o0             ! top 2 bits clear: word <<= 2
+   add %g1, -2, %g1                ! num -= 2
+5:
+   mov 0, %g2
+   sllx    %g3, (BITS_PER_LONG-1), %g3     ! mask = top bit
+   and %o0, %g3, %o0
+   movre   %o0, 1, %g2             ! g2 = 1 when the top bit is clear
+   sub %g1, %g2, %g1               ! num -= g2
+   jmp %o7+8                       ! return
+    sra    %g1, 0, %o0             ! (delay slot) return num as a 32-bit int
+   .size   __fls, .-__fls
-- 
1.7.1



[PATCH 1/2] sparc64: Define SPARC default fls and __fls

2017-09-27 Thread Vijay Kumar
fls and __fls will now require boot time patching on T4
and above. Redefining these functions under arch/sparc/lib.

Signed-off-by: Vijay Kumar 
Reviewed-by: Babu Moger 
---
 arch/sparc/include/asm/bitops_64.h |7 +-
 arch/sparc/lib/Makefile|1 +
 arch/sparc/lib/fls.S   |  126 
 3 files changed, 131 insertions(+), 3 deletions(-)

diff --git a/arch/sparc/include/asm/bitops_64.h 
b/arch/sparc/include/asm/bitops_64.h
index 2d52240..946c236 100644
--- a/arch/sparc/include/asm/bitops_64.h
+++ b/arch/sparc/include/asm/bitops_64.h
@@ -22,11 +22,12 @@
 void clear_bit(unsigned long nr, volatile unsigned long *addr);
 void change_bit(unsigned long nr, volatile unsigned long *addr);
 
+#define fls64(word)  (((word)?(__fls(word) + 1):0))
+int fls(unsigned int word);
+int __fls(unsigned long word);
+
 #include 
 
-#include 
-#include 
-#include 
 
 #ifdef __KERNEL__
 
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 07c03e7..eefbb9c 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -16,6 +16,7 @@ lib-$(CONFIG_SPARC64) += atomic_64.o
 lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
 lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
 lib-$(CONFIG_SPARC64) += multi3.o
+lib-$(CONFIG_SPARC64) += fls.o
 
 lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
 lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
diff --git a/arch/sparc/lib/fls.S b/arch/sparc/lib/fls.S
new file mode 100644
index 000..a19bff2
--- /dev/null
+++ b/arch/sparc/lib/fls.S
@@ -0,0 +1,126 @@
+/* fls.S: SPARC default fls and __fls definitions.
+ *
+ * SPARC default fls and __fls definitions, which follows the same
+ * algorithm as in generic fls() and __fls(). These functions will
+ * be boot time patched on T4 and onward.
+ */
+
+#include 
+#include 
+
+   .text
+   .align  32
+
+   .global fls, __fls
+   .type   fls,#function
+   .type   __fls,  #function
+
+   .register   %g2, #scratch
+   .register   %g3, #scratch
+
+EXPORT_SYMBOL(__fls)
+EXPORT_SYMBOL(fls)
+
+fls:                                ! int fls(unsigned int word): 1-based index of the highest set bit, 0 if word is 0
+   brz,pn  %o0, 6f                 ! word == 0: return 0
+    mov    0, %o1                  ! (delay slot) result = 0
+   sethi   %hi(0xffff0000), %g3    ! mask for bits 31..16
+   mov %o0, %g2
+   andcc   %o0, %g3, %g0
+   be,pt   %icc, 8f                ! upper 16 bits clear: handle at 8
+    mov    32, %o1                 ! (delay slot) result = 32
+   sethi   %hi(0xff000000), %g3    ! mask for bits 31..24
+   andcc   %g2, %g3, %g0
+   bne,pt  %icc, 3f                ! top byte set: skip the shift by 8
+    sethi  %hi(0xf0000000), %g3    ! (delay slot) mask for bits 31..28
+   sll %o0, 8, %o0                 ! word <<= 8
+1:
+   add %o1, -8, %o1                ! result -= 8
+   sra %o0, 0, %o0                 ! sign-extend the low 32 bits
+   mov %o0, %g2
+2:
+   sethi   %hi(0xf0000000), %g3
+3:
+   andcc   %g2, %g3, %g0
+   bne,pt  %icc, 4f                ! top nibble set: skip the shift by 4
+    sethi  %hi(0xc0000000), %g3    ! (delay slot) mask for bits 31..30
+   sll %o0, 4, %o0                 ! word <<= 4
+   add %o1, -4, %o1                ! result -= 4
+   sra %o0, 0, %o0
+   mov %o0, %g2
+4:
+   andcc   %g2, %g3, %g0
+   be,a,pt %icc, 7f                ! top two bits clear: shift by 2 at 7
+    sll    %o0, 2, %o0             ! (annulled delay slot) runs only when the branch is taken
+5:
+   xnor    %g0, %o0, %o0           ! o0 = ~word
+   srl %o0, 31, %o0                ! o0 = 1 when bit 31 of word is clear
+   sub %o1, %o0, %o1               ! final adjustment of result
+6:
+   jmp %o7 + 8                     ! return
+    sra    %o1, 0, %o0             ! (delay slot) return result as a 32-bit int
+7:
+   add %o1, -2, %o1                ! result -= 2
+   ba,pt   %xcc, 5b
+    sra    %o0, 0, %o0             ! (delay slot) sign-extend the low 32 bits
+8:
+   sll %o0, 16, %o0                ! word <<= 16
+   sethi   %hi(0xff000000), %g3
+   sra %o0, 0, %o0
+   mov %o0, %g2
+   andcc   %g2, %g3, %g0
+   bne,pt  %icc, 2b                ! top byte set: continue at 2 with result = 16
+    mov    16, %o1                 ! (delay slot) result = 16
+   ba,pt   %xcc, 1b                ! otherwise shift by 8 and continue at 1
+    sll    %o0, 8, %o0             ! (delay slot) word <<= 8
+   .size   fls, .-fls
+
+__fls:                              ! int __fls(unsigned long word): 0-based index of the highest set bit
+#if BITS_PER_LONG == 64
+   mov -1, %g2
+   sllx    %g2, 32, %g2            ! mask = upper 32 bits
+   and %o0, %g2, %g2
+   brnz,pt %g2, 1f                 ! upper half non-zero: keep word as is
+    mov    63, %g1                 ! (delay slot) num = 63
+   sllx    %o0, 32, %o0            ! upper half empty: word <<= 32
+#endif
+   mov 31, %g1                     ! num = 31 when the branch above was not taken
+1:
+   mov -1, %g2
+   sllx    %g2, (BITS_PER_LONG-16), %g2    ! mask = top 16 bits
+   and %o0, %g2, %g2
+   brnz,pt %g2, 2f
+    mov    -1, %g2                 ! (delay slot) reload all-ones
+   sllx    %o0, 16, %o0            ! top 16 bits clear: word <<= 16
+   add %g1, -16, %g1               ! num -= 16
+2:
+   mov -1, %g2
+   sllx    %g2, (BITS_PER_LONG-8), %g2     ! mask = top 8 bits
+   and %o0, %g2, %g2
+   brnz,pt %g2, 3f
+    mov    -1, %g2                 ! (delay slot) all-ones for the mask built at 3
+   sllx    %o0, 8, %o0             ! top 8 bits clear: word <<= 8
+   add %g1, -8, %g1                ! num -= 8
+3:
+   sllx    %g2, (BITS_PER_LONG-4), %g2     ! mask = top 4 bits
+   and %o0, %g2, %g2
+   brnz,pt %g2, 4f
+    mov    -1, %g2                 ! (delay slot) all-ones for the mask built at 4
+   sllx    %o0, 4, %o0             ! top 4 bits clear: word <<= 4
+   add %g1, -4, %g1                ! num -= 4
+4:
+   sllx    %g2, (BITS_PER_LONG-2), %g2     ! mask = top 2 bits
+   and %o0, %g2, %g2
+   brnz,pt %g2, 5f
+    mov    -1, %g3                 ! (delay slot) all-ones for the mask built at 5
+   sllx    %o0, 2, %o0             ! top 2 bits clear: word <<= 2
+   add %g1, -2, %g1                ! num -= 2
+5:
+   mov 0, %g2
+   sllx    %g3, (BITS_PER_LONG-1), %g3     ! mask = top bit
+   and %o0, %g3, %o0
+   movre   %o0, 1, %g2             ! g2 = 1 when the top bit is clear
+   sub %g1, %g2, %g1               ! num -= g2
+   jmp %o7+8                       ! return
+    sra    %g1, 0, %o0             ! (delay slot) return num as a 32-bit int
+   .size   __fls, .-__fls
-- 
1.7.1



[PATCH 2/2] sparc64: Use lzcnt instruction for fls and __fls

2017-09-27 Thread Vijay Kumar
For T4 and above, patch the fls and __fls functions
at boot time to use the lzcnt instruction.

Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com>
Reviewed-by: Babu Moger <babu.mo...@oracle.com>
---
 arch/sparc/Makefile |1 +
 arch/sparc/kernel/head_64.S |2 ++
 arch/sparc/lib/Makefile |3 +++
 arch/sparc/lib/NG4fls.S |   31 +++
 arch/sparc/lib/NG4patch.S   |9 +
 5 files changed, 46 insertions(+), 0 deletions(-)

diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
index 8496a07..0763cd8 100644
--- a/arch/sparc/Makefile
+++ b/arch/sparc/Makefile
@@ -48,6 +48,7 @@ KBUILD_CFLAGS += -ffixed-g4 -ffixed-g5 -fcall-used-g7 
-Wno-sign-compare
 KBUILD_CFLAGS += -Wa,--undeclared-regs
 KBUILD_CFLAGS += $(call cc-option,-mtune=ultrasparc3)
 KBUILD_AFLAGS += -m64 -mcpu=ultrasparc -Wa,--undeclared-regs
+KBUILD_AFLAGS += -Wa,-Asparc4
 
 ifeq ($(CONFIG_MCOUNT),y)
   KBUILD_CFLAGS += -pg
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index 78e0211..1165254 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -628,6 +628,8 @@ niagara4_patch:
 nop
callniagara4_patch_pageops
 nop
+   callniagara4_patch_fls
+nop
 
ba,a,pt %xcc, 80f
 nop
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index eefbb9c..72d2d8c 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -46,3 +46,6 @@ lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o 
ffs.o
 obj-$(CONFIG_SPARC64) += iomap.o
 obj-$(CONFIG_SPARC32) += atomic32.o ucmpdi2.o
 obj-$(CONFIG_SPARC64) += PeeCeeI.o
+
+obj-$(CONFIG_SPARC64) += fls.o
+obj-$(CONFIG_SPARC64) += NG4fls.o
diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S
new file mode 100644
index 000..7c2cfb3
--- /dev/null
+++ b/arch/sparc/lib/NG4fls.S
@@ -0,0 +1,31 @@
+/* NG4fls.S: SPARC optimized fls and __fls for T4 and above.
+ *
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ */
+
+   .text
+   .align 32
+
+   .globl NG4fls
+   .globl __NG4fls
+   .type  NG4fls, #function
+   .type  __NG4fls, #function
+
+/*
+ * lzcnt-based replacement for fls(): 1-based position of the most
+ * significant set bit in the low 32 bits of %o0, or 0 when none is set.
+ *
+ * In:    %o0 = value (only the low 32 bits are significant)
+ * Out:   %o0 = 64 - lzcnt(zero-extended value)
+ * Uses:  %o1, %o2
+ */
+NG4fls:
+   srl     %o0, 0, %o0             ! zero-extend so sign bits are not counted
+   lzcnt   %o0, %o1                ! %o1 = leading zeros of the 64-bit value
+   mov     64, %o2
+   retl
+    sub    %o2, %o1, %o0           ! delay slot: result = 64 - %o1
+   .size   NG4fls, .-NG4fls
+
+/*
+ * lzcnt-based replacement for __fls(): 0-based index of the most
+ * significant set bit of the 64-bit value in %o0.
+ *
+ * In:    %o0 = word
+ * Out:   %o0 = 63 - lzcnt(word); a zero word is returned unchanged as 0
+ * Uses:  %o1, %o2
+ */
+__NG4fls:
+   brz,pn  %o0, 1f                 ! zero input: go straight to the return
+    mov    %o0, %o1                ! delay slot: copy of the input
+   lzcnt   %o1, %o0                ! %o0 = leading zeros
+   mov     63, %o2
+   sub     %o2, %o0, %o0           ! %o0 = 63 - leading zeros
+1:
+   retl
+    nop                            ! delay slot of retl
+   nop                             ! padding after the return, never reached
+   .size   __NG4fls, .-__NG4fls
diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S
index 3cc0f8c..1010d53 100644
--- a/arch/sparc/lib/NG4patch.S
+++ b/arch/sparc/lib/NG4patch.S
@@ -52,3 +52,12 @@ niagara4_patch_pageops:
retl
 nop
.size   niagara4_patch_pageops,.-niagara4_patch_pageops
+
+   /* Boot-time hook, called from niagara4_patch in head_64.S: switches
+    * fls and __fls over to the lzcnt-based NG4fls and __NG4fls.
+    * NG_DO_PATCH is defined outside this hunk; presumably it rewrites
+    * the start of the first symbol so that it branches to the second.
+    * TODO confirm against the macro definition in NG4patch.S.
+    */
+   .globl  niagara4_patch_fls
+   .type   niagara4_patch_fls,#function
+niagara4_patch_fls:
+   NG_DO_PATCH(fls, NG4fls)
+   NG_DO_PATCH(__fls, __NG4fls)
+   retl
+nop    /* delay slot of retl */
+   .size   niagara4_patch_fls,.-niagara4_patch_fls
-- 
1.7.1



[PATCH 2/2] sparc64: Use lzcnt instruction for fls and __fls

2017-09-27 Thread Vijay Kumar
For T4 and above, patch fls and __fls functions
at the boot time to use lzcnt instruction.

Signed-off-by: Vijay Kumar 
Reviewed-by: Babu Moger 
---
 arch/sparc/Makefile |1 +
 arch/sparc/kernel/head_64.S |2 ++
 arch/sparc/lib/Makefile |3 +++
 arch/sparc/lib/NG4fls.S |   31 +++
 arch/sparc/lib/NG4patch.S   |9 +
 5 files changed, 46 insertions(+), 0 deletions(-)

diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
index 8496a07..0763cd8 100644
--- a/arch/sparc/Makefile
+++ b/arch/sparc/Makefile
@@ -48,6 +48,7 @@ KBUILD_CFLAGS += -ffixed-g4 -ffixed-g5 -fcall-used-g7 
-Wno-sign-compare
 KBUILD_CFLAGS += -Wa,--undeclared-regs
 KBUILD_CFLAGS += $(call cc-option,-mtune=ultrasparc3)
 KBUILD_AFLAGS += -m64 -mcpu=ultrasparc -Wa,--undeclared-regs
+KBUILD_AFLAGS += -Wa,-Asparc4
 
 ifeq ($(CONFIG_MCOUNT),y)
   KBUILD_CFLAGS += -pg
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index 78e0211..1165254 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -628,6 +628,8 @@ niagara4_patch:
 nop
callniagara4_patch_pageops
 nop
+   callniagara4_patch_fls
+nop
 
ba,a,pt %xcc, 80f
 nop
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index eefbb9c..72d2d8c 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -46,3 +46,6 @@ lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o 
ffs.o
 obj-$(CONFIG_SPARC64) += iomap.o
 obj-$(CONFIG_SPARC32) += atomic32.o ucmpdi2.o
 obj-$(CONFIG_SPARC64) += PeeCeeI.o
+
+obj-$(CONFIG_SPARC64) += fls.o
+obj-$(CONFIG_SPARC64) += NG4fls.o
diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S
new file mode 100644
index 000..7c2cfb3
--- /dev/null
+++ b/arch/sparc/lib/NG4fls.S
@@ -0,0 +1,31 @@
+/* NG4fls.S: SPARC optimized fls and __fls for T4 and above.
+ *
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ */
+
+   .text
+   .align 32
+
+   .globl NG4fls
+   .globl __NG4fls
+   .type  NG4fls, #function
+   .type  __NG4fls, #function
+
+/*
+ * lzcnt-based replacement for fls(): 1-based position of the most
+ * significant set bit in the low 32 bits of %o0, or 0 when none is set.
+ *
+ * In:    %o0 = value (only the low 32 bits are significant)
+ * Out:   %o0 = 64 - lzcnt(zero-extended value)
+ * Uses:  %o1, %o2
+ */
+NG4fls:
+   srl     %o0, 0, %o0             ! zero-extend so sign bits are not counted
+   lzcnt   %o0, %o1                ! %o1 = leading zeros of the 64-bit value
+   mov     64, %o2
+   retl
+    sub    %o2, %o1, %o0           ! delay slot: result = 64 - %o1
+   .size   NG4fls, .-NG4fls
+
+/*
+ * lzcnt-based replacement for __fls(): 0-based index of the most
+ * significant set bit of the 64-bit value in %o0.
+ *
+ * In:    %o0 = word
+ * Out:   %o0 = 63 - lzcnt(word); a zero word is returned unchanged as 0
+ * Uses:  %o1, %o2
+ */
+__NG4fls:
+   brz,pn  %o0, 1f                 ! zero input: go straight to the return
+    mov    %o0, %o1                ! delay slot: copy of the input
+   lzcnt   %o1, %o0                ! %o0 = leading zeros
+   mov     63, %o2
+   sub     %o2, %o0, %o0           ! %o0 = 63 - leading zeros
+1:
+   retl
+    nop                            ! delay slot of retl
+   nop                             ! padding after the return, never reached
+   .size   __NG4fls, .-__NG4fls
diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S
index 3cc0f8c..1010d53 100644
--- a/arch/sparc/lib/NG4patch.S
+++ b/arch/sparc/lib/NG4patch.S
@@ -52,3 +52,12 @@ niagara4_patch_pageops:
retl
 nop
.size   niagara4_patch_pageops,.-niagara4_patch_pageops
+
+   /* Boot-time hook, called from niagara4_patch in head_64.S: switches
+    * fls and __fls over to the lzcnt-based NG4fls and __NG4fls.
+    * NG_DO_PATCH is defined outside this hunk; presumably it rewrites
+    * the start of the first symbol so that it branches to the second.
+    * TODO confirm against the macro definition in NG4patch.S.
+    */
+   .globl  niagara4_patch_fls
+   .type   niagara4_patch_fls,#function
+niagara4_patch_fls:
+   NG_DO_PATCH(fls, NG4fls)
+   NG_DO_PATCH(__fls, __NG4fls)
+   retl
+nop    /* delay slot of retl */
+   .size   niagara4_patch_fls,.-niagara4_patch_fls
-- 
1.7.1



[PATCH 0/2] sparc64: Optimize fls, fls64 and __fls

2017-09-27 Thread Vijay Kumar
SPARC provides the lzcnt instruction (with VIS3), which can be used to
optimize the fls, fls64 and __fls functions. On systems that support
lzcnt, we now patch the kernel at boot time to use the SPARC-optimized
fls, fls64 and __fls functions.
  
Vijay Kumar (2):
  sparc64: Define SPARC default fls and __fls
  sparc64: Use lzcnt instruction for fls and __fls

 arch/sparc/Makefile|1 +
 arch/sparc/include/asm/bitops_64.h |7 +-
 arch/sparc/kernel/head_64.S|2 +
 arch/sparc/lib/Makefile|4 +
 arch/sparc/lib/NG4fls.S|   31 +
 arch/sparc/lib/NG4patch.S  |9 +++
 arch/sparc/lib/fls.S   |  126 
 7 files changed, 177 insertions(+), 3 deletions(-)
 create mode 100644 arch/sparc/lib/NG4fls.S
 create mode 100644 arch/sparc/lib/fls.S



[PATCH 0/2] sparc64: Optimize fls, fls64 and __fls

2017-09-27 Thread Vijay Kumar
SPARC provides the lzcnt instruction (with VIS3), which can be used to
optimize the fls, fls64 and __fls functions. On systems that support
lzcnt, we now patch the kernel at boot time to use the SPARC-optimized
fls, fls64 and __fls functions.
  
Vijay Kumar (2):
  sparc64: Define SPARC default fls and __fls
  sparc64: Use lzcnt instruction for fls and __fls

 arch/sparc/Makefile|1 +
 arch/sparc/include/asm/bitops_64.h |7 +-
 arch/sparc/kernel/head_64.S|2 +
 arch/sparc/lib/Makefile|4 +
 arch/sparc/lib/NG4fls.S|   31 +
 arch/sparc/lib/NG4patch.S  |9 +++
 arch/sparc/lib/fls.S   |  126 
 7 files changed, 177 insertions(+), 3 deletions(-)
 create mode 100644 arch/sparc/lib/NG4fls.S
 create mode 100644 arch/sparc/lib/fls.S



Re: [PATCH 0/2] sparc64: Use low latency path to resume idle cpu

2017-07-20 Thread Vijay Kumar


On 7/20/2017 10:45 PM, David Miller wrote:

From: Vijay Kumar <vijay.ac.ku...@oracle.com>
Date: Thu, 20 Jul 2017 22:36:42 -0500


I can give a try :). But looks to me one thing that will go wrong is
irq accounting done in __irq_enter() and rcu_irq_enter().

Actually, the bigger problem is that scheduler_ipi() can raise a
software interrupt, and nothing will invoke it.

Yes, I see your point.


It's turning quite ugly to avoid the IRQ overhead, I must admit.
So ignore this for now.

In the longer term a probably cleaner way to do this is to have
a special direct version of scheduler_ipi() that invokes all the
necessary work, even the rebalance softirq, directly rather than
indirectly.


Sure. Thanks.



Re: [PATCH 0/2] sparc64: Use low latency path to resume idle cpu

2017-07-20 Thread Vijay Kumar


On 7/20/2017 10:45 PM, David Miller wrote:

From: Vijay Kumar 
Date: Thu, 20 Jul 2017 22:36:42 -0500


I can give a try :). But looks to me one thing that will go wrong is
irq accounting done in __irq_enter() and rcu_irq_enter().

Actually, the bigger problem is that scheduler_ipi() can raise a
software interrupt, and nothing will invoke it.

Yes, I see your point.


It's turning quite ugly to avoid the IRQ overhead, I must admit.
So ignore this for now.

In the longer term a probably cleaner way to do this is to have
a special direct version of scheduler_ipi() that invokes all the
necessary work, even the rebalance softirq, directly rather than
indirectly.


Sure. Thanks.



Re: [PATCH 0/2] sparc64: Use low latency path to resume idle cpu

2017-07-20 Thread Vijay Kumar



On 7/20/2017 9:55 PM, David Miller wrote:

From: Vijay Kumar <vijay.ac.ku...@oracle.com>
Date: Thu, 20 Jul 2017 21:44:24 -0500


I had same thoughts initially but I had to go with this approach as
scheduler_ipi is wrapped with irq_enter() and irq_exit(). Whereas POKE
resumes the cpu in process context.

Comments in scheduler_ipi():

  * Not all reschedule IPI handlers call irq_enter/irq_exit, since
  * traditionally all their work was done from the interrupt return
  * path. Now that we actually do some work, we need to make sure
  * we do call them.
  *
  * Some archs already do call them, luckily irq_enter/exit nest
  * properly.
  *
  * Arguably we should visit all archs and update all handlers,
  * however a fair share of IPIs are still resched only so this would
  * somewhat pessimize the simple resched case.
  */
 irq_enter();


I still think we should be able to fake the state such that this
direct schedule_ipi() call will work.

I could be wrong :)
I can give a try :). But looks to me one thing that will go wrong is irq 
accounting done in __irq_enter() and rcu_irq_enter().


Thanks,
Vijay



Re: [PATCH 0/2] sparc64: Use low latency path to resume idle cpu

2017-07-20 Thread Vijay Kumar



On 7/20/2017 9:55 PM, David Miller wrote:

From: Vijay Kumar 
Date: Thu, 20 Jul 2017 21:44:24 -0500


I had same thoughts initially but I had to go with this approach as
scheduler_ipi is wrapped with irq_enter() and irq_exit(). Whereas POKE
resumes the cpu in process context.

Comments in scheduler_ipi():

  * Not all reschedule IPI handlers call irq_enter/irq_exit, since
  * traditionally all their work was done from the interrupt return
  * path. Now that we actually do some work, we need to make sure
  * we do call them.
  *
  * Some archs already do call them, luckily irq_enter/exit nest
  * properly.
  *
  * Arguably we should visit all archs and update all handlers,
  * however a fair share of IPIs are still resched only so this would
  * somewhat pessimize the simple resched case.
  */
 irq_enter();


I still think we should be able to fake the state such that this
direct schedule_ipi() call will work.

I could be wrong :)
I can give a try :). But looks to me one thing that will go wrong is irq 
accounting done in __irq_enter() and rcu_irq_enter().


Thanks,
Vijay



Re: [PATCH 2/2] sparc64: Use cpu_poke to resume idle cpu

2017-07-20 Thread Vijay Kumar



On 7/20/2017 2:58 PM, David Miller wrote:

From: Vijay Kumar <vijay.ac.ku...@oracle.com>
Date: Sat,  8 Jul 2017 14:23:44 -0600


diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c
index 2677312..0b070d5 100644
--- a/arch/sparc/kernel/hvapi.c
+++ b/arch/sparc/kernel/hvapi.c
@@ -189,7 +189,7 @@ void __init sun4v_hvapi_init(void)
  
  	group = HV_GRP_CORE;

major = 1;
-   minor = 1;
+   minor = 6; /* CPU POKE */
if (sun4v_hvapi_register(group, major, &minor))
goto bad;

That CPU POKE comment will not stand the test of time, please remove it.


+   /* Use cpu poke to resume idle cpu if supported*/

Please put a space at the end of the comment and before the "*/"


+   /*cpu poke is registered. */

Please put a space at the beginning of the comment.

And you should decide which way you want to consistently write.
Either capitalize the first word and finish the sentence with
a '.', or don't.  Do it the same way each time.

Thanks.

Sure, I will fix these in v2.

Thanks,
-Vijay



Re: [PATCH 2/2] sparc64: Use cpu_poke to resume idle cpu

2017-07-20 Thread Vijay Kumar



On 7/20/2017 2:58 PM, David Miller wrote:

From: Vijay Kumar 
Date: Sat,  8 Jul 2017 14:23:44 -0600


diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c
index 2677312..0b070d5 100644
--- a/arch/sparc/kernel/hvapi.c
+++ b/arch/sparc/kernel/hvapi.c
@@ -189,7 +189,7 @@ void __init sun4v_hvapi_init(void)
  
  	group = HV_GRP_CORE;

major = 1;
-   minor = 1;
+   minor = 6; /* CPU POKE */
if (sun4v_hvapi_register(group, major, &minor))
goto bad;

That CPU POKE comment will not stand the test of time, please remove it.


+   /* Use cpu poke to resume idle cpu if supported*/

Please put a space at the end of the comment and before the "*/"


+   /*cpu poke is registered. */

Please put a space at the beginning of the comment.

And you should decide which way you want to consistently write.
Either capitalize the first word and finish the sentence with
a '.', or don't.  Do it the same way each time.

Thanks.

Sure, I will fix these in v2.

Thanks,
-Vijay



Re: [PATCH 0/2] sparc64: Use low latency path to resume idle cpu

2017-07-20 Thread Vijay Kumar



On 7/20/2017 2:57 PM, David Miller wrote:

From: Vijay Kumar <vijay.ac.ku...@oracle.com>
Date: Sat,  8 Jul 2017 14:23:42 -0600


cpu_poke is a low latency path to resume the target cpu if suspended
using cpu_yield. Use cpu poke to resume cpu if supported by hypervisor.

 hackbench results (lower is better):
Number of
Process:    w/o fix    with fix
1           0.012      0.010
10          0.021      0.019
100         0.151      0.148

So this only works for a cpu which has yielded.

The kernel sends reschedule events to both idle and non-idle cpus.
That's why you have to have that fallback code to still send the
mondo IPI right?

That is correct.



For the case where POKE works, it seems like completely unnecessary
overhead to set the PIL interrupt.  Just disable local cpu interrupts
and call schedule_ipi() directly.

I bet that improves your benchmark even more.


I had same thoughts initially but I had to go with this approach as 
scheduler_ipi is wrapped with irq_enter() and irq_exit(). Whereas POKE 
resumes the cpu in process context.


Comments in scheduler_ipi():

 * Not all reschedule IPI handlers call irq_enter/irq_exit, since
 * traditionally all their work was done from the interrupt return
 * path. Now that we actually do some work, we need to make sure
 * we do call them.
 *
 * Some archs already do call them, luckily irq_enter/exit nest
 * properly.
 *
 * Arguably we should visit all archs and update all handlers,
 * however a fair share of IPIs are still resched only so this 
would

 * somewhat pessimize the simple resched case.
 */
irq_enter();


-Vijay



Re: [PATCH 0/2] sparc64: Use low latency path to resume idle cpu

2017-07-20 Thread Vijay Kumar



On 7/20/2017 2:57 PM, David Miller wrote:

From: Vijay Kumar 
Date: Sat,  8 Jul 2017 14:23:42 -0600


cpu_poke is a low latency path to resume the target cpu if suspended
using cpu_yield. Use cpu poke to resume cpu if supported by hypervisor.

 hackbench results (lower is better):
Number of
Process:    w/o fix    with fix
1           0.012      0.010
10          0.021      0.019
100         0.151      0.148

So this only works for a cpu which has yielded.

The kernel sends reschedule events to both idle and non-idle cpus.
That's why you have to have that fallback code to still send the
mondo IPI right?

That is correct.



For the case where POKE works, it seems like completely unnecessary
overhead to set the PIL interrupt.  Just disable local cpu interrupts
and call schedule_ipi() directly.

I bet that improves your benchmark even more.


I had same thoughts initially but I had to go with this approach as 
scheduler_ipi is wrapped with irq_enter() and irq_exit(). Whereas POKE 
resumes the cpu in process context.


Comments in scheduler_ipi():

 * Not all reschedule IPI handlers call irq_enter/irq_exit, since
 * traditionally all their work was done from the interrupt return
 * path. Now that we actually do some work, we need to make sure
 * we do call them.
 *
 * Some archs already do call them, luckily irq_enter/exit nest
 * properly.
 *
 * Arguably we should visit all archs and update all handlers,
 * however a fair share of IPIs are still resched only so this 
would

 * somewhat pessimize the simple resched case.
 */
irq_enter();


-Vijay



[PATCH 0/2] sparc64: Use low latency path to resume idle cpu

2017-07-08 Thread Vijay Kumar
cpu_poke is a low latency path to resume the target cpu if suspended
using cpu_yield. Use cpu poke to resume cpu if supported by hypervisor.

 hackbench results (lower is better):
Number of
Process:    w/o fix    with fix
1           0.012      0.010
10          0.021      0.019
100         0.151      0.148

Vijay Kumar (2):
  sparc64: Add a new hypercall CPU_POKE
  sparc64: Use cpu_poke to resume idle cpu

 arch/sparc/include/asm/hypervisor.h |   17 
 arch/sparc/include/asm/smp_64.h |5 ++
 arch/sparc/kernel/hvapi.c   |9 
 arch/sparc/kernel/hvcalls.S |   11 +
 arch/sparc/kernel/process_64.c  |7 +++-
 arch/sparc/kernel/setup_64.c|1 +
 arch/sparc/kernel/smp_64.c  |   75 +-
 7 files changed, 121 insertions(+), 4 deletions(-)
--


[PATCH 0/2] sparc64: Use low latency path to resume idle cpu

2017-07-08 Thread Vijay Kumar
cpu_poke is a low latency path to resume the target cpu if suspended
using cpu_yield. Use cpu poke to resume cpu if supported by hypervisor.

 hackbench results (lower is better):
Number of
Process:    w/o fix    with fix
1           0.012      0.010
10          0.021      0.019
100         0.151      0.148

Vijay Kumar (2):
  sparc64: Add a new hypercall CPU_POKE
  sparc64: Use cpu_poke to resume idle cpu

 arch/sparc/include/asm/hypervisor.h |   17 
 arch/sparc/include/asm/smp_64.h |5 ++
 arch/sparc/kernel/hvapi.c   |9 
 arch/sparc/kernel/hvcalls.S |   11 +
 arch/sparc/kernel/process_64.c  |7 +++-
 arch/sparc/kernel/setup_64.c|1 +
 arch/sparc/kernel/smp_64.c  |   75 +-
 7 files changed, 121 insertions(+), 4 deletions(-)
--


[PATCH 2/2] sparc64: Use cpu_poke to resume idle cpu

2017-07-08 Thread Vijay Kumar
Use cpu_poke hypervisor call to resume idle cpu if supported.

Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com>
Reviewed-by: Anthony Yznaga <anthony.yzn...@oracle.com>
---
 arch/sparc/include/asm/smp_64.h |5 ++
 arch/sparc/kernel/hvapi.c   |2 +-
 arch/sparc/kernel/process_64.c  |7 +++-
 arch/sparc/kernel/setup_64.c|1 +
 arch/sparc/kernel/smp_64.c  |   80 +-
 5 files changed, 90 insertions(+), 5 deletions(-)

diff --git a/arch/sparc/include/asm/smp_64.h b/arch/sparc/include/asm/smp_64.h
index ce2233f..a750892 100644
--- a/arch/sparc/include/asm/smp_64.h
+++ b/arch/sparc/include/asm/smp_64.h
@@ -33,6 +33,9 @@
 DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
 extern cpumask_t cpu_core_map[NR_CPUS];
 
+void smp_init_cpu_poke(void);
+void scheduler_poke(void);
+
 void arch_send_call_function_single_ipi(int cpu);
 void arch_send_call_function_ipi_mask(const struct cpumask *mask);
 
@@ -74,6 +77,8 @@
 #define smp_fetch_global_regs() do { } while (0)
 #define smp_fetch_global_pmu() do { } while (0)
 #define smp_fill_in_cpu_possible_map() do { } while (0)
+#define smp_init_cpu_poke() do { } while (0)
+#define scheduler_poke() do { } while (0)
 
 #endif /* !(CONFIG_SMP) */
 
diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c
index 2677312..0b070d5 100644
--- a/arch/sparc/kernel/hvapi.c
+++ b/arch/sparc/kernel/hvapi.c
@@ -189,7 +189,7 @@ void __init sun4v_hvapi_init(void)
 
group = HV_GRP_CORE;
major = 1;
-   minor = 1;
+   minor = 6; /* CPU POKE */
if (sun4v_hvapi_register(group, major, &minor))
goto bad;
 
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index 1badc49..92448af 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -77,8 +77,13 @@ void arch_cpu_idle(void)
: "=&r" (pstate)
: "i" (PSTATE_IE));
 
-   if (!need_resched() && !cpu_is_offline(smp_processor_id()))
+   if (!need_resched() && !cpu_is_offline(smp_processor_id())) {
sun4v_cpu_yield();
+   /* If resumed by cpu_poke then we need to explicitly
+* call scheduler_ipi().
+*/
+   scheduler_poke();
+   }
 
/* Re-enable interrupts. */
__asm__ __volatile__(
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index 422b178..4ff9fd8 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -356,6 +356,7 @@ void __init start_early_boot(void)
check_if_starfire();
per_cpu_patch();
sun4v_patch();
+   smp_init_cpu_poke();
 
cpu = hard_smp_processor_id();
if (cpu >= NR_CPUS) {
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index fdf3104..9c3131b 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -74,6 +74,9 @@
 
 static cpumask_t smp_commenced_mask;
 
+static DEFINE_PER_CPU(bool, poke);
+static bool cpu_poke;
+
 void smp_info(struct seq_file *m)
 {
int i;
@@ -1394,15 +1397,86 @@ void __init smp_cpus_done(unsigned int max_cpus)
 {
 }
 
+/* Deliver the xcall_receive_signal cross-call to @cpu only. */
+static void send_cpu_ipi(int cpu)
+{
+   xcall_deliver((u64) &xcall_receive_signal,
+                 0, 0, cpumask_of(cpu));
+}
+
+/*
+ * Invoked from arch_cpu_idle() right after sun4v_cpu_yield() returns.
+ * If the cpu_poke path is enabled and this CPU has its per-cpu poke flag
+ * set, clear the flag and raise the PIL_SMP_RECEIVE_SIGNAL soft
+ * interrupt locally; otherwise do nothing.
+ */
+void scheduler_poke(void)
+{
+   if (!cpu_poke || !__this_cpu_read(poke))
+       return;
+
+   __this_cpu_write(poke, false);
+   set_softint(1 << PIL_SMP_RECEIVE_SIGNAL);
+}
+
+/*
+ * Mark @cpu as poked and issue the sun4v_cpu_poke() hypercall.
+ *
+ * The per-cpu flag is set before the hypercall and cleared again if the
+ * hypercall fails.  Returns the hypervisor status (HV_EOK on success).
+ */
+static unsigned long send_cpu_poke(int cpu)
+{
+   unsigned long status;
+
+   per_cpu(poke, cpu) = true;
+   status = sun4v_cpu_poke(cpu);
+   if (status == HV_EOK)
+       return status;
+
+   /* Hypercall failed: undo the flag and report the failure. */
+   per_cpu(poke, cpu) = false;
+   pr_err_ratelimited("%s: sun4v_cpu_poke() fails err=%lu\n",
+                      __func__, status);
+   return status;
+}
+
 void smp_send_reschedule(int cpu)
 {
if (cpu == smp_processor_id()) {
WARN_ON_ONCE(preemptible());
set_softint(1 << PIL_SMP_RECEIVE_SIGNAL);
-   } else {
-   xcall_deliver((u64) &xcall_receive_signal,
- 0, 0, cpumask_of(cpu));
+   return;
+   }
+
+   /* Use cpu poke to resume idle cpu if supported*/
+   if (cpu_poke && idle_cpu(cpu)) {
+   unsigned long ret;
+
+   ret = send_cpu_poke(cpu);
+   if (ret == HV_EOK)
+   return;
}
+
+   /* Use IPI in following cases:
+* - cpu poke not supported
+* - cpu not idle
+* - send_cpu_poke() returns with error.
+*/
+   send_cpu_ipi(cpu);
+}
+
+void smp_init_cpu_poke(void)
+{
+   unsigned long major;
+

[PATCH 1/2] sparc64: Add a new hypercall CPU_POKE

2017-07-08 Thread Vijay Kumar
This adds a new hypercall CPU_POKE for quickly waking up an idle CPU.
CPU POKE should only be sent to valid  non-local CPUs.

Signed-off-by: Rob Gardner <rob.gard...@oracle.com>
Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com>
Reviewed-by: Anthony Yznaga <anthony.yzn...@oracle.com>
---
 arch/sparc/include/asm/hypervisor.h |   18 ++
 arch/sparc/kernel/hvcalls.S |   11 +++
 2 files changed, 29 insertions(+), 0 deletions(-)

diff --git a/arch/sparc/include/asm/hypervisor.h 
b/arch/sparc/include/asm/hypervisor.h
index 73cb897..3dc9215 100644
--- a/arch/sparc/include/asm/hypervisor.h
+++ b/arch/sparc/include/asm/hypervisor.h
@@ -298,6 +298,24 @@ unsigned long sun4v_cpu_start(unsigned long cpuid,
 unsigned long sun4v_cpu_yield(void);
 #endif
 
+/* cpu_poke()
+ * TRAP:       HV_FAST_TRAP
+ * FUNCTION:   HV_FAST_CPU_POKE
+ * ARG0:       cpuid
+ * RET0:       status
+ * ERRORS:     ENOCPU  cpuid refers to a CPU that does not exist
+ *             EINVAL  cpuid is current CPU
+ *
+ * Poke CPU cpuid. If the target CPU is currently suspended having
+ * invoked the cpu-yield service, that vCPU will be resumed.
+ * Poke interrupts may only be sent to valid, non-local CPUs.
+ * It is not legal to poke the current vCPU.
+ */
+#define HV_FAST_CPU_POKE        0x13
+
+#ifndef __ASSEMBLY__
+unsigned long sun4v_cpu_poke(unsigned long cpuid);
+#endif
+
 /* cpu_qconf()
  * TRAP:   HV_FAST_TRAP
  * FUNCTION:   HV_FAST_CPU_QCONF
diff --git a/arch/sparc/kernel/hvcalls.S b/arch/sparc/kernel/hvcalls.S
index 4116ee5..e57007f 100644
--- a/arch/sparc/kernel/hvcalls.S
+++ b/arch/sparc/kernel/hvcalls.S
@@ -106,6 +106,17 @@ ENTRY(sun4v_cpu_yield)
 nop
 ENDPROC(sun4v_cpu_yield)
 
+   /* sun4v_cpu_poke: issue the HV_FAST_CPU_POKE hypervisor fast trap.
+    *
+    * %o0: cpuid
+    *
+    * returns %o0: status
+    */
+ENTRY(sun4v_cpu_poke)
+   mov HV_FAST_CPU_POKE, %o5  /* function number for the fast trap */
+   ta  HV_FAST_TRAP
+   retl
+nop    /* delay slot of retl */
+ENDPROC(sun4v_cpu_poke)
+
/* %o0: type
 * %o1: queue paddr
 * %o2: num queue entries
-- 
1.7.1



[PATCH 2/2] sparc64: Use cpu_poke to resume idle cpu

2017-07-08 Thread Vijay Kumar
Use cpu_poke hypervisor call to resume idle cpu if supported.

Signed-off-by: Vijay Kumar 
Reviewed-by: Anthony Yznaga 
---
 arch/sparc/include/asm/smp_64.h |5 ++
 arch/sparc/kernel/hvapi.c   |2 +-
 arch/sparc/kernel/process_64.c  |7 +++-
 arch/sparc/kernel/setup_64.c|1 +
 arch/sparc/kernel/smp_64.c  |   80 +-
 5 files changed, 90 insertions(+), 5 deletions(-)

diff --git a/arch/sparc/include/asm/smp_64.h b/arch/sparc/include/asm/smp_64.h
index ce2233f..a750892 100644
--- a/arch/sparc/include/asm/smp_64.h
+++ b/arch/sparc/include/asm/smp_64.h
@@ -33,6 +33,9 @@
 DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
 extern cpumask_t cpu_core_map[NR_CPUS];
 
+void smp_init_cpu_poke(void);
+void scheduler_poke(void);
+
 void arch_send_call_function_single_ipi(int cpu);
 void arch_send_call_function_ipi_mask(const struct cpumask *mask);
 
@@ -74,6 +77,8 @@
 #define smp_fetch_global_regs() do { } while (0)
 #define smp_fetch_global_pmu() do { } while (0)
 #define smp_fill_in_cpu_possible_map() do { } while (0)
+#define smp_init_cpu_poke() do { } while (0)
+#define scheduler_poke() do { } while (0)
 
 #endif /* !(CONFIG_SMP) */
 
diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c
index 2677312..0b070d5 100644
--- a/arch/sparc/kernel/hvapi.c
+++ b/arch/sparc/kernel/hvapi.c
@@ -189,7 +189,7 @@ void __init sun4v_hvapi_init(void)
 
group = HV_GRP_CORE;
major = 1;
-   minor = 1;
+   minor = 6; /* CPU POKE */
if (sun4v_hvapi_register(group, major, &minor))
goto bad;
 
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index 1badc49..92448af 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -77,8 +77,13 @@ void arch_cpu_idle(void)
: "=&r" (pstate)
: "i" (PSTATE_IE));
 
-   if (!need_resched() && !cpu_is_offline(smp_processor_id()))
+   if (!need_resched() && !cpu_is_offline(smp_processor_id())) {
sun4v_cpu_yield();
+   /* If resumed by cpu_poke then we need to explicitly
+* call scheduler_ipi().
+*/
+   scheduler_poke();
+   }
 
/* Re-enable interrupts. */
__asm__ __volatile__(
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index 422b178..4ff9fd8 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -356,6 +356,7 @@ void __init start_early_boot(void)
check_if_starfire();
per_cpu_patch();
sun4v_patch();
+   smp_init_cpu_poke();
 
cpu = hard_smp_processor_id();
if (cpu >= NR_CPUS) {
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index fdf3104..9c3131b 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -74,6 +74,9 @@
 
 static cpumask_t smp_commenced_mask;
 
+static DEFINE_PER_CPU(bool, poke);
+static bool cpu_poke;
+
 void smp_info(struct seq_file *m)
 {
int i;
@@ -1394,15 +1397,86 @@ void __init smp_cpus_done(unsigned int max_cpus)
 {
 }
 
+/* Deliver the xcall_receive_signal cross-call to @cpu only. */
+static void send_cpu_ipi(int cpu)
+{
+   xcall_deliver((u64) &xcall_receive_signal,
+                 0, 0, cpumask_of(cpu));
+}
+
+/*
+ * Invoked from arch_cpu_idle() right after sun4v_cpu_yield() returns.
+ * If the cpu_poke path is enabled and this CPU has its per-cpu poke flag
+ * set, clear the flag and raise the PIL_SMP_RECEIVE_SIGNAL soft
+ * interrupt locally; otherwise do nothing.
+ */
+void scheduler_poke(void)
+{
+   if (!cpu_poke || !__this_cpu_read(poke))
+       return;
+
+   __this_cpu_write(poke, false);
+   set_softint(1 << PIL_SMP_RECEIVE_SIGNAL);
+}
+
+/*
+ * Mark @cpu as poked and issue the sun4v_cpu_poke() hypercall.
+ *
+ * The per-cpu flag is set before the hypercall and cleared again if the
+ * hypercall fails.  Returns the hypervisor status (HV_EOK on success).
+ */
+static unsigned long send_cpu_poke(int cpu)
+{
+   unsigned long status;
+
+   per_cpu(poke, cpu) = true;
+   status = sun4v_cpu_poke(cpu);
+   if (status == HV_EOK)
+       return status;
+
+   /* Hypercall failed: undo the flag and report the failure. */
+   per_cpu(poke, cpu) = false;
+   pr_err_ratelimited("%s: sun4v_cpu_poke() fails err=%lu\n",
+                      __func__, status);
+   return status;
+}
+
 void smp_send_reschedule(int cpu)
 {
if (cpu == smp_processor_id()) {
WARN_ON_ONCE(preemptible());
set_softint(1 << PIL_SMP_RECEIVE_SIGNAL);
-   } else {
-   xcall_deliver((u64) &xcall_receive_signal,
- 0, 0, cpumask_of(cpu));
+   return;
+   }
+
+   /* Use cpu poke to resume idle cpu if supported*/
+   if (cpu_poke && idle_cpu(cpu)) {
+   unsigned long ret;
+
+   ret = send_cpu_poke(cpu);
+   if (ret == HV_EOK)
+   return;
}
+
+   /* Use IPI in following cases:
+* - cpu poke not supported
+* - cpu not idle
+* - send_cpu_poke() returns with error.
+*/
+   send_cpu_ipi(cpu);
+}
+
+void smp_init_cpu_poke(void)
+{
+   unsigned long major;
+   unsigned long minor;
+   int ret;
+
+   if (tlb_type !

[PATCH 1/2] sparc64: Add a new hypercall CPU_POKE

2017-07-08 Thread Vijay Kumar
This adds a new hypercall CPU_POKE for quickly waking up an idle CPU.
CPU POKE should only be sent to valid  non-local CPUs.

Signed-off-by: Rob Gardner 
Signed-off-by: Vijay Kumar 
Reviewed-by: Anthony Yznaga 
---
 arch/sparc/include/asm/hypervisor.h |   18 ++
 arch/sparc/kernel/hvcalls.S |   11 +++
 2 files changed, 29 insertions(+), 0 deletions(-)

diff --git a/arch/sparc/include/asm/hypervisor.h 
b/arch/sparc/include/asm/hypervisor.h
index 73cb897..3dc9215 100644
--- a/arch/sparc/include/asm/hypervisor.h
+++ b/arch/sparc/include/asm/hypervisor.h
@@ -298,6 +298,24 @@ unsigned long sun4v_cpu_start(unsigned long cpuid,
 unsigned long sun4v_cpu_yield(void);
 #endif
 
+/* cpu_poke()
+ * TRAP:       HV_FAST_TRAP
+ * FUNCTION:   HV_FAST_CPU_POKE
+ * ARG0:       cpuid
+ * RET0:       status
+ * ERRORS:     ENOCPU  cpuid refers to a CPU that does not exist
+ *             EINVAL  cpuid is current CPU
+ *
+ * Poke CPU cpuid. If the target CPU is currently suspended having
+ * invoked the cpu-yield service, that vCPU will be resumed.
+ * Poke interrupts may only be sent to valid, non-local CPUs.
+ * It is not legal to poke the current vCPU.
+ */
+#define HV_FAST_CPU_POKE        0x13
+
+#ifndef __ASSEMBLY__
+unsigned long sun4v_cpu_poke(unsigned long cpuid);
+#endif
+
 /* cpu_qconf()
  * TRAP:   HV_FAST_TRAP
  * FUNCTION:   HV_FAST_CPU_QCONF
diff --git a/arch/sparc/kernel/hvcalls.S b/arch/sparc/kernel/hvcalls.S
index 4116ee5..e57007f 100644
--- a/arch/sparc/kernel/hvcalls.S
+++ b/arch/sparc/kernel/hvcalls.S
@@ -106,6 +106,17 @@ ENTRY(sun4v_cpu_yield)
 nop
 ENDPROC(sun4v_cpu_yield)
 
+   /* sun4v_cpu_poke: issue the HV_FAST_CPU_POKE hypervisor fast trap.
+    *
+    * %o0: cpuid
+    *
+    * returns %o0: status
+    */
+ENTRY(sun4v_cpu_poke)
+   mov HV_FAST_CPU_POKE, %o5  /* function number for the fast trap */
+   ta  HV_FAST_TRAP
+   retl
+nop    /* delay slot of retl */
+ENDPROC(sun4v_cpu_poke)
+
/* %o0: type
 * %o1: queue paddr
 * %o2: num queue entries
-- 
1.7.1



Re: [PATCH v3 0/4] sparc64: Jump to boot prom from console on panic

2017-02-01 Thread Vijay Kumar


On 2/1/2017 1:50 PM, David Miller wrote:

From: Vijay Kumar <vijay.ac.ku...@oracle.com>
Date: Wed,  1 Feb 2017 11:34:36 -0800


Currently Stop-A (L1A) does not make the kernel switch to OBP on panic.

This is intentional, the kernel prints a message telling the user to
press break (L1-A) if they want to drop out of the kernel and we force
the break to be allowed by setting stop_a_enabled.
The problem is that pressing BRK after panic does not drop to OK prompt 
(when
stop_a_enabled is set).  So the kernel message to press Stop-A to return 
to boot

prom is  misleading in this case.

I'm wondering why there is so much effort being directed into BRK
behavior.

User can drop into ok prompt from the running kernel and as well as from the
panicked kernel. Pressing single break to jump to ok prompt conflicts with
sysrq key combination (from console, BRK + sysrq_key). To be consistent
across both the cases,  user will have to send BRK twice in order to drop to
ok prompt.  Does this sound reasonable?


If you want to break into the OK prompt, have the reboot-cmd
environment variable set appropriately, and simply hit BRK and it will
work in both ldom and non-ldom environments.
Kernel does not print message "Press Stop-A (L1-A) to ..." for the case 
when it is
expected to reboot on panic. Rather, it goes through different path in 
panic() when

kernel.panic is _not_ set to 0. Here, patch is addressing the case when
kernel.panic=0 (i.e not to reboot on panic).

Thanks,
Vijay


Re: [PATCH v3 0/4] sparc64: Jump to boot prom from console on panic

2017-02-01 Thread Vijay Kumar


On 2/1/2017 1:50 PM, David Miller wrote:

From: Vijay Kumar 
Date: Wed,  1 Feb 2017 11:34:36 -0800


Currently Stop-A (L1A) does not make the kernel switch to OBP on panic.

This is intentional, the kernel prints a message telling the user to
press break (L1-A) if they want to drop out of the kernel and we force
the break to be allowed by setting stop_a_enabled.
The problem is that pressing BRK after panic does not drop to OK prompt 
(when
stop_a_enabled is set).  So the kernel message to press Stop-A to return 
to boot

prom is  misleading in this case.

I'm wondering why there is so much effort being directed into BRK
behavior.

User can drop into ok prompt from the running kernel and as well as from the
panicked kernel. Pressing single break to jump to ok prompt conflicts with
sysrq key combination (from console, BRK + sysrq_key). To be consistent
across both the cases,  user will have to send BRK twice in order to drop to
ok prompt.  Does this sound reasonable?


If you want to break into the OK prompt, have the reboot-cmd
environment variable set appropriately, and simply hit BRK and it will
work in both ldom and non-ldom environments.
Kernel does not print message "Press Stop-A (L1-A) to ..." for the case 
when it is
expected to reboot on panic. Rather, it goes through different path in 
panic() when

kernel.panic is _not_ set to 0. Here, patch is addressing the case when
kernel.panic=0 (i.e not to reboot on panic).

Thanks,
Vijay


[PATCH v3 1/4] sparc64: Set cpu state to offline when stopped

2017-02-01 Thread Vijay Kumar
CPU needs to be marked offline before stopping it. When not marked
offline, the xcall receives HV_EWOULDBLOCK and so assumes that not all
CPUs received the message, and retries. After 10000 retries, it finally
fails with fatal mondo timeout.

Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com>
---
 arch/sparc/kernel/smp_64.c |3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 0ce347f..712bf1b 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1443,6 +1443,7 @@ void __irq_entry smp_receive_signal_client(int irq, 
struct pt_regs *regs)
 
 static void stop_this_cpu(void *dummy)
 {
+   set_cpu_online(smp_processor_id(), false);
prom_stopself();
 }
 
@@ -1454,6 +1455,8 @@ void smp_send_stop(void)
for_each_online_cpu(cpu) {
if (cpu == smp_processor_id())
continue;
+
+   set_cpu_online(cpu, false);
 #ifdef CONFIG_SUN_LDOMS
if (ldom_domaining_enabled) {
unsigned long hv_err;
-- 
1.7.1



[PATCH v3 4/4] Documentation/sparc: Steps for sending break on sunhv console

2017-02-01 Thread Vijay Kumar
Documented the steps for sending break on sunhv console.

Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com>
---
 Documentation/sparc/console.txt |9 +
 1 files changed, 9 insertions(+), 0 deletions(-)

diff --git a/Documentation/sparc/console.txt b/Documentation/sparc/console.txt
new file mode 100644
index 000..5aa735a
--- /dev/null
+++ b/Documentation/sparc/console.txt
@@ -0,0 +1,9 @@
+Steps for sending 'break' on sunhv console:
+===
+
+On Baremetal:
+   1. press   Esc + 'B'
+
+On LDOM:
+   1. pressCtrl + ']'
+   2. telnet> send  break
-- 
1.7.1



[PATCH v3 2/4] sparc64: Migrate hvcons irq to panicked cpu

2017-02-01 Thread Vijay Kumar
On panic, all other CPUs are stopped except the one which had
hit panic. To keep console alive, we need to migrate hvcons irq
to panicked CPU.

Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com>
---
v2->v3: Added SERIAL_SUNHV conditional group for
sunhv_migrate_hvcons_irq().
---
 arch/sparc/include/asm/setup.h |5 -
 arch/sparc/kernel/smp_64.c |6 +-
 drivers/tty/serial/sunhv.c |6 ++
 3 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/arch/sparc/include/asm/setup.h b/arch/sparc/include/asm/setup.h
index 29d64b1..478bf6b 100644
--- a/arch/sparc/include/asm/setup.h
+++ b/arch/sparc/include/asm/setup.h
@@ -59,8 +59,11 @@ static inline int con_is_present(void)
 extern atomic_t dcpage_flushes_xcall;
 
 extern int sysctl_tsb_ratio;
-#endif
 
+#ifdef CONFIG_SERIAL_SUNHV
+void sunhv_migrate_hvcons_irq(int cpu);
+#endif
+#endif
 void sun_do_break(void);
 extern int stop_a_enabled;
 extern int scons_pwroff;
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 712bf1b..90a02cb 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1452,8 +1452,12 @@ void smp_send_stop(void)
int cpu;
 
if (tlb_type == hypervisor) {
+   int this_cpu = smp_processor_id();
+#ifdef CONFIG_SERIAL_SUNHV
+   sunhv_migrate_hvcons_irq(this_cpu);
+#endif
for_each_online_cpu(cpu) {
-   if (cpu == smp_processor_id())
+   if (cpu == this_cpu)
continue;
 
set_cpu_online(cpu, false);
diff --git a/drivers/tty/serial/sunhv.c b/drivers/tty/serial/sunhv.c
index 99ef5c6..039ae05 100644
--- a/drivers/tty/serial/sunhv.c
+++ b/drivers/tty/serial/sunhv.c
@@ -398,6 +398,12 @@ static int sunhv_verify_port(struct uart_port *port, 
struct serial_struct *ser)
 
 static struct uart_port *sunhv_port;
 
+void sunhv_migrate_hvcons_irq(int cpu)
+{
+   /* Migrate hvcons irq to param cpu */
+   irq_force_affinity(sunhv_port->irq, cpumask_of(cpu));
+}
+
 /* Copy 's' into the con_write_page, decoding "\n" into
  * "\r\n" along the way.  We have to return two lengths
  * because the caller needs to know how much to advance
-- 
1.7.1



[PATCH v3 1/4] sparc64: Set cpu state to offline when stopped

2017-02-01 Thread Vijay Kumar
CPU needs to be marked offline before stopping it. When not marked
offline, the xcall receives HV_EWOULDBLOCK and so assumes that not all
CPUs received the message, and retries. After 10000 retries, it finally
fails with fatal mondo timeout.

Signed-off-by: Vijay Kumar 
---
 arch/sparc/kernel/smp_64.c |3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 0ce347f..712bf1b 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1443,6 +1443,7 @@ void __irq_entry smp_receive_signal_client(int irq, 
struct pt_regs *regs)
 
 static void stop_this_cpu(void *dummy)
 {
+   set_cpu_online(smp_processor_id(), false);
prom_stopself();
 }
 
@@ -1454,6 +1455,8 @@ void smp_send_stop(void)
for_each_online_cpu(cpu) {
if (cpu == smp_processor_id())
continue;
+
+   set_cpu_online(cpu, false);
 #ifdef CONFIG_SUN_LDOMS
if (ldom_domaining_enabled) {
unsigned long hv_err;
-- 
1.7.1



[PATCH v3 4/4] Documentation/sparc: Steps for sending break on sunhv console

2017-02-01 Thread Vijay Kumar
Documented the steps for sending break on sunhv console.

Signed-off-by: Vijay Kumar 
---
 Documentation/sparc/console.txt |9 +
 1 files changed, 9 insertions(+), 0 deletions(-)

diff --git a/Documentation/sparc/console.txt b/Documentation/sparc/console.txt
new file mode 100644
index 000..5aa735a
--- /dev/null
+++ b/Documentation/sparc/console.txt
@@ -0,0 +1,9 @@
+Steps for sending 'break' on sunhv console:
+===
+
+On Baremetal:
+   1. press   Esc + 'B'
+
+On LDOM:
+   1. pressCtrl + ']'
+   2. telnet> send  break
-- 
1.7.1



[PATCH v3 2/4] sparc64: Migrate hvcons irq to panicked cpu

2017-02-01 Thread Vijay Kumar
On panic, all other CPUs are stopped except the one which had
hit panic. To keep console alive, we need to migrate hvcons irq
to panicked CPU.

Signed-off-by: Vijay Kumar 
---
v2->v3: Added SERIAL_SUNHV conditional group for
sunhv_migrate_hvcons_irq().
---
 arch/sparc/include/asm/setup.h |5 -
 arch/sparc/kernel/smp_64.c |6 +-
 drivers/tty/serial/sunhv.c |6 ++
 3 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/arch/sparc/include/asm/setup.h b/arch/sparc/include/asm/setup.h
index 29d64b1..478bf6b 100644
--- a/arch/sparc/include/asm/setup.h
+++ b/arch/sparc/include/asm/setup.h
@@ -59,8 +59,11 @@ static inline int con_is_present(void)
 extern atomic_t dcpage_flushes_xcall;
 
 extern int sysctl_tsb_ratio;
-#endif
 
+#ifdef CONFIG_SERIAL_SUNHV
+void sunhv_migrate_hvcons_irq(int cpu);
+#endif
+#endif
 void sun_do_break(void);
 extern int stop_a_enabled;
 extern int scons_pwroff;
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 712bf1b..90a02cb 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1452,8 +1452,12 @@ void smp_send_stop(void)
int cpu;
 
if (tlb_type == hypervisor) {
+   int this_cpu = smp_processor_id();
+#ifdef CONFIG_SERIAL_SUNHV
+   sunhv_migrate_hvcons_irq(this_cpu);
+#endif
for_each_online_cpu(cpu) {
-   if (cpu == smp_processor_id())
+   if (cpu == this_cpu)
continue;
 
set_cpu_online(cpu, false);
diff --git a/drivers/tty/serial/sunhv.c b/drivers/tty/serial/sunhv.c
index 99ef5c6..039ae05 100644
--- a/drivers/tty/serial/sunhv.c
+++ b/drivers/tty/serial/sunhv.c
@@ -398,6 +398,12 @@ static int sunhv_verify_port(struct uart_port *port, 
struct serial_struct *ser)
 
 static struct uart_port *sunhv_port;
 
+void sunhv_migrate_hvcons_irq(int cpu)
+{
+   /* Migrate hvcons irq to param cpu */
+   irq_force_affinity(sunhv_port->irq, cpumask_of(cpu));
+}
+
 /* Copy 's' into the con_write_page, decoding "\n" into
  * "\r\n" along the way.  We have to return two lengths
  * because the caller needs to know how much to advance
-- 
1.7.1



[PATCH v3 3/4] sparc64: Send break twice from console to return to boot prom

2017-02-01 Thread Vijay Kumar
Now we can also jump to boot prom from sunhv console by sending
break twice on console for both running and panicked kernel
cases.

Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com>
---
 drivers/tty/serial/sunhv.c |6 +-
 kernel/panic.c |3 ++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/serial/sunhv.c b/drivers/tty/serial/sunhv.c
index 039ae05..8975d9c 100644
--- a/drivers/tty/serial/sunhv.c
+++ b/drivers/tty/serial/sunhv.c
@@ -116,7 +116,7 @@ static int receive_chars_getchar(struct uart_port *port)
 
 static int receive_chars_read(struct uart_port *port)
 {
-   int saw_console_brk = 0;
+   static int saw_console_brk;
int limit = 1;
 
while (limit-- > 0) {
@@ -128,6 +128,9 @@ static int receive_chars_read(struct uart_port *port)
bytes_read = 0;
 
if (stat == CON_BREAK) {
+   if (saw_console_brk)
+   sun_do_break();
+
if (uart_handle_break(port))
continue;
saw_console_brk = 1;
@@ -151,6 +154,7 @@ static int receive_chars_read(struct uart_port *port)
if (port->sysrq != 0 &&  *con_read_page) {
for (i = 0; i < bytes_read; i++)
uart_handle_sysrq_char(port, con_read_page[i]);
+   saw_console_brk = 0;
}
 
if (port->state == NULL)
diff --git a/kernel/panic.c b/kernel/panic.c
index 08aa88d..70f799d 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -273,7 +273,8 @@ void panic(const char *fmt, ...)
extern int stop_a_enabled;
/* Make sure the user can actually press Stop-A (L1-A) */
stop_a_enabled = 1;
-   pr_emerg("Press Stop-A (L1-A) to return to the boot prom\n");
+   pr_emerg("Press Stop-A (L1-A) from sun keyboard or send break\n"
+"twice on console to return to the boot prom\n");
}
 #endif
 #if defined(CONFIG_S390)
-- 
1.7.1



[PATCH v3 3/4] sparc64: Send break twice from console to return to boot prom

2017-02-01 Thread Vijay Kumar
Now we can also jump to boot prom from sunhv console by sending
break twice on console for both running and panicked kernel
cases.

Signed-off-by: Vijay Kumar 
---
 drivers/tty/serial/sunhv.c |6 +-
 kernel/panic.c |3 ++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/serial/sunhv.c b/drivers/tty/serial/sunhv.c
index 039ae05..8975d9c 100644
--- a/drivers/tty/serial/sunhv.c
+++ b/drivers/tty/serial/sunhv.c
@@ -116,7 +116,7 @@ static int receive_chars_getchar(struct uart_port *port)
 
 static int receive_chars_read(struct uart_port *port)
 {
-   int saw_console_brk = 0;
+   static int saw_console_brk;
int limit = 1;
 
while (limit-- > 0) {
@@ -128,6 +128,9 @@ static int receive_chars_read(struct uart_port *port)
bytes_read = 0;
 
if (stat == CON_BREAK) {
+   if (saw_console_brk)
+   sun_do_break();
+
if (uart_handle_break(port))
continue;
saw_console_brk = 1;
@@ -151,6 +154,7 @@ static int receive_chars_read(struct uart_port *port)
if (port->sysrq != 0 &&  *con_read_page) {
for (i = 0; i < bytes_read; i++)
uart_handle_sysrq_char(port, con_read_page[i]);
+   saw_console_brk = 0;
}
 
if (port->state == NULL)
diff --git a/kernel/panic.c b/kernel/panic.c
index 08aa88d..70f799d 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -273,7 +273,8 @@ void panic(const char *fmt, ...)
extern int stop_a_enabled;
/* Make sure the user can actually press Stop-A (L1-A) */
stop_a_enabled = 1;
-   pr_emerg("Press Stop-A (L1-A) to return to the boot prom\n");
+   pr_emerg("Press Stop-A (L1-A) from sun keyboard or send break\n"
+"twice on console to return to the boot prom\n");
}
 #endif
 #if defined(CONFIG_S390)
-- 
1.7.1



[PATCH v3 0/4] sparc64: Jump to boot prom from console on panic

2017-02-01 Thread Vijay Kumar
V3 changes:
  - patch 02/04: Added SERIAL_SUNHV conditional group for
sunhv_migrate_hvcons_irq in smp_send_stop().
V2 changes:
 -  Added cover letter patch

Hi,

Currently Stop-A (L1A) does not make the kernel switch to OBP on panic. This
patchset addresses this issue. Also, now we can cause a jump to OBP by sending
'break' twice from sunhv console. On bare metal, one can send a break by
typing Esc + 'B' + Sysrq (or whatever). On LDOM, press Ctrl + ] in telnet,
and then "send break" at the telnet prompt.

Thanks.

  sparc64: Set cpu state to offline when stopped
  sparc64: Migrate hvcons irq to panicked cpu
  sparc64: Send break twice from console to return to boot prom
  Documentation/sparc: Steps for sending break on sunhv console

 Documentation/sparc/console.txt |9 +
 arch/sparc/include/asm/setup.h  |1 +
 arch/sparc/kernel/smp_64.c  |9 -
 drivers/tty/serial/sunhv.c  |   12 +++-
 kernel/panic.c  |3 ++-
 5 files changed, 31 insertions(+), 3 deletions(-)
 create mode 100644 Documentation/sparc/console.txt



--
1.7.1


[PATCH v3 0/4] sparc64: Jump to boot prom from console on panic

2017-02-01 Thread Vijay Kumar
V3 changes:
  - patch 02/04: Added SERIAL_SUNHV conditional group for
sunhv_migrate_hvcons_irq in smp_send_stop().
V2 changes:
 -  Added cover letter patch

Hi,

Currently Stop-A (L1A) does not make the kernel switch to OBP on panic. This
patchset addresses this issue. Also, now we can cause a jump to OBP by sending
'break' twice from sunhv console. On bare metal, one can send a break by
typing Esc + 'B' + Sysrq (or whatever). On LDOM, press Ctrl + ] in telnet,
and then "send break" at the telnet prompt.

Thanks.

  sparc64: Set cpu state to offline when stopped
  sparc64: Migrate hvcons irq to panicked cpu
  sparc64: Send break twice from console to return to boot prom
  Documentation/sparc: Steps for sending break on sunhv console

 Documentation/sparc/console.txt |9 +
 arch/sparc/include/asm/setup.h  |1 +
 arch/sparc/kernel/smp_64.c  |9 -
 drivers/tty/serial/sunhv.c  |   12 +++-
 kernel/panic.c  |3 ++-
 5 files changed, 31 insertions(+), 3 deletions(-)
 create mode 100644 Documentation/sparc/console.txt



--
1.7.1


Re: [PATCH v2 2/4] sparc64: Migrate hvcons irq to panicked cpu

2016-11-28 Thread Vijay Kumar


On 11/19/2016 9:48 AM, David Miller wrote:

From: Vijay Kumar <vijay.ac.ku...@oracle.com>
Date: Fri, 11 Nov 2016 10:11:57 -0800


@@ -1444,8 +1444,12 @@ void smp_send_stop(void)
int cpu;
  
  	if (tlb_type == hypervisor) {

+   int this_cpu = smp_processor_id();
+
+   sunhv_migrate_hvcons_irq(this_cpu);
+

You can't unconditionally call into code that might be configured
out by a Kconfig option, as you are doing here.

If SERIAL_SUNHV=n this change will thus result in a build error.

Thanks for your comment. I will fix this and send revised version of patch.

Thanks,
Vijay


Re: [PATCH v2 2/4] sparc64: Migrate hvcons irq to panicked cpu

2016-11-28 Thread Vijay Kumar


On 11/19/2016 9:48 AM, David Miller wrote:

From: Vijay Kumar 
Date: Fri, 11 Nov 2016 10:11:57 -0800


@@ -1444,8 +1444,12 @@ void smp_send_stop(void)
int cpu;
  
  	if (tlb_type == hypervisor) {

+   int this_cpu = smp_processor_id();
+
+   sunhv_migrate_hvcons_irq(this_cpu);
+

You can't unconditionally call into code that might be configured
out by a Kconfig option, as you are doing here.

If SERIAL_SUNHV=n this change will thus result in a build error.

Thanks for your comment. I will fix this and send revised version of patch.

Thanks,
Vijay


[PATCH v2 3/4] sparc64: Send break twice from console to return to boot prom

2016-11-11 Thread Vijay Kumar
Now we can also jump to boot prom from sunhv console by sending
break twice on console for both running and panicked kernel
cases.

Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com>
---
 drivers/tty/serial/sunhv.c |6 +-
 kernel/panic.c |3 ++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/serial/sunhv.c b/drivers/tty/serial/sunhv.c
index 59828d8..33c35b4 100644
--- a/drivers/tty/serial/sunhv.c
+++ b/drivers/tty/serial/sunhv.c
@@ -116,7 +116,7 @@ static int receive_chars_getchar(struct uart_port *port)
 
 static int receive_chars_read(struct uart_port *port)
 {
-   int saw_console_brk = 0;
+   static int saw_console_brk;
int limit = 1;
 
while (limit-- > 0) {
@@ -128,6 +128,9 @@ static int receive_chars_read(struct uart_port *port)
bytes_read = 0;
 
if (stat == CON_BREAK) {
+   if (saw_console_brk)
+   sun_do_break();
+
if (uart_handle_break(port))
continue;
saw_console_brk = 1;
@@ -151,6 +154,7 @@ static int receive_chars_read(struct uart_port *port)
if (port->sysrq != 0 &&  *con_read_page) {
for (i = 0; i < bytes_read; i++)
uart_handle_sysrq_char(port, con_read_page[i]);
+   saw_console_brk = 0;
}
 
if (port->state == NULL)
diff --git a/kernel/panic.c b/kernel/panic.c
index ca8cea1..4fe3b28 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -240,7 +240,8 @@ void panic(const char *fmt, ...)
extern int stop_a_enabled;
/* Make sure the user can actually press Stop-A (L1-A) */
stop_a_enabled = 1;
-   pr_emerg("Press Stop-A (L1-A) to return to the boot prom\n");
+   pr_emerg("Press Stop-A (L1-A) from sun keyboard or send break\n"
+"twice on console to return to the boot prom\n");
}
 #endif
 #if defined(CONFIG_S390)
-- 
1.7.1



[PATCH v2 3/4] sparc64: Send break twice from console to return to boot prom

2016-11-11 Thread Vijay Kumar
Now we can also jump to boot prom from sunhv console by sending
break twice on console for both running and panicked kernel
cases.

Signed-off-by: Vijay Kumar 
---
 drivers/tty/serial/sunhv.c |6 +-
 kernel/panic.c |3 ++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/serial/sunhv.c b/drivers/tty/serial/sunhv.c
index 59828d8..33c35b4 100644
--- a/drivers/tty/serial/sunhv.c
+++ b/drivers/tty/serial/sunhv.c
@@ -116,7 +116,7 @@ static int receive_chars_getchar(struct uart_port *port)
 
 static int receive_chars_read(struct uart_port *port)
 {
-   int saw_console_brk = 0;
+   static int saw_console_brk;
int limit = 1;
 
while (limit-- > 0) {
@@ -128,6 +128,9 @@ static int receive_chars_read(struct uart_port *port)
bytes_read = 0;
 
if (stat == CON_BREAK) {
+   if (saw_console_brk)
+   sun_do_break();
+
if (uart_handle_break(port))
continue;
saw_console_brk = 1;
@@ -151,6 +154,7 @@ static int receive_chars_read(struct uart_port *port)
if (port->sysrq != 0 &&  *con_read_page) {
for (i = 0; i < bytes_read; i++)
uart_handle_sysrq_char(port, con_read_page[i]);
+   saw_console_brk = 0;
}
 
if (port->state == NULL)
diff --git a/kernel/panic.c b/kernel/panic.c
index ca8cea1..4fe3b28 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -240,7 +240,8 @@ void panic(const char *fmt, ...)
extern int stop_a_enabled;
/* Make sure the user can actually press Stop-A (L1-A) */
stop_a_enabled = 1;
-   pr_emerg("Press Stop-A (L1-A) to return to the boot prom\n");
+   pr_emerg("Press Stop-A (L1-A) from sun keyboard or send break\n"
+"twice on console to return to the boot prom\n");
}
 #endif
 #if defined(CONFIG_S390)
-- 
1.7.1



[PATCH v2 1/4] sparc64: Set cpu state to offline when stopped

2016-11-11 Thread Vijay Kumar
CPU needs to be marked offline before stopping it. When not marked
offline, the xcall receives HV_EWOULDBLOCK and so assumes that not all
CPUs received the message, and retries. After 10000 retries, it finally
fails with fatal mondo timeout.

Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com>
---
 arch/sparc/kernel/smp_64.c |3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index d3035ba..14138ad 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1435,6 +1435,7 @@ void __irq_entry smp_receive_signal_client(int irq, 
struct pt_regs *regs)
 
 static void stop_this_cpu(void *dummy)
 {
+   set_cpu_online(smp_processor_id(), false);
prom_stopself();
 }
 
@@ -1446,6 +1447,8 @@ void smp_send_stop(void)
for_each_online_cpu(cpu) {
if (cpu == smp_processor_id())
continue;
+
+   set_cpu_online(cpu, false);
 #ifdef CONFIG_SUN_LDOMS
if (ldom_domaining_enabled) {
unsigned long hv_err;
-- 
1.7.1



[PATCH v2 0/4] sparc64: Jump to boot prom from console on panic

2016-11-11 Thread Vijay Kumar
Currently Stop-A (L1A) does not make the kernel switch to OBP on panic. This
patchset addresses this issue. Also, now we can cause a jump to OBP by sending
'break' twice from sunhv console. On bare metal, one can send a break by
typing Esc + 'B' + Sysrq (or whatever). On LDOM, press Ctrl + ] in telnet,
and then "send break" at the telnet prompt.

  sparc64: Set cpu state to offline when stopped
  sparc64: Migrate hvcons irq to panicked cpu
  sparc64: Send break twice from console to return to boot prom
  Documentation/sparc: Steps for sending break on sunhv console

 Documentation/sparc/console.txt |9 +
 arch/sparc/include/asm/setup.h  |1 +
 arch/sparc/kernel/smp_64.c  |9 -
 drivers/tty/serial/sunhv.c  |   12 +++-
 kernel/panic.c  |3 ++-
 5 files changed, 31 insertions(+), 3 deletions(-)
 create mode 100644 Documentation/sparc/console.txt



--
1.7.1


[PATCH v2 1/4] sparc64: Set cpu state to offline when stopped

2016-11-11 Thread Vijay Kumar
CPU needs to be marked offline before stopping it. When not marked
offline, the xcall receives HV_EWOULDBLOCK and so assumes that not all
CPUs received the message, and retries. After 10000 retries, it finally
fails with fatal mondo timeout.

Signed-off-by: Vijay Kumar 
---
 arch/sparc/kernel/smp_64.c |3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index d3035ba..14138ad 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1435,6 +1435,7 @@ void __irq_entry smp_receive_signal_client(int irq, 
struct pt_regs *regs)
 
 static void stop_this_cpu(void *dummy)
 {
+   set_cpu_online(smp_processor_id(), false);
prom_stopself();
 }
 
@@ -1446,6 +1447,8 @@ void smp_send_stop(void)
for_each_online_cpu(cpu) {
if (cpu == smp_processor_id())
continue;
+
+   set_cpu_online(cpu, false);
 #ifdef CONFIG_SUN_LDOMS
if (ldom_domaining_enabled) {
unsigned long hv_err;
-- 
1.7.1



[PATCH v2 0/4] sparc64: Jump to boot prom from console on panic

2016-11-11 Thread Vijay Kumar
Currently Stop-A (L1A) does not make the kernel switch to OBP on panic. This
patchset addresses this issue. Also, now we can cause a jump to OBP by sending
'break' twice from sunhv console. On bare metal, one can send a break by
typing Esc + 'B' + Sysrq (or whatever). On LDOM, press Ctrl + ] in telnet,
and then "send break" at the telnet prompt.

  sparc64: Set cpu state to offline when stopped
  sparc64: Migrate hvcons irq to panicked cpu
  sparc64: Send break twice from console to return to boot prom
  Documentation/sparc: Steps for sending break on sunhv console

 Documentation/sparc/console.txt |9 +
 arch/sparc/include/asm/setup.h  |1 +
 arch/sparc/kernel/smp_64.c  |9 -
 drivers/tty/serial/sunhv.c  |   12 +++-
 kernel/panic.c  |3 ++-
 5 files changed, 31 insertions(+), 3 deletions(-)
 create mode 100644 Documentation/sparc/console.txt



--
1.7.1


[PATCH v2 4/4] Documentation/sparc: Steps for sending break on sunhv console

2016-11-11 Thread Vijay Kumar
Documented the steps for sending break on sunhv console.

Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com>
---
 Documentation/sparc/console.txt |9 +
 1 files changed, 9 insertions(+), 0 deletions(-)

diff --git a/Documentation/sparc/console.txt b/Documentation/sparc/console.txt
new file mode 100644
index 000..ab55353
--- /dev/null
+++ b/Documentation/sparc/console.txt
@@ -0,0 +1,9 @@
+Steps for sending 'break' on sunhv console:
+===
+
+On Baremetal:
+   1. press   Esc + 'B'
+
+On LDOM:
+   1. pressCtrl + ']'
+   2. telnet> send  break  
-- 
1.7.1



[PATCH v2 4/4] Documentation/sparc: Steps for sending break on sunhv console

2016-11-11 Thread Vijay Kumar
Documented the steps for sending break on sunhv console.

Signed-off-by: Vijay Kumar 
---
 Documentation/sparc/console.txt |9 +
 1 files changed, 9 insertions(+), 0 deletions(-)

diff --git a/Documentation/sparc/console.txt b/Documentation/sparc/console.txt
new file mode 100644
index 000..ab55353
--- /dev/null
+++ b/Documentation/sparc/console.txt
@@ -0,0 +1,9 @@
+Steps for sending 'break' on sunhv console:
+===
+
+On Baremetal:
+   1. press   Esc + 'B'
+
+On LDOM:
+   1. pressCtrl + ']'
+   2. telnet> send  break  
-- 
1.7.1



[PATCH v2 2/4] sparc64: Migrate hvcons irq to panicked cpu

2016-11-11 Thread Vijay Kumar
On panic, all other CPUs are stopped except the one which had
hit panic. To keep console alive, we need to migrate hvcons irq
to panicked CPU.

Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com>
---
 arch/sparc/include/asm/setup.h |1 +
 arch/sparc/kernel/smp_64.c |6 +-
 drivers/tty/serial/sunhv.c |6 ++
 3 files changed, 12 insertions(+), 1 deletions(-)

diff --git a/arch/sparc/include/asm/setup.h b/arch/sparc/include/asm/setup.h
index 29d64b1..41691a5 100644
--- a/arch/sparc/include/asm/setup.h
+++ b/arch/sparc/include/asm/setup.h
@@ -61,6 +61,7 @@ extern atomic_t dcpage_flushes_xcall;
 extern int sysctl_tsb_ratio;
 #endif
 
+void sunhv_migrate_hvcons_irq(int cpu);
 void sun_do_break(void);
 extern int stop_a_enabled;
 extern int scons_pwroff;
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 14138ad..52dc4b7 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1444,8 +1444,12 @@ void smp_send_stop(void)
int cpu;
 
if (tlb_type == hypervisor) {
+   int this_cpu = smp_processor_id();
+
+   sunhv_migrate_hvcons_irq(this_cpu);
+
for_each_online_cpu(cpu) {
-   if (cpu == smp_processor_id())
+   if (cpu == this_cpu)
continue;
 
set_cpu_online(cpu, false);
diff --git a/drivers/tty/serial/sunhv.c b/drivers/tty/serial/sunhv.c
index 4e603d0..59828d8 100644
--- a/drivers/tty/serial/sunhv.c
+++ b/drivers/tty/serial/sunhv.c
@@ -398,6 +398,12 @@ static struct uart_driver sunhv_reg = {
 
 static struct uart_port *sunhv_port;
 
+void sunhv_migrate_hvcons_irq(int cpu)
+{
+   /* Migrate hvcons irq to param cpu */
+   irq_force_affinity(sunhv_port->irq, cpumask_of(cpu));
+}
+
 /* Copy 's' into the con_write_page, decoding "\n" into
  * "\r\n" along the way.  We have to return two lengths
  * because the caller needs to know how much to advance
-- 
1.7.1



[PATCH v2 2/4] sparc64: Migrate hvcons irq to panicked cpu

2016-11-11 Thread Vijay Kumar
On panic, all other CPUs are stopped except the one which had
hit panic. To keep console alive, we need to migrate hvcons irq
to panicked CPU.

Signed-off-by: Vijay Kumar 
---
 arch/sparc/include/asm/setup.h |1 +
 arch/sparc/kernel/smp_64.c |6 +-
 drivers/tty/serial/sunhv.c |6 ++
 3 files changed, 12 insertions(+), 1 deletions(-)

diff --git a/arch/sparc/include/asm/setup.h b/arch/sparc/include/asm/setup.h
index 29d64b1..41691a5 100644
--- a/arch/sparc/include/asm/setup.h
+++ b/arch/sparc/include/asm/setup.h
@@ -61,6 +61,7 @@ extern atomic_t dcpage_flushes_xcall;
 extern int sysctl_tsb_ratio;
 #endif
 
+void sunhv_migrate_hvcons_irq(int cpu);
 void sun_do_break(void);
 extern int stop_a_enabled;
 extern int scons_pwroff;
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 14138ad..52dc4b7 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1444,8 +1444,12 @@ void smp_send_stop(void)
int cpu;
 
if (tlb_type == hypervisor) {
+   int this_cpu = smp_processor_id();
+
+   sunhv_migrate_hvcons_irq(this_cpu);
+
for_each_online_cpu(cpu) {
-   if (cpu == smp_processor_id())
+   if (cpu == this_cpu)
continue;
 
set_cpu_online(cpu, false);
diff --git a/drivers/tty/serial/sunhv.c b/drivers/tty/serial/sunhv.c
index 4e603d0..59828d8 100644
--- a/drivers/tty/serial/sunhv.c
+++ b/drivers/tty/serial/sunhv.c
@@ -398,6 +398,12 @@ static struct uart_driver sunhv_reg = {
 
 static struct uart_port *sunhv_port;
 
+void sunhv_migrate_hvcons_irq(int cpu)
+{
+   /* Migrate hvcons irq to param cpu */
+   irq_force_affinity(sunhv_port->irq, cpumask_of(cpu));
+}
+
 /* Copy 's' into the con_write_page, decoding "\n" into
  * "\r\n" along the way.  We have to return two lengths
  * because the caller needs to know how much to advance
-- 
1.7.1



[PATCH 0/4] sparc64: Jump to boot prom from console on panic

2016-11-11 Thread Vijay Kumar
Here is the cover posting for the patches. I did not send a cover letter
initially as I thought that the patch descriptions were self-explanatory. But
I agree, this would help in the overall understanding of the patchset.

Currently Stop-A (L1A) does not make the kernel switch to OBP on panic. This
patchset addresses this issue. Also, now we can cause a jump to OBP by sending
'break' twice from sunhv console. On bare metal, one can send a break by
typing Esc + 'B' + Sysrq (or whatever). On LDOM, press Ctrl + ] in telnet,
and then "send break" at the telnet prompt.

  sparc64: Set cpu state to offline when stopped
  sparc64: Migrate hvcons irq to panicked cpu
  sparc64: Send break twice from console to return to boot prom
  Documentation/sparc: Steps for sending break on sunhv console

 Documentation/sparc/console.txt |9 +
 arch/sparc/include/asm/setup.h  |1 +
 arch/sparc/kernel/smp_64.c  |9 -
 drivers/tty/serial/sunhv.c  |   12 +++-
 kernel/panic.c  |3 ++-
 5 files changed, 31 insertions(+), 3 deletions(-)
 create mode 100644 Documentation/sparc/console.txt



--
1.7.1


[PATCH 0/4] sparc64: Jump to boot prom from console on panic

2016-11-11 Thread Vijay Kumar
Here is the cover posting for the patches. I did not send a cover letter
initially as I thought that the patch descriptions were self-explanatory. But
I agree, this would help in the overall understanding of the patchset.

Currently Stop-A (L1A) does not make the kernel switch to OBP on panic. This
patchset addresses this issue. Also, now we can cause a jump to OBP by sending
'break' twice from sunhv console. On bare metal, one can send a break by
typing Esc + 'B' + Sysrq (or whatever). On LDOM, press Ctrl + ] in telnet,
and then "send break" at the telnet prompt.

  sparc64: Set cpu state to offline when stopped
  sparc64: Migrate hvcons irq to panicked cpu
  sparc64: Send break twice from console to return to boot prom
  Documentation/sparc: Steps for sending break on sunhv console

 Documentation/sparc/console.txt |9 +
 arch/sparc/include/asm/setup.h  |1 +
 arch/sparc/kernel/smp_64.c  |9 -
 drivers/tty/serial/sunhv.c  |   12 +++-
 kernel/panic.c  |3 ++-
 5 files changed, 31 insertions(+), 3 deletions(-)
 create mode 100644 Documentation/sparc/console.txt



--
1.7.1


[PATCH 2/4] sparc64: Migrate hvcons irq to panicked cpu

2016-11-10 Thread Vijay Kumar
On panic, all other CPUs are stopped except the one which had
hit panic. To keep console alive, we need to migrate hvcons irq
to panicked CPU.

Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com>
---
 arch/sparc/include/asm/setup.h |1 +
 arch/sparc/kernel/smp_64.c |6 +-
 drivers/tty/serial/sunhv.c |6 ++
 3 files changed, 12 insertions(+), 1 deletions(-)

diff --git a/arch/sparc/include/asm/setup.h b/arch/sparc/include/asm/setup.h
index 29d64b1..41691a5 100644
--- a/arch/sparc/include/asm/setup.h
+++ b/arch/sparc/include/asm/setup.h
@@ -61,6 +61,7 @@ extern atomic_t dcpage_flushes_xcall;
 extern int sysctl_tsb_ratio;
 #endif
 
+void sunhv_migrate_hvcons_irq(int cpu);
 void sun_do_break(void);
 extern int stop_a_enabled;
 extern int scons_pwroff;
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 14138ad..52dc4b7 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1444,8 +1444,12 @@ void smp_send_stop(void)
int cpu;
 
if (tlb_type == hypervisor) {
+   int this_cpu = smp_processor_id();
+
+   sunhv_migrate_hvcons_irq(this_cpu);
+
for_each_online_cpu(cpu) {
-   if (cpu == smp_processor_id())
+   if (cpu == this_cpu)
continue;
 
set_cpu_online(cpu, false);
diff --git a/drivers/tty/serial/sunhv.c b/drivers/tty/serial/sunhv.c
index 4e603d0..59828d8 100644
--- a/drivers/tty/serial/sunhv.c
+++ b/drivers/tty/serial/sunhv.c
@@ -398,6 +398,12 @@ static struct uart_driver sunhv_reg = {
 
 static struct uart_port *sunhv_port;
 
+void sunhv_migrate_hvcons_irq(int cpu)
+{
+   /* Migrate hvcons irq to param cpu */
+   irq_force_affinity(sunhv_port->irq, cpumask_of(cpu));
+}
+
 /* Copy 's' into the con_write_page, decoding "\n" into
  * "\r\n" along the way.  We have to return two lengths
  * because the caller needs to know how much to advance
-- 
1.7.1



[PATCH 2/4] sparc64: Migrate hvcons irq to panicked cpu

2016-11-10 Thread Vijay Kumar
On panic, all other CPUs are stopped except the one which had
hit panic. To keep console alive, we need to migrate hvcons irq
to panicked CPU.

Signed-off-by: Vijay Kumar 
---
 arch/sparc/include/asm/setup.h |1 +
 arch/sparc/kernel/smp_64.c |6 +-
 drivers/tty/serial/sunhv.c |6 ++
 3 files changed, 12 insertions(+), 1 deletions(-)

diff --git a/arch/sparc/include/asm/setup.h b/arch/sparc/include/asm/setup.h
index 29d64b1..41691a5 100644
--- a/arch/sparc/include/asm/setup.h
+++ b/arch/sparc/include/asm/setup.h
@@ -61,6 +61,7 @@ extern atomic_t dcpage_flushes_xcall;
 extern int sysctl_tsb_ratio;
 #endif
 
+void sunhv_migrate_hvcons_irq(int cpu);
 void sun_do_break(void);
 extern int stop_a_enabled;
 extern int scons_pwroff;
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 14138ad..52dc4b7 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1444,8 +1444,12 @@ void smp_send_stop(void)
int cpu;
 
if (tlb_type == hypervisor) {
+   int this_cpu = smp_processor_id();
+
+   sunhv_migrate_hvcons_irq(this_cpu);
+
for_each_online_cpu(cpu) {
-   if (cpu == smp_processor_id())
+   if (cpu == this_cpu)
continue;
 
set_cpu_online(cpu, false);
diff --git a/drivers/tty/serial/sunhv.c b/drivers/tty/serial/sunhv.c
index 4e603d0..59828d8 100644
--- a/drivers/tty/serial/sunhv.c
+++ b/drivers/tty/serial/sunhv.c
@@ -398,6 +398,12 @@ static struct uart_driver sunhv_reg = {
 
 static struct uart_port *sunhv_port;
 
+void sunhv_migrate_hvcons_irq(int cpu)
+{
+   /* Migrate hvcons irq to param cpu */
+   irq_force_affinity(sunhv_port->irq, cpumask_of(cpu));
+}
+
 /* Copy 's' into the con_write_page, decoding "\n" into
  * "\r\n" along the way.  We have to return two lengths
  * because the caller needs to know how much to advance
-- 
1.7.1



[PATCH 3/4] sparc64: Send break twice from console to return to boot prom

2016-11-10 Thread Vijay Kumar
Now we can also jump to boot prom from sunhv console by sending
break twice on console for both running and panicked kernel
cases.

Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com>
---
 drivers/tty/serial/sunhv.c |6 +-
 kernel/panic.c |3 ++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/serial/sunhv.c b/drivers/tty/serial/sunhv.c
index 59828d8..33c35b4 100644
--- a/drivers/tty/serial/sunhv.c
+++ b/drivers/tty/serial/sunhv.c
@@ -116,7 +116,7 @@ static int receive_chars_getchar(struct uart_port *port)
 
 static int receive_chars_read(struct uart_port *port)
 {
-   int saw_console_brk = 0;
+   static int saw_console_brk;
int limit = 1;
 
while (limit-- > 0) {
@@ -128,6 +128,9 @@ static int receive_chars_read(struct uart_port *port)
bytes_read = 0;
 
if (stat == CON_BREAK) {
+   if (saw_console_brk)
+   sun_do_break();
+
if (uart_handle_break(port))
continue;
saw_console_brk = 1;
@@ -151,6 +154,7 @@ static int receive_chars_read(struct uart_port *port)
if (port->sysrq != 0 &&  *con_read_page) {
for (i = 0; i < bytes_read; i++)
uart_handle_sysrq_char(port, con_read_page[i]);
+   saw_console_brk = 0;
}
 
if (port->state == NULL)
diff --git a/kernel/panic.c b/kernel/panic.c
index ca8cea1..4fe3b28 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -240,7 +240,8 @@ void panic(const char *fmt, ...)
extern int stop_a_enabled;
/* Make sure the user can actually press Stop-A (L1-A) */
stop_a_enabled = 1;
-   pr_emerg("Press Stop-A (L1-A) to return to the boot prom\n");
+   pr_emerg("Press Stop-A (L1-A) from sun keyboard or send break\n"
+"twice on console to return to the boot prom\n");
}
 #endif
 #if defined(CONFIG_S390)
-- 
1.7.1



[PATCH 3/4] sparc64: Send break twice from console to return to boot prom

2016-11-10 Thread Vijay Kumar
Now we can also jump to boot prom from sunhv console by sending
break twice on console for both running and panicked kernel
cases.

Signed-off-by: Vijay Kumar 
---
 drivers/tty/serial/sunhv.c |6 +-
 kernel/panic.c |3 ++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/serial/sunhv.c b/drivers/tty/serial/sunhv.c
index 59828d8..33c35b4 100644
--- a/drivers/tty/serial/sunhv.c
+++ b/drivers/tty/serial/sunhv.c
@@ -116,7 +116,7 @@ static int receive_chars_getchar(struct uart_port *port)
 
 static int receive_chars_read(struct uart_port *port)
 {
-   int saw_console_brk = 0;
+   static int saw_console_brk;
int limit = 1;
 
while (limit-- > 0) {
@@ -128,6 +128,9 @@ static int receive_chars_read(struct uart_port *port)
bytes_read = 0;
 
if (stat == CON_BREAK) {
+   if (saw_console_brk)
+   sun_do_break();
+
if (uart_handle_break(port))
continue;
saw_console_brk = 1;
@@ -151,6 +154,7 @@ static int receive_chars_read(struct uart_port *port)
if (port->sysrq != 0 &&  *con_read_page) {
for (i = 0; i < bytes_read; i++)
uart_handle_sysrq_char(port, con_read_page[i]);
+   saw_console_brk = 0;
}
 
if (port->state == NULL)
diff --git a/kernel/panic.c b/kernel/panic.c
index ca8cea1..4fe3b28 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -240,7 +240,8 @@ void panic(const char *fmt, ...)
extern int stop_a_enabled;
/* Make sure the user can actually press Stop-A (L1-A) */
stop_a_enabled = 1;
-   pr_emerg("Press Stop-A (L1-A) to return to the boot prom\n");
+   pr_emerg("Press Stop-A (L1-A) from sun keyboard or send break\n"
+"twice on console to return to the boot prom\n");
}
 #endif
 #if defined(CONFIG_S390)
-- 
1.7.1



[PATCH 4/4] Documentation/sparc: Steps for sending break on sunhv console

2016-11-10 Thread Vijay Kumar
Documented the steps for sending break on sunhv console.

Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com>
---
 Documentation/sparc/console.txt |9 +
 1 files changed, 9 insertions(+), 0 deletions(-)

diff --git a/Documentation/sparc/console.txt b/Documentation/sparc/console.txt
new file mode 100644
index 0000000..ab55353
--- /dev/null
+++ b/Documentation/sparc/console.txt
@@ -0,0 +1,9 @@
+Steps for sending 'break' on sunhv console:
+===========================================
+
+On Baremetal:
+   1. press   Esc + 'B'
+
+On LDOM:
+   1. press   Ctrl + ']'
+   2. telnet> send  break  
-- 
1.7.1



[PATCH 4/4] Documentation/sparc: Steps for sending break on sunhv console

2016-11-10 Thread Vijay Kumar
Documented the steps for sending break on sunhv console.

Signed-off-by: Vijay Kumar 
---
 Documentation/sparc/console.txt |9 +
 1 files changed, 9 insertions(+), 0 deletions(-)

diff --git a/Documentation/sparc/console.txt b/Documentation/sparc/console.txt
new file mode 100644
index 0000000..ab55353
--- /dev/null
+++ b/Documentation/sparc/console.txt
@@ -0,0 +1,9 @@
+Steps for sending 'break' on sunhv console:
+===========================================
+
+On Baremetal:
+   1. press   Esc + 'B'
+
+On LDOM:
+   1. press   Ctrl + ']'
+   2. telnet> send  break  
-- 
1.7.1



[PATCH 1/4] sparc64: Set cpu state to offline when stopped

2016-11-10 Thread Vijay Kumar
CPU needs to be marked offline before stopping it. When not marked
offline, the xcall receives HV_EWOULDBLOCK and so assumes that not all
CPUs received the message, and retries. After 10000 retries, it finally
fails with fatal mondo timeout.

Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com>
---
 arch/sparc/kernel/smp_64.c |3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index d3035ba..14138ad 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1435,6 +1435,7 @@ void __irq_entry smp_receive_signal_client(int irq, 
struct pt_regs *regs)
 
 static void stop_this_cpu(void *dummy)
 {
+   set_cpu_online(smp_processor_id(), false);
prom_stopself();
 }
 
@@ -1446,6 +1447,8 @@ void smp_send_stop(void)
for_each_online_cpu(cpu) {
if (cpu == smp_processor_id())
continue;
+
+   set_cpu_online(cpu, false);
 #ifdef CONFIG_SUN_LDOMS
if (ldom_domaining_enabled) {
unsigned long hv_err;
-- 
1.7.1



[PATCH 1/4] sparc64: Set cpu state to offline when stopped

2016-11-10 Thread Vijay Kumar
CPU needs to be marked offline before stopping it. When not marked
offline, the xcall receives HV_EWOULDBLOCK and so assumes that not all
CPUs received the message, and retries. After 10000 retries, it finally
fails with fatal mondo timeout.

Signed-off-by: Vijay Kumar 
---
 arch/sparc/kernel/smp_64.c |3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index d3035ba..14138ad 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1435,6 +1435,7 @@ void __irq_entry smp_receive_signal_client(int irq, 
struct pt_regs *regs)
 
 static void stop_this_cpu(void *dummy)
 {
+   set_cpu_online(smp_processor_id(), false);
prom_stopself();
 }
 
@@ -1446,6 +1447,8 @@ void smp_send_stop(void)
for_each_online_cpu(cpu) {
if (cpu == smp_processor_id())
continue;
+
+   set_cpu_online(cpu, false);
 #ifdef CONFIG_SUN_LDOMS
if (ldom_domaining_enabled) {
unsigned long hv_err;
-- 
1.7.1



[PATCH v2 2/2] Documentation/ABI: Added ABI information for devspec and obppath.

2016-10-06 Thread Vijay Kumar
Updated Documentation/ABI for devspec and obppath sysfs entries.

Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com>
---
 Documentation/ABI/stable/sysfs-devices |   14 ++
 1 files changed, 14 insertions(+), 0 deletions(-)

diff --git a/Documentation/ABI/stable/sysfs-devices 
b/Documentation/ABI/stable/sysfs-devices
index df449d7..35c457f 100644
--- a/Documentation/ABI/stable/sysfs-devices
+++ b/Documentation/ABI/stable/sysfs-devices
@@ -8,3 +8,17 @@ Description:
Any device associated with a device-tree node will have
an of_path symlink pointing to the corresponding device
node in /sys/firmware/devicetree/
+
+What:  /sys/devices/*/devspec
+Date:  October 2016
+Contact:   Device Tree mailing list <devicet...@vger.kernel.org>
+Description:
+   If CONFIG_OF is enabled, then this file is present. When
+   read, it returns full name of the device node.
+
+What:  /sys/devices/*/obppath
+Date:  October 2016
+Contact:   Device Tree mailing list <devicet...@vger.kernel.org>
+Description:
+   If CONFIG_OF is enabled, then this file is present. When
+   read, it returns full name of the device node.
-- 
1.7.1



[PATCH v2 1/2] usb/core: Added devspec sysfs entry for devices behind the usb hub

2016-10-06 Thread Vijay Kumar
Grub finds incorrect of_node path for devices behind usb hub.
Added devspec sysfs entry for devices behind usb hub so that
right of_node path is returned during grub sysfs walk for these
devices.

Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com>
---
 drivers/usb/core/sysfs.c |   15 +++
 1 files changed, 15 insertions(+), 0 deletions(-)

diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c
index c953a0f..84d66d5 100644
--- a/drivers/usb/core/sysfs.c
+++ b/drivers/usb/core/sysfs.c
@@ -14,6 +14,7 @@
 #include <linux/string.h>
 #include <linux/usb.h>
 #include <linux/usb/quirks.h>
+#include <linux/of.h>
 #include "usb.h"
 
 /* Active configuration fields */
@@ -104,6 +105,17 @@ static ssize_t bConfigurationValue_store(struct device 
*dev,
 static DEVICE_ATTR_IGNORE_LOCKDEP(bConfigurationValue, S_IRUGO | S_IWUSR,
bConfigurationValue_show, bConfigurationValue_store);
 
+#ifdef CONFIG_OF
+static ssize_t devspec_show(struct device *dev, struct device_attribute *attr,
+   char *buf)
+{
+   struct device_node *of_node = dev->of_node;
+
+   return sprintf(buf, "%s\n", of_node_full_name(of_node));
+}
+static DEVICE_ATTR_RO(devspec);
+#endif
+
 /* String fields */
 #define usb_string_attr(name)  \
 static ssize_t  name##_show(struct device *dev,
\
@@ -786,6 +798,9 @@ static struct attribute *dev_attrs[] = {
    &dev_attr_remove.attr,
    &dev_attr_removable.attr,
    &dev_attr_ltm_capable.attr,
+#ifdef CONFIG_OF
+   &dev_attr_devspec.attr,
+#endif
NULL,
 };
 static struct attribute_group dev_attr_grp = {
-- 
1.7.1



[PATCH v2 2/2] Documentation/ABI: Added ABI information for devspec and obppath.

2016-10-06 Thread Vijay Kumar
Updated Documentation/ABI for devspec and obppath sysfs entries.

Signed-off-by: Vijay Kumar 
---
 Documentation/ABI/stable/sysfs-devices |   14 ++
 1 files changed, 14 insertions(+), 0 deletions(-)

diff --git a/Documentation/ABI/stable/sysfs-devices 
b/Documentation/ABI/stable/sysfs-devices
index df449d7..35c457f 100644
--- a/Documentation/ABI/stable/sysfs-devices
+++ b/Documentation/ABI/stable/sysfs-devices
@@ -8,3 +8,17 @@ Description:
Any device associated with a device-tree node will have
an of_path symlink pointing to the corresponding device
node in /sys/firmware/devicetree/
+
+What:  /sys/devices/*/devspec
+Date:  October 2016
+Contact:   Device Tree mailing list 
+Description:
+   If CONFIG_OF is enabled, then this file is present. When
+   read, it returns full name of the device node.
+
+What:  /sys/devices/*/obppath
+Date:  October 2016
+Contact:   Device Tree mailing list 
+Description:
+   If CONFIG_OF is enabled, then this file is present. When
+   read, it returns full name of the device node.
-- 
1.7.1



[PATCH v2 1/2] usb/core: Added devspec sysfs entry for devices behind the usb hub

2016-10-06 Thread Vijay Kumar
Grub finds incorrect of_node path for devices behind usb hub.
Added devspec sysfs entry for devices behind usb hub so that
right of_node path is returned during grub sysfs walk for these
devices.

Signed-off-by: Vijay Kumar 
---
 drivers/usb/core/sysfs.c |   15 +++
 1 files changed, 15 insertions(+), 0 deletions(-)

diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c
index c953a0f..84d66d5 100644
--- a/drivers/usb/core/sysfs.c
+++ b/drivers/usb/core/sysfs.c
@@ -14,6 +14,7 @@
 #include <linux/string.h>
 #include <linux/usb.h>
 #include <linux/usb/quirks.h>
+#include <linux/of.h>
 #include "usb.h"
 
 /* Active configuration fields */
@@ -104,6 +105,17 @@ static ssize_t bConfigurationValue_store(struct device 
*dev,
 static DEVICE_ATTR_IGNORE_LOCKDEP(bConfigurationValue, S_IRUGO | S_IWUSR,
bConfigurationValue_show, bConfigurationValue_store);
 
+#ifdef CONFIG_OF
+static ssize_t devspec_show(struct device *dev, struct device_attribute *attr,
+   char *buf)
+{
+   struct device_node *of_node = dev->of_node;
+
+   return sprintf(buf, "%s\n", of_node_full_name(of_node));
+}
+static DEVICE_ATTR_RO(devspec);
+#endif
+
 /* String fields */
 #define usb_string_attr(name)  \
 static ssize_t  name##_show(struct device *dev,
\
@@ -786,6 +798,9 @@ static struct attribute *dev_attrs[] = {
    &dev_attr_remove.attr,
    &dev_attr_removable.attr,
    &dev_attr_ltm_capable.attr,
+#ifdef CONFIG_OF
+   &dev_attr_devspec.attr,
+#endif
NULL,
 };
 static struct attribute_group dev_attr_grp = {
-- 
1.7.1



Re: [PATCH] usb/core: Added devspec sysfs entry for devices behind usb hub

2016-10-05 Thread Vijay Kumar


On 10/4/2016 2:49 PM, Greg KH wrote:

On Tue, Oct 04, 2016 at 12:04:40PM -0700, Vijay Kumar wrote:

Grub finds incorrect of_node path for devices behind usb hub.
Added devspec sysfs entry for devices behind usb hub so that
right of_node path is returned during grub sysfs walk for these
devices.

Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com>

---
  drivers/usb/core/sysfs.c |   15 +++
  1 files changed, 15 insertions(+), 0 deletions(-)

diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c
index c953a0f..84d66d5 100644
--- a/drivers/usb/core/sysfs.c
+++ b/drivers/usb/core/sysfs.c
@@ -14,6 +14,7 @@
  #include <linux/string.h>
  #include <linux/usb.h>
  #include <linux/usb/quirks.h>
+#include <linux/of.h>
  #include "usb.h"
  
  /* Active configuration fields */

@@ -104,6 +105,17 @@ static ssize_t bConfigurationValue_store(struct device 
*dev,
  static DEVICE_ATTR_IGNORE_LOCKDEP(bConfigurationValue, S_IRUGO | S_IWUSR,
bConfigurationValue_show, bConfigurationValue_store);
  
+#ifdef CONFIG_OF

+static ssize_t devspec_show(struct device *dev, struct device_attribute *attr,
+char *buf)
+{
+   struct device_node *of_node = dev->of_node;
+
+   return sprintf(buf, "%s\n", of_node_full_name(of_node));
+}
+static DEVICE_ATTR_RO(devspec);
+#endif

Any way to do this without the #ifdef?

Thanks for your comment. I looked into it again and found that grub would
report the ofpath incorrectly if CONFIG_OF is not defined but the devspec
sysfs file exists.

I see that pci-sysfs.c also defines devspec in the same way.


And you need to also update Documentation/ABI if you add a new sysfs
file.


Sure. If you agree with my comment above, should I make the Documentation/ABI
changes in a separate patch?

Thanks,
Vijay



Re: [PATCH] usb/core: Added devspec sysfs entry for devices behind usb hub

2016-10-05 Thread Vijay Kumar


On 10/4/2016 2:49 PM, Greg KH wrote:

On Tue, Oct 04, 2016 at 12:04:40PM -0700, Vijay Kumar wrote:

Grub finds incorrect of_node path for devices behind usb hub.
Added devspec sysfs entry for devices behind usb hub so that
right of_node path is returned during grub sysfs walk for these
devices.

Signed-off-by: Vijay Kumar 

---
  drivers/usb/core/sysfs.c |   15 +++
  1 files changed, 15 insertions(+), 0 deletions(-)

diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c
index c953a0f..84d66d5 100644
--- a/drivers/usb/core/sysfs.c
+++ b/drivers/usb/core/sysfs.c
@@ -14,6 +14,7 @@
  #include <linux/string.h>
  #include <linux/usb.h>
  #include <linux/usb/quirks.h>
+#include <linux/of.h>
  #include "usb.h"
  
  /* Active configuration fields */

@@ -104,6 +105,17 @@ static ssize_t bConfigurationValue_store(struct device 
*dev,
  static DEVICE_ATTR_IGNORE_LOCKDEP(bConfigurationValue, S_IRUGO | S_IWUSR,
bConfigurationValue_show, bConfigurationValue_store);
  
+#ifdef CONFIG_OF

+static ssize_t devspec_show(struct device *dev, struct device_attribute *attr,
+char *buf)
+{
+   struct device_node *of_node = dev->of_node;
+
+   return sprintf(buf, "%s\n", of_node_full_name(of_node));
+}
+static DEVICE_ATTR_RO(devspec);
+#endif

Any way to do this without the #ifdef?

Thanks for your comment. I looked into it again and found that grub would
report the ofpath incorrectly if CONFIG_OF is not defined but the devspec
sysfs file exists.

I see that pci-sysfs.c also defines devspec in the same way.


And you need to also update Documentation/ABI if you add a new sysfs
file.


Sure. If you agree with my comment above, should I make the Documentation/ABI
changes in a separate patch?

Thanks,
Vijay



[PATCH] usb/core: Added devspec sysfs entry for devices behind usb hub

2016-10-04 Thread Vijay Kumar
Grub finds incorrect of_node path for devices behind usb hub.
Added devspec sysfs entry for devices behind usb hub so that
right of_node path is returned during grub sysfs walk for these
devices.

Signed-off-by: Vijay Kumar <vijay.ac.ku...@oracle.com>

---
 drivers/usb/core/sysfs.c |   15 +++
 1 files changed, 15 insertions(+), 0 deletions(-)

diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c
index c953a0f..84d66d5 100644
--- a/drivers/usb/core/sysfs.c
+++ b/drivers/usb/core/sysfs.c
@@ -14,6 +14,7 @@
 #include <linux/string.h>
 #include <linux/usb.h>
 #include <linux/usb/quirks.h>
+#include <linux/of.h>
 #include "usb.h"
 
 /* Active configuration fields */
@@ -104,6 +105,17 @@ static ssize_t bConfigurationValue_store(struct device 
*dev,
 static DEVICE_ATTR_IGNORE_LOCKDEP(bConfigurationValue, S_IRUGO | S_IWUSR,
bConfigurationValue_show, bConfigurationValue_store);
 
+#ifdef CONFIG_OF
+static ssize_t devspec_show(struct device *dev, struct device_attribute *attr,
+char *buf)
+{
+   struct device_node *of_node = dev->of_node;
+
+   return sprintf(buf, "%s\n", of_node_full_name(of_node));
+}
+static DEVICE_ATTR_RO(devspec);
+#endif
+
 /* String fields */
 #define usb_string_attr(name)  \
 static ssize_t  name##_show(struct device *dev,
\
@@ -786,6 +798,9 @@ static struct attribute *dev_attrs[] = {
    &dev_attr_remove.attr,
    &dev_attr_removable.attr,
    &dev_attr_ltm_capable.attr,
+#ifdef CONFIG_OF
+   &dev_attr_devspec.attr,
+#endif
NULL,
 };
 static struct attribute_group dev_attr_grp = {
-- 
1.7.1



[PATCH] usb/core: Added devspec sysfs entry for devices behind usb hub

2016-10-04 Thread Vijay Kumar
Grub finds incorrect of_node path for devices behind usb hub.
Added devspec sysfs entry for devices behind usb hub so that
right of_node path is returned during grub sysfs walk for these
devices.

Signed-off-by: Vijay Kumar 

---
 drivers/usb/core/sysfs.c |   15 +++
 1 files changed, 15 insertions(+), 0 deletions(-)

diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c
index c953a0f..84d66d5 100644
--- a/drivers/usb/core/sysfs.c
+++ b/drivers/usb/core/sysfs.c
@@ -14,6 +14,7 @@
 #include <linux/string.h>
 #include <linux/usb.h>
 #include <linux/usb/quirks.h>
+#include <linux/of.h>
 #include "usb.h"
 
 /* Active configuration fields */
@@ -104,6 +105,17 @@ static ssize_t bConfigurationValue_store(struct device 
*dev,
 static DEVICE_ATTR_IGNORE_LOCKDEP(bConfigurationValue, S_IRUGO | S_IWUSR,
bConfigurationValue_show, bConfigurationValue_store);
 
+#ifdef CONFIG_OF
+static ssize_t devspec_show(struct device *dev, struct device_attribute *attr,
+char *buf)
+{
+   struct device_node *of_node = dev->of_node;
+
+   return sprintf(buf, "%s\n", of_node_full_name(of_node));
+}
+static DEVICE_ATTR_RO(devspec);
+#endif
+
 /* String fields */
 #define usb_string_attr(name)  \
 static ssize_t  name##_show(struct device *dev,
\
@@ -786,6 +798,9 @@ static struct attribute *dev_attrs[] = {
    &dev_attr_remove.attr,
    &dev_attr_removable.attr,
    &dev_attr_ltm_capable.attr,
+#ifdef CONFIG_OF
+   &dev_attr_devspec.attr,
+#endif
NULL,
 };
 static struct attribute_group dev_attr_grp = {
-- 
1.7.1



[PATCH] Specify all interrupts for the GPIO controller.

2016-10-03 Thread Vijay Kumar B
The PXA GPIO controller has 3 interrupt outputs, this needs to be
indicated in the DTS file. Without this mainstone's CPLD interrupt 0
will not be raised to the processor.

Signed-off-by: Vijay Kumar B. <vijayku...@zilogic.com>
Reviewed-by: Deepak S. <dee...@zilogic.com>
---
 arch/arm/boot/dts/pxa2xx.dtsi | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/boot/dts/pxa2xx.dtsi b/arch/arm/boot/dts/pxa2xx.dtsi
index 5e5af07..9ca2e5b 100644
--- a/arch/arm/boot/dts/pxa2xx.dtsi
+++ b/arch/arm/boot/dts/pxa2xx.dtsi
@@ -54,8 +54,8 @@
reg = <0x40e00000 0x10000>;
gpio-controller;
#gpio-cells = <0x2>;
-   interrupts = <10>;
-   interrupt-names = "gpio_mux";
+   interrupts = <8 9 10>;
+   interrupt-names = "gpio0", "gpio1", "gpio_mux";
interrupt-controller;
#interrupt-cells = <0x2>;
ranges;
-- 
2.1.4



[PATCH] Specify all interrupts for the GPIO controller.

2016-10-03 Thread Vijay Kumar B
The PXA GPIO controller has 3 interrupt outputs, this needs to be
indicated in the DTS file. Without this mainstone's CPLD interrupt 0
will not be raised to the processor.

Signed-off-by: Vijay Kumar B. 
Reviewed-by: Deepak S. 
---
 arch/arm/boot/dts/pxa2xx.dtsi | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/boot/dts/pxa2xx.dtsi b/arch/arm/boot/dts/pxa2xx.dtsi
index 5e5af07..9ca2e5b 100644
--- a/arch/arm/boot/dts/pxa2xx.dtsi
+++ b/arch/arm/boot/dts/pxa2xx.dtsi
@@ -54,8 +54,8 @@
reg = <0x40e00000 0x10000>;
gpio-controller;
#gpio-cells = <0x2>;
-   interrupts = <10>;
-   interrupt-names = "gpio_mux";
+   interrupts = <8 9 10>;
+   interrupt-names = "gpio0", "gpio1", "gpio_mux";
interrupt-controller;
#interrupt-cells = <0x2>;
ranges;
-- 
2.1.4



[RESEND PATCH] ARM: dts: pxa2xx: Specify all interrupts for the GPIO controller.

2016-10-03 Thread Vijay Kumar B
The PXA GPIO controller has 3 interrupt outputs, this needs to be
indicated in the DTS file. Without this mainstone's CPLD interrupt 0
will not be raised to the processor.

Signed-off-by: Vijay Kumar B. <vijayku...@zilogic.com>
Reviewed-by: Deepak S. <dee...@zilogic.com>
---
 arch/arm/boot/dts/pxa2xx.dtsi | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/boot/dts/pxa2xx.dtsi b/arch/arm/boot/dts/pxa2xx.dtsi
index 5e5af07..9ca2e5b 100644
--- a/arch/arm/boot/dts/pxa2xx.dtsi
+++ b/arch/arm/boot/dts/pxa2xx.dtsi
@@ -54,8 +54,8 @@
reg = <0x40e00000 0x10000>;
gpio-controller;
#gpio-cells = <0x2>;
-   interrupts = <10>;
-   interrupt-names = "gpio_mux";
+   interrupts = <8 9 10>;
+   interrupt-names = "gpio0", "gpio1", "gpio_mux";
interrupt-controller;
#interrupt-cells = <0x2>;
ranges;
-- 
2.1.4



[RESEND PATCH] ARM: dts: pxa2xx: Specify all interrupts for the GPIO controller.

2016-10-03 Thread Vijay Kumar B
The PXA GPIO controller has 3 interrupt outputs, this needs to be
indicated in the DTS file. Without this mainstone's CPLD interrupt 0
will not be raised to the processor.

Signed-off-by: Vijay Kumar B. 
Reviewed-by: Deepak S. 
---
 arch/arm/boot/dts/pxa2xx.dtsi | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/boot/dts/pxa2xx.dtsi b/arch/arm/boot/dts/pxa2xx.dtsi
index 5e5af07..9ca2e5b 100644
--- a/arch/arm/boot/dts/pxa2xx.dtsi
+++ b/arch/arm/boot/dts/pxa2xx.dtsi
@@ -54,8 +54,8 @@
reg = <0x40e00000 0x10000>;
gpio-controller;
#gpio-cells = <0x2>;
-   interrupts = <10>;
-   interrupt-names = "gpio_mux";
+   interrupts = <8 9 10>;
+   interrupt-names = "gpio0", "gpio1", "gpio_mux";
interrupt-controller;
#interrupt-cells = <0x2>;
ranges;
-- 
2.1.4



  1   2   >