Re: [patch 21/24] Immediate Values - x86 Optimization NMI and MCE support (updated)

2007-12-21 Thread Mathieu Desnoyers
x86 optimization of the immediate values which uses a movl with code patching
to set/unset the value used to populate the register used as variable source.
It uses a breakpoint to bypass the instruction being changed, which lessens the
interrupt latency of the operation and protects against NMIs and MCE.

- More reentrant immediate value : uses a breakpoint. Needs to know the
  instruction's first byte. This is why we keep the "instruction size"
  variable, so we can support the REX prefixed instructions too.

Changelog:
- Use text_poke_early with cr0 WP save/restore to patch the bypass. We are doing
  non atomic writes to a code region only touched by us (nobody can execute it
  since we are protected by the imv_mutex).
- Add x86_64 support, ready for i386+x86_64 -> x86 merge.
- Use asm-x86/asm.h.
- Change the immediate.c update code to support variable length opcodes.
- Use imv_* instead of immediate_*.
- Use kernel_wp_disable/enable instead of save/restore.

Signed-off-by: Mathieu Desnoyers <[EMAIL PROTECTED]>
CC: Andi Kleen <[EMAIL PROTECTED]>
CC: "H. Peter Anvin" <[EMAIL PROTECTED]>
CC: Chuck Ebbert <[EMAIL PROTECTED]>
CC: Christoph Hellwig <[EMAIL PROTECTED]>
CC: Jeremy Fitzhardinge <[EMAIL PROTECTED]>
CC: Thomas Gleixner <[EMAIL PROTECTED]>
CC: Ingo Molnar <[EMAIL PROTECTED]>
---
 arch/x86/kernel/Makefile_32 |1 
 arch/x86/kernel/Makefile_64 |1 
 arch/x86/kernel/immediate.c |  281 
 arch/x86/kernel/traps_32.c  |   10 -
 include/asm-x86/immediate.h |   42 +-
 5 files changed, 326 insertions(+), 9 deletions(-)

Index: linux-2.6-lttng.mm/include/asm-x86/immediate.h
===
--- linux-2.6-lttng.mm.orig/include/asm-x86/immediate.h 2007-12-20 
20:48:44.0 -0500
+++ linux-2.6-lttng.mm/include/asm-x86/immediate.h  2007-12-21 
08:03:51.0 -0500
@@ -12,6 +12,18 @@
 
 #include <asm/asm.h>
 
+struct __imv {
+   unsigned long var;  /* Pointer to the identifier variable of the
+* immediate value
+*/
+   unsigned long imv;  /*
+* Pointer to the memory location of the
+* immediate value within the instruction.
+*/
+   unsigned char size; /* Type size. */
+   unsigned char insn_size;/* Instruction size. */
+} __attribute__ ((packed));
+
 /**
  * imv_read - read immediate variable
  * @name: immediate value name
@@ -26,6 +38,11 @@
  * what will generate an instruction with 8 bytes immediate value (not the REX.W
  * prefixed one that loads a sign extended 32 bits immediate value in a r64
  * register).
+ *
+ * Create the instruction in a discarded section to calculate its size. This is
+ * how we can align the beginning of the instruction on an address that will
+ * permit atomic modification of the immediate value without knowing the size of
+ * the opcode used by the compiler. The operand size is known in advance.
  */
 #define imv_read(name) \
({  \
@@ -35,8 +52,9 @@
case 1: \
asm(".section __imv,\"a\",@progbits\n\t"\
_ASM_PTR "%c1, (3f)-%c2\n\t"\
-   ".byte %c2\n\t" \
+   ".byte %c2, (3f-2f)\n\t"\
".previous\n\t" \
+   "2:\n\t"\
"mov $0,%0\n\t" \
"3:\n\t"\
: "=q" (value)  \
@@ -45,10 +63,16 @@
break;  \
case 2: \
case 4: \
-   asm(".section __imv,\"a\",@progbits\n\t"\
+   asm(".section __discard,\"\",@progbits\n\t" \
+   "1:\n\t"\
+   "mov $0,%0\n\t" \
+   "2:\n\t"\
+   ".previous\n\t" \
+   ".section __imv,\"a\",@progbits\n\t"\
_ASM_PTR "%c1, (3f)-%c2\n\t"\
-   ".byte %c2\n\t" \
+   ".byte %c2, (2b-1b)\n\t"\

Re: [patch 21/24] Immediate Values - x86 Optimization NMI and MCE support (updated)

2007-12-21 Thread Mathieu Desnoyers
x86 optimization of the immediate values which uses a movl with code patching
to set/unset the value used to populate the register used as variable source.
It uses a breakpoint to bypass the instruction being changed, which lessens the
interrupt latency of the operation and protects against NMIs and MCE.

- More reentrant immediate value : uses a breakpoint. Needs to know the
  instruction's first byte. This is why we keep the "instruction size"
  variable, so we can support the REX prefixed instructions too.

Changelog:
- Use text_poke_early with cr0 WP save/restore to patch the bypass. We are doing
  non atomic writes to a code region only touched by us (nobody can execute it
  since we are protected by the imv_mutex).
- Add x86_64 support, ready for i386+x86_64 -> x86 merge.
- Use asm-x86/asm.h.
- Change the immediate.c update code to support variable length opcodes.
- Use imv_* instead of immediate_*.
- Use kernel_wp_disable/enable instead of save/restore.

Signed-off-by: Mathieu Desnoyers <[EMAIL PROTECTED]>
CC: Andi Kleen <[EMAIL PROTECTED]>
CC: "H. Peter Anvin" <[EMAIL PROTECTED]>
CC: Chuck Ebbert <[EMAIL PROTECTED]>
CC: Christoph Hellwig <[EMAIL PROTECTED]>
CC: Jeremy Fitzhardinge <[EMAIL PROTECTED]>
CC: Thomas Gleixner <[EMAIL PROTECTED]>
CC: Ingo Molnar <[EMAIL PROTECTED]>
---
 arch/x86/kernel/Makefile_32 |1 
 arch/x86/kernel/Makefile_64 |1 
 arch/x86/kernel/immediate.c |  281 
 arch/x86/kernel/traps_32.c  |   10 -
 include/asm-x86/immediate.h |   42 +-
 5 files changed, 326 insertions(+), 9 deletions(-)

Index: linux-2.6-lttng.mm/include/asm-x86/immediate.h
===
--- linux-2.6-lttng.mm.orig/include/asm-x86/immediate.h 2007-12-20 
20:48:44.0 -0500
+++ linux-2.6-lttng.mm/include/asm-x86/immediate.h  2007-12-21 
08:03:51.0 -0500
@@ -12,6 +12,18 @@
 
 #include <asm/asm.h>
 
+struct __imv {
+   unsigned long var;  /* Pointer to the identifier variable of the
+* immediate value
+*/
+   unsigned long imv;  /*
+* Pointer to the memory location of the
+* immediate value within the instruction.
+*/
+   unsigned char size; /* Type size. */
+   unsigned char insn_size;/* Instruction size. */
+} __attribute__ ((packed));
+
 /**
  * imv_read - read immediate variable
  * @name: immediate value name
@@ -26,6 +38,11 @@
  * what will generate an instruction with 8 bytes immediate value (not the REX.W
  * prefixed one that loads a sign extended 32 bits immediate value in a r64
  * register).
+ *
+ * Create the instruction in a discarded section to calculate its size. This is
+ * how we can align the beginning of the instruction on an address that will
+ * permit atomic modification of the immediate value without knowing the size of
+ * the opcode used by the compiler. The operand size is known in advance.
  */
 #define imv_read(name) \
({  \
@@ -35,8 +52,9 @@
case 1: \
asm(".section __imv,\"a\",@progbits\n\t"\
_ASM_PTR "%c1, (3f)-%c2\n\t"\
-   ".byte %c2\n\t" \
+   ".byte %c2, (3f-2f)\n\t"\
".previous\n\t" \
+   "2:\n\t"\
"mov $0,%0\n\t" \
"3:\n\t"\
: "=q" (value)  \
@@ -45,10 +63,16 @@
break;  \
case 2: \
case 4: \
-   asm(".section __imv,\"a\",@progbits\n\t"\
+   asm(".section __discard,\"\",@progbits\n\t" \
+   "1:\n\t"\
+   "mov $0,%0\n\t" \
+   "2:\n\t"\
+   ".previous\n\t" \
+   ".section __imv,\"a\",@progbits\n\t"\
_ASM_PTR "%c1, (3f)-%c2\n\t"\
-   ".byte %c2\n\t" \
+   ".byte %c2, (2b-1b)\n\t"\
".previous\n\t" \
+  

[patch 21/24] Immediate Values - x86 Optimization NMI and MCE support

2007-12-20 Thread Mathieu Desnoyers
x86 optimization of the immediate values which uses a movl with code patching
to set/unset the value used to populate the register used as variable source.
It uses a breakpoint to bypass the instruction being changed, which lessens the
interrupt latency of the operation and protects against NMIs and MCE.

Changelog:
- Use text_poke_early with cr0 WP save/restore to patch the bypass. We are doing
  non atomic writes to a code region only touched by us (nobody can execute it
  since we are protected by the imv_mutex).
- Add x86_64 support, ready for i386+x86_64 -> x86 merge.
- Use asm-x86/asm.h.
- Change the immediate.c update code to support variable length opcodes.
- Use imv_* instead of immediate_*.
- Use kernel_wp_disable/enable instead of save/restore.

Signed-off-by: Mathieu Desnoyers <[EMAIL PROTECTED]>
CC: Andi Kleen <[EMAIL PROTECTED]>
CC: "H. Peter Anvin" <[EMAIL PROTECTED]>
CC: Chuck Ebbert <[EMAIL PROTECTED]>
CC: Christoph Hellwig <[EMAIL PROTECTED]>
CC: Jeremy Fitzhardinge <[EMAIL PROTECTED]>
CC: Thomas Gleixner <[EMAIL PROTECTED]>
CC: Ingo Molnar <[EMAIL PROTECTED]>
---
 arch/x86/kernel/Makefile_32 |1 
 arch/x86/kernel/Makefile_64 |1 
 arch/x86/kernel/immediate.c |  277 
 arch/x86/kernel/traps_32.c  |   10 -
 include/asm-x86/immediate.h |   42 +-
 5 files changed, 322 insertions(+), 9 deletions(-)

Index: linux-2.6-lttng/include/asm-x86/immediate.h
===
--- linux-2.6-lttng.orig/include/asm-x86/immediate.h2007-12-06 
09:41:58.0 -0500
+++ linux-2.6-lttng/include/asm-x86/immediate.h 2007-12-06 09:42:29.0 
-0500
@@ -12,6 +12,18 @@
 
 #include <asm/asm.h>
 
+struct __imv {
+   unsigned long var;  /* Pointer to the identifier variable of the
+* immediate value
+*/
+   unsigned long imv;  /*
+* Pointer to the memory location of the
+* immediate value within the instruction.
+*/
+   unsigned char size; /* Type size. */
+   unsigned char insn_size;/* Instruction size. */
+} __attribute__ ((packed));
+
 /**
  * imv_read - read immediate variable
  * @name: immediate value name
@@ -26,6 +38,11 @@
  * what will generate an instruction with 8 bytes immediate value (not the REX.W
  * prefixed one that loads a sign extended 32 bits immediate value in a r64
  * register).
+ *
+ * Create the instruction in a discarded section to calculate its size. This is
+ * how we can align the beginning of the instruction on an address that will
+ * permit atomic modification of the immediate value without knowing the size of
+ * the opcode used by the compiler. The operand size is known in advance.
  */
 #define imv_read(name) \
({  \
@@ -35,8 +52,9 @@
case 1: \
asm(".section __imv,\"a\",@progbits\n\t"\
_ASM_PTR "%c1, (3f)-%c2\n\t"\
-   ".byte %c2\n\t" \
+   ".byte %c2, (3f-2f)\n\t"\
".previous\n\t" \
+   "2:\n\t"\
"mov $0,%0\n\t" \
"3:\n\t"\
: "=q" (value)  \
@@ -45,10 +63,16 @@
break;  \
case 2: \
case 4: \
-   asm(".section __imv,\"a\",@progbits\n\t"\
+   asm(".section __discard,\"\",@progbits\n\t" \
+   "1:\n\t"\
+   "mov $0,%0\n\t" \
+   "2:\n\t"\
+   ".previous\n\t" \
+   ".section __imv,\"a\",@progbits\n\t"\
_ASM_PTR "%c1, (3f)-%c2\n\t"\
-   ".byte %c2\n\t" \
+   ".byte %c2, (2b-1b)\n\t"\
".previous\n\t" \
+   ".org . + ((-.-(2b-1b)) & (%c2-1)), 0x90\n\t" \
"mov $0,%0\n\t" \

[patch 21/24] Immediate Values - x86 Optimization NMI and MCE support

2007-12-20 Thread Mathieu Desnoyers
x86 optimization of the immediate values which uses a movl with code patching
to set/unset the value used to populate the register used as variable source.
It uses a breakpoint to bypass the instruction being changed, which lessens the
interrupt latency of the operation and protects against NMIs and MCE.

Changelog:
- Use text_poke_early with cr0 WP save/restore to patch the bypass. We are doing
  non atomic writes to a code region only touched by us (nobody can execute it
  since we are protected by the imv_mutex).
- Add x86_64 support, ready for i386+x86_64 -> x86 merge.
- Use asm-x86/asm.h.
- Change the immediate.c update code to support variable length opcodes.
- Use imv_* instead of immediate_*.
- Use kernel_wp_disable/enable instead of save/restore.

Signed-off-by: Mathieu Desnoyers <[EMAIL PROTECTED]>
CC: Andi Kleen <[EMAIL PROTECTED]>
CC: "H. Peter Anvin" <[EMAIL PROTECTED]>
CC: Chuck Ebbert <[EMAIL PROTECTED]>
CC: Christoph Hellwig <[EMAIL PROTECTED]>
CC: Jeremy Fitzhardinge <[EMAIL PROTECTED]>
CC: Thomas Gleixner <[EMAIL PROTECTED]>
CC: Ingo Molnar <[EMAIL PROTECTED]>
---
 arch/x86/kernel/Makefile_32 |1 
 arch/x86/kernel/Makefile_64 |1 
 arch/x86/kernel/immediate.c |  277 
 arch/x86/kernel/traps_32.c  |   10 -
 include/asm-x86/immediate.h |   42 +-
 5 files changed, 322 insertions(+), 9 deletions(-)

Index: linux-2.6-lttng/include/asm-x86/immediate.h
===
--- linux-2.6-lttng.orig/include/asm-x86/immediate.h2007-12-06 
09:41:58.0 -0500
+++ linux-2.6-lttng/include/asm-x86/immediate.h 2007-12-06 09:42:29.0 
-0500
@@ -12,6 +12,18 @@
 
 #include <asm/asm.h>
 
+struct __imv {
+   unsigned long var;  /* Pointer to the identifier variable of the
+* immediate value
+*/
+   unsigned long imv;  /*
+* Pointer to the memory location of the
+* immediate value within the instruction.
+*/
+   unsigned char size; /* Type size. */
+   unsigned char insn_size;/* Instruction size. */
+} __attribute__ ((packed));
+
 /**
  * imv_read - read immediate variable
  * @name: immediate value name
@@ -26,6 +38,11 @@
  * what will generate an instruction with 8 bytes immediate value (not the REX.W
  * prefixed one that loads a sign extended 32 bits immediate value in a r64
  * register).
+ *
+ * Create the instruction in a discarded section to calculate its size. This is
+ * how we can align the beginning of the instruction on an address that will
+ * permit atomic modification of the immediate value without knowing the size of
+ * the opcode used by the compiler. The operand size is known in advance.
  */
 #define imv_read(name) \
({  \
@@ -35,8 +52,9 @@
case 1: \
asm(".section __imv,\"a\",@progbits\n\t"\
_ASM_PTR "%c1, (3f)-%c2\n\t"\
-   ".byte %c2\n\t" \
+   ".byte %c2, (3f-2f)\n\t"\
".previous\n\t" \
+   "2:\n\t"\
"mov $0,%0\n\t" \
"3:\n\t"\
: "=q" (value)  \
@@ -45,10 +63,16 @@
break;  \
case 2: \
case 4: \
-   asm(".section __imv,\"a\",@progbits\n\t"\
+   asm(".section __discard,\"\",@progbits\n\t" \
+   "1:\n\t"\
+   "mov $0,%0\n\t" \
+   "2:\n\t"\
+   ".previous\n\t" \
+   ".section __imv,\"a\",@progbits\n\t"\
_ASM_PTR "%c1, (3f)-%c2\n\t"\
-   ".byte %c2\n\t" \
+   ".byte %c2, (2b-1b)\n\t"\
".previous\n\t" \
+   ".org . + ((-.-(2b-1b)) & (%c2-1)), 0x90\n\t" \
"mov $0,%0\n\t" \
"3:\n\t"\