Re: [patch 06/11] Text Edit Lock - Alternative code for x86 (updated)

2007-11-13 Thread Mathieu Desnoyers
Text Edit Lock - Alternative code for x86

Fix a memcpy that should be a text_poke (in apply_alternatives).

Use kernel_wp_save/kernel_wp_restore in text_poke to support DEBUG_RODATA
correctly and so the CPU HOTPLUG special case can be removed.

clflush all the cachelines touched by text_poke.

Add text_poke_early, for alternatives and paravirt boot-time and module load
time patching.

Notes:
- the clflush is left there, even though Andi Kleen says it breaks some
  architecture.  The proper fix is to detect these CPUs and set the
  cpu_has_clflush flag appropriately. It does not belong here.
- we use a macro for kernel_wp_save/restore to mimic local_irq_save/restore: the
  argument is passed without &.

Changelog:

- Fix text_set and text_poke alignment check (mixed up bitwise and and or)
- Remove text_set
- Use the new macro INIT_ARRAY() to stop polluting the C files with ({ })
  brackets (which breaks some c parsers in editors).
- Export add_nops, so it can be used by others.
- Remove x86 test for "wp_works_ok", it will just be ignored by the architecture
  if not supported.
- Document text_poke_early.

Signed-off-by: Mathieu Desnoyers <[EMAIL PROTECTED]>
CC: Andi Kleen <[EMAIL PROTECTED]>
CC: [EMAIL PROTECTED]
---
 arch/x86/kernel/alternative.c|   78 +--
 include/asm-x86/alternative_32.h |   37 ++
 include/asm-x86/alternative_64.h |   39 +++
 3 files changed, 132 insertions(+), 22 deletions(-)

Index: linux-2.6-lttng/arch/x86/kernel/alternative.c
===
--- linux-2.6-lttng.orig/arch/x86/kernel/alternative.c  2007-11-13 13:43:20.0 -0500
+++ linux-2.6-lttng/arch/x86/kernel/alternative.c   2007-11-13 20:40:54.0 -0500
@@ -27,6 +27,58 @@ __setup("smp-alt-boot", bootonly);
 #define smp_alt_once 1
 #endif
 
+/*
+ * Warning:
+ * When you use this code to patch more than one byte of an instruction
+ * you need to make sure that other CPUs cannot execute this code in parallel.
+ * Also no thread must be currently preempted in the middle of these
+ * instructions.  And on the local CPU you need to be protected against NMI or MCE
+ * handlers seeing an inconsistent instruction while you patch.
+ * Warning: read_cr0 is modified by paravirt, this is why we have _early
+ * versions. They are not in the __init section because they can be used at
+ * module load time.
+ */
+static inline void text_sync(void *addr, size_t len)
+{
+   void *faddr;
+
+   sync_core();
+   /* FIXME Could also do a CLFLUSH here to speed up CPU recovery; but
+  that causes hangs on some VIA CPUs. */
+   /* Not strictly needed, but can speed CPU recovery up. */
+   if (0 && cpu_has_clflush)
+   for (faddr = addr; faddr < addr + len;
+   faddr += boot_cpu_data.x86_clflush_size)
+   asm("clflush (%0) " :: "r" (faddr) : "memory");
+}
+
+void *text_poke_early(void *addr, const void *opcode, size_t len)
+{
+   memcpy(addr, opcode, len);
+   text_sync(addr, len);
+   return addr;
+}
+
+/*
+ * Only atomic text poke/set should be allowed when not doing early patching.
+ * It means the size must be writable atomically and the address must be aligned
+ * in a way that permits an atomic write.
+ */
+
+void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
+{
+   unsigned long cr0;
+
+   BUG_ON(len > sizeof(long));
+   BUG_ON((((long)addr + len - 1) & ~(sizeof(long) - 1))
+   - ((long)addr & ~(sizeof(long) - 1)));
+   kernel_wp_save(cr0);
+   memcpy(addr, opcode, len);
+   kernel_wp_restore(cr0);
+   text_sync(addr, len);
+   return addr;
+}
+
 static int debug_alternative;
 
 static int __init debug_alt(char *str)
@@ -173,7 +225,7 @@ static const unsigned char*const * find_
 #endif /* CONFIG_X86_64 */
 
 /* Use this to add nops to a buffer, then text_poke the whole buffer. */
-static void add_nops(void *insns, unsigned int len)
+void add_nops(void *insns, unsigned int len)
 {
const unsigned char *const *noptable = find_nop_table();
 
@@ -186,6 +238,7 @@ static void add_nops(void *insns, unsign
len -= noplen;
}
 }
+EXPORT_SYMBOL_GPL(add_nops);
 
 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
 extern u8 *__smp_locks[], *__smp_locks_end[];
@@ -219,7 +272,7 @@ void apply_alternatives(struct alt_instr
memcpy(insnbuf, a->replacement, a->replacementlen);
add_nops(insnbuf + a->replacementlen,
 a->instrlen - a->replacementlen);
-   text_poke(instr, insnbuf, a->instrlen);
+   text_poke_early(instr, insnbuf, a->instrlen);
}
 }
 
@@ -234,7 +287,8 @@ static void alternatives_smp_lock(u8 **s
continue;
if (*ptr > text_end)
continue;
-   

Re: [patch 06/11] Text Edit Lock - Alternative code for x86 (updated)

2007-11-13 Thread Mathieu Desnoyers
Text Edit Lock - Alternative code for x86

Fix a memcpy that should be a text_poke (in apply_alternatives).

Use kernel_wp_save/kernel_wp_restore in text_poke to support DEBUG_RODATA
correctly and so the CPU HOTPLUG special case can be removed.

clflush all the cachelines touched by text_poke.

Add text_poke_early, for alternatives and paravirt boot-time and module load
time patching.

Notes:
- the clflush is left there, even though Andi Kleen says it breaks some
  architecture.  The proper fix is to detect these CPUs and set the
  cpu_has_clflush flag appropriately. It does not belong here.
- we use a macro for kernel_wp_save/restore to mimic local_irq_save/restore: the
  argument is passed without &.

Changelog:

- Fix text_set and text_poke alignment check (mixed up bitwise and and or)
- Remove text_set
- Use the new macro INIT_ARRAY() to stop polluting the C files with ({ })
  brackets (which breaks some c parsers in editors).
- Export add_nops, so it can be used by others.
- Remove x86 test for "wp_works_ok", it will just be ignored by the architecture
  if not supported.
- Document text_poke_early.

Signed-off-by: Mathieu Desnoyers <[EMAIL PROTECTED]>
CC: Andi Kleen <[EMAIL PROTECTED]>
CC: [EMAIL PROTECTED]
---
 arch/x86/kernel/alternative.c|   78 +--
 include/asm-x86/alternative_32.h |   37 ++
 include/asm-x86/alternative_64.h |   39 +++
 3 files changed, 132 insertions(+), 22 deletions(-)

Index: linux-2.6-lttng/arch/x86/kernel/alternative.c
===
--- linux-2.6-lttng.orig/arch/x86/kernel/alternative.c  2007-11-13 13:43:20.0 -0500
+++ linux-2.6-lttng/arch/x86/kernel/alternative.c   2007-11-13 20:40:54.0 -0500
@@ -27,6 +27,58 @@ __setup("smp-alt-boot", bootonly);
 #define smp_alt_once 1
 #endif
 
+/*
+ * Warning:
+ * When you use this code to patch more than one byte of an instruction
+ * you need to make sure that other CPUs cannot execute this code in parallel.
+ * Also no thread must be currently preempted in the middle of these
+ * instructions.  And on the local CPU you need to be protected against NMI or MCE
+ * handlers seeing an inconsistent instruction while you patch.
+ * Warning: read_cr0 is modified by paravirt, this is why we have _early
+ * versions. They are not in the __init section because they can be used at
+ * module load time.
+ */
+static inline void text_sync(void *addr, size_t len)
+{
+   void *faddr;
+
+   sync_core();
+   /* FIXME Could also do a CLFLUSH here to speed up CPU recovery; but
+  that causes hangs on some VIA CPUs. */
+   /* Not strictly needed, but can speed CPU recovery up. */
+   if (0 && cpu_has_clflush)
+   for (faddr = addr; faddr < addr + len;
+   faddr += boot_cpu_data.x86_clflush_size)
+   asm("clflush (%0) " :: "r" (faddr) : "memory");
+}
+
+void *text_poke_early(void *addr, const void *opcode, size_t len)
+{
+   memcpy(addr, opcode, len);
+   text_sync(addr, len);
+   return addr;
+}
+
+/*
+ * Only atomic text poke/set should be allowed when not doing early patching.
+ * It means the size must be writable atomically and the address must be aligned
+ * in a way that permits an atomic write.
+ */
+
+void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
+{
+   unsigned long cr0;
+
+   BUG_ON(len > sizeof(long));
+   BUG_ON((((long)addr + len - 1) & ~(sizeof(long) - 1))
+   - ((long)addr & ~(sizeof(long) - 1)));
+   kernel_wp_save(cr0);
+   memcpy(addr, opcode, len);
+   kernel_wp_restore(cr0);
+   text_sync(addr, len);
+   return addr;
+}
+
 static int debug_alternative;
 
 static int __init debug_alt(char *str)
@@ -173,7 +225,7 @@ static const unsigned char*const * find_
 #endif /* CONFIG_X86_64 */
 
 /* Use this to add nops to a buffer, then text_poke the whole buffer. */
-static void add_nops(void *insns, unsigned int len)
+void add_nops(void *insns, unsigned int len)
 {
const unsigned char *const *noptable = find_nop_table();
 
@@ -186,6 +238,7 @@ static void add_nops(void *insns, unsign
len -= noplen;
}
 }
+EXPORT_SYMBOL_GPL(add_nops);
 
 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
 extern u8 *__smp_locks[], *__smp_locks_end[];
@@ -219,7 +272,7 @@ void apply_alternatives(struct alt_instr
memcpy(insnbuf, a->replacement, a->replacementlen);
add_nops(insnbuf + a->replacementlen,
 a->instrlen - a->replacementlen);
-   text_poke(instr, insnbuf, a->instrlen);
+   text_poke_early(instr, insnbuf, a->instrlen);
}
 }
 
@@ -234,7 +287,8 @@ static void alternatives_smp_lock(u8 **s
continue;
if (*ptr > text_end)
continue;
-   text_poke(*ptr, ((unsigned char