Thanks for the review, Christophe.

On 10/10/23 11:16 pm, Christophe Leroy wrote:


Le 28/09/2023 à 21:48, Hari Bathini a écrit :
patch_instruction() entails setting up pte, patching the instruction,
clearing the pte and flushing the tlb. If multiple instructions need
to be patched, every instruction would have to go through the above
drill unnecessarily. Instead, introduce function patch_instructions()
that sets up the pte, clears the pte and flushes the tlb only once per
page range of instructions to be patched. This adds a slight overhead
to patch_instruction() call while improving the patching time for
scenarios where more than one instruction needs to be patched.

Not a "slight" but a "significant" overhead on PPC32.

Thinking about it once more I don't think it is a good idea to try and
merge that into the existing code_patching logic which is really single
instruction performance oriented.

Anyway, comments below.


Signed-off-by: Hari Bathini <hbath...@linux.ibm.com>
---
   arch/powerpc/include/asm/code-patching.h |  1 +
   arch/powerpc/lib/code-patching.c         | 93 +++++++++++++++++++++---
   2 files changed, 85 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/code-patching.h 
b/arch/powerpc/include/asm/code-patching.h
index 3f881548fb61..43a4aedfa703 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -74,6 +74,7 @@ int create_cond_branch(ppc_inst_t *instr, const u32 *addr,
   int patch_branch(u32 *addr, unsigned long target, int flags);
   int patch_instruction(u32 *addr, ppc_inst_t instr);
   int raw_patch_instruction(u32 *addr, ppc_inst_t instr);
+int patch_instructions(void *addr, void *code, size_t len, bool repeat_instr);

I don't like void *, you can do to much nasty things with that.
I think you want u32 *

static inline unsigned long patch_site_addr(s32 *site)
   {
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index b00112d7ad46..4ff002bc41f6 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -278,7 +278,36 @@ static void unmap_patch_area(unsigned long addr)
        flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
   }
-static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr)
+static int __patch_instructions(u32 *patch_addr, void *code, size_t len, bool 
repeat_instr)
+{
+       unsigned long start = (unsigned long)patch_addr;
+
+       /* Repeat instruction */
+       if (repeat_instr) {
+               ppc_inst_t instr = ppc_inst_read(code);
+
+               if (ppc_inst_prefixed(instr)) {
+                       u64 val = ppc_inst_as_ulong(instr);
+
+                       memset64((uint64_t *)patch_addr, val, len / 8);

Use u64 instead of uint64_t.

+               } else {
+                       u32 val = ppc_inst_val(instr);
+
+                       memset32(patch_addr, val, len / 4);
+               }
+       } else
+               memcpy(patch_addr, code, len);

Missing braces, see
https://docs.kernel.org/process/coding-style.html#placing-braces-and-spaces

+
+       smp_wmb();      /* smp write barrier */
+       flush_icache_range(start, start + len);
+       return 0;
+}
+
+/*
+ * A page is mapped and instructions that fit the page are patched.
+ * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below.
+ */
+static int __do_patch_instructions_mm(u32 *addr, void *code, size_t len, bool 
repeat_instr)
   {
        int err;
        u32 *patch_addr;
@@ -307,11 +336,15 @@ static int __do_patch_instruction_mm(u32 *addr, 
ppc_inst_t instr)
orig_mm = start_using_temp_mm(patching_mm); - err = __patch_instruction(addr, instr, patch_addr);
+       /* Single instruction case. */
+       if (len == 0) {
+               err = __patch_instruction(addr, *(ppc_inst_t *)code, 
patch_addr);

Take care, you can't convert u32 * to ppc_inst_t that way, you have to
use ppc_inst_read() otherwise you'll get odd result with prefixed
instructions depending on endianness.

- /* hwsync performed by __patch_instruction (sync) if successful */
-       if (err)
-               mb();  /* sync */
+               /* hwsync performed by __patch_instruction (sync) if successful 
*/
+               if (err)
+                       mb();  /* sync */

Get this away, see my patch at
https://patchwork.ozlabs.org/project/linuxppc-dev/patch/e88b154eaf2efd9ff177d472d3411dcdec8ff4f5.1696675567.git.christophe.le...@csgroup.eu/

+       } else
+               err = __patch_instructions(patch_addr, code, len, repeat_instr);
/* context synchronisation performed by __patch_instruction (isync or exception) */
        stop_using_temp_mm(patching_mm, orig_mm);
@@ -328,7 +361,11 @@ static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t 
instr)
        return err;
   }
-static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
+/*
+ * A page is mapped and instructions that fit the page are patched.
+ * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below.
+ */
+static int __do_patch_instructions(u32 *addr, void *code, size_t len, bool 
repeat_instr)
   {
        int err;
        u32 *patch_addr;
@@ -345,7 +382,11 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t 
instr)
        if (radix_enabled())
                asm volatile("ptesync": : :"memory");
- err = __patch_instruction(addr, instr, patch_addr);
+       /* Single instruction case. */
+       if (len == 0)
+               err = __patch_instruction(addr, *(ppc_inst_t *)code, 
patch_addr);

Same, use ppc_inst_read() instead of this nasty casting.

+       else
+               err = __patch_instructions(patch_addr, code, len, repeat_instr);
pte_clear(&init_mm, text_poke_addr, pte);
        flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);
@@ -369,15 +410,49 @@ int patch_instruction(u32 *addr, ppc_inst_t instr)
local_irq_save(flags);
        if (mm_patch_enabled())
-               err = __do_patch_instruction_mm(addr, instr);
+               err = __do_patch_instructions_mm(addr, &instr, 0, false);
        else
-               err = __do_patch_instruction(addr, instr);
+               err = __do_patch_instructions(addr, &instr, 0, false);
        local_irq_restore(flags);
return err;
   }
   NOKPROBE_SYMBOL(patch_instruction);
+/*
+ * Patch 'addr' with 'len' bytes of instructions from 'code'.
+ *
+ * If repeat_instr is true, the same instruction is filled for
+ * 'len' bytes.
+ */
+int patch_instructions(void *addr, void *code, size_t len, bool repeat_instr)

I'd like to see code as a u32 *

+{
+       unsigned long flags;
+       size_t plen;
+       int err;

Move those three variables inside the only block in which they are used.

+
+       while (len > 0) {
+               plen = min_t(size_t, PAGE_SIZE - offset_in_page(addr), len);
+
+               local_irq_save(flags);
+               if (mm_patch_enabled())
+                       err = __do_patch_instructions_mm(addr, code, plen, 
repeat_instr);
+               else
+                       err = __do_patch_instructions(addr, code, plen, 
repeat_instr);
+               local_irq_restore(flags);
+               if (err)
+                       break;

replace by 'return err'

+
+               len -= plen;
+               addr = addr + plen;
+               if (!repeat_instr)
+                       code = code + plen;
+       }
+
+       return err;

If len is 0 err will be undefined. Is that expected ?

Replace by return 0;

Posted v6 (https://lore.kernel.org/linuxppc-dev/20231012200310.235137-1-hbath...@linux.ibm.com/)
with code path for patch_instruction() & patch_instriuctions() unmerged
to avoid performance hit reported on ppc32. Also, addressed other review
comments.

Thanks
Hari

Reply via email to