Le 15/03/2024 à 09:38, Christophe Leroy a écrit :
> 
> 
> Le 15/03/2024 à 03:59, Benjamin Gray a écrit :
>> The existing patching alias page setup and teardown sections can be
>> simplified to make use of the new open_patch_window() abstraction.
>>
>> This eliminates the _mm variants of the helpers, consumers no longer
>> need to check mm_patch_enabled(), and consumers no longer need to worry
>> about synchronization and flushing beyond the changes they make in the
>> patching window.
> 
> With this patch, the time needed to activate or de-activate function 
> tracer is approx 10% longer on powerpc 8xx.

See below difference of patch_instruction() before and after your patch, 
both for 4k pages and 16k pages:

16k pages, before your patch:

00000278 <patch_instruction>:
  278:  48 00 00 84     nop
  27c:  7c e0 00 a6     mfmsr   r7
  280:  7c 51 13 a6     mtspr   81,r2
  284:  3d 00 00 00     lis     r8,0
                        286: R_PPC_ADDR16_HA    .data
  288:  39 08 00 00     addi    r8,r8,0
                        28a: R_PPC_ADDR16_LO    .data
  28c:  7c 69 1b 78     mr      r9,r3
  290:  3d 29 40 00     addis   r9,r9,16384
  294:  81 48 00 08     lwz     r10,8(r8)
  298:  55 29 00 22     clrrwi  r9,r9,14
  29c:  81 08 00 04     lwz     r8,4(r8)
  2a0:  61 29 01 2d     ori     r9,r9,301
  2a4:  55 06 00 22     clrrwi  r6,r8,14
  2a8:  91 2a 00 00     stw     r9,0(r10)
  2ac:  91 2a 00 04     stw     r9,4(r10)
  2b0:  91 2a 00 08     stw     r9,8(r10)
  2b4:  91 2a 00 0c     stw     r9,12(r10)
  2b8:  50 68 04 be     rlwimi  r8,r3,0,18,31
  2bc:  90 88 00 00     stw     r4,0(r8)
  2c0:  7c 00 40 6c     dcbst   0,r8
  2c4:  7c 00 04 ac     hwsync
  2c8:  7c 00 1f ac     icbi    0,r3
  2cc:  7c 00 04 ac     hwsync
  2d0:  4c 00 01 2c     isync
  2d4:  38 60 00 00     li      r3,0
  2d8:  39 20 00 00     li      r9,0
  2dc:  91 2a 00 00     stw     r9,0(r10)
  2e0:  91 2a 00 04     stw     r9,4(r10)
  2e4:  91 2a 00 08     stw     r9,8(r10)
  2e8:  91 2a 00 0c     stw     r9,12(r10)
  2ec:  7c 00 32 64     tlbie   r6,r0
  2f0:  7c 00 04 ac     hwsync
  2f4:  7c e0 01 24     mtmsr   r7
  2f8:  4e 80 00 20     blr

16k pages, after your patch. Now we have a stack frame for the call to 
close_patch_window(). And the branch in close_patch_window() is 
unexpected as patch_instruction() works on single pages.

0000024c <close_patch_window>:
  24c:  81 23 00 04     lwz     r9,4(r3)
  250:  39 40 00 00     li      r10,0
  254:  91 49 00 00     stw     r10,0(r9)
  258:  91 49 00 04     stw     r10,4(r9)
  25c:  91 49 00 08     stw     r10,8(r9)
  260:  91 49 00 0c     stw     r10,12(r9)
  264:  81 23 00 00     lwz     r9,0(r3)
  268:  55 2a 00 22     clrrwi  r10,r9,14
  26c:  39 29 40 00     addi    r9,r9,16384
  270:  7d 2a 48 50     subf    r9,r10,r9
  274:  28 09 40 00     cmplwi  r9,16384
  278:  41 81 00 10     bgt     288 <close_patch_window+0x3c>
  27c:  7c 00 52 64     tlbie   r10,r0
  280:  7c 00 04 ac     hwsync
  284:  4e 80 00 20     blr
  288:  7c 00 04 ac     hwsync
  28c:  7c 00 02 e4     tlbia
  290:  4c 00 01 2c     isync
  294:  4e 80 00 20     blr

000002c4 <patch_instruction>:
  2c4:  94 21 ff d0     stwu    r1,-48(r1)
  2c8:  93 c1 00 28     stw     r30,40(r1)
  2cc:  48 00 00 ac     nop
  2d0:  7c 08 02 a6     mflr    r0
  2d4:  90 01 00 34     stw     r0,52(r1)
  2d8:  93 e1 00 2c     stw     r31,44(r1)
  2dc:  7f e0 00 a6     mfmsr   r31
  2e0:  7c 51 13 a6     mtspr   81,r2
  2e4:  3d 40 00 00     lis     r10,0
                        2e6: R_PPC_ADDR16_HA    .data
  2e8:  39 4a 00 00     addi    r10,r10,0
                        2ea: R_PPC_ADDR16_LO    .data
  2ec:  7c 69 1b 78     mr      r9,r3
  2f0:  3d 29 40 00     addis   r9,r9,16384
  2f4:  81 0a 00 08     lwz     r8,8(r10)
  2f8:  80 ca 00 04     lwz     r6,4(r10)
  2fc:  55 29 00 22     clrrwi  r9,r9,14
  300:  61 29 01 2d     ori     r9,r9,301
  304:  38 e0 00 00     li      r7,0
  308:  54 6a 04 be     clrlwi  r10,r3,18
  30c:  91 28 00 00     stw     r9,0(r8)
  310:  91 28 00 04     stw     r9,4(r8)
  314:  91 28 00 08     stw     r9,8(r8)
  318:  91 28 00 0c     stw     r9,12(r8)
  31c:  91 01 00 0c     stw     r8,12(r1)
  320:  90 c1 00 08     stw     r6,8(r1)
  324:  7d 4a 32 14     add     r10,r10,r6
  328:  90 e1 00 10     stw     r7,16(r1)
  32c:  90 e1 00 14     stw     r7,20(r1)
  330:  90 e1 00 18     stw     r7,24(r1)
  334:  90 8a 00 00     stw     r4,0(r10)
  338:  7c 00 50 6c     dcbst   0,r10
  33c:  7c 00 04 ac     hwsync
  340:  7c 00 1f ac     icbi    0,r3
  344:  7c 00 04 ac     hwsync
  348:  4c 00 01 2c     isync
  34c:  3b c0 00 00     li      r30,0
  350:  38 61 00 08     addi    r3,r1,8
  354:  4b ff fe f9     bl      24c <close_patch_window>
  358:  7f e0 01 24     mtmsr   r31
  35c:  80 01 00 34     lwz     r0,52(r1)
  360:  83 e1 00 2c     lwz     r31,44(r1)
  364:  7c 08 03 a6     mtlr    r0
  368:  7f c3 f3 78     mr      r3,r30
  36c:  83 c1 00 28     lwz     r30,40(r1)
  370:  38 21 00 30     addi    r1,r1,48
  374:  4e 80 00 20     blr

4k pages before your patch:

00000268 <patch_instruction>:
  268:  48 00 00 6c     nop
  26c:  7c e0 00 a6     mfmsr   r7
  270:  7c 51 13 a6     mtspr   81,r2
  274:  3d 40 00 00     lis     r10,0
                        276: R_PPC_ADDR16_HA    .data
  278:  39 4a 00 00     addi    r10,r10,0
                        27a: R_PPC_ADDR16_LO    .data
  27c:  7c 69 1b 78     mr      r9,r3
  280:  3d 29 40 00     addis   r9,r9,16384
  284:  81 0a 00 08     lwz     r8,8(r10)
  288:  55 29 00 26     clrrwi  r9,r9,12
  28c:  81 4a 00 04     lwz     r10,4(r10)
  290:  61 29 01 25     ori     r9,r9,293
  294:  91 28 00 00     stw     r9,0(r8)
  298:  55 49 00 26     clrrwi  r9,r10,12
  29c:  50 6a 05 3e     rlwimi  r10,r3,0,20,31
  2a0:  90 8a 00 00     stw     r4,0(r10)
  2a4:  7c 00 50 6c     dcbst   0,r10
  2a8:  7c 00 04 ac     hwsync
  2ac:  7c 00 1f ac     icbi    0,r3
  2b0:  7c 00 04 ac     hwsync
  2b4:  4c 00 01 2c     isync
  2b8:  38 60 00 00     li      r3,0
  2bc:  39 40 00 00     li      r10,0
  2c0:  91 48 00 00     stw     r10,0(r8)
  2c4:  7c 00 4a 64     tlbie   r9,r0
  2c8:  7c 00 04 ac     hwsync
  2cc:  7c e0 01 24     mtmsr   r7
  2d0:  4e 80 00 20     blr

4k pages after your patch. Here close_patch_window() is inlined but the 
test at the end is crazy, we've been patching one instruction with the 
assumption it is properly aligned, so why is there a branch with a tlbia 
whereas only one page needs to be invalidated ?:

00000268 <patch_instruction>:
  268:  48 00 00 84     nop
  26c:  7c c0 00 a6     mfmsr   r6
  270:  7c 51 13 a6     mtspr   81,r2
  274:  3d 40 00 00     lis     r10,0
                        276: R_PPC_ADDR16_HA    .data
  278:  39 4a 00 00     addi    r10,r10,0
                        27a: R_PPC_ADDR16_LO    .data
  27c:  7c 69 1b 78     mr      r9,r3
  280:  3d 29 40 00     addis   r9,r9,16384
  284:  80 ea 00 08     lwz     r7,8(r10)
  288:  55 29 00 26     clrrwi  r9,r9,12
  28c:  81 4a 00 04     lwz     r10,4(r10)
  290:  61 29 01 25     ori     r9,r9,293
  294:  54 68 05 3e     clrlwi  r8,r3,20
  298:  91 27 00 00     stw     r9,0(r7)
  29c:  7d 08 52 14     add     r8,r8,r10
  2a0:  90 88 00 00     stw     r4,0(r8)
  2a4:  7c 00 40 6c     dcbst   0,r8
  2a8:  7c 00 04 ac     hwsync
  2ac:  7c 00 1f ac     icbi    0,r3
  2b0:  7c 00 04 ac     hwsync
  2b4:  4c 00 01 2c     isync
  2b8:  38 60 00 00     li      r3,0
  2bc:  55 49 00 26     clrrwi  r9,r10,12
  2c0:  39 4a 10 00     addi    r10,r10,4096
  2c4:  7d 49 50 50     subf    r10,r9,r10
  2c8:  28 0a 10 00     cmplwi  r10,4096
  2cc:  39 40 00 00     li      r10,0
  2d0:  91 47 00 00     stw     r10,0(r7)
  2d4:  40 81 00 38     ble     30c <patch_instruction+0xa4>
  2d8:  7c 00 04 ac     hwsync
  2dc:  7c 00 02 e4     tlbia
  2e0:  4c 00 01 2c     isync
  2e4:  7c c0 01 24     mtmsr   r6
  2e8:  4e 80 00 20     blr
...
  30c:  7c 00 4a 64     tlbie   r9,r0
  310:  7c 00 04 ac     hwsync
  314:  7c c0 01 24     mtmsr   r6
  318:  4e 80 00 20     blr

Reply via email to