Quoting Tvrtko Ursulin (2018-05-17 11:58:21)
> 
> On 17/05/2018 08:40, Chris Wilson wrote:
> > Store whether or not we need to kick the guc's execlists emulation on
> > the engine itself to avoid chasing the device info.
> 
> We do not chase device info but modparams in this case.
> 
> > gen8_cs_irq_handler                          512     428     -84
> 
> I guess my point from before, (unfortunately I forgot to reply), was how 
> much of the saving remains if GEM_BUG_ON is compiled out?

Remember the motto of killing off checking globals and only checking
derived state? :) (I'm definitely not in favour of sprinkling more global
checking CAPS over the code.)

Out of the debug build

gen8_cs_irq_handler                          170     185     +15
gen8_cs_irq_handler                          170     128     -42 (later)
gen8_cs_irq_handler                          170     128     -42 (+ 
USES_GUC_SUBMISSION)

Hmm,

With USES_GUC_SUBMISSION:

   0x000000000000a780 <+0>:     callq  0xa785 <gen8_cs_irq_handler+5>
   0x000000000000a785 <+5>:     push   %r12
   0x000000000000a787 <+7>:     mov    %esi,%r12d
   0x000000000000a78a <+10>:    and    $0x1,%r12d
   0x000000000000a78e <+14>:    and    $0x100,%esi
   0x000000000000a794 <+20>:    push   %rbp
   0x000000000000a795 <+21>:    push   %rbx
   0x000000000000a796 <+22>:    mov    %rdi,%rbx
   0x000000000000a799 <+25>:    je     0xa7c6 <gen8_cs_irq_handler+70>
   0x000000000000a79b <+27>:    lea    0x458(%rdi),%rdi
   0x000000000000a7a2 <+34>:    callq  0xa7a7 <gen8_cs_irq_handler+39>
   0x000000000000a7a7 <+39>:    mov    0x458(%rbx),%eax
   0x000000000000a7ad <+45>:    test   %eax,%eax
   0x000000000000a7af <+47>:    je     0xa7c6 <gen8_cs_irq_handler+70>
   0x000000000000a7b1 <+49>:    lock btsq $0x1,0x108(%rbx)
   0x000000000000a7bb <+59>:    setae  %bpl
   0x000000000000a7bf <+63>:    test   %r12d,%r12d
   0x000000000000a7c2 <+66>:    je     0xa7fa <gen8_cs_irq_handler+122>
   0x000000000000a7c4 <+68>:    jmp    0xa7cd <gen8_cs_irq_handler+77>
   0x000000000000a7c6 <+70>:    test   %r12d,%r12d
   0x000000000000a7c9 <+73>:    je     0xa812 <gen8_cs_irq_handler+146>
   0x000000000000a7cb <+75>:    xor    %ebp,%ebp
   0x000000000000a7cd <+77>:    lea    0x240(%rbx),%rdi
   0x000000000000a7d4 <+84>:    callq  0xa7d9 <gen8_cs_irq_handler+89>
   0x000000000000a7d9 <+89>:    testb  $0x1,0x240(%rbx)
   0x000000000000a7e0 <+96>:    jne    0xa820 <gen8_cs_irq_handler+160>
   0x000000000000a7e2 <+98>:    mov    $0x0,%rdi
   0x000000000000a7e9 <+105>:   callq  0xa7ee <gen8_cs_irq_handler+110>
   0x000000000000a7ee <+110>:   movzbl 0x0(%rip),%eax        # 0xa7f5 
<gen8_cs_irq_handler+117>
   0x000000000000a7f5 <+117>:   and    $0x1,%eax
   0x000000000000a7f8 <+120>:   or     %eax,%ebp
   0x000000000000a7fa <+122>:   test   %bpl,%bpl
   0x000000000000a7fd <+125>:   je     0xa812 <gen8_cs_irq_handler+146>
   0x000000000000a7ff <+127>:   lea    0x3d8(%rbx),%rdi
   0x000000000000a806 <+134>:   lock btsq $0x0,0x3e0(%rbx)
   0x000000000000a810 <+144>:   jae    0xa817 <gen8_cs_irq_handler+151>
   0x000000000000a812 <+146>:   pop    %rbx
   0x000000000000a813 <+147>:   pop    %rbp
   0x000000000000a814 <+148>:   pop    %r12
   0x000000000000a816 <+150>:   retq   
   0x000000000000a817 <+151>:   pop    %rbx
   0x000000000000a818 <+152>:   pop    %rbp
   0x000000000000a819 <+153>:   pop    %r12
   0x000000000000a81b <+155>:   jmpq   0xa820 <gen8_cs_irq_handler+160>
   0x000000000000a820 <+160>:   mov    %rbx,%rdi
   0x000000000000a823 <+163>:   callq  0xa4a0 <notify_ring>
   0x000000000000a828 <+168>:   jmp    0xa7e2 <gen8_cs_irq_handler+98>

to

   0x000000000000a780 <+0>:     callq  0xa785 <gen8_cs_irq_handler+5>
   0x000000000000a785 <+5>:     push   %r12
   0x000000000000a787 <+7>:     push   %rbp
   0x000000000000a788 <+8>:     mov    %esi,%ebp
   0x000000000000a78a <+10>:    and    $0x1,%ebp
   0x000000000000a78d <+13>:    and    $0x100,%esi
   0x000000000000a793 <+19>:    push   %rbx
   0x000000000000a794 <+20>:    mov    %rdi,%rbx
   0x000000000000a797 <+23>:    je     0xa7cb <gen8_cs_irq_handler+75>
   0x000000000000a799 <+25>:    lea    0x458(%rdi),%rdi
   0x000000000000a7a0 <+32>:    callq  0xa7a5 <gen8_cs_irq_handler+37>
   0x000000000000a7a5 <+37>:    mov    0x458(%rbx),%eax
   0x000000000000a7ab <+43>:    test   %eax,%eax
   0x000000000000a7ad <+45>:    je     0xa7cb <gen8_cs_irq_handler+75>
   0x000000000000a7af <+47>:    lock btsq $0x1,0x108(%rbx)
   0x000000000000a7b9 <+57>:    setae  %r12b
   0x000000000000a7bd <+61>:    test   %ebp,%ebp
   0x000000000000a7bf <+63>:    jne    0xa7d2 <gen8_cs_irq_handler+82>
   0x000000000000a7c1 <+65>:    test   %r12b,%r12b
   0x000000000000a7c4 <+68>:    jne    0xa805 <gen8_cs_irq_handler+133>
   0x000000000000a7c6 <+70>:    pop    %rbx
   0x000000000000a7c7 <+71>:    pop    %rbp
   0x000000000000a7c8 <+72>:    pop    %r12
   0x000000000000a7ca <+74>:    retq   
   0x000000000000a7cb <+75>:    test   %ebp,%ebp
   0x000000000000a7cd <+77>:    je     0xa7c6 <gen8_cs_irq_handler+70>
   0x000000000000a7cf <+79>:    xor    %r12d,%r12d
   0x000000000000a7d2 <+82>:    lea    0x5d8(%rbx),%rdi
   0x000000000000a7d9 <+89>:    callq  0xa7de <gen8_cs_irq_handler+94>
   0x000000000000a7de <+94>:    mov    0x5d8(%rbx),%ebp
   0x000000000000a7e4 <+100>:   lea    0x240(%rbx),%rdi
   0x000000000000a7eb <+107>:   callq  0xa7f0 <gen8_cs_irq_handler+112>
   0x000000000000a7f0 <+112>:   movzbl 0x240(%rbx),%eax
   0x000000000000a7f7 <+119>:   and    $0x8,%ebp
   0x000000000000a7fa <+122>:   and    $0x1,%eax
   0x000000000000a7fd <+125>:   test   %ebp,%ebp
   0x000000000000a7ff <+127>:   je     0xa821 <gen8_cs_irq_handler+161>
   0x000000000000a801 <+129>:   test   %al,%al
   0x000000000000a803 <+131>:   jne    0xa82f <gen8_cs_irq_handler+175>
   0x000000000000a805 <+133>:   lea    0x3d8(%rbx),%rdi
   0x000000000000a80c <+140>:   lock btsq $0x0,0x3e0(%rbx)
   0x000000000000a816 <+150>:   jb     0xa7c6 <gen8_cs_irq_handler+70>
   0x000000000000a818 <+152>:   pop    %rbx
   0x000000000000a819 <+153>:   pop    %rbp
   0x000000000000a81a <+154>:   pop    %r12
   0x000000000000a81c <+156>:   jmpq   0xa821 <gen8_cs_irq_handler+161>
   0x000000000000a821 <+161>:   test   %al,%al
   0x000000000000a823 <+163>:   je     0xa7c1 <gen8_cs_irq_handler+65>
   0x000000000000a825 <+165>:   mov    %rbx,%rdi
   0x000000000000a828 <+168>:   callq  0xa4a0 <notify_ring>
   0x000000000000a82d <+173>:   jmp    0xa7c1 <gen8_cs_irq_handler+65>
   0x000000000000a82f <+175>:   mov    %rbx,%rdi
   0x000000000000a832 <+178>:   callq  0xa4a0 <notify_ring>
   0x000000000000a837 <+183>:   jmp    0xa805 <gen8_cs_irq_handler+133>

And then onwards to

   0x000000000000a680 <+0>:     callq  0xa685 <gen8_cs_irq_handler+5>
   0x000000000000a685 <+5>:     push   %rbx
   0x000000000000a686 <+6>:     mov    %rdi,%rbx
   0x000000000000a689 <+9>:     sub    $0x8,%rsp
   0x000000000000a68d <+13>:    test   $0x100,%esi
   0x000000000000a693 <+19>:    jne    0xa6ca <gen8_cs_irq_handler+74>
   0x000000000000a695 <+21>:    and    $0x1,%esi
   0x000000000000a698 <+24>:    je     0xa6c4 <gen8_cs_irq_handler+68>
   0x000000000000a69a <+26>:    lea    0x5f0(%rbx),%rdi
   0x000000000000a6a1 <+33>:    callq  0xa6a6 <gen8_cs_irq_handler+38>
   0x000000000000a6a6 <+38>:    testb  $0x8,0x5f0(%rbx)
   0x000000000000a6ad <+45>:    jne    0xa6e6 <gen8_cs_irq_handler+102>
   0x000000000000a6af <+47>:    lea    0x240(%rbx),%rdi
   0x000000000000a6b6 <+54>:    callq  0xa6bb <gen8_cs_irq_handler+59>
   0x000000000000a6bb <+59>:    testb  $0x1,0x240(%rbx)
   0x000000000000a6c2 <+66>:    jne    0xa6d9 <gen8_cs_irq_handler+89>
   0x000000000000a6c4 <+68>:    add    $0x8,%rsp
   0x000000000000a6c8 <+72>:    pop    %rbx
   0x000000000000a6c9 <+73>:    retq   
   0x000000000000a6ca <+74>:    mov    %esi,0x4(%rsp)
   0x000000000000a6ce <+78>:    callq  0xa6d3 <gen8_cs_irq_handler+83>
   0x000000000000a6d3 <+83>:    mov    0x4(%rsp),%esi
   0x000000000000a6d7 <+87>:    jmp    0xa695 <gen8_cs_irq_handler+21>
   0x000000000000a6d9 <+89>:    add    $0x8,%rsp
   0x000000000000a6dd <+93>:    mov    %rbx,%rdi
   0x000000000000a6e0 <+96>:    pop    %rbx
   0x000000000000a6e1 <+97>:    jmpq   0xa3a0 <notify_ring>
   0x000000000000a6e6 <+102>:   lea    0x3d8(%rbx),%rdi
   0x000000000000a6ed <+109>:   lock btsq $0x0,0x3e0(%rbx)
   0x000000000000a6f7 <+119>:   jb     0xa6af <gen8_cs_irq_handler+47>
   0x000000000000a6f9 <+121>:   callq  0xa6fe <gen8_cs_irq_handler+126>
   0x000000000000a6fe <+126>:   jmp    0xa6af <gen8_cs_irq_handler+47>

> If nothing or almost nothing, I don't see a need to fiddle with this now.

Also please consider later patches which also need conditionals as
execlists is unfortunately gaining new tricks that are harder to pull
off with the current guc submission :|
(Might be time to stop mixing the two backends?)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to