Hannes Frederic Sowa <[email protected]> wrote:
[...]
>I created it via the function calling convention documented in
>arch/x86/include/asm/calling.h, so I specified each register which a
>function is allowed to clobber with.
>
>I currently cannot see how I can resolve the invalid constraints error
>easily. :(
>
>So either go with my first patch, which I puts the alternative_call
>switch point into its own function without ever inlining or the patch
>needs to be reverted. :/

        As a data point, I tested the first patch as well, and the
system does not panic with it in place.  Inspection shows that it's
using %r14 in place of %r8 in the prior (crashing) implementation.

        Disassembly of the call site (on the non-sse4_1 system) in
ovs_flow_tbl_insert with the first patch applied looks like this:

0xffffffffa00b6bb9 <ovs_flow_tbl_insert+0xb9>:  mov    %r15,0x348(%r14)
0xffffffffa00b6bc0 <ovs_flow_tbl_insert+0xc0>:  movzwl 0x28(%r15),%ecx
0xffffffffa00b6bc5 <ovs_flow_tbl_insert+0xc5>:  movzwl 0x2a(%r15),%esi
0xffffffffa00b6bca <ovs_flow_tbl_insert+0xca>:  movzwl %cx,%eax
0xffffffffa00b6bcd <ovs_flow_tbl_insert+0xcd>:  sub    %ecx,%esi
0xffffffffa00b6bcf <ovs_flow_tbl_insert+0xcf>:  lea    0x38(%r14,%rax,1),%rdi
0xffffffffa00b6bd4 <ovs_flow_tbl_insert+0xd4>:  sar    $0x2,%esi
0xffffffffa00b6bd7 <ovs_flow_tbl_insert+0xd7>:  callq  0xffffffff813a7810 
<__jhash2>
0xffffffffa00b6bdc <ovs_flow_tbl_insert+0xdc>:  mov    %eax,0x30(%r14)
0xffffffffa00b6be0 <ovs_flow_tbl_insert+0xe0>:  mov    (%rbx),%r13
0xffffffffa00b6be3 <ovs_flow_tbl_insert+0xe3>:  mov    %r14,%rsi
0xffffffffa00b6be6 <ovs_flow_tbl_insert+0xe6>:  mov    %r13,%rdi
0xffffffffa00b6be9 <ovs_flow_tbl_insert+0xe9>:  callq  0xffffffffa00b61a0 
<table_instance_insert>

        Compared to the panicking version's function:

0xffffffffa01a55c9 <ovs_flow_tbl_insert+0xb9>:  mov    %r15,0x348(%r8)
0xffffffffa01a55d0 <ovs_flow_tbl_insert+0xc0>:  movzwl 0x28(%r15),%ecx
0xffffffffa01a55d5 <ovs_flow_tbl_insert+0xc5>:  movzwl 0x2a(%r15),%esi
0xffffffffa01a55da <ovs_flow_tbl_insert+0xca>:  movzwl %cx,%eax
0xffffffffa01a55dd <ovs_flow_tbl_insert+0xcd>:  sub    %ecx,%esi
0xffffffffa01a55df <ovs_flow_tbl_insert+0xcf>:  lea    0x38(%r8,%rax,1),%rdi
0xffffffffa01a55e4 <ovs_flow_tbl_insert+0xd4>:  sar    $0x2,%esi
0xffffffffa01a55e7 <ovs_flow_tbl_insert+0xd7>:  callq  0xffffffff813a75c0 
<__jhash2>
0xffffffffa01a55ec <ovs_flow_tbl_insert+0xdc>:  mov    %eax,0x30(%r8)
0xffffffffa01a55f0 <ovs_flow_tbl_insert+0xe0>:  mov    (%rbx),%r13
0xffffffffa01a55f3 <ovs_flow_tbl_insert+0xe3>:  mov    %r8,%rsi
0xffffffffa01a55f6 <ovs_flow_tbl_insert+0xe6>:  mov    %r13,%rdi
0xffffffffa01a55f9 <ovs_flow_tbl_insert+0xe9>:  callq  0xffffffffa01a4ba0 
<table_instance_insert>

        It appears to generate the same instructions, but allocates
registers differently (using %r14 instead of %r8).

        The __jhash2 disassembly appears to be unchanged between the two
versions.

        -J

---
        -Jay Vosburgh, [email protected]
_______________________________________________
discuss mailing list
[email protected]
http://openvswitch.org/mailman/listinfo/discuss

Reply via email to