Hi Julian,
   Some more related disassembly:

000000000040ed60 <y8_deblocking_chroma_vert_edge_h264_sse2>:
  40ed60:       55                      push   %rbp
  40ed61:       48 89 e5                mov    %rsp,%rbp
  40ed64:       48 83 ec 10             sub    $0x10,%rsp
  40ed68:       48 89 5d f8             mov    %rbx,0xfffffffffffffff8(%rbp)
  40ed6c:       48 83 ec 08             sub    $0x8,%rsp
  40ed70:       48 83 e4 f8             and    $0xfffffffffffffff8,%rsp
  40ed74:       4c 89 c8                mov    %r9,%rax
  40ed77:       8b 00                   mov    (%rax),%eax
  40ed79:       85 c0                   test   %eax,%eax
  40ed7b:       0f 84 0d 02 00 00       je     40ef8e <INTERNAL_EDGE_p1>
  40ed81:       4c 8d 1d 98 b8 46 00    lea    4634776(%rip),%r11        # 87a620 <const_table>
  40ed88:       3d 04 04 04 04          cmp    $0x4040404,%eax
  40ed8d:       0f 84 0e 04 00 00       je     40f1a1 <EXTERNAL_STRONG_EDGE_p1>
  40ed93:       48 f7 c7 3f 00 00 00    test   $0x3f,%rdi
  40ed9a:       0f 84 a3 05 00 00       je     40f343 <CACHE_LINE_SPLIT>



000000000040ef8e <INTERNAL_EDGE_p1>:
  40ef8e:       4c 89 c8                mov    %r9,%rax
  40ef91:       8b 40 08                mov    0x8(%rax),%eax
  40ef94:       85 c0                   test   %eax,%eax
  40ef96:       0f 84 fa 01 00 00       je     40f196 <EXIT_p1>
  40ef9c:       4c 8d 1d 7d b6 46 00    lea    4634237(%rip),%r11        # 87a620 <const_table>
  40efa3:       48 8d 7f 04             lea    0x4(%rdi),%rdi
  40efa7:       48 8d 1c 76             lea    (%rsi,%rsi,2),%rbx
  40efab:       48 83 ef 02             sub    $0x2,%rdi
  40efaf:       0f 6e 07                movd   (%rdi),%mm0
  40efb2:       0f 60 04 37             punpcklbw (%rdi,%rsi,1),%mm0
  40efb6:       0f 6e 0c 77             movd   (%rdi,%rsi,2),%mm1
  40efba:       0f 60 0c 1f             punpcklbw (%rdi,%rbx,1),%mm1
  40efbe:       48 8d 3c b7             lea    (%rdi,%rsi,4),%rdi
  40efc2:       0f 6e 37                movd   (%rdi),%mm6
  40efc5:       0f 60 34 37             punpcklbw (%rdi,%rsi,1),%mm6
  40efc9:       0f 6e 1c 77             movd   (%rdi,%rsi,2),%mm3
  40efcd:       0f 60 1c 1f             punpcklbw (%rdi,%rbx,1),%mm3
  40efd1:       0f 6f d0                movq   %mm0,%mm2
  40efd4:       0f 61 c1                punpcklwd %mm1,%mm0
  40efd7:       0f 6f fe                movq   %mm6,%mm7
  40efda:       0f 69 d1                punpckhwd %mm1,%mm2
  40efdd:       0f 61 f3                punpcklwd %mm3,%mm6
  40efe0:       0f 6f c8                movq   %mm0,%mm1
  40efe3:       0f 69 fb                punpckhwd %mm3,%mm7
  40efe6:       0f 6f da                movq   %mm2,%mm3
  40efe9:       0f 62 c6                punpckldq %mm6,%mm0
  40efec:       48 83 c7 02             add    $0x2,%rdi
  40eff0:       0f 6a ce                punpckhdq %mm6,%mm1
  40eff3:       48 29 df                sub    %rbx,%rdi
  40eff6:       0f 62 d7                punpckldq %mm7,%mm2
  40eff9:       48 29 f7                sub    %rsi,%rdi
  40effc:       0f 6a df                punpckhdq %mm7,%mm3
  40efff:       f3 0f d6 db             movq2dq %mm3,%xmm3
  40f003:       48 0f 6e e0             movd   %rax,%mm4
  40f007:       0f ef ed                pxor   %mm5,%mm5
  40f00a:       0f 60 e4                punpcklbw %mm4,%mm4
  40f00d:       0f 64 e5                pcmpgtb %mm5,%mm4
  40f010:       48 89 d3                mov    %rdx,%rbx
  40f013:       41 0f 6f 5b 10          movq   0x10(%r11),%mm3
  40f018:       0f ef ff                pxor   %mm7,%mm7
  40f01b:       0f c4 3b 00             pinsrw $0x0,(%rbx),%mm7
  40f01f:       48 89 cb                mov    %rcx,%rbx
  40f022:       0f c4 3b 01             pinsrw $0x1,(%rbx),%mm7
  40f026:       0f 60 ff                punpcklbw %mm7,%mm7
  40f029:       0f f8 df                psubb  %mm7,%mm3
  40f02c:       0f 6f e9                movq   %mm1,%mm5
  40f02f:       0f 70 fb ff             pshufw $0xff,%mm3,%mm7
  40f033:       0f 6f f1                movq   %mm1,%mm6
  40f036:       0f 70 db 55             pshufw $0x55,%mm3,%mm3
  40f03a:       0f 7f 3c 24             movq   %mm7,(%rsp)
  40f03e:       0f da ea                pminub %mm2,%mm5
  40f041:       0f de f2                pmaxub %mm2,%mm6
  40f044:       0f f8 ee                psubb  %mm6,%mm5
  40f047:       0f 6f f9                movq   %mm1,%mm7
  40f04a:       49 0f fc 6b 10          rex64Z paddb  0x10(%r11),%mm5
  40f04f:       0f da f8                pminub %mm0,%mm7
  40f052:       0f 64 eb                pcmpgtb %mm3,%mm5
  40f055:       0f 6f d9                movq   %mm1,%mm3
  40f058:       0f db e5                pand   %mm5,%mm4
  40f05b:       0f de d8                pmaxub %mm0,%mm3
  40f05e:       0f f8 fb                psubb  %mm3,%mm7
  40f061:       f2 0f d6 db             movdq2q %xmm3,%mm3
  40f065:       49 0f fc 7b 10          rex64Z paddb  0x10(%r11),%mm7
  40f06a:       0f 6f eb                movq   %mm3,%mm5
  40f06d:       48 0f 64 3c 24          rex64 pcmpgtb (%rsp),%mm7
  40f072:       0f 6f f3                movq   %mm3,%mm6
  40f075:       0f da ea                pminub %mm2,%mm5
  40f078:       0f db e7                pand   %mm7,%mm4
  40f07b:       0f de f2                pmaxub %mm2,%mm6
  40f07e:       0f f8 ee                psubb  %mm6,%mm5
  40f081:       49 0f fc 6b 10          rex64Z paddb  0x10(%r11),%mm5
  40f086:       48 0f 64 2c 24          rex64 pcmpgtb (%rsp),%mm5
  40f08b:       0f db e5                pand   %mm5,%mm4
  40f08e:       0f d7 c4                pmovmskb %mm4,%eax
  40f091:       85 c0                   test   %eax,%eax
  40f093:       0f 84 fd 00 00 00       je     40f196 <EXIT_p1>
  40f099:       66 0f ef e4             pxor   %xmm4,%xmm4
  40f09d:       4c 89 c0                mov    %r8,%rax
  40f0a0:       f3 0f d6 c0             movq2dq %mm0,%xmm0
  40f0a4:       66 0f 6e 78 04          movd   0x4(%rax),%xmm7
  40f0a9:       f3 0f d6 c9             movq2dq %mm1,%xmm1
  40f0ad:       66 0f 60 c4             punpcklbw %xmm4,%xmm0
  40f0b1:       f3 0f d6 d2             movq2dq %mm2,%xmm2
  40f0b5:       66 0f 60 cc             punpcklbw %xmm4,%xmm1
  40f0b9:       66 41 0f fc 3b          paddb  (%r11),%xmm7
  40f0be:       f3 0f d6 db             movq2dq %mm3,%xmm3
  40f0c2:       66 0f 60 d4             punpcklbw %xmm4,%xmm2
  40f0c6:       66 0f 6f e9             movdqa %xmm1,%xmm5
  40f0ca:       66 0f 60 dc             punpcklbw %xmm4,%xmm3
  40f0ce:       66 0f 6f f0             movdqa %xmm0,%xmm6
  40f0d2:       66 0f e9 ea             psubsw %xmm2,%xmm5
  40f0d6:       66 0f 60 fc             punpcklbw %xmm4,%xmm7
  40f0da:       66 0f e9 f3             psubsw %xmm3,%xmm6
  40f0de:       66 0f 71 f5 02          psllw  $0x2,%xmm5
  40f0e3:       66 0f e9 f5             psubsw %xmm5,%xmm6
  40f0e7:       66 0f 61 ff             punpcklwd %xmm7,%xmm7
  40f0eb:       66 41 0f ed 73 20       paddsw 0x20(%r11),%xmm6
  40f0f1:       66 0f e9 e7             psubsw %xmm7,%xmm4
  40f0f5:       66 0f 71 e6 03          psraw  $0x3,%xmm6
  40f0fa:       66 0f ea f7             pminsw %xmm7,%xmm6
  40f0fe:       66 0f ee f4             pmaxsw %xmm4,%xmm6
  40f102:       66 0f ed ce             paddsw %xmm6,%xmm1
  40f106:       66 0f e9 d6             psubsw %xmm6,%xmm2
  40f10a:       66 0f 67 c9             packuswb %xmm1,%xmm1
  40f10e:       66 0f 67 d2             packuswb %xmm2,%xmm2
  40f112:       f2 0f d6 e9             movdq2q %xmm1,%mm5
  40f116:       0f 6f fc                movq   %mm4,%mm7
  40f119:       f2 0f d6 f2             movdq2q %xmm2,%mm6
  40f11d:       0f db ec                pand   %mm4,%mm5
  40f120:       0f db f4                pand   %mm4,%mm6
  40f123:       0f df e1                pandn  %mm1,%mm4
  40f126:       0f df fa                pandn  %mm2,%mm7
  40f129:       0f eb ec                por    %mm4,%mm5
  40f12c:       0f eb f7                por    %mm7,%mm6
  40f12f:       0f 6f fd                movq   %mm5,%mm7
  40f132:       0f 60 ee                punpcklbw %mm6,%mm5
  40f135:       0f 68 fe                punpckhbw %mm6,%mm7
  40f138:       0f 7e e8                movd   %mm5,%eax
  40f13b:       66 89 47 ff             mov    %ax,0xffffffffffffffff(%rdi)
  40f13f:       c1 e8 10                shr    $0x10,%eax
  40f142:       0f 73 d5 20             psrlq  $0x20,%mm5
  40f146:       66 89 44 37 ff          mov    %ax,0xffffffffffffffff(%rdi,%rsi,1)
  40f14b:       48 8d 3c 77             lea    (%rdi,%rsi,2),%rdi
  40f14f:       0f 7e e8                movd   %mm5,%eax
  40f152:       66 89 47 ff             mov    %ax,0xffffffffffffffff(%rdi)
  40f156:       c1 e8 10                shr    $0x10,%eax
  40f159:       66 89 44 37 ff          mov    %ax,0xffffffffffffffff(%rdi,%rsi,1)
  40f15e:       48 8d 3c 77             lea    (%rdi,%rsi,2),%rdi
  40f162:       0f 7e f8                movd   %mm7,%eax
  40f165:       66 89 47 ff             mov    %ax,0xffffffffffffffff(%rdi)
  40f169:       c1 e8 10                shr    $0x10,%eax
  40f16c:       0f 73 d7 20             psrlq  $0x20,%mm7
  40f170:       66 89 44 37 ff          mov    %ax,0xffffffffffffffff(%rdi,%rsi,1)
  40f175:       48 8d 3c 77             lea    (%rdi,%rsi,2),%rdi
  40f179:       0f 7e f8                movd   %mm7,%eax
  40f17c:       66 89 47 ff             mov    %ax,0xffffffffffffffff(%rdi)
  40f180:       c1 e8 10                shr    $0x10,%eax
  40f183:       66 89 44 37 ff          mov    %ax,0xffffffffffffffff(%rdi,%rsi,1)
  40f188:       48 8d 3c 77             lea    (%rdi,%rsi,2),%rdi
  40f18c:       48 31 c0                xor    %rax,%rax
  40f18f:       48 29 f0                sub    %rsi,%rax
  40f192:       48 8d 3c c7             lea    (%rdi,%rax,8),%rdi




0000000000423a91 <EXTERNAL_STRONG_EDGE_p1>:
  423a91:       48 f7 c7 3f 00 00 00    test   $0x3f,%rdi
  423a98:       0f 84 d5 03 00 00       je     423e73 <CACHE_LINE_SPLIT_STRONG>

000000000040f343 <CACHE_LINE_SPLIT>:
  40f343:       48 f7 c6 3f 00 00 00    test   $0x3f,%rsi
  40f34a:       0f 85 50 fa ff ff       jne    40eda0 <EXTERNAL_EDGE_CONTINUE>
  40f350:       48 8d 1c 76             lea    (%rsi,%rsi,2),%rbx
  40f354:       0f 6f 47 f8             movq   0xfffffffffffffff8(%rdi),%mm0
  40f358:       0f 6f 17                movq   (%rdi),%mm2
  40f35b:       48 0f 68 44 37 f8       rex64 punpckhbw 0xfffffffffffffff8(%rdi,%rsi,1),%mm0
  40f361:       0f 60 14 37             punpcklbw (%rdi,%rsi,1),%mm2
  40f365:       0f 6f 74 77 f8          movq   0xfffffffffffffff8(%rdi,%rsi,2),%mm6
  40f36a:       0f 6f 3c 77             movq   (%rdi,%rsi,2),%mm7
  40f36e:       48 0f 68 74 1f f8       rex64 punpckhbw 0xfffffffffffffff8(%rdi,%rbx,1),%mm6
  40f374:       0f 60 3c 1f             punpcklbw (%rdi,%rbx,1),%mm7
  40f378:       0f 69 c6                punpckhwd %mm6,%mm0
  40f37b:       0f 61 d7                punpcklwd %mm7,%mm2
  40f37e:       48 8d 3c b7             lea    (%rdi,%rsi,4),%rdi
  40f382:       0f 6f 77 f8             movq   0xfffffffffffffff8(%rdi),%mm6
  40f386:       0f 6f 3f                movq   (%rdi),%mm7
  40f389:       48 0f 68 74 37 f8       rex64 punpckhbw 0xfffffffffffffff8(%rdi,%rsi,1),%mm6
  40f38f:       0f 60 3c 37             punpcklbw (%rdi,%rsi,1),%mm7
  40f393:       0f 6f 4c 77 f8          movq   0xfffffffffffffff8(%rdi,%rsi,2),%mm1
  40f398:       0f 6f 1c 77             movq   (%rdi,%rsi,2),%mm3
  40f39c:       48 0f 68 4c 1f f8       rex64 punpckhbw 0xfffffffffffffff8(%rdi,%rbx,1),%mm1
  40f3a2:       0f 60 1c 1f             punpcklbw (%rdi,%rbx,1),%mm3
  40f3a6:       0f 69 f1                punpckhwd %mm1,%mm6
  40f3a9:       0f 61 fb                punpcklwd %mm3,%mm7
  40f3ac:       0f 6f c8                movq   %mm0,%mm1
  40f3af:       0f 6f da                movq   %mm2,%mm3
  40f3b2:       0f 62 c6                punpckldq %mm6,%mm0
  40f3b5:       0f 6a ce                punpckhdq %mm6,%mm1
  40f3b8:       48 29 df                sub    %rbx,%rdi
  40f3bb:       0f 62 d7                punpckldq %mm7,%mm2
  40f3be:       48 29 f7                sub    %rsi,%rdi
  40f3c1:       0f 6a df                punpckhdq %mm7,%mm3
  40f3c4:       f3 0f d6 db             movq2dq %mm3,%xmm3
  40f3c8:       48 0f 6e e0             movd   %rax,%mm4
  40f3cc:       0f ef ed                pxor   %mm5,%mm5
  40f3cf:       0f 60 e4                punpcklbw %mm4,%mm4
  40f3d2:       0f 64 e5                pcmpgtb %mm5,%mm4
  40f3d5:       48 89 d3                mov    %rdx,%rbx
  40f3d8:       41 0f 6f 5b 10          movq   0x10(%r11),%mm3
  40f3dd:       0f ef ff                pxor   %mm7,%mm7
  40f3e0:       0f c4 3b 00             pinsrw $0x0,(%rbx),%mm7
  40f3e4:       48 89 cb                mov    %rcx,%rbx
  40f3e7:       0f c4 3b 01             pinsrw $0x1,(%rbx),%mm7
  40f3eb:       0f 60 ff                punpcklbw %mm7,%mm7
  40f3ee:       0f f8 df                psubb  %mm7,%mm3
  40f3f1:       0f 6f e9                movq   %mm1,%mm5
  40f3f4:       0f 70 fb aa             pshufw $0xaa,%mm3,%mm7
  40f3f8:       0f 6f f1                movq   %mm1,%mm6
  40f3fb:       0f 70 db 00             pshufw $0x0,%mm3,%mm3
  40f3ff:       0f 7f 3c 24             movq   %mm7,(%rsp)
  40f403:       0f da ea                pminub %mm2,%mm5
  40f406:       0f de f2                pmaxub %mm2,%mm6
  40f409:       0f f8 ee                psubb  %mm6,%mm5
  40f40c:       0f 6f f9                movq   %mm1,%mm7
  40f40f:       49 0f fc 6b 10          rex64Z paddb  0x10(%r11),%mm5
  40f414:       0f da f8                pminub %mm0,%mm7
  40f417:       0f 64 eb                pcmpgtb %mm3,%mm5
  40f41a:       0f 6f d9                movq   %mm1,%mm3
  40f41d:       0f db e5                pand   %mm5,%mm4
  40f420:       0f de d8                pmaxub %mm0,%mm3
  40f423:       0f f8 fb                psubb  %mm3,%mm7
  40f426:       f2 0f d6 db             movdq2q %xmm3,%mm3
  40f42a:       49 0f fc 7b 10          rex64Z paddb  0x10(%r11),%mm7
  40f42f:       0f 6f eb                movq   %mm3,%mm5
  40f432:       48 0f 64 3c 24          rex64 pcmpgtb (%rsp),%mm7
  40f437:       0f 6f f3                movq   %mm3,%mm6
  40f43a:       0f da ea                pminub %mm2,%mm5
  40f43d:       0f db e7                pand   %mm7,%mm4
  40f440:       0f de f2                pmaxub %mm2,%mm6
  40f443:       0f f8 ee                psubb  %mm6,%mm5
  40f446:       49 0f fc 6b 10          rex64Z paddb  0x10(%r11),%mm5
  40f44b:       48 0f 64 2c 24          rex64 pcmpgtb (%rsp),%mm5
  40f450:       0f db e5                pand   %mm5,%mm4
  40f453:       0f d7 c4                pmovmskb %mm4,%eax
  40f456:       85 c0                   test   %eax,%eax
  40f458:       0f 84 30 fb ff ff       je     40ef8e <INTERNAL_EDGE_p1>
  40f45e:       66 0f ef e4             pxor   %xmm4,%xmm4
  40f462:       4c 89 c0                mov    %r8,%rax
  40f465:       f3 0f d6 c0             movq2dq %mm0,%xmm0
  40f469:       66 0f 6e 38             movd   (%rax),%xmm7
  40f46d:       f3 0f d6 c9             movq2dq %mm1,%xmm1
  40f471:       66 0f 60 c4             punpcklbw %xmm4,%xmm0
  40f475:       f3 0f d6 d2             movq2dq %mm2,%xmm2
  40f479:       66 0f 60 cc             punpcklbw %xmm4,%xmm1
  40f47d:       66 41 0f fc 3b          paddb  (%r11),%xmm7
  40f482:       f3 0f d6 db             movq2dq %mm3,%xmm3
  40f486:       66 0f 60 d4             punpcklbw %xmm4,%xmm2
  40f48a:       66 0f 6f e9             movdqa %xmm1,%xmm5
  40f48e:       66 0f 60 dc             punpcklbw %xmm4,%xmm3
  40f492:       66 0f 6f f0             movdqa %xmm0,%xmm6
  40f496:       66 0f e9 ea             psubsw %xmm2,%xmm5
  40f49a:       66 0f 60 fc             punpcklbw %xmm4,%xmm7
  40f49e:       66 0f e9 f3             psubsw %xmm3,%xmm6
  40f4a2:       66 0f 71 f5 02          psllw  $0x2,%xmm5
  40f4a7:       66 0f e9 f5             psubsw %xmm5,%xmm6
  40f4ab:       66 0f 61 ff             punpcklwd %xmm7,%xmm7
  40f4af:       66 41 0f ed 73 20       paddsw 0x20(%r11),%xmm6
  40f4b5:       66 0f e9 e7             psubsw %xmm7,%xmm4
  40f4b9:       66 0f 71 e6 03          psraw  $0x3,%xmm6
  40f4be:       66 0f ea f7             pminsw %xmm7,%xmm6
  40f4c2:       66 0f ee f4             pmaxsw %xmm4,%xmm6
  40f4c6:       66 0f ed ce             paddsw %xmm6,%xmm1
  40f4ca:       66 0f e9 d6             psubsw %xmm6,%xmm2
  40f4ce:       66 0f 67 c9             packuswb %xmm1,%xmm1
  40f4d2:       66 0f 67 d2             packuswb %xmm2,%xmm2
  40f4d6:       f2 0f d6 e9             movdq2q %xmm1,%mm5
  40f4da:       0f 6f fc                movq   %mm4,%mm7
  40f4dd:       f2 0f d6 f2             movdq2q %xmm2,%mm6
  40f4e1:       0f db ec                pand   %mm4,%mm5
  40f4e4:       0f db f4                pand   %mm4,%mm6
  40f4e7:       0f df e1                pandn  %mm1,%mm4
  40f4ea:       0f df fa                pandn  %mm2,%mm7
  40f4ed:       0f eb ec                por    %mm4,%mm5
  40f4f0:       0f eb f7                por    %mm7,%mm6
  40f4f3:       0f 6f fd                movq   %mm5,%mm7
  40f4f6:       0f 60 ee                punpcklbw %mm6,%mm5
  40f4f9:       0f 68 fe                punpckhbw %mm6,%mm7
  40f4fc:       0f 7e e8                movd   %mm5,%eax
  40f4ff:       88 47 ff                mov    %al,0xffffffffffffffff(%rdi)
  40f502:       c1 e8 08                shr    $0x8,%eax
  40f505:       88 07                   mov    %al,(%rdi)
  40f507:       c1 e8 08                shr    $0x8,%eax
  40f50a:       0f 73 d5 20             psrlq  $0x20,%mm5
  40f50e:       88 44 37 ff             mov    %al,0xffffffffffffffff(%rdi,%rsi,1)
  40f512:       c1 e8 08                shr    $0x8,%eax
  40f515:       88 04 37                mov    %al,(%rdi,%rsi,1)
  40f518:       48 8d 3c 77             lea    (%rdi,%rsi,2),%rdi
  40f51c:       0f 7e e8                movd   %mm5,%eax
  40f51f:       88 47 ff                mov    %al,0xffffffffffffffff(%rdi)
  40f522:       c1 e8 08                shr    $0x8,%eax
  40f525:       88 07                   mov    %al,(%rdi)
  40f527:       c1 e8 08                shr    $0x8,%eax
  40f52a:       88 44 37 ff             mov    %al,0xffffffffffffffff(%rdi,%rsi,1)
  40f52e:       c1 e8 08                shr    $0x8,%eax
  40f531:       88 04 37                mov    %al,(%rdi,%rsi,1)
  40f534:       48 8d 3c 77             lea    (%rdi,%rsi,2),%rdi
  40f538:       0f 7e f8                movd   %mm7,%eax
  40f53b:       88 47 ff                mov    %al,0xffffffffffffffff(%rdi)
  40f53e:       c1 e8 08                shr    $0x8,%eax
  40f541:       88 07                   mov    %al,(%rdi)
  40f543:       c1 e8 08                shr    $0x8,%eax
  40f546:       0f 73 d7 20             psrlq  $0x20,%mm7
  40f54a:       88 44 37 ff             mov    %al,0xffffffffffffffff(%rdi,%rsi,1)
  40f54e:       c1 e8 08                shr    $0x8,%eax
  40f551:       88 04 37                mov    %al,(%rdi,%rsi,1)
  40f554:       48 8d 3c 77             lea    (%rdi,%rsi,2),%rdi
  40f558:       0f 7e f8                movd   %mm7,%eax
  40f55b:       88 47 ff                mov    %al,0xffffffffffffffff(%rdi)
  40f55e:       c1 e8 08                shr    $0x8,%eax
  40f561:       88 07                   mov    %al,(%rdi)
  40f563:       c1 e8 08                shr    $0x8,%eax
  40f566:       88 44 37 ff             mov    %al,0xffffffffffffffff(%rdi,%rsi,1)
  40f56a:       c1 e8 08                shr    $0x8,%eax
  40f56d:       88 04 37                mov    %al,(%rdi,%rsi,1)
  40f570:       48 8d 3c 77             lea    (%rdi,%rsi,2),%rdi
  40f574:       48 31 c0                xor    %rax,%rax
  40f577:       48 29 f0                sub    %rsi,%rax
  40f57a:       48 8d 3c c7             lea    (%rdi,%rax,8),%rdi
  40f57e:       e9 0b fa ff ff          jmpq   40ef8e <INTERNAL_EDGE_p1>



-----Original Message-----
From: Abhishek Mehrotra [mailto:abhishek.mehro...@onmobile.com]
Sent: Monday, June 20, 2011 2:40 PM
To: Julian Seward; valgrind-users@lists.sourceforge.net
Subject: Re: [Valgrind-users] callgrind / valgrind + intel ipp h264 decoder gets killed in ipp libraries

Hi Julian,
   Here is the disassembly for the function in question:



00000000003393a8 <y8_ippiFilterDeblockingChroma_VerEdge_H264_8u_C1IR>:
  3393a8:       55                      push   %rbp
  3393a9:       48 89 e5                mov    %rsp,%rbp
  3393ac:       48 83 ec 50             sub    $0x50,%rsp
  3393b0:       48 89 5d e8             mov    %rbx,0xffffffffffffffe8(%rbp)
  3393b4:       48 89 7d c0             mov    %rdi,0xffffffffffffffc0(%rbp)
  3393b8:       89 75 f0                mov    %esi,0xfffffffffffffff0(%rbp)
  3393bb:       48 89 55 c8             mov    %rdx,0xffffffffffffffc8(%rbp)
  3393bf:       48 89 4d d0             mov    %rcx,0xffffffffffffffd0(%rbp)
  3393c3:       4c 89 45 d8             mov    %r8,0xffffffffffffffd8(%rbp)
  3393c7:       4c 89 4d e0             mov    %r9,0xffffffffffffffe0(%rbp)
  3393cb:       48 8b 45 c0             mov    0xffffffffffffffc0(%rbp),%rax
  3393cf:       48 85 c0                test   %rax,%rax
  3393d2:       74 12                   je     3393e6 <y8_ippiFilterDeblockingChroma_VerEdge_H264_8u_C1IR+0x3e>
  3393d4:       48 8b 45 c8             mov    0xffffffffffffffc8(%rbp),%rax
  3393d8:       48 85 c0                test   %rax,%rax
  3393db:       74 09                   je     3393e6 <y8_ippiFilterDeblockingChroma_VerEdge_H264_8u_C1IR+0x3e>
  3393dd:       48 8b 45 d0             mov    0xffffffffffffffd0(%rbp),%rax
  3393e1:       48 85 c0                test   %rax,%rax
  3393e4:       75 0b                   jne    3393f1 <y8_ippiFilterDeblockingChroma_VerEdge_H264_8u_C1IR+0x49>
  3393e6:       b8 f8 ff ff ff          mov    $0xfffffff8,%eax
  3393eb:       48 8b 5d e8             mov    0xffffffffffffffe8(%rbp),%rbx
  3393ef:       c9                      leaveq
  3393f0:       c3                      retq
  3393f1:       48 8b 45 d8             mov    0xffffffffffffffd8(%rbp),%rax
  3393f5:       48 85 c0                test   %rax,%rax
  3393f8:       74 09                   je     339403 <y8_ippiFilterDeblockingChroma_VerEdge_H264_8u_C1IR+0x5b>
  3393fa:       48 8b 45 e0             mov    0xffffffffffffffe0(%rbp),%rax
  3393fe:       48 85 c0                test   %rax,%rax
  339401:       75 0b                   jne    33940e <y8_ippiFilterDeblockingChroma_VerEdge_H264_8u_C1IR+0x66>
  339403:       b8 f8 ff ff ff          mov    $0xfffffff8,%eax
  339408:       48 8b 5d e8             mov    0xffffffffffffffe8(%rbp),%rbx
  33940c:       c9                      leaveq
  33940d:       c3                      retq
  33940e:       48 8b 45 c0             mov    0xffffffffffffffc0(%rbp),%rax
  339412:       8b 55 f0                mov    0xfffffffffffffff0(%rbp),%edx
  339415:       48 63 d2                movslq %edx,%rdx
  339418:       48 8b 4d c8             mov    0xffffffffffffffc8(%rbp),%rcx
  33941c:       48 8b 5d d0             mov    0xffffffffffffffd0(%rbp),%rbx
  339420:       48 8b 75 d8             mov    0xffffffffffffffd8(%rbp),%rsi
  339424:       48 8b 7d e0             mov    0xffffffffffffffe0(%rbp),%rdi
  339428:       48 89 7d b0             mov    %rdi,0xffffffffffffffb0(%rbp)
  33942c:       48 89 c7                mov    %rax,%rdi
  33942f:       48 89 75 b8             mov    %rsi,0xffffffffffffffb8(%rbp)
  339433:       48 89 d6                mov    %rdx,%rsi
  339436:       48 89 ca                mov    %rcx,%rdx
  339439:       48 89 d9                mov    %rbx,%rcx
  33943c:       48 8b 45 b8             mov    0xffffffffffffffb8(%rbp),%rax
  339440:       49 89 c0                mov    %rax,%r8
  339443:       48 8b 45 b0             mov    0xffffffffffffffb0(%rbp),%rax
  339447:       49 89 c1                mov    %rax,%r9
  33944a:       e8 41 69 d7 ff          callq  afd90 <y8_deblocking_chroma_vert_edge_h264_sse2@plt>
  33944f:       33 c0                   xor    %eax,%eax
  339451:       48 8b 5d e8             mov    0xffffffffffffffe8(%rbp),%rbx
  339455:       c9                      leaveq
  339456:       c3                      retq
  339457:       90                      nop



-----Original Message-----
From: Julian Seward [mailto:jsew...@acm.org]
Sent: Monday, June 20, 2011 1:28 PM
To: valgrind-users@lists.sourceforge.net
Cc: Abhishek Mehrotra
Subject: Re: [Valgrind-users] callgrind / valgrind + intel ipp h264 decoder gets killed in ipp libraries


> vex amd64->IR: unhandled instruction bytes: 0x49 0xF 0xFC 0x6B 0x10 0xF

This instruction (paddb) is supported, but I think the 0x49 might be a
redundant REX prefix that causes the problem.  It would help to see a
disassembly of the instruction and the few on either side; can you get
that?  Running valgrind with --sym-offsets=yes might help navigating
the disassembly process.

J

DISCLAIMER: The information in this message is confidential and may be legally 
privileged. It is intended solely for the addressee. Access to this message by 
anyone else is unauthorized. If you are not the intended recipient, any 
disclosure, copying, or distribution of the message, or any action or omission 
taken by you in reliance on it, is prohibited and may be unlawful. Please 
immediately contact the sender if you have received this message in error. 
Further, this e-mail may contain viruses and all reasonable precaution to 
minimize the risk arising there from is taken by OnMobile. OnMobile is not 
liable for any damage sustained by you as a result of any virus in this e-mail. 
All applicable virus checks should be carried out by you before opening this 
e-mail or any attachment thereto.
Thank you - OnMobile Global Limited.

------------------------------------------------------------------------------
EditLive Enterprise is the world's most technically advanced content
authoring tool. Experience the power of Track Changes, Inline Image
Editing and ensure content is compliant with Accessibility Checking.
http://p.sf.net/sfu/ephox-dev2dev
_______________________________________________
Valgrind-users mailing list
Valgrind-users@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/valgrind-users

DISCLAIMER: The information in this message is confidential and may be legally 
privileged. It is intended solely for the addressee. Access to this message by 
anyone else is unauthorized. If you are not the intended recipient, any 
disclosure, copying, or distribution of the message, or any action or omission 
taken by you in reliance on it, is prohibited and may be unlawful. Please 
immediately contact the sender if you have received this message in error. 
Further, this e-mail may contain viruses and all reasonable precaution to 
minimize the risk arising there from is taken by OnMobile. OnMobile is not 
liable for any damage sustained by you as a result of any virus in this e-mail. 
All applicable virus checks should be carried out by you before opening this 
e-mail or any attachment thereto.
Thank you - OnMobile Global Limited.

------------------------------------------------------------------------------
EditLive Enterprise is the world's most technically advanced content
authoring tool. Experience the power of Track Changes, Inline Image
Editing and ensure content is compliant with Accessibility Checking.
http://p.sf.net/sfu/ephox-dev2dev
_______________________________________________
Valgrind-users mailing list
Valgrind-users@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/valgrind-users

Reply via email to