On Fri, Jul 14, 2023 at 12:08:46PM +0200, Alan Kelly wrote:
> ---
>  libswscale/x86/swscale.c    | 11 ++++-------
>  libswscale/x86/yuv2yuvX.asm | 12 ++++++++++--
>  2 files changed, 14 insertions(+), 9 deletions(-)

This patch seems to segfault with the following command:

./ffmpeg_g -i mm-short.mpg -an -vcodec snow -t 0.2 -bitexact -pix_fmt yuv410p -s 199x199 -vstrict -2 -y  snow3914-199-410.avi

Thread 79 "ffmpeg_g" received signal SIGSEGV, Segmentation fault.
[Switching to Thread 0x7fffaffef700 (LWP 23533)]
0x000055555658a0f6 in ff_yuv2yuvX_sse3 ()
(gdb) bt
#0  0x000055555658a0f6 in ff_yuv2yuvX_sse3 ()
#1  0x0000555556585bc6 in chr_planar_vscale ()
#2  0x00005555565817d1 in scale_internal ()
#3  0x00005555565827d9 in ff_sws_slice_worker ()
#4  0x000055555662b06e in thread_worker ()
#5  0x00007ffff75fc6db in start_thread (arg=0x7fffaffef700) at pthread_create.c:463
#6  0x00007fffed12861f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
(gdb) disassemble $rip-32,$rip+32
Dump of assembler code from 0x55555658a0d6 to 0x55555658a116:
   0x000055555658a0d6 <ff_yuv2yuvX_sse3+86>:    std
   0x000055555658a0d7 <ff_yuv2yuvX_sse3+87>:    fldenv 0xf(%rsi)
   0x000055555658a0da <ff_yuv2yuvX_sse3+90>:    outsl  %ds:(%rsi),(%dx)
   0x000055555658a0db <ff_yuv2yuvX_sse3+91>:    sti
   0x000055555658a0dc <ff_yuv2yuvX_sse3+92>:    psraw  $0x4,%xmm7
   0x000055555658a0e1 <ff_yuv2yuvX_sse3+97>:    movdqa %xmm7,%xmm4
   0x000055555658a0e5 <ff_yuv2yuvX_sse3+101>:   movdqa %xmm7,%xmm3
   0x000055555658a0e9 <ff_yuv2yuvX_sse3+105>:   movdqa %xmm7,%xmm6
   0x000055555658a0ed <ff_yuv2yuvX_sse3+109>:   movdqa %xmm7,%xmm1
   0x000055555658a0f1 <ff_yuv2yuvX_sse3+113>:   movddup 0x8(%rsi),%xmm0
=> 0x000055555658a0f6 <ff_yuv2yuvX_sse3+118>:   movdqa (%rdx,%rax,2),%xmm2
   0x000055555658a0fb <ff_yuv2yuvX_sse3+123>:   pmulhw %xmm0,%xmm2
   0x000055555658a0ff <ff_yuv2yuvX_sse3+127>:   movdqa 0x10(%rdx,%rax,2),%xmm5
   0x000055555658a105 <ff_yuv2yuvX_sse3+133>:   pmulhw %xmm0,%xmm5
   0x000055555658a109 <ff_yuv2yuvX_sse3+137>:   paddw  %xmm2,%xmm3
   0x000055555658a10d <ff_yuv2yuvX_sse3+141>:   paddw  %xmm5,%xmm4
   0x000055555658a111 <ff_yuv2yuvX_sse3+145>:   movdqa 0x20(%rdx,%rax,2),%xmm2
End of assembler dump.
(gdb) info all-registers
rax            0x12     18
rbx            0x32     50
rcx            0x555557915480   93825029723264
rdx            0x555557687680   93825027044992
rsi            0x555557666658   93825026909784
rdi            0x555557666658   93825026909784
rbp            0x55555765b880   0x55555765b880
rsp            0x7fffaffee7a8   0x7fffaffee7a8
r8             0x20     32
r9             0x32     50
r10            0x555556589860   93825009227872
r11            0x5555576f9dc0   93825027513792
r12            0x55555763b280   93825026732672
r13            0x555557666658   93825026909784
r14            0x5555577b5800   93825028282368
r15            0x555557622640   93825026631232
rip            0x55555658a0f6   0x55555658a0f6 <ff_yuv2yuvX_sse3+118>
eflags         0x10297  [ CF PF AF SF IF RF ]
cs             0x33     51
ss             0x2b     43
ds             0x0      0
es             0x0      0
fs             0x0      0
gs             0x0      0
st0            0        (raw 0x00000000000000000000)
st1            0        (raw 0x00000000000000000000)
st2            0        (raw 0x00000000000000000000)
st3            0        (raw 0x00000000000000000000)
st4            0        (raw 0x00000000000000000000)
st5            0        (raw 0x00000000000000000000)
st6            0        (raw 0x00000000000000000000)
st7            0        (raw 0x00000000000000000000)
fctrl          0xffff   65535
fstat          0xffff   65535
ftag           0xaaaa   43690
fiseg          0x1      1
fioff          0x0      0
foseg          0x5646   22086
fooff          0xa      10
fop            0x7ff    2047
mxcsr          0x1fa8   [ OE PE IM DM ZM OM UM PM ]


> 
> diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
> index 52423a1199..71434f58d3 100644
> --- a/libswscale/x86/swscale.c
> +++ b/libswscale/x86/swscale.c
> @@ -202,17 +202,14 @@ static void yuv2yuvX_ ##opt(const int16_t *filter, int 
> filterSize, \
>                             const int16_t **src, uint8_t *dest, int dstW, \
>                             const uint8_t *dither, int offset) \
>  { \
> -    int remainder = (dstW % step); \
> -    int pixelsProcessed = dstW - remainder; \
>      if(((uintptr_t)dest) & 15){ \
>          yuv2yuvX_mmxext(filter, filterSize, src, dest, dstW, dither, 
> offset); \
>          return; \
>      } \
> -    if(pixelsProcessed > 0) \
> -        ff_yuv2yuvX_ ##opt(filter, filterSize - 1, 0, dest - offset, 
> pixelsProcessed + offset, dither, offset); \
> -    if(remainder > 0){ \
> -      yuv2yuvX_ ##tail(filter, filterSize, src, dest, dstW, dither, offset); 
> \
> -    } \
> +    if (dstW >= step) \
> +        ff_yuv2yuvX_ ##opt(filter, filterSize - 1, 0, dest - offset, dstW + 
> offset, dither, offset); \
> +    else \
> +        yuv2yuvX_ ##tail(filter, filterSize, src, dest, dstW, dither, 
> offset); \
>      return; \
>  }
>  
> diff --git a/libswscale/x86/yuv2yuvX.asm b/libswscale/x86/yuv2yuvX.asm
> index 57bfa09d66..ad0e8bd448 100644
> --- a/libswscale/x86/yuv2yuvX.asm
> +++ b/libswscale/x86/yuv2yuvX.asm
> @@ -54,6 +54,8 @@ cglobal yuv2yuvX, 7, 7, 8, filter, filterSize, src, dest, 
> dstW, dither, offset
>  %else
>      movq                 xm3, [ditherq]
>  %endif ; avx2
> +    mov                  ditherq, dstWq
> +    sub                  dstWq, mmsize * unroll
>  
>  %if cpuflag(avx512)
>      mova                 m15, [permutation]
> @@ -131,8 +133,14 @@ cglobal yuv2yuvX, 7, 7, 8, filter, filterSize, src, 
> dest, dstW, dither, offset
>      add                  offsetq, mmsize * unroll
>      mov                  filterSizeq, filterq
>      cmp                  offsetq, dstWq
> -    jb                  .outerloop
> -    RET
> +    jb                   .outerloop
> +
> +    mov                  dstWq, offsetq
> +    mov                  offsetq, ditherq
> +    sub                  offsetq, mmsize * unroll
> +    cmp                  dstWq, ditherq
> +    jb                   .outerloop
> +    REP_RET
>  %endmacro
>  
>  INIT_MMX mmxext
> -- 
> 2.41.0.255.g8b1d071c50-goog
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
> 

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Into a blind darkness they enter who follow after the Ignorance,
they as if into a greater darkness enter who devote themselves
to the Knowledge alone. -- Isha Upanishad

Attachment: signature.asc
Description: PGP signature

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Reply via email to