Nothing guarantees that the line is large enough for two mmsize-wide
blocks per loop iteration, so the current code may result in invalid
memory access. Process a single block per iteration instead. This change
makes the function ~5% slower, but since this is a highly obscure filter
that nobody should use, its performance is not very important.
---
libavfilter/x86/vf_interlace.asm | 23 ++++++++---------------
1 file changed, 8 insertions(+), 15 deletions(-)
diff --git a/libavfilter/x86/vf_interlace.asm b/libavfilter/x86/vf_interlace.asm
index 7302314..7cff3c0 100644
--- a/libavfilter/x86/vf_interlace.asm
+++ b/libavfilter/x86/vf_interlace.asm
@@ -28,32 +28,25 @@ SECTION_RODATA
SECTION .text
%macro LOWPASS_LINE 0
-cglobal lowpass_line, 5, 5, 7, dst, linesize, src, src_above, src_below
+cglobal lowpass_line, 5, 5, 3, dst, linesize, src, src_above, src_below
add dstq, linesizeq
add srcq, linesizeq
add src_aboveq, linesizeq
add src_belowq, linesizeq
neg linesizeq
- pcmpeqb m6, m6
+ pcmpeqb m2, m2
.loop:
mova m0, [src_aboveq + linesizeq]
- mova m1, [src_aboveq + linesizeq + mmsize]
pavgb m0, [src_belowq + linesizeq]
- pavgb m1, [src_belowq + linesizeq + mmsize]
- pxor m0, m6
- pxor m1, m6
- pxor m2, m6, [srcq + linesizeq]
- pxor m3, m6, [srcq + linesizeq + mmsize]
- pavgb m0, m2
- pavgb m1, m3
- pxor m0, m6
- pxor m1, m6
- mova [dstq + linesizeq], m0
- mova [dstq + linesizeq + mmsize], m1
+ pxor m0, m2
+ pxor m1, m2, [srcq + linesizeq]
+ pavgb m0, m1
+ pxor m0, m2
+ mova [dstq + linesizeq], m0
- add linesizeq, 2 * mmsize
+ add linesizeq, mmsize
jl .loop
REP_RET
%endmacro
--
2.0.0
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel