Load the stack arguments into registers manually so that the function does
not need 8 registers with compilers that do not align the stack (e.g. MSVC).
---
Improved the commit message and avoided use of the red zone (per Loren's comments).
---
libavfilter/x86/yadif.asm | 68 ++++++++++++++++++++++++++-------------------
1 file changed, 39 insertions(+), 29 deletions(-)
diff --git a/libavfilter/x86/yadif.asm b/libavfilter/x86/yadif.asm
index 5e406a4..adfd3db 100644
--- a/libavfilter/x86/yadif.asm
+++ b/libavfilter/x86/yadif.asm
@@ -31,8 +31,8 @@ pw_1: times 8 dw 1
SECTION .text
%macro CHECK 2
- movu m2, [curq+mrefsq+%1]
- movu m3, [curq+prefsq+%2]
+ movu m2, [r2+t1+%1]
+ movu m3, [r2+t0+%2]
mova m4, m2
mova m5, m2
pxor m4, m3
@@ -97,8 +97,8 @@ SECTION .text
%macro FILTER 3
.loop%1:
pxor m7, m7
- LOAD 0, [curq+mrefsq]
- LOAD 1, [curq+prefsq]
+ LOAD 0, [r2+t1]
+ LOAD 1, [r2+t0]
LOAD 2, [%2]
LOAD 3, [%3]
mova m4, m3
@@ -109,8 +109,8 @@ SECTION .text
mova [rsp+32], m1
psubw m2, m4
ABS1 m2, m4
- LOAD 3, [prevq+mrefsq]
- LOAD 4, [prevq+prefsq]
+ LOAD 3, [r1+t1]
+ LOAD 4, [r1+t0]
psubw m3, m0
psubw m4, m1
ABS1 m3, m5
@@ -119,8 +119,8 @@ SECTION .text
psrlw m2, 1
psrlw m3, 1
pmaxsw m2, m3
- LOAD 3, [nextq+mrefsq]
- LOAD 4, [nextq+prefsq]
+ LOAD 3, [r3+t1]
+ LOAD 4, [r3+t0]
psubw m3, m0
psubw m4, m1
ABS1 m3, m5
@@ -136,8 +136,8 @@ SECTION .text
psrlw m1, 1
ABS1 m0, m2
- movu m2, [curq+mrefsq-1]
- movu m3, [curq+prefsq-1]
+ movu m2, [r2+t1-1]
+ movu m3, [r2+t0-1]
mova m4, m2
psubusb m2, m3
psubusb m3, m4
@@ -164,12 +164,12 @@ SECTION .text
CHECK2
mova m6, [rsp+48]
- cmp DWORD modem, 2
+ cmp DWORD r8m, 2
jge .end%1
- LOAD 2, [%2+mrefsq*2]
- LOAD 4, [%3+mrefsq*2]
- LOAD 3, [%2+prefsq*2]
- LOAD 5, [%3+prefsq*2]
+ LOAD 2, [%2+t1*2]
+ LOAD 4, [%3+t1*2]
+ LOAD 3, [%2+t0*2]
+ LOAD 5, [%3+t0*2]
paddw m2, m4
paddw m3, m5
psrlw m2, 1
@@ -203,30 +203,40 @@ SECTION .text
pminsw m1, m3
packuswb m1, m1
- movh [dstq], m1
- add dstq, mmsize/2
- add prevq, mmsize/2
- add curq, mmsize/2
- add nextq, mmsize/2
- sub wd, mmsize/2
+ movh [r0], m1
+ add r0, mmsize/2
+ add r1, mmsize/2
+ add r2, mmsize/2
+ add r3, mmsize/2
+ sub DWORD r4m, mmsize/2
jg .loop%1
%endmacro
%macro YADIF 0
-cglobal yadif_filter_line, 7, 7, 8, 16*5, dst, prev, cur, next, w, prefs, \
- mrefs, parity, mode
- test wq, wq
+%if ARCH_X86_32
+cglobal yadif_filter_line, 4, 6, 8, 80
+%else
+cglobal yadif_filter_line, 4, 7, 8, 80
+%endif
+ cmp DWORD r4m, 0
jle .ret
- movsxdifnidn prefsq, prefsd
- movsxdifnidn mrefsq, mrefsd
+%if ARCH_X86_32
+ mov r4, r5mp
+ mov r5, r6mp
+ DECLARE_REG_TMP 4,5
+%else
+ movsxd r5, DWORD r5m
+ movsxd r6, DWORD r6m
+ DECLARE_REG_TMP 5,6
+%endif
- cmp DWORD paritym, 0
+ cmp DWORD r7m, 0
je .parity0
- FILTER 1, prevq, curq
+ FILTER 1, r1, r2
jmp .ret
.parity0:
- FILTER 0, curq, nextq
+ FILTER 0, r2, r3
.ret:
RET
--
1.7.10.4
_______________________________________________
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel