[libav-commits] audiodsp/x86: fix ff_vector_clip_int32_sse2
Module: libav
Branch: release/11
Commit: 033e2c69e639b489a1eab300520c107fda02bdb6

Author:    Anton Khirnov
Committer: Anton Khirnov
Date:      Tue Aug 9 14:17:15 2016 +0200

audiodsp/x86: fix ff_vector_clip_int32_sse2

This version, which is the only one doing two processing cycles per loop
iteration, computes the load/store indices incorrectly for the second
cycle.

CC: libav-sta...@libav.org
(cherry picked from commit 1d6c76e11febb58738c9647c47079d02b5e10094)
Signed-off-by: Anton Khirnov

---

 libavcodec/x86/audiodsp.asm | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm
index f2e831d..51afbd9 100644
--- a/libavcodec/x86/audiodsp.asm
+++ b/libavcodec/x86/audiodsp.asm
@@ -80,17 +80,17 @@ cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
     SPLATD m4
     SPLATD m5
 .loop:
-%assign %%i 1
+%assign %%i 0
 %rep %2
-    mova m0, [srcq+mmsize*0*%%i]
-    mova m1, [srcq+mmsize*1*%%i]
-    mova m2, [srcq+mmsize*2*%%i]
-    mova m3, [srcq+mmsize*3*%%i]
+    mova m0, [srcq + mmsize * (0 + %%i)]
+    mova m1, [srcq + mmsize * (1 + %%i)]
+    mova m2, [srcq + mmsize * (2 + %%i)]
+    mova m3, [srcq + mmsize * (3 + %%i)]
 %if %3
-    mova m7, [srcq+mmsize*4*%%i]
-    mova m8, [srcq+mmsize*5*%%i]
-    mova m9, [srcq+mmsize*6*%%i]
-    mova m10, [srcq+mmsize*7*%%i]
+    mova m7, [srcq + mmsize * (4 + %%i)]
+    mova m8, [srcq + mmsize * (5 + %%i)]
+    mova m9, [srcq + mmsize * (6 + %%i)]
+    mova m10, [srcq + mmsize * (7 + %%i)]
 %endif
     CLIPD m0, m4, m5, m6
     CLIPD m1, m4, m5, m6
@@ -102,17 +102,17 @@ cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
     CLIPD m9, m4, m5, m6
     CLIPD m10, m4, m5, m6
 %endif
-    mova [dstq+mmsize*0*%%i], m0
-    mova [dstq+mmsize*1*%%i], m1
-    mova [dstq+mmsize*2*%%i], m2
-    mova [dstq+mmsize*3*%%i], m3
+    mova [dstq + mmsize * (0 + %%i)], m0
+    mova [dstq + mmsize * (1 + %%i)], m1
+    mova [dstq + mmsize * (2 + %%i)], m2
+    mova [dstq + mmsize * (3 + %%i)], m3
 %if %3
-    mova [dstq+mmsize*4*%%i], m7
-    mova [dstq+mmsize*5*%%i], m8
-    mova [dstq+mmsize*6*%%i], m9
-    mova [dstq+mmsize*7*%%i], m10
+    mova [dstq + mmsize * (4 + %%i)], m7
+    mova [dstq + mmsize * (5 + %%i)], m8
+    mova [dstq + mmsize * (6 + %%i)], m9
+    mova [dstq + mmsize * (7 + %%i)], m10
 %endif
-%assign %%i %%i+1
+%assign %%i (%%i + 4 * (1 + %3))
 %endrep
     add srcq, mmsize*4*(%2+%3)
     add dstq, mmsize*4*(%2+%3)
___
libav-commits mailing list
libav-commits@libav.org
https://lists.libav.org/mailman/listinfo/libav-commits
[libav-commits] audiodsp/x86: fix ff_vector_clip_int32_sse2
Module: libav
Branch: release/12
Commit: f4a7869b4b67485f5e5756d1b3ed2fc5f555a486

Author:    Anton Khirnov
Committer: Anton Khirnov
Date:      Tue Aug 9 14:17:15 2016 +0200

audiodsp/x86: fix ff_vector_clip_int32_sse2

This version, which is the only one doing two processing cycles per loop
iteration, computes the load/store indices incorrectly for the second
cycle.

CC: libav-sta...@libav.org
(cherry picked from commit 1d6c76e11febb58738c9647c47079d02b5e10094)
Signed-off-by: Anton Khirnov

---

 libavcodec/x86/audiodsp.asm | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm
index 696a73b..dc38ada 100644
--- a/libavcodec/x86/audiodsp.asm
+++ b/libavcodec/x86/audiodsp.asm
@@ -80,17 +80,17 @@ cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
     SPLATD m4
     SPLATD m5
 .loop:
-%assign %%i 1
+%assign %%i 0
 %rep %2
-    mova m0, [srcq+mmsize*0*%%i]
-    mova m1, [srcq+mmsize*1*%%i]
-    mova m2, [srcq+mmsize*2*%%i]
-    mova m3, [srcq+mmsize*3*%%i]
+    mova m0, [srcq + mmsize * (0 + %%i)]
+    mova m1, [srcq + mmsize * (1 + %%i)]
+    mova m2, [srcq + mmsize * (2 + %%i)]
+    mova m3, [srcq + mmsize * (3 + %%i)]
 %if %3
-    mova m7, [srcq+mmsize*4*%%i]
-    mova m8, [srcq+mmsize*5*%%i]
-    mova m9, [srcq+mmsize*6*%%i]
-    mova m10, [srcq+mmsize*7*%%i]
+    mova m7, [srcq + mmsize * (4 + %%i)]
+    mova m8, [srcq + mmsize * (5 + %%i)]
+    mova m9, [srcq + mmsize * (6 + %%i)]
+    mova m10, [srcq + mmsize * (7 + %%i)]
 %endif
     CLIPD m0, m4, m5, m6
     CLIPD m1, m4, m5, m6
@@ -102,17 +102,17 @@ cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
     CLIPD m9, m4, m5, m6
     CLIPD m10, m4, m5, m6
 %endif
-    mova [dstq+mmsize*0*%%i], m0
-    mova [dstq+mmsize*1*%%i], m1
-    mova [dstq+mmsize*2*%%i], m2
-    mova [dstq+mmsize*3*%%i], m3
+    mova [dstq + mmsize * (0 + %%i)], m0
+    mova [dstq + mmsize * (1 + %%i)], m1
+    mova [dstq + mmsize * (2 + %%i)], m2
+    mova [dstq + mmsize * (3 + %%i)], m3
 %if %3
-    mova [dstq+mmsize*4*%%i], m7
-    mova [dstq+mmsize*5*%%i], m8
-    mova [dstq+mmsize*6*%%i], m9
-    mova [dstq+mmsize*7*%%i], m10
+    mova [dstq + mmsize * (4 + %%i)], m7
+    mova [dstq + mmsize * (5 + %%i)], m8
+    mova [dstq + mmsize * (6 + %%i)], m9
+    mova [dstq + mmsize * (7 + %%i)], m10
 %endif
-%assign %%i %%i+1
+%assign %%i (%%i + 4 * (1 + %3))
 %endrep
     add srcq, mmsize*4*(%2+%3)
     add dstq, mmsize*4*(%2+%3)
___
libav-commits mailing list
libav-commits@libav.org
https://lists.libav.org/mailman/listinfo/libav-commits
[libav-commits] audiodsp/x86: fix ff_vector_clip_int32_sse2
Module: libav
Branch: master
Commit: 1d6c76e11febb58738c9647c47079d02b5e10094

Author:    Anton Khirnov
Committer: Anton Khirnov
Date:      Tue Aug 9 14:17:15 2016 +0200

audiodsp/x86: fix ff_vector_clip_int32_sse2

This version, which is the only one doing two processing cycles per loop
iteration, computes the load/store indices incorrectly for the second
cycle.

CC: libav-sta...@libav.org

---

 libavcodec/x86/audiodsp.asm | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm
index 696a73b..dc38ada 100644
--- a/libavcodec/x86/audiodsp.asm
+++ b/libavcodec/x86/audiodsp.asm
@@ -80,17 +80,17 @@ cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
     SPLATD m4
     SPLATD m5
 .loop:
-%assign %%i 1
+%assign %%i 0
 %rep %2
-    mova m0, [srcq+mmsize*0*%%i]
-    mova m1, [srcq+mmsize*1*%%i]
-    mova m2, [srcq+mmsize*2*%%i]
-    mova m3, [srcq+mmsize*3*%%i]
+    mova m0, [srcq + mmsize * (0 + %%i)]
+    mova m1, [srcq + mmsize * (1 + %%i)]
+    mova m2, [srcq + mmsize * (2 + %%i)]
+    mova m3, [srcq + mmsize * (3 + %%i)]
 %if %3
-    mova m7, [srcq+mmsize*4*%%i]
-    mova m8, [srcq+mmsize*5*%%i]
-    mova m9, [srcq+mmsize*6*%%i]
-    mova m10, [srcq+mmsize*7*%%i]
+    mova m7, [srcq + mmsize * (4 + %%i)]
+    mova m8, [srcq + mmsize * (5 + %%i)]
+    mova m9, [srcq + mmsize * (6 + %%i)]
+    mova m10, [srcq + mmsize * (7 + %%i)]
 %endif
     CLIPD m0, m4, m5, m6
     CLIPD m1, m4, m5, m6
@@ -102,17 +102,17 @@ cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
     CLIPD m9, m4, m5, m6
     CLIPD m10, m4, m5, m6
 %endif
-    mova [dstq+mmsize*0*%%i], m0
-    mova [dstq+mmsize*1*%%i], m1
-    mova [dstq+mmsize*2*%%i], m2
-    mova [dstq+mmsize*3*%%i], m3
+    mova [dstq + mmsize * (0 + %%i)], m0
+    mova [dstq + mmsize * (1 + %%i)], m1
+    mova [dstq + mmsize * (2 + %%i)], m2
+    mova [dstq + mmsize * (3 + %%i)], m3
 %if %3
-    mova [dstq+mmsize*4*%%i], m7
-    mova [dstq+mmsize*5*%%i], m8
-    mova [dstq+mmsize*6*%%i], m9
-    mova [dstq+mmsize*7*%%i], m10
+    mova [dstq + mmsize * (4 + %%i)], m7
+    mova [dstq + mmsize * (5 + %%i)], m8
+    mova [dstq + mmsize * (6 + %%i)], m9
+    mova [dstq + mmsize * (7 + %%i)], m10
 %endif
-%assign %%i %%i+1
+%assign %%i (%%i + 4 * (1 + %3))
 %endrep
     add srcq, mmsize*4*(%2+%3)
     add dstq, mmsize*4*(%2+%3)
___
libav-commits mailing list
libav-commits@libav.org
https://lists.libav.org/mailman/listinfo/libav-commits