[libav-commits] audiodsp/x86: fix ff_vector_clip_int32_sse2

2016-09-19 Thread Anton Khirnov
Module: libav
Branch: release/11
Commit: 033e2c69e639b489a1eab300520c107fda02bdb6

Author:    Anton Khirnov
Committer: Anton Khirnov 
Date:  Tue Aug  9 14:17:15 2016 +0200

audiodsp/x86: fix ff_vector_clip_int32_sse2

This version, which is the only one doing two processing cycles per loop
iteration, computes the load/store indices incorrectly for the second
cycle.

CC: libav-sta...@libav.org
(cherry picked from commit 1d6c76e11febb58738c9647c47079d02b5e10094)
Signed-off-by: Anton Khirnov 

---

 libavcodec/x86/audiodsp.asm |   36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm
index f2e831d..51afbd9 100644
--- a/libavcodec/x86/audiodsp.asm
+++ b/libavcodec/x86/audiodsp.asm
@@ -80,17 +80,17 @@ cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
 SPLATD  m4
 SPLATD  m5
 .loop:
-%assign %%i 1
+%assign %%i 0
 %rep %2
-mova  m0,  [srcq+mmsize*0*%%i]
-mova  m1,  [srcq+mmsize*1*%%i]
-mova  m2,  [srcq+mmsize*2*%%i]
-mova  m3,  [srcq+mmsize*3*%%i]
+mova  m0,  [srcq + mmsize * (0 + %%i)]
+mova  m1,  [srcq + mmsize * (1 + %%i)]
+mova  m2,  [srcq + mmsize * (2 + %%i)]
+mova  m3,  [srcq + mmsize * (3 + %%i)]
 %if %3
-mova  m7,  [srcq+mmsize*4*%%i]
-mova  m8,  [srcq+mmsize*5*%%i]
-mova  m9,  [srcq+mmsize*6*%%i]
-mova  m10, [srcq+mmsize*7*%%i]
+mova  m7,  [srcq + mmsize * (4 + %%i)]
+mova  m8,  [srcq + mmsize * (5 + %%i)]
+mova  m9,  [srcq + mmsize * (6 + %%i)]
+mova  m10, [srcq + mmsize * (7 + %%i)]
 %endif
 CLIPD  m0,  m4, m5, m6
 CLIPD  m1,  m4, m5, m6
@@ -102,17 +102,17 @@ cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
 CLIPD  m9,  m4, m5, m6
 CLIPD  m10, m4, m5, m6
 %endif
-mova  [dstq+mmsize*0*%%i], m0
-mova  [dstq+mmsize*1*%%i], m1
-mova  [dstq+mmsize*2*%%i], m2
-mova  [dstq+mmsize*3*%%i], m3
+mova  [dstq + mmsize * (0 + %%i)], m0
+mova  [dstq + mmsize * (1 + %%i)], m1
+mova  [dstq + mmsize * (2 + %%i)], m2
+mova  [dstq + mmsize * (3 + %%i)], m3
 %if %3
-mova  [dstq+mmsize*4*%%i], m7
-mova  [dstq+mmsize*5*%%i], m8
-mova  [dstq+mmsize*6*%%i], m9
-mova  [dstq+mmsize*7*%%i], m10
+mova  [dstq + mmsize * (4 + %%i)], m7
+mova  [dstq + mmsize * (5 + %%i)], m8
+mova  [dstq + mmsize * (6 + %%i)], m9
+mova  [dstq + mmsize * (7 + %%i)], m10
 %endif
-%assign %%i %%i+1
+%assign %%i (%%i + 4 * (1 + %3))
 %endrep
 add srcq, mmsize*4*(%2+%3)
 add dstq, mmsize*4*(%2+%3)

___
libav-commits mailing list
libav-commits@libav.org
https://lists.libav.org/mailman/listinfo/libav-commits


[libav-commits] audiodsp/x86: fix ff_vector_clip_int32_sse2

2016-09-19 Thread Anton Khirnov
Module: libav
Branch: release/12
Commit: f4a7869b4b67485f5e5756d1b3ed2fc5f555a486

Author:    Anton Khirnov
Committer: Anton Khirnov 
Date:  Tue Aug  9 14:17:15 2016 +0200

audiodsp/x86: fix ff_vector_clip_int32_sse2

This version, which is the only one doing two processing cycles per loop
iteration, computes the load/store indices incorrectly for the second
cycle.

CC: libav-sta...@libav.org
(cherry picked from commit 1d6c76e11febb58738c9647c47079d02b5e10094)
Signed-off-by: Anton Khirnov 

---

 libavcodec/x86/audiodsp.asm |   36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm
index 696a73b..dc38ada 100644
--- a/libavcodec/x86/audiodsp.asm
+++ b/libavcodec/x86/audiodsp.asm
@@ -80,17 +80,17 @@ cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
 SPLATD  m4
 SPLATD  m5
 .loop:
-%assign %%i 1
+%assign %%i 0
 %rep %2
-mova  m0,  [srcq+mmsize*0*%%i]
-mova  m1,  [srcq+mmsize*1*%%i]
-mova  m2,  [srcq+mmsize*2*%%i]
-mova  m3,  [srcq+mmsize*3*%%i]
+mova  m0,  [srcq + mmsize * (0 + %%i)]
+mova  m1,  [srcq + mmsize * (1 + %%i)]
+mova  m2,  [srcq + mmsize * (2 + %%i)]
+mova  m3,  [srcq + mmsize * (3 + %%i)]
 %if %3
-mova  m7,  [srcq+mmsize*4*%%i]
-mova  m8,  [srcq+mmsize*5*%%i]
-mova  m9,  [srcq+mmsize*6*%%i]
-mova  m10, [srcq+mmsize*7*%%i]
+mova  m7,  [srcq + mmsize * (4 + %%i)]
+mova  m8,  [srcq + mmsize * (5 + %%i)]
+mova  m9,  [srcq + mmsize * (6 + %%i)]
+mova  m10, [srcq + mmsize * (7 + %%i)]
 %endif
 CLIPD  m0,  m4, m5, m6
 CLIPD  m1,  m4, m5, m6
@@ -102,17 +102,17 @@ cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
 CLIPD  m9,  m4, m5, m6
 CLIPD  m10, m4, m5, m6
 %endif
-mova  [dstq+mmsize*0*%%i], m0
-mova  [dstq+mmsize*1*%%i], m1
-mova  [dstq+mmsize*2*%%i], m2
-mova  [dstq+mmsize*3*%%i], m3
+mova  [dstq + mmsize * (0 + %%i)], m0
+mova  [dstq + mmsize * (1 + %%i)], m1
+mova  [dstq + mmsize * (2 + %%i)], m2
+mova  [dstq + mmsize * (3 + %%i)], m3
 %if %3
-mova  [dstq+mmsize*4*%%i], m7
-mova  [dstq+mmsize*5*%%i], m8
-mova  [dstq+mmsize*6*%%i], m9
-mova  [dstq+mmsize*7*%%i], m10
+mova  [dstq + mmsize * (4 + %%i)], m7
+mova  [dstq + mmsize * (5 + %%i)], m8
+mova  [dstq + mmsize * (6 + %%i)], m9
+mova  [dstq + mmsize * (7 + %%i)], m10
 %endif
-%assign %%i %%i+1
+%assign %%i (%%i + 4 * (1 + %3))
 %endrep
 add srcq, mmsize*4*(%2+%3)
 add dstq, mmsize*4*(%2+%3)

___
libav-commits mailing list
libav-commits@libav.org
https://lists.libav.org/mailman/listinfo/libav-commits


[libav-commits] audiodsp/x86: fix ff_vector_clip_int32_sse2

2016-09-19 Thread Anton Khirnov
Module: libav
Branch: master
Commit: 1d6c76e11febb58738c9647c47079d02b5e10094

Author:    Anton Khirnov
Committer: Anton Khirnov 
Date:  Tue Aug  9 14:17:15 2016 +0200

audiodsp/x86: fix ff_vector_clip_int32_sse2

This version, which is the only one doing two processing cycles per loop
iteration, computes the load/store indices incorrectly for the second
cycle.

CC: libav-sta...@libav.org

---

 libavcodec/x86/audiodsp.asm |   36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm
index 696a73b..dc38ada 100644
--- a/libavcodec/x86/audiodsp.asm
+++ b/libavcodec/x86/audiodsp.asm
@@ -80,17 +80,17 @@ cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
 SPLATD  m4
 SPLATD  m5
 .loop:
-%assign %%i 1
+%assign %%i 0
 %rep %2
-mova  m0,  [srcq+mmsize*0*%%i]
-mova  m1,  [srcq+mmsize*1*%%i]
-mova  m2,  [srcq+mmsize*2*%%i]
-mova  m3,  [srcq+mmsize*3*%%i]
+mova  m0,  [srcq + mmsize * (0 + %%i)]
+mova  m1,  [srcq + mmsize * (1 + %%i)]
+mova  m2,  [srcq + mmsize * (2 + %%i)]
+mova  m3,  [srcq + mmsize * (3 + %%i)]
 %if %3
-mova  m7,  [srcq+mmsize*4*%%i]
-mova  m8,  [srcq+mmsize*5*%%i]
-mova  m9,  [srcq+mmsize*6*%%i]
-mova  m10, [srcq+mmsize*7*%%i]
+mova  m7,  [srcq + mmsize * (4 + %%i)]
+mova  m8,  [srcq + mmsize * (5 + %%i)]
+mova  m9,  [srcq + mmsize * (6 + %%i)]
+mova  m10, [srcq + mmsize * (7 + %%i)]
 %endif
 CLIPD  m0,  m4, m5, m6
 CLIPD  m1,  m4, m5, m6
@@ -102,17 +102,17 @@ cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
 CLIPD  m9,  m4, m5, m6
 CLIPD  m10, m4, m5, m6
 %endif
-mova  [dstq+mmsize*0*%%i], m0
-mova  [dstq+mmsize*1*%%i], m1
-mova  [dstq+mmsize*2*%%i], m2
-mova  [dstq+mmsize*3*%%i], m3
+mova  [dstq + mmsize * (0 + %%i)], m0
+mova  [dstq + mmsize * (1 + %%i)], m1
+mova  [dstq + mmsize * (2 + %%i)], m2
+mova  [dstq + mmsize * (3 + %%i)], m3
 %if %3
-mova  [dstq+mmsize*4*%%i], m7
-mova  [dstq+mmsize*5*%%i], m8
-mova  [dstq+mmsize*6*%%i], m9
-mova  [dstq+mmsize*7*%%i], m10
+mova  [dstq + mmsize * (4 + %%i)], m7
+mova  [dstq + mmsize * (5 + %%i)], m8
+mova  [dstq + mmsize * (6 + %%i)], m9
+mova  [dstq + mmsize * (7 + %%i)], m10
 %endif
-%assign %%i %%i+1
+%assign %%i (%%i + 4 * (1 + %3))
 %endrep
 add srcq, mmsize*4*(%2+%3)
 add dstq, mmsize*4*(%2+%3)

___
libav-commits mailing list
libav-commits@libav.org
https://lists.libav.org/mailman/listinfo/libav-commits