---
libavcodec/x86/pngdsp-init.c | 7 +++++++
libavcodec/x86/pngdsp.asm | 20 ++++++++++++++++++++
2 files changed, 27 insertions(+), 0 deletions(-)
diff --git a/libavcodec/x86/pngdsp-init.c b/libavcodec/x86/pngdsp-init.c
index 5f67b8a..80726c4 100644
--- a/libavcodec/x86/pngdsp-init.c
+++ b/libavcodec/x86/pngdsp-init.c
@@ -29,18 +29,25 @@ void ff_add_png_paeth_prediction_ssse3(uint8_t *dst,
uint8_t *src,
uint8_t *top, int w, int bpp);
void ff_add_bytes_l2_mmx (uint8_t *dst, uint8_t *src1,
uint8_t *src2, int w);
+void ff_add_bytes_l2_sse2(uint8_t *dst, uint8_t *src1,
+ uint8_t *src2, int w);
void ff_pngdsp_init_x86(PNGDSPContext *dsp)
{
#if HAVE_YASM
int flags = av_get_cpu_flags();
+#if ARCH_X86_32
if (flags & AV_CPU_FLAG_MMX) {
dsp->add_bytes_l2 = ff_add_bytes_l2_mmx;
}
+#endif
if (flags & AV_CPU_FLAG_MMX2) {
dsp->add_paeth_prediction = ff_add_png_paeth_prediction_mmx2;
}
+ if (flags & AV_CPU_FLAG_SSE2) {
+ dsp->add_bytes_l2 = ff_add_bytes_l2_sse2;
+ }
if (flags & AV_CPU_FLAG_SSSE3) {
dsp->add_paeth_prediction = ff_add_png_paeth_prediction_ssse3;
}
diff --git a/libavcodec/x86/pngdsp.asm b/libavcodec/x86/pngdsp.asm
index 92425ad..bff76e0 100644
--- a/libavcodec/x86/pngdsp.asm
+++ b/libavcodec/x86/pngdsp.asm
@@ -53,6 +53,21 @@ cglobal add_bytes_l2, 4, 6, %1, dst, src1, src2, wa, w, i
cmp iq, waq
jl .loop_v
+%if mmsize == 16
+ ; vector loop
+ mov wq, waq
+ and waq, ~7
+ jmp .end_l
+.loop_l:
+ movq mm0, [src1q+iq]
+ paddb mm0, [src2q+iq]
+ movq [dstq+iq ], mm0
+ add iq, 8
+.end_l:
+ cmp iq, waq
+ jl .loop_l
+%endif
+
; scalar loop for leftover
jmp .end_s
.loop_s:
@@ -66,8 +81,13 @@ cglobal add_bytes_l2, 4, 6, %1, dst, src1, src2, wa, w, i
REP_RET
%endmacro
+%if ARCH_X86_32
INIT_MMX mmx
ADD_BYTES_FN 0
+%endif
+
+INIT_XMM sse2
+ADD_BYTES_FN 2
%macro ADD_PAETH_PRED_FN 1
cglobal add_png_paeth_prediction, 5, 7, %1, dst, src, top, w, bpp, end, cntr
--
1.7.2.1
_______________________________________________
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel