From 7f39f25e3220237c8e30e8fb06f23dcf58e109fc Mon Sep 17 00:00:00 2001
From: Martin Vignali <martin.vignali@gmail.com>
Date: Sun, 22 Oct 2017 13:40:24 +0200
Subject: [PATCH 3/4] libavcodec/utvideodsp: add AVX2 versions of the DSP functions

---
 libavcodec/x86/utvideodsp.asm    | 16 ++++++++++++++++
 libavcodec/x86/utvideodsp_init.c | 11 +++++++++++
 2 files changed, 27 insertions(+)

diff --git a/libavcodec/x86/utvideodsp.asm b/libavcodec/x86/utvideodsp.asm
index 55ef127ccb..068bebef04 100644
--- a/libavcodec/x86/utvideodsp.asm
+++ b/libavcodec/x86/utvideodsp.asm
@@ -1,6 +1,7 @@
 ;******************************************************************************
 ;* SIMD-optimized UTVideo functions
 ;* Copyright (c) 2017 Paul B Mahol
+;* Copyright (c) 2017 Jokyo Images
 ;*
 ;* This file is part of FFmpeg.
 ;*
@@ -45,7 +46,11 @@ DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x
 %define wq r6m
 %define hd r7mp
 %endif
+%if mmsize == 32
+    vbroadcasti128 m3, [pb_128]
+%else
     mova         m3, [pb_128]
+%endif
 .nextrow:
     mov          xq, wq
 
@@ -72,6 +77,9 @@ DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x
 INIT_XMM sse2
 RESTORE_RGB_PLANES
 
+INIT_YMM avx2
+RESTORE_RGB_PLANES
+
 %macro RESTORE_RGB_PLANES10 0
 cglobal restore_rgb_planes10, 7 + ARCH_X86_64, 7 + ARCH_X86_64 * 2, 5, src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, w, h, x
     shl          wd, 1
@@ -81,8 +89,13 @@ cglobal restore_rgb_planes10, 7 + ARCH_X86_64, 7 + ARCH_X86_64 * 2, 5, src_r, sr
     add      src_rq, wq
     add      src_gq, wq
     add      src_bq, wq
+%if mmsize == 32
+    vbroadcasti128 m3, [pw_512]
+    vbroadcasti128 m4, [pw_1023]
+%else
     mova         m3, [pw_512]
     mova         m4, [pw_1023]
+%endif
     neg          wq
 %if ARCH_X86_64 == 0
     mov          wm, wq
@@ -117,3 +130,6 @@ DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x
 
 INIT_XMM sse2
 RESTORE_RGB_PLANES10
+
+INIT_YMM avx2
+RESTORE_RGB_PLANES10
diff --git a/libavcodec/x86/utvideodsp_init.c b/libavcodec/x86/utvideodsp_init.c
index f8b2a9b074..2b436c6c5c 100644
--- a/libavcodec/x86/utvideodsp_init.c
+++ b/libavcodec/x86/utvideodsp_init.c
@@ -28,9 +28,16 @@
 void ff_restore_rgb_planes_sse2(uint8_t *src_r, uint8_t *src_g, uint8_t *src_b,
                                 ptrdiff_t linesize_r, ptrdiff_t linesize_g,
                                 ptrdiff_t linesize_b, int width, int height);
+void ff_restore_rgb_planes_avx2(uint8_t *src_r, uint8_t *src_g, uint8_t *src_b,
+                                ptrdiff_t linesize_r, ptrdiff_t linesize_g,
+                                ptrdiff_t linesize_b, int width, int height);
+
 void ff_restore_rgb_planes10_sse2(uint16_t *src_r, uint16_t *src_g, uint16_t *src_b,
                                   ptrdiff_t linesize_r, ptrdiff_t linesize_g,
                                   ptrdiff_t linesize_b, int width, int height);
+void ff_restore_rgb_planes10_avx2(uint16_t *src_r, uint16_t *src_g, uint16_t *src_b,
+                                  ptrdiff_t linesize_r, ptrdiff_t linesize_g,
+                                  ptrdiff_t linesize_b, int width, int height);
 
 av_cold void ff_utvideodsp_init_x86(UTVideoDSPContext *c)
 {
@@ -40,4 +47,8 @@ av_cold void ff_utvideodsp_init_x86(UTVideoDSPContext *c)
         c->restore_rgb_planes   = ff_restore_rgb_planes_sse2;
         c->restore_rgb_planes10 = ff_restore_rgb_planes10_sse2;
     }
+    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+        c->restore_rgb_planes   = ff_restore_rgb_planes_avx2;
+        c->restore_rgb_planes10 = ff_restore_rgb_planes10_avx2;
+    }
 }
-- 
2.11.0 (Apple Git-81)

