Re: [FFmpeg-devel] [PATCH 1/2] avcodec/mips: [loongson] optimize put_hevc_qpel_hv_8 with mmi.

2019-01-21 Thread Michael Niedermayer
On Mon, Jan 21, 2019 at 06:10:24PM +0800, Shiyou Yin wrote:
> Optimize put_hevc_qpel_hv_8 with mmi in the case width=4/8/12/16/24/32/48/64.
> This optimization improved HEVC decoding performance 11%(1.81x to 2.01x, 
> tested on loongson 3A3000).
> ---
>  libavcodec/mips/hevcdsp_init_mips.c |   9 ++
>  libavcodec/mips/hevcdsp_mips.h  |  37 +--
>  libavcodec/mips/hevcdsp_mmi.c   | 195 
> 
>  libavutil/mips/mmiutils.h   |   9 ++
>  4 files changed, 240 insertions(+), 10 deletions(-)

will apply the 2 patches

thx

[...]
-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

"You are 36 times more likely to die in a bathtub than at the hands of a
terrorist. Also, you are 2.5 times more likely to become a president and
2 times more likely to become an astronaut, than to die in a terrorist
attack." -- Thoughty2



signature.asc
Description: PGP signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 1/2] avcodec/mips: [loongson] optimize put_hevc_qpel_hv_8 with mmi.

2019-01-21 Thread Shiyou Yin
Optimize put_hevc_qpel_hv_8 with mmi in the case width=4/8/12/16/24/32/48/64.
This optimization improves HEVC decoding performance by 11% (1.81x to 2.01x,
tested on Loongson 3A3000).
---
 libavcodec/mips/hevcdsp_init_mips.c |   9 ++
 libavcodec/mips/hevcdsp_mips.h  |  37 +--
 libavcodec/mips/hevcdsp_mmi.c   | 195 
 libavutil/mips/mmiutils.h   |   9 ++
 4 files changed, 240 insertions(+), 10 deletions(-)

diff --git a/libavcodec/mips/hevcdsp_init_mips.c b/libavcodec/mips/hevcdsp_init_mips.c
index 41c9001..e5e0588 100644
--- a/libavcodec/mips/hevcdsp_init_mips.c
+++ b/libavcodec/mips/hevcdsp_init_mips.c
@@ -25,6 +25,15 @@ static av_cold void hevc_dsp_init_mmi(HEVCDSPContext *c,
   const int bit_depth)
 {
 if (8 == bit_depth) {
+c->put_hevc_qpel[1][1][1] = ff_hevc_put_hevc_qpel_hv4_8_mmi;
+c->put_hevc_qpel[3][1][1] = ff_hevc_put_hevc_qpel_hv8_8_mmi;
+c->put_hevc_qpel[4][1][1] = ff_hevc_put_hevc_qpel_hv12_8_mmi;
+c->put_hevc_qpel[5][1][1] = ff_hevc_put_hevc_qpel_hv16_8_mmi;
+c->put_hevc_qpel[6][1][1] = ff_hevc_put_hevc_qpel_hv24_8_mmi;
+c->put_hevc_qpel[7][1][1] = ff_hevc_put_hevc_qpel_hv32_8_mmi;
+c->put_hevc_qpel[8][1][1] = ff_hevc_put_hevc_qpel_hv48_8_mmi;
+c->put_hevc_qpel[9][1][1] = ff_hevc_put_hevc_qpel_hv64_8_mmi;
+
 c->put_hevc_qpel_bi[3][0][0] = ff_hevc_put_hevc_pel_bi_pixels8_8_mmi;
 c->put_hevc_qpel_bi[5][0][0] = ff_hevc_put_hevc_pel_bi_pixels16_8_mmi;
 c->put_hevc_qpel_bi[6][0][0] = ff_hevc_put_hevc_pel_bi_pixels24_8_mmi;
diff --git a/libavcodec/mips/hevcdsp_mips.h b/libavcodec/mips/hevcdsp_mips.h
index ff9401c..2351c9b 100644
--- a/libavcodec/mips/hevcdsp_mips.h
+++ b/libavcodec/mips/hevcdsp_mips.h
@@ -480,16 +480,33 @@ void ff_hevc_addblk_32x32_msa(uint8_t *dst, int16_t *pi16Coeffs,
 void ff_hevc_idct_luma_4x4_msa(int16_t *pi16Coeffs);
 
 /* Loongson optimization */
-#define L_BI_MC(PEL, DIR, WIDTH, TYPE) \
-void ff_hevc_put_hevc_##PEL##_bi_##DIR##WIDTH##_8_##TYPE(uint8_t *dst,   \
-ptrdiff_t dst_stride,  \
-uint8_t *src,  \
-ptrdiff_t src_stride,  \
-int16_t *src_16bit,\
-int height,\
-intptr_t mx,   \
-intptr_t my,   \
-int width)
+#define L_MC(PEL, DIR, WIDTH, TYPE)  \
+void ff_hevc_put_hevc_##PEL##_##DIR##WIDTH##_8_##TYPE(int16_t *dst,  \
+  uint8_t *src,  \
+  ptrdiff_t src_stride,  \
+  int height,\
+  intptr_t mx,   \
+  intptr_t my,   \
+  int width)
+L_MC(qpel, hv, 4, mmi);
+L_MC(qpel, hv, 8, mmi);
+L_MC(qpel, hv, 12, mmi);
+L_MC(qpel, hv, 16, mmi);
+L_MC(qpel, hv, 24, mmi);
+L_MC(qpel, hv, 32, mmi);
+L_MC(qpel, hv, 48, mmi);
+L_MC(qpel, hv, 64, mmi);
+
+#define L_BI_MC(PEL, DIR, WIDTH, TYPE)  \
+void ff_hevc_put_hevc_##PEL##_bi_##DIR##WIDTH##_8_##TYPE(uint8_t *dst,  \
+ ptrdiff_t dst_stride,  \
+ uint8_t *src,  \
+ ptrdiff_t src_stride,  \
+ int16_t *src_16bit,  \
+ int height,  \
+ intptr_t mx,  \
+ intptr_t my,  \
+ int width)
 
 L_BI_MC(pel, pixels, 8, mmi);
 L_BI_MC(pel, pixels, 16, mmi);
diff --git a/libavcodec/mips/hevcdsp_mmi.c b/libavcodec/mips/hevcdsp_mmi.c
index 60b9c18..e776a13 100644
--- a/libavcodec/mips/hevcdsp_mmi.c
+++ b/libavcodec/mips/hevcdsp_mmi.c
@@ -18,10 +18,205 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "libavcodec/hevcdec.h"
 #include "libavcodec/bit_depth_template.c"
 #include "libavcodec/mips/hevcdsp_mips.h"
 #include "libavutil/mips/mmiutils.h"
 
+#define