Optimize put_hevc_qpel_hv_8 with MMI for widths 4/8/12/16/24/32/48/64.
This optimization improves HEVC decoding performance by about 11% (from 1.81x
to 2.01x, tested on a Loongson 3A3000).
---
libavcodec/mips/hevcdsp_init_mips.c | 9 ++
libavcodec/mips/hevcdsp_mips.h | 37 +--
libavcodec/mips/hevcdsp_mmi.c | 195
libavutil/mips/mmiutils.h | 9 ++
4 files changed, 240 insertions(+), 10 deletions(-)
diff --git a/libavcodec/mips/hevcdsp_init_mips.c b/libavcodec/mips/hevcdsp_init_mips.c
index 41c9001..e5e0588 100644
--- a/libavcodec/mips/hevcdsp_init_mips.c
+++ b/libavcodec/mips/hevcdsp_init_mips.c
@@ -25,6 +25,15 @@ static av_cold void hevc_dsp_init_mmi(HEVCDSPContext *c,
const int bit_depth)
{
if (8 == bit_depth) {
+c->put_hevc_qpel[1][1][1] = ff_hevc_put_hevc_qpel_hv4_8_mmi;
+c->put_hevc_qpel[3][1][1] = ff_hevc_put_hevc_qpel_hv8_8_mmi;
+c->put_hevc_qpel[4][1][1] = ff_hevc_put_hevc_qpel_hv12_8_mmi;
+c->put_hevc_qpel[5][1][1] = ff_hevc_put_hevc_qpel_hv16_8_mmi;
+c->put_hevc_qpel[6][1][1] = ff_hevc_put_hevc_qpel_hv24_8_mmi;
+c->put_hevc_qpel[7][1][1] = ff_hevc_put_hevc_qpel_hv32_8_mmi;
+c->put_hevc_qpel[8][1][1] = ff_hevc_put_hevc_qpel_hv48_8_mmi;
+c->put_hevc_qpel[9][1][1] = ff_hevc_put_hevc_qpel_hv64_8_mmi;
+
c->put_hevc_qpel_bi[3][0][0] = ff_hevc_put_hevc_pel_bi_pixels8_8_mmi;
c->put_hevc_qpel_bi[5][0][0] = ff_hevc_put_hevc_pel_bi_pixels16_8_mmi;
c->put_hevc_qpel_bi[6][0][0] = ff_hevc_put_hevc_pel_bi_pixels24_8_mmi;
diff --git a/libavcodec/mips/hevcdsp_mips.h b/libavcodec/mips/hevcdsp_mips.h
index ff9401c..2351c9b 100644
--- a/libavcodec/mips/hevcdsp_mips.h
+++ b/libavcodec/mips/hevcdsp_mips.h
@@ -480,16 +480,33 @@ void ff_hevc_addblk_32x32_msa(uint8_t *dst, int16_t
*pi16Coeffs,
void ff_hevc_idct_luma_4x4_msa(int16_t *pi16Coeffs);
/* Loongson optimization */
-#define L_BI_MC(PEL, DIR, WIDTH, TYPE)
\
-void ff_hevc_put_hevc_##PEL##_bi_##DIR##WIDTH##_8_##TYPE(uint8_t *dst, \
-ptrdiff_t dst_stride,
\
-uint8_t *src,
\
-ptrdiff_t src_stride,
\
-int16_t *src_16bit,
\
-int height,
\
-intptr_t mx,
\
-intptr_t my,
\
-int width)
+#define L_MC(PEL, DIR, WIDTH, TYPE) \
+void ff_hevc_put_hevc_##PEL##_##DIR##WIDTH##_8_##TYPE(int16_t *dst, \
+ uint8_t *src, \
+ ptrdiff_t src_stride, \
+ int height,\
+ intptr_t mx, \
+ intptr_t my, \
+ int width)
+L_MC(qpel, hv, 4, mmi);
+L_MC(qpel, hv, 8, mmi);
+L_MC(qpel, hv, 12, mmi);
+L_MC(qpel, hv, 16, mmi);
+L_MC(qpel, hv, 24, mmi);
+L_MC(qpel, hv, 32, mmi);
+L_MC(qpel, hv, 48, mmi);
+L_MC(qpel, hv, 64, mmi);
+
+#define L_BI_MC(PEL, DIR, WIDTH, TYPE)
\
+void ff_hevc_put_hevc_##PEL##_bi_##DIR##WIDTH##_8_##TYPE(uint8_t *dst,
\
+ ptrdiff_t dst_stride,
\
+ uint8_t *src,
\
+ ptrdiff_t src_stride,
\
+ int16_t *src_16bit,
\
+ int height,
\
+ intptr_t mx,
\
+ intptr_t my,
\
+ int width)
L_BI_MC(pel, pixels, 8, mmi);
L_BI_MC(pel, pixels, 16, mmi);
diff --git a/libavcodec/mips/hevcdsp_mmi.c b/libavcodec/mips/hevcdsp_mmi.c
index 60b9c18..e776a13 100644
--- a/libavcodec/mips/hevcdsp_mmi.c
+++ b/libavcodec/mips/hevcdsp_mmi.c
@@ -18,10 +18,205 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#include "libavcodec/hevcdec.h"
#include "libavcodec/bit_depth_template.c"
#include "libavcodec/mips/hevcdsp_mips.h"
#include "libavutil/mips/mmiutils.h"
+#define PUT_HEVC_QPEL_HV(w