Re: [FFmpeg-devel] [PATCH 09/11] avcodec/mips: loongson optimize blockdsp with mmi v2
On Tue, May 17, 2016 at 07:14:24PM +0800, 周晓勇 wrote:
> avcodec/mips/blockdsp_mmi: Version 2 of the optimizations for loongson mmi
>
> 1. no longer use the register names directly and optimized code format
> 2. to be compatible with O32, specify type of address variable with
> mips_reg and handle the address variable with PTR_ operator
>
> 在 2016-05-13 18:06:56,"周晓勇" 写道:
>
> From 9e5ade4c99eb23f72a89f0054f8b5626c9acceb3 Mon Sep 17 00:00:00 2001
> From: ZhouXiaoyong
> Date: Fri, 13 May 2016 14:30:10 +0800
> Subject: [PATCH 09/11] avcodec/mips: loongson optimize blockdsp with mmi v2
>
>
> ---
> libavcodec/mips/blockdsp_mmi.c | 195 ++---
> 1 file changed, 103 insertions(+), 92 deletions(-)

applied

thanks

[...]

--
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Opposition brings concord. Out of discord comes the fairest harmony.
-- Heraclitus

signature.asc
Description: Digital signature

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Re: [FFmpeg-devel] [PATCH 09/11] avcodec/mips: loongson optimize blockdsp with mmi v2
avcodec/mips/blockdsp_mmi: Version 2 of the optimizations for loongson mmi 1. no longer use the register names directly and optimized code format 2. to be compatible with O32, specify type of address variable with mips_reg and handle the address variable with PTR_ operator 在 2016-05-13 18:06:56,"周晓勇" 写道: From 9e5ade4c99eb23f72a89f0054f8b5626c9acceb3 Mon Sep 17 00:00:00 2001 From: ZhouXiaoyong Date: Fri, 13 May 2016 14:30:10 +0800 Subject: [PATCH 09/11] avcodec/mips: loongson optimize blockdsp with mmi v2 --- libavcodec/mips/blockdsp_mmi.c | 195 ++--- 1 file changed, 103 insertions(+), 92 deletions(-) diff --git a/libavcodec/mips/blockdsp_mmi.c b/libavcodec/mips/blockdsp_mmi.c index 63eaf69..6eb2bd7 100644 --- a/libavcodec/mips/blockdsp_mmi.c +++ b/libavcodec/mips/blockdsp_mmi.c @@ -22,126 +22,137 @@ */ #include "blockdsp_mips.h" +#include "libavutil/mips/asmdefs.h" void ff_fill_block16_mmi(uint8_t *block, uint8_t value, int line_size, int h) { +double ftmp[1]; + __asm__ volatile ( -"move $8, %3\r\n" -"move $9, %0\r\n" -"dmtc1 %1, $f2 \r\n" -"punpcklbh $f2, $f2, $f2\r\n" -"punpcklbh $f2, $f2, $f2\r\n" -"punpcklbh $f2, $f2, $f2\r\n" -"1: \r\n" -"gssdlc1 $f2, 7($9) \r\n" -"gssdrc1 $f2, 0($9) \r\n" -"gssdlc1 $f2, 15($9)\r\n" -"gssdrc1 $f2, 8($9) \r\n" -"daddi $8, $8, -1 \r\n" -"daddu $9, $9, %2 \r\n" -"bnez $8, 1b\r\n" -::"r"(block),"r"(value),"r"(line_size),"r"(h) -: "$8","$9" +"mtc1 %[value], %[ftmp0]\n\t" +"punpcklbh %[ftmp0], %[ftmp0], %[ftmp0]\n\t" +"punpcklbh %[ftmp0], %[ftmp0], %[ftmp0]\n\t" +"punpcklbh %[ftmp0], %[ftmp0], %[ftmp0]\n\t" +"1: \n\t" +"gssdlc1%[ftmp0], 0x07(%[block]) \n\t" +"gssdrc1%[ftmp0], 0x00(%[block]) \n\t" +PTR_ADDI"%[h], %[h], -0x01 \n\t" +"gssdlc1%[ftmp0], 0x0f(%[block]) \n\t" +"gssdrc1%[ftmp0], 0x08(%[block]) \n\t" +PTR_ADDU "%[block], %[block], %[line_size]\n\t" +"bnez %[h], 1b \n\t" +: [block]"+&r"(block), [h]"+&r"(h), + [ftmp0]"=&f"(ftmp[0]) +: [value]"r"(value),[line_size]"r"((mips_reg)line_size) +: "memory" ); } void 
ff_fill_block8_mmi(uint8_t *block, uint8_t value, int line_size, int h) { +double ftmp0; + __asm__ volatile ( -"move $8, %3\r\n" -"move $9, %0\r\n" -"dmtc1 %1, $f2 \r\n" -"punpcklbh $f2, $f2, $f2\r\n" -"punpcklbh $f2, $f2, $f2\r\n" -"punpcklbh $f2, $f2, $f2\r\n" -"1: \r\n" -"gssdlc1 $f2, 7($9) \r\n" -"gssdrc1 $f2, 0($9) \r\n" -"daddi $8, $8, -1 \r\n" -"daddu $9, $9, %2 \r\n" -"bnez $8, 1b\r\n" -::"r"(block),"r"(value),"r"(line_size),"r"(h) -: "$8","$9" +"mtc1 %[value], %[ftmp0]\n\t" +"punpcklbh %[ftmp0], %[ftmp0], %[ftmp0]\n\t" +"punpcklbh %[ftmp0], %[ftmp0], %[ftmp0]\n\t" +"punpcklbh %[ftmp0], %[ftmp0], %[ftmp0]\n\t" +"1: \n\t" +"gssdlc1%[ftmp0], 0x07(%[block]) \n\t" +"gssdrc1%[ftmp0], 0x00(%[block]) \n\t" +PTR_ADDI "%[h], %[h], -0x01 \n\t" +PTR_ADDU "%[block], %[block], %[line_size]\n\t" +"bnez %[h], 1b \n\t" +: [block]"+&r"(block), [h]"+&r"(h), + [ftmp0]"=&f"(ftmp0) +: [value]"r"(value),[line_size]"r"((mips_reg)line_size) +: "memory" ); } void ff_clear_block_mmi(int16_t *block) { +double ftmp[2]; + __asm__ volatile ( -"xor $f0, $f0, $f0 \r\n" -"xor $f2, $f2, $f2 \r\n" -"gssqc1 $f0, $f2, 0(%0) \r\n" -"gssqc1 $f0, $f2, 16(%0) \r\n" -"gssqc1 $f0, $f2, 32(%0) \r\n" -"gssqc1 $f0, $f2, 48(%0) \r\n"
[FFmpeg-devel] [PATCH 09/11] avcodec/mips: loongson optimize blockdsp with mmi v2
From 9e5ade4c99eb23f72a89f0054f8b5626c9acceb3 Mon Sep 17 00:00:00 2001 From: ZhouXiaoyong Date: Fri, 13 May 2016 14:30:10 +0800 Subject: [PATCH 09/11] avcodec/mips: loongson optimize blockdsp with mmi v2 --- libavcodec/mips/blockdsp_mmi.c | 195 ++--- 1 file changed, 103 insertions(+), 92 deletions(-) diff --git a/libavcodec/mips/blockdsp_mmi.c b/libavcodec/mips/blockdsp_mmi.c index 63eaf69..6eb2bd7 100644 --- a/libavcodec/mips/blockdsp_mmi.c +++ b/libavcodec/mips/blockdsp_mmi.c @@ -22,126 +22,137 @@ */ #include "blockdsp_mips.h" +#include "libavutil/mips/asmdefs.h" void ff_fill_block16_mmi(uint8_t *block, uint8_t value, int line_size, int h) { +double ftmp[1]; + __asm__ volatile ( -"move $8, %3\r\n" -"move $9, %0\r\n" -"dmtc1 %1, $f2 \r\n" -"punpcklbh $f2, $f2, $f2\r\n" -"punpcklbh $f2, $f2, $f2\r\n" -"punpcklbh $f2, $f2, $f2\r\n" -"1: \r\n" -"gssdlc1 $f2, 7($9) \r\n" -"gssdrc1 $f2, 0($9) \r\n" -"gssdlc1 $f2, 15($9)\r\n" -"gssdrc1 $f2, 8($9) \r\n" -"daddi $8, $8, -1 \r\n" -"daddu $9, $9, %2 \r\n" -"bnez $8, 1b\r\n" -::"r"(block),"r"(value),"r"(line_size),"r"(h) -: "$8","$9" +"mtc1 %[value], %[ftmp0]\n\t" +"punpcklbh %[ftmp0], %[ftmp0], %[ftmp0]\n\t" +"punpcklbh %[ftmp0], %[ftmp0], %[ftmp0]\n\t" +"punpcklbh %[ftmp0], %[ftmp0], %[ftmp0]\n\t" +"1: \n\t" +"gssdlc1%[ftmp0], 0x07(%[block]) \n\t" +"gssdrc1%[ftmp0], 0x00(%[block]) \n\t" +PTR_ADDI"%[h], %[h], -0x01 \n\t" +"gssdlc1%[ftmp0], 0x0f(%[block]) \n\t" +"gssdrc1%[ftmp0], 0x08(%[block]) \n\t" +PTR_ADDU "%[block], %[block], %[line_size]\n\t" +"bnez %[h], 1b \n\t" +: [block]"+&r"(block), [h]"+&r"(h), + [ftmp0]"=&f"(ftmp[0]) +: [value]"r"(value),[line_size]"r"((mips_reg)line_size) +: "memory" ); } void ff_fill_block8_mmi(uint8_t *block, uint8_t value, int line_size, int h) { +double ftmp0; + __asm__ volatile ( -"move $8, %3\r\n" -"move $9, %0\r\n" -"dmtc1 %1, $f2 \r\n" -"punpcklbh $f2, $f2, $f2\r\n" -"punpcklbh $f2, $f2, $f2\r\n" -"punpcklbh $f2, $f2, $f2\r\n" -"1: \r\n" -"gssdlc1 $f2, 7($9) \r\n" -"gssdrc1 $f2, 0($9) 
\r\n" -"daddi $8, $8, -1 \r\n" -"daddu $9, $9, %2 \r\n" -"bnez $8, 1b\r\n" -::"r"(block),"r"(value),"r"(line_size),"r"(h) -: "$8","$9" +"mtc1 %[value], %[ftmp0]\n\t" +"punpcklbh %[ftmp0], %[ftmp0], %[ftmp0]\n\t" +"punpcklbh %[ftmp0], %[ftmp0], %[ftmp0]\n\t" +"punpcklbh %[ftmp0], %[ftmp0], %[ftmp0]\n\t" +"1: \n\t" +"gssdlc1%[ftmp0], 0x07(%[block]) \n\t" +"gssdrc1%[ftmp0], 0x00(%[block]) \n\t" +PTR_ADDI "%[h], %[h], -0x01 \n\t" +PTR_ADDU "%[block], %[block], %[line_size]\n\t" +"bnez %[h], 1b \n\t" +: [block]"+&r"(block), [h]"+&r"(h), + [ftmp0]"=&f"(ftmp0) +: [value]"r"(value),[line_size]"r"((mips_reg)line_size) +: "memory" ); } void ff_clear_block_mmi(int16_t *block) { +double ftmp[2]; + __asm__ volatile ( -"xor $f0, $f0, $f0 \r\n" -"xor $f2, $f2, $f2 \r\n" -"gssqc1 $f0, $f2, 0(%0) \r\n" -"gssqc1 $f0, $f2, 16(%0) \r\n" -"gssqc1 $f0, $f2, 32(%0) \r\n" -"gssqc1 $f0, $f2, 48(%0) \r\n" -"gssqc1 $f0, $f2, 64(%0) \r\n" -"gssqc1 $f0, $f2, 80(%0) \r\n" -"gssqc1 $f0, $f2, 96(%0) \r\n" -"gssqc1 $f0, $f2, 112(%0) \r\n" -::"r"(block) +"xor%[ftmp0], %[ftmp0], %[ftmp0]\n\t" +"xor%[ftmp1], %[ftmp1],