Re: [FFmpeg-devel] [PATCH 09/11] avcodec/mips: loongson optimize blockdsp with mmi v2

2016-05-23 Thread Michael Niedermayer
On Tue, May 17, 2016 at 07:14:24PM +0800, 周晓勇 wrote:
> avcodec/mips/blockdsp_mmi: Version 2 of the optimizations for loongson mmi
> 
> 1. No longer use register names directly, and tidy up the code formatting.
> 2. To be compatible with O32, declare address variables as mips_reg and
> handle them with the PTR_ operators.
> 
> 
> 
> 
> 
> 
> 
> 在 2016-05-13 18:06:56,"周晓勇"  写道:
> 
> From 9e5ade4c99eb23f72a89f0054f8b5626c9acceb3 Mon Sep 17 00:00:00 2001
> From: ZhouXiaoyong 
> Date: Fri, 13 May 2016 14:30:10 +0800
> Subject: [PATCH 09/11] avcodec/mips: loongson optimize blockdsp with mmi v2
> 
> 
> ---
>  libavcodec/mips/blockdsp_mmi.c | 195 ++---
>  1 file changed, 103 insertions(+), 92 deletions(-)

applied

thanks

[...]
-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Opposition brings concord. Out of discord comes the fairest harmony.
-- Heraclitus


signature.asc
Description: Digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 09/11] avcodec/mips: loongson optimize blockdsp with mmi v2

2016-05-17 Thread 周晓勇
avcodec/mips/blockdsp_mmi: Version 2 of the optimizations for loongson mmi

1. No longer use register names directly, and tidy up the code formatting.
2. To be compatible with O32, declare address variables as mips_reg and
handle them with the PTR_ operators.







在 2016-05-13 18:06:56,"周晓勇"  写道:

From 9e5ade4c99eb23f72a89f0054f8b5626c9acceb3 Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Fri, 13 May 2016 14:30:10 +0800
Subject: [PATCH 09/11] avcodec/mips: loongson optimize blockdsp with mmi v2


---
 libavcodec/mips/blockdsp_mmi.c | 195 ++---
 1 file changed, 103 insertions(+), 92 deletions(-)


diff --git a/libavcodec/mips/blockdsp_mmi.c b/libavcodec/mips/blockdsp_mmi.c
index 63eaf69..6eb2bd7 100644
--- a/libavcodec/mips/blockdsp_mmi.c
+++ b/libavcodec/mips/blockdsp_mmi.c
@@ -22,126 +22,137 @@
  */
 
 #include "blockdsp_mips.h"
+#include "libavutil/mips/asmdefs.h"
 
 void ff_fill_block16_mmi(uint8_t *block, uint8_t value, int line_size, int h)
 {
+double ftmp[1];
+
 __asm__ volatile (
-"move $8, %3\r\n"
-"move $9, %0\r\n"
-"dmtc1 %1, $f2  \r\n"
-"punpcklbh $f2, $f2, $f2\r\n"
-"punpcklbh $f2, $f2, $f2\r\n"
-"punpcklbh $f2, $f2, $f2\r\n"
-"1: \r\n"
-"gssdlc1 $f2, 7($9) \r\n"
-"gssdrc1 $f2, 0($9) \r\n"
-"gssdlc1 $f2, 15($9)\r\n"
-"gssdrc1 $f2, 8($9) \r\n"
-"daddi $8, $8, -1   \r\n"
-"daddu $9, $9, %2   \r\n"
-"bnez $8, 1b\r\n"
-::"r"(block),"r"(value),"r"(line_size),"r"(h)
-: "$8","$9"
+"mtc1   %[value],   %[ftmp0]\n\t"
+"punpcklbh  %[ftmp0],   %[ftmp0],   %[ftmp0]\n\t"
+"punpcklbh  %[ftmp0],   %[ftmp0],   %[ftmp0]\n\t"
+"punpcklbh  %[ftmp0],   %[ftmp0],   %[ftmp0]\n\t"
+"1: \n\t"
+"gssdlc1%[ftmp0],   0x07(%[block])  \n\t"
+"gssdrc1%[ftmp0],   0x00(%[block])  \n\t"
+PTR_ADDI"%[h],  %[h],   -0x01   \n\t"
+"gssdlc1%[ftmp0],   0x0f(%[block])  \n\t"
+"gssdrc1%[ftmp0],   0x08(%[block])  \n\t"
+PTR_ADDU   "%[block],   %[block],   %[line_size]\n\t"
+"bnez   %[h],   1b  \n\t"
+: [block]"+&r"(block),  [h]"+&r"(h),
+  [ftmp0]"=&f"(ftmp[0])
+: [value]"r"(value),[line_size]"r"((mips_reg)line_size)
+: "memory"
 );
 }
 
 void ff_fill_block8_mmi(uint8_t *block, uint8_t value, int line_size, int h)
 {
+double ftmp0;
+
 __asm__ volatile (
-"move $8, %3\r\n"
-"move $9, %0\r\n"
-"dmtc1 %1, $f2  \r\n"
-"punpcklbh $f2, $f2, $f2\r\n"
-"punpcklbh $f2, $f2, $f2\r\n"
-"punpcklbh $f2, $f2, $f2\r\n"
-"1: \r\n"
-"gssdlc1 $f2, 7($9) \r\n"
-"gssdrc1 $f2, 0($9) \r\n"
-"daddi $8, $8, -1   \r\n"
-"daddu $9, $9, %2   \r\n"
-"bnez $8, 1b\r\n"
-::"r"(block),"r"(value),"r"(line_size),"r"(h)
-: "$8","$9"
+"mtc1   %[value],   %[ftmp0]\n\t"
+"punpcklbh  %[ftmp0],   %[ftmp0],   %[ftmp0]\n\t"
+"punpcklbh  %[ftmp0],   %[ftmp0],   %[ftmp0]\n\t"
+"punpcklbh  %[ftmp0],   %[ftmp0],   %[ftmp0]\n\t"
+"1: \n\t"
+"gssdlc1%[ftmp0],   0x07(%[block])  \n\t"
+"gssdrc1%[ftmp0],   0x00(%[block])  \n\t"
+PTR_ADDI   "%[h],   %[h],   -0x01   \n\t"
+PTR_ADDU   "%[block],   %[block],   %[line_size]\n\t"
+"bnez   %[h],   1b  \n\t"
+: [block]"+&r"(block),  [h]"+&r"(h),
+  [ftmp0]"=&f"(ftmp0)
+: [value]"r"(value),[line_size]"r"((mips_reg)line_size)
+: "memory"
 );
 }
 
 void ff_clear_block_mmi(int16_t *block)
 {
+double ftmp[2];
+
 __asm__ volatile (
-"xor $f0, $f0, $f0  \r\n"
-"xor $f2, $f2, $f2  \r\n"
-"gssqc1 $f0, $f2,   0(%0)   \r\n"
-"gssqc1 $f0, $f2,  16(%0)   \r\n"
-"gssqc1 $f0, $f2,  32(%0)   \r\n"
-"gssqc1 $f0, $f2,  48(%0)   \r\n"

[FFmpeg-devel] [PATCH 09/11] avcodec/mips: loongson optimize blockdsp with mmi v2

2016-05-13 Thread 周晓勇
From 9e5ade4c99eb23f72a89f0054f8b5626c9acceb3 Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong 
Date: Fri, 13 May 2016 14:30:10 +0800
Subject: [PATCH 09/11] avcodec/mips: loongson optimize blockdsp with mmi v2


---
 libavcodec/mips/blockdsp_mmi.c | 195 ++---
 1 file changed, 103 insertions(+), 92 deletions(-)


diff --git a/libavcodec/mips/blockdsp_mmi.c b/libavcodec/mips/blockdsp_mmi.c
index 63eaf69..6eb2bd7 100644
--- a/libavcodec/mips/blockdsp_mmi.c
+++ b/libavcodec/mips/blockdsp_mmi.c
@@ -22,126 +22,137 @@
  */
 
 #include "blockdsp_mips.h"
+#include "libavutil/mips/asmdefs.h"
 
 void ff_fill_block16_mmi(uint8_t *block, uint8_t value, int line_size, int h)
 {
+double ftmp[1];
+
 __asm__ volatile (
-"move $8, %3\r\n"
-"move $9, %0\r\n"
-"dmtc1 %1, $f2  \r\n"
-"punpcklbh $f2, $f2, $f2\r\n"
-"punpcklbh $f2, $f2, $f2\r\n"
-"punpcklbh $f2, $f2, $f2\r\n"
-"1: \r\n"
-"gssdlc1 $f2, 7($9) \r\n"
-"gssdrc1 $f2, 0($9) \r\n"
-"gssdlc1 $f2, 15($9)\r\n"
-"gssdrc1 $f2, 8($9) \r\n"
-"daddi $8, $8, -1   \r\n"
-"daddu $9, $9, %2   \r\n"
-"bnez $8, 1b\r\n"
-::"r"(block),"r"(value),"r"(line_size),"r"(h)
-: "$8","$9"
+"mtc1   %[value],   %[ftmp0]\n\t"
+"punpcklbh  %[ftmp0],   %[ftmp0],   %[ftmp0]\n\t"
+"punpcklbh  %[ftmp0],   %[ftmp0],   %[ftmp0]\n\t"
+"punpcklbh  %[ftmp0],   %[ftmp0],   %[ftmp0]\n\t"
+"1: \n\t"
+"gssdlc1%[ftmp0],   0x07(%[block])  \n\t"
+"gssdrc1%[ftmp0],   0x00(%[block])  \n\t"
+PTR_ADDI"%[h],  %[h],   -0x01   \n\t"
+"gssdlc1%[ftmp0],   0x0f(%[block])  \n\t"
+"gssdrc1%[ftmp0],   0x08(%[block])  \n\t"
+PTR_ADDU   "%[block],   %[block],   %[line_size]\n\t"
+"bnez   %[h],   1b  \n\t"
+: [block]"+&r"(block),  [h]"+&r"(h),
+  [ftmp0]"=&f"(ftmp[0])
+: [value]"r"(value),[line_size]"r"((mips_reg)line_size)
+: "memory"
 );
 }
 
 void ff_fill_block8_mmi(uint8_t *block, uint8_t value, int line_size, int h)
 {
+double ftmp0;
+
 __asm__ volatile (
-"move $8, %3\r\n"
-"move $9, %0\r\n"
-"dmtc1 %1, $f2  \r\n"
-"punpcklbh $f2, $f2, $f2\r\n"
-"punpcklbh $f2, $f2, $f2\r\n"
-"punpcklbh $f2, $f2, $f2\r\n"
-"1: \r\n"
-"gssdlc1 $f2, 7($9) \r\n"
-"gssdrc1 $f2, 0($9) \r\n"
-"daddi $8, $8, -1   \r\n"
-"daddu $9, $9, %2   \r\n"
-"bnez $8, 1b\r\n"
-::"r"(block),"r"(value),"r"(line_size),"r"(h)
-: "$8","$9"
+"mtc1   %[value],   %[ftmp0]\n\t"
+"punpcklbh  %[ftmp0],   %[ftmp0],   %[ftmp0]\n\t"
+"punpcklbh  %[ftmp0],   %[ftmp0],   %[ftmp0]\n\t"
+"punpcklbh  %[ftmp0],   %[ftmp0],   %[ftmp0]\n\t"
+"1: \n\t"
+"gssdlc1%[ftmp0],   0x07(%[block])  \n\t"
+"gssdrc1%[ftmp0],   0x00(%[block])  \n\t"
+PTR_ADDI   "%[h],   %[h],   -0x01   \n\t"
+PTR_ADDU   "%[block],   %[block],   %[line_size]\n\t"
+"bnez   %[h],   1b  \n\t"
+: [block]"+&r"(block),  [h]"+&r"(h),
+  [ftmp0]"=&f"(ftmp0)
+: [value]"r"(value),[line_size]"r"((mips_reg)line_size)
+: "memory"
 );
 }
 
 void ff_clear_block_mmi(int16_t *block)
 {
+double ftmp[2];
+
 __asm__ volatile (
-"xor $f0, $f0, $f0  \r\n"
-"xor $f2, $f2, $f2  \r\n"
-"gssqc1 $f0, $f2,   0(%0)   \r\n"
-"gssqc1 $f0, $f2,  16(%0)   \r\n"
-"gssqc1 $f0, $f2,  32(%0)   \r\n"
-"gssqc1 $f0, $f2,  48(%0)   \r\n"
-"gssqc1 $f0, $f2,  64(%0)   \r\n"
-"gssqc1 $f0, $f2,  80(%0)   \r\n"
-"gssqc1 $f0, $f2,  96(%0)   \r\n"
-"gssqc1 $f0, $f2, 112(%0)   \r\n"
-::"r"(block)
+"xor%[ftmp0],   %[ftmp0],   %[ftmp0]\n\t"
+"xor%[ftmp1],   %[ftmp1],