Re: [FFmpeg-devel] [PATCH 12/12] mips/aaccoder: use variables instead of using register names directly

2015-03-03 Thread Nedeljko Babic
LGTM

-Nedeljko

Od: James Cowgill [james...@cowgill.org.uk]
Poslato: 26. februar 2015 14:42
Za: ffmpeg-devel@ffmpeg.org
Cc: Nedeljko Babic; James Cowgill
Tema: [PATCH 12/12] mips/aaccoder: use variables instead of using register 
names directly

On mips64, the registers t[4-7] do not exist. Instead of using a lot of #ifdef
or defines to handle differing register names, use variables and let GCC
allocate the registers automatically (like in the other mips assembly files).

In get_band_cost_ESC_mips, t4 and t5 were renamed to t6 and t7 to avoid a
variable name conflict.

Signed-off-by: James Cowgill <james...@cowgill.org.uk>
---
 libavcodec/mips/aaccoder_mips.c | 929 +---
 1 file changed, 477 insertions(+), 452 deletions(-)

diff --git a/libavcodec/mips/aaccoder_mips.c b/libavcodec/mips/aaccoder_mips.c
index 8595913..ea0bf31 100644
--- a/libavcodec/mips/aaccoder_mips.c
+++ b/libavcodec/mips/aaccoder_mips.c
@@ -221,6 +221,7 @@ static void quantize_and_encode_band_cost_SQUAD_mips(struct 
AACEncContext *s,
 for (i = 0; i < size; i += 4) {
 int curidx;
 int *in_int = (int *)&in[i];
+int t0, t1, t2, t3, t4, t5, t6, t7;

 qc1 = scaled[i  ] * Q34 + 0.4054f;
 qc2 = scaled[i+1] * Q34 + 0.4054f;
@@ -235,31 +236,31 @@ static void 
quantize_and_encode_band_cost_SQUAD_mips(struct AACEncContext *s,
 slt%[qc2], $zero,  %[qc2]  \n\t
 slt%[qc3], $zero,  %[qc3]  \n\t
 slt%[qc4], $zero,  %[qc4]  \n\t
-lw $t0,0(%[in_int])\n\t
-lw $t1,4(%[in_int])\n\t
-lw $t2,8(%[in_int])\n\t
-lw $t3,12(%[in_int])   \n\t
-srl$t0,$t0,31  \n\t
-srl$t1,$t1,31  \n\t
-srl$t2,$t2,31  \n\t
-srl$t3,$t3,31  \n\t
-subu   $t4,$zero,  %[qc1]  \n\t
-subu   $t5,$zero,  %[qc2]  \n\t
-subu   $t6,$zero,  %[qc3]  \n\t
-subu   $t7,$zero,  %[qc4]  \n\t
-movn   %[qc1], $t4,$t0 \n\t
-movn   %[qc2], $t5,$t1 \n\t
-movn   %[qc3], $t6,$t2 \n\t
-movn   %[qc4], $t7,$t3 \n\t
+lw %[t0],  0(%[in_int])\n\t
+lw %[t1],  4(%[in_int])\n\t
+lw %[t2],  8(%[in_int])\n\t
+lw %[t3],  12(%[in_int])   \n\t
+srl%[t0],  %[t0],  31  \n\t
+srl%[t1],  %[t1],  31  \n\t
+srl%[t2],  %[t2],  31  \n\t
+srl%[t3],  %[t3],  31  \n\t
+subu   %[t4],  $zero,  %[qc1]  \n\t
+subu   %[t5],  $zero,  %[qc2]  \n\t
+subu   %[t6],  $zero,  %[qc3]  \n\t
+subu   %[t7],  $zero,  %[qc4]  \n\t
+movn   %[qc1], %[t4],  %[t0]   \n\t
+movn   %[qc2], %[t5],  %[t1]   \n\t
+movn   %[qc3], %[t6],  %[t2]   \n\t
+movn   %[qc4], %[t7],  %[t3]   \n\t

 .set pop   \n\t

 : [qc1]+r(qc1), [qc2]+r(qc2),
-  [qc3]+r(qc3), [qc4]+r(qc4)
+  [qc3]+r(qc3), [qc4]+r(qc4),
+  [t0]=r(t0), [t1]=r(t1), [t2]=r(t2), [t3]=r(t3),
+  [t4]=r(t4), [t5]=r(t5), [t6]=r(t6), [t7]=r(t7)
 : [in_int]r(in_int)
-: t0, t1, t2, t3,
-  t4, t5, t6, t7,
-  memory
+: memory
 );

 curidx = qc1;
@@ -295,6 +296,7 @@ static void quantize_and_encode_band_cost_UQUAD_mips(struct 
AACEncContext *s,
 int *in_int = (int *)&in[i];
 uint8_t v_bits;
 unsigned int v_codes;
+int t0, t1, t2, t3, t4;

 qc1 = scaled[i  ] * Q34 + 0.4054f;
 qc2 = scaled[i+1] * Q34 + 0.4054f;
@@ -305,50 +307,51 @@ static void 
quantize_and_encode_band_cost_UQUAD_mips(struct AACEncContext *s,
 .set push  \n\t
 .set noreorder \n\t

-ori$t4,$zero,  2   \n\t
+ori%[t4],  $zero,  2   \n\t
 ori%[sign],$zero,  0   \n\t
-slt$t0,$t4,%[qc1]  \n\t
-slt$t1,$t4,%[qc2]  \n\t
-slt$t2,$t4,%[qc3]  \n\t
-slt$t3,$t4,%[qc4]  \n\t
-movn   %[qc1], $t4,$t0 \n\t
-movn   %[qc2], $t4,$t1 \n\t
-movn   %[qc3], $t4,$t2 \n\t
-movn   %[qc4], $t4,$t3 \n\t
-lw $t0,0(%[in_int])\n\t
-lw $t1,4(%[in_int])\n\t
-lw $t2,8(%[in_int])\n\t
-lw $t3,12(%[in_int])   

Re: [FFmpeg-devel] [PATCH 12/12] mips/aaccoder: use variables instead of using register names directly

2015-03-03 Thread Michael Niedermayer
On Tue, Mar 03, 2015 at 12:44:47PM +0000, Nedeljko Babic wrote:
> LGTM

applied

thanks

[...]
-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Asymptotically faster algorithms should always be preferred if you have
asymptotical amounts of data


signature.asc
Description: Digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 12/12] mips/aaccoder: use variables instead of using register names directly

2015-02-26 Thread James Cowgill
On mips64, the registers t[4-7] do not exist. Instead of using a lot of #ifdef
or defines to handle differing register names, use variables and let GCC
allocate the registers automatically (like in the other mips assembly files).

In get_band_cost_ESC_mips, t4 and t5 were renamed to t6 and t7 to avoid a
variable name conflict.

Signed-off-by: James Cowgill <james...@cowgill.org.uk>
---
 libavcodec/mips/aaccoder_mips.c | 929 +---
 1 file changed, 477 insertions(+), 452 deletions(-)

diff --git a/libavcodec/mips/aaccoder_mips.c b/libavcodec/mips/aaccoder_mips.c
index 8595913..ea0bf31 100644
--- a/libavcodec/mips/aaccoder_mips.c
+++ b/libavcodec/mips/aaccoder_mips.c
@@ -221,6 +221,7 @@ static void quantize_and_encode_band_cost_SQUAD_mips(struct 
AACEncContext *s,
 for (i = 0; i < size; i += 4) {
 int curidx;
 int *in_int = (int *)&in[i];
+int t0, t1, t2, t3, t4, t5, t6, t7;
 
 qc1 = scaled[i  ] * Q34 + 0.4054f;
 qc2 = scaled[i+1] * Q34 + 0.4054f;
@@ -235,31 +236,31 @@ static void 
quantize_and_encode_band_cost_SQUAD_mips(struct AACEncContext *s,
 slt%[qc2], $zero,  %[qc2]  \n\t
 slt%[qc3], $zero,  %[qc3]  \n\t
 slt%[qc4], $zero,  %[qc4]  \n\t
-lw $t0,0(%[in_int])\n\t
-lw $t1,4(%[in_int])\n\t
-lw $t2,8(%[in_int])\n\t
-lw $t3,12(%[in_int])   \n\t
-srl$t0,$t0,31  \n\t
-srl$t1,$t1,31  \n\t
-srl$t2,$t2,31  \n\t
-srl$t3,$t3,31  \n\t
-subu   $t4,$zero,  %[qc1]  \n\t
-subu   $t5,$zero,  %[qc2]  \n\t
-subu   $t6,$zero,  %[qc3]  \n\t
-subu   $t7,$zero,  %[qc4]  \n\t
-movn   %[qc1], $t4,$t0 \n\t
-movn   %[qc2], $t5,$t1 \n\t
-movn   %[qc3], $t6,$t2 \n\t
-movn   %[qc4], $t7,$t3 \n\t
+lw %[t0],  0(%[in_int])\n\t
+lw %[t1],  4(%[in_int])\n\t
+lw %[t2],  8(%[in_int])\n\t
+lw %[t3],  12(%[in_int])   \n\t
+srl%[t0],  %[t0],  31  \n\t
+srl%[t1],  %[t1],  31  \n\t
+srl%[t2],  %[t2],  31  \n\t
+srl%[t3],  %[t3],  31  \n\t
+subu   %[t4],  $zero,  %[qc1]  \n\t
+subu   %[t5],  $zero,  %[qc2]  \n\t
+subu   %[t6],  $zero,  %[qc3]  \n\t
+subu   %[t7],  $zero,  %[qc4]  \n\t
+movn   %[qc1], %[t4],  %[t0]   \n\t
+movn   %[qc2], %[t5],  %[t1]   \n\t
+movn   %[qc3], %[t6],  %[t2]   \n\t
+movn   %[qc4], %[t7],  %[t3]   \n\t
 
 .set pop   \n\t
 
 : [qc1]+r(qc1), [qc2]+r(qc2),
-  [qc3]+r(qc3), [qc4]+r(qc4)
+  [qc3]+r(qc3), [qc4]+r(qc4),
+  [t0]=r(t0), [t1]=r(t1), [t2]=r(t2), [t3]=r(t3),
+  [t4]=r(t4), [t5]=r(t5), [t6]=r(t6), [t7]=r(t7)
 : [in_int]r(in_int)
-: t0, t1, t2, t3,
-  t4, t5, t6, t7,
-  memory
+: memory
 );
 
 curidx = qc1;
@@ -295,6 +296,7 @@ static void quantize_and_encode_band_cost_UQUAD_mips(struct 
AACEncContext *s,
 int *in_int = (int *)&in[i];
 uint8_t v_bits;
 unsigned int v_codes;
+int t0, t1, t2, t3, t4;
 
 qc1 = scaled[i  ] * Q34 + 0.4054f;
 qc2 = scaled[i+1] * Q34 + 0.4054f;
@@ -305,50 +307,51 @@ static void 
quantize_and_encode_band_cost_UQUAD_mips(struct AACEncContext *s,
 .set push  \n\t
 .set noreorder \n\t
 
-ori$t4,$zero,  2   \n\t
+ori%[t4],  $zero,  2   \n\t
 ori%[sign],$zero,  0   \n\t
-slt$t0,$t4,%[qc1]  \n\t
-slt$t1,$t4,%[qc2]  \n\t
-slt$t2,$t4,%[qc3]  \n\t
-slt$t3,$t4,%[qc4]  \n\t
-movn   %[qc1], $t4,$t0 \n\t
-movn   %[qc2], $t4,$t1 \n\t
-movn   %[qc3], $t4,$t2 \n\t
-movn   %[qc4], $t4,$t3 \n\t
-lw $t0,0(%[in_int])\n\t
-lw $t1,4(%[in_int])\n\t
-lw $t2,8(%[in_int])\n\t
-lw $t3,12(%[in_int])   \n\t
-slt$t0,$t0,$zero   \n\t
-movn   %[sign],$t0,%[qc1]  \n\t
-slt$t1,$t1,$zero   \n\t
-slt$t2,$t2,$zero   \n\t
-slt$t3,$t3,$zero