On Mon, Jul 27, 2009 at 3:52 AM, Michael Niedermayer<[email protected]> wrote:
> On Mon, Jul 27, 2009 at 03:27:58AM -0300, Ramiro Polla wrote:
>> On Fri, Jul 24, 2009 at 10:05 AM, Michael Niedermayer<[email protected]> 
>> wrote:
>> > On Thu, Jul 23, 2009 at 10:56:34PM -0300, Ramiro Polla wrote:
>> >> Attached are some patches for the fast bilinear scaler, while I wait
>> >> on Michael's answer on the previous message regarding fast_bilinear.
>> >>
>> >> Hmm, should these patches go to -devel or -soc?
>> >>
>> >> Ramiro Polla
>> >
>> >>  swscale.c |   61 
>> >> +++++++++++++++++++++++++++++--------------------------------
>> >>  1 file changed, 29 insertions(+), 32 deletions(-)
>> >> d369af493c0c7e0f5d040ed71c1b232cf30a31bc  
>> >> 0001-Avoid-duplication-in-initMMX2HScaler.patch
>> >> From 05c4bf02481265155b7d25438e9b32ceefcd578e Mon Sep 17 00:00:00 2001
>> >> From: Ramiro Polla <ram...@macbuntu.(none)>
>> >> Date: Thu, 23 Jul 2009 21:56:57 -0300
>> >> Subject: [PATCH] Avoid duplication in initMMX2HScaler()
>> >
>> > might be ok but it isnt particularely readable so iam not sure ...
>>
>> What's not readable? The resulting code or the patch? There's
>
> the patch
>
>
>> currently 1 patch for factorization, then a cleanup which indents and
>> changes some {}s, and the last patch (which you didn't comment) is
>> unrelated. Should I try adding intermediate steps to make the
>> factorization more clear?
>
> if you can easily make it more readable please do

Attached patches should make it much more readable, and have no
unrelated cosmetics. The resulting code looks nicer too IMHO.
From dfa3eb729b2d2c4ed392d044854a8d28fa9ee542 Mon Sep 17 00:00:00 2001
From: Ramiro Polla <[email protected]>
Date: Mon, 27 Jul 2009 04:23:03 -0300
Subject: [PATCH] MMX2 scaler: add variable to ease factorization of initMMX2Scaler().

---
 swscale.c |    7 ++++---
 1 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/swscale.c b/swscale.c
index 4d3b893..a8ec3ae 100644
--- a/swscale.c
+++ b/swscale.c
@@ -1890,6 +1890,7 @@ static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *fil
             int b=((xpos+xInc)>>16) - xx;
             int c=((xpos+xInc*2)>>16) - xx;
             int d=((xpos+xInc*3)>>16) - xx;
+            int inc                = (d+1<4);
 
             filter[i  ] = (( xpos         & 0xFFFF) ^ 0xFFFF)>>9;
             filter[i+1] = (((xpos+xInc  ) & 0xFFFF) ^ 0xFFFF)>>9;
@@ -1899,17 +1900,17 @@ static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *fil
 
             if (d+1<4)
             {
-                int maxShift= 3-(d+1);
+                int maxShift= 3-(d+inc);
                 int shift=0;
 
                 memcpy(funnyCode + fragmentPos, fragmentB, fragmentLengthB);
 
                 funnyCode[fragmentPos + imm8OfPShufW1B]=
-                    (a+1) | ((b+1)<<2) | ((c+1)<<4) | ((d+1)<<6);
+                    (a+inc) | ((b+inc)<<2) | ((c+inc)<<4) | ((d+inc)<<6);
                 funnyCode[fragmentPos + imm8OfPShufW2B]=
                     a | (b<<2) | (c<<4) | (d<<6);
 
-                if (i+3>=dstW) shift=maxShift; //avoid overread
+                if (i+4-inc>=dstW) shift=maxShift; //avoid overread
                 else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //Align
 
                 if (shift && i>=shift)
-- 
1.6.0.4

From e1d6299981e09414e3820a57e57fafc0f48bb93c Mon Sep 17 00:00:00 2001
From: Ramiro Polla <[email protected]>
Date: Mon, 27 Jul 2009 04:24:42 -0300
Subject: [PATCH] MMX2 scaler: factorize initMMX2Scaler().

---
 swscale.c |   40 ++++++++++------------------------------
 1 files changed, 10 insertions(+), 30 deletions(-)

diff --git a/swscale.c b/swscale.c
index a8ec3ae..e12ad4b 100644
--- a/swscale.c
+++ b/swscale.c
@@ -1891,6 +1891,10 @@ static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *fil
             int c=((xpos+xInc*2)>>16) - xx;
             int d=((xpos+xInc*3)>>16) - xx;
             int inc                = (d+1<4);
+            uint8_t *fragment      = (d+1<4) ? fragmentB       : fragmentA;
+            x86_reg imm8OfPShufW1  = (d+1<4) ? imm8OfPShufW1B  : imm8OfPShufW1A;
+            x86_reg imm8OfPShufW2  = (d+1<4) ? imm8OfPShufW2B  : imm8OfPShufW2A;
+            x86_reg fragmentLength = (d+1<4) ? fragmentLengthB : fragmentLengthA;
 
             filter[i  ] = (( xpos         & 0xFFFF) ^ 0xFFFF)>>9;
             filter[i+1] = (((xpos+xInc  ) & 0xFFFF) ^ 0xFFFF)>>9;
@@ -1898,16 +1902,15 @@ static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *fil
             filter[i+3] = (((xpos+xInc*3) & 0xFFFF) ^ 0xFFFF)>>9;
             filterPos[i/2]= xx;
 
-            if (d+1<4)
             {
                 int maxShift= 3-(d+inc);
                 int shift=0;
 
-                memcpy(funnyCode + fragmentPos, fragmentB, fragmentLengthB);
+                memcpy(funnyCode + fragmentPos, fragment, fragmentLength);
 
-                funnyCode[fragmentPos + imm8OfPShufW1B]=
+                funnyCode[fragmentPos + imm8OfPShufW1]=
                     (a+inc) | ((b+inc)<<2) | ((c+inc)<<4) | ((d+inc)<<6);
-                funnyCode[fragmentPos + imm8OfPShufW2B]=
+                funnyCode[fragmentPos + imm8OfPShufW2]=
                     a | (b<<2) | (c<<4) | (d<<6);
 
                 if (i+4-inc>=dstW) shift=maxShift; //avoid overread
@@ -1915,35 +1918,12 @@ static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *fil
 
                 if (shift && i>=shift)
                 {
-                    funnyCode[fragmentPos + imm8OfPShufW1B]+= 0x55*shift;
-                    funnyCode[fragmentPos + imm8OfPShufW2B]+= 0x55*shift;
+                    funnyCode[fragmentPos + imm8OfPShufW1]+= 0x55*shift;
+                    funnyCode[fragmentPos + imm8OfPShufW2]+= 0x55*shift;
                     filterPos[i/2]-=shift;
                 }
 
-                fragmentPos+= fragmentLengthB;
-            }
-            else
-            {
-                int maxShift= 3-d;
-                int shift=0;
-
-                memcpy(funnyCode + fragmentPos, fragmentA, fragmentLengthA);
-
-                funnyCode[fragmentPos + imm8OfPShufW1A]=
-                funnyCode[fragmentPos + imm8OfPShufW2A]=
-                    a | (b<<2) | (c<<4) | (d<<6);
-
-                if (i+4>=dstW) shift=maxShift; //avoid overread
-                else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //partial align
-
-                if (shift && i>=shift)
-                {
-                    funnyCode[fragmentPos + imm8OfPShufW1A]+= 0x55*shift;
-                    funnyCode[fragmentPos + imm8OfPShufW2A]+= 0x55*shift;
-                    filterPos[i/2]-=shift;
-                }
-
-                fragmentPos+= fragmentLengthA;
+                fragmentPos+= fragmentLength;
             }
 
             funnyCode[fragmentPos]= RET;
-- 
1.6.0.4

From a68dee5e39cf3175aa814192b6ed5719135f04ea Mon Sep 17 00:00:00 2001
From: Ramiro Polla <[email protected]>
Date: Mon, 27 Jul 2009 04:26:50 -0300
Subject: [PATCH] Cosmetics.

---
 swscale.c |   35 ++++++++++++++++-------------------
 1 files changed, 16 insertions(+), 19 deletions(-)

diff --git a/swscale.c b/swscale.c
index e12ad4b..4adc687 100644
--- a/swscale.c
+++ b/swscale.c
@@ -1895,6 +1895,8 @@ static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *fil
             x86_reg imm8OfPShufW1  = (d+1<4) ? imm8OfPShufW1B  : imm8OfPShufW1A;
             x86_reg imm8OfPShufW2  = (d+1<4) ? imm8OfPShufW2B  : imm8OfPShufW2A;
             x86_reg fragmentLength = (d+1<4) ? fragmentLengthB : fragmentLengthA;
+            int maxShift= 3-(d+inc);
+            int shift=0;
 
             filter[i  ] = (( xpos         & 0xFFFF) ^ 0xFFFF)>>9;
             filter[i+1] = (((xpos+xInc  ) & 0xFFFF) ^ 0xFFFF)>>9;
@@ -1902,30 +1904,25 @@ static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *fil
             filter[i+3] = (((xpos+xInc*3) & 0xFFFF) ^ 0xFFFF)>>9;
             filterPos[i/2]= xx;
 
-            {
-                int maxShift= 3-(d+inc);
-                int shift=0;
-
-                memcpy(funnyCode + fragmentPos, fragment, fragmentLength);
+            memcpy(funnyCode + fragmentPos, fragment, fragmentLength);
 
-                funnyCode[fragmentPos + imm8OfPShufW1]=
-                    (a+inc) | ((b+inc)<<2) | ((c+inc)<<4) | ((d+inc)<<6);
-                funnyCode[fragmentPos + imm8OfPShufW2]=
-                    a | (b<<2) | (c<<4) | (d<<6);
+            funnyCode[fragmentPos + imm8OfPShufW1]=
+                (a+inc) | ((b+inc)<<2) | ((c+inc)<<4) | ((d+inc)<<6);
+            funnyCode[fragmentPos + imm8OfPShufW2]=
+                a | (b<<2) | (c<<4) | (d<<6);
 
-                if (i+4-inc>=dstW) shift=maxShift; //avoid overread
-                else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //Align
+            if (i+4-inc>=dstW) shift=maxShift; //avoid overread
+            else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //Align
 
-                if (shift && i>=shift)
-                {
-                    funnyCode[fragmentPos + imm8OfPShufW1]+= 0x55*shift;
-                    funnyCode[fragmentPos + imm8OfPShufW2]+= 0x55*shift;
-                    filterPos[i/2]-=shift;
-                }
-
-                fragmentPos+= fragmentLength;
+            if (shift && i>=shift)
+            {
+                funnyCode[fragmentPos + imm8OfPShufW1]+= 0x55*shift;
+                funnyCode[fragmentPos + imm8OfPShufW2]+= 0x55*shift;
+                filterPos[i/2]-=shift;
             }
 
+            fragmentPos+= fragmentLength;
+
             funnyCode[fragmentPos]= RET;
         }
         xpos+=xInc;
-- 
1.6.0.4

From f90f39da7826d7e0a94e365a7d93c9046d7b82c5 Mon Sep 17 00:00:00 2001
From: Ramiro Polla <[email protected]>
Date: Mon, 27 Jul 2009 04:32:31 -0300
Subject: [PATCH] MMX2 scaler: Determine "funnyCode" size at runtime.

---
 swscale.c          |   49 ++++++++++++++++++++++++++++++-------------------
 swscale_internal.h |    2 ++
 2 files changed, 32 insertions(+), 19 deletions(-)

diff --git a/swscale.c b/swscale.c
index 4adc687..f7c6457 100644
--- a/swscale.c
+++ b/swscale.c
@@ -1781,7 +1781,7 @@ error:
 }
 
 #ifdef COMPILE_MMX2
-static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *filter, int32_t *filterPos, int numSplits)
+static void initMMX2HScaler(int dstW, int xInc, uint8_t **funnyCodePtr, int *funnyCodeSizePtr, int16_t *filter, int32_t *filterPos, int numSplits)
 {
     uint8_t *fragmentA;
     x86_reg imm8OfPShufW1A;
@@ -1791,6 +1791,8 @@ static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *fil
     x86_reg imm8OfPShufW1B;
     x86_reg imm8OfPShufW2B;
     x86_reg fragmentLengthB;
+    int funnyCodeSize = 1;
+    uint8_t *funnyCode;
     int fragmentPos;
 
     int xpos, i;
@@ -1880,6 +1882,27 @@ static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *fil
     xpos= 0; //lumXInc/2 - 0x8000; // difference between pixel centers
     fragmentPos=0;
 
+    /* Determine code size. */
+    for (i=0; i<dstW/numSplits; i+=4) {
+        if (((xpos+xInc*3)>>16) - (xpos>>16) < 3)
+            funnyCodeSize += fragmentLengthB;
+        else
+            funnyCodeSize += fragmentLengthA;
+        xpos+=xInc*4;
+    }
+    *funnyCodeSizePtr = funnyCodeSize;
+
+#ifdef MAP_ANONYMOUS
+    *funnyCodePtr = mmap(NULL, funnyCodeSize, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+#elif HAVE_VIRTUALALLOC
+    *funnyCodePtr = VirtualAlloc(NULL, funnyCodeSize, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
+#else
+    *funnyCodePtr = av_malloc(funnyCodeSize);
+#endif
+    funnyCode = *funnyCodePtr;
+
+    xpos= 0;
+
     for (i=0; i<dstW/numSplits; i++)
     {
         int xx=xpos>>16;
@@ -2834,29 +2857,17 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int d
                    (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
                    srcFilter->chrH, dstFilter->chrH, c->param);
 
-#define MAX_FUNNY_CODE_SIZE 10000
 #if defined(COMPILE_MMX2)
 // can't downscale !!!
         if (c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR))
         {
-#ifdef MAP_ANONYMOUS
-            c->funnyYCode  = mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
-            c->funnyUVCode = mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
-#elif HAVE_VIRTUALALLOC
-            c->funnyYCode  = VirtualAlloc(NULL, MAX_FUNNY_CODE_SIZE, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
-            c->funnyUVCode = VirtualAlloc(NULL, MAX_FUNNY_CODE_SIZE, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
-#else
-            c->funnyYCode  = av_malloc(MAX_FUNNY_CODE_SIZE);
-            c->funnyUVCode = av_malloc(MAX_FUNNY_CODE_SIZE);
-#endif
-
             c->lumMmx2Filter   = av_malloc((dstW        /8+8)*sizeof(int16_t));
             c->chrMmx2Filter   = av_malloc((c->chrDstW  /4+8)*sizeof(int16_t));
             c->lumMmx2FilterPos= av_malloc((dstW      /2/8+8)*sizeof(int32_t));
             c->chrMmx2FilterPos= av_malloc((c->chrDstW/2/4+8)*sizeof(int32_t));
 
-            initMMX2HScaler(      dstW, c->lumXInc, c->funnyYCode , c->lumMmx2Filter, c->lumMmx2FilterPos, 8);
-            initMMX2HScaler(c->chrDstW, c->chrXInc, c->funnyUVCode, c->chrMmx2Filter, c->chrMmx2FilterPos, 4);
+            initMMX2HScaler(      dstW, c->lumXInc, &c->funnyYCode , &c->funnyYCodeSize , c->lumMmx2Filter, c->lumMmx2FilterPos, 8);
+            initMMX2HScaler(c->chrDstW, c->chrXInc, &c->funnyUVCode, &c->funnyUVCodeSize, c->chrMmx2Filter, c->chrMmx2FilterPos, 4);
         }
 #endif /* defined(COMPILE_MMX2) */
     } // initialize horizontal stuff
@@ -3525,11 +3536,11 @@ void sws_freeContext(SwsContext *c){
 
 #if ARCH_X86 && CONFIG_GPL
 #ifdef MAP_ANONYMOUS
-    if (c->funnyYCode ) munmap(c->funnyYCode , MAX_FUNNY_CODE_SIZE);
-    if (c->funnyUVCode) munmap(c->funnyUVCode, MAX_FUNNY_CODE_SIZE);
+    if (c->funnyYCode ) munmap(c->funnyYCode , c->funnyYCodeSize);
+    if (c->funnyUVCode) munmap(c->funnyUVCode, c->funnyUVCodeSize);
 #elif HAVE_VIRTUALALLOC
-    if (c->funnyYCode ) VirtualFree(c->funnyYCode , MAX_FUNNY_CODE_SIZE, MEM_RELEASE);
-    if (c->funnyUVCode) VirtualFree(c->funnyUVCode, MAX_FUNNY_CODE_SIZE, MEM_RELEASE);
+    if (c->funnyYCode ) VirtualFree(c->funnyYCode , c->funnyYCodeSize , MEM_RELEASE);
+    if (c->funnyUVCode) VirtualFree(c->funnyUVCode, c->funnyUVCodeSize, MEM_RELEASE);
 #else
     av_free(c->funnyYCode );
     av_free(c->funnyUVCode);
diff --git a/swscale_internal.h b/swscale_internal.h
index 85e07c0..6430d0d 100644
--- a/swscale_internal.h
+++ b/swscale_internal.h
@@ -113,6 +113,8 @@ typedef struct SwsContext{
 
     uint8_t *funnyYCode;
     uint8_t *funnyUVCode;
+    int funnyYCodeSize;
+    int funnyUVCodeSize;
     int32_t *lumMmx2FilterPos;
     int32_t *chrMmx2FilterPos;
     int16_t *lumMmx2Filter;
-- 
1.6.0.4

_______________________________________________
FFmpeg-soc mailing list
[email protected]
https://lists.mplayerhq.hu/mailman/listinfo/ffmpeg-soc

Reply via email to