[FFmpeg-devel] [PATCH 2/2] Moved templated C postprocessing routines into separate file

2015-03-12 Thread Tucker DiNapoli
Currently different versions of the postprocessing routines are
generated from a template. Ultimately I intend to remove this by
replacing the inline assembly with separate yasm files. The C routines
will still be needed, so they need to be moved to a separate file.
The routines were added to the file introduced by the last commit.
---
 libpostproc/postprocess.c   |   7 +-
 libpostproc/postprocess_c.c | 829 
 2 files changed, 830 insertions(+), 6 deletions(-)

diff --git a/libpostproc/postprocess.c b/libpostproc/postprocess.c
index 86c0520..2cdd988 100644
--- a/libpostproc/postprocess.c
+++ b/libpostproc/postprocess.c
@@ -198,14 +198,9 @@ static inline void prefetcht2(const void *p)
 );
 }
 #endif
-
+//Plain C versions
 #include "postprocess_c.c"
-
 //Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
-//Plain C versions
-//we always compile C for testing which needs bitexactness
-#define TEMPLATE_PP_C 1
-#include "postprocess_template.c"
 
 #if HAVE_ALTIVEC
 #   define TEMPLATE_PP_ALTIVEC 1
diff --git a/libpostproc/postprocess_c.c b/libpostproc/postprocess_c.c
index bf22e95..5f9cb18 100644
--- a/libpostproc/postprocess_c.c
+++ b/libpostproc/postprocess_c.c
@@ -371,3 +371,832 @@ static av_always_inline void do_a_deblock_C(uint8_t *src, int step,
 STOP_TIMER(stepX)
 }*/
 }
+
+#define PAVGB(a,b) REAL_PAVGB(a,b)
+
+//FIXME? |255-0| = 1 (should not be a problem ...)
+
+/**
+ * Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle)
+ * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16
+ */
+static inline void doVertLowPass_C(uint8_t *src, int stride, PPContext *c)
+{
+const int l1= stride;
+const int l2= stride + l1;
+const int l3= stride + l2;
+const int l4= stride + l3;
+const int l5= stride + l4;
+const int l6= stride + l5;
+const int l7= stride + l6;
+const int l8= stride + l7;
+const int l9= stride + l8;
+int x;
+src+= stride*3;
+for(x=0; x<BLOCK_SIZE; x++){
+const int first= FFABS(src[0] - src[l1]) < c->QP ? src[0] : src[l1];
+const int last= FFABS(src[l8] - src[l9]) < c->QP ? src[l9] : src[l8];
+
+int sums[10];
+sums[0] = 4*first + src[l1] + src[l2] + src[l3] + 4;
+sums[1] = sums[0] - first  + src[l4];
+sums[2] = sums[1] - first  + src[l5];
+sums[3] = sums[2] - first  + src[l6];
+sums[4] = sums[3] - first  + src[l7];
+sums[5] = sums[4] - src[l1] + src[l8];
+sums[6] = sums[5] - src[l2] + last;
+sums[7] = sums[6] - src[l3] + last;
+sums[8] = sums[7] - src[l4] + last;
+sums[9] = sums[8] - src[l5] + last;
+
+src[l1]= (sums[0] + sums[2] + 2*src[l1])>>4;
+src[l2]= (sums[1] + sums[3] + 2*src[l2])>>4;
+src[l3]= (sums[2] + sums[4] + 2*src[l3])>>4;
+src[l4]= (sums[3] + sums[5] + 2*src[l4])>>4;
+src[l5]= (sums[4] + sums[6] + 2*src[l5])>>4;
+src[l6]= (sums[5] + sums[7] + 2*src[l6])>>4;
+src[l7]= (sums[6] + sums[8] + 2*src[l7])>>4;
+src[l8]= (sums[7] + sums[9] + 2*src[l8])>>4;
+
+src++;
+}
+}
+
+/**
+ * Experimental Filter 1
+ * will not damage linear gradients
+ * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
+ * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
+ * MMX2 version does correct clipping C version does not
+ */
+static inline void vertX1Filter_C(uint8_t *src, int stride, PPContext *co)
+{
+
+const int l1= stride;
+const int l2= stride + l1;
+const int l3= stride + l2;
+const int l4= stride + l3;
+const int l5= stride + l4;
+const int l6= stride + l5;
+const int l7= stride + l6;
+//const int l8= stride + l7;
+//const int l9= stride + l8;
+int x;
+
+src+= stride*3;
+for(x=0; x<BLOCK_SIZE; x++){
+int a= src[l3] - src[l4];
+int b= src[l4] - src[l5];
+int c= src[l5] - src[l6];
+
+int d= FFABS(b) - ((FFABS(a) + FFABS(c))>>1);
+d= FFMAX(d, 0);
+
+if(d < co->QP*2){
+int v = d * FFSIGN(-b);
+
+src[l2] +=v>>3;
+src[l3] +=v>>2;
+src[l4] +=(3*v)>>3;
+src[l5] -=(3*v)>>3;
+src[l6] -=v>>2;
+src[l7] -=v>>3;
+}
+src++;
+}
+}
+
+static inline void doVertDefFilter_C(uint8_t src[], int stride, PPContext *c)
+{
+const int l1= stride;
+const int l2= stride + l1;
+const int l3= stride + l2;
+const int l4= stride + l3;
+const int l5= stride + l4;
+const int l6= stride + l5;
+const int l7= stride + l6;
+const int l8= stride + l7;
+//const int l9= stride + l8;
+int x;
+src+= stride*3;
+for(x=0; x<BLOCK_SIZE; x++){
+const int middleEnergy= 5*(src[l5] - src[l4]) + 2*(src[l3] - src[l6]);
+if(FFABS(middleEnergy) < 8*c->QP){
+const int q=(src[l4] - src[l5])/2;
+const int leftEnergy= 

Re: [FFmpeg-devel] [PATCH 2/2] Moved templated C postprocessing routines into separate file

2015-03-11 Thread Michael Niedermayer
On Wed, Mar 11, 2015 at 01:51:13PM -0400, Tucker DiNapoli wrote:
> Currently different versions of the postprocessing routines are
> generated from a template. Ultimately I intend to remove this by
> replacing the inline assembly with separate yasm files. The C routines
> will still be needed, so they need to be moved to a separate file.
> The routines were added to the file introduced by the last commit.
> ---
>  libpostproc/postprocess.c   |   7 +-
>  libpostproc/postprocess_c.c | 829 
> 
>  2 files changed, 830 insertions(+), 6 deletions(-)

code should not be duplicated

for each C function you add to postprocess_c.c the corresponding
C code should be removed from the template (unless it's needed for
the asm to function)

[...]
-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

No human being will ever know the Truth, for even if they happen to say it
by chance, they would not even know they had done so. -- Xenophanes


signature.asc
Description: Digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel