From 7efb0fae8ed52b6f841d70c4d8981399da42e7bd Mon Sep 17 00:00:00 2001
From: Pedro Arthur <bygrandao@gmail.com>
Date: Sun, 24 May 2015 12:52:46 -0300
Subject: [PATCH] swscale refactor: added initial filters

Signed-off-by: Pedro Arthur <bygrandao@gmail.com>
---
 libswscale/slice.c            | 299 ++++++++++++++++++++++++++++++++++++++++++
 libswscale/swscale.c          |  33 ++++-
 libswscale/swscale_internal.h |  41 ++++++
 libswscale/utils.c            |   3 +
 4 files changed, 374 insertions(+), 2 deletions(-)
 create mode 100644 libswscale/slice.c

diff --git a/libswscale/slice.c b/libswscale/slice.c
new file mode 100644
index 0000000..4f40ae6
--- /dev/null
+++ b/libswscale/slice.c
@@ -0,0 +1,299 @@
+#include "swscale_internal.h"
+
+/*
+int alloc_slice(SwsSlice * s, enum AVPixelFormat fmt, int lines, int v_sub_sample, int h_sub_sample);
+void free_slice(SwsSlice *s);
+int init_slice_1(SwsSlice *s, uint8_t *v, uint8_t *v2, int dstW, int sliceY, int sliceH);
+int lum_h_scale(SwsContext *c, SwsFilterDescriptor *desc, int sliceY, int sliceH);
+int lum_convert(SwsContext *c, SwsFilterDescriptor *desc, int sliceY, int sliceH);
+*/
+
+
+int ff_init_slice_from_src(SwsSlice * s, uint8_t *src[4], int stride[4], int srcW, int sliceY, int sliceH, int skip);
+int ff_init_slice_from_lp(SwsSlice *s, uint8_t ***linesPool, int dstW, int sliceY, int sliceH);
+
+/*
+int ff_init_desc_fmt_convert(SwsFilterDescriptor *desc, SwsSlice * src, SwsSlice *dst, uint32_t *pal);
+int ff_init_desc_hscale(SwsFilterDescriptor *desc, SwsSlice *src, SwsSlice *dst, uint16_t *filter, int * filter_pos, int filter_size, int xInc);
+*/
+
+int ff_init_filters(SwsContext *c);
+int ff_free_filters(SwsContext *c);
+
+
+/* Prepare slice s to hold up to "lines" line pointers per luma/alpha plane and
+ * FF_CEIL_RSHIFT(lines, v_sub_sample) per chroma plane. Only the pointer
+ * tables are allocated here; s->width is left untouched.
+ * Returns 1 on success, AVERROR(ENOMEM) with every plane[].line set to NULL. */
+static int alloc_slice(SwsSlice * s, enum AVPixelFormat fmt, int lines, int v_sub_sample, int h_sub_sample)
+{
+    int i;
+    int size[4] = { lines,
+                    FF_CEIL_RSHIFT(lines, v_sub_sample),
+                    FF_CEIL_RSHIFT(lines, v_sub_sample),
+                    lines };
+
+    s->h_chr_sub_sample = h_sub_sample;
+    s->v_chr_sub_sample = v_sub_sample;
+    s->fmt = fmt;
+
+    /* NULL all four tables up front: the failure path can then release every
+     * plane without touching a pointer that was never assigned. */
+    for (i = 0; i < 4; ++i)
+        s->plane[i].line = NULL;
+    for (i = 0; i < 4; ++i) {
+        s->plane[i].line = av_malloc(sizeof(uint8_t*) * size[i]);
+        if (!s->plane[i].line)
+            goto fail;
+        s->plane[i].available_lines = size[i];
+        s->plane[i].sliceY = 0;
+        s->plane[i].sliceH = 0;
+    }
+    return 1;
+
+fail:
+    /* av_freep() resets each pointer, so a later free_slice() cannot double-free. */
+    for (i = 0; i < 4; ++i)
+        av_freep(&s->plane[i].line);
+    return AVERROR(ENOMEM);
+}
+
+static void free_slice(SwsSlice *s)
+{
+    int i; /* releases only the per-plane line tables, never s itself */
+    for (i = 0; i < 4; ++i)
+        av_freep(&s->plane[i].line); /* also NULLs the pointer: a repeated call cannot double-free */
+}
+
+/* Point the line tables of s at rows inside the caller's picture planes.
+ * sliceY and sliceH are luma rows; the chroma planes (1, 2) start at
+ * sliceY >> s->v_chr_sub_sample. Line counts are clamped to available_lines.
+ * NOTE(review): with skip > 0 only every (1 << skip)-th table entry is written;
+ * the one caller in this patch passes 0 -- confirm the intent. Returns 1. */
+int ff_init_slice_from_src(SwsSlice * s, uint8_t *src[4], int stride[4], int srcW, int sliceY, int sliceH, int skip)
+{
+    int i;
+    int start[4] = {sliceY,
+                    sliceY >> s->v_chr_sub_sample,
+                    sliceY >> s->v_chr_sub_sample,
+                    sliceY};
+    int stride1[4] = {stride[0],
+                      stride[1] << skip,
+                      stride[2] << skip,
+                      stride[3]};
+
+    s->width = srcW;
+    for (i = 0; i < 4; ++i) {
+        int j;
+        /* Only chroma is vertically subsampled; luma and alpha keep all sliceH
+         * rows (they used to be shifted as well, dropping rows). */
+        int is_chroma = i == 1 || i == 2;
+        int lines = is_chroma ? FF_CEIL_RSHIFT(sliceH, s->v_chr_sub_sample) : sliceH;
+        lines = s->plane[i].available_lines < lines ? s->plane[i].available_lines : lines;
+        s->plane[i].sliceY = sliceY;
+        s->plane[i].sliceH = lines;
+        for (j = 0; j < lines; j+= 1 << skip)
+            s->plane[i].line[j] = src[i] + (start[i] + j) * stride1[i];
+    }
+    return 1;
+}
+
+/* Point the line tables of s at rows taken from linesPool (one row array per
+ * plane; a NULL entry yields NULL lines for that plane). Always returns 1. */
+int ff_init_slice_from_lp(SwsSlice *s, uint8_t ***linesPool, int dstW, int sliceY, int sliceH)
+{
+    int i;
+
+    s->width = dstW;
+    for (i = 0; i < 4; ++i) {
+        int j;
+        /* Only chroma (planes 1, 2) is vertically subsampled; luma/alpha keep sliceH rows. */
+        int is_chroma = i == 1 || i == 2;
+        int lines = is_chroma ? FF_CEIL_RSHIFT(sliceH, s->v_chr_sub_sample) : sliceH;
+        lines = s->plane[i].available_lines < lines ? s->plane[i].available_lines : lines;
+
+        s->plane[i].sliceY = sliceY;
+        s->plane[i].sliceH = lines;
+
+        for (j = 0; j < lines; ++j)
+            s->plane[i].line[j] = linesPool[i] ? linesPool[i][j] : NULL;
+    }
+    return 1;
+}
+
+/* Make every line of planes 0-2 alias the single buffer v and every line of
+ * plane 3 alias v2. sliceH is accepted but unused: each plane is marked as
+ * holding all of its available_lines. Always returns 1. */
+static int init_slice_1(SwsSlice *s, uint8_t *v, uint8_t *v2, int dstW, int sliceY, int sliceH)
+{
+    int p;
+    s->width = dstW;
+    for (p = 0; p < 4; ++p) {
+        SwsPlane *plane = &s->plane[p];
+        uint8_t *target = p == 3 ? v2 : v;
+        int row = 0;
+
+        plane->sliceY = sliceY;
+        plane->sliceH = plane->available_lines;
+        while (row < plane->sliceH)
+            plane->line[row++] = target;
+    }
+    return 1;
+}
+
+
+/* SwsFilterDescriptor.process callback for horizontal scaling.
+ *
+ * Scales the row at absolute position sliceY of plane 0 (luma) from desc->src
+ * into desc->dst and, when desc->alpha is set, does the same for plane 3
+ * (alpha). Inside each slice the row is looked up relative to that plane's own
+ * sliceY. c->hyscale_fast is used when set, c->hyScale otherwise, and
+ * c->lumConvertRange (if set) is applied to the luma output only.
+ *
+ * sliceH is not used. Always returns 1. */
+static int lum_h_scale(SwsContext *c, SwsFilterDescriptor *desc, int sliceY, int sliceH)
+{
+    static const int plane_idx[2] = { 0, 3 }; /* luma first, then alpha */
+    const int in_width  = desc->src->width;
+    const int out_width = desc->dst->width;
+    const int step      = desc->xInc;
+    int pass;
+
+    for (pass = 0; pass < 2; ++pass) {
+        SwsPlane *in_plane, *out_plane;
+        uint8_t *in_row;
+        int16_t *out_row;
+
+        /* The second pass exists only for descriptors that carry alpha. */
+        if (pass == 1 && !desc->alpha)
+            break;
+
+        /* Locate this plane's row in both slices. */
+        in_plane  = &desc->src->plane[plane_idx[pass]];
+        out_plane = &desc->dst->plane[plane_idx[pass]];
+        in_row    = in_plane->line[sliceY - in_plane->sliceY];
+        out_row   = (int16_t*)out_plane->line[sliceY - out_plane->sliceY];
+
+        if (c->hyscale_fast) {
+            // fast bilinear upscale / crap downscale
+            c->hyscale_fast(c, out_row, out_width, in_row, in_width, step);
+        } else {
+            c->hyScale(c, out_row, out_width, (const uint8_t *)in_row,
+                       desc->filter, desc->filter_pos, desc->filter_size);
+        }
+
+        /* Range conversion is a luma-only step. */
+        if (pass == 0 && c->lumConvertRange)
+            c->lumConvertRange(out_row, out_width);
+    }
+
+    return 1;
+}
+
+/* SwsFilterDescriptor.process callback: run the context's input converters on
+ * the source row at sliceY, writing luma to line 0 of dst plane 0 and, when
+ * desc->alpha is set, alpha to dst plane 3. Always returns 1. */
+static int lum_convert(SwsContext *c, SwsFilterDescriptor *desc, int sliceY, int sliceH)
+{
+    int srcW = desc->src->width;
+    uint32_t * pal = desc->pal;
+    int sp = sliceY - desc->src->plane[0].sliceY; /* row index within the source slice */
+    int dp = sliceY - desc->dst->plane[0].sliceY; /* reassigned before its only use */
+    /* NOTE(review): all four planes are indexed with the plane-0 row index -- confirm for subsampled chroma. */
+    const uint8_t * src[4] = { desc->src->plane[0].line[sp],
+                        desc->src->plane[1].line[sp],
+                        desc->src->plane[2].line[sp],
+                        desc->src->plane[3].line[sp]};
+    uint8_t * dst = desc->dst->plane[0].line[0/*dp*/];
+    /* Re-anchor dst planes 0 and 3 at the row being produced. */
+    desc->dst->plane[0].sliceY = sliceY;
+    desc->dst->plane[0].sliceH = sliceH;
+    desc->dst->plane[3].sliceY = sliceY;
+    desc->dst->plane[3].sliceH = sliceH;
+
+    if (c->lumToYV12) {
+        c->lumToYV12(dst, src[0], src[1], src[2], srcW, pal);
+    } else if (c->readLumPlanar) {
+        c->readLumPlanar(dst, src, srcW, c->input_rgb2yuv_table);
+    }
+    if (desc->alpha)
+    {
+        dp = sliceY - desc->dst->plane[3].sliceY; /* 0: plane[3].sliceY was just set to sliceY */
+        dst = desc->dst->plane[3].line[dp];
+        if (c->alpToYV12) {
+            c->alpToYV12(dst, src[3], src[1], src[2], srcW, pal);
+        } else if (c->readAlpPlanar) {
+            c->readAlpPlanar(dst, src, srcW, NULL);
+        }
+    }
+
+    return 1;
+}
+
+/* Set up desc as a lum_convert stage from src to dst using pal; always returns 1. */
+static int init_desc_fmt_convert(SwsFilterDescriptor *desc, SwsSlice * src, SwsSlice *dst, uint32_t *pal)
+{
+    desc->process = lum_convert;
+    desc->src     = src;
+    desc->dst     = dst;
+    desc->pal     = pal;
+    desc->alpha   = isALPHA(src->fmt) && isALPHA(dst->fmt); /* both ends must carry alpha */
+    return 1;
+}
+
+
+/* Set up desc as a horizontal-scaling stage (process = lum_h_scale) reading src
+ * and writing dst with the given filter, filter positions, filter size and xInc.
+ * Alpha is enabled only when both slice formats have alpha. Always returns 1. */
+static int init_desc_hscale(SwsFilterDescriptor *desc, SwsSlice *src, SwsSlice *dst, uint16_t *filter, int * filter_pos, int filter_size, int xInc)
+{
+    desc->process     = lum_h_scale;
+    desc->src         = src;
+    desc->dst         = dst;
+    desc->xInc        = xInc;
+    desc->filter      = filter;
+    desc->filter_pos  = filter_pos;
+    desc->filter_size = filter_size;
+    desc->alpha       = isALPHA(src->fmt) && isALPHA(dst->fmt);
+    return 1;
+}
+
+/* Build the luma filter chain of c (optional format conversion, then horizontal
+ * scaling). Returns 1, or AVERROR(ENOMEM) with c->desc and c->slice left NULL. */
+int ff_init_filters(SwsContext * c)
+{
+    int i, d = 0;
+    int need_convert = c->lumToYV12 || c->readLumPlanar || c->alpToYV12 || c->readAlpPlanar;
+    c->numDesc = need_convert ? 2 : 1;
+    c->desc = av_malloc(sizeof(SwsFilterDescriptor) * c->numDesc);
+    c->slice = av_malloc(sizeof(SwsSlice) * (c->numDesc+1));
+    for (i = 0; i < c->numDesc+1; ++i) /* runs at least twice, so a failed array allocation is caught on pass 0 */
+        if (!c->desc || !c->slice || alloc_slice(&c->slice[i], c->srcFormat, c->vLumFilterSize, 0, 0) < 0)
+            goto fail;
+    if (need_convert) {
+        init_desc_fmt_convert(&c->desc[d], &c->slice[d], &c->slice[d+1], usePal(c->srcFormat) ? c->pal_yuv : (uint32_t *)c->input_rgb2yuv_table);
+        init_slice_1(&c->slice[d+1], c->formatConvBuffer, (c->formatConvBuffer + FFALIGN(c->srcW*2+78, 16)), c->srcW, 0, c->vLumFilterSize);
+        c->desc[d++].alpha = c->alpPixBuf != 0;
+    }
+    init_desc_hscale(&c->desc[d], &c->slice[d], &c->slice[d+1], c->hLumFilter, c->hLumFilterPos, c->hLumFilterSize, c->lumXInc);
+    c->desc[d].alpha = c->alpPixBuf != 0;
+    return 1;
+fail:
+    while (i-- > 0) /* slices below i are complete; alloc_slice() already released slice i itself */
+        free_slice(&c->slice[i]);
+    av_freep(&c->slice);
+    av_freep(&c->desc);
+    return AVERROR(ENOMEM);
+}
+
+/* Release everything ff_init_filters() allocated for c; always returns 1. */
+int ff_free_filters(SwsContext *c)
+{
+    int i;
+
+    av_freep(&c->desc);
+    for (i = 0; c->slice && i < c->numDesc+1; ++i) /* c->slice is NULL if nothing was set up */
+        free_slice(&c->slice[i]);
+    av_freep(&c->slice); /* the array itself used to leak; this also clears the pointer */
+    return 1;
+}
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 5312016..eacc2d1 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -315,6 +315,9 @@ static av_always_inline void hcscale(SwsContext *c, int16_t *dst1,
     if (DEBUG_SWSCALE_BUFFERS)                  \
         av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
 
+
+#include "slice.c"
+
 static int swscale(SwsContext *c, const uint8_t *src[],
                    int srcStride[], int srcSliceY,
                    int srcSliceH, uint8_t *dst[], int dstStride[])
@@ -371,6 +374,12 @@ static int swscale(SwsContext *c, const uint8_t *src[],
     int lastInChrBuf = c->lastInChrBuf;
     int perform_gamma = c->is_internal_gamma;
 
+    int numDesc = c->numDesc;
+    SwsSlice *src_slice = &c->slice[0];
+    SwsSlice *dst_slice = &c->slice[numDesc];
+    SwsFilterDescriptor *desc = c->desc;
+    int16_t **line_pool[4];
+
 
     if (!usePal(c->srcFormat)) {
         pal = c->input_rgb2yuv_table;
@@ -439,6 +448,7 @@ static int swscale(SwsContext *c, const uint8_t *src[],
     }
     lastDstY = dstY;
 
+
     for (; dstY < dstH; dstY++) {
         const int chrDstY = dstY >> c->chrDstVSubSample;
         uint8_t *dest[4]  = {
@@ -486,6 +496,19 @@ static int swscale(SwsContext *c, const uint8_t *src[],
                           lastLumSrcY, lastChrSrcY);
         }
 
+#define NEW_FILTER 1
+        
+
+#if NEW_FILTER
+        line_pool[0] = &lumPixBuf[lumBufIndex + 1];
+        line_pool[1] = &chrUPixBuf[chrBufIndex + 1];
+        line_pool[2] = &chrVPixBuf[chrBufIndex + 1];
+        line_pool[3] = alpPixBuf ? &alpPixBuf[lumBufIndex + 1] : NULL;
+
+        ff_init_slice_from_src(src_slice, (uint8_t**)src, srcStride, c->srcW, lastInLumBuf + 1, lastLumSrcY - lastInLumBuf, 0);
+        ff_init_slice_from_lp(dst_slice, (uint8_t ***)line_pool, dstW, lastInLumBuf + 1, lastLumSrcY - lastInLumBuf);
+
+#endif
         // Do horizontal scaling
         while (lastInLumBuf < lastLumSrcY) {
             const uint8_t *src1[4] = {
@@ -494,6 +517,7 @@ static int swscale(SwsContext *c, const uint8_t *src[],
                 src[2] + (lastInLumBuf + 1 - srcSliceY) * srcStride[2],
                 src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3],
             };
+            int i;
             lumBufIndex++;
             av_assert0(lumBufIndex < 2 * vLumBufSize);
             av_assert0(lastInLumBuf + 1 - srcSliceY < srcSliceH);
@@ -501,7 +525,10 @@ static int swscale(SwsContext *c, const uint8_t *src[],
 
             if (perform_gamma)
                 gamma_convert((uint8_t **)src1, srcW, c->inv_gamma);
-
+#if NEW_FILTER
+            for (i = 0; i < numDesc; ++i)
+                desc[i].process(c, &desc[i], lastInLumBuf + 1, 1);
+#else
             hyscale(c, lumPixBuf[lumBufIndex], dstW, src1, srcW, lumXInc,
                     hLumFilter, hLumFilterPos, hLumFilterSize,
                     formatConvBuffer, pal, 0);
@@ -509,6 +536,7 @@ static int swscale(SwsContext *c, const uint8_t *src[],
                 hyscale(c, alpPixBuf[lumBufIndex], dstW, src1, srcW,
                         lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
                         formatConvBuffer, pal, 1);
+#endif
             lastInLumBuf++;
             DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
                           lumBufIndex, lastInLumBuf);
@@ -763,6 +791,8 @@ SwsFunc ff_getSwsFunc(SwsContext *c)
     if (ARCH_X86)
         ff_sws_init_swscale_x86(c);
 
+    ff_init_filters(c);
+
     return swscale;
 }
 
@@ -1149,4 +1179,3 @@ int attribute_align_arg sws_scale(struct SwsContext *c,
     av_free(rgb0_tmp);
     return ret;
 }
-
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 2299aa5..8a3a1a3 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -269,6 +269,9 @@ typedef void (*yuv2anyX_fn)(struct SwsContext *c, const int16_t *lumFilter,
                             const int16_t **alpSrc, uint8_t **dest,
                             int dstW, int y);
 
+struct SwsSlice;
+struct SwsFilterDescriptor;
+
 /* This struct should be aligned on at least a 32-byte boundary. */
 typedef struct SwsContext {
     /**
@@ -319,6 +322,10 @@ typedef struct SwsContext {
     uint16_t *gamma;
     uint16_t *inv_gamma;
 
+    int numDesc;
+    struct SwsSlice *slice;
+    struct SwsFilterDescriptor *desc;
+
     uint32_t pal_yuv[256];
     uint32_t pal_rgb[256];
 
@@ -908,4 +915,38 @@ static inline void fillPlane16(uint8_t *plane, int stride, int width, int height
     }
 }
 
+/* Line-pointer table of one plane of a slice. */
+typedef struct SwsPlane
+{
+    int available_lines;    /* number of entries allocated for line[] by alloc_slice() */
+    int sliceY;             /* row that line[0] stands for; filters index line[y - sliceY] */
+    int sliceH;             /* line count recorded by the slice init / convert code */
+    uint8_t **line;         /* one pointer per line */
+} SwsPlane;
+
+typedef struct SwsSlice /* four planes of line pointers plus the format they hold */
+{
+    int width;                /* set by the slice init helpers (srcW or dstW) */
+    int h_chr_sub_sample;     /* stored by alloc_slice(); not read by the code visible in this patch */
+    int v_chr_sub_sample;     /* right-shift turning luma rows into rows of planes 1 and 2 */
+    enum AVPixelFormat fmt;
+    SwsPlane plane[4];        /* the filters treat index 0 as luma and index 3 as alpha */
+} SwsSlice;
+
+/* One stage of the filter chain: process() reads lines from src and writes dst. */
+typedef struct SwsFilterDescriptor
+{
+    SwsSlice * src;
+    SwsSlice * dst;
+    /* Filter parameters that lum_h_scale() hands to c->hyScale. */
+    uint16_t * filter;
+    int * filter_pos;
+    int filter_size;
+    /* Non-zero makes the callbacks process plane 3 (alpha) in addition to plane 0. */
+    int alpha;
+    int xInc;             /* handed to c->hyscale_fast by lum_h_scale() */
+    uint32_t * pal;       /* last argument of c->lumToYV12 / c->alpToYV12 in lum_convert() */
+   int (*process)(SwsContext*, struct SwsFilterDescriptor*, int, int); /* (c, desc, sliceY, sliceH) */
+} SwsFilterDescriptor;
+
 #endif /* SWSCALE_SWSCALE_INTERNAL_H */
diff --git a/libswscale/utils.c b/libswscale/utils.c
index f9f4ec6..2472656 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -1981,6 +1981,8 @@ void sws_freeFilter(SwsFilter *filter)
     av_free(filter);
 }
 
+extern int ff_free_filters(SwsContext *s);
+
 void sws_freeContext(SwsContext *c)
 {
     int i;
@@ -2055,6 +2057,7 @@ void sws_freeContext(SwsContext *c)
     av_freep(&c->gamma);
     av_freep(&c->inv_gamma);
 
+    ff_free_filters(c);
 
     av_free(c);
 }
-- 
1.9.1

