>
>
> Keep in mind that the difference may be due to the QP information
> missing in mp=pp7. So you should focus on that (try to reset/disable
> qp processing in pp7 and see if it works the same as mp=pp7).
>

For qp=0, the results are bitexact. I am not able to figure out how to
handle the non-zero qp case. In the original code, the value is accessed
directly from the frame (mpi->qscale).

I tried using memset, but it gives some memory errors. In memset, the first
argument is the pointer to the block and the last argument is the size of
the block, right?

For the documentation, can you provide me with a standard test image? The
one I am using does not give presentable results: there is not much
difference when I vary the parameters (in fact, the filters also give the
same result).


> --
> FFmpeg = Foolish and Frenzy Mystic Puritan Elitist Geisha
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
From 2676f37a204b48bdbb2e24359b6a85598a8c021b Mon Sep 17 00:00:00 2001
From: Arwa Arif <arwaarif1...@gmail.com>
Date: Thu, 25 Dec 2014 09:50:24 +0530
Subject: [PATCH] lavfi: port mp=pp7 to avfilter

TODO: modify version.h, Add ff_norm_qscale in internal.h
---
 LICENSE.md               |    1 +
 configure                |    1 +
 doc/filters.texi         |   31 ++++
 libavfilter/Makefile     |    1 +
 libavfilter/allfilters.c |    1 +
 libavfilter/vf_pp7.c     |  419 ++++++++++++++++++++++++++++++++++++++++++++++
 libavfilter/vf_pp7.h     |   46 +++++
 libavfilter/x86/Makefile |    1 +
 libavfilter/x86/vf_pp7.c |   68 ++++++++
 9 files changed, 569 insertions(+)
 create mode 100644 libavfilter/vf_pp7.c
 create mode 100644 libavfilter/vf_pp7.h
 create mode 100644 libavfilter/x86/vf_pp7.c

diff --git a/LICENSE.md b/LICENSE.md
index 5659973..e612c22 100644
--- a/LICENSE.md
+++ b/LICENSE.md
@@ -43,6 +43,7 @@ Specifically, the GPL parts of FFmpeg are:
     - vf_perspective.c
     - vf_phase.c
     - vf_pp.c
+    - vf_pp7.c
     - vf_pullup.c
     - vf_sab.c
     - vf_smartblur.c
diff --git a/configure b/configure
index 9085200..c73562b 100755
--- a/configure
+++ b/configure
@@ -2597,6 +2597,7 @@ mpdecimate_filter_select="pixelutils"
 mptestsrc_filter_deps="gpl"
 negate_filter_deps="lut_filter"
 perspective_filter_deps="gpl"
+pp7_filter_deps="gpl"
 ocv_filter_deps="libopencv"
 owdenoise_filter_deps="gpl"
 pan_filter_deps="swresample"
diff --git a/doc/filters.texi b/doc/filters.texi
index aa8bb61..428785e 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -7171,6 +7171,37 @@ pp=hb|y/vb|a
 @end example
 @end itemize
 
+@section pp7
+Apply Postprocessing filter 7. It is a variant of the @ref{spp} filter,
+similar to spp = 6 with a 7-point DCT, where only the center sample is
+used after the IDCT.
+
+The filter accepts the following options:
+
+@table @option
+@item qp
+Force a constant quantization parameter. It accepts an integer in range
+0 to 63. If not set, the filter will use the QP from the video stream
+(if available).
+
+@item mode
+Set thresholding mode. Available modes are:
+
+@table @samp
+@item hard
+Set hard thresholding.
+@item soft
+Set soft thresholding (better de-ringing effect, but likely blurrier).
+@item medium
+Set medium thresholding (good results, default).
+@end table
+
+@item use_bframe_qp
+Enable the use of the QP from the B-Frames if set to @code{1}. Using this
+option may cause flicker since the B-Frames have often larger QP. Default is
+@code{0} (not enabled).
+@end table
+
 @section psnr
 
 Obtain the average, maximum and minimum PSNR (Peak Signal to Noise
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index cdcbe0a..8fcb270 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -164,6 +164,7 @@ OBJS-$(CONFIG_PERSPECTIVE_FILTER)            += vf_perspective.o
 OBJS-$(CONFIG_PHASE_FILTER)                  += vf_phase.o
 OBJS-$(CONFIG_PIXDESCTEST_FILTER)            += vf_pixdesctest.o
 OBJS-$(CONFIG_PP_FILTER)                     += vf_pp.o
+OBJS-$(CONFIG_PP7_FILTER)                    += vf_pp7.o
 OBJS-$(CONFIG_PSNR_FILTER)                   += vf_psnr.o dualinput.o framesync.o
 OBJS-$(CONFIG_PULLUP_FILTER)                 += vf_pullup.o
 OBJS-$(CONFIG_REMOVELOGO_FILTER)             += bbox.o lswsutils.o lavfutils.o vf_removelogo.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 2911ed9..381da4f 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -179,6 +179,7 @@ void avfilter_register_all(void)
     REGISTER_FILTER(PHASE,          phase,          vf);
     REGISTER_FILTER(PIXDESCTEST,    pixdesctest,    vf);
     REGISTER_FILTER(PP,             pp,             vf);
+    REGISTER_FILTER(PP7,            pp7,            vf);
     REGISTER_FILTER(PSNR,           psnr,           vf);
     REGISTER_FILTER(PULLUP,         pullup,         vf);
     REGISTER_FILTER(REMOVELOGO,     removelogo,     vf);
diff --git a/libavfilter/vf_pp7.c b/libavfilter/vf_pp7.c
new file mode 100644
index 0000000..5349ffc
--- /dev/null
+++ b/libavfilter/vf_pp7.c
@@ -0,0 +1,419 @@
+/*
+ * Copyright (c) 2005 Michael Niedermayer <michae...@gmx.at>
+ * Copyright (c) 2014 Arwa Arif <arwaarif1...@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/**
+ * @file
+ * Postprocessing filter - 7
+ *
+ * Originally written by Michael Niedermayer for the MPlayer
+ * project, and ported by Arwa Arif for FFmpeg.
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "internal.h"
+#include "vf_pp7.h"
+#include "libavcodec/avcodec.h"
+
+enum mode { /* thresholding modes; the values match the 0..2 range of the "mode" option */
+    MODE_HARD,
+    MODE_SOFT,
+    MODE_MEDIUM
+};
+
+#define OFFSET(x) offsetof(PP7Context, x) /* byte offset of an option's backing field in PP7Context */
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
+static const AVOption pp7_options[] = { /* options exposed by the pp7 filter */
+    { "qp", "force a constant quantizer parameter", OFFSET(qp), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 64, FLAGS }, /* default 0: filter() then reads the QP from the frame's table. NOTE(review): range here is 0..64 but the filters.texi text in this patch says 0..63 -- confirm */
+    { "mode", "set thresholding mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64 = MODE_MEDIUM}, 0, 2, FLAGS, "mode" }, /* default is medium; the named constants below share the "mode" unit */
+        { "hard",   "hard thresholding",   0, AV_OPT_TYPE_CONST, {.i64 = MODE_HARD},   INT_MIN, INT_MAX, FLAGS, "mode" },
+        { "soft",   "soft thresholding",   0, AV_OPT_TYPE_CONST, {.i64 = MODE_SOFT},   INT_MIN, INT_MAX, FLAGS, "mode" },
+        { "medium", "medium thresholding", 0, AV_OPT_TYPE_CONST, {.i64 = MODE_MEDIUM}, INT_MIN, INT_MAX, FLAGS, "mode" },
+    { NULL } /* terminator */
+};
+
+AVFILTER_DEFINE_CLASS(pp7);
+
+DECLARE_ALIGNED(8, static const uint8_t, dither)[8][8] = { /* 8x8 dither matrix with values in 0..63; filter() adds dither[y & 7][x & 7] before its final >> 6 */
+    {  0,  48,  12,  60,   3,  51,  15,  63, },
+    { 32,  16,  44,  28,  35,  19,  47,  31, },
+    {  8,  56,   4,  52,  11,  59,   7,  55, },
+    { 40,  24,  36,  20,  43,  27,  39,  23, },
+    {  2,  50,  14,  62,   1,  49,  13,  61, },
+    { 34,  18,  46,  30,  33,  17,  45,  29, },
+    { 10,  58,   6,  54,   9,  57,   5,  53, },
+    { 42,  26,  38,  22,  41,  25,  37,  21, },
+};
+
+#define N0 4 /* N0..N2: integer constants used as divisors in factor[] */
+#define N1 5
+#define N2 10
+#define SN0 2 /* SN0..SN2: square roots of N0..N2 */
+#define SN1 2.2360679775 /* not referenced in the visible code */
+#define SN2 3.16227766017
+#define N (1 << 16) /* fixed-point scale for factor[] and thres[] */
+
+static const int factor[16] = { /* weight of each of the 16 coefficients, N / (Na * Nb); the *thresh_c() functions multiply by it before their final >> 12 */
+    N / (N0 * N0), N / (N0 * N1), N / (N0 * N0), N / (N0 * N2),
+    N / (N1 * N0), N / (N1 * N1), N / (N1 * N0), N / (N1 * N2),
+    N / (N0 * N0), N / (N0 * N1), N / (N0 * N0), N / (N0 * N2),
+    N / (N2 * N0), N / (N2 * N1), N / (N2 * N0), N / (N2 * N2),
+};
+
+static const int thres[16] = { /* N / (SNa * SNb) per coefficient; the floating-point quotients are converted to int. NOTE(review): not referenced in the visible code -- confirm it is still needed */
+    N / (SN0 * SN0), N / (SN0 * SN2), N / (SN0 * SN0), N / (SN0 * SN2),
+    N / (SN2 * SN0), N / (SN2 * SN2), N / (SN2 * SN0), N / (SN2 * SN2),
+    N / (SN0 * SN0), N / (SN0 * SN2), N / (SN0 * SN0), N / (SN0 * SN2),
+    N / (SN2 * SN0), N / (SN2 * SN2), N / (SN2 * SN0), N / (SN2 * SN2),
+};
+
+static inline int norm_qscale(int qscale, int type) /* convert a qscale value to a common scale according to its FF_QSCALE_TYPE_* */
+{
+    switch (type) {
+    case FF_QSCALE_TYPE_MPEG1: return qscale; /* used unchanged */
+    case FF_QSCALE_TYPE_MPEG2: return qscale >> 1; /* halved */
+    case FF_QSCALE_TYPE_H264:  return qscale >> 2; /* divided by 4 */
+    case FF_QSCALE_TYPE_VP56:  return (63 - qscale + 2) >> 2; /* direction is inverted (63 - qscale) before dividing by 4 */
+    }
+    return qscale; /* any other type: value passed through */
+}
+
+static void init_thres2(PP7Context *p) /* fill p->thres2 with one threshold per QP (0..98) and coefficient (0..15) */
+{
+    int qp, i;
+    int bias = 0; //FIXME
+
+    for (qp = 0; qp < 99; qp++) {
+        for (i = 0; i < 16; i++) {
+            p->thres2[qp][i] = ((i&1) ? SN2 : SN0) * ((i&4) ? SN2 : SN0) * FFMAX(1, qp) * (1<<2) - 1 - bias; /* bits 0 and 2 of i select SN2 or SN0; FFMAX makes qp 0 use the qp 1 thresholds; the floating-point product is converted to int on store */
+        }
+    }
+}
+
+static inline void dctA_c(int16_t *dst, uint8_t *src, int stride) /* first transform pass: 7 samples spaced `stride` apart -> 4 coefficients, repeated for 4 adjacent columns (16 values written to dst) */
+{
+    int i;
+
+    for (i = 0; i < 4; i++) { /* one column per iteration */
+        int s0 = src[0 * stride] + src[6 * stride]; /* samples are paired symmetrically around the centre sample src[3 * stride] */
+        int s1 = src[1 * stride] + src[5 * stride];
+        int s2 = src[2 * stride] + src[4 * stride];
+        int s3 = src[3 * stride];
+        int s = s3 + s3; /* centre sample counted twice */
+        s3 = s  - s0;
+        s0 = s  + s0;
+        s  = s2 + s1;
+        s2 = s2 - s1;
+        dst[0] = s0 + s;
+        dst[2] = s0 - s;
+        dst[1] = 2 * s3 +     s2;
+        dst[3] =     s3 - 2 * s2;
+        src++; /* next column */
+        dst += 4; /* next group of 4 coefficients */
+    }
+}
+
+static void dctB_c(int16_t *dst, int16_t *src) /* second transform pass: same butterfly as dctA_c() applied to int16 data with a fixed element stride of 4, producing 16 values in dst */
+{
+    int i;
+
+    for (i = 0; i < 4; i++) { /* one of the 4 interleaved coefficient lanes per iteration */
+        int s0 = src[0 * 4] + src[6 * 4]; /* 7 inputs spaced 4 elements apart, paired around src[3 * 4] */
+        int s1 = src[1 * 4] + src[5 * 4];
+        int s2 = src[2 * 4] + src[4 * 4];
+        int s3 = src[3 * 4];
+        int s = s3 + s3; /* centre input counted twice */
+        s3 = s  - s0;
+        s0 = s  + s0;
+        s  = s2 + s1;
+        s2 = s2 - s1;
+        dst[0 * 4] = s0 + s;
+        dst[2 * 4] = s0 - s;
+        dst[1 * 4] = 2 * s3 +     s2;
+        dst[3 * 4] =     s3 - 2 * s2;
+        src++; /* next lane */
+        dst++;
+    }
+}
+
+static int hardthresh_c(PP7Context *p, int16_t *src, int qp) /* hard thresholding: small coefficients are dropped, the others are kept unchanged; returns the weighted sum of the kept coefficients */
+{
+    int i;
+    int a;
+
+    a = src[0] * factor[0]; /* coefficient 0 is always kept */
+    for (i = 1; i < 16; i++) {
+        unsigned int threshold1 = p->thres2[qp][i];
+        unsigned int threshold2 = threshold1 << 1;
+        int level = src[i];
+        if (((unsigned)(level + threshold1)) > threshold2) /* single unsigned compare: true when level lies outside [-threshold1, threshold1] */
+            a += level * factor[i];
+    }
+    return (a + (1 << 11)) >> 12; /* round, then drop 12 fractional bits */
+}
+
+static int mediumthresh_c(PP7Context *p, int16_t *src, int qp) /* medium thresholding: drop small coefficients, keep large ones unchanged, and scale those in between; returns the weighted sum */
+{
+    int i;
+    int a;
+
+    a = src[0] * factor[0]; /* coefficient 0 is always kept */
+    for (i = 1; i < 16; i++) {
+        unsigned int threshold1 = p->thres2[qp][i];
+        unsigned int threshold2 = threshold1 << 1;
+        int level = src[i];
+        if (((unsigned)(level + threshold1)) > threshold2) { /* level outside [-threshold1, threshold1] */
+            if (((unsigned)(level + 2 * threshold1)) > 2 * threshold2) /* level outside [-2 * threshold1, 2 * threshold1]: keep as is */
+                a += level * factor[i];
+            else {
+                if(level > 0) /* in between: twice the distance from the threshold, sign preserved */
+                    a += 2 * (level - (int)threshold1) * factor[i];
+                else
+                    a += 2 * (level + (int)threshold1) * factor[i];
+            }
+        }
+    }
+    return (a + (1 << 11)) >> 12; /* round, then drop 12 fractional bits */
+}
+
+static int softthresh_c(PP7Context *p, int16_t *src, int qp) /* soft thresholding: drop small coefficients and shrink the others toward zero by the threshold; returns the weighted sum */
+{
+    int i;
+    int a;
+
+    a = src[0] * factor[0]; /* coefficient 0 is always kept */
+    for (i = 1; i < 16; i++) {
+        unsigned int threshold1 = p->thres2[qp][i];
+        unsigned int threshold2 = threshold1 << 1;
+        int level = src[i];
+        if (((unsigned)(level + threshold1)) > threshold2) { /* level outside [-threshold1, threshold1] */
+            if (level > 0)
+                a += (level - (int)threshold1) * factor[i]; /* reduce the magnitude by threshold1 */
+            else
+                a += (level + (int)threshold1) * factor[i];
+        }
+    }
+    return (a + (1 << 11)) >> 12; /* round, then drop 12 fractional bits */
+}
+
+static void filter(PP7Context *p, uint8_t *dst, uint8_t *src, /* filter one 8-bit plane from src into dst, using p->src as scratch space */
+                   int dst_stride, int src_stride, /* row pitch of dst and src, in bytes */
+                   int width, int height, /* plane dimensions in samples */
+                   uint8_t *qp_store, int qp_stride, int is_luma) /* QP table and its pitch (read only when p->qp is 0); is_luma selects the QP block size and the scratch pitch */
+{
+    int x, y;
+    const int stride = is_luma ? p->temp_stride : ((width + 16 + 15) & (~15)); /* row pitch of the padded copy; for chroma, width + 16 rounded up to a multiple of 16 */
+    uint8_t *p_src = p->src + 8 * stride; /* padded copy starts 8 rows into the scratch buffer */
+    int16_t *block = (int16_t *)p->src; /* start of the scratch buffer: output of p->dctB() for the current pixel */
+    int16_t *temp  = (int16_t *)(p->src + 32); /* 32 bytes further: dctA_c() results for the current row */
+
+    if (!src || !dst) return; /* absent plane: nothing to do */
+    for (y = 0; y < height; y++) { /* copy each row into the padded buffer and mirror 8 samples on its left and right */
+        int index = 8 + 8 * stride + y * stride;
+        memcpy(p_src + index, src + y * src_stride, width);
+        for (x = 0; x < 8; x++) {
+            p_src[index         - x - 1]= p_src[index +         x    ]; /* left border */
+            p_src[index + width + x    ]= p_src[index + width - x - 1]; /* right border */
+        }
+    }
+    for (y = 0; y < 8; y++) { /* mirror 8 rows above and below the copied rows */
+        memcpy(p_src + (    7 - y     ) * stride, p_src + (    y + 8     ) * stride, stride);
+        memcpy(p_src + (height + 8 + y) * stride, p_src + (height - y + 7) * stride, stride);
+    }
+    //FIXME (try edge emu)
+
+    for (y = 0; y < height; y++) {
+        for (x = -8; x < 0; x += 4) { /* pre-fill the first 8 column slots of temp for this row */
+            const int index = x + y * stride + (8 - 3) * (1 + stride) + 8; //FIXME silly offset
+            uint8_t *src  = p_src + index;
+            int16_t *tp   = temp + 4 * x;
+
+            dctA_c(tp + 4 * 8, src, stride);
+        }
+        for (x = 0; x < width; ) { /* advance in runs of at most 8 pixels that share one QP value */
+            const int qps = 3 + is_luma; /* shift applied to x and y when indexing qp_store: 4 for luma, 3 otherwise */
+            int qp;
+            int end = FFMIN(x + 8, width);
+
+            if(p->qp)
+                qp = p->qp; /* forced constant QP */
+            else {
+                qp = qp_store[ (FFMIN(x, width - 1) >> qps) + (FFMIN(y, height - 1) >> qps) * qp_stride];
+                qp = norm_qscale(qp, p->qscale_type); /* NOTE(review): the result indexes thres2[99][16] without a range check -- confirm table values always stay in 0..98 */
+            }
+            for (; x < end; x++) {
+                const int index = x + y * stride + (8 - 3) * (1 + stride) + 8; //FIXME silly offset
+                uint8_t *src = p_src + index;
+                int16_t *tp  = temp + 4 * x;
+                int v;
+
+                if ((x & 3) == 0)
+                    dctA_c(tp + 4 * 8, src, stride); /* first pass for the next 4 columns, done once every 4 pixels */
+
+                p->dctB(block, tp); /* second pass over 7 consecutive column slots starting at tp */
+
+                v = p->requantize(p, block, qp); /* threshold the coefficients and sum them into one value */
+                v = (v + dither[y & 7][x & 7]) >> 6; /* add dither, drop 6 bits */
+                if((unsigned)v > 255)
+                    v = (-v) >> 31; /* clamp: negative v gives 0; v > 255 gives -1 with an arithmetic shift, stored as 255 */
+                dst[x + y * dst_stride] = v;
+            }
+        }
+    }
+}
+
+static int query_formats(AVFilterContext *ctx) /* advertise the supported planar YUV and GRAY8 pixel formats; returns 0 or AVERROR(ENOMEM) */
+{
+    static const enum AVPixelFormat pix_fmts[] = {
+        AV_PIX_FMT_YUV444P,  AV_PIX_FMT_YUV422P,  AV_PIX_FMT_YUV420P,  AV_PIX_FMT_YUV411P,
+        AV_PIX_FMT_YUV410P,  AV_PIX_FMT_YUV440P,  AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ422P,
+        AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_GRAY8,    AV_PIX_FMT_NONE
+    };
+    AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
+    if (!fmts_list) /* allocation failed: do not hand NULL to ff_set_common_formats() */
+        return AVERROR(ENOMEM);
+    ff_set_common_formats(ctx, fmts_list);
+    return 0;
+}
+
+static int config_input(AVFilterLink *inlink) /* per-link setup: chroma shifts, scratch buffer, threshold table and function pointers; returns 0 or AVERROR(ENOMEM) */
+{
+    AVFilterContext *ctx = inlink->dst;
+    PP7Context *pp7 = ctx->priv;
+    const int h = FFALIGN(inlink->h + 16, 16);
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+
+    pp7->hsub = desc->log2_chroma_w;
+    pp7->vsub = desc->log2_chroma_h;
+
+    /* scratch buffer for filter(); av_malloc_array() fails cleanly if the size overflows */
+    pp7->temp_stride = FFALIGN(inlink->w + 16, 16);
+    pp7->src = av_malloc_array(pp7->temp_stride, (h + 8) * sizeof(uint8_t));
+    if (!pp7->src)
+        return AVERROR(ENOMEM);
+
+    init_thres2(pp7);
+
+    /* pick the thresholding routine matching the "mode" option */
+    switch (pp7->mode) {
+    case MODE_HARD: pp7->requantize = hardthresh_c; break;
+    case MODE_SOFT: pp7->requantize = softthresh_c; break;
+    default:
+    case MODE_MEDIUM: pp7->requantize = mediumthresh_c; break;
+    }
+
+    pp7->dctB = dctB_c;
+    if (ARCH_X86) /* the x86 init may replace dctB with an optimised version */
+        ff_pp7_init_x86(pp7);
+
+    return 0;
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *in) /* filter one input frame (in place when possible) and send it downstream; takes ownership of `in` */
+{
+    AVFilterContext *ctx = inlink->dst;
+    PP7Context *pp7 = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AVFrame *out = in;
+    int qp_stride = 0;
+    uint8_t *qp_table = NULL;
+
+    /* without a forced QP, use the QP table attached to the frame (if any) */
+    if (!pp7->qp)
+        qp_table = av_frame_get_qp_table(in, &qp_stride, &pp7->qscale_type);
+
+    /* Filter only when a QP source exists, otherwise pass the input through.
+     * Allocating inside this branch means a new frame is always filled. */
+    if (!ctx->is_disabled && (qp_table || pp7->qp)) {
+        const int cw = FF_CEIL_RSHIFT(inlink->w, pp7->hsub);
+        const int ch = FF_CEIL_RSHIFT(inlink->h, pp7->vsub);
+
+        /* get a new frame if in-place is not possible or if the dimensions
+         * are not multiple of 8 */
+        if (!av_frame_is_writable(in) || (inlink->w & 7) || (inlink->h & 7)) {
+            const int aligned_w = FFALIGN(inlink->w, 8);
+            const int aligned_h = FFALIGN(inlink->h, 8);
+            out = ff_get_video_buffer(outlink, aligned_w, aligned_h);
+            if (!out) {
+                av_frame_free(&in);
+                return AVERROR(ENOMEM);
+            }
+            av_frame_copy_props(out, in);
+            /* the buffer was requested with padded dimensions; restore the real ones */
+            out->width  = in->width;
+            out->height = in->height;
+        }
+        filter(pp7, out->data[0], in->data[0], out->linesize[0], in->linesize[0],
+               inlink->w, inlink->h, qp_table, qp_stride, 1);
+        filter(pp7, out->data[1], in->data[1], out->linesize[1], in->linesize[1],
+               cw,        ch,        qp_table, qp_stride, 0);
+        filter(pp7, out->data[2], in->data[2], out->linesize[2], in->linesize[2],
+               cw,        ch,        qp_table, qp_stride, 0);
+        emms_c();
+    }
+
+    if (in != out) {
+        if (in->data[3])
+            av_image_copy_plane(out->data[3], out->linesize[3],
+                                in ->data[3], in ->linesize[3],
+                                inlink->w, inlink->h);
+        av_frame_free(&in);
+    }
+    return ff_filter_frame(outlink, out);
+}
+
+static av_cold void uninit(AVFilterContext *ctx) /* release the scratch buffer allocated in config_input() */
+{
+    PP7Context *pp7 = ctx->priv;
+    av_freep(&pp7->src); /* frees the buffer and resets the pointer to NULL */
+}
+
+static const AVFilterPad pp7_inputs[] = { /* single video input pad */
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_input, /* allocates the scratch buffer and selects the function pointers */
+        .filter_frame = filter_frame, /* per-frame entry point */
+    },
+    { NULL }
+};
+
+static const AVFilterPad pp7_outputs[] = { /* single video output pad, no callbacks */
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_pp7 = { /* filter definition referenced by REGISTER_FILTER(PP7, pp7, vf) */
+    .name            = "pp7",
+    .description     = NULL_IF_CONFIG_SMALL("Apply Postprocessing 7 filter."),
+    .priv_size       = sizeof(PP7Context),
+    .uninit          = uninit,
+    .query_formats   = query_formats,
+    .inputs          = pp7_inputs,
+    .outputs         = pp7_outputs,
+    .priv_class      = &pp7_class,
+    .flags           = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL, /* filter_frame() checks ctx->is_disabled itself */
+};
diff --git a/libavfilter/vf_pp7.h b/libavfilter/vf_pp7.h
new file mode 100644
index 0000000..9aa8d73
--- /dev/null
+++ b/libavfilter/vf_pp7.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2005 Michael Niedermayer <michae...@gmx.at>
+ * Copyright (c) 2014 Arwa Arif <arwaarif1...@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef AVFILTER_PP7_H
+#define AVFILTER_PP7_H
+
+#include "avfilter.h"
+
+typedef struct PP7Context { /* private context of the pp7 filter */
+    AVClass *class;
+    int thres2[99][16]; /* threshold per QP and coefficient, filled by init_thres2() */
+
+    int qp; /* "qp" option: forced QP; 0 means the QP is read from the frame */
+    int mode; /* "mode" option: selects the requantize callback */
+    int qscale_type; /* qscale type written by av_frame_get_qp_table(), consumed by norm_qscale() */
+    int hsub; /* log2 of the horizontal chroma subsampling */
+    int vsub; /* log2 of the vertical chroma subsampling */
+    int temp_stride; /* row pitch used for the luma plane inside the scratch buffer */
+    uint8_t *src; /* scratch buffer: allocated in config_input(), freed in uninit() */
+
+    int (*requantize)(struct PP7Context *p, int16_t *src, int qp); /* thresholding routine: hard, soft or medium */
+    void (*dctB)(int16_t *dst, int16_t *src); /* second transform pass: C or MMX implementation */
+
+} PP7Context;
+
+void ff_pp7_init_x86(PP7Context *pp7); /* x86 hook, defined in x86/vf_pp7.c: may replace pp7->dctB with an MMX version */
+
+#endif /* AVFILTER_PP7_H */
diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile
index 4f9c83d..b93154e 100644
--- a/libavfilter/x86/Makefile
+++ b/libavfilter/x86/Makefile
@@ -4,6 +4,7 @@ OBJS-$(CONFIG_HQDN3D_FILTER)                 += x86/vf_hqdn3d_init.o
 OBJS-$(CONFIG_IDET_FILTER)                   += x86/vf_idet_init.o
 OBJS-$(CONFIG_INTERLACE_FILTER)              += x86/vf_interlace_init.o
 OBJS-$(CONFIG_NOISE_FILTER)                  += x86/vf_noise.o
+OBJS-$(CONFIG_PP7_FILTER)                    += x86/vf_pp7.o
 OBJS-$(CONFIG_PULLUP_FILTER)                 += x86/vf_pullup_init.o
 OBJS-$(CONFIG_SPP_FILTER)                    += x86/vf_spp.o
 OBJS-$(CONFIG_TINTERLACE_FILTER)             += x86/vf_tinterlace_init.o
diff --git a/libavfilter/x86/vf_pp7.c b/libavfilter/x86/vf_pp7.c
new file mode 100644
index 0000000..64c8618
--- /dev/null
+++ b/libavfilter/x86/vf_pp7.c
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2005 Michael Niedermayer <michae...@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/mem.h"
+#include "libavutil/x86/asm.h"
+#include "libavfilter/vf_pp7.h"
+
+static void dctB_mmx(int16_t *dst, int16_t *src) /* MMX version of the second transform pass: handles 4 int16 lanes per instruction; the body is empty when HAVE_MMX_INLINE is 0 */
+{
+#if HAVE_MMX_INLINE
+    __asm__ volatile (
+        "movq  (%0), %%mm0      \n\t" // src[0*4]
+        "movq  1*4*2(%0), %%mm1 \n\t" // src[1*4]
+        "paddw 6*4*2(%0), %%mm0 \n\t" // s0 = src[0*4] + src[6*4]
+        "paddw 5*4*2(%0), %%mm1 \n\t" // s1 = src[1*4] + src[5*4]
+        "movq  2*4*2(%0), %%mm2 \n\t" // src[2*4]
+        "movq  3*4*2(%0), %%mm3 \n\t" // s3 = src[3*4]
+        "paddw 4*4*2(%0), %%mm2 \n\t" // s2 = src[2*4] + src[4*4]
+        "paddw %%mm3, %%mm3     \n\t" //s
+        "movq %%mm3, %%mm4      \n\t" //s
+        "psubw %%mm0, %%mm3     \n\t" //s-s0
+        "paddw %%mm0, %%mm4     \n\t" //s+s0
+        "movq %%mm2, %%mm0      \n\t" //s2
+        "psubw %%mm1, %%mm2     \n\t" //s2-s1
+        "paddw %%mm1, %%mm0     \n\t" //s2+s1
+        "movq %%mm4, %%mm1      \n\t" //s0'
+        "psubw %%mm0, %%mm4     \n\t" //s0'-s'
+        "paddw %%mm0, %%mm1     \n\t" //s0'+s'
+        "movq %%mm3, %%mm0      \n\t" //s3'
+        "psubw %%mm2, %%mm3     \n\t" // s3' - s2'
+        "psubw %%mm2, %%mm3     \n\t" // s3' - 2*s2'
+        "paddw %%mm0, %%mm2     \n\t" // s2' + s3'
+        "paddw %%mm0, %%mm2     \n\t" // s2' + 2*s3'
+        "movq %%mm1, (%1)       \n\t" // dst[0*4]
+        "movq %%mm4, 2*4*2(%1)  \n\t" // dst[2*4]
+        "movq %%mm2, 1*4*2(%1)  \n\t" // dst[1*4]
+        "movq %%mm3, 3*4*2(%1)  \n\t" // dst[3*4]
+        :: "r" (src), "r"(dst) // NOTE(review): no clobbers are declared although the asm writes through %1 and uses mm0-mm4 -- confirm this is intended
+    );
+#endif
+}
+
+av_cold void ff_pp7_init_x86(PP7Context *p) /* install x86-specific function pointers in p according to the detected CPU flags */
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (HAVE_MMX_INLINE && cpu_flags & AV_CPU_FLAG_MMX) /* needs both build-time inline-MMX support and an MMX-capable CPU */
+        p->dctB = dctB_mmx;
+}
-- 
1.7.9.5

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

Reply via email to