From ddd25513e495b86a90f1082f54276280d2ad34c7 Mon Sep 17 00:00:00 2001
From: Thomas Mundt <loudmax@yahoo.de>
Date: Mon, 13 Mar 2017 17:04:03 +0100
Subject: [PATCH 2/2] avfilter/interlace: add complex vertical low-pass filter

This complex (-1 2 6 2 -1) filter reduces interlace 'twitter' slightly less than the linear (1 2 1) filter, but better retains detail and the subjective impression of sharpness.

Signed-off-by: Thomas Mundt <loudmax@yahoo.de>
---
 doc/filters.texi                     | 27 +++++++++++++---
 libavfilter/interlace.h              |  8 +++++
 libavfilter/tinterlace.h             |  2 ++
 libavfilter/vf_interlace.c           | 45 +++++++++++++++++++++++++--
 libavfilter/vf_tinterlace.c          | 57 ++++++++++++++++++++++++++++------
 libavfilter/x86/vf_interlace.asm     | 60 ++++++++++++++++++++++++++++++++++++
 libavfilter/x86/vf_interlace_init.c  | 10 ++++--
 libavfilter/x86/vf_tinterlace_init.c | 10 ++++--
 8 files changed, 200 insertions(+), 19 deletions(-)

diff --git a/doc/filters.texi b/doc/filters.texi
index 192a81a..0803ce2 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -9109,8 +9109,21 @@ This determines whether the interlaced frame is taken from the even
 (tff - default) or odd (bff) lines of the progressive frame.
 
 @item lowpass
-Enable (default) or disable the vertical lowpass filter to avoid twitter
-interlacing and reduce moire patterns.
+Vertical lowpass filter to avoid twitter interlacing and
+reduce moire patterns.
+
+@table @samp
+@item 0, off
+Disable vertical lowpass filter
+
+@item 1, linear
+Enable linear filter (default)
+
+@item 2, complex
+Enable complex filter. This reduces twitter and moire slightly less
+than the linear filter, but better retains detail and subjective sharpness.
+
+@end table
 @end table
 
 @section kerndeint
@@ -13584,17 +13597,23 @@ Available value for @var{flags} is:
 
 @table @option
 @item low_pass_filter, vlfp
-Enable vertical low-pass filtering in the filter.
+Enable linear vertical low-pass filtering in the filter.
 Vertical low-pass filtering is required when creating an interlaced
 destination from a progressive source which contains high-frequency
 vertical detail. Filtering will reduce interlace 'twitter' and Moire
 patterning.
 
+@item complex_filter, cvlpf
+Enable complex vertical low-pass filtering.
+This reduces interlace 'twitter' and Moire patterning slightly less than
+the linear filter, but better retains detail and subjective sharpness.
+
+@end table
+
 Vertical low-pass filtering can only be enabled for @option{mode}
 @var{interleave_top} and @var{interleave_bottom}.
 
 @end table
-@end table
 
 @section transpose
 
diff --git a/libavfilter/interlace.h b/libavfilter/interlace.h
index 107c94f..7016db2 100644
--- a/libavfilter/interlace.h
+++ b/libavfilter/interlace.h
@@ -44,6 +44,12 @@ enum FieldType {
     FIELD_LOWER = 1,
 };
 
+enum VLPFilter {
+    VLPF_OFF = 0,   // vertical low-pass filter disabled
+    VLPF_LIN = 1,   // linear (1 2 1) filter
+    VLPF_CMP = 2,   // complex (-1 2 6 2 -1) filter
+};
+
 typedef struct InterlaceContext {
     const AVClass *class;
     enum ScanMode scan;    // top or bottom field first scanning
@@ -51,6 +57,8 @@ typedef struct InterlaceContext {
     AVFrame *cur, *next;   // the two frames from which the new one is obtained
     void (*lowpass_line)(uint8_t *dstp, ptrdiff_t linesize, const uint8_t *srcp,
                          ptrdiff_t mref, ptrdiff_t pref);
+    void (*lowpass_line_complex)(uint8_t *dstp, ptrdiff_t linesize, const uint8_t *srcp,
+                                 ptrdiff_t mref, ptrdiff_t pref);
 } InterlaceContext;
 
 void ff_interlace_init_x86(InterlaceContext *interlace);
diff --git a/libavfilter/tinterlace.h b/libavfilter/tinterlace.h
index f52af13..1f7cb6e 100644
--- a/libavfilter/tinterlace.h
+++ b/libavfilter/tinterlace.h
@@ -55,6 +55,8 @@ typedef struct {
     int black_linesize[4];
     void (*lowpass_line)(uint8_t *dstp, ptrdiff_t width, const uint8_t *srcp,
                          ptrdiff_t mref, ptrdiff_t pref);
+    void (*lowpass_line_complex)(uint8_t *dstp, ptrdiff_t width, const uint8_t *srcp,
+                                 ptrdiff_t mref, ptrdiff_t pref);
 } TInterlaceContext;
 
 void ff_tinterlace_init_x86(TInterlaceContext *interlace);
diff --git a/libavfilter/vf_interlace.c b/libavfilter/vf_interlace.c
index 8da8326..836977d 100644
--- a/libavfilter/vf_interlace.c
+++ b/libavfilter/vf_interlace.c
@@ -3,6 +3,7 @@
  * Copyright (c) 2010 Baptiste Coudurier
  * Copyright (c) 2011 Stefano Sabatini
  * Copyright (c) 2013 Vittorio Giovara <vittorio.giovara@gmail.com>
+ * Copyright (c) 2017 Thomas Mundt <loudmax@yahoo.de>
  *
  * This file is part of FFmpeg.
  *
@@ -47,7 +48,13 @@ static const AVOption interlace_options[] = {
     { "bff", "bottom field first", 0,
         AV_OPT_TYPE_CONST, {.i64 = MODE_BFF }, INT_MIN, INT_MAX, .flags = FLAGS, .unit = "scan" },
     { "lowpass", "set vertical low-pass filter", OFFSET(lowpass),
-        AV_OPT_TYPE_BOOL,  {.i64 = 1 },        0, 1, .flags = FLAGS },
+        AV_OPT_TYPE_INT,   {.i64 = VLPF_LIN }, 0, 2, .flags = FLAGS, .unit = "lowpass" },
+    { "off",     "disable vertical low-pass filter", 0,
+        AV_OPT_TYPE_CONST, {.i64 = VLPF_OFF }, INT_MIN, INT_MAX, .flags = FLAGS, .unit = "lowpass" },
+    { "linear",  "linear vertical low-pass filter",  0,
+        AV_OPT_TYPE_CONST, {.i64 = VLPF_LIN }, INT_MIN, INT_MAX, .flags = FLAGS, .unit = "lowpass" },
+    { "complex", "complex vertical low-pass filter", 0,
+        AV_OPT_TYPE_CONST, {.i64 = VLPF_CMP }, INT_MIN, INT_MAX, .flags = FLAGS, .unit = "lowpass" },
     { NULL }
 };
 
@@ -68,6 +75,25 @@ static void lowpass_line_c(uint8_t *dstp, ptrdiff_t linesize,
     }
 }
 
+static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t linesize,
+                                   const uint8_t *srcp, ptrdiff_t mref,
+                                   ptrdiff_t pref)
+{
+    const uint8_t *srcp_above = srcp + mref;
+    const uint8_t *srcp_below = srcp + pref;
+    const uint8_t *srcp_above2 = srcp + mref * 2;
+    const uint8_t *srcp_below2 = srcp + pref * 2;
+    int i;
+    for (i = 0; i < linesize; i++) {
+        // Vertical 5-tap kernel (-1 2 6 2 -1) / 8, i.e.
+        // '0.75 * current + 0.25 * above + 0.25 * below - 0.125 * above2 - 0.125 * below2'
+        // '4 +' rounds to nearest; the clip absorbs the kernel's under/overshoot.
+        dstp[i] = av_clip_uint8((4 + (srcp[i] << 2)
+                  + ((srcp[i] + srcp_above[i] + srcp_below[i]) << 1)
+                  - srcp_above2[i] - srcp_below2[i]) >> 3);
+    }
+}
+
 static const enum AVPixelFormat formats_supported[] = {
     AV_PIX_FMT_YUV420P,  AV_PIX_FMT_YUV422P,  AV_PIX_FMT_YUV444P,
     AV_PIX_FMT_YUV444P,  AV_PIX_FMT_YUV410P,  AV_PIX_FMT_YUVA420P,
@@ -118,6 +144,7 @@ static int config_out_props(AVFilterLink *outlink)
 
     if (s->lowpass) {
         s->lowpass_line = lowpass_line_c;
+        s->lowpass_line_complex = lowpass_line_complex_c;
         if (ARCH_X86)
             ff_interlace_init_x86(s);
     }
@@ -151,7 +178,7 @@ static void copy_picture_field(InterlaceContext *s,
             srcp += src_frame->linesize[plane];
             dstp += dst_frame->linesize[plane];
         }
-        if (lowpass) {
+        if (lowpass == VLPF_LIN) {
             int srcp_linesize = src_frame->linesize[plane] * 2;
             int dstp_linesize = dst_frame->linesize[plane] * 2;
             for (j = lines; j > 0; j--) {
@@ -165,6 +192,20 @@ static void copy_picture_field(InterlaceContext *s,
                 dstp += dstp_linesize;
                 srcp += srcp_linesize;
             }
+        } else if (lowpass == VLPF_CMP) {
+            int srcp_linesize = src_frame->linesize[plane] * 2;
+            int dstp_linesize = dst_frame->linesize[plane] * 2;
+            for (j = lines; j > 0; j--) {
+                ptrdiff_t pref = src_frame->linesize[plane];
+                ptrdiff_t mref = -pref;
+                if (j >= (lines - 1))
+                    mref = 0;
+                else if (j <= 2)
+                    pref = 0;
+                s->lowpass_line_complex(dstp, cols, srcp, mref, pref);
+                dstp += dstp_linesize;
+                srcp += srcp_linesize;
+            }
         } else {
             av_image_copy_plane(dstp, dst_frame->linesize[plane] * 2,
                                 srcp, src_frame->linesize[plane] * 2,
diff --git a/libavfilter/vf_tinterlace.c b/libavfilter/vf_tinterlace.c
index 09ca4d3..0b5b858 100644
--- a/libavfilter/vf_tinterlace.c
+++ b/libavfilter/vf_tinterlace.c
@@ -1,4 +1,5 @@
 /*
+ * Copyright (c) 2017 Thomas Mundt <loudmax@yahoo.de>
  * Copyright (c) 2011 Stefano Sabatini
  * Copyright (c) 2010 Baptiste Coudurier
  * Copyright (c) 2003 Michael Zucchi <notzed@ximian.com>
@@ -36,6 +37,7 @@
 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
 #define TINTERLACE_FLAG_VLPF 01
 #define TINTERLACE_FLAG_EXACT_TB 2
+#define TINTERLACE_FLAG_CVLPF 4
 
 static const AVOption tinterlace_options[] = {
     {"mode",              "select interlace mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64=MODE_MERGE}, 0, MODE_NB-1, FLAGS, "mode"},
@@ -51,6 +53,8 @@ static const AVOption tinterlace_options[] = {
     {"flags",             "set flags", OFFSET(flags), AV_OPT_TYPE_FLAGS, {.i64 = 0}, 0, INT_MAX, 0, "flags" },
     {"low_pass_filter",   "enable vertical low-pass filter",              0, AV_OPT_TYPE_CONST, {.i64 = TINTERLACE_FLAG_VLPF}, INT_MIN, INT_MAX, FLAGS, "flags" },
     {"vlpf",              "enable vertical low-pass filter",              0, AV_OPT_TYPE_CONST, {.i64 = TINTERLACE_FLAG_VLPF}, INT_MIN, INT_MAX, FLAGS, "flags" },
+    {"complex_filter",    "enable complex vertical low-pass filter",      0, AV_OPT_TYPE_CONST, {.i64 = TINTERLACE_FLAG_CVLPF},INT_MIN, INT_MAX, FLAGS, "flags" },
+    {"cvlpf",             "enable complex vertical low-pass filter",      0, AV_OPT_TYPE_CONST, {.i64 = TINTERLACE_FLAG_CVLPF},INT_MIN, INT_MAX, FLAGS, "flags" },
     {"exact_tb",          "force a timebase which can represent timestamps exactly", 0, AV_OPT_TYPE_CONST, {.i64 = TINTERLACE_FLAG_EXACT_TB}, INT_MIN, INT_MAX, FLAGS, "flags" },
 
     {NULL}
@@ -102,6 +106,24 @@ static void lowpass_line_c(uint8_t *dstp, ptrdiff_t width, const uint8_t *srcp,
     }
 }
 
+static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t width, const uint8_t *srcp,
+                                   ptrdiff_t mref, ptrdiff_t pref)
+{
+    const uint8_t *srcp_above = srcp + mref;
+    const uint8_t *srcp_below = srcp + pref;
+    const uint8_t *srcp_above2 = srcp + mref * 2;
+    const uint8_t *srcp_below2 = srcp + pref * 2;
+    int i;
+    for (i = 0; i < width; i++) {
+        // Vertical 5-tap kernel (-1 2 6 2 -1) / 8, i.e.
+        // '0.75 * current + 0.25 * above + 0.25 * below - 0.125 * above2 - 0.125 * below2'
+        // '4 +' rounds to nearest; the clip absorbs the kernel's under/overshoot.
+        dstp[i] = av_clip_uint8((4 + (srcp[i] << 2)
+                  + ((srcp[i] + srcp_above[i] + srcp_below[i]) << 1)
+                  - srcp_above2[i] - srcp_below2[i]) >> 3);
+    }
+}
+
 static av_cold void uninit(AVFilterContext *ctx)
 {
     TInterlaceContext *tinterlace = ctx->priv;
@@ -144,12 +166,14 @@ static int config_out_props(AVFilterLink *outlink)
                    tinterlace->black_linesize[i] * h);
         }
     }
-    if ((tinterlace->flags & TINTERLACE_FLAG_VLPF)
+    if ((tinterlace->flags & TINTERLACE_FLAG_VLPF
+            || tinterlace->flags & TINTERLACE_FLAG_CVLPF)
             && !(tinterlace->mode == MODE_INTERLEAVE_TOP
               || tinterlace->mode == MODE_INTERLEAVE_BOTTOM)) {
-        av_log(ctx, AV_LOG_WARNING, "low_pass_filter flag ignored with mode %d\n",
+        av_log(ctx, AV_LOG_WARNING, "low_pass_filter flags ignored with mode %d\n",
                 tinterlace->mode);
         tinterlace->flags &= ~TINTERLACE_FLAG_VLPF;
+        tinterlace->flags &= ~TINTERLACE_FLAG_CVLPF;
     }
     tinterlace->preout_time_base = inlink->time_base;
     if (tinterlace->mode == MODE_INTERLACEX2) {
@@ -172,14 +196,16 @@ static int config_out_props(AVFilterLink *outlink)
         (tinterlace->flags & TINTERLACE_FLAG_EXACT_TB))
         outlink->time_base = tinterlace->preout_time_base;
 
-    if (tinterlace->flags & TINTERLACE_FLAG_VLPF) {
+    if (tinterlace->flags & TINTERLACE_FLAG_VLPF || tinterlace->flags & TINTERLACE_FLAG_CVLPF) {
         tinterlace->lowpass_line = lowpass_line_c;
+        tinterlace->lowpass_line_complex = lowpass_line_complex_c;
         if (ARCH_X86)
             ff_tinterlace_init_x86(tinterlace);
     }
 
-    av_log(ctx, AV_LOG_VERBOSE, "mode:%d filter:%s h:%d -> h:%d\n",
-           tinterlace->mode, (tinterlace->flags & TINTERLACE_FLAG_VLPF) ? "on" : "off",
+    av_log(ctx, AV_LOG_VERBOSE, "mode:%d filter:%s h:%d -> h:%d\n", tinterlace->mode,
+           (tinterlace->flags & TINTERLACE_FLAG_CVLPF) ? "complex" :
+           (tinterlace->flags & TINTERLACE_FLAG_VLPF) ? "linear" : "off",
            inlink->h, outlink->h);
 
     return 0;
@@ -223,10 +249,23 @@ void copy_picture_field(TInterlaceContext *tinterlace,
             srcp += src_linesize[plane];
         if (interleave && dst_field == FIELD_LOWER)
             dstp += dst_linesize[plane];
-        if (flags & TINTERLACE_FLAG_VLPF) {
-            // Low-pass filtering is required when creating an interlaced destination from
-            // a progressive source which contains high-frequency vertical detail.
-            // Filtering will reduce interlace 'twitter' and Moire patterning.
+        // Low-pass filtering is required when creating an interlaced destination from
+        // a progressive source which contains high-frequency vertical detail.
+        // Filtering will reduce interlace 'twitter' and Moire patterning.
+        if (flags & TINTERLACE_FLAG_CVLPF) {
+            int srcp_linesize = src_linesize[plane] * k;
+            int dstp_linesize = dst_linesize[plane] * (interleave ? 2 : 1);
+            for (h = lines; h > 0; h--) {
+                ptrdiff_t pref = src_linesize[plane];
+                ptrdiff_t mref = -pref;
+                if (h >= (lines - 1)) mref = 0;
+                else if (h <= 2)      pref = 0;
+
+                tinterlace->lowpass_line_complex(dstp, cols, srcp, mref, pref);
+                dstp += dstp_linesize;
+                srcp += srcp_linesize;
+            }
+        } else if (flags & TINTERLACE_FLAG_VLPF) {
             int srcp_linesize = src_linesize[plane] * k;
             int dstp_linesize = dst_linesize[plane] * (interleave ? 2 : 1);
             for (h = lines; h > 0; h--) {
diff --git a/libavfilter/x86/vf_interlace.asm b/libavfilter/x86/vf_interlace.asm
index ab11519..e7a6341 100644
--- a/libavfilter/x86/vf_interlace.asm
+++ b/libavfilter/x86/vf_interlace.asm
@@ -3,6 +3,7 @@
 ;*
 ;* Copyright (C) 2014 Kieran Kunhya <kierank@obe.tv>
 ;* Copyright (c) 2014 Michael Niedermayer <michaelni@gmx.at>
+;* Copyright (c) 2017 Thomas Mundt <loudmax@yahoo.de>
 ;*
 ;* This file is part of FFmpeg.
 ;*
@@ -25,6 +26,8 @@
 
 SECTION_RODATA
 
+pw_4: times 8 dw 4
+
 SECTION .text
 
 %macro LOWPASS_LINE 0
@@ -51,6 +54,60 @@ cglobal lowpass_line, 5, 5, 7, dst, h, src, mref, pref
     sub hd, 2*mmsize
     jg .loop
 REP_RET
+
+%endmacro
+
+%macro LOWPASS_LINE_COMPLEX 0
+cglobal lowpass_line_complex, 5, 5, 7, dst, h, src, mref, pref
+    pxor m6, m6                 ; m6 = 0, used to zero-extend bytes to words
+.loop:
+    mova m0, [srcq+mrefq]       ; src + mref   (line above)
+    mova m2, [srcq+prefq]       ; src + pref   (line below)
+    mova m1, m0
+    mova m3, m2
+    punpcklbw m0, m6            ; low half of the bytes -> words
+    punpcklbw m2, m6
+    punpckhbw m1, m6            ; high half of the bytes -> words
+    punpckhbw m3, m6
+    paddw m0, m2                ; m0/m1 = above + below
+    paddw m1, m3
+    mova m2, [srcq+mrefq*2]     ; src + 2*mref (two lines above)
+    mova m4, [srcq+prefq*2]     ; src + 2*pref (two lines below)
+    mova m3, m2
+    mova m5, m4
+    punpcklbw m2, m6
+    punpcklbw m4, m6
+    punpckhbw m3, m6
+    punpckhbw m5, m6
+    paddw m2, m4                ; m2/m3 = above2 + below2
+    paddw m3, m5
+    mova m4, [srcq]             ; current line
+    mova m5, m4
+    punpcklbw m4, m6
+    punpckhbw m5, m6
+    paddw m0, m4                ; m0/m1 = current + above + below
+    paddw m1, m5
+    psllw m0, 1                 ; m0/m1 = 2 * (current + above + below)
+    psllw m1, 1
+    psllw m4, 2                 ; m4/m5 = 4 * current
+    psllw m5, 2
+    paddw m0, m4                ; m0/m1 = 6 * current + 2 * (above + below)
+    paddw m1, m5
+    paddw m0, [pw_4]            ; + 4 so the shift by 3 below rounds to nearest
+    paddw m1, [pw_4]
+    psubusw m0, m2              ; - (above2 + below2), unsigned-saturating at 0
+    psubusw m1, m3
+    psrlw m0, 3                 ; divide by 8
+    psrlw m1, 3
+    packuswb m0, m1             ; words -> bytes, saturating at 255
+    mova [dstq], m0
+
+    add dstq, mmsize
+    add srcq, mmsize
+    sub hd, mmsize              ; one full register of pixels per iteration
+    jg .loop
+REP_RET
+
 %endmacro
 
 INIT_XMM sse2
@@ -58,3 +115,6 @@ LOWPASS_LINE
 
 INIT_XMM avx
 LOWPASS_LINE
+
+INIT_XMM sse2
+LOWPASS_LINE_COMPLEX
diff --git a/libavfilter/x86/vf_interlace_init.c b/libavfilter/x86/vf_interlace_init.c
index 7d8acd6..535f604 100644
--- a/libavfilter/x86/vf_interlace_init.c
+++ b/libavfilter/x86/vf_interlace_init.c
@@ -33,12 +33,18 @@ void ff_lowpass_line_avx (uint8_t *dstp, ptrdiff_t linesize,
                           const uint8_t *srcp,
                           ptrdiff_t mref, ptrdiff_t pref);
 
+void ff_lowpass_line_complex_sse2(uint8_t *dstp, ptrdiff_t linesize,
+                                  const uint8_t *srcp,
+                                  ptrdiff_t mref, ptrdiff_t pref);
+
 av_cold void ff_interlace_init_x86(InterlaceContext *s)
 {
     int cpu_flags = av_get_cpu_flags();
 
-    if (EXTERNAL_SSE2(cpu_flags))
-        s->lowpass_line = ff_lowpass_line_sse2;
+    if (EXTERNAL_SSE2(cpu_flags)) {
+        s->lowpass_line         = ff_lowpass_line_sse2;
+        s->lowpass_line_complex = ff_lowpass_line_complex_sse2; // only an SSE2 version exists
+    }
     if (EXTERNAL_AVX(cpu_flags))
         s->lowpass_line = ff_lowpass_line_avx;
 }
diff --git a/libavfilter/x86/vf_tinterlace_init.c b/libavfilter/x86/vf_tinterlace_init.c
index 175b5cf..351f93e 100644
--- a/libavfilter/x86/vf_tinterlace_init.c
+++ b/libavfilter/x86/vf_tinterlace_init.c
@@ -34,12 +34,18 @@ void ff_lowpass_line_avx (uint8_t *dstp, ptrdiff_t linesize,
                           const uint8_t *srcp,
                           ptrdiff_t mref, ptrdiff_t pref);
 
+void ff_lowpass_line_complex_sse2(uint8_t *dstp, ptrdiff_t linesize,
+                                  const uint8_t *srcp,
+                                  ptrdiff_t mref, ptrdiff_t pref);
+
 av_cold void ff_tinterlace_init_x86(TInterlaceContext *s)
 {
     int cpu_flags = av_get_cpu_flags();
 
-    if (EXTERNAL_SSE2(cpu_flags))
-        s->lowpass_line = ff_lowpass_line_sse2;
+    if (EXTERNAL_SSE2(cpu_flags)) {
+        s->lowpass_line         = ff_lowpass_line_sse2;
+        s->lowpass_line_complex = ff_lowpass_line_complex_sse2; // only an SSE2 version exists
+    }
     if (EXTERNAL_AVX(cpu_flags))
         s->lowpass_line = ff_lowpass_line_avx;
 }
-- 
2.7.4.windows.1

