Re: [FFmpeg-devel] [PATCH] x86/vc1dsp: Split the file into MC and loopfilter

2016-02-29 Thread Timothy Gu
On Mon, Feb 29, 2016 at 4:57 AM Ronald S. Bultje  wrote:

> This is kind of hard to review, but I'm going to assume that there's no
> actual code changes, in which case this LGTM.
>

No there isn't. Applied, thanks.

Timothy

>
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] x86/vc1dsp: Split the file into MC and loopfilter

2016-02-29 Thread Ronald S. Bultje
Hi,

On Sun, Feb 28, 2016 at 8:26 PM, Timothy Gu  wrote:

> ---
>  libavcodec/x86/Makefile  |   3 +-
>  libavcodec/x86/vc1dsp.asm| 585
> ---
>  libavcodec/x86/vc1dsp_loopfilter.asm | 317 +++
>  libavcodec/x86/vc1dsp_mc.asm | 292 +
>  4 files changed, 611 insertions(+), 586 deletions(-)
>  delete mode 100644 libavcodec/x86/vc1dsp.asm
>  create mode 100644 libavcodec/x86/vc1dsp_loopfilter.asm
>  create mode 100644 libavcodec/x86/vc1dsp_mc.asm


This is kind of hard to review, but I'm going to assume that there's no
actual code changes, in which case this LGTM.

Thanks,
Ronald
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] x86/vc1dsp: Split the file into MC and loopfilter

2016-02-28 Thread Timothy Gu
---
 libavcodec/x86/Makefile  |   3 +-
 libavcodec/x86/vc1dsp.asm| 585 ---
 libavcodec/x86/vc1dsp_loopfilter.asm | 317 +++
 libavcodec/x86/vc1dsp_mc.asm | 292 +
 4 files changed, 611 insertions(+), 586 deletions(-)
 delete mode 100644 libavcodec/x86/vc1dsp.asm
 create mode 100644 libavcodec/x86/vc1dsp_loopfilter.asm
 create mode 100644 libavcodec/x86/vc1dsp_mc.asm

diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 629b0ee..839b5bc 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -120,7 +120,8 @@ YASM-OBJS-$(CONFIG_QPELDSP)+= x86/qpeldsp.o \
   x86/fpel.o\
   x86/qpel.o
 YASM-OBJS-$(CONFIG_RV34DSP)+= x86/rv34dsp.o
-YASM-OBJS-$(CONFIG_VC1DSP) += x86/vc1dsp.o
+YASM-OBJS-$(CONFIG_VC1DSP) += x86/vc1dsp_loopfilter.o   \
+  x86/vc1dsp_mc.o
 YASM-OBJS-$(CONFIG_IDCTDSP)+= x86/simple_idct10.o
 YASM-OBJS-$(CONFIG_VIDEODSP)   += x86/videodsp.o
 YASM-OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp.o
diff --git a/libavcodec/x86/vc1dsp.asm b/libavcodec/x86/vc1dsp.asm
deleted file mode 100644
index eee42c2..0000000
--- a/libavcodec/x86/vc1dsp.asm
+++ /dev/null
@@ -1,585 +0,0 @@
-;**
-;* VC1 DSP optimizations
-;* Copyright (c) 2007 Christophe GISQUET 
-;* Copyright (c) 2009 David Conrad
-;*
-;* This file is part of FFmpeg.
-;*
-;* FFmpeg is free software; you can redistribute it and/or
-;* modify it under the terms of the GNU Lesser General Public
-;* License as published by the Free Software Foundation; either
-;* version 2.1 of the License, or (at your option) any later version.
-;*
-;* FFmpeg is distributed in the hope that it will be useful,
-;* but WITHOUT ANY WARRANTY; without even the implied warranty of
-;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-;* Lesser General Public License for more details.
-;*
-;* You should have received a copy of the GNU Lesser General Public
-;* License along with FFmpeg; if not, write to the Free Software
-;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-;**
-
-%include "libavutil/x86/x86util.asm"
-
-cextern pw_4
-cextern pw_5
-cextern pw_9
-cextern pw_128
-
-section .text
-
-; args: punpck suffix (%1), dst_low (%2), dst_high (src) (%3), zero (%4); %2-%4 are register numbers
-; zero-extends one vector from 8 to 16 bits
-%macro UNPACK_8TO16 4
-mova  m%2, m%3 ; copy the source so both halves can be expanded
-punpckh%1 m%3, m%4 ; m%3 = high half of the source interleaved with m%4
-punpckl%1 m%2, m%4 ; m%2 = low half of the source (from the copy) interleaved with m%4
-%endmacro
-
-; Stores four 16-bit words taken from vector register %5 into %1..%4.
-; %1-%4: destination operands, one word each
-; %5:    source vector register; shifted (clobbered) on the non-SSE4 path
-; %6:    SSE4 path: immediate index of the first word to extract (%6+0 .. %6+3)
-;        other path: scratch general-purpose register, used through its d/w
-;        sub-register names and clobbered; this path always stores words 0..3
-%macro STORE_4_WORDS 6
-%if cpuflag(sse4)
-pextrw %1, %5, %6+0
-pextrw %2, %5, %6+1
-pextrw %3, %5, %6+2
-pextrw %4, %5, %6+3
-%else
-movd  %6d, %5 ; scratch = words 0 and 1 of the vector
-%if mmsize==16
-psrldq %5, 4 ; byte shift: drop the two words just read
-%else
-psrlq  %5, 32 ; same amount as a bit shift for the non-16-byte register size
-%endif
-mov    %1, %6w ; word 0
-shr    %6, 16
-mov    %2, %6w ; word 1
-movd  %6d, %5 ; scratch = words 2 and 3
-mov    %3, %6w ; word 2
-shr    %6, 16
-mov    %4, %6w ; word 3
-%endif
-%endmacro
-
-; in:  p1 p0 q0 q1 (as %1 %2 %3 %4), clobbers p0
-; out: p1 = (2*(p1 - q1) - 5*(p0 - q0) + 4) >> 3
-%macro VC1_LOOP_FILTER_A0 4
-psubw  %1, %4 ; %1 = p1 - q1
-psubw  %2, %3 ; %2 = p0 - q0 (this is the p0 clobber)
-paddw  %1, %1 ; %1 = 2*(p1 - q1)
-pmullw %2, [pw_5] ; %2 = 5*(p0 - q0); pw_5 is external, presumably packed words of 5
-psubw  %1, %2 ; %1 = 2*(p1 - q1) - 5*(p0 - q0)
-paddw  %1, [pw_4] ; the "+ 4" rounding term; pw_4 is external, presumably packed words of 4
-psraw  %1, 3 ; arithmetic >> 3
-%endmacro
-
-; in: p0 q0 a0 a1 a2
-;     m0 m1 m7 m6 m5
-;     r2d is read as pq; it is set up outside this macro (presumably pq
-;     replicated in every byte -- confirm against the caller)
-; %1: size
-; out: m0=p0' m1=q0' (packed back to unsigned bytes)
-; clobbers: m2-m7
-; PABSW is not defined in this file; presumably the packed-abs helper from
-; x86util.asm.
-%macro VC1_FILTER 1
-PABSW   m4, m7  ; |a0|
-PABSW   m3, m6  ; |a1|
-PABSW   m2, m5  ; |a2|
-mova    m6, m4
-pminsw  m3, m2  ; a3 = min(|a1|, |a2|)
-pcmpgtw m6, m3  ; if (a2 < a0 || a1 < a0)
-psubw   m3, m4
-pmullw  m3, [pw_5]   ; 5*(a3 - a0)
-PABSW   m2, m3
-psraw   m2, 3   ; abs(d/8)
-pxor    m7, m3  ; d_sign ^= a0_sign
-
-pxor    m5, m5  ; m5 = 0 from here on
-movd    m3, r2d
-%if %1 > 4
-punpcklbw m3, m3 ; duplicate each byte so 8 word lanes get populated
-%endif
-punpcklbw m3, m5 ; zero-extend bytes to words
-pcmpgtw m3, m4  ; if (a0 < pq)
-pand    m6, m3
-
-mova    m3, m0
-psubw   m3, m1  ; p0 - q0
-PABSW   m4, m3
-psraw   m4, 1   ; clip = |p0 - q0| >> 1
-pxor    m3, m7  ; d_sign ^ clip_sign
-psraw   m3, 15  ; spread the sign bit over the whole word
-pminsw  m2, m4  ; min(d, clip)
-pcmpgtw m4, m5  ; clip > 0
-pand    m6, m4  ; filt3 (C return value)
-
-; each set of 4 pixels is not filtered if the 3rd is not
-; (shuffle immediate 0xaa selects word 2 of each group for all four lanes)
-%if mmsize==16
-pshuflw m4, m6, 0xaa
-%if %1 > 4
-pshufhw m4, m4, 0xaa
-%endif
-%else
-pshufw  m4, m6, 0xaa
-%endif
-pandn   m3, m4  ; m3 = ~m3 & m4
-pand    m2, m6
-pand    m3, m2  ; d final
-
-psraw   m7, 15  ; sign mask: 0 or -1 per word
-pxor    m3, m7
-psubw   m3, m7  ; (d ^ s) - s: negate d where the mask is set
-psubw   m0, m3
-paddw   m1, m3
-packuswb m0, m0 ; saturate words to unsigned bytes
-packuswb m1, m1
-%endmacro
-
-; 1st param: size of filter
-; 2nd param: mov suffix equivalent to the filter size
-%macro VC1_V_LOOP_FILTER 2
-pxor  m5, m5
-mov%2 m6, [r4]
-mov%2 m4, [r4+r1]
-mov%2 m7, [r4+2*r1]
-mov%2 m0, [r4+r3]
-punpcklbw m6, m5
-punpcklbw m4, m5
-