vlc | branch: master | Rémi Denis-Courmont <[email protected]> | Thu Jul 7 20:21:09 2011 +0300| [d7472f3aa1234089237fed9ae52e37b5c92d4133] | committer: Rémi Denis-Courmont
Clean up NEON chroma converter - do not assume output pitch equals (double) pixel width - improve function prototypes - handle zero-width or zero-height corner cases in ASM (totally useless) - use ARM condition flag (opS) as appropriate > http://git.videolan.org/gitweb.cgi/vlc.git/?a=commit;h=d7472f3aa1234089237fed9ae52e37b5c92d4133 --- modules/arm_neon/Modules.am | 2 +- modules/arm_neon/chroma_neon.h | 48 ++++++++++++ modules/arm_neon/{i420_yuy2.c => chroma_yuv.c} | 59 ++++++-------- modules/arm_neon/i420_yuyv.S | 98 +++++++++++------------- 4 files changed, 118 insertions(+), 89 deletions(-) diff --git a/modules/arm_neon/Modules.am b/modules/arm_neon/Modules.am index 5b0748c..93500b7 100644 --- a/modules/arm_neon/Modules.am +++ b/modules/arm_neon/Modules.am @@ -11,7 +11,7 @@ libaudio_format_neon_plugin_la_DEPENDENCIES = libi420_yuy2_neon_plugin_la_SOURCES = \ i420_yuyv.S \ - i420_yuy2.c + chroma_yuv.c chroma_neon.h libi420_yuy2_neon_plugin_la_CFLAGS = $(AM_CFLAGS) libi420_yuy2_neon_plugin_la_LIBADD = $(AM_LIBADD) libi420_yuy2_neon_plugin_la_DEPENDENCIES = diff --git a/modules/arm_neon/chroma_neon.h b/modules/arm_neon/chroma_neon.h new file mode 100644 index 0000000..40bfbcc --- /dev/null +++ b/modules/arm_neon/chroma_neon.h @@ -0,0 +1,48 @@ +/***************************************************************************** + * chroma_neon.h + ***************************************************************************** + * Copyright (C) 2011 Rémi Denis-Courmont + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA. + *****************************************************************************/ + +/* Planes must start on a 16-bytes boundary. Pitches must be multiples of 16 + * bytes even for subsampled components. */ + +/* Planar picture buffer. + * Pitch corresponds to luminance component in bytes. Chrominance pitches are + * inferred from the color subsampling ratio. */ +struct yuv_planes +{ + void *y, *u, *v; + size_t pitch; +}; + +/* Packed picture buffer. Pitch is in bytes (_not_ pixels). */ +struct yuv_pack +{ + void *yuv; + size_t pitch; +}; + +/* I420 to YUYV conversion. */ +void i420_yuyv_neon (struct yuv_pack *const out, + const struct yuv_planes *const in, + int width, int height); + +/* I420 to UYVY conversion. */ +void i420_uyvy_neon (struct yuv_pack *const out, + const struct yuv_planes *const in, + int width, int height); diff --git a/modules/arm_neon/i420_yuy2.c b/modules/arm_neon/chroma_yuv.c similarity index 67% rename from modules/arm_neon/i420_yuy2.c rename to modules/arm_neon/chroma_yuv.c index 5cc9907..0dc66ed 100644 --- a/modules/arm_neon/i420_yuy2.c +++ b/modules/arm_neon/chroma_yuv.c @@ -26,6 +26,7 @@ #include <vlc_plugin.h> #include <vlc_filter.h> #include <vlc_cpu.h> +#include "chroma_neon.h" static int Open (vlc_object_t *); @@ -35,58 +36,48 @@ vlc_module_begin () set_callbacks (Open, NULL) vlc_module_end () -void i420_yuyv_neon (uint8_t *out, const uint8_t **in, - unsigned int pitch, unsigned int s_off, - unsigned int height); +#define DEFINE_PACK(pack, pict) \ + struct yuv_pack pack = { (pict)->Y_PIXELS, (pict)->Y_PITCH } +#define DEFINE_PLANES(planes, pict) \ + struct yuv_planes planes = { \ + (pict)->Y_PIXELS, (pict)->U_PIXELS, (pict)->V_PIXELS, (pict)->Y_PITCH } +#define DEFINE_PLANES_SWAP(planes, pict) \ + struct yuv_planes 
planes = { \ + (pict)->Y_PIXELS, (pict)->V_PIXELS, (pict)->U_PIXELS, (pict)->Y_PITCH } static void I420_YUYV (filter_t *filter, picture_t *src, picture_t *dst) { - uint8_t *out = dst->p->p_pixels; - const uint8_t *yuv[3] = { src->Y_PIXELS, src->U_PIXELS, src->V_PIXELS, }; - size_t height = filter->fmt_in.video.i_height; - int i_pitch = (dst->p->i_pitch >> 1) & ~0xF; - int s_offset = src->p->i_pitch - i_pitch; - - i420_yuyv_neon (out, yuv, i_pitch, s_offset, height); + DEFINE_PACK(out, dst); + DEFINE_PLANES(in, src); + i420_yuyv_neon (&out, &in, filter->fmt_in.video.i_width, + filter->fmt_in.video.i_height); } VIDEO_FILTER_WRAPPER (I420_YUYV) static void YV12_YUYV (filter_t *filter, picture_t *src, picture_t *dst) { - uint8_t *out = dst->p->p_pixels; - const uint8_t *yuv[3] = { src->Y_PIXELS, src->V_PIXELS, src->U_PIXELS, }; - size_t height = filter->fmt_in.video.i_height; - int i_pitch = (dst->p->i_pitch >> 1) & ~0xF; - int s_offset = src->p->i_pitch - i_pitch; - - i420_yuyv_neon (out, yuv, i_pitch, s_offset, height); + DEFINE_PACK(out, dst); + DEFINE_PLANES_SWAP(in, src); + i420_yuyv_neon (&out, &in, filter->fmt_in.video.i_width, + filter->fmt_in.video.i_height); } VIDEO_FILTER_WRAPPER (YV12_YUYV) -void i420_uyvy_neon (uint8_t *out, const uint8_t **in, - uintptr_t pitch, uintptr_t s_off, uintptr_t height); - static void I420_UYVY (filter_t *filter, picture_t *src, picture_t *dst) { - uint8_t *out = dst->p->p_pixels; - const uint8_t *yuv[3] = { src->Y_PIXELS, src->U_PIXELS, src->V_PIXELS, }; - size_t height = filter->fmt_in.video.i_height; - int i_pitch = (dst->p->i_pitch >> 1) & ~0xF; - int s_offset = src->p->i_pitch - i_pitch; - - i420_uyvy_neon (out, yuv, i_pitch, s_offset, height); + DEFINE_PACK(out, dst); + DEFINE_PLANES(in, src); + i420_uyvy_neon (&out, &in, filter->fmt_in.video.i_width, + filter->fmt_in.video.i_height); } VIDEO_FILTER_WRAPPER (I420_UYVY) static void YV12_UYVY (filter_t *filter, picture_t *src, picture_t *dst) { - uint8_t *out = 
dst->p->p_pixels; - const uint8_t *yuv[3] = { src->Y_PIXELS, src->V_PIXELS, src->U_PIXELS, }; - size_t height = filter->fmt_in.video.i_height; - int i_pitch = (dst->p->i_pitch >> 1) & ~0xF; - int s_offset = src->p->i_pitch - i_pitch; - - i420_uyvy_neon (out, yuv, i_pitch, s_offset, height); + DEFINE_PACK(out, dst); + DEFINE_PLANES_SWAP(in, src); + i420_uyvy_neon (&out, &in, filter->fmt_in.video.i_width, + filter->fmt_in.video.i_height); } VIDEO_FILTER_WRAPPER (YV12_UYVY) diff --git a/modules/arm_neon/i420_yuyv.S b/modules/arm_neon/i420_yuyv.S index 556680f..67c3043 100644 --- a/modules/arm_neon/i420_yuyv.S +++ b/modules/arm_neon/i420_yuyv.S @@ -1,7 +1,7 @@ @***************************************************************************** @ i420_yuyv_neon.S : ARM NEONv1 I420 to YUYV chroma conversion @***************************************************************************** - @ Copyright (C) 2009 Rémi Denis-Courmont + @ Copyright (C) 2009-2011 Rémi Denis-Courmont @ @ This program is free software; you can redistribute it and/or modify @ it under the terms of the GNU General Public License as published by @@ -23,28 +23,33 @@ #define O1 r0 #define O2 r1 -#define PITCH r2 -#define S_OFF r3 +#define WIDTH r2 +#define HEIGHT r3 #define Y1 r4 #define Y2 r5 #define U r6 #define V r7 -#define HEIGHT r8 -#define END_O1 r12 +#define YPITCH r8 +#define OPAD r10 +#define YPAD r11 +#define COUNT ip +#define OPITCH lr .align .global i420_yuyv_neon .type i420_yuyv_neon, %function i420_yuyv_neon: - push {r4-r8, lr} - ldr HEIGHT, [sp, #(4*6)] - ldmia r1, {Y1, U, V} - add O2, O1, PITCH, lsl #1 - add Y2, Y1, PITCH - add Y2, S_OFF + push {r4-r8,r10-r11,lr} + ldmia r0, {O1, OPITCH} + ldmia r1, {Y1, U, V, YPITCH} + cmp HEIGHT, #0 + sub OPAD, OPITCH, WIDTH, lsl #1 + sub YPAD, YPITCH, WIDTH 1: - mov END_O1, O2 - pld [Y2] + movgts COUNT, WIDTH + add O2, O1, OPITCH + add Y2, Y1, YPITCH + pople {r4-r8,r10-r11,pc} 2: pld [U, #64] vld1.u8 {d2}, [U,:64]! 
@@ -52,6 +57,7 @@ i420_yuyv_neon: vld1.u8 {d3}, [V,:64]! pld [Y1, #64] vzip.u8 d2, d3 + subs COUNT, COUNT, #16 vld1.u8 {q0}, [Y1,:128]! pld [Y2, #64] vmov q3, q1 @@ -60,36 +66,29 @@ i420_yuyv_neon: vzip.u8 q2, q3 vst1.u8 {q0-q1}, [O1,:128]! vst1.u8 {q2-q3}, [O2,:128]! + bgt 2b - cmp O1, END_O1 - bne 2b - - sub HEIGHT, #2 - mov O1, O2 - add O2, PITCH, lsl #1 - add Y2, S_OFF - mov Y1, Y2 - add Y2, PITCH - add Y2, S_OFF - add U, S_OFF, lsr #1 - add V, S_OFF, lsr #1 - - cmp HEIGHT, #0 - bne 1b - - pop {r4-r8, pc} + subs HEIGHT, #2 + add O1, O2, OPAD + add Y1, Y2, YPAD + add U, U, YPAD, lsr #1 + add V, V, YPAD, lsr #1 + b 1b .global i420_uyvy_neon .type i420_uyvy_neon, %function i420_uyvy_neon: - push {r4-r8, lr} - ldr HEIGHT, [sp, #(4*6)] - ldmia r1, {Y1, U, V} - add O2, O1, PITCH, lsl #1 - add Y2, Y1, PITCH - add Y2, S_OFF + push {r4-r8,r10-r11,lr} + ldmia r0, {O1, OPITCH} + ldmia r1, {Y1, U, V, YPITCH} + cmp HEIGHT, #0 + sub OPAD, OPITCH, WIDTH, lsl #1 + sub YPAD, YPITCH, WIDTH 1: - mov END_O1, O2 + movgts COUNT, WIDTH + add O2, O1, OPITCH + add Y2, Y1, YPITCH + pople {r4-r8,r10-r11,pc} 2: pld [U, #64] vld1.u8 {d0}, [U,:64]! @@ -97,6 +96,7 @@ i420_uyvy_neon: vld1.u8 {d1}, [V,:64]! pld [Y1, #64] vzip.u8 d0, d1 + subs COUNT, COUNT, #16 vld1.u8 {q1}, [Y1,:128]! pld [Y2, #64] vmov q2, q0 @@ -105,21 +105,11 @@ i420_uyvy_neon: vzip.u8 q2, q3 vst1.u8 {q0-q1}, [O1,:128]! vst1.u8 {q2-q3}, [O2,:128]! + bgt 2b - cmp O1, END_O1 - bne 2b - - sub HEIGHT, #2 - mov O1, O2 - add O2, PITCH, lsl #1 - add Y2, S_OFF - mov Y1, Y2 - add Y2, PITCH - add Y2, S_OFF - add U, S_OFF, lsr #1 - add V, S_OFF, lsr #1 - - cmp HEIGHT, #0 - bne 1b - - pop {r4-r8, pc} + subs HEIGHT, #2 + add O1, O2, OPAD + add Y1, Y2, YPAD + add U, U, YPAD, lsr #1 + add V, V, YPAD, lsr #1 + b 1b _______________________________________________ vlc-commits mailing list [email protected] http://mailman.videolan.org/listinfo/vlc-commits
