"Ronald S. Bultje" <[email protected]> writes:
> From: Alexander Strange <[email protected]>
>
> ---
> libavcodec/dsputil.c | 19 ++++++++----
> libavcodec/dsputil.h | 4 ++-
> libavcodec/mpegvideo.c | 12 ++++++--
> libavcodec/snow.c | 12 ++++++--
> libavcodec/x86/dsputil_mmx.c | 63 ++++++++++++++++++++++-------------------
> 5 files changed, 67 insertions(+), 43 deletions(-)
>
> diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
> index 15925f6..13118d2 100644
> --- a/libavcodec/dsputil.c
> +++ b/libavcodec/dsputil.c
> @@ -298,7 +298,7 @@ static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2,
> int line_size, int h)
>
> /* draw the edges of width 'w' of an image of size width, height */
> //FIXME check that this is ok for mpeg4 interlaced
> -static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int
> w)
> +static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int
> w, int sides)
> {
> uint8_t *ptr, *last_line;
> int i;
> @@ -306,8 +306,8 @@ static void draw_edges_c(uint8_t *buf, int wrap, int
> width, int height, int w)
> last_line = buf + (height - 1) * wrap;
> for(i=0;i<w;i++) {
> /* top and bottom */
> - memcpy(buf - (i + 1) * wrap, buf, width);
> - memcpy(last_line + (i + 1) * wrap, last_line, width);
> + if (sides&EDGE_TOP) memcpy(buf - (i + 1) * wrap, buf, width);
> + if (sides&EDGE_BOTTOM) memcpy(last_line + (i + 1) * wrap, last_line,
> width);
> }
Maybe splitting this into two loops, one for the top and one for the
bottom, would be better. I'm not sure.
> /* left and right */
> ptr = buf;
> @@ -318,10 +318,15 @@ static void draw_edges_c(uint8_t *buf, int wrap, int
> width, int height, int w)
> }
> /* corners */
> for(i=0;i<w;i++) {
> - memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
> - memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right
> */
> - memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* top left
> */
> - memset(last_line + (i + 1) * wrap + width, last_line[width-1], w);
> /* top right */
> + if (sides&EDGE_TOP) {
> + memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
> + memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top
> right */
> + }
> +
> + if (sides&EDGE_BOTTOM) {
> + memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* top
> left */
> + memset(last_line + (i + 1) * wrap + width, last_line[width-1],
> w); /* top right */
> + }
> }
Ditto: splitting this corner loop into separate top and bottom loops
might be better here too.
> }
>
> diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
> index a5ae68a..99b3283 100644
> --- a/libavcodec/dsputil.h
> +++ b/libavcodec/dsputil.h
[...]
> diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
> index 66aba1b..1c5ff27 100644
> --- a/libavcodec/mpegvideo.c
> +++ b/libavcodec/mpegvideo.c
[...]
> diff --git a/libavcodec/snow.c b/libavcodec/snow.c
> index de5d2dc..42145f5 100644
> --- a/libavcodec/snow.c
> +++ b/libavcodec/snow.c
Changes to these files OK.
> diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
> index 4d1a305..f9165e3 100644
> --- a/libavcodec/x86/dsputil_mmx.c
> +++ b/libavcodec/x86/dsputil_mmx.c
> @@ -783,7 +783,7 @@ static void h263_h_loop_filter_mmx(uint8_t *src, int
> stride, int qscale){
>
> /* draw the edges of width 'w' of an image of size width, height
> this mmx version can only handle w==8 || w==16 */
> -static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
> int w)
> +static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
> int w, int sides)
> {
> uint8_t *ptr, *last_line;
> int i;
> @@ -838,34 +838,39 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int
> width, int height, int w)
>
> for(i=0;i<w;i+=4) {
> /* top and bottom (and hopefully also the corners) */
> - ptr= buf - (i + 1) * wrap - w;
> - __asm__ volatile(
> - "1: \n\t"
> - "movq (%1, %0), %%mm0 \n\t"
> - "movq %%mm0, (%0) \n\t"
> - "movq %%mm0, (%0, %2) \n\t"
> - "movq %%mm0, (%0, %2, 2) \n\t"
> - "movq %%mm0, (%0, %3) \n\t"
> - "add $8, %0 \n\t"
> - "cmp %4, %0 \n\t"
> - " jb 1b \n\t"
> - : "+r" (ptr)
> - : "r" ((x86_reg)buf - (x86_reg)ptr - w), "r"
> ((x86_reg)-wrap), "r" ((x86_reg)-wrap*3), "r" (ptr+width+2*w)
> - );
> - ptr= last_line + (i + 1) * wrap - w;
> - __asm__ volatile(
> - "1: \n\t"
> - "movq (%1, %0), %%mm0 \n\t"
> - "movq %%mm0, (%0) \n\t"
> - "movq %%mm0, (%0, %2) \n\t"
> - "movq %%mm0, (%0, %2, 2) \n\t"
> - "movq %%mm0, (%0, %3) \n\t"
> - "add $8, %0 \n\t"
> - "cmp %4, %0 \n\t"
> - " jb 1b \n\t"
> - : "+r" (ptr)
> - : "r" ((x86_reg)last_line - (x86_reg)ptr - w), "r"
> ((x86_reg)wrap), "r" ((x86_reg)wrap*3), "r" (ptr+width+2*w)
> - );
> + if (sides&EDGE_TOP) {
> + ptr= buf - (i + 1) * wrap - w;
> + __asm__ volatile(
> + "1: \n\t"
> + "movq (%1, %0), %%mm0 \n\t"
> + "movq %%mm0, (%0) \n\t"
> + "movq %%mm0, (%0, %2) \n\t"
> + "movq %%mm0, (%0, %2, 2) \n\t"
> + "movq %%mm0, (%0, %3) \n\t"
> + "add $8, %0 \n\t"
> + "cmp %4, %0 \n\t"
> + " jb 1b \n\t"
> + : "+r" (ptr)
> + : "r" ((x86_reg)buf - (x86_reg)ptr - w), "r"
> ((x86_reg)-wrap), "r" ((x86_reg)-wrap*3), "r" (ptr+width+2*w)
> + );
> + }
> +
> + if (sides&EDGE_BOTTOM) {
> + ptr= last_line + (i + 1) * wrap - w;
> + __asm__ volatile(
> + "1: \n\t"
> + "movq (%1, %0), %%mm0 \n\t"
> + "movq %%mm0, (%0) \n\t"
> + "movq %%mm0, (%0, %2) \n\t"
> + "movq %%mm0, (%0, %2, 2) \n\t"
> + "movq %%mm0, (%0, %3) \n\t"
> + "add $8, %0 \n\t"
> + "cmp %4, %0 \n\t"
> + " jb 1b \n\t"
> + : "+r" (ptr)
> + : "r" ((x86_reg)last_line - (x86_reg)ptr - w), "r"
> ((x86_reg)wrap), "r" ((x86_reg)wrap*3), "r" (ptr+width+2*w)
> + );
> + }
> }
> }
Looks OK, with the same remark as for the C code (separate top and
bottom loops might be better).
--
Måns Rullgård
[email protected]
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel