"Ronald S. Bultje" <[email protected]> writes:

> From: Alexander Strange <[email protected]>
>
> ---
>  libavcodec/dsputil.c         |   19 ++++++++----
>  libavcodec/dsputil.h         |    4 ++-
>  libavcodec/mpegvideo.c       |   12 ++++++--
>  libavcodec/snow.c            |   12 ++++++--
>  libavcodec/x86/dsputil_mmx.c |   63 ++++++++++++++++++++++-------------------
>  5 files changed, 67 insertions(+), 43 deletions(-)
>
> diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
> index 15925f6..13118d2 100644
> --- a/libavcodec/dsputil.c
> +++ b/libavcodec/dsputil.c
> @@ -298,7 +298,7 @@ static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, 
> int line_size, int h)
>  
>  /* draw the edges of width 'w' of an image of size width, height */
>  //FIXME check that this is ok for mpeg4 interlaced
> -static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int 
> w)
> +static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int 
> w, int sides)
>  {
>      uint8_t *ptr, *last_line;
>      int i;
> @@ -306,8 +306,8 @@ static void draw_edges_c(uint8_t *buf, int wrap, int 
> width, int height, int w)
>      last_line = buf + (height - 1) * wrap;
>      for(i=0;i<w;i++) {
>          /* top and bottom */
> -        memcpy(buf - (i + 1) * wrap, buf, width);
> -        memcpy(last_line + (i + 1) * wrap, last_line, width);
> +        if (sides&EDGE_TOP)    memcpy(buf - (i + 1) * wrap, buf, width);
> +        if (sides&EDGE_BOTTOM) memcpy(last_line + (i + 1) * wrap, last_line, 
> width);
>      }

It might be better to split this into two loops, one for the top edge and
one for the bottom edge.  I'm not sure.

>      /* left and right */
>      ptr = buf;
> @@ -318,10 +318,15 @@ static void draw_edges_c(uint8_t *buf, int wrap, int 
> width, int height, int w)
>      }
>      /* corners */
>      for(i=0;i<w;i++) {
> -        memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
> -        memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right 
> */
> -        memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* top left 
> */
> -        memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); 
> /* top right */
> +        if (sides&EDGE_TOP) {
> +            memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
> +            memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top 
> right */
> +        }
> +
> +        if (sides&EDGE_BOTTOM) {
> +            memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* top 
> left */
> +            memset(last_line + (i + 1) * wrap + width, last_line[width-1], 
> w); /* top right */
> +        }
>      }

Ditto: the corner loop could likewise be split into separate top and
bottom loops.

>  }
>  
> diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
> index a5ae68a..99b3283 100644
> --- a/libavcodec/dsputil.h
> +++ b/libavcodec/dsputil.h

[...]

> diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
> index 66aba1b..1c5ff27 100644
> --- a/libavcodec/mpegvideo.c
> +++ b/libavcodec/mpegvideo.c

[...]

> diff --git a/libavcodec/snow.c b/libavcodec/snow.c
> index de5d2dc..42145f5 100644
> --- a/libavcodec/snow.c
> +++ b/libavcodec/snow.c

Changes to these files OK.

> diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
> index 4d1a305..f9165e3 100644
> --- a/libavcodec/x86/dsputil_mmx.c
> +++ b/libavcodec/x86/dsputil_mmx.c
> @@ -783,7 +783,7 @@ static void h263_h_loop_filter_mmx(uint8_t *src, int 
> stride, int qscale){
>  
>  /* draw the edges of width 'w' of an image of size width, height
>     this mmx version can only handle w==8 || w==16 */
> -static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, 
> int w)
> +static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, 
> int w, int sides)
>  {
>      uint8_t *ptr, *last_line;
>      int i;
> @@ -838,34 +838,39 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int 
> width, int height, int w)
>  
>      for(i=0;i<w;i+=4) {
>          /* top and bottom (and hopefully also the corners) */
> -        ptr= buf - (i + 1) * wrap - w;
> -        __asm__ volatile(
> -                "1:                             \n\t"
> -                "movq (%1, %0), %%mm0           \n\t"
> -                "movq %%mm0, (%0)               \n\t"
> -                "movq %%mm0, (%0, %2)           \n\t"
> -                "movq %%mm0, (%0, %2, 2)        \n\t"
> -                "movq %%mm0, (%0, %3)           \n\t"
> -                "add $8, %0                     \n\t"
> -                "cmp %4, %0                     \n\t"
> -                " jb 1b                         \n\t"
> -                : "+r" (ptr)
> -                : "r" ((x86_reg)buf - (x86_reg)ptr - w), "r" 
> ((x86_reg)-wrap), "r" ((x86_reg)-wrap*3), "r" (ptr+width+2*w)
> -        );
> -        ptr= last_line + (i + 1) * wrap - w;
> -        __asm__ volatile(
> -                "1:                             \n\t"
> -                "movq (%1, %0), %%mm0           \n\t"
> -                "movq %%mm0, (%0)               \n\t"
> -                "movq %%mm0, (%0, %2)           \n\t"
> -                "movq %%mm0, (%0, %2, 2)        \n\t"
> -                "movq %%mm0, (%0, %3)           \n\t"
> -                "add $8, %0                     \n\t"
> -                "cmp %4, %0                     \n\t"
> -                " jb 1b                         \n\t"
> -                : "+r" (ptr)
> -                : "r" ((x86_reg)last_line - (x86_reg)ptr - w), "r" 
> ((x86_reg)wrap), "r" ((x86_reg)wrap*3), "r" (ptr+width+2*w)
> -        );
> +        if (sides&EDGE_TOP) {
> +            ptr= buf - (i + 1) * wrap - w;
> +            __asm__ volatile(
> +                    "1:                             \n\t"
> +                    "movq (%1, %0), %%mm0           \n\t"
> +                    "movq %%mm0, (%0)               \n\t"
> +                    "movq %%mm0, (%0, %2)           \n\t"
> +                    "movq %%mm0, (%0, %2, 2)        \n\t"
> +                    "movq %%mm0, (%0, %3)           \n\t"
> +                    "add $8, %0                     \n\t"
> +                    "cmp %4, %0                     \n\t"
> +                    " jb 1b                         \n\t"
> +                    : "+r" (ptr)
> +                    : "r" ((x86_reg)buf - (x86_reg)ptr - w), "r" 
> ((x86_reg)-wrap), "r" ((x86_reg)-wrap*3), "r" (ptr+width+2*w)
> +            );
> +        }
> +
> +        if (sides&EDGE_BOTTOM) {
> +            ptr= last_line + (i + 1) * wrap - w;
> +            __asm__ volatile(
> +                    "1:                             \n\t"
> +                    "movq (%1, %0), %%mm0           \n\t"
> +                    "movq %%mm0, (%0)               \n\t"
> +                    "movq %%mm0, (%0, %2)           \n\t"
> +                    "movq %%mm0, (%0, %2, 2)        \n\t"
> +                    "movq %%mm0, (%0, %3)           \n\t"
> +                    "add $8, %0                     \n\t"
> +                    "cmp %4, %0                     \n\t"
> +                    " jb 1b                         \n\t"
> +                    : "+r" (ptr)
> +                    : "r" ((x86_reg)last_line - (x86_reg)ptr - w), "r" 
> ((x86_reg)wrap), "r" ((x86_reg)wrap*3), "r" (ptr+width+2*w)
> +            );
> +        }
>      }
>  }

Looks OK, with the same remark as for the C code: consider using separate
loops for the top and bottom edges.

-- 
Måns Rullgård
[email protected]
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to