On Thu, Jun 14, 2012 at 03:03:09PM +0100, Mans Rullgard wrote:
> From: "Ronald S. Bultje" <rsbul...@gmail.com>
> 
> ---
>  libavcodec/dwt.c             |  109 +++++++++++++++++++++---------------------
>  libavcodec/dwt.h             |   18 +++----
>  libavcodec/snow.c            |    4 ++
>  libavcodec/snow.h            |    2 +
>  libavcodec/snowdec.c         |    2 +-
>  libavcodec/snowenc.c         |   12 ++---
>  libavcodec/x86/snowdsp_mmx.c |    6 +--
>  7 files changed, 79 insertions(+), 74 deletions(-)
> 
> diff --git a/libavcodec/dwt.c b/libavcodec/dwt.c
> index d3d4f3b..56e4a57 100644
> --- a/libavcodec/dwt.c
> +++ b/libavcodec/dwt.c
> @@ -243,9 +243,8 @@ static av_always_inline void inv_liftS(IDWTELEM *dst, 
> IDWTELEM *src,
>  }
>  #endif /* ! liftS */
>  
> -static void horizontal_decompose53i(DWTELEM *b, int width)
> +static void horizontal_decompose53i(DWTELEM *b, DWTELEM *temp, int width)
>  {
> -    DWTELEM temp[width];
>      const int width2 = width >> 1;
>      int x;
>      const int w2 = (width + 1) >> 1;
> @@ -311,8 +310,8 @@ static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM 
> *b1, DWTELEM *b2,
>          b1[i] += (b0[i] + b2[i] + 2) >> 2;
>  }
>  
> -static void spatial_decompose53i(DWTELEM *buffer, int width, int height,
> -                                 int stride)
> +static void spatial_decompose53i(DWTELEM *buffer, DWTELEM *temp,
> +                                 int width, int height, int stride)
>  {
>      int y;
>      DWTELEM *b0 = buffer + mirror(-2 - 1, height - 1) * stride;
> @@ -323,9 +322,9 @@ static void spatial_decompose53i(DWTELEM *buffer, int 
> width, int height,
>          DWTELEM *b3 = buffer + mirror(y + 2, height - 1) * stride;
>  
>          if (y + 1 < (unsigned)height)
> -            horizontal_decompose53i(b2, width);
> +            horizontal_decompose53i(b2, temp, width);
>          if (y + 2 < (unsigned)height)
> -            horizontal_decompose53i(b3, width);
> +            horizontal_decompose53i(b3, temp, width);
>  
>          if (y + 1 < (unsigned)height)
>              vertical_decompose53iH0(b1, b2, b3, width);
> @@ -337,9 +336,8 @@ static void spatial_decompose53i(DWTELEM *buffer, int 
> width, int height,
>      }
>  }
>  
> -static void horizontal_decompose97i(DWTELEM *b, int width)
> +static void horizontal_decompose97i(DWTELEM *b, DWTELEM *temp, int width)
>  {
> -    DWTELEM temp[width];
>      const int w2 = (width + 1) >> 1;
>  
>      lift(temp + w2, b + 1, b,         1, 2, 2, width, W_AM, W_AO, W_AS, 1, 
> 1);
> @@ -389,8 +387,8 @@ static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM 
> *b1, DWTELEM *b2,
>          b1[i] += (W_DM * (b0[i] + b2[i]) + W_DO) >> W_DS;
>  }
>  
> -static void spatial_decompose97i(DWTELEM *buffer, int width, int height,
> -                                 int stride)
> +static void spatial_decompose97i(DWTELEM *buffer, DWTELEM *temp,
> +                                 int width, int height, int stride)
>  {
>      int y;
>      DWTELEM *b0 = buffer + mirror(-4 - 1, height - 1) * stride;
> @@ -403,9 +401,9 @@ static void spatial_decompose97i(DWTELEM *buffer, int 
> width, int height,
>          DWTELEM *b5 = buffer + mirror(y + 4, height - 1) * stride;
>  
>          if (y + 3 < (unsigned)height)
> -            horizontal_decompose97i(b4, width);
> +            horizontal_decompose97i(b4, temp, width);
>          if (y + 4 < (unsigned)height)
> -            horizontal_decompose97i(b5, width);
> +            horizontal_decompose97i(b5, temp, width);
>  
>          if (y + 3 < (unsigned)height)
>              vertical_decompose97iH0(b3, b4, b5, width);
> @@ -423,20 +421,20 @@ static void spatial_decompose97i(DWTELEM *buffer, int 
> width, int height,
>      }
>  }
>  
> -void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride,
> -                    int type, int decomposition_count)
> +void ff_spatial_dwt(DWTELEM *buffer, DWTELEM *temp, int width, int height,
> +                    int stride, int type, int decomposition_count)
>  {
>      int level;
>  
>      for (level = 0; level < decomposition_count; level++) {
>          switch (type) {
>          case DWT_97:
> -            spatial_decompose97i(buffer,
> +            spatial_decompose97i(buffer, temp,
>                                   width >> level, height >> level,
>                                   stride << level);
>              break;
>          case DWT_53:
> -            spatial_decompose53i(buffer,
> +            spatial_decompose53i(buffer, temp,
>                                   width >> level, height >> level,
>                                   stride << level);
>              break;
> @@ -444,9 +442,8 @@ void ff_spatial_dwt(DWTELEM *buffer, int width, int 
> height, int stride,
>      }
>  }
>  
> -static void horizontal_compose53i(IDWTELEM *b, int width)
> +static void horizontal_compose53i(IDWTELEM *b, IDWTELEM *temp, int width)
>  {
> -    IDWTELEM temp[width];
>      const int width2 = width >> 1;
>      const int w2     = (width + 1) >> 1;
>      int x;
> @@ -506,6 +503,7 @@ static void spatial_compose53i_init(DWTCompose *cs, 
> IDWTELEM *buffer,
>  }
>  
>  static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer *sb,
> +                                           IDWTELEM *temp,
>                                             int width, int height,
>                                             int stride_line)
>  {
> @@ -535,17 +533,18 @@ static void spatial_compose53i_dy_buffered(DWTCompose 
> *cs, slice_buffer *sb,
>      }
>  
>      if (y - 1 < (unsigned)height)
> -        horizontal_compose53i(b0, width);
> +        horizontal_compose53i(b0, temp, width);
>      if (y + 0 < (unsigned)height)
> -        horizontal_compose53i(b1, width);
> +        horizontal_compose53i(b1, temp, width);
>  
>      cs->b0  = b2;
>      cs->b1  = b3;
>      cs->y  += 2;
>  }
>  
> -static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, int 
> width,
> -                                  int height, int stride)
> +static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer,
> +                                  IDWTELEM *temp, int width, int height,
> +                                  int stride)
>  {
>      int y        = cs->y;
>      IDWTELEM *b0 = cs->b0;
> @@ -559,27 +558,26 @@ static void spatial_compose53i_dy(DWTCompose *cs, 
> IDWTELEM *buffer, int width,
>          vertical_compose53iH0(b0, b1, b2, width);
>  
>      if (y - 1 < (unsigned)height)
> -        horizontal_compose53i(b0, width);
> +        horizontal_compose53i(b0, temp, width);
>      if (y + 0 < (unsigned)height)
> -        horizontal_compose53i(b1, width);
> +        horizontal_compose53i(b1, temp, width);
>  
>      cs->b0  = b2;
>      cs->b1  = b3;
>      cs->y  += 2;
>  }
>  
> -static void av_unused spatial_compose53i(IDWTELEM *buffer, int width,
> -                                         int height, int stride)
> +static void av_unused spatial_compose53i(IDWTELEM *buffer, IDWTELEM *temp,
> +                                         int width, int height, int stride)
>  {
>      DWTCompose cs;
>      spatial_compose53i_init(&cs, buffer, height, stride);
>      while (cs.y <= height)
> -        spatial_compose53i_dy(&cs, buffer, width, height, stride);
> +        spatial_compose53i_dy(&cs, buffer, temp, width, height, stride);
>  }
>  
> -void ff_snow_horizontal_compose97i(IDWTELEM *b, int width)
> +void ff_snow_horizontal_compose97i(IDWTELEM *b, IDWTELEM *temp, int width)
>  {
> -    IDWTELEM temp[width];
>      const int w2 = (width + 1) >> 1;
>  
>  #if 0 //maybe more understadable but slower
> @@ -693,8 +691,9 @@ static void spatial_compose97i_init(DWTCompose *cs, 
> IDWTELEM *buffer, int height
>  }
>  
>  static void spatial_compose97i_dy_buffered(DWTContext *dsp, DWTCompose *cs,
> -                                           slice_buffer *sb, int width,
> -                                           int height, int stride_line)
> +                                           slice_buffer * sb, IDWTELEM *temp,
> +                                           int width, int height,
> +                                           int stride_line)
>  {
>      int y = cs->y;
>  
> @@ -723,9 +722,9 @@ static void spatial_compose97i_dy_buffered(DWTContext 
> *dsp, DWTCompose *cs,
>      }
>  
>      if (y - 1 < (unsigned)height)
> -        dsp->horizontal_compose97i(b0, width);
> +        dsp->horizontal_compose97i(b0, temp, width);
>      if (y + 0 < (unsigned)height)
> -        dsp->horizontal_compose97i(b1, width);
> +        dsp->horizontal_compose97i(b1, temp, width);
>  
>      cs->b0  = b2;
>      cs->b1  = b3;
> @@ -734,8 +733,9 @@ static void spatial_compose97i_dy_buffered(DWTContext 
> *dsp, DWTCompose *cs,
>      cs->y  += 2;
>  }
>  
> -static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, int 
> width,
> -                                  int height, int stride)
> +static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer,
> +                                  IDWTELEM *temp, int width, int height,
> +                                  int stride)
>  {
>      int y        = cs->y;
>      IDWTELEM *b0 = cs->b0;
> @@ -755,9 +755,9 @@ static void spatial_compose97i_dy(DWTCompose *cs, 
> IDWTELEM *buffer, int width,
>          vertical_compose97iH0(b0, b1, b2, width);
>  
>      if (y - 1 < (unsigned)height)
> -        ff_snow_horizontal_compose97i(b0, width);
> +        ff_snow_horizontal_compose97i(b0, temp, width);
>      if (y + 0 < (unsigned)height)
> -        ff_snow_horizontal_compose97i(b1, width);
> +        ff_snow_horizontal_compose97i(b1, temp, width);
>  
>      cs->b0  = b2;
>      cs->b1  = b3;
> @@ -766,13 +766,13 @@ static void spatial_compose97i_dy(DWTCompose *cs, 
> IDWTELEM *buffer, int width,
>      cs->y  += 2;
>  }
>  
> -static void av_unused spatial_compose97i(IDWTELEM *buffer, int width,
> -                                         int height, int stride)
> +static void av_unused spatial_compose97i(IDWTELEM *buffer, IDWTELEM *temp,
> +                                         int width, int height, int stride)
>  {
>      DWTCompose cs;
>      spatial_compose97i_init(&cs, buffer, height, stride);
>      while (cs.y <= height)
> -        spatial_compose97i_dy(&cs, buffer, width, height, stride);
> +        spatial_compose97i_dy(&cs, buffer, temp, width, height, stride);
>  }
>  
>  void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer *sb, int 
> width,
> @@ -795,9 +795,9 @@ void ff_spatial_idwt_buffered_init(DWTCompose *cs, 
> slice_buffer *sb, int width,
>  }
>  
>  void ff_spatial_idwt_buffered_slice(DWTContext *dsp, DWTCompose *cs,
> -                                    slice_buffer *slice_buf, int width,
> -                                    int height, int stride_line, int type,
> -                                    int decomposition_count, int y)
> +                                    slice_buffer *slice_buf, IDWTELEM *temp,
> +                                    int width, int height, int stride_line,
> +                                    int type, int decomposition_count, int y)
>  {
>      const int support = type == 1 ? 3 : 5;
>      int level;
> @@ -808,13 +808,13 @@ void ff_spatial_idwt_buffered_slice(DWTContext *dsp, 
> DWTCompose *cs,
>          while (cs[level].y <= FFMIN((y >> level) + support, height >> 
> level)) {
>              switch (type) {
>              case DWT_97:
> -                spatial_compose97i_dy_buffered(dsp, cs + level, slice_buf,
> +                spatial_compose97i_dy_buffered(dsp, cs + level, slice_buf, 
> temp,
>                                                 width >> level,
>                                                 height >> level,
>                                                 stride_line << level);
>                  break;
>              case DWT_53:
> -                spatial_compose53i_dy_buffered(cs + level, slice_buf,
> +                spatial_compose53i_dy_buffered(cs + level, slice_buf, temp,
>                                                 width >> level,
>                                                 height >> level,
>                                                 stride_line << level);
> @@ -842,8 +842,9 @@ static void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM 
> *buffer, int width,
>      }
>  }
>  
> -static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int 
> width,
> -                                  int height, int stride, int type,
> +static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer,
> +                                  IDWTELEM *temp, int width, int height,
> +                                  int stride, int type,
>                                    int decomposition_count, int y)
>  {
>      const int support = type == 1 ? 3 : 5;
> @@ -855,26 +856,26 @@ static void ff_spatial_idwt_slice(DWTCompose *cs, 
> IDWTELEM *buffer, int width,
>          while (cs[level].y <= FFMIN((y >> level) + support, height >> 
> level)) {
>              switch (type) {
>              case DWT_97:
> -                spatial_compose97i_dy(cs + level, buffer, width >> level,
> +                spatial_compose97i_dy(cs + level, buffer, temp, width >> 
> level,
>                                        height >> level, stride << level);
>                  break;
>              case DWT_53:
> -                spatial_compose53i_dy(cs + level, buffer, width >> level,
> +                spatial_compose53i_dy(cs + level, buffer, temp, width >> 
> level,
>                                        height >> level, stride << level);
>                  break;
>              }
>          }
>  }
>  
> -void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride,
> -                     int type, int decomposition_count)
> +void ff_spatial_idwt(IDWTELEM *buffer, IDWTELEM *temp, int width, int height,
> +                     int stride, int type, int decomposition_count)
>  {
>      DWTCompose cs[MAX_DECOMPOSITIONS];
>      int y;
>      ff_spatial_idwt_init(cs, buffer, width, height, stride, type,
>                           decomposition_count);
>      for (y = 0; y < height; y += 4)
> -        ff_spatial_idwt_slice(cs, buffer, width, height, stride, type,
> +        ff_spatial_idwt_slice(cs, buffer, temp, width, height, stride, type,
>                                decomposition_count, y);
>  }
>  
> @@ -883,7 +884,7 @@ static inline int w_c(void *v, uint8_t *pix1, uint8_t 
> *pix2, int line_size,
>  {
>      int s, i, j;
>      const int dec_count = w == 8 ? 3 : 4;
> -    int tmp[32 * 32];
> +    int tmp[32 * 32], tmp2[32];
>      int level, ori;
>      static const int scale[2][2][4][4] = {
>          {
> @@ -925,7 +926,7 @@ static inline int w_c(void *v, uint8_t *pix1, uint8_t 
> *pix2, int line_size,
>          pix2 += line_size;
>      }
>  
> -    ff_spatial_dwt(tmp, w, h, 32, type, dec_count);
> +    ff_spatial_dwt(tmp, tmp2, w, h, 32, type, dec_count);
>  
>      s = 0;
>      assert(w == h);
> diff --git a/libavcodec/dwt.h b/libavcodec/dwt.h
> index 9229928..771a9bf 100644
> --- a/libavcodec/dwt.h
> +++ b/libavcodec/dwt.h
> @@ -50,7 +50,7 @@ typedef struct DWTContext {
>      void (*vertical_compose97i)(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
>                                  IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5,
>                                  int width);
> -    void (*horizontal_compose97i)(IDWTELEM *b, int width);
> +    void (*horizontal_compose97i)(IDWTELEM *b, IDWTELEM *temp, int width);
>      void (*inner_add_yblock)(const uint8_t *obmc, const int obmc_stride,
>                               uint8_t **block, int b_w, int b_h, int src_x,
>                               int src_y, int src_stride, slice_buffer *sb,
> @@ -148,7 +148,7 @@ IDWTELEM *ff_slice_buffer_load_line(slice_buffer *buf, 
> int line);
>  void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
>                                   IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5,
>                                   int width);
> -void ff_snow_horizontal_compose97i(IDWTELEM *b, int width);
> +void ff_snow_horizontal_compose97i(IDWTELEM *b, IDWTELEM *temp, int width);
>  void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride,
>                                uint8_t **block, int b_w, int b_h, int src_x,
>                                int src_y, int src_stride, slice_buffer *sb,
> @@ -157,18 +157,18 @@ void ff_snow_inner_add_yblock(const uint8_t *obmc, 
> const int obmc_stride,
>  int ff_w53_32_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
>  int ff_w97_32_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
>  
> -void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type,
> -                    int decomposition_count);
> +void ff_spatial_dwt(int *buffer, int *temp, int width, int height, int 
> stride,
> +                    int type, int decomposition_count);
>  
>  void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer *sb, int 
> width,
>                                     int height, int stride_line, int type,
>                                     int decomposition_count);
>  void ff_spatial_idwt_buffered_slice(DWTContext *dsp, DWTCompose *cs,
> -                                    slice_buffer *slice_buf, int width,
> -                                    int height, int stride_line, int type,
> -                                    int decomposition_count, int y);
> -void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride,
> -                     int type, int decomposition_count);
> +                                    slice_buffer *slice_buf, IDWTELEM *temp,
> +                                    int width, int height, int stride_line,
> +                                    int type, int decomposition_count, int 
> y);
> +void ff_spatial_idwt(IDWTELEM *buffer, IDWTELEM *temp, int width, int height,
> +                     int stride, int type, int decomposition_count);
>  
>  void ff_dwt_init(DWTContext *c);
>  void ff_dwt_init_x86(DWTContext *c);
> diff --git a/libavcodec/snow.c b/libavcodec/snow.c
> index 384cda8..edd7d07 100644
> --- a/libavcodec/snow.c
> +++ b/libavcodec/snow.c
> @@ -440,6 +440,8 @@ av_cold int ff_snow_common_init(AVCodecContext *avctx){
>  
>      s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
>      s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
> +    s->temp_dwt_buffer = av_mallocz(width * sizeof(DWTELEM));
> +    s->temp_idwt_buffer = av_mallocz(width * sizeof(IDWTELEM));

Uhm, are any of these av_mallocz() allocations checked for failure (NULL return)?

The patch looks OK though.
_______________________________________________
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to