On Thu, Jun 14, 2012 at 03:03:09PM +0100, Mans Rullgard wrote: > From: "Ronald S. Bultje" <rsbul...@gmail.com> > > --- > libavcodec/dwt.c | 109 > +++++++++++++++++++++--------------------- > libavcodec/dwt.h | 18 +++---- > libavcodec/snow.c | 4 ++ > libavcodec/snow.h | 2 + > libavcodec/snowdec.c | 2 +- > libavcodec/snowenc.c | 12 ++--- > libavcodec/x86/snowdsp_mmx.c | 6 +-- > 7 files changed, 79 insertions(+), 74 deletions(-) > > diff --git a/libavcodec/dwt.c b/libavcodec/dwt.c > index d3d4f3b..56e4a57 100644 > --- a/libavcodec/dwt.c > +++ b/libavcodec/dwt.c > @@ -243,9 +243,8 @@ static av_always_inline void inv_liftS(IDWTELEM *dst, > IDWTELEM *src, > } > #endif /* ! liftS */ > > -static void horizontal_decompose53i(DWTELEM *b, int width) > +static void horizontal_decompose53i(DWTELEM *b, DWTELEM *temp, int width) > { > - DWTELEM temp[width]; > const int width2 = width >> 1; > int x; > const int w2 = (width + 1) >> 1; > @@ -311,8 +310,8 @@ static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM > *b1, DWTELEM *b2, > b1[i] += (b0[i] + b2[i] + 2) >> 2; > } > > -static void spatial_decompose53i(DWTELEM *buffer, int width, int height, > - int stride) > +static void spatial_decompose53i(DWTELEM *buffer, DWTELEM *temp, > + int width, int height, int stride) > { > int y; > DWTELEM *b0 = buffer + mirror(-2 - 1, height - 1) * stride; > @@ -323,9 +322,9 @@ static void spatial_decompose53i(DWTELEM *buffer, int > width, int height, > DWTELEM *b3 = buffer + mirror(y + 2, height - 1) * stride; > > if (y + 1 < (unsigned)height) > - horizontal_decompose53i(b2, width); > + horizontal_decompose53i(b2, temp, width); > if (y + 2 < (unsigned)height) > - horizontal_decompose53i(b3, width); > + horizontal_decompose53i(b3, temp, width); > > if (y + 1 < (unsigned)height) > vertical_decompose53iH0(b1, b2, b3, width); > @@ -337,9 +336,8 @@ static void spatial_decompose53i(DWTELEM *buffer, int > width, int height, > } > } > > -static void horizontal_decompose97i(DWTELEM *b, int width) > 
+static void horizontal_decompose97i(DWTELEM *b, DWTELEM *temp, int width) > { > - DWTELEM temp[width]; > const int w2 = (width + 1) >> 1; > > lift(temp + w2, b + 1, b, 1, 2, 2, width, W_AM, W_AO, W_AS, 1, > 1); > @@ -389,8 +387,8 @@ static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM > *b1, DWTELEM *b2, > b1[i] += (W_DM * (b0[i] + b2[i]) + W_DO) >> W_DS; > } > > -static void spatial_decompose97i(DWTELEM *buffer, int width, int height, > - int stride) > +static void spatial_decompose97i(DWTELEM *buffer, DWTELEM *temp, > + int width, int height, int stride) > { > int y; > DWTELEM *b0 = buffer + mirror(-4 - 1, height - 1) * stride; > @@ -403,9 +401,9 @@ static void spatial_decompose97i(DWTELEM *buffer, int > width, int height, > DWTELEM *b5 = buffer + mirror(y + 4, height - 1) * stride; > > if (y + 3 < (unsigned)height) > - horizontal_decompose97i(b4, width); > + horizontal_decompose97i(b4, temp, width); > if (y + 4 < (unsigned)height) > - horizontal_decompose97i(b5, width); > + horizontal_decompose97i(b5, temp, width); > > if (y + 3 < (unsigned)height) > vertical_decompose97iH0(b3, b4, b5, width); > @@ -423,20 +421,20 @@ static void spatial_decompose97i(DWTELEM *buffer, int > width, int height, > } > } > > -void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, > - int type, int decomposition_count) > +void ff_spatial_dwt(DWTELEM *buffer, DWTELEM *temp, int width, int height, > + int stride, int type, int decomposition_count) > { > int level; > > for (level = 0; level < decomposition_count; level++) { > switch (type) { > case DWT_97: > - spatial_decompose97i(buffer, > + spatial_decompose97i(buffer, temp, > width >> level, height >> level, > stride << level); > break; > case DWT_53: > - spatial_decompose53i(buffer, > + spatial_decompose53i(buffer, temp, > width >> level, height >> level, > stride << level); > break; > @@ -444,9 +442,8 @@ void ff_spatial_dwt(DWTELEM *buffer, int width, int > height, int stride, > } > } > > -static void 
horizontal_compose53i(IDWTELEM *b, int width) > +static void horizontal_compose53i(IDWTELEM *b, IDWTELEM *temp, int width) > { > - IDWTELEM temp[width]; > const int width2 = width >> 1; > const int w2 = (width + 1) >> 1; > int x; > @@ -506,6 +503,7 @@ static void spatial_compose53i_init(DWTCompose *cs, > IDWTELEM *buffer, > } > > static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer *sb, > + IDWTELEM *temp, > int width, int height, > int stride_line) > { > @@ -535,17 +533,18 @@ static void spatial_compose53i_dy_buffered(DWTCompose > *cs, slice_buffer *sb, > } > > if (y - 1 < (unsigned)height) > - horizontal_compose53i(b0, width); > + horizontal_compose53i(b0, temp, width); > if (y + 0 < (unsigned)height) > - horizontal_compose53i(b1, width); > + horizontal_compose53i(b1, temp, width); > > cs->b0 = b2; > cs->b1 = b3; > cs->y += 2; > } > > -static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, int > width, > - int height, int stride) > +static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, > + IDWTELEM *temp, int width, int height, > + int stride) > { > int y = cs->y; > IDWTELEM *b0 = cs->b0; > @@ -559,27 +558,26 @@ static void spatial_compose53i_dy(DWTCompose *cs, > IDWTELEM *buffer, int width, > vertical_compose53iH0(b0, b1, b2, width); > > if (y - 1 < (unsigned)height) > - horizontal_compose53i(b0, width); > + horizontal_compose53i(b0, temp, width); > if (y + 0 < (unsigned)height) > - horizontal_compose53i(b1, width); > + horizontal_compose53i(b1, temp, width); > > cs->b0 = b2; > cs->b1 = b3; > cs->y += 2; > } > > -static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, > - int height, int stride) > +static void av_unused spatial_compose53i(IDWTELEM *buffer, IDWTELEM *temp, > + int width, int height, int stride) > { > DWTCompose cs; > spatial_compose53i_init(&cs, buffer, height, stride); > while (cs.y <= height) > - spatial_compose53i_dy(&cs, buffer, width, height, stride); > + spatial_compose53i_dy(&cs, 
buffer, temp, width, height, stride); > } > > -void ff_snow_horizontal_compose97i(IDWTELEM *b, int width) > +void ff_snow_horizontal_compose97i(IDWTELEM *b, IDWTELEM *temp, int width) > { > - IDWTELEM temp[width]; > const int w2 = (width + 1) >> 1; > > #if 0 //maybe more understadable but slower > @@ -693,8 +691,9 @@ static void spatial_compose97i_init(DWTCompose *cs, > IDWTELEM *buffer, int height > } > > static void spatial_compose97i_dy_buffered(DWTContext *dsp, DWTCompose *cs, > - slice_buffer *sb, int width, > - int height, int stride_line) > + slice_buffer * sb, IDWTELEM *temp, > + int width, int height, > + int stride_line) > { > int y = cs->y; > > @@ -723,9 +722,9 @@ static void spatial_compose97i_dy_buffered(DWTContext > *dsp, DWTCompose *cs, > } > > if (y - 1 < (unsigned)height) > - dsp->horizontal_compose97i(b0, width); > + dsp->horizontal_compose97i(b0, temp, width); > if (y + 0 < (unsigned)height) > - dsp->horizontal_compose97i(b1, width); > + dsp->horizontal_compose97i(b1, temp, width); > > cs->b0 = b2; > cs->b1 = b3; > @@ -734,8 +733,9 @@ static void spatial_compose97i_dy_buffered(DWTContext > *dsp, DWTCompose *cs, > cs->y += 2; > } > > -static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, int > width, > - int height, int stride) > +static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, > + IDWTELEM *temp, int width, int height, > + int stride) > { > int y = cs->y; > IDWTELEM *b0 = cs->b0; > @@ -755,9 +755,9 @@ static void spatial_compose97i_dy(DWTCompose *cs, > IDWTELEM *buffer, int width, > vertical_compose97iH0(b0, b1, b2, width); > > if (y - 1 < (unsigned)height) > - ff_snow_horizontal_compose97i(b0, width); > + ff_snow_horizontal_compose97i(b0, temp, width); > if (y + 0 < (unsigned)height) > - ff_snow_horizontal_compose97i(b1, width); > + ff_snow_horizontal_compose97i(b1, temp, width); > > cs->b0 = b2; > cs->b1 = b3; > @@ -766,13 +766,13 @@ static void spatial_compose97i_dy(DWTCompose *cs, > IDWTELEM *buffer, int 
width, > cs->y += 2; > } > > -static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, > - int height, int stride) > +static void av_unused spatial_compose97i(IDWTELEM *buffer, IDWTELEM *temp, > + int width, int height, int stride) > { > DWTCompose cs; > spatial_compose97i_init(&cs, buffer, height, stride); > while (cs.y <= height) > - spatial_compose97i_dy(&cs, buffer, width, height, stride); > + spatial_compose97i_dy(&cs, buffer, temp, width, height, stride); > } > > void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer *sb, int > width, > @@ -795,9 +795,9 @@ void ff_spatial_idwt_buffered_init(DWTCompose *cs, > slice_buffer *sb, int width, > } > > void ff_spatial_idwt_buffered_slice(DWTContext *dsp, DWTCompose *cs, > - slice_buffer *slice_buf, int width, > - int height, int stride_line, int type, > - int decomposition_count, int y) > + slice_buffer *slice_buf, IDWTELEM *temp, > + int width, int height, int stride_line, > + int type, int decomposition_count, int y) > { > const int support = type == 1 ? 
3 : 5; > int level; > @@ -808,13 +808,13 @@ void ff_spatial_idwt_buffered_slice(DWTContext *dsp, > DWTCompose *cs, > while (cs[level].y <= FFMIN((y >> level) + support, height >> > level)) { > switch (type) { > case DWT_97: > - spatial_compose97i_dy_buffered(dsp, cs + level, slice_buf, > + spatial_compose97i_dy_buffered(dsp, cs + level, slice_buf, > temp, > width >> level, > height >> level, > stride_line << level); > break; > case DWT_53: > - spatial_compose53i_dy_buffered(cs + level, slice_buf, > + spatial_compose53i_dy_buffered(cs + level, slice_buf, temp, > width >> level, > height >> level, > stride_line << level); > @@ -842,8 +842,9 @@ static void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM > *buffer, int width, > } > } > > -static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int > width, > - int height, int stride, int type, > +static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, > + IDWTELEM *temp, int width, int height, > + int stride, int type, > int decomposition_count, int y) > { > const int support = type == 1 ? 
3 : 5; > @@ -855,26 +856,26 @@ static void ff_spatial_idwt_slice(DWTCompose *cs, > IDWTELEM *buffer, int width, > while (cs[level].y <= FFMIN((y >> level) + support, height >> > level)) { > switch (type) { > case DWT_97: > - spatial_compose97i_dy(cs + level, buffer, width >> level, > + spatial_compose97i_dy(cs + level, buffer, temp, width >> > level, > height >> level, stride << level); > break; > case DWT_53: > - spatial_compose53i_dy(cs + level, buffer, width >> level, > + spatial_compose53i_dy(cs + level, buffer, temp, width >> > level, > height >> level, stride << level); > break; > } > } > } > > -void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, > - int type, int decomposition_count) > +void ff_spatial_idwt(IDWTELEM *buffer, IDWTELEM *temp, int width, int height, > + int stride, int type, int decomposition_count) > { > DWTCompose cs[MAX_DECOMPOSITIONS]; > int y; > ff_spatial_idwt_init(cs, buffer, width, height, stride, type, > decomposition_count); > for (y = 0; y < height; y += 4) > - ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, > + ff_spatial_idwt_slice(cs, buffer, temp, width, height, stride, type, > decomposition_count, y); > } > > @@ -883,7 +884,7 @@ static inline int w_c(void *v, uint8_t *pix1, uint8_t > *pix2, int line_size, > { > int s, i, j; > const int dec_count = w == 8 ? 
3 : 4; > - int tmp[32 * 32]; > + int tmp[32 * 32], tmp2[32]; > int level, ori; > static const int scale[2][2][4][4] = { > { > @@ -925,7 +926,7 @@ static inline int w_c(void *v, uint8_t *pix1, uint8_t > *pix2, int line_size, > pix2 += line_size; > } > > - ff_spatial_dwt(tmp, w, h, 32, type, dec_count); > + ff_spatial_dwt(tmp, tmp2, w, h, 32, type, dec_count); > > s = 0; > assert(w == h); > diff --git a/libavcodec/dwt.h b/libavcodec/dwt.h > index 9229928..771a9bf 100644 > --- a/libavcodec/dwt.h > +++ b/libavcodec/dwt.h > @@ -50,7 +50,7 @@ typedef struct DWTContext { > void (*vertical_compose97i)(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, > IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, > int width); > - void (*horizontal_compose97i)(IDWTELEM *b, int width); > + void (*horizontal_compose97i)(IDWTELEM *b, IDWTELEM *temp, int width); > void (*inner_add_yblock)(const uint8_t *obmc, const int obmc_stride, > uint8_t **block, int b_w, int b_h, int src_x, > int src_y, int src_stride, slice_buffer *sb, > @@ -148,7 +148,7 @@ IDWTELEM *ff_slice_buffer_load_line(slice_buffer *buf, > int line); > void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, > IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, > int width); > -void ff_snow_horizontal_compose97i(IDWTELEM *b, int width); > +void ff_snow_horizontal_compose97i(IDWTELEM *b, IDWTELEM *temp, int width); > void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, > uint8_t **block, int b_w, int b_h, int src_x, > int src_y, int src_stride, slice_buffer *sb, > @@ -157,18 +157,18 @@ void ff_snow_inner_add_yblock(const uint8_t *obmc, > const int obmc_stride, > int ff_w53_32_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); > int ff_w97_32_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); > > -void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, > - int decomposition_count); > +void ff_spatial_dwt(int *buffer, int *temp, int width, int height, int > stride, > + 
int type, int decomposition_count); > > void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer *sb, int > width, > int height, int stride_line, int type, > int decomposition_count); > void ff_spatial_idwt_buffered_slice(DWTContext *dsp, DWTCompose *cs, > - slice_buffer *slice_buf, int width, > - int height, int stride_line, int type, > - int decomposition_count, int y); > -void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, > - int type, int decomposition_count); > + slice_buffer *slice_buf, IDWTELEM *temp, > + int width, int height, int stride_line, > + int type, int decomposition_count, int > y); > +void ff_spatial_idwt(IDWTELEM *buffer, IDWTELEM *temp, int width, int height, > + int stride, int type, int decomposition_count); > > void ff_dwt_init(DWTContext *c); > void ff_dwt_init_x86(DWTContext *c); > diff --git a/libavcodec/snow.c b/libavcodec/snow.c > index 384cda8..edd7d07 100644 > --- a/libavcodec/snow.c > +++ b/libavcodec/snow.c > @@ -440,6 +440,8 @@ av_cold int ff_snow_common_init(AVCodecContext *avctx){ > > s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM)); > s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME > this does not belong here > + s->temp_dwt_buffer = av_mallocz(width * sizeof(DWTELEM)); > + s->temp_idwt_buffer = av_mallocz(width * sizeof(IDWTELEM));
Uhm, are the return values of any of these av_mallocz() calls checked for allocation failure? The patch looks OK though. _______________________________________________ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel