Hi, 2014-02-10 9:03 GMT+01:00 Kostya Shishkov <[email protected]>: >> What you suggest is still valid, and would catch things even if they >> changed, though. > > Maybe it's not worth warning about it at all.
Well, the format allows 8N coefficients with N>=1, so a file could theoretically contain that. Whether it is crafted to crash the decoder is another issue. And I was following Luca's recommendation, which makes sense to me. > I'd simply zero all coeffs before decoding but that's me. I understand that this means, as you mentioned, zeroing up to the end of the buffer. But that means up to (256-8) excess coefficients. Not a big deal for an infrequently called function, but still. I went for an intermediate, still safe solution: I allocate extra space in the coeff buffer and always zero as much as the DSP implementation could overread, whatever the order. -- Christophe
From e9b05761b196eb81d7a3536ccabaaeb523adec37 Mon Sep 17 00:00:00 2001 From: Christophe Gisquet <[email protected]> Date: Sat, 24 Nov 2012 15:55:49 +0100 Subject: [PATCH 2/2] wma lossless: reuse scalarproduct_and_madd_int16 This is done by padding the coefficient buffers with 0s, as this guarantees that the DSP function will neither overread nor generate invalid results, as it processes batches of 16 elements. However, no sample with orders not multiple of 16 is known, so request one if it is found using that kind of order. Approximate relative speedup depending on instruction set: plain C: -6% mmxext: 51% sse2: 54% --- libavcodec/wmalosslessdec.c | 61 +++++++++++++++++++++++---------------------- 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/libavcodec/wmalosslessdec.c b/libavcodec/wmalosslessdec.c index 2f341c0..0280edf 100644 --- a/libavcodec/wmalosslessdec.c +++ b/libavcodec/wmalosslessdec.c @@ -29,6 +29,7 @@ #include "internal.h" #include "get_bits.h" #include "put_bits.h" +#include "dsputil.h" #include "wma.h" #include "wma_common.h" @@ -44,6 +45,7 @@ #define WMALL_BLOCK_MAX_SIZE (1 << WMALL_BLOCK_MAX_BITS) ///< maximum block size #define WMALL_BLOCK_SIZES (WMALL_BLOCK_MAX_BITS - WMALL_BLOCK_MIN_BITS + 1) ///< possible block sizes +#define WMALL_COEFF_PAD_SIZE 16 ///< pad coef buffers with 0 for use with SIMD functions /** * @brief frame-specific decoder context for a single channel @@ -66,6 +68,7 @@ typedef struct { typedef struct WmallDecodeCtx { /* generic decoder variables */ AVCodecContext *avctx; + DSPContext dsp; ///< accelerated DSP functions AVFrame *frame; uint8_t frame_data[MAX_FRAMESIZE + FF_INPUT_BUFFER_PADDING_SIZE]; ///< compressed frame data PutBitContext pb; ///< context for filling the frame_data buffer @@ -141,9 +144,9 @@ typedef struct WmallDecodeCtx { int scaling; int coefsend; int bitsend; - int16_t coefs[MAX_ORDER]; - int16_t lms_prevvalues[MAX_ORDER * 2]; - int16_t lms_updates[MAX_ORDER * 2]; + DECLARE_ALIGNED(16, 
int16_t, coefs)[MAX_ORDER + WMALL_COEFF_PAD_SIZE]; + DECLARE_ALIGNED(16, int16_t, lms_prevvalues)[MAX_ORDER * 2]; + DECLARE_ALIGNED(16, int16_t, lms_updates)[MAX_ORDER * 2]; int recent; } cdlms[2][9]; @@ -179,6 +182,7 @@ static av_cold int decode_init(AVCodecContext *avctx) int i, log2_max_num_subframes; s->avctx = avctx; + ff_dsputil_init(&s->dsp, avctx); init_put_bits(&s->pb, s->frame_data, MAX_FRAMESIZE); if (avctx->extradata_size >= 18) { @@ -452,6 +456,13 @@ static int decode_cdlms(WmallDecodeCtx *s) s->cdlms[0][0].order = 0; return AVERROR_INVALIDDATA; } + if (s->cdlms[c][i].order & (WMALL_COEFF_PAD_SIZE-1)) { + static int warned; + if (!warned) + avpriv_request_sample(s->avctx, "CDLMS of order %d", + s->cdlms[c][i].order); + warned = 1; + } } for (i = 0; i < s->cdlms_ttl[c]; i++) @@ -477,6 +488,11 @@ static int decode_cdlms(WmallDecodeCtx *s) (get_bits(&s->gb, s->cdlms[c][i].bitsend) << shift_l) >> shift_r; } } + + // pad the coeff buffers with 0s up to a 16-aligned position + for (i = 0; i < s->cdlms_ttl[c]; i++) + memset(s->cdlms[c][i].coefs + s->cdlms[c][i].order, 0, + WMALL_COEFF_PAD_SIZE * sizeof(s->cdlms[c][i].coefs[0])); } return 0; @@ -686,35 +702,11 @@ static void revert_mclms(WmallDecodeCtx *s, int tile_size) } } -static int lms_predict(WmallDecodeCtx *s, int ich, int ilms) -{ - int pred = 0, icoef; - int recent = s->cdlms[ich][ilms].recent; - - for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++) - pred += s->cdlms[ich][ilms].coefs[icoef] * - s->cdlms[ich][ilms].lms_prevvalues[icoef + recent]; - - return pred; -} - -static void lms_update(WmallDecodeCtx *s, int ich, int ilms, - int input, int residue) +static void lms_update(WmallDecodeCtx *s, int ich, int ilms, int input) { - int icoef; int recent = s->cdlms[ich][ilms].recent; int range = 1 << s->bits_per_sample - 1; - if (residue < 0) { - for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++) - s->cdlms[ich][ilms].coefs[icoef] -= - s->cdlms[ich][ilms].lms_updates[icoef + recent]; - } 
else if (residue > 0) { - for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++) - s->cdlms[ich][ilms].coefs[icoef] += - s->cdlms[ich][ilms].lms_updates[icoef + recent]; - } - if (recent) recent--; else { @@ -775,6 +767,9 @@ static void use_normal_update_speed(WmallDecodeCtx *s, int ich) s->update_speed[ich] = 8; } +/** Get sign of integer (1 for positive, -1 for negative and 0 for zero) */ +#define WMASIGN(x) ((x > 0) - (x < 0)) + static void revert_cdlms(WmallDecodeCtx *s, int ch, int coef_begin, int coef_end) { @@ -785,9 +780,15 @@ static void revert_cdlms(WmallDecodeCtx *s, int ch, for (icoef = coef_begin; icoef < coef_end; icoef++) { pred = 1 << (s->cdlms[ch][ilms].scaling - 1); residue = s->channel_residues[ch][icoef]; - pred += lms_predict(s, ch, ilms); + pred += s->dsp.scalarproduct_and_madd_int16(s->cdlms[ch][ilms].coefs, + s->cdlms[ch][ilms].lms_prevvalues + + s->cdlms[ch][ilms].recent, + s->cdlms[ch][ilms].lms_updates + + s->cdlms[ch][ilms].recent, + s->cdlms[ch][ilms].order, + WMASIGN(residue)); input = residue + (pred >> s->cdlms[ch][ilms].scaling); - lms_update(s, ch, ilms, input, residue); + lms_update(s, ch, ilms, input); s->channel_residues[ch][icoef] = input; } } -- 1.8.0.msysgit.0
_______________________________________________ libav-devel mailing list [email protected] https://lists.libav.org/mailman/listinfo/libav-devel
