On 1/8/17, Steinar H. Gunderson <steinar+ffm...@gunderson.no> wrote: > + * for the longest (10-bit) codes. > + */ > +#define ALPHA_VLC_BITS 5 > + > +typedef struct SHQContext { > + AVCodecContext *avctx; > + BlockDSPContext bdsp; > + IDCTDSPContext idsp; > + ScanTable intra_scantable; > + int quant_matrix[64]; > + enum { SHQ_SUBSAMPLING_420, SHQ_SUBSAMPLING_422, SHQ_SUBSAMPLING_444 } > + subsampling; > + enum { SHQ_NO_ALPHA, SHQ_RLE_ALPHA, SHQ_DCT_ALPHA } alpha_type; > +} SHQContext; > + > + > +/* AC codes: Very similar but not identical to MPEG-2. */ > +static uint16_t speedhq_vlc[123][2] = {
Can this be uint8_t too? > + {0x02, 2}, {0x06, 3}, {0x07, 4}, {0x1c, 5}, > + {0x1d, 5}, {0x05, 6}, {0x04, 6}, {0x7b, 7}, > + {0x7c, 7}, {0x23, 8}, {0x22, 8}, {0xfa, 8}, > + {0xfb, 8}, {0xfe, 8}, {0xff, 8}, {0x1f,14}, > + {0x1e,14}, {0x1d,14}, {0x1c,14}, {0x1b,14}, > + {0x1a,14}, {0x19,14}, {0x18,14}, {0x17,14}, > + {0x16,14}, {0x15,14}, {0x14,14}, {0x13,14}, > + {0x12,14}, {0x11,14}, {0x10,14}, {0x18,15}, > + {0x17,15}, {0x16,15}, {0x15,15}, {0x14,15}, > + {0x13,15}, {0x12,15}, {0x11,15}, {0x10,15}, [...] > + speedhq_run, > + speedhq_level, > +}; > + > +/* NOTE: The first element is always 16, unscaled. */ > +static const uint16_t unscaled_quant_matrix[64] = { This can be uint8_t. > + 16, 16, 19, 22, 26, 27, 29, 34, > + 16, 16, 22, 24, 27, 29, 34, 37, > + 19, 22, 26, 27, 29, 34, 34, 38, > + 22, 22, 26, 27, 29, 34, 37, 40, > + 22, 26, 27, 29, 32, 35, 40, 48, > + 26, 27, 29, 32, 35, 40, 48, 58, > + 26, 27, 29, 34, 38, 46, 56, 69, > + 27, 29, 35, 38, 46, 56, 69, 83 > +}; > + > +static uint8_t ff_speedhq_static_rl_table_store[2][2*MAX_RUN + MAX_LEVEL + > 3]; > + > +static VLC ff_dc_lum_vlc_le; > +static VLC ff_dc_chroma_vlc_le; > +static VLC ff_dc_alpha_run_vlc_le; > +static VLC ff_dc_alpha_level_vlc_le; > + > +static inline int decode_dc_le(GetBitContext *gb, int component) > +{ > + int code, diff; > + > + if (component == 0 || component == 3) { > + code = get_vlc2(gb, ff_dc_lum_vlc_le.table, DC_VLC_BITS, 2); > + } else { > + code = get_vlc2(gb, ff_dc_chroma_vlc_le.table, DC_VLC_BITS, 2); > + } > + if (code < 0) { > + av_log(NULL, AV_LOG_ERROR, "invalid dc code at\n"); > + return 0xffff; Why this specific return value? I suppose decoding of the other blocks still continues?
> + } > + if (!code) { > + diff = 0; > + } else { > + diff = get_xbits_le(gb, code); > + } > + return diff; > +} > + > +static inline int decode_alpha_block(const SHQContext *s, GetBitContext > *gb, uint8_t last_alpha[16], uint8_t *dest, int linesize) > +{ > + uint8_t block[128]; > + int i = 0, x, y; > + > + memset(block, 0, sizeof(block)); > + > + { > + OPEN_READER(re, gb); > + > + for ( ;; ) { > + int run, level; > + > + UPDATE_CACHE_LE(re, gb); > + GET_VLC(run, re, gb, ff_dc_alpha_run_vlc_le.table, > ALPHA_VLC_BITS, 2); > + > + if (run == 128) break; > + i += run; > + if (i >= 128) > + return AVERROR_INVALIDDATA; > + > + UPDATE_CACHE_LE(re, gb); > + GET_VLC(level, re, gb, ff_dc_alpha_level_vlc_le.table, > ALPHA_VLC_BITS, 2); > + block[i++] = level; > + } > + > + CLOSE_READER(re, gb); > + } > + > + for (y = 0; y < 8; y++) { > + for (x = 0; x < 16; x++) { > + last_alpha[x] -= block[y * 16 + x]; > + } > + memcpy(dest, last_alpha, 16); > + dest += linesize; > + } > + > + return 0; > +} > + > +static inline int decode_dct_block(const SHQContext *s, GetBitContext *gb, > int last_dc[4], int component, uint8_t *dest, int linesize) > +{ > + const int *quant_matrix = s->quant_matrix; > + const uint8_t *scantable = s->intra_scantable.permutated; > + int16_t block[64]; > + int dc_offset; > + > + s->bdsp.clear_block(block); > + > + dc_offset = decode_dc_le(gb, component); > + last_dc[component] -= dc_offset; /* Note: Opposite of most codecs. */ > + block[scantable[0]] = last_dc[component]; /* quant_matrix[0] is always > 16. */ > + > + /* Read AC coefficients. 
*/ > + { > + int i = 0; > + OPEN_READER(re, gb); > + for ( ;; ) { > + int level, run; > + UPDATE_CACHE_LE(re, gb); > + GET_RL_VLC(level, run, re, gb, ff_rl_speedhq.rl_vlc[0], > + TEX_VLC_BITS, 2, 0); > + if (level == 127) { > + break; > + } else if (level) { > + i += run; > + if (i > MAX_INDEX) > + return AVERROR_INVALIDDATA; > + /* If next bit is 1, level = -level */ > + level = (level ^ SHOW_SBITS(re, gb, 1)) - > + SHOW_SBITS(re, gb, 1); > + LAST_SKIP_BITS(re, gb, 1); > + } else { > + /* Escape. */ > +#if MIN_CACHE_BITS < 6 + 6 + 12 > +#error MIN_CACHE_BITS is too small for the escape code, add UPDATE_CACHE > +#endif > + run = SHOW_UBITS(re, gb, 6) + 1; > + SKIP_BITS(re, gb, 6); > + level = SHOW_UBITS(re, gb, 12) - 2048; > + LAST_SKIP_BITS(re, gb, 12); > + > + i += run; > + if (i > MAX_INDEX) > + return AVERROR_INVALIDDATA; > + } > + > + block[scantable[i]] = (level * quant_matrix[i]) >> 4; > + } > + CLOSE_READER(re, gb); > + } > + > + s->idsp.idct_put(dest, linesize, block); > + > + return 0; > +} > + > +static int decode_speedhq_field(const SHQContext *s, const uint8_t *buf, > int buf_size, AVFrame *frame, int field_number, int start, int end, int > line_stride) > +{ > + int ret, slice_number, slice_offsets[5]; > + int linesize_y = frame->linesize[0] * line_stride; > + int linesize_cb = frame->linesize[1] * line_stride; > + int linesize_cr = frame->linesize[2] * line_stride; > + int linesize_a; > + > + if (s->alpha_type != SHQ_NO_ALPHA) > + linesize_a = frame->linesize[3] * line_stride; > + > + if (end < start || end - start < 3 || end > buf_size) > + return AVERROR_INVALIDDATA; > + > + slice_offsets[0] = start; > + slice_offsets[4] = end; > + for (slice_number = 1; slice_number < 4; slice_number++) { > + uint32_t last_offset, slice_len; > + > + last_offset = slice_offsets[slice_number - 1]; > + slice_len = AV_RL24(buf + last_offset); > + slice_offsets[slice_number] = last_offset + slice_len; > + > + if (slice_len < 3 || slice_offsets[slice_number] > end - 3) > 
+ return AVERROR_INVALIDDATA; > + } > + > + for (slice_number = 0; slice_number < 4; slice_number++) { > + GetBitContext gb; > + uint32_t slice_begin, slice_end; > + int x, y; > + > + slice_begin = slice_offsets[slice_number]; > + slice_end = slice_offsets[slice_number + 1]; > + > + if ((ret = init_get_bits8(&gb, buf + slice_begin + 3, slice_end - > slice_begin - 3)) < 0) > + return ret; > + > + for (y = slice_number * 16 * line_stride; y < frame->height; y += > line_stride * 64) { > + uint8_t *dest_y, *dest_cb, *dest_cr, *dest_a; > + int last_dc[4] = { 1024, 1024, 1024, 1024 }; > + uint8_t last_alpha[16]; > + > + memset(last_alpha, 255, sizeof(last_alpha)); > + > + dest_y = frame->data[0] + frame->linesize[0] * (y + > field_number); > + if (s->subsampling == SHQ_SUBSAMPLING_420) { > + dest_cb = frame->data[1] + frame->linesize[1] * (y/2 + > field_number); > + dest_cr = frame->data[2] + frame->linesize[2] * (y/2 + > field_number); > + } else { > + dest_cb = frame->data[1] + frame->linesize[1] * (y + > field_number); > + dest_cr = frame->data[2] + frame->linesize[2] * (y + > field_number); > + } > + if (s->alpha_type != SHQ_NO_ALPHA) { > + dest_a = frame->data[3] + frame->linesize[3] * (y + > field_number); > + } > + > + for (x = 0; x < frame->width; x += 16) { > + /* Decode the four luma blocks. */ > + if ((ret = decode_dct_block(s, &gb, last_dc, 0, dest_y, > linesize_y)) < 0) > + return ret; > + if ((ret = decode_dct_block(s, &gb, last_dc, 0, dest_y + 8, > linesize_y)) < 0) > + return ret; > + if ((ret = decode_dct_block(s, &gb, last_dc, 0, dest_y + 8 > * linesize_y, linesize_y)) < 0) > + return ret; > + if ((ret = decode_dct_block(s, &gb, last_dc, 0, dest_y + 8 > * linesize_y + 8, linesize_y)) < 0) > + return ret; > + > + /* > + * Decode the first chroma block. For 4:2:0, this is the > only one; > + * for 4:2:2, it's the top block; for 4:4:4, it's the > top-left block. 
> + */ > + if ((ret = decode_dct_block(s, &gb, last_dc, 1, dest_cb, > linesize_cb)) < 0) > + return ret; > + if ((ret = decode_dct_block(s, &gb, last_dc, 2, dest_cr, > linesize_cr)) < 0) > + return ret; > + > + if (s->subsampling != SHQ_SUBSAMPLING_420) { > + /* For 4:2:2, this is the bottom block; for 4:4:4, it's > the bottom-left block. */ > + if ((ret = decode_dct_block(s, &gb, last_dc, 1, dest_cb > + 8 * linesize_cb, linesize_cb)) < 0) > + return ret; > + if ((ret = decode_dct_block(s, &gb, last_dc, 2, dest_cr > + 8 * linesize_cr, linesize_cr)) < 0) > + return ret; > + > + if (s->subsampling == SHQ_SUBSAMPLING_444) { > + /* Top-right and bottom-right blocks. */ > + if ((ret = decode_dct_block(s, &gb, last_dc, 1, > dest_cb + 8, linesize_cb)) < 0) > + return ret; > + if ((ret = decode_dct_block(s, &gb, last_dc, 2, > dest_cr + 8, linesize_cr)) < 0) > + return ret; > + if ((ret = decode_dct_block(s, &gb, last_dc, 1, > dest_cb + 8 * linesize_cb + 8, linesize_cb)) < 0) > + return ret; > + if ((ret = decode_dct_block(s, &gb, last_dc, 2, > dest_cr + 8 * linesize_cr + 8, linesize_cr)) < 0) > + return ret; > + > + dest_cb += 8; > + dest_cr += 8; > + } > + } > + dest_y += 16; > + dest_cb += 8; > + dest_cr += 8; > + > + if (s->alpha_type == SHQ_RLE_ALPHA) { > + /* Alpha coded using 16x8 RLE blocks. */ > + if ((ret = decode_alpha_block(s, &gb, last_alpha, > dest_a, linesize_a)) < 0) > + return ret; > + if ((ret = decode_alpha_block(s, &gb, last_alpha, > dest_a + 8 * linesize_a, linesize_a)) < 0) > + return ret; > + dest_a += 16; > + } else if (s->alpha_type == SHQ_DCT_ALPHA) { > + /* Alpha encoded exactly like luma. 
*/ > + if ((ret = decode_dct_block(s, &gb, last_dc, 3, dest_a, > linesize_a)) < 0) > + return ret; > + if ((ret = decode_dct_block(s, &gb, last_dc, 3, dest_a > + 8, linesize_a)) < 0) > + return ret; > + if ((ret = decode_dct_block(s, &gb, last_dc, 3, dest_a > + 8 * linesize_a, linesize_a)) < 0) > + return ret; > + if ((ret = decode_dct_block(s, &gb, last_dc, 3, dest_a > + 8 * linesize_a + 8, linesize_a)) < 0) > + return ret; > + dest_a += 16; > + } > + } > + } > + } > + > + return 0; > +} > + > +static void compute_quant_matrix(int *output, int qscale) > +{ > + int i; > + for (i = 0; i < 64; i++) output[i] = unscaled_quant_matrix[i] * qscale; > +} > + > +static int speedhq_decode_frame(AVCodecContext *avctx, > + void *data, int *got_frame, > + AVPacket *avpkt) > +{ > + SHQContext * const s = avctx->priv_data; > + const uint8_t *buf = avpkt->data; > + int buf_size = avpkt->size; > + AVFrame *frame = data; > + uint8_t quality; > + uint32_t second_field_offset; > + int ret; > + > + if (buf_size < 4) > + return AVERROR_INVALIDDATA; > + > + quality = buf[0]; > + if (quality >= 100) { > + return AVERROR_INVALIDDATA; > + } > + > + compute_quant_matrix(s->quant_matrix, 100 - quality); > + > + second_field_offset = AV_RL24(buf + 1); > + if (second_field_offset >= buf_size - 3) { > + return AVERROR_INVALIDDATA; > + } > + > + avctx->coded_width = FFALIGN(avctx->width, 16); > + avctx->coded_height = FFALIGN(avctx->height, 16); > + > + if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) { > + return ret; > + } > + frame->key_frame = 1; > + > + if (second_field_offset == 4) { > + /* > + * Overlapping first and second fields is used to signal > + * encoding only a single field (the second field then comes > + * as a separate, later frame). 
> + */ > + frame->height >>= 1; > + if ((ret = decode_speedhq_field(s, buf, buf_size, frame, 0, 4, > buf_size, 1)) < 0) > + return ret; > + } else { > + if ((ret = decode_speedhq_field(s, buf, buf_size, frame, 0, 4, > second_field_offset, 2)) < 0) > + return ret; > + if ((ret = decode_speedhq_field(s, buf, buf_size, frame, 1, > second_field_offset, buf_size, 2)) < 0) > + return ret; > + } > + > + *got_frame = 1; > + return buf_size; > +} > + > +/* > + * Alpha VLC. Run and level are independently coded, and would be > + * outside the default limits for MAX_RUN/MAX_LEVEL, so we don't > + * bother with combining them into one table. > + */ > +static av_cold void compute_alpha_vlcs(void) > +{ > + uint16_t run_code[129], level_code[256]; > + uint8_t run_bits[129], level_bits[256]; > + int run, level; > + > + for (run = 0; run < 128; run++) { > + if (!run) { > + /* 0 -> 0. */ > + run_code[run] = 0; > + run_bits[run] = 1; > + } else if (run <= 4) { > + /* 10xx -> xx plus 1. */ > + run_code[run] = ((run - 1) << 2) | 1; > + run_bits[run] = 4; > + } else { > + /* 111xxxxxxx -> xxxxxxxx. */ > + run_code[run] = (run << 3) | 7; > + run_bits[run] = 10; > + } > + } > + > + /* 110 -> EOB. */ > + run_code[128] = 3; > + run_bits[128] = 3; > + > + INIT_LE_VLC_STATIC(&ff_dc_alpha_run_vlc_le, ALPHA_VLC_BITS, 129, > + run_bits, 1, 1, > + run_code, 2, 2, 160); > + > + for (level = 0; level < 256; level++) { > + int8_t signed_level = (int8_t)level; > + int abs_signed_level = abs(signed_level); > + int sign = (signed_level < 0) ? 1 : 0; > + > + if (abs_signed_level == 1) { > + /* 1s -> -1 or +1 (depending on sign bit). */ > + level_code[level] = (sign << 1) | 1; > + level_bits[level] = 2; > + } else if (abs_signed_level >= 2 && abs_signed_level <= 5) { > + /* 01sxx -> xx plus 2 (2..5 or -2..-5, depending on sign bit). 
> */ > + level_code[level] = ((abs_signed_level - 2) << 3) | (sign << 2) > | 2; > + level_bits[level] = 5; > + } else { > + /* > + * 00xxxxxxxx -> xxxxxxxx, in two's complement. 0 is > technically an > + * illegal code (that would be encoded by increasing run), but > it > + * doesn't hurt and simplifies indexing. > + */ > + level_code[level] = level << 2; > + level_bits[level] = 10; > + } > + } > + > + INIT_LE_VLC_STATIC(&ff_dc_alpha_level_vlc_le, ALPHA_VLC_BITS, 256, > + level_bits, 1, 1, > + level_code, 2, 2, 288); > +} > + > +static uint32_t reverse(uint32_t num, int bits) > +{ > + return bitswap_32(num) >> (32 - bits); > +} > + > +static void reverse_code(const uint16_t *code, const uint8_t *bits, > + uint16_t *reversed_code, int num_entries) > +{ > + int i; > + for (i = 0; i < num_entries; i++) { > + reversed_code[i] = reverse(code[i], bits[i]); > + } > +} > + > +static av_cold int speedhq_decode_init(AVCodecContext *avctx) > +{ > + static int done = 0; > + uint16_t ff_mpeg12_vlc_dc_lum_code_reversed[12]; > + uint16_t ff_mpeg12_vlc_dc_chroma_code_reversed[12]; > + SHQContext * const s = avctx->priv_data; > + > + s->avctx = avctx; > + > + if (!done) { > + int i; > + > + /* Exactly the same as MPEG-2, except little-endian. */ > + reverse_code(ff_mpeg12_vlc_dc_lum_code, > + ff_mpeg12_vlc_dc_lum_bits, > + ff_mpeg12_vlc_dc_lum_code_reversed, > + 12); > + INIT_LE_VLC_STATIC(&ff_dc_lum_vlc_le, DC_VLC_BITS, 12, > + ff_mpeg12_vlc_dc_lum_bits, 1, 1, > + ff_mpeg12_vlc_dc_lum_code_reversed, 2, 2, 512); > + reverse_code(ff_mpeg12_vlc_dc_chroma_code, > + ff_mpeg12_vlc_dc_chroma_bits, > + ff_mpeg12_vlc_dc_chroma_code_reversed, > + 12); What about storing the reversed codes directly in the source code, so that this step is not required? _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel