After this patch, the compressed data can get as close to destsize as possible without exceeding it.
Signed-off-by: Gao Xiang <[email protected]> --- lzma/lzma_encoder.c | 133 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 132 insertions(+), 1 deletion(-) diff --git a/lzma/lzma_encoder.c b/lzma/lzma_encoder.c index b213504..98cde22 100644 --- a/lzma/lzma_encoder.c +++ b/lzma/lzma_encoder.c @@ -10,7 +10,7 @@ */ #include <stdlib.h> #include <ez/bitops.h> -#include "rc_encoder.h" +#include "rc_encoder_ckpt.h" #include "lzma_common.h" #include "mf.h" @@ -72,12 +72,23 @@ struct lzma_length_encoder { probability high[kLenNumHighSymbols]; }; +struct lzma_encoder_destsize { + struct lzma_rc_ckpt cp; + + uint8_t *op; + uint32_t capacity; + + uint32_t esz; + uint8_t ending[LZMA_REQUIRED_INPUT_MAX + 5]; +}; + struct lzma_encoder { struct lzma_mf mf; struct lzma_rc_encoder rc; uint8_t *op, *oend; bool finish; + bool need_eopm; unsigned int state; @@ -109,6 +120,8 @@ struct lzma_encoder { struct lzma_match matches[kMatchMaxLen]; unsigned int matches_count; } fast; + + struct lzma_encoder_destsize *dstsize; }; #define change_pair(smalldist, bigdist) (((bigdist) >> 7) > (smalldist)) @@ -449,6 +462,46 @@ static void rep_match(struct lzma_encoder *lzma, const uint32_t pos_state, } } +struct lzma_endstate { + struct lzma_length_encoder lenEnc; + + probability simpleMatch[2]; + probability posSlot[kNumPosSlotBits]; + probability posAlign[kNumAlignBits]; +}; + +static void encode_eopm_stateless(struct lzma_encoder *lzma, + struct lzma_endstate *endstate) +{ + const uint32_t pos_state = + (lzma->mf.cur - lzma->mf.lookahead) & lzma->pbMask; + const unsigned int state = lzma->state; + unsigned int i; + + endstate->simpleMatch[0] = lzma->isMatch[state][pos_state]; + endstate->simpleMatch[1] = lzma->isRep[state]; + endstate->lenEnc = lzma->lenEnc; + + rc_bit(&lzma->rc, endstate->simpleMatch, 1); + rc_bit(&lzma->rc, endstate->simpleMatch + 1, 0); + length(&lzma->rc, &endstate->lenEnc, pos_state, kMatchMinLen); + + for (i = 0; i < kNumPosSlotBits; ++i) { + endstate->posSlot[i] 
= + lzma->posSlotEncoder[0][(1 << (i + 1)) - 1]; + rc_bit(&lzma->rc, endstate->posSlot + i, 1); + } + + rc_direct(&lzma->rc, (1 << (30 - kNumAlignBits)) - 1, + 30 - kNumAlignBits); + + for (i = 0; i < kNumAlignBits; ++i) { + endstate->posAlign[i] = + lzma->posAlignEncoder[(1 << (i + 1)) - 1]; + rc_bit(&lzma->rc, endstate->posAlign + i, 1); + } +} + static void encode_eopm(struct lzma_encoder *lzma) { const uint32_t pos_state = @@ -460,8 +513,86 @@ static void encode_eopm(struct lzma_encoder *lzma) match(lzma, pos_state, UINT32_MAX, kMatchMinLen); } +static int __flush_symbol_destsize(struct lzma_encoder *lzma) +{ + uint8_t *op2; + unsigned int symbols_size; + unsigned int esz = 0; + + if (lzma->dstsize->capacity < 5) + return -ENOSPC; + + if (!lzma->rc.pos) { + rc_write_checkpoint(&lzma->rc, &lzma->dstsize->cp); + lzma->dstsize->op = lzma->op; + } + + if (rc_encode(&lzma->rc, &lzma->op, lzma->oend)) + return -ENOSPC; + + op2 = lzma->op; + symbols_size = op2 - lzma->dstsize->op; + if (lzma->dstsize->capacity < symbols_size + 5) + goto err_enospc; + + if (!lzma->need_eopm) + goto out; + + if (lzma->dstsize->capacity < symbols_size + + LZMA_REQUIRED_INPUT_MAX + 5) { + struct lzma_rc_ckpt cp2; + struct lzma_endstate endstate; + uint8_t ending[sizeof(lzma->dstsize->ending)]; + uint8_t *ep; + + rc_write_checkpoint(&lzma->rc, &cp2); + encode_eopm_stateless(lzma, &endstate); + rc_flush(&lzma->rc); + + ep = ending; + if (rc_encode(&lzma->rc, &ep, ending + sizeof(ending))) + DBG_BUGON(1); + + esz = ep - ending; + + if (lzma->dstsize->capacity < symbols_size + esz) + goto err_enospc; + rc_restore_checkpoint(&lzma->rc, &cp2); + + memcpy(lzma->dstsize->ending, ending, sizeof(ending)); + lzma->dstsize->esz = esz; + } + +out: + lzma->dstsize->capacity -= symbols_size; + lzma->dstsize->esz = esz; + return 0; + +err_enospc: + rc_restore_checkpoint(&lzma->rc, &lzma->dstsize->cp); + lzma->op = lzma->dstsize->op; + lzma->dstsize->capacity = 0; + return -ENOSPC; +} + static int 
flush_symbol(struct lzma_encoder *lzma) { + if (lzma->rc.count && lzma->dstsize) { + const unsigned int safemargin = + 5 + (LZMA_REQUIRED_INPUT_MAX << !!lzma->need_eopm); + uint8_t *op; + bool ret; + + if (lzma->dstsize->capacity < safemargin) + return __flush_symbol_destsize(lzma); + + op = lzma->op; + ret = rc_encode(&lzma->rc, &lzma->op, lzma->oend); + + lzma->dstsize->capacity -= lzma->op - op; + return ret ? -ENOSPC : 0; + } + return rc_encode(&lzma->rc, &lzma->op, lzma->oend) ? -ENOSPC : 0; } -- 2.20.1
