Hi, I have finished the LZW implementation. Three notes: - With cache < 5 we cannot guarantee that no characters are lost in a compression-decompression cycle (the footer sequence takes several chars). - It seems that with cache = 1 the decoder reaches a point (the first bitsize change of the LZW codes) from which it stops producing sensible output. I would have fixed this, but given the previous issue (which is an architecture problem, not an implementation one) it does not make sense to do so. - Also, the decompression apply function might seem ugly with those 6 goto's (Dijkstra's nightmare :p) but it is actually much cleaner than when it was goto-less. Actually, that trick makes the algorithm very similar to the old stateless implementation, and as such much easier to debug.
File attached. BTW, I am very busy for the rest of October. Is it too much to wait for the predictor's reimplementation till the first week of November? Regards, JP
# Bazaar merge directive format 2 (Bazaar 0.90) # revision_id: [EMAIL PROTECTED] # target_branch: http://bzr.savannah.gnu.org/r/pdf/libgnupdf/branches\ # /trunk/ # testament_sha1: 426b2081ac672f50e885635475c74f31a894c9ac # timestamp: 2008-10-12 14:35:06 +0200 # base_revision_id: [EMAIL PROTECTED] # # Begin patch === modified file 'src/Makefile.am' --- src/Makefile.am 2008-10-05 15:08:05 +0000 +++ src/Makefile.am 2008-10-12 12:34:46 +0000 @@ -44,7 +44,8 @@ base/pdf-stm-filter.h base/pdf-stm-filter.c \ base/pdf-stm-f-null.h base/pdf-stm-f-null.c \ base/pdf-stm-f-ahex.h base/pdf-stm-f-ahex.c \ - base/pdf-stm-f-rl.h base/pdf-stm-f-rl.c + base/pdf-stm-f-rl.h base/pdf-stm-f-rl.c \ + base/pdf-stm-f-lzw.h base/pdf-stm-f-lzw.c if ZLIB STM_MODULE_SOURCES += base/pdf-stm-f-flate.c base/pdf-stm-f-flate.h === modified file 'src/base/pdf-stm-f-lzw.c' --- src/base/pdf-stm-f-lzw.c 2008-08-28 22:46:53 +0000 +++ src/base/pdf-stm-f-lzw.c 2008-10-12 12:34:46 +0000 @@ -27,21 +27,20 @@ #include <pdf-alloc.h> #include <pdf-stm-f-lzw.h> -#define MIN_BITSIZE 9 -#define MAX_BITSIZE 12 -#define MAX_DICTSIZE (1 << MAX_BITSIZE) - -#define MAX_COMPRESSION_FACTOR 1.5 - -#define NULL_INDEX ~0U - -enum { - LZW_RESET_CODE = 256, - LZW_EOD_CODE, - LZW_FIRST_CODE -} lzw_special_codes; - - +#define LZW_DEFAULT_EARLY_CHANGE 0 + +#define LZW_CACHE_SIZE 16 +#define LZW_MIN_BITSIZE 9 +#define LZW_MAX_BITSIZE 12 +#define LZW_MAX_DICTSIZE (1 << LZW_MAX_BITSIZE) +#define LZW_NULL_INDEX ~0U + +enum lzw_special_codes_e + { + LZW_RESET_CODE = 256, + LZW_EOD_CODE, + LZW_FIRST_CODE + }; /* -- LZW code output/input -- */ @@ -49,54 +48,68 @@ * Object to read and write codes of variable bitsize in a buffer. * Warning: using both get and put functions may break the buffer. 
*/ -typedef struct lzw_buffer_s +struct lzw_buffer_s { - pdf_char_t* curp; - pdf_char_t* endp; + pdf_stm_buffer_t buf; + pdf_char_t cache [LZW_CACHE_SIZE]; + pdf_size_t cache_size; unsigned long valbuf; unsigned valbits; unsigned bitsize; unsigned maxval; -} lzw_buffer_t; +}; +typedef struct lzw_buffer_s* lzw_buffer_t; static void -lzw_buffer_init (lzw_buffer_t* b, - pdf_char_t* ptr, - int size, +lzw_buffer_init (lzw_buffer_t b, int bitsize) { - b->curp = ptr; - b->endp = ptr + size; + b->buf = NULL; + b->cache_size = 0; b->valbuf = 0; b->valbits = 0; b->bitsize = bitsize; b->maxval = (1 << bitsize) - 1; } -static unsigned int -lzw_buffer_get_code (lzw_buffer_t* b) +static void +lzw_buffer_set (lzw_buffer_t b, + pdf_stm_buffer_t buf) +{ + b->buf = buf; +} + +static pdf_status_t +lzw_buffer_get_code (lzw_buffer_t b, + unsigned int* code) { unsigned long r; while (b->valbits <= 24) - { - if (b->curp > b->endp) - return NULL_INDEX; - - b->valbuf |= (unsigned long) *b->curp++ << (24 - b->valbits); - b->valbits += 8; - } + { + if (pdf_stm_buffer_eob_p (b->buf)) + { + return PDF_ENINPUT; + } + + b->valbuf |= + (unsigned long) b->buf->data [b->buf->rp++] << + (24 - b->valbits); + b->valbits += 8; + } r = b->valbuf >> (32 - b->bitsize); b->valbuf <<= b->bitsize; b->valbits -= b->bitsize; - return r; + *code = r; + + return PDF_OK; } /* Once finished, call with 0 as code value to flush the buffer. 
*/ static void -lzw_buffer_put_code (lzw_buffer_t* b, +lzw_buffer_put_code (lzw_buffer_t b, unsigned int code) { b->valbuf |= (unsigned long) code << (32 - b->bitsize - b->valbits); @@ -104,17 +117,47 @@ while (b->valbits >= 8) { - *b->curp++ = b->valbuf >> 24; + if (pdf_stm_buffer_full_p (b->buf)) + { + b->cache [b->cache_size++] = b->valbuf >> 24; + } + else + { + b->buf->data [b->buf->wp++] = b->valbuf >> 24; + } b->valbuf <<= 8; b->valbits -= 8; } } +static pdf_status_t +lzw_buffer_flush (lzw_buffer_t b) +{ + int i; + + i = 0; + while (b->cache_size && + !pdf_stm_buffer_full_p (b->buf)) + { + b->buf->data [b->buf->wp++] = b->cache [i++]; + b->cache_size--; + } + + if (pdf_stm_buffer_full_p (b->buf)) + { + return PDF_ENOUTPUT; + } + + return PDF_OK; +} + static int -lzw_buffer_inc_bitsize (lzw_buffer_t* b) +lzw_buffer_inc_bitsize (lzw_buffer_t b) { - if (b->bitsize == MAX_BITSIZE) - return PDF_ERROR; + if (b->bitsize == LZW_MAX_BITSIZE) + { + return PDF_ERROR; + } ++b->bitsize; b->maxval = (1 << b->bitsize) - 1; @@ -123,20 +166,19 @@ } static void -lzw_buffer_set_bitsize (lzw_buffer_t* b, +lzw_buffer_set_bitsize (lzw_buffer_t b, int newsize) { b->bitsize = newsize; b->maxval = (1 << newsize) - 1; } - /* -- LZW dictionary handler -- */ /* * The strings are stored in a non balanced ordered binary tree. */ -typedef struct lzw_string_s +struct lzw_string_s { unsigned prefix; /* Prefix string code */ pdf_char_t suffix; /* Appended character */ @@ -144,27 +186,31 @@ unsigned first; /* First string with the same prefix. */ unsigned left; /* Next string with smaller suffix and same prefix. */ unsigned right; /* Next string with greater suffix and same prefix. 
*/ -} lzw_string_t; +}; + +typedef struct lzw_string_s* lzw_string_t; static void -lzw_string_init (lzw_string_t* s) +lzw_string_init (lzw_string_t s) { - memset(s, 0xFF, sizeof(lzw_string_t)); + memset (s, 0xFF, sizeof (struct lzw_string_s)); } - -typedef struct lzw_dict_s +struct lzw_dict_s { - lzw_string_t table[MAX_DICTSIZE]; + struct lzw_string_s table [LZW_MAX_DICTSIZE]; unsigned size; -} lzw_dict_t; +}; +typedef struct lzw_dict_s* lzw_dict_t; static void -lzw_dict_init (lzw_dict_t* d) +lzw_dict_init (lzw_dict_t d) { int i; - memset(d->table, 0xFF, sizeof(lzw_string_t) * MAX_DICTSIZE); + memset (d->table, + LZW_NULL_INDEX, + sizeof (struct lzw_string_s) * LZW_MAX_DICTSIZE); for (i = 0; i < LZW_FIRST_CODE; i++) { @@ -174,14 +220,14 @@ d->size = LZW_FIRST_CODE; } -static int -lzw_dict_add (lzw_dict_t* d, - lzw_string_t* s) +static pdf_bool_t +lzw_dict_add (lzw_dict_t d, + lzw_string_t s) { unsigned index; int must_add; - if (s->prefix == NULL_INDEX) + if (s->prefix == LZW_NULL_INDEX) { s->prefix = s->suffix; return PDF_FALSE; /* The string is a basic character, found! 
*/ @@ -189,7 +235,7 @@ index = d->table[s->prefix].first; - if (index == NULL_INDEX) + if (index == LZW_NULL_INDEX) { d->table[s->prefix].first = d->size; } @@ -205,7 +251,7 @@ } else if (s->suffix < d->table[index].suffix) { - if (d->table[index].left == NULL_INDEX) + if (d->table[index].left == LZW_NULL_INDEX) { d->table[index].left = d->size; must_add = PDF_TRUE; @@ -217,7 +263,7 @@ } else { - if (d->table[index].right == NULL_INDEX) + if (d->table[index].right == LZW_NULL_INDEX) { d->table[index].right = d->size; must_add = PDF_TRUE; @@ -235,10 +281,14 @@ return PDF_TRUE; } -#define lzw_dict_reset lzw_dict_init +static void +lzw_dict_reset (lzw_dict_t dict) +{ + lzw_dict_init (dict); +} static void -lzw_dict_fast_add (lzw_dict_t* d, +lzw_dict_fast_add (lzw_dict_t d, unsigned prefix, pdf_char_t suffix) { @@ -248,7 +298,7 @@ } static void -lzw_dict_decode (lzw_dict_t* d, +lzw_dict_decode (lzw_dict_t d, unsigned code, pdf_char_t** decode, unsigned* size) @@ -256,301 +306,365 @@ *size = 0; do { + //fprintf (stderr, "code: %d\n", code); *(*decode)-- = d->table[code].suffix; ++(*size); code = d->table[code].prefix; - } while (code != NULL_INDEX); + } while (code != LZW_NULL_INDEX); + (*decode)++; - } -/* -- The encoder -- */ - -static int -pdf_stm_f_lzw_encode (pdf_stm_f_lzw_data_t data, - pdf_char_t *in, - pdf_stm_pos_t in_size, - pdf_char_t **out, - pdf_stm_pos_t *out_size) -{ - lzw_buffer_t buffer; - lzw_dict_t dict; - lzw_string_t string; - - /* Allocate buffer with enough space. 
*/ - *out_size = in_size * MAX_COMPRESSION_FACTOR; - if ((*out = (pdf_char_t *) pdf_alloc (*out_size)) == NULL) +/* -- THE ENCODER -- */ + +struct lzwenc_state_s +{ + /* cached params */ + pdf_i32_t early_change; + + /* encoding state */ + pdf_bool_t must_reset_p; + struct lzw_buffer_s buffer; + struct lzw_dict_s dict; + struct lzw_string_s string; +}; +typedef struct lzwenc_state_s* lzwenc_state_t; + +pdf_status_t +pdf_stm_f_lzwenc_init (pdf_hash_t params, + void **ext_state) +{ + pdf_i32_t* early_change_ptr; + lzwenc_state_t state; + + state = pdf_alloc (sizeof (struct lzwenc_state_s)); + if (!state) { - *out_size = 0; return PDF_ERROR; } - - /* Do the actual encoding. */ - lzw_buffer_init(&buffer, *out, *out_size, MIN_BITSIZE); - lzw_dict_init(&dict); - lzw_string_init(&string); - - lzw_buffer_put_code(&buffer, LZW_RESET_CODE); - - while (--in_size >= 0) - { - string.suffix = *in++; - - if (lzw_dict_add(&dict, &string)) + + if (pdf_hash_search (params, "EarlyChange", (void **) &early_change_ptr) + != PDF_OK) + { + state->early_change = LZW_DEFAULT_EARLY_CHANGE; + } + else + { + state->early_change = *early_change_ptr; + } + + lzw_buffer_init (&state->buffer, LZW_MIN_BITSIZE); + lzw_dict_init (&state->dict); + lzw_string_init (&state->string); + state->must_reset_p = PDF_TRUE; + + *ext_state = state; + return PDF_OK; +} + +pdf_status_t +pdf_stm_f_lzwenc_apply (pdf_hash_t params, + void *ext_state, + pdf_stm_buffer_t in, + pdf_stm_buffer_t out, + pdf_bool_t finish_p) +{ + pdf_status_t ret; + lzwenc_state_t st; + + ret = PDF_OK; + st = ext_state; + lzw_buffer_set (&st->buffer, out); + + ret = lzw_buffer_flush (&st->buffer); + if (ret != PDF_OK) + { + return ret; + } + + if (st->must_reset_p) + { + lzw_buffer_put_code (&st->buffer, LZW_RESET_CODE); + st->must_reset_p = PDF_FALSE; + } + + while (!pdf_stm_buffer_eob_p (in) && + !pdf_stm_buffer_full_p (out)) + { + st->string.suffix = in->data [in->rp++]; + if (lzw_dict_add (&st->dict, &st->string)) { - 
lzw_buffer_put_code(&buffer, string.prefix); - string.prefix = string.suffix; + lzw_buffer_put_code (&st->buffer, st->string.prefix); + st->string.prefix = st->string.suffix; - if (buffer.maxval - data->early_change == dict.size) + if (st->buffer.maxval - st->early_change == st->dict.size) { - if (!lzw_buffer_inc_bitsize(&buffer)) + if (!lzw_buffer_inc_bitsize(&st->buffer)) { - lzw_buffer_put_code(&buffer, LZW_RESET_CODE); - lzw_buffer_set_bitsize(&buffer, MIN_BITSIZE); - lzw_dict_reset(&dict); + lzw_buffer_put_code (&st->buffer, LZW_RESET_CODE); + lzw_buffer_set_bitsize (&st->buffer, LZW_MIN_BITSIZE); + lzw_dict_reset (&st->dict); } } } } - - lzw_buffer_put_code(&buffer, string.prefix); - if (buffer.maxval - data->early_change == dict.size) - lzw_buffer_inc_bitsize(&buffer); - lzw_buffer_put_code(&buffer, LZW_EOD_CODE); - lzw_buffer_put_code(&buffer, 0); - - /* Resize buffer to fit the data. */ - *out_size = (buffer.curp - *out); - if ((*out = pdf_realloc(*out, *out_size)) == NULL) - { - *out_size = 0; - return PDF_ERROR; - } - - return PDF_OK; -} - -/* -- The decoder -- */ - -/* Utility to write to the output. 
*/ - -typedef struct lzw_writer_s -{ - pdf_char_t* buf; - pdf_char_t* cur; - int writen; - int allocated; -} lzw_writer_t; - -static int -lzw_writer_init (lzw_writer_t* s, - int size) -{ - if ((s->buf = pdf_alloc(size)) == NULL) - { - return PDF_ERROR; - } - - s->cur = s->buf; - s->writen = 0; - s->allocated = size; - - return PDF_OK; -} - -static int -lzw_writer_fit (lzw_writer_t* s) -{ - if ((s->buf = pdf_realloc(s->buf, s->writen)) == NULL) - { - return PDF_ERROR; - } - - s->cur = s->buf + s->writen; - s->allocated = s->writen; - - return PDF_OK; -} - -static int -lzw_writer_put (lzw_writer_t* s, - pdf_char_t* data, - unsigned size) -{ - if (s->allocated < s->writen + size) - { - s->allocated = s->allocated * 2 + 1; - if ((s->buf = pdf_realloc(s->buf, s->allocated)) == NULL) + + if (finish_p) + { + lzw_buffer_put_code (&st->buffer, st->string.prefix); + if ((st->buffer.maxval - st->early_change) == st->dict.size) { - return PDF_ERROR; + lzw_buffer_inc_bitsize(&st->buffer); } - s->cur = s->buf + s->writen; - } - - memcpy(s->cur, data, size); - s->cur += size; - s->writen += size; + + lzw_buffer_put_code (&st->buffer, LZW_EOD_CODE); + lzw_buffer_put_code (&st->buffer, 0); + lzw_buffer_put_code (&st->buffer, 0); + + ret = PDF_EEOF; + } + else if (pdf_stm_buffer_full_p (out)) + { + ret = PDF_ENOUTPUT; + } + else if (pdf_stm_buffer_eob_p (in)) + { + ret = PDF_ENINPUT; + } + return ret; +} + +pdf_status_t +pdf_stm_f_lzwenc_dealloc_state (void *state) +{ + pdf_dealloc (state); return PDF_OK; } -static void -lzw_writer_destroy (lzw_writer_t* s) -{ - pdf_dealloc (s->buf); -} - -static int -pdf_stm_f_lzw_decode (pdf_stm_f_lzw_data_t data, - pdf_char_t *in, - pdf_stm_pos_t in_size, - pdf_char_t **out, - pdf_stm_pos_t *out_size) -{ - pdf_char_t dec_buf[MAX_DICTSIZE]; +/* -- THE DECODER -- */ + + +#define LZWDEC_CHECK(st, pos, what) \ + do { (st)->state_pos = (pos); \ + if (((st)->tmp_ret = (what)) != PDF_OK) \ + { return ((st)->tmp_ret); } \ + } while (0); + +enum 
lzwdec_state + { + LZWDEC_STATE_START, + LZWDEC_STATE_CLEAN, + LZWDEC_STATE_WRITE, + LZWDEC_STATE_READ, + LZWDEC_STATE_LOOP_WRITE, + LZWDEC_STATE_LOOP_READ + }; + +struct lzwdec_state_s +{ + /* cached params */ + pdf_i32_t early_change; + + /* state */ + pdf_char_t dec_buf [LZW_MAX_DICTSIZE]; pdf_char_t* decoded; - unsigned dec_size; + unsigned dec_size; unsigned new_code; unsigned old_code; - - lzw_buffer_t buffer; - lzw_dict_t dict; - lzw_writer_t writer; - - *out = NULL; - *out_size = 0; - - if (lzw_writer_init(&writer, in_size) == PDF_ERROR) - return PDF_ERROR; - - lzw_buffer_init(&buffer, in, in_size, MIN_BITSIZE); - lzw_dict_init(&dict); - old_code = NULL_INDEX; - - do { - lzw_buffer_set_bitsize(&buffer, MIN_BITSIZE); - lzw_dict_reset(&dict); - - do { - new_code = lzw_buffer_get_code(&buffer); - } while(new_code == LZW_RESET_CODE); - - if (new_code == NULL_INDEX) - { - lzw_writer_destroy(&writer); - return PDF_ERROR; - } - - if (new_code != LZW_EOD_CODE) - { - if (lzw_writer_put(&writer, (pdf_char_t*)&new_code, 1) == PDF_ERROR) - return PDF_ERROR; - - old_code = new_code; - new_code = lzw_buffer_get_code(&buffer); - } - - while (new_code != LZW_EOD_CODE && new_code != LZW_RESET_CODE) - { - decoded = &(dec_buf[MAX_DICTSIZE-2]); - - if (new_code < dict.size) /* Is new code in the dict? */ - { - lzw_dict_decode(&dict, new_code, &decoded, &dec_size); - lzw_dict_fast_add(&dict, old_code, decoded[0]); - } - else - { - lzw_dict_decode(&dict, old_code, &decoded, &dec_size); - lzw_dict_fast_add(&dict, old_code, decoded[0]); - decoded[dec_size++] = decoded[0]; - } - - if (lzw_writer_put(&writer, decoded, dec_size) == PDF_ERROR) - return PDF_ERROR; - - if (dict.size == buffer.maxval - 1 - data->early_change) - if (!lzw_buffer_inc_bitsize(&buffer)); - /* break; We must wait for the reset code, don't reset yet. 
*/ - - old_code = new_code; - new_code = lzw_buffer_get_code(&buffer); - - if (new_code == NULL_INDEX) - { - lzw_writer_destroy(&writer); - return PDF_ERROR; - } - } - } while (new_code != LZW_EOD_CODE); - - if (lzw_writer_fit(&writer) == PDF_ERROR) - return PDF_ERROR; - - *out = writer.buf; - *out_size = writer.writen; - - return PDF_OK; -} - - -/* -- PDF Filter functions --*/ - -int -pdf_stm_f_lzw_init (void **filter_data, - void *conf_data) -{ - pdf_stm_f_lzw_data_t *data; - pdf_stm_f_lzw_conf_t conf; - - data = (pdf_stm_f_lzw_data_t *) filter_data; - conf = (pdf_stm_f_lzw_conf_t) conf_data; - - /* Create the private data storage */ - *data = - (pdf_stm_f_lzw_data_t) pdf_alloc (sizeof(struct pdf_stm_f_lzw_data_s)); - (*data)->mode = conf->mode; - (*data)->early_change = conf->early_change; - - return PDF_OK; -} - -int -pdf_stm_f_lzw_apply (void *filter_data, - pdf_char_t *in, pdf_stm_pos_t in_size, - pdf_char_t **out, pdf_stm_pos_t *out_size) -{ - pdf_stm_f_lzw_data_t data; - - data = (pdf_stm_f_lzw_data_t) filter_data; - switch (data->mode) - { - case PDF_STM_F_LZW_MODE_ENCODE: - { - return pdf_stm_f_lzw_encode (data, in, in_size, out, out_size); - } - case PDF_STM_F_LZW_MODE_DECODE: - { - return pdf_stm_f_lzw_decode (data, in, in_size, out, out_size); - } + + /* flow managment */ + enum lzwdec_state state_pos; + pdf_status_t tmp_ret; + + struct lzw_buffer_s buffer; + struct lzw_dict_s dict; +}; +typedef struct lzwdec_state_s* lzwdec_state_t; + + +pdf_status_t +pdf_stm_f_lzwdec_init (pdf_hash_t params, + void **ext_state) +{ + pdf_i32_t* early_change; + lzwdec_state_t state; + + state = pdf_alloc (sizeof (struct lzwdec_state_s)); + if (!state) + { + return PDF_ERROR; + } + + if (pdf_hash_search (params, "EarlyChange", (void**) &early_change) != PDF_OK) + { + state->early_change = LZW_DEFAULT_EARLY_CHANGE; + } + else + { + state->early_change = *early_change; + } + + lzw_buffer_init (&state->buffer, LZW_MIN_BITSIZE); + lzw_dict_init (&state->dict); + 
state->old_code = LZW_NULL_INDEX; + state->decoded = state->dec_buf + (LZW_MAX_DICTSIZE-2); + state->dec_size = 0; + state->state_pos = LZWDEC_STATE_START; + state->tmp_ret = 0; + + *ext_state = state; + return PDF_OK; +} + +pdf_status_t +lzwdec_put_decoded (lzwdec_state_t st, pdf_stm_buffer_t out) +{ + pdf_status_t ret; + pdf_size_t to_write; + + ret = PDF_OK; + + if (st->dec_size) + { + /* output the decoded string */ + to_write = st->dec_size; + if (st->dec_size > (out->size - out->wp)) + { + to_write = out->size - out->wp; + ret = PDF_ENOUTPUT; + } + + memcpy (out->data + out->wp, st->decoded, to_write); + out->wp += to_write; + st->decoded += to_write; + st->dec_size -= to_write; + } + + return ret; +} + +pdf_status_t +lzwdec_put_code (lzwdec_state_t st, + pdf_stm_buffer_t out, + unsigned long code) +{ + if (pdf_stm_buffer_full_p (out)) + { + return PDF_ENOUTPUT; + } + + out->data [out->wp++] = code & 0xFF; + + return PDF_OK; +} + +pdf_status_t +pdf_stm_f_lzwdec_apply (pdf_hash_t params, + void *ext_state, + pdf_stm_buffer_t in, + pdf_stm_buffer_t out, + pdf_bool_t finish_p) +{ + lzwdec_state_t st; + + st = ext_state; + lzw_buffer_set (&st->buffer, in); + + switch (st->state_pos) + { + case LZWDEC_STATE_START: + break; + case LZWDEC_STATE_CLEAN: + goto lzwdec_state_clean; + case LZWDEC_STATE_WRITE: + goto lzwdec_state_write; + case LZWDEC_STATE_READ: + goto lzwdec_state_read; + case LZWDEC_STATE_LOOP_WRITE: + goto lzwdec_state_loop_write; + case LZWDEC_STATE_LOOP_READ: + goto lzwdec_state_loop_read; default: - { - return PDF_ERROR; - } + break; } - /* Not reached */ + do + { + /* need a reset */ + lzw_buffer_set_bitsize (&st->buffer, LZW_MIN_BITSIZE); + lzw_dict_reset (&st->dict); + + do + { + lzwdec_state_clean: + LZWDEC_CHECK (st, LZWDEC_STATE_CLEAN, + lzw_buffer_get_code (&st->buffer, &st->new_code)); + } + while (st->new_code == LZW_RESET_CODE); + + if (st->new_code != LZW_EOD_CODE) + { + lzwdec_state_write: + LZWDEC_CHECK (st, LZWDEC_STATE_WRITE, + 
lzwdec_put_code (st, out, st->new_code)); + + st->old_code = st->new_code; + lzwdec_state_read: + LZWDEC_CHECK (st, LZWDEC_STATE_READ, + lzw_buffer_get_code (&st->buffer, &st->new_code)); + } + + while (st->new_code != LZW_EOD_CODE && + st->new_code != LZW_RESET_CODE) + { + st->decoded = st->dec_buf + (LZW_MAX_DICTSIZE-2); + + /* Is new code in the dict? */ + if (st->new_code < st->dict.size) + { + lzw_dict_decode (&st->dict, st->new_code, + &st->decoded, &st->dec_size); + lzw_dict_fast_add (&st->dict, st->old_code, st->decoded[0]); + } + else + { + lzw_dict_decode (&st->dict, st->old_code, + &st->decoded, &st->dec_size); + lzw_dict_fast_add (&st->dict, st->old_code, st->decoded[0]); + st->decoded [st->dec_size++] = st->decoded [0]; + } + + /* output the decoded string */ + lzwdec_state_loop_write: + LZWDEC_CHECK (st, LZWDEC_STATE_LOOP_WRITE, + lzwdec_put_decoded (st, out)); + + if (st->dict.size == st->buffer.maxval - 1 - st->early_change) + { + if (!lzw_buffer_inc_bitsize (&st->buffer)); + /* break; We must wait for the reset code, don't reset yet. 
*/ + } + + /* get next code */ + st->old_code = st->new_code; + lzwdec_state_loop_read: + LZWDEC_CHECK (st, LZWDEC_STATE_LOOP_READ, + lzw_buffer_get_code (&st->buffer, &st->new_code)); + + } + } + while (st->new_code != LZW_EOD_CODE); + + st->state_pos = LZWDEC_STATE_START; + return PDF_EEOF; } -int -pdf_stm_f_lzw_dealloc (void **filter_data) +pdf_status_t +pdf_stm_f_lzwdec_dealloc_state (void *state) { - pdf_stm_f_lzw_data_t *data; - - data = (pdf_stm_f_lzw_data_t *) filter_data; - pdf_dealloc (*data); - + pdf_dealloc (state); return PDF_OK; } === modified file 'src/base/pdf-stm-f-lzw.h' --- src/base/pdf-stm-f-lzw.h 2008-02-11 01:11:25 +0000 +++ src/base/pdf-stm-f-lzw.h 2008-10-12 12:34:46 +0000 @@ -27,54 +27,34 @@ #define PDF_STM_F_LZW_H #include <config.h> -#include <pdf-base.h> - -/* Configuration data */ - -/* BEGIN PUBLIC */ - -enum pdf_stm_f_lzw_mode_t -{ - PDF_STM_F_LZW_MODE_ENCODE, - PDF_STM_F_LZW_MODE_DECODE -}; - -/* END PUBLIC */ - -struct pdf_stm_f_lzw_conf_s -{ - int mode; - int early_change; /* An indication of when to increase the code - length. If the value of this entry is 0, code - length increases are postponed as long as - possible. If the value is 1, code length - increases occur one code early. This - parameter is included because LZW sample code - distributed by some vendors increases the - code length one code earlier than necessary. 
- - Default value: 1 */ -}; - -typedef struct pdf_stm_f_lzw_conf_s *pdf_stm_f_lzw_conf_t; - -/* Private data */ - -struct pdf_stm_f_lzw_data_s -{ - int mode; - int early_change; -}; - -typedef struct pdf_stm_f_lzw_data_s *pdf_stm_f_lzw_data_t; - -/* Filter API implementation */ - -int pdf_stm_f_lzw_init (void **filter_data, void *conf_data); -int pdf_stm_f_lzw_apply (void *filter_data, - pdf_char_t *in, pdf_stm_pos_t in_size, - pdf_char_t **out, pdf_stm_pos_t *out_size); -int pdf_stm_f_lzw_dealloc (void **filter_data); +#include <pdf-types.h> +#include <pdf-hash.h> +#include <pdf-stm-buffer.h> + +/* Filter implementation API */ + +pdf_status_t pdf_stm_f_lzwenc_init (pdf_hash_t params, + void **state); + +pdf_status_t pdf_stm_f_lzwenc_apply (pdf_hash_t params, + void *state, + pdf_stm_buffer_t in, + pdf_stm_buffer_t out, + pdf_bool_t finish_p); + +pdf_status_t pdf_stm_f_lzwenc_dealloc_state (void *state); + +pdf_status_t pdf_stm_f_lzwdec_init (pdf_hash_t params, + void **state); + +pdf_status_t pdf_stm_f_lzwdec_apply (pdf_hash_t params, + void *state, + pdf_stm_buffer_t in, + pdf_stm_buffer_t out, + pdf_bool_t finish_p); + +pdf_status_t pdf_stm_f_lzwdec_dealloc_state (void *state); + #endif /* pdf_stm_f_lzw.h */ === modified file 'src/base/pdf-stm-filter.c' --- src/base/pdf-stm-filter.c 2008-10-05 15:08:05 +0000 +++ src/base/pdf-stm-filter.c 2008-10-12 12:34:46 +0000 @@ -111,6 +111,20 @@ new->impl.dealloc_state_fn = pdf_stm_f_flatedec_dealloc_state; break; } + case PDF_STM_FILTER_LZW_ENC: + { + new->impl.init_fn = pdf_stm_f_lzwenc_init; + new->impl.apply_fn = pdf_stm_f_lzwenc_apply; + new->impl.dealloc_state_fn = pdf_stm_f_lzwenc_dealloc_state; + break; + } + case PDF_STM_FILTER_LZW_DEC: + { + new->impl.init_fn = pdf_stm_f_lzwdec_init; + new->impl.apply_fn = pdf_stm_f_lzwdec_apply; + new->impl.dealloc_state_fn = pdf_stm_f_lzwdec_dealloc_state; + break; + } default: { /* Shall not be reached, but makes the compiler happy */ === modified file 
'src/base/pdf-stm-filter.h' --- src/base/pdf-stm-filter.h 2008-10-05 15:08:05 +0000 +++ src/base/pdf-stm-filter.h 2008-10-12 12:34:46 +0000 @@ -36,6 +36,7 @@ #include <pdf-stm-f-ahex.h> #include <pdf-stm-f-rl.h> #include <pdf-stm-f-flate.h> +#include <pdf-stm-f-lzw.h> /* BEGIN PUBLIC */ @@ -48,8 +49,9 @@ PDF_STM_FILTER_RL_ENC, PDF_STM_FILTER_RL_DEC, PDF_STM_FILTER_FLATE_ENC, - PDF_STM_FILTER_FLATE_DEC - + PDF_STM_FILTER_FLATE_DEC, + PDF_STM_FILTER_LZW_ENC, + PDF_STM_FILTER_LZW_DEC }; /* Filter implementation */ === modified file 'utils/pdf-filter.c' --- utils/pdf-filter.c 2008-10-05 15:08:05 +0000 +++ utils/pdf-filter.c 2008-10-12 12:34:46 +0000 @@ -247,10 +247,10 @@ *last_ret = ret; ret = pdf_stm_cfile_new (*read_mode ? stdin : stdout, - 0, - cache_size, - *read_mode ? PDF_STM_READ : PDF_STM_WRITE, - &stm); + 0, + cache_size, + *read_mode ? PDF_STM_READ : PDF_STM_WRITE, + &stm); if (ret != PDF_OK) { @@ -326,10 +326,32 @@ } case LZWENC_FILTER_ARG: { + ret = pdf_hash_new (NULL, &filter_params); + if (ret != PDF_OK) + { + pdf_error (ret, stderr, "while creating the ahexdec filter parameters hash table"); + exit (1); + } + + pdf_stm_install_filter (stm, + PDF_STM_FILTER_LZW_ENC, + filter_params); + break; } case LZWDEC_FILTER_ARG: { + ret = pdf_hash_new (NULL, &filter_params); + if (ret != PDF_OK) + { + pdf_error (ret, stderr, "while creating the ahexdec filter parameters hash table"); + exit (1); + } + + pdf_stm_install_filter (stm, + PDF_STM_FILTER_LZW_DEC, + filter_params); + break; } #ifdef HAVE_LIBZ # Begin bundle IyBCYXphYXIgcmV2aXNpb24gYnVuZGxlIHY0CiMKQlpoOTFBWSZTWZxz7QMADfn/lHz0IAB7//// ///e/r////8AAggAYBce9dvfOzV7wNnDOhTXdy95k9triOtIFKSZuaGhotbdinVDslMFaMqy2sY2 FqttWAdw4aAAGg0GgMEANDTTIBoaNMgBiBoAEojQCZJlMjRNNT1HqNHpqaaAMTQAGQAaAAAGmgRq TU0I0aGIGgBpoaGgAAAAAAABJpRCEU/RT0ap4Cn6p6j02qNkmh6nqPTIgAaANGgaB6j1ARKIE01G INGgCaZGhNpTDTSPSeieKaGQxBoNNGEBIiETCDSaTEnjUmmm1PU0jzUwjSD0gYgAZAaGmgzuAkUh 
AFkUBioZhN38eyo1ithBBggKRIKsggIAqJBGJIkQgvCjd0UF9senAo5qHRifR0WHFPwuYUMJe69K DQLSjR3k0jJYUZ9THZ6mhJXYzC6s5eBQzWTrBB9tOvFWIw49fl7Hk2vu77pv3PtPZ+bly1KuqARE BxMBSCohmgox8tthkkpc3a7P8MuUaR19sPy36YLJki5IiGaSEJsjMO805BumWzYy5iMhlgCw9bm9 QcbuqLoRQrujmZahrnY6UShNDNOZA5VEiKQK2HgxzaBx360CiNAi0vQtcMUgaC5cQtQ7O6q4DElM YeweHYKGBBiJDEFSuiqgFEYpZizVoDJmbOzQy5sbja6gy6L2vCQLKSpSXatm+q++1yFVpIvbYJGg c7mGt7eEBi6WWojFksbUZFCbiJdXaIlph3d2exWWgMsj6CV+1w1DgxVmIgWOFaa75UnINnA6zcIm RpYZyhUVE2Q5Yiqd8szCVUVK0dcQNPJnvjMyAxgHx3BETEKyRFGQQVCLCIogdCQNwYm4pdrMZlzQ RxiaZJkwMo6Y+TS5NGFEJVVURFiyQUFkUYqBoyVId6HPqpk4w6tODrMlSb/kMI/vSG8ROaiWGf6O dcnoP8mnzBqaYFwPbzwdFc3gYo3Wevw7GxsZBKTyM/wYvlOz50Y5VQ228LE3jDSoPDfej8Pst3/Z 8z2y6HSc2JDEkHaPstrr+3xZimyfSewszgfIs4B8YxkAZFhFkQkQOAr0agr6P8IG3t5vL5ZZTzWd 3msv80pZSIN1p3l1wVCuDKjHOdb5FEgnFRjdUgjliy4UrG0qFmglXNK6rmTayXu1F3UXwNmJgjhg i189oKEhiMhZBanwcLCSZwUMzFgKdRd7C4yG8NixslPTRSwUW9WWmDwhKQRFLNUwk1YvNQ8IrGES UdzxzutTFvu5f52gOnfCby8lCVxqHUx3qXOf2RZM6BRxdmVmFBUhe67x3efI4mKK9TbbU6S2WFt9 V0cg5homFdyoiiAqEqjH4/LWvb4D0aXmzsUDJ8Tk3pZm9mTwg8husTXmmHQpHlmm+m7EgKQREMpJ UlDdxUWZ9VCwWf19vdTMtZpam2Bw0nOoZ2rgyZRQGtTCAj/EyKZO+EqhSZUk7oMNWbRXO4yGh3Y7 /O7jYuY3AcCbHRvhRJRDh59dFVVXDu7XNpsHTRnolBiLnZUnBkGChyumDapIKEJjIqd26pRioQZW NxVYsbGnv6DMFiYGzdQm7CMlOm8PD9IksRuJy6jgJ1CPEhBGtrcjXVuPflI/Jls8aI1VcXtcbe0L y4fXnrzs2Hf5wCKe7vKANiQz1pD9CYHPNYhD9RRaBEmMTGSAtYPFBQKDJ6msgYZPBnQ8p769Hi9x +INdVZhvmENsbsI4ZD7fVnJjTIVps7iDFtrQuuY1qUgS5FkLIuIwGwYC7DaRz7Cb3ySurEZFGmc0 gzD1iriS4344wyDCBv4+3TR+6HbZYzhBDsnb0AlEirDnnFfPcJ54EyhE2jxxjX1fEryRAkSORRh2 Ib3vFPxnRcPXHQfjDDI5l9JkQtfQQVlEJTaJbX7+FCUUGXXeg5WMRb4vv9QW2nm3EwdxeEEMZxAW Rf5mvMkuF48e7Cc4TkGBSPjngplUBYCMBBnpI1JFhzaaYPE6IsOExh3QyajO8AabGBR9wRINGnAR BomI+F6vbw1617DRvoGMIN6AmHLGnOdDtqv9IFzqcgg7BLuXP6mhuT2RfJo9JEIcveqNbd/y0Bp6 KCnJy7Kg8kQL0igd9yngSsud/2chts8pqSUIMZIbC4QFw+kolFq9yQxlEiICwZpiMixdWLc5ZhMH AJePb6tN9ylAira35qOK12w/L7m80m01B2G08/IeM8GtZmto4E7po20JtLcwDRorsi8rNYDQlDhh 
0uJ7G0kGQgqKoA7inI8TODCIZOJqJVFHVWgQAOA3zAISSBnJ0JX12bqpGhmhbA4dcyHDSyRItkpi LqAsFCp16PQ0Wb32RUsZRBSw/m1VRwd/iuPsWYVWwZXDal7Fr6dChqOvmNDesgbSG0bYvu2xy83R pS+t9473nuO+IlZ551SERepBiIDBIsAXz0ElC8p0FINbqXETdoAU2e9S5lsc2RTvjQefJd6UZzDo gqGhuMz4wC1sYC5DlomZeG+kF+Et2ewRlLtlSYqfyC4TgHFSZd+4GQurykXCgEhg1qAp7A2c+s1F dQ2yEzVlBBoQZFxQgv58C3NaHjADjEqRv312wycaW14CcYHMPBKqdbXY2EeMsibGDMcRAPGvkeah DIAiW4DEMsOVcG8qmQ1rFLc9DNEeVJZG3bxBuNhRAMS1ouM12R1UrB0H1o/N05ujitldZJ7grqoX G2ZkQkFyUhkExmWgVzXOcx9XhuKTemRk+WSQ6rCkISJpFhYmiFy5ITtA7Ac4BB9LLaxVWtKEij5A F6UV46dGkz8GsWlzEUemhyiFZMSJDmWpwgMbwyDYaiDYvtkRhght2eOutLoilL4ZW0lQNsqTlYHC jw1KuY1gPMFiJZiSZgUDYXKI81IjEh5yCJoF2yvpk/TJXxdabOm6E8pPi90kQ4RQhhI7xqRpIrfv KlKlKqKYxUdW1q377778NCtx1oRKkDkKERxbj/v0YJKpXNXY1GOLY96yNaBnxFNGyi6mgqi7TmRs 2cKkjdmkIbduRgKQjRcGhygGhsNDtO9Lfyev5PU38hXPgG1rHewzp0KEmBXclogu+IXpYfG8jdc3 KXJQ7PwTT3Yio4eqIXa1NPyb+fvtGelnvVCZvjKEJThRet1Y0bwu2uszXRb3NiiwcCi3HEjtjCey qMKitl2Jv9i0TbP2lNsOOxRuEJyax5X7medwek8Pm+I1cDAQfzp7MQPwIfLPywa2kpECViJPn5Ov s9qVTiWU6b4QxaMGHgEw1IPj83yqGx8eqCl8l928mCmwQgLMFnZ50DP6Aj5TrXrj9rfR6l7IAWv3 IbfIyMSST8gL8J70kJVT/RUaCyv1DRB4S1f+KXBt+6BLMG4D1kAwTEPR7A9dOwJgVUVNxKUKH2fj ce+utxd7MGIwRqRjoim4VKGdcExSwDJsRwIvKcGiTNi+cohmEDLcdjiQDdtHFbwLjYldK/KBqAM+ jOOhHdcKu+pkPBsWrk9OdfnkjISSBIfRg0YwKkT9pQhClhkBUS4INmS7wZgQoFBgX3ASjWjatUcw 6R2zeDAQMcqyQtGwwKGAPIt7gWmeoYVXGJfUWIBc6hhuqmC3rgCZyxC4L7jajIwLMUwfhx21M7wj oDjEclgKbeIFyGLtAEFA8JaJ0Efq/vwFvlHUdQ8pgfT0Ih2z2TtE0nc8KKquhhDYmZ1GJBiR6Vpx bwF40GopajxXS0ksSDioWiW1tJJ2p8R860JIUGP7n+RBgIQGeJWw4yoXCzgYMYeS0wG1LEHu95tM RC1vMnDhm24IagFD6+SJTJE+FreQm81hNhBl5TH9DGNalKHgFwDA1B6ZbyqlIa7s6X7FLhZguMYM mSZnFmd6wVFaT2jmxDBIEIQRqCtVMxlAJgpfensLAsExAsBiXCh2HN1p/Ws4iIhUF4rviX0G8nIS +c3H0Gp8h4DwnfTwFix2T5tBKmSIbdqHpIMXfmdcA4zehy8JyXp085vvzxJqNBcJ+txXOWwA+m0z V5EDAcDHjsN4QwMEoAdRVXWBugXK50DW4VIcQ6AFMCfNFCx9WzgX0wgT656mFZSpRFqQ+c8c6C4M anQc52E+AA9wL95f3f9PSdRgudo6EPZDvDBzaz7TavtLyhSVqJCAmiNCwFevUHzIIU1nalHqg+Us 
OIO+cpxnWW+KSRIEkVVVJOUFERJVRRRVNxx5oKvqAvNZ0HCHj0Gg1HZNuBu76BipplddhcEe/OdI bCKVPlZYgop30ySOuetOYIcJBOnWGgKmbYkKIwW+kaOYNppzm4cRydB1lXrKGYC0C0prDJXE+/pG zjM5zjthYvQg6wihkVo8IHSfKeZzGuglF6pQEuSiWES8XWak+9c/E2773I2HGbDeLBQcsg44hpqa EFVLGfGzEA2U7j6hm3VsF5578SJjFCiGwiYCpiWH2iiXEVExXDexsRIn2NkEYkpn1Z3cJNTeXeVy jtbWYKGDFSUSjKzKd4UAKK0g0QLkMx3HhPIe+eDpLOz6/UIOo9swILjAsfAbTBaEGgwiIFDIRwfv JUOHts1Z0FBJmBYDEDpJtSKHvQww4ELHKdYv6PpPgcxFxkbzUeEaEjwNAaNLdSVrKI2Msj1wd62M o9KaUBo0a/Cw/kjvVGdiSpFhninxNRPKW9jxhuDwAkQgbdaFYKQIKHIHJa3Fpvh+KBO4tyE22OCE UmaUfTBo1CBYVBdYpc7D8CuQPMOophkq08Yb68wwHOfAsU5yHgPeKHQQOsvKCvCIQVdh2EKvjDtM DAxLlzkAUqaVziidpkUNX0i3LiLgLaNbzI4EMk94uS80haQcXFgYueob/16AMLh8/MJgMjAGKvuq RqGa0N63vBQ8/uAIB5IdHiO9MlEKjekMBgMSKAxPlIDkW5A+IseIYOiGgsQ7g8OzjAAz/HjcJnCz HXak18KidgBerab5gf5pSEJCSQPwIUjJeubfDeDOgdIeNDcRMoeuAEHnFj1s8IxcOzeHrDzoLVME ufQ35kJBRGpJdYzIpq8eql/BC5EdyE0ekYHFhxxSO0R27RakE9+odAnfPTDIOo7Q+B0Joso60MhC 5AqvQbWNlAKBoI6gttA2CguagVktGA00IQc5suLnvDAZegYA8Bthv45D4qKF2QaHITngCeIwIjw7 v6Y/xXEvXKraRbeqTDcCjiLbQxBlSXF5oGdVwKqqFpGksQg6dTuczCAj3ct2+mp1tL127TyWGHT3 rh4gcWGvZWQ4pWAkA1HuZeNf71huv5xZykcLZn/eLAXJZB3yckyCExQvkCyBpvDXYP5RZoUWs0Np BqQHtdaAkSOf1JhBmxoSMgmSDGj6A1eI3NaU8DRb3OD1CuyABfYHYI6h3R2IHtv0G1Ae3I8IwMsQ g6dCHSg0fBVi27ybOtgEknVPJA3EDLz86HuJFOaxS9zlEE3QwCByCTx+PxgddMVbSlyH0z8ZQ+ET cQQPISMvlRY641KWlKaKxRVi+Y4cFVV4ye/6Ic4OCd5O7A0DZjHvge91eJmrXwHcusYL3PhXhANx 2GbbUXQHKxV7ADTYVVS6ISKXIUEOv/e0cIEHhtXQHx3ppK8AbWQXgFjVgcDDsKyPH67LSWAWhEkN fLQdEa9wm6g5YcW+iaO+peaUDB5VeG/NgGw5TJDCFyJi1b0OYsq1vPqppKQDsWZKGI5KfB2TiIKO 4Y9iQe0WDcAxA4HWmxyW1liGxpDywSGsmkn1QzCXgjQhRyxBCgQbTYeLBAWAr4My4zvUNJv0CC0f BTB72co4LGTNJih9RMmVgW2ZZirg2mrFFQa7fE3A8BogwKpmAQoakGMDUFKCHSkUL6ltDkEsQTtA 1oXGCzMhusWKbDbbb0mt3ahO48ZO1IqiwTbpAFWs/huSPTSEi++A8YIpAI+1iOUZhTG2gVzHEM7a mQ0dLAtDQxCGBkCIgNESHsXkALWGFTAZZj7uOJJcN1Fgk2pYRvM40CwI2hA2gSDeOp1tqil8UMPS FG6yDZCRWQEhCDBwirXOrzwM5iDSz3Kj1Xp2Giwq5IyID1BobL0nNEtQxcDwNUSsBL9JUz0AoBNt 
XJQsOZOO5B69n5dt4JvB699RHQag51zt+72Tqv2CBnNCazhRID4aHrD0UQMnleNP+6XjR4RNRq+Q 9085UN8msTqzOiD2ADEYiGMAGXQO9JvGzUvYp1OaHtAJ8JUNoIcYQU63NycocfN7XdTDKiA/XlBh Oa4d5N/OGNRBDpTTkAIAGEIAfGAdU6pOcsgd890kwGJciULm4RIwuAsMhmF/9t/8XckU4UJCcc+0 DA==
