Some time ago I complained about very slow access to compressed mboxes.
Unfortunately, it looks like there is very little interest in it, so I
had to investigate some things myself.
Firstly: some rationale.
Why do I prefer to use mbox/maildir over mdbox? Short answer: the "bus factor"
for mdbox support (not only in dovecot).
Longer answer: if something goes wrong with maildir/mbox, I can use
other tools (mutt, formail, or even a text editor), whereas with mdbox ...
I am not an ISP; I use dovecot as a "gateway" to my (rather huge) mail
archive. Most of these mails are rather valuable to me, so I prefer to use
something "well-known-and-tested".
(I can't do what most ISPs do: write in the "Terms of Service" that mail
can be lost or damaged and that we give no warranty :) )
So then:
Below is my patch.
It contains 2 changes:
1. When the buffer is compressed, we try to preserve the last marked offset.
2. Increase the temporary buffer used for decompression.
Without these changes, a 1.5 GB bzip-compressed mbox with ~20K messages
could not be opened even in 1.5 days.
After applying change 1, it can be opened in ~1.5 h.
With both changes, it takes a few minutes.
Maybe it would be a good idea to add a config parameter to specify the size
of the decompression buffer?
The patch is against v2.0.18.
diff -x '*.o' -x '*.lo' -x '*.la' -u -r ../dovecot-2.0.18/src/lib/istream.c ./src/lib/istream.c
--- ../dovecot-2.0.18/src/lib/istream.c 2011-12-13 12:38:27.000000000 +0100
+++ ./src/lib/istream.c 2012-04-14 10:27:23.790724625 +0200
@@ -452,6 +452,22 @@
stream->pos -= stream->skip;
stream->skip = 0;
+
+}
+
+void i_stream_compress1(struct istream_private *stream, size_t bytes )
+{
+ /* Remove up to 'bytes' bytes from the front of the stream's buffer. */
+ size_t lskip ;
+ /* Clamp the amount removed to stream->skip (min of bytes and skip). */
+ lskip = (stream->skip > bytes ? bytes : stream->skip );
+ /* Shift the remaining (pos - lskip) bytes down to the buffer start. */
+ memmove(stream->w_buffer, stream->w_buffer + lskip ,
+ stream->pos - lskip);
+ stream->pos -= lskip;
+ stream->skip -= lskip;
+
+
}
void i_stream_grow_buffer(struct istream_private *stream, size_t bytes)
diff -x '*.o' -x '*.lo' -x '*.la' -u -r ../dovecot-2.0.18/src/lib/istream-internal.h ./src/lib/istream-internal.h
--- ../dovecot-2.0.18/src/lib/istream-internal.h 2011-12-13 12:38:27.000000000 +0100
+++ ./src/lib/istream-internal.h 2012-04-13 00:06:27.700298378 +0200
@@ -51,6 +51,7 @@
i_stream_create(struct istream_private *stream, struct istream *parent, int fd);
void i_stream_compress(struct istream_private *stream);
+void i_stream_compress1(struct istream_private *stream, size_t bytes );
void i_stream_grow_buffer(struct istream_private *stream, size_t bytes);
bool i_stream_get_buffer_space(struct istream_private *stream,
size_t wanted_size, size_t *size_r);
diff -x '*.o' -x '*.lo' -x '*.la' -u -r ../dovecot-2.0.18/src/plugins/zlib/istream-bzlib.c ./src/plugins/zlib/istream-bzlib.c
--- ../dovecot-2.0.18/src/plugins/zlib/istream-bzlib.c 2012-02-09 18:32:48.000000000 +0100
+++ ./src/plugins/zlib/istream-bzlib.c 2012-04-14 10:35:04.349800777 +0200
@@ -9,12 +9,14 @@
#include <bzlib.h>
#define CHUNK_SIZE (1024*64)
+#define BUFF_SIZE (1024*1024*16)
struct bzlib_istream {
struct istream_private istream;
-
+
bz_stream zs;
uoff_t eof_offset, stream_size;
+ uoff_t marked_offset;
size_t prev_size, high_pos;
struct stat last_parent_statbuf;
@@ -48,7 +50,6 @@
uoff_t high_offset;
size_t size;
int ret;
-
high_offset = stream->istream.v_offset + (stream->pos - stream->skip);
if (zstream->eof_offset == high_offset) {
i_assert(zstream->high_pos == 0 ||
@@ -87,7 +88,14 @@
if (stream->pos == stream->buffer_size) {
if (stream->skip > 0) {
/* lose our buffer cache */
- i_stream_compress(stream);
+ /* try to save our buffer cache as much as possible */
+
+ if (zstream->marked && (stream-> skip - (stream->istream.v_offset - zstream->marked_offset)) >0 ){
+
+ i_stream_compress1(stream, stream-> skip - (stream->istream.v_offset - zstream->marked_offset));
+ } else {
+ i_stream_compress(stream);
+ }
}
if (stream->pos == stream->buffer_size)
@@ -215,8 +223,12 @@
struct bzlib_istream *zstream = (struct bzlib_istream *) stream;
uoff_t start_offset = stream->istream.v_offset - stream->skip;
+ if (mark)
+ zstream->marked_offset = v_offset;
if (v_offset < start_offset) {
/* have to seek backwards */
+
+
i_stream_bzlib_reset(zstream);
start_offset = 0;
} else if (zstream->high_pos != 0) {
@@ -243,6 +255,7 @@
}
i_stream_skip(&stream->istream, avail);
+
} while (i_stream_read(&stream->istream) >= 0);
if (stream->istream.v_offset != v_offset) {
@@ -260,8 +273,11 @@
}
}
- if (mark)
+ if (mark){
zstream->marked = TRUE;
+ zstream->marked_offset = v_offset;
+ }
+
}
static const struct stat *
@@ -329,7 +345,9 @@
i_stream_bzlib_init(zstream);
zstream->istream.iostream.close = i_stream_bzlib_close;
- zstream->istream.max_buffer_size = input->real_stream->max_buffer_size;
+ // zstream->istream.max_buffer_size = (input->real_stream->max_buffer_size);
+ zstream->istream.max_buffer_size = BUFF_SIZE;
+
zstream->istream.read = i_stream_bzlib_read;
zstream->istream.seek = i_stream_bzlib_seek;
zstream->istream.stat = i_stream_bzlib_stat;
--
Gdyby ktoś miał zbędny Toshiba G450 - to chętnie przejmę ;)
< asuffield> a workstation is anything you can stick on somebodies desk
and con them into using
-- in #debian-devel