Re: [PATCH 2/3] lib: zlib_inflate: improves decompression performance
On Wed, Nov 11, 2020 at 5:06 PM Zhaoxiu Zeng wrote: > 在 2020/11/11 11:46, Jann Horn 写道: > > On Mon, Nov 9, 2020 at 8:27 PM wrote: > >> This patch does: > >> 1. Cleanup code and reduce branches > >> 2. Use copy_from_back to copy the matched bytes from the back output buffer > > > > What exactly is copy_from_back()? Is it like memmove()? If yes, have > > you tried using memmove() instead of the code added in patch 1/3? > > > > If use memcpy(or memmove), the code will be like this: > while (dist < len) { > memcpy(out, out - dist, dist); > out += dist; > len -= dist; > } > memcpy(out, out - dist, len); Ah, thanks. So basically it means: "repeatedly copy a pattern of length `dist` from `out - dist` to `out` until `len` bytes have been written".
Re: [PATCH 2/3] lib: zlib_inflate: improves decompression performance
在 2020/11/11 11:46, Jann Horn 写道: > On Mon, Nov 9, 2020 at 8:27 PM wrote: >> This patch does: >> 1. Cleanup code and reduce branches >> 2. Use copy_from_back to copy the matched bytes from the back output buffer > > What exactly is copy_from_back()? Is it like memmove()? If yes, have > you tried using memmove() instead of the code added in patch 1/3? > If we use memcpy() (or memmove()), the code would look like this: while (dist < len) { memcpy(out, out - dist, dist); out += dist; len -= dist; } memcpy(out, out - dist, len); or like this: const u8 * const from = out - dist; while (dist < len) { memcpy(out, from, dist); out += dist; len -= dist; dist *= 2; } memcpy(out, from, len); In addition, len is small in most cases, so the overhead of the function calls is relatively expensive.
Re: [PATCH 2/3] lib: zlib_inflate: improves decompression performance
On Mon, Nov 9, 2020 at 8:27 PM wrote: > This patch does: > 1. Cleanup code and reduce branches > 2. Use copy_from_back to copy the matched bytes from the back output buffer What exactly is copy_from_back()? Is it like memmove()? If yes, have you tried using memmove() instead of the code added in patch 1/3?
[PATCH 2/3] lib: zlib_inflate: improves decompression performance
From: Zhaoxiu Zeng This patch does: 1. Cleanup code and reduce branches 2. Use copy_from_back to copy the matched bytes from the back output buffer I tested on 5.8.18-300.fc33.x86_64. The performance of function zlib_inflate is improved by about 7%. If the CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is disabled in copy_from_back.h, the performance is improved by about 5%. Signed-off-by: Zhaoxiu Zeng --- lib/zlib_inflate/inffast.c | 122 ++--- 1 file changed, 17 insertions(+), 105 deletions(-) diff --git a/lib/zlib_inflate/inffast.c b/lib/zlib_inflate/inffast.c index ed1f3df27260..c27e45fc5335 100644 --- a/lib/zlib_inflate/inffast.c +++ b/lib/zlib_inflate/inffast.c @@ -4,29 +4,13 @@ */ #include +#include #include "inftrees.h" #include "inflate.h" #include "inffast.h" #ifndef ASMINF -union uu { - unsigned short us; - unsigned char b[2]; -}; - -/* Endian independed version */ -static inline unsigned short -get_unaligned16(const unsigned short *p) -{ - union uu mm; - unsigned char *b = (unsigned char *)p; - - mm.b[0] = b[0]; - mm.b[1] = b[1]; - return mm.us; -} - /* Decode literal, length, and distance codes and write out the resulting literal and match bytes until either not enough input or output is @@ -184,104 +168,32 @@ void inflate_fast(z_streamp strm, unsigned start) state->mode = BAD; break; } -from = window; -if (write == 0) { /* very common case */ -from += wsize - op; -if (op < len) { /* some from window */ -len -= op; -do { -*out++ = *from++; -} while (--op); -from = out - dist; /* rest from output */ -} -} -else if (write < op) { /* wrap around window */ -from += wsize + write - op; -op -= write; -if (op < len) { /* some from end of window */ -len -= op; -do { -*out++ = *from++; -} while (--op); -from = window; -if (write < len) { /* some from start of window */ -op = write; +from = window + write - op; +if (write < op) { /* very common case */ +from += wsize; +if (write) {/* wrap around window */ +op -= write; +if (op < len) { /* some from end of window */ len 
-= op; do { *out++ = *from++; } while (--op); -from = out - dist; /* rest from output */ +from = window; /* some from start of window */ +op = write; } } } -else { /* contiguous in window */ -from += write - op; -if (op < len) { /* some from window */ -len -= op; -do { -*out++ = *from++; -} while (--op); -from = out - dist; /* rest from output */ -} -} -while (len > 2) { -*out++ = *from++; -*out++ = *from++; -*out++ = *from++; -len -= 3; -} -if (len) { -*out++ = *from++; -if (len > 1) +if (op < len) { /* some from window */ +len -= op; /* rest from output */ +do { *out++ = *from++; +} while (--op); +} else { +dist = out - from; } } -else { - unsigned short *sout; - unsigned long loops; - -from = out - dist; /* copy direct from output */ - /* minimum length is three */ - /* Align out addr */ - if (!((long)(out - 1) & 1)) { - *out++ = *from++; -