On Fri, Feb 09, 2018 at 01:52:52AM +0100, Hilko Bengen wrote:
> It was brought to my attention that dumping a registry hive causes a
> lot of time spent in disk I/O activity because iconv_open() and
> iconv_close() are called for every key. Every iconv_open() call causes
> /usr/lib/.../gconv/$ENCODING.so to be opened and mapped.
>
> The iconv_t handles are now cached in the hive_h struct; they are
> opened on-demand and re-used.
>
> On my ~10 year old Lenovo T60, I have seen 57% savings in the overal
> runtime of running
>
> hivexregedit --export windows-8-enterprise-software.hive '\\'
> ---
> lib/handle.c | 43 ++-
> lib/hivex-internal.h | 31 ++-
> lib/node.c | 6 +++---
> lib/utf16.c | 38 --
> lib/value.c | 10 +-
> lib/write.c | 4 ++--
> 6 files changed, 90 insertions(+), 42 deletions(-)
>
> diff --git a/lib/handle.c b/lib/handle.c
> index 9dcf81d..7d715df 100644
> --- a/lib/handle.c
> +++ b/lib/handle.c
> @@ -31,6 +31,9 @@
> #include
> #include
>
> +#include
> +#include
This makes hivex depend unconditionally on pthread. It's possible to
make this optional so that hivex would use locking when the main
program is already linked to pthread but would skip locking otherwise,
and it's a fairly simply change:
(1) Edit ‘bootstrap’ and add ‘threadlib’ to the list of modules near
the end. You'll probably need to rerun ./bootstrap after this.
(2) Read ‘.gnulib/modules/threadlib’ and follow the instructions for
modifying configure.ac and Makefile.am.
(3) Replace #include -> #include "glthread/lock.h".
(4) Replace any calls to pthread_mutex_* with gl_lock_*. It's
probably not necessary to check return codes, unless they need to be
recursive in which case use gl_recursive_lock_* instead.
> #ifdef HAVE_MMAP
> #include
> #else
> @@ -62,6 +65,32 @@ header_checksum (const hive_h *h)
>
> #define HIVEX_OPEN_MSGLVL_MASK (HIVEX_OPEN_VERBOSE|HIVEX_OPEN_DEBUG)
>
> +iconv_t *
> +_hivex_get_iconv (hive_h *h, recode_type t)
> +{
> + pthread_mutex_lock (&h->iconv_cache[t].mutex);
> + if (h->iconv_cache[t].handle == NULL) {
> +if (t == utf8_to_latin1)
> + h->iconv_cache[t].handle = iconv_open ("LATIN1", "UTF-8");
> +else if (t == latin1_to_utf8)
> + h->iconv_cache[t].handle = iconv_open ("UTF-8", "LATIN1");
> +else if (t == utf8_to_utf16le)
> + h->iconv_cache[t].handle = iconv_open ("UTF-16LE", "UTF-8");
> +else if (t == utf16le_to_utf8)
> + h->iconv_cache[t].handle = iconv_open ("UTF-8", "UTF-16LE");
> + } else {
> +/* reinitialize iconv context */
> +iconv (h->iconv_cache[t].handle, NULL, 0, NULL, 0);
> + }
> + return h->iconv_cache[t].handle;
> +}
> +
> +void
> +_hivex_release_iconv (hive_h *h, recode_type t)
> +{
> + pthread_mutex_unlock (&h->iconv_cache[t].mutex);
> +}
> +
> hive_h *
> hivex_open (const char *filename, int flags)
> {
> @@ -164,11 +193,17 @@ hivex_open (const char *filename, int flags)
> goto error;
>}
>
> + for (int t=0; t<3; t++) {
> +pthread_mutex_init (&h->iconv_cache[t].mutex, NULL);
> +h->iconv_cache[t].handle = NULL;
> + }
> +
>/* Last modified time. */
>h->last_modified = le64toh ((int64_t) h->hdr->last_modified);
>
>if (h->msglvl >= 2) {
> -char *name = _hivex_windows_utf16_to_utf8 (h->hdr->name, 64);
> +char *name = _hivex_recode (h, utf16le_to_utf8,
> +h->hdr->name, 64, NULL);
>
> fprintf (stderr,
> "hivex_open: header fields:\n"
> @@ -424,6 +459,12 @@ hivex_close (hive_h *h)
>else
> r = 0;
>free (h->filename);
> + for (int t=0; t<3; t++) {
> +if (h->iconv_cache[t].handle != NULL) {
> + iconv_close (h->iconv_cache[t].handle);
> + h->iconv_cache[t].handle = NULL;
> +}
> + }
>free (h);
>
>return r;
> diff --git a/lib/hivex-internal.h b/lib/hivex-internal.h
> index 9a497ed..fefdc81 100644
> --- a/lib/hivex-internal.h
> +++ b/lib/hivex-internal.h
> @@ -23,6 +23,8 @@
> #include
> #include
>
> +#include
> +
> #include "byte_conversions.h"
>
> #define STREQ(a,b) (strcmp((a),(b)) == 0)
> @@ -35,6 +37,13 @@
> #define STRCASENEQLEN(a,b,n) (strncasecmp((a),(b),(n)) != 0)
> #define STRPREFIX(a,b) (strncmp((a),(b),strlen((b))) == 0)
>
> +typedef enum {
> + utf8_to_latin1 = 0,
> + latin1_to_utf8,
> + utf8_to_utf16le,
> + utf16le_to_utf8,
> +} recode_type;
> +
> struct hive_h {
>char *filename;
>int fd;
> @@ -79,6 +88,11 @@ struct hive_h {
>/* Internal data for mmap replacement */
>void *p_winmap;
> #endif
> +
> + struct {
> +pthread_mutex_t mutex;
> +iconv_t *handle;
> + } iconv_cache[4];
> };
>
> /* Format of registry blocks. NB. All fields are little endian. */
> @@ -282,17 +296,16 @@ extern void _hivex_free_offset_list (offset_list *list);
> extern size_t * _hivex_return_offse