It was brought to my attention that dumping a registry hive causes a lot of time spent in disk I/O activity because iconv_open() and iconv_close() are called for every key. Every iconv_open() call causes /usr/lib/.../gconv/$ENCODING.so to be opened and mapped.
The iconv_t handles are now cached in the hive_h struct; they are opened on-demand and re-used. On my ~10 year old Lenovo T60, I have seen 57% savings in the overal runtime of running hivexregedit --export windows-8-enterprise-software.hive '\\' --- lib/handle.c | 43 ++++++++++++++++++++++++++++++++++++++++++- lib/hivex-internal.h | 31 ++++++++++++++++++++++--------- lib/node.c | 6 +++--- lib/utf16.c | 38 ++++++++++++++++---------------------- lib/value.c | 10 +++++----- lib/write.c | 4 ++-- 6 files changed, 90 insertions(+), 42 deletions(-) diff --git a/lib/handle.c b/lib/handle.c index 9dcf81d..7d715df 100644 --- a/lib/handle.c +++ b/lib/handle.c @@ -31,6 +31,9 @@ #include <errno.h> #include <assert.h> +#include <iconv.h> +#include <pthread.h> + #ifdef HAVE_MMAP #include <sys/mman.h> #else @@ -62,6 +65,32 @@ header_checksum (const hive_h *h) #define HIVEX_OPEN_MSGLVL_MASK (HIVEX_OPEN_VERBOSE|HIVEX_OPEN_DEBUG) +iconv_t * +_hivex_get_iconv (hive_h *h, recode_type t) +{ + pthread_mutex_lock (&h->iconv_cache[t].mutex); + if (h->iconv_cache[t].handle == NULL) { + if (t == utf8_to_latin1) + h->iconv_cache[t].handle = iconv_open ("LATIN1", "UTF-8"); + else if (t == latin1_to_utf8) + h->iconv_cache[t].handle = iconv_open ("UTF-8", "LATIN1"); + else if (t == utf8_to_utf16le) + h->iconv_cache[t].handle = iconv_open ("UTF-16LE", "UTF-8"); + else if (t == utf16le_to_utf8) + h->iconv_cache[t].handle = iconv_open ("UTF-8", "UTF-16LE"); + } else { + /* reinitialize iconv context */ + iconv (h->iconv_cache[t].handle, NULL, 0, NULL, 0); + } + return h->iconv_cache[t].handle; +} + +void +_hivex_release_iconv (hive_h *h, recode_type t) +{ + pthread_mutex_unlock (&h->iconv_cache[t].mutex); +} + hive_h * hivex_open (const char *filename, int flags) { @@ -164,11 +193,17 @@ hivex_open (const char *filename, int flags) goto error; } + for (int t=0; t<3; t++) { + pthread_mutex_init (&h->iconv_cache[t].mutex, NULL); + h->iconv_cache[t].handle = NULL; + } + /* Last modified time. */ h->last_modified = le64toh ((int64_t) h->hdr->last_modified); if (h->msglvl >= 2) { - char *name = _hivex_windows_utf16_to_utf8 (h->hdr->name, 64); + char *name = _hivex_recode (h, utf16le_to_utf8, + h->hdr->name, 64, NULL); fprintf (stderr, "hivex_open: header fields:\n" @@ -424,6 +459,12 @@ hivex_close (hive_h *h) else r = 0; free (h->filename); + for (int t=0; t<3; t++) { + if (h->iconv_cache[t].handle != NULL) { + iconv_close (h->iconv_cache[t].handle); + h->iconv_cache[t].handle = NULL; + } + } free (h); return r; diff --git a/lib/hivex-internal.h b/lib/hivex-internal.h index 9a497ed..fefdc81 100644 --- a/lib/hivex-internal.h +++ b/lib/hivex-internal.h @@ -23,6 +23,8 @@ #include <stddef.h> #include <string.h> +#include <iconv.h> + #include "byte_conversions.h" #define STREQ(a,b) (strcmp((a),(b)) == 0) @@ -35,6 +37,13 @@ #define STRCASENEQLEN(a,b,n) (strncasecmp((a),(b),(n)) != 0) #define STRPREFIX(a,b) (strncmp((a),(b),strlen((b))) == 0) +typedef enum { + utf8_to_latin1 = 0, + latin1_to_utf8, + utf8_to_utf16le, + utf16le_to_utf8, +} recode_type; + struct hive_h { char *filename; int fd; @@ -79,6 +88,11 @@ struct hive_h { /* Internal data for mmap replacement */ void *p_winmap; #endif + + struct { + pthread_mutex_t mutex; + iconv_t *handle; + } iconv_cache[4]; }; /* Format of registry blocks. NB. All fields are little endian. */ @@ -282,17 +296,16 @@ extern void _hivex_free_offset_list (offset_list *list); extern size_t * _hivex_return_offset_list (offset_list *list); extern void _hivex_print_offset_list (offset_list *list, FILE *fp); +/* handle.c */ +extern iconv_t * _hivex_get_iconv (hive_h *h, recode_type r); +extern void _hivex_release_iconv (hive_h *h, recode_type r); + /* utf16.c */ -extern char * _hivex_recode (const char *input_encoding, - const char *input, size_t input_len, - const char *output_encoding, size_t *output_len); -#define _hivex_windows_utf16_to_utf8(_input, _len) \ - _hivex_recode ("UTF-16LE", _input, _len, "UTF-8", NULL) -#define _hivex_windows_latin1_to_utf8(_input, _len) \ - _hivex_recode ("LATIN1", _input, _len, "UTF-8", NULL) -extern char* _hivex_encode_string(const char *str, size_t *size, int *utf16); +extern char * _hivex_recode (hive_h *h, recode_type r, + const char *input, size_t input_len, size_t *output_len); +extern char* _hivex_encode_string (hive_h *h, const char *str, size_t *size, int *utf16); extern size_t _hivex_utf16_string_len_in_bytes_max (const char *str, size_t len); -extern size_t _hivex_utf8_strlen (const char* str, size_t len, int utf16); +extern size_t _hivex_utf8_strlen (hive_h *h, const char* str, size_t len, int utf16); /* util.c */ extern void _hivex_free_strings (char **argv); diff --git a/lib/node.c b/lib/node.c index 36e61c4..21cd127 100644 --- a/lib/node.c +++ b/lib/node.c @@ -90,9 +90,9 @@ hivex_node_name (hive_h *h, hive_node_h node) } size_t flags = le16toh (nk->flags); if (flags & 0x20) { - return _hivex_windows_latin1_to_utf8 (nk->name, len); + return _hivex_recode (h, latin1_to_utf8, nk->name, len, NULL); } else { - return _hivex_windows_utf16_to_utf8 (nk->name, len); + return _hivex_recode (h, utf16le_to_utf8, nk->name, len, NULL); } } @@ -116,7 +116,7 @@ hivex_node_name_len (hive_h *h, hive_node_h node) return 0; } - return _hivex_utf8_strlen (nk->name, len, ! (le16toh (nk->flags) & 0x20)); + return _hivex_utf8_strlen (h, nk->name, len, ! (le16toh (nk->flags) & 0x20)); } diff --git a/lib/utf16.c b/lib/utf16.c index 238f40a..c0f0b05 100644 --- a/lib/utf16.c +++ b/lib/utf16.c @@ -30,24 +30,21 @@ #include "hivex-internal.h" char * -_hivex_recode (const char *input_encoding, const char *input, size_t input_len, - const char *output_encoding, size_t *output_len) +_hivex_recode (hive_h *h, recode_type t, + const char *input, size_t input_len, size_t *output_len) { - iconv_t ic = iconv_open (output_encoding, input_encoding); - if (ic == (iconv_t) -1) - return NULL; - /* iconv(3) has an insane interface ... */ size_t outalloc = input_len; + iconv_t *ic = _hivex_get_iconv (h, t); again:; size_t inlen = input_len; size_t outlen = outalloc; char *out = malloc (outlen + 1); if (out == NULL) { int err = errno; - iconv_close (ic); + _hivex_release_iconv (h, t); errno = err; return NULL; } @@ -56,18 +53,17 @@ _hivex_recode (const char *input_encoding, const char *input, size_t input_len, size_t r = iconv (ic, (ICONV_CONST char **) &inp, &inlen, &outp, &outlen); if (r == (size_t) -1) { + int err = errno; if (errno == E2BIG) { - int err = errno; /* Reset errno here because we don't want to accidentally * return E2BIG to a library caller. */ - errno = 0; size_t prev = outalloc; /* Try again with a larger output buffer. */ free (out); outalloc *= 2; if (outalloc < prev) { - iconv_close (ic); + _hivex_release_iconv (h, t); errno = err; return NULL; } @@ -75,19 +71,17 @@ _hivex_recode (const char *input_encoding, const char *input, size_t input_len, } else { /* Else some conversion failure, eg. EILSEQ, EINVAL. */ - int err = errno; - iconv_close (ic); + _hivex_release_iconv (h, t); free (out); errno = err; return NULL; } } + _hivex_release_iconv (h, t); *outp = '\0'; - iconv_close (ic); if (output_len != NULL) *output_len = outp - out; - return out; } @@ -95,17 +89,17 @@ _hivex_recode (const char *input_encoding, const char *input, size_t input_len, * storing in the hive file, as needed. */ char* -_hivex_encode_string(const char *str, size_t *size, int *utf16) +_hivex_encode_string (hive_h *h, const char *str, size_t *size, int *utf16) { char* outstr; *utf16 = 0; - outstr = _hivex_recode ("UTF-8", str, strlen(str), - "LATIN1", size); + outstr = _hivex_recode (h, utf8_to_latin1, + str, strlen(str), size); if (outstr != NULL) return outstr; *utf16 = 1; - outstr = _hivex_recode ("UTF-8", str, strlen(str), - "UTF-16LE", size); + outstr = _hivex_recode (h, utf8_to_utf16le, + str, strlen(str), size); return outstr; } @@ -128,11 +122,11 @@ _hivex_utf16_string_len_in_bytes_max (const char *str, size_t len) } size_t -_hivex_utf8_strlen (const char* str, size_t len, int utf16) +_hivex_utf8_strlen (hive_h *h, const char* str, size_t len, int utf16) { - const char *encoding = utf16 ? "UTF-16LE" : "LATIN1"; + recode_type t = utf16 ? utf16le_to_utf8 : latin1_to_utf8; size_t ret = 0; - char *buf = _hivex_recode(encoding, str, len, "UTF-8", &ret); + char *buf = _hivex_recode (h, t, str, len, &ret); free(buf); return ret; } diff --git a/lib/value.c b/lib/value.c index 2dfe006..3257b53 100644 --- a/lib/value.c +++ b/lib/value.c @@ -209,7 +209,7 @@ hivex_value_key_len (hive_h *h, hive_value_h value) SET_ERRNO (EFAULT, "key length is too long (%zu, %zu)", len, seg_len); return 0; } - return _hivex_utf8_strlen (vk->name, len, ! (le16toh (vk->flags) & 0x01)); + return _hivex_utf8_strlen (h, vk->name, len, ! (le16toh (vk->flags) & 0x01)); } char * @@ -232,9 +232,9 @@ hivex_value_key (hive_h *h, hive_value_h value) return NULL; } if (flags & 0x01) { - return _hivex_windows_latin1_to_utf8 (vk->name, len); + return _hivex_recode (h, latin1_to_utf8, vk->name, len, NULL); } else { - return _hivex_windows_utf16_to_utf8 (vk->name, len); + return _hivex_recode (h, utf16le_to_utf8, vk->name, len, NULL); } } @@ -471,7 +471,7 @@ hivex_value_string (hive_h *h, hive_value_h value) if (slen < len) len = slen; - char *ret = _hivex_windows_utf16_to_utf8 (data, len); + char *ret = _hivex_recode (h, utf16le_to_utf8, data, len, NULL); free (data); if (ret == NULL) return NULL; @@ -538,7 +538,7 @@ hivex_value_multiple_strings (hive_h *h, hive_value_h value) } ret = ret2; - ret[nr_strings-1] = _hivex_windows_utf16_to_utf8 (p, plen); + ret[nr_strings-1] = _hivex_recode (h, utf16le_to_utf8, p, plen, NULL); ret[nr_strings] = NULL; if (ret[nr_strings-1] == NULL) { _hivex_free_strings (ret); diff --git a/lib/write.c b/lib/write.c index 33b64e4..70105c9 100644 --- a/lib/write.c +++ b/lib/write.c @@ -610,7 +610,7 @@ hivex_node_add_child (hive_h *h, hive_node_h parent, const char *name) size_t recoded_name_len; int use_utf16 = 0; char *recoded_name = - _hivex_encode_string (name, &recoded_name_len, &use_utf16); + _hivex_encode_string (h, name, &recoded_name_len, &use_utf16); if (recoded_name == NULL) { SET_ERRNO (EINVAL, "malformed name"); return 0; @@ -959,7 +959,7 @@ hivex_node_set_values (hive_h *h, hive_node_h node, static const char vk_id[2] = { 'v', 'k' }; size_t recoded_name_len; int use_utf16; - char* recoded_name = _hivex_encode_string (values[i].key, &recoded_name_len, + char* recoded_name = _hivex_encode_string (h, values[i].key, &recoded_name_len, &use_utf16); seg_len = sizeof (struct ntreg_vk_record) + recoded_name_len; size_t vk_offs = allocate_block (h, seg_len, vk_id); -- 2.11.0 _______________________________________________ Libguestfs mailing list Libguestfs@redhat.com https://www.redhat.com/mailman/listinfo/libguestfs