Re: [Libguestfs] [PATCH] Add a cache for iconv_t handles to hive_t

2018-02-10 Thread Richard W.M. Jones
On Sat, Feb 10, 2018 at 01:40:36AM +0100, Hilko Bengen wrote:
> * Richard W.M. Jones:
> 
> >> +threadlib
> >>  '
> >
> > Probably better to keep these sorted.
> 
> When I rebuilt from scratch, it turned out that threadlib was not
> enough. Using the "lock" module with the gl_lock_{init,lock,unlock}
> instead of glthread_lock_{init,lock,unlock} as described in lock.h seems
> correct, though. I have pushed that change.
> 
> > I wonder if there's a way we can avoid hard-coding ‘4’ here, which
> > AIUI is the size of the enum type.  Maybe adding an extra enum case
> > ‘nr_recode_types’?
> 
> Sure, done.

Great, thanks.

I'm going to do a 1.3.15 release soonish.

Rich.

-- 
Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones
Read my programming and virtualization blog: http://rwmj.wordpress.com
libguestfs lets you edit virtual machines.  Supports shell scripting,
bindings from many languages.  http://libguestfs.org

___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs

Re: [Libguestfs] [PATCH] Add a cache for iconv_t handles to hive_t

2018-02-09 Thread Richard W.M. Jones
On Fri, Feb 09, 2018 at 03:52:20PM +0100, Hilko Bengen wrote:
> diff --git a/bootstrap b/bootstrap
> index bd82477..373fad8 100755
> --- a/bootstrap
> +++ b/bootstrap
> @@ -75,6 +75,7 @@ vc-list-files
>  warnings
>  xstrtol
>  xstrtoll
> +threadlib
>  '

Probably better to keep these sorted.

> +typedef enum {
> +  utf8_to_latin1 = 0,
> +  latin1_to_utf8,
> +  utf8_to_utf16le,
> +  utf16le_to_utf8,
> +} recode_type;
> +
>  struct hive_h {
>char *filename;
>int fd;
> @@ -79,6 +88,11 @@ struct hive_h {
>/* Internal data for mmap replacement */
>void *p_winmap;
>  #endif
> +
> +  struct {
> +gl_lock_t mutex;
> +iconv_t *handle;
> +  } iconv_cache[4];

I wonder if there's a way we can avoid hard-coding ‘4’ here, which
AIUI is the size of the enum type.  Maybe adding an extra enum case
‘nr_recode_types’?


The rest looks good.

ACK, if you sort the list of GNUlib modules in bootstrap.

And maybe try to fix the hard-coded number if there's an easy way
before pushing.

Rich.

-- 
Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones
Read my programming and virtualization blog: http://rwmj.wordpress.com
libguestfs lets you edit virtual machines.  Supports shell scripting,
bindings from many languages.  http://libguestfs.org

___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs

Re: [Libguestfs] [PATCH] Add a cache for iconv_t handles to hive_t

2018-02-09 Thread Richard W.M. Jones
On Fri, Feb 09, 2018 at 01:30:03PM +, Richard W.M. Jones wrote:
> (4) Replace any calls to pthread_mutex_* with gl_lock_*.  It's
> probably not necessary to check return codes, unless they need to be
> recursive in which case use gl_recursive_lock_* instead.

Two sentences jammed into one there, it should say:

(4) Replace any calls to pthread_mutex_* with gl_lock_*, unless they
need to be recursive in which case use gl_recursive_lock_* instead.
It's probably not necessary to check return codes,

Rich.

-- 
Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones
Read my programming and virtualization blog: http://rwmj.wordpress.com
libguestfs lets you edit virtual machines.  Supports shell scripting,
bindings from many languages.  http://libguestfs.org

___
Libguestfs mailing list
Libguestfs@redhat.com
https://www.redhat.com/mailman/listinfo/libguestfs


Re: [Libguestfs] [PATCH] Add a cache for iconv_t handles to hive_t

2018-02-09 Thread Richard W.M. Jones
On Fri, Feb 09, 2018 at 01:52:52AM +0100, Hilko Bengen wrote:
> It was brought to my attention that dumping a registry hive causes a
> lot of time spent in disk I/O activity because iconv_open() and
> iconv_close() are called for every key. Every iconv_open() call causes
> /usr/lib/.../gconv/$ENCODING.so to be opened and mapped.
> 
> The iconv_t handles are now cached in the hive_h struct; they are
> opened on-demand and re-used.
> 
> On my ~10 year old Lenovo T60, I have seen 57% savings in the overal
> runtime of running
>
> hivexregedit --export windows-8-enterprise-software.hive '\\'
> ---
>  lib/handle.c | 43 ++-
>  lib/hivex-internal.h | 31 ++-
>  lib/node.c   |  6 +++---
>  lib/utf16.c  | 38 --
>  lib/value.c  | 10 +-
>  lib/write.c  |  4 ++--
>  6 files changed, 90 insertions(+), 42 deletions(-)
> 
> diff --git a/lib/handle.c b/lib/handle.c
> index 9dcf81d..7d715df 100644
> --- a/lib/handle.c
> +++ b/lib/handle.c
> @@ -31,6 +31,9 @@
>  #include 
>  #include 
>  
> +#include 
> +#include 

This makes hivex depend unconditionally on pthread.  It's possible to
make this optional so that hivex would use locking when the main
program is already linked to pthread but would skip locking otherwise,
and it's a fairly simply change:

(1) Edit ‘bootstrap’ and add ‘threadlib’ to the list of modules near
the end.  You'll probably need to rerun ./bootstrap after this.

(2) Read ‘.gnulib/modules/threadlib’ and follow the instructions for
modifying configure.ac and Makefile.am.

(3) Replace #include  -> #include "glthread/lock.h".

(4) Replace any calls to pthread_mutex_* with gl_lock_*.  It's
probably not necessary to check return codes, unless they need to be
recursive in which case use gl_recursive_lock_* instead.

>  #ifdef HAVE_MMAP
>  #include 
>  #else
> @@ -62,6 +65,32 @@ header_checksum (const hive_h *h)
>  
>  #define HIVEX_OPEN_MSGLVL_MASK (HIVEX_OPEN_VERBOSE|HIVEX_OPEN_DEBUG)
>  
> +iconv_t *
> +_hivex_get_iconv (hive_h *h, recode_type t)
> +{
> +  pthread_mutex_lock (&h->iconv_cache[t].mutex);
> +  if (h->iconv_cache[t].handle == NULL) {
> +if (t == utf8_to_latin1)
> +  h->iconv_cache[t].handle = iconv_open ("LATIN1", "UTF-8");
> +else if (t == latin1_to_utf8)
> +  h->iconv_cache[t].handle = iconv_open ("UTF-8", "LATIN1");
> +else if (t == utf8_to_utf16le)
> +  h->iconv_cache[t].handle = iconv_open ("UTF-16LE", "UTF-8");
> +else if (t == utf16le_to_utf8)
> +  h->iconv_cache[t].handle = iconv_open ("UTF-8", "UTF-16LE");
> +  } else {
> +/* reinitialize iconv context */
> +iconv (h->iconv_cache[t].handle, NULL, 0, NULL, 0);
> +  }
> +  return h->iconv_cache[t].handle;
> +}
> +
> +void
> +_hivex_release_iconv (hive_h *h, recode_type t)
> +{
> +  pthread_mutex_unlock (&h->iconv_cache[t].mutex);
> +}
> +
>  hive_h *
>  hivex_open (const char *filename, int flags)
>  {
> @@ -164,11 +193,17 @@ hivex_open (const char *filename, int flags)
>  goto error;
>}
>  
> +  for (int t=0; t<3; t++) {
> +pthread_mutex_init (&h->iconv_cache[t].mutex, NULL);
> +h->iconv_cache[t].handle = NULL;
> +  }
> +
>/* Last modified time. */
>h->last_modified = le64toh ((int64_t) h->hdr->last_modified);
>  
>if (h->msglvl >= 2) {
> -char *name = _hivex_windows_utf16_to_utf8 (h->hdr->name, 64);
> +char *name = _hivex_recode (h, utf16le_to_utf8,
> +h->hdr->name, 64, NULL);
>  
>  fprintf (stderr,
>   "hivex_open: header fields:\n"
> @@ -424,6 +459,12 @@ hivex_close (hive_h *h)
>else
>  r = 0;
>free (h->filename);
> +  for (int t=0; t<3; t++) {
> +if (h->iconv_cache[t].handle != NULL) {
> +  iconv_close (h->iconv_cache[t].handle);
> +  h->iconv_cache[t].handle = NULL;
> +}
> +  }
>free (h);
>  
>return r;
> diff --git a/lib/hivex-internal.h b/lib/hivex-internal.h
> index 9a497ed..fefdc81 100644
> --- a/lib/hivex-internal.h
> +++ b/lib/hivex-internal.h
> @@ -23,6 +23,8 @@
>  #include 
>  #include 
>  
> +#include 
> +
>  #include "byte_conversions.h"
>  
>  #define STREQ(a,b) (strcmp((a),(b)) == 0)
> @@ -35,6 +37,13 @@
>  #define STRCASENEQLEN(a,b,n) (strncasecmp((a),(b),(n)) != 0)
>  #define STRPREFIX(a,b) (strncmp((a),(b),strlen((b))) == 0)
>  
> +typedef enum {
> +  utf8_to_latin1 = 0,
> +  latin1_to_utf8,
> +  utf8_to_utf16le,
> +  utf16le_to_utf8,
> +} recode_type;
> +
>  struct hive_h {
>char *filename;
>int fd;
> @@ -79,6 +88,11 @@ struct hive_h {
>/* Internal data for mmap replacement */
>void *p_winmap;
>  #endif
> +
> +  struct {
> +pthread_mutex_t mutex;
> +iconv_t *handle;
> +  } iconv_cache[4];
>  };
>  
>  /* Format of registry blocks. NB. All fields are little endian. */
> @@ -282,17 +296,16 @@ extern void _hivex_free_offset_list (offset_list *list);
>  extern size_t * _hivex_return_offse