[Patch v7 4/6] restore: transparently support gzipped input

2014-04-10 Thread Austin Clements
On Sat, 05 Apr 2014, David Bremner  wrote:
> We rely completely on zlib to do the right thing in detecting gzipped
> input. Since our dump format is chosen to be 7 bit ascii, this should
> be fine.
> ---
>  doc/man1/notmuch-restore.rst |  8 
>  notmuch-restore.c| 93 
> +---
>  test/T240-dump-restore.sh| 14 +++
>  3 files changed, 92 insertions(+), 23 deletions(-)
>
> diff --git a/doc/man1/notmuch-restore.rst b/doc/man1/notmuch-restore.rst
> index d6cf19a..936b138 100644
> --- a/doc/man1/notmuch-restore.rst
> +++ b/doc/man1/notmuch-restore.rst
> @@ -50,6 +50,14 @@ Supported options for **restore** include
>  format, this heuristic, based the fact that batch-tag format
>  contains no parentheses, should be accurate.
>  
> +GZIPPED INPUT
> +=============
> +
> +\ **notmuch restore** will detect if the input is compressed in
> +**gzip(1)** format and automatically decompress it while reading. This
> +detection does not depend on file naming and in particular works for
> +standard input.
> +
>  SEE ALSO
>  
>  
> diff --git a/notmuch-restore.c b/notmuch-restore.c
> index c54d513..7abee0a 100644
> --- a/notmuch-restore.c
> +++ b/notmuch-restore.c
> @@ -22,6 +22,7 @@
>  #include "hex-escape.h"
>  #include "tag-util.h"
>  #include "string-util.h"
> +#include "zlib-extra.h"
>  
>  static regex_t regex;
>  
> @@ -128,10 +129,10 @@ notmuch_restore_command (notmuch_config_t *config, int 
> argc, char *argv[])
>  tag_op_list_t *tag_ops;
>  
>  char *input_file_name = NULL;
> -FILE *input = stdin;
> +const char *name_for_error = NULL;
> +gzFile input = NULL;
>  char *line = NULL;
>  void *line_ctx = NULL;
> -size_t line_size;
>  ssize_t line_len;
>  
>  int ret = 0;
> @@ -157,39 +158,69 @@ notmuch_restore_command (notmuch_config_t *config, int 
> argc, char *argv[])
>  };
>  
>  opt_index = parse_arguments (argc, argv, options, 1);
> -if (opt_index < 0)
> - return EXIT_FAILURE;
> +if (opt_index < 0) {
> + ret = EXIT_FAILURE;
> + goto DONE;
> +}
> +
> +name_for_error = input_file_name ? input_file_name : "stdin";
>  
>  if (! accumulate)
>   flags |= TAG_FLAG_REMOVE_ALL;
>  
> -if (input_file_name) {
> - input = fopen (input_file_name, "r");
> - if (input == NULL) {
> - fprintf (stderr, "Error opening %s for reading: %s\n",
> -  input_file_name, strerror (errno));
> - return EXIT_FAILURE;
> +errno = 0;
> +if (input_file_name)
> + input = gzopen (input_file_name, "r");
> +else {
> + int infd = dup (STDIN_FILENO);
> + if (infd < 0) {
> + fprintf (stderr, "Error duping stdin: %s\n",
> +  strerror (errno));
> + ret = EXIT_FAILURE;
> + goto DONE;
>   }
> + input = gzdopen (infd, "r");
> + if (! input)
> + close (infd);
> +}
> +
> +if (input == NULL) {
> + fprintf (stderr, "Error opening %s for (gzip) reading: %s\n",
> +  name_for_error, strerror (errno));
> + ret = EXIT_FAILURE;
> + goto DONE;
>  }
>  
>  if (opt_index < argc) {
>   fprintf (stderr, "Unused positional parameter: %s\n", argv[opt_index]);
> - return EXIT_FAILURE;
> + ret = EXIT_FAILURE;
> + goto DONE;
>  }
>  
>  tag_ops = tag_op_list_create (config);
>  if (tag_ops == NULL) {
>   fprintf (stderr, "Out of memory.\n");
> - return EXIT_FAILURE;
> + ret = EXIT_FAILURE;
> + goto DONE;
>  }
>  
>  do {
> - line_len = getline (&line, &line_size, input);
> + util_status_t status;
> +
> + status = gz_getline (line_ctx, &line, &line_len, input);
>  
>   /* empty input file not considered an error */
> - if (line_len < 0)
> - return EXIT_SUCCESS;
> + if (status == UTIL_EOF) {
> + ret = EXIT_SUCCESS;
> + goto DONE;
> + }
>  
> + if (status) {
> + fprintf (stderr, "Error reading (gzipped) input: %s\n",
> +  gz_error_string(status, input));
> + ret = EXIT_FAILURE;
> + goto DONE;
> + }
>  } while ((line_len == 0) ||
>(line[0] == '#') ||
>/* the cast is safe because we checked about for line_len < 0 */
> @@ -254,21 +285,37 @@ notmuch_restore_command (notmuch_config_t *config, int 
> argc, char *argv[])
>   if (ret)
>   break;
>  
> -}  while ((line_len = getline (&line, &line_size, input)) != -1);
> +}  while (! (ret = gz_getline (line_ctx, &line, &line_len, input)));
> +
>  
> -if (line_ctx != NULL)
> - talloc_free (line_ctx);
> +/* EOF is normal loop termination condition, UTIL_SUCCESS is
> + * impossible here */
> +if (ret == UTIL_EOF) {
> + ret = UTIL_SUCCESS;
> +} else {
> + fprintf (stderr, "Error reading (gzipped) input: %s\n",
> +  gz_error_string (ret, input));

ret = EXIT_FAILURE; here?

> +}
> +
> +/* currently 

Re: [Patch v7 4/6] restore: transparently support gzipped input

2014-04-10 Thread Austin Clements
On Sat, 05 Apr 2014, David Bremner da...@tethera.net wrote:
 We rely completely on zlib to do the right thing in detecting gzipped
 input. Since our dump format is chosen to be 7 bit ascii, this should
 be fine.
 ---
  doc/man1/notmuch-restore.rst |  8 
  notmuch-restore.c| 93 
 +---
  test/T240-dump-restore.sh| 14 +++
  3 files changed, 92 insertions(+), 23 deletions(-)

 diff --git a/doc/man1/notmuch-restore.rst b/doc/man1/notmuch-restore.rst
 index d6cf19a..936b138 100644
 --- a/doc/man1/notmuch-restore.rst
 +++ b/doc/man1/notmuch-restore.rst
 @@ -50,6 +50,14 @@ Supported options for **restore** include
  format, this heuristic, based the fact that batch-tag format
  contains no parentheses, should be accurate.
  
 +GZIPPED INPUT
 +=============
 +
 +\ **notmuch restore** will detect if the input is compressed in
 +**gzip(1)** format and automatically decompress it while reading. This
 +detection does not depend on file naming and in particular works for
 +standard input.
 +
  SEE ALSO
  
  
 diff --git a/notmuch-restore.c b/notmuch-restore.c
 index c54d513..7abee0a 100644
 --- a/notmuch-restore.c
 +++ b/notmuch-restore.c
 @@ -22,6 +22,7 @@
  #include "hex-escape.h"
  #include "tag-util.h"
  #include "string-util.h"
 +#include "zlib-extra.h"
  
  static regex_t regex;
  
 @@ -128,10 +129,10 @@ notmuch_restore_command (notmuch_config_t *config, int 
 argc, char *argv[])
  tag_op_list_t *tag_ops;
  
  char *input_file_name = NULL;
 -FILE *input = stdin;
 +const char *name_for_error = NULL;
 +gzFile input = NULL;
  char *line = NULL;
  void *line_ctx = NULL;
 -size_t line_size;
  ssize_t line_len;
  
  int ret = 0;
 @@ -157,39 +158,69 @@ notmuch_restore_command (notmuch_config_t *config, int 
 argc, char *argv[])
  };
  
  opt_index = parse_arguments (argc, argv, options, 1);
 -if (opt_index < 0)
 - return EXIT_FAILURE;
 +if (opt_index < 0) {
 + ret = EXIT_FAILURE;
 + goto DONE;
 +}
 +
 +name_for_error = input_file_name ? input_file_name : "stdin";
  
  if (! accumulate)
   flags |= TAG_FLAG_REMOVE_ALL;
  
 -if (input_file_name) {
 - input = fopen (input_file_name, "r");
 - if (input == NULL) {
 - fprintf (stderr, "Error opening %s for reading: %s\n",
 -  input_file_name, strerror (errno));
 - return EXIT_FAILURE;
 +errno = 0;
 +if (input_file_name)
 + input = gzopen (input_file_name, "r");
 +else {
 + int infd = dup (STDIN_FILENO);
 + if (infd < 0) {
 + fprintf (stderr, "Error duping stdin: %s\n",
 +  strerror (errno));
 + ret = EXIT_FAILURE;
 + goto DONE;
   }
 + input = gzdopen (infd, "r");
 + if (! input)
 + close (infd);
 +}
 +
 +if (input == NULL) {
 + fprintf (stderr, "Error opening %s for (gzip) reading: %s\n",
 +  name_for_error, strerror (errno));
 + ret = EXIT_FAILURE;
 + goto DONE;
  }
  
  if (opt_index < argc) {
   fprintf (stderr, "Unused positional parameter: %s\n", argv[opt_index]);
 - return EXIT_FAILURE;
 + ret = EXIT_FAILURE;
 + goto DONE;
  }
  
  tag_ops = tag_op_list_create (config);
  if (tag_ops == NULL) {
   fprintf (stderr, "Out of memory.\n");
 - return EXIT_FAILURE;
 + ret = EXIT_FAILURE;
 + goto DONE;
  }
  
  do {
 - line_len = getline (&line, &line_size, input);
 + util_status_t status;
 +
 + status = gz_getline (line_ctx, &line, &line_len, input);
  
   /* empty input file not considered an error */
 - if (line_len < 0)
 - return EXIT_SUCCESS;
 + if (status == UTIL_EOF) {
 + ret = EXIT_SUCCESS;
 + goto DONE;
 + }
  
 + if (status) {
 + fprintf (stderr, "Error reading (gzipped) input: %s\n",
 +  gz_error_string(status, input));
 + ret = EXIT_FAILURE;
 + goto DONE;
 + }
  } while ((line_len == 0) ||
(line[0] == '#') ||
/* the cast is safe because we checked about for line_len < 0 */
 @@ -254,21 +285,37 @@ notmuch_restore_command (notmuch_config_t *config, int 
 argc, char *argv[])
   if (ret)
   break;
  
 -}  while ((line_len = getline (&line, &line_size, input)) != -1);
 +}  while (! (ret = gz_getline (line_ctx, &line, &line_len, input)));
 +
  
 -if (line_ctx != NULL)
 - talloc_free (line_ctx);
 +/* EOF is normal loop termination condition, UTIL_SUCCESS is
 + * impossible here */
 +if (ret == UTIL_EOF) {
 + ret = UTIL_SUCCESS;
 +} else {
 + fprintf (stderr, "Error reading (gzipped) input: %s\n",
 +  gz_error_string (ret, input));

ret = EXIT_FAILURE; here?

 +}
 +
 +/* currently this should not be after DONE: since we don't 
 + * know if the xregcomp was reached
 + */
  
  if (input_format == DUMP_FORMAT_SUP)
  

[Patch v7 4/6] restore: transparently support gzipped input

2014-04-05 Thread David Bremner
We rely completely on zlib to do the right thing in detecting gzipped
input. Since our dump format is chosen to be 7 bit ascii, this should
be fine.
---
 doc/man1/notmuch-restore.rst |  8 
 notmuch-restore.c| 93 +---
 test/T240-dump-restore.sh| 14 +++
 3 files changed, 92 insertions(+), 23 deletions(-)

diff --git a/doc/man1/notmuch-restore.rst b/doc/man1/notmuch-restore.rst
index d6cf19a..936b138 100644
--- a/doc/man1/notmuch-restore.rst
+++ b/doc/man1/notmuch-restore.rst
@@ -50,6 +50,14 @@ Supported options for **restore** include
 format, this heuristic, based the fact that batch-tag format
 contains no parentheses, should be accurate.

+GZIPPED INPUT
+=============
+
+\ **notmuch restore** will detect if the input is compressed in
+**gzip(1)** format and automatically decompress it while reading. This
+detection does not depend on file naming and in particular works for
+standard input.
+
 SEE ALSO
 

diff --git a/notmuch-restore.c b/notmuch-restore.c
index c54d513..7abee0a 100644
--- a/notmuch-restore.c
+++ b/notmuch-restore.c
@@ -22,6 +22,7 @@
 #include "hex-escape.h"
 #include "tag-util.h"
 #include "string-util.h"
+#include "zlib-extra.h"

 static regex_t regex;

@@ -128,10 +129,10 @@ notmuch_restore_command (notmuch_config_t *config, int 
argc, char *argv[])
 tag_op_list_t *tag_ops;

 char *input_file_name = NULL;
-FILE *input = stdin;
+const char *name_for_error = NULL;
+gzFile input = NULL;
 char *line = NULL;
 void *line_ctx = NULL;
-size_t line_size;
 ssize_t line_len;

 int ret = 0;
@@ -157,39 +158,69 @@ notmuch_restore_command (notmuch_config_t *config, int 
argc, char *argv[])
 };

 opt_index = parse_arguments (argc, argv, options, 1);
-if (opt_index < 0)
-   return EXIT_FAILURE;
+if (opt_index < 0) {
+   ret = EXIT_FAILURE;
+   goto DONE;
+}
+
+name_for_error = input_file_name ? input_file_name : "stdin";

 if (! accumulate)
flags |= TAG_FLAG_REMOVE_ALL;

-if (input_file_name) {
-   input = fopen (input_file_name, "r");
-   if (input == NULL) {
-   fprintf (stderr, "Error opening %s for reading: %s\n",
-input_file_name, strerror (errno));
-   return EXIT_FAILURE;
+errno = 0;
+if (input_file_name)
+   input = gzopen (input_file_name, "r");
+else {
+   int infd = dup (STDIN_FILENO);
+   if (infd < 0) {
+   fprintf (stderr, "Error duping stdin: %s\n",
+strerror (errno));
+   ret = EXIT_FAILURE;
+   goto DONE;
}
+   input = gzdopen (infd, "r");
+   if (! input)
+   close (infd);
+}
+
+if (input == NULL) {
+   fprintf (stderr, "Error opening %s for (gzip) reading: %s\n",
+name_for_error, strerror (errno));
+   ret = EXIT_FAILURE;
+   goto DONE;
 }

 if (opt_index < argc) {
fprintf (stderr, "Unused positional parameter: %s\n", argv[opt_index]);
-   return EXIT_FAILURE;
+   ret = EXIT_FAILURE;
+   goto DONE;
 }

 tag_ops = tag_op_list_create (config);
 if (tag_ops == NULL) {
fprintf (stderr, "Out of memory.\n");
-   return EXIT_FAILURE;
+   ret = EXIT_FAILURE;
+   goto DONE;
 }

 do {
-   line_len = getline (&line, &line_size, input);
+   util_status_t status;
+
+   status = gz_getline (line_ctx, &line, &line_len, input);

/* empty input file not considered an error */
-   if (line_len < 0)
-   return EXIT_SUCCESS;
+   if (status == UTIL_EOF) {
+   ret = EXIT_SUCCESS;
+   goto DONE;
+   }

+   if (status) {
+   fprintf (stderr, "Error reading (gzipped) input: %s\n",
+gz_error_string(status, input));
+   ret = EXIT_FAILURE;
+   goto DONE;
+   }
 } while ((line_len == 0) ||
 (line[0] == '#') ||
 /* the cast is safe because we checked about for line_len < 0 */
@@ -254,21 +285,37 @@ notmuch_restore_command (notmuch_config_t *config, int 
argc, char *argv[])
if (ret)
break;

-}  while ((line_len = getline (&line, &line_size, input)) != -1);
+}  while (! (ret = gz_getline (line_ctx, &line, &line_len, input)));
+

-if (line_ctx != NULL)
-   talloc_free (line_ctx);
+/* EOF is normal loop termination condition, UTIL_SUCCESS is
+ * impossible here */
+if (ret == UTIL_EOF) {
+   ret = UTIL_SUCCESS;
+} else {
+   fprintf (stderr, "Error reading (gzipped) input: %s\n",
+gz_error_string (ret, input));
+}
+
+/* currently this should not be after DONE: since we don't 
+ * know if the xregcomp was reached
+ */

 if (input_format == DUMP_FORMAT_SUP)
regfree (&regex);

-if (line)
-   free (line);
+ DONE:
+if (line_ctx != NULL)
+   talloc_free (line_ctx);

-notmuch_database_destroy 

[Patch v7 4/6] restore: transparently support gzipped input

2014-04-05 Thread David Bremner
We rely completely on zlib to do the right thing in detecting gzipped
input. Since our dump format is chosen to be 7 bit ascii, this should
be fine.
---
 doc/man1/notmuch-restore.rst |  8 
 notmuch-restore.c| 93 +---
 test/T240-dump-restore.sh| 14 +++
 3 files changed, 92 insertions(+), 23 deletions(-)

diff --git a/doc/man1/notmuch-restore.rst b/doc/man1/notmuch-restore.rst
index d6cf19a..936b138 100644
--- a/doc/man1/notmuch-restore.rst
+++ b/doc/man1/notmuch-restore.rst
@@ -50,6 +50,14 @@ Supported options for **restore** include
 format, this heuristic, based the fact that batch-tag format
 contains no parentheses, should be accurate.
 
+GZIPPED INPUT
+=============
+
+\ **notmuch restore** will detect if the input is compressed in
+**gzip(1)** format and automatically decompress it while reading. This
+detection does not depend on file naming and in particular works for
+standard input.
+
 SEE ALSO
 
 
diff --git a/notmuch-restore.c b/notmuch-restore.c
index c54d513..7abee0a 100644
--- a/notmuch-restore.c
+++ b/notmuch-restore.c
@@ -22,6 +22,7 @@
 #include "hex-escape.h"
 #include "tag-util.h"
 #include "string-util.h"
+#include "zlib-extra.h"
 
 static regex_t regex;
 
@@ -128,10 +129,10 @@ notmuch_restore_command (notmuch_config_t *config, int 
argc, char *argv[])
 tag_op_list_t *tag_ops;
 
 char *input_file_name = NULL;
-FILE *input = stdin;
+const char *name_for_error = NULL;
+gzFile input = NULL;
 char *line = NULL;
 void *line_ctx = NULL;
-size_t line_size;
 ssize_t line_len;
 
 int ret = 0;
@@ -157,39 +158,69 @@ notmuch_restore_command (notmuch_config_t *config, int 
argc, char *argv[])
 };
 
 opt_index = parse_arguments (argc, argv, options, 1);
-if (opt_index < 0)
-   return EXIT_FAILURE;
+if (opt_index < 0) {
+   ret = EXIT_FAILURE;
+   goto DONE;
+}
+
+name_for_error = input_file_name ? input_file_name : "stdin";
 
 if (! accumulate)
flags |= TAG_FLAG_REMOVE_ALL;
 
-if (input_file_name) {
-   input = fopen (input_file_name, "r");
-   if (input == NULL) {
-   fprintf (stderr, "Error opening %s for reading: %s\n",
-input_file_name, strerror (errno));
-   return EXIT_FAILURE;
+errno = 0;
+if (input_file_name)
+   input = gzopen (input_file_name, "r");
+else {
+   int infd = dup (STDIN_FILENO);
+   if (infd < 0) {
+   fprintf (stderr, "Error duping stdin: %s\n",
+strerror (errno));
+   ret = EXIT_FAILURE;
+   goto DONE;
}
+   input = gzdopen (infd, "r");
+   if (! input)
+   close (infd);
+}
+
+if (input == NULL) {
+   fprintf (stderr, "Error opening %s for (gzip) reading: %s\n",
+name_for_error, strerror (errno));
+   ret = EXIT_FAILURE;
+   goto DONE;
 }
 
 if (opt_index < argc) {
fprintf (stderr, "Unused positional parameter: %s\n", argv[opt_index]);
-   return EXIT_FAILURE;
+   ret = EXIT_FAILURE;
+   goto DONE;
 }
 
 tag_ops = tag_op_list_create (config);
 if (tag_ops == NULL) {
fprintf (stderr, "Out of memory.\n");
-   return EXIT_FAILURE;
+   ret = EXIT_FAILURE;
+   goto DONE;
 }
 
 do {
-   line_len = getline (&line, &line_size, input);
+   util_status_t status;
+
+   status = gz_getline (line_ctx, &line, &line_len, input);
 
/* empty input file not considered an error */
-   if (line_len < 0)
-   return EXIT_SUCCESS;
+   if (status == UTIL_EOF) {
+   ret = EXIT_SUCCESS;
+   goto DONE;
+   }
 
+   if (status) {
+   fprintf (stderr, "Error reading (gzipped) input: %s\n",
+gz_error_string(status, input));
+   ret = EXIT_FAILURE;
+   goto DONE;
+   }
 } while ((line_len == 0) ||
 (line[0] == '#') ||
 /* the cast is safe because we checked about for line_len < 0 */
@@ -254,21 +285,37 @@ notmuch_restore_command (notmuch_config_t *config, int 
argc, char *argv[])
if (ret)
break;
 
-}  while ((line_len = getline (&line, &line_size, input)) != -1);
+}  while (! (ret = gz_getline (line_ctx, &line, &line_len, input)));
+
 
-if (line_ctx != NULL)
-   talloc_free (line_ctx);
+/* EOF is normal loop termination condition, UTIL_SUCCESS is
+ * impossible here */
+if (ret == UTIL_EOF) {
+   ret = UTIL_SUCCESS;
+} else {
+   fprintf (stderr, "Error reading (gzipped) input: %s\n",
+gz_error_string (ret, input));
+}
+
+/* currently this should not be after DONE: since we don't 
+ * know if the xregcomp was reached
+ */
 
 if (input_format == DUMP_FORMAT_SUP)
regfree (&regex);
 
-if (line)
-   free (line);
+ DONE:
+if (line_ctx != NULL)
+   talloc_free (line_ctx);
 
-