Hi all,

The attached patch should hopefully fix Evgenii's problem.

The patch changes the configure script to always use libz, unless it is explicitly disabled with --without-zlib. When zlib is disabled, the patch makes sure that the WARC functions do not use gzip and write uncompressed files instead.

The funny thing is that libz was already pulled in by the SSL support: unless you configured wget with --without-ssl, libz was always linked in (even if you also configured with --without-zlib).

Regards,

Gijs


Op 09-01-12 02:15 schreef Evgenii Philippov:
> Actually I currently close my work on wget.
>
> So these messages are just bug reports for wget collaborators.
>
> Some additional info:
>
> export PS1="Ok\"
> Ok
> uname -asm
> Linux <host_name> 2.6.38-13-generic #53-Ubuntu SMP Mon Nov 28 19:33:45
> UTC 2011 x86_64 x86_64 x86_64 GNU/Linux
> Ok
> lsb_release -dr
> Description: Ubuntu 11.04
> Release: 11.04
> Ok
>
> With best regards,
> Thank you for a wonderful utility,
> --
> Evgeniy
>

=== modified file 'ChangeLog'
--- ChangeLog	2011-12-12 20:30:39 +0000
+++ ChangeLog	2012-01-09 13:40:01 +0000
@@ -1,3 +1,7 @@
+2012-01-09  Gijs van Tulder  <gvtul...@gmail.com>
+
+	* configure.ac: Always try to use libz, even without SSL.
+
 2011-12-12  Giuseppe Scrivano  <gscriv...@gnu.org>
 
 	* Makefile.am (EXTRA_DIST): Add build-aux/bzr-version-gen.

=== modified file 'configure.ac'
--- configure.ac	2011-11-04 21:25:00 +0000
+++ configure.ac	2012-01-09 13:40:01 +0000
@@ -65,6 +65,9 @@
 [[  --without-ssl           disable SSL autodetection
   --with-ssl={gnutls,openssl} specify the SSL backend.  GNU TLS is the default.]])
 
+AC_ARG_WITH(zlib,
+[[  --without-zlib          disable zlib ]])
+
 AC_ARG_ENABLE(opie,
 [  --disable-opie          disable support for opie or s/key FTP login],
 ENABLE_OPIE=$enableval, ENABLE_OPIE=yes)
@@ -234,6 +237,10 @@
 dnl Checks for libraries.
 dnl
 
+AS_IF([test x"$with_zlib" != xno], [
+  AC_CHECK_LIB(z, compress)
+])
+
 AS_IF([test x"$with_ssl" = xopenssl], [
     dnl some versions of openssl use zlib compression
     AC_CHECK_LIB(z, compress)

=== modified file 'src/ChangeLog'
--- src/ChangeLog	2012-01-08 23:03:23 +0000
+++ src/ChangeLog	2012-01-09 13:40:01 +0000
@@ -1,3 +1,10 @@
+2012-01-09  Gijs van Tulder  <gvtul...@gmail.com>
+
+	* init.c: Disable WARC compression if zlib is disabled.
+	* main.c: Do not show the 'no-warc-compression' option if zlib is
+	disabled.
+	* warc.c: Do not compress WARC files if zlib is disabled.
+
 2012-01-09  Sasikantha Babu   <sasikanth....@gmail.com> (tiny change)
 	* connect.c (connect_to_ip): properly formatted ipv6 address display.
 	(socket_family): New function - returns socket family type.

=== modified file 'src/init.c'
--- src/init.c	2011-11-04 21:25:00 +0000
+++ src/init.c	2012-01-09 13:40:01 +0000
@@ -267,7 +267,9 @@
   { "waitretry",        &opt.waitretry,         cmd_time },
   { "warccdx",          &opt.warc_cdx_enabled,  cmd_boolean },
   { "warccdxdedup",     &opt.warc_cdx_dedup_filename,  cmd_file },
+#ifdef HAVE_LIBZ
   { "warccompression",  &opt.warc_compression_enabled, cmd_boolean },
+#endif
   { "warcdigests",      &opt.warc_digests_enabled, cmd_boolean },
   { "warcfile",         &opt.warc_filename,     cmd_file },
   { "warcheader",       NULL,                   cmd_spec_warc_header },
@@ -374,7 +376,11 @@
   opt.show_all_dns_entries = false;
 
   opt.warc_maxsize = 0; /* 1024 * 1024 * 1024; */
+#ifdef HAVE_LIBZ
   opt.warc_compression_enabled = true;
+#else
+  opt.warc_compression_enabled = false;
+#endif
   opt.warc_digests_enabled = true;
   opt.warc_cdx_enabled = false;
   opt.warc_cdx_dedup_filename = NULL;

=== modified file 'src/main.c'
--- src/main.c	2011-11-04 21:25:00 +0000
+++ src/main.c	2012-01-09 13:40:01 +0000
@@ -289,7 +289,9 @@
     { "wait", 'w', OPT_VALUE, "wait", -1 },
     { "waitretry", 0, OPT_VALUE, "waitretry", -1 },
     { "warc-cdx", 0, OPT_BOOLEAN, "warccdx", -1 },
+#ifdef HAVE_LIBZ
     { "warc-compression", 0, OPT_BOOLEAN, "warccompression", -1 },
+#endif
     { "warc-dedup", 0, OPT_VALUE, "warccdxdedup", -1 },
     { "warc-digests", 0, OPT_BOOLEAN, "warcdigests", -1 },
     { "warc-file", 0, OPT_VALUE, "warcfile", -1 },
@@ -674,8 +676,10 @@
        --warc-cdx                write CDX index files.\n"),
     N_("\
        --warc-dedup=FILENAME     do not store records listed in this CDX file.\n"),
+#ifdef HAVE_LIBZ
     N_("\
        --no-warc-compression     do not compress WARC files with GZIP.\n"),
+#endif
     N_("\
        --no-warc-digests         do not calculate SHA1 digests.\n"),
     N_("\

=== modified file 'src/warc.c'
--- src/warc.c	2011-11-20 17:28:19 +0000
+++ src/warc.c	2012-01-09 13:40:01 +0000
@@ -14,7 +14,9 @@
 #include <sha1.h>
 #include <base32.h>
 #include <unistd.h>
+#ifdef HAVE_LIBZ
 #include <zlib.h>
+#endif
 #ifdef HAVE_LIBUUID
 #include <uuid/uuid.h>
 #endif
@@ -42,6 +44,7 @@
 /* The current WARC file (or NULL, if WARC is disabled). */
 static FILE *warc_current_file;
 
+#ifdef HAVE_LIBZ
 /* The gzip stream for the current WARC file
    (or NULL, if WARC or gzip is disabled). */
 static gzFile *warc_current_gzfile;
@@ -51,6 +54,7 @@
 
 /* The uncompressed size (so far) of the current record. */
 static size_t warc_current_gzfile_uncompressed_size;
+# endif
 
 /* This is true until a warc_write_* method fails. */
 static bool warc_write_ok;
@@ -105,12 +109,14 @@
 static size_t
 warc_write_buffer (const char *buffer, size_t size)
 {
+#ifdef HAVE_LIBZ
   if (warc_current_gzfile)
     {
       warc_current_gzfile_uncompressed_size += size;
       return gzwrite (warc_current_gzfile, buffer, size);
     }
   else
+#endif
     return fwrite (buffer, 1, size, warc_current_file);
 }
 
@@ -155,6 +161,7 @@
   if (opt.warc_maxsize > 0 && ftell (warc_current_file) >= opt.warc_maxsize)
     warc_start_new_file (false);
 
+#ifdef HAVE_LIBZ
   /* Start a GZIP stream, if required. */
   if (opt.warc_compression_enabled)
     {
@@ -179,6 +186,7 @@
           return false;
         }
     }
+#endif
 
   warc_write_string ("WARC/1.0\r\n");
   return warc_write_ok;
@@ -247,6 +255,7 @@
 {
   warc_write_buffer ("\r\n\r\n", 4);
 
+#ifdef HAVE_LIBZ
   /* We start a new gzip stream for each record.  */
   if (warc_write_ok && warc_current_gzfile)
     {
@@ -325,6 +334,7 @@
       fflush (warc_current_file);
       fseek (warc_current_file, 0, SEEK_END);
     }
+#endif /* HAVE_LIBZ */
 
   return warc_write_ok;
 }
@@ -687,7 +697,11 @@
   char *new_filename = malloc (base_filename_length + 1 + 5 + 8 + 1);
   warc_current_filename = new_filename;
 
+#ifdef HAVE_LIBZ
   char *extension = (opt.warc_compression_enabled ? "warc.gz" : "warc");
+#else
+  char *extension = "warc";
+#endif
 
   /* If max size is enabled, we add a serial number to the file names. */
   if (meta)

Reply via email to