[Bug-wget] [PATCH 17/27] Bugfix: Remove surrounding quotes from Metalink/HTTP key's value

2016-09-28 Thread Matthew White
* src/metalink.h: Add declaration of function dequote_metalink_string()
* src/metalink.c: Add function dequote_metalink_string() to remove
  the surrounding quotes, \' or \", from a string
* src/metalink.c (find_key_value, find_key_values): Call
  dequote_metalink_string() to remove the surrounding quotes from
  the parsed value
* src/metalink.c (test_find_key_value, test_find_key_values): Add
  quoted key's values for unit-tests
* testenv/Makefile.am: Add new file
* testenv/Test-metalink-http-quoted.py: New file. Metalink/HTTP quoted
  values tests

Some Metalink/HTTP keys, like "type" [2], may have a quoted value [1]:
Link: <http://example.com/example.ext.meta4>; rel=describedby;
type="application/metalink4+xml"

Wget was expecting a dequoted value from the Metalink module. This
patch addresses this problem.

References:
 [1] Metalink/HTTP: Mirrors and Hashes
 1.1. Example Metalink Server Response
 https://tools.ietf.org/html/rfc6249#section-1.1

 [2] Additional Link Relations
 6. "type"
 https://tools.ietf.org/html/rfc6903#section-6
---
 src/metalink.c   |  42 ++--
 src/metalink.h   |   1 +
 testenv/Makefile.am  |   1 +
 testenv/Test-metalink-http-quoted.py | 126 +++
 4 files changed, 163 insertions(+), 7 deletions(-)
 create mode 100755 testenv/Test-metalink-http-quoted.py

diff --git a/src/metalink.c b/src/metalink.c
index 5212742..b7f3a72 100644
--- a/src/metalink.c
+++ b/src/metalink.c
@@ -823,6 +823,32 @@ clean_metalink_string (char **str)
   *str = new;
 }
 
+/*
+  Strip one pair of matching quotes (either '...' or "...") enclosing
+  the string pointed to by STR.
+
+  Nothing happens when STR or *STR is NULL, when the string does not
+  begin with a quote character, when it is shorter than two
+  characters, or when its last character differs from its first.
+
+  Otherwise the old buffer is released with xfree() and *STR is set to
+  the copy of the inner text obtained from xmemdup0().
+ */
+void
+dequote_metalink_string (char **str)
+{
+  char *quoted, *bare;
+  char mark;
+  size_t total;
+
+  if (!str || !*str)
+    return;
+
+  quoted = *str;
+  mark = quoted[0];
+
+  /* Only a leading single or double quote is of interest.  */
+  if (mark != '\"' && mark != '\'')
+    return;
+
+  total = strlen (quoted);
+
+  /* The closing character must be the very same quote, and it must be
+     a distinct character from the opening one.  */
+  if (total < 2 || quoted[total - 1] != mark)
+    return;
+
+  /* Keep what lies between the two quotes.  */
+  bare = xmemdup0 (quoted + 1, total - 2);
+  xfree (*str);
+  *str = bare;
+}
+
 /* Append the suffix ".badhash" to the file NAME, except without
overwriting an existing file with that name and suffix.  */
 void
@@ -970,6 +996,7 @@ find_key_value (const char *start, const char *end, const 
char *key, char **valu
   while (val_end < end && *val_end != ';' && !c_isspace (*val_end))
 val_end++;
   *value = xstrndup (val_beg, val_end - val_beg);
+  dequote_metalink_string (value);
   return true;
 }
 }
@@ -1070,6 +1097,7 @@ find_key_values (const char *start, const char *end, char 
**key, char **value)
 
   *key = xstrndup (key_start, key_end - key_start);
   *value = xstrndup (val_start, val_end - val_start);
+  dequote_metalink_string (value);
 
   /* Skip trailing whitespaces.  */
   while (val_end < end && c_isspace (*val_end))
@@ -1082,10 +1110,10 @@ find_key_values (const char *start, const char *end, 
char **key, char **value)
 const char *
 test_find_key_values (void)
 {
-  static const char *header_data = "key1=val1;key2=val2 ;key3=val3; key4=val4"\
-   " ; key5=val5;key6 =val6;key7= val7; "\
-   "key8 = val8 ;key9=   val9   "\
-   ",key10= val10,key11,key12=val12";
+  static const char *header_data = "key1=val1;key2=\"val2\" ;key3=val3; 
key4=val4"\
+   " ; key5=val5;key6 ='val6';key7= val7; "\
+   "key8 = val8 ;key9=   \"val9\"  
 "\
+   ",key10= 'val10',key11,key12=val12";
   static const struct
   {
 const char *key;
@@ -1126,9 +1154,9 @@ test_find_key_values (void)
 const char *
 test_find_key_value (void)
 {
-  static const char *header_data = "key1=val1;key2=val2 ;key3=val3; key4=val4"\
-   " ; key5=val5;key6 =val6;key7= val7; "\
-   "key8 = val8 ;key9=   val9   ";
+  static const char *header_data = "key1=val1;key2=val2 ;key3='val3'; 
key4=val4"\
+   " ; key5='val5';key6 =val6;key7= \"val7\"; 
"\
+   "key8 = \"val8\" ;key9=   val9  
 ";
   static const struct
   {
 const char *key;
diff --git a/src/metalink.h b/src/metalink.h
index 4095262..6bd61f5 100644
--- a/src/metalink.h
+++ b/src/metalink.h
@@ -53,6 +53,7 @@ char *last_component (char const *name);
 char *get_metalink_basename (char *name);
 void append_suffix_number (char **str, const char *sep, wgint num);
 void clean_metalink_string (char **str);
+void dequote_metalink_string (char **str);
 void badhash_suffix (char *name);
 void badhash_or_remove (char *name);
 
diff --git a/testenv/Makefile.am b/testenv/Makefile.am
index 4ad7d0a..32b3db9 100644
--- 

[Bug-wget] [PATCH 16/27] Bugfix: Process Metalink/XML url strings containing white spaces and CRLF

2016-09-28 Thread Matthew White
* src/metalink.h: Add declaration of function clean_metalink_string()
* src/metalink.c: Add directive #include "xmemdup0.h"
* src/metalink.c: Add function clean_metalink_string() to remove
  leading and trailing white spaces and CRLF from a string
* src/metalink.c (retrieve_from_metalink): Remove leading and trailing
  white spaces and CRLF from url resource mres->url
* testenv/Makefile.am: Add new file
* testenv/Test-metalink-xml-urlbreak.py: New test. Metalink/XML white
  spaces and CRLF in url resources tests

White spaces and CRLF are not automatically removed by libmetalink
from url strings. Wget's Metalink module was unable to process such
url strings. This patch makes it possible to process them by
stripping leading and trailing white spaces and CRLF.

If a parsed Metalink/XML url string contains strings separated by
CRLF, only the first of the series is accepted.
---
 src/metalink.c|  43 +++
 src/metalink.h|   1 +
 testenv/Makefile.am   |   3 +-
 testenv/Test-metalink-xml-urlbreak.py | 236 ++
 4 files changed, 282 insertions(+), 1 deletion(-)
 create mode 100755 testenv/Test-metalink-xml-urlbreak.py

diff --git a/src/metalink.c b/src/metalink.c
index 5108a5e..5212742 100644
--- a/src/metalink.c
+++ b/src/metalink.c
@@ -41,6 +41,7 @@ as that of the covered work.  */
 #include "sha256.h"
 #include "sha512.h"
 #include "dosname.h"
+#include "xmemdup0.h"
 #include "xstrndup.h"
 #include "c-strcase.h"
 #include 
@@ -197,6 +198,8 @@ retrieve_from_metalink (const metalink_t* metalink)
   struct url *url;
   int url_err;
 
+  clean_metalink_string (>url);
+
   if (!RES_TYPE_SUPPORTED (mres->type))
 {
   logprintf (LOG_VERBOSE,
@@ -780,6 +783,46 @@ append_suffix_number (char **str, const char *sep, wgint 
num)
   *str = new;
 }
 
+/*
+  Remove the string's trailing/leading whitespaces and line breaks.
+
+  Leading '\n', '\r', '\t' and ' ' characters are skipped.  The result
+  then ends at the first '\r' or '\n' found after that point; when
+  there is none, trailing '\n', '\r', '\t' and ' ' characters are
+  dropped instead.  An empty or all-whitespace input yields an empty
+  string.
+
+  Nothing happens when STR or *STR is NULL.  Otherwise the old buffer
+  is released with xfree() and *STR is set to the copy obtained from
+  xmemdup0().
+*/
+void
+clean_metalink_string (char **str)
+{
+  int c;
+  size_t len;
+  char *new, *beg, *end;
+
+  if (!str || !*str)
+    return;
+
+  beg = *str;
+
+  while ((c = *beg) && (c == '\n' || c == '\r' || c == '\t' || c == ' '))
+    beg++;
+
+  end = beg;
+
+  /* To not truncate a string containing spaces, search the first '\r'
+     or '\n' which hypothetically marks the end of the string.  */
+  while ((c = *end) && (c != '\r') && (c != '\n'))
+    end++;
+
+  /* If we are at the end of the string, search the first legit
+     character going backward.  Never step before BEG: for an empty or
+     all-whitespace string END already equals BEG here, and reading
+     *(end - 1) would walk outside the cleaned range (or the buffer)
+     and produce a negative length.  */
+  if (*end == '\0')
+    while (end > beg
+           && ((c = *(end - 1)) == '\n' || c == '\r' || c == '\t' || c == ' '))
+      end--;
+
+  len = (size_t) (end - beg);
+
+  new = xmemdup0 (beg, len);
+  xfree (*str);
+  *str = new;
+}
+
 /* Append the suffix ".badhash" to the file NAME, except without
overwriting an existing file with that name and suffix.  */
 void
diff --git a/src/metalink.h b/src/metalink.h
index c9dd73e..4095262 100644
--- a/src/metalink.h
+++ b/src/metalink.h
@@ -52,6 +52,7 @@ int metalink_check_safe_path (const char *path);
 char *last_component (char const *name);
 char *get_metalink_basename (char *name);
 void append_suffix_number (char **str, const char *sep, wgint num);
+void clean_metalink_string (char **str);
 void badhash_suffix (char *name);
 void badhash_or_remove (char *name);
 
diff --git a/testenv/Makefile.am b/testenv/Makefile.am
index e6f9a23..4ad7d0a 100644
--- a/testenv/Makefile.am
+++ b/testenv/Makefile.am
@@ -46,7 +46,8 @@ if METALINK_IS_ENABLED
 Test-metalink-xml-absprefix-trust.py\
 Test-metalink-xml-homeprefix-trust.py   \
 Test-metalink-xml-size.py   \
-Test-metalink-xml-nourls.py
+Test-metalink-xml-nourls.py \
+Test-metalink-xml-urlbreak.py
 else
   METALINK_TESTS =
 endif
diff --git a/testenv/Test-metalink-xml-urlbreak.py 
b/testenv/Test-metalink-xml-urlbreak.py
new file mode 100755
index 000..e53ae11
--- /dev/null
+++ b/testenv/Test-metalink-xml-urlbreak.py
@@ -0,0 +1,236 @@
+#!/usr/bin/env python3
+from sys import exit
+from test.http_test import HTTPTest
+from misc.wget_file import WgetFile
+import hashlib
+
+"""
+This is to test Metalink/XML white spaces in url resources.
+
+With --trust-server-names, trust the metalink:file names.
+
+Without --trust-server-names, don't trust the metalink:file names:
+use the basename of --input-metalink, and add a sequential number
+(e.g. .#1, .#2, etc.).
+
+Strip the directory from unsafe paths.
+"""
+# File Definitions ###
+bad = "Ouch!"
+
+File1 = "Would you like some Tea?"
+File1_lowPref = "Do not take this"
+File1_sha256 = hashlib.sha256 (File1.encode ('UTF-8')).hexdigest ()
+
+File2 = "This is gonna be good"
+File2_lowPref = "Not this one too"
+File2_sha256 = hashlib.sha256 (File2.encode ('UTF-8')).hexdigest ()
+

[Bug-wget] [PATCH 10/27] Implement Metalink/XML --directory-prefix option in Metalink module

2016-09-28 Thread Matthew White
* NEWS: Mention the effect of --directory-prefix over Metalink
* src/metalink.c (retrieve_from_metalink): Add opt.dir_prefix as
  prefix to the metalink:file name mfile->name
* doc/metalink.txt: Update document. Explain --directory-prefix

When --directory-prefix=<prefix> is used, set the top of the retrieval
tree to <prefix>. The default is . (the current directory). Metalink/XML
and Metalink/HTTP files will be downloaded under <prefix>.
---
 NEWS |  3 +++
 doc/metalink.txt |  8 ++--
 src/metalink.c   | 61 
 3 files changed, 40 insertions(+), 32 deletions(-)

diff --git a/NEWS b/NEWS
index 04718d5..bfb3bef 100644
--- a/NEWS
+++ b/NEWS
@@ -9,6 +9,9 @@ Please send GNU Wget bug reports to .
 
 * Changes in Wget X.Y.Z
 
+* When processing a Metalink file, --directory-prefix= sets
+  the top of the retrieval tree to prefix for Metalink downloads.
+
 * When processing a Metalink file, reject downloaded files which don't
   agree with their own metalink:size value:
   https://tools.ietf.org/html/rfc5854#section-4.2.16
diff --git a/doc/metalink.txt b/doc/metalink.txt
index 904ef2e..94a07ba 100644
--- a/doc/metalink.txt
+++ b/doc/metalink.txt
@@ -157,9 +157,5 @@ References:
 '-P prefix'
 '--directory-prefix=prefix'
 
-Do not apply to Metalink/XML files (aka --input-metalink=).
-
-Apply to Metalink/HTTP downloads.
-
-The directory prefix is the directory where all other files and
-subdirectories will be saved to, see wget(1).
+Set the top of the retrieval tree to prefix for both Metalink/XML
+and Metalink/HTTP downloads, see wget(1).
diff --git a/src/metalink.c b/src/metalink.c
index 8e074fe..3e03aee 100644
--- a/src/metalink.c
+++ b/src/metalink.c
@@ -87,6 +87,7 @@ retrieve_from_metalink (const metalink_t* metalink)
   metalink_file_t *mfile = *mfile_ptr;
   metalink_resource_t **mres_ptr;
   char *filename = NULL;
+  char *destname = NULL;
   bool hash_ok = false;
 
   uerr_t retr_err = METALINK_MISSING_RESOURCE;
@@ -100,6 +101,13 @@ retrieve_from_metalink (const metalink_t* metalink)
 
   output_stream = NULL;
 
+  /* The directory prefix for opt.metalink_over_http is handled by
+ src/url.c (url_file_name), do not add it a second time.  */
+  if (!metalink->origin && opt.dir_prefix && strlen (opt.dir_prefix))
+filename = aprintf ("%s/%s", opt.dir_prefix, mfile->name);
+  else
+filename = xstrdup (mfile->name);
+
   DEBUGP (("Processing metalink file %s...\n", quote (mfile->name)));
 
   /* Resources are sorted by priority.  */
@@ -133,12 +141,12 @@ retrieve_from_metalink (const metalink_t* metalink)
 
   fclose (output_stream);
   output_stream = NULL;
-  badhash_or_remove (filename);
-  xfree (filename);
+  badhash_or_remove (destname);
+  xfree (destname);
 }
-  else if (!output_stream && filename)
+  else if (!output_stream && destname)
 {
-  xfree (filename);
+  xfree (destname);
 }
 
   retr_err = METALINK_RETR_ERROR;
@@ -180,10 +188,10 @@ retrieve_from_metalink (const metalink_t* metalink)
  after we are finished with the file.  */
   if (opt.always_rest)
 /* continue previous download */
-output_stream = fopen (mfile->name, "ab");
+output_stream = fopen (filename, "ab");
   else
 /* create a file with an unique name */
-output_stream = unique_create (mfile->name, true, 
);
+output_stream = unique_create (filename, true, );
 }
 
   output_stream_regular = true;
@@ -203,27 +211,27 @@ retrieve_from_metalink (const metalink_t* metalink)
 * src/http.c (open_output_stream): If output_stream is
   NULL, create the opt.output_document "path/file"
   */
-  if (!filename)
-filename = xstrdup (mfile->name);
+  if (!destname)
+destname = xstrdup (filename);
 
   /* Store the real file name for displaying in messages,
  and for proper RFC5854 "path/file" handling.  */
-  opt.output_document = filename;
+  opt.output_document = destname;
 
   opt.metalink_over_http = false;
-  DEBUGP (("Storing to %s\n", filename));
+  DEBUGP (("Storing to %s\n", destname));
   retr_err = retrieve_url (url, mres->url, NULL, NULL,
NULL, NULL, opt.recursive, iri, false);
   opt.metalink_over_http = _metalink_http;
 
   /*
 Bug: output_stream is NULL, but retrieve_url() somehow
-created filename.
+created 

[Bug-wget] [PATCH 15/27] New test: Detect when there are no good Metalink url resources

2016-09-28 Thread Matthew White
* testenv/Makefile.am: Add new file
* testenv/Test-metalink-xml-nourls.py: New file. Metalink/XML unknown
  urls tests

Test that no segmentation fault occurs when there are no good
Metalink url resources.
---
 testenv/Makefile.am |   3 +-
 testenv/Test-metalink-xml-nourls.py | 195 
 2 files changed, 197 insertions(+), 1 deletion(-)
 create mode 100755 testenv/Test-metalink-xml-nourls.py

diff --git a/testenv/Makefile.am b/testenv/Makefile.am
index b671438..e6f9a23 100644
--- a/testenv/Makefile.am
+++ b/testenv/Makefile.am
@@ -45,7 +45,8 @@ if METALINK_IS_ENABLED
 Test-metalink-xml-relprefix-trust.py\
 Test-metalink-xml-absprefix-trust.py\
 Test-metalink-xml-homeprefix-trust.py   \
-Test-metalink-xml-size.py
+Test-metalink-xml-size.py   \
+Test-metalink-xml-nourls.py
 else
   METALINK_TESTS =
 endif
diff --git a/testenv/Test-metalink-xml-nourls.py 
b/testenv/Test-metalink-xml-nourls.py
new file mode 100755
index 000..23f27db
--- /dev/null
+++ b/testenv/Test-metalink-xml-nourls.py
@@ -0,0 +1,195 @@
+#!/usr/bin/env python3
+from sys import exit
+from test.http_test import HTTPTest
+from misc.wget_file import WgetFile
+import hashlib
+
+"""
+This is to test Metalink/XML with unknown url types.
+
+With --trust-server-names, trust the metalink:file names.
+
+Without --trust-server-names, don't trust the metalink:file names:
+use the basename of --input-metalink, and add a sequential number
+(e.g. .#1, .#2, etc.).
+
+Strip the directory from unsafe paths.
+"""
+# File Definitions ###
+bad = "Ouch!"
+
+File1 = "Would you like some Tea?"
+File1_lowPref = "Do not take this"
+File1_sha256 = hashlib.sha256 (File1.encode ('UTF-8')).hexdigest ()
+
+File2 = "This is gonna be good"
+File2_lowPref = "Not this one too"
+File2_sha256 = hashlib.sha256 (File2.encode ('UTF-8')).hexdigest ()
+
+File3 = "A little more, please"
+File3_lowPref = "That's just too much"
+File3_sha256 = hashlib.sha256 (File3.encode ('UTF-8')).hexdigest ()
+
+File4 = "Maybe a biscuit?"
+File4_lowPref = "No, thanks"
+File4_sha256 = hashlib.sha256 (File4.encode ('UTF-8')).hexdigest ()
+
+File5 = "More Tea...?"
+File5_lowPref = "I have to go..."
+File5_sha256 = hashlib.sha256 (File5.encode ('UTF-8')).hexdigest ()
+
+MetaXml = \
+"""
+http://www.metalinker.org/;>
+  
+GNU Wget
+  
+  
+GNU GPL
+http://www.gnu.org/licenses/gpl.html
+  
+  Wget Test Files
+  1.2.3
+  Wget Test Files description
+  
+
+  
+{{FILE1_HASH}}
+  
+  
+http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file
+http://{{SRV_HOST}}:{{SRV_PORT}}/404
+http://{{SRV_HOST}}:{{SRV_PORT}}/File1_lowPref
+http://{{SRV_HOST}}:{{SRV_PORT}}/File1
+  
+
+ 
+  
+{{FILE2_HASH}}
+  
+  
+http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file
+http://{{SRV_HOST}}:{{SRV_PORT}}/404
+http://{{SRV_HOST}}:{{SRV_PORT}}/File2_lowPref
+http://{{SRV_HOST}}:{{SRV_PORT}}/File2
+  
+
+
+  
+{{FILE3_HASH}}
+  
+  
+http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file
+http://{{SRV_HOST}}:{{SRV_PORT}}/404
+http://{{SRV_HOST}}:{{SRV_PORT}}/File3_lowPref
+http://{{SRV_HOST}}:{{SRV_PORT}}/File3
+  
+
+ 
+  
+{{FILE4_HASH}}
+  
+  
+http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file
+http://{{SRV_HOST}}:{{SRV_PORT}}/404
+http://{{SRV_HOST}}:{{SRV_PORT}}/File4_lowPref
+http://{{SRV_HOST}}:{{SRV_PORT}}/File4
+  
+
+
+  
+{{FILE5_HASH}}
+  
+  
+http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file
+http://{{SRV_HOST}}:{{SRV_PORT}}/404
+http://{{SRV_HOST}}:{{SRV_PORT}}/File5_lowPref
+http://{{SRV_HOST}}:{{SRV_PORT}}/File5
+  
+
+  
+
+"""
+
+wrong_file = WgetFile ("wrong_file", bad)
+
+File1_orig = WgetFile ("File1", File1)
+File1_down = WgetFile ("test.metalink.#1", File1)
+File1_nono = WgetFile ("File1_lowPref", File1_lowPref)
+
+# all unknown resources, test SIGSEGV
+File2_orig = WgetFile ("File2", File2)
+File2_nono = WgetFile ("File2_lowPref", File2_lowPref)
+
+File3_orig = WgetFile ("File3", File3)
+File3_down = WgetFile ("test.metalink.#3", File3)
+File3_nono = WgetFile ("File3_lowPref", File3_lowPref)
+
+File4_orig = WgetFile ("File4", File4)
+File4_down = WgetFile ("test.metalink.#4", File4)
+File4_nono = WgetFile ("File4_lowPref", File4_lowPref)
+
+File5_orig = WgetFile ("File5", File5)
+File5_down = WgetFile ("test.metalink.#5", File5)
+File5_nono = WgetFile ("File5_lowPref", File5_lowPref)
+
+MetaFile = WgetFile ("test.metalink", MetaXml)
+
+WGET_OPTIONS = "--input-metalink test.metalink"
+WGET_URLS = [[]]
+
+Files = [[
+wrong_file,
+File1_orig, File1_nono,
+File2_orig, File2_nono,
+File3_orig, File3_nono,

[Bug-wget] [PATCH 26/27] New: --metalink-over-http Content-Type/Disposition Metalink/XML processing

2016-09-28 Thread Matthew White
* src/http.c (metalink_from_http): Process the Content-Type header.
  Add an application/metalink4+xml URL as metalink metaurl.  If the
  option opt.content_disposition is true, the Content-Disposition's
  filename is the metaurl's name
* doc/wget.texi: Update --content-disposition and --metalink-over-http
* doc/metalink-standard.txt: Update doc. Content-Type/Disposition
  processing through --metalink-over-http. Update download naming
  system about --trust-server-names and --content-disposition
* testenv/Makefile.am: Add new files
* testenv/Test-metalink-http-xml-type.py: New file. Metalink/HTTP
  Content-Type/Disposition header automated Metalink/XML tests
* testenv/Test-metalink-http-xml-type-trust.py: New file. Metalink/HTTP
  Content-Type/Disposition header with --trust-server-names automated
  Metalink/XML tests
* testenv/Test-metalink-http-xml-type-content.py: New file. Metalink/HTTP
  Content-Type/Disposition header with --content-disposition automated
  Metalink/XML tests
* testenv/Test-metalink-http-xml-type-trust-content.py: New file.
  Metalink/HTTP Content-Type/Disposition header with --trust-server-names
  and --content-disposition automated Metalink/XML tests

Process the Content-Type header, identify an application/metalink4+xml
file.  The Content-Disposition could provide an alternate name through
the "filename" field for the metalink xml file.  Respectively, the cli
options --metalink-over-http and --content-disposition are required.

During Metalink/XML auto-processing, the cli option --trust-server-names
is also required in order to use the Content-Disposition's filename.
---
 doc/metalink-standard.txt  |   4 +
 doc/wget.texi  |   5 +
 src/http.c |  81 
 testenv/Makefile.am|   4 +
 testenv/Test-metalink-http-xml-type-content.py | 221 +
 .../Test-metalink-http-xml-type-trust-content.py   | 221 +
 testenv/Test-metalink-http-xml-type-trust.py   | 221 +
 testenv/Test-metalink-http-xml-type.py | 221 +
 8 files changed, 978 insertions(+)
 create mode 100755 testenv/Test-metalink-http-xml-type-content.py
 create mode 100755 testenv/Test-metalink-http-xml-type-trust-content.py
 create mode 100755 testenv/Test-metalink-http-xml-type-trust.py
 create mode 100755 testenv/Test-metalink-http-xml-type.py

diff --git a/doc/metalink-standard.txt b/doc/metalink-standard.txt
index 4836a85..d54e83e 100644
--- a/doc/metalink-standard.txt
+++ b/doc/metalink-standard.txt
@@ -86,6 +86,10 @@ the mother URL is trusted.
 Any Metalink/HTTP application/metalink4+xml file is saved using the
 basename of its own Link header "name" field, if available.
 
+In conjunction with the option --content-disposition, a 'Content-Type:
+application/metalink4+xml' file is saved using the basename of its own
+Content-Disposition header "filename" field, if available.
+
 3.1.2 The final name
 
 
diff --git a/doc/wget.texi b/doc/wget.texi
index 8cf3230..f42773e 100644
--- a/doc/wget.texi
+++ b/doc/wget.texi
@@ -523,6 +523,7 @@ without overwriting existing files.
 Issues HTTP HEAD request instead of GET and extracts Metalink metadata
 from response headers. Then it switches to Metalink download.
 If no valid Metalink metadata is found, it falls back to ordinary HTTP 
download.
+Enables @samp{Content-Type: application/metalink4+xml} files 
download/processing.
 
 @cindex metalink-index
 @item --metalink-index=@var{number}
@@ -1686,6 +1687,10 @@ This option is useful for some file-downloading CGI 
programs that use
 @code{Content-Disposition} headers to describe what the name of a
 downloaded file should be.
 
+When combined with @samp{--metalink-over-http} and @samp{--trust-server-names},
+a @samp{Content-Type: application/metalink4+xml} file is named using the
+@code{Content-Disposition} filename field, if available.
+
 @cindex Content On Error
 @item --content-on-error
 
diff --git a/src/http.c b/src/http.c
index 8fdf49d..89d496b 100644
--- a/src/http.c
+++ b/src/http.c
@@ -2570,6 +2570,87 @@ metalink_from_http (const struct response *resp, const 
struct http_stat *hs,
   mfile->resources = xnew0 (metalink_resource_t *);
   mfile->metaurls = xnew0 (metalink_metaurl_t *);
 
+  /* Process the Content-Type header.  */
+  if (resp_header_locate (resp, "Content-Type", 0, _beg, _end) != -1)
+{
+  metalink_metaurl_t murl = {0};
+
+  const char *type_beg, *type_end;
+  char *typestr = NULL;
+  char *namestr = NULL;
+  size_t type_len;
+
+  DEBUGP (("Processing Content-Type header...\n"));
+
+  /* Find beginning of type.  */
+  type_beg = val_beg;
+  while (type_beg < val_end && c_isspace (*type_beg))
+type_beg++;
+
+  /* Find end of type.  */
+  type_end = type_beg + 1;
+  while (type_end < val_end &&
+ *type_end != ';' 

[Bug-wget] [PATCH 08/27] Add file size computation in Metalink module

2016-09-28 Thread Matthew White
* NEWS: Mention Metalink's file size verification
* src/metalink.c (retrieve_from_metalink): Add file size computation
* doc/metalink.txt: Update document. Remove resolved bugs

Reject downloaded files when they do not agree with their Metalink/XML
metalink:size: https://tools.ietf.org/html/rfc5854#section-4.2.14

At the moment of writing, Metalink/HTTP headers do not provide a file
size field. This information could be obtained from the Content-Length
header field: https://tools.ietf.org/html/rfc6249#section-7
---
 NEWS |  4 
 doc/metalink.txt | 11 ---
 src/metalink.c   | 35 +++
 3 files changed, 43 insertions(+), 7 deletions(-)

diff --git a/NEWS b/NEWS
index 6011250..04718d5 100644
--- a/NEWS
+++ b/NEWS
@@ -9,6 +9,10 @@ Please send GNU Wget bug reports to .
 
 * Changes in Wget X.Y.Z
 
+* When processing a Metalink file, reject downloaded files which don't
+  agree with their own metalink:size value:
+  https://tools.ietf.org/html/rfc5854#section-4.2.16
+
 * When processing a Metalink file, with --continue resume partially
   downloaded files and keep fully downloaded files even if they fail
   the verification.
diff --git a/doc/metalink.txt b/doc/metalink.txt
index 0f3706a..904ef2e 100644
--- a/doc/metalink.txt
+++ b/doc/metalink.txt
@@ -67,7 +67,8 @@ References:
 When a Metalink/XML file is parsed:
 1. create the metalink:file "path/file" tree;
 2. download the metalink:url file as "path/file";
-3. verify the "path/file" checksum.
+3. verify the "path/file" size, if declared;
+4. verify the "path/file" checksum.
 
 All the above conform to the RFC5854 standard.
 
@@ -79,11 +80,6 @@ References:
 
 If more metalink:file elements are the same, wget downloads them all.
 
-4.4 Bugs
-
-
-The download is OK even when metalink:file size is wrong.
-
 5. `wget --metalink-over-http`
 **
 
@@ -107,7 +103,8 @@ References:
 When a Metalink/HTTP header is parsed:
 1. extract metalink metadata from the header;
 2. download the file from the mirror with the highest priority;
-3. verify the file's checksum.
+3. verify the file's size, if declared;
+4. verify the file's checksum.
 
 All the above comform to the usual Wget's download behaviour and to
 the RFC6249 standard.
diff --git a/src/metalink.c b/src/metalink.c
index 03a0bb1..ee0ed08 100644
--- a/src/metalink.c
+++ b/src/metalink.c
@@ -240,6 +240,41 @@ retrieve_from_metalink (const metalink_t* metalink)
   continue;
 }
 
+  logprintf (LOG_VERBOSE, _("Computing size for %s\n"), quote 
(filename));
+
+  if (!mfile->size)
+{
+  logprintf (LOG_VERBOSE, _("File size not declared. Skipping 
check.\n"));
+}
+  else
+{
+  wgint local_file_size = file_size (filename);
+
+  if (local_file_size == -1)
+{
+  logprintf (LOG_NOTQUIET, _("Could not get downloaded 
file's size.\n"));
+  fclose (local_file);
+  local_file = NULL;
+  continue;
+}
+
+  /* FIXME: what about int64?  */
+  DEBUGP (("Declared size: %lld\n", mfile->size));
+  DEBUGP (("Computed size: %lld\n", (long long) 
local_file_size));
+
+  if (local_file_size != (wgint) mfile->size)
+{
+  logprintf (LOG_NOTQUIET, _("Size mismatch for file 
%s.\n"), quote (filename));
+  fclose (local_file);
+  local_file = NULL;
+  continue;
+}
+  else
+{
+  logputs (LOG_VERBOSE, _("Size matches.\n"));
+}
+}
+
   for (mchksum_ptr = mfile->checksums; *mchksum_ptr; mchksum_ptr++)
 {
   char md2[MD2_DIGEST_SIZE];
-- 
2.7.3




[Bug-wget] [PATCH 25/27] Bugfix: Set NULL variable due to --content-disposition to Metalink origin

2016-09-28 Thread Matthew White
* src/http.c (http_loop): Prevent SIGSEGV when hstat.local_file is
  NULL, opt.content_disposition has a role in leaving the value unset
* src/http.c (gethttp): If hs->local_file is NULL (aka http_loop()'s
  hstat.local_file), set it to the value of hs->metalink->origin
---
 src/http.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/http.c b/src/http.c
index 4466b31..8fdf49d 100644
--- a/src/http.c
+++ b/src/http.c
@@ -3378,6 +3378,9 @@ gethttp (const struct url *u, struct url *original_url, 
struct http_stat *hs,
   if (metalink)
 {
   hs->metalink = metalink_from_http (resp, hs, u);
+  /* Bugfix: hs->local_file is NULL (opt.content_disposition).  */
+  if (!hs->local_file && hs->metalink && hs->metalink->origin)
+hs->local_file = xstrdup (hs->metalink->origin);
   xfree (hs->message);
   retval = RETR_WITH_METALINK;
   CLOSE_FINISH (sock);
@@ -4497,7 +4500,10 @@ exit:
   if ((ret == RETROK || opt.content_on_error) && local_file)
 {
   xfree (*local_file);
-  *local_file = xstrdup (hstat.local_file);
+  /* Bugfix: Prevent SIGSEGV when hstat.local_file was left NULL
+ (i.e. due to opt.content_disposition).  */
+  if (hstat.local_file)
+*local_file = xstrdup (hstat.local_file);
 }
   free_hstat ();
 
-- 
2.7.3




[Bug-wget] [PATCH 24/27] New: --trust-server-names saves Metalink/HTTP xml files using the "name" field

2016-09-28 Thread Matthew White
* src/metalink.c (retrieve_from_metalink): If opt.trustservernames is
  true, use the basename of the metaurl's name to save the xml file
* doc/metalink-standard.txt: Update doc. With --trust-server-names any
  Metalink/HTTP Link application/metalink4+xml file is saved using the
  basename of the "name" field, if any. Update Metalink/HTTP examples
* testenv/Makefile.am: Add new file
* testenv/Test-metalink-http-xml-trust-name.py: New file. Metalink/HTTP
  automated Metalink/XML, save xml files using the "name" field tests
---
 doc/metalink-standard.txt|  10 +
 src/metalink.c   |   2 +-
 testenv/Makefile.am  |   1 +
 testenv/Test-metalink-http-xml-trust-name.py | 272 +++
 4 files changed, 284 insertions(+), 1 deletion(-)
 create mode 100755 testenv/Test-metalink-http-xml-trust-name.py

diff --git a/doc/metalink-standard.txt b/doc/metalink-standard.txt
index 78709fb..4836a85 100644
--- a/doc/metalink-standard.txt
+++ b/doc/metalink-standard.txt
@@ -83,6 +83,9 @@ When --trust-server-names is on, the metalink:file "name" 
field parsed
 from Metalink/XML files is trusted. When no Metalink/XML is available,
 the mother URL is trusted.
 
+Any Metalink/HTTP application/metalink4+xml file is saved using the
+basename of its own Link header "name" field, if available.
+
 3.1.2 The final name
 
 
@@ -183,6 +186,13 @@ type="application/pgp-signature"
 Digest: SHA-256=MWVkMWQxYTRiMzk5MDQ0MzI3NGU5NDEyZTk5OWY1ZGFmNzgyZTJlO
 DYzYjRjYzFhOTlmNTQwYzI2M2QwM2U2MQ==
 
+See [2 #section-4].
+
+Link: ; rel=describedby;
+type="application/x-bittorrent"; name="differentname.ext"
+Link: ; rel=describedby;
+type="application/metalink4+xml"
+
 5.4 Saving files
 
 
diff --git a/src/metalink.c b/src/metalink.c
index 0c77fa8..c5a25db 100644
--- a/src/metalink.c
+++ b/src/metalink.c
@@ -241,7 +241,7 @@ retrieve_from_metalink (const metalink_t* metalink)
 metafile = xstrdup (safename);
 
 if (opt.trustservernames)
-  replace_metalink_basename (, murl->url);
+  replace_metalink_basename (, murl->name ? 
murl->name : murl->url);
 else
   append_suffix_number (, ".meta#", meta_count);
 
diff --git a/testenv/Makefile.am b/testenv/Makefile.am
index 725b76b..a82a925 100644
--- a/testenv/Makefile.am
+++ b/testenv/Makefile.am
@@ -32,6 +32,7 @@ if METALINK_IS_ENABLED
 Test-metalink-http-baddigest.py \
 Test-metalink-http-xml.py   \
 Test-metalink-http-xml-trust.py \
+Test-metalink-http-xml-trust-name.py\
 Test-metalink-xml.py\
 Test-metalink-xml-continue.py   \
 Test-metalink-xml-relpath.py\
diff --git a/testenv/Test-metalink-http-xml-trust-name.py 
b/testenv/Test-metalink-http-xml-trust-name.py
new file mode 100755
index 000..7dae50e
--- /dev/null
+++ b/testenv/Test-metalink-http-xml-trust-name.py
@@ -0,0 +1,272 @@
+#!/usr/bin/env python3
+from sys import exit
+from test.http_test import HTTPTest
+from misc.wget_file import WgetFile
+import hashlib
+from base64 import b64encode
+
+"""
+This is to test Metalink/HTTP with Metalink/XML Link headers.
+
+With --trust-server-names, trust the metalink:file names.
+
+Without --trust-server-names, don't trust the metalink:file names:
+use the basename of --input-metalink, and add a sequential number
+(e.g. .#1, .#2, etc.).
+
+Strip the directory from unsafe paths.
+"""
+
+# File Definitions ###
+bad = "Ouch!"
+bad_sha256 = hashlib.sha256 (bad.encode ('UTF-8')).hexdigest ()
+
+File1 = "Would you like some Tea?"
+File1_lowPref = "Do not take this"
+File1_sha256 = hashlib.sha256 (File1.encode ('UTF-8')).hexdigest ()
+
+File2 = "This is gonna be good"
+File2_lowPref = "Not this one too"
+File2_sha256 = hashlib.sha256 (File2.encode ('UTF-8')).hexdigest ()
+
+File3 = "A little more, please"
+File3_lowPref = "That's just too much"
+File3_sha256 = hashlib.sha256 (File3.encode ('UTF-8')).hexdigest ()
+
+File4 = "Maybe a biscuit?"
+File4_lowPref = "No, thanks"
+File4_sha256 = hashlib.sha256 (File4.encode ('UTF-8')).hexdigest ()
+
+File5 = "More Tea...?"
+File5_lowPref = "I have to go..."
+File5_sha256 = hashlib.sha256 (File5.encode ('UTF-8')).hexdigest ()
+
+MetaXml1 = \
+"""
+http://www.metalinker.org/;>
+  
+GNU Wget
+  
+  
+GNU GPL
+http://www.gnu.org/licenses/gpl.html
+  
+  Wget Test Files
+  1.2.3
+  Wget Test Files description
+  
+
+  
+{{FILE1_HASH}}
+  
+  
+http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file
+http://{{SRV_HOST}}:{{SRV_PORT}}/404
+http://{{SRV_HOST}}:{{SRV_PORT}}/File1_lowPref
+

[Bug-wget] [PATCH 23/27] Bugfix: Detect when a metalink:file doesn't have any hash

2016-09-28 Thread Matthew White
* src/metalink.c (retrieve_from_metalink): Reject any metalink:file
  without hashes. Prompt the error and switch to the next file
* testenv/Makefile.am: Add new file
* testenv/Test-metalink-xml-nohash.py: New file. Metalink/XML with no
  hashes tests

Prevent SIGSEGV.
---
 src/metalink.c  |   9 +-
 testenv/Makefile.am |   1 +
 testenv/Test-metalink-xml-nohash.py | 180 
 3 files changed, 189 insertions(+), 1 deletion(-)
 create mode 100755 testenv/Test-metalink-xml-nohash.py

diff --git a/src/metalink.c b/src/metalink.c
index 16e247d..0c77fa8 100644
--- a/src/metalink.c
+++ b/src/metalink.c
@@ -368,7 +368,8 @@ retrieve_from_metalink (const metalink_t* metalink)
 }
 
   /* Resources are sorted by priority.  */
-  for (mres_ptr = mfile->resources; *mres_ptr && !skip_mfile; mres_ptr++)
+  for (mres_ptr = mfile->resources;
+   *mres_ptr && mfile->checksums && !skip_mfile; mres_ptr++)
 {
   metalink_resource_t *mres = *mres_ptr;
   metalink_checksum_t **mchksum_ptr, *mchksum;
@@ -859,6 +860,12 @@ gpg_skip_verification:
 } /* endif RETR_OK.  */
 } /* Iterate over resources.  */
 
+  if (!mfile->checksums)
+{
+  logprintf (LOG_NOTQUIET, _("No checksums found.\n"));
+  retr_err = METALINK_CHKSUM_ERROR;
+}
+
   if (retr_err != RETROK)
 {
   logprintf (LOG_VERBOSE, _("Failed to download %s. Skipping 
resource.\n"),
diff --git a/testenv/Makefile.am b/testenv/Makefile.am
index ff9fe05..725b76b 100644
--- a/testenv/Makefile.am
+++ b/testenv/Makefile.am
@@ -51,6 +51,7 @@ if METALINK_IS_ENABLED
 Test-metalink-xml-homeprefix-trust.py   \
 Test-metalink-xml-emptyprefix-trust.py  \
 Test-metalink-xml-size.py   \
+Test-metalink-xml-nohash.py \
 Test-metalink-xml-nourls.py \
 Test-metalink-xml-urlbreak.py
 else
diff --git a/testenv/Test-metalink-xml-nohash.py 
b/testenv/Test-metalink-xml-nohash.py
new file mode 100755
index 000..3985918
--- /dev/null
+++ b/testenv/Test-metalink-xml-nohash.py
@@ -0,0 +1,180 @@
+#!/usr/bin/env python3
+from sys import exit
+from test.http_test import HTTPTest
+from misc.wget_file import WgetFile
+import hashlib
+
+"""
+This is to test if Metalink/XML with no hashes generates a SIGSEGV.
+
+With --trust-server-names, trust the metalink:file names.
+
+Without --trust-server-names, don't trust the metalink:file names:
+use the basename of --input-metalink, and add a sequential number
+(e.g. .#1, .#2, etc.).
+
+Strip the directory from unsafe paths.
+"""
+# File Definitions ###
+bad = "Ouch!"
+
+File1 = "Would you like some Tea?"
+File1_lowPref = "Do not take this"
+
+File2 = "This is gonna be good"
+File2_lowPref = "Not this one too"
+File2_sha256 = hashlib.sha256 (File2.encode ('UTF-8')).hexdigest ()
+
+File3 = "A little more, please"
+File3_lowPref = "That's just too much"
+
+File4 = "Maybe a biscuit?"
+File4_lowPref = "No, thanks"
+File4_sha256 = hashlib.sha256 (File4.encode ('UTF-8')).hexdigest ()
+
+File5 = "More Tea...?"
+File5_lowPref = "I have to go..."
+
+MetaXml = \
+"""
+http://www.metalinker.org/;>
+  
+GNU Wget
+  
+  
+GNU GPL
+http://www.gnu.org/licenses/gpl.html
+  
+  Wget Test Files
+  1.2.3
+  Wget Test Files description
+  
+
+  
+http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file
+http://{{SRV_HOST}}:{{SRV_PORT}}/404
+http://{{SRV_HOST}}:{{SRV_PORT}}/File1_lowPref
+http://{{SRV_HOST}}:{{SRV_PORT}}/File1
+  
+
+
+  
+{{FILE2_HASH}}
+  
+  
+http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file
+http://{{SRV_HOST}}:{{SRV_PORT}}/404
+http://{{SRV_HOST}}:{{SRV_PORT}}/File2_lowPref
+http://{{SRV_HOST}}:{{SRV_PORT}}/File2
+  
+
+
+  
+  
+  
+http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file
+http://{{SRV_HOST}}:{{SRV_PORT}}/404
+http://{{SRV_HOST}}:{{SRV_PORT}}/File3_lowPref
+http://{{SRV_HOST}}:{{SRV_PORT}}/File3
+  
+
+
+  
+{{FILE4_HASH}}
+  
+  
+http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file
+http://{{SRV_HOST}}:{{SRV_PORT}}/404
+http://{{SRV_HOST}}:{{SRV_PORT}}/File4_lowPref
+http://{{SRV_HOST}}:{{SRV_PORT}}/File4
+  
+
+
+  
+
+  
+  
+http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file
+http://{{SRV_HOST}}:{{SRV_PORT}}/404
+http://{{SRV_HOST}}:{{SRV_PORT}}/File5_lowPref
+http://{{SRV_HOST}}:{{SRV_PORT}}/File5
+  
+
+  
+
+"""
+
+wrong_file = WgetFile ("wrong_file", bad)
+
+File1_orig = WgetFile ("File1", File1)
+File1_nono = WgetFile ("File1_lowPref", File1_lowPref)
+
+File2_orig = WgetFile ("File2", File2)
+File2_down = 

[Bug-wget] [PATCH 13/27] New: Metalink/XML and Metalink/HTTP file naming safety rules

2016-09-28 Thread Matthew White
* NEWS: Mention the effect of --trust-server-names over Metalink
* src/metalink.h: Add declaration of function append_suffix_number()
* src/metalink.c: Add function append_suffix_number() append number to
  string
* src/metalink.c (retrieve_from_metalink): Safer Metalink/XML and
  Metalink/HTTP download naming system, opt.trustservernames based
* doc/metalink-standard.txt: Update doc. Explain new Metalink/XML and
  Metalink/HTTP download naming system and --trust-server-names role
* testenv/Makefile.am: Add new files
* testenv/Test-metalink-xml-continue.py: Update test. Metalink/XML
  continue/keep existing files (HTTP 416) with --continue tests
* testenv/Test-metalink-xml.py: Update test. Metalink/XML naming tests
* testenv/Test-metalink-xml-trust.py: New file. Metalink/XML naming
  tests with --trust-server-names
* testenv/Test-metalink-xml-abspath.py: Update test. Metalink/XML
  absolute path tests
* testenv/Test-metalink-xml-abspath-trust.py: New file. Metalink/XML
  absolute path tests with --trust-server-names
* testenv/Test-metalink-xml-relpath.py: Update test. Metalink/XML
  relative path tests
* testenv/Test-metalink-xml-relpath-trust.py: New file. Metalink/XML
  relative path tests with --trust-server-names
* testenv/Test-metalink-xml-homepath.py: Update test. Metalink/XML
  home path and ~ (tilde) tests
* testenv/Test-metalink-xml-homepath-trust.py: New file. Metalink/XML
  home path and ~ (tilde) tests with --trust-server-names
* testenv/Test-metalink-xml-prefix.py: New file. Metalink/XML naming
  tests with --directory-prefix
* testenv/Test-metalink-xml-prefix-trust.py: New file. Metalink/XML
  naming tests with --directory-prefix and --trust-server-names
* testenv/Test-metalink-xml-absprefix.py: New file. Metalink/XML
  absolute --directory-prefix tests
* testenv/Test-metalink-xml-absprefix-trust.py: New file. Metalink/XML
  absolute --directory-prefix tests with --trust-server-names
* testenv/Test-metalink-xml-relprefix.py: New file. Metalink/XML
  relative --directory-prefix tests
* testenv/Test-metalink-xml-relprefix-trust.py: New file. Metalink/XML
  relative --directory-prefix tests with --trust-server-names
* testenv/Test-metalink-xml-homeprefix.py: New file. Metalink/XML home
  --directory-prefix tests
* testenv/Test-metalink-xml-homeprefix-trust.py: New file. Metalink/XML
  home --directory-prefix tests with --trust-server-names

The option --trust-server-names allows the use of the file names parsed
from a Metalink/XML file.  Without --trust-server-names, the safety
mechanism provides secure and predictable file names.
---
 NEWS  |   4 +
 doc/metalink-standard.txt |  59 ++--
 src/metalink.c| 107 +++---
 src/metalink.h|   1 +
 testenv/Makefile.am   |  14 +-
 testenv/Test-metalink-xml-abspath-trust.py| 129 +
 testenv/Test-metalink-xml-abspath.py  |  61 ++--
 testenv/Test-metalink-xml-absprefix-trust.py  | 193 +
 testenv/Test-metalink-xml-absprefix.py| 193 +
 testenv/Test-metalink-xml-continue.py |  20 ++-
 testenv/Test-metalink-xml-homepath-trust.py   | 194 +
 testenv/Test-metalink-xml-homepath.py | 126 +++--
 testenv/Test-metalink-xml-homeprefix-trust.py | 193 +
 testenv/Test-metalink-xml-homeprefix.py   | 193 +
 testenv/Test-metalink-xml-prefix-trust.py | 193 +
 testenv/Test-metalink-xml-prefix.py   | 193 +
 testenv/Test-metalink-xml-relpath-trust.py| 192 +
 testenv/Test-metalink-xml-relpath.py  | 126 +
 testenv/Test-metalink-xml-relprefix-trust.py  | 193 +
 testenv/Test-metalink-xml-relprefix.py| 193 +
 testenv/Test-metalink-xml-trust.py| 196 ++
 testenv/Test-metalink-xml.py  | 126 +++--
 22 files changed, 2809 insertions(+), 90 deletions(-)
 create mode 100755 testenv/Test-metalink-xml-abspath-trust.py
 create mode 100755 testenv/Test-metalink-xml-absprefix-trust.py
 create mode 100755 testenv/Test-metalink-xml-absprefix.py
 create mode 100755 testenv/Test-metalink-xml-homepath-trust.py
 create mode 100755 testenv/Test-metalink-xml-homeprefix-trust.py
 create mode 100755 testenv/Test-metalink-xml-homeprefix.py
 create mode 100755 testenv/Test-metalink-xml-prefix-trust.py
 create mode 100755 testenv/Test-metalink-xml-prefix.py
 create mode 100755 testenv/Test-metalink-xml-relpath-trust.py
 create mode 100755 testenv/Test-metalink-xml-relprefix-trust.py
 create mode 100755 testenv/Test-metalink-xml-relprefix.py
 create mode 100755 testenv/Test-metalink-xml-trust.py

diff --git a/NEWS b/NEWS
index 72f8728..2153d9a 

[Bug-wget] [PATCH 09/27] Change mfile->name to filename in Metalink module's messages

2016-09-28 Thread Matthew White
* src/metalink.c (retrieve_from_metalink): Change mfile->name to
  filename when referring to the downloaded file

The file name could have been changed by unique_create() (or by any
other means) before downloading. Use the name of the downloaded file
(filename) when printing output which refers to it.
---
 src/metalink.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/metalink.c b/src/metalink.c
index ee0ed08..8e074fe 100644
--- a/src/metalink.c
+++ b/src/metalink.c
@@ -325,7 +325,7 @@ retrieve_from_metalink (const metalink_t* metalink)
 }
 
   logprintf (LOG_VERBOSE, _("Computing checksum for %s\n"),
- quote (mfile->name));
+ quote (filename));
 
   DEBUGP (("Declared hash: %s\n", mchksum->hash));
 
@@ -408,7 +408,7 @@ retrieve_from_metalink (const metalink_t* metalink)
 {
   logprintf (LOG_NOTQUIET,
  _("Checksum mismatch for file %s.\n"),
- quote (mfile->name));
+ quote (filename));
 }
 
   /* Stop as soon as we checked the supported checksum.  */
@@ -592,14 +592,14 @@ gpg_skip_verification:
   if (retr_err != RETROK)
 {
   logprintf (LOG_VERBOSE, _("Failed to download %s. Skipping 
resource.\n"),
- quote (mfile->name));
+ quote (filename ? filename : mfile->name));
 }
   else if (!hash_ok)
 {
   retr_err = METALINK_CHKSUM_ERROR;
   logprintf (LOG_NOTQUIET,
  _("File %s retrieved but checksum does not match. "
-   "\n"), quote (mfile->name));
+   "\n"), quote (filename));
 }
 #ifdef HAVE_GPGME
 /* Signature will be only validated if hash check was successful.  */
@@ -608,7 +608,7 @@ gpg_skip_verification:
   retr_err = METALINK_SIG_ERROR;
   logprintf (LOG_NOTQUIET,
  _("File %s retrieved but signature does not match. "
-   "\n"), quote (mfile->name));
+   "\n"), quote (filename));
 }
 #endif
   last_retr_err = retr_err == RETROK ? last_retr_err : retr_err;
-- 
2.7.3




[Bug-wget] [PATCH 21/27] New option --metalink-index to process Metalink application/metalink4+xml

2016-09-28 Thread Matthew White
* NEWS: Mention the effect of --metalink-index over Metalink
* src/init.c: Add new option metalinkindex (opt.metalink_index),
  initialize to -1
* src/main.c: Add new option metalink-index (--metalink-index=NUMBER)
* src/options.h: Add new option metalink_index (int)
* src/metalink.h: Add declaration of functions fetch_metalink_file(),
  replace_metalink_basename()
* src/metalink.c: Add functions fetch_metalink_file() simple file
  fetch, replace_metalink_basename() replace file basename
* src/metalink.c (retrieve_from_metalink): New. Process Metalink
  application/metalink4+xml of opt.metalink_index ordinal number
* doc/wget.texi: Add new option metalink-index (--metalink-index)
  documentation
* doc/metalink-standard.txt: Updated doc. Add documentation about
  Metalink application/metalink4+xml metaurls download naming system
* doc/metalink-standard.txt: Update Metalink/XML and HTTP examples
* testenv/Makefile.am: Add new files
* testenv/Test-metalink-http-xml.py: New file. Metalink/HTTP automated
  Metalink/XML "application/metalink4+xml" --metalink-index tests
* testenv/Test-metalink-http-xml-trust.py: New file. Metalink/HTTP
  automated Metalink/XML "application/metalink4+xml" --metalink-index
  retrieval with --trust-server-names tests

WARNING: Do not use lib/dirname.c (dir_name) to get the directory
name, it may append a dot '.' character to the directory name.
---
 NEWS|   3 +
 doc/metalink-standard.txt   |  36 +++-
 doc/wget.texi   |   9 +
 src/init.c  |   5 +
 src/main.c  |   3 +
 src/metalink.c  | 348 
 src/metalink.h  |   4 +
 src/options.h   |   1 +
 testenv/Makefile.am |   2 +
 testenv/Test-metalink-http-xml-trust.py | 272 +
 testenv/Test-metalink-http-xml.py   | 272 +
 11 files changed, 947 insertions(+), 8 deletions(-)
 create mode 100755 testenv/Test-metalink-http-xml-trust.py
 create mode 100755 testenv/Test-metalink-http-xml.py

diff --git a/NEWS b/NEWS
index 2153d9a..ca7eaba 100644
--- a/NEWS
+++ b/NEWS
@@ -9,6 +9,9 @@ Please send GNU Wget bug reports to .
 
 * Changes in Wget X.Y.Z
 
+* When processing a Metalink header, --metalink-index= allows
+  to process the header's application/metalink4+xml files.
+
 * When processing a Metalink file, --trust-server-names enables the
   use of the destination file names specified in the Metalink file,
   otherwise a safe destination file name is computed.
diff --git a/doc/metalink-standard.txt b/doc/metalink-standard.txt
index 18acaaa..78709fb 100644
--- a/doc/metalink-standard.txt
+++ b/doc/metalink-standard.txt
@@ -65,10 +65,16 @@ ignored, see '1. Security features'.
 
 
 When --trust-server-names is off, the basename of the --input-metalink
-file, if available, or of the mother URL is trusted.
+file, if available, or of the mother URL is trusted. This trusted name
+is the radix of any subsequent file name.
+
+When a Metalink/HTTP in encountered, any fetched Metalink/XML file has
+its own ordinal number appended as suffix to the trusted name. In this
+case scenario, an unique Metalink/XML file is saved each time applying
+an additional suffix to the currently computed name when necessary.
 
 The files described by a Metalink/XML file will be named sequentially
-applying a suffix to the trusted name.
+applying an additional suffix to the currently trusted/computed name.
 
 3.1.1.2 With --trust-server-names
 =
@@ -91,6 +97,8 @@ found unsafe too, the file is not downloaded.
 4.1 Example files
 =
 
+See [1 #section-1.1].
+
 cat > bugus.meta4 << EOF
 
 
@@ -134,8 +142,9 @@ be informed to the caller of libmetalink's 
metalink_parse_file().
 Fetched metalink:file elements shall be wrote using the unique "name"
 field as file name [1 #section-4.1.2.1].
 
-A metalink:file url's file name shall not substitute the "name" field,
-see '3. Download file name'.
+A metalink:file url's file name shall not substitute the "name" field.
+
+Security exceptions are explained in '3. Download file name'.
 
 4.5 Multi-Source download
 =
@@ -160,9 +169,19 @@ $ wget --metalink-over-http http://127.0.0.1/dir/file.ext
 5.3 Metalink/HTTP header answer
 ===
 
-Link: http://ftpmirror.gnu.org/bash/bash-4.3-patches/bash43-001; 
rel=duplicate; pref; pri=2
-Link: http://another.url/common_name; rel=duplicate; pref; pri=1
-Digest: SHA-256=7LPf8mSGZ1E+MVVLOtBUzNifzjjjM2fJRZrDooUVN0I=
+See [2 #section-1.1].
+
+Etag: "thvDyvhfIqlvFe+A9MYgxAfm1q5="
+Link: ; rel=duplicate
+Link: ; rel=duplicate
+Link: ; rel=describedby;

[Bug-wget] [PATCH 07/27] Update Metalink/XML tests and add a new test for home paths

2016-09-28 Thread Matthew White
* testenv/Test-metalink-xml-relpath.py: Update test
* testenv/Test-metalink-xml-homepath.py: New file. Reject home paths
* testenv/Makefile.am: Add new file

When --input-metalink= is used, each metalink:file name is
verified by libmetalink's metalink_check_safe_path(). By design,
absolute, relative and home paths are rejected.

At the moment of writing, when --metalink-over-http is used, absolute,
relative, and home paths aren't a concern. The destination file name
is a combination of URL's file name and cli's "Directory Options"
handled by src/url.c (url_file_name).
---
 testenv/Makefile.am   |  3 +-
 testenv/Test-metalink-xml-homepath.py | 86 +++
 testenv/Test-metalink-xml-relpath.py  | 42 -
 3 files changed, 129 insertions(+), 2 deletions(-)
 create mode 100755 testenv/Test-metalink-xml-homepath.py

diff --git a/testenv/Makefile.am b/testenv/Makefile.am
index 94be023..f14be07 100644
--- a/testenv/Makefile.am
+++ b/testenv/Makefile.am
@@ -31,7 +31,8 @@ if METALINK_IS_ENABLED
 Test-metalink-xml.py\
 Test-metalink-xml-continue.py   \
 Test-metalink-xml-relpath.py\
-Test-metalink-xml-abspath.py
+Test-metalink-xml-abspath.py\
+Test-metalink-xml-homepath.py
 else
   METALINK_TESTS =
 endif
diff --git a/testenv/Test-metalink-xml-homepath.py 
b/testenv/Test-metalink-xml-homepath.py
new file mode 100755
index 000..aa06f2b
--- /dev/null
+++ b/testenv/Test-metalink-xml-homepath.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+from sys import exit
+from test.http_test import HTTPTest
+from misc.wget_file import WgetFile
+import hashlib
+
+"""
+This is to test if Metalink XML file escapes current directory.
+"""
+# File Definitions ###
+File1 = "Would you like some Tea?"
+File1_lowPref = "Do not take this"
+File1_sha256 = hashlib.sha256 (File1.encode ('UTF-8')).hexdigest ()
+MetaXml = \
+"""
+http://www.metalinker.org/;>
+  
+GNU Wget
+  
+  
+GNU GPL
+http://www.gnu.org/licenses/gpl.html
+  
+  Wget Test File 1
+  1.2.3
+  Wget Test File 1 description
+  
+
+  
+{{FILE1_HASH}}
+  
+  
+http://broken.example/File1
+http://{{SRV_HOST}}:{{SRV_PORT}}/File1_lowPref
+http://{{SRV_HOST}}:{{SRV_PORT}}/File1
+  
+
+  
+
+"""
+
+A_File = WgetFile ("File1", File1)
+B_File = WgetFile ("File1_lowPref", File1_lowPref)
+MetaFile = WgetFile ("test.metalink", MetaXml)
+
+WGET_OPTIONS = "--input-metalink test.metalink"
+WGET_URLS = [[]]
+
+Files = [[A_File, B_File]]
+Existing_Files = [MetaFile]
+
+ExpectedReturnCode = 0
+ExpectedDownloadedFiles = [MetaFile]
+
+ Pre and Post Test Hooks #
+pre_test = {
+"ServerFiles"   : Files,
+"LocalFiles": Existing_Files
+}
+test_options = {
+"WgetCommands"  : WGET_OPTIONS,
+"Urls"  : WGET_URLS
+}
+post_test = {
+"ExpectedFiles" : ExpectedDownloadedFiles,
+"ExpectedRetcode"   : ExpectedReturnCode
+}
+
+http_test = HTTPTest (
+pre_hook=pre_test,
+test_params=test_options,
+post_hook=post_test,
+)
+
+http_test.server_setup()
+### Get and use dynamic server sockname
+srv_host, srv_port = http_test.servers[0].server_inst.socket.getsockname ()
+
+MetaXml = MetaXml.replace('{{FILE1_HASH}}', File1_sha256)
+MetaXml = MetaXml.replace('{{SRV_HOST}}', srv_host)
+MetaXml = MetaXml.replace('{{SRV_PORT}}', str (srv_port))
+MetaFile.content = MetaXml
+
+err = http_test.begin ()
+
+exit (err)
diff --git a/testenv/Test-metalink-xml-relpath.py 
b/testenv/Test-metalink-xml-relpath.py
index c565302..9790cd4 100755
--- a/testenv/Test-metalink-xml-relpath.py
+++ b/testenv/Test-metalink-xml-relpath.py
@@ -25,11 +25,51 @@ MetaXml = \
   1.2.3
   Wget Test File 1 description
   
+
+  
+{{FILE1_HASH}}
+  
+  
+http://{{SRV_HOST}}:{{SRV_PORT}}/File1
+  
+
+
+  
+{{FILE1_HASH}}
+  
+  
+http://{{SRV_HOST}}:{{SRV_PORT}}/File1
+  
+
 
   
 {{FILE1_HASH}}
   
   
+http://{{SRV_HOST}}:{{SRV_PORT}}/File1
+  
+
+
+  
+{{FILE1_HASH}}
+  
+  
+http://{{SRV_HOST}}:{{SRV_PORT}}/File1
+  
+
+
+  
+{{FILE1_HASH}}
+  
+  
+http://{{SRV_HOST}}:{{SRV_PORT}}/File1
+  
+
+
+  
+{{FILE1_HASH}}
+  
+  
 http://broken.example/File1
 http://{{SRV_HOST}}:{{SRV_PORT}}/File1_lowPref
 http://{{SRV_HOST}}:{{SRV_PORT}}/File1
@@ -50,7 +90,7 @@ Files = [[A_File, B_File]]
 Existing_Files = [MetaFile]
 
 ExpectedReturnCode = 0
-ExpectedDownloadedFiles = [MetaFile]
+ExpectedDownloadedFiles = [WgetFile ("dir/subdir/File1", File1), MetaFile]
 
  Pre 

[Bug-wget] [PATCH 22/27] Bugfix: Detect malformed base64 Metalink/HTTP Digest header

2016-09-28 Thread Matthew White
* src/http.c (metalink_from_http): Fix hash_bin_len type. Use ssize_t
  instead of size_t. Reject -1 as base64_decode() return value
* testenv/Makefile.am: Add new file
* testenv/Test-metalink-http-baddigest.py: New file. Metalink/HTTP
  malformed base64 Digest header tests

On malformed base64 input, ssize_t base64_decode() returns -1. Stored
in a size_t variable, that value becomes a huge number which, when used
as the xmalloc() size, will exhaust all the memory.
---
 src/http.c  | 14 +++--
 testenv/Makefile.am |  1 +
 testenv/Test-metalink-http-baddigest.py | 93 +
 3 files changed, 105 insertions(+), 3 deletions(-)
 create mode 100755 testenv/Test-metalink-http-baddigest.py

diff --git a/src/http.c b/src/http.c
index e6af7c1..4466b31 100644
--- a/src/http.c
+++ b/src/http.c
@@ -2894,10 +2894,18 @@ metalink_from_http (const struct response *resp, const 
struct http_stat *hs,
  Therefore we convert: base64 -> binary -> hex.  */
   const size_t dig_hash_str_len = strlen (dig_hash);
   char *bin_hash = alloca (dig_hash_str_len * 3 / 4 + 1);
-  size_t hash_bin_len;
+  ssize_t hash_bin_len;
 
   hash_bin_len = base64_decode (dig_hash, bin_hash);
 
+  /* Detect malformed base64 input.  */
+  if (hash_bin_len < 0)
+{
+  xfree (dig_type);
+  xfree (dig_hash);
+  continue;
+}
+
   /* One slot for me, one for zero-termination.  */
   mfile->checksums =
   xrealloc (mfile->checksums,
@@ -2905,8 +2913,8 @@ metalink_from_http (const struct response *resp, const 
struct http_stat *hs,
   mfile->checksums[hash_count] = xnew (metalink_checksum_t);
   mfile->checksums[hash_count]->type = dig_type;
 
-  mfile->checksums[hash_count]->hash = xmalloc (hash_bin_len * 2 + 1);
-  wg_hex_to_string (mfile->checksums[hash_count]->hash, bin_hash, 
hash_bin_len);
+  mfile->checksums[hash_count]->hash = xmalloc ((size_t)hash_bin_len * 
2 + 1);
+  wg_hex_to_string (mfile->checksums[hash_count]->hash, bin_hash, 
(size_t)hash_bin_len);
 
   xfree (dig_hash);
 
diff --git a/testenv/Makefile.am b/testenv/Makefile.am
index daba609..ff9fe05 100644
--- a/testenv/Makefile.am
+++ b/testenv/Makefile.am
@@ -29,6 +29,7 @@
 if METALINK_IS_ENABLED
   METALINK_TESTS = Test-metalink-http.py\
 Test-metalink-http-quoted.py\
+Test-metalink-http-baddigest.py \
 Test-metalink-http-xml.py   \
 Test-metalink-http-xml-trust.py \
 Test-metalink-xml.py\
diff --git a/testenv/Test-metalink-http-baddigest.py 
b/testenv/Test-metalink-http-baddigest.py
new file mode 100755
index 000..2496da7
--- /dev/null
+++ b/testenv/Test-metalink-http-baddigest.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python3
+from sys import exit
+from test.http_test import HTTPTest
+from misc.wget_file import WgetFile
+import hashlib
+from base64 import b64encode
+
+"""
+This is to test Metalink/HTTP with a malformed base64 Digest header.
+
+With --trust-server-names, trust the metalink:file names.
+
+Without --trust-server-names, don't trust the metalink:file names:
+use the basename of --input-metalink, and add a sequential number
+(e.g. .#1, .#2, etc.).
+
+Strip the directory from unsafe paths.
+"""
+
+# File Definitions ###
+bad = "Ouch!"
+bad_sha256 = b64encode (hashlib.sha256 (bad.encode ('UTF-8')).digest 
()).decode ('ascii')
+
+LinkHeaders = ["; rel=duplicate; 
pri=1"]
+DigestHeader = "SHA-256=bad_base64,SHA-256={{BAD_HASH}}"
+
+# This will be filled as soon as we know server hostname and port
+MetaHTTPRules = {'SendHeader' : {}}
+
+MetaHTTP = WgetFile ("main.metalink", rules=MetaHTTPRules)
+
+wrong_file = WgetFile ("wrong_file", bad)
+wrong_file_down = WgetFile ("main.metalink", bad)
+
+WGET_OPTIONS = "--metalink-over-http"
+WGET_URLS = [["main.metalink"]]
+
+RequestList = [[
+"HEAD /main.metalink",
+"GET /wrong_file"
+]]
+
+Files = [[
+MetaHTTP,
+wrong_file
+]]
+Existing_Files = []
+
+ExpectedReturnCode = 0
+ExpectedDownloadedFiles = [wrong_file_down]
+
+ Pre and Post Test Hooks #
+pre_test = {
+"ServerFiles"   : Files,
+"LocalFiles": Existing_Files
+}
+test_options = {
+"WgetCommands"  : WGET_OPTIONS,
+"Urls"  : WGET_URLS
+}
+post_test = {
+"ExpectedFiles" : ExpectedDownloadedFiles,
+"ExpectedRetcode"   : ExpectedReturnCode,
+"FilesCrawled"  : RequestList
+}
+
+http_test = HTTPTest (
+pre_hook=pre_test,
+test_params=test_options,
+post_hook=post_test
+)
+
+http_test.server_setup()
+### Get 

[Bug-wget] [PATCH 06/27] Bugfix: Keep the download progress when alternating metalink:url

2016-09-28 Thread Matthew White
* NEWS: Mention the effects of --continue over Metalink
* src/metalink.c (retrieve_from_metalink): On download error, resume
  output_stream with the next mres->url. Keep fully downloaded files
  started with --continue, otherwise rename/remove the file
* testenv/Makefile.am: Add new file
* testenv/Test-metalink-xml-continue.py: New file. Metalink/XML
  continue/keep existing files (HTTP 416) with --continue tests

Before this patch, with --continue, existing and/or fully retrieved
files which fail the sanity tests were renamed (--keep-badhash), or
removed.

This patch ensures that --continue doesn't rename/remove existing
and/or fully retrieved files (HTTP 416) which fail the sanity tests.
---
 NEWS  |   4 +
 src/metalink.c|  62 
 testenv/Makefile.am   |   1 +
 testenv/Test-metalink-xml-continue.py | 185 ++
 4 files changed, 230 insertions(+), 22 deletions(-)
 create mode 100644 testenv/Test-metalink-xml-continue.py

diff --git a/NEWS b/NEWS
index 0299418..6011250 100644
--- a/NEWS
+++ b/NEWS
@@ -9,6 +9,10 @@ Please send GNU Wget bug reports to .
 
 * Changes in Wget X.Y.Z
 
+* When processing a Metalink file, with --continue resume partially
+  downloaded files and keep fully downloaded files even if they fail
+  the verification.
+
 * When processing a Metalink file, create the parent directories of a
   "path/file" destination file name:
   https://tools.ietf.org/html/rfc5854#section-4.1.2.1
diff --git a/src/metalink.c b/src/metalink.c
index 8294a7e..03a0bb1 100644
--- a/src/metalink.c
+++ b/src/metalink.c
@@ -96,12 +96,14 @@ retrieve_from_metalink (const metalink_t* metalink)
  1 -> verified successfully  */
   char sig_status = 0;
 
+  bool skip_mfile = false;
+
   output_stream = NULL;
 
   DEBUGP (("Processing metalink file %s...\n", quote (mfile->name)));
 
   /* Resources are sorted by priority.  */
-  for (mres_ptr = mfile->resources; *mres_ptr; mres_ptr++)
+  for (mres_ptr = mfile->resources; *mres_ptr && !skip_mfile; mres_ptr++)
 {
   metalink_resource_t *mres = *mres_ptr;
   metalink_checksum_t **mchksum_ptr, *mchksum;
@@ -117,26 +119,30 @@ retrieve_from_metalink (const metalink_t* metalink)
   continue;
 }
 
-  retr_err = METALINK_RETR_ERROR;
-
-  /* If output_stream is not NULL, then we have failed on
- previous resource and are retrying. Thus, rename/remove
- the file.  */
-  if (output_stream)
+  /* The file is fully downloaded, but some problems were
+ encountered (checksum failure?).  The loop had been
+ continued to switch to the next url.  */
+  if (output_stream && retr_err == RETROK)
 {
+  /* Do not rename/remove a continued file. Skip it.  */
+  if (opt.always_rest)
+{
+  skip_mfile = true;
+  continue;
+}
+
   fclose (output_stream);
   output_stream = NULL;
   badhash_or_remove (filename);
   xfree (filename);
 }
-  else if (filename)
+  else if (!output_stream && filename)
 {
-  /* Rename/remove the file downloaded previously before
- downloading it again.  */
-  badhash_or_remove (filename);
   xfree (filename);
 }
 
+  retr_err = METALINK_RETR_ERROR;
+
   /* Parse our resource URL.  */
   iri = iri_new ();
   set_uri_encoding (iri, opt.locale, true);
@@ -156,17 +162,29 @@ retrieve_from_metalink (const metalink_t* metalink)
   /* Avoid recursive Metalink from HTTP headers.  */
   bool _metalink_http = opt.metalink_over_http;
 
-  /* Assure proper local file name regardless of the URL
- of particular Metalink resource.
- To do that we create the local file here and put
- it as output_stream. We restore the original configuration
- after we are finished with the file.  */
-  if (opt.always_rest)
-/* continue previous download */
-output_stream = fopen (mfile->name, "ab");
+  /* If output_stream is not NULL, then we have failed on
+ previous resource and are retrying. Thus, continue
+ with the next resource.  Do not close output_stream
+ while iterating over the resources, or the download
+ progress will be lost.  */
+  if (output_stream)
+{
+  DEBUGP (("Previous resource failed, continue with next 
resource.\n"));
+}
   else
-/* create a file with an unique name */
-output_stream = 

[Bug-wget] [PATCH 20/27] Bugfix: Prevent sorting when there are less than two elements

2016-09-28 Thread Matthew White
* src/utils.c (stable_sort): Add condition nmemb > 1, sort only when
  there is more than one element

Prevent SIGSEGV.
---
 src/utils.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/utils.c b/src/utils.c
index 9ab1b90..dcf90a6 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -2428,7 +2428,7 @@ void
 stable_sort (void *base, size_t nmemb, size_t size,
  int (*cmpfun) (const void *, const void *))
 {
-  if (size > 1)
+  if (nmemb > 1 && size > 1)
 {
   void *temp = xmalloc (nmemb * size);
   mergesort_internal (base, temp, size, 0, nmemb - 1, cmpfun);
-- 
2.7.3




[Bug-wget] [PATCH 19/27] New: Parse Metalink/HTTP header for application/metalink4+xml

2016-09-28 Thread Matthew White
* src/http.c (metalink_from_http): Parse Metalink/HTTP header for
  metaurls application/metalink4+xml media types
* src/metalink.h: Add function declaration metalink_meta_cmp()
* src/metalink.c: Add function metalink_meta_cmp() compare metalink
  metaurls priorities

Add Metalink/HTTP application/metalink4+xml media types as metaurls to
the metalink variable that will be used to download the files.
---
 src/http.c | 71 --
 src/metalink.c |  9 
 src/metalink.h |  1 +
 3 files changed, 74 insertions(+), 7 deletions(-)

diff --git a/src/http.c b/src/http.c
index 7e2c4ec..e6af7c1 100644
--- a/src/http.c
+++ b/src/http.c
@@ -2555,7 +2555,7 @@ metalink_from_http (const struct response *resp, const 
struct http_stat *hs,
   metalink_t *metalink = NULL;
   metalink_file_t *mfile = xnew0 (metalink_file_t);
   const char *val_beg, *val_end;
-  int res_count = 0, hash_count = 0, sig_count = 0, i;
+  int res_count = 0, meta_count = 0, hash_count = 0, sig_count = 0, i;
 
   DEBUGP (("Checking for Metalink in HTTP response\n"));
 
@@ -2568,6 +2568,7 @@ metalink_from_http (const struct response *resp, const 
struct http_stat *hs,
   /* Begin with 1-element array (for 0-termination). */
   mfile->checksums = xnew0 (metalink_checksum_t *);
   mfile->resources = xnew0 (metalink_resource_t *);
+  mfile->metaurls = xnew0 (metalink_metaurl_t *);
 
   /* Find all Link headers.  */
   for (i = 0;
@@ -2628,14 +2629,14 @@ metalink_from_http (const struct response *resp, const 
struct http_stat *hs,
   DEBUGP (("URL=%s\n", urlstr));
   DEBUGP (("rel=%s\n", rel));
 
+  if (!strcmp (rel, "describedby"))
+find_key_value (attrs_beg, val_end, "type", );
+
   /* Handle signatures.
  Libmetalink only supports one signature per file. Therefore we stop
  as soon as we successfully get first supported signature.  */
   if (sig_count == 0 &&
-  !strcmp (rel, "describedby") &&
-  find_key_value (attrs_beg, val_end, "type", ) &&
-  !strcmp (reltype, "application/pgp-signature")
-  )
+  reltype && !strcmp (reltype, "application/pgp-signature"))
 {
   /* Download the signature to a temporary file.  */
   FILE *_output_stream = output_stream;
@@ -2801,6 +2802,60 @@ metalink_from_http (const struct response *resp, const 
struct http_stat *hs,
   res_count++;
 }
 } /* Handle resource link (rel=duplicate).  */
+
+  /* Handle Metalink/XML resources.  */
+  else if (reltype && !strcmp (reltype, "application/metalink4+xml"))
+{
+  metalink_metaurl_t murl = {0};
+  char *pristr;
+
+  /*
+ Valid ranges for the "pri" attribute are from
+ 1 to 99.  Mirror servers with a lower value of the "pri"
+ attribute have a higher priority, while mirrors with an undefined
+ "pri" attribute are considered to have a value of 99, which is
+ the lowest priority.
+
+ rfc6249 section 3.1
+   */
+  murl.priority = DEFAULT_PRI;
+  if (find_key_value (url_end, val_end, "pri", ))
+{
+  long pri;
+  char *end_pristr;
+  /* Do not care for errno since 0 is error in this case.  */
+  pri = strtol (pristr, _pristr, 10);
+  if (end_pristr != pristr + strlen (pristr) ||
+  !VALID_PRI_RANGE (pri))
+{
+  /* This is against the specification, so let's inform the 
user.  */
+  logprintf (LOG_NOTQUIET,
+ _("Invalid pri value. Assuming %d.\n"),
+ DEFAULT_PRI);
+}
+  else
+murl.priority = pri;
+  xfree (pristr);
+}
+
+  murl.mediatype = xstrdup (reltype);
+
+  DEBUGP (("MEDIATYPE=%s\n", murl.mediatype));
+
+  /* At this point we have validated the new resource.  */
+
+  find_key_value (url_end, val_end, "name", );
+
+  murl.url = urlstr;
+  urlstr = NULL;
+
+  /* 1 slot from new resource, 1 slot for null-termination.  */
+  mfile->metaurls = xrealloc (mfile->metaurls,
+   sizeof (metalink_metaurl_t *) * 
(meta_count + 2));
+  mfile->metaurls[meta_count] = xnew0 (metalink_metaurl_t);
+  *mfile->metaurls[meta_count] = murl;
+  meta_count++;
+} /* Handle resource link (rel=describedby).  */
   else
 DEBUGP (("This link header was not used for Metalink\n"));
 
@@ -2811,8 +2866,9 @@ metalink_from_http (const struct response *resp, const 
struct http_stat *hs,
 
   /* Null-terminate resources array.  */
   mfile->resources[res_count] = 0;
+  mfile->metaurls[meta_count] = 0;
 
-  if (res_count == 0)
+  if (res_count == 0 && meta_count == 0)
 {
   

[Bug-wget] [PATCH 18/27] New test: Metalink shall not concatenate '/' to an empty directory prefix

2016-09-28 Thread Matthew White
* testenv/Makefile.am: Add new file
* testenv/Test-metalink-xml-emptyprefix-trust.py: New file.
  Metalink/XML empty directory prefix (--directory-prefix '') tests

Detect a '/' character wrongly concatenated to an empty directory
prefix '' (not NULL), resulting in an absolute path such as '/dir/file'
instead of 'dir/file'.
---
 testenv/Makefile.am|   1 +
 testenv/Test-metalink-xml-emptyprefix-trust.py | 193 +
 2 files changed, 194 insertions(+)
 create mode 100755 testenv/Test-metalink-xml-emptyprefix-trust.py

diff --git a/testenv/Makefile.am b/testenv/Makefile.am
index 32b3db9..b6bad8d 100644
--- a/testenv/Makefile.am
+++ b/testenv/Makefile.am
@@ -46,6 +46,7 @@ if METALINK_IS_ENABLED
 Test-metalink-xml-relprefix-trust.py\
 Test-metalink-xml-absprefix-trust.py\
 Test-metalink-xml-homeprefix-trust.py   \
+Test-metalink-xml-emptyprefix-trust.py  \
 Test-metalink-xml-size.py   \
 Test-metalink-xml-nourls.py \
 Test-metalink-xml-urlbreak.py
diff --git a/testenv/Test-metalink-xml-emptyprefix-trust.py 
b/testenv/Test-metalink-xml-emptyprefix-trust.py
new file mode 100755
index 000..4831752
--- /dev/null
+++ b/testenv/Test-metalink-xml-emptyprefix-trust.py
@@ -0,0 +1,193 @@
+#!/usr/bin/env python3
+from sys import exit
+from test.http_test import HTTPTest
+from misc.wget_file import WgetFile
+import hashlib
+
+"""
+This is to test Metalink/XML with an empty directory prefix.
+
+With --trust-server-names, trust the metalink:file names.
+
+Without --trust-server-names, don't trust the metalink:file names:
+use the basename of --input-metalink, and add a sequential number
+(e.g. .#1, .#2, etc.).
+
+Strip the directory from unsafe paths.
+"""
+# File Definitions ###
+bad = "Ouch!"
+
+File1 = "Would you like some Tea?"
+File1_lowPref = "Do not take this"
+File1_sha256 = hashlib.sha256 (File1.encode ('UTF-8')).hexdigest ()
+
+File2 = "This is gonna be good"
+File2_lowPref = "Not this one too"
+File2_sha256 = hashlib.sha256 (File2.encode ('UTF-8')).hexdigest ()
+
+File3 = "A little more, please"
+File3_lowPref = "That's just too much"
+File3_sha256 = hashlib.sha256 (File3.encode ('UTF-8')).hexdigest ()
+
+File4 = "Maybe a biscuit?"
+File4_lowPref = "No, thanks"
+File4_sha256 = hashlib.sha256 (File4.encode ('UTF-8')).hexdigest ()
+
+File5 = "More Tea...?"
+File5_lowPref = "I have to go..."
+File5_sha256 = hashlib.sha256 (File5.encode ('UTF-8')).hexdigest ()
+
+MetaXml = \
+"""
+http://www.metalinker.org/;>
+  
+GNU Wget
+  
+  
+GNU GPL
+http://www.gnu.org/licenses/gpl.html
+  
+  Wget Test Files
+  1.2.3
+  Wget Test Files description
+  
+
+  
+{{FILE1_HASH}}
+  
+  
+http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file
+http://{{SRV_HOST}}:{{SRV_PORT}}/404
+http://{{SRV_HOST}}:{{SRV_PORT}}/File1_lowPref
+http://{{SRV_HOST}}:{{SRV_PORT}}/File1
+  
+
+ 
+  
+{{FILE2_HASH}}
+  
+  
+http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file
+http://{{SRV_HOST}}:{{SRV_PORT}}/404
+http://{{SRV_HOST}}:{{SRV_PORT}}/File2_lowPref
+http://{{SRV_HOST}}:{{SRV_PORT}}/File2
+  
+
+ 
+  
+{{FILE3_HASH}}
+  
+  
+http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file
+http://{{SRV_HOST}}:{{SRV_PORT}}/404
+http://{{SRV_HOST}}:{{SRV_PORT}}/File3_lowPref
+http://{{SRV_HOST}}:{{SRV_PORT}}/File3
+  
+
+ 
+  
+{{FILE4_HASH}}
+  
+  
+http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file
+http://{{SRV_HOST}}:{{SRV_PORT}}/404
+http://{{SRV_HOST}}:{{SRV_PORT}}/File4_lowPref
+http://{{SRV_HOST}}:{{SRV_PORT}}/File4
+  
+
+
+  
+{{FILE5_HASH}}
+  
+  
+http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file
+http://{{SRV_HOST}}:{{SRV_PORT}}/404
+http://{{SRV_HOST}}:{{SRV_PORT}}/File5_lowPref
+http://{{SRV_HOST}}:{{SRV_PORT}}/File5
+  
+
+  
+
+"""
+
+wrong_file = WgetFile ("wrong_file", bad)
+
+File1_orig = WgetFile ("File1", File1)
+File1_down = WgetFile ("subdir/File1", File1)
+File1_nono = WgetFile ("File1_lowPref", File1_lowPref)
+
+# rejected by libmetalink
+File2_orig = WgetFile ("File2", File2)
+File2_nono = WgetFile ("File2_lowPref", File2_lowPref)
+
+# rejected by libmetalink
+File3_orig = WgetFile ("File3", File3)
+File3_nono = WgetFile ("File3_lowPref", File3_lowPref)
+
+# rejected by libmetalink
+File4_orig = WgetFile ("File4", File4)
+File4_nono = WgetFile ("File4_lowPref", File4_lowPref)
+
+File5_orig = WgetFile ("File5", File5)
+File5_down = WgetFile ("subdir/File5", File5)
+File5_nono = WgetFile ("File5_lowPref", File5_lowPref)
+
+MetaFile = WgetFile ("test.metalink", MetaXml)
+
+WGET_OPTIONS = "--trust-server-names 

[Bug-wget] [PATCH 03/27] Use python .replace instead than re.sub in Metalink tests

2016-09-28 Thread Matthew White
* testenv/Test-metalink-http.py: Use python .replace
* testenv/Test-metalink-xml.py: Use python .replace
* testenv/Test-metalink-xml-abspath.py: Use python .replace
* testenv/Test-metalink-xml-relpath.py: Use python .replace

Use Python's str.replace instead of re.sub, and remove 'import re'.
---
 testenv/Test-metalink-http.py| 7 +++
 testenv/Test-metalink-xml-abspath.py | 7 +++
 testenv/Test-metalink-xml-relpath.py | 7 +++
 testenv/Test-metalink-xml.py | 7 +++
 4 files changed, 12 insertions(+), 16 deletions(-)

diff --git a/testenv/Test-metalink-http.py b/testenv/Test-metalink-http.py
index 993642b..b119da6 100755
--- a/testenv/Test-metalink-http.py
+++ b/testenv/Test-metalink-http.py
@@ -2,7 +2,6 @@
 from sys import exit
 from test.http_test import HTTPTest
 from misc.wget_file import WgetFile
-import re
 import hashlib
 from base64 import b64encode
 
@@ -12,9 +11,9 @@ from base64 import b64encode
 
 # Helper function for hostname, port and digest substitution
 def SubstituteServerInfo (text, host, port, digest):
-text = re.sub (r'{{FILE1_HASH}}', digest, text)
-text = re.sub (r'{{SRV_HOST}}', host, text)
-text = re.sub (r'{{SRV_PORT}}', str (port), text)
+text = text.replace('{{FILE1_HASH}}', digest)
+text = text.replace('{{SRV_HOST}}', host)
+text = text.replace('{{SRV_PORT}}', str (port))
 return text
 
 # File Definitions ###
diff --git a/testenv/Test-metalink-xml-abspath.py 
b/testenv/Test-metalink-xml-abspath.py
index b618ca0..4447b0e 100755
--- a/testenv/Test-metalink-xml-abspath.py
+++ b/testenv/Test-metalink-xml-abspath.py
@@ -2,7 +2,6 @@
 from sys import exit
 from test.http_test import HTTPTest
 from misc.wget_file import WgetFile
-import re
 import hashlib
 
 """
@@ -77,9 +76,9 @@ http_test.server_setup()
 ### Get and use dynamic server sockname
 srv_host, srv_port = http_test.servers[0].server_inst.socket.getsockname ()
 
-MetaXml = re.sub (r'{{FILE1_HASH}}', File1_sha256, MetaXml)
-MetaXml = re.sub (r'{{SRV_HOST}}', srv_host, MetaXml)
-MetaXml = re.sub (r'{{SRV_PORT}}', str (srv_port), MetaXml)
+MetaXml = MetaXml.replace('{{FILE1_HASH}}', File1_sha256)
+MetaXml = MetaXml.replace('{{SRV_HOST}}', srv_host)
+MetaXml = MetaXml.replace('{{SRV_PORT}}', str (srv_port))
 MetaFile.content = MetaXml
 
 err = http_test.begin ()
diff --git a/testenv/Test-metalink-xml-relpath.py 
b/testenv/Test-metalink-xml-relpath.py
index 5ecb2b4..c565302 100755
--- a/testenv/Test-metalink-xml-relpath.py
+++ b/testenv/Test-metalink-xml-relpath.py
@@ -2,7 +2,6 @@
 from sys import exit
 from test.http_test import HTTPTest
 from misc.wget_file import WgetFile
-import re
 import hashlib
 
 """
@@ -77,9 +76,9 @@ http_test.server_setup()
 ### Get and use dynamic server sockname
 srv_host, srv_port = http_test.servers[0].server_inst.socket.getsockname ()
 
-MetaXml = re.sub (r'{{FILE1_HASH}}', File1_sha256, MetaXml)
-MetaXml = re.sub (r'{{SRV_HOST}}', srv_host, MetaXml)
-MetaXml = re.sub (r'{{SRV_PORT}}', str (srv_port), MetaXml)
+MetaXml = MetaXml.replace('{{FILE1_HASH}}', File1_sha256)
+MetaXml = MetaXml.replace('{{SRV_HOST}}', srv_host)
+MetaXml = MetaXml.replace('{{SRV_PORT}}', str (srv_port))
 MetaFile.content = MetaXml
 
 err = http_test.begin ()
diff --git a/testenv/Test-metalink-xml.py b/testenv/Test-metalink-xml.py
index 2541a38..fdeede6 100755
--- a/testenv/Test-metalink-xml.py
+++ b/testenv/Test-metalink-xml.py
@@ -2,7 +2,6 @@
 from sys import exit
 from test.http_test import HTTPTest
 from misc.wget_file import WgetFile
-import re
 import hashlib
 
 """
@@ -77,9 +76,9 @@ http_test.server_setup()
 ### Get and use dynamic server sockname
 srv_host, srv_port = http_test.servers[0].server_inst.socket.getsockname ()
 
-MetaXml = re.sub (r'{{FILE1_HASH}}', File1_sha256, MetaXml)
-MetaXml = re.sub (r'{{SRV_HOST}}', srv_host, MetaXml)
-MetaXml = re.sub (r'{{SRV_PORT}}', str (srv_port), MetaXml)
+MetaXml = MetaXml.replace('{{FILE1_HASH}}', File1_sha256)
+MetaXml = MetaXml.replace('{{SRV_HOST}}', srv_host)
+MetaXml = MetaXml.replace('{{SRV_PORT}}', str (srv_port))
 MetaFile.content = MetaXml
 
 err = http_test.begin ()
-- 
2.7.3




[Bug-wget] [PATCH 14/27] New: Metalink file size mismatch returns error code METALINK_SIZE_ERROR

2016-09-28 Thread Matthew White
* src/wget.h (uerr_t): Add error code METALINK_SIZE_ERROR to enum
* src/metalink.c (retrieve_from_metalink): Use boolean variable
  size_ok, when false set retr_err to METALINK_SIZE_ERROR
* testenv/Makefile.am: Add new file
* testenv/Test-metalink-xml-size.py: New file. Metalink/XML file size
  tests ()

Before this patch, no appropriate error code was returned to report a
file size mismatch.

This patch introduces the error code METALINK_SIZE_ERROR to report a
file size mismatch.
---
 src/metalink.c|  11 +++
 src/wget.h|   3 +-
 testenv/Makefile.am   |   3 +-
 testenv/Test-metalink-xml-size.py | 202 ++
 4 files changed, 217 insertions(+), 2 deletions(-)
 create mode 100755 testenv/Test-metalink-xml-size.py

diff --git a/src/metalink.c b/src/metalink.c
index b729450..5108a5e 100644
--- a/src/metalink.c
+++ b/src/metalink.c
@@ -102,6 +102,7 @@ retrieve_from_metalink (const metalink_t* metalink)
   char *basename = NULL;
   char *safename = NULL;
   char *destname = NULL;
+  bool size_ok = false;
   bool hash_ok = false;
 
   uerr_t retr_err = METALINK_MISSING_RESOURCE;
@@ -325,10 +326,12 @@ retrieve_from_metalink (const metalink_t* metalink)
   continue;
 }
 
+  size_ok = false;
   logprintf (LOG_VERBOSE, _("Computing size for %s\n"), quote 
(destname));
 
   if (!mfile->size)
 {
+  size_ok = true;
   logprintf (LOG_VERBOSE, _("File size not declared. Skipping 
check.\n"));
 }
   else
@@ -356,6 +359,7 @@ retrieve_from_metalink (const metalink_t* metalink)
 }
   else
 {
+  size_ok = true;
   logputs (LOG_VERBOSE, _("Size matches.\n"));
 }
 }
@@ -679,6 +683,13 @@ gpg_skip_verification:
   logprintf (LOG_VERBOSE, _("Failed to download %s. Skipping 
resource.\n"),
  quote (destname ? destname : safename));
 }
+  else if (!size_ok)
+{
+  retr_err = METALINK_SIZE_ERROR;
+  logprintf (LOG_NOTQUIET,
+ _("File %s retrieved but size does not match. "
+   "\n"), quote (destname));
+}
   else if (!hash_ok)
 {
   retr_err = METALINK_CHKSUM_ERROR;
diff --git a/src/wget.h b/src/wget.h
index eacf725..ed57245 100644
--- a/src/wget.h
+++ b/src/wget.h
@@ -361,7 +361,8 @@ typedef enum
   TIMECONV_ERR,
   METALINK_PARSE_ERROR, METALINK_RETR_ERROR,
   METALINK_CHKSUM_ERROR, METALINK_SIG_ERROR, METALINK_MISSING_RESOURCE,
-  RETR_WITH_METALINK
+  RETR_WITH_METALINK,
+  METALINK_SIZE_ERROR
 } uerr_t;
 
 /* 2005-02-19 SMS.
diff --git a/testenv/Makefile.am b/testenv/Makefile.am
index f8c3da9..b671438 100644
--- a/testenv/Makefile.am
+++ b/testenv/Makefile.am
@@ -44,7 +44,8 @@ if METALINK_IS_ENABLED
 Test-metalink-xml-prefix-trust.py   \
 Test-metalink-xml-relprefix-trust.py\
 Test-metalink-xml-absprefix-trust.py\
-Test-metalink-xml-homeprefix-trust.py
+Test-metalink-xml-homeprefix-trust.py   \
+Test-metalink-xml-size.py
 else
   METALINK_TESTS =
 endif
diff --git a/testenv/Test-metalink-xml-size.py 
b/testenv/Test-metalink-xml-size.py
new file mode 100755
index 000..4c7ccb7
--- /dev/null
+++ b/testenv/Test-metalink-xml-size.py
@@ -0,0 +1,202 @@
+#!/usr/bin/env python3
+from sys import exit
+from test.http_test import HTTPTest
+from misc.wget_file import WgetFile
+import hashlib
+
+"""
+This is to test Metalink/XML file size check in Wget.
+
+With --trust-server-names, trust the metalink:file names.
+
+Without --trust-server-names, don't trust the metalink:file names:
+use the basename of --input-metalink, and add a sequential number
+(e.g. .#1, .#2, etc.).
+
+Strip the directory from unsafe paths.
+"""
+# File Definitions ###
+bad = "Ouch!"
+
+File1 = "Would you like some Tea?"
+File1_lowPref = "Do not take this"
+File1_sha256 = hashlib.sha256 (File1.encode ('UTF-8')).hexdigest ()
+
+File2 = "This is gonna be good"
+File2_lowPref = "Not this one too"
+File2_sha256 = hashlib.sha256 (File2.encode ('UTF-8')).hexdigest ()
+
+File3 = "A little more, please"
+File3_lowPref = "That's just too much"
+File3_sha256 = hashlib.sha256 (File3.encode ('UTF-8')).hexdigest ()
+
+File4 = "Maybe a biscuit?"
+File4_lowPref = "No, thanks"
+File4_sha256 = hashlib.sha256 (File4.encode ('UTF-8')).hexdigest ()
+
+File5 = "More Tea...?"
+File5_lowPref = "I have to go..."
+File5_sha256 = hashlib.sha256 (File5.encode ('UTF-8')).hexdigest ()
+File5_size = str (len (File5))
+
+MetaXml = \
+"""
+http://www.metalinker.org/;>
+  
+GNU Wget
+  
+  
+GNU GPL
+

[Bug-wget] [PATCH 05/27] Bugfix: Fix NULL filename and output_stream in Metalink module

2016-09-28 Thread Matthew White
* NEWS: Mention the Metalink "path/file" name format handling
* src/metalink.c (retrieve_from_metalink): Fix NULL filename, set
  filename to the right "path/file" value
* src/metalink.c (retrieve_from_metalink): Fix NULL output_stream, set
  output_stream to filename when it is created by retrieve_url()
* src/metalink.c (retrieve_from_metalink): Add RFC5854 comments about
  proper metalink:file "path/file" name format handling
* doc/metalink.txt: Update document. Remove resolved bugs

If unique_create() cannot create/open the destination file, filename
and output_stream remain NULL. If fopen() is used instead, filename
always remains NULL. Neither function can create "path/file" trees.

Setting filename to the right value is sufficient to prevent a SIGSEGV
arising from testing a NULL value. This also allows retrieve_url()
to create a "path/file" tree through opt.output_document.

Reading NULL as output_stream, when it shall not be, leads to wrong
results. For instance, a non-NULL output_stream tells when a stream
was interrupted, reading NULL instead means to assume the contrary.

This patch conforms to the RFC5854 specification:
  The Metalink Download Description Format
  4.1.2.1.  The "name" Attribute
  https://tools.ietf.org/html/rfc5854#section-4.1.2.1
---
 NEWS |   5 ++
 doc/metalink.txt | 139 ++-
 src/metalink.c   |  30 +++-
 3 files changed, 119 insertions(+), 55 deletions(-)

diff --git a/NEWS b/NEWS
index 56c21a5..0299418 100644
--- a/NEWS
+++ b/NEWS
@@ -9,6 +9,11 @@ Please send GNU Wget bug reports to <bug-wget@gnu.org>.
 
 * Changes in Wget X.Y.Z
 
+* When processing a Metalink file, create the parent directories of a
+  "path/file" destination file name:
+  https://tools.ietf.org/html/rfc5854#section-4.1.2.1
+  https://tools.ietf.org/html/rfc5854#section-4.2.8.3
+
 * On a recursive download, append a .tmp suffix to temporary files
   that will be deleted after being parsed, and create them
   readable/writable only by the owner.
diff --git a/doc/metalink.txt b/doc/metalink.txt
index 31734a3..0f3706a 100644
--- a/doc/metalink.txt
+++ b/doc/metalink.txt
@@ -1,24 +1,26 @@
-GNU Wget Metalink module (--input-metalink)
+GNU Wget Metalink module
 
-  Evaluation of "Directory Options" on the command line
+  Evaluation of the Metalink/XML and Metalink/HTTP implementations
 
 
 1. Introduction
 ***
 
 This document, and the results contained in it, is focused over the
-testing of the metalink:file "path/file" name format.
+evaluation of the Metalink/XML and Metalink/HTTP implementations.
 
 The "Directory Options" mentioned here are used on the command line in
-conjunction with the option '--input-metalink=file':
+conjunction with the option '--input-metalink=file' for Metalink/XML,
+and '--metalink-over-http' for Metalink/HTTP.
 
-$ wget --input-metalink=file 
+$ wget --input-metalink= [directory options]
+$ wget --metalink-over-http [directory options] 
 
 2. Notes
 
 
-Tests containing a metalink:file "/path/file", "./path/file", or
-"../path/file" name shall be run manually due to security concerns.
+Tests for metalink:file names beginning with '/', '~/', './', or '../'
+(e.g. "/path/file") shall be run manually due to security concerns.
 
 3. Metalink files used as reference
 ***
@@ -47,17 +49,30 @@ EOF
 4.1 Implemented safety features
 ===
 
-Do not follow relative or absolute paths: "/path/file", "./path/file",
-and "../path/file" as metalink:file name formats are all ignored (wget
-refuses to start). The options --trust-server-names changes nothing.
+Any metalink:file name containing an absolute, relative, or home path
+(see '2. Notes') parsed from Metalink/XML files is rejected.
 
-4.2 Actual behaviour
-
+This is a libmetalink's design decision implemented in the function
+metalink_check_safe_path().  This feature shall not be modified.
 
-Given a metalink:file "path/file" name, if "path" exists, download
-"path/file", then compute its checksum. If "path" doesn't exist,
-download the url's file in the working directory; then the checksum
-fails: cannot find "path/file".
+All the above conform to the RFC5854 standard.
+
+References:
+ https://tools.ietf.org/html/rfc5854#section-4.1.2.1
+ https://tools.ietf.org/html/rfc5854#section-4.2.8.3
+
+4.2 File download behaviour
+===
+
+When a Metalink/XML file is parsed:
+1. create the metalink:file "path/file" tree;
+2. download the metalink:url file as "path/file";
+3. verify the "path/file" checksum.
+
+All the above conform to the RFC5854 standard.
+
+References:
+ https://tools.ietf.org/html/rfc5854
 
 4.3 Questionable behaviours
 ===
@@ -69,69 +84,85 @@ If more metalink:file elements are the same, wget downloads 
them all.
 
 The download is OK even when metalink:file size is wrong.
 
-5. Directory Options
+5. `wget 

[Bug-wget] [PATCH 04/27] Add metalink description

2016-09-28 Thread Matthew White
* doc/metalink.txt

Evaluation of "Directory Options" on the command line interacting with
the option '--input-metalink=file':

$ wget --input-metalink=file 
---
 doc/metalink.txt | 137 +++
 1 file changed, 137 insertions(+)
 create mode 100644 doc/metalink.txt

diff --git a/doc/metalink.txt b/doc/metalink.txt
new file mode 100644
index 000..31734a3
--- /dev/null
+++ b/doc/metalink.txt
@@ -0,0 +1,137 @@
+GNU Wget Metalink module (--input-metalink)
+
+  Evaluation of "Directory Options" on the command line
+
+
+1. Introduction
+***
+
+This document, and the results contained in it, is focused over the
+testing of the metalink:file "path/file" name format.
+
+The "Directory Options" mentioned here are used on the command line in
+conjunction with the option '--input-metalink=file':
+
+$ wget --input-metalink=file 
+
+2. Notes
+
+
+Tests containing a metalink:file "/path/file", "./path/file", or
+"../path/file" name shall be run manually due to security concerns.
+
+3. Metalink files used as reference
+***
+
+3.1 Test: metalink:file with "path/file" name format
+
+
+cat > test.meta4 << EOF
+
+
+  
+543
+d37d3965f8e1a7b16504b4273b09c392776b7e4dd17e601256c7b2fd9ce5f56e
+0f6ff5cdc15603f1b81227b5a296f001
+http://wrongurl.really/gnu/wget/wget-1.18.tar.xz.sig
+http://ftpmirror.gnu.org/wget/wget-1.18.tar.xz.sig
+http://ftp.gnu.org/gnu/wget/wget-1.18.tar.xz.sig
+http://nl.mirror.babylon.network/gnu/wget/wget-1.18.tar.xz.sig
+  
+
+EOF
+
+4. `wget --input-metalink=test.meta4`
+*
+
+4.1 Implemented safety features
+===
+
+Do not follow relative or absolute paths: "/path/file", "./path/file",
+and "../path/file" as metalink:file name formats are all ignored (wget
+refuses to start). The options --trust-server-names changes nothing.
+
+4.2 Actual behaviour
+
+
+Given a metalink:file "path/file" name, if "path" exists, download
+"path/file", then compute its checksum. If "path" doesn't exist,
+download the url's file in the working directory; then the checksum
+fails: cannot find "path/file".
+
+4.3 Questionable behaviours
+===
+
+If more metalink:file elements are the same, wget downloads them all.
+
+4.4 Bugs
+
+
+The download is OK even when metalink:file size is wrong.
+
+5. Directory Options
+
+
+'-nd'
+'--no-directories'
+
+Used alone has no effect (see `wget --input-metalink=test.meta4`).
+
+Used in conjunction with --recursive, given "path/file", if "path"
+exists, download "path/file" and compute its checksum.  If "path"
+doesn't exist, download the url's file in the working directory,
+then the checksum fails: cannot find "path/file".
+
+'-x'
+'--force-directories'
+
+Given "path/file", if "path" exists, download "path/file", then
+compute its checksum.  If "path" doesn't exist, create the url
+hierarchy, then the checksum fails: cannot find "path/file".
+
+'-nH'
+'--no-host-directories'
+
+Given "path/file", if "path" exists, download "path/file", then
+compute its checksum.  If "path" doesn't exist, download the url's
+file in the working directory, then the checksum fails: cannot
+find "path/file"; in this context, if --force-directories is
+present, create the url hierarchy omitting the host component.
+
+'--protocol-directories'
+
+Used alone has no effect (see `wget --input-metalink=test.meta4`).
+
+In conjunction with --force-directories, use the protocol name as
+the first directory component (see --force-directories).
+
+'--cut-dirs=number'
+
+Used alone has no effect (see `wget --input-metalink=test.meta4`).
+
+In conjunction with --force-directories, ignore 'number' directory
+components after the domain (see --force-directories).
+
+'-P prefix'
+'--directory-prefix=prefix'
+
+This is buggy or non-intuitive.
+
+Given "path/file", and more metalink:url uris for the same file,
+if '-P path' is specified, the first url's file is downloaded as
+"path/", and the second url's file as "path/file". The
+first file fails the checksum: cannot find "path/file". The file
+"path/file" passes the checksum verification.
+
+Given "path/file", and more metalink:url uris for the same file,
+if '-P newp' is specified, all the urls' files are downloaded as
+"newp/. A suffix counter is added to the file names to
+not overwrite existing files. Then all the checksums fail: cannot
+find "path/file".
+
+Given "path/file", and more metalink:url uris for the same file,
+if '-P ../path' is specified, the same things as if '-P ../newp'
+or '-P newp' will happen, e.g. "newp/ and checksums
+failures.
+
+[write here more wrong things happening]
-- 
2.7.3




[Bug-wget] [PATCH 12/27] New document: Metalink/XML and Metalink/HTTP standard reference

2016-09-28 Thread Matthew White
* doc/metalink-standard.txt: New doc. Implemented and recommended
  Metalink/XML and Metalink/HTTP standard features
---
 doc/metalink-standard.txt | 156 ++
 1 file changed, 156 insertions(+)
 create mode 100644 doc/metalink-standard.txt

diff --git a/doc/metalink-standard.txt b/doc/metalink-standard.txt
new file mode 100644
index 000..d00c384
--- /dev/null
+++ b/doc/metalink-standard.txt
@@ -0,0 +1,156 @@
+GNU Wget Metalink recommended behaviour
+
+  Metalink/XML and Metalink/HTTP standard reference
+
+
+1. Security features
+
+
+Only metalink:file elements with safe "name" fields shall be accepted
+[1 #section-4.1.2.1]. If unsafe metalink:file elements are saved, any
+related test shall fail (see '2. Tests').
+
+By design, libmetalink rejects unsafe metalink:file elements [3]:
+* lib/metalink_helper.c (metalink_check_safe_path): Verify path
+
+1.1 Exceptions
+==
+
+The option --directory-prefix could allow the use of an absolute,
+relative, or home path.
+
+2. Tests
+
+
+Saving a file to an unexpected path poses a security problem. We must
+ensure that Wget's automated tests never modify the root and the home
+paths or descend/escalate to a relative path unexpectedly.
+
+2.1 Metalink/XML implemented tests
+==
+
+* testenv/Test-metalink-xml.py: Accept safe paths
+* testenv/Test-metalink-xml-abspath.py: Reject absolute paths
+* testenv/Test-metalink-xml-relpath.py: Reject relative paths
+* testenv/Test-metalink-xml-homepath.py: Reject home paths
+
+3. Download file name
+*
+
+Computing the file name to write from the followed urls only leads to
+uncertainty. For this reason, a unique name shall be used. Respectively,
+it shall be the metalink:file "name" field for Metalink/XML and a name
+derived from the cli's url for Metalink/HTTP.
+
+4. Metalink/XML
+***
+
+4.1 Example files
+=
+
+cat > bogus.meta4 << EOF
+
+
+  
+1617
+ecb3dff2648667513e31554b3ad054ccd89fce38e33367c9459ac3a285153742
+http://another.url/common_name
+http://ftpmirror.gnu.org/bash/bash-4.3-patches/bash43-001
+  
+  
+1594
+eee7cd7062ab29a9e4f02924d9c367264dcb8b162703f74ff6eb8f175a91502b
+http://another.url/again/common_name
+http://ftpmirror.gnu.org/bash/bash-4.3-patches/bash43-002
+  
+
+EOF
+
+4.2 Command line example
+
+
+$ wget --input-metalink=bogus.meta4
+
+4.3 Metalink/XML file parsing
+=
+
+The metalink xml file is parsed by one of the following libmetalink's
+functions [3], depending upon the library configured to use:
+* lib/libexpat_metalink_parser.c (metalink_parse_file): Expat [4]
+* lib/libxml2_metalink_parser.c (metalink_parse_file): Libxml2 [5]
+
+The result returned doesn't include unsafe metalink:file elements, as
+stated at point '1. Security features'.
+
+An empty result shall not be considered an error.  Parsing errors will
+be reported to the caller of libmetalink's metalink_parse_file().
+
+4.4 Saving files
+
+
+Fetched metalink:file elements shall be written using the unique "name"
+field as file name [1 #section-4.1.2.1].
+
+A metalink:file url's file name shall not substitute the "name" field,
+see '3. Download file name'.
+
+4.5 Multi-Source download
+=
+
+Parallel range requests are allowed [1 #section-1].
+
+5. Metalink/HTTP
+
+
+5.1 HTTP server
+===
+
+The local server http://127.0.0.1 is used as reference in the course
+of this chapter. Any server service capable of sending Metalink/HTTP
+header answers may be used.
+
+5.2 Command line example
+
+
+$ wget --metalink-over-http http://127.0.0.1/dir/file.ext
+
+5.3 Metalink/HTTP header answer
+===
+
+Link: http://ftpmirror.gnu.org/bash/bash-4.3-patches/bash43-001; 
rel=duplicate; pref; pri=2
+Link: http://another.url/common_name; rel=duplicate; pref; pri=1
+Digest: SHA-256=7LPf8mSGZ1E+MVVLOtBUzNifzjjjM2fJRZrDooUVN0I=
+
+5.4 Saving files
+
+
+When neither --output-document nor --content-disposition is used,
+the file name to write is computed from the cli's url hierarchy. The
+purpose of the "Directory Options" is as usual, and the file name is
+the cli's url file name, see wget(1).
+
+The url followed to download the file shall not substitute the cli's
+url to compute the file name to write, see '3. Download file name'.
+
+5.5 Multi-Source download
+=
+
+Parallel range requests are allowed [2 #section-7].
+
+6. References
+*
+
+[1] The Metalink Download Description Format
+https://tools.ietf.org/html/rfc5854
+
+[2] Metalink/HTTP: Mirrors and Hashes
+https://tools.ietf.org/html/rfc6249
+
+[3] Libmetalink
+https://github.com/metalink-dev/libmetalink
+
+[4] Expat
+http://www.libexpat.org
+
+[5] Libxml2
+http://xmlsoft.org
-- 
2.7.3




[Bug-wget] [PATCH 11/27] Enforce Metalink file name verification, strip directory if necessary

2016-09-28 Thread Matthew White
* NEWS: Mention the use of a safe Metalink destination path
* src/metalink.h: Add declaration of functions get_metalink_basename(),
  last_component(), metalink_check_safe_path()
* src/metalink.c: Add directive #include "dosname.h"
* src/metalink.c: Add function get_metalink_basename() to return the
  basename of a file name, strip w32's drive letter prefixes
* src/metalink.c (retrieve_from_metalink): Enforce Metalink file name
  verification, if the file name is unsafe try its basename
* doc/metalink.txt: Update document. Explain --directory-prefix

The function get_metalink_basename() uses FILE_SYSTEM_PREFIX_LEN to
catch any 'C:D:file' (w32 environment), then it removes each drive
letter prefix, i.e. 'C:' and 'D:'.

Unsafe file names contain an absolute, relative, or home path.  Safe
paths can be verified by libmetalink's metalink_check_safe_path().
---
 NEWS |  7 +++
 doc/metalink.txt |  4 
 src/metalink.c   | 62 
 src/metalink.h   |  4 
 4 files changed, 73 insertions(+), 4 deletions(-)

diff --git a/NEWS b/NEWS
index bfb3bef..72f8728 100644
--- a/NEWS
+++ b/NEWS
@@ -9,6 +9,13 @@ Please send GNU Wget bug reports to <bug-wget@gnu.org>.
 
 * Changes in Wget X.Y.Z
 
+* When processing a Metalink file, enforce a safe destination path.
+  Remove any drive letter prefix under w32, i.e. 'C:D:file'.  Call
+  libmetalink's metalink_check_safe_path() to prevent absolute,
+  relative, or home paths:
+  https://tools.ietf.org/html/rfc5854#section-4.1.2.1
+  https://tools.ietf.org/html/rfc5854#section-4.2.8.3
+
 * When processing a Metalink file, --directory-prefix= sets
   the top of the retrieval tree to prefix for Metalink downloads.
 
diff --git a/doc/metalink.txt b/doc/metalink.txt
index 94a07ba..9d9dea2 100644
--- a/doc/metalink.txt
+++ b/doc/metalink.txt
@@ -159,3 +159,7 @@ References:
 
 Set the top of the retrieval tree to prefix for both Metalink/XML
 and Metalink/HTTP downloads, see wget(1).
+
+If combining the prefix with the file name results in an absolute,
+relative, or home path, the directory components are stripped and
+only the basename is used. See '4.1 Implemented safety features'.
diff --git a/src/metalink.c b/src/metalink.c
index 3e03aee..e64504e 100644
--- a/src/metalink.c
+++ b/src/metalink.c
@@ -40,6 +40,7 @@ as that of the covered work.  */
 #include "sha1.h"
 #include "sha256.h"
 #include "sha512.h"
+#include "dosname.h"
 #include "xstrndup.h"
 #include "c-strcase.h"
 #include 
@@ -87,6 +88,8 @@ retrieve_from_metalink (const metalink_t* metalink)
   metalink_file_t *mfile = *mfile_ptr;
   metalink_resource_t **mres_ptr;
   char *filename = NULL;
+  char *basename = NULL;
+  char *safename = NULL;
   char *destname = NULL;
   bool hash_ok = false;
 
@@ -110,6 +113,23 @@ retrieve_from_metalink (const metalink_t* metalink)
 
   DEBUGP (("Processing metalink file %s...\n", quote (mfile->name)));
 
+  /* Enforce libmetalink's metalink_check_safe_path().  */
+  basename = get_metalink_basename (filename);
+  safename = metalink_check_safe_path (filename) ? filename : basename;
+
+  if (filename != safename)
+logprintf (LOG_NOTQUIET,
+   _("Unsafe metalink file %s. Stripping directory...\n"),
+   quote (filename));
+
+  if (!basename)
+{
+  logprintf (LOG_NOTQUIET,
+ _("Rejecting metalink file. Invalid basename.\n"));
+  xfree (filename);
+  continue;
+}
+
   /* Resources are sorted by priority.  */
   for (mres_ptr = mfile->resources; *mres_ptr && !skip_mfile; mres_ptr++)
 {
@@ -170,6 +190,12 @@ retrieve_from_metalink (const metalink_t* metalink)
   /* Avoid recursive Metalink from HTTP headers.  */
   bool _metalink_http = opt.metalink_over_http;
 
+  /* FIXME: could be useless.  */
+  if (strcmp (url->file, basename))
+logprintf (LOG_VERBOSE,
+   _("URL file name %s and Metalink file name %s are 
different.\n"),
+   quote_n (0, url->file), quote_n (1, basename));
+
   /* If output_stream is not NULL, then we have failed on
  previous resource and are retrying. Thus, continue
  with the next resource.  Do not close output_stream
@@ -188,10 +214,10 @@ retrieve_from_metalink (const metalink_t* metalink)
  after we are finished with the file.  */
   if (opt.always_rest)
 /* continue previous download */
-output_stream = fopen (filename, "ab");
+output_stream = fopen (safename, "ab");
   else
 /* create a file with an unique name */
-output_stream = unique_create (filename, true, );
+output_stream = 

[Bug-wget] [PATCH 01/27] Add two Metalink/XML tests

2016-09-28 Thread Matthew White
From: Tim Rühsen 

* testenv/Test-metalink-xml-abspath.py: Reject absolute paths
* testenv/Test-metalink-xml-relpath.py: Reject relative paths
* testenv/Makefile.am: Add both new files to metalink tests
---
 testenv/Makefile.am  |  6 ++-
 testenv/Test-metalink-xml-abspath.py | 87 
 testenv/Test-metalink-xml-relpath.py | 87 
 3 files changed, 178 insertions(+), 2 deletions(-)
 create mode 100755 testenv/Test-metalink-xml-abspath.py
 create mode 100755 testenv/Test-metalink-xml-relpath.py

diff --git a/testenv/Makefile.am b/testenv/Makefile.am
index deef18e..b68ef8f 100644
--- a/testenv/Makefile.am
+++ b/testenv/Makefile.am
@@ -27,8 +27,10 @@
 
 
 if METALINK_IS_ENABLED
-  METALINK_TESTS = Test-metalink-xml.py \
-Test-metalink-http.py
+  METALINK_TESTS = Test-metalink-http.py\
+Test-metalink-xml.py\
+Test-metalink-xml-relpath.py\
+Test-metalink-xml-abspath.py
 else
   METALINK_TESTS =
 endif
diff --git a/testenv/Test-metalink-xml-abspath.py 
b/testenv/Test-metalink-xml-abspath.py
new file mode 100755
index 000..62aabb9
--- /dev/null
+++ b/testenv/Test-metalink-xml-abspath.py
@@ -0,0 +1,87 @@
+#!/usr/bin/env python3
+from sys import exit
+from test.http_test import HTTPTest
+from misc.wget_file import WgetFile
+import re
+import hashlib
+
+"""
+This is to test if Metalink XML file escapes current directory.
+"""
+# File Definitions ###
+File1 = "Would you like some Tea?"
+File1_lowPref = "Do not take this"
+File1_sha256 = hashlib.sha256 (File1.encode ('UTF-8')).hexdigest ()
+MetaXml = \
+"""
+http://www.metalinker.org/;>
+  
+GNU Wget
+  
+  
+GNU GPL
+http://www.gnu.org/licenses/gpl.html
+  
+  Wget Test File 1
+  1.2.3
+  Wget Test File 1 description
+  
+
+  
+{{FILE1_HASH}}
+  
+  
+http://broken.example/File1
+http://{{SRV_HOST}}:{{SRV_PORT}}/File1_lowPref
+http://{{SRV_HOST}}:{{SRV_PORT}}/File1
+  
+
+  
+
+"""
+
+A_File = WgetFile ("File1", File1)
+B_File = WgetFile ("File1_lowPref", File1_lowPref)
+MetaFile = WgetFile ("test.meta4", MetaXml)
+
+WGET_OPTIONS = "--input-metalink test.meta4"
+WGET_URLS = [[]]
+
+Files = [[A_File, B_File]]
+Existing_Files = [MetaFile]
+
+ExpectedReturnCode = 0
+ExpectedDownloadedFiles = [MetaFile]
+
+ Pre and Post Test Hooks #
+pre_test = {
+"ServerFiles"   : Files,
+"LocalFiles": Existing_Files
+}
+test_options = {
+"WgetCommands"  : WGET_OPTIONS,
+"Urls"  : WGET_URLS
+}
+post_test = {
+"ExpectedFiles" : ExpectedDownloadedFiles,
+"ExpectedRetcode"   : ExpectedReturnCode
+}
+
+http_test = HTTPTest (
+pre_hook=pre_test,
+test_params=test_options,
+post_hook=post_test,
+)
+
+http_test.server_setup()
+### Get and use dynamic server sockname
+srv_host, srv_port = http_test.servers[0].server_inst.socket.getsockname ()
+
+MetaXml = re.sub (r'{{FILE1_HASH}}', File1_sha256, MetaXml)
+MetaXml = re.sub (r'{{SRV_HOST}}', srv_host, MetaXml)
+MetaXml = re.sub (r'{{SRV_PORT}}', str (srv_port), MetaXml)
+MetaFile.content = MetaXml
+
+err = http_test.begin ()
+
+exit (err)
diff --git a/testenv/Test-metalink-xml-relpath.py 
b/testenv/Test-metalink-xml-relpath.py
new file mode 100755
index 000..041d772
--- /dev/null
+++ b/testenv/Test-metalink-xml-relpath.py
@@ -0,0 +1,87 @@
+#!/usr/bin/env python3
+from sys import exit
+from test.http_test import HTTPTest
+from misc.wget_file import WgetFile
+import re
+import hashlib
+
+"""
+This is to test if Metalink XML file escapes current directory.
+"""
+# File Definitions ###
+File1 = "Would you like some Tea?"
+File1_lowPref = "Do not take this"
+File1_sha256 = hashlib.sha256 (File1.encode ('UTF-8')).hexdigest ()
+MetaXml = \
+"""
+http://www.metalinker.org/;>
+  
+GNU Wget
+  
+  
+GNU GPL
+http://www.gnu.org/licenses/gpl.html
+  
+  Wget Test File 1
+  1.2.3
+  Wget Test File 1 description
+  
+
+  
+{{FILE1_HASH}}
+  
+  
+http://broken.example/File1
+http://{{SRV_HOST}}:{{SRV_PORT}}/File1_lowPref
+http://{{SRV_HOST}}:{{SRV_PORT}}/File1
+  
+
+  
+
+"""
+
+A_File = WgetFile ("File1", File1)
+B_File = WgetFile ("File1_lowPref", File1_lowPref)
+MetaFile = WgetFile ("test.meta4", MetaXml)
+
+WGET_OPTIONS = "--input-metalink test.meta4"
+WGET_URLS = [[]]
+
+Files = [[A_File, B_File]]
+Existing_Files = [MetaFile]
+
+ExpectedReturnCode = 0
+ExpectedDownloadedFiles = [MetaFile]
+
+ Pre and Post Test Hooks #
+pre_test = {
+"ServerFiles"   : Files,
+"LocalFiles": 

[Bug-wget] [PATCH 02/27] Fix: Change Metalink/XML v3 file name into test.metalink

2016-09-28 Thread Matthew White
* testenv/Test-metalink-xml-abspath.py: Change Metalink/XML v3 file
  name from test.meta4 into test.metalink
* testenv/Test-metalink-xml-relpath.py: Change Metalink/XML v3 file
  name from test.meta4 into test.metalink
* testenv/Test-metalink-xml.py: Change Metalink/XML v3 file name from
  test.meta4 into test.metalink
---
 testenv/Test-metalink-xml-abspath.py | 4 ++--
 testenv/Test-metalink-xml-relpath.py | 4 ++--
 testenv/Test-metalink-xml.py | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/testenv/Test-metalink-xml-abspath.py 
b/testenv/Test-metalink-xml-abspath.py
index 62aabb9..b618ca0 100755
--- a/testenv/Test-metalink-xml-abspath.py
+++ b/testenv/Test-metalink-xml-abspath.py
@@ -42,9 +42,9 @@ MetaXml = \
 
 A_File = WgetFile ("File1", File1)
 B_File = WgetFile ("File1_lowPref", File1_lowPref)
-MetaFile = WgetFile ("test.meta4", MetaXml)
+MetaFile = WgetFile ("test.metalink", MetaXml)
 
-WGET_OPTIONS = "--input-metalink test.meta4"
+WGET_OPTIONS = "--input-metalink test.metalink"
 WGET_URLS = [[]]
 
 Files = [[A_File, B_File]]
diff --git a/testenv/Test-metalink-xml-relpath.py 
b/testenv/Test-metalink-xml-relpath.py
index 041d772..5ecb2b4 100755
--- a/testenv/Test-metalink-xml-relpath.py
+++ b/testenv/Test-metalink-xml-relpath.py
@@ -42,9 +42,9 @@ MetaXml = \
 
 A_File = WgetFile ("File1", File1)
 B_File = WgetFile ("File1_lowPref", File1_lowPref)
-MetaFile = WgetFile ("test.meta4", MetaXml)
+MetaFile = WgetFile ("test.metalink", MetaXml)
 
-WGET_OPTIONS = "--input-metalink test.meta4"
+WGET_OPTIONS = "--input-metalink test.metalink"
 WGET_URLS = [[]]
 
 Files = [[A_File, B_File]]
diff --git a/testenv/Test-metalink-xml.py b/testenv/Test-metalink-xml.py
index 1659165..2541a38 100755
--- a/testenv/Test-metalink-xml.py
+++ b/testenv/Test-metalink-xml.py
@@ -42,9 +42,9 @@ MetaXml = \
 
 A_File = WgetFile ("File1", File1)
 B_File = WgetFile ("File1_lowPref", File1_lowPref)
-MetaFile = WgetFile ("test.meta4", MetaXml)
+MetaFile = WgetFile ("test.metalink", MetaXml)
 
-WGET_OPTIONS = "--input-metalink test.meta4"
+WGET_OPTIONS = "--input-metalink test.metalink"
 WGET_URLS = [[]]
 
 Files = [[A_File, B_File]]
-- 
2.7.3




[Bug-wget] [PATCH v2 01/27] new Metalink functionalities

2016-09-28 Thread Matthew White
Series of patches to implement new Metalink functionalities.

In response to Giuseppe 
http://lists.gnu.org/archive/html/bug-wget/2016-09/msg00127.html , here you 
find my revised series of patches.

Posting with `git send-email` as requested.

This series of patches supersedes the following:

[Patch 01/25] http://lists.gnu.org/archive/html/bug-wget/2016-09/msg00017.html
[Patch 02/25] http://lists.gnu.org/archive/html/bug-wget/2016-09/msg00018.html
[Patch 03/25] http://lists.gnu.org/archive/html/bug-wget/2016-09/msg00019.html
[Patch 04/25] http://lists.gnu.org/archive/html/bug-wget/2016-09/msg00020.html
[Patch 05/25] http://lists.gnu.org/archive/html/bug-wget/2016-09/msg00021.html
[Patch 06/25] http://lists.gnu.org/archive/html/bug-wget/2016-09/msg00022.html
[Patch 07/25] http://lists.gnu.org/archive/html/bug-wget/2016-09/msg00023.html
[Patch 08/25] http://lists.gnu.org/archive/html/bug-wget/2016-09/msg00024.html
[Patch 09/25] http://lists.gnu.org/archive/html/bug-wget/2016-09/msg00025.html
[Patch 10/25] http://lists.gnu.org/archive/html/bug-wget/2016-09/msg00026.html
[Patch 11/25] http://lists.gnu.org/archive/html/bug-wget/2016-09/msg00027.html
[Patch 12/25] http://lists.gnu.org/archive/html/bug-wget/2016-09/msg00028.html
[Patch 13/25] http://lists.gnu.org/archive/html/bug-wget/2016-09/msg00029.html
[Patch 14/25] http://lists.gnu.org/archive/html/bug-wget/2016-09/msg00030.html
[Patch 15/25] http://lists.gnu.org/archive/html/bug-wget/2016-09/msg00031.html
[Patch 16/25] http://lists.gnu.org/archive/html/bug-wget/2016-09/msg00032.html
[Patch 17/25] http://lists.gnu.org/archive/html/bug-wget/2016-09/msg00033.html
[Patch 18/25] http://lists.gnu.org/archive/html/bug-wget/2016-09/msg00034.html
[Patch 19/25] http://lists.gnu.org/archive/html/bug-wget/2016-09/msg00035.html
[Patch 20/25] http://lists.gnu.org/archive/html/bug-wget/2016-09/msg00036.html
[Patch 21/25] http://lists.gnu.org/archive/html/bug-wget/2016-09/msg00037.html
[Patch 22/25] http://lists.gnu.org/archive/html/bug-wget/2016-09/msg00038.html
[Patch 23/25] http://lists.gnu.org/archive/html/bug-wget/2016-09/msg00039.html
[Patch 24/25] http://lists.gnu.org/archive/html/bug-wget/2016-09/msg00040.html
[Patch 25/25] http://lists.gnu.org/archive/html/bug-wget/2016-09/msg00041.html

Regards,
Matthew




Re: [Bug-wget] bug #45790: wget prints it's progress even when background

2016-09-28 Thread Piotr
I would like to avoid forcing users to hack like this ;).
Wget should print to std* when in fg and print to wget.log when in bg, no 
matter how user gets there.
I don't think getpgrp() == tcgetpgrp(STDOUT_FILENO) is heavy, and it should probably 
be ok to check it when printing lines.

Piotr

28 wrz 2016 17:47 wor...@alum.mit.edu napisał(a): > > "Wajda, Piotr" writes: > 
> The case with stopping wget is obvious. CTRL+Z and bg should make wget > > 
write to file and I can catch bg with SIGCONT. > > But I wonder what to do when 
after CTRL+Z and bg, user runs fg. In this > > case there's no signal between 
bg and fg, > > Though the user could, instead of just "fg", do "fg", then 
Ctrl-Z, then > "fg" again.  The second "fg" would cause a SIGCONT, and wget 
could at > that point check that it had been foregrounded.  Not elegant, but 
fairly > simple. > > Dale

Re: [Bug-wget] bug #45790: wget prints it's progress even when background

2016-09-28 Thread Dale R. Worley
"Wajda, Piotr"  writes:
> The case with stopping wget is obvious. CTRL+Z and bg should make wget 
> write to file and I can catch bg with SIGCONT.
> But I wonder what to do when after CTRL+Z and bg, user runs fg. In this 
> case there's no signal between bg anf fg,

Though the user could, instead of just "fg", do "fg", then Ctrl-Z, then
"fg" again.  The second "fg" would cause a SIGCONT, and wget could at
that point check that it had been foregrounded.  Not elegant, but fairly
simple.

Dale