[Coverity Scan is ok, make syntax-check is ok, make check-valgrind is ok, 
contrib/check-hard is ok]

This fixes problems introduced by processing a Metalink/XML file containing 
metalink:url strings separated by CRLF.

The following description is verbatim from the patch:
-----
White spaces and CRLF are not automatically removed by libmetalink
from url strings. The Wget's Metalink module was unable to process
such url strings. This patch implements the processing of such url
strings cleaning off leading and trailing white spaces and CRLF.

If a parsed Metalink/XML url string contains strings separated by
CRLF, only the first of the series is accepted.
-----

Regards,
Matthew

-- 
Matthew White <[email protected]>
>From d213fedd77668d6be891b9be4cd8de4b077a2c49 Mon Sep 17 00:00:00 2001
From: Matthew White <[email protected]>
Date: Tue, 23 Aug 2016 21:59:15 +0200
Subject: [PATCH 15/25] Bugfix: Process Metalink/XML url strings containing
 white spaces and CRLF

* src/metalink.h: Add declaration of function clean_metalink_string()
* src/metalink.c: Add directive #include "xmemdup0.h"
* src/metalink.c: Add function clean_metalink_string() to remove
  leading and trailing white spaces and CRLF from a string
* src/metalink.c (retrieve_from_metalink): Remove leading and trailing
  white spaces and CRLF from url resource mres->url
* testenv/Makefile.am: Add new file
* testenv/Test-metalink-xml-urlbreak.py: New test. Metalink/XML white
  spaces and CRLF in url resources tests

White spaces and CRLF are not automatically removed by libmetalink
from url strings. The Wget's Metalink module was unable to process
such url strings. This patch implements the processing of such url
strings cleaning off leading and trailing white spaces and CRLF.

If a parsed Metalink/XML url string contains strings separated by
CRLF, only the first of the series is accepted.
---
 src/metalink.c                        |  43 ++++++
 src/metalink.h                        |   1 +
 testenv/Makefile.am                   |   3 +-
 testenv/Test-metalink-xml-urlbreak.py | 237 ++++++++++++++++++++++++++++++++++
 4 files changed, 283 insertions(+), 1 deletion(-)
 create mode 100755 testenv/Test-metalink-xml-urlbreak.py

diff --git a/src/metalink.c b/src/metalink.c
index 9967705..01d824b 100644
--- a/src/metalink.c
+++ b/src/metalink.c
@@ -40,6 +40,7 @@ as that of the covered work.  */
 #include "sha1.h"
 #include "sha256.h"
 #include "sha512.h"
+#include "xmemdup0.h"
 #include "xstrndup.h"
 #include "c-strcase.h"
 #include <errno.h>
@@ -196,6 +197,8 @@ retrieve_from_metalink (const metalink_t* metalink)
           struct url *url;
           int url_err;
 
+          clean_metalink_string (&mres->url);
+
           if (!RES_TYPE_SUPPORTED (mres->type))
             {
               logprintf (LOG_VERBOSE,
@@ -776,6 +779,46 @@ append_suffix_number (char **str, const char *sep, wgint num)
   *str = new;
 }
 
+/*
+  Strip leading white space and line breaks from *STR, cut it at the
+  first line break that follows, then strip trailing spaces and tabs.
+  An all-blank or empty input yields "".  NULL STR or *STR is a no-op.
+
+  The old buffer is freed and *STR is replaced by a new allocation.
+*/
+void
+clean_metalink_string (char **str)
+{
+  int c;
+  size_t len;
+  char *new, *beg, *end;
+
+  if (!str || !*str)
+    return;
+
+  beg = *str;
+
+  while ((c = *beg) && (c == '\n' || c == '\r' || c == '\t' || c == ' '))
+    beg++;
+
+  end = beg;
+
+  /* To not truncate a string containing spaces, search the first '\r'
+     or '\n' which hypothetically marks the end of the string.  */
+  while ((c = *end) && (c != '\r') && (c != '\n'))
+    end++;
+
+  /* Drop blanks before END.  Never step back past BEG: an empty or
+     all-blank string would read before the buffer and underflow LEN.  */
+  while (end > beg && (end[-1] == '\t' || end[-1] == ' '))
+    end--;
+
+  len = end - beg;
+  new = xmemdup0 (beg, len);
+  xfree (*str);
+  *str = new;
+}
+
 /* Append the suffix ".badhash" to the file NAME, except without
    overwriting an existing file with that name and suffix.  */
 void
diff --git a/src/metalink.h b/src/metalink.h
index 0cfc948..3244b83 100644
--- a/src/metalink.h
+++ b/src/metalink.h
@@ -51,6 +51,7 @@ int metalink_check_safe_path(const char *path);
 
 char *get_metalink_basename (char *name);
 void append_suffix_number (char **str, const char *sep, wgint num);
+void clean_metalink_string (char **str);
 void badhash_suffix (char *name);
 void badhash_or_remove (char *name);
 
diff --git a/testenv/Makefile.am b/testenv/Makefile.am
index 3d9ea44..a896cca 100644
--- a/testenv/Makefile.am
+++ b/testenv/Makefile.am
@@ -46,7 +46,8 @@ if METALINK_IS_ENABLED
     Test-metalink-xml-homeprefix-trust.py           \
     Test-metalink-xml-continue.py                   \
     Test-metalink-xml-size.py                       \
-    Test-metalink-xml-nourls.py
+    Test-metalink-xml-nourls.py                     \
+    Test-metalink-xml-urlbreak.py
 else
   METALINK_TESTS =
 endif
diff --git a/testenv/Test-metalink-xml-urlbreak.py b/testenv/Test-metalink-xml-urlbreak.py
new file mode 100755
index 0000000..ae6107e
--- /dev/null
+++ b/testenv/Test-metalink-xml-urlbreak.py
@@ -0,0 +1,237 @@
+#!/usr/bin/env python3
+from sys import exit
+from test.http_test import HTTPTest
+from misc.wget_file import WgetFile
+import re
+import hashlib
+
+"""
+    This is to test Metalink/XML white spaces in url resources.
+
+    With --trust-server-names, trust the metalink:file names.
+
+    Without --trust-server-names, don't trust the metalink:file names:
+    use the basename of --input-metalink, and add a sequential number
+    (e.g. .#1, .#2, etc.).
+
+    Strip the directory from unsafe paths.
+"""
+############# File Definitions ###############################################
+bad = "Ouch!"  # body of /wrong_file; its digest matches none of the hashes below
+
+File1 = "Would you like some Tea?"  # payload expected for metalink:file "File1"
+File1_lowPref = "Do not take this"  # body of the preference="25" resource
+File1_sha256 = hashlib.sha256 (File1.encode ('UTF-8')).hexdigest ()  # fills {{FILE1_HASH}}
+
+File2 = "This is gonna be good"
+File2_lowPref = "Not this one too"
+File2_sha256 = hashlib.sha256 (File2.encode ('UTF-8')).hexdigest ()
+
+File3 = "A little more, please"
+File3_lowPref = "That's just too much"
+File3_sha256 = hashlib.sha256 (File3.encode ('UTF-8')).hexdigest ()
+
+File4 = "Maybe a biscuit?"
+File4_lowPref = "No, thanks"
+File4_sha256 = hashlib.sha256 (File4.encode ('UTF-8')).hexdigest ()
+
+File5 = "More Tea...?"
+File5_lowPref = "I have to go..."
+File5_sha256 = hashlib.sha256 (File5.encode ('UTF-8')).hexdigest ()
+# Metalink v3 template: each url body sits on its own line, padded by blanks.
+MetaXml = \
+"""<?xml version="1.0" encoding="utf-8"?>
+<metalink version="3.0" xmlns="http://www.metalinker.org/">
+  <publisher>
+    <name>GNU Wget</name>
+  </publisher>
+  <license>
+    <name>GNU GPL</name>
+    <url>http://www.gnu.org/licenses/gpl.html</url>
+  </license>
+  <identity>Wget Test Files</identity>
+  <version>1.2.3</version>
+  <description>Wget Test Files description</description>
+  <files>
+    <file name="File1">
+      <verification>
+        <hash type="sha256">{{FILE1_HASH}}</hash>
+      </verification>
+      <resources>
+        <url type="http" preference="35">
+                   http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file
+        </url>
+        <url type="http" preference="40">
+                   http://{{SRV_HOST}}:{{SRV_PORT}}/404
+        </url>
+        <url type="http" preference="25">
+                   http://{{SRV_HOST}}:{{SRV_PORT}}/File1_lowPref
+        </url>
+        <url type="http" preference="30">
+                   http://{{SRV_HOST}}:{{SRV_PORT}}/File1
+        </url>
+      </resources>
+    </file>
+    <file name="File2">
+      <verification>
+        <hash type="sha256">{{FILE2_HASH}}</hash>
+      </verification>
+      <resources>
+        <url type="http" preference="35">
+                   http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file
+        </url>
+        <url type="http" preference="40">
+                   http://{{SRV_HOST}}:{{SRV_PORT}}/404
+        </url>
+        <url type="http" preference="25">
+                   http://{{SRV_HOST}}:{{SRV_PORT}}/File2_lowPref
+        </url>
+        <url type="http" preference="30">
+                   http://{{SRV_HOST}}:{{SRV_PORT}}/File2
+        </url>
+      </resources>
+    </file>
+    <file name="File3">
+      <verification>
+        <hash type="sha256">{{FILE3_HASH}}</hash>
+      </verification>
+      <resources>
+        <url type="http" preference="35">
+                   http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file
+        </url>
+        <url type="http" preference="40">
+                   http://{{SRV_HOST}}:{{SRV_PORT}}/404
+        </url>
+        <url type="http" preference="25">
+                   http://{{SRV_HOST}}:{{SRV_PORT}}/File3_lowPref
+        </url>
+        <url type="http" preference="30">
+                   http://{{SRV_HOST}}:{{SRV_PORT}}/File3
+        </url>
+      </resources>
+    </file>
+    <file name="File4">
+      <verification>
+        <hash type="sha256">{{FILE4_HASH}}</hash>
+      </verification>
+      <resources>
+        <url type="http" preference="35">
+                   http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file
+        </url>
+        <url type="http" preference="40">
+                   http://{{SRV_HOST}}:{{SRV_PORT}}/404
+        </url>
+        <url type="http" preference="25">
+                   http://{{SRV_HOST}}:{{SRV_PORT}}/File4_lowPref
+        </url>
+        <url type="http" preference="30">
+                   http://{{SRV_HOST}}:{{SRV_PORT}}/File4
+        </url>
+      </resources>
+    </file>
+    <file name="File5">
+      <verification>
+        <hash type="sha256">{{FILE5_HASH}}</hash>
+      </verification>
+      <resources>
+        <url type="http" preference="35">
+                   http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file
+        </url>
+        <url type="http" preference="40">
+                   http://{{SRV_HOST}}:{{SRV_PORT}}/404
+        </url>
+        <url type="http" preference="25">
+                   http://{{SRV_HOST}}:{{SRV_PORT}}/File5_lowPref
+        </url>
+        <url type="http" preference="30">
+                   http://{{SRV_HOST}}:{{SRV_PORT}}/File5
+        </url>
+      </resources>
+    </file>
+  </files>
+</metalink>
+"""
+
+wrong_file = WgetFile ("wrong_file", bad)  # content that matches no {{FILEn_HASH}}
+
+File1_orig = WgetFile ("File1", File1)  # listed in Files (the "ServerFiles" hook)
+File1_down = WgetFile ("test.meta4.#1", File1)  # expected result: input basename + .#N
+File1_nono = WgetFile ("File1_lowPref", File1_lowPref)  # lower-preference resource
+
+File2_orig = WgetFile ("File2", File2)
+File2_down = WgetFile ("test.meta4.#2", File2)
+File2_nono = WgetFile ("File2_lowPref", File2_lowPref)
+
+File3_orig = WgetFile ("File3", File3)
+File3_down = WgetFile ("test.meta4.#3", File3)
+File3_nono = WgetFile ("File3_lowPref", File3_lowPref)
+
+File4_orig = WgetFile ("File4", File4)
+File4_down = WgetFile ("test.meta4.#4", File4)
+File4_nono = WgetFile ("File4_lowPref", File4_lowPref)
+
+File5_orig = WgetFile ("File5", File5)
+File5_down = WgetFile ("test.meta4.#5", File5)
+File5_nono = WgetFile ("File5_lowPref", File5_lowPref)
+
+MetaFile = WgetFile ("test.meta4", MetaXml)  # still the raw template at this point
+
+WGET_OPTIONS = "--input-metalink test.meta4"  # no --trust-server-names is passed
+WGET_URLS = [[]]  # empty: no URL arguments are listed for this run
+
+Files = [[
+    wrong_file,
+    File1_orig, File1_nono,
+    File2_orig, File2_nono,
+    File3_orig, File3_nono,
+    File4_orig, File4_nono,
+    File5_orig, File5_nono
+]]
+Existing_Files = [MetaFile]  # passed as "LocalFiles" in pre_test
+
+ExpectedReturnCode = 0
+ExpectedDownloadedFiles = [
+    File1_down,
+    File2_down,
+    File3_down,
+    File4_down,
+    File5_down,
+    MetaFile  # the metalink input file is expected to remain as well
+]
+
+################ Pre and Post Test Hooks #####################################
+pre_test = {
+    "ServerFiles"       : Files,
+    "LocalFiles"        : Existing_Files
+}
+test_options = {
+    "WgetCommands"      : WGET_OPTIONS,
+    "Urls"              : WGET_URLS
+}
+post_test = {
+    "ExpectedFiles"     : ExpectedDownloadedFiles,
+    "ExpectedRetcode"   : ExpectedReturnCode
+}
+
+http_test = HTTPTest (
+                pre_hook=pre_test,
+                test_params=test_options,
+                post_hook=post_test,
+)
+
+http_test.server_setup()  # servers must exist before their address is read below
+### Get and use dynamic server sockname
+srv_host, srv_port = http_test.servers[0].server_inst.socket.getsockname ()
+
+MetaXml = re.sub (r'{{FILE1_HASH}}', File1_sha256, MetaXml)  # fill in the placeholders
+MetaXml = re.sub (r'{{FILE2_HASH}}', File2_sha256, MetaXml)
+MetaXml = re.sub (r'{{FILE3_HASH}}', File3_sha256, MetaXml)
+MetaXml = re.sub (r'{{FILE4_HASH}}', File4_sha256, MetaXml)
+MetaXml = re.sub (r'{{FILE5_HASH}}', File5_sha256, MetaXml)
+MetaXml = re.sub (r'{{SRV_HOST}}', srv_host, MetaXml)
+MetaXml = re.sub (r'{{SRV_PORT}}', str (srv_port), MetaXml)
+MetaFile.content = MetaXml  # MetaFile was built from the template, so refresh it
+
+err = http_test.begin ()
+
+exit (err)  # propagate the harness result as the process exit status
-- 
2.7.3

Attachment: pgpgrGsFaP87z.pgp
Description: PGP signature

Reply via email to