Hi friends,

I just submitted this patch to bug #1200, which is a new function in libcurl that removes ".." and "." sequences from the path part of URLs as specified by RFC3986. Bug #1200 details how the lack of this functionality in this case causes a failure:

  https://sourceforge.net/p/curl/bugs/1200/

This is my first version of the patch and I intend to wait until after the 7.31.0 release before I merge anything into master just to avoid havoc this close to the release.

I'll add some further tests for this functionality too until then.

Can anyone think of reasons why YOU would want an option to disable this functionality? (I much rather have people speak about their own needs and wishes rather than hypothetical others.) This is a somewhat new behavior of libcurl as we've otherwise always tried to use exactly what gets passed in as much as possible, but this now "sanitizes" the input somewhat in a way that possibly some users won't appreciate...

--

 / daniel.haxx.se
From 580d825e038d0c56db5c3951b821cf0d0da86fbb Mon Sep 17 00:00:00 2001
From: Daniel Stenberg <[email protected]>
Date: Sat, 15 Jun 2013 23:47:02 +0200
Subject: [PATCH] dotdot: introducing dot file path cleanup

RFC3986 details how a path part passed in as part of a URI should be
"cleaned" from dot sequences before getting used. The described
algorithm is now implemented in lib/dotdot.c with the accompanied test
case in test 1395.

Bug: http://curl.haxx.se/bug/view.cgi?id=1200
Reported-by: Alex Vinnik
---
 lib/Makefile.inc        |    4 +-
 lib/dotdot.c            |  170 +++++++++++++++++++++++++++++++++++++++++++++++
 lib/dotdot.h            |   25 +++++++
 lib/url.c               |   16 ++++-
 tests/data/test1395     |   26 ++++++++
 tests/unit/Makefile.inc |    6 +-
 tests/unit/unit1395.c   |   87 ++++++++++++++++++++++++
 7 files changed, 330 insertions(+), 4 deletions(-)
 create mode 100644 lib/dotdot.c
 create mode 100644 lib/dotdot.h
 create mode 100644 tests/data/test1395
 create mode 100644 tests/unit/unit1395.c

diff --git a/lib/Makefile.inc b/lib/Makefile.inc
index f76e1ec..4228bf6 100644
--- a/lib/Makefile.inc
+++ b/lib/Makefile.inc
@@ -25,7 +25,7 @@ CSOURCES = file.c timeval.c base64.c hostip.c progress.c formdata.c	\
   http_proxy.c non-ascii.c asyn-ares.c asyn-thread.c curl_gssapi.c	\
   curl_ntlm.c curl_ntlm_wb.c curl_ntlm_core.c curl_ntlm_msgs.c		\
   curl_sasl.c curl_schannel.c curl_multibyte.c curl_darwinssl.c		\
-  hostcheck.c bundles.c conncache.c pipeline.c
+  hostcheck.c bundles.c conncache.c pipeline.c dotdot.c
 
 HHEADERS = arpa_telnet.h netrc.h file.h timeval.h qssl.h hostip.h	\
   progress.h formdata.h cookie.h http.h sendf.h ftp.h url.h dict.h	\
@@ -44,4 +44,4 @@ HHEADERS = arpa_telnet.h netrc.h file.h timeval.h qssl.h hostip.h	\
   asyn.h curl_ntlm.h curl_gssapi.h curl_ntlm_wb.h curl_ntlm_core.h	\
   curl_ntlm_msgs.h curl_sasl.h curl_schannel.h curl_multibyte.h		\
   curl_darwinssl.h hostcheck.h bundles.h conncache.h curl_setup_once.h	\
-  multihandle.h setup-vms.h pipeline.h
+  multihandle.h setup-vms.h pipeline.h dotdot.h
diff --git a/lib/dotdot.c b/lib/dotdot.c
new file mode 100644
index 0000000..95b6367
--- /dev/null
+++ b/lib/dotdot.c
@@ -0,0 +1,170 @@
+/***************************************************************************
+ *                                  _   _ ____  _
+ *  Project                     ___| | | |  _ \| |
+ *                             / __| | | | |_) | |
+ *                            | (__| |_| |  _ <| |___
+ *                             \___|\___/|_| \_\_____|
+ *
+ * Copyright (C) 1998 - 2013, Daniel Stenberg, <[email protected]>, et al.
+ *
+ * This software is licensed as described in the file COPYING, which
+ * you should have received as part of this distribution. The terms
+ * are also available at http://curl.haxx.se/docs/copyright.html.
+ *
+ * You may opt to use, copy, modify, merge, publish, distribute and/or sell
+ * copies of the Software, and permit persons to whom the Software is
+ * furnished to do so, under the terms of the COPYING file.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ***************************************************************************/
+
+#include "curl_setup.h"
+
+#include "dotdot.h"
+
+#include "curl_memory.h"
+/* The last #include file should be: */
+#include "memdebug.h"
+
+/*
+ * "Remove Dot Segments"
+ * http://tools.ietf.org/html/rfc3986#section-5.2.4
+ */
+
+/*
+ * Curl_dedotdotify()
+ *
+ * This function gets a zero-terminated path with dot and dotdot sequences
+ * passed in and strips them off according to the rules in RFC 3986 section
+ * 5.2.5.
+ *
+ * The function handles a query part ('?' + stuff) appended but it expects
+ * that fragments ('#' + stuff) have already been cut off.
+ *
+ * RETURNS
+ *
+ * an allocated dedotdotified output string
+ */
+char *Curl_dedotdotify(char *input)
+{
+  size_t inlen = strlen(input);
+  char *clone;
+  size_t clen = inlen; /* the length of the cloned input */
+  char *out = malloc(inlen+1);
+  char *outp;
+  char *orgclone;
+  char *queryp;
+  if(!out)
+    return NULL; /* out of memory */
+
+  /* get a cloned copy of the input */
+  clone = strdup(input);
+  if(!clone) {
+    free(out);
+    return NULL;
+  }
+  orgclone = clone;
+  outp = out;
+
+  /*
+   * To handle query-parts properly, we must find it and remove it during the
+   * dotdot-operation and then append it again at the end to the output
+   * string.
+   */
+  queryp = strchr(clone, '?');
+  if(queryp)
+    *queryp = 0;
+
+  do {
+
+    /*  A.  If the input buffer begins with a prefix of "../" or "./", then
+        remove that prefix from the input buffer; otherwise, */
+
+    if(!strncmp("./", clone, 2)) {
+      clone+=2;
+      clen-=2;
+    }
+    else if(!strncmp("../", clone, 3)) {
+      clone+=3;
+      clen-=3;
+    }
+
+    /*  B.  if the input buffer begins with a prefix of "/./" or "/.", where
+        "."  is a complete path segment, then replace that prefix with "/" in
+        the input buffer; otherwise, */
+    else if(!strncmp("/./", clone, 3)) {
+      clone+=2;
+      clen-=2;
+    }
+    else if(!strcmp("/.", clone)) {
+      clone[1]='/';
+      clone++;
+      clen-=1;
+    }
+
+    /*  C.  if the input buffer begins with a prefix of "/../" or "/..", where
+        ".." is a complete path segment, then replace that prefix with "/" in
+        the input buffer and remove the last segment and its preceding "/" (if
+        any) from the output buffer; otherwise, */
+
+    else if(!strncmp("/../", clone, 4)) {
+      clone+=3;
+      clen-=3;
+      /* remove the last segment from the output buffer */
+      while(outp > out) {
+        outp--;
+        if(*outp == '/')
+          break;
+      }
+      *outp = 0; /* zero-terminate where it stops */
+    }
+    else if(!strcmp("/..", clone)) {
+      clone[2]='/';
+      clone+=2;
+      clen-=2;
+      /* remove the last segment from the output buffer */
+      while(outp > out) {
+        outp--;
+        if(*outp == '/')
+          break;
+      }
+      *outp = 0; /* zero-terminate where it stops */
+    }
+
+    /*  D.  if the input buffer consists only of "." or "..", then remove
+        that from the input buffer; otherwise, */
+
+    else if(!strcmp(".", clone) || !strcmp("..", clone)) {
+      *clone=0;
+    }
+
+    else {
+      /*  E.  move the first path segment in the input buffer to the end of
+          the output buffer, including the initial "/" character (if any) and
+          any subsequent characters up to, but not including, the next "/"
+          character or the end of the input buffer. */
+
+      do {
+        *outp++ = *clone++;
+        clen--;
+      } while(*clone && (*clone != '/'));
+      *outp=0;
+    }
+
+  } while(*clone);
+
+  if(queryp) {
+    size_t qlen;
+    /* There was a query part, append that to the output. The 'clone' string
+       may now have been altered so we copy from the original input string
+       from the correct index. */
+    size_t oindex = queryp - orgclone;
+    qlen = strlen(&input[oindex]);
+    memcpy(outp, &input[oindex], qlen+1); /* include the ending zero byte */
+  }
+
+  free(orgclone);
+  return out;
+}
diff --git a/lib/dotdot.h b/lib/dotdot.h
new file mode 100644
index 0000000..c3487c1
--- /dev/null
+++ b/lib/dotdot.h
@@ -0,0 +1,25 @@
+#ifndef HEADER_CURL_DOTDOT_H
+#define HEADER_CURL_DOTDOT_H
+/***************************************************************************
+ *                                  _   _ ____  _
+ *  Project                     ___| | | |  _ \| |
+ *                             / __| | | | |_) | |
+ *                            | (__| |_| |  _ <| |___
+ *                             \___|\___/|_| \_\_____|
+ *
+ * Copyright (C) 1998 - 2013, Daniel Stenberg, <[email protected]>, et al.
+ *
+ * This software is licensed as described in the file COPYING, which
+ * you should have received as part of this distribution. The terms
+ * are also available at http://curl.haxx.se/docs/copyright.html.
+ *
+ * You may opt to use, copy, modify, merge, publish, distribute and/or sell
+ * copies of the Software, and permit persons to whom the Software is
+ * furnished to do so, under the terms of the COPYING file.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ***************************************************************************/
+char *Curl_dedotdotify(char *input);
+#endif
diff --git a/lib/url.c b/lib/url.c
index e116a5b..3c5e067 100644
--- a/lib/url.c
+++ b/lib/url.c
@@ -124,6 +124,7 @@ int curl_win32_idn_to_ascii(const char *in, char **out);
 #include "conncache.h"
 #include "multihandle.h"
 #include "pipeline.h"
+#include "dotdot.h"
 
 #define _MPRINTF_REPLACE /* use our functions only */
 #include <curl/mprintf.h>
@@ -3844,7 +3845,20 @@ static CURLcode parseurlandfillconn(struct SessionHandle *data,
     path[0] = '/';
     fix_slash = TRUE;
   }
-
+  else {
+    /* sanitise paths and remove ../ and ./ sequences according to RFC3986 */
+    char *newp = Curl_dedotdotify(path);
+
+    if(strcmp(newp, path)) {
+      fix_slash = TRUE;
+      free(data->state.pathbuffer);
+      data->state.pathbuffer = newp;
+      data->state.path = newp;
+      path = newp;
+    }
+    else
+      free(newp);
+  }
 
   /*
    * "fix_slash" means that the URL was malformatted so we need to generate an
diff --git a/tests/data/test1395 b/tests/data/test1395
new file mode 100644
index 0000000..967c8d4
--- /dev/null
+++ b/tests/data/test1395
@@ -0,0 +1,26 @@
+<testcase>
+<info>
+<keywords>
+unittest
+</keywords>
+</info>
+
+#
+# Client-side
+<client>
+<server>
+none
+</server>
+<features>
+unittest
+</features>
+ <name>
+Curl_dedotdotify
+ </name>
+<tool>
+unit1395
+</tool>
+
+</client>
+
+</testcase>
diff --git a/tests/unit/Makefile.inc b/tests/unit/Makefile.inc
index 372f5e6..cffd78d 100644
--- a/tests/unit/Makefile.inc
+++ b/tests/unit/Makefile.inc
@@ -6,7 +6,7 @@ UNITFILES = curlcheck.h \
 
 # These are all unit test programs
 UNITPROGS = unit1300 unit1301 unit1302 unit1303 unit1304 unit1305 unit1307 \
- unit1308 unit1309 unit1330 unit1394
+ unit1308 unit1309 unit1330 unit1394 unit1395
 
 unit1300_SOURCES = unit1300.c $(UNITFILES)
 unit1300_CPPFLAGS = $(AM_CPPFLAGS)
@@ -43,3 +43,7 @@ unit1394_CPPFLAGS = $(AM_CPPFLAGS) $(LIBMETALINK_CPPFLAGS)
 unit1394_LDADD = @LIBMETALINK_LIBS@ $(top_builddir)/lib/libcurl.la @LIBCURL_LIBS@
 unit1394_LDFLAGS = @LIBMETALINK_LDFLAGS@ $(top_builddir)/src/libcurltool.la
 unit1394_LIBS =
+
+unit1395_SOURCES = unit1395.c $(UNITFILES)
+unit1395_CPPFLAGS = $(AM_CPPFLAGS)
+
diff --git a/tests/unit/unit1395.c b/tests/unit/unit1395.c
new file mode 100644
index 0000000..8b0b0a0
--- /dev/null
+++ b/tests/unit/unit1395.c
@@ -0,0 +1,87 @@
+/***************************************************************************
+ *                                  _   _ ____  _
+ *  Project                     ___| | | |  _ \| |
+ *                             / __| | | | |_) | |
+ *                            | (__| |_| |  _ <| |___
+ *                             \___|\___/|_| \_\_____|
+ *
+ * Copyright (C) 1998 - 2013, Daniel Stenberg, <[email protected]>, et al.
+ *
+ * This software is licensed as described in the file COPYING, which
+ * you should have received as part of this distribution. The terms
+ * are also available at http://curl.haxx.se/docs/copyright.html.
+ *
+ * You may opt to use, copy, modify, merge, publish, distribute and/or sell
+ * copies of the Software, and permit persons to whom the Software is
+ * furnished to do so, under the terms of the COPYING file.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ***************************************************************************/
+#include "curlcheck.h"
+
+#include "dotdot.h"
+
+#include "memdebug.h"
+
+static CURLcode unit_setup(void)
+{
+  return CURLE_OK;
+}
+
+static void unit_stop(void)
+{
+
+}
+
+struct dotdot {
+  const char *input;
+  const char *output;
+};
+
+UNITTEST_START
+
+  unsigned int i;
+  int fails=0;
+  struct dotdot pairs[] = {
+    { "/a/b/c/./../../g", "/a/g" },
+    { "mid/content=5/../6", "mid/6" },
+    { "/hello/../moo", "/moo" },
+    { "/1/../1", "/1" },
+    { "/1/./1", "/1/1" },
+    { "/1/..", "/" },
+    { "/1/.", "/1/" },
+    { "/1/./..", "/" },
+    { "/1/./../2", "/2" },
+    { "/hello/1/./../2", "/hello/2" },
+    { "test/this", "test/this" },
+    { "test/this/../now", "test/now" },
+    { "/1../moo../foo", "/1../moo../foo"},
+    { "/../../moo", "/moo"},
+    { "/../../moo?andnot/../yay", "/moo?andnot/../yay"},
+    { "/123?foo=/./&bar=/../", "/123?foo=/./&bar=/../"},
+    { "/../moo/..?what", "/?what" },
+  };
+
+  for(i=0; i < sizeof(pairs)/sizeof(pairs[0]); i++) {
+    char *out = Curl_dedotdotify((char *)pairs[i].input);
+
+    if(strcmp(out, pairs[i].output)) {
+      fprintf(stderr, "Test %d: '%s' gave '%s' instead of '%s'\n",
+              i, pairs[i].input, out, pairs[i].output);
+      fail("Test case output mismatched");
+      fails++;
+    }
+    else
+      fprintf(stderr, "Test %d: OK\n", i);
+    free(out);
+  }
+
+  return fails;
+
+UNITTEST_STOP
+
+
+
+
-- 
1.7.10.4

-------------------------------------------------------------------
List admin: http://cool.haxx.se/list/listinfo/curl-library
Etiquette:  http://curl.haxx.se/mail/etiquette.html

Reply via email to