Sorry for the top-post

I was wondering if some of you (active) 'ftp' maintainers could evaluate my 
patch below.

I posted this to the list ages ago, and tried following up with some former 
committers on the ftp code, but for whatever reason I never received any 
communication at all.

It is a very simple patch, if someone could review it, and let me know if this 
is OK or not.

I would apprciate it.

original message follows...

Hi,

I was trying to use the 'ftp' program to retrieve the following URL:

https://pypi.python.org/packages/source/p/pip/pip-1.0.2.tar.gz#md5=47ec6ff3f6d962696fe08d4c8264ad49

Which fails because it considers the fragment as part of the path, and
so the server returns 404. These type of links are quite common these
days and so it would be nice if 'ftp' would handle them.

This is my first patch to OpenBSD, so please let me know if there is
anyhthing else I can do to get this feature implemented. The patch is
very simple, and I think, correct, as '#' is an "unsafe" character, and
must always be URL-encoded if it is to appear literally in a URL and
not be interpreted as the start of a fragment identifier.

Regards,
Eric P. Mangold

Index: fetch.c
===================================================================
RCS file: /cvs/src/usr.bin/ftp/fetch.c,v
retrieving revision 1.103
diff -u -r1.103 fetch.c
--- fetch.c     25 Aug 2010 20:32:37 -0000      1.103
+++ fetch.c     29 Oct 2011 03:34:02 -0000
@@ -205,6 +205,11 @@
        if (newline == NULL)
                errx(1, "Can't allocate memory to parse URL");
        if (strncasecmp(newline, HTTP_URL, sizeof(HTTP_URL) - 1) == 0) {
+                /* Remove any trailing fragment identifier from the HTTP URL.
+                 * Fragments (HTTP anchors) are identified by a hash char 
('#'),
+                 * as per RFC 3986. */
+                newline = strsep(&newline, "#");
+
                host = newline + sizeof(HTTP_URL) - 1;
 #ifndef SMALL
                scheme = HTTP_URL;
@@ -221,6 +226,11 @@
 #ifndef SMALL
                scheme = FILE_URL;
        } else if (strncasecmp(newline, HTTPS_URL, sizeof(HTTPS_URL) - 1) == 0) 
{
+                /* Remove any trailing fragment identifier from the HTTPS URL.
+                 * Fragments (HTTP anchors) are identified by a hash char 
('#'),
+                 * as per RFC 3986. */
+                newline = strsep(&newline, "#");
+
                host = newline + sizeof(HTTPS_URL) - 1;
                ishttpsurl = 1;
                scheme = HTTPS_URL;


On Mon, Aug 06, 2012 at 10:56:28PM +0200, Christiano F. Haesbaert wrote:
> Please ignore the other thread, it takes ages for me to open my sent box
> over gprs, so I'm opening a new one.
> 
> 
> Index: fetch.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/ftp/fetch.c,v
> retrieving revision 1.105
> diff -d -u -p -r1.105 fetch.c
> --- fetch.c   30 Apr 2012 13:41:26 -0000      1.105
> +++ fetch.c   6 Aug 2012 20:49:51 -0000
> @@ -177,7 +177,7 @@ url_get(const char *origline, const char
>  {
>       char pbuf[NI_MAXSERV], hbuf[NI_MAXHOST], *cp, *portnum, *path, ststr[4];
>       char *hosttail, *cause = "unknown", *newline, *host, *port, *buf = NULL;
> -     char *epath, *redirurl, *loctail;
> +     char *epath, *redirurl, *loctail, *h, *p;
>       int error, i, isftpurl = 0, isfileurl = 0, isredirect = 0, rval = -1;
>       struct addrinfo hints, *res0, *res, *ares = NULL;
>       const char * volatile savefile;
> @@ -191,7 +191,7 @@ url_get(const char *origline, const char
>       size_t len, wlen;
>  #ifndef SMALL
>       char *sslpath = NULL, *sslhost = NULL;
> -     char *locbase, *full_host = NULL;
> +     char *locbase, *full_host = NULL, *auth = NULL;
>       const char *scheme;
>       int ishttpsurl = 0;
>       SSL_CTX *ssl_ctx = NULL;
> @@ -228,7 +228,20 @@ url_get(const char *origline, const char
>  #endif /* !SMALL */
>       } else
>               errx(1, "url_get: Invalid URL '%s'", newline);
> -
> +#ifndef SMALL
> +     /* Look for auth header */
> +     if (proxyenv == NULL &&
> +         (!strcmp(scheme, HTTP_URL) || !strcmp(scheme, HTTPS_URL))) {
> +             if ((p = strchr(host, '@')) != NULL) {
> +                     *p = 0; /* Kill @ */
> +                     if ((auth = calloc(1, 64)) == NULL)
> +                             err(1, "Can't allocate memory for 
> authorization");
> +                     if (b64_ntop(host, strlen(host), auth, 64) == -1)
> +                             errx(1, "error in base64 encoding");
> +                     host = p + 1;
> +             }
> +     }
> +#endif /* SMALL */
>       if (isfileurl) {
>               path = host;
>       } else {
> @@ -639,14 +652,19 @@ again:
>                               restart_point = 0;
>               }
>  #endif /* !SMALL */
> -             ftp_printf(fin, ssl, "GET /%s %s\r\nHost: ", epath,
>  #ifndef SMALL
> -                     restart_point ? "HTTP/1.1\r\nConnection: close" :
> -#endif /* !SMALL */
> -                     "HTTP/1.0");
> +             if (auth) {
> +                     ftp_printf(fin, ssl,
> +                         "GET /%s %s\r\nAuthorization: Basic %s\r\nHost: ",
> +                         epath, restart_point ?
> +                         "HTTP/1.1\r\nConnection: close" : "HTTP/1.0",
> +                         auth);
> +                     free(auth);
> +             } else
> +#endif       /* SMALL */
> +             ftp_printf(fin, ssl, "GET /%s %s\r\nHost: ", epath,
> +                 "HTTP/1.0");
>               if (strchr(host, ':')) {
> -                     char *h, *p;
> -
>                       /*
>                        * strip off scoped address portion, since it's
>                        * local to node
> Index: ftp.1
> ===================================================================
> RCS file: /cvs/src/usr.bin/ftp/ftp.1,v
> retrieving revision 1.82
> diff -d -u -p -r1.82 ftp.1
> --- ftp.1     30 Apr 2012 13:41:26 -0000      1.82
> +++ ftp.1     6 Aug 2012 20:22:14 -0000
> @@ -61,7 +61,8 @@
>  .Op Fl o Ar output
>  .Op Fl s Ar srcaddr
>  .Sm off
> -.No http:// Ar host Oo : Ar port
> +.No http:// Oo Ar user : password No @
> +.Oc Ar host Oo : Ar port
>  .Oc No / Ar file
>  .Sm on
>  .Ar ...
> @@ -71,7 +72,8 @@
>  .Op Fl o Ar output
>  .Op Fl s Ar srcaddr
>  .Sm off
> -.No https:// Ar host Oo : Ar port
> +.No https:// Oo Ar user : password No @
> +.Oc Ar host Oo : Ar port
>  .Oc No / Ar file
>  .Sm on
>  .Ar ...
> @@ -1278,12 +1280,12 @@ isn't defined, log in as
>  .Ar user
>  with a password of
>  .Ar password .
> -.It http://host[:port]/file
> +.It http://[user:password@]host[:port]/file
>  An HTTP URL, retrieved using the HTTP protocol.
>  If
>  .Ev http_proxy
>  is defined, it is used as a URL to an HTTP proxy server.
> -.It https://host[:port]/file
> +.It https://[user:password@]host[:port]/file
>  An HTTPS URL, retrieved using the HTTPS protocol.
>  If
>  .Ev http_proxy
> Index: main.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/ftp/main.c,v
> retrieving revision 1.83
> diff -d -u -p -r1.83 main.c
> --- main.c    19 May 2012 02:04:22 -0000      1.83
> +++ main.c    6 Aug 2012 19:54:15 -0000
> @@ -775,12 +775,14 @@ usage(void)
>  #endif /* !SMALL */
>           "[-o output] "
>  #ifndef SMALL
> -         "[-s srcaddr] "
> +         "[-s srcaddr]\n"
> +         "           "
>  #endif /* !SMALL */
> -         "http://host[:port]/file ...\n"
> +         "http://[user:password@]host[:port]/file ...\n"
>  #ifndef SMALL
> -         "       %s [-C] [-c cookie] [-o output] [-s srcaddr] "
> -         "https://host[:port]/file\n";
> +         "       %s [-C] [-c cookie] [-o output] [-s srcaddr]\n"
> +         "           "
> +         "https://[user:password@]host[:port]/file\n";
>           "           ...\n"
>  #endif /* !SMALL */
>           "       %s "

Reply via email to