In message <1249265808.20182.448.ca...@duiker>
          John-Mark Bell <[email protected]> wrote:

> On Mon, 2009-08-03 at 00:32 +0100, John Tytgat wrote:
> > Now the questions:
> > 
> > 1. Is this hack acceptable for the time being ?
> > 2. Or should we shoot for, what I think is, the right approach, i.e.
> >    when no prot_space_data ptr can be found in path_date structs, await
> >    a 401 and with the realm given at that point take the right
> >    prot_space_data or offer the uses the possibility to enter a new
> >    username/password for that realm.  But that needs to work for all
> >    content fetched, not only html (what I think is happening).
> >    TBH, this is probably above my head to implement for the time I can
> >    spend on.
> 
> I'd prefer (2), which is why there's been a couple of related
> authentication bugs sat in the tracker since February.

With jmb's help I managed get rid of the hack (so we never speculatively
give authentication data and when 401 arrives with a realm of which we
know the auth data, we do a refetch).

It solves 
<URL:http://sourceforge.net/tracker/?func=detail&aid=2830829&group_id=51719&atid=464312>.
Following patch should be ready for checkin. Final comments please.

- content/urldb.c(auth_data): Removed;
  (prot_space_data): Added, it lives linked in the leaf host_part
  struct and together with its scheme and port (which defins canonical root
  url) and realm this defines a protection space.
  (path_data): Removed auth_data field and replaced by a prot_space_data
  pointer.
  (host_part::prot_space): Added linked list of protection space data
  structs.
  (urldb_get_auth_details): Given an URL fetch fetches its auth.
  (urldb_set_auth_details): Creates or updates the contents of a
  protection space to which given URL belongs.
  (urldb_destroy_host_tree): Delete protection data space structures
  using urldb_destroy_prot_space.
  (urldb_destroy_prot_space): Added.
- content/urldb.h(urldb_get_auth_details): Added realm parameter.
- content/fetchers/fetch_curl.c(fetch_curl_set_options): Update
  urldb_get_auth_details call (we don't know realm at this point).
- content/fetchcache.c(fetchcache_callback, fetchcache_auth): At FETCH_AUTH,
  use realm to determine if we really don't know auth data and if so,
  refetch content.
- content/content.h(struct content): Add content::tried_with_auth.
- content/content.c(content_create): Initialize content::tried_with_auth.
- riscos/401login.c(ro_gui_401login_open): Show known authentication
  data in dialogue so user can see what was wrong with it and correct it.

-8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<-
Index: content/urldb.c
===================================================================
--- content/urldb.c     (revision 8988)
+++ content/urldb.c     (working copy)
@@ -1,5 +1,6 @@
 /*
  * Copyright 2006 John M Bell <[email protected]>
+ * Copyright 2009 John Tytgat <[email protected]>
  *
  * This file is part of NetSurf, http://www.netsurf-browser.org/
  *
@@ -132,10 +133,22 @@
        struct cookie_internal_data *next;      /**< Next in list */
 };
 
-struct auth_data {
+/* A protection space is defined as a tuple canonical_root_url and realm.
+ * This structure lives as linked list element in a leaf host_part struct
+ * so we need additional scheme and port to have a canonical_root_url.  */
+struct prot_space_data {
+       char *scheme;           /**< URL scheme of canonical hostname of this
+                                * protection space. */
+       unsigned int port;      /**< Port number of canonical hostname of this
+                                * protection space. When 0, it means the
+                                * default port for given scheme, i.e. 80
+                                * (http), 443 (https). */
        char *realm;            /**< Protection realm */
-       char *auth;             /**< Authentication details in form
+
+       char *auth;             /**< Authentication details for this
+                                * protection space in form
                                 * username:password */
+       struct prot_space_data *next;   /**< Next sibling */
 };
 
 struct cache_internal_data {
@@ -152,7 +165,9 @@
 struct path_data {
        char *url;              /**< Full URL */
        char *scheme;           /**< URL scheme for data */
-       unsigned int port;      /**< Port number for data */
+       unsigned int port;      /**< Port number for data. When 0, it means
+                                * the default port for given scheme, i.e.
+                                * 80 (http), 443 (https). */
        char *segment;          /**< Path segment for this node */
        unsigned int frag_cnt;  /**< Number of entries in ::fragment */
        char **fragment;        /**< Array of fragments */
@@ -161,7 +176,11 @@
        struct bitmap *thumb;   /**< Thumbnail image of resource */
        struct url_internal_data urld;  /**< URL data for resource */
        struct cache_internal_data cache;       /**< Cache data for resource */
-       struct auth_data auth;  /**< Authentication data for resource */
+       const struct prot_space_data *prot_space;       /**< Protection space
+                                * to which this resource belongs too. Can be
+                                * NULL when it does not belong to a protection
+                                * space or when it is not known. No
+                                * ownership (is with struct 
host_part::prot_space). */
        struct cookie_internal_data *cookies;   /**< Cookies associated with 
resource */
        struct cookie_internal_data *cookies_end;       /**< Last cookie in 
list */
 
@@ -183,6 +202,10 @@
 
        char *part;             /**< Part of host string */
 
+       struct prot_space_data *prot_space;     /**< Linked list of all known
+                                * proctection spaces known for his host and
+                                * all its schems and ports. */
+
        struct host_part *next; /**< Next sibling */
        struct host_part *prev; /**< Previous sibling */
        struct host_part *parent;       /**< Parent host part */
@@ -203,6 +226,7 @@
 static void urldb_destroy_path_tree(struct path_data *root);
 static void urldb_destroy_path_node_content(struct path_data *node);
 static void urldb_destroy_cookie(struct cookie_internal_data *c);
+static void urldb_destroy_prot_space(struct prot_space_data *space);
 static void urldb_destroy_search_tree(struct search_node *root);
 
 /* Saving */
@@ -925,11 +949,13 @@
  * Look up authentication details in database
  *
  * \param url Absolute URL to search for
+ * \param realm When non-NULL, it is realm which can be used to determine
+ * the protection space when that's not been done before for given URL.
  * \return Pointer to authentication details, or NULL if not found
  */
-const char *urldb_get_auth_details(const char *url)
+const char *urldb_get_auth_details(const char *url, const char *realm)
 {
-       struct path_data *p, *q = NULL;
+       struct path_data *p, *p_cur, *p_top;
 
        assert(url);
 
@@ -940,29 +966,33 @@
        if (!p)
                return NULL;
 
-       /* Check for any auth details attached to this node */
-       if (p && p->auth.realm && p->auth.auth)
-               return p->auth.auth;
+       /* Check for any auth details attached to the path_data node or any of
+        * its parents. */
+       for (p_cur = p; p_cur != NULL; p_top = p_cur, p_cur = p_cur->parent) {
+               if (p_cur->prot_space) {
+                       return p_cur->prot_space->auth;
+               }
+       }
 
-       /* Now consider ancestors */
-       for (; p; p = p->parent) {
-               /* The parent path entry is stored hung off the
-                * parent entry with an empty (not NULL) segment string.
-                * We look for this here.
-                */
-               for (q = p->children; q; q = q->next) {
-                       if (q->segment[0] == '\0')
-                               break;
+       /* Only when we have a realm (and canonical root of given URL), we can
+        * uniquely locate the protection space. */
+       if (realm != NULL) {
+               const struct host_part *h = (const struct host_part *)p_top;
+               const struct prot_space_data *space;
+
+               /* Search for a possible matching protection space. */
+               for (space = h->prot_space; space != NULL;
+                               space = space->next) {
+                       if (!strcmp(space->realm, realm)
+                                       && !strcmp(space->scheme, p->scheme)
+                                       && space->port == p->port) {
+                               p->prot_space = space;
+                               return p->prot_space->auth;
+                       }
                }
-
-               if (q && q->auth.realm && q->auth.auth)
-                       break;
        }
 
-       if (!q)
-               return NULL;
-
-       return q->auth.auth;
+       return NULL;
 }
 
 /**
@@ -975,7 +1005,7 @@
 bool urldb_get_cert_permissions(const char *url)
 {
        struct path_data *p;
-       struct host_part *h;
+       const struct host_part *h;
 
        assert(url);
 
@@ -985,8 +1015,9 @@
 
        for (; p && p->parent; p = p->parent)
                /* do nothing */;
+       assert(p);
 
-       h = (struct host_part *)p;
+       h = (const struct host_part *)p;
 
        return h->permit_invalid_certs;
 }
@@ -1001,48 +1032,63 @@
 void urldb_set_auth_details(const char *url, const char *realm,
                const char *auth)
 {
-       struct path_data *p;
-       char *urlt, *t1, *t2;
+       struct path_data *p, *pi;
+       struct host_part *h;
+       struct prot_space_data *space, *space_alloc;
+       char *realm_alloc, *auth_alloc, *scheme_alloc;
 
        assert(url && realm && auth);
 
-       urlt = strdup(url);
-       if (!urlt)
-               return;
-
-       /* strip leafname from URL */
-       t1 = strrchr(urlt, '/');
-       if (t1) {
-               *(t1 + 1) = '\0';
-       }
-
        /* add url, in case it's missing */
-       urldb_add_url(urlt);
+       urldb_add_url(url);
 
-       p = urldb_find_url(urlt);
+       p = urldb_find_url(url);
 
-       free(urlt);
-
        if (!p)
                return;
 
-       /** \todo search subtree for same realm/auth details
-        * and remove them (as the lookup routine searches up the tree) */
+       /* Search for host_part */
+       for (pi = p; pi->parent != NULL; pi = pi->parent)
+               ;
+       h = (struct host_part *)pi;
 
-       t1 = strdup(realm);
-       t2 = strdup(auth);
+       /* Search if given URL belongs to a protection space we already know 
of. */
+       for (space = h->prot_space; space; space = space->next) {
+               if (!strcmp(space->realm, realm)
+                               && !strcmp(space->scheme, p->scheme)
+                               && space->port == p->port)
+                       break;
+       }
 
-       if (!t1 || !t2) {
-               free(t1);
-               free(t2);
-               return;
+       if (space != NULL) {
+               /* Overrule existing auth. */
+               free(space->auth);
+               space->auth = strdup(auth);
+       } else {
+               /* Create a new protection space. */
+               space = space_alloc = malloc(sizeof(struct prot_space_data));
+               scheme_alloc = strdup(p->scheme);
+               realm_alloc = strdup(realm);
+               auth_alloc = strdup(auth);
+
+               if (!space_alloc || !scheme_alloc
+                               || !realm_alloc || !auth_alloc) {
+                       free(space_alloc);
+                       free(scheme_alloc);
+                       free(realm_alloc);
+                       free(auth_alloc);
+                       return;
+               }
+
+               space->scheme = scheme_alloc;
+               space->port = p->port;
+               space->realm = realm_alloc;
+               space->auth = auth_alloc;
+               space->next = h->prot_space;
+               h->prot_space = space;
        }
 
-       free(p->auth.realm);
-       free(p->auth.auth);
-
-       p->auth.realm = t1;
-       p->auth.auth = t2;
+       p->prot_space = space;
 }
 
 /**
@@ -1067,6 +1113,7 @@
 
        for (; p && p->parent; p = p->parent)
                /* do nothing */;
+       assert(p);
 
        h = (struct host_part *)p;
 
@@ -3878,6 +3925,7 @@
 {
        struct host_part *a, *b;
        struct path_data *p, *q;
+       struct prot_space_data *s, *t;
 
        /* Destroy children */
        for (a = root->children; a; a = b) {
@@ -3894,6 +3942,12 @@
        /* Root path */
        urldb_destroy_path_node_content(&root->paths);
 
+       /* Proctection space data */
+       for (s = root->prot_space; s; s = t) {
+               t = s->next;
+               urldb_destroy_prot_space(s);
+       }
+
        /* And ourselves */
        free(root->part);
        free(root);
@@ -3955,8 +4009,6 @@
                bitmap_destroy(node->thumb);
 
        free(node->urld.title);
-       free(node->auth.realm);
-       free(node->auth.auth);
 
        for (a = node->cookies; a; a = b) {
                b = a->next;
@@ -3981,6 +4033,21 @@
 }
 
 /**
+ * Destroy protection space data
+ *
+ * \param space Protection space to destroy
+ */
+void urldb_destroy_prot_space(struct prot_space_data *space)
+{
+       free(space->scheme);
+       free(space->realm);
+       free(space->auth);
+
+       free(space);
+}
+
+
+/**
  * Destroy a search tree
  *
  * \param root Root node of tree to destroy
Index: content/urldb.h
===================================================================
--- content/urldb.h     (revision 8988)
+++ content/urldb.h     (working copy)
@@ -85,7 +85,7 @@
 /* Authentication modification / lookup */
 void urldb_set_auth_details(const char *url, const char *realm,
                const char *auth);
-const char *urldb_get_auth_details(const char *url);
+const char *urldb_get_auth_details(const char *url, const char *realm);
 
 /* SSL certificate permissions */
 void urldb_set_cert_permissions(const char *url, bool permit);
Index: content/fetchers/fetch_curl.c
===================================================================
--- content/fetchers/fetch_curl.c       (revision 8988)
+++ content/fetchers/fetch_curl.c       (working copy)
@@ -560,7 +560,7 @@
                SETOPT(CURLOPT_COOKIE, NULL);
        }
 
-       if ((auth = urldb_get_auth_details(f->url)) != NULL) {
+       if ((auth = urldb_get_auth_details(f->url, NULL)) != NULL) {
                SETOPT(CURLOPT_HTTPAUTH, CURLAUTH_ANY);
                SETOPT(CURLOPT_USERPWD, auth);
        } else {
Index: content/fetchcache.c
===================================================================
--- content/fetchcache.c        (revision 8988)
+++ content/fetchcache.c        (working copy)
@@ -1,5 +1,6 @@
 /*
  * Copyright 2005 James Bursa <[email protected]>
+ * Copyright 2009 John-Mark Bell <[email protected]>
  *
  * This file is part of NetSurf, http://www.netsurf-browser.org/
  *
@@ -37,6 +38,7 @@
 #include "content/content.h"
 #include "content/fetchcache.h"
 #include "content/fetch.h"
+#include "content/urldb.h"
 #include "utils/log.h"
 #include "utils/messages.h"
 #include "utils/talloc.h"
@@ -58,6 +60,7 @@
 static void fetchcache_notmodified(struct content *c, const void *data);
 static void fetchcache_redirect(struct content *c, const void *data,
                unsigned long size);
+static void fetchcache_auth(struct content *c, const char *realm);
 
 
 /**
@@ -516,14 +519,7 @@
                        break;
 
                case FETCH_AUTH:
-                       /* data -> string containing the Realm */
-                       LOG(("FETCH_AUTH, '%s'", (const char *)data));
-                       c->fetch = 0;
-                       msg_data.auth_realm = data;
-                       content_broadcast(c, CONTENT_MSG_AUTH, msg_data);
-                       /* set the status to ERROR so that the content is
-                        * destroyed in content_clean() */
-                       c->status = CONTENT_STATUS_ERROR;
+                       fetchcache_auth(c, data);
                        break;
 
                case FETCH_CERT_ERR:
@@ -1136,6 +1132,92 @@
        free(referer);
 }
 
+/**
+ * Authentication callback handler
+ */
+
+void fetchcache_auth(struct content *c, const char *realm)
+{
+       char *referer;
+       const char *ref;
+       const char *auth;
+       struct content *parent;
+       bool parent_was_verifiable;
+       union content_msg_data msg_data;
+       char *headers = NULL;
+
+       /* Preconditions */
+       assert(c && realm);
+       assert(c->status == CONTENT_STATUS_TYPE_UNKNOWN);
+
+       /* Extract fetch details */
+       ref = fetch_get_referer(c->fetch);
+       parent = fetch_get_parent(c->fetch);
+       parent_was_verifiable = fetch_get_verifiable(c->fetch);
+
+       /* Clone referer -- original is destroyed in fetch_abort() */
+       referer = ref ? strdup(ref) : NULL;
+
+       fetch_abort(c->fetch);
+       c->fetch = NULL;
+
+       /* Ensure that referer cloning succeeded 
+        * _must_ be after content invalidation */
+       if (ref && !referer) {
+               LOG(("Failed cloning referer"));
+
+               c->status = CONTENT_STATUS_ERROR;
+               msg_data.error = messages_get("BadRedirect");
+               content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
+
+               return;
+       }
+
+       /* Now, see if we've got some auth details */
+       auth = urldb_get_auth_details(c->url, realm);
+
+       if (auth == NULL || c->tried_with_auth) {
+               /* No authentication details or we tried what we had, so ask
+                * our client for them. */
+               c->tried_with_auth = false; /* Allow rety. */
+
+               c->status = CONTENT_STATUS_ERROR;
+               msg_data.auth_realm = realm;
+               content_broadcast(c, CONTENT_MSG_AUTH, msg_data);
+
+               return;
+       }
+       /* Flag we're retry fetching with auth data. Will be used to detect
+        * wrong auth data so that we can ask our client for better auth. */
+       c->tried_with_auth = true;
+
+       /* We have authentication details. Fetch with them. */
+       /** \todo all the useful things like headers, POST. */
+       c->fetch = fetch_start(c->url, referer,
+                       fetchcache_callback, c,
+                       c->no_error_pages,
+                       NULL, NULL, parent_was_verifiable,
+                       parent, &headers);
+       if (c->fetch == NULL) {
+               char error_message[500];
+
+               LOG(("warning: fetch_start failed"));
+               snprintf(error_message, sizeof error_message,
+                               messages_get("InvalidURL"),
+                               c->url);
+               if (c->no_error_pages) {
+                       c->status = CONTENT_STATUS_ERROR;
+                       msg_data.error = error_message;
+                       content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
+               } else {
+                       fetchcache_error_page(c, error_message);
+               }
+       }
+
+       /* Clean up */
+       free(referer);
+}
+
 #ifdef TEST
 
 #include <unistd.h>
Index: content/content.h
===================================================================
--- content/content.h   (revision 8988)
+++ content/content.h   (working copy)
@@ -259,6 +259,7 @@
 
        bool no_error_pages;            /**< Used by fetchcache(). */
        bool download;                  /**< Used by fetchcache(). */
+       bool tried_with_auth;           /**< Used by fetchcache(). */
        unsigned int redirect_count;    /**< Used by fetchcache(). */
 
        /** Array of first n rendering errors or warnings. */
Index: content/content.c
===================================================================
--- content/content.c   (revision 8988)
+++ content/content.c   (working copy)
@@ -443,6 +443,7 @@
        c->http_code = 0;
        c->no_error_pages = false;
        c->download = false;
+       c->tried_with_auth = false;
        c->redirect_count = 0;
        c->error_count = 0;
        c->cache_data.req_time = 0;
Index: riscos/401login.c
===================================================================
--- riscos/401login.c   (revision 8988)
+++ riscos/401login.c   (working copy)
@@ -98,6 +98,7 @@
 {
        struct session_401 *session;
        wimp_w w;
+       const char *auth;
 
        session = calloc(1, sizeof(struct session_401));
        if (!session) {
@@ -111,10 +112,28 @@
                warn_user("NoMemory", 0);
                return;
        }
-       session->uname[0] = '\0';
-       session->pwd[0] = '\0';
+       if (realm == NULL)
+               realm = "Secure Area";
+        auth = urldb_get_auth_details(session->url, realm);
+       if (auth == NULL) {
+               session->uname[0] = '\0';
+               session->pwd[0] = '\0';
+       } else {
+               const char *pwd;
+               size_t pwd_len;
+
+               pwd = strchr(auth, ':');
+               assert(pwd && pwd < auth + sizeof(session->uname));
+               memcpy(session->uname, auth, pwd - auth);
+               session->uname[pwd - auth] = '\0';
+               ++pwd;
+               pwd_len = strlen(pwd);
+               assert(pwd_len < sizeof(session->pwd));
+               memcpy(session->pwd, pwd, pwd_len);
+               session->pwd[pwd_len] = '\0';
+       }
        session->host = strdup(host);
-       session->realm = strdup(realm ? realm : "Secure Area");
+       session->realm = strdup(realm);
        session->bwin = bw;
        if ((!session->host) || (!session->realm)) {
                free(session->host);
-8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<-

John.
-- 
John Tytgat
[email protected]

Reply via email to