Hi,

This is an updated version of the patch to build against the latest
libpsl release which changes the so-numbers.

P.S.: In case anyone is trying to test this on an Arch Linux system,
I've uploaded libpsl to the AUR.
For users of Debian and derivative systems, I think there's a package
coming very soon. Tim would know better about the current state. But
you can always build and install manually using make install.

On Fri, May 30, 2014 at 10:24 PM, Darshit Shah <[email protected]> wrote:
> I've attached a patch that adds support for using libpsl for cookie
> domain checking in Wget.
>
> The old heuristic checks still remain as a fallback. When the libpsl
> library on the system is built without the builtin list, Wget simply
> falls back to the old heuristic checks. Similarly, if Wget is built
> without libpsl support, it continues to use the old cookie domain
> checking code.
>
> I've removed the check for numeric addresses since it seems unneeded.
> The host and cookie_domain variables will be compared for a full check
> either way.
>
> --
> Thanking You,
> Darshit Shah



-- 
Thanking You,
Darshit Shah
From 9596b6842c66d83f97c38c0b971d579877f23f6f Mon Sep 17 00:00:00 2001
From: Darshit Shah <[email protected]>
Date: Fri, 30 May 2014 22:10:12 +0530
Subject: [PATCH] Support libpsl for cookie domain checking

---
 ChangeLog           |  5 +++++
 NEWS                |  2 ++
 README.checkout     | 39 +++++++++++++++++++++------------------
 configure.ac        | 11 +++++++++++
 src/ChangeLog       |  6 +++++-
 src/build_info.c.in |  1 +
 src/cookies.c       | 24 +++++++++++++++++++-----
 7 files changed, 64 insertions(+), 24 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index a48e469..cb3114d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2014-05-30  Darshit Shah  <[email protected]>
+
+	* configure.ac: Allow compilation without libpsl.
+	* README.checkout: Add libpsl as a dependency.
+
 2014-05-24  Giuseppe Scrivano  <[email protected]>
 
 	* gnulib: update module.
diff --git a/NEWS b/NEWS
index d911feb..2922cde 100644
--- a/NEWS
+++ b/NEWS
@@ -8,6 +8,8 @@ Please send GNU Wget bug reports to <[email protected]>.
 
 * Changes in Wget X.Y.Z
 
+** Use libpsl for verifying cookie domains
+
 ** Default progress bar output changed
 
 ** Introduce --show-progress to force display the progress bar
diff --git a/README.checkout b/README.checkout
index 7c0f42b..ef0eded 100644
--- a/README.checkout
+++ b/README.checkout
@@ -68,6 +68,8 @@ Compiling From Repository Sources
 
      * [34]git is used to fetch gnulib files trough the bootstrap.sh script.
 
+     * [35]libpsl is (optionally) required for checking cookie domains.
+
    For those who might be confused as to what to do once they check out
    the source code, considering configure and Makefile do not yet exist at
    that point, a shell script called bootstrap.sh has been provided. After
@@ -96,21 +98,21 @@ Compiling From Repository Sources
 
  Originally written by Hrvoje Niksic <[email protected]>.
 
-     * [35]Edit
-     * [36]Comments
-     * [37]Info
-     * [38]Attachments
+     * [36]Edit
+     * [37]Comments
+     * [38]Info
+     * [39]Attachments
      * More Actions:
        [Raw Text................] Do
 
-     * [39]MoinMoin Powered
-     * [40]Python Powered
-     * [41]GPL licensed
-     * [42]Valid HTML 4.01
+     * [40]MoinMoin Powered
+     * [41]Python Powered
+     * [42]GPL licensed
+     * [43]Valid HTML 4.01
      __________________________________________________________________
 
    All content © 2007 Free Software Foundation. For terms of use,
-   redistribution, and modification, please see the [43]WikiLicense page.
+   redistribution, and modification, please see the [44]WikiLicense page.
 
 References
 
@@ -129,12 +131,13 @@ References
   32. http://www.gnu.org/software/libidn/
   33. http://www.gnu.org/software/libiconv/
   34. http://git-scm.com/
-  35. http://wget.addictivecode.org/CompilingRepoSources?action=edit&editor=text
-  36. http://wget.addictivecode.org/CompilingRepoSources
-  37. http://wget.addictivecode.org/CompilingRepoSources?action=info
-  38. http://wget.addictivecode.org/CompilingRepoSources?action=AttachFile
-  39. http://moinmo.in/
-  40. http://moinmo.in/Python
-  41. http://moinmo.in/GPL
-  42. http://validator.w3.org/check?uri=referer
-  43. http://wget.addictivecode.org/WikiLicense
+  35. https://github.com/rockdaboot/libpsl
+  36. http://wget.addictivecode.org/CompilingRepoSources?action=edit&editor=text
+  37. http://wget.addictivecode.org/CompilingRepoSources
+  38. http://wget.addictivecode.org/CompilingRepoSources?action=info
+  39. http://wget.addictivecode.org/CompilingRepoSources?action=AttachFile
+  40. http://moinmo.in/
+  41. http://moinmo.in/Python
+  42. http://moinmo.in/GPL
+  43. http://validator.w3.org/check?uri=referer
+  44. http://wget.addictivecode.org/WikiLicense
diff --git a/configure.ac b/configure.ac
index c5437bf..d2c2e70 100644
--- a/configure.ac
+++ b/configure.ac
@@ -61,6 +61,10 @@ dnl
 dnl Process features.
 dnl
 
+AC_ARG_WITH(libpsl,
+    AS_HELP_STRING([--without-libpsl],
+                   [disable support for libpsl cookie checking.]))
+
 AC_ARG_WITH(ssl,
 [[  --without-ssl           disable SSL autodetection
   --with-ssl={gnutls,openssl} specify the SSL backend.  GNU TLS is the default.]])
@@ -237,6 +241,11 @@ dnl
 dnl Checks for libraries.
 dnl
 
+AS_IF([test x"$with_libpsl" != xno], [
+  with_libpsl=yes
+  AC_CHECK_LIB([psl], [psl_builtin])
+])
+
 AS_IF([test x"$with_zlib" != xno], [
   with_zlib=yes
   AC_CHECK_LIB(z, compress)
@@ -358,6 +367,7 @@ else
   fi
 fi
 
+
 dnl **********************************************************************
 dnl Checks for IPv6
 dnl **********************************************************************
@@ -580,6 +590,7 @@ AC_MSG_NOTICE([Summary of build options:
   Libs:              $LIBS
   SSL:               $with_ssl
   Zlib:              $with_zlib
+  PSL:               $with_libpsl
   Digest:            $ENABLE_DIGEST
   NTLM:              $ENABLE_NTLM
   OPIE:              $ENABLE_OPIE
diff --git a/src/ChangeLog b/src/ChangeLog
index ceac5aa..10dd42b 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,5 +1,10 @@
 2014-05-30  Darshit Shah  <[email protected]>
 
+	* cookies.c (check_domain_match): Use libpsl to check if the cookie domain
+	is valid. Also remove unneeded test for numeric addresses.
+
+2014-05-30  Darshit Shah  <[email protected]>
+
 	* connect.{c,h}, convert.{c,h}, cookies.{c,h}, ftp-ls.c, ftp.h, gettext.h,
 	hash.h, host.h, html-parse.h, html-url.h, http.c, init.c, main.c, mswindows.c,
 	netrc.h, openssl.c, options.h, ptimer.h, recur.c, retr.c, sysdep.h, url.h,
@@ -22,7 +27,6 @@
 	needs to modify the string.
 	(bar_set_params): Add support for noscroll parameter to bar.
 
-
 2014-05-03  Tim Ruehsen  <[email protected]>
 
 	* ftp-ls.c (ftp_parse_vms_ls): Explicitly typecast strlen's output
diff --git a/src/build_info.c.in b/src/build_info.c.in
index c0b1677..7b46da8 100644
--- a/src/build_info.c.in
+++ b/src/build_info.c.in
@@ -7,6 +7,7 @@ large-file      SIZEOF_OFF_T >= 8
 nls             defined ENABLE_NLS
 ntlm            defined ENABLE_NTLM
 opie            defined ENABLE_OPIE
+psl             defined HAVE_LIBPSL
 
 ssl choice:
     openssl     defined HAVE_LIBSSL || defined HAVE_LIBSSL32
diff --git a/src/cookies.c b/src/cookies.c
index 7f5ba96..2c78fdf 100644
--- a/src/cookies.c
+++ b/src/cookies.c
@@ -51,6 +51,7 @@ as that of the covered work.  */
 #include <assert.h>
 #include <errno.h>
 #include <time.h>
+#include <libpsl.h>
 #include "utils.h"
 #include "hash.h"
 #include "cookies.h"
@@ -503,14 +504,27 @@ numeric_address_p (const char *addr)
 static bool
 check_domain_match (const char *cookie_domain, const char *host)
 {
+
+#ifdef HAVE_LIBPSL
   DEBUGP (("cdm: 1"));
+  const psl_ctx_t *psl;
+  int is_acceptable;
+
+  if (!(psl = psl_builtin()))
+    {
+      DEBUGP (("\nlibpsl not built with a public suffix list. "
+               "Falling back to simple heuristics.\n"));
+      goto no_psl;
+    }
+
+  is_acceptable = psl_is_cookie_domain_acceptable (psl, host, cookie_domain);
+  return true ? (is_acceptable == 1) : false;
 
-  /* Numeric address requires exact match.  It also requires HOST to
-     be an IP address.  */
-  if (numeric_address_p (cookie_domain))
-    return 0 == strcmp (cookie_domain, host);
+no_psl:
+#endif
 
-  DEBUGP ((" 2"));
+  /* For efficiency make some elementary checks first */
+  DEBUGP (("cdm: 2"));
 
   /* For the sake of efficiency, check for exact match first. */
   if (0 == strcasecmp (cookie_domain, host))
-- 
1.9.3

Reply via email to