Hi,

I've attached 3 patches to this mail. The first one eliminates some of
the error codes in Wget as a first step to cleaning up the error
reporting method.

The second is siimply fixing indentation issues.

The third updates our usage of libpsl according to the latest release
version and converts all domain names to lowercase before passing them
to libpsl for checking.

-- 
Thanking You,
Darshit Shah
From 64b57d26b6283fe1039999ad3aabbee8dbaa4c97 Mon Sep 17 00:00:00 2001
From: Darshit Shah <[email protected]>
Date: Sat, 5 Jul 2014 12:08:09 +0530
Subject: [PATCH 3/3] Convert domains to lowercase before libpsl checks

---
 src/ChangeLog |  5 +++++
 src/cookies.c | 26 ++++++++++++++++++++++++--
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/src/ChangeLog b/src/ChangeLog
index 5306cb2..6360303 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,5 +1,10 @@
 2014-07-05  Darshit Shah  <[email protected]>
 
+	* cookies.c (check_domain_match): Libpsl requires that all domain names
+	passed to it be in utf8 lower case.
+
+2014-07-05  Darshit Shah  <[email protected]>
+
 	* http.c (gethttp): Fix indentation of conditional block
 	(gethttp): Remove unneeded variable
 
diff --git a/src/cookies.c b/src/cookies.c
index a46aeee..b478060 100644
--- a/src/cookies.c
+++ b/src/cookies.c
@@ -501,7 +501,17 @@ numeric_address_p (const char *addr)
 /* Check whether COOKIE_DOMAIN is an appropriate domain for HOST.
    Originally I tried to make the check compliant with rfc2109, but
    the sites deviated too often, so I had to fall back to "tail
-   matching", as defined by the original Netscape's cookie spec.  */
+   matching", as defined by the original Netscape's cookie spec.
+
+   Wget now uses libpsl to check domain names against a public suffix
+   list to see if they are valid. However, since we don't provide a
+   psl on our own, if libpsl is compiled without a public suffix list,
+   fall back to using the original "tail matching" heuristic. Also if
+   libpsl is unable to convert the domain to lowercase, which means that
+   it doesnt have any runtime conversion support, we again fall back to
+   "tail matching" since libpsl states the results are unpredictable with
+   upper case strings.
+   */
 
 static bool
 check_domain_match (const char *cookie_domain, const char *host)
@@ -509,6 +519,7 @@ check_domain_match (const char *cookie_domain, const char *host)
 
 #ifdef HAVE_LIBPSL
   DEBUGP (("cdm: 1"));
+  char * cookie_domain_lower, * host_lower;
   const psl_ctx_t *psl;
   int is_acceptable;
 
@@ -519,7 +530,18 @@ check_domain_match (const char *cookie_domain, const char *host)
       goto no_psl;
     }
 
-  is_acceptable = psl_is_cookie_domain_acceptable (psl, host, cookie_domain);
+  if (psl_str_to_utf8lower (cookie_domain, NULL, NULL, &cookie_domain_lower) != PSL_SUCCESS ||
+      psl_str_to_utf8lower (host, NULL, NULL, &host_lower) != PSL_SUCCESS)
+    {
+        DEBUGP (("libpsl unable to parse domain name. "
+                 "Falling back to simple heuristics.\n"));
+        goto no_psl;
+    }
+
+  is_acceptable = psl_is_cookie_domain_acceptable (psl, host_lower, cookie_domain_lower);
+  xfree (cookie_domain_lower);
+  xfree (host_lower);
+
   return true ? (is_acceptable == 1) : false;
 
 no_psl:
-- 
2.0.1

From eccb40cac9aa5a446a2b6c1eb61710930223106b Mon Sep 17 00:00:00 2001
From: Darshit Shah <[email protected]>
Date: Sat, 5 Jul 2014 11:54:46 +0530
Subject: [PATCH 2/3] Fix indentation and remove excess variable

---
 src/ChangeLog |  5 +++++
 src/http.c    | 21 ++++++++++-----------
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/src/ChangeLog b/src/ChangeLog
index 7962213..5306cb2 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,8 @@
+2014-07-05  Darshit Shah  <[email protected]>
+
+	* http.c (gethttp): Fix indentation of conditional block
+	(gethttp): Remove unneeded variable
+
 2014-07-03  Darshit Shah  <[email protected]>
 
 	* wget.h (uerr_t): Remove unused error codes
diff --git a/src/http.c b/src/http.c
index 4fac3ba..e3c105f 100644
--- a/src/http.c
+++ b/src/http.c
@@ -2902,19 +2902,18 @@ read_header:
         }
       else if (ALLOW_CLOBBER || count > 0)
         {
-      if (opt.unlink && file_exists_p (hs->local_file))
-        {
-          int res = unlink (hs->local_file);
-          if (res < 0)
+          if (opt.unlink && file_exists_p (hs->local_file))
             {
-              logprintf (LOG_NOTQUIET, "%s: %s\n", hs->local_file,
-                         strerror (errno));
-              CLOSE_INVALIDATE (sock);
-              xfree (head);
-              xfree_null (type);
-              return UNLINKERR;
+              if (unlink (hs->local_file) < 0)
+                {
+                  logprintf (LOG_NOTQUIET, "%s: %s\n", hs->local_file,
+                             strerror (errno));
+                  CLOSE_INVALIDATE (sock);
+                  xfree (head);
+                  xfree_null (type);
+                  return UNLINKERR;
+                }
             }
-        }
 
 #ifdef __VMS
           int open_id;
-- 
2.0.1

From 91fe00af91825bd80f4b300ba45d5874c3c79a46 Mon Sep 17 00:00:00 2001
From: Darshit Shah <[email protected]>
Date: Thu, 3 Jul 2014 20:53:33 +0530
Subject: [PATCH 1/3] Remove usunused error codes

---
 src/ChangeLog |  5 +++++
 src/http.c    |  2 +-
 src/wget.h    | 14 ++++++--------
 3 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/src/ChangeLog b/src/ChangeLog
index d19dc8d..7962213 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,8 @@
+2014-07-03  Darshit Shah  <[email protected]>
+
+	* wget.h (uerr_t): Remove unused error codes
+	* http.c: (http_loop): Remove reference to unused error code
+
 2014-06-30  Giuseppe Scrivano  <[email protected]>
 
 	* convert.c (local_quote_string): Initialize newname.
diff --git a/src/http.c b/src/http.c
index 383e9f3..4fac3ba 100644
--- a/src/http.c
+++ b/src/http.c
@@ -3169,7 +3169,7 @@ Spider mode enabled. Check if remote file exists.\n"));
 
       switch (err)
         {
-        case HERR: case HEOF: case CONSOCKERR: case CONCLOSED:
+        case HERR: case HEOF: case CONSOCKERR:
         case CONERROR: case READERR: case WRITEFAILED:
         case RANGEERR: case FOPEN_EXCL_ERR:
           /* Non-fatal errors continue executing the loop, which will
diff --git a/src/wget.h b/src/wget.h
index 331e8e2..3b6ff86 100644
--- a/src/wget.h
+++ b/src/wget.h
@@ -334,21 +334,19 @@ typedef enum
 {
   /*  0  */
   NOCONERROR, HOSTERR, CONSOCKERR, CONERROR, CONSSLERR,
-  CONIMPOSSIBLE, NEWLOCATION, NOTENOUGHMEM /* ! */,
-  CONPORTERR /* ! */, CONCLOSED /* ! */,
+  CONIMPOSSIBLE, NEWLOCATION,
   /* 10  */
   FTPOK, FTPLOGINC, FTPLOGREFUSED, FTPPORTERR, FTPSYSERR,
-  FTPNSFOD, FTPRETROK /* ! */, FTPUNKNOWNTYPE, FTPRERR, FTPREXC /* ! */,
+  FTPNSFOD, FTPUNKNOWNTYPE, FTPRERR,
   /* 20  */
   FTPSRVERR, FTPRETRINT, FTPRESTFAIL, URLERROR, FOPENERR,
-  FOPEN_EXCL_ERR, FWRITEERR, HOK /* ! */, HLEXC /* ! */, HEOF,
+  FOPEN_EXCL_ERR, FWRITEERR, HEOF,
   /* 30  */
-  HERR, RETROK, RECLEVELEXC, FTPACCDENIED /* ! */, WRONGCODE,
+  HERR, RETROK, RECLEVELEXC, WRONGCODE,
   FTPINVPASV, FTPNOPASV, CONTNOTSUPPORTED, RETRUNNEEDED, RETRFINISHED,
   /* 40  */
-  READERR, TRYLIMEXC, URLBADPATTERN /* ! */, FILEBADFILE /* ! */, RANGEERR,
-  RETRBADPATTERN, RETNOTSUP /* ! */, ROBOTSOK /* ! */, NOROBOTS /* ! */,
-  PROXERR,
+  READERR, TRYLIMEXC, FILEBADFILE, RANGEERR,
+  RETRBADPATTERN, PROXERR,
   /* 50  */
   AUTHFAILED, QUOTEXC, WRITEFAILED, SSLINITFAILED, VERIFCERTERR,
   UNLINKERR, NEWLOCATION_KEEP_POST, CLOSEFAILED, ATTRMISSING, UNKNOWNATTR,
-- 
2.0.1

Reply via email to