On Wed, 15 May 2002 18:44:19 +0900, Kiyotaka Doumae <[EMAIL PROTECTED]> wrote:

>We have the following HTML document.
>
>https://www.example.com/index.html
>-----
><html>
><body>
><a href="http://www.wget.org/">Another Website</a>
></body>
></html>
>-----
>
>We run wget with -r option.
>
>> wget -r https://www.example.com/index.html
>
>wget gets http://www.wget.org/ and other URLs which are
>linked from http://www.wget.org/.

Thanks again for the bug report and the proposed patch.  I thought some
of the scheme tests in recur.c were getting messy, so I propose the
following patch, which uses a function to check for similar schemes.

The patch incorporates your bug-fix in step 7 of download_child_p() and
makes a similar change in step 4 for consistency.

src/ChangeLog entry:

2002-05-15  Ian Abbott  <[EMAIL PROTECTED]>

        * url.c (schemes_are_similar_p): New function to test enumerated
        scheme codes for similarity.

        * url.h: Declare it.

        * recur.c (download_child_p): Use it to compare schemes.  This
        also fixes a bug that allows hosts to be spanned (without the
        -H option) when the parent scheme is https and the child's is
        http or vice versa.

Index: src/recur.c
===================================================================
RCS file: /pack/anoncvs/wget/src/recur.c,v
retrieving revision 1.48
diff -u -r1.48 recur.c
--- src/recur.c 2002/04/21 04:25:07     1.48
+++ src/recur.c 2002/05/15 13:05:35
@@ -415,6 +415,7 @@
 {
   struct url *u = upos->url;
   const char *url = u->url;
+  int u_scheme_like_http;
 
   DEBUGP (("Deciding whether to enqueue \"%s\".\n", url));
 
@@ -445,12 +446,11 @@
      More time- and memory- consuming tests should be put later on
      the list.  */
 
+  /* Determine whether URL under consideration has a HTTP-like scheme. */
+  u_scheme_like_http = schemes_are_similar_p (u->scheme, SCHEME_HTTP);
+
   /* 1. Schemes other than HTTP are normally not recursed into. */
-  if (u->scheme != SCHEME_HTTP
-#ifdef HAVE_SSL
-      && u->scheme != SCHEME_HTTPS
-#endif
-      && !(u->scheme == SCHEME_FTP && opt.follow_ftp))
+  if (!u_scheme_like_http && !(u->scheme == SCHEME_FTP && opt.follow_ftp))
     {
       DEBUGP (("Not following non-HTTP schemes.\n"));
       goto out;
@@ -458,11 +458,7 @@
 
   /* 2. If it is an absolute link and they are not followed, throw it
      out.  */
-  if (u->scheme == SCHEME_HTTP
-#ifdef HAVE_SSL
-      || u->scheme == SCHEME_HTTPS
-#endif
-      )
+  if (schemes_are_similar_p (u->scheme, SCHEME_HTTP))
     if (opt.relative_only && !upos->link_relative_p)
       {
        DEBUGP (("It doesn't really look like a relative link.\n"));
@@ -483,7 +479,7 @@
      opt.no_parent.  Also ignore it for documents needed to display
      the parent page when in -p mode.  */
   if (opt.no_parent
-      && u->scheme == start_url_parsed->scheme
+      && schemes_are_similar_p (u->scheme, start_url_parsed->scheme)
       && 0 == strcasecmp (u->host, start_url_parsed->host)
       && u->port == start_url_parsed->port
       && !(opt.page_requisites && upos->link_inline_p))
@@ -526,7 +522,7 @@
     }
 
   /* 7. */
-  if (u->scheme == parent->scheme)
+  if (schemes_are_similar_p (u->scheme, parent->scheme))
     if (!opt.spanhost && 0 != strcasecmp (parent->host, u->host))
       {
        DEBUGP (("This is not the same hostname as the parent's (%s and %s).\n",
@@ -535,13 +531,7 @@
       }
 
   /* 8. */
-  if (opt.use_robots
-      && (u->scheme == SCHEME_HTTP
-#ifdef HAVE_SSL
-         || u->scheme == SCHEME_HTTPS
-#endif
-         )
-      )
+  if (opt.use_robots && u_scheme_like_http)
     {
       struct robot_specs *specs = res_get_specs (u->host, u->port);
       if (!specs)
Index: src/url.c
===================================================================
RCS file: /pack/anoncvs/wget/src/url.c,v
retrieving revision 1.74
diff -u -r1.74 url.c
--- src/url.c   2002/04/13 03:04:47     1.74
+++ src/url.c   2002/05/15 13:05:36
@@ -2472,6 +2472,24 @@
       downloaded_files_hash = NULL;
     }
 }
+
+/* Return non-zero if scheme a is similar to scheme b.
+ 
+   Schemes are similar if they are equal.  If SSL is supported, schemes
+   are also similar if one is http (SCHEME_HTTP) and the other is https
+   (SCHEME_HTTPS).  */
+int
+schemes_are_similar_p (enum url_scheme a, enum url_scheme b)
+{
+  if (a == b)
+    return 1;
+#ifdef HAVE_SSL
+  if ((a == SCHEME_HTTP && b == SCHEME_HTTPS)
+      || (a == SCHEME_HTTPS && b == SCHEME_HTTP))
+    return 1;
+#endif
+  return 0;
+}
 
 #if 0
 /* Debugging and testing support for path_simplify. */
Index: src/url.h
===================================================================
RCS file: /pack/anoncvs/wget/src/url.h,v
retrieving revision 1.23
diff -u -r1.23 url.h
--- src/url.h   2002/04/13 03:04:47     1.23
+++ src/url.h   2002/05/15 13:05:36
@@ -158,4 +158,6 @@
 
 char *rewrite_shorthand_url PARAMS ((const char *));
 
+int schemes_are_similar_p PARAMS ((enum url_scheme a, enum url_scheme b));
+
 #endif /* URL_H */

                

Reply via email to