On 11 Apr 2002 at 21:00, Hrvoje Niksic wrote:
> This change is fine with me. I vaguely remember that this test is
> performed in two places; you might want to create a function.
I've found three places where it checks the suffix, so I called a
new function in all three places for consistency. One of those
places performed a case-insensitive comparison so I made my
function do that too.
Hrvoje, you may wish to review whether checking the new extensions
in all three places (but particularly recur.c) is a good idea or
not before I commit the patch.
src/ChangeLog entry:
2002-04-12 Ian Abbott [EMAIL PROTECTED]
* utils.c (has_html_suffix_p): New function to test filename for common
html extensions.
* utils.h: Declare it.
* http.c (http_loop): Use it instead of previous test.
* retr.c (retrieve_url): Ditto.
* recur.c (download_child_p): Ditto.
Index: src/http.c
===
RCS file: /pack/anoncvs/wget/src/http.c,v
retrieving revision 1.86
diff -u -r1.86 http.c
--- src/http.c 2002/04/11 17:49:32 1.86
+++ src/http.c 2002/04/12 17:35:02
@@ -1405,7 +1405,7 @@
int use_ts, got_head = 0;/* time-stamping info */
char *filename_plus_orig_suffix;
char *local_filename = NULL;
- char *tms, *suf, *locf, *tmrate;
+ char *tms, *locf, *tmrate;
uerr_t err;
time_t tml = -1, tmr = -1; /* local and remote time-stamps */
long local_size = 0; /* the size of the local file */
@@ -1465,9 +1465,8 @@
*dt |= RETROKF;
/* Bogusness alert. */
- /* If its suffix is html or htm, assume text/html. */
- if (((suf = suffix (*hstat.local_file)) != NULL)
- && (!strcmp (suf, "html") || !strcmp (suf, "htm")))
+ /* If its suffix is html or htm or similar, assume text/html. */
+ if (has_html_suffix_p (*hstat.local_file))
*dt |= TEXTHTML;
FREE_MAYBE (dummy);
Index: src/recur.c
===
RCS file: /pack/anoncvs/wget/src/recur.c,v
retrieving revision 1.43
diff -u -r1.43 recur.c
--- src/recur.c 2002/02/19 06:09:57 1.43
+++ src/recur.c 2002/04/12 17:35:02
@@ -510,7 +510,6 @@
/* 6. */
{
-char *suf;
/* Check for acceptance/rejection rules. We ignore these rules
for HTML documents because they might lead to other files which
need to be downloaded. Of course, we don't know which
@@ -521,14 +520,13 @@
* u->file is not "" (i.e. it is not a directory)
and either:
+ there is no file suffix,
-+ or there is a suffix, but is not html or htm,
++ or there is a suffix, but is not html or htm or similar,
+ both:
- recursion is not infinite,
- and we are at its very end. */
if (u->file[0] != '\0'
-&& ((suf = suffix (url)) == NULL
- || (0 != strcmp (suf, "html") && 0 != strcmp (suf, "htm"))
+&& (!has_html_suffix_p (url)
|| (opt.reclevel != INFINITE_RECURSION && depth >= opt.reclevel)))
{
if (!acceptable (u->file))
Index: src/retr.c
===
RCS file: /pack/anoncvs/wget/src/retr.c,v
retrieving revision 1.50
diff -u -r1.50 retr.c
--- src/retr.c 2002/01/30 19:12:20 1.50
+++ src/retr.c 2002/04/12 17:35:03
@@ -384,12 +384,11 @@
/* There is a possibility of having HTTP being redirected to
FTP. In these cases we must decide whether the text is HTML
-according to the suffix. The HTML suffixes are `.html' and
-`.htm', case-insensitive. */
+according to the suffix. The HTML suffixes are `.html',
+`.htm' and a few others, case-insensitive. */
if (redirection_count && local_file && u->scheme == SCHEME_FTP)
{
- char *suf = suffix (local_file);
- if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm")))
+ if (has_html_suffix_p (local_file))
*dt |= TEXTHTML;
}
}
Index: src/utils.c
===
RCS file: /pack/anoncvs/wget/src/utils.c,v
retrieving revision 1.44
diff -u -r1.44 utils.c
--- src/utils.c 2002/01/17 01:03:33 1.44
+++ src/utils.c 2002/04/12 17:35:03
@@ -792,6 +792,30 @@
return NULL;
}
+/* Checks whether a filename has a typical HTML suffix or not. The
+ following suffixes are presumed to be html files (case insensitive):
+
+ html
+ htm
+ ?html (where ? is any character)
+
+ This is not necessarily a good indication that the file actually contains
+ HTML! */
+int has_html_suffix_p (const char *fname)
+{
+ char *suf;
+
+ if ((suf = suffix (fname)) == NULL)
+return 0;
+ if (!strcasecmp (suf, "html"))
+return 1;
+ if (!strcasecmp (suf, "htm"))
+return 1;
+ if (suf[0] && !strcasecmp (suf + 1, "html"))
+return 1;
+ return 0;
+}
+
/* Read a line from FP and return the pointer to