Hi,
I thought this might have been fixed in this version, but it wasn't.
When case_sensitive is false in the configuration file, htdig should
ignore "/Foobar.Html" if it has already visited "/foobar.html",
but this was not the case. Basically, when the configuration has
"case_sensitive:0", this patch lowercases every URL found within
a document.
--Patrick
--- Retriever.cc Wed Sep 22 12:18:40 1999
+++ Retriever.cc.new Wed Sep 29 21:16:41 1999
@@ -984,9 +984,15 @@
{
DocumentRef *ref;
Server *server;
+ String caseurl;
+ String casesens;
+ casesens = config["case_sensitive"];
+ caseurl << url.get();
+ if (strstr(casesens,"0"))
+ caseurl.lowercase();
if (debug > 2)
- cout << "href: " << url.get() << " (" << description << ')' << endl;
+ cout << "href: " << caseurl << " (" << description << ')' << endl;
n_links++;
@@ -1004,7 +1010,7 @@
//
if (debug > 2)
{
- cout << "resolving '" << url.get() << "'\n";
+ cout << "resolving '" << caseurl << "'\n";
cout.flush();
}
@@ -1018,17 +1024,17 @@
// current document is never referenced before, as in a
// start_url.
- if (strcmp(url.get(), current_ref->DocURL()) == 0)
+ if (strcmp(caseurl, current_ref->DocURL()) == 0)
{
current_ref->DocBackLinks(current_ref->DocBackLinks() + 1);
current_ref->AddDescription(description);
}
- else if (limitsn.FindFirst(url.get()) >= 0)
+ else if (limitsn.FindFirst(caseurl) >= 0)
{
//
// First add it to the document database
//
- ref = docs[url.get()];
+ ref = docs[caseurl];
// if ref exists we have to call AddDescription even
// if max_hop_count is reached
if (!ref && currenthopcount + 1 > max_hop_count)
@@ -1045,7 +1051,7 @@
ref->DocHopCount(currenthopcount + 1);
}
ref->DocBackLinks(ref->DocBackLinks() + 1); // This one!
- ref->DocURL(url.get());
+ ref->DocURL(caseurl);
ref->AddDescription(description);
//
@@ -1068,7 +1074,7 @@
//
// Now put it in the list of URLs to still visit.
//
- if (Need2Get(url.get()))
+ if (Need2Get(caseurl))
{
if (debug > 1)
cout << "\n pushing " << url.get() << endl;
@@ -1084,10 +1090,10 @@
//
// Let's just be sure we're not pushing an empty URL
//
- if (strlen(url.get()))
- server->push(url.get(), ref->DocHopCount(), base->get());
+ if (strlen(caseurl))
+ server->push(caseurl, ref->DocHopCount(), base->get());
- String temp = url.get();
+ String temp = caseurl;
visited.Add(temp, 0);
if (debug)
cout << '+';
------------------------------------
To unsubscribe from the htdig3-dev mailing list, send a message to
[EMAIL PROTECTED] containing the single word "unsubscribe" in
the SUBJECT of the message.