According to Joe R. Jah:
> To get a pure relative URL page in order to have the browser prepend the
> rest of the URL you can do:
>
> indexing.conf:
> url_part_aliases: http://users.mysite.it/ *2
>
> searching.conf:
> url_part_aliases: /./ *2
>
> First I tried it with just "/", but if you do it like:
>
> searching.conf:
> url_part_aliases: / *2
>
> You would get a premature end of script error in your server error log.
> There may be a way to patch htlib/HtWordCodec.cc to allow that too. I
> think it expects more than one character;-/
OK, I finally took some time to get to the bottom of this one, which was
a bit of a pain to track down. The problem isn't the HtWordCodec class
itself. It handles single characters just fine. The problem is the way
htsearch 3.1.5 uses the class. 3.2 doesn't suffer from this problem.
The problem is that the URL gets decoded just fine, but then when htsearch
needs to look up the db.docdb record for a given URL, it re-encodes the
URL. This is because in 3.1, the db.docdb is keyed by URL, not by docID
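as in 3.2. But, when you re-encode / to *2, every slash in the URL gets
replaced, not just the one that came from decoding *2, so the re-encoded
URL no longer matches the key and htsearch can't find the matching docdb
record. The fix is to keep the encoded URL for docdb lookups.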
Here's the patch for 3.1.5, to fix the problem. NOTE: THIS PATCH IS
INCOMPATIBLE WITH THE dateRange.1 PATCH ON ftp.ccsf.org! See below for
the fix. Apply this patch by cutting it out of this message, saving it
to a file, and using "patch -p0 < your-file".
As always, feedback is welcome. Please let me know if there are any problems
with this code.
-------- 8< -------- (cut here) -------- 8< --------
--- htcommon/DocumentDB.h.refbug Thu Feb 24 20:29:10 2000
+++ htcommon/DocumentDB.h Mon Jul 16 17:12:05 2001
@@ -58,6 +58,7 @@ public:
int Add(DocumentRef &);
DocumentRef *operator [] (char *url);
+ DocumentRef *FindCoded(char *url);
int Exists(char *url);
int Delete(char *url);
--- htcommon/DocumentDB.cc.refbug Thu Feb 24 20:29:10 2000
+++ htcommon/DocumentDB.cc Mon Jul 16 17:12:05 2001
@@ -152,6 +152,24 @@ DocumentRef *DocumentDB::operator [] (ch
//*****************************************************************************
+// DocumentRef *DocumentDB::FindCoded(char *u)
+//
+DocumentRef *DocumentDB::FindCoded(char *u)
+{
+ String data;
+ String url = u;
+
+ if (dbf->Get(url, data) == NOTOK
+ && (! myTryUncoded || dbf->Get(HtURLCodec::instance()->decode(url), data) == NOTOK))
+ return 0;
+
+ DocumentRef *ref = new DocumentRef;
+ ref->Deserialize(data);
+ return ref;
+}
+
+
+//*****************************************************************************
// int DocumentDB::Exists(char *u)
//
int DocumentDB::Exists(char *u)
--- htsearch/Display.cc.refbug Thu Feb 24 20:29:11 2000
+++ htsearch/Display.cc Mon Jul 16 17:14:28 2001
@@ -179,7 +179,7 @@ Display::display(int pageNumber)
{
if (currentMatch >= startAt)
{
- match->setRef(docDB[match->getURL()]);
+ match->setRef(docDB.FindCoded(match->getURL()));
DocumentRef *ref = match->getRef();
if (!ref)
continue; // The document isn't present for some reason
@@ -235,8 +235,9 @@ Display::displayMatch(ResultMatch *match
DocumentRef *ref = match->getRef();
- char *url = match->getURL();
- vars.Add("URL", new String(url));
+ char *coded_url = match->getURL();
+ String url = HtURLCodec::instance()->decode(coded_url);
+ vars.Add("URL", new String(url.get()));
int iA = ref->DocAnchor();
@@ -1029,7 +1030,7 @@ Display::buildMatchList()
thisMatch = new ResultMatch();
- thisMatch->setURL(url);
+ thisMatch->setURL(coded_url);
thisMatch->setRef(NULL);
//
@@ -1056,7 +1057,7 @@ Display::buildMatchList()
if (date_factor != 0.0 || backlink_factor != 0.0 || typ != SortByScore)
{
- DocumentRef *thisRef = docDB[thisMatch->getURL()];
+ DocumentRef *thisRef = docDB.FindCoded(thisMatch->getURL());
if (thisRef) // We better hope it's not null!
{
score += date_factor *
-------- 8< -------- (cut here) -------- 8< --------
If you've already applied the dateRange.1 patch to 3.1.5, use the same
patches above for DocumentDB.h and .cc, but replace the Display.cc patch
above with this one before applying:
-------- 8< -------- (cut here) -------- 8< --------
--- htsearch/Display.cc.refbug Mon Jul 16 17:19:49 2001
+++ htsearch/Display.cc Mon Jul 16 17:42:32 2001
@@ -179,7 +179,7 @@ Display::display(int pageNumber)
{
if (currentMatch >= startAt)
{
- match->setRef(docDB[match->getURL()]);
+ match->setRef(docDB.FindCoded(match->getURL()));
DocumentRef *ref = match->getRef();
if (!ref)
continue; // The document isn't present for some reason
@@ -235,8 +235,9 @@ Display::displayMatch(ResultMatch *match
DocumentRef *ref = match->getRef();
- char *url = match->getURL();
- vars.Add("URL", new String(url));
+ char *coded_url = match->getURL();
+ String url = HtURLCodec::instance()->decode(coded_url);
+ vars.Add("URL", new String(url.get()));
int iA = ref->DocAnchor();
@@ -1220,7 +1221,7 @@ Display::buildMatchList()
thisMatch = new ResultMatch();
- thisMatch->setURL(url);
+ thisMatch->setURL(coded_url);
thisMatch->setRef(NULL);
//
@@ -1249,19 +1250,19 @@ Display::buildMatchList()
if (date_factor != 0.0 || backlink_factor != 0.0 || typ != SortByScore
|| timet_startdate > 0 || enddate.tm_year < endoftime->tm_year)
{
- DocumentRef *thisRef = docDB[thisMatch->getURL()];
-
- // code added by Mike Grommet for date search ranges
- // check for valid date range. toss it out if it isn't relevant.
- if(thisRef->DocTime() < timet_startdate || thisRef->DocTime() > timet_enddate)
- {
- delete thisMatch;
- delete thisRef;
- continue;
- }
-
+ DocumentRef *thisRef = docDB.FindCoded(thisMatch->getURL());
if (thisRef) // We better hope it's not null!
{
+ // code added by Mike Grommet for date search ranges
+ // check for valid date range. toss it out if it isn't relevant.
+ if(thisRef->DocTime() < timet_startdate ||
+ thisRef->DocTime() > timet_enddate)
+ {
+ delete thisMatch;
+ delete thisRef;
+ continue;
+ }
+
score += date_factor *
((thisRef->DocTime() * 1000 / (double)time(0)) - 900);
int links = thisRef->DocLinks();
-------- 8< -------- (cut here) -------- 8< --------
--
Gilles R. Detillieux E-mail: <[EMAIL PROTECTED]>
Spinal Cord Research Centre WWW: http://www.scrc.umanitoba.ca/~grdetil
Dept. Physiology, U. of Manitoba Phone: (204)789-3766
Winnipeg, MB R3E 3J7 (Canada) Fax: (204)789-3930
_______________________________________________
htdig-general mailing list <[EMAIL PROTECTED]>
To unsubscribe, send a message to <[EMAIL PROTECTED]> with a
subject of unsubscribe
FAQ: http://htdig.sourceforge.net/FAQ.html