Hi all.  Geoff & I discussed a problem with htmerge a week and a half ago,
but I never did post a patch for it.  Here it is: the patch below prevents
description text words from clobbering the anchor number of merged text
words after an anchor.

--- htmerge/words.cc.anchorbug  Tue Feb 16 23:03:56 1999
+++ htmerge/words.cc    Thu Mar 18 18:11:52 1999
@@ -208,7 +208,8 @@ mergeWords(char *wordtmp, char *wordfile
                last_wr.weight += wr.weight;
                if (wr.location < last_wr.location)
                  last_wr.location = wr.location;
-               if (wr.anchor < last_wr.anchor)
+               if (wr.anchor > 0 && wr.anchor < last_wr.anchor
+                   || last_wr.anchor == 0)
                  last_wr.anchor = wr.anchor;
                continue;
              }

While I'm at it, here's another patch I've made to docs.cc, so that when
you run htmerge with -vv you can find out why it is deleting some documents
from the database.

--- htmerge/docs.cc.quiet       Tue Feb 16 23:03:56 1999
+++ htmerge/docs.cc     Fri Mar 19 08:36:47 1999
@@ -63,16 +63,34 @@ convertDocs(char *doc_db, char *doc_inde
            // For some reason, this document doesn't have an excerpt
            // (probably because of a noindex directive, or disallowed
            // by robots.txt or server_max_docs). Remove it
+           if (verbose > 1)
+           {
+               cout << "htmerge: " << url->get() << " (id " << id
+                    << ") discarded: empty or disallowed, no DocHead" << endl;
+               cout.flush();
+           }
            db.Delete(url->get());
          }
        else if ((ref->DocState()) == Reference_noindex)
          {
            // This document has been marked with a noindex tag. Remove it
+           if (verbose > 1)
+           {
+               cout << "htmerge: " << url->get() << " (id " << id
+                    << ") discarded: noindex meta tag" << endl;
+               cout.flush();
+           }
            db.Delete(url->get());
          }
        else if (remove_unused && discard_list.Exists(id))
          {
            // This document is not valid anymore.  Remove it
+           if (verbose > 1)
+           {
+               cout << "htmerge: " << url->get() << " (id " << id
+                    << ") discarded: gone or modified" << endl;
+               cout.flush();
+           }
            db.Delete(url->get());
          }
        else

-- 
Gilles R. Detillieux              E-mail: <[EMAIL PROTECTED]>
Spinal Cord Research Centre       WWW:    http://www.scrc.umanitoba.ca/~grdetil
Dept. Physiology, U. of Manitoba  Phone:  (204)789-3766
Winnipeg, MB  R3E 3J7  (Canada)   Fax:    (204)789-3930
------------------------------------
To unsubscribe from the htdig mailing list, send a message to
[EMAIL PROTECTED] containing the single word "unsubscribe" in
the SUBJECT of the message.

Reply via email to