According to Joe R. Jah:
> You are right; I must have been in a big hurry;) I extracted the fresh
> source, patched and compiled it without a problem, but when I ran rundig I
> got several hundred of:
>
> DB2 problem...: missing or empty key value specified
>
> and an empty database;( Does it use the same format in the config file
> as the Armstrong's patch?
Yes, but there's a bug in the code. I got the same results. Adding
some debugging prints to htdig/Retriever.cc helped to find the problem.
The URL was getting clobbered down to an empty string, hence the error
message above. The problem was that URL::rewrite() and HtURLRewriter.h
assumed HtRegexReplaceList::Replace() returned a String, but in fact it
returns an int and modifies the String argument in place. I fixed these, and it
seems to work like a charm. Here's the updated patch, with the files
I replaced. I also added a url.rewrite() call in Retriever::got_redirect(),
as it seems rewrites should be done there too, and I added in the extra
debugging prints in Retriever.cc.
This patch should apply cleanly to last Sunday's 3.1.6 snapshot, using
"patch -p0 < this-message" in the main source directory.
--- htdig/Retriever.cc.orig Fri Jun 29 11:57:34 2001
+++ htdig/Retriever.cc Wed Sep 26 16:12:24 2001
@@ -1192,6 +1192,7 @@ Retriever::got_href(URL &url, char *desc
//
if (IsValidURL(url.get()))
{
+ String oldurl;
//
// It is valid. Normalize it (resolve cnames for the server)
// and check again...
@@ -1200,9 +1201,16 @@ Retriever::got_href(URL &url, char *desc
{
cout << "resolving '" << url.get() << "'\n";
cout.flush();
+ oldurl = url.get();
}
url.normalize();
+ url.rewrite();
+ if (debug > 2 && strcmp(oldurl.get(), url.get()) != 0)
+ {
+ cout << "normalized/rewritten as '" << url.get() << "'\n";
+ cout.flush();
+ }
// If it is a backlink from the current document,
// just update that field. Writing to the database
@@ -1341,6 +1349,7 @@ Retriever::got_redirect(char *new_url, D
//
if (IsValidURL(url.get()))
{
+ String oldurl;
//
// It is valid. Normalize it (resolve cnames for the server)
// and check again...
@@ -1349,9 +1358,17 @@ Retriever::got_redirect(char *new_url, D
{
cout << "resolving '" << url.get() << "'\n";
cout.flush();
+ oldurl = url.get();
}
url.normalize();
+ url.rewrite();
+ if (debug > 2 && strcmp(oldurl.get(), url.get()) != 0)
+ {
+ cout << "normalized/rewritten as '" << url.get() << "'\n";
+ cout.flush();
+ }
+
if (limitsn.FindFirst(url.get()) >= 0)
{
//
Index: htdig/htdig.cc
===================================================================
RCS file: /cvsroot/htdig/htdig/htdig/htdig.cc,v
retrieving revision 1.3.2.8
diff -c -3 -p -r1.3.2.8 htdig.cc
*** htdig/htdig.cc 2001/07/25 22:49:34 1.3.2.8
--- htdig/htdig.cc 2001/09/24 03:20:37
***************
*** 19,24 ****
--- 19,25 ----
#include "htdig.h"
#include "defaults.h"
#include "HtURLCodec.h"
+ #include "HtURLRewriter.h"
#include "HtWordType.h"
// If we have this, we probably want it.
*************** main(int ac, char **av)
*** 161,166 ****
--- 162,176 ----
if (url_part_errors.length() != 0)
reportError(form("Invalid url_part_aliases or common_url_parts: %s",
url_part_errors.get()));
+
+ //
+ // Check url_rewrite_rules for errors.
+ String url_rewrite_rules = HtURLRewriter::instance()->ErrMsg();
+
+ if (url_rewrite_rules.length() != 0)
+ reportError(form("Invalid url_rewrite_rules: %s",
+ url_rewrite_rules.get()));
+
//
// If indicated, change the database file names to have the .work
Index: htlib/HtRegex.cc
===================================================================
RCS file: HtRegex.cc
diff -N HtRegex.cc
*** /dev/null Thu May 24 22:33:05 2001
--- htlib/HtRegex.cc Sun Sep 23 20:20:37 2001
***************
*** 0 ****
--- 1,105 ----
+ //
+ // HtRegex.cc
+ //
+ // HtRegex: A simple C++ wrapper class for the system regex routines.
+ //
+ // Part of the ht://Dig package <http://www.htdig.org/>
+ // Copyright (c) 1999-2001 The ht://Dig Group
+ // For copyright details, see the file COPYING in your distribution
+ // or the GNU General Public License version 2 or later
+ // <http://www.gnu.org/copyleft/gpl.html>
+ //
+ // $Id: HtRegex.cc,v 1.9.2.8 2001/05/16 16:36:45 ghutchis Exp $
+ //
+
+ #ifdef HAVE_CONFIG_H
+ #include "htconfig.h"
+ #endif /* HAVE_CONFIG_H */
+
+ #include "HtRegex.h"
+ #include <locale.h>
+
+
+ // Default constructor: no pattern has been compiled yet.
+ HtRegex::HtRegex()
+     : compiled(0)
+ {
+ }
+
+ // Construct and immediately hand `str` to set(); set() decides whether
+ // `compiled` ends up non-zero.
+ HtRegex::HtRegex(const char *str, int case_sensitive)
+     : compiled(0)
+ {
+     this->set(str, case_sensitive);
+ }
+
+ // Free the compiled pattern, if there is one.
+ HtRegex::~HtRegex()
+ {
+     if (compiled)
+     {
+         regfree(&re);
+         compiled = 0;
+     }
+ }
+
+ // Accessor for the message stored by the most recent failed set().
+ const String &HtRegex::lastError()
+ {
+     const String &message = lastErrorMessage;
+     return message;
+ }
+
+ //
+ // Compile `str` as a POSIX extended regular expression, replacing any
+ // previously compiled pattern.  Matching ignores case unless
+ // case_sensitive is non-zero.
+ // Returns 1 if a pattern is now compiled, 0 otherwise (NULL or empty
+ // pattern, or a compile error).  After a compile error, lastError()
+ // returns the text produced by regerror().
+ //
+ int
+ HtRegex::set(const char * str, int case_sensitive)
+ {
+     // Release the old pattern before `re` is reused.
+     if (compiled != 0) regfree(&re);
+     compiled = 0;
+
+     if (str == NULL) return 0;
+     if (strlen(str) <= 0) return 0;
+
+     int cflags = case_sensitive ? REG_EXTENDED : (REG_EXTENDED|REG_ICASE);
+     int err = regcomp(&re, str, cflags);
+     if (err == 0)
+     {
+         compiled = 1;
+     }
+     else
+     {
+         // First call asks regerror() for the buffer size it needs,
+         // second call fetches the message itself.
+         size_t len = regerror(err, &re, 0, 0);
+         char *buf = new char[len];
+         regerror(err, &re, buf, len);
+         lastErrorMessage = buf;
+         delete [] buf;      // array form: buf came from new[]
+     }
+     return compiled;
+ }
+
+ //
+ // Build a single alternation pattern out of every entry in `list` and
+ // compile it with set().  An entry that starts with '[' and ends with ']'
+ // contributes the text between the brackets unchanged; any other entry is
+ // taken literally, with regex special characters backslash-escaped.
+ // Returns whatever set() returns.
+ //
+ int
+ HtRegex::setEscaped(StringList &list, int case_sensitive)
+ {
+     String pattern;
+     String *item;
+
+     list.Start_Get();
+     while ((item = (String *) list.Get_Next()))
+     {
+         int bracketed = (item->indexOf('[') == 0
+                          && item->lastIndexOf(']') == item->length()-1);
+         if (!bracketed)
+         {
+             // Literal entry: backquote any regex special characters.
+             for (int i = 0; i < item->length(); i++)
+             {
+                 char ch = item->Nth(i);
+                 if (strchr("^.[$()|*+?{\\", ch))
+                     pattern << '\\';
+                 pattern << ch;
+             }
+         }
+         else
+         {
+             pattern << item->sub(1,item->length()-2).get();
+         }
+         pattern << "|";
+     }
+     pattern.chop(1);        // remove the last '|' separator
+
+     return set(pattern, case_sensitive);
+ }
+
+ //
+ // Test `str` against the compiled pattern.
+ // Returns nullpattern when no pattern is compiled, nullstr when `str` is
+ // NULL or empty, otherwise 1 on a match and 0 on no match.
+ //
+ int
+ HtRegex::match(const char * str, int nullpattern, int nullstr)
+ {
+     if (compiled == 0)
+         return nullpattern;
+     if (str == NULL || strlen(str) <= 0)
+         return nullstr;
+
+     return (regexec(&re, str, (size_t) 0, NULL, 0) == 0) ? 1 : 0;
+ }
+
Index: htlib/HtRegex.h
===================================================================
RCS file: HtRegex.h
diff -N HtRegex.h
*** /dev/null Thu May 24 22:33:05 2001
--- htlib/HtRegex.h Sun Sep 23 20:20:37 2001
***************
*** 0 ****
--- 1,65 ----
+ //
+ // HtRegex.h
+ //
+ // HtRegex: A simple C++ wrapper class for the system regex routines.
+ //
+ // Part of the ht://Dig package <http://www.htdig.org/>
+ // Copyright (c) 1999, 2000 The ht://Dig Group
+ // For copyright details, see the file COPYING in your distribution
+ // or the GNU General Public License version 2 or later
+ // <http://www.gnu.org/copyleft/gpl.html>
+ //
+ // $Id: HtRegex.h,v 1.5.2.4 2000/08/21 02:29:15 ghutchis Exp $
+ //
+ //
+
+ #ifndef _HtRegex_h_
+ #define _HtRegex_h_
+
+ #include "Object.h"
+ #include "StringList.h"
+
+ // This is an attempt to get around compatibility problems
+ // with the included regex
+ #ifdef HAVE_BROKEN_REGEX
+ #include <regex.h>
+ #else
+ #include "regex.h"
+ #endif
+
+ #include <sys/types.h>
+ #include <fstream.h>
+
+ // Thin wrapper around the system regcomp()/regexec() routines.
+ class HtRegex : public Object
+ {
+ public:
+     // Construction/Destruction
+     HtRegex();
+     HtRegex(const char *str, int case_sensitive = 0);
+     virtual ~HtRegex();
+
+     // Setting the pattern.  Each returns non-zero when a pattern is
+     // compiled afterwards.
+     int set(const String& str, int case_sensitive = 0)
+     {
+         return set(str.get(), case_sensitive);
+     }
+     int set(const char *str, int case_sensitive = 0);
+     int setEscaped(StringList &list, int case_sensitive = 0);
+
+     // Message from the last pattern that failed to compile.
+     virtual const String &lastError();
+
+     // Checking for a match.  nullmatch is returned when no pattern is
+     // compiled, nullstr when the subject string is NULL or empty.
+     int match(const String& str, int nullmatch, int nullstr)
+     {
+         return match(str.get(), nullmatch, nullstr);
+     }
+     int match(const char *str, int nullmatch, int nullstr);
+
+ protected:
+     int compiled;               // non-zero while `re` holds a compiled pattern
+     regex_t re;
+
+     String lastErrorMessage;
+ };
+
+ #endif
Index: htlib/HtRegexReplace.cc
===================================================================
RCS file: HtRegexReplace.cc
diff -N HtRegexReplace.cc
*** /dev/null Thu May 24 22:33:05 2001
--- htlib/HtRegexReplace.cc Sun Sep 23 20:20:38 2001
***************
*** 0 ****
--- 1,141 ----
+ //
+ // HtRegexReplace.cc
+ //
+ // HtRegexReplace: A subclass of HtRegex that can perform replacements
+ //
+ // Part of the ht://Dig package <http://www.htdig.org/>
+ // Copyright (c) 2000 The ht://Dig Group
+ // For copyright details, see the file COPYING in your distribution
+ // or the GNU Public License version 2 or later
+ // <http://www.gnu.org/copyleft/gpl.html>
+ //
+ // $Id: HtRegexReplace.cc,v 1.1.2.2 2001/05/16 16:36:45 ghutchis Exp $
+ //
+
+ #include "HtRegexReplace.h"
+ #include <locale.h>
+
+
+ // Default constructor: no pattern and no replacement text yet.
+ // The members have to be zeroed here: the destructor calls empty(), which
+ // deletes repBuf and segMark, and replace() tests repBuf against 0.
+ HtRegexReplace::HtRegexReplace()
+ {
+     memset(&regs, 0, sizeof(regs));
+     repBuf = 0;
+     segSize = segUsed = 0;
+     segMark = 0;
+     repLen = 0;
+ }
+
+ // Construct from a pattern (`from`, compiled by the HtRegex base class)
+ // and a replacement template (`to`, parsed by setReplace()).
+ HtRegexReplace::HtRegexReplace(const char *from, const char *to, int case_sensitive)
+     : HtRegex(from, case_sensitive)
+ {
+     // Zero everything first: setReplace() starts by calling empty(),
+     // which deletes repBuf and segMark.
+     memset(&regs, 0, sizeof(regs));
+     repBuf = 0;
+     segSize =
+     segUsed = 0;
+     segMark = 0;
+     repLen = 0;
+
+     setReplace(to);
+ }
+
+ // Destructor: empty() releases the replacement buffers.
+ HtRegexReplace::~HtRegexReplace()
+ {
+     this->empty();
+ }
+
+ //
+ // If the compiled pattern matches `str`, overwrite `str` with the
+ // replacement text (sub-expression references expanded) and return 1.
+ // Returns 0 when the pattern does not match, nullpattern when no pattern
+ // or no replacement text has been set, and nullstr when `str` is empty.
+ //
+ int HtRegexReplace::replace(String &str, int nullpattern, int nullstr)
+ {
+     const int regCount = sizeof(regs) / sizeof(regs[0]);
+     if (compiled == 0 || repBuf == 0) return nullpattern;
+     if (str.length() == 0) return nullstr;
+
+     if (regexec(&re, str.get(), regCount, regs, 0) == 0)
+     {
+         // Firstly work out how long the result string will be.  We think
+         // this will be more efficient than letting the buffer grow in
+         // stages as we build the result, but who knows?
+         //cout << "!!! Match !!!" << endl;
+         size_t resLen = repLen;
+         int i, reg, repPos;
+         const char *src = str.get();
+
+         // segMark alternates: even entries are offsets into repBuf, odd
+         // entries are the register numbers that setReplace() recorded.
+         for (i = 1; i < (int) segUsed; i += 2)
+         {
+             reg = segMark[i];
+             if (reg < regCount && regs[reg].rm_so != -1)
+                 resLen += regs[reg].rm_eo - regs[reg].rm_so;
+         }
+         //cout << "result will be " << resLen << " chars long" << endl;
+         String result(resLen);  // Make the result string preallocating the buffer size
+         for (i = 0, repPos = 0;; )
+         {
+             //cout << "appending segment " << i << endl;
+             // part of the replace string
+             result.append(repBuf + repPos, segMark[i] - repPos);
+             repPos = segMark[i];                // move forward
+             if (++i == (int) segUsed) break;    // was that the last segment?
+             reg = segMark[i++];                 // get the register number
+             if (reg < regCount && regs[reg].rm_so != -1)
+                 result.append((char *) src + regs[reg].rm_so,
+                               regs[reg].rm_eo - regs[reg].rm_so);
+         }
+         str = result;
+         //cout << "return " << result.get() << endl;
+
+         return 1;
+     }
+
+     return 0;
+ }
+
+ // Private: append `n` to the mark buffer, growing the buffer if necessary.
+ void HtRegexReplace::putMark(int n)
+ {
+     // assert(segUsed <= segSize);
+     if (segUsed == segSize)
+     {
+         size_t newSize = segSize * 2 + 5;   // grow in chunks
+         int *newMark = new int[newSize];    // do we assume that new can't fail?
+         // segMark is still 0 the first time through; never hand memcpy
+         // a null source pointer.
+         if (segMark != 0)
+             memcpy(newMark, segMark, segSize * sizeof(int));
+         delete [] segMark;                  // array form: came from new[]
+         segMark = newMark;
+         segSize = newSize;
+     }
+     segMark[segUsed++] = n;
+ }
+
+ // Destroy any existing replace pattern and return to the "no replacement
+ // set" state (repBuf == 0).
+ void HtRegexReplace::empty()
+ {
+     // Both buffers are allocated with new[], so release them with delete [].
+     delete [] repBuf; repBuf = 0;
+     segSize = segUsed = 0;
+     delete [] segMark; segMark = 0;
+     repLen = 0;
+ }
+
+ //
+ // Parse the replacement template `to`.  Literal text is copied into
+ // repBuf; each \0..\9 reference is recorded in segMark as a pair
+ // (offset in repBuf, register number).  A final mark holds the total
+ // literal length.  A backslash before any other character passes that
+ // character through; a trailing lone backslash is dropped.
+ // A null `to` is treated as an empty replacement.
+ //
+ void HtRegexReplace::setReplace(const char *to)
+ {
+     empty();
+
+     if (to == 0) to = "";       // avoid strlen()/dereference of a null pointer
+
+     // replace buffer can never contain more text than to string
+     repBuf = new char[strlen(to)];
+     int bufPos = 0;             // our position within the output buffer
+
+     while (*to)
+     {
+         if (*to == '\\')
+         {
+             if (*++to == '\0') break;
+             if (*to >= '0' && *to <= '9')
+             {
+                 putMark(bufPos);
+                 putMark(*to - '0');
+             }
+             else
+             {
+                 // We could handle some C style escapes here, but instead
+                 // we just pass the character after the backslash through.
+                 // This means that \\, \" and \' will do the right thing.
+                 // It's unlikely that anyone will need any C style escapes
+                 // in ht://Dig anyway.
+                 repBuf[bufPos++] = *to;
+             }
+             to++;
+         }
+         else
+         {
+             repBuf[bufPos++] = *to++;
+         }
+     }
+     putMark(bufPos);
+     repLen = (size_t) bufPos;
+ }
Index: htlib/HtRegexReplace.h
===================================================================
RCS file: HtRegexReplace.h
diff -N HtRegexReplace.h
*** /dev/null Thu May 24 22:33:05 2001
--- htlib/HtRegexReplace.h Sun Sep 23 20:20:38 2001
***************
*** 0 ****
--- 1,58 ----
+ //
+ // HtRegexReplace.h
+ //
+ // HtRegexReplace: A subclass of HtRegex that can perform replacements
+ //
+ // Part of the ht://Dig package <http://www.htdig.org/>
+ // Copyright (c) 2000 The ht://Dig Group
+ // For copyright details, see the file COPYING in your distribution
+ // or the GNU Public License version 2 or later
+ // <http://www.gnu.org/copyleft/gpl.html>
+ //
+ // $Id: HtRegexReplace.h,v 1.1.2.1 2000/08/21 02:33:13 ghutchis Exp $
+ //
+
+ #ifndef _HtRegexReplace_h_
+ #define _HtRegexReplace_h_
+
+ #ifdef HAVE_CONFIG_H
+ #include "htconfig.h"
+ #endif /* HAVE_CONFIG_H */
+
+ #include "HtRegex.h"
+
+ // A regular expression (inherited from HtRegex) paired with a replacement
+ // template that may refer to sub-expressions as \0 .. \9.
+ // NOTE(review): the class owns repBuf and segMark through raw pointers and
+ // declares no copy constructor or assignment operator, so copying an
+ // instance looks unsafe - confirm that no caller copies one.
+ class HtRegexReplace : public HtRegex
+ {
+ public:
+ //
+ // Construction/Destruction
+ //
+ HtRegexReplace();
+ HtRegexReplace(const char *from, const char *to, int case_sensitive = 0);
+ virtual ~HtRegexReplace();
+
+ //
+ // Methods for setting the replacement pattern
+ //
+ void setReplace(const String& str) { setReplace(str.get()); }
+ void setReplace(const char *str);
+
+ //
+ // Methods for replacing.  On a match `str` is overwritten with the
+ // expanded replacement and 1 is returned; 0 means no match.  nullpattern
+ // is returned when no pattern/replacement is set, nullstr when `str`
+ // is empty.
+ //
+ int replace(String &str, int nullpattern = 0, int nullstr = 0);
+
+ protected:
+ char *repBuf; // Replace text (literal parts only, references removed).
+ // segSize is the allocated capacity of segMark, segUsed the number of
+ // entries in use.
+ size_t segSize, segUsed;
+ // Marks written by setReplace(): alternating offset into repBuf and
+ // register number, ending with a final offset.
+ int *segMark;
+ size_t repLen; // Number of characters stored in repBuf.
+
+ // Match offsets filled in by regexec() inside replace(); one slot per
+ // reference \0 .. \9.
+ regmatch_t regs[10];
+
+ // Various private methods
+ void putMark(int n);
+ void empty();
+ };
+
+ #endif
Index: htlib/HtRegexReplaceList.cc
===================================================================
RCS file: HtRegexReplaceList.cc
diff -N HtRegexReplaceList.cc
*** /dev/null Thu May 24 22:33:05 2001
--- htlib/HtRegexReplaceList.cc Sun Sep 23 20:20:38 2001
***************
*** 0 ****
--- 1,72 ----
+ //
+ // HtRegexReplaceList.cc
+ //
+ // HtRegexReplaceList: Perform RegexReplace on a list of from/to pairs.
+ // Patterns are applied in order; pattern matching
+ // doesn't stop when a match occurs.
+ //
+ // Part of the ht://Dig package <http://www.htdig.org/>
+ // Copyright (c) 2000-2001 The ht://Dig Group
+ // For copyright details, see the file COPYING in your distribution
+ // or the GNU Public License version 2 or later
+ // <http://www.gnu.org/copyleft/gpl.html>
+ //
+ // $Id: HtRegexReplaceList.cc,v 1.1.2.3 2001/07/06 23:43:12 ghutchis Exp $
+ //
+ //
+
+ #include "HtRegexReplaceList.h"
+ #include <iostream.h>
+
+ //
+ // Build one HtRegexReplace per from/to pair in `list`.
+ // On failure (odd number of strings, or a pattern that does not compile)
+ // lastError() returns a non-empty message and construction stops at the
+ // offending pair; pairs after it are not added.
+ //
+ HtRegexReplaceList::HtRegexReplaceList(StringList &list, int case_sensitive )
+ {
+     if (list.Count() & 1)
+     {
+         lastErrorMessage = "HtRegexReplaceList needs an even number of strings";
+         return;
+     }
+
+     for (int i = 0; i < list.Count(); i += 2)
+     {
+         String from = list[i];
+         String to = list[i+1];
+         HtRegexReplace *replacer = new HtRegexReplace(from.get(), to.get(), case_sensitive);
+         // Stash it even if there's an error so it will get destroyed later
+         replacers.Add(replacer);
+         const String &err = replacer->lastError();
+         if (err.length() != 0)
+         {
+             lastErrorMessage = err;
+             return;
+         }
+     }
+ }
+
+ // Nothing to do explicitly: `replacers` is a member and is destroyed with
+ // this object.
+ // NOTE(review): presumably List's destructor also frees the HtRegexReplace
+ // objects it holds - confirm against List.
+ HtRegexReplaceList::~HtRegexReplaceList()
+ {
+ }
+
+ // Run every stored rule over `str`, in order and without stopping at the
+ // first hit.  Returns how many rules reported a positive result.
+ int HtRegexReplaceList::Replace(String &str, int nullpattern , int nullstr )
+ {
+     int applied = 0;
+     const int total = replacers.Count();
+     int idx = 0;
+
+     while (idx < total)
+     {
+         HtRegexReplace *rule = (HtRegexReplace *) replacers[idx++];
+         if (rule->replace(str, nullpattern, nullstr) > 0)
+             applied++;
+     }
+
+     return applied;
+ }
+
+ const String &HtRegexReplaceList::lastError()
+ {
+ return lastErrorMessage;
+ }
+
+ // End of HtRegexReplaceList.cc
Index: htlib/HtRegexReplaceList.h
===================================================================
RCS file: HtRegexReplaceList.h
diff -N HtRegexReplaceList.h
*** /dev/null Thu May 24 22:33:05 2001
--- htlib/HtRegexReplaceList.h Sun Sep 23 20:20:38 2001
***************
*** 0 ****
--- 1,39 ----
+ //
+ // HtRegexReplaceList.h
+ //
+ // HtRegexReplaceList: Perform RegexReplace on a list of from/to pairs.
+ // Patterns are applied in order; pattern matching
+ // doesn't stop when a match occurs.
+ //
+ // Part of the ht://Dig package <http://www.htdig.org/>
+ // Copyright (c) 2000 The ht://Dig Group
+ // For copyright details, see the file COPYING in your distribution
+ // or the GNU Public License version 2 or later
+ // <http://www.gnu.org/copyleft/gpl.html>
+ //
+ // $Id: HtRegexReplaceList.h,v 1.1.2.1 2000/08/21 02:33:13 ghutchis Exp $
+ //
+
+ #ifndef __HtRegexReplaceList_h
+ #define __HtRegexReplaceList_h
+
+ #include "HtRegexReplace.h"
+ #include "List.h"
+ #include "StringList.h"
+
+ // An ordered list of HtRegexReplace rules built from from/to string pairs.
+ class HtRegexReplaceList : public Object
+ {
+ public:
+ // Construct a HtRegexReplaceList. |list| should contain an even
+ // number of strings that constitute from/to pairs.  Check lastError()
+ // afterwards: it is non-empty if the list was rejected.
+ HtRegexReplaceList(StringList &list, int case_sensitive = 0);
+ virtual ~HtRegexReplaceList();
+ // Apply every rule to |str| in order, modifying it in place; returns the
+ // number of rules that reported a replacement (not the new string).
+ int Replace(String &str, int nullpattern = 0, int nullstr = 0);
+ virtual const String &lastError();
+
+ private:
+ List replacers; // HtRegexReplace objects, in the order they are applied
+ String lastErrorMessage; // set by the constructor on failure
+ };
+
+ #endif /* __HtRegexReplaceList_h */
Index: htlib/HtURLRewriter.cc
===================================================================
RCS file: HtURLRewriter.cc
diff -N HtURLRewriter.cc
*** /dev/null Thu May 24 22:33:05 2001
--- htlib/HtURLRewriter.cc Sun Sep 23 20:20:38 2001
***************
*** 0 ****
--- 1,49 ----
+ //
+ // Methods for HtURLRewriter
+ //
+ // $Id: HtURLRewriter.cc,v 1.0 2000/08/16 14:43:00 aarmstrong Exp $
+ //
+ //
+
+ #include "HtURLRewriter.h"
+ #include "defaults.h" // For "config"
+
+ // Constructor: splits the url_rewrite_rules configuration value on
+ // spaces and tabs and hands the pieces to the encapsulated
+ // HtRegexReplaceList.  Private; reached only through instance().
+ HtURLRewriter::HtURLRewriter()
+ {
+     StringList rules(config["url_rewrite_rules"], " \t");
+     myRegexReplace = new HtRegexReplaceList(rules);
+ }
+
+
+ // Destructor: release the rule list allocated by the constructor.
+ HtURLRewriter::~HtURLRewriter()
+ {
+     HtRegexReplaceList *rules = myRegexReplace;
+     delete rules;
+ }
+
+ // Intended use: HtURLRewriter::instance()->ErrMsg().
+ // Passes on the rule list's error message; an empty String means the
+ // url_rewrite_rules value was accepted.
+ const String& HtURLRewriter::ErrMsg()
+ {
+     HtRegexReplaceList *rules = myRegexReplace;
+     return rules->lastError();
+ }
+
+
+ // Singleton accessor: the one HtURLRewriter is created on the first call
+ // and the same pointer is returned on every call after that.
+ HtURLRewriter *
+ HtURLRewriter::instance()
+ {
+     static HtURLRewriter *theRewriter = 0;
+
+     if (!theRewriter)
+         theRewriter = new HtURLRewriter();
+
+     return theRewriter;
+ }
+
+ // End of HtURLRewriter.cc
Index: htlib/HtURLRewriter.h
===================================================================
RCS file: HtURLRewriter.h
diff -N HtURLRewriter.h
*** /dev/null Thu May 24 22:33:05 2001
--- htlib/HtURLRewriter.h Sun Sep 23 20:20:38 2001
***************
*** 0 ****
--- 1,46 ----
+ //
+ // HtURLRewriter
+ //
+ // $Id: HtURLRewriter.h,v 1.0 2000/08/16 14:43:00 aarmstrong Exp $
+ //
+ #ifndef __HtURLRewriter_h
+ #define __HtURLRewriter_h
+
+ #include "HtRegexReplaceList.h"
+
+ // Container for a RegexReplaceList (not subclassed from it due to
+ // portability-problems using initializers).
+ // Not for subclassing.
+ // Singleton that applies the configured url_rewrite_rules to a String.
+ class HtURLRewriter
+ {
+ public:
+ // Returns the single shared instance, creating it on first use.
+ static HtURLRewriter *instance();
+ virtual ~HtURLRewriter();
+
+ // Rewrites |source| in place.  The return value is the int result of
+ // HtRegexReplaceList::Replace(), not the rewritten string.
+ inline int Replace(String &source) { return myRegexReplace->Replace(source); }
+
+ // If an error was discovered during the parsing of
+ // config directives, this member gives a
+ // nonempty String with an error message.
+ const String& ErrMsg();
+
+ // egcs-1.1 (and some earlier versions) always erroneously
+ // warns (even without warning flags) about classic singleton
+ // constructs ("only defines private constructors and has no
+ // friends"). Rather than adding autoconf tests to shut these
+ // versions up with -Wno-ctor-dtor-privacy, we fake normal
+ // conformism for it here (the minimal effort).
+ friend void my_friend_Harvey__a_faked_friend_function();
+
+ private:
+ // Hide default-constructor, copy-constructor and assignment
+ // operator, making this a singleton.
+ HtURLRewriter();
+ HtURLRewriter(const HtURLRewriter &);
+ void operator= (const HtURLRewriter &);
+
+ HtRegexReplaceList *myRegexReplace; // allocated in the constructor, deleted in the destructor
+ // NOTE(review): myErrMsg is not referenced by the HtURLRewriter.cc code in
+ // this patch; ErrMsg() returns the rule list's own message instead.
+ String myErrMsg;
+ };
+
+ #endif /* __HtURLRewriter_h */
Index: htlib/Makefile.in
===================================================================
RCS file: /cvsroot/htdig/htdig/htlib/Makefile.in,v
retrieving revision 1.13.2.2
diff -c -3 -p -r1.13.2.2 Makefile.in
*** htlib/Makefile.in 1999/03/29 15:53:48 1.13.2.2
--- htlib/Makefile.in 2001/09/24 03:20:38
*************** OBJS= Configuration.o Connection.o Datab
*** 16,22 ****
URL.o URLTrans.o cgi.o \
good_strtok.o io.o strcasecmp.o \
strptime.o mytimegm.o HtCodec.o HtWordCodec.o \
! HtURLCodec.o regex.o HtWordType.o
TARGET= libht.a
--- 16,24 ----
URL.o URLTrans.o cgi.o \
good_strtok.o io.o strcasecmp.o \
strptime.o mytimegm.o HtCodec.o HtWordCodec.o \
! HtURLCodec.o regex.o HtWordType.o \
! HtRegex.o HtRegexReplace.o HtRegexReplaceList.o \
! HtURLRewriter.o
TARGET= libht.a
--- htlib/URL.cc.orig Fri Aug 31 16:07:32 2001
+++ htlib/URL.cc Wed Sep 26 16:43:17 2001
@@ -13,6 +13,7 @@ static char RCSid[] = "$Id: URL.cc,v 1.1
#include "Configuration.h"
#include "StringMatch.h"
#include "StringList.h"
+#include "HtURLRewriter.h"
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
@@ -222,6 +223,13 @@ URL::URL(char *ref, URL &parent)
_url << _path;
}
+// Apply the configured URL rewrite rules to this URL's text and, only if
+// the text actually changed, parse the new text again.
+void URL::rewrite()
+{
+    String before = _url.get();
+
+    HtURLRewriter::instance()->Replace(_url);
+    if (strcmp(before.get(), _url.get()) == 0)
+        return;                 // unchanged: nothing to re-parse
+
+    parse(_url.get());
+}
//*****************************************************************************
// void URL::parse(char *u)
Index: htlib/URL.h
===================================================================
RCS file: /cvsroot/htdig/htdig/htlib/Attic/URL.h,v
retrieving revision 1.4.2.1
diff -c -3 -p -r1.4.2.1 URL.h
*** htlib/URL.h 2000/02/16 21:14:59 1.4.2.1
--- htlib/URL.h 2001/09/24 03:20:38
*************** public:
*** 58,63 ****
--- 58,64 ----
char *get() {return _url;}
void dump();
void normalize();
+ void rewrite();
char *signature();
private:
Index: htlib/htString.h
===================================================================
RCS file: /cvsroot/htdig/htdig/htlib/htString.h,v
retrieving revision 1.5.2.1
diff -c -3 -p -r1.5.2.1 htString.h
*** htlib/htString.h 2001/06/07 20:23:59 1.5.2.1
--- htlib/htString.h 2001/09/24 03:20:38
*************** public:
*** 79,84 ****
--- 79,85 ----
// Access to specific characters
//
char &operator [] (int n);
+ char Nth(int n) { return operator[](n); } // character n, returned by value
char last();
//
--
Gilles R. Detillieux E-mail: <[EMAIL PROTECTED]>
Spinal Cord Research Centre WWW: http://www.scrc.umanitoba.ca/~grdetil
Dept. Physiology, U. of Manitoba Phone: (204)789-3766
Winnipeg, MB R3E 3J7 (Canada) Fax: (204)789-3930
_______________________________________________
htdig-dev mailing list
[EMAIL PROTECTED]
https://lists.sourceforge.net/lists/listinfo/htdig-dev