According to [EMAIL PROTECTED]:
> I m quite interested in the accent patch By Robert Marchand.
> 
> But i have problem to apply the patch to the 3.1.5 tree.
> 
> I had to apply it by hand. I reindex my base and add the accents
> clause in the fuzzy line in the htdig.conf.

Robert didn't use diff -c or diff -u to make the patch, and didn't
include the paths to the files, so it doesn't readily apply on its
own, leaving you little choice but to apply it by hand.

> When i search my database i got far less answer than without the accent
> patch.

Strange.  I could see how it would have no effect, if it wasn't working,
but I don't know why you'd get fewer results.  Perhaps you broke something
in the code when applying the changes, or you accidentally disabled some
of the other fuzzy algorithms in your search_algorithms attribute.

> I guess that the patch should have worked at least for one person :)
> 
> My question is this one
> how do you apply the patch?
> 
> Thanks in advance for any help

I've just whipped together this patch for 3.1.5, which you should be able
to apply with "patch -p1 < this_file" while in the main source directory.
I should warn you that I haven't tested this under 3.1.5 yet.  I did apply
it to 3.2, with a few necessary changes, and my preliminary tests there
showed it worked.  I made one change to Robert's code: when using the
characters as subscripts into the MinusculeISOLAT1 array, it's necessary
to cast them to unsigned char, or this will break on systems where chars
are signed by default.

        ----------------------------------
        diff -c3prN htdig-3.1.5{,.accents}
        ----------------------------------
diff -c3prN htdig-3.1.5/htcommon/defaults.cc htdig-3.1.5.accents/htcommon/defaults.cc
*** htdig-3.1.5/htcommon/defaults.cc    Thu Feb 24 20:29:10 2000
--- htdig-3.1.5.accents/htcommon/defaults.cc    Thu Mar  2 11:20:55 2000
*************** ConfigDefaults  defaults[] =
*** 27,32 ****
--- 27,33 ----
      //
      // General defaults
      //
+     {"accents_db",                    "${database_base}.accents.db"},
      {"add_anchors_to_excerpt",                "true"},
      {"allow_in_form",                 ""},
      {"allow_numbers",                 "false"},
diff -c3prN htdig-3.1.5/htfuzzy/Accents.cc htdig-3.1.5.accents/htfuzzy/Accents.cc
*** htdig-3.1.5/htfuzzy/Accents.cc      Wed Dec 31 18:00:00 1969
--- htdig-3.1.5.accents/htfuzzy/Accents.cc      Thu Mar  2 11:25:42 2000
***************
*** 0 ****
--- 1,173 ----
+ //
+ // Accents.cc
+ //
+ // Implementation of Accents
+ //
+ //
+ //
+ #if RELEASE
+ static char RCSid[] = "$Id: $";
+ #endif
+ 
+ #include "Configuration.h"
+ #include "htconfig.h"
+ #include "Accents.h"
+ #include "Dictionary.h"
+ #include <ctype.h>
+ #include <fstream.h>
+ 
+ extern int debug;
+ 
+ /*---------------------------------------------------------------.
+ | Added by Robert Marchand to allow proper handling of           |
+ | ISO-LATIN           (derived from Pierre Rosa's code)          |
+ `---------------------------------------------------------------*/
+ 
+ /* ISO-Latin-1 table, "lowercased" and "de-accented", indexed by byte value.
+  * A-Z and most of 192-255 yield a plain a-z letter; all other entries hold
+  * their own index, so those bytes pass through unchanged. */
+ // NOTE(review): 215 and 247 (multiply/divide signs in ISO-8859-1) also yield 'o' -- confirm.
+ static char MinusculeISOLAT1[256] = {
+      0,   1,   2,   3,   4,   5,   6,   7,
+      8,   9,  10,  11,  12,  13,  14,  15,
+     16,  17,  18,  19,  20,  21,  22,  23,
+     24,  25,  26,  27,  28,  29,  30,  31,
+     32,  33,  34,  35,  36,  37,  38,  39,
+     40,  41,  42,  43,  44,  45,  46,  47,
+     48,  49,  50,  51,  52,  53,  54,  55,
+     56,  57,  58,  59,  60,  61,  62,  63,
+     64, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
+    'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
+    'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
+    'x', 'y', 'z',  91,  92,  93,  94,  95,
+     96, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
+    'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
+    'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
+    'x', 'y', 'z', 123, 124, 125, 126, 127,
+    128, 129, 130, 131, 132, 133, 134, 135,
+    136, 137, 138, 139, 140, 141, 142, 143,
+    144, 145, 146, 147, 148, 149, 150, 151,
+    152, 153, 154, 155, 156, 157, 158, 159,
+    160, 161, 162, 163, 164, 165, 166, 167,
+    168, 169, 170, 171, 172, 173, 174, 175,
+    176, 177, 178, 179, 180, 181, 182, 183,
+    184, 185, 186, 187, 188, 189, 190, 191,
+    'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
+    'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
+    208, 'n', 'o', 'o', 'o', 'o', 'o', 'o',
+    'o', 'u', 'u', 'u', 'u', 'y', 222, 223,
+    'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
+    'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
+    240, 'n', 'o', 'o', 'o', 'o', 'o', 'o',
+    'o', 'u', 'u', 'u', 'u', 'y', 254, 255};
+   
+ 
+ //*****************************************************************************
+ // Accents::Accents()
+ //   Sets the algorithm name to "accents"; writeDB() appends "_db" to it.
+ Accents::Accents()
+ {
+     name = "accents";
+ }
+ 
+ 
+ //*****************************************************************************
+ // Accents::~Accents()
+ //   Empty body: this class does no cleanup of its own.
+ Accents::~Accents()
+ {
+ }
+ 
+ //*****************************************************************************
+ // int Accents::writeDB(Configuration &config)
+ //   Saves the dictionary built by addWord() into the "<name>_db" database.
+ int
+ Accents::writeDB(Configuration &config)
+ {
+     String      var = name;
+     var << "_db";
+     String      filename = config[var];
+     // Returns NOTOK when the database cannot be opened read/write; OK otherwise.
+     index = Database::getDatabaseInstance();
+     if (index->OpenReadWrite(filename, 0664) == NOTOK)
+         return NOTOK;
+ 
+     String      *s;
+     char        *fuzzyKey;
+ 
+     int         count = 0;      // number of entries actually stored
+     // NOTE(review): dict is not null-checked here; confirm it cannot be unset.
+     dict->Start_Get();
+     while ((fuzzyKey = dict->Get_Next()))
+     {
+         s = (String *) dict->Find(fuzzyKey);
+ 
+         // Only store entries whose word list differs (ignoring case) from the key
+         if (mystrcasecmp(fuzzyKey, s->get()) != 0) {
+ 
+           index->Put(fuzzyKey, *s);
+ 
+           if (debug > 1)        // verbose: print every stored mapping
+             {
+               cout << "htfuzzy: '" << fuzzyKey << "' ==> '" << s->get() << "'\n"
+ ;
+             }
+           count++;
+           if ((count % 100) == 0 && debug == 1)   // progress line every 100 stored keys
+             {
+               cout << "htfuzzy: keys: " << count << '\n';
+               cout.flush();
+             }
+         }
+     }
+     if (debug == 1)
+     {
+         cout << "htfuzzy:Total keys: " << count << "\n";
+     }
+     return OK;  // NOTE(review): index is not closed in this function -- confirm it is closed elsewhere
+ }
+ 
+ 
+ //*****************************************************************************
+ // void Accents::generateKey(char *word, String &key)
+ //   Builds in key the table-folded form of word, one output byte per input byte.
+ void
+ Accents::generateKey(char *word, String &key)
+ {
+     // A null or empty word returns at once and leaves key untouched.
+     if (!word || !*word)
+       return;
+     // NOTE(review): key is seeded with the character '0', not cleared -- presumably every key then starts with "0"; confirm intent.
+     key = '0';
+     while (*word) {
+       key << MinusculeISOLAT1[ (unsigned char) *word++ ];  // cast keeps the index non-negative where char is signed
+     }
+ }
+ 
+ 
+ //*****************************************************************************
+ // void Accents::addWord(char *word)
+ //   Files word in the dictionary under the key generateKey() produces for it.
+ void
+ Accents::addWord(char *word)
+ {
+     if (!dict)
+     {
+         dict = new Dictionary;  // created on the first call if not already set
+     }
+     // For a null or empty word generateKey() returns early and key stays as constructed.
+     String      key;
+     generateKey(word, key);
+     // Known key: append to its space-separated word list; new key: start a list.
+     String      *s = (String *) dict->Find(key);
+     if (s)
+     {
+       //        if (mystrcasestr(s->get(), word) != 0)
+       (*s) << ' ' << word;  // NOTE(review): check above is disabled, so a repeated word is appended again
+     }
+     else
+     {
+         dict->Add(key, new String(word));
+     }
+ }
+ 
diff -c3prN htdig-3.1.5/htfuzzy/Accents.h htdig-3.1.5.accents/htfuzzy/Accents.h
*** htdig-3.1.5/htfuzzy/Accents.h       Wed Dec 31 18:00:00 1969
--- htdig-3.1.5.accents/htfuzzy/Accents.h       Thu Mar  2 11:24:56 2000
***************
*** 0 ****
--- 1,30 ----
+ //
+ // Accents.h
+ //
+ // $Id: $
+ //
+ //
+ #ifndef _Accents_h_
+ #define _Accents_h_
+ 
+ #include "Fuzzy.h"
+ // Fuzzy algorithm named "accents": keys words by their lowercased, unaccented form.
+ class Accents : public Fuzzy
+ {
+ public:
+         //
+         // Construction/Destruction
+         //
+                                         Accents();
+         virtual                 ~Accents();
+         // Writes the collected key -> word-list entries to the database; NOTOK/OK.
+         virtual int     writeDB(Configuration &config);
+         // Fills key with the table-folded form of word.
+         virtual void    generateKey(char *word, String &key);
+         // Records word in the dictionary under its generated key.
+         virtual void    addWord(char *word);
+ 
+ private:
+ };
+ 
+ #endif
diff -c3prN htdig-3.1.5/htfuzzy/Fuzzy.cc htdig-3.1.5.accents/htfuzzy/Fuzzy.cc
*** htdig-3.1.5/htfuzzy/Fuzzy.cc        Thu Feb 24 20:29:10 2000
--- htdig-3.1.5.accents/htfuzzy/Fuzzy.cc        Thu Mar  2 11:22:14 2000
*************** static char RCSid[] = "$Id: Fuzzy.cc,v 1
*** 13,18 ****
--- 13,19 ----
  #include "Configuration.h"
  #include "List.h"
  #include "StringList.h"
+ #include "Accents.h"
  #include "Endings.h"
  #include "Exact.h"
  #include "Metaphone.h"
*************** Fuzzy::getFuzzyByName(char *name)
*** 171,176 ****
--- 172,179 ----
        return new Soundex();
      else if (mystrcasecmp(name, "metaphone") == 0)
        return new Metaphone();
+     else if (mystrcasecmp(name, "accents") == 0)
+       return new Accents();
      else if (mystrcasecmp(name, "endings") == 0)
        return new Endings();
      else if (mystrcasecmp(name, "synonyms") == 0)
diff -c3prN htdig-3.1.5/htfuzzy/Makefile.in htdig-3.1.5.accents/htfuzzy/Makefile.in
*** htdig-3.1.5/htfuzzy/Makefile.in     Thu Feb 24 20:29:10 2000
--- htdig-3.1.5.accents/htfuzzy/Makefile.in     Thu Mar  2 11:23:48 2000
*************** include $(top_builddir)/Makefile.config
*** 10,20 ****
  OBJS=         Endings.o EndingsDB.o Exact.o \
                Fuzzy.o Metaphone.o Soundex.o \
                SuffixEntry.o Synonym.o htfuzzy.o \
!               Substring.o Prefix.o
  
  LIBOBJS=      Endings.o Exact.o Fuzzy.o Metaphone.o \
                Soundex.o Synonym.o EndingsDB.o SuffixEntry.o \
!               Substring.o Prefix.o
  
  TARGET=               htfuzzy
  LIBTARGET=    libfuzzy.a
--- 10,20 ----
  OBJS=         Endings.o EndingsDB.o Exact.o \
                Fuzzy.o Metaphone.o Soundex.o \
                SuffixEntry.o Synonym.o htfuzzy.o \
!               Substring.o Prefix.o Accents.o
  
  LIBOBJS=      Endings.o Exact.o Fuzzy.o Metaphone.o \
                Soundex.o Synonym.o EndingsDB.o SuffixEntry.o \
!               Substring.o Prefix.o Accents.o
  
  TARGET=               htfuzzy
  LIBTARGET=    libfuzzy.a
diff -c3prN htdig-3.1.5/htfuzzy/htfuzzy.cc htdig-3.1.5.accents/htfuzzy/htfuzzy.cc
*** htdig-3.1.5/htfuzzy/htfuzzy.cc      Thu Feb 24 20:29:11 2000
--- htdig-3.1.5.accents/htfuzzy/htfuzzy.cc      Thu Mar  2 11:23:12 2000
*************** static char RCSid[] = "$Id: htfuzzy.cc,v
*** 43,48 ****
--- 43,49 ----
  
  #include "htfuzzy.h"
  #include "Fuzzy.h"
+ #include "Accents.h"
  #include "Soundex.h"
  #include "Endings.h"
  #include "Metaphone.h"
*************** main(int ac, char **av)
*** 108,113 ****
--- 109,118 ----
        {
            wordAlgorithms.Add(new Metaphone);
        }
+       else if (mystrcasecmp(av[i], "accents") == 0)
+       {
+           wordAlgorithms.Add(new Accents);
+       }
        else if (mystrcasecmp(av[i], "endings") == 0)
        {
            noWordAlgorithms.Add(new Endings);
*************** usage()
*** 237,242 ****
--- 242,248 ----
      cout << "Supported algorithms:\n";
      cout << "\tsoundex\n";
      cout << "\tmetaphone\n";
+     cout << "\taccents\n";
      cout << "\tendings\n";
      cout << "\tsynonyms\n";
      cout << "\n";


-- 
Gilles R. Detillieux              E-mail: <[EMAIL PROTECTED]>
Spinal Cord Research Centre       WWW:    http://www.scrc.umanitoba.ca/~grdetil
Dept. Physiology, U. of Manitoba  Phone:  (204)789-3766
Winnipeg, MB  R3E 3J7  (Canada)   Fax:    (204)789-3930

------------------------------------
To unsubscribe from the htdig mailing list, send a message to
[EMAIL PROTECTED]
You will receive a message to confirm this.

Reply via email to