Sorry, I was wrong. You can try cp1252 (Western), 
not cp1250 (Central European).




-----------------------------------------------
This patch fixes a bug where characters like é
were not properly converted to Unicode.

About the &#8217; problem: iso-8859-1 does not actually
contain this character. This is why it is displayed
as a ? sign.

There are two possible ways to solve this.


1. Change LocalCharset, BrowserCharset and Charset to cp1250.
It is almost the same as iso-8859-1 (the letters are compatible),
but it has some characters which are not represented in
the ISO charset, including RIGHT SINGLE QUOTATION MARK ’

I tested with cp1250 and everything works fine.

Take a look into this screenshot:

 http://gw.udmsearch.izhnet.ru/~bar/snapshot1.png

Both the quotation mark and e with acute work fine.


2. The second way is to use UTF-8. However, personally
I prefer a single-byte charset when real multi-lingual
search is not required.


In future releases we'll try to add transliteration
support. Characters like &#8217; (when the target
charset does not support them) will be translated
into something similar when possible.
In particular, &#8217; will be translated to an apostrophe.
Index: sgml.c
===================================================================
RCS file: /usr/src/CVS/mnogosearch32/src/sgml.c,v
retrieving revision 1.2
diff -u -r1.2 sgml.c
--- sgml.c      2001/05/10 16:40:23     1.2
+++ sgml.c      2001/09/27 11:52:57
@@ -8,7 +8,7 @@
 static const struct udm_sgml_chars
 {
        const char      *sgml;
-       unsigned char   unicode;
+       int             unicode;
 } SGMLChars[] = {
     { "lt",          '<' } ,
     { "gt",          '>' } ,
@@ -115,13 +115,13 @@
     { ""    ,           0}    /* END */
   };
 
-static char UdmSgmlToChar(char *sgml){
+static int UdmSgmlToUni(char *sgml){
        int i;
-       char res;
+       int res;
 
        for(i=0;SGMLChars[i].unicode;i++)
                if(!strncmp(sgml,SGMLChars[i].sgml,strlen(SGMLChars[i].sgml))){
-                       res=(char)SGMLChars[i].unicode;
+                       res=SGMLChars[i].unicode;
                        return(res);
                }
        return(0);
@@ -148,7 +148,7 @@
                                }
                        }else{
                                
for(e=s+1;(e-s<15)&&(((*e<='z')&&(*e>='a'))||((*e<='Z')&&(*e>='A')));e++);
-                               if((*e==';')&&(c=UdmSgmlToChar(s+1))){
+                               if((*e==';')&&(c=(char)UdmSgmlToUni(s+1))){
                                        *s=c;
                                        memmove(s+1,e+1,strlen(e+1)+1);
                                        
@@ -183,7 +183,7 @@
                                        if(*sgml=='#'){
                                                code=atoi(sgml+1);
                                        }else{
-                                               code=UdmSgmlToChar(sgml);
+                                               code=UdmSgmlToUni(sgml);
                                        }
                                        if(code){
                                                *(s-1)=code;

Reply via email to