Here is a patch that fixes the smartquotes problem and contains a workaround
for crashes when iconv(cd,0,0,0,0) is called.
As for the smartquotes problem: the analysis is quite funny. (The following
text applies to the ISO-8859-1 encoding, not to any Cyrillic one.)
First, there are two sets of Unicode symbols that both count as "nice quotes"
(only the left-hand ones are listed here):
0xab:LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (aka "LEFT POINTING GUILLEMET")
0x201c:LEFT DOUBLE QUOTATION MARK (aka "DOUBLE TURNED COMMA QUOTATION MARK")
Their right-hand counterparts are 0xbb and 0x201d, respectively.
From the names, it's obvious that the symbol in the fonts shipped with AW ('<<')
is better characterised as "LEFT-POINTING DOUBLE ANGLE QUOTATION MARK"
than as "DOUBLE TURNED COMMA QUOTATION MARK".
While 0xab (0xbb) is present directly in the charsets ISO8859-1 and cp1252
(the Windows encoding for Latin-1), 0x201c (0x201d) is not there. Specifically:
1) libiconv's iconv maps 0x201c to '"'
2) glibc's iconv can't map 0x201c to anything.
State after this patch:
In the first case (linking with libiconv), remapping to "nice" glyphs works
only when printing; plain quotes are shown on the screen.
In the second case (glibc's iconv), modifying remapGlyphsDefault (included in
the patch) makes "nice" quotes appear both on screen and in the printed version.
I think that we should try to use 0xab and 0xbb instead of 0x201c/0x201d as
smartquotes. Does anyone know why they weren't chosen in the first place? If
there was no good reason for that choice, it may be a wise idea to (try to)
switch to 0xab/0xbb as smartquotes.
What do you think about all this?
Best regards,
-Vlad
diff -ru abi-0.7.12pre-orig/abi-0.7.11/CREDITS.TXT
abi-0.7.12pre-cur/abi-0.7.11/CREDITS.TXT
--- abi-0.7.12pre-orig/abi-0.7.11/CREDITS.TXT Wed Nov 15 16:18:14 2000
+++ abi-0.7.12pre-cur/abi-0.7.11/CREDITS.TXT Wed Nov 15 20:53:52 2000
@@ -144,7 +144,7 @@
Javier Yáñez <[EMAIL PROTECTED]> Spanish
hj <[EMAIL PROTECTED]> Chinese (zh-CN)
John Profic <[EMAIL PROTECTED]> Russian
-Vadim Frolov <[EMAIL PROTECTED]> Ukranian (uk-UA)
+Vadim Frolov <[EMAIL PROTECTED]> Ukrainian (uk-UA)
unsung heroes
-------------
Only in abi-0.7.12pre-cur/abi-0.7.11/src: Untitled1.bak
diff -ru abi-0.7.12pre-orig/abi-0.7.11/src/af/gr/unix/gr_UnixGraphics.cpp
abi-0.7.12pre-cur/abi-0.7.11/src/af/gr/unix/gr_UnixGraphics.cpp
--- abi-0.7.12pre-orig/abi-0.7.11/src/af/gr/unix/gr_UnixGraphics.cpp Wed Nov 15
16:18:15 2000
+++ abi-0.7.12pre-cur/abi-0.7.11/src/af/gr/unix/gr_UnixGraphics.cpp Wed Nov 15
+18:44:56 2000
@@ -99,6 +99,7 @@
static UT_Wctomb* w = NULL;
static char text[MB_LEN_MAX];
static int text_length;
+static UT_Bool fallback_used;
#define WCTOMB_DECLS \
if (!w) { \
@@ -111,8 +112,14 @@
/* this branch is to allow Lists to function */ \
text[0] = (unsigned char)c; \
text_length = 1; \
- } else \
- w->wctomb_or_fallback(text,text_length,(wchar_t)c); \
+ fallback_used = 0; \
+ } else {\
+ fallback_used = 0; \
+ if (!w->wctomb(text,text_length,(wchar_t)c)) { \
+ w->wctomb_or_fallback(text,text_length,(wchar_t)c); \
+ fallback_used = 1; \
+ } \
+ }
// HACK: I need more speed
void GR_UnixGraphics::drawChar(UT_UCSChar Char, UT_sint32 xoff, UT_sint32 yoff)
@@ -197,6 +204,8 @@
UT_UCSChar Wide_char = c;
WCTOMB_DECLS;
CONVERT_TO_MBS(Wide_char);
+ if (fallback_used)
+ return 0;
GdkFont *font = m_pFont->getGdkFontForUCSChar(Wide_char);
return gdk_text_width(font, text, text_length);
diff -ru abi-0.7.12pre-orig/abi-0.7.11/src/af/util/xp/ut_mbtowc.cpp
abi-0.7.12pre-cur/abi-0.7.11/src/af/util/xp/ut_mbtowc.cpp
--- abi-0.7.12pre-orig/abi-0.7.11/src/af/util/xp/ut_mbtowc.cpp Wed Nov 15 16:18:15
2000
+++ abi-0.7.12pre-cur/abi-0.7.11/src/af/util/xp/ut_mbtowc.cpp Wed Nov 15 19:20:49
+2000
@@ -215,7 +215,7 @@
void UT_Mbtowc::initialize()
{
- iconv(cd,NULL,NULL,NULL,NULL);
+ UT_iconv_reset(cd);
m_bufLen = 0;
};
diff -ru abi-0.7.12pre-orig/abi-0.7.11/src/af/util/xp/ut_wctomb.cpp
abi-0.7.12pre-cur/abi-0.7.11/src/af/util/xp/ut_wctomb.cpp
--- abi-0.7.12pre-orig/abi-0.7.11/src/af/util/xp/ut_wctomb.cpp Wed Nov 15 16:18:15
2000
+++ abi-0.7.12pre-cur/abi-0.7.11/src/af/util/xp/ut_wctomb.cpp Wed Nov 15 19:20:51
+2000
@@ -155,7 +155,7 @@
#include "xap_EncodingManager.h"
void UT_Wctomb::initialize()
{
- iconv(cd,NULL,NULL,NULL,NULL);
+ UT_iconv_reset(cd);
};
void UT_Wctomb::setOutCharset(const char* charset)
diff -ru abi-0.7.12pre-orig/abi-0.7.11/src/af/xap/xp/xap_EncodingManager.cpp
abi-0.7.12pre-cur/abi-0.7.11/src/af/xap/xp/xap_EncodingManager.cpp
--- abi-0.7.12pre-orig/abi-0.7.11/src/af/xap/xp/xap_EncodingManager.cpp Wed Nov 15
16:18:16 2000
+++ abi-0.7.12pre-cur/abi-0.7.11/src/af/xap/xp/xap_EncodingManager.cpp Wed Nov 15
+17:43:47 2000
@@ -239,7 +239,7 @@
}
size_t donecnt = iconv(iconv_handle,const_cast<ICONV_CONST
char**>(&iptr),&ibuflen,&optr,&obuflen);
/* reset state */
- iconv(iconv_handle,NULL,NULL,NULL,NULL);
+ UT_iconv_reset(iconv_handle);
if (donecnt!=(size_t)-1 && ibuflen==0)
{
int len = sizeof(obuf) - obuflen;
@@ -942,3 +942,10 @@
{
return XAP_EncodingManager::instance->getLanguageISOName();
}
+
+extern "C"
+void UT_iconv_reset(iconv_t cd)
+{
+ if (XAP_EncodingManager::instance->cjk_locale())
+ iconv(cd,const_cast<ICONV_CONST char**>((char**)NULL),NULL,NULL,NULL);
+};
\ No newline at end of file
diff -ru abi-0.7.12pre-orig/abi-0.7.11/src/af/xap/xp/xap_EncodingManager.h
abi-0.7.12pre-cur/abi-0.7.11/src/af/xap/xp/xap_EncodingManager.h
--- abi-0.7.12pre-orig/abi-0.7.11/src/af/xap/xp/xap_EncodingManager.h Wed Nov 15
16:18:16 2000
+++ abi-0.7.12pre-cur/abi-0.7.11/src/af/xap/xp/xap_EncodingManager.h Wed Nov 15
+17:42:55 2000
@@ -30,6 +30,10 @@
#include "xmlparse.h"
#endif
+extern "C" {
+#include "iconv.h"
+}
+
struct XAP_LangInfo
{
/*no memeber can have NULL value. If string is empty, then value is
@@ -272,6 +276,12 @@
extern "C" {
extern int XAP_EncodingManager__swap_stou,XAP_EncodingManager__swap_utos;
const char * xap_encoding_manager_get_language_iso_name(void);
+
+/*
+ Use this instead of iconv(cd,NULL,NULL,NULL,NULL) since iconv in glibc-2.[01]
+ will crash otherwise.
+*/
+void UT_iconv_reset(iconv_t cd);
}
#endif /* XAP_APP_H */
diff -ru abi-0.7.12pre-orig/abi-0.7.11/src/af/xap/xp/xap_Prefs_SchemeIds.h
abi-0.7.12pre-cur/abi-0.7.11/src/af/xap/xp/xap_Prefs_SchemeIds.h
--- abi-0.7.12pre-orig/abi-0.7.11/src/af/xap/xp/xap_Prefs_SchemeIds.h Sun Aug 6
03:53:23 2000
+++ abi-0.7.12pre-cur/abi-0.7.11/src/af/xap/xp/xap_Prefs_SchemeIds.h Wed Nov 15
+20:27:30 2000
@@ -60,7 +60,11 @@
#define XAP_PREF_KEY_RemapGlyphsTable "RemapGlyphsTable"
/* pairwise table of originals and replacements, arbitrarily many pairs */
-#define XAP_PREF_DEFAULT_RemapGlyphsTable
"‘`’'“"”""
+#if 0
+# define XAP_PREF_DEFAULT_RemapGlyphsTable
+"‘`’'“"”""
+#else
+# define XAP_PREF_DEFAULT_RemapGlyphsTable
+"‘`’'“«”»"
+#endif
/* smart quotes */
#define XAP_PREF_KEY_SmartQuotesEnable "SmartQuotesEnable"
diff -ru abi-0.7.12pre-orig/wv/text.c abi-0.7.12pre-cur/wv/text.c
--- abi-0.7.12pre-orig/wv/text.c Wed Nov 15 16:18:06 2000
+++ abi-0.7.12pre-cur/wv/text.c Wed Nov 15 17:40:34 2000
@@ -236,7 +236,7 @@
}
donecnt = wv_iconv(iconv_handle,(&iptr),&ibuflen,&optr,&obuflen);
/* reset state */
- wv_iconv(iconv_handle,NULL,NULL,NULL,NULL);
+ UT_iconv_reset(iconv_handle);
if (donecnt!=(size_t)-1 && ibuflen==0)
{
int len = sizeof(obuf) - obuflen;