Revision: 7195
http://svn.sourceforge.net/mahogany/?rev=7195&view=rev
Author: vadz
Date: 2006-12-25 12:48:14 -0800 (Mon, 25 Dec 2006)
Log Message:
-----------
test if the encoding really works in GuessUnicodeCharset(); also made it public
instead of manually declaring it as extern in BareBonesEditor.cpp
Modified Paths:
--------------
trunk/M/include/strutil.h
trunk/M/src/modules/BareBonesEditor.cpp
trunk/M/src/util/strutil.cpp
Modified: trunk/M/include/strutil.h
===================================================================
--- trunk/M/include/strutil.h 2006-12-25 20:05:39 UTC (rev 7194)
+++ trunk/M/include/strutil.h 2006-12-25 20:48:14 UTC (rev 7195)
@@ -341,6 +341,11 @@
extern wxArrayString strutil_uniq_array(const wxSortedArrayString& arrSorted);
/**
+ Return our guess for the encoding of the given Unicode text.
+ */
+extern wxFontEncoding GuessUnicodeCharset(const wchar_t *pwz);
+
+/**
Try to convert text in UTF-8 or 7 to a multibyte encoding.
The conversion is done in place, i.e. the str parameter is read and written
@@ -357,7 +362,6 @@
@return the encoding of the returned string
*/
extern wxFontEncoding ConvertUTFToMB(wxString *str, wxFontEncoding utfEnc);
-extern wxFontEncoding ConvertUTF8ToMB(wxString *str);
// return the length of the line terminator if we're at the end of line or 0
// otherwise
Modified: trunk/M/src/modules/BareBonesEditor.cpp
===================================================================
--- trunk/M/src/modules/BareBonesEditor.cpp 2006-12-25 20:05:39 UTC (rev 7194)
+++ trunk/M/src/modules/BareBonesEditor.cpp 2006-12-25 20:48:14 UTC (rev 7195)
@@ -1169,9 +1169,6 @@
// BareBonesEditor contents: enumerating the different parts
// ----------------------------------------------------------------------------
-// from strutil.cpp
-extern wxFontEncoding GuessUnicodeCharset(const wchar_t *pwz);
-
EditorContentPart *BareBonesEditor::GetFirstPart()
{
m_getNextAttachement = 0;
Modified: trunk/M/src/util/strutil.cpp
===================================================================
--- trunk/M/src/util/strutil.cpp 2006-12-25 20:05:39 UTC (rev 7194)
+++ trunk/M/src/util/strutil.cpp 2006-12-25 20:48:14 UTC (rev 7195)
@@ -1379,7 +1379,7 @@
}
// guess the charset of the given Unicode text
-extern wxFontEncoding GuessUnicodeCharset(const wchar_t *pwz)
+wxFontEncoding GuessUnicodeCharset(const wchar_t *pwz)
{
typedef const unsigned short *codepage;
struct CodePageInfo
@@ -1459,83 +1459,24 @@
if ( wxFontMapper::Get()->IsEncodingAvailable(encodings[nEnc]) )
{
enc = encodings[nEnc];
- break;
- }
- }
-#ifdef DEBUG_nerijus
- // temporary HACK - Lithuanian input in both Windows and X is
- // ISO-8859-13 (or Windows-1257), not ISO-8859-4
- if ( enc == wxFONTENCODING_ISO8859_4 || enc == wxFONTENCODING_ISO8859_1 || enc == wxFONTENCODING_ISO8859_2 ) enc = wxFONTENCODING_ISO8859_13;
-#endif
- return enc;
-}
-// convert a string in UTF-8 or 7 into the string in some multibyte encoding:
-// of course, this doesn't work in general as Unicode is not representable as
-// an 8 bit charset but it works in some common cases and is better than no
-// UTF-8 support at all
-//
-// FIXME this won't be needed when full Unicode support is available
-wxFontEncoding ConvertUTFToMB(wxString *strUtf, wxFontEncoding enc)
-{
-#if !defined __WXGTK20__
- CHECK( strUtf, wxFONTENCODING_SYSTEM, _T("NULL string in ConvertUTFToMB") );
-
- if ( !strUtf->empty() )
- {
- if ( enc == wxFONTENCODING_UTF7 )
- {
- // try to determine which multibyte encoding is best suited for this
- // Unicode string
- wxWCharBuffer wbuf(strUtf->wc_str(wxConvUTF7));
- if ( !wbuf )
- {
- // invalid UTF-7 data, leave it as is
- enc = wxFONTENCODING_SYSTEM;
- }
- else // try to find a multibyte encoding we can show this in
- {
- enc = GuessUnicodeCharset(wbuf);
-
- // finally convert to multibyte
- wxString str;
- if ( enc == wxFONTENCODING_SYSTEM )
- {
- str = wxString(wbuf);
- }
- else
- {
- wxCSConv conv(enc);
- str = wxString(wbuf, conv);
- }
- if ( str.empty() )
- {
- // conversion failed - use original text (and display incorrectly,
- // unfortunately)
- wxLogDebug(_T("conversion from UTF-7 to default encoding failed"));
- }
- else
- {
- *strUtf = str;
- }
- }
+ // test if we can convert the entire text using it
+ //
+ // NB: normally we shouldn't do the real conversion here as it is
+ // awfully wasteful but currently it's possible for wxCSConv::
+ // WC2MB() to return successfully when it's used for just testing
+ // and not real conversion under Win32
+ if ( !wxString(pwz, wxCSConv(enc)).empty() )
+ break;
}
- else
- {
- ASSERT_MSG( enc == wxFONTENCODING_UTF8, _T("unknown Unicode encoding") );
- return ConvertUTF8ToMB(strUtf);
- }
}
- else // doesn't really matter what we return from here
- {
- enc = wxFONTENCODING_SYSTEM;
- }
-#endif // !__WXGTK20__
return enc;
}
-wxFontEncoding ConvertUTF8ToMB(wxString *strUtf)
+#ifndef __WXGTK20__
+
+static wxFontEncoding ConvertToMB(wxString *strUtf, const wxMBConv& conv)
{
wxFontEncoding enc;
CHECK( strUtf, wxFONTENCODING_SYSTEM, _T("NULL string in ConvertUTF8ToMB") );
@@ -1544,7 +1485,7 @@
{
// try to determine which multibyte encoding is best suited for this
// Unicode string
- wxWCharBuffer wbuf(strUtf->wc_str(wxConvUTF8));
+ wxWCharBuffer wbuf(strUtf->wc_str(conv));
if ( !wbuf )
{
// invalid UTF-8 data, leave it as is
@@ -1565,11 +1506,14 @@
wxCSConv conv(enc);
str = wxString(wbuf, conv);
}
+
if ( str.empty() )
{
// conversion failed - use original text (and display incorrectly,
// unfortunately)
wxLogDebug(_T("conversion from UTF-8 to default encoding failed"));
+
+ enc = wxFONTENCODING_SYSTEM;
}
else
{
@@ -1585,6 +1529,45 @@
return enc;
}
+#endif // __WXGTK20__
+
+// convert a string in UTF-8 or 7 into the string in some multibyte encoding:
+// of course, this doesn't work in general as Unicode is not representable as
+// an 8 bit charset but it works in some common cases and is better than no
+// UTF-8 support at all
+//
+// FIXME this won't be needed when full Unicode support is available
+wxFontEncoding ConvertUTFToMB(wxString *strUtf, wxFontEncoding enc)
+{
+ CHECK( strUtf, wxFONTENCODING_SYSTEM, _T("NULL string in ConvertUTFToMB") );
+
+ wxFontEncoding encConv;
+ if ( !strUtf->empty() )
+ {
+ if ( enc == wxFONTENCODING_UTF7 )
+ {
+ encConv = ConvertToMB(strUtf, wxConvUTF7);
+ }
+ else
+ {
+ ASSERT_MSG( enc == wxFONTENCODING_UTF8, _T("unknown Unicode encoding") );
+
+ // in GTK+ 2.0 we can use UTF-8 directly
+#ifdef __WXGTK20__
+ encConv = wxFONTENCODING_SYSTEM;
+#else
+ return ConvertToMB(strUtf, wxConvUTF8);
+#endif // !__WXGTK20__
+ }
+ }
+ else // doesn't really matter what we return from here
+ {
+ encConv = wxFONTENCODING_SYSTEM;
+ }
+
+ return encConv;
+}
+
// return the length of the line terminator if we're at the end of line or 0
// otherwise
size_t IsEndOfLine(const wxChar *p)
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys - and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
_______________________________________________
Mahogany-cvsupdates mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/mahogany-cvsupdates