I am working on a mobile internet browser that has to deal with entities. I
use the following functions at the bottom to look up an entity and convert it
to the page's encoding and put it in an output string so that I can later
convert from the page's encoding to the device's encoding. This works fine
when the page's encoding is UTF-8 (or the like), but fails when the page's
encoding is ISO-8859-1.
I think it might have to do with the fact that TxtConvertEncoding only
really works with device's encoding <-> UTF-8, UCS*. It looks as if this is
the case because I get an error: txtErrUnknownEncoding.
Is there an alternate method that I could use for converting the
"conversion" ptr (see below) to ISO-8859-1?
The code I use:
I take the entity code, such as "&amp;", and convert it to an ASCII
equivalent using a lookup table, like the fragment below:
/* One row of the entity lookup table. */
typedef struct
{
    char *tag;        /* entity name as written in the page: leading '&', no trailing ';' */
    char *conversion; /* replacement text; a single byte for every entry shown here */
} TagEntry;

/*
 * Entity name -> replacement character.
 *
 * The keys must be the literal entity *names* ("&cent"), not the characters
 * they stand for: the lookup is done against the raw page text. The high
 * bytes (0xA2..0xB0) are the Latin-1 code points of the named characters.
 */
static TagEntry tag_list[] =
{
    { "&amp",    "&"    },
    { "&apos",   "\'"   },
    { "&rsquo",  "\'"   },
    { "&ast",    "\x2a" },
    { "&bull",   "."    }, //"\x2022" },
    { "&cent",   "\xa2" },
    { "&pound",  "\xa3" },
    { "&curren", "\xa4" },
    { "&yen",    "\xa5" },
    { "&brvbar", "\xa6" },
    { "&sect",   "\xa7" },
    { "&uml",    "\xa8" },
    { "&copy",   "\xa9" },
    { "&ordf",   "\xaa" },
    { "&laquo",  "\xab" },
    { "&not",    "\xac" },
    { "&shy",    "\xad" },
    { "&macr",   "\xaf" },
    { "&deg",    "\xb0" },
    /* ... remaining entries omitted from this fragment ... */
};
I then take the "conversion" and pass it as sourceStr.
/*
 * Convert the first character of an entity replacement string (a single
 * Palm Latin byte taken from the lookup table) into the page's character set.
 *
 * sourceStr  Replacement text; only sourceStr[0] is examined.
 *
 * Returns a newly Malloc'ed, NUL-terminated string holding the character in
 * the page encoding (g->charSet), or NULL if sourceStr is NULL/empty, the
 * globals cannot be fetched, the conversion fails, the converted text starts
 * with a zero byte, or the allocation fails. The caller owns the result.
 *
 * NOTE(review): the result is copied with StrCopy, so an encoding whose
 * output contains zero bytes (UCS-2) cannot be returned faithfully through
 * this interface; that limitation is unchanged from the original.
 */
Char *ConvertLatinEntityToCharset(Char *sourceStr)
{
    char dchr[maxCharBytes];      // char in device (Palm Latin) encoding
    char pchr[maxCharBytes + 1];  // char in the page's encoding + terminator
    UInt16 dBytes;
    UInt16 pBytes = maxCharBytes;
    CharEncodingType dstEncoding;
    Char *result = NULL;
    GlobalsType *g = NULL;
    UInt8 srcByte;
    Err err;

    if (sourceStr == NULL || sourceStr[0] == '\0')
        return NULL;

    if (FtrGet(wuAppType, ftrGlobals, (UInt32 *)&g) != errNone || g == NULL)
        return NULL;

    // Go through UInt8 so that bytes >= 0x80 are not sign-extended when they
    // are widened to the character argument of TxtSetNextChar.
    srcByte = (UInt8)sourceStr[0];

    if (g->charSet == ISO_8859_1) {
        // TxtConvertEncoding reports txtErrUnknownEncoding for this target,
        // so map the byte by hand. Presumably Palm Latin agrees with
        // ISO-8859-1 on 0x00-0x7F and 0xA0-0xFF (confirm against the Palm OS
        // character-set tables); 0x80-0x9F has no ISO-8859-1 printable
        // equivalent, so substitute "?" as the other branches do.
        pchr[0] = (srcByte < 0x80 || srcByte >= 0xA0) ? (char)srcByte : '?';
        pBytes = 1;
    } else {
        switch (g->charSet) {
        case UTF_8:
            dstEncoding = charEncodingUTF8;
            break;
        case USC_2:
            dstEncoding = charEncodingUCS2;
            break;
        default:
            dstEncoding = charEncodingPalmLatin;
            break;
        }

        dBytes = TxtSetNextChar(dchr, 0, srcByte);
        err = TxtConvertEncoding(true, NULL, dchr, &dBytes,
                                 charEncodingPalmLatin,
                                 pchr, &pBytes, dstEncoding, "?", 1);

        // On failure pchr/pBytes are not trustworthy; do not hand back junk.
        if (err != errNone || pBytes > maxCharBytes)
            return NULL;
    }

    pchr[pBytes] = '\0';
    if (pchr[0] == '\0')
        return NULL;

    result = Malloc(pBytes + 1);
    if (result == NULL)
        return NULL;

    StrCopy(result, pchr);
    return result;
}
This converts the string produced after the entity conversion to the
device's encoding:
/*
 * Convert sourceStr from srcEncoding into the device's encoding, writing the
 * result into destStr.
 *
 * srcEncoding  Encoding of sourceStr.
 * sourceStr    NUL-terminated input text.
 * destStr      Caller-supplied output buffer; it is treated as holding
 *              StrLen(sourceStr) bytes.
 *
 * Returns destStr when the conversion succeeds, otherwise sourceStr (which
 * signals to the caller that the conversion failed).
 *
 * NOTE(review): destStr is only NUL-terminated when the output is shorter
 * than the consumed input; when the lengths are equal the caller's buffer
 * must already be terminated. Confirm against the callers before changing.
 */
Char *ConvertFromTo(CharEncodingType srcEncoding, Char *sourceStr, Char *destStr)
{
    Err err;
    UInt16 ioSrcBytes;
    UInt16 ioDstBytes;
    UInt32 encoding = charEncodingPalmLatin;

    if (sourceStr == NULL || destStr == NULL)
        return sourceStr;

    // If the encoding feature cannot be read, fall back to Palm Latin rather
    // than converting to whatever happened to be left in `encoding`.
    if (FtrGet(sysFtrCreator, sysFtrNumEncoding, &encoding) != errNone)
        encoding = charEncodingPalmLatin;

    ioSrcBytes = StrLen(sourceStr);
    ioDstBytes = ioSrcBytes;

    err = TxtConvertEncoding(true, NULL, sourceStr, &ioSrcBytes, srcEncoding,
                             destStr, &ioDstBytes, (CharEncodingType)encoding,
                             "?", 1);

    // The string was transformed and is shorter: terminate it at its new end.
    if (ioDstBytes < ioSrcBytes)
        destStr[ioDstBytes] = 0;

    // Returning sourceStr means the conversion failed.
    return (err == errNone) ? destStr : sourceStr;
}
Thanks,
Donald
--
For information on using the PalmSource Developer Forums, or to unsubscribe,
please see http://www.palmos.com/dev/support/forums/