jberry 2002/08/21 09:55:05
Modified: c/src/xercesc/util/Transcoders/MacOSUnicodeConverter
MacOSUnicodeConverter.cpp
Log:
Better unicode character classification and upper/lower case support under Carbon
APIs
Revision Changes Path
1.5 +53 -54
xml-xerces/c/src/xercesc/util/Transcoders/MacOSUnicodeConverter/MacOSUnicodeConverter.cpp
Index: MacOSUnicodeConverter.cpp
===================================================================
RCS file:
/home/cvs/xml-xerces/c/src/xercesc/util/Transcoders/MacOSUnicodeConverter/MacOSUnicodeConverter.cpp,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- MacOSUnicodeConverter.cpp 12 Jul 2002 16:48:50 -0000 1.4
+++ MacOSUnicodeConverter.cpp 21 Aug 2002 16:55:05 -0000 1.5
@@ -71,9 +71,8 @@
#include <xercesc/util/Janitor.hpp>
#include <xercesc/util/Platforms/MacOS/MacOSPlatformUtils.hpp>
-#if defined(XML_METROWERKS) // || (__GNUC__ >= 3)
+#if defined(XML_METROWERKS) || (__GNUC__ >= 3 && _GLIBCPP_USE_WCHAR_T)
// Only used under metrowerks.
- // In ProjectBuilder, the system wchar.h header is missing, which causes this to fail.
#include <cwctype>
#endif
@@ -94,6 +93,8 @@
#include <CodeFragments.h>
#include <UnicodeConverter.h>
#include <UnicodeUtilities.h>
+ #include <CFCharacterSet.h>
+ #include <CFString.h>
#endif
@@ -103,7 +104,7 @@
// TempUniBuf is used for cases where we need a temporary conversion due to
// a mismatch between UniChar (the 16-bit type that the Unicode converter uses)
-// and wchar_t (the type the compiler uses to represent a Unicode character).
+// and XMLCh (the type that Xerces uses to represent a Unicode character).
// In the case of Metrowerks, these are the same size. For ProjectBuilder, they
// used to differ, but they are now the same since XMLCh is now always fixed
// as a 16 bit character, rather than floating with wchar_t as it used to.
@@ -123,7 +124,7 @@
};
// Detect a mismatch in unicode character size.
-const bool kUniSizeMismatch = sizeof(XMLCh) != sizeof(UniChar);
+#define UNICODE_SIZE_MISMATCH (sizeof(XMLCh) != sizeof(UniChar))
// ---------------------------------------------------------------------------
@@ -190,7 +191,7 @@
std::size_t passCnt1;
std::size_t passCnt2;
- if (kUniSizeMismatch)
+ if (UNICODE_SIZE_MISMATCH)
{
passCnt1 = std::min(cnt1, kTempUniBufCount);
passCnt2 = std::min(cnt2, kTempUniBufCount);
@@ -313,7 +314,7 @@
std::size_t passCnt1;
std::size_t passCnt2;
- if (kUniSizeMismatch)
+ if (UNICODE_SIZE_MISMATCH)
{
passCnt1 = std::min(cnt1, kTempUniBufCount);
passCnt2 = std::min(cnt2, kTempUniBufCount);
@@ -392,24 +393,18 @@
bool MacOSUnicodeConverter::isSpace(const XMLCh toCheck) const
{
-#if defined(XML_METROWERKS) // || (__GNUC__ >= 3)
+#if TARGET_API_MAC_CARBON
+
+ // Return true if the specified character is in the set.
+ CFCharacterSetRef wsSet =
CFCharacterSetGetPredefined(kCFCharacterSetWhitespaceAndNewline);
+ return CFCharacterSetIsCharacterMember(wsSet, toCheck);
+
+#elif defined(XML_METROWERKS) || (__GNUC__ >= 3 && _GLIBCPP_USE_WCHAR_T)
+
// Use this if there's a reasonable c library available.
// ProjectBuilder currently has no support for iswspace ;(
return (std::iswspace(toCheck) != 0);
-#elif defined(__APPLE__) || true
- // This looks fairly good, assuming we're on an ascii compiler.
- // We'll use this under ProjectBuilder for now.
- return (toCheck == L' ');
-#elif 0
- // This is okay but probably kind of slow for what we need
- UCCharPropertyValue propValue = 0;
-
- OSStatus status = UCGetCharProperty(
- reinterpret_cast<const UniChar*>(&toCheck),
- 1, // size of toCheck in UniChars, right? Not bytes???
- kUCCharPropTypeGenlCategory,
- &propValue);
- return (status == noErr) && (propValue == kUCGenlCatSeparatorSpace);
+
#endif
}
@@ -464,24 +459,26 @@
void MacOSUnicodeConverter::upperCase(XMLCh* const toUpperCase) const
{
-#if defined(XML_METROWERKS) // || (__GNUC__ >= 3)
+#if TARGET_API_MAC_CARBON
+
+ // If we're targeting carbon, use the CFString conversion to uppercase
+ CFMutableStringRef cfString = CFStringCreateMutableWithExternalCharactersNoCopy(
+ NULL,
+ (UniChar*)toUpperCase,
+ XMLString::stringLen(toUpperCase),
+ 0,
+ kCFAllocatorNull);
+ CFStringUppercase(cfString, NULL);
+ CFRelease(cfString);
+
+#elif defined(XML_METROWERKS) || (__GNUC__ >= 3 && _GLIBCPP_USE_WCHAR_T)
+
// Use this if there's a reasonable c library available.
// Metrowerks does this reasonably
wchar_t c;
for (XMLCh* p = (XMLCh*)toUpperCase; ((c = *p) != 0); )
*p++ = std::towupper(c);
-#elif defined(__APPLE__) || true
- // This might work, assuming we're on an ascii compiler.
- // We'll use this under ProjectBuilder for now.
- // Note that this only handles the ascii portion of the
- // string, leaving all other characters in original case.
- XMLCh c;
- for (XMLCh* p = (XMLCh*)toUpperCase; ((c = *p) != 0); )
- {
- if (c >= 'a' && c <= 'z')
- c += 'A' - 'a';
- *p++ = c;
- }
+
#else
#error Sorry, no support for upperCase
#endif
@@ -490,24 +487,26 @@
void MacOSUnicodeConverter::lowerCase(XMLCh* const toLowerCase) const
{
-#if defined(XML_METROWERKS) // || (__GNUC__ >= 3)
+#if TARGET_API_MAC_CARBON
+
+ // If we're targeting carbon, use the CFString conversion to lowercase
+ CFMutableStringRef cfString = CFStringCreateMutableWithExternalCharactersNoCopy(
+ NULL,
+ (UniChar*)toLowerCase,
+ XMLString::stringLen(toLowerCase),
+ 0,
+ kCFAllocatorNull);
+ CFStringLowercase(cfString, NULL);
+ CFRelease(cfString);
+
+#elif defined(XML_METROWERKS) || (__GNUC__ >= 3 && _GLIBCPP_USE_WCHAR_T)
+
// Use this if there's a reasonable c library available.
// Metrowerks does this reasonably
wchar_t c;
for (XMLCh* p = (XMLCh*)toLowerCase; ((c = *p) != 0); )
*p++ = std::towlower(c);
-#elif defined(__APPLE__) || true
- // This might work, assuming we're on an ascii compiler.
- // We'll use this under ProjectBuilder for now.
- // Note that this only handles the ascii portion of the
- // string, leaving all other characters in original case.
- XMLCh c;
- for (XMLCh* p = (XMLCh*)toLowerCase; ((c = *p) != 0); )
- {
- if (c >= 'A' && c <= 'Z')
- c += 'a' - 'A';
- *p++ = c;
- }
+
#else
#error Sorry, no support for lowerCase
#endif
@@ -661,7 +660,7 @@
// XMLCh. We lied about the max buffer length above in
// order to leave room in our output buffer. So we know
// we're in good shape here to just convert in place.
- if (kUniSizeMismatch)
+ if (UNICODE_SIZE_MISMATCH)
CopyUniCharsToXMLChs(reinterpret_cast<UniChar* const>(toFill), toFill,
charsProduced, maxChars);
bytesEaten = bytesConsumed;
@@ -708,7 +707,7 @@
// Setup source buffer as needed to accommodate a unicode
// character size mismatch.
TempUniBuf buf;
- if (kUniSizeMismatch)
+ if (UNICODE_SIZE_MISMATCH)
{
passCnt = std::min(srcCnt, kTempUniBufCount);
passSrc = CopyXMLChsToUniChars(src, buf, passCnt,
kTempUniBufCount);
@@ -947,7 +946,7 @@
// Setup source buffer as needed to accommodate a unicode
// character size mismatch.
TempUniBuf iBuf;
- if (kUniSizeMismatch)
+ if (UNICODE_SIZE_MISMATCH)
{
passCnt = std::min(srcCnt, kTempUniBufCount);
passSrc = CopyXMLChsToUniChars(src, iBuf, passCnt,
kTempUniBufCount);
@@ -1024,7 +1023,7 @@
// Setup source buffer as needed to accommodate a unicode
// character size mismatch.
TempUniBuf iBuf;
- if (kUniSizeMismatch)
+ if (UNICODE_SIZE_MISMATCH)
{
passCnt = std::min(srcCnt, kTempUniBufCount);
passSrc = CopyXMLChsToUniChars(src, iBuf, passCnt,
kTempUniBufCount);
@@ -1221,7 +1220,7 @@
// If we have a size mismatch, then convert from UniChar to
// XMLCh in place within the output buffer.
- if (kUniSizeMismatch)
+ if (UNICODE_SIZE_MISMATCH)
CopyUniCharsToXMLChs(reinterpret_cast<UniChar* const>(toFill), toFill,
charsProduced, maxChars);
// Zero terminate the output string
@@ -1270,7 +1269,7 @@
// Setup source buffer as needed to accommodate a unicode
// character size mismatch.
TempUniBuf buf;
- if (kUniSizeMismatch)
+ if (UNICODE_SIZE_MISMATCH)
{
passCnt = std::min(srcCnt, kTempUniBufCount);
passSrc = CopyXMLChsToUniChars(src, buf, passCnt,
kTempUniBufCount);
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]