Author: stefanbidi
Date: Mon Nov 17 21:29:07 2014
New Revision: 38180
URL: http://svn.gna.org/viewcvs/gnustep?rev=38180&view=rev
Log:
* Headers/CoreFoundation/GSCharUtilities.h,
* Headers/CoreFoundation/GSCharacter.h: Rename file.
* Headers/CoreFoundation/GSUnicode.h: Add API for Unicode string.
Not yet part of build and install.
* Source/GSUnicode.c: Update part of the code to use new character
utilities.
* Source/GSUnicode.h: Remove redundant functionality. This file will
eventually be completely superseded by Headers/CoreFoundation/GSCharacter.h and Headers/CoreFoundation/GSUnicode.h.
Modified:
libs/corebase/trunk/Source/GSUnicode.c
libs/corebase/trunk/Source/GSUnicode.h
Modified: libs/corebase/trunk/Source/GSUnicode.c
URL:
http://svn.gna.org/viewcvs/gnustep/libs/corebase/trunk/Source/GSUnicode.c?rev=38180&r1=38179&r2=38180&view=diff
==============================================================================
--- libs/corebase/trunk/Source/GSUnicode.c (original)
+++ libs/corebase/trunk/Source/GSUnicode.c Mon Nov 17 21:29:07 2014
@@ -29,7 +29,9 @@
#include "CoreFoundation/CFByteOrder.h"
#include "CoreFoundation/CFDictionary.h"
#include "CoreFoundation/CFLocale.h"
+#include "CoreFoundation/CFString.h"
#include "CoreFoundation/CFRuntime.h"
+#include "CoreFoundation/GSCharacter.h"
#include "GSPrivate.h"
#include "GSUnicode.h"
@@ -66,21 +68,22 @@
UTF32Char u;
CFIndex count;
- count = GS_UTF8_BYTE_COUNT (*s);
- if (count == 2 && GS_UTF8_IS_TRAIL (s[1]))
+ count = GSUTF8CharacterCodeUnitCount (*s);
+ if (count == 2 && GSUTF8CharacterIsTrailing (s[1]))
{
u = (s[0] & 0x1F) << 6;
u |= s[1] & 0x3F;
}
- else if (count == 3 && GS_UTF8_IS_TRAIL (s[1])
- && GS_UTF8_IS_TRAIL (s[2]))
+ else if (count == 3 && GSUTF8CharacterIsTrailing (s[1])
+ && GSUTF8CharacterIsTrailing (s[2]))
{
u = (s[0] & 0x0F) << 12;
u |= (s[1] & 0x3F) << 6;
u |= s[2] & 0x3F;
}
- else if (count == 4 && GS_UTF8_IS_TRAIL (s[1])
- && GS_UTF8_IS_TRAIL (s[2]) && GS_UTF8_IS_TRAIL (s[3]))
+ else if (count == 4 && GSUTF8CharacterIsTrailing (s[1])
+ && GSUTF8CharacterIsTrailing (s[2])
+ && GSUTF8CharacterIsTrailing (s[3]))
{
u = (s[0] & 0x07) << 18;
u |= (s[1] & 0x3F) << 12;
@@ -94,7 +97,7 @@
s += count;
if (u < 0x10000)
{
- if (GS_UTF_IS_SURROGATE (u))
+ if (GSCharacterIsSurrogate (u))
break;
if (dlen != 0)
*d = u;
@@ -141,7 +144,7 @@
{
UTF32Char u;
- u = *s++;
+ u = GSUTF16CharacterGet (&s, slimit);
if (u < 0x80)
{
if (dlen != 0)
@@ -152,28 +155,15 @@
{
CFIndex count;
- if (GS_UTF_IS_SURROGATE (u) && GS_UTF_IS_LEAD_SURROGATE (u)
- && slimit - s > 0)
- {
- UTF16Char u16;
-
- u16 = *s++;
- if (GS_UTF_IS_TRAIL_SURROGATE (u16))
- u = GS_UTF16_GET_CHAR (u, u16);
- else if (lossChar)
+ if (u == 0)
+ {
+ if (lossChar)
u = lossChar;
else
break;
}
- else if (lossChar)
- {
- u = lossChar;
- }
- else
- {
- break;
- }
- count = GS_UTF8_LENGTH (u);
+
+ count = GSUTF8CharacterLength (u);
if (count > 4)
break;
if (dlen != 0)
@@ -225,7 +215,7 @@
u = *s++;
if (u < 0x10000)
{
- if (GS_UTF_IS_SURROGATE (u))
+ if (GSCharacterIsSurrogate (u))
break;
if (dlen != 0)
*d = u;
@@ -267,18 +257,14 @@
{
UTF32Char u;
- u = *s++;
- if (GS_UTF_IS_SURROGATE (u))
- {
- UTF16Char u16;
-
- if (slimit - s > 0 || !GS_UTF_IS_LEAD_SURROGATE (u))
- break;
- u16 = *s++;
- if (!GS_UTF_IS_TRAIL_SURROGATE (u))
- break;
- u = GS_UTF16_GET_CHAR (u, u16);
- }
+ u = GSUTF16CharacterGet (&s, slimit);
+ if (u == 0)
+ {
+ if (lossChar)
+ u = lossChar;
+ else
+ break;
+ }
if (dlen != 0)
*d = u;
d++;
@@ -510,9 +496,9 @@
}
CFIndex
-GSFromUnicode (const UniChar * s, CFIndex slen,
- CFStringEncoding enc, UniChar lossChar,
- Boolean isExtRep, UInt8 * d, CFIndex dlen, CFIndex * usedDstLen)
+GSFromUnicode (const UniChar * s, CFIndex slen, CFStringEncoding enc,
+ UniChar lossChar, Boolean isExtRep, UInt8 * d, CFIndex dlen,
+ CFIndex * usedDstLen)
{
CFIndex converted;
@@ -541,15 +527,18 @@
dst = (UniChar *) d;
if (isExtRep && enc == kCFStringEncodingUTF16 && dlen >= 2)
{
- *dst = GS_UTF16_BOM;
- dst++;
+ *dst++ = kGSUTF16CharacterByteOrderMark;
dlen -= 2;
}
copyLength =
(dlen <= slen * sizeof (UniChar)) ? dlen : (slen * sizeof (UniChar));
memcpy (dst, s, copyLength);
- if (enc == GS_UTF16_ENCODING_TO_SWAP)
+#if __BIG_ENDIAN__
+ if (enc == kCFStringEncodingUTF16LE && dlen != 0)
+#else
+ if (enc == kCFStringEncodingUTF16BE && dlen != 0)
+#endif
{
UniChar *end;
@@ -573,7 +562,7 @@
dst = (UTF32Char *) d;
if (isExtRep && enc == kCFStringEncodingUTF32 && dlen >= 4)
{
- *dst++ = GS_UTF32_BOM;
+ *dst++ = kGSUTF32CharacterByteOrderMark;
dlen -= 4;
}
/* round to the nearest multiple of 4 */
@@ -581,7 +570,11 @@
converted =
GSUnicodeToUTF32 (s, slen / sizeof (UTF32Char), lossChar,
(UTF32Char *) d, dlen, usedDstLen);
- if (enc == GS_UTF32_ENCODING_TO_SWAP && dlen != 0)
+#if __BIG_ENDIAN__
+ if (enc == kCFStringEncodingUTF32LE && dlen != 0)
+#else
+ if (enc == kCFStringEncodingUTF32BE && dlen != 0)
+#endif
{
UTF32Char *cur;
UTF32Char *end;
@@ -698,9 +691,8 @@
if (enc == kCFStringEncodingUTF8)
{
- if (isExtRep && slen > 3
- && (s[0] == 0xEF && s[1] == 0xBB && s[2] == 0xBF))
- s += 3;
+ if (isExtRep)
+ GSUTF8CharacterSkipByteOrderMark (&s, s + slen);
converted = GSUnicodeFromUTF8 (s, slen, lossChar, d, dlen, usedDstLen);
}
@@ -717,37 +709,41 @@
if (enc == kCFStringEncodingUTF16)
{
- UniChar bom;
-
- bom = (*src == 0xFEFF || *src == 0xFFFE) ? *src++ : 0;
-#if WORDS_BIGENDIAN
- if (bom == 0xFFFE)
+ if (*src == kGSUTF16CharacterByteOrderMark)
+ {
+ src += 1;
+ }
+ else if (*src == kGSUTF16CharacterSwappedByteOrderMark)
+ {
+ swap = true;
+ src += 1;
+ }
+ }
+#if __BIG_ENDIAN__
+ else if (enc == kCFStringEncodingUTF16LE)
#else
- if (bom == 0xFEFF)
+ else if (enc == kCFStringEncodingUTF16BE)
#endif
- swap = true;
- }
- else if (enc == GS_UTF16_ENCODING_TO_SWAP)
{
swap = true;
}
- if (swap && slen != 0)
- {
- UniChar *cur;
- UniChar *end;
-
- cur = (UniChar *) s;
- end = (UniChar *) (s + slen);
- while (cur < end)
- {
- *cur = CFSwapInt16 (*cur);
- ++cur;
- }
- }
copyLength =
(dlen * sizeof (UniChar) <= slen) ? (dlen * sizeof (UniChar)) : slen;
memcpy (d, s, copyLength);
+ if (swap && slen != 0)
+ {
+ UniChar *cur;
+ UniChar *end;
+
+ cur = d;
+ end = d + (copyLength / sizeof (UniChar));
+ while (cur < end)
+ {
+ *cur = CFSwapInt16 (*cur);
+ cur++;
+ }
+ }
if (usedDstLen)
*usedDstLen = slen;
converted = copyLength / sizeof (UniChar);
@@ -761,24 +757,29 @@
src = (const UTF32Char *) s;
swap = false;
+
if (enc == kCFStringEncodingUTF32)
{
- UTF32Char bom;
-
- bom = (*src == 0x0000FEFF || *src == 0xFFFE0000) ? *src++ : 0;
-#if WORDS_BIGENDIAN
- if (bom == 0xFFFE0000)
+ if (*src == kGSUTF32CharacterByteOrderMark)
+ {
+ src += 1;
+ }
+ else if (*src == kGSUTF32CharacterSwappedByteOrderMark)
+ {
+ swap = true;
+ src += 1;
+ }
+ }
+#if __BIG_ENDIAN__
+ else if (*src == kCFStringEncodingUTF32LE)
#else
- if (bom == 0x0000FEFF)
+ else if (*src == kCFStringEncodingUTF32BE)
#endif
- swap = true;
- }
- else if (enc == GS_UTF32_ENCODING_TO_SWAP)
{
swap = true;
}
/* round to the nearest multiple of 4 */
- slen &= ~0x3;
+ slen &= ~3;
if (swap && slen != 0)
{
UTF32Char *cur;
@@ -937,7 +938,7 @@
FMT_OBJECT, FMT_DOUBLE, FMT_UNKNOWN, FMT_CHARACTER,
FMT_INTEGER, FMT_DOUBLE, FMT_DOUBLE, FMT_DOUBLE,
FMT_UNKNOWN, FMT_UNKNOWN, FMT_UNKNOWN, FMT_UNKNOWN,
- FMT_MOD_LONG, FMT_UNKNOWN, FMT_UNKNOWN, FMT_OCTAL,
+ FMT_MOD_LDBL, FMT_UNKNOWN, FMT_UNKNOWN, FMT_OCTAL,
/* 0x50 */
FMT_UNKNOWN, FMT_UNKNOWN, FMT_UNKNOWN, FMT_STRING,
FMT_UNKNOWN, FMT_UINTEGER, FMT_UNKNOWN, FMT_UNKNOWN,
@@ -947,7 +948,7 @@
FMT_UNKNOWN, FMT_DOUBLE, FMT_UNKNOWN, FMT_CHARACTER,
FMT_INTEGER, FMT_DOUBLE, FMT_DOUBLE, FMT_DOUBLE,
FMT_MOD_SHORT, FMT_INTEGER, FMT_MOD_INTMAX, FMT_UNKNOWN,
- FMT_MOD_LDBL, FMT_UNKNOWN, FMT_GETCOUNT, FMT_OCTAL,
+ FMT_MOD_LONG, FMT_UNKNOWN, FMT_GETCOUNT, FMT_OCTAL,
/* 0x70 */
FMT_POINTER, FMT_UNKNOWN, FMT_UNKNOWN, FMT_STRING,
FMT_MOD_PTRDIFF, FMT_UINTEGER, FMT_UNKNOWN, FMT_UNKNOWN,
@@ -1843,8 +1844,8 @@
static const CFIndex null_string_len = 6;
-static const UniChar nan_string[] = { 'n', 'a', 'n' };
-static const UniChar inf_string[] = { 'i', 'n', 'f' };
+static const UniChar nan_string[] = { 'N', 'A', 'N' };
+static const UniChar inf_string[] = { 'I', 'N', 'F' };
static const CFIndex nan_inf_string_len = 3;
@@ -2408,39 +2409,27 @@
#if SIZEOF_LONG_DOUBLE > SIZEOF_DOUBLE /* Avoid unused warning */
fmt_double_parts:
#endif
- {
- if (string != NULL)
- {
- UniChar *buf_start;
-
- /* Must be 'nan' or 'inf' */
- buf_start = buffer;
- if (is_negative || show_sign || show_space)
- {
- string_len += 1;
- if (is_negative)
- *buf_start++ = '-';
- else if (show_sign)
- *buf_start++ = '+';
- else if (show_space)
- *buf_start++ = ' ';
- }
-
- buf_start[0] = *string++;
- buf_start[1] = *string++;
- buf_start[2] = *string;
- if (type >= 'A' && type <= 'Z')
- {
- buf_start[0] -= ('a' - 'A');
- buf_start[1] -= ('a' - 'A');
- buf_start[2] -= ('a' - 'A');
- }
- string = buffer;
-
- goto print_string;
- }
- goto handle_error;
- }
+ if (string != NULL)
+ {
+ /* Must be 'nan' or 'inf' */
+ if (is_negative || show_sign || show_space)
+ {
+ string_len += 1;
+ if (is_negative)
+ _write_char (obuf++, obuf_end, '-');
+ else if (show_sign)
+ _write_char (obuf++, obuf_end, '+');
+ else if (show_space)
+ _write_char (obuf++, obuf_end, ' ');
+ }
+
+ _write_char (obuf++, obuf_end, *string++ | (type & 0x20));
+ _write_char (obuf++, obuf_end, *string++ | (type & 0x20));
+ _write_char (obuf++, obuf_end, *string | (type & 0x20));
+
+ continue;
+ }
+ goto handle_error;
fmt_character:
if (length == FMT_MOD_LONG || type == 'C')
Modified: libs/corebase/trunk/Source/GSUnicode.h
URL:
http://svn.gna.org/viewcvs/gnustep/libs/corebase/trunk/Source/GSUnicode.h?rev=38180&r1=38179&r2=38180&view=diff
==============================================================================
--- libs/corebase/trunk/Source/GSUnicode.h (original)
+++ libs/corebase/trunk/Source/GSUnicode.h Mon Nov 17 21:29:07 2014
@@ -81,12 +81,6 @@
/*
* General Unicode Conversion Macros and Functions
*/
-#define GS_UTF_IS_SURROGATE(c) (((c) & 0xFFFFF800) == 0xD800)
-
-#define GS_UTF_IS_LEAD_SURROGATE(c) (((c) & 0xFFFFFC00) == 0xD800)
-
-#define GS_UTF_IS_TRAIL_SURROGATE(c) (((c) & 0xFFFFFC00) == 0xDC00)
-
/** @internal
* Convert from Unicode to a specified encoding.
* @return Number of characters converted.
@@ -134,18 +128,6 @@
/*
* UTF-8 Conversion Macros and Functions
*/
-#define GS_UTF8_MAX_LENGTH 4
-
-#define GS_UTF8_BYTE_COUNT(c) \
- (((c) < 0xF8) ? 1 + ((c) >= 0xC0) + ((c) >= 0xE0) + ((c) >= 0xF0) : 0)
-
-/* Get the number of bytes needed for the code point */
-#define GS_UTF8_LENGTH(c) (((c) <= 0x10FFFF ) \
- ? 1 + ((c) > 0x007F) + ((c) > 0x07FF) \
- + ((c) > 0xFFFF) : 0)
-
-#define GS_UTF8_IS_TRAIL(c) (((c) & 0xC0) == 0x80)
-
/** @internal
* Convert from UTF-8 to Unicode
* @return Number of bytes read.
@@ -184,54 +166,8 @@
/*
- * UTF-16 Conversion Macros and Functions
- */
-#define GS_UTF16_BOM 0xFEFF
-
-#define GS_UTF16_APPEND_CHAR(_dest0, _dest1, _c) do \
- { \
- _dest0 = (_c >> 10) + 0xD7C0; \
- _dest1 = (_c & 0x3FF) | 0xDC00; \
- } while(0)
-
-#define GS_UTF16_GET_CHAR(_lead, _trail) \
- ((UTF32Char)_lead << 10) + (UTF32Char)_trail - ((0xD7C0 << 10) + 0xDC00)
-
-#if WORDS_BIGENDIAN
-#define GS_UTF16_ENCODING kCFStringEncodingUTF16BE
-#define GS_UTF16_ENCODING_TO_SWAP kCFStringEncodingUTF16LE
-#define GS_UTF16_BOM_HI 0xFE
-#define GS_UTF16_BOM_LO 0xFF
-#else
-#define GS_UTF16_ENCODING kCFStringEncodingUTF16LE
-#define GS_UTF16_ENCODING_TO_SWAP kCFStringEncodingUTF16BE
-#define GS_UTF16_BOM_HI 0xFF
-#define GS_UTF16_BOM_LO 0xFE
-#endif
-
-
-
-/*
* UTF-32 Conversion Macros and Functions
*/
-#define GS_UTF32_BOM 0x0000FEFF
-
-#if WORDS_BIGENDIAN
-#define GS_UTF32_ENCODING kCFStringEncodingUTF32BE
-#define GS_UTF32_ENCODING_TO_SWAP kCFStringEncodingUTF32LE
-#define GS_UTF32_BOM_0 0x00
-#define GS_UTF32_BOM_1 0x00
-#define GS_UTF32_BOM_2 0xFE
-#define GS_UTF32_BOM_3 0xFF
-#else
-#define GS_UTF32_ENCODING kCFStringEncodingUTF32LE
-#define GS_UTF32_ENCODING_TO_SWAP kCFStringEncodingUTF32BE
-#define GS_UTF32_BOM_0 0xFF
-#define GS_UTF32_BOM_1 0xFE
-#define GS_UTF32_BOM_2 0x00
-#define GS_UTF32_BOM_3 0x00
-#endif
-
/** @internal
* Convert from UTF-32 to Unicode
* @return Number of bytes read.
_______________________________________________
Gnustep-cvs mailing list
[email protected]
https://mail.gna.org/listinfo/gnustep-cvs