GSCharUtilities.h

stefanbidi Wed, 12 Nov 2014 20:34:06 -0800

Author: stefanbidi
Date: Thu Nov 13 05:33:14 2014
New Revision: 38171

URL: http://svn.gna.org/viewcvs/gnustep?rev=38171&view=rev
Log:
* Headers/CoreFoundation/GSCharUtilities.h: Add new public character utilities 
API.


Added:
    libs/corebase/trunk/Headers/CoreFoundation/GSCharUtilities.h

Added: libs/corebase/trunk/Headers/CoreFoundation/GSCharUtilities.h
URL: 
http://svn.gna.org/viewcvs/gnustep/libs/corebase/trunk/Headers/CoreFoundation/GSCharUtilities.h?rev=38171&view=auto
==============================================================================
--- libs/corebase/trunk/Headers/CoreFoundation/GSCharUtilities.h        (added)
+++ libs/corebase/trunk/Headers/CoreFoundation/GSCharUtilities.h        Thu Nov 
13 05:33:14 2014
@@ -0,0 +1,352 @@
+/* GSCharUtilities.h
+   
+   Copyright (C) 2014 Free Software Foundation, Inc.
+   
+   Written by: Stefan Bidigaray
+   Date: November, 2014
+   
+   This file is part of the GNUstep CoreBase Library.
+   
+   This library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   This library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.         See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with this library; see the file COPYING.LIB.
+   If not, see <http://www.gnu.org/licenses/> or write to the 
+   Free Software Foundation, 51 Franklin Street, Fifth Floor, 
+   Boston, MA 02110-1301, USA.
+*/
+
+#ifndef __GSCHAR_H__
+#define __GSCHAR_H__ 1
+
+#include "CoreFoundation/CFBase.h"
+
+/** \defgroup CharUtils Character Utilities
+    \{
+ */
+
+/** \name Unicode Code Point Functions
+    \{
+ */
+
+/** \brief Test whether a code point lies outside the Basic Multilingual
+          Plane.
+    \param[in] c Code point to test.
+    \return True if \b c is greater than 0xFFFF (i.e. in a supplementary
+           plane) and false if it is in the Basic Multilingual Plane.
+ */
+CF_INLINE Boolean
+GSCharIsInSupplementaryPlane (UTF32Char c)
+{
+  if (c < 0x10000)
+    return false;
+
+  return true;
+}
+
+/** \brief Determine if character is a surrogate code point.
+    \param[in] c Character to test.
+    \return Returns true if \b c is in the range 0xD800 to 0xDFFF
+           (inclusive) and false, otherwise.
+ */
+CF_INLINE Boolean
+GSCharIsSurrogate (UTF32Char c)
+{
+  /* Lead and trail surrogates together occupy 0xD800..0xDFFF. */
+  return (c >= 0xD800 && c <= 0xDFFF);
+}
+
+/** \brief Determine if character is a leading (high) surrogate code point.
+    \param[in] c Character to test.
+    \return Returns true if \b c is in the range 0xD800 to 0xDBFF
+           (inclusive) and false, otherwise.
+ */
+CF_INLINE Boolean
+GSCharIsLeadSurrogate (UTF32Char c)
+{
+  /* Leading surrogates occupy the lower half of the surrogate block. */
+  return (c >= 0xD800 && c <= 0xDBFF);
+}
+
+/** \brief Determine if character is a trailing (low) surrogate code point.
+    \param[in] c Character to test.
+    \return Returns true if \b c is in the range 0xDC00 to 0xDFFF
+           (inclusive) and false, otherwise.
+ */
+CF_INLINE Boolean
+GSCharIsTrailSurrogate (UTF32Char c)
+{
+  /* Trailing surrogates occupy the upper half of the surrogate block. */
+  return (c >= 0xDC00 && c <= 0xDFFF);
+}
+
+/** \}
+ */
+
+/** \name UTF-8 Utilities
+    \{
+ */
+
+/** \brief The maximum number of UTF-8 code units required to represent
+          the highest Unicode code point (0x10FFFF).  A buffer of this
+          many code units can hold any single encoded character.
+ */
+#define kGSUTF8CharMaximumLength 4
+
+/** \brief Determine the number of code units for a UTF-8 character based on
+          the leading code unit.
+    \param[in] c Leading code unit to test.
+    \return The number of UTF-8 code units (1 to 4) in the character that
+           starts with \b c, or 0 if \b c cannot start a character, i.e. it
+           is a trailing code unit (0x80 to 0xBF) or is 0xF8 or above.
+ */
+CF_INLINE CFIndex
+GSUTF8CharCodeUnitCount (UTF8Char c)
+{
+  if (c < 0x80)
+    return 1;                   /* Single code unit (ASCII range). */
+  if (c < 0xC0)
+    return 0;                   /* Trailing code unit, not a leader. */
+  if (c < 0xE0)
+    return 2;
+  if (c < 0xF0)
+    return 3;
+  if (c < 0xF8)
+    return 4;
+
+  return 0;                     /* 0xF8 and above never lead a character. */
+}
+
+/** \brief Determine if the specified UTF-8 code unit is a trailing
+          (continuation) code unit.
+    \param[in] c The code unit to test.
+    \return Returns true if the two most significant bits of \b c are 10,
+           and false, otherwise.
+ */
+CF_INLINE Boolean
+GSUTF8CharIsTrailing (UTF8Char c)
+{
+  const int topBits = c & 0xC0;
+
+  return topBits == 0x80;
+}
+
+/** \brief Determine the number of UTF-8 code units required to represent
+          the specified Unicode code point.
+    \param[in] c The Unicode code point to test.
+    \return The number of UTF-8 code units required (1 to 4), or 0 if
+           \b c is above 0x10FFFF.  Surrogate code points are not treated
+           specially and report a length of 3.
+ */
+CF_INLINE CFIndex
+GSUTF8CharLength (UTF32Char c)
+{
+  if (c > 0x10FFFF)
+    return 0;                   /* Beyond the Unicode code space. */
+  if (c >= 0x10000)
+    return 4;
+  if (c >= 0x800)
+    return 3;
+  if (c >= 0x80)
+    return 2;
+
+  return 1;
+}
+
+/** \brief Append the UTF-8 Byte Order Mark to the string buffer.
+    \param[in,out] d A pointer to the current position of the string buffer.
+                    On success it is advanced past the three code units
+                    written; on failure it is left unchanged.
+    \param[in] end The position just after the end of the buffer.  The caller
+                  must ensure this parameter is beyond the string buffer
+                  pointed to by \b d.
+    \return True if the function was successful and false if fewer than
+           three code units of space remain in the buffer.
+ */
+CF_INLINE Boolean
+GSUTF8CharAppendByteOrderMark (UTF8Char ** d, UTF8Char *end)
+{
+  UTF8Char *p;
+
+  p = *d;
+  /* end is one past the last writable unit, so exactly three free code
+     units are enough for the three-unit mark. */
+  if ((end - p) >= 3)
+    {
+      *p++ = 0xEF;
+      *p++ = 0xBB;
+      *p++ = 0xBF;
+      *d = p;
+
+      return true;
+    }
+
+  return false;
+}
+
+/** \brief Determine if a UTF-8 string buffer starts with a Byte Order Mark
+          and, if so, skip over it.
+    \param[in,out] s A pointer to the current position of the string buffer.
+                    If a Byte Order Mark is found this value is advanced
+                    past it; otherwise it is left unchanged.
+    \param[in] end The position just after the end of the buffer.  The caller
+                  must ensure this parameter is beyond the string buffer
+                  pointed to by \b s.
+    \return True if a Byte Order Mark is found and false, otherwise.
+ */
+CF_INLINE Boolean
+GSUTF8CharSkipByteOrderMark (UTF8Char ** s, UTF8Char *end)
+{
+  UTF8Char *p;
+
+  p = *s;
+  /* end is one past the last readable unit, so a buffer holding exactly
+     the three-unit mark must still be examined. */
+  if ((end - p) >= 3)
+    {
+      if (p[0] == 0xEF && p[1] == 0xBB && p[2] == 0xBF)
+       {
+         *s = p + 3;
+         return true;
+       }
+    }
+
+  return false;
+}
+
+/** \brief Append a character to a UTF-8 string buffer.
+    \param[in,out] d A pointer to the current position of the string buffer.
+                    On success it is advanced past the code units written;
+                    on failure it is left unchanged and nothing is written.
+    \param[in] end The position just after the end of the buffer.  The caller
+                  must ensure this parameter is beyond the string buffer
+                  pointed to by \b d.
+    \param[in] c The Unicode code point to write.
+    \return True if the function was successful, and false if there is not
+           enough space left in the string buffer or the code point is
+           invalid (a surrogate, or above 0x10FFFF).
+ */
+CF_INLINE Boolean
+GSUTF8CharAppend (UTF8Char ** d, UTF8Char * end, UTF32Char c)
+{
+  UTF8Char *p;
+  CFIndex room;
+
+  p = *d;
+  room = end - p;               /* Code units still free in the buffer. */
+  if (c < 0x80)
+    {
+      if (room < 1)
+        return false;
+      *p++ = (UTF8Char) c;
+    }
+  else if (c < 0x800)
+    {
+      if (room < 2)
+        return false;
+      *p++ = (UTF8Char) ((c >> 6) | 0xC0);
+      *p++ = (UTF8Char) ((c & 0x3F) | 0x80);
+    }
+  else if (GSCharIsSurrogate (c) || c > 0x10FFFF)
+    {
+      /* Surrogates and values beyond the code space are not encodable. */
+      return false;
+    }
+  else if (c < 0x10000)
+    {
+      if (room < 3)
+        return false;
+      *p++ = (UTF8Char) ((c >> 12) | 0xE0);
+      *p++ = (UTF8Char) (((c >> 6) & 0x3F) | 0x80);
+      *p++ = (UTF8Char) ((c & 0x3F) | 0x80);
+    }
+  else
+    {
+      /* 0x10000 to 0x10FFFF: every supplementary code point needs four
+         code units. */
+      if (room < 4)
+        return false;
+      *p++ = (UTF8Char) ((c >> 18) | 0xF0);
+      *p++ = (UTF8Char) (((c >> 12) & 0x3F) | 0x80);
+      *p++ = (UTF8Char) (((c >> 6) & 0x3F) | 0x80);
+      *p++ = (UTF8Char) ((c & 0x3F) | 0x80);
+    }
+
+  /* Publish the new position only once the whole character is written. */
+  *d = p;
+
+  return true;
+}
+
+/** \brief Get a Unicode code point from a UTF-8 string buffer.
+    \param[in,out] s A pointer to the current position of the buffer.
+                    On success it is advanced past the code units consumed;
+                    when 0 is returned because of an error it is left
+                    unchanged.
+    \param[in] end The position just after the end of the buffer.
+    \return A valid Unicode code point or a UTF-16 surrogate.
+           Will return 0 in a few cases:
+           1. The UTF-8 code unit is also a 0.
+           2. The UTF-8 code unit pointed to by \b s is not a leading code
+           unit.
+           3. The leading UTF-8 code unit does not have the correct number
+           of trailing code units.
+           4. The resulting code point is above 0x10FFFF.
+           5. The buffer is empty (\b s is at or beyond \b end).
+
+           NOTE: overlong encodings are not rejected by this function.
+ */
+CF_INLINE UTF32Char
+GSUTF8CharGet (UTF8Char ** s, UTF8Char * end)
+{
+  UTF8Char *p;
+  UTF32Char c;
+  CFIndex trail;
+
+  p = *s;
+  if (p >= end)
+    return 0;
+
+  c = *p++;
+  if (c >= 0x80)
+    {
+      /* 0x80..0xBF are trailing units; 0xF8 and above never lead. */
+      if (c < 0xC0 || c >= 0xF8)
+        return 0;
+
+      /* Number of trailing units announced by the leading unit. */
+      trail = 1 + (c >= 0xE0) + (c >= 0xF0);
+      if ((end - p) < trail)
+        return 0;
+
+      /* Keep only the payload bits of the leading unit:
+         0x1F, 0x0F or 0x07 for 1, 2 or 3 trailing units. */
+      c &= (UTF32Char) (0x3F >> trail);
+      while (trail-- > 0)
+        {
+          if ((*p & 0xC0) != 0x80)
+            return 0;
+          c = (c << 6) | (UTF32Char) (*p++ & 0x3F);
+        }
+
+      if (c > 0x10FFFF)
+        return 0;
+    }
+
+  *s = p;
+  return c;
+}
+
+/** \}
+ */
+
+/** \name UTF-16 Utilities
+    \{
+ */
+
+/** \brief The maximum number of UTF-16 code units required to represent the
+          highest Unicode code point (a leading and a trailing surrogate).
+ */
+#define kGSUTF16CharMaximumLength 2
+
+/** \brief The Byte Order Mark for UTF-16 strings, as read in the expected
+          byte order.
+ */
+#define kGSUTF16CharByteOrderMark 0xFEFF
+
+/** \brief The swapped Byte Order Mark for UTF-16 strings, i.e. the value of
+          #kGSUTF16CharByteOrderMark with its two bytes exchanged.
+ */
+#define kGSUTF16CharSwappedByteOrderMark 0xFFFE
+
+/** \brief Append a character to a UTF-16 string buffer.
+    \param[in,out] d A pointer to the current position of the buffer.
+                    On success it is advanced past the code units written;
+                    on failure it is left unchanged and nothing is written.
+    \param[in] end The position just after the end of the buffer.
+    \param[in] c The Unicode code point to write.
+    \return True if the function was successful, and false if there is not
+           enough space left in the string buffer or the code point is
+           invalid (a surrogate, or above 0x10FFFF).
+ */
+CF_INLINE Boolean
+GSUTF16CharAppend (UTF16Char ** d, UTF16Char * end, UTF32Char c)
+{
+  UTF16Char *p;
+  CFIndex room;
+
+  p = *d;
+  room = end - p;               /* Code units still free in the buffer. */
+  if (c <= 0xFFFF)
+    {
+      /* A lone surrogate is not a valid character. */
+      if (GSCharIsSurrogate (c) || room < 1)
+        return false;
+      *p++ = (UTF16Char) c;
+    }
+  else if (c <= 0x10FFFF)
+    {
+      if (room < 2)
+        return false;
+      /* 0xD7C0 is 0xD800 - (0x10000 >> 10): it folds the subtraction of
+         0x10000 into the leading surrogate base. */
+      *p++ = (UTF16Char) ((c >> 10) + 0xD7C0);
+      *p++ = (UTF16Char) ((c & 0x3FF) + 0xDC00);
+    }
+  else
+    {
+      return false;
+    }
+  *d = p;
+
+  return true;
+}
+
+
+
+/** \brief Get a Unicode code point from a UTF-16 string buffer.
+    \param[in,out] s A pointer to the current position of the buffer.
+                    On success it is advanced past the code units consumed;
+                    when 0 is returned because of an error it is left
+                    unchanged.
+    \param[in] end The position just after the end of the buffer.
+    \return A valid Unicode code point.  Will return 0 in a few cases:
+           1. The UTF-16 code unit is also a 0.
+           2. The UTF-16 code unit pointed to by \b s is not a leading code
+           unit.
+           3. The leading UTF-16 code unit does not have a trailing pair.
+           4. The buffer is empty (\b s is at or beyond \b end).
+ */
+CF_INLINE UTF32Char
+GSUTF16CharGet (UTF16Char ** s, UTF16Char * end)
+{
+  UTF32Char c;
+  UTF16Char *p;
+
+  p = *s;
+  if (p >= end)
+    return 0;
+
+  c = *p++;
+  if (GSCharIsSurrogate (c))
+    {
+      if (GSCharIsLeadSurrogate (c) && p < end
+          && GSCharIsTrailSurrogate (*p))
+        {
+          /* Combine the pair; the constant removes both surrogate bases
+             and adds back the 0x10000 offset in a single step. */
+          c = (c << 10) + (*p++) - ((0xD7C0 << 10) + 0xDC00);
+        }
+      else
+        {
+          return 0;
+        }
+    }
+
+  *s = p;
+  return c;
+}
+
+/** \}
+ */
+
+/** \name UTF-32 Utilities
+    \{
+ */
+
+/** \brief The Byte Order Mark for UTF-32 strings, as read in the expected
+          byte order.
+ */
+#define kGSUTF32CharByteOrderMark 0x0000FEFF
+
+/** \brief The swapped Byte Order Mark for UTF-32 strings, i.e. the value of
+          #kGSUTF32CharByteOrderMark with its four bytes reversed.
+ */
+#define kGSUTF32CharSwappedByteOrderMark 0xFFFE0000
+
+/** \}
+ */
+
+/** \}
+ */
+
+#endif /* __GSCHAR_H__ */


_______________________________________________
Gnustep-cvs mailing list
[email protected]
https://mail.gna.org/listinfo/gnustep-cvs

[Gnustep-cvs] r38171 - /libs/corebase/trunk/Headers/CoreFoundation/GSCharUtilities.h

Reply via email to