Unicode.m

rfm Fri, 17 Mar 2017 05:04:39 -0700

Author: rfm
Date: Fri Mar 17 13:04:19 2017
New Revision: 40395

URL: http://svn.gna.org/viewcvs/gnustep?rev=40395&view=rev
Log:
Implement OSX compatible non-lossy ascii encoding


Modified:
    libs/base/trunk/ChangeLog
    libs/base/trunk/Examples/server.h
    libs/base/trunk/Source/Additions/Unicode.m

Modified: libs/base/trunk/ChangeLog
URL: 
http://svn.gna.org/viewcvs/gnustep/libs/base/trunk/ChangeLog?rev=40395&r1=40394&r2=40395&view=diff
==============================================================================
--- libs/base/trunk/ChangeLog   (original)
+++ libs/base/trunk/ChangeLog   Fri Mar 17 13:04:19 2017
@@ -1,3 +1,8 @@
+2017-03-17  Richard Frith-Macdonald <[email protected]>
+
+       * Source/Additions/Unicode.m: Implement NSNonLossyASCIIStringEncoding
+       in a manner compatible with OSX.  Added regression tests.
+
 2017-02-11  Wolfgang Lux  <[email protected]>
 
        * Source/GSICUString.m (UTextNSStringAccess,

Modified: libs/base/trunk/Examples/server.h
URL: 
http://svn.gna.org/viewcvs/gnustep/libs/base/trunk/Examples/server.h?rev=40395&r1=40394&r2=40395&view=diff
==============================================================================
--- libs/base/trunk/Examples/server.h   (original)
+++ libs/base/trunk/Examples/server.h   Fri Mar 17 13:04:19 2017
@@ -14,10 +14,10 @@
 #include <Foundation/NSConnection.h>
 
 @interface     NSConnection (Debug)
-+ (void) setDebug: (BOOL)aFlag;
++ (int) setDebug: (int)aFlag;
 @end
 @interface     NSDistantObject (Debug)
-+ (void) setDebug: (BOOL)aFlag;
++ (int) setDebug: (int)aFlag;
 @end
 
 typedef struct _small_struct {

Modified: libs/base/trunk/Source/Additions/Unicode.m
URL: 
http://svn.gna.org/viewcvs/gnustep/libs/base/trunk/Source/Additions/Unicode.m?rev=40395&r1=40394&r2=40395&view=diff
==============================================================================
--- libs/base/trunk/Source/Additions/Unicode.m  (original)
+++ libs/base/trunk/Source/Additions/Unicode.m  Fri Mar 17 13:04:19 2017
@@ -154,7 +154,7 @@
                                         * iconv perform conversions to/from
                                         * this encoding.
                                         * NB. do not put a null pointer in this
-                                        * field in the table, use "" instread.
+                                        * field in the table, use "" instead.
                                         */
   BOOL                 eightBit;       /* Flag to say whether this encoding
                                         * can be stored in a byte array ...
@@ -193,7 +193,7 @@
   {NSSymbolStringEncoding,
     "NSSymbolStringEncoding","",0,0,0},
   {NSNonLossyASCIIStringEncoding,
-    "NSNonLossyASCIIStringEncoding","",1,1,0},
+    "NSNonLossyASCIIStringEncoding","",0,1,0},
   {NSShiftJISStringEncoding,
     "NSShiftJISStringEncoding","SHIFT-JIS",0,0,0},
   {NSISOLatin2StringEncoding,
@@ -713,6 +713,12 @@
        }
       return 0;
     }
+}
+
+static inline int
+octdigit(int c)
+{
+  return (c >= '0' && c < '8');
 }
 
 /**
@@ -1012,6 +1018,146 @@
        break;
 
       case NSNonLossyASCIIStringEncoding:
+        {
+          unsigned int  index = 0;
+          unsigned int  count = 0;
+
+          while (index < slen)
+            {
+              uint8_t  c = (uint8_t)((unc)src[index++]);
+
+              if ('\\' == c)
+                {
+                  if (index < slen)
+                    {
+                      c = (uint8_t)((unc)src[index++]);
+                      if ('\\' == c)
+                        {
+                          count++;      // Escaped backslash
+                        }
+                      else if (octdigit(c)
+                        && (index < slen && octdigit(src[index++]))
+                        && (index < slen && octdigit(src[index++])))
+                        {
+                          count++;  // Octal escape
+                        }
+                      else if (('u' == c)
+                        && (index < slen && isxdigit(src[index++]))
+                        && (index < slen && isxdigit(src[index++]))
+                        && (index < slen && isxdigit(src[index++]))
+                        && (index < slen && isxdigit(src[index++])))
+                        {
+                          count++;      // Hex escape for unicode
+                        }
+                      else
+                        {
+                          result = NO; // illegal backslash escape
+                          goto done;
+                        }
+                    }
+                  else
+                    {
+                      result = NO;     // unbalanced backslash
+                      goto done;
+                    }
+                }
+              else
+                {
+                  count++;
+                }
+            }
+          
+          if (dst == 0)
+            {
+              /* Just counting bytes.
+               */
+              dpos += count;
+            }
+          else
+            {
+              if (dpos + count + (extra ? 1 : 0) > bsize)
+                {
+                  if (zone == 0)
+                    {
+                      result = NO; /* No buffer growth possible ... fail. */
+                      goto done;
+                    }
+                  else
+                    {
+                      unsigned grow = (dpos + count) * sizeof(unichar);
+                      unichar  *tmp;
+
+                      tmp = NSZoneMalloc(zone, grow + extra * sizeof(unichar));
+                      if ((ptr == buf || ptr == *dst) && (tmp != 0))
+                        {
+                          memcpy(tmp, ptr, bsize * sizeof(unichar));
+                        }
+                      if (ptr != buf && ptr != *dst)
+                        {
+                          NSZoneFree(zone, ptr);
+                        }
+                      ptr = tmp;
+                      if (ptr == 0)
+                        {
+                          return NO;   /* Not enough memory */
+                        }
+                      bsize = grow / sizeof(unichar);
+                    }
+                }
+              while (spos < slen)
+                {
+                  uint8_t      c = (uint8_t)((unc)src[spos++]);
+
+                  if ('\\' == c)
+                    {
+                      c = (uint8_t)((unc)src[spos++]);
+                      if ('\\' == c)
+                        {
+                          ptr[dpos++] = c;
+                        }
+                      else if ('u' == c)
+                        {
+                          int   i = 0;
+
+                          for (count = 0; count < 4; count++)
+                            {
+                              c = (uint8_t)((unc)src[spos++]);
+                              i *= 16;
+                              if (isdigit(c))
+                                {
+                                  i += c - '0';
+                                }
+                              else if (isupper(c))
+                                {
+                                  i += 10 + c - 'A';
+                                }
+                              else
+                                {
+                                  i += 10 + c - 'a';
+                                }
+                            }
+                          ptr[dpos++] = i;
+                        }
+                      else
+                        {
+                          int   i = c - '0';
+
+                          c = (uint8_t)((unc)src[spos++]);
+                          i = i * 8 + c - '0';
+                          c = (uint8_t)((unc)src[spos++]);
+                          i = i * 8 + c - '0';
+                          ptr[dpos++] = i;
+                        }
+                    }
+                  else
+                    {
+                      ptr[dpos++] = c;
+                    }
+                }
+            }
+          }
+       break;
+
       case NSASCIIStringEncoding:
        if (dst == 0)
          {
@@ -1022,7 +1168,7 @@
          }
         else
          {
-           /* Because we know that each ascii chartacter is exactly
+           /* Because we know that each ascii character is exactly
             * one unicode character, we can check the destination
             * buffer size and allocate more space in one go, before
             * entering the loop where we deal with each character.
@@ -1039,7 +1185,7 @@
                    unsigned    grow = (dpos + slen) * sizeof(unichar);
                    unichar     *tmp;
 
-                   tmp = NSZoneMalloc(zone, grow + extra);
+                   tmp = NSZoneMalloc(zone, grow + extra * sizeof(unichar));
                    if ((ptr == buf || ptr == *dst) && (tmp != 0))
                      {
                        memcpy(tmp, ptr, bsize * sizeof(unichar));
@@ -1097,7 +1243,7 @@
                    unsigned    grow = (dpos + slen) * sizeof(unichar);
                    unichar     *tmp;
 
-                   tmp = NSZoneMalloc(zone, grow + extra);
+                   tmp = NSZoneMalloc(zone, grow + extra * sizeof(unichar));
                    if ((ptr == buf || ptr == *dst) && (tmp != 0))
                      {
                        memcpy(tmp, ptr, bsize * sizeof(unichar));
@@ -1180,7 +1326,7 @@
                    unsigned    grow = (dpos + slen) * sizeof(unichar);
                    unichar     *tmp;
 
-                   tmp = NSZoneMalloc(zone, grow + extra);
+                   tmp = NSZoneMalloc(zone, grow + extra * sizeof(unichar));
                    if ((ptr == buf || ptr == *dst) && (tmp != 0))
                      {
                        memcpy(tmp, ptr, bsize * sizeof(unichar));
@@ -1863,6 +2009,137 @@
         break;
 
       case NSNonLossyASCIIStringEncoding:
+        {
+          unsigned int  index = 0;
+          unsigned int  count = 0;
+
+          if (YES == swapped)
+            {
+              while (index < slen)
+                {
+                  unichar      u = src[index++];
+
+                  u = (((u & 0xff00) >> 8) + ((u & 0x00ff) << 8));
+                  if (u < 256)
+                    {
+                      if ((u >= ' ' && u < 127)
+                        || '\r' == u || '\n' == u || '\t' == u)
+                        {
+                          count++;
+                          if ('\\' == u)
+                            {
+                              count++;
+                            }
+                        }
+                      else
+                        {
+                          count += 4;
+                        }
+                    }
+                  else
+                    {
+                      count += 12;
+                    }
+                }
+            }
+          else
+            {
+              while (index < slen)
+                {
+                  unichar      u = src[index++];
+
+                  if (u < 256)
+                    {
+                      if ((u >= ' ' && u < 127)
+                        || '\r' == u || '\n' == u || '\t' == u)
+                        {
+                          count++;
+                          if ('\\' == u)
+                            {
+                              count++;
+                            }
+                        }
+                      else
+                        {
+                          count += 4;
+                        }
+                    }
+                  else
+                    {
+                      count += 6;
+                    }
+                }
+            }
+          if (dst == 0)
+            {
+              /* Just counting bytes ...
+               */
+              dpos = count;
+            }
+          else
+            {
+              /* We can now check the destination buffer size and allocate
+               * more space in one go, before entering the loop where we
+               * deal with each character.
+               */
+              if (count > bsize)
+                {
+                  if (zone == 0)
+                    {
+                      result = NO; /* No buffer growth possible ... fail. */
+                      goto done;
+                    }
+                  else
+                    {
+                      uint8_t  *tmp;
+
+                      tmp = NSZoneMalloc(zone, count + extra);
+                      if (ptr != buf && ptr != *dst)
+                        {
+                          NSZoneFree(zone, ptr);
+                        }
+                      ptr = tmp;
+                      if (ptr == 0)
+                        {
+                          return NO;   /* Not enough memory */
+                        }
+                      bsize = count;
+                    }
+                }
+              index = 0;
+              while (index < slen)
+                {
+                  unichar      u = src[index++];
+
+                  if (YES == swapped)
+                    {
+                      u = (((u & 0xff00) >> 8) + ((u & 0x00ff) << 8));
+                    }
+                  if (u < 256)
+                    {
+                      if ((u >= ' ' && u < 127)
+                        || '\r' == u || '\n' == u || '\t' == u)
+                        {
+                          ptr[dpos++] = (unsigned char)u;
+                          if ('\\' == u)
+                            {
+                              ptr[dpos++] = (unsigned char)u;
+                            }
+                        }
+                      else
+                        {
+                          dpos += sprintf((char*)&ptr[dpos], "\\%03o", u);
+                        }
+                    }
+                  else
+                    {
+                      dpos += sprintf((char*)&ptr[dpos], "\\u%04x", u);
+                    }
+                }
+            }
+          }
+        goto done;
+
       case NSASCIIStringEncoding:
        base = 128;
        goto bases;
@@ -1882,7 +2159,7 @@
          }
         else
          {
-           /* Because we know that each ascii chartacter is exactly
+           /* Because we know that each ascii character is exactly
             * one unicode character, we can check the destination
             * buffer size and allocate more space in one go, before
             * entering the loop where we deal with each character.


_______________________________________________
Gnustep-cvs mailing list
[email protected]
https://mail.gna.org/listinfo/gnustep-cvs

[Gnustep-cvs] r40395 - in /libs/base/trunk: ChangeLog Examples/server.h Source/Additions/Unicode.m

Reply via email to