Author: pmichaud
Date: Tue Nov 15 14:17:17 2005
New Revision: 10003

Modified:
   trunk/src/string.c
   trunk/t/op/string_cs.t
Log:
* Refactored the string_escape code
* Escaped backslashes (a literal backslash is now emitted as \\)
* Converted \xhh escapes to \x{hh}


Modified: trunk/src/string.c
==============================================================================
--- trunk/src/string.c  (original)
+++ trunk/src/string.c  Tue Nov 15 14:17:17 2005
@@ -2421,40 +2421,21 @@ string_escape_string_delimited(Interp * 
     /* more work TODO */
     ENCODING_ITER_INIT(interpreter, src, &iter);
     dp = result->strstart;
-    for (i = 0; len; --len) {
+    for (i = 0; len > 0; --len) {
         c = iter.get_and_advance(interpreter, &iter);
-        if (i >= charlen - 2) {        /* max we append ourselves */
-            /* resize - still len codepoints to go */
-            charlen += len * 2 + 16;
-            Parrot_reallocate_string(interpreter, result, charlen);
-            /* start can change */
-            dp = result->strstart;
-        }
-        if (c >= 0x10000) {
-            /* current string length, so that append works */
-            result->bufused = result->strlen = i;
-            hex = Parrot_sprintf_c(interpreter, "\\x{%x}", c);
-            /* string_append grows result if necessary */
-append:
-            result = string_append(interpreter, result, hex, 0);
-            /* adjust our insert idx */
-            i += hex->strlen;
-            /* and usable len */
-            charlen = PObj_buflen(result);
-        }
-        else if (c >= 0x100) {
-            result->bufused = result->strlen = i;
-            hex = Parrot_sprintf_c(interpreter, "\\u%04x", c);
-            goto append;
-        }
-        else if (c >= 0x7f) {
-esc_hex:
-            result->bufused = result->strlen = i;
-            hex = Parrot_sprintf_c(interpreter, "\\x%02x", c);
-            goto append;
-        }
-        else  {
+        if (c < 0x80) {
+            /* process ASCII chars */
+            if (i >= charlen - 2) {
+                /* resize - still len codepoints to go */
+                charlen += len * 2 + 16;
+                Parrot_reallocate_string(interpreter, result, charlen);
+                /* start can change */
+                dp = result->strstart;
+            }
             switch (c) {
+                case '\\':
+                    dp[i++] = '\\';
+                    break;
                 case '\a':
                     dp[i++] = '\\';
                     c = 'a';
@@ -2463,40 +2444,49 @@ esc_hex:
                     dp[i++] = '\\';
                     c = 'b';
                     break;
-                case '\t':
-                    dp[i++] = '\\';
-                    c = 't';
-                    break;
                 case '\n':
                     dp[i++] = '\\';
                     c = 'n';
                     break;
-                case '\v':
+                case '\r':
+                    dp[i++] = '\\';
+                    c = 'r';
+                    break;
+                case '\t':
                     dp[i++] = '\\';
-                    c = 'v';
+                    c = 't';
                     break;
                 case '\f':
                     dp[i++] = '\\';
                     c = 'f';
                     break;
-                case '\r':
+                case '"':
                     dp[i++] = '\\';
-                    c = 'r';
+                    c = '"';
                     break;
                 case 27:
                     dp[i++] = '\\';
                     c = 'e';
                     break;
-                case '"':
-                    dp[i++] = '\\';
-                    c = '"';
-                    break;
-                default:
-                    if (c < 32)
-                        goto esc_hex;
             }
-            dp[i++] = c;
+            if (c >= 0x20) {
+                dp[i++] = c;
+                assert(i < charlen);
+                continue;
+            }
         }
+        /* escape by appending either \uhhhh or \x{hh...} */
+        result->bufused = result->strlen = i;
+        if (c < 0x0100 || c >= 0x10000)
+            hex = Parrot_sprintf_c(interpreter, "\\x{%02x}", c);
+        else 
+            hex = Parrot_sprintf_c(interpreter, "\\u%04x", c);
+        result = string_append(interpreter, result, hex, 0);
+        /* adjust our insert idx */
+        i += hex->strlen;
+        /* and usable len */
+        charlen = PObj_buflen(result);
+        dp = result->strstart;
         assert(i < charlen);
     }
     result->bufused = result->strlen = i;

Modified: trunk/t/op/string_cs.t
==============================================================================
--- trunk/t/op/string_cs.t      (original)
+++ trunk/t/op/string_cs.t      Tue Nov 15 14:17:17 2005
@@ -715,7 +715,7 @@ output_is( <<'CODE', <<"OUTPUT", "unicod
     print "\n"
     end
 CODE
-T\xc3\x96TSCH
+T\x{c3}\x{96}TSCH
 OUTPUT
 
 output_is( <<'CODE', <<"OUTPUT", "unicode titlecase");
@@ -729,7 +729,7 @@ output_is( <<'CODE', <<"OUTPUT", "unicod
     print "\n"
     end
 CODE
-T\xc3\xb6tsch Leo
+T\x{c3}\x{b6}tsch Leo
 OUTPUT
 
 }  # SKIP
@@ -751,7 +751,7 @@ output_is( <<'CODE', <<'OUTPUT', "escape
     print "\n"
     end
 CODE
-\x00\x01\x1f
+\x{00}\x{01}\x{1f}
 OUTPUT
 
 output_is( <<'CODE', <<'OUTPUT', "escape latin1");
@@ -761,7 +761,7 @@ output_is( <<'CODE', <<'OUTPUT', "escape
     print "\n"
     end
 CODE
-t\xf6tsch leo
+t\x{f6}tsch leo
 OUTPUT
 
 output_is( <<'CODE', <<'OUTPUT', "escape unicode" );

Reply via email to