Author: pmichaud
Date: Tue Nov 15 14:17:17 2005
New Revision: 10003
Modified:
trunk/src/string.c
trunk/t/op/string_cs.t
Log:
* Refactored string_escape code
* Escaped backslashes (a literal backslash is now emitted as \\)
* Converted \xhh escapes to the braced form \x{hh}
Modified: trunk/src/string.c
==============================================================================
--- trunk/src/string.c (original)
+++ trunk/src/string.c Tue Nov 15 14:17:17 2005
@@ -2421,40 +2421,21 @@ string_escape_string_delimited(Interp *
/* more work TODO */
ENCODING_ITER_INIT(interpreter, src, &iter);
dp = result->strstart;
- for (i = 0; len; --len) {
+ for (i = 0; len > 0; --len) {
c = iter.get_and_advance(interpreter, &iter);
- if (i >= charlen - 2) { /* max we append ourselves */
- /* resize - still len codepoints to go */
- charlen += len * 2 + 16;
- Parrot_reallocate_string(interpreter, result, charlen);
- /* start can change */
- dp = result->strstart;
- }
- if (c >= 0x10000) {
- /* current string length, so that append works */
- result->bufused = result->strlen = i;
- hex = Parrot_sprintf_c(interpreter, "\\x{%x}", c);
- /* string_append grows result if necessary */
-append:
- result = string_append(interpreter, result, hex, 0);
- /* adjust our insert idx */
- i += hex->strlen;
- /* and usable len */
- charlen = PObj_buflen(result);
- }
- else if (c >= 0x100) {
- result->bufused = result->strlen = i;
- hex = Parrot_sprintf_c(interpreter, "\\u%04x", c);
- goto append;
- }
- else if (c >= 0x7f) {
-esc_hex:
- result->bufused = result->strlen = i;
- hex = Parrot_sprintf_c(interpreter, "\\x%02x", c);
- goto append;
- }
- else {
+ if (c < 0x80) {
+ /* process ASCII chars */
+ if (i >= charlen - 2) {
+ /* resize - still len codepoints to go */
+ charlen += len * 2 + 16;
+ Parrot_reallocate_string(interpreter, result, charlen);
+ /* start can change */
+ dp = result->strstart;
+ }
switch (c) {
+ case '\\':
+ dp[i++] = '\\';
+ break;
case '\a':
dp[i++] = '\\';
c = 'a';
@@ -2463,40 +2444,49 @@ esc_hex:
dp[i++] = '\\';
c = 'b';
break;
- case '\t':
- dp[i++] = '\\';
- c = 't';
- break;
case '\n':
dp[i++] = '\\';
c = 'n';
break;
- case '\v':
+ case '\r':
+ dp[i++] = '\\';
+ c = 'r';
+ break;
+ case '\t':
dp[i++] = '\\';
- c = 'v';
+ c = 't';
break;
case '\f':
dp[i++] = '\\';
c = 'f';
break;
- case '\r':
+ case '"':
dp[i++] = '\\';
- c = 'r';
+ c = '"';
break;
case 27:
dp[i++] = '\\';
c = 'e';
break;
- case '"':
- dp[i++] = '\\';
- c = '"';
- break;
- default:
- if (c < 32)
- goto esc_hex;
}
- dp[i++] = c;
+ if (c >= 0x20) {
+ dp[i++] = c;
+ assert(i < charlen);
+ continue;
+ }
}
+ /* escape by appending either \uhhhh or \x{hh...} */
+ result->bufused = result->strlen = i;
+ if (c < 0x0100 || c >= 0x10000)
+ hex = Parrot_sprintf_c(interpreter, "\\x{%02x}", c);
+ else
+ hex = Parrot_sprintf_c(interpreter, "\\u%04x", c);
+ result = string_append(interpreter, result, hex, 0);
+ /* adjust our insert idx */
+ i += hex->strlen;
+ /* and usable len */
+ charlen = PObj_buflen(result);
+ dp = result->strstart;
assert(i < charlen);
}
result->bufused = result->strlen = i;
Modified: trunk/t/op/string_cs.t
==============================================================================
--- trunk/t/op/string_cs.t (original)
+++ trunk/t/op/string_cs.t Tue Nov 15 14:17:17 2005
@@ -715,7 +715,7 @@ output_is( <<'CODE', <<"OUTPUT", "unicod
print "\n"
end
CODE
-T\xc3\x96TSCH
+T\x{c3}\x{96}TSCH
OUTPUT
output_is( <<'CODE', <<"OUTPUT", "unicode titlecase");
@@ -729,7 +729,7 @@ output_is( <<'CODE', <<"OUTPUT", "unicod
print "\n"
end
CODE
-T\xc3\xb6tsch Leo
+T\x{c3}\x{b6}tsch Leo
OUTPUT
} # SKIP
@@ -751,7 +751,7 @@ output_is( <<'CODE', <<'OUTPUT', "escape
print "\n"
end
CODE
-\x00\x01\x1f
+\x{00}\x{01}\x{1f}
OUTPUT
output_is( <<'CODE', <<'OUTPUT', "escape latin1");
@@ -761,7 +761,7 @@ output_is( <<'CODE', <<'OUTPUT', "escape
print "\n"
end
CODE
-t\xf6tsch leo
+t\x{f6}tsch leo
OUTPUT
output_is( <<'CODE', <<'OUTPUT', "escape unicode" );