Author: leo
Date: Sat Apr 16 04:06:03 2005
New Revision: 7851
Modified:
trunk/build_tools/build_nativecall.pl
trunk/classes/default.pmc
trunk/imcc/docs/syntax.pod
trunk/imcc/pbc.c
trunk/imcc/symreg.c
trunk/imcc/symreg.h
trunk/include/parrot/charset.h
trunk/include/parrot/string_funcs.h
trunk/io/io_unix.c
trunk/io/io_win32.c
trunk/lib/Parrot/Pmc2c.pm
trunk/ops/string.ops
trunk/ops/sys.ops
trunk/src/charset.c
trunk/src/datatypes.c
trunk/src/debug.c
trunk/src/embed.c
trunk/src/exceptions.c
trunk/src/exec_start.c
trunk/src/inter_create.c
trunk/src/inter_misc.c
trunk/src/jit_debug.c
trunk/src/jit_debug_xcoff.c
trunk/src/method_util.c
trunk/src/misc.c
trunk/src/spf_render.c
trunk/src/string.c
trunk/t/op/string_cs.t
Log:
Strings. Finally. 17 - make ascii the default
* please read the note on p6i WRT the string change
Modified: trunk/build_tools/build_nativecall.pl
==============================================================================
--- trunk/build_tools/build_nativecall.pl (original)
+++ trunk/build_tools/build_nativecall.pl Sat Apr 16 04:06:03 2005
@@ -514,13 +514,13 @@
see which signature has an unknown type. I am sure someone can come up
with a neater way to do this.
*/
- ns = string_make(interpreter, " is an unknown signature type", 29,
"iso-8859-1", 0);
+ ns = string_make(interpreter, " is an unknown signature type", 29,
"ascii", 0);
message = string_concat(interpreter, signature, ns, 0);
#if defined(CAN_BUILD_CALL_FRAMES)
- ns = string_make(interpreter, ".\\nCAN_BUILD_CALL_FRAMES is enabled, this should not happen", 58, "iso-8859-1", 0);
+ ns = string_make(interpreter, ".\\nCAN_BUILD_CALL_FRAMES is enabled, this should not happen", 58, "ascii", 0);
#else
- ns = string_make(interpreter, ".\\nCAN_BUILD_CALL_FRAMES is disabled, add the signature to src/call_list.txt", 75, "iso-8859-1", 0);
+ ns = string_make(interpreter, ".\\nCAN_BUILD_CALL_FRAMES is disabled, add the signature to src/call_list.txt", 75, "ascii", 0);
#endif
message = string_concat(interpreter, message, ns, 0);
Modified: trunk/classes/default.pmc
==============================================================================
--- trunk/classes/default.pmc (original)
+++ trunk/classes/default.pmc Sat Apr 16 04:06:03 2005
@@ -80,7 +80,7 @@
*/
static STRING *ro;
if (!ro)
- ro = string_make(interpreter, "_ro", 3, "iso-8859-1",
+ ro = string_make(interpreter, "_ro", 3, "ascii",
PObj_constant_FLAG|PObj_external_FLAG);
if (!string_compare(interpreter, key, ro)) {
#else
Modified: trunk/imcc/docs/syntax.pod
==============================================================================
--- trunk/imcc/docs/syntax.pod (original)
+++ trunk/imcc/docs/syntax.pod Sat Apr 16 04:06:03 2005
@@ -83,16 +83,23 @@
=item "string constants"
Are delimited by B<">. A B<"> inside a string must be escaped by
-B<\">.
+B<\">. Only 7-bit ASCII is accepted in string constants; to use
+characters outside that range, specify an encoding in the way below.
=item charset:"string constant"
-Like above with a charset attached to the string. Valid charset are
-currently: C<ascii>, C<binary>, and the default C<iso-8859-1>.
+Like above with a character set attached to the string. Valid character
+sets are currently: C<ascii> (the default), C<binary>, C<unicode>
+(with UTF-8 as the default encoding), and C<iso-8859-1>.
+
+=item charset:encoding:"string constant"
+
+Like above with an extra encoding attached to the string.
+Currently unimplemented.
=item 'char constant'
-Are delimited by B<'>.
+Are delimited by B<'>. They are taken to be C<ascii> encoded.
=item numeric constants
Modified: trunk/imcc/pbc.c
==============================================================================
--- trunk/imcc/pbc.c (original)
+++ trunk/imcc/pbc.c Sat Apr 16 04:06:03 2005
@@ -548,10 +548,8 @@
char *buf = r->name;
STRING *s = NULL;
char *charset = NULL;
- /*
- * VT_UNICODE should better be VT_CHARSET
- */
- if (r->type & VT_UNICODE) {
+
+ if (r->type & VT_ENCODED) {
char *p;
p = strchr(r->name, ':');
assert(p);
@@ -566,7 +564,7 @@
}
else if (*buf == '\'') { /* TODO handle python raw strings */
buf++;
- s = string_make(interpreter, buf, strlen(buf) - 1, "iso-8859-1",
+ s = string_make(interpreter, buf, strlen(buf) - 1, "ascii",
PObj_constant_FLAG);
}
else {
Modified: trunk/imcc/symreg.c
==============================================================================
--- trunk/imcc/symreg.c (original)
+++ trunk/imcc/symreg.c Sat Apr 16 04:06:03 2005
@@ -342,7 +342,7 @@
if (t == 'U') {
/* charset:"string" */
r->set = 'S';
- r->type |= VT_UNICODE;
+ r->type |= VT_ENCODED;
}
r->use_count++;
return r;
Modified: trunk/imcc/symreg.h
==============================================================================
--- trunk/imcc/symreg.h (original)
+++ trunk/imcc/symreg.h Sat Apr 16 04:06:03 2005
@@ -24,7 +24,7 @@
VT_START_ZERO = PF_VT_START_ZERO , /* .. y 0..start */
VT_END_INF = PF_VT_END_INF , /* x.. start..inf */
VT_SLICE_BITS = PF_VT_SLICE_BITS,
- VT_UNICODE = 1 << 16 /* unicode string constant */
+ VT_ENCODED = 1 << 16 /* charset:"string" constant */
};
/* this VARTYPE needs register allocation and such */
Modified: trunk/include/parrot/charset.h
==============================================================================
--- trunk/include/parrot/charset.h (original)
+++ trunk/include/parrot/charset.h Sat Apr 16 04:06:03 2005
@@ -28,7 +28,7 @@
extern CHARSET *Parrot_ascii_charset_ptr;
#endif
-#define PARROT_DEFAULT_CHARSET Parrot_iso_8859_1_charset_ptr
+#define PARROT_DEFAULT_CHARSET Parrot_ascii_charset_ptr
#define PARROT_BINARY_CHARSET Parrot_binary_charset
#define PARROT_UNICODE_CHARSET Parrot_unicode_charset_ptr
Modified: trunk/include/parrot/string_funcs.h
==============================================================================
--- trunk/include/parrot/string_funcs.h (original)
+++ trunk/include/parrot/string_funcs.h Sat Apr 16 04:06:03 2005
@@ -118,6 +118,8 @@
STRING* Parrot_string_trans_charset(Interp *, STRING *src,
INTVAL charset_nr, STRING *dest);
+CHARSET* string_rep_compatible (Interp *, STRING *a, const STRING *b);
+
#endif /* PARROT_IN_CORE */
#endif /* PARROT_STRING_FUNCS_H_GUARD */
Modified: trunk/io/io_unix.c
==============================================================================
--- trunk/io/io_unix.c (original)
+++ trunk/io/io_unix.c Sat Apr 16 04:06:03 2005
@@ -662,7 +662,7 @@
sa.sin_port = htons(port);
return string_make(interpreter, &sa, sizeof(struct sockaddr),
- "iso-8859-1", 0);
+ "binary", 0);
}
@@ -934,7 +934,7 @@
else {
close(io->fd);
}
- *s = string_make(interpreter, buf, bytesread, "iso-8859-1", 0);
+ *s = string_make(interpreter, buf, bytesread, "binary", 0);
if (!*s) {
PANIC("PIO_recv: Failed to allocate string");
}
Modified: trunk/io/io_win32.c
==============================================================================
--- trunk/io/io_win32.c (original)
+++ trunk/io/io_win32.c Sat Apr 16 04:06:03 2005
@@ -549,7 +549,7 @@
sa.sin_port = htons(port);
- return string_make(interpreter, &sa, sizeof(struct sockaddr),
"iso-8859-1", 0);
+ return string_make(interpreter, &sa, sizeof(struct sockaddr), "binary", 0);
}
@@ -691,7 +691,7 @@
else {
close((SOCKET)io->fd);
}
- *s = string_make(interpreter, buf, bytesread, "iso-8859-1", 0);
+ *s = string_make(interpreter, buf, bytesread, "binary", 0);
if(!*s) {
PANIC("PIO_recv: Failed to allocate string");
}
Modified: trunk/lib/Parrot/Pmc2c.pm
==============================================================================
--- trunk/lib/Parrot/Pmc2c.pm (original)
+++ trunk/lib/Parrot/Pmc2c.pm Sat Apr 16 04:06:03 2005
@@ -854,13 +854,13 @@
$cout .= <<"EOC";
vt_clone->base_type = entry;
vt_clone->whoami = string_make(interp,
- "$classname", @{[length($classname)]}, "iso-8859-1",
+ "$classname", @{[length($classname)]}, "ascii",
PObj_constant_FLAG|PObj_external_FLAG);
vt_clone->isa_str = string_make(interp,
- "$isa", @{[length($isa)]}, "iso-8859-1",
+ "$isa", @{[length($isa)]}, "ascii",
PObj_constant_FLAG|PObj_external_FLAG);
vt_clone->does_str = string_make(interp,
- "$does", @{[length($does)]}, "iso-8859-1",
+ "$does", @{[length($does)]}, "ascii",
PObj_constant_FLAG|PObj_external_FLAG);
EOC
}
Modified: trunk/ops/string.ops
==============================================================================
--- trunk/ops/string.ops (original)
+++ trunk/ops/string.ops Sat Apr 16 04:06:03 2005
@@ -327,7 +327,7 @@
char *c = (char *)&$3;
STRING *s;
- s = string_make(interpreter, c, (UINTVAL)$2, "iso-8859-1", 0);
+ s = string_make(interpreter, c, (UINTVAL)$2, "binary", 0);
$1 = string_concat(interpreter, $1, s, 1);
goto NEXT();
@@ -337,7 +337,7 @@
char *c = (char *)&$3;
STRING *s;
- s = string_make(interpreter, c, (UINTVAL)$2, "iso-8859-1", 0);
+ s = string_make(interpreter, c, (UINTVAL)$2, "binary", 0);
$1 = string_concat(interpreter, $1, s, 1);
goto NEXT();
@@ -372,7 +372,7 @@
char *t;
int i;
- s = string_make(interpreter, c, (UINTVAL)$2, "iso-8859-1", 0);
+ s = string_make(interpreter, c, (UINTVAL)$2, "binary", 0);
ln = string_length(interpreter, $1);
if (ln < $4 + $2)
{
Modified: trunk/ops/sys.ops
==============================================================================
--- trunk/ops/sys.ops (original)
+++ trunk/ops/sys.ops Sat Apr 16 04:06:03 2005
@@ -54,13 +54,13 @@
op err(out STR) {
const char *tmp = strerror(errno);
- $1 = string_make(interpreter, tmp, strlen(tmp), "iso-8859-1", 0);
+ $1 = string_make(interpreter, tmp, strlen(tmp), "ascii", 0);
goto NEXT();
}
op err(out STR, in INT) {
const char *tmp = strerror($2);
- $1 = string_make(interpreter, tmp, strlen(tmp), "iso-8859-1", 0);
+ $1 = string_make(interpreter, tmp, strlen(tmp), "ascii", 0);
goto NEXT();
}
Modified: trunk/src/charset.c
==============================================================================
--- trunk/src/charset.c (original)
+++ trunk/src/charset.c Sat Apr 16 04:06:03 2005
@@ -221,9 +221,6 @@
}
if (!strcmp("iso-8859-1", charsetname)) {
Parrot_iso_8859_1_charset_ptr = charset;
- if (!Parrot_default_charset_ptr) {
- Parrot_default_charset_ptr = charset;
- }
return register_charset(interpreter, charsetname, charset);
}
if (!strcmp("unicode", charsetname)) {
@@ -231,7 +228,9 @@
return register_charset(interpreter, charsetname, charset);
}
if (!strcmp("ascii", charsetname)) {
-
+ if (!Parrot_default_charset_ptr) {
+ Parrot_default_charset_ptr = charset;
+ }
Parrot_ascii_charset_ptr = charset;
return register_charset(interpreter, charsetname, charset);
}
@@ -243,14 +242,14 @@
{
/* the order is crucial here:
* 1) encodings, default = fixed_8
- * 2) charsets default = iso-8859-1
+ * 2) charsets default = ascii
*/
Parrot_encoding_fixed_8_init(interpreter);
Parrot_encoding_utf8_init(interpreter);
+ Parrot_charset_ascii_init(interpreter);
Parrot_charset_iso_8859_1_init(interpreter);
Parrot_charset_binary_init(interpreter);
- Parrot_charset_ascii_init(interpreter);
Parrot_charset_unicode_init(interpreter);
/*
* now install charset converters
Modified: trunk/src/datatypes.c
==============================================================================
--- trunk/src/datatypes.c (original)
+++ trunk/src/datatypes.c Sat Apr 16 04:06:03 2005
@@ -71,7 +71,7 @@
s = "illegal";
else
s = data_types[type - enum_first_type].name;
- return string_make(interpreter, s, strlen(s), "iso-8859-1",
PObj_external_FLAG);
+ return string_make(interpreter, s, strlen(s), NULL, PObj_external_FLAG);
}
/*
Modified: trunk/src/debug.c
==============================================================================
--- trunk/src/debug.c (original)
+++ trunk/src/debug.c Sat Apr 16 04:06:03 2005
@@ -152,7 +152,7 @@
str++;
}
- *strP = string_make(interpreter, string, str - string, "iso-8859-1", 0);
+ *strP = string_make(interpreter, string, str - string, NULL, 0);
if (*str)
str++;
@@ -685,7 +685,7 @@
str[i - 1] = command[i];
str[i - 1] = '\0';
condition->value = string_make(interpreter,
- str, i - 1, "iso-8859-1", PObj_external_FLAG);
+ str, i - 1, NULL, PObj_external_FLAG);
condition->type |= PDB_cond_const;
}
else if (condition->type & PDB_cond_pmc) {
@@ -892,7 +892,7 @@
c[i] = '\0';
na(command);
- arg = string_make(interpreter, c, i, "iso-8859-1", 0);
+ arg = string_make(interpreter, c, i, NULL, 0);
VTABLE_push_string(interpreter, userargv, arg);
}
@@ -2952,7 +2952,7 @@
interpreter->ctx.current_pc - PMC_sub(sub)->address
);
}
-
+
sub = interpinfo_p(interpreter, CURRENT_CONT);
while (!PMC_IS_NULL(sub) && sub->vtable->base_type ==
enum_class_Continuation) {
str = VTABLE_get_string(interpreter, sub);
Modified: trunk/src/embed.c
==============================================================================
--- trunk/src/embed.c (original)
+++ trunk/src/embed.c Sat Apr 16 04:06:03 2005
@@ -229,7 +229,7 @@
return NULL;
}
fs = interpreter->current_file = string_make(interpreter, fullname,
- strlen(fullname), "iso-8859-1", 0);
+ strlen(fullname), NULL, 0);
if (!Parrot_stat_info_intval(interpreter, fs, STAT_EXISTS)) {
PIO_eprintf(interpreter, "Parrot VM: Can't stat %s, code %i.\n",
fullname, errno);
@@ -409,7 +409,7 @@
for (i = 0; i < argc; i++) {
/* Run through argv, adding everything to @ARGS. */
STRING *arg = string_make(interpreter, argv[i], strlen(argv[i]),
- "iso-8859-1", PObj_external_FLAG);
+ NULL, PObj_external_FLAG);
if (Interp_flags_TEST(interpreter, PARROT_DEBUG_FLAG)) {
PIO_eprintf(interpreter, "\t%vd: %s\n", i, argv[i]);
Modified: trunk/src/exceptions.c
==============================================================================
--- trunk/src/exceptions.c (original)
+++ trunk/src/exceptions.c Sat Apr 16 04:06:03 2005
@@ -683,7 +683,7 @@
}
else
msg = string_make(interpreter, format, strlen(format),
- "iso-8859-1", PObj_external_FLAG);
+ NULL, PObj_external_FLAG);
/* string_from_cstring(interpreter, format, strlen(format)); */
/*
* FIXME classify errors
Modified: trunk/src/exec_start.c
==============================================================================
--- trunk/src/exec_start.c (original)
+++ trunk/src/exec_start.c Sat Apr 16 04:06:03 2005
@@ -68,7 +68,7 @@
for (i = 0; i < argc; i++) {
/* Run through argv, adding everything to @ARGS. */
STRING *arg = string_make(interpreter, argv[i], strlen(argv[i]),
- "iso-8859-1", PObj_external_FLAG);
+ NULL, PObj_external_FLAG);
if (Interp_flags_TEST(interpreter, PARROT_DEBUG_FLAG)) {
PIO_eprintf(interpreter, "\t%vd: %s\n", i, argv[i]);
Modified: trunk/src/inter_create.c
==============================================================================
--- trunk/src/inter_create.c (original)
+++ trunk/src/inter_create.c Sat Apr 16 04:06:03 2005
@@ -203,9 +203,9 @@
/* Set up defaults for line/package/file */
interpreter->current_file =
- string_make(interpreter, "(unknown file)", 14, "iso-8859-1", 0);
+ string_make(interpreter, "(unknown file)", 14, NULL, 0);
interpreter->ctx.current_package =
- string_make(interpreter, "(unknown package)", 18, "iso-8859-1", 0);;
+ string_make(interpreter, "(unknown package)", 18, NULL, 0);
SET_NULL_P(interpreter->code, struct PackFile *);
SET_NULL_P(interpreter->profile, ProfData *);
Modified: trunk/src/inter_misc.c
==============================================================================
--- trunk/src/inter_misc.c (original)
+++ trunk/src/inter_misc.c Sat Apr 16 04:06:03 2005
@@ -70,11 +70,11 @@
method = pmc_new(interpreter, enum_class_NCI);
VTABLE_set_pointer_keyed_str(interpreter, method,
string_make(interpreter, proto, strlen(proto),
- "iso-8859-1", PObj_constant_FLAG|PObj_external_FLAG),
+ NULL, PObj_constant_FLAG|PObj_external_FLAG),
func);
VTABLE_set_pmc_keyed_str(interpreter, method_table,
string_make(interpreter, name,
- strlen(name), "iso-8859-1",
+ strlen(name), NULL,
PObj_constant_FLAG|PObj_external_FLAG),
method);
#else
@@ -82,12 +82,12 @@
method = pmc_new(interpreter, enum_class_NCI);
VTABLE_set_pointer_keyed_str(interpreter, method,
string_make(interpreter, proto, strlen(proto),
- "iso-8859-1", PObj_constant_FLAG|PObj_external_FLAG),
+ NULL, PObj_constant_FLAG|PObj_external_FLAG),
func);
Parrot_store_global(interpreter,
Parrot_base_vtables[type]->whoami,
string_make(interpreter, name,
- strlen(name), "iso-8859-1",
+ strlen(name), NULL,
PObj_constant_FLAG|PObj_external_FLAG),
method);
Modified: trunk/src/jit_debug.c
==============================================================================
--- trunk/src/jit_debug.c (original)
+++ trunk/src/jit_debug.c Sat Apr 16 04:06:03 2005
@@ -258,7 +258,7 @@
STRING *ret;
ret = string_copy(interpreter, file);
ret = string_append(interpreter, ret,
- string_make(interpreter, ext, strlen(ext), "iso-8859-1",
+ string_make(interpreter, ext, strlen(ext), NULL,
PObj_external_FLAG),
0);
return ret;
@@ -290,7 +290,7 @@
if (interpreter->code->cur_cs->debugs) {
char *ext;
char *src = interpreter->code->cur_cs->debugs->filename;
- pasmfile = string_make(interpreter, src, strlen(src), "iso-8859-1",
+ pasmfile = string_make(interpreter, src, strlen(src), NULL,
PObj_external_FLAG);
file = string_copy(interpreter, pasmfile);
/* chop pasm/imc */
@@ -302,7 +302,7 @@
file = string_chopn(interpreter, file, 3);
else if (!ext) /* EVAL_n */
file = string_append(interpreter, file,
- string_make(interpreter, ".", 1, "iso-8859-1",
PObj_external_FLAG),
+ string_make(interpreter, ".", 1, NULL, PObj_external_FLAG),
0);
}
else {
Modified: trunk/src/jit_debug_xcoff.c
==============================================================================
--- trunk/src/jit_debug_xcoff.c (original)
+++ trunk/src/jit_debug_xcoff.c Sat Apr 16 04:06:03 2005
@@ -227,7 +227,7 @@
STRING *ret;
ret = string_copy(interpreter, file);
ret = string_append(interpreter, ret,
- string_make(interpreter, ext, strlen(ext), "iso-8859-1",
+ string_make(interpreter, ext, strlen(ext), NULL,
PObj_external_FLAG),
0);
return ret;
@@ -258,7 +258,7 @@
if (interpreter->code->cur_cs->debugs) {
char *ext;
char *src = interpreter->code->cur_cs->debugs->filename;
- pasmfile = string_make(interpreter, src, strlen(src), "iso-8859-1",
+ pasmfile = string_make(interpreter, src, strlen(src), NULL,
PObj_external_FLAG);
file = string_copy(interpreter, pasmfile);
/* chop pasm/imc */
@@ -270,7 +270,7 @@
file = string_chopn(interpreter, file, 3);
else if (!ext) /* EVAL_n */
file = string_append(interpreter, file,
- string_make(interpreter, ".", 1, "iso-8859-1",
PObj_external_FLAG),
+ string_make(interpreter, ".", 1, NULL, PObj_external_FLAG),
0);
}
else {
Modified: trunk/src/method_util.c
==============================================================================
--- trunk/src/method_util.c (original)
+++ trunk/src/method_util.c Sat Apr 16 04:06:03 2005
@@ -171,7 +171,7 @@
while (recp->name != NULL) {
PMC *csub = Parrot_new_csub(interp, recp->sub);
STRING *name = string_make(interp, recp->name, strlen(recp->name),
- "iso-8859-1", 0);
+ NULL, 0);
key_set_string(interp, k, name);
VTABLE_set_pmc_keyed(interp, hash, k, csub);
++recp;
Modified: trunk/src/misc.c
==============================================================================
--- trunk/src/misc.c (original)
+++ trunk/src/misc.c Sat Apr 16 04:06:03 2005
@@ -87,7 +87,7 @@
STRING *realpat, *ret;
realpat = string_make(interpreter, pat, strlen(pat),
- "iso-8859-1", PObj_external_FLAG);
+ NULL, PObj_external_FLAG);
ret = Parrot_vsprintf_s(interpreter, realpat, args);
Modified: trunk/src/spf_render.c
==============================================================================
--- trunk/src/spf_render.c (original)
+++ trunk/src/spf_render.c Sat Apr 16 04:06:03 2005
@@ -66,7 +66,7 @@
} while (num /= base);
if (minus)
*--p = '-';
- return string_make(interpreter, p, tail - p, "iso-8859-1", 0);
+ return string_make(interpreter, p, tail - p, "ascii", 0);
}
/*
@@ -696,7 +696,7 @@
if (obj->getstring == pmc_core.getstring) {
PMC *tmp =
VTABLE_get_pmc_keyed_int(interpreter,
((PMC *)obj->data), (obj->index));
-
+
obj->index++;
string = (VTABLE_get_repr(interpreter, tmp));
Modified: trunk/src/string.c
==============================================================================
--- trunk/src/string.c (original)
+++ trunk/src/string.c Sat Apr 16 04:06:03 2005
@@ -402,6 +402,20 @@
return s;
}
+CHARSET *
+string_rep_compatible (Interp *interpreter, STRING *a, const STRING *b)
+{
+ if (a->encoding != b->encoding) /* XXX utf8 ascii */
+ return NULL;
+ if (a->charset == b->charset)
+ return a->charset;
+ if (b->charset == Parrot_ascii_charset_ptr)
+ return a->charset;
+ if (a->charset == Parrot_ascii_charset_ptr)
+ return b->charset;
+ return NULL;
+}
+
/*
=item C<STRING *
@@ -420,6 +434,8 @@
{
UINTVAL a_capacity, b_len;
UINTVAL total_length;
+ CHARSET *cs;
+
UNUSED(Uflags);
/* If B isn't real, we just bail */
@@ -456,8 +472,9 @@
/* A is now ready to receive the contents of B */
- /* if same rep, can memcopy */
- if (a->encoding == b->encoding && a->charset == b->charset) {
+ /* if compatible rep, can memcopy */
+ if ( (cs = string_rep_compatible(interpreter, a, b))) {
+ a->charset = cs;
/* Tack B on the end of A */
mem_sys_memcopy((void *)((ptrcast_t)a->strstart + a->bufused),
b->strstart, b->bufused);
@@ -546,7 +563,7 @@
{
switch (representation) {
case enum_stringrep_one:
- return "iso-8859-1";
+ return "ascii";
break;
default:
internal_exception(INVALID_STRING_REPRESENTATION,
@@ -593,7 +610,7 @@
'ascii'
'binary'
-If C<charset> is unspecified the default charset 'iso-8859-1' will be
+If C<charset> is unspecified the default charset 'ascii' will be
used.
The value of C<flags> is optionally one or more C<PObj_*> flags C<OR>-ed
@@ -609,11 +626,10 @@
{
ENCODING *encoding;
CHARSET *charset;
+
if (!charset_name) {
- internal_exception(MISSING_ENCODING_NAME,
- "string_make: no charset name specified");
+ charset_name = "ascii";
}
-
charset = Parrot_find_charset(interpreter, charset_name);
if (!charset) {
internal_exception(UNIMPLEMENTED,
@@ -648,7 +664,7 @@
s->charset = charset;
if (encoding == Parrot_fixed_8_encoding_ptr &&
- charset == Parrot_iso_8859_1_charset_ptr) {
+ charset == Parrot_ascii_charset_ptr) {
/*
* fast path for external (constant) strings - don't allocate
* and copy data
@@ -1108,13 +1124,19 @@
UINTVAL true_offset;
UINTVAL true_length;
INTVAL diff;
+ CHARSET *cs;
true_offset = (UINTVAL)offset;
true_length = (UINTVAL)length;
/* may have different reps..... */
- if (src->encoding != rep->encoding || src->charset != rep->charset) {
- internal_exception(UNIMPLEMENTED, "Can't handle mixed types yet");
+ if ( !(cs = string_rep_compatible(interpreter, src, rep))) {
+ internal_exception(UNIMPLEMENTED,
+ "Cross-type string replace (%s/%s) (%s/%s) unsupported",
+ ((ENCODING *)(src->encoding))->name,
+ ((CHARSET *)(src->charset))->name,
+ ((ENCODING *)(rep->encoding))->name,
+ ((CHARSET *)(rep->charset))->name);
}
/* abs(-offset) may not be > strlen-1 */
@@ -1141,6 +1163,7 @@
UINTVAL length_bytes = string_max_bytes(interpreter, src, true_length);
dest = string_make_empty(interpreter, enum_stringrep_one, true_length);
+ dest->charset = src->charset;
mem_sys_memcopy(dest->strstart,
(char *)src->strstart
@@ -1153,6 +1176,7 @@
*d = dest;
}
+ src->charset = cs;
/* Now do the replacement */
@@ -1401,6 +1425,7 @@
STRING *res = NULL;
size_t minlen = 0;
parrot_string_representation_t maxrep = enum_stringrep_one;
+ CHARSET *cs;
/* think about case of dest string is one of the operands */
if (s1 && s2) {
@@ -1422,11 +1447,13 @@
res->strlen = 0;
return res;
}
- else {
- if (s1->encoding != s2->encoding || s1->charset != s2->charset) {
- internal_exception(UNIMPLEMENTED,
- "Can't do cross-type bitwwise and");
- }
+ if ( !(cs = string_rep_compatible(interpreter, s1, s2))) {
+ internal_exception(UNIMPLEMENTED,
+ "Cross-type string bitwise_and (%s/%s) (%s/%s) unsupported",
+ ((ENCODING *)(s1->encoding))->name,
+ ((CHARSET *)(s1->charset))->name,
+ ((ENCODING *)(s2->encoding))->name,
+ ((CHARSET *)(s2->charset))->name);
}
#if ! DISABLE_GC_DEBUG
/* trigger GC for debug */
@@ -1435,6 +1462,7 @@
#endif
make_writable(interpreter, &res, minlen, enum_stringrep_one);
+ res->charset = cs;
BITWISE_AND_STRINGS(Parrot_UInt1, Parrot_UInt1,
Parrot_UInt1, s1, s2, res, minlen);
@@ -1509,6 +1537,7 @@
STRING *res = NULL;
size_t maxlen = 0;
parrot_string_representation_t maxrep = enum_stringrep_one;
+ CHARSET *cs;
maxlen = s1 ? s1->bufused: 0;
if (s2 && s2->bufused > maxlen)
@@ -1527,6 +1556,20 @@
return res;
}
+ if (!s1)
+ cs = s2->charset;
+ else if (!s2)
+ cs = s1->charset;
+ else {
+ if ( !(cs = string_rep_compatible(interpreter, s1, s2))) {
+ internal_exception(UNIMPLEMENTED,
+ "Cross-type string bitwise_or (%s/%s) (%s/%s) unsupported",
+ ((ENCODING *)(s1->encoding))->name,
+ ((CHARSET *)(s1->charset))->name,
+ ((ENCODING *)(s2->encoding))->name,
+ ((CHARSET *)(s2->charset))->name);
+ }
+ }
#if ! DISABLE_GC_DEBUG
/* trigger GC for debug */
if (interpreter && GC_DEBUG(interpreter))
@@ -1534,6 +1577,7 @@
#endif
make_writable(interpreter, &res, maxlen, enum_stringrep_one);
+ res->charset = cs;
BITWISE_OR_STRINGS(Parrot_UInt1, Parrot_UInt1, Parrot_UInt1,
s1, s2, res, maxlen, |);
@@ -1567,6 +1611,7 @@
STRING *res = NULL;
size_t maxlen = 0;
parrot_string_representation_t maxrep = enum_stringrep_one;
+ CHARSET *cs;
maxlen = s1 ? s1->bufused: 0;
if (s2 && s2->bufused > maxlen)
@@ -1585,6 +1630,20 @@
return res;
}
+ if (!s1)
+ cs = s2->charset;
+ else if (!s2)
+ cs = s1->charset;
+ else {
+ if ( !(cs = string_rep_compatible(interpreter, s1, s2))) {
+ internal_exception(UNIMPLEMENTED,
+ "Cross-type string bitwise_xor (%s/%s) (%s/%s) unsupported",
+ ((ENCODING *)(s1->encoding))->name,
+ ((CHARSET *)(s1->charset))->name,
+ ((ENCODING *)(s2->encoding))->name,
+ ((CHARSET *)(s2->charset))->name);
+ }
+ }
#if ! DISABLE_GC_DEBUG
/* trigger GC for debug */
if (interpreter && GC_DEBUG(interpreter))
@@ -1592,6 +1651,7 @@
#endif
make_writable(interpreter, &res, maxlen, enum_stringrep_one);
+ res->charset = cs;
BITWISE_OR_STRINGS(Parrot_UInt1, Parrot_UInt1, Parrot_UInt1,
s1, s2, res, maxlen, ^);
@@ -2289,7 +2349,7 @@
--clength;
flags = PObj_constant_FLAG;
if (!charset)
- charset = "iso-8859-1";
+ charset = "ascii";
else
flags |= PObj_private7_FLAG; /* Pythonic unicode flag */
result = string_make(interpreter, cstring, clength, charset, flags);
Modified: trunk/t/op/string_cs.t
==============================================================================
--- trunk/t/op/string_cs.t (original)
+++ trunk/t/op/string_cs.t Sat Apr 16 04:06:03 2005
@@ -34,7 +34,7 @@
OUTPUT
output_is( <<'CODE', <<OUTPUT, "charset name" );
- set S0, ascii:"ok 1\n"
+ set S0, "ok 1\n"
charset I0, S0
charsetname S1, I0
print S1
@@ -66,7 +66,7 @@
OUTPUT
output_is( <<'CODE', <<OUTPUT, "downcase" );
- set S0, "AEIOU_���\n"
+ set S0, iso-8859-1:"AEIOU_���\n"
downcase S1, S0
print S1
end
@@ -75,7 +75,7 @@
OUTPUT
output_is( <<'CODE', <<OUTPUT, "upcase" );
- set S0, "aeiou_����\n"
+ set S0, iso-8859-1:"aeiou_����\n"
upcase S1, S0
print S1
end
@@ -84,7 +84,7 @@
OUTPUT
output_is( <<'CODE', <<OUTPUT, "titlecase" );
- set S0, "zAEIOU_���\n"
+ set S0, iso-8859-1:"zAEIOU_���\n"
titlecase S1, S0
print S1
end
@@ -93,7 +93,7 @@
OUTPUT
output_is( <<'CODE', <<OUTPUT, "is_whitespace");
- set S0, "a\t\n \xa0"
+ set S0, iso-8859-1:"a\t\n \xa0"
is_whitespace I0, S0, 0
is_whitespace I1, S0, 1
is_whitespace I2, S0, 2
@@ -248,7 +248,7 @@
output_is( <<'CODE', <<OUTPUT, "trans_charset_s_s_i");
set S0, "abc"
- find_charset I0, "ascii"
+ find_charset I0, "iso-8859-1"
trans_charset S1, S0, I0
print S1
print "\n"
@@ -259,12 +259,12 @@
end
CODE
abc
-ascii
+iso-8859-1
OUTPUT
output_is( <<'CODE', <<OUTPUT, "trans_charset_s_i");
set S1, "abc"
- find_charset I0, "ascii"
+ find_charset I0, "iso-8859-1"
trans_charset S1, I0
print S1
print "\n"
@@ -275,12 +275,12 @@
end
CODE
abc
-ascii
+iso-8859-1
OUTPUT
output_like( <<'CODE', <<OUTPUT, "trans_charset_s_i - lossy");
- set S1, "abc�"
+ set S1, iso-8859-1:"abc�"
find_charset I0, "ascii"
trans_charset S1, I0
print "never\n"
@@ -306,7 +306,7 @@
OUTPUT
output_is( <<'CODE', <<OUTPUT, "trans_charset_s_s_i iso-8859-1 to binary");
- set S0, "abc"
+ set S0, iso-8859-1:"abc"
find_charset I0, "binary"
trans_charset S1, S0, I0
print S1
@@ -322,7 +322,7 @@
OUTPUT
output_is( <<'CODE', <<OUTPUT, "trans_charset_s_i iso-8859-1 to binary");
- set S1, "abc"
+ set S1, iso-8859-1:"abc"
find_charset I0, "binary"
trans_charset S1, I0
print S1
@@ -402,7 +402,7 @@
OUTPUT
output_is( <<'CODE', <<OUTPUT, "trans_charset_s_s_i iso-8859-1 to unicode");
- set S0, "abc_�_"
+ set S0, iso-8859-1:"abc_�_"
find_charset I0, "unicode"
trans_charset S1, S0, I0
print S1