Hey all.
This is a much nicer implementation of ord, which does it properly as a
string encoding "method".
The only real uglyness I see is the "I have no mouth and I must scream"
problem. I could take a Perl_Interpreter * parameter, but the only reason I
need it is to fire off exceptions.
WARNING: This only implements the encoding method for strnative. This
isn't really so much a problem, since it's impossible to make strings in
other encodings, and I wanted to get this off to see if the style is good
before I figure out utf8 and utf16 versions of this (utf32 should be
trivial). There is a halfharted attempt to make it die cleanly if this
happens, but there's nothing to guarantee that the compiler will put a NULL
there.
Please tell me any problems you see with this.
I don't see what chr() should look like, though. What's the interface to
multiple encodings on the opcode level? I'd like to just say that chr
always creates a utf32 string. String encodings don't have fixed numbers in
a plugable-encoding world (and I assume that's where we're going), so I
can't take an i|ic parameter for that. String encodings are an enum, so I
can't take the name of the encoding as an s|sc parameter. Ideas?
-=- James Mastros
Index: core.ops
===================================================================
RCS file: /home/perlcvs/parrot/core.ops,v
retrieving revision 1.17
diff -u -r1.17 core.ops
--- core.ops 2001/10/22 23:34:47 1.17
+++ core.ops 2001/10/23 06:47:57
@@ -991,6 +991,18 @@
$1 = string_substr(interpreter, $2, $3, $4, &$1);
}
+########################################
+
+=item B<ord>(i, s)
+=item B<ord>(i, sc)
+
+Set $1 to the codepoint of the first character in $2.
+
+=cut
+
+AUTO_OP ord(i, s|sc) {
+ $1 = string_ord($2);
+}
=back
Index: string.c
===================================================================
RCS file: /home/perlcvs/parrot/string.c,v
retrieving revision 1.15
diff -u -r1.15 string.c
--- string.c 2001/10/22 23:34:47 1.15
+++ string.c 2001/10/23 06:47:57
@@ -168,6 +168,24 @@
return (ENC_VTABLE(s1)->compare)(s1, s2);
}
+/*=for api string string_ord
+ * get the codepoint of the first char of the string.
+ * (FIXME: Document in docs/strings.pod)
+ */
+INTVAL
+string_ord(STRING* s) {
+ /* FIXME: How should I report this error?
+ * Should I require an interpreter param just so that I can
+ * raise an exception properly?
+ */
+ if (ENC_VTABLE(s)->ord != NULL)
+ return (ENC_VTABLE(s)->ord)(s);
+ else {
+ printf("I have no mouth and I must scream: no ord() for encoding %d!\n",
+s->encoding->which);
+ exit(-1);
+ }
+}
+
/*
* Local variables:
* c-indentation-style: bsd
Index: strnative.c
===================================================================
RCS file: /home/perlcvs/parrot/strnative.c,v
retrieving revision 1.19
diff -u -r1.19 strnative.c
--- strnative.c 2001/10/22 23:34:47 1.19
+++ strnative.c 2001/10/23 06:47:57
@@ -105,6 +105,14 @@
return cmp;
}
+/*=for api string_native string_native_ord
+ returns the value of the first byte of the string.
+ */
+INTVAL
+string_native_ord (STRING* s) {
+ return (INTVAL)*(char *)(s->bufstart);
+}
+
/*=for api string_native string_native_vtable
return the vtable for the native string
*/
@@ -118,6 +126,7 @@
string_native_chopn,
string_native_substr,
string_native_compare,
+ string_native_ord,
};
return sv;
}
Index: include/parrot/string.h
===================================================================
RCS file: /home/perlcvs/parrot/include/parrot/string.h,v
retrieving revision 1.8
diff -u -r1.8 string.h
--- include/parrot/string.h 2001/10/22 23:34:48 1.8
+++ include/parrot/string.h 2001/10/23 06:47:57
@@ -45,6 +45,7 @@
string_iv_to_string_t chopn; /* Remove n characters from the end of a
string */
substr_t substr; /* Substring operation */
two_strings_to_iv_t compare; /* Compare operation */
+ string_to_iv_t ord; /* Return the codepoint of the first
+character of the string */
};
struct parrot_string {
@@ -55,7 +56,7 @@
INTVAL strlen;
STRING_VTABLE* encoding;
INTVAL type;
- INTVAL lanugage;
+ INTVAL language;
};
@@ -73,6 +74,8 @@
string_substr(struct Parrot_Interp *interpreter, STRING*, INTVAL, INTVAL, STRING**);
INTVAL
string_compare(struct Parrot_Interp *, STRING*, STRING*);
+INTVAL
+string_ord(STRING*);
/* Declarations of other functions */
INTVAL
#! perl -w
use Parrot::Test tests => 1;
output_is( <<'CODE', <<OUTPUT, "ord_i_s (native)" );
set S1, "J"
set S2, "A"
set S3, "P"
set S4, "H"
ord I1, S1
ord I2, S2
ord I3, S3
ord I4, S4
print I1
print I2
print I3
print I4
print "\n"
end
CODE
74658072
OUTPUT
__END__
output_is( <<'CODE', <<OUTPUT, "chr_s_i (ASCII)" );
set I1, 74
set I2, 65
set I3, 80
set I4, 72
chr S1, I1
chr S2, I2
chr S3, I3
chr S4, I4
print S1
print S2
print S3
print S4
print "\n"
end
CODE
JAPH
OUTPUT
output_is( <<'CODE', <<OUTPUT, "char and ord (roundtripping)" );
set I1, 74
set I2, 65
set I3, 80
set I4, 72
chr S1, I1
chr S2, I2
chr S3, I3
chr S4, I4
ord I1, S1
ord I2, S2
ord I3, S3
ord I4, S4
print I1
print I2
print I3
print I4
print "\n"
end
CODE
74658072
OUTPUT