cvsuser 04/11/05 14:24:02
Modified: examples/benchmarks Tag: pluggable_encodings fib.imc
include/parrot Tag: pluggable_encodings charset.h
src Tag: pluggable_encodings global_setup.c string.c
Log:
case mangling in
Revision Changes Path
No revision
No revision
1.5.2.1 +34 -1 parrot/examples/benchmarks/fib.imc
Index: fib.imc
===================================================================
RCS file: /cvs/public/parrot/examples/benchmarks/fib.imc,v
retrieving revision 1.5
retrieving revision 1.5.2.1
diff -u -r1.5 -r1.5.2.1
--- fib.imc 4 Nov 2004 08:03:32 -0000 1.5
+++ fib.imc 5 Nov 2004 22:23:59 -0000 1.5.2.1
@@ -1,5 +1,5 @@
# Copyright (C) 2001-2003 The Perl Foundation. All rights reserved.
-# $Id: fib.imc,v 1.5 2004/11/04 08:03:32 leo Exp $
+# $Id: fib.imc,v 1.5.2.1 2004/11/05 22:23:59 dan Exp $
=head1 NAME
@@ -41,6 +41,39 @@
fin -= start
print fin
print "s\n"
+
+ interpinfo I1, 1
+ print "A total of "
+ print I1
+ print " bytes were allocated\n"
+
+ interpinfo I1, 2
+ print "A total of "
+ print I1
+ print " DOD runs were made\n"
+
+ interpinfo I1, 3
+ print "A total of "
+ print I1
+ print " collection runs were made\n"
+
+ interpinfo I1, 10
+ print "Copying a total of "
+ print I1
+ print " bytes\n"
+
+ interpinfo I1, 5
+ print "There are "
+ print I1
+ print " active Buffer structs\n"
+
+ interpinfo I1, 7
+ print "There are "
+ print I1
+ print " total Buffer structs\n"
+
+
+
end
.end
No revision
No revision
1.3.2.1 +47 -1 parrot/include/parrot/charset.h
Index: charset.h
===================================================================
RCS file: /cvs/public/parrot/include/parrot/charset.h,v
retrieving revision 1.3
retrieving revision 1.3.2.1
diff -u -r1.3 -r1.3.2.1
--- charset.h 3 Nov 2004 19:22:24 -0000 1.3
+++ charset.h 5 Nov 2004 22:23:59 -0000 1.3.2.1
@@ -1,7 +1,7 @@
/* charset.h
* Copyright: 2004 The Perl Foundation. All Rights Reserved.
* CVS Info
- * $Id: charset.h,v 1.3 2004/11/03 19:22:24 dan Exp $
+ * $Id: charset.h,v 1.3.2.1 2004/11/05 22:23:59 dan Exp $
* Overview:
* This is the header for the 8-bit fixed-width encoding
* Data Structure and Algorithms:
@@ -91,6 +91,52 @@
charset_find_word_boundary_t find_word_boundary;
};
+/*
+ * Dispatch helpers. Each macro calls the same-named function pointer found
+ * through the string's charset (CHARSET *) or encoding (ENCODING *) member,
+ * passing the macro's own `interp` argument as the interpreter.
+ *
+ * Notes:
+ *  - CHARSET_GET_GRAPEMES is a misspelling kept so existing callers still
+ *    compile; prefer CHARSET_GET_GRAPHEMES.
+ *  - CHARSET_COMPARE looks the charset up through `lhs`.
+ *  - CHARSET_VALIDATE accepts `offset` but does not forward it.
+ */
+#define CHARSET_GET_GRAPHEMES(interp, source, offset, count) ((CHARSET *)(source)->charset)->get_graphemes(interp, source, offset, count)
+#define CHARSET_GET_GRAPEMES(interp, source, offset, count) CHARSET_GET_GRAPHEMES(interp, source, offset, count)
+#define CHARSET_SET_GRAPHEMES(interp, source, offset, replace_count, insert) ((CHARSET *)(source)->charset)->set_graphemes(interp, source, offset, replace_count, insert)
+#define CHARSET_TO_CHARSET(interp, source, new_charset) ((CHARSET *)(source)->charset)->to_charset(interp, source, new_charset)
+#define CHARSET_COPY_TO_CHARSET(interp, source, new_charset) ((CHARSET *)(source)->charset)->copy_to_charset(interp, source, new_charset)
+#define CHARSET_TO_UNICODE(interp, source) ((CHARSET *)(source)->charset)->to_unicode(interp, source)
+#define CHARSET_COMPOSE(interp, source) ((CHARSET *)(source)->charset)->compose(interp, source)
+#define CHARSET_DECOMPOSE(interp, source) ((CHARSET *)(source)->charset)->decompose(interp, source)
+#define CHARSET_UPCASE(interp, source) ((CHARSET *)(source)->charset)->upcase(interp, source)
+#define CHARSET_DOWNCASE(interp, source) ((CHARSET *)(source)->charset)->downcase(interp, source)
+#define CHARSET_TITLECASE(interp, source) ((CHARSET *)(source)->charset)->titlecase(interp, source)
+#define CHARSET_UPCASE_FIRST(interp, source) ((CHARSET *)(source)->charset)->upcase_first(interp, source)
+#define CHARSET_DOWNCASE_FIRST(interp, source) ((CHARSET *)(source)->charset)->downcase_first(interp, source)
+#define CHARSET_TITLECASE_FIRST(interp, source) ((CHARSET *)(source)->charset)->titlecase_first(interp, source)
+#define CHARSET_COMPARE(interp, lhs, rhs) ((CHARSET *)(lhs)->charset)->compare(interp, lhs, rhs)
+#define CHARSET_INDEX(interp, source, search, offset) ((CHARSET *)(source)->charset)->index(interp, source, search, offset)
+#define CHARSET_RINDEX(interp, source, search, offset) ((CHARSET *)(source)->charset)->rindex(interp, source, search, offset)
+#define CHARSET_VALIDATE(interp, source, offset) ((CHARSET *)(source)->charset)->validate(interp, source)
+#define CHARSET_IS_WORDCHAR(interp, source, offset) ((CHARSET *)(source)->charset)->is_wordchar(interp, source, offset)
+#define CHARSET_FIND_WORDCHAR(interp, source, offset) ((CHARSET *)(source)->charset)->find_wordchar(interp, source, offset)
+#define CHARSET_FIND_NOT_WORDCHAR(interp, source, offset) ((CHARSET *)(source)->charset)->find_not_wordchar(interp, source, offset)
+#define CHARSET_IS_WHITESPACE(interp, source, offset) ((CHARSET *)(source)->charset)->is_whitespace(interp, source, offset)
+#define CHARSET_FIND_WHITESPACE(interp, source, offset) ((CHARSET *)(source)->charset)->find_whitespace(interp, source, offset)
+#define CHARSET_FIND_NOT_WHITESPACE(interp, source, offset) ((CHARSET *)(source)->charset)->find_not_whitespace(interp, source, offset)
+#define CHARSET_IS_DIGIT(interp, source, offset) ((CHARSET *)(source)->charset)->is_digit(interp, source, offset)
+#define CHARSET_FIND_DIGIT(interp, source, offset) ((CHARSET *)(source)->charset)->find_digit(interp, source, offset)
+#define CHARSET_FIND_NOT_DIGIT(interp, source, offset) ((CHARSET *)(source)->charset)->find_not_digit(interp, source, offset)
+#define CHARSET_IS_PUNCTUATION(interp, source, offset) ((CHARSET *)(source)->charset)->is_punctuation(interp, source, offset)
+#define CHARSET_FIND_PUNCTUATION(interp, source, offset) ((CHARSET *)(source)->charset)->find_punctuation(interp, source, offset)
+#define CHARSET_FIND_NOT_PUNCTUATION(interp, source, offset) ((CHARSET *)(source)->charset)->find_not_punctuation(interp, source, offset)
+#define CHARSET_IS_NEWLINE(interp, source, offset) ((CHARSET *)(source)->charset)->is_newline(interp, source, offset)
+#define CHARSET_FIND_NEWLINE(interp, source, offset) ((CHARSET *)(source)->charset)->find_newline(interp, source, offset)
+#define CHARSET_FIND_NOT_NEWLINE(interp, source, offset) ((CHARSET *)(source)->charset)->find_not_newline(interp, source, offset)
+#define CHARSET_FIND_WORD_BOUNDARY(interp, source, offset) ((CHARSET *)(source)->charset)->find_word_boundary(interp, source, offset)
+#define CHARSET_TO_ENCODING(interp, source, offset, count) ((ENCODING *)(source)->encoding)->to_encoding(interp, source, offset, count)
+#define CHARSET_COPY_TO_ENCODING(interp, source) ((ENCODING *)(source)->encoding)->copy_to_encoding(interp, source)
+#define CHARSET_GET_CODEPOINT(interp, source, offset) ((ENCODING *)(source)->encoding)->get_codepoint(interp, source, offset)
+#define CHARSET_SET_CODEPOINT(interp, source, offset, codepoint) ((ENCODING *)(source)->encoding)->set_codepoint(interp, source, offset, codepoint)
+#define CHARSET_GET_BYTE(interp, source, offset) ((ENCODING *)(source)->encoding)->get_byte(interp, source, offset)
+#define CHARSET_SET_BYTE(interp, source, offset, value) ((ENCODING *)(source)->encoding)->set_byte(interp, source, offset, value)
+#define CHARSET_GET_CODEPOINTS(interp, source, offset, count) ((ENCODING *)(source)->encoding)->get_codepoints(interp, source, offset, count)
+#define CHARSET_GET_BYTES(interp, source, offset, count) ((ENCODING *)(source)->encoding)->get_bytes(interp, source, offset, count)
+#define CHARSET_SET_CODEPOINTS(interp, source, offset, count, newdata) ((ENCODING *)(source)->encoding)->set_codepoints(interp, source, offset, count, newdata)
+#define CHARSET_SET_BYTES(interp, source, offset, count, newdata) ((ENCODING *)(source)->encoding)->set_bytes(interp, source, offset, count, newdata)
+#define CHARSET_BECOME_ENCODING(interp, source) ((ENCODING *)(source)->encoding)->become_encoding(interp, source)
+#define CHARSET_CODEPOINTS(interp, source) ((ENCODING *)(source)->encoding)->codepoints(interp, source)
+#define CHARSET_BYTES(interp, source) ((ENCODING *)(source)->encoding)->bytes(interp, source)
#endif /* PARROT_CHARSET_H_GUARD */
No revision
No revision
1.54.2.1 +2 -2 parrot/src/global_setup.c
Index: global_setup.c
===================================================================
RCS file: /cvs/public/parrot/src/global_setup.c,v
retrieving revision 1.54
retrieving revision 1.54.2.1
diff -u -r1.54 -r1.54.2.1
--- global_setup.c 4 Nov 2004 18:46:10 -0000 1.54
+++ global_setup.c 5 Nov 2004 22:24:02 -0000 1.54.2.1
@@ -1,6 +1,6 @@
/*
Copyright: 2001-2003 The Perl Foundation. All Rights Reserved.
-$Id: global_setup.c,v 1.54 2004/11/04 18:46:10 dan Exp $
+$Id: global_setup.c,v 1.54.2.1 2004/11/05 22:24:02 dan Exp $
=head1 NAME
@@ -62,7 +62,7 @@
Parrot_encoding_fixed_8_init(interpreter);
Parrot_charset_binary_init(interpreter);
Parrot_charset_ascii_init(interpreter);
-
+ Parrot_charset_iso_8859_1_init(interpreter);
/*
* TODO allocate core vtable table only once - or per interpreter
1.229.2.2 +51 -32 parrot/src/string.c
Index: string.c
===================================================================
RCS file: /cvs/public/parrot/src/string.c,v
retrieving revision 1.229.2.1
retrieving revision 1.229.2.2
diff -u -r1.229.2.1 -r1.229.2.2
--- string.c 5 Nov 2004 18:01:50 -0000 1.229.2.1
+++ string.c 5 Nov 2004 22:24:02 -0000 1.229.2.2
@@ -1,6 +1,6 @@
/*
Copyright: 2001-2003 The Perl Foundation. All Rights Reserved.
-$Id: string.c,v 1.229.2.1 2004/11/05 18:01:50 dan Exp $
+$Id: string.c,v 1.229.2.2 2004/11/05 22:24:02 dan Exp $
=head1 NAME
@@ -357,6 +357,11 @@
s = new_string_header(interpreter, 0);
s->representation = representation;
+
+ if (representation == enum_stringrep_one) {
+ s->encoding = Parrot_fixed_8_encoding_ptr;
+ s->charset = Parrot_iso_8859_1_charset_ptr;
+ }
Parrot_allocate_string(interpreter,
s, string_max_bytes(interpreter, s, capacity));
@@ -802,6 +807,8 @@
if (strcmp(encoding_name, "iso-8859-1") == 0 ) {
s->representation = enum_stringrep_one;
+ s->encoding = Parrot_fixed_8_encoding_ptr;
+ s->charset = Parrot_iso_8859_1_charset_ptr;
/*
* fast path for external (constant) strings - don't allocate
* and copy data
@@ -3190,15 +3197,19 @@
if (!s)
return;
- Parrot_unmake_COW(interpreter, s);
- set_char_at = set_char_setter(s);
- for (i = 0; i < s->strlen; ++i) {
- o = string_ord(interpreter, s, i);
- if (o >= 'a' && o <= 'z')
- set_char_at(i, s, o - 32);
- else if (o >= 0x80)
- internal_exception(INTERNAL_NOT_IMPLEMENTED,
- "Case mangling for non-ASCII not yet implemented");
+ if (s->representation == enum_stringrep_one) {
+ CHARSET_UPCASE(interpreter, s);
+ } else {
+ Parrot_unmake_COW(interpreter, s);
+ set_char_at = set_char_setter(s);
+ for (i = 0; i < s->strlen; ++i) {
+ o = string_ord(interpreter, s, i);
+ if (o >= 'a' && o <= 'z')
+ set_char_at(i, s, o - 32);
+ else if (o >= 0x80)
+ internal_exception(INTERNAL_NOT_IMPLEMENTED,
+ "Case mangling for non-ASCII not yet implemented");
+ }
}
}
@@ -3250,15 +3261,19 @@
if (!s)
return;
- Parrot_unmake_COW(interpreter, s);
- set_char_at = set_char_setter(s);
- for (i = 0; i < s->strlen; ++i) {
- o = string_ord(interpreter, s, i);
- if (o >= 'A' && o <= 'Z')
- set_char_at(i, s, o + 32);
- else if (o >= 0x80)
- internal_exception(INTERNAL_NOT_IMPLEMENTED,
- "Case mangling for non-ASCII not yet implemented");
+ if (s->representation == enum_stringrep_one) {
+ CHARSET_DOWNCASE(interpreter, s);
+ } else {
+ Parrot_unmake_COW(interpreter, s);
+ set_char_at = set_char_setter(s);
+ for (i = 0; i < s->strlen; ++i) {
+ o = string_ord(interpreter, s, i);
+ if (o >= 'A' && o <= 'Z')
+ set_char_at(i, s, o + 32);
+ else if (o >= 0x80)
+ internal_exception(INTERNAL_NOT_IMPLEMENTED,
+ "Case mangling for non-ASCII not yet implemented");
+ }
}
}
@@ -3310,21 +3325,25 @@
if (!s)
return;
- Parrot_unmake_COW(interpreter, s);
- set_char_at = set_char_setter(s);
- o = string_ord(interpreter, s, 0);
- if (o >= 'a' && o <= 'z')
- set_char_at(0, s, o - 32);
- else if (o >= 0x80)
- internal_exception(INTERNAL_NOT_IMPLEMENTED,
- "Case mangling for non-ASCII not yet implemented");
- for (i = 1; i < s->strlen; ++i) {
- o = string_ord(interpreter, s, i);
- if (o >= 'A' && o <= 'Z')
- set_char_at(i, s, o + 32);
+ if (s->representation == enum_stringrep_one) {
+ CHARSET_TITLECASE(interpreter, s);
+ } else {
+ Parrot_unmake_COW(interpreter, s);
+ set_char_at = set_char_setter(s);
+ o = string_ord(interpreter, s, 0);
+ if (o >= 'a' && o <= 'z')
+ set_char_at(0, s, o - 32);
else if (o >= 0x80)
internal_exception(INTERNAL_NOT_IMPLEMENTED,
- "Case mangling for non-ASCII not yet implemented");
+ "Case mangling for non-ASCII not yet implemented");
+ for (i = 1; i < s->strlen; ++i) {
+ o = string_ord(interpreter, s, i);
+ if (o >= 'A' && o <= 'Z')
+ set_char_at(i, s, o + 32);
+ else if (o >= 0x80)
+ internal_exception(INTERNAL_NOT_IMPLEMENTED,
+ "Case mangling for non-ASCII not yet implemented");
+ }
}
}