On Sat Sep 13 05:13:50 2008, [EMAIL PROTECTED] wrote:
> Hello,
>
> I am sorry for the late response. I was offline for some days. The patch is
> now in a single patch file (generated with svn diff) and extended a
> little bit.
>
Gerd:
Thanks for your patch. For future reference: There's no need to tar up
the patch before attaching it. We expect the patch to be in plain-text
format with a name ending in '.patch'. I have converted your patch to
such a format and reattached it as 'encoding_option.patch', which
applies cleanly.
Can you explain a bit more about the rationale for adding this option to
Configure.pl?
Also, can you explain a bit about the changes to config/auto/icu.pm?
Also, your patch causes two tests in t/steps/auto_icu-01.t to fail.
t/steps/auto_icu-01......................6/136
# Failed test 'No icushared, as expected'
# at t/steps/auto_icu-01.t line 178.
# Failed test 'No longer trying to configure with ICU'
# at t/steps/auto_icu-01.t line 179.
# got: '0'
# expected: '1'
# Looks like you failed 2 tests of 136.
t/steps/auto_icu-01...................... Dubious, test returned 2
(wstat 512, 0x200)
Failed 2/136 subtests
(less 31 skipped subtests: 103 okay)
Can you take a look at this? Since your patch removed code from
config/auto/icu.pm, we may be able to delete tests in auto_icu-01.t that
existed to test the code proposed for deletion. You can test the
individual file with: 'prove -v t/steps/auto_icu-01.t', and then re-run
configuration with: 'perl Configure.pl --test'.
Thank you very much.
kid51
Index: src/charset.c
===================================================================
--- src/charset.c (Revision 31065)
+++ src/charset.c (Arbeitskopie)
@@ -429,19 +429,38 @@
void
Parrot_charsets_encodings_init(PARROT_INTERP)
{
+
+#define SET_DEFAULT_IF_ENCODING_IS(z) if (!strcmp(PARROT_DEF_ENCODING, z)) \
+ { Parrot_make_default_encoding(interp, " ", default_encoding_ptr); }
+
+ ENCODING *default_encoding_ptr;
+ CHARSET *default_charset_ptr;
+
/* the order is crucial here:
- * 1) encodings, default = fixed_8
- * 2) charsets default = ascii
+ * 1) encodings: default = fixed_8, if not set as configuration-option
+ * 2) charsets: default = ascii, for fixed_8-encoding
+ * default = Unicode, for utf8-, ucs2 and utf16-encoding
*/
- Parrot_encoding_fixed_8_init(interp);
- Parrot_encoding_utf8_init(interp);
- Parrot_encoding_ucs2_init(interp);
- Parrot_encoding_utf16_init(interp);
+ default_encoding_ptr = Parrot_encoding_fixed_8_init(interp);
+ SET_DEFAULT_IF_ENCODING_IS("fixed_8");
+ default_encoding_ptr = Parrot_encoding_utf8_init(interp);
+ SET_DEFAULT_IF_ENCODING_IS("UTF-8");
+ default_encoding_ptr = Parrot_encoding_ucs2_init(interp);
+ SET_DEFAULT_IF_ENCODING_IS("UCS-2");
+ default_encoding_ptr = Parrot_encoding_utf16_init(interp);
+ SET_DEFAULT_IF_ENCODING_IS("UTF-16");
+ default_encoding_ptr = Parrot_default_encoding(interp);
+
Parrot_charset_ascii_init(interp);
Parrot_charset_iso_8859_1_init(interp);
Parrot_charset_binary_init(interp);
- Parrot_charset_unicode_init(interp);
+ default_charset_ptr = Parrot_charset_unicode_init(interp);
+ if ( STREQ(default_encoding_ptr->name, "utf8") ||
+ STREQ(default_encoding_ptr->name, "utf16") ||
+ STREQ(default_encoding_ptr->name, "ucs2") ) {
+ Parrot_make_default_charset(interp, " ", default_charset_ptr);
+ }
/*
* now encoding strings don't have a charset yet - set default
Index: src/charset/unicode.c
===================================================================
--- src/charset/unicode.c (Revision 31065)
+++ src/charset/unicode.c (Arbeitskopie)
@@ -158,6 +158,7 @@
# include <unicode/uchar.h>
# include <unicode/ustring.h>
# include <unicode/unorm.h>
+# include <unicode/usearch.h>
#endif
#define EXCEPTION(err, str) \
Parrot_ex_throw_from_c_args(interp, NULL, (err), (str))
@@ -613,10 +614,29 @@
*/
static INTVAL
-cs_rindex(PARROT_INTERP, SHIM(STRING *source_string),
- SHIM(STRING *search_string), UINTVAL offset)
+cs_rindex(PARROT_INTERP, ARGIN(STRING *source_string),
+ ARGIN(STRING *search_string), UINTVAL offset)
{
- UNIMPL;
+#if PARROT_HAS_ICU
+ INTVAL pos;
+ UChar target[source_string->strlen];
+ UChar pattern[search_string->strlen];
+ UStringSearch *search;
+ UErrorCode status = U_ZERO_ERROR;
+
+ u_uastrcpy(target, (const char *)source_string->cache._b._bufstart);
+ u_uastrcpy(pattern, (const char *)search_string->cache._b._bufstart);
+
+ search = usearch_open(pattern, -1, target, -1, "en_US", NULL, &status);
+ usearch_setOffset(search, offset, &status);
+ pos = usearch_last(search, &status);
+ usearch_close(search);
+
+ return pos;
+#else
+ Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_LIBRARY_ERROR,
+ "no ICU lib loaded");
+#endif
}
/*
Index: lib/Parrot/Configure/Options/Conf.pm
===================================================================
--- lib/Parrot/Configure/Options/Conf.pm (Revision 31065)
+++ lib/Parrot/Configure/Options/Conf.pm (Arbeitskopie)
@@ -45,6 +45,8 @@
--nomanicheck Don't check the MANIFEST
--languages="list of languages"
Specify a list of languages to process
+ --encoding="fixed_8|UTF-8|UCS-2|UTF-16"
+ Set the default encoding to one of the four values
--ask Have Configure ask for commonly-changed info
--test=configure Run tests of configuration tools before configuring
Index: lib/Parrot/Configure/Options/Conf/Shared.pm
===================================================================
--- lib/Parrot/Configure/Options/Conf/Shared.pm (Revision 31065)
+++ lib/Parrot/Configure/Options/Conf/Shared.pm (Arbeitskopie)
@@ -21,6 +21,7 @@
datadir
debugging
define
+ encoding
exec-prefix
execcapable
fatal
Index: compilers/imcc/pbc.c
===================================================================
--- compilers/imcc/pbc.c (Revision 31065)
+++ compilers/imcc/pbc.c (Arbeitskopie)
@@ -826,8 +826,10 @@
return s;
}
else if (*buf == '"') {
+ CHARSET *default_charset_ptr = Parrot_default_charset(interp);
buf++;
- return string_unescape_cstring(interp, buf, '"', NULL);
+ return string_unescape_cstring(interp, buf, '"',
+ default_charset_ptr->name);
}
else if (*buf == '\'') { /* TODO handle python raw strings */
buf++;
Index: config/init/defaults.pm
===================================================================
--- config/init/defaults.pm (Revision 31065)
+++ config/init/defaults.pm (Arbeitskopie)
@@ -235,6 +235,8 @@
# Extra flags needed for libnci_test.so
ncilib_link_extra => '',
+ def_encoding => $conf->options->get('encoding') || 'fixed_8',
+
);
# add profiling if needed
Index: config/auto/icu.pm
===================================================================
--- config/auto/icu.pm (Revision 31065)
+++ config/auto/icu.pm (Arbeitskopie)
@@ -39,7 +39,6 @@
# during testing.
$data{icuconfig_default} = q{icu-config};
$data{icu_headers} = [ qw(ucnv.h utypes.h uchar.h) ];
- $data{icu_shared_pattern} = qr/-licui18n\w*/;
return \%data;
}
@@ -315,7 +314,6 @@
my ($icushared, $without) = @_;
if ( defined $icushared ) {
chomp $icushared;
- $icushared =~ s/$self->{icu_shared_pattern}//; # "-licui18n32" too
if (length $icushared == 0) {
$without = 1;
}
Index: config/gen/config_h/config_h.in
===================================================================
--- config/gen/config_h/config_h.in (Revision 31065)
+++ config/gen/config_h/config_h.in (Arbeitskopie)
@@ -145,6 +145,9 @@
/* ICU. */
#define PARROT_HAS_ICU @has_icu@
+/* Encoding */
+#define PARROT_DEF_ENCODING "@def_encoding@"
+
/* Int and float formats. */
#define INTVAL_FMT "@intvalfmt@"
#define FLOATVAL_FMT "@floatvalfmt@"