cvsuser 05/02/28 10:01:30
Modified: charset ascii.c
include/parrot string_funcs.h
ops ops.num string.ops
src string.c
t/op string_cs.t
Log:
Strings. Finally. 5 - character find opcodes
* find_digit, ... find_word_boundary opcodes
and interface functions
* some tests
Revision Changes Path
1.11 +5 -4 parrot/charset/ascii.c
Index: ascii.c
===================================================================
RCS file: /cvs/public/parrot/charset/ascii.c,v
retrieving revision 1.10
retrieving revision 1.11
diff -u -r1.10 -r1.11
--- ascii.c 28 Feb 2005 17:17:51 -0000 1.10
+++ ascii.c 28 Feb 2005 18:01:21 -0000 1.11
@@ -1,6 +1,6 @@
/*
Copyright: 2004 The Perl Foundation. All Rights Reserved.
-$Id: ascii.c,v 1.10 2005/02/28 17:17:51 leo Exp $
+$Id: ascii.c,v 1.11 2005/02/28 18:01:21 leo Exp $
=head1 NAME
@@ -51,7 +51,7 @@
{
for (; start < string->strlen; start++) {
- if (table[ENCODING_GET_CODEPOINT(interpreter, string, start)] == type) {
+ if (table[ENCODING_GET_CODEPOINT(interpreter, string, start)] & type) {
return start;
}
}
@@ -66,7 +66,8 @@
INTVAL found = 0;
for (; start < string->strlen; start++) {
- if (table[ENCODING_GET_CODEPOINT(interpreter, string, start)] != type) {
+ if (!(table[ENCODING_GET_CODEPOINT(interpreter, string, start)]
+ &type)) {
found = 1;
break;
}
@@ -393,7 +394,7 @@
int is_wc1, is_wc2;
len = string->strlen;
- if (!len)
+ if (!len || offset >= len)
return -1;
c = ENCODING_GET_CODEPOINT(interpreter, string, offset);
is_wc1 = (table[c] & WORDCHAR) ? 1 : 0;
1.49 +9 -1 parrot/include/parrot/string_funcs.h
Index: string_funcs.h
===================================================================
RCS file: /cvs/public/parrot/include/parrot/string_funcs.h,v
retrieving revision 1.48
retrieving revision 1.49
diff -u -r1.48 -r1.49
--- string_funcs.h 28 Feb 2005 17:17:54 -0000 1.48
+++ string_funcs.h 28 Feb 2005 18:01:22 -0000 1.49
@@ -1,7 +1,7 @@
/* string_funcs.h
* Copyright: 2001-2003 The Perl Foundation. All Rights Reserved.
* CVS Info
- * $Id: string_funcs.h,v 1.48 2005/02/28 17:17:54 leo Exp $
+ * $Id: string_funcs.h,v 1.49 2005/02/28 18:01:22 leo Exp $
* Overview:
* This is the api header for the string subsystem
* Data Structure and Algorithms:
@@ -108,6 +108,14 @@
INTVAL Parrot_string_is_punctuation(Interp *, STRING *, INTVAL offset);
INTVAL Parrot_string_is_newline(Interp *, STRING *, INTVAL offset);
+INTVAL Parrot_string_find_whitespace(Interp *, STRING *, INTVAL offset);
+INTVAL Parrot_string_find_digit(Interp *, STRING *, INTVAL offset);
+INTVAL Parrot_string_find_wordchar(Interp *, STRING *, INTVAL offset);
+INTVAL Parrot_string_find_punctuation(Interp *, STRING *, INTVAL offset);
+INTVAL Parrot_string_find_newline(Interp *, STRING *, INTVAL offset);
+INTVAL Parrot_string_find_word_boundary(Interp *, STRING *, INTVAL offset);
+
+
#endif /* PARROT_IN_CORE */
#endif /* PARROT_STRING_FUNCS_H_GUARD */
1.59 +24 -0 parrot/ops/ops.num
Index: ops.num
===================================================================
RCS file: /cvs/public/parrot/ops/ops.num,v
retrieving revision 1.58
retrieving revision 1.59
diff -u -r1.58 -r1.59
--- ops.num 28 Feb 2005 17:17:55 -0000 1.58
+++ ops.num 28 Feb 2005 18:01:24 -0000 1.59
@@ -1398,3 +1398,27 @@
is_newline_i_s_ic 1368
is_newline_i_sc_i 1369
is_newline_i_sc_ic 1370
+find_whitespace_i_s_i 1371
+find_whitespace_i_s_ic 1372
+find_whitespace_i_sc_i 1373
+find_whitespace_i_sc_ic 1374
+find_digit_i_s_i 1375
+find_digit_i_s_ic 1376
+find_digit_i_sc_i 1377
+find_digit_i_sc_ic 1378
+find_wordchar_i_s_i 1379
+find_wordchar_i_s_ic 1380
+find_wordchar_i_sc_i 1381
+find_wordchar_i_sc_ic 1382
+find_punctuation_i_s_i 1383
+find_punctuation_i_s_ic 1384
+find_punctuation_i_sc_i 1385
+find_punctuation_i_sc_ic 1386
+find_newline_i_s_i 1387
+find_newline_i_s_ic 1388
+find_newline_i_sc_i 1389
+find_newline_i_sc_ic 1390
+find_word_boundary_i_s_i 1391
+find_word_boundary_i_s_ic 1392
+find_word_boundary_i_sc_i 1393
+find_word_boundary_i_sc_ic 1394
1.34 +56 -0 parrot/ops/string.ops
Index: string.ops
===================================================================
RCS file: /cvs/public/parrot/ops/string.ops,v
retrieving revision 1.33
retrieving revision 1.34
diff -u -r1.33 -r1.34
--- string.ops 28 Feb 2005 17:17:55 -0000 1.33
+++ string.ops 28 Feb 2005 18:01:24 -0000 1.34
@@ -703,6 +703,62 @@
}
+=item B<find_whitespace>(out INT, in STR, in INT)
+
+Set $1 to the offset of the next whitespace codepoint or to -1.
+
+=item B<find_wordchar>(out INT, in STR, in INT)
+
+Set $1 to the offset of the next wordchar codepoint or to -1.
+
+=item B<find_digit>(out INT, in STR, in INT)
+
+Set $1 to the offset of the next digit codepoint or to -1.
+
+=item B<find_punctuation>(out INT, in STR, in INT)
+
+Set $1 to the offset of the next punctuation codepoint or to -1.
+
+=item B<find_newline>(out INT, in STR, in INT)
+
+Set $1 to the offset of the next newline codepoint or to -1.
+
+=item B<find_word_boundary>(out INT, in STR, in INT)
+
+Set $1 to the offset of the next word boundary or to -1.
+
+=cut
+
+op find_whitespace(out INT, in STR, in INT) {
+ $1 = Parrot_string_find_whitespace(interpreter, $2, $3);
+ goto NEXT();
+}
+
+op find_wordchar(out INT, in STR, in INT) {
+ $1 = Parrot_string_find_wordchar(interpreter, $2, $3);
+ goto NEXT();
+}
+
+op find_digit(out INT, in STR, in INT) {
+ $1 = Parrot_string_find_digit(interpreter, $2, $3);
+ goto NEXT();
+}
+
+op find_punctuation(out INT, in STR, in INT) {
+ $1 = Parrot_string_find_punctuation(interpreter, $2, $3);
+ goto NEXT();
+}
+
+op find_newline(out INT, in STR, in INT) {
+ $1 = Parrot_string_find_newline(interpreter, $2, $3);
+ goto NEXT();
+}
+
+op find_word_boundary(out INT, in STR, in INT) {
+ $1 = Parrot_string_find_word_boundary(interpreter, $2, $3);
+ goto NEXT();
+}
+
=back
=head1 COPYRIGHT
1.237 +49 -1 parrot/src/string.c
Index: string.c
===================================================================
RCS file: /cvs/public/parrot/src/string.c,v
retrieving revision 1.236
retrieving revision 1.237
diff -u -r1.236 -r1.237
--- string.c 28 Feb 2005 17:17:56 -0000 1.236
+++ string.c 28 Feb 2005 18:01:28 -0000 1.237
@@ -1,6 +1,6 @@
/*
Copyright: 2001-2003 The Perl Foundation. All Rights Reserved.
-$Id: string.c,v 1.236 2005/02/28 17:17:56 leo Exp $
+$Id: string.c,v 1.237 2005/02/28 18:01:28 leo Exp $
=head1 NAME
@@ -2543,6 +2543,54 @@
return CHARSET_IS_NEWLINE(interpreter, s, offset);
}
+INTVAL
+Parrot_string_find_whitespace(Interp *interpreter, STRING *s, INTVAL offset)
+{
+ if (!s)
+ return -1;
+ return CHARSET_FIND_WHITESPACE(interpreter, s, offset);
+}
+
+INTVAL
+Parrot_string_find_digit(Interp *interpreter, STRING *s, INTVAL offset)
+{
+ if (!s)
+ return -1;
+ return CHARSET_FIND_DIGIT(interpreter, s, offset);
+}
+
+INTVAL
+Parrot_string_find_wordchar(Interp *interpreter, STRING *s, INTVAL offset)
+{
+ if (!s)
+ return -1;
+ return CHARSET_FIND_WORDCHAR(interpreter, s, offset);
+}
+
+INTVAL
+Parrot_string_find_punctuation(Interp *interpreter, STRING *s, INTVAL offset)
+{
+ if (!s)
+ return -1;
+ return CHARSET_FIND_PUNCTUATION(interpreter, s, offset);
+}
+
+INTVAL
+Parrot_string_find_newline(Interp *interpreter, STRING *s, INTVAL offset)
+{
+ if (!s)
+ return -1;
+ return CHARSET_FIND_NEWLINE(interpreter, s, offset);
+}
+
+INTVAL
+Parrot_string_find_word_boundary(Interp *interpreter, STRING *s, INTVAL offset)
+{
+ if (!s)
+ return -1;
+ return CHARSET_FIND_WORD_BOUNDARY(interpreter, s, offset);
+}
+
/*
=back
1.5 +67 -2 parrot/t/op/string_cs.t
Index: string_cs.t
===================================================================
RCS file: /cvs/public/parrot/t/op/string_cs.t,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- string_cs.t 28 Feb 2005 17:17:57 -0000 1.4
+++ string_cs.t 28 Feb 2005 18:01:30 -0000 1.5
@@ -1,6 +1,6 @@
#! perl -w
# Copyright: 2001-2004 The Perl Foundation. All Rights Reserved.
-# $Id: string_cs.t,v 1.4 2005/02/28 17:17:57 leo Exp $
+# $Id: string_cs.t,v 1.5 2005/02/28 18:01:30 leo Exp $
=head1 NAME
@@ -16,7 +16,7 @@
=cut
-use Parrot::Test tests => 12;
+use Parrot::Test tests => 16;
use Test::More;
output_is( <<'CODE', <<OUTPUT, "basic syntax" );
@@ -179,4 +179,69 @@
01
OUTPUT
+output_is( <<'CODE', <<OUTPUT, "find_wordchar");
+ set S0, "_ ab 09"
+ set I0, 0
+lp:
+ find_wordchar I0, S0, I0
+ print I0
+ print " "
+ eq I0, -1, done
+ inc I0
+ branch lp
+done:
+ print "ok\n"
+ end
+CODE
+0 2 3 5 6 -1 ok
+OUTPUT
+output_is( <<'CODE', <<OUTPUT, "find_digit");
+ set S0, "_ ab 09"
+ set I0, 0
+lp:
+ find_digit I0, S0, I0
+ print I0
+ print " "
+ eq I0, -1, done
+ inc I0
+ branch lp
+done:
+ print "ok\n"
+ end
+CODE
+5 6 -1 ok
+OUTPUT
+output_is( <<'CODE', <<OUTPUT, "find_punctuation");
+ set S0, "_ .b ,9"
+ set I0, 0
+lp:
+ find_punctuation I0, S0, I0
+ print I0
+ print " "
+ eq I0, -1, done
+ inc I0
+ branch lp
+done:
+ print "ok\n"
+ end
+CODE
+2 5 -1 ok
+OUTPUT
+
+output_is( <<'CODE', <<OUTPUT, "find_word_boundary");
+ set S0, "_ab 09z"
+ set I0, 0
+lp:
+ find_word_boundary I0, S0, I0
+ print I0
+ print " "
+ eq I0, -1, done
+ inc I0
+ branch lp
+done:
+ print "ok\n"
+ end
+CODE
+0 2 3 6 -1 ok
+OUTPUT