cvsuser     05/02/28 10:01:30

  Modified:    charset  ascii.c
               include/parrot string_funcs.h
               ops      ops.num string.ops
               src      string.c
               t/op     string_cs.t
  Log:
  Strings. Finally. 5 - character find opcodes
  * find_digit, ... find_word_boundary opcodes
    and interface functions
  * some tests
  
  Revision  Changes    Path
  1.11      +5 -4      parrot/charset/ascii.c
  
  Index: ascii.c
  ===================================================================
  RCS file: /cvs/public/parrot/charset/ascii.c,v
  retrieving revision 1.10
  retrieving revision 1.11
  diff -u -r1.10 -r1.11
  --- ascii.c   28 Feb 2005 17:17:51 -0000      1.10
  +++ ascii.c   28 Feb 2005 18:01:21 -0000      1.11
  @@ -1,6 +1,6 @@
   /*
   Copyright: 2004 The Perl Foundation.  All Rights Reserved.
  -$Id: ascii.c,v 1.10 2005/02/28 17:17:51 leo Exp $
  +$Id: ascii.c,v 1.11 2005/02/28 18:01:21 leo Exp $
   
   =head1 NAME
   
  @@ -51,7 +51,7 @@
   {
   
       for (; start < string->strlen; start++) {
  -        if (table[ENCODING_GET_CODEPOINT(interpreter, string, start)] == type) {
  +        if (table[ENCODING_GET_CODEPOINT(interpreter, string, start)] & type) {
               return start;
           }
       }
  @@ -66,7 +66,8 @@
       INTVAL found = 0;
   
       for (; start < string->strlen; start++) {
  -        if (table[ENCODING_GET_CODEPOINT(interpreter, string, start)] != type) {
  +        if (!(table[ENCODING_GET_CODEPOINT(interpreter, string, start)]
  +                    &type)) {
               found = 1;
               break;
           }
  @@ -393,7 +394,7 @@
       int is_wc1, is_wc2;
   
       len = string->strlen;
  -    if (!len)
  +    if (!len || offset >= len)
           return -1;
       c = ENCODING_GET_CODEPOINT(interpreter, string, offset);
       is_wc1 = (table[c] & WORDCHAR) ? 1 : 0;
  
  
  
  1.49      +9 -1      parrot/include/parrot/string_funcs.h
  
  Index: string_funcs.h
  ===================================================================
  RCS file: /cvs/public/parrot/include/parrot/string_funcs.h,v
  retrieving revision 1.48
  retrieving revision 1.49
  diff -u -r1.48 -r1.49
  --- string_funcs.h    28 Feb 2005 17:17:54 -0000      1.48
  +++ string_funcs.h    28 Feb 2005 18:01:22 -0000      1.49
  @@ -1,7 +1,7 @@
   /* string_funcs.h
    *  Copyright: 2001-2003 The Perl Foundation.  All Rights Reserved.
    *  CVS Info
  - *     $Id: string_funcs.h,v 1.48 2005/02/28 17:17:54 leo Exp $
  + *     $Id: string_funcs.h,v 1.49 2005/02/28 18:01:22 leo Exp $
    *  Overview:
    *     This is the api header for the string subsystem
    *  Data Structure and Algorithms:
  @@ -108,6 +108,14 @@
   INTVAL Parrot_string_is_punctuation(Interp *, STRING *, INTVAL offset);
   INTVAL Parrot_string_is_newline(Interp *, STRING *, INTVAL offset);
   
  +INTVAL Parrot_string_find_whitespace(Interp *, STRING *, INTVAL offset);
  +INTVAL Parrot_string_find_digit(Interp *, STRING *, INTVAL offset);
  +INTVAL Parrot_string_find_wordchar(Interp *, STRING *, INTVAL offset);
  +INTVAL Parrot_string_find_punctuation(Interp *, STRING *, INTVAL offset);
  +INTVAL Parrot_string_find_newline(Interp *, STRING *, INTVAL offset);
  +INTVAL Parrot_string_find_word_boundary(Interp *, STRING *, INTVAL offset);
  +
  +
   #endif /* PARROT_IN_CORE */
   #endif /* PARROT_STRING_FUNCS_H_GUARD */
   
  
  
  
  1.59      +24 -0     parrot/ops/ops.num
  
  Index: ops.num
  ===================================================================
  RCS file: /cvs/public/parrot/ops/ops.num,v
  retrieving revision 1.58
  retrieving revision 1.59
  diff -u -r1.58 -r1.59
  --- ops.num   28 Feb 2005 17:17:55 -0000      1.58
  +++ ops.num   28 Feb 2005 18:01:24 -0000      1.59
  @@ -1398,3 +1398,27 @@
   is_newline_i_s_ic              1368
   is_newline_i_sc_i              1369
   is_newline_i_sc_ic             1370
  +find_whitespace_i_s_i          1371
  +find_whitespace_i_s_ic         1372
  +find_whitespace_i_sc_i         1373
  +find_whitespace_i_sc_ic        1374
  +find_digit_i_s_i               1375
  +find_digit_i_s_ic              1376
  +find_digit_i_sc_i              1377
  +find_digit_i_sc_ic             1378
  +find_wordchar_i_s_i            1379
  +find_wordchar_i_s_ic           1380
  +find_wordchar_i_sc_i           1381
  +find_wordchar_i_sc_ic          1382
  +find_punctuation_i_s_i         1383
  +find_punctuation_i_s_ic        1384
  +find_punctuation_i_sc_i        1385
  +find_punctuation_i_sc_ic       1386
  +find_newline_i_s_i             1387
  +find_newline_i_s_ic            1388
  +find_newline_i_sc_i            1389
  +find_newline_i_sc_ic           1390
  +find_word_boundary_i_s_i       1391
  +find_word_boundary_i_s_ic      1392
  +find_word_boundary_i_sc_i      1393
  +find_word_boundary_i_sc_ic     1394
  
  
  
  1.34      +56 -0     parrot/ops/string.ops
  
  Index: string.ops
  ===================================================================
  RCS file: /cvs/public/parrot/ops/string.ops,v
  retrieving revision 1.33
  retrieving revision 1.34
  diff -u -r1.33 -r1.34
  --- string.ops        28 Feb 2005 17:17:55 -0000      1.33
  +++ string.ops        28 Feb 2005 18:01:24 -0000      1.34
  @@ -703,6 +703,62 @@
   }
   
   
  +=item B<find_whitespace>(out INT, in STR, in INT)
  +
  +Set $1 to the offset of the next whitespace codepoint or to -1.
  +
  +=item B<find_wordchar>(out INT, in STR, in INT)
  +
  +Set $1 to the offset of the next wordchar codepoint or to -1.
  +
  +=item B<find_digit>(out INT, in STR, in INT)
  +
  +Set $1 to the offset of the next digit codepoint or to -1.
  +
  +=item B<find_punctuation>(out INT, in STR, in INT)
  +
  +Set $1 to the offset of the next punctuation codepoint or to -1.
  +
  +=item B<find_newline>(out INT, in STR, in INT)
  +
  +Set $1 to the offset of the next newline codepoint or to -1.
  +
  +=item B<find_word_boundary>(out INT, in STR, in INT)
  +
  +Set $1 to the offset of the next word boundary or to -1.
  +
  +=cut
  +
  +op find_whitespace(out INT, in STR, in INT) {
  +  $1 = Parrot_string_find_whitespace(interpreter, $2, $3);
  +  goto NEXT();
  +}
  +
  +op find_wordchar(out INT, in STR, in INT) {
  +  $1 = Parrot_string_find_wordchar(interpreter, $2, $3);
  +  goto NEXT();
  +}
  +
  +op find_digit(out INT, in STR, in INT) {
  +  $1 = Parrot_string_find_digit(interpreter, $2, $3);
  +  goto NEXT();
  +}
  +
  +op find_punctuation(out INT, in STR, in INT) {
  +  $1 = Parrot_string_find_punctuation(interpreter, $2, $3);
  +  goto NEXT();
  +}
  +
  +op find_newline(out INT, in STR, in INT) {
  +  $1 = Parrot_string_find_newline(interpreter, $2, $3);
  +  goto NEXT();
  +}
  +
  +op find_word_boundary(out INT, in STR, in INT) {
  +  $1 = Parrot_string_find_word_boundary(interpreter, $2, $3);
  +  goto NEXT();
  +}
  +
   =back
   
   =head1 COPYRIGHT
  
  
  
  1.237     +49 -1     parrot/src/string.c
  
  Index: string.c
  ===================================================================
  RCS file: /cvs/public/parrot/src/string.c,v
  retrieving revision 1.236
  retrieving revision 1.237
  diff -u -r1.236 -r1.237
  --- string.c  28 Feb 2005 17:17:56 -0000      1.236
  +++ string.c  28 Feb 2005 18:01:28 -0000      1.237
  @@ -1,6 +1,6 @@
   /*
   Copyright: 2001-2003 The Perl Foundation.  All Rights Reserved.
  -$Id: string.c,v 1.236 2005/02/28 17:17:56 leo Exp $
  +$Id: string.c,v 1.237 2005/02/28 18:01:28 leo Exp $
   
   =head1 NAME
   
  @@ -2543,6 +2543,54 @@
       return CHARSET_IS_NEWLINE(interpreter, s, offset);
   }
   
  +INTVAL
  +Parrot_string_find_whitespace(Interp *interpreter, STRING *s, INTVAL offset)
  +{
  +    if (!s)
  +        return -1;
  +    return CHARSET_FIND_WHITESPACE(interpreter, s, offset);
  +}
  +
  +INTVAL
  +Parrot_string_find_digit(Interp *interpreter, STRING *s, INTVAL offset)
  +{
  +    if (!s)
  +        return -1;
  +    return CHARSET_FIND_DIGIT(interpreter, s, offset);
  +}
  +
  +INTVAL
  +Parrot_string_find_wordchar(Interp *interpreter, STRING *s, INTVAL offset)
  +{
  +    if (!s)
  +        return -1;
  +    return CHARSET_FIND_WORDCHAR(interpreter, s, offset);
  +}
  +
  +INTVAL
  +Parrot_string_find_punctuation(Interp *interpreter, STRING *s, INTVAL offset)
  +{
  +    if (!s)
  +        return -1;
  +    return CHARSET_FIND_PUNCTUATION(interpreter, s, offset);
  +}
  +
  +INTVAL
  +Parrot_string_find_newline(Interp *interpreter, STRING *s, INTVAL offset)
  +{
  +    if (!s)
  +        return -1;
  +    return CHARSET_FIND_NEWLINE(interpreter, s, offset);
  +}
  +
  +INTVAL
  +Parrot_string_find_word_boundary(Interp *interpreter, STRING *s, INTVAL offset)
  +{
  +    if (!s)
  +        return -1;
  +    return CHARSET_FIND_WORD_BOUNDARY(interpreter, s, offset);
  +}
  +
   /*
   
   =back
  
  
  
  1.5       +67 -2     parrot/t/op/string_cs.t
  
  Index: string_cs.t
  ===================================================================
  RCS file: /cvs/public/parrot/t/op/string_cs.t,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- string_cs.t       28 Feb 2005 17:17:57 -0000      1.4
  +++ string_cs.t       28 Feb 2005 18:01:30 -0000      1.5
  @@ -1,6 +1,6 @@
   #! perl -w
   # Copyright: 2001-2004 The Perl Foundation.  All Rights Reserved.
  -# $Id: string_cs.t,v 1.4 2005/02/28 17:17:57 leo Exp $
  +# $Id: string_cs.t,v 1.5 2005/02/28 18:01:30 leo Exp $
   
   =head1 NAME
   
  @@ -16,7 +16,7 @@
   
   =cut
   
  -use Parrot::Test tests => 12;
  +use Parrot::Test tests => 16;
   use Test::More;
   
   output_is( <<'CODE', <<OUTPUT, "basic syntax" );
  @@ -179,4 +179,69 @@
   01
   OUTPUT
   
  +output_is( <<'CODE', <<OUTPUT, "find_wordchar");
  +    set S0, "_ ab 09"
  +    set I0, 0
  +lp:
  +    find_wordchar I0, S0, I0
  +    print I0
  +    print " "
  +    eq I0, -1, done
  +    inc I0
  +    branch lp
  +done:
  +    print "ok\n"
  +    end
  +CODE
  +0 2 3 5 6 -1 ok
  +OUTPUT
   
  +output_is( <<'CODE', <<OUTPUT, "find_digit");
  +    set S0, "_ ab 09"
  +    set I0, 0
  +lp:
  +    find_digit I0, S0, I0
  +    print I0
  +    print " "
  +    eq I0, -1, done
  +    inc I0
  +    branch lp
  +done:
  +    print "ok\n"
  +    end
  +CODE
  +5 6 -1 ok
  +OUTPUT
  +output_is( <<'CODE', <<OUTPUT, "find_punctuation");
  +    set S0, "_ .b ,9"
  +    set I0, 0
  +lp:
  +    find_punctuation I0, S0, I0
  +    print I0
  +    print " "
  +    eq I0, -1, done
  +    inc I0
  +    branch lp
  +done:
  +    print "ok\n"
  +    end
  +CODE
  +2 5 -1 ok
  +OUTPUT
  +
  +output_is( <<'CODE', <<OUTPUT, "find_word_boundary");
  +    set S0, "_ab 09z"
  +    set I0, 0
  +lp:
  +    find_word_boundary I0, S0, I0
  +    print I0
  +    print " "
  +    eq I0, -1, done
  +    inc I0
  +    branch lp
  +done:
  +    print "ok\n"
  +    end
  +CODE
  +0 2 3 6 -1 ok
  +OUTPUT
  
  
  

Reply via email to