cvsuser     05/02/28 07:10:58

  Modified:    charset  ascii.c ascii.h iso-8859-1.c iso-8859-1.h
               config/inter charset.pl
               src      charset.c
               t/op     string_cs.t
  Log:
  Strings. Finally. 3 - iso-8859-1 upcase ...
  
  * upcase, downcase, titlecase for iso-8859-1
  * moved some common code to ascii.c
  
  Revision  Changes    Path
  1.9       +46 -8     parrot/charset/ascii.c
  
  Index: ascii.c
  ===================================================================
  RCS file: /cvs/public/parrot/charset/ascii.c,v
  retrieving revision 1.8
  retrieving revision 1.9
  diff -u -r1.8 -r1.9
  --- ascii.c   28 Feb 2005 10:41:16 -0000      1.8
  +++ ascii.c   28 Feb 2005 15:10:55 -0000      1.9
  @@ -1,6 +1,6 @@
   /*
   Copyright: 2004 The Perl Foundation.  All Rights Reserved.
  -$Id: ascii.c,v 1.8 2005/02/28 10:41:16 leo Exp $
  +$Id: ascii.c,v 1.9 2005/02/28 15:10:55 leo Exp $
   
   =head1 NAME
   
  @@ -8,7 +8,8 @@
   
   =head1 DESCRIPTION
   
  -This file implements the charset functions for ascii data
  +This file implements the charset functions for ascii data and common
  +charset functionality for similar charsets like iso-8859-1.
   
   =cut
   
  @@ -39,9 +40,46 @@
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 240-255 */
   };
   
  +INTVAL
  +ascii_find_thing(Interp *interpreter, STRING *string, UINTVAL start,
  +        unsigned char type, const unsigned char *table)
  +{
  +    INTVAL retval = -1;
  +    INTVAL found = 0;
  +
  +    for (; start < string->strlen; start++) {
  +        if (table[ENCODING_GET_CODEPOINT(interpreter, string, start)] == type) {
  +            found = 1;
  +            break;
  +        }
  +    }
  +    if (found) {
  +        retval = start;
  +    }
  +    return retval;
  +}
   
  -static STRING *
  -get_graphemes(Interp *interpreter, STRING *source_string,
  +INTVAL
  +ascii_find_not_thing(Interp *interpreter, STRING *string, UINTVAL start,
  +        unsigned char type, const unsigned char *table)
  +{
  +    INTVAL retval = -1;
  +    INTVAL found = 0;
  +
  +    for (; start < string->strlen; start++) {
  +        if (table[ENCODING_GET_CODEPOINT(interpreter, string, start)] != type) {
  +            found = 1;
  +            break;
  +        }
  +    }
  +    if (found) {
  +        retval = start;
  +    }
  +    return retval;
  +}
  +
  +STRING *
  +ascii_get_graphemes(Interp *interpreter, STRING *source_string,
           UINTVAL offset, UINTVAL count)
   {
       return ENCODING_GET_BYTES(interpreter, source_string, offset, count);
  @@ -56,8 +94,8 @@
   
   }
   
  -static STRING *
  -get_graphemes_inplace(Interp *interpreter, STRING *source_string,
  +STRING *
  +ascii_get_graphemes_inplace(Interp *interpreter, STRING *source_string,
           STRING *dest_string, UINTVAL offset, UINTVAL count)
   {
       return ENCODING_GET_BYTES_INPLACE(interpreter, source_string,
  @@ -349,8 +387,8 @@
     CHARSET *return_set = Parrot_new_charset(interpreter);
     CHARSET base_set = {
         "ascii",
  -      get_graphemes,
  -      get_graphemes_inplace,
  +      ascii_get_graphemes,
  +      ascii_get_graphemes_inplace,
         set_graphemes,
         to_charset,
         copy_to_charset,
  
  
  
  1.7       +16 -3     parrot/charset/ascii.h
  
  Index: ascii.h
  ===================================================================
  RCS file: /cvs/public/parrot/charset/ascii.h,v
  retrieving revision 1.6
  retrieving revision 1.7
  diff -u -r1.6 -r1.7
  --- ascii.h   27 Feb 2005 11:03:38 -0000      1.6
  +++ ascii.h   28 Feb 2005 15:10:55 -0000      1.7
  @@ -1,7 +1,7 @@
   /* ascii.h
    *  Copyright: 2004 The Perl Foundation.  All Rights Reserved.
    *  CVS Info
  - *     $Id: ascii.h,v 1.6 2005/02/27 11:03:38 leo Exp $
  + *     $Id: ascii.h,v 1.7 2005/02/28 15:10:55 leo Exp $
    *  Overview:
    *     This is the header for the ascii charset functions
    *  Data Structure and Algorithms:
  @@ -13,8 +13,21 @@
   #if !defined(PARROT_CHARSET_ASCII_H_GUARD)
   #define PARROT_CHARSET_ASCII_H_GUARD
   
  -static STRING *get_graphemes(Interp *, STRING *source_string, UINTVAL offset, UINTVAL count);
  -static STRING *get_graphemes_inplace(Interp *, STRING *source_string, STRING *dest_string, UINTVAL offset, UINTVAL count);
  +/*
  + * common functions for ascii-ish charsets
  + */
  +
  +INTVAL
  +ascii_find_thing(Interp *interpreter, STRING *string, UINTVAL start,
  +        unsigned char type, const unsigned char *table);
  +INTVAL
  +ascii_find_not_thing(Interp *interpreter, STRING *string, UINTVAL start,
  +        unsigned char type, const unsigned char *table);
  +STRING *ascii_get_graphemes(Interp *, STRING *source_string,
  +        UINTVAL offset, UINTVAL count);
  +STRING *ascii_get_graphemes_inplace(Interp *, STRING *source_string,
  +        STRING *dest_string, UINTVAL offset, UINTVAL count);
  +
   static void set_graphemes(Interp *, STRING *source_string, UINTVAL offset, UINTVAL replace_count, STRING *insert_string);
   static void to_charset(Interp *, STRING *source_string, CHARSET *new_charset);
   static STRING *copy_to_charset(Interp *, STRING *source_string, CHARSET *new_charset);
  
  
  
  1.7       +71 -84    parrot/charset/iso-8859-1.c
  
  Index: iso-8859-1.c
  ===================================================================
  RCS file: /cvs/public/parrot/charset/iso-8859-1.c,v
  retrieving revision 1.6
  retrieving revision 1.7
  diff -u -r1.6 -r1.7
  --- iso-8859-1.c      28 Feb 2005 10:41:16 -0000      1.6
  +++ iso-8859-1.c      28 Feb 2005 15:10:55 -0000      1.7
  @@ -1,6 +1,6 @@
   /*
   Copyright: 2004 The Perl Foundation.  All Rights Reserved.
  -$Id: iso-8859-1.c,v 1.6 2005/02/28 10:41:16 leo Exp $
  +$Id: iso-8859-1.c,v 1.7 2005/02/28 15:10:55 leo Exp $
   
   =head1 NAME
   
  @@ -16,6 +16,7 @@
   
   #include "parrot/parrot.h"
   #include "iso-8859-1.h"
  +#include "ascii.h"
   
   /* The encoding we prefer, given a choice */
   static ENCODING *preferred_encoding;
  @@ -39,64 +40,12 @@
       1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 160-175 */
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, /* 176-191 */
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 192-207 */
  -    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 207-223 */
  +    2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, /* 208-223 */
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 224-239 */
  -    2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 240-255 */
  +    2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, /* 240-255 */
   };
   
  -static INTVAL
  -find_thing(Interp *interpreter, STRING *string, UINTVAL start, UINTVAL type)
  -{
  -    INTVAL retval = -1;
  -    UINTVAL offset = start;
  -    INTVAL found = 0;
  -    for (; offset < string->strlen; offset++) {
  -        if (typetable[ENCODING_GET_CODEPOINT(interpreter, string, offset)]
  -                == type) {
  -            found = 1;
  -            break;
  -        }
  -    }
  -    if (found) {
  -        retval = offset;
  -    }
  -    return retval;
  -}
  -
  -static INTVAL
  -find_not_thing(Interp *interpreter, STRING *string, UINTVAL start,
  -        UINTVAL type)
  -{
  -    INTVAL retval = -1;
  -    UINTVAL offset = start;
  -    INTVAL found = 0;
  -    for (; offset < string->strlen; offset++) {
  -        if (typetable[ENCODING_GET_CODEPOINT(interpreter, string, offset)]
  -                != type) {
  -            found = 1;
  -            break;
  -        }
  -    }
  -    if (found) {
  -        retval = offset;
  -    }
  -    return retval;
  -}
  -
  -static STRING *
  -get_graphemes(Interp *interpreter, STRING *source_string,
  -        UINTVAL offset, UINTVAL count)
  -{
  -    return ENCODING_GET_BYTES(interpreter, source_string, offset, count);
  -}
   
  -static STRING *
  -get_graphemes_inplace(Interp *interpreter, STRING *source_string,
  -        STRING *dest_string, UINTVAL offset, UINTVAL count)
  -{
  -    return ENCODING_GET_BYTES_INPLACE(interpreter, source_string,
  -            offset, count, dest_string);
  -}
   
   static void
   set_graphemes(Interp *interpreter, STRING *source_string,
  @@ -168,7 +117,7 @@
   static void
   upcase(Interp *interpreter, STRING *source_string)
   {
  -    char *buffer;
  +    unsigned char *buffer;
       UINTVAL offset = 0;
   
       if (!source_string->strlen) {
  @@ -178,7 +127,12 @@
       Parrot_unmake_COW(interpreter, source_string);
       buffer = source_string->strstart;
       for (offset = 0; offset < source_string->strlen; offset++) {
  -        buffer[offset] = toupper(buffer[offset]);
  +        unsigned int c = buffer[offset]; /* XXX use encoding ? */
  +        if (c >= 0xe0 && c != 0xf7)
  +            c &= ~0x20;
  +        else
  +            c = toupper(c);
  +        buffer[offset] = c;
       }
   }
   
  @@ -186,67 +140,94 @@
   downcase(Interp *interpreter, STRING *source_string)
   {
       UINTVAL offset = 0;
  -    char *buffer;
  +    unsigned char *buffer;
       if (!source_string->strlen) {
           return;
       }
       Parrot_unmake_COW(interpreter, source_string);
       buffer = source_string->strstart;
       for (offset = 0; offset < source_string->strlen; offset++) {
  -        buffer[offset] = tolower(buffer[offset]);
  +        unsigned int c = buffer[offset];
  +        if (c >= 0xc0 && c != 0xd7 && c <= 0xde)
  +            c |= 0x20;
  +        else
  +            c = tolower(c);
  +        buffer[offset] = c;
       }
   }
   
   static void
   titlecase(Interp *interpreter, STRING *source_string)
   {
  -    char *buffer;
  -    UINTVAL offset = 0;
  +    unsigned char *buffer;
  +    unsigned int c;
  +    UINTVAL offset;
  +
       if (!source_string->strlen) {
           return;
       }
       Parrot_unmake_COW(interpreter, source_string);
       buffer = source_string->strstart;
  -    buffer[0] = toupper(buffer[0]);
  +    c = buffer[0];
  +    if (c >= 0xe0 && c != 0xf7)
  +        c &= ~0x20;
  +    else
  +        c = toupper(c);
  +    buffer[0] = c;
  +
       for (offset = 1; offset < source_string->strlen; offset++) {
  -        buffer[offset] = tolower(buffer[offset]);
  +        c = buffer[offset];
  +        if (c >= 0xc0 && c != 0xd7 && c <= 0xde)
  +            c |= 0x20;
  +        else
  +            c = tolower(c);
  +        buffer[offset] = c;
       }
   }
   
   static void
   upcase_first(Interp *interpreter, STRING *source_string)
   {
  -    char *buffer;
  +    unsigned char *buffer;
  +    unsigned int c;
  +
       if (!source_string->strlen) {
           return;
       }
       Parrot_unmake_COW(interpreter, source_string);
       buffer = source_string->strstart;
  -    buffer[0] = toupper(buffer[0]);
  +    c = buffer[0];
  +    if (c >= 0xe0 && c != 0xf7)
  +        c &= ~0x20;
  +    else
  +        c = toupper(c);
  +    buffer[0] = c;
   }
   
   static void
   downcase_first(Interp *interpreter, STRING *source_string)
   {
  -    char *buffer;
  +    unsigned char *buffer;
  +    unsigned int c;
  +
       if (!source_string->strlen) {
           return;
       }
       Parrot_unmake_COW(interpreter, source_string);
       buffer = source_string->strstart;
  +    c = buffer[0];
  +    if (c >= 0xc0 && c != 0xd7 && c <= 0xde)
  +        c &= ~0x20;
  +    else
  +        c = tolower(c);
  +    buffer[0] = c;
       buffer[0] = toupper(buffer[0]);
   }
   
   static void
   titlecase_first(Interp *interpreter, STRING *source_string)
   {
  -    char *buffer;
  -    if (!source_string->strlen) {
  -        return;
  -    }
  -    Parrot_unmake_COW(interpreter, source_string);
  -    buffer = source_string->strstart;
  -    buffer[0] = toupper(buffer[0]);
  +    upcase_first(interpreter, source_string);
   }
   
   static INTVAL
  @@ -312,13 +293,14 @@
   static INTVAL
   find_wordchar(Interp *interpreter, STRING *source_string, UINTVAL offset)
   {
  -    return find_thing(interpreter, source_string, offset, WORDCHAR);
  +    return ascii_find_thing(interpreter, source_string, offset, WORDCHAR, typetable);
   }
   
   static INTVAL
   find_not_wordchar(Interp *interpreter, STRING *source_string, UINTVAL offset)
   {
  -    return find_not_thing(interpreter, source_string, offset, WORDCHAR);
  +    return ascii_find_not_thing(interpreter, source_string, offset, WORDCHAR,
  +            typetable);
   }
   
   static INTVAL
  @@ -332,14 +314,16 @@
   static INTVAL
   find_whitespace(Interp *interpreter, STRING *source_string, UINTVAL offset)
   {
  -    return find_thing(interpreter, source_string, offset, WHITESPACE);
  +    return ascii_find_thing(interpreter, source_string, offset, WHITESPACE,
  +            typetable);
   }
   
   static INTVAL
   find_not_whitespace(Interp *interpreter, STRING *source_string,
           UINTVAL offset)
   {
  -    return find_not_thing(interpreter, source_string, offset, WHITESPACE);
  +    return ascii_find_not_thing(interpreter, source_string, offset,
  +            WHITESPACE, typetable);
   }
   
   static INTVAL
  @@ -353,13 +337,15 @@
   static INTVAL
   find_digit(Interp *interpreter, STRING *source_string, UINTVAL offset)
   {
  -    return find_thing(interpreter, source_string, offset, DIGIT);
  +    return ascii_find_thing(interpreter, source_string, offset, DIGIT,
  +            typetable);
   }
   
   static INTVAL
   find_not_digit(Interp *interpreter, STRING *source_string, UINTVAL offset)
   {
  -    return find_not_thing(interpreter, source_string, offset, DIGIT);
  +    return ascii_find_not_thing(interpreter, source_string, offset, DIGIT,
  +            typetable);
   }
   
   static INTVAL
  @@ -373,15 +359,16 @@
   static INTVAL
   find_punctuation(Interp *interpreter, STRING *source_string, UINTVAL offset)
   {
  -    return find_thing(interpreter, source_string, offset, PUNCTUATION);
  +    return ascii_find_thing(interpreter, source_string, offset, PUNCTUATION,
  +            typetable);
   }
   
   static INTVAL
   find_not_punctuation(Interp *interpreter, STRING *source_string,
           UINTVAL offset)
   {
  -    return find_not_thing(interpreter, source_string, offset, PUNCTUATION);
  -
  +    return ascii_find_not_thing(interpreter, source_string, offset,
  +            PUNCTUATION, typetable);
   }
   
   static INTVAL
  @@ -441,8 +428,8 @@
       CHARSET *return_set = Parrot_new_charset(interpreter);
       CHARSET base_set = {
           "iso-8859-1",
  -        get_graphemes,
  -        get_graphemes_inplace,
  +        ascii_get_graphemes,
  +        ascii_get_graphemes_inplace,
           set_graphemes,
           to_charset,
           copy_to_charset,
  
  
  
  1.5       +1 -2      parrot/charset/iso-8859-1.h
  
  Index: iso-8859-1.h
  ===================================================================
  RCS file: /cvs/public/parrot/charset/iso-8859-1.h,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- iso-8859-1.h      27 Feb 2005 11:03:38 -0000      1.4
  +++ iso-8859-1.h      28 Feb 2005 15:10:55 -0000      1.5
  @@ -1,7 +1,7 @@
   /* iso_8859_1.h
    *  Copyright: 2004 The Perl Foundation.  All Rights Reserved.
    *  CVS Info
  - *     $Id: iso-8859-1.h,v 1.4 2005/02/27 11:03:38 leo Exp $
  + *     $Id: iso-8859-1.h,v 1.5 2005/02/28 15:10:55 leo Exp $
    *  Overview:
    *     This is the header for the iso_8859-1 charset functions
    *  Data Structure and Algorithms:
  @@ -13,7 +13,6 @@
   #if !defined(PARROT_CHARSET_ISO_8859_1_H_GUARD)
   #define PARROT_CHARSET_ISO_8859_1_H_GUARD
   
  -static STRING *get_graphemes(Interp *interpreter, STRING *source_string, UINTVAL offset, UINTVAL count);
   static void set_graphemes(Interp *interpreter, STRING *source_string, UINTVAL offset, UINTVAL replace_count, STRING *insert_string);
   static void to_charset(Interp *interpreter, STRING *source_string, CHARSET *new_charset);
   static STRING *copy_to_charset(Interp *interpreter, STRING *source_string, CHARSET *new_charset);
  
  
  
  1.2       +2 -2      parrot/config/inter/charset.pl
  
  Index: charset.pl
  ===================================================================
  RCS file: /cvs/public/parrot/config/inter/charset.pl,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- charset.pl        4 Nov 2004 18:37:22 -0000       1.1
  +++ charset.pl        28 Feb 2005 15:10:56 -0000      1.2
  @@ -1,6 +1,6 @@
   #! perl -w
   # Copyright: 2001-2003 The Perl Foundation.  All Rights Reserved.
  -# $Id: charset.pl,v 1.1 2004/11/04 18:37:22 dan Exp $
  +# $Id: charset.pl,v 1.2 2005/02/28 15:10:56 leo Exp $
   
   =head1 NAME
   
  @@ -54,7 +54,7 @@
     foreach my $charset (split(/\s+/, $charset_list)) {
         $charset =~ s/\.c$//;
         $TEMP_charset_build .= <<END
  -charset/$charset\$(O): charset/$charset.h charset/$charset.c \$(NONGEN_HEADERS)
  +charset/$charset\$(O): charset/$charset.h charset/ascii.h charset/$charset.c \$(NONGEN_HEADERS)
   
   
   END
  
  
  
  1.7       +11 -3     parrot/src/charset.c
  
  Index: charset.c
  ===================================================================
  RCS file: /cvs/public/parrot/src/charset.c,v
  retrieving revision 1.6
  retrieving revision 1.7
  diff -u -r1.6 -r1.7
  --- charset.c 28 Feb 2005 13:35:46 -0000      1.6
  +++ charset.c 28 Feb 2005 15:10:57 -0000      1.7
  @@ -1,6 +1,6 @@
   /*
   Copyright: 2004 The Perl Foundation.  All Rights Reserved.
  -$Id: charset.c,v 1.6 2005/02/28 13:35:46 leo Exp $
  +$Id: charset.c,v 1.7 2005/02/28 15:10:57 leo Exp $
   
   =head1 NAME
   
  @@ -47,6 +47,15 @@
   void
   Parrot_deinit_charsets(Interp *interpreter)
   {
  +    int i, n;
  +
  +    n = all_charsets->n_charsets;
  +    for (i = 0; i < n; ++i) {
  +        mem_sys_free(all_charsets->set[i].charset);
  +    }
  +    mem_sys_free(all_charsets->set);
  +    mem_sys_free(all_charsets);
  +    all_charsets = NULL;
   }
   
   CHARSET *
  @@ -135,8 +144,7 @@
                   sizeof(One_charset));
       all_charsets->n_charsets++;
       all_charsets->set[n].charset = charset;
  -    all_charsets->set[n].name = string_from_cstring(interpreter,
  -            charsetname, 0);
  +    all_charsets->set[n].name = const_string(interpreter, charsetname);
   
       return 1;
   }
  
  
  
  1.3       +30 -2     parrot/t/op/string_cs.t
  
  Index: string_cs.t
  ===================================================================
  RCS file: /cvs/public/parrot/t/op/string_cs.t,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- string_cs.t       28 Feb 2005 13:35:47 -0000      1.2
  +++ string_cs.t       28 Feb 2005 15:10:58 -0000      1.3
  @@ -1,6 +1,6 @@
   #! perl -w
   # Copyright: 2001-2004 The Perl Foundation.  All Rights Reserved.
  -# $Id: string_cs.t,v 1.2 2005/02/28 13:35:47 leo Exp $
  +# $Id: string_cs.t,v 1.3 2005/02/28 15:10:58 leo Exp $
   
   =head1 NAME
   
  @@ -16,7 +16,7 @@
   
   =cut
   
  -use Parrot::Test tests => 4;
  +use Parrot::Test tests => 7;
   use Test::More;
   
   output_is( <<'CODE', <<OUTPUT, "basic syntax" );
  @@ -64,3 +64,31 @@
   CODE
   /charset 'no_such' not found/
   OUTPUT
  +
  +output_is( <<'CODE', <<OUTPUT, "downcase" );
  +    set S0, "AEIOU_ÄÖÜ\n"
  +    downcase S1, S0
  +    print S1
  +    end
  +CODE
  +aeiou_äöü
  +OUTPUT
  +
  +output_is( <<'CODE', <<OUTPUT, "upcase" );
  +    set S0, "aeiou_äöüß\n"
  +    upcase S1, S0
  +    print S1
  +    end
  +CODE
  +AEIOU_ÄÖÜß
  +OUTPUT
  +
  +output_is( <<'CODE', <<OUTPUT, "titlecase" );
  +    set S0, "zAEIOU_ÄÖÜ\n"
  +    titlecase S1, S0
  +    print S1
  +    end
  +CODE
  +Zaeiou_äöü
  +OUTPUT
  +
  
  
  

Reply via email to