Author: jrieks
Date: Sun May  8 13:28:45 2005
New Revision: 8013

Added:
   trunk/charset/gen_tables.pl
Log:
added charset/gen_tables.pl


Added: trunk/charset/gen_tables.pl
==============================================================================
--- (empty file)
+++ trunk/charset/gen_tables.pl Sun May  8 13:28:45 2005
@@ -0,0 +1,122 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+use POSIX qw(locale_h);
+use locale;
+
+my ($svnid) = '$Id: gen_tables.pl jrieks $' =~ /^\$[iI][dD]:\s(.*)\$$/;
+my $fileid = '$'.'id $';
+
+#
+# charset tables to create
+#
+my %table = (
+    "en_US.iso88591" => "Parrot_iso_8859_1_typetable",
+#    "en_US.iso885915" => "Parrot_iso_8859_15_typetable",
+    "POSIX" => "Parrot_ascii_typetable",
+);
+
+my $header = <<"HEADER";
+/* $fileid
+ *  Copyright: 2005 The Perl Foundation.  All Rights Reserved.
+ *
+ * DO NOT EDIT THIS FILE DIRECTLY!
+ * please update the $0 script instead.
+ *
+ * Created by $svnid
+ *  Overview:
+ *     This file contains various charset tables.
+ *  Data Structure and Algorithms:
+ *  History:
+ *  Notes:
+ *  References:
+ */
+HEADER
+
+=item B<classify>( $chr )
+
+Character classification
+
+=cut
+
+sub classify {
+    my ($chr) = @_;
+    my $ret = 0;
+    
+    $chr = chr($chr);
+    $ret |= 0x0001 if $chr =~ /^[[:upper:]]$/;  # CCLASS_UPPERCASE
+    $ret |= 0x0002 if $chr =~ /^[[:lower:]]$/;  # CCLASS_LOWERCASE
+    $ret |= 0x0004 if $chr =~ /^[[:alpha:]]$/;  # CCLASS_ALPHABETIC
+    $ret |= 0x0008 if $chr =~ /^[[:digit:]]$/;  # CCLASS_NUMERIC        
+    $ret |= 0x0010 if $chr =~ /^[[:xdigit:]]$/; # CCLASS_HEXADECIMAL    
+    $ret |= 0x0020 if $chr =~ /^[[:space:]]$/;  # CCLASS_WHITESPACE     
+    $ret |= 0x0040 if $chr =~ /^[[:print:]]$/;  # CCLASS_PRINTING       
+    $ret |= 0x0080 if $chr =~ /^[[:graph:]]$/;  # CCLASS_GRAPHICAL      
+    $ret |= 0x0100 if $chr =~ /^[[:blank:]]$/;  # CCLASS_BLANK  
+    $ret |= 0x0200 if $chr =~ /^[[:cntrl:]]$/;  # CCLASS_CONTROL        
+    $ret |= 0x0400 if $chr =~ /^[[:punct:]]$/;  # CCLASS_PUNCTUATION    
+    $ret |= 0x0800 if $chr =~ /^[[:alnum:]]$/;  # CCLASS_ALPHANUMERIC   
+    $ret |= 0x1000 if $chr =~ /^[\n\r]$/;       # CCLASS_NEWLINE
+    $ret |= 0x2000 if $chr =~ /^[[:alnum:]_]$/; # CCLASS_WORD
+
+    return $ret;
+}
+
+=item B<create_table>( $name )
+
+Create a whole character table
+
+=cut
+
+sub create_table {
+    my ($name) = @_;
+    my $len = 8;
+
+    print "const PARROT_CCLASS_FLAGS ${name}[256] = {\n";
+    foreach my $char (0..255) {
+        printf "0x%.4x, ", classify($char);
+        print "/* @{[$char-$len+1]}-$char */\n" if $char % $len == $len-1;
+    }
+    print "};\n";
+}
+
+
+
+#
+# create 'charset/tables.c'
+#
+###########################################################################
+open STDOUT, ">charset/tables.c" or die "can not open 'charset/tables.c\n";
+print <<"END";
+$header
+#include "tables.h"
+END
+foreach my $name ( sort keys %table ) {
+    print STDERR "creating table: '$table{$name}' (charset: $name)\n";
+    setlocale(LC_CTYPE, $name);
+    create_table($table{$name});
+}
+close STDOUT;
+
+
+
+#
+# create 'charset/tables.h'
+#
+###########################################################################
+open STDOUT, ">charset/tables.h" or die "can not open 'charset/tables.c\n";
+print <<"END";
+$header
+#if !defined(PARROT_CHARSET_TABLES_H_GUARD)
+#define PARROT_CHARSET_TABLES_H_GUARD
+#include "parrot/cclass.h"
+#define WHITESPACE  enum_cclass_whitespace
+#define WORDCHAR    enum_cclass_word
+#define PUNCTUATION enum_cclass_punctuation
+#define DIGIT       enum_cclass_numeric
+END
+foreach my $name ( sort keys %table ) {
+    print "extern const PARROT_CCLASS_FLAGS ${table{$name}}[256];\n";
+}
+print "#endif /* PARROT_CHARSET_TABLES_H_GUARD */\n";
+close STDOUT;

Reply via email to