regex

bernhard Sat, 08 Oct 2005 04:55:23 -0700

Author: bernhard
Date: Sat Oct  8 04:55:03 2005
New Revision: 9406

Modified:
   trunk/languages/m4/ChangeLog
   trunk/languages/m4/TODO.pod
   trunk/languages/m4/src/input.pir
   trunk/languages/m4/src/m4.pir
   trunk/languages/m4/src/macro.pir
   trunk/languages/m4/t/basic/003_getopt.t
   trunk/languages/m4/t/regex/004_pge.t
Log:
Call it 'Parrot m4 0.0.14'.
Fix m4/t/regex/004_pge.t.
Add state['state'] and state['rulesub'],
and thus rely less on naming convention



Modified: trunk/languages/m4/ChangeLog
==============================================================================
--- trunk/languages/m4/ChangeLog        (original)
+++ trunk/languages/m4/ChangeLog        Sat Oct  8 04:55:03 2005
@@ -1,6 +1,12 @@
 # $Id$
 
 2005-06-29     Bernhard Schmalhofer
+        * Make it partly work with Parrot 0.3.0
+        * Skip testing of loading frozen states
+        * Fix t/regex/004_pge.t
+       * Call it Parrot m4 0.0.14
+
+2005-06-29     Bernhard Schmalhofer
         * Switch from PCRE to PGE
        * Call it Parrot m4 0.0.13
 

Modified: trunk/languages/m4/TODO.pod
==============================================================================
--- trunk/languages/m4/TODO.pod (original)
+++ trunk/languages/m4/TODO.pod Sat Oct  8 04:55:03 2005
@@ -231,6 +231,8 @@ by chapters in the GNU m4 documentation.
 
 =over
 
+=item Needs to be fixed, as the rx_* ops are gone
+
 =item more tests
 
 =back

Modified: trunk/languages/m4/src/input.pir
==============================================================================
--- trunk/languages/m4/src/input.pir    (original)
+++ trunk/languages/m4/src/input.pir    Sat Oct  8 04:55:03 2005
@@ -6,7 +6,7 @@ input.pir - Setting up input and reading
 
 =head1 DESCRIPTION
 
-Copyright:  2004 Bernhard Schmalhofer. All Rights Reserved.
+Copyright:  2004-2005 Bernhard Schmalhofer. All Rights Reserved.
 CVS Info:   $Id$
 History:    Ported from GNU m4 1.4
 References: http://www.gnu.org/software/m4/m4.html
@@ -15,17 +15,33 @@ References: http://www.gnu.org/software/
 
 =head2 void input_init( Hash state )
 
-Initialise some stacks and some regexes
+Initialise some stacks and put them into the Hash state['stack']: 
 
-'token_stack'
-'input_stack'   contains files, strings and macro definitions
-'wrapup_stack'
-'word_rulesub'    recognizes TOKEN_WORD.
-'string_rulesub'  recognizes TOKEN_STRING
-'simple_rulesub'  recognizes TOKEN_SIMPLE
-'comment_rulesub' recognizes comments, returned as TOKEN_SIMPLE
+=over 4
 
-TODO: recognize nested quoted strings
+=item state['stack';'token']   not used yet
+
+=item state['stack';'input']   contains files, strings and macro definitions
+
+=item state['stack';'wrapup']  not used yet
+
+=back
+
+Initialize some PGE rulesubs and put them into the Hash state['rulesub']: 
+
+=over 4
+
+=item state['rulesub';'word']    recognizes TOKEN_WORD.
+
+=item state['rulesub';'string']  recognizes TOKEN_STRING
+
+=item state['rulesub';'simple']  recognizes TOKEN_SIMPLE
+
+=item state['rulesub';'comment']  recognizes comments, returned as TOKEN_SIMPLE
+
+=back
+
+TODO: recognize nested quoted strings
 
 =cut
 
@@ -35,27 +51,35 @@ TODO: recognize nested quoted strings
   .param pmc state         
 
   # setup of stacks
+  .local pmc stack_in_state
+  stack_in_state = new Hash
+  state['stack'] = stack_in_state
   .local pmc empty_array
   empty_array = new ResizablePMCArray
-  state['token_stack'] = empty_array
+  stack_in_state['token'] = empty_array
   empty_array = new ResizablePMCArray
-  state['input_stack'] = empty_array
+  stack_in_state['input'] = empty_array
   empty_array = new ResizablePMCArray
-  state['wrapup_stack'] = empty_array
+  stack_in_state['wrapup'] = empty_array
 
   # setup of some rules
   # these rules should be kept in sync with t/regex/002_tokens.t
+  # TODO: Use named or positional captures
+  # TODO: rulesub for frozen state 
   .local pmc p6rule
   find_global p6rule, "PGE", "p6rule"
+  .local pmc rulesub_in_state
+  rulesub_in_state = new Hash
+  state['rulesub'] = rulesub_in_state
   .local pmc rulesub
-  rulesub = p6rule( "^<[_a..zA..Z]><[_a..zA..Z0..9]>*" )
-  state['word_rulesub'] = rulesub
+  rulesub = p6rule( "^<[_a..zA..Z]><[_a..zA..Z0..9]>*" )
+  rulesub_in_state['word'] = rulesub
   rulesub = p6rule( "^`<-[`]>*'" )
-  state['string_rulesub'] = rulesub
+  rulesub_in_state['string'] = rulesub
   rulesub = p6rule( "^<-[`#_a..zA..Z]>" )
-  state['simple_rulesub'] = rulesub
+  rulesub_in_state['simple'] = rulesub
   rulesub = p6rule( "^\#\N*\n" )
-  state['comment_rulesub'] = rulesub
+  rulesub_in_state['comment'] = rulesub
 
 .end
 
@@ -86,10 +110,10 @@ TODO: open these files and complain when
   input_string = read in, 50000
   close in
 
-  # state['input_stack'] has been created in input_init 
+  # state['stack';'input'] has been created in input_init 
   # TODO: seperate input blocks for every file
   .local pmc input_stack
-  input_stack = state['input_stack']
+  input_stack = state['stack';'input']
   .local pmc input_block
   .local int stack_size
   stack_size = input_stack
@@ -113,7 +137,7 @@ TODO: open these files and complain when
 =head2 string next_token( Hash state )
 
 Parse and return a single token from the input stream.  A token can
-either be TOKEN_EOF, if the input_stack is empty; it can be TOKEN_STRING
+either be TOKEN_EOF, if the state['stack';'input'] is empty; it can be TOKEN_STRING
 for a quoted string; TOKEN_WORD for something that is a potential macro
 name; and TOKEN_SIMPLE for any single character that is not a part of
 any of the previous types.                                     
@@ -128,7 +152,7 @@ Uses regular expressions for finding tok
   .param pmc state 
 
   .local pmc input_stack    
-  input_stack = state['input_stack']
+  input_stack = state['stack';'input']
   .local pmc input_block    
   input_block = shift input_stack
   .local string input_string    
@@ -144,26 +168,26 @@ Uses regular expressions for finding tok
     
   # look for 'TOKEN_SIMPLE'
   # read a whole bunch of non-macro and non-word charcters
-  rulesub = state['simple_rulesub']
+  rulesub = state['rulesub';'simple']
   token_type = 'TOKEN_SIMPLE'
   match = rulesub( input_string ) 
   if match goto MATCH
 
   # look for comments and return it as 'TOKEN_SIMPLE'
-  rulesub = state['comment_rulesub']
+  rulesub = state['rulesub';'comment']
   token_type = 'TOKEN_SIMPLE'
   match = rulesub( input_string ) 
   if match goto MATCH
 
   # look for 'TOKEN_STRING'
-  rulesub = state['string_rulesub']
+  rulesub = state['rulesub';'string']
   token_type = 'TOKEN_STRING'
   match = rulesub( input_string ) 
   if match goto MATCH
 
   # look for 'TOKEN_WORD'
   # this will be checked for macro substitution
-  rulesub = state['word_rulesub']
+  rulesub = state['rulesub';'word']
   token_type = 'TOKEN_WORD'
   match = rulesub( input_string ) 
   if match goto MATCH

Modified: trunk/languages/m4/src/m4.pir
==============================================================================
--- trunk/languages/m4/src/m4.pir       (original)
+++ trunk/languages/m4/src/m4.pir       Sat Oct  8 04:55:03 2005
@@ -134,7 +134,7 @@ Looks at the command line arguments and 
   # Was '--version' passed ?
   is_defined = defined opt["version"]
   unless is_defined goto NO_VERSION_FLAG
-    print "Parrot m4 0.0.13\n"
+    print "Parrot m4 0.0.14\n"
     end
 NO_VERSION_FLAG: 
 
@@ -255,7 +255,7 @@ NO_UNIMPLEMENTED_OPTION:
   .local pmc m4_eval_compiler_lib
   m4_eval_compiler_lib = loadlib "m4_eval_compiler"
 
-  # init of input structures, creates state['input_stack']
+  # init of input structures, creates state['stack';'input']
   input_init( state )
 
   # TODO: init of output structures

Modified: trunk/languages/m4/src/macro.pir
==============================================================================
--- trunk/languages/m4/src/macro.pir    (original)
+++ trunk/languages/m4/src/macro.pir    Sat Oct  8 04:55:03 2005
@@ -77,7 +77,7 @@ the text are just copied to the output.
         blind_no_args = symbol['blind_no_args'] 
         unless blind_no_args goto EXPAND_MACRO
           .local string input_string    
-          input_string = state['input_stack';0;'string']
+          input_string = state['stack';'input';0;'string']
           .local int first_char, open_parenthesis
           first_char = ord input_string
           open_parenthesis = ord '('
@@ -138,9 +138,9 @@ NESTING_LIMIT_NOT_REACHED_YET:
   .local string text
   ( text ) = call_macro( state, symbol, arguments )
   .local string input_string    
-  input_string = state['input_stack';0;'string']
+  input_string = state['stack';'input';0;'string']
   input_string = text . input_string 
-  state['input_stack';0;'string'] = input_string
+  state['stack';'input';0;'string'] = input_string
 
   expansion_level = state['expansion_level']
   dec expansion_level
@@ -207,7 +207,7 @@ Collect all the arguments to a call of a
   # Thus we need to remenber the start and the length of these two captures
   .local int cnt_stack
   .local string input_string    
-  input_string = state['input_stack';0;'string']
+  input_string = state['stack';'input';0;'string']
 
   # We need a '(' at beginning of string
   .local int index_opening
@@ -250,5 +250,5 @@ NO_MORE_ARGS:
   substr input_string, 0, index_closing, ''
   
 NOT_A_ARGUMENT_LIST:
-  state['input_stack';0;'string'] = input_string
+  state['stack';'input';0;'string'] = input_string
 .end

Modified: trunk/languages/m4/t/basic/003_getopt.t
==============================================================================
--- trunk/languages/m4/t/basic/003_getopt.t     (original)
+++ trunk/languages/m4/t/basic/003_getopt.t     Sat Oct  8 04:55:03 2005
@@ -34,7 +34,7 @@ END_OUT
 #--------------------------------------------
 $real_out     = `$parrot_m4 --version 2>&1`; 
 is( $real_out, << 'END_OUT', '--version' );
-Parrot m4 0.0.13
+Parrot m4 0.0.14
 END_OUT
 
 

Modified: trunk/languages/m4/t/regex/004_pge.t
==============================================================================
--- trunk/languages/m4/t/regex/004_pge.t        (original)
+++ trunk/languages/m4/t/regex/004_pge.t        Sat Oct  8 04:55:03 2005
@@ -33,6 +33,7 @@ use Parrot::Test tests => 1;
     match = word_rulesub(target)               # execute rule on target string
 
 match_loop:
+    if_null match, match_fail          # if match fails stop
     unless match goto match_fail          # if match fails stop
     print "match succeeded\n"
 
@@ -43,19 +44,14 @@ match_loop:
 
 match_fail:
     print "match failed\n"   
-    .return()
 .end
 END_PIR
 match succeeded
 : <Hello @ 0> 0
 match succeeded
-: <Hell @ 0> 0
-match succeeded
-: <Hel @ 0> 0
-match succeeded
-: <He @ 0> 0
+: <Hello @ 0> 0
 match succeeded
-: <H @ 0> 0
+: <Hello @ 0> 0
 match failed
 OUTPUT
 }

[svn:parrot] r9406 - in trunk/languages/m4: . src t/basic t/regex

Reply via email to