Stig S. Bakken wrote:

>Good idea.  I assume you want this for WidgetHTML.php? ;-)
>  
>
Yeah, hopefully it would remove the need for preg_matching in there.

Will get back to this in a few days - want to clear off some other stuff...

regards
alan

> - Stig
>
>On Wed, 2002-06-12 at 14:26, Alan Knowles wrote:
>  
>
>>Attached hopefully is the re2c source for a html tokenizer - I added it 
>>to tokenizer.c - any thoughts on inclusion?
>>
>>regards
>>alan
>>
>>----
>>
>>    
>>
>
>  
>
>>enum {
>>        STATE_PLAIN = 0,
>>        STATE_TAG,
>>        STATE_NEXT_ARG,
>>        STATE_ARG,
>>        STATE_BEFORE_VAL,
>>        STATE_VAL
>>};
>>
>>/*!re2c
>>any = [\000-\377];
>>N = (any\[<]);
>>alpha = [a-zA-Z];
>>alphanumeric = [a-zA-Z0-9];
>>*/
>>
>>
>>
>>#define YYFILL(n) goto stop
>>#define YYCTYPE unsigned char
>>#define YYCURSOR xp
>>#define YYLIMIT end
>>#define YYMARKER q
>>#define STATE state
>>
>>PHP_FUNCTION(token_html)
>>{
>>      char *source = NULL;
>>      int argc = ZEND_NUM_ARGS();
>>      int source_len;
>>      int state;
>>      char *end, *q;
>>      char *xp;
>>      char *start;
>>      zval *tag, *attribute;
>>      
>>      if (zend_parse_parameters(argc TSRMLS_CC, "s", &source, &source_len) == 
>FAILURE) 
>>              return;
>>      
>>      YYCURSOR = source;
>>      YYLIMIT = source + source_len;
>>      STATE = STATE_PLAIN;
>>      
>>      array_init(return_value);
>>      switch (STATE) {
>>              case STATE_PLAIN:       goto state_plain;
>>              case STATE_TAG:         goto state_tag;
>>              case STATE_NEXT_ARG:    goto state_next_arg;
>>              case STATE_ARG:         goto state_arg;
>>              case STATE_BEFORE_VAL:  goto state_before_val;
>>              case STATE_VAL:         goto state_val;
>>      }
>>      
>>      /* 
>>      
>>      I need to split the stuff into:
>>              array ( "TAG", array("name"=>"value","name"=>"value"))
>>              or 
>>              string
>>      
>>      
>>      add_next_index_zval(return_value, tag);handle_tag(STD_ARGS); 
>>      */
>>
>>
>>
>>state_plain_begin:
>>      STATE = STATE_PLAIN;
>>      
>>state_plain:
>>      start = YYCURSOR;
>>/*!re2c
>>  "<"                         { STATE = STATE_TAG; goto state_tag; }
>>  N+                          { add_next_index_stringl(return_value, start , xp - 
>start  , 1); goto state_plain; }
>>*/
>>
>>state_tag:    
>>      start = YYCURSOR;
>>      
>>// start -> xp contains current pos,  
>>// needs to deal with comments !-- and ?xml or php etc.
>>/*!re2c
>>  [/!]? alphanumeric+ { MAKE_STD_ZVAL(tag); array_init(tag); 
>add_next_index_stringl(tag, start, xp - start, 1); goto state_next_arg_begin; }
>>  "!" "-" "-"          { MAKE_STD_ZVAL(tag); array_init(tag); 
>add_next_index_stringl(tag, start, xp - start, 1); goto state_comment_begin; }
>>  any                {  add_next_index_stringl(return_value, "<",1 , 1); --YYCURSOR; 
>goto state_plain_begin; }
>>*/
>>
>>
>>
>>state_comment_begin:
>>      start = YYCURSOR;
>>        
>>state_comment_next:        
>>        
>>/*!re2c
>>    "-" "-" ">"           { add_next_index_stringl(tag, start, xp - start -3, 1); 
>add_next_index_zval(return_value, tag); goto state_plain_begin; }
>>    any                 { goto state_comment_next; }
>>*/
>>
>>state_next_arg_begin:
>>      STATE = STATE_NEXT_ARG;
>>              
>>// at first bit after < or just after a name or name='xxxx'   
>>state_next_arg:
>>      start = YYCURSOR;
>>/*!re2c
>>  ">"         { add_next_index_zval(return_value, tag); goto state_plain_begin; }
>>  [ \v\t\n]+  { goto state_next_arg; }
>>  alpha               { --YYCURSOR; STATE = STATE_ARG; goto state_arg; }
>>  "/"           { MAKE_STD_ZVAL(attribute); array_init(attribute); 
>add_next_index_stringl(attribute, start, xp - start, 1);add_next_index_zval(tag, 
>attribute);  goto state_next_arg; }
>>  ["] (any\["])* ["]  { MAKE_STD_ZVAL(attribute); array_init(attribute); 
>add_next_index_stringl(attribute, start + 1, xp - start -2, 1); 
>add_next_index_stringl(attribute, "\"", 1, 1); add_next_index_zval(tag, attribute); 
>goto state_next_arg_begin; }
>>  ['] (any\['])* [']  { MAKE_STD_ZVAL(attribute); array_init(attribute); 
>add_next_index_stringl(attribute, start + 1, xp - start -2, 1); 
>add_next_index_stringl(attribute, "'", 1, 1); add_next_index_zval(tag, attribute); 
>goto state_next_arg_begin; }
>>  any         { add_next_index_zval(return_value, tag); goto state_plain_begin; }
>>*/
>>
>>state_arg:
>>      start = YYCURSOR;
>>/*!re2c
>>  alpha+      {  MAKE_STD_ZVAL(attribute); array_init(attribute); 
>add_next_index_stringl(attribute, start, xp - start, 1); STATE = STATE_BEFORE_VAL; 
>goto state_before_val; }
>>  any         { --YYCURSOR; STATE = STATE_ARG; goto state_next_arg; }
>>*/
>>
>>state_before_val:
>>      start = YYCURSOR;
>>/*!re2c
>>  [ ]* "=" [ ]*               { STATE = STATE_VAL; goto state_val; }
>>  any                 { add_next_index_zval(tag, attribute); --YYCURSOR; goto 
>state_next_arg_begin; }
>>*/
>>
>>
>>state_val:
>>      start = YYCURSOR;
>>/*!re2c
>>  ["] (any\["])* ["]  { add_next_index_stringl(attribute, start + 1, xp - start -2, 
>1); add_next_index_stringl(attribute, "\"", 1, 1); add_next_index_zval(tag, 
>attribute); goto state_next_arg_begin; }
>>  ['] (any\['])* [']  { add_next_index_stringl(attribute, start + 1, xp - start -2, 
>1); add_next_index_stringl(attribute, "'", 1, 1); add_next_index_zval(tag, 
>attribute); goto state_next_arg_begin; }
>>  (any\[ \n>"'])+     { add_next_index_stringl(attribute, start, xp - start, 1); 
>add_next_index_zval(tag, attribute); goto state_next_arg_begin;  }
>>  any                 { add_next_index_zval(tag, attribute); --YYCURSOR; goto 
>state_next_arg_begin; }
>>*/
>>
>>stop:
>>      // should do a bit of checking - adding loose attribute or tags to return 
>value....
>>      
>>
>>}
>>
>>----
>>
>>    
>>
>
>  
>
>>-- 
>>PHP Development Mailing List <http://www.php.net/>
>>To unsubscribe, visit: http://www.php.net/unsub.php
>>    
>>




-- 
PHP Development Mailing List <http://www.php.net/>
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to