geirm       00/11/02 19:26:34

  Modified:    src/java/org/apache/velocity/runtime/parser Parser.jjt
  Log:
  The Great Escape.  Changes escape handling to be less invasive: the only time
  '\' acts as an escape character is when it is in front of a valid reference in
  the context, or a valid VTL directive (so \#set is output as #set, and \#schmoo
  is output as \#schmoo).  See my post to the list(s) for a more detailed
  explanation if needed.
  
  Revision  Changes    Path
  1.16      +139 -69   
jakarta-velocity/src/java/org/apache/velocity/runtime/parser/Parser.jjt
  
  Index: Parser.jjt
  ===================================================================
  RCS file: 
/home/cvs/jakarta-velocity/src/java/org/apache/velocity/runtime/parser/Parser.jjt,v
  retrieving revision 1.15
  retrieving revision 1.16
  diff -u -r1.15 -r1.16
  --- Parser.jjt        2000/11/02 03:53:04     1.15
  +++ Parser.jjt        2000/11/03 03:26:32     1.16
  @@ -127,7 +127,7 @@
    *
    * @author <a href="mailto:[EMAIL PROTECTED]">Jason van Zyl</a>
    * @author <a href="mailto:[EMAIL PROTECTED]">Geir Magnusson Jr.</a>
  - * @version $Id: Parser.jjt,v 1.15 2000/11/02 03:53:04 geirm Exp $ 
  + * @version $Id: Parser.jjt,v 1.16 2000/11/03 03:26:32 geirm Exp $ 
   */
   public class Parser
   {
  @@ -167,16 +167,19 @@
       {
           token_source.clearStateVars();
           ReInit(stream);  
  -    
  -       // return process();
  -
  -        SimpleNode n = null;
  +        return process();
   
  -        try {
  -            n = process();
  -        } catch (Exception e ) { System.out.println( e ); }
  +        /*
  +         * leave the following. Useful for debugging the parser.  thx - geir
  +         */
   
  -        return n;
  +       //SimpleNode n = null;
  +       //
  +       // try {
  +       //     n = process();
  +       // } catch (Exception e ) { System.out.println( e ); }   
  +       // 
  +       //  return n;
       }        
   
       public void setDirectives(Hashtable directives)
  @@ -472,7 +475,7 @@
               SwitchTo( REFMOD2 ); 
       }
   
  -|   <RPAREN: ")" (" ")* ("\n")?>
  +|   <RPAREN: ")" (" ")* ( "\n" | "\r" | "\r\n" )?>
       {
          RPARENHandler();
       }    
  @@ -490,14 +493,48 @@
       }    
   }
   
  +/*----------------------------------------------
  + *
  + *  escape "\\" handling for the built-in directives
  + *
  + *--------------------------------------------- */
  +TOKEN:
  +{
  +    /*
  +     *  We have to do this, because we want these to be a Text node, and
  +     *  whatever follows to be peer to this text in the tree.
  +     *  And we don't want to try and deal with this w/in the AST.
  +     *
  +     *  This is really simplistic.  I actually would prefer to find them in
  +     *  grammatical context, but I am neither smart nor rested, a recipe
  +     *  for disaster, another long night with Mr. Parser, or both.
  +     */
  +
  +     <ESCAPE_SET_DIRECTIVE : "\\#set">   
  +     { matchedToken.image = "#set"; }
  +|    <ESCAPE_IF_DIRECTIVE : "\\#if">   
  +     { matchedToken.image = "#if"; }
  +|    <ESCAPE_END_DIRECTIVE : "\\#end">   
  +     { matchedToken.image = "#end"; }
  +|    <ESCAPE_INCLUDE_DIRECTIVE: "\\#include"> 
  +     { matchedToken.image = "#include"; }
  +|    <ESCAPE_ELSEIF_DIRECTIVE: "\\#elseif">
  +     { matchedToken.image = "#elseif"; }
  +|    <ESCAPE_ELSE_DIRECTIVE: "\\#else"> 
  +     { matchedToken.image = "#else"; }
  +|    <ESCAPE_STOP_DIRECTIVE: "\\#stop">
  +     { matchedToken.image = "#stop"; }
  +
  +}
  +
   <*>
   MORE :
   {
  -    //
  -    //  Note : DOLLARBANG is a duplicate of DOLLAR.  They must be identical.
  -    //
  +    /*
  +     *   Note : DOLLARBANG is a duplicate of DOLLAR.  They must be identical.
  +     */
   
  -    <DOLLAR: "$"> 
  +    <DOLLAR: ("\\")? "$"> 
       { 
           if (! inComment)
           {
  @@ -548,7 +585,7 @@
           SwitchTo( IN_MULTI_LINE_COMMENT ); 
       } 
   
  -|   <HASH : "#"> 
  +|   <HASH : ("\\")? "#"> 
       { 
           if (! inComment)
           {
  @@ -565,7 +602,7 @@
   
   TOKEN :
   {
  -    <ESCAPE_SEQUENCE: "\\" ~[] >
  +    <ESCAPE: "\\">
   |   <TEXT: (~["$", "#", "\\"])+ >
   }    
   
  @@ -581,7 +618,6 @@
     { 
        inComment = false;
        stateStackPop();
  -    // was :DEFAULT
     } 
   
   }
  @@ -593,7 +629,6 @@
     {     
       inComment = false;
       stateStackPop();
  -    // was :DEFAULT
     } 
   }
   
  @@ -604,7 +639,6 @@
     { 
       inComment = false; 
       stateStackPop();
  -    // was :DEFAULT
     } 
   }
   
  @@ -642,10 +676,6 @@
           }
           incMode = false;
       
  -       // from jason :
  -       // if (lparen == 0)
  -       //     SwitchTo(DEFAULT);
  -    
           /*
            *  - if we are in REFERENCE || REFMODIFIER then " is an ender
            *  - if we are in DIRECTIVE and haven't seen ( yet, then also drop out. 
  @@ -706,7 +736,7 @@
   <DIRECTIVE> 
   TOKEN :
   {
  -    <END: "end" ("\n")?> 
  +    <END: "end" ( "\n" | "\r" | "\r\n" )?> 
       { 
           inDirective = false; 
           stateStackPop();
  @@ -760,7 +790,7 @@
    *
    *  This is more than a single state, because of the  structure of
    *  the VTL references.  We use three states because the set of tokens
  - *  for each state is different.
  + *  for each state can be different.
    *
    *  $foo.bar( "arg" )
    *  ^   ^   ^
  @@ -817,7 +847,7 @@
            *  These can terminate a reference, but are needed in subsequent states
            */
   
  -        if (image.toString().equals("=") || image.toString().equals("\n"))
  +        if (image.toString().equals("=") || image.toString().equals("\n") || 
image.toString().equals("\r") )
           {
               input_stream.backup(1);
           } 
  @@ -938,18 +968,31 @@
   }
   {
       /*
  -     *  Get the directive identifier and check to see if this is a directive.
  -     *   Note that because '#' is a <MORE> token, the identifier is preceeded by 
the '#'
  +     *  There are two things we do here :
  +     *  1) If the first char is a \, then this has been escaped, so don't 
  +     *     continue.  We will take care of the rest in ASTDirective.java
  +     *  2) Get the directive identifier and check to see if this is a directive.
  +     *     Note that because '#' is a <MORE> token, the identifier is preceded by the '#'
        */
  +
       t = <WORD>
       {
  -        d = (Directive) directives.get(t.image.substring(1));
  -        
  -        if (d == null)
  +        if( t.image.startsWith("\\"))
           {
               token_source.stateStackPop();
               token_source.inDirective = false;
  -            return jjtThis;
  +            return jjtThis;            
  +        }
  +        else
  +        {
  +            d = (Directive) directives.get(t.image.substring(1));
  +        
  +            if (d == null)
  +            {
  +                token_source.stateStackPop();
  +                token_source.inDirective = false;
  +                return jjtThis;
  +            }
           }            
       }
   
  @@ -1040,11 +1083,14 @@
   |   <LPAREN>
   |   <NUMBER_LITERAL>
   |   <STRING_LITERAL>
  -|   t = <ESCAPE_SEQUENCE>
  -    {
  -        t.image = t.image.substring(1);
  -    }      
  -
  +|   <ESCAPE>
  +|   <ESCAPE_SET_DIRECTIVE>
  +|   <ESCAPE_IF_DIRECTIVE>
  +|   <ESCAPE_END_DIRECTIVE>   
  +|   <ESCAPE_INCLUDE_DIRECTIVE> 
  +|   <ESCAPE_ELSEIF_DIRECTIVE>
  +|   <ESCAPE_ELSE_DIRECTIVE> 
  +|   <ESCAPE_STOP_DIRECTIVE>
   }
   
   /* -----------------------------------------------------------------------
  @@ -1194,36 +1240,60 @@
   
   
   /* ======================================================================
  - *
  - *  Background
  - *  ----------
  - *
  - *  VTL == Velocity Template Language
  - *
  - *  The problem with parsing VTL is that an input stream consists generally of 
little
  - *  bits of VTL mixed in with 'other stuff' (hereafter referred to as 'schmoo').  
Unlike
  - *  other languages, like C or Java, where the parser can punt whenever it 
encounters 
  - *  input that doesn't conform to the grammar, the VTL parser can't do that.  
  - *
  - *  There are a few things that we do here :
  - *    - define a set of parser states (DEFAULT, DIRECTIVE, REFERENCE, etc)
  - *    - define for each parser state a set of tokens for each state
  - *    - define the VTL grammar, expressed (mostly) in the productions such as 
Text(), SetStatement(), etc.
  - *  
  - *  It is clear that this expression of the VTL grammer (the contents of this .jjt 
file) is maturing and
  - *  evolving as we learn more about how to parse VTL ( and as I learn about 
parsing...), so in the event 
  - *  this documentation is in disagreement w/ the source, the source takes 
precedence.
  - *
  - *  Parser States
  - *  -------------
  - *  DEFAULT :  This is the base or starting state, and strangely enough, the 
default state.
  - *
  - *  DIRECTIVE : This state is triggered by the '#' character, and is used when 
processing both
  - *     'defined' directives such as #if() #else #elseif() #end #set #foreach() as 
well as the 
  - *     so-called 'pluggable' directives (PDs), which include #foreach(), #macro() 
etc.  The
  - *     PDs are able to be implemented entirely outside of the parser.
  - *      - with the exception of #set, #else and #else, the 'shape' of a directive 
is  
  - *           #foo( optional args )
  - *      - 
  - */
  + 
  +   Background
  +   ----------
  + 
  +   VTL == Velocity Template Language
  + 
  +   The problem with parsing VTL is that an input stream consists generally of little
  +   bits of VTL mixed in with 'other stuff' (hereafter referred to as 'schmoo').  
Unlike
  +   other languages, like C or Java, where the parser can punt whenever it 
encounters 
  +   input that doesn't conform to the grammar, the VTL parser can't do that.  
  + 
  +   There are a few things that we do here :
  +     - define a set of parser states (DEFAULT, DIRECTIVE, REFERENCE, etc)
  +     - define a set of tokens for each parser state
  +     - define the VTL grammar, expressed (mostly) in the productions such as 
Text(), SetStatement(), etc.
  +   
  +   It is clear that this expression of the VTL grammar (the contents of this .jjt file) is maturing and
  +   evolving as we learn more about how to parse VTL ( and as I learn about 
parsing...), so in the event 
  +   this documentation is in disagreement w/ the source, the source takes precedence.
  + 
  +   Parser States
  +   -------------
  +   DEFAULT :  This is the base or starting state, and strangely enough, the default 
state.
  + 
  +   DIRECTIVE : This state is triggered by the '#' character, and is used when 
processing both
  +      'defined' directives such as #if() #else #elseif() #end #set #foreach() as 
well as the 
  +      so-called 'pluggable' directives (PDs), which include #foreach(), #macro() 
etc.  The
  +      PDs are able to be implemented entirely outside of the parser.
  +       - with the exception of #set, #else and #end, the 'shape' of a directive is
  +            #foo( optional args )
  +       - 
  + 
  +    (cont)
  +
  +    Escape Sequences
  +    ----------------
  +    The escape processing in VTL is very simple.  The '\' character acts only as an escape when :
  +    
  +        1) It precedes a reference that is in the context.
  +
  +        2) It precedes a defined directive (#set, #if, #end, etc) or a valid pluggable directive, 
  +            such as #foreach
  +
  +    In all other cases the '\' is just another piece of text.  The purpose of this 
is to allow the non-VTL
  +    parts of a template (the 'schmoo') to not have to be altered for processing by 
Velocity.
  +
  +    So if in the context $foo and $bar were defined and $woogie was not 
  +        
  +        \$foo  \$bar \$woogie
  +
  +    would output
  +
  +        $foo  $bar  \$woogie
  +
  +
  +*/
    
  
  
  

Reply via email to