DIV lexing)

Luke Kanies Thu, 24 Sep 2009 17:33:45 -0700

+1

On Sep 24, 2009, at 12:37 PM, Markus Roberts wrote:


>
> This is my proposed attack on the lexing problem, with a few minor
> cleanups to simplify its integration.  The strategy:
>
> * Annotate tokens with a method "acceptable?" that determines if
>  they can be generated in a given context.  Have this default
>  to true.
> * Give the lexer the notion of a context; initialize it and
>  update it as needed.  The present context records the name of
>  the last significant token generated and a start_of_line flag.
> * When a token is found to match, check if it is acceptable in
>  the present context before generating it.
>
> These changes don't result in any change in behaviour but they
> enable:
>
> * Give the REGEX token an acceptable? rule that only permits a
>  regular expression in specific contexts.
>
> The other changes were a fix to the scan bug Brice reported,
> adjusting a test and clearing up some cluttered conditions in the
> context collection path.
>
> Added tests and subsumed change restricting REGEX to one line.
>
> Signed-off-by: Markus Roberts <[email protected]>
> ---
> lib/puppet/parser/lexer.rb |   60 ++++++++++++++++++++ 
> +----------------------
> spec/unit/parser/lexer.rb  |   39 +++++++++++++++++++++++-----
> 2 files changed, 61 insertions(+), 38 deletions(-)
>
> diff --git a/lib/puppet/parser/lexer.rb b/lib/puppet/parser/lexer.rb
> index e027a69..bb4fdf9 100644
> --- a/lib/puppet/parser/lexer.rb
> +++ b/lib/puppet/parser/lexer.rb
> @@ -11,7 +11,7 @@ end
> module Puppet::Parser; end
>
> class Puppet::Parser::Lexer
> -    attr_reader :last, :file
> +    attr_reader :last, :file, :lexing_context
>
>     attr_accessor :line, :indefine
>
> @@ -41,6 +41,11 @@ class Puppet::Parser::Lexer
>                 @name.to_s
>             end
>         end
> +
> +        def acceptable?(context={})
> +            # By default tokens are aceeptable in any context
> +            true
> +        end
>     end
>
>     # Maintain a list of tokens.
> @@ -171,7 +176,7 @@ class Puppet::Parser::Lexer
>         [self,value]
>     end
>
> -    TOKENS.add_token :REGEX, %r{/[^/]*/} do |lexer, value|
> +    regex_token = TOKENS.add_token :REGEX, %r{/[^/\n]*/} do |lexer,  
> value|
>         # Make sure we haven't matched an escaped /
>         while value[-2..-2] == '\\'
>             other = lexer.scan_until(%r{/})
> @@ -181,6 +186,10 @@ class Puppet::Parser::Lexer
>         [self, Regexp.new(regex)]
>     end
>
> +    def regex_token.acceptable?(context={})
> +        [:NODE,:LBRACE,:RBRACE,:MATCH,:NOMATCH,:COMMA].include?  
> context[:after]
> +    end
> +
>     TOKENS.add_token :RETURN, "\n", :skip => true, :incr_line =>  
> true, :skip_text => true
>
>     TOKENS.add_token :SQUOTE, "'" do |lexer, value|
> @@ -286,36 +295,28 @@ class Puppet::Parser::Lexer
>     # Find the next token that matches a regex.  We look for these  
> first.
>     def find_regex_token
>         @regex += 1
> -        matched_token = nil
> -        value = ""
> -        length = 0
> +        best_token = nil
> +        best_length = 0
>
>         # I tried optimizing based on the first char, but it had
>         # a slightly negative affect and was a good bit more  
> complicated.
>         TOKENS.regex_tokens.each do |token|
> -            next unless match_length = @scanner.match?(token.regex)
> -
> -            # We've found a longer match
> -            if match_length > length
> -                value = @scanner.scan(token.regex)
> -                length = value.length
> -                matched_token = token
> +            if length = @scanner.match?(token.regex) and  
> token.acceptable?(lexing_context)
> +                # We've found a longer match
> +                if length > best_length
> +                    best_length = length
> +                    best_token = token
> +                end
>             end
>         end
>
> -        return matched_token, value
> +        return best_token, @scanner.scan(best_token.regex) if  
> best_token
>     end
>
>     # Find the next token, returning the string and the token.
>     def find_token
>         @find += 1
> -        matched_token, value = find_regex_token
> -
> -        unless matched_token
> -            matched_token, value = find_string_token
> -        end
> -
> -        return matched_token, value
> +        find_regex_token || find_string_token
>     end
>
>     def indefine?
> @@ -345,6 +346,7 @@ class Puppet::Parser::Lexer
>         @indefine = false
>         @expected = []
>         @commentstack = [ ['', @line] ]
> +        @lexing_context = {:after => nil, :start_of_line => true}
>     end
>
>     # Make any necessary changes to the token and/or value.
> @@ -417,17 +419,11 @@ class Puppet::Parser::Lexer
>                 raise "Could not match '%s'" % nword
>             end
>
> -            if matched_token.name == :RETURN
> -                # this matches a blank line
> -                if @last_return
> -                    # eat the previously accumulated comments
> -                    getcomment
> -                end
> -                # since :RETURN skips, we won't survive to  
> munge_token
> -                @last_return = true
> -            else
> -                @last_return = false
> -            end
> +            newline = matched_token.name == :RETURN
> +
> +            # this matches a blank line; eat the previously  
> accumulated comments
> +            getcomment if lexing_context[:start_of_line] and newline
> +            lexing_context[:start_of_line] = newline
>
>             final_token, token_value = munge_token(matched_token,  
> value)
>
> @@ -436,6 +432,8 @@ class Puppet::Parser::Lexer
>                 next
>             end
>
> +            lexing_context[:after]         = final_token.name  
> unless newline
> +
>             value = token_value[:value]
>
>             if match = @@pairs[value] and final_token.name ! 
> = :DQUOTE and final_token.name != :SQUOTE
> diff --git a/spec/unit/parser/lexer.rb b/spec/unit/parser/lexer.rb
> index 1c3e91b..959f360 100755
> --- a/spec/unit/parser/lexer.rb
> +++ b/spec/unit/parser/lexer.rb
> @@ -460,18 +460,43 @@ describe Puppet::Parser::Lexer::TOKENS[:REGEX]  
> do
>         @token.regex.should =~ '/this is a regex/'
>     end
>
> -    describe "when including escaped slashes" do
> -        before { @lexer = Puppet::Parser::Lexer.new }
> +    it 'should not match if there is \n in the regex' do
> +        @token.regex.should_not =~ "/this is \n a regex/"
> +    end
>
> -        it "should not consider escaped slashes to be the end of a  
> regex" do
> -            @lexer.string = "/this \\/ foo/"
> +    describe "when scanning" do
> +        def tokens_scanned_from(s)
> +            lexer = Puppet::Parser::Lexer.new
> +            lexer.string = s
>             tokens = []
> -            @lexer.scan do |name, value|
> +            lexer.scan do |name, value|
>                 tokens << value
>             end
> -            tokens[0][:value].should == Regexp.new("this / foo")
> +            tokens[0..-2]
>         end
> -    end
> +
> +        it "should not consider escaped slashes to be the end of a  
> regex" do
> +            tokens_scanned_from("$x =~ /this \\/  
> foo/").last[:value].should == Regexp.new("this / foo")
> +        end
> +
> +        it "should not lex chained division as a regex" do
> +            tokens_scanned_from("$x = $a/$b/$c").any? {|t|  
> t[:value].class == Regexp }.should == false
> +        end
> +
> +        it "should accept a regular expression after NODE" do
> +            tokens_scanned_from("node /www.*\.mysite 
> \.org/").last[:value].should == Regexp.new("www.*\.mysite\.org")
> +        end
> +
> +        it "should accept regular expressions in a CASE" do
> +            s = %q{case $variable {
> +                "something": {$othervar = 4096 / 2}
> +                /regex/: {notice("this notably sucks")}
> +                }
> +            }
> +            tokens_scanned_from(s)[12][:value].should ==  
> Regexp.new("regex")
> +        end
> +
> +   end
>
>
>     it "should return the REGEX token and a Regexp" do
> -- 
> 1.6.4
>
>
> >


-- 
The great aim of education is not knowledge but action.
     -- Herbert Spencer
---------------------------------------------------------------------
Luke Kanies | http://reductivelabs.com | http://madstop.com


--~--~---------~--~----~------------~-------~--~----~
You received this message because you are subscribed to the Google Groups 
"Puppet Developers" group.
To post to this group, send email to [email protected]
To unsubscribe from this group, send email to 
[email protected]
For more options, visit this group at 
http://groups.google.com/group/puppet-dev?hl=en
-~----------~----~----~----~------~----~------~--~---

[Puppet-dev] Re: [PATCH/puppet 1/1] Proposed alternative solution for #2664 (REGEX / DIV lexing)

Reply via email to