This is my proposed attack on the lexing problem, with a few minor cleanups to simplify its integration. The strategy:
* Annotate tokens with a method "acceptable?" that determines whether
  they can be generated in a given context; have this default to true.
* Give the lexer the notion of a context; initialize it and update it
  as needed.  The present context records the name of the last
  significant token generated and a start_of_line flag.
* When a token matches, check that it is acceptable in the present
  context before generating it.

These changes don't result in any change in behaviour, but they enable:

* Give the REGEX token an acceptable? rule that only permits a regular
  expression in specific contexts.

The other changes are a fix for the scan bug Brice reported, an
adjusted test, and some cleanup of cluttered conditions on the
context-collection path.  (For anyone who wants to experiment with the
idea in isolation, a standalone sketch of the acceptance mechanism
follows the patch.)

Signed-off-by: Markus Roberts <[email protected]>
---
 lib/puppet/parser/lexer.rb |   60 +++++++++++++++++++++----------------
 spec/unit/parser/lexer.rb  |    4 +-
 2 files changed, 31 insertions(+), 33 deletions(-)

diff --git a/lib/puppet/parser/lexer.rb b/lib/puppet/parser/lexer.rb
index e027a69..f7496e2 100644
--- a/lib/puppet/parser/lexer.rb
+++ b/lib/puppet/parser/lexer.rb
@@ -11,7 +11,7 @@ end
 module Puppet::Parser; end
 
 class Puppet::Parser::Lexer
-    attr_reader :last, :file
+    attr_reader :last, :file, :lexing_context
 
     attr_accessor :line, :indefine
 
@@ -41,6 +41,11 @@ class Puppet::Parser::Lexer
                 @name.to_s
             end
         end
+
+        def acceptable?(context={})
+            # By default tokens are acceptable in any context
+            true
+        end
     end
 
     # Maintain a list of tokens.
@@ -171,7 +176,7 @@ class Puppet::Parser::Lexer
         [self,value]
     end
 
-    TOKENS.add_token :REGEX, %r{/[^/]*/} do |lexer, value|
+    regex_token = TOKENS.add_token :REGEX, %r{/[^/]*/} do |lexer, value|
         # Make sure we haven't matched an escaped /
         while value[-2..-2] == '\\'
             other = lexer.scan_until(%r{/})
@@ -181,6 +186,10 @@ class Puppet::Parser::Lexer
         [self, Regexp.new(regex)]
     end
 
+    def regex_token.acceptable?(context={})
+        [:NODE,:LBRACE,:RBRACE,:MATCH,:NOMATCH].include? context[:after]
+    end
+
     TOKENS.add_token :RETURN, "\n", :skip => true, :incr_line => true, :skip_text => true
 
     TOKENS.add_token :SQUOTE, "'" do |lexer, value|
@@ -286,36 +295,28 @@ class Puppet::Parser::Lexer
     # Find the next token that matches a regex. We look for these first.
     def find_regex_token
         @regex += 1
-        matched_token = nil
-        value = ""
-        length = 0
+        best_token = nil
+        best_length = 0
 
         # I tried optimizing based on the first char, but it had
         # a slightly negative affect and was a good bit more complicated.
        TOKENS.regex_tokens.each do |token|
-            next unless match_length = @scanner.match?(token.regex)
-
-            # We've found a longer match
-            if match_length > length
-                value = @scanner.scan(token.regex)
-                length = value.length
-                matched_token = token
+            if length = @scanner.match?(token.regex) and token.acceptable?(lexing_context)
+                # We've found a longer match
+                if length > best_length
+                    best_length = length
+                    best_token = token
+                end
             end
         end
 
-        return matched_token, value
+        return best_token, @scanner.scan(best_token.regex) if best_token
     end
 
     # Find the next token, returning the string and the token.
     def find_token
         @find += 1
-        matched_token, value = find_regex_token
-
-        unless matched_token
-            matched_token, value = find_string_token
-        end
-
-        return matched_token, value
+        find_regex_token || find_string_token
     end
 
     def indefine?
@@ -345,6 +346,7 @@ class Puppet::Parser::Lexer
         @indefine = false
         @expected = []
         @commentstack = [ ['', @line] ]
+        @lexing_context = {:after => nil, :start_of_line => true}
     end
 
     # Make any necessary changes to the token and/or value.
@@ -417,17 +419,13 @@ class Puppet::Parser::Lexer
             raise "Could not match '%s'" % nword
         end
 
-        if matched_token.name == :RETURN
-            # this matches a blank line
-            if @last_return
-                # eat the previously accumulated comments
-                getcomment
-            end
-            # since :RETURN skips, we won't survive to munge_token
-            @last_return = true
-        else
-            @last_return = false
-        end
+        newline = matched_token.name == :RETURN
+
+        # this matches a blank line; eat the previously accumulated comments
+        getcomment if lexing_context[:start_of_line] and newline
+
+        lexing_context[:after] = matched_token.name unless newline
+        lexing_context[:start_of_line] = newline
 
         final_token, token_value = munge_token(matched_token, value)
 
diff --git a/spec/unit/parser/lexer.rb b/spec/unit/parser/lexer.rb
index 1c3e91b..894cfc8 100755
--- a/spec/unit/parser/lexer.rb
+++ b/spec/unit/parser/lexer.rb
@@ -464,12 +464,12 @@ describe Puppet::Parser::Lexer::TOKENS[:REGEX] do
     before { @lexer = Puppet::Parser::Lexer.new }
 
     it "should not consider escaped slashes to be the end of a regex" do
-        @lexer.string = "/this \\/ foo/"
+        @lexer.string = "$x =~ /this \\/ foo/"
         tokens = []
         @lexer.scan do |name, value|
             tokens << value
         end
-        tokens[0][:value].should == Regexp.new("this / foo")
+        tokens[-2][:value].should == Regexp.new("this / foo")
     end
 end
-- 
1.6.4
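
Here is the promised standalone sketch of the acceptance mechanism. All
the names (MiniToken, lex, the four-token table) are invented for
illustration, and the loop is a simplification of find_regex_token, so
treat it as a toy rather than the real lexer; it just shows how a
per-token acceptable? predicate plus an :after slot in the context keeps
a regex literal from shadowing division while leaving longest-match in
charge everywhere else:

require 'strscan'

# Toy token: a name, a pattern, and a context veto (all hypothetical).
class MiniToken
    attr_reader :name, :regex

    def initialize(name, regex)
        @name, @regex = name, regex
    end

    # By default tokens are acceptable in any context.
    def acceptable?(context={})
        true
    end
end

NAME  = MiniToken.new(:NAME,  /\$?\w+/)
DIV   = MiniToken.new(:DIV,   %r{/})
MATCH = MiniToken.new(:MATCH, /=~/)
REGEX = MiniToken.new(:REGEX, %r{/[^/]*/})

# A regex literal may only follow tokens that can introduce one.
def REGEX.acceptable?(context={})
    [:NODE, :LBRACE, :RBRACE, :MATCH, :NOMATCH].include? context[:after]
end

TOKENS = [NAME, DIV, MATCH, REGEX]

def lex(input)
    scanner = StringScanner.new(input)
    context = {:after => nil, :start_of_line => true}
    result  = []
    until scanner.eos?
        next if scanner.skip(/\s+/)
        # Longest *acceptable* match wins, as in find_regex_token.
        candidates = TOKENS.select do |t|
            scanner.match?(t.regex) and t.acceptable?(context)
        end
        best = candidates.max_by { |t| scanner.match?(t.regex) }
        break unless best
        result << [best.name, scanner.scan(best.regex)]
        context[:after] = best.name
    end
    result
end

p lex('$x =~ /foo/').map { |name, _| name }
#=> [:NAME, :MATCH, :REGEX]
p lex('$a / $b / $c').map { |name, _| name }
#=> [:NAME, :DIV, :NAME, :DIV, :NAME]

With the veto in place REGEX still wins on length whenever it's
permitted, but in arithmetic contexts it never even gets considered, so
each slash falls through to DIV.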
