+1 On Sep 24, 2009, at 12:37 PM, Markus Roberts wrote:
> > This is my proposed attack on the lexing problem, with a few minor > cleanups to simplify its integration. The strategy: > > * Annotate tokens with a method "acceptable?" that determines if > they can be generated in a given context. Have this default > to true. > * Give the lexer the notion of a context; initialize it and > update it as needed. The present context records the name of > the last significant token generated and a start_of_line flag. > * When a token is found to match, check if it is acceptable in > the present context before generating it. > > These changes don't result in any change in behaviour but they > enable: > > * Give the REGEX token an acceptable? rule that only permits a > regular expression in specific contexts. > > The other changes were a fix to the scan bug Brice reported, > adjusting a test and clearing up some cluttered conditions in the > context collection path. > > Added tests and subsumed change restricting REGEX to one line. > > Signed-off-by: Markus Roberts <[email protected]> > --- > lib/puppet/parser/lexer.rb | 60 ++++++++++++++++++++ > +---------------------- > spec/unit/parser/lexer.rb | 39 +++++++++++++++++++++++----- > 2 files changed, 61 insertions(+), 38 deletions(-) > > diff --git a/lib/puppet/parser/lexer.rb b/lib/puppet/parser/lexer.rb > index e027a69..bb4fdf9 100644 > --- a/lib/puppet/parser/lexer.rb > +++ b/lib/puppet/parser/lexer.rb > @@ -11,7 +11,7 @@ end > module Puppet::Parser; end > > class Puppet::Parser::Lexer > - attr_reader :last, :file > + attr_reader :last, :file, :lexing_context > > attr_accessor :line, :indefine > > @@ -41,6 +41,11 @@ class Puppet::Parser::Lexer > @name.to_s > end > end > + > + def acceptable?(context={}) > + # By default tokens are aceeptable in any context > + true > + end > end > > # Maintain a list of tokens. 
> @@ -171,7 +176,7 @@ class Puppet::Parser::Lexer > [self,value] > end > > - TOKENS.add_token :REGEX, %r{/[^/]*/} do |lexer, value| > + regex_token = TOKENS.add_token :REGEX, %r{/[^/\n]*/} do |lexer, > value| > # Make sure we haven't matched an escaped / > while value[-2..-2] == '\\' > other = lexer.scan_until(%r{/}) > @@ -181,6 +186,10 @@ class Puppet::Parser::Lexer > [self, Regexp.new(regex)] > end > > + def regex_token.acceptable?(context={}) > + [:NODE,:LBRACE,:RBRACE,:MATCH,:NOMATCH,:COMMA].include? > context[:after] > + end > + > TOKENS.add_token :RETURN, "\n", :skip => true, :incr_line => > true, :skip_text => true > > TOKENS.add_token :SQUOTE, "'" do |lexer, value| > @@ -286,36 +295,28 @@ class Puppet::Parser::Lexer > # Find the next token that matches a regex. We look for these > first. > def find_regex_token > @regex += 1 > - matched_token = nil > - value = "" > - length = 0 > + best_token = nil > + best_length = 0 > > # I tried optimizing based on the first char, but it had > # a slightly negative affect and was a good bit more > complicated. > TOKENS.regex_tokens.each do |token| > - next unless match_length = @scanner.match?(token.regex) > - > - # We've found a longer match > - if match_length > length > - value = @scanner.scan(token.regex) > - length = value.length > - matched_token = token > + if length = @scanner.match?(token.regex) and > token.acceptable?(lexing_context) > + # We've found a longer match > + if length > best_length > + best_length = length > + best_token = token > + end > end > end > > - return matched_token, value > + return best_token, @scanner.scan(best_token.regex) if > best_token > end > > # Find the next token, returning the string and the token. > def find_token > @find += 1 > - matched_token, value = find_regex_token > - > - unless matched_token > - matched_token, value = find_string_token > - end > - > - return matched_token, value > + find_regex_token || find_string_token > end > > def indefine? 
> @@ -345,6 +346,7 @@ class Puppet::Parser::Lexer > @indefine = false > @expected = [] > @commentstack = [ ['', @line] ] > + @lexing_context = {:after => nil, :start_of_line => true} > end > > # Make any necessary changes to the token and/or value. > @@ -417,17 +419,11 @@ class Puppet::Parser::Lexer > raise "Could not match '%s'" % nword > end > > - if matched_token.name == :RETURN > - # this matches a blank line > - if @last_return > - # eat the previously accumulated comments > - getcomment > - end > - # since :RETURN skips, we won't survive to > munge_token > - @last_return = true > - else > - @last_return = false > - end > + newline = matched_token.name == :RETURN > + > + # this matches a blank line; eat the previously > accumulated comments > + getcomment if lexing_context[:start_of_line] and newline > + lexing_context[:start_of_line] = newline > > final_token, token_value = munge_token(matched_token, > value) > > @@ -436,6 +432,8 @@ class Puppet::Parser::Lexer > next > end > > + lexing_context[:after] = final_token.name > unless newline > + > value = token_value[:value] > > if match = @@pairs[value] and final_token.name ! 
> = :DQUOTE and final_token.name != :SQUOTE > diff --git a/spec/unit/parser/lexer.rb b/spec/unit/parser/lexer.rb > index 1c3e91b..959f360 100755 > --- a/spec/unit/parser/lexer.rb > +++ b/spec/unit/parser/lexer.rb > @@ -460,18 +460,43 @@ describe Puppet::Parser::Lexer::TOKENS[:REGEX] > do > @token.regex.should =~ '/this is a regex/' > end > > - describe "when including escaped slashes" do > - before { @lexer = Puppet::Parser::Lexer.new } > + it 'should not match if there is \n in the regex' do > + @token.regex.should_not =~ "/this is \n a regex/" > + end > > - it "should not consider escaped slashes to be the end of a > regex" do > - @lexer.string = "/this \\/ foo/" > + describe "when scanning" do > + def tokens_scanned_from(s) > + lexer = Puppet::Parser::Lexer.new > + lexer.string = s > tokens = [] > - @lexer.scan do |name, value| > + lexer.scan do |name, value| > tokens << value > end > - tokens[0][:value].should == Regexp.new("this / foo") > + tokens[0..-2] > end > - end > + > + it "should not consider escaped slashes to be the end of a > regex" do > + tokens_scanned_from("$x =~ /this \\/ > foo/").last[:value].should == Regexp.new("this / foo") > + end > + > + it "should not lex chained division as a regex" do > + tokens_scanned_from("$x = $a/$b/$c").any? {|t| > t[:value].class == Regexp }.should == false > + end > + > + it "should accept a regular expression after NODE" do > + tokens_scanned_from("node /www.*\.mysite > \.org/").last[:value].should == Regexp.new("www.*\.mysite\.org") > + end > + > + it "should accept regular expressions in a CASE" do > + s = %q{case $variable { > + "something": {$othervar = 4096 / 2} > + /regex/: {notice("this notably sucks")} > + } > + } > + tokens_scanned_from(s)[12][:value].should == > Regexp.new("regex") > + end > + > + end > > > it "should return the REGEX token and a Regexp" do > -- > 1.6.4 > > > > -- The great aim of education is not knowledge but action. 
-- Herbert Spencer --------------------------------------------------------------------- Luke Kanies | http://reductivelabs.com | http://madstop.com --~--~---------~--~----~------------~-------~--~----~ You received this message because you are subscribed to the Google Groups "Puppet Developers" group. To post to this group, send email to [email protected] To unsubscribe from this group, send email to [email protected] For more options, visit this group at http://groups.google.com/group/puppet-dev?hl=en -~----------~----~----~----~------~----~------~--~---
