This is my proposed attack on the lexing problem, with a few minor cleanups to simplify its integration. The strategy:
* Annotate tokens with a method "acceptable?" that determines whether
  they can be generated in a given context; have this default to true.
* Give the lexer the notion of a context; initialize it and update it
  as needed.  The present context records the name of the last
  significant token generated and a start_of_line flag.
* When a token matches, check that it is acceptable in the present
  context before generating it.

These changes don't result in any change in behaviour, but they enable:

* Give the REGEX token an acceptable? rule that only permits a regular
  expression in specific contexts.

The other changes are a fix for the scan bug Brice reported, an
adjusted test, and some cleanup of cluttered conditions on the
context-collection path.  (For anyone who wants to experiment with the
idea in isolation, a standalone sketch of the acceptance mechanism
follows the patch.)

Signed-off-by: Markus Roberts <[email protected]>
---
 lib/puppet/parser/lexer.rb |   60 +++++++++++++++++++++----------------
 spec/unit/parser/lexer.rb  |    4 +-
 2 files changed, 31 insertions(+), 33 deletions(-)

diff --git a/lib/puppet/parser/lexer.rb b/lib/puppet/parser/lexer.rb
index e027a69..f7496e2 100644
--- a/lib/puppet/parser/lexer.rb
+++ b/lib/puppet/parser/lexer.rb
@@ -11,7 +11,7 @@ end
 module Puppet::Parser; end
 
 class Puppet::Parser::Lexer
-    attr_reader :last, :file
+    attr_reader :last, :file, :lexing_context
 
     attr_accessor :line, :indefine
 
@@ -41,6 +41,11 @@ class Puppet::Parser::Lexer
                 @name.to_s
             end
         end
+
+        def acceptable?(context={})
+            # By default tokens are acceptable in any context
+            true
+        end
     end
 
     # Maintain a list of tokens.
@@ -171,7 +176,7 @@ class Puppet::Parser::Lexer
         [self,value]
     end
 
-    TOKENS.add_token :REGEX, %r{/[^/]*/} do |lexer, value|
+    regex_token = TOKENS.add_token :REGEX, %r{/[^/]*/} do |lexer, value|
         # Make sure we haven't matched an escaped /
         while value[-2..-2] == '\\'
             other = lexer.scan_until(%r{/})
@@ -181,6 +186,10 @@ class Puppet::Parser::Lexer
         [self, Regexp.new(regex)]
     end
 
+    def regex_token.acceptable?(context={})
+        [:NODE,:LBRACE,:RBRACE,:MATCH,:NOMATCH].include? context[:after]
+    end
+
     TOKENS.add_token :RETURN, "\n", :skip => true, :incr_line => true, :skip_text => true
 
     TOKENS.add_token :SQUOTE, "'" do |lexer, value|
@@ -286,36 +295,28 @@ class Puppet::Parser::Lexer
     # Find the next token that matches a regex. We look for these first.
     def find_regex_token
         @regex += 1
-        matched_token = nil
-        value = ""
-        length = 0
+        best_token = nil
+        best_length = 0
 
         # I tried optimizing based on the first char, but it had
         # a slightly negative affect and was a good bit more complicated.
        TOKENS.regex_tokens.each do |token|
-            next unless match_length = @scanner.match?(token.regex)
-
-            # We've found a longer match
-            if match_length > length
-                value = @scanner.scan(token.regex)
-                length = value.length
-                matched_token = token
+            if length = @scanner.match?(token.regex) and token.acceptable?(lexing_context)
+                # We've found a longer match
+                if length > best_length
+                    best_length = length
+                    best_token = token
+                end
             end
         end
 
-        return matched_token, value
+        return best_token, @scanner.scan(best_token.regex) if best_token
     end
 
     # Find the next token, returning the string and the token.
     def find_token
         @find += 1
-        matched_token, value = find_regex_token
-
-        unless matched_token
-            matched_token, value = find_string_token
-        end
-
-        return matched_token, value
+        find_regex_token || find_string_token
     end
 
     def indefine?
@@ -345,6 +346,7 @@ class Puppet::Parser::Lexer
         @indefine = false
         @expected = []
         @commentstack = [ ['', @line] ]
+        @lexing_context = {:after => nil, :start_of_line => true}
     end
 
     # Make any necessary changes to the token and/or value.
@@ -417,17 +419,13 @@ class Puppet::Parser::Lexer
             raise "Could not match '%s'" % nword
         end
 
-        if matched_token.name == :RETURN
-            # this matches a blank line
-            if @last_return
-                # eat the previously accumulated comments
-                getcomment
-            end
-            # since :RETURN skips, we won't survive to munge_token
-            @last_return = true
-        else
-            @last_return = false
-        end
+        newline = matched_token.name == :RETURN
+
+        # this matches a blank line; eat the previously accumulated comments
+        getcomment if lexing_context[:start_of_line] and newline
+
+        lexing_context[:after] = matched_token.name unless newline
+        lexing_context[:start_of_line] = newline
 
         final_token, token_value = munge_token(matched_token, value)
 
diff --git a/spec/unit/parser/lexer.rb b/spec/unit/parser/lexer.rb
index 1c3e91b..894cfc8 100755
--- a/spec/unit/parser/lexer.rb
+++ b/spec/unit/parser/lexer.rb
@@ -464,12 +464,12 @@ describe Puppet::Parser::Lexer::TOKENS[:REGEX] do
     before { @lexer = Puppet::Parser::Lexer.new }
 
     it "should not consider escaped slashes to be the end of a regex" do
-        @lexer.string = "/this \\/ foo/"
+        @lexer.string = "$x =~ /this \\/ foo/"
         tokens = []
         @lexer.scan do |name, value|
             tokens << value
         end
-        tokens[0][:value].should == Regexp.new("this / foo")
+        tokens[-2][:value].should == Regexp.new("this / foo")
     end
 end
-- 
1.6.4
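
Here is the promised standalone sketch of the acceptance mechanism. All
the names (MiniToken, lex, the four-token table) are invented for
illustration, and the loop is a simplification of find_regex_token, so
treat it as a toy rather than the real lexer; it just shows how a
per-token acceptable? predicate plus an :after slot in the context keeps
a regex literal from shadowing division while leaving longest-match in
charge everywhere else:

require 'strscan'

# Toy token: a name, a pattern, and a context veto (all hypothetical).
class MiniToken
    attr_reader :name, :regex

    def initialize(name, regex)
        @name, @regex = name, regex
    end

    # By default tokens are acceptable in any context.
    def acceptable?(context={})
        true
    end
end

NAME  = MiniToken.new(:NAME,  /\$?\w+/)
DIV   = MiniToken.new(:DIV,   %r{/})
MATCH = MiniToken.new(:MATCH, /=~/)
REGEX = MiniToken.new(:REGEX, %r{/[^/]*/})

# A regex literal may only follow tokens that can introduce one.
def REGEX.acceptable?(context={})
    [:NODE, :LBRACE, :RBRACE, :MATCH, :NOMATCH].include? context[:after]
end

TOKENS = [NAME, DIV, MATCH, REGEX]

def lex(input)
    scanner = StringScanner.new(input)
    context = {:after => nil, :start_of_line => true}
    result  = []
    until scanner.eos?
        next if scanner.skip(/\s+/)
        # Longest *acceptable* match wins, as in find_regex_token.
        candidates = TOKENS.select do |t|
            scanner.match?(t.regex) and t.acceptable?(context)
        end
        best = candidates.max_by { |t| scanner.match?(t.regex) }
        break unless best
        result << [best.name, scanner.scan(best.regex)]
        context[:after] = best.name
    end
    result
end

p lex('$x =~ /foo/').map { |name, _| name }
#=> [:NAME, :MATCH, :REGEX]
p lex('$a / $b / $c').map { |name, _| name }
#=> [:NAME, :DIV, :NAME, :DIV, :NAME]

With the veto in place REGEX still wins on length whenever it's
permitted, but in arithmetic contexts it never even gets considered, so
each slash falls through to DIV.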
