Reviewers: dcarney,

Message:
Committed patchset #1 manually as r18026.

Description:
Experimental scanner: Unify the API with the Scanner API some more.

This doesn't yet contain the Unicode line terminator fix.

Also added a test file.

[email protected]
BUG=

Committed: https://code.google.com/p/v8/source/detail?r=18026

Please review this at https://codereview.chromium.org/83693002/

SVN Base: https://v8.googlecode.com/svn/branches/experimental/parser

Affected files (+42, -13 lines):
  M src/lexer/experimental-scanner.h
  M src/lexer/lexer_py.re
  A + test/lexer/cornercases/multiline-and-weird-html-comment.js
  M tools/lexer_generator/code_generator.jinja


Index: src/lexer/experimental-scanner.h
diff --git a/src/lexer/experimental-scanner.h b/src/lexer/experimental-scanner.h index b9ce4826e4d54c123e570630ea9b51fa93e3ac75..46ca28de90e00fe8997738aa1bcbbff26de79e1b 100644
--- a/src/lexer/experimental-scanner.h
+++ b/src/lexer/experimental-scanner.h
@@ -84,26 +84,55 @@ class ExperimentalScanner {

   ~ExperimentalScanner();

+  // Returns the next token and advances input.
   Token::Value Next() {
+    has_line_terminator_before_next_ = false;
     current_ = next_;
     Scan();  // will fill in next_.
     return current_.token;
   }

+  // Returns the current token again.
+  Token::Value current_token() { return current_.token; }
+
+  // Returns the location information for the current token
+  // (the token last returned by Next()).
   Location location() {
     return Location(current_.beg_pos, current_.end_pos);
   }

-  void SetHarmonyNumericLiterals(bool numeric_literals) {
-    harmony_numeric_literals_ = numeric_literals;
-  }
+  // One token look-ahead (past the token returned by Next()).
+  Token::Value peek() const { return next_.token; }
+
+  Location peek_location() const { return next_.location; }

+  UnicodeCache* unicode_cache() { return unicode_cache_; }
+
+  bool HarmonyScoping() const {
+    return harmony_scoping_;
+  }
+  void SetHarmonyScoping(bool scoping) {
+    harmony_scoping_ = scoping;
+  }
+  bool HarmonyModules() const {
+    return harmony_modules_;
+  }
   void SetHarmonyModules(bool modules) {
     harmony_modules_ = modules;
   }
+  bool HarmonyNumericLiterals() const {
+    return harmony_numeric_literals_;
+  }
+  void SetHarmonyNumericLiterals(bool numeric_literals) {
+    harmony_numeric_literals_ = numeric_literals;
+  }

-  void SetHarmonyScoping(bool scoping) {
-    harmony_scoping_ = scoping;
+  // Returns true if there was a line terminator before the peek'ed token,
+  // possibly inside a multi-line comment.
+  bool HasAnyLineTerminatorBeforeNext() const {
+    return has_line_terminator_before_next_;
+    // FIXME: do we need to distinguish between newlines inside and outside
+    // multiline comments? Atm doesn't look like we need to.
   }

  private:
@@ -127,7 +156,7 @@ class ExperimentalScanner {
   YYCTYPE* start_;
   YYCTYPE* cursor_;
   YYCTYPE* marker_;
-  bool just_seen_line_terminator_;
+  bool has_line_terminator_before_next_;

   YYCTYPE yych;

@@ -147,7 +176,7 @@ ExperimentalScanner<YYCTYPE>::ExperimentalScanner(
     YYCTYPE* source_end,
     Isolate* isolate)
     : unicode_cache_(isolate->unicode_cache()),
-      just_seen_line_terminator_(true),
+      has_line_terminator_before_next_(true),
       harmony_numeric_literals_(false),
       harmony_modules_(false),
       harmony_scoping_(false) {
Index: src/lexer/lexer_py.re
diff --git a/src/lexer/lexer_py.re b/src/lexer/lexer_py.re
index 1414ff5f0ae3b830ef49976165d3a4b2b9f8f51f..8879f5b4ba5e224d8c4bd86935e3c96736634b4e 100644
--- a/src/lexer/lexer_py.re
+++ b/src/lexer/lexer_py.re
@@ -79,7 +79,7 @@ eos = [:eos:];


 "-->" <{
-  if (!just_seen_line_terminator_) {
+  if (!has_line_terminator_before_next_) {
     BACKWARD(1);
     PUSH_TOKEN(Token::DEC);
   }
Index: test/lexer/cornercases/multiline-and-weird-html-comment.js
diff --git a/test/lexer/utf-1.js b/test/lexer/cornercases/multiline-and-weird-html-comment.js
similarity index 89%
copy from test/lexer/utf-1.js
copy to test/lexer/cornercases/multiline-and-weird-html-comment.js
index 2f1ef5209649d4c955c9e0478fd95036e529719c..f16d96f0d428ca44dd5018d91bbfb07cc1e1edba 100644
--- a/test/lexer/utf-1.js
+++ b/test/lexer/cornercases/multiline-and-weird-html-comment.js
@@ -25,7 +25,8 @@
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-// This file is in utf-8 (or rather, ASCII), but doesn't contain any weird
-// characters.
+// The "-->" at the beginning is a weird SpiderMonkey compatibility hack.

-var foobar = 16;
\ No newline at end of file
+/* here is a multiline comment with
+a newline*/ --> this is now inside a comment too!
+but --> this is not
\ No newline at end of file
Index: tools/lexer_generator/code_generator.jinja
diff --git a/tools/lexer_generator/code_generator.jinja b/tools/lexer_generator/code_generator.jinja index 00cbf9b1fffbee8403850b0a03a4b2ea19f35b5c..a80fb0d15c0fd4e162a449dbec2898e1b475ee01 100644
--- a/tools/lexer_generator/code_generator.jinja
+++ b/tools/lexer_generator/code_generator.jinja
@@ -193,7 +193,6 @@
   next_.beg_pos = start_ - buffer_;   \
   next_.end_pos = cursor_ - buffer_;  \
   start_ = cursor_;                   \
-  just_seen_line_terminator_ = false; \
 }

 #define PUSH_TOKEN(T) {               \
@@ -216,7 +215,7 @@

 #define PUSH_LINE_TERMINATOR(s) {     \
   start_ = cursor_;                   \
-  just_seen_line_terminator_ = true;  \
+  has_line_terminator_before_next_ = true; \
 }

 #define FORWARD() {                               \


--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
--- You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to