Revision: 19797
Author:   [email protected]
Date:     Tue Mar 11 12:27:04 2014 UTC
Log:      Experimental parser: add ability to compare lexer shell outputs

[email protected]

BUG=

Review URL: https://codereview.chromium.org/194693003
http://code.google.com/p/v8/source/detail?r=19797
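
With --print-tokens-for-compare the shell prints one canonical line per token
and suppresses the per-file banner, the token count, and the RunTime line, so
the stdout of two lexer-shell binaries can be diffed byte for byte. A
hypothetical invocation (binary paths are illustrative; the flags mirror what
run_lexing_tests.py passes, with --latin1 standing in for the encoding flag):

  out/left/lexer-shell --break-after-illegal --latin1 --print-tokens-for-compare test.js > left.out
  out/right/lexer-shell --break-after-illegal --latin1 --print-tokens-for-compare test.js > right.out
  diff left.out right.out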

Modified:
 /branches/experimental/parser/src/lexer/lexer-shell.cc
 /branches/experimental/parser/tools/lexer_generator/test/run_lexing_tests.py

=======================================
--- /branches/experimental/parser/src/lexer/lexer-shell.cc	Tue Mar 11 09:03:35 2014 UTC
+++ /branches/experimental/parser/src/lexer/lexer-shell.cc	Tue Mar 11 12:27:04 2014 UTC
@@ -60,6 +60,7 @@
 struct LexerShellSettings {
   Encoding encoding;
   bool print_tokens;
+  bool print_tokens_for_compare;
   bool break_after_illegal;
   bool eos_test;
   int repeat;
@@ -69,6 +70,7 @@
   LexerShellSettings()
       : encoding(LATIN1),
         print_tokens(false),
+        print_tokens_for_compare(false),
         break_after_illegal(false),
         eos_test(false),
         repeat(1),
@@ -185,44 +187,6 @@
 }


-struct TokenWithLocation {
-  Token::Value value;
-  size_t beg;
-  size_t end;
-  std::vector<int> literal;
-  bool is_ascii;
-  // The location of the latest octal position when the token was seen.
-  int octal_beg;
-  int octal_end;
-  TokenWithLocation() :
-      value(Token::ILLEGAL), beg(0), end(0), is_ascii(false) { }
-  TokenWithLocation(Token::Value value, size_t beg, size_t end,
-                    int octal_beg) :
-      value(value), beg(beg), end(end), is_ascii(false), octal_beg(octal_beg) {
-  }
-  bool operator==(const TokenWithLocation& other) {
-    return value == other.value && beg == other.beg && end == other.end &&
-           literal == other.literal && is_ascii == other.is_ascii &&
-        octal_beg == other.octal_beg;
-  }
-  bool operator!=(const TokenWithLocation& other) {
-    return !(*this == other);
-  }
-  void Print(const char* prefix) const {
-    printf("%s %11s at (%d, %d)",
-           prefix, Token::Name(value),
-           static_cast<int>(beg), static_cast<int>(end));
-    if (literal.size() > 0) {
-      for (size_t i = 0; i < literal.size(); i++) {
-        printf(is_ascii ? " %02x" : " %04x", literal[i]);
-      }
-      printf(" (is ascii: %d)", is_ascii);
-    }
-    printf(" (last octal start: %d)\n", octal_beg);
-  }
-};
-
-
 static bool HasLiteral(Token::Value token) {
   return token == Token::IDENTIFIER ||
          token == Token::STRING ||
@@ -231,37 +195,72 @@


 template<typename Char>
-static std::vector<int> ToStdVector(const Vector<Char>& literal) {
-  std::vector<int> result;
+static void Copy(const Vector<Char>& literal,
+                 SmartArrayPointer<const uint16_t>* result,
+                 int* literal_length) {
+  uint16_t* data = new uint16_t[literal.length()];
+  result->Reset(data);
   for (int i = 0; i < literal.length(); i++) {
-    result.push_back(literal[i]);
+    data[i] = literal[i];
   }
-  return result;
+  *literal_length = literal.length();
 }


-template<typename Scanner>
-static TokenWithLocation GetTokenWithLocation(
-    Scanner *scanner, Token::Value token) {
-  int beg = scanner->location().beg_pos;
-  int end = scanner->location().end_pos;
-  TokenWithLocation result(token, beg, end, scanner->octal_position().beg_pos);
-  if (HasLiteral(token)) {
-    result.is_ascii = scanner->is_literal_ascii();
-    if (scanner->is_literal_ascii()) {
-      result.literal = ToStdVector(scanner->literal_ascii_string());
-    } else {
-      result.literal = ToStdVector(scanner->literal_utf16_string());
+class TokenWithLocation {
+ public:
+  Token::Value value;
+  int beg;
+  int end;
+  bool is_one_byte;
+  SmartArrayPointer<const uint16_t> literal;
+  int literal_length;
+  // The location of the latest octal position when the token was seen.
+  int octal_beg;
+  int octal_end;
+  TokenWithLocation(Token::Value token, Scanner* scanner) : value(token) {
+    beg = scanner->location().beg_pos;
+    end = scanner->location().end_pos;
+    octal_beg = scanner->octal_position().beg_pos;
+    octal_end = scanner->octal_position().end_pos;
+    is_one_byte = false;
+    literal_length = 0;
+    if (HasLiteral(token)) {
+      is_one_byte = scanner->is_literal_ascii();
+      if (scanner->is_literal_ascii()) {
+        Copy(scanner->literal_ascii_string(), &literal, &literal_length);
+      } else {
+        Copy(scanner->literal_utf16_string(), &literal, &literal_length);
+      }
     }
   }
-  return result;
-}
+  void Print(bool do_compare) const {
+    if (value == Token::ILLEGAL && do_compare) {
+      printf("%-15s (%d)\n", Token::Name(value), beg);
+      return;
+    }
+    printf("%-15s (%d, %d)", Token::Name(value), beg, end);
+    if (literal_length > 0) {
+      // TODO(dcarney): need some sort of checksum.
+      for (int i = 0; i < literal_length; i++) {
+        printf(is_one_byte ? " %02x" : " %04x", literal[i]);
+      }
+      printf(" (is_one_byte: %d)", is_one_byte);
+    }
+    if (octal_beg >= 0) {
+      printf(" (last octal start: %d)", octal_beg);
+    }
+    printf("\n");
+  }

+ private:
+  DISALLOW_COPY_AND_ASSIGN(TokenWithLocation);
+};
+

 static TimeDelta RunLexer(const uint16_t* source,
                           const uint8_t* source_end,
                           Isolate* isolate,
-                          std::vector<TokenWithLocation>* tokens,
                           const LexerShellSettings& settings) {
   SmartPointer<Utf16CharacterStream> stream;
   const uint8_t* one_byte_source = reinterpret_cast<const uint8_t*>(source);
@@ -293,13 +292,14 @@
   scanner.SetHarmonyModules(settings.harmony_modules);
   scanner.SetHarmonyScoping(settings.harmony_scoping);
   ElapsedTimer timer;
+  std::vector<TokenWithLocation*> tokens;
   timer.Start();
   scanner.Initialize(stream.get());
   Token::Value token;
   do {
     token = scanner.Next();
     if (settings.print_tokens) {
-      tokens->push_back(GetTokenWithLocation(&scanner, token));
+      tokens.push_back(new TokenWithLocation(token, &scanner));
     } else if (HasLiteral(token)) {
       if (scanner.is_literal_ascii()) {
         scanner.literal_ascii_string();
@@ -307,7 +307,20 @@
         scanner.literal_utf16_string();
       }
     }
+    if (token == Token::ILLEGAL && settings.break_after_illegal) break;
   } while (token != Token::EOS);
+  // Dump tokens.
+  if (settings.print_tokens) {
+    if (!settings.print_tokens_for_compare) {
+      printf("No of tokens:\t%d\n", static_cast<int>(tokens.size()));
+    }
+    for (size_t i = 0; i < tokens.size(); ++i) {
+      tokens[i]->Print(settings.print_tokens_for_compare);
+    }
+  }
+  for (size_t i = 0; i < tokens.size(); ++i) {
+    delete tokens[i];
+  }
   return timer.Elapsed();
 }

@@ -318,11 +331,10 @@
     const LexerShellSettings& settings,
     int truncate_by,
     bool* can_truncate) {
-  if (settings.print_tokens) {
+  if (settings.print_tokens && !settings.print_tokens_for_compare) {
printf("Processing file %s, truncating by %d bytes\n", fname, truncate_by);
   }
   HandleScope handle_scope(isolate);
-  std::vector<TokenWithLocation> tokens;
   TimeDelta time;
   {
     unsigned length_in_bytes;
@@ -333,20 +345,11 @@
       *can_truncate = false;
     } else {
       buffer_end -= truncate_by;
-      time = RunLexer(buffer, buffer_end, isolate, &tokens, settings);
+      time = RunLexer(buffer, buffer_end, isolate, settings);
     }
     delete[] buffer;
   }
-  if (settings.print_tokens) {
-    printf("No of tokens:\t%d\n", static_cast<int>(tokens.size()));
-    for (size_t i = 0; i < tokens.size(); ++i) {
-      tokens[i].Print("=>");
-      if (tokens[i].value == Token::ILLEGAL) {
-        if (settings.break_after_illegal)
-          break;
-      }
-    }
-  }
+
   return time;
 }

@@ -371,6 +374,9 @@
 #endif
     } else if (strcmp(argv[i], "--print-tokens") == 0) {
       settings.print_tokens = true;
+    } else if (strcmp(argv[i], "--print-tokens-for-compare") == 0) {
+      settings.print_tokens = true;
+      settings.print_tokens_for_compare = true;
     } else if (strcmp(argv[i], "--no-baseline") == 0) {
       // Ignore.
     } else if (strcmp(argv[i], "--no-experimental") == 0) {
@@ -416,7 +422,9 @@
         ++truncate_by;
       } while (can_truncate);
     }
-    printf("RunTime: %.f ms\n", total_time);
+    if (!settings.print_tokens_for_compare) {
+      printf("RunTime: %.f ms\n", total_time);
+    }
   }
   v8::V8::Dispose();
   return 0;
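
A note on the compare format above: an ILLEGAL token is printed with only its
start position, presumably because two lexers need not agree on where an
illegal token ends, and --break-after-illegal stops both shells at that same
token. Illustrative output of Print() (token values and positions are made up):

  IDENTIFIER      (0, 3) 66 6f 6f (is_one_byte: 1)
  ILLEGAL         (42)
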
=======================================
--- /branches/experimental/parser/tools/lexer_generator/test/run_lexing_tests.py	Mon Mar 10 16:13:09 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/test/run_lexing_tests.py	Tue Mar 11 12:27:04 2014 UTC
@@ -27,6 +27,7 @@

 import argparse
 import subprocess
+import select
 import sys
 import time
 import logging
@@ -40,25 +41,47 @@
     self.left_path = args.left_path
     self.right_path = args.right_path
     self.max_process_count = args.parallel_process_count
+    self.buffer_size = 16*1024
     self.args = ['--break-after-illegal']
     if args.use_harmony:
       self.args.append('--use-harmony')
     self.args.append('--%s' % args.encoding)
     if self.right_path:
-      self.args.append('--print-tokens')
+      self.args.append('--print-tokens-for-compare')

   def build_process_map(self):
     process_map = self.process_map
     for i, f in enumerate(self.files):
-      process_map[2 * i] = {
-        'file': f, 'path' : self.left_path, 'type' : 'left' }
+      def data(path, cmp_id):
+        return {'file': f, 'path' : path, 'cmp_id' : cmp_id, 'buffer' : [] }
+      process_map[2 * i] = data(self.left_path, 2 * i + 1)
       if self.right_path:
-        process_map[2 * i + 1] = {
-          'file': f, 'path' : self.right, 'type' : 'right' }
+        process_map[2 * i + 1] = data(self.right_path, 2 * i)
+
+  def read_running_processes(self, running_processes):
+    if not self.right_path:
+      return
+    stdouts = {
+      self.process_map[i]['process'].stdout : self.process_map[i]['buffer']
+        for i in running_processes }
+    while True:
+      ready = select.select(stdouts.iterkeys(), [], [], 0)[0]
+      if not ready:
+        return
+      did_something = False
+      for fd in ready:
+        c = fd.read(self.buffer_size)
+        if c == "":
+          continue
+        did_something = True
+        stdouts[fd].append(c)
+      if not did_something:
+        break

   def wait_processes(self, running_processes):
     complete_ids = []
     while True:
+      self.read_running_processes(running_processes)
       for i in running_processes:
         data = self.process_map[i]
         response = data['process'].poll()
@@ -73,20 +96,52 @@
       running_processes.remove(i)
       del self.process_map[i]

+  @staticmethod
+  def crashed(data):
+    return data['process'].returncode != 0
+
+  @staticmethod
+  def buffer_contents(data):
+    data['buffer'].append(data['process'].stdout.read())
+    return ''.join(data['buffer'])
+
+  def compare_results(self, left, right):
+    f = left['file']
+    assert f == right['file']
+    logging.info('checking results for %s' % f)
+    if self.crashed(left) or self.crashed(right):
+      print "%s failed" % f
+      return
+    if left['path'] == self.right_path:
+      left, right = right, left
+    left_data = self.buffer_contents(left)
+    right_data = self.buffer_contents(right)
+    if left_data != right_data:
+      # TODO(dcarney): analyse differences
+      print "%s failed" % f
+      return
+    print "%s succeeded" % f
+
   def process_complete_processes(self):
     complete_processes = self.complete_processes
     complete_ids = []
     for i, data in complete_processes.iteritems():
-      p = data['process']
       if not self.right_path:
-        if p.returncode:
+        assert not i in complete_ids
+        if self.crashed(data):
           print "%s failed" % data['file']
         else:
           print "%s succeeded" % data['file']
         complete_ids.append(i)
       else:
-        # TODO(dcarney): perform compare
-        pass
+        if i in complete_ids:
+          continue
+        cmp_id = data['cmp_id']
+        if not cmp_id in complete_processes:
+          continue
+        complete_ids.append(i)
+        complete_ids.append(cmp_id)
+        self.compare_results(data, complete_processes[cmp_id])
     # clear processed data
     for i in complete_ids:
       del complete_processes[i]
@@ -104,13 +159,13 @@
             continue
           if len(running_processes) == self.max_process_count:
             break
-          out = sys.PIPE if self.right_path else dev_null
+          out = subprocess.PIPE if self.right_path else dev_null
           args = [data['path'], data['file']] + self.args
           logging.info("running [%s]" % ' '.join(args))
           data['process'] = subprocess.Popen(args,
                                              stdout=out,
                                              stderr=dev_null,
-                                             bufsize=16*1024)
+                                             bufsize=self.buffer_size)
           running_processes.add(id)
         if not running_processes:
           break
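
The select() loop added above guards against a pipe deadlock: with
stdout=subprocess.PIPE, a child that emits more token output than the OS pipe
buffer holds blocks in write() while the parent is still in poll(), so the
parent has to drain stdout while the children run. A minimal standalone sketch
of the same pattern (names are illustrative; os.read is used so a ready pipe
never waits for a full chunk):

  import os
  import select

  def drain_stdout(processes, buffers, chunk_size=16 * 1024):
    # Zero-timeout select: only touch pipes that already have data, so the
    # parent never blocks while children are still lexing.
    pipes = [p.stdout for p in processes]
    while pipes:
      ready = select.select(pipes, [], [], 0)[0]
      if not ready:
        return
      for pipe in ready:
        # os.read returns at most chunk_size bytes from one syscall and
        # returns '' at EOF; it never blocks waiting for a full chunk.
        chunk = os.read(pipe.fileno(), chunk_size)
        if chunk:
          buffers[pipe].append(chunk)
        else:
          pipes.remove(pipe)

Callers keep buffers = {p.stdout: [] for p in processes} and join each buffer
once its process exits, as buffer_contents() does above.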
