Revision: 19797
Author: [email protected]
Date: Tue Mar 11 12:27:04 2014 UTC
Log: Experimental parser: add ability to compare lexer shell outputs
[email protected]
BUG=
Review URL: https://codereview.chromium.org/194693003
http://code.google.com/p/v8/source/detail?r=19797
Modified:
/branches/experimental/parser/src/lexer/lexer-shell.cc
/branches/experimental/parser/tools/lexer_generator/test/run_lexing_tests.py
=======================================
--- /branches/experimental/parser/src/lexer/lexer-shell.cc Tue Mar 11 09:03:35 2014 UTC
+++ /branches/experimental/parser/src/lexer/lexer-shell.cc Tue Mar 11 12:27:04 2014 UTC
@@ -60,6 +60,7 @@
struct LexerShellSettings {
Encoding encoding;
bool print_tokens;
+ bool print_tokens_for_compare;
bool break_after_illegal;
bool eos_test;
int repeat;
@@ -69,6 +70,7 @@
LexerShellSettings()
: encoding(LATIN1),
print_tokens(false),
+ print_tokens_for_compare(false),
break_after_illegal(false),
eos_test(false),
repeat(1),
@@ -185,44 +187,6 @@
}
-struct TokenWithLocation {
- Token::Value value;
- size_t beg;
- size_t end;
- std::vector<int> literal;
- bool is_ascii;
- // The location of the latest octal position when the token was seen.
- int octal_beg;
- int octal_end;
- TokenWithLocation() :
- value(Token::ILLEGAL), beg(0), end(0), is_ascii(false) { }
- TokenWithLocation(Token::Value value, size_t beg, size_t end,
- int octal_beg) :
- value(value), beg(beg), end(end), is_ascii(false), octal_beg(octal_beg) {
- }
- bool operator==(const TokenWithLocation& other) {
- return value == other.value && beg == other.beg && end == other.end &&
- literal == other.literal && is_ascii == other.is_ascii &&
- octal_beg == other.octal_beg;
- }
- bool operator!=(const TokenWithLocation& other) {
- return !(*this == other);
- }
- void Print(const char* prefix) const {
- printf("%s %11s at (%d, %d)",
- prefix, Token::Name(value),
- static_cast<int>(beg), static_cast<int>(end));
- if (literal.size() > 0) {
- for (size_t i = 0; i < literal.size(); i++) {
- printf(is_ascii ? " %02x" : " %04x", literal[i]);
- }
- printf(" (is ascii: %d)", is_ascii);
- }
- printf(" (last octal start: %d)\n", octal_beg);
- }
-};
-
-
static bool HasLiteral(Token::Value token) {
return token == Token::IDENTIFIER ||
token == Token::STRING ||
@@ -231,37 +195,72 @@
template<typename Char>
-static std::vector<int> ToStdVector(const Vector<Char>& literal) {
- std::vector<int> result;
+static void Copy(const Vector<Char>& literal,
+ SmartArrayPointer<const uint16_t>* result,
+ int* literal_length) {
+ uint16_t* data = new uint16_t[literal.length()];
+ result->Reset(data);
for (int i = 0; i < literal.length(); i++) {
- result.push_back(literal[i]);
+ data[i] = literal[i];
}
- return result;
+ *literal_length = literal.length();
}
-template<typename Scanner>
-static TokenWithLocation GetTokenWithLocation(
- Scanner *scanner, Token::Value token) {
- int beg = scanner->location().beg_pos;
- int end = scanner->location().end_pos;
- TokenWithLocation result(token, beg, end, scanner->octal_position().beg_pos);
- if (HasLiteral(token)) {
- result.is_ascii = scanner->is_literal_ascii();
- if (scanner->is_literal_ascii()) {
- result.literal = ToStdVector(scanner->literal_ascii_string());
- } else {
- result.literal = ToStdVector(scanner->literal_utf16_string());
+class TokenWithLocation {
+ public:
+ Token::Value value;
+ int beg;
+ int end;
+ bool is_one_byte;
+ SmartArrayPointer<const uint16_t> literal;
+ int literal_length;
+ // The location of the latest octal position when the token was seen.
+ int octal_beg;
+ int octal_end;
+ TokenWithLocation(Token::Value token, Scanner* scanner) : value(token) {
+ beg = scanner->location().beg_pos;
+ end = scanner->location().end_pos;
+ octal_beg = scanner->octal_position().beg_pos;
+ octal_end = scanner->octal_position().end_pos;
+ is_one_byte = false;
+ literal_length = 0;
+ if (HasLiteral(token)) {
+ is_one_byte = scanner->is_literal_ascii();
+ if (scanner->is_literal_ascii()) {
+ Copy(scanner->literal_ascii_string(), &literal, &literal_length);
+ } else {
+ Copy(scanner->literal_utf16_string(), &literal, &literal_length);
+ }
}
}
- return result;
-}
+ void Print(bool do_compare) const {
+ if (value == Token::ILLEGAL && do_compare) {
+ printf("%-15s (%d)\n", Token::Name(value), beg);
+ return;
+ }
+ printf("%-15s (%d, %d)", Token::Name(value), beg, end);
+ if (literal_length > 0) {
+ // TODO(dcarney): need some sort of checksum.
+ for (int i = 0; i < literal_length; i++) {
+ printf(is_one_byte ? " %02x" : " %04x", literal[i]);
+ }
+ printf(" (is_one_byte: %d)", is_one_byte);
+ }
+ if (octal_beg >= 0) {
+ printf(" (last octal start: %d)", octal_beg);
+ }
+ printf("\n");
+ }
+ private:
+ DISALLOW_COPY_AND_ASSIGN(TokenWithLocation);
+};
+
static TimeDelta RunLexer(const uint16_t* source,
const uint8_t* source_end,
Isolate* isolate,
- std::vector<TokenWithLocation>* tokens,
const LexerShellSettings& settings) {
SmartPointer<Utf16CharacterStream> stream;
const uint8_t* one_byte_source = reinterpret_cast<const uint8_t*>(source);
@@ -293,13 +292,14 @@
scanner.SetHarmonyModules(settings.harmony_modules);
scanner.SetHarmonyScoping(settings.harmony_scoping);
ElapsedTimer timer;
+ std::vector<TokenWithLocation*> tokens;
timer.Start();
scanner.Initialize(stream.get());
Token::Value token;
do {
token = scanner.Next();
if (settings.print_tokens) {
- tokens->push_back(GetTokenWithLocation(&scanner, token));
+ tokens.push_back(new TokenWithLocation(token, &scanner));
} else if (HasLiteral(token)) {
if (scanner.is_literal_ascii()) {
scanner.literal_ascii_string();
@@ -307,7 +307,20 @@
scanner.literal_utf16_string();
}
}
+ if (token == Token::ILLEGAL && settings.break_after_illegal) break;
} while (token != Token::EOS);
+ // Dump tokens.
+ if (settings.print_tokens) {
+ if (!settings.print_tokens_for_compare) {
+ printf("No of tokens:\t%d\n", static_cast<int>(tokens.size()));
+ }
+ for (size_t i = 0; i < tokens.size(); ++i) {
+ tokens[i]->Print(settings.print_tokens_for_compare);
+ }
+ }
+ for (size_t i = 0; i < tokens.size(); ++i) {
+ delete tokens[i];
+ }
return timer.Elapsed();
}
@@ -318,11 +331,10 @@
const LexerShellSettings& settings,
int truncate_by,
bool* can_truncate) {
- if (settings.print_tokens) {
+ if (settings.print_tokens && !settings.print_tokens_for_compare) {
printf("Processing file %s, truncating by %d bytes\n", fname, truncate_by);
}
HandleScope handle_scope(isolate);
- std::vector<TokenWithLocation> tokens;
TimeDelta time;
{
unsigned length_in_bytes;
@@ -333,20 +345,11 @@
*can_truncate = false;
} else {
buffer_end -= truncate_by;
- time = RunLexer(buffer, buffer_end, isolate, &tokens, settings);
+ time = RunLexer(buffer, buffer_end, isolate, settings);
}
delete[] buffer;
}
- if (settings.print_tokens) {
- printf("No of tokens:\t%d\n", static_cast<int>(tokens.size()));
- for (size_t i = 0; i < tokens.size(); ++i) {
- tokens[i].Print("=>");
- if (tokens[i].value == Token::ILLEGAL) {
- if (settings.break_after_illegal)
- break;
- }
- }
- }
+
return time;
}
@@ -371,6 +374,9 @@
#endif
} else if (strcmp(argv[i], "--print-tokens") == 0) {
settings.print_tokens = true;
+ } else if (strcmp(argv[i], "--print-tokens-for-compare") == 0) {
+ settings.print_tokens = true;
+ settings.print_tokens_for_compare = true;
} else if (strcmp(argv[i], "--no-baseline") == 0) {
// Ignore.
} else if (strcmp(argv[i], "--no-experimental") == 0) {
@@ -416,7 +422,9 @@
++truncate_by;
} while (can_truncate);
}
- printf("RunTime: %.f ms\n", total_time);
+ if (!settings.print_tokens_for_compare) {
+ printf("RunTime: %.f ms\n", total_time);
+ }
}
v8::V8::Dispose();
return 0;
=======================================
--- /branches/experimental/parser/tools/lexer_generator/test/run_lexing_tests.py Mon Mar 10 16:13:09 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/test/run_lexing_tests.py Tue Mar 11 12:27:04 2014 UTC
@@ -27,6 +27,7 @@
import argparse
import subprocess
+import select
import sys
import time
import logging
@@ -40,25 +41,47 @@
self.left_path = args.left_path
self.right_path = args.right_path
self.max_process_count = args.parallel_process_count
+ self.buffer_size = 16*1024
self.args = ['--break-after-illegal']
if args.use_harmony:
self.args.append('--use-harmony')
self.args.append('--%s' % args.encoding)
if self.right_path:
- self.args.append('--print-tokens')
+ self.args.append('--print-tokens-for-compare')
def build_process_map(self):
process_map = self.process_map
for i, f in enumerate(self.files):
- process_map[2 * i] = { 'file': f, 'path' : self.left_path, 'type' : 'left' }
+ def data(path, cmp_id):
+ return {'file': f, 'path' : path, 'cmp_id' : cmp_id, 'buffer' : [] }
+ process_map[2 * i] = data(self.left_path, 2 * i + 1)
if self.right_path:
- process_map[2 * i + 1] = { 'file': f, 'path' : self.right, 'type' : 'right' }
+ process_map[2 * i + 1] = data(self.right_path, 2 * i)
+
+ def read_running_processes(self, running_processes):
+ if not self.right_path:
+ return
+ stdouts = {
+ self.process_map[i]['process'].stdout : self.process_map[i]['buffer']
+ for i in running_processes }
+ while True:
+ ready = select.select(stdouts.iterkeys(), [], [], 0)[0]
+ if not ready:
+ return
+ did_something = False
+ for fd in ready:
+ c = fd.read(self.buffer_size)
+ if c == "":
+ continue
+ did_something = True
+ stdouts[fd].append(c)
+ if not did_something:
+ break
def wait_processes(self, running_processes):
complete_ids = []
while True:
+ self.read_running_processes(running_processes)
for i in running_processes:
data = self.process_map[i]
response = data['process'].poll()
@@ -73,20 +96,52 @@
running_processes.remove(i)
del self.process_map[i]
+ @staticmethod
+ def crashed(data):
+ return data['process'].returncode != 0
+
+ @staticmethod
+ def buffer_contents(data):
+ data['buffer'].append(data['process'].stdout.read())
+ return ''.join(data['buffer'])
+
+ def compare_results(self, left, right):
+ f = left['file']
+ assert f == right['file']
+ logging.info('checking results for %s' % f)
+ if self.crashed(left) or self.crashed(right):
+ print "%s failed" % f
+ return
+ if left['path'] == self.right_path:
+ left, right = right, left
+ left_data = self.buffer_contents(left)
+ right_data = self.buffer_contents(right)
+ if left_data != right_data:
+ # TODO(dcarney): analyse differences
+ print "%s failed" % f
+ return
+ print "%s succeeded" % f
+
def process_complete_processes(self):
complete_processes = self.complete_processes
complete_ids = []
for i, data in complete_processes.iteritems():
- p = data['process']
if not self.right_path:
- if p.returncode:
+ assert not i in complete_ids
+ if self.crashed(data):
print "%s failed" % data['file']
else:
print "%s succeeded" % data['file']
complete_ids.append(i)
else:
- # TODO(dcarney): perform compare
- pass
+ if i in complete_ids:
+ continue
+ cmp_id = data['cmp_id']
+ if not cmp_id in complete_processes:
+ continue
+ complete_ids.append(i)
+ complete_ids.append(cmp_id)
+ self.compare_results(data, complete_processes[cmp_id])
# clear processed data
for i in complete_ids:
del complete_processes[i]
@@ -104,13 +159,13 @@
continue
if len(running_processes) == self.max_process_count:
break
- out = sys.PIPE if self.right_path else dev_null
+ out = subprocess.PIPE if self.right_path else dev_null
args = [data['path'], data['file']] + self.args
logging.info("running [%s]" % ' '.join(args))
data['process'] = subprocess.Popen(args,
stdout=out,
stderr=dev_null,
- bufsize=16*1024)
+ bufsize=self.buffer_size)
running_processes.add(id)
if not running_processes:
break
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/d/optout.