Title: [97879] trunk/Tools
Revision
97879
Author
[email protected]
Date
2011-10-19 14:38:25 -0700 (Wed, 19 Oct 2011)

Log Message

NRWT ServerProcess can't read lines from stderr and stdio separately
https://bugs.webkit.org/show_bug.cgi?id=70435

Reviewed by Adam Barth.

This is the underlying bug behind our lack of WK2 support, and timeout detection on Windows.
DRT expects RWT to read one line at a time from stderr and stdout independently.
This allows DRT to communicate errors over stderr and have them responded to immediately
regardless of what state the stdio stream may be in.  Previously NRWT's ServerProcess
class only exposed a read_line function which would read a line from stdout
and batch all output from stderr into an error buffer.  Callers would only be able
to respond to commands over stderr if they happened to correspond with a stdout line boundary.

* Scripts/webkitpy/layout_tests/port/server_process.py:
* Scripts/webkitpy/layout_tests/port/webkit.py:

Modified Paths

Diff

Modified: trunk/Tools/ChangeLog (97878 => 97879)


--- trunk/Tools/ChangeLog	2011-10-19 21:38:03 UTC (rev 97878)
+++ trunk/Tools/ChangeLog	2011-10-19 21:38:25 UTC (rev 97879)
@@ -1,3 +1,21 @@
+2011-10-19  Eric Seidel  <[email protected]>
+
+        NRWT ServerProcess can't read lines from stderr and stdio separately
+        https://bugs.webkit.org/show_bug.cgi?id=70435
+
+        Reviewed by Adam Barth.
+
+        This is the underlying bug behind our lack of WK2 support, and timeout detection on Windows.
+        DRT expects RWT to read one line at a time from stderr and stdout independently.
+        This allows DRT to communicate errors over stderr and have them responded to immediately
+        regardless of what state the stdio stream may be in.  Previously NRWT's ServerProcess
+        class only exposed a read_line function which would read a line from stdout
+        and batch all output from stderr into an error buffer.  Callers would only be able
+        to respond to commands over stderr if they happened to correspond with a stdout line boundary.
+
+        * Scripts/webkitpy/layout_tests/port/server_process.py:
+        * Scripts/webkitpy/layout_tests/port/webkit.py:
+
 2011-10-19  Chang Shu  <[email protected]>
 
         [Qt] Support WK1-only Skipped list

Modified: trunk/Tools/Scripts/webkitpy/layout_tests/port/server_process.py (97878 => 97879)


--- trunk/Tools/Scripts/webkitpy/layout_tests/port/server_process.py	2011-10-19 21:38:03 UTC (rev 97878)
+++ trunk/Tools/Scripts/webkitpy/layout_tests/port/server_process.py	2011-10-19 21:38:25 UTC (rev 97879)
@@ -68,10 +68,10 @@
 
     def _reset(self):
         self._proc = None
-        self._output = ''
+        self._output = str()  # bytesarray() once we require Python 2.6
+        self._error = str()  # bytesarray() once we require Python 2.6
         self.crashed = False
         self.timed_out = False
-        self.error = ''
 
     def _start(self):
         if self._proc:
@@ -127,43 +127,51 @@
             # stop() calls _reset(), so we have to set crashed to True after calling stop().
             self.crashed = True
 
-    def read_line(self, timeout):
-        """Read a single line from the subprocess, waiting until the deadline.
-        If the deadline passes, the call times out. Note that even if the
-        subprocess has crashed or the deadline has passed, if there is output
-        pending, it will be returned.
+    def _pop_stdout_line_if_ready(self):
+        index_after_newline = self._output.find('\n') + 1
+        if index_after_newline > 0:
+            return self._pop_output_bytes(index_after_newline)
+        return None
 
-        Args:
-            timeout: floating-point number of seconds the call is allowed
-                to block for. A zero or negative number will attempt to read
-                any existing data, but will not block. There is no way to
-                block indefinitely.
-        Returns:
-            output: data returned, if any. If no data is available and the
-                call times out or crashes, an empty string is returned. Note
-                that the returned string includes the newline ('\n')."""
-        return self._read(timeout, size=0)
+    def _pop_stderr_line_if_ready(self):
+        index_after_newline = self._error.find('\n') + 1
+        if index_after_newline > 0:
+            return self._pop_error_bytes(index_after_newline)
+        return None
 
-    def read(self, timeout, size):
-        """Attempts to read size characters from the subprocess, waiting until
-        the deadline passes. If the deadline passes, any available data will be
-        returned. Note that even if the deadline has passed or if the
-        subprocess has crashed, any available data will still be returned.
+    def read_stdout_line(self, timeout_seconds):
+        return self._read(timeout_seconds, self._pop_stdout_line_if_ready)
 
-        Args:
-            timeout: floating-point number of seconds the call is allowed
-                to block for. A zero or negative number will attempt to read
-                any existing data, but will not block. There is no way to
-                block indefinitely.
-            size: amount of data to read. Must be a postive integer.
-        Returns:
-            output: data returned, if any. If no data is available, an empty
-                string is returned.
-        """
+    def read_stderr_line(self, timeout_seconds):
+        return self._read(timeout_seconds, self._pop_stderr_line_if_ready)
+
+    def read_either_stdout_or_stderr_line(self, timeout_seconds):
+        def retrieve_bytes_from_buffers():
+            stdout_line = self._pop_stdout_line_if_ready()
+            if stdout_line:
+                return stdout_line, None
+            stderr_line = self._pop_stderr_line_if_ready()
+            if stderr_line:
+                return None, stderr_line
+            return None  # Instructs the caller to keep waiting.
+
+        return_value = self._read(timeout_seconds, retrieve_bytes_from_buffers)
+        # FIXME: This is a bit of a hack around the fact that _read normally only returns one value, but this caller wants it to return two.
+        if return_value is None:
+            return None, None
+        return return_value
+
+    def read_stdout(self, timeout_seconds, size):
         if size <= 0:
             raise ValueError('ServerProcess.read() called with a non-positive size: %d ' % size)
-        return self._read(timeout, size)
 
+        def retrieve_bytes_from_stdout_buffer():
+            if len(self._output) >= size:
+                return self._pop_output_bytes(size)
+            return None
+
+        return self._read(timeout_seconds, retrieve_bytes_from_stdout_buffer)
+
     def _check_for_crash(self):
         if self._proc.poll() != None:
             self.crashed = True
@@ -191,48 +199,63 @@
         except ScriptError, e:
             self._log('Unable to sample process.')
 
-    def _read(self, timeout, size):
-        """Internal routine that actually does the read."""
-        index = -1
+    def _handle_timeout(self):
+        self._executive.wait_newest(self._port.is_crash_reporter)
+        if not self.crashed:
+            self._check_for_crash()
+        self.timed_out = True
+        if not self.crashed:
+            self._sample()
+
+    def _split_string_after_index(self, string, index):
+        return string[0:index], string[index:]
+
+    def _pop_output_bytes(self, bytes_count):
+        output, self._output = self._split_string_after_index(self._output, bytes_count)
+        return output
+
+    def _pop_error_bytes(self, bytes_count):
+        output, self._error = self._split_string_after_index(self._error, bytes_count)
+        return output
+
+    def _wait_for_data_and_update_buffers(self, ms_to_wait):
         out_fd = self._proc.stdout.fileno()
         err_fd = self._proc.stderr.fileno()
         select_fds = (out_fd, err_fd)
-        deadline = time.time() + timeout
-        while not self.timed_out and not self.crashed:
-            self._check_for_crash()
+        read_fds, _, _ = select.select(select_fds, [], select_fds, ms_to_wait)
+        try:
+            if out_fd in read_fds:
+                self._output += self._proc.stdout.read()
+            if err_fd in read_fds:
+                self._error += self._proc.stderr.read()
+        except IOError, e:
+            # FIXME: Why do we ignore all IOErrors here?
+            pass
 
-            now = time.time()
-            if now > deadline:
-                self._executive.wait_newest(self._port.is_crash_reporter)
-                if not self.crashed:
-                    self._check_for_crash()
-                self.timed_out = True
-                if not self.crashed:
-                    self._sample()
+    def _check_for_abort(self, deadline):
+        self._check_for_crash()
 
-            # Check to see if we have any output we can return.
-            if size and len(self._output) >= size:
-                index = size
-            elif size == 0:
-                index = self._output.find('\n') + 1
+        if time.time() > deadline:
+            self._handle_timeout()
 
-            if index > 0 or self.crashed or self.timed_out:
-                output = self._output[0:index]
-                self._output = self._output[index:]
-                return output
+        return self.crashed or self.timed_out
 
-            # Nope - wait for more data.
-            (read_fds, write_fds, err_fds) = select.select(select_fds, [], select_fds, deadline - now)
-            try:
-                if out_fd in read_fds:
-                    self._output += self._proc.stdout.read()
-                if err_fd in read_fds:
-                    self.error += self._proc.stderr.read()
-            except IOError, e:
-                pass
+    # This read function is a bit oddly-designed, as it polls both stdout and stderr, yet
+    # only reads/returns from one of them (buffering both in local self._output/self._error).
+    # It might be cleaner to pass in the file descriptor to poll instead.
+    def _read(self, timeout, fetch_bytes_from_buffers_callback):
+        deadline = time.time() + timeout
+        while True:
+            if self._check_for_abort(deadline):
+                return None
 
+            bytes = fetch_bytes_from_buffers_callback()
+            if bytes is not None:
+                return bytes
+
+            self._wait_for_data_and_update_buffers(deadline - time.time())
+
     def stop(self):
-        """Stop (shut down) the subprocess), if it is running."""
         if not self._proc:
             return
 

Modified: trunk/Tools/Scripts/webkitpy/layout_tests/port/webkit.py (97878 => 97879)


--- trunk/Tools/Scripts/webkitpy/layout_tests/port/webkit.py	2011-10-19 21:38:03 UTC (rev 97878)
+++ trunk/Tools/Scripts/webkitpy/layout_tests/port/webkit.py	2011-10-19 21:38:25 UTC (rev 97879)
@@ -179,7 +179,7 @@
     def _read_image_diff(self, sp):
         timeout = 2.0
         deadline = time.time() + timeout
-        output = sp.read_line(timeout)
+        output = sp.read_stdout_line(timeout)
         output_image = ""
         diff_percent = 0
         while not sp.timed_out and not sp.crashed and output:
@@ -187,14 +187,14 @@
                 m = re.match('Content-Length: (\d+)', output)
                 content_length = int(m.group(1))
                 timeout = deadline - time.time()
-                output_image = sp.read(timeout, content_length)
-                output = sp.read_line(timeout)
+                output_image = sp.read_stdout(timeout, content_length)
+                output = sp.read_stdout_line(timeout)
                 break
             elif output.startswith('diff'):
                 break
             else:
                 timeout = deadline - time.time()
-                output = sp.read_line(deadline)
+                output = sp.read_stdout_line(deadline)
 
         if sp.timed_out:
             _log.error("ImageDiff timed out")
@@ -420,6 +420,14 @@
         Driver.__init__(self, port, worker_number)
         self._driver_tempdir = port._filesystem.mkdtemp(prefix='%s-' % self._port.driver_name())
 
+        # stderr reading is scoped on a per-test (not per-block) basis, so we store the accumulated
+        # stderr output, as well as if we've seen #EOF on this driver instance.
+        # FIXME: We should probably remove _read_first_block and _read_optional_image_block and
+        # instead scope these locally in run_test.
+        self.error_from_test = str()
+        self.err_seen_eof = False
+
+
     # FIXME: This may be unsafe, as python does not guarentee any ordering of __del__ calls
     # I believe it's possible that self._port or self._port._filesystem may already be destroyed.
     def __del__(self):
@@ -470,26 +478,23 @@
         return command + "\n"
 
     def _read_first_block(self, deadline):
-        """Reads a block from the server_process and returns (text_content, audio_content)."""
-        if self.detected_crash():
-            return (None, None)
-
+        # returns (text_content, audio_content)
         block = self._read_block(deadline)
         if block.content_type == 'audio/wav':
             return (None, block.decoded_content)
         return (block.decoded_content, None)
 
     def _read_optional_image_block(self, deadline):
-        """Reads a block from the server_process and returns (image, actual_image_hash)."""
-        if self.detected_crash():
-            return (None, None)
-
-        block = self._read_block(deadline)
+        # returns (image, actual_image_hash)
+        block = self._read_block(deadline, wait_for_stderr_eof=True)
         if block.content and block.content_type == 'image/png':
             return (block.decoded_content, block.content_hash)
         return (None, block.content_hash)
 
     def run_test(self, driver_input):
+        self.error_from_test = str()
+        self.err_seen_eof = False
+
         command = self._command_from_driver_input(driver_input)
         start_time = time.time()
         deadline = time.time() + int(driver_input.timeout) / 1000.0
@@ -498,62 +503,78 @@
         text, audio = self._read_first_block(deadline)  # First block is either text or audio
         image, actual_image_hash = self._read_optional_image_block(deadline)  # The second (optional) block is image data.
 
-        error_lines = self._server_process.error.splitlines()
-        # FIXME: This is a hack.  It is unclear why sometimes
-        # we do not get any error lines from the server_process
-        # probably we are not flushing stderr.
-        if error_lines and error_lines[-1] == "#EOF":
-            error_lines.pop()  # Remove the expected "#EOF"
-        error = "\n".join(error_lines)
-
-        # FIXME: This seems like the wrong section of code to be resetting _server_process.error.
-        self._server_process.error = ""
+        # We may not have read all output (if an error occured), but we certainly should have read all error bytes.
+        assert not self._server_process._error, "Unprocessed error output: %s" % self._server_process._error
         return DriverOutput(text, image, actual_image_hash, audio,
             crash=self.detected_crash(), test_time=time.time() - start_time,
-            timeout=self._server_process.timed_out, error=error)
+            timeout=self._server_process.timed_out, error=self.error_from_test)
 
-    LENGTH_HEADER = 'Content-Length: '
-    HASH_HEADER = 'ActualHash: '
-    TYPE_HEADER = 'Content-Type: '
-    ENCODING_HEADER = 'Content-Transfer-Encoding: '
+    def _read_header(self, block, line, header_text, header_attr, header_filter=None):
+        if line.startswith(header_text) and getattr(block, header_attr) is None:
+            value = line.split()[1]
+            if header_filter:
+                value = header_filter(value)
+            setattr(block, header_attr, value)
+            return True
+        return False
 
-    def _read_line_until(self, deadline):
-        return self._server_process.read_line(deadline - time.time())
+    def _process_stdout_line(self, block, line):
+        if (self._read_header(block, line, 'Content-Type: ', 'content_type')
+            or self._read_header(block, line, 'Content-Transfer-Encoding: ', 'encoding')
+            or self._read_header(block, line, 'Content-Length: ', '_content_length', int)
+            or self._read_header(block, line, 'ActualHash: ', 'content_hash')):
+            return
+        # If the line wasn't a header, we just append it to the content.
+        block.content += line
 
-    def _read_block(self, deadline):
-        content_type = None
-        encoding = None
-        content_hash = None
-        content_length = None
+    def _strip_eof(self, line):
+        if line and line.endswith("#EOF\n"):
+            return line[:-5], True
+        return line, False
 
-        # Content is treated as binary data even though the text output is usually UTF-8.
-        content = str()  # FIXME: Should be bytearray() once we require Python 2.6.
-        line = self._read_line_until(deadline)
-        eof = False
-        while (not self._server_process.timed_out and not self.detected_crash() and not eof):
-            chomped_line = line.rstrip()  # FIXME: This will remove trailing lines from test output.  Is that right?
-            if chomped_line.endswith("#EOF"):
-                eof = True
-                line = chomped_line[:-4]
+    def _read_block(self, deadline, wait_for_stderr_eof=False):
+        block = ContentBlock()
+        out_seen_eof = False
 
-            if line.startswith(self.TYPE_HEADER) and content_type is None:
-                content_type = line.split()[1]
-            elif line.startswith(self.ENCODING_HEADER) and encoding is None:
-                encoding = line.split()[1]
-            elif line.startswith(self.LENGTH_HEADER) and content_length is None:
-                content_length = int(line[len(self.LENGTH_HEADER):])
-                # FIXME: In real HTTP there should probably be a blank line
-                # after headers before content, but DRT doesn't write one.
-                content = self._server_process.read(deadline - time.time(), content_length)
-            elif line.startswith(self.HASH_HEADER):
-                content_hash = line.split()[1]
-            elif line:
-                content += line
-            if eof:
+        while True:
+            if out_seen_eof and (self.err_seen_eof or not wait_for_stderr_eof):
                 break
-            line = self._read_line_until(deadline)
-        return ContentBlock(content_type, encoding, content_hash, content)
 
+            if self._server_process.timed_out or self.detected_crash():
+                break
+
+            timeout = deadline - time.time()
+
+            if self.err_seen_eof:
+                out_line = self._server_process.read_stdout_line(timeout)
+                err_line = None
+            elif out_seen_eof:
+                out_line = None
+                err_line = self._server_process.read_stderr_line(timeout)
+            else:
+                out_line, err_line = self._server_process.read_either_stdout_or_stderr_line(timeout)
+
+            if out_line:
+                assert not out_seen_eof
+                out_line, out_seen_eof = self._strip_eof(out_line)
+            if err_line:
+                assert not self.err_seen_eof
+                err_line, self.err_seen_eof = self._strip_eof(err_line)
+
+            if out_line:
+                self._process_stdout_line(block, out_line)
+                # FIXME: Unlike HTTP, DRT dumps the content right after printing a Content-Length header.
+                # Don't wait until we're done with headers, just read the binary blob right now.
+                if block._content_length is not None and not block.content:
+                    block.content = self._server_process.read_stdout(deadline - time.time(), block._content_length)
+
+            # FIXME: Need to handle "#CRASHED - WebProcess" and "#CRASHED".
+            if err_line:
+                self.error_from_test += err_line
+
+        block.decode_content()
+        return block
+
     def stop(self):
         if self._server_process:
             self._server_process.stop()
@@ -561,12 +582,17 @@
 
 
 class ContentBlock(object):
-    def __init__(self, content_type, encoding, content_hash, content):
-        self.content_type = content_type
-        self.encoding = encoding
-        self.content_hash = content_hash
-        self.content = content
+    def __init__(self):
+        self.content_type = None
+        self.encoding = None
+        self.content_hash = None
+        self._content_length = None
+        # Content is treated as binary data even though the text output is usually UTF-8.
+        self.content = str()  # FIXME: Should be bytearray() once we require Python 2.6.
+        self.decoded_content = None
+
+    def decode_content(self):
         if self.encoding == 'base64':
-            self.decoded_content = base64.b64decode(content)
+            self.decoded_content = base64.b64decode(self.content)
         else:
-            self.decoded_content = content
+            self.decoded_content = self.content

Modified: trunk/Tools/Scripts/webkitpy/layout_tests/port/webkit_unittest.py (97878 => 97879)


--- trunk/Tools/Scripts/webkitpy/layout_tests/port/webkit_unittest.py	2011-10-19 21:38:03 UTC (rev 97878)
+++ trunk/Tools/Scripts/webkitpy/layout_tests/port/webkit_unittest.py	2011-10-19 21:38:25 UTC (rev 97879)
@@ -205,10 +205,14 @@
         self.crashed = False
         self.lines = lines or []
 
-    def read_line(self, timeout):
-        return self.lines.pop(0)
+    def read_stdout_line(self, timeout):
+        return self.lines.pop(0) + "\n"
 
+    def read_either_stdout_or_stderr_line(self, timeout):
+        # FIXME: We should have tests which intermix stderr and stdout lines.
+        return self.read_stdout_line(timeout), None
 
+
 class WebKitDriverTest(unittest.TestCase):
     def test_read_block(self):
         port = TestWebKitPort()
_______________________________________________
webkit-changes mailing list
[email protected]
http://lists.webkit.org/mailman/listinfo.cgi/webkit-changes

Reply via email to