Title: [285177] trunk/Source/WebKit
Revision
285177
Author
sihui_...@apple.com
Date
2021-11-02 12:42:06 -0700 (Tue, 02 Nov 2021)

Log Message

Terminate unresponsive network process by crashing it
https://bugs.webkit.org/show_bug.cgi?id=232603

Reviewed by Chris Dumez.

UI process currently kills network process when it does not respond message in some time (network process being
unresponsive). We've found one common case where network process becomes unresponsive is that it is blocked by
some slow operation on the main thread (like file operation in rdar://84511633). To understand what the
operations are and make a fix, we now ask network process to crash itself on IPC thread. In this way, we can get
crash report that includes the call stack of the main thread. To avoid generating too many crash reports, we
only send the crash message to network process when it becomes unresponsive multiple times in a short time
period.

* Platform/IPC/Connection.cpp:
(IPC::terminateDueToIPCTerminateMessage):
(IPC::Connection::processIncomingMessage):
* Scripts/webkit/model.py:
* Scripts/webkit/tests/MessageNames.cpp:
(IPC::description):
(IPC::receiverName):
(IPC::isValidMessageName):
* Scripts/webkit/tests/MessageNames.h:
* UIProcess/Network/NetworkProcessProxy.cpp:
(WebKit::shouldTerminateNetworkProcessBySendingMessage):
(WebKit::NetworkProcessProxy::didBecomeUnresponsive):

Modified Paths

Diff

Modified: trunk/Source/WebKit/ChangeLog (285176 => 285177)


--- trunk/Source/WebKit/ChangeLog	2021-11-02 19:38:53 UTC (rev 285176)
+++ trunk/Source/WebKit/ChangeLog	2021-11-02 19:42:06 UTC (rev 285177)
@@ -1,3 +1,31 @@
+2021-11-02  Sihui Liu  <sihui_...@apple.com>
+
+        Terminate unresponsive network process by crashing it
+        https://bugs.webkit.org/show_bug.cgi?id=232603
+
+        Reviewed by Chris Dumez.
+
+        UI process currently kills network process when it does not respond message in some time (network process being 
+        unresponsive). We've found one common case where network process becomes unresponsive is that it is blocked by 
+        some slow operation on the main thread (like file operation in rdar://84511633). To understand what the 
+        operations are and make a fix, we now ask network process to crash itself on IPC thread. In this way, we can get 
+        crash report that includes the call stack of the main thread. To avoid generating too many crash reports, we 
+        only send the crash message to network process when it becomes unresponsive multiple times in a short time 
+        period.
+
+        * Platform/IPC/Connection.cpp:
+        (IPC::terminateDueToIPCTerminateMessage):
+        (IPC::Connection::processIncomingMessage):
+        * Scripts/webkit/model.py:
+        * Scripts/webkit/tests/MessageNames.cpp:
+        (IPC::description):
+        (IPC::receiverName):
+        (IPC::isValidMessageName):
+        * Scripts/webkit/tests/MessageNames.h:
+        * UIProcess/Network/NetworkProcessProxy.cpp:
+        (WebKit::shouldTerminateNetworkProcessBySendingMessage):
+        (WebKit::NetworkProcessProxy::didBecomeUnresponsive):
+
 2021-11-02  Kate Cheney  <katherine_che...@apple.com>
 
         PCM: Safari on iOS and macOS are not sending ad click attribution reports for Private Click Measurement

Modified: trunk/Source/WebKit/Platform/IPC/Connection.cpp (285176 => 285177)


--- trunk/Source/WebKit/Platform/IPC/Connection.cpp	2021-11-02 19:38:53 UTC (rev 285176)
+++ trunk/Source/WebKit/Platform/IPC/Connection.cpp	2021-11-02 19:42:06 UTC (rev 285177)
@@ -42,6 +42,7 @@
 
 #if PLATFORM(COCOA)
 #include "MachMessage.h"
+#include "WKCrashReporter.h"
 #endif
 
 #if USE(UNIX_DOMAIN_SOCKETS)
@@ -751,6 +752,16 @@
     // This can happen if the send timed out, so it's fine to ignore.
 }
 
+static NEVER_INLINE NO_RETURN_DUE_TO_CRASH void terminateDueToIPCTerminateMessage()
+{
+#if PLATFORM(COCOA)
+    WebKit::logAndSetCrashLogMessage("Receives Terminate message");
+#else
+    WTFLogAlways("Receives Terminate message");
+#endif
+    CRASH();
+}
+
 void Connection::processIncomingMessage(std::unique_ptr<Decoder> message)
 {
     ASSERT(message->messageReceiverName() != ReceiverName::Invalid);
@@ -760,6 +771,9 @@
         return;
     }
 
+    if (message->messageName() == MessageName::Terminate)
+        return terminateDueToIPCTerminateMessage();
+
     if (!MessageReceiveQueueMap::isValidMessage(*message)) {
         RunLoop::main().dispatch([protectedThis = Ref { *this }, messageName = message->messageName()]() mutable {
             protectedThis->dispatchDidReceiveInvalidMessage(messageName);

Modified: trunk/Source/WebKit/Scripts/webkit/model.py (285176 => 285177)


--- trunk/Source/WebKit/Scripts/webkit/model.py	2021-11-02 19:38:53 UTC (rev 285176)
+++ trunk/Source/WebKit/Scripts/webkit/model.py	2021-11-02 19:42:06 UTC (rev 285177)
@@ -77,7 +77,8 @@
     Message('InitializeConnection', [], [], attributes=[BUILTIN_ATTRIBUTE], condition="PLATFORM(COCOA)"),
     Message('LegacySessionState', [], [], attributes=[BUILTIN_ATTRIBUTE], condition=None),
     Message('SetStreamDestinationID', [], [], attributes=[BUILTIN_ATTRIBUTE], condition=None),
-    Message('ProcessOutOfStreamMessage', [], [], attributes=[BUILTIN_ATTRIBUTE], condition=None)
+    Message('ProcessOutOfStreamMessage', [], [], attributes=[BUILTIN_ATTRIBUTE], condition=None),
+    Message('Terminate', [], [], attributes=[BUILTIN_ATTRIBUTE], condition=None),
 ], condition=None)
 
 

Modified: trunk/Source/WebKit/Scripts/webkit/tests/MessageNames.cpp (285176 => 285177)


--- trunk/Source/WebKit/Scripts/webkit/tests/MessageNames.cpp	2021-11-02 19:38:53 UTC (rev 285176)
+++ trunk/Source/WebKit/Scripts/webkit/tests/MessageNames.cpp	2021-11-02 19:42:06 UTC (rev 285177)
@@ -156,6 +156,8 @@
         return "SetStreamDestinationID";
     case MessageName::SyncMessageReply:
         return "SyncMessageReply";
+    case MessageName::Terminate:
+        return "Terminate";
     case MessageName::TestWithSuperclass_TestAsyncMessageReply:
         return "TestWithSuperclass_TestAsyncMessageReply";
     case MessageName::TestWithSuperclass_TestAsyncMessageWithConnectionReply:
@@ -258,6 +260,7 @@
     case MessageName::ProcessOutOfStreamMessage:
     case MessageName::SetStreamDestinationID:
     case MessageName::SyncMessageReply:
+    case MessageName::Terminate:
         return ReceiverName::IPC;
     case MessageName::TestWithSuperclass_TestAsyncMessageReply:
     case MessageName::TestWithSuperclass_TestAsyncMessageWithConnectionReply:
@@ -466,6 +469,8 @@
         return true;
     if (messageName == IPC::MessageName::SyncMessageReply)
         return true;
+    if (messageName == IPC::MessageName::Terminate)
+        return true;
 #if ENABLE(TEST_FEATURE)
     if (messageName == IPC::MessageName::TestWithSuperclass_TestAsyncMessageReply)
         return true;

Modified: trunk/Source/WebKit/Scripts/webkit/tests/MessageNames.h (285176 => 285177)


--- trunk/Source/WebKit/Scripts/webkit/tests/MessageNames.h	2021-11-02 19:38:53 UTC (rev 285176)
+++ trunk/Source/WebKit/Scripts/webkit/tests/MessageNames.h	2021-11-02 19:42:06 UTC (rev 285177)
@@ -107,6 +107,7 @@
     , ProcessOutOfStreamMessage
     , SetStreamDestinationID
     , SyncMessageReply
+    , Terminate
     , TestWithSuperclass_TestAsyncMessageReply
     , TestWithSuperclass_TestAsyncMessageWithConnectionReply
     , TestWithSuperclass_TestAsyncMessageWithMultipleArgumentsReply

Modified: trunk/Source/WebKit/UIProcess/Network/NetworkProcessProxy.cpp (285176 => 285177)


--- trunk/Source/WebKit/UIProcess/Network/NetworkProcessProxy.cpp	2021-11-02 19:38:53 UTC (rev 285176)
+++ trunk/Source/WebKit/UIProcess/Network/NetworkProcessProxy.cpp	2021-11-02 19:42:06 UTC (rev 285177)
@@ -87,6 +87,8 @@
 using namespace WebCore;
 
 static constexpr Seconds networkProcessResponsivenessTimeout = 6_s;
+static constexpr int unresponsivenessCountLimit = 3;
+static constexpr Seconds unresponsivenessCheckPeriod = 15_s;
 
 static HashSet<NetworkProcessProxy*>& networkProcessesSet()
 {
@@ -95,6 +97,25 @@
     return set;
 }
 
+static bool shouldTerminateNetworkProcessBySendingMessage()
+{
+    static WallTime unresponsivenessPeriodStartTime = WallTime::now();
+    static int unresponsivenessCountDuringThisPeriod = 0;
+    auto now = WallTime::now();
+
+    if (now - unresponsivenessPeriodStartTime > unresponsivenessCheckPeriod) {
+        unresponsivenessCountDuringThisPeriod = 1;
+        unresponsivenessPeriodStartTime = now;
+        return false;
+    }
+
+    ++unresponsivenessCountDuringThisPeriod;
+    if (unresponsivenessCountDuringThisPeriod >= unresponsivenessCountLimit)
+        return true;
+
+    return false;
+}
+
 Vector<Ref<NetworkProcessProxy>> NetworkProcessProxy::allNetworkProcesses()
 {
     Vector<Ref<NetworkProcessProxy>> processes;
@@ -128,6 +149,19 @@
 void NetworkProcessProxy::didBecomeUnresponsive()
 {
     RELEASE_LOG_ERROR(Process, "NetworkProcessProxy::didBecomeUnresponsive: NetworkProcess with PID %d became unresponsive, terminating it", processIdentifier());
+
+    // Let network process terminates itself and generate crash report for investigation of hangs.
+    // We currently only do this when network process becomes unresponsive multiple times in a short
+    // time period to avoid generating too many crash reports with same back trace on user's device.
+    if (shouldTerminateNetworkProcessBySendingMessage()) {
+        sendMessage(makeUniqueRef<IPC::Encoder>(IPC::MessageName::Terminate, 0), { });
+        RunLoop::main().dispatchAfter(1_s, [weakThis = WeakPtr { *this }] () mutable {
+            if (weakThis)
+                weakThis->terminate();
+        });
+        return;
+    }
+
     terminate();
 }
 
_______________________________________________
webkit-changes mailing list
webkit-changes@lists.webkit.org
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to