Title: [286587] trunk/Source/bmalloc
Revision
286587
Author
fpi...@apple.com
Date
2021-12-06 21:52:22 -0800 (Mon, 06 Dec 2021)

Log Message

[libpas] Clean up what the machine code looks like under LTO
https://bugs.webkit.org/show_bug.cgi?id=233909

Reviewed by Yusuke Suzuki.

During the very painful perf burndown of libpas that got it to be a progression (rather than a
regression) versus bmalloc, I found that certain key fast paths - like fastMalloc and fastFree - perform
better when they do not have a stack frame. For this to happen, they must only make tail calls.

Sadly, LTO was inlining a slow path into fastFree (i.e. pas_deallocate), and this slow path had an
assertion, and the call to pas_assertion_failed was not a tail call.

This fixes the problem comprehensively:

- We now compile pas_assertion_failed as a breakpoint, just like WebKit would do. This is better for
  code size and maybe it could sometimes be better for performance.

- The slow path that was getting inlined is now marked PAS_NEVER_INLINE.

- Inspecting the machine code, I found a couple other slow paths that were being inlined in a bunch of
  places, and also marked them NEVER_INLINE.

If my past experiences are right, this could be a tiny speed-up on malloc-heavy workloads.

* libpas/src/libpas/pas_lock.c:
(pas_lock_lock_slow):
* libpas/src/libpas/pas_lock.h:
* libpas/src/libpas/pas_monotonic_time.c:
(get_timebase_info_slow):
(get_timebase_info):
* libpas/src/libpas/pas_thread_local_cache.c:
(pas_thread_local_cache_append_deallocation_slow):
* libpas/src/libpas/pas_thread_local_cache.h:
* libpas/src/libpas/pas_utils.c:
* libpas/src/libpas/pas_utils.h:
(pas_assertion_failed):
(pas_assertion_failed_noreturn_silencer):

Modified Paths

Diff

Modified: trunk/Source/bmalloc/ChangeLog (286586 => 286587)


--- trunk/Source/bmalloc/ChangeLog	2021-12-07 04:12:27 UTC (rev 286586)
+++ trunk/Source/bmalloc/ChangeLog	2021-12-07 05:52:22 UTC (rev 286587)
@@ -1,3 +1,43 @@
+2021-12-06  Filip Pizlo  <fpi...@apple.com>
+
+        [libpas] Clean up what the machine code looks like under LTO
+        https://bugs.webkit.org/show_bug.cgi?id=233909
+
+        Reviewed by Yusuke Suzuki.
+
+        During the very painful perf burndown of libpas that got it to be a progression (rather than a
+        regression) versus bmalloc, I found that certain key fast paths - like fastMalloc and fastFree - perform
+        better when they do not have a stack frame. For this to happen, they must only make tail calls.
+
+        Sadly, LTO was inlining a slow path into fastFree (i.e. pas_deallocate), and this slow path had an
+        assertion, and the call to pas_assertion_failed was not a tail call.
+
+        This fixes the problem comprehensively:
+
+        - We now compile pas_assertion_failed as a breakpoint, just like WebKit would do. This is better for
+          code size and maybe it could sometimes be better for performance.
+
+        - The slow path that was getting inlined is now marked PAS_NEVER_INLINE.
+
+        - Inspecting the machine code, I found a couple other slow paths that were being inlined in a bunch of
+          places, and also marked them NEVER_INLINE.
+
+        If my past experiences are right, this could be a tiny speed-up on malloc-heavy workloads.
+
+        * libpas/src/libpas/pas_lock.c:
+        (pas_lock_lock_slow):
+        * libpas/src/libpas/pas_lock.h:
+        * libpas/src/libpas/pas_monotonic_time.c:
+        (get_timebase_info_slow):
+        (get_timebase_info):
+        * libpas/src/libpas/pas_thread_local_cache.c:
+        (pas_thread_local_cache_append_deallocation_slow):
+        * libpas/src/libpas/pas_thread_local_cache.h:
+        * libpas/src/libpas/pas_utils.c:
+        * libpas/src/libpas/pas_utils.h:
+        (pas_assertion_failed):
+        (pas_assertion_failed_noreturn_silencer):
+
 2021-12-06  Yusuke Suzuki  <ysuz...@apple.com>
 
         [libpas] Set pthread_setspecific with marker in TLS destructor to detect TLS is destroyed

Modified: trunk/Source/bmalloc/libpas/src/libpas/pas_lock.c (286586 => 286587)


--- trunk/Source/bmalloc/libpas/src/libpas/pas_lock.c	2021-12-07 04:12:27 UTC (rev 286586)
+++ trunk/Source/bmalloc/libpas/src/libpas/pas_lock.c	2021-12-07 05:52:22 UTC (rev 286587)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020 Apple Inc. All rights reserved.
+ * Copyright (c) 2020-2021 Apple Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -31,7 +31,7 @@
 
 #if PAS_USE_SPINLOCKS
 
-void pas_lock_lock_slow(pas_lock* lock)
+PAS_NEVER_INLINE void pas_lock_lock_slow(pas_lock* lock)
 {
     static const size_t a_lot = 256;
 

Modified: trunk/Source/bmalloc/libpas/src/libpas/pas_lock.h (286586 => 286587)


--- trunk/Source/bmalloc/libpas/src/libpas/pas_lock.h	2021-12-07 04:12:27 UTC (rev 286586)
+++ trunk/Source/bmalloc/libpas/src/libpas/pas_lock.h	2021-12-07 05:52:22 UTC (rev 286587)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Apple Inc. All rights reserved.
+ * Copyright (c) 2018-2021 Apple Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -78,7 +78,7 @@
                           code path. */
 }
 
-PAS_API void pas_lock_lock_slow(pas_lock* lock);
+PAS_API PAS_NEVER_INLINE void pas_lock_lock_slow(pas_lock* lock);
 
 static inline void pas_lock_lock(pas_lock* lock)
 {

Modified: trunk/Source/bmalloc/libpas/src/libpas/pas_monotonic_time.c (286586 => 286587)


--- trunk/Source/bmalloc/libpas/src/libpas/pas_monotonic_time.c	2021-12-07 04:12:27 UTC (rev 286586)
+++ trunk/Source/bmalloc/libpas/src/libpas/pas_monotonic_time.c	2021-12-07 05:52:22 UTC (rev 286587)
@@ -36,21 +36,25 @@
 static mach_timebase_info_data_t timebase_info;
 static mach_timebase_info_data_t* timebase_info_ptr;
 
+static PAS_NEVER_INLINE mach_timebase_info_data_t* get_timebase_info_slow(void)
+{
+    kern_return_t kern_return;
+    kern_return = mach_timebase_info(&timebase_info);
+    PAS_ASSERT(kern_return == KERN_SUCCESS);
+    pas_fence();
+    timebase_info_ptr = &timebase_info;
+    return &timebase_info;
+}
+
 static mach_timebase_info_data_t* get_timebase_info(void)
 {
     mach_timebase_info_data_t* result;
 
     result = timebase_info_ptr;
-    if (!result) {
-        kern_return_t kern_return;
-        kern_return = mach_timebase_info(&timebase_info);
-        PAS_ASSERT(kern_return == KERN_SUCCESS);
-        pas_fence();
-        timebase_info_ptr = &timebase_info;
-        result = &timebase_info;
-    }
+    if (PAS_LIKELY(result))
+        return result;
 
-    return result;
+    return get_timebase_info_slow();
 }
 
 uint64_t pas_get_current_monotonic_time_nanoseconds(void)

Modified: trunk/Source/bmalloc/libpas/src/libpas/pas_thread_local_cache.c (286586 => 286587)


--- trunk/Source/bmalloc/libpas/src/libpas/pas_thread_local_cache.c	2021-12-07 04:12:27 UTC (rev 286586)
+++ trunk/Source/bmalloc/libpas/src/libpas/pas_thread_local_cache.c	2021-12-07 05:52:22 UTC (rev 286587)
@@ -793,9 +793,10 @@
     return result;
 }
 
-void pas_thread_local_cache_append_deallocation_slow(pas_thread_local_cache* thread_local_cache,
-                                                     uintptr_t begin,
-                                                     pas_segregated_page_config_kind_and_role kind_and_role)
+PAS_NEVER_INLINE void pas_thread_local_cache_append_deallocation_slow(
+    pas_thread_local_cache* thread_local_cache,
+    uintptr_t begin,
+    pas_segregated_page_config_kind_and_role kind_and_role)
 {
     unsigned index;
 

Modified: trunk/Source/bmalloc/libpas/src/libpas/pas_thread_local_cache.h (286586 => 286587)


--- trunk/Source/bmalloc/libpas/src/libpas/pas_thread_local_cache.h	2021-12-07 04:12:27 UTC (rev 286586)
+++ trunk/Source/bmalloc/libpas/src/libpas/pas_thread_local_cache.h	2021-12-07 05:52:22 UTC (rev 286587)
@@ -334,7 +334,7 @@
                                             pas_deallocator_scavenge_action deallocator_action,
                                             pas_lock_hold_mode heap_lock_hold_mode);
 
-PAS_API void pas_thread_local_cache_append_deallocation_slow(
+PAS_API PAS_NEVER_INLINE void pas_thread_local_cache_append_deallocation_slow(
     pas_thread_local_cache* thread_local_cache,
     uintptr_t begin,
     pas_segregated_page_config_kind_and_role kind_and_role);

Modified: trunk/Source/bmalloc/libpas/src/libpas/pas_utils.c (286586 => 286587)


--- trunk/Source/bmalloc/libpas/src/libpas/pas_utils.c	2021-12-07 04:12:27 UTC (rev 286586)
+++ trunk/Source/bmalloc/libpas/src/libpas/pas_utils.c	2021-12-07 05:52:22 UTC (rev 286587)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 Apple Inc. All rights reserved.
+ * Copyright (c) 2018-2021 Apple Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -48,10 +48,12 @@
     __builtin_trap();
 }
 
+#if PAS_ENABLE_TESTING
 void pas_assertion_failed(const char* filename, int line, const char* function, const char* _expression_)
 {
     pas_panic("%s:%d: %s: assertion %s failed.\n", filename, line, function, _expression_);
 }
+#endif /* PAS_ENABLE_TESTING */
 
 static void (*deallocation_did_fail_callback)(const char* reason, void* begin);
 

Modified: trunk/Source/bmalloc/libpas/src/libpas/pas_utils.h (286586 => 286587)


--- trunk/Source/bmalloc/libpas/src/libpas/pas_utils.h	2021-12-07 04:12:27 UTC (rev 286586)
+++ trunk/Source/bmalloc/libpas/src/libpas/pas_utils.h	2021-12-07 05:52:22 UTC (rev 286587)
@@ -106,10 +106,23 @@
                                                                       size_t old_size,
                                                                       size_t new_size);
 
-PAS_API PAS_NO_RETURN void pas_assertion_failed(const char* filename, int line, const char* function, const char* _expression_);
+#if PAS_ENABLE_TESTING
+PAS_API PAS_NO_RETURN void pas_assertion_failed(
+    const char* filename, int line, const char* function, const char* _expression_);
+#else /* PAS_ENABLE_TESTING -> so !PAS_ENABLE_TESTING */
+static PAS_ALWAYS_INLINE PAS_NO_RETURN void pas_assertion_failed(
+    const char* filename, int line, const char* function, const char* _expression_)
+{
+    PAS_UNUSED_PARAM(filename);
+    PAS_UNUSED_PARAM(line);
+    PAS_UNUSED_PARAM(function);
+    PAS_UNUSED_PARAM(_expression_);
+    __builtin_trap();
+}
+#endif /* PAS_ENABLE_TESTING -> so end of !PAS_ENABLE_TESTING */
 
 PAS_IGNORE_WARNINGS_BEGIN("missing-noreturn")
-static inline void pas_assertion_failed_noreturn_silencer(
+static PAS_ALWAYS_INLINE void pas_assertion_failed_noreturn_silencer(
     const char* filename, int line, const char* function, const char* _expression_)
 {
     pas_assertion_failed(filename, line, function, _expression_);
_______________________________________________
webkit-changes mailing list
webkit-changes@lists.webkit.org
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to