- Revision
- 286587
- Author
- fpi...@apple.com
- Date
- 2021-12-06 21:52:22 -0800 (Mon, 06 Dec 2021)
Log Message
[libpas] Clean up what the machine code looks like under LTO
https://bugs.webkit.org/show_bug.cgi?id=233909
Reviewed by Yusuke Suzuki.
During the very painful perf burndown of libpas that got it to be a progression (rather than a
regression) versus bmalloc, I found that certain key fast paths - like fastMalloc and fastFree - perform
better when they do not have a stack frame. For this to happen, they must only make tail calls.
Sadly, LTO was inlining a slow path into fastFree (i.e. pas_deallocate), and this slow path had an
assertion, and the call to pas_assertion_failed was not a tail call.
This fixes the problem comprehensively:
- We now compile pas_assertion_failed as a breakpoint, just like WebKit would do. This is better for
code size and maybe it could sometimes be better for performance.
- The slow path that was getting inlined is now marked PAS_NEVER_INLINE.
- Inspecting the machine code, I found a couple other slow paths that were being inlined in a bunch of
places, and also marked them NEVER_INLINE.
If my past experiences are right, this could be a tiny speed-up on malloc-heavy workloads.
* libpas/src/libpas/pas_lock.c:
(pas_lock_lock_slow):
* libpas/src/libpas/pas_lock.h:
* libpas/src/libpas/pas_monotonic_time.c:
(get_timebase_info_slow):
(get_timebase_info):
* libpas/src/libpas/pas_thread_local_cache.c:
(pas_thread_local_cache_append_deallocation_slow):
* libpas/src/libpas/pas_thread_local_cache.h:
* libpas/src/libpas/pas_utils.c:
* libpas/src/libpas/pas_utils.h:
(pas_assertion_failed):
(pas_assertion_failed_noreturn_silencer):
Modified Paths
Diff
Modified: trunk/Source/bmalloc/ChangeLog (286586 => 286587)
--- trunk/Source/bmalloc/ChangeLog 2021-12-07 04:12:27 UTC (rev 286586)
+++ trunk/Source/bmalloc/ChangeLog 2021-12-07 05:52:22 UTC (rev 286587)
@@ -1,3 +1,43 @@
+2021-12-06 Filip Pizlo <fpi...@apple.com>
+
+ [libpas] Clean up what the machine code looks like under LTO
+ https://bugs.webkit.org/show_bug.cgi?id=233909
+
+ Reviewed by Yusuke Suzuki.
+
+ During the very painful perf burndown of libpas that got it to be a progression (rather than a
+ regression) versus bmalloc, I found that certain key fast paths - like fastMalloc and fastFree - perform
+ better when they do not have a stack frame. For this to happen, they must only make tail calls.
+
+ Sadly, LTO was inlining a slow path into fastFree (i.e. pas_deallocate), and this slow path had an
+ assertion, and the call to pas_assertion_failed was not a tail call.
+
+ This fixes the problem comprehensively:
+
+ - We now compile pas_assertion_failed as a breakpoint, just like WebKit would do. This is better for
+ code size and maybe it could sometimes be better for performance.
+
+ - The slow path that was getting inlined is now marked PAS_NEVER_INLINE.
+
+ - Inspecting the machine code, I found a couple other slow paths that were being inlined in a bunch of
+ places, and also marked them NEVER_INLINE.
+
+ If my past experiences are right, this could be a tiny speed-up on malloc-heavy workloads.
+
+ * libpas/src/libpas/pas_lock.c:
+ (pas_lock_lock_slow):
+ * libpas/src/libpas/pas_lock.h:
+ * libpas/src/libpas/pas_monotonic_time.c:
+ (get_timebase_info_slow):
+ (get_timebase_info):
+ * libpas/src/libpas/pas_thread_local_cache.c:
+ (pas_thread_local_cache_append_deallocation_slow):
+ * libpas/src/libpas/pas_thread_local_cache.h:
+ * libpas/src/libpas/pas_utils.c:
+ * libpas/src/libpas/pas_utils.h:
+ (pas_assertion_failed):
+ (pas_assertion_failed_noreturn_silencer):
+
2021-12-06 Yusuke Suzuki <ysuz...@apple.com>
[libpas] Set pthread_setspecific with marker in TLS destructor to detect TLS is destroyed
Modified: trunk/Source/bmalloc/libpas/src/libpas/pas_lock.c (286586 => 286587)
--- trunk/Source/bmalloc/libpas/src/libpas/pas_lock.c 2021-12-07 04:12:27 UTC (rev 286586)
+++ trunk/Source/bmalloc/libpas/src/libpas/pas_lock.c 2021-12-07 05:52:22 UTC (rev 286587)
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020 Apple Inc. All rights reserved.
+ * Copyright (c) 2020-2021 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -31,7 +31,7 @@
#if PAS_USE_SPINLOCKS
-void pas_lock_lock_slow(pas_lock* lock)
+PAS_NEVER_INLINE void pas_lock_lock_slow(pas_lock* lock)
{
static const size_t a_lot = 256;
Modified: trunk/Source/bmalloc/libpas/src/libpas/pas_lock.h (286586 => 286587)
--- trunk/Source/bmalloc/libpas/src/libpas/pas_lock.h 2021-12-07 04:12:27 UTC (rev 286586)
+++ trunk/Source/bmalloc/libpas/src/libpas/pas_lock.h 2021-12-07 05:52:22 UTC (rev 286587)
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Apple Inc. All rights reserved.
+ * Copyright (c) 2018-2021 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -78,7 +78,7 @@
code path. */
}
-PAS_API void pas_lock_lock_slow(pas_lock* lock);
+PAS_API PAS_NEVER_INLINE void pas_lock_lock_slow(pas_lock* lock);
static inline void pas_lock_lock(pas_lock* lock)
{
Modified: trunk/Source/bmalloc/libpas/src/libpas/pas_monotonic_time.c (286586 => 286587)
--- trunk/Source/bmalloc/libpas/src/libpas/pas_monotonic_time.c 2021-12-07 04:12:27 UTC (rev 286586)
+++ trunk/Source/bmalloc/libpas/src/libpas/pas_monotonic_time.c 2021-12-07 05:52:22 UTC (rev 286587)
@@ -36,21 +36,25 @@
static mach_timebase_info_data_t timebase_info;
static mach_timebase_info_data_t* timebase_info_ptr;
+static PAS_NEVER_INLINE mach_timebase_info_data_t* get_timebase_info_slow(void)
+{
+ kern_return_t kern_return;
+ kern_return = mach_timebase_info(&timebase_info);
+ PAS_ASSERT(kern_return == KERN_SUCCESS);
+ pas_fence();
+ timebase_info_ptr = &timebase_info;
+ return &timebase_info;
+}
+
static mach_timebase_info_data_t* get_timebase_info(void)
{
mach_timebase_info_data_t* result;
result = timebase_info_ptr;
- if (!result) {
- kern_return_t kern_return;
- kern_return = mach_timebase_info(&timebase_info);
- PAS_ASSERT(kern_return == KERN_SUCCESS);
- pas_fence();
- timebase_info_ptr = &timebase_info;
- result = &timebase_info;
- }
+ if (PAS_LIKELY(result))
+ return result;
- return result;
+ return get_timebase_info_slow();
}
uint64_t pas_get_current_monotonic_time_nanoseconds(void)
Modified: trunk/Source/bmalloc/libpas/src/libpas/pas_thread_local_cache.c (286586 => 286587)
--- trunk/Source/bmalloc/libpas/src/libpas/pas_thread_local_cache.c 2021-12-07 04:12:27 UTC (rev 286586)
+++ trunk/Source/bmalloc/libpas/src/libpas/pas_thread_local_cache.c 2021-12-07 05:52:22 UTC (rev 286587)
@@ -793,9 +793,10 @@
return result;
}
-void pas_thread_local_cache_append_deallocation_slow(pas_thread_local_cache* thread_local_cache,
- uintptr_t begin,
- pas_segregated_page_config_kind_and_role kind_and_role)
+PAS_NEVER_INLINE void pas_thread_local_cache_append_deallocation_slow(
+ pas_thread_local_cache* thread_local_cache,
+ uintptr_t begin,
+ pas_segregated_page_config_kind_and_role kind_and_role)
{
unsigned index;
Modified: trunk/Source/bmalloc/libpas/src/libpas/pas_thread_local_cache.h (286586 => 286587)
--- trunk/Source/bmalloc/libpas/src/libpas/pas_thread_local_cache.h 2021-12-07 04:12:27 UTC (rev 286586)
+++ trunk/Source/bmalloc/libpas/src/libpas/pas_thread_local_cache.h 2021-12-07 05:52:22 UTC (rev 286587)
@@ -334,7 +334,7 @@
pas_deallocator_scavenge_action deallocator_action,
pas_lock_hold_mode heap_lock_hold_mode);
-PAS_API void pas_thread_local_cache_append_deallocation_slow(
+PAS_API PAS_NEVER_INLINE void pas_thread_local_cache_append_deallocation_slow(
pas_thread_local_cache* thread_local_cache,
uintptr_t begin,
pas_segregated_page_config_kind_and_role kind_and_role);
Modified: trunk/Source/bmalloc/libpas/src/libpas/pas_utils.c (286586 => 286587)
--- trunk/Source/bmalloc/libpas/src/libpas/pas_utils.c 2021-12-07 04:12:27 UTC (rev 286586)
+++ trunk/Source/bmalloc/libpas/src/libpas/pas_utils.c 2021-12-07 05:52:22 UTC (rev 286587)
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Apple Inc. All rights reserved.
+ * Copyright (c) 2018-2021 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -48,10 +48,12 @@
__builtin_trap();
}
+#if PAS_ENABLE_TESTING
void pas_assertion_failed(const char* filename, int line, const char* function, const char* _expression_)
{
pas_panic("%s:%d: %s: assertion %s failed.\n", filename, line, function, _expression_);
}
+#endif /* PAS_ENABLE_TESTING */
static void (*deallocation_did_fail_callback)(const char* reason, void* begin);
Modified: trunk/Source/bmalloc/libpas/src/libpas/pas_utils.h (286586 => 286587)
--- trunk/Source/bmalloc/libpas/src/libpas/pas_utils.h 2021-12-07 04:12:27 UTC (rev 286586)
+++ trunk/Source/bmalloc/libpas/src/libpas/pas_utils.h 2021-12-07 05:52:22 UTC (rev 286587)
@@ -106,10 +106,23 @@
size_t old_size,
size_t new_size);
-PAS_API PAS_NO_RETURN void pas_assertion_failed(const char* filename, int line, const char* function, const char* _expression_);
+#if PAS_ENABLE_TESTING
+PAS_API PAS_NO_RETURN void pas_assertion_failed(
+ const char* filename, int line, const char* function, const char* _expression_);
+#else /* PAS_ENABLE_TESTING -> so !PAS_ENABLE_TESTING */
+static PAS_ALWAYS_INLINE PAS_NO_RETURN void pas_assertion_failed(
+ const char* filename, int line, const char* function, const char* _expression_)
+{
+ PAS_UNUSED_PARAM(filename);
+ PAS_UNUSED_PARAM(line);
+ PAS_UNUSED_PARAM(function);
+ PAS_UNUSED_PARAM(_expression_);
+ __builtin_trap();
+}
+#endif /* PAS_ENABLE_TESTING -> so end of !PAS_ENABLE_TESTING */
PAS_IGNORE_WARNINGS_BEGIN("missing-noreturn")
-static inline void pas_assertion_failed_noreturn_silencer(
+static PAS_ALWAYS_INLINE void pas_assertion_failed_noreturn_silencer(
const char* filename, int line, const char* function, const char* _expression_)
{
pas_assertion_failed(filename, line, function, _expression_);