Running the Cyberpunk 2077 benchmark we can observe that the lookup helper is relatively hot, but 97% of the calls are for a single object (~3% are for two objects, and never for more than three), while a more trivial workload like vkmark under Plasma is even more skewed towards single object lookups.
Therefore let's add a fast path to bypass the kmalloc_array/kfree and use a pre-allocated stack array for those cases. Signed-off-by: Tvrtko Ursulin <[email protected]> Reviewed-by: Maíra Canal <[email protected]> # v2 --- v2: * Added comments describing how the fast path arrays were sized. * Make container freeing criteria clearer by using a boolean. v3: * Rebased to be standalone. --- drivers/gpu/drm/drm_syncobj.c | 92 +++++++++++++++++++++++++++-------- 1 file changed, 73 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 99aada85865d..ba64119a664c 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -1310,21 +1310,33 @@ static int drm_syncobj_array_wait(struct drm_device *dev, static int drm_syncobj_array_find(struct drm_file *file_private, void __user *user_handles, uint32_t count_handles, + uint32_t *stack_handles, + struct drm_syncobj **stack_syncobjs, + u32 stack_count, struct drm_syncobj ***syncobjs_out) { uint32_t i, *handles; struct drm_syncobj **syncobjs; int ret; - handles = memdup_array_user(user_handles, count_handles, - sizeof(*handles)); - if (IS_ERR(handles)) - return PTR_ERR(handles); + if (count_handles > stack_count) { + handles = memdup_array_user(user_handles, count_handles, + sizeof(*handles)); + if (IS_ERR(handles)) + return PTR_ERR(handles); - syncobjs = kmalloc_array(count_handles, sizeof(*syncobjs), GFP_KERNEL); - if (syncobjs == NULL) { - ret = -ENOMEM; - goto err_free_handles; + syncobjs = kmalloc_array(count_handles, sizeof(*syncobjs), + GFP_KERNEL); + if (!syncobjs) { + ret = -ENOMEM; + goto err_free_handles; + } + } else { + handles = stack_handles; + syncobjs = stack_syncobjs; + if (copy_from_user(handles, user_handles, + count_handles * sizeof(*handles))) + return -EFAULT; } for (i = 0; i < count_handles; i++) { @@ -1335,34 +1347,42 @@ static int drm_syncobj_array_find(struct drm_file *file_private, } } - kfree(handles); + if (handles != 
stack_handles) + kfree(handles); *syncobjs_out = syncobjs; return 0; err_put_syncobjs: while (i-- > 0) drm_syncobj_put(syncobjs[i]); - kfree(syncobjs); + if (syncobjs != stack_syncobjs) + kfree(syncobjs); err_free_handles: - kfree(handles); + if (handles != stack_handles) + kfree(handles); return ret; } static void drm_syncobj_array_free(struct drm_syncobj **syncobjs, - uint32_t count) + uint32_t count, + bool free_container) { uint32_t i; for (i = 0; i < count; i++) drm_syncobj_put(syncobjs[i]); - kfree(syncobjs); + + if (free_container) + kfree(syncobjs); } int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private) { + struct drm_syncobj *stack_syncobjs[DRM_SYNCOBJ_FAST_PATH_ENTRIES]; + uint32_t stack_handles[DRM_SYNCOBJ_FAST_PATH_ENTRIES]; struct drm_syncobj_wait *args = data; struct drm_syncobj **syncobjs; unsigned int possible_flags; @@ -1385,6 +1405,9 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, ret = drm_syncobj_array_find(file_private, u64_to_user_ptr(args->handles), args->count_handles, + stack_handles, + stack_syncobjs, + ARRAY_SIZE(stack_syncobjs), &syncobjs); if (ret < 0) return ret; @@ -1397,7 +1420,8 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, ret = drm_syncobj_array_wait(dev, file_private, args, NULL, syncobjs, false, tp); - drm_syncobj_array_free(syncobjs, args->count_handles); + drm_syncobj_array_free(syncobjs, args->count_handles, + syncobjs != stack_syncobjs); return ret; } @@ -1406,6 +1430,8 @@ int drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private) { + struct drm_syncobj *stack_syncobjs[DRM_SYNCOBJ_FAST_PATH_ENTRIES]; + uint32_t stack_handles[DRM_SYNCOBJ_FAST_PATH_ENTRIES]; struct drm_syncobj_timeline_wait *args = data; struct drm_syncobj **syncobjs; unsigned int possible_flags; @@ -1429,6 +1455,9 @@ drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data, ret = drm_syncobj_array_find(file_private, 
u64_to_user_ptr(args->handles), args->count_handles, + stack_handles, + stack_syncobjs, + ARRAY_SIZE(stack_syncobjs), &syncobjs); if (ret < 0) return ret; @@ -1441,7 +1470,8 @@ drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data, ret = drm_syncobj_array_wait(dev, file_private, NULL, args, syncobjs, true, tp); - drm_syncobj_array_free(syncobjs, args->count_handles); + drm_syncobj_array_free(syncobjs, args->count_handles, + syncobjs != stack_syncobjs); return ret; } @@ -1553,6 +1583,8 @@ int drm_syncobj_reset_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private) { + struct drm_syncobj *stack_syncobjs[DRM_SYNCOBJ_FAST_PATH_ENTRIES]; + uint32_t stack_handles[DRM_SYNCOBJ_FAST_PATH_ENTRIES]; struct drm_syncobj_array *args = data; struct drm_syncobj **syncobjs; uint32_t i; @@ -1570,6 +1602,9 @@ drm_syncobj_reset_ioctl(struct drm_device *dev, void *data, ret = drm_syncobj_array_find(file_private, u64_to_user_ptr(args->handles), args->count_handles, + stack_handles, + stack_syncobjs, + ARRAY_SIZE(stack_syncobjs), &syncobjs); if (ret < 0) return ret; @@ -1577,7 +1612,8 @@ drm_syncobj_reset_ioctl(struct drm_device *dev, void *data, for (i = 0; i < args->count_handles; i++) drm_syncobj_replace_fence(syncobjs[i], NULL); - drm_syncobj_array_free(syncobjs, args->count_handles); + drm_syncobj_array_free(syncobjs, args->count_handles, + syncobjs != stack_syncobjs); return 0; } @@ -1586,6 +1622,8 @@ int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private) { + struct drm_syncobj *stack_syncobjs[DRM_SYNCOBJ_FAST_PATH_ENTRIES]; + uint32_t stack_handles[DRM_SYNCOBJ_FAST_PATH_ENTRIES]; struct drm_syncobj_array *args = data; struct drm_syncobj **syncobjs; uint32_t i; @@ -1603,6 +1641,9 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void *data, ret = drm_syncobj_array_find(file_private, u64_to_user_ptr(args->handles), args->count_handles, + stack_handles, + stack_syncobjs, + ARRAY_SIZE(stack_syncobjs), 
&syncobjs); if (ret < 0) return ret; @@ -1613,7 +1654,8 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void *data, break; } - drm_syncobj_array_free(syncobjs, args->count_handles); + drm_syncobj_array_free(syncobjs, args->count_handles, + syncobjs != stack_syncobjs); return ret; } @@ -1622,6 +1664,8 @@ int drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private) { + struct drm_syncobj *stack_syncobjs[DRM_SYNCOBJ_FAST_PATH_ENTRIES]; + uint32_t stack_handles[DRM_SYNCOBJ_FAST_PATH_ENTRIES]; struct drm_syncobj_timeline_array *args = data; struct drm_syncobj **syncobjs; struct dma_fence_chain **chains; @@ -1641,6 +1685,9 @@ drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data, ret = drm_syncobj_array_find(file_private, u64_to_user_ptr(args->handles), args->count_handles, + stack_handles, + stack_syncobjs, + ARRAY_SIZE(stack_syncobjs), &syncobjs); if (ret < 0) return ret; @@ -1686,7 +1733,8 @@ drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data, err_points: kfree(points); out: - drm_syncobj_array_free(syncobjs, args->count_handles); + drm_syncobj_array_free(syncobjs, args->count_handles, + syncobjs != stack_syncobjs); return ret; } @@ -1694,6 +1742,8 @@ drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data, int drm_syncobj_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private) { + struct drm_syncobj *stack_syncobjs[DRM_SYNCOBJ_FAST_PATH_ENTRIES]; + uint32_t stack_handles[DRM_SYNCOBJ_FAST_PATH_ENTRIES]; struct drm_syncobj_timeline_array *args = data; struct drm_syncobj **syncobjs; uint64_t __user *points = u64_to_user_ptr(args->points); @@ -1712,6 +1762,9 @@ int drm_syncobj_query_ioctl(struct drm_device *dev, void *data, ret = drm_syncobj_array_find(file_private, u64_to_user_ptr(args->handles), args->count_handles, + stack_handles, + stack_syncobjs, + ARRAY_SIZE(stack_syncobjs), &syncobjs); if (ret < 0) return ret; @@ -1755,7 +1808,8 @@ int 
drm_syncobj_query_ioctl(struct drm_device *dev, void *data, if (ret) break; } - drm_syncobj_array_free(syncobjs, args->count_handles); + drm_syncobj_array_free(syncobjs, args->count_handles, + syncobjs != stack_syncobjs); return ret; } -- 2.48.0
