Running the Cyberpunk 2077 benchmark we can observe that the lookup helper
is relatively hot, but 97% of the calls are for a single object (~3% are
for two points, and never more than three points), while a more trivial
workload like vkmark under Plasma is even more skewed to single point
lookups.

Therefore let's add a fast path to bypass the kmalloc_array/kfree and use a
pre-allocated stack array for those cases.

Signed-off-by: Tvrtko Ursulin <[email protected]>
Reviewed-by: Maíra Canal <[email protected]> # v2
---
v2:
 * Added comments describing how the fast path arrays were sized.
 * Make container freeing criteria clearer by using a boolean.

v3:
 * Rebased to be standalone.
---
 drivers/gpu/drm/drm_syncobj.c | 92 +++++++++++++++++++++++++++--------
 1 file changed, 73 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 99aada85865d..ba64119a664c 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -1310,21 +1310,33 @@ static int drm_syncobj_array_wait(struct drm_device 
*dev,
 static int drm_syncobj_array_find(struct drm_file *file_private,
                                  void __user *user_handles,
                                  uint32_t count_handles,
+                                 uint32_t *stack_handles,
+                                 struct drm_syncobj **stack_syncobjs,
+                                 u32 stack_count,
                                  struct drm_syncobj ***syncobjs_out)
 {
        uint32_t i, *handles;
        struct drm_syncobj **syncobjs;
        int ret;
 
-       handles = memdup_array_user(user_handles, count_handles,
-                                   sizeof(*handles));
-       if (IS_ERR(handles))
-               return PTR_ERR(handles);
+       if (count_handles > stack_count) {
+               handles = memdup_array_user(user_handles, count_handles,
+                                           sizeof(*handles));
+               if (IS_ERR(handles))
+                       return PTR_ERR(handles);
 
-       syncobjs = kmalloc_array(count_handles, sizeof(*syncobjs), GFP_KERNEL);
-       if (syncobjs == NULL) {
-               ret = -ENOMEM;
-               goto err_free_handles;
+               syncobjs = kmalloc_array(count_handles, sizeof(*syncobjs),
+                                        GFP_KERNEL);
+               if (!syncobjs) {
+                       ret = -ENOMEM;
+                       goto err_free_handles;
+               }
+       } else {
+               handles = stack_handles;
+               syncobjs = stack_syncobjs;
+               if (copy_from_user(handles, user_handles,
+                                  count_handles * sizeof(*handles)))
+                       return -EFAULT;
        }
 
        for (i = 0; i < count_handles; i++) {
@@ -1335,34 +1347,42 @@ static int drm_syncobj_array_find(struct drm_file 
*file_private,
                }
        }
 
-       kfree(handles);
+       if (handles != stack_handles)
+               kfree(handles);
        *syncobjs_out = syncobjs;
        return 0;
 
 err_put_syncobjs:
        while (i-- > 0)
                drm_syncobj_put(syncobjs[i]);
-       kfree(syncobjs);
+       if (syncobjs != stack_syncobjs)
+               kfree(syncobjs);
 err_free_handles:
-       kfree(handles);
+       if (handles != stack_handles)
+               kfree(handles);
 
        return ret;
 }
 
 static void drm_syncobj_array_free(struct drm_syncobj **syncobjs,
-                                  uint32_t count)
+                                  uint32_t count,
+                                  bool free_container)
 {
        uint32_t i;
 
        for (i = 0; i < count; i++)
                drm_syncobj_put(syncobjs[i]);
-       kfree(syncobjs);
+
+       if (free_container)
+               kfree(syncobjs);
 }
 
 int
 drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
                       struct drm_file *file_private)
 {
+       struct drm_syncobj *stack_syncobjs[DRM_SYNCOBJ_FAST_PATH_ENTRIES];
+       uint32_t stack_handles[DRM_SYNCOBJ_FAST_PATH_ENTRIES];
        struct drm_syncobj_wait *args = data;
        struct drm_syncobj **syncobjs;
        unsigned int possible_flags;
@@ -1385,6 +1405,9 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
        ret = drm_syncobj_array_find(file_private,
                                     u64_to_user_ptr(args->handles),
                                     args->count_handles,
+                                    stack_handles,
+                                    stack_syncobjs,
+                                    ARRAY_SIZE(stack_syncobjs),
                                     &syncobjs);
        if (ret < 0)
                return ret;
@@ -1397,7 +1420,8 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
        ret = drm_syncobj_array_wait(dev, file_private,
                                     args, NULL, syncobjs, false, tp);
 
-       drm_syncobj_array_free(syncobjs, args->count_handles);
+       drm_syncobj_array_free(syncobjs, args->count_handles,
+                              syncobjs != stack_syncobjs);
 
        return ret;
 }
@@ -1406,6 +1430,8 @@ int
 drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
                                struct drm_file *file_private)
 {
+       struct drm_syncobj *stack_syncobjs[DRM_SYNCOBJ_FAST_PATH_ENTRIES];
+       uint32_t stack_handles[DRM_SYNCOBJ_FAST_PATH_ENTRIES];
        struct drm_syncobj_timeline_wait *args = data;
        struct drm_syncobj **syncobjs;
        unsigned int possible_flags;
@@ -1429,6 +1455,9 @@ drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, 
void *data,
        ret = drm_syncobj_array_find(file_private,
                                     u64_to_user_ptr(args->handles),
                                     args->count_handles,
+                                    stack_handles,
+                                    stack_syncobjs,
+                                    ARRAY_SIZE(stack_syncobjs),
                                     &syncobjs);
        if (ret < 0)
                return ret;
@@ -1441,7 +1470,8 @@ drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, 
void *data,
        ret = drm_syncobj_array_wait(dev, file_private,
                                     NULL, args, syncobjs, true, tp);
 
-       drm_syncobj_array_free(syncobjs, args->count_handles);
+       drm_syncobj_array_free(syncobjs, args->count_handles,
+                              syncobjs != stack_syncobjs);
 
        return ret;
 }
@@ -1553,6 +1583,8 @@ int
 drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
                        struct drm_file *file_private)
 {
+       struct drm_syncobj *stack_syncobjs[DRM_SYNCOBJ_FAST_PATH_ENTRIES];
+       uint32_t stack_handles[DRM_SYNCOBJ_FAST_PATH_ENTRIES];
        struct drm_syncobj_array *args = data;
        struct drm_syncobj **syncobjs;
        uint32_t i;
@@ -1570,6 +1602,9 @@ drm_syncobj_reset_ioctl(struct drm_device *dev, void 
*data,
        ret = drm_syncobj_array_find(file_private,
                                     u64_to_user_ptr(args->handles),
                                     args->count_handles,
+                                    stack_handles,
+                                    stack_syncobjs,
+                                    ARRAY_SIZE(stack_syncobjs),
                                     &syncobjs);
        if (ret < 0)
                return ret;
@@ -1577,7 +1612,8 @@ drm_syncobj_reset_ioctl(struct drm_device *dev, void 
*data,
        for (i = 0; i < args->count_handles; i++)
                drm_syncobj_replace_fence(syncobjs[i], NULL);
 
-       drm_syncobj_array_free(syncobjs, args->count_handles);
+       drm_syncobj_array_free(syncobjs, args->count_handles,
+                              syncobjs != stack_syncobjs);
 
        return 0;
 }
@@ -1586,6 +1622,8 @@ int
 drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
                         struct drm_file *file_private)
 {
+       struct drm_syncobj *stack_syncobjs[DRM_SYNCOBJ_FAST_PATH_ENTRIES];
+       uint32_t stack_handles[DRM_SYNCOBJ_FAST_PATH_ENTRIES];
        struct drm_syncobj_array *args = data;
        struct drm_syncobj **syncobjs;
        uint32_t i;
@@ -1603,6 +1641,9 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void 
*data,
        ret = drm_syncobj_array_find(file_private,
                                     u64_to_user_ptr(args->handles),
                                     args->count_handles,
+                                    stack_handles,
+                                    stack_syncobjs,
+                                    ARRAY_SIZE(stack_syncobjs),
                                     &syncobjs);
        if (ret < 0)
                return ret;
@@ -1613,7 +1654,8 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void 
*data,
                        break;
        }
 
-       drm_syncobj_array_free(syncobjs, args->count_handles);
+       drm_syncobj_array_free(syncobjs, args->count_handles,
+                              syncobjs != stack_syncobjs);
 
        return ret;
 }
@@ -1622,6 +1664,8 @@ int
 drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
                                  struct drm_file *file_private)
 {
+       struct drm_syncobj *stack_syncobjs[DRM_SYNCOBJ_FAST_PATH_ENTRIES];
+       uint32_t stack_handles[DRM_SYNCOBJ_FAST_PATH_ENTRIES];
        struct drm_syncobj_timeline_array *args = data;
        struct drm_syncobj **syncobjs;
        struct dma_fence_chain **chains;
@@ -1641,6 +1685,9 @@ drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, 
void *data,
        ret = drm_syncobj_array_find(file_private,
                                     u64_to_user_ptr(args->handles),
                                     args->count_handles,
+                                    stack_handles,
+                                    stack_syncobjs,
+                                    ARRAY_SIZE(stack_syncobjs),
                                     &syncobjs);
        if (ret < 0)
                return ret;
@@ -1686,7 +1733,8 @@ drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, 
void *data,
 err_points:
        kfree(points);
 out:
-       drm_syncobj_array_free(syncobjs, args->count_handles);
+       drm_syncobj_array_free(syncobjs, args->count_handles,
+                              syncobjs != stack_syncobjs);
 
        return ret;
 }
@@ -1694,6 +1742,8 @@ drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, 
void *data,
 int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
                            struct drm_file *file_private)
 {
+       struct drm_syncobj *stack_syncobjs[DRM_SYNCOBJ_FAST_PATH_ENTRIES];
+       uint32_t stack_handles[DRM_SYNCOBJ_FAST_PATH_ENTRIES];
        struct drm_syncobj_timeline_array *args = data;
        struct drm_syncobj **syncobjs;
        uint64_t __user *points = u64_to_user_ptr(args->points);
@@ -1712,6 +1762,9 @@ int drm_syncobj_query_ioctl(struct drm_device *dev, void 
*data,
        ret = drm_syncobj_array_find(file_private,
                                     u64_to_user_ptr(args->handles),
                                     args->count_handles,
+                                    stack_handles,
+                                    stack_syncobjs,
+                                    ARRAY_SIZE(stack_syncobjs),
                                     &syncobjs);
        if (ret < 0)
                return ret;
@@ -1755,7 +1808,8 @@ int drm_syncobj_query_ioctl(struct drm_device *dev, void 
*data,
                if (ret)
                        break;
        }
-       drm_syncobj_array_free(syncobjs, args->count_handles);
+       drm_syncobj_array_free(syncobjs, args->count_handles,
+                              syncobjs != stack_syncobjs);
 
        return ret;
 }
-- 
2.48.0

Reply via email to