calc_sizes() updates region_size[] with the extent of weight[0] and
scale[0] within their hardware regions, but omits weight[1..3] and
scale[1]. Since no NPU_SET_WEIGHT1_REGION, NPU_SET_WEIGHT2_REGION,
NPU_SET_WEIGHT3_REGION, or NPU_SET_SCALE1_REGION commands exist in the
command set, secondary buffers implicitly share the same hardware region
as weight[0] and scale[0] respectively.

The omission means region_size[] reflects only the primary buffer extent.
If a secondary weight or scale buffer extends beyond the primary one,
region_size[] may underestimate the required GEM buffer size.

Fix by extending the region_size[] update in calc_sizes() to cover
weight[1..3] and scale[1], skipping any entry whose base or length still
holds the U64_MAX/U32_MAX sentinel value written by cmd_state_init().

Fixes: 5a5e9c0228e6 ("accel: Add Arm Ethos-U NPU driver")
Cc: [email protected]
Suggested-by: Rob Herring <[email protected]>
Signed-off-by: Muhammad Bilal <[email protected]>
---
 drivers/accel/ethosu/ethosu_gem.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/accel/ethosu/ethosu_gem.c b/drivers/accel/ethosu/ethosu_gem.c
index 7994e7073..152733b6a 100644
--- a/drivers/accel/ethosu/ethosu_gem.c
+++ b/drivers/accel/ethosu/ethosu_gem.c
@@ -271,6 +271,8 @@ static int calc_sizes(struct drm_device *ddev,
        }
 
        if (weight) {
+               int i;
+
                dev_dbg(ddev->dev, "op %d: W:%d:0x%llx-0x%llx\n",
                        op, st->weight[0].region, st->weight[0].base,
                        st->weight[0].base + st->weight[0].length - 1);
@@ -280,6 +282,14 @@ static int calc_sizes(struct drm_device *ddev,
                info->region_size[st->weight[0].region] =
                        max(info->region_size[st->weight[0].region],
                            st->weight[0].base + st->weight[0].length);
+               for (i = 1; i < ARRAY_SIZE(st->weight); i++) {
+                       if (st->weight[i].base == U64_MAX ||
+                           st->weight[i].length == U32_MAX)
+                               continue;
+                       info->region_size[st->weight[0].region] =
+                               max(info->region_size[st->weight[0].region],
+                                   st->weight[i].base + st->weight[i].length);
+               }
        }
 
        if (scale) {
@@ -292,6 +302,11 @@ static int calc_sizes(struct drm_device *ddev,
                info->region_size[st->scale[0].region] =
                        max(info->region_size[st->scale[0].region],
                            st->scale[0].base + st->scale[0].length);
+               if (st->scale[1].base != U64_MAX &&
+                   st->scale[1].length != U32_MAX)
+                       info->region_size[st->scale[0].region] =
+                               max(info->region_size[st->scale[0].region],
+                                   st->scale[1].base + st->scale[1].length);
        }
 
        len = feat_matrix_length(info, &st->ofm, st->ofm.width,
-- 
2.54.0

Reply via email to