[PATCH 11/11] drm/nouveau/dp: Honor GSP link training retry timeouts

2023-12-21 Thread Dave Airlie
From: Lyude Paul 

Turns out that one of the ways that Nvidia's driver handles the pre-LT
timeout for eDP panels is by providing a retry timeout in their link
training callbacks that we're expected to wait for. Up until now we didn't
pay any attention to this parameter.

So, start honoring the timeout if link training fails - and retry up to 3
times. The "3 times" bit comes from OpenRM's link training code.

[airlied: this fixes the panel on one of my laptops]
Signed-off-by: Lyude Paul 
Signed-off-by: Dave Airlie 
---
 .../gpu/drm/nouveau/nvkm/engine/disp/r535.c   | 62 ---
 1 file changed, 40 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c 
b/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c
index 1b4f988df7ed..b287ab19a51f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c
@@ -957,40 +957,58 @@ r535_dp_train_target(struct nvkm_outp *outp, u8 target, 
bool mst, u8 link_nr, u8
 {
struct nvkm_disp *disp = outp->disp;
NV0073_CTRL_DP_CTRL_PARAMS *ctrl;
-   int ret;
-
-   ctrl = nvkm_gsp_rm_ctrl_get(>rm.objcom, NV0073_CTRL_CMD_DP_CTRL, 
sizeof(*ctrl));
-   if (IS_ERR(ctrl))
-   return PTR_ERR(ctrl);
+   int ret, retries;
+   u32 cmd, data;
 
-   ctrl->subDeviceInstance = 0;
-   ctrl->displayId = BIT(outp->index);
-   ctrl->cmd = NVDEF(NV0073_CTRL, DP_CMD, SET_LANE_COUNT, TRUE) |
-   NVDEF(NV0073_CTRL, DP_CMD, SET_LINK_BW, TRUE) |
-   NVDEF(NV0073_CTRL, DP_CMD, TRAIN_PHY_REPEATER, YES);
-   ctrl->data = NVVAL(NV0073_CTRL, DP_DATA, SET_LANE_COUNT, link_nr) |
-NVVAL(NV0073_CTRL, DP_DATA, SET_LINK_BW, link_bw) |
-NVVAL(NV0073_CTRL, DP_DATA, TARGET, target);
+   cmd = NVDEF(NV0073_CTRL, DP_CMD, SET_LANE_COUNT, TRUE) |
+ NVDEF(NV0073_CTRL, DP_CMD, SET_LINK_BW, TRUE) |
+ NVDEF(NV0073_CTRL, DP_CMD, TRAIN_PHY_REPEATER, YES);
+   data = NVVAL(NV0073_CTRL, DP_DATA, SET_LANE_COUNT, link_nr) |
+  NVVAL(NV0073_CTRL, DP_DATA, SET_LINK_BW, link_bw) |
+  NVVAL(NV0073_CTRL, DP_DATA, TARGET, target);
 
if (mst)
-   ctrl->cmd |= NVDEF(NV0073_CTRL, DP_CMD, SET_FORMAT_MODE, 
MULTI_STREAM);
+   cmd |= NVDEF(NV0073_CTRL, DP_CMD, SET_FORMAT_MODE, 
MULTI_STREAM);
 
if (outp->dp.dpcd[DPCD_RC02] & DPCD_RC02_ENHANCED_FRAME_CAP)
-   ctrl->cmd |= NVDEF(NV0073_CTRL, DP_CMD, SET_ENHANCED_FRAMING, 
TRUE);
+   cmd |= NVDEF(NV0073_CTRL, DP_CMD, SET_ENHANCED_FRAMING, TRUE);
 
if (target == 0 &&
 (outp->dp.dpcd[DPCD_RC02] & 0x20) &&
!(outp->dp.dpcd[DPCD_RC03] & DPCD_RC03_TPS4_SUPPORTED))
-   ctrl->cmd |= NVDEF(NV0073_CTRL, DP_CMD, POST_LT_ADJ_REQ_GRANTED, 
YES);
+   cmd |= NVDEF(NV0073_CTRL, DP_CMD, POST_LT_ADJ_REQ_GRANTED, YES);
 
-   ret = nvkm_gsp_rm_ctrl_push(>rm.objcom, , sizeof(*ctrl));
-   if (ret) {
-   nvkm_gsp_rm_ctrl_done(>rm.objcom, ctrl);
-   return ret;
+   /* We should retry up to 3 times, but only if GSP asks politely */
+   for (retries = 0; retries < 3; ++retries) {
+   ctrl = nvkm_gsp_rm_ctrl_get(>rm.objcom, 
NV0073_CTRL_CMD_DP_CTRL,
+   sizeof(*ctrl));
+   if (IS_ERR(ctrl))
+   return PTR_ERR(ctrl);
+
+   ctrl->subDeviceInstance = 0;
+   ctrl->displayId = BIT(outp->index);
+   ctrl->retryTimeMs = 0;
+   ctrl->cmd = cmd;
+   ctrl->data = data;
+
+   ret = nvkm_gsp_rm_ctrl_push(>rm.objcom, , 
sizeof(*ctrl));
+   if (ret == -EAGAIN && ctrl->retryTimeMs) {
+   /* Device (likely an eDP panel) isn't ready yet, wait 
for the time specified
+* by GSP before retrying again */
+   nvkm_debug(>engine.subdev,
+  "Waiting %dms for GSP LT panel delay before 
retrying\n",
+  ctrl->retryTimeMs);
+   msleep(ctrl->retryTimeMs);
+   nvkm_gsp_rm_ctrl_done(>rm.objcom, ctrl);
+   } else {
+   /* GSP didn't say to retry, or we were successful */
+   if (ctrl->err)
+   ret = -EIO;
+   nvkm_gsp_rm_ctrl_done(>rm.objcom, ctrl);
+   break;
+   }
}
 
-   ret = ctrl->err ? -EIO : 0;
-   nvkm_gsp_rm_ctrl_done(>rm.objcom, ctrl);
return ret;
 }
 
-- 
2.43.0



[PATCH 10/11] nouveau: push event block/allowing out of the fence context

2023-12-21 Thread Dave Airlie
There is a deadlock between the irq and fctx locks:
the irq handling takes the irq lock then the fctx lock, while
the fence signalling takes the fctx lock then the irq lock.

This splits the fence signalling path so the code that hits
the irq lock is done in a separate work queue.

This seems to fix crashes/hangs when using nouveau gsp with
i915 primary GPU.

Signed-off-by: Dave Airlie 
---
 drivers/gpu/drm/nouveau/nouveau_fence.c | 28 -
 drivers/gpu/drm/nouveau/nouveau_fence.h |  5 -
 2 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c 
b/drivers/gpu/drm/nouveau/nouveau_fence.c
index ca762ea55413..5057d976fa57 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -62,7 +62,7 @@ nouveau_fence_signal(struct nouveau_fence *fence)
if (test_bit(DMA_FENCE_FLAG_USER_BITS, >base.flags)) {
struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
 
-   if (!--fctx->notify_ref)
+   if (atomic_dec_and_test(>notify_ref))
drop = 1;
}
 
@@ -103,6 +103,7 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, 
int error)
 void
 nouveau_fence_context_del(struct nouveau_fence_chan *fctx)
 {
+   cancel_work_sync(>allow_block_work);
nouveau_fence_context_kill(fctx, 0);
nvif_event_dtor(>event);
fctx->dead = 1;
@@ -167,6 +168,18 @@ nouveau_fence_wait_uevent_handler(struct nvif_event 
*event, void *repv, u32 repc
return ret;
 }
 
+static void
+nouveau_fence_work_allow_block(struct work_struct *work)
+{
+   struct nouveau_fence_chan *fctx = container_of(work, struct 
nouveau_fence_chan,
+  allow_block_work);
+
+   if (atomic_read(>notify_ref) == 0)
+   nvif_event_block(>event);
+   else
+   nvif_event_allow(>event);
+}
+
 void
 nouveau_fence_context_new(struct nouveau_channel *chan, struct 
nouveau_fence_chan *fctx)
 {
@@ -178,6 +191,7 @@ nouveau_fence_context_new(struct nouveau_channel *chan, 
struct nouveau_fence_cha
} args;
int ret;
 
+   INIT_WORK(>allow_block_work, nouveau_fence_work_allow_block);
INIT_LIST_HEAD(>flip);
INIT_LIST_HEAD(>pending);
spin_lock_init(>lock);
@@ -521,15 +535,19 @@ static bool nouveau_fence_enable_signaling(struct 
dma_fence *f)
struct nouveau_fence *fence = from_fence(f);
struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
bool ret;
+   bool do_work;
 
-   if (!fctx->notify_ref++)
-   nvif_event_allow(>event);
+   if (atomic_inc_return(>notify_ref) == 0)
+   do_work = true;
 
ret = nouveau_fence_no_signaling(f);
if (ret)
set_bit(DMA_FENCE_FLAG_USER_BITS, >base.flags);
-   else if (!--fctx->notify_ref)
-   nvif_event_block(>event);
+   else if (atomic_dec_and_test(>notify_ref))
+   do_work = true;
+
+   if (do_work)
+   schedule_work(>allow_block_work);
 
return ret;
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h 
b/drivers/gpu/drm/nouveau/nouveau_fence.h
index 64d33ae7f356..28f5cf013b89 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
@@ -3,6 +3,7 @@
 #define __NOUVEAU_FENCE_H__
 
 #include 
+#include 
 #include 
 
 struct nouveau_drm;
@@ -45,7 +46,9 @@ struct nouveau_fence_chan {
char name[32];
 
struct nvif_event event;
-   int notify_ref, dead, killed;
+   struct work_struct allow_block_work;
+   atomic_t notify_ref;
+   int dead, killed;
 };
 
 struct nouveau_fence_priv {
-- 
2.43.0



[PATCH 08/11] nouveau/gsp: don't free ctrl messages on errors

2023-12-21 Thread Dave Airlie
It looks like for some messages the upper layers need to get access to the
results of the message so we can interpret it.

Rework the ctrl push interface to not free things, and clean up properly
wherever it errors out.

Requested-by: Lyude
Signed-off-by: Dave Airlie 
---
 .../gpu/drm/nouveau/include/nvkm/subdev/gsp.h |  17 +--
 .../gpu/drm/nouveau/nvkm/engine/disp/r535.c   | 108 +++---
 .../gpu/drm/nouveau/nvkm/subdev/gsp/r535.c|  36 +++---
 3 files changed, 100 insertions(+), 61 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h 
b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
index 2fa0445d8928..d1437c08645f 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
@@ -187,7 +187,7 @@ struct nvkm_gsp {
void (*rpc_done)(struct nvkm_gsp *gsp, void *repv);
 
void *(*rm_ctrl_get)(struct nvkm_gsp_object *, u32 cmd, u32 
argc);
-   void *(*rm_ctrl_push)(struct nvkm_gsp_object *, void *argv, u32 
repc);
+   int (*rm_ctrl_push)(struct nvkm_gsp_object *, void **argv, u32 
repc);
void (*rm_ctrl_done)(struct nvkm_gsp_object *, void *repv);
 
void *(*rm_alloc_get)(struct nvkm_gsp_object *, u32 oclass, u32 
argc);
@@ -265,7 +265,7 @@ nvkm_gsp_rm_ctrl_get(struct nvkm_gsp_object *object, u32 
cmd, u32 argc)
return object->client->gsp->rm->rm_ctrl_get(object, cmd, argc);
 }
 
-static inline void *
+static inline int
 nvkm_gsp_rm_ctrl_push(struct nvkm_gsp_object *object, void *argv, u32 repc)
 {
return object->client->gsp->rm->rm_ctrl_push(object, argv, repc);
@@ -275,21 +275,24 @@ static inline void *
 nvkm_gsp_rm_ctrl_rd(struct nvkm_gsp_object *object, u32 cmd, u32 repc)
 {
void *argv = nvkm_gsp_rm_ctrl_get(object, cmd, repc);
+   int ret;
 
if (IS_ERR(argv))
return argv;
 
-   return nvkm_gsp_rm_ctrl_push(object, argv, repc);
+   ret = nvkm_gsp_rm_ctrl_push(object, , repc);
+   if (ret)
+   return ERR_PTR(ret);
+   return argv;
 }
 
 static inline int
 nvkm_gsp_rm_ctrl_wr(struct nvkm_gsp_object *object, void *argv)
 {
-   void *repv = nvkm_gsp_rm_ctrl_push(object, argv, 0);
-
-   if (IS_ERR(repv))
-   return PTR_ERR(repv);
+   int ret = nvkm_gsp_rm_ctrl_push(object, , 0);
 
+   if (ret)
+   return ret;
return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c 
b/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c
index 1c8c4cca0957..1b4f988df7ed 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c
@@ -282,7 +282,7 @@ r535_sor_bl_get(struct nvkm_ior *sor)
 {
struct nvkm_disp *disp = sor->disp;
NV0073_CTRL_SPECIFIC_BACKLIGHT_BRIGHTNESS_PARAMS *ctrl;
-   int lvl;
+   int ret, lvl;
 
ctrl = nvkm_gsp_rm_ctrl_get(>rm.objcom,

NV0073_CTRL_CMD_SPECIFIC_GET_BACKLIGHT_BRIGHTNESS,
@@ -292,9 +292,11 @@ r535_sor_bl_get(struct nvkm_ior *sor)
 
ctrl->displayId = BIT(sor->asy.outp->index);
 
-   ctrl = nvkm_gsp_rm_ctrl_push(>rm.objcom, ctrl, sizeof(*ctrl));
-   if (IS_ERR(ctrl))
-   return PTR_ERR(ctrl);
+   ret = nvkm_gsp_rm_ctrl_push(>rm.objcom, , sizeof(*ctrl));
+   if (ret) {
+   nvkm_gsp_rm_ctrl_done(>rm.objcom, ctrl);
+   return ret;
+   }
 
lvl = ctrl->brightness;
nvkm_gsp_rm_ctrl_done(>rm.objcom, ctrl);
@@ -649,9 +651,11 @@ r535_conn_new(struct nvkm_disp *disp, u32 id)
ctrl->subDeviceInstance = 0;
ctrl->displayId = BIT(id);
 
-   ctrl = nvkm_gsp_rm_ctrl_push(>rm.objcom, ctrl, sizeof(*ctrl));
-   if (IS_ERR(ctrl))
-   return (void *)ctrl;
+   ret = nvkm_gsp_rm_ctrl_push(>rm.objcom, , sizeof(*ctrl));
+   if (ret) {
+   nvkm_gsp_rm_ctrl_done(>rm.objcom, ctrl);
+   return ERR_PTR(ret);
+   }
 
list_for_each_entry(conn, >conns, head) {
if (conn->index == ctrl->data[0].index) {
@@ -686,7 +690,7 @@ r535_outp_acquire(struct nvkm_outp *outp, bool hda)
struct nvkm_disp *disp = outp->disp;
struct nvkm_ior *ior;
NV0073_CTRL_DFP_ASSIGN_SOR_PARAMS *ctrl;
-   int or;
+   int ret, or;
 
ctrl = nvkm_gsp_rm_ctrl_get(>rm.objcom,
NV0073_CTRL_CMD_DFP_ASSIGN_SOR, 
sizeof(*ctrl));
@@ -699,9 +703,11 @@ r535_outp_acquire(struct nvkm_outp *outp, bool hda)
if (hda)
ctrl->flags |= NVDEF(NV0073_CTRL, DFP_ASSIGN_SOR_FLAGS, AUDIO, 
OPTIMAL);
 
-   ctrl = nvkm_gsp_rm_ctrl_push(>rm.objcom, ctrl, sizeof(*ctrl));
-   if (IS_ERR(ctrl))
-   return PTR_ERR(ctrl);
+   ret = nvkm_gsp_rm_ctrl_push(>rm.objcom, , sizeof(*ctrl));
+   if (ret) {
+   nvkm_gsp_rm_ctrl_done(>rm.objcom, ctrl);
+   

[PATCH 07/11] nouveau/gsp: convert gsp errors to generic errors

2023-12-21 Thread Dave Airlie
This should let the upper layers retry as needed on EAGAIN.

There may be other values we will care about in the future, but
this covers our present needs.

Signed-off-by: Dave Airlie 
---
 .../gpu/drm/nouveau/nvkm/subdev/gsp/r535.c| 26 +++
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c 
b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
index 774ca47b019f..54c1fbccc013 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
@@ -70,6 +70,20 @@ struct r535_gsp_msg {
 
 #define GSP_MSG_HDR_SIZE offsetof(struct r535_gsp_msg, data)
 
+static int
+r535_rpc_status_to_errno(uint32_t rpc_status)
+{
+   switch (rpc_status) {
+   case 0x55: /* NV_ERR_NOT_READY */
+   case 0x66: /* NV_ERR_TIMEOUT_RETRY */
+  return -EAGAIN;
+   case 0x51: /* NV_ERR_NO_MEMORY */
+   return -ENOMEM;
+   default:
+   return -EINVAL;
+   }
+}
+
 static void *
 r535_gsp_msgq_wait(struct nvkm_gsp *gsp, u32 repc, u32 *prepc, int *ptime)
 {
@@ -584,8 +598,9 @@ r535_gsp_rpc_rm_alloc_push(struct nvkm_gsp_object *object, 
void *argv, u32 repc)
return rpc;
 
if (rpc->status) {
-   nvkm_error(>subdev, "RM_ALLOC: 0x%x\n", rpc->status);
-   ret = ERR_PTR(-EINVAL);
+   ret = ERR_PTR(r535_rpc_status_to_errno(rpc->status));
+   if (ret != -EAGAIN)
+   nvkm_error(>subdev, "RM_ALLOC: 0x%x\n", 
rpc->status);
} else {
ret = repc ? rpc->params : NULL;
}
@@ -639,9 +654,10 @@ r535_gsp_rpc_rm_ctrl_push(struct nvkm_gsp_object *object, 
void *argv, u32 repc)
return rpc;
 
if (rpc->status) {
-   nvkm_error(>subdev, "cli:0x%08x obj:0x%08x ctrl cmd:0x%08x 
failed: 0x%08x\n",
-  object->client->object.handle, object->handle, 
rpc->cmd, rpc->status);
-   ret = ERR_PTR(-EINVAL);
+   ret = ERR_PTR(r535_rpc_status_to_errno(rpc->status));
+   if (ret != -EAGAIN)
+   nvkm_error(>subdev, "cli:0x%08x obj:0x%08x ctrl 
cmd:0x%08x failed: 0x%08x\n",
+  object->client->object.handle, 
object->handle, rpc->cmd, rpc->status);
} else {
ret = repc ? rpc->params : NULL;
}
-- 
2.43.0



[PATCH 09/11] nouveau/gsp: always free the alloc messages on r535

2023-12-21 Thread Dave Airlie
Fixes a memory leak seen with kmemleak.

Signed-off-by: Dave Airlie 
---
 drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c 
b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
index e2810fd1a36f..cafb82826473 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
@@ -605,8 +605,7 @@ r535_gsp_rpc_rm_alloc_push(struct nvkm_gsp_object *object, 
void *argv, u32 repc)
ret = repc ? rpc->params : NULL;
}
 
-   if (ret)
-   nvkm_gsp_rpc_done(gsp, rpc);
+   nvkm_gsp_rpc_done(gsp, rpc);
 
return ret;
 }
-- 
2.43.0



[PATCH 04/11] nouveau/gsp: free acpi object after use

2023-12-21 Thread Dave Airlie
This fixes a memory leak for the acpi dod object.

Signed-off-by: Dave Airlie 
---
 drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c 
b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
index 365dda6c002a..1a6d7c89660d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
@@ -1226,6 +1226,7 @@ r535_gsp_acpi_dod(acpi_handle handle, DOD_METHOD_DATA 
*dod)
}
 
dod->status = 0;
+   kfree(output.pointer);
 }
 #endif
 
-- 
2.43.0



[PATCH 06/11] drm/nouveau/gsp: Fix ACPI MXDM/MXDS method invocations

2023-12-21 Thread Dave Airlie
From: Lyude Paul 

Currently we get an error from ACPI because both of these methods expect
a single argument, and we don't provide one. I'm not totally clear on what
that argument does, but we're able to find the missing value from
_acpiCacheMethodData() in src/kernel/platform/acpi_common.c in nvidia's
driver. So, let's add that - which doesn't get eDP displays to power on
quite yet, but gets rid of the argument warning at least.

Signed-off-by: Lyude Paul 
Signed-off-by: Dave Airlie 
---
 drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c 
b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
index 1a6d7c89660d..774ca47b019f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
@@ -1150,6 +1150,8 @@ static void
 r535_gsp_acpi_mux_id(acpi_handle handle, u32 id, MUX_METHOD_DATA_ELEMENT *mode,
 MUX_METHOD_DATA_ELEMENT *part)
 {
+   union acpi_object mux_arg = { ACPI_TYPE_INTEGER };
+   struct acpi_object_list input = { 1, _arg };
acpi_handle iter = NULL, handle_mux = NULL;
acpi_status status;
unsigned long long value;
@@ -1172,14 +1174,18 @@ r535_gsp_acpi_mux_id(acpi_handle handle, u32 id, 
MUX_METHOD_DATA_ELEMENT *mode,
if (!handle_mux)
return;
 
-   status = acpi_evaluate_integer(handle_mux, "MXDM", NULL, );
+   /* I -think- 0 means "acquire" according to nvidia's driver source */
+   input.pointer->integer.type = ACPI_TYPE_INTEGER;
+   input.pointer->integer.value = 0;
+
+   status = acpi_evaluate_integer(handle_mux, "MXDM", , );
if (ACPI_SUCCESS(status)) {
mode->acpiId = id;
mode->mode   = value;
mode->status = 0;
}
 
-   status = acpi_evaluate_integer(handle_mux, "MXDS", NULL, );
+   status = acpi_evaluate_integer(handle_mux, "MXDS", , );
if (ACPI_SUCCESS(status)) {
part->acpiId = id;
part->mode   = value;
-- 
2.43.0



[PATCH 03/11] nouveau: fix disp disabling with GSP

2023-12-21 Thread Dave Airlie
This func ptr here is normally a static allocation, but gsp r535
uses a dynamic pointer, so we need to handle that better.

This fixes a crash with GSP when you use config=disp=0 to avoid
disp problems.

Signed-off-by: Dave Airlie 
---
 drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c 
b/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c
index 457ec5db794d..b24eb1e560bc 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c
@@ -209,7 +209,7 @@ nvkm_disp_dtor(struct nvkm_engine *engine)
nvkm_head_del();
}
 
-   if (disp->func->dtor)
+   if (disp->func && disp->func->dtor)
disp->func->dtor(disp);
 
return data;
@@ -243,8 +243,10 @@ nvkm_disp_new_(const struct nvkm_disp_func *func, struct 
nvkm_device *device,
spin_lock_init(>client.lock);
 
ret = nvkm_engine_ctor(_disp, device, type, inst, true, 
>engine);
-   if (ret)
+   if (ret) {
+   disp->func = NULL;
return ret;
+   }
 
if (func->super) {
disp->super.wq = create_singlethread_workqueue("nvkm-disp");
-- 
2.43.0



[PATCH 05/11] nouveau/gsp: free userd allocation.

2023-12-21 Thread Dave Airlie
This was being leaked.

Signed-off-by: Dave Airlie 
---
 drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c 
b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c
index d088e636edc3..b903785056b5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c
@@ -242,6 +242,7 @@ r535_chan_id_put(struct nvkm_chan *chan)
nvkm_memory_unref(>mem);
nvkm_chid_put(runl->chid, userd->chid, 
>cgrp->lock);
list_del(>head);
+   kfree(userd);
}
 
break;
-- 
2.43.0



[PATCH 01/11] nouveau/gsp: add three notifier callbacks that we see in normal operation (v2)

2023-12-21 Thread Dave Airlie
Add NULL callbacks for some things GSP calls that we don't handle but know
about, so we avoid the logging.

v2: Timur suggested allowing null fn.

Signed-off-by: Dave Airlie 
---
 drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c 
b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
index 44fb86841c05..7f831f41b598 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
@@ -298,7 +298,8 @@ r535_gsp_msg_recv(struct nvkm_gsp *gsp, int fn, u32 repc)
struct nvkm_gsp_msgq_ntfy *ntfy = >msgq.ntfy[i];
 
if (ntfy->fn == msg->function) {
-   ntfy->func(ntfy->priv, ntfy->fn, msg->data, msg->length 
- sizeof(*msg));
+   if (ntfy->func)
+   ntfy->func(ntfy->priv, ntfy->fn, msg->data, 
msg->length - sizeof(*msg));
break;
}
}
@@ -2186,7 +2187,9 @@ r535_gsp_oneinit(struct nvkm_gsp *gsp)
r535_gsp_msg_ntfy_add(gsp, NV_VGPU_MSG_EVENT_MMU_FAULT_QUEUED,
  r535_gsp_msg_mmu_fault_queued, gsp);
r535_gsp_msg_ntfy_add(gsp, NV_VGPU_MSG_EVENT_OS_ERROR_LOG, 
r535_gsp_msg_os_error_log, gsp);
-
+   r535_gsp_msg_ntfy_add(gsp, 
NV_VGPU_MSG_EVENT_PERF_BRIDGELESS_INFO_UPDATE, NULL, NULL);
+   r535_gsp_msg_ntfy_add(gsp, NV_VGPU_MSG_EVENT_UCODE_LIBOS_PRINT, NULL, 
NULL);
+   r535_gsp_msg_ntfy_add(gsp, NV_VGPU_MSG_EVENT_GSP_SEND_USER_SHARED_DATA, 
NULL, NULL);
ret = r535_gsp_rm_boot_ctor(gsp);
if (ret)
return ret;
-- 
2.43.0



[PATCH 02/11] nouveau/gsp: drop some acpi related debug

2023-12-21 Thread Dave Airlie
These were leftover debug prints; if we need them again, we can bring them
back for debugging later.

Signed-off-by: Dave Airlie 
---
 drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c | 7 ---
 drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c  | 9 -
 2 files changed, 16 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c 
b/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c
index 298035070b3a..1c8c4cca0957 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c
@@ -1465,8 +1465,6 @@ r535_disp_oneinit(struct nvkm_disp *disp)
bool nvhg = acpi_check_dsm(handle, 
_DSM_GUID, NVHG_DSM_REV,
   1ULL << 0x0014);
 
-   printk(KERN_ERR "bl: nbci:%d nvhg:%d\n", nbci, 
nvhg);
-
if (nbci || nvhg) {
union acpi_object argv4 = {
.buffer.type= 
ACPI_TYPE_BUFFER,
@@ -1479,9 +1477,6 @@ r535_disp_oneinit(struct nvkm_disp *disp)
if (!obj) {
acpi_handle_info(handle, 
"failed to evaluate _DSM\n");
} else {
-   printk(KERN_ERR "bl: obj type 
%d\n", obj->type);
-   printk(KERN_ERR "bl: obj len 
%d\n", obj->package.count);
-
for (int i = 0; i < 
obj->package.count; i++) {
union acpi_object *elt 
= >package.elements[i];
u32 size;
@@ -1491,12 +1486,10 @@ r535_disp_oneinit(struct nvkm_disp *disp)
else
size = 4;
 
-   printk(KERN_ERR "elt 
%03d: type %d size %d\n", i, elt->type, size);

memcpy(>backLightData[ctrl->backLightDataSize], >integer.value, 
size);
ctrl->backLightDataSize 
+= size;
}
 
-   printk(KERN_ERR "bl: data size 
%d\n", ctrl->backLightDataSize);
ctrl->status = 0;
ACPI_FREE(obj);
}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c 
b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
index 7f831f41b598..365dda6c002a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
@@ -1100,16 +1100,12 @@ r535_gsp_acpi_caps(acpi_handle handle, CAPS_METHOD_DATA 
*caps)
if (!obj)
return;
 
-   printk(KERN_ERR "nvop: obj type %d\n", obj->type);
-   printk(KERN_ERR "nvop: obj len %d\n", obj->buffer.length);
-
if (WARN_ON(obj->type != ACPI_TYPE_BUFFER) ||
WARN_ON(obj->buffer.length != 4))
return;
 
caps->status = 0;
caps->optimusCaps = *(u32 *)obj->buffer.pointer;
-   printk(KERN_ERR "nvop: caps %08x\n", caps->optimusCaps);
 
ACPI_FREE(obj);
 
@@ -1136,9 +1132,6 @@ r535_gsp_acpi_jt(acpi_handle handle, JT_METHOD_DATA *jt)
if (!obj)
return;
 
-   printk(KERN_ERR "jt: obj type %d\n", obj->type);
-   printk(KERN_ERR "jt: obj len %d\n", obj->buffer.length);
-
if (WARN_ON(obj->type != ACPI_TYPE_BUFFER) ||
WARN_ON(obj->buffer.length != 4))
return;
@@ -1147,7 +1140,6 @@ r535_gsp_acpi_jt(acpi_handle handle, JT_METHOD_DATA *jt)
jt->jtCaps = *(u32 *)obj->buffer.pointer;
jt->jtRevId = (jt->jtCaps & 0xfff0) >> 20;
jt->bSBIOSCaps = 0;
-   printk(KERN_ERR "jt: caps %08x rev:%04x\n", jt->jtCaps, jt->jtRevId);
 
ACPI_FREE(obj);
 
@@ -1233,7 +1225,6 @@ r535_gsp_acpi_dod(acpi_handle handle, DOD_METHOD_DATA 
*dod)
dod->acpiIdListLen += sizeof(dod->acpiIdList[0]);
}
 
-   printk(KERN_ERR "_DOD: ok! len:%d\n", dod->acpiIdListLen);
dod->status = 0;
 }
 #endif
-- 
2.43.0



nouveau GSP fixes

2023-12-21 Thread Dave Airlie
This is a collection of nouveau fixes: removal of leftover debug prints,
memory leak fixes, a fix for a very annoying race condition causing system
hangs in prime scenarios, and a fix from Lyude to get the panel on my
laptop working.

I'd like to get these into 6.7,

Dave.