RE: [PATCH] drm/amdgpu: add RAS fatal error handler for NBIO v7.9

2023-08-06 Thread Zhang, Hawking
[AMD Official Use Only - General]

+static int nbio_v7_9_set_ras_err_event_athub_irq_state(struct amdgpu_device 
*adev,
+static int nbio_v7_9_set_ras_controller_irq_state(struct amdgpu_device *adev,

both functions could be left as dummy ones since by default it is vector #1 
selected in bare-metal environment. Only SRIOV PF driver needs to select vector 
#4.

Others look good to me. The patch is

Reviewed-by: Hawking Zhang 

Regards,
Hawking

-Original Message-
From: Zhou1, Tao 
Sent: Monday, August 7, 2023 11:06
To: amd-gfx@lists.freedesktop.org; Yang, Stanley ; Zhang, 
Hawking ; Li, Candice ; Chai, Thomas 

Cc: Zhou1, Tao 
Subject: [PATCH] drm/amdgpu: add RAS fatal error handler for NBIO v7.9

Register RAS fatal error interrupt and add handler.

Signed-off-by: Tao Zhou 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c |   4 +
 drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c  | 219 
 drivers/gpu/drm/amd/amdgpu/nbio_v7_9.h  |   1 +
 3 files changed, 224 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 832fa646b38f..bef0f9264b4f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -35,6 +35,7 @@
 #include "amdgpu_xgmi.h"
 #include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
 #include "nbio_v4_3.h"
+#include "nbio_v7_9.h"
 #include "atom.h"
 #include "amdgpu_reset.h"

@@ -2663,6 +2664,9 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
 * check DF RAS */
adev->nbio.ras = _v4_3_ras;
break;
+   case IP_VERSION(7, 9, 0):
+   adev->nbio.ras = _v7_9_ras;
+   break;
default:
/* nbio ras is not available */
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c 
b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
index cd1a02d30420..cc2268b871e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
@@ -451,3 +451,222 @@ const struct amdgpu_nbio_funcs nbio_v7_9_funcs = {
.get_memory_partition_mode = nbio_v7_9_get_memory_partition_mode,
.init_registers = nbio_v7_9_init_registers,  };
+
+static void nbio_v7_9_query_ras_error_count(struct amdgpu_device *adev,
+   void *ras_error_status)
+{
+   return;
+}
+
+static void nbio_v7_9_handle_ras_controller_intr_no_bifring(struct
+amdgpu_device *adev) {
+   uint32_t bif_doorbell_intr_cntl;
+   struct ras_manager *obj = amdgpu_ras_find_obj(adev, adev->nbio.ras_if);
+   struct ras_err_data err_data = {0, 0, 0, NULL};
+   struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+   bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0,
+regBIF_BX0_BIF_DOORBELL_INT_CNTL);
+
+   if (REG_GET_FIELD(bif_doorbell_intr_cntl,
+   BIF_BX0_BIF_DOORBELL_INT_CNTL, RAS_CNTLR_INTERRUPT_STATUS)) {
+   /* driver has to clear the interrupt status when bif ring is 
disabled */
+   bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl,
+   BIF_BX0_BIF_DOORBELL_INT_CNTL,
+   RAS_CNTLR_INTERRUPT_CLEAR, 1);
+   WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL,
+bif_doorbell_intr_cntl);
+
+   if (!ras->disable_ras_err_cnt_harvest) {
+   /*
+* clear error status after ras_controller_intr
+* according to hw team and count ue number
+* for query
+*/
+   nbio_v7_9_query_ras_error_count(adev, _data);
+
+   /* logging on error cnt and printing for awareness */
+   obj->err_data.ue_count += err_data.ue_count;
+   obj->err_data.ce_count += err_data.ce_count;
+
+   if (err_data.ce_count)
+   dev_info(adev->dev, "%ld correctable hardware "
+   "errors detected in %s block, "
+   "no user action is needed.\n",
+   obj->err_data.ce_count,
+   
get_ras_block_str(adev->nbio.ras_if));
+
+   if (err_data.ue_count)
+   dev_info(adev->dev, "%ld uncorrectable hardware 
"
+   "errors detected in %s block\n",
+   obj->err_data.ue_count,
+   
get_ras_block_str(adev->nbio.ras_if));
+   }
+
+   dev_info(adev->dev, "RAS controller interrupt triggered "
+   "by NBIF error\n");
+
+   /* ras_controller_int is dedicated for nbif ras error,
+  

[PATCH] drm/amdgpu/irq: Move irq resume to the beginning

2023-08-06 Thread Emily Deng
Need to move irq resume to the beginning of reset sriov, or if
one interrupt occurs before irq resume, then the irq won't work anymore.

Signed-off-by: Emily Deng 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1338489b0b2f..8b304fdfe6db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4617,6 +4617,7 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device 
*adev,
r = amdgpu_virt_reset_gpu(adev);
if (r)
return r;
+   amdgpu_irq_gpu_reset_resume_helper(adev);
 
/* some sw clean up VF needs to do before recover */
amdgpu_virt_post_reset(adev);
@@ -4646,7 +4647,6 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device 
*adev,
amdgpu_put_xgmi_hive(hive);
 
if (!r) {
-   amdgpu_irq_gpu_reset_resume_helper(adev);
r = amdgpu_ib_ring_tests(adev);
 
amdgpu_amdkfd_post_reset(adev);
-- 
2.36.1



[PATCH] drm/amdgpu: add RAS fatal error handler for NBIO v7.9

2023-08-06 Thread Tao Zhou
Register RAS fatal error interrupt and add handler.

Signed-off-by: Tao Zhou 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c |   4 +
 drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c  | 219 
 drivers/gpu/drm/amd/amdgpu/nbio_v7_9.h  |   1 +
 3 files changed, 224 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 832fa646b38f..bef0f9264b4f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -35,6 +35,7 @@
 #include "amdgpu_xgmi.h"
 #include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
 #include "nbio_v4_3.h"
+#include "nbio_v7_9.h"
 #include "atom.h"
 #include "amdgpu_reset.h"
 
@@ -2663,6 +2664,9 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
 * check DF RAS */
adev->nbio.ras = _v4_3_ras;
break;
+   case IP_VERSION(7, 9, 0):
+   adev->nbio.ras = _v7_9_ras;
+   break;
default:
/* nbio ras is not available */
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c 
b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
index cd1a02d30420..cc2268b871e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
@@ -451,3 +451,222 @@ const struct amdgpu_nbio_funcs nbio_v7_9_funcs = {
.get_memory_partition_mode = nbio_v7_9_get_memory_partition_mode,
.init_registers = nbio_v7_9_init_registers,
 };
+
+static void nbio_v7_9_query_ras_error_count(struct amdgpu_device *adev,
+   void *ras_error_status)
+{
+   return;
+}
+
+static void nbio_v7_9_handle_ras_controller_intr_no_bifring(struct 
amdgpu_device *adev)
+{
+   uint32_t bif_doorbell_intr_cntl;
+   struct ras_manager *obj = amdgpu_ras_find_obj(adev, adev->nbio.ras_if);
+   struct ras_err_data err_data = {0, 0, 0, NULL};
+   struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+   bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, 
regBIF_BX0_BIF_DOORBELL_INT_CNTL);
+
+   if (REG_GET_FIELD(bif_doorbell_intr_cntl,
+   BIF_BX0_BIF_DOORBELL_INT_CNTL, RAS_CNTLR_INTERRUPT_STATUS)) {
+   /* driver has to clear the interrupt status when bif ring is 
disabled */
+   bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl,
+   BIF_BX0_BIF_DOORBELL_INT_CNTL,
+   RAS_CNTLR_INTERRUPT_CLEAR, 1);
+   WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL, 
bif_doorbell_intr_cntl);
+
+   if (!ras->disable_ras_err_cnt_harvest) {
+   /*
+* clear error status after ras_controller_intr
+* according to hw team and count ue number
+* for query
+*/
+   nbio_v7_9_query_ras_error_count(adev, _data);
+
+   /* logging on error cnt and printing for awareness */
+   obj->err_data.ue_count += err_data.ue_count;
+   obj->err_data.ce_count += err_data.ce_count;
+
+   if (err_data.ce_count)
+   dev_info(adev->dev, "%ld correctable hardware "
+   "errors detected in %s block, "
+   "no user action is needed.\n",
+   obj->err_data.ce_count,
+   
get_ras_block_str(adev->nbio.ras_if));
+
+   if (err_data.ue_count)
+   dev_info(adev->dev, "%ld uncorrectable hardware 
"
+   "errors detected in %s block\n",
+   obj->err_data.ue_count,
+   
get_ras_block_str(adev->nbio.ras_if));
+   }
+
+   dev_info(adev->dev, "RAS controller interrupt triggered "
+   "by NBIF error\n");
+
+   /* ras_controller_int is dedicated for nbif ras error,
+* not the global interrupt for sync flood
+*/
+   amdgpu_ras_reset_gpu(adev);
+   }
+}
+
+static void nbio_v7_9_handle_ras_err_event_athub_intr_no_bifring(struct 
amdgpu_device *adev)
+{
+   uint32_t bif_doorbell_intr_cntl;
+
+   bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, 
regBIF_BX0_BIF_DOORBELL_INT_CNTL);
+
+   if (REG_GET_FIELD(bif_doorbell_intr_cntl,
+   BIF_BX0_BIF_DOORBELL_INT_CNTL, 
RAS_ATHUB_ERR_EVENT_INTERRUPT_STATUS)) {
+   /* driver has to clear the interrupt status when bif ring is 
disabled */
+   bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl,
+  

[Patch v2 3/3] drm/mst: adjust the function drm_dp_remove_payload_part2()

2023-08-06 Thread Wayne Lin
[Why]
Now in drm_dp_remove_payload_part2(), it utilizes the time slot number
of the payload in old state to represent the one in the payload table
at the moment.

It would be better to clarify the idea by using the latest allocated
time slot number for the port at the moment instead and which info is
already included in new mst_state. By this, we can also remove redundant
workaround for amdgpu driver.

[How]
Remove "old_payload" input of drm_dp_remove_payload_part2() and get the
latest number of allocated time slot for the port from new mst_state
instead.

Signed-off-by: Wayne Lin 
---
 .../amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 70 ---
 drivers/gpu/drm/display/drm_dp_mst_topology.c | 32 ++---
 drivers/gpu/drm/i915/display/intel_dp_mst.c   |  7 +-
 drivers/gpu/drm/nouveau/dispnv50/disp.c   |  6 +-
 include/drm/display/drm_dp_mst_helper.h   |  9 ++-
 5 files changed, 40 insertions(+), 84 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index 9ad509279b0a..e852da686c26 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -203,40 +203,6 @@ void dm_helpers_dp_update_branch_info(
const struct dc_link *link)
 {}
 
-static void dm_helpers_construct_old_payload(
-   struct dc_link *link,
-   int pbn_per_slot,
-   struct drm_dp_mst_atomic_payload *new_payload,
-   struct drm_dp_mst_atomic_payload *old_payload)
-{
-   struct link_mst_stream_allocation_table current_link_table =
-   
link->mst_stream_alloc_table;
-   struct link_mst_stream_allocation *dc_alloc;
-   int i;
-
-   *old_payload = *new_payload;
-
-   /* Set correct time_slots/PBN of old payload.
-* other fields (delete & dsc_enabled) in
-* struct drm_dp_mst_atomic_payload are don't care fields
-* while calling drm_dp_remove_payload_part2()
-*/
-   for (i = 0; i < current_link_table.stream_count; i++) {
-   dc_alloc =
-   _link_table.stream_allocations[i];
-
-   if (dc_alloc->vcp_id == new_payload->vcpi) {
-   old_payload->time_slots = dc_alloc->slot_count;
-   old_payload->pbn = dc_alloc->slot_count * pbn_per_slot;
-   break;
-   }
-   }
-
-   /* make sure there is an old payload*/
-   ASSERT(i != current_link_table.stream_count);
-
-}
-
 /*
  * Writes payload allocation table in immediate downstream device.
  */
@@ -248,7 +214,7 @@ bool dm_helpers_dp_mst_write_payload_allocation_table(
 {
struct amdgpu_dm_connector *aconnector;
struct drm_dp_mst_topology_state *mst_state;
-   struct drm_dp_mst_atomic_payload *target_payload, *new_payload, 
old_payload;
+   struct drm_dp_mst_atomic_payload *payload;
struct drm_dp_mst_topology_mgr *mst_mgr;
 
aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
@@ -262,27 +228,20 @@ bool dm_helpers_dp_mst_write_payload_allocation_table(
 
mst_mgr = >mst_root->mst_mgr;
mst_state = to_drm_dp_mst_topology_state(mst_mgr->base.state);
-   new_payload = drm_atomic_get_mst_payload_state(mst_state, 
aconnector->mst_output_port);
-
-   if (enable) {
-   target_payload = new_payload;
+   payload = drm_atomic_get_mst_payload_state(mst_state, 
aconnector->mst_output_port);
 
+   if (enable)
/* It's OK for this to fail */
-   drm_dp_add_payload_part1(mst_mgr, mst_state, new_payload);
-   } else {
-   /* construct old payload by VCPI*/
-   dm_helpers_construct_old_payload(stream->link, 
mst_state->pbn_div,
-   new_payload, _payload);
-   target_payload = _payload;
+   drm_dp_add_payload_part1(mst_mgr, mst_state, payload);
+   else
 
-   drm_dp_remove_payload_part1(mst_mgr, mst_state, new_payload);
-   }
+   drm_dp_remove_payload_part1(mst_mgr, mst_state, payload);
 
/* mst_mgr->->payloads are VC payload notify MST branch using DPCD or
 * AUX message. The sequence is slot 1-63 allocated sequence for each
 * stream. AMD ASIC stream slot allocation should follow the same
 * sequence. copy DRM MST allocation to dc */
-   fill_dc_mst_payload_table_from_drm(stream->link, enable, 
target_payload, proposed_table);
+   fill_dc_mst_payload_table_from_drm(stream->link, enable, payload, 
proposed_table);
 
return true;
 }
@@ -341,7 +300,7 @@ bool dm_helpers_dp_mst_send_payload_allocation(
struct amdgpu_dm_connector *aconnector;
struct drm_dp_mst_topology_state *mst_state;
struct 

[Patch v2 2/3] drm/mst: Refactor the flow for payload allocation/removement

2023-08-06 Thread Wayne Lin
[Why]
Today, the allocation/deallocation steps and status is a bit unclear.

For instance, payload->vc_start_slot = -1 stands for "the failure of
updating DPCD payload ID table" and can also represent as "payload is not
allocated yet". These two cases should be handled differently and hence
better to distinguish them for better understanding.

[How]
Define enumeration - ALLOCATION_LOCAL, ALLOCATION_DFP and ALLOCATION_REMOTE
to distinguish different allocation status. Adjust the code to handle
different status accordingly for better understanding the sequence of
payload allocation and payload removement.

For payload creation, the procedure should look like this:
DRM part 1:
* step 1 - update sw mst mgr variables to add a new payload
* step 2 - add payload at immediate DFP DPCD payload table

Driver:
* Add new payload in HW and sync up with DFP by sending ACT

DRM Part 2:
* Send ALLOCATE_PAYLOAD sideband message to allocate bandwidth along the
  virtual channel.

And as for payload removement, the procedure should look like this:
DRM part 1:
* step 1 - Send ALLOCATE_PAYLOAD sideband message to release bandwidth
   along the virtual channel
* step 2 - Clear payload allocation at immediate DFP DPCD payload table

Driver:
* Remove the payload in HW and sync up with DFP by sending ACT

DRM part 2:
* update sw mst mgr variables to remove the payload

Note that it's fine to fail when communicate with the branch device
connected at immediate downstrean-facing port, but updating variables of
SW mst mgr and HW configuration should be conducted anyway. That's because
it's under commit_tail and we need to complete the HW programming.

Changes since v1:
* Remove the set but not use variable 'old_payload' in function
  'nv50_msto_prepare'. Catched by kernel test robot 

Signed-off-by: Wayne Lin 
---
 .../amd/display/amdgpu_dm/amdgpu_dm_helpers.c |  20 ++-
 drivers/gpu/drm/display/drm_dp_mst_topology.c | 159 +++---
 drivers/gpu/drm/i915/display/intel_dp_mst.c   |  18 +-
 drivers/gpu/drm/nouveau/dispnv50/disp.c   |  21 +--
 include/drm/display/drm_dp_mst_helper.h   |  23 ++-
 5 files changed, 153 insertions(+), 88 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index d9a482908380..9ad509279b0a 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -219,7 +219,7 @@ static void dm_helpers_construct_old_payload(
/* Set correct time_slots/PBN of old payload.
 * other fields (delete & dsc_enabled) in
 * struct drm_dp_mst_atomic_payload are don't care fields
-* while calling drm_dp_remove_payload()
+* while calling drm_dp_remove_payload_part2()
 */
for (i = 0; i < current_link_table.stream_count; i++) {
dc_alloc =
@@ -262,13 +262,12 @@ bool dm_helpers_dp_mst_write_payload_allocation_table(
 
mst_mgr = >mst_root->mst_mgr;
mst_state = to_drm_dp_mst_topology_state(mst_mgr->base.state);
-
-   /* It's OK for this to fail */
new_payload = drm_atomic_get_mst_payload_state(mst_state, 
aconnector->mst_output_port);
 
if (enable) {
target_payload = new_payload;
 
+   /* It's OK for this to fail */
drm_dp_add_payload_part1(mst_mgr, mst_state, new_payload);
} else {
/* construct old payload by VCPI*/
@@ -276,7 +275,7 @@ bool dm_helpers_dp_mst_write_payload_allocation_table(
new_payload, _payload);
target_payload = _payload;
 
-   drm_dp_remove_payload(mst_mgr, mst_state, _payload, 
new_payload);
+   drm_dp_remove_payload_part1(mst_mgr, mst_state, new_payload);
}
 
/* mst_mgr->->payloads are VC payload notify MST branch using DPCD or
@@ -342,7 +341,7 @@ bool dm_helpers_dp_mst_send_payload_allocation(
struct amdgpu_dm_connector *aconnector;
struct drm_dp_mst_topology_state *mst_state;
struct drm_dp_mst_topology_mgr *mst_mgr;
-   struct drm_dp_mst_atomic_payload *payload;
+   struct drm_dp_mst_atomic_payload *new_payload, *old_payload;
enum mst_progress_status set_flag = MST_ALLOCATE_NEW_PAYLOAD;
enum mst_progress_status clr_flag = MST_CLEAR_ALLOCATED_PAYLOAD;
int ret = 0;
@@ -355,15 +354,20 @@ bool dm_helpers_dp_mst_send_payload_allocation(
mst_mgr = >mst_root->mst_mgr;
mst_state = to_drm_dp_mst_topology_state(mst_mgr->base.state);
 
-   payload = drm_atomic_get_mst_payload_state(mst_state, 
aconnector->mst_output_port);
+   new_payload = drm_atomic_get_mst_payload_state(mst_state, 
aconnector->mst_output_port);
 
if (!enable) {
set_flag = MST_CLEAR_ALLOCATED_PAYLOAD;
clr_flag = MST_ALLOCATE_NEW_PAYLOAD;
}
 
-   if (enable)
-   

[Patch v2 1/3] drm/mst: delete unnecessary case in drm_dp_add_payload_part2()

2023-08-06 Thread Wayne Lin
[Why]
There is no need to consider payload->delete case since we won't call
drm_dp_add_payload_part2() to create a payload when we're about to
remove it.

[How]
Delete unnecessary case to simplify the code.

Signed-off-by: Wayne Lin 
---
 drivers/gpu/drm/display/drm_dp_mst_topology.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c 
b/drivers/gpu/drm/display/drm_dp_mst_topology.c
index ed96cfcfa304..4d80426757ab 100644
--- a/drivers/gpu/drm/display/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c
@@ -3411,12 +3411,8 @@ int drm_dp_add_payload_part2(struct 
drm_dp_mst_topology_mgr *mgr,
 
ret = drm_dp_create_payload_step2(mgr, payload);
if (ret < 0) {
-   if (!payload->delete)
-   drm_err(mgr->dev, "Step 2 of creating MST payload for 
%p failed: %d\n",
-   payload->port, ret);
-   else
-   drm_dbg_kms(mgr->dev, "Step 2 of removing MST payload 
for %p failed: %d\n",
-   payload->port, ret);
+   drm_err(mgr->dev, "Step 2 of creating MST payload for %p 
failed: %d\n",
+   payload->port, ret);
}
 
return ret;
-- 
2.37.3



[Patch v2 0/3] Refactor and clean up codes of mst

2023-08-06 Thread Wayne Lin
This patch set is mainly trying to organize the mst code today a bit.
Like to clarify and organize the sequence of mst payload allocation and
removement.And also clean up some redundant codes today.

The main refactor one is the patch
"drm/mst: Refactor the flow for payload allocation/removement"
which is adding a new enum variable in stuct drm_dp_mst_atomic_payload
to represent the status of paylad alloction, and then handle the payload
accordingly. Besides, rename some drm mst functions to better express the
behind idea.

The other two patches are mainly to clean up unnecessary codes.

Changes since v1:
* Remove the set but not use variable 'old_payload' in function
  'nv50_msto_prepare'. Catched by kernel test robot 

Wayne Lin (3):
  drm/mst: delete unnecessary case in drm_dp_add_payload_part2()
  drm/mst: Refactor the flow for payload allocation/removement
  drm/mst: adjust the function drm_dp_remove_payload_part2()

 .../amd/display/amdgpu_dm/amdgpu_dm_helpers.c |  60 +-
 drivers/gpu/drm/display/drm_dp_mst_topology.c | 189 +++---
 drivers/gpu/drm/i915/display/intel_dp_mst.c   |  13 +-
 drivers/gpu/drm/nouveau/dispnv50/disp.c   |  17 +-
 include/drm/display/drm_dp_mst_helper.h   |  22 +-
 5 files changed, 159 insertions(+), 142 deletions(-)

-- 
2.37.3



RE: [PATCH 3/3] drm/mst: adjust the function drm_dp_remove_payload_part2()

2023-08-06 Thread Lin, Wayne
[AMD Official Use Only - General]

> -Original Message-
> From: Imre Deak 
> Sent: Friday, August 4, 2023 11:32 PM
> To: Lin, Wayne 
> Cc: dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org;
> ly...@redhat.com; jani.nik...@intel.com; ville.syrj...@linux.intel.com;
> Wentland, Harry ; Zuo, Jerry
> 
> Subject: Re: [PATCH 3/3] drm/mst: adjust the function
> drm_dp_remove_payload_part2()
>
> On Fri, Aug 04, 2023 at 02:20:29PM +0800, Wayne Lin wrote:
> > [...]
> > diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c
> > b/drivers/gpu/drm/display/drm_dp_mst_topology.c
> > index e04f87ff755a..4270178f95f6 100644
> > --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c
> > +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c
> > @@ -3382,8 +3382,7 @@
> EXPORT_SYMBOL(drm_dp_remove_payload_part1);
> >   * drm_dp_remove_payload_part2() - Remove an MST payload locally
> >   * @mgr: Manager to use.
> >   * @mst_state: The MST atomic state
> > - * @old_payload: The payload with its old state
> > - * @new_payload: The payload with its latest state
> > + * @payload: The payload with its latest state
> >   *
> >   * Updates the starting time slots of all other payloads which would have
> been shifted towards
> >   * the start of the payload ID table as a result of removing a
> > payload. Driver should call this @@ -3392,25 +3391,36 @@
> EXPORT_SYMBOL(drm_dp_remove_payload_part1);
> >   */
> >  void drm_dp_remove_payload_part2(struct drm_dp_mst_topology_mgr
> *mgr,
> >  struct drm_dp_mst_topology_state
> *mst_state,
> > -const struct drm_dp_mst_atomic_payload
> *old_payload,
> > -struct drm_dp_mst_atomic_payload
> *new_payload)
> > +struct drm_dp_mst_atomic_payload
> *payload)
> >  {
> > struct drm_dp_mst_atomic_payload *pos;
> > +   u8 time_slots_to_remove;
> > +   u8 next_payload_vc_start = mgr->next_start_slot;
> > +
> > +   /* Find the current allocated time slot number of the payload */
> > +   list_for_each_entry(pos, _state->payloads, next) {
> > +   if (pos != payload &&
> > +   pos->vc_start_slot > payload->vc_start_slot &&
> > +   pos->vc_start_slot < next_payload_vc_start)
> > +   next_payload_vc_start = pos->vc_start_slot;
> > +   }
> > +
> > +   time_slots_to_remove = next_payload_vc_start -
> > +payload->vc_start_slot;
>
> Imo, the intuitive way would be to pass the old payload state to this 
> function -
> which already contains the required time_slots param - and refactor things
> instead moving vc_start_slot from the payload state to mgr suggested by Ville
> earlier.
>
> --Imre

Hi Imre,
Thanks for your feedback!
I understand it's functionally correct. But IMHO, it's still a bit conceptually
different between the time slot in old state and the time slot in current 
payload
table. My thought is the time slot at the moment when we are removing the
payload would be a better choice. And with this, we can also simplify some
codes. Especially remove workaround in amd driver. In fact, DRM mst code
maintains the payload table and all the time slot info is in it already. We 
don't
really have to pass a new parameter.

>
> > /* Remove local payload allocation */
> > list_for_each_entry(pos, _state->payloads, next) {
> > -   if (pos != new_payload && pos->vc_start_slot > new_payload-
> >vc_start_slot)
> > -   pos->vc_start_slot -= old_payload->time_slots;
> > +   if (pos != payload && pos->vc_start_slot > payload-
> >vc_start_slot)
> > +   pos->vc_start_slot -= time_slots_to_remove;
> > }
> > -   new_payload->vc_start_slot = -1;
> > +   payload->vc_start_slot = -1;
> >
> > mgr->payload_count--;
> > -   mgr->next_start_slot -= old_payload->time_slots;
> > +   mgr->next_start_slot -= time_slots_to_remove;
> >
> > -   if (new_payload->delete)
> > -   drm_dp_mst_put_port_malloc(new_payload->port);
> > +   if (payload->delete)
> > +   drm_dp_mst_put_port_malloc(payload->port);
> >
> > -   new_payload->payload_allocation_status =
> DRM_DP_MST_PAYLOAD_ALLOCATION_NONE;
> > +   payload->payload_allocation_status =
> > +DRM_DP_MST_PAYLOAD_ALLOCATION_NONE;
> >  }

--
Regards,
Wayne