[PATCH 14/14] drm/amd/display: Revert "retain/release stream pointer in link enc table"

2021-11-11 Thread Wayne Lin
From: Sung Joon Kim 

[why]
The change is causing an issue, so revert it.

Reviewed-by: Aric Cyr 
Acked-by: Wayne Lin 
Signed-off-by: Sung Joon Kim 
---
 drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c
index d3c789f26a02..8b319992c71d 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_enc_cfg.c
@@ -122,7 +122,6 @@ static void remove_link_enc_assignment(
stream->link_enc = NULL;

state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i].eng_id = ENGINE_ID_UNKNOWN;
state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i].stream = NULL;
-   dc_stream_release(stream);
break;
}
}
@@ -145,7 +144,6 @@ static void add_link_enc_assignment(
 */
for (i = 0; i < state->stream_count; i++) {
if (stream == state->streams[i]) {
-   dc_stream_retain(stream);

state->res_ctx.link_enc_cfg_ctx.link_enc_assignments[i] = (struct link_enc_assignment){
.valid = true,
.ep_id = (struct display_endpoint_id) {
-- 
2.25.1



[PATCH 13/14] drm/amd/display: 3.2.162

2021-11-11 Thread Wayne Lin
From: Aric Cyr 

This version brings along the following fixes:
- Fix issue that secondary display goes blank on non-DCN31 ASICs
- Adjust flushing data in DMCUB
- Revert patches which caused a regression in handling MPO/link encoder assignment
- Correct the MSA setting for DP 2.0
- Adjustments for DML isolation
- Fix FIFO error in the fast boot sequence
- Enable DSC over eDP
- Adjust the DSC power-off sequence

Reviewed-by: Aric Cyr 
Acked-by: Wayne Lin 
Signed-off-by: Aric Cyr 
---
 drivers/gpu/drm/amd/display/dc/dc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 6b4c9e9705c0..a43583c6e90e 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -47,7 +47,7 @@ struct aux_payload;
 struct set_config_cmd_payload;
 struct dmub_notification;
 
-#define DC_VER "3.2.161"
+#define DC_VER "3.2.162"
 
 #define MAX_SURFACES 3
 #define MAX_PLANES 6
-- 
2.25.1



[PATCH 11/14] drm/amd/display: [FW Promotion] Release 0.0.93

2021-11-11 Thread Wayne Lin
From: Anthony Koo 

 - Fix ARR39-C issue with scaled integer addition in rb func
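
For context, a minimal standalone sketch of the pointer arithmetic behind
the fix (illustrative names, not the patch's code): CERT C ARR39-C flags
adding a scaled integer to a typed pointer; since wrpt/rptr are byte
offsets, byte arithmetic states the intent directly and avoids the
truncating division:

    #include <stdint.h>

    /* old form: scaled addition; wrpt / sizeof(uint64_t) truncates if
     * the byte offset is ever not a multiple of 8 */
    static uint64_t *slot_scaled(void *base, uint32_t wrpt)
    {
            return (uint64_t *)base + wrpt / sizeof(uint64_t);
    }

    /* fixed form: plain byte arithmetic keeps the offset exact */
    static uint64_t *slot_bytes(void *base, uint32_t wrpt)
    {
            return (uint64_t *)((uint8_t *)base + wrpt);
    }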

Reviewed-by: Aric Cyr 
Acked-by: Wayne Lin 
Signed-off-by: Anthony Koo 
---
 drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
index 014ebd7242d5..20efa1f61914 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
@@ -47,10 +47,10 @@
 
 /* Firmware versioning. */
 #ifdef DMUB_EXPOSE_VERSION
-#define DMUB_FW_VERSION_GIT_HASH 0x058d3791
+#define DMUB_FW_VERSION_GIT_HASH 0xcd0e1e7a
 #define DMUB_FW_VERSION_MAJOR 0
 #define DMUB_FW_VERSION_MINOR 0
-#define DMUB_FW_VERSION_REVISION 92
+#define DMUB_FW_VERSION_REVISION 93
 #define DMUB_FW_VERSION_TEST 0
 #define DMUB_FW_VERSION_VBIOS 0
 #define DMUB_FW_VERSION_HOTFIX 0
@@ -2726,7 +2726,7 @@ static inline bool dmub_rb_full(struct dmub_rb *rb)
 static inline bool dmub_rb_push_front(struct dmub_rb *rb,
  const union dmub_rb_cmd *cmd)
 {
-	uint64_t volatile *dst = (uint64_t volatile *)(rb->base_address) + rb->wrpt / sizeof(uint64_t);
+	uint64_t volatile *dst = (uint64_t volatile *)((uint8_t *)(rb->base_address) + rb->wrpt);
const uint64_t *src = (const uint64_t *)cmd;
uint8_t i;
 
@@ -2844,7 +2844,7 @@ static inline bool dmub_rb_peek_offset(struct dmub_rb *rb,
 static inline bool dmub_rb_out_front(struct dmub_rb *rb,
 union dmub_rb_out_cmd *cmd)
 {
-	const uint64_t volatile *src = (const uint64_t volatile *)(rb->base_address) + rb->rptr / sizeof(uint64_t);
+	const uint64_t volatile *src = (const uint64_t volatile *)((uint8_t *)(rb->base_address) + rb->rptr);
uint64_t *dst = (uint64_t *)cmd;
uint8_t i;
 
@@ -2892,7 +2892,7 @@ static inline void dmub_rb_flush_pending(const struct dmub_rb *rb)
uint32_t wptr = rb->wrpt;
 
while (rptr != wptr) {
-		uint64_t volatile *data = (uint64_t volatile *)rb->base_address + rptr / sizeof(uint64_t);
+		uint64_t volatile *data = (uint64_t volatile *)((uint8_t *)(rb->base_address) + rptr);
//uint64_t volatile *p = (uint64_t volatile *)data;
uint64_t temp;
uint8_t i;
-- 
2.25.1



[PATCH 12/14] drm/amd/display: fixed the DSC power off sequence during Driver PnP

2021-11-11 Thread Wayne Lin
From: Yi-Ling Chen 

[WHY]
After unloading the driver, the driver does not disable the DSC function.
At the next driver load, the driver powers all DSC engines off.
When the driver powers off the active DSC, the screen goes gray
until the DSC-related registers are reprogrammed correctly.

[HOW]
1. Move the DSC power-down code into init_pipes()
2. Use the OTG mapping information and DSC status to skip
powering off the working DSC.

Reviewed-by: Anthony Koo 
Acked-by: Wayne Lin 
Signed-off-by: Yi-Ling Chen 
---
 .../amd/display/dc/dcn10/dcn10_hw_sequencer.c | 37 +++
 .../gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c  |  2 +
 .../gpu/drm/amd/display/dc/dcn20/dcn20_optc.c | 14 +++
 .../gpu/drm/amd/display/dc/dcn20/dcn20_optc.h |  3 ++
 .../gpu/drm/amd/display/dc/dcn30/dcn30_optc.c |  1 +
 .../drm/amd/display/dc/dcn31/dcn31_hwseq.c|  5 ---
 .../gpu/drm/amd/display/dc/dcn31/dcn31_optc.c |  1 +
 drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h   |  2 +
 .../amd/display/dc/inc/hw/timing_generator.h  |  2 +
 9 files changed, 62 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
index 135e2b58cc73..c50427bfab77 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
@@ -1362,6 +1362,43 @@ void dcn10_init_pipes(struct dc *dc, struct dc_state *context)
 
tg->funcs->tg_init(tg);
}
+
+	/* Power gate DSCs */
+	if (hws->funcs.dsc_pg_control != NULL) {
+		uint32_t num_opps = 0;
+		uint32_t opp_id_src0 = OPP_ID_INVALID;
+		uint32_t opp_id_src1 = OPP_ID_INVALID;
+
+		// Step 1: To find out which OPTC is running & OPTC DSC is ON
+		for (i = 0; i < dc->res_pool->res_cap->num_timing_generator; i++) {
+			uint32_t optc_dsc_state = 0;
+			struct timing_generator *tg = dc->res_pool->timing_generators[i];
+
+			if (tg->funcs->is_tg_enabled(tg)) {
+				if (tg->funcs->get_dsc_status)
+					tg->funcs->get_dsc_status(tg, &optc_dsc_state);
+				// Only one OPTC with DSC is ON, so if we got one result, we would exit this block.
+				// non-zero value is DSC enabled
+				if (optc_dsc_state != 0) {
+					tg->funcs->get_optc_source(tg, &num_opps, &opp_id_src0, &opp_id_src1);
+					break;
+				}
+			}
+		}
+
+		// Step 2: To power down DSC but skip DSC of running OPTC
+		for (i = 0; i < dc->res_pool->res_cap->num_dsc; i++) {
+			struct dcn_dsc_state s = {0};
+
+			dc->res_pool->dscs[i]->funcs->dsc_read_state(dc->res_pool->dscs[i], &s);
+
+			if ((s.dsc_opp_source == opp_id_src0 || s.dsc_opp_source == opp_id_src1) &&
+					s.dsc_clock_en && s.dsc_fw_en)
+				continue;
+
+			hws->funcs.dsc_pg_control(hws, dc->res_pool->dscs[i]->inst, false);
+		}
+	}
 }
 
 void dcn10_init_hw(struct dc *dc)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c
index 79b640e202eb..ef5c4c0f4d6c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c
@@ -162,6 +162,8 @@ static void dsc2_read_state(struct display_stream_compressor *dsc, struct dcn_dsc_state *s)
	REG_GET(DSCC_PPS_CONFIG2, PIC_WIDTH, &s->dsc_pic_width);
	REG_GET(DSCC_PPS_CONFIG2, PIC_HEIGHT, &s->dsc_pic_height);
	REG_GET(DSCC_PPS_CONFIG7, SLICE_BPG_OFFSET, &s->dsc_slice_bpg_offset);
+	REG_GET_2(DSCRM_DSC_FORWARD_CONFIG, DSCRM_DSC_FORWARD_EN, &s->dsc_fw_en,
+		DSCRM_DSC_OPP_PIPE_SOURCE, &s->dsc_opp_source);
 }
 
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c
index c90b8516dcc1..8c34751b0e58 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c
@@ -190,6 +190,19 @@ void optc2_set_dsc_config(struct timing_generator *optc,
OPTC_DSC_SLICE_WIDTH, dsc_slice_width);
 }
 
+/* Get DSC-related configuration.
+ *   dsc_mode: 0 disables DSC, other values enable DSC in specified format
+ */
+void optc2_get_dsc_status(struct timing_generator *optc,
+   uint32_t *dsc_mode)
+{
+   struct optc *optc1 = DCN10TG_FROM_TG(optc);
+
+   REG_GET(OPTC_DATA_FORMAT_CONTROL,
+   OPTC_DSC_MODE, dsc_mode);
+}
+
+
 /*TEMP: Need to figure out inheritance model here.*/
 bool optc2_is_two_pixels_per_containter(const 

[PATCH 09/14] drm/amd/display: Visual Confirm Bar Height Adjust

2021-11-11 Thread Wayne Lin
From: hvanzyll 

[What]
This change allows adjustment to the Visual Confirm
height border.

[Why]
Aids debugging and testing

[How]
Use the existing infrastructure to implement logic to
draw borders

Reviewed-by: Anthony Koo 
Acked-by: Wayne Lin 
Signed-off-by: Harry VanZyllDeJong 
---
 drivers/gpu/drm/amd/display/dc/dc.h|  2 ++
 .../gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c  | 14 +++++++++++++-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 764663df7887..6b4c9e9705c0 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -574,6 +574,8 @@ struct dc_debug_options {
bool native422_support;
bool disable_dsc;
enum visual_confirm visual_confirm;
+   int visual_confirm_rect_height;
+
bool sanity_checks;
bool max_disp_clk;
bool surface_trace;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c
index 44293d66b46b..e31a6f1516bb 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c
@@ -39,6 +39,10 @@
 #define BLACK_OFFSET_RGB_Y 0x0
 #define BLACK_OFFSET_CBCR  0x8000
 
+#define VISUAL_CONFIRM_RECT_HEIGHT_DEFAULT 3
+#define VISUAL_CONFIRM_RECT_HEIGHT_MIN 1
+#define VISUAL_CONFIRM_RECT_HEIGHT_MAX 10
+
 #define REG(reg)\
dpp->tf_regs->reg
 
@@ -685,9 +689,17 @@ static void dpp1_dscl_set_recout(struct dcn10_dpp *dpp,
 const struct rect *recout)
 {
int visual_confirm_on = 0;
+	unsigned short visual_confirm_rect_height = VISUAL_CONFIRM_RECT_HEIGHT_DEFAULT;
+
if (dpp->base.ctx->dc->debug.visual_confirm != VISUAL_CONFIRM_DISABLE)
visual_confirm_on = 1;
 
+	/* Check bounds to ensure the VC bar height was set to a sane value */
+	if ((dpp->base.ctx->dc->debug.visual_confirm_rect_height >= VISUAL_CONFIRM_RECT_HEIGHT_MIN) &&
+			(dpp->base.ctx->dc->debug.visual_confirm_rect_height <= VISUAL_CONFIRM_RECT_HEIGHT_MAX)) {
+		visual_confirm_rect_height = dpp->base.ctx->dc->debug.visual_confirm_rect_height;
+	}
+
REG_SET_2(RECOUT_START, 0,
  /* First pixel of RECOUT in the active OTG area */
  RECOUT_START_X, recout->x,
@@ -699,7 +711,7 @@ static void dpp1_dscl_set_recout(struct dcn10_dpp *dpp,
  RECOUT_WIDTH, recout->width,
  /* Number of RECOUT vertical lines */
  RECOUT_HEIGHT, recout->height
-		  - visual_confirm_on * 2 * (dpp->base.inst + 1));
+		  - visual_confirm_on * 2 * (dpp->base.inst + visual_confirm_rect_height));
 }
 
 /**
-- 
2.25.1



[PATCH 10/14] drm/amd/display: [FW Promotion] Release 0.0.92

2021-11-11 Thread Wayne Lin
From: Anthony Koo 

Reviewed-by: Anthony Koo 
Acked-by: Wayne Lin 
Signed-off-by: Anthony Koo 
---
 drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
index ba30ab11b2ae..014ebd7242d5 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
@@ -47,10 +47,10 @@
 
 /* Firmware versioning. */
 #ifdef DMUB_EXPOSE_VERSION
-#define DMUB_FW_VERSION_GIT_HASH 0x1d82d23e
+#define DMUB_FW_VERSION_GIT_HASH 0x058d3791
 #define DMUB_FW_VERSION_MAJOR 0
 #define DMUB_FW_VERSION_MINOR 0
-#define DMUB_FW_VERSION_REVISION 91
+#define DMUB_FW_VERSION_REVISION 92
 #define DMUB_FW_VERSION_TEST 0
 #define DMUB_FW_VERSION_VBIOS 0
 #define DMUB_FW_VERSION_HOTFIX 0
-- 
2.25.1



[PATCH 08/14] drm/amd/display: Fix eDP will flash when boot to OS

2021-11-11 Thread Wayne Lin
From: Brandon Syu 

[WHY]
With eDP DSC enabled and a 4K 60 Hz mode set, there would be screen
corruption when booting to the OS or enabling the driver.

[HOW]
Avoid powering down VDD when we cannot apply eDP fast boot.

Reviewed-by: Nicholas Kazlauskas 
Acked-by: Wayne Lin 
Signed-off-by: Brandon Syu 
---
 drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c| 5 -
 drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c | 2 +-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index 2910188ba514..6379b4a4757a 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -6045,7 +6045,10 @@ bool is_edp_ilr_optimization_required(struct dc_link *link, struct dc_crtc_timing *crtc_timing)
 
req_bw = dc_bandwidth_in_kbps_from_timing(crtc_timing);
 
-	decide_edp_link_settings(link, &link_setting, req_bw);
+	if (!crtc_timing->flags.DSC)
+		decide_edp_link_settings(link, &link_setting, req_bw);
+	else
+		decide_edp_link_settings_with_dsc(link, &link_setting, req_bw, LINK_RATE_UNKNOWN);
 
	if (link->dpcd_caps.edp_supported_link_rates[link_rate_set] != link_setting.link_rate ||
			lane_count_set.bits.LANE_COUNT_SET != link_setting.lane_count) {
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
index 665cf58b0724..3d421583e9ca 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
@@ -1833,7 +1833,7 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context)
}
}
// We are trying to enable eDP, don't power down VDD
-   if (edp_stream_num)
+   if (edp_stream_num && can_apply_edp_fast_boot)
keep_edp_vdd_on = true;
}
 
-- 
2.25.1



[PATCH 07/14] drm/amd/display: Enable DSC over eDP

2021-11-11 Thread Wayne Lin
From: Mikita Lipski 

[why]
- Add a DM interface to enable DSC over eDP on Linux
- DSC over eDP allows power savings by reducing the
bandwidth required to support the panel's modes
- Apply a link optimization algorithm to reduce link bandwidth
when DSC is enabled

[how]
- Read the eDP panel's DSC capabilities
- Apply DSC policy on the eDP panel based on its DSC capabilities
- Enable the DSC encoders on the pipe
- Enable DSC on the panel side by setting the DSC_ENABLE DPCD register
- Add a link optimization algorithm to reduce the link rate or lane
count based on the required bandwidth

Reviewed-by: Nicholas Kazlauskas 
Acked-by: Wayne Lin 
Signed-off-by: Mikita Lipski 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  73 +++-
 .../amd/display/amdgpu_dm/amdgpu_dm_helpers.c |   2 +-
 drivers/gpu/drm/amd/display/dc/core/dc_link.c |   2 +
 .../gpu/drm/amd/display/dc/core/dc_link_dp.c  | 162 +-
 drivers/gpu/drm/amd/display/dc/dc.h   |   3 +
 drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c |   1 +
 .../drm/amd/display/dc/dcn31/dcn31_resource.c |   1 +
 drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c   |   8 +
 drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h   |   1 +
 .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h   |   8 +-
 .../amd/display/include/ddc_service_types.h   |   1 +
 11 files changed, 255 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 7a54ccb794f9..a15f20aaa11e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1478,8 +1478,10 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
if (amdgpu_dc_debug_mask & DC_DISABLE_STUTTER)
adev->dm.dc->debug.disable_stutter = true;
 
-   if (amdgpu_dc_debug_mask & DC_DISABLE_DSC)
+   if (amdgpu_dc_debug_mask & DC_DISABLE_DSC) {
adev->dm.dc->debug.disable_dsc = true;
+   adev->dm.dc->debug.disable_dsc_edp = true;
+   }
 
if (amdgpu_dc_debug_mask & DC_DISABLE_CLOCK_GATING)
adev->dm.dc->debug.disable_clock_gate = true;
@@ -6032,7 +6034,8 @@ static void update_dsc_caps(struct amdgpu_dm_connector *aconnector,
 {
stream->timing.flags.DSC = 0;
 
-	if (aconnector->dc_link && sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT) {
+	if (aconnector->dc_link && (sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT ||
+		sink->sink_signal == SIGNAL_TYPE_EDP)) {
 		dc_dsc_parse_dsc_dpcd(aconnector->dc_link->ctx->dc,
 				aconnector->dc_link->dpcd_caps.dsc_caps.dsc_basic_caps.raw,
 				aconnector->dc_link->dpcd_caps.dsc_caps.dsc_branch_decoder_caps.raw,
@@ -6040,6 +6043,64 @@ static void update_dsc_caps(struct amdgpu_dm_connector *aconnector,
}
 }
 
+static void apply_dsc_policy_for_edp(struct amdgpu_dm_connector *aconnector,
+		struct dc_sink *sink, struct dc_stream_state *stream,
+		struct dsc_dec_dpcd_caps *dsc_caps,
+		uint32_t max_dsc_target_bpp_limit_override)
+{
+	const struct dc_link_settings *verified_link_cap = NULL;
+	uint32_t link_bw_in_kbps;
+	uint32_t edp_min_bpp_x16, edp_max_bpp_x16;
+	struct dc *dc = sink->ctx->dc;
+	struct dc_dsc_bw_range bw_range = {0};
+	struct dc_dsc_config dsc_cfg = {0};
+
+	verified_link_cap = dc_link_get_link_cap(stream->link);
+	link_bw_in_kbps = dc_link_bandwidth_kbps(stream->link, verified_link_cap);
+	edp_min_bpp_x16 = 8 * 16;
+	edp_max_bpp_x16 = 8 * 16;
+
+	if (edp_max_bpp_x16 > dsc_caps->edp_max_bits_per_pixel)
+		edp_max_bpp_x16 = dsc_caps->edp_max_bits_per_pixel;
+
+	if (edp_max_bpp_x16 < edp_min_bpp_x16)
+		edp_min_bpp_x16 = edp_max_bpp_x16;
+
+	if (dc_dsc_compute_bandwidth_range(dc->res_pool->dscs[0],
+			dc->debug.dsc_min_slice_height_override,
+			edp_min_bpp_x16, edp_max_bpp_x16,
+			dsc_caps,
+			&stream->timing,
+			&bw_range)) {
+
+		if (bw_range.max_kbps < link_bw_in_kbps) {
+			if (dc_dsc_compute_config(dc->res_pool->dscs[0],
+					dsc_caps,
+					dc->debug.dsc_min_slice_height_override,
+					max_dsc_target_bpp_limit_override,
+					0,
+					&stream->timing,
+					&dsc_cfg)) {
+				stream->timing.dsc_cfg = dsc_cfg;
+				stream->timing.flags.DSC = 1;
+				stream->timing.dsc_cfg.bits_per_pixel = edp_max_bpp_x16;
+			}
+  

[PATCH 06/14] drm/amd/display: Reset fifo after enable otg

2021-11-11 Thread Wayne Lin
From: "Xu, Jinze" 

[Why]
In the fast boot sequence, when changing dispclk, the OTG is disabled
while the DIG FE is still enabled. This may cause a DIG FIFO error.

[How]
Reset the DIG FIFO after enabling the OTG.

Reviewed-by: Jun Lei 
Reviewed-by: Anthony Koo 
Acked-by: Wayne Lin 
Signed-off-by: JinZe.Xu 
---
 .../amd/display/dc/dce110/dce110_hw_sequencer.c   |  5 +++++
 .../amd/display/dc/dcn10/dcn10_stream_encoder.c   | 15 +++++++++++++++
 .../amd/display/dc/dcn10/dcn10_stream_encoder.h   |  3 +++
 .../amd/display/dc/dcn20/dcn20_stream_encoder.c   |  2 ++
 .../display/dc/dcn30/dcn30_dio_stream_encoder.c   |  2 ++
 .../drm/amd/display/dc/inc/hw/stream_encoder.h    |  4 ++++
 6 files changed, 31 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
index e7e2aa46218d..665cf58b0724 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
@@ -1602,6 +1602,11 @@ static enum dc_status apply_single_controller_ctx_to_hw(
pipe_ctx->stream_res.stream_enc,
pipe_ctx->stream_res.tg->inst);
 
+   if (dc_is_dp_signal(pipe_ctx->stream->signal) &&
+   pipe_ctx->stream_res.stream_enc->funcs->reset_fifo)
+   pipe_ctx->stream_res.stream_enc->funcs->reset_fifo(
+   pipe_ctx->stream_res.stream_enc);
+
if (dc_is_dp_signal(pipe_ctx->stream->signal))
		dp_source_sequence_trace(link, DPCD_SOURCE_SEQ_AFTER_CONNECT_DIG_FE_OTG);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c
index b0c08ee6bc2c..bf4436d7aaab 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c
@@ -902,6 +902,19 @@ void enc1_stream_encoder_stop_dp_info_packets(
 
 }
 
+void enc1_stream_encoder_reset_fifo(
+   struct stream_encoder *enc)
+{
+   struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+   /* set DIG_START to 0x1 to reset FIFO */
+   REG_UPDATE(DIG_FE_CNTL, DIG_START, 1);
+   udelay(100);
+
+   /* write 0 to take the FIFO out of reset */
+   REG_UPDATE(DIG_FE_CNTL, DIG_START, 0);
+}
+
 void enc1_stream_encoder_dp_blank(
struct dc_link *link,
struct stream_encoder *enc)
@@ -1587,6 +1600,8 @@ static const struct stream_encoder_funcs dcn10_str_enc_funcs = {
enc1_stream_encoder_send_immediate_sdp_message,
.stop_dp_info_packets =
enc1_stream_encoder_stop_dp_info_packets,
+   .reset_fifo =
+   enc1_stream_encoder_reset_fifo,
.dp_blank =
enc1_stream_encoder_dp_blank,
.dp_unblank =
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h
index 687d7e4bf7ca..a146a41f68e9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h
@@ -626,6 +626,9 @@ void enc1_stream_encoder_send_immediate_sdp_message(
 void enc1_stream_encoder_stop_dp_info_packets(
struct stream_encoder *enc);
 
+void enc1_stream_encoder_reset_fifo(
+   struct stream_encoder *enc);
+
 void enc1_stream_encoder_dp_blank(
struct dc_link *link,
struct stream_encoder *enc);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c
index aab25ca8343a..8a70f92795c2 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c
@@ -593,6 +593,8 @@ static const struct stream_encoder_funcs dcn20_str_enc_funcs = {
enc1_stream_encoder_send_immediate_sdp_message,
.stop_dp_info_packets =
enc1_stream_encoder_stop_dp_info_packets,
+   .reset_fifo =
+   enc1_stream_encoder_reset_fifo,
.dp_blank =
enc1_stream_encoder_dp_blank,
.dp_unblank =
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.c
index ebd9c35c914f..7aa9aaf5db4c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.c
@@ -805,6 +805,8 @@ static const struct stream_encoder_funcs dcn30_str_enc_funcs = {
enc3_stream_encoder_update_dp_info_packets,
.stop_dp_info_packets =
enc1_stream_encoder_stop_dp_info_packets,
+   .reset_fifo =
+   enc1_stream_encoder_reset_fifo,
.dp_blank =
enc1_stream_encoder_dp_blank,
.dp_unblank =
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h 

[PATCH 05/14] drm/amd/display: Code change for DML isolation

2021-11-11 Thread Wayne Lin
From: Jun Lei 

[why]
DML itself is SW-only; putting the logic inside resource makes it
HW-dependent and thus impossible to compile separately from dc.
Separate compilation is critical for unit testing as well as bbox tool
development.

[how]
Create a new DML wrapper.
Copy logic from the validation functions into the DML wrapper as the base
implementation. The DML wrapper has internal/static implementations
for all helpers, and does not reference other functions.
It may reference dc structures/types for convenience.

This change now has all the changes for DML isolation squashed into
one.

Reviewed-by: Jun Lei 
Acked-by: Wayne Lin 
Signed-off-by: Jun Lei 
---
 .../drm/amd/display/dc/dml/display_mode_lib.h |1 +
 .../gpu/drm/amd/display/dc/dml/dml_wrapper.c  | 1889 +
 .../display/dc/dml/dml_wrapper_translation.c  |  284 +++
 .../gpu/drm/amd/display/dc/inc/dml_wrapper.h  |   34 +
 4 files changed, 2208 insertions(+)
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dml_wrapper.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dml_wrapper_translation.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/inc/dml_wrapper.h

diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h
index 6905ef1e75a6..d76251fd1566 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.h
@@ -73,6 +73,7 @@ struct display_mode_lib {
struct vba_vars_st vba;
struct dal_logger *logger;
struct dml_funcs funcs;
+   struct _vcs_dpi_display_e2e_pipe_params_st dml_pipe_state[6];
 };
 
 void dml_init_instance(struct display_mode_lib *lib,
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper.c
new file mode 100644
index ..ece34b0b8a46
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper.c
@@ -0,0 +1,1889 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "dml_wrapper.h"
+#include "resource.h"
+#include "core_types.h"
+#include "dsc.h"
+#include "clk_mgr.h"
+
+#ifndef DC_LOGGER_INIT
+#define DC_LOGGER_INIT
+#undef DC_LOG_WARNING
+#define DC_LOG_WARNING
+#endif
+
+#define DML_WRAPPER_TRANSLATION_
+#include "dml_wrapper_translation.c"
+#undef DML_WRAPPER_TRANSLATION_
+
+static bool is_dual_plane(enum surface_pixel_format format)
+{
+	return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA;
+}
+
+static void build_clamping_params(struct dc_stream_state *stream)
+{
+   stream->clamping.clamping_level = CLAMPING_FULL_RANGE;
+   stream->clamping.c_depth = stream->timing.display_color_depth;
+   stream->clamping.pixel_encoding = stream->timing.pixel_encoding;
+}
+
+static void get_pixel_clock_parameters(
+   const struct pipe_ctx *pipe_ctx,
+   struct pixel_clk_params *pixel_clk_params)
+{
+   const struct dc_stream_state *stream = pipe_ctx->stream;
+
+   /*TODO: is this halved for YCbCr 420? in that case we might want to move
+* the pixel clock normalization for hdmi up to here instead of doing it
+* in pll_adjust_pix_clk
+*/
+	pixel_clk_params->requested_pix_clk_100hz = stream->timing.pix_clk_100hz;
+   pixel_clk_params->encoder_object_id = stream->link->link_enc->id;
+   pixel_clk_params->signal_type = pipe_ctx->stream->signal;
+   pixel_clk_params->controller_id = pipe_ctx->stream_res.tg->inst + 1;
+   /* TODO: un-hardcode*/
+   pixel_clk_params->requested_sym_clk = LINK_RATE_LOW *
+   LINK_RATE_REF_FREQ_IN_KHZ;
+   pixel_clk_params->flags.ENABLE_SS = 0;
+   pixel_clk_params->color_depth =
+   stream->timing.display_color_depth;
+   

[PATCH 04/14] drm/amd/display: set MSA vsp/hsp to 0 for positive polarity for DP 128b/132b

2021-11-11 Thread Wayne Lin
From: Wenjing Liu 

[why]
There is a bug in the MSA programming sequence that mistakenly sets
MSA vsp/hsp to 1 for positive polarity, which is incorrect.

Reviewed-by: Ariel Bernstein 
Acked-by: Wayne Lin 
Signed-off-by: Wenjing Liu 
---
 .../drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.c| 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.c
index 565f12dd179a..5065904c7833 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.c
@@ -358,8 +358,8 @@ static void dcn31_hpo_dp_stream_enc_set_stream_attribute(
 
	h_width = hw_crtc_timing.h_border_left + hw_crtc_timing.h_addressable + hw_crtc_timing.h_border_right;
	v_height = hw_crtc_timing.v_border_top + hw_crtc_timing.v_addressable + hw_crtc_timing.v_border_bottom;
-   hsp = hw_crtc_timing.flags.HSYNC_POSITIVE_POLARITY ? 0x80 : 0;
-   vsp = hw_crtc_timing.flags.VSYNC_POSITIVE_POLARITY ? 0x80 : 0;
+   hsp = hw_crtc_timing.flags.HSYNC_POSITIVE_POLARITY ? 0 : 0x80;
+   vsp = hw_crtc_timing.flags.VSYNC_POSITIVE_POLARITY ? 0 : 0x80;
v_freq = hw_crtc_timing.pix_clk_100hz * 100;
 
	/*   MSA Packet Mapping to 32-bit Link Symbols - DP2 spec, section 2.7.4.1
-- 
2.25.1



[PATCH 03/14] drm/amd/display: Revert changes for MPO underflow

2021-11-11 Thread Wayne Lin
From: Angus Wang 

[WHY]
The previous changes for fixing MPO underflow with multiple
displays connected caused a regression where the machine runs
into a hang when doing multiple driver PnP cycles with multiple
displays connected.

[HOW]
Reverted the offending change.

Reviewed-by: Martin Leung 
Acked-by: Wayne Lin 
Signed-off-by: Angus Wang 
---
 drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c   | 2 +-
 drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c   | 2 +-
 drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c | 2 +-
 drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
index 83f5d9aaffcb..3883f918b3bb 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
@@ -1069,7 +1069,7 @@ static const struct dc_debug_options debug_defaults_drv = {
.timing_trace = false,
.clock_trace = true,
.disable_pplib_clock_request = true,
-   .pipe_split_policy = MPC_SPLIT_DYNAMIC,
+   .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP,
.force_single_disp_pipe_split = false,
.disable_dcc = DCC_ENABLE,
.vsr_support = true,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
index 98852b586295..79a66e0c4303 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
@@ -840,7 +840,7 @@ static const struct dc_debug_options debug_defaults_drv = {
.timing_trace = false,
.clock_trace = true,
.disable_pplib_clock_request = true,
-   .pipe_split_policy = MPC_SPLIT_DYNAMIC,
+   .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP,
.force_single_disp_pipe_split = false,
.disable_dcc = DCC_ENABLE,
.vsr_support = true,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
index 16e7059393fa..fcf96cf08c76 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
@@ -211,7 +211,7 @@ static const struct dc_debug_options debug_defaults_drv = {
.timing_trace = false,
.clock_trace = true,
.disable_pplib_clock_request = true,
-   .pipe_split_policy = MPC_SPLIT_DYNAMIC,
+   .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP,
.force_single_disp_pipe_split = false,
.disable_dcc = DCC_ENABLE,
.vsr_support = true,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
index 87cec14b7870..4a9b64023675 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
@@ -193,7 +193,7 @@ static const struct dc_debug_options debug_defaults_drv = {
.timing_trace = false,
.clock_trace = true,
.disable_pplib_clock_request = true,
-   .pipe_split_policy = MPC_SPLIT_DYNAMIC,
+   .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP,
.force_single_disp_pipe_split = false,
.disable_dcc = DCC_ENABLE,
.vsr_support = true,
-- 
2.25.1



[PATCH 02/14] drm/amd/display: Only flush delta from last command execution

2021-11-11 Thread Wayne Lin
From: Nicholas Kazlauskas 

[Why]
We're currently flushing commands that had previously been
flushed or are currently being processed by the DMCUB when we don't
immediately wait for idle after command execution.

[How]
Avoiding reflushing the data by keeping track of the last wptr.

We'll treat this as the actual rptr by creating a copy of the inbox
and modifying the copy's rptr.

Reviewed-by: Eric Yang 
Acked-by: Wayne Lin 
Signed-off-by: Nicholas Kazlauskas 
---
 drivers/gpu/drm/amd/display/dmub/dmub_srv.h | 1 +
 drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c | 9 ++++++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
index 90065a09e76a..83855b8a32e9 100644
--- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
+++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h
@@ -411,6 +411,7 @@ struct dmub_srv {
struct dmub_srv_base_funcs funcs;
struct dmub_srv_hw_funcs hw_funcs;
struct dmub_rb inbox1_rb;
+   uint32_t inbox1_last_wptr;
/**
 * outbox1_rb is accessed without locks (dal & dc)
 * and to be used only in dmub_srv_stat_get_notification()
diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
index 56a03328e8e6..6cc897dacd92 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
@@ -609,6 +609,8 @@ enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub,
 
 enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub)
 {
+   struct dmub_rb flush_rb;
+
if (!dmub->hw_init)
return DMUB_STATUS_INVALID;
 
@@ -617,9 +619,14 @@ enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub)
 * been flushed to framebuffer memory. Otherwise DMCUB might
 * read back stale, fully invalid or partially invalid data.
 */
-	dmub_rb_flush_pending(&dmub->inbox1_rb);
+	flush_rb = dmub->inbox1_rb;
+	flush_rb.rptr = dmub->inbox1_last_wptr;
+	dmub_rb_flush_pending(&flush_rb);
 
dmub->hw_funcs.set_inbox1_wptr(dmub, dmub->inbox1_rb.wrpt);
+
+   dmub->inbox1_last_wptr = dmub->inbox1_rb.wrpt;
+
return DMUB_STATUS_OK;
 }
 
-- 
2.25.1



[PATCH 01/14] drm/amd/display: Secondary display goes blank on Non DCN31

2021-11-11 Thread Wayne Lin
From: Ahmad Othman 

[Why]
Due to integration issues with branch merging,
a regression happened that prevented secondary
displays from lighting up or enabling certain features.

[How]
Separated the new logic to be DCN31-only and retained
the pre-DCN31 logic for all other ASICs.

Reviewed-by: Wenjing Liu 
Acked-by: Wayne Lin 
Signed-off-by: Ahmad Othman 
---
 drivers/gpu/drm/amd/display/dc/core/dc_link.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index 2e2dcd5518da..8a8a5aead34d 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -3997,7 +3997,8 @@ static void update_psp_stream_config(struct pipe_ctx *pipe_ctx, bool dpms_off)
 			config.phy_idx = link_enc->transmitter - TRANSMITTER_UNIPHY_A;
 
			// Add flag to guard new A0 DIG mapping
-			if (pipe_ctx->stream->ctx->dc->enable_c20_dtm_b0 == true) {
+			if (pipe_ctx->stream->ctx->dc->enable_c20_dtm_b0 == true &&
+					pipe_ctx->stream->link->dc->ctx->dce_version == DCN_VERSION_3_1) {
 				config.dig_be = link_enc->preferred_engine;
 				config.dio_output_type = pipe_ctx->stream->link->ep_type;
 				config.dio_output_idx = link_enc->transmitter - TRANSMITTER_UNIPHY_A;
-- 
2.25.1



[PATCH 00/14] DC Patches November 12, 2021

2021-11-11 Thread Wayne Lin
This DC patchset brings improvements in multiple areas. In summary, we
highlight:

- Fix issue that secondary display goes blank on non-DCN31 ASICs
- Adjust flushing data in DMCUB
- Revert patches which caused a regression in handling MPO/link encoder assignment
- Correct the MSA setting for DP 2.0
- Adjustments for DML isolation
- Fix FIFO error in the fast boot sequence
- Enable DSC over eDP
- Adjust the DSC power-off sequence

---

Ahmad Othman (1):
  drm/amd/display: Secondary display goes blank on Non DCN31

Angus Wang (1):
  drm/amd/display: Revert changes for MPO underflow

Anthony Koo (2):
  drm/amd/display: [FW Promotion] Release 0.0.92
  drm/amd/display: [FW Promotion] Release 0.0.93

Aric Cyr (1):
  drm/amd/display: 3.2.162

Brandon Syu (1):
  drm/amd/display: Fix eDP will flash when boot to OS

Jun Lei (1):
  drm/amd/display: Code change for DML isolation

Mikita Lipski (1):
  drm/amd/display: Enable DSC over eDP

Nicholas Kazlauskas (1):
  drm/amd/display: Only flush delta from last command execution

Sung Joon Kim (1):
  drm/amd/display: Revert "retain/release stream pointer in link enc
table"

Wenjing Liu (1):
  drm/amd/display: set MSA vsp/hsp to 0 for positive polarity for DP
128b/132b

Xu, Jinze (1):
  drm/amd/display: Reset fifo after enable otg

Yi-Ling Chen (1):
  drm/amd/display: fixed the DSC power off sequence during Driver PnP

hvanzyll (1):
  drm/amd/display: Visual Confirm Bar Height Adjust

 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |   73 +-
 .../amd/display/amdgpu_dm/amdgpu_dm_helpers.c |2 +-
 drivers/gpu/drm/amd/display/dc/core/dc_link.c |5 +-
 .../gpu/drm/amd/display/dc/core/dc_link_dp.c  |  167 +-
 .../drm/amd/display/dc/core/dc_link_enc_cfg.c |2 -
 drivers/gpu/drm/amd/display/dc/dc.h   |7 +-
 drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c |1 +
 .../display/dc/dce110/dce110_hw_sequencer.c   |7 +-
 .../drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c |   14 +-
 .../amd/display/dc/dcn10/dcn10_hw_sequencer.c |   37 +
 .../display/dc/dcn10/dcn10_stream_encoder.c   |   15 +
 .../display/dc/dcn10/dcn10_stream_encoder.h   |3 +
 .../gpu/drm/amd/display/dc/dcn20/dcn20_dsc.c  |2 +
 .../gpu/drm/amd/display/dc/dcn20/dcn20_optc.c |   14 +
 .../gpu/drm/amd/display/dc/dcn20/dcn20_optc.h |3 +
 .../drm/amd/display/dc/dcn20/dcn20_resource.c |2 +-
 .../display/dc/dcn20/dcn20_stream_encoder.c   |2 +
 .../dc/dcn30/dcn30_dio_stream_encoder.c   |2 +
 .../gpu/drm/amd/display/dc/dcn30/dcn30_optc.c |1 +
 .../drm/amd/display/dc/dcn30/dcn30_resource.c |2 +-
 .../amd/display/dc/dcn302/dcn302_resource.c   |2 +-
 .../amd/display/dc/dcn303/dcn303_resource.c   |2 +-
 .../dc/dcn31/dcn31_hpo_dp_stream_encoder.c|4 +-
 .../drm/amd/display/dc/dcn31/dcn31_hwseq.c|5 -
 .../gpu/drm/amd/display/dc/dcn31/dcn31_optc.c |1 +
 .../drm/amd/display/dc/dcn31/dcn31_resource.c |1 +
 .../drm/amd/display/dc/dml/display_mode_lib.h |1 +
 .../gpu/drm/amd/display/dc/dml/dml_wrapper.c  | 1889 +
 .../display/dc/dml/dml_wrapper_translation.c  |  284 +++
 drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c   |8 +
 .../gpu/drm/amd/display/dc/inc/dml_wrapper.h  |   34 +
 drivers/gpu/drm/amd/display/dc/inc/hw/dsc.h   |3 +
 .../amd/display/dc/inc/hw/stream_encoder.h|4 +
 .../amd/display/dc/inc/hw/timing_generator.h  |2 +
 drivers/gpu/drm/amd/display/dmub/dmub_srv.h   |1 +
 .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h   |   18 +-
 .../gpu/drm/amd/display/dmub/src/dmub_srv.c   |9 +-
 .../amd/display/include/ddc_service_types.h   |1 +
 38 files changed, 2599 insertions(+), 31 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dml_wrapper.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dml_wrapper_translation.c
 create mode 100644 drivers/gpu/drm/amd/display/dc/inc/dml_wrapper.h

-- 
2.25.1



[PATCH v10 06/10] drm_print: add choice to use dynamic debug in drm-debug

2021-11-11 Thread Jim Cromie
drm's debug system writes 10 distinct categories of messages to syslog
using a small API[1]: drm_dbg*(10 names), DRM_DEV_DEBUG*(3 names),
DRM_DEBUG*(8 names).  There are thousands of these callsites, each
categorized in this systematized way.

These callsites can be enabled at runtime by their category, each
controlled by a bit in drm.debug (/sys/module/drm/parameters/debug).
In the current "basic" implementation, drm_debug_enabled() tests these
bits in __drm_debug each time an API[1] call is executed; while cheap
individually, the costs accumulate with uptime.

This patch uses dynamic-debug with (required) jump-label to patch
enabled callsites onto their respective NOOP slots, avoiding all
runtime bit-checks of __drm_debug by drm_debug_enabled().

Dynamic debug has no concept of category, but we can emulate one by
replacing enum categories with a set of prefix-strings; "drm:core:",
"drm:kms:" "drm:driver:" etc, and prepend them (at compile time) to
the given formats.

Then we can use:

   # echo module drm format "^drm:core: " +p > control

to enable the whole category with one query.

This conversion yields many new prdbg callsites:

  dyndbg: 207 debug prints in module drm_kms_helper
  dyndbg: 376 debug prints in module drm
  dyndbg: 1811 debug prints in module i915
  dyndbg: 3917 debug prints in module amdgpu

Each site costs 56 bytes of .data, which is a big increase for
drm modules, so CONFIG_DRM_USE_DYNAMIC_DEBUG makes it optional.

CONFIG_JUMP_LABEL is also required, to get the promised optimizations.

The "basic" -> "dyndbg" switchover is layered into the macro scheme

A. A "prefix" version of DRM_UT_ map, named DRM_DBG_CAT_

"basic":  DRM_DBG_CAT_  <===  DRM_UT_.  Identity map.
"dyndbg":
   #define DRM_DBG_CAT_KMS"drm:kms: "
   #define DRM_DBG_CAT_PRIME  "drm:prime: "
   #define DRM_DBG_CAT_ATOMIC "drm:atomic: "

DRM_UT_* are preserved, since they're used elsewhere.  Since the
callback maintains its state in __drm_debug, drm_debug_enabled() will
stay synchronized, and continue to work.  We can address them
separately if they are called enough to be worth fixing.

B. drm_dev_dbg() & drm_debug() are interposed with macros

basic:    forward to renamed fn, with args preserved
enabled:  redirect to pr_debug, dev_dbg, with CATEGORY format catenated

This is where drm_debug_enabled() is avoided.  The prefix is prepended
at compile-time, no category at runtime.
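
A minimal sketch of that compile-time prepending (DRM_DBG_CAT_KMS is from
this patch; the wrapper macro name here is illustrative):

    /* adjacent string literals catenate at compile time, so dyndbg
     * stores "drm:kms: ..." as the format and can select on it with:
     *   echo 'module drm format "^drm:kms: " +p' > control
     */
    #define DRM_DBG_CAT_KMS "drm:kms: "

    #define drm_dbg_kms_sketch(fmt, ...) \
            pr_debug(DRM_DBG_CAT_KMS fmt, ##__VA_ARGS__)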

C. API[1] uses DRM_DBG_CAT_*

The API already uses B; now it uses A too, instead of DRM_UT_*, to
get the correct token type for "basic" and "dyndbg" configs.

D. use DEFINE_DYNAMIC_DEBUG_CATEGORIES()

This defines the map using the DRM_DBG_CAT_* strings, and creates the
sysfs bitmap to control those categories.

CONFIG_DRM_USE_DYNAMIC_DEBUG is also used to adjust amdgpu, i915
makefiles to add -DDYNAMIC_DEBUG_MODULE; it includes the current
CONFIG_DYNAMIC_DEBUG_CORE and is enabled by the user.

LIMITATIONS:

dev_dbg(etal) effectively prepends twice, category then driver-name,
yielding format strings like so:

bash-5.1# grep amdgpu: /proc/dynamic_debug/control | grep drm: | cut -d= -f2-
_ "amdgpu: drm:core: fence driver on ring %s use gpu addr 0x%016llx\012"
_ "amdgpu: drm:kms: Cannot create framebuffer from imported dma_buf\012"

This means we cannot use anchored "^drm:kms: " to specify the
category, a small loss of precision.

Note that searching on "format ^amdgpu: " works, but this is less
valuable, because the same can be done with "module amdgpu".

NOTES:

Because the dyndbg callback is keeping state in __drm_debug, it
synchronizes with drm_debug_enabled() and its remaining users; the
switchover should be transparent.

Code Review is expected to catch the lack of correspondence between
bit=>prefix definitions (the selector) and the prefixes used in the
API[1] layer above pr_debug()

I've coded the categories with trailing spaces.  This excludes any
sub-categories which might get added later.  This convention protects
any "drm:atomic:fail:" callsites from getting stomped on by `echo 0 >
debug`.  Other categories could differ, but we need some default.

Dyndbg requires that the prefix be in the compiled-in format string;
run-time prefixing evades callsite selection by category.

pr_debug("%s: ...", __func__, ...) // not ideal

Unfortunately __func__ is not a macro, and cannot be catenated at
preprocess/compile time.

If you want that, you might consider +mfl flags instead;)

Signed-off-by: Jim Cromie 
---
v5:
. use DEFINE_DYNAMIC_DEBUG_CATEGORIES in drm_print.c
. s/DRM_DBG_CLASS_/DRM_DBG_CAT_/ - dont need another term
. default=y in Kconfig entry - per @DanVet
. move some commit-log prose to dyndbg commit
. add-prototyes to (param_get/set)_dyndbg
. more wrinkles found by 
. relocate ratelimit chunk from elsewhere
v6:
. add kernel doc
. fix cpp paste, drop '#'
v7:
. change __drm_debug to long, to fit with DEFINE_DYNAMIC_DEBUG_CATEGORIES
. add -DDYNAMIC_DEBUG_MODULE to ccflags if DRM_USE_DYNAMIC_DEBUG
v8:
. adapt to altered ^ insertion
. add mem 

[PATCH v10 08/10] dyndbg: add print-to-tracefs, selftest with it - RFC

2021-11-11 Thread Jim Cromie
Sean Paul proposed, in:
https://patchwork.freedesktop.org/series/78133/
drm/trace: Mirror DRM debug logs to tracefs

His patchset's objective is to be able to independently steer some of
the drm.debug stream to an alternate tracing destination, by splitting
drm_debug_enabled() into syslog & trace flavors, and enabling them
separately.  2 advantages were identified:

1- syslog is heavyweight, tracefs is much lighter
2- separate selection of enabled categories means less traffic

Dynamic-Debug can do 2nd exceedingly well:

A- all work is behind jump-label's NOOP, zero off cost.
B- exact site selectivity, precisely the useful traffic.
   can tailor enabled set interactively, at shell.

Since the tracefs interface is effective for drm (the threads suggest
so), adding that interface to dynamic-debug has real potential for
everyone including drm.

if CONFIG_TRACING:

Grab Sean's trace_init/cleanup code, use it to provide tracefs
available by default to all pr_debugs.  This will likely need some
further per-module treatment; perhaps something reflecting hierarchy
of module,file,function,line, maybe with a tuned flattening.

endif CONFIG_TRACING

Add a new +T flag to enable tracing, independent of +p, and add and
use 3 macros: dyndbg_site_is_enabled/logging/tracing(), to encapsulate
the flag checks.  Existing code treats T like other flags.
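
For example, on a CONFIG_TRACING build, +T composes with the usual
query syntax (illustrative queries, standard control-file syntax):

   # send drm:kms: callsites to tracefs, without touching syslog (+p)
   echo 'module drm format "^drm:kms: " +T' > /proc/dynamic_debug/control
   # and turn the tracing back off
   echo 'module drm format "^drm:kms: " -T' > /proc/dynamic_debug/control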

Add ddebug_validate_flags() as last step in ddebug_parse_flags().  Its
only job is to fail on +T for non-CONFIG_TRACING builds.  It only sees
the new flags, and cannot validate specific state transitions.  This
is fine, since we have no need for that; such a test would have to be
done in ddebug_change(), which actually updates the callsites.
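
A minimal sketch of that gate (ddebug_validate_flags() is the name used by
this patch; the flag constant and exact plumbing here are assumptions):

    /* reject +T at parse time when the kernel cannot trace at all */
    static int ddebug_validate_flags(unsigned int flags)
    {
    #ifndef CONFIG_TRACING
            if (flags & _DPRINTK_FLAGS_TRACE)   /* hypothetical 'T' bit */
                    return -EINVAL;
    #endif
            return 0;   /* transitions are applied later, in ddebug_change() */
    }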

ddebug_change() adjusts the static-key-enable/disable condition to use
_DPRINTK_ENABLED / abstraction macros.

dynamic_emit_prefix() now gates on _DPRINTK_ENABLED too, as an
optimization but mostly to allow decluttering of its users.

__dynamic_pr_debug() etal get minor changes:

 - call dynamic_emit_prefix() 1st, _enabled() optimizes.
 - if (T) call trace_array_printk
 - if (!p) go around original printk code.
   done to minimize diff,
   goto-ectomy + reindent later/separately
 - share vaf across p|T

WRT the *_dev variants, I skipped all the device-specific dev_emit_prefix
additions for now.  tracefs is a fast customer with different needs;
it's not clear that pretty device-ID-ish strings are useful tracefs
content (on ingest), or that this couldn't be done more efficiently while
analysing or postprocessing the tracefs buffer.

SELFTEST: test_dynamic_debug.ko:

Uses the tracer facility to implement a kernel module selftest.

TODO:

Earlier core code had (tracerfn)() indirection, allowing a plugin
side-effector we could test the results of.

ATM all the tests which count +T'd callsite executions (and which
expect >0) are failing.

Now it needs a rethink to test from userspace, rather than the current
test-once at module-load.  It needs a parameters/testme button.

So the remainder of this is a bit stale ...

- A custom tracer counts the number of calls (of T-enabled pr_debugs),
- do_debugging(x) calls a set of categorized pr_debugs x times

- test registers the tracer on the module
  then iteratively:
  manipulates dyndbg states via query-cmds, mostly format ^prefix
  runs do_debugging()
  counts enabled callsite executions
  reports mismatches

- modprobe test_dynamic_debug use_bad_tracer=1
  attaches a bad/recursive tracer
  Bad Things (did) Happen.
  has thrown me interesting panics.
  cannot replicate atm.

RFC: (DONE)

The "tracer" interface probably needs work and a new name.  It is only
1/2 way towards a real tracefs interface; and the code I lifted from
Sean Paul in the next patch could be implemented in dynamic_debug.c
instead, and made available for all pr_debug users.

This would also eliminate need for dynamic_debug_(un)register_tracer(),
since dyndbg could just provide it when TRACING is on.

NOTES:

$> modprobe test_dynamic_debug dyndbg=+p

   it fails 3/29 tests.  Haven't looked at why.

$> modprobe test_dynamic_debug use_bad_tracer=1

Earlier in dev, bad_tracer() exploded in recursion; I haven't been able
to replicate that lately.

Signed-off-by: Jim Cromie 
---
 .../admin-guide/dynamic-debug-howto.rst   |   7 +-
 MAINTAINERS   |   1 +
 include/linux/dynamic_debug.h |  12 +-
 lib/Kconfig.debug |  11 +
 lib/Makefile  |   1 +
 lib/dynamic_debug.c   | 127 --
 lib/test_dynamic_debug.c  | 222 ++
 7 files changed, 355 insertions(+), 26 deletions(-)
 create mode 100644 lib/test_dynamic_debug.c

diff --git a/Documentation/admin-guide/dynamic-debug-howto.rst 
b/Documentation/admin-guide/dynamic-debug-howto.rst
index a89cfa083155..bf2a561cc9bc 100644
--- a/Documentation/admin-guide/dynamic-debug-howto.rst
+++ b/Documentation/admin-guide/dynamic-debug-howto.rst
@@ -227,7 +227,8 @@ of the 

[PATCH v10 10/10] drm: use DEFINE_DYNAMIC_DEBUG_TRACE_GROUPS in 3 places

2021-11-11 Thread Jim Cromie
add sysfs knobs to enable modules' pr_debug()s ---> tracefs

Signed-off-by: Jim Cromie 
---
 drivers/gpu/drm/amd/display/dc/core/dc_debug.c |  8 ++++++++
 drivers/gpu/drm/drm_print.c| 13 ++++++++++---
 drivers/gpu/drm/i915/intel_gvt.c   | 15 ++++++++++++---
 3 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c
index e49a755c6a69..58c56c1708e7 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c
@@ -80,6 +80,14 @@ DEFINE_DYNAMIC_DEBUG_LOG_GROUPS(debug_dc, __debug_dc,
DC_DYNDBG_BITMAP_DESC(debug_dc),
amdgpu_bitmap);
 
+#if defined(CONFIG_TRACING)
+
+unsigned long __trace_dc;
+EXPORT_SYMBOL(__trace_dc);
+DEFINE_DYNAMIC_DEBUG_LOG_GROUPS(trace_dc, __trace_dc,
+   DC_DYNDBG_BITMAP_DESC(trace_dc),
+   amdgpu_bitmap);
+#endif
 #endif
 
 #define DC_LOGGER_INIT(logger)
diff --git a/drivers/gpu/drm/drm_print.c b/drivers/gpu/drm/drm_print.c
--- a/drivers/gpu/drm/drm_print.c
+++ b/drivers/gpu/drm/drm_print.c
@@ -72,9 +72,16 @@ static struct dyndbg_bitdesc drm_dyndbg_bitmap[] = {
[8] = { DRM_DBG_CAT_DP },
[9] = { DRM_DBG_CAT_DRMRES }
 };
-DEFINE_DYNAMIC_DEBUG_BITGRPS(debug, __drm_debug, DRM_DEBUG_DESC,
-drm_dyndbg_bitmap);
-
+DEFINE_DYNAMIC_DEBUG_LOG_GROUPS(debug, __drm_debug, DRM_DEBUG_DESC,
+   drm_dyndbg_bitmap);
+
+#ifdef CONFIG_TRACING
+struct trace_array *trace_arr;
+unsigned long __drm_trace;
+EXPORT_SYMBOL(__drm_trace);
+DEFINE_DYNAMIC_DEBUG_TRACE_GROUPS(trace, __drm_trace, DRM_DEBUG_DESC,
+ drm_dyndbg_bitmap);
+#endif
 #endif
 
 void __drm_puts_coredump(struct drm_printer *p, const char *str)
diff --git a/drivers/gpu/drm/i915/intel_gvt.c b/drivers/gpu/drm/i915/intel_gvt.c
index efaac5777873..84348d4aedf6 100644
--- a/drivers/gpu/drm/i915/intel_gvt.c
+++ b/drivers/gpu/drm/i915/intel_gvt.c
@@ -195,8 +195,17 @@ static struct dyndbg_bitdesc i915_dyndbg_bitmap[] = {
help_(7, "gvt:render:") \
help_(8, "gvt:sched:")
 
-DEFINE_DYNAMIC_DEBUG_BITGRPS(debug_gvt, __gvt_debug,
-I915_GVT_CATEGORIES(debug_gvt),
-i915_dyndbg_bitmap);
+DEFINE_DYNAMIC_DEBUG_LOG_GROUPS(debug_gvt, __gvt_debug,
+   I915_GVT_CATEGORIES(debug_gvt),
+   i915_dyndbg_bitmap);
 
+#if defined(CONFIG_TRACING)
+
+unsigned long __gvt_trace;
+EXPORT_SYMBOL(__gvt_trace);
+DEFINE_DYNAMIC_DEBUG_TRACE_GROUPS(trace_gvt, __gvt_trace,
+ I915_GVT_CATEGORIES(trace_gvt),
+ i915_dyndbg_bitmap);
+
+#endif
 #endif
-- 
2.31.1



[PATCH v10 09/10] dyndbg: create DEFINE_DYNAMIC_DEBUG_LOG|TRACE_GROUPS

2021-11-11 Thread Jim Cromie
With the recent addition of pr_debug to tracefs via +T flag, we now
want to add drm.trace; like its model: drm.debug, it maps bits to
pr_debug categories, but this one enables/disables writing to tracefs
(iff CONFIG_TRACING).

Do this by:

1. add flags to dyndbg_bitmap_param, holds "p" or "T" to work for either.

   add DEFINE_DYNAMIC_DEBUG_BITGRPS_FLAGS to init .flags
   DEFINE_DYNAMIC_DEBUG_BITGRPS gets "p" for compat.
   use it from...

2. DEFINE_DYNAMIC_DEBUG_LOG_GROUPS as (1) with "p" flags - print to syslog
   DEFINE_DYNAMIC_DEBUG_TRACE_GROUPS as (1) with "T" flags - trace to tracefs
   add kdoc to these

NOTES

The flags arg in (1) is a string, not just a 'p' or 'T'.  This allows
use of decorator flags ("mflt") too, in case the module author wants
to ensure those decorations are in the trace & log.

The LOG|TRACE (2) macros don't use any decorator flags, (and therefore
don't toggle them), allowing users to control those themselves.

Decorator flags are shared for both LOG and TRACE consumers;
coordination between users is expected.  ATM, there's no declarative
way to preset decorator flags, but DEFINE_DYNAMIC_DEBUG_BITGRPS_FLAGS
can be used to explicitly toggle them.

Signed-off-by: Jim Cromie 
---
 include/linux/dynamic_debug.h | 44 +++++++++++++++++++++++++++++++++-----------
 lib/dynamic_debug.c   |  4 ++--
 2 files changed, 35 insertions(+), 13 deletions(-)

diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index 792bcff0297e..918ac1a92358 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -255,30 +255,52 @@ struct dyndbg_bitdesc {
 
 struct dyndbg_bitmap_param {
unsigned long *bits;/* ref to shared state */
+   const char *flags;
unsigned int maplen;
struct dyndbg_bitdesc *map; /* indexed by bitpos */
 };
 
 #if defined(CONFIG_DYNAMIC_DEBUG) || \
(defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
+
+#define DEFINE_DYNAMIC_DEBUG_BITGRPS_FLAGS(fsname, _var, _flags, desc, data) \
+   MODULE_PARM_DESC(fsname, desc); \
+   static struct dyndbg_bitmap_param ddcats_##_var =   \
+   { .bits = &(_var), .flags = (_flags),   \
+ .map = data, .maplen = ARRAY_SIZE(data) };\
+   module_param_cb(fsname, _ops_dyndbg, _##_var, 0644)
+
+#define DEFINE_DYNAMIC_DEBUG_BITGRPS(fsname, _var, desc, data) \
+   DEFINE_DYNAMIC_DEBUG_BITGRPS_FLAGS(fsname, _var, "p", desc, data)
+
 /**
- * DEFINE_DYNAMIC_DEBUG_BITGRPS() - bitmap control of pr_debugs, by format match
+ * DEFINE_DYNAMIC_DEBUG_LOG_GROUPS() - bitmap control of grouped pr_debugs --> syslog
+ *
  * @fsname: parameter basename under /sys
  * @_var:   C-identifier holding bitmap
  * @desc:   string summarizing the controls provided
  * @bitmap: C array of struct dyndbg_bitdescs
  *
- * Intended for modules with a systematic use of pr_debug prefixes in
- * the format strings, this allows modules calling pr_debugs to
- * control them in groups by matching against their formats, and map
- * them to bits 0-N of a sysfs control point.
+ * Intended for modules having pr_debugs with prefixed/categorized
+ * formats; this lets you group them by substring match, map groups to
+ * bits, and enable per group to write to syslog, via @fsname.
  */
-#define DEFINE_DYNAMIC_DEBUG_BITGRPS(fsname, _var, desc, data) \
-   MODULE_PARM_DESC(fsname, desc); \
-   static struct dyndbg_bitmap_param ddcats_##_var =   \
-   { .bits = &(_var), .map = data, \
- .maplen = ARRAY_SIZE(data) }; \
-   module_param_cb(fsname, _ops_dyndbg, _##_var, 0644)
+#define DEFINE_DYNAMIC_DEBUG_LOG_GROUPS(fsname, _var, desc, data)  \
+   DEFINE_DYNAMIC_DEBUG_BITGRPS_FLAGS(fsname, _var, "p", desc, data)
+
+/**
+ * DEFINE_DYNAMIC_DEBUG_TRACE_GROUPS() - bitmap control of pr_debugs --> tracefs
+ * @fsname: parameter basename under /sys
+ * @_var:   C-identifier holding bitmap
+ * @desc:   string summarizing the controls provided
+ * @bitmap: C array of struct dyndbg_bitdescs
+ *
+ * Intended for modules having pr_debugs with prefixed/categorized
+ * formats; this lets you group them by substring match, map groups to
+ * bits, and enable per group to write to tracebuf, via @fsname.
+ */
+#define DEFINE_DYNAMIC_DEBUG_TRACE_GROUPS(fsname, _var, desc, data)\
+   DEFINE_DYNAMIC_DEBUG_BITGRPS_FLAGS(fsname, _var, "T", desc, data)
 
 extern const struct kernel_param_ops param_ops_dyndbg;
 
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index d493ed6658b9..f5ba07668020 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -634,8 +634,8 @@ int param_set_dyndbg(const char *instr, const struct kernel_param *kp)
for (i = 0; i < p->maplen && i < BITS_PER_LONG; map++, i++) {
if (test_bit(i, ) == test_bit(i, 

[PATCH v10 07/10] drm_print: instrument drm_debug_enabled

2021-11-11 Thread Jim Cromie
Duplicate drm_debug_enabled() code into both "basic" and "dyndbg"
ifdef branches.  Then add a pr_debug("todo: ...") into the "dyndbg"
branch.

Then convert the "dyndbg" branch's code to a macro, so that the
pr_debug() gets its callsite info from the invoking function, instead
of from drm_debug_enabled() itself.

This gives us unique callsite info for the 8 remaining users of
drm_debug_enabled(), and lets us enable them individually to see how
much logging traffic they generate.  The oft-visited callsites can
then be reviewed for runtime cost and possible optimizations.

Here's what we get:

bash-5.1# modprobe drm
dyndbg: 384 debug prints in module drm
bash-5.1# grep todo: /proc/dynamic_debug/control
drivers/gpu/drm/drm_edid.c:1843 [drm]connector_bad_edid =_ "todo: maybe avoid via dyndbg\012"
drivers/gpu/drm/drm_print.c:309 [drm]___drm_dbg =p "todo: maybe avoid via dyndbg\012"
drivers/gpu/drm/drm_print.c:286 [drm]__drm_dev_dbg =p "todo: maybe avoid via dyndbg\012"
drivers/gpu/drm/drm_vblank.c:1491 [drm]drm_vblank_restore =_ "todo: maybe avoid via dyndbg\012"
drivers/gpu/drm/drm_vblank.c:787 [drm]drm_crtc_vblank_helper_get_vblank_timestamp_internal =_ "todo: maybe avoid via dyndbg\012"
drivers/gpu/drm/drm_vblank.c:410 [drm]drm_crtc_accurate_vblank_count =_ "todo: maybe avoid via dyndbg\012"
drivers/gpu/drm/drm_atomic_uapi.c:1457 [drm]drm_mode_atomic_ioctl =_ "todo: maybe avoid via dyndbg\012"
drivers/gpu/drm/drm_edid_load.c:178 [drm]edid_load =_ "todo: maybe avoid via dyndbg\012"

At a quick glance, edid won't qualify, drm_print might, drm_vblank is
the strongest candidate, and maybe atomic-ioctl too.
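
For instance, to enable just the drm_vblank_restore callsite and watch
its traffic (a usage sketch; dyndbg's func keyword selects callsites
by their containing function):

  echo "func drm_vblank_restore +p" > /proc/dynamic_debug/control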

Signed-off-by: Jim Cromie 
---
 include/drm/drm_print.h | 17 +++--
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h
index 392cff7cb95c..a902bd4d8c55 100644
--- a/include/drm/drm_print.h
+++ b/include/drm/drm_print.h
@@ -381,6 +381,11 @@ enum drm_debug_category {
 #define DRM_DBG_CAT_DP DRM_UT_DP
 #define DRM_DBG_CAT_DRMRES DRM_UT_DRMRES
 
+static inline bool drm_debug_enabled(enum drm_debug_category category)
+{
+   return unlikely(__drm_debug & category);
+}
+
 #else /* CONFIG_DRM_USE_DYNAMIC_DEBUG */
 
 /* join prefix + format in cpp so dyndbg can see it */
@@ -414,12 +419,13 @@ enum drm_debug_category {
 #define DRM_DBG_CAT_DP "drm:dp: "
 #define DRM_DBG_CAT_DRMRES "drm:res: "
 
-#endif /* CONFIG_DRM_USE_DYNAMIC_DEBUG */
+#define drm_debug_enabled(category)\
+   ({  \
+   pr_debug("todo: maybe avoid via dyndbg\n"); \
+   unlikely(__drm_debug & (category)); \
+   })
 
-static inline bool drm_debug_enabled(enum drm_debug_category category)
-{
-   return unlikely(__drm_debug & category);
-}
+#endif /* CONFIG_DRM_USE_DYNAMIC_DEBUG */
 
 /*
  * struct device based logging
@@ -582,7 +588,6 @@ void __drm_dev_dbg(const struct device *dev, enum drm_debug_category category,
 #define drm_dbg_drmres(drm, fmt, ...)  \
	drm_dev_dbg((drm) ? (drm)->dev : NULL, DRM_DBG_CAT_DRMRES, fmt, ##__VA_ARGS__)
 
-
 /*
  * printk based logging
  *
-- 
2.31.1



[PATCH v10 05/10] i915/gvt: use dyndbg.BITGRPS for existing pr_debugs

2021-11-11 Thread Jim Cromie
The gvt component of this driver has ~120 pr_debugs with formats using
one of 9 fixed string prefixes, which are quite similar to those
enumerated in DRM debug categories.  Following the interface model of
drm.debug, add a parameter to map bits to these format prefixes.

static struct dyndbg_bitdesc i915_bitmap[] = {
[0] = { "gvt:cmd:" },
[1] = { "gvt:core:" },
[2] = { "gvt:dpy:" },
[3] = { "gvt:el:" },
[4] = { "gvt:irq:" },
[5] = { "gvt:mm:" },
[6] = { "gvt:mmio:" },
[7] = { "gvt:render:" },
[8] = { "gvt:sched:" }
};
DEFINE_DYNAMIC_DEBUG_BITGRPS(debug_gvt, __gvt_debug,
			     "dyndbg bitmap desc", i915_bitmap);

If CONFIG_DYNAMIC_DEBUG_CORE=y, the i915 Makefile adds
-DDYNAMIC_DEBUG_MODULE to ccflags, which CONFIG_DYNAMIC_DEBUG=n
(CORE-only) builds need.  This is redone more comprehensively soon.
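
Once loaded, groups are toggled by writing a mask to the parameter;
for example (an illustration using the bit numbering above, where
bit 1 is gvt:core: and bit 5 is gvt:mm:):

  echo 0x22 > /sys/module/i915/parameters/debug_gvt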

Signed-off-by: Jim Cromie 
---
 drivers/gpu/drm/i915/Makefile|  2 ++
 drivers/gpu/drm/i915/intel_gvt.c | 38 
 2 files changed, 40 insertions(+)

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 660bb03de6fc..0fa5f53312a8 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -317,6 +317,8 @@ i915-y += intel_gvt.o
 include $(src)/gvt/Makefile
 endif
 
+ccflags-$(CONFIG_DYNAMIC_DEBUG_CORE) += -DDYNAMIC_DEBUG_MODULE
+
 obj-$(CONFIG_DRM_I915) += i915.o
 obj-$(CONFIG_DRM_I915_GVT_KVMGT) += gvt/kvmgt.o
 
diff --git a/drivers/gpu/drm/i915/intel_gvt.c b/drivers/gpu/drm/i915/intel_gvt.c
index 4e70c1a9ef2e..efaac5777873 100644
--- a/drivers/gpu/drm/i915/intel_gvt.c
+++ b/drivers/gpu/drm/i915/intel_gvt.c
@@ -162,3 +162,41 @@ void intel_gvt_resume(struct drm_i915_private *dev_priv)
if (intel_gvt_active(dev_priv))
intel_gvt_pm_resume(dev_priv->gvt);
 }
+
+#if defined(CONFIG_DRM_USE_DYNAMIC_DEBUG)
+
+unsigned long __gvt_debug;
+EXPORT_SYMBOL(__gvt_debug);
+
+static struct dyndbg_bitdesc i915_dyndbg_bitmap[] = {
+   [0] = { "gvt:cmd:" },
+   [1] = { "gvt:core:" },
+   [2] = { "gvt:dpy:" },
+   [3] = { "gvt:el:" },
+   [4] = { "gvt:irq:" },
+   [5] = { "gvt:mm:" },
+   [6] = { "gvt:mmio:" },
+   [7] = { "gvt:render:" },
+   [8] = { "gvt:sched:" }
+};
+
+#define help_(_N, _cat)"\t  Bit-" #_N ":\t" _cat "\n"
+
+#define I915_GVT_CATEGORIES(name) \
+   " Enable debug output via /sys/module/i915/parameters/" #name   \
+   ", where each bit enables a debug category.\n"  \
+   help_(0, "gvt:cmd:")\
+   help_(1, "gvt:core:")   \
+   help_(2, "gvt:dpy:")\
+   help_(3, "gvt:el:") \
+   help_(4, "gvt:irq:")\
+   help_(5, "gvt:mm:") \
+   help_(6, "gvt:mmio:")   \
+   help_(7, "gvt:render:") \
+   help_(8, "gvt:sched:")
+
+DEFINE_DYNAMIC_DEBUG_BITGRPS(debug_gvt, __gvt_debug,
+I915_GVT_CATEGORIES(debug_gvt),
+i915_dyndbg_bitmap);
+
+#endif
-- 
2.31.1



[PATCH v10 04/10] i915/gvt: trim spaces from pr_debug "gvt: core:" prefixes

2021-11-11 Thread Jim Cromie
Taking embedded spaces out of existing prefixes makes them more easily
searchable, and simplifies the extra quoting otherwise needed:

  $> echo format "^gvt: core:" +p >control

Dropping the internal spaces means any trailing space in a query will
more clearly terminate the prefix being searched for.

Consider a generic drm-debug example:

  # turn off ATOMIC reports
  echo format "^drm:atomic: " -p > control

  # turn off all ATOMIC:* reports, including any sub-categories
  echo format "^drm:atomic:" -p > control

  # turn on ATOMIC:FAIL: reports
  echo format "^drm:atomic:fail: " +p > control

Removing embedded spaces in the format prefixes simplifies the
corresponding match string.  This means that "quoted" match-prefixes
are only needed when the trailing space is desired, in order to
exclude explicitly sub-categorized pr-debugs; in this example,
"drm:atomic:fail:".

Signed-off-by: Jim Cromie 
---
 drivers/gpu/drm/i915/gvt/debug.h | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/debug.h b/drivers/gpu/drm/i915/gvt/debug.h
index c6027125c1ec..bbecc279e077 100644
--- a/drivers/gpu/drm/i915/gvt/debug.h
+++ b/drivers/gpu/drm/i915/gvt/debug.h
@@ -36,30 +36,30 @@ do {
\
 } while (0)
 
 #define gvt_dbg_core(fmt, args...) \
-   pr_debug("gvt: core: "fmt, ##args)
+   pr_debug("gvt:core: " fmt, ##args)
 
 #define gvt_dbg_irq(fmt, args...) \
-   pr_debug("gvt: irq: "fmt, ##args)
+   pr_debug("gvt:irq: " fmt, ##args)
 
 #define gvt_dbg_mm(fmt, args...) \
-   pr_debug("gvt: mm: "fmt, ##args)
+   pr_debug("gvt:mm: " fmt, ##args)
 
 #define gvt_dbg_mmio(fmt, args...) \
-   pr_debug("gvt: mmio: "fmt, ##args)
+   pr_debug("gvt:mmio: " fmt, ##args)
 
 #define gvt_dbg_dpy(fmt, args...) \
-   pr_debug("gvt: dpy: "fmt, ##args)
+   pr_debug("gvt:dpy: " fmt, ##args)
 
 #define gvt_dbg_el(fmt, args...) \
-   pr_debug("gvt: el: "fmt, ##args)
+   pr_debug("gvt:el: " fmt, ##args)
 
 #define gvt_dbg_sched(fmt, args...) \
-   pr_debug("gvt: sched: "fmt, ##args)
+   pr_debug("gvt:sched: " fmt, ##args)
 
 #define gvt_dbg_render(fmt, args...) \
-   pr_debug("gvt: render: "fmt, ##args)
+   pr_debug("gvt:render: " fmt, ##args)
 
 #define gvt_dbg_cmd(fmt, args...) \
-   pr_debug("gvt: cmd: "fmt, ##args)
+   pr_debug("gvt:cmd: " fmt, ##args)
 
 #endif
-- 
2.31.1



[PATCH v10 03/10] amdgpu: use dyndbg.BITGRPS to control existing pr_debugs

2021-11-11 Thread Jim Cromie
logger_types.h defines many DC_LOG_*() categorized debug wrappers.
Most of these already use the DRM debug API, so are controllable using
drm.debug, but others use a bare pr_debug("$prefix: .."), with one of
13 different uppercase class prefixes.

Use DEFINE_DYNAMIC_DEBUG_BITGRPS to create a sysfs location whose bits
map to these 13 sets of categorized pr_debugs, to en/disable them.

The Makefile adds -DDYNAMIC_DEBUG_MODULE for CONFIG_DYNAMIC_DEBUG_CORE,
otherwise BUILD_BUG_ON triggers (obvious errors on subtle misuse are
better than mysterious ones).
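
Once loaded, a category is enabled by writing its bit; for example, to
turn on the [DML]: prints (bit 9 in the help text below - the mask is
an illustration, it just follows the bitmap):

  echo 0x200 > /sys/module/amdgpu/parameters/debug_dc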

Signed-off-by: Jim Cromie 
---
 drivers/gpu/drm/amd/amdgpu/Makefile   |  2 +
 .../gpu/drm/amd/display/dc/core/dc_debug.c| 48 ++-
 2 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 653726588956..077342ca803f 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -38,6 +38,8 @@ ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \
-I$(FULL_AMD_DISPLAY_PATH)/amdgpu_dm \
-I$(FULL_AMD_PATH)/amdkfd
 
+ccflags-$(CONFIG_DYNAMIC_DEBUG_CORE) += -DDYNAMIC_DEBUG_MODULE
+
 amdgpu-y := amdgpu_drv.o
 
 # add KMS driver
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c
index 21be2a684393..e49a755c6a69 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c
@@ -36,8 +36,54 @@
 
 #include "resource.h"
 
-#define DC_LOGGER_INIT(logger)
+#if defined(CONFIG_DRM_USE_DYNAMIC_DEBUG)
+#include 
+
+unsigned long __debug_dc;
+EXPORT_SYMBOL(__debug_dc);
+
+#define help_(_N, _cat)"\t  Bit-" #_N "\t" _cat "\n"
+
+#define DC_DYNDBG_BITMAP_DESC(name)\
+   "Control pr_debugs via /sys/module/amdgpu/parameters/" #name\
+   ", where each bit controls a debug category.\n" \
+   help_(0, "[SURFACE]:")  \
+   help_(1, "[CURSOR]:")   \
+   help_(2, "[PFLIP]:")\
+   help_(3, "[VBLANK]:")   \
+   help_(4, "[HW_LINK_TRAINING]:") \
+   help_(5, "[HW_AUDIO]:") \
+   help_(6, "[SCALER]:")   \
+   help_(7, "[BIOS]:") \
+   help_(8, "[BANDWIDTH_CALCS]:")  \
+   help_(9, "[DML]:")  \
+   help_(10, "[IF_TRACE]:")\
+   help_(11, "[GAMMA]:")   \
+   help_(12, "[SMU_MSG]:")
+
+static struct dyndbg_bitdesc amdgpu_bitmap[] = {
+   [0] = { "[SURFACE]:" },
+   [1] = { "[CURSOR]:" },
+   [2] = { "[PFLIP]:" },
+   [3] = { "[VBLANK]:" },
+   [4] = { "[HW_LINK_TRAINING]:" },
+   [5] = { "[HW_AUDIO]:" },
+   [6] = { "[SCALER]:" },
+   [7] = { "[BIOS]:" },
+   [8] = { "[BANDWIDTH_CALCS]:" },
+   [9] = { "[DML]:" },
+   [10] = { "[IF_TRACE]:" },
+   [11] = { "[GAMMA]:" },
+   [12] = { "[SMU_MSG]:" }
+};
+
+DEFINE_DYNAMIC_DEBUG_LOG_GROUPS(debug_dc, __debug_dc,
+   DC_DYNDBG_BITMAP_DESC(debug_dc),
+   amdgpu_bitmap);
+
+#endif
 
+#define DC_LOGGER_INIT(logger)
 
 #define SURFACE_TRACE(...) do {\
if (dc->debug.surface_trace) \
-- 
2.31.1



[PATCH v10 02/10] drm: fix doc grammar

2021-11-11 Thread Jim Cromie
allocates and initializes ...

Signed-off-by: Jim Cromie 
---
 include/drm/drm_drv.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h
index 0cd95953cdf5..4b29261c4537 100644
--- a/include/drm/drm_drv.h
+++ b/include/drm/drm_drv.h
@@ -486,7 +486,7 @@ void *__devm_drm_dev_alloc(struct device *parent,
  * @type: the type of the struct which contains struct _device
  * @member: the name of the _device within @type.
  *
- * This allocates and initialize a new DRM device. No device registration is 
done.
+ * This allocates and initializes a new DRM device. No device registration is 
done.
  * Call drm_dev_register() to advertice the device to user space and register 
it
  * with other core subsystems. This should be done last in the device
  * initialization sequence to make sure userspace can't access an inconsistent
-- 
2.31.1



[PATCH v10 01/10] dyndbg: add DEFINE_DYNAMIC_DEBUG_BITGRPS macro and callbacks

2021-11-11 Thread Jim Cromie
DEFINE_DYNAMIC_DEBUG_BITGRPS(fsname, var, bitmap_desc, bitmap)
allows users to create a drm.debug style (bitmap) sysfs interface,
mapping each bit to a group of pr_debugs, matching on their formats.

This works well when the formats systematically include a prefix
string such as ERR|WARN|INFO, etc.

Such groups can (already) be manipulated like so:

echo "format $prefix +p" >control

This macro merely makes it easier to operate on them as groups.

/* standard usage */
static struct dyndbg_bitdesc my_bitmap[] = {
[0] = { "gvt:cmd:" },
[1] = { "gvt:core:" },
[2] = { "gvt:dpy:" },
[3] = { "gvt:el:" },
[4] = { "gvt:irq:" },
[5] = { "gvt:mm:" },
[6] = { "gvt:mmio:" },
[7] = { "gvt:render:" },
[8] = { "gvt:sched:" }
};
DEFINE_DYNAMIC_DEBUG_BITGRPS(debug_gvt, __gvt_debug,
 "i915/gvt bitmap desc", my_bitmap);

In addition to the macro, patch adds:

 - int param_set_dyndbg()
 - int param_get_dyndbg()
 - struct kernel_param_ops param_ops_dyndbg

Following the model of kernel/params.c STANDARD_PARAM_DEFS, these are
non-static and exported.

get/set use an augmented kernel_param; the arg refs a new struct
dyndbg_bitmap_param containing:

A- the map of "categories", an array of struct dyndbg_bitdescs,
   indexed by bitpos, defining the match against pr_debug formats.

B- a pointer to the user module's ulong holding the bits/state.
   By sharing state, we coordinate with code that still uses it
   directly.  This allows drm-debug api to be converted incrementally,
   while still using __drm_debug & drm_debug_enabled() in other parts.

param_set_dyndbg() compares new vs old bits, and only updates prdbgs
on changes.  This maximally preserves the underlying state, which may
have been customized via later `echo $cmd >control`.  So if a user
really wants to know that all prdbgs are set precisely, they must
pre-clear then set.
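
A minimal sketch of the set-side callback, assuming the exported
dynamic_debug_exec_queries() helper; the .match member name and the
query-buffer size are illustrative (the real callback lives in
lib/dynamic_debug.c):

	int param_set_dyndbg(const char *instr, const struct kernel_param *kp)
	{
		const struct dyndbg_bitmap_param *p = kp->arg;
		unsigned long inbits;
		int rc, i;
		char query[64];

		rc = kstrtoul(instr, 0, &inbits);
		if (rc)
			return rc;

		for (i = 0; i < p->maplen && i < BITS_PER_LONG; i++) {
			if (test_bit(i, &inbits) == test_bit(i, p->bits))
				continue;	/* apply only bits that changed */
			snprintf(query, sizeof(query), "format '%s' %cp",
				 p->map[i].match,
				 test_bit(i, &inbits) ? '+' : '-');
			dynamic_debug_exec_queries(query, NULL);
		}
		*p->bits = inbits;
		return 0;
	}

Pre-clear-then-set from the shell, using the debug_gvt example above:

	echo 0 > /sys/module/i915/parameters/debug_gvt      # pre-clear all groups
	echo 0x1ff > /sys/module/i915/parameters/debug_gvt  # then set all 9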

dynamic_debug.h:

Add DEFINE_DYNAMIC_DEBUG_BITGRPS() described above, and a stub
throwing a BUILD_BUG (RFC) when used without DYNAMIC_DEBUG support.

Add structs dyndbg_bitdesc, dyndbg_bitmap_param to support the main
macro, and several helper macros wrapping the given categories with
^prefix and ' ' suffix.  This way the callback can be more broadly
used, by using the right helper macro.

Also externs the struct kernel_param_ops param_ops_dyndbg symbol, as
is done in moduleparam.h for all the STANDARD params.

USAGE NOTES:

Using dyndbg to query on "format $str" requires that $str must be
present in the compiled-in format string.  Searching on "%s" does not
define a useful set of callsites.

Using DEFINE_DYNAMIC_DEBUG_BITGRPS without dyndbg support gets a BUILD_BUG.
ISTM there is already action at a declarative distance, nobody needs
mystery as to why the /sysfs thingy didn't appear.

Dyndbg is agnostic wrt the categorization scheme used, in order to
play well with any prefix convention already in use in the codebase.
In fact, "prefix" is not strictly accurate without ^ anchor.
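
For example (usage sketches echoing the gvt example above; the ^
anchor is what distinguishes a true prefix match from a substring
match):

  # anchored - only formats that begin with gvt:mm:
  $> echo format "^gvt:mm:" +p >control

  # unanchored - gvt:mm: anywhere in the format string
  $> echo format "gvt:mm:" +p >control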

Ad-hoc categories and sub-categories are implicitly allowed, author
discipline and review is expected.

Hierarchical classes/categories are natural:

"^drm::"   is used in a later commit
"^drm:::" is a natural extension.
"^drm:atomic:fail:" has been proposed, sounds directly useful

RFC: in a real sense we abandon enum strictures here, and lose some
compiler help, on spelling errs for example.  Obviously "drm:" != "DRM:".

Some properties of a hierarchical category deserve explication:

Trailing spaces matter !

With 1..3-space ("drm: ", "drm:atomic: ", "drm:atomic:fail: "), the
":" doesn't terminate the search-space, the trailing space does.  So a
"drm:" search spec will match all DRM categories & subcategories, and
will not be useful in an interface where all categories are already
controlled together.  That said, "drm:atomic:" & "drm:atomic: " are
different, and both are useful in cases.

Ad-Hoc categories & sub-categories:

Ad-hoc categories are those format-prefixes already in use; both
amdgpu and i915 have numerous (120,~1800) pr_debugs, most of these use
a system, a small set (9,13) of prefixes, to categorize the output.
Dyndbg already works on these, this patch just allows adding a new
bitmap knob to control them.

Ad-hoc sub-categories are slightly trickier.
  since drm_dbg_atomic("fail: ...") is a macro:
pr_debug("drm:atomic:" " " format,...) // cpp-paste in a trailing space

We get "drm:atomic: fail:", with that undesirable embedded space;
obviously not ideal wrt clear and simple prefixes.

a possible fix: drm_dbg_atomic_("fail: ..."); // trailing _ for ad-hoc subcat

Summarizing:

 - "drm:kms: " & "drm:kms:" are different
 - "drm:kms"also different - includes drm:kms2:
 - "drm:kms:\t" also different - could be troublesome
 - "drm:kms:*"  doesn't work, no wildcard on format atm.

Order matters in DEFINE_DYNAMIC_DEBUG_BITGRPS(... @bit_descs)

Since bits are/will-stay applied 0-N, 

[PATCH v10 00/10 RESEND] use DYNAMIC_DEBUG to implement DRM.debug & DRM.trace

2021-11-11 Thread Jim Cromie
Hi Jason, Greg, DRM-everyone, everyone,

resend to add more people, after rebasing on master to pick up
306589856399 drm/print: Add deprecation notes to DRM_...() functions

This patchset has 3 separate but related parts:

1. DEFINE_DYNAMIC_DEBUG_BITGRPS macro [patch 1/10]

   Declares DRM.debug style bitmap, bits control pr_debugs by matching formats
   Adds callback to translate bits to $cmd > dynamic_debug/control
   This could obsolete the EXPORT of dynamic_debug_exec_queries(); that change is not included.

   /* anticipated_usage */
   static struct dyndbg_desc drm_categories_map[] = {
  [0] = { DRM_DBG_CAT_CORE },
  [1] = { DRM_DBG_CAT_DRIVER },
  [2] = { DRM_DBG_CAT_KMS },
  [3] = { DRM_DBG_CAT_PRIME }, ... };

   DEFINE_DYNAMIC_DEBUG_BITGRPS(debug, __drm_debug,
" bits control drm.debug categories ",
drm_categories_map);

   Please consider this patch for -next/now/current:
   - new interface, new code, no users to break
   - allows DRM folks to consider in earnest.
   - api bikeshedding to do?
 struct dyndbg_desc isn't that great a name, others too probably.

2. use (1) to reimplement drm.debug [patches 3-7]:

   1st in amdgpu & i915 to control existing pr_debugs by their formats
   POC for (1)
   then in drm-print, for all drm.debug API users
   has kernel-footprint impact:
  amdgpu has ~3k pr_debugs.  (120kb callsite data)
  i915.ko has ~2k  

   avoids drm_debug_enabled(), gives NOOP savings & new flexibility.
   changes drm.debug categories from enum to format-prefix-string
   alters in-log format to include the format-prefix-string
   Daniel Vetter liked this at -v3
   https://lore.kernel.org/lkml/YPbPvm%2FxcBlTK1wq@phenom.ffwll.local/
   I'm sure I've (still) missed stuff.


3. separately, Sean Paul proposed: drm.trace to mirror drm.debug to tracefs
   https://patchwork.freedesktop.org/series/78133/

He argues:
   tracefs is fast/lightweight compared to syslog
   independent selection (of drm categories) to tracefs
   gives tailored traffic w.o flooding syslog

ISTM he's correct.  So it follows that write-to-tracefs is also a good
feature for dyndbg, where it's then available for all pr_debug users,
including all of drm, on a per-site basis, via echo +T >control.  (iff
CONFIG_TRACING).

So basically, I borg'd his:
   [patch 14/14] drm/print: Add tracefs support to the drm logging helpers

Then I added a T flag, so it can be toggled from shell:

   # turn on all drm's pr_debug --> tracefs
   echo module drm +T > /proc/dynamic_debug/control

It appears to just work: (RFC)

The instance name is a placeholder; per-module subdirs kinda fit the
tracefs pattern, but full mod/file-basename/function/line feels like
overkill; mod/basename-func.line would flatten it nicely. RFC.


[root@gandalf dyndbg-tracefs]# pwd
/sys/kernel/tracing/instances/dyndbg-tracefs
[root@gandalf dyndbg-tracefs]# echo 1 > /sys/module/drm/parameters/trace
[root@gandalf dyndbg-tracefs]# head -n16 trace | sed -e 's/^#//'
 tracer: nop

 entries-in-buffer/entries-written: 405/405   #P:24

                                _-----=> irqs-off
                               / _----=> need-resched
                              | / _---=> hardirq/softirq
                              || / _--=> preempt-depth
                              ||| / _-=> migrate-disable
                              |||| /     delay
           TASK-PID     CPU#  |||||  TIMESTAMP  FUNCTION
              | |         |   |||||     |         |
   <...>-2254    [000] .....  7040.894352: __dynamic_pr_debug: drm:core: comm="gnome-shel:cs0" pid=2254, dev=0xe200, auth=1, AMDGPU_CS
   <...>-2207    [015] .....  7040.894654: __dynamic_pr_debug: drm:core: comm="gnome-shell" pid=2207, dev=0xe200, auth=1, DRM_IOCTL_MODE_ADDFB2
   <...>-2207    [015] .....  7040.995403: __dynamic_pr_debug: drm:core: comm="gnome-shell" pid=2207, dev=0xe200, auth=1, DRM_IOCTL_MODE_RMFB
   <...>-2207    [015] .....  7040.995413: __dynamic_pr_debug: drm:core: OBJ ID: 121 (2)

This is the pr-debug doing most of that logging: (from dynamic_debug/control)

  drivers/gpu/drm/drm_ioctl.c:866 [drm]drm_ioctl =T "drm:core: comm=\042%s\042 pid=%d, dev=0x%lx, auth=%d, %s\012"

Turning on decoration flags changes the trace:

  echo module drm format drm:core: +mflt > /proc/dynamic_debug/control 

           TASK-PID     CPU#  |||||  TIMESTAMP  FUNCTION
              | |         |   |||||     |         |
   <...>-2254    [003] ..... 15980.936660: __dynamic_pr_debug: [2254] drm:drm_ioctl:866: drm:core: comm="gnome-shel:cs0" pid=2254, dev=0xe200, auth=1, AMDGPU_CS
   <...>-2207    [015] ..... 15980.936966: __dynamic_pr_debug: [2207] drm:drm_ioctl:866: drm:core: comm="gnome-shell" pid=2207, dev=0xe200, auth=1, DRM_IOCTL_MODE_ADDFB2
   <...>-2207    [015] ..... 15981.037727: __dynamic_pr_debug: [2207] drm:drm_ioctl:866: drm:core: comm="gnome-shell" pid=2207, 

Re: [PATCH 2/5] drm/amdkfd: check child range to drain retry fault

2021-11-11 Thread philip yang

  


On 2021-11-09 10:26 p.m., Felix Kuehling wrote:
> On 2021-11-09 6:04 p.m., Philip Yang wrote:
>> If unmapping partial range, the parent prange list op is update
>> notifier, child range list op is unmap range, need check child range
>> to set drain retry fault flag.
>>
>> Signed-off-by: Philip Yang
>
> I think this could be simplified by simply setting
> svms->drain_pagefaults in svm_range_unmap_from_cpu. The mmap
> lock ensures that this is serialized with the deferred list worker
> reading and clearing svms->drain_pagefaults. You can also use
> READ_ONCE and WRITE_ONCE to be safe.

Good idea, change will be in v2 patch.

Thanks,
Philip

> Regards,
>   Felix
>
>> ---
>>  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 13 -
>>  1 file changed, 12 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> index 77239b06b236..64f642935600 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> @@ -2049,8 +2049,19 @@ svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange,
>>   * before the range is freed to avoid straggler interrupts on
>>   * unmapped memory causing "phantom faults".
>>   */
>> -	if (op == SVM_OP_UNMAP_RANGE)
>> +	if (op == SVM_OP_UNMAP_RANGE) {
>> +		pr_debug("set range drain_pagefaults true\n");
>>  		svms->drain_pagefaults = true;
>> +	} else {
>> +		struct svm_range *pchild;
>> +
>> +		list_for_each_entry(pchild, &prange->child_list, child_list)
>> +			if (pchild->work_item.op == SVM_OP_UNMAP_RANGE) {
>> +				pr_debug("set child drain_pagefaults true\n");
>> +				svms->drain_pagefaults = true;
>> +			}
>> +	}
>> +
>>  	/* if prange is on the deferred list */
>>  	if (!list_empty(&prange->deferred_list)) {
>>  		pr_debug("update exist prange 0x%p work op %d\n", prange, op);
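
A sketch of the suggested simplification, assuming (as noted above)
that svm_range_unmap_from_cpu() and the deferred-list worker are
already serialized by the mmap lock; the placement and the drain
helper call are illustrative:

	/* in svm_range_unmap_from_cpu(), under the mmap lock */
	WRITE_ONCE(svms->drain_pagefaults, true);

	/* in the deferred-list worker, before touching the ranges */
	if (READ_ONCE(svms->drain_pagefaults)) {
		WRITE_ONCE(svms->drain_pagefaults, false);
		svm_range_drain_retry_fault(svms);
	}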

  

  



Re: [PATCH 1/5] drm/amdgpu: handle IH ring1 overflow

2021-11-11 Thread philip yang

  


On 2021-11-11 8:57 a.m., Felix Kuehling wrote:
> Am 2021-11-11 um 8:43 a.m. schrieb Christian König:
>> Am 11.11.21 um 13:13 schrieb Felix Kuehling:
>>> Am 2021-11-11 um 2:00 a.m. schrieb Christian König:
>>>> Am 11.11.21 um 00:36 schrieb Felix Kuehling:
>>>>> On 2021-11-10 9:31 a.m., Christian König wrote:
>>>>> [SNIP]
>>>>> Aren't we processing interrupts out-of-order in this case? We're
>>>>> processing newer ones before older ones. Is that the root of the
>>>>> problem, because it confuses our interrupt draining function?
>>>>
>>>> Good point.
>>>>
>>>>> Maybe we need to detect overflows in the interrupt draining function
>>>>> to make it wait longer in that case.
>>>>
>>>> Ideally we should use something which is completely separate from all
>>>> those implementation details.
>>>>
>>>> Like for example using the timestamp or a separate indicator/counter
>>>> instead.
>>>
>>> Even a timestamp will be broken if the interrupt processing function
>>> handles interrupts out of order.
>>
>> We can easily detect if the timestamp is going backwards and so filter
>> out stale entries.
>>
>>> I think we need a different amdgpu_ih_process function for IH ring 1,
>>> the way we set it up to handle overflows. Because IH is just
>>> overwriting older entries, and we can't read entire IH entries
>>> atomically, we have to use a watermark. If the IH WPTR passes the
>>> watermark, we have to consider the ring overflowed and reset our RPTR.
>>> We have to set RPTR far enough "ahead" of the current WPTR to make
>>> sure WPTR is under the watermark. And the watermark needs to be low
>>> enough to ensure amdgpu_irq_dispatch can read out the next IH entry
>>> before the WPTR catches up with the RPTR.
>>>
>>> Since we don't read the WPTR on every iteration, and our page fault
>>> handling code can take quite a while to process one fault, the
>>> watermark needs to provide a lot of buffer. Maybe we also need to read
>>> the WPTR register more frequently if the last read was more than a
>>> jiffy ago.
>>
>> I think trying to solve that with the IH code or hardware is the
>> completely wrong approach.
>>
>> As I said before, we need something more robust, and using the
>> timestamp sounds like the most logical approach to me.
>>
>> The only alternative I can see would be to have a hardware-assisted
>> flag which tells you if you had an overflow or not, like we have for
>> IH ring 0.
>
> The *_ih_get_wptr functions already do some overflow handling. I think
> we'll need a function to read the overflow bit that amdgpu_ih_process
> can call separately, after reading IH entries.

Tried to increase ring1 buf size from 4KB to 256KB, overflow still
happens; seems the watermark is not feasible, as fault recovery
sometimes takes a longer period. We already have a 48-bit timestamp in
the IV, I will try using it to check overflow, and update rptr to try
to catch up.

>> E.g. something like the following:
>> 1. Copy the next N IV from the RPTR location.
>> 2. Get the current WPTR.
>> 3. If the overflow bit in the WPTR is set, update the RPTR to
>>    something like WPTR+window, clear the overflow bit and repeat at #1.
>> 4. Process the valid IVs.
>
> OK. Current amdgpu_irq_dispatch reads directly from the IH ring. I
> think you're proposing to move reading of the ring into
> amdgpu_ih_process, where we can discard the entries if an overflow is
> detected.
>
> Then let amdgpu_irq_dispatch use a copy that's guaranteed to be
> consistent.

In amdgpu_ih_process (may add a new function for ring1), after reading
wptr, check if wptr overflowed and update rptr:

	if (ring[rptr - 1].timestamp > ring[rptr].timestamp)
		rptr = wptr + 1;

This may still process retry faults out of order, but drain fault will
finish correctly with the condition rptr >= checkpoint_wptr; we will
not process a stale fault after the range is freed.

Regards,
Philip

>> The down side is that we are losing a lot of IVs with that. That is
>> probably ok for the current approach, but most likely a bad idea if we
>> enable the CAM.
>
> Right. Once we use the CAM we cannot afford to lose faults. If we do,
> we need to clear the CAM.
>
> Regards,
>   Felix
>
>> Regards,
>> Christian.
>>
>>> Whenever an overflow (over the watermark) is detected, we can set a
>>> sticky overflow bit that our page fault handling code can use to
>>> clean up. E.g. once we start using the CAM filter, we'll have to
>>> invalidate all CAM entries when this happens (although I'd expect
>>> overflows to become impossible once we enable the CAM filter).
>>>
>>> Thanks,
>>>    Felix
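
A sketch of the timestamp check Philip describes, for an IH ring whose
IVs carry the 48-bit timestamp; the iv_timestamp() helper and the bare
rptr/wptr handling are illustrative, not the real amdgpu API:

	/* a timestamp running backwards at rptr means WPTR lapped us */
	if (iv_timestamp(ih, ih->rptr - 1) > iv_timestamp(ih, ih->rptr))
		ih->rptr = ih->wptr + 1;	/* skip the stale, overwritten IVs */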





  

  



[PATCH 2/2] drm/amdgpu: Pin MMIO/DOORBELL BO's in GTT domain

2021-11-11 Thread Ramesh Errabolu
MMIO/DOORBELL BOs encode control data and should be pinned in the GTT
domain before PCIe-connected peer devices are enabled to access them

Signed-off-by: Ramesh Errabolu 
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 69 +++
 1 file changed, 69 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index cfc84af682b1..90b985436878 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1295,6 +1295,55 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
return ret;
 }
 
+/**
+ * amdgpu_amdkfd_gpuvm_pin_bo() - Pins a BO using the following criteria
+ * @bo: Handle of buffer object being pinned
+ * @domain: Domain into which BO should be pinned
+ *
+ *   - USERPTR BOs are UNPINNABLE and will return error
+ *   - All other BO types (GTT, VRAM, MMIO and DOORBELL) will have their
+ * PIN count incremented. It is valid to PIN a BO multiple times
+ *
+ * Return: ZERO if successful in pinning, Non-Zero in case of error.
+ */
+static int amdgpu_amdkfd_gpuvm_pin_bo(struct amdgpu_bo *bo, u32 domain)
+{
+   int ret = 0;
+
+   ret = amdgpu_bo_reserve(bo, false);
+   if (unlikely(ret))
+   return ret;
+
+   ret = amdgpu_bo_pin_restricted(bo, domain, 0, 0);
+   if (ret)
+   pr_err("Error in Pinning BO to domain: %d\n", domain);
+
+   amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false);
+   amdgpu_bo_unreserve(bo);
+
+   return ret;
+}
+
+/**
+ * amdgpu_amdkfd_gpuvm_unpin_bo() - Unpins a BO using the following criteria
+ * @bo: Handle of buffer object being unpinned
+ *
+ *   - Is an illegal request for USERPTR BOs and is ignored
+ *   - All other BO types (GTT, VRAM, MMIO and DOORBELL) will have their
+ * PIN count decremented. Calls to UNPIN must balance calls to PIN
+ */
+void amdgpu_amdkfd_gpuvm_unpin_bo(struct amdgpu_bo *bo)
+{
+   int ret = 0;
+
+   ret = amdgpu_bo_reserve(bo, false);
+   if (unlikely(ret))
+   return;
+
+   amdgpu_bo_unpin(bo);
+   amdgpu_bo_unreserve(bo);
+}
+
 int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
   struct file *filp, u32 pasid,
   void **process_info,
@@ -1521,10 +1570,22 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
if (offset)
*offset = amdgpu_bo_mmap_offset(bo);
 
+   if (flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+   KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
+   ret = amdgpu_amdkfd_gpuvm_pin_bo(bo, AMDGPU_GEM_DOMAIN_GTT);
+   if (ret) {
+   pr_err("Pinning MMIO/DOORBELL BO during ALLOC 
FAILED\n");
+   goto err_pin_bo;
+   }
+   bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
+   bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
+   }
+
return 0;
 
 allocate_init_user_pages_failed:
remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
+err_pin_bo:
drm_vma_node_revoke(>vma_node, drm_priv);
 err_node_allow:
drm_gem_object_put(gobj);
@@ -1557,6 +1618,14 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
bool is_imported = false;
 
mutex_lock(>lock);
+
	/* Unpin MMIO/DOORBELL BO's that were pinned during allocation */
+   if (mem->alloc_flags &
+   (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
+   amdgpu_amdkfd_gpuvm_unpin_bo(mem->bo);
+   }
+
mapped_to_gpu_memory = mem->mapped_to_gpu_memory;
is_imported = mem->is_imported;
mutex_unlock(>lock);
-- 
2.31.1



[PATCH 1/2] drm/amdgpu: Update BO memory accounting to rely on allocation flag

2021-11-11 Thread Ramesh Errabolu
The accounting system that tracks the amount of available memory
(system, TTM and VRAM of a device) relies on the BO's domain. Change
it to rely instead on the allocation flag indicating the BO type -
VRAM, GTT, USERPTR, MMIO or DOORBELL

Signed-off-by: Ramesh Errabolu 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  6 ++
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 95 ---
 2 files changed, 65 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index d00de575c541..fcbc8a9c9e06 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -301,6 +301,12 @@ void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev);
 void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
 void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
+
+/**
+ * @amdgpu_amdkfd_release_notify() - Notify KFD when GEM object is released
+ *
+ * Allows KFD to release its resources associated with the GEM object.
+ */
 void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo);
 void amdgpu_amdkfd_reserve_system_mem(uint64_t size);
 #else
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 94fccf0b47ad..cfc84af682b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -120,8 +120,19 @@ static size_t amdgpu_amdkfd_acc_size(uint64_t size)
PAGE_ALIGN(size);
 }
 
+/**
+ * @amdgpu_amdkfd_reserve_mem_limit() - Decrease available memory by size
+ * of buffer including any reserved for control structures
+ *
+ * @adev: Device to which allocated BO belongs to
+ * @size: Size of buffer, in bytes, encapsulated by BO. This should be
+ * equivalent to amdgpu_bo_size(BO)
+ * @alloc_flag: Flag used in allocating a BO as noted above
+ *
+ * Return: returns -ENOMEM in case of error, ZERO otherwise
+ */
 static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
-   uint64_t size, u32 domain, bool sg)
+   uint64_t size, u32 alloc_flag)
 {
uint64_t reserved_for_pt =
ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
@@ -131,20 +142,24 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct 
amdgpu_device *adev,
acc_size = amdgpu_amdkfd_acc_size(size);
 
vram_needed = 0;
-   if (domain == AMDGPU_GEM_DOMAIN_GTT) {
-   /* TTM GTT memory */
+   if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
system_mem_needed = acc_size + size;
ttm_mem_needed = acc_size + size;
-   } else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
-   /* Userptr */
+   } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+   system_mem_needed = acc_size;
+   ttm_mem_needed = acc_size;
+   vram_needed = size;
+   } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
system_mem_needed = acc_size + size;
ttm_mem_needed = acc_size;
-   } else {
-   /* VRAM and SG */
+   } else if (alloc_flag &
+  (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+   KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
system_mem_needed = acc_size;
ttm_mem_needed = acc_size;
-   if (domain == AMDGPU_GEM_DOMAIN_VRAM)
-   vram_needed = size;
+   } else {
+   pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
+   return -ENOMEM;
}
 
spin_lock(_mem_limit.mem_limit_lock);
@@ -160,64 +175,72 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
(adev->kfd.vram_used + vram_needed >
 adev->gmc.real_vram_size - reserved_for_pt)) {
ret = -ENOMEM;
-   } else {
-   kfd_mem_limit.system_mem_used += system_mem_needed;
-   kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
-   adev->kfd.vram_used += vram_needed;
+   goto release;
}
 
+   /* Update memory accounting by decreasing available system
+* memory, TTM memory and GPU memory as computed above
+*/
+   adev->kfd.vram_used += vram_needed;
+   kfd_mem_limit.system_mem_used += system_mem_needed;
+   kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
+
+release:
spin_unlock(_mem_limit.mem_limit_lock);
return ret;
 }
 
 static void unreserve_mem_limit(struct amdgpu_device *adev,
-   uint64_t size, u32 domain, bool sg)
+   uint64_t size, u32 alloc_flag)
 {
size_t acc_size;
 
acc_size = amdgpu_amdkfd_acc_size(size);
 
spin_lock(_mem_limit.mem_limit_lock);
-   if (domain == AMDGPU_GEM_DOMAIN_GTT) {
+
+   if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
  

RE: [PATCH 1/2] drm/amdgpu: Update BO memory accounting to rely on allocation flag

2021-11-11 Thread Errabolu, Ramesh
[AMD Official Use Only]

Agree with your comments. I will remove the comment on the method
amdgpu_amdkfd_reserve_system_mem() in my updated patch.

-Original Message-
From: Kuehling, Felix  
Sent: Thursday, November 11, 2021 2:54 PM
To: Errabolu, Ramesh ; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 1/2] drm/amdgpu: Update BO memory accounting to rely on 
allocation flag

Am 2021-11-11 um 3:45 p.m. schrieb Errabolu, Ramesh:
> [AMD Official Use Only]
>
> Resp inline
>
> Request clarification regarding - amdgpu_amdkfd_reserve_system_mem()
>
> Will send out updated patch upon clarification
>
> Regards,
> Ramesh
>
> -Original Message-
> From: Kuehling, Felix 
> Sent: Thursday, November 11, 2021 7:44 AM
> To: amd-gfx@lists.freedesktop.org; Errabolu, Ramesh 
> 
> Subject: Re: [PATCH 1/2] drm/amdgpu: Update BO memory accounting to 
> rely on allocation flag
>
> Am 2021-11-09 um 1:13 a.m. schrieb Ramesh Errabolu:
>> Accounting system to track amount of available memory (system, TTM 
>> and VRAM of a device) relies on BO's domain. The change is to rely 
>> instead on allocation flag indicating BO type - VRAM, GTT, USERPTR, 
>> MMIO or DOORBELL
>>
>> Signed-off-by: Ramesh Errabolu 
> The code changes look good. Comments about comments inline ...
>
>
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  16 +++
>>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 101
>> +++---
>>  2 files changed, 79 insertions(+), 38 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> index 5f658823a637..8d31a742cd80 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> @@ -307,7 +307,23 @@ void
>> amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device 
>> *adev);  void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
>>  void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
>>  struct amdgpu_vm *vm);
>> +
>> +/**
>> + * @amdgpu_amdkfd_release_notify() - Invoked when GEM object 
>> +reference count
>> + * reaches ZERO. Increases available memory by size of buffer 
>> +including any
>> + * reserved for control structures
> "Increases available memory size ..." is an implementation detail that 
> doesn't matter to the callers of this function. It should not be part of the 
> interface definition. The interface description should be more general, maybe:
>
> Ramesh: Agreed.
>
> * amdgpu_amdkfd_release_notify() - Notify KFD when GEM object is 
> released
> *
> * Allows KFD to release its resources associated with the GEM object.
> * ...
>
> Ramesh: Used your comment as is
>
>> + *
>> + * @note: This api must be invoked on BOs that have been allocated 
>> + via
>> + * KFD interface amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu()
>> + */
>>  void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo);
>> +
>> +/**
>> + * @amdgpu_amdkfd_reserve_system_mem - Decrease system memory that 
>> +is
>> + * available by an amount specified by input parameter
> This is misleading. This function doesn't change availability of system 
> memory in general because it doesn't allocate any memory. You'll need to be 
> more specific:
>
> * amdgpu_amdkfd_reserve_system_mem() - Decrease system memory limit 
> for KFD applications
>
> Ramesh: Thanks for pointing out the detail. Should this be "Decrease system 
> memory available for KFD applications" as the code seems to track a counter 
> "system_mem_used". Let me know?

That's still not entirely accurate. KFD applications can allocate system memory 
in different ways: malloc, hipHostMalloc, hipHostRegister, hipMallocManaged. 
This limit affects system memory managed by the kfd_ioctl_alloc_memory_of_gpu, 
which must be physically resident while the queues are active. At the HIP API 
level this includes hipHostMalloc and hipHostRegister.

It does not affect system memory allocated with plain malloc or mmap.

My version that just mentions the limit avoids all those details because it 
doesn't need to explain what that limit applies to. If you want to go into all 
those details, I'm not sure this comment is the right place for it. I think it 
takes a more comprehensive discussion of GPU memory management with user mode 
queues.

Regards,
  Felix


>
>> + *
>> + * @size: Size of buffer in bytes
> What buffer?
>
> Ramesh: Updated it "Amount to decrease in bytes"
>
>
>> + */
>>  void amdgpu_amdkfd_reserve_system_mem(uint64_t size);  #else  static 
>> inline diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> index 94fccf0b47ad..08675f89bfb2 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> @@ -120,8 +120,22 @@ static size_t amdgpu_amdkfd_acc_size(uint64_t size)
>>  PAGE_ALIGN(size);
>>  }
>>  
>> +/**
>> + * @amdgpu_amdkfd_reserve_mem_limit() - Decrease available 

Re: [PATCH 1/2] drm/amdgpu: Update BO memory accounting to rely on allocation flag

2021-11-11 Thread Felix Kuehling
Am 2021-11-11 um 3:45 p.m. schrieb Errabolu, Ramesh:
> [AMD Official Use Only]
>
> Resp inline
>
> Request clarification regarding - amdgpu_amdkfd_reserve_system_mem()
>
> Will send out updated patch upon clarification
>
> Regards,
> Ramesh
>
> -Original Message-
> From: Kuehling, Felix  
> Sent: Thursday, November 11, 2021 7:44 AM
> To: amd-gfx@lists.freedesktop.org; Errabolu, Ramesh 
> Subject: Re: [PATCH 1/2] drm/amdgpu: Update BO memory accounting to rely on 
> allocation flag
>
> Am 2021-11-09 um 1:13 a.m. schrieb Ramesh Errabolu:
>> Accounting system to track amount of available memory (system, TTM and 
>> VRAM of a device) relies on BO's domain. The change is to rely instead 
>> on allocation flag indicating BO type - VRAM, GTT, USERPTR, MMIO or 
>> DOORBELL
>>
>> Signed-off-by: Ramesh Errabolu 
> The code changes look good. Comments about comments inline ...
>
>
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  16 +++
>>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 101 
>> +++---
>>  2 files changed, 79 insertions(+), 38 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> index 5f658823a637..8d31a742cd80 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> @@ -307,7 +307,23 @@ void 
>> amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device 
>> *adev);  void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
>>  void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
>>  struct amdgpu_vm *vm);
>> +
>> +/**
>> + * @amdgpu_amdkfd_release_notify() - Invoked when GEM object 
>> +reference count
>> + * reaches ZERO. Increases available memory by size of buffer 
>> +including any
>> + * reserved for control structures
> "Increases available memory size ..." is an implementation detail that 
> doesn't matter to the callers of this function. It should not be part of the 
> interface definition. The interface description should be more general, maybe:
>
> Ramesh: Agreed.
>
> * amdgpu_amdkfd_release_notify() - Notify KFD when GEM object is released
> *
> * Allows KFD to release its resources associated with the GEM object.
> * ...
>
> Ramesh: Used your comment as is
>
>> + *
>> + * @note: This api must be invoked on BOs that have been allocated 
>> + via
>> + * KFD interface amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu()
>> + */
>>  void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo);
>> +
>> +/**
>> + * @amdgpu_amdkfd_reserve_system_mem - Decrease system memory that is
>> + * available by an amount specified by input parameter
> This is misleading. This function doesn't change availability of system 
> memory in general because it doesn't allocate any memory. You'll need to be 
> more specific:
>
> * amdgpu_amdkfd_reserve_system_mem() - Decrease system memory limit for KFD 
> applications
>
> Ramesh: Thanks for pointing out the detail. Should this be "Decrease system 
> memory available for KFD applications" as the code seems to track a counter 
> "system_mem_used". Let me know?

That's still not entirely accurate. KFD applications can allocate system
memory in different ways: malloc, hipHostMalloc, hipHostRegister,
hipMallocManaged. This limit affects system memory managed by the
kfd_ioctl_alloc_memory_of_gpu, which must be physically resident while
the queues are active. At the HIP API level this includes hipHostMalloc
and hipHostRegister.

It does not affect system memory allocated with plain malloc or mmap.

My version that just mentions the limit avoids all those details because
it doesn't need to explain what that limit applies to. If you want to go
into all those details, I'm not sure this comment is the right place for
it. I think it takes a more comprehensive discussion of GPU memory
management with user mode queues.

Regards,
  Felix


>
>> + *
>> + * @size: Size of buffer in bytes
> What buffer?
>
> Ramesh: Updated it "Amount to decrease in bytes"
>
>
>> + */
>>  void amdgpu_amdkfd_reserve_system_mem(uint64_t size);  #else  static 
>> inline diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> index 94fccf0b47ad..08675f89bfb2 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> @@ -120,8 +120,22 @@ static size_t amdgpu_amdkfd_acc_size(uint64_t size)
>>  PAGE_ALIGN(size);
>>  }
>>  
>> +/**
>> + * @amdgpu_amdkfd_reserve_mem_limit() - Decrease available memory by 
>> +size
>> + * of buffer including any reserved for control structures
>> + *
>> + * @adev: Device to which allocated BO belongs to
>> + * @size: Size of buffer, in bytes, encapsulated by B0. This should 
>> +be
>> + * equivalent to amdgpu_bo_size(BO)
>> + * @alloc_flag: Flag used in allocating a BO as noted above
>> + *
>> + * @note: This api must be invoked on BOs that have been 

RE: [PATCH 1/2] drm/amdgpu: Update BO memory accounting to rely on allocation flag

2021-11-11 Thread Errabolu, Ramesh
[AMD Official Use Only]

Resp inline

Request clarification regarding - amdgpu_amdkfd_reserve_system_mem()

Will send out updated patch upon clarification

Regards,
Ramesh

-Original Message-
From: Kuehling, Felix  
Sent: Thursday, November 11, 2021 7:44 AM
To: amd-gfx@lists.freedesktop.org; Errabolu, Ramesh 
Subject: Re: [PATCH 1/2] drm/amdgpu: Update BO memory accounting to rely on 
allocation flag

Am 2021-11-09 um 1:13 a.m. schrieb Ramesh Errabolu:
> Accounting system to track amount of available memory (system, TTM and 
> VRAM of a device) relies on BO's domain. The change is to rely instead 
> on allocation flag indicating BO type - VRAM, GTT, USERPTR, MMIO or 
> DOORBELL
>
> Signed-off-by: Ramesh Errabolu 

The code changes look good. Comments about comments inline ...


> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  16 +++
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 101 
> +++---
>  2 files changed, 79 insertions(+), 38 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 5f658823a637..8d31a742cd80 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -307,7 +307,23 @@ void 
> amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device 
> *adev);  void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
>  void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
>   struct amdgpu_vm *vm);
> +
> +/**
> + * @amdgpu_amdkfd_release_notify() - Invoked when GEM object 
> +reference count
> + * reaches ZERO. Increases available memory by size of buffer 
> +including any
> + * reserved for control structures

"Increases available memory size ..." is an implementation detail that doesn't 
matter to the callers of this function. It should not be part of the interface 
definition. The interface description should be more general, maybe:

Ramesh: Agreed.

* amdgpu_amdkfd_release_notify() - Notify KFD when GEM object is released
*
* Allows KFD to release its resources associated with the GEM object.
* ...

Ramesh: Used your comment as is

> + *
> + * @note: This api must be invoked on BOs that have been allocated 
> + via
> + * KFD interface amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu()
> + */
>  void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo);
> +
> +/**
> + * @amdgpu_amdkfd_reserve_system_mem - Decrease system memory that is
> + * available by an amount specified by input parameter

This is misleading. This function doesn't change availability of system memory 
in general because it doesn't allocate any memory. You'll need to be more 
specific:

* amdgpu_amdkfd_reserve_system_mem() - Decrease system memory limit for KFD 
applications

Ramesh: Thanks for pointing out the detail. Should this be "Decrease system 
memory available for KFD applications" as the code seems to track a counter 
"system_mem_used". Let me know?

> + *
> + * @size: Size of buffer in bytes

What buffer?

Ramesh: Updated it "Amount to decrease in bytes"


> + */
>  void amdgpu_amdkfd_reserve_system_mem(uint64_t size);  #else  static 
> inline diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 94fccf0b47ad..08675f89bfb2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -120,8 +120,22 @@ static size_t amdgpu_amdkfd_acc_size(uint64_t size)
>   PAGE_ALIGN(size);
>  }
>  
> +/**
> + * @amdgpu_amdkfd_reserve_mem_limit() - Decrease available memory by 
> +size
> + * of buffer including any reserved for control structures
> + *
> + * @adev: Device to which allocated BO belongs to
> + * @size: Size of buffer, in bytes, encapsulated by B0. This should 
> +be
> + * equivalent to amdgpu_bo_size(BO)
> + * @alloc_flag: Flag used in allocating a BO as noted above
> + *
> + * @note: This api must be invoked on BOs that have been allocated 
> +via
> + * KFD interface amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu()

Who needs to call it? Your statement sounds like callers of 
amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu must call this function as well.
This is very misleading because this function is already called from 
amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu.

Ramesh: Will remove the "note"


> + *
> + * Return: returns -ENOMEM in case of error, ZERO otherwise */
>  static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
> - uint64_t size, u32 domain, bool sg)
> + uint64_t size, u32 alloc_flag)
>  {
>   uint64_t reserved_for_pt =
>   ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
> @@ -131,20 +145,24 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct 
> amdgpu_device *adev,
>   acc_size = amdgpu_amdkfd_acc_size(size);
>  
>   vram_needed = 0;
> - if (domain == AMDGPU_GEM_DOMAIN_GTT) {
> - /* TTM GTT memory */
> + if 

RE: [RFC 2/2] drm/amd/pm: Add support for reacting to platform profile notification

2021-11-11 Thread Limonciello, Mario
[AMD Official Use Only]

> >
> > Even if changing the heuristic for workload as Alex suggested?
> >
> 
> Yes. I think this is meant to be BIOS driven for APU platforms and AMD
> APU + AMD dGPU with smartshift.
> 

So then it sounds like, if anywhere, it would make sense only on a dGPU
that isn't using smartshift.

> 
> After seeing that this is coming under ACPI, I thought the intention is
> to have this driven by firmware primarily. The purpose of platform
> driver itself could be to optimize for those power profiles and while
> doing that it should have considered all the components in the whole
> platform (assuming platform driver testing covers the behavior of these
> modes on a particular platform).
> 
> I am not sure if it's appropriate for another driver to plug-in to this
> automatically and tinker an 'expected-to-be-well-tuned' setting by the
> platform driver. The modes selected by another driver may or may not
> match with the conditions assumed by platform driver - for ex: some
> profile could mean fans running quieter with EC control and then the
> profile chosen by another driver could disturb that intended setting.
> 

The key here is that any non-APU program won't have a path to influence
dGPU behavior via FW or smartshift, will it?

So it could be useful to respond to a limited subset of hints that can
be guaranteed to mean the same thing.  Maybe low power mode and
balanced only, but don't make changes going to high power mode?

> Thanks,
> Lijo
> 
> > *From:* Lazar, Lijo 
> > *Sent:* Wednesday, November 10, 2021 10:05
> > *To:* Alex Deucher ; Limonciello, Mario
> > 
> > *Cc:* amd-gfx list 
> > *Subject:* Re: [RFC 2/2] drm/amd/pm: Add support for reacting to
> > platform profile notification
> >
> > [Public]
> >
> > I feel it's better to leave to platform vendors. For ex: for APU cases
> > they may have implementations in which their BIOSes talk to PMFW and
> > this might be driving something else here.
> >
> > Also, not sure how to handle a case like, say a laptop with Intel CPU
> > and AMD dgpu.
> >
> > Thanks,
> > Lijo
> >
> > 
> >
> > *From:* amd-gfx on behalf of Alex Deucher
> > *Sent:* Wednesday, 10 November 2021, 8:44 pm
> > *To:* Limonciello, Mario
> > *Cc:* amd-gfx list
> > *Subject:* Re: [RFC 2/2] drm/amd/pm: Add support for reacting to
> > platform profile notification
> >
> > On Wed, Nov 10, 2021 at 1:24 AM Mario Limonciello wrote:
> >  >
> >  > Various drivers provide platform profile support to let users set a hint
> >  > in their GUI whether they want to run in a high performance, low battery
> >  > life or balanced configuration.
> >  >
> >  > Drivers that provide this typically work with the firmware on their
> > system
> >  > to configure hardware.  In the case of AMDGPU however, the notification
> >  > path doesn't come through firmware and can instead be provided directly
> >  > to the driver from a notification chain.
> >  >
> >  > Use the information of the newly selected profile to tweak
> >  > `dpm_force_performance_level` to that profile IFF the user hasn't
> > manually
> >  > selected `manual` or any other `profile_*` options.
> >
> > I don't think we want to force the performance level.  This interface
> > forces various fixed clock configurations for debugging and profiling.
> > I think what we'd want to select here is the power profile (see
> > amdgpu_set_pp_power_profile_mode()).  For this interface you can
> > select various profiles (BOOTUP_DEFAULT, 3D_FULL_SCREEN,
> POWER_SAVING,
> > VIDEO, VR, COMPUTE, etc.).  These still use dynamic power management,
> > but they adjust the heuristics used by the GPU to select power states
> > so the GPU performance ramps up/down more or less aggressively.
> >
> > Alex
> >
> >  >
> >  > Signed-off-by: Mario Limonciello
> >  > ---
> >  >  drivers/gpu/drm/amd/amdgpu/amdgpu.h |   3 +
> >  >  drivers/gpu/drm/amd/pm/amdgpu_pm.c  | 105
> +++-
> >  >  2 files changed, 90 insertions(+), 18 deletions(-)
> >  >
> >  > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >  > index b85b67a88a3d..27b0be23b6ac 100644
> >  > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >  > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >  > @@ -1097,6 +1097,9 @@ struct amdgpu_device {
> >  >
> >  > struct amdgpu_reset_control *reset_cntl;
> >  > uint32_t
> > ip_versions[HW_ID_MAX][HWIP_MAX_INSTANCE];
> >  > +
> >  > +   /* platform profile notifications */
> >  > +   struct notifier_block   platform_profile_notifier;
> >  >  };
> >  >
> >  >  static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
> >  > diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> > b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
> >  > index 

Re: [PATCH 2/2] drm/sched: serialize job_timeout and scheduler

2021-11-11 Thread Andrey Grodzovsky



On 2021-11-10 8:24 a.m., Daniel Vetter wrote:

On Wed, Nov 10, 2021 at 11:09:50AM +0100, Christian König wrote:

Am 10.11.21 um 10:50 schrieb Daniel Vetter:

On Tue, Nov 09, 2021 at 08:17:01AM -0800, Rob Clark wrote:

On Tue, Nov 9, 2021 at 1:07 AM Daniel Vetter  wrote:

On Mon, Nov 08, 2021 at 03:39:17PM -0800, Rob Clark wrote:

I stumbled across this thread when I ran into the same issue, while
working out how to move drm/msm to use scheduler's retire +
timeout/recovery (and get rid of our own mirror list of in-flight
jobs).  We already have hw error detection enabled, and it can signal
quite fast, so assuming the first job on the list is the guilty job
just won't work.

But I was considering a slightly different approach to fixing this,
instead just handling it all in drm_sched_main() and getting rid of
the complicated kthread parking gymnastics.  Ie. something along the
lines of:

So handling timeouts in the main sched thread won't work as soon as you
have multiple engines and reset that impacts across engines:

- Nothing is simplified since you still need to stop the other scheduler
threads.

- You get deadlocks if 2 schedulers time out at the same time, and both
want to stop the other one.

Hence workqueue. Now the rule for the wq is that you can only have one per
reset domain, so
- single engine you just take the one drm/sched provides
- if reset affects all your engines in the chip, then you allocate one in
the drm_device and pass that to all
- if you have a complex of gpus all interconnected (e.g. xgmi hive for
amd), then it's one wq for the entire hive

_All_ reset related things must be run on that workqueue or things break,
which means if you get a hw fault that also needs to be run there. I guess
we should either patch drm/sched to check you call that function from the
right workqueue, or just handle it internally.
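
As a concrete sketch of that rule (assuming the drm_sched_init() variant
that takes a timeout workqueue; the engines/reset_wq names are made up):

	/* One reset domain for the whole device: a single ordered
	 * workqueue shared by all schedulers serializes timeout work.
	 */
	struct workqueue_struct *reset_wq =
		alloc_ordered_workqueue("gpu-reset", 0);

	for (i = 0; i < num_engines; i++)
		drm_sched_init(&engines[i].sched, &ops, hw_submission,
			       hang_limit, timeout, reset_wq, NULL,
			       engines[i].name);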

Hmm, ok.. I guess it would be useful to better document the reasoning
for the current design, that would have steered me more towards the
approach taken in this patch.

Maybe this was because you worked on an old kernel? Boris did update the
kerneldoc as part of making gpu reset work for panfrost, which has this
multi-engine reset problem. If that's not yet clear then we need to
improve the docs further.

AMD's problem is even worse, because their reset domain is the entire xgmi
hive, so multiple pci devices.

I'm pushing for quite a while that we get something like an
amdgpu_reset_domain structure or similar for this, but we unfortunately
don't have that yet.

Maybe it would be a good idea to have something like a drm_sched_domain or
similar with all the necessary information for the inter scheduler handling.

E.g. a workqueue for reset etc...

Yeah I think as soon as we have more stuff than just the wq then a
drm_sched_reset_domain sounds good.

But if it's just driver stuff (e.g. the xgmi locking you have in amdgpu
reset comes to mind) then I think just a driver_reset_domain struct that
bundles all that stuff up seems good enough.

E.g. on i915 I'm also pondering whether some of the fw requests should be
processed by the reset wq, to avoid locking headaches, so I don't think
hiding that work too much in abstractions is a good idea.
-Daniel



I suggest we keep the drm_sched_reset_domain as a base struct to hold the wq
(and possibly something else cross-driver in the future) and then embed it in
a derived driver specific struct to hold driver specific stuff like the XGMI
lock you mentioned.
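
Roughly like this (sketch only, names hypothetical):

struct drm_sched_reset_domain {
	struct workqueue_struct *wq;
	/* room for more cross-driver reset state later */
};

struct amdgpu_reset_domain {
	struct drm_sched_reset_domain base;	/* embedded base struct */
	struct mutex hive_lock;			/* driver specific, e.g. XGMI */
};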

Andrey





Regards,
Christian.


Also there might be more issues in drm/sched ofc, e.g. I've looked a bit at
ordering/barriers and I'm pretty sure a lot are still missing. Or at least
we should have comments in the code explaining why it all works.
-Daniel


BR,
-R


-Daniel


-
diff --git a/drivers/gpu/drm/scheduler/sched_main.c
b/drivers/gpu/drm/scheduler/sched_main.c
index 67382621b429..4d6ce775c316 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -764,6 +764,45 @@ static bool drm_sched_blocked(struct
drm_gpu_scheduler *sched)
  return false;
   }

+static bool handle_timeout(struct drm_gpu_scheduler *sched)
+{
+   struct drm_sched_job *bad;
+
+   if (!sched->has_timeout)
+   return false;
+
+   sched->has_timeout = false;
+
+   spin_lock(&sched->job_list_lock);
+   bad = list_first_entry_or_null(&sched->pending_list,
+  struct drm_sched_job, list);
+
+   if (!bad) {
+   spin_unlock(&sched->job_list_lock);
+   return false;
+   }
+
+   spin_unlock(&sched->job_list_lock);
+
+   if (sched->timeout_wq == system_wq) {
+   /*
+* If driver has no specific requirements about serializing
+* reset wrt. other engines, just call timedout_job() directly
+*/
+   sched->ops->timedout_job(bad);
+   } else {
+   /*
+* Otherwise 

Re: [PATCH 1/5] drm/amdgpu: handle IH ring1 overflow

2021-11-11 Thread Felix Kuehling
On 2021-11-11 at 8:43 a.m., Christian König wrote:
> On 11.11.21 at 13:13, Felix Kuehling wrote:
>> On 2021-11-11 at 2:00 a.m., Christian König wrote:
>>> On 11.11.21 at 00:36, Felix Kuehling wrote:
 On 2021-11-10 9:31 a.m., Christian König wrote:
 [SNIP]
 Aren't we processing interrupts out-of-order in this case. We're
 processing newer ones before older ones. Is that the root of the
 problem because it confuses our interrupt draining function?
>>> Good point.
>>>
 Maybe we need to detect overflows in the interrupt draining function
 to make it wait longer in that case.
>>> Ideally we should use something which is completely separate from all
>>> those implementation details.
>>>
>>> Like for example using the timestamp or a separate indicator/counter
>>> instead.
>> Even a timestamp will be broken if the interrupt processing function
>> handles interrupts out of order.
>
> We can easily detect if the timestamp is going backwards and so filter
> out stale entries.
>
>> I think we need a different amdgpu_ih_process function for IH ring 1 the
>> way we set it up to handle overflows. Because IH is just overwriting
>> older entries, and we can't read entire IH entries atomically, we have
>> to use a watermark. If IH WPTR passes the watermark, we have to consider
>> the ring overflowed and reset our RPTR. We have to set RPTR far enough
>> "ahead" of the current WPTR to make sure WPTR is under the watermark.
>> And the watermark needs to be low enough to ensure amdgpu_irq_dispatch
>> can read out the next IH entry before the WPTR catches up with the RPTR.
>>
>> Since we don't read the WPTR on every iteration, and our page fault
>> handling code can take quite a while to process one fault, the watermark
>> needs to provide a lot of buffer. Maybe we also need to read the WPTR
>> register more frequently if the last read was more than a jiffy ago.
>
> I think trying to solve that with the IH code or hardware is the
> completely wrong approach.
>
> As I said before we need something more robust and using the
> timestamp sounds like the most logical approach to me.
>
> The only alternative I can see would be to have a hardware assisted
> flag which tells you if you had an overflow or not like we have for IH
> ring 0.

The *_ih_get_wptr functions already do some overflow handling. I think
we'll need a function to read the overflow bit that amdgpu_ih_process
can call separately, after reading IH entries.


>
> E.g. something like the following:
> 1. Copy the next N IV from the RPTR location.
> 2. Get the current WPTR.
> 3. If the overflow bit in the WPTR is set update the RPTR to something
> like WPTR+window, clear the overflow bit and repeat at #1.
> 4. Process the valid IVs.

OK. Current amdgpu_irq_dispatch reads directly from the IH ring. I think
you're proposing to move reading of the ring into amdgpu_ih_process
where we can discard the entries if an overflow is detected.

Then let amdgpu_irq_dispatch use a copy that's guaranteed to be consistent.
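
In rough pseudo-C that would be something like (sketch only; the
overflow bit and window names are made up):

	do {
		/* 1. Copy the next N IVs from the RPTR location. */
		memcpy(ivs, &ih->ring[ih->rptr], N * IV_SIZE);
		/* 2. Get the current WPTR, including its overflow bit. */
		wptr = amdgpu_ih_get_wptr(adev, ih);
		/* 3. On overflow, move RPTR past the danger window,
		 * clear the bit and redo the copy.
		 */
		if (wptr & IH_OVERFLOW_BIT)
			ih->rptr = (wptr + OVERFLOW_WINDOW) & ih->ptr_mask;
	} while (wptr & IH_OVERFLOW_BIT);
	/* 4. Process the valid IVs from the private copy. */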


>
> The down side is that we are losing a lot of IVs with that. That is
> probably ok for the current approach, but most likely a bad idea if we
> enable the CAM.

Right. Once we use the CAM we cannot afford to lose faults. If we do, we
need to clear the CAM.

Regards,
  Felix


>
> Regards,
> Christian.
>
>>
>> Whenever an overflow (over the watermark) is detected, we can set a
>> sticky overflow bit that our page fault handling code can use to clean
>> up. E.g. once we start using the CAM filter, we'll have to invalidate
>> all CAM entries when this happens (although I'd expect overflows to
>> become impossible once we enable the CAM filter).
>>
>> Thanks,
>>    Felix
>>
>


Re: [PATCH 1/2] drm/amdgpu: Update BO memory accounting to rely on allocation flag

2021-11-11 Thread Felix Kuehling
Am 2021-11-09 um 1:13 a.m. schrieb Ramesh Errabolu:
> Accounting system to track amount of available memory (system, TTM
> and VRAM of a device) relies on BO's domain. The change is to rely
> instead on allocation flag indicating BO type - VRAM, GTT, USERPTR,
> MMIO or DOORBELL
>
> Signed-off-by: Ramesh Errabolu 

The code changes look good. Comments about comments inline ...


> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  16 +++
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 101 +++---
>  2 files changed, 79 insertions(+), 38 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 5f658823a637..8d31a742cd80 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -307,7 +307,23 @@ void amdgpu_amdkfd_ras_poison_consumption_handler(struct 
> amdgpu_device *adev);
>  void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
>  void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
>   struct amdgpu_vm *vm);
> +
> +/**
> + * @amdgpu_amdkfd_release_notify() - Invoked when GEM object reference count
> + * reaches ZERO. Increases available memory by size of buffer including any
> + * reserved for control structures

"Increases available memory size ..." is an implementation detail that
doesn't matter to the callers of this function. It should not be part of
the interface definition. The interface description should be more
general, maybe:

* amdgpu_amdkfd_release_notify() - Notify KFD when GEM object is released
*
* Allows KFD to release its resources associated with the GEM object.
* ...


> + *
> + * @note: This api must be invoked on BOs that have been allocated via
> + * KFD interface amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu()
> + */
>  void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo);
> +
> +/**
> + * @amdgpu_amdkfd_reserve_system_mem - Decrease system memory that is
> + * available by an amount specified by input parameter

This is misleading. This function doesn't change availability of system
memory in general because it doesn't allocate any memory. You'll need to
be more specific:

* amdgpu_amdkfd_reserve_system_mem() - Decrease system memory limit for KFD 
applications
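
Spelled out fully, the kernel-doc could read (sketch, building on the
wording above):

/**
 * amdgpu_amdkfd_reserve_system_mem() - Decrease system memory limit for
 * KFD applications
 * @size: number of bytes to subtract from the limit
 *
 * Does not allocate or free anything; it only adjusts the accounting
 * limit that KFD allocations are checked against.
 */
void amdgpu_amdkfd_reserve_system_mem(uint64_t size);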


> + *
> + * @size: Size of buffer in bytes

What buffer?


> + */
>  void amdgpu_amdkfd_reserve_system_mem(uint64_t size);
>  #else
>  static inline
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 94fccf0b47ad..08675f89bfb2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -120,8 +120,22 @@ static size_t amdgpu_amdkfd_acc_size(uint64_t size)
>   PAGE_ALIGN(size);
>  }
>  
> +/**
> + * @amdgpu_amdkfd_reserve_mem_limit() - Decrease available memory by size
> + * of buffer including any reserved for control structures
> + *
> + * @adev: Device to which allocated BO belongs to
> + * @size: Size of buffer, in bytes, encapsulated by B0. This should be
> + * equivalent to amdgpu_bo_size(BO)
> + * @alloc_flag: Flag used in allocating a BO as noted above
> + *
> + * @note: This api must be invoked on BOs that have been allocated via
> + * KFD interface amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu()

Who needs to call it? Your statement sounds like callers of
amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu must call this function as well.
This is very misleading because this function is already called from
amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu.


> + *
> + * Return: returns -ENOMEM in case of error, ZERO otherwise
> + */
>  static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
> - uint64_t size, u32 domain, bool sg)
> + uint64_t size, u32 alloc_flag)
>  {
>   uint64_t reserved_for_pt =
>   ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
> @@ -131,20 +145,24 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct 
> amdgpu_device *adev,
>   acc_size = amdgpu_amdkfd_acc_size(size);
>  
>   vram_needed = 0;
> - if (domain == AMDGPU_GEM_DOMAIN_GTT) {
> - /* TTM GTT memory */
> + if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
>   system_mem_needed = acc_size + size;
>   ttm_mem_needed = acc_size + size;
> - } else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
> - /* Userptr */
> + } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
> + system_mem_needed = acc_size;
> + ttm_mem_needed = acc_size;
> + vram_needed = size;
> + } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
>   system_mem_needed = acc_size + size;
>   ttm_mem_needed = acc_size;
> - } else {
> - /* VRAM and SG */
> + } else if (alloc_flag &
> +(KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
> + 

Re: [PATCH 1/5] drm/amdgpu: handle IH ring1 overflow

2021-11-11 Thread Christian König

On 11.11.21 at 13:13, Felix Kuehling wrote:

On 2021-11-11 at 2:00 a.m., Christian König wrote:

On 11.11.21 at 00:36, Felix Kuehling wrote:

On 2021-11-10 9:31 a.m., Christian König wrote:
[SNIP]
Aren't we processing interrupts out-of-order in this case. We're
processing newer ones before older ones. Is that the root of the
problem because it confuses our interrupt draining function?

Good point.


Maybe we need to detect overflows in the interrupt draining function
to make it wait longer in that case.

Ideally we should use something which is completely separate from all
those implementation details.

Like for example using the timestamp or a separate indicator/counter
instead.

Even a timestamp will be broken if the interrupt processing function
handles interrupts out of order.


We can easily detect if the timestamp is going backwards and so filter 
out stale entries.
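
E.g. something like (sketch; the bookkeeping field is hypothetical):

	/* Drop IVs whose timestamp went backwards - they were read from
	 * ring memory the hardware has already overwritten.
	 */
	if (iv->timestamp < ih->processed_timestamp)
		return;
	ih->processed_timestamp = iv->timestamp;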



I think we need a different amdgpu_ih_process function for IH ring 1 the
way we set it up to handle overflows. Because IH is just overwriting
older entries, and we can't read entire IH entries atomically, we have
to use a watermark. If IH WPTR passes the watermark, we have to consider
the ring overflowed and reset our RPTR. We have to set RPTR far enough
"ahead" of the current WPTR to make sure WPTR is under the watermark.
And the watermark needs to be low enough to ensure amdgpu_irq_dispatch
can read out the next IH entry before the WPTR catches up with the RPTR.

Since we don't read the WPTR on every iteration, and our page fault
handling code can take quite a while to process one fault, the watermark
needs to provide a lot of buffer. Maybe we also need to read the WPTR
register more frequently if the last read was more than a jiffy ago.


I think trying to solve that with the IH code or hardware is the 
completely wrong approach.


As I said before we need something more robust and using the 
timestamp sounds like the most logical approach to me.


The only alternative I can see would be to have a hardware assisted flag 
which tells you if you had an overflow or not like we have for IH ring 0.


E.g. something like the following:
1. Copy the next N IV from the RPTR location.
2. Get the current WPTR.
3. If the overflow bit in the WPTR is set update the RPTR to something 
like WPTR+window, clear the overflow bit and repeat at #1.

4. Process the valid IVs.

The down side is that we are losing a lot of IVs with that. That is 
probably ok for the current approach, but most likely a bad idea if we 
enable the CAM.


Regards,
Christian.



Whenever an overflow (over the watermark) is detected, we can set a
sticky overflow bit that our page fault handling code can use to clean
up. E.g. once we start using the CAM filter, we'll have to invalidate
all CAM entries when this happens (although I'd expect overflows to
become impossible once we enable the CAM filter).

Thanks,
   Felix





Re: [PATCH 1/5] drm/amdgpu: handle IH ring1 overflow

2021-11-11 Thread Felix Kuehling
On 2021-11-11 at 2:00 a.m., Christian König wrote:
> On 11.11.21 at 00:36, Felix Kuehling wrote:
>> On 2021-11-10 9:31 a.m., Christian König wrote:
>>> On 10.11.21 at 14:59, philip yang wrote:

 On 2021-11-10 5:15 a.m., Christian König wrote:

> [SNIP]

 It is hard to understand; this debug log can explain more details. With
 this debug message patch:

 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
 b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
 index ed6f8d24280b..8859f2bb11b1 100644
 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
 +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
 @@ -234,10 +235,12 @@ int amdgpu_ih_process(struct amdgpu_device
 *adev, struct amdgpu_ih_ring *ih)
     return IRQ_NONE;

     wptr = amdgpu_ih_get_wptr(adev, ih);
 +   if (ih == &adev->irq.ih1)
 +   pr_debug("entering rptr 0x%x, wptr 0x%x\n", ih->rptr, wptr);

  restart_ih:
 +   if (ih == &adev->irq.ih1)
 +   pr_debug("starting rptr 0x%x, wptr 0x%x\n", ih->rptr, wptr);

     /* Order reading of wptr vs. reading of IH ring data */
     rmb();
 @@ -245,8 +248,12 @@ int amdgpu_ih_process(struct amdgpu_device
 *adev, struct amdgpu_ih_ring *ih)
     while (ih->rptr != wptr && --count) {
     amdgpu_irq_dispatch(adev, ih);
     ih->rptr &= ih->ptr_mask;
 +   if (ih == &adev->irq.ih1) {
 +   pr_debug("rptr 0x%x, old wptr 0x%x, new wptr 0x%x\n",
 +   ih->rptr, wptr,
 +   amdgpu_ih_get_wptr(adev, ih));
 +   }
     }

     amdgpu_ih_set_rptr(adev, ih);
 @@ -257,6 +264,8 @@ int amdgpu_ih_process(struct amdgpu_device
 *adev, struct amdgpu_ih_ring *ih)
     if (wptr != ih->rptr)
     goto restart_ih;

 +   if (ih == &adev->irq.ih1)
 +   pr_debug("exiting rptr 0x%x, wptr 0x%x\n", ih->rptr, wptr);
     return IRQ_HANDLED;
  }

 This is the log: at timestamp 48.807028 the ring1 drain is done (rptr ==
 wptr, ring1 is empty), but the loop continues and handles outdated retry
 faults.

>>>
>>> As far as I can see that is perfectly correct and expected behavior.
>>>
>>> See the ring buffer overflowed and because of that the loop
>>> continues, but that is correct because an overflow means that the
>>> ring was filled with new entries.
>>>
>>> So we are processing new entries here, not stale ones.
>>
>> Aren't we processing interrupts out-of-order in this case. We're
>> processing newer ones before older ones. Is that the root of the
>> problem because it confuses our interrupt draining function?
>
> Good point.
>
>> Maybe we need to detect overflows in the interrupt draining function
>> to make it wait longer in that case.
>
> Ideally we should use something which is completely separate from all
> those implementation details.
>
> Like for example using the timestamp or a separate indicator/counter
> instead.

Even a timestamp will be broken if the interrupt processing function
handles interrupts out of order.

I think we need a different amdgpu_ih_process function for IH ring 1 the
way we set it up to handle overflows. Because IH is just overwriting
older entries, and we can't read entire IH entries atomically, we have
to use a watermark. If IH WPTR passes the watermark, we have to consider
the ring overflowed and reset our RPTR. We have to set RPTR far enough
"ahead" of the current WPTR to make sure WPTR is under the watermark.
And the watermark needs to be low enough to ensure amdgpu_irq_dispatch
can read out the next IH entry before the WPTR catches up with the RPTR.

Since we don't read the WPTR on every iteration, and our page fault
handling code can take quite a while to process one fault, the watermark
needs to provide a lot of buffer. Maybe we also need to read the WPTR
register more frequently if the last read was more than a jiffy ago.

Whenever an overflow (over the watermark) is detected, we can set a
sticky overflow bit that our page fault handling code can use to clean
up. E.g. once we start using the CAM filter, we'll have to invalidate
all CAM entries when this happens (although I'd expect overflows to
become impossible once we enable the CAM filter).
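
A rough sketch of that watermark check (names and the window size are
made up):

	/* Consider the ring overflowed once the RPTR-to-WPTR distance
	 * crosses the watermark.
	 */
	if (((wptr - ih->rptr) & ih->ptr_mask) > IH1_WATERMARK) {
		/* Move RPTR ahead of WPTR so WPTR falls back under the
		 * watermark, and latch a sticky bit for the fault code.
		 */
		ih->rptr = (wptr + IH1_RESET_WINDOW) & ih->ptr_mask;
		ih->overflowed = true;
	}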

Thanks,
  Felix


>
> Regards,
> Christian.
>
>>
>> Regards,
>>   Felix
>>
>>
>>>
>>> Regards,
>>> Christian.
>>>
 [   48.802231] amdgpu_ih_process:243: amdgpu: starting rptr 0x520,
 wptr 0xd20
 [   48.802235] amdgpu_ih_process:254: amdgpu: rptr 0x540, old wptr
 0xd20, new wptr 0xd20
 [   48.802256] amdgpu_ih_process:254: amdgpu: rptr 0x560, old wptr
 0xd20, new wptr 0xd20
 [   48.802260] amdgpu_ih_process:254: amdgpu: rptr 0x580, old wptr
 0xd20, new wptr 0xd20
 [   48.802281] amdgpu_ih_process:254: amdgpu: rptr 0x5a0, old wptr

Re: [PATCH] drm/amd/display: Clean up some inconsistent indenting

2021-11-11 Thread Christian König




On 11.11.21 at 11:03, Jiapeng Chong wrote:

Eliminate the following smatch warning:

drivers/gpu/drm/amd/amdgpu/../display/dmub/src/dmub_srv.c:622
dmub_srv_cmd_execute() warn: inconsistent indenting.

Reported-by: Abaci Robot 
Signed-off-by: Jiapeng Chong 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c 
b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
index 56a0332..e9fadf1 100644
--- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
@@ -618,8 +618,8 @@ enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub)
 * read back stale, fully invalid or partially invalid data.
 */
	dmub_rb_flush_pending(&dmub->inbox1_rb);
+   dmub->hw_funcs.set_inbox1_wptr(dmub, dmub->inbox1_rb.wrpt);
  
-		dmub->hw_funcs.set_inbox1_wptr(dmub, dmub->inbox1_rb.wrpt);

return DMUB_STATUS_OK;
  }
  




Re: [PATCH] drm/amd/display: clean up some inconsistent indenting

2021-11-11 Thread Christian König

On 11.11.21 at 10:58, Jiapeng Chong wrote:

Eliminate the following smatch warnings:

drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_debugfs.c:2245
dp_dsc_slice_bpg_offset_read() warn: inconsistent indenting.

drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_debugfs.c:2044
dp_dsc_pic_width_read() warn: inconsistent indenting.

drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_debugfs.c:2101
dp_dsc_pic_height_read() warn: inconsistent indenting.

drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_debugfs.c:2173
dp_dsc_chunk_size_read() warn: inconsistent indenting.

drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_debugfs.c:1868
dp_dsc_bits_per_pixel_read() warn: inconsistent indenting.

drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_debugfs.c:1965
dp_dsc_bits_per_pixel_write() warn: inconsistent indenting.

drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_debugfs.c:1787
dp_dsc_slice_height_write() warn: inconsistent indenting.

drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_debugfs.c:1602
dp_dsc_slice_width_write() warn: inconsistent indenting.

drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_debugfs.c:1687
dp_dsc_slice_height_read() warn: inconsistent indenting.

drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_debugfs.c:1417
dp_dsc_clock_en_write() warn: inconsistent indenting.

drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_debugfs.c:1502
dp_dsc_slice_width_read() warn: inconsistent indenting.

drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_debugfs.c:1315
dp_dsc_clock_en_read() warn: inconsistent indenting.

Reported-by: Abaci Robot 
Signed-off-by: Jiapeng Chong 


Looks like the same code was copied over and over again. Maybe make this an
even wider cleanup and add a helper function to find the pipe_ctx for a
specific dc_link.
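
Such a helper might look like (sketch, name hypothetical):

static struct pipe_ctx *find_pipe_ctx_for_link(struct amdgpu_dm_connector *aconnector)
{
	struct dc_state *state = aconnector->dc_link->dc->current_state;
	int i;

	for (i = 0; i < MAX_PIPES; i++) {
		struct pipe_ctx *pipe_ctx = &state->res_ctx.pipe_ctx[i];

		/* same match the copied loops below perform today */
		if (pipe_ctx->stream &&
		    pipe_ctx->stream->link == aconnector->dc_link)
			return pipe_ctx;
	}
	return NULL;
}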


Regards,
Christian.


---
  .../drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c  | 72 +++---
  1 file changed, 36 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
index 9d43ecb..50ef248 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
@@ -1312,9 +1312,9 @@ static ssize_t dp_dsc_clock_en_read(struct file *f, char 
__user *buf,
  
  	for (i = 0; i < MAX_PIPES; i++) {

		pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
-   if (pipe_ctx && pipe_ctx->stream &&
-   pipe_ctx->stream->link == aconnector->dc_link)
-   break;
+   if (pipe_ctx && pipe_ctx->stream &&
+   pipe_ctx->stream->link == aconnector->dc_link)
+   break;
}
  
  	if (!pipe_ctx)

@@ -1414,9 +1414,9 @@ static ssize_t dp_dsc_clock_en_write(struct file *f, 
const char __user *buf,
  
  	for (i = 0; i < MAX_PIPES; i++) {

		pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
-   if (pipe_ctx && pipe_ctx->stream &&
-   pipe_ctx->stream->link == aconnector->dc_link)
-   break;
+   if (pipe_ctx && pipe_ctx->stream &&
+   pipe_ctx->stream->link == aconnector->dc_link)
+   break;
}
  
  	if (!pipe_ctx || !pipe_ctx->stream)

@@ -1499,9 +1499,9 @@ static ssize_t dp_dsc_slice_width_read(struct file *f, 
char __user *buf,
  
  	for (i = 0; i < MAX_PIPES; i++) {

		pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
-   if (pipe_ctx && pipe_ctx->stream &&
-   pipe_ctx->stream->link == aconnector->dc_link)
-   break;
+   if (pipe_ctx && pipe_ctx->stream &&
+   pipe_ctx->stream->link == aconnector->dc_link)
+   break;
}
  
  	if (!pipe_ctx)

@@ -1599,9 +1599,9 @@ static ssize_t dp_dsc_slice_width_write(struct file *f, 
const char __user *buf,
  
  	for (i = 0; i < MAX_PIPES; i++) {

		pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
-   if (pipe_ctx && pipe_ctx->stream &&
-   pipe_ctx->stream->link == aconnector->dc_link)
-   break;
+   if (pipe_ctx && pipe_ctx->stream &&
+   pipe_ctx->stream->link == aconnector->dc_link)
+   break;
}
  
  	if (!pipe_ctx || !pipe_ctx->stream)

@@ -1684,9 +1684,9 @@ static ssize_t dp_dsc_slice_height_read(struct file *f, 
char __user *buf,
  
  	for (i = 0; i < MAX_PIPES; i++) {

		pipe_ctx = &aconnector->dc_link->dc->current_state->res_ctx.pipe_ctx[i];
-   if (pipe_ctx && pipe_ctx->stream &&
-   pipe_ctx->stream->link == 

[PATCH v1] drm/amd/pm: add GFXCLK/SCLK clocks level print support for APUs

2021-11-11 Thread Perry Yuan
Add support to allow userspace tools like RGP to get the GFX clock
value at runtime. The fix follows the old way of showing the
min/current/max clock levels for compatibility reasons.

=== Test ===
$ cat /sys/class/drm/card0/device/pp_dpm_sclk
0: 200Mhz *
1: 1100Mhz
2: 1600Mhz

then run stress test on one APU system.
$ cat /sys/class/drm/card0/device/pp_dpm_sclk
0: 200Mhz
1: 1040Mhz *
2: 1600Mhz

The current GFXCLK value is updated at runtime.

BugLink: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5260
Reviewed-by: Huang Ray 
Signed-off-by: Perry Yuan 
---
 .../amd/pm/swsmu/smu11/cyan_skillfish_ppt.c   | 22 +--
 .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c  | 26 ++
 .../drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c  | 27 +++
 .../drm/amd/pm/swsmu/smu13/yellow_carp_ppt.h  |  1 +
 4 files changed, 74 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c
index 3d4c65bc29dc..6e8343907c32 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/cyan_skillfish_ppt.c
@@ -308,6 +308,7 @@ static int cyan_skillfish_print_clk_levels(struct 
smu_context *smu,
 {
int ret = 0, size = 0;
uint32_t cur_value = 0;
+   int i;
 
	smu_cmn_get_sysfs_buf(&buf, &size);
 
@@ -333,8 +334,6 @@ static int cyan_skillfish_print_clk_levels(struct 
smu_context *smu,
size += sysfs_emit_at(buf, size, "VDDC: %7umV  %10umV\n",
CYAN_SKILLFISH_VDDC_MIN, 
CYAN_SKILLFISH_VDDC_MAX);
break;
-   case SMU_GFXCLK:
-   case SMU_SCLK:
case SMU_FCLK:
case SMU_MCLK:
case SMU_SOCCLK:
@@ -345,6 +344,25 @@ static int cyan_skillfish_print_clk_levels(struct 
smu_context *smu,
return ret;
size += sysfs_emit_at(buf, size, "0: %uMhz *\n", cur_value);
break;
+   case SMU_SCLK:
+   case SMU_GFXCLK:
+   ret = cyan_skillfish_get_current_clk_freq(smu, clk_type, &cur_value);
+   if (ret)
+   return ret;
+   if (cur_value  == CYAN_SKILLFISH_SCLK_MAX)
+   i = 2;
+   else if (cur_value == CYAN_SKILLFISH_SCLK_MIN)
+   i = 0;
+   else
+   i = 1;
+   size += sysfs_emit_at(buf, size, "0: %uMhz %s\n", 
CYAN_SKILLFISH_SCLK_MIN,
+   i == 0 ? "*" : "");
+   size += sysfs_emit_at(buf, size, "1: %uMhz %s\n",
+   i == 1 ? cur_value : 
CYAN_SKILLFISH_SCLK_DEFAULT,
+   i == 1 ? "*" : "");
+   size += sysfs_emit_at(buf, size, "2: %uMhz %s\n", 
CYAN_SKILLFISH_SCLK_MAX,
+   i == 2 ? "*" : "");
+   break;
default:
dev_warn(smu->adev->dev, "Unsupported clock type\n");
return ret;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
index f6ef0ce6e9e2..6852e4b45589 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
@@ -683,6 +683,7 @@ static int vangogh_print_clk_levels(struct smu_context *smu,
int i, size = 0, ret = 0;
uint32_t cur_value = 0, value = 0, count = 0;
bool cur_value_match_level = false;
+   uint32_t min, max;
 
	memset(&metrics, 0, sizeof(metrics));
 
@@ -743,6 +744,13 @@ static int vangogh_print_clk_levels(struct smu_context 
*smu,
if (ret)
return ret;
break;
+   case SMU_GFXCLK:
+   case SMU_SCLK:
+   ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetGfxclkFrequency, 0, &cur_value);
+   if (ret) {
+   return ret;
+   }
+   break;
default:
break;
}
@@ -768,6 +776,24 @@ static int vangogh_print_clk_levels(struct smu_context 
*smu,
if (!cur_value_match_level)
size += sysfs_emit_at(buf, size, "   %uMhz *\n", 
cur_value);
break;
+   case SMU_GFXCLK:
+   case SMU_SCLK:
+   min = (smu->gfx_actual_hard_min_freq > 0) ? 
smu->gfx_actual_hard_min_freq : smu->gfx_default_hard_min_freq;
+   max = (smu->gfx_actual_soft_max_freq > 0) ? 
smu->gfx_actual_soft_max_freq : smu->gfx_default_soft_max_freq;
+   if (cur_value  == max)
+   i = 2;
+   else if (cur_value == min)
+   i = 0;
+   else
+   i = 1;
+   size += sysfs_emit_at(buf, size, "0: %uMhz %s\n", min,
+   i == 0 ? "*" : "");
+   size += sysfs_emit_at(buf, size, "1: 

Re: [PATCH 2/6] drm/amdgpu: stop getting excl fence separately

2021-11-11 Thread Christian König

Just a ping to the amd-gfx list.

Trivial cleanup, can anybody give me an rb for that?

Thanks,
Christian.

On 28.10.21 at 15:26, Christian König wrote:

Just grab all fences for the display flip in one go.

Signed-off-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 -
  drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 6 +-
  2 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index d58e37fd01f4..4da7eb65e744 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -457,7 +457,6 @@ struct amdgpu_flip_work {
uint64_tbase;
struct drm_pending_vblank_event *event;
struct amdgpu_bo*old_abo;
-   struct dma_fence*excl;
unsignedshared_count;
struct dma_fence**shared;
struct dma_fence_cb cb;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index dc50c05f23fc..68108f151dad 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -83,9 +83,6 @@ static void amdgpu_display_flip_work_func(struct work_struct 
*__work)
unsigned i;
int vpos, hpos;
  
-	if (amdgpu_display_flip_handle_fence(work, &work->excl))

-   return;
-
for (i = 0; i < work->shared_count; ++i)
		if (amdgpu_display_flip_handle_fence(work, &work->shared[i]))
return;
@@ -203,7 +200,7 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc 
*crtc,
goto unpin;
}
  
-	r = dma_resv_get_fences(new_abo->tbo.base.resv, &work->excl,

+   r = dma_resv_get_fences(new_abo->tbo.base.resv, NULL,
			       &work->shared_count, &work->shared);
if (unlikely(r != 0)) {
DRM_ERROR("failed to get fences for buffer\n");
@@ -253,7 +250,6 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc 
*crtc,
  
  cleanup:

	amdgpu_bo_unref(&work->old_abo);
-   dma_fence_put(work->excl);
for (i = 0; i < work->shared_count; ++i)
dma_fence_put(work->shared[i]);
kfree(work->shared);