gen9: WM memory bandwidth related workaround

Paulo Zanoni Thu, 15 Dec 2016 09:07:39 -0800

Em Qui, 2016-12-01 às 21:19 +0530, Mahesh Kumar escreveu:
> This patch implemnets Workariunds related to display arbitrated
> memory
> bandwidth. These WA are applicabe for all gen-9 based platforms.


3 typos above.

The WA is already implemented. What the patch does is that it opts-out
of the WA in case we conclude it's safe. Please say this in the commit
message.

> 
> Changes since v1:
>  - Rebase on top of Paulo's patch series
> Changes since v2:
>  - Address review comments
>  - Rebase/rework as per other patch changes in series
> Changes since v3:
>  - Rework based on Maarten's comments
> 
> Signed-off-by: Mahesh Kumar <mahesh1.ku...@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h      |  11 +++
>  drivers/gpu/drm/i915/intel_display.c |  24 ++++++
>  drivers/gpu/drm/i915/intel_pm.c      | 155
> +++++++++++++++++++++++++++++++++--
>  3 files changed, 181 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h
> b/drivers/gpu/drm/i915/i915_drv.h
> index 69213a4..3126259 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1216,6 +1216,13 @@ enum intel_sbi_destination {
>       SBI_MPHY,
>  };
>  
> +/* SKL+ Watermark arbitrated display bandwidth Workarounds */
> +enum watermark_memory_wa {
> +     WATERMARK_WA_NONE,
> +     WATERMARK_WA_X_TILED,
> +     WATERMARK_WA_Y_TILED,
> +};
> +
>  #define QUIRK_PIPEA_FORCE (1<<0)
>  #define QUIRK_LVDS_SSC_DISABLE (1<<1)
>  #define QUIRK_INVERT_BRIGHTNESS (1<<2)
> @@ -1788,6 +1795,10 @@ struct skl_ddb_allocation {
>  
>  struct skl_wm_values {
>       unsigned dirty_pipes;
> +     /* any WaterMark memory workaround Required */

CapitaliZation is weird Here. Besides, no need for the comment.


> +     enum watermark_memory_wa mem_wa;
> +     uint32_t pipe_bw_kbps[I915_MAX_PIPES];
> +     bool pipe_ytiled[I915_MAX_PIPES];
>       struct skl_ddb_allocation ddb;
>  };
>  
> diff --git a/drivers/gpu/drm/i915/intel_display.c
> b/drivers/gpu/drm/i915/intel_display.c
> index 5d11002..f8da488 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -14574,6 +14574,28 @@ static void intel_atomic_track_fbs(struct
> drm_atomic_state *state)
>                                 to_intel_plane(plane)-
> >frontbuffer_bit);
>  }
>  
> +static void
> +intel_update_wm_bw_wa(struct drm_atomic_state *state)
> +{
> +     struct intel_atomic_state *intel_state =
> to_intel_atomic_state(state);
> +     struct drm_i915_private *dev_priv = to_i915(state->dev);
> +     const struct drm_crtc *crtc;
> +     const struct drm_crtc_state *cstate;
> +     struct skl_wm_values *results = &intel_state->wm_results;
> +     struct skl_wm_values *hw_vals = &dev_priv->wm.skl_hw;
> +     int i;
> +
> +     if (!IS_GEN9(dev_priv))
> +             return;
> +
> +     for_each_crtc_in_state(state, crtc, cstate, i) {
> +             hw_vals->pipe_bw_kbps[i] = results->pipe_bw_kbps[i];
> +             hw_vals->pipe_ytiled[i] = results->pipe_ytiled[i];
> +     }
> +
> +     hw_vals->mem_wa = results->mem_wa;
> +}

I think we can just patch skl_copy_wm_for_pipe() to also copy the
fields we need instead of adding this function. Whouldn't that be much
simpler? Anyway, this one looks correct, so no need to change if you
don't want.


> +
>  /**
>   * intel_atomic_commit - commit validated state object
>   * @dev: DRM device
> @@ -14614,6 +14636,8 @@ static int intel_atomic_commit(struct
> drm_device *dev,
>       intel_shared_dpll_commit(state);
>       intel_atomic_track_fbs(state);
>  
> +     intel_update_wm_bw_wa(state);
> +
>       if (intel_state->modeset) {
>               memcpy(dev_priv->min_pixclk, intel_state-
> >min_pixclk,
>                      sizeof(intel_state->min_pixclk));
> diff --git a/drivers/gpu/drm/i915/intel_pm.c
> b/drivers/gpu/drm/i915/intel_pm.c
> index cc8fc84..fda6e1e 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -2878,11 +2878,7 @@ bool ilk_disable_lp_wm(struct drm_device *dev)
>  
>  #define SKL_SAGV_BLOCK_TIME  30 /* µs */
>  
> -/*
> - * FIXME: We still don't have the proper code detect if we need to
> apply the WA,
> - * so assume we'll always need it in order to avoid underruns.
> - */
> -static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state)
> +static bool intel_needs_memory_bw_wa(struct intel_atomic_state
> *state)

Why are we renaming this function?

Also, in my last review I suggested that we kill this function. I don't
think it makes sense to have this function anymore.

Besides, function intel_can_enable_sagv() needs to look at the value we
assigned to enum watermark_memory_wa, not get a true/false based on
platform version.


>  {
>       struct drm_i915_private *dev_priv = to_i915(state-
> >base.dev);
>  
> @@ -3056,7 +3052,7 @@ bool intel_can_enable_sagv(struct
> drm_atomic_state *state)
>  
>               latency = dev_priv->wm.skl_latency[level];
>  
> -             if (skl_needs_memory_bw_wa(intel_state) &&
> +             if (intel_needs_memory_bw_wa(intel_state) &&
>                   plane->base.state->fb->modifier ==
>                   I915_FORMAT_MOD_X_TILED)
>                       latency += 15;
> @@ -3584,7 +3580,7 @@ static int skl_compute_plane_wm(const struct
> drm_i915_private *dev_priv,
>       uint32_t y_min_scanlines;
>       struct intel_atomic_state *state =
>               to_intel_atomic_state(cstate->base.state);
> -     bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
> +     enum watermark_memory_wa mem_wa;
>       bool y_tiled, x_tiled;
>  
>       if (latency == 0 || !cstate->base.active || !intel_pstate-
> >base.visible) {
> @@ -3600,7 +3596,8 @@ static int skl_compute_plane_wm(const struct
> drm_i915_private *dev_priv,
>       if (IS_KABYLAKE(dev_priv) && dev_priv->ipc_enabled)
>               latency += 4;
>  
> -     if (apply_memory_bw_wa && x_tiled)
> +     mem_wa = state->wm_results.mem_wa;

This assignment could have been done during declaration.


> +     if (mem_wa != WATERMARK_WA_NONE && x_tiled)
>               latency += 15;
>  
>       width = drm_rect_width(&intel_pstate->base.src) >> 16;
> @@ -3635,7 +3632,7 @@ static int skl_compute_plane_wm(const struct
> drm_i915_private *dev_priv,
>               y_min_scanlines = 4;
>       }
>  
> -     if (apply_memory_bw_wa)
> +     if (mem_wa == WATERMARK_WA_Y_TILED)
>               y_min_scanlines *= 2;
>  
>       plane_bytes_per_line = width * cpp;
> @@ -4063,6 +4060,15 @@ skl_compute_ddb(struct drm_atomic_state
> *state)
>       }
>  
>       /*
> +      * If Watermark workaround is changed we need to recalculate
> +      * watermark values for all active pipes
> +      */
> +     if (intel_state->wm_results.mem_wa != dev_priv-
> >wm.skl_hw.mem_wa) {
> +             realloc_pipes = ~0;
> +             intel_state->wm_results.dirty_pipes = ~0;
> +     }

But then skl_ddb_add_affected_planes() only actually adds to the commit
the planes that have different DDB partitioning. It seems to me that it
may be possible to have a case where the WA changes and the DDB stays
the same, so we need to find a way to include every CRTC there. Maybe
we could just go and call the function that adds every CRTC here.


> +
> +     /*
>        * We're not recomputing for the pipes not included in the
> commit, so
>        * make sure we start with the current state.
>        */
> @@ -4087,6 +4093,133 @@ skl_compute_ddb(struct drm_atomic_state
> *state)
>       return 0;
>  }
>  
> +static int
> +skl_compute_memory_bandwidth_wm_wa(struct drm_atomic_state *state)
> +{
> +     struct drm_device *dev = state->dev;
> +     struct drm_i915_private *dev_priv = to_i915(dev);
> +     struct intel_atomic_state *intel_state =
> to_intel_atomic_state(state);
> +     struct memdev_info *memdev_info = &dev_priv->memdev_info;
> +     struct skl_wm_values *results = &intel_state->wm_results;
> +     struct drm_crtc *crtc;
> +     struct drm_crtc_state *cstate;
> +     int active_pipes = 0;
> +     uint32_t max_pipe_bw_kbps = 0, total_pipe_bw_kbps;
> +     int display_bw_percentage;
> +     bool y_tile_enabled = false;
> +     int ret, i;
> +
> +     if (!intel_needs_memory_bw_wa(intel_state)) {
> +             results->mem_wa = WATERMARK_WA_NONE;
> +             return 0;
> +     }
> +
> +     if (!memdev_info->valid)
> +             goto exit;

There's no reason to use a label if it's only called here. Just move
all that code to the if statement.


> +
> +     ret = drm_modeset_lock(&dev->mode_config.connection_mutex,
> +                     state->acquire_ctx);
> +     if (ret)
> +             return ret;

Why are we getting this here? Why this lock specifically? What's it
protecting?

Doc says:
- ww mutex protecting connector state and routing
- protects connector->encoder and encoder->crtc links 

We're not touching any of that here.


> +
> +     memcpy(results->pipe_bw_kbps, dev_priv-
> >wm.skl_hw.pipe_bw_kbps,
> +                     sizeof(results->pipe_bw_kbps));
> +     memcpy(results->pipe_ytiled, dev_priv-
> >wm.skl_hw.pipe_ytiled,
> +                     sizeof(results->pipe_ytiled));
> +
> +     for_each_crtc_in_state(state, crtc, cstate, i) {
> +             struct drm_plane *plane;
> +             const struct drm_plane_state *pstate;
> +             int active_planes = 0;
> +             uint32_t max_plane_bw_kbps = 0;
> +
> +             results->pipe_ytiled[i] = false;
> +
> +             if (!cstate->active) {
> +                     results->pipe_bw_kbps[i] = 0;
> +                     continue;
> +             }
> +
> +             drm_atomic_crtc_state_for_each_plane_state(plane,
> pstate,
> +                                                             csta
> te) {
> +                     struct drm_framebuffer *fb;
> +                     uint32_t plane_bw_kbps;
> +                     enum plane_id id = to_intel_plane(plane)-
> >id;
> +
> +                     if (!pstate->visible)
> +                             continue;
> +
> +                     if (WARN_ON(!pstate->fb))
> +                             continue;
> +
> +                     if (id == PLANE_CURSOR)
> +                             continue;
> +
> +                     fb = pstate->fb;
> +                     if ((fb->modifier == I915_FORMAT_MOD_Y_TILED
> ||
> +                             fb->modifier ==
> I915_FORMAT_MOD_Yf_TILED))
> +                             results->pipe_ytiled[i] = true;
> +
> +                     plane_bw_kbps =
> skl_adjusted_plane_pixel_rate(
> +                                             to_intel_crtc_state(
> cstate),
> +                                             to_intel_plane_state
> (pstate));
> +                     max_plane_bw_kbps = max(plane_bw_kbps,
> +                                                     max_plane_bw
> _kbps);
> +                     active_planes++;
> +             }
> +             results->pipe_bw_kbps[i] = max_plane_bw_kbps *
> active_planes;
> +     }
> +
> +     for_each_pipe(dev_priv, i) {
> +             if (results->pipe_bw_kbps[i]) {
> +                     max_pipe_bw_kbps = max(max_pipe_bw_kbps,
> +                                     results->pipe_bw_kbps[i]);
> +                     active_pipes++;
> +             }
> +             if (results->pipe_ytiled[i])
> +                     y_tile_enabled = true;
> +     }
> +
> +     total_pipe_bw_kbps = max_pipe_bw_kbps * active_pipes;
> +     display_bw_percentage = DIV_ROUND_UP_ULL(total_pipe_bw_kbps
> * 100,
> +                                             memdev_info-
> >bandwidth_kbps);

Shouldn't this be just DIV_ROUND_UP()? I don't see 64 bit variables
here, nor the possibility of overflows.


> +
> +     /*
> +      * If there is any Ytile plane enabled and arbitrated
> display
> +      * bandwidth > 20% of raw system memory bandwidth
> +      * Enale Y-tile related WA
> +      *
> +      * If memory is dual channel single rank, Xtile limit = 35%,
> else Xtile
> +      * limit = 60%
> +      * If there is no Ytile plane enabled and
> +      * arbitrated display bandwidth > Xtile limit
> +      * Enable X-tile realated WA
> +      */
> +     if (y_tile_enabled && (display_bw_percentage > 20))
> +             results->mem_wa = WATERMARK_WA_Y_TILED;
> +     else {
> +             int x_tile_percentage = 60;
> +             enum rank rank = DRAM_RANK_SINGLE;
> +
> +             if (memdev_info->rank != DRAM_RANK_INVALID)
> +                     rank = memdev_info->rank;

I really think that the previous patch should prevent this. If
memdev_info->rank is invalid then memdev_info->valid should also be
false and we'd have returned at the beginning of the function. With
that, this part of the code could be simplified a little bit since then
we'd have no need to choose a default rank.



> +
> +             if ((rank == DRAM_RANK_SINGLE) &&
> +                                     (memdev_info->num_channels
> == 2))
> +                     x_tile_percentage = 35;
> +
> +             if (display_bw_percentage > x_tile_percentage)
> +                     results->mem_wa = WATERMARK_WA_X_TILED;
> +     }
> +
> +     return 0;
> +
> +exit:
> +     results->mem_wa = WATERMARK_WA_Y_TILED;
> +     return 0;
> +}
> +
> +

Two blank lines here.


>  static void
>  skl_copy_wm_for_pipe(struct skl_wm_values *dst,
>                    struct skl_wm_values *src,
> @@ -4162,6 +4295,10 @@ skl_compute_wm(struct drm_atomic_state *state)
>       /* Clear all dirty flags */
>       results->dirty_pipes = 0;
>  
> +     ret = skl_compute_memory_bandwidth_wm_wa(state);
> +     if (ret)
> +             return ret;
> +
>       ret = skl_compute_ddb(state);
>       if (ret)
>               return ret;
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH v7 8/8] drm/i915/gen9: WM memory bandwidth related workaround

Reply via email to