> -----Original Message-----
> From: Ville Syrjala <[email protected]>
> Sent: Tuesday, September 21, 2021 8:55 PM
> To: [email protected]
> Cc: Shankar, Uma <[email protected]>
> Subject: [PATCH v2 1/4] drm/i915/fbc: Rework cfb stride/size calculations
> 
> From: Ville Syrjälä <[email protected]>
> 
> The code to calculate the cfb stride/size is a bit of mess.
> The cfb size is getting calculated based purely on the plane stride and plane 
> height.
> That doesn't account for extra alignment we want for the cfb stride. The gen9
> override stride OTOH is just calculated based on the plane width, and it does 
> try to
> make things more aligned but any extra alignment added there is not 
> considered in
> the cfb size calculations.
> So not at all convinced this is working as intended. Additionally the 
> compression limit
> handling is split between the cfb allocation code and g4x_dpfc_ctl_limit() 
> (for the
> 16bpp case), which is just confusing.
> 
> Let's streamline the whole thing:
> - Start with the plane stride, convert that into cfb stride (cfb is
>   always 4 bytes per pixel). All the calculations will assume 1:1
>   compression limit since that will give us the max values, and we
>   don't yet know how much stolen memory we will be able to allocate
> - Align the cfb stride to 512 bytes on modern platforms. This guarantees
>   the 4 line segment will be 512 byte aligned regardles of the final
>   compression limit we choose later. The 512 byte alignment for the
>   segment is required by at least some of the platforms, and just doing
>   it always seems like the easiest option
> - Figure out if we need to use the override stride or not. For X-tiled
>   it's never needed since the plane stride is already 512 byte aligned,
>   for Y-tiled it will be needed if the plane stride is not a multiple
>   of 512 bytes, and for linear it's apparently always needed because the
>   hardware miscalculates the cfb stride as PLANE_STRIDE*512 instead of
>   the PLANE_STRIDE*64 that it use with linear.
> - The cfb size will be calculated based on the aligned cfb stride to
>   guarantee we actually reserved enough stolen memory and the FBC hw
>   won't end up scribbling over whatever else is allocated in stolen
> - The compression limit handling we just do fully in the cfb allocation
>   code to make things less confusing
> 
> v2: Write the min cfb segment stride calculation in a more
>     explicit way to make it clear what is going on

Looks Good to me.
Reviewed-by: Uma Shankar <[email protected]>

> Cc: Uma Shankar <[email protected]>
> Signed-off-by: Ville Syrjälä <[email protected]>
> ---
>  drivers/gpu/drm/i915/display/intel_fbc.c | 163 +++++++++++++++--------
>  drivers/gpu/drm/i915/i915_drv.h          |   4 +-
>  2 files changed, 112 insertions(+), 55 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c
> b/drivers/gpu/drm/i915/display/intel_fbc.c
> index b1c1a23c36be..f51871f39eb6 100644
> --- a/drivers/gpu/drm/i915/display/intel_fbc.c
> +++ b/drivers/gpu/drm/i915/display/intel_fbc.c
> @@ -62,19 +62,76 @@ static void intel_fbc_get_plane_source_size(const struct
> intel_fbc_state_cache *
>               *height = cache->plane.src_h;
>  }
> 
> -static int intel_fbc_calculate_cfb_size(struct drm_i915_private *dev_priv,
> -                                     const struct intel_fbc_state_cache 
> *cache)
> +/* plane stride in pixels */
> +static unsigned int intel_fbc_plane_stride(const struct
> +intel_plane_state *plane_state)
>  {
> -     int lines;
> +     const struct drm_framebuffer *fb = plane_state->hw.fb;
> +     unsigned int stride;
> +
> +     stride = plane_state->view.color_plane[0].stride;
> +     if (!drm_rotation_90_or_270(plane_state->hw.rotation))
> +             stride /= fb->format->cpp[0];
> +
> +     return stride;
> +}
> +
> +/* plane stride based cfb stride in bytes, assuming 1:1 compression
> +limit */ static unsigned int _intel_fbc_cfb_stride(const struct
> +intel_fbc_state_cache *cache) {
> +     unsigned int cpp = 4; /* FBC always 4 bytes per pixel */
> +
> +     return cache->fb.stride * cpp;
> +}
> +
> +/* minimum acceptable cfb stride in bytes, assuming 1:1 compression
> +limit */ static unsigned int skl_fbc_min_cfb_stride(const struct
> +intel_fbc_state_cache *cache) {
> +     unsigned int limit = 4; /* 1:4 compression limit is the worst case */
> +     unsigned int cpp = 4; /* FBC always 4 bytes per pixel */
> +     unsigned int height = 4; /* FBC segment is 4 lines */
> +     unsigned int stride;
> +
> +     /* minimum segment stride we can use */
> +     stride = cache->plane.src_w * cpp * height / limit;
> +
> +     /*
> +      * At least some of the platforms require each 4 line segment to
> +      * be 512 byte aligned. Just do it always for simplicity.
> +      */
> +     stride = ALIGN(stride, 512);
> +
> +     /* convert back to single line equivalent with 1:1 compression limit */
> +     return stride * limit / height;
> +}
> +
> +/* properly aligned cfb stride in bytes, assuming 1:1 compression limit
> +*/ static unsigned int intel_fbc_cfb_stride(struct drm_i915_private *i915,
> +                                      const struct intel_fbc_state_cache 
> *cache)
> {
> +     unsigned int stride = _intel_fbc_cfb_stride(cache);
> +
> +     /*
> +      * At least some of the platforms require each 4 line segment to
> +      * be 512 byte aligned. Aligning each line to 512 bytes guarantees
> +      * that regardless of the compression limit we choose later.
> +      */
> +     if (DISPLAY_VER(i915) == 9)
> +             return max(ALIGN(stride, 512), skl_fbc_min_cfb_stride(cache));
> +     else
> +             return stride;
> +}
> +
> +static unsigned int intel_fbc_cfb_size(struct drm_i915_private *dev_priv,
> +                                    const struct intel_fbc_state_cache 
> *cache) {
> +     int lines = cache->plane.src_h;
> 
> -     intel_fbc_get_plane_source_size(cache, NULL, &lines);
>       if (DISPLAY_VER(dev_priv) == 7)
>               lines = min(lines, 2048);
>       else if (DISPLAY_VER(dev_priv) >= 8)
>               lines = min(lines, 2560);
> 
> -     /* Hardware needs the full buffer stride, not just the active area. */
> -     return lines * cache->fb.stride;
> +     return lines * intel_fbc_cfb_stride(dev_priv, cache);
>  }
> 
>  static void i8xx_fbc_deactivate(struct drm_i915_private *dev_priv) @@ -150,15
> +207,9 @@ static bool i8xx_fbc_is_active(struct drm_i915_private *dev_priv)
> 
>  static u32 g4x_dpfc_ctl_limit(struct drm_i915_private *i915)  {
> -     const struct intel_fbc_reg_params *params = &i915->fbc.params;
> -     int limit = i915->fbc.limit;
> -
> -     if (params->fb.format->cpp[0] == 2)
> -             limit <<= 1;
> -
> -     switch (limit) {
> +     switch (i915->fbc.limit) {
>       default:
> -             MISSING_CASE(limit);
> +             MISSING_CASE(i915->fbc.limit);
>               fallthrough;
>       case 1:
>               return DPFC_CTL_LIMIT_1X;
> @@ -301,7 +352,8 @@ static bool ilk_fbc_is_active(struct drm_i915_private
> *dev_priv)
> 
>  static void gen7_fbc_activate(struct drm_i915_private *dev_priv)  {
> -     struct intel_fbc_reg_params *params = &dev_priv->fbc.params;
> +     struct intel_fbc *fbc = &dev_priv->fbc;
> +     const struct intel_fbc_reg_params *params = &fbc->params;
>       u32 dpfc_ctl;
> 
>       /* Display WA #0529: skl, kbl, bxt. */ @@ -310,7 +362,7 @@ static void
> gen7_fbc_activate(struct drm_i915_private *dev_priv)
> 
>               if (params->override_cfb_stride)
>                       val |= CHICKEN_FBC_STRIDE_OVERRIDE |
> -                             CHICKEN_FBC_STRIDE(params-
> >override_cfb_stride);
> +                             CHICKEN_FBC_STRIDE(params-
> >override_cfb_stride / fbc->limit);
> 
>               intel_de_rmw(dev_priv, CHICKEN_MISC_4,
>                            CHICKEN_FBC_STRIDE_OVERRIDE |
> @@ -443,7 +495,12 @@ static u64 intel_fbc_stolen_end(struct drm_i915_private
> *dev_priv)
>       return min(end, intel_fbc_cfb_base_max(dev_priv));
>  }
> 
> -static int intel_fbc_max_limit(struct drm_i915_private *dev_priv, int fb_cpp)
> +static int intel_fbc_min_limit(int fb_cpp) {
> +     return fb_cpp == 2 ? 2 : 1;
> +}
> +
> +static int intel_fbc_max_limit(struct drm_i915_private *dev_priv)
>  {
>       /*
>        * FIXME: FBC1 can have arbitrary cfb stride, @@ -457,7 +514,7 @@ static
> int intel_fbc_max_limit(struct drm_i915_private *dev_priv, int fb_cpp)
>               return 1;
> 
>       /* FBC2 can only do 1:1, 1:2, 1:4 */
> -     return fb_cpp == 2 ? 2 : 4;
> +     return 4;
>  }
> 
>  static int find_compression_limit(struct drm_i915_private *dev_priv, @@ 
> -466,7
> +523,9 @@ static int find_compression_limit(struct drm_i915_private 
> *dev_priv,  {
>       struct intel_fbc *fbc = &dev_priv->fbc;
>       u64 end = intel_fbc_stolen_end(dev_priv);
> -     int ret, limit = 1;
> +     int ret, limit = intel_fbc_min_limit(fb_cpp);
> +
> +     size /= limit;
> 
>       /* Try to over-allocate to reduce reallocations and fragmentation. */
>       ret = i915_gem_stolen_insert_node_in_range(dev_priv, &fbc-
> >compressed_fb, @@ -474,7 +533,7 @@ static int find_compression_limit(struct
> drm_i915_private *dev_priv,
>       if (ret == 0)
>               return limit;
> 
> -     for (; limit <= intel_fbc_max_limit(dev_priv, fb_cpp); limit <<= 1) {
> +     for (; limit <= intel_fbc_max_limit(dev_priv); limit <<= 1) {
>               ret = i915_gem_stolen_insert_node_in_range(dev_priv, &fbc-
> >compressed_fb,
>                                                          size >>= 1, 4096, 0, 
> end);
>               if (ret == 0)
> @@ -505,10 +564,9 @@ static int intel_fbc_alloc_cfb(struct drm_i915_private
> *dev_priv,
>       ret = find_compression_limit(dev_priv, size, fb_cpp);
>       if (!ret)
>               goto err_llb;
> -     else if (ret > 1) {
> +     else if (ret > intel_fbc_min_limit(fb_cpp))
>               drm_info_once(&dev_priv->drm,
>                             "Reducing the compressed framebuffer size. This 
> may
> lead to less power savings than a non-reduced-size. Try to increase stolen 
> memory
> size if available in BIOS.\n");
> -     }
> 
>       fbc->limit = ret;
> 
> @@ -719,11 +777,7 @@ static void intel_fbc_update_state_cache(struct 
> intel_crtc
> *crtc,
> 
>       cache->fb.format = fb->format;
>       cache->fb.modifier = fb->modifier;
> -
> -     /* FIXME is this correct? */
> -     cache->fb.stride = plane_state->view.color_plane[0].stride;
> -     if (drm_rotation_90_or_270(plane_state->hw.rotation))
> -             cache->fb.stride *= fb->format->cpp[0];
> +     cache->fb.stride = intel_fbc_plane_stride(plane_state);
> 
>       /* FBC1 compression interval: arbitrary choice of 1 second */
>       cache->interval = drm_mode_vrefresh(&crtc_state->hw.adjusted_mode);
> @@ -746,27 +800,29 @@ static bool intel_fbc_cfb_size_changed(struct
> drm_i915_private *dev_priv)  {
>       struct intel_fbc *fbc = &dev_priv->fbc;
> 
> -     return intel_fbc_calculate_cfb_size(dev_priv, &fbc->state_cache) >
> +     return intel_fbc_cfb_size(dev_priv, &fbc->state_cache) >
>               fbc->compressed_fb.size * fbc->limit;  }
> 
> -static u16 intel_fbc_override_cfb_stride(struct drm_i915_private *dev_priv)
> +static u16 intel_fbc_override_cfb_stride(struct drm_i915_private *dev_priv,
> +                                      const struct intel_fbc_state_cache 
> *cache)
>  {
> -     struct intel_fbc *fbc = &dev_priv->fbc;
> -     struct intel_fbc_state_cache *cache = &fbc->state_cache;
> +     unsigned int stride = _intel_fbc_cfb_stride(cache);
> +     unsigned int stride_aligned = intel_fbc_cfb_stride(dev_priv, cache);
> 
> -     if ((DISPLAY_VER(dev_priv) == 9) &&
> -         cache->fb.modifier != I915_FORMAT_MOD_X_TILED)
> -             return DIV_ROUND_UP(cache->plane.src_w, 32 * fbc->limit) * 8;
> -     else
> -             return 0;
> -}
> +     /*
> +      * Override stride in 64 byte units per 4 line segment.
> +      *
> +      * Gen9 hw miscalculates cfb stride for linear as
> +      * PLANE_STRIDE*512 instead of PLANE_STRIDE*64, so
> +      * we always need to use the override there.
> +      */
> +     if (stride != stride_aligned ||
> +         (DISPLAY_VER(dev_priv) == 9 &&
> +          cache->fb.modifier == DRM_FORMAT_MOD_LINEAR))
> +             return stride_aligned * 4 / 64;
> 
> -static bool intel_fbc_override_cfb_stride_changed(struct drm_i915_private
> *dev_priv) -{
> -     struct intel_fbc *fbc = &dev_priv->fbc;
> -
> -     return fbc->params.override_cfb_stride !=
> intel_fbc_override_cfb_stride(dev_priv);
> +     return 0;
>  }
> 
>  static bool intel_fbc_can_enable(struct drm_i915_private *dev_priv) @@ -861,7
> +917,8 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc)
>               return false;
>       }
> 
> -     if (!stride_is_valid(dev_priv, cache->fb.modifier, cache->fb.stride)) {
> +     if (!stride_is_valid(dev_priv, cache->fb.modifier,
> +                          cache->fb.stride * cache->fb.format->cpp[0])) {
>               fbc->no_fbc_reason = "framebuffer stride not supported";
>               return false;
>       }
> @@ -949,9 +1006,9 @@ static void intel_fbc_get_reg_params(struct intel_crtc
> *crtc,
>       params->fb.modifier = cache->fb.modifier;
>       params->fb.stride = cache->fb.stride;
> 
> -     params->cfb_size = intel_fbc_calculate_cfb_size(dev_priv, cache);
> -
> -     params->override_cfb_stride = cache->override_cfb_stride;
> +     params->cfb_stride = intel_fbc_cfb_stride(dev_priv, cache);
> +     params->cfb_size = intel_fbc_cfb_size(dev_priv, cache);
> +     params->override_cfb_stride = intel_fbc_override_cfb_stride(dev_priv,
> +cache);
> 
>       params->plane_visible = cache->plane.visible;  } @@ -982,10 +1039,13 @@
> static bool intel_fbc_can_flip_nuke(const struct intel_crtc_state *crtc_state)
>       if (params->fb.stride != cache->fb.stride)
>               return false;
> 
> -     if (params->cfb_size != intel_fbc_calculate_cfb_size(dev_priv, cache))
> +     if (params->cfb_stride != intel_fbc_cfb_stride(dev_priv, cache))
>               return false;
> 
> -     if (params->override_cfb_stride != cache->override_cfb_stride)
> +     if (params->cfb_size != intel_fbc_cfb_size(dev_priv, cache))
> +             return false;
> +
> +     if (params->override_cfb_stride !=
> +intel_fbc_override_cfb_stride(dev_priv, cache))
>               return false;
> 
>       return true;
> @@ -1258,8 +1318,7 @@ static void intel_fbc_enable(struct intel_atomic_state
> *state,
> 
>       if (fbc->crtc) {
>               if (fbc->crtc != crtc ||
> -                 (!intel_fbc_cfb_size_changed(dev_priv) &&
> -                  !intel_fbc_override_cfb_stride_changed(dev_priv)))
> +                 !intel_fbc_cfb_size_changed(dev_priv))
>                       goto out;
> 
>               __intel_fbc_disable(dev_priv);
> @@ -1274,15 +1333,13 @@ static void intel_fbc_enable(struct intel_atomic_state
> *state,
>               goto out;
> 
>       if (intel_fbc_alloc_cfb(dev_priv,
> -                             intel_fbc_calculate_cfb_size(dev_priv, cache),
> +                             intel_fbc_cfb_size(dev_priv, cache),
>                               plane_state->hw.fb->format->cpp[0])) {
>               cache->plane.visible = false;
>               fbc->no_fbc_reason = "not enough stolen memory";
>               goto out;
>       }
> 
> -     cache->override_cfb_stride = intel_fbc_override_cfb_stride(dev_priv);
> -
>       drm_dbg_kms(&dev_priv->drm, "Enabling FBC on pipe %c\n",
>                   pipe_name(crtc->pipe));
>       fbc->no_fbc_reason = "FBC enabled but not active yet\n"; diff --git
> a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index
> cc355aa05dbf..804c2a470e94 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -453,7 +453,6 @@ struct intel_fbc {
>               } fb;
> 
>               unsigned int fence_y_offset;
> -             u16 override_cfb_stride;
>               u16 interval;
>               s8 fence_id;
>               bool psr2_active;
> @@ -478,7 +477,8 @@ struct intel_fbc {
>                       u64 modifier;
>               } fb;
> 
> -             int cfb_size;
> +             unsigned int cfb_stride;
> +             unsigned int cfb_size;
>               unsigned int fence_y_offset;
>               u16 override_cfb_stride;
>               u16 interval;
> --
> 2.32.0

Reply via email to