Re: [Intel-gfx] [PATCH 06/17] drm/i915/icl: Do not fix dbuf block size to 512

2018-01-29 Thread James Ausmus
On Mon, Jan 29, 2018 at 09:07:30PM -0200, Paulo Zanoni wrote:
> From: Mahesh Kumar 
> 
> GEN9/10 had a fixed DBuf block size of 512. In GEN11 the DBuf block
> size is no longer fixed; it varies with bits per pixel and tiling:
> for an 8bpp Yf-tiled surface the block size is 256, otherwise it is
> 512.
> 
> This patch computes the block size per plane and uses it in the
> watermark calculations instead of the hard-coded 512.
> 
> v2 (from Paulo):
>   - Make it compile.
>   - Fix a few coding style issues.
> v3:
>   - Rebase on top of upstream patches
> v4 (from Paulo):
>   - Bikeshed if statements (James).
> 
> Reviewed-by: Paulo Zanoni 
> Signed-off-by: Mahesh Kumar 
> Signed-off-by: Paulo Zanoni 

Reviewed-by: James Ausmus 

> ---
>  drivers/gpu/drm/i915/i915_drv.h |  1 +
>  drivers/gpu/drm/i915/intel_pm.c | 24 +++++++++++++++++-------
>  2 files changed, 18 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 454d8f937fae..d93e784c3f14 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1460,6 +1460,7 @@ struct skl_wm_params {
>   uint_fixed_16_16_t plane_blocks_per_line;
>   uint_fixed_16_16_t y_tile_minimum;
>   uint32_t linetime_us;
> + uint32_t dbuf_block_size;
>  };
>  
>  /*
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 11aac65d1543..985642cf1c9a 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -4312,7 +4312,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
>  */
>  static uint_fixed_16_16_t
>  skl_wm_method1(const struct drm_i915_private *dev_priv, uint32_t pixel_rate,
> -uint8_t cpp, uint32_t latency)
> +uint8_t cpp, uint32_t latency, uint32_t dbuf_block_size)
>  {
>   uint32_t wm_intermediate_val;
>   uint_fixed_16_16_t ret;
> @@ -4321,7 +4321,7 @@ skl_wm_method1(const struct drm_i915_private *dev_priv, 
> uint32_t pixel_rate,
>   return FP_16_16_MAX;
>  
>   wm_intermediate_val = latency * pixel_rate * cpp;
> - ret = div_fixed16(wm_intermediate_val, 1000 * 512);
> + ret = div_fixed16(wm_intermediate_val, 1000 * dbuf_block_size);
>  
>   if (INTEL_GEN(dev_priv) >= 10)
>   ret = add_fixed16_u32(ret, 1);
> @@ -4431,6 +4431,12 @@ skl_compute_plane_wm_params(const struct 
> drm_i915_private *dev_priv,
>   wp->plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate,
>intel_pstate);
>  
> + if (INTEL_GEN(dev_priv) >= 11 &&
> + fb->modifier == I915_FORMAT_MOD_Yf_TILED && wp->cpp == 8)
> + wp->dbuf_block_size = 256;
> + else
> + wp->dbuf_block_size = 512;
> +
>   if (drm_rotation_90_or_270(pstate->rotation)) {
>  
>   switch (wp->cpp) {
> @@ -4457,7 +4463,8 @@ skl_compute_plane_wm_params(const struct 
> drm_i915_private *dev_priv,
>   wp->plane_bytes_per_line = wp->width * wp->cpp;
>   if (wp->y_tiled) {
>   interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line *
> -wp->y_min_scanlines, 512);
> +wp->y_min_scanlines,
> +wp->dbuf_block_size);
>  
>   if (INTEL_GEN(dev_priv) >= 10)
>   interm_pbpl++;
> @@ -4465,10 +4472,12 @@ skl_compute_plane_wm_params(const struct 
> drm_i915_private *dev_priv,
>   wp->plane_blocks_per_line = div_fixed16(interm_pbpl,
>   wp->y_min_scanlines);
>   } else if (wp->x_tiled && IS_GEN9(dev_priv)) {
> - interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, 512);
> + interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line,
> +wp->dbuf_block_size);
>   wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
>   } else {
> - interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, 512) + 1;
> + interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line,
> +wp->dbuf_block_size) + 1;
>   wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
>   }
>  
> @@ -4515,7 +4524,7 @@ static int skl_compute_plane_wm(const struct 
> drm_i915_private *dev_priv,
>   latency += 15;
>  
>   method1 = skl_wm_method1(dev_priv, wp->plane_pixel_rate,
> -  wp->cpp, latency);
> +  wp->cpp, latency, wp->dbuf_block_size);
>   method2 = skl_wm_method2(wp->plane_pixel_rate,
>cstate->base.adjusted_mode.crtc_htotal,
>latency,
> @@ -4525,7 +4534,8 @@ static int skl_compute_plane_wm(const struct 
> drm_i915_private *dev_priv,
>   selected_result = max_fixed16(method2, wp->y_tile_minimum);
>   } else {
>   if ((wp->cpp * cstate->base.adjusted_mo

[Intel-gfx] [PATCH 06/17] drm/i915/icl: Do not fix dbuf block size to 512

2018-01-29 Thread Paulo Zanoni
From: Mahesh Kumar 

GEN9/10 had a fixed DBuf block size of 512. In GEN11 the DBuf block
size is no longer fixed; it varies with bits per pixel and tiling:
for an 8bpp Yf-tiled surface the block size is 256, otherwise it is
512.

This patch computes the block size per plane and uses it in the
watermark calculations instead of the hard-coded 512.

v2 (from Paulo):
  - Make it compile.
  - Fix a few coding style issues.
v3:
  - Rebase on top of upstream patches
v4 (from Paulo):
  - Bikeshed if statements (James).

Reviewed-by: Paulo Zanoni 
Signed-off-by: Mahesh Kumar 
Signed-off-by: Paulo Zanoni 
---
 drivers/gpu/drm/i915/i915_drv.h |  1 +
 drivers/gpu/drm/i915/intel_pm.c | 24 +++++++++++++++++-------
 2 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 454d8f937fae..d93e784c3f14 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1460,6 +1460,7 @@ struct skl_wm_params {
uint_fixed_16_16_t plane_blocks_per_line;
uint_fixed_16_16_t y_tile_minimum;
uint32_t linetime_us;
+   uint32_t dbuf_block_size;
 };
 
 /*
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 11aac65d1543..985642cf1c9a 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4312,7 +4312,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
 */
 static uint_fixed_16_16_t
 skl_wm_method1(const struct drm_i915_private *dev_priv, uint32_t pixel_rate,
-  uint8_t cpp, uint32_t latency)
+  uint8_t cpp, uint32_t latency, uint32_t dbuf_block_size)
 {
uint32_t wm_intermediate_val;
uint_fixed_16_16_t ret;
@@ -4321,7 +4321,7 @@ skl_wm_method1(const struct drm_i915_private *dev_priv, 
uint32_t pixel_rate,
return FP_16_16_MAX;
 
wm_intermediate_val = latency * pixel_rate * cpp;
-   ret = div_fixed16(wm_intermediate_val, 1000 * 512);
+   ret = div_fixed16(wm_intermediate_val, 1000 * dbuf_block_size);
 
if (INTEL_GEN(dev_priv) >= 10)
ret = add_fixed16_u32(ret, 1);
@@ -4431,6 +4431,12 @@ skl_compute_plane_wm_params(const struct 
drm_i915_private *dev_priv,
wp->plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate,
 intel_pstate);
 
+   if (INTEL_GEN(dev_priv) >= 11 &&
+   fb->modifier == I915_FORMAT_MOD_Yf_TILED && wp->cpp == 8)
+   wp->dbuf_block_size = 256;
+   else
+   wp->dbuf_block_size = 512;
+
if (drm_rotation_90_or_270(pstate->rotation)) {
 
switch (wp->cpp) {
@@ -4457,7 +4463,8 @@ skl_compute_plane_wm_params(const struct drm_i915_private 
*dev_priv,
wp->plane_bytes_per_line = wp->width * wp->cpp;
if (wp->y_tiled) {
interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line *
-  wp->y_min_scanlines, 512);
+  wp->y_min_scanlines,
+  wp->dbuf_block_size);
 
if (INTEL_GEN(dev_priv) >= 10)
interm_pbpl++;
@@ -4465,10 +4472,12 @@ skl_compute_plane_wm_params(const struct 
drm_i915_private *dev_priv,
wp->plane_blocks_per_line = div_fixed16(interm_pbpl,
wp->y_min_scanlines);
} else if (wp->x_tiled && IS_GEN9(dev_priv)) {
-   interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, 512);
+   interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line,
+  wp->dbuf_block_size);
wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
} else {
-   interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, 512) + 1;
+   interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line,
+  wp->dbuf_block_size) + 1;
wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
}
 
@@ -4515,7 +4524,7 @@ static int skl_compute_plane_wm(const struct 
drm_i915_private *dev_priv,
latency += 15;
 
method1 = skl_wm_method1(dev_priv, wp->plane_pixel_rate,
-wp->cpp, latency);
+wp->cpp, latency, wp->dbuf_block_size);
method2 = skl_wm_method2(wp->plane_pixel_rate,
 cstate->base.adjusted_mode.crtc_htotal,
 latency,
@@ -4525,7 +4534,8 @@ static int skl_compute_plane_wm(const struct 
drm_i915_private *dev_priv,
selected_result = max_fixed16(method2, wp->y_tile_minimum);
} else {
if ((wp->cpp * cstate->base.adjusted_mode.crtc_htotal /
-512 < 1) && (wp->plane_bytes_per_line / 512 < 1))
+wp->dbuf_block_size < 1) &&
+(wp->plane_bytes_per_line / wp->dbuf_block_size < 1))
   

Re: [Intel-gfx] [PATCH 06/17] drm/i915/icl: Do not fix dbuf block size to 512

2018-01-23 Thread James Ausmus
On Tue, Jan 23, 2018 at 05:05:25PM -0200, Paulo Zanoni wrote:
> From: Mahesh Kumar 
> 
> GEN9/10 had a fixed DBuf block size of 512. In GEN11 the DBuf block
> size is no longer fixed; it varies with bits per pixel and tiling:
> for an 8bpp Yf-tiled surface the block size is 256, otherwise it is
> 512.
> 
> This patch computes the block size per plane and uses it in the
> watermark calculations instead of the hard-coded 512.
> 
> v2 (from Paulo):
>   - Make it compile.
>   - Fix a few coding style issues.
> v3:
>   - Rebase on top of upstream patches
> 
> Signed-off-by: Mahesh Kumar 
> Signed-off-by: Paulo Zanoni 
> ---
>  drivers/gpu/drm/i915/i915_drv.h |  1 +
>  drivers/gpu/drm/i915/intel_pm.c | 27 ++++++++++++++++++++-------
>  2 files changed, 21 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 8333692dac5a..cc5ac327f267 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1460,6 +1460,7 @@ struct skl_wm_params {
>   uint_fixed_16_16_t plane_blocks_per_line;
>   uint_fixed_16_16_t y_tile_minimum;
>   uint32_t linetime_us;
> + uint32_t dbuf_block_size;
>  };
>  
>  /*
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 11aac65d1543..44d952a3d9a6 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -4312,7 +4312,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
>  */
>  static uint_fixed_16_16_t
>  skl_wm_method1(const struct drm_i915_private *dev_priv, uint32_t pixel_rate,
> -uint8_t cpp, uint32_t latency)
> +uint8_t cpp, uint32_t latency, uint32_t dbuf_block_size)
>  {
>   uint32_t wm_intermediate_val;
>   uint_fixed_16_16_t ret;
> @@ -4321,7 +4321,7 @@ skl_wm_method1(const struct drm_i915_private *dev_priv, 
> uint32_t pixel_rate,
>   return FP_16_16_MAX;
>  
>   wm_intermediate_val = latency * pixel_rate * cpp;
> - ret = div_fixed16(wm_intermediate_val, 1000 * 512);
> + ret = div_fixed16(wm_intermediate_val, 1000 * dbuf_block_size);
>  
>   if (INTEL_GEN(dev_priv) >= 10)
>   ret = add_fixed16_u32(ret, 1);
> @@ -4431,6 +4431,15 @@ skl_compute_plane_wm_params(const struct 
> drm_i915_private *dev_priv,
>   wp->plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate,
>intel_pstate);
>  
> + if (INTEL_GEN(dev_priv) >= 11) {
> + if (fb->modifier == I915_FORMAT_MOD_Yf_TILED && wp->cpp == 8)
> + wp->dbuf_block_size = 256;
> + else
> + wp->dbuf_block_size = 512;
> + } else {
> + wp->dbuf_block_size = 512;
> + }

This could be simplified as (approximately)

wp->dbuf_block_size = 512;
if (INTEL_GEN(dev_priv) >= 11 && fb->modifier == I915_FORMAT_MOD_Yf_TILED &&
wp->cpp == 8)
wp->dbuf_block_size = 256;


> +
>   if (drm_rotation_90_or_270(pstate->rotation)) {
>  
>   switch (wp->cpp) {
> @@ -4457,7 +4466,8 @@ skl_compute_plane_wm_params(const struct 
> drm_i915_private *dev_priv,
>   wp->plane_bytes_per_line = wp->width * wp->cpp;
>   if (wp->y_tiled) {
>   interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line *
> -wp->y_min_scanlines, 512);
> +wp->y_min_scanlines,
> +wp->dbuf_block_size);
>  
>   if (INTEL_GEN(dev_priv) >= 10)
>   interm_pbpl++;
> @@ -4465,10 +4475,12 @@ skl_compute_plane_wm_params(const struct 
> drm_i915_private *dev_priv,
>   wp->plane_blocks_per_line = div_fixed16(interm_pbpl,
>   wp->y_min_scanlines);
>   } else if (wp->x_tiled && IS_GEN9(dev_priv)) {
> - interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, 512);
> + interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line,
> +wp->dbuf_block_size);
>   wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
>   } else {
> - interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, 512) + 1;
> + interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line,
> +wp->dbuf_block_size) + 1;
>   wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
>   }
>  
> @@ -4515,7 +4527,7 @@ static int skl_compute_plane_wm(const struct 
> drm_i915_private *dev_priv,
>   latency += 15;
>  
>   method1 = skl_wm_method1(dev_priv, wp->plane_pixel_rate,
> -  wp->cpp, latency);
> +  wp->cpp, latency, wp->dbuf_block_size);
>   method2 = skl_wm_method2(wp->plane_pixel_rate,
>cstate->base.adjusted_mode.crtc_htotal,
>latency,
> @@ -4525,7 +4537,8 @@ static int skl_compute_plane_w

[Intel-gfx] [PATCH 06/17] drm/i915/icl: Do not fix dbuf block size to 512

2018-01-23 Thread Paulo Zanoni
From: Mahesh Kumar 

GEN9/10 had a fixed DBuf block size of 512. In GEN11 the DBuf block
size is no longer fixed; it varies with bits per pixel and tiling:
for an 8bpp Yf-tiled surface the block size is 256, otherwise it is
512.

This patch computes the block size per plane and uses it in the
watermark calculations instead of the hard-coded 512.

v2 (from Paulo):
  - Make it compile.
  - Fix a few coding style issues.
v3:
  - Rebase on top of upstream patches

Signed-off-by: Mahesh Kumar 
Signed-off-by: Paulo Zanoni 
---
 drivers/gpu/drm/i915/i915_drv.h |  1 +
 drivers/gpu/drm/i915/intel_pm.c | 27 ++++++++++++++++++++-------
 2 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8333692dac5a..cc5ac327f267 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1460,6 +1460,7 @@ struct skl_wm_params {
uint_fixed_16_16_t plane_blocks_per_line;
uint_fixed_16_16_t y_tile_minimum;
uint32_t linetime_us;
+   uint32_t dbuf_block_size;
 };
 
 /*
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 11aac65d1543..44d952a3d9a6 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4312,7 +4312,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
 */
 static uint_fixed_16_16_t
 skl_wm_method1(const struct drm_i915_private *dev_priv, uint32_t pixel_rate,
-  uint8_t cpp, uint32_t latency)
+  uint8_t cpp, uint32_t latency, uint32_t dbuf_block_size)
 {
uint32_t wm_intermediate_val;
uint_fixed_16_16_t ret;
@@ -4321,7 +4321,7 @@ skl_wm_method1(const struct drm_i915_private *dev_priv, 
uint32_t pixel_rate,
return FP_16_16_MAX;
 
wm_intermediate_val = latency * pixel_rate * cpp;
-   ret = div_fixed16(wm_intermediate_val, 1000 * 512);
+   ret = div_fixed16(wm_intermediate_val, 1000 * dbuf_block_size);
 
if (INTEL_GEN(dev_priv) >= 10)
ret = add_fixed16_u32(ret, 1);
@@ -4431,6 +4431,15 @@ skl_compute_plane_wm_params(const struct 
drm_i915_private *dev_priv,
wp->plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate,
 intel_pstate);
 
+   if (INTEL_GEN(dev_priv) >= 11) {
+   if (fb->modifier == I915_FORMAT_MOD_Yf_TILED && wp->cpp == 8)
+   wp->dbuf_block_size = 256;
+   else
+   wp->dbuf_block_size = 512;
+   } else {
+   wp->dbuf_block_size = 512;
+   }
+
if (drm_rotation_90_or_270(pstate->rotation)) {
 
switch (wp->cpp) {
@@ -4457,7 +4466,8 @@ skl_compute_plane_wm_params(const struct drm_i915_private 
*dev_priv,
wp->plane_bytes_per_line = wp->width * wp->cpp;
if (wp->y_tiled) {
interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line *
-  wp->y_min_scanlines, 512);
+  wp->y_min_scanlines,
+  wp->dbuf_block_size);
 
if (INTEL_GEN(dev_priv) >= 10)
interm_pbpl++;
@@ -4465,10 +4475,12 @@ skl_compute_plane_wm_params(const struct 
drm_i915_private *dev_priv,
wp->plane_blocks_per_line = div_fixed16(interm_pbpl,
wp->y_min_scanlines);
} else if (wp->x_tiled && IS_GEN9(dev_priv)) {
-   interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, 512);
+   interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line,
+  wp->dbuf_block_size);
wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
} else {
-   interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, 512) + 1;
+   interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line,
+  wp->dbuf_block_size) + 1;
wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl);
}
 
@@ -4515,7 +4527,7 @@ static int skl_compute_plane_wm(const struct 
drm_i915_private *dev_priv,
latency += 15;
 
method1 = skl_wm_method1(dev_priv, wp->plane_pixel_rate,
-wp->cpp, latency);
+wp->cpp, latency, wp->dbuf_block_size);
method2 = skl_wm_method2(wp->plane_pixel_rate,
 cstate->base.adjusted_mode.crtc_htotal,
 latency,
@@ -4525,7 +4537,8 @@ static int skl_compute_plane_wm(const struct 
drm_i915_private *dev_priv,
selected_result = max_fixed16(method2, wp->y_tile_minimum);
} else {
if ((wp->cpp * cstate->base.adjusted_mode.crtc_htotal /
-512 < 1) && (wp->plane_bytes_per_line / 512 < 1))
+wp->dbuf_block_size < 1) &&
+(wp->plane_bytes_per_line / wp->d