Re: [Intel-gfx] [PATCH] drm/i915: Enable Tile4 tiling mode

2022-05-13 Thread Das, Nirmoy



On 5/13/2022 7:47 AM, Zbigniew Kempczyński wrote:

On Thu, May 12, 2022 at 03:26:00PM +0200, Nirmoy Das wrote:

From: Bommu Krishnaiah 

Enable Tile4 tiling mode on platforms that support
Tile4 but not TileY, such as DG2.

v2: disable X-tile for iGPU in fastblit
 fix checkpatch --strict warnings

Signed-off-by: Bommu Krishnaiah 
Co-developed-by: Nirmoy Das 
Signed-off-by: Nirmoy Das 
---
  .../i915/gem/selftests/i915_gem_client_blt.c  | 235 ++
  drivers/gpu/drm/i915/gt/intel_gpu_commands.h  |  22 ++
  2 files changed, 212 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
index ddd0772fd828..e16661029c78 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
@@ -6,6 +6,7 @@
  #include "i915_selftest.h"
  
  #include "gt/intel_context.h"

+#include "gt/intel_engine_regs.h"
  #include "gt/intel_engine_user.h"
  #include "gt/intel_gpu_commands.h"
  #include "gt/intel_gt.h"
@@ -18,10 +19,71 @@
  #include "huge_gem_object.h"
  #include "mock_context.h"
  
+#define OW_SIZE 16  /* in bytes */

+#define F_SUBTILE_SIZE 64   /* in bytes */
+#define F_TILE_WIDTH 128/* in bytes */
+#define F_TILE_HEIGHT 32/* in pixels */
+#define F_SUBTILE_WIDTH  OW_SIZE/* in bytes */
+#define F_SUBTILE_HEIGHT 4  /* in pixels */
+
+static int linear_x_y_to_ftiled_pos(int x, int y, u32 stride, int bpp)
+{
+   int tile_base;
+   int tile_x, tile_y;
+   int swizzle, subtile;
+   int pixel_size = bpp / 8;
+   int pos;
+
+   /*
+* Subtile remapping for F tile. Note that map[a]==b implies map[b]==a
+* so we can use the same table to tile and untile.
+*/
+   static const u8 f_subtile_map[] = {
+0,  1,  2,  3,  8,  9, 10, 11,
+4,  5,  6,  7, 12, 13, 14, 15,
+   16, 17, 18, 19, 24, 25, 26, 27,
+   20, 21, 22, 23, 28, 29, 30, 31,
+   32, 33, 34, 35, 40, 41, 42, 43,
+   36, 37, 38, 39, 44, 45, 46, 47,
+   48, 49, 50, 51, 56, 57, 58, 59,
+   52, 53, 54, 55, 60, 61, 62, 63
+   };
+
+   x *= pixel_size;
+   /*
+* Where does the 4k tile start (in bytes)?  This is the same for Y and
+* F so we can use the Y-tile algorithm to get to that point.
+*/
+   tile_base =
+   y / F_TILE_HEIGHT * stride * F_TILE_HEIGHT +
+   x / F_TILE_WIDTH * 4096;
+
+   /* Find pixel within tile */
+   tile_x = x % F_TILE_WIDTH;
+   tile_y = y % F_TILE_HEIGHT;
+
+   /* And figure out the subtile within the 4k tile */
+   subtile = tile_y / F_SUBTILE_HEIGHT * 8 + tile_x / F_SUBTILE_WIDTH;
+
+   /* Swizzle the subtile number according to the bspec diagram */
+   swizzle = f_subtile_map[subtile];
+
+   /* Calculate new position */
+   pos = tile_base +
+   swizzle * F_SUBTILE_SIZE +
+   tile_y % F_SUBTILE_HEIGHT * OW_SIZE +
+   tile_x % F_SUBTILE_WIDTH;
+
+   GEM_BUG_ON(!IS_ALIGNED(pos, pixel_size));
+
+   return pos / pixel_size * 4;
+}
+
  enum client_tiling {
CLIENT_TILING_LINEAR,
CLIENT_TILING_X,
CLIENT_TILING_Y,
+   CLIENT_TILING_4,
CLIENT_NUM_TILING_TYPES
  };
  
@@ -45,6 +107,21 @@ struct tiled_blits {

u32 height;
  };
  
+static bool fast_blit_ok(struct blit_buffer *buf)

+{
+   int gen = GRAPHICS_VER(buf->vma->vm->i915);
+
+   if (gen < 9)
+   return false;
+
+   if (gen < 12)
+   return true;
+
+   /* filter out platforms with unsupported X-tile support(iGPUs and DG1) 
in fastblit */
+   return !((IS_DG1(buf->vma->vm->i915) || (gen == 12 && 
!HAS_LMEM(buf->vma->vm->i915))) &&
+   buf->tiling == CLIENT_TILING_X);
+}
+

What would you say to this:

static bool supports_x_tiling(const struct drm_i915_private *i915)
{
int gen = GRAPHICS_VER(i915);

if (gen < 12)
return true;

if (!HAS_LMEM(i915) || IS_DG1(i915))
return false;

return true;
}

static bool fast_blit_ok(const struct blit_buffer *buf)
{
int gen = GRAPHICS_VER(buf->vma->vm->i915);

if (gen < 9)
return false;

if (gen < 12)
return true;

/* filter out platforms with unsupported X-tile support in fastblit */
if (buf->tiling == CLIENT_TILING_X && 
!supports_x_tiling(buf->vma->vm->i915))
return false;

return true;
}



Looks better, I will resend with that.


Thanks,

Nirmoy



The rest of the code looks good to me.

--
Zbigniew


  static int prepare_blit(const struct tiled_blits *t,
struct blit_buffer *dst,
struct blit_buffer *src,
@@ -59,51 

Re: [Intel-gfx] [PATCH] drm/i915: Enable Tile4 tiling mode

2022-05-12 Thread Zbigniew Kempczyński
On Thu, May 12, 2022 at 03:26:00PM +0200, Nirmoy Das wrote:
> From: Bommu Krishnaiah 
> 
> Enable Tile4 tiling mode on platforms that support
> Tile4 but not TileY, such as DG2.
> 
> v2: disable X-tile for iGPU in fastblit
> fix checkpatch --strict warnings
> 
> Signed-off-by: Bommu Krishnaiah 
> Co-developed-by: Nirmoy Das 
> Signed-off-by: Nirmoy Das 
> ---
>  .../i915/gem/selftests/i915_gem_client_blt.c  | 235 ++
>  drivers/gpu/drm/i915/gt/intel_gpu_commands.h  |  22 ++
>  2 files changed, 212 insertions(+), 45 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c 
> b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
> index ddd0772fd828..e16661029c78 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
> @@ -6,6 +6,7 @@
>  #include "i915_selftest.h"
>  
>  #include "gt/intel_context.h"
> +#include "gt/intel_engine_regs.h"
>  #include "gt/intel_engine_user.h"
>  #include "gt/intel_gpu_commands.h"
>  #include "gt/intel_gt.h"
> @@ -18,10 +19,71 @@
>  #include "huge_gem_object.h"
>  #include "mock_context.h"
>  
> +#define OW_SIZE 16  /* in bytes */
> +#define F_SUBTILE_SIZE 64   /* in bytes */
> +#define F_TILE_WIDTH 128/* in bytes */
> +#define F_TILE_HEIGHT 32/* in pixels */
> +#define F_SUBTILE_WIDTH  OW_SIZE/* in bytes */
> +#define F_SUBTILE_HEIGHT 4  /* in pixels */
> +
> +static int linear_x_y_to_ftiled_pos(int x, int y, u32 stride, int bpp)
> +{
> + int tile_base;
> + int tile_x, tile_y;
> + int swizzle, subtile;
> + int pixel_size = bpp / 8;
> + int pos;
> +
> + /*
> +  * Subtile remapping for F tile. Note that map[a]==b implies map[b]==a
> +  * so we can use the same table to tile and untile.
> +  */
> + static const u8 f_subtile_map[] = {
> +  0,  1,  2,  3,  8,  9, 10, 11,
> +  4,  5,  6,  7, 12, 13, 14, 15,
> + 16, 17, 18, 19, 24, 25, 26, 27,
> + 20, 21, 22, 23, 28, 29, 30, 31,
> + 32, 33, 34, 35, 40, 41, 42, 43,
> + 36, 37, 38, 39, 44, 45, 46, 47,
> + 48, 49, 50, 51, 56, 57, 58, 59,
> + 52, 53, 54, 55, 60, 61, 62, 63
> + };
> +
> + x *= pixel_size;
> + /*
> +  * Where does the 4k tile start (in bytes)?  This is the same for Y and
> +  * F so we can use the Y-tile algorithm to get to that point.
> +  */
> + tile_base =
> + y / F_TILE_HEIGHT * stride * F_TILE_HEIGHT +
> + x / F_TILE_WIDTH * 4096;
> +
> + /* Find pixel within tile */
> + tile_x = x % F_TILE_WIDTH;
> + tile_y = y % F_TILE_HEIGHT;
> +
> + /* And figure out the subtile within the 4k tile */
> + subtile = tile_y / F_SUBTILE_HEIGHT * 8 + tile_x / F_SUBTILE_WIDTH;
> +
> + /* Swizzle the subtile number according to the bspec diagram */
> + swizzle = f_subtile_map[subtile];
> +
> + /* Calculate new position */
> + pos = tile_base +
> + swizzle * F_SUBTILE_SIZE +
> + tile_y % F_SUBTILE_HEIGHT * OW_SIZE +
> + tile_x % F_SUBTILE_WIDTH;
> +
> + GEM_BUG_ON(!IS_ALIGNED(pos, pixel_size));
> +
> + return pos / pixel_size * 4;
> +}
> +
>  enum client_tiling {
>   CLIENT_TILING_LINEAR,
>   CLIENT_TILING_X,
>   CLIENT_TILING_Y,
> + CLIENT_TILING_4,
>   CLIENT_NUM_TILING_TYPES
>  };
>  
> @@ -45,6 +107,21 @@ struct tiled_blits {
>   u32 height;
>  };
>  
> +static bool fast_blit_ok(struct blit_buffer *buf)
> +{
> + int gen = GRAPHICS_VER(buf->vma->vm->i915);
> +
> + if (gen < 9)
> + return false;
> +
> + if (gen < 12)
> + return true;
> +
> + /* filter out platforms with unsupported X-tile support(iGPUs and DG1) 
> in fastblit */
> + return !((IS_DG1(buf->vma->vm->i915) || (gen == 12 && 
> !HAS_LMEM(buf->vma->vm->i915))) &&
> + buf->tiling == CLIENT_TILING_X);
> +}
> +

What would you say to this:

static bool supports_x_tiling(const struct drm_i915_private *i915)
{
int gen = GRAPHICS_VER(i915);

if (gen < 12)
return true;

if (!HAS_LMEM(i915) || IS_DG1(i915))
return false;

return true;
}

static bool fast_blit_ok(const struct blit_buffer *buf)
{
int gen = GRAPHICS_VER(buf->vma->vm->i915);

if (gen < 9)
return false;

if (gen < 12)
return true;

/* filter out platforms with unsupported X-tile support in fastblit */
if (buf->tiling == CLIENT_TILING_X && 
!supports_x_tiling(buf->vma->vm->i915))
return false;

return true;
}

The rest of the code looks good to me.

--
Zbigniew

>  static int prepare_blit(const struct tiled_blits *t,
>   struct blit_buffer *dst,
>   struct blit_buffer *src,
> @@ -59,51 

[Intel-gfx] [PATCH] drm/i915: Enable Tile4 tiling mode

2022-05-12 Thread Nirmoy Das
From: Bommu Krishnaiah 

Enable Tile4 tiling mode on platforms that support
Tile4 but not TileY, such as DG2.

v2: disable X-tile for iGPU in fastblit
fix checkpatch --strict warnings

Signed-off-by: Bommu Krishnaiah 
Co-developed-by: Nirmoy Das 
Signed-off-by: Nirmoy Das 
---
 .../i915/gem/selftests/i915_gem_client_blt.c  | 235 ++
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h  |  22 ++
 2 files changed, 212 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
index ddd0772fd828..e16661029c78 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
@@ -6,6 +6,7 @@
 #include "i915_selftest.h"
 
 #include "gt/intel_context.h"
+#include "gt/intel_engine_regs.h"
 #include "gt/intel_engine_user.h"
 #include "gt/intel_gpu_commands.h"
 #include "gt/intel_gt.h"
@@ -18,10 +19,71 @@
 #include "huge_gem_object.h"
 #include "mock_context.h"
 
+#define OW_SIZE 16  /* in bytes */
+#define F_SUBTILE_SIZE 64   /* in bytes */
+#define F_TILE_WIDTH 128/* in bytes */
+#define F_TILE_HEIGHT 32/* in pixels */
+#define F_SUBTILE_WIDTH  OW_SIZE/* in bytes */
+#define F_SUBTILE_HEIGHT 4  /* in pixels */
+
+static int linear_x_y_to_ftiled_pos(int x, int y, u32 stride, int bpp)
+{
+   int tile_base;
+   int tile_x, tile_y;
+   int swizzle, subtile;
+   int pixel_size = bpp / 8;
+   int pos;
+
+   /*
+* Subtile remapping for F tile. Note that map[a]==b implies map[b]==a
+* so we can use the same table to tile and untile.
+*/
+   static const u8 f_subtile_map[] = {
+0,  1,  2,  3,  8,  9, 10, 11,
+4,  5,  6,  7, 12, 13, 14, 15,
+   16, 17, 18, 19, 24, 25, 26, 27,
+   20, 21, 22, 23, 28, 29, 30, 31,
+   32, 33, 34, 35, 40, 41, 42, 43,
+   36, 37, 38, 39, 44, 45, 46, 47,
+   48, 49, 50, 51, 56, 57, 58, 59,
+   52, 53, 54, 55, 60, 61, 62, 63
+   };
+
+   x *= pixel_size;
+   /*
+* Where does the 4k tile start (in bytes)?  This is the same for Y and
+* F so we can use the Y-tile algorithm to get to that point.
+*/
+   tile_base =
+   y / F_TILE_HEIGHT * stride * F_TILE_HEIGHT +
+   x / F_TILE_WIDTH * 4096;
+
+   /* Find pixel within tile */
+   tile_x = x % F_TILE_WIDTH;
+   tile_y = y % F_TILE_HEIGHT;
+
+   /* And figure out the subtile within the 4k tile */
+   subtile = tile_y / F_SUBTILE_HEIGHT * 8 + tile_x / F_SUBTILE_WIDTH;
+
+   /* Swizzle the subtile number according to the bspec diagram */
+   swizzle = f_subtile_map[subtile];
+
+   /* Calculate new position */
+   pos = tile_base +
+   swizzle * F_SUBTILE_SIZE +
+   tile_y % F_SUBTILE_HEIGHT * OW_SIZE +
+   tile_x % F_SUBTILE_WIDTH;
+
+   GEM_BUG_ON(!IS_ALIGNED(pos, pixel_size));
+
+   return pos / pixel_size * 4;
+}
+
 enum client_tiling {
CLIENT_TILING_LINEAR,
CLIENT_TILING_X,
CLIENT_TILING_Y,
+   CLIENT_TILING_4,
CLIENT_NUM_TILING_TYPES
 };
 
@@ -45,6 +107,21 @@ struct tiled_blits {
u32 height;
 };
 
+static bool fast_blit_ok(struct blit_buffer *buf)
+{
+   int gen = GRAPHICS_VER(buf->vma->vm->i915);
+
+   if (gen < 9)
+   return false;
+
+   if (gen < 12)
+   return true;
+
+   /* filter out platforms with unsupported X-tile support(iGPUs and DG1) 
in fastblit */
+   return !((IS_DG1(buf->vma->vm->i915) || (gen == 12 && 
!HAS_LMEM(buf->vma->vm->i915))) &&
+   buf->tiling == CLIENT_TILING_X);
+}
+
 static int prepare_blit(const struct tiled_blits *t,
struct blit_buffer *dst,
struct blit_buffer *src,
@@ -59,51 +136,103 @@ static int prepare_blit(const struct tiled_blits *t,
if (IS_ERR(cs))
return PTR_ERR(cs);
 
-   *cs++ = MI_LOAD_REGISTER_IMM(1);
-   *cs++ = i915_mmio_reg_offset(BCS_SWCTRL);
-   cmd = (BCS_SRC_Y | BCS_DST_Y) << 16;
-   if (src->tiling == CLIENT_TILING_Y)
-   cmd |= BCS_SRC_Y;
-   if (dst->tiling == CLIENT_TILING_Y)
-   cmd |= BCS_DST_Y;
-   *cs++ = cmd;
-
-   cmd = MI_FLUSH_DW;
-   if (ver >= 8)
-   cmd++;
-   *cs++ = cmd;
-   *cs++ = 0;
-   *cs++ = 0;
-   *cs++ = 0;
-
-   cmd = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (8 - 2);
-   if (ver >= 8)
-   cmd += 2;
-
-   src_pitch = t->width * 4;
-   if (src->tiling) {
-   cmd |= XY_SRC_COPY_BLT_SRC_TILED;
-   src_pitch /= 4;
-   }
+   if (fast_blit_ok(dst) && fast_blit_ok(src)) {
+   struct intel_gt *gt = 

Re: [Intel-gfx] [PATCH] drm/i915: Enable Tile4 tiling mode

2022-05-11 Thread Das, Nirmoy

This also:

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/5879

On 5/11/2022 4:22 PM, Nirmoy Das wrote:

From: Bommu Krishnaiah 

Enable Tile4 tiling mode on platforms that support
Tile4 but not TileY, such as DG2.

Signed-off-by: Bommu Krishnaiah 
Co-developed-by: Nirmoy Das 
Signed-off-by: Nirmoy Das 
---
  .../i915/gem/selftests/i915_gem_client_blt.c  | 238 ++
  drivers/gpu/drm/i915/gt/intel_gpu_commands.h  |  22 ++
  2 files changed, 214 insertions(+), 46 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
index ddd0772fd828..71d7e4afa136 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
@@ -6,6 +6,7 @@
  #include "i915_selftest.h"
  
  #include "gt/intel_context.h"

+#include "gt/intel_engine_regs.h"
  #include "gt/intel_engine_user.h"
  #include "gt/intel_gpu_commands.h"
  #include "gt/intel_gt.h"
@@ -18,10 +19,71 @@
  #include "huge_gem_object.h"
  #include "mock_context.h"
  
+#define OW_SIZE 16  /* in bytes */

+#define F_SUBTILE_SIZE 64   /* in bytes */
+#define F_TILE_WIDTH 128/* in bytes */
+#define F_TILE_HEIGHT 32/* in pixels */
+#define F_SUBTILE_WIDTH  OW_SIZE/* in bytes */
+#define F_SUBTILE_HEIGHT 4  /* in pixels */
+
+static int linear_x_y_to_ftiled_pos(int x, int y, u32 stride, int bpp)
+{
+   int tile_base;
+   int tile_x, tile_y;
+   int swizzle, subtile;
+   int pixel_size = bpp / 8;
+   int pos;
+
+   /*
+* Subtile remapping for F tile. Note that map[a]==b implies map[b]==a
+* so we can use the same table to tile and untile.
+*/
+   static const u8 f_subtile_map[] = {
+0,  1,  2,  3,  8,  9, 10, 11,
+4,  5,  6,  7, 12, 13, 14, 15,
+   16, 17, 18, 19, 24, 25, 26, 27,
+   20, 21, 22, 23, 28, 29, 30, 31,
+   32, 33, 34, 35, 40, 41, 42, 43,
+   36, 37, 38, 39, 44, 45, 46, 47,
+   48, 49, 50, 51, 56, 57, 58, 59,
+   52, 53, 54, 55, 60, 61, 62, 63
+   };
+
+   x *= pixel_size;
+   /*
+* Where does the 4k tile start (in bytes)?  This is the same for Y and
+* F so we can use the Y-tile algorithm to get to that point.
+*/
+   tile_base =
+   y / F_TILE_HEIGHT * stride * F_TILE_HEIGHT +
+   x / F_TILE_WIDTH * 4096;
+
+   /* Find pixel within tile */
+   tile_x = x % F_TILE_WIDTH;
+   tile_y = y % F_TILE_HEIGHT;
+
+   /* And figure out the subtile within the 4k tile */
+   subtile = tile_y / F_SUBTILE_HEIGHT * 8 + tile_x / F_SUBTILE_WIDTH;
+
+   /* Swizzle the subtile number according to the bspec diagram */
+   swizzle = f_subtile_map[subtile];
+
+   /* Calculate new position */
+   pos = tile_base +
+   swizzle * F_SUBTILE_SIZE +
+   tile_y % F_SUBTILE_HEIGHT * OW_SIZE +
+   tile_x % F_SUBTILE_WIDTH;
+
+   GEM_BUG_ON(!IS_ALIGNED(pos, pixel_size));
+
+   return pos / pixel_size * 4;
+}
+
  enum client_tiling {
CLIENT_TILING_LINEAR,
CLIENT_TILING_X,
CLIENT_TILING_Y,
+   CLIENT_TILING_4,
CLIENT_NUM_TILING_TYPES
  };
  
@@ -45,6 +107,19 @@ struct tiled_blits {

u32 height;
  };
  
+static bool fast_blit_ok(struct blit_buffer *buf)

+{
+   int gen = GRAPHICS_VER(buf->vma->vm->i915);
+
+   if (gen < 9)
+   return false;
+
+   if (gen < 12)
+   return true;
+
+   return !IS_DG1(buf->vma->vm->i915) || buf->tiling != CLIENT_TILING_X;
+}
+
  static int prepare_blit(const struct tiled_blits *t,
struct blit_buffer *dst,
struct blit_buffer *src,
@@ -59,54 +134,109 @@ static int prepare_blit(const struct tiled_blits *t,
if (IS_ERR(cs))
return PTR_ERR(cs);
  
-	*cs++ = MI_LOAD_REGISTER_IMM(1);

-   *cs++ = i915_mmio_reg_offset(BCS_SWCTRL);
-   cmd = (BCS_SRC_Y | BCS_DST_Y) << 16;
-   if (src->tiling == CLIENT_TILING_Y)
-   cmd |= BCS_SRC_Y;
-   if (dst->tiling == CLIENT_TILING_Y)
-   cmd |= BCS_DST_Y;
-   *cs++ = cmd;
-
-   cmd = MI_FLUSH_DW;
-   if (ver >= 8)
-   cmd++;
-   *cs++ = cmd;
-   *cs++ = 0;
-   *cs++ = 0;
-   *cs++ = 0;
-
-   cmd = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (8 - 2);
-   if (ver >= 8)
-   cmd += 2;
-
-   src_pitch = t->width * 4;
-   if (src->tiling) {
-   cmd |= XY_SRC_COPY_BLT_SRC_TILED;
-   src_pitch /= 4;
-   }
+   /*
+* On GEN12+ X-tiled format support is removed from the fast blit
+* command, so use the XY_SRC blit command for it instead.
+*/
+   if (fast_blit_ok(dst) && 

[Intel-gfx] [PATCH] drm/i915: Enable Tile4 tiling mode

2022-05-11 Thread Nirmoy Das
From: Bommu Krishnaiah 

Enable Tile4 tiling mode on platforms that support
Tile4 but not TileY, such as DG2.

Signed-off-by: Bommu Krishnaiah 
Co-developed-by: Nirmoy Das 
Signed-off-by: Nirmoy Das 
---
 .../i915/gem/selftests/i915_gem_client_blt.c  | 238 ++
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h  |  22 ++
 2 files changed, 214 insertions(+), 46 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
index ddd0772fd828..71d7e4afa136 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
@@ -6,6 +6,7 @@
 #include "i915_selftest.h"
 
 #include "gt/intel_context.h"
+#include "gt/intel_engine_regs.h"
 #include "gt/intel_engine_user.h"
 #include "gt/intel_gpu_commands.h"
 #include "gt/intel_gt.h"
@@ -18,10 +19,71 @@
 #include "huge_gem_object.h"
 #include "mock_context.h"
 
+#define OW_SIZE 16  /* in bytes */
+#define F_SUBTILE_SIZE 64   /* in bytes */
+#define F_TILE_WIDTH 128/* in bytes */
+#define F_TILE_HEIGHT 32/* in pixels */
+#define F_SUBTILE_WIDTH  OW_SIZE/* in bytes */
+#define F_SUBTILE_HEIGHT 4  /* in pixels */
+
+static int linear_x_y_to_ftiled_pos(int x, int y, u32 stride, int bpp)
+{
+   int tile_base;
+   int tile_x, tile_y;
+   int swizzle, subtile;
+   int pixel_size = bpp / 8;
+   int pos;
+
+   /*
+* Subtile remapping for F tile. Note that map[a]==b implies map[b]==a
+* so we can use the same table to tile and untile.
+*/
+   static const u8 f_subtile_map[] = {
+0,  1,  2,  3,  8,  9, 10, 11,
+4,  5,  6,  7, 12, 13, 14, 15,
+   16, 17, 18, 19, 24, 25, 26, 27,
+   20, 21, 22, 23, 28, 29, 30, 31,
+   32, 33, 34, 35, 40, 41, 42, 43,
+   36, 37, 38, 39, 44, 45, 46, 47,
+   48, 49, 50, 51, 56, 57, 58, 59,
+   52, 53, 54, 55, 60, 61, 62, 63
+   };
+
+   x *= pixel_size;
+   /*
+* Where does the 4k tile start (in bytes)?  This is the same for Y and
+* F so we can use the Y-tile algorithm to get to that point.
+*/
+   tile_base =
+   y / F_TILE_HEIGHT * stride * F_TILE_HEIGHT +
+   x / F_TILE_WIDTH * 4096;
+
+   /* Find pixel within tile */
+   tile_x = x % F_TILE_WIDTH;
+   tile_y = y % F_TILE_HEIGHT;
+
+   /* And figure out the subtile within the 4k tile */
+   subtile = tile_y / F_SUBTILE_HEIGHT * 8 + tile_x / F_SUBTILE_WIDTH;
+
+   /* Swizzle the subtile number according to the bspec diagram */
+   swizzle = f_subtile_map[subtile];
+
+   /* Calculate new position */
+   pos = tile_base +
+   swizzle * F_SUBTILE_SIZE +
+   tile_y % F_SUBTILE_HEIGHT * OW_SIZE +
+   tile_x % F_SUBTILE_WIDTH;
+
+   GEM_BUG_ON(!IS_ALIGNED(pos, pixel_size));
+
+   return pos / pixel_size * 4;
+}
+
 enum client_tiling {
CLIENT_TILING_LINEAR,
CLIENT_TILING_X,
CLIENT_TILING_Y,
+   CLIENT_TILING_4,
CLIENT_NUM_TILING_TYPES
 };
 
@@ -45,6 +107,19 @@ struct tiled_blits {
u32 height;
 };
 
+static bool fast_blit_ok(struct blit_buffer *buf)
+{
+   int gen = GRAPHICS_VER(buf->vma->vm->i915);
+
+   if (gen < 9)
+   return false;
+
+   if (gen < 12)
+   return true;
+
+   return !IS_DG1(buf->vma->vm->i915) || buf->tiling != CLIENT_TILING_X;
+}
+
 static int prepare_blit(const struct tiled_blits *t,
struct blit_buffer *dst,
struct blit_buffer *src,
@@ -59,54 +134,109 @@ static int prepare_blit(const struct tiled_blits *t,
if (IS_ERR(cs))
return PTR_ERR(cs);
 
-   *cs++ = MI_LOAD_REGISTER_IMM(1);
-   *cs++ = i915_mmio_reg_offset(BCS_SWCTRL);
-   cmd = (BCS_SRC_Y | BCS_DST_Y) << 16;
-   if (src->tiling == CLIENT_TILING_Y)
-   cmd |= BCS_SRC_Y;
-   if (dst->tiling == CLIENT_TILING_Y)
-   cmd |= BCS_DST_Y;
-   *cs++ = cmd;
-
-   cmd = MI_FLUSH_DW;
-   if (ver >= 8)
-   cmd++;
-   *cs++ = cmd;
-   *cs++ = 0;
-   *cs++ = 0;
-   *cs++ = 0;
-
-   cmd = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (8 - 2);
-   if (ver >= 8)
-   cmd += 2;
-
-   src_pitch = t->width * 4;
-   if (src->tiling) {
-   cmd |= XY_SRC_COPY_BLT_SRC_TILED;
-   src_pitch /= 4;
-   }
+   /*
+* On GEN12+ X-tiled format support is removed from the fast blit
+* command, so use the XY_SRC blit command for it instead.
+*/
+   if (fast_blit_ok(dst) && fast_blit_ok(src)) {
+   struct intel_gt *gt = t->ce->engine->gt;
+   u32 src_tiles = 0, dst_tiles = 0;
+   u32