[PATCH v15 0/1] drm/i915: Allow user to set cache at BO creation

2023-05-31 Thread fei . yang
From: Fei Yang 

This series introduces a new extension for GEM_CREATE,
1. end support for set caching ioctl [PATCH 1/2]
2. add set_pat extension for gem_create [PATCH 2/2]

v2: drop one patch that was merged separately
commit 341ad0e8e254 ("drm/i915/mtl: Add PTE encode function")
v3: rebased on https://patchwork.freedesktop.org/series/117082/
v4: fix missing unlock introduced in v3, and
solve a rebase conflict
v5: replace obj->cache_level with pat_set_by_user,
fix i915_cache_level_str() for legacy platforms.
v6: rebased on https://patchwork.freedesktop.org/series/117480/
v7: rebased on https://patchwork.freedesktop.org/series/117528/
v8: dropped the two dependent patches that have been merged
separately. Add IGT link and Tested-by (MESA).
v9: addressing comments (Andi)
v10: acked-by and tested-by MESA
v11: drop "end support for set caching ioctl" (merged)
 remove tools/include/uapi/drm/i915_drm.h
v12: drop Bspec reference in comment. add to commit message instead
v13: sent to test with igt@gem_create@create-ext-set-pat
v14: sent to test with igt@gem_create@create-ext-set-pat
v15: update commit message with documentation note and t-b/a-b from
     Media driver folks.

Fei Yang (1):
  drm/i915: Allow user to set cache at BO creation

 drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 +++
 drivers/gpu/drm/i915/gem/i915_gem_object.c |  6 
 include/uapi/drm/i915_drm.h| 41 ++
 3 files changed, 83 insertions(+)

-- 
2.25.1



[PATCH v15 1/1] drm/i915: Allow user to set cache at BO creation

2023-05-31 Thread fei . yang
From: Fei Yang 

To comply with the design that buffer objects shall have an immutable
cache setting throughout their life cycle, the {set, get}_caching ioctls
are no longer supported from MTL onward. With that change, caching
policy can only be set at object creation time. The current code
applies a default (platform dependent) cache setting to all objects.
However, this is not optimal for performance tuning. This patch extends
the existing gem_create uAPI to let the user set the PAT index for an
object at creation time.
The new extension is platform independent, so UMDs can switch to using
this extension on older platforms as well, while the {set, get}_caching
ioctls remain supported on those legacy platforms for compatibility reasons.

Note: The detailed description of the PAT index is missing in the current
PRM, even for older hardware, and will be added in the next PRM update
under the chapter name "Memory Views".

BSpec: 45101

Mesa support has been submitted in this merge request:
https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22878

Support in the media driver has been added by the following commits:
https://github.com/intel/media-driver/commit/92c00a857433ebb34ec575e9834f473c6fcb6341
https://github.com/intel/media-driver/commit/fd375cf2c5e1f6bf6b43258ff797b3134aadc9fd
https://github.com/intel/media-driver/commit/08dd244b22484770a33464c2c8ae85430e548000

The IGT test related to this change is
igt@gem_create@create-ext-set-pat
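
For reference, here is a minimal userspace sketch (not part of the patch)
of how the extension is expected to be chained into GEM_CREATE_EXT. The
fd, the 4 KiB size and the pat_index value of 3 are placeholders; valid
indices are platform specific and bounded by the KMD's max_pat_index.

  #include <stdint.h>
  #include <sys/ioctl.h>
  #include <drm/i915_drm.h>

  struct drm_i915_gem_create_ext_set_pat set_pat_ext = {
          .base = { .name = I915_GEM_CREATE_EXT_SET_PAT },
          .pat_index = 3,                 /* placeholder, platform dependent */
  };
  struct drm_i915_gem_create_ext create = {
          .size = 4096,
          .extensions = (__u64)(uintptr_t)&set_pat_ext,
  };

  if (ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create) == 0) {
          /* create.handle now refers to a BO whose PAT index is fixed
           * for its whole life cycle.
           */
  }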

Signed-off-by: Fei Yang 
Cc: Chris Wilson 
Cc: Matt Roper 
Cc: Andi Shyti 
Reviewed-by: Andi Shyti 
Acked-by: Jordan Justen 
Tested-by: Jordan Justen 
Acked-by: Carl Zhang 
Tested-by: Lihao Gu 
---
 drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 +++
 drivers/gpu/drm/i915/gem/i915_gem_object.c |  6 
 include/uapi/drm/i915_drm.h| 41 ++
 3 files changed, 83 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c 
b/drivers/gpu/drm/i915/gem/i915_gem_create.c
index bfe1dbda4cb7..644a936248ad 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
@@ -245,6 +245,7 @@ struct create_ext {
unsigned int n_placements;
unsigned int placement_mask;
unsigned long flags;
+   unsigned int pat_index;
 };
 
 static void repr_placements(char *buf, size_t size,
@@ -394,11 +395,39 @@ static int ext_set_protected(struct i915_user_extension 
__user *base, void *data
return 0;
 }
 
+static int ext_set_pat(struct i915_user_extension __user *base, void *data)
+{
+   struct create_ext *ext_data = data;
+   struct drm_i915_private *i915 = ext_data->i915;
+   struct drm_i915_gem_create_ext_set_pat ext;
+   unsigned int max_pat_index;
+
+   BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) !=
+offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd));
+
+   if (copy_from_user(&ext, base, sizeof(ext)))
+   return -EFAULT;
+
+   max_pat_index = INTEL_INFO(i915)->max_pat_index;
+
+   if (ext.pat_index > max_pat_index) {
+   drm_dbg(&i915->drm, "PAT index is invalid: %u\n",
+   ext.pat_index);
+   return -EINVAL;
+   }
+
+   ext_data->pat_index = ext.pat_index;
+
+   return 0;
+}
+
 static const i915_user_extension_fn create_extensions[] = {
[I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements,
[I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected,
+   [I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat,
 };
 
+#define PAT_INDEX_NOT_SET  0xffffffff
 /**
  * i915_gem_create_ext_ioctl - Creates a new mm object and returns a handle to 
it.
  * @dev: drm device pointer
@@ -418,6 +447,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void 
*data,
if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS)
return -EINVAL;
 
+   ext_data.pat_index = PAT_INDEX_NOT_SET;
ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
   create_extensions,
   ARRAY_SIZE(create_extensions),
@@ -454,5 +484,11 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void 
*data,
if (IS_ERR(obj))
return PTR_ERR(obj);
 
+   if (ext_data.pat_index != PAT_INDEX_NOT_SET) {
+   i915_gem_object_set_pat_index(obj, ext_data.pat_index);
+   /* Mark pat_index is set by UMD */
+   obj->pat_set_by_user = true;
+   }
+
return i915_gem_publish(obj, file, &args->size, &args->handle);
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 46a19b099ec8..97ac6fb37958 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -208,6 +208,12 @@ bool i915_gem_object_can_bypass_llc(struct 
drm_i915_gem_object *obj)
if (!(obj->

[PATCH v12 1/1] drm/i915: Allow user to set cache at BO creation

2023-05-24 Thread fei . yang
From: Fei Yang 

To comply with the design that buffer objects shall have an immutable
cache setting throughout their life cycle, the {set, get}_caching ioctls
are no longer supported from MTL onward. With that change, caching
policy can only be set at object creation time. The current code
applies a default (platform dependent) cache setting to all objects.
However, this is not optimal for performance tuning. This patch extends
the existing gem_create uAPI to let the user set the PAT index for an
object at creation time.
The new extension is platform independent, so UMDs can switch to using
this extension on older platforms as well, while the {set, get}_caching
ioctls remain supported on those legacy platforms for compatibility reasons.

BSpec: 45101

Test igt@gem_create@create_ext_set_pat posted at
https://patchwork.freedesktop.org/series/118314/

Tested with https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22878

Signed-off-by: Fei Yang 
Cc: Chris Wilson 
Cc: Matt Roper 
Cc: Andi Shyti 
Reviewed-by: Andi Shyti 
Acked-by: Jordan Justen 
Tested-by: Jordan Justen 
---
 drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 +++
 drivers/gpu/drm/i915/gem/i915_gem_object.c |  6 
 include/uapi/drm/i915_drm.h| 41 ++
 3 files changed, 83 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c 
b/drivers/gpu/drm/i915/gem/i915_gem_create.c
index bfe1dbda4cb7..644a936248ad 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
@@ -245,6 +245,7 @@ struct create_ext {
unsigned int n_placements;
unsigned int placement_mask;
unsigned long flags;
+   unsigned int pat_index;
 };
 
 static void repr_placements(char *buf, size_t size,
@@ -394,11 +395,39 @@ static int ext_set_protected(struct i915_user_extension 
__user *base, void *data
return 0;
 }
 
+static int ext_set_pat(struct i915_user_extension __user *base, void *data)
+{
+   struct create_ext *ext_data = data;
+   struct drm_i915_private *i915 = ext_data->i915;
+   struct drm_i915_gem_create_ext_set_pat ext;
+   unsigned int max_pat_index;
+
+   BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) !=
+offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd));
+
+   if (copy_from_user(&ext, base, sizeof(ext)))
+   return -EFAULT;
+
+   max_pat_index = INTEL_INFO(i915)->max_pat_index;
+
+   if (ext.pat_index > max_pat_index) {
+   drm_dbg(&i915->drm, "PAT index is invalid: %u\n",
+   ext.pat_index);
+   return -EINVAL;
+   }
+
+   ext_data->pat_index = ext.pat_index;
+
+   return 0;
+}
+
 static const i915_user_extension_fn create_extensions[] = {
[I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements,
[I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected,
+   [I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat,
 };
 
+#define PAT_INDEX_NOT_SET  0xffffffff
 /**
  * i915_gem_create_ext_ioctl - Creates a new mm object and returns a handle to 
it.
  * @dev: drm device pointer
@@ -418,6 +447,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void 
*data,
if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS)
return -EINVAL;
 
+   ext_data.pat_index = PAT_INDEX_NOT_SET;
ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
   create_extensions,
   ARRAY_SIZE(create_extensions),
@@ -454,5 +484,11 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void 
*data,
if (IS_ERR(obj))
return PTR_ERR(obj);
 
+   if (ext_data.pat_index != PAT_INDEX_NOT_SET) {
+   i915_gem_object_set_pat_index(obj, ext_data.pat_index);
+   /* Mark pat_index is set by UMD */
+   obj->pat_set_by_user = true;
+   }
+
return i915_gem_publish(obj, file, &args->size, &args->handle);
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 46a19b099ec8..97ac6fb37958 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -208,6 +208,12 @@ bool i915_gem_object_can_bypass_llc(struct 
drm_i915_gem_object *obj)
if (!(obj->flags & I915_BO_ALLOC_USER))
return false;
 
+   /*
+* Always flush cache for UMD objects at creation time.
+*/
+   if (obj->pat_set_by_user)
+   return true;
+
/*
 * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
 * possible for userspace to bypass the GTT caching bits set by the
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index f31dfacde601..4083a23e0614 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h

[PATCH v12 0/1] drm/i915: Allow user to set cache at BO creation

2023-05-24 Thread fei . yang
From: Fei Yang 

This series introduces a new extension for GEM_CREATE,
1. end support for set caching ioctl [PATCH 1/2]
2. add set_pat extension for gem_create [PATCH 2/2]

v2: drop one patch that was merged separately
commit 341ad0e8e254 ("drm/i915/mtl: Add PTE encode function")
v3: rebased on https://patchwork.freedesktop.org/series/117082/
v4: fix missing unlock introduced in v3, and
solve a rebase conflict
v5: replace obj->cache_level with pat_set_by_user,
fix i915_cache_level_str() for legacy platforms.
v6: rebased on https://patchwork.freedesktop.org/series/117480/
v7: rebased on https://patchwork.freedesktop.org/series/117528/
v8: dropped the two dependent patches that have been merged
separately. Add IGT link and Tested-by (MESA).
v9: addressing comments (Andi)
v10: acked-by and tested-by MESA
v11: drop "end support for set caching ioctl" (merged)
 remove tools/include/uapi/drm/i915_drm.h
v12: drop Bspec reference in comment. add to commit message instead

Fei Yang (1):
  drm/i915: Allow user to set cache at BO creation

 drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 +++
 drivers/gpu/drm/i915/gem/i915_gem_object.c |  6 
 include/uapi/drm/i915_drm.h| 41 ++
 3 files changed, 83 insertions(+)

-- 
2.25.1



[PATCH v11 1/1] drm/i915: Allow user to set cache at BO creation

2023-05-24 Thread fei . yang
From: Fei Yang 

To comply with the design that buffer objects shall have an immutable
cache setting throughout their life cycle, the {set, get}_caching ioctls
are no longer supported from MTL onward. With that change, caching
policy can only be set at object creation time. The current code
applies a default (platform dependent) cache setting to all objects.
However, this is not optimal for performance tuning. This patch extends
the existing gem_create uAPI to let the user set the PAT index for an
object at creation time.
The new extension is platform independent, so UMDs can switch to using
this extension on older platforms as well, while the {set, get}_caching
ioctls remain supported on those legacy platforms for compatibility reasons.

Test igt@gem_create@create_ext_set_pat posted at
https://patchwork.freedesktop.org/series/118314/

Tested with https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22878

Signed-off-by: Fei Yang 
Cc: Chris Wilson 
Cc: Matt Roper 
Cc: Andi Shyti 
Reviewed-by: Andi Shyti 
Acked-by: Jordan Justen 
Tested-by: Jordan Justen 
---
 drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 +++
 drivers/gpu/drm/i915/gem/i915_gem_object.c |  6 
 include/uapi/drm/i915_drm.h| 42 ++
 3 files changed, 84 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c 
b/drivers/gpu/drm/i915/gem/i915_gem_create.c
index bfe1dbda4cb7..644a936248ad 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
@@ -245,6 +245,7 @@ struct create_ext {
unsigned int n_placements;
unsigned int placement_mask;
unsigned long flags;
+   unsigned int pat_index;
 };
 
 static void repr_placements(char *buf, size_t size,
@@ -394,11 +395,39 @@ static int ext_set_protected(struct i915_user_extension 
__user *base, void *data
return 0;
 }
 
+static int ext_set_pat(struct i915_user_extension __user *base, void *data)
+{
+   struct create_ext *ext_data = data;
+   struct drm_i915_private *i915 = ext_data->i915;
+   struct drm_i915_gem_create_ext_set_pat ext;
+   unsigned int max_pat_index;
+
+   BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) !=
+offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd));
+
+   if (copy_from_user(&ext, base, sizeof(ext)))
+   return -EFAULT;
+
+   max_pat_index = INTEL_INFO(i915)->max_pat_index;
+
+   if (ext.pat_index > max_pat_index) {
+   drm_dbg(&i915->drm, "PAT index is invalid: %u\n",
+   ext.pat_index);
+   return -EINVAL;
+   }
+
+   ext_data->pat_index = ext.pat_index;
+
+   return 0;
+}
+
 static const i915_user_extension_fn create_extensions[] = {
[I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements,
[I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected,
+   [I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat,
 };
 
+#define PAT_INDEX_NOT_SET  0xffffffff
 /**
  * i915_gem_create_ext_ioctl - Creates a new mm object and returns a handle to 
it.
  * @dev: drm device pointer
@@ -418,6 +447,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void 
*data,
if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS)
return -EINVAL;
 
+   ext_data.pat_index = PAT_INDEX_NOT_SET;
ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
   create_extensions,
   ARRAY_SIZE(create_extensions),
@@ -454,5 +484,11 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void 
*data,
if (IS_ERR(obj))
return PTR_ERR(obj);
 
+   if (ext_data.pat_index != PAT_INDEX_NOT_SET) {
+   i915_gem_object_set_pat_index(obj, ext_data.pat_index);
+   /* Mark pat_index is set by UMD */
+   obj->pat_set_by_user = true;
+   }
+
return i915_gem_publish(obj, file, &args->size, &args->handle);
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 46a19b099ec8..97ac6fb37958 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -208,6 +208,12 @@ bool i915_gem_object_can_bypass_llc(struct 
drm_i915_gem_object *obj)
if (!(obj->flags & I915_BO_ALLOC_USER))
return false;
 
+   /*
+* Always flush cache for UMD objects at creation time.
+*/
+   if (obj->pat_set_by_user)
+   return true;
+
/*
 * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
 * possible for userspace to bypass the GTT caching bits set by the
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index f31dfacde601..75426fcb4b2e 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h

[PATCH v11 0/1] drm/i915: Allow user to set cache at BO creation

2023-05-24 Thread fei . yang
From: Fei Yang 

This series introduces a new extension for GEM_CREATE,
1. end support for set caching ioctl [PATCH 1/2]
2. add set_pat extension for gem_create [PATCH 2/2]

v2: drop one patch that was merged separately
commit 341ad0e8e254 ("drm/i915/mtl: Add PTE encode function")
v3: rebased on https://patchwork.freedesktop.org/series/117082/
v4: fix missing unlock introduced in v3, and
solve a rebase conflict
v5: replace obj->cache_level with pat_set_by_user,
fix i915_cache_level_str() for legacy platforms.
v6: rebased on https://patchwork.freedesktop.org/series/117480/
v7: rebased on https://patchwork.freedesktop.org/series/117528/
v8: dropped the two dependent patches that have been merged
separately. Add IGT link and Tested-by (MESA).
v9: addressing comments (Andi)
v10: acked-by and tested-by MESA
v11: drop "end support for set caching ioctl" (merged)
 remove tools/include/uapi/drm/i915_drm.h

Fei Yang (1):
  drm/i915: Allow user to set cache at BO creation

 drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 +++
 drivers/gpu/drm/i915/gem/i915_gem_object.c |  6 
 include/uapi/drm/i915_drm.h| 42 ++
 3 files changed, 84 insertions(+)

-- 
2.25.1



[PATCH v10 1/2] drm/i915/mtl: end support for set caching ioctl

2023-05-18 Thread fei . yang
From: Fei Yang 

The design is to keep a buffer object's caching policy immutable
throughout its life cycle. This patch ends support for the set caching
ioctl from MTL onward. While doing that, we also make BOs 1-way coherent
at creation time, because the GPU no longer automatically snoops the CPU
cache. For userspace components that need to fine-tune the caching policy
for their BOs, a follow-up patch will extend the GEM_CREATE uAPI to allow
them to specify the caching mode at BO creation time.
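
As a hedged userspace-side illustration (not part of the patch) of what
this means in practice: on MTL the ioctl now fails with -EOPNOTSUPP, so
a UMD probing it is expected to fall back to choosing the caching mode
at BO creation time instead. The fd and bo_handle below are placeholders.

  #include <errno.h>
  #include <sys/ioctl.h>
  #include <drm/i915_drm.h>

  struct drm_i915_gem_caching arg = {
          .handle = bo_handle,            /* placeholder BO handle */
          .caching = I915_CACHING_CACHED,
  };

  if (ioctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg) < 0 &&
      errno == EOPNOTSUPP) {
          /* MTL onward: re-create the BO and select caching through the
           * gem_create set_pat extension added by the follow-up patch.
           */
  }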

Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
Reviewed-by: Andrzej Hajda 
---
 drivers/gpu/drm/i915/gem/i915_gem_domain.c | 3 +++
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c  | 9 -
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c 
b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 05107a6efe45..dfaaa8b66ac3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -350,6 +350,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void 
*data,
if (IS_DGFX(i915))
return -ENODEV;
 
+   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+   return -EOPNOTSUPP;
+
switch (args->caching) {
case I915_CACHING_NONE:
level = I915_CACHE_NONE;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 37d1efcd3ca6..cad4a6017f4b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -601,7 +601,14 @@ static int shmem_object_init(struct intel_memory_region 
*mem,
obj->write_domain = I915_GEM_DOMAIN_CPU;
obj->read_domains = I915_GEM_DOMAIN_CPU;
 
-   if (HAS_LLC(i915))
+   /*
+* MTL doesn't snoop the CPU cache by default for GPU access (namely
+* 1-way coherency). However, some UMDs currently depend on that, so
+* make 1-way coherent the default setting for MTL. A follow-up patch
+* will extend the GEM_CREATE uAPI to allow UMDs to specify the
+* caching mode at BO creation time.
+*/
+   if (HAS_LLC(i915) || (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)))
/* On some devices, we can have the GPU use the LLC (the CPU
 * cache) for about a 10% performance improvement
 * compared to uncached.  Graphics requests other than
-- 
2.25.1



[PATCH v10 2/2] drm/i915: Allow user to set cache at BO creation

2023-05-18 Thread fei . yang
From: Fei Yang 

To comply with the design that buffer objects shall have an immutable
cache setting throughout their life cycle, the {set, get}_caching ioctls
are no longer supported from MTL onward. With that change, caching
policy can only be set at object creation time. The current code
applies a default (platform dependent) cache setting to all objects.
However, this is not optimal for performance tuning. This patch extends
the existing gem_create uAPI to let the user set the PAT index for an
object at creation time.
The new extension is platform independent, so UMDs can switch to using
this extension on older platforms as well, while the {set, get}_caching
ioctls remain supported on those legacy platforms for compatibility reasons.

Test igt@gem_create@create_ext_set_pat posted at
https://patchwork.freedesktop.org/series/117695/

Tested with https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22878

Signed-off-by: Fei Yang 
Cc: Chris Wilson 
Cc: Matt Roper 
Cc: Andi Shyti 
Reviewed-by: Andi Shyti 
Acked-by: Jordan Justen 
Tested-by: Jordan Justen 
---
 drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 +++
 drivers/gpu/drm/i915/gem/i915_gem_object.c |  6 
 include/uapi/drm/i915_drm.h| 42 ++
 tools/include/uapi/drm/i915_drm.h  | 42 ++
 4 files changed, 126 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c 
b/drivers/gpu/drm/i915/gem/i915_gem_create.c
index bfe1dbda4cb7..644a936248ad 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
@@ -245,6 +245,7 @@ struct create_ext {
unsigned int n_placements;
unsigned int placement_mask;
unsigned long flags;
+   unsigned int pat_index;
 };
 
 static void repr_placements(char *buf, size_t size,
@@ -394,11 +395,39 @@ static int ext_set_protected(struct i915_user_extension 
__user *base, void *data
return 0;
 }
 
+static int ext_set_pat(struct i915_user_extension __user *base, void *data)
+{
+   struct create_ext *ext_data = data;
+   struct drm_i915_private *i915 = ext_data->i915;
+   struct drm_i915_gem_create_ext_set_pat ext;
+   unsigned int max_pat_index;
+
+   BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) !=
+offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd));
+
+   if (copy_from_user(&ext, base, sizeof(ext)))
+   return -EFAULT;
+
+   max_pat_index = INTEL_INFO(i915)->max_pat_index;
+
+   if (ext.pat_index > max_pat_index) {
+   drm_dbg(&i915->drm, "PAT index is invalid: %u\n",
+   ext.pat_index);
+   return -EINVAL;
+   }
+
+   ext_data->pat_index = ext.pat_index;
+
+   return 0;
+}
+
 static const i915_user_extension_fn create_extensions[] = {
[I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements,
[I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected,
+   [I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat,
 };
 
+#define PAT_INDEX_NOT_SET  0xffffffff
 /**
  * i915_gem_create_ext_ioctl - Creates a new mm object and returns a handle to 
it.
  * @dev: drm device pointer
@@ -418,6 +447,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void 
*data,
if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS)
return -EINVAL;
 
+   ext_data.pat_index = PAT_INDEX_NOT_SET;
ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
   create_extensions,
   ARRAY_SIZE(create_extensions),
@@ -454,5 +484,11 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void 
*data,
if (IS_ERR(obj))
return PTR_ERR(obj);
 
+   if (ext_data.pat_index != PAT_INDEX_NOT_SET) {
+   i915_gem_object_set_pat_index(obj, ext_data.pat_index);
+   /* Mark pat_index is set by UMD */
+   obj->pat_set_by_user = true;
+   }
+
return i915_gem_publish(obj, file, &args->size, &args->handle);
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 46a19b099ec8..97ac6fb37958 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -208,6 +208,12 @@ bool i915_gem_object_can_bypass_llc(struct 
drm_i915_gem_object *obj)
if (!(obj->flags & I915_BO_ALLOC_USER))
return false;
 
+   /*
+* Always flush cache for UMD objects at creation time.
+*/
+   if (obj->pat_set_by_user)
+   return true;
+
/*
 * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
 * possible for userspace to bypass the GTT caching bits set by the
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index ba40855dbc93..4f3fd650e5e1 100644
--- a/include/uapi/drm/i915_drm.h

[PATCH v10 0/2] drm/i915: Allow user to set cache at BO creation

2023-05-18 Thread fei . yang
From: Fei Yang 

This series introduces a new extension for GEM_CREATE,
1. end support for set caching ioctl [PATCH 1/2]
2. add set_pat extension for gem_create [PATCH 2/2]

v2: drop one patch that was merged separately
commit 341ad0e8e254 ("drm/i915/mtl: Add PTE encode function")
v3: rebased on https://patchwork.freedesktop.org/series/117082/
v4: fix missing unlock introduced in v3, and
solve a rebase conflict
v5: replace obj->cache_level with pat_set_by_user,
fix i915_cache_level_str() for legacy platforms.
v6: rebased on https://patchwork.freedesktop.org/series/117480/
v7: rebased on https://patchwork.freedesktop.org/series/117528/
v8: dropped the two dependent patches that have been merged
separately. Add IGT link and Tested-by (MESA).
v9: addressing comments (Andi)
v10: acked-by and tested-by MESA

Fei Yang (2):
  drm/i915/mtl: end support for set caching ioctl
  drm/i915: Allow user to set cache at BO creation

 drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 +++
 drivers/gpu/drm/i915/gem/i915_gem_domain.c |  3 ++
 drivers/gpu/drm/i915/gem/i915_gem_object.c |  6 
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c  |  9 -
 include/uapi/drm/i915_drm.h| 42 ++
 tools/include/uapi/drm/i915_drm.h  | 42 ++
 6 files changed, 137 insertions(+), 1 deletion(-)

-- 
2.25.1



[PATCH v9 2/2] drm/i915: Allow user to set cache at BO creation

2023-05-16 Thread fei . yang
From: Fei Yang 

To comply with the design that buffer objects shall have an immutable
cache setting throughout their life cycle, the {set, get}_caching ioctls
are no longer supported from MTL onward. With that change, caching
policy can only be set at object creation time. The current code
applies a default (platform dependent) cache setting to all objects.
However, this is not optimal for performance tuning. This patch extends
the existing gem_create uAPI to let the user set the PAT index for an
object at creation time.
The new extension is platform independent, so UMDs can switch to using
this extension on older platforms as well, while the {set, get}_caching
ioctls remain supported on those legacy platforms for compatibility reasons.

Test igt@gem_create@create_ext_set_pat posted at
https://patchwork.freedesktop.org/series/117695/

Tested with https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22878

Signed-off-by: Fei Yang 
Cc: Chris Wilson 
Cc: Matt Roper 
Cc: Andi Shyti 
Reviewed-by: Andi Shyti 
Tested-by: Jordan Justen 
---
 drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 +++
 drivers/gpu/drm/i915/gem/i915_gem_object.c |  6 
 include/uapi/drm/i915_drm.h| 42 ++
 tools/include/uapi/drm/i915_drm.h  | 42 ++
 4 files changed, 126 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c 
b/drivers/gpu/drm/i915/gem/i915_gem_create.c
index bfe1dbda4cb7..644a936248ad 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
@@ -245,6 +245,7 @@ struct create_ext {
unsigned int n_placements;
unsigned int placement_mask;
unsigned long flags;
+   unsigned int pat_index;
 };
 
 static void repr_placements(char *buf, size_t size,
@@ -394,11 +395,39 @@ static int ext_set_protected(struct i915_user_extension 
__user *base, void *data
return 0;
 }
 
+static int ext_set_pat(struct i915_user_extension __user *base, void *data)
+{
+   struct create_ext *ext_data = data;
+   struct drm_i915_private *i915 = ext_data->i915;
+   struct drm_i915_gem_create_ext_set_pat ext;
+   unsigned int max_pat_index;
+
+   BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) !=
+offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd));
+
+   if (copy_from_user(&ext, base, sizeof(ext)))
+   return -EFAULT;
+
+   max_pat_index = INTEL_INFO(i915)->max_pat_index;
+
+   if (ext.pat_index > max_pat_index) {
+   drm_dbg(&i915->drm, "PAT index is invalid: %u\n",
+   ext.pat_index);
+   return -EINVAL;
+   }
+
+   ext_data->pat_index = ext.pat_index;
+
+   return 0;
+}
+
 static const i915_user_extension_fn create_extensions[] = {
[I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements,
[I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected,
+   [I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat,
 };
 
+#define PAT_INDEX_NOT_SET  0xffffffff
 /**
  * i915_gem_create_ext_ioctl - Creates a new mm object and returns a handle to 
it.
  * @dev: drm device pointer
@@ -418,6 +447,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void 
*data,
if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS)
return -EINVAL;
 
+   ext_data.pat_index = PAT_INDEX_NOT_SET;
ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
   create_extensions,
   ARRAY_SIZE(create_extensions),
@@ -454,5 +484,11 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void 
*data,
if (IS_ERR(obj))
return PTR_ERR(obj);
 
+   if (ext_data.pat_index != PAT_INDEX_NOT_SET) {
+   i915_gem_object_set_pat_index(obj, ext_data.pat_index);
+   /* Mark pat_index is set by UMD */
+   obj->pat_set_by_user = true;
+   }
+
return i915_gem_publish(obj, file, &args->size, &args->handle);
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 46a19b099ec8..97ac6fb37958 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -208,6 +208,12 @@ bool i915_gem_object_can_bypass_llc(struct 
drm_i915_gem_object *obj)
if (!(obj->flags & I915_BO_ALLOC_USER))
return false;
 
+   /*
+* Always flush cache for UMD objects at creation time.
+*/
+   if (obj->pat_set_by_user)
+   return true;
+
/*
 * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
 * possible for userspace to bypass the GTT caching bits set by the
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index ba40855dbc93..4f3fd650e5e1 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/inc

[PATCH v9 1/2] drm/i915/mtl: end support for set caching ioctl

2023-05-16 Thread fei . yang
From: Fei Yang 

The design is to keep a buffer object's caching policy immutable
throughout its life cycle. This patch ends support for the set caching
ioctl from MTL onward. While doing that, we also make BOs 1-way coherent
at creation time, because the GPU no longer automatically snoops the CPU
cache. For userspace components that need to fine-tune the caching policy
for their BOs, a follow-up patch will extend the GEM_CREATE uAPI to allow
them to specify the caching mode at BO creation time.

Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
Reviewed-by: Andrzej Hajda 
---
 drivers/gpu/drm/i915/gem/i915_gem_domain.c | 3 +++
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c  | 9 -
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c 
b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 05107a6efe45..dfaaa8b66ac3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -350,6 +350,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void 
*data,
if (IS_DGFX(i915))
return -ENODEV;
 
+   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+   return -EOPNOTSUPP;
+
switch (args->caching) {
case I915_CACHING_NONE:
level = I915_CACHE_NONE;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 37d1efcd3ca6..cad4a6017f4b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -601,7 +601,14 @@ static int shmem_object_init(struct intel_memory_region 
*mem,
obj->write_domain = I915_GEM_DOMAIN_CPU;
obj->read_domains = I915_GEM_DOMAIN_CPU;
 
-   if (HAS_LLC(i915))
+   /*
+* MTL doesn't snoop the CPU cache by default for GPU access (namely
+* 1-way coherency). However, some UMDs currently depend on that, so
+* make 1-way coherent the default setting for MTL. A follow-up patch
+* will extend the GEM_CREATE uAPI to allow UMDs to specify the
+* caching mode at BO creation time.
+*/
+   if (HAS_LLC(i915) || (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)))
/* On some devices, we can have the GPU use the LLC (the CPU
 * cache) for about a 10% performance improvement
 * compared to uncached.  Graphics requests other than
-- 
2.25.1



[PATCH v9 0/2] drm/i915: Allow user to set cache at BO creation

2023-05-16 Thread fei . yang
From: Fei Yang 

This series introduces a new extension for GEM_CREATE,
1. end support for set caching ioctl [PATCH 4/5]
2. add set_pat extension for gem_create [PATCH 5/5]

v2: drop one patch that was merged separately
commit 341ad0e8e254 ("drm/i915/mtl: Add PTE encode function")
v3: rebased on https://patchwork.freedesktop.org/series/117082/
v4: fix missing unlock introduced in v3, and
solve a rebase conflict
v5: replace obj->cache_level with pat_set_by_user,
fix i915_cache_level_str() for legacy platforms.
v6: rebased on https://patchwork.freedesktop.org/series/117480/
v7: rebased on https://patchwork.freedesktop.org/series/117528/
v8: dropped the two dependent patches that have been merged
separately. Add IGT link and Tested-by (MESA).
v9: addressing comments (Andi)

Fei Yang (2):
  drm/i915/mtl: end support for set caching ioctl
  drm/i915: Allow user to set cache at BO creation

 drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 +++
 drivers/gpu/drm/i915/gem/i915_gem_domain.c |  3 ++
 drivers/gpu/drm/i915/gem/i915_gem_object.c |  6 
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c  |  9 -
 include/uapi/drm/i915_drm.h| 42 ++
 tools/include/uapi/drm/i915_drm.h  | 42 ++
 6 files changed, 137 insertions(+), 1 deletion(-)

-- 
2.25.1



[PATCH v8 2/2] drm/i915: Allow user to set cache at BO creation

2023-05-12 Thread fei . yang
From: Fei Yang 

To comply with the design that buffer objects shall have an immutable
cache setting throughout their life cycle, the {set, get}_caching ioctls
are no longer supported from MTL onward. With that change, caching
policy can only be set at object creation time. The current code
applies a default (platform dependent) cache setting to all objects.
However, this is not optimal for performance tuning. This patch extends
the existing gem_create uAPI to let the user set the PAT index for an
object at creation time.
The new extension is platform independent, so UMDs can switch to using
this extension on older platforms as well, while the {set, get}_caching
ioctls remain supported on those legacy platforms for compatibility reasons.

IGT posted at https://patchwork.freedesktop.org/series/117695/

Tested with https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22878

Tested-by: Jordan Justen 
Cc: Chris Wilson 
Cc: Matt Roper 
Cc: Andi Shyti 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
---
 drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 ++
 drivers/gpu/drm/i915/gem/i915_gem_object.c |  6 
 include/uapi/drm/i915_drm.h| 36 ++
 tools/include/uapi/drm/i915_drm.h  | 36 ++
 4 files changed, 114 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c 
b/drivers/gpu/drm/i915/gem/i915_gem_create.c
index bfe1dbda4cb7..644a936248ad 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
@@ -245,6 +245,7 @@ struct create_ext {
unsigned int n_placements;
unsigned int placement_mask;
unsigned long flags;
+   unsigned int pat_index;
 };
 
 static void repr_placements(char *buf, size_t size,
@@ -394,11 +395,39 @@ static int ext_set_protected(struct i915_user_extension 
__user *base, void *data
return 0;
 }
 
+static int ext_set_pat(struct i915_user_extension __user *base, void *data)
+{
+   struct create_ext *ext_data = data;
+   struct drm_i915_private *i915 = ext_data->i915;
+   struct drm_i915_gem_create_ext_set_pat ext;
+   unsigned int max_pat_index;
+
+   BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) !=
+offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd));
+
+   if (copy_from_user(&ext, base, sizeof(ext)))
+   return -EFAULT;
+
+   max_pat_index = INTEL_INFO(i915)->max_pat_index;
+
+   if (ext.pat_index > max_pat_index) {
+   drm_dbg(&i915->drm, "PAT index is invalid: %u\n",
+   ext.pat_index);
+   return -EINVAL;
+   }
+
+   ext_data->pat_index = ext.pat_index;
+
+   return 0;
+}
+
 static const i915_user_extension_fn create_extensions[] = {
[I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements,
[I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected,
+   [I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat,
 };
 
+#define PAT_INDEX_NOT_SET  0xffffffff
 /**
  * i915_gem_create_ext_ioctl - Creates a new mm object and returns a handle to 
it.
  * @dev: drm device pointer
@@ -418,6 +447,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void 
*data,
if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS)
return -EINVAL;
 
+   ext_data.pat_index = PAT_INDEX_NOT_SET;
ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
   create_extensions,
   ARRAY_SIZE(create_extensions),
@@ -454,5 +484,11 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void 
*data,
if (IS_ERR(obj))
return PTR_ERR(obj);
 
+   if (ext_data.pat_index != PAT_INDEX_NOT_SET) {
+   i915_gem_object_set_pat_index(obj, ext_data.pat_index);
+   /* Mark pat_index is set by UMD */
+   obj->pat_set_by_user = true;
+   }
+
return i915_gem_publish(obj, file, &args->size, &args->handle);
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 46a19b099ec8..97ac6fb37958 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -208,6 +208,12 @@ bool i915_gem_object_can_bypass_llc(struct 
drm_i915_gem_object *obj)
if (!(obj->flags & I915_BO_ALLOC_USER))
return false;
 
+   /*
+* Always flush cache for UMD objects at creation time.
+*/
+   if (obj->pat_set_by_user)
+   return true;
+
/*
 * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
 * possible for userspace to bypass the GTT caching bits set by the
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index ba40855dbc93..7f5597920257 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h

[PATCH v8 0/2] drm/i915: Allow user to set cache at BO creation

2023-05-12 Thread fei . yang
From: Fei Yang 

The first three patches in this series are taken from
https://patchwork.freedesktop.org/series/116868/
These patches are included here because the last patch
depends on the pat_index refactor.

This series is focusing on uAPI changes,
1. end support for set caching ioctl [PATCH 4/5]
2. add set_pat extension for gem_create [PATCH 5/5]

v2: drop one patch that was merged separately
commit 341ad0e8e254 ("drm/i915/mtl: Add PTE encode function")
v3: rebased on https://patchwork.freedesktop.org/series/117082/
v4: fix missing unlock introduced in v3, and
solve a rebase conflict
v5: replace obj->cache_level with pat_set_by_user,
fix i915_cache_level_str() for legacy platforms.
v6: rebased on https://patchwork.freedesktop.org/series/117480/
v7: rebased on https://patchwork.freedesktop.org/series/117528/
v8: dropped the two dependent patches that have been merged
separately. Add IGT link and Tested-by (MESA).

Fei Yang (2):
  drm/i915/mtl: end support for set caching ioctl
  drm/i915: Allow user to set cache at BO creation

 drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 ++
 drivers/gpu/drm/i915/gem/i915_gem_domain.c |  3 ++
 drivers/gpu/drm/i915/gem/i915_gem_object.c |  6 
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c  |  9 +-
 include/uapi/drm/i915_drm.h| 36 ++
 tools/include/uapi/drm/i915_drm.h  | 36 ++
 6 files changed, 125 insertions(+), 1 deletion(-)

-- 
2.25.1



[PATCH v8 1/2] drm/i915/mtl: end support for set caching ioctl

2023-05-12 Thread fei . yang
From: Fei Yang 

The design is to keep a buffer object's caching policy immutable
throughout its life cycle. This patch ends support for the set caching
ioctl from MTL onward. While doing that, we also make BOs 1-way coherent
at creation time, because the GPU no longer automatically snoops the CPU
cache. For userspace components that need to fine-tune the caching policy
for their BOs, a follow-up patch will extend the GEM_CREATE uAPI to allow
them to specify the caching mode at BO creation time.

Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
Reviewed-by: Andrzej Hajda 
---
 drivers/gpu/drm/i915/gem/i915_gem_domain.c | 3 +++
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c  | 9 -
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c 
b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 05107a6efe45..dfaaa8b66ac3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -350,6 +350,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void 
*data,
if (IS_DGFX(i915))
return -ENODEV;
 
+   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+   return -EOPNOTSUPP;
+
switch (args->caching) {
case I915_CACHING_NONE:
level = I915_CACHE_NONE;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 37d1efcd3ca6..cad4a6017f4b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -601,7 +601,14 @@ static int shmem_object_init(struct intel_memory_region 
*mem,
obj->write_domain = I915_GEM_DOMAIN_CPU;
obj->read_domains = I915_GEM_DOMAIN_CPU;
 
-   if (HAS_LLC(i915))
+   /*
+* MTL doesn't snoop the CPU cache by default for GPU access (namely
+* 1-way coherency). However, some UMDs currently depend on that, so
+* make 1-way coherent the default setting for MTL. A follow-up patch
+* will extend the GEM_CREATE uAPI to allow UMDs to specify the
+* caching mode at BO creation time.
+*/
+   if (HAS_LLC(i915) || (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)))
/* On some devices, we can have the GPU use the LLC (the CPU
 * cache) for about a 10% performance improvement
 * compared to uncached.  Graphics requests other than
-- 
2.25.1



[PATCH v7 2/4] drm/i915: use pat_index instead of cache_level

2023-05-09 Thread fei . yang
From: Fei Yang 

Currently the KMD uses enum i915_cache_level to set the caching policy for
buffer objects. This is flaky because the PAT index, which really controls
the caching behavior in the PTE, has far more levels than what's defined in
the enum. In addition, the PAT index is platform dependent, so having to
translate between i915_cache_level and PAT index is not reliable and makes
the code more complicated.

From the UMD's perspective there is also a need to set the caching policy
for performance fine-tuning. It's much easier for the UMD to directly use PAT
index because the behavior of each PAT index is clearly defined in Bspec.
Having the abstracted i915_cache_level sitting in between would only cause
more ambiguity. PAT is expected to work much like MOCS already works today,
and by design userspace is expected to select the index that exactly
matches the desired behavior described in the hardware specification.

For these reasons this patch replaces i915_cache_level with PAT index. Also
note, cache_level is not completely removed yet, because the KMD still needs
to create buffer objects with simple cache settings such as cached, uncached,
or writethrough. For kernel objects, cache_level is used for simplicity and
backward compatibility. For pre-gen12 platforms PAT can have a 1:1 mapping to
i915_cache_level, so these two are interchangeable; see the use of
LEGACY_CACHELEVEL.

One consequence of this change is that gen8_pte_encode no longer works
for gen12 platforms, because gen12 platforms have different PAT
definitions. Meanwhile the mtl_pte_encode introduced specifically for
MTL becomes generic for all gen12 platforms. This patch renames the MTL
PTE encode function to gen12_pte_encode and applies it to all gen12.
Even though this change looks unrelated, separating the two would
temporarily break gen12 PTE encoding, so they are squashed into one patch.

Special note: this patch changes the way caching behavior is controlled in
the sense that some objects are left to be managed by userspace. For such
objects we need to be careful not to change the userspace settings. There
are kerneldoc and comments added around obj->cache_coherent, cache_dirty,
and how to bypass the checks via i915_gem_object_has_cache_level. For
full understanding, these changes need to be looked at together with the
two follow-up patches: one disables the {set|get}_caching ioctls and the
other adds the set_pat extension to the GEM_CREATE uAPI.

Bspec: 63019

Cc: Chris Wilson 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
Reviewed-by: Matt Roper 

To be squashed
---
 drivers/gpu/drm/i915/display/intel_dpt.c  | 12 +--
 drivers/gpu/drm/i915/gem/i915_gem_domain.c| 58 +
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 15 +++-
 drivers/gpu/drm/i915/gem/i915_gem_mman.c  | 11 ++-
 drivers/gpu/drm/i915/gem/i915_gem_object.c| 51 ++-
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  4 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 46 +-
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c|  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  |  8 +-
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
 .../drm/i915/gem/selftests/i915_gem_migrate.c |  2 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|  2 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  | 10 ++-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 84 +--
 drivers/gpu/drm/i915/gt/gen8_ppgtt.h  |  3 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 82 +-
 drivers/gpu/drm/i915/gt/intel_gtt.h   | 18 ++--
 drivers/gpu/drm/i915/gt/intel_migrate.c   | 47 ++-
 drivers/gpu/drm/i915/gt/intel_migrate.h   | 13 ++-
 drivers/gpu/drm/i915/gt/intel_ppgtt.c |  4 +-
 drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++-
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  8 +-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  2 +-
 drivers/gpu/drm/i915/gt/selftest_tlb.c|  4 +-
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  | 10 ++-
 drivers/gpu/drm/i915/i915_debugfs.c   | 53 +---
 drivers/gpu/drm/i915/i915_gem.c   | 27 +-
 drivers/gpu/drm/i915/i915_gpu_error.c |  8 +-
 drivers/gpu/drm/i915/i915_vma.c   | 16 ++--
 drivers/gpu/drm/i915/i915_vma.h   |  2 +-
 drivers/gpu/drm/i915/i915_vma_types.h |  2 -
 drivers/gpu/drm/i915/selftests/i915_gem.c |  5 +-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++--
 .../drm/i915/selftests/intel_memory_region.c  |  4 +-
 drivers/gpu/drm/i915/selftests/mock_gtt.c |  8 +-
 36 files changed, 453 insertions(+), 238 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c 
b/drivers/gpu/drm/i915/display/intel_dpt.c
index c5eacfdba1a5..7c5fddb203ba 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.c
+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
@@ -43,24 

[PATCH v7 3/4] drm/i915/mtl: end support for set caching ioctl

2023-05-09 Thread fei . yang
From: Fei Yang 

The design is to keep a buffer object's caching policy immutable
throughout its life cycle. This patch ends support for the set caching
ioctl from MTL onward. While doing that, we also make BOs 1-way coherent
at creation time, because the GPU no longer automatically snoops the CPU
cache. For userspace components that need to fine-tune the caching policy
for their BOs, a follow-up patch will extend the GEM_CREATE uAPI to allow
them to specify the caching mode at BO creation time.

Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
Reviewed-by: Andrzej Hajda 
---
 drivers/gpu/drm/i915/gem/i915_gem_domain.c | 3 +++
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c  | 9 -
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c 
b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 05107a6efe45..dfaaa8b66ac3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -350,6 +350,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void 
*data,
if (IS_DGFX(i915))
return -ENODEV;
 
+   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+   return -EOPNOTSUPP;
+
switch (args->caching) {
case I915_CACHING_NONE:
level = I915_CACHE_NONE;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 37d1efcd3ca6..cad4a6017f4b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -601,7 +601,14 @@ static int shmem_object_init(struct intel_memory_region 
*mem,
obj->write_domain = I915_GEM_DOMAIN_CPU;
obj->read_domains = I915_GEM_DOMAIN_CPU;
 
-   if (HAS_LLC(i915))
+   /*
+* MTL doesn't snoop the CPU cache by default for GPU access (namely
+* 1-way coherency). However, some UMDs currently depend on that, so
+* make 1-way coherent the default setting for MTL. A follow-up patch
+* will extend the GEM_CREATE uAPI to allow UMDs to specify the
+* caching mode at BO creation time.
+*/
+   if (HAS_LLC(i915) || (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)))
/* On some devices, we can have the GPU use the LLC (the CPU
 * cache) for about a 10% performance improvement
 * compared to uncached.  Graphics requests other than
-- 
2.25.1



[PATCH v7 0/4] drm/i915: Allow user to set cache at BO creation

2023-05-09 Thread fei . yang
From: Fei Yang 

The first three patches in this series are taken from
https://patchwork.freedesktop.org/series/116868/
These patches are included here because the last patch
depends on the pat_index refactor.

This series is focusing on uAPI changes,
1. end support for set caching ioctl [PATCH 4/5]
2. add set_pat extension for gem_create [PATCH 5/5]

v2: drop one patch that was merged separately
commit 341ad0e8e254 ("drm/i915/mtl: Add PTE encode function")
v3: rebased on https://patchwork.freedesktop.org/series/117082/
v4: fix missing unlock introduced in v3, and
solve a rebase conflict
v5: replace obj->cache_level with pat_set_by_user,
fix i915_cache_level_str() for legacy platforms.
v6: rebased on https://patchwork.freedesktop.org/series/117480/
v7: rebased on https://patchwork.freedesktop.org/series/117528/

Fei Yang (4):
  drm/i915: preparation for using PAT index
  drm/i915: use pat_index instead of cache_level
  drm/i915/mtl: end support for set caching ioctl
  drm/i915: Allow user to set cache at BO creation

 drivers/gpu/drm/i915/display/intel_dpt.c  | 12 +--
 drivers/gpu/drm/i915/gem/i915_gem_create.c| 36 +
 drivers/gpu/drm/i915/gem/i915_gem_domain.c| 61 +-
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 15 +++-
 drivers/gpu/drm/i915/gem/i915_gem_mman.c  | 11 ++-
 drivers/gpu/drm/i915/gem/i915_gem_object.c| 66 +++-
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  8 ++
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 53 -
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c |  9 ++-
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  2 -
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c|  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  |  8 +-
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
 .../drm/i915/gem/selftests/i915_gem_migrate.c |  2 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|  2 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  | 10 ++-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 78 +-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.h  |  3 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 76 +-
 drivers/gpu/drm/i915/gt/intel_gtt.h   | 18 ++---
 drivers/gpu/drm/i915/gt/intel_migrate.c   | 47 ++-
 drivers/gpu/drm/i915/gt/intel_migrate.h   | 13 ++-
 drivers/gpu/drm/i915/gt/intel_ppgtt.c |  4 +-
 drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 +--
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  8 +-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  2 +-
 drivers/gpu/drm/i915/gt/selftest_tlb.c|  4 +-
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  | 10 ++-
 drivers/gpu/drm/i915/i915_debugfs.c   | 53 ++---
 drivers/gpu/drm/i915/i915_gem.c   | 27 ++-
 drivers/gpu/drm/i915/i915_gpu_error.c |  8 +-
 drivers/gpu/drm/i915/i915_pci.c   | 79 ---
 drivers/gpu/drm/i915/i915_vma.c   | 16 ++--
 drivers/gpu/drm/i915/i915_vma.h   |  2 +-
 drivers/gpu/drm/i915/i915_vma_types.h |  2 -
 drivers/gpu/drm/i915/intel_device_info.h  |  5 ++
 drivers/gpu/drm/i915/selftests/i915_gem.c |  5 +-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++--
 .../drm/i915/selftests/intel_memory_region.c  |  4 +-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  9 +++
 drivers/gpu/drm/i915/selftests/mock_gtt.c |  8 +-
 include/uapi/drm/i915_drm.h   | 36 +
 tools/include/uapi/drm/i915_drm.h | 36 +
 44 files changed, 682 insertions(+), 238 deletions(-)

-- 
2.25.1



[PATCH v7 1/4] drm/i915: preparation for using PAT index

2023-05-09 Thread fei . yang
From: Fei Yang 

This patch is a preparation for replacing enum i915_cache_level with PAT
index. Caching policy for buffer objects is set through the PAT index in
the PTE; the old i915_cache_level is not sufficient to represent all
caching modes supported by the hardware.

Prepare for the transition by adding some platform dependent data structures
and helper functions to translate cache_level to pat_index (a sketch of such
a table follows the list below).

cachelevel_to_pat: a platform dependent array mapping cache_level to
   pat_index.

max_pat_index: the maximum PAT index recommended in the hardware
   specification. Needed for validating the PAT index passed
   in from user space.

i915_gem_get_pat_index: function to convert cache_level to PAT index.

obj_to_i915(obj): macro moved to header file for wider usage.

I915_MAX_CACHE_LEVEL: upper bound of i915_cache_level for the
  convenience of coding.
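
A minimal sketch of what such a translation table and helper look like
(the example_ names and the numeric values are illustrative only; the
real per-platform tables live in i915_pci.c and the helper below mirrors
i915_gem_get_pat_index):

  static const unsigned int example_cachelevel_to_pat[I915_MAX_CACHE_LEVEL] = {
          [I915_CACHE_NONE]   = 0,        /* illustrative values */
          [I915_CACHE_LLC]    = 1,
          [I915_CACHE_L3_LLC] = 2,
          [I915_CACHE_WT]     = 3,
  };

  static unsigned int example_get_pat_index(enum i915_cache_level level)
  {
          if (WARN_ON(level >= I915_MAX_CACHE_LEVEL))
                  return 0;

          return example_cachelevel_to_pat[level];
  }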

Cc: Chris Wilson 
Cc: Matt Roper 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
Reviewed-by: Andrzej Hajda 
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c|  9 +++
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  4 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  7 ++
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  2 -
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  |  6 ++
 drivers/gpu/drm/i915/gt/intel_ggtt.c  |  6 ++
 drivers/gpu/drm/i915/i915_pci.c   | 79 ---
 drivers/gpu/drm/i915/intel_device_info.h  |  5 ++
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  9 +++
 9 files changed, 116 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 4666bb82f312..8c70a0ec7d2f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -45,6 +45,15 @@ static struct kmem_cache *slab_objects;
 
 static const struct drm_gem_object_funcs i915_gem_object_funcs;
 
+unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
+   enum i915_cache_level level)
+{
+   if (drm_WARN_ON(>drm, level >= I915_MAX_CACHE_LEVEL))
+   return 0;
+
+   return INTEL_INFO(i915)->cachelevel_to_pat[level];
+}
+
 struct drm_i915_gem_object *i915_gem_object_alloc(void)
 {
struct drm_i915_gem_object *obj;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index bc1291887d4f..284e1aa396cd 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -20,6 +20,8 @@
 
 enum intel_region_id;
 
+#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
+
 static inline bool i915_gem_object_size_2big(u64 size)
 {
struct drm_i915_gem_object *obj;
@@ -30,6 +32,8 @@ static inline bool i915_gem_object_size_2big(u64 size)
return false;
 }
 
+unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
+   enum i915_cache_level level);
 void i915_gem_init__objects(struct drm_i915_private *i915);
 
 void i915_objects_module_exit(void);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 830c11431ee8..bf0bd8e11355 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -194,6 +194,13 @@ enum i915_cache_level {
 * engine.
 */
I915_CACHE_WT,
+   /**
+* @I915_MAX_CACHE_LEVEL:
+*
+* Mark the last entry in the enum. Used for defining cachelevel_to_pat
+* array for cache_level to pat translation table.
+*/
+   I915_MAX_CACHE_LEVEL,
 };
 
 enum i915_map_type {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index b1672e054b21..214763942aa2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -460,8 +460,6 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private 
*i915,
fs_reclaim_release(GFP_KERNEL);
 }
 
-#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
-
 /**
  * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By
  * default all object types that support shrinking(see IS_SHRINKABLE), will 
also
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 22ec1566d2a7..bb6998d67133 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -78,6 +78,12 @@ static u64 mtl_pte_encode(dma_addr_t addr,
case I915_CACHE_WT:
pte |= GEN12_PPGTT_PTE_PAT0;
break;
+   default:
+   /* This should never happen. Added to deal with the compile
+* error due to the addition of I915_MAX_CACHE_LEVEL. Will
+* be removed by the 

[PATCH v7 4/4] drm/i915: Allow user to set cache at BO creation

2023-05-09 Thread fei . yang
From: Fei Yang 

To comply with the design that buffer objects shall have an immutable
cache setting throughout their life cycle, the {set, get}_caching ioctls
are no longer supported from MTL onward. With that change the caching
policy can only be set at object creation time. The current code
applies a default (platform dependent) cache setting for all objects.
However this is not optimal for performance tuning. This patch extends
the existing gem_create uAPI to let the user set the PAT index for the
object at creation time.
The new extension is platform independent, so UMDs can switch to using
this extension for older platforms as well, while {set, get}_caching are
still supported on these legacy platforms for compatibility reasons.
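
As a reference for UMD integration, a minimal userspace sketch is shown
below (hedged: error handling is trimmed, the chosen pat_index is a
placeholder since valid indices are platform dependent, and it assumes
libdrm's drmIoctl() plus the uapi header from this series):

    #include <stdint.h>
    #include <xf86drm.h>
    #include <drm/i915_drm.h>

    /* Create a BO of the given size with an explicit PAT index. */
    static int create_bo_with_pat(int fd, uint64_t size,
                                  uint32_t pat_index, uint32_t *handle)
    {
            struct drm_i915_gem_create_ext_set_pat set_pat = {
                    .base = { .name = I915_GEM_CREATE_EXT_SET_PAT },
                    .pat_index = pat_index,
            };
            struct drm_i915_gem_create_ext create = {
                    .size = size,
                    .extensions = (uintptr_t)&set_pat,
            };

            /* Older kernels reject the unknown extension; an out-of-range
             * pat_index is rejected with -EINVAL as well.
             */
            if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create))
                    return -1;

            *handle = create.handle;
            return 0;
    }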

Cc: Chris Wilson 
Cc: Matt Roper 
Cc: Andi Shyti 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
---
 drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 ++
 drivers/gpu/drm/i915/gem/i915_gem_object.c |  6 
 include/uapi/drm/i915_drm.h| 36 ++
 tools/include/uapi/drm/i915_drm.h  | 36 ++
 4 files changed, 114 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c 
b/drivers/gpu/drm/i915/gem/i915_gem_create.c
index bfe1dbda4cb7..644a936248ad 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
@@ -245,6 +245,7 @@ struct create_ext {
unsigned int n_placements;
unsigned int placement_mask;
unsigned long flags;
+   unsigned int pat_index;
 };
 
 static void repr_placements(char *buf, size_t size,
@@ -394,11 +395,39 @@ static int ext_set_protected(struct i915_user_extension 
__user *base, void *data
return 0;
 }
 
+static int ext_set_pat(struct i915_user_extension __user *base, void *data)
+{
+   struct create_ext *ext_data = data;
+   struct drm_i915_private *i915 = ext_data->i915;
+   struct drm_i915_gem_create_ext_set_pat ext;
+   unsigned int max_pat_index;
+
+   BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) !=
+offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd));
+
+   if (copy_from_user(&ext, base, sizeof(ext)))
+   return -EFAULT;
+
+   max_pat_index = INTEL_INFO(i915)->max_pat_index;
+
+   if (ext.pat_index > max_pat_index) {
+   drm_dbg(&i915->drm, "PAT index is invalid: %u\n",
+   ext.pat_index);
+   return -EINVAL;
+   }
+
+   ext_data->pat_index = ext.pat_index;
+
+   return 0;
+}
+
 static const i915_user_extension_fn create_extensions[] = {
[I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements,
[I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected,
+   [I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat,
 };
 
+#define PAT_INDEX_NOT_SET  0xffff
 /**
  * i915_gem_create_ext_ioctl - Creates a new mm object and returns a handle to 
it.
  * @dev: drm device pointer
@@ -418,6 +447,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void 
*data,
if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS)
return -EINVAL;
 
+   ext_data.pat_index = PAT_INDEX_NOT_SET;
ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
   create_extensions,
   ARRAY_SIZE(create_extensions),
@@ -454,5 +484,11 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void 
*data,
if (IS_ERR(obj))
return PTR_ERR(obj);
 
+   if (ext_data.pat_index != PAT_INDEX_NOT_SET) {
+   i915_gem_object_set_pat_index(obj, ext_data.pat_index);
+   /* Mark pat_index is set by UMD */
+   obj->pat_set_by_user = true;
+   }
+
return i915_gem_publish(obj, file, &args->size, &args->handle);
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 46a19b099ec8..97ac6fb37958 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -208,6 +208,12 @@ bool i915_gem_object_can_bypass_llc(struct 
drm_i915_gem_object *obj)
if (!(obj->flags & I915_BO_ALLOC_USER))
return false;
 
+   /*
+* Always flush cache for UMD objects at creation time.
+*/
+   if (obj->pat_set_by_user)
+   return true;
+
/*
 * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
 * possible for userspace to bypass the GTT caching bits set by the
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index dba7c5a5b25e..03c5c314846e 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -3630,9 +3630,13 @@ struct drm_i915_gem_create_ext {
 *
 * For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see
 * struct drm_i915_gem_create_ext_protected_content.

[PATCH v8 2/2] drm/i915: use pat_index instead of cache_level

2023-05-09 Thread fei . yang
From: Fei Yang 

Currently the KMD is using enum i915_cache_level to set caching policy for
buffer objects. This is flaky because the PAT index which really controls
the caching behavior in PTE has far more levels than what's defined in the
enum. In addition, the PAT index is platform dependent; having to translate
between i915_cache_level and PAT index is not reliable and makes the code
more complicated.

From the UMD's perspective there is also a need to set the caching policy for
performance fine tuning. It's much easier for the UMD to directly use PAT
index because the behavior of each PAT index is clearly defined in Bspec.
Having the abstracted i915_cache_level sitting in between would only cause
more ambiguity. PAT is expected to work much like MOCS already works today,
and by design userspace is expected to select the index that exactly
matches the desired behavior described in the hardware specification.

For these reasons this patch replaces i915_cache_level with PAT index. Also
note, cache_level is not completely removed yet, because the KMD still
needs to create buffer objects with simple cache settings such as
cached, uncached, or writethrough. For kernel objects, cache_level is used
for simplicity and backward compatibility. For pre-gen12 platforms PAT can
have a 1:1 mapping to i915_cache_level, so the two are interchangeable; see
the use of LEGACY_CACHELEVEL.

One consequence of this change is that gen8_pte_encode no longer works
for gen12 platforms, because gen12 platforms have different PAT
definitions. In the meantime the mtl_pte_encode introduced specifically for
MTL becomes generic for all gen12 platforms. This patch renames the MTL
PTE encode function to gen12_pte_encode and applies it to all gen12
platforms. Even though this change looks unrelated, separating the two
would temporarily break gen12 PTE encoding, so they are squashed into one
patch.

Special note: this patch changes the way caching behavior is controlled in
the sense that some objects are left to be managed by userspace. For such
objects we need to be careful not to change the userspace settings. There
are kerneldoc and comments added around obj->cache_coherent, cache_dirty,
and how to bypass the checks via i915_gem_object_has_cache_level. For a
full understanding, these changes need to be looked at together with the
two follow-up patches: one disables the {set|get}_caching ioctls and the
other adds the set_pat extension to the GEM_CREATE uAPI.

Bspec: 63019
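
To make the split concrete, here is a rough sketch of a kernel-internal
call site after this change (illustrative, not copied from the diff below;
pte_encode() takes a PAT index once this patch is applied):

    /* Kernel code that still thinks in i915_cache_level translates exactly
     * once at the boundary; everything below the GTT layer consumes a raw
     * PAT index.
     */
    unsigned int pat_index = i915_gem_get_pat_index(vm->i915, I915_CACHE_LLC);

    pte = vm->pte_encode(addr, pat_index, flags);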

Cc: Chris Wilson 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
Reviewed-by: Matt Roper 

To be squashed
---
 drivers/gpu/drm/i915/display/intel_dpt.c  | 12 +--
 drivers/gpu/drm/i915/gem/i915_gem_domain.c| 58 +
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 15 +++-
 drivers/gpu/drm/i915/gem/i915_gem_mman.c  | 11 ++-
 drivers/gpu/drm/i915/gem/i915_gem_object.c| 51 ++-
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  4 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 46 +-
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c|  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  |  8 +-
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
 .../drm/i915/gem/selftests/i915_gem_migrate.c |  2 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|  2 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  | 10 ++-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 84 +--
 drivers/gpu/drm/i915/gt/gen8_ppgtt.h  |  3 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 82 +-
 drivers/gpu/drm/i915/gt/intel_gtt.h   | 18 ++--
 drivers/gpu/drm/i915/gt/intel_migrate.c   | 47 ++-
 drivers/gpu/drm/i915/gt/intel_migrate.h   | 13 ++-
 drivers/gpu/drm/i915/gt/intel_ppgtt.c |  4 +-
 drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++-
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  8 +-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  2 +-
 drivers/gpu/drm/i915/gt/selftest_tlb.c|  4 +-
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  | 10 ++-
 drivers/gpu/drm/i915/i915_debugfs.c   | 53 +---
 drivers/gpu/drm/i915/i915_gem.c   | 27 +-
 drivers/gpu/drm/i915/i915_gpu_error.c |  8 +-
 drivers/gpu/drm/i915/i915_vma.c   | 16 ++--
 drivers/gpu/drm/i915/i915_vma.h   |  2 +-
 drivers/gpu/drm/i915/i915_vma_types.h |  2 -
 drivers/gpu/drm/i915/selftests/i915_gem.c |  5 +-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++--
 .../drm/i915/selftests/intel_memory_region.c  |  4 +-
 drivers/gpu/drm/i915/selftests/mock_gtt.c |  8 +-
 36 files changed, 453 insertions(+), 238 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c 
b/drivers/gpu/drm/i915/display/intel_dpt.c
index c5eacfdba1a5..7c5fddb203ba 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.c
+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
@@ -43,24 

[PATCH v8 0/2] drm/i915: use pat_index instead of cache_level

2023-05-09 Thread fei . yang
From: Fei Yang 

This patch set was posted at
https://patchwork.freedesktop.org/series/116868/
Change title since the PTE patch was merged separately.

These patches are extracted from series
https://patchwork.freedesktop.org/series/115980/

This series refactors the cache policy programming so that the PTE
encode functions can be unified across all GEN12 platforms. This
refactor is also important in implementing the design that allows
userspace to directly set the cache policy for each Buffer Object.
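
As a hedged sketch of what the unified encoder ends up doing (the PAT bit
names come from the already-merged MTL PTE patch rather than this excerpt),
it forwards the PAT index bits into the PTE instead of switching on
cache_level, roughly along these lines:

    static u64 gen12_pte_encode(dma_addr_t addr, unsigned int pat_index,
                                u32 flags)
    {
            gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;

            if (unlikely(flags & PTE_READ_ONLY))
                    pte &= ~GEN8_PAGE_RW;
            if (flags & PTE_LM)
                    pte |= GEN12_PPGTT_PTE_LM;

            /* PAT index bits map directly onto the PTE PAT[3:0] fields. */
            if (pat_index & BIT(0))
                    pte |= GEN12_PPGTT_PTE_PAT0;
            if (pat_index & BIT(1))
                    pte |= GEN12_PPGTT_PTE_PAT1;
            if (pat_index & BIT(2))
                    pte |= GEN12_PPGTT_PTE_PAT2;
            if (pat_index & BIT(3))
                    pte |= GEN12_PPGTT_PTE_PAT3;

            return pte;
    }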

v2: drop one patch that was merged separately
341ad0e8e254 drm/i915/mtl: Add PTE encode function
v3: disable {get, set}_caching ioctl
v4: fix missing unlock introduced in v3, and
solve a rebase conflict
v5: replace obj->cache_level with pat_set_by_user,
fix i915_cache_level_str() for legacy platforms.
v6: squash the pte_encode patch because separating them causes
bisect problems. Also addressing some review comments from
Tvrtko and Matt.
v7: fix checkpatch errors and warnings.
v8: BUILD_BUG_ON instead of WARN_ON_ONCE. Some updates in
comments.

Fei Yang (2):
  drm/i915: preparation for using PAT index
  drm/i915: use pat_index instead of cache_level

 drivers/gpu/drm/i915/display/intel_dpt.c  | 12 +--
 drivers/gpu/drm/i915/gem/i915_gem_domain.c| 58 +-
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 15 +++-
 drivers/gpu/drm/i915/gem/i915_gem_mman.c  | 11 ++-
 drivers/gpu/drm/i915/gem/i915_gem_object.c| 60 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  8 ++
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 53 -
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  2 -
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c|  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  |  8 +-
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
 .../drm/i915/gem/selftests/i915_gem_migrate.c |  2 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|  2 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  | 10 ++-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 78 +-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.h  |  3 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 76 +-
 drivers/gpu/drm/i915/gt/intel_gtt.h   | 18 ++---
 drivers/gpu/drm/i915/gt/intel_migrate.c   | 47 ++-
 drivers/gpu/drm/i915/gt/intel_migrate.h   | 13 ++-
 drivers/gpu/drm/i915/gt/intel_ppgtt.c |  4 +-
 drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 +--
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  8 +-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  2 +-
 drivers/gpu/drm/i915/gt/selftest_tlb.c|  4 +-
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  | 10 ++-
 drivers/gpu/drm/i915/i915_debugfs.c   | 53 ++---
 drivers/gpu/drm/i915/i915_gem.c   | 27 ++-
 drivers/gpu/drm/i915/i915_gpu_error.c |  8 +-
 drivers/gpu/drm/i915/i915_pci.c   | 79 ---
 drivers/gpu/drm/i915/i915_vma.c   | 16 ++--
 drivers/gpu/drm/i915/i915_vma.h   |  2 +-
 drivers/gpu/drm/i915/i915_vma_types.h |  2 -
 drivers/gpu/drm/i915/intel_device_info.h  |  5 ++
 drivers/gpu/drm/i915/selftests/i915_gem.c |  5 +-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++--
 .../drm/i915/selftests/intel_memory_region.c  |  4 +-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  9 +++
 drivers/gpu/drm/i915/selftests/mock_gtt.c |  8 +-
 40 files changed, 557 insertions(+), 237 deletions(-)

-- 
2.25.1



[PATCH v8 1/2] drm/i915: preparation for using PAT index

2023-05-09 Thread fei . yang
From: Fei Yang 

This patch is a preparation for replacing enum i915_cache_level with PAT
index. Caching policy for buffer objects is set through the PAT index in
PTE, the old i915_cache_level is not sufficient to represent all caching
modes supported by the hardware.

Preparing the transition by adding some platform dependent data structures
and helper functions to translate the cache_level to pat_index.

cachelevel_to_pat: a platform dependent array mapping cache_level to
   pat_index.

max_pat_index: the maximum PAT index recommended in hardware specification
   Needed for validating the PAT index passed in from user
   space.

i915_gem_get_pat_index: function to convert cache_level to PAT index.

obj_to_i915(obj): macro moved to header file for wider usage.

I915_MAX_CACHE_LEVEL: upper bound of i915_cache_level for the
  convenience of coding.

Cc: Chris Wilson 
Cc: Matt Roper 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
Reviewed-by: Andrzej Hajda 
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c|  9 +++
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  4 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  7 ++
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  2 -
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  |  6 ++
 drivers/gpu/drm/i915/gt/intel_ggtt.c  |  6 ++
 drivers/gpu/drm/i915/i915_pci.c   | 79 ---
 drivers/gpu/drm/i915/intel_device_info.h  |  5 ++
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  9 +++
 9 files changed, 116 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 4666bb82f312..8c70a0ec7d2f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -45,6 +45,15 @@ static struct kmem_cache *slab_objects;
 
 static const struct drm_gem_object_funcs i915_gem_object_funcs;
 
+unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
+   enum i915_cache_level level)
+{
+   if (drm_WARN_ON(&i915->drm, level >= I915_MAX_CACHE_LEVEL))
+   return 0;
+
+   return INTEL_INFO(i915)->cachelevel_to_pat[level];
+}
+
 struct drm_i915_gem_object *i915_gem_object_alloc(void)
 {
struct drm_i915_gem_object *obj;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index bc1291887d4f..284e1aa396cd 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -20,6 +20,8 @@
 
 enum intel_region_id;
 
+#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
+
 static inline bool i915_gem_object_size_2big(u64 size)
 {
struct drm_i915_gem_object *obj;
@@ -30,6 +32,8 @@ static inline bool i915_gem_object_size_2big(u64 size)
return false;
 }
 
+unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
+   enum i915_cache_level level);
 void i915_gem_init__objects(struct drm_i915_private *i915);
 
 void i915_objects_module_exit(void);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 830c11431ee8..bf0bd8e11355 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -194,6 +194,13 @@ enum i915_cache_level {
 * engine.
 */
I915_CACHE_WT,
+   /**
+* @I915_MAX_CACHE_LEVEL:
+*
+* Mark the last entry in the enum. Used for defining cachelevel_to_pat
+* array for cache_level to pat translation table.
+*/
+   I915_MAX_CACHE_LEVEL,
 };
 
 enum i915_map_type {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index b1672e054b21..214763942aa2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -460,8 +460,6 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private 
*i915,
fs_reclaim_release(GFP_KERNEL);
 }
 
-#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
-
 /**
  * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By
  * default all object types that support shrinking(see IS_SHRINKABLE), will 
also
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 22ec1566d2a7..bb6998d67133 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -78,6 +78,12 @@ static u64 mtl_pte_encode(dma_addr_t addr,
case I915_CACHE_WT:
pte |= GEN12_PPGTT_PTE_PAT0;
break;
+   default:
+   /* This should never happen. Added to deal with the compile
+* error due to the addition of I915_MAX_CACHE_LEVEL. Will
+* be removed by the 

[PATCH v6 4/4] drm/i915: Allow user to set cache at BO creation

2023-05-08 Thread fei . yang
From: Fei Yang 

To comply with the design that buffer objects shall have an immutable
cache setting throughout their life cycle, the {set, get}_caching ioctls
are no longer supported from MTL onward. With that change the caching
policy can only be set at object creation time. The current code
applies a default (platform dependent) cache setting for all objects.
However this is not optimal for performance tuning. This patch extends
the existing gem_create uAPI to let the user set the PAT index for the
object at creation time.
The new extension is platform independent, so UMDs can switch to using
this extension for older platforms as well, while {set, get}_caching are
still supported on these legacy platforms for compatibility reasons.

Cc: Chris Wilson 
Cc: Matt Roper 
Cc: Andi Shyti 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
---
 drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 ++
 drivers/gpu/drm/i915/gem/i915_gem_object.c |  6 
 include/uapi/drm/i915_drm.h| 36 ++
 tools/include/uapi/drm/i915_drm.h  | 36 ++
 4 files changed, 114 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c 
b/drivers/gpu/drm/i915/gem/i915_gem_create.c
index bfe1dbda4cb7..644a936248ad 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
@@ -245,6 +245,7 @@ struct create_ext {
unsigned int n_placements;
unsigned int placement_mask;
unsigned long flags;
+   unsigned int pat_index;
 };
 
 static void repr_placements(char *buf, size_t size,
@@ -394,11 +395,39 @@ static int ext_set_protected(struct i915_user_extension 
__user *base, void *data
return 0;
 }
 
+static int ext_set_pat(struct i915_user_extension __user *base, void *data)
+{
+   struct create_ext *ext_data = data;
+   struct drm_i915_private *i915 = ext_data->i915;
+   struct drm_i915_gem_create_ext_set_pat ext;
+   unsigned int max_pat_index;
+
+   BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) !=
+offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd));
+
+   if (copy_from_user(&ext, base, sizeof(ext)))
+   return -EFAULT;
+
+   max_pat_index = INTEL_INFO(i915)->max_pat_index;
+
+   if (ext.pat_index > max_pat_index) {
+   drm_dbg(&i915->drm, "PAT index is invalid: %u\n",
+   ext.pat_index);
+   return -EINVAL;
+   }
+
+   ext_data->pat_index = ext.pat_index;
+
+   return 0;
+}
+
 static const i915_user_extension_fn create_extensions[] = {
[I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements,
[I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected,
+   [I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat,
 };
 
+#define PAT_INDEX_NOT_SET  0xffff
 /**
  * i915_gem_create_ext_ioctl - Creates a new mm object and returns a handle to 
it.
  * @dev: drm device pointer
@@ -418,6 +447,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void 
*data,
if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS)
return -EINVAL;
 
+   ext_data.pat_index = PAT_INDEX_NOT_SET;
ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
   create_extensions,
   ARRAY_SIZE(create_extensions),
@@ -454,5 +484,11 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void 
*data,
if (IS_ERR(obj))
return PTR_ERR(obj);
 
+   if (ext_data.pat_index != PAT_INDEX_NOT_SET) {
+   i915_gem_object_set_pat_index(obj, ext_data.pat_index);
+   /* Mark pat_index is set by UMD */
+   obj->pat_set_by_user = true;
+   }
+
return i915_gem_publish(obj, file, &args->size, &args->handle);
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 46a19b099ec8..97ac6fb37958 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -208,6 +208,12 @@ bool i915_gem_object_can_bypass_llc(struct 
drm_i915_gem_object *obj)
if (!(obj->flags & I915_BO_ALLOC_USER))
return false;
 
+   /*
+* Always flush cache for UMD objects at creation time.
+*/
+   if (obj->pat_set_by_user)
+   return true;
+
/*
 * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
 * possible for userspace to bypass the GTT caching bits set by the
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index dba7c5a5b25e..03c5c314846e 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -3630,9 +3630,13 @@ struct drm_i915_gem_create_ext {
 *
 * For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see
 * struct drm_i915_gem_create_ext_protected_content.

[PATCH v6 2/4] drm/i915: use pat_index instead of cache_level

2023-05-08 Thread fei . yang
From: Fei Yang 

Currently the KMD is using enum i915_cache_level to set caching policy for
buffer objects. This is flaky because the PAT index which really controls
the caching behavior in PTE has far more levels than what's defined in the
enum. In addition, the PAT index is platform dependent; having to translate
between i915_cache_level and PAT index is not reliable and makes the code
more complicated.

From the UMD's perspective there is also a need to set the caching policy for
performance fine tuning. It's much easier for the UMD to directly use PAT
index because the behavior of each PAT index is clearly defined in Bspec.
Having the abstracted i915_cache_level sitting in between would only cause
more ambiguity. PAT is expected to work much like MOCS already works today,
and by design userspace is expected to select the index that exactly
matches the desired behavior described in the hardware specification.

For these reasons this patch replaces i915_cache_level with PAT index. Also
note, cache_level is not completely removed yet, because the KMD still
needs to create buffer objects with simple cache settings such as
cached, uncached, or writethrough. For kernel objects, cache_level is used
for simplicity and backward compatibility. For pre-gen12 platforms PAT can
have a 1:1 mapping to i915_cache_level, so the two are interchangeable; see
the use of LEGACY_CACHELEVEL.

One consequence of this change is that gen8_pte_encode no longer works
for gen12 platforms, because gen12 platforms have different PAT
definitions. In the meantime the mtl_pte_encode introduced specifically for
MTL becomes generic for all gen12 platforms. This patch renames the MTL
PTE encode function to gen12_pte_encode and applies it to all gen12
platforms. Even though this change looks unrelated, separating the two
would temporarily break gen12 PTE encoding, so they are squashed into one
patch.

Special note: this patch changes the way caching behavior is controlled in
the sense that some objects are left to be managed by userspace. For such
objects we need to be careful not to change the userspace settings. There
are kerneldoc and comments added around obj->cache_coherent, cache_dirty,
and how to bypass the checks via i915_gem_object_has_cache_level. For a
full understanding, these changes need to be looked at together with the
two follow-up patches: one disables the {set|get}_caching ioctls and the
other adds the set_pat extension to the GEM_CREATE uAPI.

Bspec: 63019

Cc: Chris Wilson 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
Reviewed-by: Matt Roper 

To be squashed
---
 drivers/gpu/drm/i915/display/intel_dpt.c  | 12 +--
 drivers/gpu/drm/i915/gem/i915_gem_domain.c| 58 +
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 15 +++-
 drivers/gpu/drm/i915/gem/i915_gem_mman.c  | 11 ++-
 drivers/gpu/drm/i915/gem/i915_gem_object.c| 51 ++-
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  4 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 44 +-
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c|  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  |  8 +-
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
 .../drm/i915/gem/selftests/i915_gem_migrate.c |  2 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|  2 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  | 10 ++-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 84 +--
 drivers/gpu/drm/i915/gt/gen8_ppgtt.h  |  3 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 82 +-
 drivers/gpu/drm/i915/gt/intel_gtt.h   | 18 ++--
 drivers/gpu/drm/i915/gt/intel_migrate.c   | 47 ++-
 drivers/gpu/drm/i915/gt/intel_migrate.h   | 13 ++-
 drivers/gpu/drm/i915/gt/intel_ppgtt.c |  4 +-
 drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++-
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  8 +-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  2 +-
 drivers/gpu/drm/i915/gt/selftest_tlb.c|  4 +-
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  | 10 ++-
 drivers/gpu/drm/i915/i915_debugfs.c   | 53 +---
 drivers/gpu/drm/i915/i915_gem.c   | 27 +-
 drivers/gpu/drm/i915/i915_gpu_error.c |  8 +-
 drivers/gpu/drm/i915/i915_vma.c   | 16 ++--
 drivers/gpu/drm/i915/i915_vma.h   |  2 +-
 drivers/gpu/drm/i915/i915_vma_types.h |  2 -
 drivers/gpu/drm/i915/selftests/i915_gem.c |  5 +-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++--
 .../drm/i915/selftests/intel_memory_region.c  |  4 +-
 drivers/gpu/drm/i915/selftests/mock_gtt.c |  8 +-
 36 files changed, 451 insertions(+), 238 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c 
b/drivers/gpu/drm/i915/display/intel_dpt.c
index c5eacfdba1a5..7c5fddb203ba 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.c
+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
@@ -43,24 

[PATCH v6 3/4] drm/i915/mtl: end support for set caching ioctl

2023-05-08 Thread fei . yang
From: Fei Yang 

The design is to keep a Buffer Object's caching policy immutable
throughout its life cycle. This patch ends support for the set_caching
ioctl from MTL onward. While doing that we also set BOs to be 1-way
coherent at creation time because the GPU no longer automatically snoops
the CPU cache. For userspace components that need to fine-tune the caching
policy for BOs, a follow-up patch will extend the GEM_CREATE uAPI to allow
them to specify the caching mode at BO creation time.
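
For UMDs probing this behavior, a rough sketch of the resulting fallback
path is shown below (illustrative only; fd and handle are placeholders and
the errno handling assumes libdrm's drmIoctl()):

    struct drm_i915_gem_caching arg = {
            .handle = handle,
            .caching = I915_CACHING_CACHED,
    };

    if (drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg) &&
        errno == EOPNOTSUPP) {
            /* MTL and newer: caching is immutable after creation, so the
             * BO must instead be created with the set_pat extension added
             * by the follow-up GEM_CREATE uAPI patch.
             */
    }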

Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
Reviewed-by: Andrzej Hajda 
---
 drivers/gpu/drm/i915/gem/i915_gem_domain.c | 3 +++
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c  | 9 -
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c 
b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 05107a6efe45..dfaaa8b66ac3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -350,6 +350,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void 
*data,
if (IS_DGFX(i915))
return -ENODEV;
 
+   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+   return -EOPNOTSUPP;
+
switch (args->caching) {
case I915_CACHING_NONE:
level = I915_CACHE_NONE;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 37d1efcd3ca6..cad4a6017f4b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -601,7 +601,14 @@ static int shmem_object_init(struct intel_memory_region 
*mem,
obj->write_domain = I915_GEM_DOMAIN_CPU;
obj->read_domains = I915_GEM_DOMAIN_CPU;
 
-   if (HAS_LLC(i915))
+   /*
+* MTL doesn't snoop CPU cache by default for GPU access (namely
+* 1-way coherency). However some UMD's are currently depending on
+* that. Make 1-way coherent the default setting for MTL. A follow
+* up patch will extend the GEM_CREATE uAPI to allow UMD's specify
+* caching mode at BO creation time
+*/
+   if (HAS_LLC(i915) || (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)))
/* On some devices, we can have the GPU use the LLC (the CPU
 * cache) for about a 10% performance improvement
 * compared to uncached.  Graphics requests other than
-- 
2.25.1



[PATCH v6 0/4] drm/i915: Allow user to set cache at BO creation

2023-05-08 Thread fei . yang
From: Fei Yang 

The first three patches in this series are taken from
https://patchwork.freedesktop.org/series/116868/
These patches are included here because the last patch
depends on the pat_index refactor.

This series is focusing on uAPI changes,
1. end support for set caching ioctl [PATCH 4/5]
2. add set_pat extension for gem_create [PATCH 5/5]

v2: drop one patch that was merged separately
341ad0e8e254 drm/i915/mtl: Add PTE encode function
v3: rebase on https://patchwork.freedesktop.org/series/117082/
v4: fix missing unlock introduced in v3, and
solve a rebase conflict
v5: replace obj->cache_level with pat_set_by_user,
fix i915_cache_level_str() for legacy platforms.
v6: rebased on https://patchwork.freedesktop.org/series/117480/

Fei Yang (4):
  drm/i915: preparation for using PAT index
  drm/i915: use pat_index instead of cache_level
  drm/i915/mtl: end support for set caching ioctl
  drm/i915: Allow user to set cache at BO creation

 drivers/gpu/drm/i915/display/intel_dpt.c  | 12 +--
 drivers/gpu/drm/i915/gem/i915_gem_create.c| 36 +
 drivers/gpu/drm/i915/gem/i915_gem_domain.c| 61 +-
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 15 +++-
 drivers/gpu/drm/i915/gem/i915_gem_mman.c  | 11 ++-
 drivers/gpu/drm/i915/gem/i915_gem_object.c| 66 +++-
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  8 ++
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 51 +++-
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c |  9 ++-
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  2 -
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c|  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  |  8 +-
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
 .../drm/i915/gem/selftests/i915_gem_migrate.c |  2 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|  2 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  | 10 ++-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 78 +-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.h  |  3 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 76 +-
 drivers/gpu/drm/i915/gt/intel_gtt.h   | 18 ++---
 drivers/gpu/drm/i915/gt/intel_migrate.c   | 47 ++-
 drivers/gpu/drm/i915/gt/intel_migrate.h   | 13 ++-
 drivers/gpu/drm/i915/gt/intel_ppgtt.c |  4 +-
 drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 +--
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  8 +-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  2 +-
 drivers/gpu/drm/i915/gt/selftest_tlb.c|  4 +-
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  | 10 ++-
 drivers/gpu/drm/i915/i915_debugfs.c   | 53 ++---
 drivers/gpu/drm/i915/i915_gem.c   | 27 ++-
 drivers/gpu/drm/i915/i915_gpu_error.c |  8 +-
 drivers/gpu/drm/i915/i915_pci.c   | 79 ---
 drivers/gpu/drm/i915/i915_vma.c   | 16 ++--
 drivers/gpu/drm/i915/i915_vma.h   |  2 +-
 drivers/gpu/drm/i915/i915_vma_types.h |  2 -
 drivers/gpu/drm/i915/intel_device_info.h  |  5 ++
 drivers/gpu/drm/i915/selftests/i915_gem.c |  5 +-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++--
 .../drm/i915/selftests/intel_memory_region.c  |  4 +-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  9 +++
 drivers/gpu/drm/i915/selftests/mock_gtt.c |  8 +-
 include/uapi/drm/i915_drm.h   | 36 +
 tools/include/uapi/drm/i915_drm.h | 36 +
 44 files changed, 680 insertions(+), 238 deletions(-)

-- 
2.25.1



[PATCH v6 1/4] drm/i915: preparation for using PAT index

2023-05-08 Thread fei . yang
From: Fei Yang 

This patch is a preparation for replacing enum i915_cache_level with PAT
index. Caching policy for buffer objects is set through the PAT index in
PTE, the old i915_cache_level is not sufficient to represent all caching
modes supported by the hardware.

Preparing the transition by adding some platform dependent data structures
and helper functions to translate the cache_level to pat_index.

cachelevel_to_pat: a platform dependent array mapping cache_level to
   pat_index.

max_pat_index: the maximum PAT index recommended in hardware specification
   Needed for validating the PAT index passed in from user
   space.

i915_gem_get_pat_index: function to convert cache_level to PAT index.

obj_to_i915(obj): macro moved to header file for wider usage.

I915_MAX_CACHE_LEVEL: upper bound of i915_cache_level for the
  convenience of coding.

Cc: Chris Wilson 
Cc: Matt Roper 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
Reviewed-by: Andrzej Hajda 
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c|  9 +++
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  4 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  7 ++
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  2 -
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  |  6 ++
 drivers/gpu/drm/i915/gt/intel_ggtt.c  |  6 ++
 drivers/gpu/drm/i915/i915_pci.c   | 79 ---
 drivers/gpu/drm/i915/intel_device_info.h  |  5 ++
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  9 +++
 9 files changed, 116 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 4666bb82f312..8c70a0ec7d2f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -45,6 +45,15 @@ static struct kmem_cache *slab_objects;
 
 static const struct drm_gem_object_funcs i915_gem_object_funcs;
 
+unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
+   enum i915_cache_level level)
+{
+   if (drm_WARN_ON(&i915->drm, level >= I915_MAX_CACHE_LEVEL))
+   return 0;
+
+   return INTEL_INFO(i915)->cachelevel_to_pat[level];
+}
+
 struct drm_i915_gem_object *i915_gem_object_alloc(void)
 {
struct drm_i915_gem_object *obj;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index bc1291887d4f..284e1aa396cd 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -20,6 +20,8 @@
 
 enum intel_region_id;
 
+#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
+
 static inline bool i915_gem_object_size_2big(u64 size)
 {
struct drm_i915_gem_object *obj;
@@ -30,6 +32,8 @@ static inline bool i915_gem_object_size_2big(u64 size)
return false;
 }
 
+unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
+   enum i915_cache_level level);
 void i915_gem_init__objects(struct drm_i915_private *i915);
 
 void i915_objects_module_exit(void);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 830c11431ee8..bf0bd8e11355 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -194,6 +194,13 @@ enum i915_cache_level {
 * engine.
 */
I915_CACHE_WT,
+   /**
+* @I915_MAX_CACHE_LEVEL:
+*
+* Mark the last entry in the enum. Used for defining cachelevel_to_pat
+* array for cache_level to pat translation table.
+*/
+   I915_MAX_CACHE_LEVEL,
 };
 
 enum i915_map_type {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index b1672e054b21..214763942aa2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -460,8 +460,6 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private 
*i915,
fs_reclaim_release(GFP_KERNEL);
 }
 
-#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
-
 /**
  * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By
  * default all object types that support shrinking(see IS_SHRINKABLE), will 
also
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 22ec1566d2a7..bb6998d67133 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -78,6 +78,12 @@ static u64 mtl_pte_encode(dma_addr_t addr,
case I915_CACHE_WT:
pte |= GEN12_PPGTT_PTE_PAT0;
break;
+   default:
+   /* This should never happen. Added to deal with the compile
+* error due to the addition of I915_MAX_CACHE_LEVEL. Will
+* be removed by the 

[PATCH v7 1/2] drm/i915: preparation for using PAT index

2023-05-08 Thread fei . yang
From: Fei Yang 

This patch is a preparation for replacing enum i915_cache_level with PAT
index. Caching policy for buffer objects is set through the PAT index in
PTE, the old i915_cache_level is not sufficient to represent all caching
modes supported by the hardware.

Preparing the transition by adding some platform dependent data structures
and helper functions to translate the cache_level to pat_index.

cachelevel_to_pat: a platform dependent array mapping cache_level to
   pat_index.

max_pat_index: the maximum PAT index recommended in hardware specification
   Needed for validating the PAT index passed in from user
   space.

i915_gem_get_pat_index: function to convert cache_level to PAT index.

obj_to_i915(obj): macro moved to header file for wider usage.

I915_MAX_CACHE_LEVEL: upper bound of i915_cache_level for the
  convenience of coding.

Cc: Chris Wilson 
Cc: Matt Roper 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
Reviewed-by: Andrzej Hajda 
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c|  9 +++
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  4 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  7 ++
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  2 -
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  |  6 ++
 drivers/gpu/drm/i915/gt/intel_ggtt.c  |  6 ++
 drivers/gpu/drm/i915/i915_pci.c   | 79 ---
 drivers/gpu/drm/i915/intel_device_info.h  |  5 ++
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  9 +++
 9 files changed, 116 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 4666bb82f312..8c70a0ec7d2f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -45,6 +45,15 @@ static struct kmem_cache *slab_objects;
 
 static const struct drm_gem_object_funcs i915_gem_object_funcs;
 
+unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
+   enum i915_cache_level level)
+{
+   if (drm_WARN_ON(&i915->drm, level >= I915_MAX_CACHE_LEVEL))
+   return 0;
+
+   return INTEL_INFO(i915)->cachelevel_to_pat[level];
+}
+
 struct drm_i915_gem_object *i915_gem_object_alloc(void)
 {
struct drm_i915_gem_object *obj;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index bc1291887d4f..284e1aa396cd 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -20,6 +20,8 @@
 
 enum intel_region_id;
 
+#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
+
 static inline bool i915_gem_object_size_2big(u64 size)
 {
struct drm_i915_gem_object *obj;
@@ -30,6 +32,8 @@ static inline bool i915_gem_object_size_2big(u64 size)
return false;
 }
 
+unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
+   enum i915_cache_level level);
 void i915_gem_init__objects(struct drm_i915_private *i915);
 
 void i915_objects_module_exit(void);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 830c11431ee8..bf0bd8e11355 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -194,6 +194,13 @@ enum i915_cache_level {
 * engine.
 */
I915_CACHE_WT,
+   /**
+* @I915_MAX_CACHE_LEVEL:
+*
+* Mark the last entry in the enum. Used for defining cachelevel_to_pat
+* array for cache_level to pat translation table.
+*/
+   I915_MAX_CACHE_LEVEL,
 };
 
 enum i915_map_type {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index b1672e054b21..214763942aa2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -460,8 +460,6 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private 
*i915,
fs_reclaim_release(GFP_KERNEL);
 }
 
-#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
-
 /**
  * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By
  * default all object types that support shrinking(see IS_SHRINKABLE), will 
also
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 22ec1566d2a7..bb6998d67133 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -78,6 +78,12 @@ static u64 mtl_pte_encode(dma_addr_t addr,
case I915_CACHE_WT:
pte |= GEN12_PPGTT_PTE_PAT0;
break;
+   default:
+   /* This should never happen. Added to deal with the compile
+* error due to the addition of I915_MAX_CACHE_LEVEL. Will
+* be removed by the 

[PATCH v7 2/2] drm/i915: use pat_index instead of cache_level

2023-05-08 Thread fei . yang
From: Fei Yang 

Currently the KMD is using enum i915_cache_level to set caching policy for
buffer objects. This is flaky because the PAT index which really controls
the caching behavior in PTE has far more levels than what's defined in the
enum. In addition, the PAT index is platform dependent; having to translate
between i915_cache_level and PAT index is not reliable and makes the code
more complicated.

From the UMD's perspective there is also a need to set the caching policy for
performance fine tuning. It's much easier for the UMD to directly use PAT
index because the behavior of each PAT index is clearly defined in Bspec.
Having the abstracted i915_cache_level sitting in between would only cause
more ambiguity. PAT is expected to work much like MOCS already works today,
and by design userspace is expected to select the index that exactly
matches the desired behavior described in the hardware specification.

For these reasons this patch replaces i915_cache_level with PAT index. Also
note, cache_level is not completely removed yet, because the KMD still
needs to create buffer objects with simple cache settings such as
cached, uncached, or writethrough. For kernel objects, cache_level is used
for simplicity and backward compatibility. For pre-gen12 platforms PAT can
have a 1:1 mapping to i915_cache_level, so the two are interchangeable; see
the use of LEGACY_CACHELEVEL.

One consequence of this change is that gen8_pte_encode no longer works
for gen12 platforms, because gen12 platforms have different PAT
definitions. In the meantime the mtl_pte_encode introduced specifically for
MTL becomes generic for all gen12 platforms. This patch renames the MTL
PTE encode function to gen12_pte_encode and applies it to all gen12
platforms. Even though this change looks unrelated, separating the two
would temporarily break gen12 PTE encoding, so they are squashed into one
patch.

Special note: this patch changes the way caching behavior is controlled in
the sense that some objects are left to be managed by userspace. For such
objects we need to be careful not to change the userspace settings. There
are kerneldoc and comments added around obj->cache_coherent, cache_dirty,
and how to bypass the checks via i915_gem_object_has_cache_level. For a
full understanding, these changes need to be looked at together with the
two follow-up patches: one disables the {set|get}_caching ioctls and the
other adds the set_pat extension to the GEM_CREATE uAPI.

Bspec: 63019

Cc: Chris Wilson 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
Reviewed-by: Matt Roper 

To be squashed
---
 drivers/gpu/drm/i915/display/intel_dpt.c  | 12 +--
 drivers/gpu/drm/i915/gem/i915_gem_domain.c| 58 +
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 15 +++-
 drivers/gpu/drm/i915/gem/i915_gem_mman.c  | 11 ++-
 drivers/gpu/drm/i915/gem/i915_gem_object.c| 51 ++-
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  4 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 44 +-
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c|  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  |  8 +-
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
 .../drm/i915/gem/selftests/i915_gem_migrate.c |  2 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|  2 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  | 10 ++-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 84 +--
 drivers/gpu/drm/i915/gt/gen8_ppgtt.h  |  3 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 82 +-
 drivers/gpu/drm/i915/gt/intel_gtt.h   | 18 ++--
 drivers/gpu/drm/i915/gt/intel_migrate.c   | 47 ++-
 drivers/gpu/drm/i915/gt/intel_migrate.h   | 13 ++-
 drivers/gpu/drm/i915/gt/intel_ppgtt.c |  4 +-
 drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++-
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  8 +-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  2 +-
 drivers/gpu/drm/i915/gt/selftest_tlb.c|  4 +-
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  | 10 ++-
 drivers/gpu/drm/i915/i915_debugfs.c   | 53 +---
 drivers/gpu/drm/i915/i915_gem.c   | 27 +-
 drivers/gpu/drm/i915/i915_gpu_error.c |  8 +-
 drivers/gpu/drm/i915/i915_vma.c   | 16 ++--
 drivers/gpu/drm/i915/i915_vma.h   |  2 +-
 drivers/gpu/drm/i915/i915_vma_types.h |  2 -
 drivers/gpu/drm/i915/selftests/i915_gem.c |  5 +-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++--
 .../drm/i915/selftests/intel_memory_region.c  |  4 +-
 drivers/gpu/drm/i915/selftests/mock_gtt.c |  8 +-
 36 files changed, 451 insertions(+), 238 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c 
b/drivers/gpu/drm/i915/display/intel_dpt.c
index c5eacfdba1a5..7c5fddb203ba 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.c
+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
@@ -43,24 

[PATCH v7 0/2] drm/i915: use pat_index instead of cache_level

2023-05-08 Thread fei . yang
From: Fei Yang 

This patch set was posted at
https://patchwork.freedesktop.org/series/116868/
Change title since the PTE patch was merged separately.

These patches are extracted from series
https://patchwork.freedesktop.org/series/115980/

This series refactors the cache policy programming so that the PTE
encode functions can be unified across all GEN12 platforms. This
refactor is also important in implementing the design that allows
userspace to directly set the cache policy for each Buffer Object.

v2: drop one patch that was merged separately
341ad0e8e254 drm/i915/mtl: Add PTE encode function
v3: disable {get, set}_caching ioctl
v4: fix missing unlock introduced in v3, and
solve a rebase conflict
v5: replace obj->cache_level with pat_set_by_user,
fix i915_cache_level_str() for legacy platforms.
v6: squash the pte_encode patch because separating them causes
bisect problems. Also addressing some review comments from
Tvrtko and Matt.
v7: fix checkpatch errors and warnings.

Fei Yang (2):
  drm/i915: preparation for using PAT index
  drm/i915: use pat_index instead of cache_level

 drivers/gpu/drm/i915/display/intel_dpt.c  | 12 +--
 drivers/gpu/drm/i915/gem/i915_gem_domain.c| 58 +-
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 15 +++-
 drivers/gpu/drm/i915/gem/i915_gem_mman.c  | 11 ++-
 drivers/gpu/drm/i915/gem/i915_gem_object.c| 60 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  8 ++
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 51 +++-
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  2 -
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c|  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  |  8 +-
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
 .../drm/i915/gem/selftests/i915_gem_migrate.c |  2 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|  2 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  | 10 ++-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 78 +-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.h  |  3 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 76 +-
 drivers/gpu/drm/i915/gt/intel_gtt.h   | 18 ++---
 drivers/gpu/drm/i915/gt/intel_migrate.c   | 47 ++-
 drivers/gpu/drm/i915/gt/intel_migrate.h   | 13 ++-
 drivers/gpu/drm/i915/gt/intel_ppgtt.c |  4 +-
 drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 +--
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  8 +-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  2 +-
 drivers/gpu/drm/i915/gt/selftest_tlb.c|  4 +-
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  | 10 ++-
 drivers/gpu/drm/i915/i915_debugfs.c   | 53 ++---
 drivers/gpu/drm/i915/i915_gem.c   | 27 ++-
 drivers/gpu/drm/i915/i915_gpu_error.c |  8 +-
 drivers/gpu/drm/i915/i915_pci.c   | 79 ---
 drivers/gpu/drm/i915/i915_vma.c   | 16 ++--
 drivers/gpu/drm/i915/i915_vma.h   |  2 +-
 drivers/gpu/drm/i915/i915_vma_types.h |  2 -
 drivers/gpu/drm/i915/intel_device_info.h  |  5 ++
 drivers/gpu/drm/i915/selftests/i915_gem.c |  5 +-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++--
 .../drm/i915/selftests/intel_memory_region.c  |  4 +-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  9 +++
 drivers/gpu/drm/i915/selftests/mock_gtt.c |  8 +-
 40 files changed, 555 insertions(+), 237 deletions(-)

-- 
2.25.1



[PATCH v6 2/2] drm/i915: use pat_index instead of cache_level

2023-05-08 Thread fei . yang
From: Fei Yang 

Currently the KMD is using enum i915_cache_level to set caching policy for
buffer objects. This is flaky because the PAT index which really controls
the caching behavior in PTE has far more levels than what's defined in the
enum. In addition, the PAT index is platform dependent; having to translate
between i915_cache_level and PAT index is not reliable and makes the code
more complicated.

From the UMD's perspective there is also a need to set the caching policy for
performance fine tuning. It's much easier for the UMD to directly use PAT
index because the behavior of each PAT index is clearly defined in Bspec.
Having the abstracted i915_cache_level sitting in between would only cause
more ambiguity. PAT is expected to work much like MOCS already works today,
and by design userspace is expected to select the index that exactly matches
the desired behavior described in the hardware specification.

For these reasons this patch replaces i915_cache_level with PAT index. Also
note, cache_level is not completely removed yet, because the KMD still
needs to create buffer objects with simple cache settings such as
cached, uncached, or writethrough. For kernel objects, cache_level is used
for simplicity and backward compatibility. For pre-gen12 platforms PAT can
have a 1:1 mapping to i915_cache_level, so the two are interchangeable; see
the use of LEGACY_CACHELEVEL.

One consequence of this change is that gen8_pte_encode no longer works
for gen12 platforms, because gen12 platforms have different PAT
definitions. In the meantime the mtl_pte_encode introduced specifically for
MTL becomes generic for all gen12 platforms. This patch renames the MTL
PTE encode function to gen12_pte_encode and applies it to all gen12
platforms. Even though this change looks unrelated, separating the two
would temporarily break gen12 PTE encoding, so they are squashed into one
patch.

Special note: this patch changes the way caching behavior is controlled in
the sense that some objects are left to be managed by userspace. For such
objects we need to be careful not to change the userspace settings. There
are kerneldoc and comments added around obj->cache_coherent, cache_dirty,
and how to bypass the checks via i915_gem_object_has_cache_level. For a
full understanding, these changes need to be looked at together with the
two follow-up patches: one disables the {set|get}_caching ioctls and the
other adds the set_pat extension to the GEM_CREATE uAPI.

Bspec: 63019

Cc: Chris Wilson 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
Reviewed-by: Matt Roper 
---
 drivers/gpu/drm/i915/display/intel_dpt.c  | 12 +--
 drivers/gpu/drm/i915/gem/i915_gem_domain.c| 58 +
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 15 +++-
 drivers/gpu/drm/i915/gem/i915_gem_mman.c  | 11 ++-
 drivers/gpu/drm/i915/gem/i915_gem_object.c| 51 ++-
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  4 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 44 +-
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c|  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  |  8 +-
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
 .../drm/i915/gem/selftests/i915_gem_migrate.c |  2 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|  2 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  | 10 ++-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 84 +--
 drivers/gpu/drm/i915/gt/gen8_ppgtt.h  |  3 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 82 +-
 drivers/gpu/drm/i915/gt/intel_gtt.h   | 18 ++--
 drivers/gpu/drm/i915/gt/intel_migrate.c   | 47 ++-
 drivers/gpu/drm/i915/gt/intel_migrate.h   | 13 ++-
 drivers/gpu/drm/i915/gt/intel_ppgtt.c |  4 +-
 drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++-
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  8 +-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  2 +-
 drivers/gpu/drm/i915/gt/selftest_tlb.c|  4 +-
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  | 10 ++-
 drivers/gpu/drm/i915/i915_debugfs.c   | 52 +---
 drivers/gpu/drm/i915/i915_gem.c   | 27 +-
 drivers/gpu/drm/i915/i915_gpu_error.c |  8 +-
 drivers/gpu/drm/i915/i915_vma.c   | 16 ++--
 drivers/gpu/drm/i915/i915_vma.h   |  2 +-
 drivers/gpu/drm/i915/i915_vma_types.h |  2 -
 drivers/gpu/drm/i915/selftests/i915_gem.c |  5 +-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++--
 .../drm/i915/selftests/intel_memory_region.c  |  4 +-
 drivers/gpu/drm/i915/selftests/mock_gtt.c |  8 +-
 36 files changed, 450 insertions(+), 238 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c 
b/drivers/gpu/drm/i915/display/intel_dpt.c
index c5eacfdba1a5..7c5fddb203ba 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.c
+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
@@ -43,24 +43,24 @@ stati

[PATCH v6 0/2] drm/i915: use pat_index instead of cache_level

2023-05-08 Thread fei . yang
From: Fei Yang 

This patch set was posted at
https://patchwork.freedesktop.org/series/116868/
Change title since the PTE patch was merged separately.

These patches are extracted from series
https://patchwork.freedesktop.org/series/115980/

This series refactors the cache policy programming so that the PTE
encode functions can be unified across all GEN12 platforms. This
refactor is also important in implementing the design that allows
userspace to directly set the cache policy for each Buffer Object.

v2: drop one patch that was merged separately
341ad0e8e254 drm/i915/mtl: Add PTE encode function
v3: disable {get, set}_caching ioctl
v4: fix missing unlock introduced in v3, and
solve a rebase conflict
v5: replace obj->cache_level with pat_set_by_user,
fix i915_cache_level_str() for legacy platforms.
v6: squash the pte_encode patch because separating them causes
bisect problems. Also addressing some review comments from
Tvrtko and Matt.

Fei Yang (2):
  drm/i915: preparation for using PAT index
  drm/i915: use pat_index instead of cache_level

 drivers/gpu/drm/i915/display/intel_dpt.c  | 12 +--
 drivers/gpu/drm/i915/gem/i915_gem_domain.c| 58 +-
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 15 +++-
 drivers/gpu/drm/i915/gem/i915_gem_mman.c  | 11 ++-
 drivers/gpu/drm/i915/gem/i915_gem_object.c| 60 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  8 ++
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 51 +++-
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  2 -
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c|  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  |  8 +-
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
 .../drm/i915/gem/selftests/i915_gem_migrate.c |  2 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|  2 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  | 10 ++-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 78 +-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.h  |  3 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 76 +-
 drivers/gpu/drm/i915/gt/intel_gtt.h   | 18 ++---
 drivers/gpu/drm/i915/gt/intel_migrate.c   | 47 ++-
 drivers/gpu/drm/i915/gt/intel_migrate.h   | 13 ++-
 drivers/gpu/drm/i915/gt/intel_ppgtt.c |  4 +-
 drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 +--
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  8 +-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  2 +-
 drivers/gpu/drm/i915/gt/selftest_tlb.c|  4 +-
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  | 10 ++-
 drivers/gpu/drm/i915/i915_debugfs.c   | 52 +---
 drivers/gpu/drm/i915/i915_gem.c   | 27 ++-
 drivers/gpu/drm/i915/i915_gpu_error.c |  8 +-
 drivers/gpu/drm/i915/i915_pci.c   | 79 ---
 drivers/gpu/drm/i915/i915_vma.c   | 16 ++--
 drivers/gpu/drm/i915/i915_vma.h   |  2 +-
 drivers/gpu/drm/i915/i915_vma_types.h |  2 -
 drivers/gpu/drm/i915/intel_device_info.h  |  5 ++
 drivers/gpu/drm/i915/selftests/i915_gem.c |  5 +-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++--
 .../drm/i915/selftests/intel_memory_region.c  |  4 +-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  9 +++
 drivers/gpu/drm/i915/selftests/mock_gtt.c |  8 +-
 40 files changed, 554 insertions(+), 237 deletions(-)

-- 
2.25.1



[PATCH v6 1/2] drm/i915: preparation for using PAT index

2023-05-08 Thread fei . yang
From: Fei Yang 

This patch is a preparation for replacing enum i915_cache_level with PAT
index. Caching policy for buffer objects is set through the PAT index in
PTE; the old i915_cache_level is not sufficient to represent all caching
modes supported by the hardware.

Preparing the transition by adding some platform dependent data structures
and helper functions to translate the cache_level to pat_index.

cachelevel_to_pat: a platform dependent array mapping cache_level to
   pat_index.

max_pat_index: the maximum PAT index recommended in hardware specification
   Needed for validating the PAT index passed in from user
   space.

i915_gem_get_pat_index: function to convert cache_level to PAT index.

obj_to_i915(obj): macro moved to header file for wider usage.

I915_MAX_CACHE_LEVEL: upper bound of i915_cache_level for the
  convenience of coding.
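
As a rough illustration of the shape this takes (not actual platform data;
the macro name and all PAT values below are made-up placeholders, the real
tables are defined in i915_pci.c), a device info entry would carry something
like:

#define EXAMPLE_CACHELEVEL \
	.cachelevel_to_pat = { \
		[I915_CACHE_NONE]   = 0, \
		[I915_CACHE_LLC]    = 1, \
		[I915_CACHE_L3_LLC] = 2, \
		[I915_CACHE_WT]     = 3, \
	}

static const struct intel_device_info example_platform_info = {
	/* ... existing device info fields ... */
	.max_pat_index = 3,	/* highest PAT index valid for this platform */
	EXAMPLE_CACHELEVEL,
};

i915_gem_get_pat_index() then simply indexes cachelevel_to_pat, and the
gem_create extension added later in the series validates userspace input
against max_pat_index.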

Cc: Chris Wilson 
Cc: Matt Roper 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
Reviewed-by: Andrzej Hajda 
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c|  9 +++
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  4 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  7 ++
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  2 -
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  |  6 ++
 drivers/gpu/drm/i915/gt/intel_ggtt.c  |  6 ++
 drivers/gpu/drm/i915/i915_pci.c   | 79 ---
 drivers/gpu/drm/i915/intel_device_info.h  |  5 ++
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  9 +++
 9 files changed, 116 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 4666bb82f312..8c70a0ec7d2f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -45,6 +45,15 @@ static struct kmem_cache *slab_objects;
 
 static const struct drm_gem_object_funcs i915_gem_object_funcs;
 
+unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
+   enum i915_cache_level level)
+{
+   if (drm_WARN_ON(&i915->drm, level >= I915_MAX_CACHE_LEVEL))
+   return 0;
+
+   return INTEL_INFO(i915)->cachelevel_to_pat[level];
+}
+
 struct drm_i915_gem_object *i915_gem_object_alloc(void)
 {
struct drm_i915_gem_object *obj;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index bc1291887d4f..284e1aa396cd 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -20,6 +20,8 @@
 
 enum intel_region_id;
 
+#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
+
 static inline bool i915_gem_object_size_2big(u64 size)
 {
struct drm_i915_gem_object *obj;
@@ -30,6 +32,8 @@ static inline bool i915_gem_object_size_2big(u64 size)
return false;
 }
 
+unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
+   enum i915_cache_level level);
 void i915_gem_init__objects(struct drm_i915_private *i915);
 
 void i915_objects_module_exit(void);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 830c11431ee8..bf0bd8e11355 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -194,6 +194,13 @@ enum i915_cache_level {
 * engine.
 */
I915_CACHE_WT,
+   /**
+* @I915_MAX_CACHE_LEVEL:
+*
+* Mark the last entry in the enum. Used for defining cachelevel_to_pat
+* array for cache_level to pat translation table.
+*/
+   I915_MAX_CACHE_LEVEL,
 };
 
 enum i915_map_type {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index b1672e054b21..214763942aa2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -460,8 +460,6 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private 
*i915,
fs_reclaim_release(GFP_KERNEL);
 }
 
-#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
-
 /**
  * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By
  * default all object types that support shrinking(see IS_SHRINKABLE), will 
also
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 22ec1566d2a7..bb6998d67133 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -78,6 +78,12 @@ static u64 mtl_pte_encode(dma_addr_t addr,
case I915_CACHE_WT:
pte |= GEN12_PPGTT_PTE_PAT0;
break;
+   default:
+   /* This should never happen. Added to deal with the compile
+* error due to the addition of I915_MAX_CACHE_LEVEL. Will
+* be removed by the pat_index patch.

[PATCH v5 2/5] drm/i915: use pat_index instead of cache_level

2023-05-03 Thread fei . yang
From: Fei Yang 

Currently the KMD is using enum i915_cache_level to set caching policy for
buffer objects. This is flaky because the PAT index which really controls
the caching behavior in PTE has far more levels than what's defined in the
enum. In addition, the PAT index is platform dependent; having to translate
between i915_cache_level and PAT index is not reliable and makes the code
more complicated.

From UMD's perspective there is also a necessity to set caching policy for
performance fine tuning. It's much easier for the UMD to directly use PAT
index because the behavior of each PAT index is clearly defined in Bspec.
Having the abstracted i915_cache_level sitting in between would only cause
more ambiguity.

For these reasons this patch replaces i915_cache_level with PAT index. Also
note, the cache_level is not completely removed yet, because the KMD still
has the need of creating buffer objects with simple cache settings such as
cached, uncached, or writethrough. For such simple cases, using cache_level
would help simplify the code.
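
As a minimal sketch of what this means at a call site (a hypothetical helper,
not code from this patch, and assuming i915_address_space keeps its usual
back-pointer to drm_i915_private), the cache_level is converted exactly once
at the boundary and everything underneath speaks pat_index:

/* Hypothetical example only: convert at the boundary, then use pat_index. */
static void example_insert_with_cache_level(struct i915_address_space *vm,
					    struct i915_vma_resource *vma_res,
					    enum i915_cache_level level,
					    u32 flags)
{
	unsigned int pat_index = i915_gem_get_pat_index(vm->i915, level);

	/* vm callbacks now take a pat_index instead of a cache_level */
	vm->insert_entries(vm, vma_res, pat_index, flags);
}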

Cc: Chris Wilson 
Cc: Matt Roper 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
---
 drivers/gpu/drm/i915/display/intel_dpt.c  | 12 +--
 drivers/gpu/drm/i915/gem/i915_gem_domain.c| 45 ++
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++-
 drivers/gpu/drm/i915/gem/i915_gem_mman.c  |  3 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.c| 51 +++-
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  4 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 25 +-
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c|  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  | 16 ++--
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
 .../drm/i915/gem/selftests/i915_gem_migrate.c |  2 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|  2 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  | 10 ++-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 71 
 drivers/gpu/drm/i915/gt/gen8_ppgtt.h  |  3 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 82 +--
 drivers/gpu/drm/i915/gt/intel_gtt.h   | 20 ++---
 drivers/gpu/drm/i915/gt/intel_migrate.c   | 47 ++-
 drivers/gpu/drm/i915/gt/intel_migrate.h   | 13 ++-
 drivers/gpu/drm/i915/gt/intel_ppgtt.c |  6 +-
 drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++-
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  8 +-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  2 +-
 drivers/gpu/drm/i915/gt/selftest_tlb.c|  4 +-
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  | 10 ++-
 drivers/gpu/drm/i915/i915_debugfs.c   | 52 +---
 drivers/gpu/drm/i915/i915_gem.c   | 16 +++-
 drivers/gpu/drm/i915/i915_gpu_error.c |  8 +-
 drivers/gpu/drm/i915/i915_vma.c   | 16 ++--
 drivers/gpu/drm/i915/i915_vma.h   |  2 +-
 drivers/gpu/drm/i915/i915_vma_types.h |  2 -
 drivers/gpu/drm/i915/selftests/i915_gem.c |  5 +-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++--
 .../drm/i915/selftests/intel_memory_region.c  |  4 +-
 drivers/gpu/drm/i915/selftests/mock_gtt.c |  8 +-
 36 files changed, 391 insertions(+), 240 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c 
b/drivers/gpu/drm/i915/display/intel_dpt.c
index c5eacfdba1a5..7c5fddb203ba 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.c
+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
@@ -43,24 +43,24 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
 static void dpt_insert_page(struct i915_address_space *vm,
dma_addr_t addr,
u64 offset,
-   enum i915_cache_level level,
+   unsigned int pat_index,
u32 flags)
 {
struct i915_dpt *dpt = i915_vm_to_dpt(vm);
gen8_pte_t __iomem *base = dpt->iomem;
 
gen8_set_pte(base + offset / I915_GTT_PAGE_SIZE,
-vm->pte_encode(addr, level, flags));
+vm->pte_encode(addr, pat_index, flags));
 }
 
 static void dpt_insert_entries(struct i915_address_space *vm,
   struct i915_vma_resource *vma_res,
-  enum i915_cache_level level,
+  unsigned int pat_index,
   u32 flags)
 {
struct i915_dpt *dpt = i915_vm_to_dpt(vm);
gen8_pte_t __iomem *base = dpt->iomem;
-   const gen8_pte_t pte_encode = vm->pte_encode(0, level, flags);
+   const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags);
struct sgt_iter sgt_iter;
dma_addr_t addr;
int i;
@@ -83,7 +83,7 @@ static void dpt_clear_range(struct i915_address_space *vm,
 static void dpt_bind_vma(struct i915_address_space *vm,
 struct i9

[PATCH v5 3/5] drm/i915: make sure correct pte encode is used

2023-05-03 Thread fei . yang
From: Fei Yang 

PTE encode is platform dependent. After replacing cache_level with
pat_index, the newly introduced mtl_pte_encode is actually generic
for all gen12 platforms, thus rename it to gen12_pte_encode and
apply it to all gen12 platforms.
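
The hunk below only shows the rename; for context, the pat_index based body
would look roughly like the sketch here (illustrative only: the PAT1/PAT2 bit
names are assumed by analogy with the GEN12_PPGTT_PTE_PAT0 bit used earlier
in the series, and the function name is made up):

static u64 example_gen12_pte_encode(dma_addr_t addr,
				    unsigned int pat_index,
				    u32 flags)
{
	gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;

	if (flags & PTE_LM)
		pte |= GEN12_PPGTT_PTE_LM;

	/* each bit of the PAT index selects one PAT bit in the PTE */
	if (pat_index & BIT(0))
		pte |= GEN12_PPGTT_PTE_PAT0;
	if (pat_index & BIT(1))
		pte |= GEN12_PPGTT_PTE_PAT1;
	if (pat_index & BIT(2))
		pte |= GEN12_PPGTT_PTE_PAT2;

	return pte;
}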

Cc: Chris Wilson 
Cc: Matt Roper 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
---
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index f2334a713c4e..d1e3d3b90e95 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -55,9 +55,9 @@ static u64 gen8_pte_encode(dma_addr_t addr,
return pte;
 }
 
-static u64 mtl_pte_encode(dma_addr_t addr,
- unsigned int pat_index,
- u32 flags)
+static u64 gen12_pte_encode(dma_addr_t addr,
+   unsigned int pat_index,
+   u32 flags)
 {
gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
 
@@ -995,8 +995,8 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
 */
ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
 
-   if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
-   ppgtt->vm.pte_encode = mtl_pte_encode;
+   if (GRAPHICS_VER(gt->i915) >= 12)
+   ppgtt->vm.pte_encode = gen12_pte_encode;
else
ppgtt->vm.pte_encode = gen8_pte_encode;
 
-- 
2.25.1



[PATCH v5 5/5] drm/i915: Allow user to set cache at BO creation

2023-05-03 Thread fei . yang
From: Fei Yang 

To comply with the design that buffer objects shall have immutable
cache setting throughout their life cycle, {set, get}_caching ioctl's
are no longer supported from MTL onward. With that change caching
policy can only be set at object creation time. The current code
applies a default (platform dependent) cache setting for all objects.
However this is not optimal for performance tuning. The patch extends
the existing gem_create uAPI to let user set PAT index for the object
at creation time.
The new extension is platform independent, so UMD's can switch to using
this extension for older platforms as well, while {set, get}_caching are
still supported on these legacy platforms for compatibility reasons.
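
For reference, userspace consumption of the extension looks roughly like the
sketch below. This is only a sketch: it assumes the uAPI struct layout implied
by the BUILD_BUG_ON in ext_set_pat() (an i915_user_extension base followed by
pat_index and rsvd), and the pat_index value passed in is an arbitrary example
that must be valid for the target platform:

/* Hedged userspace sketch, error handling trimmed. */
#include <stdint.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int example_create_bo_with_pat(int drm_fd, __u64 size,
				      __u32 pat_index, __u32 *handle)
{
	struct drm_i915_gem_create_ext_set_pat set_pat = {
		.base = { .name = I915_GEM_CREATE_EXT_SET_PAT },
		.pat_index = pat_index,
	};
	struct drm_i915_gem_create_ext create = {
		.size = size,
		.extensions = (__u64)(uintptr_t)&set_pat,
	};

	if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create))
		return -errno;

	*handle = create.handle;
	return 0;
}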

Cc: Chris Wilson 
Cc: Matt Roper 
Cc: Andi Shyti 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
---
 drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 ++
 drivers/gpu/drm/i915/gem/i915_gem_object.c |  6 
 include/uapi/drm/i915_drm.h| 36 ++
 tools/include/uapi/drm/i915_drm.h  | 36 ++
 4 files changed, 114 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c 
b/drivers/gpu/drm/i915/gem/i915_gem_create.c
index bfe1dbda4cb7..644a936248ad 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
@@ -245,6 +245,7 @@ struct create_ext {
unsigned int n_placements;
unsigned int placement_mask;
unsigned long flags;
+   unsigned int pat_index;
 };
 
 static void repr_placements(char *buf, size_t size,
@@ -394,11 +395,39 @@ static int ext_set_protected(struct i915_user_extension 
__user *base, void *data
return 0;
 }
 
+static int ext_set_pat(struct i915_user_extension __user *base, void *data)
+{
+   struct create_ext *ext_data = data;
+   struct drm_i915_private *i915 = ext_data->i915;
+   struct drm_i915_gem_create_ext_set_pat ext;
+   unsigned int max_pat_index;
+
+   BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) !=
+offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd));
+
+   if (copy_from_user(&ext, base, sizeof(ext)))
+   return -EFAULT;
+
+   max_pat_index = INTEL_INFO(i915)->max_pat_index;
+
+   if (ext.pat_index > max_pat_index) {
+   drm_dbg(&i915->drm, "PAT index is invalid: %u\n",
+   ext.pat_index);
+   return -EINVAL;
+   }
+
+   ext_data->pat_index = ext.pat_index;
+
+   return 0;
+}
+
 static const i915_user_extension_fn create_extensions[] = {
[I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements,
[I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected,
+   [I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat,
 };
 
#define PAT_INDEX_NOT_SET  0xffff
 /**
  * i915_gem_create_ext_ioctl - Creates a new mm object and returns a handle to 
it.
  * @dev: drm device pointer
@@ -418,6 +447,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void 
*data,
if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS)
return -EINVAL;
 
+   ext_data.pat_index = PAT_INDEX_NOT_SET;
ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
   create_extensions,
   ARRAY_SIZE(create_extensions),
@@ -454,5 +484,11 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void 
*data,
if (IS_ERR(obj))
return PTR_ERR(obj);
 
+   if (ext_data.pat_index != PAT_INDEX_NOT_SET) {
+   i915_gem_object_set_pat_index(obj, ext_data.pat_index);
+   /* Mark pat_index is set by UMD */
+   obj->pat_set_by_user = true;
+   }
+
return i915_gem_publish(obj, file, &args->size, &args->handle);
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 46a19b099ec8..97ac6fb37958 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -208,6 +208,12 @@ bool i915_gem_object_can_bypass_llc(struct 
drm_i915_gem_object *obj)
if (!(obj->flags & I915_BO_ALLOC_USER))
return false;
 
+   /*
+* Always flush cache for UMD objects at creation time.
+*/
+   if (obj->pat_set_by_user)
+   return true;
+
/*
 * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
 * possible for userspace to bypass the GTT caching bits set by the
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index dba7c5a5b25e..03c5c314846e 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -3630,9 +3630,13 @@ struct drm_i915_gem_create_ext {
 *
 * For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see
 * struct drm_i915_gem_create_ext_protected_content.

[PATCH v5 4/5] drm/i915/mtl: end support for set caching ioctl

2023-05-03 Thread fei . yang
From: Fei Yang 

The design is to keep Buffer Object's caching policy immutable
throughout its life cycle. This patch ends the support for set caching ioctl
from MTL onward. While doing that we also set BO's to be 1-way coherent
at creation time because GPU is no longer automatically snooping CPU
cache. For userspace components needing to fine tune the caching policy
for BO's, a follow up patch will extend the GEM_CREATE uAPI to allow
them to specify caching mode at BO creation time.
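
From a UMD point of view the visible change is that DRM_IOCTL_I915_GEM_SET_CACHING
starts failing with EOPNOTSUPP on these platforms, so the expected pattern is to
try the legacy ioctl and fall back to choosing the caching mode at creation time
(a hypothetical helper, shown only to illustrate the fallback):

/* Hypothetical userspace sketch. */
#include <errno.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int example_set_caching(int drm_fd, __u32 handle, __u32 caching)
{
	struct drm_i915_gem_caching arg = {
		.handle = handle,
		.caching = caching,	/* e.g. I915_CACHING_CACHED */
	};

	if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg) == 0)
		return 0;

	/*
	 * EOPNOTSUPP (MTL onward) or ENODEV (discrete): caching is
	 * immutable here and must be chosen at BO creation instead.
	 */
	return -errno;
}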

Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
Reviewed-by: Andrzej Hajda 
---
 drivers/gpu/drm/i915/gem/i915_gem_domain.c | 3 +++
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c  | 9 -
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c 
b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index ae99b4be5918..53282c6d3873 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -337,6 +337,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void 
*data,
if (IS_DGFX(i915))
return -ENODEV;
 
+   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+   return -EOPNOTSUPP;
+
switch (args->caching) {
case I915_CACHING_NONE:
level = I915_CACHE_NONE;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 37d1efcd3ca6..cad4a6017f4b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -601,7 +601,14 @@ static int shmem_object_init(struct intel_memory_region 
*mem,
obj->write_domain = I915_GEM_DOMAIN_CPU;
obj->read_domains = I915_GEM_DOMAIN_CPU;
 
-   if (HAS_LLC(i915))
+   /*
+* MTL doesn't snoop CPU cache by default for GPU access (namely
+* 1-way coherency). However some UMD's are currently depending on
+* that. Make 1-way coherent the default setting for MTL. A follow
+* up patch will extend the GEM_CREATE uAPI to allow UMD's specify
+* caching mode at BO creation time
+*/
+   if (HAS_LLC(i915) || (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)))
/* On some devices, we can have the GPU use the LLC (the CPU
 * cache) for about a 10% performance improvement
 * compared to uncached.  Graphics requests other than
-- 
2.25.1



[PATCH v5 0/5] drm/i915: Allow user to set cache at BO creation

2023-05-03 Thread fei . yang
From: Fei Yang 

The first three patches in this series are taken from
https://patchwork.freedesktop.org/series/116868/
These patches are included here because the last patch
has dependency on the pat_index refactor.

This series is focusing on uAPI changes,
1. end support for set caching ioctl [PATCH 4/5]
2. add set_pat extension for gem_create [PATCH 5/5]

v2: drop one patch that was merged separately
341ad0e8e254 drm/i915/mtl: Add PTE encode function
v3: rebase on https://patchwork.freedesktop.org/series/117082/
v4: fix missing unlock introduced in v3, and
solve a rebase conflict
v5: replace obj->cache_level with pat_set_by_user,
fix i915_cache_level_str() for legacy platforms.

Fei Yang (5):
  drm/i915: preparation for using PAT index
  drm/i915: use pat_index instead of cache_level
  drm/i915: make sure correct pte encode is used
  drm/i915/mtl: end support for set caching ioctl
  drm/i915: Allow user to set cache at BO creation

 drivers/gpu/drm/i915/display/intel_dpt.c  | 12 +--
 drivers/gpu/drm/i915/gem/i915_gem_create.c| 36 +
 drivers/gpu/drm/i915/gem/i915_gem_domain.c| 48 ++-
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++-
 drivers/gpu/drm/i915/gem/i915_gem_mman.c  |  3 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.c| 66 +++-
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  8 ++
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 26 +-
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c |  9 ++-
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  2 -
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c|  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  | 16 ++--
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
 .../drm/i915/gem/selftests/i915_gem_migrate.c |  2 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|  2 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  | 10 ++-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 73 +
 drivers/gpu/drm/i915/gt/gen8_ppgtt.h  |  3 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 76 +-
 drivers/gpu/drm/i915/gt/intel_gtt.h   | 20 +++--
 drivers/gpu/drm/i915/gt/intel_migrate.c   | 47 ++-
 drivers/gpu/drm/i915/gt/intel_migrate.h   | 13 ++-
 drivers/gpu/drm/i915/gt/intel_ppgtt.c |  6 +-
 drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 +--
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  8 +-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  2 +-
 drivers/gpu/drm/i915/gt/selftest_tlb.c|  4 +-
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  | 10 ++-
 drivers/gpu/drm/i915/i915_debugfs.c   | 52 +---
 drivers/gpu/drm/i915/i915_gem.c   | 16 +++-
 drivers/gpu/drm/i915/i915_gpu_error.c |  8 +-
 drivers/gpu/drm/i915/i915_pci.c   | 79 ---
 drivers/gpu/drm/i915/i915_vma.c   | 16 ++--
 drivers/gpu/drm/i915/i915_vma.h   |  2 +-
 drivers/gpu/drm/i915/i915_vma_types.h |  2 -
 drivers/gpu/drm/i915/intel_device_info.h  |  5 ++
 drivers/gpu/drm/i915/selftests/i915_gem.c |  5 +-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++--
 .../drm/i915/selftests/intel_memory_region.c  |  4 +-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  9 +++
 drivers/gpu/drm/i915/selftests/mock_gtt.c |  8 +-
 include/uapi/drm/i915_drm.h   | 36 +
 tools/include/uapi/drm/i915_drm.h | 36 +
 44 files changed, 618 insertions(+), 244 deletions(-)

-- 
2.25.1



[PATCH v5 1/5] drm/i915: preparation for using PAT index

2023-05-03 Thread fei . yang
From: Fei Yang 

This patch is a preparation for replacing enum i915_cache_level with PAT
index. Caching policy for buffer objects is set through the PAT index in
PTE; the old i915_cache_level is not sufficient to represent all caching
modes supported by the hardware.

Preparing the transition by adding some platform dependent data structures
and helper functions to translate the cache_level to pat_index.

cachelevel_to_pat: a platform dependent array mapping cache_level to
   pat_index.

max_pat_index: the maximum PAT index recommended in hardware specification
   Needed for validating the PAT index passed in from user
   space.

i915_gem_get_pat_index: function to convert cache_level to PAT index.

obj_to_i915(obj): macro moved to header file for wider usage.

I915_MAX_CACHE_LEVEL: upper bound of i915_cache_level for the
  convenience of coding.

Cc: Chris Wilson 
Cc: Matt Roper 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
Reviewed-by: Andrzej Hajda 
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c|  9 +++
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  4 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  1 +
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  2 -
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  |  6 ++
 drivers/gpu/drm/i915/gt/intel_ggtt.c  |  6 ++
 drivers/gpu/drm/i915/i915_pci.c   | 79 ---
 drivers/gpu/drm/i915/intel_device_info.h  |  5 ++
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  9 +++
 9 files changed, 110 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 4666bb82f312..8c70a0ec7d2f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -45,6 +45,15 @@ static struct kmem_cache *slab_objects;
 
 static const struct drm_gem_object_funcs i915_gem_object_funcs;
 
+unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
+   enum i915_cache_level level)
+{
+   if (drm_WARN_ON(&i915->drm, level >= I915_MAX_CACHE_LEVEL))
+   return 0;
+
+   return INTEL_INFO(i915)->cachelevel_to_pat[level];
+}
+
 struct drm_i915_gem_object *i915_gem_object_alloc(void)
 {
struct drm_i915_gem_object *obj;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 885ccde9dc3c..4c92e17b4337 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -20,6 +20,8 @@
 
 enum intel_region_id;
 
+#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
+
 static inline bool i915_gem_object_size_2big(u64 size)
 {
struct drm_i915_gem_object *obj;
@@ -30,6 +32,8 @@ static inline bool i915_gem_object_size_2big(u64 size)
return false;
 }
 
+unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
+   enum i915_cache_level level);
 void i915_gem_init__objects(struct drm_i915_private *i915);
 
 void i915_objects_module_exit(void);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 830c11431ee8..41b35abccf88 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -194,6 +194,7 @@ enum i915_cache_level {
 * engine.
 */
I915_CACHE_WT,
+   I915_MAX_CACHE_LEVEL,
 };
 
 enum i915_map_type {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index b1672e054b21..214763942aa2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -460,8 +460,6 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private 
*i915,
fs_reclaim_release(GFP_KERNEL);
 }
 
-#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
-
 /**
  * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By
  * default all object types that support shrinking(see IS_SHRINKABLE), will 
also
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 22ec1566d2a7..bb6998d67133 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -78,6 +78,12 @@ static u64 mtl_pte_encode(dma_addr_t addr,
case I915_CACHE_WT:
pte |= GEN12_PPGTT_PTE_PAT0;
break;
+   default:
+   /* This should never happen. Added to deal with the compile
+* error due to the addition of I915_MAX_CACHE_LEVEL. Will
+* be removed by the pat_index patch.
+*/
+   break;
}
 
return pte;
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c 
b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 20915edc8bd9..c8390d03fce2 100644
--- a/drivers/

[PATCH v5 2/3] drm/i915: use pat_index instead of cache_level

2023-05-03 Thread fei . yang
From: Fei Yang 

Currently the KMD is using enum i915_cache_level to set caching policy for
buffer objects. This is flaky because the PAT index which really controls
the caching behavior in PTE has far more levels than what's defined in the
enum. In addition, the PAT index is platform dependent; having to translate
between i915_cache_level and PAT index is not reliable and makes the code
more complicated.

From UMD's perspective there is also a necessity to set caching policy for
performance fine tuning. It's much easier for the UMD to directly use PAT
index because the behavior of each PAT index is clearly defined in Bspec.
Having the abstracted i915_cache_level sitting in between would only cause
more ambiguity.

For these reasons this patch replaces i915_cache_level with PAT index. Also
note, the cache_level is not completely removed yet, because the KMD still
has the need of creating buffer objects with simple cache settings such as
cached, uncached, or writethrough. For such simple cases, using cache_level
would help simplify the code.

Cc: Chris Wilson 
Cc: Matt Roper 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
---
 drivers/gpu/drm/i915/display/intel_dpt.c  | 12 +--
 drivers/gpu/drm/i915/gem/i915_gem_domain.c| 45 ++
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++-
 drivers/gpu/drm/i915/gem/i915_gem_mman.c  |  3 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.c| 51 +++-
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  4 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 25 +-
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c|  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  | 16 ++--
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
 .../drm/i915/gem/selftests/i915_gem_migrate.c |  2 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|  2 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  | 10 ++-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 71 
 drivers/gpu/drm/i915/gt/gen8_ppgtt.h  |  3 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 82 +--
 drivers/gpu/drm/i915/gt/intel_gtt.h   | 20 ++---
 drivers/gpu/drm/i915/gt/intel_migrate.c   | 47 ++-
 drivers/gpu/drm/i915/gt/intel_migrate.h   | 13 ++-
 drivers/gpu/drm/i915/gt/intel_ppgtt.c |  6 +-
 drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++-
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  8 +-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  2 +-
 drivers/gpu/drm/i915/gt/selftest_tlb.c|  4 +-
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  | 10 ++-
 drivers/gpu/drm/i915/i915_debugfs.c   | 52 +---
 drivers/gpu/drm/i915/i915_gem.c   | 16 +++-
 drivers/gpu/drm/i915/i915_gpu_error.c |  8 +-
 drivers/gpu/drm/i915/i915_vma.c   | 16 ++--
 drivers/gpu/drm/i915/i915_vma.h   |  2 +-
 drivers/gpu/drm/i915/i915_vma_types.h |  2 -
 drivers/gpu/drm/i915/selftests/i915_gem.c |  5 +-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++--
 .../drm/i915/selftests/intel_memory_region.c  |  4 +-
 drivers/gpu/drm/i915/selftests/mock_gtt.c |  8 +-
 36 files changed, 391 insertions(+), 240 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c 
b/drivers/gpu/drm/i915/display/intel_dpt.c
index c5eacfdba1a5..7c5fddb203ba 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.c
+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
@@ -43,24 +43,24 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
 static void dpt_insert_page(struct i915_address_space *vm,
dma_addr_t addr,
u64 offset,
-   enum i915_cache_level level,
+   unsigned int pat_index,
u32 flags)
 {
struct i915_dpt *dpt = i915_vm_to_dpt(vm);
gen8_pte_t __iomem *base = dpt->iomem;
 
gen8_set_pte(base + offset / I915_GTT_PAGE_SIZE,
-vm->pte_encode(addr, level, flags));
+vm->pte_encode(addr, pat_index, flags));
 }
 
 static void dpt_insert_entries(struct i915_address_space *vm,
   struct i915_vma_resource *vma_res,
-  enum i915_cache_level level,
+  unsigned int pat_index,
   u32 flags)
 {
struct i915_dpt *dpt = i915_vm_to_dpt(vm);
gen8_pte_t __iomem *base = dpt->iomem;
-   const gen8_pte_t pte_encode = vm->pte_encode(0, level, flags);
+   const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags);
struct sgt_iter sgt_iter;
dma_addr_t addr;
int i;
@@ -83,7 +83,7 @@ static void dpt_clear_range(struct i915_address_space *vm,
 static void dpt_bind_vma(struct i915_address_space *vm,
 struct i9

[PATCH v5 0/3] drm/i915: use pat_index instead of cache_level

2023-05-03 Thread fei . yang
From: Fei Yang 

This patch set was posted at
https://patchwork.freedesktop.org/series/116868/
Change title since the PTE patch was merged separately.

These patches are extracted from series
https://patchwork.freedesktop.org/series/115980/

This series refactors the cache policy programming so that the PTE
encode functions can be unified across all GEN12 platforms. This
refactor is also important for implementing the design that allows
userspace to directly set the cache policy for each Buffer Object.

v2: drop one patch that was merged separately
341ad0e8e254 drm/i915/mtl: Add PTE encode function
v3: disable {get, set}_caching ioctl
v4: fix missing unlock introduced in v3, and
solve a rebase conflict
v5: replace obj->cache_level with pat_set_by_user,
fix i915_cache_level_str() for legacy platforms.

Fei Yang (3):
  drm/i915: preparation for using PAT index
  drm/i915: use pat_index instead of cache_level
  drm/i915: make sure correct pte encode is used

 drivers/gpu/drm/i915/display/intel_dpt.c  | 12 +--
 drivers/gpu/drm/i915/gem/i915_gem_domain.c| 45 ++-
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++-
 drivers/gpu/drm/i915/gem/i915_gem_mman.c  |  3 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.c| 60 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  8 ++
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 26 +-
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  2 -
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c|  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  | 16 ++--
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
 .../drm/i915/gem/selftests/i915_gem_migrate.c |  2 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|  2 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  | 10 ++-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 73 +
 drivers/gpu/drm/i915/gt/gen8_ppgtt.h  |  3 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 76 +-
 drivers/gpu/drm/i915/gt/intel_gtt.h   | 20 +++--
 drivers/gpu/drm/i915/gt/intel_migrate.c   | 47 ++-
 drivers/gpu/drm/i915/gt/intel_migrate.h   | 13 ++-
 drivers/gpu/drm/i915/gt/intel_ppgtt.c |  6 +-
 drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 +--
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  8 +-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  2 +-
 drivers/gpu/drm/i915/gt/selftest_tlb.c|  4 +-
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  | 10 ++-
 drivers/gpu/drm/i915/i915_debugfs.c   | 52 +---
 drivers/gpu/drm/i915/i915_gem.c   | 16 +++-
 drivers/gpu/drm/i915/i915_gpu_error.c |  8 +-
 drivers/gpu/drm/i915/i915_pci.c   | 79 ---
 drivers/gpu/drm/i915/i915_vma.c   | 16 ++--
 drivers/gpu/drm/i915/i915_vma.h   |  2 +-
 drivers/gpu/drm/i915/i915_vma_types.h |  2 -
 drivers/gpu/drm/i915/intel_device_info.h  |  5 ++
 drivers/gpu/drm/i915/selftests/i915_gem.c |  5 +-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++--
 .../drm/i915/selftests/intel_memory_region.c  |  4 +-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  9 +++
 drivers/gpu/drm/i915/selftests/mock_gtt.c |  8 +-
 40 files changed, 493 insertions(+), 243 deletions(-)

-- 
2.25.1



[PATCH v5 1/3] drm/i915: preparation for using PAT index

2023-05-03 Thread fei . yang
From: Fei Yang 

This patch is a preparation for replacing enum i915_cache_level with PAT
index. Caching policy for buffer objects is set through the PAT index in
PTE; the old i915_cache_level is not sufficient to represent all caching
modes supported by the hardware.

Preparing the transition by adding some platform dependent data structures
and helper functions to translate the cache_level to pat_index.

cachelevel_to_pat: a platform dependent array mapping cache_level to
   pat_index.

max_pat_index: the maximum PAT index recommended in hardware specification
   Needed for validating the PAT index passed in from user
   space.

i915_gem_get_pat_index: function to convert cache_level to PAT index.

obj_to_i915(obj): macro moved to header file for wider usage.

I915_MAX_CACHE_LEVEL: upper bound of i915_cache_level for the
  convenience of coding.

Cc: Chris Wilson 
Cc: Matt Roper 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
Reviewed-by: Andrzej Hajda 
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c|  9 +++
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  4 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  1 +
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  2 -
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  |  6 ++
 drivers/gpu/drm/i915/gt/intel_ggtt.c  |  6 ++
 drivers/gpu/drm/i915/i915_pci.c   | 79 ---
 drivers/gpu/drm/i915/intel_device_info.h  |  5 ++
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  9 +++
 9 files changed, 110 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 4666bb82f312..8c70a0ec7d2f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -45,6 +45,15 @@ static struct kmem_cache *slab_objects;
 
 static const struct drm_gem_object_funcs i915_gem_object_funcs;
 
+unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
+   enum i915_cache_level level)
+{
+   if (drm_WARN_ON(&i915->drm, level >= I915_MAX_CACHE_LEVEL))
+   return 0;
+
+   return INTEL_INFO(i915)->cachelevel_to_pat[level];
+}
+
 struct drm_i915_gem_object *i915_gem_object_alloc(void)
 {
struct drm_i915_gem_object *obj;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 885ccde9dc3c..4c92e17b4337 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -20,6 +20,8 @@
 
 enum intel_region_id;
 
+#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
+
 static inline bool i915_gem_object_size_2big(u64 size)
 {
struct drm_i915_gem_object *obj;
@@ -30,6 +32,8 @@ static inline bool i915_gem_object_size_2big(u64 size)
return false;
 }
 
+unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
+   enum i915_cache_level level);
 void i915_gem_init__objects(struct drm_i915_private *i915);
 
 void i915_objects_module_exit(void);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 830c11431ee8..41b35abccf88 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -194,6 +194,7 @@ enum i915_cache_level {
 * engine.
 */
I915_CACHE_WT,
+   I915_MAX_CACHE_LEVEL,
 };
 
 enum i915_map_type {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index b1672e054b21..214763942aa2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -460,8 +460,6 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private 
*i915,
fs_reclaim_release(GFP_KERNEL);
 }
 
-#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
-
 /**
  * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By
  * default all object types that support shrinking(see IS_SHRINKABLE), will 
also
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 22ec1566d2a7..bb6998d67133 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -78,6 +78,12 @@ static u64 mtl_pte_encode(dma_addr_t addr,
case I915_CACHE_WT:
pte |= GEN12_PPGTT_PTE_PAT0;
break;
+   default:
+   /* This should never happen. Added to deal with the compile
+* error due to the addition of I915_MAX_CACHE_LEVEL. Will
+* be removed by the pat_index patch.
+*/
+   break;
}
 
return pte;
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c 
b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 20915edc8bd9..c8390d03fce2 100644
--- a/drivers/

[PATCH v5 3/3] drm/i915: make sure correct pte encode is used

2023-05-03 Thread fei . yang
From: Fei Yang 

PTE encode is platform dependent. After replacing cache_level with
pat_index, the newly introduced mtl_pte_encode is actually generic
for all gen12 platforms, thus rename it to gen12_pte_encode and
apply it to all gen12 platforms.

Cc: Chris Wilson 
Cc: Matt Roper 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
---
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index f2334a713c4e..d1e3d3b90e95 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -55,9 +55,9 @@ static u64 gen8_pte_encode(dma_addr_t addr,
return pte;
 }
 
-static u64 mtl_pte_encode(dma_addr_t addr,
- unsigned int pat_index,
- u32 flags)
+static u64 gen12_pte_encode(dma_addr_t addr,
+   unsigned int pat_index,
+   u32 flags)
 {
gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
 
@@ -995,8 +995,8 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
 */
ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
 
-   if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
-   ppgtt->vm.pte_encode = mtl_pte_encode;
+   if (GRAPHICS_VER(gt->i915) >= 12)
+   ppgtt->vm.pte_encode = gen12_pte_encode;
else
ppgtt->vm.pte_encode = gen8_pte_encode;
 
-- 
2.25.1



[PATCH v4 5/5] drm/i915: Allow user to set cache at BO creation

2023-05-01 Thread fei . yang
From: Fei Yang 

To comply with the design that buffer objects shall have immutable
cache setting throughout their life cycle, {set, get}_caching ioctl's
are no longer supported from MTL onward. With that change caching
policy can only be set at object creation time. The current code
applies a default (platform dependent) cache setting for all objects.
However this is not optimal for performance tuning. The patch extends
the existing gem_create uAPI to let user set PAT index for the object
at creation time.
The new extension is platform independent, so UMD's can switch to using
this extension for older platforms as well, while {set, get}_caching are
still supported on these legacy platforms for compatibility reasons.

Cc: Chris Wilson 
Cc: Matt Roper 
Cc: Andi Shyti 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
---
 drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 ++
 drivers/gpu/drm/i915/gem/i915_gem_object.c |  6 
 include/uapi/drm/i915_drm.h| 36 ++
 tools/include/uapi/drm/i915_drm.h  | 36 ++
 4 files changed, 114 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c 
b/drivers/gpu/drm/i915/gem/i915_gem_create.c
index bfe1dbda4cb7..723c3ddd6c74 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
@@ -245,6 +245,7 @@ struct create_ext {
unsigned int n_placements;
unsigned int placement_mask;
unsigned long flags;
+   unsigned int pat_index;
 };
 
 static void repr_placements(char *buf, size_t size,
@@ -394,11 +395,39 @@ static int ext_set_protected(struct i915_user_extension 
__user *base, void *data
return 0;
 }
 
+static int ext_set_pat(struct i915_user_extension __user *base, void *data)
+{
+   struct create_ext *ext_data = data;
+   struct drm_i915_private *i915 = ext_data->i915;
+   struct drm_i915_gem_create_ext_set_pat ext;
+   unsigned int max_pat_index;
+
+   BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) !=
+offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd));
+
+   if (copy_from_user(&ext, base, sizeof(ext)))
+   return -EFAULT;
+
+   max_pat_index = INTEL_INFO(i915)->max_pat_index;
+
+   if (ext.pat_index > max_pat_index) {
+   drm_dbg(&i915->drm, "PAT index is invalid: %u\n",
+   ext.pat_index);
+   return -EINVAL;
+   }
+
+   ext_data->pat_index = ext.pat_index;
+
+   return 0;
+}
+
 static const i915_user_extension_fn create_extensions[] = {
[I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements,
[I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected,
+   [I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat,
 };
 
#define PAT_INDEX_NOT_SET  0xffff
 /**
  * i915_gem_create_ext_ioctl - Creates a new mm object and returns a handle to 
it.
  * @dev: drm device pointer
@@ -418,6 +447,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void 
*data,
if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS)
return -EINVAL;
 
+   ext_data.pat_index = PAT_INDEX_NOT_SET;
ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
   create_extensions,
   ARRAY_SIZE(create_extensions),
@@ -454,5 +484,11 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void 
*data,
if (IS_ERR(obj))
return PTR_ERR(obj);
 
+   if (ext_data.pat_index != PAT_INDEX_NOT_SET) {
+   i915_gem_object_set_pat_index(obj, ext_data.pat_index);
+   /* Mark pat_index is set by UMD */
+   obj->cache_level = I915_CACHE_INVAL;
+   }
+
return i915_gem_publish(obj, file, &args->size, &args->handle);
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 27c948350b5b..61651f7e5806 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -209,6 +209,12 @@ bool i915_gem_object_can_bypass_llc(struct 
drm_i915_gem_object *obj)
if (!(obj->flags & I915_BO_ALLOC_USER))
return false;
 
+   /*
+* Always flush cache for UMD objects at creation time.
+*/
+   if (obj->cache_level == I915_CACHE_INVAL)
+   return true;
+
/*
 * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
 * possible for userspace to bypass the GTT caching bits set by the
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index dba7c5a5b25e..03c5c314846e 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -3630,9 +3630,13 @@ struct drm_i915_gem_create_ext {
 *
 * For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see
 

[PATCH v4 3/5] drm/i915: make sure correct pte encode is used

2023-05-01 Thread fei . yang
From: Fei Yang 

PTE encode is platform dependent. After replacing cache_level with
pat_index, the newly introduced mtl_pte_encode is actually generic
for all gen12 platforms, thus rename it to gen12_pte_encode and
apply it to all gen12 platforms.

Cc: Chris Wilson 
Cc: Matt Roper 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
---
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index f2334a713c4e..d1e3d3b90e95 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -55,9 +55,9 @@ static u64 gen8_pte_encode(dma_addr_t addr,
return pte;
 }
 
-static u64 mtl_pte_encode(dma_addr_t addr,
- unsigned int pat_index,
- u32 flags)
+static u64 gen12_pte_encode(dma_addr_t addr,
+   unsigned int pat_index,
+   u32 flags)
 {
gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
 
@@ -995,8 +995,8 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
 */
ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
 
-   if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
-   ppgtt->vm.pte_encode = mtl_pte_encode;
+   if (GRAPHICS_VER(gt->i915) >= 12)
+   ppgtt->vm.pte_encode = gen12_pte_encode;
else
ppgtt->vm.pte_encode = gen8_pte_encode;
 
-- 
2.25.1



[PATCH v4 2/5] drm/i915: use pat_index instead of cache_level

2023-05-01 Thread fei . yang
From: Fei Yang 

Currently the KMD is using enum i915_cache_level to set caching policy for
buffer objects. This is flaky because the PAT index which really controls
the caching behavior in PTE has far more levels than what's defined in the
enum. In addition, the PAT index is platform dependent; having to translate
between i915_cache_level and PAT index is not reliable and makes the code
more complicated.

From UMD's perspective there is also a necessity to set caching policy for
performance fine tuning. It's much easier for the UMD to directly use PAT
index because the behavior of each PAT index is clearly defined in Bspec.
Having the abstracted i915_cache_level sitting in between would only cause
more ambiguity.

For these reasons this patch replaces i915_cache_level with PAT index. Also
note, the cache_level is not completely removed yet, because the KMD still
has the need of creating buffer objects with simple cache settings such as
cached, uncached, or writethrough. For such simple cases, using cache_level
would help simplify the code.

Cc: Chris Wilson 
Cc: Matt Roper 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
---
 drivers/gpu/drm/i915/display/intel_dpt.c  | 12 +--
 drivers/gpu/drm/i915/gem/i915_gem_domain.c| 47 +++
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++-
 drivers/gpu/drm/i915/gem/i915_gem_mman.c  |  3 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.c| 52 +++-
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  4 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 25 +-
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c|  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  | 16 ++--
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
 .../drm/i915/gem/selftests/i915_gem_migrate.c |  2 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|  2 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  | 10 ++-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 71 
 drivers/gpu/drm/i915/gt/gen8_ppgtt.h  |  3 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 82 +--
 drivers/gpu/drm/i915/gt/intel_gtt.h   | 20 ++---
 drivers/gpu/drm/i915/gt/intel_migrate.c   | 47 ++-
 drivers/gpu/drm/i915/gt/intel_migrate.h   | 13 ++-
 drivers/gpu/drm/i915/gt/intel_ppgtt.c |  6 +-
 drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++-
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  8 +-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  2 +-
 drivers/gpu/drm/i915/gt/selftest_tlb.c|  4 +-
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  | 10 ++-
 drivers/gpu/drm/i915/i915_debugfs.c   | 55 ++---
 drivers/gpu/drm/i915/i915_gem.c   | 16 +++-
 drivers/gpu/drm/i915/i915_gpu_error.c |  8 +-
 drivers/gpu/drm/i915/i915_vma.c   | 16 ++--
 drivers/gpu/drm/i915/i915_vma.h   |  2 +-
 drivers/gpu/drm/i915/i915_vma_types.h |  2 -
 drivers/gpu/drm/i915/selftests/i915_gem.c |  5 +-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++--
 .../drm/i915/selftests/intel_memory_region.c  |  4 +-
 drivers/gpu/drm/i915/selftests/mock_gtt.c |  8 +-
 36 files changed, 398 insertions(+), 239 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c 
b/drivers/gpu/drm/i915/display/intel_dpt.c
index c5eacfdba1a5..7c5fddb203ba 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.c
+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
@@ -43,24 +43,24 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
 static void dpt_insert_page(struct i915_address_space *vm,
dma_addr_t addr,
u64 offset,
-   enum i915_cache_level level,
+   unsigned int pat_index,
u32 flags)
 {
struct i915_dpt *dpt = i915_vm_to_dpt(vm);
gen8_pte_t __iomem *base = dpt->iomem;
 
gen8_set_pte(base + offset / I915_GTT_PAGE_SIZE,
-vm->pte_encode(addr, level, flags));
+vm->pte_encode(addr, pat_index, flags));
 }
 
 static void dpt_insert_entries(struct i915_address_space *vm,
   struct i915_vma_resource *vma_res,
-  enum i915_cache_level level,
+  unsigned int pat_index,
   u32 flags)
 {
struct i915_dpt *dpt = i915_vm_to_dpt(vm);
gen8_pte_t __iomem *base = dpt->iomem;
-   const gen8_pte_t pte_encode = vm->pte_encode(0, level, flags);
+   const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags);
struct sgt_iter sgt_iter;
dma_addr_t addr;
int i;
@@ -83,7 +83,7 @@ static void dpt_clear_range(struct i915_address_space *vm,
 static void dpt_bind_vma(struct i915_address_space *vm,
 struct i9

[PATCH v4 1/5] drm/i915: preparation for using PAT index

2023-05-01 Thread fei . yang
From: Fei Yang 

This patch is a preparation for replacing enum i915_cache_level with PAT
index. Caching policy for buffer objects is set through the PAT index in
PTE; the old i915_cache_level is not sufficient to represent all caching
modes supported by the hardware.

Preparing the transition by adding some platform dependent data structures
and helper functions to translate the cache_level to pat_index.

cachelevel_to_pat: a platform dependent array mapping cache_level to
   pat_index.

max_pat_index: the maximum PAT index recommended in hardware specification
   Needed for validating the PAT index passed in from user
   space.

i915_gem_get_pat_index: function to convert cache_level to PAT index.

obj_to_i915(obj): macro moved to header file for wider usage.

I915_MAX_CACHE_LEVEL: upper bound of i915_cache_level for the
  convenience of coding.

Cc: Chris Wilson 
Cc: Matt Roper 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
Reviewed-by: Andrzej Hajda 
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c|  9 +++
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  4 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  1 +
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  2 -
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  |  6 ++
 drivers/gpu/drm/i915/gt/intel_ggtt.c  |  6 ++
 drivers/gpu/drm/i915/i915_pci.c   | 79 ---
 drivers/gpu/drm/i915/intel_device_info.h  |  5 ++
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  9 +++
 9 files changed, 110 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 4666bb82f312..8c70a0ec7d2f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -45,6 +45,15 @@ static struct kmem_cache *slab_objects;
 
 static const struct drm_gem_object_funcs i915_gem_object_funcs;
 
+unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
+   enum i915_cache_level level)
+{
+   if (drm_WARN_ON(&i915->drm, level >= I915_MAX_CACHE_LEVEL))
+   return 0;
+
+   return INTEL_INFO(i915)->cachelevel_to_pat[level];
+}
+
 struct drm_i915_gem_object *i915_gem_object_alloc(void)
 {
struct drm_i915_gem_object *obj;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 885ccde9dc3c..4c92e17b4337 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -20,6 +20,8 @@
 
 enum intel_region_id;
 
+#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
+
 static inline bool i915_gem_object_size_2big(u64 size)
 {
struct drm_i915_gem_object *obj;
@@ -30,6 +32,8 @@ static inline bool i915_gem_object_size_2big(u64 size)
return false;
 }
 
+unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
+   enum i915_cache_level level);
 void i915_gem_init__objects(struct drm_i915_private *i915);
 
 void i915_objects_module_exit(void);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 830c11431ee8..41b35abccf88 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -194,6 +194,7 @@ enum i915_cache_level {
 * engine.
 */
I915_CACHE_WT,
+   I915_MAX_CACHE_LEVEL,
 };
 
 enum i915_map_type {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index b1672e054b21..214763942aa2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -460,8 +460,6 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private 
*i915,
fs_reclaim_release(GFP_KERNEL);
 }
 
-#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
-
 /**
  * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By
  * default all object types that support shrinking(see IS_SHRINKABLE), will 
also
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 22ec1566d2a7..bb6998d67133 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -78,6 +78,12 @@ static u64 mtl_pte_encode(dma_addr_t addr,
case I915_CACHE_WT:
pte |= GEN12_PPGTT_PTE_PAT0;
break;
+   default:
+   /* This should never happen. Added to deal with the compile
+* error due to the addition of I915_MAX_CACHE_LEVEL. Will
+* be removed by the pat_index patch.
+*/
+   break;
}
 
return pte;
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c 
b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 20915edc8bd9..c8390d03fce2 100644
--- a/drivers/

[PATCH v4 4/5] drm/i915/mtl: end support for set caching ioctl

2023-05-01 Thread fei . yang
From: Fei Yang 

The design is to keep Buffer Object's caching policy immutable
throughout its life cycle. This patch ends the support for set caching ioctl
from MTL onward. While doing that we also set BO's to be 1-way coherent
at creation time because GPU is no longer automatically snooping CPU
cache. For userspace components needing to fine tune the caching policy
for BO's, a follow up patch will extend the GEM_CREATE uAPI to allow
them to specify caching mode at BO creation time.

Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
Reviewed-by: Andrzej Hajda 
---
 drivers/gpu/drm/i915/gem/i915_gem_domain.c | 3 +++
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c  | 9 -
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c 
b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 95a0d845d949..e62db612aea6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -338,6 +338,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void 
*data,
if (IS_DGFX(i915))
return -ENODEV;
 
+   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+   return -EOPNOTSUPP;
+
switch (args->caching) {
case I915_CACHING_NONE:
level = I915_CACHE_NONE;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 37d1efcd3ca6..cad4a6017f4b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -601,7 +601,14 @@ static int shmem_object_init(struct intel_memory_region 
*mem,
obj->write_domain = I915_GEM_DOMAIN_CPU;
obj->read_domains = I915_GEM_DOMAIN_CPU;
 
-   if (HAS_LLC(i915))
+   /*
+* MTL doesn't snoop CPU cache by default for GPU access (namely
+* 1-way coherency). However some UMDs are currently depending on
+* that. Make 1-way coherent the default setting for MTL. A follow-up
+* patch will extend the GEM_CREATE uAPI to allow UMDs to specify the
+* caching mode at BO creation time.
+*/
+   if (HAS_LLC(i915) || (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)))
/* On some devices, we can have the GPU use the LLC (the CPU
 * cache) for about a 10% performance improvement
 * compared to uncached.  Graphics requests other than
-- 
2.25.1



[PATCH v4 0/5] drm/i915: Allow user to set cache at BO creation

2023-05-01 Thread fei . yang
From: Fei Yang 

The first three patches in this series are taken from
https://patchwork.freedesktop.org/series/116868/
These patches are included here because the last patch
has dependency on the pat_index refactor.

This series is focusing on uAPI changes,
1. end support for set caching ioctl [PATCH 4/5]
2. add set_pat extension for gem_create [PATCH 5/5]

v2: drop one patch that was merged separately
341ad0e8e254 drm/i915/mtl: Add PTE encode function
v3: rebase on https://patchwork.freedesktop.org/series/117082/
v4: fix missing unlock introduced in v3, and
solve a rebase conflict

Fei Yang (5):
  drm/i915: preparation for using PAT index
  drm/i915: use pat_index instead of cache_level
  drm/i915: make sure correct pte encode is used
  drm/i915/mtl: end support for set caching ioctl
  drm/i915: Allow user to set cache at BO creation

 drivers/gpu/drm/i915/display/intel_dpt.c  | 12 +--
 drivers/gpu/drm/i915/gem/i915_gem_create.c| 36 +
 drivers/gpu/drm/i915/gem/i915_gem_domain.c| 50 +++-
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++-
 drivers/gpu/drm/i915/gem/i915_gem_mman.c  |  3 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.c| 67 +++-
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  8 ++
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 26 +-
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c |  9 ++-
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  2 -
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c|  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  | 16 ++--
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
 .../drm/i915/gem/selftests/i915_gem_migrate.c |  2 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|  2 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  | 10 ++-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 73 +
 drivers/gpu/drm/i915/gt/gen8_ppgtt.h  |  3 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 76 +-
 drivers/gpu/drm/i915/gt/intel_gtt.h   | 20 +++--
 drivers/gpu/drm/i915/gt/intel_migrate.c   | 47 ++-
 drivers/gpu/drm/i915/gt/intel_migrate.h   | 13 ++-
 drivers/gpu/drm/i915/gt/intel_ppgtt.c |  6 +-
 drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 +--
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  8 +-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  2 +-
 drivers/gpu/drm/i915/gt/selftest_tlb.c|  4 +-
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  | 10 ++-
 drivers/gpu/drm/i915/i915_debugfs.c   | 55 ++---
 drivers/gpu/drm/i915/i915_gem.c   | 16 +++-
 drivers/gpu/drm/i915/i915_gpu_error.c |  8 +-
 drivers/gpu/drm/i915/i915_pci.c   | 79 ---
 drivers/gpu/drm/i915/i915_vma.c   | 16 ++--
 drivers/gpu/drm/i915/i915_vma.h   |  2 +-
 drivers/gpu/drm/i915/i915_vma_types.h |  2 -
 drivers/gpu/drm/i915/intel_device_info.h  |  5 ++
 drivers/gpu/drm/i915/selftests/i915_gem.c |  5 +-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++--
 .../drm/i915/selftests/intel_memory_region.c  |  4 +-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  9 +++
 drivers/gpu/drm/i915/selftests/mock_gtt.c |  8 +-
 include/uapi/drm/i915_drm.h   | 36 +
 tools/include/uapi/drm/i915_drm.h | 36 +
 44 files changed, 625 insertions(+), 243 deletions(-)

-- 
2.25.1



[PATCH v4 2/3] drm/i915: use pat_index instead of cache_level

2023-05-01 Thread fei . yang
From: Fei Yang 

Currently the KMD is using enum i915_cache_level to set caching policy for
buffer objects. This is flaky because the PAT index which really controls
the caching behavior in PTE has far more levels than what's defined in the
enum. In addition, the PAT index is platform dependent; translating between
i915_cache_level and PAT index is not reliable and makes the code more
complicated.

From UMD's perspective there is also a necessity to set caching policy for
performance fine tuning. It's much easier for the UMD to directly use PAT
index because the behavior of each PAT index is clearly defined in Bspec.
Having the abstracted i915_cache_level sitting in between would only cause
more ambiguity.

For these reasons this patch replaces i915_cache_level with PAT index. Also
note, the cache_level is not completely removed yet, because the KMD still
has the need of creating buffer objects with simple cache settings such as
cached, uncached, or writethrough. For such simple cases, using cache_level
would help simplify the code.
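
As an illustration of why this simplifies the PTE paths, here is a sketch of
a gen12 encode function taking pat_index directly. It is based on the
GEN12_PPGTT_PTE_PAT0 bit already visible in the diffs, and assumes the
companion PAT1/PAT2 bits and PTE_LM/PTE_READ_ONLY flags from intel_gtt.h;
treat it as a sketch of the approach, not the exact hunk:

  static u64 gen12_pte_encode(dma_addr_t addr,
                              unsigned int pat_index,
                              u32 flags)
  {
          gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;

          if (unlikely(flags & PTE_READ_ONLY))
                  pte &= ~GEN8_PAGE_RW;

          if (flags & PTE_LM)
                  pte |= GEN12_PPGTT_PTE_LM;

          /* The PAT index bits go straight into the PTE, no per-platform
           * cache_level translation needed at this point. */
          if (pat_index & BIT(0))
                  pte |= GEN12_PPGTT_PTE_PAT0;
          if (pat_index & BIT(1))
                  pte |= GEN12_PPGTT_PTE_PAT1;
          if (pat_index & BIT(2))
                  pte |= GEN12_PPGTT_PTE_PAT2;

          return pte;
  }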

Cc: Chris Wilson 
Cc: Matt Roper 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
---
 drivers/gpu/drm/i915/display/intel_dpt.c  | 12 +--
 drivers/gpu/drm/i915/gem/i915_gem_domain.c| 47 +++
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++-
 drivers/gpu/drm/i915/gem/i915_gem_mman.c  |  3 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.c| 52 +++-
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  4 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 25 +-
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c|  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  | 16 ++--
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
 .../drm/i915/gem/selftests/i915_gem_migrate.c |  2 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|  2 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  | 10 ++-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 71 
 drivers/gpu/drm/i915/gt/gen8_ppgtt.h  |  3 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 82 +--
 drivers/gpu/drm/i915/gt/intel_gtt.h   | 20 ++---
 drivers/gpu/drm/i915/gt/intel_migrate.c   | 47 ++-
 drivers/gpu/drm/i915/gt/intel_migrate.h   | 13 ++-
 drivers/gpu/drm/i915/gt/intel_ppgtt.c |  6 +-
 drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++-
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  8 +-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  2 +-
 drivers/gpu/drm/i915/gt/selftest_tlb.c|  4 +-
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  | 10 ++-
 drivers/gpu/drm/i915/i915_debugfs.c   | 55 ++---
 drivers/gpu/drm/i915/i915_gem.c   | 16 +++-
 drivers/gpu/drm/i915/i915_gpu_error.c |  8 +-
 drivers/gpu/drm/i915/i915_vma.c   | 16 ++--
 drivers/gpu/drm/i915/i915_vma.h   |  2 +-
 drivers/gpu/drm/i915/i915_vma_types.h |  2 -
 drivers/gpu/drm/i915/selftests/i915_gem.c |  5 +-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++--
 .../drm/i915/selftests/intel_memory_region.c  |  4 +-
 drivers/gpu/drm/i915/selftests/mock_gtt.c |  8 +-
 36 files changed, 398 insertions(+), 239 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c 
b/drivers/gpu/drm/i915/display/intel_dpt.c
index c5eacfdba1a5..7c5fddb203ba 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.c
+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
@@ -43,24 +43,24 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
 static void dpt_insert_page(struct i915_address_space *vm,
dma_addr_t addr,
u64 offset,
-   enum i915_cache_level level,
+   unsigned int pat_index,
u32 flags)
 {
struct i915_dpt *dpt = i915_vm_to_dpt(vm);
gen8_pte_t __iomem *base = dpt->iomem;
 
gen8_set_pte(base + offset / I915_GTT_PAGE_SIZE,
-vm->pte_encode(addr, level, flags));
+vm->pte_encode(addr, pat_index, flags));
 }
 
 static void dpt_insert_entries(struct i915_address_space *vm,
   struct i915_vma_resource *vma_res,
-  enum i915_cache_level level,
+  unsigned int pat_index,
   u32 flags)
 {
struct i915_dpt *dpt = i915_vm_to_dpt(vm);
gen8_pte_t __iomem *base = dpt->iomem;
-   const gen8_pte_t pte_encode = vm->pte_encode(0, level, flags);
+   const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags);
struct sgt_iter sgt_iter;
dma_addr_t addr;
int i;
@@ -83,7 +83,7 @@ static void dpt_clear_range(struct i915_address_space *vm,
 static void dpt_bind_vma(struct i915_address_space *vm,
 struct i9

[PATCH v4 1/3] drm/i915: preparation for using PAT index

2023-05-01 Thread fei . yang
From: Fei Yang 

This patch is a preparation for replacing enum i915_cache_level with PAT
index. Caching policy for buffer objects is set through the PAT index in
the PTE; the old i915_cache_level is not sufficient to represent all caching
modes supported by the hardware.

Prepare for the transition by adding some platform-dependent data structures
and helper functions to translate cache_level to pat_index.

cachelevel_to_pat: a platform dependent array mapping cache_level to
   pat_index.

max_pat_index: the maximum PAT index recommended in hardware specification
   Needed for validating the PAT index passed in from user
   space.

i915_gem_get_pat_index: function to convert cache_level to PAT index.

obj_to_i915(obj): macro moved to header file for wider usage.

I915_MAX_CACHE_LEVEL: upper bound of i915_cache_level for the
  convenience of coding.
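
A rough sketch of how these pieces fit together; the EXAMPLE_CACHELEVEL name
and the table values are placeholders for illustration, not the definitions
in i915_pci.c, and i915 is assumed to be a struct drm_i915_private pointer:

  /* Per-platform table in struct intel_device_info; example values only. */
  #define EXAMPLE_CACHELEVEL \
          .cachelevel_to_pat = { \
                  [I915_CACHE_NONE]   = 0, \
                  [I915_CACHE_LLC]    = 1, \
                  [I915_CACHE_L3_LLC] = 2, \
                  [I915_CACHE_WT]     = 3, \
          }

  /* KMD-internal callers keep using cache_level; the helper resolves the
   * platform specific PAT index that ends up in the PTE. */
  unsigned int pat_index = i915_gem_get_pat_index(i915, I915_CACHE_LLC);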

Cc: Chris Wilson 
Cc: Matt Roper 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
Reviewed-by: Andrzej Hajda 
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c|  9 +++
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  4 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  1 +
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  2 -
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  |  6 ++
 drivers/gpu/drm/i915/gt/intel_ggtt.c  |  6 ++
 drivers/gpu/drm/i915/i915_pci.c   | 79 ---
 drivers/gpu/drm/i915/intel_device_info.h  |  5 ++
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  9 +++
 9 files changed, 110 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 4666bb82f312..8c70a0ec7d2f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -45,6 +45,15 @@ static struct kmem_cache *slab_objects;
 
 static const struct drm_gem_object_funcs i915_gem_object_funcs;
 
+unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
+   enum i915_cache_level level)
+{
+   if (drm_WARN_ON(&i915->drm, level >= I915_MAX_CACHE_LEVEL))
+   return 0;
+
+   return INTEL_INFO(i915)->cachelevel_to_pat[level];
+}
+
 struct drm_i915_gem_object *i915_gem_object_alloc(void)
 {
struct drm_i915_gem_object *obj;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 885ccde9dc3c..4c92e17b4337 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -20,6 +20,8 @@
 
 enum intel_region_id;
 
+#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
+
 static inline bool i915_gem_object_size_2big(u64 size)
 {
struct drm_i915_gem_object *obj;
@@ -30,6 +32,8 @@ static inline bool i915_gem_object_size_2big(u64 size)
return false;
 }
 
+unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
+   enum i915_cache_level level);
 void i915_gem_init__objects(struct drm_i915_private *i915);
 
 void i915_objects_module_exit(void);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 830c11431ee8..41b35abccf88 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -194,6 +194,7 @@ enum i915_cache_level {
 * engine.
 */
I915_CACHE_WT,
+   I915_MAX_CACHE_LEVEL,
 };
 
 enum i915_map_type {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index b1672e054b21..214763942aa2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -460,8 +460,6 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private 
*i915,
fs_reclaim_release(GFP_KERNEL);
 }
 
-#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
-
 /**
  * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By
  * default all object types that support shrinking(see IS_SHRINKABLE), will 
also
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 22ec1566d2a7..bb6998d67133 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -78,6 +78,12 @@ static u64 mtl_pte_encode(dma_addr_t addr,
case I915_CACHE_WT:
pte |= GEN12_PPGTT_PTE_PAT0;
break;
+   default:
+   /* This should never happen. Added to deal with the compile
+* error due to the addition of I915_MAX_CACHE_LEVEL. Will
+* be removed by the pat_index patch.
+*/
+   break;
}
 
return pte;
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c 
b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 20915edc8bd9..c8390d03fce2 100644
--- a/drivers/

[PATCH v4 3/3] drm/i915: make sure correct pte encode is used

2023-05-01 Thread fei . yang
From: Fei Yang 

PTE encode is platform dependent. After replacing cache_level with
pat_index, the newly introduced mtl_pte_encode is actually generic
for all gen12 platforms, thus rename it to gen12_pte_encode and
apply it to all gen12 platforms.

Cc: Chris Wilson 
Cc: Matt Roper 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
---
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index f2334a713c4e..d1e3d3b90e95 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -55,9 +55,9 @@ static u64 gen8_pte_encode(dma_addr_t addr,
return pte;
 }
 
-static u64 mtl_pte_encode(dma_addr_t addr,
- unsigned int pat_index,
- u32 flags)
+static u64 gen12_pte_encode(dma_addr_t addr,
+   unsigned int pat_index,
+   u32 flags)
 {
gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
 
@@ -995,8 +995,8 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
 */
ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
 
-   if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
-   ppgtt->vm.pte_encode = mtl_pte_encode;
+   if (GRAPHICS_VER(gt->i915) >= 12)
+   ppgtt->vm.pte_encode = gen12_pte_encode;
else
ppgtt->vm.pte_encode = gen8_pte_encode;
 
-- 
2.25.1



[PATCH v4 0/3] drm/i915: use pat_index instead of cache_level

2023-05-01 Thread fei . yang
From: Fei Yang 

This patch set was posted at
https://patchwork.freedesktop.org/series/116868/
Change title since the PTE patch was merged separately.

These patches are extracted from series
https://patchwork.freedesktop.org/series/115980/

This series refactors the cache policy programming so that the PTE
encode functions can be unified across all GEN12 platforms. This
refactor is also important in implementing the design which allows
userspace to directly set the cache policy for each Buffer Object.

v2: drop one patch that was merged separately
341ad0e8e254 drm/i915/mtl: Add PTE encode function
v3: disable {get, set}_caching ioctl
v4: fix missing unlock introduced in v3, and
solve a rebase conflict

Fei Yang (3):
  drm/i915: preparation for using PAT index
  drm/i915: use pat_index instead of cache_level
  drm/i915: make sure correct pte encode is used

 drivers/gpu/drm/i915/display/intel_dpt.c  | 12 +--
 drivers/gpu/drm/i915/gem/i915_gem_domain.c| 47 ++-
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++-
 drivers/gpu/drm/i915/gem/i915_gem_mman.c  |  3 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.c| 61 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  8 ++
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 26 +-
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  2 -
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c|  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  | 16 ++--
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
 .../drm/i915/gem/selftests/i915_gem_migrate.c |  2 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|  2 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  | 10 ++-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 73 +
 drivers/gpu/drm/i915/gt/gen8_ppgtt.h  |  3 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 76 +-
 drivers/gpu/drm/i915/gt/intel_gtt.h   | 20 +++--
 drivers/gpu/drm/i915/gt/intel_migrate.c   | 47 ++-
 drivers/gpu/drm/i915/gt/intel_migrate.h   | 13 ++-
 drivers/gpu/drm/i915/gt/intel_ppgtt.c |  6 +-
 drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 +--
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  8 +-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  2 +-
 drivers/gpu/drm/i915/gt/selftest_tlb.c|  4 +-
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  | 10 ++-
 drivers/gpu/drm/i915/i915_debugfs.c   | 55 ++---
 drivers/gpu/drm/i915/i915_gem.c   | 16 +++-
 drivers/gpu/drm/i915/i915_gpu_error.c |  8 +-
 drivers/gpu/drm/i915/i915_pci.c   | 79 ---
 drivers/gpu/drm/i915/i915_vma.c   | 16 ++--
 drivers/gpu/drm/i915/i915_vma.h   |  2 +-
 drivers/gpu/drm/i915/i915_vma_types.h |  2 -
 drivers/gpu/drm/i915/intel_device_info.h  |  5 ++
 drivers/gpu/drm/i915/selftests/i915_gem.c |  5 +-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++--
 .../drm/i915/selftests/intel_memory_region.c  |  4 +-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  9 +++
 drivers/gpu/drm/i915/selftests/mock_gtt.c |  8 +-
 40 files changed, 500 insertions(+), 242 deletions(-)

-- 
2.25.1



[PATCH v3 2/5] drm/i915: use pat_index instead of cache_level

2023-04-27 Thread fei . yang
From: Fei Yang 

Currently the KMD is using enum i915_cache_level to set caching policy for
buffer objects. This is flaky because the PAT index which really controls
the caching behavior in PTE has far more levels than what's defined in the
enum. In addition, the PAT index is platform dependent; translating between
i915_cache_level and PAT index is not reliable and makes the code more
complicated.

From UMD's perspective there is also a necessity to set caching policy for
performance fine tuning. It's much easier for the UMD to directly use PAT
index because the behavior of each PAT index is clearly defined in Bspec.
Having the abstracted i915_cache_level sitting in between would only cause
more ambiguity.

For these reasons this patch replaces i915_cache_level with PAT index. Also
note, the cache_level is not completely removed yet, because the KMD still
has the need of creating buffer objects with simple cache settings such as
cached, uncached, or writethrough. For such simple cases, using cache_level
would help simplify the code.

Cc: Chris Wilson 
Cc: Matt Roper 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
---
 drivers/gpu/drm/i915/display/intel_dpt.c  | 12 +--
 drivers/gpu/drm/i915/gem/i915_gem_domain.c| 43 ++
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++-
 drivers/gpu/drm/i915/gem/i915_gem_mman.c  |  3 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.c| 52 +++-
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  4 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 25 +-
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c|  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  | 16 ++--
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
 .../drm/i915/gem/selftests/i915_gem_migrate.c |  2 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|  2 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  | 10 ++-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 71 
 drivers/gpu/drm/i915/gt/gen8_ppgtt.h  |  3 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 82 +--
 drivers/gpu/drm/i915/gt/intel_gtt.h   | 20 ++---
 drivers/gpu/drm/i915/gt/intel_migrate.c   | 47 ++-
 drivers/gpu/drm/i915/gt/intel_migrate.h   | 13 ++-
 drivers/gpu/drm/i915/gt/intel_ppgtt.c |  6 +-
 drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++-
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  8 +-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  2 +-
 drivers/gpu/drm/i915/gt/selftest_tlb.c|  4 +-
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  | 10 ++-
 drivers/gpu/drm/i915/i915_debugfs.c   | 55 ++---
 drivers/gpu/drm/i915/i915_gem.c   | 16 +++-
 drivers/gpu/drm/i915/i915_gpu_error.c |  8 +-
 drivers/gpu/drm/i915/i915_vma.c   | 16 ++--
 drivers/gpu/drm/i915/i915_vma.h   |  2 +-
 drivers/gpu/drm/i915/i915_vma_types.h |  2 -
 drivers/gpu/drm/i915/selftests/i915_gem.c |  5 +-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++--
 .../drm/i915/selftests/intel_memory_region.c  |  4 +-
 drivers/gpu/drm/i915/selftests/mock_gtt.c |  8 +-
 36 files changed, 394 insertions(+), 239 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c 
b/drivers/gpu/drm/i915/display/intel_dpt.c
index c5eacfdba1a5..7c5fddb203ba 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.c
+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
@@ -43,24 +43,24 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
 static void dpt_insert_page(struct i915_address_space *vm,
dma_addr_t addr,
u64 offset,
-   enum i915_cache_level level,
+   unsigned int pat_index,
u32 flags)
 {
struct i915_dpt *dpt = i915_vm_to_dpt(vm);
gen8_pte_t __iomem *base = dpt->iomem;
 
gen8_set_pte(base + offset / I915_GTT_PAGE_SIZE,
-vm->pte_encode(addr, level, flags));
+vm->pte_encode(addr, pat_index, flags));
 }
 
 static void dpt_insert_entries(struct i915_address_space *vm,
   struct i915_vma_resource *vma_res,
-  enum i915_cache_level level,
+  unsigned int pat_index,
   u32 flags)
 {
struct i915_dpt *dpt = i915_vm_to_dpt(vm);
gen8_pte_t __iomem *base = dpt->iomem;
-   const gen8_pte_t pte_encode = vm->pte_encode(0, level, flags);
+   const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags);
struct sgt_iter sgt_iter;
dma_addr_t addr;
int i;
@@ -83,7 +83,7 @@ static void dpt_clear_range(struct i915_address_space *vm,
 static void dpt_bind_vma(struct i915_address_space *vm,
 struct i9

[PATCH v3 4/5] drm/i915/mtl: end support for set caching ioctl

2023-04-27 Thread fei . yang
From: Fei Yang 

The design is to keep a Buffer Object's caching policy immutable throughout
its life cycle. This patch ends support for the set caching ioctl from MTL
onward. While doing that we also set BOs to be 1-way coherent at creation
time because the GPU is no longer automatically snooping the CPU cache. For
userspace components needing to fine-tune the caching policy for BOs, a
follow-up patch will extend the GEM_CREATE uAPI to allow them to specify the
caching mode at BO creation time.

Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
Reviewed-by: Andrzej Hajda 
---
 drivers/gpu/drm/i915/gem/i915_gem_domain.c | 3 +++
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c  | 9 -
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c 
b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 17375935ce1e..dcd6056ef90d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -336,6 +336,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void 
*data,
if (IS_DGFX(i915))
return -ENODEV;
 
+   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+   return -EOPNOTSUPP;
+
switch (args->caching) {
case I915_CACHING_NONE:
level = I915_CACHE_NONE;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 37d1efcd3ca6..cad4a6017f4b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -601,7 +601,14 @@ static int shmem_object_init(struct intel_memory_region 
*mem,
obj->write_domain = I915_GEM_DOMAIN_CPU;
obj->read_domains = I915_GEM_DOMAIN_CPU;
 
-   if (HAS_LLC(i915))
+   /*
+* MTL doesn't snoop CPU cache by default for GPU access (namely
+* 1-way coherency). However some UMDs are currently depending on
+* that. Make 1-way coherent the default setting for MTL. A follow-up
+* patch will extend the GEM_CREATE uAPI to allow UMDs to specify the
+* caching mode at BO creation time.
+*/
+   if (HAS_LLC(i915) || (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)))
/* On some devices, we can have the GPU use the LLC (the CPU
 * cache) for about a 10% performance improvement
 * compared to uncached.  Graphics requests other than
-- 
2.25.1



[PATCH v3 3/5] drm/i915: make sure correct pte encode is used

2023-04-27 Thread fei . yang
From: Fei Yang 

PTE encode is platform dependent. After replacing cache_level with
pat_index, the newly introduced mtl_pte_encode is actually generic
for all gen12 platforms, thus rename it to gen12_pte_encode and
apply it to all gen12 platforms.

Cc: Chris Wilson 
Cc: Matt Roper 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
---
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index ee52e5833c50..81b7725812ce 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -55,9 +55,9 @@ static u64 gen8_pte_encode(dma_addr_t addr,
return pte;
 }
 
-static u64 mtl_pte_encode(dma_addr_t addr,
- unsigned int pat_index,
- u32 flags)
+static u64 gen12_pte_encode(dma_addr_t addr,
+   unsigned int pat_index,
+   u32 flags)
 {
gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
 
@@ -994,8 +994,8 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
 */
ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
 
-   if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
-   ppgtt->vm.pte_encode = mtl_pte_encode;
+   if (GRAPHICS_VER(gt->i915) >= 12)
+   ppgtt->vm.pte_encode = gen12_pte_encode;
else
ppgtt->vm.pte_encode = gen8_pte_encode;
 
-- 
2.25.1



[PATCH v3 1/5] drm/i915: preparation for using PAT index

2023-04-27 Thread fei . yang
From: Fei Yang 

This patch is a preparation for replacing enum i915_cache_level with PAT
index. Caching policy for buffer objects is set through the PAT index in
the PTE; the old i915_cache_level is not sufficient to represent all caching
modes supported by the hardware.

Prepare for the transition by adding some platform-dependent data structures
and helper functions to translate cache_level to pat_index.

cachelevel_to_pat: a platform dependent array mapping cache_level to
   pat_index.

max_pat_index: the maximum PAT index recommended in hardware specification
   Needed for validating the PAT index passed in from user
   space.

i915_gem_get_pat_index: function to convert cache_level to PAT index.

obj_to_i915(obj): macro moved to header file for wider usage.

I915_MAX_CACHE_LEVEL: upper bound of i915_cache_level for the
  convenience of coding.

Cc: Chris Wilson 
Cc: Matt Roper 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
Reviewed-by: Andrzej Hajda 
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c|  9 +++
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  4 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  1 +
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  2 -
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  |  6 ++
 drivers/gpu/drm/i915/gt/intel_ggtt.c  |  6 ++
 drivers/gpu/drm/i915/i915_pci.c   | 79 ---
 drivers/gpu/drm/i915/intel_device_info.h  |  5 ++
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  9 +++
 9 files changed, 110 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 4666bb82f312..8c70a0ec7d2f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -45,6 +45,15 @@ static struct kmem_cache *slab_objects;
 
 static const struct drm_gem_object_funcs i915_gem_object_funcs;
 
+unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
+   enum i915_cache_level level)
+{
+   if (drm_WARN_ON(&i915->drm, level >= I915_MAX_CACHE_LEVEL))
+   return 0;
+
+   return INTEL_INFO(i915)->cachelevel_to_pat[level];
+}
+
 struct drm_i915_gem_object *i915_gem_object_alloc(void)
 {
struct drm_i915_gem_object *obj;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 885ccde9dc3c..4c92e17b4337 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -20,6 +20,8 @@
 
 enum intel_region_id;
 
+#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
+
 static inline bool i915_gem_object_size_2big(u64 size)
 {
struct drm_i915_gem_object *obj;
@@ -30,6 +32,8 @@ static inline bool i915_gem_object_size_2big(u64 size)
return false;
 }
 
+unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
+   enum i915_cache_level level);
 void i915_gem_init__objects(struct drm_i915_private *i915);
 
 void i915_objects_module_exit(void);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 830c11431ee8..41b35abccf88 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -194,6 +194,7 @@ enum i915_cache_level {
 * engine.
 */
I915_CACHE_WT,
+   I915_MAX_CACHE_LEVEL,
 };
 
 enum i915_map_type {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index b1672e054b21..214763942aa2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -460,8 +460,6 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private 
*i915,
fs_reclaim_release(GFP_KERNEL);
 }
 
-#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
-
 /**
  * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By
  * default all object types that support shrinking(see IS_SHRINKABLE), will 
also
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 4c9a2f2db908..4c6b760d4774 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -78,6 +78,12 @@ static u64 mtl_pte_encode(dma_addr_t addr,
case I915_CACHE_WT:
pte |= GEN12_PPGTT_PTE_PAT0;
break;
+   default:
+   /* This should never happen. Added to deal with the compile
+* error due to the addition of I915_MAX_CACHE_LEVEL. Will
+* be removed by the pat_index patch.
+*/
+   break;
}
 
return pte;
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c 
b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 20915edc8bd9..c8390d03fce2 100644
--- a/drivers/

[PATCH v3 5/5] drm/i915: Allow user to set cache at BO creation

2023-04-27 Thread fei . yang
From: Fei Yang 

To comply with the design that buffer objects shall have immutable
cache settings throughout their life cycle, {set, get}_caching ioctls
are no longer supported from MTL onward. With that change caching
policy can only be set at object creation time. The current code
applies a default (platform dependent) cache setting for all objects.
However this is not optimal for performance tuning. The patch extends
the existing gem_create uAPI to let user set PAT index for the object
at creation time.
The new extension is platform independent, so UMD's can switch to using
this extension for older platforms as well, while {set, get}_caching are
still supported on these legacy platforms for compatibility reasons.
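
For reference, a minimal UMD-side sketch of chaining the new extension. The
struct, ioctl and extension names are the ones added by this patch; drm_fd is
assumed to be an open i915 fd, and the valid pat_index range is platform
specific (bounded by max_pat_index):

  #include <errno.h>
  #include <stdint.h>
  #include <sys/ioctl.h>
  #include <drm/i915_drm.h>

  /* Create a BO with an explicit PAT index; returns 0 and fills *handle on
   * success, negative errno otherwise (-EINVAL for an out-of-range index). */
  static int create_bo_with_pat(int drm_fd, uint64_t size,
                                uint32_t pat_index, uint32_t *handle)
  {
          struct drm_i915_gem_create_ext_set_pat set_pat = {
                  .base = { .name = I915_GEM_CREATE_EXT_SET_PAT },
                  .pat_index = pat_index,
          };
          struct drm_i915_gem_create_ext create = {
                  .size = size,
                  .extensions = (uintptr_t)&set_pat,
          };

          if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create))
                  return -errno;

          /* The caching policy chosen here stays fixed for the object's
           * lifetime. */
          *handle = create.handle;
          return 0;
  }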

Cc: Chris Wilson 
Cc: Matt Roper 
Cc: Andi Shyti 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
---
 drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 ++
 drivers/gpu/drm/i915/gem/i915_gem_object.c |  6 
 include/uapi/drm/i915_drm.h| 36 ++
 tools/include/uapi/drm/i915_drm.h  | 36 ++
 4 files changed, 114 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c 
b/drivers/gpu/drm/i915/gem/i915_gem_create.c
index bfe1dbda4cb7..723c3ddd6c74 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
@@ -245,6 +245,7 @@ struct create_ext {
unsigned int n_placements;
unsigned int placement_mask;
unsigned long flags;
+   unsigned int pat_index;
 };
 
 static void repr_placements(char *buf, size_t size,
@@ -394,11 +395,39 @@ static int ext_set_protected(struct i915_user_extension 
__user *base, void *data
return 0;
 }
 
+static int ext_set_pat(struct i915_user_extension __user *base, void *data)
+{
+   struct create_ext *ext_data = data;
+   struct drm_i915_private *i915 = ext_data->i915;
+   struct drm_i915_gem_create_ext_set_pat ext;
+   unsigned int max_pat_index;
+
+   BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) !=
+offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd));
+
+   if (copy_from_user(&ext, base, sizeof(ext)))
+   return -EFAULT;
+
+   max_pat_index = INTEL_INFO(i915)->max_pat_index;
+
+   if (ext.pat_index > max_pat_index) {
+   drm_dbg(&i915->drm, "PAT index is invalid: %u\n",
+   ext.pat_index);
+   return -EINVAL;
+   }
+
+   ext_data->pat_index = ext.pat_index;
+
+   return 0;
+}
+
 static const i915_user_extension_fn create_extensions[] = {
[I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements,
[I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected,
+   [I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat,
 };
 
+#define PAT_INDEX_NOT_SET  0xffff
 /**
  * i915_gem_create_ext_ioctl - Creates a new mm object and returns a handle to 
it.
  * @dev: drm device pointer
@@ -418,6 +447,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void 
*data,
if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS)
return -EINVAL;
 
+   ext_data.pat_index = PAT_INDEX_NOT_SET;
ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
   create_extensions,
   ARRAY_SIZE(create_extensions),
@@ -454,5 +484,11 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void 
*data,
if (IS_ERR(obj))
return PTR_ERR(obj);
 
+   if (ext_data.pat_index != PAT_INDEX_NOT_SET) {
+   i915_gem_object_set_pat_index(obj, ext_data.pat_index);
+   /* Mark pat_index is set by UMD */
+   obj->cache_level = I915_CACHE_INVAL;
+   }
+
return i915_gem_publish(obj, file, &args->size, &args->handle);
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 27c948350b5b..61651f7e5806 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -209,6 +209,12 @@ bool i915_gem_object_can_bypass_llc(struct 
drm_i915_gem_object *obj)
if (!(obj->flags & I915_BO_ALLOC_USER))
return false;
 
+   /*
+* Always flush cache for UMD objects at creation time.
+*/
+   if (obj->cache_level == I915_CACHE_INVAL)
+   return true;
+
/*
 * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
 * possible for userspace to bypass the GTT caching bits set by the
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index dba7c5a5b25e..03c5c314846e 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -3630,9 +3630,13 @@ struct drm_i915_gem_create_ext {
 *
 * For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see
 

[PATCH v3 0/5] drm/i915: Allow user to set cache at BO creation

2023-04-27 Thread fei . yang
From: Fei Yang 

The first three patches in this series are taken from
https://patchwork.freedesktop.org/series/116868/
These patches are included here because the last patch
has dependency on the pat_index refactor.

This series is focusing on uAPI changes,
1. end support for set caching ioctl [PATCH 4/5]
2. add set_pat extension for gem_create [PATCH 5/5]

v2: drop one patch that was merged separately
341ad0e8e254 drm/i915/mtl: Add PTE encode function
v3: rebase on https://patchwork.freedesktop.org/series/117082/

Fei Yang (5):
  drm/i915: preparation for using PAT index
  drm/i915: use pat_index instead of cache_level
  drm/i915: make sure correct pte encode is used
  drm/i915/mtl: end support for set caching ioctl
  drm/i915: Allow user to set cache at BO creation

 drivers/gpu/drm/i915/display/intel_dpt.c  | 12 +--
 drivers/gpu/drm/i915/gem/i915_gem_create.c| 36 +
 drivers/gpu/drm/i915/gem/i915_gem_domain.c| 46 ++-
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++-
 drivers/gpu/drm/i915/gem/i915_gem_mman.c  |  3 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.c| 67 +++-
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  8 ++
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 26 +-
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c |  9 ++-
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  2 -
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c|  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  | 16 ++--
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
 .../drm/i915/gem/selftests/i915_gem_migrate.c |  2 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|  2 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  | 10 ++-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 73 +
 drivers/gpu/drm/i915/gt/gen8_ppgtt.h  |  3 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 76 +-
 drivers/gpu/drm/i915/gt/intel_gtt.h   | 20 +++--
 drivers/gpu/drm/i915/gt/intel_migrate.c   | 47 ++-
 drivers/gpu/drm/i915/gt/intel_migrate.h   | 13 ++-
 drivers/gpu/drm/i915/gt/intel_ppgtt.c |  6 +-
 drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 +--
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  8 +-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  2 +-
 drivers/gpu/drm/i915/gt/selftest_tlb.c|  4 +-
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  | 10 ++-
 drivers/gpu/drm/i915/i915_debugfs.c   | 55 ++---
 drivers/gpu/drm/i915/i915_gem.c   | 16 +++-
 drivers/gpu/drm/i915/i915_gpu_error.c |  8 +-
 drivers/gpu/drm/i915/i915_pci.c   | 79 ---
 drivers/gpu/drm/i915/i915_vma.c   | 16 ++--
 drivers/gpu/drm/i915/i915_vma.h   |  2 +-
 drivers/gpu/drm/i915/i915_vma_types.h |  2 -
 drivers/gpu/drm/i915/intel_device_info.h  |  5 ++
 drivers/gpu/drm/i915/selftests/i915_gem.c |  5 +-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++--
 .../drm/i915/selftests/intel_memory_region.c  |  4 +-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  9 +++
 drivers/gpu/drm/i915/selftests/mock_gtt.c |  8 +-
 include/uapi/drm/i915_drm.h   | 36 +
 tools/include/uapi/drm/i915_drm.h | 36 +
 44 files changed, 621 insertions(+), 243 deletions(-)

-- 
2.25.1



[PATCH v3 1/3] drm/i915: preparation for using PAT index

2023-04-27 Thread fei . yang
From: Fei Yang 

This patch is a preparation for replacing enum i915_cache_level with PAT
index. Caching policy for buffer objects is set through the PAT index in
the PTE; the old i915_cache_level is not sufficient to represent all caching
modes supported by the hardware.

Prepare for the transition by adding some platform-dependent data structures
and helper functions to translate cache_level to pat_index.

cachelevel_to_pat: a platform dependent array mapping cache_level to
   pat_index.

max_pat_index: the maximum PAT index recommended in hardware specification
   Needed for validating the PAT index passed in from user
   space.

i915_gem_get_pat_index: function to convert cache_level to PAT index.

obj_to_i915(obj): macro moved to header file for wider usage.

I915_MAX_CACHE_LEVEL: upper bound of i915_cache_level for the
  convenience of coding.

Cc: Chris Wilson 
Cc: Matt Roper 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
Reviewed-by: Andrzej Hajda 
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c|  9 +++
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  4 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  1 +
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  2 -
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  |  6 ++
 drivers/gpu/drm/i915/gt/intel_ggtt.c  |  6 ++
 drivers/gpu/drm/i915/i915_pci.c   | 79 ---
 drivers/gpu/drm/i915/intel_device_info.h  |  5 ++
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  9 +++
 9 files changed, 110 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 4666bb82f312..8c70a0ec7d2f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -45,6 +45,15 @@ static struct kmem_cache *slab_objects;
 
 static const struct drm_gem_object_funcs i915_gem_object_funcs;
 
+unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
+   enum i915_cache_level level)
+{
+   if (drm_WARN_ON(&i915->drm, level >= I915_MAX_CACHE_LEVEL))
+   return 0;
+
+   return INTEL_INFO(i915)->cachelevel_to_pat[level];
+}
+
 struct drm_i915_gem_object *i915_gem_object_alloc(void)
 {
struct drm_i915_gem_object *obj;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 885ccde9dc3c..4c92e17b4337 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -20,6 +20,8 @@
 
 enum intel_region_id;
 
+#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
+
 static inline bool i915_gem_object_size_2big(u64 size)
 {
struct drm_i915_gem_object *obj;
@@ -30,6 +32,8 @@ static inline bool i915_gem_object_size_2big(u64 size)
return false;
 }
 
+unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
+   enum i915_cache_level level);
 void i915_gem_init__objects(struct drm_i915_private *i915);
 
 void i915_objects_module_exit(void);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 830c11431ee8..41b35abccf88 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -194,6 +194,7 @@ enum i915_cache_level {
 * engine.
 */
I915_CACHE_WT,
+   I915_MAX_CACHE_LEVEL,
 };
 
 enum i915_map_type {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index b1672e054b21..214763942aa2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -460,8 +460,6 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private 
*i915,
fs_reclaim_release(GFP_KERNEL);
 }
 
-#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
-
 /**
  * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By
  * default all object types that support shrinking(see IS_SHRINKABLE), will 
also
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 4c9a2f2db908..4c6b760d4774 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -78,6 +78,12 @@ static u64 mtl_pte_encode(dma_addr_t addr,
case I915_CACHE_WT:
pte |= GEN12_PPGTT_PTE_PAT0;
break;
+   default:
+   /* This should never happen. Added to deal with the compile
+* error due to the addition of I915_MAX_CACHE_LEVEL. Will
+* be removed by the pat_index patch.
+*/
+   break;
}
 
return pte;
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c 
b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 20915edc8bd9..c8390d03fce2 100644
--- a/drivers/

[PATCH v3 3/3] drm/i915: make sure correct pte encode is used

2023-04-27 Thread fei . yang
From: Fei Yang 

PTE encode is platform dependent. After replacing cache_level with
pat_index, the newly introduced mtl_pte_encode is actually generic
for all gen12 platforms, thus rename it to gen12_pte_encode and
apply it to all gen12 platforms.

Cc: Chris Wilson 
Cc: Matt Roper 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
---
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index ee52e5833c50..81b7725812ce 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -55,9 +55,9 @@ static u64 gen8_pte_encode(dma_addr_t addr,
return pte;
 }
 
-static u64 mtl_pte_encode(dma_addr_t addr,
- unsigned int pat_index,
- u32 flags)
+static u64 gen12_pte_encode(dma_addr_t addr,
+   unsigned int pat_index,
+   u32 flags)
 {
gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
 
@@ -994,8 +994,8 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
 */
ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
 
-   if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
-   ppgtt->vm.pte_encode = mtl_pte_encode;
+   if (GRAPHICS_VER(gt->i915) >= 12)
+   ppgtt->vm.pte_encode = gen12_pte_encode;
else
ppgtt->vm.pte_encode = gen8_pte_encode;
 
-- 
2.25.1



[PATCH v3 0/3] drm/i915: use pat_index instead of cache_level

2023-04-27 Thread fei . yang
From: Fei Yang 

This patch set was posted at
https://patchwork.freedesktop.org/series/116868/
Change title since the PTE patch was merged separately.

These patches are extracted from series
https://patchwork.freedesktop.org/series/115980/

This series refactors the cache policy programming so that the PTE
encode functions can be unified across all GEN12 platforms. This
refactor is also important in implementing the design which allows
userspace to directly set the cache policy for each Buffer Object.

v2: drop one patch that was merged separately
341ad0e8e254 drm/i915/mtl: Add PTE encode function
v3: disable {get, set}_caching ioctl

Fei Yang (3):
  drm/i915: preparation for using PAT index
  drm/i915: use pat_index instead of cache_level
  drm/i915: make sure correct pte encode is used

 drivers/gpu/drm/i915/display/intel_dpt.c  | 12 +--
 drivers/gpu/drm/i915/gem/i915_gem_domain.c| 43 +-
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++-
 drivers/gpu/drm/i915/gem/i915_gem_mman.c  |  3 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.c| 61 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  8 ++
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 26 +-
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  2 -
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c|  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  | 16 ++--
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
 .../drm/i915/gem/selftests/i915_gem_migrate.c |  2 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|  2 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  | 10 ++-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 73 +
 drivers/gpu/drm/i915/gt/gen8_ppgtt.h  |  3 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 76 +-
 drivers/gpu/drm/i915/gt/intel_gtt.h   | 20 +++--
 drivers/gpu/drm/i915/gt/intel_migrate.c   | 47 ++-
 drivers/gpu/drm/i915/gt/intel_migrate.h   | 13 ++-
 drivers/gpu/drm/i915/gt/intel_ppgtt.c |  6 +-
 drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 +--
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  8 +-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  2 +-
 drivers/gpu/drm/i915/gt/selftest_tlb.c|  4 +-
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  | 10 ++-
 drivers/gpu/drm/i915/i915_debugfs.c   | 55 ++---
 drivers/gpu/drm/i915/i915_gem.c   | 16 +++-
 drivers/gpu/drm/i915/i915_gpu_error.c |  8 +-
 drivers/gpu/drm/i915/i915_pci.c   | 79 ---
 drivers/gpu/drm/i915/i915_vma.c   | 16 ++--
 drivers/gpu/drm/i915/i915_vma.h   |  2 +-
 drivers/gpu/drm/i915/i915_vma_types.h |  2 -
 drivers/gpu/drm/i915/intel_device_info.h  |  5 ++
 drivers/gpu/drm/i915/selftests/i915_gem.c |  5 +-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++--
 .../drm/i915/selftests/intel_memory_region.c  |  4 +-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  9 +++
 drivers/gpu/drm/i915/selftests/mock_gtt.c |  8 +-
 40 files changed, 496 insertions(+), 242 deletions(-)

-- 
2.25.1



[PATCH v3 2/3] drm/i915: use pat_index instead of cache_level

2023-04-27 Thread fei . yang
From: Fei Yang 

Currently the KMD is using enum i915_cache_level to set caching policy for
buffer objects. This is flaky because the PAT index which really controls
the caching behavior in PTE has far more levels than what's defined in the
enum. In addition, the PAT index is platform dependent; translating between
i915_cache_level and PAT index is not reliable and makes the code more
complicated.

From UMD's perspective there is also a necessity to set caching policy for
performance fine tuning. It's much easier for the UMD to directly use PAT
index because the behavior of each PAT index is clearly defined in Bspec.
Having the abstracted i915_cache_level sitting in between would only cause
more ambiguity.

For these reasons this patch replaces i915_cache_level with PAT index. Also
note, the cache_level is not completely removed yet, because the KMD still
has the need of creating buffer objects with simple cache settings such as
cached, uncached, or writethrough. For such simple cases, using cache_level
would help simplify the code.

Cc: Chris Wilson 
Cc: Matt Roper 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
---
 drivers/gpu/drm/i915/display/intel_dpt.c  | 12 +--
 drivers/gpu/drm/i915/gem/i915_gem_domain.c| 43 ++
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++-
 drivers/gpu/drm/i915/gem/i915_gem_mman.c  |  3 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.c| 52 +++-
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  4 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 25 +-
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c|  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  | 16 ++--
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
 .../drm/i915/gem/selftests/i915_gem_migrate.c |  2 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|  2 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  | 10 ++-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 71 
 drivers/gpu/drm/i915/gt/gen8_ppgtt.h  |  3 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 82 +--
 drivers/gpu/drm/i915/gt/intel_gtt.h   | 20 ++---
 drivers/gpu/drm/i915/gt/intel_migrate.c   | 47 ++-
 drivers/gpu/drm/i915/gt/intel_migrate.h   | 13 ++-
 drivers/gpu/drm/i915/gt/intel_ppgtt.c |  6 +-
 drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++-
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  8 +-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  2 +-
 drivers/gpu/drm/i915/gt/selftest_tlb.c|  4 +-
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  | 10 ++-
 drivers/gpu/drm/i915/i915_debugfs.c   | 55 ++---
 drivers/gpu/drm/i915/i915_gem.c   | 16 +++-
 drivers/gpu/drm/i915/i915_gpu_error.c |  8 +-
 drivers/gpu/drm/i915/i915_vma.c   | 16 ++--
 drivers/gpu/drm/i915/i915_vma.h   |  2 +-
 drivers/gpu/drm/i915/i915_vma_types.h |  2 -
 drivers/gpu/drm/i915/selftests/i915_gem.c |  5 +-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++--
 .../drm/i915/selftests/intel_memory_region.c  |  4 +-
 drivers/gpu/drm/i915/selftests/mock_gtt.c |  8 +-
 36 files changed, 394 insertions(+), 239 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c 
b/drivers/gpu/drm/i915/display/intel_dpt.c
index c5eacfdba1a5..7c5fddb203ba 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.c
+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
@@ -43,24 +43,24 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
 static void dpt_insert_page(struct i915_address_space *vm,
dma_addr_t addr,
u64 offset,
-   enum i915_cache_level level,
+   unsigned int pat_index,
u32 flags)
 {
struct i915_dpt *dpt = i915_vm_to_dpt(vm);
gen8_pte_t __iomem *base = dpt->iomem;
 
gen8_set_pte(base + offset / I915_GTT_PAGE_SIZE,
-vm->pte_encode(addr, level, flags));
+vm->pte_encode(addr, pat_index, flags));
 }
 
 static void dpt_insert_entries(struct i915_address_space *vm,
   struct i915_vma_resource *vma_res,
-  enum i915_cache_level level,
+  unsigned int pat_index,
   u32 flags)
 {
struct i915_dpt *dpt = i915_vm_to_dpt(vm);
gen8_pte_t __iomem *base = dpt->iomem;
-   const gen8_pte_t pte_encode = vm->pte_encode(0, level, flags);
+   const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags);
struct sgt_iter sgt_iter;
dma_addr_t addr;
int i;
@@ -83,7 +83,7 @@ static void dpt_clear_range(struct i915_address_space *vm,
 static void dpt_bind_vma(struct i915_address_space *vm,
 struct i9

[PATCH v2 2/5] drm/i915: use pat_index instead of cache_level

2023-04-26 Thread fei . yang
From: Fei Yang 

Currently the KMD is using enum i915_cache_level to set caching policy for
buffer objects. This is flaky because the PAT index which really controls
the caching behavior in PTE has far more levels than what's defined in the
enum. In addition, the PAT index is platform dependent; translating between
i915_cache_level and PAT index is not reliable and makes the code more
complicated.

From UMD's perspective there is also a necessity to set caching policy for
performance fine tuning. It's much easier for the UMD to directly use PAT
index because the behavior of each PAT index is clearly defined in Bspec.
Having the abstracted i915_cache_level sitting in between would only cause
more ambiguity.

For these reasons this patch replaces i915_cache_level with PAT index. Also
note, the cache_level is not completely removed yet, because the KMD still
has the need of creating buffer objects with simple cache settings such as
cached, uncached, or writethrough. For such simple cases, using cache_level
would help simplify the code.

Cc: Chris Wilson 
Cc: Matt Roper 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
---
 drivers/gpu/drm/i915/display/intel_dpt.c  | 12 +--
 drivers/gpu/drm/i915/gem/i915_gem_domain.c| 27 ++
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++-
 drivers/gpu/drm/i915/gem/i915_gem_mman.c  |  3 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.c| 52 +++-
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  4 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 25 +-
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c|  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  | 16 ++--
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
 .../drm/i915/gem/selftests/i915_gem_migrate.c |  2 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|  2 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  | 10 ++-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 71 
 drivers/gpu/drm/i915/gt/gen8_ppgtt.h  |  3 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 82 +--
 drivers/gpu/drm/i915/gt/intel_gtt.h   | 20 ++---
 drivers/gpu/drm/i915/gt/intel_migrate.c   | 47 ++-
 drivers/gpu/drm/i915/gt/intel_migrate.h   | 13 ++-
 drivers/gpu/drm/i915/gt/intel_ppgtt.c |  6 +-
 drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++-
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  8 +-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  2 +-
 drivers/gpu/drm/i915/gt/selftest_tlb.c|  4 +-
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  | 10 ++-
 drivers/gpu/drm/i915/i915_debugfs.c   | 55 ++---
 drivers/gpu/drm/i915/i915_gem.c   | 16 +++-
 drivers/gpu/drm/i915/i915_gpu_error.c |  8 +-
 drivers/gpu/drm/i915/i915_vma.c   | 16 ++--
 drivers/gpu/drm/i915/i915_vma.h   |  2 +-
 drivers/gpu/drm/i915/i915_vma_types.h |  2 -
 drivers/gpu/drm/i915/selftests/i915_gem.c |  5 +-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++--
 .../drm/i915/selftests/intel_memory_region.c  |  4 +-
 drivers/gpu/drm/i915/selftests/mock_gtt.c |  8 +-
 36 files changed, 378 insertions(+), 239 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c 
b/drivers/gpu/drm/i915/display/intel_dpt.c
index c5eacfdba1a5..7c5fddb203ba 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.c
+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
@@ -43,24 +43,24 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
 static void dpt_insert_page(struct i915_address_space *vm,
dma_addr_t addr,
u64 offset,
-   enum i915_cache_level level,
+   unsigned int pat_index,
u32 flags)
 {
struct i915_dpt *dpt = i915_vm_to_dpt(vm);
gen8_pte_t __iomem *base = dpt->iomem;
 
gen8_set_pte(base + offset / I915_GTT_PAGE_SIZE,
-vm->pte_encode(addr, level, flags));
+vm->pte_encode(addr, pat_index, flags));
 }
 
 static void dpt_insert_entries(struct i915_address_space *vm,
   struct i915_vma_resource *vma_res,
-  enum i915_cache_level level,
+  unsigned int pat_index,
   u32 flags)
 {
struct i915_dpt *dpt = i915_vm_to_dpt(vm);
gen8_pte_t __iomem *base = dpt->iomem;
-   const gen8_pte_t pte_encode = vm->pte_encode(0, level, flags);
+   const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags);
struct sgt_iter sgt_iter;
dma_addr_t addr;
int i;
@@ -83,7 +83,7 @@ static void dpt_clear_range(struct i915_address_space *vm,
 static void dpt_bind_vma(struct i915_address_space *vm,
 struct i915_vm_pt_stash *stash,
  

[PATCH v2 1/5] drm/i915: preparation for using PAT index

2023-04-26 Thread fei . yang
From: Fei Yang 

This patch is a preparation for replacing enum i915_cache_level with PAT
index. Caching policy for buffer objects is set through the PAT index in
the PTE; the old i915_cache_level is not sufficient to represent all caching
modes supported by the hardware.

Prepare for the transition by adding some platform-dependent data structures
and helper functions to translate cache_level to pat_index.

cachelevel_to_pat: a platform dependent array mapping cache_level to
   pat_index.

max_pat_index: the maximum PAT index recommended in hardware specification
   Needed for validating the PAT index passed in from user
   space.

i915_gem_get_pat_index: function to convert cache_level to PAT index.

obj_to_i915(obj): macro moved to header file for wider usage.

I915_MAX_CACHE_LEVEL: upper bound of i915_cache_level for the
  convenience of coding.
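
To illustrate how these pieces fit together, here is a minimal sketch
(the table values are made-up placeholders, not the actual entries from
i915_pci.c) of a per-platform cachelevel_to_pat table and a caller
translating a legacy cache_level on demand:

  /* Illustrative only -- the real tables and values live in i915_pci.c. */
  static const u8 example_cachelevel_to_pat[I915_MAX_CACHE_LEVEL] = {
          [I915_CACHE_NONE]   = 3,
          [I915_CACHE_LLC]    = 0,
          [I915_CACHE_L3_LLC] = 0,
          [I915_CACHE_WT]     = 2,
  };

  /* Wherever a PTE must be written for a legacy object: */
  unsigned int pat_index = i915_gem_get_pat_index(i915, I915_CACHE_LLC);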

Cc: Chris Wilson 
Cc: Matt Roper 
Cc: Andi Shyti 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
Reviewed-by: Andrzej Hajda 
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c|  9 +++
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  4 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  1 +
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  2 -
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  |  6 ++
 drivers/gpu/drm/i915/gt/intel_ggtt.c  |  6 ++
 drivers/gpu/drm/i915/i915_pci.c   | 79 ---
 drivers/gpu/drm/i915/intel_device_info.h  |  5 ++
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  9 +++
 9 files changed, 110 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 4666bb82f312..8c70a0ec7d2f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -45,6 +45,15 @@ static struct kmem_cache *slab_objects;
 
 static const struct drm_gem_object_funcs i915_gem_object_funcs;
 
+unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
+   enum i915_cache_level level)
+{
+   if (drm_WARN_ON(&i915->drm, level >= I915_MAX_CACHE_LEVEL))
+   return 0;
+
+   return INTEL_INFO(i915)->cachelevel_to_pat[level];
+}
+
 struct drm_i915_gem_object *i915_gem_object_alloc(void)
 {
struct drm_i915_gem_object *obj;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 885ccde9dc3c..4c92e17b4337 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -20,6 +20,8 @@
 
 enum intel_region_id;
 
+#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
+
 static inline bool i915_gem_object_size_2big(u64 size)
 {
struct drm_i915_gem_object *obj;
@@ -30,6 +32,8 @@ static inline bool i915_gem_object_size_2big(u64 size)
return false;
 }
 
+unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
+   enum i915_cache_level level);
 void i915_gem_init__objects(struct drm_i915_private *i915);
 
 void i915_objects_module_exit(void);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 830c11431ee8..41b35abccf88 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -194,6 +194,7 @@ enum i915_cache_level {
 * engine.
 */
I915_CACHE_WT,
+   I915_MAX_CACHE_LEVEL,
 };
 
 enum i915_map_type {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index b1672e054b21..214763942aa2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -460,8 +460,6 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private 
*i915,
fs_reclaim_release(GFP_KERNEL);
 }
 
-#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
-
 /**
  * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By
  * default all object types that support shrinking(see IS_SHRINKABLE), will 
also
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 4c9a2f2db908..4c6b760d4774 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -78,6 +78,12 @@ static u64 mtl_pte_encode(dma_addr_t addr,
case I915_CACHE_WT:
pte |= GEN12_PPGTT_PTE_PAT0;
break;
+   default:
+   /* This should never happen. Added to deal with the compile
+* error due to the addition of I915_MAX_CACHE_LEVEL. Will
+* be removed by the pat_index patch.
+*/
+   break;
}
 
return pte;
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c 
b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 20915edc8bd9..c8390d03fce2 100644
--

[PATCH v2 3/5] drm/i915: make sure correct pte encode is used

2023-04-26 Thread fei . yang
From: Fei Yang 

PTE encode is platform dependent. After replacing cache_level with
pat_index, the newly introduced mtl_pte_encode is actually generic
for all gen12 platforms, thus rename it to gen12_pte_encode and
apply it to all gen12 platforms.
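
For reference, a hedged sketch of what the unified encoder does with a
raw pat_index: each low bit of the index is spread into the matching PAT
bit of the PTE. The GEN12_PPGTT_PTE_PAT* names are the PTE field bits
from intel_gtt.h; the PAT2 bit is assumed here for illustration only.

  static u64 example_gen12_pte_encode(dma_addr_t addr,
                                      unsigned int pat_index,
                                      u32 flags)
  {
          gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;

          if (unlikely(flags & PTE_READ_ONLY))
                  pte &= ~GEN8_PAGE_RW;
          if (flags & PTE_LM)
                  pte |= GEN12_PPGTT_PTE_LM;

          /* spread pat_index bits over the PTE PAT field */
          if (pat_index & BIT(0))
                  pte |= GEN12_PPGTT_PTE_PAT0;
          if (pat_index & BIT(1))
                  pte |= GEN12_PPGTT_PTE_PAT1;
          if (pat_index & BIT(2))
                  pte |= GEN12_PPGTT_PTE_PAT2;

          return pte;
  }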

Cc: Chris Wilson 
Cc: Matt Roper 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
---
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index ee52e5833c50..81b7725812ce 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -55,9 +55,9 @@ static u64 gen8_pte_encode(dma_addr_t addr,
return pte;
 }
 
-static u64 mtl_pte_encode(dma_addr_t addr,
- unsigned int pat_index,
- u32 flags)
+static u64 gen12_pte_encode(dma_addr_t addr,
+   unsigned int pat_index,
+   u32 flags)
 {
gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
 
@@ -994,8 +994,8 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
 */
ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
 
-   if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
-   ppgtt->vm.pte_encode = mtl_pte_encode;
+   if (GRAPHICS_VER(gt->i915) >= 12)
+   ppgtt->vm.pte_encode = gen12_pte_encode;
else
ppgtt->vm.pte_encode = gen8_pte_encode;
 
-- 
2.25.1



[PATCH v2 5/5] drm/i915: Allow user to set cache at BO creation

2023-04-26 Thread fei . yang
From: Fei Yang 

To comply with the design that buffer objects shall have an immutable
cache setting throughout their life cycle, the {set, get}_caching ioctls
are no longer supported from MTL onward. With that change the caching
policy can only be set at object creation time. The current code
applies a default (platform dependent) cache setting for all objects.
However this is not optimal for performance tuning. The patch extends
the existing gem_create uAPI to let the user set the PAT index for the
object at creation time.
The new extension is platform independent, so UMDs can switch to using
this extension for older platforms as well, while {set, get}_caching are
still supported on these legacy platforms for compatibility reasons.
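
For context, a minimal user-space sketch of the new extension in use
(assumes an open DRM fd; the PAT index value is illustrative and
platform dependent; error handling omitted):

  #include <stdint.h>
  #include <sys/ioctl.h>
  #include <drm/i915_drm.h>

  struct drm_i915_gem_create_ext_set_pat set_pat = {
          .base = { .name = I915_GEM_CREATE_EXT_SET_PAT },
          .pat_index = 2,
  };
  struct drm_i915_gem_create_ext create = {
          .size = 4096,
          .extensions = (uintptr_t)&set_pat,
  };

  /* On success, create.handle is a BO with the requested PAT index. */
  int ret = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create);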

Cc: Chris Wilson 
Cc: Matt Roper 
Cc: Andi Shyti 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
---
 drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 ++
 drivers/gpu/drm/i915/gem/i915_gem_object.c |  6 
 include/uapi/drm/i915_drm.h| 36 ++
 tools/include/uapi/drm/i915_drm.h  | 36 ++
 4 files changed, 114 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c 
b/drivers/gpu/drm/i915/gem/i915_gem_create.c
index bfe1dbda4cb7..723c3ddd6c74 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
@@ -245,6 +245,7 @@ struct create_ext {
unsigned int n_placements;
unsigned int placement_mask;
unsigned long flags;
+   unsigned int pat_index;
 };
 
 static void repr_placements(char *buf, size_t size,
@@ -394,11 +395,39 @@ static int ext_set_protected(struct i915_user_extension 
__user *base, void *data
return 0;
 }
 
+static int ext_set_pat(struct i915_user_extension __user *base, void *data)
+{
+   struct create_ext *ext_data = data;
+   struct drm_i915_private *i915 = ext_data->i915;
+   struct drm_i915_gem_create_ext_set_pat ext;
+   unsigned int max_pat_index;
+
+   BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) !=
+offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd));
+
+   if (copy_from_user(&ext, base, sizeof(ext)))
+   return -EFAULT;
+
+   max_pat_index = INTEL_INFO(i915)->max_pat_index;
+
+   if (ext.pat_index > max_pat_index) {
+   drm_dbg(&i915->drm, "PAT index is invalid: %u\n",
+   ext.pat_index);
+   return -EINVAL;
+   }
+
+   ext_data->pat_index = ext.pat_index;
+
+   return 0;
+}
+
 static const i915_user_extension_fn create_extensions[] = {
[I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements,
[I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected,
+   [I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat,
 };
 
+#define PAT_INDEX_NOT_SET  0x
 /**
  * i915_gem_create_ext_ioctl - Creates a new mm object and returns a handle to 
it.
  * @dev: drm device pointer
@@ -418,6 +447,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void 
*data,
if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS)
return -EINVAL;
 
+   ext_data.pat_index = PAT_INDEX_NOT_SET;
ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
   create_extensions,
   ARRAY_SIZE(create_extensions),
@@ -454,5 +484,11 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void 
*data,
if (IS_ERR(obj))
return PTR_ERR(obj);
 
+   if (ext_data.pat_index != PAT_INDEX_NOT_SET) {
+   i915_gem_object_set_pat_index(obj, ext_data.pat_index);
+   /* Mark pat_index is set by UMD */
+   obj->cache_level = I915_CACHE_INVAL;
+   }
+
return i915_gem_publish(obj, file, &args->size, &args->handle);
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 27c948350b5b..61651f7e5806 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -209,6 +209,12 @@ bool i915_gem_object_can_bypass_llc(struct 
drm_i915_gem_object *obj)
if (!(obj->flags & I915_BO_ALLOC_USER))
return false;
 
+   /*
+* Always flush cache for UMD objects at creation time.
+*/
+   if (obj->cache_level == I915_CACHE_INVAL)
+   return true;
+
/*
 * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
 * possible for userspace to bypass the GTT caching bits set by the
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index dba7c5a5b25e..03c5c314846e 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -3630,9 +3630,13 @@ struct drm_i915_gem_create_ext {
 *
 * For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see
 

[PATCH v2 0/5] drm/i915: Allow user to set cache at BO creation

2023-04-26 Thread fei . yang
From: Fei Yang 

The first three patches in this series are taken from
https://patchwork.freedesktop.org/series/116868/
These patches are included here because the last patch
depends on the pat_index refactor.

This series is focusing on uAPI changes,
1. end support for set caching ioctl [PATCH 4/5]
2. add set_pat extension for gem_create [PATCH 5/5]

v2: drop one patch that was merged separately
341ad0e8e254 drm/i915/mtl: Add PTE encode function

Fei Yang (5):
  drm/i915: preparation for using PAT index
  drm/i915: use pat_index instead of cache_level
  drm/i915: make sure correct pte encode is used
  drm/i915/mtl: end support for set caching ioctl
  drm/i915: Allow user to set cache at BO creation

 drivers/gpu/drm/i915/display/intel_dpt.c  | 12 +--
 drivers/gpu/drm/i915/gem/i915_gem_create.c| 36 +
 drivers/gpu/drm/i915/gem/i915_gem_domain.c| 30 +++
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++-
 drivers/gpu/drm/i915/gem/i915_gem_mman.c  |  3 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.c| 67 +++-
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  8 ++
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 26 +-
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c |  9 ++-
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  2 -
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c|  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  | 16 ++--
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
 .../drm/i915/gem/selftests/i915_gem_migrate.c |  2 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|  2 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  | 10 ++-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 73 +
 drivers/gpu/drm/i915/gt/gen8_ppgtt.h  |  3 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 76 +-
 drivers/gpu/drm/i915/gt/intel_gtt.h   | 20 +++--
 drivers/gpu/drm/i915/gt/intel_migrate.c   | 47 ++-
 drivers/gpu/drm/i915/gt/intel_migrate.h   | 13 ++-
 drivers/gpu/drm/i915/gt/intel_ppgtt.c |  6 +-
 drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 +--
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  8 +-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  2 +-
 drivers/gpu/drm/i915/gt/selftest_tlb.c|  4 +-
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  | 10 ++-
 drivers/gpu/drm/i915/i915_debugfs.c   | 55 ++---
 drivers/gpu/drm/i915/i915_gem.c   | 16 +++-
 drivers/gpu/drm/i915/i915_gpu_error.c |  8 +-
 drivers/gpu/drm/i915/i915_pci.c   | 79 ---
 drivers/gpu/drm/i915/i915_vma.c   | 16 ++--
 drivers/gpu/drm/i915/i915_vma.h   |  2 +-
 drivers/gpu/drm/i915/i915_vma_types.h |  2 -
 drivers/gpu/drm/i915/intel_device_info.h  |  5 ++
 drivers/gpu/drm/i915/selftests/i915_gem.c |  5 +-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++--
 .../drm/i915/selftests/intel_memory_region.c  |  4 +-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  9 +++
 drivers/gpu/drm/i915/selftests/mock_gtt.c |  8 +-
 include/uapi/drm/i915_drm.h   | 36 +
 tools/include/uapi/drm/i915_drm.h | 36 +
 44 files changed, 605 insertions(+), 243 deletions(-)

-- 
2.25.1



[PATCH v2 4/5] drm/i915/mtl: end support for set caching ioctl

2023-04-26 Thread fei . yang
From: Fei Yang 

The design is to keep a buffer object's caching policy immutable throughout
its life cycle. This patch ends the support for the set caching ioctl
from MTL onward. While doing that we also set BOs to be 1-way coherent
at creation time because the GPU is no longer automatically snooping the CPU
cache. For userspace components needing to fine-tune the caching policy
for BOs, a follow-up patch will extend the GEM_CREATE uAPI to allow
them to specify the caching mode at BO creation time.
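
From user space the change is visible as the legacy ioctl simply
failing on MTL; a hedged sketch of the fallback check (fd and handle
are assumed to exist):

  #include <errno.h>
  #include <sys/ioctl.h>
  #include <drm/i915_drm.h>

  struct drm_i915_gem_caching arg = {
          .handle = handle,
          .caching = I915_CACHING_CACHED,
  };

  if (ioctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg) < 0 &&
      errno == EOPNOTSUPP)
          ; /* expected on MTL and newer: set the PAT at creation instead */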

Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
Reviewed-by: Andrzej Hajda 
---
 drivers/gpu/drm/i915/gem/i915_gem_domain.c | 3 +++
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c  | 9 -
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c 
b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 89938084af97..d5fd4c9cd9f8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -328,6 +328,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void 
*data,
if (IS_DGFX(i915))
return -ENODEV;
 
+   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+   return -EOPNOTSUPP;
+
switch (args->caching) {
case I915_CACHING_NONE:
level = I915_CACHE_NONE;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 37d1efcd3ca6..cad4a6017f4b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -601,7 +601,14 @@ static int shmem_object_init(struct intel_memory_region 
*mem,
obj->write_domain = I915_GEM_DOMAIN_CPU;
obj->read_domains = I915_GEM_DOMAIN_CPU;
 
-   if (HAS_LLC(i915))
+   /*
+* MTL doesn't snoop CPU cache by default for GPU access (namely
+* 1-way coherency). However some UMD's are currently depending on
+* that. Make 1-way coherent the default setting for MTL. A follow
+* up patch will extend the GEM_CREATE uAPI to allow UMD's specify
+* caching mode at BO creation time
+*/
+   if (HAS_LLC(i915) || (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)))
/* On some devices, we can have the GPU use the LLC (the CPU
 * cache) for about a 10% performance improvement
 * compared to uncached.  Graphics requests other than
-- 
2.25.1



[PATCH v2 3/3] drm/i915: make sure correct pte encode is used

2023-04-26 Thread fei . yang
From: Fei Yang 

PTE encode is platform dependent. After replacing cache_level with
pat_index, the newly introduced mtl_pte_encode is actually generic
for all gen12 platforms, thus rename it to gen12_pte_encode and
apply it to all gen12 platforms.

Cc: Chris Wilson 
Cc: Matt Roper 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
---
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index ee52e5833c50..81b7725812ce 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -55,9 +55,9 @@ static u64 gen8_pte_encode(dma_addr_t addr,
return pte;
 }
 
-static u64 mtl_pte_encode(dma_addr_t addr,
- unsigned int pat_index,
- u32 flags)
+static u64 gen12_pte_encode(dma_addr_t addr,
+   unsigned int pat_index,
+   u32 flags)
 {
gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
 
@@ -994,8 +994,8 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
 */
ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
 
-   if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
-   ppgtt->vm.pte_encode = mtl_pte_encode;
+   if (GRAPHICS_VER(gt->i915) >= 12)
+   ppgtt->vm.pte_encode = gen12_pte_encode;
else
ppgtt->vm.pte_encode = gen8_pte_encode;
 
-- 
2.25.1



[PATCH v2 2/3] drm/i915: use pat_index instead of cache_level

2023-04-26 Thread fei . yang
From: Fei Yang 

Currently the KMD is using enum i915_cache_level to set caching policy for
buffer objects. This is flaky because the PAT index which really controls
the caching behavior in PTE has far more levels than what's defined in the
enum. In addition, the PAT index is platform dependent, having to translate
between i915_cache_level and PAT index is not reliable, and makes the code
more complicated.

From UMD's perspective there is also a necessity to set caching policy for
performance fine tuning. It's much easier for the UMD to directly use the PAT
index because the behavior of each PAT index is clearly defined in Bspec.
Having the abstracted i915_cache_level sitting in between would only cause
more ambiguity.

For these reasons this patch replaces i915_cache_level with PAT index. Also
note, the cache_level is not completely removed yet, because the KMD still
needs to create buffer objects with simple cache settings such as
cached, uncached, or write-through. For such simple cases, using cache_level
would help simplify the code.
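
As a hedged sketch of such a simple case after this change: the call
site keeps a cache_level constant and translates it once, while the
address-space hooks only ever see a pat_index (the surrounding function
is hypothetical; only i915_gem_get_pat_index() comes from this series):

  static void example_insert_scratch(struct i915_address_space *vm,
                                     dma_addr_t addr)
  {
          unsigned int pat_index =
                  i915_gem_get_pat_index(vm->i915, I915_CACHE_NONE);

          /* insert_page() and friends now take a pat_index, not a cache_level */
          vm->insert_page(vm, addr, 0, pat_index, 0);
  }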

Cc: Chris Wilson 
Cc: Matt Roper 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
---
 drivers/gpu/drm/i915/display/intel_dpt.c  | 12 +--
 drivers/gpu/drm/i915/gem/i915_gem_domain.c| 27 ++
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++-
 drivers/gpu/drm/i915/gem/i915_gem_mman.c  |  3 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.c| 52 +++-
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  4 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 25 +-
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c|  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  | 16 ++--
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
 .../drm/i915/gem/selftests/i915_gem_migrate.c |  2 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|  2 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  | 10 ++-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 71 
 drivers/gpu/drm/i915/gt/gen8_ppgtt.h  |  3 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 82 +--
 drivers/gpu/drm/i915/gt/intel_gtt.h   | 20 ++---
 drivers/gpu/drm/i915/gt/intel_migrate.c   | 47 ++-
 drivers/gpu/drm/i915/gt/intel_migrate.h   | 13 ++-
 drivers/gpu/drm/i915/gt/intel_ppgtt.c |  6 +-
 drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++-
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  8 +-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  2 +-
 drivers/gpu/drm/i915/gt/selftest_tlb.c|  4 +-
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  | 10 ++-
 drivers/gpu/drm/i915/i915_debugfs.c   | 55 ++---
 drivers/gpu/drm/i915/i915_gem.c   | 16 +++-
 drivers/gpu/drm/i915/i915_gpu_error.c |  8 +-
 drivers/gpu/drm/i915/i915_vma.c   | 16 ++--
 drivers/gpu/drm/i915/i915_vma.h   |  2 +-
 drivers/gpu/drm/i915/i915_vma_types.h |  2 -
 drivers/gpu/drm/i915/selftests/i915_gem.c |  5 +-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++--
 .../drm/i915/selftests/intel_memory_region.c  |  4 +-
 drivers/gpu/drm/i915/selftests/mock_gtt.c |  8 +-
 36 files changed, 378 insertions(+), 239 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c 
b/drivers/gpu/drm/i915/display/intel_dpt.c
index c5eacfdba1a5..7c5fddb203ba 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.c
+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
@@ -43,24 +43,24 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
 static void dpt_insert_page(struct i915_address_space *vm,
dma_addr_t addr,
u64 offset,
-   enum i915_cache_level level,
+   unsigned int pat_index,
u32 flags)
 {
struct i915_dpt *dpt = i915_vm_to_dpt(vm);
gen8_pte_t __iomem *base = dpt->iomem;
 
gen8_set_pte(base + offset / I915_GTT_PAGE_SIZE,
-vm->pte_encode(addr, level, flags));
+vm->pte_encode(addr, pat_index, flags));
 }
 
 static void dpt_insert_entries(struct i915_address_space *vm,
   struct i915_vma_resource *vma_res,
-  enum i915_cache_level level,
+  unsigned int pat_index,
   u32 flags)
 {
struct i915_dpt *dpt = i915_vm_to_dpt(vm);
gen8_pte_t __iomem *base = dpt->iomem;
-   const gen8_pte_t pte_encode = vm->pte_encode(0, level, flags);
+   const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags);
struct sgt_iter sgt_iter;
dma_addr_t addr;
int i;
@@ -83,7 +83,7 @@ static void dpt_clear_range(struct i915_address_space *vm,
 static void dpt_bind_vma(struct i915_address_space *vm,
 struct i915_vm_pt_stash *stash,
  

[PATCH v2 0/3] drm/i915/mtl: add PTE encode function

2023-04-26 Thread fei . yang
From: Fei Yang 

These patches are extracted from series
https://patchwork.freedesktop.org/series/115980/

This series refactors the cache policy programming so that the PTE
encode functions can be unified across all GEN12 platforms. This
refactor is also important in implementing the design which allows
userspace to directly set the cache policy for each buffer object.

v2: drop one patch that was merged separately
341ad0e8e254 drm/i915/mtl: Add PTE encode function

Fei Yang (3):
  drm/i915: preparation for using PAT index
  drm/i915: use pat_index instead of cache_level
  drm/i915: make sure correct pte encode is used

 drivers/gpu/drm/i915/display/intel_dpt.c  | 12 +--
 drivers/gpu/drm/i915/gem/i915_gem_domain.c| 27 +++
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++-
 drivers/gpu/drm/i915/gem/i915_gem_mman.c  |  3 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.c| 61 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  8 ++
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 26 +-
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  2 -
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c|  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  | 16 ++--
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
 .../drm/i915/gem/selftests/i915_gem_migrate.c |  2 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|  2 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  | 10 ++-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 73 +
 drivers/gpu/drm/i915/gt/gen8_ppgtt.h  |  3 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 76 +-
 drivers/gpu/drm/i915/gt/intel_gtt.h   | 20 +++--
 drivers/gpu/drm/i915/gt/intel_migrate.c   | 47 ++-
 drivers/gpu/drm/i915/gt/intel_migrate.h   | 13 ++-
 drivers/gpu/drm/i915/gt/intel_ppgtt.c |  6 +-
 drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 +--
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  8 +-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  2 +-
 drivers/gpu/drm/i915/gt/selftest_tlb.c|  4 +-
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  | 10 ++-
 drivers/gpu/drm/i915/i915_debugfs.c   | 55 ++---
 drivers/gpu/drm/i915/i915_gem.c   | 16 +++-
 drivers/gpu/drm/i915/i915_gpu_error.c |  8 +-
 drivers/gpu/drm/i915/i915_pci.c   | 79 ---
 drivers/gpu/drm/i915/i915_vma.c   | 16 ++--
 drivers/gpu/drm/i915/i915_vma.h   |  2 +-
 drivers/gpu/drm/i915/i915_vma_types.h |  2 -
 drivers/gpu/drm/i915/intel_device_info.h  |  5 ++
 drivers/gpu/drm/i915/selftests/i915_gem.c |  5 +-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++--
 .../drm/i915/selftests/intel_memory_region.c  |  4 +-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  9 +++
 drivers/gpu/drm/i915/selftests/mock_gtt.c |  8 +-
 40 files changed, 480 insertions(+), 242 deletions(-)

-- 
2.25.1



[PATCH v2 1/3] drm/i915: preparation for using PAT index

2023-04-26 Thread fei . yang
From: Fei Yang 

This patch is a preparation for replacing enum i915_cache_level with PAT
index. Caching policy for buffer objects is set through the PAT index in
the PTE; the old i915_cache_level is not sufficient to represent all caching
modes supported by the hardware.

Preparing the transition by adding some platform dependent data structures
and helper functions to translate the cache_level to pat_index.

cachelevel_to_pat: a platform dependent array mapping cache_level to
   pat_index.

max_pat_index: the maximum PAT index recommended in the hardware specification.
   Needed for validating the PAT index passed in from user
   space.

i915_gem_get_pat_index: function to convert cache_level to PAT index.

obj_to_i915(obj): macro moved to header file for wider usage.

I915_MAX_CACHE_LEVEL: upper bound of i915_cache_level for the
  convenience of coding.

Cc: Chris Wilson 
Cc: Matt Roper 
Cc: Andi Shyti 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
Reviewed-by: Andrzej Hajda 
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c|  9 +++
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  4 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  1 +
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  2 -
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  |  6 ++
 drivers/gpu/drm/i915/gt/intel_ggtt.c  |  6 ++
 drivers/gpu/drm/i915/i915_pci.c   | 79 ---
 drivers/gpu/drm/i915/intel_device_info.h  |  5 ++
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  9 +++
 9 files changed, 110 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 4666bb82f312..8c70a0ec7d2f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -45,6 +45,15 @@ static struct kmem_cache *slab_objects;
 
 static const struct drm_gem_object_funcs i915_gem_object_funcs;
 
+unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
+   enum i915_cache_level level)
+{
+   if (drm_WARN_ON(&i915->drm, level >= I915_MAX_CACHE_LEVEL))
+   return 0;
+
+   return INTEL_INFO(i915)->cachelevel_to_pat[level];
+}
+
 struct drm_i915_gem_object *i915_gem_object_alloc(void)
 {
struct drm_i915_gem_object *obj;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 885ccde9dc3c..4c92e17b4337 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -20,6 +20,8 @@
 
 enum intel_region_id;
 
+#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
+
 static inline bool i915_gem_object_size_2big(u64 size)
 {
struct drm_i915_gem_object *obj;
@@ -30,6 +32,8 @@ static inline bool i915_gem_object_size_2big(u64 size)
return false;
 }
 
+unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
+   enum i915_cache_level level);
 void i915_gem_init__objects(struct drm_i915_private *i915);
 
 void i915_objects_module_exit(void);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 830c11431ee8..41b35abccf88 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -194,6 +194,7 @@ enum i915_cache_level {
 * engine.
 */
I915_CACHE_WT,
+   I915_MAX_CACHE_LEVEL,
 };
 
 enum i915_map_type {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index b1672e054b21..214763942aa2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -460,8 +460,6 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private 
*i915,
fs_reclaim_release(GFP_KERNEL);
 }
 
-#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
-
 /**
  * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By
  * default all object types that support shrinking(see IS_SHRINKABLE), will 
also
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 4c9a2f2db908..4c6b760d4774 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -78,6 +78,12 @@ static u64 mtl_pte_encode(dma_addr_t addr,
case I915_CACHE_WT:
pte |= GEN12_PPGTT_PTE_PAT0;
break;
+   default:
+   /* This should never happen. Added to deal with the compile
+* error due to the addition of I915_MAX_CACHE_LEVEL. Will
+* be removed by the pat_index patch.
+*/
+   break;
}
 
return pte;
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c 
b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 20915edc8bd9..c8390d03fce2 100644
--

[PATCH v2 2/2] drm/i915/mtl: workaround coherency issue for Media

2023-04-24 Thread fei . yang
From: Fei Yang 

This patch implements Wa_22016122933.

In MTL, memory writes initiated by the Media tile update the whole
cache line, even for partial writes. This creates a coherency
problem for cacheable memory if both CPU and GPU are writing data
to different locations within a single cache line.
This patch circumvents the issue by making CPU/GPU shared memory
uncacheable (WC on CPU side, and PAT index 2 for GPU).  Additionally,
it ensures that CPU writes are visible to the GPU with an
intel_guc_write_barrier().

While fixing the CTB issue, we noticed some random GSC firmware
loading failures because the shared buffers are cacheable (WB) on the CPU
side but uncached on the GPU side. To fix these issues we need to map
such shared buffers as WC on the CPU side. Since such allocations are
not all done through the GuC allocator, to avoid too many code changes,
i915_coherent_map_type() is now hard-coded to return WC for MTL.

v2: Simplify the commit message(Matt).

BSpec: 45101
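
A hedged sketch of the ordering the workaround relies on for any
CPU-written, GuC/GSC-read buffer (the variable names are illustrative;
the real call sites are in the CTB and GSC firmware paths below):

  /* payload goes into the WC mapping first ... */
  memcpy(cmds + tail, msg, len);
  /* ... the write barrier makes it visible to the GuC/GSC ... */
  intel_guc_write_barrier(guc);
  /* ... and only then is the descriptor update published */
  WRITE_ONCE(desc->tail, new_tail);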

Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
Acked-by: Nirmoy Das 
Reviewed-by: Andrzej Hajda 
Reviewed-by: Matt Roper 
Signed-off-by: Nirmoy Das 
---
 drivers/gpu/drm/i915/gem/i915_gem_pages.c |  5 -
 drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c | 13 +
 drivers/gpu/drm/i915/gt/uc/intel_guc.c|  7 +++
 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c |  6 ++
 4 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c 
b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index ecd86130b74f..89fc8ea6bcfc 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -469,7 +469,10 @@ enum i915_map_type i915_coherent_map_type(struct 
drm_i915_private *i915,
  struct drm_i915_gem_object *obj,
  bool always_coherent)
 {
-   if (i915_gem_object_is_lmem(obj))
+   /*
+* Wa_22016122933: always return I915_MAP_WC for MTL
+*/
+   if (i915_gem_object_is_lmem(obj) || IS_METEORLAKE(i915))
return I915_MAP_WC;
if (HAS_LLC(i915) || always_coherent)
return I915_MAP_WB;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c 
b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
index 1d9fdfb11268..236673c02f9a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
@@ -110,6 +110,13 @@ static int gsc_fw_load_prepare(struct intel_gsc_uc *gsc)
if (obj->base.size < gsc->fw.size)
return -ENOSPC;
 
+   /*
+* Wa_22016122933: For MTL the shared memory needs to be mapped
+* as WC on CPU side and UC (PAT index 2) on GPU side
+*/
+   if (IS_METEORLAKE(i915))
+   i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE);
+
dst = i915_gem_object_pin_map_unlocked(obj,
   i915_coherent_map_type(i915, 
obj, true));
if (IS_ERR(dst))
@@ -125,6 +132,12 @@ static int gsc_fw_load_prepare(struct intel_gsc_uc *gsc)
memset(dst, 0, obj->base.size);
memcpy(dst, src, gsc->fw.size);
 
+   /*
+* Wa_22016122933: Making sure the data in dst is
+* visible to GSC right away
+*/
+   intel_guc_write_barrier(&gt->uc.guc);
+
i915_gem_object_unpin_map(gsc->fw.obj);
i915_gem_object_unpin_map(obj);
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index e89f16ecf1ae..c9f20385f6a0 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -744,6 +744,13 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc 
*guc, u32 size)
if (IS_ERR(obj))
return ERR_CAST(obj);
 
+   /*
+* Wa_22016122933: For MTL the shared memory needs to be mapped
+* as WC on CPU side and UC (PAT index 2) on GPU side
+*/
+   if (IS_METEORLAKE(gt->i915))
+   i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE);
+
vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
if (IS_ERR(vma))
goto err;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index 1803a633ed64..99a0a89091e7 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -902,6 +902,12 @@ static int ct_read(struct intel_guc_ct *ct, struct 
ct_incoming_msg **msg)
/* now update descriptor */
WRITE_ONCE(desc->head, head);
 
+   /*
+* Wa_22016122933: Making sure the head update is
+* visible to GuC right away
+*/
+   intel_guc_write_barrier(ct_to_guc(ct));
+
return available - len;
 
 corrupted:
-- 
2.25.1



[PATCH v2 0/2] drm/i915/mtl: Add PTE encode functions

2023-04-24 Thread fei . yang
From: Fei Yang 

Extract PTE patch from https://patchwork.freedesktop.org/series/116868/
to fix MTL boot issue caused by MOCS/PAT update.

v2: address comment from Matt.

Fei Yang (2):
  drm/i915/mtl: Add PTE encode function
  drm/i915/mtl: workaround coherency issue for Media

 drivers/gpu/drm/i915/display/intel_dpt.c  |  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_pages.c |  5 ++-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 45 +++
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 36 --
 drivers/gpu/drm/i915/gt/intel_gtt.h   | 12 +-
 drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c | 13 +++
 drivers/gpu/drm/i915/gt/uc/intel_guc.c|  7 
 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c |  6 +++
 8 files changed, 112 insertions(+), 14 deletions(-)

-- 
2.25.1



[PATCH v2 1/2] drm/i915/mtl: Add PTE encode function

2023-04-24 Thread fei . yang
From: Fei Yang 

PTE encode functions are platform dependent. This patch implements
the PTE encode functions for MTL, and ensures the correct function
is used by calling the pte_encode function pointer instead of the
hardcoded gen8 version.

Fixes: b76c0deef627 ("drm/i915/mtl: Define MOCS and PAT tables for MTL")
Signed-off-by: Fei Yang 
Reviewed-by: Andrzej Hajda 
Reviewed-by: Andi Shyti 
Acked-by: Nirmoy Das 
---
 drivers/gpu/drm/i915/display/intel_dpt.c |  2 +-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 45 
 drivers/gpu/drm/i915/gt/intel_ggtt.c | 36 +--
 drivers/gpu/drm/i915/gt/intel_gtt.h  | 12 +--
 4 files changed, 82 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c 
b/drivers/gpu/drm/i915/display/intel_dpt.c
index b8027392144d..c5eacfdba1a5 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.c
+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
@@ -300,7 +300,7 @@ intel_dpt_create(struct intel_framebuffer *fb)
vm->vma_ops.bind_vma= dpt_bind_vma;
vm->vma_ops.unbind_vma  = dpt_unbind_vma;
 
-   vm->pte_encode = gen8_ggtt_pte_encode;
+   vm->pte_encode = vm->gt->ggtt->vm.pte_encode;
 
dpt->obj = dpt_obj;
dpt->obj->is_dpt = true;
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 4daaa6f55668..4c9a2f2db908 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -55,6 +55,34 @@ static u64 gen8_pte_encode(dma_addr_t addr,
return pte;
 }
 
+static u64 mtl_pte_encode(dma_addr_t addr,
+ enum i915_cache_level level,
+ u32 flags)
+{
+   gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
+
+   if (unlikely(flags & PTE_READ_ONLY))
+   pte &= ~GEN8_PAGE_RW;
+
+   if (flags & PTE_LM)
+   pte |= GEN12_PPGTT_PTE_LM;
+
+   switch (level) {
+   case I915_CACHE_NONE:
+   pte |= GEN12_PPGTT_PTE_PAT1;
+   break;
+   case I915_CACHE_LLC:
+   case I915_CACHE_L3_LLC:
+   pte |= GEN12_PPGTT_PTE_PAT0 | GEN12_PPGTT_PTE_PAT1;
+   break;
+   case I915_CACHE_WT:
+   pte |= GEN12_PPGTT_PTE_PAT0;
+   break;
+   }
+
+   return pte;
+}
+
 static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
 {
struct drm_i915_private *i915 = ppgtt->vm.i915;
@@ -427,7 +455,7 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
  u32 flags)
 {
struct i915_page_directory *pd;
-   const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
+   const gen8_pte_t pte_encode = ppgtt->vm.pte_encode(0, cache_level, 
flags);
gen8_pte_t *vaddr;
 
pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
@@ -580,7 +608,7 @@ static void gen8_ppgtt_insert_huge(struct 
i915_address_space *vm,
   enum i915_cache_level cache_level,
   u32 flags)
 {
-   const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
+   const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags);
unsigned int rem = sg_dma_len(iter->sg);
u64 start = vma_res->start;
 
@@ -743,7 +771,7 @@ static void gen8_ppgtt_insert_entry(struct 
i915_address_space *vm,
GEM_BUG_ON(pt->is_compact);
 
vaddr = px_vaddr(pt);
-   vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags);
+   vaddr[gen8_pd_index(idx, 0)] = vm->pte_encode(addr, level, flags);
drm_clflush_virt_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr));
 }
 
@@ -773,7 +801,7 @@ static void __xehpsdv_ppgtt_insert_entry_lm(struct 
i915_address_space *vm,
}
 
vaddr = px_vaddr(pt);
-   vaddr[gen8_pd_index(idx, 0) / 16] = gen8_pte_encode(addr, level, flags);
+   vaddr[gen8_pd_index(idx, 0) / 16] = vm->pte_encode(addr, level, flags);
 }
 
 static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm,
@@ -820,8 +848,8 @@ static int gen8_init_scratch(struct i915_address_space *vm)
pte_flags |= PTE_LM;
 
vm->scratch[0]->encode =
-   gen8_pte_encode(px_dma(vm->scratch[0]),
-   I915_CACHE_NONE, pte_flags);
+   vm->pte_encode(px_dma(vm->scratch[0]),
+  I915_CACHE_NONE, pte_flags);
 
for (i = 1; i <= vm->top; i++) {
struct drm_i915_gem_object *obj;
@@ -963,7 +991,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
 */
ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
 
-   ppgtt->vm.pte_encode = gen8_pte_encode;
+   if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
+   ppgtt->vm.pte_encode = mtl_pte_e

[PATCH 2/2] drm/i915/mtl: workaround coherency issue for Media

2023-04-24 Thread fei . yang
From: Fei Yang 

This patch implements Wa_22016122933.

In MTL, memory writes initiated by the Media tile update the whole
cache line, even for partial writes. This creates a coherency
problem for cacheable memory if both CPU and GPU are writing data
to different locations within a single cache line.
This patch circumvents the issue by making CPU/GPU shared memory
uncacheable (WC on CPU side, and PAT index 2 for GPU).  Additionally,
it ensures that CPU writes are visible to the GPU with an
intel_guc_write_barrier().

While fixing the CTB issue, we noticed some random GSC firmware
loading failures because the shared buffers are cacheable (WB) on the CPU
side but uncached on the GPU side. To fix these issues we need to map
such shared buffers as WC on the CPU side. Since such allocations are
not all done through the GuC allocator, to avoid too many code changes,
i915_coherent_map_type() is now hard-coded to return WC for MTL.

v2: Simplify the commit message(Matt).

BSpec: 45101

Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
Acked-by: Nirmoy Das 
Reviewed-by: Andrzej Hajda 
Reviewed-by: Matt Roper 
Signed-off-by: Nirmoy Das 
---
 drivers/gpu/drm/i915/gem/i915_gem_pages.c |  5 -
 drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c | 13 +
 drivers/gpu/drm/i915/gt/uc/intel_guc.c|  7 +++
 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c |  6 ++
 4 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c 
b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index ecd86130b74f..89fc8ea6bcfc 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -469,7 +469,10 @@ enum i915_map_type i915_coherent_map_type(struct 
drm_i915_private *i915,
  struct drm_i915_gem_object *obj,
  bool always_coherent)
 {
-   if (i915_gem_object_is_lmem(obj))
+   /*
+* Wa_22016122933: always return I915_MAP_WC for MTL
+*/
+   if (i915_gem_object_is_lmem(obj) || IS_METEORLAKE(i915))
return I915_MAP_WC;
if (HAS_LLC(i915) || always_coherent)
return I915_MAP_WB;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c 
b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
index 1d9fdfb11268..236673c02f9a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c
@@ -110,6 +110,13 @@ static int gsc_fw_load_prepare(struct intel_gsc_uc *gsc)
if (obj->base.size < gsc->fw.size)
return -ENOSPC;
 
+   /*
+* Wa_22016122933: For MTL the shared memory needs to be mapped
+* as WC on CPU side and UC (PAT index 2) on GPU side
+*/
+   if (IS_METEORLAKE(i915))
+   i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE);
+
dst = i915_gem_object_pin_map_unlocked(obj,
   i915_coherent_map_type(i915, 
obj, true));
if (IS_ERR(dst))
@@ -125,6 +132,12 @@ static int gsc_fw_load_prepare(struct intel_gsc_uc *gsc)
memset(dst, 0, obj->base.size);
memcpy(dst, src, gsc->fw.size);
 
+   /*
+* Wa_22016122933: Making sure the data in dst is
+* visible to GSC right away
+*/
+   intel_guc_write_barrier(&gt->uc.guc);
+
i915_gem_object_unpin_map(gsc->fw.obj);
i915_gem_object_unpin_map(obj);
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index e89f16ecf1ae..c9f20385f6a0 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -744,6 +744,13 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc 
*guc, u32 size)
if (IS_ERR(obj))
return ERR_CAST(obj);
 
+   /*
+* Wa_22016122933: For MTL the shared memory needs to be mapped
+* as WC on CPU side and UC (PAT index 2) on GPU side
+*/
+   if (IS_METEORLAKE(gt->i915))
+   i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE);
+
vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
if (IS_ERR(vma))
goto err;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index 1803a633ed64..99a0a89091e7 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -902,6 +902,12 @@ static int ct_read(struct intel_guc_ct *ct, struct 
ct_incoming_msg **msg)
/* now update descriptor */
WRITE_ONCE(desc->head, head);
 
+   /*
+* Wa_22016122933: Making sure the head update is
+* visible to GuC right away
+*/
+   intel_guc_write_barrier(ct_to_guc(ct));
+
return available - len;
 
 corrupted:
-- 
2.25.1



[PATCH 1/2] drm/i915/mtl: Add PTE encode function

2023-04-24 Thread fei . yang
From: Fei Yang 

PTE encode functions are platform dependent. This patch implements
the PTE encode functions for MTL, and ensures the correct function
is used by calling the pte_encode function pointer instead of the
hardcoded gen8 version.

Fixes: b76c0deef627 ("drm/i915/mtl: Define MOCS and PAT tables for MTL")
Signed-off-by: Fei Yang 
Reviewed-by: Andrzej Hajda 
Reviewed-by: Andi Shyti 
Acked-by: Nirmoy Das 
---
 drivers/gpu/drm/i915/display/intel_dpt.c |  2 +-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 45 
 drivers/gpu/drm/i915/gt/intel_ggtt.c | 36 +--
 drivers/gpu/drm/i915/gt/intel_gtt.h  | 13 +--
 4 files changed, 83 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c 
b/drivers/gpu/drm/i915/display/intel_dpt.c
index b8027392144d..c5eacfdba1a5 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.c
+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
@@ -300,7 +300,7 @@ intel_dpt_create(struct intel_framebuffer *fb)
vm->vma_ops.bind_vma= dpt_bind_vma;
vm->vma_ops.unbind_vma  = dpt_unbind_vma;
 
-   vm->pte_encode = gen8_ggtt_pte_encode;
+   vm->pte_encode = vm->gt->ggtt->vm.pte_encode;
 
dpt->obj = dpt_obj;
dpt->obj->is_dpt = true;
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 4daaa6f55668..11b91e0453c8 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -55,6 +55,34 @@ static u64 gen8_pte_encode(dma_addr_t addr,
return pte;
 }
 
+static u64 mtl_pte_encode(dma_addr_t addr,
+ enum i915_cache_level level,
+ u32 flags)
+{
+   gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
+
+   if (unlikely(flags & PTE_READ_ONLY))
+   pte &= ~GEN8_PAGE_RW;
+
+   if (flags & PTE_LM)
+   pte |= GEN12_PPGTT_PTE_LM | GEN12_PPGTT_PTE_NC;
+
+   switch (level) {
+   case I915_CACHE_NONE:
+   pte |= GEN12_PPGTT_PTE_PAT1;
+   break;
+   case I915_CACHE_LLC:
+   case I915_CACHE_L3_LLC:
+   pte |= GEN12_PPGTT_PTE_PAT0 | GEN12_PPGTT_PTE_PAT1;
+   break;
+   case I915_CACHE_WT:
+   pte |= GEN12_PPGTT_PTE_PAT0;
+   break;
+   }
+
+   return pte;
+}
+
 static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
 {
struct drm_i915_private *i915 = ppgtt->vm.i915;
@@ -427,7 +455,7 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
  u32 flags)
 {
struct i915_page_directory *pd;
-   const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
+   const gen8_pte_t pte_encode = ppgtt->vm.pte_encode(0, cache_level, 
flags);
gen8_pte_t *vaddr;
 
pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
@@ -580,7 +608,7 @@ static void gen8_ppgtt_insert_huge(struct 
i915_address_space *vm,
   enum i915_cache_level cache_level,
   u32 flags)
 {
-   const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
+   const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags);
unsigned int rem = sg_dma_len(iter->sg);
u64 start = vma_res->start;
 
@@ -743,7 +771,7 @@ static void gen8_ppgtt_insert_entry(struct 
i915_address_space *vm,
GEM_BUG_ON(pt->is_compact);
 
vaddr = px_vaddr(pt);
-   vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags);
+   vaddr[gen8_pd_index(idx, 0)] = vm->pte_encode(addr, level, flags);
drm_clflush_virt_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr));
 }
 
@@ -773,7 +801,7 @@ static void __xehpsdv_ppgtt_insert_entry_lm(struct 
i915_address_space *vm,
}
 
vaddr = px_vaddr(pt);
-   vaddr[gen8_pd_index(idx, 0) / 16] = gen8_pte_encode(addr, level, flags);
+   vaddr[gen8_pd_index(idx, 0) / 16] = vm->pte_encode(addr, level, flags);
 }
 
 static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm,
@@ -820,8 +848,8 @@ static int gen8_init_scratch(struct i915_address_space *vm)
pte_flags |= PTE_LM;
 
vm->scratch[0]->encode =
-   gen8_pte_encode(px_dma(vm->scratch[0]),
-   I915_CACHE_NONE, pte_flags);
+   vm->pte_encode(px_dma(vm->scratch[0]),
+  I915_CACHE_NONE, pte_flags);
 
for (i = 1; i <= vm->top; i++) {
struct drm_i915_gem_object *obj;
@@ -963,7 +991,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
 */
ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
 
-   ppgtt->vm.pte_encode = gen8_pte_encode;
+   if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
+   ppgtt->vm.p

[PATCH 0/2] drm/i915/mtl: Add PTE encode functions

2023-04-24 Thread fei . yang
From: Fei Yang 

Extract PTE patch from https://patchwork.freedesktop.org/series/116868/
to fix MTL boot issue caused by MOCS/PAT update.

Fei Yang (2):
  drm/i915/mtl: Add PTE encode function
  drm/i915/mtl: workaround coherency issue for Media

 drivers/gpu/drm/i915/display/intel_dpt.c  |  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_pages.c |  5 ++-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 45 +++
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 36 --
 drivers/gpu/drm/i915/gt/intel_gtt.h   | 13 ++-
 drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c | 13 +++
 drivers/gpu/drm/i915/gt/uc/intel_guc.c|  7 
 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c |  6 +++
 8 files changed, 113 insertions(+), 14 deletions(-)

-- 
2.25.1



[PATCH v1 6/6] drm/i915: Allow user to set cache at BO creation

2023-04-23 Thread fei . yang
From: Fei Yang 

To comply with the design that buffer objects shall have an immutable
cache setting throughout their life cycle, the {set, get}_caching ioctls
are no longer supported from MTL onward. With that change the caching
policy can only be set at object creation time. The current code
applies a default (platform dependent) cache setting for all objects.
However this is not optimal for performance tuning. The patch extends
the existing gem_create uAPI to let the user set the PAT index for the
object at creation time.
The new extension is platform independent, so UMDs can switch to using
this extension for older platforms as well, while {set, get}_caching are
still supported on these legacy platforms for compatibility reasons.
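
Since gem_create extensions chain through next_extension, this can be
combined with the existing create extensions; a hedged user-space
sketch placing a BO in device-local memory and setting its PAT index in
one call (fd is assumed; the region and PAT index values are
illustrative):

  #include <stdint.h>
  #include <sys/ioctl.h>
  #include <drm/i915_drm.h>

  struct drm_i915_gem_memory_class_instance region = {
          .memory_class = I915_MEMORY_CLASS_DEVICE,
          .memory_instance = 0,
  };
  struct drm_i915_gem_create_ext_set_pat set_pat = {
          .base = { .name = I915_GEM_CREATE_EXT_SET_PAT },
          .pat_index = 2,
  };
  struct drm_i915_gem_create_ext_memory_regions regions = {
          .base = {
                  .name = I915_GEM_CREATE_EXT_MEMORY_REGIONS,
                  .next_extension = (uintptr_t)&set_pat,
          },
          .num_regions = 1,
          .regions = (uintptr_t)&region,
  };
  struct drm_i915_gem_create_ext create = {
          .size = 2 * 1024 * 1024,
          .extensions = (uintptr_t)&regions,
  };

  int ret = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create);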

Cc: Chris Wilson 
Cc: Matt Roper 
Cc: Andi Shyti 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
---
 drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 ++
 drivers/gpu/drm/i915/gem/i915_gem_object.c |  6 
 include/uapi/drm/i915_drm.h| 36 ++
 tools/include/uapi/drm/i915_drm.h  | 36 ++
 4 files changed, 114 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c 
b/drivers/gpu/drm/i915/gem/i915_gem_create.c
index bfe1dbda4cb7..723c3ddd6c74 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
@@ -245,6 +245,7 @@ struct create_ext {
unsigned int n_placements;
unsigned int placement_mask;
unsigned long flags;
+   unsigned int pat_index;
 };
 
 static void repr_placements(char *buf, size_t size,
@@ -394,11 +395,39 @@ static int ext_set_protected(struct i915_user_extension 
__user *base, void *data
return 0;
 }
 
+static int ext_set_pat(struct i915_user_extension __user *base, void *data)
+{
+   struct create_ext *ext_data = data;
+   struct drm_i915_private *i915 = ext_data->i915;
+   struct drm_i915_gem_create_ext_set_pat ext;
+   unsigned int max_pat_index;
+
+   BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) !=
+offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd));
+
+   if (copy_from_user(&ext, base, sizeof(ext)))
+   return -EFAULT;
+
+   max_pat_index = INTEL_INFO(i915)->max_pat_index;
+
+   if (ext.pat_index > max_pat_index) {
+   drm_dbg(&i915->drm, "PAT index is invalid: %u\n",
+   ext.pat_index);
+   return -EINVAL;
+   }
+
+   ext_data->pat_index = ext.pat_index;
+
+   return 0;
+}
+
 static const i915_user_extension_fn create_extensions[] = {
[I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements,
[I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected,
+   [I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat,
 };
 
+#define PAT_INDEX_NOT_SET  0x
 /**
  * i915_gem_create_ext_ioctl - Creates a new mm object and returns a handle to 
it.
  * @dev: drm device pointer
@@ -418,6 +447,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void 
*data,
if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS)
return -EINVAL;
 
+   ext_data.pat_index = PAT_INDEX_NOT_SET;
ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
   create_extensions,
   ARRAY_SIZE(create_extensions),
@@ -454,5 +484,11 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void 
*data,
if (IS_ERR(obj))
return PTR_ERR(obj);
 
+   if (ext_data.pat_index != PAT_INDEX_NOT_SET) {
+   i915_gem_object_set_pat_index(obj, ext_data.pat_index);
+   /* Mark pat_index is set by UMD */
+   obj->cache_level = I915_CACHE_INVAL;
+   }
+
return i915_gem_publish(obj, file, &args->size, &args->handle);
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 27c948350b5b..61651f7e5806 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -209,6 +209,12 @@ bool i915_gem_object_can_bypass_llc(struct 
drm_i915_gem_object *obj)
if (!(obj->flags & I915_BO_ALLOC_USER))
return false;
 
+   /*
+* Always flush cache for UMD objects at creation time.
+*/
+   if (obj->cache_level == I915_CACHE_INVAL)
+   return true;
+
/*
 * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
 * possible for userspace to bypass the GTT caching bits set by the
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index dba7c5a5b25e..03c5c314846e 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -3630,9 +3630,13 @@ struct drm_i915_gem_create_ext {
 *
 * For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see
 

[PATCH v1 5/6] drm/i915/mtl: end support for set caching ioctl

2023-04-23 Thread fei . yang
From: Fei Yang 

The design is to keep a buffer object's caching policy immutable throughout
its life cycle. This patch ends the support for the set caching ioctl
from MTL onward. While doing that we also set BOs to be 1-way coherent
at creation time because the GPU is no longer automatically snooping the CPU
cache. For userspace components needing to fine-tune the caching policy
for BOs, a follow-up patch will extend the GEM_CREATE uAPI to allow
them to specify the caching mode at BO creation time.

Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
Reviewed-by: Andrzej Hajda 
---
 drivers/gpu/drm/i915/gem/i915_gem_domain.c | 3 +++
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c  | 9 -
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c 
b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 89938084af97..d5fd4c9cd9f8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -328,6 +328,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void 
*data,
if (IS_DGFX(i915))
return -ENODEV;
 
+   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+   return -EOPNOTSUPP;
+
switch (args->caching) {
case I915_CACHING_NONE:
level = I915_CACHE_NONE;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 37d1efcd3ca6..cad4a6017f4b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -601,7 +601,14 @@ static int shmem_object_init(struct intel_memory_region 
*mem,
obj->write_domain = I915_GEM_DOMAIN_CPU;
obj->read_domains = I915_GEM_DOMAIN_CPU;
 
-   if (HAS_LLC(i915))
+   /*
+* MTL doesn't snoop CPU cache by default for GPU access (namely
+* 1-way coherency). However some UMD's are currently depending on
+* that. Make 1-way coherent the default setting for MTL. A follow
+* up patch will extend the GEM_CREATE uAPI to allow UMD's specify
+* caching mode at BO creation time
+*/
+   if (HAS_LLC(i915) || (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)))
/* On some devices, we can have the GPU use the LLC (the CPU
 * cache) for about a 10% performance improvement
 * compared to uncached.  Graphics requests other than
-- 
2.25.1



[PATCH v1 4/6] drm/i915: make sure correct pte encode is used

2023-04-23 Thread fei . yang
From: Fei Yang 

PTE encode is platform dependent. After replacing cache_level with
pat_index, the newly introduced mtl_pte_encode is actually generic
for all gen12 platforms, thus rename it to gen12_pte_encode and
apply it to all gen12 platforms.

Cc: Chris Wilson 
Cc: Matt Roper 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
---
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index c046813514f4..a738a25dd857 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -55,9 +55,9 @@ static u64 gen8_pte_encode(dma_addr_t addr,
return pte;
 }
 
-static u64 mtl_pte_encode(dma_addr_t addr,
- unsigned int pat_index,
- u32 flags)
+static u64 gen12_pte_encode(dma_addr_t addr,
+   unsigned int pat_index,
+   u32 flags)
 {
gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
 
@@ -994,8 +994,8 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
 */
ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
 
-   if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
-   ppgtt->vm.pte_encode = mtl_pte_encode;
+   if (GRAPHICS_VER(gt->i915) >= 12)
+   ppgtt->vm.pte_encode = gen12_pte_encode;
else
ppgtt->vm.pte_encode = gen8_pte_encode;
 
-- 
2.25.1



[PATCH v1 3/6] drm/i915: use pat_index instead of cache_level

2023-04-23 Thread fei . yang
From: Fei Yang 

Currently the KMD is using enum i915_cache_level to set caching policy for
buffer objects. This is flaky because the PAT index which really controls
the caching behavior in PTE has far more levels than what's defined in the
enum. In addition, the PAT index is platform dependent, having to translate
between i915_cache_level and PAT index is not reliable, and makes the code
more complicated.

From UMD's perspective there is also a necessity to set caching policy for
performance fine tuning. It's much easier for the UMD to directly use the PAT
index because the behavior of each PAT index is clearly defined in Bspec.
Having the abstracted i915_cache_level sitting in between would only cause
more ambiguity.

For these reasons this patch replaces i915_cache_level with PAT index. Also
note, the cache_level is not completely removed yet, because the KMD still
needs to create buffer objects with simple cache settings such as
cached, uncached, or write-through. For such simple cases, using cache_level
would help simplify the code.

Cc: Chris Wilson 
Cc: Matt Roper 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
---
 drivers/gpu/drm/i915/display/intel_dpt.c  | 12 +--
 drivers/gpu/drm/i915/gem/i915_gem_domain.c| 27 ++
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++-
 drivers/gpu/drm/i915/gem/i915_gem_mman.c  |  3 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.c| 52 +++-
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  4 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 25 +-
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c|  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  | 16 ++--
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
 .../drm/i915/gem/selftests/i915_gem_migrate.c |  2 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|  2 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  | 10 ++-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 71 
 drivers/gpu/drm/i915/gt/gen8_ppgtt.h  |  3 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 82 +--
 drivers/gpu/drm/i915/gt/intel_gtt.h   | 20 ++---
 drivers/gpu/drm/i915/gt/intel_migrate.c   | 47 ++-
 drivers/gpu/drm/i915/gt/intel_migrate.h   | 13 ++-
 drivers/gpu/drm/i915/gt/intel_ppgtt.c |  6 +-
 drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++-
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  8 +-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  2 +-
 drivers/gpu/drm/i915/gt/selftest_tlb.c|  4 +-
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  | 10 ++-
 drivers/gpu/drm/i915/i915_debugfs.c   | 55 ++---
 drivers/gpu/drm/i915/i915_gem.c   | 16 +++-
 drivers/gpu/drm/i915/i915_gpu_error.c |  8 +-
 drivers/gpu/drm/i915/i915_vma.c   | 16 ++--
 drivers/gpu/drm/i915/i915_vma.h   |  2 +-
 drivers/gpu/drm/i915/i915_vma_types.h |  2 -
 drivers/gpu/drm/i915/selftests/i915_gem.c |  5 +-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++--
 .../drm/i915/selftests/intel_memory_region.c  |  4 +-
 drivers/gpu/drm/i915/selftests/mock_gtt.c |  8 +-
 36 files changed, 378 insertions(+), 239 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c 
b/drivers/gpu/drm/i915/display/intel_dpt.c
index c5eacfdba1a5..7c5fddb203ba 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.c
+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
@@ -43,24 +43,24 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
 static void dpt_insert_page(struct i915_address_space *vm,
dma_addr_t addr,
u64 offset,
-   enum i915_cache_level level,
+   unsigned int pat_index,
u32 flags)
 {
struct i915_dpt *dpt = i915_vm_to_dpt(vm);
gen8_pte_t __iomem *base = dpt->iomem;
 
gen8_set_pte(base + offset / I915_GTT_PAGE_SIZE,
-vm->pte_encode(addr, level, flags));
+vm->pte_encode(addr, pat_index, flags));
 }
 
 static void dpt_insert_entries(struct i915_address_space *vm,
   struct i915_vma_resource *vma_res,
-  enum i915_cache_level level,
+  unsigned int pat_index,
   u32 flags)
 {
struct i915_dpt *dpt = i915_vm_to_dpt(vm);
gen8_pte_t __iomem *base = dpt->iomem;
-   const gen8_pte_t pte_encode = vm->pte_encode(0, level, flags);
+   const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags);
struct sgt_iter sgt_iter;
dma_addr_t addr;
int i;
@@ -83,7 +83,7 @@ static void dpt_clear_range(struct i915_address_space *vm,
 static void dpt_bind_vma(struct i915_address_space *vm,
 struct i915_vm_pt_stash *stash,
  

[PATCH v1 2/6] drm/i915: preparation for using PAT index

2023-04-23 Thread fei . yang
From: Fei Yang 

This patch is a preparation for replacing enum i915_cache_level with PAT
index. Caching policy for buffer objects is set through the PAT index in
the PTE; the old i915_cache_level is not sufficient to represent all
caching modes supported by the hardware.

Prepare the transition by adding some platform dependent data structures
and helper functions to translate the cache_level to pat_index.

cachelevel_to_pat: a platform dependent array mapping cache_level to
   pat_index.

max_pat_index: the maximum PAT index recommended in the hardware
   specification. Needed for validating the PAT index passed in
   from user space.

i915_gem_get_pat_index: function to convert cache_level to PAT index.

obj_to_i915(obj): macro moved to header file for wider usage.

I915_MAX_CACHE_LEVEL: upper bound of i915_cache_level for the
  convenience of coding.
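
As a rough sketch of what such a per-platform mapping could look like in the
device info (the macro name and the PAT values below are made up purely for
illustration; the real per-platform numbers are wired up in i915_pci.c by
this patch):

	#define EXAMPLE_CACHELEVEL \
		.cachelevel_to_pat = { \
			[I915_CACHE_NONE]   = 3, \
			[I915_CACHE_LLC]    = 0, \
			[I915_CACHE_L3_LLC] = 0, \
			[I915_CACHE_WT]     = 2, \
		}, \
		.max_pat_index = 3

	/* ...and the lookup itself stays trivial: */
	pat_index = INTEL_INFO(i915)->cachelevel_to_pat[level];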

Cc: Chris Wilson 
Cc: Matt Roper 
Cc: Andi Shyti 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
Reviewed-by: Andrzej Hajda 
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c|  9 +++
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  4 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  1 +
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  2 -
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  |  6 ++
 drivers/gpu/drm/i915/gt/intel_ggtt.c  |  6 ++
 drivers/gpu/drm/i915/i915_pci.c   | 79 ---
 drivers/gpu/drm/i915/intel_device_info.h  |  5 ++
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  9 +++
 9 files changed, 110 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 4666bb82f312..8c70a0ec7d2f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -45,6 +45,15 @@ static struct kmem_cache *slab_objects;
 
 static const struct drm_gem_object_funcs i915_gem_object_funcs;
 
+unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
+   enum i915_cache_level level)
+{
+   if (drm_WARN_ON(&i915->drm, level >= I915_MAX_CACHE_LEVEL))
+   return 0;
+
+   return INTEL_INFO(i915)->cachelevel_to_pat[level];
+}
+
 struct drm_i915_gem_object *i915_gem_object_alloc(void)
 {
struct drm_i915_gem_object *obj;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 885ccde9dc3c..4c92e17b4337 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -20,6 +20,8 @@
 
 enum intel_region_id;
 
+#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
+
 static inline bool i915_gem_object_size_2big(u64 size)
 {
struct drm_i915_gem_object *obj;
@@ -30,6 +32,8 @@ static inline bool i915_gem_object_size_2big(u64 size)
return false;
 }
 
+unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
+   enum i915_cache_level level);
 void i915_gem_init__objects(struct drm_i915_private *i915);
 
 void i915_objects_module_exit(void);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 830c11431ee8..41b35abccf88 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -194,6 +194,7 @@ enum i915_cache_level {
 * engine.
 */
I915_CACHE_WT,
+   I915_MAX_CACHE_LEVEL,
 };
 
 enum i915_map_type {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index b1672e054b21..214763942aa2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -460,8 +460,6 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private 
*i915,
fs_reclaim_release(GFP_KERNEL);
 }
 
-#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
-
 /**
  * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By
  * default all object types that support shrinking(see IS_SHRINKABLE), will 
also
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 11b91e0453c8..7a4b1d1afce9 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -78,6 +78,12 @@ static u64 mtl_pte_encode(dma_addr_t addr,
case I915_CACHE_WT:
pte |= GEN12_PPGTT_PTE_PAT0;
break;
+   default:
+   /* This should never happen. Added to deal with the compile
+* error due to the addition of I915_MAX_CACHE_LEVEL. Will
+* be removed by the pat_index patch.
+*/
+   break;
}
 
return pte;
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c 
b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 20915edc8bd9..c8390d03fce2 100644
--

[PATCH v1 0/6] drm/i915: Allow user to set cache at BO creation

2023-04-23 Thread fei . yang
From: Fei Yang 

The first four patches in this series are taken from
https://patchwork.freedesktop.org/series/116868/
These patches are included here because the last patch
has a dependency on the pat_index refactor.

This series is focusing on uAPI changes,
1. end support for set caching ioctl [PATCH 5/6]
2. add set_pat extension for gem_create [PATCH 6/6]

Fei Yang (6):
  drm/i915/mtl: Add PTE encode function
  drm/i915: preparation for using PAT index
  drm/i915: use pat_index instead of cache_level
  drm/i915: make sure correct pte encode is used
  drm/i915/mtl: end support for set caching ioctl
  drm/i915: Allow user to set cache at BO creation

 drivers/gpu/drm/i915/display/intel_dpt.c  | 14 ++--
 drivers/gpu/drm/i915/gem/i915_gem_create.c| 36 
 drivers/gpu/drm/i915/gem/i915_gem_domain.c| 30 +++
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++-
 drivers/gpu/drm/i915/gem/i915_gem_mman.c  |  3 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.c| 67 ++-
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  8 ++
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 26 +-
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c |  9 +-
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  2 -
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c|  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  | 16 ++--
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
 .../drm/i915/gem/selftests/i915_gem_migrate.c |  2 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|  2 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  | 10 ++-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 76 -
 drivers/gpu/drm/i915/gt/gen8_ppgtt.h  |  3 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 84 +--
 drivers/gpu/drm/i915/gt/intel_gtt.h   | 31 ---
 drivers/gpu/drm/i915/gt/intel_migrate.c   | 47 ++-
 drivers/gpu/drm/i915/gt/intel_migrate.h   | 13 ++-
 drivers/gpu/drm/i915/gt/intel_ppgtt.c |  6 +-
 drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++-
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  8 +-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  2 +-
 drivers/gpu/drm/i915/gt/selftest_tlb.c|  4 +-
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  | 10 ++-
 drivers/gpu/drm/i915/i915_debugfs.c   | 55 +---
 drivers/gpu/drm/i915/i915_gem.c   | 16 +++-
 drivers/gpu/drm/i915/i915_gpu_error.c |  8 +-
 drivers/gpu/drm/i915/i915_pci.c   | 79 +++--
 drivers/gpu/drm/i915/i915_vma.c   | 16 ++--
 drivers/gpu/drm/i915/i915_vma.h   |  2 +-
 drivers/gpu/drm/i915/i915_vma_types.h |  2 -
 drivers/gpu/drm/i915/intel_device_info.h  |  5 ++
 drivers/gpu/drm/i915/selftests/i915_gem.c |  5 +-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++--
 .../drm/i915/selftests/intel_memory_region.c  |  4 +-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  9 ++
 drivers/gpu/drm/i915/selftests/mock_gtt.c |  8 +-
 include/uapi/drm/i915_drm.h   | 36 
 tools/include/uapi/drm/i915_drm.h | 36 
 44 files changed, 652 insertions(+), 220 deletions(-)

-- 
2.25.1



[PATCH v1 1/6] drm/i915/mtl: Add PTE encode function

2023-04-23 Thread fei . yang
From: Fei Yang 

PTE encode functions are platform dependent. This patch implements
the PTE encode functions for MTL, and ensures the correct function
is used by calling the pte_encode function pointer instead of the
hardcoded gen8 version.
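
For context, a sketch of how the two PAT bits set here select an entry in
the MTL PAT table (see the "Define MOCS and PAT tables for MTL" patch
elsewhere in this thread). This assumes PAT0 is the low bit of the index,
and the decode helper below is purely illustrative, not part of the patch:

	/* PTE bits -> PAT index -> behaviour (per the MTL PPAT setup):
	 *   PAT1 only   -> index 2 -> L4 uncached          (I915_CACHE_NONE)
	 *   PAT0 | PAT1 -> index 3 -> WB, 1-way coherent   (I915_CACHE_LLC)
	 *   PAT0 only   -> index 1 -> L4 write-through     (I915_CACHE_WT)
	 *   neither     -> index 0 -> L4 write-back        (default)
	 */
	static unsigned int example_pte_to_pat_index(u64 pte)
	{
		return (!!(pte & GEN12_PPGTT_PTE_PAT1) << 1) |
			!!(pte & GEN12_PPGTT_PTE_PAT0);
	}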

Signed-off-by: Fei Yang 
Reviewed-by: Andrzej Hajda 
Reviewed-by: Andi Shyti 
Acked-by: Nirmoy Das 
---
 drivers/gpu/drm/i915/display/intel_dpt.c |  2 +-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 45 
 drivers/gpu/drm/i915/gt/intel_ggtt.c | 36 +--
 drivers/gpu/drm/i915/gt/intel_gtt.h  | 13 +--
 4 files changed, 83 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c 
b/drivers/gpu/drm/i915/display/intel_dpt.c
index b8027392144d..c5eacfdba1a5 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.c
+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
@@ -300,7 +300,7 @@ intel_dpt_create(struct intel_framebuffer *fb)
vm->vma_ops.bind_vma= dpt_bind_vma;
vm->vma_ops.unbind_vma  = dpt_unbind_vma;
 
-   vm->pte_encode = gen8_ggtt_pte_encode;
+   vm->pte_encode = vm->gt->ggtt->vm.pte_encode;
 
dpt->obj = dpt_obj;
dpt->obj->is_dpt = true;
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 4daaa6f55668..11b91e0453c8 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -55,6 +55,34 @@ static u64 gen8_pte_encode(dma_addr_t addr,
return pte;
 }
 
+static u64 mtl_pte_encode(dma_addr_t addr,
+ enum i915_cache_level level,
+ u32 flags)
+{
+   gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
+
+   if (unlikely(flags & PTE_READ_ONLY))
+   pte &= ~GEN8_PAGE_RW;
+
+   if (flags & PTE_LM)
+   pte |= GEN12_PPGTT_PTE_LM | GEN12_PPGTT_PTE_NC;
+
+   switch (level) {
+   case I915_CACHE_NONE:
+   pte |= GEN12_PPGTT_PTE_PAT1;
+   break;
+   case I915_CACHE_LLC:
+   case I915_CACHE_L3_LLC:
+   pte |= GEN12_PPGTT_PTE_PAT0 | GEN12_PPGTT_PTE_PAT1;
+   break;
+   case I915_CACHE_WT:
+   pte |= GEN12_PPGTT_PTE_PAT0;
+   break;
+   }
+
+   return pte;
+}
+
 static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
 {
struct drm_i915_private *i915 = ppgtt->vm.i915;
@@ -427,7 +455,7 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
  u32 flags)
 {
struct i915_page_directory *pd;
-   const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
+   const gen8_pte_t pte_encode = ppgtt->vm.pte_encode(0, cache_level, 
flags);
gen8_pte_t *vaddr;
 
pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
@@ -580,7 +608,7 @@ static void gen8_ppgtt_insert_huge(struct 
i915_address_space *vm,
   enum i915_cache_level cache_level,
   u32 flags)
 {
-   const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
+   const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags);
unsigned int rem = sg_dma_len(iter->sg);
u64 start = vma_res->start;
 
@@ -743,7 +771,7 @@ static void gen8_ppgtt_insert_entry(struct 
i915_address_space *vm,
GEM_BUG_ON(pt->is_compact);
 
vaddr = px_vaddr(pt);
-   vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags);
+   vaddr[gen8_pd_index(idx, 0)] = vm->pte_encode(addr, level, flags);
drm_clflush_virt_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr));
 }
 
@@ -773,7 +801,7 @@ static void __xehpsdv_ppgtt_insert_entry_lm(struct 
i915_address_space *vm,
}
 
vaddr = px_vaddr(pt);
-   vaddr[gen8_pd_index(idx, 0) / 16] = gen8_pte_encode(addr, level, flags);
+   vaddr[gen8_pd_index(idx, 0) / 16] = vm->pte_encode(addr, level, flags);
 }
 
 static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm,
@@ -820,8 +848,8 @@ static int gen8_init_scratch(struct i915_address_space *vm)
pte_flags |= PTE_LM;
 
vm->scratch[0]->encode =
-   gen8_pte_encode(px_dma(vm->scratch[0]),
-   I915_CACHE_NONE, pte_flags);
+   vm->pte_encode(px_dma(vm->scratch[0]),
+  I915_CACHE_NONE, pte_flags);
 
for (i = 1; i <= vm->top; i++) {
struct drm_i915_gem_object *obj;
@@ -963,7 +991,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
 */
ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
 
-   ppgtt->vm.pte_encode = gen8_pte_encode;
+   if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
+   ppgtt->vm.pte_encode = mtl_pte_encode;
+   else
+   ppgtt->vm.pte_encode = gen8

[PATCH v1 4/4] drm/i915: make sure correct pte encode is used

2023-04-23 Thread fei . yang
From: Fei Yang 

PTE encode is platform dependent. After replacing cache_level with
pat_index, the newly introduced mtl_pte_encode is actually generic
for all gen12 platforms, thus rename it to gen12_pte_encode and
apply it to all gen12 platforms.

Cc: Chris Wilson 
Cc: Matt Roper 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
---
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index c046813514f4..a738a25dd857 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -55,9 +55,9 @@ static u64 gen8_pte_encode(dma_addr_t addr,
return pte;
 }
 
-static u64 mtl_pte_encode(dma_addr_t addr,
- unsigned int pat_index,
- u32 flags)
+static u64 gen12_pte_encode(dma_addr_t addr,
+   unsigned int pat_index,
+   u32 flags)
 {
gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
 
@@ -994,8 +994,8 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
 */
ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
 
-   if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
-   ppgtt->vm.pte_encode = mtl_pte_encode;
+   if (GRAPHICS_VER(gt->i915) >= 12)
+   ppgtt->vm.pte_encode = gen12_pte_encode;
else
ppgtt->vm.pte_encode = gen8_pte_encode;
 
-- 
2.25.1



[PATCH v1 2/4] drm/i915: preparation for using PAT index

2023-04-23 Thread fei . yang
From: Fei Yang 

This patch is a preparation for replacing enum i915_cache_level with PAT
index. Caching policy for buffer objects is set through the PAT index in
the PTE; the old i915_cache_level is not sufficient to represent all
caching modes supported by the hardware.

Prepare the transition by adding some platform dependent data structures
and helper functions to translate the cache_level to pat_index.

cachelevel_to_pat: a platform dependent array mapping cache_level to
   pat_index.

max_pat_index: the maximum PAT index recommended in the hardware
   specification. Needed for validating the PAT index passed in
   from user space.

i915_gem_get_pat_index: function to convert cache_level to PAT index.

obj_to_i915(obj): macro moved to header file for wider usage.

I915_MAX_CACHE_LEVEL: upper bound of i915_cache_level for the
  convenience of coding.

Cc: Chris Wilson 
Cc: Matt Roper 
Cc: Andi Shyti 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
Reviewed-by: Andrzej Hajda 
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c|  9 +++
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  4 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  1 +
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  2 -
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  |  6 ++
 drivers/gpu/drm/i915/gt/intel_ggtt.c  |  6 ++
 drivers/gpu/drm/i915/i915_pci.c   | 79 ---
 drivers/gpu/drm/i915/intel_device_info.h  |  5 ++
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  9 +++
 9 files changed, 110 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 4666bb82f312..8c70a0ec7d2f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -45,6 +45,15 @@ static struct kmem_cache *slab_objects;
 
 static const struct drm_gem_object_funcs i915_gem_object_funcs;
 
+unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
+   enum i915_cache_level level)
+{
+   if (drm_WARN_ON(&i915->drm, level >= I915_MAX_CACHE_LEVEL))
+   return 0;
+
+   return INTEL_INFO(i915)->cachelevel_to_pat[level];
+}
+
 struct drm_i915_gem_object *i915_gem_object_alloc(void)
 {
struct drm_i915_gem_object *obj;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 885ccde9dc3c..4c92e17b4337 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -20,6 +20,8 @@
 
 enum intel_region_id;
 
+#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
+
 static inline bool i915_gem_object_size_2big(u64 size)
 {
struct drm_i915_gem_object *obj;
@@ -30,6 +32,8 @@ static inline bool i915_gem_object_size_2big(u64 size)
return false;
 }
 
+unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
+   enum i915_cache_level level);
 void i915_gem_init__objects(struct drm_i915_private *i915);
 
 void i915_objects_module_exit(void);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 830c11431ee8..41b35abccf88 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -194,6 +194,7 @@ enum i915_cache_level {
 * engine.
 */
I915_CACHE_WT,
+   I915_MAX_CACHE_LEVEL,
 };
 
 enum i915_map_type {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index b1672e054b21..214763942aa2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -460,8 +460,6 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private 
*i915,
fs_reclaim_release(GFP_KERNEL);
 }
 
-#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
-
 /**
  * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By
  * default all object types that support shrinking(see IS_SHRINKABLE), will 
also
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 11b91e0453c8..7a4b1d1afce9 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -78,6 +78,12 @@ static u64 mtl_pte_encode(dma_addr_t addr,
case I915_CACHE_WT:
pte |= GEN12_PPGTT_PTE_PAT0;
break;
+   default:
+   /* This should never happen. Added to deal with the compile
+* error due to the addition of I915_MAX_CACHE_LEVEL. Will
+* be removed by the pat_index patch.
+*/
+   break;
}
 
return pte;
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c 
b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 20915edc8bd9..c8390d03fce2 100644
--

[PATCH v1 3/4] drm/i915: use pat_index instead of cache_level

2023-04-23 Thread fei . yang
From: Fei Yang 

Currently the KMD is using enum i915_cache_level to set caching policy for
buffer objects. This is flaky because the PAT index, which really controls
the caching behavior in the PTE, has far more levels than what's defined in
the enum. In addition, the PAT index is platform dependent; having to
translate between i915_cache_level and PAT index is not reliable and makes
the code more complicated.

From the UMD's perspective there is also a need to set caching policy for
performance fine tuning. It's much easier for the UMD to directly use the
PAT index because the behavior of each PAT index is clearly defined in
Bspec. Having the abstracted i915_cache_level sitting in between would only
cause more ambiguity.

For these reasons this patch replaces i915_cache_level with PAT index. Also
note, the cache_level is not completely removed yet, because the KMD still
needs to create buffer objects with simple cache settings such as cached,
uncached, or writethrough. For such simple cases, using cache_level would
help simplify the code.
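
Schematically, the address-space hooks that used to take the abstract cache
level now take the raw index; a sketch of the pte_encode vfunc prototype
before and after (mirroring the hunks below):

	/* before */
	u64 (*pte_encode)(dma_addr_t addr, enum i915_cache_level level, u32 flags);

	/* after */
	u64 (*pte_encode)(dma_addr_t addr, unsigned int pat_index, u32 flags);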

Cc: Chris Wilson 
Cc: Matt Roper 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
---
 drivers/gpu/drm/i915/display/intel_dpt.c  | 12 +--
 drivers/gpu/drm/i915/gem/i915_gem_domain.c| 27 ++
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++-
 drivers/gpu/drm/i915/gem/i915_gem_mman.c  |  3 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.c| 52 +++-
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  4 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 25 +-
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c|  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  | 16 ++--
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
 .../drm/i915/gem/selftests/i915_gem_migrate.c |  2 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|  2 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  | 10 ++-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 71 
 drivers/gpu/drm/i915/gt/gen8_ppgtt.h  |  3 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 82 +--
 drivers/gpu/drm/i915/gt/intel_gtt.h   | 20 ++---
 drivers/gpu/drm/i915/gt/intel_migrate.c   | 47 ++-
 drivers/gpu/drm/i915/gt/intel_migrate.h   | 13 ++-
 drivers/gpu/drm/i915/gt/intel_ppgtt.c |  6 +-
 drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++-
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  8 +-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  2 +-
 drivers/gpu/drm/i915/gt/selftest_tlb.c|  4 +-
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  | 10 ++-
 drivers/gpu/drm/i915/i915_debugfs.c   | 55 ++---
 drivers/gpu/drm/i915/i915_gem.c   | 16 +++-
 drivers/gpu/drm/i915/i915_gpu_error.c |  8 +-
 drivers/gpu/drm/i915/i915_vma.c   | 16 ++--
 drivers/gpu/drm/i915/i915_vma.h   |  2 +-
 drivers/gpu/drm/i915/i915_vma_types.h |  2 -
 drivers/gpu/drm/i915/selftests/i915_gem.c |  5 +-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++--
 .../drm/i915/selftests/intel_memory_region.c  |  4 +-
 drivers/gpu/drm/i915/selftests/mock_gtt.c |  8 +-
 36 files changed, 378 insertions(+), 239 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c 
b/drivers/gpu/drm/i915/display/intel_dpt.c
index c5eacfdba1a5..7c5fddb203ba 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.c
+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
@@ -43,24 +43,24 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
 static void dpt_insert_page(struct i915_address_space *vm,
dma_addr_t addr,
u64 offset,
-   enum i915_cache_level level,
+   unsigned int pat_index,
u32 flags)
 {
struct i915_dpt *dpt = i915_vm_to_dpt(vm);
gen8_pte_t __iomem *base = dpt->iomem;
 
gen8_set_pte(base + offset / I915_GTT_PAGE_SIZE,
-vm->pte_encode(addr, level, flags));
+vm->pte_encode(addr, pat_index, flags));
 }
 
 static void dpt_insert_entries(struct i915_address_space *vm,
   struct i915_vma_resource *vma_res,
-  enum i915_cache_level level,
+  unsigned int pat_index,
   u32 flags)
 {
struct i915_dpt *dpt = i915_vm_to_dpt(vm);
gen8_pte_t __iomem *base = dpt->iomem;
-   const gen8_pte_t pte_encode = vm->pte_encode(0, level, flags);
+   const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags);
struct sgt_iter sgt_iter;
dma_addr_t addr;
int i;
@@ -83,7 +83,7 @@ static void dpt_clear_range(struct i915_address_space *vm,
 static void dpt_bind_vma(struct i915_address_space *vm,
 struct i915_vm_pt_stash *stash,
  

[PATCH v1 0/4] drm/i915/mtl: add PTE encode function

2023-04-23 Thread fei . yang
From: Fei Yang 

These patches are extracted from series
https://patchwork.freedesktop.org/series/115980/

This series starts with adding PTE encode functions for MTL, as it can no
longer reuse the PTE encode functions for GEN8 due to PAT index changes.
Then there are patches refactoring the cache policy programming so that
the PTE encode functions can be unified across all GEN12 platforms.
This refactor is also important in implementing the design which allows
userspace to directly set the cache policy for each Buffer Object.

Fei Yang (4):
  drm/i915/mtl: Add PTE encode function
  drm/i915: preparation for using PAT index
  drm/i915: use pat_index instead of cache_level
  drm/i915: make sure correct pte encode is used

 drivers/gpu/drm/i915/display/intel_dpt.c  | 14 ++--
 drivers/gpu/drm/i915/gem/i915_gem_domain.c| 27 ++
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++-
 drivers/gpu/drm/i915/gem/i915_gem_mman.c  |  3 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.c| 61 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  8 ++
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 26 +-
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  2 -
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c|  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  | 16 ++--
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
 .../drm/i915/gem/selftests/i915_gem_migrate.c |  2 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|  2 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  | 10 ++-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 76 -
 drivers/gpu/drm/i915/gt/gen8_ppgtt.h  |  3 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 84 +--
 drivers/gpu/drm/i915/gt/intel_gtt.h   | 31 ---
 drivers/gpu/drm/i915/gt/intel_migrate.c   | 47 ++-
 drivers/gpu/drm/i915/gt/intel_migrate.h   | 13 ++-
 drivers/gpu/drm/i915/gt/intel_ppgtt.c |  6 +-
 drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++-
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  8 +-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  2 +-
 drivers/gpu/drm/i915/gt/selftest_tlb.c|  4 +-
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  | 10 ++-
 drivers/gpu/drm/i915/i915_debugfs.c   | 55 +---
 drivers/gpu/drm/i915/i915_gem.c   | 16 +++-
 drivers/gpu/drm/i915/i915_gpu_error.c |  8 +-
 drivers/gpu/drm/i915/i915_pci.c   | 79 +++--
 drivers/gpu/drm/i915/i915_vma.c   | 16 ++--
 drivers/gpu/drm/i915/i915_vma.h   |  2 +-
 drivers/gpu/drm/i915/i915_vma_types.h |  2 -
 drivers/gpu/drm/i915/intel_device_info.h  |  5 ++
 drivers/gpu/drm/i915/selftests/i915_gem.c |  5 +-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++--
 .../drm/i915/selftests/intel_memory_region.c  |  4 +-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  9 ++
 drivers/gpu/drm/i915/selftests/mock_gtt.c |  8 +-
 40 files changed, 527 insertions(+), 219 deletions(-)

-- 
2.25.1



[PATCH v1 1/4] drm/i915/mtl: Add PTE encode function

2023-04-23 Thread fei . yang
From: Fei Yang 

PTE encode functions are platform dependent. This patch implements
the PTE encode functions for MTL, and ensures the correct function
is used by calling the pte_encode function pointer instead of the
hardcoded gen8 version.

Signed-off-by: Fei Yang 
Reviewed-by: Andrzej Hajda 
Reviewed-by: Andi Shyti 
Acked-by: Nirmoy Das 
---
 drivers/gpu/drm/i915/display/intel_dpt.c |  2 +-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 45 
 drivers/gpu/drm/i915/gt/intel_ggtt.c | 36 +--
 drivers/gpu/drm/i915/gt/intel_gtt.h  | 13 +--
 4 files changed, 83 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c 
b/drivers/gpu/drm/i915/display/intel_dpt.c
index b8027392144d..c5eacfdba1a5 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.c
+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
@@ -300,7 +300,7 @@ intel_dpt_create(struct intel_framebuffer *fb)
vm->vma_ops.bind_vma= dpt_bind_vma;
vm->vma_ops.unbind_vma  = dpt_unbind_vma;
 
-   vm->pte_encode = gen8_ggtt_pte_encode;
+   vm->pte_encode = vm->gt->ggtt->vm.pte_encode;
 
dpt->obj = dpt_obj;
dpt->obj->is_dpt = true;
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 4daaa6f55668..11b91e0453c8 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -55,6 +55,34 @@ static u64 gen8_pte_encode(dma_addr_t addr,
return pte;
 }
 
+static u64 mtl_pte_encode(dma_addr_t addr,
+ enum i915_cache_level level,
+ u32 flags)
+{
+   gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
+
+   if (unlikely(flags & PTE_READ_ONLY))
+   pte &= ~GEN8_PAGE_RW;
+
+   if (flags & PTE_LM)
+   pte |= GEN12_PPGTT_PTE_LM | GEN12_PPGTT_PTE_NC;
+
+   switch (level) {
+   case I915_CACHE_NONE:
+   pte |= GEN12_PPGTT_PTE_PAT1;
+   break;
+   case I915_CACHE_LLC:
+   case I915_CACHE_L3_LLC:
+   pte |= GEN12_PPGTT_PTE_PAT0 | GEN12_PPGTT_PTE_PAT1;
+   break;
+   case I915_CACHE_WT:
+   pte |= GEN12_PPGTT_PTE_PAT0;
+   break;
+   }
+
+   return pte;
+}
+
 static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
 {
struct drm_i915_private *i915 = ppgtt->vm.i915;
@@ -427,7 +455,7 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
  u32 flags)
 {
struct i915_page_directory *pd;
-   const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
+   const gen8_pte_t pte_encode = ppgtt->vm.pte_encode(0, cache_level, 
flags);
gen8_pte_t *vaddr;
 
pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
@@ -580,7 +608,7 @@ static void gen8_ppgtt_insert_huge(struct 
i915_address_space *vm,
   enum i915_cache_level cache_level,
   u32 flags)
 {
-   const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
+   const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags);
unsigned int rem = sg_dma_len(iter->sg);
u64 start = vma_res->start;
 
@@ -743,7 +771,7 @@ static void gen8_ppgtt_insert_entry(struct 
i915_address_space *vm,
GEM_BUG_ON(pt->is_compact);
 
vaddr = px_vaddr(pt);
-   vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags);
+   vaddr[gen8_pd_index(idx, 0)] = vm->pte_encode(addr, level, flags);
drm_clflush_virt_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr));
 }
 
@@ -773,7 +801,7 @@ static void __xehpsdv_ppgtt_insert_entry_lm(struct 
i915_address_space *vm,
}
 
vaddr = px_vaddr(pt);
-   vaddr[gen8_pd_index(idx, 0) / 16] = gen8_pte_encode(addr, level, flags);
+   vaddr[gen8_pd_index(idx, 0) / 16] = vm->pte_encode(addr, level, flags);
 }
 
 static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm,
@@ -820,8 +848,8 @@ static int gen8_init_scratch(struct i915_address_space *vm)
pte_flags |= PTE_LM;
 
vm->scratch[0]->encode =
-   gen8_pte_encode(px_dma(vm->scratch[0]),
-   I915_CACHE_NONE, pte_flags);
+   vm->pte_encode(px_dma(vm->scratch[0]),
+  I915_CACHE_NONE, pte_flags);
 
for (i = 1; i <= vm->top; i++) {
struct drm_i915_gem_object *obj;
@@ -963,7 +991,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
 */
ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
 
-   ppgtt->vm.pte_encode = gen8_pte_encode;
+   if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
+   ppgtt->vm.pte_encode = mtl_pte_encode;
+   else
+   ppgtt->vm.pte_encode = gen8

[PATCH v4 8/8] drm/i915: Allow user to set cache at BO creation

2023-04-21 Thread fei . yang
From: Fei Yang 

To comply with the design that buffer objects shall have an immutable
cache setting throughout their life cycle, {set, get}_caching ioctl's
are no longer supported from MTL onward. With that change, caching
policy can only be set at object creation time. The current code
applies a default (platform dependent) cache setting for all objects.
However this is not optimal for performance tuning. The patch extends
the existing gem_create uAPI to let the user set the PAT index for the
object at creation time.
The new extension is platform independent, so UMD's can switch to using
this extension for older platforms as well, while {set, get}_caching are
still supported on these legacy platforms for compatibility reasons.
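
For reference, a minimal userspace sketch of the new extension (illustrative
only; error handling is trimmed, the 4 KiB size and the pat_index value are
placeholders, and the header path assumes a libdrm-installed drm/i915_drm.h
that already carries this uAPI):

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <drm/i915_drm.h>

	/* Create one BO whose caching policy is fixed at creation time. */
	static uint32_t example_create_bo_with_pat(int fd, uint32_t pat_index)
	{
		struct drm_i915_gem_create_ext_set_pat set_pat = {
			.base.name = I915_GEM_CREATE_EXT_SET_PAT,
			.pat_index = pat_index,	/* must come from the platform's PAT table */
		};
		struct drm_i915_gem_create_ext create = {
			.size = 4096,
			.extensions = (uintptr_t)&set_pat,
		};

		if (ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create))
			return 0;	/* 0 is never a valid GEM handle */

		return create.handle;
	}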

Cc: Chris Wilson 
Cc: Matt Roper 
Cc: Andi Shyti 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
---
 drivers/gpu/drm/i915/gem/i915_gem_create.c | 36 ++
 drivers/gpu/drm/i915/gem/i915_gem_object.c |  6 
 include/uapi/drm/i915_drm.h| 36 ++
 tools/include/uapi/drm/i915_drm.h  | 36 ++
 4 files changed, 114 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c 
b/drivers/gpu/drm/i915/gem/i915_gem_create.c
index bfe1dbda4cb7..723c3ddd6c74 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
@@ -245,6 +245,7 @@ struct create_ext {
unsigned int n_placements;
unsigned int placement_mask;
unsigned long flags;
+   unsigned int pat_index;
 };
 
 static void repr_placements(char *buf, size_t size,
@@ -394,11 +395,39 @@ static int ext_set_protected(struct i915_user_extension 
__user *base, void *data
return 0;
 }
 
+static int ext_set_pat(struct i915_user_extension __user *base, void *data)
+{
+   struct create_ext *ext_data = data;
+   struct drm_i915_private *i915 = ext_data->i915;
+   struct drm_i915_gem_create_ext_set_pat ext;
+   unsigned int max_pat_index;
+
+   BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) !=
+offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd));
+
+   if (copy_from_user(&ext, base, sizeof(ext)))
+   return -EFAULT;
+
+   max_pat_index = INTEL_INFO(i915)->max_pat_index;
+
+   if (ext.pat_index > max_pat_index) {
+   drm_dbg(&i915->drm, "PAT index is invalid: %u\n",
+   ext.pat_index);
+   return -EINVAL;
+   }
+
+   ext_data->pat_index = ext.pat_index;
+
+   return 0;
+}
+
 static const i915_user_extension_fn create_extensions[] = {
[I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements,
[I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected,
+   [I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat,
 };
 
+#define PAT_INDEX_NOT_SET  0x
 /**
  * i915_gem_create_ext_ioctl - Creates a new mm object and returns a handle to 
it.
  * @dev: drm device pointer
@@ -418,6 +447,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void 
*data,
if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS)
return -EINVAL;
 
+   ext_data.pat_index = PAT_INDEX_NOT_SET;
ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
   create_extensions,
   ARRAY_SIZE(create_extensions),
@@ -454,5 +484,11 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void 
*data,
if (IS_ERR(obj))
return PTR_ERR(obj);
 
+   if (ext_data.pat_index != PAT_INDEX_NOT_SET) {
+   i915_gem_object_set_pat_index(obj, ext_data.pat_index);
+   /* Mark pat_index is set by UMD */
+   obj->cache_level = I915_CACHE_INVAL;
+   }
+
return i915_gem_publish(obj, file, &args->size, &args->handle);
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 27c948350b5b..61651f7e5806 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -209,6 +209,12 @@ bool i915_gem_object_can_bypass_llc(struct 
drm_i915_gem_object *obj)
if (!(obj->flags & I915_BO_ALLOC_USER))
return false;
 
+   /*
+* Always flush cache for UMD objects at creation time.
+*/
+   if (obj->cache_level == I915_CACHE_INVAL)
+   return true;
+
/*
 * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
 * possible for userspace to bypass the GTT caching bits set by the
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index dba7c5a5b25e..03c5c314846e 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -3630,9 +3630,13 @@ struct drm_i915_gem_create_ext {
 *
 * For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see
 

[PATCH v4 5/8] drm/i915: use pat_index instead of cache_level

2023-04-21 Thread fei . yang
From: Fei Yang 

Currently the KMD is using enum i915_cache_level to set caching policy for
buffer objects. This is flaky because the PAT index, which really controls
the caching behavior in the PTE, has far more levels than what's defined in
the enum. In addition, the PAT index is platform dependent; having to
translate between i915_cache_level and PAT index is not reliable and makes
the code more complicated.

From the UMD's perspective there is also a need to set caching policy for
performance fine tuning. It's much easier for the UMD to directly use the
PAT index because the behavior of each PAT index is clearly defined in
Bspec. Having the abstracted i915_cache_level sitting in between would only
cause more ambiguity.

For these reasons this patch replaces i915_cache_level with PAT index. Also
note, the cache_level is not completely removed yet, because the KMD still
needs to create buffer objects with simple cache settings such as cached,
uncached, or writethrough. For such simple cases, using cache_level would
help simplify the code.

Cc: Chris Wilson 
Cc: Matt Roper 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
---
 drivers/gpu/drm/i915/display/intel_dpt.c  | 12 +--
 drivers/gpu/drm/i915/gem/i915_gem_domain.c| 27 ++
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++-
 drivers/gpu/drm/i915/gem/i915_gem_mman.c  |  3 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.c| 52 +++-
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  4 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 25 +-
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c|  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  | 16 ++--
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
 .../drm/i915/gem/selftests/i915_gem_migrate.c |  2 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|  2 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  | 10 ++-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 71 
 drivers/gpu/drm/i915/gt/gen8_ppgtt.h  |  3 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 82 +--
 drivers/gpu/drm/i915/gt/intel_gtt.h   | 20 ++---
 drivers/gpu/drm/i915/gt/intel_migrate.c   | 47 ++-
 drivers/gpu/drm/i915/gt/intel_migrate.h   | 13 ++-
 drivers/gpu/drm/i915/gt/intel_ppgtt.c |  6 +-
 drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++-
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  8 +-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  2 +-
 drivers/gpu/drm/i915/gt/selftest_tlb.c|  4 +-
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  | 10 ++-
 drivers/gpu/drm/i915/i915_debugfs.c   | 55 ++---
 drivers/gpu/drm/i915/i915_gem.c   | 16 +++-
 drivers/gpu/drm/i915/i915_gpu_error.c |  8 +-
 drivers/gpu/drm/i915/i915_vma.c   | 16 ++--
 drivers/gpu/drm/i915/i915_vma.h   |  2 +-
 drivers/gpu/drm/i915/i915_vma_types.h |  2 -
 drivers/gpu/drm/i915/selftests/i915_gem.c |  5 +-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++--
 .../drm/i915/selftests/intel_memory_region.c  |  4 +-
 drivers/gpu/drm/i915/selftests/mock_gtt.c |  8 +-
 36 files changed, 378 insertions(+), 239 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c 
b/drivers/gpu/drm/i915/display/intel_dpt.c
index c5eacfdba1a5..7c5fddb203ba 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.c
+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
@@ -43,24 +43,24 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
 static void dpt_insert_page(struct i915_address_space *vm,
dma_addr_t addr,
u64 offset,
-   enum i915_cache_level level,
+   unsigned int pat_index,
u32 flags)
 {
struct i915_dpt *dpt = i915_vm_to_dpt(vm);
gen8_pte_t __iomem *base = dpt->iomem;
 
gen8_set_pte(base + offset / I915_GTT_PAGE_SIZE,
-vm->pte_encode(addr, level, flags));
+vm->pte_encode(addr, pat_index, flags));
 }
 
 static void dpt_insert_entries(struct i915_address_space *vm,
   struct i915_vma_resource *vma_res,
-  enum i915_cache_level level,
+  unsigned int pat_index,
   u32 flags)
 {
struct i915_dpt *dpt = i915_vm_to_dpt(vm);
gen8_pte_t __iomem *base = dpt->iomem;
-   const gen8_pte_t pte_encode = vm->pte_encode(0, level, flags);
+   const gen8_pte_t pte_encode = vm->pte_encode(0, pat_index, flags);
struct sgt_iter sgt_iter;
dma_addr_t addr;
int i;
@@ -83,7 +83,7 @@ static void dpt_clear_range(struct i915_address_space *vm,
 static void dpt_bind_vma(struct i915_address_space *vm,
 struct i915_vm_pt_stash *stash,
  

[PATCH v4 6/8] drm/i915: make sure correct pte encode is used

2023-04-21 Thread fei . yang
From: Fei Yang 

PTE encode is platform dependent. After replacing cache_level with
pat_index, the newly introduced mtl_pte_encode is actually generic
for all gen12 platforms, thus rename it to gen12_pte_encode and
apply it to all gen12 platforms.

Cc: Chris Wilson 
Cc: Matt Roper 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
---
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index c046813514f4..3373b2aeae86 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -55,7 +55,7 @@ static u64 gen8_pte_encode(dma_addr_t addr,
return pte;
 }
 
-static u64 mtl_pte_encode(dma_addr_t addr,
+static u64 gen12_pte_encode(dma_addr_t addr,
  unsigned int pat_index,
  u32 flags)
 {
@@ -994,8 +994,8 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
 */
ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
 
-   if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
-   ppgtt->vm.pte_encode = mtl_pte_encode;
+   if (GRAPHICS_VER(gt->i915) >= 12)
+   ppgtt->vm.pte_encode = gen12_pte_encode;
else
ppgtt->vm.pte_encode = gen8_pte_encode;
 
-- 
2.25.1



[PATCH v4 3/8] drm/i915/mtl: Add PTE encode function

2023-04-21 Thread fei . yang
From: Fei Yang 

PTE encode functions are platform dependent. This patch implements
the PTE encode functions for MTL, and ensures the correct function
is used by calling the pte_encode function pointer instead of the
hardcoded gen8 version.

Signed-off-by: Fei Yang 
Reviewed-by: Andrzej Hajda 
Reviewed-by: Andi Shyti 
Acked-by: Nirmoy Das 
---
 drivers/gpu/drm/i915/display/intel_dpt.c |  2 +-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 45 
 drivers/gpu/drm/i915/gt/intel_ggtt.c | 36 +--
 drivers/gpu/drm/i915/gt/intel_gtt.h  | 13 +--
 4 files changed, 83 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c 
b/drivers/gpu/drm/i915/display/intel_dpt.c
index b8027392144d..c5eacfdba1a5 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.c
+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
@@ -300,7 +300,7 @@ intel_dpt_create(struct intel_framebuffer *fb)
vm->vma_ops.bind_vma= dpt_bind_vma;
vm->vma_ops.unbind_vma  = dpt_unbind_vma;
 
-   vm->pte_encode = gen8_ggtt_pte_encode;
+   vm->pte_encode = vm->gt->ggtt->vm.pte_encode;
 
dpt->obj = dpt_obj;
dpt->obj->is_dpt = true;
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 4daaa6f55668..11b91e0453c8 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -55,6 +55,34 @@ static u64 gen8_pte_encode(dma_addr_t addr,
return pte;
 }
 
+static u64 mtl_pte_encode(dma_addr_t addr,
+ enum i915_cache_level level,
+ u32 flags)
+{
+   gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
+
+   if (unlikely(flags & PTE_READ_ONLY))
+   pte &= ~GEN8_PAGE_RW;
+
+   if (flags & PTE_LM)
+   pte |= GEN12_PPGTT_PTE_LM | GEN12_PPGTT_PTE_NC;
+
+   switch (level) {
+   case I915_CACHE_NONE:
+   pte |= GEN12_PPGTT_PTE_PAT1;
+   break;
+   case I915_CACHE_LLC:
+   case I915_CACHE_L3_LLC:
+   pte |= GEN12_PPGTT_PTE_PAT0 | GEN12_PPGTT_PTE_PAT1;
+   break;
+   case I915_CACHE_WT:
+   pte |= GEN12_PPGTT_PTE_PAT0;
+   break;
+   }
+
+   return pte;
+}
+
 static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
 {
struct drm_i915_private *i915 = ppgtt->vm.i915;
@@ -427,7 +455,7 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
  u32 flags)
 {
struct i915_page_directory *pd;
-   const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
+   const gen8_pte_t pte_encode = ppgtt->vm.pte_encode(0, cache_level, 
flags);
gen8_pte_t *vaddr;
 
pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
@@ -580,7 +608,7 @@ static void gen8_ppgtt_insert_huge(struct 
i915_address_space *vm,
   enum i915_cache_level cache_level,
   u32 flags)
 {
-   const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
+   const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags);
unsigned int rem = sg_dma_len(iter->sg);
u64 start = vma_res->start;
 
@@ -743,7 +771,7 @@ static void gen8_ppgtt_insert_entry(struct 
i915_address_space *vm,
GEM_BUG_ON(pt->is_compact);
 
vaddr = px_vaddr(pt);
-   vaddr[gen8_pd_index(idx, 0)] = gen8_pte_encode(addr, level, flags);
+   vaddr[gen8_pd_index(idx, 0)] = vm->pte_encode(addr, level, flags);
drm_clflush_virt_range(&vaddr[gen8_pd_index(idx, 0)], sizeof(*vaddr));
 }
 
@@ -773,7 +801,7 @@ static void __xehpsdv_ppgtt_insert_entry_lm(struct 
i915_address_space *vm,
}
 
vaddr = px_vaddr(pt);
-   vaddr[gen8_pd_index(idx, 0) / 16] = gen8_pte_encode(addr, level, flags);
+   vaddr[gen8_pd_index(idx, 0) / 16] = vm->pte_encode(addr, level, flags);
 }
 
 static void xehpsdv_ppgtt_insert_entry(struct i915_address_space *vm,
@@ -820,8 +848,8 @@ static int gen8_init_scratch(struct i915_address_space *vm)
pte_flags |= PTE_LM;
 
vm->scratch[0]->encode =
-   gen8_pte_encode(px_dma(vm->scratch[0]),
-   I915_CACHE_NONE, pte_flags);
+   vm->pte_encode(px_dma(vm->scratch[0]),
+  I915_CACHE_NONE, pte_flags);
 
for (i = 1; i <= vm->top; i++) {
struct drm_i915_gem_object *obj;
@@ -963,7 +991,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
 */
ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
 
-   ppgtt->vm.pte_encode = gen8_pte_encode;
+   if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
+   ppgtt->vm.pte_encode = mtl_pte_encode;
+   else
+   ppgtt->vm.pte_encode = gen8

[PATCH v4 4/8] drm/i915: preparation for using PAT index

2023-04-21 Thread fei . yang
From: Fei Yang 

This patch is a preparation for replacing enum i915_cache_level with PAT
index. Caching policy for buffer objects is set through the PAT index in
the PTE; the old i915_cache_level is not sufficient to represent all
caching modes supported by the hardware.

Prepare the transition by adding some platform dependent data structures
and helper functions to translate the cache_level to pat_index.

cachelevel_to_pat: a platform dependent array mapping cache_level to
   pat_index.

max_pat_index: the maximum PAT index supported by the hardware. Needed for
   validating the PAT index passed in from user space.

i915_gem_get_pat_index: function to convert cache_level to PAT index.

obj_to_i915(obj): macro moved to header file for wider usage.

I915_MAX_CACHE_LEVEL: upper bound of i915_cache_level for the
  convenience of coding.

Cc: Chris Wilson 
Cc: Matt Roper 
Cc: Andi Shyti 
Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c|  9 +++
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  4 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  1 +
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  2 -
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  |  6 ++
 drivers/gpu/drm/i915/gt/intel_ggtt.c  |  6 ++
 drivers/gpu/drm/i915/i915_pci.c   | 79 ---
 drivers/gpu/drm/i915/intel_device_info.h  |  5 ++
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  9 +++
 9 files changed, 110 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 4666bb82f312..8c70a0ec7d2f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -45,6 +45,15 @@ static struct kmem_cache *slab_objects;
 
 static const struct drm_gem_object_funcs i915_gem_object_funcs;
 
+unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
+   enum i915_cache_level level)
+{
+   if (drm_WARN_ON(&i915->drm, level >= I915_MAX_CACHE_LEVEL))
+   return 0;
+
+   return INTEL_INFO(i915)->cachelevel_to_pat[level];
+}
+
 struct drm_i915_gem_object *i915_gem_object_alloc(void)
 {
struct drm_i915_gem_object *obj;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 885ccde9dc3c..4c92e17b4337 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -20,6 +20,8 @@
 
 enum intel_region_id;
 
+#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
+
 static inline bool i915_gem_object_size_2big(u64 size)
 {
struct drm_i915_gem_object *obj;
@@ -30,6 +32,8 @@ static inline bool i915_gem_object_size_2big(u64 size)
return false;
 }
 
+unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
+   enum i915_cache_level level);
 void i915_gem_init__objects(struct drm_i915_private *i915);
 
 void i915_objects_module_exit(void);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 830c11431ee8..41b35abccf88 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -194,6 +194,7 @@ enum i915_cache_level {
 * engine.
 */
I915_CACHE_WT,
+   I915_MAX_CACHE_LEVEL,
 };
 
 enum i915_map_type {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index b1672e054b21..214763942aa2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -460,8 +460,6 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private 
*i915,
fs_reclaim_release(GFP_KERNEL);
 }
 
-#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
-
 /**
  * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By
  * default all object types that support shrinking(see IS_SHRINKABLE), will 
also
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 11b91e0453c8..7a4b1d1afce9 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -78,6 +78,12 @@ static u64 mtl_pte_encode(dma_addr_t addr,
case I915_CACHE_WT:
pte |= GEN12_PPGTT_PTE_PAT0;
break;
+   default:
+   /* This should never happen. Added to deal with the compile
+* error due to the addition of I915_MAX_CACHE_LEVEL. Will
+* be removed by the pat_index patch.
+*/
+   break;
}
 
return pte;
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c 
b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 20915edc8bd9..c8390d03fce2 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/

[PATCH v4 0/8] drm/i915/mtl: Define MOCS and PAT tables for MTL

2023-04-21 Thread fei . yang
From: Fei Yang 

The series includes patches needed to enable MTL.
Also add new extension for GEM_CREATE uAPI to let
user space set cache policy for buffer objects.

v2: addressing review comments and checkpatch warnings
v3: make mtl_ggtt_pte_encode static
v4: addressing more comments from Matt
drop two patches from previous version (to be merged separately)
extend mtl_pte_encode to cover all gen12


Fei Yang (7):
  drm/i915/mtl: fix mocs selftest
  drm/i915/mtl: Add PTE encode function
  drm/i915: preparation for using PAT index
  drm/i915: use pat_index instead of cache_level
  drm/i915: make sure correct pte encode is used
  drm/i915/mtl: end support for set caching ioctl
  drm/i915: Allow user to set cache at BO creation

Madhumitha Tolakanahalli Pradeep (1):
  drm/i915/mtl: Define MOCS and PAT tables for MTL

 drivers/gpu/drm/i915/display/intel_dpt.c  | 14 ++--
 drivers/gpu/drm/i915/gem/i915_gem_create.c| 36 
 drivers/gpu/drm/i915/gem/i915_gem_domain.c| 30 +++
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++-
 drivers/gpu/drm/i915/gem/i915_gem_mman.c  |  3 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.c| 67 ++-
 drivers/gpu/drm/i915/gem/i915_gem_object.h|  8 ++
 .../gpu/drm/i915/gem/i915_gem_object_types.h  | 26 +-
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c |  9 +-
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  |  2 -
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c|  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  | 16 ++--
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
 .../drm/i915/gem/selftests/i915_gem_migrate.c |  2 +-
 .../drm/i915/gem/selftests/i915_gem_mman.c|  2 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  | 10 ++-
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 76 -
 drivers/gpu/drm/i915/gt/gen8_ppgtt.h  |  3 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c  | 84 +--
 drivers/gpu/drm/i915/gt/intel_gt_regs.h   |  6 +-
 drivers/gpu/drm/i915/gt/intel_gtt.c   | 47 ++-
 drivers/gpu/drm/i915/gt/intel_gtt.h   | 37 +---
 drivers/gpu/drm/i915/gt/intel_migrate.c   | 47 ++-
 drivers/gpu/drm/i915/gt/intel_migrate.h   | 13 ++-
 drivers/gpu/drm/i915/gt/intel_mocs.c  | 70 +++-
 drivers/gpu/drm/i915/gt/intel_ppgtt.c |  6 +-
 drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++-
 drivers/gpu/drm/i915/gt/selftest_mocs.c   |  3 +-
 drivers/gpu/drm/i915/gt/selftest_reset.c  |  8 +-
 drivers/gpu/drm/i915/gt/selftest_timeline.c   |  2 +-
 drivers/gpu/drm/i915/gt/selftest_tlb.c|  4 +-
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  | 10 ++-
 drivers/gpu/drm/i915/i915_debugfs.c   | 55 +---
 drivers/gpu/drm/i915/i915_gem.c   | 16 +++-
 drivers/gpu/drm/i915/i915_gpu_error.c |  8 +-
 drivers/gpu/drm/i915/i915_pci.c   | 79 +++--
 drivers/gpu/drm/i915/i915_vma.c   | 16 ++--
 drivers/gpu/drm/i915/i915_vma.h   |  2 +-
 drivers/gpu/drm/i915/i915_vma_types.h |  2 -
 drivers/gpu/drm/i915/intel_device_info.h  |  5 ++
 drivers/gpu/drm/i915/selftests/i915_gem.c |  5 +-
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++--
 .../drm/i915/selftests/intel_memory_region.c  |  4 +-
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  9 ++
 drivers/gpu/drm/i915/selftests/mock_gtt.c |  8 +-
 include/uapi/drm/i915_drm.h   | 36 
 tools/include/uapi/drm/i915_drm.h | 36 
 48 files changed, 781 insertions(+), 223 deletions(-)

-- 
2.25.1



[PATCH v4 7/8] drm/i915/mtl: end support for set caching ioctl

2023-04-21 Thread fei . yang
From: Fei Yang 

The design is to keep a Buffer Object's caching policy immutable
throughout its life cycle. This patch ends the support for the set
caching ioctl from MTL onward. While doing that we also set BO's to be
1-way coherent at creation time because the GPU no longer automatically
snoops the CPU cache. For UMD's that need to fine-tune the caching
policy for BO's, a follow-up patch will extend the GEM_CREATE uAPI to
allow UMD's to specify the caching mode at BO creation time.
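
Seen from a legacy UMD, the change looks roughly like this (illustrative
sketch only; the helper name is made up, and real code would fall back to
the gem_create set_pat extension added later in this series):

	#include <errno.h>
	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <drm/i915_drm.h>

	static int example_try_set_caching(int fd, uint32_t handle)
	{
		struct drm_i915_gem_caching arg = {
			.handle  = handle,
			.caching = I915_CACHING_CACHED,
		};

		if (ioctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg) == 0)
			return 0;	/* pre-MTL: the ioctl still works */

		if (errno == EOPNOTSUPP)
			return 1;	/* MTL+: pick the PAT index at creation instead */

		return -errno;		/* some other failure */
	}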

Signed-off-by: Fei Yang 
Reviewed-by: Andi Shyti 
Reviewed-by: Andrzej Hajda 
---
 drivers/gpu/drm/i915/gem/i915_gem_domain.c | 3 +++
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c  | 9 -
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c 
b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 89938084af97..d5fd4c9cd9f8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -328,6 +328,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void 
*data,
if (IS_DGFX(i915))
return -ENODEV;
 
+   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+   return -EOPNOTSUPP;
+
switch (args->caching) {
case I915_CACHING_NONE:
level = I915_CACHE_NONE;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 37d1efcd3ca6..cad4a6017f4b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -601,7 +601,14 @@ static int shmem_object_init(struct intel_memory_region *mem,
obj->write_domain = I915_GEM_DOMAIN_CPU;
obj->read_domains = I915_GEM_DOMAIN_CPU;
 
-   if (HAS_LLC(i915))
+   /*
+    * MTL doesn't snoop the CPU cache by default for GPU access (namely
+    * 1-way coherency), however some UMDs currently depend on that.
+    * Make 1-way coherent the default setting for MTL. A follow-up
+    * patch will extend the GEM_CREATE uAPI to let UMDs specify the
+    * caching mode at BO creation time.
+    */
+   if (HAS_LLC(i915) || (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)))
/* On some devices, we can have the GPU use the LLC (the CPU
 * cache) for about a 10% performance improvement
 * compared to uncached.  Graphics requests other than
-- 
2.25.1



[PATCH v4 1/8] drm/i915/mtl: Define MOCS and PAT tables for MTL

2023-04-21 Thread fei . yang
From: Madhumitha Tolakanahalli Pradeep 


On MTL, the GT can no longer allocate on the LLC - only the CPU can.
This, along with the new register bits MTL requires to be programmed,
calls for a MOCS/PAT table update.
Also, the PAT index registers are multicast for the primary GT, and
there is an address jump from index 7 to 8. This patch makes sure
these registers are programmed in the proper way.
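
As an illustrative sketch of that address jump (assuming the 4-byte
register stride implied by the _PAT_INDEX values in the diff below;
the helper name is made up for illustration, not part of the patch):

/* PAT indexes 0..7 sit in the 0x4800 range, while index 8 and above
 * jump to the 0x4848 range.
 */
static unsigned int mtl_pat_index_offset(unsigned int index)
{
	if (index < 8)
		return 0x4800 + index * 4;	/* 0x4800, 0x4804, ... 0x481c */
	return 0x4848 + (index - 8) * 4;	/* 0x4848, 0x484c, ... */
}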

BSpec: 44509, 45101, 44235

Cc: Matt Roper 
Cc: Lucas De Marchi 
Signed-off-by: Madhumitha Tolakanahalli Pradeep 

Signed-off-by: Aravind Iddamsetty 
Signed-off-by: Nirmoy Das 
Signed-off-by: Fei Yang 
Reviewed-by: Andrzej Hajda 
Reviewed-by: Nirmoy Das 
Reviewed-by: Andi Shyti 
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |  6 ++-
 drivers/gpu/drm/i915/gt/intel_gtt.c | 47 -
 drivers/gpu/drm/i915/gt/intel_gtt.h |  8 +++
 drivers/gpu/drm/i915/gt/intel_mocs.c| 70 -
 4 files changed, 128 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index fd1f9cd35e9d..e8c3b762a92a 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -356,7 +356,11 @@
 #define GEN7_TLB_RD_ADDR   _MMIO(0x4700)
 
 #define GEN12_PAT_INDEX(index) _MMIO(0x4800 + (index) * 4)
-#define XEHP_PAT_INDEX(index)  MCR_REG(0x4800 + (index) * 4)
+#define _PAT_INDEX(index)  _PICK_EVEN_2RANGES(index, 8, \
+                                               0x4800, 0x4804, \
+                                               0x4848, 0x484c)
+#define XEHP_PAT_INDEX(index)  MCR_REG(_PAT_INDEX(index))
+#define XELPMP_PAT_INDEX(index)_MMIO(_PAT_INDEX(index))
 
 #define XEHP_TILE0_ADDR_RANGE  MCR_REG(0x4900)
 #define   XEHP_TILE_LMEM_RANGE_SHIFT   8
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 4f436ba7a3c8..2f6a9be0ffe6 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -468,6 +468,44 @@ void gtt_write_workarounds(struct intel_gt *gt)
}
 }
 
+static void xelpmp_setup_private_ppat(struct intel_uncore *uncore)
+{
+   intel_uncore_write(uncore, XELPMP_PAT_INDEX(0),
+  MTL_PPAT_L4_0_WB);
+   intel_uncore_write(uncore, XELPMP_PAT_INDEX(1),
+  MTL_PPAT_L4_1_WT);
+   intel_uncore_write(uncore, XELPMP_PAT_INDEX(2),
+  MTL_PPAT_L4_3_UC);
+   intel_uncore_write(uncore, XELPMP_PAT_INDEX(3),
+  MTL_PPAT_L4_0_WB | MTL_2_COH_1W);
+   intel_uncore_write(uncore, XELPMP_PAT_INDEX(4),
+  MTL_PPAT_L4_0_WB | MTL_3_COH_2W);
+
+   /*
+* Remaining PAT entries are left at the hardware-default
+* fully-cached setting
+*/
+}
+
+static void xelpg_setup_private_ppat(struct intel_gt *gt)
+{
+   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(0),
+MTL_PPAT_L4_0_WB);
+   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(1),
+MTL_PPAT_L4_1_WT);
+   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(2),
+MTL_PPAT_L4_3_UC);
+   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(3),
+MTL_PPAT_L4_0_WB | MTL_2_COH_1W);
+   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(4),
+MTL_PPAT_L4_0_WB | MTL_3_COH_2W);
+
+   /*
+* Remaining PAT entries are left at the hardware-default
+* fully-cached setting
+*/
+}
+
 static void tgl_setup_private_ppat(struct intel_uncore *uncore)
 {
/* TGL doesn't support LLC or AGE settings */
@@ -603,7 +641,14 @@ void setup_private_pat(struct intel_gt *gt)
 
GEM_BUG_ON(GRAPHICS_VER(i915) < 8);
 
-   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
+   if (gt->type == GT_MEDIA) {
+   xelpmp_setup_private_ppat(gt->uncore);
+   return;
+   }
+
+   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+   xelpg_setup_private_ppat(gt);
+   else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
xehp_setup_private_ppat(gt);
else if (GRAPHICS_VER(i915) >= 12)
tgl_setup_private_ppat(uncore);
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index 69ce55f517f5..ea17849e7a5c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -147,6 +147,14 @@ typedef u64 gen8_pte_t;
 #define GEN8_PDE_IPS_64K BIT(11)
 #define GEN8_PDE_PS_2M   BIT(7)
 
+#define MTL_PPAT_L4_CACHE_POLICY_MASK  REG_GENMASK(3, 2)
+#define MTL_PAT_INDEX_COH_MODE_MASKREG_GENMASK(1, 0)
+#define MTL_PPAT_L4_3_UC
