Re: [RFC 1/2] drm/i915: Refactor PAT/object cache handling

2023-07-14 Thread Tvrtko Ursulin



On 14/07/2023 06:36, Yang, Fei wrote:

[snip]

@@ -326,10 +330,10 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, 
void *data,
   goto out;
   }

- if (i915_gem_object_has_cache_level(obj, I915_CACHE_LLC) ||
- i915_gem_object_has_cache_level(obj, I915_CACHE_L3_LLC))
+ if (i915_gem_object_has_cache_mode(obj, I915_CACHE_MODE_WB) &&
+ i915_gem_object_has_cache_flag(obj, I915_CACHE_FLAG_COH1W) == 1)


Need to check L3 flag as well? The original code has L3_LLC.
if (i915_gem_object_has_cache_mode(obj, I915_CACHE_MODE_WB) &&
 (i915_gem_object_has_cache_flag(obj, I915_CACHE_FLAG_COH1W) == 1 ||
  i915_gem_object_has_cache_flag(obj, I915_CACHE_FLAG_L3) == 1))


Don't think so, because I have:

  [2] = __I915_CACHE(WB, BIT(I915_CACHE_FLAG_COH1W) | BIT(I915_CACHE_FLAG_L3)), 
\

So L3 and COH1W are both set for the equivalent of old I915_CACHE_L3_LLC, 
meaning I915_CACHING_CACHED will be returned for either - same as old code.




   args->caching = I915_CACHING_CACHED;


[snip]

+int i915_cache_init(struct drm_i915_private *i915)
+{
+ struct drm_printer p = drm_info_printer(i915->drm.dev);
+ char buf[I915_CACHE_NAME_LEN];
+ int ret;
+
+ i915->cache = HAS_LLC(i915) ? I915_CACHE_CACHED : I915_CACHE_NONE;
+ i915_cache_print(buf, sizeof(buf), "", i915->cache);
+ drm_printf(, "Coherent cache mode %s", buf);
+
+ ret = i915_cache_find_pat(i915, I915_CACHE_NONE);
+ if (ret < 0)
+ return -ENODEV;
+ drm_info(>drm, "Using PAT index %u for uncached access\n", ret);
+ i915->pat_uc = ret;
+
+ ret = i915_cache_find_pat(i915, I915_CACHE_CACHED);
+ if (ret < 0)
+ return -ENODEV;
+ drm_info(>drm, "Using PAT index %u for write-back access\n", ret);
+ i915->pat_wb = ret;


I think i915->pat_wt is needed as well, because display driver has code like 
this,
HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE

How did you handle functions like initial_plane_vma() and 
i915_gem_object_pin_to_display_plane()?


They have:

i915_gem_object_set_cache_coherency(obj, HAS_WT(i915) ?
I915_CACHE_WT : I915_CACHE_NONE);

Where HAS_WT is equal to HAS_EDRAM. So it is not always WT. I don't have the 
history/background of that but that is how it is.

So I could add i915->pat_wt but a) I don't think it is right for display (display 
would need more like i915->display_pat), and b) there would be no call sites for 
it.

i915->display_pat (or display_cache actually) would be similar to how I added 
i915->cache, not be best name I know, but it is replacing many occurrences of:

  cache_level = HAS_LLC(mem->i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
  i915_gem_object_set_cache_coherency(obj, cache_level)

I am unsure what would be the proper name for i915->cache, which would make it 
correctly self-documenting?




+ return 0;
+}


[snip]

-#define TGL_CACHELEVEL \
- .cachelevel_to_pat = { \
- [I915_CACHE_NONE]   = 3, \
- [I915_CACHE_LLC]= 0, \
- [I915_CACHE_L3_LLC] = 0, \
- [I915_CACHE_WT] = 2, \
+/*
+ * TODO/QQQ
+ *
+ * PAT 0 - is it 1-way or 2-way?


1-way


+ */
+#define GEN12_CACHE_MODES \
+ .cache_modes = { \
+ [0] = _I915_CACHE(WB, COH1W), \
+ [1] = I915_CACHE(WC), \
+ [2] = I915_CACHE(WT), \
+ [3] = I915_CACHE(UC), \
   }


[snip]

-#define PVC_CACHELEVEL \
- .cachelevel_to_pat = { \
- [I915_CACHE_NONE]   = 0, \
- [I915_CACHE_LLC]= 3, \
- [I915_CACHE_L3_LLC] = 3, \
- [I915_CACHE_WT] = 2, \
+/*
+ * TODO/QQQ
+ *
+ * PAT 3 - is it 1-way or 2-way?


1-way

PVC access is always coherent, it should have 1-way for [5] and [7] as well.


Thanks x2!

Regards,

Tvrtko




+ */
+#define PVC_CACHE_MODES \
+ .cache_modes = { \
+ [0] = I915_CACHE(UC), \
+ [1] = I915_CACHE(WC), \
+ [2] = I915_CACHE(WT), \
+ [3] = _I915_CACHE(WB, COH1W), \
+ [4] = _I915_CACHE(WT, CLOS1), \
+ [5] = _I915_CACHE(WB, CLOS1), \
+ [6] = _I915_CACHE(WT, CLOS2), \
+ [7] = _I915_CACHE(WB, CLOS2), \
   }


RE: [RFC 1/2] drm/i915: Refactor PAT/object cache handling

2023-07-13 Thread Yang, Fei
[snip]
> @@ -326,10 +330,10 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, 
> void *data,
>   goto out;
>   }
>
> - if (i915_gem_object_has_cache_level(obj, I915_CACHE_LLC) ||
> - i915_gem_object_has_cache_level(obj, I915_CACHE_L3_LLC))
> + if (i915_gem_object_has_cache_mode(obj, I915_CACHE_MODE_WB) &&
> + i915_gem_object_has_cache_flag(obj, I915_CACHE_FLAG_COH1W) == 1)

Need to check L3 flag as well? The original code has L3_LLC.
if (i915_gem_object_has_cache_mode(obj, I915_CACHE_MODE_WB) &&
(i915_gem_object_has_cache_flag(obj, I915_CACHE_FLAG_COH1W) == 1 ||
 i915_gem_object_has_cache_flag(obj, I915_CACHE_FLAG_L3) == 1))

>   args->caching = I915_CACHING_CACHED;

[snip]
> +int i915_cache_init(struct drm_i915_private *i915)
> +{
> + struct drm_printer p = drm_info_printer(i915->drm.dev);
> + char buf[I915_CACHE_NAME_LEN];
> + int ret;
> +
> + i915->cache = HAS_LLC(i915) ? I915_CACHE_CACHED : I915_CACHE_NONE;
> + i915_cache_print(buf, sizeof(buf), "", i915->cache);
> + drm_printf(, "Coherent cache mode %s", buf);
> +
> + ret = i915_cache_find_pat(i915, I915_CACHE_NONE);
> + if (ret < 0)
> + return -ENODEV;
> + drm_info(>drm, "Using PAT index %u for uncached access\n", ret);
> + i915->pat_uc = ret;
> +
> + ret = i915_cache_find_pat(i915, I915_CACHE_CACHED);
> + if (ret < 0)
> + return -ENODEV;
> + drm_info(>drm, "Using PAT index %u for write-back access\n", ret);
> + i915->pat_wb = ret;

I think i915->pat_wt is needed as well, because display driver has code like 
this,
HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE

How did you handle functions like initial_plane_vma() and 
i915_gem_object_pin_to_display_plane()?

> + return 0;
> +}

[snip]
> -#define TGL_CACHELEVEL \
> - .cachelevel_to_pat = { \
> - [I915_CACHE_NONE]   = 3, \
> - [I915_CACHE_LLC]= 0, \
> - [I915_CACHE_L3_LLC] = 0, \
> - [I915_CACHE_WT] = 2, \
> +/*
> + * TODO/QQQ
> + *
> + * PAT 0 - is it 1-way or 2-way?

1-way

> + */
> +#define GEN12_CACHE_MODES \
> + .cache_modes = { \
> + [0] = _I915_CACHE(WB, COH1W), \
> + [1] = I915_CACHE(WC), \
> + [2] = I915_CACHE(WT), \
> + [3] = I915_CACHE(UC), \
>   }

[snip]
> -#define PVC_CACHELEVEL \
> - .cachelevel_to_pat = { \
> - [I915_CACHE_NONE]   = 0, \
> - [I915_CACHE_LLC]= 3, \
> - [I915_CACHE_L3_LLC] = 3, \
> - [I915_CACHE_WT] = 2, \
> +/*
> + * TODO/QQQ
> + *
> + * PAT 3 - is it 1-way or 2-way?

1-way

PVC access is always coherent, it should have 1-way for [5] and [7] as well.

> + */
> +#define PVC_CACHE_MODES \
> + .cache_modes = { \
> + [0] = I915_CACHE(UC), \
> + [1] = I915_CACHE(WC), \
> + [2] = I915_CACHE(WT), \
> + [3] = _I915_CACHE(WB, COH1W), \
> + [4] = _I915_CACHE(WT, CLOS1), \
> + [5] = _I915_CACHE(WB, CLOS1), \
> + [6] = _I915_CACHE(WT, CLOS2), \
> + [7] = _I915_CACHE(WB, CLOS2), \
>   }