On Wed, 2017-08-30 at 02:14 +0800, Zhi Wang wrote:
> The private PAT management is to support PPAT entry manipulation. Two
> APIs are introduced for dynamically managing PPAT entries: intel_ppat_get
> and intel_ppat_put.
> 
> intel_ppat_get will search for an existing PPAT entry which perfectly
> matches the required PPAT value. If not, it will try to allocate or
> return a partially matched PPAT entry if there is any available PPAT
> indexes or not.
> 
> intel_ppat_put will put back the PPAT entry which comes from
> intel_ppat_get. If it's dynamically allocated, the reference count will
> be decreased. If the reference count turns into zero, the PPAT index is
> freed again.
> 
> Besides, another two callbacks are introduced to support the private PAT
> management framework. One is ppat->update_hw(), which writes the PPAT
> configurations in ppat->entries into HW. Another one is ppat->match, which
> will return a score to show how two PPAT values match with each other.
> 
> v6:
> 
> - Address all comments from Chris:
> http://www.spinics.net/lists/intel-gfx/msg136850.html
> 
> - Address all comments from Joonas:
> http://www.spinics.net/lists/intel-gfx/msg136845.html
> 
> v5:
> 
> - Add check and warnnings for those platforms which don't have PPAT.
> 
> v3:
> 
> - Introduce dirty bitmap for PPAT registers. (Chris)
> - Change the name of the pointer "dev_priv" to "i915". (Chris)
> - intel_ppat_{get, put} returns/takes a const intel_ppat_entry *. (Chris)
> 
> v2:
> 
> - API re-design. (Chris)
> 
> Cc: Ben Widawsky <[email protected]>
> Cc: Rodrigo Vivi <[email protected]>
> Cc: Chris Wilson <[email protected]>
> Cc: Joonas Lahtinen <[email protected]>
> Signed-off-by: Zhi Wang <[email protected]>
> ---
>  drivers/gpu/drm/i915/i915_drv.h     |   2 +
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 273 
> +++++++++++++++++++++++++++++-------
>  drivers/gpu/drm/i915/i915_gem_gtt.h |  36 +++++
>  3 files changed, 262 insertions(+), 49 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 7587ef5..5ffde10 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2312,6 +2312,8 @@ struct drm_i915_private {
>       DECLARE_HASHTABLE(mm_structs, 7);
>       struct mutex mm_lock;
>  
> +     struct intel_ppat ppat;
> +
>       /* Kernel Modesetting */
>  
>       struct intel_crtc *plane_to_crtc_mapping[I915_MAX_PIPES];
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
> b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index b74fa9d..3106142 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -2816,41 +2816,200 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, 
> u64 size)
>       return 0;
>  }
>  
> -static void cnl_setup_private_ppat(struct drm_i915_private *dev_priv)
> +static struct intel_ppat_entry *
> +__alloc_ppat_entry(struct intel_ppat *ppat, unsigned int index, u8 value)
>  {
> +     struct intel_ppat_entry *entry = &ppat->entries[index];
> +
> +     GEM_BUG_ON(index >= ppat->max_entries);
> +     GEM_BUG_ON(test_bit(index, ppat->used));
> +
> +     entry->ppat = ppat;
> +     entry->value = value;
> +     kref_init(&entry->ref);
> +     set_bit(index, ppat->used);
> +     set_bit(index, ppat->dirty);
> +
> +     return entry;
> +}
> +
> +static void __free_ppat_entry(struct intel_ppat_entry *entry)
> +{
> +     struct intel_ppat *ppat = entry->ppat;
> +     unsigned int index = entry - ppat->entries;
> +
> +     GEM_BUG_ON(index >= ppat->max_entries);
> +     GEM_BUG_ON(!test_bit(index, ppat->used));
> +
> +     entry->value = ppat->clear_value;
> +     clear_bit(index, ppat->used);
> +     set_bit(index, ppat->dirty);
> +}
> +
> +/**
> + * intel_ppat_get - get a usable PPAT entry
> + * @i915: i915 device instance
> + * @value: the PPAT value required by the caller
> + *
> + * The function tries to search if there is an existing PPAT entry which
> + * matches with the required value. If perfectly matched, the existing PPAT
> + * entry will be used. If only partially matched, it will try to check if
> + * there is any available PPAT index. If yes, it will allocate a new PPAT
> + * index for the required entry and update the HW. If not, the partially
> + * matched entry will be used.
> + */
> +const struct intel_ppat_entry *
> +intel_ppat_get(struct drm_i915_private *i915, u8 value)
> +{
> +     struct intel_ppat *ppat = &i915->ppat;
> +     struct intel_ppat_entry *entry;
> +     unsigned int scanned, best_score;
> +     int i;
> +
> +     GEM_BUG_ON(!ppat->max_entries);
> +
> +     scanned = best_score = 0;
> +
> +     for_each_set_bit(i, ppat->used, ppat->max_entries) {
> +             unsigned int score;
> +
> +             entry = &ppat->entries[i];
> +             score = ppat->match(entry->value, value);
> +             if (score > best_score) {
> +                     if (score == INTEL_PPAT_PERFECT_MATCH) {
> +                             kref_get(&entry->ref);
> +                             return entry;
> +                     }
> +                     best_score = score;
> +             }
> +             scanned++;
> +     }
> +
> +     if (scanned == ppat->max_entries) {
> +             if (!best_score)
> +                     return ERR_PTR(-ENOSPC);
> +
> +             kref_get(&entry->ref);
> +             return entry;
> +     }
> +
> +     i = find_first_zero_bit(ppat->used, ppat->max_entries);
> +     entry = __alloc_ppat_entry(ppat, i, value);
> +     ppat->update_hw(i915);
> +     return entry;
> +}
> +
> +static void release_ppat(struct kref *kref)
> +{
> +     struct intel_ppat_entry *entry =
> +             container_of(kref, struct intel_ppat_entry, ref);
> +     struct drm_i915_private *i915 = entry->ppat->i915;
> +
> +     __free_ppat_entry(entry);
> +     entry->ppat->update_hw(i915);
> +}
> +
> +/**
> + * intel_ppat_put - put back the PPAT entry got from intel_ppat_get()
> + * @entry: an intel PPAT entry
> + *
> + * Put back the PPAT entry got from intel_ppat_get(). If the PPAT index of 
> the
> + * entry is dynamically allocated, its reference count will be decreased. 
> Once
> + * the reference count becomes into zero, the PPAT index becomes free again.
> + */
> +void intel_ppat_put(const struct intel_ppat_entry *entry)
> +{
> +     struct intel_ppat *ppat = entry->ppat;
> +     unsigned int index = entry - ppat->entries;
> +
> +     GEM_BUG_ON(!ppat->max_entries);
> +
> +     kref_put(&ppat->entries[index].ref, release_ppat);
> +}
> +
> +static void cnl_private_pat_update_hw(struct drm_i915_private *dev_priv)
> +{
> +     struct intel_ppat *ppat = &dev_priv->ppat;
> +     int i;
> +
> +     for_each_set_bit(i, ppat->dirty, ppat->max_entries) {
> +             I915_WRITE(GEN10_PAT_INDEX(i), ppat->entries[i].value);
> +             clear_bit(i, ppat->dirty);
> +     }
> +}
> +
> +static void bdw_private_pat_update_hw(struct drm_i915_private *dev_priv)
> +{
> +     struct intel_ppat *ppat = &dev_priv->ppat;
> +     u64 pat = 0;
> +     int i;
> +
> +     for (i = 0; i < ppat->max_entries; i++)
> +             pat |= GEN8_PPAT(i, ppat->entries[i].value);
> +
> +     bitmap_clear(ppat->dirty, 0, ppat->max_entries);
> +
> +     I915_WRITE(GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
> +     I915_WRITE(GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
> +}
> +
> +static unsigned int bdw_private_pat_match(u8 src, u8 dst)
> +{
> +     unsigned int score = 0;
> +
> +     /* Cache attribute has to be matched. */
> +     if (GEN8_PPAT_GET_CA(src) != GEN8_PPAT_GET_CA(dst))
> +             return 0;
> +
> +     if (GEN8_PPAT_GET_TC(src) == GEN8_PPAT_GET_TC(dst))
> +             score += 2;
> +
> +     if(GEN8_PPAT_GET_AGE(src) == GEN8_PPAT_GET_AGE(dst))
> +             score += 1;
> +
> +     if (score == 3)
> +             return INTEL_PPAT_PERFECT_MATCH;
> +
> +     return score;
> +}
> +
> +static unsigned int chv_private_pat_match(u8 src, u8 dst)
> +{
> +     return (CHV_PPAT_GET_SNOOP(src) == CHV_PPAT_GET_SNOOP(dst)) ?
> +             INTEL_PPAT_PERFECT_MATCH : 0;
> +}
> +
> +static void cnl_setup_private_ppat(struct intel_ppat *ppat)
> +{
> +     ppat->max_entries = 8;
> +     ppat->update_hw = cnl_private_pat_update_hw;
> +     ppat->match = bdw_private_pat_match;
> +     ppat->clear_value = GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3);
> +
>       /* XXX: spec is unclear if this is still needed for CNL+ */
> -     if (!USES_PPGTT(dev_priv)) {
> -             I915_WRITE(GEN10_PAT_INDEX(0), GEN8_PPAT_UC);
> +     if (!USES_PPGTT(ppat->i915)) {
> +             __alloc_ppat_entry(ppat, PPAT_CACHED_PDE_INDEX, GEN8_PPAT_UC);
>               return;
>       }
>  
> -     I915_WRITE(GEN10_PAT_INDEX(0), GEN8_PPAT_WB | GEN8_PPAT_LLC);
> -     I915_WRITE(GEN10_PAT_INDEX(1), GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
> -     I915_WRITE(GEN10_PAT_INDEX(2), GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
> -     I915_WRITE(GEN10_PAT_INDEX(3), GEN8_PPAT_UC);
> -     I915_WRITE(GEN10_PAT_INDEX(4), GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
> -     I915_WRITE(GEN10_PAT_INDEX(5), GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
> -     I915_WRITE(GEN10_PAT_INDEX(6), GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
> -     I915_WRITE(GEN10_PAT_INDEX(7), GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));

Gen8 allocation style doesn't work on CNL.
I don't see where all these registers would be getting properly written.

> +     /* See gen8_pte_encode() for the mapping from cache-level to PPAT */
> +     __alloc_ppat_entry(ppat, PPAT_CACHED_PDE_INDEX, GEN8_PPAT_WB | 
> GEN8_PPAT_LLC);
> +     __alloc_ppat_entry(ppat, PPAT_DISPLAY_ELLC_INDEX, GEN8_PPAT_WT | 
> GEN8_PPAT_LLCELLC);
> +     __alloc_ppat_entry(ppat, PPAT_UNCACHED_INDEX, GEN8_PPAT_UC);
> +     __alloc_ppat_entry(ppat, PPAT_CACHED_INDEX, GEN8_PPAT_LLCELLC | 
> GEN8_PPAT_AGE(0));
>  }
>  
>  /* The GGTT and PPGTT need a private PPAT setup in order to handle 
> cacheability
>   * bits. When using advanced contexts each context stores its own PAT, but
>   * writing this data shouldn't be harmful even in those cases. */
> -static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
> +static void bdw_setup_private_ppat(struct intel_ppat *ppat)
>  {
> -     u64 pat;
> +     ppat->max_entries = 8;
> +     ppat->update_hw = bdw_private_pat_update_hw;
> +     ppat->match = bdw_private_pat_match;
> +     ppat->clear_value = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3);
>  
> -     pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC)     | /* for normal 
> objects, no eLLC */
> -           GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something 
> pointing to ptes? */
> -           GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout 
> with eLLC */
> -           GEN8_PPAT(3, GEN8_PPAT_UC)                     | /* Uncached 
> objects, mostly for scanout */
> -           GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) 
> |
> -           GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) 
> |
> -           GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) 
> |
> -           GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
> -
> -     if (!USES_PPGTT(dev_priv))
> +     if (!USES_PPGTT(ppat->i915)) {
>               /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
>                * so RTL will always use the value corresponding to
>                * pat_sel = 000".
> @@ -2864,17 +3023,26 @@ static void bdw_setup_private_ppat(struct 
> drm_i915_private *dev_priv)
>                * So we can still hold onto all our assumptions wrt cpu
>                * clflushing on LLC machines.
>                */
> -             pat = GEN8_PPAT(0, GEN8_PPAT_UC);
> +             __alloc_ppat_entry(ppat, PPAT_CACHED_PDE_INDEX, GEN8_PPAT_UC);
> +             return;
> +     }
>  
> -     /* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
> -      * write would work. */
> -     I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
> -     I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
> +     /* See gen8_pte_encode() for the mapping from cache-level to PPAT */
> +     /* for normal objects, no eLLC */
> +     __alloc_ppat_entry(ppat, PPAT_CACHED_PDE_INDEX, GEN8_PPAT_WB | 
> GEN8_PPAT_LLC);
> +     /* for scanout with eLLC */
> +     __alloc_ppat_entry(ppat, PPAT_DISPLAY_ELLC_INDEX, GEN8_PPAT_WT | 
> GEN8_PPAT_LLCELLC);
> +     /* Uncached objects, mostly for scanout */
> +     __alloc_ppat_entry(ppat, PPAT_UNCACHED_INDEX, GEN8_PPAT_UC);
> +     __alloc_ppat_entry(ppat, PPAT_CACHED_INDEX, GEN8_PPAT_WB | 
> GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
>  }
>  
> -static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
> +static void chv_setup_private_ppat(struct intel_ppat *ppat)
>  {
> -     u64 pat;
> +     ppat->max_entries = 8;
> +     ppat->update_hw = bdw_private_pat_update_hw;
> +     ppat->match = chv_private_pat_match;
> +     ppat->clear_value = CHV_PPAT_SNOOP;
>  
>       /*
>        * Map WB on BDW to snooped on CHV.
> @@ -2894,17 +3062,12 @@ static void chv_setup_private_ppat(struct 
> drm_i915_private *dev_priv)
>        * Which means we must set the snoop bit in PAT entry 0
>        * in order to keep the global status page working.
>        */
> -     pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
> -           GEN8_PPAT(1, 0) |
> -           GEN8_PPAT(2, 0) |
> -           GEN8_PPAT(3, 0) |
> -           GEN8_PPAT(4, CHV_PPAT_SNOOP) |
> -           GEN8_PPAT(5, CHV_PPAT_SNOOP) |
> -           GEN8_PPAT(6, CHV_PPAT_SNOOP) |
> -           GEN8_PPAT(7, CHV_PPAT_SNOOP);
>  
> -     I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
> -     I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
> +     /* See gen8_pte_encode() for the mapping from cache-level to PPAT */
> +     __alloc_ppat_entry(ppat, PPAT_CACHED_PDE_INDEX, CHV_PPAT_SNOOP);
> +     __alloc_ppat_entry(ppat, PPAT_DISPLAY_ELLC_INDEX, 0);
> +     __alloc_ppat_entry(ppat, PPAT_UNCACHED_INDEX, 0);
> +     __alloc_ppat_entry(ppat, PPAT_CACHED_INDEX, CHV_PPAT_SNOOP);
>  }
>  
>  static void gen6_gmch_remove(struct i915_address_space *vm)
> @@ -2917,12 +3080,27 @@ static void gen6_gmch_remove(struct 
> i915_address_space *vm)
>  
>  static void setup_private_pat(struct drm_i915_private *dev_priv)
>  {
> +     struct intel_ppat *ppat = &dev_priv->ppat;
> +     int i;
> +
> +     ppat->i915 = dev_priv;
> +
>       if (INTEL_GEN(dev_priv) >= 10)
> -             cnl_setup_private_ppat(dev_priv);
> +             cnl_setup_private_ppat(ppat);
>       else if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv))
> -             chv_setup_private_ppat(dev_priv);
> +             chv_setup_private_ppat(ppat);
>       else
> -             bdw_setup_private_ppat(dev_priv);
> +             bdw_setup_private_ppat(ppat);
> +
> +     GEM_BUG_ON(ppat->max_entries > INTEL_MAX_PPAT_ENTRIES);
> +
> +     for_each_clear_bit(i, ppat->used, ppat->max_entries) {
> +             ppat->entries[i].value = ppat->clear_value;
> +             ppat->entries[i].ppat = ppat;
> +             set_bit(i, ppat->dirty);
> +     }
> +
> +     ppat->update_hw(dev_priv);
>  }
>  
>  static int gen8_gmch_probe(struct i915_ggtt *ggtt)
> @@ -3236,13 +3414,10 @@ void i915_gem_restore_gtt_mappings(struct 
> drm_i915_private *dev_priv)
>       ggtt->base.closed = false;
>  
>       if (INTEL_GEN(dev_priv) >= 8) {
> -             if (INTEL_GEN(dev_priv) >= 10)
> -                     cnl_setup_private_ppat(dev_priv);
> -             else if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv))
> -                     chv_setup_private_ppat(dev_priv);
> -             else
> -                     bdw_setup_private_ppat(dev_priv);
> +             struct intel_ppat *ppat = &dev_priv->ppat;
>  
> +             bitmap_set(ppat->dirty, 0, ppat->max_entries);
> +             dev_priv->ppat.update_hw(dev_priv);
>               return;
>       }
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h 
> b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index b4e3aa7..e10ca89 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -143,6 +143,11 @@ typedef u64 gen8_ppgtt_pml4e_t;
>  #define GEN8_PPAT_ELLC_OVERRIDE              (0<<2)
>  #define GEN8_PPAT(i, x)                      ((u64)(x) << ((i) * 8))
>  
> +#define GEN8_PPAT_GET_CA(x) ((x) & 3)
> +#define GEN8_PPAT_GET_TC(x) ((x) & (3 << 2))
> +#define GEN8_PPAT_GET_AGE(x) ((x) & (3 << 4))
> +#define CHV_PPAT_GET_SNOOP(x) ((x) & (1 << 6))
> +
>  struct sg_table;
>  
>  struct intel_rotation_info {
> @@ -536,6 +541,37 @@ i915_vm_to_ggtt(struct i915_address_space *vm)
>       return container_of(vm, struct i915_ggtt, base);
>  }
>  
> +#define INTEL_MAX_PPAT_ENTRIES 8
> +#define INTEL_PPAT_PERFECT_MATCH (~0U)
> +
> +struct intel_ppat;
> +
> +struct intel_ppat_entry {
> +     struct intel_ppat *ppat;
> +     struct kref ref;
> +     u8 value;
> +};
> +
> +struct intel_ppat {
> +     struct intel_ppat_entry entries[INTEL_MAX_PPAT_ENTRIES];
> +     DECLARE_BITMAP(used, INTEL_MAX_PPAT_ENTRIES);
> +     DECLARE_BITMAP(dirty, INTEL_MAX_PPAT_ENTRIES);
> +     unsigned int max_entries;
> +     u8 clear_value;
> +     /*
> +      * Return a score to show how two PPAT values match,
> +      * a INTEL_PPAT_PERFECT_MATCH indicates a perfect match
> +      */
> +     unsigned int (*match)(u8 src, u8 dst);
> +     void (*update_hw)(struct drm_i915_private *i915);
> +
> +     struct drm_i915_private *i915;
> +};
> +
> +const struct intel_ppat_entry *
> +intel_ppat_get(struct drm_i915_private *i915, u8 value);
> +void intel_ppat_put(const struct intel_ppat_entry *entry);
> +
>  int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915);
>  void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915);
>  

_______________________________________________
Intel-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to