On Mon, 7 Jun 2010 13:54:05 +1000 (EST), "Robert Lowery" 
<rglow...@exemail.com.au> wrote:
> > We had two cases recently where the rounding to powers of two hurt
> > badly: 4:2:0 YUV HD video frames would round up from 2.2MB to 4MB, and
> > Urban Terror was hitting aperture size limitations.  Mipmap trees for
> > power of two sizes will land right in the middle between two cache
> > buckets.
> >
> > By giving a few more sizes between powers of two, Urban Terror on my
> > 945 ends up consuming 207MB of GEM objects instead of 272MB.
> > ---
> >  intel/intel_bufmgr_gem.c |   64
> > +++++++++++++++++++++++++++++++++++-----------
> >  1 files changed, 49 insertions(+), 15 deletions(-)
> >
> > diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
> > index b76fd7e..c3e189e 100644
> > --- a/intel/intel_bufmgr_gem.c
> > +++ b/intel/intel_bufmgr_gem.c
> > @@ -66,6 +66,8 @@
> >             fprintf(stderr, __VA_ARGS__);           \
> >  } while (0)
> >
> > +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
> > +
> >  typedef struct _drm_intel_bo_gem drm_intel_bo_gem;
> >
> >  struct drm_intel_gem_bo_bucket {
> > @@ -73,10 +75,6 @@ struct drm_intel_gem_bo_bucket {
> >     unsigned long size;
> >  };
> >
> > -/* Only cache objects up to 64MB.  Bigger than that, and the rounding of
> > the
> > - * size makes many operations fail that wouldn't otherwise.
> > - */
> > -#define DRM_INTEL_GEM_BO_BUCKETS   14
> >  typedef struct _drm_intel_bufmgr_gem {
> >     drm_intel_bufmgr bufmgr;
> >
> > @@ -93,7 +91,8 @@ typedef struct _drm_intel_bufmgr_gem {
> >     int exec_count;
> >
> >     /** Array of lists of cached gem objects of power-of-two sizes */
> > -   struct drm_intel_gem_bo_bucket cache_bucket[DRM_INTEL_GEM_BO_BUCKETS];
> > +   struct drm_intel_gem_bo_bucket cache_bucket[14 * 4];
> > +   int num_buckets;
> >
> >     uint64_t gtt_size;
> >     int available_fences;
> > @@ -285,7 +284,7 @@ drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem
> > *bufmgr_gem,
> >  {
> >     int i;
> >
> > -   for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++) {
> > +   for (i = 0; i < bufmgr_gem->num_buckets; i++) {
> >             struct drm_intel_gem_bo_bucket *bucket =
> >                 &bufmgr_gem->cache_bucket[i];
> >             if (bucket->size >= size) {
> > @@ -822,7 +821,7 @@ drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem
> > *bufmgr_gem, time_t time)
> >  {
> >     int i;
> >
> > -   for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++) {
> > +   for (i = 0; i < bufmgr_gem->num_buckets; i++) {
> >             struct drm_intel_gem_bo_bucket *bucket =
> >                 &bufmgr_gem->cache_bucket[i];
> >
> > @@ -1250,7 +1249,7 @@ drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr
> > *bufmgr)
> >     pthread_mutex_destroy(&bufmgr_gem->lock);
> >
> >     /* Free any cached buffer objects we were going to reuse */
> > -   for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++) {
> > +   for (i = 0; i < bufmgr_gem->num_buckets; i++) {
> >             struct drm_intel_gem_bo_bucket *bucket =
> >                 &bufmgr_gem->cache_bucket[i];
> >             drm_intel_bo_gem *bo_gem;
> > @@ -1960,6 +1959,46 @@ drm_intel_gem_bo_references(drm_intel_bo *bo,
> > drm_intel_bo *target_bo)
> >     return 0;
> >  }
> >
> > +static void
> > +add_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size)
> > +{
> > +   unsigned int i = bufmgr_gem->num_buckets;
> > +
> > +   assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket));
> > +
> > +   DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
> > +   bufmgr_gem->cache_bucket[i].size = size;
> > +   bufmgr_gem->num_buckets++;
> > +}
> > +
> > +static void
> > +init_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem)
> > +{
> > +   unsigned long size, cache_max_size = 64 * 1024 * 1024;
> > +
> > +   /* Initialize the linked lists for BO reuse cache. */
> > +   for (size = 4096; size <= cache_max_size; size *= 2) {
> > +           add_bucket(bufmgr_gem, size);
> > +
> > +           /* OK, so power of two buckets was too wasteful of
> > +            * memory.  Give 3 other sizes between each power of
> > +            * two, to hopefully cover things accurately enough.
> > +            * (The alternative is probably to just go for exact
> > +            * matching of sizes, and assume that for things like
> > +            * composited window resize the tiled width/height
> > +            * alignment and rounding of sizes to pages will get
> > +            * us useful cache hit rates anyway)
> > +            */
> > +           if (size == 8192) {
> > +                   add_bucket(bufmgr_gem, size + size / 2);
> > +           } else if (size < cache_max_size) {
> > +                   add_bucket(bufmgr_gem, size + size * 1 / 4);
> > +                   add_bucket(bufmgr_gem, size + size * 2 / 4);
> > +                   add_bucket(bufmgr_gem, size + size * 3 / 4);
> > +           }
> > +   }
> > +}
> > +
> 
> Are bucket sizes that are not a multiple of 4096 supported and/or worthwhile?
> 
> The above code will create buckets of sizes 5120, 6144 and 7168 in the
> size = 4096 case.
> 
> Apologies for the noise if I'm off the mark here.

Yeah, krh noted this bug on IRC.  I'm currently looking at the alternate
patch that does not use buckets at all.

Attachment: pgpG91vgpzmIT.pgp
Description: PGP signature

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to