[Intel-gfx] [PATCH 1/2] intel: Defer setting madv on the bo cache

2015-04-14 Thread Chris Wilson
Convert the bo-cache into 2 phases:

1. A two second cache of recently used buffers, keep untouched.
2. A two second cache of older buffers, marked for eviction.

This helps reduce ioctl traffic on a rapid turnover in working sets. The
issue is that even a 2 second window is enough for an application to
fill all of memory with inactive buffers (and we would rely on the
oom-killer identifying the right culprit).

Signed-off-by: Chris Wilson ch...@chris-wilson.co.uk
Cc: Daniel Vetter daniel.vet...@ffwll.ch
Cc: Ben Widawsky benjamin.widaw...@intel.com
---
 intel/intel_bufmgr_gem.c | 42 +++---
 1 file changed, 35 insertions(+), 7 deletions(-)

diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index f5d6130..ecbf8ee 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -211,6 +211,11 @@ struct _drm_intel_bo_gem {
bool used_as_reloc_target;
 
/**
+* Are we keeping this buffer around in semi-permenant cache?
+*/
+   bool dontneed;
+
+   /**
 * Boolean of whether we have encountered an error whilst building the 
relocation tree.
 */
bool has_error;
@@ -714,7 +719,8 @@ retry:
}
 
if (alloc_from_cache) {
-   if (!drm_intel_gem_bo_madvise_internal
+   if (bo_gem-dontneed 
+   !drm_intel_gem_bo_madvise_internal
(bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) {
drm_intel_gem_bo_free(bo_gem-bo);
drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem,
@@ -1150,6 +1156,20 @@ drm_intel_gem_bo_mark_mmaps_incoherent(drm_intel_bo *bo)
 #endif
 }
 
+static inline void __splice(const drmMMListHead *list,
+   drmMMListHead *prev,
+   drmMMListHead *next)
+{
+   drmMMListHead *first = list-next;
+   drmMMListHead *last = list-prev;
+
+   first-prev = prev;
+   prev-next = first;
+
+   last-next = next;
+   next-prev = last;
+}
+
 /** Frees all cached buffers significantly older than @time. */
 static void
 drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time)
@@ -1162,19 +1182,29 @@ drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem 
*bufmgr_gem, time_t time)
for (i = 0; i  bufmgr_gem-num_buckets; i++) {
struct drm_intel_gem_bo_bucket *bucket =
bufmgr_gem-cache_bucket[i];
+   drmMMListHead madv;
 
+   DRMINITLISTHEAD(madv);
while (!DRMLISTEMPTY(bucket-head)) {
drm_intel_bo_gem *bo_gem;
 
bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
  bucket-head.next, head);
-   if (time - bo_gem-free_time = 1)
+   if (time - bo_gem-free_time  2*(1+bo_gem-dontneed))
break;
 
DRMLISTDEL(bo_gem-head);
-
-   drm_intel_gem_bo_free(bo_gem-bo);
+   if (bo_gem-dontneed) {
+   drm_intel_gem_bo_free(bo_gem-bo);
+   } else {
+   drm_intel_gem_bo_madvise_internal(bufmgr_gem, 
bo_gem,
+ 
I915_MADV_DONTNEED);
+   bo_gem-dontneed = 1;
+   DRMLISTADDTAIL(bo_gem-head, madv);
+   }
}
+   if (!DRMLISTEMPTY(madv))
+   __splice(madv, bucket-head, bucket-head.next);
}
 
bufmgr_gem-time = time;
@@ -1285,9 +1315,7 @@ drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, 
time_t time)
 
bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo-size);
/* Put the buffer into our internal cache for reuse if we can. */
-   if (bufmgr_gem-bo_reuse  bo_gem-reusable  bucket != NULL 
-   drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
- I915_MADV_DONTNEED)) {
+   if (bufmgr_gem-bo_reuse  bo_gem-reusable  bucket != NULL) {
bo_gem-free_time = time;
 
bo_gem-name = NULL;
-- 
2.1.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 1/2] intel: Defer setting madv on the bo cache

2015-04-14 Thread Chris Wilson
On Tue, Apr 14, 2015 at 12:38:56PM -0700, Ben Widawsky wrote:
   /** Frees all cached buffers significantly older than @time. */
   static void
   drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t 
  time)
  @@ -1162,19 +1182,29 @@ drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem 
  *bufmgr_gem, time_t time)
  for (i = 0; i  bufmgr_gem-num_buckets; i++) {
  struct drm_intel_gem_bo_bucket *bucket =
  bufmgr_gem-cache_bucket[i];
  +   drmMMListHead madv;
   
  +   DRMINITLISTHEAD(madv);
  while (!DRMLISTEMPTY(bucket-head)) {
  drm_intel_bo_gem *bo_gem;
   
  bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
bucket-head.next, head);
  -   if (time - bo_gem-free_time = 1)
  +   if (time - bo_gem-free_time  2*(1+bo_gem-dontneed))
 
 Something somewhere will complain about mixing bools and ints, I'd guess.
 
 Also perhaps for perf bisection maybe do a patch before this changing the
 expiration time to 2 (or 4), then add the extra dontneed state?

It is already 2, since it uses = and the patch just converts it to 
to be consistent after the multiplication.
 
  break;
   
  DRMLISTDEL(bo_gem-head);
  -
  -   drm_intel_gem_bo_free(bo_gem-bo);
  +   if (bo_gem-dontneed) {
  +   drm_intel_gem_bo_free(bo_gem-bo);
  +   } else {
  +   drm_intel_gem_bo_madvise_internal(bufmgr_gem, 
  bo_gem,
  + 
  I915_MADV_DONTNEED);
  +   bo_gem-dontneed = 1;
  +   DRMLISTADDTAIL(bo_gem-head, madv);
  +   }
  }
 
 Maybe add a comment here?
 /* Objects which have elapsed 2s of disuse should go to the top of the 
 bucket. */
 
  +   if (!DRMLISTEMPTY(madv))
  +   __splice(madv, bucket-head, bucket-head.next);
 
 
  }
   
  bufmgr_gem-time = time;
  @@ -1285,9 +1315,7 @@ drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, 
  time_t time)
   
  bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo-size);
  /* Put the buffer into our internal cache for reuse if we can. */
  -   if (bufmgr_gem-bo_reuse  bo_gem-reusable  bucket != NULL 
  -   drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
  - I915_MADV_DONTNEED)) {
  +   if (bufmgr_gem-bo_reuse  bo_gem-reusable  bucket != NULL) {
  bo_gem-free_time = time;
   
  bo_gem-name = NULL;
 
 In general, it looks fine to me. Like I said above wrt adding the patch before
 this, I am curious how much difference you see just messing with the 
 expiration
 times versus the two state model.

The issue I was looking at was simply struct_mutex contention in madvise
(then busy). For busy we can tweak the ordering to reduce the contention
slightly, but madvise is trickier (though madvise is a candidate for one
of the first per-object locks). This just moves the contention elsewhere,
but madvise/busy calls tend to dominate overall and trimming them reduces
the intersection.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 1/2] intel: Defer setting madv on the bo cache

2015-04-14 Thread Ben Widawsky
On Tue, Apr 14, 2015 at 04:08:44PM +0100, Chris Wilson wrote:
 Convert the bo-cache into 2 phases:
 
 1. A two second cache of recently used buffers, keep untouched.
 2. A two second cache of older buffers, marked for eviction.
 
 This helps reduce ioctl traffic on a rapid turnover in working sets. The
 issue is that even a 2 second window is enough for an application to
 fill all of memory with inactive buffers (and we would rely on the
 oom-killer identifying the right culprit).
 
 Signed-off-by: Chris Wilson ch...@chris-wilson.co.uk
 Cc: Daniel Vetter daniel.vet...@ffwll.ch
 Cc: Ben Widawsky benjamin.widaw...@intel.com
 ---
  intel/intel_bufmgr_gem.c | 42 +++---
  1 file changed, 35 insertions(+), 7 deletions(-)
 
 diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
 index f5d6130..ecbf8ee 100644
 --- a/intel/intel_bufmgr_gem.c
 +++ b/intel/intel_bufmgr_gem.c
 @@ -211,6 +211,11 @@ struct _drm_intel_bo_gem {
   bool used_as_reloc_target;
  
   /**
 +  * Are we keeping this buffer around in semi-permenant cache?
 +  */
 + bool dontneed;
 +
 + /**
* Boolean of whether we have encountered an error whilst building the 
 relocation tree.
*/
   bool has_error;
 @@ -714,7 +719,8 @@ retry:
   }
  
   if (alloc_from_cache) {
 - if (!drm_intel_gem_bo_madvise_internal
 + if (bo_gem-dontneed 
 + !drm_intel_gem_bo_madvise_internal
   (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) {
   drm_intel_gem_bo_free(bo_gem-bo);
   drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem,
 @@ -1150,6 +1156,20 @@ drm_intel_gem_bo_mark_mmaps_incoherent(drm_intel_bo 
 *bo)
  #endif
  }
  
 +static inline void __splice(const drmMMListHead *list,
 + drmMMListHead *prev,
 + drmMMListHead *next)
 +{
 + drmMMListHead *first = list-next;
 + drmMMListHead *last = list-prev;
 +
 + first-prev = prev;
 + prev-next = first;
 +
 + last-next = next;
 + next-prev = last;
 +}
 +

Seems worth adding to libdrm_lists.h

  /** Frees all cached buffers significantly older than @time. */
  static void
  drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time)
 @@ -1162,19 +1182,29 @@ drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem 
 *bufmgr_gem, time_t time)
   for (i = 0; i  bufmgr_gem-num_buckets; i++) {
   struct drm_intel_gem_bo_bucket *bucket =
   bufmgr_gem-cache_bucket[i];
 + drmMMListHead madv;
  
 + DRMINITLISTHEAD(madv);
   while (!DRMLISTEMPTY(bucket-head)) {
   drm_intel_bo_gem *bo_gem;
  
   bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
 bucket-head.next, head);
 - if (time - bo_gem-free_time = 1)
 + if (time - bo_gem-free_time  2*(1+bo_gem-dontneed))

Something somewhere will complain about mixing bools and ints, I'd guess.

Also perhaps for perf bisection maybe do a patch before this changing the
expiration time to 2 (or 4), then add the extra dontneed state?

   break;
  
   DRMLISTDEL(bo_gem-head);
 -
 - drm_intel_gem_bo_free(bo_gem-bo);
 + if (bo_gem-dontneed) {
 + drm_intel_gem_bo_free(bo_gem-bo);
 + } else {
 + drm_intel_gem_bo_madvise_internal(bufmgr_gem, 
 bo_gem,
 +   
 I915_MADV_DONTNEED);
 + bo_gem-dontneed = 1;
 + DRMLISTADDTAIL(bo_gem-head, madv);
 + }
   }

Maybe add a comment here?
/* Objects which have elapsed 2s of disuse should go to the top of the bucket. 
*/

 + if (!DRMLISTEMPTY(madv))
 + __splice(madv, bucket-head, bucket-head.next);


   }
  
   bufmgr_gem-time = time;
 @@ -1285,9 +1315,7 @@ drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, 
 time_t time)
  
   bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo-size);
   /* Put the buffer into our internal cache for reuse if we can. */
 - if (bufmgr_gem-bo_reuse  bo_gem-reusable  bucket != NULL 
 - drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
 -   I915_MADV_DONTNEED)) {
 + if (bufmgr_gem-bo_reuse  bo_gem-reusable  bucket != NULL) {
   bo_gem-free_time = time;
  
   bo_gem-name = NULL;

In general, it looks fine to me. Like I said above wrt adding the patch before
this, I am curious how much difference you see just messing with the expiration
times versus the two state model.

-- 
Ben Widawsky,