On Fri, Jul 25, 2014 at 01:27:00PM +0100, Chris Wilson wrote:
> At the heart of this change is that the seqno is too low-level an
> abstraction to handle the growing complexities of command tracking, both
> with the introduction of multiple command queues with execbuffer and the
> potential for reordering with a scheduler. On top of the seqno we have
> the request. Conceptually this is just a fence, but it also has
> substantial bookkeeping of its own in order to track the context and
> batch in flight, for example. It is the central structure upon which we
> can extend with dependency tracking et al.
> 
> As regards the objects, they were using the seqno as a simple fence,
> upon which we check or even wait for command completion. This patch
> exchanges that seqno/ring pair with the request itself. For the
> majority, the lifetime of the request is ordered by how we retire
> objects then requests. However, both the unlocked waits and probing
> elsewhere do not tie into the normal request lifetimes and so we need
> to introduce a kref. Extending the objects to use the request as the
> fence naturally extends to segregating read/write fence tracking. This
> is significant as it reduces the number of semaphores we need to emit,
> reducing the likelihood of #54226, and improves performance overall.
> 
> NOTE: this is not against bare drm-intel-nightly and is likely to
> conflict with execlists...
> 
> Signed-off-by: Chris Wilson <[email protected]>
> Cc: Jesse Barnes <[email protected]>
> Cc: Daniel Vetter <[email protected]>
> Cc: Oscar Mateo <[email protected]>
> Cc: Brad Volkin <[email protected]>

Ok, read through it and I like it overall. Also, right now is the perfect
time to merge it since we're right before the merge window. But this here
needs to be split up a bit to cut out prep patches. I've noticed a few
things inline, but there's also the mechanical stuff (like dropping the
drm_ prefix from requests).
-Daniel

> ---
>  drivers/gpu/drm/i915/i915_debugfs.c          |  37 +-
>  drivers/gpu/drm/i915/i915_drv.h              | 108 ++--
>  drivers/gpu/drm/i915/i915_gem.c              | 769 ++++++++++++++++-----------
>  drivers/gpu/drm/i915/i915_gem_context.c      |  19 +-
>  drivers/gpu/drm/i915/i915_gem_exec.c         |  10 +-
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c   |  37 +-
>  drivers/gpu/drm/i915/i915_gem_render_state.c |   5 +-
>  drivers/gpu/drm/i915/i915_gem_tiling.c       |   2 +-
>  drivers/gpu/drm/i915/i915_gpu_error.c        |  35 +-
>  drivers/gpu/drm/i915/i915_irq.c              |   6 +-
>  drivers/gpu/drm/i915/i915_perf.c             |   6 +-
>  drivers/gpu/drm/i915/i915_trace.h            |   2 +-
>  drivers/gpu/drm/i915/intel_display.c         |  50 +-
>  drivers/gpu/drm/i915/intel_drv.h             |   3 +-
>  drivers/gpu/drm/i915/intel_overlay.c         | 118 ++--
>  drivers/gpu/drm/i915/intel_ringbuffer.c      |  83 +--
>  drivers/gpu/drm/i915/intel_ringbuffer.h      |  11 +-
>  17 files changed, 745 insertions(+), 556 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 406e630..676d5f1 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -122,10 +122,11 @@ static inline const char *get_global_flag(struct drm_i915_gem_object *obj)
>  static void
>  describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
>  {
> +     struct i915_gem_request *rq = i915_gem_object_last_read(obj);
>       struct i915_vma *vma;
>       int pin_count = 0;
>  
> -     seq_printf(m, "%pK: %s%s%s %8zdKiB %02x %02x %u %u %u%s%s%s",
> +     seq_printf(m, "%pK: %s%s%s %8zdKiB %02x %02x %x %x %x%s%s%s",
>                  &obj->base,
>                  get_pin_flag(obj),
>                  get_tiling_flag(obj),
> @@ -133,9 +134,9 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
>                  obj->base.size / 1024,
>                  obj->base.read_domains,
>                  obj->base.write_domain,
> -                obj->last_read_seqno,
> -                obj->last_write_seqno,
> -                obj->last_fenced_seqno,
> +                i915_request_seqno(rq),
> +                i915_request_seqno(obj->last_write.request),
> +                i915_request_seqno(obj->last_fence.request),
>                  i915_cache_level_str(obj->cache_level),
>                  obj->dirty ? " dirty" : "",
>                  obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
> @@ -168,8 +169,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
>               *t = '\0';
>               seq_printf(m, " (%s mappable)", s);
>       }
> -     if (obj->ring != NULL)
> -             seq_printf(m, " (%s)", obj->ring->name);
> +     if (rq)
> +             seq_printf(m, " (%s)", rq->ring->name);
>       if (obj->frontbuffer_bits)
>               seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits);
>  }
> @@ -336,7 +337,7 @@ static int per_file_stats(int id, void *ptr, void *data)
>                       if (ppgtt->ctx && ppgtt->ctx->file_priv != stats->file_priv)
>                               continue;
>  
> -                     if (obj->ring) /* XXX per-vma statistic */
> +                     if (obj->active) /* XXX per-vma statistic */
>                               stats->active += obj->base.size;
>                       else
>                               stats->inactive += obj->base.size;
> @@ -346,7 +347,7 @@ static int per_file_stats(int id, void *ptr, void *data)
>       } else {
>               if (i915_gem_obj_ggtt_bound(obj)) {
>                       stats->global += obj->base.size;
> -                     if (obj->ring)
> +                     if (obj->active)
>                               stats->active += obj->base.size;
>                       else
>                               stats->inactive += obj->base.size;
> @@ -614,12 +615,12 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data)
>                               seq_printf(m, "Flip pending (waiting for vsync) 
> on pipe %c (plane %c)\n",
>                                          pipe, plane);
>                       }
> -                     if (work->ring)
> +                     if (work->flip_queued_request) {
> +                             struct i915_gem_request *rq = work->flip_queued_request;
>                               seq_printf(m, "Flip queued on %s at seqno %u, 
> now %u\n",
> -                                             work->ring->name,
> -                                             work->flip_queued_seqno,
> -                                             work->ring->get_seqno(work->ring, true));
> -                     else
> +                                             rq->ring->name, rq->seqno,
> +                                             rq->ring->get_seqno(rq->ring, true));
> +                     } else
>                               seq_printf(m, "Flip not associated with any 
> ring\n");
>                       seq_printf(m, "Flip queued on frame %d, (was ready on 
> frame %d), now %d\n",
>                                  work->flip_queued_vblank,
> @@ -656,7 +657,7 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
>       struct drm_device *dev = node->minor->dev;
>       struct drm_i915_private *dev_priv = dev->dev_private;
>       struct intel_engine_cs *ring;
> -     struct drm_i915_gem_request *gem_request;
> +     struct i915_gem_request *rq;
>       int ret, count, i;
>  
>       ret = mutex_lock_interruptible(&dev->struct_mutex);
> @@ -669,12 +670,10 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
>                       continue;
>  
>               seq_printf(m, "%s requests:\n", ring->name);
> -             list_for_each_entry(gem_request,
> -                                 &ring->request_list,
> -                                 list) {
> +             list_for_each_entry(rq, &ring->request_list, list) {
>                       seq_printf(m, "    %d @ %d\n",
> -                                gem_request->seqno,
> -                                (int) (jiffies - gem_request->emitted_jiffies));
> +                                rq->seqno,
> +                                (int)(jiffies - rq->emitted_jiffies));
>               }
>               count++;
>       }
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 9837b0f..5794d096 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -187,6 +187,7 @@ enum hpd_pin {
>  struct drm_i915_private;
>  struct i915_mm_struct;
>  struct i915_mmu_object;
> +struct i915_gem_request;
>  
>  enum intel_dpll_id {
>       DPLL_ID_PRIVATE = -1, /* non-shared dpll in use */
> @@ -1720,16 +1721,15 @@ struct drm_i915_gem_object {
>       struct drm_mm_node *stolen;
>       struct list_head global_list;
>  
> -     struct list_head ring_list;
>       /** Used in execbuf to temporarily hold a ref */
>       struct list_head obj_exec_link;
>  
>       /**
>        * This is set if the object is on the active lists (has pending
> -      * rendering and so a non-zero seqno), and is not set if it is on
> -      * inactive (ready to be unbound) list.
> +      * rendering and so a submitted request), and is not set if it is on
> +      * inactive (ready to be unbound) list. We track activity per engine.
>        */
> -     unsigned int active:1;
> +     unsigned int active:3;

Could we #define this and then add a BUILD_BUG_ON that 1 << bits >
I915_NUM_RINGS?
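
Something like this (untested sketch, names made up):

	/* i915_drv.h */
	#define I915_BO_ACTIVE_BITS 3

	/* in struct drm_i915_gem_object */
	unsigned int active:I915_BO_ACTIVE_BITS;

	/* e.g. in i915_gem_object_init(): the counter must be able to
	 * hold one reference per ring */
	BUILD_BUG_ON(I915_NUM_RINGS >= 1 << I915_BO_ACTIVE_BITS);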

>  
>       /**
>        * This is set if the object has been written to since last bound
> @@ -1797,13 +1797,11 @@ struct drm_i915_gem_object {
>       void *dma_buf_vmapping;
>       int vmapping_count;
>  
> -     struct intel_engine_cs *ring;
> -
> -     /** Breadcrumb of last rendering to the buffer. */
> -     uint32_t last_read_seqno;
> -     uint32_t last_write_seqno;
> -     /** Breadcrumb of last fenced GPU access to the buffer. */
> -     uint32_t last_fenced_seqno;
> +     /** Breadcrumbs of last rendering to the buffer. */
> +     struct {
> +             struct i915_gem_request *request;
> +             struct list_head ring_list;
> +     } last_write, last_read[I915_NUM_RINGS], last_fence;
>  
>       /** Current tiling stride for the object, if it's tiled. */
>       uint32_t stride;
> @@ -1836,6 +1834,8 @@ struct drm_i915_gem_object {
>  };
>  #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
>  
> +struct i915_gem_request *i915_gem_object_last_read(struct drm_i915_gem_object *obj);
> +
>  void i915_gem_track_fb(struct drm_i915_gem_object *old,
>                      struct drm_i915_gem_object *new,
>                      unsigned frontbuffer_bits);
> @@ -1850,7 +1850,9 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
>   * sequence-number comparisons on buffer last_rendering_seqnos, and associate
>   * an emission time with seqnos for tracking how far ahead of the GPU we are.
>   */
> -struct drm_i915_gem_request {
> +struct i915_gem_request {
> +     struct kref kref;
> +
>       /** On which ring this request was generated */
>       struct intel_engine_cs *ring;
>  
> @@ -1878,8 +1880,60 @@ struct drm_i915_gem_request {
>       struct drm_i915_file_private *file_priv;
>       /** file_priv list entry for this request */
>       struct list_head client_list;
> +
> +     bool completed:1;
>  };
>  
> +static inline struct intel_engine_cs *i915_request_ring(struct i915_gem_request *rq)
> +{
> +     return rq ? rq->ring : NULL;
> +}
> +
> +static inline int i915_request_ring_id(struct i915_gem_request *rq)
> +{
> +     return rq ? rq->ring->id : -1;
> +}
> +
> +static inline u32 i915_request_seqno(struct i915_gem_request *rq)
> +{
> +     return rq ? rq->seqno : 0;
> +}
> +
> +/**
> + * Returns true if seq1 is later than seq2.
> + */
> +static inline bool
> +__i915_seqno_passed(uint32_t seq1, uint32_t seq2)
> +{
> +     return (int32_t)(seq1 - seq2) >= 0;
> +}
> +
> +static inline bool
> +i915_request_complete(struct i915_gem_request *rq, bool lazy)
> +{
> +     if (!rq->completed)
> +             rq->completed = __i915_seqno_passed(rq->ring->get_seqno(rq->ring, lazy),
> +                                                 rq->seqno);
> +     return rq->completed;
> +}
> +
> +static inline struct i915_gem_request *
> +i915_request_get(struct i915_gem_request *rq)
> +{
> +     if (rq)
> +             kref_get(&rq->kref);
> +     return rq;
> +}
> +
> +void __i915_request_free(struct kref *kref);
> +
> +static inline void
> +i915_request_put(struct i915_gem_request *rq)
> +{
> +     if (rq)
> +             kref_put(&rq->kref, __i915_request_free);
> +}
> +
>  struct drm_i915_file_private {
>       struct drm_i915_private *dev_priv;
>       struct drm_file *file;
> @@ -2335,22 +2389,18 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
>  
>  int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
>  int i915_gem_object_sync(struct drm_i915_gem_object *obj,
> -                      struct intel_engine_cs *to);
> +                      struct intel_engine_cs *to,
> +                      bool readonly);
>  void i915_vma_move_to_active(struct i915_vma *vma,
> -                          struct intel_engine_cs *ring);
> +                          struct intel_engine_cs *ring,
> +                          unsigned fenced);
> +#define VMA_IS_FENCED 0x1
> +#define VMA_HAS_FENCE 0x2
>  int i915_gem_dumb_create(struct drm_file *file_priv,
>                        struct drm_device *dev,
>                        struct drm_mode_create_dumb *args);
>  int i915_gem_mmap_gtt(struct drm_file *file_priv, struct drm_device *dev,
>                     uint32_t handle, uint64_t *offset);
> -/**
> - * Returns true if seq1 is later than seq2.
> - */
> -static inline bool
> -i915_seqno_passed(uint32_t seq1, uint32_t seq2)
> -{
> -     return (int32_t)(seq1 - seq2) >= 0;
> -}
>  
>  int __must_check i915_gem_get_seqno(struct drm_device *dev, u32 *seqno);
>  int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno);
> @@ -2360,14 +2410,14 @@ int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj);
>  bool i915_gem_object_pin_fence(struct drm_i915_gem_object *obj);
>  void i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj);
>  
> -struct drm_i915_gem_request *
> +struct i915_gem_request *
>  i915_gem_find_active_request(struct intel_engine_cs *ring);
>  
>  bool i915_gem_retire_requests(struct drm_device *dev);
>  void i915_gem_retire_requests_ring(struct intel_engine_cs *ring);
>  int __must_check i915_gem_check_wedge(struct i915_gpu_error *error,
>                                     bool interruptible);
> -int __must_check i915_gem_check_olr(struct intel_engine_cs *ring, u32 seqno);
> +int __must_check i915_gem_check_olr(struct i915_gem_request *rq);
>  
>  static inline bool i915_reset_in_progress(struct i915_gpu_error *error)
>  {
> @@ -2411,12 +2461,10 @@ int __must_check i915_gpu_idle(struct drm_device *dev);
>  int __must_check i915_gem_suspend(struct drm_device *dev);
>  int __i915_add_request(struct intel_engine_cs *ring,
>                      struct drm_file *file,
> -                    struct drm_i915_gem_object *batch_obj,
> -                    u32 *seqno);
> -#define i915_add_request(ring, seqno) \
> -     __i915_add_request(ring, NULL, NULL, seqno)
> -int __must_check i915_wait_seqno(struct intel_engine_cs *ring,
> -                              uint32_t seqno);
> +                    struct drm_i915_gem_object *batch_obj);
> +#define i915_add_request(ring) \
> +     __i915_add_request(ring, NULL, NULL)
> +int __must_check i915_wait_request(struct i915_gem_request *rq);
>  int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
>  int __must_check
>  i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index f3ad6fb..d208658 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -48,8 +48,6 @@ static __must_check int
>  i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
>                                           struct drm_i915_file_private *file_priv,
>                                           bool readonly);
> -static void
> -i915_gem_object_retire(struct drm_i915_gem_object *obj);
>  
>  static void i915_gem_write_fence(struct drm_device *dev, int reg,
>                                struct drm_i915_gem_object *obj);
> @@ -118,6 +116,73 @@ static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
>       spin_unlock(&dev_priv->mm.object_stat_lock);
>  }
>  
> +static void
> +i915_gem_object_retire__write(struct drm_i915_gem_object *obj)
> +{
> +     intel_fb_obj_flush(obj, true);
> +     obj->last_write.request = NULL;
> +     list_del_init(&obj->last_write.ring_list);
> +}
> +
> +static void
> +i915_gem_object_retire__fence(struct drm_i915_gem_object *obj)
> +{
> +     obj->last_fence.request = NULL;
> +     list_del_init(&obj->last_fence.ring_list);
> +}
> +
> +static void
> +i915_gem_object_retire__read(struct drm_i915_gem_object *obj,
> +                          struct intel_engine_cs *ring)
> +{
> +     struct i915_vma *vma;
> +
> +     BUG_ON(obj->active == 0);
> +     BUG_ON(obj->base.write_domain);
> +
> +     obj->last_read[ring->id].request = NULL;
> +     list_del_init(&obj->last_read[ring->id].ring_list);
> +
> +     if (--obj->active)
> +             return;
> +
> +     BUG_ON(obj->last_write.request);
> +     BUG_ON(obj->last_fence.request);
> +
> +     list_for_each_entry(vma, &obj->vma_list, vma_link) {
> +             if (!list_empty(&vma->mm_list))
> +                     list_move_tail(&vma->mm_list, &vma->vm->inactive_list);
> +     }
> +
> +     drm_gem_object_unreference(&obj->base);
> +
> +     WARN_ON(i915_verify_lists(dev));
> +}
> +
> +static void
> +i915_gem_object_retire(struct drm_i915_gem_object *obj)
> +{
> +     struct i915_gem_request *rq;
> +     int i;
> +
> +     if (!obj->active)
> +             return;
> +
> +     rq = obj->last_write.request;
> +     if (rq && i915_request_complete(rq, true))
> +             i915_gem_object_retire__write(obj);
> +
> +     rq = obj->last_fence.request;
> +     if (rq && i915_request_complete(rq, true))
> +             i915_gem_object_retire__fence(obj);
> +
> +     for (i = 0; i < I915_NUM_RINGS; i++) {
> +             rq = obj->last_read[i].request;
> +             if (rq && i915_request_complete(rq, true))
> +                     i915_gem_object_retire__read(obj, rq->ring);
> +     }
> +}
> +
>  static int
>  i915_gem_wait_for_error(struct i915_gpu_error *error)
>  {
> @@ -1337,15 +1402,15 @@ i915_gem_check_wedge(struct i915_gpu_error *error,
>   * equal.
>   */
>  int
> -i915_gem_check_olr(struct intel_engine_cs *ring, u32 seqno)
> +i915_gem_check_olr(struct i915_gem_request *rq)
>  {
>       int ret;
>  
> -     BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));
> +     BUG_ON(!mutex_is_locked(&rq->ring->dev->struct_mutex));
>  
>       ret = 0;
> -     if (seqno == ring->outstanding_lazy_seqno)
> -             ret = i915_add_request(ring, NULL);
> +     if (rq == rq->ring->preallocated_request)
> +             ret = i915_add_request(rq->ring);
>  
>       return ret;
>  }
> @@ -1370,9 +1435,8 @@ static bool can_wait_boost(struct drm_i915_file_private *file_priv)
>  }
>  
>  /**
> - * __wait_seqno - wait until execution of seqno has finished
> - * @ring: the ring expected to report seqno
> - * @seqno: duh!
> + * __wait_request - wait until execution of request has finished
> + * @request: the request to wait upon
>   * @reset_counter: reset sequence associated with the given seqno
>   * @interruptible: do an interruptible wait (normally yes)
>   * @timeout: in - how long to wait (NULL forever); out - how much time remaining
> @@ -1387,24 +1451,26 @@ static bool can_wait_boost(struct drm_i915_file_private *file_priv)
>   * Returns 0 if the seqno was found within the alloted time. Else returns the
>   * errno with remaining time filled in timeout argument.
>   */
> -static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
> -                     unsigned reset_counter,
> -                     bool interruptible,
> -                     struct timespec *timeout,
> -                     struct drm_i915_file_private *file_priv)
> +static int __wait_request(struct i915_gem_request *rq,
> +                       unsigned reset_counter,
> +                       bool interruptible,
> +                       struct timespec *timeout,
> +                       struct drm_i915_file_private *file_priv)
>  {
> +     struct intel_engine_cs *ring = rq->ring;
>       struct drm_device *dev = ring->dev;
> -     struct drm_i915_private *dev_priv = dev->dev_private;
> +     struct drm_i915_private *dev_priv = to_i915(dev);
>       const bool irq_test_in_progress =
>               ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
>       struct timespec before, now;
>       DEFINE_WAIT(wait);
>       unsigned long timeout_expire;
> +     u32 seqno = rq->seqno;
>       int ret;
>  
>       WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
>  
> -     if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
> +     if (i915_request_complete(rq, true))
>               return 0;
>  
>       timeout_expire = timeout ? jiffies + timespec_to_jiffies_timeout(timeout) : 0;
> @@ -1440,7 +1506,7 @@ static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
>                       break;
>               }
>  
> -             if (i915_seqno_passed(ring->get_seqno(ring, false), seqno)) {
> +             if (i915_request_complete(rq, false)) {
>                       ret = 0;
>                       break;
>               }
> @@ -1494,46 +1560,30 @@ static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
>   * request and object lists appropriately for that event.
>   */
>  int
> -i915_wait_seqno(struct intel_engine_cs *ring, uint32_t seqno)
> +i915_wait_request(struct i915_gem_request *rq)
>  {
> -     struct drm_device *dev = ring->dev;
> -     struct drm_i915_private *dev_priv = dev->dev_private;
> -     bool interruptible = dev_priv->mm.interruptible;
> +     struct drm_device *dev = rq->ring->dev;
> +     struct drm_i915_private *dev_priv = to_i915(dev);
>       int ret;
>  
> -     BUG_ON(!mutex_is_locked(&dev->struct_mutex));
> -     BUG_ON(seqno == 0);
> +     if (WARN_ON(!mutex_is_locked(&dev->struct_mutex)))
> +             return -EINVAL;
> +
> +     if (i915_request_complete(rq, true))
> +             return 0;
>  
> -     ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
> +     ret = i915_gem_check_wedge(&dev_priv->gpu_error,
> +                                dev_priv->mm.interruptible);
>       if (ret)
>               return ret;
>  
> -     ret = i915_gem_check_olr(ring, seqno);
> +     ret = i915_gem_check_olr(rq);
>       if (ret)
>               return ret;
>  
> -     return __wait_seqno(ring, seqno,
> -                         atomic_read(&dev_priv->gpu_error.reset_counter),
> -                         interruptible, NULL, NULL);
> -}
> -
> -static int
> -i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
> -                                  struct intel_engine_cs *ring)
> -{
> -     if (!obj->active)
> -             return 0;
> -
> -     /* Manually manage the write flush as we may have not yet
> -      * retired the buffer.
> -      *
> -      * Note that the last_write_seqno is always the earlier of
> -      * the two (read/write) seqno, so if we have successfully waited,
> -      * we know we have passed the last write.
> -      */
> -     obj->last_write_seqno = 0;
> -
> -     return 0;
> +     return __wait_request(rq,
> +                           atomic_read(&dev_priv->gpu_error.reset_counter),
> +                           dev_priv->mm.interruptible, NULL, NULL);
>  }
>  
>  /**
> @@ -1544,19 +1594,37 @@ static __must_check int
>  i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
>                              bool readonly)
>  {
> -     struct intel_engine_cs *ring = obj->ring;
> -     u32 seqno;
> -     int ret;
> +     int i, ret;
>  
> -     seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
> -     if (seqno == 0)
> -             return 0;
> +     if (readonly) {
> +             if (obj->last_write.request == NULL)
> +                     return 0;
>  
> -     ret = i915_wait_seqno(ring, seqno);
> -     if (ret)
> -             return ret;
> +             ret = i915_wait_request(obj->last_write.request);
> +             if (ret)
> +                     return ret;
> +     } else {
> +             for (i = 0; i < I915_NUM_RINGS; i++) {
> +                     if (obj->last_read[i].request == NULL)
> +                             continue;
> +
> +                     ret = i915_wait_request(obj->last_read[i].request);
> +                     if (ret)
> +                             return ret;
> +             }
> +     }
>  
> -     return i915_gem_object_wait_rendering__tail(obj, ring);
> +     /* Manually manage the write flush as we may have not yet
> +      * retired the buffer.
> +      *
> +      * Note that the last_write_seqno is always the earlier of
> +      * the two (read/write) seqno, so if we have successfully waited,
> +      * we know we have passed the last write.
> +      */
> +     if (obj->last_write.request)
> +             i915_gem_object_retire__write(obj);
> +
> +     return 0;
>  }
>  
>  /* A nonblocking variant of the above wait. This is a highly dangerous routine
> @@ -1569,34 +1637,48 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
>  {
>       struct drm_device *dev = obj->base.dev;
>       struct drm_i915_private *dev_priv = dev->dev_private;
> -     struct intel_engine_cs *ring = obj->ring;
> +     struct i915_gem_request *rq[I915_NUM_RINGS] = {};
>       unsigned reset_counter;
> -     u32 seqno;
> -     int ret;
> +     int i, n, ret;
>  
>       BUG_ON(!mutex_is_locked(&dev->struct_mutex));
>       BUG_ON(!dev_priv->mm.interruptible);
>  
> -     seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
> -     if (seqno == 0)
> +     n = 0;
> +     if (readonly) {
> +             if (obj->last_write.request)
> +                     rq[n++] = i915_request_get(obj->last_write.request);
> +     } else {
> +             for (i = 0; i < I915_NUM_RINGS; i++)
> +                     if (obj->last_read[i].request)
> +                             rq[n++] = i915_request_get(obj->last_read[i].request);
> +     }
> +     if (n == 0)
>               return 0;
>  
>       ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
>       if (ret)
> -             return ret;
> +             goto out;
>  
> -     ret = i915_gem_check_olr(ring, seqno);
> -     if (ret)
> -             return ret;
> +     for (i = 0; i < n; i++) {
> +             ret = i915_gem_check_olr(rq[i]);
> +             if (ret)
> +                     goto out;
> +     }
>  
>       reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
>       mutex_unlock(&dev->struct_mutex);
> -     ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, file_priv);
> +
> +     for (i = 0; ret == 0 && i < n; i++)
> +             ret = __wait_request(rq[i], reset_counter, true, NULL, file_priv);
> +
>       mutex_lock(&dev->struct_mutex);
> -     if (ret)
> -             return ret;
>  
> -     return i915_gem_object_wait_rendering__tail(obj, ring);
> +out:
> +     for (i = 0; i < n; i++)
> +             i915_request_put(rq[i]);
> +
> +     return ret;
>  }
>  
>  /**
> @@ -2387,78 +2469,57 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
>       return 0;
>  }
>  
> -static void
> -i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
> -                            struct intel_engine_cs *ring)
> -{
> -     u32 seqno = intel_ring_get_seqno(ring);
> -
> -     BUG_ON(ring == NULL);
> -     if (obj->ring != ring && obj->last_write_seqno) {
> -             /* Keep the seqno relative to the current ring */
> -             obj->last_write_seqno = seqno;
> -     }
> -     obj->ring = ring;
> -
> -     /* Add a reference if we're newly entering the active list. */
> -     if (!obj->active) {
> -             drm_gem_object_reference(&obj->base);
> -             obj->active = 1;
> -     }
> -
> -     list_move_tail(&obj->ring_list, &ring->active_list);
> -
> -     obj->last_read_seqno = seqno;
> -}
> -
>  void i915_vma_move_to_active(struct i915_vma *vma,
> -                          struct intel_engine_cs *ring)
> +                          struct intel_engine_cs *ring,
> +                          unsigned fenced)
>  {
> -     list_move_tail(&vma->mm_list, &vma->vm->active_list);
> -     return i915_gem_object_move_to_active(vma->obj, ring);
> -}
> +     struct drm_i915_gem_object *obj = vma->obj;
> +     struct i915_gem_request *rq = intel_ring_get_request(ring);
> +     u32 old_read = obj->base.read_domains;
> +     u32 old_write = obj->base.write_domain;
>  
> -static void
> -i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
> -{
> -     struct i915_vma *vma;
> +     BUG_ON(rq == NULL);
>  
> -     BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
> -     BUG_ON(!obj->active);
> +     obj->base.write_domain = obj->base.pending_write_domain;
> +     if (obj->base.write_domain == 0)
> +             obj->base.pending_read_domains |= obj->base.read_domains;
> +     obj->base.read_domains = obj->base.pending_read_domains;
>  
> -     list_for_each_entry(vma, &obj->vma_list, vma_link) {
> -             if (!list_empty(&vma->mm_list))
> -                     list_move_tail(&vma->mm_list, &vma->vm->inactive_list);
> -     }
> -
> -     intel_fb_obj_flush(obj, true);
> -
> -     list_del_init(&obj->ring_list);
> -     obj->ring = NULL;
> +     obj->base.pending_read_domains = 0;
> +     obj->base.pending_write_domain = 0;
>  
> -     obj->last_read_seqno = 0;
> -     obj->last_write_seqno = 0;
> -     obj->base.write_domain = 0;
> +     trace_i915_gem_object_change_domain(obj, old_read, old_write);
> +     if (obj->base.read_domains == 0)
> +             return;
>  
> -     obj->last_fenced_seqno = 0;
> +     /* Add a reference if we're newly entering the active list. */
> +     if (obj->last_read[ring->id].request == NULL && obj->active++ == 0)
> +             drm_gem_object_reference(&obj->base);
>  
> -     obj->active = 0;
> -     drm_gem_object_unreference(&obj->base);
> +     obj->last_read[ring->id].request = rq;
> +     list_move_tail(&obj->last_read[ring->id].ring_list, &ring->read_list);
>  
> -     WARN_ON(i915_verify_lists(dev));
> -}
> +     if (obj->base.write_domain) {
> +             obj->dirty = 1;
> +             obj->last_write.request = rq;
> +             list_move_tail(&obj->last_write.ring_list, &ring->write_list);
> +             intel_fb_obj_invalidate(obj, ring);
>  
> -static void
> -i915_gem_object_retire(struct drm_i915_gem_object *obj)
> -{
> -     struct intel_engine_cs *ring = obj->ring;
> +             /* update for the implicit flush after a batch */
> +             obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
> +     }
>  
> -     if (ring == NULL)
> -             return;
> +     if (fenced) {
> +             obj->last_fence.request = rq;
> +             list_move_tail(&obj->last_fence.ring_list, &ring->fence_list);
> +             if (fenced & 2) {

Please use the #define here ...
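
i.e. with the flag already defined in i915_drv.h above:

	if (fenced & VMA_HAS_FENCE) {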

> +                     struct drm_i915_private *dev_priv = to_i915(ring->dev);
> +                     list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
> +                                     &dev_priv->mm.fence_list);
> +             }
> +     }
>  
> -     if (i915_seqno_passed(ring->get_seqno(ring, true),
> -                           obj->last_read_seqno))
> -             i915_gem_object_move_to_inactive(obj);
> +     list_move_tail(&vma->mm_list, &vma->vm->active_list);
>  }
>  
>  static int
> @@ -2533,11 +2594,10 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
>  
>  int __i915_add_request(struct intel_engine_cs *ring,
>                      struct drm_file *file,
> -                    struct drm_i915_gem_object *obj,
> -                    u32 *out_seqno)
> +                    struct drm_i915_gem_object *obj)
>  {
>       struct drm_i915_private *dev_priv = ring->dev->dev_private;
> -     struct drm_i915_gem_request *request;
> +     struct i915_gem_request *rq;
>       u32 request_ring_position, request_start;
>       int ret;
>  
> @@ -2553,8 +2613,8 @@ int __i915_add_request(struct intel_engine_cs *ring,
>       if (ret)
>               return ret;
>  
> -     request = ring->preallocated_lazy_request;
> -     if (WARN_ON(request == NULL))
> +     rq = ring->preallocated_request;
> +     if (WARN_ON(rq == NULL))
>               return -ENOMEM;
>  
>       /* Record the position of the start of the request so that
> @@ -2568,10 +2628,8 @@ int __i915_add_request(struct intel_engine_cs *ring,
>       if (ret)
>               return ret;
>  
> -     request->seqno = intel_ring_get_seqno(ring);
> -     request->ring = ring;
> -     request->head = request_start;
> -     request->tail = request_ring_position;
> +     rq->head = request_start;
> +     rq->tail = request_ring_position;
>  
>       /* Whilst this request exists, batch_obj will be on the
>        * active_list, and so will hold the active reference. Only when this
> @@ -2579,32 +2637,31 @@ int __i915_add_request(struct intel_engine_cs *ring,
>        * inactive_list and lose its active reference. Hence we do not need
>        * to explicitly hold another reference here.
>        */
> -     request->batch_obj = obj;
> +     rq->batch_obj = obj;
>  
>       /* Hold a reference to the current context so that we can inspect
>        * it later in case a hangcheck error event fires.
>        */
> -     request->ctx = ring->last_context;
> -     if (request->ctx)
> -             i915_gem_context_reference(request->ctx);
> +     rq->ctx = ring->last_context;
> +     if (rq->ctx)
> +             i915_gem_context_reference(rq->ctx);
>  
> -     request->emitted_jiffies = jiffies;
> -     list_add_tail(&request->list, &ring->request_list);
> -     request->file_priv = NULL;
> +     rq->emitted_jiffies = jiffies;
> +     list_add_tail(&rq->list, &ring->request_list);
> +     rq->file_priv = NULL;
>  
>       if (file) {
>               struct drm_i915_file_private *file_priv = file->driver_priv;
>  
>               spin_lock(&file_priv->mm.lock);
> -             request->file_priv = file_priv;
> -             list_add_tail(&request->client_list,
> +             rq->file_priv = file_priv;
> +             list_add_tail(&rq->client_list,
>                             &file_priv->mm.request_list);
>               spin_unlock(&file_priv->mm.lock);
>       }
>  
> -     trace_i915_gem_request_add(ring, request->seqno);
> -     ring->outstanding_lazy_seqno = 0;
> -     ring->preallocated_lazy_request = NULL;
> +     trace_i915_gem_request_add(ring, rq->seqno);
> +     ring->preallocated_request = NULL;
>  
>       if (!dev_priv->ums.mm_suspended) {
>               i915_queue_hangcheck(ring->dev);
> @@ -2616,22 +2673,20 @@ int __i915_add_request(struct intel_engine_cs *ring,
>               intel_mark_busy(dev_priv->dev);
>       }
>  
> -     if (out_seqno)
> -             *out_seqno = request->seqno;
>       return 0;
>  }
>  
>  static inline void
> -i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
> +i915_gem_request_remove_from_client(struct i915_gem_request *rq)
>  {
> -     struct drm_i915_file_private *file_priv = request->file_priv;
> +     struct drm_i915_file_private *file_priv = rq->file_priv;
>  
>       if (!file_priv)
>               return;
>  
>       spin_lock(&file_priv->mm.lock);
> -     list_del(&request->client_list);
> -     request->file_priv = NULL;
> +     list_del(&rq->client_list);
> +     rq->file_priv = NULL;
>       spin_unlock(&file_priv->mm.lock);
>  }
>  
> @@ -2679,30 +2734,37 @@ static void i915_set_reset_status(struct drm_i915_private *dev_priv,
>       }
>  }
>  
> -static void i915_gem_free_request(struct drm_i915_gem_request *request)
> +void __i915_request_free(struct kref *kref)
> +{
> +     struct i915_gem_request *rq = container_of(kref, struct i915_gem_request, kref);
> +     kfree(rq);
> +}
> +
> +static void i915_request_retire(struct i915_gem_request *rq)
>  {
> -     list_del(&request->list);
> -     i915_gem_request_remove_from_client(request);
> +     rq->completed = true;
> +
> +     list_del(&rq->list);
> +     i915_gem_request_remove_from_client(rq);
>  
> -     if (request->ctx)
> -             i915_gem_context_unreference(request->ctx);
> +     if (rq->ctx) {
> +             i915_gem_context_unreference(rq->ctx);
> +             rq->ctx = NULL;
> +     }
>  
> -     kfree(request);
> +     i915_request_put(rq);
>  }
>  
> -struct drm_i915_gem_request *
> +struct i915_gem_request *
>  i915_gem_find_active_request(struct intel_engine_cs *ring)
>  {
> -     struct drm_i915_gem_request *request;
> -     u32 completed_seqno;
> +     struct i915_gem_request *rq;
>  
> -     completed_seqno = ring->get_seqno(ring, false);
> -
> -     list_for_each_entry(request, &ring->request_list, list) {
> -             if (i915_seqno_passed(completed_seqno, request->seqno))
> +     list_for_each_entry(rq, &ring->request_list, list) {
> +             if (i915_request_complete(rq, false))
>                       continue;
>  
> -             return request;
> +             return rq;
>       }
>  
>       return NULL;
> @@ -2711,33 +2773,53 @@ i915_gem_find_active_request(struct intel_engine_cs *ring)
>  static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv,
>                                      struct intel_engine_cs *ring)
>  {
> -     struct drm_i915_gem_request *request;
> +     struct i915_gem_request *rq;
>       bool ring_hung;
>  
> -     request = i915_gem_find_active_request(ring);
> +     rq = i915_gem_find_active_request(ring);
>  
> -     if (request == NULL)
> +     if (rq == NULL)
>               return;
>  
>       ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
>  
> -     i915_set_reset_status(dev_priv, request->ctx, ring_hung);
> +     i915_set_reset_status(dev_priv, rq->ctx, ring_hung);
>  
> -     list_for_each_entry_continue(request, &ring->request_list, list)
> -             i915_set_reset_status(dev_priv, request->ctx, false);
> +     list_for_each_entry_continue(rq, &ring->request_list, list)
> +             i915_set_reset_status(dev_priv, rq->ctx, false);
>  }
>  
>  static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
>                                       struct intel_engine_cs *ring)
>  {
> -     while (!list_empty(&ring->active_list)) {
> +     while (!list_empty(&ring->write_list)) {
>               struct drm_i915_gem_object *obj;
>  
> -             obj = list_first_entry(&ring->active_list,
> +             obj = list_first_entry(&ring->write_list,
>                                      struct drm_i915_gem_object,
> -                                    ring_list);
> +                                    last_write.ring_list);
>  
> -             i915_gem_object_move_to_inactive(obj);
> +             i915_gem_object_retire__write(obj);
> +     }
> +
> +     while (!list_empty(&ring->fence_list)) {
> +             struct drm_i915_gem_object *obj;
> +
> +             obj = list_first_entry(&ring->fence_list,
> +                                    struct drm_i915_gem_object,
> +                                    last_fence.ring_list);
> +
> +             i915_gem_object_retire__fence(obj);
> +     }
> +
> +     while (!list_empty(&ring->read_list)) {
> +             struct drm_i915_gem_object *obj;
> +
> +             obj = list_first_entry(&ring->read_list,
> +                                    struct drm_i915_gem_object,
> +                                    last_read[ring->id].ring_list);
> +
> +             i915_gem_object_retire__read(obj, ring);
>       }
>  
>       /*
> @@ -2748,19 +2830,18 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
>        * the request.
>        */
>       while (!list_empty(&ring->request_list)) {
> -             struct drm_i915_gem_request *request;
> +             struct i915_gem_request *rq;
>  
> -             request = list_first_entry(&ring->request_list,
> -                                        struct drm_i915_gem_request,
> -                                        list);
> +             rq = list_first_entry(&ring->request_list,
> +                                   struct i915_gem_request,
> +                                   list);
>  
> -             i915_gem_free_request(request);
> +             i915_request_retire(rq);
>       }
>  
>       /* These may not have been flushed before the reset, do so now */
> -     kfree(ring->preallocated_lazy_request);
> -     ring->preallocated_lazy_request = NULL;
> -     ring->outstanding_lazy_seqno = 0;
> +     kfree(ring->preallocated_request);
> +     ring->preallocated_request = NULL;
>  }
>  
>  void i915_gem_restore_fences(struct drm_device *dev)
> @@ -2825,43 +2906,71 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
>        * by the ringbuffer to the flushing/inactive lists as appropriate,
>        * before we free the context associated with the requests.
>        */
> -     while (!list_empty(&ring->active_list)) {
> +     while (!list_empty(&ring->write_list)) {
> +             struct drm_i915_gem_object *obj;
> +
> +             obj = list_first_entry(&ring->write_list,
> +                                    struct drm_i915_gem_object,
> +                                    last_write.ring_list);
> +
> +             if (!__i915_seqno_passed(seqno,
> +                                      obj->last_write.request->seqno))
> +                     break;
> +
> +             i915_gem_object_retire__write(obj);
> +     }
> +
> +     while (!list_empty(&ring->fence_list)) {
>               struct drm_i915_gem_object *obj;
>  
> -             obj = list_first_entry(&ring->active_list,
> -                                   struct drm_i915_gem_object,
> -                                   ring_list);
> +             obj = list_first_entry(&ring->fence_list,
> +                                    struct drm_i915_gem_object,
> +                                    last_fence.ring_list);
>  
> -             if (!i915_seqno_passed(seqno, obj->last_read_seqno))
> +             if (!__i915_seqno_passed(seqno,
> +                                      obj->last_fence.request->seqno))
>                       break;
>  
> -             i915_gem_object_move_to_inactive(obj);
> +             i915_gem_object_retire__fence(obj);
>       }
>  
> +     while (!list_empty(&ring->read_list)) {
> +             struct drm_i915_gem_object *obj;
> +
> +             obj = list_first_entry(&ring->read_list,
> +                                    struct drm_i915_gem_object,
> +                                    last_read[ring->id].ring_list);
> +
> +             if (!__i915_seqno_passed(seqno,
> +                                      obj->last_read[ring->id].request->seqno))
> +                     break;
> +
> +             i915_gem_object_retire__read(obj, ring);
> +     }
>  
>       while (!list_empty(&ring->request_list)) {
> -             struct drm_i915_gem_request *request;
> +             struct i915_gem_request *rq;
>  
> -             request = list_first_entry(&ring->request_list,
> -                                        struct drm_i915_gem_request,
> -                                        list);
> +             rq = list_first_entry(&ring->request_list,
> +                                   struct i915_gem_request,
> +                                   list);
>  
> -             if (!i915_seqno_passed(seqno, request->seqno))
> +             if (!__i915_seqno_passed(seqno, rq->seqno))
>                       break;
>  
> -             trace_i915_gem_request_retire(ring, request->seqno);
> +             trace_i915_gem_request_retire(ring, rq->seqno);
>               /* We know the GPU must have read the request to have
>                * sent us the seqno + interrupt, so use the position
>                * of tail of the request to update the last known position
>                * of the GPU head.
>                */
> -             ring->buffer->last_retired_head = request->tail;
> +             ring->buffer->last_retired_head = rq->tail;
>  
> -             i915_gem_free_request(request);
> +             i915_request_retire(rq);
>       }
>  
>       if (unlikely(ring->trace_irq_seqno &&
> -                  i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
> +                  __i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
>               ring->irq_put(ring);
>               ring->trace_irq_seqno = 0;
>       }
> @@ -2926,14 +3035,23 @@ i915_gem_idle_work_handler(struct work_struct *work)
>  static int
>  i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
>  {
> -     int ret;
> +     int i;
>  
> -     if (obj->active) {
> -             ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno);
> +     if (!obj->active)
> +             return 0;
> +
> +     for (i = 0; i < I915_NUM_RINGS; i++) {
> +             struct i915_gem_request *rq = obj->last_read[i].request;
> +             int ret;
> +
> +             if (rq == NULL)
> +                     continue;
> +
> +             ret = i915_gem_check_olr(rq);
>               if (ret)
>                       return ret;
>  
> -             i915_gem_retire_requests_ring(obj->ring);
> +             i915_gem_retire_requests_ring(rq->ring);
>       }
>  
>       return 0;
> @@ -2967,11 +3085,10 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>       struct drm_i915_private *dev_priv = dev->dev_private;
>       struct drm_i915_gem_wait *args = data;
>       struct drm_i915_gem_object *obj;
> -     struct intel_engine_cs *ring = NULL;
>       struct timespec timeout_stack, *timeout = NULL;
> +     struct i915_gem_request *rq[I915_NUM_RINGS] = {};
>       unsigned reset_counter;
> -     u32 seqno = 0;
> -     int ret = 0;
> +     int i, n, ret = 0;
>  
>       if (args->timeout_ns >= 0) {
>               timeout_stack = ns_to_timespec(args->timeout_ns);
> @@ -2993,13 +3110,8 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>       if (ret)
>               goto out;
>  
> -     if (obj->active) {
> -             seqno = obj->last_read_seqno;
> -             ring = obj->ring;
> -     }
> -
> -     if (seqno == 0)
> -              goto out;
> +     if (!obj->active)
> +             goto out;
>  
>       /* Do this after OLR check to make sure we make forward progress polling
>        * on this IOCTL with a 0 timeout (like busy ioctl)
> @@ -3009,11 +3121,25 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>               goto out;
>       }
>  
> +     for (i = n = 0; i < I915_NUM_RINGS; i++) {
> +             if (obj->last_read[i].request == NULL)
> +                     continue;
> +
> +             rq[n++] = i915_request_get(obj->last_read[i].request);
> +     }
> +
>       drm_gem_object_unreference(&obj->base);
> +
>       reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
>       mutex_unlock(&dev->struct_mutex);
>  
> -     ret = __wait_seqno(ring, seqno, reset_counter, true, timeout, file->driver_priv);
> +     for (i = 0; i < n; i++) {
> +             if (ret == 0)
> +                     ret = __wait_request(rq[i], reset_counter, true, timeout, file->driver_priv);
> +
> +             i915_request_put(rq[i]);
> +     }
> +
>       if (timeout)
>               args->timeout_ns = timespec_to_ns(timeout);
>       return ret;
> @@ -3024,6 +3150,45 @@ out:
>       return ret;
>  }
>  
> +static int
> +i915_request_sync(struct i915_gem_request *rq,
> +               struct intel_engine_cs *to,
> +               struct drm_i915_gem_object *obj)
> +{
> +     int ret, idx;
> +
> +     if (to == NULL)
> +             return i915_wait_request(rq);
> +
> +     /* XXX this is broken by VEBOX+ */
> +     idx = intel_ring_sync_index(rq->ring, to);
> +
> +     /* Optimization: Avoid semaphore sync when we are sure we already
> +      * waited for an object with higher seqno */
> +     if (rq->seqno <= rq->ring->semaphore.sync_seqno[idx])
> +             return 0;
> +
> +     ret = i915_gem_check_olr(rq);
> +     if (ret)
> +             return ret;
> +
> +     if (!i915_request_complete(rq, true)) {
> +             trace_i915_gem_ring_sync_to(rq->ring, to, rq->seqno);
> +             ret = to->semaphore.sync_to(to, rq->ring, rq->seqno);
> +             if (ret)
> +                     return ret;
> +     }
> +
> +     /* We must recheck the last_read request because sync_to()
> +      * might have just caused seqno wrap under
> +      * the radar.
> +      */
> +     if (obj->last_read[rq->ring->id].request == rq)
> +             rq->ring->semaphore.sync_seqno[idx] = rq->seqno;
> +
> +     return 0;
> +}
> +
>  /**
>   * i915_gem_object_sync - sync an object to a ring.
>   *
> @@ -3038,44 +3203,35 @@ out:
>   */
>  int
>  i915_gem_object_sync(struct drm_i915_gem_object *obj,
> -                  struct intel_engine_cs *to)
> +                  struct intel_engine_cs *to,
> +                  bool readonly)
>  {
> -     struct intel_engine_cs *from = obj->ring;
> -     u32 seqno;
> -     int ret, idx;
> +     struct i915_gem_request *rq;
> +     struct intel_engine_cs *semaphore;
> +     int ret = 0, i;
>  
> -     if (from == NULL || to == from)
> -             return 0;
> +     semaphore = NULL;
> +     if (i915_semaphore_is_enabled(obj->base.dev))
> +             semaphore = to;
>  
> -     if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
> -             return i915_gem_object_wait_rendering(obj, false);
> -
> -     /* XXX this is broken by VEBOX+ */
> -     idx = intel_ring_sync_index(from, to);
> -
> -     seqno = obj->last_read_seqno;
> -     /* Optimization: Avoid semaphore sync when we are sure we already
> -      * waited for an object with higher seqno */
> -     if (seqno <= from->semaphore.sync_seqno[idx])
> -             return 0;
> -
> -     ret = 0;
> -     if (!i915_seqno_passed(from->get_seqno(from, true), seqno)) {
> -             ret = i915_gem_check_olr(from, seqno);
> -             if (ret)
> -                     return ret;
> +     if (readonly) {
> +             rq = obj->last_write.request;
> +             if (rq != NULL && to != rq->ring)
> +                     ret = i915_request_sync(rq, semaphore, obj);
> +     } else {
> +             for (i = 0; i < I915_NUM_RINGS; i++) {
> +                     rq = obj->last_read[i].request;
> +                     if (rq == NULL || to == rq->ring)
> +                             continue;
>  
> -             trace_i915_gem_ring_sync_to(from, to, seqno);
> -             ret = to->semaphore.sync_to(to, from, seqno);
> +                     ret = i915_request_sync(rq, semaphore, obj);
> +                     if (ret)
> +                             break;
> +             }
>       }
> -     if (!ret)
> -             /* We use last_read_seqno because sync_to()
> -              * might have just caused seqno wrap under
> -              * the radar.
> -              */
> -             from->semaphore.sync_seqno[idx] = obj->last_read_seqno;
>  
>       return ret;
> +
>  }
>  
>  static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
> @@ -3381,14 +3537,16 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
>  static int
>  i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
>  {
> -     if (obj->last_fenced_seqno) {
> -             int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno);
> -             if (ret)
> -                     return ret;
> +     int ret;
>  
> -             obj->last_fenced_seqno = 0;
> -     }
> +     if (obj->last_fence.request == NULL)
> +             return 0;
>  
> +     ret = i915_wait_request(obj->last_fence.request);
> +     if (ret)
> +             return ret;
> +
> +     i915_gem_object_retire__fence(obj);
>       return 0;
>  }
>  
> @@ -3836,11 +3994,12 @@ int
>  i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
>  {
>       struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> +     struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
>       uint32_t old_write_domain, old_read_domains;
>       int ret;
>  
>       /* Not valid to be called on unbound objects. */
> -     if (!i915_gem_obj_bound_any(obj))
> +     if (vma == NULL)
>               return -EINVAL;
>  
>       if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
> @@ -3882,14 +4041,8 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
>                                           old_write_domain);
>  
>       /* And bump the LRU for this access */
> -     if (!obj->active) {
> -             struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
> -             if (vma)
> -                     list_move_tail(&vma->mm_list,
> -                                    &dev_priv->gtt.base.inactive_list);
> -
> -     }
> -
> +     list_move_tail(&vma->mm_list,
> +                    &dev_priv->gtt.base.inactive_list);

We've lost the obj->active check here and I didn't spot anything that
would justify that.
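
I'd have expected the old behaviour to survive, i.e. something like
this sketch:

	if (!obj->active)
		list_move_tail(&vma->mm_list,
			       &dev_priv->gtt.base.inactive_list);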

>       return 0;
>  }
>  
> @@ -4087,11 +4240,9 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>       bool was_pin_display;
>       int ret;
>  
> -     if (pipelined != obj->ring) {
> -             ret = i915_gem_object_sync(obj, pipelined);
> -             if (ret)
> -                     return ret;
> -     }
> +     ret = i915_gem_object_sync(obj, pipelined, true);
> +     if (ret)
> +             return ret;
>  
>       /* Mark the pin_display early so that we account for the
>        * display coherency whilst setting up the cache domains.
> @@ -4239,10 +4390,8 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
>       struct drm_i915_private *dev_priv = dev->dev_private;
>       struct drm_i915_file_private *file_priv = file->driver_priv;
>       unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
> -     struct drm_i915_gem_request *request;
> -     struct intel_engine_cs *ring = NULL;
> +     struct i915_gem_request *rq;
>       unsigned reset_counter;
> -     u32 seqno = 0;
>       int ret;
>  
>       ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
> @@ -4254,23 +4403,22 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
>               return ret;
>  
>       spin_lock(&file_priv->mm.lock);
> -     list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
> -             if (time_after_eq(request->emitted_jiffies, recent_enough))
> +     list_for_each_entry(rq, &file_priv->mm.request_list, client_list) {
> +             if (time_after_eq(rq->emitted_jiffies, recent_enough))
>                       break;
> -
> -             ring = request->ring;
> -             seqno = request->seqno;
>       }
> +     rq = i915_request_get(&rq->client_list == &file_priv->mm.request_list ? NULL : rq);
>       reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
>       spin_unlock(&file_priv->mm.lock);
>  
> -     if (seqno == 0)
> +     if (rq == NULL)
>               return 0;
>  
> -     ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, NULL);
> +     ret = __wait_request(rq, reset_counter, true, NULL, NULL);
>       if (ret == 0)
>               queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
>  
> +     i915_request_put(rq);
>       return ret;
>  }
>  
> @@ -4488,7 +4636,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
>  {
>       struct drm_i915_gem_busy *args = data;
>       struct drm_i915_gem_object *obj;
> -     int ret;
> +     int ret, i;
>  
>       ret = i915_mutex_lock_interruptible(dev);
>       if (ret)
> @@ -4507,10 +4655,16 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
>        */
>       ret = i915_gem_object_flush_active(obj);
>  
> -     args->busy = obj->active;
> -     if (obj->ring) {
> +     args->busy = 0;
> +     if (obj->active) {
>               BUILD_BUG_ON(I915_NUM_RINGS > 16);

Hm, this suggests we should size active to be 4 bits. Just to stay
consistent.
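
I.e. something like this in the object struct (sketch only; assumes
active ends up as a ring count/index rather than a full mask):

	/* hypothetical: 4 bits index up to 16 rings */
	unsigned int active : 4;

with the BUILD_BUG_ON above rewritten as I915_NUM_RINGS > (1 << 4) so
the two stay in sync.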

> -             args->busy |= intel_ring_flag(obj->ring) << 16;
> +             args->busy |= 1;
> +             for (i = 0; i < I915_NUM_RINGS; i++)  {
> +                     if (obj->last_read[i].request == NULL)
> +                             continue;
> +
> +                     args->busy |= 1 << (16 + i);
> +             }
>       }
>  
>       drm_gem_object_unreference(&obj->base);
> @@ -4584,8 +4738,13 @@ unlock:
>  void i915_gem_object_init(struct drm_i915_gem_object *obj,
>                         const struct drm_i915_gem_object_ops *ops)
>  {
> +     int i;
> +
>       INIT_LIST_HEAD(&obj->global_list);
> -     INIT_LIST_HEAD(&obj->ring_list);
> +     INIT_LIST_HEAD(&obj->last_fence.ring_list);
> +     INIT_LIST_HEAD(&obj->last_write.ring_list);
> +     for (i = 0; i < I915_NUM_RINGS; i++)
> +             INIT_LIST_HEAD(&obj->last_read[i].ring_list);
>       INIT_LIST_HEAD(&obj->obj_exec_link);
>       INIT_LIST_HEAD(&obj->vma_list);
>  
> @@ -5117,7 +5276,9 @@ i915_gem_lastclose(struct drm_device *dev)
>  static void
>  init_ring_lists(struct intel_engine_cs *ring)
>  {
> -     INIT_LIST_HEAD(&ring->active_list);
> +     INIT_LIST_HEAD(&ring->read_list);
> +     INIT_LIST_HEAD(&ring->write_list);
> +     INIT_LIST_HEAD(&ring->fence_list);
>       INIT_LIST_HEAD(&ring->request_list);
>  }
>  
> @@ -5213,13 +5374,13 @@ void i915_gem_release(struct drm_device *dev, struct 
> drm_file *file)
>        */
>       spin_lock(&file_priv->mm.lock);
>       while (!list_empty(&file_priv->mm.request_list)) {
> -             struct drm_i915_gem_request *request;
> +             struct i915_gem_request *rq;
>  
> -             request = list_first_entry(&file_priv->mm.request_list,
> -                                        struct drm_i915_gem_request,
> -                                        client_list);
> -             list_del(&request->client_list);
> -             request->file_priv = NULL;
> +             rq = list_first_entry(&file_priv->mm.request_list,
> +                                   struct i915_gem_request,
> +                                   client_list);
> +             list_del(&rq->client_list);
> +             rq->file_priv = NULL;
>       }
>       spin_unlock(&file_priv->mm.lock);
>  }
> @@ -5503,15 +5664,27 @@ struct i915_vma *i915_gem_obj_to_ggtt(struct 
> drm_i915_gem_object *obj)
>  {
>       struct i915_vma *vma;
>  
> -     /* This WARN has probably outlived its usefulness (callers already
> -      * WARN if they don't find the GGTT vma they expect). When removing,
> -      * remember to remove the pre-check in is_pin_display() as well */
> -     if (WARN_ON(list_empty(&obj->vma_list)))
> -             return NULL;
> -

Smells like a separate patch. Maybe do it up-front if taking it out is too
invasive.

>       vma = list_first_entry(&obj->vma_list, typeof(*vma), vma_link);
>       if (vma->vm != obj_to_ggtt(obj))
>               return NULL;
>  
>       return vma;
>  }
> +
> +struct i915_gem_request *i915_gem_object_last_read(struct 
> drm_i915_gem_object *obj)

This one needs a big warning that it's only suitable as a hint for error
state and debugfs. If execbuf gets stuck in the slowpath we might end up
with slightly out-of-order reads (since now they don't sync cross-engine
any more).
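
E.g. a comment along these lines above the function (wording is mine):

	/*
	 * NB: Only a heuristic for debugfs and the error state. Reads are
	 * tracked per ring and no longer synchronised across engines, so
	 * comparing seqnos between rings is approximate and may report
	 * slightly out-of-order reads if execbuf fell back to the slowpath.
	 */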

> +{
> +     u32 seqno = 0;
> +     struct i915_gem_request *rq = NULL;
> +     int i;
> +
> +     /* This is approximate as seqno cannot be used across rings */
> +     for (i = 0; i < I915_NUM_RINGS; i++) {
> +             if (obj->last_read[i].request == NULL)
> +                     continue;
> +
> +             if (__i915_seqno_passed(obj->last_read[i].request->seqno, 
> seqno))
> +                     rq = obj->last_read[i].request, seqno = rq->seqno;
> +     }
> +
> +     return rq;
> +}
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
> b/drivers/gpu/drm/i915/i915_gem_context.c
> index 79dc77b..690e2dc 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -394,13 +394,9 @@ void i915_gem_context_reset(struct drm_device *dev)
>               if (!lctx)
>                       continue;
>  
> -             if (dctx->legacy_hw_ctx.rcs_state && i == RCS) {
> +             if (dctx->legacy_hw_ctx.rcs_state && i == RCS)
>                       
> WARN_ON(i915_gem_obj_ggtt_pin(dctx->legacy_hw_ctx.rcs_state,
>                                                     
> get_context_alignment(dev), 0));
> -                     /* Fake a finish/inactive */
> -                     dctx->legacy_hw_ctx.rcs_state->base.write_domain = 0;
> -                     dctx->legacy_hw_ctx.rcs_state->active = 0;
> -             }

Again tastes like a separate patch for up-front merging.

>  
>               if (lctx->legacy_hw_ctx.rcs_state && i == RCS)
>                       
> i915_gem_object_ggtt_unpin(lctx->legacy_hw_ctx.rcs_state);
> @@ -467,7 +463,6 @@ void i915_gem_context_fini(struct drm_device *dev)
>               WARN_ON(!dev_priv->ring[RCS].last_context);
>               if (dev_priv->ring[RCS].last_context == dctx) {
>                       /* Fake switch to NULL context */
> -                     WARN_ON(dctx->legacy_hw_ctx.rcs_state->active);
>                       
> i915_gem_object_ggtt_unpin(dctx->legacy_hw_ctx.rcs_state);
>                       i915_gem_context_unreference(dctx);
>                       dev_priv->ring[RCS].last_context = NULL;
> @@ -741,8 +736,11 @@ static int do_switch(struct intel_engine_cs *ring,
>        * MI_SET_CONTEXT instead of when the next seqno has completed.
>        */
>       if (from != NULL) {
> -             from->legacy_hw_ctx.rcs_state->base.read_domains = 
> I915_GEM_DOMAIN_INSTRUCTION;
> -             
> i915_vma_move_to_active(i915_gem_obj_to_ggtt(from->legacy_hw_ctx.rcs_state), 
> ring);
> +             struct drm_i915_gem_object *from_obj = 
> from->legacy_hw_ctx.rcs_state;
> +
> +             from_obj->base.pending_read_domains = 
> I915_GEM_DOMAIN_INSTRUCTION;
> +             i915_vma_move_to_active(i915_gem_obj_to_ggtt(from_obj), ring, 
> 0);
> +
>               /* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
>                * whole damn pipeline, we don't need to explicitly mark the
>                * object dirty. The only exception is that the context must be
> @@ -750,11 +748,10 @@ static int do_switch(struct intel_engine_cs *ring,
>                * able to defer doing this until we know the object would be
>                * swapped, but there is no way to do that yet.
>                */
> -             from->legacy_hw_ctx.rcs_state->dirty = 1;
> -             BUG_ON(from->legacy_hw_ctx.rcs_state->ring != ring);
> +             from_obj->dirty = 1;
>  
>               /* obj is kept alive until the next request by its active ref */
> -             i915_gem_object_ggtt_unpin(from->legacy_hw_ctx.rcs_state);
> +             i915_gem_object_ggtt_unpin(from_obj);
>               i915_gem_context_unreference(from);
>       }
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_exec.c 
> b/drivers/gpu/drm/i915/i915_gem_exec.c
> index 57d4dde..787ea6f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_exec.c
> +++ b/drivers/gpu/drm/i915/i915_gem_exec.c
> @@ -45,7 +45,7 @@ static int i915_gem_exec_flush_object(struct 
> drm_i915_gem_object *obj,
>  {
>       int ret;
>  
> -     ret = i915_gem_object_sync(obj, ring);
> +     ret = i915_gem_object_sync(obj, ring, false);
>       if (ret)
>               return ret;
>  
> @@ -65,11 +65,9 @@ static int i915_gem_exec_flush_object(struct 
> drm_i915_gem_object *obj,
>  static void i915_gem_exec_dirty_object(struct drm_i915_gem_object *obj,
>                                      struct intel_engine_cs *ring)
>  {
> -     obj->base.read_domains = I915_GEM_DOMAIN_RENDER;
> -     obj->base.write_domain = I915_GEM_DOMAIN_RENDER;
> -     i915_vma_move_to_active(i915_gem_obj_to_ggtt(obj), ring);
> -     obj->last_write_seqno = intel_ring_get_seqno(ring);
> -     obj->dirty = 1;

Would be nice to split out the semantic change of moving dirty = 1 into
move_to_active.
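
I.e. a small prep patch that only moves the dirty handling (sketch,
guarded by the write domain as execbuf does today):

	void i915_vma_move_to_active(struct i915_vma *vma,
				     struct intel_engine_cs *ring)
	{
		/* existing active/ring-list bookkeeping ... */

		if (vma->obj->base.write_domain)
			vma->obj->dirty = 1;
	}

so the mechanical conversion in this patch doesn't hide the semantic
change.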

> +     obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
> +     obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
> +     i915_vma_move_to_active(i915_gem_obj_to_ggtt(obj), ring, 0);
>  
>       ring->gpu_caches_dirty = true;
>  }
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
> b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 0faab01..8f1c2a2 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -847,7 +847,8 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs 
> *ring,
>  
>       list_for_each_entry(vma, vmas, exec_list) {
>               struct drm_i915_gem_object *obj = vma->obj;
> -             ret = i915_gem_object_sync(obj, ring);
> +
> +             ret = i915_gem_object_sync(obj, ring, 
> obj->base.pending_write_domain == 0);
>               if (ret)
>                       return ret;
>  
> @@ -956,40 +957,20 @@ static void
>  i915_gem_execbuffer_move_to_active(struct list_head *vmas,
>                                  struct intel_engine_cs *ring)
>  {
> -     u32 seqno = intel_ring_get_seqno(ring);
>       struct i915_vma *vma;
>  
>       list_for_each_entry(vma, vmas, exec_list) {
>               struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> -             struct drm_i915_gem_object *obj = vma->obj;
> -             u32 old_read = obj->base.read_domains;
> -             u32 old_write = obj->base.write_domain;
> -
> -             obj->base.write_domain = obj->base.pending_write_domain;
> -             if (obj->base.write_domain == 0)
> -                     obj->base.pending_read_domains |= 
> obj->base.read_domains;
> -             obj->base.read_domains = obj->base.pending_read_domains;
> -
> -             i915_vma_move_to_active(vma, ring);
> -             if (obj->base.write_domain) {
> -                     obj->dirty = 1;
> -                     obj->last_write_seqno = seqno;
> +             unsigned fenced;
>  
> -                     intel_fb_obj_invalidate(obj, ring);
> -
> -                     /* update for the implicit flush after a batch */
> -                     obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
> -             }
> +             fenced = 0;
>               if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
> -                     obj->last_fenced_seqno = seqno;
> -                     if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
> -                             struct drm_i915_private *dev_priv = 
> to_i915(ring->dev);
> -                             
> list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
> -                                            &dev_priv->mm.fence_list);
> -                     }
> +                     fenced |= VMA_IS_FENCED;
> +                     if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
> +                             fenced |= VMA_HAS_FENCE;
>               }
>  
> -             trace_i915_gem_object_change_domain(obj, old_read, old_write);
> +             i915_vma_move_to_active(vma, ring, fenced);
>       }
>  }
>  
> @@ -1003,7 +984,7 @@ i915_gem_execbuffer_retire_commands(struct drm_device 
> *dev,
>       ring->gpu_caches_dirty = true;
>  
>       /* Add a breadcrumb for the completion of the batch buffer */
> -     (void)__i915_add_request(ring, file, obj, NULL);
> +     (void)__i915_add_request(ring, file, obj);
>  }
>  
>  static int
> diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c 
> b/drivers/gpu/drm/i915/i915_gem_render_state.c
> index e60be3f..fc1223c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_render_state.c
> +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
> @@ -159,9 +159,10 @@ int i915_gem_render_state_init(struct intel_engine_cs 
> *ring)
>       if (ret)
>               goto out;
>  
> -     i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring);
> +     so.obj->base.pending_read_domains = I915_GEM_DOMAIN_COMMAND;
> +     i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring, 0);
>  
> -     ret = __i915_add_request(ring, NULL, so.obj, NULL);
> +     ret = __i915_add_request(ring, NULL, so.obj);
>       /* __i915_add_request moves object to inactive if it fails */
>  out:
>       render_state_fini(&so);
> diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c 
> b/drivers/gpu/drm/i915/i915_gem_tiling.c
> index af5d31a..e46fb34 100644
> --- a/drivers/gpu/drm/i915/i915_gem_tiling.c
> +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
> @@ -326,7 +326,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object 
> *obj,
>  
>       if (ret == 0) {
>               obj->fence_dirty =
> -                     obj->last_fenced_seqno ||
> +                     obj->last_fence.request ||
>                       obj->fence_reg != I915_FENCE_REG_NONE;
>               obj->tiling_mode = tiling_mode;
>               obj->stride = stride;
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
> b/drivers/gpu/drm/i915/i915_gpu_error.c
> index ebc8529..584b863 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -572,7 +572,7 @@ i915_error_object_create(struct drm_i915_private 
> *dev_priv,
>       if (i915_gem_obj_bound(src, vm))
>               dst->gtt_offset = i915_gem_obj_offset(src, vm);
>       else
> -             dst->gtt_offset = -1UL;
> +             dst->gtt_offset = -1;

Spurious change?

>  
>       reloc_offset = dst->gtt_offset;
>       use_ggtt = (src->cache_level == I915_CACHE_NONE &&
> @@ -653,11 +653,12 @@ static void capture_bo(struct drm_i915_error_buffer 
> *err,
>                      struct i915_vma *vma)
>  {
>       struct drm_i915_gem_object *obj = vma->obj;
> +     struct i915_gem_request *rq = i915_gem_object_last_read(obj);
>  
>       err->size = obj->base.size;
>       err->name = obj->base.name;
> -     err->rseqno = obj->last_read_seqno;
> -     err->wseqno = obj->last_write_seqno;
> +     err->rseqno = i915_request_seqno(rq);
> +     err->wseqno = i915_request_seqno(obj->last_write.request);
>       err->gtt_offset = vma->node.start;
>       err->read_domains = obj->base.read_domains;
>       err->write_domain = obj->base.write_domain;
> @@ -671,7 +672,7 @@ static void capture_bo(struct drm_i915_error_buffer *err,
>       err->dirty = obj->dirty;
>       err->purgeable = obj->madv != I915_MADV_WILLNEED;
>       err->userptr = obj->userptr.mm != NULL;
> -     err->ring = obj->ring ? obj->ring->id : -1;
> +     err->ring = i915_request_ring_id(rq);
>       err->cache_level = obj->cache_level;
>  }
>  
> @@ -963,7 +964,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
>                                 struct drm_i915_error_state *error)
>  {
>       struct drm_i915_private *dev_priv = dev->dev_private;
> -     struct drm_i915_gem_request *request;
> +     struct i915_gem_request *rq;
>       int i, count;
>  
>       for (i = 0; i < I915_NUM_RINGS; i++) {
> @@ -978,17 +979,17 @@ static void i915_gem_record_rings(struct drm_device 
> *dev,
>  
>               i915_record_ring_state(dev, error, ring, &error->ring[i]);
>  
> -             request = i915_gem_find_active_request(ring);
> -             if (request) {
> +             rq = i915_gem_find_active_request(ring);

This reminds me that our locking for the error state capture and also the
guilty batch determination is fairly ... nonexistent. This will be a fun
problem to fix once we make reset more common with per-engine resets and
short-lived timers for media workloads. Anyway, unrelated comment.

> +             if (rq) {
>                       /* We need to copy these to an anonymous buffer
>                        * as the simplest method to avoid being overwritten
>                        * by userspace.
>                        */
>                       error->ring[i].batchbuffer =
>                               i915_error_object_create(dev_priv,
> -                                                      request->batch_obj,
> -                                                      request->ctx ?
> -                                                      request->ctx->vm :
> +                                                      rq->batch_obj,
> +                                                      rq->ctx ?
> +                                                      rq->ctx->vm :
>                                                        &dev_priv->gtt.base);
>  
>                       if (HAS_BROKEN_CS_TLB(dev_priv))
> @@ -996,11 +997,11 @@ static void i915_gem_record_rings(struct drm_device 
> *dev,
>                                       i915_error_ggtt_object_create(dev_priv,
>                                                            ring->scratch.obj);
>  
> -                     if (request->file_priv) {
> +                     if (rq->file_priv) {
>                               struct task_struct *task;
>  
>                               rcu_read_lock();
> -                             task = pid_task(request->file_priv->file->pid,
> +                             task = pid_task(rq->file_priv->file->pid,
>                                               PIDTYPE_PID);
>                               if (task) {
>                                       strcpy(error->ring[i].comm, task->comm);
> @@ -1019,7 +1020,7 @@ static void i915_gem_record_rings(struct drm_device 
> *dev,
>               i915_gem_record_active_context(ring, error, &error->ring[i]);
>  
>               count = 0;
> -             list_for_each_entry(request, &ring->request_list, list)
> +             list_for_each_entry(rq, &ring->request_list, list)
>                       count++;
>  
>               error->ring[i].num_requests = count;
> @@ -1032,13 +1033,13 @@ static void i915_gem_record_rings(struct drm_device 
> *dev,
>               }
>  
>               count = 0;
> -             list_for_each_entry(request, &ring->request_list, list) {
> +             list_for_each_entry(rq, &ring->request_list, list) {
>                       struct drm_i915_error_request *erq;
>  
>                       erq = &error->ring[i].requests[count++];
> -                     erq->seqno = request->seqno;
> -                     erq->jiffies = request->emitted_jiffies;
> -                     erq->tail = request->tail;
> +                     erq->seqno = rq->seqno;
> +                     erq->jiffies = rq->emitted_jiffies;
> +                     erq->tail = rq->tail;
>               }
>       }
>  }
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 717c111..6d4f5a7 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2935,14 +2935,14 @@ static u32
>  ring_last_seqno(struct intel_engine_cs *ring)
>  {
>       return list_entry(ring->request_list.prev,
> -                       struct drm_i915_gem_request, list)->seqno;
> +                       struct i915_gem_request, list)->seqno;
>  }
>  
>  static bool
>  ring_idle(struct intel_engine_cs *ring, u32 seqno)
>  {
>       return (list_empty(&ring->request_list) ||
> -             i915_seqno_passed(seqno, ring_last_seqno(ring)));
> +             __i915_seqno_passed(seqno, ring_last_seqno(ring)));
>  }
>  
>  static bool
> @@ -3057,7 +3057,7 @@ static int semaphore_passed(struct intel_engine_cs 
> *ring)
>       if (signaller->hangcheck.deadlock >= I915_NUM_RINGS)
>               return -1;
>  
> -     if (i915_seqno_passed(signaller->get_seqno(signaller, false), seqno))
> +     if (__i915_seqno_passed(signaller->get_seqno(signaller, false), seqno))
>               return 1;
>  
>       /* cursory check for an unkickable deadlock */
> diff --git a/drivers/gpu/drm/i915/i915_perf.c 
> b/drivers/gpu/drm/i915/i915_perf.c
> index 75f423d..f1c2a28 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -17,16 +17,16 @@ static bool gpu_active(struct drm_i915_private *i915)
>       int i;
>  
>       for_each_ring(ring, i915, i) {
> -             struct drm_i915_gem_request *rq;
> +             struct i915_gem_request *rq;
>  
>               if (list_empty(&ring->request_list))
>                       continue;
>  
>               rq = list_last_entry(&ring->request_list,
> -                                  struct drm_i915_gem_request,
> +                                  struct i915_gem_request,
>                                    list);
>  
> -             if (i915_seqno_passed(ring->get_seqno(ring, true), rq->seqno))
> +             if (i915_request_complete(rq, true))
>                       continue;
>  
>               return true;
> diff --git a/drivers/gpu/drm/i915/i915_trace.h 
> b/drivers/gpu/drm/i915/i915_trace.h
> index 63f6875..0ebd85d 100644
> --- a/drivers/gpu/drm/i915/i915_trace.h
> +++ b/drivers/gpu/drm/i915/i915_trace.h
> @@ -389,7 +389,7 @@ TRACE_EVENT(i915_gem_ring_dispatch,
>           TP_fast_assign(
>                          __entry->dev = ring->dev->primary->index;
>                          __entry->ring = ring->id;
> -                        __entry->seqno = intel_ring_get_seqno(ring),
> +                        __entry->seqno = intel_ring_get_request(ring)->seqno,
>                          __entry->flags = flags;
>                          i915_trace_irq_get(ring, __entry->seqno);
>                          ),
> diff --git a/drivers/gpu/drm/i915/intel_display.c 
> b/drivers/gpu/drm/i915/intel_display.c
> index d828f47..9b7931c 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -9167,6 +9167,7 @@ static void intel_unpin_work_fn(struct work_struct 
> *__work)
>       BUG_ON(atomic_read(&to_intel_crtc(work->crtc)->unpin_work_count) == 0);
>       atomic_dec(&to_intel_crtc(work->crtc)->unpin_work_count);
>  
> +     i915_request_put(work->flip_queued_request);
>       kfree(work);
>  }
>  
> @@ -9548,7 +9549,7 @@ static bool use_mmio_flip(struct intel_engine_cs *ring,
>       else if (i915.use_mmio_flip > 0)
>               return true;
>       else
> -             return ring != obj->ring;
> +             return ring != i915_request_ring(obj->last_write.request);
>  }
>  
>  static void intel_do_mmio_flip(struct intel_crtc *intel_crtc)
> @@ -9581,25 +9582,22 @@ static void intel_do_mmio_flip(struct intel_crtc 
> *intel_crtc)
>  
>  static int intel_postpone_flip(struct drm_i915_gem_object *obj)
>  {
> -     struct intel_engine_cs *ring;
> +     struct i915_gem_request *rq = obj->last_write.request;
>       int ret;
>  
>       lockdep_assert_held(&obj->base.dev->struct_mutex);
>  
> -     if (!obj->last_write_seqno)
> -             return 0;
> -
> -     ring = obj->ring;
> -
> -     if (i915_seqno_passed(ring->get_seqno(ring, true),
> -                           obj->last_write_seqno))
> +     if (rq == NULL)
>               return 0;
>  
> -     ret = i915_gem_check_olr(ring, obj->last_write_seqno);
> +     ret = i915_gem_check_olr(rq);
>       if (ret)
>               return ret;
>  
> -     if (WARN_ON(!ring->irq_get(ring)))
> +     if (i915_request_complete(rq, true))
> +             return 0;
> +
> +     if (WARN_ON(!rq->ring->irq_get(rq->ring)))
>               return 0;
>  
>       return 1;
> @@ -9625,7 +9623,7 @@ void intel_notify_mmio_flip(struct intel_engine_cs 
> *ring)
>               if (ring->id != mmio_flip->ring_id)
>                       continue;
>  
> -             if (i915_seqno_passed(seqno, mmio_flip->seqno)) {
> +             if (__i915_seqno_passed(seqno, mmio_flip->seqno)) {
>                       intel_do_mmio_flip(intel_crtc);
>                       mmio_flip->seqno = 0;
>                       ring->irq_put(ring);
> @@ -9643,6 +9641,7 @@ static int intel_queue_mmio_flip(struct drm_device *dev,
>  {
>       struct drm_i915_private *dev_priv = dev->dev_private;
>       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> +     struct i915_gem_request *rq;
>       unsigned long irq_flags;
>       int ret;
>  
> @@ -9657,16 +9656,20 @@ static int intel_queue_mmio_flip(struct drm_device 
> *dev,
>               return 0;
>       }
>  
> +     rq = obj->last_write.request;
> +     if (WARN_ON(rq == NULL))
> +             return 0;
> +
>       spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags);
> -     intel_crtc->mmio_flip.seqno = obj->last_write_seqno;
> -     intel_crtc->mmio_flip.ring_id = obj->ring->id;
> +     intel_crtc->mmio_flip.seqno = rq->seqno;
> +     intel_crtc->mmio_flip.ring_id = rq->ring->id;
>       spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags);
>  
>       /*
>        * Double check to catch cases where irq fired before
>        * mmio flip data was ready
>        */
> -     intel_notify_mmio_flip(obj->ring);
> +     intel_notify_mmio_flip(rq->ring);
>       return 0;
>  }
>  
> @@ -9695,9 +9698,8 @@ static bool __intel_pageflip_stall_check(struct 
> drm_device *dev,
>               return false;
>  
>       if (work->flip_ready_vblank == 0) {
> -             if (work->ring &&
> -                 !i915_seqno_passed(work->ring->get_seqno(work->ring, true),
> -                                   work->flip_queued_seqno))
> +             struct i915_gem_request *rq = work->flip_queued_request;
> +             if (rq && !i915_request_complete(rq, true))
>                       return false;
>  
>               work->flip_ready_vblank = drm_vblank_count(dev, 
> intel_crtc->pipe);
> @@ -9758,6 +9760,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
>       enum pipe pipe = intel_crtc->pipe;
>       struct intel_unpin_work *work;
>       struct intel_engine_cs *ring;
> +     struct i915_gem_request *rq;
>       unsigned long flags;
>       int ret;
>  
> @@ -9856,7 +9859,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
>       } else if (IS_IVYBRIDGE(dev)) {
>               ring = &dev_priv->ring[BCS];
>       } else if (INTEL_INFO(dev)->gen >= 7) {
> -             ring = obj->ring;
> +             ring = i915_request_ring(obj->last_write.request);
>               if (ring == NULL || ring->id != RCS)
>                       ring = &dev_priv->ring[BCS];
>       } else {
> @@ -9864,7 +9867,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
>       }
>  
>       if (use_mmio_flip(ring, obj, page_flip_flags)) {
> -             ret = intel_pin_and_fence_fb_obj(dev, obj, obj->ring);
> +             ret = intel_pin_and_fence_fb_obj(dev, obj, 
> i915_request_ring(obj->last_write.request));
>               if (ret)
>                       goto cleanup_pending;
>  
> @@ -9876,8 +9879,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
>               if (ret)
>                       goto cleanup_unpin;
>  
> -             work->flip_queued_seqno = obj->last_write_seqno;
> -             work->ring = obj->ring;
> +             rq = obj->last_write.request;
>       } else {
>               ret = intel_pin_and_fence_fb_obj(dev, obj, ring);
>               if (ret)
> @@ -9891,10 +9893,10 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
>               if (ret)
>                       goto cleanup_unpin;
>  
> -             work->flip_queued_seqno = intel_ring_get_seqno(ring);
> -             work->ring = ring;
> +             rq = intel_ring_get_request(ring);
>       }
>  
> +     work->flip_queued_request = i915_request_get(rq);
>       work->flip_queued_vblank = drm_vblank_count(dev, intel_crtc->pipe);
>       work->enable_stall_check = true;
>  
> diff --git a/drivers/gpu/drm/i915/intel_drv.h 
> b/drivers/gpu/drm/i915/intel_drv.h
> index 274f77c..5f336a3 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -657,14 +657,13 @@ struct intel_unpin_work {
>       struct drm_i915_gem_object *old_fb_obj;
>       struct drm_i915_gem_object *pending_flip_obj;
>       struct drm_pending_vblank_event *event;
> -     struct intel_engine_cs *ring;
>       atomic_t pending;
>  #define INTEL_FLIP_INACTIVE  0
>  #define INTEL_FLIP_PENDING   1
>  #define INTEL_FLIP_COMPLETE  2
>       u32 flip_count;
>       u32 gtt_offset;
> -     u32 flip_queued_seqno;
> +     struct i915_gem_request *flip_queued_request;
>       int flip_queued_vblank;
>       int flip_ready_vblank;
>       bool enable_stall_check;
> diff --git a/drivers/gpu/drm/i915/intel_overlay.c 
> b/drivers/gpu/drm/i915/intel_overlay.c
> index d94af27..c709ca5 100644
> --- a/drivers/gpu/drm/i915/intel_overlay.c
> +++ b/drivers/gpu/drm/i915/intel_overlay.c
> @@ -183,7 +183,7 @@ struct intel_overlay {
>       u32 flip_addr;
>       struct drm_i915_gem_object *reg_bo;
>       /* flip handling */
> -     uint32_t last_flip_req;
> +     struct i915_gem_request *flip_request;
>       void (*flip_tail)(struct intel_overlay *);
>  };
>  
> @@ -209,29 +209,49 @@ static void intel_overlay_unmap_regs(struct 
> intel_overlay *overlay,
>               io_mapping_unmap(regs);
>  }
>  
> -static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
> -                                      void (*tail)(struct intel_overlay *))
> +/* recover from an interruption due to a signal
> + * We have to be careful not to repeat work forever and make forward progress. 
> */
> +static int intel_overlay_recover_from_interrupt(struct intel_overlay 
> *overlay)
>  {
> -     struct drm_device *dev = overlay->dev;
> -     struct drm_i915_private *dev_priv = dev->dev_private;
> -     struct intel_engine_cs *ring = &dev_priv->ring[RCS];
>       int ret;
>  
> -     BUG_ON(overlay->last_flip_req);
> -     ret = i915_add_request(ring, &overlay->last_flip_req);
> -     if (ret)
> -             return ret;
> +     if (overlay->flip_request == NULL)
> +             return 0;
>  
> -     overlay->flip_tail = tail;
> -     ret = i915_wait_seqno(ring, overlay->last_flip_req);
> +     ret = i915_wait_request(overlay->flip_request);
>       if (ret)
>               return ret;
> -     i915_gem_retire_requests(dev);
>  
> -     overlay->last_flip_req = 0;
> +     i915_request_put(overlay->flip_request);
> +     overlay->flip_request = NULL;
> +
> +     i915_gem_retire_requests(overlay->dev);
> +
> +     if (overlay->flip_tail)
> +             overlay->flip_tail(overlay);
> +
>       return 0;
>  }
>  
> +static int intel_overlay_add_request(struct intel_overlay *overlay,
> +                                  struct intel_engine_cs *ring,
> +                                  void (*tail)(struct intel_overlay *))
> +{
> +     BUG_ON(overlay->flip_request);
> +     overlay->flip_request = i915_request_get(intel_ring_get_request(ring));
> +     overlay->flip_tail = tail;
> +
> +     return i915_add_request(ring);
> +}
> +
> +static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
> +                                      struct intel_engine_cs *ring,
> +                                      void (*tail)(struct intel_overlay *))
> +{
> +     intel_overlay_add_request(overlay, ring, tail);
> +     return intel_overlay_recover_from_interrupt(overlay);
> +}
> +
>  /* overlay needs to be disable in OCMD reg */
>  static int intel_overlay_on(struct intel_overlay *overlay)
>  {
> @@ -253,9 +273,9 @@ static int intel_overlay_on(struct intel_overlay *overlay)
>       intel_ring_emit(ring, overlay->flip_addr | OFC_UPDATE);
>       intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
>       intel_ring_emit(ring, MI_NOOP);
> -     intel_ring_advance(ring);
> +     __intel_ring_advance(ring);
>  
> -     return intel_overlay_do_wait_request(overlay, NULL);
> +     return intel_overlay_do_wait_request(overlay, ring, NULL);
>  }
>  
>  /* overlay needs to be enabled in OCMD reg */
> @@ -285,15 +305,18 @@ static int intel_overlay_continue(struct intel_overlay 
> *overlay,
>  
>       intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
>       intel_ring_emit(ring, flip_addr);
> -     intel_ring_advance(ring);
> +     __intel_ring_advance(ring);
>  
> -     return i915_add_request(ring, &overlay->last_flip_req);
> +     return intel_overlay_add_request(overlay, ring, NULL);
>  }
>  
>  static void intel_overlay_release_old_vid_tail(struct intel_overlay *overlay)
>  {
>       struct drm_i915_gem_object *obj = overlay->old_vid_bo;
>  
> +     i915_gem_track_fb(obj, NULL,
> +                       INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe));
> +
>       i915_gem_object_ggtt_unpin(obj);
>       drm_gem_object_unreference(&obj->base);
>  
> @@ -353,33 +376,9 @@ static int intel_overlay_off(struct intel_overlay 
> *overlay)
>               intel_ring_emit(ring, flip_addr);
>               intel_ring_emit(ring, MI_WAIT_FOR_EVENT | 
> MI_WAIT_FOR_OVERLAY_FLIP);
>       }
> -     intel_ring_advance(ring);
> -
> -     return intel_overlay_do_wait_request(overlay, intel_overlay_off_tail);
> -}
> -
> -/* recover from an interruption due to a signal
> - * We have to be careful not to repeat work forever and make forward progress. 
> */
> -static int intel_overlay_recover_from_interrupt(struct intel_overlay 
> *overlay)
> -{
> -     struct drm_device *dev = overlay->dev;
> -     struct drm_i915_private *dev_priv = dev->dev_private;
> -     struct intel_engine_cs *ring = &dev_priv->ring[RCS];
> -     int ret;
> -
> -     if (overlay->last_flip_req == 0)
> -             return 0;
> +     __intel_ring_advance(ring);
>  
> -     ret = i915_wait_seqno(ring, overlay->last_flip_req);
> -     if (ret)
> -             return ret;
> -     i915_gem_retire_requests(dev);
> -
> -     if (overlay->flip_tail)
> -             overlay->flip_tail(overlay);
> -
> -     overlay->last_flip_req = 0;
> -     return 0;
> +     return intel_overlay_do_wait_request(overlay, ring, 
> intel_overlay_off_tail);
>  }
>  
>  /* Wait for pending overlay flip and release old frame.
> @@ -388,10 +387,8 @@ static int intel_overlay_recover_from_interrupt(struct 
> intel_overlay *overlay)
>   */
>  static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
>  {
> -     struct drm_device *dev = overlay->dev;
> -     struct drm_i915_private *dev_priv = dev->dev_private;
> -     struct intel_engine_cs *ring = &dev_priv->ring[RCS];
> -     int ret;
> +     struct drm_i915_private *dev_priv = to_i915(overlay->dev);
> +     int ret = 0;
>  
>       /* Only wait if there is actually an old frame to release to
>        * guarantee forward progress.
> @@ -400,6 +397,8 @@ static int intel_overlay_release_old_vid(struct 
> intel_overlay *overlay)
>               return 0;
>  
>       if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) {
> +             struct intel_engine_cs *ring = &dev_priv->ring[RCS];
> +
>               /* synchronous slowpath */
>               ret = intel_ring_begin(ring, 2);
>               if (ret)
> @@ -407,20 +406,14 @@ static int intel_overlay_release_old_vid(struct 
> intel_overlay *overlay)
>  
>               intel_ring_emit(ring, MI_WAIT_FOR_EVENT | 
> MI_WAIT_FOR_OVERLAY_FLIP);
>               intel_ring_emit(ring, MI_NOOP);
> -             intel_ring_advance(ring);
> +             __intel_ring_advance(ring);
>  
> -             ret = intel_overlay_do_wait_request(overlay,
> +             ret = intel_overlay_do_wait_request(overlay, ring,
>                                                   
> intel_overlay_release_old_vid_tail);
> -             if (ret)
> -                     return ret;
> -     }
> -
> -     intel_overlay_release_old_vid_tail(overlay);
> +     } else
> +             intel_overlay_release_old_vid_tail(overlay);
>  
> -
> -     i915_gem_track_fb(overlay->old_vid_bo, NULL,
> -                       INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe));
> -     return 0;
> +     return ret;
>  }
>  
>  struct put_image_params {
> @@ -827,12 +820,7 @@ int intel_overlay_switch_off(struct intel_overlay 
> *overlay)
>       iowrite32(0, &regs->OCMD);
>       intel_overlay_unmap_regs(overlay, regs);
>  
> -     ret = intel_overlay_off(overlay);
> -     if (ret != 0)
> -             return ret;
> -
> -     intel_overlay_off_tail(overlay);
> -     return 0;
> +     return intel_overlay_off(overlay);
>  }
>  
>  static int check_overlay_possible_on_crtc(struct intel_overlay *overlay,
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
> b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 7c5a6c5..ae96de5 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -726,7 +726,7 @@ static int gen8_rcs_signal(struct intel_engine_cs 
> *signaller,
>                                          PIPE_CONTROL_FLUSH_ENABLE);
>               intel_ring_emit(signaller, lower_32_bits(gtt_offset));
>               intel_ring_emit(signaller, upper_32_bits(gtt_offset));
> -             intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
> +             intel_ring_emit(signaller, 
> signaller->preallocated_request->seqno);
>               intel_ring_emit(signaller, 0);
>               intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
>                                          MI_SEMAPHORE_TARGET(waiter->id));
> @@ -763,7 +763,7 @@ static int gen8_xcs_signal(struct intel_engine_cs 
> *signaller,
>               intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
>                                          MI_FLUSH_DW_USE_GTT);
>               intel_ring_emit(signaller, upper_32_bits(gtt_offset));
> -             intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
> +             intel_ring_emit(signaller, 
> signaller->preallocated_request->seqno);
>               intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
>                                          MI_SEMAPHORE_TARGET(waiter->id));
>               intel_ring_emit(signaller, 0);
> @@ -797,7 +797,7 @@ static int gen6_signal(struct intel_engine_cs *signaller,
>               if (mbox_reg != GEN6_NOSYNC) {
>                       intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
>                       intel_ring_emit(signaller, mbox_reg);
> -                     intel_ring_emit(signaller, 
> signaller->outstanding_lazy_seqno);
> +                     intel_ring_emit(signaller, 
> signaller->preallocated_request->seqno);
>               }
>       }
>  
> @@ -832,7 +832,7 @@ gen6_add_request(struct intel_engine_cs *ring)
>  
>       intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
>       intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
> -     intel_ring_emit(ring, ring->outstanding_lazy_seqno);
> +     intel_ring_emit(ring, ring->preallocated_request->seqno);
>       intel_ring_emit(ring, MI_USER_INTERRUPT);
>       __intel_ring_advance(ring);
>  
> @@ -950,7 +950,7 @@ pc_render_add_request(struct intel_engine_cs *ring)
>                       PIPE_CONTROL_WRITE_FLUSH |
>                       PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
>       intel_ring_emit(ring, ring->scratch.gtt_offset | 
> PIPE_CONTROL_GLOBAL_GTT);
> -     intel_ring_emit(ring, ring->outstanding_lazy_seqno);
> +     intel_ring_emit(ring, ring->preallocated_request->seqno);
>       intel_ring_emit(ring, 0);
>       PIPE_CONTROL_FLUSH(ring, scratch_addr);
>       scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */
> @@ -969,7 +969,7 @@ pc_render_add_request(struct intel_engine_cs *ring)
>                       PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
>                       PIPE_CONTROL_NOTIFY);
>       intel_ring_emit(ring, ring->scratch.gtt_offset | 
> PIPE_CONTROL_GLOBAL_GTT);
> -     intel_ring_emit(ring, ring->outstanding_lazy_seqno);
> +     intel_ring_emit(ring, ring->preallocated_request->seqno);
>       intel_ring_emit(ring, 0);
>       __intel_ring_advance(ring);
>  
> @@ -1224,7 +1224,7 @@ i9xx_add_request(struct intel_engine_cs *ring)
>  
>       intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
>       intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
> -     intel_ring_emit(ring, ring->outstanding_lazy_seqno);
> +     intel_ring_emit(ring, ring->preallocated_request->seqno);
>       intel_ring_emit(ring, MI_USER_INTERRUPT);
>       __intel_ring_advance(ring);
>  
> @@ -1602,7 +1602,8 @@ static int intel_init_ring_buffer(struct drm_device 
> *dev,
>       }
>  
>       ring->dev = dev;
> -     INIT_LIST_HEAD(&ring->active_list);
> +     INIT_LIST_HEAD(&ring->read_list);
> +     INIT_LIST_HEAD(&ring->write_list);
>       INIT_LIST_HEAD(&ring->request_list);
>       ringbuf->size = 32 * PAGE_SIZE;
>       memset(ring->semaphore.sync_seqno, 0, 
> sizeof(ring->semaphore.sync_seqno));
> @@ -1662,8 +1663,7 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs 
> *ring)
>       WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0);
>  
>       intel_destroy_ringbuffer_obj(ringbuf);
> -     ring->preallocated_lazy_request = NULL;
> -     ring->outstanding_lazy_seqno = 0;
> +     ring->preallocated_request = NULL;
>  
>       if (ring->cleanup)
>               ring->cleanup(ring);
> @@ -1679,8 +1679,7 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs 
> *ring)
>  static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
>  {
>       struct intel_ringbuffer *ringbuf = ring->buffer;
> -     struct drm_i915_gem_request *request;
> -     u32 seqno = 0;
> +     struct i915_gem_request *rq;
>       int ret;
>  
>       if (ringbuf->last_retired_head != -1) {
> @@ -1692,17 +1691,15 @@ static int intel_ring_wait_request(struct 
> intel_engine_cs *ring, int n)
>                       return 0;
>       }
>  
> -     list_for_each_entry(request, &ring->request_list, list) {
> -             if (__ring_space(request->tail, ringbuf->tail, ringbuf->size) 
> >= n) {
> -                     seqno = request->seqno;
> +     list_for_each_entry(rq, &ring->request_list, list) {
> +             if (__ring_space(rq->tail, ringbuf->tail, ringbuf->size) >= n)
>                       break;
> -             }
>       }
>  
> -     if (seqno == 0)
> +     if (rq == list_entry(&ring->request_list, typeof(*rq), list))
>               return -ENOSPC;
>  
> -     ret = i915_wait_seqno(ring, seqno);
> +     ret = i915_wait_request(rq);
>       if (ret)
>               return ret;
>  
> @@ -1803,12 +1800,11 @@ static int intel_wrap_ring_buffer(struct 
> intel_engine_cs *ring)
>  
>  int intel_ring_idle(struct intel_engine_cs *ring)
>  {
> -     u32 seqno;
>       int ret;
>  
>       /* We need to add any requests required to flush the objects and ring */
> -     if (ring->outstanding_lazy_seqno) {
> -             ret = i915_add_request(ring, NULL);
> +     if (ring->preallocated_request) {
> +             ret = i915_add_request(ring);
>               if (ret)
>                       return ret;
>       }
> @@ -1817,30 +1813,36 @@ int intel_ring_idle(struct intel_engine_cs *ring)
>       if (list_empty(&ring->request_list))
>               return 0;
>  
> -     seqno = list_entry(ring->request_list.prev,
> -                        struct drm_i915_gem_request,
> -                        list)->seqno;
> -
> -     return i915_wait_seqno(ring, seqno);
> +     return i915_wait_request(container_of(ring->request_list.prev,
> +                                           struct i915_gem_request,
> +                                           list));
>  }
>  
>  static int
> -intel_ring_alloc_seqno(struct intel_engine_cs *ring)
> +intel_ring_alloc_request(struct intel_engine_cs *ring)
>  {
> -     if (ring->outstanding_lazy_seqno)
> -             return 0;
> +     struct i915_gem_request *rq;
> +     int ret;
>  
> -     if (ring->preallocated_lazy_request == NULL) {
> -             struct drm_i915_gem_request *request;
> +     if (ring->preallocated_request)
> +             return 0;
>  
> -             request = kmalloc(sizeof(*request), GFP_KERNEL);
> -             if (request == NULL)
> -                     return -ENOMEM;
> +     rq = kmalloc(sizeof(*rq), GFP_KERNEL);
> +     if (rq == NULL)
> +             return -ENOMEM;
>  
> -             ring->preallocated_lazy_request = request;
> +     ret = i915_gem_get_seqno(ring->dev, &rq->seqno);
> +     if (ret) {
> +             kfree(rq);
> +             return ret;
>       }
>  
> -     return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno);
> +     kref_init(&rq->kref);
> +     rq->ring = ring;
> +     rq->completed = false;
> +
> +     ring->preallocated_request = rq;
> +     return 0;
>  }
>  
>  static int __intel_ring_prepare(struct intel_engine_cs *ring,
> @@ -1876,7 +1878,7 @@ int intel_ring_begin(struct intel_engine_cs *ring,
>               return ret;
>  
>       /* Preallocate the olr before touching the ring, */
> -     ret = intel_ring_alloc_seqno(ring);
> +     ret = intel_ring_alloc_request(ring);
>       if (ret)
>               return ret;
>  
> @@ -1886,7 +1888,7 @@ int intel_ring_begin(struct intel_engine_cs *ring,
>               return ret;
>  
>       /* but we may flush the seqno during prepare. */
> -     ret = intel_ring_alloc_seqno(ring);
> +     ret = intel_ring_alloc_request(ring);
>       if (ret)
>               return ret;
>  
> @@ -1921,7 +1923,7 @@ void intel_ring_init_seqno(struct intel_engine_cs 
> *ring, u32 seqno)
>       struct drm_device *dev = ring->dev;
>       struct drm_i915_private *dev_priv = dev->dev_private;
>  
> -     BUG_ON(ring->outstanding_lazy_seqno);
> +     BUG_ON(ring->preallocated_request);
>  
>       if (INTEL_INFO(dev)->gen == 6 || INTEL_INFO(dev)->gen == 7) {
>               I915_WRITE(RING_SYNC_0(ring->mmio_base), 0);
> @@ -2300,7 +2302,8 @@ int intel_render_ring_init_dri(struct drm_device *dev, 
> u64 start, u32 size)
>       ring->cleanup = render_ring_cleanup;
>  
>       ring->dev = dev;
> -     INIT_LIST_HEAD(&ring->active_list);
> +     INIT_LIST_HEAD(&ring->read_list);
> +     INIT_LIST_HEAD(&ring->write_list);
>       INIT_LIST_HEAD(&ring->request_list);
>  
>       ringbuf->size = size;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
> b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index dcd2e44..2a78051 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -222,7 +222,7 @@ struct  intel_engine_cs {
>        *
>        * A reference is held on the buffer while on this list.
>        */
> -     struct list_head active_list;
> +     struct list_head read_list, write_list, fence_list;
>  
>       /**
>        * List of breadcrumbs associated with GPU requests currently
> @@ -233,8 +233,7 @@ struct  intel_engine_cs {
>       /**
>        * Do we have some not yet emitted requests outstanding?
>        */
> -     struct drm_i915_gem_request *preallocated_lazy_request;
> -     u32 outstanding_lazy_seqno;
> +     struct i915_gem_request *preallocated_request;
>       bool gpu_caches_dirty;
>       bool fbc_dirty;
>  
> @@ -393,10 +392,10 @@ static inline u32 intel_ring_get_tail(struct 
> intel_ringbuffer *ringbuf)
>       return ringbuf->tail;
>  }
>  
> -static inline u32 intel_ring_get_seqno(struct intel_engine_cs *ring)
> +static inline struct i915_gem_request *intel_ring_get_request(struct 
> intel_engine_cs *ring)
>  {
> -     BUG_ON(ring->outstanding_lazy_seqno == 0);
> -     return ring->outstanding_lazy_seqno;
> +     BUG_ON(ring->preallocated_request == 0);
> +     return ring->preallocated_request;
>  }
>  
>  static inline void i915_trace_irq_get(struct intel_engine_cs *ring, u32 
> seqno)
> -- 
> 1.9.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
