RE: [Intel-gfx] [PATCH] i915: add page flipping ioctl

Zou, Nanhai Thu, 19 Feb 2009 23:28:03 -0800

>-----Original Message-----
>From: intel-gfx-boun...@lists.freedesktop.org
>[mailto:intel-gfx-boun...@lists.freedesktop.org] On Behalf Of Jesse Barnes
>Sent: 2009年2月20日 8:48
>To: dri-devel@lists.sourceforge.net
>Cc: intel-...@lists.freedesktop.org
>Subject: Re: [Intel-gfx] [PATCH] i915: add page flipping ioctl
>
>On Thursday 19 February 2009 16:43:54 Jesse Barnes wrote:
>> On Thursday 19 February 2009 11:37:01 Chris Wilson wrote:
>> > With a few additional suggestions by Jesse, I've managed to get
>> > tear-free compositing working on i915. Here's the diff on top of the
>> > original patch (though obviously this is just a suggestion, still need
>> > to prevent multiple pending flips to the same plane and ensure that the
>> > old buffer is eventually unpinned, and you might choose to drop the
>> > mutex around the wait_for_vblank ;-):
>>
>> Yeah, looks pretty reasonable.  Here's what I've been working with.  It
>> adds a couple of more changes (and slightly different cleanups) over your
>> version: - added a seqno to wait for
>>   - wait for flip before queuing another
>> but it's missing thew 915 changes you added (btw did I get the pitch wrong?
>> or are you submitting a different value for your objects?).
>>
>> I added a new seqno since it's possible (even likely) that the next vblank
>> won't actually reflect the flip, if the GPU is busy processing a large
>> batchbuffer when the ring command goes in for example.
>>
>> Also it might be a good idea to wait for any previous flip before queuing a
>> new one.  Anyway still tracking down some issues with the X side, but it
>> seems like it's approaching readiness.
>
>Oh you want to actually see the code?
>



Hi Jesse,
        See below some comments.
>diff --git a/drivers/gpu/drm/i915/i915_dma.c
>b/drivers/gpu/drm/i915/i915_dma.c
>index cbb8224..4e4fb61 100644
>--- a/drivers/gpu/drm/i915/i915_dma.c
>+++ b/drivers/gpu/drm/i915/i915_dma.c
>@@ -829,6 +829,152 @@ static int i915_set_status_page(struct drm_device *dev,
>void *data,
>       return 0;
> }
>
>+static int i915_pipe_to_plane(struct drm_device *dev, int pipe)
>+{
>+      drm_i915_private_t *dev_priv = dev->dev_private;
>+      u32 reg, pipe_mask;
>+
>+      if (pipe == 0)
>+              pipe_mask = DISPPLANE_SEL_PIPE_A;
>+      else
>+              pipe_mask = DISPPLANE_SEL_PIPE_B;
>+
>+      pipe_mask |= DISPLAY_PLANE_ENABLE;
>+
>+      reg = I915_READ(DSPACNTR);
>+      if ((reg & (DISPLAY_PLANE_ENABLE | DISPPLANE_SEL_PIPE_MASK)) ==
>+          pipe_mask)
>+              return 0;
>+
>+      reg = I915_READ(DSPBCNTR);
>+      if ((reg & (DISPLAY_PLANE_ENABLE | DISPPLANE_SEL_PIPE_MASK)) ==
>+          pipe_mask)
>+              return 1;
>+
>+      return -1;
>+}
>+
>+bool
>+i915_gem_flip_pending(struct drm_gem_object *obj)
>+{
>+      struct drm_device *dev = obj->dev;
>+      drm_i915_private_t *dev_priv = dev->dev_private;
>+      struct drm_i915_gem_object *obj_priv = obj->driver_private;
>+      unsigned long flags;
>+      bool pending = false;
>+
>+      spin_lock_irqsave(&dev_priv->vblank_lock, flags);
>+      if (!list_empty(&obj_priv->vblank_head))
>+              pending = true;
>+      spin_unlock_irqrestore(&dev_priv->vblank_lock, flags);
>+
>+      return pending;
>+}
>+
>+static int i915_gem_page_flip(struct drm_device *dev, void *data,
>+                            struct drm_file *file_priv)
>+{
>+      struct drm_i915_gem_page_flip *flip_data = data;
>+      drm_i915_private_t *dev_priv = dev->dev_private;
>+      struct drm_gem_object *obj;
>+      struct drm_i915_gem_object *obj_priv;
>+      unsigned long flags;
>+      uint32_t offset;
>+      int pitch, pipe, plane, tiled;
>+      int ret = 0;
>+      RING_LOCALS;
>+
>+      if (!(dev->driver->driver_features & DRIVER_GEM))
>+              return -ENODEV;
>+
>+      /*
>+       * Reject unknown flags so future userspace knows what we (don't)
>+       * support
>+       */
>+      if (flip_data->flags & (~I915_PAGE_FLIP_WAIT)) {
>+              DRM_ERROR("bad page flip flags\n");
>+              return -EINVAL;
>+      }
>+
>+      if ((pipe = flip_data->pipe) > 1) {
>+              DRM_ERROR("bad pipe\n");
>+              return -EINVAL;
>+      }
        pipe should be unsigned int, if it is int, this check should be if 
(pipe > 1 || pipe < 0)

>+
>+      plane = i915_pipe_to_plane(dev, pipe);
>+      if (plane < 0) {
>+              DRM_ERROR("bad pipe (no planes enabled?)\n");
>+              return -EINVAL;
>+      }
>+
>+      obj = drm_gem_object_lookup(dev, file_priv, flip_data->handle);
>+      if (obj == NULL)
>+              return -EBADF;
>+
>+      if (i915_gem_flip_pending(obj))
>+              wait_for_completion(&obj_priv->vblank);
>+
>+      mutex_lock(&dev->struct_mutex);
>+
>+      obj_priv = obj->driver_private;
>+      if (obj_priv->tiling_mode != I915_TILING_NONE &&
>+          obj_priv->tiling_mode != I915_TILING_X) {
>+              DRM_ERROR("can only flip non-tiled or X tiled pages\n");
>+              ret = -EINVAL;
>+              goto out_unref;
>+      }
>+
>+      ret = i915_gem_object_pin(obj, 0);
>+      if (ret) {
>+              DRM_ERROR("failed to pin object for flip\n");
>+              ret = -EBUSY;
>+              goto out_unref;
>+      }
>+      mutex_unlock(&dev->struct_mutex);
>+
>+      /*
>+       * Put the object in the GTT domain before the flip,
>+       * since there may be outstanding rendering
>+       */
>+      i915_gem_object_set_to_gtt_domain(obj, 0);
>+
>+      offset = obj_priv->gtt_offset;
>+      pitch = obj_priv->stride;
>+      tiled = !!(obj_priv->tiling_mode == I915_TILING_X);
>+
>+      /*
>+       * Queue the flip with the lock held in case the flip happens
>+       * immediately.
>+       */
>+      spin_lock_irqsave(&dev_priv->vblank_lock, flags);
>+
>+      BEGIN_LP_RING(4);
  
     This could be sleep in atomic because BEGIN_LP_RING may sleep.

>+      OUT_RING(CMD_OP_DISPLAYBUFFER_INFO | (plane << 20));
>+      OUT_RING((pitch / 8) << 3); /* flip queue and/or pitch */
>+      OUT_RING(offset | tiled);
>+      OUT_RING(((flip_data->x - 1) << 16) | (flip_data->y - 1));
>+      ADVANCE_LP_RING();
>+
>+      list_add_tail(&obj_priv->vblank_head,
>&dev_priv->mm.vblank_list[pipe]);
>+
>+      obj_priv->flip_seqno = i915_add_request(dev, 0);
>+
>+      drm_vblank_get(dev, pipe);
>+
>+      spin_unlock_irqrestore(&dev_priv->vblank_lock, flags);
>+
>+      if (flip_data->flags & I915_PAGE_FLIP_WAIT)
>+              wait_for_completion(&obj_priv->vblank);
>+
>+      /* Take lock again for unref */
>+      mutex_lock(&dev->struct_mutex);
>+out_unref:
>+      drm_gem_object_unreference(obj);
>+      mutex_unlock(&dev->struct_mutex);
>+
>+      return ret;
>+}
>+
> /**
>  * i915_probe_agp - get AGP bootup configuration
>  * @pdev: PCI device
>@@ -1300,6 +1446,7 @@ struct drm_ioctl_desc i915_ioctls[] = {
>       DRM_IOCTL_DEF(DRM_I915_GEM_SET_TILING, i915_gem_set_tiling, 0),
>       DRM_IOCTL_DEF(DRM_I915_GEM_GET_TILING, i915_gem_get_tiling, 0),
>       DRM_IOCTL_DEF(DRM_I915_GEM_GET_APERTURE, i915_gem_get_aperture_ioctl,
>0),
>+      DRM_IOCTL_DEF(DRM_I915_GEM_PAGE_FLIP, i915_gem_page_flip,
>DRM_AUTH|DRM_MASTER),
> };
>
> int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
>diff --git a/drivers/gpu/drm/i915/i915_drv.h
>b/drivers/gpu/drm/i915/i915_drv.h
>index 7ed981c..c75735d 100644
>--- a/drivers/gpu/drm/i915/i915_drv.h
>+++ b/drivers/gpu/drm/i915/i915_drv.h
>@@ -161,6 +161,9 @@ typedef struct drm_i915_private {
>       u32 irq_mask_reg;
>       u32 pipestat[2];
>
>+      /** Protects vblank list */
>+      spinlock_t vblank_lock;
>+
>       u32 hotplug_supported_mask;
>       struct work_struct hotplug_work;
>
>@@ -343,6 +346,11 @@ typedef struct drm_i915_private {
>                */
>               struct delayed_work retire_work;
>
>+              /**
>+               * List of objects waiting on vblank events (one per pipe)
>+               */
>+              struct list_head vblank_list[2];
>+
>               uint32_t next_gem_seqno;
>
>               /**
>@@ -394,6 +402,11 @@ struct drm_i915_gem_object {
>       /** This object's place on the active/flushing/inactive lists */
>       struct list_head list;
>
>+      /** Object's place on the vblank list */
>+      struct list_head vblank_head;
>+      /** sequence number for flip (when it passes the flip is done) */
>+      int flip_seqno;
>+
>       /**
>        * This is set if the object is on the active or flushing lists
>        * (has pending rendering), and is not set if it's on inactive (ready
>@@ -462,6 +475,9 @@ struct drm_i915_gem_object {
>
>       /** for phy allocated objects */
>       struct drm_i915_gem_phys_object *phys_obj;
>+
>+      /** for page flips and other vblank related blocks */
>+      struct completion vblank;
> };
>
> /**
>@@ -521,6 +537,7 @@ extern long i915_compat_ioctl(struct file *filp, unsigned
>int cmd,
> extern int i915_emit_box(struct drm_device *dev,
>                        struct drm_clip_rect __user *boxes,
>                        int i, int DR1, int DR4);
>+extern bool i915_gem_flip_pending(struct drm_gem_object *obj);
>
> /* i915_irq.c */
> extern int i915_irq_emit(struct drm_device *dev, void *data,
>@@ -629,6 +646,9 @@ int i915_gem_attach_phys_object(struct drm_device *dev,
> void i915_gem_detach_phys_object(struct drm_device *dev,
>                                struct drm_gem_object *obj);
> void i915_gem_free_all_phys_object(struct drm_device *dev);
>+uint32_t i915_add_request(struct drm_device *dev, uint32_t flush_domains);
>+int i915_seqno_passed(uint32_t seq1, uint32_t seq2);
>+uint32_t i915_get_gem_seqno(struct drm_device *dev);
>
> /* i915_gem_tiling.c */
> void i915_gem_detect_bit_6_swizzle(struct drm_device *dev);
>diff --git a/drivers/gpu/drm/i915/i915_gem.c
>b/drivers/gpu/drm/i915/i915_gem.c
>index da9a2cb..a2f2a9d 100644
>--- a/drivers/gpu/drm/i915/i915_gem.c
>+++ b/drivers/gpu/drm/i915/i915_gem.c
>@@ -917,7 +917,7 @@ i915_gem_object_move_to_inactive(struct drm_gem_object
>*obj)
>  *
>  * Returned sequence numbers are nonzero on success.
>  */
>-static uint32_t
>+uint32_t
> i915_add_request(struct drm_device *dev, uint32_t flush_domains)
> {
>       drm_i915_private_t *dev_priv = dev->dev_private;
>@@ -1045,7 +1045,7 @@ i915_gem_retire_request(struct drm_device *dev,
> /**
>  * Returns true if seq1 is later than seq2.
>  */
>-static int
>+int
> i915_seqno_passed(uint32_t seq1, uint32_t seq2)
> {
>       return (int32_t)(seq1 - seq2) >= 0;
>@@ -2547,6 +2547,11 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
>       for (pin_tries = 0; ; pin_tries++) {
>               ret = 0;
>               for (i = 0; i < args->buffer_count; i++) {
>+                      struct drm_i915_gem_object *obj_priv;
>+
>+                      obj_priv = object_list[i]->driver_private;
>+                      if (i915_gem_flip_pending(object_list[i]))
>+                              wait_for_completion(&obj_priv->vblank);
        I am wondering if this will block other clients execbuffer to run thus 
hurt their performance because struct_mutex is hold.

>                       object_list[i]->pending_read_domains = 0;
>                       object_list[i]->pending_write_domain = 0;
>                       ret = i915_gem_object_pin_and_relocate(object_list[i],
>@@ -2924,6 +2929,9 @@ int i915_gem_init_object(struct drm_gem_object *obj)
>       obj_priv->obj = obj;
>       obj_priv->fence_reg = I915_FENCE_REG_NONE;
>       INIT_LIST_HEAD(&obj_priv->list);
>+      INIT_LIST_HEAD(&obj_priv->vblank_head);
>+
>+      init_completion(&obj_priv->vblank);
>
>       return 0;
> }
>@@ -3358,9 +3366,12 @@ i915_gem_load(struct drm_device *dev)
>       INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
>       INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
>       INIT_LIST_HEAD(&dev_priv->mm.request_list);
>+      INIT_LIST_HEAD(&dev_priv->mm.vblank_list[0]);
>+      INIT_LIST_HEAD(&dev_priv->mm.vblank_list[1]);
>       INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
>                         i915_gem_retire_work_handler);
>       dev_priv->mm.next_gem_seqno = 1;
>+      spin_lock_init(&dev_priv->vblank_lock);
>
>       /* Old X drivers will take 0-2 for front, back, depth buffers */
>       dev_priv->fence_reg_start = 3;
>diff --git a/drivers/gpu/drm/i915/i915_irq.c
>b/drivers/gpu/drm/i915/i915_irq.c
>index 019140e..66cd33d 100644
>--- a/drivers/gpu/drm/i915/i915_irq.c
>+++ b/drivers/gpu/drm/i915/i915_irq.c
>@@ -196,6 +196,25 @@ static void i915_hotplug_work_func(struct work_struct
>*work)
>       drm_sysfs_hotplug_event(dev);
> }
>
>+static void i915_complete_vblank(struct drm_device *dev, int pipe)
>+{
>+      drm_i915_private_t *dev_priv = dev->dev_private;
>+      struct drm_i915_gem_object *obj_priv, *tmp;
>+      unsigned long irqflags;
>+
>+      spin_lock_irqsave(&dev_priv->vblank_lock, irqflags);
>+      list_for_each_entry_safe(obj_priv, tmp,
>&dev_priv->mm.vblank_list[pipe],
>+                               vblank_head) {
>+              int cur_seqno = i915_get_gem_seqno(dev);
>+              if (i915_seqno_passed(cur_seqno, obj_priv->flip_seqno)) {
>+                      complete(&obj_priv->vblank);
>+                      list_del_init(&obj_priv->vblank_head);
>+                      drm_vblank_put(dev, pipe);
>+              }
>+      }
>+      spin_unlock_irqrestore(&dev_priv->vblank_lock, irqflags);
>+}
>+
> irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
> {
>       struct drm_device *dev = (struct drm_device *) arg;
>@@ -284,11 +303,13 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
>
>               if (pipea_stats & vblank_status) {
>                       vblank++;
>+                      i915_complete_vblank(dev, 0);
>                       drm_handle_vblank(dev, 0);
>               }
>
>               if (pipeb_stats & vblank_status) {
>                       vblank++;
>+                      i915_complete_vblank(dev, 1);
>                       drm_handle_vblank(dev, 1);
>               }
>
>diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
>index 912cd52..0678e47 100644
>--- a/include/drm/i915_drm.h
>+++ b/include/drm/i915_drm.h
>@@ -184,6 +184,7 @@ typedef struct _drm_i915_sarea {
> #define DRM_I915_GEM_GET_TILING       0x22
> #define DRM_I915_GEM_GET_APERTURE 0x23
> #define DRM_I915_GEM_MMAP_GTT 0x24
>+#define DRM_I915_GEM_PAGE_FLIP        0x25
>
> #define DRM_IOCTL_I915_INIT           DRM_IOW( DRM_COMMAND_BASE +
>DRM_I915_INIT, drm_i915_init_t)
> #define DRM_IOCTL_I915_FLUSH          DRM_IO ( DRM_COMMAND_BASE +
>DRM_I915_FLUSH)
>@@ -219,6 +220,7 @@ typedef struct _drm_i915_sarea {
> #define DRM_IOCTL_I915_GEM_SET_TILING DRM_IOWR (DRM_COMMAND_BASE +
>DRM_I915_GEM_SET_TILING, struct
>drm_i915_gem_set_tiling)
> #define DRM_IOCTL_I915_GEM_GET_TILING DRM_IOWR (DRM_COMMAND_BASE +
>DRM_I915_GEM_GET_TILING, struct
>drm_i915_gem_get_tiling)
> #define DRM_IOCTL_I915_GEM_GET_APERTURE       DRM_IOR  (DRM_COMMAND_BASE +
>DRM_I915_GEM_GET_APERTURE, struct
>drm_i915_gem_get_aperture)
>+#define DRM_IOCTL_I915_GEM_PAGE_FLIP  DRM_IOW (DRM_COMMAND_BASE +
>DRM_I915_GEM_PAGE_FLIP, struct
>drm_i915_gem_page_flip)
>
> /* Allow drivers to submit batchbuffers directly to hardware, relying
>  * on the security mechanisms provided by hardware.
>@@ -654,4 +656,27 @@ struct drm_i915_gem_get_aperture {
>       uint64_t aper_available_size;
> };
>
>+#define I915_PAGE_FLIP_WAIT (1<<0) /* block on page flip completion */
>+
>+struct drm_i915_gem_page_flip {
>+      /** Handle of new front buffer */

     Should this be handle of new front buffer or handle of the execbuf?

>+      uint32_t handle;
>+
>+      /**
>+       * page flip flags (wait on flip only for now)
>+       */
>+      uint32_t flags;
>+
>+      /**
>+       * pipe to flip
>+       */
>+      uint32_t pipe;
>+
>+      /**
>+       * screen dimensions for flip
>+       */
>+      uint32_t x;
>+      uint32_t y;
>+};
>+
> #endif                                /* _I915_DRM_H_ */
>
>_______________________________________________
>Intel-gfx mailing list
>intel-...@lists.freedesktop.org
>http://lists.freedesktop.org/mailman/listinfo/intel-gfx
------------------------------------------------------------------------------
Open Source Business Conference (OSBC), March 24-25, 2009, San Francisco, CA
-OSBC tackles the biggest issue in open source: Open Sourcing the Enterprise
-Strategies to boost innovation and cut costs with open source participation
-Receive a $600 discount off the registration fee with the source code: SFAD
http://p.sf.net/sfu/XcvMzF8H
--
_______________________________________________
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel

RE: [Intel-gfx] [PATCH] i915: add page flipping ioctl

Reply via email to