>-----Original Message----- >From: intel-gfx-boun...@lists.freedesktop.org >[mailto:intel-gfx-boun...@lists.freedesktop.org] On Behalf Of Jesse Barnes >Sent: 2009年2月20日 8:48 >To: dri-devel@lists.sourceforge.net >Cc: intel-...@lists.freedesktop.org >Subject: Re: [Intel-gfx] [PATCH] i915: add page flipping ioctl > >On Thursday 19 February 2009 16:43:54 Jesse Barnes wrote: >> On Thursday 19 February 2009 11:37:01 Chris Wilson wrote: >> > With a few additional suggestions by Jesse, I've managed to get >> > tear-free compositing working on i915. Here's the diff on top of the >> > original patch (though obviously this is just a suggestion, still need >> > to prevent multiple pending flips to the same plane and ensure that the >> > old buffer is eventually unpinned, and you might choose to drop the >> > mutex around the wait_for_vblank ;-): >> >> Yeah, looks pretty reasonable. Here's what I've been working with. It >> adds a couple of more changes (and slightly different cleanups) over your >> version: - added a seqno to wait for >> - wait for flip before queuing another >> but it's missing thew 915 changes you added (btw did I get the pitch wrong? >> or are you submitting a different value for your objects?). >> >> I added a new seqno since it's possible (even likely) that the next vblank >> won't actually reflect the flip, if the GPU is busy processing a large >> batchbuffer when the ring command goes in for example. >> >> Also it might be a good idea to wait for any previous flip before queuing a >> new one. Anyway still tracking down some issues with the X side, but it >> seems like it's approaching readiness. > >Oh you want to actually see the code? >
Hi Jesse, See below some comments. >diff --git a/drivers/gpu/drm/i915/i915_dma.c >b/drivers/gpu/drm/i915/i915_dma.c >index cbb8224..4e4fb61 100644 >--- a/drivers/gpu/drm/i915/i915_dma.c >+++ b/drivers/gpu/drm/i915/i915_dma.c >@@ -829,6 +829,152 @@ static int i915_set_status_page(struct drm_device *dev, >void *data, > return 0; > } > >+static int i915_pipe_to_plane(struct drm_device *dev, int pipe) >+{ >+ drm_i915_private_t *dev_priv = dev->dev_private; >+ u32 reg, pipe_mask; >+ >+ if (pipe == 0) >+ pipe_mask = DISPPLANE_SEL_PIPE_A; >+ else >+ pipe_mask = DISPPLANE_SEL_PIPE_B; >+ >+ pipe_mask |= DISPLAY_PLANE_ENABLE; >+ >+ reg = I915_READ(DSPACNTR); >+ if ((reg & (DISPLAY_PLANE_ENABLE | DISPPLANE_SEL_PIPE_MASK)) == >+ pipe_mask) >+ return 0; >+ >+ reg = I915_READ(DSPBCNTR); >+ if ((reg & (DISPLAY_PLANE_ENABLE | DISPPLANE_SEL_PIPE_MASK)) == >+ pipe_mask) >+ return 1; >+ >+ return -1; >+} >+ >+bool >+i915_gem_flip_pending(struct drm_gem_object *obj) >+{ >+ struct drm_device *dev = obj->dev; >+ drm_i915_private_t *dev_priv = dev->dev_private; >+ struct drm_i915_gem_object *obj_priv = obj->driver_private; >+ unsigned long flags; >+ bool pending = false; >+ >+ spin_lock_irqsave(&dev_priv->vblank_lock, flags); >+ if (!list_empty(&obj_priv->vblank_head)) >+ pending = true; >+ spin_unlock_irqrestore(&dev_priv->vblank_lock, flags); >+ >+ return pending; >+} >+ >+static int i915_gem_page_flip(struct drm_device *dev, void *data, >+ struct drm_file *file_priv) >+{ >+ struct drm_i915_gem_page_flip *flip_data = data; >+ drm_i915_private_t *dev_priv = dev->dev_private; >+ struct drm_gem_object *obj; >+ struct drm_i915_gem_object *obj_priv; >+ unsigned long flags; >+ uint32_t offset; >+ int pitch, pipe, plane, tiled; >+ int ret = 0; >+ RING_LOCALS; >+ >+ if (!(dev->driver->driver_features & DRIVER_GEM)) >+ return -ENODEV; >+ >+ /* >+ * Reject unknown flags so future userspace knows what we (don't) >+ * support >+ */ >+ if (flip_data->flags & (~I915_PAGE_FLIP_WAIT)) { >+ DRM_ERROR("bad page flip flags\n"); >+ return -EINVAL; >+ } >+ >+ if ((pipe = flip_data->pipe) > 1) { >+ DRM_ERROR("bad pipe\n"); >+ return -EINVAL; >+ } pipe should be unsigned int, if it is int, this check should be if (pipe > 1 || pipe < 0) >+ >+ plane = i915_pipe_to_plane(dev, pipe); >+ if (plane < 0) { >+ DRM_ERROR("bad pipe (no planes enabled?)\n"); >+ return -EINVAL; >+ } >+ >+ obj = drm_gem_object_lookup(dev, file_priv, flip_data->handle); >+ if (obj == NULL) >+ return -EBADF; >+ >+ if (i915_gem_flip_pending(obj)) >+ wait_for_completion(&obj_priv->vblank); >+ >+ mutex_lock(&dev->struct_mutex); >+ >+ obj_priv = obj->driver_private; >+ if (obj_priv->tiling_mode != I915_TILING_NONE && >+ obj_priv->tiling_mode != I915_TILING_X) { >+ DRM_ERROR("can only flip non-tiled or X tiled pages\n"); >+ ret = -EINVAL; >+ goto out_unref; >+ } >+ >+ ret = i915_gem_object_pin(obj, 0); >+ if (ret) { >+ DRM_ERROR("failed to pin object for flip\n"); >+ ret = -EBUSY; >+ goto out_unref; >+ } >+ mutex_unlock(&dev->struct_mutex); >+ >+ /* >+ * Put the object in the GTT domain before the flip, >+ * since there may be outstanding rendering >+ */ >+ i915_gem_object_set_to_gtt_domain(obj, 0); >+ >+ offset = obj_priv->gtt_offset; >+ pitch = obj_priv->stride; >+ tiled = !!(obj_priv->tiling_mode == I915_TILING_X); >+ >+ /* >+ * Queue the flip with the lock held in case the flip happens >+ * immediately. >+ */ >+ spin_lock_irqsave(&dev_priv->vblank_lock, flags); >+ >+ BEGIN_LP_RING(4); This could be sleep in atomic because BEGIN_LP_RING may sleep. >+ OUT_RING(CMD_OP_DISPLAYBUFFER_INFO | (plane << 20)); >+ OUT_RING((pitch / 8) << 3); /* flip queue and/or pitch */ >+ OUT_RING(offset | tiled); >+ OUT_RING(((flip_data->x - 1) << 16) | (flip_data->y - 1)); >+ ADVANCE_LP_RING(); >+ >+ list_add_tail(&obj_priv->vblank_head, >&dev_priv->mm.vblank_list[pipe]); >+ >+ obj_priv->flip_seqno = i915_add_request(dev, 0); >+ >+ drm_vblank_get(dev, pipe); >+ >+ spin_unlock_irqrestore(&dev_priv->vblank_lock, flags); >+ >+ if (flip_data->flags & I915_PAGE_FLIP_WAIT) >+ wait_for_completion(&obj_priv->vblank); >+ >+ /* Take lock again for unref */ >+ mutex_lock(&dev->struct_mutex); >+out_unref: >+ drm_gem_object_unreference(obj); >+ mutex_unlock(&dev->struct_mutex); >+ >+ return ret; >+} >+ > /** > * i915_probe_agp - get AGP bootup configuration > * @pdev: PCI device >@@ -1300,6 +1446,7 @@ struct drm_ioctl_desc i915_ioctls[] = { > DRM_IOCTL_DEF(DRM_I915_GEM_SET_TILING, i915_gem_set_tiling, 0), > DRM_IOCTL_DEF(DRM_I915_GEM_GET_TILING, i915_gem_get_tiling, 0), > DRM_IOCTL_DEF(DRM_I915_GEM_GET_APERTURE, i915_gem_get_aperture_ioctl, >0), >+ DRM_IOCTL_DEF(DRM_I915_GEM_PAGE_FLIP, i915_gem_page_flip, >DRM_AUTH|DRM_MASTER), > }; > > int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls); >diff --git a/drivers/gpu/drm/i915/i915_drv.h >b/drivers/gpu/drm/i915/i915_drv.h >index 7ed981c..c75735d 100644 >--- a/drivers/gpu/drm/i915/i915_drv.h >+++ b/drivers/gpu/drm/i915/i915_drv.h >@@ -161,6 +161,9 @@ typedef struct drm_i915_private { > u32 irq_mask_reg; > u32 pipestat[2]; > >+ /** Protects vblank list */ >+ spinlock_t vblank_lock; >+ > u32 hotplug_supported_mask; > struct work_struct hotplug_work; > >@@ -343,6 +346,11 @@ typedef struct drm_i915_private { > */ > struct delayed_work retire_work; > >+ /** >+ * List of objects waiting on vblank events (one per pipe) >+ */ >+ struct list_head vblank_list[2]; >+ > uint32_t next_gem_seqno; > > /** >@@ -394,6 +402,11 @@ struct drm_i915_gem_object { > /** This object's place on the active/flushing/inactive lists */ > struct list_head list; > >+ /** Object's place on the vblank list */ >+ struct list_head vblank_head; >+ /** sequence number for flip (when it passes the flip is done) */ >+ int flip_seqno; >+ > /** > * This is set if the object is on the active or flushing lists > * (has pending rendering), and is not set if it's on inactive (ready >@@ -462,6 +475,9 @@ struct drm_i915_gem_object { > > /** for phy allocated objects */ > struct drm_i915_gem_phys_object *phys_obj; >+ >+ /** for page flips and other vblank related blocks */ >+ struct completion vblank; > }; > > /** >@@ -521,6 +537,7 @@ extern long i915_compat_ioctl(struct file *filp, unsigned >int cmd, > extern int i915_emit_box(struct drm_device *dev, > struct drm_clip_rect __user *boxes, > int i, int DR1, int DR4); >+extern bool i915_gem_flip_pending(struct drm_gem_object *obj); > > /* i915_irq.c */ > extern int i915_irq_emit(struct drm_device *dev, void *data, >@@ -629,6 +646,9 @@ int i915_gem_attach_phys_object(struct drm_device *dev, > void i915_gem_detach_phys_object(struct drm_device *dev, > struct drm_gem_object *obj); > void i915_gem_free_all_phys_object(struct drm_device *dev); >+uint32_t i915_add_request(struct drm_device *dev, uint32_t flush_domains); >+int i915_seqno_passed(uint32_t seq1, uint32_t seq2); >+uint32_t i915_get_gem_seqno(struct drm_device *dev); > > /* i915_gem_tiling.c */ > void i915_gem_detect_bit_6_swizzle(struct drm_device *dev); >diff --git a/drivers/gpu/drm/i915/i915_gem.c >b/drivers/gpu/drm/i915/i915_gem.c >index da9a2cb..a2f2a9d 100644 >--- a/drivers/gpu/drm/i915/i915_gem.c >+++ b/drivers/gpu/drm/i915/i915_gem.c >@@ -917,7 +917,7 @@ i915_gem_object_move_to_inactive(struct drm_gem_object >*obj) > * > * Returned sequence numbers are nonzero on success. > */ >-static uint32_t >+uint32_t > i915_add_request(struct drm_device *dev, uint32_t flush_domains) > { > drm_i915_private_t *dev_priv = dev->dev_private; >@@ -1045,7 +1045,7 @@ i915_gem_retire_request(struct drm_device *dev, > /** > * Returns true if seq1 is later than seq2. > */ >-static int >+int > i915_seqno_passed(uint32_t seq1, uint32_t seq2) > { > return (int32_t)(seq1 - seq2) >= 0; >@@ -2547,6 +2547,11 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, > for (pin_tries = 0; ; pin_tries++) { > ret = 0; > for (i = 0; i < args->buffer_count; i++) { >+ struct drm_i915_gem_object *obj_priv; >+ >+ obj_priv = object_list[i]->driver_private; >+ if (i915_gem_flip_pending(object_list[i])) >+ wait_for_completion(&obj_priv->vblank); I am wondering if this will block other clients execbuffer to run thus hurt their performance because struct_mutex is hold. > object_list[i]->pending_read_domains = 0; > object_list[i]->pending_write_domain = 0; > ret = i915_gem_object_pin_and_relocate(object_list[i], >@@ -2924,6 +2929,9 @@ int i915_gem_init_object(struct drm_gem_object *obj) > obj_priv->obj = obj; > obj_priv->fence_reg = I915_FENCE_REG_NONE; > INIT_LIST_HEAD(&obj_priv->list); >+ INIT_LIST_HEAD(&obj_priv->vblank_head); >+ >+ init_completion(&obj_priv->vblank); > > return 0; > } >@@ -3358,9 +3366,12 @@ i915_gem_load(struct drm_device *dev) > INIT_LIST_HEAD(&dev_priv->mm.flushing_list); > INIT_LIST_HEAD(&dev_priv->mm.inactive_list); > INIT_LIST_HEAD(&dev_priv->mm.request_list); >+ INIT_LIST_HEAD(&dev_priv->mm.vblank_list[0]); >+ INIT_LIST_HEAD(&dev_priv->mm.vblank_list[1]); > INIT_DELAYED_WORK(&dev_priv->mm.retire_work, > i915_gem_retire_work_handler); > dev_priv->mm.next_gem_seqno = 1; >+ spin_lock_init(&dev_priv->vblank_lock); > > /* Old X drivers will take 0-2 for front, back, depth buffers */ > dev_priv->fence_reg_start = 3; >diff --git a/drivers/gpu/drm/i915/i915_irq.c >b/drivers/gpu/drm/i915/i915_irq.c >index 019140e..66cd33d 100644 >--- a/drivers/gpu/drm/i915/i915_irq.c >+++ b/drivers/gpu/drm/i915/i915_irq.c >@@ -196,6 +196,25 @@ static void i915_hotplug_work_func(struct work_struct >*work) > drm_sysfs_hotplug_event(dev); > } > >+static void i915_complete_vblank(struct drm_device *dev, int pipe) >+{ >+ drm_i915_private_t *dev_priv = dev->dev_private; >+ struct drm_i915_gem_object *obj_priv, *tmp; >+ unsigned long irqflags; >+ >+ spin_lock_irqsave(&dev_priv->vblank_lock, irqflags); >+ list_for_each_entry_safe(obj_priv, tmp, >&dev_priv->mm.vblank_list[pipe], >+ vblank_head) { >+ int cur_seqno = i915_get_gem_seqno(dev); >+ if (i915_seqno_passed(cur_seqno, obj_priv->flip_seqno)) { >+ complete(&obj_priv->vblank); >+ list_del_init(&obj_priv->vblank_head); >+ drm_vblank_put(dev, pipe); >+ } >+ } >+ spin_unlock_irqrestore(&dev_priv->vblank_lock, irqflags); >+} >+ > irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS) > { > struct drm_device *dev = (struct drm_device *) arg; >@@ -284,11 +303,13 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS) > > if (pipea_stats & vblank_status) { > vblank++; >+ i915_complete_vblank(dev, 0); > drm_handle_vblank(dev, 0); > } > > if (pipeb_stats & vblank_status) { > vblank++; >+ i915_complete_vblank(dev, 1); > drm_handle_vblank(dev, 1); > } > >diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h >index 912cd52..0678e47 100644 >--- a/include/drm/i915_drm.h >+++ b/include/drm/i915_drm.h >@@ -184,6 +184,7 @@ typedef struct _drm_i915_sarea { > #define DRM_I915_GEM_GET_TILING 0x22 > #define DRM_I915_GEM_GET_APERTURE 0x23 > #define DRM_I915_GEM_MMAP_GTT 0x24 >+#define DRM_I915_GEM_PAGE_FLIP 0x25 > > #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + >DRM_I915_INIT, drm_i915_init_t) > #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + >DRM_I915_FLUSH) >@@ -219,6 +220,7 @@ typedef struct _drm_i915_sarea { > #define DRM_IOCTL_I915_GEM_SET_TILING DRM_IOWR (DRM_COMMAND_BASE + >DRM_I915_GEM_SET_TILING, struct >drm_i915_gem_set_tiling) > #define DRM_IOCTL_I915_GEM_GET_TILING DRM_IOWR (DRM_COMMAND_BASE + >DRM_I915_GEM_GET_TILING, struct >drm_i915_gem_get_tiling) > #define DRM_IOCTL_I915_GEM_GET_APERTURE DRM_IOR (DRM_COMMAND_BASE + >DRM_I915_GEM_GET_APERTURE, struct >drm_i915_gem_get_aperture) >+#define DRM_IOCTL_I915_GEM_PAGE_FLIP DRM_IOW (DRM_COMMAND_BASE + >DRM_I915_GEM_PAGE_FLIP, struct >drm_i915_gem_page_flip) > > /* Allow drivers to submit batchbuffers directly to hardware, relying > * on the security mechanisms provided by hardware. >@@ -654,4 +656,27 @@ struct drm_i915_gem_get_aperture { > uint64_t aper_available_size; > }; > >+#define I915_PAGE_FLIP_WAIT (1<<0) /* block on page flip completion */ >+ >+struct drm_i915_gem_page_flip { >+ /** Handle of new front buffer */ Should this be handle of new front buffer or handle of the execbuf? >+ uint32_t handle; >+ >+ /** >+ * page flip flags (wait on flip only for now) >+ */ >+ uint32_t flags; >+ >+ /** >+ * pipe to flip >+ */ >+ uint32_t pipe; >+ >+ /** >+ * screen dimensions for flip >+ */ >+ uint32_t x; >+ uint32_t y; >+}; >+ > #endif /* _I915_DRM_H_ */ > >_______________________________________________ >Intel-gfx mailing list >intel-...@lists.freedesktop.org >http://lists.freedesktop.org/mailman/listinfo/intel-gfx ------------------------------------------------------------------------------ Open Source Business Conference (OSBC), March 24-25, 2009, San Francisco, CA -OSBC tackles the biggest issue in open source: Open Sourcing the Enterprise -Strategies to boost innovation and cut costs with open source participation -Receive a $600 discount off the registration fee with the source code: SFAD http://p.sf.net/sfu/XcvMzF8H -- _______________________________________________ Dri-devel mailing list Dri-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/dri-devel