Re: [Intel-gfx] [PATCH] drm/i915/gen9: Add WaVFEStateAfterPipeControlwithMediaStateClear

2016-06-03 Thread Jeff McGee
On Fri, Jun 03, 2016 at 12:40:00PM +0100, Arun Siluvery wrote:
> The kernel only needs to add a register to the HW whitelist; it is
> required for a preemption-related issue.
> 
> Reference: HSD#2131039
> Signed-off-by: Arun Siluvery 
> ---
>  drivers/gpu/drm/i915/i915_reg.h | 1 +
>  drivers/gpu/drm/i915/intel_ringbuffer.c | 5 +++++
>  2 files changed, 6 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index e307725..1f6040a 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -6072,6 +6072,7 @@ enum skl_disp_power_wells {
>  #define   GEN9_TSG_BARRIER_ACK_DISABLE	(1<<8)
>  
>  #define GEN9_CS_DEBUG_MODE1  _MMIO(0x20ec)
> +#define GEN9_CTX_PREEMPT_REG _MMIO(0x2248)
>  #define GEN8_CS_CHICKEN1 _MMIO(0x2580)
>  
>  /* GEN7 chicken */
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 8d35a39..1f9d3a4 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -987,6 +987,11 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine)
>   I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) |
>   GEN8_LQSC_FLUSH_COHERENT_LINES));
>  
> + /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt */
> + ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG);
> + if (ret)
> + return ret;
> +
>   /* WaEnablePreemptionGranularityControlByUMD:skl,bxt */
>   ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
>   if (ret)
> -- 
> 1.9.1
> 

Reviewed-by: Jeff McGee 



Re: [Intel-gfx] [RFC 3/5] Move fifo_size from intel_plane_wm_parameters to vlv_wm_state

2016-06-03 Thread Matt Roper
On Wed, Jun 01, 2016 at 08:10:19AM +0100, chix.d...@intel.com wrote:
> From: Maarten Lankhorst 
> 
> This patch doesn't change the code to use two-level watermark yet,

Just as a general note, people may look back on this commit in the
future (once it's been accepted and merged upstream) and won't have the
context of the rest of the series in front of them, so notes like this
will just be confusing (and they're probably confusing even to people
looking at the whole series now who aren't familiar with the motivation
and challenges of two-stage watermarks for atomic).

In general it's good to write some high-level overview in the cover
letter email to give the background and overall goal of the series.
Then it's a little bit easier to write individual commit messages that
just focus on the specific changes that patch is making (sometimes they
might make general comments like "in the future we'll want to do X,
so make Y changes now in preparation").

> With this patch the watermarks are saved for each plane in the wm
> state, instead of only in the plane as before.

It would be good to explain a little bit more the motivation for the
switch from a plane-based structure to a CRTC-based structure.

> 
> The patch is based on Maarten Lankhorst's work and created by Chi Ding

This can be seen from the "From:" line above, along with the
Signed-off-by lines, so I don't think you need to include it.

> 
> Signed-off-by: Maarten Lankhorst 
> Signed-off-by: Chi Ding 
> 
> cc: Ville Syrjälä 
> cc: matthew.d.ro...@intel.com
> cc: yetundex.adeb...@intel.com
> ---
>  drivers/gpu/drm/i915/intel_drv.h |  12 +---
>  drivers/gpu/drm/i915/intel_pm.c  | 117 +--
>  2 files changed, 65 insertions(+), 64 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index b973b86..31118e1 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -624,6 +624,7 @@ struct intel_crtc_state {
>  struct vlv_wm_state {
>   struct vlv_pipe_wm wm[3];
>   struct vlv_sr_wm sr[3];
> + uint16_t fifo_size[I915_MAX_PLANES];
>   uint8_t num_active_planes;
>   uint8_t num_levels;
>   uint8_t level;
> @@ -696,10 +697,6 @@ struct intel_crtc {
>   struct vlv_wm_state wm_state;
>  };
>  
> -struct intel_plane_wm_parameters {
> - uint16_t fifo_size;
> -};
> -
>  struct intel_plane {
>   struct drm_plane base;
>   int plane;
> @@ -708,13 +705,6 @@ struct intel_plane {
>   int max_downscale;
>   uint32_t frontbuffer_bit;
>  
> - /* Since we need to change the watermarks before/after
> -  * enabling/disabling the planes, we need to store the parameters here
> -  * as the other pieces of the struct may not reflect the values we want
> -  * for the watermark calculations. Currently only Haswell uses this.
> -  */
> - struct intel_plane_wm_parameters wm;
> -
>   /*
>* NOTE: Do not place new plane state fields here (e.g., when adding
>* new plane properties).  New runtime state should now be placed in
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index a3942df..5515328 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -55,7 +55,7 @@
>  #define INTEL_RC6pp_ENABLE   (1<<2)
>  
>  /*
> - * Return the index of a plane in the DDB and wm result arrays.  Primary
> + * Return the index of a plane in the wm result arrays.  Primary

This change seems unrelated to this patch.  Also, the code the comment
applies to is still used on SKL/BXT where DDB is important.


>   * plane is always in slot 0, cursor is always in slot I915_MAX_PLANES-1, and
>   * other universal planes are in indices 1..n.  Note that this may leave unused
>   * indices between the top "sprite" plane and the cursor.
> @@ -983,14 +983,17 @@ static uint16_t vlv_compute_wm_level(struct intel_plane *plane,
>   return min_t(int, wm, USHRT_MAX);
>  }
>  
> -static void vlv_compute_fifo(struct intel_crtc *crtc)
> +static void vlv_compute_fifo(struct intel_crtc_state *cstate,
> + struct vlv_wm_state *wm_state)
>  {

There are some logic changes in this function (and some of the subsequent
functions) that seem unrelated to the migration of fifo_size into
vlv_wm_state.  I'd break those logic changes out into a separate patch
that has its own commit message describing how/why the code is changing.

E.g., changing some of these functions to operate on state rather than
the base CRTC objects is an important step in transitioning to atomic,
but the changes aren't what this patch was advertising in the commit
message, so they kind of slip under the radar.


Matt


> + struct intel_crtc *crtc = to_intel_crtc(cstate->base.crtc);
>   struct drm_device *dev = 

Re: [Intel-gfx] [RFC 2/5] Rename skl_plane_id to wm_plane_id

2016-06-03 Thread Matt Roper
On Wed, Jun 01, 2016 at 08:10:18AM +0100, chix.d...@intel.com wrote:
> From: Chi Ding 
> 
> This function will be used not only by SKL but also VLV/CHV.
> Therefore it's renamed.
> 
> Signed-off-by: Chi Ding 

As with the first patch, you should update the patch headline.  Other
than that,

Reviewed-by: Matt Roper 

> 
> cc: Ville Syrjälä 
> cc: matthew.d.ro...@intel.com
> cc: yetundex.adeb...@intel.com
> ---
>  drivers/gpu/drm/i915/intel_pm.c | 59 +
>  1 file changed, 30 insertions(+), 29 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index b6dfd02..a3942df 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -54,6 +54,28 @@
>  #define INTEL_RC6p_ENABLE(1<<1)
>  #define INTEL_RC6pp_ENABLE   (1<<2)
>  
> +/*
> + * Return the index of a plane in the DDB and wm result arrays.  Primary
> + * plane is always in slot 0, cursor is always in slot I915_MAX_PLANES-1, and
> + * other universal planes are in indices 1..n.  Note that this may leave unused
> + * indices between the top "sprite" plane and the cursor.
> + */
> +static int
> +wm_plane_id(const struct intel_plane *plane)
> +{
> + switch (plane->base.type) {
> + case DRM_PLANE_TYPE_PRIMARY:
> + return 0;
> + case DRM_PLANE_TYPE_CURSOR:
> + return PLANE_CURSOR;
> + case DRM_PLANE_TYPE_OVERLAY:
> + return plane->plane + 1;
> + default:
> + MISSING_CASE(plane->base.type);
> + return plane->plane;
> + }
> +}
> +
>  static void bxt_init_clock_gating(struct drm_device *dev)
>  {
>   struct drm_i915_private *dev_priv = dev->dev_private;
> @@ -2828,27 +2850,6 @@ bool ilk_disable_lp_wm(struct drm_device *dev)
>  #define SKL_DDB_SIZE 896 /* in blocks */
>  #define BXT_DDB_SIZE 512
>  
> -/*
> - * Return the index of a plane in the SKL DDB and wm result arrays.  Primary
> - * plane is always in slot 0, cursor is always in slot I915_MAX_PLANES-1, and
> - * other universal planes are in indices 1..n.  Note that this may leave unused
> - * indices between the top "sprite" plane and the cursor.
> - */
> -static int
> -skl_wm_plane_id(const struct intel_plane *plane)
> -{
> - switch (plane->base.type) {
> - case DRM_PLANE_TYPE_PRIMARY:
> - return 0;
> - case DRM_PLANE_TYPE_CURSOR:
> - return PLANE_CURSOR;
> - case DRM_PLANE_TYPE_OVERLAY:
> - return plane->plane + 1;
> - default:
> - MISSING_CASE(plane->base.type);
> - return plane->plane;
> - }
> -}
>  
>  static void
>  skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,
> @@ -3011,7 +3012,7 @@ skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate)
>  
>   /* Calculate and cache data rate for each plane */
>   for_each_plane_in_state(state, plane, pstate, i) {
> - id = skl_wm_plane_id(to_intel_plane(plane));
> + id = wm_plane_id(to_intel_plane(plane));
>   intel_plane = to_intel_plane(plane);
>  
>   if (intel_plane->pipe != intel_crtc->pipe)
> @@ -3030,7 +3031,7 @@ skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate)
>  
>   /* Calculate CRTC's total data rate from cached values */
>   for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
> - int id = skl_wm_plane_id(intel_plane);
> + int id = wm_plane_id(intel_plane);
>  
>   /* packed/uv */
>   total_data_rate += intel_cstate->wm.skl.plane_data_rate[id];
> @@ -3088,7 +3089,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
>   /* 1. Allocate the mininum required blocks for each active plane */
>   for_each_plane_in_state(state, plane, pstate, i) {
>   intel_plane = to_intel_plane(plane);
> - id = skl_wm_plane_id(intel_plane);
> + id = wm_plane_id(intel_plane);
>  
>   if (intel_plane->pipe != pipe)
>   continue;
> @@ -3130,7 +3131,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
>   for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
>   unsigned int data_rate, y_data_rate;
>   uint16_t plane_blocks, y_plane_blocks = 0;
> - int id = skl_wm_plane_id(intel_plane);
> + int id = wm_plane_id(intel_plane);
>  
>   data_rate = cstate->wm.skl.plane_data_rate[id];
>  
> @@ -3321,7 +3322,7 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
>   DRM_DEBUG_KMS("Requested display configuration exceeds system watermark limitations\n");
>   DRM_DEBUG_KMS("Plane %d.%d: blocks required = %u/%u, lines required = %u/31\n",
>  

Re: [Intel-gfx] [RFC 1/5] Remove unused parameters from intel_plane_wm_parameters

2016-06-03 Thread Matt Roper
General comment (that applies to the whole series); when you write your
commit message, the first line (which becomes the email subject above)
should be prefixed by "drm/i915:"  If your patch is only modifying the
code for a specific platform, you can include that too when appropriate
(e.g., "drm/i915/skl:").

On Wed, Jun 01, 2016 at 08:10:17AM +0100, chix.d...@intel.com wrote:
> From: Chi Ding 
> 
> Everything except fifo_size is unused and therefore removed
> 
> This is the first patch of two-level watermark for VLV/CHV

I think you can leave this line out of your description.  Killing off
dead code / unused fields is worthwhile on its own and doesn't really
need two-level watermarks as justification.

With an updated commit message,

Reviewed-by: Matt Roper 

> 
> v2: Split the first patch of v1 into the following patches
> - Remove unused parameters from intel_plane_wm_parameters.
> - Rename skl_plane_id to wm_plane_id.
> - Move fifo_size from intel_plane_wm_parameters to vlv_wm_state.
> 
> Signed-off-by: Chi Ding 
> 
> cc: Ville Syrjälä 
> cc: matthew.d.ro...@intel.com
> cc: yetundex.adeb...@intel.com
> 
> ---
>  drivers/gpu/drm/i915/intel_drv.h | 15 ---
>  1 file changed, 15 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 9b5f663..b973b86 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -697,21 +697,6 @@ struct intel_crtc {
>  };
>  
>  struct intel_plane_wm_parameters {
> - uint32_t horiz_pixels;
> - uint32_t vert_pixels;
> - /*
> -  *   For packed pixel formats:
> -  * bytes_per_pixel - holds bytes per pixel
> -  *   For planar pixel formats:
> -  * bytes_per_pixel - holds bytes per pixel for uv-plane
> -  * y_bytes_per_pixel - holds bytes per pixel for y-plane
> -  */
> - uint8_t bytes_per_pixel;
> - uint8_t y_bytes_per_pixel;
> - bool enabled;
> - bool scaled;
> - u64 tiling;
> - unsigned int rotation;
>   uint16_t fifo_size;
>  };
>  
> -- 
> 1.8.0.1
> 

-- 
Matt Roper
Graphics Software Engineer
IoTG Platform Enabling & Development
Intel Corporation
(916) 356-2795


Re: [Intel-gfx] [PATCH 01/20] drm/atomic: Fix remaining places where !funcs->best_encoder is valid

2016-06-03 Thread Daniel Vetter
On Fri, Jun 03, 2016 at 09:37:43AM +0200, Boris Brezillon wrote:
> On Thu, 2 Jun 2016 23:57:02 +0200
> Daniel Vetter  wrote:
> 
> > On Thu, Jun 2, 2016 at 11:05 PM, Laurent Pinchart
> >  wrote:
> > > Hi Boris,
> > >
> > > Thank you for the patch.
> > >
> > > On Thursday 02 Jun 2016 16:31:28 Boris Brezillon wrote:  
> > >> Adapt drm_pick_crtcs() and update_connector_routing() to fallback to
> > >> drm_atomic_helper_best_encoder() if funcs->best_encoder() is NULL so
> > >> that DRM drivers can leave this hook unassigned if they know they want
> > >> to use drm_atomic_helper_best_encoder().  
> > >
> > > Could you please update include/drm/drm_modeset_helper_vtables.h to 
> > > document
> > > this new behaviour ?  
> > 
> > Thanks for reminding me. Please update hooks for both
> > atomic_best_encoder and best_encoder. Also mention that it's only
> > optional for atomic drivers. There's lots of examples in that file for
> > the wording usually used.
> 
> Hm, I haven't changed anything for the ->atomic_best_encoder() hook, or
> am I missing something?

Before, you needed either atomic_best_encoder or best_encoder (well,
that's the idea at least); now both are optional. The kerneldoc needs to
be adjusted in both places to match the new reality after your patch.
-Daniel
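
Something along these lines in the ->best_encoder() kerneldoc would do -
a sketch only, borrowing the optional-hook wording used elsewhere in
drm_modeset_helper_vtables.h, not the final text:

	/*
	 * This hook is optional for atomic drivers: if it is not set, the
	 * helpers fall back to drm_atomic_helper_best_encoder(), which
	 * assumes a fixed 1:1 mapping between connector and encoder. The
	 * same fallback applies to @atomic_best_encoder.
	 */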

> 
> > 
> > > The only drawback I see in this patch is the additional object lookup
> > > performed by drm_atomic_helper_best_encoder() at runtime. I wonder if we 
> > > could
> > > somehow cache the information, as the assignment can't change when 
> > > there's a
> > > 1:1 correspondence between encoders and connectors.  
> > 
> > drm_encoder_find is an idr lookup. That should be plenty fast,
> > especially for modeset code. Usually what's too expensive even for
> > modeset code is linear list walks. But Chris just submitted patches to
> > convert most of them into simple lookups.
> > -Daniel
> > 
> > >>> Signed-off-by: Boris Brezillon   
> > >> ---
> > >>  drivers/gpu/drm/drm_atomic_helper.c |  4 +++-
> > >>  drivers/gpu/drm/drm_fb_helper.c | 13 -
> > >>  2 files changed, 15 insertions(+), 2 deletions(-)
> > >>
> > >> diff --git a/drivers/gpu/drm/drm_atomic_helper.c
> > >> b/drivers/gpu/drm/drm_atomic_helper.c index f6a3350..849d029 100644
> > >> --- a/drivers/gpu/drm/drm_atomic_helper.c
> > >> +++ b/drivers/gpu/drm/drm_atomic_helper.c
> > >> @@ -300,8 +300,10 @@ update_connector_routing(struct drm_atomic_state *state,
> > >>  if (funcs->atomic_best_encoder)
> > >>   new_encoder = funcs->atomic_best_encoder(connector,
> > >>connector_state);
> > >> - else
> > >> + else if (funcs->best_encoder)
> > >>   new_encoder = funcs->best_encoder(connector);
> > >> + else
> > >> + new_encoder = drm_atomic_helper_best_encoder(connector);
> > >>
> > >>   if (!new_encoder) {
> > >>   DRM_DEBUG_ATOMIC("No suitable encoder found for 
> > >> [CONNECTOR:%d:%s]\n",
> > >> diff --git a/drivers/gpu/drm/drm_fb_helper.c
> > >> b/drivers/gpu/drm/drm_fb_helper.c index 7c2eb75..d44389a 100644
> > >> --- a/drivers/gpu/drm/drm_fb_helper.c
> > >> +++ b/drivers/gpu/drm/drm_fb_helper.c
> > >> @@ -2000,7 +2000,18 @@ static int drm_pick_crtcs(struct drm_fb_helper *fb_helper,
> > >>  my_score++;
> > >>
> > >>   connector_funcs = connector->helper_private;
> > >> - encoder = connector_funcs->best_encoder(connector);
> > >> +
> > >> + /*
> > >> +  * If the DRM device implements atomic hooks and ->best_encoder() is
> > >> +  * NULL we fallback to the default drm_atomic_helper_best_encoder()
> > >> +  * helper.
> > >> +  */
> > >> + if (fb_helper->dev->mode_config.funcs->atomic_commit &&
> > >> + !connector_funcs->best_encoder)
> > >> + encoder = drm_atomic_helper_best_encoder(connector);
> > >> + else
> > >> + encoder = connector_funcs->best_encoder(connector);
> > >> +
> > >>   if (!encoder)
> > >>   goto out;  
> > >
> > > --
> > > Regards,
> > >
> > > Laurent Pinchart
> > >  
> > 
> > 
> > 
> 
> 
> 
> -- 
> Boris Brezillon, Free Electrons
> Embedded Linux and Kernel engineering
> http://free-electrons.com

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [Intel-gfx] [Nouveau] [PATCH 9/9] drm: Turn off crtc before tearing down its data structure

2016-06-03 Thread Daniel Vetter
On Fri, Jun 03, 2016 at 09:30:06AM +0200, Lukas Wunner wrote:
> On Wed, Jun 01, 2016 at 04:40:12PM +0200, Daniel Vetter wrote:
> > On Wed, Jun 01, 2016 at 02:36:41PM +0200, Lukas Wunner wrote:
> > > On Wed, May 25, 2016 at 03:43:42PM +0200, Daniel Vetter wrote:
> > > > On Wed, May 25, 2016 at 12:51 PM, Lukas Wunner  wrote:
> > > > > On Tue, May 24, 2016 at 11:30:42PM +0200, Daniel Vetter wrote:
> > > > > > On Tue, May 24, 2016 at 06:03:27PM +0200, Lukas Wunner wrote:
> > > > > > > When a drm_crtc structure is destroyed with drm_crtc_cleanup(), 
> > > > > > > the DRM
> > > > > > > core does not turn off the crtc first and neither do the drivers. 
> > > > > > > With
> > > > > > > nouveau, radeon and amdgpu, this causes a runtime pm ref to be 
> > > > > > > leaked on
> > > > > > > driver unload if at least one crtc was enabled.
> > > > > > >
> > > > > > > (See usage of have_disp_power_ref in nouveau_crtc_set_config(),
> > > > > > > radeon_crtc_set_config() and amdgpu_crtc_set_config()).
> > > > > > >
> > > > > > > Fixes: 5addcf0a5f0f ("nouveau: add runtime PM support (v0.9)")
> > > > > > > Cc: Dave Airlie 
> > > > > > > Tested-by: Karol Herbst 
> > > > > > > Signed-off-by: Lukas Wunner 
> > > > 
> > > > With legacy kms the only way to keep a crtc enabled is to display a
> > > > drm_framebuffer on it. And drm_mode_config_cleanup has a WARN_ON if
> > > > framebuffers are left behind. There's a bunch of options:
> > > > - nouveau somehow manages to keep the crtc on without a framebuffer
> > > > - nouveau somehow leaks a drm_framebuffer, but removes it from the 
> > > > fb_list
> > > > - something else
> > > 
> > > Found it. nouveau_fbcon_destroy() doesn't call drm_framebuffer_remove().
> > > If I add that, the crtc gets properly disabled on unload.
> > > 
> > > It does call drm_framebuffer_cleanup(). That's why there was no WARN,
> > > drm_mode_config_cleanup() only WARNs if a framebuffer was left on the
> > > mode_config.fb_list.
> > > 
> > > radeon and amdgpu have the same problem. In fact there are very few
> > > drivers that call drm_framebuffer_remove(): tegra, msm, exynos, omapdrm
> > > and i915 (since Imre Deak's 9d6612516da0).
> > > 
> > > Should we add a WARN to prevent this? How about WARN_ON(crtc->enabled)
> > > in drm_crtc_cleanup()?
> > > 
> > > Also, i915 calls drm_framebuffer_unregister_private() before it calls
> > > drm_framebuffer_remove(). This ordering has the unfortunate side effect
> > > that the drm_framebuffer has ID 0 in log messages emitted by
> > > drm_framebuffer_remove():
> > > 
> > > [   39.680874] [drm:drm_mode_object_unreference] OBJ ID: 0 (3)
> > > [   39.680878] [drm:drm_mode_object_unreference] OBJ ID: 0 (2)
> > > [   39.680884] [drm:drm_mode_object_unreference] OBJ ID: 0 (1)
> > 
> > Well we must first unregister it before we can remove it, so this is
> > unavoidable.
> 
> Yes but drm_framebuffer_free() calls drm_mode_object_unregister()
> and is invoked by drm_framebuffer_remove(), so the additional call to
> drm_framebuffer_unregister_private() in intel_fbdev_destroy() seems
> superfluous. Or is there some reason I'm missing that this needs to
> be called before intel_unpin_fb_obj()?
> 
> 
> > Wrt switching from _cleanup to _remove, iirc there were troubles with the
> > latter calling into the fb->funcs->destroy hook. But many drivers have
> > their fbdev fb embedded into some struct (instead of a pointer like i915),
> > and then things go sideways badly. That's why you can't just blindly
> > replace them.
> 
> So the options seem to be:
> 
> (1) Refactor nouveau, radeon and amdgpu to not embed their framebuffer
> struct in their fbdev struct, so that drm_framebuffer_remove() can
> be used.
> 
> (2) Amend each of them to turn off crtcs which are using the fbdev
> framebuffer, duplicating the code in drm_framebuffer_remove().
> 
> (3) Split drm_framebuffer_remove(), move the portion to turn off crtcs
> into a separate helper, say, drm_framebuffer_deactivate(), call that
> from nouveau, radeon and amdgpu.
> 
> (4) Go back to square one and use patch [9/9] of this series.
> 
> Which one would be most preferred? Is there another solution I've missed?

I think a dedicated turn_off_everything helper would be best. We'd need an
atomic and a legacy version (because hooray), but that would work in all
cases. Relying on the implicit behaviour to turn off everything (strictly
speaking you only need to turn off all the planes, you can leave crtcs on,
and that's what most atomic drivers really want under normal
circumstances) is a bit fragile, and it's also possible to disable fbdev
emulation. If your driver needs everything to be off on module unload, then
it's imo best to explicitly enforce that.

So "(5) Write dedicated helper to turn off everything" is imo the right
fix.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
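
A legacy-path sketch of such a turn-off-everything helper (hypothetical
name; an atomic version would build and commit a duplicated state
instead):

static void drm_helper_turn_off_everything(struct drm_device *dev)
{
	struct drm_crtc *crtc;

	drm_modeset_lock_all(dev);
	drm_for_each_crtc(crtc, dev) {
		/* fb == NULL disables the crtc via the SetCrtc path */
		struct drm_mode_set set = { .crtc = crtc };

		if (crtc->enabled)
			drm_mode_set_config_internal(&set);
	}
	drm_modeset_unlock_all(dev);
}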

Re: [Intel-gfx] [PATCH 18/38] drm/fsl-du: Implement some semblance of vblank event handling

2016-06-03 Thread Stefan Agner
On 2016-06-01 15:06, Daniel Vetter wrote:
> No idea how exactly fsl-du commits hw state changes, but here in flush
> is probably the safest place.

The writes to the DCU_UPDATE_MODE register (DCU_UPDATE_MODE_READREG)
commit state changes. There are several callbacks causing a commit; it
seems to me that the current code does not do what atomic is asking for.
Checking that is on my to-do list...
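
The commit point is thus a single register write, roughly (a sketch from
memory, not a quote of the driver):

	/* latch the shadowed register state into the hardware */
	regmap_write(fsl_dev->regmap, DCU_UPDATE_MODE,
		     DCU_UPDATE_MODE_READREG);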

I tested this patch individually, using the modesetting driver. Things
seem to work as they did before.
Acked-by: Stefan Agner 

But I guess to test the non-blocking commit, I would have to apply the
whole patchset...? Do you have it somewhere in a git tree?

--
Stefan

> 
> While at it nuke the dummy functions.
> 
> v2: Be more robust and either arm, when the CRTC is on, or just send
> the event out right away.
> 
> Cc: Stefan Agner 
> Signed-off-by: Daniel Vetter 
> ---
>  drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c | 23 +++
>  1 file changed, 11 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c
> b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c
> index 89c0084c2814..706de3278f1c 100644
> --- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c
> +++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c
> @@ -22,20 +22,21 @@
>  #include "fsl_dcu_drm_drv.h"
>  #include "fsl_dcu_drm_plane.h"
>  
> -static void fsl_dcu_drm_crtc_atomic_begin(struct drm_crtc *crtc,
> +static void fsl_dcu_drm_crtc_atomic_flush(struct drm_crtc *crtc,
> struct drm_crtc_state *old_crtc_state)
>  {
> -}
> + struct drm_pending_vblank_event *event = crtc->state->event;
>  
> -static int fsl_dcu_drm_crtc_atomic_check(struct drm_crtc *crtc,
> -  struct drm_crtc_state *state)
> -{
> - return 0;
> -}
> + if (event) {
> + crtc->state->event = NULL;
>  
> -static void fsl_dcu_drm_crtc_atomic_flush(struct drm_crtc *crtc,
> -   struct drm_crtc_state *old_crtc_state)
> -{
> + spin_lock_irq(&crtc->dev->event_lock);
> + if (drm_crtc_vblank_get(crtc) == 0)
> + drm_crtc_arm_vblank_event(crtc, event);
> + else
> + drm_crtc_send_vblank_event(crtc, event);
> + spin_unlock_irq(&crtc->dev->event_lock);
> + }
>  }
>  
>  static void fsl_dcu_drm_disable_crtc(struct drm_crtc *crtc)
> @@ -117,8 +118,6 @@ static void fsl_dcu_drm_crtc_mode_set_nofb(struct drm_crtc *crtc)
>  }
>  
>  static const struct drm_crtc_helper_funcs fsl_dcu_drm_crtc_helper_funcs = {
> - .atomic_begin = fsl_dcu_drm_crtc_atomic_begin,
> - .atomic_check = fsl_dcu_drm_crtc_atomic_check,
>   .atomic_flush = fsl_dcu_drm_crtc_atomic_flush,
>   .disable = fsl_dcu_drm_disable_crtc,
>   .enable = fsl_dcu_drm_crtc_enable,


Re: [Intel-gfx] [PATCH 13/24] drm/i915/kbl: Add WaDisableDynamicCreditSharing

2016-06-03 Thread Matthew Auld
> +   /* WaDisableDynamicCreditSharing:kbl */
> +   if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
> +   WA_SET_BIT(GAMT_CHKN_BIT_REG, (1 << 28));
> +
Let's play name that bit!
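
Something like the below, say (illustrative name only, not taken from the
WA database):

+#define   GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING	(1 << 28)
...
+	/* WaDisableDynamicCreditSharing:kbl */
+	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
+		WA_SET_BIT(GAMT_CHKN_BIT_REG,
+			   GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);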

Otherwise the patch looks good, although it's slightly worrying that the
HSDs state the WA is needed up to B0, but the WA db says up to A0...

I guess we should rather trust the HSDs?

Reviewed-by: Matthew Auld 


Re: [Intel-gfx] [PATCH i-g-t] lib/igt_kms: Fix passing invalid fd to openat(2).

2016-06-03 Thread Chris Wilson
On Fri, Jun 03, 2016 at 06:06:19PM +0300, Marius Vlad wrote:
> Introduced by 0e11befe442. openat(2) uses a relative path. Fix by
> passing the correct fd.
> 
> Signed-off-by: Marius Vlad 
> CC: Chris Wilson 

Yup, at one point it was using the drm_fd to search for the right sysfs
every time.
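
The failure mode is easy to demonstrate outside igt: openat(2) resolves a
relative path against the directory referred to by its fd argument, so
handing it a non-directory fd (like the drm device fd) fails with
ENOTDIR. A self-contained illustration:

#include <fcntl.h>
#include <stdio.h>

int main(void)
{
	int dir = open("/sys/class/drm", O_RDONLY | O_DIRECTORY);
	int drm = open("/dev/dri/card0", O_RDWR);

	/* resolved relative to /sys/class/drm: succeeds */
	printf("dir: %d\n", openat(dir, "card0", O_RDONLY));
	/* resolved relative to a character device: fails, ENOTDIR */
	printf("drm: %d\n", openat(drm, "card0", O_RDONLY));
	return 0;
}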

> ---
>  lib/igt_kms.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/lib/igt_kms.c b/lib/igt_kms.c
> index b12a5b3..135c7b8 100644
> --- a/lib/igt_kms.c
> +++ b/lib/igt_kms.c
> @@ -641,7 +641,8 @@ bool kmstest_force_connector(int drm_fd, drmModeConnector *connector,
>   return false;
>   }
>  
> - if (!igt_sysfs_set(drm_fd, path, value)) {
> + /* use the fd returned by igt_sysfs_open() */

Just call it dir, don't tell people they can treat it as an fd.

> + if (!igt_sysfs_set(dir, path, value)) {
>   close(dir);
>   return false;
>   }

Preferably without the comment,
Reviewed-by: Chris Wilson 
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


[Intel-gfx] [PATCH 25/38] drm/i915: Remove (struct_mutex) locking for wait-ioctl

2016-06-03 Thread Chris Wilson
With a bit of care (and leniency) we can iterate over the object and
wait for previous rendering to complete with judicious use of atomic
reference counting. The ABI requires us to ensure that an active object
is eventually flushed (like the busy-ioctl), which is guaranteed by our
management of requests (i.e. everything that is submitted to hardware is
flushed in the same request). All we have to do is ensure that we can
detect when the requests are complete, for reporting when the object is
idle (without triggering ETIME) - this is handled by
__i915_wait_request.

The biggest danger in the code is walking the object without holding any
locks. We iterate over the set of last requests and carefully grab a
reference upon it. (If it is changing beneath us, that is the usual
userspace race and even with locking you get the same indeterminate
results.) If the request is unreferenced beneath us, it will be disposed
of into the request cache - so we have to carefully order the retrieval
of the request pointer with its removal, and to do this we employ RCU on
the request cache and upon the last_request pointer tracking.

The impact of this is actually quite small - the return to userspace
following the wait was already lockless. What we achieve here is
completing an already finished wait without hitting the struct_mutex;
our hold is quite short and so we are typically just a victim of
contention rather than a cause.
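
The careful grab described above reduces to the usual RCU pattern: take
the read lock, fetch the pointer, and only keep the request if its
refcount can still be raised. A schematic (field names illustrative, not
the literal i915 code):

	struct drm_i915_gem_request *req;

	rcu_read_lock();
	req = rcu_dereference(active->request);
	if (req && !kref_get_unless_zero(&req->ref))
		req = NULL;	/* already on its way into the request cache */
	rcu_read_unlock();

	/* if non-NULL, req is now safe to wait upon and then put */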

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem.c | 42 +++--
 1 file changed, 11 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4b178dca..4af64d864587 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2358,46 +2358,26 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 {
struct drm_i915_gem_wait *args = data;
struct drm_i915_gem_object *obj;
-   struct drm_i915_gem_request *requests[I915_NUM_ENGINES];
-   int i, n = 0;
-   int ret;
+   unsigned long active;
+   int idx, ret = 0;
 
if (args->flags != 0)
return -EINVAL;
 
-   ret = i915_mutex_lock_interruptible(dev);
-   if (ret)
-   return ret;
-
obj = i915_gem_object_lookup(file, args->bo_handle);
-   if (!obj) {
-   mutex_unlock(&dev->struct_mutex);
+   if (!obj)
return -ENOENT;
-   }
-
-   if (!i915_gem_object_is_active(obj))
-   goto out;
 
-   for (i = 0; i < I915_NUM_ENGINES; i++) {
-   struct drm_i915_gem_request *req;
-
-   req = i915_gem_active_get(&obj->last_read[i],
-                             &obj->base.dev->struct_mutex);
-   if (req)
-   requests[n++] = req;
+   active = __I915_BO_ACTIVE(obj);
+   for_each_active(active, idx) {
+   ret = i915_gem_active_wait_unlocked(&obj->last_read[idx], true,
+                                       args->timeout_ns >= 0 ? &args->timeout_ns : NULL,
+                                       to_rps_client(file));
+   if (ret)
+   break;
}
 
-out:
-   i915_gem_object_put(obj);
-   mutex_unlock(&dev->struct_mutex);
-
-   for (i = 0; i < n; i++) {
-   if (ret == 0)
-   ret = __i915_wait_request(requests[i], true,
-                             args->timeout_ns > 0 ? &args->timeout_ns : NULL,
-                             to_rps_client(file));
-   i915_gem_request_put(requests[i]);
-   }
+   i915_gem_object_put_unlocked(obj);
return ret;
 }
 
-- 
2.8.1



[Intel-gfx] [PATCH 38/38] drm/i915/overlay: Use VMA as the primary tracker for images

2016-06-03 Thread Chris Wilson
Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/intel_overlay.c | 32 +---
 1 file changed, 13 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index 75bdd335d565..ad57149f4809 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -170,8 +170,7 @@ struct overlay_registers {
 struct intel_overlay {
struct drm_i915_private *i915;
struct intel_crtc *crtc;
-   struct drm_i915_gem_object *vid_bo, *old_vid_bo;
-   struct i915_vma *vid_vma, *old_vid_vma;
+   struct i915_vma *vma, *old_vma;
bool active;
bool pfit_active;
u32 pfit_vscale_ratio; /* shifted-point number, (1<<12) == 1.0 */
@@ -314,24 +313,21 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
 
 static void intel_overlay_release_old_vid_tail(struct intel_overlay *overlay)
 {
-   struct drm_i915_gem_object *obj = overlay->old_vid_bo;
+   i915_gem_object_unpin_from_display_plane(overlay->old_vma);
+   i915_gem_object_put(overlay->old_vma->obj);
 
-   i915_gem_object_unpin_from_display_plane(overlay->old_vid_vma);
-   i915_gem_object_put(obj);
-
-   overlay->old_vid_bo = NULL;
+   overlay->old_vma = NULL;
 }
 
 static void intel_overlay_off_tail(struct intel_overlay *overlay)
 {
/* never have the overlay hw on without showing a frame */
-   if (WARN_ON(overlay->vid_vma))
+   if (WARN_ON(overlay->vma))
return;
 
-   i915_gem_object_unpin_from_display_plane(overlay->vid_vma);
-   i915_gem_object_put(overlay->vid_bo);
-   overlay->vid_vma = NULL;
-   overlay->vid_bo = NULL;
+   i915_gem_object_unpin_from_display_plane(overlay->vma);
+   i915_gem_object_put(overlay->vma->obj);
+   overlay->vma = NULL;
 
overlay->crtc->overlay = NULL;
overlay->crtc = NULL;
@@ -422,7 +418,7 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
/* Only wait if there is actually an old frame to release to
 * guarantee forward progress.
 */
-   if (!overlay->old_vid_bo)
+   if (!overlay->old_vma)
return 0;
 
if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) {
@@ -455,7 +451,7 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
intel_overlay_release_old_vid_tail(overlay);
 
 
-   i915_gem_track_fb(overlay->old_vid_bo, NULL,
+   i915_gem_track_fb(overlay->old_vma->obj, NULL,
  INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe));
return 0;
 }
@@ -841,13 +837,11 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
if (ret)
goto out_unpin;
 
-   i915_gem_track_fb(overlay->vid_bo, new_bo,
+   i915_gem_track_fb(overlay->vma->obj, new_bo,
  INTEL_FRONTBUFFER_OVERLAY(pipe));
 
-   overlay->old_vid_bo = overlay->vid_bo;
-   overlay->old_vid_vma = overlay->vid_vma;
-   overlay->vid_bo = new_bo;
-   overlay->vid_vma = vma;
+   overlay->old_vma = overlay->vma;
+   overlay->vma = vma;
 
intel_frontbuffer_flip(dev_priv->dev, INTEL_FRONTBUFFER_OVERLAY(pipe));
 
-- 
2.8.1



[Intel-gfx] [PATCH 27/38] drm/i915: Reduce locking inside swfinish ioctl

2016-06-03 Thread Chris Wilson
We only need to take the struct_mutex if the object is pinned to the
display engine and so requires checking for clflush. (The race with
userspace pinning the object to a framebuffer is irrelevant.)

v2: Use ACCESS_ONCE for compiler hints (or not, as it is a bitfield)

Signed-off-by: Chris Wilson 
Cc: Daniel Vetter 
---
 drivers/gpu/drm/i915/i915_gem.c | 29 -
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a4f949038d50..b78f9df1894c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1265,25 +1265,28 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
 {
struct drm_i915_gem_sw_finish *args = data;
struct drm_i915_gem_object *obj;
-   int ret = 0;
-
-   ret = i915_mutex_lock_interruptible(dev);
-   if (ret)
-   return ret;
+   int ret;
 
obj = i915_gem_object_lookup(file, args->handle);
-   if (!obj) {
-   ret = -ENOENT;
-   goto unlock;
-   }
+   if (!obj)
+   return -ENOENT;
 
/* Pinned buffers may be scanout, so flush the cache */
-   if (obj->pin_display)
+   if (obj->pin_display) {
+   ret = i915_mutex_lock_interruptible(dev);
+   if (ret)
+   goto unref;
+
i915_gem_object_flush_cpu_write_domain(obj);
 
-   i915_gem_object_put(obj);
-unlock:
-   mutex_unlock(&dev->struct_mutex);
+   i915_gem_object_put(obj);
+   mutex_unlock(&dev->struct_mutex);
+   } else {
+   ret = 0;
+unref:
+   i915_gem_object_put_unlocked(obj);
+   }
+
return ret;
 }
 
-- 
2.8.1



[Intel-gfx] [PATCH 32/38] drm/i915: Stop the machine whilst capturing the GPU crash dump

2016-06-03 Thread Chris Wilson
The error state is purposefully racy as we expect it to be called at any
time and so have avoided any locking whilst capturing the crash dump.
However, with multi-engine GPUs and multiple CPUs, those races can
manifest into OOPSes as we attempt to chase dangling pointers freed on
other CPUs. Under discussion are lots of ways to slow down normal
operation in order to protect the post-mortem error capture, but what if
we take the opposite approach and freeze the machine whilst the error
capture runs (note the GPU may still be running, but as long as we don't
process any of the results the driver's bookkeeping will be static).

Note that by itself, this is not a complete fix. It also depends on
the compiler barriers in list_add/list_del to prevent traversing the
lists into the void.

v2: Avoid drm_clflush_pages() inside stop_machine() as it may use
stop_machine() itself for its wbinvd fallback.
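
For context, capture() below is the callback handed to stop_machine(),
which runs it on one CPU while all others spin with interrupts disabled -
schematically:

	/* freeze every other CPU while we walk the driver's lists */
	stop_machine(capture, error, NULL);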

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/Kconfig  |  1 +
 drivers/gpu/drm/i915/i915_drv.h   |  2 ++
 drivers/gpu/drm/i915/i915_gpu_error.c | 48 +--
 3 files changed, 32 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 29a32b11953b..9398a4d06c0e 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -4,6 +4,7 @@ config DRM_I915
depends on X86 && PCI
select INTEL_GTT
select INTERVAL_TREE
+   select STOP_MACHINE
# we need shmfs for the swappable backing store, and in particular
# the shmem_readpage() which depends upon tmpfs
select SHMEM
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index dbd3c6f3abbc..77564f378771 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -470,6 +470,8 @@ struct drm_i915_error_state {
struct kref ref;
struct timeval time;
 
+   struct drm_i915_private *i915;
+
char error_msg[128];
bool simulated;
int iommu;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index f01f0ca4bb86..ab2ba76a2a3b 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -28,6 +28,7 @@
  */
 
 #include <generated/utsrelease.h>
+#include <linux/stop_machine.h>
 #include "i915_drv.h"
 
 static const char *ring_str(int ring)
@@ -682,14 +683,12 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
 
dst->page_count = num_pages;
while (num_pages--) {
-   unsigned long flags;
void *d;
 
d = kmalloc(PAGE_SIZE, GFP_ATOMIC);
if (d == NULL)
goto unwind;
 
-   local_irq_save(flags);
if (use_ggtt) {
void __iomem *s;
 
@@ -708,15 +707,10 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
 
page = i915_gem_object_get_page(src, i);
 
-   drm_clflush_pages(&page, 1);
-
s = kmap_atomic(page);
memcpy(d, s, PAGE_SIZE);
kunmap_atomic(s);
-
-   drm_clflush_pages(&page, 1);
}
-   local_irq_restore(flags);
 
dst->pages[i++] = d;
reloc_offset += PAGE_SIZE;
@@ -1366,6 +1360,32 @@ static void i915_capture_gen_state(struct drm_i915_private *dev_priv,
error->suspend_count = dev_priv->suspend_count;
 }
 
+static int capture(void *data)
+{
+   struct drm_i915_error_state *error = data;
+
+   /* Ensure that what we readback from memory matches what the GPU sees */
+   wbinvd();
+
+   i915_capture_gen_state(error->i915, error);
+   i915_capture_reg_state(error->i915, error);
+   i915_gem_record_fences(error->i915, error);
+   i915_gem_record_rings(error->i915, error);
+
+   i915_capture_active_buffers(error->i915, error);
+   i915_capture_pinned_buffers(error->i915, error);
+
+   do_gettimeofday(&error->time);
+
+   error->overlay = intel_overlay_capture_error_state(error->i915);
+   error->display = intel_display_capture_error_state(error->i915);
+
+   /* And make sure we don't leave trash in the CPU cache */
+   wbinvd();
+
+   return 0;
+}
+
 /**
  * i915_capture_error_state - capture an error record for later analysis
  * @dev: drm device
@@ -1394,19 +1414,9 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv,
}
 
	kref_init(&error->ref);
+   error->i915 = dev_priv;
 
-   i915_capture_gen_state(dev_priv, error);
-   i915_capture_reg_state(dev_priv, error);
-   i915_gem_record_fences(dev_priv, error);
-   i915_gem_record_rings(dev_priv, error);
-
-   i915_capture_active_buffers(dev_priv, error);
-   i915_capture_pinned_buffers(dev_priv, error);
-
-   do_gettimeofday(&error->time);
-
-   error->overlay = 

[Intel-gfx] [PATCH 19/38] drm/i915: Convert non-blocking userptr waits for requests over to using RCU

2016-06-03 Thread Chris Wilson
We can completely avoid taking the struct_mutex around the non-blocking
waits by switching over to the RCU request management (trading the mutex
for an RCU read lock and some complex atomic operations). The improvement
is that we gain further contention reduction, and overall the code
becomes simpler due to the reduced mutex dancing.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem_userptr.c | 34 +++--
 1 file changed, 7 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 221792632290..96ab6161903a 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -63,32 +63,12 @@ struct i915_mmu_object {
 
 static void wait_rendering(struct drm_i915_gem_object *obj)
 {
-   struct drm_device *dev = obj->base.dev;
-   struct drm_i915_gem_request *requests[I915_NUM_ENGINES];
-   int i, n;
-
-   if (!i915_gem_object_is_active(obj))
-   return;
-
-   n = 0;
-   for (i = 0; i < I915_NUM_ENGINES; i++) {
-   struct drm_i915_gem_request *req;
+   unsigned long active = __I915_BO_ACTIVE(obj);
+   int idx;
 
-   req = i915_gem_active_get(&obj->last_read[i],
-                             &obj->base.dev->struct_mutex);
-   if (req)
-   requests[n++] = req;
-   }
-
-   mutex_unlock(&dev->struct_mutex);
-
-   for (i = 0; i < n; i++)
-   __i915_wait_request(requests[i], false, NULL, NULL);
-
-   mutex_lock(&dev->struct_mutex);
-
-   for (i = 0; i < n; i++)
-   i915_gem_request_put(requests[i]);
+   for_each_active(active, idx)
+   i915_gem_active_wait_unlocked(&obj->last_read[idx],
+ false, NULL, NULL);
 }
 
 static void cancel_userptr(struct work_struct *work)
@@ -97,6 +77,8 @@ static void cancel_userptr(struct work_struct *work)
struct drm_i915_gem_object *obj = mo->obj;
struct drm_device *dev = obj->base.dev;
 
+   wait_rendering(obj);
+
	mutex_lock(&dev->struct_mutex);
/* Cancel any active worker and force us to re-evaluate gup */
obj->userptr.work = NULL;
@@ -105,8 +87,6 @@ static void cancel_userptr(struct work_struct *work)
struct drm_i915_private *dev_priv = to_i915(dev);
bool was_interruptible;
 
-   wait_rendering(obj);
-
was_interruptible = dev_priv->mm.interruptible;
dev_priv->mm.interruptible = false;
 
-- 
2.8.1



[Intel-gfx] [PATCH 21/38] drm/i915: Avoid requiring struct_mutex during suspend

2016-06-03 Thread Chris Wilson
The struct_mutex can have some tricky interactions with other mutexes
(mainly due to using nasty constructs like stop_machine() from within
its confines). This makes it "illegal" (lockdep should generate WARNs)
to take it from certain paths like suspend, where the locking order may
be inverted. We can extend the RCU request management to track activity
on an engine and thereby wait upon all GPU activity without taking the
struct_mutex.
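
With a last_request tracker per engine, waiting for the GPU to idle
without struct_mutex then reduces to something like (a sketch, reusing
the unlocked wait helper from earlier in the series):

	for_each_engine(engine, dev_priv) {
		ret = i915_gem_active_wait_unlocked(&engine->last_request,
						    true, NULL, NULL);
		if (ret)
			return ret;
	}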

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem.c | 56 +
 drivers/gpu/drm/i915/i915_gem_evict.c   |  2 +-
 drivers/gpu/drm/i915/i915_gem_request.c |  8 +++--
 drivers/gpu/drm/i915/i915_gem_request.h | 11 +++
 drivers/gpu/drm/i915/i915_gpu_error.c   |  2 +-
 drivers/gpu/drm/i915/i915_irq.c |  3 +-
 drivers/gpu/drm/i915/intel_lrc.c|  2 +-
 drivers/gpu/drm/i915/intel_pm.c |  2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c | 30 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h | 26 ---
 10 files changed, 73 insertions(+), 69 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 76e5a241c7be..c1e91589e7bc 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2179,13 +2179,18 @@ static void i915_gem_reset_engine_status(struct intel_engine_cs *engine)
 
 static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
 {
+   struct drm_i915_gem_request *request;
struct intel_ring *ring;
 
+   request = i915_gem_active_peek(&engine->last_request,
+                                  &engine->i915->dev->struct_mutex);
+
/* Mark all pending requests as complete so that any concurrent
 * (lockless) lookup doesn't try and wait upon the request as we
 * reset it.
 */
-   intel_engine_init_seqno(engine, engine->last_submitted_seqno);
+   if (request)
+   intel_engine_init_seqno(engine, request->fence.seqno);
 
/*
 * Clear the execlists queue up before freeing the requests, as those
@@ -2207,15 +2212,9 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
 * implicit references on things like e.g. ppgtt address spaces through
 * the request.
 */
-   if (!list_empty(&engine->request_list)) {
-   struct drm_i915_gem_request *request;
-
-   request = list_last_entry(&engine->request_list,
- struct drm_i915_gem_request,
- link);
-
+   if (request)
i915_gem_request_retire_upto(request);
-   }
+   GEM_BUG_ON(intel_engine_is_active(engine));
 
/* Having flushed all requests from all queues, we know that all
 * ringbuffers must now be empty. However, since we do not reclaim
@@ -2614,8 +2613,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv)
struct intel_engine_cs *engine;
int ret;
 
-   lockdep_assert_held(&dev_priv->dev->struct_mutex);
-
for_each_engine(engine, dev_priv) {
if (engine->last_context == NULL)
continue;
@@ -3787,47 +3784,36 @@ struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
return NULL;
 }
 
-static void
-i915_gem_stop_engines(struct drm_device *dev)
-{
-   struct drm_i915_private *dev_priv = dev->dev_private;
-   struct intel_engine_cs *engine;
-
-   for_each_engine(engine, dev_priv)
-   dev_priv->gt.stop_engine(engine);
-}
-
 int
 i915_gem_suspend(struct drm_device *dev)
 {
struct drm_i915_private *dev_priv = dev->dev_private;
-   int ret = 0;
+   int ret;
 
-   mutex_lock(&dev->struct_mutex);
ret = i915_gem_wait_for_idle(dev_priv);
if (ret)
-   goto err;
-
-   i915_gem_retire_requests(dev_priv);
-
-   i915_gem_stop_engines(dev);
-   i915_gem_context_lost(dev_priv);
-   mutex_unlock(&dev->struct_mutex);
+   return ret;
 
	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
	cancel_delayed_work_sync(&dev_priv->gt.retire_work);
	flush_delayed_work(&dev_priv->gt.idle_work);
 
+   mutex_lock(&dev_priv->dev->struct_mutex);
+
/* Assert that we sucessfully flushed all the work and
 * reset the GPU back to its idle, low power state.
 */
-   WARN_ON(dev_priv->gt.awake);
+   if (dev_priv->gt.awake) {
+   if (INTEL_INFO(dev_priv)->gen >= 6)
+   gen6_rps_idle(dev_priv);
+   intel_runtime_pm_put(dev_priv);
+   dev_priv->gt.awake = false;
+   }
 
-   return 0;
+   i915_gem_context_lost(dev_priv);
+   mutex_unlock(&dev_priv->dev->struct_mutex);
 
-err:
-   mutex_unlock(&dev->struct_mutex);
-   return ret;
+   return 0;
 }
 
 void i915_gem_init_swizzling(struct drm_device *dev)
diff --git 

[Intel-gfx] [PATCH 24/38] drm/i915: Do a nonblocking wait first in pread/pwrite

2016-06-03 Thread Chris Wilson
If we try to read or write to an active request, we first must wait
upon the GPU completing that request. Let's do that without holding the
mutex (and so allow someone else to access the GPU whilst we wait). Upon
completion, we will reacquire the mutex and only then start the
operation (i.e. we do not rely on state from before dropping the mutex).
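
The resulting shape of both ioctls: look up the object (which takes a
reference but no lock), wait unlocked, then take the mutex and rederive
everything. A schematic (do_the_copy stands in for the shmem
pread/pwrite paths):

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	ret = __unsafe_wait_rendering(obj, to_rps_client(file), write);
	if (ret)
		goto out_unlocked;

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto out_unlocked;

	ret = do_the_copy(dev, obj, args, file); /* state re-derived under the lock */

	i915_gem_object_put(obj);
	mutex_unlock(&dev->struct_mutex);
	return ret;

out_unlocked:
	i915_gem_object_put_unlocked(obj);
	return ret;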

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem.c | 66 +++--
 1 file changed, 37 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 98aa0a7c91f0..4b178dca 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -773,21 +773,15 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
   args->size))
return -EFAULT;
 
-   ret = i915_mutex_lock_interruptible(dev);
-   if (ret)
-   return ret;
-
obj = i915_gem_object_lookup(file, args->handle);
-   if (!obj) {
-   ret = -ENOENT;
-   goto unlock;
-   }
+   if (!obj)
+   return -ENOENT;
 
/* Bounds check source.  */
if (args->offset > obj->base.size ||
args->size > obj->base.size - args->offset) {
ret = -EINVAL;
-   goto out;
+   goto out_unlocked;
}
 
/* prime objects have no backing filp to GEM pread/pwrite
@@ -795,17 +789,27 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 */
if (!obj->base.filp) {
ret = -EINVAL;
-   goto out;
+   goto out_unlocked;
}
 
-   trace_i915_gem_object_pread(obj, args->offset, args->size);
+   ret = __unsafe_wait_rendering(obj, to_rps_client(file), true);
+   if (ret)
+   goto out_unlocked;
+
+   ret = i915_mutex_lock_interruptible(dev);
+   if (ret)
+   goto out_unlocked;
 
+   trace_i915_gem_object_pread(obj, args->offset, args->size);
ret = i915_gem_shmem_pread(dev, obj, args, file);
 
-out:
i915_gem_object_put(obj);
-unlock:
	mutex_unlock(&dev->struct_mutex);
+
+   return ret;
+
+out_unlocked:
+   i915_gem_object_put_unlocked(obj);
return ret;
 }
 
@@ -1127,23 +1131,15 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
return -EFAULT;
}
 
-   intel_runtime_pm_get(dev_priv);
-
-   ret = i915_mutex_lock_interruptible(dev);
-   if (ret)
-   goto put_rpm;
-
obj = i915_gem_object_lookup(file, args->handle);
-   if (!obj) {
-   ret = -ENOENT;
-   goto unlock;
-   }
+   if (!obj)
+   return -ENOENT;
 
/* Bounds check destination. */
if (args->offset > obj->base.size ||
args->size > obj->base.size - args->offset) {
ret = -EINVAL;
-   goto out;
+   goto out_unlocked;
}
 
/* prime objects have no backing filp to GEM pread/pwrite
@@ -1151,11 +1147,20 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 */
if (!obj->base.filp) {
ret = -EINVAL;
-   goto out;
+   goto out_unlocked;
}
 
-   trace_i915_gem_object_pwrite(obj, args->offset, args->size);
+   ret = __unsafe_wait_rendering(obj, to_rps_client(file), false);
+   if (ret)
+   goto out_unlocked;
+
+   intel_runtime_pm_get(dev_priv);
 
+   ret = i915_mutex_lock_interruptible(dev);
+   if (ret)
+   goto out_rpm;
+
+   trace_i915_gem_object_pwrite(obj, args->offset, args->size);
ret = -EFAULT;
/* We can only do the GTT pwrite on untiled buffers, as otherwise
 * it would end up going through the fenced access, and we'll get
@@ -1179,14 +1184,17 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
ret = i915_gem_shmem_pwrite(dev, obj, args, file);
}
 
-out:
i915_gem_object_put(obj);
-unlock:
	mutex_unlock(&dev->struct_mutex);
-put_rpm:
intel_runtime_pm_put(dev_priv);
 
return ret;
+
+out_rpm:
+   intel_runtime_pm_put(dev_priv);
+out_unlocked:
+   i915_gem_object_put_unlocked(obj);
+   return ret;
 }
 
 /**
-- 
2.8.1



[Intel-gfx] [PATCH 33/38] drm/i915: Scan GGTT active list for context object

2016-06-03 Thread Chris Wilson
Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index ab2ba76a2a3b..367b8b2ce5f2 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1034,18 +1034,17 @@ static void i915_gem_record_active_context(struct intel_engine_cs *engine,
   struct drm_i915_error_ring *ering)
 {
struct drm_i915_private *dev_priv = engine->i915;
-   struct drm_i915_gem_object *obj;
+   struct i915_vma *vma;
 
/* Currently render ring is the only HW context user */
if (engine->id != RCS || !error->ccid)
return;
 
-   list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
-   if (!i915_gem_obj_ggtt_bound(obj))
-   continue;
-
-   if ((error->ccid & PAGE_MASK) == i915_gem_obj_ggtt_offset(obj)) {
-   ering->ctx = i915_error_ggtt_object_create(dev_priv, obj);
+   list_for_each_entry(vma, &dev_priv->ggtt.base.active_list, vm_link) {
+   if ((error->ccid & PAGE_MASK) == vma->node.start) {
+   ering->ctx = i915_error_object_create(dev_priv,
+ vma->obj,
+ vma->vm);
break;
}
}
-- 
2.8.1



[Intel-gfx] [PATCH 31/38] drm/i915: Reduce amount of duplicate buffer information captured on error

2016-06-03 Thread Chris Wilson
When capturing the error state, we do not need to know about every
address space - just those that are related to the error. We know which
context is active at the time, and therefore we know which VMs are
implicated in the error. We can then restrict the VMs which we report
to the relevant subset.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h   |   9 +-
 drivers/gpu/drm/i915/i915_gpu_error.c | 198 ++
 2 files changed, 87 insertions(+), 120 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e72b7f35a98e..dbd3c6f3abbc 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -506,6 +506,7 @@ struct drm_i915_error_state {
int num_waiters;
int hangcheck_score;
enum intel_engine_hangcheck_action hangcheck_action;
+   struct i915_address_space *vm;
int num_requests;
 
/* our own tracking of ring head and tail */
@@ -575,17 +576,15 @@ struct drm_i915_error_state {
u32 read_domains;
u32 write_domain;
s32 fence_reg:I915_MAX_NUM_FENCE_BITS;
-   s32 pinned:2;
u32 tiling:2;
u32 dirty:1;
u32 purgeable:1;
u32 userptr:1;
s32 ring:4;
u32 cache_level:3;
-   } **active_bo, **pinned_bo;
-
-   u32 *active_bo_count, *pinned_bo_count;
-   u32 vm_count;
+   } *active_bo[I915_NUM_ENGINES], *pinned_bo;
+   u32 active_bo_count[I915_NUM_ENGINES], pinned_bo_count;
+   struct i915_address_space *active_vm[I915_NUM_ENGINES];
 };
 
 struct intel_connector;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index c2cf5bd57db5..f01f0ca4bb86 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -42,16 +42,6 @@ static const char *ring_str(int ring)
}
 }
 
-static const char *pin_flag(int pinned)
-{
-   if (pinned > 0)
-   return " P";
-   else if (pinned < 0)
-   return " p";
-   else
-   return "";
-}
-
 static const char *tiling_flag(int tiling)
 {
switch (tiling) {
@@ -189,7 +179,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
 {
int i;
 
-   err_printf(m, "  %s [%d]:\n", name, count);
+   err_printf(m, "%s [%d]:\n", name, count);
 
while (count--) {
err_printf(m, "%08x_%08x %8u %02x %02x [ ",
@@ -202,7 +192,6 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
err_printf(m, "%02x ", err->rseqno[i]);
 
err_printf(m, "] %02x", err->wseqno);
-   err_puts(m, pin_flag(err->pinned));
err_puts(m, tiling_flag(err->tiling));
err_puts(m, dirty_flag(err->dirty));
err_puts(m, purgeable_flag(err->purgeable));
@@ -417,18 +406,25 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
for (i = 0; i < ARRAY_SIZE(error->ring); i++)
i915_ring_error_state(m, dev, error, i);
 
-   for (i = 0; i < error->vm_count; i++) {
-   err_printf(m, "vm[%d]\n", i);
+   for (i = 0; i < I915_NUM_ENGINES; i++) {
+   if (error->active_vm[i] == NULL)
+   break;
 
-   print_error_buffers(m, "Active",
+   err_printf(m, "Active vm[%d]\n", i);
+   for (j = 0; j < I915_NUM_ENGINES; j++) {
+   if (error->ring[j].vm == error->active_vm[i])
+   err_printf(m, "%s\n",
+  dev_priv->engine[j].name);
+   }
+   print_error_buffers(m, "  Buffers",
error->active_bo[i],
error->active_bo_count[i]);
-
-   print_error_buffers(m, "Pinned",
-   error->pinned_bo[i],
-   error->pinned_bo_count[i]);
}
 
+   print_error_buffers(m, "Pinned (global)",
+   error->pinned_bo,
+   error->pinned_bo_count);
+
for (i = 0; i < ARRAY_SIZE(error->ring); i++) {
obj = error->ring[i].batchbuffer;
if (obj) {
@@ -624,13 +620,10 @@ static void i915_error_state_free(struct kref *error_ref)
 
i915_error_object_free(error->semaphore_obj);
 
-   for (i = 0; i < error->vm_count; i++)
+   for (i = 0; i < ARRAY_SIZE(error->active_bo); i++)
kfree(error->active_bo[i]);
-
-   kfree(error->active_bo);
-   kfree(error->active_bo_count);
kfree(error->pinned_bo);
-   kfree(error->pinned_bo_count);
+
kfree(error->overlay);

[Intel-gfx] [PATCH 12/38] drm/i915: Use atomics to manipulate obj->frontbuffer_bits

2016-06-03 Thread Chris Wilson
The individual bits inside obj->frontbuffer_bits are protected by each
plane->mutex, but the whole bitfield may be accessed by multiple KMS
operations simultaneously and so the RMW needs to be done with atomics.
However, for updating the single field we do not need to mandate that it
be under the struct_mutex - one more step towards its removal as the de
facto BKL.
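
The hazard being fixed is the classic lost update on a shared word;
with atomic_or()/atomic_andnot() each plane can update its own bits
concurrently without clobbering its neighbour's. A small illustration
using the macros from the patch:

	atomic_t bits = ATOMIC_INIT(0);

	/* plane A sets its bit while plane B clears its own: both survive */
	atomic_or(INTEL_FRONTBUFFER_PRIMARY(PIPE_A), &bits);
	atomic_andnot(INTEL_FRONTBUFFER_CURSOR(PIPE_B), &bits);

	/* a plain bits |= ... racing bits &= ~... could lose one update */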

Signed-off-by: Chris Wilson 
Cc: Daniel Vetter 
---
 drivers/gpu/drm/i915/i915_debugfs.c  |  6 --
 drivers/gpu/drm/i915/i915_drv.h  |  4 +---
 drivers/gpu/drm/i915/i915_gem.c  | 18 +++---
 drivers/gpu/drm/i915/intel_display.c |  7 ---
 drivers/gpu/drm/i915/intel_drv.h |  4 ++--
 drivers/gpu/drm/i915/intel_frontbuffer.c | 19 +++
 6 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index f4745e0c8d5c..355bbf895c22 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -138,6 +138,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object 
*obj)
struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
struct intel_engine_cs *engine;
struct i915_vma *vma;
+   unsigned frontbuffer_bits;
int pin_count = 0;
enum intel_engine_id id;
 
@@ -204,8 +205,9 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object 
*obj)
if (engine)
seq_printf(m, " (%s)", engine->name);
 
-   if (obj->frontbuffer_bits)
-   seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits);
+   frontbuffer_bits = atomic_read(&obj->frontbuffer_bits);
+   if (frontbuffer_bits)
+   seq_printf(m, " (frontbuffer: 0x%03x)", frontbuffer_bits);
 }
 
 static int i915_gem_object_list_info(struct seq_file *m, void *data)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 59846de3b33d..236ade61cade 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2108,8 +2108,6 @@ struct drm_i915_gem_object_ops {
  */
 #define INTEL_MAX_SPRITE_BITS_PER_PIPE 5
 #define INTEL_FRONTBUFFER_BITS_PER_PIPE 8
-#define INTEL_FRONTBUFFER_BITS \
-   (INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES)
 #define INTEL_FRONTBUFFER_PRIMARY(pipe) \
(1 << (INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe)))
 #define INTEL_FRONTBUFFER_CURSOR(pipe) \
@@ -2197,7 +2195,7 @@ struct drm_i915_gem_object {
unsigned int cache_level:3;
unsigned int cache_dirty:1;
 
-   unsigned int frontbuffer_bits:INTEL_FRONTBUFFER_BITS;
+   atomic_t frontbuffer_bits;
 
/** Count of VMA actually bound by this object */
unsigned int bind_count;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 522f379c8d44..05425ae7c8a8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3746,7 +3746,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
if (obj->stolen)
i915_gem_object_unpin_pages(obj);
 
-   WARN_ON(obj->frontbuffer_bits);
+   WARN_ON(atomic_read(&obj->frontbuffer_bits));
 
if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
@@ -4288,16 +4288,20 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
   struct drm_i915_gem_object *new,
   unsigned frontbuffer_bits)
 {
+   /* Control of individual bits within the bitfield is guarded by
+* the owning plane->mutex, i.e. we can never see concurrent
+* manipulation of individual bits. But since the bitfield as a whole
+* is updated using RMW, we need to use atomics in order to update
+* the bits.
+*/
if (old) {
-   WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex));
-   WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits));
-   old->frontbuffer_bits &= ~frontbuffer_bits;
+   WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
+   atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
	}
 
	if (new) {
-   WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex));
-   WARN_ON(new->frontbuffer_bits & frontbuffer_bits);
-   new->frontbuffer_bits |= frontbuffer_bits;
+   WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
+   atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
}
 }
 
diff --git a/drivers/gpu/drm/i915/intel_display.c 
b/drivers/gpu/drm/i915/intel_display.c
index 82533f1da54c..0cfaace38370 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2635,7 +2635,8 @@ valid_fb:
primary->fb = primary->state->fb = fb;
	primary->crtc = primary->state->crtc = &intel_crtc->base;

[Intel-gfx] [PATCH 18/38] drm/i915: Convert non-blocking waits for requests over to using RCU

2016-06-03 Thread Chris Wilson
We can completely avoid taking the struct_mutex around the non-blocking
waits by switching over to the RCU request management (trading the mutex
for an RCU read lock and some complex atomic operations). The improvement
is that we gain further contention reduction, and overall the code
becomes simpler due to the reduced mutex dancing.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem.c | 113 +---
 1 file changed, 47 insertions(+), 66 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4c0e3632214f..76e5a241c7be 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -313,25 +313,20 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object 
*obj,
return 0;
 }
 
-/* A nonblocking variant of the above wait. This is a highly dangerous routine
- * as the object state may change during this call.
+/* A nonblocking variant of the above wait. Must be called prior to
+ * acquiring the mutex for the object, as the object state may change
+ * during this call. A reference must be held by the caller for the object.
  */
 static __must_check int
-i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
-   struct intel_rps_client *rps,
-   bool readonly)
+__unsafe_wait_rendering(struct drm_i915_gem_object *obj,
+   struct intel_rps_client *rps,
+   bool readonly)
 {
-   struct drm_device *dev = obj->base.dev;
-   struct drm_i915_private *dev_priv = dev->dev_private;
-   struct drm_i915_gem_request *requests[I915_NUM_ENGINES];
struct i915_gem_active *active;
unsigned long active_mask;
-   int ret, i, n = 0;
-
-   BUG_ON(!mutex_is_locked(&dev->struct_mutex));
-   BUG_ON(!dev_priv->mm.interruptible);
+   int idx;
 
-   active_mask = i915_gem_object_is_active(obj);
+   active_mask = __I915_BO_ACTIVE(obj);
if (!active_mask)
return 0;
 
@@ -342,25 +337,16 @@ i915_gem_object_wait_rendering__nonblocking(struct 
drm_i915_gem_object *obj,
	active = &obj->last_write;
}
 
-   for_each_active(active_mask, i) {
-   struct drm_i915_gem_request *req;
+   for_each_active(active_mask, idx) {
+   int ret;
 
-   req = i915_gem_active_get(&active[i],
- &obj->base.dev->struct_mutex);
-   if (req)
-   requests[n++] = req;
+   ret = i915_gem_active_wait_unlocked(&active[idx],
+   true, NULL, rps);
+   if (ret)
+   return ret;
}
 
-   mutex_unlock(&dev->struct_mutex);
-   ret = 0;
-   for (i = 0; ret == 0 && i < n; i++)
-   ret = __i915_wait_request(requests[i], true, NULL, rps);
-   mutex_lock(&dev->struct_mutex);
-
-   for (i = 0; i < n; i++)
-   i915_gem_request_put(requests[i]);
-
-   return ret;
+   return 0;
 }
 
 static struct intel_rps_client *to_rps_client(struct drm_file *file)
@@ -1218,10 +1204,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void 
*data,
int ret;
 
/* Only handle setting domains to types used by the CPU. */
-   if (write_domain & I915_GEM_GPU_DOMAINS)
-   return -EINVAL;
-
-   if (read_domains & I915_GEM_GPU_DOMAINS)
+   if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
return -EINVAL;
 
/* Having something in the write domain implies it's in the read
@@ -1230,25 +1213,21 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void 
*data,
if (write_domain != 0 && read_domains != write_domain)
return -EINVAL;
 
-   ret = i915_mutex_lock_interruptible(dev);
-   if (ret)
-   return ret;
-
obj = i915_gem_object_lookup(file, args->handle);
-   if (!obj) {
-   ret = -ENOENT;
-   goto unlock;
-   }
+   if (!obj)
+   return -ENOENT;
 
/* Try to flush the object off the GPU without holding the lock.
 * We will repeat the flush holding the lock in the normal manner
 * to catch cases where we are gazumped.
 */
-   ret = i915_gem_object_wait_rendering__nonblocking(obj,
- to_rps_client(file),
- !write_domain);
+   ret = __unsafe_wait_rendering(obj, to_rps_client(file), !write_domain);
+   if (ret)
+   goto out_unlocked;
+
+   ret = i915_mutex_lock_interruptible(dev);
if (ret)
-   goto unref;
+   goto out_unlocked;
 
if (read_domains & I915_GEM_DOMAIN_GTT)
ret = i915_gem_object_set_to_gtt_domain(obj, write_domain 

[Intel-gfx] [PATCH 28/38] drm/i915: Remove pinned check from madvise ioctl

2016-06-03 Thread Chris Wilson
We don't need to incur the overhead of checking whether the object is
pinned prior to changing its madvise. If the object is pinned, the
madvise will not take effect until it is unpinned and so we cannot free
the pages being pointed at by hardware. Marking a pinned object with
allocated pages as DONTNEED will not trigger any undue warnings. The check
is therefore superfluous, and by removing it we also remove a linear
walk over all the VMAs the object has.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem.c | 6 --
 1 file changed, 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b78f9df1894c..dad00800aeef 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3600,11 +3600,6 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void 
*data,
goto unlock;
}
 
-   if (i915_gem_obj_is_pinned(obj)) {
-   ret = -EINVAL;
-   goto out;
-   }
-
if (obj->pages &&
obj->tiling_mode != I915_TILING_NONE &&
dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
@@ -3623,7 +3618,6 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
 
args->retained = obj->madv != __I915_MADV_PURGED;
 
-out:
i915_gem_object_put(obj);
 unlock:
mutex_unlock(>struct_mutex);
-- 
2.8.1



[Intel-gfx] [PATCH 14/38] drm/i915: Move i915_gem_object_wait_rendering()

2016-06-03 Thread Chris Wilson
Just move it earlier so that we can use the companion nonblocking
version in a couple more callsites without having to add a forward
declaration.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem.c | 182 
 1 file changed, 91 insertions(+), 91 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a8279a598c4b..93a874b0ba14 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -278,6 +278,97 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
return ret;
 }
 
+/**
+ * Ensures that all rendering to the object has completed and the object is
+ * safe to unbind from the GTT or access from the CPU.
+ */
+int
+i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
+  bool readonly)
+{
+   struct i915_gem_active *active;
+   unsigned long active_mask;
+   int idx;
+
+   lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+   active_mask = i915_gem_object_is_active(obj);
+   if (!active_mask)
+   return 0;
+
+   if (!readonly) {
+   active = obj->last_read;
+   } else {
+   active_mask = 1;
+   active = &obj->last_write;
+   }
+
+   for_each_active(active_mask, idx) {
+   int ret = i915_gem_active_wait(&active[idx],
+  &obj->base.dev->struct_mutex);
+   if (ret)
+   return ret;
+   }
+
+   return 0;
+}
+
+/* A nonblocking variant of the above wait. This is a highly dangerous routine
+ * as the object state may change during this call.
+ */
+static __must_check int
+i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
+   struct intel_rps_client *rps,
+   bool readonly)
+{
+   struct drm_device *dev = obj->base.dev;
+   struct drm_i915_private *dev_priv = dev->dev_private;
+   struct drm_i915_gem_request *requests[I915_NUM_ENGINES];
+   struct i915_gem_active *active;
+   unsigned long active_mask;
+   int ret, i, n = 0;
+
+   BUG_ON(!mutex_is_locked(&dev->struct_mutex));
+   BUG_ON(!dev_priv->mm.interruptible);
+
+   active_mask = i915_gem_object_is_active(obj);
+   if (!active_mask)
+   return 0;
+
+   if (!readonly) {
+   active = obj->last_read;
+   } else {
+   active_mask = 1;
+   active = &obj->last_write;
+   }
+
+   for_each_active(active_mask, i) {
+   struct drm_i915_gem_request *req;
+
+   req = i915_gem_active_get(&active[i],
+ &obj->base.dev->struct_mutex);
+   if (req)
+   requests[n++] = req;
+   }
+
+   mutex_unlock(&dev->struct_mutex);
+   ret = 0;
+   for (i = 0; ret == 0 && i < n; i++)
+   ret = __i915_wait_request(requests[i], true, NULL, rps);
+   mutex_lock(&dev->struct_mutex);
+
+   for (i = 0; i < n; i++)
+   i915_gem_request_put(requests[i]);
+
+   return ret;
+}
+
+static struct intel_rps_client *to_rps_client(struct drm_file *file)
+{
+   struct drm_i915_file_private *fpriv = file->driver_priv;
+   return &fpriv->rps;
+}
+
 int
 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
int align)
@@ -1113,97 +1204,6 @@ put_rpm:
 }
 
 /**
- * Ensures that all rendering to the object has completed and the object is
- * safe to unbind from the GTT or access from the CPU.
- */
-int
-i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
-  bool readonly)
-{
-   struct i915_gem_active *active;
-   unsigned long active_mask;
-   int idx;
-
-   lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-   active_mask = i915_gem_object_is_active(obj);
-   if (!active_mask)
-   return 0;
-
-   if (!readonly) {
-   active = obj->last_read;
-   } else {
-   active_mask = 1;
-   active = &obj->last_write;
-   }
-
-   for_each_active(active_mask, idx) {
-   int ret = i915_gem_active_wait(&active[idx],
-  &obj->base.dev->struct_mutex);
-   if (ret)
-   return ret;
-   }
-
-   return 0;
-}
-
-/* A nonblocking variant of the above wait. This is a highly dangerous routine
- * as the object state may change during this call.
- */
-static __must_check int
-i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
-   struct intel_rps_client *rps,
-   bool readonly)
-{
-   struct drm_device *dev = obj->base.dev;
-   struct drm_i915_private *dev_priv = dev->dev_private;
-   struct 

[Intel-gfx] [PATCH 23/38] suspend

2016-06-03 Thread Chris Wilson
---
 drivers/gpu/drm/i915/i915_gem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c1e91589e7bc..98aa0a7c91f0 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3796,7 +3796,7 @@ i915_gem_suspend(struct drm_device *dev)
 
	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
	cancel_delayed_work_sync(&dev_priv->gt.retire_work);
-   flush_delayed_work(&dev_priv->gt.idle_work);
+   cancel_delayed_work_sync(&dev_priv->gt.idle_work);
 
	mutex_lock(&dev_priv->dev->struct_mutex);
 
-- 
2.8.1



[Intel-gfx] [PATCH 30/38] drm/i915: Assert that the request hasn't been retired

2016-06-03 Thread Chris Wilson
With all callers now not playing tricks with dropping the struct_mutex
between waiting and retiring, we can assert that the request is ready to
be retired.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem_request.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c 
b/drivers/gpu/drm/i915/i915_gem_request.c
index 016edc6f2d0b..8029b37c9eee 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -319,7 +319,7 @@ static void i915_gem_request_retire(struct 
drm_i915_gem_request *request)
struct i915_gem_active *active, *next;
 
trace_i915_gem_request_retire(request);
-   list_del_init(&request->link);
+   list_del(&request->link);
 
/* We know the GPU must have read the request to have
 * sent us the seqno + interrupt, so use the position
@@ -367,8 +367,7 @@ void i915_gem_request_retire_upto(struct 
drm_i915_gem_request *req)
struct drm_i915_gem_request *tmp;
 
	lockdep_assert_held(&req->i915->dev->struct_mutex);
-   if (list_empty(&req->link))
-   return;
+   GEM_BUG_ON(list_empty(&req->link));
 
do {
	tmp = list_first_entry(&engine->request_list,
-- 
2.8.1



[Intel-gfx] [PATCH 35/38] drm/i915: Mark unmappable GGTT entries as PIN_HIGH

2016-06-03 Thread Chris Wilson
We allocate a few objects into the GGTT that we never need to access via
the mappable aperture (such as contexts, status pages). We can request
that these are bound high in the VM to increase the amount of mappable
aperture available. However, for anything that may be frequently pinned
(such as logical contexts) we still want to use the fast search & insert.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index f172ac6a06dc..c8211913f2d6 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2008,7 +2008,7 @@ static int intel_ring_context_pin(struct i915_gem_context 
*ctx,
 
if (ce->state) {
ret = i915_gem_object_ggtt_pin(ce->state, NULL, 0,
-  ctx->ggtt_alignment, 0);
+  ctx->ggtt_alignment, PIN_HIGH);
if (ret)
goto error;
}
@@ -2578,7 +2578,8 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
} else {
i915_gem_object_set_cache_level(obj, 
I915_CACHE_LLC);
ret = i915_gem_object_ggtt_pin(obj, NULL,
-  0, 0, 0);
+  0, 0,
+  PIN_HIGH);
if (ret != 0) {
i915_gem_object_put(obj);
DRM_ERROR("Failed to pin semaphore bo. 
Disabling semaphores\n");
-- 
2.8.1



[Intel-gfx] [PATCH 37/38] drm/i915: Track pinned VMA

2016-06-03 Thread Chris Wilson
Treat the VMA as the primary struct responsible for tracking bindings
into the GPU's VM. That is, we want to treat the VMA returned after we
pin an object into the VM as the cookie we hold and eventually release
when unpinning. Doing so eliminates the ambiguity of pinning the object
and then searching for the relevant pin later.
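
The intended calling convention, sketched (the exact signature here is
illustrative; the diff below has the real conversions):

	static int example_bind(struct drm_i915_gem_object *obj,
				u64 alignment, u64 flags)
	{
		struct i915_vma *vma;

		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, alignment, flags);
		if (IS_ERR(vma))
			return PTR_ERR(vma);

		/* ... program the hardware with vma->node.start ... */

		i915_vma_unpin(vma);	/* release through the same cookie */
		return 0;
	}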

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c  |  75 +-
 drivers/gpu/drm/i915/i915_drv.h  |  64 +++--
 drivers/gpu/drm/i915/i915_gem.c  | 200 ++-
 drivers/gpu/drm/i915/i915_gem_context.c  |  43 +++---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c   |  60 
 drivers/gpu/drm/i915/i915_gem_fence.c|  64 -
 drivers/gpu/drm/i915/i915_gem_gtt.c  |  58 +---
 drivers/gpu/drm/i915/i915_gem_gtt.h  |  14 --
 drivers/gpu/drm/i915/i915_gem_render_state.c |  31 ++---
 drivers/gpu/drm/i915/i915_gem_render_state.h |   2 +-
 drivers/gpu/drm/i915/i915_gem_request.c  |  10 +-
 drivers/gpu/drm/i915/i915_gem_request.h  |   2 +-
 drivers/gpu/drm/i915/i915_gem_stolen.c   |   2 +-
 drivers/gpu/drm/i915/i915_gem_tiling.c   |  42 +++---
 drivers/gpu/drm/i915/i915_gpu_error.c|  55 +++-
 drivers/gpu/drm/i915/i915_guc_submission.c   |  28 ++--
 drivers/gpu/drm/i915/intel_display.c |  57 +---
 drivers/gpu/drm/i915/intel_drv.h |   5 +-
 drivers/gpu/drm/i915/intel_fbc.c |   2 +-
 drivers/gpu/drm/i915/intel_fbdev.c   |  19 ++-
 drivers/gpu/drm/i915/intel_guc_loader.c  |  29 ++--
 drivers/gpu/drm/i915/intel_lrc.c | 113 ---
 drivers/gpu/drm/i915/intel_overlay.c |  44 +++---
 drivers/gpu/drm/i915/intel_ringbuffer.c  | 194 ++
 drivers/gpu/drm/i915/intel_ringbuffer.h  |  20 +--
 drivers/gpu/drm/i915/intel_sprite.c  |   8 +-
 26 files changed, 549 insertions(+), 692 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 485fc23893d6..938a95df8a11 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -111,7 +111,7 @@ static char get_tiling_flag(struct drm_i915_gem_object *obj)
 
 static char get_global_flag(struct drm_i915_gem_object *obj)
 {
-   return i915_gem_obj_to_ggtt(obj) ? 'g' : ' ';
+   return i915_gem_object_to_ggtt(obj, NULL) ? 'g' : ' ';
 }
 
 static char get_pin_mapped_flag(struct drm_i915_gem_object *obj)
@@ -278,7 +278,7 @@ static int i915_gem_stolen_list_info(struct seq_file *m, 
void *data)
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
struct drm_i915_gem_object *obj;
-   u64 total_obj_size, total_gtt_size;
+   u64 total_obj_size;
LIST_HEAD(stolen);
int count, ret;
 
@@ -286,7 +286,7 @@ static int i915_gem_stolen_list_info(struct seq_file *m, 
void *data)
if (ret)
return ret;
 
-   total_obj_size = total_gtt_size = count = 0;
+   total_obj_size = count = 0;
	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
if (obj->stolen == NULL)
continue;
@@ -294,7 +294,6 @@ static int i915_gem_stolen_list_info(struct seq_file *m, 
void *data)
	list_add(&obj->obj_exec_link, &stolen);
 
total_obj_size += obj->base.size;
-   total_gtt_size += i915_gem_obj_total_ggtt_size(obj);
count++;
}
	list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) {
@@ -317,8 +316,8 @@ static int i915_gem_stolen_list_info(struct seq_file *m, 
void *data)
}
	mutex_unlock(&dev->struct_mutex);
 
-   seq_printf(m, "Total %d objects, %llu bytes, %llu GTT size\n",
-  count, total_obj_size, total_gtt_size);
+   seq_printf(m, "Total %d objects, %llu bytes\n",
+  count, total_obj_size);
return 0;
 }
 
@@ -327,7 +326,7 @@ static int i915_gem_stolen_list_info(struct seq_file *m, 
void *data)
size += i915_gem_obj_total_ggtt_size(obj); \
++count; \
if (obj->map_and_fenceable) { \
-   mappable_size += i915_gem_obj_ggtt_size(obj); \
+   mappable_size += obj->base.size; \
++mappable_count; \
} \
} \
@@ -451,10 +450,10 @@ static void print_context_stats(struct seq_file *m,
 
 #define count_vmas(list, member) do { \
list_for_each_entry(vma, list, member) { \
-   size += i915_gem_obj_total_ggtt_size(vma->obj); \
+   size += vma->size; \
++count; \
if (vma->obj->map_and_fenceable) { \
-   mappable_size += i915_gem_obj_ggtt_size(vma->obj); \
+   mappable_size += vma->size; \

[Intel-gfx] [PATCH 16/38] drm/i915: Enable lockless lookup of request tracking via RCU

2016-06-03 Thread Chris Wilson
If we enable RCU for the requests (providing a grace period where we can
inspect a "dead" request before it is freed), we can allow callers to
carefully perform lockless lookup of an active request.

However, by enabling deferred freeing of requests, we can potentially
hog a lot of memory when dealing with tens of thousands of requests per
second - with a quick insertion of a synchronize_rcu() inside our
shrinker callback, that issue disappears.

v2: Currently, it is our responsibility to handle reclaim i.e. to avoid
hogging memory with the delayed slab frees. At the moment, we wait for a
grace period in the shrinker, and block for all RCU callbacks on oom.
Suggested alternatives focus on flushing our RCU callback when we have a
certain number of outstanding request frees, and blocking on that flush
after a second high watermark. (So rather than wait for the system to
run out of memory, we stop issuing requests - both are nondeterministic.)

Paul E. McKenney wrote:

Another approach is synchronize_rcu() after some largish number of
requests.  The advantage of this approach is that it throttles the
production of callbacks at the source.  The corresponding disadvantage
is that it slows things up.

Another approach is to use call_rcu(), but if the previous call_rcu()
is still in flight, block waiting for it.  Yet another approach is
the get_state_synchronize_rcu() / cond_synchronize_rcu() pair.  The
idea is to do something like this:

cond_synchronize_rcu(cookie);
cookie = get_state_synchronize_rcu();

You would of course do an initial get_state_synchronize_rcu() to
get things going.  This would not block unless there was less than
one grace period's worth of time between invocations.  But this
assumes a busy system, where there is almost always a grace period
in flight.  But you can make that happen as follows:

cond_synchronize_rcu(cookie);
cookie = get_state_synchronize_rcu();
call_rcu(_rcu_head, noop_function);

Note that you need additional code to make sure that the old callback
has completed before doing a new one.  Setting and clearing a flag
with appropriate memory ordering control suffices (e.g,. smp_load_acquire()
and smp_store_release()).
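
A minimal sketch of the get_state_synchronize_rcu() /
cond_synchronize_rcu() pairing described above, assuming a single
hypothetical cookie guarding the request frees (not code from this
series):

	/* Block only if less than one full grace period has elapsed
	 * since the previous call, throttling frees at the source. */
	static unsigned long request_free_cookie;

	static void throttle_request_frees(void)
	{
		cond_synchronize_rcu(request_free_cookie);
		request_free_cookie = get_state_synchronize_rcu();
	}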

Signed-off-by: Chris Wilson 
Cc: Maarten Lankhorst 
Cc: "Goel, Akash" 
Cc: Josh Triplett 
---
 drivers/gpu/drm/i915/i915_gem.c  |   7 +-
 drivers/gpu/drm/i915/i915_gem_request.c  |   2 +-
 drivers/gpu/drm/i915/i915_gem_request.h  | 110 ++-
 drivers/gpu/drm/i915/i915_gem_shrinker.c |  15 +++--
 4 files changed, 113 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f6f039aad6e2..4c0e3632214f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4158,7 +4158,9 @@ i915_gem_load_init(struct drm_device *dev)
dev_priv->requests =
kmem_cache_create("i915_gem_request",
  sizeof(struct drm_i915_gem_request), 0,
- SLAB_HWCACHE_ALIGN,
+ SLAB_HWCACHE_ALIGN |
+ SLAB_RECLAIM_ACCOUNT |
+ SLAB_DESTROY_BY_RCU,
  NULL);
 
	INIT_LIST_HEAD(&dev_priv->context_list);
@@ -4194,6 +4196,9 @@ void i915_gem_load_cleanup(struct drm_device *dev)
kmem_cache_destroy(dev_priv->requests);
kmem_cache_destroy(dev_priv->vmas);
kmem_cache_destroy(dev_priv->objects);
+
+   /* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */
+   rcu_barrier();
 }
 
 int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c 
b/drivers/gpu/drm/i915/i915_gem_request.c
index 59afc8e547c4..a0cdd3f10566 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -344,7 +344,7 @@ static void i915_gem_request_retire(struct 
drm_i915_gem_request *request)
prefetchw(next);
 
	INIT_LIST_HEAD(&request->link);
-   active->__request = NULL;
+   RCU_INIT_POINTER(active->__request, NULL);
 
active->retire(active, request);
}
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h 
b/drivers/gpu/drm/i915/i915_gem_request.h
index e794801baf07..6aa246848894 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -178,6 +178,12 @@ i915_gem_request_get(struct drm_i915_gem_request *req)
	return to_request(fence_get(&req->fence));
 }
 
+static inline struct drm_i915_gem_request *
+i915_gem_request_get_rcu(struct drm_i915_gem_request *req)
+{
+   return to_request(fence_get_rcu(&req->fence));
+}
+
 static inline void
 i915_gem_request_put(struct drm_i915_gem_request *req)
 {
@@ -276,21 +282,12 @@ 

[Intel-gfx] [PATCH 02/38] drm/i915: Remove surplus drm_device parameter to i915_gem_evict_something()

2016-06-03 Thread Chris Wilson
Eviction is VM local, so we can ignore the significance of the
drm_device in the caller, and leave it to i915_gem_evict_something() to
manage itself.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h   |  3 +--
 drivers/gpu/drm/i915/i915_gem.c   |  2 +-
 drivers/gpu/drm/i915/i915_gem_evict.c |  9 -
 drivers/gpu/drm/i915/i915_gem_gtt.c   |  2 +-
 drivers/gpu/drm/i915/i915_trace.h | 12 +++-
 5 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0ebf1a70..09f6f0eecd96 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3291,8 +3291,7 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device 
*dev, void *data,
   struct drm_file *file);
 
 /* i915_gem_evict.c */
-int __must_check i915_gem_evict_something(struct drm_device *dev,
- struct i915_address_space *vm,
+int __must_check i915_gem_evict_something(struct i915_address_space *vm,
  int min_size,
  unsigned alignment,
  unsigned cache_level,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e6c46f2d08e7..a7aa465cb76d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2802,7 +2802,7 @@ search_free:
  search_flag,
  alloc_flag);
if (ret) {
-   ret = i915_gem_evict_something(dev, vm, size, alignment,
+   ret = i915_gem_evict_something(vm, size, alignment,
   obj->cache_level,
   start, end,
   flags);
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c 
b/drivers/gpu/drm/i915/i915_gem_evict.c
index 677297bf970e..09e9078f5856 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -91,7 +91,6 @@ mark_free(struct i915_vma *vma, struct list_head *unwind)
 
 /**
  * i915_gem_evict_something - Evict vmas to make room for binding a new one
- * @dev: drm_device
  * @vm: address space to evict from
  * @min_size: size of the desired free space
  * @alignment: alignment constraint of the desired free space
@@ -114,12 +113,12 @@ mark_free(struct i915_vma *vma, struct list_head *unwind)
  * memory in e.g. the shrinker.
  */
 int
-i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm,
+i915_gem_evict_something(struct i915_address_space *vm,
 int min_size, unsigned alignment, unsigned cache_level,
 unsigned long start, unsigned long end,
 unsigned flags)
 {
-   struct drm_i915_private *dev_priv = to_i915(dev);
+   struct drm_i915_private *dev_priv = to_i915(vm->dev);
struct list_head eviction_list;
struct list_head *phases[] = {
	&vm->inactive_list,
@@ -129,7 +128,7 @@ i915_gem_evict_something(struct drm_device *dev, struct 
i915_address_space *vm,
struct i915_vma *vma, *next;
int ret;
 
-   trace_i915_gem_evict(dev, min_size, alignment, flags);
+   trace_i915_gem_evict(vm, min_size, alignment, flags);
 
/*
 * The goal is to evict objects and amalgamate space in LRU order.
@@ -187,7 +186,7 @@ search_again:
 * back to userspace to give our workqueues time to
 * acquire our locks and unpin the old scanouts.
 */
-   return intel_has_pending_fb_unpin(dev) ? -EAGAIN : -ENOSPC;
+   return intel_has_pending_fb_unpin(vm->dev) ? -EAGAIN : -ENOSPC;
}
 
/* Not everything in the GGTT is tracked via vma (otherwise we
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 9db542f761f7..fb2dd65b16e6 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2029,7 +2029,7 @@ alloc:
  0, ggtt->base.total,
  DRM_MM_TOPDOWN);
if (ret == -ENOSPC && !retried) {
-   ret = i915_gem_evict_something(dev, &ggtt->base,
+   ret = i915_gem_evict_something(&ggtt->base,
   GEN6_PD_SIZE, GEN6_PD_ALIGN,
   I915_CACHE_NONE,
   0, ggtt->base.total,
diff --git a/drivers/gpu/drm/i915/i915_trace.h 
b/drivers/gpu/drm/i915/i915_trace.h
index e7b3e6e4f4a4..e20355d447db 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ 

[Intel-gfx] [PATCH 26/38] drm/i915: Remove (struct_mutex) locking for busy-ioctl

2016-06-03 Thread Chris Wilson
By applying the same logic as for wait-ioctl, we can query whether a
request has completed without holding struct_mutex. The biggest impact
system-wide is removing the flush_active and the contention it causes.
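
The sample-and-recheck at the heart of it, distilled (the types here
are illustrative; __busy_flag() in the diff below is the real version):

	struct req { unsigned int engine_flag; };
	static struct req __rcu *active_req;

	/* Caller holds rcu_read_lock(), so the request we sample stays
	 * dereferencable. Re-reading the slot afterwards confirms that
	 * the flag we computed belonged to the request still being
	 * tracked at some point during the call. */
	static unsigned int busy_flag(void)
	{
		for (;;) {
			struct req *r = rcu_dereference(active_req);
			unsigned int flag;

			if (!r)
				return 0;

			flag = r->engine_flag;
			if (r == rcu_dereference(active_req))
				return flag;
		}
	}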

Testcase: igt/gem_busy
Signed-off-by: Chris Wilson 
Cc: Akash Goel 
---
 drivers/gpu/drm/i915/i915_gem.c | 115 +---
 1 file changed, 85 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4af64d864587..a4f949038d50 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3463,49 +3463,104 @@ i915_gem_object_ggtt_unpin_view(struct 
drm_i915_gem_object *obj,
i915_vma_unpin(i915_gem_obj_to_ggtt_view(obj, view));
 }
 
+static __always_inline unsigned
+__busy_read_flag(const struct drm_i915_gem_request *request)
+{
+   return 0x1 << request->engine->exec_id;
+}
+
+static __always_inline unsigned
+__busy_write_flag(const struct drm_i915_gem_request *request)
+{
+   return request->engine->exec_id;
+}
+
+static __always_inline unsigned
+__busy_flag(const struct i915_gem_active *active,
+   unsigned (*flag)(const struct drm_i915_gem_request *))
+{
+   do {
+   struct drm_i915_gem_request *request;
+   unsigned busy;
+
+   request = rcu_dereference(active->__request);
+   if (!request || i915_gem_request_completed(request))
+   return 0;
+
+   busy = flag(request);
+   if (request == rcu_dereference(active->__request))
+   return busy;
+   } while (1);
+}
+
+static inline unsigned
+busy_read_flag(const struct i915_gem_active *active)
+{
+   return __busy_flag(active, __busy_read_flag);
+}
+
+static inline unsigned
+busy_write_flag(const struct i915_gem_active *active)
+{
+   return __busy_flag(active, __busy_write_flag);
+}
+
 int
 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
 {
struct drm_i915_gem_busy *args = data;
struct drm_i915_gem_object *obj;
-   int ret;
-
-   ret = i915_mutex_lock_interruptible(dev);
-   if (ret)
-   return ret;
+   unsigned long active;
 
obj = i915_gem_object_lookup(file, args->handle);
-   if (!obj) {
-   ret = -ENOENT;
-   goto unlock;
-   }
+   if (!obj)
+   return -ENOENT;
 
-   /* Count all active objects as busy, even if they are currently not used
-* by the gpu. Users of this interface expect objects to eventually
-* become non-busy without any further actions.
-*/
args->busy = 0;
-   if (i915_gem_object_is_active(obj)) {
-   struct drm_i915_gem_request *req;
-   int i;
+   active = __I915_BO_ACTIVE(obj);
+   if (active) {
+   int idx;
 
-   for (i = 0; i < I915_NUM_ENGINES; i++) {
-   req = i915_gem_active_peek(&obj->last_read[i],
-  &obj->base.dev->struct_mutex);
-   if (req)
-   args->busy |= 1 << (16 + req->engine->exec_id);
-   }
-   req = i915_gem_active_peek(&obj->last_write,
-  &obj->base.dev->struct_mutex);
-   if (req)
-   args->busy |= req->engine->exec_id;
+   /* Yes, the lookups are intentionally racy.
+*
+* Even though we guard the pointer lookup by RCU, that only
+* guarantees that the pointer and its contents remain
+* dereferencable and does *not* mean that the request we
+* have is the same as the one being tracked by the object.
+*
+* Consider that we lookup the request just as it is being
+* retired and free. We take a local copy of the pointer,
+* but before we add its engine into the busy set, the other
+* thread reallocates it and assigns it to a task on another
+* engine with a fresh and incomplete seqno.
+*
+* So after we lookup the engine's id, we double check that
+* the active request is the same and only then do we add it
+* into the busy set.
+*/
+   rcu_read_lock();
+
+   for_each_active(active, idx)
+   args->busy |= busy_read_flag(&obj->last_read[idx]);
+
+   /* For ABI sanity, we only care that the write engine is in
+* the set of read engines. This is ensured by the ordering
+* of setting last_read/last_write in i915_vma_move_to_active,
+* and then in reverse in retire.
+*
+* We 

[Intel-gfx] [PATCH 20/38] drm/i915/userptr: Remove superfluous interruptible=false on waiting

2016-06-03 Thread Chris Wilson
Inside the kthread context we can't be interrupted by signals, so
touching the mm.interruptible flag is pointless; wait-request now
consumes EIO itself.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem_userptr.c | 9 +
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c 
b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 96ab6161903a..57218cca7e05 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -84,16 +84,9 @@ static void cancel_userptr(struct work_struct *work)
obj->userptr.work = NULL;
 
if (obj->pages != NULL) {
-   struct drm_i915_private *dev_priv = to_i915(dev);
-   bool was_interruptible;
-
-   was_interruptible = dev_priv->mm.interruptible;
-   dev_priv->mm.interruptible = false;
-
+   /* We are inside a kthread context and can't be interrupted */
WARN_ON(i915_gem_object_unbind(obj));
WARN_ON(i915_gem_object_put_pages(obj));
-
-   dev_priv->mm.interruptible = was_interruptible;
}
 
i915_gem_object_put(obj);
-- 
2.8.1



[Intel-gfx] [PATCH 22/38] drm/gem/shrinker: Wait before acquiring struct_mutex under oom

2016-06-03 Thread Chris Wilson
Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem_shrinker.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c 
b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 6eea4abeb9ce..454be9719daa 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -312,9 +312,14 @@ i915_gem_shrinker_lock_uninterruptible(struct 
drm_i915_private *dev_priv,
unsigned long timeout = msecs_to_jiffies(timeout_ms) + 1;
 
	while (!i915_gem_shrinker_lock(dev_priv->dev, &slu->unlock)) {
+   if (i915_gem_wait_for_idle(dev_priv) == 0 &&
+   i915_gem_shrinker_lock(dev_priv->dev, &slu->unlock))
+   break;
+
schedule_timeout_killable(1);
if (fatal_signal_pending(current))
return false;
+
if (--timeout == 0) {
pr_err("Unable to lock GPU to purge memory.\n");
return false;
-- 
2.8.1



[Intel-gfx] [PATCH 29/38] drm/i915: Remove locking for get_tiling

2016-06-03 Thread Chris Wilson
Since we are not concerned with userspace racing itself with set-tiling
(the order is indeterminate even if we take a lock), we can safely
read back the single obj->tiling_mode and do the static lookup of the
swizzle mode without having to take a lock.

get-tiling is reasonably frequent due to the back-channel passing around
of tiling parameters in DRI2/DRI3.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem_tiling.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c 
b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 326de7eae101..d6acd0a27c06 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -302,10 +302,8 @@ i915_gem_get_tiling(struct drm_device *dev, void *data,
if (!obj)
return -ENOENT;
 
-   mutex_lock(&dev->struct_mutex);
-
args->tiling_mode = obj->tiling_mode;
-   switch (obj->tiling_mode) {
+   switch (args->tiling_mode) {
case I915_TILING_X:
args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x;
break;
@@ -329,8 +327,6 @@ i915_gem_get_tiling(struct drm_device *dev, void *data,
if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17)
args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10;
 
-   i915_gem_object_put(obj);
-   mutex_unlock(&dev->struct_mutex);
-
+   i915_gem_object_put_unlocked(obj);
return 0;
 }
-- 
2.8.1



[Intel-gfx] [PATCH 08/38] drm/i915: Record allocated vma size

2016-06-03 Thread Chris Wilson
Tracking the size of the VMA as allocated allows us to dramatically
reduce the complexity of later functions (like inserting the VMA into
the drm_mm range manager).

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h |  10 +--
 drivers/gpu/drm/i915/i915_gem.c | 117 
 drivers/gpu/drm/i915/i915_gem_gtt.c |  56 +
 drivers/gpu/drm/i915/i915_gem_gtt.h |   5 +-
 4 files changed, 71 insertions(+), 117 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9520adba33f6..fe7e87e8cf9a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3137,11 +3137,11 @@ int i915_gem_object_attach_phys(struct 
drm_i915_gem_object *obj,
 int i915_gem_open(struct drm_device *dev, struct drm_file *file);
 void i915_gem_release(struct drm_device *dev, struct drm_file *file);
 
-uint32_t
-i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode);
-uint32_t
-i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
-   int tiling_mode, bool fenced);
+uint64_t
+i915_gem_get_gtt_size(struct drm_device *dev, uint64_t size, int tiling_mode);
+uint64_t
+i915_gem_get_gtt_alignment(struct drm_device *dev, uint64_t size,
+  int tiling_mode, bool fenced);
 
 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
enum i915_cache_level cache_level);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index cd54c290680d..7340fc830d9a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1593,11 +1593,13 @@ i915_gem_release_all_mmaps(struct drm_i915_private 
*dev_priv)
i915_gem_release_mmap(obj);
 }
 
-uint32_t
-i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
+uint64_t
+i915_gem_get_gtt_size(struct drm_device *dev, uint64_t size, int tiling_mode)
 {
uint32_t gtt_size;
 
+   GEM_BUG_ON(size == 0);
+
if (INTEL_INFO(dev)->gen >= 4 ||
tiling_mode == I915_TILING_NONE)
return size;
@@ -1621,10 +1623,12 @@ i915_gem_get_gtt_size(struct drm_device *dev, uint32_t 
size, int tiling_mode)
  * Return the required GTT alignment for an object, taking into account
  * potential fence register mapping.
  */
-uint32_t
-i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
+uint64_t
+i915_gem_get_gtt_alignment(struct drm_device *dev, uint64_t size,
   int tiling_mode, bool fenced)
 {
+   GEM_BUG_ON(size == 0);
+
/*
 * Minimum alignment is 4k (GTT page size), but might be greater
 * if a fence register is needed for the object.
@@ -2679,56 +2683,40 @@ i915_gem_object_insert_into_vm(struct 
drm_i915_gem_object *obj,
   struct i915_address_space *vm,
   const struct i915_ggtt_view *ggtt_view,
   uint64_t size,
-  unsigned alignment,
+  uint64_t alignment,
   uint64_t flags)
 {
struct drm_device *dev = obj->base.dev;
struct drm_i915_private *dev_priv = to_i915(dev);
-   u64 start, end;
-   u32 search_flag, alloc_flag;
struct i915_vma *vma;
+   u64 start, end;
+   u64 min_alignment;
int ret;
 
-   if (i915_is_ggtt(vm)) {
-   u32 fence_size, fence_alignment, unfenced_alignment;
-   u64 view_size;
-
-   if (WARN_ON(!ggtt_view))
-   return ERR_PTR(-EINVAL);
-
-   view_size = i915_ggtt_view_size(obj, ggtt_view);
-
-   fence_size = i915_gem_get_gtt_size(dev,
-  view_size,
-  obj->tiling_mode);
-   fence_alignment = i915_gem_get_gtt_alignment(dev,
-view_size,
-obj->tiling_mode,
-true);
-   unfenced_alignment = i915_gem_get_gtt_alignment(dev,
-   view_size,
-   
obj->tiling_mode,
-   false);
-   size = max(size, view_size);
-   if (flags & PIN_MAPPABLE)
-   size = max_t(u64, size, fence_size);
-
-   if (alignment == 0)
-   alignment = flags & PIN_MAPPABLE ? fence_alignment :
-   unfenced_alignment;
-   if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
-

[Intel-gfx] [PATCH 10/38] drm/i915: Remove highly confusing i915_gem_obj_ggtt_pin()

2016-06-03 Thread Chris Wilson
Since i915_gem_obj_ggtt_pin() is an idiom-breaking curry function for
i915_gem_object_ggtt_pin(), spare us the confusion and remove it.
Removing it now simplifies later patches to change the i915_vma_pin()
(and friends) interface.
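
A sketch of the flags layout this prepares for (the field split and
mask are illustrative, not the final encoding):

	struct vma_sketch {
		unsigned int flags;	/* bits 0-3: pin count,
					 * bits 4+: bind flags */
	};
	#define VMA_PIN_MASK 0xf

	static inline void vma_pin(struct vma_sketch *vma)
	{
		vma->flags++;		/* pin count lives in the low bits */
	}

	static inline bool vma_is_pinned(const struct vma_sketch *vma)
	{
		return vma->flags & VMA_PIN_MASK;
	}

	static inline void vma_unpin(struct vma_sketch *vma)
	{
		vma->flags--;
	}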

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h  | 35 -
 drivers/gpu/drm/i915/i915_gem.c  | 46 +--
 drivers/gpu/drm/i915/i915_gem_context.c  |  5 ++-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c   | 10 +++---
 drivers/gpu/drm/i915/i915_gem_gtt.h  | 47 +++-
 drivers/gpu/drm/i915/i915_gem_render_state.c |  2 +-
 drivers/gpu/drm/i915/i915_guc_submission.c   |  4 +--
 drivers/gpu/drm/i915/intel_guc_loader.c  |  2 +-
 drivers/gpu/drm/i915/intel_lrc.c |  8 +++--
 drivers/gpu/drm/i915/intel_overlay.c |  3 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c  | 16 +-
 11 files changed, 89 insertions(+), 89 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f537d8fc5e0f..861d132b2fe4 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2934,32 +2934,32 @@ void i915_gem_free_object(struct drm_gem_object *obj);
 int __must_check
 i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags);
 /* Flags used by pin/bind */
-#define PIN_MAPPABLE   (1<<0)
-#define PIN_NONBLOCK   (1<<1)
-#define PIN_GLOBAL (1<<2)
-#define PIN_OFFSET_BIAS(1<<3)
-#define PIN_USER   (1<<4)
-#define PIN_UPDATE (1<<5)
-#define PIN_ZONE_4G(1<<6)
-#define PIN_HIGH   (1<<7)
-#define PIN_OFFSET_FIXED   (1<<8)
+#define PIN_GLOBAL (1<<0)
+#define PIN_USER   (1<<1)
+#define PIN_UPDATE (1<<2)
+#define PIN_MAPPABLE   (1<<3)
+#define PIN_ZONE_4G(1<<4)
+#define PIN_NONBLOCK   (1<<5)
+#define PIN_HIGH   (1<<6)
+#define PIN_OFFSET_BIAS(1<<7)
+#define PIN_OFFSET_FIXED (1<<8)
 #define PIN_OFFSET_MASK (~4095)
 
 static inline void __i915_vma_pin(struct i915_vma *vma)
 {
GEM_BUG_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT);
-   vma->pin_count++;
+   vma->flags++;
 }
 
 static inline bool i915_vma_is_pinned(struct i915_vma *vma)
 {
-   return vma->pin_count;
+   return vma->flags & DRM_I915_GEM_OBJECT_MAX_PIN_COUNT;
 }
 
 static inline void __i915_vma_unpin(struct i915_vma *vma)
 {
GEM_BUG_ON(!i915_vma_is_pinned(vma));
-   vma->pin_count--;
+   vma->flags--;
 }
 
 static inline void i915_vma_unpin(struct i915_vma *vma)
@@ -2972,7 +2972,7 @@ int __must_check
 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 const struct i915_ggtt_view *view,
 uint64_t size,
-uint32_t alignment,
+uint64_t alignment,
 uint64_t flags);
 
 int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
@@ -3223,15 +3223,6 @@ static inline bool i915_gem_obj_ggtt_bound(struct 
drm_i915_gem_object *obj)
 unsigned long
 i915_gem_obj_ggtt_size(struct drm_i915_gem_object *obj);
 
-static inline int __must_check
-i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
- uint32_t alignment,
- unsigned flags)
-{
-   return i915_gem_object_ggtt_pin(obj, &i915_ggtt_view_normal,
-   0, alignment, flags);
-}
-
 void i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
 const struct i915_ggtt_view *view);
 static inline void
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 71a32a9f9858..53776a071ce7 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -772,7 +772,9 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
char __user *user_data;
int page_offset, page_length, ret;
 
-   ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
+   ret = i915_gem_object_ggtt_pin(obj, NULL,
+  0, 0,
+  PIN_MAPPABLE | PIN_NONBLOCK);
if (ret)
goto out;
 
@@ -3408,32 +3410,35 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma 
*vma)
 int
 i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 {
-   unsigned bound = vma->bound;
+   unsigned bound;
int ret;
 
GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0);
GEM_BUG_ON((flags & PIN_GLOBAL) && !vma->is_ggtt);
 
-   if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
-   return -EBUSY;
-
/* Pin early to prevent the shrinker/eviction logic from destroying
 * our vma as we insert and bind.
 */
-   __i915_vma_pin(vma);
+   bound = vma->flags++;
+   if (WARN_ON((bound & 0xf) == 

[Intel-gfx] [PATCH 17/38] drm/i915: Introduce i915_gem_active_wait_unlocked()

2016-06-03 Thread Chris Wilson
It is useful to be able to wait on pending rendering without grabbing
the struct_mutex. We can do this by using the i915_gem_active_get_rcu()
primitive to acquire a reference to the pending request without
requiring struct_mutex, just the RCU read lock, and then call
__i915_wait_request().

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem_request.h | 21 +
 1 file changed, 21 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.h 
b/drivers/gpu/drm/i915/i915_gem_request.h
index 6aa246848894..006f212b7fd6 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -476,6 +476,27 @@ i915_gem_active_wait(const struct i915_gem_active *active, 
struct mutex *mutex)
return i915_wait_request(request);
 }
 
+static inline int
+i915_gem_active_wait_unlocked(const struct i915_gem_active *active,
+ bool interruptible,
+ s64 *timeout,
+ struct intel_rps_client *rps)
+{
+   struct drm_i915_gem_request *request;
+   int ret = 0;
+
+   rcu_read_lock();
+   request = i915_gem_active_get_rcu(active);
+   rcu_read_unlock();
+
+   if (request) {
+   ret = __i915_wait_request(request, interruptible, timeout, rps);
+   i915_gem_request_put(request);
+   }
+
+   return ret;
+}
+
 /**
  * i915_gem_active_retire - waits until the request is retired
  * @active - the active request on which to wait
-- 
2.8.1



[Intel-gfx] [PATCH 15/38] drm/i915: Mark all current requests as complete before resetting them

2016-06-03 Thread Chris Wilson
Following a GPU reset upon hang, we retire all the requests and then
mark them all as complete. If we mark them as complete first, we both
keep the normal retirement order (completed first then retired) and
provide a small optimisation for concurrent lookups.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 93a874b0ba14..f6f039aad6e2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2200,6 +2200,12 @@ static void i915_gem_reset_engine_cleanup(struct 
intel_engine_cs *engine)
 {
struct intel_ring *ring;
 
+   /* Mark all pending requests as complete so that any concurrent
+* (lockless) lookup doesn't try and wait upon the request as we
+* reset it.
+*/
+   intel_engine_init_seqno(engine, engine->last_submitted_seqno);
+
/*
 * Clear the execlists queue up before freeing the requests, as those
 * are the ones that keep the context and ringbuffer backing objects
@@ -2241,8 +2247,6 @@ static void i915_gem_reset_engine_cleanup(struct 
intel_engine_cs *engine)
ring->last_retired_head = ring->tail;
intel_ring_update_space(ring);
}
-
-   intel_engine_init_seqno(engine, engine->last_submitted_seqno);
 }
 
 void i915_gem_reset(struct drm_device *dev)
-- 
2.8.1



[Intel-gfx] [PATCH 34/38] drm/i915: Move setting of request->batch into its single callsite

2016-06-03 Thread Chris Wilson
request->batch_obj is only set by execbuffer for the convenience of
debugging hangs. By moving that operation to the callsite, we can
simplify all other callers and future patches. We also move the
complications of reference handling of the request->batch_obj next to
where the active tracking is set up for the request.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 10 +-
 drivers/gpu/drm/i915/i915_gem_request.c| 12 +---
 drivers/gpu/drm/i915/i915_gem_request.h|  8 +++-
 3 files changed, 13 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 224265619f00..b89e9d2b33c4 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1656,6 +1656,14 @@ i915_gem_do_execbuffer(struct drm_device *dev, void 
*data,
goto err_batch_unpin;
}
 
+   /* Whilst this request exists, batch_obj will be on the
+* active_list, and so will hold the active reference. Only when this
+* request is retired will the batch_obj be moved onto the
+* inactive_list and lose its active reference. Hence we do not need
+* to explicitly hold another reference here.
+*/
+   params->request->batch_obj = params->batch_vma->obj;
+
ret = i915_gem_request_add_to_client(params->request, file);
if (ret)
goto err_request;
@@ -1674,7 +1682,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 
	ret = execbuf_submit(params, args, &eb->vmas);
 err_request:
-   __i915_add_request(params->request, params->batch_vma->obj, ret == 0);
+   __i915_add_request(params->request, ret == 0);
 
 err_batch_unpin:
/*
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c 
b/drivers/gpu/drm/i915/i915_gem_request.c
index 8029b37c9eee..8101d9169027 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -402,9 +402,7 @@ static void i915_gem_mark_busy(struct drm_i915_private 
*dev_priv,
  * request is not being tracked for completion but the work itself is
  * going to happen on the hardware. This would be a Bad Thing(tm).
  */
-void __i915_add_request(struct drm_i915_gem_request *request,
-   struct drm_i915_gem_object *obj,
-   bool flush_caches)
+void __i915_add_request(struct drm_i915_gem_request *request, bool 
flush_caches)
 {
struct intel_engine_cs *engine;
struct intel_ring *ring;
@@ -447,14 +445,6 @@ void __i915_add_request(struct drm_i915_gem_request 
*request,
trace_i915_gem_request_add(request);
request->head = request_start;
 
-   /* Whilst this request exists, batch_obj will be on the
-* active_list, and so will hold the active reference. Only when this
-* request is retired will the the batch_obj be moved onto the
-* inactive_list and lose its active reference. Hence we do not need
-* to explicitly hold another reference here.
-*/
-   request->batch_obj = obj;
-
/* Seal the request and mark it as pending execution. Note that
 * we may inspect this state, without holding any locks, during
 * hangcheck. Hence we apply the barrier to ensure that we do not
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h 
b/drivers/gpu/drm/i915/i915_gem_request.h
index 8d1225999fae..87e055267904 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -213,13 +213,11 @@ static inline void i915_gem_request_assign(struct 
drm_i915_gem_request **pdst,
*pdst = src;
 }
 
-void __i915_add_request(struct drm_i915_gem_request *req,
-   struct drm_i915_gem_object *batch_obj,
-   bool flush_caches);
+void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches);
 #define i915_add_request(req) \
-   __i915_add_request(req, NULL, true)
+   __i915_add_request(req, true)
 #define i915_add_request_no_flush(req) \
-   __i915_add_request(req, NULL, false)
+   __i915_add_request(req, false)
 
 struct intel_rps_client;
 #define NO_WAITBOOST ERR_PTR(-1)
-- 
2.8.1



[Intel-gfx] [PATCH 36/38] drm/i915: Track pinned vma inside guc

2016-06-03 Thread Chris Wilson
Since the guc allocates and pins an object into the GGTT for its usage,
it is more natural to use that pinned VMA as our resource cookie.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c|  10 +--
 drivers/gpu/drm/i915/i915_guc_submission.c | 131 ++---
 drivers/gpu/drm/i915/intel_guc.h   |   9 +-
 drivers/gpu/drm/i915/intel_guc_loader.c|   7 +-
 4 files changed, 73 insertions(+), 84 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 9154919fdd56..485fc23893d6 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2601,15 +2601,15 @@ static int i915_guc_log_dump(struct seq_file *m, void 
*data)
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
-   struct drm_i915_gem_object *log_obj = dev_priv->guc.log_obj;
-   u32 *log;
+   struct drm_i915_gem_object *obj;
int i = 0, pg;
 
-   if (!log_obj)
+   if (dev_priv->guc.log == NULL)
return 0;
 
-   for (pg = 0; pg < log_obj->base.size / PAGE_SIZE; pg++) {
-   log = kmap_atomic(i915_gem_object_get_page(log_obj, pg));
+   obj = dev_priv->guc.log->obj;
+   for (pg = 0; pg < obj->base.size / PAGE_SIZE; pg++) {
+   u32 *log = kmap_atomic(i915_gem_object_get_page(obj, pg));
 
for (i = 0; i < PAGE_SIZE / sizeof(u32); i += 4)
seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n",
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c 
b/drivers/gpu/drm/i915/i915_guc_submission.c
index 63ef34c78494..1c92c4c6b0e1 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -357,8 +357,8 @@ static void guc_init_proc_desc(struct intel_guc *guc,
 static void guc_init_ctx_desc(struct intel_guc *guc,
  struct i915_guc_client *client)
 {
-   struct drm_i915_gem_object *client_obj = client->client_obj;
struct drm_i915_private *dev_priv = guc_to_i915(guc);
+   struct drm_i915_gem_object *client_obj = client->client->obj;
struct intel_engine_cs *engine;
struct i915_gem_context *ctx = client->owner;
struct guc_context_desc desc;
@@ -412,7 +412,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
 * The doorbell, process descriptor, and workqueue are all parts
 * of the client object, which the GuC will reference via the GGTT
 */
-   gfx_addr = i915_gem_obj_ggtt_offset(client_obj);
+   gfx_addr = client->client->node.start;
desc.db_trigger_phy = sg_dma_address(client_obj->pages->sgl) +
client->doorbell_offset;
desc.db_trigger_cpu = (uintptr_t)client->client_base +
@@ -429,7 +429,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
desc.desc_private = (uintptr_t)client;
 
/* Pool context is pinned already */
-   sg = guc->ctx_pool_obj->pages;
+   sg = guc->ctx_pool->obj->pages;
sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
 sizeof(desc) * client->ctx_index);
 }
@@ -442,7 +442,7 @@ static void guc_fini_ctx_desc(struct intel_guc *guc,
 
memset(&desc, 0, sizeof(desc));
 
-   sg = guc->ctx_pool_obj->pages;
+   sg = guc->ctx_pool->obj->pages;
sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
 sizeof(desc) * client->ctx_index);
 }
@@ -524,7 +524,7 @@ static void guc_add_workqueue_item(struct i915_guc_client 
*gc,
/* WQ starts from the page after doorbell / process_desc */
wq_page = (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT;
wq_off &= PAGE_SIZE - 1;
-   base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, wq_page));
+   base = kmap_atomic(i915_gem_object_get_page(gc->client->obj, wq_page));
wqi = (struct guc_wq_item *)((char *)base + wq_off);
 
/* Now fill in the 4-word work queue item */
@@ -588,8 +588,8 @@ void i915_guc_submit(struct drm_i915_gem_request *rq)
  */
 
 /**
- * gem_allocate_guc_obj() - Allocate gem object for GuC usage
- * @dev:   drm device
+ * guc_allocate_vma() - Allocate gem object for GuC usage
+ * @guc:   the guc
  * @size:  size of object
  *
  * This is a wrapper to create a gem obj. In order to use it inside GuC, the
@@ -598,46 +598,40 @@ void i915_guc_submit(struct drm_i915_gem_request *rq)
  *
  * Return: A drm_i915_gem_object if successful, otherwise NULL.
  */
-static struct drm_i915_gem_object *gem_allocate_guc_obj(struct drm_device *dev,
-   u32 size)
+static struct i915_vma *guc_allocate_vma(struct intel_guc *guc, u32 size)
 {
-   struct drm_i915_private *dev_priv = dev->dev_private;
+   struct 

[Intel-gfx] [PATCH 13/38] drm/i915: Move obj->active:5 to obj->flags

2016-06-03 Thread Chris Wilson
We are motivated to avoid using a bitfield for obj->active for a couple
of reasons. Firstly, we wish to document our lockless read of obj->active
using READ_ONCE inside i915_gem_busy_ioctl() and that requires an
integral type (i.e. not a bitfield). Secondly, gcc produces abysmal code
when presented with a bitfield, and that shows up high on the profiles of
request tracking (mainly due to excess memory traffic as it converts
the bitfield to a register and back, generating frequent AGI
(address-generation interlock) stalls in the process).
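
As a sketch, the lockless snapshot that i915_gem_busy_ioctl() wants then
reduces to a shift-and-mask over READ_ONCE(obj->flags) (mirroring the
accessors added in the header hunk below):

    /* Illustrative: READ_ONCE() requires an addressable integral
     * lvalue, which a C bitfield is not, so the per-engine active
     * bits live in the low bits of obj->flags instead.
     */
    #define I915_BO_ACTIVE_SHIFT 0
    #define I915_BO_ACTIVE_MASK ((1 << I915_NUM_ENGINES) - 1)
    #define __I915_BO_ACTIVE(bo) \
        ((READ_ONCE((bo)->flags) >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK)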

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c|  2 +-
 drivers/gpu/drm/i915/i915_drv.h| 31 +-
 drivers/gpu/drm/i915/i915_gem.c| 16 +++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 10 +-
 drivers/gpu/drm/i915/i915_gem_shrinker.c   |  5 +++--
 drivers/gpu/drm/i915/i915_gem_userptr.c|  2 +-
 6 files changed, 48 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 355bbf895c22..9154919fdd56 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -91,7 +91,7 @@ static int i915_capabilities(struct seq_file *m, void *data)
 
 static char get_active_flag(struct drm_i915_gem_object *obj)
 {
-   return obj->active ? '*' : ' ';
+   return i915_gem_object_is_active(obj) ? '*' : ' ';
 }
 
 static char get_pin_flag(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 236ade61cade..e72b7f35a98e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2136,12 +2136,16 @@ struct drm_i915_gem_object {
 
struct list_head batch_pool_link;
 
+   unsigned long flags;
/**
 * This is set if the object is on the active lists (has pending
* rendering and so a non-zero seqno), and is not set if it is on
 * inactive (ready to be unbound) list.
 */
-   unsigned int active:I915_NUM_ENGINES;
+#define I915_BO_ACTIVE_SHIFT 0
+#define I915_BO_ACTIVE_MASK ((1 << I915_NUM_ENGINES) - 1)
+#define __I915_BO_ACTIVE(bo) \
+   ((READ_ONCE((bo)->flags) >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK)
 
/**
 * This is set if the object has been written to since last bound
@@ -2288,6 +2292,31 @@ i915_gem_object_put_unlocked(struct drm_i915_gem_object 
*obj)
 }
 __deprecated extern void drm_gem_object_unreference_unlocked(struct 
drm_gem_object *);
 
+static inline unsigned long
+i915_gem_object_is_active(const struct drm_i915_gem_object *obj)
+{
+   return (obj->flags >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK;
+}
+
+static inline void
+i915_gem_object_set_active(struct drm_i915_gem_object *obj, int engine)
+{
+   obj->flags |= 1 << (engine + I915_BO_ACTIVE_SHIFT);
+}
+
+static inline void
+i915_gem_object_unset_active(struct drm_i915_gem_object *obj, int engine)
+{
+   obj->flags &= ~(1 << (engine + I915_BO_ACTIVE_SHIFT));
+}
+
+static inline bool
+i915_gem_object_has_active_engine(const struct drm_i915_gem_object *obj,
+ int engine)
+{
+   return obj->flags & (1 << (engine + I915_BO_ACTIVE_SHIFT));
+}
+
 /*
  * Optimised SGL iterator for GEM objects
  */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 05425ae7c8a8..a8279a598c4b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1126,7 +1126,7 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object 
*obj,
 
lockdep_assert_held(&obj->base.dev->struct_mutex);
 
-   active_mask = obj->active;
+   active_mask = i915_gem_object_is_active(obj);
if (!active_mask)
return 0;
 
@@ -1165,7 +1165,7 @@ i915_gem_object_wait_rendering__nonblocking(struct 
drm_i915_gem_object *obj,
BUG_ON(!mutex_is_locked(&dev->struct_mutex));
BUG_ON(!dev_priv->mm.interruptible);
 
-   active_mask = obj->active;
+   active_mask = i915_gem_object_is_active(obj);
if (!active_mask)
return 0;
 
@@ -2109,10 +2109,10 @@ i915_gem_object_retire__read(struct i915_gem_active 
*active,
struct drm_i915_gem_object *obj =
container_of(active, struct drm_i915_gem_object, 
last_read[ring]);
 
-   GEM_BUG_ON((obj->active & (1 << ring)) == 0);
+   GEM_BUG_ON(!i915_gem_object_has_active_engine(obj, ring));
 
-   obj->active &= ~(1 << ring);
-   if (obj->active)
+   i915_gem_object_unset_active(obj, ring);
+   if (i915_gem_object_is_active(obj))
return;
 
/* Bump our place on the bound list to keep it roughly in LRU order
@@ -2383,7 +2383,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, 
struct drm_file *file)
return -ENOENT;
}
 
-   if (!obj->active)
+   if (!i915_gem_object_is_active(obj))
goto out;
 

[Intel-gfx] [PATCH 06/38] drm/i915: Pad GTT views of exec objects up to user specified size

2016-06-03 Thread Chris Wilson
Our GPUs impose certain requirements upon buffers that depend upon how
exactly they are used. Typically this is expressed as a requirement for
a larger surface than would be naively computed by pitch * height.
Normally such requirements are hidden away in the userspace driver, but
when we accept pointers from strangers and later impose extra conditions
on them, the original client allocator has no idea about the
monstrosities in the GPU and we require the userspace driver to inform
the kernel how many padding pages are required beyond the client
allocation.
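
A hypothetical userspace sketch (the flag and field names follow the
uapi hunk below, but treat them as illustrative rather than final):

    /* Ask the kernel to reserve extra GTT space beyond the object,
     * e.g. for hardware that fetches past the last row of a surface.
     * pad_to_size must be page aligned and at least the object size.
     */
    struct drm_i915_gem_exec_object2 entry;
    memset(&entry, 0, sizeof(entry));
    entry.handle = handle;
    entry.flags = EXEC_OBJECT_PAD_TO_SIZE;   /* assumed flag name */
    entry.pad_to_size = obj_size + 2 * 4096; /* object plus two pages of padding */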

v2: Long time, no see
v3: Try an anonymous union for uapi struct compatibility

Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
Reviewed-by: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/i915_drv.h|  6 ++-
 drivers/gpu/drm/i915/i915_gem.c| 82 +++---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +-
 include/uapi/drm/i915_drm.h|  8 ++-
 4 files changed, 65 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a065325580d8..9520adba33f6 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2945,11 +2945,13 @@ void i915_gem_free_object(struct drm_gem_object *obj);
 int __must_check
 i915_gem_object_pin(struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
+   uint64_t size,
uint32_t alignment,
uint64_t flags);
 int __must_check
 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 const struct i915_ggtt_view *view,
+uint64_t size,
 uint32_t alignment,
 uint64_t flags);
 
@@ -3209,8 +3211,8 @@ i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
struct i915_ggtt *ggtt = &dev_priv->ggtt;
 
-   return i915_gem_object_pin(obj, &ggtt->base,
-  alignment, flags | PIN_GLOBAL);
+   return i915_gem_object_pin(obj, &ggtt->base, 0, alignment,
+  flags | PIN_GLOBAL);
 }
 
 void i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 19b8d2ea7698..0f0101300b2b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1438,7 +1438,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct 
vm_fault *vmf)
}
 
/* Now pin it into the GTT if needed */
-   ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE);
+   ret = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
if (ret)
goto unlock;
 
@@ -2678,21 +2678,20 @@ static struct i915_vma *
 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
   struct i915_address_space *vm,
   const struct i915_ggtt_view *ggtt_view,
+  uint64_t size,
   unsigned alignment,
   uint64_t flags)
 {
struct drm_device *dev = obj->base.dev;
struct drm_i915_private *dev_priv = to_i915(dev);
-   struct i915_ggtt *ggtt = &dev_priv->ggtt;
-   u32 fence_alignment, unfenced_alignment;
-   u32 search_flag, alloc_flag;
u64 start, end;
-   u64 size, fence_size;
+   u32 search_flag, alloc_flag;
struct i915_vma *vma;
int ret;
 
if (i915_is_ggtt(vm)) {
-   u32 view_size;
+   u32 fence_size, fence_alignment, unfenced_alignment;
+   u64 view_size;
 
if (WARN_ON(!ggtt_view))
return ERR_PTR(-EINVAL);
@@ -2710,48 +2709,39 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object 
*obj,
view_size,

obj->tiling_mode,
false);
-   size = flags & PIN_MAPPABLE ? fence_size : view_size;
+   size = max(size, view_size);
+   if (flags & PIN_MAPPABLE)
+   size = max_t(u64, size, fence_size);
+
+   if (alignment == 0)
+   alignment = flags & PIN_MAPPABLE ? fence_alignment :
+   unfenced_alignment;
+   if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
+   DRM_DEBUG("Invalid object (view type=%u) alignment 
requested %u\n",
+ ggtt_view ? ggtt_view->type : 0,
+ alignment);
+   return ERR_PTR(-EINVAL);
+   }
} else {
-   

[Intel-gfx] [PATCH 11/38] drm/i915: Make fb_tracking.lock a spinlock

2016-06-03 Thread Chris Wilson
We only need a very lightweight mechanism here as the locking is only
used for co-ordinating a bitfield.

Also double check that the object is still pinned to the display plane
before processing the state change.
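
The net effect at the invalidate site is a short, non-sleeping critical
section over a couple of bitmask updates (condensed from the ORIGIN_CS
hunk below), which is exactly the workload a spinlock suits:

    spin_lock(&dev_priv->fb_tracking.lock);
    dev_priv->fb_tracking.busy_bits |= obj->frontbuffer_bits;
    dev_priv->fb_tracking.flip_bits &= ~obj->frontbuffer_bits;
    spin_unlock(&dev_priv->fb_tracking.lock);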

v2: Move the cheap unlikely tests into the caller

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h  |  2 +-
 drivers/gpu/drm/i915/i915_gem.c  |  2 +-
 drivers/gpu/drm/i915/intel_drv.h | 29 ++---
 drivers/gpu/drm/i915/intel_frontbuffer.c | 54 ++--
 4 files changed, 51 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 861d132b2fe4..59846de3b33d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1650,7 +1650,7 @@ struct intel_pipe_crc {
 };
 
 struct i915_frontbuffer_tracking {
-   struct mutex lock;
+   spinlock_t lock;
 
/*
* Tracking bits for delayed frontbuffer flushing due to gpu activity or
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 53776a071ce7..522f379c8d44 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4180,7 +4180,7 @@ i915_gem_load_init(struct drm_device *dev)
 
dev_priv->mm.interruptible = true;
 
-   mutex_init(&dev_priv->fb_tracking.lock);
+   spin_lock_init(&dev_priv->fb_tracking.lock);
 }
 
 void i915_gem_load_cleanup(struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index a29618dc7e98..9410767c97da 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1106,8 +1106,6 @@ void intel_ddi_set_vc_payload_alloc(struct drm_crtc 
*crtc, bool state);
 uint32_t ddi_signal_levels(struct intel_dp *intel_dp);
 
 /* intel_frontbuffer.c */
-void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
-enum fb_op_origin origin);
 void intel_frontbuffer_flip_prepare(struct drm_device *dev,
unsigned frontbuffer_bits);
 void intel_frontbuffer_flip_complete(struct drm_device *dev,
@@ -1118,8 +1116,31 @@ unsigned int intel_fb_align_height(struct drm_device 
*dev,
   unsigned int height,
   uint32_t pixel_format,
   uint64_t fb_format_modifier);
-void intel_fb_obj_flush(struct drm_i915_gem_object *obj, bool retire,
-   enum fb_op_origin origin);
+
+void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
+  enum fb_op_origin origin);
+static inline void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
+  enum fb_op_origin origin)
+{
+   if (!obj->frontbuffer_bits || !obj->pin_display)
+   return;
+
+   __intel_fb_obj_invalidate(obj, origin);
+}
+
+void __intel_fb_obj_flush(struct drm_i915_gem_object *obj,
+ bool retire,
+ enum fb_op_origin origin);
+static inline void intel_fb_obj_flush(struct drm_i915_gem_object *obj,
+ bool retire,
+ enum fb_op_origin origin)
+{
+   if (!obj->frontbuffer_bits || !obj->pin_display)
+   return;
+
+   __intel_fb_obj_flush(obj, retire, origin);
+}
+
 u32 intel_fb_stride_alignment(const struct drm_i915_private *dev_priv,
  uint64_t fb_modifier, uint32_t pixel_format);
 
diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c 
b/drivers/gpu/drm/i915/intel_frontbuffer.c
index ac85357010b4..a38ccfe4894a 100644
--- a/drivers/gpu/drm/i915/intel_frontbuffer.c
+++ b/drivers/gpu/drm/i915/intel_frontbuffer.c
@@ -76,24 +76,19 @@
  * until the rendering completes or a flip on this frontbuffer plane is
  * scheduled.
  */
-void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
-enum fb_op_origin origin)
+void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
+  enum fb_op_origin origin)
 {
struct drm_device *dev = obj->base.dev;
struct drm_i915_private *dev_priv = to_i915(dev);
 
WARN_ON(!mutex_is_locked(&dev->struct_mutex));
 
-   if (!obj->frontbuffer_bits)
-   return;
-
if (origin == ORIGIN_CS) {
-   mutex_lock(&dev_priv->fb_tracking.lock);
-   dev_priv->fb_tracking.busy_bits
-   |= obj->frontbuffer_bits;
-   dev_priv->fb_tracking.flip_bits
-   &= ~obj->frontbuffer_bits;
-   mutex_unlock(&dev_priv->fb_tracking.lock);
+   spin_lock(&dev_priv->fb_tracking.lock);
+   dev_priv->fb_tracking.busy_bits |= obj->frontbuffer_bits;
+   dev_priv->fb_tracking.flip_bits &= ~obj->frontbuffer_bits;
+   

[Intel-gfx] [PATCH 07/38] drm/i915: Split insertion/binding of an object into the VM

2016-06-03 Thread Chris Wilson
Split the insertion into the address space's range manager and binding
of that object into the GTT to simplify the code flow when pinning a
VMA.
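
Condensed sketch of the resulting flow at the pin site (see the
i915_gem_object_do_pin() hunk below): reserving address space and
writing the PTEs are now two distinct steps.

    if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
            /* step 1: reserve a node in the address space */
            vma = i915_gem_object_insert_into_vm(obj, vm, ggtt_view,
                                                 size, alignment, flags);
            if (IS_ERR(vma))
                    return PTR_ERR(vma);
    }

    /* step 2: bind, i.e. actually write the PTEs for the vma */
    ret = i915_vma_bind(vma, obj->cache_level, flags);
    if (ret)
            return ret;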

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem.c | 33 +++--
 1 file changed, 15 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0f0101300b2b..cd54c290680d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2675,12 +2675,12 @@ static bool i915_gem_valid_gtt_space(struct i915_vma 
*vma,
  * there.
  */
 static struct i915_vma *
-i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
-  struct i915_address_space *vm,
-  const struct i915_ggtt_view *ggtt_view,
-  uint64_t size,
-  unsigned alignment,
-  uint64_t flags)
+i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj,
+  struct i915_address_space *vm,
+  const struct i915_ggtt_view *ggtt_view,
+  uint64_t size,
+  unsigned alignment,
+  uint64_t flags)
 {
struct drm_device *dev = obj->base.dev;
struct drm_i915_private *dev_priv = to_i915(dev);
@@ -2809,11 +2809,6 @@ search_free:
goto err_remove_node;
}
 
-   trace_i915_vma_bind(vma, flags);
-   ret = i915_vma_bind(vma, obj->cache_level, flags);
-   if (ret)
-   goto err_remove_node;
-
list_move_tail(&vma->global_list, &dev_priv->mm.bound_list);
list_move_tail(&vma->vm_link, &vm->inactive_list);
obj->bind_count++;
@@ -3484,24 +3479,26 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
}
}
 
-   bound = vma ? vma->bound : 0;
if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
-   vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view,
-size, alignment, flags);
+   vma = i915_gem_object_insert_into_vm(obj, vm, ggtt_view,
+size, alignment, flags);
if (IS_ERR(vma))
return PTR_ERR(vma);
-   } else {
-   ret = i915_vma_bind(vma, obj->cache_level, flags);
-   if (ret)
-   return ret;
}
 
+   bound = vma->bound;
+   ret = i915_vma_bind(vma, obj->cache_level, flags);
+   if (ret)
+   return ret;
+
if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL &&
(bound ^ vma->bound) & GLOBAL_BIND) {
__i915_vma_set_map_and_fenceable(vma);
WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
}
 
+   GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
+
vma->pin_count++;
return 0;
 }
-- 
2.8.1



[Intel-gfx] [PATCH 05/38] drm/i915: Remove i915_gem_execbuffer_retire_commands()

2016-06-03 Thread Chris Wilson
Move the single line to the callsite as the name is now misleading, and
the purpose is solely to add the request to the execution queue.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 9 +
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 1b19a36adedc..40937a09855d 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1177,13 +1177,6 @@ i915_gem_execbuffer_move_to_active(struct list_head 
*vmas,
}
 }
 
-static void
-i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params)
-{
-   /* Add a breadcrumb for the completion of the batch buffer */
-   __i915_add_request(params->request, params->batch_obj, true);
-}
-
 static int
 i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
 {
@@ -1677,7 +1670,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 
ret = execbuf_submit(params, args, >vmas);
 err_request:
-   i915_gem_execbuffer_retire_commands(params);
+   __i915_add_request(params->request, params->batch_obj, ret == 0);
 
 err_batch_unpin:
/*
-- 
2.8.1



[Intel-gfx] [PATCH 09/38] drm/i915: Start passing around i915_vma from execbuffer

2016-06-03 Thread Chris Wilson
During execbuffer we look up the i915_vma in order to reserve it in
the VM. However, we then do a second lookup of the same vma in order
to pin it, all because we lack the necessary interfaces to operate on
i915_vma.
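
The shape of the interface this gives callers (helpers from the header
hunk below; the lookup helper is illustrative only):

    struct i915_vma *vma = lookup_vma(obj, vm); /* hypothetical helper */
    int ret;

    ret = i915_vma_pin(vma, size, alignment, flags);
    if (ret)
            return ret;

    /* ... use vma->node.start directly, no second lookup ... */

    i915_vma_unpin(vma);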

v2: Tidy parameter lists to remove one level of redirection in the hot
path.

Signed-off-by: Chris Wilson 
Cc: Mika Kuoppala 
---
 drivers/gpu/drm/i915/i915_debugfs.c|   2 +-
 drivers/gpu/drm/i915/i915_drv.h|  47 +---
 drivers/gpu/drm/i915/i915_gem.c| 178 -
 drivers/gpu/drm/i915/i915_gem_evict.c  |  12 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 131 ++---
 drivers/gpu/drm/i915/i915_gem_gtt.c|   7 +-
 drivers/gpu/drm/i915/i915_gpu_error.c  |   4 +-
 7 files changed, 174 insertions(+), 207 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 99857ee0bb8b..f4745e0c8d5c 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -168,7 +168,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object 
*obj)
if (obj->base.name)
seq_printf(m, " (name: %d)", obj->base.name);
list_for_each_entry(vma, &obj->vma_list, obj_link) {
-   if (vma->pin_count > 0)
+   if (i915_vma_is_pinned(vma))
pin_count++;
}
seq_printf(m, " (pinned x %d)", pin_count);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index fe7e87e8cf9a..f537d8fc5e0f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2931,6 +2931,8 @@ struct drm_i915_gem_object 
*i915_gem_object_create_from_data(
 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file);
 void i915_gem_free_object(struct drm_gem_object *obj);
 
+int __must_check
+i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags);
 /* Flags used by pin/bind */
 #define PIN_MAPPABLE   (1<<0)
 #define PIN_NONBLOCK   (1<<1)
@@ -2942,12 +2944,30 @@ void i915_gem_free_object(struct drm_gem_object *obj);
 #define PIN_HIGH   (1<<7)
 #define PIN_OFFSET_FIXED   (1<<8)
 #define PIN_OFFSET_MASK (~4095)
-int __must_check
-i915_gem_object_pin(struct drm_i915_gem_object *obj,
-   struct i915_address_space *vm,
-   uint64_t size,
-   uint32_t alignment,
-   uint64_t flags);
+
+static inline void __i915_vma_pin(struct i915_vma *vma)
+{
+   GEM_BUG_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT);
+   vma->pin_count++;
+}
+
+static inline bool i915_vma_is_pinned(struct i915_vma *vma)
+{
+   return vma->pin_count;
+}
+
+static inline void __i915_vma_unpin(struct i915_vma *vma)
+{
+   GEM_BUG_ON(!i915_vma_is_pinned(vma));
+   vma->pin_count--;
+}
+
+static inline void i915_vma_unpin(struct i915_vma *vma)
+{
GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
+   __i915_vma_unpin(vma);
+}
+
 int __must_check
 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 const struct i915_ggtt_view *view,
@@ -3208,11 +3228,8 @@ i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
  uint32_t alignment,
  unsigned flags)
 {
-   struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-   struct i915_ggtt *ggtt = &dev_priv->ggtt;
-
-   return i915_gem_object_pin(obj, &ggtt->base, 0, alignment,
-  flags | PIN_GLOBAL);
+   return i915_gem_object_ggtt_pin(obj, &i915_ggtt_view_normal,
+   0, alignment, flags);
 }
 
 void i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
@@ -3293,11 +3310,11 @@ int i915_gem_context_reset_stats_ioctl(struct 
drm_device *dev, void *data,
 
 /* i915_gem_evict.c */
 int __must_check i915_gem_evict_something(struct i915_address_space *vm,
- int min_size,
- unsigned alignment,
+ u64 min_size,
+ u64 alignment,
  unsigned cache_level,
- unsigned long start,
- unsigned long end,
+ u64 start,
+ u64 end,
  unsigned flags);
 int __must_check i915_gem_evict_for_vma(struct i915_vma *target);
 int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7340fc830d9a..71a32a9f9858 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -130,10 +130,10 @@ i915_gem_get_aperture_ioctl(struct drm_device 

[Intel-gfx] [PATCH 04/38] drm/i915: Remove request retirement before each batch

2016-06-03 Thread Chris Wilson
This reimplements the denial-of-service protection against igt from

commit 227f782e4667fc622810bce8be8ccdeee45f89c2
Author: Chris Wilson 
Date:   Thu May 15 10:41:42 2014 +0100

drm/i915: Retire requests before creating a new one

and transfers the stall from before each batch into get_pages().
The issue is that the stall increases latency between batches, which
is detrimental in some cases (especially coupled with execlists) to
keeping the GPU well fed. We have also observed that retiring requests
can itself free objects (and requests), and therefore makes a good
first step when shrinking.

v2: Recycle objects prior to i915_gem_object_get_pages()

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h| 1 -
 drivers/gpu/drm/i915/i915_gem.c| 9 ++---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 --
 drivers/gpu/drm/i915/i915_gem_request.c| 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 09f6f0eecd96..a065325580d8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3065,7 +3065,6 @@ struct drm_i915_gem_request *
 i915_gem_find_active_request(struct intel_engine_cs *engine);
 
 void i915_gem_retire_requests(struct drm_i915_private *dev_priv);
-void i915_gem_retire_requests_ring(struct intel_engine_cs *engine);
 
 static inline u32 i915_reset_counter(struct i915_gpu_error *error)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a7aa465cb76d..19b8d2ea7698 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1989,8 +1989,7 @@ err_pages:
 int
 i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 {
-   struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
-   const struct drm_i915_gem_object_ops *ops = obj->ops;
+   struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
int ret;
 
if (obj->pages)
@@ -2003,7 +2002,10 @@ i915_gem_object_get_pages(struct drm_i915_gem_object 
*obj)
 
BUG_ON(obj->pages_pin_count);
 
-   ret = ops->get_pages(obj);
+   /* Recycle as many active objects as possible first */
+   i915_gem_retire_requests(dev_priv);
+
+   ret = obj->ops->get_pages(obj);
if (ret)
return ret;
 
@@ -4161,6 +4163,7 @@ i915_gem_cleanup_engines(struct drm_device *dev)
 static void
 init_engine_lists(struct intel_engine_cs *engine)
 {
+   /* Early initialisation so that core GEM works during engine setup */
INIT_LIST_HEAD(&engine->request_list);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 7b381358512e..1b19a36adedc 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -751,8 +751,6 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *engine,
bool has_fenced_gpu_access = INTEL_GEN(engine->i915) < 4;
int retry;
 
-   i915_gem_retire_requests_ring(engine);
-
vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
 
INIT_LIST_HEAD(_vmas);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c 
b/drivers/gpu/drm/i915/i915_gem_request.c
index 38e5daecd8f5..59afc8e547c4 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -734,7 +734,7 @@ int i915_wait_request(struct drm_i915_gem_request *req)
return 0;
 }
 
-void i915_gem_retire_requests_ring(struct intel_engine_cs *engine)
+static void i915_gem_retire_requests_ring(struct intel_engine_cs *engine)
 {
struct drm_i915_gem_request *request, *next;
 
-- 
2.8.1



[Intel-gfx] [PATCH 01/38] drm/i915: Combine loops within i915_gem_evict_something

2016-06-03 Thread Chris Wilson
A slight micro-optimisation: combine the loops so that gcc is able to
optimise the inner loops concisely. Since we are reviewing the loops, we
can update the comments to describe the current state of affairs, in
particular the distinction between evicting from the global GTT (which
may contain untracked items and transient global pins) and the
per-process GTT.
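
The idiom this produces (generic sketch; compare the hunks below) is a
NULL-terminated array of lists walked in order, so the inactive list is
always scanned before the active one without duplicating the loop body:

    struct list_head *phases[] = {
            &vm->inactive_list,     /* cheapest victims first */
            &vm->active_list,       /* these will stall on unbinding */
            NULL,
    }, **phase = phases;

    do {
            list_for_each_entry(vma, *phase, vm_link)
                    if (mark_free(vma, &eviction_list))
                            goto found;
    } while (*++phase);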

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem_evict.c | 139 +-
 1 file changed, 69 insertions(+), 70 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c 
b/drivers/gpu/drm/i915/i915_gem_evict.c
index 2a9adc802e85..677297bf970e 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -63,6 +63,18 @@ static int switch_to_pinned_context(struct drm_i915_private 
*dev_priv)
return 0;
 }
 
+static bool
+gpu_is_idle(struct drm_i915_private *dev_priv)
+{
+   struct intel_engine_cs *engine;
+
+   for_each_engine(engine, dev_priv) {
if (!list_empty(&engine->request_list))
+   return false;
+   }
+
+   return true;
+}
 
 static bool
 mark_free(struct i915_vma *vma, struct list_head *unwind)
@@ -107,37 +119,32 @@ i915_gem_evict_something(struct drm_device *dev, struct 
i915_address_space *vm,
 unsigned long start, unsigned long end,
 unsigned flags)
 {
-   struct list_head eviction_list, unwind_list;
-   struct i915_vma *vma;
-   int ret = 0;
-   int pass = 0;
+   struct drm_i915_private *dev_priv = to_i915(dev);
+   struct list_head eviction_list;
+   struct list_head *phases[] = {
+   &vm->inactive_list,
+   &vm->active_list,
+   NULL,
+   }, **phase;
+   struct i915_vma *vma, *next;
+   int ret;
 
trace_i915_gem_evict(dev, min_size, alignment, flags);
 
/*
 * The goal is to evict objects and amalgamate space in LRU order.
 * The oldest idle objects reside on the inactive list, which is in
-* retirement order. The next objects to retire are those on the (per
-* ring) active list that do not have an outstanding flush. Once the
-* hardware reports completion (the seqno is updated after the
-* batchbuffer has been finished) the clean buffer objects would
-* be retired to the inactive list. Any dirty objects would be added
-* to the tail of the flushing list. So after processing the clean
-* active objects we need to emit a MI_FLUSH to retire the flushing
-* list, hence the retirement order of the flushing list is in
-* advance of the dirty objects on the active lists.
+* retirement order. The next objects to retire are those in flight,
+* on the active list, again in retirement order.
 *
 * The retirement sequence is thus:
 *   1. Inactive objects (already retired)
-*   2. Clean active objects
-*   3. Flushing list
-*   4. Dirty active objects.
+*   2. Active objects (will stall on unbinding)
 *
 * On each list, the oldest objects lie at the HEAD with the freshest
 * object on the TAIL.
 */
-
-   INIT_LIST_HEAD(&unwind_list);
+   INIT_LIST_HEAD(&eviction_list);
if (start != 0 || end != vm->total) {
drm_mm_init_scan_with_range(&vm->mm, min_size,
alignment, cache_level,
@@ -145,26 +152,20 @@ i915_gem_evict_something(struct drm_device *dev, struct 
i915_address_space *vm,
} else
drm_mm_init_scan(&vm->mm, min_size, alignment, cache_level);
 
-search_again:
-   /* First see if there is a large enough contiguous idle region... */
-   list_for_each_entry(vma, &vm->inactive_list, vm_link) {
-   if (mark_free(vma, &unwind_list))
-   goto found;
-   }
-
if (flags & PIN_NONBLOCK)
-   goto none;
+   phases[1] = NULL;
 
-   /* Now merge in the soon-to-be-expired objects... */
-   list_for_each_entry(vma, &vm->active_list, vm_link) {
-   if (mark_free(vma, &unwind_list))
-   goto found;
-   }
+search_again:
+   phase = phases;
+   do {
+   list_for_each_entry(vma, *phase, vm_link)
+   if (mark_free(vma, &eviction_list))
+   goto found;
+   } while (*++phase);
 
-none:
/* Nothing found, clean up and bail out! */
-   while (!list_empty(&unwind_list)) {
-   vma = list_first_entry(&unwind_list,
+   while (!list_empty(&eviction_list)) {
+   vma = list_first_entry(&eviction_list,
   struct i915_vma,
   exec_list);
ret = drm_mm_scan_remove_block(&vma->node);
@@ -174,50 +175,50 @@ none:
}
 
/* Can we unpin some objects such as idle hw contents,
-

[Intel-gfx] [PATCH 03/38] drm/i915: Double check the active status on the batch pool

2016-06-03 Thread Chris Wilson
We should not rely on obj->active being up to date unless we manually
flush it. Instead, we can verify that the next available batch object is
idle by looking at its last active request (and checking it for
completion).
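
In short, the availability test becomes a completion check on the
engine's last read request (condensed from the hunk below):

    /* The batches are strictly LRU ordered; the first busy batch
     * means everything after it is busy too, so stop looking.
     */
    if (!i915_gem_active_is_idle(&tmp->last_read[pool->engine->id],
                                 &tmp->base.dev->struct_mutex))
            break;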

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem_batch_pool.c | 15 ---
 drivers/gpu/drm/i915/i915_gem_batch_pool.h |  7 +--
 drivers/gpu/drm/i915/intel_lrc.c   |  2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c|  2 +-
 4 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c 
b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
index 3507b2753fd3..bd646e259012 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -41,15 +41,15 @@
 
 /**
  * i915_gem_batch_pool_init() - initialize a batch buffer pool
- * @dev: the drm device
+ * @engine: the associated request submission engine
  * @pool: the batch buffer pool
  */
-void i915_gem_batch_pool_init(struct drm_device *dev,
+void i915_gem_batch_pool_init(struct intel_engine_cs *engine,
  struct i915_gem_batch_pool *pool)
 {
int n;
 
-   pool->dev = dev;
+   pool->engine = engine;
 
for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++)
INIT_LIST_HEAD(&pool->cache_list[n]);
@@ -65,7 +65,7 @@ void i915_gem_batch_pool_fini(struct i915_gem_batch_pool 
*pool)
 {
int n;
 
-   WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex));
+   lockdep_assert_held(&pool->engine->i915->dev->struct_mutex);
 
for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
while (!list_empty(&pool->cache_list[n])) {
@@ -102,7 +102,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
struct list_head *list;
int n;
 
-   WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex));
+   lockdep_assert_held(&pool->engine->i915->dev->struct_mutex);
 
/* Compute a power-of-two bucket, but throw everything greater than
* 16KiB into the same bucket: i.e. the buckets hold objects of
@@ -115,7 +115,8 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
 
list_for_each_entry_safe(tmp, next, list, batch_pool_link) {
/* The batches are strictly LRU ordered */
-   if (tmp->active)
+   if (!i915_gem_active_is_idle(&tmp->last_read[pool->engine->id],
+ &tmp->base.dev->struct_mutex))
break;
 
/* While we're looping, do some clean up */
@@ -134,7 +135,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
if (obj == NULL) {
int ret;
 
-   obj = i915_gem_object_create(pool->dev, size);
+   obj = i915_gem_object_create(pool->engine->i915->dev, size);
if (IS_ERR(obj))
return obj;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.h 
b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
index 848e90703eed..7fd4df0a29fe 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.h
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
@@ -27,13 +27,16 @@
 
 #include "i915_drv.h"
 
+struct drm_device;
+struct intel_engine_cs;
+
 struct i915_gem_batch_pool {
-   struct drm_device *dev;
+   struct intel_engine_cs *engine;
struct list_head cache_list[4];
 };
 
 /* i915_gem_batch_pool.c */
-void i915_gem_batch_pool_init(struct drm_device *dev,
+void i915_gem_batch_pool_init(struct intel_engine_cs *engine,
  struct i915_gem_batch_pool *pool);
 void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool);
 struct drm_i915_gem_object*
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 69fca2f27f8b..964108cbb9c0 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1798,7 +1798,7 @@ logical_ring_setup(struct drm_device *dev, enum 
intel_engine_id id)
logical_ring_default_irqs(engine, info->irq_shift);
 
intel_engine_init_hangcheck(engine);
-   i915_gem_batch_pool_init(dev, &engine->batch_pool);
+   i915_gem_batch_pool_init(engine, &engine->batch_pool);
 
return engine;
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 33d2c019576e..d63e4fdc60de 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2060,7 +2060,7 @@ static int intel_init_engine(struct drm_device *dev,
INIT_LIST_HEAD(&engine->request_list);
INIT_LIST_HEAD(&engine->execlist_queue);
INIT_LIST_HEAD(&engine->buffers);
-   i915_gem_batch_pool_init(dev, &engine->batch_pool);
+   i915_gem_batch_pool_init(engine, &engine->batch_pool);
memset(engine->semaphore.sync_seqno, 0,
   sizeof(engine->semaphore.sync_seqno));
 
-- 
2.8.1


[Intel-gfx] Tracking VMA

2016-06-03 Thread Chris Wilson
One issue with the current VMA api is that callers do not take ownership
of the VMA they pin for their use, and correspondingly never explicitly
unpin it. Being able to track the VMA they are using, imo, allows for
simpler code that is more easily verified (and is faster and more
accurate - less guessing over state).

However, at the start are patches to take advantage of lockless request
lookups.
-Chris



[Intel-gfx] [PATCH 33/62] drm/i915: Remove obsolete engine->gpu_caches_dirty

2016-06-03 Thread Chris Wilson
Space for flushing the GPU cache prior to completing the request is
preallocated, and so emitting that flush cannot fail.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem_context.c|  2 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  9 +---
 drivers/gpu/drm/i915/i915_gem_gtt.c| 18 
 drivers/gpu/drm/i915/i915_gem_request.c|  7 ++-
 drivers/gpu/drm/i915/intel_lrc.c   | 47 +++
 drivers/gpu/drm/i915/intel_lrc.h   |  2 -
 drivers/gpu/drm/i915/intel_ringbuffer.c| 72 +++---
 drivers/gpu/drm/i915/intel_ringbuffer.h|  7 ---
 8 files changed, 39 insertions(+), 125 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 13b934ab4a8a..9eb6ab9cb610 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -529,7 +529,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 
hw_flags)
 * itlb_before_ctx_switch.
 */
if (IS_GEN6(dev_priv)) {
-   ret = req->engine->flush(req, I915_GEM_GPU_DOMAINS, 0);
+   ret = req->engine->emit_flush(req, I915_GEM_GPU_DOMAINS, 0);
if (ret)
return ret;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 186e466f932f..6e439f5d1674 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -968,10 +968,8 @@ i915_gem_execbuffer_move_to_gpu(struct 
drm_i915_gem_request *req,
if (flush_domains & I915_GEM_DOMAIN_GTT)
wmb();
 
-   /* Unconditionally invalidate gpu caches and ensure that we do flush
-* any residual writes from the previous batch.
-*/
-   return intel_engine_invalidate_all_caches(req);
+   /* Unconditionally invalidate gpu caches and TLBs. */
+   return req->engine->emit_flush(req, I915_GEM_GPU_DOMAINS, 0);
 }
 
 static bool
@@ -1130,9 +1128,6 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
 static void
 i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params)
 {
-   /* Unconditionally force add_request to emit a full flush. */
-   params->engine->gpu_caches_dirty = true;
-
/* Add a breadcrumb for the completion of the batch buffer */
__i915_add_request(params->request, params->batch_obj, true);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 6a6e69a3894f..5d718c488f23 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1664,9 +1664,9 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
int ret;
 
/* NB: TLBs must be flushed and invalidated before a switch */
-   ret = req->engine->flush(req,
-I915_GEM_GPU_DOMAINS,
-I915_GEM_GPU_DOMAINS);
+   ret = req->engine->emit_flush(req,
+ I915_GEM_GPU_DOMAINS,
+ I915_GEM_GPU_DOMAINS);
if (ret)
return ret;
 
@@ -1703,9 +1703,9 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
int ret;
 
/* NB: TLBs must be flushed and invalidated before a switch */
-   ret = req->engine->flush(req,
-I915_GEM_GPU_DOMAINS,
-I915_GEM_GPU_DOMAINS);
+   ret = req->engine->emit_flush(req,
+ I915_GEM_GPU_DOMAINS,
+ I915_GEM_GPU_DOMAINS);
if (ret)
return ret;
 
@@ -1723,9 +1723,9 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
 
/* XXX: RCS is the only one to auto invalidate the TLBs? */
if (req->engine->id != RCS) {
-   ret = req->engine->flush(req,
-I915_GEM_GPU_DOMAINS,
-I915_GEM_GPU_DOMAINS);
+   ret = req->engine->emit_flush(req,
+ I915_GEM_GPU_DOMAINS,
+ I915_GEM_GPU_DOMAINS);
if (ret)
return ret;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c 
b/drivers/gpu/drm/i915/i915_gem_request.c
index 58d84b153810..b0c6e57197bb 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -443,10 +443,9 @@ void __i915_add_request(struct drm_i915_gem_request 
*request,
 * what.
 */
if (flush_caches) {
-   if (i915.enable_execlists)
-   ret = logical_ring_flush_all_caches(request);
-   else
-   ret = intel_engine_flush_all_caches(request);
+   ret = 

[Intel-gfx] [PATCH 04/62] drm/i915: Restore waitboost credit to the synchronous waiter

2016-06-03 Thread Chris Wilson
Ideally, we want to automagically have the GPU respond to the
instantaneous load by reclocking itself. However, reclocking occurs
relatively slowly, and to the client waiting for a result from the GPU,
too late. To compensate and reduce the client latency, we allow the
first wait from a client to boost the GPU clocks to maximum. This
overcomes the lag in autoreclocking, at the expense of forcing the GPU
clocks too high. So to offset the excessive power usage, we currently
allow a client to only boost the clocks once before we detect the GPU
is idle again. This works reasonably well for, say, the first frame in a
benchmark, but for many more synchronous workloads (like OpenCL) we find
the GPU clocks remain too low. By noting a wait which would idle the GPU
(i.e. we just waited upon the last known request), we can give that
client the idle boost credit (for their next wait) without the 100ms
delay required for us to detect the GPU idle state. The intention is to
boost clients that are stalling in the process of feeding the GPU more
work (and who in doing so let the GPU idle), without granting boost
credits to clients that are throttling themselves (such as compositors).

Signed-off-by: Chris Wilson 
Cc: "Zou, Nanhai" 
Cc: Jesse Barnes 
Reviewed-by: Jesse Barnes 
---
 drivers/gpu/drm/i915/i915_gem.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index da44715c894f..bec02baef190 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1310,6 +1310,22 @@ complete:
*timeout = 0;
}
 
+   if (rps && req->seqno == req->engine->last_submitted_seqno) {
+   /* The GPU is now idle and this client has stalled.
+* Since no other client has submitted a request in the
+* meantime, assume that this client is the only one
+* supplying work to the GPU but is unable to keep that
+* work supplied because it is waiting. Since the GPU is
+* then never kept fully busy, RPS autoclocking will
+* keep the clocks relatively low, causing further delays.
+* Compensate by giving the synchronous client credit for
+* a waitboost next time.
+*/
+   spin_lock(&req->i915->rps.client_lock);
+   list_del_init(&rps->link);
+   spin_unlock(&req->i915->rps.client_lock);
+   }
+
return ret;
 }
 
-- 
2.8.1



[Intel-gfx] [PATCH 22/62] drm/i915: Treat ringbuffer writes as write to normal memory

2016-06-03 Thread Chris Wilson
Ringbuffers are now being written to either through LLC or WC paths, so
treating them as simply iomem is no longer adequate. However, for the
older !llc hardware, the hardware is documented as treating the TAIL
register update as serialising, so we can relax the barriers when filling
the rings (but even if it were not, it is still an uncached register write
and so serialising anyway).

For simplicity, let's ignore the iomem annotation.
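
A minimal sketch of the resulting emit path (names per the hunks
below):

    __intel_ringbuffer_emit(rb, MI_NOOP);  /* plain store, not iowrite32() */
    __intel_ringbuffer_advance(rb);        /* wrap tail to the ring size */
    I915_WRITE_TAIL(engine, rb->tail);     /* uncached register write: serialises */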

v2: Remove iomem from ringbuffer->virtual_address

Signed-off-by: Chris Wilson 
Reviewed-by: Ville Syrjälä 
---
 drivers/gpu/drm/i915/intel_lrc.h|  6 +++---
 drivers/gpu/drm/i915/intel_ringbuffer.h | 22 ++
 2 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index a8db42a9c50f..e99848067fb8 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -73,8 +73,9 @@ int logical_ring_flush_all_caches(struct drm_i915_gem_request 
*req);
  */
 static inline void intel_logical_ring_advance(struct intel_ringbuffer *ringbuf)
 {
-   ringbuf->tail &= ringbuf->size - 1;
+   __intel_ringbuffer_advance(ringbuf);
 }
+
 /**
  * intel_logical_ring_emit() - write a DWORD to the ringbuffer.
  * @ringbuf: Ringbuffer to write to.
@@ -83,8 +84,7 @@ static inline void intel_logical_ring_advance(struct 
intel_ringbuffer *ringbuf)
 static inline void intel_logical_ring_emit(struct intel_ringbuffer *ringbuf,
   u32 data)
 {
-   iowrite32(data, ringbuf->virtual_start + ringbuf->tail);
-   ringbuf->tail += 4;
+   __intel_ringbuffer_emit(ringbuf, data);
 }
 static inline void intel_logical_ring_emit_reg(struct intel_ringbuffer 
*ringbuf,
   i915_reg_t reg)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index b041fb6a6d01..5db7db069566 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -96,7 +96,7 @@ struct intel_ring_hangcheck {
 
 struct intel_ringbuffer {
struct drm_i915_gem_object *obj;
-   void __iomem *virtual_start;
+   void *virtual_start;
struct i915_vma *vma;
 
struct intel_engine_cs *engine;
@@ -462,12 +462,19 @@ int intel_ring_alloc_request_extras(struct 
drm_i915_gem_request *request);
 
 int __must_check intel_ring_begin(struct drm_i915_gem_request *req, int n);
 int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req);
-static inline void intel_ring_emit(struct intel_engine_cs *engine,
-  u32 data)
+static inline void __intel_ringbuffer_emit(struct intel_ringbuffer *rb,
+  u32 data)
 {
-   struct intel_ringbuffer *ringbuf = engine->buffer;
-   iowrite32(data, ringbuf->virtual_start + ringbuf->tail);
-   ringbuf->tail += 4;
+   *(uint32_t *)(rb->virtual_start + rb->tail) = data;
+   rb->tail += 4;
+}
+static inline void __intel_ringbuffer_advance(struct intel_ringbuffer *rb)
+{
+   rb->tail &= rb->size - 1;
+}
+static inline void intel_ring_emit(struct intel_engine_cs *engine, u32 data)
+{
+   __intel_ringbuffer_emit(engine->buffer, data);
 }
 static inline void intel_ring_emit_reg(struct intel_engine_cs *engine,
   i915_reg_t reg)
@@ -476,8 +483,7 @@ static inline void intel_ring_emit_reg(struct 
intel_engine_cs *engine,
 }
 static inline void intel_ring_advance(struct intel_engine_cs *engine)
 {
-   struct intel_ringbuffer *ringbuf = engine->buffer;
-   ringbuf->tail &= ringbuf->size - 1;
+   __intel_ringbuffer_advance(engine->buffer);
 }
 int __intel_ring_space(int head, int tail, int size);
 void intel_ring_update_space(struct intel_ringbuffer *ringbuf);
-- 
2.8.1



[Intel-gfx] [PATCH 43/62] drm/i915: Introduce i915_gem_active for request tracking

2016-06-03 Thread Chris Wilson
In the next patch, request tracking is made more generic, and for that
we need a new, expanded struct. To separate the logic changes from the
mechanical churn, we split the structure renaming out into this patch.
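
As a sketch, the wrapper starts out as little more than a named holder
for the request pointer (the real definition lands in
i915_gem_request.h, which is not quoted in full here, so treat this as
an assumed shape):

    struct i915_gem_active {
            struct drm_i915_gem_request *request;
    };

    static inline void
    i915_gem_active_set(struct i915_gem_active *active,
                        struct drm_i915_gem_request *request)
    {
            active->request = request;
    }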

v2: Writer's block. Add some spiel about why we track requests.
v3: Now i915_gem_active.
v4: Now with i915_gem_active_set() for attaching to the active request.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c| 10 +++---
 drivers/gpu/drm/i915/i915_drv.h|  9 +++--
 drivers/gpu/drm/i915/i915_gem.c| 58 +++---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  4 +--
 drivers/gpu/drm/i915/i915_gem_fence.c  |  6 ++--
 drivers/gpu/drm/i915/i915_gem_request.h| 41 +
 drivers/gpu/drm/i915/i915_gem_tiling.c |  2 +-
 drivers/gpu/drm/i915/i915_gem_userptr.c|  2 +-
 drivers/gpu/drm/i915/i915_gpu_error.c  |  6 ++--
 drivers/gpu/drm/i915/intel_display.c   |  8 ++---
 10 files changed, 93 insertions(+), 53 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 48c8f74e6256..2edbf9e95e7f 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -155,10 +155,10 @@ describe_obj(struct seq_file *m, struct 
drm_i915_gem_object *obj)
   obj->base.write_domain);
for_each_engine_id(engine, dev_priv, id)
seq_printf(m, "%x ",
-   
i915_gem_request_get_seqno(obj->last_read_req[id]));
+  
i915_gem_request_get_seqno(obj->last_read[id].request));
seq_printf(m, "] %x %x%s%s%s",
-  i915_gem_request_get_seqno(obj->last_write_req),
-  i915_gem_request_get_seqno(obj->last_fenced_req),
+  i915_gem_request_get_seqno(obj->last_write.request),
+  i915_gem_request_get_seqno(obj->last_fence.request),
   i915_cache_level_str(to_i915(obj->base.dev), 
obj->cache_level),
   obj->dirty ? " dirty" : "",
   obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
@@ -192,8 +192,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object 
*obj)
*t = '\0';
seq_printf(m, " (%s mappable)", s);
}
-   if (obj->last_write_req != NULL)
-   seq_printf(m, " (%s)", obj->last_write_req->engine->name);
+   if (obj->last_write.request != NULL)
+   seq_printf(m, " (%s)", obj->last_write.request->engine->name);
if (obj->frontbuffer_bits)
seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits);
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e9b48808deef..b8df48e0e32b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2220,11 +2220,10 @@ struct drm_i915_gem_object {
 * requests on one ring where the write request is older than the
 * read request. This allows for the CPU to read from an active
 * buffer by only waiting for the write to complete.
-* */
-   struct drm_i915_gem_request *last_read_req[I915_NUM_ENGINES];
-   struct drm_i915_gem_request *last_write_req;
-   /** Breadcrumb of last fenced GPU access to the buffer. */
-   struct drm_i915_gem_request *last_fenced_req;
+*/
+   struct i915_gem_active last_read[I915_NUM_ENGINES];
+   struct i915_gem_active last_write;
+   struct i915_gem_active last_fence;
 
/** Current tiling stride for the object, if it's tiled. */
uint32_t stride;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b75185273b0e..8c3b39a8e974 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1119,23 +1119,23 @@ i915_gem_object_wait_rendering(struct 
drm_i915_gem_object *obj,
return 0;
 
if (readonly) {
-   if (obj->last_write_req != NULL) {
-   ret = i915_wait_request(obj->last_write_req);
+   if (obj->last_write.request != NULL) {
+   ret = i915_wait_request(obj->last_write.request);
if (ret)
return ret;
 
-   i = obj->last_write_req->engine->id;
-   if (obj->last_read_req[i] == obj->last_write_req)
+   i = obj->last_write.request->engine->id;
+   if (obj->last_read[i].request == 
obj->last_write.request)
i915_gem_object_retire__read(obj, i);
else
i915_gem_object_retire__write(obj);
}
} else {
for (i = 0; i < I915_NUM_ENGINES; i++) {
-   if (obj->last_read_req[i] == NULL)
+   if 

[Intel-gfx] [PATCH 35/62] drm/i915: Unify legacy/execlists emission of MI_BATCHBUFFER_START

2016-06-03 Thread Chris Wilson
Both the ->dispatch_execbuffer and ->emit_bb_start callbacks do exactly
the same thing: add MI_BATCHBUFFER_START to the request's ringbuffer.
We need only one vfunc.
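
After the unification, every submission path boils down to the same
call (signature per the header changes in this patch):

    ret = engine->emit_bb_start(req, exec_start, exec_len,
                                dispatch_flags);
    if (ret)
            return ret;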

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c   |  6 +--
 drivers/gpu/drm/i915/i915_gem_render_state.c | 16 +++
 drivers/gpu/drm/i915/intel_lrc.c |  9 +++-
 drivers/gpu/drm/i915/intel_ringbuffer.c  | 67 +---
 drivers/gpu/drm/i915/intel_ringbuffer.h  | 12 +++--
 5 files changed, 55 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 8751a21cb62a..49dda93ba63c 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1293,9 +1293,9 @@ i915_gem_ringbuffer_submission(struct 
i915_execbuffer_params *params,
if (exec_len == 0)
exec_len = params->batch_obj->base.size;
 
-   ret = params->engine->dispatch_execbuffer(params->request,
- exec_start, exec_len,
- params->dispatch_flags);
+   ret = params->engine->emit_bb_start(params->request,
+   exec_start, exec_len,
+   params->dispatch_flags);
if (ret)
return ret;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c 
b/drivers/gpu/drm/i915/i915_gem_render_state.c
index 41eb9a91bfee..6aedb913f694 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -206,18 +206,18 @@ int i915_gem_render_state_init(struct 
drm_i915_gem_request *req)
if (so.rodata == NULL)
return 0;
 
-   ret = req->engine->dispatch_execbuffer(req, so.ggtt_offset,
-  so.rodata->batch_items * 4,
-  I915_DISPATCH_SECURE);
+   ret = req->engine->emit_bb_start(req, so.ggtt_offset,
+so.rodata->batch_items * 4,
+I915_DISPATCH_SECURE);
if (ret)
goto out;
 
if (so.aux_batch_size > 8) {
-   ret = req->engine->dispatch_execbuffer(req,
-  (so.ggtt_offset +
-   so.aux_batch_offset),
-  so.aux_batch_size,
-  I915_DISPATCH_SECURE);
+   ret = req->engine->emit_bb_start(req,
+(so.ggtt_offset +
+ so.aux_batch_offset),
+so.aux_batch_size,
+I915_DISPATCH_SECURE);
if (ret)
goto out;
}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 3fa2bc5297c1..71960e47277c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -843,7 +843,9 @@ int intel_execlists_submission(struct 
i915_execbuffer_params *params,
exec_start = params->batch_obj_vm_offset +
 args->batch_start_offset;
 
-   ret = engine->emit_bb_start(params->request, exec_start, 
params->dispatch_flags);
+   ret = engine->emit_bb_start(params->request,
+   exec_start, args->batch_len,
+   params->dispatch_flags);
if (ret)
return ret;
 
@@ -1495,7 +1497,8 @@ static int intel_logical_ring_emit_pdps(struct 
drm_i915_gem_request *req)
 }
 
 static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
- u64 offset, unsigned dispatch_flags)
+ u64 offset, u32 len,
+ unsigned dispatch_flags)
 {
struct intel_ring *ring = req->ring;
bool ppgtt = !(dispatch_flags & I915_DISPATCH_SECURE);
@@ -1739,12 +1742,14 @@ static int intel_lr_context_render_state_init(struct 
drm_i915_gem_request *req)
return 0;
 
ret = req->engine->emit_bb_start(req, so.ggtt_offset,
+so.rodata->batch_items * 4,
 I915_DISPATCH_SECURE);
if (ret)
goto out;
 
ret = req->engine->emit_bb_start(req,
 (so.ggtt_offset + so.aux_batch_offset),
+so.aux_batch_size,
 I915_DISPATCH_SECURE);
if (ret)
goto out;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 

[Intel-gfx] [PATCH 07/62] drm/i915: Remove temporary RPM wakeref assert disables

2016-06-03 Thread Chris Wilson
Now that the last couple of hacks have been removed from the runtime
power management users, we can fully enable the asserts.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/intel_drv.h | 7 ---
 1 file changed, 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 3f39004fbc6a..a29618dc7e98 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1616,13 +1616,6 @@ enable_rpm_wakeref_asserts(struct drm_i915_private 
*dev_priv)
atomic_dec(&dev_priv->pm.wakeref_count);
 }
 
-/* TODO: convert users of these to rely instead on proper RPM refcounting */
-#define DISABLE_RPM_WAKEREF_ASSERTS(dev_priv)  \
-   disable_rpm_wakeref_asserts(dev_priv)
-
-#define ENABLE_RPM_WAKEREF_ASSERTS(dev_priv)   \
-   enable_rpm_wakeref_asserts(dev_priv)
-
 void intel_runtime_pm_get(struct drm_i915_private *dev_priv);
 bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv);
 void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv);
-- 
2.8.1



[Intel-gfx] [PATCH 39/62] drm/i915: Reuse legacy breadcrumbs + tail emission

2016-06-03 Thread Chris Wilson
As GEN6+ is now a simple variant on the basic breadcrumbs + tail write,
reuse the common code.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 68 +
 1 file changed, 27 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index b4edbdeac27e..97836e6c61f5 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1328,25 +1328,18 @@ static int gen6_signal(struct drm_i915_gem_request 
*signaller_req)
return 0;
 }
 
-/**
- * gen6_emit_request - Update the semaphore mailbox registers
- *
- * @request - request to write to the ring
- *
- * Update the mailbox registers in the *other* rings with the current seqno.
- * This acts like a signal in the canonical semaphore.
- */
-static int gen6_emit_request(struct drm_i915_gem_request *req)
+static void i9xx_submit_request(struct drm_i915_gem_request *request)
+{
+   struct drm_i915_private *dev_priv = request->i915;
+   I915_WRITE_TAIL(request->engine, request->tail);
+}
+
+
+static int i9xx_emit_request(struct drm_i915_gem_request *req)
 {
struct intel_ring *ring = req->ring;
int ret;
 
-   if (req->engine->semaphore.signal) {
-   ret = req->engine->semaphore.signal(req);
-   if (ret)
-   return ret;
-   }
-
ret = intel_ring_begin(req, 4);
if (ret)
return ret;
@@ -1362,6 +1355,25 @@ static int gen6_emit_request(struct drm_i915_gem_request 
*req)
return 0;
 }
 
+/**
+ * gen6_emit_request - Update the semaphore mailbox registers
+ *
+ * @request - request to write to the ring
+ *
+ * Update the mailbox registers in the *other* rings with the current seqno.
+ * This acts like a signal in the canonical semaphore.
+ */
+static int gen6_emit_request(struct drm_i915_gem_request *req)
+{
+   if (req->engine->semaphore.signal) {
+   int ret = req->engine->semaphore.signal(req);
+   if (ret)
+   return ret;
+   }
+
+   return i9xx_emit_request(req);
+}
+
 static int gen8_render_emit_request(struct drm_i915_gem_request *req)
 {
struct intel_engine_cs *engine = req->engine;
@@ -1599,32 +1611,6 @@ bsd_ring_flush(struct drm_i915_gem_request *req,
return 0;
 }
 
-static int i9xx_emit_request(struct drm_i915_gem_request *req)
-{
-   struct intel_ring *ring = req->ring;
-   int ret;
-
-   ret = intel_ring_begin(req, 4);
-   if (ret)
-   return ret;
-
-   intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
-   intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
-   intel_ring_emit(ring, req->fence.seqno);
-   intel_ring_emit(ring, MI_USER_INTERRUPT);
-   intel_ring_advance(ring);
-
-   req->tail = intel_ring_get_tail(ring);
-
-   return 0;
-}
-
-static void i9xx_submit_request(struct drm_i915_gem_request *request)
-{
-   struct drm_i915_private *dev_priv = request->i915;
-   I915_WRITE_TAIL(request->engine, request->tail);
-}
-
 static void
 gen6_ring_enable_irq(struct intel_engine_cs *engine)
 {
-- 
2.8.1



[Intel-gfx] [PATCH 31/62] drm/i915: Rename residual ringbuf parameters

2016-06-03 Thread Chris Wilson
Now that we have a clear ring/engine split and a struct intel_ring, we
no longer need the stopgap ringbuf names.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 66 -
 drivers/gpu/drm/i915/intel_ringbuffer.h |  6 +--
 2 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 942711cd5495..d643698da830 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -47,15 +47,15 @@ int __intel_ring_space(int head, int tail, int size)
return space - I915_RING_FREE_SPACE;
 }
 
-void intel_ring_update_space(struct intel_ring *ringbuf)
+void intel_ring_update_space(struct intel_ring *ring)
 {
-   if (ringbuf->last_retired_head != -1) {
-   ringbuf->head = ringbuf->last_retired_head;
-   ringbuf->last_retired_head = -1;
+   if (ring->last_retired_head != -1) {
+   ring->head = ring->last_retired_head;
+   ring->last_retired_head = -1;
}
 
-   ringbuf->space = __intel_ring_space(ringbuf->head & HEAD_ADDR,
-   ringbuf->tail, ringbuf->size);
+   ring->space = __intel_ring_space(ring->head & HEAD_ADDR,
+ring->tail, ring->size);
 }
 
 static void __intel_engine_submit(struct intel_engine_cs *engine)
@@ -1894,25 +1894,25 @@ static int init_phys_status_page(struct intel_engine_cs 
*engine)
return 0;
 }
 
-void intel_unpin_ring(struct intel_ring *ringbuf)
+void intel_unpin_ring(struct intel_ring *ring)
 {
-   GEM_BUG_ON(ringbuf->vma == NULL);
-   GEM_BUG_ON(ringbuf->vaddr == NULL);
+   GEM_BUG_ON(ring->vma == NULL);
+   GEM_BUG_ON(ring->vaddr == NULL);
 
-   if (HAS_LLC(ringbuf->obj->base.dev) && !ringbuf->obj->stolen)
-   i915_gem_object_unpin_map(ringbuf->obj);
+   if (HAS_LLC(ring->obj->base.dev) && !ring->obj->stolen)
+   i915_gem_object_unpin_map(ring->obj);
else
-   i915_vma_unpin_iomap(ringbuf->vma);
-   ringbuf->vaddr = NULL;
+   i915_vma_unpin_iomap(ring->vma);
+   ring->vaddr = NULL;
 
-   i915_gem_object_ggtt_unpin(ringbuf->obj);
-   ringbuf->vma = NULL;
+   i915_gem_object_ggtt_unpin(ring->obj);
+   ring->vma = NULL;
 }
 
 int intel_pin_and_map_ring(struct drm_i915_private *dev_priv,
-  struct intel_ring *ringbuf)
+  struct intel_ring *ring)
 {
-   struct drm_i915_gem_object *obj = ringbuf->obj;
+   struct drm_i915_gem_object *obj = ring->obj;
/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
unsigned flags = PIN_OFFSET_BIAS | 4096;
void *addr;
@@ -1952,8 +1952,8 @@ int intel_pin_and_map_ring(struct drm_i915_private 
*dev_priv,
}
}
 
-   ringbuf->vaddr = addr;
-   ringbuf->vma = i915_gem_obj_to_ggtt(obj);
+   ring->vaddr = addr;
+   ring->vma = i915_gem_obj_to_ggtt(obj);
return 0;
 
 err_unpin:
@@ -1961,29 +1961,29 @@ err_unpin:
return ret;
 }
 
-static void intel_destroy_ringbuffer_obj(struct intel_ring *ringbuf)
+static void intel_destroy_ringbuffer_obj(struct intel_ring *ring)
 {
-   i915_gem_object_put(ringbuf->obj);
-   ringbuf->obj = NULL;
+   i915_gem_object_put(ring->obj);
+   ring->obj = NULL;
 }
 
 static int intel_alloc_ringbuffer_obj(struct drm_device *dev,
- struct intel_ring *ringbuf)
+ struct intel_ring *ring)
 {
struct drm_i915_gem_object *obj;
 
obj = NULL;
if (!HAS_LLC(dev))
-   obj = i915_gem_object_create_stolen(dev, ringbuf->size);
+   obj = i915_gem_object_create_stolen(dev, ring->size);
if (obj == NULL)
-   obj = i915_gem_object_create(dev, ringbuf->size);
+   obj = i915_gem_object_create(dev, ring->size);
if (IS_ERR(obj))
return PTR_ERR(obj);
 
/* mark ring buffers as read-only from GPU side by default */
obj->gt_ro = 1;
 
-   ringbuf->obj = obj;
+   ring->obj = obj;
 
return 0;
 }
@@ -2091,7 +2091,7 @@ static int intel_init_engine(struct drm_device *dev,
 struct intel_engine_cs *engine)
 {
struct drm_i915_private *dev_priv = to_i915(dev);
-   struct intel_ring *ringbuf;
+   struct intel_ring *ring;
int ret;
 
WARN_ON(engine->buffer);
@@ -2119,12 +2119,12 @@ static int intel_init_engine(struct drm_device *dev,
if (ret)
goto error;
 
-   ringbuf = intel_engine_create_ring(engine, 32 * PAGE_SIZE);
-   if (IS_ERR(ringbuf)) {
-   ret = PTR_ERR(ringbuf);
+   ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE);
+   if 

[Intel-gfx] [PATCH 56/62] drm/i915: Count how many VMA are bound for an object

2016-06-03 Thread Chris Wilson
Since we may have VMAs allocated for an object whose binding was
interrupted, there is a disparity between having elements on the
obj->vma_list and actually being bound. i915_gem_obj_bound_any() performs
this check, but it is not rigorously observed - add an explicit count to
make the check cheap and reliable.
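
A sketch of the invariant the counter maintains (the helper name here is
hypothetical, not part of this patch):

    /* obj->bind_count == number of VMAs on obj->vma_list for which
     * drm_mm_node_allocated(&vma->node) is true.
     */
    static bool i915_gem_obj_is_bound(struct drm_i915_gem_object *obj)
    {
        return obj->bind_count != 0;    /* O(1), no list walk */
    }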

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c  | 12 +--
 drivers/gpu/drm/i915/i915_drv.h  |  3 ++-
 drivers/gpu/drm/i915/i915_gem.c  | 34 +---
 drivers/gpu/drm/i915/i915_gem_shrinker.c | 17 +---
 drivers/gpu/drm/i915/i915_gem_stolen.c   |  1 +
 5 files changed, 23 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 2e0eb8f5cf35..51f84dd37675 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -177,6 +177,9 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object 
*obj)
if (obj->fence_reg != I915_FENCE_REG_NONE)
seq_printf(m, " (fence: %d)", obj->fence_reg);
list_for_each_entry(vma, &obj->vma_list, obj_link) {
+   if (!drm_mm_node_allocated(&vma->node))
+   continue;
+
seq_printf(m, " (%sgtt offset: %08llx, size: %08llx",
   vma->is_ggtt ? "g" : "pp",
   vma->node.start, vma->node.size);
@@ -341,11 +344,11 @@ static int per_file_stats(int id, void *ptr, void *data)
struct drm_i915_gem_object *obj = ptr;
struct file_stats *stats = data;
struct i915_vma *vma;
-   int bound = 0;
 
stats->count++;
stats->total += obj->base.size;
-
+   if (!obj->bind_count)
+   stats->unbound += obj->base.size;
if (obj->base.name || obj->base.dma_buf)
stats->shared += obj->base.size;
 
@@ -353,8 +356,6 @@ static int per_file_stats(int id, void *ptr, void *data)
if (!drm_mm_node_allocated(&vma->node))
continue;
 
-   bound++;
-
if (vma->is_ggtt) {
stats->global += vma->node.size;
} else {
@@ -372,9 +373,6 @@ static int per_file_stats(int id, void *ptr, void *data)
stats->inactive += vma->node.size;
}
 
-   if (!bound)
-   stats->unbound += obj->base.size;
-
return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 912d54b6998a..dd3f7afdf423 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2198,6 +2198,8 @@ struct drm_i915_gem_object {
 
unsigned int frontbuffer_bits:INTEL_FRONTBUFFER_BITS;
 
+   /** Count of VMA actually bound by this object */
+   unsigned int bind_count;
unsigned int pin_display;
 
struct sg_table *pages;
@@ -3159,7 +3161,6 @@ i915_gem_obj_ggtt_offset(struct drm_i915_gem_object *o)
return i915_gem_obj_ggtt_offset_view(o, &i915_ggtt_view_normal);
 }
 
-bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o);
 bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o,
  const struct i915_ggtt_view *view);
 bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ca6b55f52f8b..2ba467c0b0b7 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1822,7 +1822,7 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
if (obj->pages_pin_count)
return -EBUSY;
 
-   BUG_ON(i915_gem_obj_bound_any(obj));
+   BUG_ON(obj->bind_count);
 
/* ->put_pages might need to allocate memory for the bit17 swizzle
 * array, hence protect them from being reaped by removing them from gtt
@@ -2508,7 +2508,6 @@ static void __i915_vma_iounmap(struct i915_vma *vma)
 static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
 {
struct drm_i915_gem_object *obj = vma->obj;
-   struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
int ret;
 
if (list_empty(&vma->obj_link))
@@ -2522,7 +2521,8 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool 
wait)
if (vma->pin_count)
return -EBUSY;
 
-   BUG_ON(obj->pages == NULL);
+   GEM_BUG_ON(obj->bind_count == 0);
+   GEM_BUG_ON(obj->pages == NULL);
 
if (wait) {
ret = i915_gem_object_wait_rendering(obj, false);
@@ -2562,8 +2562,9 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool 
wait)
 
/* Since the unbound list is global, only move to that list if
 * no more VMAs exist. */
-   if (list_empty(&obj->vma_list))
-   list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list);
+   if (--obj->bind_count == 0)
+   list_move_tail(&obj->global_list,
+  

[Intel-gfx] [PATCH 30/62] drm/i915: Rename struct intel_ringbuffer to struct intel_ring

2016-06-03 Thread Chris Wilson
The state stored in this struct is not only the information about the
buffer object, but the ring used to communicate with the hardware. Using
buffer here is overly specific and, for me at least, conflates with the
notion of buffer objects themselves.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c|  11 ++-
 drivers/gpu/drm/i915/i915_drv.h|   4 +-
 drivers/gpu/drm/i915/i915_gem.c|  24 +++---
 drivers/gpu/drm/i915/i915_gem_context.c|   6 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   6 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c|   6 +-
 drivers/gpu/drm/i915/i915_gem_request.c|   6 +-
 drivers/gpu/drm/i915/i915_gem_request.h|   2 +-
 drivers/gpu/drm/i915/i915_gpu_error.c  |   8 +-
 drivers/gpu/drm/i915/i915_irq.c|  14 ++--
 drivers/gpu/drm/i915/intel_display.c   |  10 +--
 drivers/gpu/drm/i915/intel_lrc.c   |  34 
 drivers/gpu/drm/i915/intel_mocs.c  |   4 +-
 drivers/gpu/drm/i915/intel_overlay.c   |   8 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c| 127 ++---
 drivers/gpu/drm/i915/intel_ringbuffer.h|  51 ++--
 16 files changed, 159 insertions(+), 162 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 8d3bc2bd532e..48c8f74e6256 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1415,7 +1415,7 @@ static int i915_hangcheck_info(struct seq_file *m, void 
*unused)
intel_runtime_pm_get(dev_priv);
 
for_each_engine_id(engine, dev_priv, id) {
-   acthd[id] = intel_ring_get_active_head(engine);
+   acthd[id] = intel_engine_get_active_head(engine);
seqno[id] = intel_engine_get_seqno(engine);
}
 
@@ -2013,12 +2013,11 @@ static int i915_gem_framebuffer_info(struct seq_file 
*m, void *data)
return 0;
 }
 
-static void describe_ctx_ringbuf(struct seq_file *m,
-struct intel_ringbuffer *ringbuf)
+static void describe_ctx_ring(struct seq_file *m, struct intel_ring *ring)
 {
seq_printf(m, " (ringbuffer, space: %d, head: %u, tail: %u, last head: 
%d)",
-  ringbuf->space, ringbuf->head, ringbuf->tail,
-  ringbuf->last_retired_head);
+  ring->space, ring->head, ring->tail,
+  ring->last_retired_head);
 }
 
 static int i915_context_status(struct seq_file *m, void *unused)
@@ -2063,7 +2062,7 @@ static int i915_context_status(struct seq_file *m, void 
*unused)
if (ce->state)
describe_obj(m, ce->state);
if (ce->ring)
-   describe_ctx_ringbuf(m, ce->ring);
+   describe_ctx_ring(m, ce->ring);
seq_putc(m, '\n');
}
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index de54adbf5768..fe39cd2584f3 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -507,7 +507,7 @@ struct drm_i915_error_state {
bool waiting;
int num_waiters;
int hangcheck_score;
-   enum intel_ring_hangcheck_action hangcheck_action;
+   enum intel_engine_hangcheck_action hangcheck_action;
int num_requests;
 
/* our own tracking of ring head and tail */
@@ -881,7 +881,7 @@ struct i915_gem_context {
 
struct intel_context {
struct drm_i915_gem_object *state;
-   struct intel_ringbuffer *ring;
+   struct intel_ring *ring;
struct i915_vma *lrc_vma;
uint32_t *lrc_reg_state;
u64 lrc_desc;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8edd79ad08b4..034d81c54d67 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2230,7 +2230,7 @@ static void i915_gem_reset_engine_status(struct 
intel_engine_cs *engine)
 
 static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
 {
-   struct intel_ringbuffer *buffer;
+   struct intel_ring *ring;
 
while (!list_empty(>active_list)) {
struct drm_i915_gem_object *obj;
@@ -2279,12 +2279,12 @@ static void i915_gem_reset_engine_cleanup(struct 
intel_engine_cs *engine)
 * upon reset is less than when we start. Do one more pass over
 * all the ringbuffers to reset last_retired_head.
 */
-   list_for_each_entry(buffer, &engine->buffers, link) {
-   buffer->last_retired_head = buffer->tail;
-   intel_ring_update_space(buffer);
+   list_for_each_entry(ring, &engine->buffers, link) {
+   ring->last_retired_head = ring->tail;
+   intel_ring_update_space(ring);
}
 
-   

[Intel-gfx] [PATCH 41/62] drm/i915: Unify legacy/execlists submit_execbuf callbacks

2016-06-03 Thread Chris Wilson
Now that emitting requests is identical between legacy and execlists, we
can use the same function to build up the ring for submitting to either
engine. (With the exception of i915_switch_context(), but in time that
will also be handled gracefully.)
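
The net effect, roughly (a sketch; names as in the diff below):

    /* Before: the backend was chosen at init time,
     *     dev_priv->gt.execbuf_submit(params, args, vmas);
     * After: one static submission path in i915_gem_execbuffer.c serves
     * both backends, with i915_switch_context() returning early when
     * execlists are enabled.
     */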

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h|  20 -
 drivers/gpu/drm/i915/i915_gem.c|   2 -
 drivers/gpu/drm/i915/i915_gem_context.c|   3 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  24 --
 drivers/gpu/drm/i915/intel_lrc.c   | 129 -
 drivers/gpu/drm/i915/intel_lrc.h   |   4 -
 6 files changed, 20 insertions(+), 162 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b1e00b42a830..f95378f33f6c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1686,18 +1686,6 @@ struct i915_virtual_gpu {
bool active;
 };
 
-struct i915_execbuffer_params {
-   struct drm_device   *dev;
-   struct drm_file *file;
-   uint32_tdispatch_flags;
-   uint32_targs_batch_start_offset;
-   uint64_tbatch_obj_vm_offset;
-   struct intel_engine_cs *engine;
-   struct drm_i915_gem_object  *batch_obj;
-   struct i915_gem_context*ctx;
-   struct drm_i915_gem_request *request;
-};
-
 /* used in computing the new watermarks state */
 struct intel_wm_config {
unsigned int num_pipes_active;
@@ -1996,9 +1984,6 @@ struct drm_i915_private {
 
/* Abstract the submission mechanism (legacy ringbuffer or execlists) 
away */
struct {
-   int (*execbuf_submit)(struct i915_execbuffer_params *params,
- struct drm_i915_gem_execbuffer2 *args,
- struct list_head *vmas);
int (*init_engines)(struct drm_device *dev);
void (*cleanup_engine)(struct intel_engine_cs *engine);
void (*stop_engine)(struct intel_engine_cs *engine);
@@ -2906,11 +2891,6 @@ int i915_gem_set_domain_ioctl(struct drm_device *dev, 
void *data,
  struct drm_file *file_priv);
 int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
 struct drm_file *file_priv);
-void i915_gem_execbuffer_move_to_active(struct list_head *vmas,
-   struct drm_i915_gem_request *req);
-int i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
-  struct drm_i915_gem_execbuffer2 *args,
-  struct list_head *vmas);
 int i915_gem_execbuffer(struct drm_device *dev, void *data,
struct drm_file *file_priv);
 int i915_gem_execbuffer2(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index de1e866276c5..6c4c2c711dc7 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4249,12 +4249,10 @@ int i915_gem_init(struct drm_device *dev)
mutex_lock(&dev->struct_mutex);
 
if (!i915.enable_execlists) {
-   dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission;
dev_priv->gt.init_engines = i915_gem_init_engines;
dev_priv->gt.cleanup_engine = intel_engine_cleanup;
dev_priv->gt.stop_engine = intel_engine_stop;
} else {
-   dev_priv->gt.execbuf_submit = intel_execlists_submission;
dev_priv->gt.init_engines = intel_logical_rings_init;
dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
dev_priv->gt.stop_engine = intel_logical_ring_stop;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 9eb6ab9cb610..8641783618dc 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -853,8 +853,9 @@ int i915_switch_context(struct drm_i915_gem_request *req)
 {
struct intel_engine_cs *engine = req->engine;
 
-   WARN_ON(i915.enable_execlists);
lockdep_assert_held(&req->i915->dev->struct_mutex);
+   if (i915.enable_execlists)
+   return 0;
 
if (!req->ctx->engine[engine->id].state) {
struct i915_gem_context *to = req->ctx;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 49dda93ba63c..c2d703323fc2 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -41,6 +41,18 @@
 
 #define BATCH_OFFSET_BIAS (256*1024)
 
+struct i915_execbuffer_params {
+   struct drm_device   *dev;
+   struct drm_file *file;
+   uint32_t

[Intel-gfx] [PATCH 37/62] drm/i915: Unify request submission

2016-06-03 Thread Chris Wilson
Move request submission from emit_request into its own common vfunc
from i915_add_request().
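
The resulting two-phase flow inside __i915_add_request(), in outline
(annotations are editorial):

    /* Phase 1: write the breadcrumb into the ringbuffer. */
    ret = engine->emit_request(request);
    WARN(ret, "emit|add_request failed: %d!\n", ret);

    /* Phase 2: hand the request to the hardware - a legacy tail write,
     * execlists_context_queue() or i915_guc_submit(), per backend.
     */
    engine->submit_request(request);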

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem_request.c|  8 +++---
 drivers/gpu/drm/i915/i915_guc_submission.c |  4 +--
 drivers/gpu/drm/i915/intel_guc.h   |  2 +-
 drivers/gpu/drm/i915/intel_lrc.c   | 13 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c| 39 ++
 drivers/gpu/drm/i915/intel_ringbuffer.h| 23 +-
 6 files changed, 41 insertions(+), 48 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c 
b/drivers/gpu/drm/i915/i915_gem_request.c
index 5fef1c291b25..a55042ff7994 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -459,12 +459,10 @@ void __i915_add_request(struct drm_i915_gem_request 
*request,
 */
request->postfix = intel_ring_get_tail(ring);
 
-   if (i915.enable_execlists)
-   ret = engine->emit_request(request);
-   else
-   ret = engine->add_request(request);
/* Not allowed to fail! */
+   ret = engine->emit_request(request);
WARN(ret, "emit|add_request failed: %d!\n", ret);
+
/* Sanity check that the reserved size was large enough. */
ret = intel_ring_get_tail(ring) - request_start;
if (ret < 0)
@@ -475,6 +473,8 @@ void __i915_add_request(struct drm_i915_gem_request 
*request,
  reserved_tail, ret);
 
i915_gem_mark_busy(request->i915, engine);
+
+   engine->submit_request(request);
 }
 
 static unsigned long local_clock_us(unsigned *cpu)
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c 
b/drivers/gpu/drm/i915/i915_guc_submission.c
index 8aa3cf8cac45..cc4792df249d 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -562,7 +562,7 @@ static void guc_add_workqueue_item(struct i915_guc_client 
*gc,
  * The only error here arises if the doorbell hardware isn't functioning
 * as expected, which really shouldn't happen.
  */
-int i915_guc_submit(struct drm_i915_gem_request *rq)
+void i915_guc_submit(struct drm_i915_gem_request *rq)
 {
unsigned int engine_id = rq->engine->guc_id;
struct intel_guc *guc = &rq->i915->guc;
@@ -579,8 +579,6 @@ int i915_guc_submit(struct drm_i915_gem_request *rq)
 
guc->submissions[engine_id] += 1;
guc->last_seqno[engine_id] = rq->fence.seqno;
-
-   return b_ret;
 }
 
 /*
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 41601c71f529..7f9063385258 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -159,7 +159,7 @@ extern int intel_guc_resume(struct drm_device *dev);
 int i915_guc_submission_init(struct drm_device *dev);
 int i915_guc_submission_enable(struct drm_device *dev);
 int i915_guc_wq_check_space(struct drm_i915_gem_request *rq);
-int i915_guc_submit(struct drm_i915_gem_request *rq);
+void i915_guc_submit(struct drm_i915_gem_request *rq);
 void i915_guc_submission_disable(struct drm_device *dev);
 void i915_guc_submission_fini(struct drm_device *dev);
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 71960e47277c..eee9274f7516 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -751,12 +751,6 @@ intel_logical_ring_advance_and_submit(struct 
drm_i915_gem_request *request)
 */
request->previous_context = engine->last_context;
engine->last_context = request->ctx;
-
-   if (i915.enable_guc_submission)
-   i915_guc_submit(request);
-   else
-   execlists_context_queue(request);
-
return 0;
 }
 
@@ -1834,8 +1828,13 @@ logical_ring_default_vfuncs(struct intel_engine_cs 
*engine)
 {
/* Default vfuncs which can be overridden by each engine. */
engine->init_hw = gen8_init_common_ring;
-   engine->emit_request = gen8_emit_request;
engine->emit_flush = gen8_emit_flush;
+   engine->emit_request = gen8_emit_request;
+   if (i915.enable_guc_submission)
+   engine->submit_request = i915_guc_submit;
+   else
+   engine->submit_request = execlists_context_queue;
+
engine->irq_enable = gen8_logical_ring_enable_irq;
engine->irq_disable = gen8_logical_ring_disable_irq;
engine->emit_bb_start = gen8_emit_bb_start;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index db38abddfec1..b7b5c2d94db5 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1341,15 +1341,14 @@ static int gen6_signal(struct drm_i915_gem_request 
*signaller_req,
 }
 
 /**
- * gen6_add_request - Update the semaphore mailbox registers
+ * gen6_emit_request - Update the semaphore mailbox registers
  *
  * @request - request to write to the ring
  

[Intel-gfx] [PATCH 61/62] drm/i915: Mark the context and address space as closed

2016-06-03 Thread Chris Wilson
When the user closes the context, mark it and the dependent address space
as closed. As we use an asynchronous destruct method, this has two purposes.
First it allows us to flag the closed context and detect internal errors if
we try to create any new objects for it (as it is removed from the user's
namespace, these should be internal bugs only). And secondly, it allows
us to immediately reap stale VMAs.
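
A sketch of the internal-error check the flag enables (the call site is
illustrative, not lifted from the patch):

    /* Instantiating new state for a closed context is an internal bug:
     * the user can no longer reach it.
     */
    GEM_BUG_ON(ctx->closed);
    GEM_BUG_ON(ctx->ppgtt && ctx->ppgtt->base.closed);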

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h |  1 +
 drivers/gpu/drm/i915/i915_gem.c | 15 ++--
 drivers/gpu/drm/i915/i915_gem_context.c | 43 -
 drivers/gpu/drm/i915/i915_gem_gtt.c |  9 +--
 drivers/gpu/drm/i915/i915_gem_gtt.h |  9 +++
 drivers/gpu/drm/i915/i915_gem_stolen.c  |  2 +-
 6 files changed, 63 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e494e692fef0..492e5e73c1ca 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -890,6 +890,7 @@ struct i915_gem_context {
struct list_head link;
 
u8 remap_slice;
+   bool closed:1;
 };
 
 enum fb_op_origin {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e7595ab02255..3e12122f0f1f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2578,12 +2578,15 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool 
wait)
__i915_vma_iounmap(vma);
}
 
-   trace_i915_vma_unbind(vma);
-
-   vma->vm->unbind_vma(vma);
+   if (likely(!vma->vm->closed)) {
+   trace_i915_vma_unbind(vma);
+   vma->vm->unbind_vma(vma);
+   }
vma->bound = 0;
 
-   list_del_init(&vma->vm_link);
+   drm_mm_remove_node(&vma->node);
+   list_move_tail(&vma->vm_link, &vma->vm->unbound_list);
+
if (vma->is_ggtt) {
if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
obj->map_and_fenceable = false;
@@ -2594,8 +2597,6 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool 
wait)
vma->ggtt_view.pages = NULL;
}
 
-   drm_mm_remove_node(&vma->node);
-
/* Since the unbound list is global, only move to that list if
 * no more VMAs exist. */
if (--obj->bind_count == 0)
@@ -2832,7 +2833,7 @@ search_free:
goto err_remove_node;
 
list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
-   list_add_tail(&vma->vm_link, &vm->inactive_list);
+   list_move_tail(&vma->vm_link, &vm->inactive_list);
obj->bind_count++;
 
return vma;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index cace85998204..f04073469853 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -156,6 +156,7 @@ void i915_gem_context_free(struct kref *ctx_ref)
 
lockdep_assert_held(&ctx->i915->dev->struct_mutex);
trace_i915_context_free(ctx);
+   GEM_BUG_ON(!ctx->closed);
 
/*
 * This context is going away and we need to remove all VMAs still
@@ -224,6 +225,37 @@ i915_gem_alloc_context_obj(struct drm_device *dev, size_t 
size)
return obj;
 }
 
+static void i915_ppgtt_close(struct i915_address_space *vm)
+{
+   struct list_head *phases[] = {
+   &vm->active_list,
+   &vm->inactive_list,
+   &vm->unbound_list,
+   NULL,
+   }, **phase;
+
+   GEM_BUG_ON(vm->closed);
+   vm->closed = true;
+
+   for (phase = phases; *phase; phase++) {
+   struct i915_vma *vma, *vn;
+
+   list_for_each_entry_safe(vma, vn, *phase, vm_link)
+   if (!vma->closed)
+   i915_vma_close(vma);
+   }
+}
+
+static void context_close(struct i915_gem_context *ctx)
+{
+   GEM_BUG_ON(ctx->closed);
+   ctx->closed = true;
+   if (ctx->ppgtt)
+   i915_ppgtt_close(&ctx->ppgtt->base);
+   ctx->file_priv = ERR_PTR(-EBADF);
+   i915_gem_context_put(ctx);
+}
+
 static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out)
 {
int ret;
@@ -301,7 +333,7 @@ __create_hw_context(struct drm_device *dev,
return ctx;
 
 err_out:
-   i915_gem_context_put(ctx);
+   context_close(ctx);
return ERR_PTR(ret);
 }
 
@@ -330,7 +362,7 @@ i915_gem_create_context(struct drm_device *dev,
DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n",
 PTR_ERR(ppgtt));
idr_remove(&file_priv->context_idr, ctx->user_handle);
-   i915_gem_context_put(ctx);
+   context_close(ctx);
return ERR_CAST(ppgtt);
}
 
@@ -467,7 +499,7 @@ void i915_gem_context_fini(struct drm_device *dev)
 
lockdep_assert_held(&dev->struct_mutex);
 
-   

[Intel-gfx] [PATCH 23/62] drm/i915: Rename ring->virtual_start as ring->vaddr

2016-06-03 Thread Chris Wilson
Just a different colour to better match virtual addresses elsewhere.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_irq.c | 8 
 drivers/gpu/drm/i915/intel_ringbuffer.c | 9 -
 drivers/gpu/drm/i915/intel_ringbuffer.h | 4 ++--
 3 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 860235d1e0bf..42149153510e 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2906,7 +2906,7 @@ semaphore_waits_for(struct intel_engine_cs *engine, u32 
*seqno)
head &= engine->buffer->size - 1;
 
/* This here seems to blow up */
-   cmd = ioread32(engine->buffer->virtual_start + head);
+   cmd = ioread32(engine->buffer->vaddr + head);
if (cmd == ipehr)
break;
 
@@ -2916,11 +2916,11 @@ semaphore_waits_for(struct intel_engine_cs *engine, u32 
*seqno)
if (!i)
return NULL;
 
-   *seqno = ioread32(engine->buffer->virtual_start + head + 4) + 1;
+   *seqno = ioread32(engine->buffer->vaddr + head + 4) + 1;
if (INTEL_GEN(dev_priv) >= 8) {
-   offset = ioread32(engine->buffer->virtual_start + head + 12);
+   offset = ioread32(engine->buffer->vaddr + head + 12);
offset <<= 32;
-   offset = ioread32(engine->buffer->virtual_start + head + 8);
+   offset = ioread32(engine->buffer->vaddr + head + 8);
}
return semaphore_wait_to_signaller_ring(engine, ipehr, offset);
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 71ddf1dfea76..75b6d6eee0ac 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1892,13 +1892,13 @@ static int init_phys_status_page(struct intel_engine_cs 
*engine)
 void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
 {
GEM_BUG_ON(ringbuf->vma == NULL);
-   GEM_BUG_ON(ringbuf->virtual_start == NULL);
+   GEM_BUG_ON(ringbuf->vaddr == NULL);
 
if (HAS_LLC(ringbuf->obj->base.dev) && !ringbuf->obj->stolen)
i915_gem_object_unpin_map(ringbuf->obj);
else
i915_vma_unpin_iomap(ringbuf->vma);
-   ringbuf->virtual_start = NULL;
+   ringbuf->vaddr = NULL;
 
i915_gem_object_ggtt_unpin(ringbuf->obj);
ringbuf->vma = NULL;
@@ -1947,7 +1947,7 @@ int intel_pin_and_map_ringbuffer_obj(struct 
drm_i915_private *dev_priv,
}
}
 
-   ringbuf->virtual_start = addr;
+   ringbuf->vaddr = addr;
ringbuf->vma = i915_gem_obj_to_ggtt(obj);
return 0;
 
@@ -2317,8 +2317,7 @@ int intel_ring_begin(struct drm_i915_gem_request *req, 
int num_dwords)
GEM_BUG_ON(ringbuf->tail + remain_actual > ringbuf->size);
 
/* Fill the tail with MI_NOOP */
-   memset(ringbuf->virtual_start + ringbuf->tail,
-  0, remain_actual);
+   memset(ringbuf->vaddr + ringbuf->tail, 0, remain_actual);
ringbuf->tail = 0;
ringbuf->space -= remain_actual;
}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 5db7db069566..3cbcdd5751ad 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -96,7 +96,7 @@ struct intel_ring_hangcheck {
 
 struct intel_ringbuffer {
struct drm_i915_gem_object *obj;
-   void *virtual_start;
+   void *vaddr;
struct i915_vma *vma;
 
struct intel_engine_cs *engine;
@@ -465,7 +465,7 @@ int __must_check intel_ring_cacheline_align(struct 
drm_i915_gem_request *req);
 static inline void __intel_ringbuffer_emit(struct intel_ringbuffer *rb,
   u32 data)
 {
-   *(uint32_t *)(rb->virtual_start + rb->tail) = data;
+   *(uint32_t *)(rb->vaddr + rb->tail) = data;
rb->tail += 4;
 }
 static inline void __intel_ringbuffer_advance(struct intel_ringbuffer *rb)
-- 
2.8.1



[Intel-gfx] [PATCH 62/62] Revert "drm/i915: Clean up associated VMAs on context destruction"

2016-06-03 Thread Chris Wilson
This reverts commit e9f24d5fb7cf3628b195b18ff3ac4e37937ceeae.

The patch was only a stop-gap measure that fixed half the problem - the
leak of the fbcon when restarting X. A complete solution required
releasing the VMA when the object itself was closed rather than rely on
file/process exit. The previous patches add the VMA tracking necessary
to do close them along with the object, context or file, and so the time
has come to remove the partial fix.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h |  5 -
 drivers/gpu/drm/i915/i915_gem.c | 14 ++
 drivers/gpu/drm/i915/i915_gem_context.c | 22 --
 3 files changed, 2 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 492e5e73c1ca..0ebf1a70 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2957,11 +2957,6 @@ int i915_vma_bind(struct i915_vma *vma, enum 
i915_cache_level cache_level,
  u32 flags);
 void __i915_vma_set_map_and_fenceable(struct i915_vma *vma);
 int __must_check i915_vma_unbind(struct i915_vma *vma);
-/*
- * BEWARE: Do not use the function below unless you can _absolutely_
- * _guarantee_ VMA in question is _not in use_ anywhere.
- */
-int __must_check __i915_vma_unbind_no_wait(struct i915_vma *vma);
 void i915_vma_close(struct i915_vma *vma);
 void i915_vma_destroy(struct i915_vma *vma);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3e12122f0f1f..e6c46f2d08e7 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2524,7 +2524,7 @@ static void __i915_vma_iounmap(struct i915_vma *vma)
vma->iomap = NULL;
 }
 
-static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
+int i915_vma_unbind(struct i915_vma *vma)
 {
struct drm_i915_gem_object *obj = vma->obj;
unsigned long active;
@@ -2534,7 +2534,7 @@ static int __i915_vma_unbind(struct i915_vma *vma, bool 
wait)
 * have side-effects such as unpinning or even unbinding this vma.
 */
active = vma->active;
-   if (active && wait) {
+   if (active) {
int idx;
 
/* When a closed VMA is retired, it is unbound - eek.
@@ -2616,16 +2616,6 @@ destroy:
return 0;
 }
 
-int i915_vma_unbind(struct i915_vma *vma)
-{
-   return __i915_vma_unbind(vma, true);
-}
-
-int __i915_vma_unbind_no_wait(struct i915_vma *vma)
-{
-   return __i915_vma_unbind(vma, false);
-}
-
 int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv)
 {
struct intel_engine_cs *engine;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index f04073469853..5ed91406d4e9 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -134,21 +134,6 @@ static int get_context_size(struct drm_i915_private 
*dev_priv)
return ret;
 }
 
-static void i915_gem_context_clean(struct i915_gem_context *ctx)
-{
-   struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
-   struct i915_vma *vma, *next;
-
-   if (!ppgtt)
-   return;
-
list_for_each_entry_safe(vma, next, &ppgtt->base.inactive_list,
-vm_link) {
-   if (WARN_ON(__i915_vma_unbind_no_wait(vma)))
-   break;
-   }
-}
-
 void i915_gem_context_free(struct kref *ctx_ref)
 {
struct i915_gem_context *ctx = container_of(ctx_ref, typeof(*ctx), ref);
@@ -158,13 +143,6 @@ void i915_gem_context_free(struct kref *ctx_ref)
trace_i915_context_free(ctx);
GEM_BUG_ON(!ctx->closed);
 
-   /*
-* This context is going away and we need to remove all VMAs still
-* around. This is to handle imported shared objects for which
-* destructor did not run when their handles were closed.
-*/
-   i915_gem_context_clean(ctx);
-
i915_ppgtt_put(ctx->ppgtt);
 
for (i = 0; i < I915_NUM_ENGINES; i++) {
-- 
2.8.1



[Intel-gfx] [PATCH 49/62] drm/i915: Refactor activity tracking for requests

2016-06-03 Thread Chris Wilson
With the introduction of requests, we amplified the number of atomic
refcounted objects we use and update every execbuffer; from none to
several references, and a set of references that need to be changed. We
also introduced interesting side-effects in the order of retiring
requests and objects.

Instead of independently tracking the last request for an object, track
the active objects for each request. The object will reside in the
buffer list of its most recent active request and so we reduce the kref
interchange to a list_move. Now retirements are entirely driven by the
request, dramatically simplifying activity tracking on the object
themselves, and removing the ambiguity between retiring objects and
retiring requests.

Furthermore with the consolidation of managing the activity tracking
centrally, we can look forward to using RCU to enable lockless lookup of
the current active requests for an object. In the future, we will be
able to query the status or wait upon rendering to an object without
even touching the struct_mutex BKL.

All told, less code, simpler and faster, and more extensible.
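
Conceptually, retirement becomes a walk of the request's own list instead
of per-object bookkeeping - a rough sketch (field and callback names here
are illustrative):

    static void retire_request(struct drm_i915_gem_request *req)
    {
        struct i915_gem_active *active, *next;

        /* Objects sit on the list of their most recent request, so
         * retiring the request retires everything still on it.
         */
        list_for_each_entry_safe(active, next, &req->active_list, link)
            active->retire(active, req);
    }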

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/Makefile   |   1 -
 drivers/gpu/drm/i915/i915_drv.h |  10 ---
 drivers/gpu/drm/i915/i915_gem.c | 135 +++-
 drivers/gpu/drm/i915/i915_gem_debug.c   |  70 -
 drivers/gpu/drm/i915/i915_gem_fence.c   |   9 +--
 drivers/gpu/drm/i915/i915_gem_request.c |  39 ++---
 drivers/gpu/drm/i915/i915_gem_request.h |  73 +++--
 drivers/gpu/drm/i915/intel_lrc.c|   1 -
 drivers/gpu/drm/i915/intel_ringbuffer.c |   1 -
 drivers/gpu/drm/i915/intel_ringbuffer.h |  12 ---
 10 files changed, 105 insertions(+), 246 deletions(-)
 delete mode 100644 drivers/gpu/drm/i915/i915_gem_debug.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 14cef1d2343c..99347343ac59 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -24,7 +24,6 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o
 i915-y += i915_cmd_parser.o \
  i915_gem_batch_pool.o \
  i915_gem_context.o \
- i915_gem_debug.o \
  i915_gem_dmabuf.o \
  i915_gem_evict.o \
  i915_gem_execbuffer.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b8df48e0e32b..089415f51a0b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -421,8 +421,6 @@ void intel_link_compute_m_n(int bpp, int nlanes,
 #define DRIVER_MINOR   6
 #define DRIVER_PATCHLEVEL  0
 
-#define WATCH_LISTS0
-
 struct opregion_header;
 struct opregion_acpi;
 struct opregion_swsci;
@@ -2134,7 +2132,6 @@ struct drm_i915_gem_object {
struct drm_mm_node *stolen;
struct list_head global_list;
 
-   struct list_head engine_list[I915_NUM_ENGINES];
/** Used in execbuf to temporarily hold a ref */
struct list_head obj_exec_link;
 
@@ -3354,13 +3351,6 @@ static inline bool 
i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_objec
obj->tiling_mode != I915_TILING_NONE;
 }
 
-/* i915_gem_debug.c */
-#if WATCH_LISTS
-int i915_verify_lists(struct drm_device *dev);
-#else
-#define i915_verify_lists(dev) 0
-#endif
-
 /* i915_debugfs.c */
 #ifdef CONFIG_DEBUG_FS
 int i915_debugfs_register(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f517bc151af1..3b3a3b834e80 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -41,10 +41,6 @@
 
 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object 
*obj);
 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object 
*obj);
-static void
-i915_gem_object_retire__write(struct drm_i915_gem_object *obj);
-static void
-i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring);
 
 static bool cpu_cache_is_coherent(struct drm_device *dev,
  enum i915_cache_level level)
@@ -118,7 +114,6 @@ int i915_mutex_lock_interruptible(struct drm_device *dev)
if (ret)
return ret;
 
-   WARN_ON(i915_verify_lists(dev));
return 0;
 }
 
@@ -1105,23 +1100,6 @@ put_rpm:
return ret;
 }
 
-static void
-i915_gem_object_retire_request(struct drm_i915_gem_object *obj,
-  struct drm_i915_gem_request *req)
-{
-   int ring = req->engine->id;
-
-   if (i915_gem_active_peek(&obj->last_read[ring],
-                            &obj->base.dev->struct_mutex) == req)
-   i915_gem_object_retire__read(obj, ring);
-   else if (i915_gem_active_peek(&obj->last_write,
-                                 &obj->base.dev->struct_mutex) == req)
-   i915_gem_object_retire__write(obj);
-
-   if (req->reset_counter == i915_reset_counter(&req->i915->gpu_error))
-   

[Intel-gfx] [PATCH 05/62] drm/i915: Add background commentary to "waitboosting"

2016-06-03 Thread Chris Wilson
Describe the intent of boosting the GPU frequency to maximum before
waiting on the GPU.

RPS waitboosting was introduced with

commit b29c19b645287f7062e17d70fa4e9781a01a5d88
Author: Chris Wilson 
Date:   Wed Sep 25 17:34:56 2013 +0100

drm/i915: Boost RPS frequency for CPU stalls

but lacked a concise comment in the code to explain itself.
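
In outline, the behaviour the new comment documents (the once-per-busy-period
bookkeeping lives inside gen6_rps_boost() and is omitted here):

    /* Before blocking, kick the GPU to maximum frequency so the stall
     * resolves sooner; each client gets one boost per busy period to
     * stop a single waiter pinning the clocks high for everyone.
     */
    if (INTEL_INFO(req->i915)->gen >= 6)
        gen6_rps_boost(req->i915, rps, req->emitted_jiffies);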

Signed-off-by: Chris Wilson 
Reviewed-by: Daniel Vetter 
---
 drivers/gpu/drm/i915/i915_gem.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index bec02baef190..0f487e3b920c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1237,6 +1237,21 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 
trace_i915_gem_request_wait_begin(req);
 
+   /* This client is about to stall waiting for the GPU. In many cases
+* this is undesirable and limits the throughput of the system, as
+* many clients cannot continue processing user input/output whilst
+* blocked. RPS autotuning may take tens of milliseconds to respond
+* to the GPU load and thus incurs additional latency for the client.
+* We can circumvent that by promoting the GPU frequency to maximum
+* before we wait. This makes the GPU throttle up much more quickly
+* (good for benchmarks and user experience, e.g. window animations),
+* but at a cost of spending more power processing the workload
+* (bad for battery). Not all clients even want their results
+* immediately and for them we should just let the GPU select its own
+* frequency to maximise efficiency. To prevent a single client from
+* forcing the clocks too high for the whole system, we only allow
+* each client to waitboost once in a busy period.
+*/
if (INTEL_INFO(req->i915)->gen >= 6)
gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
 
-- 
2.8.1



[Intel-gfx] [PATCH 32/62] drm/i915: Rename intel_pin_and_map_ring()

2016-06-03 Thread Chris Wilson
For more consistent OO naming of the form intel_ring_verb(), pick
intel_ring_pin() and intel_ring_unpin().

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/intel_lrc.c|  4 ++--
 drivers/gpu/drm/i915/intel_ringbuffer.c | 38 -
 drivers/gpu/drm/i915/intel_ringbuffer.h |  5 ++---
 3 files changed, 23 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index fd093efffe85..e8685ce4d2a4 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -954,7 +954,7 @@ static int intel_lr_context_pin(struct i915_gem_context 
*ctx,
 
lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
 
-   ret = intel_pin_and_map_ring(dev_priv, ce->ring);
+   ret = intel_ring_pin(ce->ring);
if (ret)
goto unpin_map;
 
@@ -992,7 +992,7 @@ void intel_lr_context_unpin(struct i915_gem_context *ctx,
if (--ce->pin_count)
return;
 
-   intel_unpin_ring(ce->ring);
+   intel_ring_unpin(ce->ring);
 
i915_gem_object_unpin_map(ce->state);
i915_gem_object_ggtt_unpin(ce->state);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index d643698da830..07c2470c24f9 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1894,24 +1894,9 @@ static int init_phys_status_page(struct intel_engine_cs 
*engine)
return 0;
 }
 
-void intel_unpin_ring(struct intel_ring *ring)
-{
-   GEM_BUG_ON(ring->vma == NULL);
-   GEM_BUG_ON(ring->vaddr == NULL);
-
-   if (HAS_LLC(ring->obj->base.dev) && !ring->obj->stolen)
-   i915_gem_object_unpin_map(ring->obj);
-   else
-   i915_vma_unpin_iomap(ring->vma);
-   ring->vaddr = NULL;
-
-   i915_gem_object_ggtt_unpin(ring->obj);
-   ring->vma = NULL;
-}
-
-int intel_pin_and_map_ring(struct drm_i915_private *dev_priv,
-  struct intel_ring *ring)
+int intel_ring_pin(struct intel_ring *ring)
 {
+   struct drm_i915_private *dev_priv = ring->engine->i915;
struct drm_i915_gem_object *obj = ring->obj;
/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
unsigned flags = PIN_OFFSET_BIAS | 4096;
@@ -1961,6 +1946,21 @@ err_unpin:
return ret;
 }
 
+void intel_ring_unpin(struct intel_ring *ring)
+{
+   GEM_BUG_ON(ring->vma == NULL);
+   GEM_BUG_ON(ring->vaddr == NULL);
+
+   if (HAS_LLC(ring->engine->i915) && !ring->obj->stolen)
+   i915_gem_object_unpin_map(ring->obj);
+   else
+   i915_vma_unpin_iomap(ring->vma);
+   ring->vaddr = NULL;
+
+   i915_gem_object_ggtt_unpin(ring->obj);
+   ring->vma = NULL;
+}
+
 static void intel_destroy_ringbuffer_obj(struct intel_ring *ring)
 {
i915_gem_object_put(ring->obj);
@@ -2137,7 +2137,7 @@ static int intel_init_engine(struct drm_device *dev,
goto error;
}
 
-   ret = intel_pin_and_map_ring(dev_priv, ring);
+   ret = intel_ring_pin(ring);
if (ret) {
DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n",
engine->name, ret);
@@ -2169,7 +2169,7 @@ void intel_engine_cleanup(struct intel_engine_cs *engine)
intel_engine_stop(engine);
WARN_ON(!IS_GEN2(dev_priv) && (I915_READ_MODE(engine) & 
MODE_IDLE) == 0);
 
-   intel_unpin_ring(engine->buffer);
+   intel_ring_unpin(engine->buffer);
intel_ring_free(engine->buffer);
engine->buffer = NULL;
}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index ef0133188a65..5403cc614095 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -450,9 +450,8 @@ intel_write_status_page(struct intel_engine_cs *engine,
 
 struct intel_ring *
 intel_engine_create_ring(struct intel_engine_cs *engine, int size);
-int intel_pin_and_map_ring(struct drm_i915_private *dev_priv,
-  struct intel_ring *ring);
-void intel_unpin_ring(struct intel_ring *ring);
+int intel_ring_pin(struct intel_ring *ring);
+void intel_ring_unpin(struct intel_ring *ring);
 void intel_ring_free(struct intel_ring *ring);
 
 void intel_engine_stop(struct intel_engine_cs *engine);
-- 
2.8.1



[Intel-gfx] [PATCH 47/62] drm/i915: Rename request->list to link for consistency

2016-06-03 Thread Chris Wilson
We use "list" to denote the list and "link" to denote an element on that
list. Rename request->list to match this idiom.
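
The idiom in miniature - the container owns the "list", each element embeds
a "link" (mirroring the debugfs hunk below):

    struct drm_i915_gem_request *req;
    int count = 0;

    /* engine->request_list is the head; request->link is the hook. */
    list_for_each_entry(req, &engine->request_list, link)
        count++;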

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c |  4 ++--
 drivers/gpu/drm/i915/i915_gem.c | 10 +-
 drivers/gpu/drm/i915/i915_gem_request.c | 10 +-
 drivers/gpu/drm/i915/i915_gem_request.h |  4 ++--
 drivers/gpu/drm/i915/i915_gpu_error.c   |  4 ++--
 drivers/gpu/drm/i915/intel_ringbuffer.c |  6 +++---
 6 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index d35454d5683e..345caf2e1841 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -760,13 +760,13 @@ static int i915_gem_request_info(struct seq_file *m, void 
*data)
int count;
 
count = 0;
-   list_for_each_entry(req, &engine->request_list, list)
+   list_for_each_entry(req, &engine->request_list, link)
count++;
if (count == 0)
continue;
 
seq_printf(m, "%s requests: %d\n", engine->name, count);
-   list_for_each_entry(req, &engine->request_list, list) {
+   list_for_each_entry(req, &engine->request_list, link) {
struct task_struct *task;
 
rcu_read_lock();
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ad3330adfa41..2bddd1386788 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2210,7 +2210,7 @@ i915_gem_find_active_request(struct intel_engine_cs 
*engine)
 * extra delay for a recent interrupt is pointless. Hence, we do
 * not need an engine->irq_seqno_barrier() before the seqno reads.
 */
-   list_for_each_entry(request, &engine->request_list, list) {
+   list_for_each_entry(request, &engine->request_list, link) {
if (i915_gem_request_completed(request))
continue;
 
@@ -2232,7 +2232,7 @@ static void i915_gem_reset_engine_status(struct 
intel_engine_cs *engine)
ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
 
i915_set_reset_status(request->ctx, ring_hung);
-   list_for_each_entry_continue(request, &engine->request_list, list)
+   list_for_each_entry_continue(request, &engine->request_list, link)
i915_set_reset_status(request->ctx, false);
 }
 
@@ -2275,7 +2275,7 @@ static void i915_gem_reset_engine_cleanup(struct 
intel_engine_cs *engine)
 
request = list_last_entry(&engine->request_list,
  struct drm_i915_gem_request,
- list);
+ link);
 
i915_gem_request_retire_upto(request);
}
@@ -2336,7 +2336,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs 
*engine)
 
request = list_first_entry(&engine->request_list,
   struct drm_i915_gem_request,
-  list);
+  link);
 
if (!i915_gem_request_completed(request))
break;
@@ -2356,7 +2356,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs 
*engine)
   engine_list[engine->id]);
 
if (!list_empty(&i915_gem_active_peek(&obj->last_read[engine->id],
-   &obj->base.dev->struct_mutex)->list))
+   &obj->base.dev->struct_mutex)->link))
break;
 
i915_gem_object_retire__read(obj, engine->id);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c 
b/drivers/gpu/drm/i915/i915_gem_request.c
index 1e9515cfb506..20ad95d9a65f 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -314,7 +314,7 @@ i915_gem_request_remove_from_client(struct 
drm_i915_gem_request *request)
 static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 {
trace_i915_gem_request_retire(request);
-   list_del_init(&request->list);
+   list_del_init(&request->link);
 
/* We know the GPU must have read the request to have
 * sent us the seqno + interrupt, so use the position
@@ -345,12 +345,12 @@ void i915_gem_request_retire_upto(struct 
drm_i915_gem_request *req)
 
lockdep_assert_held(&req->i915->dev->struct_mutex);
 
-   if (list_empty(&req->list))
+   if (list_empty(&req->link))
return;
 
do {
tmp = list_first_entry(&engine->request_list,
-  typeof(*tmp), list);
+  typeof(*tmp), link);
 
i915_gem_request_retire(tmp);
} while (tmp != req);
@@ -443,7 +443,7 @@ void __i915_add_request(struct 

[Intel-gfx] [PATCH 50/62] drm/i915: Double check activity before relocations

2016-06-03 Thread Chris Wilson
If the object is active and we need to perform a relocation upon it, we
need to take the slow relocation path. Before we do, double check the
active requests to see if they have completed.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 5c7eb3c93a86..6fa13c618a6b 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -411,6 +411,20 @@ relocate_entry_clflush(struct drm_i915_gem_object *obj,
return 0;
 }
 
+static bool object_is_idle(struct drm_i915_gem_object *obj)
+{
+   unsigned long active = obj->active;
+   int idx;
+
+   for_each_active(active, idx) {
if (!i915_gem_active_is_idle(&obj->last_read[idx],
+&obj->base.dev->struct_mutex))
+   return false;
+   }
+
+   return true;
+}
+
 static int
 i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
   struct eb_vmas *eb,
@@ -494,7 +508,7 @@ i915_gem_execbuffer_relocate_entry(struct 
drm_i915_gem_object *obj,
}
 
/* We can't wait for rendering with pagefaults disabled */
-   if (obj->active && pagefault_disabled())
+   if (pagefault_disabled() && !object_is_idle(obj))
return -EFAULT;
 
if (use_cpu_reloc(obj))
-- 
2.8.1



[Intel-gfx] [PATCH 27/62] drm/i915: Rename request->ringbuf to request->ring

2016-06-03 Thread Chris Wilson
Now that we have disambiguated ring and engine, we can use the clearer
and more consistent name for the intel_ringbuffer pointer in the
request.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem_context.c|  4 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  4 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c|  6 +-
 drivers/gpu/drm/i915/i915_gem_request.c| 16 +++---
 drivers/gpu/drm/i915/i915_gem_request.h|  3 +-
 drivers/gpu/drm/i915/i915_gpu_error.c  | 20 +++
 drivers/gpu/drm/i915/intel_display.c   | 10 ++--
 drivers/gpu/drm/i915/intel_lrc.c   | 57 +-
 drivers/gpu/drm/i915/intel_mocs.c  | 36 ++--
 drivers/gpu/drm/i915/intel_overlay.c   |  8 +--
 drivers/gpu/drm/i915/intel_ringbuffer.c| 92 +++---
 11 files changed, 126 insertions(+), 130 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 899731f9a2c4..a7911f39f416 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -514,7 +514,7 @@ static inline int
 mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
 {
struct drm_i915_private *dev_priv = req->i915;
-   struct intel_ringbuffer *ring = req->ringbuf;
+   struct intel_ringbuffer *ring = req->ring;
u32 flags = hw_flags | MI_MM_SPACE_GTT;
const int num_rings =
/* Use an extended w/a on ivb+ if signalling from other rings */
@@ -614,7 +614,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 
hw_flags)
 static int remap_l3(struct drm_i915_gem_request *req, int slice)
 {
u32 *remap_info = req->i915->l3_parity.remap_info[slice];
-   struct intel_ringbuffer *ring = req->ringbuf;
+   struct intel_ringbuffer *ring = req->ring;
int i, ret;
 
if (!remap_info)
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 99663e8429b3..246bd70c0c9f 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1140,7 +1140,7 @@ i915_gem_execbuffer_retire_commands(struct 
i915_execbuffer_params *params)
 static int
 i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
 {
-   struct intel_ringbuffer *ring = req->ringbuf;
+   struct intel_ringbuffer *ring = req->ring;
int ret, i;
 
if (!IS_GEN7(req->i915) || req->engine->id != RCS) {
@@ -1270,7 +1270,7 @@ i915_gem_ringbuffer_submission(struct 
i915_execbuffer_params *params,
 
if (params->engine->id == RCS &&
instp_mode != dev_priv->relative_constants_mode) {
-   struct intel_ringbuffer *ring = params->request->ringbuf;
+   struct intel_ringbuffer *ring = params->request->ring;
 
ret = intel_ring_begin(params->request, 4);
if (ret)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 4b4e3de58ad9..b0a644cede20 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -669,7 +669,7 @@ static int gen8_write_pdp(struct drm_i915_gem_request *req,
  unsigned entry,
  dma_addr_t addr)
 {
-   struct intel_ringbuffer *ring = req->ringbuf;
+   struct intel_ringbuffer *ring = req->ring;
int ret;
 
BUG_ON(entry >= 4);
@@ -1660,7 +1660,7 @@ static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
 struct drm_i915_gem_request *req)
 {
-   struct intel_ringbuffer *ring = req->ringbuf;
+   struct intel_ringbuffer *ring = req->ring;
int ret;
 
/* NB: TLBs must be flushed and invalidated before a switch */
@@ -1699,7 +1699,7 @@ static int vgpu_mm_switch(struct i915_hw_ppgtt *ppgtt,
 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
  struct drm_i915_gem_request *req)
 {
-   struct intel_ringbuffer *ring = req->ringbuf;
+   struct intel_ringbuffer *ring = req->ring;
int ret;
 
/* NB: TLBs must be flushed and invalidated before a switch */
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c 
b/drivers/gpu/drm/i915/i915_gem_request.c
index 059ba88e182e..c6a7a7984f1f 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -351,7 +351,7 @@ static void i915_gem_request_retire(struct 
drm_i915_gem_request *request)
 * Note this requires that we are always called in request
 * completion order.
 */
-   request->ringbuf->last_retired_head = request->postfix;
+   request->ring->last_retired_head = request->postfix;
 
i915_gem_request_remove_from_client(request);
 
@@ -415,7 +415,7 @@ void __i915_add_request(struct drm_i915_gem_request 
*request,
bool 

[Intel-gfx] [PATCH 25/62] drm/i915: Unify intel_logical_ring_emit and intel_ring_emit

2016-06-03 Thread Chris Wilson
Both perform the same actions with more or less indirection, so just
unify the code.
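
Both paths now reduce to the same begin/emit/advance sequence; roughly
(a sketch using the signatures this patch settles on):

    struct intel_ringbuffer *ring = req->ringbuf;
    int ret;

    ret = intel_ring_begin(req, 2);     /* reserve two dwords */
    if (ret)
        return ret;
    intel_ring_emit(ring, MI_NOOP);     /* write at ring->tail */
    intel_ring_emit(ring, MI_NOOP);
    intel_ring_advance(ring);           /* commit the new tail */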

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem_context.c|  54 ++---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  53 ++---
 drivers/gpu/drm/i915/i915_gem_gtt.c|  62 ++---
 drivers/gpu/drm/i915/intel_display.c   |  80 +++
 drivers/gpu/drm/i915/intel_lrc.c   | 160 ++---
 drivers/gpu/drm/i915/intel_lrc.h   |  26 --
 drivers/gpu/drm/i915/intel_mocs.c  |  38 ++-
 drivers/gpu/drm/i915/intel_overlay.c   |  50 ++--
 drivers/gpu/drm/i915/intel_ringbuffer.c| 365 +++--
 drivers/gpu/drm/i915/intel_ringbuffer.h|  23 +-
 10 files changed, 439 insertions(+), 472 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 7c114f90f61a..41e32426d174 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -514,7 +514,7 @@ static inline int
 mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
 {
struct drm_i915_private *dev_priv = req->i915;
-   struct intel_engine_cs *engine = req->engine;
+   struct intel_ringbuffer *ring = req->ringbuf;
u32 flags = hw_flags | MI_MM_SPACE_GTT;
const int num_rings =
/* Use an extended w/a on ivb+ if signalling from other rings */
@@ -529,7 +529,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 
hw_flags)
 * itlb_before_ctx_switch.
 */
if (IS_GEN6(dev_priv)) {
-   ret = engine->flush(req, I915_GEM_GPU_DOMAINS, 0);
+   ret = req->engine->flush(req, I915_GEM_GPU_DOMAINS, 0);
if (ret)
return ret;
}
@@ -551,64 +551,64 @@ mi_set_context(struct drm_i915_gem_request *req, u32 
hw_flags)
 
/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
if (INTEL_GEN(dev_priv) >= 7) {
-   intel_ring_emit(engine, MI_ARB_ON_OFF | MI_ARB_DISABLE);
+   intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_DISABLE);
if (num_rings) {
struct intel_engine_cs *signaller;
 
-   intel_ring_emit(engine,
+   intel_ring_emit(ring,
MI_LOAD_REGISTER_IMM(num_rings));
for_each_engine(signaller, dev_priv) {
-   if (signaller == engine)
+   if (signaller == req->engine)
continue;
 
-   intel_ring_emit_reg(engine,
+   intel_ring_emit_reg(ring,

RING_PSMI_CTL(signaller->mmio_base));
-   intel_ring_emit(engine,
+   intel_ring_emit(ring,

_MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
}
}
}
 
-   intel_ring_emit(engine, MI_NOOP);
-   intel_ring_emit(engine, MI_SET_CONTEXT);
-   intel_ring_emit(engine,
+   intel_ring_emit(ring, MI_NOOP);
+   intel_ring_emit(ring, MI_SET_CONTEXT);
+   intel_ring_emit(ring,
i915_gem_obj_ggtt_offset(req->ctx->engine[RCS].state) |
flags);
/*
 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
 * WaMiSetContext_Hang:snb,ivb,vlv
 */
-   intel_ring_emit(engine, MI_NOOP);
+   intel_ring_emit(ring, MI_NOOP);
 
if (INTEL_GEN(dev_priv) >= 7) {
if (num_rings) {
struct intel_engine_cs *signaller;
i915_reg_t last_reg = {}; /* keep gcc quiet */
 
-   intel_ring_emit(engine,
+   intel_ring_emit(ring,
MI_LOAD_REGISTER_IMM(num_rings));
for_each_engine(signaller, dev_priv) {
-   if (signaller == engine)
+   if (signaller == req->engine)
continue;
 
last_reg = RING_PSMI_CTL(signaller->mmio_base);
-   intel_ring_emit_reg(engine, last_reg);
-   intel_ring_emit(engine,
+   intel_ring_emit_reg(ring, last_reg);
+   intel_ring_emit(ring,

_MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
}
 
/* Insert a delay before the next switch! */
-   intel_ring_emit(engine,
+   intel_ring_emit(ring,
MI_STORE_REGISTER_MEM |
  

[Intel-gfx] [PATCH 52/62] drm/i915: Amalgamate GGTT/ppGTT vma debug list walkers

2016-06-03 Thread Chris Wilson
As we can now have multiple VMA inside the global GTT (with partial
mappings, rotations, etc), it is no longer true that there may just be a
single GGTT entry and so we should walk the full vma_list to count up
the actual usage. In addition to unifying the two walkers, switch from
multiplying the object size for each vma to summing the bound vma sizes.
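
To make the accounting change concrete, consider a 4 MiB object whose
only binding is a 64 KiB partial GGTT view (numbers purely illustrative):

	/* before: each bound vma charged the full object size
	 *   stats->global += obj->base.size;    4 MiB charged
	 * after: each bound vma charges only its own node
	 *   stats->global += vma->node.size;    64 KiB charged
	 * and an object with no bound vma at all is now counted
	 * exactly once as unbound, via the new bound counter.
	 */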

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c | 46 +++--
 1 file changed, 18 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 345caf2e1841..338c85a5ab27 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -341,6 +341,7 @@ static int per_file_stats(int id, void *ptr, void *data)
struct drm_i915_gem_object *obj = ptr;
struct file_stats *stats = data;
struct i915_vma *vma;
+   int bound = 0;
 
stats->count++;
stats->total += obj->base.size;
@@ -348,41 +349,30 @@ static int per_file_stats(int id, void *ptr, void *data)
if (obj->base.name || obj->base.dma_buf)
stats->shared += obj->base.size;
 
-   if (USES_FULL_PPGTT(obj->base.dev)) {
-   list_for_each_entry(vma, &obj->vma_list, obj_link) {
-   struct i915_hw_ppgtt *ppgtt;
+   list_for_each_entry(vma, &obj->vma_list, obj_link) {
+   if (!drm_mm_node_allocated(&vma->node))
+   continue;
 
-   if (!drm_mm_node_allocated(&vma->node))
-   continue;
+   bound++;
 
-   if (vma->is_ggtt) {
-   stats->global += obj->base.size;
-   continue;
-   }
-
-   ppgtt = container_of(vma->vm, struct i915_hw_ppgtt, 
base);
+   if (vma->is_ggtt) {
+   stats->global += vma->node.size;
+   } else {
+   struct i915_hw_ppgtt *ppgtt
+   = container_of(vma->vm,
+  struct i915_hw_ppgtt,
+  base);
if (ppgtt->file_priv != stats->file_priv)
continue;
-
-   if (obj->active) /* XXX per-vma statistic */
-   stats->active += obj->base.size;
-   else
-   stats->inactive += obj->base.size;
-
-   return 0;
-   }
-   } else {
-   if (i915_gem_obj_ggtt_bound(obj)) {
-   stats->global += obj->base.size;
-   if (obj->active)
-   stats->active += obj->base.size;
-   else
-   stats->inactive += obj->base.size;
-   return 0;
}
+
+   if (obj->active) /* XXX per-vma statistic */
+   stats->active += vma->node.size;
+   else
+   stats->inactive += vma->node.size;
}
 
-   if (!list_empty(&obj->global_list))
+   if (!bound)
stats->unbound += obj->base.size;
 
return 0;
-- 
2.8.1



[Intel-gfx] [PATCH 12/62] drm/i915: Skip capturing an error state if we already have one

2016-06-03 Thread Chris Wilson
As we only ever keep the first error state around, we can avoid some
work that can be quite intrusive if we don't record the error the second
time around. This does move the race whereby the user could discard one
error state as the second is being captured, but that race exists in the
current code and we hope that recapturing error state is only done for
debugging.

Note that as we discard the error state for simulated errors, igt tests
that exercise error capture continue to function.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/Makefile   |   1 +
 drivers/gpu/drm/i915/i915_drv.h | 210 +-
 drivers/gpu/drm/i915/i915_gem.c | 653 +--
 drivers/gpu/drm/i915/i915_gem_request.c | 659 
 drivers/gpu/drm/i915/i915_gem_request.h | 245 
 drivers/gpu/drm/i915/i915_gpu_error.c   |   3 +
 6 files changed, 916 insertions(+), 855 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_request.c
 create mode 100644 drivers/gpu/drm/i915/i915_gem_request.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index f20007440821..14cef1d2343c 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -32,6 +32,7 @@ i915-y += i915_cmd_parser.o \
  i915_gem_gtt.o \
  i915_gem.o \
  i915_gem_render_state.o \
+ i915_gem_request.o \
  i915_gem_shrinker.o \
  i915_gem_stolen.o \
  i915_gem_tiling.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 15a0c6bdf500..939cd45043c7 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -60,6 +60,7 @@
 #include "i915_gem.h"
 #include "i915_gem_gtt.h"
 #include "i915_gem_render_state.h"
+#include "i915_gem_request.h"
 
 /* General customization:
  */
@@ -2339,172 +2340,6 @@ static inline struct scatterlist *__sg_next(struct 
scatterlist *sg)
 (((__iter).curr += PAGE_SIZE) < (__iter).max) ||   \
 ((__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0))
 
-/**
- * Request queue structure.
- *
- * The request queue allows us to note sequence numbers that have been emitted
- * and may be associated with active buffers to be retired.
- *
- * By keeping this list, we can avoid having to do questionable sequence
- * number comparisons on buffer last_read|write_seqno. It also allows an
- * emission time to be associated with the request for tracking how far ahead
- * of the GPU the submission is.
- *
- * The requests are reference counted, so upon creation they should have an
- * initial reference taken using kref_init
- */
-struct drm_i915_gem_request {
-   struct kref ref;
-
-   /** On Which ring this request was generated */
-   struct drm_i915_private *i915;
-   struct intel_engine_cs *engine;
-   unsigned reset_counter;
-   struct intel_signal_node signaling;
-
-/** GEM sequence number associated with the previous request,
- * when the HWS breadcrumb is equal to this the GPU is processing
- * this request.
- */
-   u32 previous_seqno;
-
-/** GEM sequence number associated with this request,
- * when the HWS breadcrumb is equal or greater than this the GPU
- * has finished processing this request.
- */
-   u32 seqno;
-
-   /** Position in the ringbuffer of the start of the request */
-   u32 head;
-
-   /**
-* Position in the ringbuffer of the start of the postfix.
-* This is required to calculate the maximum available ringbuffer
-* space without overwriting the postfix.
-*/
-u32 postfix;
-
-   /** Position in the ringbuffer of the end of the whole request */
-   u32 tail;
-
-   /** Preallocate space in the ringbuffer for the emitting the request */
-   u32 reserved_space;
-
-   /**
-* Context and ring buffer related to this request
-* Contexts are refcounted, so when this request is associated with a
-* context, we must increment the context's refcount, to guarantee that
-* it persists while any request is linked to it. Requests themselves
-* are also refcounted, so the request will only be freed when the last
-* reference to it is dismissed, and the code in
-* i915_gem_request_free() will then decrement the refcount on the
-* context.
-*/
-   struct i915_gem_context *ctx;
-   struct intel_ringbuffer *ringbuf;
-
-   /**
-* Context related to the previous request.
-* As the contexts are accessed by the hardware until the switch is
-* completed to a new context, the hardware may still be writing
-* to the context object after the breadcrumb is visible. We must
-* not unpin/unbind/prune that object whilst still active and so
-* we keep the previous context 

[Intel-gfx] [PATCH 45/62] drm/i915: Mark up i915_gem_active for locking annotation

2016-06-03 Thread Chris Wilson
The future annotations will track the locking used for access to ensure
that it is always sufficient. We make the preparations now to present
the API ahead and to make sure that GCC can eliminate the unused
parameter.

Before: 6298417 3619610  696320 10614347 a1f64b vmlinux
After:  6298417 3619610  696320 10614347 a1f64b vmlinux
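
The change is mechanical: every accessor grows a second argument naming
the lock that protects the request (for now always struct_mutex). A
sketch of the annotated form (the extra argument is deliberately unused
until the annotations land, hence the identical object sizes above):

static inline struct drm_i915_gem_request *
i915_gem_active_peek(const struct i915_gem_active *active,
		     struct mutex *mutex)
{
	/* Today this is only documentation; later it becomes
	 * lockdep_assert_held(mutex).
	 */
	return active->request;
}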

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c | 12 +---
 drivers/gpu/drm/i915/i915_gem.c | 49 ++---
 drivers/gpu/drm/i915/i915_gem_fence.c   |  3 +-
 drivers/gpu/drm/i915/i915_gem_request.h | 38 +++--
 drivers/gpu/drm/i915/i915_gem_tiling.c  |  3 +-
 drivers/gpu/drm/i915/i915_gem_userptr.c |  3 +-
 drivers/gpu/drm/i915/i915_gpu_error.c   | 29 +++
 drivers/gpu/drm/i915/intel_display.c| 12 +---
 8 files changed, 102 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index fefb35c4becc..d35454d5683e 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -155,10 +155,13 @@ describe_obj(struct seq_file *m, struct 
drm_i915_gem_object *obj)
   obj->base.write_domain);
for_each_engine_id(engine, dev_priv, id)
seq_printf(m, "%x ",
-  i915_gem_active_get_seqno(&obj->last_read[id]));
+  i915_gem_active_get_seqno(&obj->last_read[id],
+ &obj->base.dev->struct_mutex));
seq_printf(m, "] %x %x%s%s%s",
-  i915_gem_active_get_seqno(&obj->last_write),
-  i915_gem_active_get_seqno(&obj->last_fence),
+  i915_gem_active_get_seqno(&obj->last_write,
+ &obj->base.dev->struct_mutex),
+  i915_gem_active_get_seqno(&obj->last_fence,
+ &obj->base.dev->struct_mutex),
   i915_cache_level_str(to_i915(obj->base.dev), 
obj->cache_level),
   obj->dirty ? " dirty" : "",
   obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
@@ -193,7 +196,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object 
*obj)
seq_printf(m, " (%s mappable)", s);
}
 
-   engine = i915_gem_active_get_engine(&obj->last_write);
+   engine = i915_gem_active_get_engine(&obj->last_write,
+   &obj->base.dev->struct_mutex);
if (engine)
seq_printf(m, " (%s)", engine->name);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 99e3b269b4b9..610378bd1be4 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1120,21 +1120,24 @@ i915_gem_object_wait_rendering(struct 
drm_i915_gem_object *obj,
return 0;
 
if (readonly) {
-   request = i915_gem_active_peek(&obj->last_write);
+   request = i915_gem_active_peek(&obj->last_write,
+  &obj->base.dev->struct_mutex);
if (request) {
ret = i915_wait_request(request);
if (ret)
return ret;
 
i = request->engine->id;
-   if (i915_gem_active_peek(&obj->last_read[i]) == request)
+   if (i915_gem_active_peek(&obj->last_read[i],
+&obj->base.dev->struct_mutex) == request)
i915_gem_object_retire__read(obj, i);
else
i915_gem_object_retire__write(obj);
}
} else {
for (i = 0; i < I915_NUM_ENGINES; i++) {
-   request = i915_gem_active_peek(&obj->last_read[i]);
+   request = i915_gem_active_peek(&obj->last_read[i],
+  &obj->base.dev->struct_mutex);
if (!request)
continue;
 
@@ -1156,9 +1159,11 @@ i915_gem_object_retire_request(struct 
drm_i915_gem_object *obj,
 {
int ring = req->engine->id;
 
-   if (i915_gem_active_peek(&obj->last_read[ring]) == req)
+   if (i915_gem_active_peek(&obj->last_read[ring],
+&obj->base.dev->struct_mutex) == req)
i915_gem_object_retire__read(obj, ring);
-   else if (i915_gem_active_peek(&obj->last_write) == req)
+   else if (i915_gem_active_peek(&obj->last_write,
+ &obj->base.dev->struct_mutex) == req)
i915_gem_object_retire__write(obj);

if (req->reset_counter == i915_reset_counter(&req->i915->gpu_error))
@@ -1187,7 +1192,8 @@ i915_gem_object_wait_rendering__nonblocking(struct 
drm_i915_gem_object *obj,
if (readonly) {
struct drm_i915_gem_request 

[Intel-gfx] [PATCH 48/62] drm/i915: Remove obsolete i915_gem_object_flush_active()

2016-06-03 Thread Chris Wilson
Since we track requests, and requests are always added to the GPU fully
formed, we never have to flush the incomplete request and know that the
given request will eventually complete without any further action on our
part.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem.c | 58 +++--
 1 file changed, 3 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2bddd1386788..f517bc151af1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2447,34 +2447,6 @@ out:
 }
 
 /**
- * Ensures that an object will eventually get non-busy by flushing any required
- * write domains, emitting any outstanding lazy request and retiring and
- * completed requests.
- */
-static int
-i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
-{
-   int i;
-
-   if (!obj->active)
-   return 0;
-
-   for (i = 0; i < I915_NUM_ENGINES; i++) {
-   struct drm_i915_gem_request *req;
-
-   req = i915_gem_active_peek(&obj->last_read[i],
-  &obj->base.dev->struct_mutex);
-   if (req == NULL)
-   continue;
-
-   if (i915_gem_request_completed(req))
-   i915_gem_object_retire__read(obj, i);
-   }
-
-   return 0;
-}
-
-/**
  * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
  * @DRM_IOCTL_ARGS: standard ioctl arguments
  *
@@ -2518,24 +2490,9 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, 
struct drm_file *file)
return -ENOENT;
}
 
-   /* Need to make sure the object gets inactive eventually. */
-   ret = i915_gem_object_flush_active(obj);
-   if (ret)
-   goto out;
-
if (!obj->active)
goto out;
 
-   /* Do this after OLR check to make sure we make forward progress polling
-* on this IOCTL with a timeout == 0 (like busy ioctl)
-*/
-   if (args->timeout_ns == 0) {
-   ret = -ETIME;
-   goto out;
-   }
-
-   i915_gem_object_put(obj);
-
for (i = 0; i < I915_NUM_ENGINES; i++) {
struct drm_i915_gem_request *req;
 
@@ -2545,6 +2502,8 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, 
struct drm_file *file)
requests[n++] = req;
}
 
+out:
+   i915_gem_object_put(obj);
mutex_unlock(&dev->struct_mutex);
 
for (i = 0; i < n; i++) {
@@ -2555,11 +2514,6 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, 
struct drm_file *file)
i915_gem_request_put(requests[i]);
}
return ret;
-
-out:
-   i915_gem_object_put(obj);
-   mutex_unlock(&dev->struct_mutex);
-   return ret;
 }
 
 static int
@@ -3714,13 +3668,8 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 
/* Count all active objects as busy, even if they are currently not used
 * by the gpu. Users of this interface expect objects to eventually
-* become non-busy without any further actions, therefore emit any
-* necessary flushes here.
+* become non-busy without any further actions.
 */
-   ret = i915_gem_object_flush_active(obj);
-   if (ret)
-   goto unref;
-
args->busy = 0;
if (obj->active) {
struct drm_i915_gem_request *req;
@@ -3738,7 +3687,6 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
args->busy |= req->engine->exec_id;
}
 
-unref:
i915_gem_object_put(obj);
 unlock:
mutex_unlock(&dev->struct_mutex);
-- 
2.8.1



[Intel-gfx] [PATCH 60/62] drm/i915: Release vma when the handle is closed

2016-06-03 Thread Chris Wilson
In order to prevent a leak of the vma on shared objects, we need to
hook into the object_close callback to destroy the vma on the object for
this file. However, if we destroyed that vma immediately we may cause
unexpected application stalls as we try to unbind a busy vma - hence we
defer the unbind to when we retire the vma.

v2: Keep vma allocated until closed. This is useful for a later
optimisation, but it is required now in order to handle potential
recursion of i915_vma_unbind() by retiring itself.
v3: Comments are important.
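
In outline, closing the handle only marks the vma, and the unbind is
performed from request retirement once the vma idles. A condensed sketch
of that split (simplified; pinning and error handling elided):

void i915_vma_close(struct i915_vma *vma)
{
	vma->closed = true;	/* may still be busy on the GPU */
	if (!i915_vma_is_active(vma))
		WARN_ON(i915_vma_unbind(vma));	/* idle: unbind now */
}

static void i915_vma_retire(struct i915_vma *vma)
{
	/* the last request using the vma has completed */
	if (vma->closed)
		WARN_ON(i915_vma_unbind(vma));	/* deferred teardown */
}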

Testcase: igt/gem_ppggtt/flink-and-close-vma-leak
Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
Cc: Daniele Ceraolo Spurio
+   int ret = i915_gem_active_wait(&obj->last_read[idx],
+  &obj->base.dev->struct_mutex);
if (ret)
return ret;
}
@@ -2318,6 +2318,19 @@ out:
}
 }
 
+void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
+{
+   struct drm_i915_gem_object *obj = to_intel_bo(gem);
+   struct drm_i915_file_private *fpriv = file->driver_priv;
+   struct i915_vma *vma, *vn;
+
+   mutex_lock(&obj->base.dev->struct_mutex);
+   list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link)
+   if (vma->vm->file == fpriv)
+   i915_vma_close(vma);
+   mutex_unlock(&obj->base.dev->struct_mutex);
+}
+
 /**
  * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
  * @DRM_IOCTL_ARGS: standard ioctl arguments
@@ -2514,28 +2527,46 @@ static void __i915_vma_iounmap(struct i915_vma *vma)
 static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
 {
struct drm_i915_gem_object *obj = vma->obj;
+   unsigned long active;
int ret;
 
-   if (list_empty(&vma->obj_link))
-   return 0;
+   /* First wait upon any activity as retiring the request may
+* have side-effects such as unpinning or even unbinding this vma.
+*/
+   active = vma->active;
+   if (active && wait) {
+   int idx;
+
+   /* When a closed VMA is retired, it is unbound - eek.
+* In order to prevent it from being recursively closed,
+* take a pin on the vma so that the second unbind is
+* aborted.
+*/
+   vma->pin_count++;
 
-   if (!drm_mm_node_allocated(&vma->node)) {
- 

[Intel-gfx] [PATCH 17/62] drm/i915: Wrap drm_gem_object_reference in i915_gem_object_get

2016-06-03 Thread Chris Wilson
Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h| 10 +-
 drivers/gpu/drm/i915/i915_gem.c|  4 ++--
 drivers/gpu/drm/i915/i915_gem_dmabuf.c |  3 +--
 drivers/gpu/drm/i915/i915_gem_evict.c  |  2 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  4 ++--
 drivers/gpu/drm/i915/i915_gem_shrinker.c   |  2 +-
 drivers/gpu/drm/i915/i915_gem_userptr.c|  3 +--
 drivers/gpu/drm/i915/intel_display.c   |  3 +--
 8 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 27096004db7c..1ff7a9df4209 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2284,7 +2284,15 @@ i915_gem_object_lookup(struct drm_file *file, u32 handle)
return to_intel_bo(drm_gem_object_lookup(file, handle));
 }
 __deprecated extern struct drm_gem_object *
-drm_gem_object_lookup(struct drm_file *file, u32 handle);
+drm_gem_object_lookup(struct drm_file *, u32);
+
+__attribute__((nonnull)) static inline struct drm_i915_gem_object *
+i915_gem_object_get(struct drm_i915_gem_object *obj)
+{
+   drm_gem_object_reference(&obj->base);
+   return obj;
+}
+__deprecated extern void drm_gem_object_reference(struct drm_gem_object *);
 
 /*
  * Optimised SGL iterator for GEM objects
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 837b1402c798..4aecdd4434d8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -266,7 +266,7 @@ drop_pages(struct drm_i915_gem_object *obj)
struct i915_vma *vma, *next;
int ret;
 
-   drm_gem_object_reference(&obj->base);
+   i915_gem_object_get(obj);
list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link)
if (i915_vma_unbind(vma))
break;
@@ -2107,7 +2107,7 @@ void i915_vma_move_to_active(struct i915_vma *vma,
 
/* Add a reference if we're newly entering the active list. */
if (obj->active == 0)
-   drm_gem_object_reference(&obj->base);
+   i915_gem_object_get(obj);
obj->active |= intel_engine_flag(engine);
 
list_move_tail(&obj->engine_list[engine->id], &engine->active_list);
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c 
b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index 80bbe43a2e92..7accb99f3da3 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -278,8 +278,7 @@ struct drm_gem_object *i915_gem_prime_import(struct 
drm_device *dev,
 * Importing dmabuf exported from out own gem increases
 * refcount on gem itself instead of f_count of dmabuf.
 */
-   drm_gem_object_reference(&obj->base);
-   return &obj->base;
+   return &i915_gem_object_get(obj)->base;
}
}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c 
b/drivers/gpu/drm/i915/i915_gem_evict.c
index 3c1280ec7ff6..d5777a0750f0 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -214,7 +214,7 @@ found:
   exec_list);
if (drm_mm_scan_remove_block(&vma->node)) {
list_move(&vma->exec_list, &eviction_list);
-   drm_gem_object_reference(&vma->obj->base);
+   i915_gem_object_get(vma->obj);
continue;
}
list_del_init(&vma->exec_list);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 7f441e74c903..590c4d3ac2e4 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -122,7 +122,7 @@ eb_lookup_vmas(struct eb_vmas *eb,
goto err;
}
 
-   drm_gem_object_reference(&obj->base);
+   i915_gem_object_get(obj);
list_add_tail(&obj->obj_exec_link, &objects);
}
spin_unlock(&file->table_lock);
@@ -1203,7 +1203,7 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *engine,
vma = i915_gem_obj_to_ggtt(shadow_batch_obj);
vma->exec_entry = shadow_exec_entry;
vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN;
-   drm_gem_object_reference(&shadow_batch_obj->base);
+   i915_gem_object_get(shadow_batch_obj);
list_add_tail(&vma->exec_list, &eb->vmas);
 
shadow_batch_obj->base.pending_read_domains = I915_GEM_DOMAIN_COMMAND;
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c 
b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 1bf14544d8ad..416eaaece776 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -190,7 +190,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
if (!can_release_pages(obj))
continue;
 
-   

[Intel-gfx] [PATCH 57/62] drm/i915: Be more careful when unbinding vma

2016-06-03 Thread Chris Wilson
When we call i915_vma_unbind(), we will wait upon outstanding rendering.
This will also trigger a retirement phase, which may update the object
lists. If, we extend request tracking to the VMA itself (rather than
keep it at the encompassing object), then there is a potential that the
obj->vma_list be modified for other elements upon i915_vma_unbind(). As
a result, if we walk over the object list and call i915_vma_unbind(), we
need to be prepared for that list to change.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h  |  2 ++
 drivers/gpu/drm/i915/i915_gem.c  | 57 +++-
 drivers/gpu/drm/i915/i915_gem_shrinker.c |  7 +---
 drivers/gpu/drm/i915/i915_gem_userptr.c  |  4 +--
 4 files changed, 46 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index dd3f7afdf423..83c8dcc744fb 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2961,6 +2961,8 @@ int __must_check i915_vma_unbind(struct i915_vma *vma);
  * _guarantee_ VMA in question is _not in use_ anywhere.
  */
 int __must_check __i915_vma_unbind_no_wait(struct i915_vma *vma);
+
+int i915_gem_object_unbind(struct drm_i915_gem_object *obj);
 int i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
 void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv);
 void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2ba467c0b0b7..e5189155e729 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -255,18 +255,38 @@ static const struct drm_i915_gem_object_ops 
i915_gem_phys_ops = {
.release = i915_gem_object_release_phys,
 };
 
+int
+i915_gem_object_unbind(struct drm_i915_gem_object *obj)
+{
+   struct i915_vma *vma;
+   LIST_HEAD(still_in_list);
+   int ret;
+
+   /* The vma will only be freed if it is marked as closed, and if we wait
+* upon rendering to the vma, we may unbind anything in the list.
+*/
+   while ((vma = list_first_entry_or_null(&obj->vma_list,
+  struct i915_vma,
+  obj_link))) {
+   list_move_tail(&vma->obj_link, &still_in_list);
+   ret = i915_vma_unbind(vma);
+   if (ret)
+   break;
+   }
+   list_splice(&still_in_list, &obj->vma_list);
+
+   return ret;
+}
+
 static int
 drop_pages(struct drm_i915_gem_object *obj)
 {
-   struct i915_vma *vma, *next;
int ret;
 
i915_gem_object_get(obj);
-   list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link)
-   if (i915_vma_unbind(vma))
-   break;
-
-   ret = i915_gem_object_put_pages(obj);
+   ret = i915_gem_object_unbind(obj);
+   if (ret == 0)
+   ret = i915_gem_object_put_pages(obj);
i915_gem_object_put(obj);
 
return ret;
@@ -2983,8 +3003,7 @@ i915_gem_object_set_to_gtt_domain(struct 
drm_i915_gem_object *obj, bool write)
 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
enum i915_cache_level cache_level)
 {
-   struct drm_device *dev = obj->base.dev;
-   struct i915_vma *vma, *next;
+   struct i915_vma *vma;
int ret = 0;
 
if (obj->cache_level == cache_level)
@@ -2995,7 +3014,8 @@ int i915_gem_object_set_cache_level(struct 
drm_i915_gem_object *obj,
 * catch the issue of the CS prefetch crossing page boundaries and
 * reading an invalid PTE on older architectures.
 */
-   list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) {
+restart:
+   list_for_each_entry(vma, &obj->vma_list, obj_link) {
if (!drm_mm_node_allocated(&vma->node))
continue;
 
@@ -3004,11 +3024,18 @@ int i915_gem_object_set_cache_level(struct 
drm_i915_gem_object *obj,
return -EBUSY;
}
 
-   if (!i915_gem_valid_gtt_space(vma, cache_level)) {
-   ret = i915_vma_unbind(vma);
-   if (ret)
-   return ret;
-   }
+   if (i915_gem_valid_gtt_space(vma, cache_level))
+   continue;
+
+   ret = i915_vma_unbind(vma);
+   if (ret)
+   return ret;
+
+   /* As unbinding may affect other elements in the
+* obj->vma_list (due to side-effects from retiring
+* an active vma), play safe and restart the iterator.
+*/
+   goto restart;
}
 
/* We can reuse the existing drm_mm nodes but need to change the
@@ -3027,7 +3054,7 @@ int i915_gem_object_set_cache_level(struct 
drm_i915_gem_object *obj,
if (ret)

[Intel-gfx] [PATCH 59/62] drm/i915: Track active vma requests

2016-06-03 Thread Chris Wilson
Hook the vma itself into the i915_gem_request_retire() so that we can
accurately track when a solitary vma is inactive (as opposed to having
to wait for the entire object to be idle). This improves the interaction
when using multiple contexts (with full-ppgtt) and eliminates some
frequent list walking when retiring objects after a completed request.

A side-effect is that we get an active vma reference for free. The
consequence of this is shown in the next patch...
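
The tracking mirrors what the object already does, reduced to a bitmask
of engines plus a per-engine i915_gem_active slot on the vma. A sketch
of the helpers this adds alongside struct i915_vma (simplified):

static inline bool i915_vma_is_active(const struct i915_vma *vma)
{
	return vma->active != 0;	/* one bit per engine with a request */
}

static inline void i915_vma_set_active(struct i915_vma *vma,
				       unsigned int engine)
{
	vma->active |= 1 << engine;
}

static inline void i915_vma_unset_active(struct i915_vma *vma,
					 unsigned int engine)
{
	vma->active &= ~(1 << engine);
}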

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c|  2 +-
 drivers/gpu/drm/i915/i915_gem.c| 20 +++-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 10 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c| 20 
 drivers/gpu/drm/i915/i915_gem_gtt.h| 26 ++
 5 files changed, 63 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 51f84dd37675..99857ee0bb8b 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -367,7 +367,7 @@ static int per_file_stats(int id, void *ptr, void *data)
continue;
}
 
-   if (obj->active) /* XXX per-vma statistic */
+   if (i915_vma_is_active(vma))
stats->active += vma->node.size;
else
stats->inactive += vma->node.size;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a39d767d8137..ef68a9183d7d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2100,7 +2100,6 @@ i915_gem_object_retire__read(struct i915_gem_active 
*active,
int ring = request->engine->id;
struct drm_i915_gem_object *obj =
container_of(active, struct drm_i915_gem_object, 
last_read[ring]);
-   struct i915_vma *vma;
 
GEM_BUG_ON((obj->active & (1 << ring)) == 0);
 
@@ -2112,12 +2111,9 @@ i915_gem_object_retire__read(struct i915_gem_active 
*active,
 * so that we don't steal from recently used but inactive objects
 * (unless we are forced to ofc!)
 */
-   list_move_tail(&obj->global_list, &request->i915->mm.bound_list);
-
-   list_for_each_entry(vma, &obj->vma_list, obj_link) {
-   if (!list_empty(&vma->vm_link))
-   list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
-   }
+   if (obj->bind_count)
+   list_move_tail(&obj->global_list,
+  &request->i915->mm.bound_list);
 
i915_gem_object_put(obj);
 }
@@ -2915,9 +2911,6 @@ i915_gem_object_flush_cpu_write_domain(struct 
drm_i915_gem_object *obj)
 int
 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 {
-   struct drm_device *dev = obj->base.dev;
-   struct drm_i915_private *dev_priv = to_i915(dev);
-   struct i915_ggtt *ggtt = &dev_priv->ggtt;
uint32_t old_write_domain, old_read_domains;
struct i915_vma *vma;
int ret;
@@ -2970,9 +2963,10 @@ i915_gem_object_set_to_gtt_domain(struct 
drm_i915_gem_object *obj, bool write)
 
/* And bump the LRU for this access */
vma = i915_gem_obj_to_ggtt(obj);
-   if (vma && drm_mm_node_allocated(&vma->node) && !obj->active)
-   list_move_tail(&vma->vm_link,
-  &ggtt->base.inactive_list);
+   if (vma &&
+   drm_mm_node_allocated(&vma->node) &&
+   !i915_vma_is_active(vma))
+   list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
 
return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index e099080b3b5b..7b381358512e 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1121,7 +1121,13 @@ void i915_vma_move_to_active(struct i915_vma *vma,
 
obj->dirty = 1; /* be paranoid  */
 
-   /* Add a reference if we're newly entering the active list. */
+   /* Add a reference if we're newly entering the active list.
+* The order in which we add operations to the retirement queue is
+* vital here: mark_active adds to the start of the callback list,
+* such that subsequent callbacks are called first. Therefore we
+* add the active reference first and queue for it to be dropped
+* *last*.
+*/
if (obj->active == 0)
i915_gem_object_get(obj);
obj->active |= 1 << idx;
@@ -1145,6 +1151,8 @@ void i915_vma_move_to_active(struct i915_vma *vma,
}
}
 
+   i915_vma_set_active(vma, idx);
+   i915_gem_active_set(&vma->last_read[idx], req);
list_move_tail(&vma->vm_link, &vma->vm->active_list);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 57fc84b9b633..4d3179e15b94 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c

[Intel-gfx] [PATCH 58/62] drm/i915: Kill drop_pages()

2016-06-03 Thread Chris Wilson
The drop_pages() function is a dangerous trap in that it can release the
passed in object pointer and so unless the caller is aware, it can
easily trick us into using the stale object afterwards. Move it into its
solitary callsite where we know it is safe.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem.c | 20 +---
 1 file changed, 5 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e5189155e729..a39d767d8137 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -278,20 +278,6 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
return ret;
 }
 
-static int
-drop_pages(struct drm_i915_gem_object *obj)
-{
-   int ret;
-
-   i915_gem_object_get(obj);
-   ret = i915_gem_object_unbind(obj);
-   if (ret == 0)
-   ret = i915_gem_object_put_pages(obj);
-   i915_gem_object_put(obj);
-
-   return ret;
-}
-
 int
 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
int align)
@@ -312,7 +298,11 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object 
*obj,
if (obj->base.filp == NULL)
return -EINVAL;
 
-   ret = drop_pages(obj);
+   ret = i915_gem_object_unbind(obj);
+   if (ret)
+   return ret;
+
+   ret = i915_gem_object_put_pages(obj);
if (ret)
return ret;
 
-- 
2.8.1



[Intel-gfx] [PATCH 16/62] drm/i915: Wrap drm_gem_object_lookup in i915_gem_object_lookup

2016-06-03 Thread Chris Wilson
For symmetry with a forthcoming i915_gem_object_get() and
i915_gem_object_put().
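
The typical caller pattern after the conversion, for reference (the
explicit NULL test replaces the old '&obj->base == NULL' idiom, which
only worked because base is the first member):

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;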

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h| 18 ++-
 drivers/gpu/drm/i915/i915_gem.c| 56 +-
 drivers/gpu/drm/i915/i915_gem_tiling.c |  8 ++---
 drivers/gpu/drm/i915/intel_display.c   |  4 +--
 drivers/gpu/drm/i915/intel_overlay.c   |  5 ++-
 5 files changed, 53 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 48d89b181246..27096004db7c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2268,7 +2268,23 @@ struct drm_i915_gem_object {
} userptr;
};
 };
-#define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
+
+static inline struct drm_i915_gem_object *
+to_intel_bo(struct drm_gem_object *gem)
+{
+   /* Assert that to_intel_bo(NULL) == NULL */
+   BUILD_BUG_ON(offsetof(struct drm_i915_gem_object, base));
+
+   return container_of(gem, struct drm_i915_gem_object, base);
+}
+
+static inline struct drm_i915_gem_object *
+i915_gem_object_lookup(struct drm_file *file, u32 handle)
+{
+   return to_intel_bo(drm_gem_object_lookup(file, handle));
+}
+__deprecated extern struct drm_gem_object *
+drm_gem_object_lookup(struct drm_file *file, u32 handle);
 
 /*
  * Optimised SGL iterator for GEM objects
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5f232fb1a2a4..837b1402c798 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -695,8 +695,8 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
if (ret)
return ret;
 
-   obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
-   if (&obj->base == NULL) {
+   obj = i915_gem_object_lookup(file, args->handle);
+   if (!obj) {
ret = -ENOENT;
goto unlock;
}
@@ -1049,8 +1049,8 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
if (ret)
goto put_rpm;
 
-   obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
-   if (&obj->base == NULL) {
+   obj = i915_gem_object_lookup(file, args->handle);
+   if (!obj) {
ret = -ENOENT;
goto unlock;
}
@@ -1253,8 +1253,8 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void 
*data,
if (ret)
return ret;
 
-   obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
-   if (&obj->base == NULL) {
+   obj = i915_gem_object_lookup(file, args->handle);
+   if (!obj) {
ret = -ENOENT;
goto unlock;
}
@@ -1301,8 +1301,8 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void 
*data,
if (ret)
return ret;
 
-   obj = to_intel_bo(drm_gem_object_lookup(file, args->handle));
-   if (&obj->base == NULL) {
+   obj = i915_gem_object_lookup(file, args->handle);
+   if (!obj) {
ret = -ENOENT;
goto unlock;
}
@@ -1339,7 +1339,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
 {
struct drm_i915_gem_mmap *args = data;
-   struct drm_gem_object *obj;
+   struct drm_i915_gem_object *obj;
unsigned long addr;
 
if (args->flags & ~(I915_MMAP_WC))
@@ -1348,19 +1348,19 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
return -ENODEV;
 
-   obj = drm_gem_object_lookup(file, args->handle);
-   if (obj == NULL)
+   obj = i915_gem_object_lookup(file, args->handle);
+   if (!obj)
return -ENOENT;
 
/* prime objects have no backing filp to GEM mmap
 * pages from.
 */
-   if (!obj->filp) {
-   drm_gem_object_unreference_unlocked(obj);
+   if (!obj->base.filp) {
+   drm_gem_object_unreference_unlocked(&obj->base);
return -EINVAL;
}
 
-   addr = vm_mmap(obj->filp, 0, args->size,
+   addr = vm_mmap(obj->base.filp, 0, args->size,
   PROT_READ | PROT_WRITE, MAP_SHARED,
   args->offset);
if (args->flags & I915_MMAP_WC) {
@@ -1368,7 +1368,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
struct vm_area_struct *vma;
 
if (down_write_killable(&mm->mmap_sem)) {
-   drm_gem_object_unreference_unlocked(obj);
+   drm_gem_object_unreference_unlocked(&obj->base);
return -EINTR;
}
vma = find_vma(mm, addr);
@@ -1379,7 +1379,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
addr = -ENOMEM;
up_write(&mm->mmap_sem);
}
-   

[Intel-gfx] [PATCH 06/62] drm/i915: Flush the RPS bottom-half when the GPU idles

2016-06-03 Thread Chris Wilson
Make sure that the RPS bottom-half is flushed before we set the idle
frequency when we decide the GPU is idle. This should prevent any races
with the bottom-half and setting the idle frequency, and ensures that
the bottom-half is bounded by the GPU's rpm reference taken for when it
is active (i.e. between gen6_rps_busy() and gen6_rps_idle()).

v2: Avoid recursively using the i915->wq - RPS does not touch the
struct_mutex so has no place being on the ordered i915->wq.
v3: Enable/disable interrupts for RPS busy/idle in order to prevent
further HW access from RPS outside of the wakeref.

Signed-off-by: Chris Wilson 
Cc: Imre Deak 
Cc: Jesse Barnes 
---
 drivers/gpu/drm/i915/i915_drv.c |  3 ---
 drivers/gpu/drm/i915/i915_irq.c | 32 
 drivers/gpu/drm/i915/intel_pm.c | 14 ++
 3 files changed, 22 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 5f7208d2fdbf..7ba040141722 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -2699,7 +2699,6 @@ static int intel_runtime_suspend(struct device *device)
 
intel_guc_suspend(dev);
 
-   intel_suspend_gt_powersave(dev_priv);
intel_runtime_pm_disable_interrupts(dev_priv);
 
ret = 0;
@@ -2813,8 +2812,6 @@ static int intel_runtime_resume(struct device *device)
if (!IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv))
intel_hpd_init(dev_priv);
 
-   intel_autoenable_gt_powersave(dev_priv);
-
enable_rpm_wakeref_asserts(dev_priv);
 
if (ret)
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 7a2dc8f1f64e..34e25fc2b90a 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -351,9 +351,8 @@ void gen6_reset_rps_interrupts(struct drm_i915_private 
*dev_priv)
 void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv)
 {
spin_lock_irq(&dev_priv->irq_lock);
-
-   WARN_ON(dev_priv->rps.pm_iir);
-   WARN_ON(I915_READ(gen6_pm_iir(dev_priv)) & dev_priv->pm_rps_events);
+   WARN_ON_ONCE(dev_priv->rps.pm_iir);
+   WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & 
dev_priv->pm_rps_events);
dev_priv->rps.interrupts_enabled = true;
I915_WRITE(gen6_pm_ier(dev_priv), I915_READ(gen6_pm_ier(dev_priv)) |
dev_priv->pm_rps_events);
@@ -371,11 +370,6 @@ void gen6_disable_rps_interrupts(struct drm_i915_private 
*dev_priv)
 {
spin_lock_irq(&dev_priv->irq_lock);
dev_priv->rps.interrupts_enabled = false;
-   spin_unlock_irq(&dev_priv->irq_lock);
-
-   cancel_work_sync(&dev_priv->rps.work);
-
-   spin_lock_irq(&dev_priv->irq_lock);
 
I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0));
 
@@ -384,8 +378,15 @@ void gen6_disable_rps_interrupts(struct drm_i915_private 
*dev_priv)
~dev_priv->pm_rps_events);
 
spin_unlock_irq(&dev_priv->irq_lock);
-
synchronize_irq(dev_priv->dev->irq);
+
+   /* Now that we will not be generating any more work, flush any
+* outstanding tasks. As we are called on the RPS idle path,
+* we will reset the GPU to minimum frequencies, so the current
+* state of the worker can be discarded.
+*/
+   cancel_work_sync(&dev_priv->rps.work);
+   gen6_reset_rps_interrupts(dev_priv);
 }
 
 /**
@@ -1082,13 +1083,6 @@ static void gen6_pm_rps_work(struct work_struct *work)
return;
}
 
-   /*
-* The RPS work is synced during runtime suspend, we don't require a
-* wakeref. TODO: instead of disabling the asserts make sure that we
-* always hold an RPM reference while the work is running.
-*/
-   DISABLE_RPM_WAKEREF_ASSERTS(dev_priv);
-
pm_iir = dev_priv->rps.pm_iir;
dev_priv->rps.pm_iir = 0;
/* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */
@@ -1101,7 +1095,7 @@ static void gen6_pm_rps_work(struct work_struct *work)
WARN_ON(pm_iir & ~dev_priv->pm_rps_events);
 
if ((pm_iir & dev_priv->pm_rps_events) == 0 && !client_boost)
-   goto out;
+   return;
 
mutex_lock(&dev_priv->rps.hw_lock);
 
@@ -1156,8 +1150,6 @@ static void gen6_pm_rps_work(struct work_struct *work)
intel_set_rps(dev_priv, new_delay);
 
mutex_unlock(&dev_priv->rps.hw_lock);
-out:
-   ENABLE_RPM_WAKEREF_ASSERTS(dev_priv);
 }
 
 
@@ -1597,7 +1589,7 @@ static void gen6_rps_irq_handler(struct drm_i915_private 
*dev_priv, u32 pm_iir)
gen6_disable_pm_irq(dev_priv, pm_iir & dev_priv->pm_rps_events);
if (dev_priv->rps.interrupts_enabled) {
dev_priv->rps.pm_iir |= pm_iir & 
dev_priv->pm_rps_events;
-   queue_work(dev_priv->wq, &dev_priv->rps.work);
+  

[Intel-gfx] [PATCH 13/62] drm/i915: Derive GEM requests from dma-fence

2016-06-03 Thread Chris Wilson
dma-buf provides a generic fence class for interoperation between
drivers. Internally we use the request structure as a fence, and so with
only a little bit of interfacing we can rebase those requests on top of
dma-buf fences. This will allow us, in the future, to pass those fences
back to userspace or between drivers.

v2: The fence_context needs to be globally unique, not just unique to
this device.
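
Once the request embeds a struct fence, generic code can take and wait
on it without knowing anything about i915; e.g. a hypothetical external
user holding a reference (using the linux/fence.h API of this era):

	struct fence *fence = &req->fence;
	long ret;

	fence_get(fence);
	/* ... hand off to another driver, or to userspace ... */
	ret = fence_wait(fence, true);	/* < 0 on error or signal */
	fence_put(fence);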

Signed-off-by: Chris Wilson 
Cc: Jesse Barnes 
Cc: Daniel Vetter 
---
 drivers/gpu/drm/i915/i915_debugfs.c|   2 +-
 drivers/gpu/drm/i915/i915_gem_request.c| 116 ++---
 drivers/gpu/drm/i915/i915_gem_request.h|  33 
 drivers/gpu/drm/i915/i915_gpu_error.c  |   2 +-
 drivers/gpu/drm/i915/i915_guc_submission.c |   4 +-
 drivers/gpu/drm/i915/i915_trace.h  |  10 +--
 drivers/gpu/drm/i915/intel_breadcrumbs.c   |   7 +-
 drivers/gpu/drm/i915/intel_lrc.c   |   3 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c|  11 +--
 drivers/gpu/drm/i915/intel_ringbuffer.h|   1 +
 10 files changed, 143 insertions(+), 46 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 8f576b443ff6..8e37315443f3 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -768,7 +768,7 @@ static int i915_gem_request_info(struct seq_file *m, void 
*data)
if (req->pid)
task = pid_task(req->pid, PIDTYPE_PID);
seq_printf(m, "%x @ %d: %s [%d]\n",
-  req->seqno,
+  req->fence.seqno,
   (int) (jiffies - req->emitted_jiffies),
   task ? task->comm : "",
   task ? task->pid : -1);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c 
b/drivers/gpu/drm/i915/i915_gem_request.c
index 34b2f151cdfc..512b15153ac6 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -24,6 +24,98 @@
 
 #include "i915_drv.h"
 
+static inline struct drm_i915_gem_request *
+to_i915_request(struct fence *fence)
+{
+   return container_of(fence, struct drm_i915_gem_request, fence);
+}
+
+static const char *i915_fence_get_driver_name(struct fence *fence)
+{
+   return "i915";
+}
+
+static const char *i915_fence_get_timeline_name(struct fence *fence)
+{
+   /* Timelines are bound by eviction to a VM. However, since
+* we only have a global seqno at the moment, we only have
+* a single timeline. Note that each timeline will have
+* multiple execution contexts (fence contexts) as we allow
+* engines within a single timeline to execute in parallel.
+*/
+   return "global";
+}
+
+static bool i915_fence_signaled(struct fence *fence)
+{
+   return i915_gem_request_completed(to_i915_request(fence));
+}
+
+static bool i915_fence_enable_signaling(struct fence *fence)
+{
+   if (i915_fence_signaled(fence))
+   return false;
+
+   return intel_engine_enable_signaling(to_i915_request(fence)) == 0;
+}
+
+static signed long i915_fence_wait(struct fence *fence,
+  bool interruptible,
+  signed long timeout_jiffies)
+{
+   s64 timeout_ns, *timeout;
+   int ret;
+
+   if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT) {
+   timeout_ns = jiffies_to_nsecs(timeout_jiffies);
+   timeout = &timeout_ns;
+   } else
+   timeout = NULL;
+
+   ret = __i915_wait_request(to_i915_request(fence),
+ interruptible, timeout,
+ NULL);
+   if (ret == -ETIME)
+   return 0;
+
+   if (ret < 0)
+   return ret;
+
+   if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT)
+   timeout_jiffies = nsecs_to_jiffies(timeout_ns);
+
+   return timeout_jiffies;
+}
+
+static void i915_fence_value_str(struct fence *fence, char *str, int size)
+{
+   snprintf(str, size, "%u", fence->seqno);
+}
+
+static void i915_fence_timeline_value_str(struct fence *fence, char *str,
+ int size)
+{
+   snprintf(str, size, "%u",
+intel_engine_get_seqno(to_i915_request(fence)->engine));
+}
+
+static void i915_fence_release(struct fence *fence)
+{
+   struct drm_i915_gem_request *req = to_i915_request(fence);
+   kmem_cache_free(req->i915->requests, req);
+}
+
+static const struct fence_ops i915_fence_ops = {
+   .get_driver_name = i915_fence_get_driver_name,
+   .get_timeline_name = i915_fence_get_timeline_name,
+   .enable_signaling = i915_fence_enable_signaling,
+   .signaled = i915_fence_signaled,
+   .wait = i915_fence_wait,
+   .release = 

[Intel-gfx] [PATCH 20/62] drm/i915: Disable waitboosting for fence_wait()

2016-06-03 Thread Chris Wilson
We want to restrict waitboosting to known process contexts, where we can
track which clients are receiving waitboosts and prevent excessive power
wasting. For fence_wait() we do not have any client tracking and so that
leaves it open to abuse.
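
With the sentinel below, the rps client argument becomes tri-state; a
sketch of the convention at the boost site:

	/* rps == file's client: boost, rate-limited per client
	 * rps == NULL:          boost on behalf of no particular client
	 * rps == NO_WAITBOOST:  external fence wait, never boost
	 */
	if (!IS_ERR(rps) && INTEL_INFO(req->i915)->gen >= 6)
		gen6_rps_boost(req->i915, rps, req->emitted_jiffies);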

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem_request.c | 7 ---
 drivers/gpu/drm/i915/i915_gem_request.h | 1 +
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c 
b/drivers/gpu/drm/i915/i915_gem_request.c
index 987a43f1aac8..ba745f0740d0 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -74,7 +74,7 @@ static signed long i915_fence_wait(struct fence *fence,
 
ret = __i915_wait_request(to_i915_request(fence),
  interruptible, timeout,
- NULL);
+ NO_WAITBOOST);
if (ret == -ETIME)
return 0;
 
@@ -634,7 +634,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 * forcing the clocks too high for the whole system, we only allow
 * each client to waitboost once in a busy period.
 */
-   if (INTEL_INFO(req->i915)->gen >= 6)
+   if (!IS_ERR(rps) && INTEL_INFO(req->i915)->gen >= 6)
gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
 
/* Optimistic spin for the next ~jiffie before touching IRQs */
@@ -707,7 +707,8 @@ complete:
*timeout = 0;
}
 
-   if (rps && req->fence.seqno == req->engine->last_submitted_seqno) {
+   if (!IS_ERR_OR_NULL(rps) &&
+   req->fence.seqno == req->engine->last_submitted_seqno) {
/* The GPU is now idle and this client has stalled.
 * Since no other client has submitted a request in the
 * meantime, assume that this client is the only one
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h 
b/drivers/gpu/drm/i915/i915_gem_request.h
index b1bc96c9e31d..a3cac13ab9af 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -205,6 +205,7 @@ void __i915_add_request(struct drm_i915_gem_request *req,
__i915_add_request(req, NULL, false)
 
 struct intel_rps_client;
+#define NO_WAITBOOST ERR_PTR(-1)
 
 int __i915_wait_request(struct drm_i915_gem_request *req,
bool interruptible,
-- 
2.8.1



[Intel-gfx] [PATCH 09/62] drm/i915: Record the ringbuffer associated with the request

2016-06-03 Thread Chris Wilson
The request tells us where to read the ringbuf from, so use that
information to simplify the error capture. If no request was active at
the time of the hang, the ring is idle and there is no information
inside the ring pertaining to the hang.

Note carefully that this will reduce the amount of information stored in
the error state - any ring without an active request will not be
recorded.

Signed-off-by: Chris Wilson 
Reviewed-by: Dave Gordon 
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 28 
 1 file changed, 8 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
b/drivers/gpu/drm/i915/i915_gpu_error.c
index 81341fc4e61a..cf444ddec66e 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1076,7 +1076,6 @@ static void i915_gem_record_rings(struct drm_i915_private 
*dev_priv,
 
for (i = 0; i < I915_NUM_ENGINES; i++) {
struct intel_engine_cs *engine = &dev_priv->engine[i];
-   struct intel_ringbuffer *rbuf;
 
error->ring[i].pid = -1;
 
@@ -1091,6 +1090,7 @@ static void i915_gem_record_rings(struct drm_i915_private 
*dev_priv,
request = i915_gem_find_active_request(engine);
if (request) {
struct i915_address_space *vm;
+   struct intel_ringbuffer *rb;
 
vm = request->ctx && request->ctx->ppgtt ?
&request->ctx->ppgtt->base :
@@ -1121,26 +1121,14 @@ static void i915_gem_record_rings(struct 
drm_i915_private *dev_priv,
}
rcu_read_unlock();
}
-   }
 
-   if (i915.enable_execlists) {
-   /* TODO: This is only a small fix to keep basic error
-* capture working, but we need to add more information
-* for it to be useful (e.g. dump the context being
-* executed).
-*/
-   if (request)
-   rbuf = request->ctx->engine[engine->id].ringbuf;
-   else
-   rbuf = 
dev_priv->kernel_context->engine[engine->id].ringbuf;
-   } else
-   rbuf = engine->buffer;
-
-   error->ring[i].cpu_ring_head = rbuf->head;
-   error->ring[i].cpu_ring_tail = rbuf->tail;
-
-   error->ring[i].ringbuffer =
-   i915_error_ggtt_object_create(dev_priv, rbuf->obj);
+   rb = request->ringbuf;
+   error->ring[i].cpu_ring_head = rb->head;
+   error->ring[i].cpu_ring_tail = rb->tail;
+   error->ring[i].ringbuffer =
+   i915_error_ggtt_object_create(dev_priv,
+ rb->obj);
+   }
 
error->ring[i].hws_page =
i915_error_ggtt_object_create(dev_priv,
-- 
2.8.1



[Intel-gfx] [PATCH 11/62] drm/i915: Clean up GPU hang message

2016-06-03 Thread Chris Wilson
Remove some redundant kernel messages as we deduce a hung GPU and
capture the error state.

v2: Fix "hang" vs "no progress" message whilst I was there

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_irq.c | 41 ++---
 1 file changed, 26 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 34e25fc2b90a..860235d1e0bf 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -3083,9 +3083,8 @@ static void i915_hangcheck_elapsed(struct work_struct 
*work)
container_of(work, typeof(*dev_priv),
 gpu_error.hangcheck_work.work);
struct intel_engine_cs *engine;
-   enum intel_engine_id id;
-   int busy_count = 0, rings_hung = 0;
-   bool stuck[I915_NUM_ENGINES] = { 0 };
+   unsigned hung = 0, stuck = 0;
+   int busy_count = 0;
 #define BUSY 1
 #define KICK 5
 #define HUNG 20
@@ -3103,7 +3102,7 @@ static void i915_hangcheck_elapsed(struct work_struct 
*work)
 */
intel_uncore_arm_unclaimed_mmio_detection(dev_priv);
 
-   for_each_engine_id(engine, dev_priv, id) {
+   for_each_engine(engine, dev_priv) {
bool busy = intel_engine_has_waiter(engine);
u64 acthd;
u32 seqno;
@@ -3166,10 +3165,15 @@ static void i915_hangcheck_elapsed(struct work_struct 
*work)
break;
case HANGCHECK_HUNG:
engine->hangcheck.score += HUNG;
-   stuck[id] = true;
break;
}
}
+
+   if (engine->hangcheck.score >= 
HANGCHECK_SCORE_RING_HUNG) {
+   hung |= intel_engine_flag(engine);
+   if (engine->hangcheck.action != HANGCHECK_HUNG)
+   stuck |= intel_engine_flag(engine);
+   }
} else {
engine->hangcheck.action = HANGCHECK_ACTIVE;
 
@@ -3194,17 +3198,24 @@ static void i915_hangcheck_elapsed(struct work_struct 
*work)
busy_count += busy;
}
 
-   for_each_engine_id(engine, dev_priv, id) {
-   if (engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG) {
-   DRM_INFO("%s on %s\n",
-stuck[id] ? "stuck" : "no progress",
-engine->name);
-   rings_hung |= intel_engine_flag(engine);
-   }
-   }
+   if (hung) {
+   char msg[80];
+   int len;
 
-   if (rings_hung)
-   i915_handle_error(dev_priv, rings_hung, "Engine(s) hung");
+   /* If some rings hung but others were still busy, only
+* blame the hanging rings in the synopsis.
+*/
+   if (stuck != hung)
+   hung &= ~stuck;
+   len = snprintf(msg, sizeof(msg),
+  "%s on ", stuck == hung ? "No progress" : 
"Hang");
+   for_each_engine_masked(engine, dev_priv, hung)
+   len += snprintf(msg + len, sizeof(msg) - len,
+   "%s, ", engine->name);
+   msg[len-2] = '\0';
+
+   return i915_handle_error(dev_priv, hung, msg);
+   }
 
/* Reset timer in case GPU hangs without another request being added */
if (busy_count)
-- 
2.8.1



[Intel-gfx] [PATCH 44/62] drm/i915: Prepare i915_gem_active for annotations

2016-06-03 Thread Chris Wilson
In the future, we will want to add annotations to the i915_gem_active
struct. The API is thus expanded to hide direct access to the contents
of i915_gem_active and mediated instead through a number of helpers.
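
The helper family introduced here, in outline (simplified; the wrappers
exist so a later patch can attach lockdep annotations and RCU variants
without touching every caller again):

static inline void
i915_gem_active_set(struct i915_gem_active *active,
		    struct drm_i915_gem_request *request)
{
	i915_gem_request_assign(&active->request, request);
}

static inline struct drm_i915_gem_request *
i915_gem_active_peek(const struct i915_gem_active *active)
{
	return active->request;
}

static inline u32
i915_gem_active_get_seqno(const struct i915_gem_active *active)
{
	return i915_gem_request_get_seqno(active->request);
}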

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c |  13 ++--
 drivers/gpu/drm/i915/i915_gem.c |  91 +--
 drivers/gpu/drm/i915/i915_gem_fence.c   |  11 ++-
 drivers/gpu/drm/i915/i915_gem_request.h | 128 +++-
 drivers/gpu/drm/i915/i915_gem_tiling.c  |   2 +-
 drivers/gpu/drm/i915/i915_gem_userptr.c |   8 +-
 drivers/gpu/drm/i915/i915_gpu_error.c   |   9 ++-
 drivers/gpu/drm/i915/intel_display.c|  12 ++-
 8 files changed, 206 insertions(+), 68 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 2edbf9e95e7f..fefb35c4becc 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -155,10 +155,10 @@ describe_obj(struct seq_file *m, struct 
drm_i915_gem_object *obj)
   obj->base.write_domain);
for_each_engine_id(engine, dev_priv, id)
seq_printf(m, "%x ",
-  i915_gem_request_get_seqno(obj->last_read[id].request));
+  i915_gem_active_get_seqno(&obj->last_read[id]));
seq_printf(m, "] %x %x%s%s%s",
-  i915_gem_request_get_seqno(obj->last_write.request),
-  i915_gem_request_get_seqno(obj->last_fence.request),
+  i915_gem_active_get_seqno(&obj->last_write),
+  i915_gem_active_get_seqno(&obj->last_fence),
   i915_cache_level_str(to_i915(obj->base.dev), 
obj->cache_level),
   obj->dirty ? " dirty" : "",
   obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
@@ -192,8 +192,11 @@ describe_obj(struct seq_file *m, struct 
drm_i915_gem_object *obj)
*t = '\0';
seq_printf(m, " (%s mappable)", s);
}
-   if (obj->last_write.request != NULL)
-   seq_printf(m, " (%s)", obj->last_write.request->engine->name);
+
+   engine = i915_gem_active_get_engine(>last_write);
+   if (engine)
+   seq_printf(m, " (%s)", engine->name);
+
if (obj->frontbuffer_bits)
seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8c3b39a8e974..99e3b269b4b9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1113,29 +1113,32 @@ int
 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
   bool readonly)
 {
+   struct drm_i915_gem_request *request;
int ret, i;
 
if (!obj->active)
return 0;
 
if (readonly) {
-   if (obj->last_write.request != NULL) {
-   ret = i915_wait_request(obj->last_write.request);
+   request = i915_gem_active_peek(>last_write);
+   if (request) {
+   ret = i915_wait_request(request);
if (ret)
return ret;
 
-   i = obj->last_write.request->engine->id;
-   if (obj->last_read[i].request == 
obj->last_write.request)
+   i = request->engine->id;
+   if (i915_gem_active_peek(>last_read[i]) == request)
i915_gem_object_retire__read(obj, i);
else
i915_gem_object_retire__write(obj);
}
} else {
for (i = 0; i < I915_NUM_ENGINES; i++) {
-   if (obj->last_read[i].request == NULL)
+   request = i915_gem_active_peek(>last_read[i]);
+   if (!request)
continue;
 
-   ret = i915_wait_request(obj->last_read[i].request);
+   ret = i915_wait_request(request);
if (ret)
return ret;
 
@@ -1153,9 +1156,9 @@ i915_gem_object_retire_request(struct drm_i915_gem_object 
*obj,
 {
int ring = req->engine->id;
 
-   if (obj->last_read[ring].request == req)
+   if (i915_gem_active_peek(>last_read[ring]) == req)
i915_gem_object_retire__read(obj, ring);
-   else if (obj->last_write.request == req)
+   else if (i915_gem_active_peek(>last_write) == req)
i915_gem_object_retire__write(obj);
 
if (req->reset_counter == i915_reset_counter(>i915->gpu_error))
@@ -1184,20 +1187,20 @@ i915_gem_object_wait_rendering__nonblocking(struct 
drm_i915_gem_object *obj,
if (readonly) {
struct drm_i915_gem_request *req;
 
-   req = obj->last_write.request;
+   

[Intel-gfx] [PATCH 53/62] drm/i915: Split early global GTT initialisation

2016-06-03 Thread Chris Wilson
Initialising the global GTT is tricky as we wish to use the drm_mm range
manager during the modesetting initialisation (to capture stolen
allocations from the BIOS) before we actually enable GEM. To overcome
this, we currently set up the drm_mm first and then carefully rebind
those early allocations.
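
For orientation, the late half of the split as wired up in i915_gem_init()
(excerpted from the diff below):

	ret = i915_gem_init_ggtt(dev);
	if (ret)
		goto out_unlock;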

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.c| 19 ---
 drivers/gpu/drm/i915/i915_gem.c|  6 ++-
 drivers/gpu/drm/i915/i915_gem_gtt.c| 98 +-
 drivers/gpu/drm/i915/i915_gem_gtt.h|  2 +-
 drivers/gpu/drm/i915/i915_gem_stolen.c | 17 +++---
 5 files changed, 49 insertions(+), 93 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index babeee1a6127..4483f9e75aa5 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1340,8 +1340,6 @@ static void i915_driver_cleanup_mmio(struct 
drm_i915_private *dev_priv)
 static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
 {
struct drm_device *dev = dev_priv->dev;
-   struct i915_ggtt *ggtt = _priv->ggtt;
-   uint32_t aperture_size;
int ret;
 
if (i915_inject_load_failure())
@@ -1385,7 +1383,6 @@ static int i915_driver_init_hw(struct drm_i915_private 
*dev_priv)
}
}
 
-
/* 965GM sometimes incorrectly writes to hardware status page (HWS)
 * using 32bit addressing, overwriting memory if HWS is located
 * above 4GB.
@@ -1404,19 +1401,6 @@ static int i915_driver_init_hw(struct drm_i915_private 
*dev_priv)
}
}
 
-   aperture_size = ggtt->mappable_end;
-
-   ggtt->mappable =
-   io_mapping_create_wc(ggtt->mappable_base,
-aperture_size);
-   if (!ggtt->mappable) {
-   ret = -EIO;
-   goto out_ggtt;
-   }
-
-   ggtt->mtrr = arch_phys_wc_add(ggtt->mappable_base,
- aperture_size);
-
pm_qos_add_request(_priv->pm_qos, PM_QOS_CPU_DMA_LATENCY,
   PM_QOS_DEFAULT_VALUE);
 
@@ -1457,14 +1441,11 @@ out_ggtt:
 static void i915_driver_cleanup_hw(struct drm_i915_private *dev_priv)
 {
struct drm_device *dev = dev_priv->dev;
-   struct i915_ggtt *ggtt = _priv->ggtt;
 
if (dev->pdev->msi_enabled)
pci_disable_msi(dev->pdev);
 
pm_qos_remove_request(_priv->pm_qos);
-   arch_phys_wc_del(ggtt->mtrr);
-   io_mapping_free(ggtt->mappable);
i915_ggtt_cleanup_hw(dev);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 20e174f7fc9e..b51d20a4f1ea 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4112,7 +4112,10 @@ int i915_gem_init(struct drm_device *dev)
intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
 
i915_gem_init_userptr(dev_priv);
-   i915_gem_init_ggtt(dev);
+
+   ret = i915_gem_init_ggtt(dev);
+   if (ret)
+   goto out_unlock;
 
ret = i915_gem_context_init(dev);
if (ret)
@@ -4202,7 +4205,6 @@ i915_gem_load_init(struct drm_device *dev)
  SLAB_HWCACHE_ALIGN,
  NULL);
 
-   INIT_LIST_HEAD(_priv->vm_list);
INIT_LIST_HEAD(_priv->context_list);
INIT_LIST_HEAD(_priv->mm.unbound_list);
INIT_LIST_HEAD(_priv->mm.bound_list);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 5d718c488f23..1cdd26ea94ed 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2706,10 +2706,7 @@ static void i915_gtt_color_adjust(struct drm_mm_node 
*node,
}
 }
 
-static int i915_gem_setup_global_gtt(struct drm_device *dev,
-u64 start,
-u64 mappable_end,
-u64 end)
+int i915_gem_init_ggtt(struct drm_device *dev)
 {
/* Let GEM Manage all of the aperture.
 *
@@ -2722,48 +2719,16 @@ static int i915_gem_setup_global_gtt(struct drm_device 
*dev,
 */
struct drm_i915_private *dev_priv = to_i915(dev);
struct i915_ggtt *ggtt = _priv->ggtt;
-   struct drm_mm_node *entry;
-   struct drm_i915_gem_object *obj;
unsigned long hole_start, hole_end;
+   struct drm_mm_node *entry;
int ret;
 
-   BUG_ON(mappable_end > end);
-
-   ggtt->base.start = start;
-
-   /* Subtract the guard page before address space initialization to
-* shrink the range used by drm_mm */
-   ggtt->base.total = end - start - PAGE_SIZE;
-   i915_address_space_init(>base, dev_priv);
-   ggtt->base.total += PAGE_SIZE;
-
if (intel_vgpu_active(dev_priv)) {
ret = intel_vgt_balloon(dev);
if (ret)
return ret;
  

[Intel-gfx] [PATCH 26/62] drm/i915: Rename request->ring to request->engine

2016-06-03 Thread Chris Wilson
In order to disambiguate between the pointer to the intel_engine_cs
(called ring) and the intel_ringbuffer (called ringbuf), perform
s/ring/engine/ on the request struct.
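
The flavour of the change, taken from the __i915_gem_object_sync() hunk
below:

	/* before */
	from = i915_gem_request_get_engine(from_req);

	/* after */
	from = from_req->engine;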

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c  |  3 +--
 drivers/gpu/drm/i915/i915_gem.c  |  6 ++
 drivers/gpu/drm/i915/i915_gem_context.c  |  6 ++
 drivers/gpu/drm/i915/i915_gem_gtt.c  |  5 ++---
 drivers/gpu/drm/i915/i915_gem_render_state.c | 12 ++--
 drivers/gpu/drm/i915/i915_gem_request.c  |  6 +-
 drivers/gpu/drm/i915/i915_gpu_error.c|  3 +--
 drivers/gpu/drm/i915/i915_guc_submission.c   |  4 ++--
 drivers/gpu/drm/i915/intel_lrc.c |  6 +++---
 9 files changed, 20 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index c1f8b5126d16..34e41ae2943e 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -193,8 +193,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object 
*obj)
seq_printf(m, " (%s mappable)", s);
}
if (obj->last_write_req != NULL)
-   seq_printf(m, " (%s)",
-  
i915_gem_request_get_engine(obj->last_write_req)->name);
+   seq_printf(m, " (%s)", obj->last_write_req->engine->name);
if (obj->frontbuffer_bits)
seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 22c8361748d6..8edd79ad08b4 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2101,9 +2101,7 @@ void i915_vma_move_to_active(struct i915_vma *vma,
 struct drm_i915_gem_request *req)
 {
struct drm_i915_gem_object *obj = vma->obj;
-   struct intel_engine_cs *engine;
-
-   engine = i915_gem_request_get_engine(req);
+   struct intel_engine_cs *engine = req->engine;
 
/* Add a reference if we're newly entering the active list. */
if (obj->active == 0)
@@ -2561,7 +2559,7 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
struct intel_engine_cs *from;
int ret;
 
-   from = i915_gem_request_get_engine(from_req);
+   from = from_req->engine;
if (to == from)
return 0;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index 41e32426d174..899731f9a2c4 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -555,8 +555,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 
hw_flags)
if (num_rings) {
struct intel_engine_cs *signaller;
 
-   intel_ring_emit(ring,
-   MI_LOAD_REGISTER_IMM(num_rings));
+   intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_rings));
for_each_engine(signaller, dev_priv) {
if (signaller == req->engine)
continue;
@@ -585,8 +584,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 
hw_flags)
struct intel_engine_cs *signaller;
i915_reg_t last_reg = {}; /* keep gcc quiet */
 
-   intel_ring_emit(ring,
-   MI_LOAD_REGISTER_IMM(num_rings));
+   intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_rings));
for_each_engine(signaller, dev_priv) {
if (signaller == req->engine)
continue;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index f735d1ec189a..4b4e3de58ad9 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1689,7 +1689,7 @@ static int vgpu_mm_switch(struct i915_hw_ppgtt *ppgtt,
  struct drm_i915_gem_request *req)
 {
struct intel_engine_cs *engine = req->engine;
-   struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev);
+   struct drm_i915_private *dev_priv = req->i915;
 
I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G);
I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt));
@@ -1737,8 +1737,7 @@ static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
  struct drm_i915_gem_request *req)
 {
struct intel_engine_cs *engine = req->engine;
-   struct drm_device *dev = ppgtt->base.dev;
-   struct drm_i915_private *dev_priv = dev->dev_private;
+   struct drm_i915_private *dev_priv = req->i915;
 
 
I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G);
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c 
b/drivers/gpu/drm/i915/i915_gem_render_state.c
index 

[Intel-gfx] [PATCH 36/62] drm/i915: Convert engine->write_tail to operate on a request

2016-06-03 Thread Chris Wilson
If we rewrite the I915_WRITE_TAIL specialisation for the legacy
ringbuffer as submitting the request onto the ringbuffer, we can unify
the vfunc with both execlists and GuC in the next patch.
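
The resulting split for the legacy i9xx path (excerpted from the diff
below): the request emitter records the tail, and the new submit_request
vfunc performs the actual hardware write.

	req->tail = intel_ring_get_tail(ring);
	req->engine->submit_request(req);

static void i9xx_submit_request(struct drm_i915_gem_request *request)
{
	struct drm_i915_private *dev_priv = request->i915;

	I915_WRITE_TAIL(request->engine, request->tail);
}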

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem_request.c |  5 +--
 drivers/gpu/drm/i915/intel_ringbuffer.c | 63 ++---
 drivers/gpu/drm/i915/intel_ringbuffer.h |  3 +-
 3 files changed, 36 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c 
b/drivers/gpu/drm/i915/i915_gem_request.c
index 06f724ee23dd..5fef1c291b25 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -461,11 +461,8 @@ void __i915_add_request(struct drm_i915_gem_request 
*request,
 
if (i915.enable_execlists)
ret = engine->emit_request(request);
-   else {
+   else
ret = engine->add_request(request);
-
-   request->tail = intel_ring_get_tail(ring);
-   }
/* Not allowed to fail! */
WARN(ret, "emit|add_request failed: %d!\n", ret);
/* Sanity check that the reserved size was large enough. */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 943dc08c69df..db38abddfec1 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -58,13 +58,6 @@ void intel_ring_update_space(struct intel_ring *ring)
 ring->tail, ring->size);
 }
 
-static void __intel_engine_submit(struct intel_engine_cs *engine)
-{
-   struct intel_ring *ring = engine->buffer;
-   ring->tail &= ring->size - 1;
-   engine->write_tail(engine, ring->tail);
-}
-
 static int
 gen2_render_ring_flush(struct drm_i915_gem_request *req,
   u32  invalidate_domains,
@@ -420,13 +413,6 @@ gen8_render_ring_flush(struct drm_i915_gem_request *req,
return gen8_emit_pipe_control(req, flags, scratch_addr);
 }
 
-static void ring_write_tail(struct intel_engine_cs *engine,
-   u32 value)
-{
-   struct drm_i915_private *dev_priv = engine->i915;
-   I915_WRITE_TAIL(engine, value);
-}
-
 u64 intel_engine_get_active_head(struct intel_engine_cs *engine)
 {
struct drm_i915_private *dev_priv = engine->i915;
@@ -535,7 +521,7 @@ static bool stop_ring(struct intel_engine_cs *engine)
 
I915_WRITE_CTL(engine, 0);
I915_WRITE_HEAD(engine, 0);
-   engine->write_tail(engine, 0);
+   I915_WRITE_TAIL(engine, 0);
 
if (!IS_GEN2(dev_priv)) {
(void)I915_READ_CTL(engine);
@@ -1380,7 +1366,11 @@ gen6_add_request(struct drm_i915_gem_request *req)
intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
intel_ring_emit(ring, req->fence.seqno);
intel_ring_emit(ring, MI_USER_INTERRUPT);
-   __intel_engine_submit(req->engine);
+   intel_ring_advance(ring);
+
+   req->tail = intel_ring_get_tail(ring);
+
+   req->engine->submit_request(req);
 
return 0;
 }
@@ -1410,7 +1400,8 @@ gen8_render_add_request(struct drm_i915_gem_request *req)
intel_ring_emit(ring, 0);
intel_ring_emit(ring, MI_USER_INTERRUPT);
intel_ring_emit(ring, MI_NOOP);
-   __intel_engine_submit(engine);
+
+   req->engine->submit_request(req);
 
return 0;
 }
@@ -1632,11 +1623,21 @@ i9xx_add_request(struct drm_i915_gem_request *req)
intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
intel_ring_emit(ring, req->fence.seqno);
intel_ring_emit(ring, MI_USER_INTERRUPT);
-   __intel_engine_submit(req->engine);
+   intel_ring_advance(ring);
+
+   req->tail = intel_ring_get_tail(ring);
+
+   req->engine->submit_request(req);
 
return 0;
 }
 
+static void i9xx_submit_request(struct drm_i915_gem_request *request)
+{
+   struct drm_i915_private *dev_priv = request->i915;
+   I915_WRITE_TAIL(request->engine, request->tail);
+}
+
 static void
 gen6_ring_enable_irq(struct intel_engine_cs *engine)
 {
@@ -2395,10 +2396,9 @@ void intel_engine_init_seqno(struct intel_engine_cs 
*engine, u32 seqno)
engine->hangcheck.seqno = seqno;
 }
 
-static void gen6_bsd_ring_write_tail(struct intel_engine_cs *engine,
-u32 value)
+static void gen6_bsd_submit_request(struct drm_i915_gem_request *request)
 {
-   struct drm_i915_private *dev_priv = engine->i915;
+   struct drm_i915_private *dev_priv = request->i915;
 
/* Every tail move must follow the sequence below */
 
@@ -2418,8 +2418,8 @@ static void gen6_bsd_ring_write_tail(struct 
intel_engine_cs *engine,
DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
 
/* Now that the ring is fully powered up, update the tail */
-   I915_WRITE_TAIL(engine, value);
-   

[Intel-gfx] [PATCH 38/62] drm/i915: Stop passing caller's num_dwords to engine->semaphore.signal()

2016-06-03 Thread Chris Wilson
Rather than pass in the num_dwords that the caller wishes to use after
the signal command packet, split the breadcrumb emission into two phases
and have both the signal and breadcrumb individually acquire space on
the ring. This makes the interface simpler for the reader, and will
simplify the following patches.
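
The two-phase emission then looks like this in the caller (excerpted from
the gen6_emit_request() hunk below):

	if (req->engine->semaphore.signal) {
		ret = req->engine->semaphore.signal(req);
		if (ret)
			return ret;
	}

	ret = intel_ring_begin(req, 4);
	if (ret)
		return ret;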

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 51 ++---
 drivers/gpu/drm/i915/intel_ringbuffer.h |  4 +--
 2 files changed, 23 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index b7b5c2d94db5..b4edbdeac27e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1226,10 +1226,8 @@ static void render_ring_cleanup(struct intel_engine_cs 
*engine)
intel_fini_pipe_control(engine);
 }
 
-static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req,
-  unsigned int num_dwords)
+static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req)
 {
-#define MBOX_UPDATE_DWORDS 8
struct intel_ring *signaller = signaller_req->ring;
struct drm_i915_private *dev_priv = signaller_req->i915;
struct intel_engine_cs *waiter;
@@ -1237,10 +1235,7 @@ static int gen8_rcs_signal(struct drm_i915_gem_request 
*signaller_req,
int ret, num_rings;
 
num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask);
-   num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
-#undef MBOX_UPDATE_DWORDS
-
-   ret = intel_ring_begin(signaller_req, num_dwords);
+   ret = intel_ring_begin(signaller_req, (num_rings-1) * 8);
if (ret)
return ret;
 
@@ -1262,14 +1257,13 @@ static int gen8_rcs_signal(struct drm_i915_gem_request 
*signaller_req,
   MI_SEMAPHORE_TARGET(waiter->hw_id));
intel_ring_emit(signaller, 0);
}
+   intel_ring_advance(signaller);
 
return 0;
 }
 
-static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req,
-  unsigned int num_dwords)
+static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req)
 {
-#define MBOX_UPDATE_DWORDS 6
struct intel_ring *signaller = signaller_req->ring;
struct drm_i915_private *dev_priv = signaller_req->i915;
struct intel_engine_cs *waiter;
@@ -1277,10 +1271,7 @@ static int gen8_xcs_signal(struct drm_i915_gem_request 
*signaller_req,
int ret, num_rings;
 
num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask);
-   num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
-#undef MBOX_UPDATE_DWORDS
-
-   ret = intel_ring_begin(signaller_req, num_dwords);
+   ret = intel_ring_begin(signaller_req, (num_rings-1) * 6);
if (ret)
return ret;
 
@@ -1300,12 +1291,12 @@ static int gen8_xcs_signal(struct drm_i915_gem_request 
*signaller_req,
   MI_SEMAPHORE_TARGET(waiter->hw_id));
intel_ring_emit(signaller, 0);
}
+   intel_ring_advance(signaller);
 
return 0;
 }
 
-static int gen6_signal(struct drm_i915_gem_request *signaller_req,
-  unsigned int num_dwords)
+static int gen6_signal(struct drm_i915_gem_request *signaller_req)
 {
struct intel_ring *signaller = signaller_req->ring;
struct drm_i915_private *dev_priv = signaller_req->i915;
@@ -1313,12 +1304,8 @@ static int gen6_signal(struct drm_i915_gem_request 
*signaller_req,
enum intel_engine_id id;
int ret, num_rings;
 
-#define MBOX_UPDATE_DWORDS 3
num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask);
-   num_dwords += round_up((num_rings-1) * MBOX_UPDATE_DWORDS, 2);
-#undef MBOX_UPDATE_DWORDS
-
-   ret = intel_ring_begin(signaller_req, num_dwords);
+   ret = intel_ring_begin(signaller_req, round_up((num_rings-1) * 3, 2));
if (ret)
return ret;
 
@@ -1336,6 +1323,7 @@ static int gen6_signal(struct drm_i915_gem_request 
*signaller_req,
/* If num_dwords was rounded, make sure the tail pointer is correct */
if (num_rings % 2 == 0)
intel_ring_emit(signaller, MI_NOOP);
+   intel_ring_advance(signaller);
 
return 0;
 }
@@ -1353,11 +1341,13 @@ static int gen6_emit_request(struct 
drm_i915_gem_request *req)
struct intel_ring *ring = req->ring;
int ret;
 
-   if (req->engine->semaphore.signal)
-   ret = req->engine->semaphore.signal(req, 4);
-   else
-   ret = intel_ring_begin(req, 4);
+   if (req->engine->semaphore.signal) {
+   ret = req->engine->semaphore.signal(req);
+   if (ret)
+   return ret;
+   }
 
+   ret = intel_ring_begin(req, 4);
if (ret)
return ret;
 
@@ -1378,10 +1368,13 @@ static int gen8_render_emit_request(struct 

[Intel-gfx] [PATCH 29/62] drm/i915: Rename intel_context[engine].ringbuf

2016-06-03 Thread Chris Wilson
Perform s/ringbuf/ring/ on the context struct for consistency with the
ring/engine split.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c|  8 
 drivers/gpu/drm/i915/i915_drv.h|  2 +-
 drivers/gpu/drm/i915/i915_gem_context.c|  4 ++--
 drivers/gpu/drm/i915/i915_guc_submission.c |  2 +-
 drivers/gpu/drm/i915/intel_lrc.c   | 33 ++
 5 files changed, 23 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 34e41ae2943e..8d3bc2bd532e 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -424,8 +424,8 @@ static int per_file_ctx_stats(int id, void *ptr, void *data)
for (n = 0; n < ARRAY_SIZE(ctx->engine); n++) {
if (ctx->engine[n].state)
per_file_stats(0, ctx->engine[n].state, data);
-   if (ctx->engine[n].ringbuf)
-   per_file_stats(0, ctx->engine[n].ringbuf->obj, data);
+   if (ctx->engine[n].ring)
+   per_file_stats(0, ctx->engine[n].ring->obj, data);
}
 
return 0;
@@ -2062,8 +2062,8 @@ static int i915_context_status(struct seq_file *m, void 
*unused)
seq_putc(m, ce->initialised ? 'I' : 'i');
if (ce->state)
describe_obj(m, ce->state);
-   if (ce->ringbuf)
-   describe_ctx_ringbuf(m, ce->ringbuf);
+   if (ce->ring)
+   describe_ctx_ringbuf(m, ce->ring);
seq_putc(m, '\n');
}
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index fcac90104ba9..de54adbf5768 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -881,7 +881,7 @@ struct i915_gem_context {
 
struct intel_context {
struct drm_i915_gem_object *state;
-   struct intel_ringbuffer *ringbuf;
+   struct intel_ringbuffer *ring;
struct i915_vma *lrc_vma;
uint32_t *lrc_reg_state;
u64 lrc_desc;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index a7911f39f416..7e45e7cdb538 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -173,8 +173,8 @@ void i915_gem_context_free(struct kref *ctx_ref)
continue;
 
WARN_ON(ce->pin_count);
-   if (ce->ringbuf)
-   intel_ringbuffer_free(ce->ringbuf);
+   if (ce->ring)
+   intel_ringbuffer_free(ce->ring);
 
i915_gem_object_put(ce->state);
}
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c 
b/drivers/gpu/drm/i915/i915_guc_submission.c
index 337b8f60989c..8aa3cf8cac45 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -395,7 +395,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) |
(engine->guc_id << GUC_ELC_ENGINE_OFFSET);
 
-   obj = ce->ringbuf->obj;
+   obj = ce->ring->obj;
gfx_addr = i915_gem_obj_ggtt_offset(obj);
 
lrc->ring_begin = gfx_addr;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 229545fc5b4a..14e3437d9074 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -459,11 +459,8 @@ static void execlists_context_unqueue(struct 
intel_engine_cs *engine)
 * resubmit the request. See gen8_emit_request() for where we
 * prepare the padding after the end of the request.
 */
-   struct intel_ringbuffer *ringbuf;
-
-   ringbuf = req0->ctx->engine[engine->id].ringbuf;
req0->tail += 8;
-   req0->tail &= ringbuf->size - 1;
+   req0->tail &= req0->ring->size - 1;
}
 
execlists_submit_requests(req0, req1);
@@ -692,7 +689,7 @@ int intel_logical_ring_alloc_request_extras(struct 
drm_i915_gem_request *request
return ret;
}
 
-   request->ring = ce->ringbuf;
+   request->ring = ce->ring;
 
if (i915.enable_guc_submission) {
/*
@@ -957,14 +954,14 @@ static int intel_lr_context_pin(struct i915_gem_context 
*ctx,
 
lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
 
-   ret = intel_pin_and_map_ringbuffer_obj(dev_priv, ce->ringbuf);
+   ret = intel_pin_and_map_ringbuffer_obj(dev_priv, ce->ring);
if (ret)
goto unpin_map;
 
ce->lrc_vma = 

[Intel-gfx] [PATCH 08/62] drm/i915: Remove stop-rings debugfs interface

2016-06-03 Thread Chris Wilson
Now that we have (near) universal GPU recovery code, we can inject a
real hang from userspace and not need any fakery. Not only does this
mean that the testing is far more realistic, but we can simplify the
kernel in the process.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c | 35 --
 drivers/gpu/drm/i915/i915_drv.c | 17 ++---
 drivers/gpu/drm/i915/i915_drv.h | 19 --
 drivers/gpu/drm/i915/i915_gem.c | 44 ++---
 drivers/gpu/drm/i915/intel_lrc.c|  3 ---
 drivers/gpu/drm/i915/intel_ringbuffer.c |  8 --
 drivers/gpu/drm/i915/intel_ringbuffer.h |  1 -
 7 files changed, 15 insertions(+), 112 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index dd6cf222e8f5..8f576b443ff6 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4821,40 +4821,6 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops,
"%llu\n");
 
 static int
-i915_ring_stop_get(void *data, u64 *val)
-{
-   struct drm_device *dev = data;
-   struct drm_i915_private *dev_priv = dev->dev_private;
-
-   *val = dev_priv->gpu_error.stop_rings;
-
-   return 0;
-}
-
-static int
-i915_ring_stop_set(void *data, u64 val)
-{
-   struct drm_device *dev = data;
-   struct drm_i915_private *dev_priv = dev->dev_private;
-   int ret;
-
-   DRM_DEBUG_DRIVER("Stopping rings 0x%08llx\n", val);
-
-   ret = mutex_lock_interruptible(>struct_mutex);
-   if (ret)
-   return ret;
-
-   dev_priv->gpu_error.stop_rings = val;
-   mutex_unlock(>struct_mutex);
-
-   return 0;
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(i915_ring_stop_fops,
-   i915_ring_stop_get, i915_ring_stop_set,
-   "0x%08llx\n");
-
-static int
 i915_ring_missed_irq_get(void *data, u64 *val)
 {
struct drm_device *dev = data;
@@ -5457,7 +5423,6 @@ static const struct i915_debugfs_files {
{"i915_max_freq", _max_freq_fops},
{"i915_min_freq", _min_freq_fops},
{"i915_cache_sharing", _cache_sharing_fops},
-   {"i915_ring_stop", _ring_stop_fops},
{"i915_ring_missed_irq", _ring_missed_irq_fops},
{"i915_ring_test_irq", _ring_test_irq_fops},
{"i915_gem_drop_caches", _drop_caches_fops},
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 7ba040141722..f2ac0cae929b 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -2125,24 +2125,11 @@ int i915_reset(struct drm_i915_private *dev_priv)
goto error;
}
 
+   pr_notice("drm/i915: Resetting chip after gpu hang\n");
+
i915_gem_reset(dev);
 
ret = intel_gpu_reset(dev_priv, ALL_ENGINES);
-
-   /* Also reset the gpu hangman. */
-   if (error->stop_rings != 0) {
-   DRM_INFO("Simulated gpu hang, resetting stop_rings\n");
-   error->stop_rings = 0;
-   if (ret == -ENODEV) {
-   DRM_INFO("Reset not implemented, but ignoring "
-"error for simulated gpu hangs\n");
-   ret = 0;
-   }
-   }
-
-   if (i915_stop_ring_allow_warn(dev_priv))
-   pr_notice("drm/i915: Resetting chip after gpu hang\n");
-
if (ret) {
if (ret != -ENODEV)
DRM_ERROR("Failed to reset chip: %i\n", ret);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3f075adf9e84..a48c0f4e1d42 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1393,13 +1393,6 @@ struct i915_gpu_error {
 */
wait_queue_head_t reset_queue;
 
-   /* Userspace knobs for gpu hang simulation;
-* combines both a ring mask, and extra flags
-*/
-   u32 stop_rings;
-#define I915_STOP_RING_ALLOW_BAN   (1 << 31)
-#define I915_STOP_RING_ALLOW_WARN  (1 << 30)
-
/* For missed irq/seqno simulation. */
unsigned long test_irq_rings;
 };
@@ -3292,18 +3285,6 @@ static inline u32 i915_reset_count(struct i915_gpu_error 
*error)
return ((i915_reset_counter(error) & ~I915_WEDGED) + 1) / 2;
 }
 
-static inline bool i915_stop_ring_allow_ban(struct drm_i915_private *dev_priv)
-{
-   return dev_priv->gpu_error.stop_rings == 0 ||
-   dev_priv->gpu_error.stop_rings & I915_STOP_RING_ALLOW_BAN;
-}
-
-static inline bool i915_stop_ring_allow_warn(struct drm_i915_private *dev_priv)
-{
-   return dev_priv->gpu_error.stop_rings == 0 ||
-   dev_priv->gpu_error.stop_rings & I915_STOP_RING_ALLOW_WARN;
-}
-
 void i915_gem_reset(struct drm_device *dev);
 bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
 int __must_check i915_gem_init(struct drm_device *dev);
diff --git 

[Intel-gfx] [PATCH 28/62] drm/i915: Rename backpointer from intel_ringbuffer to intel_engine_cs

2016-06-03 Thread Chris Wilson
Having ringbuf->ring point to an engine is confusing, so rename it once
again to ring->engine.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 0f13e9900bd6..ab498ecce1ca 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2087,8 +2087,8 @@ static void intel_ring_context_unpin(struct 
i915_gem_context *ctx,
i915_gem_context_put(ctx);
 }
 
-static int intel_init_ring_buffer(struct drm_device *dev,
- struct intel_engine_cs *engine)
+static int intel_init_engine(struct drm_device *dev,
+struct intel_engine_cs *engine)
 {
struct drm_i915_private *dev_priv = to_i915(dev);
struct intel_ringbuffer *ringbuf;
@@ -2707,7 +2707,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
engine->init_hw = init_render_ring;
engine->cleanup = render_ring_cleanup;
 
-   ret = intel_init_ring_buffer(dev, engine);
+   ret = intel_init_engine(dev, engine);
if (ret)
return ret;
 
@@ -2794,7 +2794,7 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev)
}
engine->init_hw = init_ring_common;
 
-   return intel_init_ring_buffer(dev, engine);
+   return intel_init_engine(dev, engine);
 }
 
 /**
@@ -2828,7 +2828,7 @@ int intel_init_bsd2_ring_buffer(struct drm_device *dev)
}
engine->init_hw = init_ring_common;
 
-   return intel_init_ring_buffer(dev, engine);
+   return intel_init_engine(dev, engine);
 }
 
 int intel_init_blt_ring_buffer(struct drm_device *dev)
@@ -2886,7 +2886,7 @@ int intel_init_blt_ring_buffer(struct drm_device *dev)
}
engine->init_hw = init_ring_common;
 
-   return intel_init_ring_buffer(dev, engine);
+   return intel_init_engine(dev, engine);
 }
 
 int intel_init_vebox_ring_buffer(struct drm_device *dev)
@@ -2938,7 +2938,7 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev)
}
engine->init_hw = init_ring_common;
 
-   return intel_init_ring_buffer(dev, engine);
+   return intel_init_engine(dev, engine);
 }
 
 int
-- 
2.8.1



[Intel-gfx] [PATCH 34/62] drm/i915: Simplify request_alloc by returning the allocated request

2016-06-03 Thread Chris Wilson
It is simpler, and leads to more readable code through the callstack, if
the allocation returns the allocated struct through the return value.

The importance of this is that it no longer looks like we accidentally
allocate requests as a side-effect of calling certain functions.
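
The visible effect on the API is the signature change to
i915_gem_object_sync() (from the i915_drv.h hunk below):

	/* before: a request may be allocated behind the caller's back */
	int i915_gem_object_sync(struct drm_i915_gem_object *obj,
				 struct intel_engine_cs *to,
				 struct drm_i915_gem_request **to_req);

	/* after: the caller allocates the request explicitly */
	int i915_gem_object_sync(struct drm_i915_gem_object *obj,
				 struct drm_i915_gem_request *to);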

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h|  3 +-
 drivers/gpu/drm/i915/i915_gem.c| 75 --
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 12 ++---
 drivers/gpu/drm/i915/i915_gem_request.c| 58 ---
 drivers/gpu/drm/i915/i915_trace.h  | 13 +++---
 drivers/gpu/drm/i915/intel_display.c   | 36 ++
 drivers/gpu/drm/i915/intel_lrc.c   |  2 +-
 drivers/gpu/drm/i915/intel_overlay.c   | 19 
 8 files changed, 78 insertions(+), 140 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index fe39cd2584f3..b1e00b42a830 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3067,8 +3067,7 @@ static inline void i915_gem_object_unpin_map(struct 
drm_i915_gem_object *obj)
 
 int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
 int i915_gem_object_sync(struct drm_i915_gem_object *obj,
-struct intel_engine_cs *to,
-struct drm_i915_gem_request **to_req);
+struct drm_i915_gem_request *to);
 void i915_vma_move_to_active(struct i915_vma *vma,
 struct drm_i915_gem_request *req);
 int i915_gem_dumb_create(struct drm_file *file_priv,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 034d81c54d67..de1e866276c5 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2552,51 +2552,35 @@ out:
 
 static int
 __i915_gem_object_sync(struct drm_i915_gem_object *obj,
-  struct intel_engine_cs *to,
-  struct drm_i915_gem_request *from_req,
-  struct drm_i915_gem_request **to_req)
+  struct drm_i915_gem_request *to,
+  struct drm_i915_gem_request *from)
 {
-   struct intel_engine_cs *from;
int ret;
 
-   from = from_req->engine;
-   if (to == from)
+   if (to->engine == from->engine)
return 0;
 
-   if (i915_gem_request_completed(from_req))
+   if (i915_gem_request_completed(from))
return 0;
 
if (!i915.semaphores) {
-   struct drm_i915_private *i915 = to_i915(obj->base.dev);
-   ret = __i915_wait_request(from_req,
- i915->mm.interruptible,
+   ret = __i915_wait_request(from,
+ from->i915->mm.interruptible,
  NULL,
  NO_WAITBOOST);
if (ret)
return ret;
 
-   i915_gem_object_retire_request(obj, from_req);
+   i915_gem_object_retire_request(obj, from);
} else {
-   int idx = intel_engine_sync_index(from, to);
-   u32 seqno = i915_gem_request_get_seqno(from_req);
+   int idx = intel_engine_sync_index(from->engine, to->engine);
+   u32 seqno = i915_gem_request_get_seqno(from);
 
-   WARN_ON(!to_req);
-
-   if (seqno <= from->semaphore.sync_seqno[idx])
+   if (seqno <= from->engine->semaphore.sync_seqno[idx])
return 0;
 
-   if (*to_req == NULL) {
-   struct drm_i915_gem_request *req;
-
-   req = i915_gem_request_alloc(to, NULL);
-   if (IS_ERR(req))
-   return PTR_ERR(req);
-
-   *to_req = req;
-   }
-
-   trace_i915_gem_ring_sync_to(*to_req, from, from_req);
-   ret = to->semaphore.sync_to(*to_req, from, seqno);
+   trace_i915_gem_ring_sync_to(to, from);
+   ret = to->engine->semaphore.sync_to(to, from->engine, seqno);
if (ret)
return ret;
 
@@ -2604,8 +2588,8 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
 * might have just caused seqno wrap under
 * the radar.
 */
-   from->semaphore.sync_seqno[idx] =
-   
i915_gem_request_get_seqno(obj->last_read_req[from->id]);
+   from->engine->semaphore.sync_seqno[idx] =
+   
i915_gem_request_get_seqno(obj->last_read_req[from->engine->id]);
}
 
return 0;
@@ -2615,17 +2599,12 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
  * i915_gem_object_sync - sync an object to a ring.
  *
  * @obj: object which may be in use on 

[Intel-gfx] [PATCH 40/62] drm/i915: Remove duplicate golden render state init from execlists

2016-06-03 Thread Chris Wilson
Now that we use the same vfuncs for emitting the batch buffer in both
execlists and legacy, the golden render state initialisation is
identical between the two.
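
For reference, the now-duplicate sequence removed from intel_lrc.c (the
copy in i915_gem_render_state.c remains) is the pair of batch-buffer
emissions:

	ret = req->engine->emit_bb_start(req, so.ggtt_offset,
					 so.rodata->batch_items * 4,
					 I915_DISPATCH_SECURE);
	if (ret)
		goto out;

	ret = req->engine->emit_bb_start(req,
					 (so.ggtt_offset + so.aux_batch_offset),
					 so.aux_batch_size,
					 I915_DISPATCH_SECURE);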

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem_render_state.c | 23 +--
 drivers/gpu/drm/i915/i915_gem_render_state.h | 18 ---
 drivers/gpu/drm/i915/intel_lrc.c | 34 +---
 drivers/gpu/drm/i915/intel_renderstate.h | 16 +
 4 files changed, 28 insertions(+), 63 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c 
b/drivers/gpu/drm/i915/i915_gem_render_state.c
index 6aedb913f694..8587dbc302e0 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -28,6 +28,15 @@
 #include "i915_drv.h"
 #include "intel_renderstate.h"
 
+struct render_state {
+   const struct intel_renderstate_rodata *rodata;
+   struct drm_i915_gem_object *obj;
+   u64 ggtt_offset;
+   int gen;
+   u32 aux_batch_size;
+   u32 aux_batch_offset;
+};
+
 static const struct intel_renderstate_rodata *
 render_state_get_rodata(const int gen)
 {
@@ -51,6 +60,7 @@ static int render_state_init(struct render_state *so,
int ret;
 
so->gen = INTEL_GEN(dev_priv);
+   so->ggtt_offset = 0;
so->rodata = render_state_get_rodata(so->gen);
if (so->rodata == NULL)
return 0;
@@ -164,14 +174,14 @@ err_out:
 
 #undef OUT_BATCH
 
-void i915_gem_render_state_fini(struct render_state *so)
+static void render_state_fini(struct render_state *so)
 {
i915_gem_object_ggtt_unpin(so->obj);
i915_gem_object_put(so->obj);
 }
 
-int i915_gem_render_state_prepare(struct intel_engine_cs *engine,
- struct render_state *so)
+static int render_state_prepare(struct intel_engine_cs *engine,
+   struct render_state *so)
 {
int ret;
 
@@ -187,7 +197,7 @@ int i915_gem_render_state_prepare(struct intel_engine_cs 
*engine,
 
ret = render_state_setup(so);
if (ret) {
-   i915_gem_render_state_fini(so);
+   render_state_fini(so);
return ret;
}
 
@@ -199,7 +209,7 @@ int i915_gem_render_state_init(struct drm_i915_gem_request 
*req)
struct render_state so;
int ret;
 
-   ret = i915_gem_render_state_prepare(req->engine, );
+   ret = render_state_prepare(req->engine, );
if (ret)
return ret;
 
@@ -223,8 +233,7 @@ int i915_gem_render_state_init(struct drm_i915_gem_request 
*req)
}
 
i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);
-
 out:
-   i915_gem_render_state_fini();
+   render_state_fini();
return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.h 
b/drivers/gpu/drm/i915/i915_gem_render_state.h
index 6aaa3a10a630..c44fca8599bb 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.h
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.h
@@ -26,24 +26,6 @@
 
 #include 
 
-struct intel_renderstate_rodata {
-   const u32 *reloc;
-   const u32 *batch;
-   const u32 batch_items;
-};
-
-struct render_state {
-   const struct intel_renderstate_rodata *rodata;
-   struct drm_i915_gem_object *obj;
-   u64 ggtt_offset;
-   int gen;
-   u32 aux_batch_size;
-   u32 aux_batch_offset;
-};
-
 int i915_gem_render_state_init(struct drm_i915_gem_request *req);
-void i915_gem_render_state_fini(struct render_state *so);
-int i915_gem_render_state_prepare(struct intel_engine_cs *engine,
- struct render_state *so);
 
 #endif /* _I915_GEM_RENDER_STATE_H_ */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index eee9274f7516..3f7f7d72487e 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1723,38 +1723,6 @@ static int gen8_emit_request_render(struct 
drm_i915_gem_request *request)
return intel_logical_ring_advance_and_submit(request);
 }
 
-static int intel_lr_context_render_state_init(struct drm_i915_gem_request *req)
-{
-   struct render_state so;
-   int ret;
-
-   ret = i915_gem_render_state_prepare(req->engine, );
-   if (ret)
-   return ret;
-
-   if (so.rodata == NULL)
-   return 0;
-
-   ret = req->engine->emit_bb_start(req, so.ggtt_offset,
-so.rodata->batch_items * 4,
-I915_DISPATCH_SECURE);
-   if (ret)
-   goto out;
-
-   ret = req->engine->emit_bb_start(req,
-(so.ggtt_offset + so.aux_batch_offset),
-so.aux_batch_size,
-I915_DISPATCH_SECURE);
-   if (ret)
-   goto out;
-
-   

[Intel-gfx] [PATCH 24/62] drm/i915: Convert i915_semaphores_is_enabled over to early sanitize

2016-06-03 Thread Chris Wilson
Rather than recomputing whether semaphores are enabled, we can do that
computation once during early initialisation as the i915.semaphores
module parameter is now read-only.
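
Both halves of the change appear in the diff below: sanitize once at init,
then test the cached value everywhere else.

	/* during early init */
	i915.semaphores = intel_sanitize_semaphores(dev_priv, i915.semaphores);

	/* callers now test the sanitized value directly */
	if (!i915.semaphores) {
		seq_puts(m, "Semaphores are disabled\n");
		return 0;
	}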

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c |  2 +-
 drivers/gpu/drm/i915/i915_drv.c |  4 +++-
 drivers/gpu/drm/i915/i915_drv.h |  3 ++-
 drivers/gpu/drm/i915/i915_gem.c | 27 ++-
 drivers/gpu/drm/i915/i915_gem_context.c |  2 +-
 drivers/gpu/drm/i915/i915_gpu_error.c   |  2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c | 20 ++--
 7 files changed, 44 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index daabbc6b65e9..c1f8b5126d16 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3201,7 +3201,7 @@ static int i915_semaphore_status(struct seq_file *m, void 
*unused)
enum intel_engine_id id;
int j, ret;
 
-   if (!i915_semaphore_is_enabled(dev_priv)) {
+   if (!i915.semaphores) {
seq_puts(m, "Semaphores are disabled\n");
return 0;
}
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index f2ac0cae929b..babeee1a6127 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -318,7 +318,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
value = 1;
break;
case I915_PARAM_HAS_SEMAPHORES:
-   value = i915_semaphore_is_enabled(dev_priv);
+   value = i915.semaphores;
break;
case I915_PARAM_HAS_PRIME_VMAP_FLUSH:
value = 1;
@@ -1102,6 +1102,8 @@ static void intel_device_info_runtime_init(struct 
drm_device *dev)
i915.enable_ppgtt =
intel_sanitize_enable_ppgtt(dev_priv, i915.enable_ppgtt);
DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt);
+
+   i915.semaphores = intel_sanitize_semaphores(dev_priv, i915.semaphores);
 }
 
 static void intel_init_dpio(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 548fd3b9d858..fcac90104ba9 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2748,6 +2748,8 @@ extern int i915_resume_switcheroo(struct drm_device *dev);
 int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
int enable_ppgtt);
 
+bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value);
+
 /* i915_drv.c */
 void __printf(3, 4)
 __i915_printk(struct drm_i915_private *dev_priv, const char *level,
@@ -3528,7 +3530,6 @@ extern void intel_set_rps(struct drm_i915_private 
*dev_priv, u8 val);
 extern void intel_set_memory_cxsr(struct drm_i915_private *dev_priv,
  bool enable);
 
-extern bool i915_semaphore_is_enabled(struct drm_i915_private *dev_priv);
 int i915_reg_read_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 703e98e1a2e5..22c8361748d6 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2568,7 +2568,7 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
if (i915_gem_request_completed(from_req))
return 0;
 
-   if (!i915_semaphore_is_enabled(to_i915(obj->base.dev))) {
+   if (!i915.semaphores) {
struct drm_i915_private *i915 = to_i915(obj->base.dev);
ret = __i915_wait_request(from_req,
  i915->mm.interruptible,
@@ -4253,6 +4253,31 @@ out:
return ret;
 }
 
+bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value)
+{
+   if (INTEL_INFO(dev_priv)->gen < 6)
+   return false;
+
+   if (value >= 0)
+   return value;
+
+   /* TODO: make semaphores and Execlists play nicely together */
+   if (i915.enable_execlists)
+   return false;
+
+   /* Until we get further testing... */
+   if (IS_GEN8(dev_priv))
+   return false;
+
+#ifdef CONFIG_INTEL_IOMMU
+   /* Enable semaphores on SNB when IO remapping is off */
+   if (INTEL_INFO(dev_priv)->gen == 6 && intel_iommu_gfx_mapped)
+   return false;
+#endif
+
+   return true;
+}
+
 int i915_gem_init(struct drm_device *dev)
 {
struct drm_i915_private *dev_priv = dev->dev_private;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index d8ef41138c95..7c114f90f61a 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -518,7 +518,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 
hw_flags)
u32 flags = hw_flags | 

[Intel-gfx] [PATCH 46/62] drm/i915: Refactor blocking waits

2016-06-03 Thread Chris Wilson
Tidy up the for loops that handle waiting for read/write vs read-only
access.
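
The read/write and read-only paths now share a single loop; its shape,
excerpted from the diff below:

	active_mask = obj->active;
	if (!active_mask)
		return 0;

	for_each_active(active_mask, idx) {
		struct drm_i915_gem_request *request;
		int ret;

		request = i915_gem_active_peek(&active[idx],
					       &obj->base.dev->struct_mutex);
		if (!request)
			continue;

		ret = i915_wait_request(request);
		if (ret)
			return ret;

		i915_gem_object_retire_request(obj, request);
	}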

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem.c | 163 +++-
 1 file changed, 78 insertions(+), 85 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 610378bd1be4..ad3330adfa41 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1105,6 +1105,23 @@ put_rpm:
return ret;
 }
 
+static void
+i915_gem_object_retire_request(struct drm_i915_gem_object *obj,
+  struct drm_i915_gem_request *req)
+{
+   int ring = req->engine->id;
+
+   if (i915_gem_active_peek(>last_read[ring],
+>base.dev->struct_mutex) == req)
+   i915_gem_object_retire__read(obj, ring);
+   else if (i915_gem_active_peek(>last_write,
+ >base.dev->struct_mutex) == req)
+   i915_gem_object_retire__write(obj);
+
+   if (req->reset_counter == i915_reset_counter(>i915->gpu_error))
+   i915_gem_request_retire_upto(req);
+}
+
 /**
  * Ensures that all rendering to the object has completed and the object is
  * safe to unbind from the GTT or access from the CPU.
@@ -1113,61 +1130,40 @@ int
 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
   bool readonly)
 {
-   struct drm_i915_gem_request *request;
-   int ret, i;
+   struct i915_gem_active *active;
+   unsigned long active_mask;
+   int idx;
 
-   if (!obj->active)
-   return 0;
+   lockdep_assert_held(>base.dev->struct_mutex);
 
-   if (readonly) {
-   request = i915_gem_active_peek(>last_write,
-  >base.dev->struct_mutex);
-   if (request) {
-   ret = i915_wait_request(request);
-   if (ret)
-   return ret;
+   active_mask = obj->active;
+   if (!active_mask)
+   return 0;
 
-   i = request->engine->id;
-   if (i915_gem_active_peek(>last_read[i],
->base.dev->struct_mutex) 
== request)
-   i915_gem_object_retire__read(obj, i);
-   else
-   i915_gem_object_retire__write(obj);
-   }
+   if (!readonly) {
+   active = obj->last_read;
} else {
-   for (i = 0; i < I915_NUM_ENGINES; i++) {
-   request = i915_gem_active_peek(>last_read[i],
-  
>base.dev->struct_mutex);
-   if (!request)
-   continue;
-
-   ret = i915_wait_request(request);
-   if (ret)
-   return ret;
-
-   i915_gem_object_retire__read(obj, i);
-   }
-   GEM_BUG_ON(obj->active);
+   active_mask = 1;
+   active = >last_write;
}
 
-   return 0;
-}
+   for_each_active(active_mask, idx) {
+   struct drm_i915_gem_request *request;
+   int ret;
 
-static void
-i915_gem_object_retire_request(struct drm_i915_gem_object *obj,
-  struct drm_i915_gem_request *req)
-{
-   int ring = req->engine->id;
+   request = i915_gem_active_peek([idx],
+  >base.dev->struct_mutex);
+   if (!request)
+   continue;
 
-   if (i915_gem_active_peek(>last_read[ring],
->base.dev->struct_mutex) == req)
-   i915_gem_object_retire__read(obj, ring);
-   else if (i915_gem_active_peek(>last_write,
- >base.dev->struct_mutex) == req)
-   i915_gem_object_retire__write(obj);
+   ret = i915_wait_request(request);
+   if (ret)
+   return ret;
 
-   if (req->reset_counter == i915_reset_counter(>i915->gpu_error))
-   i915_gem_request_retire_upto(req);
+   i915_gem_object_retire_request(obj, request);
+   }
+
+   return 0;
 }
 
 /* A nonblocking variant of the above wait. This is a highly dangerous routine
@@ -1181,34 +1177,31 @@ i915_gem_object_wait_rendering__nonblocking(struct 
drm_i915_gem_object *obj,
struct drm_device *dev = obj->base.dev;
struct drm_i915_private *dev_priv = dev->dev_private;
struct drm_i915_gem_request *requests[I915_NUM_ENGINES];
+   struct i915_gem_active *active;
+   unsigned long active_mask;
int ret, i, n = 0;
 
BUG_ON(!mutex_is_locked(>struct_mutex));

[Intel-gfx] [PATCH 51/62] drm/i915: Move request list retirement to i915_gem_request.c

2016-06-03 Thread Chris Wilson
As the list retirement is now clean of implementation details, we can
move it closer to the request management.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem.c | 41 -
 drivers/gpu/drm/i915/i915_gem_request.c | 33 ++
 2 files changed, 33 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3b3a3b834e80..20e174f7fc9e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2268,47 +2268,6 @@ void i915_gem_reset(struct drm_device *dev)
i915_gem_restore_fences(dev);
 }
 
-/**
- * This function clears the request list as sequence numbers are passed.
- */
-void
-i915_gem_retire_requests_ring(struct intel_engine_cs *engine)
-{
-   while (!list_empty(>request_list)) {
-   struct drm_i915_gem_request *request;
-
-   request = list_first_entry(>request_list,
-  struct drm_i915_gem_request,
-  link);
-
-   if (!i915_gem_request_completed(request))
-   break;
-
-   i915_gem_request_retire_upto(request);
-   }
-}
-
-void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
-{
-   struct intel_engine_cs *engine;
-
-   if (dev_priv->gt.active_engines == 0)
-   return;
-
-   GEM_BUG_ON(!dev_priv->gt.awake);
-
-   for_each_engine(engine, dev_priv) {
-   i915_gem_retire_requests_ring(engine);
-   if (list_empty(>request_list))
-   dev_priv->gt.active_engines &= 
~intel_engine_flag(engine);
-   }
-
-   if (dev_priv->gt.active_engines == 0)
-   queue_delayed_work(dev_priv->wq,
-  _priv->gt.idle_work,
-  msecs_to_jiffies(100));
-}
-
 static void
 i915_gem_retire_work_handler(struct work_struct *work)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c 
b/drivers/gpu/drm/i915/i915_gem_request.c
index 2e13934041f3..38e5daecd8f5 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -733,3 +733,36 @@ int i915_wait_request(struct drm_i915_gem_request *req)
 
return 0;
 }
+
+void i915_gem_retire_requests_ring(struct intel_engine_cs *engine)
+{
+   struct drm_i915_gem_request *request, *next;
+
+   list_for_each_entry_safe(request, next, >request_list, link) {
+   if (!i915_gem_request_completed(request))
+   break;
+
+   i915_gem_request_retire(request);
+   }
+}
+
+void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
+{
+   struct intel_engine_cs *engine;
+
+   if (dev_priv->gt.active_engines == 0)
+   return;
+
+   GEM_BUG_ON(!dev_priv->gt.awake);
+
+   for_each_engine(engine, dev_priv) {
+   i915_gem_retire_requests_ring(engine);
+   if (list_empty(>request_list))
+   dev_priv->gt.active_engines &= 
~intel_engine_flag(engine);
+   }
+
+   if (dev_priv->gt.active_engines == 0)
+   queue_delayed_work(dev_priv->wq,
+  _priv->gt.idle_work,
+  msecs_to_jiffies(100));
+}
-- 
2.8.1



[Intel-gfx] [PATCH 21/62] drm/i915: Disable waitboosting for mmioflips/semaphores

2016-06-03 Thread Chris Wilson
Since

commit a6f766f3975185af66a31a2cea2cd38721645999
Author: Chris Wilson 
Date:   Mon Apr 27 13:41:20 2015 +0100

drm/i915: Limit ring synchronisation (sw sempahores) RPS boosts

and

commit bcafc4e38b6ad03f48989b7ecaff03845b5b7acf
Author: Chris Wilson 
Date:   Mon Apr 27 13:41:21 2015 +0100

drm/i915: Limit mmio flip RPS boosts

we have limited the waitboosting for semaphores and flips. Ideally we do
not want to boost in either of these instances as no consumer is waiting
upon the results. With the introduction of NO_WAITBOOST in the previous
patch, we can finally disable these needless boosts.
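
The mechanical change is to pass NO_WAITBOOST where the dedicated rps
clients used to be, e.g. in __i915_gem_object_sync():

	ret = __i915_wait_request(from_req,
				  i915->mm.interruptible,
				  NULL,
				  NO_WAITBOOST);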

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_debugfs.c  | 8 +---
 drivers/gpu/drm/i915/i915_drv.h  | 2 --
 drivers/gpu/drm/i915/i915_gem.c  | 2 +-
 drivers/gpu/drm/i915/intel_display.c | 2 +-
 drivers/gpu/drm/i915/intel_pm.c  | 2 --
 5 files changed, 3 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 8e37315443f3..daabbc6b65e9 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2462,13 +2462,7 @@ static int i915_rps_boost_info(struct seq_file *m, void 
*data)
   list_empty(_priv->rps.link) ? "" : ", active");
rcu_read_unlock();
}
-   seq_printf(m, "Semaphore boosts: %d%s\n",
-  dev_priv->rps.semaphores.boosts,
-  list_empty(_priv->rps.semaphores.link) ? "" : ", 
active");
-   seq_printf(m, "MMIO flip boosts: %d%s\n",
-  dev_priv->rps.mmioflips.boosts,
-  list_empty(_priv->rps.mmioflips.link) ? "" : ", active");
-   seq_printf(m, "Kernel boosts: %d\n", dev_priv->rps.boosts);
+   seq_printf(m, "Kernel (anonymous) boosts: %d\n", dev_priv->rps.boosts);
spin_unlock(_priv->rps.client_lock);
mutex_unlock(>filelist_mutex);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 316192077142..548fd3b9d858 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1176,8 +1176,6 @@ struct intel_gen6_power_mgmt {
struct delayed_work delayed_resume_work;
unsigned boosts;
 
-   struct intel_rps_client semaphores, mmioflips;
-
/* manual wa residency calculations */
struct intel_rps_ei up_ei, down_ei;
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 50df7a11d6b1..703e98e1a2e5 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2573,7 +2573,7 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj,
ret = __i915_wait_request(from_req,
  i915->mm.interruptible,
  NULL,
- >rps.semaphores);
+ NO_WAITBOOST);
if (ret)
return ret;
 
diff --git a/drivers/gpu/drm/i915/intel_display.c 
b/drivers/gpu/drm/i915/intel_display.c
index 30f1854b3ab9..849abb565d3d 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -11520,7 +11520,7 @@ static void intel_mmio_flip_work_func(struct 
work_struct *w)
if (work->flip_queued_req)
WARN_ON(__i915_wait_request(work->flip_queued_req,
false, NULL,
-   _priv->rps.mmioflips));
+   NO_WAITBOOST));
 
/* For framebuffer backed by dmabuf, wait for fence */
if (obj->base.dma_buf)
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 337f46c50934..c141d3e15eed 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -7730,8 +7730,6 @@ void intel_pm_setup(struct drm_device *dev)
INIT_DELAYED_WORK(_priv->rps.delayed_resume_work,
  __intel_autoenable_gt_powersave);
INIT_LIST_HEAD(_priv->rps.clients);
-   INIT_LIST_HEAD(_priv->rps.semaphores.link);
-   INIT_LIST_HEAD(_priv->rps.mmioflips.link);
 
dev_priv->pm.suspended = false;
atomic_set(_priv->pm.wakeref_count, 0);
-- 
2.8.1



[Intel-gfx] [PATCH 18/62] drm/i915: Rename drm_gem_object_unreference in preparation for lockless free

2016-06-03 Thread Chris Wilson
Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h  |  7 +++
 drivers/gpu/drm/i915/i915_gem.c  | 26 +-
 drivers/gpu/drm/i915/i915_gem_batch_pool.c   |  4 ++--
 drivers/gpu/drm/i915/i915_gem_context.c  |  4 ++--
 drivers/gpu/drm/i915/i915_gem_evict.c|  7 ---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c   |  6 +++---
 drivers/gpu/drm/i915/i915_gem_render_state.c |  4 ++--
 drivers/gpu/drm/i915/i915_gem_shrinker.c |  2 +-
 drivers/gpu/drm/i915/i915_gem_stolen.c   |  2 +-
 drivers/gpu/drm/i915/i915_gem_tiling.c   |  4 ++--
 drivers/gpu/drm/i915/i915_gem_userptr.c  |  4 ++--
 drivers/gpu/drm/i915/i915_guc_submission.c   |  6 +++---
 drivers/gpu/drm/i915/intel_display.c |  6 +++---
 drivers/gpu/drm/i915/intel_fbdev.c   |  2 +-
 drivers/gpu/drm/i915/intel_guc_loader.c  |  8 +---
 drivers/gpu/drm/i915/intel_lrc.c |  6 +++---
 drivers/gpu/drm/i915/intel_overlay.c |  8 
 drivers/gpu/drm/i915/intel_pm.c  |  2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c  | 14 +++---
 19 files changed, 66 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1ff7a9df4209..2d8cc5f3a77b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2294,6 +2294,13 @@ i915_gem_object_get(struct drm_i915_gem_object *obj)
 }
 __deprecated extern void drm_gem_object_reference(struct drm_gem_object *);
 
+__attribute__((nonnull)) static inline void
+i915_gem_object_put(struct drm_i915_gem_object *obj)
+{
+   drm_gem_object_unreference(>base);
+}
+__deprecated extern void drm_gem_object_unreference(struct drm_gem_object *);
+
 /*
  * Optimised SGL iterator for GEM objects
  */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4aecdd4434d8..e887d07dea4c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -272,7 +272,7 @@ drop_pages(struct drm_i915_gem_object *obj)
break;
 
ret = i915_gem_object_put_pages(obj);
-   drm_gem_object_unreference(>base);
+   i915_gem_object_put(obj);
 
return ret;
 }
@@ -721,7 +721,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
ret = i915_gem_shmem_pread(dev, obj, args, file);
 
 out:
-   drm_gem_object_unreference(>base);
+   i915_gem_object_put(obj);
 unlock:
mutex_unlock(>struct_mutex);
return ret;
@@ -1096,7 +1096,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
}
 
 out:
-   drm_gem_object_unreference(>base);
+   i915_gem_object_put(obj);
 unlock:
mutex_unlock(>struct_mutex);
 put_rpm:
@@ -1280,7 +1280,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void 
*data,
ORIGIN_GTT : ORIGIN_CPU);
 
 unref:
-   drm_gem_object_unreference(>base);
+   i915_gem_object_put(obj);
 unlock:
mutex_unlock(>struct_mutex);
return ret;
@@ -1311,7 +1311,7 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void 
*data,
if (obj->pin_display)
i915_gem_object_flush_cpu_write_domain(obj);
 
-   drm_gem_object_unreference(>base);
+   i915_gem_object_put(obj);
 unlock:
mutex_unlock(>struct_mutex);
return ret;
@@ -1733,7 +1733,7 @@ i915_gem_mmap_gtt(struct drm_file *file,
*offset = drm_vma_node_offset_addr(>base.vma_node);
 
 out:
-   drm_gem_object_unreference(>base);
+   i915_gem_object_put(obj);
 unlock:
mutex_unlock(>struct_mutex);
return ret;
@@ -2157,7 +2157,7 @@ i915_gem_object_retire__read(struct drm_i915_gem_object 
*obj, int ring)
}
 
i915_gem_request_assign(>last_fenced_req, NULL);
-   drm_gem_object_unreference(>base);
+   i915_gem_object_put(obj);
 }
 
 static bool i915_context_is_banned(const struct i915_gem_context *ctx)
@@ -2526,7 +2526,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, 
struct drm_file *file)
goto out;
}
 
-   drm_gem_object_unreference(>base);
+   i915_gem_object_put(obj);
 
for (i = 0; i < I915_NUM_ENGINES; i++) {
if (obj->last_read_req[i] == NULL)
@@ -2547,7 +2547,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, 
struct drm_file *file)
return ret;
 
 out:
-   drm_gem_object_unreference(>base);
+   i915_gem_object_put(obj);
mutex_unlock(>struct_mutex);
return ret;
 }
@@ -3370,7 +3370,7 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, 
void *data,
 
ret = i915_gem_object_set_cache_level(obj, level);
 
-   drm_gem_object_unreference(>base);
+   i915_gem_object_put(obj);
 unlock:
mutex_unlock(>struct_mutex);
 rpm_put:
@@ -3760,7 +3760,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
}
 
 
