[Intel-gfx] ✓ Fi.CI.BAT: success for drm: move allocation out of drm_get_format_name()

2016-11-04 Thread Patchwork
== Series Details ==

Series: drm: move allocation out of drm_get_format_name()
URL   : https://patchwork.freedesktop.org/series/14873/
State : success

== Summary ==

Series 14873v1 drm: move allocation out of drm_get_format_name()
https://patchwork.freedesktop.org/api/1.0/series/14873/revisions/1/mbox/

Test gem_sync:
Subgroup basic-store-all:
fail   -> PASS   (fi-hsw-4770r)

fi-bdw-5557u total:241  pass:226  dwarn:0   dfail:0   fail:0   skip:15 
fi-bsw-n3050 total:241  pass:201  dwarn:0   dfail:0   fail:0   skip:40 
fi-bxt-t5700 total:241  pass:213  dwarn:0   dfail:0   fail:0   skip:28 
fi-byt-j1900 total:241  pass:213  dwarn:0   dfail:0   fail:0   skip:28 
fi-byt-n2820 total:241  pass:209  dwarn:0   dfail:0   fail:0   skip:32 
fi-hsw-4770  total:241  pass:221  dwarn:0   dfail:0   fail:0   skip:20 
fi-hsw-4770r total:241  pass:221  dwarn:0   dfail:0   fail:0   skip:20 
fi-ilk-650   total:241  pass:188  dwarn:0   dfail:0   fail:0   skip:53 
fi-ivb-3520m total:241  pass:219  dwarn:0   dfail:0   fail:0   skip:22 
fi-ivb-3770  total:241  pass:219  dwarn:0   dfail:0   fail:0   skip:22 
fi-kbl-7200u total:241  pass:219  dwarn:0   dfail:0   fail:0   skip:22 
fi-skl-6260u total:241  pass:227  dwarn:0   dfail:0   fail:0   skip:14 
fi-skl-6700hq total:241  pass:220  dwarn:0   dfail:0   fail:0   skip:21 
fi-skl-6700k total:241  pass:219  dwarn:1   dfail:0   fail:0   skip:21 
fi-skl-6770hq total:241  pass:227  dwarn:0   dfail:0   fail:0   skip:14 
fi-snb-2520m total:241  pass:209  dwarn:0   dfail:0   fail:0   skip:32 
fi-snb-2600  total:241  pass:208  dwarn:0   dfail:0   fail:0   skip:33 

49a651a2e66ef603995f88a470d0986c2ef8b5b8 drm-intel-nightly: 
2016y-11m-04d-18h-04m-36s UTC integration manifest
7bc4368 drm: move allocation out of drm_get_format_name()

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_2912/
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm: move allocation out of drm_get_format_name()

2016-11-04 Thread Eric Engestrom
Fixes: 90844f00049e9f42573fd31d7c32e8fd31d3fd07

drm: make drm_get_format_name thread-safe

Signed-off-by: Eric Engestrom 
[danvet: Clarify that the returned pointer must be freed with
kfree().]
Signed-off-by: Daniel Vetter 

Suggested-by: Ville Syrjälä 
Signed-off-by: Eric Engestrom 
---
 drivers/gpu/drm/amd/amdgpu/dce_v10_0.c  |  7 ++---
 drivers/gpu/drm/amd/amdgpu/dce_v11_0.c  |  7 ++---
 drivers/gpu/drm/amd/amdgpu/dce_v6_0.c   |  3 +-
 drivers/gpu/drm/amd/amdgpu/dce_v8_0.c   |  7 ++---
 drivers/gpu/drm/drm_atomic.c|  7 +++--
 drivers/gpu/drm/drm_crtc.c  |  7 +++--
 drivers/gpu/drm/drm_fourcc.c| 12 +++-
 drivers/gpu/drm/drm_framebuffer.c   |  7 +++--
 drivers/gpu/drm/drm_modeset_helper.c|  7 +++--
 drivers/gpu/drm/drm_plane.c |  7 +++--
 drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c |  7 ++---
 drivers/gpu/drm/i915/i915_debugfs.c |  8 ++---
 drivers/gpu/drm/i915/intel_atomic_plane.c   |  8 ++---
 drivers/gpu/drm/i915/intel_display.c| 41 ++---
 drivers/gpu/drm/radeon/atombios_crtc.c  | 14 -
 drivers/gpu/drm/vmwgfx/vmwgfx_kms.c |  3 +-
 include/drm/drm_fourcc.h|  3 +-
 17 files changed, 71 insertions(+), 84 deletions(-)

diff --git a/include/drm/drm_fourcc.h b/include/drm/drm_fourcc.h
index dc0aafa..5a8cb4b 100644
--- a/include/drm/drm_fourcc.h
+++ b/include/drm/drm_fourcc.h
@@ -54,6 +54,7 @@ int drm_format_horz_chroma_subsampling(uint32_t format);
 int drm_format_vert_chroma_subsampling(uint32_t format);
 int drm_format_plane_width(int width, uint32_t format, int plane);
 int drm_format_plane_height(int height, uint32_t format, int plane);
-char *drm_get_format_name(uint32_t format) __malloc;
+typedef char drm_format_name_buf[32];
+char *drm_get_format_name(uint32_t format, drm_format_name_buf buf);
 
 #endif /* __DRM_FOURCC_H__ */
diff --git a/drivers/gpu/drm/drm_fourcc.c b/drivers/gpu/drm/drm_fourcc.c
index cbb8b77..34ed520 100644
--- a/drivers/gpu/drm/drm_fourcc.c
+++ b/drivers/gpu/drm/drm_fourcc.c
@@ -79,17 +79,13 @@ uint32_t drm_mode_legacy_fb_format(uint32_t bpp, uint32_t 
depth)
 EXPORT_SYMBOL(drm_mode_legacy_fb_format);
 
 /**
- * drm_get_format_name - return a string for drm fourcc format
+ * drm_get_format_name - fill a string with a drm fourcc format's name
  * @format: format to compute name of
+ * @buf: caller-supplied buffer
- *
- * Note that the buffer returned by this function is owned by the caller
- * and will need to be freed using kfree().
  */
-char *drm_get_format_name(uint32_t format)
+char *drm_get_format_name(uint32_t format, drm_format_name_buf buf)
 {
-   char *buf = kmalloc(32, GFP_KERNEL);
-
-   snprintf(buf, 32,
+   snprintf(buf, sizeof(drm_format_name_buf),
 "%c%c%c%c %s-endian (0x%08x)",
 printable_char(format & 0xff),
 printable_char((format >> 8) & 0xff),
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index 199d3f7..cefa3d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -2032,7 +2032,7 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc 
*crtc,
u32 tmp, viewport_w, viewport_h;
int r;
bool bypass_lut = false;
-   char *format_name;
+   drm_format_name_buf format_name;
 
/* no fb bound */
if (!atomic && !crtc->primary->fb) {
@@ -2144,9 +2144,8 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc 
*crtc,
bypass_lut = true;
break;
default:
-   format_name = drm_get_format_name(target_fb->pixel_format);
-   DRM_ERROR("Unsupported screen format %s\n", format_name);
-   kfree(format_name);
+   DRM_ERROR("Unsupported screen format %s\n",
+ drm_get_format_name(target_fb->pixel_format, 
format_name));
return -EINVAL;
}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index ecd000e..462abb8 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -2013,7 +2013,7 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc 
*crtc,
u32 tmp, viewport_w, viewport_h;
int r;
bool bypass_lut = false;
-   char *format_name;
+   drm_format_name_buf format_name;
 
/* no fb bound */
if (!atomic && !crtc->primary->fb) {
@@ -2125,9 +2125,8 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc 
*crtc,
bypass_lut = true;
break;
default:
-   format_name = drm_get_format_name(target_fb->pixel_format);
-   

Re: [Intel-gfx] [PATCH 1/2] drm/i915: Make sure engines are idle during GPU idling in LR mode

2016-11-04 Thread Imre Deak
On Sat, 2016-11-05 at 00:32 +0200, Imre Deak wrote:
> On Fri, 2016-11-04 at 21:01 +0000, Chris Wilson wrote:
> > On Fri, Nov 04, 2016 at 10:33:24PM +0200, Imre Deak wrote:
> > > On Thu, 2016-11-03 at 21:14 +0000, Chris Wilson wrote:
> > > > Where is that guaranteed? I thought we only serialised with the
> > > > pm
> > > > interrupts. Remember this happens before rpm suspend, since
> > > > gem_idle_work_handler is responsible for dropping the GPU
> > > > wakelock.
> > > 
> > > I meant that the 100msec after the last request signals
> > > completion
> > > and
> > > this handler is scheduled is normally enough for the context
> > > complete
> > > interrupt to get delivered. But yea, it's not a guarantee.
> > 
> > If only it was that deterministic! The idle_worker was scheduled
> > 100ms
> > after some retire_worker, just not necessarily the most recent. So
> > it
> > could be running exactly as active_requests -> 0 and so before the
> > context-interrupt.
> 
> Right, but we don't poll in that case, so there is no overhead.

Ok, there is a small window in the idle_worker after the unlocked poll
and before taking the lock where a new request could be submitted and
retired. In that case active_requests could be 0 after taking the lock
and we'd have the poll overhead there.

We could detect this by the fact that there is a new idle_worker
pending and bail out in that case. We shouldn't idle the GPU in that
case anyway.

> > Anyway, it was a good find!
> > -Chris
> > 
> ___
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 1/2] drm/i915: Make sure engines are idle during GPU idling in LR mode

2016-11-04 Thread Imre Deak
On Fri, 2016-11-04 at 21:01 +0000, Chris Wilson wrote:
> On Fri, Nov 04, 2016 at 10:33:24PM +0200, Imre Deak wrote:
> > On Thu, 2016-11-03 at 21:14 +0000, Chris Wilson wrote:
> > > Where is that guaranteed? I thought we only serialised with the
> > > pm
> > > interrupts. Remember this happens before rpm suspend, since
> > > gem_idle_work_handler is responsible for dropping the GPU
> > > wakelock.
> > 
> > I meant that the 100msec after the last request signals completion
> > and
> > this handler is scheduled is normally enough for the context
> > complete
> > interrupt to get delivered. But yea, it's not a guarantee.
> 
> If only it was that deterministic! The idle_worker was scheduled
> 100ms
> after some retire_worker, just not necessarily the most recent. So it
> could be running exactly as active_requests -> 0 and so before the
> context-interrupt.

Right, but we don't poll in that case, so there is no overhead.

> Anyway, it was a good find!
> -Chris
> 
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] RFC gemfs

2016-11-04 Thread Matthew Auld
The hope of this RFC is to gather some high-level feedback and ideas,
since I couldn't really find any in-depth discussions on the mailing
list regarding gemfs, only the odd whisper. But after talking with
Joonas and grepping around, the parts of shmem fs we would initially
need to have for a drop-in replacement would roughly be something
like:

struct file * drm_gemfs_setup_file(const char *name,
 loff_t size,
 unsigned long flags)

struct page * drm_gemfs_read_page(struct drm_gem_object *obj,
  pgoff_t index)

struct page * drm_gemfs_read_page_gfp(struct drm_gem_object *obj,
   pgoff_t index,
   gfp_t gfp)

void drm_gemfs_truncate(struct drm_gem_object *obj)

Am I missing any?

This is pretty much what we already have, minus truncate_range, since
we don't care about partial truncation. Also we now operate at the gem
object level and not the mapping, does this seem appropriate?

The approach would then be to have our own in-kernel mount point for
gemfs, with probably a fair amount of copy-paste from shmem fs, but in
what should be a very stripped down form to suit our needs.

Once we have gemfs in place, we should then have the much needed
flexibility to change it as we see fit, for example, being able to
control how migration is handled or where the backing pages are
allocated from, which could be useful for handling stolen memory etc.

Does this all sound reasonable, am I missing anything?

Thanks,
Matt
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] ✗ Fi.CI.BAT: failure for drm/dp: Make space for null terminator in the DP device ID char array

2016-11-04 Thread Patchwork
== Series Details ==

Series: drm/dp: Make space for null terminator in the DP device ID char array
URL   : https://patchwork.freedesktop.org/series/14865/
State : failure

== Summary ==

Series 14865v1 drm/dp: Make space for null terminator in the DP device ID char 
array
https://patchwork.freedesktop.org/api/1.0/series/14865/revisions/1/mbox/

Test gem_ringfill:
Subgroup basic-default-hang:
pass   -> INCOMPLETE (fi-hsw-4770)
Test gem_sync:
Subgroup basic-store-all:
fail   -> PASS   (fi-hsw-4770r)

fi-bdw-5557u total:241  pass:226  dwarn:0   dfail:0   fail:0   skip:15 
fi-bsw-n3050 total:241  pass:201  dwarn:0   dfail:0   fail:0   skip:40 
fi-bxt-t5700 total:241  pass:213  dwarn:0   dfail:0   fail:0   skip:28 
fi-byt-j1900 total:241  pass:213  dwarn:0   dfail:0   fail:0   skip:28 
fi-byt-n2820 total:241  pass:209  dwarn:0   dfail:0   fail:0   skip:32 
fi-hsw-4770  total:112  pass:102  dwarn:0   dfail:0   fail:0   skip:9  
fi-hsw-4770r total:241  pass:221  dwarn:0   dfail:0   fail:0   skip:20 
fi-ilk-650   total:241  pass:188  dwarn:0   dfail:0   fail:0   skip:53 
fi-ivb-3520m total:241  pass:219  dwarn:0   dfail:0   fail:0   skip:22 
fi-ivb-3770  total:241  pass:219  dwarn:0   dfail:0   fail:0   skip:22 
fi-kbl-7200u total:241  pass:219  dwarn:0   dfail:0   fail:0   skip:22 
fi-skl-6260u total:241  pass:227  dwarn:0   dfail:0   fail:0   skip:14 
fi-skl-6700hq total:241  pass:220  dwarn:0   dfail:0   fail:0   skip:21 
fi-skl-6700k total:241  pass:219  dwarn:1   dfail:0   fail:0   skip:21 
fi-skl-6770hq total:241  pass:227  dwarn:0   dfail:0   fail:0   skip:14 
fi-snb-2520m total:241  pass:209  dwarn:0   dfail:0   fail:0   skip:32 
fi-snb-2600  total:241  pass:208  dwarn:0   dfail:0   fail:0   skip:33 

49a651a2e66ef603995f88a470d0986c2ef8b5b8 drm-intel-nightly: 
2016y-11m-04d-18h-04m-36s UTC integration manifest
08543dd drm/dp: Make space for null terminator in the DP device ID char array

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_2911/
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 3/4] drm/i915: Bail if plane/crtc init fails

2016-11-04 Thread Chris Wilson
On Fri, Nov 04, 2016 at 11:07:26PM +0200, Ville Syrjälä wrote:
> On Fri, Nov 04, 2016 at 08:48:21PM +0000, Chris Wilson wrote:
> > On Tue, Oct 25, 2016 at 06:58:02PM +0300, ville.syrj...@linux.intel.com 
> > wrote:
> > > From: Ville Syrjälä 
> > > 
> > > Due to the plane->index not getting readjusted in drm_plane_cleanup(),
> > > > we can't continue initialization if some plane/crtc init fails.
> > > Well, we sort of could I suppose if we left all initialized planes on
> > > the list, but that would expose those planes to userspace as well.
> > > 
> > > But for crtcs the situation is even worse since we assume that
> > > pipe==crtc index occasionally, so we can't really deal with a partially
> > > > initialized set of crtcs.
> > > 
> > > So seems safest to just abort the entire thing if anything goes wrong.
> > > All the failure paths here are kmalloc()s anyway, so it seems unlikely
> > > we'd get very far if these start failing.
> > 
> > smatch spotted ERR_PTR(0)
> > 
> > > @@ -15296,22 +15304,30 @@ static void intel_crtc_init(struct drm_device 
> > > *dev, int pipe)
> > >   }
> > >  
> > >   primary = intel_primary_plane_create(dev, pipe);
> > > - if (!primary)
> > > + if (IS_ERR(primary)) {
> > > + ret = PTR_ERR(primary);
> > 
> > Here...
> 
> This looks correct to me, but the cursor and sprite paths are clearly
> crap.

Brain had already turned off. Yes, it was the plane and cursor, I just
goofed in trimming.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] ✗ Fi.CI.BAT: warning for series starting with [v2,1/2] drm/i915: Make sure engines are idle during GPU idling in LR mode (rev2)

2016-11-04 Thread Patchwork
== Series Details ==

Series: series starting with [v2,1/2] drm/i915: Make sure engines are idle 
during GPU idling in LR mode (rev2)
URL   : https://patchwork.freedesktop.org/series/14864/
State : warning

== Summary ==

Series 14864v2 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/14864/revisions/2/mbox/

Test gem_sync:
Subgroup basic-store-all:
fail   -> PASS   (fi-hsw-4770r)
Test kms_pipe_crc_basic:
Subgroup read-crc-pipe-c:
pass   -> DMESG-WARN (fi-ivb-3770)

fi-bdw-5557u total:241  pass:226  dwarn:0   dfail:0   fail:0   skip:15 
fi-bsw-n3050 total:241  pass:201  dwarn:0   dfail:0   fail:0   skip:40 
fi-byt-j1900 total:241  pass:213  dwarn:0   dfail:0   fail:0   skip:28 
fi-byt-n2820 total:241  pass:209  dwarn:0   dfail:0   fail:0   skip:32 
fi-hsw-4770  total:241  pass:221  dwarn:0   dfail:0   fail:0   skip:20 
fi-hsw-4770r total:241  pass:221  dwarn:0   dfail:0   fail:0   skip:20 
fi-ilk-650   total:241  pass:188  dwarn:0   dfail:0   fail:0   skip:53 
fi-ivb-3520m total:241  pass:219  dwarn:0   dfail:0   fail:0   skip:22 
fi-ivb-3770  total:241  pass:218  dwarn:1   dfail:0   fail:0   skip:22 
fi-kbl-7200u total:241  pass:219  dwarn:0   dfail:0   fail:0   skip:22 
fi-skl-6260u total:241  pass:227  dwarn:0   dfail:0   fail:0   skip:14 
fi-skl-6700hq total:241  pass:220  dwarn:0   dfail:0   fail:0   skip:21 
fi-skl-6700k total:241  pass:219  dwarn:1   dfail:0   fail:0   skip:21 
fi-skl-6770hq total:241  pass:227  dwarn:0   dfail:0   fail:0   skip:14 
fi-snb-2520m total:241  pass:209  dwarn:0   dfail:0   fail:0   skip:32 
fi-snb-2600  total:241  pass:208  dwarn:0   dfail:0   fail:0   skip:33 

49a651a2e66ef603995f88a470d0986c2ef8b5b8 drm-intel-nightly: 
2016y-11m-04d-18h-04m-36s UTC integration manifest
80739c2 drm/i915: Add assert for no pending GPU requests during suspend/resume 
in LR mode
ff4dfe2 drm/i915: Make sure engines are idle during GPU idling in LR mode

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_2910/
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 3/4] drm/i915: Bail if plane/crtc init fails

2016-11-04 Thread Ville Syrjälä
On Fri, Nov 04, 2016 at 08:48:21PM +0000, Chris Wilson wrote:
> On Tue, Oct 25, 2016 at 06:58:02PM +0300, ville.syrj...@linux.intel.com wrote:
> > From: Ville Syrjälä 
> > 
> > Due to the plane->index not getting readjusted in drm_plane_cleanup(),
> > we can't continue initialization if some plane/crtc init fails.
> > Well, we sort of could I suppose if we left all initialized planes on
> > the list, but that would expose those planes to userspace as well.
> > 
> > But for crtcs the situation is even worse since we assume that
> > pipe==crtc index occasionally, so we can't really deal with a partially
> > initialized set of crtcs.
> > 
> > So seems safest to just abort the entire thing if anything goes wrong.
> > All the failure paths here are kmalloc()s anyway, so it seems unlikely
> > we'd get very far if these start failing.
> 
> smatch spotted ERR_PTR(0)
> 
> > @@ -15296,22 +15304,30 @@ static void intel_crtc_init(struct drm_device 
> > *dev, int pipe)
> > }
> >  
> > primary = intel_primary_plane_create(dev, pipe);
> > -   if (!primary)
> > +   if (IS_ERR(primary)) {
> > +   ret = PTR_ERR(primary);
> 
> Here...

This looks correct to me, but the cursor and sprite paths are clearly
crap.

> 
> > goto fail;
> > +   }
> >  
> > for_each_sprite(dev_priv, pipe, sprite) {
> > -   ret = intel_plane_init(dev, pipe, sprite);
> > -   if (ret)
> > -   DRM_DEBUG_KMS("pipe %c sprite %c init failed: %d\n",
> > - pipe_name(pipe), sprite_name(pipe, 
> > sprite), ret);
> > +   struct intel_plane *plane;
> > +
> > +   plane = intel_sprite_plane_create(dev, pipe, sprite);
> > +   if (!plane) {
> > +   ret = PTR_ERR(plane);
> 
> and here.
> 
> -- 
> Chris Wilson, Intel Open Source Technology Centre

-- 
Ville Syrjälä
Intel OTC
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/dp: Make space for null terminator in the DP device ID char array

2016-11-04 Thread Dhinakaran Pandiyan
The DP device identification string read from the DPCD registers is 6
characters long at most, and we store it in a char array of the same length
without space for the NULL terminator. Fix this by increasing the array
size to 7 and initialize it to an empty string.

Signed-off-by: Dhinakaran Pandiyan 
---
 drivers/gpu/drm/drm_dp_helper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c
index 3e6fe82..3a39312 100644
--- a/drivers/gpu/drm/drm_dp_helper.c
+++ b/drivers/gpu/drm/drm_dp_helper.c
@@ -544,7 +544,7 @@ void drm_dp_downstream_debug(struct seq_file *m,
 DP_DETAILED_CAP_INFO_AVAILABLE;
int clk;
int bpc;
-   char id[6];
+   char id[7] = "";
int len;
uint8_t rev[2];
int type = port_cap[0] & DP_DS_PORT_TYPE_MASK;
-- 
2.7.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v3 2/2] drm/i915: Add assert for no pending GPU requests during suspend/resume in LR mode

2016-11-04 Thread Chris Wilson
On Fri, Nov 04, 2016 at 10:58:52PM +0200, Imre Deak wrote:
> During resume we will reset the SW/HW tracking for each ring head/tail
> pointers and so are not prepared to replay any pending requests (as
> opposed to GPU reset time). Add an assert for this both to the suspend
> and the resume code.
> 
> v2:
> - Check for ELSP port idle already during suspend and check !gt.awake
>   during resume. (Chris)
> v3:
> - Move the !gt.awake check to i915_gem_resume().
> 
> Cc: Chris Wilson 
> Cc: Mika Kuoppala 
> Signed-off-by: Imre Deak 
> ---
>  drivers/gpu/drm/i915/i915_gem.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 81ea88c..c344abc 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4437,6 +4437,7 @@ int i915_gem_suspend(struct drm_device *dev)
>* reset the GPU back to its idle, low power state.
>*/
>   WARN_ON(dev_priv->gt.awake);
> + WARN_ON(i915.enable_execlists && !intel_lr_engines_idle(dev_priv));

Just WARN_ON(!intel_execlists_idle(dev_priv));

Being forward thinking intel_execlists_submission_idle().

Reviewed-by: Chris Wilson 
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 1/2] drm/i915: Make sure engines are idle during GPU idling in LR mode

2016-11-04 Thread Chris Wilson
On Fri, Nov 04, 2016 at 10:33:24PM +0200, Imre Deak wrote:
> On Thu, 2016-11-03 at 21:14 +0000, Chris Wilson wrote:
> > Where is that guaranteed? I thought we only serialised with the pm
> > interrupts. Remember this happens before rpm suspend, since
> > gem_idle_work_handler is responsible for dropping the GPU wakelock.
> 
> I meant that the 100msec after the last request signals completion and
> this handler is scheduled is normally enough for the context complete
> interrupt to get delivered. But yea, it's not a guarantee.

If only it was that deterministic! The idle_worker was scheduled 100ms
after some retire_worker, just not necessarily the most recent. So it
could be running exactly as active_requests -> 0 and so before the
context-interrupt.

Anyway, it was a good find!
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v3 2/2] drm/i915: Add assert for no pending GPU requests during suspend/resume in LR mode

2016-11-04 Thread Imre Deak
During resume we will reset the SW/HW tracking for each ring head/tail
pointers and so are not prepared to replay any pending requests (as
opposed to GPU reset time). Add an assert for this both to the suspend
and the resume code.

v2:
- Check for ELSP port idle already during suspend and check !gt.awake
  during resume. (Chris)
v3:
- Move the !gt.awake check to i915_gem_resume().

Cc: Chris Wilson 
Cc: Mika Kuoppala 
Signed-off-by: Imre Deak 
---
 drivers/gpu/drm/i915/i915_gem.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 81ea88c..c344abc 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4437,6 +4437,7 @@ int i915_gem_suspend(struct drm_device *dev)
 * reset the GPU back to its idle, low power state.
 */
WARN_ON(dev_priv->gt.awake);
+   WARN_ON(i915.enable_execlists && !intel_lr_engines_idle(dev_priv));
 
/*
 * Neither the BIOS, ourselves or any other kernel
@@ -4473,6 +4474,8 @@ void i915_gem_resume(struct drm_device *dev)
 {
struct drm_i915_private *dev_priv = to_i915(dev);
 
+   WARN_ON(dev_priv->gt.awake);
+
mutex_lock(&dev->struct_mutex);
i915_gem_restore_gtt_mappings(dev);
 
-- 
2.5.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 3/4] drm/i915: Bail if plane/crtc init fails

2016-11-04 Thread Chris Wilson
On Tue, Oct 25, 2016 at 06:58:02PM +0300, ville.syrj...@linux.intel.com wrote:
> From: Ville Syrjälä 
> 
> Due to the plane->index not getting readjusted in drm_plane_cleanup(),
> we can't continue initialization if some plane/crtc init fails.
> Well, we sort of could I suppose if we left all initialized planes on
> the list, but that would expose those planes to userspace as well.
> 
> But for crtcs the situation is even worse since we assume that
> pipe==crtc index occasionally, so we can't really deal with a partially
> initialized set of crtcs.
> 
> So seems safest to just abort the entire thing if anything goes wrong.
> All the failure paths here are kmalloc()s anyway, so it seems unlikely
> we'd get very far if these start failing.

smatch spotted ERR_PTR(0)

> @@ -15296,22 +15304,30 @@ static void intel_crtc_init(struct drm_device *dev, 
> int pipe)
>   }
>  
>   primary = intel_primary_plane_create(dev, pipe);
> - if (!primary)
> + if (IS_ERR(primary)) {
> + ret = PTR_ERR(primary);

Here...

>   goto fail;
> + }
>  
>   for_each_sprite(dev_priv, pipe, sprite) {
> - ret = intel_plane_init(dev, pipe, sprite);
> - if (ret)
> - DRM_DEBUG_KMS("pipe %c sprite %c init failed: %d\n",
> -   pipe_name(pipe), sprite_name(pipe, 
> sprite), ret);
> + struct intel_plane *plane;
> +
> + plane = intel_sprite_plane_create(dev, pipe, sprite);
> + if (!plane) {
> + ret = PTR_ERR(plane);

and here.

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v2 2/2] drm/i915: Add assert for no pending GPU requests during suspend/resume in LR mode

2016-11-04 Thread Imre Deak
During resume we will reset the SW/HW tracking for each ring head/tail
pointers and so are not prepared to replay any pending requests (as
opposed to GPU reset time). Add an assert for this both to the suspend
and the resume code.

v2:
- Check for ELSP port idle already during suspend and check !gt.awake
  during resume. (Chris)

Cc: Chris Wilson 
Cc: Mika Kuoppala 
Signed-off-by: Imre Deak 
---
 drivers/gpu/drm/i915/i915_gem.c  | 1 +
 drivers/gpu/drm/i915/intel_lrc.c | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 81ea88c..acc2030 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4437,6 +4437,7 @@ int i915_gem_suspend(struct drm_device *dev)
 * reset the GPU back to its idle, low power state.
 */
WARN_ON(dev_priv->gt.awake);
+   WARN_ON(i915.enable_execlists && !intel_lr_engines_idle(dev_priv));
 
/*
 * Neither the BIOS, ourselves or any other kernel
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 7aa5665..77577d1 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2165,6 +2165,8 @@ void intel_lr_context_resume(struct drm_i915_private 
*dev_priv)
if (WARN_ON(IS_ERR(reg)))
continue;
 
+   WARN_ON(dev_priv->gt.awake);
+
reg += LRC_STATE_PN * PAGE_SIZE / sizeof(*reg);
reg[CTX_RING_HEAD+1] = 0;
reg[CTX_RING_TAIL+1] = 0;
-- 
2.5.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v2 1/2] drm/i915: Make sure engines are idle during GPU idling in LR mode

2016-11-04 Thread Imre Deak
We assume that the GPU is idle once receiving the seqno via the last
request's user interrupt. In execlist mode the corresponding context
completed interrupt can be delayed though and until this latter
interrupt arrives we consider the request to be pending on the ELSP
submit port. This can cause a problem during system suspend where this
last request will be seen by the resume code as still pending. Such
pending requests are normally replayed after a GPU reset, but during
resume we reset both SW and HW tracking of the ring head/tail pointers,
so replaying the pending request with its stale tail pointer will leave
the ring in an inconsistent state. A subsequent request submission can
lead then to the GPU executing from uninitialized area in the ring
behind the above stale tail pointer.

Fix this by making sure any pending request on the ELSP port is
completed before suspending. I used a polling wait since the completion
time I measured was <1ms and since normally we only need to wait during
system suspend. GPU idling during runtime suspend is scheduled with a
delay (currently 50-100ms) after the retirement of the last request at
which point the context completed interrupt must have arrived already.

The chance of this bug was increased by

commit 1c777c5d1dcdf8fa0223fcff35fb387b5bb9517a
Author: Imre Deak 
Date:   Wed Oct 12 17:46:37 2016 +0300

drm/i915/hsw: Fix GPU hang during resume from S3-devices state

but it could happen even without the explicit GPU reset, since we
disable interrupts afterwards during the suspend sequence.

v2:
- Do an unlocked poll-wait first. (Chris)

Cc: Chris Wilson 
Cc: Mika Kuoppala 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98470
Signed-off-by: Imre Deak 
---
 drivers/gpu/drm/i915/i915_gem.c  |  8 
 drivers/gpu/drm/i915/intel_lrc.c | 19 +++
 drivers/gpu/drm/i915/intel_lrc.h |  1 +
 3 files changed, 28 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0dbf38c..81ea88c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2752,6 +2752,10 @@ i915_gem_idle_work_handler(struct work_struct *work)
if (!READ_ONCE(dev_priv->gt.awake))
return;
 
+   if (i915.enable_execlists)
+   wait_for(READ_ONCE(dev_priv->gt.active_requests) ||
+intel_lr_engines_idle(dev_priv), 10);
+
if (READ_ONCE(dev_priv->gt.active_requests))
return;
 
@@ -2769,6 +2773,10 @@ i915_gem_idle_work_handler(struct work_struct *work)
if (dev_priv->gt.active_requests)
goto out_unlock;
 
+   if (i915.enable_execlists &&
+   wait_for(intel_lr_engines_idle(dev_priv), 10))
+   DRM_ERROR("Timeout waiting for engines to idle\n");
+
for_each_engine(engine, dev_priv, id)
i915_gem_batch_pool_fini(&engine->batch_pool);
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index fa3012c..7aa5665 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -522,6 +522,25 @@ static bool execlists_elsp_idle(struct intel_engine_cs 
*engine)
return !engine->execlist_port[0].request;
 }
 
+/**
+ * intel_lr_engines_idle() - Determine if all engine submission ports are idle
+ * @dev_priv: i915 device private
+ *
+ * Return true if there are no requests pending on any of the submission ports
+ * of any engines.
+ */
+bool intel_lr_engines_idle(struct drm_i915_private *dev_priv)
+{
+   struct intel_engine_cs *engine;
+   enum intel_engine_id id;
+
+   for_each_engine(engine, dev_priv, id)
+   if (!execlists_elsp_idle(engine))
+   return false;
+
+   return true;
+}
+
 static bool execlists_elsp_ready(struct intel_engine_cs *engine)
 {
int port;
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 4fed816..c855ffb 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -87,6 +87,7 @@ void intel_lr_context_unpin(struct i915_gem_context *ctx,
 
 struct drm_i915_private;
 
+bool intel_lr_engines_idle(struct drm_i915_private *dev_priv);
 void intel_lr_context_resume(struct drm_i915_private *dev_priv);
 uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
 struct intel_engine_cs *engine);
-- 
2.5.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 1/2] drm/i915: Make sure engines are idle during GPU idling in LR mode

2016-11-04 Thread Imre Deak
On Thu, 2016-11-03 at 21:14 +0000, Chris Wilson wrote:
> On Thu, Nov 03, 2016 at 10:57:23PM +0200, Imre Deak wrote:
> > On Thu, 2016-11-03 at 18:59 +0000, Chris Wilson wrote:
> > > On Thu, Nov 03, 2016 at 06:19:37PM +0200, Imre Deak wrote:
> > > > We assume that the GPU is idle once receiving the seqno via the last
> > > > request's user interrupt. In execlist mode the corresponding context
> > > > completed interrupt can be delayed though and until this latter
> > > > interrupt arrives we consider the request to be pending on the ELSP
> > > > submit port. This can cause a problem during system suspend where this
> > > > last request will be seen by the resume code as still pending. Such
> > > > pending requests are normally replayed after a GPU reset, but during
> > > > resume we reset both SW and HW tracking of the ring head/tail pointers,
> > > > > so replaying the pending request with its stale tail pointer will leave
> > > > the ring in an inconsistent state. A subsequent request submission can
> > > > lead then to the GPU executing from uninitialized area in the ring
> > > > behind the above stale tail pointer.
> > > > 
> > > > Fix this by making sure any pending request on the ELSP port is
> > > > completed before suspending. I used a polling wait since the completion
> > > > time I measured was <1ms and since normally we only need to wait during
> > > > system suspend. GPU idling during runtime suspend is scheduled with a
> > > > delay (currently 50-100ms) after the retirement of the last request at
> > > > which point the context completed interrupt must have arrived already.
> > > > 
> > > > The chance of this bug was increased by
> > > > 
> > > > commit 1c777c5d1dcdf8fa0223fcff35fb387b5bb9517a
> > > > Author: Imre Deak 
> > > > Date:   Wed Oct 12 17:46:37 2016 +0300
> > > > 
> > > > drm/i915/hsw: Fix GPU hang during resume from S3-devices state
> > > > 
> > > > but it could happen even without the explicit GPU reset, since we
> > > > disable interrupts afterwards during the suspend sequence.
> > > > 
> > > > Cc: Chris Wilson 
> > > > Cc: Mika Kuoppala 
> > > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98470
> > > > Signed-off-by: Imre Deak 
> > > > ---
> > > >  drivers/gpu/drm/i915/i915_gem.c  |  3 +++
> > > >  drivers/gpu/drm/i915/intel_lrc.c | 12 
> > > >  drivers/gpu/drm/i915/intel_lrc.h |  1 +
> > > >  3 files changed, 16 insertions(+)
> > > > 
> > > > diff --git a/drivers/gpu/drm/i915/i915_gem.c 
> > > > b/drivers/gpu/drm/i915/i915_gem.c
> > > > index 1f995ce..5ff02b5 100644
> > > > --- a/drivers/gpu/drm/i915/i915_gem.c
> > > > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > > > @@ -2766,6 +2766,9 @@ i915_gem_idle_work_handler(struct work_struct 
> > > > *work)
> > > >     if (dev_priv->gt.active_requests)
> > > >     goto out_unlock;
> > > >  
> > > > +   if (i915.enable_execlists)
> > > > +   intel_lr_wait_engines_idle(dev_priv);
> > > 
> > > Idle work handler... So runtime suspend.
> > > Anyway this is not an ideal place for a stall under struct_mutex (even if
> > > 16x10us, it's the principle!).
> > 
> > During runtime suspend this won't add any overhead since the context
> > done interrupt happened already (unless there is a bug somewhere else).
> 
> Where is that guaranteed? I thought we only serialised with the pm
> interrupts. Remember this happens before rpm suspend, since
> gem_idle_work_handler is responsible for dropping the GPU wakelock.

I meant that the 100msec after the last request signals completion and
this handler is scheduled is normally enough for the context complete
interrupt to get delivered. But yea, it's not a guarantee.

> > > Move this to before the first READ_ONCE(dev_priv->gt.active_requests);
> > > so we stall before taking the lock, and skip if any new requests arrive
> > > whilst waiting.
> > > 
> > > (Also i915.enable_execlists is forbidden. But meh)
> > > 
> > > static struct drm_i915_gem_request *
> > > execlists_active_port(struct intel_engine_cs *engine)
> > > {
> > >   struct drm_i915_gem_request *request;
> > > 
> > >   request = READ_ONCE(engine->execlist_port[1]);
> > >   if (request)
> > >   return request;
> > > 
> > >   return READ_ONCE(engine->execlist_port[0]);
> > > }
> > > 
> > > /* Wait for execlists to settle, but bail if any new requests come in */
> > > for_each_engine(engine, dev_priv, id) {
> > >   struct drm_i915_gem_request *request;
> > > 
> > >   request = execlists_active_port(engine);
> > >   if (!request)
> > >   continue;
> > > 
> > >   if (wait_for(execlists_active_port(engine) != request, 10))
> > >   DRM_ERROR("Timeout waiting for %s to idle\n", engine->name);
> > > }
> > 
> > Hm, but we still need to re-check and bail out if not idle with
> > struct_mutex held, since gt.active_requests could go 0->1->0 before
> > taking struct_mutex? I can rewrite things 

Re: [Intel-gfx] [PATCH] drm/i915: Perform object clflushing asynchronously

2016-11-04 Thread Chris Wilson
On Fri, Nov 04, 2016 at 08:03:57PM +0000, Chris Wilson wrote:
> Flushing the cachelines for an object is slow, can be as much as 100ms
> for a large framebuffer. We currently do this under the struct_mutex BKL
> on execution or on pageflip. But now with the ability to add fences to
> obj->resv for both flips and execbuf (and we naturally wait on the fence
> before CPU access), we can move the clflush operation to a workqueue and
> signal a fence for completion, thereby doing the work asynchronously and
> not blocking the driver or its clients.
> 
> Suggested-by: Akash Goel 
> Signed-off-by: Chris Wilson 
> Cc: Akash Goel 

Needs a bit more work to restrict the async operations. In the end, I
think only the explicit paths towards execbuf / flip should opt in,
as the majority will want sync (pread/pwrite/set-domain). This idea came
up in a discussion on whether we needed create2 for early clflush or
whether we could exploit set-domain for the same functionality. Now, we
can do the clflush asynchronously from create, but we must do it
synchronously in set-domain (albeit now it could be done outside of the
struct_mutex).
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915: Perform object clflushing asynchronously

2016-11-04 Thread Chris Wilson
Flushing the cachelines for an object is slow, can be as much as 100ms
for a large framebuffer. We currently do this under the struct_mutex BKL
on execution or on pageflip. But now with the ability to add fences to
obj->resv for both flips and execbuf (and we naturally wait on the fence
before CPU access), we can move the clflush operation to a workqueue and
signal a fence for completion, thereby doing the work asynchronously and
not blocking the driver or its clients.

Suggested-by: Akash Goel 
Signed-off-by: Chris Wilson 
Cc: Akash Goel 
---
 drivers/gpu/drm/i915/Makefile  |   1 +
 drivers/gpu/drm/i915/i915_drv.h|   8 +-
 drivers/gpu/drm/i915/i915_gem.c|  60 +++--
 drivers/gpu/drm/i915/i915_gem_clflush.c| 138 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   6 +-
 drivers/gpu/drm/i915/intel_display.c   |  57 ++--
 6 files changed, 190 insertions(+), 80 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_clflush.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 0857e5035f4d..6afd402e440b 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -29,6 +29,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o
 # GEM code
 i915-y += i915_cmd_parser.o \
  i915_gem_batch_pool.o \
+ i915_gem_clflush.o \
  i915_gem_context.o \
  i915_gem_dmabuf.o \
  i915_gem_evict.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2754d5de76af..c80044267333 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3410,7 +3410,13 @@ static inline u32 i915_reset_count(struct i915_gpu_error 
*error)
 
 void i915_gem_reset(struct drm_i915_private *dev_priv);
 void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
-bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
+
+void i915_gem_clflush_init(struct drm_i915_private *i915);
+int i915_gem_clflush_object(struct drm_i915_gem_object *obj,
+   unsigned int flags);
+#define I915_CLFLUSH_FORCE BIT(0)
+#define I915_CLFLUSH_SYNC BIT(1)
+
 int __must_check i915_gem_init(struct drm_device *dev);
 int __must_check i915_gem_init_hw(struct drm_device *dev);
 void i915_gem_init_swizzling(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index cffe60237b6a..524f72774537 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -230,7 +230,7 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object 
*obj)
obj->mm.dirty = false;
 
if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
-   i915_gem_clflush_object(obj, false);
+   i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
 
obj->base.read_domains = I915_GEM_DOMAIN_CPU;
obj->base.write_domain = I915_GEM_DOMAIN_CPU;
@@ -1570,6 +1570,11 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void 
*data,
 
mutex_unlock(&dev->struct_mutex);
 
+   if (err == 0)
+   err = i915_gem_object_wait(obj,
+  I915_WAIT_INTERRUPTIBLE,
+  MAX_SCHEDULE_TIMEOUT,
+  NULL);
if (write_domain != 0)
intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));
 
@@ -3236,44 +3241,6 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 
alignment, u64 flags)
return ret;
 }
 
-bool
-i915_gem_clflush_object(struct drm_i915_gem_object *obj,
-   bool force)
-{
-   /* If we don't have a page list set up, then we're not pinned
-* to GPU, and we can ignore the cache flush because it'll happen
-* again at bind time.
-*/
-   if (!obj->mm.pages)
-   return false;
-
-   /*
-* Stolen memory is always coherent with the GPU as it is explicitly
-* marked as wc by the system, or the system is cache-coherent.
-*/
-   if (obj->stolen || obj->phys_handle)
-   return false;
-
-   /* If the GPU is snooping the contents of the CPU cache,
-* we do not need to manually clear the CPU cache lines.  However,
-* the caches are only snooped when the render cache is
-* flushed/invalidated.  As we always have to emit invalidations
-* and flushes when moving into and out of the RENDER domain, correct
-* snooping behaviour occurs naturally as the result of our domain
-* tracking.
-*/
-   if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) {
-   obj->cache_dirty = true;
-   return false;
-   }
-
-   trace_i915_gem_object_clflush(obj);
-   drm_clflush_sg(obj->mm.pages);
-   

Re: [Intel-gfx] [PATCH v4 2/8] drm/i915/skl: New ddb allocation algorithm

2016-11-04 Thread Paulo Zanoni
Em Qui, 2016-10-13 às 16:28 +0530, Kumar, Mahesh escreveu:
> From: Mahesh Kumar 
> 
> This patch implements new DDB allocation algorithm as per HW team
> recommendation. This algo takes care of the scenario where we allocate less
> DDB
> for the planes with lower relative pixel rate, but they require more
> DDB
> to work.
> It also takes care of enabling same watermark level for each
> plane, for efficient power saving.
> 
> Changes since v1:
>  - Rebase on top of Paulo's patch series
> 
> Changes since v2:
>  - Fix the for loop condition to enable WM
> 
> Changes since v3:
>  - Fix crash in cursor i-g-t reported by Maarten
>  - Rebase after addressing Paulo's comments
>  - Few other ULT fixes
> 

This will require a huge rebase due to the things that were already
merged and those that are about to be merged. Also, this is a general
improvement while the other patches are bug fixes. Can you please move
this to the end of the series? I'd really like to get the other things
merged first, in case we decide to backport the fixes.


> Signed-off-by: Mahesh Kumar 
> ---
>  drivers/gpu/drm/i915/intel_pm.c | 149 +-
> --
>  1 file changed, 79 insertions(+), 70 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_pm.c
> b/drivers/gpu/drm/i915/intel_pm.c
> index 098336d..84ec6b1 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3344,6 +3344,7 @@ skl_ddb_min_alloc(const struct drm_plane_state
> *pstate,
>  
>  static int
>  skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
> +   struct skl_pipe_wm *pipe_wm,
>     struct skl_ddb_allocation *ddb /* out */)
>  {
>   struct drm_atomic_state *state = cstate->base.state;
> @@ -3359,8 +3360,12 @@ skl_allocate_pipe_ddb(struct intel_crtc_state
> *cstate,
>   uint16_t *minimum = cstate->wm.skl.minimum_blocks;
>   uint16_t *y_minimum = cstate->wm.skl.minimum_y_blocks;
>   unsigned int total_data_rate;
> + uint16_t total_min_blocks = 0;
> + uint16_t total_level_ddb = 0;
>   int num_active;
> - int id, i;
> + int max_level, level;
> + int id, i, ret = 0;
> +
>  
>   if (WARN_ON(!state))
>   return 0;
> @@ -3409,19 +3414,42 @@ skl_allocate_pipe_ddb(struct intel_crtc_state
> *cstate,
>   }
>  
>   for (i = 0; i < PLANE_CURSOR; i++) {
> - alloc_size -= minimum[i];
> - alloc_size -= y_minimum[i];
> + total_min_blocks += minimum[i];
> + total_min_blocks += y_minimum[i];
>   }
>  
> - /*
> -  * 2. Distribute the remaining space in proportion to the
> amount of
> -  * data each plane needs to fetch from memory.
> -  *
> -  * FIXME: we may not allocate every single block here.
> -  */
> + for (level = ilk_wm_max_level(dev); level >= 0; level--) {
> + total_level_ddb = 0;
> + for (i = 0; i < PLANE_CURSOR; i++) {
> + /*
> +  * TODO: We should calculate watermark
> values for Y/UV
> +  * plane both in case of NV12 format and use
> both values
> +  * for ddb calculation, As NV12 is disabled
> as of now.
> +  * using only single plane value here.
> +  */
> + uint16_t min = minimum[i] + y_minimum[i];
> + uint16_t plane_level_ddb_wm =
> + max(pipe_wm-
> >wm[level].plane_res_b[i], min);
> + total_level_ddb += plane_level_ddb_wm;
> + }
> +
> + if (total_level_ddb <= alloc_size)
> + break;
> + }
> +
> + if ((level < 0) || (total_min_blocks > alloc_size)) {
> + DRM_DEBUG_KMS("Requested display configuration
> exceeds system DDB limitations");
> + DRM_DEBUG_KMS("minimum required %d/%d\n", (level <
> 0) ?
> + total_level_ddb : total_min_blocks,
> alloc_size);
> + ret = -EINVAL;
> + goto exit;
> + }
> + max_level = level;
> + alloc_size -= total_level_ddb;
> +
>   total_data_rate = skl_get_total_relative_data_rate(cstate);
>   if (total_data_rate == 0)
> - return 0;
> + goto exit;
>  
>   start = alloc->start;
>   for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
> @@ -3436,7 +3464,8 @@ skl_allocate_pipe_ddb(struct intel_crtc_state
> *cstate,
>    * promote the expression to 64 bits to avoid
> overflowing, the
>    * result is < available as data_rate /
> total_data_rate < 1
>    */
> - plane_blocks = minimum[id];
> + plane_blocks = max(pipe_wm-
> >wm[max_level].plane_res_b[id],
> + minimum[id]);
>   plane_blocks += div_u64((uint64_t)alloc_size *
> data_rate,
>   

Re: [Intel-gfx] [PATCH v4 8/8] drm/i915/bxt: Enable IPC support

2016-11-04 Thread Paulo Zanoni
Em Qui, 2016-10-13 às 16:28 +0530, Kumar, Mahesh escreveu:
> From: Mahesh Kumar 
> 
> This patch adds IPC support for platforms. This patch enables IPC
> only for BXT/KBL platform as for SKL recommendation is to keep it
> disabled.
> IPC (Isochronous Priority Control) is the hardware feature, which
> dynamically controls the memory read priority of Display.
> 
> When IPC is enabled, plane read requests are sent at high priority
> until
> filling above the transition watermark, then the requests are sent at
> lower priority until dropping below the level 0 watermark.
> The lower priority requests allow other memory clients to have better
> memory access. When IPC is disabled, all plane read requests are sent
> at
> high priority.
> 
> Changes since V1:
>  - Remove commandline parameter to disable ipc
>  - Address Paulo's comments
> 

In addition to what others said, we also need the linetime/2 WA if we
want to enable IPC.

Also, see below.

> Signed-off-by: Mahesh Kumar 
> ---
>  drivers/gpu/drm/i915/i915_drv.c  |  2 ++
>  drivers/gpu/drm/i915/i915_reg.h  |  1 +
>  drivers/gpu/drm/i915/intel_drv.h |  1 +
>  drivers/gpu/drm/i915/intel_pm.c  | 15 +++
>  4 files changed, 19 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.c
> b/drivers/gpu/drm/i915/i915_drv.c
> index b5f601c..58abbaa 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -1415,6 +1415,8 @@ int i915_driver_load(struct pci_dev *pdev,
> const struct pci_device_id *ent)
>  
>   intel_runtime_pm_enable(dev_priv);
>  
> + intel_enable_ipc(dev_priv);
> +
>   /* Everything is in place, we can now relax! */
>   DRM_INFO("Initialized %s %d.%d.%d %s for %s on minor %d\n",
>    driver.name, driver.major, driver.minor,
> driver.patchlevel,
> diff --git a/drivers/gpu/drm/i915/i915_reg.h
> b/drivers/gpu/drm/i915/i915_reg.h
> index a9c467c..c9ebf23 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -6144,6 +6144,7 @@ enum {
>  #define  DISP_FBC_WM_DIS (1<<15)
>  #define DISP_ARB_CTL2_MMIO(0x45004)
>  #define  DISP_DATA_PARTITION_5_6 (1<<6)
> +#define  DISP_IPC_ENABLE (1<<3)
>  #define DBUF_CTL _MMIO(0x45008)
>  #define  DBUF_POWER_REQUEST  (1<<31)
>  #define  DBUF_POWER_STATE(1<<30)
> diff --git a/drivers/gpu/drm/i915/intel_drv.h
> b/drivers/gpu/drm/i915/intel_drv.h
> index 2c1897b..45b0fa4 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -1766,6 +1766,7 @@ void skl_write_plane_wm(struct intel_crtc
> *intel_crtc,
>  uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state
> *pipe_config);
>  bool ilk_disable_lp_wm(struct drm_device *dev);
>  int sanitize_rc6_option(struct drm_i915_private *dev_priv, int
> enable_rc6);
> +void intel_enable_ipc(struct drm_i915_private *dev_priv);
>  static inline int intel_enable_rc6(void)
>  {
>   return i915.enable_rc6;
> diff --git a/drivers/gpu/drm/i915/intel_pm.c
> b/drivers/gpu/drm/i915/intel_pm.c
> index 4263212..543aa5d 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -4833,6 +4833,21 @@ void intel_update_watermarks(struct drm_crtc
> *crtc)
>   dev_priv->display.update_wm(crtc);
>  }
>  
> +void intel_enable_ipc(struct drm_i915_private *dev_priv)
> +{
> + u32 val;
> +
> + /* enable IPC only for Broxton for now*/
> + if (!IS_BROXTON(dev_priv) || !IS_KABYLAKE(dev_priv))
> + return;

This will always return...

> +
> + val = I915_READ(DISP_ARB_CTL2);
> +
> + val |= DISP_IPC_ENABLE;
> +
> + I915_WRITE(DISP_ARB_CTL2, val);
> +}
> +
>  /*
>   * Lock protecting IPS related data structures
>   */
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v4 7/8] drm/i915/skl+: change WM calc to fixed point 16.16

2016-11-04 Thread Paulo Zanoni
Em Qui, 2016-10-13 às 16:28 +0530, Kumar, Mahesh escreveu:
> From: Mahesh Kumar 
> 
> This patch changes Watermark calculation to fixed point calculation.
> Problem with current calculation is during plane_blocks_per_line
> calculation we divide intermediate blocks with min_scanlines and
> takes floor of the result because of integer operation.
> hence we end-up assigning less blocks than required. Which leads to
> flickers.
> 

There are still variables that got auto-converted to 16.16 and need to
be adjusted because later they are mixed with non-16.16 in non-safe
ways. The fact that it's hard to identify these things really worries
me.


> Signed-off-by: Mahesh Kumar 
> ---
>  drivers/gpu/drm/i915/intel_pm.c | 16 +++-
>  1 file changed, 11 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_pm.c
> b/drivers/gpu/drm/i915/intel_pm.c
> index 0eaaadc..4263212 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3527,16 +3527,19 @@ static uint32_t skl_pipe_pixel_rate(const
> struct intel_crtc_state *config)
>   * for the read latency) and cpp should always be <= 8, so that
>   * should allow pixel_rate up to ~2 GHz which seems sufficient since
> max
>   * 2xcdclk is 1350 MHz and the pixel rate should never exceed that.
> + * Both Method1 & Method2 returns fixedpoint 16.16 output
>  */
>  static uint32_t skl_wm_method1(uint32_t pixel_rate, uint8_t cpp,
> uint32_t latency)
>  {
> - uint32_t wm_intermediate_val, ret;
> + uint64_t wm_intermediate_val;
> + uint32_t ret;
>  
>   if (latency == 0)
>   return UINT_MAX;
>  
> - wm_intermediate_val = latency * pixel_rate * cpp / 512;
> - ret = DIV_ROUND_UP(wm_intermediate_val, 1000);
> + wm_intermediate_val = latency * pixel_rate * cpp;
> + wm_intermediate_val <<= 16;
> + ret = DIV_ROUND_UP_ULL(wm_intermediate_val, 1000 * 512);
>  
>   return ret;
>  }
> @@ -3658,12 +3661,15 @@ static int skl_compute_plane_wm(const struct
> drm_i915_private *dev_priv,
>   if (y_tiled) {
>   plane_blocks_per_line =
>     DIV_ROUND_UP(plane_bytes_per_line *
> y_min_scanlines, 512);
> - plane_blocks_per_line /= y_min_scanlines;
> + plane_blocks_per_line = (plane_blocks_per_line <<
> 16) /
> + y_mi
> n_scanlines;
>   } else if (x_tiled) {
>   plane_blocks_per_line =
> DIV_ROUND_UP(plane_bytes_per_line, 512);
> + plane_blocks_per_line <<= 16;
>   } else {
>   plane_blocks_per_line =
> DIV_ROUND_UP(plane_bytes_per_line, 512)
>   + 1;
> + plane_blocks_per_line <<= 16;
>   }
>  
>   method1 = skl_wm_method1(plane_pixel_rate, cpp, latency);
> @@ -3690,7 +3696,7 @@ static int skl_compute_plane_wm(const struct
> drm_i915_private *dev_priv,
>   selected_result = method1;
>   }
>  
> - res_blocks = selected_result + 1;
> + res_blocks = DIV_ROUND_UP(selected_result, 1 << 16) + 1;
>   res_lines = DIV_ROUND_UP(selected_result,
> plane_blocks_per_line);
>  
>   if (level >= 1 && level <= 7) {
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v4 1/2] drm/i915/dp: Enable DP audio stall fix for gen9 platforms

2016-11-04 Thread Pandiyan, Dhinakaran
On Fri, 2016-11-04 at 17:48 +0200, Jani Nikula wrote:
> On Wed, 26 Oct 2016, Dhinakaran Pandiyan  
> wrote:
> > Enabling DP audio stall fix is necessary to play audio over DP HBR2. So,
> > let's set this bit right before enabling the audio codec. Playing audio
> > without setting this bit results in pipe FIFO underruns.
> >
> > This workaround is applicable only for audio sample rates up to 96kHz. For
> > frequencies above 96kHz, this is insufficient and cdclk should be increased
> > to at least 432 MHz, just like BDW. Since, the audio driver does not
> > support sample rates > 48 kHz, we are safe with this fix for now.
> 
> Do we still need this patch now that these two have been pushed?
> 
> b30ce9e0552a drm/i915/dp: BDW cdclk fix for DP audio
> 9c7540241885 drm/i915/dp: Extend BDW DP audio workaround to GEN9 platforms
> 
> BR,
> Jani.
> 
> 
> 

No, we are good afaik. This patch would have helped us to make use of a
lower cdclk (337.5 MHz), with constraints on audio bit rate. Operating
at 432 MHz, like we do now, rules out the need for this patch.

-DK

> >
> > v2: Inlined the code change within hsw_audio_codec_enable() (Jani)
> > Fixed the port clock typo
> > Added TODO comment
> > Signed-off-by: Dhinakaran Pandiyan 
> > ---
> >  drivers/gpu/drm/i915/i915_reg.h|  5 +
> >  drivers/gpu/drm/i915/intel_audio.c | 30 +-
> >  2 files changed, 34 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_reg.h 
> > b/drivers/gpu/drm/i915/i915_reg.h
> > index 00efaa1..76dac48 100644
> > --- a/drivers/gpu/drm/i915/i915_reg.h
> > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > @@ -6236,6 +6236,11 @@ enum {
> >  #define SLICE_ECO_CHICKEN0 _MMIO(0x7308)
> >  #define   PIXEL_MASK_CAMMING_DISABLE   (1 << 14)
> >  
> > +#define _CHICKEN_TRANS_A   0x420C0
> > +#define _CHICKEN_TRANS_B   0x420C4
> > +#define CHICKEN_TRANS(tran) _MMIO_TRANS(tran, _CHICKEN_TRANS_A, 
> > _CHICKEN_TRANS_B)
> > +#define SPARE_13   (1<<13)
> > +
> >  /* WaCatErrorRejectionIssue */
> >  #define GEN7_SQ_CHICKEN_MBCUNIT_CONFIG _MMIO(0x9030)
> >  #define  GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB  (1<<11)
> > diff --git a/drivers/gpu/drm/i915/intel_audio.c 
> > b/drivers/gpu/drm/i915/intel_audio.c
> > index 7093cfb..894f11e 100644
> > --- a/drivers/gpu/drm/i915/intel_audio.c
> > +++ b/drivers/gpu/drm/i915/intel_audio.c
> > @@ -283,6 +283,8 @@ static void hsw_audio_codec_disable(struct 
> > intel_encoder *encoder)
> >  {
> > struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
> > struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc);
> > +   struct intel_crtc_state *crtc_config =  intel_crtc->config;
> > +   enum transcoder cpu_transcoder = crtc_config->cpu_transcoder;
> > enum pipe pipe = intel_crtc->pipe;
> > uint32_t tmp;
> >  
> > @@ -290,13 +292,21 @@ static void hsw_audio_codec_disable(struct 
> > intel_encoder *encoder)
> >  
> > > mutex_lock(&dev_priv->av_mutex);
> >  
> > +   /*Disable DP audio stall fix for HBR2*/
> > +   if (IS_GEN9(dev_priv) && intel_crtc_has_dp_encoder(crtc_config) &&
> > +   crtc_config->port_clock >= 54) {
> > +   tmp = I915_READ(CHICKEN_TRANS(cpu_transcoder));
> > +   tmp &= ~SPARE_13;
> > +   I915_WRITE(CHICKEN_TRANS(cpu_transcoder), tmp);
> > +   }
> > +
> > /* Disable timestamps */
> > tmp = I915_READ(HSW_AUD_CFG(pipe));
> > tmp &= ~AUD_CONFIG_N_VALUE_INDEX;
> > tmp |= AUD_CONFIG_N_PROG_ENABLE;
> > tmp &= ~AUD_CONFIG_UPPER_N_MASK;
> > tmp &= ~AUD_CONFIG_LOWER_N_MASK;
> > -   if (intel_crtc_has_dp_encoder(intel_crtc->config))
> > +   if (intel_crtc_has_dp_encoder(crtc_config))
> > tmp |= AUD_CONFIG_N_VALUE_INDEX;
> > I915_WRITE(HSW_AUD_CFG(pipe), tmp);
> >  
> > @@ -315,6 +325,8 @@ static void hsw_audio_codec_enable(struct drm_connector 
> > *connector,
> >  {
> > struct drm_i915_private *dev_priv = to_i915(connector->dev);
> > struct intel_crtc *intel_crtc = to_intel_crtc(intel_encoder->base.crtc);
> > +   struct intel_crtc_state *crtc_config =  intel_crtc->config;
> > +   enum transcoder cpu_transcoder = crtc_config->cpu_transcoder;
> > enum pipe pipe = intel_crtc->pipe;
> > enum port port = intel_encoder->port;
> > const uint8_t *eld = connector->eld;
> > @@ -326,6 +338,22 @@ static void hsw_audio_codec_enable(struct 
> > drm_connector *connector,
> >  
> > > mutex_lock(&dev_priv->av_mutex);
> >  
> > +   /* Enable DP audio stall fix for HBR2
> > +*
> > +* TODO: This workaround is applicable only for audio sample rates up
> > +* to 96kHz. For frequencies above 96kHz, this is insufficient and
> > +* cdclk should be increased to at least 432 MHz, just like BDW. Since,
> > +* the audio driver does not support sample rates > 48 kHz, we are safe
> > +* with this fix for now.
> > +*/
> > +
> > +   if 

[Intel-gfx] ✗ Fi.CI.BAT: failure for drm/i915: Remove the vma from the object list upon close

2016-11-04 Thread Patchwork
== Series Details ==

Series: drm/i915: Remove the vma from the object list upon close
URL   : https://patchwork.freedesktop.org/series/14850/
State : failure

== Summary ==

Series 14850v1 drm/i915: Remove the vma from the object list upon close
https://patchwork.freedesktop.org/api/1.0/series/14850/revisions/1/mbox/

Test gem_busy:
Subgroup basic-hang-default:
pass   -> FAIL   (fi-hsw-4770r)

fi-bdw-5557u total:241  pass:226  dwarn:0   dfail:0   fail:0   skip:15 
fi-bsw-n3050 total:241  pass:201  dwarn:0   dfail:0   fail:0   skip:40 
fi-bxt-t5700 total:241  pass:213  dwarn:0   dfail:0   fail:0   skip:28 
fi-byt-j1900 total:241  pass:213  dwarn:0   dfail:0   fail:0   skip:28 
fi-byt-n2820 total:241  pass:209  dwarn:0   dfail:0   fail:0   skip:32 
fi-hsw-4770  total:241  pass:221  dwarn:0   dfail:0   fail:0   skip:20 
fi-hsw-4770r total:241  pass:220  dwarn:0   dfail:0   fail:1   skip:20 
fi-ilk-650   total:241  pass:188  dwarn:0   dfail:0   fail:0   skip:53 
fi-ivb-3520m total:241  pass:219  dwarn:0   dfail:0   fail:0   skip:22 
fi-ivb-3770  total:241  pass:219  dwarn:0   dfail:0   fail:0   skip:22 
fi-kbl-7200u total:241  pass:219  dwarn:0   dfail:0   fail:0   skip:22 
fi-skl-6260u total:241  pass:227  dwarn:0   dfail:0   fail:0   skip:14 
fi-skl-6700hqtotal:241  pass:220  dwarn:0   dfail:0   fail:0   skip:21 
fi-skl-6700k total:241  pass:219  dwarn:1   dfail:0   fail:0   skip:21 
fi-skl-6770hqtotal:241  pass:227  dwarn:0   dfail:0   fail:0   skip:14 
fi-snb-2520m total:241  pass:209  dwarn:0   dfail:0   fail:0   skip:32 
fi-snb-2600  total:241  pass:208  dwarn:0   dfail:0   fail:0   skip:33 

00d2fcf7c84de382bd2ceb5eaf908f76900d0791 drm-intel-nightly: 
2016y-11m-04d-15h-43m-43s UTC integration manifest
e8336dc drm/i915: Remove the vma from the object list upon close

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_2908/
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v4 6/8] drm/i915/skl: Add variables to check x_tile and y_tile

2016-11-04 Thread Paulo Zanoni
Em Qui, 2016-10-13 às 16:28 +0530, Kumar, Mahesh escreveu:
> From: Mahesh Kumar 
> 
> This patch adds variable to check for X_tiled & y_tiled planes,
> instead
> of always checking against framebuffer-modifiers.
> 
> Changes:
>  - Created separate patch as per Paulo's comment
>  - Added x_tiled variable as well
> 
> Signed-off-by: Mahesh Kumar 
> ---
>  drivers/gpu/drm/i915/intel_pm.c | 22 +-
>  1 file changed, 13 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_pm.c
> b/drivers/gpu/drm/i915/intel_pm.c
> index a668204..0eaaadc 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3602,6 +3602,7 @@ static int skl_compute_plane_wm(const struct
> drm_i915_private *dev_priv,
>   uint32_t plane_pixel_rate;
>   uint32_t y_tile_minimum, y_min_scanlines;
>   enum watermark_memory_wa mem_wa;
> + bool y_tiled = false, x_tiled = false;
>  
>   if (latency == 0 || !cstate->base.active || !intel_pstate-
> >base.visible)
>   return 0;
> @@ -3621,6 +3622,12 @@ static int skl_compute_plane_wm(const struct
> drm_i915_private *dev_priv,
>   cpp = drm_format_plane_cpp(fb->pixel_format, 0);
>   plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate,
> intel_pstate);
>  
> + if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
> + fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED)
> + y_tiled = true;
> + else if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
> + x_tiled = true;
> +

Or you could go with the simpler:

y_tiled = fb->modifier[0] == Y_TILED || fb->modifier[0] == Yf_TILED;
x_tiled = fb->modifier[0] == X_TILED;

And this would allow you to even remove the initialization to false
above, and would allow the compiler to complain in case we try to use
uninitialized values.

But that's just an optional bikeshed.

Anyway, I like the patch but it needs a rebase. It's better to just
include this patch in the beginning of the series so we can merge it
more easily, independently of the others.

>   if (intel_rotation_90_or_270(pstate->rotation)) {
>   int cpp = (fb->pixel_format == DRM_FORMAT_NV12) ?
>   drm_format_plane_cpp(fb->pixel_format, 1) :
> @@ -3648,16 +3655,15 @@ static int skl_compute_plane_wm(const struct
> drm_i915_private *dev_priv,
>   y_min_scanlines *= 2;
>  
>   plane_bytes_per_line = width * cpp;
> - if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
> - fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) {
> + if (y_tiled) {
>   plane_blocks_per_line =
>     DIV_ROUND_UP(plane_bytes_per_line *
> y_min_scanlines, 512);
>   plane_blocks_per_line /= y_min_scanlines;
> - } else if (fb->modifier[0] == DRM_FORMAT_MOD_NONE) {
> + } else if (x_tiled) {
> + plane_blocks_per_line =
> DIV_ROUND_UP(plane_bytes_per_line, 512);
> + } else {
>   plane_blocks_per_line =
> DIV_ROUND_UP(plane_bytes_per_line, 512)
>   + 1;
> - } else {
> - plane_blocks_per_line =
> DIV_ROUND_UP(plane_bytes_per_line, 512);
>   }
>  
>   method1 = skl_wm_method1(plane_pixel_rate, cpp, latency);
> @@ -3668,8 +3674,7 @@ static int skl_compute_plane_wm(const struct
> drm_i915_private *dev_priv,
>  
>   y_tile_minimum = plane_blocks_per_line * y_min_scanlines;
>  
> - if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
> - fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) {
> + if (y_tiled) {
>   selected_result = max(method2, y_tile_minimum);
>   } else {
>   uint32_t linetime_us = 0;
> @@ -3689,8 +3694,7 @@ static int skl_compute_plane_wm(const struct
> drm_i915_private *dev_priv,
>   res_lines = DIV_ROUND_UP(selected_result,
> plane_blocks_per_line);
>  
>   if (level >= 1 && level <= 7) {
> - if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED ||
> - fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) {
> + if (y_tiled) {
>   res_blocks += y_tile_minimum;
>   res_lines += y_min_scanlines;
>   } else {
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v4 5/8] drm/i915/skl+: reset y_plane ddb structure also during calculation

2016-11-04 Thread Paulo Zanoni
Em Qui, 2016-10-13 às 16:28 +0530, Kumar, Mahesh escreveu:
> From: Mahesh Kumar 
> 
> Current code clears only plane ddb allocation if total ddb allocated
> to
> pipe is zero. y_plane ddb still contains old value, clear that as
> well.
> 
> Signed-off-by: Mahesh Kumar 
> ---
>  drivers/gpu/drm/i915/intel_pm.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/gpu/drm/i915/intel_pm.c
> b/drivers/gpu/drm/i915/intel_pm.c
> index 5b8f715..a668204 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3381,6 +3381,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state
> *cstate,
>   alloc_size = skl_ddb_entry_size(alloc);
>   if (alloc_size == 0) {
>   memset(ddb->plane[pipe], 0, sizeof(ddb-
> >plane[pipe]));
> + memset(ddb->y_plane[pipe], 0, sizeof(ddb-
> >y_plane[pipe]));

With the latest code we can just remove both memset() calls.

>   return 0;
>   }
>  
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v4 4/8] drm/i915/gen9: WM memory bandwidth related workaround

2016-11-04 Thread Ville Syrjälä
On Fri, Nov 04, 2016 at 03:09:04PM -0200, Paulo Zanoni wrote:
> Em Qui, 2016-10-13 às 16:28 +0530, Kumar, Mahesh escreveu:
> > This patch implements workarounds related to display arbitrated
> > memory
> > bandwidth. These WA are applicable for all gen-9 based platforms.
> > 
> > Changes since v1:
> >  - Rebase on top of Paulo's patch series
> > Changes since v2:
> >  - Rebase/rework after addressing Paulo's comments in previous patch
> 
> A lot of this code has changed since then, so this will need a
> significant rebase. In the meantime, I added skl_needs_memory_bw_wa()
> and we're now applying the WA by default: we just won't apply the WA
> when we're pretty sure we don't need to. This helps avoiding underruns
> by default.
> 
> See more below.
> 
> 
> > 
> > Signed-off-by: "Kumar, Mahesh" 
> > ---
> >  drivers/gpu/drm/i915/i915_drv.h  |   9 +++
> >  drivers/gpu/drm/i915/intel_drv.h |  11 +++
> >  drivers/gpu/drm/i915/intel_pm.c  | 146
> > +++
> >  3 files changed, 166 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h
> > b/drivers/gpu/drm/i915/i915_drv.h
> > index adbd9aa..c169360 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -1092,6 +1092,13 @@ enum intel_sbi_destination {
> >     SBI_MPHY,
> >  };
> >  
> > +/* SKL+ Watermark arbitrated display bandwidth Workarounds */
> > +enum watermark_memory_wa {
> > +   WATERMARK_WA_NONE,
> > +   WATERMARK_WA_X_TILED,
> > +   WATERMARK_WA_Y_TILED,
> > +};
> > +
> >  #define QUIRK_PIPEA_FORCE (1<<0)
> >  #define QUIRK_LVDS_SSC_DISABLE (1<<1)
> >  #define QUIRK_INVERT_BRIGHTNESS (1<<2)
> > @@ -1644,6 +1651,8 @@ struct skl_ddb_allocation {
> >  
> >  struct skl_wm_values {
> >     unsigned dirty_pipes;
> > +   /* any WaterMark memory workaround Required */
> 
> We can remove this comment since it doesn't say anything the variable
> name doesn't.
> 
> > +   enum watermark_memory_wa mem_wa;
> 
> Now that we have a proper variable in the state struct, it probably
> makes sense to just kill skl_needs_memory_bw_wa() and read this
> variable when we need to.
> 
> 
> >     struct skl_ddb_allocation ddb;
> >     uint32_t wm_linetime[I915_MAX_PIPES];
> >     uint32_t plane[I915_MAX_PIPES][I915_MAX_PLANES][8];
> > diff --git a/drivers/gpu/drm/i915/intel_drv.h
> > b/drivers/gpu/drm/i915/intel_drv.h
> > index f48e79a..2c1897b 100644
> > --- a/drivers/gpu/drm/i915/intel_drv.h
> > +++ b/drivers/gpu/drm/i915/intel_drv.h
> > @@ -1813,6 +1813,17 @@ intel_atomic_get_crtc_state(struct
> > drm_atomic_state *state,
> >     return to_intel_crtc_state(crtc_state);
> >  }
> >  
> > +static inline struct intel_crtc_state *
> > +intel_atomic_get_existing_crtc_state(struct drm_atomic_state *state,
> > +     struct intel_crtc *crtc)
> > +{
> > +   struct drm_crtc_state *crtc_state;
> > +
> > +   crtc_state = drm_atomic_get_existing_crtc_state(state,
> > &crtc->base);
> > +
> > +   return to_intel_crtc_state(crtc_state);
> 
> I really don't like the idea of calling to_intel_crtc_state() on a
> potentially NULL pointer so the caller of this function will also check
> for NULL. Even though it works today, I still think it's unsafe
> practice. Please check crtc_state for NULL directly and then return
> NULL.

I want to make this safe by making it a compile error if offsetof(base) != 0.
https://lists.freedesktop.org/archives/intel-gfx/2016-October/108175.html

But I think we want to go further than that patch by adding a bit more
type safety to things. I did play around with this stuff a bit more,
and I have something sitting on a branch, but I didn't quite figure out
what I want to do about const vs. non const yet.

> 
> Also, I think this function should be extracted to its own commit, and
> we'd probably be able to find some callers in the existing i915 code.

I have, on some branch again, _intel_ versions of the for_each_foo_in_state()
macros as well. I think those are going to allow a lot of ugly casting
stuff to disappear. But I think I'll hold off until Maarten's new
iterators go in before I try to send those out.

-- 
Ville Syrjälä
Intel OTC
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/2] shmem: Support for registration of driver/file owner specific ops

2016-11-04 Thread Patchwork
== Series Details ==

Series: series starting with [1/2] shmem: Support for registration of 
driver/file owner specific ops
URL   : https://patchwork.freedesktop.org/series/14845/
State : success

== Summary ==

Series 14845v1 Series without cover letter
https://patchwork.freedesktop.org/api/1.0/series/14845/revisions/1/mbox/


fi-bdw-5557u total:241  pass:226  dwarn:0   dfail:0   fail:0   skip:15 
fi-bsw-n3050 total:241  pass:201  dwarn:0   dfail:0   fail:0   skip:40 
fi-byt-j1900 total:241  pass:213  dwarn:0   dfail:0   fail:0   skip:28 
fi-byt-n2820 total:241  pass:209  dwarn:0   dfail:0   fail:0   skip:32 
fi-hsw-4770  total:241  pass:221  dwarn:0   dfail:0   fail:0   skip:20 
fi-hsw-4770r total:241  pass:221  dwarn:0   dfail:0   fail:0   skip:20 
fi-ilk-650   total:241  pass:188  dwarn:0   dfail:0   fail:0   skip:53 
fi-ivb-3520m total:241  pass:219  dwarn:0   dfail:0   fail:0   skip:22 
fi-ivb-3770  total:241  pass:219  dwarn:0   dfail:0   fail:0   skip:22 
fi-kbl-7200u total:241  pass:219  dwarn:0   dfail:0   fail:0   skip:22 
fi-skl-6260u total:241  pass:227  dwarn:0   dfail:0   fail:0   skip:14 
fi-skl-6700hq total:241  pass:220  dwarn:0   dfail:0   fail:0   skip:21 
fi-skl-6700k total:241  pass:219  dwarn:1   dfail:0   fail:0   skip:21 
fi-skl-6770hq total:241  pass:227  dwarn:0   dfail:0   fail:0   skip:14 
fi-snb-2520m total:241  pass:209  dwarn:0   dfail:0   fail:0   skip:32 
fi-snb-2600  total:241  pass:208  dwarn:0   dfail:0   fail:0   skip:33 

00d2fcf7c84de382bd2ceb5eaf908f76900d0791 drm-intel-nightly: 
2016y-11m-04d-15h-43m-43s UTC integration manifest
95d9dd7 drm/i915: Make GPU pages movable
e67b361 shmem: Support for registration of driver/file owner specific ops

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_2907/
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v4 4/8] drm/i915/gen9: WM memory bandwidth related workaround

2016-11-04 Thread Paulo Zanoni
Em Qui, 2016-10-13 às 16:28 +0530, Kumar, Mahesh escreveu:
> This patch implements workarounds related to display arbitrated
> memory
> bandwidth. These WA are applicable for all gen-9 based platforms.
> 
> Changes since v1:
>  - Rebase on top of Paulo's patch series
> Changes since v2:
>  - Rebase/rework after addressing Paulo's comments in previous patch

A lot of this code has changed since then, so this will need a
significant rebase. In the meantime, I added skl_needs_memory_bw_wa()
and we're now applying the WA by default: we just won't apply the WA
when we're pretty sure we don't need to. This helps avoiding underruns
by default.

See more below.


> 
> Signed-off-by: "Kumar, Mahesh" 
> ---
>  drivers/gpu/drm/i915/i915_drv.h  |   9 +++
>  drivers/gpu/drm/i915/intel_drv.h |  11 +++
>  drivers/gpu/drm/i915/intel_pm.c  | 146
> +++
>  3 files changed, 166 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h
> b/drivers/gpu/drm/i915/i915_drv.h
> index adbd9aa..c169360 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1092,6 +1092,13 @@ enum intel_sbi_destination {
>   SBI_MPHY,
>  };
>  
> +/* SKL+ Watermark arbitrated display bandwidth Workarounds */
> +enum watermark_memory_wa {
> + WATERMARK_WA_NONE,
> + WATERMARK_WA_X_TILED,
> + WATERMARK_WA_Y_TILED,
> +};
> +
>  #define QUIRK_PIPEA_FORCE (1<<0)
>  #define QUIRK_LVDS_SSC_DISABLE (1<<1)
>  #define QUIRK_INVERT_BRIGHTNESS (1<<2)
> @@ -1644,6 +1651,8 @@ struct skl_ddb_allocation {
>  
>  struct skl_wm_values {
>   unsigned dirty_pipes;
> + /* any WaterMark memory workaround Required */

We can remove this comment since it doesn't say anything the variable
name doesn't.

> + enum watermark_memory_wa mem_wa;

Now that we have a proper variable in the state struct, it probably
makes sense to just kill skl_needs_memory_bw_wa() and read this
variable when we need to.


>   struct skl_ddb_allocation ddb;
>   uint32_t wm_linetime[I915_MAX_PIPES];
>   uint32_t plane[I915_MAX_PIPES][I915_MAX_PLANES][8];
> diff --git a/drivers/gpu/drm/i915/intel_drv.h
> b/drivers/gpu/drm/i915/intel_drv.h
> index f48e79a..2c1897b 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -1813,6 +1813,17 @@ intel_atomic_get_crtc_state(struct
> drm_atomic_state *state,
>   return to_intel_crtc_state(crtc_state);
>  }
>  
> +static inline struct intel_crtc_state *
> +intel_atomic_get_existing_crtc_state(struct drm_atomic_state *state,
> +   struct intel_crtc *crtc)
> +{
> + struct drm_crtc_state *crtc_state;
> +
> + crtc_state = drm_atomic_get_existing_crtc_state(state,
> &crtc->base);
> +
> + return to_intel_crtc_state(crtc_state);

I really don't like the idea of calling to_intel_crtc_state() on a
potentially NULL pointer so the caller of this function will also check
for NULL. Even though it works today, I still think it's unsafe
practice. Please check crtc_state for NULL directly and then return
NULL.

Also, I think this function should be extracted to its own commit, and
we'd probably be able to find some callers in the existing i915 code.


> +}
> +
>  static inline struct intel_plane_state *
>  intel_atomic_get_existing_plane_state(struct drm_atomic_state
> *state,
>     struct intel_plane *plane)
> diff --git a/drivers/gpu/drm/i915/intel_pm.c
> b/drivers/gpu/drm/i915/intel_pm.c
> index 84ec6b1..5b8f715 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3589,6 +3589,8 @@ static int skl_compute_plane_wm(const struct
> drm_i915_private *dev_priv,
>  {
>   struct drm_plane_state *pstate = &intel_pstate->base;
>   struct drm_framebuffer *fb = pstate->fb;
> + struct intel_atomic_state *intel_state =
> + to_intel_atomic_state(cstate->base.state);
>   uint32_t latency = dev_priv->wm.skl_latency[level];
>   uint32_t method1, method2;
>   uint32_t plane_bytes_per_line, plane_blocks_per_line;
> @@ -3598,10 +3600,17 @@ static int skl_compute_plane_wm(const struct
> drm_i915_private *dev_priv,
>   uint32_t width = 0, height = 0;
>   uint32_t plane_pixel_rate;
>   uint32_t y_tile_minimum, y_min_scanlines;
> + enum watermark_memory_wa mem_wa;
>  
>   if (latency == 0 || !cstate->base.active || !intel_pstate-
> >base.visible)
>   return 0;
>  
> + mem_wa = intel_state ? intel_state->wm_results.mem_wa :
> WATERMARK_WA_NONE;
> + if (mem_wa != WATERMARK_WA_NONE) {
> + if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED)
> + latency += 15;
> + }
> +
>   width = drm_rect_width(&intel_pstate->base.src) >> 16;
>   height = drm_rect_height(&intel_pstate->base.src) >> 16;
>  
> @@ -3634,6 +3643,9 @@ static int skl_compute_plane_wm(const struct
> drm_i915_private *dev_priv,
>   

Re: [Intel-gfx] linux-next: manual merge of the mali-dp tree with the drm-misc tree

2016-11-04 Thread Liviu Dudau
On Sat, Nov 05, 2016 at 03:55:03AM +1100, Stephen Rothwell wrote:
> Hi Liviu,
> 
> On Fri, 4 Nov 2016 15:48:02 + Liviu Dudau  wrote:
> >
> > Brian Starkey is a co-maintainer for the Mali DP tree, so his Signed-off-by
> > alone should be good. Baoyou's patch is in my tree to stop him repeatedly
> > send me the same patch over and over again :) But yes, I will add my
> > Signed-off-by for that one.
> 
> Sorry, but this is not sufficient.  Please read section 11 of
> Documentation/SubmittingPatches (or
> Documentation/process/submitting-patches.rst where it has been moved
> recently).  If you are in the path of a patch to Linus, you must add a
> Signed-off-by line, and as the person who committed those patches to
> the tree, you are in the path.

Thanks for correcting me. I will add my Signed-off-bys to the relevant
patches.

Best regards,
Liviu

> -- 
> Cheers,
> Stephen Rothwell

-- 

| I would like to |
| fix the world,  |
| but they're not |
| giving me the   |
 \ source code!  /
  ---
¯\_(ツ)_/¯
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] linux-next: manual merge of the mali-dp tree with the drm-misc tree

2016-11-04 Thread Stephen Rothwell
Hi Liviu,

On Fri, 4 Nov 2016 15:48:02 + Liviu Dudau  wrote:
>
> Brian Starkey is a co-maintainer for the Mali DP tree, so his Signed-off-by
> alone should be good. Baoyou's patch is in my tree to stop him repeatedly
> send me the same patch over and over again :) But yes, I will add my
> Signed-off-by for that one.

Sorry, but this is not sufficient.  Please read section 11 of
Documentation/SubmittingPatches (or
Documentation/process/submitting-patches.rst where it has been moved
recently).  If you are in the path of a patch to Linus, you must add a
Signed-off-by line, and as the person who committed those patches to
the tree, you are in the path.
-- 
Cheers,
Stephen Rothwell
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 2/5] drm/i915: More assorted dev_priv cleanups

2016-11-04 Thread Ville Syrjälä
On Fri, Nov 04, 2016 at 04:03:55PM +, Tvrtko Ursulin wrote:
> 
> On 04/11/2016 15:32, Ville Syrjälä wrote:
> > On Fri, Nov 04, 2016 at 02:42:45PM +, Tvrtko Ursulin wrote:
> >> From: Tvrtko Ursulin 
> >>
> >> A small selection of macros which can only accept dev_priv from
> >> now on and a resulting trickle of fixups.
> >>
> >> Signed-off-by: Tvrtko Ursulin 
> >> ---
> >>  drivers/gpu/drm/i915/i915_drv.h   | 27 ---
> >>  drivers/gpu/drm/i915/i915_gpu_error.c |  2 +-
> >>  drivers/gpu/drm/i915/i915_irq.c   |  6 +++---
> >>  drivers/gpu/drm/i915/intel_crt.c  |  8 
> >>  drivers/gpu/drm/i915/intel_display.c  |  4 ++--
> >>  drivers/gpu/drm/i915/intel_dp.c   |  2 +-
> >>  drivers/gpu/drm/i915/intel_hotplug.c  |  2 +-
> >>  drivers/gpu/drm/i915/intel_psr.c  |  2 +-
> >>  8 files changed, 25 insertions(+), 28 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/i915/i915_drv.h 
> >> b/drivers/gpu/drm/i915/i915_drv.h
> >> index 45a30f730216..6060e41d25e5 100644
> >> --- a/drivers/gpu/drm/i915/i915_drv.h
> >> +++ b/drivers/gpu/drm/i915/i915_drv.h
> >> @@ -2901,28 +2901,25 @@ struct drm_i915_cmd_table {
> >>  #define HAS_128_BYTE_Y_TILING(dev_priv) (!IS_GEN2(dev_priv) && \
> >> !(IS_I915G(dev_priv) || \
> >> IS_I915GM(dev_priv)))
> >> -#define SUPPORTS_TV(dev)  (INTEL_INFO(dev)->supports_tv)
> >> -#define I915_HAS_HOTPLUG(dev)  (INTEL_INFO(dev)->has_hotplug)
> >> -
> >> -#define HAS_FW_BLC(dev_priv) (INTEL_GEN(dev_priv) > 2)
> >> -#define HAS_PIPE_CXSR(dev) (INTEL_INFO(dev)->has_pipe_cxsr)
> >> -#define HAS_FBC(dev) (INTEL_INFO(dev)->has_fbc)
> >> +#define SUPPORTS_TV(dev_priv) ((dev_priv)->info.supports_tv)
> >> +#define I915_HAS_HOTPLUG(dev_priv)((dev_priv)->info.has_hotplug)
> >>
> >> +#define HAS_FW_BLC(dev_priv)  (INTEL_GEN(dev_priv) > 2)
> >> +#define HAS_PIPE_CXSR(dev_priv) ((dev_priv)->info.has_pipe_cxsr)
> >> +#define HAS_FBC(dev_priv) ((dev_priv)->info.has_fbc)
> >>  #define HAS_IPS(dev_priv) (IS_HSW_ULT(dev_priv) || IS_BROADWELL(dev_priv))
> >> -
> >> -#define HAS_DP_MST(dev)   (INTEL_INFO(dev)->has_dp_mst)
> >> -
> >> +#define HAS_DP_MST(dev_priv)  ((dev_priv)->info.has_dp_mst)
> >>  #define HAS_DDI(dev_priv) ((dev_priv)->info.has_ddi)
> >> -#define HAS_FPGA_DBG_UNCLAIMED(dev)   (INTEL_INFO(dev)->has_fpga_dbg)
> >> -#define HAS_PSR(dev)  (INTEL_INFO(dev)->has_psr)
> >> -#define HAS_RC6(dev)  (INTEL_INFO(dev)->has_rc6)
> >> -#define HAS_RC6p(dev) (INTEL_INFO(dev)->has_rc6p)
> >> -
> >> -#define HAS_CSR(dev)  (INTEL_INFO(dev)->has_csr)
> >> +#define HAS_PSR(dev_priv) ((dev_priv)->info.has_psr)
> >> +#define HAS_RC6(dev_priv) ((dev_priv)->info.has_rc6)
> >> +#define HAS_RC6p(dev_priv)((dev_priv)->info.has_rc6p)
> >> +#define HAS_CSR(dev_priv) ((dev_priv)->info.has_csr)
> >>
> >>  #define HAS_RUNTIME_PM(dev_priv) ((dev_priv)->info.has_runtime_pm)
> >>  #define HAS_64BIT_RELOC(dev_priv) ((dev_priv)->info.has_64bit_reloc)
> >>
> >> +#define HAS_FPGA_DBG_UNCLAIMED(dev_priv) ((dev_priv)->info.has_fpga_dbg)
> >
> > What's confusing me is this reordering of these macros. Was there a
> > particular reason for doing that?
> 
> Just because of its long name, so I pulled it out and separated so the 
> alignment is nicer in the blocks above it.

The original grouping looked more based on functionality, so made a bit
more sense to me.

> 
> > Outside that it all looks pretty reasonable. Could go a bit further
> > with passing around dev_priv in some cases, but I guess we can leave
> > that to future work.
> 
> Yes, I mention that in the cover letter.
> 
> > One random idea that did pop into my head was this:
> >
> > static inline const struct ... *
> > intel_info(struct drm_i915_private *dev_priv)
> > {
> > > return &dev_priv->info;
> > }
> > #define HAS_WHATEVER(dev_priv) (intel_info(dev_priv)->whatever)
> >
> > for some extra type safety. Any thoughts?
> 
> Sounds like a good idea to me. And it would be really easy to do, 
> localized to i915_drv.h, and then when the last INTEL_INFO(dev) gets 
> converted we can make it use the inline as well.
> 
> Regards,
> 
> Tvrtko

-- 
Ville Syrjälä
Intel OTC
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915: Remove the vma from the object list upon close

2016-11-04 Thread Chris Wilson
Currently, the vma is being unlinked from the object lookup on destroy.
However, we are meant to be decoupling it upon close so that the user
cannot access the closed vma whilst it remains active on the GPU.

[   34.074858] kernel BUG at drivers/gpu/drm/i915/i915_gem_gtt.c:3561!
[   34.074875] invalid opcode:  [#1] PREEMPT SMP
[   34.074888] Modules linked in: snd_hda_intel i915 x86_pkg_temp_thermal 
coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel lpc_ich mei_me mei 
snd_hda_codec_realtek snd_hda_codec_generic snd_hda_codec_hdmi snd_hda_codec 
snd_hwdep snd_hda_core i2c_designware_platform i2c_designware_core snd_pcm 
e1000e ptp pps_core sdhci_acpi sdhci mmc_core i2c_hid [last unloaded: i915]
[   34.075010] CPU: 1 PID: 6224 Comm: gem_close_race Tainted: G U  
4.9.0-rc3-CI-CI_DRM_1800+ #1
[   34.075034] Hardware name:  /NUC5i7RYB, BIOS 
RYBDWi35.86A.0355.2016.0224.1501 02/24/2016
[   34.075057] task: 8802459a8040 task.stack: c9524000
[   34.075074] RIP: 0010:[]  [] 
i915_gem_obj_lookup_or_create_vma+0x8c/0xc0 [i915]
[   34.075118] RSP: 0018:c9527b68  EFLAGS: 00010202
[   34.075135] RAX: 8802426c5e40 RBX:  RCX: 8802447fc2a8
[   34.075158] RDX:  RSI: 8802447fc2a8 RDI: 880248a4a880
[   34.075181] RBP: c9527b88 R08: 0008 R09: 
[   34.075203] R10: 0001 R11:  R12: 880248a4a880
[   34.075225] R13: 8802447fc2a8 R14: 880243e9afa8 R15: 880248a4a9c8
[   34.075248] FS:  7f9b43e59740() GS:880256c8() 
knlGS:
[   34.075273] CS:  0010 DS:  ES:  CR0: 80050033
[   34.075292] CR2: 7f9b43419140 CR3: 00024455d000 CR4: 003406e0
[   34.075314] Stack:
[   34.075323]   c9527bd0 880243cb8008 
880243e9afa8
[   34.075353]  c9527c08 a03874c7 c9527bb8 
880243e9afa8
[   34.075383]  880243e9afb0 c9527e10 8802447fc2a8 
880243cb8040
[   34.075414] Call Trace:
[   34.075435]  [] eb_lookup_vmas.isra.7+0x247/0x330 [i915]
[   34.075468]  [] 
i915_gem_do_execbuffer.isra.15+0x604/0x1a10 [i915]
[   34.075507]  [] ? i915_gem_object_get_sg+0x347/0x380 [i915]
[   34.075532]  [] ? __might_fault+0x3e/0x90
[   34.075562]  [] i915_gem_execbuffer2+0xc0/0x250 [i915]
[   34.075585]  [] drm_ioctl+0x1f6/0x480
[   34.075604]  [] ? trace_hardirqs_on_thunk+0x1a/0x1c
[   34.075635]  [] ? i915_gem_execbuffer+0x330/0x330 [i915]
[   34.075658]  [] do_vfs_ioctl+0x8e/0x690
[   34.075677]  [] ? _raw_spin_unlock_irqrestore+0x3d/0x60
[   34.075700]  [] ? SyS_timer_settime+0x141/0x1e0
[   34.075721]  [] ? trace_hardirqs_on_caller+0x122/0x1b0
[   34.075742]  [] SyS_ioctl+0x3c/0x70
[   34.075760]  [] entry_SYSCALL_64_fastpath+0x1c/0xb1
[   34.075781] Code: 44 a0 48 c7 c2 9a 7e 43 a0 be e0 0d 00 00 48 c7 c7 a0 45 
44 a0 e8 55 b8 ce e0 48 85 db 74 a3 49 83 bd f8 03 00 00 00 74 99 0f 0b <0f> 0b 
48 89 da 4c 89 ee 4c 89 e7 e8 04 a9 ff ff 48 89 da 49 89
[   34.075955] RIP  [] 
i915_gem_obj_lookup_or_create_vma+0x8c/0xc0 [i915]
[   34.075994]  RSP 

Testcase: igt/gem_close_race/basic-threads
Fixes: db6c2b4151f2 ("drm/i915: Store the vma in an rbtree...")
Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 52999e51a946..c5e77e040627 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3403,7 +3403,6 @@ void i915_vma_destroy(struct i915_vma *vma)
GEM_BUG_ON(!i915_vma_is_closed(vma));
GEM_BUG_ON(vma->fence);
 
-   rb_erase(&vma->obj_node, &vma->obj->vma_tree);
list_del(&vma->vm_link);
if (!i915_vma_is_ggtt(vma))
i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
@@ -3416,7 +3415,9 @@ void i915_vma_close(struct i915_vma *vma)
GEM_BUG_ON(i915_vma_is_closed(vma));
vma->flags |= I915_VMA_CLOSED;
 
-   list_del_init(&vma->obj_link);
+   list_del(&vma->obj_link);
+   rb_erase(&vma->obj_node, &vma->obj->vma_tree);
+
if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma))
WARN_ON(i915_vma_unbind(vma));
 }
-- 
2.10.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 2/5] drm/i915: More assorted dev_priv cleanups

2016-11-04 Thread Tvrtko Ursulin


On 04/11/2016 15:32, Ville Syrjälä wrote:

On Fri, Nov 04, 2016 at 02:42:45PM +, Tvrtko Ursulin wrote:

From: Tvrtko Ursulin 

A small selection of macros which can only accept dev_priv from
now on and a resulting trickle of fixups.

Signed-off-by: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/i915_drv.h   | 27 ---
 drivers/gpu/drm/i915/i915_gpu_error.c |  2 +-
 drivers/gpu/drm/i915/i915_irq.c   |  6 +++---
 drivers/gpu/drm/i915/intel_crt.c  |  8 
 drivers/gpu/drm/i915/intel_display.c  |  4 ++--
 drivers/gpu/drm/i915/intel_dp.c   |  2 +-
 drivers/gpu/drm/i915/intel_hotplug.c  |  2 +-
 drivers/gpu/drm/i915/intel_psr.c  |  2 +-
 8 files changed, 25 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 45a30f730216..6060e41d25e5 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2901,28 +2901,25 @@ struct drm_i915_cmd_table {
 #define HAS_128_BYTE_Y_TILING(dev_priv) (!IS_GEN2(dev_priv) && \
 !(IS_I915G(dev_priv) || \
 IS_I915GM(dev_priv)))
-#define SUPPORTS_TV(dev)   (INTEL_INFO(dev)->supports_tv)
-#define I915_HAS_HOTPLUG(dev)   (INTEL_INFO(dev)->has_hotplug)
-
-#define HAS_FW_BLC(dev_priv) (INTEL_GEN(dev_priv) > 2)
-#define HAS_PIPE_CXSR(dev) (INTEL_INFO(dev)->has_pipe_cxsr)
-#define HAS_FBC(dev) (INTEL_INFO(dev)->has_fbc)
+#define SUPPORTS_TV(dev_priv)  ((dev_priv)->info.supports_tv)
+#define I915_HAS_HOTPLUG(dev_priv) ((dev_priv)->info.has_hotplug)

+#define HAS_FW_BLC(dev_priv)   (INTEL_GEN(dev_priv) > 2)
+#define HAS_PIPE_CXSR(dev_priv) ((dev_priv)->info.has_pipe_cxsr)
+#define HAS_FBC(dev_priv)  ((dev_priv)->info.has_fbc)
 #define HAS_IPS(dev_priv)  (IS_HSW_ULT(dev_priv) || IS_BROADWELL(dev_priv))
-
-#define HAS_DP_MST(dev)(INTEL_INFO(dev)->has_dp_mst)
-
+#define HAS_DP_MST(dev_priv)   ((dev_priv)->info.has_dp_mst)
 #define HAS_DDI(dev_priv)  ((dev_priv)->info.has_ddi)
-#define HAS_FPGA_DBG_UNCLAIMED(dev)(INTEL_INFO(dev)->has_fpga_dbg)
-#define HAS_PSR(dev)   (INTEL_INFO(dev)->has_psr)
-#define HAS_RC6(dev)   (INTEL_INFO(dev)->has_rc6)
-#define HAS_RC6p(dev)  (INTEL_INFO(dev)->has_rc6p)
-
-#define HAS_CSR(dev)   (INTEL_INFO(dev)->has_csr)
+#define HAS_PSR(dev_priv)  ((dev_priv)->info.has_psr)
+#define HAS_RC6(dev_priv)  ((dev_priv)->info.has_rc6)
+#define HAS_RC6p(dev_priv) ((dev_priv)->info.has_rc6p)
+#define HAS_CSR(dev_priv)  ((dev_priv)->info.has_csr)

 #define HAS_RUNTIME_PM(dev_priv) ((dev_priv)->info.has_runtime_pm)
 #define HAS_64BIT_RELOC(dev_priv) ((dev_priv)->info.has_64bit_reloc)

+#define HAS_FPGA_DBG_UNCLAIMED(dev_priv) ((dev_priv)->info.has_fpga_dbg)


What's confusing me is this reordering of these macros. Was there a
particular reason for doing that?


Just because of its long name, so I pulled it out and separated so the 
alignment is nicer in the blocks above it.



Outside that it all looks pretty reasonable. Could go a bit further
with passing around dev_priv in some cases, but I guess we can leave
that to future work.


Yes, I mention that in the cover letter.


One random idea that did pop into my head was this:

static inline const struct ... *
intel_info(struct drm_i915_private *dev_priv)
{
return &dev_priv->info;
}
#define HAS_WHATEVER(dev_priv) (intel_info(dev_priv)->whatever)

for some extra type safety. Any thoughts?


Sounds like a good idea to me. And it would be really easy to do, 
localized to i915_drv.h, and then when the last INTEL_INFO(dev) gets 
converted we can make it use the inline as well.


Regards,

Tvrtko
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v4 1/2] drm/i915/dp: Enable DP audio stall fix for gen9 platforms

2016-11-04 Thread Jani Nikula
On Wed, 26 Oct 2016, Dhinakaran Pandiyan  wrote:
> Enabling DP audio stall fix is necessary to play audio over DP HBR2. So,
> let's set this bit right before enabling the audio codec. Playing audio
> without setting this bit results in pipe FIFO underruns.
>
> This workaround is applicable only for audio sample rates up to 96kHz. For
> frequencies above 96kHz, this is insufficient and cdclk should be increased
> to at least 432 MHz, just like BDW. Since, the audio driver does not
> support sample rates > 48 kHz, we are safe with this fix for now.

Do we still need this patch now that these two have been pushed?

b30ce9e0552a drm/i915/dp: BDW cdclk fix for DP audio
9c7540241885 drm/i915/dp: Extend BDW DP audio workaround to GEN9 platforms

BR,
Jani.



>
> v2: Inlined the code change within hsw_audio_codec_enable() (Jani)
> Fixed the port clock typo
> Added TODO comment
> Signed-off-by: Dhinakaran Pandiyan 
> ---
>  drivers/gpu/drm/i915/i915_reg.h|  5 +
>  drivers/gpu/drm/i915/intel_audio.c | 30 +-
>  2 files changed, 34 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 00efaa1..76dac48 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -6236,6 +6236,11 @@ enum {
>  #define SLICE_ECO_CHICKEN0   _MMIO(0x7308)
>  #define   PIXEL_MASK_CAMMING_DISABLE (1 << 14)
>  
> +#define _CHICKEN_TRANS_A 0x420C0
> +#define _CHICKEN_TRANS_B 0x420C4
> +#define CHICKEN_TRANS(tran) _MMIO_TRANS(tran, _CHICKEN_TRANS_A, 
> _CHICKEN_TRANS_B)
> +#define SPARE_13 (1<<13)
> +
>  /* WaCatErrorRejectionIssue */
>  #define GEN7_SQ_CHICKEN_MBCUNIT_CONFIG   _MMIO(0x9030)
>  #define  GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB(1<<11)
> diff --git a/drivers/gpu/drm/i915/intel_audio.c 
> b/drivers/gpu/drm/i915/intel_audio.c
> index 7093cfb..894f11e 100644
> --- a/drivers/gpu/drm/i915/intel_audio.c
> +++ b/drivers/gpu/drm/i915/intel_audio.c
> @@ -283,6 +283,8 @@ static void hsw_audio_codec_disable(struct intel_encoder 
> *encoder)
>  {
>   struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
>   struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc);
> + struct intel_crtc_state *crtc_config =  intel_crtc->config;
> + enum transcoder cpu_transcoder = crtc_config->cpu_transcoder;
>   enum pipe pipe = intel_crtc->pipe;
>   uint32_t tmp;
>  
> @@ -290,13 +292,21 @@ static void hsw_audio_codec_disable(struct 
> intel_encoder *encoder)
>  
>   mutex_lock(&dev_priv->av_mutex);
>  
> + /*Disable DP audio stall fix for HBR2*/
> + if (IS_GEN9(dev_priv) && intel_crtc_has_dp_encoder(crtc_config) &&
> crtc_config->port_clock >= 540000) {
> + tmp = I915_READ(CHICKEN_TRANS(cpu_transcoder));
> + tmp &= ~SPARE_13;
> + I915_WRITE(CHICKEN_TRANS(cpu_transcoder), tmp);
> + }
> +
>   /* Disable timestamps */
>   tmp = I915_READ(HSW_AUD_CFG(pipe));
>   tmp &= ~AUD_CONFIG_N_VALUE_INDEX;
>   tmp |= AUD_CONFIG_N_PROG_ENABLE;
>   tmp &= ~AUD_CONFIG_UPPER_N_MASK;
>   tmp &= ~AUD_CONFIG_LOWER_N_MASK;
> - if (intel_crtc_has_dp_encoder(intel_crtc->config))
> + if (intel_crtc_has_dp_encoder(crtc_config))
>   tmp |= AUD_CONFIG_N_VALUE_INDEX;
>   I915_WRITE(HSW_AUD_CFG(pipe), tmp);
>  
> @@ -315,6 +325,8 @@ static void hsw_audio_codec_enable(struct drm_connector 
> *connector,
>  {
>   struct drm_i915_private *dev_priv = to_i915(connector->dev);
>   struct intel_crtc *intel_crtc = to_intel_crtc(intel_encoder->base.crtc);
> + struct intel_crtc_state *crtc_config =  intel_crtc->config;
> + enum transcoder cpu_transcoder = crtc_config->cpu_transcoder;
>   enum pipe pipe = intel_crtc->pipe;
>   enum port port = intel_encoder->port;
>   const uint8_t *eld = connector->eld;
> @@ -326,6 +338,22 @@ static void hsw_audio_codec_enable(struct drm_connector 
> *connector,
>  
>   mutex_lock(&dev_priv->av_mutex);
>  
> + /* Enable DP audio stall fix for HBR2
> +  *
> +  * TODO: This workaround is applicable only for audio sample rates up
> +  * to 96kHz. For frequencies above 96kHz, this is insufficient and
> +  * cdclk should be increased to at least 432 MHz, just like BDW. Since,
> +  * the audio driver does not support sample rates > 48 kHz, we are safe
> +  * with this fix for now.
> +  */
> +
> + if (IS_GEN9(dev_priv) && intel_crtc_has_dp_encoder(crtc_config) &&
> crtc_config->port_clock >= 540000) {
> + tmp = I915_READ(CHICKEN_TRANS(cpu_transcoder));
> + tmp |= SPARE_13;
> + I915_WRITE(CHICKEN_TRANS(cpu_transcoder), tmp);
> + }
> +
>   /* Enable audio presence detect, invalidate ELD */
>   tmp = I915_READ(HSW_AUD_PIN_ELD_CP_VLD);
>   tmp |= 

Re: [Intel-gfx] linux-next: manual merge of the mali-dp tree with the drm-misc tree

2016-11-04 Thread Liviu Dudau
On Fri, Nov 04, 2016 at 04:38:54PM +1100, Stephen Rothwell wrote:
> Hi Liviu,
> 
> On Thu, 3 Nov 2016 17:19:58 +0000 Liviu Dudau  wrote:
> >
> > I have revamped the mali-dp tree and rebased it on the newer
> > version of drm-next (which includes the drm-misc change) and pushed the
> > updated patch in my tree.
> 
> Thanks for that.  However, several of the commits in your tree now have
> no Signed-off-by from you as the committer :-(

Brian Starkey is a co-maintainer for the Mali DP tree, so his Signed-off-by
alone should be good. Baoyou's patch is in my tree to stop him repeatedly
send me the same patch over and over again :) But yes, I will add my
Signed-off-by for that one.

Many thanks,
Liviu

> 
> -- 
> Cheers,
> Stephen Rothwell

-- 

| I would like to |
| fix the world,  |
| but they're not |
| giving me the   |
 \ source code!  /
  ---
¯\_(ツ)_/¯
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] ✗ Fi.CI.BAT: warning for series starting with [v4,1/2] drm/i915/dp: BDW cdclk fix for DP audio (rev2)

2016-11-04 Thread Jani Nikula
On Wed, 02 Nov 2016, Patchwork  wrote:
> == Series Details ==
>
> Series: series starting with [v4,1/2] drm/i915/dp: BDW cdclk fix for DP audio 
> (rev2)
> URL   : https://patchwork.freedesktop.org/series/14688/
> State : warning
>
> == Summary ==
>
> Series 14688v2 Series without cover letter
> https://patchwork.freedesktop.org/api/1.0/series/14688/revisions/2/mbox/
>
> Test drv_module_reload_basic:
> pass   -> DMESG-WARN (fi-skl-6770hq)

LSPCON being cranky.

Pushed both to drm-intel-next-queued, thanks for the patches.

BR,
Jani.

>
> fi-bdw-5557u total:241  pass:226  dwarn:0   dfail:0   fail:0   skip:15 
> fi-bsw-n3050 total:241  pass:201  dwarn:0   dfail:0   fail:0   skip:40 
> fi-bxt-t5700 total:241  pass:213  dwarn:0   dfail:0   fail:0   skip:28 
> fi-byt-j1900 total:241  pass:213  dwarn:0   dfail:0   fail:0   skip:28 
> fi-byt-n2820 total:241  pass:209  dwarn:0   dfail:0   fail:0   skip:32 
> fi-hsw-4770  total:241  pass:221  dwarn:0   dfail:0   fail:0   skip:20 
> fi-hsw-4770r total:241  pass:220  dwarn:0   dfail:0   fail:0   skip:21 
> fi-ilk-650   total:241  pass:187  dwarn:0   dfail:0   fail:0   skip:54 
> fi-ivb-3520m total:241  pass:218  dwarn:0   dfail:0   fail:0   skip:23 
> fi-ivb-3770  total:241  pass:218  dwarn:0   dfail:0   fail:0   skip:23 
> fi-kbl-7200u total:241  pass:219  dwarn:0   dfail:0   fail:0   skip:22 
> fi-skl-6260u total:241  pass:227  dwarn:0   dfail:0   fail:0   skip:14 
> fi-skl-6700hq total:241  pass:220  dwarn:0   dfail:0   fail:0   skip:21 
> fi-skl-6700k total:241  pass:219  dwarn:1   dfail:0   fail:0   skip:21 
> fi-skl-6770hq total:241  pass:226  dwarn:1   dfail:0   fail:0   skip:14 
> fi-snb-2520m total:241  pass:208  dwarn:0   dfail:0   fail:0   skip:33 
> fi-snb-2600  total:241  pass:207  dwarn:0   dfail:0   fail:0   skip:34 
>
> bf6b989af8b0fde56a352d9005c97b2d8e3bbbe3 drm-intel-nightly: 
> 2016y-11m-02d-15h-44m-03s UTC integration manifest
> 6346fda drm/i915/dp: Extend BDW DP audio workaround to GEN9 platforms
> 884d02ce drm/i915/dp: BDW cdclk fix for DP audio
>
> == Logs ==
>
> For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_2895/
> ___
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Jani Nikula, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] ✗ Fi.CI.BAT: failure for dev_priv cleanup continuation

2016-11-04 Thread Patchwork
== Series Details ==

Series: dev_priv cleanup continuation
URL   : https://patchwork.freedesktop.org/series/14844/
State : failure

== Summary ==

Series 14844v1 dev_priv cleanup continuation
https://patchwork.freedesktop.org/api/1.0/series/14844/revisions/1/mbox/

Test kms_busy:
Subgroup basic-flip-default-c:
pass   -> INCOMPLETE (fi-skl-6260u)

fi-bdw-5557u total:241  pass:226  dwarn:0   dfail:0   fail:0   skip:15 
fi-bsw-n3050 total:241  pass:201  dwarn:0   dfail:0   fail:0   skip:40 
fi-byt-j1900 total:241  pass:213  dwarn:0   dfail:0   fail:0   skip:28 
fi-byt-n2820 total:241  pass:209  dwarn:0   dfail:0   fail:0   skip:32 
fi-hsw-4770  total:241  pass:221  dwarn:0   dfail:0   fail:0   skip:20 
fi-hsw-4770r total:241  pass:221  dwarn:0   dfail:0   fail:0   skip:20 
fi-ilk-650   total:241  pass:188  dwarn:0   dfail:0   fail:0   skip:53 
fi-ivb-3520m total:241  pass:219  dwarn:0   dfail:0   fail:0   skip:22 
fi-ivb-3770  total:241  pass:219  dwarn:0   dfail:0   fail:0   skip:22 
fi-kbl-7200u total:241  pass:219  dwarn:0   dfail:0   fail:0   skip:22 
fi-skl-6260u total:165  pass:159  dwarn:0   dfail:0   fail:0   skip:5  
fi-skl-6700hq total:241  pass:220  dwarn:0   dfail:0   fail:0   skip:21 
fi-skl-6700k total:241  pass:219  dwarn:1   dfail:0   fail:0   skip:21 
fi-skl-6770hq total:241  pass:227  dwarn:0   dfail:0   fail:0   skip:14 
fi-snb-2520m total:241  pass:209  dwarn:0   dfail:0   fail:0   skip:32 
fi-snb-2600  total:241  pass:208  dwarn:0   dfail:0   fail:0   skip:33 

f302fec3503812bb71a8f71511a0bd4f720d5091 drm-intel-nightly: 
2016y-11m-04d-11h-56m-52s UTC integration manifest
01f89f2 drm/i915: Convert i915_drv.c to INTEL_GEN
f1ccc0b drm/i915: Pass dev_priv to INTEL_INFO everywhere apart from the gen use
ef23459 drm/i915: Further assorted dev_priv cleanups
28a34c7 drm/i915: More assorted dev_priv cleanups
8c5b825 drm/i915: Assorted dev_priv cleanups

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_2906/
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 2/5] drm/i915: More assorted dev_priv cleanups

2016-11-04 Thread Ville Syrjälä
On Fri, Nov 04, 2016 at 02:42:45PM +0000, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin 
> 
> A small selection of macros which can only accept dev_priv from
> now on and a resulting trickle of fixups.
> 
> Signed-off-by: Tvrtko Ursulin 
> ---
>  drivers/gpu/drm/i915/i915_drv.h   | 27 ---
>  drivers/gpu/drm/i915/i915_gpu_error.c |  2 +-
>  drivers/gpu/drm/i915/i915_irq.c   |  6 +++---
>  drivers/gpu/drm/i915/intel_crt.c  |  8 
>  drivers/gpu/drm/i915/intel_display.c  |  4 ++--
>  drivers/gpu/drm/i915/intel_dp.c   |  2 +-
>  drivers/gpu/drm/i915/intel_hotplug.c  |  2 +-
>  drivers/gpu/drm/i915/intel_psr.c  |  2 +-
>  8 files changed, 25 insertions(+), 28 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 45a30f730216..6060e41d25e5 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2901,28 +2901,25 @@ struct drm_i915_cmd_table {
>  #define HAS_128_BYTE_Y_TILING(dev_priv) (!IS_GEN2(dev_priv) && \
>!(IS_I915G(dev_priv) || \
>IS_I915GM(dev_priv)))
> -#define SUPPORTS_TV(dev) (INTEL_INFO(dev)->supports_tv)
> -#define I915_HAS_HOTPLUG(dev) (INTEL_INFO(dev)->has_hotplug)
> -
> -#define HAS_FW_BLC(dev_priv) (INTEL_GEN(dev_priv) > 2)
> -#define HAS_PIPE_CXSR(dev) (INTEL_INFO(dev)->has_pipe_cxsr)
> -#define HAS_FBC(dev) (INTEL_INFO(dev)->has_fbc)
> +#define SUPPORTS_TV(dev_priv)((dev_priv)->info.supports_tv)
> +#define I915_HAS_HOTPLUG(dev_priv)   ((dev_priv)->info.has_hotplug)
>  
> +#define HAS_FW_BLC(dev_priv) (INTEL_GEN(dev_priv) > 2)
> +#define HAS_PIPE_CXSR(dev_priv) ((dev_priv)->info.has_pipe_cxsr)
> +#define HAS_FBC(dev_priv)((dev_priv)->info.has_fbc)
>  #define HAS_IPS(dev_priv)(IS_HSW_ULT(dev_priv) || IS_BROADWELL(dev_priv))
> -
> -#define HAS_DP_MST(dev)  (INTEL_INFO(dev)->has_dp_mst)
> -
> +#define HAS_DP_MST(dev_priv) ((dev_priv)->info.has_dp_mst)
>  #define HAS_DDI(dev_priv)((dev_priv)->info.has_ddi)
> -#define HAS_FPGA_DBG_UNCLAIMED(dev)  (INTEL_INFO(dev)->has_fpga_dbg)
> -#define HAS_PSR(dev) (INTEL_INFO(dev)->has_psr)
> -#define HAS_RC6(dev) (INTEL_INFO(dev)->has_rc6)
> -#define HAS_RC6p(dev)(INTEL_INFO(dev)->has_rc6p)
> -
> -#define HAS_CSR(dev) (INTEL_INFO(dev)->has_csr)
> +#define HAS_PSR(dev_priv)((dev_priv)->info.has_psr)
> +#define HAS_RC6(dev_priv)((dev_priv)->info.has_rc6)
> +#define HAS_RC6p(dev_priv)   ((dev_priv)->info.has_rc6p)
> +#define HAS_CSR(dev_priv)((dev_priv)->info.has_csr)
>  
>  #define HAS_RUNTIME_PM(dev_priv) ((dev_priv)->info.has_runtime_pm)
>  #define HAS_64BIT_RELOC(dev_priv) ((dev_priv)->info.has_64bit_reloc)
>  
> +#define HAS_FPGA_DBG_UNCLAIMED(dev_priv) ((dev_priv)->info.has_fpga_dbg)

What's confusing me is this reordering of these macros. Was there a
particular reason for doing that?

Outside that it all looks pretty reasonable. Could go a bit further
with passing around dev_priv in some cases, but I guess we can leave
that to future work.


One random idea that did pop into my head was this:

static inline const struct ... *
intel_info(struct drm_i915_private *dev_priv)
{
return &dev_priv->info;
}
#define HAS_WHATEVER(dev_priv) (intel_info(dev_priv)->whatever)

for some extra type safety. Any thoughts?

> +
>  /*
>   * For now, anything with a GuC requires uCode loading, and then supports
>   * command submission once loaded. But these are logically independent
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
> b/drivers/gpu/drm/i915/i915_gpu_error.c
> index d430b9441e6b..35b13f178b61 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -573,7 +573,7 @@ int i915_error_state_to_str(struct 
> drm_i915_error_state_buf *m,
>  pdev->subsystem_device);
>   err_printf(m, "IOMMU enabled?: %d\n", error->iommu);
>  
> - if (HAS_CSR(dev)) {
> + if (HAS_CSR(dev_priv)) {
>   struct intel_csr *csr = &dev_priv->csr;
>  
>   err_printf(m, "DMC loaded: %s\n",
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 6d7505b5c5e7..285ee1e4352a 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -3678,7 +3678,7 @@ static void i915_irq_preinstall(struct drm_device * dev)
>   struct drm_i915_private *dev_priv = to_i915(dev);
>   int pipe;
>  
> - if (I915_HAS_HOTPLUG(dev)) {
> + if (I915_HAS_HOTPLUG(dev_priv)) {
>   i915_hotplug_interrupt_update(dev_priv, 0xffffffff, 0);
>   I915_WRITE(PORT_HOTPLUG_STAT, I915_READ(PORT_HOTPLUG_STAT));
>   }
> @@ -3712,7 +3712,7 @@ static int i915_irq_postinstall(struct drm_device *dev)
>   I915_DISPLAY_PIPE_B_EVENT_INTERRUPT |
>   

[Intel-gfx] [maintainer-tools PATCH 1/2] dim: add a variable for nightly.conf

2016-11-04 Thread Jani Nikula
We'll change the name at some point, add some indirection, with a
generic variable name.

Signed-off-by: Jani Nikula 
---
 dim | 26 +++---
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/dim b/dim
index 8e95cd82407f..6a23c868856c 100755
--- a/dim
+++ b/dim
@@ -92,6 +92,9 @@ addr_intel_gfx="intel-gfx@lists.freedesktop.org"
 addr_dri_devel="dri-devel@lists.freedesktop.org"
 addr_intel_qa="\"Christophe Prigent\" "
 
+# integration configuration
+integration_config=nightly.conf
+
 #
 # Command line options.
 #
@@ -163,7 +166,7 @@ if [ "$subcommand" != "setup" -a "$subcommand" != "help" -a 
"$subcommand" != "us
# Internal configuration that depends on a sane setup.
#
 
-   dim_branches=`(source $DIM_PREFIX/drm-intel-rerere/nightly.conf ; echo 
$nightly_branches) | \
+   dim_branches=`(source $DIM_PREFIX/drm-intel-rerere/$integration_config 
; echo $nightly_branches) | \
xargs -n 1 echo | grep '^origin' | sed -e 's/^origin\///'`
 fi
 
@@ -280,14 +283,16 @@ function dim_rebuild_nightly
 
cd $rerere
if [[ `git status --porcelain | grep -v "^[ ?][ ?]" | wc -l` -gt 0 ]]; 
then
-   warn_or_fail "-nightly configuration file not commited"
+   warn_or_fail "integration configuration file 
$integration_config not commited"
fi
 
-   echo -n "Updating rerere cache and nightly.conf... "
+   echo -n "Updating rerere cache... "
update_rerere_cache >& /dev/null
echo "Done."
 
-   source $rerere/nightly.conf
+   echo -n "Reloading $integration_config... "
+   source $rerere/$integration_config
+   echo "Done."
 
cd $DIM_PREFIX/$integration_branch
if ! git branch --list $integration_branch | grep '\*' >& /dev/null ; 
then
@@ -383,7 +388,6 @@ function dim_rebuild_nightly
 
update_linux_next
 }
-
 # push branch $1, rebuild nightly. the rest of the arguments are passed to git
 # push.
 function dim_push_branch
@@ -615,9 +619,9 @@ function dim_create_branch
git push $DRY_RUN $DIM_DRM_INTEL_REMOTE +$branch --set-upstream
cd $DIM_PREFIX/drm-intel-rerere
$DRY echo "nightly_branches=\"\$nightly_branches origin/$branch\"" \
->> nightly.conf
-   $DRY git add nightly.conf
-   $DRY git commit --quiet -m "Adding $branch to -nightly"
+>> $integration_config
+   $DRY git add $integration_config
+   $DRY git commit --quiet -m "Add $branch to $integration_config"
 }
 
 function dim_remove_branch
@@ -643,9 +647,9 @@ function dim_remove_branch
$DRY git fetch origin --prune
cd $DIM_PREFIX/drm-intel-rerere
full_branch="origin/$branch"
-   $DRY sed -e "/${full_branch//\//\\\/}/d" -i nightly.conf
-   $DRY git add nightly.conf
-   $DRY git commit --quiet -m "Deleted $branch and removed from -nightly"
+   $DRY sed -e "/${full_branch//\//\\\/}/d" -i $integration_config
+   $DRY git add $integration_config
+   $DRY git commit --quiet -m "Remove $branch from $integration_config"
 }
 
 function dim_cd
-- 
2.1.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [maintainer-tools PATCH 2/2] dim: switch to using remote agnostic integration branch config

2016-11-04 Thread Jani Nikula
NOTE: This change depends on nightly.conf changes that have been
committed earlier to the drm-intel-rerere repo. Looking at that first
makes this change more sensible.

Use two arrays to configure the repos and branches to be merged to the
integration branch:

drm_tip_repos

An associative array that maps repo names to urls. This is
mostly a convenience for defining the other array. The repo
names are symbolic, and not related to actual git remote names.
It's also helpful for implementing dim create-branch and
remove-branch.

drm_tip_config

An array of strings which describes the repos and branches to be
used to generate the integration branch. The repos are listed
using the symbolic repo names from the drm_tip_repos array. It's
also possible to list an override sha, in case there's a need to
hold back updating to the tip of the branch for some reason.

dim as a whole still remains dependent on specific (and configured)
remote names, but this change detaches nightly.conf from the remote
names.

Signed-off-by: Jani Nikula 
---
 dim | 98 ++---
 1 file changed, 72 insertions(+), 26 deletions(-)

diff --git a/dim b/dim
index 6a23c868856c..66ea0dd918b3 100755
--- a/dim
+++ b/dim
@@ -95,6 +95,27 @@ addr_intel_qa="\"Christophe Prigent\" 
"
 # integration configuration
 integration_config=nightly.conf
 
+function read_integration_config
+{
+   # clear everything first to allow configuration reload
+   unset drm_tip_repos drm_tip_config
+   declare -g -A drm_tip_repos
+   declare -g -a drm_tip_config
+
+   if [ -r $DIM_PREFIX/drm-intel-rerere/$integration_config ]; then
+   source $DIM_PREFIX/drm-intel-rerere/$integration_config
+   fi
+
+   dim_branches=
+   for conf in "${drm_tip_config[@]}"; do
+   read repo branch override <<< $conf
+   if [[ "$repo" = "drm-intel" ]]; then
+   dim_branches="$dim_branches $branch"
+   fi
+   done
+}
+read_integration_config
+
 #
 # Command line options.
 #
@@ -161,14 +182,30 @@ if [ "$subcommand" != "setup" -a "$subcommand" != "help" 
-a "$subcommand" != "us
exit 1
fi
done
+fi
 
-   #
-   # Internal configuration that depends on a sane setup.
-   #
+# get the remote name for url, depends on current repo
+function url_to_remote
+{
+   local url="$1"
 
-   dim_branches=`(source $DIM_PREFIX/drm-intel-rerere/$integration_config 
; echo $nightly_branches) | \
-   xargs -n 1 echo | grep '^origin' | sed -e 's/^origin\///'`
-fi
+   if [[ -z "$url" ]]; then
+   echoerr "$0 without url"
+   exit 1
+   fi
+
+   local remote=$(git remote -v | grep -m 1 "$url" | cut -f 1)
+
+   if [[ -z "$remote" ]]; then
+   echoerr "No git remote for url $url found in $(pwd)"
+   echoerr "Please set it up using:"
+   echoerr "$ git remote add <name> $url"
+   echoerr "with a name of your choice."
+   exit 1
+   fi
+
+   echo $remote
+}
 
 function dim_uptodate
 {
@@ -291,7 +328,7 @@ function dim_rebuild_nightly
echo "Done."
 
echo -n "Reloading $integration_config... "
-   source $rerere/$integration_config
+   read_integration_config
echo "Done."
 
cd $DIM_PREFIX/$integration_branch
@@ -300,7 +337,8 @@ function dim_rebuild_nightly
exit 1
fi
 
-   for remote in $(echo $nightly_branches | tr " " "\n" | sed 's|/.*$||g' 
| sort -u); do
+   for url in "${drm_tip_repos[@]}"; do
+   local remote=$(url_to_remote $url)
echo -n "Fetching $remote... "
# git fetch returns 128 if there's nothing to be fetched
git fetch $remote >& /dev/null || true
@@ -308,22 +346,17 @@ function dim_rebuild_nightly
done
 
# merge -fixes
-   for tree in $nightly_branches; do
-   local branch=${tree%:*}
-   local sha1=${tree#*:}
-   local name=${branch##*/}
-
-   # the : separator is optional
-   if [[ $sha1 == $tree ]] ; then
-   sha1=
-   fi
+   for conf in "${drm_tip_config[@]}"; do
+   read repo branch override <<< $conf
+   local url=${drm_tip_repos[$repo]}
+   local remote=$(url_to_remote $url)
+   local sha1=$remote/$branch
 
-   echo -n "Merging $branch... "
+   echo -n "Merging $repo (local remote $remote) $branch... "
 
-   if [[ -n $sha1 ]] ; then
+   if [[ -n "$override" ]]; then
+   sha1=$override
echo -n "Using override sha1: $sha1... "
-   else
-  

Re: [Intel-gfx] [PATCH 05/12] drm/i915/scheduler: Record all dependencies upon request construction

2016-11-04 Thread Chris Wilson
On Fri, Nov 04, 2016 at 02:44:44PM +0000, Tvrtko Ursulin wrote:
> 
> On 03/11/2016 11:55, Chris Wilson wrote:
> >On Thu, Nov 03, 2016 at 11:03:47AM +0000, Tvrtko Ursulin wrote:
> >>
> >>On 02/11/2016 17:50, Chris Wilson wrote:
> >>>+struct i915_dependency {
> >>>+  struct i915_priotree *signal;
> >>>+  struct list_head pre_link, post_link;
> >>>+  unsigned long flags;
> >>>+#define I915_DEPENDENCY_ALLOC BIT(0)
> >>>+};
> >>>+
> >>>+struct i915_priotree {
> >>>+  struct list_head pre_list; /* who is before us, we depend upon */
> >>>+  struct list_head post_list; /* who is after us, they depend upon us */
> >>>+};
> >>
> >>I need a picture to imagine this data structure. :(
> >
> >The names suck.
> 
> When you wrote this I assumed you would respin shortly with some
> better names?

Not yet. I kind of like

struct i915_dependency {
struct i915_priotree *signaler;
struct list_head signaler_link;
struct list_head listener_link;
};

struct i915_priotree {
struct list_head signalers_list; /* before us, we depend on them */
struct list_head listeners_list; /* those after, who depend on us */
};

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH igt] igt/gem_exec_reloc: Check we write the full 64bit relocation

2016-11-04 Thread Chris Wilson
Recently a patch ran successfully through BAT that broke 64bit
relocations on a couple of machines. Oops. So lets add a very fast set
of tests to check basic relocation handling.

Signed-off-by: Chris Wilson 
---
 tests/gem_exec_reloc.c| 199 ++
 tests/intel-ci/fast-feedback.testlist |   3 +
 2 files changed, 202 insertions(+)

diff --git a/tests/gem_exec_reloc.c b/tests/gem_exec_reloc.c
index 5f898da..b541b38 100644
--- a/tests/gem_exec_reloc.c
+++ b/tests/gem_exec_reloc.c
@@ -28,6 +28,9 @@ IGT_TEST_DESCRIPTION("Basic sanity check of execbuf-ioctl 
relocations.");
 #define LOCAL_I915_EXEC_BSD_SHIFT  (13)
 #define LOCAL_I915_EXEC_BSD_MASK   (3 << LOCAL_I915_EXEC_BSD_SHIFT)
 
+#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
+#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
+
 #define ENGINE_MASK  (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)
 
 static uint32_t find_last_set(uint64_t x)
@@ -320,6 +323,193 @@ static void active(int fd, unsigned engine)
gem_close(fd, obj[0].handle);
 }
 
+static bool has_64bit_reloc(int fd)
+{
+   return intel_gen(intel_get_drm_devid(fd)) >= 8;
+}
+
+static void basic_cpu(int fd)
+{
+   struct drm_i915_gem_relocation_entry reloc;
+   struct drm_i915_gem_exec_object2 obj;
+   struct drm_i915_gem_execbuffer2 execbuf;
+   uint32_t bbe = MI_BATCH_BUFFER_END;
+   uint32_t trash;
+   uint64_t offset;
+   char *wc;
+
+   memset(&obj, 0, sizeof(obj));
+
+   obj.handle = gem_create(fd, 4096);
+   obj.relocs_ptr = (uintptr_t)&reloc;
+   obj.relocation_count = 1;
+   gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
+
+   memset(&reloc, 0, sizeof(reloc));
+   reloc.offset = 4000;
+   reloc.target_handle = obj.handle;
+   reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+
+   memset(&execbuf, 0, sizeof(execbuf));
+   execbuf.buffers_ptr = (uintptr_t)&obj;
+   execbuf.buffer_count = 1;
+
+   wc = gem_mmap__wc(fd, obj.handle, 0, 4096, PROT_WRITE);
+   offset = -1;
+   memcpy(wc + 4000, &offset, sizeof(offset));
+
+   gem_set_domain(fd, obj.handle,
+  I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+   gem_execbuf(fd, &execbuf);
+
+   offset = 0;
+   memcpy(&offset, wc + 4000, has_64bit_reloc(fd) ? 8 : 4);
+   munmap(wc, 4096);
+
+   igt_assert_eq_u64(reloc.presumed_offset, offset);
+   igt_assert_eq_u64(obj.offset, offset);
+
+   /* Simulate relocation */
+   trash = obj.handle;
+   obj.handle = gem_create(fd, 4096);
+   gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
+   reloc.target_handle = obj.handle;
+
+   wc = gem_mmap__wc(fd, obj.handle, 0, 4096, PROT_WRITE);
+   offset = -1;
+   memcpy(wc + 4000, &offset, sizeof(offset));
+
+   gem_set_domain(fd, obj.handle,
+  I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+   gem_execbuf(fd, &execbuf);
+
+   offset = 0;
+   memcpy(&offset, wc + 4000, has_64bit_reloc(fd) ? 8 : 4);
+   munmap(wc, 4096);
+
+   igt_assert_eq_u64(reloc.presumed_offset, offset);
+   igt_assert_eq_u64(obj.offset, offset);
+
+   gem_close(fd, obj.handle);
+   gem_close(fd, trash);
+}
+
+static void basic_gtt(int fd)
+{
+   struct drm_i915_gem_relocation_entry reloc;
+   struct drm_i915_gem_exec_object2 obj;
+   struct drm_i915_gem_execbuffer2 execbuf;
+   uint32_t bbe = MI_BATCH_BUFFER_END;
+   uint64_t offset;
+   char *wc;
+
+   memset(&obj, 0, sizeof(obj));
+
+   obj.handle = gem_create(fd, 4096);
+   obj.relocs_ptr = (uintptr_t)&reloc;
+   obj.relocation_count = 1;
+   gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
+
+   memset(&reloc, 0, sizeof(reloc));
+   reloc.offset = 4000;
+   reloc.target_handle = obj.handle;
+   reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+
+   memset(&execbuf, 0, sizeof(execbuf));
+   execbuf.buffers_ptr = (uintptr_t)&obj;
+   execbuf.buffer_count = 1;
+
+   wc = gem_mmap__wc(fd, obj.handle, 0, 4096, PROT_WRITE);
+   offset = -1;
+   memcpy(wc + 4000, &offset, sizeof(offset));
+
+   gem_set_domain(fd, obj.handle,
+  I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+   gem_execbuf(fd, &execbuf);
+
+   offset = 0;
+   memcpy(&offset, wc + 4000, has_64bit_reloc(fd) ? 8 : 4);
+
+   igt_assert_eq_u64(reloc.presumed_offset, offset);
+   igt_assert_eq_u64(obj.offset, offset);
+
+   offset = -1;
+   memcpy(wc + 4000, &offset, sizeof(offset));
+
+   /* Simulate relocation */
+   obj.offset += 4096;
+   reloc.presumed_offset += 4096;
+   memcpy(wc + 4000, &obj.offset, has_64bit_reloc(fd) ? 8 : 4);
+
+   gem_execbuf(fd, &execbuf);
+
+   offset = 0;
+   memcpy(&offset, wc + 4000, has_64bit_reloc(fd) ? 8 : 4);
+   munmap(wc, 4096);
+
+   igt_assert_eq_u64(reloc.presumed_offset, offset);
+   igt_assert_eq_u64(obj.offset, offset);
+
+   gem_close(fd, obj.handle);
+}
+
+static void basic_noreloc(int fd)
+{
+   struct drm_i915_gem_relocation_entry 

[Intel-gfx] [PATCH 0/5] dev_priv cleanup continuation

2016-11-04 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

A few small patches towards the goal of getting rid of the
__I915__ polymorphism.

Series starts with three patches to convert some more IS/HAS macros to accepting
dev_priv only, and continues with a patch to make all users of INTEL_INFO pass
in dev_priv, apart from the ones which can be replaced with INTEL_GEN.

This leaves the disruptive conversion to the latter as the only remaining bit
before the __I915__ can be completely eliminated.

To start with that, last patch converts i915_drv.c, going with the idea to do
this gradually over time on a file by file basis.

When all this is done at some point in the future, we can also tackle the
opportunities to change some local function signatures to take dev_priv and so
make further cleanups where appropriate.

Tvrtko Ursulin (5):
  drm/i915: Assorted dev_priv cleanups
  drm/i915: More assorted dev_priv cleanups
  drm/i915: Further assorted dev_priv cleanups
  drm/i915: Pass dev_priv to INTEL_INFO everywhere apart from the gen
use
  drm/i915: Convert i915_drv.c to INTEL_GEN

 drivers/gpu/drm/i915/i915_drv.c| 18 
 drivers/gpu/drm/i915/i915_drv.h| 70 +++---
 drivers/gpu/drm/i915/i915_gem.c| 13 +++---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  6 +--
 drivers/gpu/drm/i915/i915_gem_gtt.c|  2 +-
 drivers/gpu/drm/i915/i915_gem_stolen.c |  3 +-
 drivers/gpu/drm/i915/i915_gem_userptr.c|  3 +-
 drivers/gpu/drm/i915/i915_gpu_error.c  |  4 +-
 drivers/gpu/drm/i915/i915_irq.c|  8 ++--
 drivers/gpu/drm/i915/intel_color.c | 31 ++---
 drivers/gpu/drm/i915/intel_crt.c   |  8 ++--
 drivers/gpu/drm/i915/intel_display.c   | 32 +++---
 drivers/gpu/drm/i915/intel_dp.c|  8 ++--
 drivers/gpu/drm/i915/intel_fbdev.c | 10 ++---
 drivers/gpu/drm/i915/intel_guc_loader.c| 10 ++---
 drivers/gpu/drm/i915/intel_hotplug.c   |  2 +-
 drivers/gpu/drm/i915/intel_pm.c|  7 +--
 drivers/gpu/drm/i915/intel_psr.c   |  2 +-
 18 files changed, 118 insertions(+), 119 deletions(-)

-- 
2.7.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 5/5] drm/i915: Convert i915_drv.c to INTEL_GEN

2016-11-04 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

Signed-off-by: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/i915_drv.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 35940192e569..096c368bda0b 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -374,12 +374,12 @@ static int
 intel_alloc_mchbar_resource(struct drm_device *dev)
 {
struct drm_i915_private *dev_priv = to_i915(dev);
-   int reg = INTEL_INFO(dev)->gen >= 4 ? MCHBAR_I965 : MCHBAR_I915;
+   int reg = INTEL_GEN(dev_priv) >= 4 ? MCHBAR_I965 : MCHBAR_I915;
u32 temp_lo, temp_hi = 0;
u64 mchbar_addr;
int ret;
 
-   if (INTEL_INFO(dev)->gen >= 4)
+   if (INTEL_GEN(dev_priv) >= 4)
+   pci_read_config_dword(dev_priv->bridge_dev, reg + 4, &temp_hi);
pci_read_config_dword(dev_priv->bridge_dev, reg, &temp_lo);
mchbar_addr = ((u64)temp_hi << 32) | temp_lo;
@@ -406,7 +406,7 @@ intel_alloc_mchbar_resource(struct drm_device *dev)
return ret;
}
 
-   if (INTEL_INFO(dev)->gen >= 4)
+   if (INTEL_GEN(dev_priv) >= 4)
pci_write_config_dword(dev_priv->bridge_dev, reg + 4,
   upper_32_bits(dev_priv->mch_res.start));
 
@@ -420,7 +420,7 @@ static void
 intel_setup_mchbar(struct drm_device *dev)
 {
struct drm_i915_private *dev_priv = to_i915(dev);
-   int mchbar_reg = INTEL_INFO(dev)->gen >= 4 ? MCHBAR_I965 : MCHBAR_I915;
+   int mchbar_reg = INTEL_GEN(dev_priv) >= 4 ? MCHBAR_I965 : MCHBAR_I915;
u32 temp;
bool enabled;
 
@@ -460,7 +460,7 @@ static void
 intel_teardown_mchbar(struct drm_device *dev)
 {
struct drm_i915_private *dev_priv = to_i915(dev);
-   int mchbar_reg = INTEL_INFO(dev)->gen >= 4 ? MCHBAR_I965 : MCHBAR_I915;
+   int mchbar_reg = INTEL_GEN(dev_priv) >= 4 ? MCHBAR_I965 : MCHBAR_I915;
 
if (dev_priv->mchbar_need_disable) {
if (IS_I915G(dev_priv) || IS_I915GM(dev_priv)) {
@@ -879,7 +879,7 @@ static int i915_mmio_setup(struct drm_device *dev)
 * the register BAR remains the same size for all the earlier
 * generations up to Ironlake.
 */
-   if (INTEL_INFO(dev)->gen < 5)
+   if (INTEL_GEN(dev_priv) < 5)
mmio_size = 512 * 1024;
else
mmio_size = 2 * 1024 * 1024;
@@ -1512,7 +1512,7 @@ static int i915_drm_suspend_late(struct drm_device *dev, 
bool hibernation)
 * Fujitsu FSC S7110
 * Acer Aspire 1830T
 */
-   if (!(hibernation && INTEL_INFO(dev_priv)->gen < 6))
+   if (!(hibernation && INTEL_GEN(dev_priv) < 6))
pci_set_power_state(pdev, PCI_D3hot);
 
dev_priv->suspended_to_idle = suspend_to_idle(dev_priv);
-- 
2.7.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 1/2] shmem: Support for registration of driver/file owner specific ops

2016-11-04 Thread akash . goel
From: Chris Wilson 

This provides support for the drivers or shmem file owners to register
a set of callbacks, which can be invoked from the address space
operations methods implemented by shmem.  This allows the file owners to
hook into the shmem address space operations to do some extra/custom
operations in addition to the default ones.

The private_data field of address_space struct is used to store the
pointer to driver specific ops.  Currently only one ops field is defined,
which is migratepage, but can be extended on an as-needed basis.

The need for driver specific operations arises since some of the
operations (like migratepage) may not be handled completely within shmem,
so as to be effective, and would need some driver specific handling also.
Specifically, i915.ko would like to participate in migratepage().
i915.ko uses shmemfs to provide swappable backing storage for its user
objects, but when those objects are in use by the GPU it must pin the
entire object until the GPU is idle.  As a result, large chunks of memory
can be arbitrarily withdrawn from page migration, resulting in premature
out-of-memory due to fragmentation.  However, if i915.ko can receive the
migratepage() request, it can then flush the object from the GPU, remove
its pin and thus enable the migration.

Since gfx allocations are one of the major consumers of system memory, it's
imperative to have such a mechanism to effectively deal with
fragmentation.  And therefore the need for such a provision for initiating
driver specific actions during address space operations.

v2:
- Drop dev_ prefix from the members of shmem_dev_info structure. (Joonas)
- Change the return type of shmem_set_device_op() to void and remove the
  check for pre-existing data. (Joonas)
- Rename shmem_set_device_op() to shmem_set_dev_info() to be consistent
  with shmem_dev_info structure. (Joonas)

Cc: Hugh Dickins 
Cc: linux...@kvack.org
Cc: linux-ker...@vger.linux.org
Signed-off-by: Sourab Gupta 
Signed-off-by: Akash Goel 
Reviewed-by: Chris Wilson 
---
 include/linux/shmem_fs.h | 13 +
 mm/shmem.c   | 17 -
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index ff078e7..454c3ba 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -39,11 +39,24 @@ struct shmem_sb_info {
unsigned long shrinklist_len; /* Length of shrinklist */
 };
 
+struct shmem_dev_info {
+   void *private_data;
+   int (*migratepage)(struct address_space *mapping,
+  struct page *newpage, struct page *page,
+  enum migrate_mode mode, void *dev_priv_data);
+};
+
 static inline struct shmem_inode_info *SHMEM_I(struct inode *inode)
 {
return container_of(inode, struct shmem_inode_info, vfs_inode);
 }
 
+static inline void shmem_set_dev_info(struct address_space *mapping,
+ struct shmem_dev_info *info)
+{
+   mapping->private_data = info;
+}
+
 /*
  * Functions in mm/shmem.c called directly from elsewhere:
  */
diff --git a/mm/shmem.c b/mm/shmem.c
index ad7813d..fce8de3 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1290,6 +1290,21 @@ static int shmem_writepage(struct page *page, struct 
writeback_control *wbc)
return 0;
 }
 
+#ifdef CONFIG_MIGRATION
+static int shmem_migratepage(struct address_space *mapping,
+struct page *newpage, struct page *page,
+enum migrate_mode mode)
+{
+   struct shmem_dev_info *dev_info = mapping->private_data;
+
+   if (dev_info && dev_info->migratepage)
+   return dev_info->migratepage(mapping, newpage, page,
+mode, dev_info->private_data);
+
+   return migrate_page(mapping, newpage, page, mode);
+}
+#endif
+
 #if defined(CONFIG_NUMA) && defined(CONFIG_TMPFS)
 static void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
 {
@@ -3654,7 +3669,7 @@ static void shmem_destroy_inodecache(void)
.write_end  = shmem_write_end,
 #endif
 #ifdef CONFIG_MIGRATION
-   .migratepage= migrate_page,
+   .migratepage= shmem_migratepage,
 #endif
.error_remove_page = generic_error_remove_page,
 };
-- 
1.9.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 2/2] drm/i915: Make GPU pages movable

2016-11-04 Thread akash . goel
From: Chris Wilson 

On a long run of more than 2-3 days, physical memory tends to get
fragmented severely, which considerably slows down the system. In such a
scenario, the shrinker is also unable to help as lack of memory is not
the actual problem, since it has been observed that there are enough free
pages of 0 order. This also manifests itself when an individual zone in
the mm runs out of pages and if we cannot migrate pages between zones,
the kernel hits an out-of-memory even though there are free pages (and
often all of swap) available.

To address the issue of external fragmentation, the kernel does a compaction
(which involves migration of pages) but its efficacy depends upon how
many pages are marked as MOVABLE, as only those pages can be migrated.

Currently the backing pages for GPU buffers are allocated from shmemfs
with GFP_RECLAIMABLE flag, in units of 4KB pages.  In the case of limited
swap space, it may not be possible always to reclaim or swap-out pages of
all the inactive objects, to make way for free space allowing formation
of higher order groups of physically-contiguous pages on compaction.

Just marking the GPU pages as MOVABLE will not suffice, as i915.ko has to
pin the pages if they are in use by GPU, which will prevent their
migration. So the migratepage callback in shmem is also hooked up to get
a notification when kernel initiates the page migration. On the
notification, i915.ko appropriately unpins the pages.  With this we can
effectively mark the GPU pages as MOVABLE and hence mitigate the
fragmentation problem.

v2:
 - Rename the migration routine to gem_shrink_migratepage, move it to the
   shrinker file, and use the existing constructs (Chris)
 - To cleanup, add a new helper function to encapsulate all page migration
   skip conditions (Chris)
 - Add a new local helper function in shrinker file, for dropping the
   backing pages, and call the same from gem_shrink() also (Chris)

v3:
 - Fix/invert the check on the return value of unsafe_drop_pages (Chris)

v4:
 - Minor tidy

v5:
 - Fix unsafe usage of unsafe_drop_pages()
 - Rebase onto vmap-notifier

v6:
- Remove i915_gem_object_get/put across unsafe_drop_pages() as with
  struct_mutex protection object can't disappear. (Chris)

Testcase: igt/gem_shrink
Bugzilla: (e.g.) https://bugs.freedesktop.org/show_bug.cgi?id=90254
Cc: Hugh Dickins 
Cc: linux...@kvack.org
Signed-off-by: Sourab Gupta 
Signed-off-by: Akash Goel 
Signed-off-by: Chris Wilson 
Reviewed-by: Joonas Lahtinen 
Reviewed-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h  |   2 +
 drivers/gpu/drm/i915/i915_gem.c  |   9 ++-
 drivers/gpu/drm/i915/i915_gem_shrinker.c | 132 +++
 3 files changed, 142 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4735b417..7f2717b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1357,6 +1357,8 @@ struct intel_l3_parity {
 };
 
 struct i915_gem_mm {
+   struct shmem_dev_info shmem_info;
+
/** Memory allocator for GTT stolen memory */
struct drm_mm stolen;
/** Protects the usage of the GTT stolen memory allocator. This is
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1f995ce..f0d4ce7 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2164,6 +2164,7 @@ void __i915_gem_object_invalidate(struct 
drm_i915_gem_object *obj)
if (obj->mm.madv == I915_MADV_WILLNEED)
mark_page_accessed(page);
 
+   set_page_private(page, 0);
put_page(page);
}
obj->mm.dirty = false;
@@ -2310,6 +2311,7 @@ static unsigned int swiotlb_max_size(void)
sg->length += PAGE_SIZE;
}
last_pfn = page_to_pfn(page);
+   set_page_private(page, (unsigned long)obj);
 
/* Check that the i965g/gm workaround works. */
WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x0010UL));
@@ -2334,8 +2336,10 @@ static unsigned int swiotlb_max_size(void)
 
 err_pages:
sg_mark_end(sg);
-   for_each_sgt_page(page, sgt_iter, st)
+   for_each_sgt_page(page, sgt_iter, st) {
+   set_page_private(page, 0);
put_page(page);
+   }
sg_free_table(st);
kfree(st);
 
@@ -4185,6 +4189,8 @@ struct drm_i915_gem_object *
goto fail;
 
mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
+   if (IS_ENABLED(MIGRATION))
+   mask |= __GFP_MOVABLE;
if (IS_CRESTLINE(dev_priv) || IS_BROADWATER(dev_priv)) {
/* 965gm cannot relocate objects above 4GiB. */
mask &= ~__GFP_HIGHMEM;
@@ -4193,6 

Re: [Intel-gfx] [PATCH 05/12] drm/i915/scheduler: Record all dependencies upon request construction

2016-11-04 Thread Tvrtko Ursulin


On 03/11/2016 11:55, Chris Wilson wrote:

On Thu, Nov 03, 2016 at 11:03:47AM +, Tvrtko Ursulin wrote:


On 02/11/2016 17:50, Chris Wilson wrote:

The scheduler needs to know the dependencies of each request for the
lifetime of the request, as it may choose to reschedule the requests at
any time and must ensure the dependency tree is not broken. This is in
addition to using the fence to only allow execution after all
dependencies have been completed.

One option was to extend the fence to support the bidirectional
dependency tracking required by the scheduler. However the mismatch in
lifetimes between the submit fence and the request essentially meant
that we had to build a completely separate struct (and we could not
simply reuse the existing waitqueue in the fence for one half of the
dependency tracking). The extra dependency tracking simply did not mesh
well with the fence, and keeping it separate both keeps the fence
implementation simpler and allows us to extend the dependency tracking
into a priority tree (whilst maintaining support for reordering the
tree).

To avoid the additional allocations and list manipulations, the use of
the priotree is disabled when there are no schedulers to use it.

Signed-off-by: Chris Wilson 
---
drivers/gpu/drm/i915/i915_gem_request.c | 72 -
drivers/gpu/drm/i915/i915_gem_request.h | 23 +++
2 files changed, 94 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c 
b/drivers/gpu/drm/i915/i915_gem_request.c
index 9c8605c834f9..13090f226203 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -113,6 +113,59 @@ i915_gem_request_remove_from_client(struct 
drm_i915_gem_request *request)
spin_unlock(_priv->mm.lock);
}

+static int
+i915_priotree_add_dependency(struct i915_priotree *pt,
+struct i915_priotree *signal,
+struct i915_dependency *dep)
+{
+   unsigned long flags = 0;
+
+   if (!dep) {
+   dep = kmalloc(sizeof(*dep), GFP_KERNEL);


I will mention a dedicated cache again since this could possibly be
our hottest allocation path. With a dedicated slab I've seen it grow
to 5-7k objects in some benchmarks, with the request slab around 1k
at the same time.


I'm open to one. We allocate more of these than we do even for fences. I
was thinking it could be added later, but if we can change the api to always
pass in the i915_dependency it will probably work better.



+   if (!dep)
+   return -ENOMEM;
+
+   flags |= I915_DEPENDENCY_ALLOC;
+   }


Not sure if it would be any nicer to just set the flags after
allocating to I915_DEPENDENCY_ALLOC and add an else path to set it
to zero here.


I just tend to avoid if {} else {} if I can help, just a personal
preference.


+struct i915_dependency {
+   struct i915_priotree *signal;
+   struct list_head pre_link, post_link;
+   unsigned long flags;
+#define I915_DEPENDENCY_ALLOC BIT(0)
+};
+
+struct i915_priotree {
+   struct list_head pre_list; /* who is before us, we depend upon */
+   struct list_head post_list; /* who is after us, they depend upon us */
+};


I need a picture to imagine this data structure. :(


The names suck.


When you wrote this I assumed you would respin shortly with some better 
names?


I tried to grasp it one more time since then but keep getting lost. :I

Regards,

Tvrtko
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 4/5] drm/i915: Pass dev_priv to INTEL_INFO everywhere apart from the gen use

2016-11-04 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

After this patch only conversion of INTEL_INFO(p)->gen to
INTEL_GEN(dev_priv) remains before the __I915__ macro can
be removed.

Signed-off-by: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/i915_drv.c  |  4 ++--
 drivers/gpu/drm/i915/i915_gem_gtt.c  |  2 +-
 drivers/gpu/drm/i915/intel_color.c   | 31 ++-
 drivers/gpu/drm/i915/intel_display.c | 28 +---
 drivers/gpu/drm/i915/intel_fbdev.c   | 10 +-
 drivers/gpu/drm/i915/intel_pm.c  |  7 ---
 6 files changed, 39 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 79cea49183b3..35940192e569 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -150,7 +150,7 @@ static void intel_detect_pch(struct drm_device *dev)
/* In all current cases, num_pipes is equivalent to the PCH_NOP setting
 * (which really amounts to a PCH but no South Display).
 */
-   if (INTEL_INFO(dev)->num_pipes == 0) {
+   if (INTEL_INFO(dev_priv)->num_pipes == 0) {
dev_priv->pch_type = PCH_NOP;
return;
}
@@ -607,7 +607,7 @@ static int i915_load_modeset_init(struct drm_device *dev)
 
intel_modeset_gem_init(dev);
 
-   if (INTEL_INFO(dev)->num_pipes == 0)
+   if (INTEL_INFO(dev_priv)->num_pipes == 0)
return 0;
 
ret = intel_fbdev_init(dev);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index cad6de65947d..b98f11735c5b 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -714,7 +714,7 @@ static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
  */
 static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
 {
-   ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask;
+   ppgtt->pd_dirty_rings = INTEL_INFO(to_i915(ppgtt->base.dev))->ring_mask;
 }
 
 /* Removes entries from a single page table, releasing it if it's empty.
diff --git a/drivers/gpu/drm/i915/intel_color.c 
b/drivers/gpu/drm/i915/intel_color.c
index 445108855275..3784940a4e7a 100644
--- a/drivers/gpu/drm/i915/intel_color.c
+++ b/drivers/gpu/drm/i915/intel_color.c
@@ -345,11 +345,10 @@ static void haswell_load_luts(struct drm_crtc_state 
*crtc_state)
 static void broadwell_load_luts(struct drm_crtc_state *state)
 {
struct drm_crtc *crtc = state->crtc;
-   struct drm_device *dev = crtc->dev;
-   struct drm_i915_private *dev_priv = to_i915(dev);
+   struct drm_i915_private *dev_priv = to_i915(crtc->dev);
struct intel_crtc_state *intel_state = to_intel_crtc_state(state);
enum pipe pipe = to_intel_crtc(crtc)->pipe;
-   uint32_t i, lut_size = INTEL_INFO(dev)->color.degamma_lut_size;
+   uint32_t i, lut_size = INTEL_INFO(dev_priv)->color.degamma_lut_size;
 
if (crtc_state_is_legacy(state)) {
haswell_load_luts(state);
@@ -428,8 +427,7 @@ static void broadwell_load_luts(struct drm_crtc_state 
*state)
 static void cherryview_load_luts(struct drm_crtc_state *state)
 {
struct drm_crtc *crtc = state->crtc;
-   struct drm_device *dev = crtc->dev;
-   struct drm_i915_private *dev_priv = to_i915(dev);
+   struct drm_i915_private *dev_priv = to_i915(crtc->dev);
enum pipe pipe = to_intel_crtc(crtc)->pipe;
struct drm_color_lut *lut;
uint32_t i, lut_size;
@@ -446,7 +444,7 @@ static void cherryview_load_luts(struct drm_crtc_state 
*state)
 
if (state->degamma_lut) {
lut = (struct drm_color_lut *) state->degamma_lut->data;
-   lut_size = INTEL_INFO(dev)->color.degamma_lut_size;
+   lut_size = INTEL_INFO(dev_priv)->color.degamma_lut_size;
for (i = 0; i < lut_size; i++) {
/* Write LUT in U0.14 format. */
word0 =
@@ -461,7 +459,7 @@ static void cherryview_load_luts(struct drm_crtc_state 
*state)
 
if (state->gamma_lut) {
lut = (struct drm_color_lut *) state->gamma_lut->data;
-   lut_size = INTEL_INFO(dev)->color.gamma_lut_size;
+   lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size;
for (i = 0; i < lut_size; i++) {
/* Write LUT in U0.10 format. */
word0 =
@@ -497,12 +495,12 @@ void intel_color_load_luts(struct drm_crtc_state 
*crtc_state)
 int intel_color_check(struct drm_crtc *crtc,
  struct drm_crtc_state *crtc_state)
 {
-   struct drm_device *dev = crtc->dev;
+   struct drm_i915_private *dev_priv = to_i915(crtc->dev);
size_t gamma_length, degamma_length;
 
-   degamma_length = INTEL_INFO(dev)->color.degamma_lut_size *
+   degamma_length = INTEL_INFO(dev_priv)->color.degamma_lut_size *
sizeof(struct drm_color_lut);
-   

[Intel-gfx] [PATCH 2/5] drm/i915: More assorted dev_priv cleanups

2016-11-04 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

A small selection of macros which can only accept dev_priv from
now on and a resulting trickle of fixups.

Signed-off-by: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/i915_drv.h   | 27 ---
 drivers/gpu/drm/i915/i915_gpu_error.c |  2 +-
 drivers/gpu/drm/i915/i915_irq.c   |  6 +++---
 drivers/gpu/drm/i915/intel_crt.c  |  8 
 drivers/gpu/drm/i915/intel_display.c  |  4 ++--
 drivers/gpu/drm/i915/intel_dp.c   |  2 +-
 drivers/gpu/drm/i915/intel_hotplug.c  |  2 +-
 drivers/gpu/drm/i915/intel_psr.c  |  2 +-
 8 files changed, 25 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 45a30f730216..6060e41d25e5 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2901,28 +2901,25 @@ struct drm_i915_cmd_table {
 #define HAS_128_BYTE_Y_TILING(dev_priv) (!IS_GEN2(dev_priv) && \
 !(IS_I915G(dev_priv) || \
 IS_I915GM(dev_priv)))
-#define SUPPORTS_TV(dev)   (INTEL_INFO(dev)->supports_tv)
-#define I915_HAS_HOTPLUG(dev)   (INTEL_INFO(dev)->has_hotplug)
-
-#define HAS_FW_BLC(dev_priv) (INTEL_GEN(dev_priv) > 2)
-#define HAS_PIPE_CXSR(dev) (INTEL_INFO(dev)->has_pipe_cxsr)
-#define HAS_FBC(dev) (INTEL_INFO(dev)->has_fbc)
+#define SUPPORTS_TV(dev_priv)  ((dev_priv)->info.supports_tv)
+#define I915_HAS_HOTPLUG(dev_priv) ((dev_priv)->info.has_hotplug)
 
+#define HAS_FW_BLC(dev_priv)   (INTEL_GEN(dev_priv) > 2)
+#define HAS_PIPE_CXSR(dev_priv) ((dev_priv)->info.has_pipe_cxsr)
+#define HAS_FBC(dev_priv)  ((dev_priv)->info.has_fbc)
 #define HAS_IPS(dev_priv)  (IS_HSW_ULT(dev_priv) || IS_BROADWELL(dev_priv))
-
-#define HAS_DP_MST(dev)(INTEL_INFO(dev)->has_dp_mst)
-
+#define HAS_DP_MST(dev_priv)   ((dev_priv)->info.has_dp_mst)
 #define HAS_DDI(dev_priv)  ((dev_priv)->info.has_ddi)
-#define HAS_FPGA_DBG_UNCLAIMED(dev)(INTEL_INFO(dev)->has_fpga_dbg)
-#define HAS_PSR(dev)   (INTEL_INFO(dev)->has_psr)
-#define HAS_RC6(dev)   (INTEL_INFO(dev)->has_rc6)
-#define HAS_RC6p(dev)  (INTEL_INFO(dev)->has_rc6p)
-
-#define HAS_CSR(dev)   (INTEL_INFO(dev)->has_csr)
+#define HAS_PSR(dev_priv)  ((dev_priv)->info.has_psr)
+#define HAS_RC6(dev_priv)  ((dev_priv)->info.has_rc6)
+#define HAS_RC6p(dev_priv) ((dev_priv)->info.has_rc6p)
+#define HAS_CSR(dev_priv)  ((dev_priv)->info.has_csr)
 
 #define HAS_RUNTIME_PM(dev_priv) ((dev_priv)->info.has_runtime_pm)
 #define HAS_64BIT_RELOC(dev_priv) ((dev_priv)->info.has_64bit_reloc)
 
+#define HAS_FPGA_DBG_UNCLAIMED(dev_priv) ((dev_priv)->info.has_fpga_dbg)
+
 /*
  * For now, anything with a GuC requires uCode loading, and then supports
  * command submission once loaded. But these are logically independent
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
b/drivers/gpu/drm/i915/i915_gpu_error.c
index d430b9441e6b..35b13f178b61 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -573,7 +573,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf 
*m,
   pdev->subsystem_device);
err_printf(m, "IOMMU enabled?: %d\n", error->iommu);
 
-   if (HAS_CSR(dev)) {
+   if (HAS_CSR(dev_priv)) {
struct intel_csr *csr = &dev_priv->csr;
 
err_printf(m, "DMC loaded: %s\n",
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 6d7505b5c5e7..285ee1e4352a 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -3678,7 +3678,7 @@ static void i915_irq_preinstall(struct drm_device * dev)
struct drm_i915_private *dev_priv = to_i915(dev);
int pipe;
 
-   if (I915_HAS_HOTPLUG(dev)) {
+   if (I915_HAS_HOTPLUG(dev_priv)) {
i915_hotplug_interrupt_update(dev_priv, 0x, 0);
I915_WRITE(PORT_HOTPLUG_STAT, I915_READ(PORT_HOTPLUG_STAT));
}
@@ -3712,7 +3712,7 @@ static int i915_irq_postinstall(struct drm_device *dev)
I915_DISPLAY_PIPE_B_EVENT_INTERRUPT |
I915_USER_INTERRUPT;
 
-   if (I915_HAS_HOTPLUG(dev)) {
+   if (I915_HAS_HOTPLUG(dev_priv)) {
i915_hotplug_interrupt_update(dev_priv, 0x, 0);
POSTING_READ(PORT_HOTPLUG_EN);
 
@@ -3880,7 +3880,7 @@ static void i915_irq_uninstall(struct drm_device * dev)
struct drm_i915_private *dev_priv = to_i915(dev);
int pipe;
 
-   if (I915_HAS_HOTPLUG(dev)) {
+   if (I915_HAS_HOTPLUG(dev_priv)) {
i915_hotplug_interrupt_update(dev_priv, 0x, 0);
I915_WRITE(PORT_HOTPLUG_STAT, I915_READ(PORT_HOTPLUG_STAT));
}
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index 30eb95b54dcf..fed61958ffd4 100644
--- 

[Intel-gfx] [PATCH 3/5] drm/i915: Further assorted dev_priv cleanups

2016-11-04 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

A small selection of macros which can only accept dev_priv from
now on and a resulting trickle of fixups.

Signed-off-by: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/i915_drv.h| 12 ++--
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  2 +-
 drivers/gpu/drm/i915/i915_irq.c|  2 +-
 drivers/gpu/drm/i915/intel_guc_loader.c| 10 +-
 4 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 6060e41d25e5..f392b0fb9b86 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2700,7 +2700,7 @@ struct drm_i915_cmd_table {
 #define INTEL_DEVID(dev_priv)  ((dev_priv)->info.device_id)
 
 #define REVID_FOREVER  0xff
-#define INTEL_REVID(p) (__I915__(p)->drm.pdev->revision)
+#define INTEL_REVID(dev_priv)  ((dev_priv)->drm.pdev->revision)
 
 #define GEN_FOREVER (0)
 /*
@@ -2925,13 +2925,13 @@ struct drm_i915_cmd_table {
  * command submission once loaded. But these are logically independent
  * properties, so we have separate macros to test them.
  */
-#define HAS_GUC(dev)   (INTEL_INFO(dev)->has_guc)
-#define HAS_GUC_UCODE(dev) (HAS_GUC(dev))
-#define HAS_GUC_SCHED(dev) (HAS_GUC(dev))
+#define HAS_GUC(dev_priv)  ((dev_priv)->info.has_guc)
+#define HAS_GUC_UCODE(dev_priv)(HAS_GUC(dev_priv))
+#define HAS_GUC_SCHED(dev_priv)(HAS_GUC(dev_priv))
 
-#define HAS_RESOURCE_STREAMER(dev) (INTEL_INFO(dev)->has_resource_streamer)
+#define HAS_RESOURCE_STREAMER(dev_priv) 
((dev_priv)->info.has_resource_streamer)
 
-#define HAS_POOLED_EU(dev) (INTEL_INFO(dev)->has_pooled_eu)
+#define HAS_POOLED_EU(dev_priv)((dev_priv)->info.has_pooled_eu)
 
 #define INTEL_PCH_DEVICE_ID_MASK   0xff00
 #define INTEL_PCH_IBX_DEVICE_ID_TYPE   0x3b00
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 9c7d9c88d879..f98921174161 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1616,7 +1616,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
}
 
if (args->flags & I915_EXEC_RESOURCE_STREAMER) {
-   if (!HAS_RESOURCE_STREAMER(dev)) {
+   if (!HAS_RESOURCE_STREAMER(dev_priv)) {
DRM_DEBUG("RS is only allowed for Haswell, Gen8 and 
above\n");
return -EINVAL;
}
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 285ee1e4352a..cb8a75f6ca16 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -4145,7 +4145,7 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
INIT_WORK(&dev_priv->rps.work, gen6_pm_rps_work);
INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work);
 
-   if (HAS_GUC_SCHED(dev))
+   if (HAS_GUC_SCHED(dev_priv))
dev_priv->pm_guc_events = GEN9_GUC_TO_HOST_INT_EVENT;
 
/* Let's track the enabled rps events */
diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c 
b/drivers/gpu/drm/i915/intel_guc_loader.c
index 1aa85236b788..34d6ad2cf7c1 100644
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -566,7 +566,7 @@ int intel_guc_setup(struct drm_device *dev)
ret = 0;
}
 
-   if (err == 0 && !HAS_GUC_UCODE(dev))
+   if (err == 0 && !HAS_GUC_UCODE(dev_priv))
;   /* Don't mention the GuC! */
else if (err == 0)
DRM_INFO("GuC firmware load skipped\n");
@@ -725,18 +725,18 @@ void intel_guc_init(struct drm_device *dev)
struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw;
const char *fw_path;
 
-   if (!HAS_GUC(dev)) {
+   if (!HAS_GUC(dev_priv)) {
i915.enable_guc_loading = 0;
i915.enable_guc_submission = 0;
} else {
/* A negative value means "use platform default" */
if (i915.enable_guc_loading < 0)
-   i915.enable_guc_loading = HAS_GUC_UCODE(dev);
+   i915.enable_guc_loading = HAS_GUC_UCODE(dev_priv);
if (i915.enable_guc_submission < 0)
-   i915.enable_guc_submission = HAS_GUC_SCHED(dev);
+   i915.enable_guc_submission = HAS_GUC_SCHED(dev_priv);
}
 
-   if (!HAS_GUC_UCODE(dev)) {
+   if (!HAS_GUC_UCODE(dev_priv)) {
fw_path = NULL;
} else if (IS_SKYLAKE(dev_priv)) {
fw_path = I915_SKL_GUC_UCODE;
-- 
2.7.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 1/5] drm/i915: Assorted dev_priv cleanups

2016-11-04 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

A small selection of macros which can only accept dev_priv from
now on and a resulting trickle of fixups.

Signed-off-by: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/i915_drv.h| 31 --
 drivers/gpu/drm/i915/i915_gem.c| 13 +++--
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  4 ++--
 drivers/gpu/drm/i915/i915_gem_stolen.c |  3 ++-
 drivers/gpu/drm/i915/i915_gem_userptr.c|  3 ++-
 drivers/gpu/drm/i915/i915_gpu_error.c  |  2 +-
 drivers/gpu/drm/i915/intel_dp.c|  6 +++---
 7 files changed, 34 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4735b4177100..45a30f730216 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2851,28 +2851,31 @@ struct drm_i915_cmd_table {
 #define ALL_ENGINES(~0)
 
 #define HAS_ENGINE(dev_priv, id) \
-   (!!(INTEL_INFO(dev_priv)->ring_mask & ENGINE_MASK(id)))
+   (!!((dev_priv)->info.ring_mask & ENGINE_MASK(id)))
 
 #define HAS_BSD(dev_priv)  HAS_ENGINE(dev_priv, VCS)
 #define HAS_BSD2(dev_priv) HAS_ENGINE(dev_priv, VCS2)
 #define HAS_BLT(dev_priv)  HAS_ENGINE(dev_priv, BCS)
 #define HAS_VEBOX(dev_priv)HAS_ENGINE(dev_priv, VECS)
 
-#define HAS_LLC(dev)   (INTEL_INFO(dev)->has_llc)
-#define HAS_SNOOP(dev) (INTEL_INFO(dev)->has_snoop)
-#define HAS_EDRAM(dev) (!!(__I915__(dev)->edram_cap & EDRAM_ENABLED))
+#define HAS_LLC(dev_priv)  ((dev_priv)->info.has_llc)
+#define HAS_SNOOP(dev_priv)((dev_priv)->info.has_snoop)
+#define HAS_EDRAM(dev_priv)(!!((dev_priv)->edram_cap & EDRAM_ENABLED))
 #define HAS_WT(dev_priv)   ((IS_HASWELL(dev_priv) || \
 IS_BROADWELL(dev_priv)) && HAS_EDRAM(dev_priv))
-#define HWS_NEEDS_PHYSICAL(dev)(INTEL_INFO(dev)->hws_needs_physical)
 
-#define HAS_HW_CONTEXTS(dev)   (INTEL_INFO(dev)->has_hw_contexts)
-#define HAS_LOGICAL_RING_CONTEXTS(dev) 
(INTEL_INFO(dev)->has_logical_ring_contexts)
-#define USES_PPGTT(dev)(i915.enable_ppgtt)
-#define USES_FULL_PPGTT(dev)   (i915.enable_ppgtt >= 2)
-#define USES_FULL_48BIT_PPGTT(dev) (i915.enable_ppgtt == 3)
+#define HWS_NEEDS_PHYSICAL(dev_priv)   ((dev_priv)->info.hws_needs_physical)
 
-#define HAS_OVERLAY(dev)   (INTEL_INFO(dev)->has_overlay)
-#define OVERLAY_NEEDS_PHYSICAL(dev)
(INTEL_INFO(dev)->overlay_needs_physical)
+#define HAS_HW_CONTEXTS(dev_priv)  ((dev_priv)->info.has_hw_contexts)
+#define HAS_LOGICAL_RING_CONTEXTS(dev_priv) \
+   ((dev_priv)->info.has_logical_ring_contexts)
+#define USES_PPGTT(dev_priv)   (i915.enable_ppgtt)
+#define USES_FULL_PPGTT(dev_priv)  (i915.enable_ppgtt >= 2)
+#define USES_FULL_48BIT_PPGTT(dev_priv)(i915.enable_ppgtt == 3)
+
+#define HAS_OVERLAY(dev_priv)   ((dev_priv)->info.has_overlay)
+#define OVERLAY_NEEDS_PHYSICAL(dev_priv) \
+   ((dev_priv)->info.overlay_needs_physical)
 
 /* Early gen2 have a totally busted CS tlb and require pinned batches. */
 #define HAS_BROKEN_CS_TLB(dev_priv)(IS_I830(dev_priv) || IS_845G(dev_priv))
@@ -2889,8 +2892,8 @@ struct drm_i915_cmd_table {
  * legacy irq no. is shared with another device. The kernel then disables that
  * interrupt source and so prevents the other device from working properly.
  */
-#define HAS_AUX_IRQ(dev) (INTEL_INFO(dev)->gen >= 5)
-#define HAS_GMBUS_IRQ(dev) (INTEL_INFO(dev)->has_gmbus_irq)
+#define HAS_AUX_IRQ(dev_priv)   ((dev_priv)->info.gen >= 5)
+#define HAS_GMBUS_IRQ(dev_priv) ((dev_priv)->info.has_gmbus_irq)
 
 /* With the 945 and later, Y tiling got adjusted so that it was 32 128-byte
  * rows, which changed the alignment requirements and fence programming.
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1f995ced524e..e9808c8ef55b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -48,7 +48,7 @@ static void i915_gem_object_flush_cpu_write_domain(struct 
drm_i915_gem_object *o
 static bool cpu_cache_is_coherent(struct drm_device *dev,
  enum i915_cache_level level)
 {
-   return HAS_LLC(dev) || level != I915_CACHE_NONE;
+   return HAS_LLC(to_i915(dev)) || level != I915_CACHE_NONE;
 }
 
 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
@@ -1757,7 +1757,7 @@ int i915_gem_fault(struct vm_area_struct *area, struct 
vm_fault *vmf)
goto err_rpm;
 
/* Access to snoopable pages through the GTT is incoherent. */
-   if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
+   if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) {
ret = -EFAULT;
goto err_unlock;
}
@@ -3414,7 +3414,8 @@ int i915_gem_object_set_cache_level(struct 
drm_i915_gem_object *obj,
   

Re: [Intel-gfx] [PATCH] drm/i915: Limit Valleyview and earlier to only using mappable scanout

2016-11-04 Thread Jani Nikula
On Fri, 04 Nov 2016, Chris Wilson  wrote:
> On Fri, Nov 04, 2016 at 12:59:08PM +0000, Tvrtko Ursulin wrote:
>> 
>> On 04/11/2016 11:08, Chris Wilson wrote:
>> >Valleyview and Cherryview are definitely limited to only scanning out
>> >from the first 256MiB and 512MiB of the Global GTT respectively. Lets
>> >presume that this behaviour was inherited from the display block copied
>> >from g4x (not Ironlake) and all earlier generations are similarly
>> >affected. For simplicity, impose that these platforms must scanout from
>> >the mappable region.
>> >
>> >Reported-by: Luis Botello 
>> >Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98036
>> >Fixes: 2efb813d5388 ("drm/i915: Fallback to using unmappable memory for 
>> >scanout")
>> >Signed-off-by: Chris Wilson 
>> >Cc: Akash Goel 
>> >Cc: Joonas Lahtinen 
>> >Cc:  # v4.9-rc1+
>> >---
>> >This leaves Ironlake -> Haswell with a bit of uncertainty. It is also
>> >not clear if the scanout accessible region is similarly limited on all
>> >gen8+, and so whether we need to similarly curtail the upper range for
>> >their scanouts.
>> >---
>> > drivers/gpu/drm/i915/i915_gem.c | 18 --
>> > 1 file changed, 16 insertions(+), 2 deletions(-)
>> >
>> >diff --git a/drivers/gpu/drm/i915/i915_gem.c 
>> >b/drivers/gpu/drm/i915/i915_gem.c
>> >index 269e2487c104..408875fbec66 100644
>> >--- a/drivers/gpu/drm/i915/i915_gem.c
>> >+++ b/drivers/gpu/drm/i915/i915_gem.c
>> >@@ -3661,8 +3661,22 @@ i915_gem_object_pin_to_display_plane(struct 
>> >drm_i915_gem_object *obj,
>> >if (view->type == I915_GGTT_VIEW_NORMAL)
>> >vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
>> >   PIN_MAPPABLE | PIN_NONBLOCK);
>> >-   if (IS_ERR(vma))
>> >-   vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 0);
>> >+   if (IS_ERR(vma)) {
>> >+   struct drm_i915_private *i915 = to_i915(obj->base.dev);
>> 
>> dev_priv ?
>> 
>> What do we do with i915_params being a global i915?
>
> Sssh, I'm gradually waging war against dev_priv.
> Eventually Jani won't be able to complain about i915 being the minority.
>
> The global modparams is an easy rename.

I just liked that i915.foo was the same on both the kernel command line
and in code. I kinda still do, but like Chris I'm not too fond of
dev_priv either, and i915 seems like a good replacement.

Seeing how module parameters multiply like rabbits, with all sorts of
sanitization, how the parameters are changed in kernel, and
/sys/module/i915/parameters/ not reflecting what the user did, maybe you
could come up with something nice for that while at it...

BR,
Jani.


-- 
Jani Nikula, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 2/2] drm/i915: Make GPU pages movable

2016-11-04 Thread Goel, Akash



On 11/4/2016 7:07 PM, Chris Wilson wrote:

Best if we send these as a new series to unconfuse CI.


Okay will send as a new series.


On Fri, Nov 04, 2016 at 06:18:26PM +0530, akash.g...@intel.com wrote:

+static int do_migrate_page(struct drm_i915_gem_object *obj)
+{
+   struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+   int ret = 0;
+
+   if (!can_migrate_page(obj))
+   return -EBUSY;
+
+   /* HW access would be required for a GGTT bound object, for which
+* device has to be kept awake. But a deadlock scenario can arise if
+* the attempt is made to resume the device, when either a suspend
+* or a resume operation is already happening concurrently from some
+* other path and that only also triggers compaction. So only unbind
+* if the device is currently awake.
+*/
+   if (!intel_runtime_pm_get_if_in_use(dev_priv))
+   return -EBUSY;
+
+   i915_gem_object_get(obj);
+   if (!unsafe_drop_pages(obj))
+   ret = -EBUSY;
+   i915_gem_object_put(obj);


Since the object release changes, we can now do this without the
i915_gem_object_get / i915_gem_object_put (as we are guarded by the BKL
struct_mutex).
Fine will remove object_get/put as with struct_mutex protection object 
can't disappear across unsafe_drop_pages().


Best regards
Akash



-Chris


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 2/2] drm/i915: Make GPU pages movable

2016-11-04 Thread Chris Wilson
Best if we send these as a new series to unconfuse CI.

On Fri, Nov 04, 2016 at 06:18:26PM +0530, akash.g...@intel.com wrote:
> +static int do_migrate_page(struct drm_i915_gem_object *obj)
> +{
> + struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
> + int ret = 0;
> +
> + if (!can_migrate_page(obj))
> + return -EBUSY;
> +
> + /* HW access would be required for a GGTT bound object, for which
> +  * device has to be kept awake. But a deadlock scenario can arise if
> +  * the attempt is made to resume the device, when either a suspend
> +  * or a resume operation is already happening concurrently from some
> +  * other path and that only also triggers compaction. So only unbind
> +  * if the device is currently awake.
> +  */
> + if (!intel_runtime_pm_get_if_in_use(dev_priv))
> + return -EBUSY;
> +
> + i915_gem_object_get(obj);
> + if (!unsafe_drop_pages(obj))
> + ret = -EBUSY;
> + i915_gem_object_put(obj);

Since the object release changes, we can now do this without the
i915_gem_object_get / i915_gem_object_put (as we are guarded by the BKL
struct_mutex).
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] ✗ Fi.CI.BAT: failure for series starting with [1/2] shmem: Support for registration of driver/file owner specific ops (rev4)

2016-11-04 Thread Patchwork
== Series Details ==

Series: series starting with [1/2] shmem: Support for registration of 
driver/file owner specific ops (rev4)
URL   : https://patchwork.freedesktop.org/series/4780/
State : failure

== Summary ==

drivers/gpu/drm/i915/i915_drv.h: At top level:
drivers/gpu/drm/i915/i915_drv.h:58:1: error: expected identifier or ‘(’ before 
‘==’ token
 ===
 ^
In file included from drivers/gpu/drm/i915/intel_guc.h:27:0,
 from drivers/gpu/drm/i915/i915_drv.h:60,
 from drivers/gpu/drm/i915/intel_csr.c:25:
drivers/gpu/drm/i915/intel_guc_fwif.h:222:1: warning: empty declaration
 } __packed;
 ^
In file included from drivers/gpu/drm/i915/intel_csr.c:25:0:
drivers/gpu/drm/i915/i915_drv.h:61:1: error: expected identifier or ‘(’ before 
‘>>’ token
 >>> drm/i915: Make pages of GFX allocations movable
 ^
  LD  net/key/built-in.o
scripts/Makefile.build:290: recipe for target 
'drivers/gpu/drm/i915/i915_sysfs.o' failed
make[4]: *** [drivers/gpu/drm/i915/i915_sysfs.o] Error 1
  LD  drivers/acpi/acpica/acpi.o
scripts/Makefile.build:290: recipe for target 
'drivers/gpu/drm/i915/i915_suspend.o' failed
make[4]: *** [drivers/gpu/drm/i915/i915_suspend.o] Error 1
  LD  drivers/thermal/thermal_sys.o
  LD  drivers/thermal/built-in.o
  LD  drivers/iommu/built-in.o
  LD  net/netlink/built-in.o
  LD  drivers/video/console/built-in.o
  LD  drivers/pci/pcie/pcieportdrv.o
scripts/Makefile.build:290: recipe for target 
'drivers/gpu/drm/i915/intel_csr.o' failed
make[4]: *** [drivers/gpu/drm/i915/intel_csr.o] Error 1
scripts/Makefile.build:475: recipe for target 'drivers/gpu/drm/i915' failed
make[3]: *** [drivers/gpu/drm/i915] Error 2
scripts/Makefile.build:475: recipe for target 'drivers/gpu/drm' failed
make[2]: *** [drivers/gpu/drm] Error 2
scripts/Makefile.build:475: recipe for target 'drivers/gpu' failed
make[1]: *** [drivers/gpu] Error 2
make[1]: *** Waiting for unfinished jobs
  LD  drivers/spi/built-in.o
  LD  kernel/sched/built-in.o
  LD  drivers/video/built-in.o
  LD  drivers/acpi/acpica/built-in.o
  LD  drivers/tty/serial/8250/8250.o
  LD  kernel/built-in.o
  LD  drivers/acpi/built-in.o
  LD [M]  drivers/net/ethernet/intel/igbvf/igbvf.o
  LD  lib/raid6/raid6_pq.o
  LD  lib/raid6/built-in.o
  LD [M]  drivers/mmc/core/mmc_core.o
  LD  drivers/mmc/built-in.o
  LD  drivers/pci/pcie/aer/aerdriver.o
  LD  drivers/usb/gadget/libcomposite.o
  LD  net/unix/unix.o
  LD  drivers/pci/pcie/aer/built-in.o
  LD  net/unix/built-in.o
  LD  drivers/pci/pcie/built-in.o
  LD [M]  drivers/net/ethernet/intel/e1000/e1000.o
  LD [M]  sound/pci/hda/snd-hda-codec-generic.o
  LD  sound/pci/built-in.o
  LD  net/packet/built-in.o
  LD  sound/built-in.o
  LD  drivers/usb/core/usbcore.o
  LD  drivers/scsi/sd_mod.o
  LD  drivers/scsi/built-in.o
  LD  drivers/tty/serial/8250/8250_base.o
  LD  drivers/tty/serial/8250/built-in.o
  LD  drivers/usb/core/built-in.o
  LD  drivers/pci/built-in.o
  LD  drivers/tty/serial/built-in.o
  LD  drivers/usb/gadget/udc/udc-core.o
  LD  drivers/usb/gadget/udc/built-in.o
  LD  drivers/usb/gadget/built-in.o
  LD  net/xfrm/built-in.o
  CC  arch/x86/kernel/cpu/capflags.o
  LD  arch/x86/kernel/cpu/built-in.o
  LD  arch/x86/kernel/built-in.o
  LD  arch/x86/built-in.o
  AR  lib/lib.a
  EXPORTS lib/lib-ksyms.o
  LD  drivers/md/md-mod.o
  LD  drivers/tty/vt/built-in.o
  LD [M]  drivers/net/ethernet/intel/igb/igb.o
  LD  lib/built-in.o
  LD  drivers/md/built-in.o
  LD  drivers/tty/built-in.o
  LD  drivers/usb/host/xhci-hcd.o
  LD  fs/btrfs/btrfs.o
  LD  fs/ext4/ext4.o
  LD  net/ipv6/ipv6.o
  LD  fs/ext4/built-in.o
  LD  fs/btrfs/built-in.o
  LD  net/ipv6/built-in.o
  LD  fs/built-in.o
  LD  drivers/usb/host/built-in.o
  LD  drivers/usb/built-in.o
  LD  net/core/built-in.o
  LD  net/ipv4/built-in.o
  LD  net/built-in.o
  LD [M]  drivers/net/ethernet/intel/e1000e/e1000e.o
  LD  drivers/net/ethernet/built-in.o
  LD  drivers/net/built-in.o
Makefile:983: recipe for target 'drivers' failed
make: *** [drivers] Error 2

Full logs at /archive/deploy/logs/Patchwork_2905

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v8 02/12] drm/i915: Add i915 perf infrastructure

2016-11-04 Thread Robert Bragg
On Fri, Nov 4, 2016 at 8:59 AM, sourab gupta  wrote:

> On Thu, 2016-10-27 at 19:14 -0700, Robert Bragg wrote:
> > Adds base i915 perf infrastructure for Gen performance metrics.
> >
> > This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64
> > properties to configure a stream of metrics and returns a new fd usable
> > with standard VFS system calls including read() to read typed and sized
> > records; ioctl() to enable or disable capture and poll() to wait for
> > data.
> >
> > A stream is opened something like:
> >
> >   uint64_t properties[] = {
> >   /* Single context sampling */
> >   DRM_I915_PERF_PROP_CTX_HANDLE,ctx_handle,
> >
> >   /* Include OA reports in samples */
> >   DRM_I915_PERF_PROP_SAMPLE_OA, true,
> >
> >   /* OA unit configuration */
> >   DRM_I915_PERF_PROP_OA_METRICS_SET,metrics_set_id,
> >   DRM_I915_PERF_PROP_OA_FORMAT, report_format,
> >   DRM_I915_PERF_PROP_OA_EXPONENT,   period_exponent,
> >};
> >struct drm_i915_perf_open_param parm = {
> >   .flags = I915_PERF_FLAG_FD_CLOEXEC |
> >I915_PERF_FLAG_FD_NONBLOCK |
> >I915_PERF_FLAG_DISABLED,
> >   .properties_ptr = (uint64_t)properties,
> >   .num_properties = sizeof(properties) / 16,
> >};
> >int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &parm);
> >
> > Records read all start with a common { type, size } header with
> > DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records
> > contain an extensible number of fields and it's the
> > DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that
> > determine what's included in every sample.
> >
> > No specific streams are supported yet so any attempt to open a stream
> > will return an error.
> >
> > v2:
> > use i915_gem_context_get() - Chris Wilson
> > v3:
> > update read() interface to avoid passing state struct - Chris Wilson
> > fix some rebase fallout, with i915-perf init/deinit
> > v4:
> > s/DRM_IORW/DRM_IOW/ - Emil Velikov
> >
> > Signed-off-by: Robert Bragg 
> > ---
> >  drivers/gpu/drm/i915/Makefile|   3 +
> >  drivers/gpu/drm/i915/i915_drv.c  |   4 +
> >  drivers/gpu/drm/i915/i915_drv.h  |  91 
> >  drivers/gpu/drm/i915/i915_perf.c | 443 ++
> +
> >  include/uapi/drm/i915_drm.h  |  67 ++
> >  5 files changed, 608 insertions(+)
> >  create mode 100644 drivers/gpu/drm/i915/i915_perf.c
> >
> > diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/
> Makefile
> > index 6123400..8d4e25f 100644
> > --- a/drivers/gpu/drm/i915/Makefile
> > +++ b/drivers/gpu/drm/i915/Makefile
> > @@ -113,6 +113,9 @@ i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) +=
> i915_gpu_error.o
> >  # virtual gpu code
> >  i915-y += i915_vgpu.o
> >
> > +# perf code
> > +i915-y += i915_perf.o
> > +
> >  ifeq ($(CONFIG_DRM_I915_GVT),y)
> >  i915-y += intel_gvt.o
> >  include $(src)/gvt/Makefile
> > diff --git a/drivers/gpu/drm/i915/i915_drv.c
> b/drivers/gpu/drm/i915/i915_drv.c
> > index af3559d..685c96e 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.c
> > +++ b/drivers/gpu/drm/i915/i915_drv.c
> > @@ -836,6 +836,8 @@ static int i915_driver_init_early(struct
> drm_i915_private *dev_priv,
> >
> >   intel_detect_preproduction_hw(dev_priv);
> >
> > + i915_perf_init(dev_priv);
> > +
> >   return 0;
> >
> >  err_workqueues:
> > @@ -849,6 +851,7 @@ static int i915_driver_init_early(struct
> drm_i915_private *dev_priv,
> >   */
> >  static void i915_driver_cleanup_early(struct drm_i915_private
> *dev_priv)
> >  {
> > + i915_perf_fini(dev_priv);
> >   i915_gem_load_cleanup(&dev_priv->drm);
> >   i915_workqueues_cleanup(dev_priv);
> >  }
> > @@ -2556,6 +2559,7 @@ static const struct drm_ioctl_desc i915_ioctls[] =
> {
> >   DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl,
> DRM_RENDER_ALLOW),
> >   DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_GETPARAM,
> i915_gem_context_getparam_ioctl, DRM_RENDER_ALLOW),
> >   DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_SETPARAM,
> i915_gem_context_setparam_ioctl, DRM_RENDER_ALLOW),
> > + DRM_IOCTL_DEF_DRV(I915_PERF_OPEN, i915_perf_open_ioctl,
> DRM_RENDER_ALLOW),
> >  };
> >
> >  static struct drm_driver driver = {
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h
> b/drivers/gpu/drm/i915/i915_drv.h
> > index 5a260db..7a65c0b 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -1767,6 +1767,84 @@ struct intel_wm_config {
> >   bool sprites_scaled;
> >  };
> >
> > +struct i915_perf_stream;
> > +
> > +struct i915_perf_stream_ops {
> > + /* Enables the collection of HW samples, either in response to
> > +  * I915_PERF_IOCTL_ENABLE or implicitly called when stream is
> > +  * opened without I915_PERF_FLAG_DISABLED.
> > +  */
> > + void (*enable)(struct i915_perf_stream *stream);
> > +
> > + /* Disables the collection 

Re: [Intel-gfx] [PATCH] drm/i915: Limit Valleyview and earlier to only using mappable scanout

2016-11-04 Thread Chris Wilson
On Fri, Nov 04, 2016 at 12:59:08PM +0000, Tvrtko Ursulin wrote:
> 
> On 04/11/2016 11:08, Chris Wilson wrote:
> >Valleyview and Cherryview are definitely limited to only scanning out
> >from the first 256MiB and 512MiB of the Global GTT respectively. Lets
> >presume that this behaviour was inherited from the display block copied
> >from g4x (not Ironlake) and all earlier generations are similarly
> >affected. For simplicity, impose that these platforms must scanout from
> >the mappable region.
> >
> >Reported-by: Luis Botello 
> >Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98036
> >Fixes: 2efb813d5388 ("drm/i915: Fallback to using unmappable memory for 
> >scanout")
> >Signed-off-by: Chris Wilson 
> >Cc: Akash Goel 
> >Cc: Joonas Lahtinen 
> >Cc:  # v4.9-rc1+
> >---
> >This leaves Ironlake -> Haswell with a bit of uncertainty. It is also
> >not clear if the scanout accessible region is similarly limited on all
> >gen8+, and so whether we need to similarly curtail the upper range for
> >their scanouts.
> >---
> > drivers/gpu/drm/i915/i915_gem.c | 18 --
> > 1 file changed, 16 insertions(+), 2 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_gem.c 
> >b/drivers/gpu/drm/i915/i915_gem.c
> >index 269e2487c104..408875fbec66 100644
> >--- a/drivers/gpu/drm/i915/i915_gem.c
> >+++ b/drivers/gpu/drm/i915/i915_gem.c
> >@@ -3661,8 +3661,22 @@ i915_gem_object_pin_to_display_plane(struct 
> >drm_i915_gem_object *obj,
> > if (view->type == I915_GGTT_VIEW_NORMAL)
> > vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
> >PIN_MAPPABLE | PIN_NONBLOCK);
> >-if (IS_ERR(vma))
> >-vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 0);
> >+if (IS_ERR(vma)) {
> >+struct drm_i915_private *i915 = to_i915(obj->base.dev);
> 
> dev_priv ?
> 
> What do we do with i915_params being a global i915?

Sssh, I'm gradually waging war against dev_priv.
Eventually Jani won't be able to complain about i915 being the minority.

The global modparams is an easy rename.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: Limit Valleyview and earlier to only using mappable scanout

2016-11-04 Thread Tvrtko Ursulin


On 04/11/2016 11:08, Chris Wilson wrote:

Valleyview and Cherryview are definitely limited to only scanning out
from the first 256MiB and 512MiB of the Global GTT respectively. Lets
presume that this behaviour was inherited from the display block copied
from g4x (not Ironlake) and all earlier generations are similarly
affected. For simplicity, impose that these platforms must scanout from
the mappable region.

Reported-by: Luis Botello 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98036
Fixes: 2efb813d5388 ("drm/i915: Fallback to using unmappable memory for 
scanout")
Signed-off-by: Chris Wilson 
Cc: Akash Goel 
Cc: Joonas Lahtinen 
Cc:  # v4.9-rc1+
---
This leaves Ironlake -> Haswell with a bit of uncertainty. It is also
not clear if the scanout accessible region is similarly limited on all
gen8+, and so whether we need to similarly curtail the upper range for
their scanouts.
---
 drivers/gpu/drm/i915/i915_gem.c | 18 --
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 269e2487c104..408875fbec66 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3661,8 +3661,22 @@ i915_gem_object_pin_to_display_plane(struct 
drm_i915_gem_object *obj,
if (view->type == I915_GGTT_VIEW_NORMAL)
vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
   PIN_MAPPABLE | PIN_NONBLOCK);
-   if (IS_ERR(vma))
-   vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 0);
+   if (IS_ERR(vma)) {
+   struct drm_i915_private *i915 = to_i915(obj->base.dev);


dev_priv ?

What do we do with i915_params being a global i915?

Regards,

Tvrtko


+   unsigned int flags;
+
+   /* Valleyview and Cherryview are definitely limited to scanning
+* out the first 256MiB and 512MiB respectively. Lets presume
+* this behaviour was inherited from their g4x display engine
+* and that all earlier gen are similarly limited.
+*/
+   flags = 0;
+   if (INTEL_GEN(i915) < 5 ||
+   IS_VALLEYVIEW(i915) ||
+   IS_CHERRYVIEW(i915))
+   flags = PIN_MAPPABLE;
+   vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
+   }
if (IS_ERR(vma))
goto err_unpin_display;



___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 2/2] drm/i915: Make GPU pages movable

2016-11-04 Thread akash . goel
From: Chris Wilson 

On a long run of more than 2-3 days, physical memory tends to get
fragmented severely, which considerably slows down the system. In such a
scenario, the shrinker is also unable to help as lack of memory is not
the actual problem, since it has been observed that there are enough free
pages of 0 order. This also manifests itself when an individual zone in
the mm runs out of pages and if we cannot migrate pages between zones,
the kernel hits an out-of-memory even though there are free pages (and
often all of swap) available.

To address the issue of external fragmentation, kernel does a compaction
(which involves migration of pages) but its efficacy depends upon how
many pages are marked as MOVABLE, as only those pages can be migrated.

Currently the backing pages for GPU buffers are allocated from shmemfs
with GFP_RECLAIMABLE flag, in units of 4KB pages.  In the case of limited
swap space, it may not be possible always to reclaim or swap-out pages of
all the inactive objects, to make way for free space allowing formation
of higher order groups of physically-contiguous pages on compaction.

Just marking the GPU pages as MOVABLE will not suffice, as i915.ko has to
pin the pages if they are in use by GPU, which will prevent their
migration. So the migratepage callback in shmem is also hooked up to get
a notification when kernel initiates the page migration. On the
notification, i915.ko appropriately unpins the pages.  With this we can
effectively mark the GPU pages as MOVABLE and hence mitigate the
fragmentation problem.

v2:
 - Rename the migration routine to gem_shrink_migratepage, move it to the
   shrinker file, and use the existing constructs (Chris)
 - To cleanup, add a new helper function to encapsulate all page migration
   skip conditions (Chris)
 - Add a new local helper function in shrinker file, for dropping the
   backing pages, and call the same from gem_shrink() also (Chris)

v3:
 - Fix/invert the check on the return value of unsafe_drop_pages (Chris)

v4:
 - Minor tidy

v5:
 - Fix unsafe usage of unsafe_drop_pages()
 - Rebase onto vmap-notifier

Testcase: igt/gem_shrink
Bugzilla: (e.g.) https://bugs.freedesktop.org/show_bug.cgi?id=90254
Cc: Hugh Dickins 
Cc: linux...@kvack.org
Signed-off-by: Sourab Gupta 
Signed-off-by: Akash Goel 
Signed-off-by: Chris Wilson 
Reviewed-by: Joonas Lahtinen 
Reviewed-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_drv.h  |   2 +
 drivers/gpu/drm/i915/i915_gem.c  |   9 ++-
 drivers/gpu/drm/i915/i915_gem_shrinker.c | 134 +++
 3 files changed, 144 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4735b417..7f2717b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1357,6 +1357,8 @@ struct intel_l3_parity {
 };
 
 struct i915_gem_mm {
+   struct shmem_dev_info shmem_info;
+
/** Memory allocator for GTT stolen memory */
struct drm_mm stolen;
/** Protects the usage of the GTT stolen memory allocator. This is
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1f995ce..f0d4ce7 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2164,6 +2164,7 @@ void __i915_gem_object_invalidate(struct 
drm_i915_gem_object *obj)
if (obj->mm.madv == I915_MADV_WILLNEED)
mark_page_accessed(page);
 
+   set_page_private(page, 0);
put_page(page);
}
obj->mm.dirty = false;
@@ -2310,6 +2311,7 @@ static unsigned int swiotlb_max_size(void)
sg->length += PAGE_SIZE;
}
last_pfn = page_to_pfn(page);
+   set_page_private(page, (unsigned long)obj);
 
/* Check that the i965g/gm workaround works. */
WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
@@ -2334,8 +2336,10 @@ static unsigned int swiotlb_max_size(void)
 
 err_pages:
sg_mark_end(sg);
-   for_each_sgt_page(page, sgt_iter, st)
+   for_each_sgt_page(page, sgt_iter, st) {
+   set_page_private(page, 0);
put_page(page);
+   }
sg_free_table(st);
kfree(st);
 
@@ -4185,6 +4189,8 @@ struct drm_i915_gem_object *
goto fail;
 
mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
+   if (IS_ENABLED(MIGRATION))
+   mask |= __GFP_MOVABLE;
if (IS_CRESTLINE(dev_priv) || IS_BROADWATER(dev_priv)) {
/* 965gm cannot relocate objects above 4GiB. */
mask &= ~__GFP_HIGHMEM;
@@ -4193,6 +4199,7 @@ struct drm_i915_gem_object *
 
mapping = obj->base.filp->f_mapping;
mapping_set_gfp_mask(mapping, mask);
+ 

[Intel-gfx] [PATCH 1/2] shmem: Support for registration of driver/file owner specific ops

2016-11-04 Thread akash . goel
From: Chris Wilson 

This provides support for the drivers or shmem file owners to register
a set of callbacks, which can be invoked from the address space
operations methods implemented by shmem.  This allows the file owners to
hook into the shmem address space operations to do some extra/custom
operations in addition to the default ones.

The private_data field of address_space struct is used to store the
pointer to driver specific ops.  Currently only one ops field is defined,
which is migratepage, but can be extended on an as-needed basis.

The need for driver specific operations arises since some of the
operations (like migratepage) may not be handled completely within shmem,
so as to be effective, and would need some driver specific handling also.
Specifically, i915.ko would like to participate in migratepage().
i915.ko uses shmemfs to provide swappable backing storage for its user
objects, but when those objects are in use by the GPU it must pin the
entire object until the GPU is idle.  As a result, large chunks of memory
can be arbitrarily withdrawn from page migration, resulting in premature
out-of-memory due to fragmentation.  However, if i915.ko can receive the
migratepage() request, it can then flush the object from the GPU, remove
its pin and thus enable the migration.

Since gfx allocations are one of the major consumers of system memory, it's
imperative to have such a mechanism to effectively deal with
fragmentation.  And therefore the need for such a provision for initiating
driver specific actions during address space operations.

v2:
- Drop dev_ prefix from the members of shmem_dev_info structure. (Joonas)
- Change the return type of shmem_set_device_op() to void and remove the
  check for pre-existing data. (Joonas)
- Rename shmem_set_device_op() to shmem_set_dev_info() to be consistent
  with shmem_dev_info structure. (Joonas)

Cc: Hugh Dickins 
Cc: linux...@kvack.org
Cc: linux-ker...@vger.linux.org
Signed-off-by: Sourab Gupta 
Signed-off-by: Akash Goel 
Reviewed-by: Chris Wilson 
---
 include/linux/shmem_fs.h | 13 +
 mm/shmem.c   | 17 -
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index ff078e7..22796a0 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -39,11 +39,24 @@ struct shmem_sb_info {
unsigned long shrinklist_len; /* Length of shrinklist */
 };
 
+struct shmem_dev_info {
+   void *private_data;
+   int (*migratepage)(struct address_space *mapping,
+  struct page *newpage, struct page *page,
+  enum migrate_mode mode, void *dev_priv_data);
+};
+
 static inline struct shmem_inode_info *SHMEM_I(struct inode *inode)
 {
return container_of(inode, struct shmem_inode_info, vfs_inode);
 }
 
+static inline void shmem_set_dev_info(struct address_space *mapping,
+struct shmem_dev_info *info)
+{
+   mapping->private_data = info;
+}
+
 /*
  * Functions in mm/shmem.c called directly from elsewhere:
  */
diff --git a/mm/shmem.c b/mm/shmem.c
index ad7813d..bf71ddd 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1290,6 +1290,21 @@ static int shmem_writepage(struct page *page, struct 
writeback_control *wbc)
return 0;
 }
 
+#ifdef CONFIG_MIGRATION
+static int shmem_migratepage(struct address_space *mapping,
+struct page *newpage, struct page *page,
+enum migrate_mode mode)
+{
+   struct shmem_dev_info *dev_info = mapping->private_data;
+
+   if (dev_info && dev_info->migratepage)
+   return dev_info->migratepage(mapping, newpage, page,
+   mode, dev_info->private_data);
+
+   return migrate_page(mapping, newpage, page, mode);
+}
+#endif
+
 #if defined(CONFIG_NUMA) && defined(CONFIG_TMPFS)
 static void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
 {
@@ -3654,7 +3669,7 @@ static void shmem_destroy_inodecache(void)
.write_end  = shmem_write_end,
 #endif
 #ifdef CONFIG_MIGRATION
-   .migratepage= migrate_page,
+   .migratepage= shmem_migratepage,
 #endif
.error_remove_page = generic_error_remove_page,
 };
-- 
1.9.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v2] drm/i915: Fix pages pin counting around swizzle quirk

2016-11-04 Thread Chris Wilson
On Fri, Nov 04, 2016 at 01:43:34PM +0200, Joonas Lahtinen wrote:
> On pe, 2016-11-04 at 10:30 +0000, Chris Wilson wrote:
> > @@ -3711,6 +3711,13 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma)
> >  {
> >     int ret = 0;
> >  
> > +   /* The vma->pages are only valid within the lifespan of the borrowed
> > +    * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so
> > +    * must be the vma->pages. A simple rule is that vma->pages must only
> > +    * be accessed when the obj->mm.pages are pinned.
> > +    */
> > +   GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));
> > +
> >     if (vma->pages)
> >     return 0;
> 
> My confusion was vma == obj for the moment, but I think the comment is
> still good. The barriers are much more sensible now, too.
> 
> Reviewed-by: Joonas Lahtinen 

* fingers crossed that's the last we see of this quirk.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] ✓ Fi.CI.BAT: success for drm/i915: Limit Valleyview and earlier to only using mappable scanout

2016-11-04 Thread Patchwork
== Series Details ==

Series: drm/i915: Limit Valleyview and earlier to only using mappable scanout
URL   : https://patchwork.freedesktop.org/series/14835/
State : success

== Summary ==

Series 14835v1 drm/i915: Limit Valleyview and earlier to only using mappable 
scanout
https://patchwork.freedesktop.org/api/1.0/series/14835/revisions/1/mbox/

Test kms_force_connector_basic:
Subgroup force-load-detect:
dmesg-warn -> PASS   (fi-snb-2520m)

fi-bdw-5557u total:241  pass:226  dwarn:0   dfail:0   fail:0   skip:15 
fi-bsw-n3050 total:241  pass:201  dwarn:0   dfail:0   fail:0   skip:40 
fi-bxt-t5700 total:241  pass:213  dwarn:0   dfail:0   fail:0   skip:28 
fi-byt-j1900 total:241  pass:213  dwarn:0   dfail:0   fail:0   skip:28 
fi-byt-n2820 total:241  pass:209  dwarn:0   dfail:0   fail:0   skip:32 
fi-hsw-4770  total:241  pass:221  dwarn:0   dfail:0   fail:0   skip:20 
fi-hsw-4770r total:241  pass:221  dwarn:0   dfail:0   fail:0   skip:20 
fi-ilk-650   total:241  pass:188  dwarn:0   dfail:0   fail:0   skip:53 
fi-ivb-3520m total:241  pass:219  dwarn:0   dfail:0   fail:0   skip:22 
fi-ivb-3770  total:241  pass:219  dwarn:0   dfail:0   fail:0   skip:22 
fi-kbl-7200u total:241  pass:219  dwarn:0   dfail:0   fail:0   skip:22 
fi-skl-6260u total:241  pass:227  dwarn:0   dfail:0   fail:0   skip:14 
fi-skl-6700hqtotal:241  pass:220  dwarn:0   dfail:0   fail:0   skip:21 
fi-skl-6700k total:241  pass:219  dwarn:1   dfail:0   fail:0   skip:21 
fi-skl-6770hqtotal:241  pass:227  dwarn:0   dfail:0   fail:0   skip:14 
fi-snb-2520m total:241  pass:209  dwarn:0   dfail:0   fail:0   skip:32 
fi-snb-2600  total:241  pass:208  dwarn:0   dfail:0   fail:0   skip:33 

21f242e536b5077c046df785a8c4c28374941c15 drm-intel-nightly: 
2016y-11m-03d-21h-01m-03s UTC integration manifest
b9cc4a5 drm/i915: Limit Valleyview and earlier to only using mappable scanout

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_2903/
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v2] drm/i915: Fix pages pin counting around swizzle quirk

2016-11-04 Thread Joonas Lahtinen
On pe, 2016-11-04 at 10:30 +0000, Chris Wilson wrote:
> @@ -3711,6 +3711,13 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma)
>  {
>   int ret = 0;
>  
> + /* The vma->pages are only valid within the lifespan of the borrowed
> +  * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so
> +  * must be the vma->pages. A simple rule is that vma->pages must only
> +  * be accessed when the obj->mm.pages are pinned.
> +  */
> + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));
> +
>   if (vma->pages)
>   return 0;

My confusion was vma == obj for the moment, but I think the comment is
still good. The barriers are much more sensible now, too.

Reviewed-by: Joonas Lahtinen 

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: Limit Valleyview and earlier to only using mappable scanout

2016-11-04 Thread Chris Wilson
On Fri, Nov 04, 2016 at 01:29:04PM +0200, Jani Nikula wrote:
> On Fri, 04 Nov 2016, Chris Wilson  wrote:
> > Valleyview and Cherryview are definitely limited to only scanning out
> > from the first 256MiB and 512MiB of the Global GTT respectively. Lets
> > presume that this behaviour was inherited from the display block copied
> > from g4x (not Ironlake) and all earlier generations are similarly
> > affected. For simplicity, impose that these platforms must scanout from
> > the mappable region.
> >
> > Reported-by: Luis Botello 
> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98036
> > Fixes: 2efb813d5388 ("drm/i915: Fallback to using unmappable memory for 
> > scanout")
> > Signed-off-by: Chris Wilson 
> > Cc: Akash Goel 
> > Cc: Joonas Lahtinen 
> > Cc:  # v4.9-rc1+
> > ---
> > This leaves Ironlake -> Haswell with a bit of uncertainty. It is also
> > not clear if the scanout accessible region is similarly limited on all
> > gen8+, and so whether we need to similarly curtail the upper range for
> > their scanouts.
> > ---
> >  drivers/gpu/drm/i915/i915_gem.c | 18 ++++++++++++++++--
> >  1 file changed, 16 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c 
> > b/drivers/gpu/drm/i915/i915_gem.c
> > index 269e2487c104..408875fbec66 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -3661,8 +3661,22 @@ i915_gem_object_pin_to_display_plane(struct 
> > drm_i915_gem_object *obj,
> > if (view->type == I915_GGTT_VIEW_NORMAL)
> > vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
> >PIN_MAPPABLE | PIN_NONBLOCK);
> > -   if (IS_ERR(vma))
> > -   vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 0);
> > +   if (IS_ERR(vma)) {
> > +   struct drm_i915_private *i915 = to_i915(obj->base.dev);
> > +   unsigned int flags;
> > +
> > +   /* Valleyview and Cherryview are definitely limited to scanning
> > +* out the first 256MiB and 512MiB respectively. Lets presume
> > +* this behaviour was inherited from their g4x display engine
> > +* and that all earlier gen are similarly limited.
> > +*/
> > +   flags = 0;
> > +   if (INTEL_GEN(i915) < 5 ||
> > +   IS_VALLEYVIEW(i915) ||
> > +   IS_CHERRYVIEW(i915))
> 
> Since it's related to the display engine, HAS_GMCH_DISPLAY()?

Ah, that's the synonym I was thinking of. That describes the split I used
here much better. We may need to refine this as more information
becomes available (if ever!)
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v2] get-fences-locked

2016-11-04 Thread Joonas Lahtinen
On pe, 2016-11-04 at 10:29 +0000, Chris Wilson wrote:
> ---
>  drivers/dma-buf/reservation.c | 58 
> +++
>  include/linux/reservation.h   |  4 +++
>  2 files changed, 62 insertions(+)

Wrong branch.

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: Limit Valleyview and earlier to only using mappable scanout

2016-11-04 Thread Jani Nikula
On Fri, 04 Nov 2016, Chris Wilson  wrote:
> Valleyview and Cherryview are definitely limited to only scanning out
> from the first 256MiB and 512MiB of the Global GTT respectively. Lets
> presume that this behaviour was inherited from the display block copied
> from g4x (not Ironlake) and all earlier generations are similarly
> affected. For simplicity, impose that these platforms must scanout from
> the mappable region.
>
> Reported-by: Luis Botello 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98036
> Fixes: 2efb813d5388 ("drm/i915: Fallback to using unmappable memory for 
> scanout")
> Signed-off-by: Chris Wilson 
> Cc: Akash Goel 
> Cc: Joonas Lahtinen 
> Cc:  # v4.9-rc1+
> ---
> This leaves Ironlake -> Haswell with a bit of uncertainty. It is also
> not clear if the scanout accessible region is similarly limited on all
> gen8+, and so whether we need to similarly curtail the upper range for
> their scanouts.
> ---
>  drivers/gpu/drm/i915/i915_gem.c | 18 ++++++++++++++++--
>  1 file changed, 16 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 269e2487c104..408875fbec66 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3661,8 +3661,22 @@ i915_gem_object_pin_to_display_plane(struct 
> drm_i915_gem_object *obj,
>   if (view->type == I915_GGTT_VIEW_NORMAL)
>   vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
>  PIN_MAPPABLE | PIN_NONBLOCK);
> - if (IS_ERR(vma))
> - vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 0);
> + if (IS_ERR(vma)) {
> + struct drm_i915_private *i915 = to_i915(obj->base.dev);
> + unsigned int flags;
> +
> + /* Valleyview and Cherryview are definitely limited to scanning
> +  * out the first 256MiB and 512MiB respectively. Lets presume
> +  * this behaviour was inherited from their g4x display engine
> +  * and that all earlier gen are similarly limited.
> +  */
> + flags = 0;
> + if (INTEL_GEN(i915) < 5 ||
> + IS_VALLEYVIEW(i915) ||
> + IS_CHERRYVIEW(i915))

Since it's related to the display engine, HAS_GMCH_DISPLAY()?

BR,
Jani.

> + flags = PIN_MAPPABLE;
> + vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
> + }
>   if (IS_ERR(vma))
>   goto err_unpin_display;

-- 
Jani Nikula, Intel Open Source Technology Center
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] ✓ Fi.CI.BAT: success for drm/i915: Fix pages pin counting around swizzle quirk (rev3)

2016-11-04 Thread Patchwork
== Series Details ==

Series: drm/i915: Fix pages pin counting around swizzle quirk (rev3)
URL   : https://patchwork.freedesktop.org/series/14720/
State : success

== Summary ==

Series 14720v3 drm/i915: Fix pages pin counting around swizzle quirk
https://patchwork.freedesktop.org/api/1.0/series/14720/revisions/3/mbox/

Test kms_force_connector_basic:
Subgroup force-load-detect:
dmesg-warn -> PASS   (fi-snb-2520m)

fi-bdw-5557u total:241  pass:226  dwarn:0   dfail:0   fail:0   skip:15 
fi-bsw-n3050 total:241  pass:201  dwarn:0   dfail:0   fail:0   skip:40 
fi-bxt-t5700 total:241  pass:213  dwarn:0   dfail:0   fail:0   skip:28 
fi-byt-j1900 total:241  pass:213  dwarn:0   dfail:0   fail:0   skip:28 
fi-byt-n2820 total:241  pass:209  dwarn:0   dfail:0   fail:0   skip:32 
fi-hsw-4770  total:241  pass:221  dwarn:0   dfail:0   fail:0   skip:20 
fi-hsw-4770r total:241  pass:221  dwarn:0   dfail:0   fail:0   skip:20 
fi-ilk-650   total:241  pass:188  dwarn:0   dfail:0   fail:0   skip:53 
fi-ivb-3520m total:241  pass:219  dwarn:0   dfail:0   fail:0   skip:22 
fi-ivb-3770  total:241  pass:219  dwarn:0   dfail:0   fail:0   skip:22 
fi-kbl-7200u total:241  pass:219  dwarn:0   dfail:0   fail:0   skip:22 
fi-skl-6260u total:241  pass:227  dwarn:0   dfail:0   fail:0   skip:14 
fi-skl-6700hqtotal:241  pass:220  dwarn:0   dfail:0   fail:0   skip:21 
fi-skl-6700k total:241  pass:219  dwarn:1   dfail:0   fail:0   skip:21 
fi-skl-6770hqtotal:241  pass:227  dwarn:0   dfail:0   fail:0   skip:14 
fi-snb-2520m total:241  pass:209  dwarn:0   dfail:0   fail:0   skip:32 
fi-snb-2600  total:241  pass:208  dwarn:0   dfail:0   fail:0   skip:33 

21f242e536b5077c046df785a8c4c28374941c15 drm-intel-nightly: 
2016y-11m-03d-21h-01m-03s UTC integration manifest
25582a0 drm/i915: Fix pages pin counting around swizzle quirk

== Logs ==

For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_2902/
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915: Limit Valleyview and earlier to only using mappable scanout

2016-11-04 Thread Chris Wilson
Valleyview and Cherryview are definitely limited to only scanning out
from the first 256MiB and 512MiB of the Global GTT respectively. Let's
presume that this behaviour was inherited from the display block copied
from g4x (not Ironlake) and all earlier generations are similarly
affected. For simplicity, impose that these platforms must scanout from
the mappable region.

Reported-by: Luis Botello 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98036
Fixes: 2efb813d5388 ("drm/i915: Fallback to using unmappable memory for 
scanout")
Signed-off-by: Chris Wilson 
Cc: Akash Goel 
Cc: Joonas Lahtinen 
Cc:  # v4.9-rc1+
---
This leaves Ironlake -> Haswell with a bit of uncertainty. It is also
not clear if the scanout accessible region is similarly limited on all
gen8+, and so whether we need to similarly curtail the upper range for
their scanouts.
---
 drivers/gpu/drm/i915/i915_gem.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 269e2487c104..408875fbec66 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3661,8 +3661,22 @@ i915_gem_object_pin_to_display_plane(struct 
drm_i915_gem_object *obj,
if (view->type == I915_GGTT_VIEW_NORMAL)
vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
   PIN_MAPPABLE | PIN_NONBLOCK);
-   if (IS_ERR(vma))
-   vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 0);
+   if (IS_ERR(vma)) {
+   struct drm_i915_private *i915 = to_i915(obj->base.dev);
+   unsigned int flags;
+
+   /* Valleyview and Cherryview are definitely limited to scanning
+* out the first 256MiB and 512MiB respectively. Lets presume
+* this behaviour was inherited from their g4x display engine
+* and that all earlier gen are similarly limited.
+*/
+   flags = 0;
+   if (INTEL_GEN(i915) < 5 ||
+   IS_VALLEYVIEW(i915) ||
+   IS_CHERRYVIEW(i915))
+   flags = PIN_MAPPABLE;
+   vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
+   }
if (IS_ERR(vma))
goto err_unpin_display;
 
-- 
2.10.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v2] drm/i915: Fix pages pin counting around swizzle quirk

2016-11-04 Thread Chris Wilson
commit bc0629a76726 ("drm/i915: Track pages pinned due to swizzling
quirk") fixed one problem, but revealed a whole lot more. The root cause
of the pin count mismatch for the swizzle quirk (for L-shaped memory on
gen3/4) was that we were incrementing the pages_pin_count upon getting
the backing pages but then overwriting the pages_pin_count to set it to
1 afterwards. With a little bit of adjustment to satisfy the GEM_BUG_ON
sanitychecks, the fix is to replace the explicit atomic_set with an
atomic_inc.

v2: Consistently use atomics (not mix atomics and helpers) within the
lowlevel get_pages routines. This makes the atomic operations much
clearer.

Fixes: 1233e2db199d ("drm/i915: Move object backing storage manipulation")
Signed-off-by: Chris Wilson 
Cc: Joonas Lahtinen 
Cc: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/i915_gem.c| 47 +++---
 drivers/gpu/drm/i915/i915_gem_gtt.c|  7 +
 drivers/gpu/drm/i915/i915_gem_tiling.c |  1 +
 3 files changed, 34 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a7a9ae2c4bce..269e2487c104 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2376,12 +2376,6 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object 
*obj)
if (i915_gem_object_needs_bit17_swizzle(obj))
i915_gem_object_do_bit_17_swizzle(obj, st);
 
-   if (i915_gem_object_is_tiled(obj) &&
-   dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
-   __i915_gem_object_pin_pages(obj);
-   obj->mm.quirked = true;
-   }
-
return st;
 
 err_pages:
@@ -2414,12 +2408,21 @@ void __i915_gem_object_set_pages(struct 
drm_i915_gem_object *obj,
obj->mm.get_page.sg_idx = 0;
 
obj->mm.pages = pages;
+
+   if (i915_gem_object_is_tiled(obj) &&
+   to_i915(obj->base.dev)->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
+   GEM_BUG_ON(obj->mm.quirked);
+   __i915_gem_object_pin_pages(obj);
+   obj->mm.quirked = true;
+   }
 }
 
 static int i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 {
struct sg_table *pages;
 
+   GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
+
if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) {
DRM_DEBUG("Attempting to obtain a purgeable object\n");
return -EFAULT;
@@ -2448,17 +2451,15 @@ int __i915_gem_object_get_pages(struct 
drm_i915_gem_object *obj)
if (err)
return err;
 
-   if (likely(obj->mm.pages)) {
-   __i915_gem_object_pin_pages(obj);
-   goto unlock;
-   }
-
-   GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
+   if (unlikely(!obj->mm.pages)) {
+   err = i915_gem_object_get_pages(obj);
+   if (err)
+   goto unlock;
 
-   err = i915_gem_object_get_pages(obj);
-   if (!err)
-   atomic_set_release(&obj->mm.pages_pin_count, 1);
+   smp_mb__before_atomic();
+   }
 
+   atomic_inc(&obj->mm.pages_pin_count);
 unlock:
mutex_unlock(&obj->mm.lock);
return err;
@@ -2528,12 +2529,14 @@ void *i915_gem_object_pin_map(struct 
drm_i915_gem_object *obj,
 
pinned = true;
if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) {
-   ret = i915_gem_object_get_pages(obj);
-   if (ret)
-   goto err_unlock;
+   if (unlikely(!obj->mm.pages)) {
+   ret = i915_gem_object_get_pages(obj);
+   if (ret)
+   goto err_unlock;
 
-   GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count));
-   atomic_set_release(&obj->mm.pages_pin_count, 1);
+   smp_mb__before_atomic();
+   }
+   atomic_inc(&obj->mm.pages_pin_count);
pinned = false;
}
GEM_BUG_ON(!obj->mm.pages);
@@ -2986,7 +2989,7 @@ int i915_vma_unbind(struct i915_vma *vma)
goto destroy;
 
GEM_BUG_ON(obj->bind_count == 0);
-   GEM_BUG_ON(!obj->mm.pages);
+   GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
 
if (i915_vma_is_map_and_fenceable(vma)) {
/* release the fence reg _after_ flushing */
@@ -3220,6 +3223,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 
alignment, u64 flags)
list_move_tail(&obj->global_link, &dev_priv->mm.bound_list);
list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
obj->bind_count++;
+   GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count);
 
return 0;
 
@@ -4272,6 +4276,7 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
obj->mm.quirked = false;
}
if (args->madv == I915_MADV_WILLNEED) {
+

[Intel-gfx] [PATCH v2] get-fences-locked

2016-11-04 Thread Chris Wilson
---
 drivers/dma-buf/reservation.c | 58 +++
 include/linux/reservation.h   |  4 +++
 2 files changed, 62 insertions(+)

diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c
index 3c9ab53be2b9..0f254d0d9bec 100644
--- a/drivers/dma-buf/reservation.c
+++ b/drivers/dma-buf/reservation.c
@@ -133,6 +133,64 @@ void reservation_object_add_excl_fence(struct 
reservation_object *obj,
 EXPORT_SYMBOL(reservation_object_add_excl_fence);
 
 /**
+ * reservation_object_get_fences_locked - Get an object's shared and exclusive
+ * fences
+ * @obj: the reservation object
+ * @pfence_excl: the returned exclusive fence (or NULL)
+ * @pshared_count: the number of shared fences returned
+ * @pshared: the array of shared fence ptrs returned (array is krealloc'd to
+ * the required size, and must be freed by caller)
+ *
+ * RETURNS
+ * Zero or -errno
+ */
+int reservation_object_get_fences_locked(struct reservation_object *obj,
+ struct dma_fence **pfence_excl,
+ unsigned *pshared_count,
+ struct dma_fence ***pshared)
+{
+   struct dma_fence **shared = NULL;
+   unsigned int count = 0;
+   struct radix_tree_iter iter;
+   void **slot;
+
+   radix_tree_for_each_slot(slot, &obj->shared, &iter, 0) {
+   struct dma_fence *fence = radix_tree_deref_slot(slot);
+
+   if (dma_fence_is_signaled(fence)) {
+   radix_tree_delete(&obj->shared, iter.index);
+   continue;
+   }
+
+   if ((count & -count) == count) {
+   struct dma_fence **nshared;
+   unsigned int sz;
+
+   sz = count ? 2*count : 1;
+   nshared = krealloc(shared,
+  sz * sizeof(*shared),
+  GFP_TEMPORARY);
+   if (!nshared) {
+   while (count--)
+   dma_fence_put(shared[count]);
+   kfree(shared);
+   return -ENOMEM;
+   }
+
+   shared = nshared;
+   }
+
+   shared[count++] = dma_fence_get(fence);
+   }
+
+   *pshared_count = count;
+   *pshared = shared;
+   *pfence_excl = dma_fence_get(rcu_dereference(obj->excl));
+   return 0;
+}
+EXPORT_SYMBOL_GPL(reservation_object_get_fences_locked);
+
+/**
  * reservation_object_get_fences_rcu - Get an object's shared and exclusive
  * fences without update side lock held
  * @obj: the reservation object
diff --git a/include/linux/reservation.h b/include/linux/reservation.h
index 697ec52427ca..4f39942906e2 100644
--- a/include/linux/reservation.h
+++ b/include/linux/reservation.h
@@ -161,6 +161,10 @@ void reservation_object_add_shared_fence(struct 
reservation_object *obj,
 void reservation_object_add_excl_fence(struct reservation_object *obj,
   struct dma_fence *fence);
 
+int reservation_object_get_fences_locked(struct reservation_object *obj,
+ struct dma_fence **pfence_excl,
+ unsigned *pshared_count,
+ struct dma_fence ***pshared);
 int reservation_object_get_fences_rcu(struct reservation_object *obj,
  struct dma_fence **pfence_excl,
  unsigned *pshared_count,
-- 
2.10.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: Fix pages pin counting around swizzle quirk

2016-11-04 Thread Chris Wilson
On Fri, Nov 04, 2016 at 09:36:31AM +0000, Chris Wilson wrote:
> On Fri, Nov 04, 2016 at 10:50:44AM +0200, Joonas Lahtinen wrote:
> > On ke, 2016-11-02 at 09:43 +0000, Chris Wilson wrote:
> > > @@ -2458,17 +2459,16 @@ int __i915_gem_object_get_pages(struct 
> > > drm_i915_gem_object *obj)
> > >   if (err)
> > >   return err;
> > >  
> > > - if (likely(obj->mm.pages)) {
> > > - __i915_gem_object_pin_pages(obj);
> > > - goto unlock;
> > > - }
> > > -
> > > - GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
> > > + if (unlikely(!obj->mm.pages)) {
> > > + GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
> > > + err = i915_gem_object_get_pages(obj);
> > > + if (err)
> > > + goto unlock;
> > >  
> > > - err = i915_gem_object_get_pages(obj);
> > > - if (!err)
> > > - atomic_set_release(&obj->mm.pages_pin_count, 1);
> > > + smp_mb__before_atomic();
> > 
> > This is not cool without atomic in sight. Inline wrap as
> > __i915_gem_object_pages_mb() or something.
> 
> My first thought was to put in i915_gem_object_get_pages() since it
> closes the action of setting up the obj->mm.pages and co. I didn't like
> that because the association then with the use of the pages_pin_count as
> the mutex was not as apparent. Now that you cannot see the atomic_inc()
> at all here, you are left confused!
> 
> Would you rather this just used the raw atomic_inc() here?

Actually, I like using atomics better here. It is definitely consistent
as we then don't mix the raw atomics and the helpers.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] ✗ Fi.CI.BAT: warning for drm/i915/dp: Update connector status for DP MST hotplugs (rev2)

2016-11-04 Thread Saarinen, Jani
> == Series Details ==
> 
> Series: drm/i915/dp: Update connector status for DP MST hotplugs (rev2)
> URL   : https://patchwork.freedesktop.org/series/14821/
> State : warning
> 
> == Summary ==
> 
> Series 14821v2 drm/i915/dp: Update connector status for DP MST hotplugs
> https://patchwork.freedesktop.org/api/1.0/series/14821/revisions/2/mbox/
> 
> Test kms_force_connector_basic:
> Subgroup force-edid:
> pass   -> DMESG-WARN (fi-snb-2520m)
https://bugs.freedesktop.org/show_bug.cgi?id=74102 ?

> Subgroup force-load-detect:
> dmesg-warn -> PASS   (fi-snb-2520m)
> 
> fi-bdw-5557u total:241  pass:226  dwarn:0   dfail:0   fail:0   skip:15
> fi-bsw-n3050 total:241  pass:201  dwarn:0   dfail:0   fail:0   skip:40
> fi-bxt-t5700 total:241  pass:213  dwarn:0   dfail:0   fail:0   skip:28
> fi-byt-j1900 total:241  pass:213  dwarn:0   dfail:0   fail:0   skip:28
> fi-hsw-4770  total:241  pass:221  dwarn:0   dfail:0   fail:0   skip:20
> fi-hsw-4770r total:241  pass:221  dwarn:0   dfail:0   fail:0   skip:20
> fi-ilk-650   total:241  pass:188  dwarn:0   dfail:0   fail:0   skip:53
> fi-ivb-3520m total:241  pass:219  dwarn:0   dfail:0   fail:0   skip:22
> fi-ivb-3770  total:241  pass:219  dwarn:0   dfail:0   fail:0   skip:22
> fi-kbl-7200u total:241  pass:219  dwarn:0   dfail:0   fail:0   skip:22
> fi-skl-6260u total:241  pass:227  dwarn:0   dfail:0   fail:0   skip:14
> fi-skl-6700hqtotal:241  pass:220  dwarn:0   dfail:0   fail:0   skip:21
> fi-skl-6700k total:241  pass:219  dwarn:1   dfail:0   fail:0   skip:21
> fi-skl-6770hqtotal:241  pass:227  dwarn:0   dfail:0   fail:0   skip:14
> fi-snb-2520m total:241  pass:208  dwarn:1   dfail:0   fail:0   skip:32
> fi-snb-2600  total:241  pass:208  dwarn:0   dfail:0   fail:0   skip:33
> fi-byt-n2820 failed to collect. IGT log at Patchwork_2901/fi-byt-n2820/igt.log
> 
> 21f242e536b5077c046df785a8c4c28374941c15 drm-intel-nightly: 2016y-11m-
> 03d-21h-01m-03s UTC integration manifest
> bc09ce1 drm/i915/dp: Update connector status for DP MST hotplugs
> 
> == Logs ==
> 
> For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_2901/


Jani Saarinen
Intel Finland Oy - BIC 0357606-4 - Westendinkatu 7, 02160 Espoo


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 08/15] drm/i915: Add support for emitting execbuffer tags through OA counter reports

2016-11-04 Thread Chris Wilson
On Fri, Nov 04, 2016 at 03:00:37PM +0530, sourab.gu...@intel.com wrote:
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index ead97b7f4..15921c7 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -832,6 +832,11 @@ struct drm_i915_gem_execbuffer2 {
>  #define i915_execbuffer2_get_context_id(eb2) \
>   ((eb2).rsvd1 & I915_EXEC_CONTEXT_ID_MASK)
>  
> +/* upper 32 bits of rsvd1 field contain tag */
> +#define I915_EXEC_TAG_MASK   (0xffffffff00000000UL)
> +#define i915_execbuffer2_get_tag(eb2) \
> + ((eb2).rsvd1 & I915_EXEC_TAG_MASK)

Which does not return a u32

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] ✗ Fi.CI.BAT: warning for drm/i915: Move hangcheck code out from i915_irq.c

2016-11-04 Thread Mika Kuoppala
Patchwork  writes:

> == Series Details ==
>
> Series: drm/i915: Move hangcheck code out from i915_irq.c
> URL   : https://patchwork.freedesktop.org/series/14685/
> State : warning
>
> == Summary ==
>
> Series 14685v1 drm/i915: Move hangcheck code out from i915_irq.c
> https://patchwork.freedesktop.org/api/1.0/series/14685/revisions/1/mbox/
>
> Test gem_exec_suspend:
> Subgroup basic-s3:
> pass   -> DMESG-WARN (fi-ilk-650)
> Test kms_pipe_crc_basic:
> Subgroup bad-nb-words-3:
> dmesg-warn -> PASS   (fi-ilk-650)
> Subgroup bad-source:
> dmesg-warn -> PASS   (fi-ilk-650)
> Subgroup nonblocking-crc-pipe-a-frame-sequence:
> dmesg-warn -> PASS   (fi-ilk-650)
> Subgroup suspend-read-crc-pipe-b:
> pass   -> DMESG-WARN (fi-ilk-650)

https://bugs.freedesktop.org/show_bug.cgi?id=98531

> Subgroup suspend-read-crc-pipe-c:
> pass   -> DMESG-WARN (fi-skl-6770hq)
>

https://bugs.freedesktop.org/show_bug.cgi?id=97929

Patch merged a day ago; I just forgot to press send.
Thanks for the review.

-Mika

> fi-bdw-5557u total:241  pass:226  dwarn:0   dfail:0   fail:0   skip:15 
> fi-bsw-n3050 total:241  pass:201  dwarn:0   dfail:0   fail:0   skip:40 
> fi-bxt-t5700 total:241  pass:213  dwarn:0   dfail:0   fail:0   skip:28 
> fi-byt-j1900 total:241  pass:213  dwarn:0   dfail:0   fail:0   skip:28 
> fi-byt-n2820 total:241  pass:209  dwarn:0   dfail:0   fail:0   skip:32 
> fi-hsw-4770  total:241  pass:221  dwarn:0   dfail:0   fail:0   skip:20 
> fi-hsw-4770r total:241  pass:220  dwarn:0   dfail:0   fail:0   skip:21 
> fi-ilk-650   total:241  pass:183  dwarn:4   dfail:0   fail:0   skip:54 
> fi-ivb-3520m total:241  pass:218  dwarn:0   dfail:0   fail:0   skip:23 
> fi-ivb-3770  total:241  pass:218  dwarn:0   dfail:0   fail:0   skip:23 
> fi-kbl-7200u total:241  pass:219  dwarn:0   dfail:0   fail:0   skip:22 
> fi-skl-6260u total:241  pass:227  dwarn:0   dfail:0   fail:0   skip:14 
> fi-skl-6700hqtotal:241  pass:220  dwarn:0   dfail:0   fail:0   skip:21 
> fi-skl-6700k total:241  pass:219  dwarn:1   dfail:0   fail:0   skip:21 
> fi-skl-6770hqtotal:241  pass:226  dwarn:1   dfail:0   fail:0   skip:14 
> fi-snb-2520m total:241  pass:208  dwarn:0   dfail:0   fail:0   skip:33 
> fi-snb-2600  total:241  pass:207  dwarn:0   dfail:0   fail:0   skip:34 
>
> c5ad9c11e819eebcad5b9be5aa5e991e89b26965 drm-intel-nightly: 
> 2016y-11m-01d-16h-36m-25s UTC integration manifest
> 3a0612c drm/i915: Move hangcheck code out from i915_irq.c
>
> == Logs ==
>
> For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_2880/
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 06/15] drm/i915: Populate ctx ID for periodic OA reports

2016-11-04 Thread Chris Wilson
On Fri, Nov 04, 2016 at 03:00:35PM +0530, sourab.gu...@intel.com wrote:
> +static u32 gen8_oa_buffer_get_ctx_id(struct i915_perf_stream *stream,
> + const u8 *report)
> +{
> + struct drm_i915_private *dev_priv = stream->dev_priv;
> +
> + /* The ctx ID present in the OA reports have intel_context::global_id
> +  * present, since this is programmed into the ELSP in execlist mode.
> +  * In non-execlist mode, fall back to retrieving the ctx ID from the
> +  * last saved ctx ID from command stream mode.
> +  */
> + if (i915.enable_execlists) {
> + u32 ctx_id = *(u32 *)(report + 12);
> + ctx_id &= 0xfffff;

This does not match the hw maximum. Please check to see who is correct.

> + return ctx_id;
> + } else {
> + if (!stream->cs_mode)
> + WARN_ONCE(1,
> + "CTX ID can't be retrieved if command stream mode not 
> enabled");

All these WARNs appear to be user triggerable.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 14/15] drm/i915: Mechanism to forward clock monotonic raw time in perf samples

2016-11-04 Thread Chris Wilson
On Fri, Nov 04, 2016 at 03:00:43PM +0530, sourab.gu...@intel.com wrote:
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 06c7b55..0dc2384 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -1088,6 +1088,8 @@ static int i915_driver_init_hw(struct drm_i915_private 
> *dev_priv)
>   DRM_DEBUG_DRIVER("can't enable MSI");
>   }
>  
> + i915_perf_init_late(dev_priv);
> +
>   return 0;

Just a quick one:

Create i915_driver_init_late() to capture the new init phase you want to add.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: Fix pages pin counting around swizzle quirk

2016-11-04 Thread Chris Wilson
On Fri, Nov 04, 2016 at 10:50:44AM +0200, Joonas Lahtinen wrote:
> On ke, 2016-11-02 at 09:43 +0000, Chris Wilson wrote:
> > @@ -2458,17 +2459,16 @@ int __i915_gem_object_get_pages(struct 
> > drm_i915_gem_object *obj)
> >     if (err)
> >     return err;
> >  
> > -   if (likely(obj->mm.pages)) {
> > -   __i915_gem_object_pin_pages(obj);
> > -   goto unlock;
> > -   }
> > -
> > -   GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
> > +   if (unlikely(!obj->mm.pages)) {
> > +   GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
> > +   err = i915_gem_object_get_pages(obj);
> > +   if (err)
> > +   goto unlock;
> >  
> > -   err = i915_gem_object_get_pages(obj);
> > -   if (!err)
> > -   atomic_set_release(&obj->mm.pages_pin_count, 1);
> > +   smp_mb__before_atomic();
> 
> This is not cool without atomic in sight. Inline wrap as
> __i915_gem_object_pages_mb() or something.

My first thought was to put in i915_gem_object_get_pages() since it
closes the action of setting up the obj->mm.pages and co. I didn't like
that because the association then with the use of the pages_pin_count as
the mutex was not as apparent. Now that you cannot see the atomic_inc()
at all here, you are left confused!

Would you rather this just used the raw atomic_inc() here?

> 
> > @@ -3707,6 +3707,7 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma)
> >  {
> >     int ret = 0;
> >  
> > +   GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));
> 
> Rather confusing, simple mind would think as
> __i915_gem_object_pin_pages has GEM_BUG_ON(!obj->mm.pages),
> the next branch would never be taken?

GEM_BUG_ON(vma == obj) ? Sorry not parsing very well this morning.

GEM_BUG_ON(!obj->mm.pages) would be a weaker form of the above. The
challenge is to express that the vma->page is only valid for the current
lifespan of the obj->mm.pages, should we regenerate that sg_table, we
need to regenerate the vma->pages. So I want to say that we must be
holding a pages_pin_count to utilize the vma->pages.

> >     if (vma->pages)
> >     return 0;

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 10/15] drm/i915: Extract raw GPU timestamps from OA reports to forward in perf samples

2016-11-04 Thread sourab . gupta
From: Sourab Gupta 

The OA reports contain the least significant 32 bits of the gpu timestamp.
This patch enables retrieval of the timestamp field from OA reports, to
forward as 64 bit raw gpu timestamps in the perf samples.

Signed-off-by: Sourab Gupta 
---
 drivers/gpu/drm/i915/i915_drv.h  |  1 +
 drivers/gpu/drm/i915/i915_perf.c | 47 ++--
 drivers/gpu/drm/i915/i915_reg.h  |  4 
 3 files changed, 40 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a05335a..119c82b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2241,6 +2241,7 @@ struct drm_i915_private {
u32 ctx_flexeu0_off;
u32 n_pending_periodic_samples;
u32 pending_periodic_ts;
+   u64 last_gpu_ts;
 
struct i915_oa_ops ops;
const struct i915_oa_format *oa_formats;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 516fd54..b05c41a 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -903,6 +903,24 @@ static int append_sample(struct i915_perf_stream *stream,
return 0;
 }
 
+static u64 get_gpu_ts_from_oa_report(struct drm_i915_private *dev_priv,
+   const u8 *report)
+{
+   u32 sample_ts = *(u32 *)(report + 4);
+   u32 delta;
+
+   /*
+* NB: We have to assume we're updating last_gpu_ts frequently
+* enough that it's never possible to see multiple overflows before
+* we compare sample_ts to last_gpu_ts. Since this is significantly
+* large duration (~6min for 80ns ts base), we can safely assume so.
+*/
+   delta = sample_ts - (u32)dev_priv->perf.oa.last_gpu_ts;
+   dev_priv->perf.oa.last_gpu_ts += delta;
+
+   return dev_priv->perf.oa.last_gpu_ts;
+}
+
 static int append_oa_buffer_sample(struct i915_perf_stream *stream,
char __user *buf, size_t count,
size_t *offset, const u8 *report)
@@ -940,10 +958,9 @@ static int append_oa_buffer_sample(struct i915_perf_stream 
*stream,
if (sample_flags & SAMPLE_TAG)
data.tag = dev_priv->perf.last_tag;
 
-   /* Derive timestamp from OA report, after scaling with the ts base */
-#warning "FIXME: append_oa_buffer_sample: derive the timestamp from OA report"
+   /* Derive timestamp from OA report */
if (sample_flags & SAMPLE_TS)
-   data.ts = 0;
+   data.ts = get_gpu_ts_from_oa_report(dev_priv, report);
 
if (sample_flags & SAMPLE_OA_REPORT)
data.report = report;
@@ -1443,6 +1460,7 @@ static int append_one_cs_sample(struct i915_perf_stream 
*stream,
enum intel_engine_id id = stream->engine;
struct sample_data data = { 0 };
u32 sample_flags = stream->sample_flags;
+   u64 gpu_ts = 0;
int ret = 0;
 
if (sample_flags & SAMPLE_OA_REPORT) {
@@ -1459,6 +1477,9 @@ static int append_one_cs_sample(struct i915_perf_stream 
*stream,
sample_ts, U32_MAX);
if (ret)
return ret;
+
+   if (sample_flags & SAMPLE_TS)
+   gpu_ts = get_gpu_ts_from_oa_report(dev_priv, report);
}
 
if (sample_flags & SAMPLE_OA_SOURCE_INFO)
@@ -1480,20 +1501,16 @@ static int append_one_cs_sample(struct i915_perf_stream 
*stream,
}
 
if (sample_flags & SAMPLE_TS) {
-   /* For RCS, if OA samples are also being collected, derive the
-* timestamp from OA report, after scaling with the TS base.
+   /* If OA sampling is enabled, derive the ts from OA report.
 * Else, forward the timestamp collected via command stream.
 */
-#warning "FIXME: append_one_cs_sample: derive the timestamp from OA report"
-   if (sample_flags & SAMPLE_OA_REPORT)
-   data.ts = 0;
-   else
-   data.ts = *(u64 *)
+   if (!(sample_flags & SAMPLE_OA_REPORT))
+   gpu_ts = *(u64 *)
(dev_priv->perf.command_stream_buf[id].addr +
node->ts_offset);
+   data.ts = gpu_ts;
}
 
-
return append_sample(stream, buf, count, offset, );
 }
 
@@ -2279,9 +2296,15 @@ static void i915_ring_stream_enable(struct 
i915_perf_stream *stream)
 {
struct drm_i915_private *dev_priv = stream->dev_priv;
 
-   if (stream->sample_flags & SAMPLE_OA_REPORT)
+   if (stream->sample_flags & SAMPLE_OA_REPORT) {
dev_priv->perf.oa.ops.oa_enable(dev_priv);
 
+   if 

[Intel-gfx] [PATCH 11/15] drm/i915: Support opening multiple concurrent perf streams

2016-11-04 Thread sourab . gupta
From: Sourab Gupta 

This patch adds support for opening multiple concurrent perf streams for
different gpu engines, while retaining the restriction that only a single
stream can be open for a particular gpu engine at a time.
This enables a userspace client to open multiple streams, one per engine,
at any time to capture sample data for multiple gpu engines.

Signed-off-by: Sourab Gupta 
---
 drivers/gpu/drm/i915/i915_drv.h  |  2 +-
 drivers/gpu/drm/i915/i915_perf.c | 69 ++--
 2 files changed, 39 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 119c82b..e912679 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2204,7 +2204,7 @@ struct drm_i915_private {
 
 
struct hrtimer poll_check_timer;
-   struct i915_perf_stream *exclusive_stream;
+   struct i915_perf_stream *ring_stream[I915_NUM_ENGINES];
wait_queue_head_t poll_wq[I915_NUM_ENGINES];
atomic_t pollin[I915_NUM_ENGINES];
 
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index b05c41a..8eb80e8 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1086,7 +1086,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream 
*stream,
 * an invalid ID. It could be good to annotate these
 * reports with a _CTX_SWITCH_AWAY reason later.
 */
-   if (!dev_priv->perf.exclusive_stream->ctx ||
+   if (!stream->ctx ||
dev_priv->perf.oa.specific_ctx_id == ctx_id ||
dev_priv->perf.oa.oa_buffer.last_ctx_id == ctx_id) {
 
@@ -1097,7 +1097,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream 
*stream,
 * the switch-away reports with an invalid
 * context id to be recognisable by userspace.
 */
-   if (dev_priv->perf.exclusive_stream->ctx &&
+   if (stream->ctx &&
dev_priv->perf.oa.specific_ctx_id != ctx_id)
report32[2] = 0x;
 
@@ -1763,7 +1763,7 @@ static void i915_ring_stream_destroy(struct 
i915_perf_stream *stream)
 {
struct drm_i915_private *dev_priv = stream->dev_priv;
 
-   BUG_ON(stream != dev_priv->perf.exclusive_stream);
+   BUG_ON(stream != dev_priv->perf.ring_stream[stream->engine]);
 
if (stream->using_oa) {
dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
@@ -1777,7 +1777,7 @@ static void i915_ring_stream_destroy(struct 
i915_perf_stream *stream)
if (stream->cs_mode)
free_command_stream_buf(dev_priv, stream->engine);
 
-   dev_priv->perf.exclusive_stream = NULL;
+   dev_priv->perf.ring_stream[stream->engine] = NULL;
 }
 
 static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
@@ -2220,14 +2220,14 @@ static void gen7_update_oacontrol_locked(struct 
drm_i915_private *dev_priv)
 {
assert_spin_locked(_priv->perf.hook_lock);
 
-   if (dev_priv->perf.exclusive_stream->state !=
+   if (dev_priv->perf.ring_stream[RCS]->state !=
I915_PERF_STREAM_DISABLED) {
unsigned long ctx_id = 0;
 
-   if (dev_priv->perf.exclusive_stream->ctx)
+   if (dev_priv->perf.ring_stream[RCS]->ctx)
ctx_id = dev_priv->perf.oa.specific_ctx_id;
 
-   if (dev_priv->perf.exclusive_stream->ctx == NULL || ctx_id) {
+   if (dev_priv->perf.ring_stream[RCS]->ctx == NULL || ctx_id) {
bool periodic = dev_priv->perf.oa.periodic;
u32 period_exponent = dev_priv->perf.oa.period_exponent;
u32 report_format = dev_priv->perf.oa.oa_buffer.format;
@@ -2366,15 +2366,6 @@ static int i915_ring_stream_init(struct i915_perf_stream 
*stream,
SAMPLE_TS);
int ret;
 
-   /* To avoid the complexity of having to accurately filter
-* counter reports and marshal to the appropriate client
-* we currently only allow exclusive access
-*/
-   if (dev_priv->perf.exclusive_stream) {
-   DRM_ERROR("Stream already in use\n");
-   return -EBUSY;
-   }
-
if ((props->sample_flags & SAMPLE_CTX_ID) && !props->cs_mode) {
if (IS_HASWELL(dev_priv)) {
DRM_ERROR(
@@ -2392,6 +2383,12 @@ static int i915_ring_stream_init(struct i915_perf_stream 
*stream,
if (require_oa_unit) {
int format_size;
 
+   /* Only allow exclusive access per stream */
+   if (dev_priv->perf.ring_stream[RCS]) {
+   DRM_ERROR("Stream:0 

[Intel-gfx] [PATCH 12/15] time: Expose current clocksource in use by timekeeping framework

2016-11-04 Thread sourab . gupta
From: Sourab Gupta 

For drivers to be able to use the cross timestamp framework,
they need to know the current clocksource being used by the
kernel timekeeping. This is needed because the callback given by the driver
to get_device_system_crosststamp(), in order to synchronously read
the device time and system counter value, requires knowledge of
the clocksource being used to read the system counter value (as a part of
struct system_counterval_t).

Signed-off-by: Sourab Gupta 
---
 include/linux/timekeeping.h |  5 +
 kernel/time/timekeeping.c   | 12 
 2 files changed, 17 insertions(+)

diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 816b754..101aaa3 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -336,6 +336,11 @@ extern int get_device_system_crosststamp(
struct system_device_crosststamp *xtstamp);
 
 /*
+ * Get current clocksource used by system timekeeping framework
+ */
+struct clocksource *get_current_clocksource(void);
+
+/*
  * Simultaneously snapshot realtime and monotonic raw clocks
  */
 extern void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index e07fb09..bb1e9c0 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1138,6 +1138,18 @@ int get_device_system_crosststamp(int (*get_time_fn)
 EXPORT_SYMBOL_GPL(get_device_system_crosststamp);
 
 /**
+ * get_current_clocksource - Returns the current clocksource in used by tk_core
+ *
+ */
+struct clocksource *get_current_clocksource(void)
+{
+   struct timekeeper *tk = _core.timekeeper;
+
+   return tk->tkr_mono.clock;
+}
+EXPORT_SYMBOL_GPL(get_current_clocksource);
+
+/**
  * do_gettimeofday - Returns the time of day in a timeval
  * @tv:pointer to the timeval to be set
  *
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 14/15] drm/i915: Mechanism to forward clock monotonic raw time in perf samples

2016-11-04 Thread sourab . gupta
From: Sourab Gupta 

Currently, we have the ability to only forward the GPU timestamps in the
samples (which are generated via OA reports or PIPE_CONTROL commands
inserted in the ring). This limits the ability to correlate these samples
with the system events. If we scale the GPU timestamps according to the
timestamp base/frequency info present in bspec, it is observed that the
timestamps drift really quickly from the system time.

An ability is therefore needed to report timestamps in different clock
domains, such as CLOCK_MONOTONIC (or _MONO_RAW), in the perf samples to
be of more practical use to the userspace. This ability becomes important
when we want to correlate/plot GPU events/samples with other system events
on the same timeline (e.g. vblank events, or timestamps when work was
submitted to kernel, etc.)

The patch here proposes a mechanism to achieve this. The correlation between
gpu time and system time is established using the cross timestamp framework.
For this purpose, the timestamp clock associated with the command stream is
abstracted as a timecounter/cyclecounter, before utilizing the cross timestamp
framework to retrieve correlated gpu/system time values.
Several such gpu/system time pairs are then used to detect and correct
the error in the published gpu timestamp clock frequency. The userspace can
request CLOCK_MONOTONIC_RAW timestamps in samples by requesting the
corresponding property while opening the stream.

Signed-off-by: Sourab Gupta 
---
 drivers/gpu/drm/i915/i915_drv.c  |   2 +
 drivers/gpu/drm/i915/i915_drv.h  |  24 +++-
 drivers/gpu/drm/i915/i915_perf.c | 273 +++
 include/uapi/drm/i915_drm.h  |   9 +-
 4 files changed, 284 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 06c7b55..0dc2384 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1088,6 +1088,8 @@ static int i915_driver_init_hw(struct drm_i915_private 
*dev_priv)
DRM_DEBUG_DRIVER("can't enable MSI");
}
 
+   i915_perf_init_late(dev_priv);
+
return 0;
 
 out_ggtt:
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e912679..557a124 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -42,6 +42,9 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 
 #include 
 #include 
@@ -1843,6 +1846,9 @@ struct i915_perf_stream {
/* Whether the OA unit is in use */
bool using_oa;
 
+   /* monotonic_raw clk timestamp (in ns) for last sample */
+   u64 last_sample_ts;
+
const struct i915_perf_stream_ops *ops;
 };
 
@@ -1889,6 +1895,20 @@ struct i915_perf_cs_data_node {
u32 tag;
 };
 
+/**
+ * struct i915_clock_info - decribes i915 timestamp clock
+ *
+ */
+struct i915_clock_info {
+   struct cyclecounter cc;
+   struct timecounter tc;
+   struct system_device_crosststamp xtstamp;
+   ktime_t clk_offset; /* Offset (in ns) between monoraw clk and gpu time 
*/
+   u32 timestamp_frequency;
+   u32 resync_period; /* in msecs */
+   struct delayed_work clk_sync_work;
+};
+
 struct drm_i915_private {
struct drm_device drm;
 
@@ -2189,6 +2209,8 @@ struct drm_i915_private {
 
struct i915_runtime_pm pm;
 
+   struct i915_clock_info ts_clk_info;
+
struct {
bool initialized;
 
@@ -2213,7 +2235,6 @@ struct drm_i915_private {
 
bool periodic;
int period_exponent;
-   int timestamp_frequency;
 
int tail_margin;
 
@@ -3796,6 +3817,7 @@ int intel_engine_cmd_parser(struct intel_engine_cs 
*engine,
 
 /* i915_perf.c */
 extern void i915_perf_init(struct drm_i915_private *dev_priv);
+extern void i915_perf_init_late(struct drm_i915_private *dev_priv);
 extern void i915_perf_fini(struct drm_i915_private *dev_priv);
 extern void i915_perf_register(struct drm_i915_private *dev_priv);
 extern void i915_perf_unregister(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 8eb80e8..b11e953 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -189,6 +189,7 @@
 
 #include 
 #include 
+#include 
 
 #include "i915_drv.h"
 #include "intel_ringbuffer.h"
@@ -228,6 +229,9 @@
 #define POLL_FREQUENCY 200
 #define POLL_PERIOD (NSEC_PER_SEC / POLL_FREQUENCY)
 
+#define MAX_CLK_SYNC_PERIOD (60*MSEC_PER_SEC)
+#define INIT_CLK_SYNC_PERIOD (20) /* in msecs */
+
 static u32 i915_perf_stream_paranoid = true;
 
 /* The maximum exponent the hardware accepts is 63 (essentially it selects one
@@ -254,13 +258,24 @@ static u32 i915_perf_stream_paranoid = true;
 #define TS_ADDR_ALIGN 8
 #define I915_PERF_TS_SAMPLE_SIZE 8
 
+/* Published frequency of GT command stream timestamp 

[Intel-gfx] [PATCH 00/15] Framework to collect command stream gpu metrics using i915 perf

2016-11-04 Thread sourab . gupta
From: Sourab Gupta 

Refloating the series rebased on Robert's latest patchset. Since Robert's
patches are being reviewed and this patch series extends his framework to
enable multiple concurrent streams to capture command stream based metrics,
it would be good to keep this work in perspective.
Looking to receive feedback on the series (and possibly r-b's :))

This series adds framework for collection of gpu performance metrics
associated with the command stream of a particular engine. These metrics
include OA reports, timestamps, mmio metrics, etc. These metrics are
collected around batchbuffer boundaries.

This work utilizes the underlying infrastructure introduced in Robert Bragg's
patches for collecting periodic OA counter snapshots (based on Haswell):
https://patchwork.freedesktop.org/series/14505/

This patch set is based on Gen8+ version of Robert's patches which can be found
here: https://github.com/rib/linux/tree/wip/rib/oa-next

In the last series floated earlier
(https://patchwork.freedesktop.org/series/6154/), based on Chris's suggestion,
I had tried experimenting with using the cross timestamp framework for the
purpose of retrieving tightly coupled device/system timestamps. In our case,
this framework enables us to have correlated pairs of gpu+system time which
can be used over a period of time to correct the frequency of the timestamp
clock, and thus enables us to accurately send system time (_MONO_RAW) to
userspace as requested. The results are generally observed to be quite a bit
better with the use of cross timestamps, and the frequency delta gradually
tapers down to 0 with increasing correction periods.
The use of the cross timestamp framework, though, requires us to have a
cyclecounter/timecounter abstraction for the timestamp clocksource, and
further requires a few changes in the kernel timekeeping/clocksource code. I am
looking for feedback on the use of this framework and the changes involved.

These patches can be found for viewing at
https://github.com/sourabgu/linux/tree/oa-19oct

Sourab Gupta (15):
  drm/i915: Add ctx getparam ioctl parameter to retrieve ctx unique id
  drm/i915: Expose OA sample source to userspace
  drm/i915: Framework for capturing command stream based OA reports
  drm/i915: flush periodic samples, in case of no pending CS sample
requests
  drm/i915: Handle the overflow condition for command stream buf
  drm/i915: Populate ctx ID for periodic OA reports
  drm/i915: Add support for having pid output with OA report
  drm/i915: Add support for emitting execbuffer tags through OA counter
reports
  drm/i915: Extend i915 perf framework for collecting timestamps on all
gpu engines
  drm/i915: Extract raw GPU timestamps from OA reports to forward in
perf samples
  drm/i915: Support opening multiple concurrent perf streams
  time: Expose current clocksource in use by timekeeping framework
  time: export clocks_calc_mult_shift
  drm/i915: Mechanism to forward clock monotonic raw time in perf
samples
  drm/i915: Support for capturing MMIO register values

 drivers/gpu/drm/i915/i915_drv.c|2 +
 drivers/gpu/drm/i915/i915_drv.h|  112 +-
 drivers/gpu/drm/i915/i915_gem_context.c|3 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |6 +
 drivers/gpu/drm/i915/i915_perf.c   | 1911 
 drivers/gpu/drm/i915/i915_reg.h|6 +
 include/linux/timekeeping.h|5 +
 include/uapi/drm/i915_drm.h|   79 ++
 kernel/time/clocksource.c  |1 +
 kernel/time/timekeeping.c  |   12 +
 10 files changed, 1910 insertions(+), 227 deletions(-)

-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 07/15] drm/i915: Add support for having pid output with OA report

2016-11-04 Thread sourab . gupta
From: Sourab Gupta 

This patch introduces flags and adds support for having pid output with
the OA reports generated through the RCS commands.

When the stream is opened with pid sample type, the pid information is also
captured through the command stream samples and forwarded along with the
OA reports.

Signed-off-by: Sourab Gupta 
---
 drivers/gpu/drm/i915/i915_drv.h  |  2 ++
 drivers/gpu/drm/i915/i915_perf.c | 48 +++-
 include/uapi/drm/i915_drm.h  |  7 ++
 3 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 853cc7db..f250e7b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1872,6 +1872,7 @@ struct i915_perf_cs_data_node {
struct drm_i915_gem_request *request;
u32 offset;
u32 ctx_id;
+   u32 pid;
 };
 
 struct drm_i915_private {
@@ -2242,6 +2243,7 @@ struct drm_i915_private {
} command_stream_buf;
 
u32 last_ctx_id;
+   u32 last_pid;
struct list_head node_list;
spinlock_t node_list_lock;
} perf;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 84457f8..894d7a6 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -254,6 +254,7 @@ static u32 i915_perf_stream_paranoid = true;
 struct oa_sample_data {
u32 source;
u32 ctx_id;
+   u32 pid;
const u8 *report;
 };
 
@@ -309,6 +310,7 @@ static const enum intel_engine_id 
user_ring_map[I915_USER_RINGS + 1] = {
 #define SAMPLE_OA_REPORT   (1<<0)
 #define SAMPLE_OA_SOURCE_INFO  (1<<1)
 #define SAMPLE_CTX_ID  (1<<2)
+#define SAMPLE_PID (1<<3)
 
 struct perf_open_properties {
u32 sample_flags;
@@ -484,6 +486,7 @@ static void i915_perf_command_stream_hook_oa(struct 
drm_i915_gem_request *req)
goto out;
 
entry->ctx_id = ctx->hw_id;
+   entry->pid = current->pid;
i915_gem_request_assign(>request, req);
 
addr = dev_priv->perf.command_stream_buf.vma->node.start +
@@ -735,6 +738,12 @@ static int append_oa_sample(struct i915_perf_stream 
*stream,
buf += 4;
}
 
+   if (sample_flags & SAMPLE_PID) {
+   if (copy_to_user(buf, >pid, 4))
+   return -EFAULT;
+   buf += 4;
+   }
+
if (sample_flags & SAMPLE_OA_REPORT) {
if (copy_to_user(buf, data->report, report_size))
return -EFAULT;
@@ -777,6 +786,9 @@ static int append_oa_buffer_sample(struct i915_perf_stream 
*stream,
data.ctx_id = dev_priv->perf.oa.ops.oa_buffer_get_ctx_id(
stream, report);
 
+   if (sample_flags & SAMPLE_PID)
+   data.pid = dev_priv->perf.last_pid;
+
if (sample_flags & SAMPLE_OA_REPORT)
data.report = report;
 
@@ -1293,6 +1305,11 @@ static int append_oa_rcs_sample(struct i915_perf_stream 
*stream,
dev_priv->perf.last_ctx_id = node->ctx_id;
}
 
+   if (sample_flags & SAMPLE_PID) {
+   data.pid = node->pid;
+   dev_priv->perf.last_pid = node->pid;
+   }
+
if (sample_flags & SAMPLE_OA_REPORT)
data.report = report;
 
@@ -2127,6 +2144,7 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
struct drm_i915_private *dev_priv = stream->dev_priv;
bool require_oa_unit = props->sample_flags & (SAMPLE_OA_REPORT |
  SAMPLE_OA_SOURCE_INFO);
+   bool require_cs_mode = props->sample_flags & SAMPLE_PID;
bool cs_sample_data = props->sample_flags & SAMPLE_OA_REPORT;
int ret;
 
@@ -2268,6 +2286,20 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
if (props->sample_flags & SAMPLE_CTX_ID) {
stream->sample_flags |= SAMPLE_CTX_ID;
stream->sample_size += 4;
+
+   /*
+* NB: it's meaningful to request SAMPLE_CTX_ID with just CS
+* mode or periodic OA mode sampling but we don't allow
+* SAMPLE_CTX_ID without either mode
+*/
+   if (!require_oa_unit)
+   require_cs_mode = true;
+   }
+
+   if (require_cs_mode && !props->cs_mode) {
+   DRM_ERROR("PID sampling requires a ring to be specified");
+   ret = -EINVAL;
+   goto cs_error;
}
 
if (props->cs_mode) {
@@ -2278,7 +2310,13 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
goto cs_error;
}
 
-   if (!(props->sample_flags & SAMPLE_CTX_ID)) {
+   /*
+* The only time we should 

[Intel-gfx] [PATCH 15/15] drm/i915: Support for capturing MMIO register values

2016-11-04 Thread sourab . gupta
From: Sourab Gupta 

This patch adds support for capturing MMIO register values through
i915 perf interface.
The userspace can request up to 8 MMIO register values to be dumped.
The addresses of these registers can be passed through the corresponding
property 'value' field while opening the stream.
The commands to dump the values of these MMIO registers are then
inserted into the ring along with other commands.

Signed-off-by: Sourab Gupta 
---
 drivers/gpu/drm/i915/i915_drv.h  |   4 +
 drivers/gpu/drm/i915/i915_perf.c | 153 ++-
 include/uapi/drm/i915_drm.h  |  14 
 3 files changed, 168 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 557a124..14cd9cf 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1885,6 +1885,7 @@ struct i915_perf_cs_data_node {
u32 start_offset;
u32 oa_offset;
u32 ts_offset;
+   u32 mmio_offset;
 
/* buffer size corresponding to this entry */
u32 size;
@@ -2230,6 +2231,9 @@ struct drm_i915_private {
wait_queue_head_t poll_wq[I915_NUM_ENGINES];
atomic_t pollin[I915_NUM_ENGINES];
 
+   u32 num_mmio;
+   u32 mmio_list[I915_PERF_MMIO_NUM_MAX];
+
struct {
u32 specific_ctx_id;
 
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index b11e953..ed6b31f 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -277,6 +277,7 @@ struct sample_data {
u64 gpu_ts;
u64 clk_monoraw;
const u8 *report;
+   const u8 *mmio;
 };
 
 /* for sysctl proc_dointvec_minmax of i915_oa_min_timer_exponent */
@@ -335,6 +336,7 @@ static const enum intel_engine_id 
user_ring_map[I915_USER_RINGS + 1] = {
 #define SAMPLE_TAG (1<<4)
 #define SAMPLE_TS  (1<<5)
 #define SAMPLE_CLK_MONO_RAW(1<<6)
+#define SAMPLE_MMIO(1<<7)
 
 struct perf_open_properties {
u32 sample_flags;
@@ -567,6 +569,9 @@ static int insert_perf_entry(struct drm_i915_private 
*dev_priv,
sample_ts = true;
}
 
+   if (sample_flags & SAMPLE_MMIO)
+   entry_size += 4*dev_priv->perf.num_mmio;
+
spin_lock(_priv->perf.node_list_lock[id]);
if (list_empty(_priv->perf.node_list[id])) {
offset = 0;
@@ -644,6 +649,10 @@ out:
entry->ts_offset = ALIGN(entry->ts_offset, TS_ADDR_ALIGN);
offset = entry->ts_offset + I915_PERF_TS_SAMPLE_SIZE;
}
+   if (sample_flags & SAMPLE_MMIO) {
+   entry->mmio_offset = offset;
+   offset = entry->mmio_offset + 4*dev_priv->perf.num_mmio;
+   }
 
list_add_tail(>link, _priv->perf.node_list[id]);
 #ifndef CMD_STREAM_BUF_OVERFLOW_ALLOWED
@@ -744,6 +753,47 @@ static int i915_ring_stream_capture_ts(struct 
drm_i915_gem_request *req,
return 0;
 }
 
+static int i915_ring_stream_capture_mmio(struct drm_i915_gem_request *req,
+   u32 offset)
+{
+   struct drm_i915_private *dev_priv = req->i915;
+   enum intel_engine_id id = req->engine->id;
+   struct intel_ring *ring = req->ring;
+   int num_mmio = dev_priv->perf.num_mmio;
+   u32 mmio_addr, addr = 0;
+   int ret, i;
+
+   ret = intel_ring_begin(req, 4*num_mmio);
+   if (ret)
+   return ret;
+
+   mmio_addr =
+   dev_priv->perf.command_stream_buf[id].vma->node.start + offset;
+
+   for (i = 0; i < num_mmio; i++) {
+   uint32_t cmd;
+
+   addr = mmio_addr + 4*i;
+
+   if (INTEL_INFO(dev_priv)->gen >= 8)
+   cmd = MI_STORE_REGISTER_MEM_GEN8 |
+   MI_SRM_LRM_GLOBAL_GTT;
+   else
+   cmd = MI_STORE_REGISTER_MEM |
+   MI_SRM_LRM_GLOBAL_GTT;
+
+   intel_ring_emit(ring, cmd);
+   intel_ring_emit(ring, dev_priv->perf.mmio_list[i]);
+   intel_ring_emit(ring, addr);
+   if (INTEL_INFO(dev_priv)->gen >= 8)
+   intel_ring_emit(ring, 0);
+   else
+   intel_ring_emit(ring, MI_NOOP);
+   }
+   intel_ring_advance(ring);
+   return 0;
+}
+
 static void i915_ring_stream_cs_hook(struct i915_perf_stream *stream,
struct drm_i915_gem_request *req, u32 tag)
 {
@@ -784,6 +834,12 @@ static void i915_ring_stream_cs_hook(struct 
i915_perf_stream *stream,
goto err_unref;
}
 
+   if (sample_flags & SAMPLE_MMIO) {
+   ret = i915_ring_stream_capture_mmio(req,
+   entry->mmio_offset);
+   if (ret)
+   goto 

[Intel-gfx] [PATCH 05/15] drm/i915: Handle the overflow condition for command stream buf

2016-11-04 Thread sourab . gupta
From: Sourab Gupta 

Add a compile time option for detecting the overflow condition of command
stream buffer, and not overwriting the old entries in such a case.
Also, set a status flag to forward the overflow condition to userspace if
overflow is detected.

Signed-off-by: Sourab Gupta 
---
 drivers/gpu/drm/i915/i915_drv.h  |  2 ++
 drivers/gpu/drm/i915/i915_perf.c | 75 
 2 files changed, 62 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index dedb7f8..e9cf939 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2235,6 +2235,8 @@ struct drm_i915_private {
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
u8 *addr;
+#define I915_PERF_CMD_STREAM_BUF_STATUS_OVERFLOW (1<<0)
+   u32 status;
} command_stream_buf;
 
struct list_head node_list;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 2ee4711..e10e78f 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -247,6 +247,9 @@ static u32 i915_perf_stream_paranoid = true;
 #define GEN8_OAREPORT_REASON_GO_TRANSITION  (1<<23)
 #define GEN9_OAREPORT_REASON_CLK_RATIO  (1<<24)
 
+/* For determining the behavior on overflow of command stream samples */
+#define CMD_STREAM_BUF_OVERFLOW_ALLOWED
+
 /* Data common to periodic and RCS based samples */
 struct oa_sample_data {
u32 source;
@@ -348,6 +351,7 @@ void i915_perf_command_stream_hook(struct 
drm_i915_gem_request *request)
mutex_unlock(_priv->perf.streams_lock);
 }
 
+#ifdef CMD_STREAM_BUF_OVERFLOW_ALLOWED
 /*
  * Release some perf entries to make space for a new entry data. We dereference
  * the associated request before deleting the entry. Also, no need to check for
@@ -374,25 +378,26 @@ static void release_some_perf_entries(struct 
drm_i915_private *dev_priv,
break;
}
 }
+#endif
 
 /*
- * Insert the perf entry to the end of the list. This function never fails,
- * since it always manages to insert the entry. If the space is exhausted in
- * the buffer, it will remove the oldest entries in order to make space.
+ * Insert the perf entry to the end of the list. If the overwrite of old 
entries
+ * is allowed, the function always manages to insert the entry and returns 0.
+ * If overwrite is not allowed, on detection of overflow condition, an
+ * appropriate status flag is set, and function returns -ENOSPC.
  */
-static void insert_perf_entry(struct drm_i915_private *dev_priv,
+static int insert_perf_entry(struct drm_i915_private *dev_priv,
struct i915_perf_cs_data_node *entry)
 {
struct i915_perf_cs_data_node *first_entry, *last_entry;
int max_offset = dev_priv->perf.command_stream_buf.obj->base.size;
u32 entry_size = dev_priv->perf.oa.oa_buffer.format_size;
+   int ret = 0;
 
spin_lock(_priv->perf.node_list_lock);
if (list_empty(_priv->perf.node_list)) {
entry->offset = 0;
-   list_add_tail(>link, _priv->perf.node_list);
-   spin_unlock(_priv->perf.node_list_lock);
-   return;
+   goto out;
}
 
first_entry = list_first_entry(_priv->perf.node_list,
@@ -410,29 +415,49 @@ static void insert_perf_entry(struct drm_i915_private 
*dev_priv,
 */
else if (entry_size < first_entry->offset)
entry->offset = 0;
-   /* Insufficient space. Overwrite existing old entries */
+   /* Insufficient space */
else {
+#ifdef CMD_STREAM_BUF_OVERFLOW_ALLOWED
u32 target_size = entry_size - first_entry->offset;
 
release_some_perf_entries(dev_priv, target_size);
entry->offset = 0;
+#else
+   dev_priv->perf.command_stream_buf.status |=
+   I915_PERF_CMD_STREAM_BUF_STATUS_OVERFLOW;
+   ret = -ENOSPC;
+   goto out_unlock;
+#endif
}
} else {
/* Sufficient space available? */
if (last_entry->offset + 2*entry_size < first_entry->offset)
entry->offset = last_entry->offset + entry_size;
-   /* Insufficient space. Overwrite existing old entries */
+   /* Insufficient space */
else {
+#ifdef CMD_STREAM_BUF_OVERFLOW_ALLOWED
u32 target_size = entry_size -
(first_entry->offset - last_entry->offset -
entry_size);
 
release_some_perf_entries(dev_priv, target_size);

[Intel-gfx] [PATCH 13/15] time: export clocks_calc_mult_shift

2016-11-04 Thread sourab . gupta
From: Sourab Gupta 

Exporting clocks_calc_mult_shift is helpful for drivers to calculate
the mult/shift values for their clocks, given their frequency.
This is particularly useful when such drivers want to associate a
timecounter/cyclecounter abstraction with their clock sources, in order
to use the cross timestamp infrastructure for syncing device time with
system time.

Signed-off-by: Sourab Gupta 
---
 kernel/time/clocksource.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 6a5a310..e2de743 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -89,6 +89,7 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 
to, u32 maxsec)
*mult = tmp;
*shift = sft;
 }
+EXPORT_SYMBOL_GPL(clocks_calc_mult_shift);
 
 /*[Clocksource internal variables]-
  * curr_clocksource:
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 02/15] drm/i915: Expose OA sample source to userspace

2016-11-04 Thread sourab . gupta
From: Sourab Gupta 

This patch exposes a new sample source field to userspace. This field can
be populated to specify the origin of the OA report.
For example, for internally triggered reports (non MI_RPC reports), the RPT_ID
field has bitfields for specifying the origin such as timer, or render ctx
switch, etc.
Likewise this field can be used to specify the source as MI_RPC when such
support is added.

Signed-off-by: Sourab Gupta 
Signed-off-by: Robert Bragg 
---
 drivers/gpu/drm/i915/i915_perf.c | 55 ++--
 include/uapi/drm/i915_drm.h  | 16 
 2 files changed, 63 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index d030cd7..58a1118 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -240,6 +240,13 @@ static u32 i915_perf_stream_paranoid = true;
  */
 #define OA_EXPONENT_MAX 31
 
+#define GEN8_OAREPORT_REASON_TIMER  (1<<19)
+#define GEN8_OAREPORT_REASON_TRIGGER1   (1<<20)
+#define GEN8_OAREPORT_REASON_TRIGGER2   (1<<21)
+#define GEN8_OAREPORT_REASON_CTX_SWITCH (1<<22)
+#define GEN8_OAREPORT_REASON_GO_TRANSITION  (1<<23)
+#define GEN9_OAREPORT_REASON_CLK_RATIO  (1<<24)
+
 /* for sysctl proc_dointvec_minmax of i915_oa_min_timer_exponent */
 static int zero;
 static int oa_exponent_max = OA_EXPONENT_MAX;
@@ -279,7 +286,8 @@ static struct i915_oa_format 
gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
[I915_OA_FORMAT_C4_B8]  = { 7, 64 },
 };
 
-#define SAMPLE_OA_REPORT  (1<<0)
+#define SAMPLE_OA_REPORT   (1<<0)
+#define SAMPLE_OA_SOURCE_INFO  (1<<1)
 
 struct perf_open_properties {
u32 sample_flags;
@@ -385,6 +393,27 @@ static int append_oa_sample(struct i915_perf_stream 
*stream,
return -EFAULT;
buf += sizeof(header);
 
+   if (sample_flags & SAMPLE_OA_SOURCE_INFO) {
+   enum drm_i915_perf_oa_event_source source;
+
+   if (INTEL_INFO(dev_priv)->gen >= 8) {
+   u32 reason = *(u32 *)report;
+
+   if (reason & GEN8_OAREPORT_REASON_CTX_SWITCH)
+   source =
+   I915_PERF_OA_EVENT_SOURCE_CONTEXT_SWITCH;
+   else if (reason & GEN8_OAREPORT_REASON_TIMER)
+   source = I915_PERF_OA_EVENT_SOURCE_PERIODIC;
+   else
+   source = I915_PERF_OA_EVENT_SOURCE_UNDEFINED;
+   } else
+   source = I915_PERF_OA_EVENT_SOURCE_PERIODIC;
+
+   if (copy_to_user(buf, &source, 4))
+   return -EFAULT;
+   buf += 4;
+   }
+
if (sample_flags & SAMPLE_OA_REPORT) {
if (copy_to_user(buf, report, report_size))
return -EFAULT;
@@ -1453,11 +1482,6 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
return -EINVAL;
}
 
-   if (!(props->sample_flags & SAMPLE_OA_REPORT)) {
-   DRM_ERROR("Only OA report sampling supported\n");
-   return -EINVAL;
-   }
-
if (!dev_priv->perf.oa.ops.init_oa_buffer) {
DRM_ERROR("OA unit not supported\n");
return -ENODEV;
@@ -1486,8 +1510,20 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
 
format_size = dev_priv->perf.oa.oa_formats[props->oa_format].size;
 
-   stream->sample_flags |= SAMPLE_OA_REPORT;
-   stream->sample_size += format_size;
+   if (props->sample_flags & SAMPLE_OA_REPORT) {
+   stream->sample_flags |= SAMPLE_OA_REPORT;
+   stream->sample_size += format_size;
+   }
+
+   if (props->sample_flags & SAMPLE_OA_SOURCE_INFO) {
+   if (!(props->sample_flags & SAMPLE_OA_REPORT)) {
+   DRM_ERROR(
+   "OA source type can't be sampled without OA report");
+   return -EINVAL;
+   }
+   stream->sample_flags |= SAMPLE_OA_SOURCE_INFO;
+   stream->sample_size += 4;
+   }
 
dev_priv->perf.oa.oa_buffer.format_size = format_size;
BUG_ON(dev_priv->perf.oa.oa_buffer.format_size == 0);
@@ -2160,6 +2196,9 @@ static int read_properties_unlocked(struct 
drm_i915_private *dev_priv,
props->oa_periodic = true;
props->oa_period_exponent = value;
break;
+   case DRM_I915_PERF_PROP_SAMPLE_OA_SOURCE:
+   props->sample_flags |= SAMPLE_OA_SOURCE_INFO;
+   break;
case DRM_I915_PERF_PROP_MAX:
BUG();
}
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index e95f666..0583812 100644
--- 

[Intel-gfx] [PATCH 04/15] drm/i915: flush periodic samples, in case of no pending CS sample requests

2016-11-04 Thread sourab . gupta
From: Sourab Gupta 

When there are no pending CS OA samples, flush the periodic OA samples
collected so far.

We can safely forward the periodic OA samples in the case we
have no pending CS samples, but we can't do so in the case we have
pending CS samples, since we don't know what the ordering between
pending CS samples and periodic samples will eventually be. If we
have no pending CS sample, it won't be possible for future pending CS
sample to have timestamps earlier than current periodic timestamp.

Signed-off-by: Sourab Gupta 
---
 drivers/gpu/drm/i915/i915_drv.h  |   7 +-
 drivers/gpu/drm/i915/i915_perf.c | 163 +--
 2 files changed, 129 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0561315..dedb7f8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1856,8 +1856,9 @@ struct i915_oa_ops {
char __user *buf,
size_t count,
size_t *offset,
-   u32 ts);
-   bool (*oa_buffer_is_empty)(struct drm_i915_private *dev_priv);
+   u32 ts, u32 max_records);
+   int (*oa_buffer_num_samples)(struct drm_i915_private *dev_priv,
+   u32 *last_ts);
 };
 
 /*
@@ -2221,6 +,8 @@ struct drm_i915_private {
u32 gen7_latched_oastatus1;
u32 ctx_oactxctrl_off;
u32 ctx_flexeu0_off;
+   u32 n_pending_periodic_samples;
+   u32 pending_periodic_ts;
 
struct i915_oa_ops ops;
const struct i915_oa_format *oa_formats;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 7bbc757..2ee4711 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -540,13 +540,30 @@ static void i915_oa_rcs_free_requests(struct 
drm_i915_private *dev_priv)
  * pointers.  A race here could result in a false positive !empty status which
  * is acceptable.
  */
-static bool gen8_oa_buffer_is_empty_fop_unlocked(struct drm_i915_private 
*dev_priv)
+static int
+gen8_oa_buffer_num_samples_fop_unlocked(struct drm_i915_private *dev_priv,
+   u32 *last_ts)
 {
int report_size = dev_priv->perf.oa.oa_buffer.format_size;
-   u32 head = I915_READ(GEN8_OAHEADPTR);
-   u32 tail = I915_READ(GEN8_OATAILPTR);
+   u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.addr;
+   u32 head = I915_READ(GEN8_OAHEADPTR) & GEN8_OAHEADPTR_MASK;
+   u32 tail = I915_READ(GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK;
+   u32 mask = (OA_BUFFER_SIZE - 1);
+   u32 num_samples;
+   u8 *report;
+
+   head -= dev_priv->perf.oa.oa_buffer.gtt_offset;
+   tail -= dev_priv->perf.oa.oa_buffer.gtt_offset;
+   num_samples = OA_TAKEN(tail, head) / report_size;
 
-   return OA_TAKEN(tail, head) < report_size;
+   /* read the timestamp of the last sample */
+   if (num_samples) {
+   head += report_size*(num_samples - 1);
+   report = oa_buf_base + (head & mask);
+   *last_ts = *(u32 *)(report + 4);
+   }
+
+   return num_samples;
 }
 
 /* NB: This is either called via fops or the poll check hrtimer (atomic ctx)
@@ -560,16 +577,32 @@ static bool gen8_oa_buffer_is_empty_fop_unlocked(struct 
drm_i915_private *dev_pr
  * pointers.  A race here could result in a false positive !empty status which
  * is acceptable.
  */
-static bool gen7_oa_buffer_is_empty_fop_unlocked(struct drm_i915_private 
*dev_priv)
+static int
+gen7_oa_buffer_num_samples_fop_unlocked(struct drm_i915_private *dev_priv,
+   u32 *last_ts)
 {
int report_size = dev_priv->perf.oa.oa_buffer.format_size;
u32 oastatus2 = I915_READ(GEN7_OASTATUS2);
u32 oastatus1 = I915_READ(GEN7_OASTATUS1);
u32 head = oastatus2 & GEN7_OASTATUS2_HEAD_MASK;
u32 tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
+   u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.addr;
+   u32 mask = (OA_BUFFER_SIZE - 1);
+   int available_size;
+   u32 num_samples = 0;
+   u8 *report;
 
-   return OA_TAKEN(tail, head) <
-   dev_priv->perf.oa.tail_margin + report_size;
+   head -= dev_priv->perf.oa.oa_buffer.gtt_offset;
+   tail -= dev_priv->perf.oa.oa_buffer.gtt_offset;
+   available_size = OA_TAKEN(tail, head) - dev_priv->perf.oa.tail_margin;
+   if (available_size >= report_size) {
+   num_samples = available_size / report_size;
+   head += report_size*(num_samples - 1);
+   report = oa_buf_base + (head & mask);
+   *last_ts = *(u32 *)(report + 4);
+   }
+
+   return num_samples;
 }
 
 /**
@@ -698,7 +731,7 @@ static int 

[Intel-gfx] [PATCH 01/15] drm/i915: Add ctx getparam ioctl parameter to retrieve ctx unique id

2016-11-04 Thread sourab . gupta
From: Sourab Gupta 

This patch adds a new ctx getparam ioctl parameter, which can be used to
retrieve ctx unique id by userspace.

This can be used by userspace to map the i915 perf samples with their
particular ctx's, since those would be having ctx unique id's.
Otherwise the userspace has no way of maintaining this association,
since it has the knowledge of only per-drm file specific ctx handles.

Signed-off-by: Sourab Gupta 
---
 drivers/gpu/drm/i915/i915_gem_context.c | 3 +++
 include/uapi/drm/i915_drm.h | 1 +
 2 files changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index e6616ed..d0efa5e 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -1078,6 +1078,9 @@ int i915_gem_context_getparam_ioctl(struct drm_device 
*dev, void *data,
case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
args->value = !!(ctx->flags & CONTEXT_NO_ERROR_CAPTURE);
break;
+   case I915_CONTEXT_PARAM_HW_ID:
+   args->value = ctx->hw_id;
+   break;
default:
ret = -EINVAL;
break;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index f63a392..e95f666 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1223,6 +1223,7 @@ struct drm_i915_gem_context_param {
 #define I915_CONTEXT_PARAM_NO_ZEROMAP  0x2
 #define I915_CONTEXT_PARAM_GTT_SIZE0x3
 #define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE0x4
+#define I915_CONTEXT_PARAM_HW_ID   0x5
__u64 value;
 };
 
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 06/15] drm/i915: Populate ctx ID for periodic OA reports

2016-11-04 Thread sourab . gupta
From: Sourab Gupta 

This adds support for populating the ctx id for the periodic OA reports
when requested through the corresponding property.

For Gen8, the OA reports itself have the ctx ID and it is the one programmed
into HW while submitting workloads. Thus it's retrieved from reports itself.
For Gen7, the OA reports don't have any such field, and we can populate this
field with the last seen ctx ID while sending CS reports.

Signed-off-by: Sourab Gupta 
---
 drivers/gpu/drm/i915/i915_drv.h  |  3 +++
 drivers/gpu/drm/i915/i915_perf.c | 52 +---
 2 files changed, 52 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e9cf939..853cc7db 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1859,6 +1859,8 @@ struct i915_oa_ops {
u32 ts, u32 max_records);
int (*oa_buffer_num_samples)(struct drm_i915_private *dev_priv,
u32 *last_ts);
+   u32 (*oa_buffer_get_ctx_id)(struct i915_perf_stream *stream,
+   const u8 *report);
 };
 
 /*
@@ -2239,6 +2241,7 @@ struct drm_i915_private {
u32 status;
} command_stream_buf;
 
+   u32 last_ctx_id;
struct list_head node_list;
spinlock_t node_list_lock;
} perf;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index e10e78f..84457f8 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -637,6 +637,46 @@ gen7_oa_buffer_num_samples_fop_unlocked(struct 
drm_i915_private *dev_priv,
return num_samples;
 }
 
+static u32 gen7_oa_buffer_get_ctx_id(struct i915_perf_stream *stream,
+   const u8 *report)
+{
+   struct drm_i915_private *dev_priv = stream->dev_priv;
+
+   if (!stream->cs_mode)
+   WARN_ONCE(1,
+   "CTX ID can't be retrieved if command stream mode not 
enabled");
+
+   /*
+* OA reports generated in Gen7 don't have the ctx ID information.
+* Therefore, just rely on the ctx ID information from the last CS
+* sample forwarded
+*/
+   return dev_priv->perf.last_ctx_id;
+}
+
+static u32 gen8_oa_buffer_get_ctx_id(struct i915_perf_stream *stream,
+   const u8 *report)
+{
+   struct drm_i915_private *dev_priv = stream->dev_priv;
+
+   /* The ctx ID present in the OA reports have intel_context::global_id
+* present, since this is programmed into the ELSP in execlist mode.
+* In non-execlist mode, fall back to retrieving the ctx ID from the
+* last saved ctx ID from command stream mode.
+*/
+   if (i915.enable_execlists) {
+   u32 ctx_id = *(u32 *)(report + 12);
+   ctx_id &= 0xf;
+   return ctx_id;
+   } else {
+   if (!stream->cs_mode)
+   WARN_ONCE(1,
+   "CTX ID can't be retrieved if command stream mode not 
enabled");
+
+   return dev_priv->perf.last_ctx_id;
+   }
+}
+
 /**
  * Appends a status record to a userspace read() buffer.
  */
@@ -733,9 +773,9 @@ static int append_oa_buffer_sample(struct i915_perf_stream 
*stream,
data.source = source;
}
 
-#warning "FIXME: append_oa_buffer_sample: read ctx ID from report and map that 
to an intel_context::global_id"
if (sample_flags & SAMPLE_CTX_ID)
-   data.ctx_id = 0;
+   data.ctx_id = dev_priv->perf.oa.ops.oa_buffer_get_ctx_id(
+   stream, report);
 
if (sample_flags & SAMPLE_OA_REPORT)
data.report = report;
@@ -1248,8 +1288,10 @@ static int append_oa_rcs_sample(struct i915_perf_stream 
*stream,
if (sample_flags & SAMPLE_OA_SOURCE_INFO)
data.source = I915_PERF_OA_EVENT_SOURCE_RCS;
 
-   if (sample_flags & SAMPLE_CTX_ID)
+   if (sample_flags & SAMPLE_CTX_ID) {
data.ctx_id = node->ctx_id;
+   dev_priv->perf.last_ctx_id = node->ctx_id;
+   }
 
if (sample_flags & SAMPLE_OA_REPORT)
data.report = report;
@@ -3092,6 +3134,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
dev_priv->perf.oa.ops.read = gen7_oa_read;
dev_priv->perf.oa.ops.oa_buffer_num_samples =
gen7_oa_buffer_num_samples_fop_unlocked;
+   dev_priv->perf.oa.ops.oa_buffer_get_ctx_id =
+   gen7_oa_buffer_get_ctx_id;
 
dev_priv->perf.oa.timestamp_frequency = 1250;
 
@@ -3106,6 +3150,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
dev_priv->perf.oa.ops.read = gen8_oa_read;
   

[Intel-gfx] [PATCH 09/15] drm/i915: Extend i915 perf framework for collecting timestamps on all gpu engines

2016-11-04 Thread sourab . gupta
From: Sourab Gupta 

This patch extends the i915  perf framework to handle the perf sample
collection for any given gpu engine. Particularly, the support
for collecting timestamp sample type is added, which can be requested for
any engine.
With this, for RCS, timestamps and OA reports can be collected together,
and provided to userspace in separate sample fields. For other engines,
the capability to collect timestamps is added.

The thing to note is that, still only a single stream instance can be
opened at any particular time. Though that stream may now be opened for any
gpu engine, for collection of timestamp samples.

So, this patch doesn't add the support to open multiple concurrent streams,
as yet. Though it lays the groundwork for this support to be added
subsequently. Part of this groundwork involves having separate command
stream buffers, per engine, for holding the samples generated.
Likewise for a few other data structures maintaining per-engine state.

Signed-off-by: Sourab Gupta 
---
 drivers/gpu/drm/i915/i915_drv.h  |  35 ++-
 drivers/gpu/drm/i915/i915_perf.c | 635 +--
 drivers/gpu/drm/i915/i915_reg.h  |   2 +
 include/uapi/drm/i915_drm.h  |   7 +
 4 files changed, 445 insertions(+), 234 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0f171f8..a05335a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1814,7 +1814,8 @@ struct i915_perf_stream_ops {
 * Routine to emit the commands in the command streamer associated
 * with the corresponding gpu engine.
 */
-   void (*command_stream_hook)(struct drm_i915_gem_request *req, u32 tag);
+   void (*command_stream_hook)(struct i915_perf_stream *stream,
+   struct drm_i915_gem_request *req, u32 tag);
 };
 
 enum i915_perf_stream_state {
@@ -1839,6 +1840,9 @@ struct i915_perf_stream {
/* Whether command stream based data collection is enabled */
bool cs_mode;
 
+   /* Whether the OA unit is in use */
+   bool using_oa;
+
const struct i915_perf_stream_ops *ops;
 };
 
@@ -1870,7 +1874,16 @@ struct i915_oa_ops {
 struct i915_perf_cs_data_node {
struct list_head link;
struct drm_i915_gem_request *request;
-   u32 offset;
+
+   /* Offsets into the GEM obj holding the data */
+   u32 start_offset;
+   u32 oa_offset;
+   u32 ts_offset;
+
+   /* buffer size corresponding to this entry */
+   u32 size;
+
+   /* Other metadata */
u32 ctx_id;
u32 pid;
u32 tag;
@@ -2189,14 +2202,14 @@ struct drm_i915_private {
 
spinlock_t hook_lock;
 
-   struct {
-   struct i915_perf_stream *exclusive_stream;
 
-   u32 specific_ctx_id;
+   struct hrtimer poll_check_timer;
+   struct i915_perf_stream *exclusive_stream;
+   wait_queue_head_t poll_wq[I915_NUM_ENGINES];
+   atomic_t pollin[I915_NUM_ENGINES];
 
-   struct hrtimer poll_check_timer;
-   wait_queue_head_t poll_wq;
-   atomic_t pollin;
+   struct {
+   u32 specific_ctx_id;
 
bool periodic;
int period_exponent;
@@ -2241,13 +2254,13 @@ struct drm_i915_private {
u8 *addr;
 #define I915_PERF_CMD_STREAM_BUF_STATUS_OVERFLOW (1<<0)
u32 status;
-   } command_stream_buf;
+   } command_stream_buf[I915_NUM_ENGINES];
 
u32 last_ctx_id;
u32 last_pid;
u32 last_tag;
-   struct list_head node_list;
-   spinlock_t node_list_lock;
+   struct list_head node_list[I915_NUM_ENGINES];
+   spinlock_t node_list_lock[I915_NUM_ENGINES];
} perf;
 
/* Abstract the submission mechanism (legacy ringbuffer or execlists) 
away */
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index ca523b1..516fd54 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -250,12 +250,17 @@ static u32 i915_perf_stream_paranoid = true;
 /* For determining the behavior on overflow of command stream samples */
 #define CMD_STREAM_BUF_OVERFLOW_ALLOWED
 
-/* Data common to periodic and RCS based samples */
-struct oa_sample_data {
+#define OA_ADDR_ALIGN 64
+#define TS_ADDR_ALIGN 8
+#define I915_PERF_TS_SAMPLE_SIZE 8
+
+/* Data common to all samples (periodic OA / CS based OA / Timestamps) */
+struct sample_data {
u32 source;
u32 ctx_id;
u32 pid;
u32 tag;
+   u64 ts;
const u8 *report;
 };
 
@@ -313,6 +318,7 @@ static const enum intel_engine_id 
user_ring_map[I915_USER_RINGS + 1] = {
 #define SAMPLE_CTX_ID  

[Intel-gfx] [PATCH 03/15] drm/i915: Framework for capturing command stream based OA reports

2016-11-04 Thread sourab . gupta
From: Sourab Gupta 

This patch introduces a framework to enable OA counter reports associated
with Render command stream. We can then associate the reports captured
through this mechanism with their corresponding context id's. This can be
further extended to associate any other metadata information with the
corresponding samples (since the association with Render command stream
gives us the ability to capture these information while inserting the
corresponding capture commands into the command stream).

The OA reports generated in this way are associated with a corresponding
workload, and thus can be used to delimit the workload (i.e. sample the
counters at the workload boundaries), within an ongoing stream of periodic
counter snapshots.

There may be usecases wherein we need more than periodic OA capture mode
which is supported currently. This mode is primarily used for two usecases:
- Ability to capture system wide metrics, along with the ability to map
  the reports back to individual contexts (particularly for HSW).
- Ability to inject tags for work, into the reports. This provides
  visibility into the multiple stages of work within single context.

The userspace will be able to distinguish between the periodic and CS based
OA reports by the virtue of source_info sample field.

The command MI_REPORT_PERF_COUNT can be used to capture snapshots of OA
counters, and is inserted at BB boundaries.
The data thus captured will be stored in a separate buffer, which will
be different from the buffer used otherwise for periodic OA capture mode.
The metadata information pertaining to snapshot is maintained in a list,
which also has offsets into the gem buffer object per captured snapshot.
In order to track whether the gpu has completed processing the node,
a field pertaining to corresponding gem request is added, which is tracked
for completion of the command.

Both periodic and RCS based reports are associated with a single stream
(corresponding to render engine), and it is expected to have the samples
in the sequential order according to their timestamps. Now, since these
reports are collected in separate buffers, these are merge sorted at the
time of forwarding to userspace during the read call.

v2: Aligning with the non-perf interface (custom drm ioctl based). Also,
few related patches are squashed together for better readability

Signed-off-by: Sourab Gupta 
Signed-off-by: Robert Bragg 
---
 drivers/gpu/drm/i915/i915_drv.h|  44 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   4 +
 drivers/gpu/drm/i915/i915_perf.c   | 895 -
 include/uapi/drm/i915_drm.h|  15 +
 4 files changed, 805 insertions(+), 153 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a6ac1c3..0561315 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1809,6 +1809,18 @@ struct i915_perf_stream_ops {
 * The stream will always be disabled before this is called.
 */
void (*destroy)(struct i915_perf_stream *stream);
+
+   /*
+* Routine to emit the commands in the command streamer associated
+* with the corresponding gpu engine.
+*/
+   void (*command_stream_hook)(struct drm_i915_gem_request *req);
+};
+
+enum i915_perf_stream_state {
+   I915_PERF_STREAM_DISABLED,
+   I915_PERF_STREAM_ENABLE_IN_PROGRESS,
+   I915_PERF_STREAM_ENABLED,
 };
 
 struct i915_perf_stream {
@@ -1816,11 +1828,16 @@ struct i915_perf_stream {
 
struct list_head link;
 
+   enum intel_engine_id engine;
u32 sample_flags;
int sample_size;
 
struct i915_gem_context *ctx;
bool enabled;
+   enum i915_perf_stream_state state;
+
+   /* Whether command stream based data collection is enabled */
+   bool cs_mode;
 
const struct i915_perf_stream_ops *ops;
 };
@@ -1838,10 +1855,22 @@ struct i915_oa_ops {
int (*read)(struct i915_perf_stream *stream,
char __user *buf,
size_t count,
-   size_t *offset);
+   size_t *offset,
+   u32 ts);
bool (*oa_buffer_is_empty)(struct drm_i915_private *dev_priv);
 };
 
+/*
+ * List element to hold info about the perf sample data associated
+ * with a particular GPU command stream.
+ */
+struct i915_perf_cs_data_node {
+   struct list_head link;
+   struct drm_i915_gem_request *request;
+   u32 offset;
+   u32 ctx_id;
+};
+
 struct drm_i915_private {
struct drm_device drm;
 
@@ -2149,6 +2178,8 @@ struct drm_i915_private {
struct ctl_table_header *sysctl_header;
 
struct mutex lock;
+
+   struct mutex streams_lock;
struct list_head streams;
 
spinlock_t hook_lock;
@@ -2195,6 +2226,16 @@ struct 

Re: [Intel-gfx] [PATCH 06/12] drm/i915/scheduler: Execute requests in order of priorities

2016-11-04 Thread Chris Wilson
On Thu, Nov 03, 2016 at 07:47:39PM +, Chris Wilson wrote:
> On Thu, Nov 03, 2016 at 04:21:25PM +, Tvrtko Ursulin wrote:
> > >+static void update_priorities(struct i915_priotree *pt, int prio)
> > >+{
> > >+  struct drm_i915_gem_request *request =
> > >+  container_of(pt, struct drm_i915_gem_request, priotree);
> > >+  struct intel_engine_cs *engine = request->engine;
> > >+  struct i915_dependency *dep;
> > >+
> > >+  if (prio <= READ_ONCE(pt->priority))
> > >+  return;
> > >+
> > >+  /* Recursively bump all dependent priorities to match the new request */
> > > >+  list_for_each_entry(dep, &pt->pre_list, pre_link)
> > >+  update_priorities(dep->signal, prio);
> > 
> > John got in trouble from recursion in his scheduler, used for the
> > same thing AFAIR. Or was it the priority bumping? Either way, it
> > could be imperative to avoid it.

Spent some time tuning (but not very well) for very deep pipelines:

static struct intel_engine_cs *
pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
{
struct intel_engine_cs *engine;

engine = container_of(pt,
  struct drm_i915_gem_request,
  priotree)->engine;
if (engine != locked) {
if (locked)
spin_unlock_irq(>timeline->lock);
spin_lock_irq(>timeline->lock);
}

return engine;
}

static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
{
struct intel_engine_cs *engine = NULL;
struct i915_dependency *dep, *p;
struct i915_dependency stack;
LIST_HEAD(dfs);

if (prio <= READ_ONCE(request->priotree.priority))
return;

/* Need BKL in order to use the temporary link inside i915_dependency */
lockdep_assert_held(>i915->drm.struct_mutex);

stack.signal = >priotree;
list_add(_link, );

/* Recursively bump all dependent priorities to match the new request */
list_for_each_entry_safe(dep, p, , dfs_link) {
struct i915_priotree *pt = dep->signal;

list_for_each_entry(p, >pre_list, pre_link)
if (prio > READ_ONCE(p->signal->priority))
list_move_tail(>dfs_link, );

p = list_first_entry(>dfs_link, typeof(*p), dfs_link);
if (!RB_EMPTY_NODE(>node))
continue;

engine = pt_lock_engine(pt, engine);

if (prio > pt->priority && RB_EMPTY_NODE(>node)) {
pt->priority = prio;
list_del_init(>dfs_link);
}
}

/* Fifo and depth-first replacement ensure our deps execute before us */
list_for_each_entry_safe_reverse(dep, p, , dfs_link) {
struct i915_priotree *pt = dep->signal;

INIT_LIST_HEAD(>dfs_link);

engine = pt_lock_engine(pt, engine);

if (prio <= pt->priority)
continue;

GEM_BUG_ON(RB_EMPTY_NODE(>node));

pt->priority = prio;
rb_erase(>node, >execlist_queue);
if (insert_request(pt, >execlist_queue))
engine->execlist_first = >node;
}

if (engine)
spin_unlock_irq(>timeline->lock);

/* XXX Do we need to preempt to make room for us and our deps? */
}

But as always any linear list scales poorly. It is just fortunate that
typically we don't see 10,000s of requests in the pipeline that need PI.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v8 05/12] drm/i915: don't whitelist oacontrol in cmd parser

2016-11-04 Thread sourab gupta
On Thu, 2016-10-27 at 19:14 -0700, Robert Bragg wrote:
> Being able to program OACONTROL from a non-privileged batch buffer is
> not sufficient to be able to configure the OA unit. This was originally
> allowed to help enable Mesa to expose OA counters via the
> INTEL_performance_query extension, but the current implementation based
> on programming OACONTROL via a batch buffer isn't able to report useable
> data without a more complete OA unit configuration. Mesa handles the
> possibility that writes to OACONTROL may not be allowed and so only
> advertises the extension after explicitly testing that a write to
> OACONTROL succeeds. Based on this; removing OACONTROL from the whitelist
> should be ok for userspace.
> 
> Removing this simplifies adding a new kernel api for configuring the OA
> unit without needing to consider the possibility that userspace might
> trample on OACONTROL state which we'd like to start managing within
> the kernel instead. In particular running any Mesa based GL application
> currently results in clearing OACONTROL when initializing which would
> disable the capturing of metrics.
> 
> Signed-off-by: Robert Bragg 
> Reviewed-by: Matthew Auld 
Seems reasonable.
Reviewed-by: Sourab Gupta 

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v8 09/12] drm/i915: Add dev.i915.perf_stream_paranoid sysctl option

2016-11-04 Thread sourab gupta
On Thu, 2016-10-27 at 19:14 -0700, Robert Bragg wrote:
> Consistent with the kernel.perf_event_paranoid sysctl option that can
> allow non-root users to access system wide cpu metrics, this can
> optionally allow non-root users to access system wide OA counter metrics
> from Gen graphics hardware.
> 
> Signed-off-by: Robert Bragg 
> Reviewed-by: Matthew Auld 
> ---
>  drivers/gpu/drm/i915/i915_drv.h  |  1 +
>  drivers/gpu/drm/i915/i915_perf.c | 50 
> +++-
>  2 files changed, 50 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 01438fb..a138f86 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2171,6 +2171,7 @@ struct drm_i915_private {
>   bool initialized;
>  
>   struct kobject *metrics_kobj;
> + struct ctl_table_header *sysctl_header;
>  
>   struct mutex lock;
>   struct list_head streams;
> diff --git a/drivers/gpu/drm/i915/i915_perf.c 
> b/drivers/gpu/drm/i915/i915_perf.c
> index 8d07c41..4e42073 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -64,6 +64,11 @@
>  #define POLL_FREQUENCY 200
>  #define POLL_PERIOD (NSEC_PER_SEC / POLL_FREQUENCY)
>  
> +/* for sysctl proc_dointvec_minmax of dev.i915.perf_stream_paranoid */
> +static int zero;
> +static int one = 1;
> +static u32 i915_perf_stream_paranoid = true;
> +
>  /* The maximum exponent the hardware accepts is 63 (essentially it selects 
> one
>   * of the 64bit timestamp bits to trigger reports from) but there's currently
>   * no known use case for sampling as infrequently as once per 47 thousand 
> years.
> @@ -1207,7 +1212,13 @@ i915_perf_open_ioctl_locked(struct drm_i915_private 
> *dev_priv,
>   }
>   }
>  
> - if (!specific_ctx && !capable(CAP_SYS_ADMIN)) {
> + /* Similar to perf's kernel.perf_paranoid_cpu sysctl option
> +  * we check a dev.i915.perf_stream_paranoid sysctl option
> +  * to determine if it's ok to access system wide OA counters
> +  * without CAP_SYS_ADMIN privileges.
> +  */
> + if (!specific_ctx &&
> + i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) {
>   DRM_ERROR("Insufficient privileges to open system-wide i915 
> perf stream\n");
>   ret = -EACCES;
>   goto err_ctx;
> @@ -1454,6 +1465,39 @@ void i915_perf_unregister(struct drm_i915_private 
> *dev_priv)
>   dev_priv->perf.metrics_kobj = NULL;
>  }
>  
> +static struct ctl_table oa_table[] = {
> + {
> +  .procname = "perf_stream_paranoid",
> +  .data = &i915_perf_stream_paranoid,
> +  .maxlen = sizeof(i915_perf_stream_paranoid),
> +  .mode = 0644,
> +  .proc_handler = proc_dointvec_minmax,
> +  .extra1 = &zero,
> +  .extra2 = &one,
> +  },
> + {}
> +};
> +
> +static struct ctl_table i915_root[] = {
> + {
> +  .procname = "i915",
> +  .maxlen = 0,
> +  .mode = 0555,
> +  .child = oa_table,
> +  },
> + {}
> +};
> +
> +static struct ctl_table dev_root[] = {
> + {
> +  .procname = "dev",
> +  .maxlen = 0,
> +  .mode = 0555,
> +  .child = i915_root,
> +  },
> + {}
> +};
> +
>  void i915_perf_init(struct drm_i915_private *dev_priv)
>  {
>   if (!IS_HASWELL(dev_priv))
> @@ -1484,6 +1528,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
>   dev_priv->perf.oa.n_builtin_sets =
>   i915_oa_n_builtin_metric_sets_hsw;
>  
> + dev_priv->perf.sysctl_header = register_sysctl_table(dev_root);
> +
>   dev_priv->perf.initialized = true;
>  }
>  
> @@ -1492,6 +1538,8 @@ void i915_perf_fini(struct drm_i915_private *dev_priv)
>   if (!dev_priv->perf.initialized)
>   return;
>  
> + unregister_sysctl_table(dev_priv->perf.sysctl_header);
> +
>   memset(&dev_priv->perf.oa.ops, 0, sizeof(dev_priv->perf.oa.ops));
>   dev_priv->perf.initialized = false;
>  }

Looks fine.
Reviewed-by: Sourab Gupta  


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v8 08/12] drm/i915: advertise available metrics via sysfs

2016-11-04 Thread sourab gupta
On Thu, 2016-10-27 at 19:14 -0700, Robert Bragg wrote:
> Each metric set is given a sysfs entry like:
> 
> /sys/class/drm/card0/metrics/<guid>/id
> 
> This allows userspace to enumerate the specific sets that are available
> for the current system. The 'id' file contains an unsigned integer that
> can be used to open the associated metric set via
> DRM_IOCTL_I915_PERF_OPEN. The <guid> is a globally unique ID for a
> specific OA unit register configuration that can be reliably used by
> userspace as a key to lookup corresponding counter meta data and
> normalization equations.
> 
> The guid registry is currently maintained as part of gputop along with
> the XML metric set descriptions and code generation scripts, ref:
> 
>  https://github.com/rib/gputop
>  > gputop-data/guids.xml
>  > scripts/update-guids.py
>  > gputop-data/oa-*.xml
>  > scripts/i915-perf-kernelgen.py
> 
>  $ make -C gputop-data -f Makefile.xml SYSFS=1 WHITELIST=RenderBasic
> 
> Signed-off-by: Robert Bragg 
> Reviewed-by: Matthew Auld 
Looks good to me.
Reviewed-by: Sourab Gupta 

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v8 02/12] drm/i915: Add i915 perf infrastructure

2016-11-04 Thread sourab gupta
On Thu, 2016-10-27 at 19:14 -0700, Robert Bragg wrote:
> Adds base i915 perf infrastructure for Gen performance metrics.
> 
> This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64
> properties to configure a stream of metrics and returns a new fd usable
> with standard VFS system calls including read() to read typed and sized
> records; ioctl() to enable or disable capture and poll() to wait for
> data.
> 
> A stream is opened something like:
> 
>   uint64_t properties[] = {
>   /* Single context sampling */
>   DRM_I915_PERF_PROP_CTX_HANDLE,ctx_handle,
> 
>   /* Include OA reports in samples */
>   DRM_I915_PERF_PROP_SAMPLE_OA, true,
> 
>   /* OA unit configuration */
>   DRM_I915_PERF_PROP_OA_METRICS_SET,metrics_set_id,
>   DRM_I915_PERF_PROP_OA_FORMAT, report_format,
>   DRM_I915_PERF_PROP_OA_EXPONENT,   period_exponent,
>};
>struct drm_i915_perf_open_param parm = {
>   .flags = I915_PERF_FLAG_FD_CLOEXEC |
>I915_PERF_FLAG_FD_NONBLOCK |
>I915_PERF_FLAG_DISABLED,
>   .properties_ptr = (uint64_t)properties,
>   .num_properties = sizeof(properties) / 16,
>};
>int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &parm);
> 
> Records read all start with a common { type, size } header with
> DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records
> contain an extensible number of fields and it's the
> DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that
> determine what's included in every sample.
> 
> No specific streams are supported yet so any attempt to open a stream
> will return an error.
> 
> v2:
> use i915_gem_context_get() - Chris Wilson
> v3:
> update read() interface to avoid passing state struct - Chris Wilson
> fix some rebase fallout, with i915-perf init/deinit
> v4:
> s/DRM_IORW/DRM_IOW/ - Emil Velikov
> 
> Signed-off-by: Robert Bragg 
> ---
>  drivers/gpu/drm/i915/Makefile|   3 +
>  drivers/gpu/drm/i915/i915_drv.c  |   4 +
>  drivers/gpu/drm/i915/i915_drv.h  |  91 
>  drivers/gpu/drm/i915/i915_perf.c | 443 
> +++
>  include/uapi/drm/i915_drm.h  |  67 ++
>  5 files changed, 608 insertions(+)
>  create mode 100644 drivers/gpu/drm/i915/i915_perf.c
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 6123400..8d4e25f 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -113,6 +113,9 @@ i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o
>  # virtual gpu code
>  i915-y += i915_vgpu.o
>  
> +# perf code
> +i915-y += i915_perf.o
> +
>  ifeq ($(CONFIG_DRM_I915_GVT),y)
>  i915-y += intel_gvt.o
>  include $(src)/gvt/Makefile
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index af3559d..685c96e 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -836,6 +836,8 @@ static int i915_driver_init_early(struct drm_i915_private 
> *dev_priv,
>  
>   intel_detect_preproduction_hw(dev_priv);
>  
> + i915_perf_init(dev_priv);
> +
>   return 0;
>  
>  err_workqueues:
> @@ -849,6 +851,7 @@ static int i915_driver_init_early(struct drm_i915_private 
> *dev_priv,
>   */
>  static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv)
>  {
> + i915_perf_fini(dev_priv);
>   i915_gem_load_cleanup(&dev_priv->drm);
>   i915_workqueues_cleanup(dev_priv);
>  }
> @@ -2556,6 +2559,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
>   DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, 
> DRM_RENDER_ALLOW),
>   DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_GETPARAM, 
> i915_gem_context_getparam_ioctl, DRM_RENDER_ALLOW),
>   DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_SETPARAM, 
> i915_gem_context_setparam_ioctl, DRM_RENDER_ALLOW),
> + DRM_IOCTL_DEF_DRV(I915_PERF_OPEN, i915_perf_open_ioctl, 
> DRM_RENDER_ALLOW),
>  };
>  
>  static struct drm_driver driver = {
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 5a260db..7a65c0b 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1767,6 +1767,84 @@ struct intel_wm_config {
>   bool sprites_scaled;
>  };
>  
> +struct i915_perf_stream;
> +
> +struct i915_perf_stream_ops {
> + /* Enables the collection of HW samples, either in response to
> +  * I915_PERF_IOCTL_ENABLE or implicitly called when stream is
> +  * opened without I915_PERF_FLAG_DISABLED.
> +  */
> + void (*enable)(struct i915_perf_stream *stream);
> +
> + /* Disables the collection of HW samples, either in response to
> +  * I915_PERF_IOCTL_DISABLE or implicitly called before
> +  * destroying the stream.
> +  */
> + void (*disable)(struct i915_perf_stream *stream);
> +
> + /* Return: true if any i915 perf records are ready to read()
> +  * for this 

Re: [Intel-gfx] [PATCH] drm/i915: Fix pages pin counting around swizzle quirk

2016-11-04 Thread Joonas Lahtinen
On ke, 2016-11-02 at 09:43 +0000, Chris Wilson wrote:
> @@ -2458,17 +2459,16 @@ int __i915_gem_object_get_pages(struct 
> drm_i915_gem_object *obj)
>   if (err)
>   return err;
>  
> - if (likely(obj->mm.pages)) {
> - __i915_gem_object_pin_pages(obj);
> - goto unlock;
> - }
> -
> - GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
> + if (unlikely(!obj->mm.pages)) {
> + GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
> + err = i915_gem_object_get_pages(obj);
> + if (err)
> + goto unlock;
>  
> - err = i915_gem_object_get_pages(obj);
> - if (!err)
> - atomic_set_release(&obj->mm.pages_pin_count, 1);
> + smp_mb__before_atomic();

This is not cool without atomic in sight. Inline wrap as
__i915_gem_object_pages_mb() or something.

> @@ -3707,6 +3707,7 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma)
>  {
>   int ret = 0;
>  
> + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));

Rather confusing, simple mind would think as
__i915_gem_object_pin_pages has GEM_BUG_ON(!obj->mm.pages),
the next branch would never be taken?

>   if (vma->pages)
>   return 0;
>  

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v3 1/3] lib: add igt_dummyload

2016-11-04 Thread Abdiel Janulgue


On 11/03/2016 04:38 PM, Ville Syrjälä wrote:
> On Thu, Nov 03, 2016 at 11:40:36AM +0200, Abdiel Janulgue wrote:
>> A lot of igt testcases need some GPU workload to make sure a race
>> window is big enough. Unfortunately having a fixed amount of
>> workload leads to spurious test failures or overtly long runtimes
>> on some fast/slow platforms. This library contains functionality
>> to submit GPU workloads that should consume exactly a specific
>> amount of time.
>>
>> v2 : Add recursive batch feature from Chris
>> v3 : Drop auto-tuned stuff. Add bo dependency to recursive batch
>>  by adding a dummy reloc to the bo as suggested by Ville.
>>
>> Cc: Daniel Vetter 
>> Cc: Ville Syrjälä 
>> Cc: Chris Wilson 
>> Signed-off-by: Abdiel Janulgue 
>> ---
>>  lib/Makefile.sources |   2 +
>>  lib/igt.h|   1 +
>>  lib/igt_dummyload.c  | 274 
>> +++
>>  lib/igt_dummyload.h  |  42 
>>  4 files changed, 319 insertions(+)
>>  create mode 100644 lib/igt_dummyload.c
>>  create mode 100644 lib/igt_dummyload.h
>>
>> diff --git a/lib/Makefile.sources b/lib/Makefile.sources
>> index e8e277b..7fc5ec2 100644
>> --- a/lib/Makefile.sources
>> +++ b/lib/Makefile.sources
>> @@ -75,6 +75,8 @@ lib_source_list =  \
>>  igt_draw.h  \
>>  igt_pm.c\
>>  igt_pm.h\
>> +igt_dummyload.c \
>> +igt_dummyload.h \
>>  uwildmat/uwildmat.h \
>>  uwildmat/uwildmat.c \
>>  $(NULL)
>> diff --git a/lib/igt.h b/lib/igt.h
>> index d751f24..a0028d5 100644
>> --- a/lib/igt.h
>> +++ b/lib/igt.h
>> @@ -32,6 +32,7 @@
>>  #include "igt_core.h"
>>  #include "igt_debugfs.h"
>>  #include "igt_draw.h"
>> +#include "igt_dummyload.h"
>>  #include "igt_fb.h"
>>  #include "igt_gt.h"
>>  #include "igt_kms.h"
>> diff --git a/lib/igt_dummyload.c b/lib/igt_dummyload.c
>> new file mode 100644
>> index 0000000..d37a30b
>> --- /dev/null
>> +++ b/lib/igt_dummyload.c
>> @@ -0,0 +1,274 @@
>> +/*
>> + * Copyright © 2016 Intel Corporation
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the 
>> "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including the next
>> + * paragraph) shall be included in all copies or substantial portions of the
>> + * Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 
>> OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 
>> OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
>> DEALINGS
>> + * IN THE SOFTWARE.
>> + *
>> + */
>> +
>> +#include "igt.h"
>> +#include "igt_dummyload.h"
>> +#include 
>> +#include 
>> +#include 
>> +
>> +/**
>> + * SECTION:igt_dummyload
>> + * @short_description: Library for submitting GPU workloads
>> + * @title: Dummyload
>> + * @include: igt.h
>> + *
>> + * A lot of igt testcases need some GPU workload to make sure a race window 
>> is
>> + * big enough. Unfortunately having a fixed amount of workload leads to
>> + * spurious test failures or overtly long runtimes on some fast/slow 
>> platforms.
>> + * This library contains functionality to submit GPU workloads that should
>> + * consume exactly a specific amount of time.
>> + */
>> +
>> +#define NSEC_PER_SEC 1000000000L
>> +
>> +#define gettid() syscall(__NR_gettid)
>> +#define sigev_notify_thread_id _sigev_un._tid
>> +
>> +#define LOCAL_I915_EXEC_BSD_SHIFT  (13)
>> +#define LOCAL_I915_EXEC_BSD_MASK   (3 << LOCAL_I915_EXEC_BSD_SHIFT)
>> +
>> +#define ENGINE_MASK  (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)
>> +
>> +static void
>> +fill_object(struct drm_i915_gem_exec_object2 *obj, uint32_t gem_handle,
>> +struct drm_i915_gem_relocation_entry *relocs, uint32_t count)
>> +{
>> +memset(obj, 0, sizeof(*obj));
>> +obj->handle = gem_handle;
>> +obj->relocation_count = count;
>> +obj->relocs_ptr = (uintptr_t)relocs;
>> +}
>> +
>> +static void
>> +fill_reloc(struct drm_i915_gem_relocation_entry *reloc,
>> +   uint32_t gem_handle, uint32_t offset,
>> +   uint32_t read_domains, uint32_t write_domains)
>> +{
>> +reloc->target_handle = gem_handle;
>> +

  1   2   >