[Intel-gfx] ✓ Fi.CI.BAT: success for drm: move allocation out of drm_get_format_name()
== Series Details == Series: drm: move allocation out of drm_get_format_name() URL : https://patchwork.freedesktop.org/series/14873/ State : success == Summary == Series 14873v1 drm: move allocation out of drm_get_format_name() https://patchwork.freedesktop.org/api/1.0/series/14873/revisions/1/mbox/ Test gem_sync: Subgroup basic-store-all: fail -> PASS (fi-hsw-4770r) fi-bdw-5557u total:241 pass:226 dwarn:0 dfail:0 fail:0 skip:15 fi-bsw-n3050 total:241 pass:201 dwarn:0 dfail:0 fail:0 skip:40 fi-bxt-t5700 total:241 pass:213 dwarn:0 dfail:0 fail:0 skip:28 fi-byt-j1900 total:241 pass:213 dwarn:0 dfail:0 fail:0 skip:28 fi-byt-n2820 total:241 pass:209 dwarn:0 dfail:0 fail:0 skip:32 fi-hsw-4770 total:241 pass:221 dwarn:0 dfail:0 fail:0 skip:20 fi-hsw-4770r total:241 pass:221 dwarn:0 dfail:0 fail:0 skip:20 fi-ilk-650 total:241 pass:188 dwarn:0 dfail:0 fail:0 skip:53 fi-ivb-3520m total:241 pass:219 dwarn:0 dfail:0 fail:0 skip:22 fi-ivb-3770 total:241 pass:219 dwarn:0 dfail:0 fail:0 skip:22 fi-kbl-7200u total:241 pass:219 dwarn:0 dfail:0 fail:0 skip:22 fi-skl-6260u total:241 pass:227 dwarn:0 dfail:0 fail:0 skip:14 fi-skl-6700hq total:241 pass:220 dwarn:0 dfail:0 fail:0 skip:21 fi-skl-6700k total:241 pass:219 dwarn:1 dfail:0 fail:0 skip:21 fi-skl-6770hq total:241 pass:227 dwarn:0 dfail:0 fail:0 skip:14 fi-snb-2520m total:241 pass:209 dwarn:0 dfail:0 fail:0 skip:32 fi-snb-2600 total:241 pass:208 dwarn:0 dfail:0 fail:0 skip:33 49a651a2e66ef603995f88a470d0986c2ef8b5b8 drm-intel-nightly: 2016y-11m-04d-18h-04m-36s UTC integration manifest 7bc4368 drm: move allocation out of drm_get_format_name() == Logs == For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_2912/ ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm: move allocation out of drm_get_format_name()
Fixes: 90844f00049e9f42573fd31d7c32e8fd31d3fd07 drm: make drm_get_format_name thread-safe Signed-off-by: Eric Engestrom[danvet: Clarify that the returned pointer must be freed with kfree().] Signed-off-by: Daniel Vetter Suggested-by: Ville Syrjälä Signed-off-by: Eric Engestrom --- drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 7 ++--- drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 7 ++--- drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 3 +- drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 7 ++--- drivers/gpu/drm/drm_atomic.c| 7 +++-- drivers/gpu/drm/drm_crtc.c | 7 +++-- drivers/gpu/drm/drm_fourcc.c| 12 +++- drivers/gpu/drm/drm_framebuffer.c | 7 +++-- drivers/gpu/drm/drm_modeset_helper.c| 7 +++-- drivers/gpu/drm/drm_plane.c | 7 +++-- drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c | 7 ++--- drivers/gpu/drm/i915/i915_debugfs.c | 8 ++--- drivers/gpu/drm/i915/intel_atomic_plane.c | 8 ++--- drivers/gpu/drm/i915/intel_display.c| 41 ++--- drivers/gpu/drm/radeon/atombios_crtc.c | 14 - drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 3 +- include/drm/drm_fourcc.h| 3 +- 17 files changed, 71 insertions(+), 84 deletions(-) diff --git a/include/drm/drm_fourcc.h b/include/drm/drm_fourcc.h index dc0aafa..5a8cb4b 100644 --- a/include/drm/drm_fourcc.h +++ b/include/drm/drm_fourcc.h @@ -54,6 +54,7 @@ int drm_format_horz_chroma_subsampling(uint32_t format); int drm_format_vert_chroma_subsampling(uint32_t format); int drm_format_plane_width(int width, uint32_t format, int plane); int drm_format_plane_height(int height, uint32_t format, int plane); -char *drm_get_format_name(uint32_t format) __malloc; +typedef char drm_format_name_buf[32]; +char *drm_get_format_name(uint32_t format, drm_format_name_buf buf); #endif /* __DRM_FOURCC_H__ */ diff --git a/drivers/gpu/drm/drm_fourcc.c b/drivers/gpu/drm/drm_fourcc.c index cbb8b77..34ed520 100644 --- a/drivers/gpu/drm/drm_fourcc.c +++ b/drivers/gpu/drm/drm_fourcc.c @@ -79,17 +79,13 @@ uint32_t drm_mode_legacy_fb_format(uint32_t bpp, uint32_t depth) 
EXPORT_SYMBOL(drm_mode_legacy_fb_format); /** - * drm_get_format_name - return a string for drm fourcc format + * drm_get_format_name - fill a string with a drm fourcc format's name * @format: format to compute name of + * @buf: caller-supplied buffer - * - * Note that the buffer returned by this function is owned by the caller - * and will need to be freed using kfree(). */ -char *drm_get_format_name(uint32_t format) +char *drm_get_format_name(uint32_t format, drm_format_name_buf buf) { - char *buf = kmalloc(32, GFP_KERNEL); - - snprintf(buf, 32, + snprintf(buf, sizeof(drm_format_name_buf), "%c%c%c%c %s-endian (0x%08x)", printable_char(format & 0xff), printable_char((format >> 8) & 0xff), diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 199d3f7..cefa3d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -2032,7 +2032,7 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc, u32 tmp, viewport_w, viewport_h; int r; bool bypass_lut = false; - char *format_name; + drm_format_name_buf format_name; /* no fb bound */ if (!atomic && !crtc->primary->fb) { @@ -2144,9 +2144,8 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc, bypass_lut = true; break; default: - format_name = drm_get_format_name(target_fb->pixel_format); - DRM_ERROR("Unsupported screen format %s\n", format_name); - kfree(format_name); + DRM_ERROR("Unsupported screen format %s\n", + drm_get_format_name(target_fb->pixel_format, format_name)); return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index ecd000e..462abb8 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -2013,7 +2013,7 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc, u32 tmp, viewport_w, viewport_h; int r; bool bypass_lut = false; - char *format_name; + drm_format_name_buf format_name; /* no fb bound */ if 
(!atomic && !crtc->primary->fb) { @@ -2125,9 +2125,8 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc, bypass_lut = true; break; default: - format_name = drm_get_format_name(target_fb->pixel_format); -
Re: [Intel-gfx] [PATCH 1/2] drm/i915: Make sure engines are idle during GPU idling in LR mode
On Sat, 2016-11-05 at 00:32 +0200, Imre Deak wrote: > On Fri, 2016-11-04 at 21:01 +, Chris Wilson wrote: > > On Fri, Nov 04, 2016 at 10:33:24PM +0200, Imre Deak wrote: > > > On Thu, 2016-11-03 at 21:14 +, Chris Wilson wrote: > > > > Where is that guaranteed? I thought we only serialised with the > > > > pm > > > > interrupts. Remember this happens before rpm suspend, since > > > > gem_idle_work_handler is responsible for dropping the GPU > > > > wakelock. > > > > > > I meant that the 100msec after the last request signals > > > completion > > > and > > > this handler is scheduled is normally enough for the context > > > complete > > > interrupt to get delivered. But yea, it's not a guarantee. > > > > If only it was that deterministic! The idle_worker was scheduled > > 100ms > > after some retire_worker, just not necessarily the most recent. So > > it > > could be running exactly as active_requests -> 0 and so before the > > context-interrupt. > > Right, but we don't poll in that case, so there is no overhead. Ok, there is a small window in the idle_worker after the unlocked poll and before taking the lock where a new request could be submitted and retired. In that case active_requests could be 0 after taking the lock and we'd have the poll overhead there. We could detect this by the fact that there is a new idle_worker pending and bail out in that case. We shouldn't idle the GPU in that case anyway. > > Anyway, it was a good find! > > -Chris > > > ___ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/intel-gfx ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 1/2] drm/i915: Make sure engines are idle during GPU idling in LR mode
On Fri, 2016-11-04 at 21:01 +0000, Chris Wilson wrote: > On Fri, Nov 04, 2016 at 10:33:24PM +0200, Imre Deak wrote: > > On Thu, 2016-11-03 at 21:14 +0000, Chris Wilson wrote: > > > Where is that guaranteed? I thought we only serialised with the > > > pm > > > interrupts. Remember this happens before rpm suspend, since > > > gem_idle_work_handler is responsible for dropping the GPU > > > wakelock. > > > > I meant that the 100msec after the last request signals completion > > and > > this handler is scheduled is normally enough for the context > > complete > > interrupt to get delivered. But yea, it's not a guarantee. > > If only it was that deterministic! The idle_worker was scheduled > 100ms > after some retire_worker, just not necessarily the most recent. So it > could be running exactly as active_requests -> 0 and so before the > context-interrupt. Right, but we don't poll in that case, so there is no overhead. > Anyway, it was a good find! > -Chris > ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] RFC gemfs
The hope of this RFC is to gather some high-level feedback and ideas, since I couldn't really find any in-depth discussions on the mailing list regarding gemfs, only the odd whisper. But after talking with Joonas and grepping around, the parts of shmem fs we would initially need to have for a drop-in replacement would roughly be something like: struct file * drm_gemfs_setup_file(const char *name, loff_t size, unsigned long flags) struct page * drm_gemfs_read_page(struct drm_gem_object *obj, pgoff_t index) struct page * drm_gemfs_read_page_gfp(struct drm_gem_object *obj, pgoff_t index, gfp_t gfp) void drm_gemfs_truncate(struct drm_gem_object *obj) Am I missing any? This is pretty much what we already have, minus truncate_range, since we don't care about partial truncation. Also, we now operate at the gem object level and not the mapping; does this seem appropriate? The approach would then be to have our own in-kernel mount point for gemfs, with probably a fair amount of copy-paste from shmem fs, but in what should be a very stripped-down form to suit our needs. Once we have gemfs in place, we should then have the much-needed flexibility to change it as we see fit, for example, being able to control how migration is handled or where the backing pages are allocated from, which could be useful for handling stolen memory etc. Does this all sound reasonable, or am I missing anything? Thanks, Matt ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] ✗ Fi.CI.BAT: failure for drm/dp: Make space for null terminator in the DP device ID char array
== Series Details == Series: drm/dp: Make space for null terminator in the DP device ID char array URL : https://patchwork.freedesktop.org/series/14865/ State : failure == Summary == Series 14865v1 drm/dp: Make space for null terminator in the DP device ID char array https://patchwork.freedesktop.org/api/1.0/series/14865/revisions/1/mbox/ Test gem_ringfill: Subgroup basic-default-hang: pass -> INCOMPLETE (fi-hsw-4770) Test gem_sync: Subgroup basic-store-all: fail -> PASS (fi-hsw-4770r) fi-bdw-5557u total:241 pass:226 dwarn:0 dfail:0 fail:0 skip:15 fi-bsw-n3050 total:241 pass:201 dwarn:0 dfail:0 fail:0 skip:40 fi-bxt-t5700 total:241 pass:213 dwarn:0 dfail:0 fail:0 skip:28 fi-byt-j1900 total:241 pass:213 dwarn:0 dfail:0 fail:0 skip:28 fi-byt-n2820 total:241 pass:209 dwarn:0 dfail:0 fail:0 skip:32 fi-hsw-4770 total:112 pass:102 dwarn:0 dfail:0 fail:0 skip:9 fi-hsw-4770r total:241 pass:221 dwarn:0 dfail:0 fail:0 skip:20 fi-ilk-650 total:241 pass:188 dwarn:0 dfail:0 fail:0 skip:53 fi-ivb-3520m total:241 pass:219 dwarn:0 dfail:0 fail:0 skip:22 fi-ivb-3770 total:241 pass:219 dwarn:0 dfail:0 fail:0 skip:22 fi-kbl-7200u total:241 pass:219 dwarn:0 dfail:0 fail:0 skip:22 fi-skl-6260u total:241 pass:227 dwarn:0 dfail:0 fail:0 skip:14 fi-skl-6700hq total:241 pass:220 dwarn:0 dfail:0 fail:0 skip:21 fi-skl-6700k total:241 pass:219 dwarn:1 dfail:0 fail:0 skip:21 fi-skl-6770hq total:241 pass:227 dwarn:0 dfail:0 fail:0 skip:14 fi-snb-2520m total:241 pass:209 dwarn:0 dfail:0 fail:0 skip:32 fi-snb-2600 total:241 pass:208 dwarn:0 dfail:0 fail:0 skip:33 49a651a2e66ef603995f88a470d0986c2ef8b5b8 drm-intel-nightly: 2016y-11m-04d-18h-04m-36s UTC integration manifest 08543dd drm/dp: Make space for null terminator in the DP device ID char array == Logs == For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_2911/ ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 3/4] drm/i915: Bail if plane/crtc init fails
On Fri, Nov 04, 2016 at 11:07:26PM +0200, Ville Syrjälä wrote: > On Fri, Nov 04, 2016 at 08:48:21PM +, Chris Wilson wrote: > > On Tue, Oct 25, 2016 at 06:58:02PM +0300, ville.syrj...@linux.intel.com > > wrote: > > > From: Ville Syrjälä> > > > > > Due to the plane->index not getting readjusted in drm_plane_cleanup(), > > > we can't continue initialization of some plane/crtc init fails. > > > Well, we sort of could I suppose if we left all initialized planes on > > > the list, but that would expose those planes to userspace as well. > > > > > > But for crtcs the situation is even worse since we assume that > > > pipe==crtc index occasionally, so we can't really deal with a partially > > > initialize set of crtcs. > > > > > > So seems safest to just abort the entire thing if anything goes wrong. > > > All the failure paths here are kmalloc()s anyway, so it seems unlikely > > > we'd get very far if these start failing. > > > > smatch spotted ERR_PTR(0) > > > > > @@ -15296,22 +15304,30 @@ static void intel_crtc_init(struct drm_device > > > *dev, int pipe) > > > } > > > > > > primary = intel_primary_plane_create(dev, pipe); > > > - if (!primary) > > > + if (IS_ERR(primary)) { > > > + ret = PTR_ERR(primary); > > > > Here... > > This looks correct to me, but the cursor and sprite paths are clearly > crap. Brain had already turned off. Yes, it was the plane and cursor, I just goofed in trimming. -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] ✗ Fi.CI.BAT: warning for series starting with [v2,1/2] drm/i915: Make sure engines are idle during GPU idling in LR mode (rev2)
== Series Details == Series: series starting with [v2,1/2] drm/i915: Make sure engines are idle during GPU idling in LR mode (rev2) URL : https://patchwork.freedesktop.org/series/14864/ State : warning == Summary == Series 14864v2 Series without cover letter https://patchwork.freedesktop.org/api/1.0/series/14864/revisions/2/mbox/ Test gem_sync: Subgroup basic-store-all: fail -> PASS (fi-hsw-4770r) Test kms_pipe_crc_basic: Subgroup read-crc-pipe-c: pass -> DMESG-WARN (fi-ivb-3770) fi-bdw-5557u total:241 pass:226 dwarn:0 dfail:0 fail:0 skip:15 fi-bsw-n3050 total:241 pass:201 dwarn:0 dfail:0 fail:0 skip:40 fi-byt-j1900 total:241 pass:213 dwarn:0 dfail:0 fail:0 skip:28 fi-byt-n2820 total:241 pass:209 dwarn:0 dfail:0 fail:0 skip:32 fi-hsw-4770 total:241 pass:221 dwarn:0 dfail:0 fail:0 skip:20 fi-hsw-4770r total:241 pass:221 dwarn:0 dfail:0 fail:0 skip:20 fi-ilk-650 total:241 pass:188 dwarn:0 dfail:0 fail:0 skip:53 fi-ivb-3520m total:241 pass:219 dwarn:0 dfail:0 fail:0 skip:22 fi-ivb-3770 total:241 pass:218 dwarn:1 dfail:0 fail:0 skip:22 fi-kbl-7200u total:241 pass:219 dwarn:0 dfail:0 fail:0 skip:22 fi-skl-6260u total:241 pass:227 dwarn:0 dfail:0 fail:0 skip:14 fi-skl-6700hq total:241 pass:220 dwarn:0 dfail:0 fail:0 skip:21 fi-skl-6700k total:241 pass:219 dwarn:1 dfail:0 fail:0 skip:21 fi-skl-6770hq total:241 pass:227 dwarn:0 dfail:0 fail:0 skip:14 fi-snb-2520m total:241 pass:209 dwarn:0 dfail:0 fail:0 skip:32 fi-snb-2600 total:241 pass:208 dwarn:0 dfail:0 fail:0 skip:33 49a651a2e66ef603995f88a470d0986c2ef8b5b8 drm-intel-nightly: 2016y-11m-04d-18h-04m-36s UTC integration manifest 80739c2 drm/i915: Add assert for no pending GPU requests during suspend/resume in LR mode ff4dfe2 drm/i915: Make sure engines are idle during GPU idling in LR mode == Logs == For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_2910/ ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 3/4] drm/i915: Bail if plane/crtc init fails
On Fri, Nov 04, 2016 at 08:48:21PM +, Chris Wilson wrote: > On Tue, Oct 25, 2016 at 06:58:02PM +0300, ville.syrj...@linux.intel.com wrote: > > From: Ville Syrjälä> > > > Due to the plane->index not getting readjusted in drm_plane_cleanup(), > > we can't continue initialization of some plane/crtc init fails. > > Well, we sort of could I suppose if we left all initialized planes on > > the list, but that would expose those planes to userspace as well. > > > > But for crtcs the situation is even worse since we assume that > > pipe==crtc index occasionally, so we can't really deal with a partially > > initialize set of crtcs. > > > > So seems safest to just abort the entire thing if anything goes wrong. > > All the failure paths here are kmalloc()s anyway, so it seems unlikely > > we'd get very far if these start failing. > > smatch spotted ERR_PTR(0) > > > @@ -15296,22 +15304,30 @@ static void intel_crtc_init(struct drm_device > > *dev, int pipe) > > } > > > > primary = intel_primary_plane_create(dev, pipe); > > - if (!primary) > > + if (IS_ERR(primary)) { > > + ret = PTR_ERR(primary); > > Here... This looks correct to me, but the cursor and sprite paths are clearly crap. > > > goto fail; > > + } > > > > for_each_sprite(dev_priv, pipe, sprite) { > > - ret = intel_plane_init(dev, pipe, sprite); > > - if (ret) > > - DRM_DEBUG_KMS("pipe %c sprite %c init failed: %d\n", > > - pipe_name(pipe), sprite_name(pipe, > > sprite), ret); > > + struct intel_plane *plane; > > + > > + plane = intel_sprite_plane_create(dev, pipe, sprite); > > + if (!plane) { > > + ret = PTR_ERR(plane); > > and here. > > -- > Chris Wilson, Intel Open Source Technology Centre -- Ville Syrjälä Intel OTC ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/dp: Make space for null terminator in the DP device ID char array
The DP device identification string read from the DPCD registers is at most 6 characters long, and we store it in a char array of the same length, leaving no space for the null terminator. Fix this by increasing the array size to 7 and initializing it to an empty string. Signed-off-by: Dhinakaran Pandiyan --- drivers/gpu/drm/drm_dp_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c index 3e6fe82..3a39312 100644 --- a/drivers/gpu/drm/drm_dp_helper.c +++ b/drivers/gpu/drm/drm_dp_helper.c @@ -544,7 +544,7 @@ void drm_dp_downstream_debug(struct seq_file *m, DP_DETAILED_CAP_INFO_AVAILABLE; int clk; int bpc; - char id[6]; + char id[7] = ""; int len; uint8_t rev[2]; int type = port_cap[0] & DP_DS_PORT_TYPE_MASK; -- 2.7.4 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v3 2/2] drm/i915: Add assert for no pending GPU requests during suspend/resume in LR mode
On Fri, Nov 04, 2016 at 10:58:52PM +0200, Imre Deak wrote: > During resume we will reset the SW/HW tracking for each ring head/tail > pointers and so are not prepared to replay any pending requests (as > opposed to GPU reset time). Add an assert for this both to the suspend > and the resume code. > > v2: > - Check for ELSP port idle already during suspend and check !gt.awake > during resume. (Chris) > v3: > - Move the !gt.awake check to i915_gem_resume(). > > Cc: Chris Wilson> Cc: Mika Kuoppala > Signed-off-by: Imre Deak > --- > drivers/gpu/drm/i915/i915_gem.c | 3 +++ > 1 file changed, 3 insertions(+) > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 81ea88c..c344abc 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -4437,6 +4437,7 @@ int i915_gem_suspend(struct drm_device *dev) >* reset the GPU back to its idle, low power state. >*/ > WARN_ON(dev_priv->gt.awake); > + WARN_ON(i915.enable_execlists && !intel_lr_engines_idle(dev_priv)); Just WARN_ON(!intel_execlists_idle(dev_priv)); Being forward thinking intel_execlists_submission_idle(). Reviewed-by: Chris Wilson -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 1/2] drm/i915: Make sure engines are idle during GPU idling in LR mode
On Fri, Nov 04, 2016 at 10:33:24PM +0200, Imre Deak wrote: > On Thu, 2016-11-03 at 21:14 +0000, Chris Wilson wrote: > > Where is that guaranteed? I thought we only serialised with the pm > > interrupts. Remember this happens before rpm suspend, since > > gem_idle_work_handler is responsible for dropping the GPU wakelock. > > I meant that the 100msec after the last request signals completion and > this handler is scheduled is normally enough for the context complete > interrupt to get delivered. But yea, it's not a guarantee. If only it was that deterministic! The idle_worker was scheduled 100ms after some retire_worker, just not necessarily the most recent. So it could be running exactly as active_requests -> 0 and so before the context-interrupt. Anyway, it was a good find! -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v3 2/2] drm/i915: Add assert for no pending GPU requests during suspend/resume in LR mode
During resume we will reset the SW/HW tracking for each ring head/tail pointers and so are not prepared to replay any pending requests (as opposed to GPU reset time). Add an assert for this both to the suspend and the resume code. v2: - Check for ELSP port idle already during suspend and check !gt.awake during resume. (Chris) v3: - Move the !gt.awake check to i915_gem_resume(). Cc: Chris Wilson Cc: Mika Kuoppala Signed-off-by: Imre Deak --- drivers/gpu/drm/i915/i915_gem.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 81ea88c..c344abc 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4437,6 +4437,7 @@ int i915_gem_suspend(struct drm_device *dev) * reset the GPU back to its idle, low power state. */ WARN_ON(dev_priv->gt.awake); + WARN_ON(i915.enable_execlists && !intel_lr_engines_idle(dev_priv)); /* * Neither the BIOS, ourselves or any other kernel @@ -4473,6 +4474,8 @@ void i915_gem_resume(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); + WARN_ON(dev_priv->gt.awake); + mutex_lock(&dev->struct_mutex); i915_gem_restore_gtt_mappings(dev); -- 2.5.0 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 3/4] drm/i915: Bail if plane/crtc init fails
On Tue, Oct 25, 2016 at 06:58:02PM +0300, ville.syrj...@linux.intel.com wrote: > From: Ville Syrjälä > > Due to the plane->index not getting readjusted in drm_plane_cleanup(), > we can't continue initialization if some plane/crtc init fails. > Well, we sort of could I suppose if we left all initialized planes on > the list, but that would expose those planes to userspace as well. > > But for crtcs the situation is even worse since we assume that > pipe==crtc index occasionally, so we can't really deal with a partially > initialized set of crtcs. > > So seems safest to just abort the entire thing if anything goes wrong. > All the failure paths here are kmalloc()s anyway, so it seems unlikely > we'd get very far if these start failing. smatch spotted ERR_PTR(0) > @@ -15296,22 +15304,30 @@ static void intel_crtc_init(struct drm_device *dev, > int pipe) > } > > primary = intel_primary_plane_create(dev, pipe); > - if (!primary) > + if (IS_ERR(primary)) { > + ret = PTR_ERR(primary); Here... > goto fail; > + } > > for_each_sprite(dev_priv, pipe, sprite) { > - ret = intel_plane_init(dev, pipe, sprite); > - if (ret) > - DRM_DEBUG_KMS("pipe %c sprite %c init failed: %d\n", > - pipe_name(pipe), sprite_name(pipe, > sprite), ret); > + struct intel_plane *plane; > + > + plane = intel_sprite_plane_create(dev, pipe, sprite); > + if (!plane) { > + ret = PTR_ERR(plane); and here. -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 2/2] drm/i915: Add assert for no pending GPU requests during suspend/resume in LR mode
During resume we will reset the SW/HW tracking for each ring head/tail pointers and so are not prepared to replay any pending requests (as opposed to GPU reset time). Add an assert for this both to the suspend and the resume code. v2: - Check for ELSP port idle already during suspend and check !gt.awake during resume. (Chris) Cc: Chris Wilson Cc: Mika Kuoppala Signed-off-by: Imre Deak --- drivers/gpu/drm/i915/i915_gem.c | 1 + drivers/gpu/drm/i915/intel_lrc.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 81ea88c..acc2030 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4437,6 +4437,7 @@ int i915_gem_suspend(struct drm_device *dev) * reset the GPU back to its idle, low power state. */ WARN_ON(dev_priv->gt.awake); + WARN_ON(i915.enable_execlists && !intel_lr_engines_idle(dev_priv)); /* * Neither the BIOS, ourselves or any other kernel diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 7aa5665..77577d1 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -2165,6 +2165,8 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv) if (WARN_ON(IS_ERR(reg))) continue; + WARN_ON(dev_priv->gt.awake); + reg += LRC_STATE_PN * PAGE_SIZE / sizeof(*reg); reg[CTX_RING_HEAD+1] = 0; reg[CTX_RING_TAIL+1] = 0; -- 2.5.0 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 1/2] drm/i915: Make sure engines are idle during GPU idling in LR mode
We assume that the GPU is idle once receiving the seqno via the last request's user interrupt. In execlist mode the corresponding context completed interrupt can be delayed though and until this latter interrupt arrives we consider the request to be pending on the ELSP submit port. This can cause a problem during system suspend where this last request will be seen by the resume code as still pending. Such pending requests are normally replayed after a GPU reset, but during resume we reset both SW and HW tracking of the ring head/tail pointers, so replaying the pending request with its stale tail pointer will leave the ring in an inconsistent state. A subsequent request submission can lead then to the GPU executing from uninitialized area in the ring behind the above stale tail pointer. Fix this by making sure any pending request on the ELSP port is completed before suspending. I used a polling wait since the completion time I measured was <1ms and since normally we only need to wait during system suspend. GPU idling during runtime suspend is scheduled with a delay (currently 50-100ms) after the retirement of the last request at which point the context completed interrupt must have arrived already. The chance of this bug was increased by commit 1c777c5d1dcdf8fa0223fcff35fb387b5bb9517a Author: Imre DeakDate: Wed Oct 12 17:46:37 2016 +0300 drm/i915/hsw: Fix GPU hang during resume from S3-devices state but it could happen even without the explicit GPU reset, since we disable interrupts afterwards during the suspend sequence. v2: - Do an unlocked poll-wait first. 
(Chris) Cc: Chris Wilson Cc: Mika Kuoppala Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98470 Signed-off-by: Imre Deak --- drivers/gpu/drm/i915/i915_gem.c | 8 drivers/gpu/drm/i915/intel_lrc.c | 19 +++ drivers/gpu/drm/i915/intel_lrc.h | 1 + 3 files changed, 28 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 0dbf38c..81ea88c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2752,6 +2752,10 @@ i915_gem_idle_work_handler(struct work_struct *work) if (!READ_ONCE(dev_priv->gt.awake)) return; + if (i915.enable_execlists) + wait_for(READ_ONCE(dev_priv->gt.active_requests) || +intel_lr_engines_idle(dev_priv), 10); + if (READ_ONCE(dev_priv->gt.active_requests)) return; @@ -2769,6 +2773,10 @@ i915_gem_idle_work_handler(struct work_struct *work) if (dev_priv->gt.active_requests) goto out_unlock; + if (i915.enable_execlists && + wait_for(intel_lr_engines_idle(dev_priv), 10)) + DRM_ERROR("Timeout waiting for engines to idle\n"); + for_each_engine(engine, dev_priv, id) i915_gem_batch_pool_fini(&engine->batch_pool); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index fa3012c..7aa5665 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -522,6 +522,25 @@ static bool execlists_elsp_idle(struct intel_engine_cs *engine) return !engine->execlist_port[0].request; } +/** + * intel_lr_engines_idle() - Determine if all engine submission ports are idle + * @dev_priv: i915 device private + * + * Return true if there are no requests pending on any of the submission ports + * of any engines. 
+ */ +bool intel_lr_engines_idle(struct drm_i915_private *dev_priv) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, dev_priv, id) + if (!execlists_elsp_idle(engine)) + return false; + + return true; +} + static bool execlists_elsp_ready(struct intel_engine_cs *engine) { int port; diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 4fed816..c855ffb 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -87,6 +87,7 @@ void intel_lr_context_unpin(struct i915_gem_context *ctx, struct drm_i915_private; +bool intel_lr_engines_idle(struct drm_i915_private *dev_priv); void intel_lr_context_resume(struct drm_i915_private *dev_priv); uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx, struct intel_engine_cs *engine); -- 2.5.0 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 1/2] drm/i915: Make sure engines are idle during GPU idling in LR mode
On Thu, 2016-11-03 at 21:14 +, Chris Wilson wrote: > On Thu, Nov 03, 2016 at 10:57:23PM +0200, Imre Deak wrote: > > On Thu, 2016-11-03 at 18:59 +, Chris Wilson wrote: > > > On Thu, Nov 03, 2016 at 06:19:37PM +0200, Imre Deak wrote: > > > > We assume that the GPU is idle once receiving the seqno via the last > > > > request's user interrupt. In execlist mode the corresponding context > > > > completed interrupt can be delayed though and until this latter > > > > interrupt arrives we consider the request to be pending on the ELSP > > > > submit port. This can cause a problem during system suspend where this > > > > last request will be seen by the resume code as still pending. Such > > > > pending requests are normally replayed after a GPU reset, but during > > > > resume we reset both SW and HW tracking of the ring head/tail pointers, > > > > so replaying the pending request with its stale tale pointer will leave > > > > the ring in an inconsistent state. A subsequent request submission can > > > > lead then to the GPU executing from uninitialized area in the ring > > > > behind the above stale tail pointer. > > > > > > > > Fix this by making sure any pending request on the ELSP port is > > > > completed before suspending. I used a polling wait since the completion > > > > time I measured was <1ms and since normally we only need to wait during > > > > system suspend. GPU idling during runtime suspend is scheduled with a > > > > delay (currently 50-100ms) after the retirement of the last request at > > > > which point the context completed interrupt must have arrived already. 
> > > > > > > > The chance of this bug was increased by > > > > > > > > commit 1c777c5d1dcdf8fa0223fcff35fb387b5bb9517a > > > > Author: Imre Deak> > > > Date: Wed Oct 12 17:46:37 2016 +0300 > > > > > > > > drm/i915/hsw: Fix GPU hang during resume from S3-devices state > > > > > > > > but it could happen even without the explicit GPU reset, since we > > > > disable interrupts afterwards during the suspend sequence. > > > > > > > > Cc: Chris Wilson > > > > Cc: Mika Kuoppala > > > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98470 > > > > Signed-off-by: Imre Deak > > > > --- > > > > drivers/gpu/drm/i915/i915_gem.c | 3 +++ > > > > drivers/gpu/drm/i915/intel_lrc.c | 12 > > > > drivers/gpu/drm/i915/intel_lrc.h | 1 + > > > > 3 files changed, 16 insertions(+) > > > > > > > > diff --git a/drivers/gpu/drm/i915/i915_gem.c > > > > b/drivers/gpu/drm/i915/i915_gem.c > > > > index 1f995ce..5ff02b5 100644 > > > > --- a/drivers/gpu/drm/i915/i915_gem.c > > > > +++ b/drivers/gpu/drm/i915/i915_gem.c > > > > @@ -2766,6 +2766,9 @@ i915_gem_idle_work_handler(struct work_struct > > > > *work) > > > > if (dev_priv->gt.active_requests) > > > > goto out_unlock; > > > > > > > > + if (i915.enable_execlists) > > > > + intel_lr_wait_engines_idle(dev_priv); > > > > > > Idle work handler... So runtime suspend. > > > Anyway this is not an ideal place for a stall under struct_mutex (even if > > > 16x10us, it's the principle!). > > > > During runtime suspend this won't add any overhead since the context > > done interrupt happened already (unless there is a bug somewhere else). > > Where is that guaranteed? I thought we only serialised with the pm > interrupts. Remember this happens before rpm suspend, since > gem_idle_work_handler is responsible for dropping the GPU wakelock. I meant that the 100msec after the last request signals completion and this handler is scheduled is normally enough for the context complete interrupt to get delivered. But yea, it's not a guarantee. 
> > > Move this to before the first READ_ONCE(dev_priv->gt.active_requests); > > > so we stall before taking the lock, and skip if any new requests arrive > > > whilst waiting. > > > > > > (Also i915.enable_execlists is forbidden. But meh) > > > > > > static struct drm_i915_gem_request * > > > execlists_active_port(struct intel_engine_cs *engine) > > > { > > > struct drm_i915_gem_request *request; > > > > > > request = READ_ONCE(engine->execlist_port[1]); > > > if (request) > > > return request; > > > > > > return READ_ONCE(engine->execlist_port[0]); > > > } > > > > > > /* Wait for execlists to settle, but bail if any new requests come in */ > > > for_each_engine(engine, dev_priv, id) { > > > struct drm_i915_gem_request *request; > > > > > > request = execlists_active_port(engine); > > > if (!request) > > > continue; > > > > > > if (wait_for(execlists_active_port(engine) != request, 10)) > > > DRM_ERROR("Timeout waiting for %s to idle\n", engine->name); > > > } > > > > Hm, but we still need to re-check and bail out if not idle with > > struct_mutex held, since gt.active_requests could go 0->1->0 before > > taking struct_mutex? I can rewrite things
Re: [Intel-gfx] [PATCH] drm/i915: Perform object clflushing asynchronously
On Fri, Nov 04, 2016 at 08:03:57PM +, Chris Wilson wrote: > Flushing the cachelines for an object is slow, can be as much as 100ms > for a large framebuffer. We currently do this under the struct_mutex BKL > on execution or on pageflip. But now with the ability to add fences to > obj->resv for both flips and execbuf (and we naturally wait on the fence > before CPU access), we can move the clflush operation to a workqueue and > signal a fence for completion, thereby doing the work asynchronously and > not blocking the driver or its clients. > > Suggested-by: Akash Goel > Signed-off-by: Chris Wilson > Cc: Akash Goel Needs a bit more work to restrict the async operations. In the end, I think only the explicit paths towards execbuf / flip should opt in, as the majority will want sync (pread/pwrite/set-domain). This idea came up in a discussion on whether we needed create2 for early clflush or whether we could exploit set-domain for the same functionality. Now, we can do the clflush asynchronously from create, but we must do it synchronously in set-domain (albeit now it could be done outside of the struct_mutex). -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915: Perform object clflushing asynchronously
Flushing the cachelines for an object is slow, can be as much as 100ms for a large framebuffer. We currently do this under the struct_mutex BKL on execution or on pageflip. But now with the ability to add fences to obj->resv for both flips and execbuf (and we naturally wait on the fence before CPU access), we can move the clflush operation to a workqueue and signal a fence for completion, thereby doing the work asynchronously and not blocking the driver or its clients. Suggested-by: Akash GoelSigned-off-by: Chris Wilson Cc: Akash Goel --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_drv.h| 8 +- drivers/gpu/drm/i915/i915_gem.c| 60 +++-- drivers/gpu/drm/i915/i915_gem_clflush.c| 138 + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 6 +- drivers/gpu/drm/i915/intel_display.c | 57 ++-- 6 files changed, 190 insertions(+), 80 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_gem_clflush.c diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 0857e5035f4d..6afd402e440b 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -29,6 +29,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o # GEM code i915-y += i915_cmd_parser.o \ i915_gem_batch_pool.o \ + i915_gem_clflush.o \ i915_gem_context.o \ i915_gem_dmabuf.o \ i915_gem_evict.o \ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2754d5de76af..c80044267333 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3410,7 +3410,13 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error) void i915_gem_reset(struct drm_i915_private *dev_priv); void i915_gem_set_wedged(struct drm_i915_private *dev_priv); -bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); + +void i915_gem_clflush_init(struct drm_i915_private *i915); +int i915_gem_clflush_object(struct drm_i915_gem_object *obj, + unsigned int flags); +#define I915_CLFLUSH_FORCE BIT(0) +#define I915_CLFLUSH_SYNC 
BIT(1) + int __must_check i915_gem_init(struct drm_device *dev); int __must_check i915_gem_init_hw(struct drm_device *dev); void i915_gem_init_swizzling(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index cffe60237b6a..524f72774537 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -230,7 +230,7 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj) obj->mm.dirty = false; if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) - i915_gem_clflush_object(obj, false); + i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); obj->base.read_domains = I915_GEM_DOMAIN_CPU; obj->base.write_domain = I915_GEM_DOMAIN_CPU; @@ -1570,6 +1570,11 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, mutex_unlock(>struct_mutex); + if (err == 0) + err = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT, + NULL); if (write_domain != 0) intel_fb_obj_invalidate(obj, write_origin(obj, write_domain)); @@ -3236,44 +3241,6 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) return ret; } -bool -i915_gem_clflush_object(struct drm_i915_gem_object *obj, - bool force) -{ - /* If we don't have a page list set up, then we're not pinned -* to GPU, and we can ignore the cache flush because it'll happen -* again at bind time. -*/ - if (!obj->mm.pages) - return false; - - /* -* Stolen memory is always coherent with the GPU as it is explicitly -* marked as wc by the system, or the system is cache-coherent. -*/ - if (obj->stolen || obj->phys_handle) - return false; - - /* If the GPU is snooping the contents of the CPU cache, -* we do not need to manually clear the CPU cache lines. However, -* the caches are only snooped when the render cache is -* flushed/invalidated. 
As we always have to emit invalidations -* and flushes when moving into and out of the RENDER domain, correct -* snooping behaviour occurs naturally as the result of our domain -* tracking. -*/ - if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) { - obj->cache_dirty = true; - return false; - } - - trace_i915_gem_object_clflush(obj); - drm_clflush_sg(obj->mm.pages); -
Re: [Intel-gfx] [PATCH v4 2/8] drm/i915/skl: New ddb allocation algorithm
Em Qui, 2016-10-13 às 16:28 +0530, Kumar, Mahesh escreveu: > From: Mahesh Kumar > > This patch implements new DDB allocation algorithm as per HW team > recommendation. This algo takes care of scenario where we allocate less > DDB > for the planes with lower relative pixel rate, but they require more > DDB > to work. > It also takes care of enabling same watermark level for each > plane, for efficient power saving. > > Changes since v1: > - Rebase on top of Paulo's patch series > > Changes since v2: > - Fix the for loop condition to enable WM > > Changes since v3: > - Fix crash in cursor i-g-t reported by Maarten > - Rebase after addressing Paulo's comments > - Few other ULT fixes > This will require a huge rebase due to the things that were already merged and those that are about to be merged. Also, this is a general improvement while the other patches are bug fixes. Can you please move this to the end of the series? I'd really like to get the other things merged first, in case we decide to backport the fixes. 
> Signed-off-by: Mahesh Kumar > --- > drivers/gpu/drm/i915/intel_pm.c | 149 +- > -- > 1 file changed, 79 insertions(+), 70 deletions(-) > > diff --git a/drivers/gpu/drm/i915/intel_pm.c > b/drivers/gpu/drm/i915/intel_pm.c > index 098336d..84ec6b1 100644 > --- a/drivers/gpu/drm/i915/intel_pm.c > +++ b/drivers/gpu/drm/i915/intel_pm.c > @@ -3344,6 +3344,7 @@ skl_ddb_min_alloc(const struct drm_plane_state > *pstate, > > static int > skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, > + struct skl_pipe_wm *pipe_wm, > struct skl_ddb_allocation *ddb /* out */) > { > struct drm_atomic_state *state = cstate->base.state; > @@ -3359,8 +3360,12 @@ skl_allocate_pipe_ddb(struct intel_crtc_state > *cstate, > uint16_t *minimum = cstate->wm.skl.minimum_blocks; > uint16_t *y_minimum = cstate->wm.skl.minimum_y_blocks; > unsigned int total_data_rate; > + uint16_t total_min_blocks = 0; > + uint16_t total_level_ddb = 0; > int num_active; > - int id, i; > + int max_level, level; > + int id, i, ret = 0; > + > > if (WARN_ON(!state)) > return 0; > @@ -3409,19 +3414,42 @@ skl_allocate_pipe_ddb(struct intel_crtc_state > *cstate, > } > > for (i = 0; i < PLANE_CURSOR; i++) { > - alloc_size -= minimum[i]; > - alloc_size -= y_minimum[i]; > + total_min_blocks += minimum[i]; > + total_min_blocks += y_minimum[i]; > } > > - /* > - * 2. Distribute the remaining space in proportion to the > amount of > - * data each plane needs to fetch from memory. > - * > - * FIXME: we may not allocate every single block here. > - */ > + for (level = ilk_wm_max_level(dev); level >= 0; level--) { > + total_level_ddb = 0; > + for (i = 0; i < PLANE_CURSOR; i++) { > + /* > + * TODO: We should calculate watermark > values for Y/UV > + * plane both in case of NV12 format and use > both values > + * for ddb calculation, As NV12 is disabled > as of now. > + * using only single plane value here. 
> + */ > + uint16_t min = minimum[i] + y_minimum[i]; > + uint16_t plane_level_ddb_wm = > + max(pipe_wm- > >wm[level].plane_res_b[i], min); > + total_level_ddb += plane_level_ddb_wm; > + } > + > + if (total_level_ddb <= alloc_size) > + break; > + } > + > + if ((level < 0) || (total_min_blocks > alloc_size)) { > + DRM_DEBUG_KMS("Requested display configuration > exceeds system DDB limitations"); > + DRM_DEBUG_KMS("minimum required %d/%d\n", (level < > 0) ? > + total_level_ddb : total_min_blocks, > alloc_size); > + ret = -EINVAL; > + goto exit; > + } > + max_level = level; > + alloc_size -= total_level_ddb; > + > total_data_rate = skl_get_total_relative_data_rate(cstate); > if (total_data_rate == 0) > - return 0; > + goto exit; > > start = alloc->start; > for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) { > @@ -3436,7 +3464,8 @@ skl_allocate_pipe_ddb(struct intel_crtc_state > *cstate, > * promote the expression to 64 bits to avoid > overflowing, the > * result is < available as data_rate / > total_data_rate < 1 > */ > - plane_blocks = minimum[id]; > + plane_blocks = max(pipe_wm- > >wm[max_level].plane_res_b[id], > + minimum[id]); > plane_blocks += div_u64((uint64_t)alloc_size * > data_rate, >
Re: [Intel-gfx] [PATCH v4 8/8] drm/i915/bxt: Enable IPC support
Em Qui, 2016-10-13 às 16:28 +0530, Kumar, Mahesh escreveu: > From: Mahesh Kumar> > This patch adds IPC support for platforms. This patch enables IPC > only for BXT/KBL platform as for SKL recommendation is to keep is > disabled. > IPC (Isochronous Priority Control) is the hardware feature, which > dynamically controles the memory read priority of Display. > > When IPC is enabled, plane read requests are sent at high priority > until > filling above the transition watermark, then the requests are sent at > lower priority until dropping below the level 0 watermark. > The lower priority requests allow other memory clients to have better > memory access. When IPC is disabled, all plane read requests are sent > at > high priority. > > Changes since V1: > - Remove commandline parameter to disable ipc > - Address Paulo's comments > In addition to what others said, we also need the linetime/2 WA if we want to enable IPC. Also, see below. > Signed-off-by: Mahesh Kumar > --- > drivers/gpu/drm/i915/i915_drv.c | 2 ++ > drivers/gpu/drm/i915/i915_reg.h | 1 + > drivers/gpu/drm/i915/intel_drv.h | 1 + > drivers/gpu/drm/i915/intel_pm.c | 15 +++ > 4 files changed, 19 insertions(+) > > diff --git a/drivers/gpu/drm/i915/i915_drv.c > b/drivers/gpu/drm/i915/i915_drv.c > index b5f601c..58abbaa 100644 > --- a/drivers/gpu/drm/i915/i915_drv.c > +++ b/drivers/gpu/drm/i915/i915_drv.c > @@ -1415,6 +1415,8 @@ int i915_driver_load(struct pci_dev *pdev, > const struct pci_device_id *ent) > > intel_runtime_pm_enable(dev_priv); > > + intel_enable_ipc(dev_priv); > + > /* Everything is in place, we can now relax! 
*/ > DRM_INFO("Initialized %s %d.%d.%d %s for %s on minor %d\n", > driver.name, driver.major, driver.minor, > driver.patchlevel, > diff --git a/drivers/gpu/drm/i915/i915_reg.h > b/drivers/gpu/drm/i915/i915_reg.h > index a9c467c..c9ebf23 100644 > --- a/drivers/gpu/drm/i915/i915_reg.h > +++ b/drivers/gpu/drm/i915/i915_reg.h > @@ -6144,6 +6144,7 @@ enum { > #define DISP_FBC_WM_DIS (1<<15) > #define DISP_ARB_CTL2_MMIO(0x45004) > #define DISP_DATA_PARTITION_5_6 (1<<6) > +#define DISP_IPC_ENABLE (1<<3) > #define DBUF_CTL _MMIO(0x45008) > #define DBUF_POWER_REQUEST (1<<31) > #define DBUF_POWER_STATE(1<<30) > diff --git a/drivers/gpu/drm/i915/intel_drv.h > b/drivers/gpu/drm/i915/intel_drv.h > index 2c1897b..45b0fa4 100644 > --- a/drivers/gpu/drm/i915/intel_drv.h > +++ b/drivers/gpu/drm/i915/intel_drv.h > @@ -1766,6 +1766,7 @@ void skl_write_plane_wm(struct intel_crtc > *intel_crtc, > uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state > *pipe_config); > bool ilk_disable_lp_wm(struct drm_device *dev); > int sanitize_rc6_option(struct drm_i915_private *dev_priv, int > enable_rc6); > +void intel_enable_ipc(struct drm_i915_private *dev_priv); > static inline int intel_enable_rc6(void) > { > return i915.enable_rc6; > diff --git a/drivers/gpu/drm/i915/intel_pm.c > b/drivers/gpu/drm/i915/intel_pm.c > index 4263212..543aa5d 100644 > --- a/drivers/gpu/drm/i915/intel_pm.c > +++ b/drivers/gpu/drm/i915/intel_pm.c > @@ -4833,6 +4833,21 @@ void intel_update_watermarks(struct drm_crtc > *crtc) > dev_priv->display.update_wm(crtc); > } > > +void intel_enable_ipc(struct drm_i915_private *dev_priv) > +{ > + u32 val; > + > + /* enable IPC only for Broxton for now*/ > + if (!IS_BROXTON(dev_priv) || !IS_KABYLAKE(dev_priv)) > + return; This will always return... 
> + > + val = I915_READ(DISP_ARB_CTL2); > + > + val |= DISP_IPC_ENABLE; > + > + I915_WRITE(DISP_ARB_CTL2, val); > +} > + > /* > * Lock protecting IPS related data structures > */ ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v4 7/8] drm/i915/skl+: change WM calc to fixed point 16.16
Em Qui, 2016-10-13 às 16:28 +0530, Kumar, Mahesh escreveu: > From: Mahesh Kumar> > This patch changes Watermak calculation to fixed point calculation. > Problem with current calculation is during plane_blocks_per_line > calculation we divide intermediate blocks with min_scanlines and > takes floor of the result because of integer operation. > hence we end-up assigning less blocks than required. Which leads to > flickers. > There are still variables that got auto-converted to 16.16 and need to be adjusted because later they are mixed with non-16.16 in non-safe ways. The fact that's it's hard to identify these things really worries me. > Signed-off-by: Mahesh Kumar > --- > drivers/gpu/drm/i915/intel_pm.c | 16 +++- > 1 file changed, 11 insertions(+), 5 deletions(-) > > diff --git a/drivers/gpu/drm/i915/intel_pm.c > b/drivers/gpu/drm/i915/intel_pm.c > index 0eaaadc..4263212 100644 > --- a/drivers/gpu/drm/i915/intel_pm.c > +++ b/drivers/gpu/drm/i915/intel_pm.c > @@ -3527,16 +3527,19 @@ static uint32_t skl_pipe_pixel_rate(const > struct intel_crtc_state *config) > * for the read latency) and cpp should always be <= 8, so that > * should allow pixel_rate up to ~2 GHz which seems sufficient since > max > * 2xcdclk is 1350 MHz and the pixel rate should never exceed that. 
> + * Both Method1 & Method2 returns fixedpoint 16.16 output > */ > static uint32_t skl_wm_method1(uint32_t pixel_rate, uint8_t cpp, > uint32_t latency) > { > - uint32_t wm_intermediate_val, ret; > + uint64_t wm_intermediate_val; > + uint32_t ret; > > if (latency == 0) > return UINT_MAX; > > - wm_intermediate_val = latency * pixel_rate * cpp / 512; > - ret = DIV_ROUND_UP(wm_intermediate_val, 1000); > + wm_intermediate_val = latency * pixel_rate * cpp; > + wm_intermediate_val <<= 16; > + ret = DIV_ROUND_UP_ULL(wm_intermediate_val, 1000 * 512); > > return ret; > } > @@ -3658,12 +3661,15 @@ static int skl_compute_plane_wm(const struct > drm_i915_private *dev_priv, > if (y_tiled) { > plane_blocks_per_line = > DIV_ROUND_UP(plane_bytes_per_line * > y_min_scanlines, 512); > - plane_blocks_per_line /= y_min_scanlines; > + plane_blocks_per_line = (plane_blocks_per_line << > 16) / > + y_mi > n_scanlines; > } else if (x_tiled) { > plane_blocks_per_line = > DIV_ROUND_UP(plane_bytes_per_line, 512); > + plane_blocks_per_line <<= 16; > } else { > plane_blocks_per_line = > DIV_ROUND_UP(plane_bytes_per_line, 512) > + 1; > + plane_blocks_per_line <<= 16; > } > > method1 = skl_wm_method1(plane_pixel_rate, cpp, latency); > @@ -3690,7 +3696,7 @@ static int skl_compute_plane_wm(const struct > drm_i915_private *dev_priv, > selected_result = method1; > } > > - res_blocks = selected_result + 1; > + res_blocks = DIV_ROUND_UP(selected_result, 1 << 16) + 1; > res_lines = DIV_ROUND_UP(selected_result, > plane_blocks_per_line); > > if (level >= 1 && level <= 7) { ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v4 1/2] drm/i915/dp: Enable DP audio stall fix for gen9 platforms
On Fri, 2016-11-04 at 17:48 +0200, Jani Nikula wrote: > On Wed, 26 Oct 2016, Dhinakaran Pandiyan> wrote: > > Enabling DP audio stall fix is necessary to play audio over DP HBR2. So, > > let's set this bit right before enabling the audio codec. Playing audio > > without setting this bit results in pipe FIFO underruns. > > > > This workaround is applicable only for audio sample rates up to 96kHz. For > > frequencies above 96kHz, this is insufficient and cdclk should be increased > > to at least 432 MHz, just like BDW. Since, the audio driver does not > > support sample rates > 48 kHz, we are safe with this fix for now. > > Do we still need this patch now that these two have been pushed? > > b30ce9e0552a drm/i915/dp: BDW cdclk fix for DP audio > 9c7540241885 drm/i915/dp: Extend BDW DP audio workaround to GEN9 platforms > > BR, > Jani. > > > No, we are good afaik. This patch would have helped us to make use of a lower cdclk (337.5 MHz), with constraints on audio bit rate. Operating at 432 MHz, like we do now, rules out the need for this patch. 
-DK > > > > v2: Inlined the code change within hsw_audio_codec_enable() (Jani) > > Fixed the port clock typo > > Added TODO comment > > Signed-off-by: Dhinakaran Pandiyan > > --- > > drivers/gpu/drm/i915/i915_reg.h| 5 + > > drivers/gpu/drm/i915/intel_audio.c | 30 +- > > 2 files changed, 34 insertions(+), 1 deletion(-) > > > > diff --git a/drivers/gpu/drm/i915/i915_reg.h > > b/drivers/gpu/drm/i915/i915_reg.h > > index 00efaa1..76dac48 100644 > > --- a/drivers/gpu/drm/i915/i915_reg.h > > +++ b/drivers/gpu/drm/i915/i915_reg.h > > @@ -6236,6 +6236,11 @@ enum { > > #define SLICE_ECO_CHICKEN0 _MMIO(0x7308) > > #define PIXEL_MASK_CAMMING_DISABLE (1 << 14) > > > > +#define _CHICKEN_TRANS_A 0x420C0 > > +#define _CHICKEN_TRANS_B 0x420C4 > > +#define CHICKEN_TRANS(tran) _MMIO_TRANS(tran, _CHICKEN_TRANS_A, > > _CHICKEN_TRANS_B) > > +#define SPARE_13 (1<<13) > > + > > /* WaCatErrorRejectionIssue */ > > #define GEN7_SQ_CHICKEN_MBCUNIT_CONFIG _MMIO(0x9030) > > #define GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB (1<<11) > > diff --git a/drivers/gpu/drm/i915/intel_audio.c > > b/drivers/gpu/drm/i915/intel_audio.c > > index 7093cfb..894f11e 100644 > > --- a/drivers/gpu/drm/i915/intel_audio.c > > +++ b/drivers/gpu/drm/i915/intel_audio.c > > @@ -283,6 +283,8 @@ static void hsw_audio_codec_disable(struct > > intel_encoder *encoder) > > { > > struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); > > struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc); > > + struct intel_crtc_state *crtc_config = intel_crtc->config; > > + enum transcoder cpu_transcoder = crtc_config->cpu_transcoder; > > enum pipe pipe = intel_crtc->pipe; > > uint32_t tmp; > > > > @@ -290,13 +292,21 @@ static void hsw_audio_codec_disable(struct > > intel_encoder *encoder) > > > > mutex_lock(_priv->av_mutex); > > > > + /*Disable DP audio stall fix for HBR2*/ > > + if (IS_GEN9(dev_priv) && intel_crtc_has_dp_encoder(crtc_config) && > > + crtc_config->port_clock >= 54) { > > + tmp = 
I915_READ(CHICKEN_TRANS(cpu_transcoder)); > > + tmp &= ~SPARE_13; > > + I915_WRITE(CHICKEN_TRANS(cpu_transcoder), tmp); > > + } > > + > > /* Disable timestamps */ > > tmp = I915_READ(HSW_AUD_CFG(pipe)); > > tmp &= ~AUD_CONFIG_N_VALUE_INDEX; > > tmp |= AUD_CONFIG_N_PROG_ENABLE; > > tmp &= ~AUD_CONFIG_UPPER_N_MASK; > > tmp &= ~AUD_CONFIG_LOWER_N_MASK; > > - if (intel_crtc_has_dp_encoder(intel_crtc->config)) > > + if (intel_crtc_has_dp_encoder(crtc_config)) > > tmp |= AUD_CONFIG_N_VALUE_INDEX; > > I915_WRITE(HSW_AUD_CFG(pipe), tmp); > > > > @@ -315,6 +325,8 @@ static void hsw_audio_codec_enable(struct drm_connector > > *connector, > > { > > struct drm_i915_private *dev_priv = to_i915(connector->dev); > > struct intel_crtc *intel_crtc = to_intel_crtc(intel_encoder->base.crtc); > > + struct intel_crtc_state *crtc_config = intel_crtc->config; > > + enum transcoder cpu_transcoder = crtc_config->cpu_transcoder; > > enum pipe pipe = intel_crtc->pipe; > > enum port port = intel_encoder->port; > > const uint8_t *eld = connector->eld; > > @@ -326,6 +338,22 @@ static void hsw_audio_codec_enable(struct > > drm_connector *connector, > > > > mutex_lock(_priv->av_mutex); > > > > + /* Enable DP audio stall fix for HBR2 > > +* > > +* TODO: This workaround is applicable only for audio sample rates up > > +* to 96kHz. For frequencies above 96kHz, this is insufficient and > > +* cdclk should be increased to at least 432 MHz, just like BDW. Since, > > +* the audio driver does not support sample rates > 48 kHz, we are safe > > +* with this fix for now. > > +*/ > > + > > + if
[Intel-gfx] ✗ Fi.CI.BAT: failure for drm/i915: Remove the vma from the object list upon close
== Series Details == Series: drm/i915: Remove the vma from the object list upon close URL : https://patchwork.freedesktop.org/series/14850/ State : failure == Summary == Series 14850v1 drm/i915: Remove the vma from the object list upon close https://patchwork.freedesktop.org/api/1.0/series/14850/revisions/1/mbox/ Test gem_busy: Subgroup basic-hang-default: pass -> FAIL (fi-hsw-4770r) fi-bdw-5557u total:241 pass:226 dwarn:0 dfail:0 fail:0 skip:15 fi-bsw-n3050 total:241 pass:201 dwarn:0 dfail:0 fail:0 skip:40 fi-bxt-t5700 total:241 pass:213 dwarn:0 dfail:0 fail:0 skip:28 fi-byt-j1900 total:241 pass:213 dwarn:0 dfail:0 fail:0 skip:28 fi-byt-n2820 total:241 pass:209 dwarn:0 dfail:0 fail:0 skip:32 fi-hsw-4770 total:241 pass:221 dwarn:0 dfail:0 fail:0 skip:20 fi-hsw-4770r total:241 pass:220 dwarn:0 dfail:0 fail:1 skip:20 fi-ilk-650 total:241 pass:188 dwarn:0 dfail:0 fail:0 skip:53 fi-ivb-3520m total:241 pass:219 dwarn:0 dfail:0 fail:0 skip:22 fi-ivb-3770 total:241 pass:219 dwarn:0 dfail:0 fail:0 skip:22 fi-kbl-7200u total:241 pass:219 dwarn:0 dfail:0 fail:0 skip:22 fi-skl-6260u total:241 pass:227 dwarn:0 dfail:0 fail:0 skip:14 fi-skl-6700hqtotal:241 pass:220 dwarn:0 dfail:0 fail:0 skip:21 fi-skl-6700k total:241 pass:219 dwarn:1 dfail:0 fail:0 skip:21 fi-skl-6770hqtotal:241 pass:227 dwarn:0 dfail:0 fail:0 skip:14 fi-snb-2520m total:241 pass:209 dwarn:0 dfail:0 fail:0 skip:32 fi-snb-2600 total:241 pass:208 dwarn:0 dfail:0 fail:0 skip:33 00d2fcf7c84de382bd2ceb5eaf908f76900d0791 drm-intel-nightly: 2016y-11m-04d-15h-43m-43s UTC integration manifest e8336dc drm/i915: Remove the vma from the object list upon close == Logs == For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_2908/ ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v4 6/8] drm/i915/skl: Add variables to check x_tile and y_tile
Em Qui, 2016-10-13 às 16:28 +0530, Kumar, Mahesh escreveu: > From: Mahesh Kumar> > This patch adds variable to check for X_tiled & y_tiled planes, > instead > of always checking against framebuffer-modifiers. > > Changes: > - Created separate patch as per Paulo's comment > - Added x_tiled variable as well > > Signed-off-by: Mahesh Kumar > --- > drivers/gpu/drm/i915/intel_pm.c | 22 +- > 1 file changed, 13 insertions(+), 9 deletions(-) > > diff --git a/drivers/gpu/drm/i915/intel_pm.c > b/drivers/gpu/drm/i915/intel_pm.c > index a668204..0eaaadc 100644 > --- a/drivers/gpu/drm/i915/intel_pm.c > +++ b/drivers/gpu/drm/i915/intel_pm.c > @@ -3602,6 +3602,7 @@ static int skl_compute_plane_wm(const struct > drm_i915_private *dev_priv, > uint32_t plane_pixel_rate; > uint32_t y_tile_minimum, y_min_scanlines; > enum watermark_memory_wa mem_wa; > + bool y_tiled = false, x_tiled = false; > > if (latency == 0 || !cstate->base.active || !intel_pstate- > >base.visible) > return 0; > @@ -3621,6 +3622,12 @@ static int skl_compute_plane_wm(const struct > drm_i915_private *dev_priv, > cpp = drm_format_plane_cpp(fb->pixel_format, 0); > plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate, > intel_pstate); > > + if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED || > + fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) > + y_tiled = true; > + else if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED) > + x_tiled = true; > + Or you could go with the simpler: y_tiled = fb->modifier[0] == Y_TILED || fb->modifier[0] == Yf_TILED; x_tiled = fb->modifier[0] == X_TILED; And this would allow you to even remove the initialization to false above, and would allow the compiler to complain in case we try to use uninitialized values. But that's just an optional bikeshed. Anyway, I like the patch but it needs a rebase. It's better to just include this patch in the beginning of the series so we can merge it more easily, independently of the others. 
> if (intel_rotation_90_or_270(pstate->rotation)) { > int cpp = (fb->pixel_format == DRM_FORMAT_NV12) ? > drm_format_plane_cpp(fb->pixel_format, 1) : > @@ -3648,16 +3655,15 @@ static int skl_compute_plane_wm(const struct > drm_i915_private *dev_priv, > y_min_scanlines *= 2; > > plane_bytes_per_line = width * cpp; > - if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED || > - fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) { > + if (y_tiled) { > plane_blocks_per_line = > DIV_ROUND_UP(plane_bytes_per_line * > y_min_scanlines, 512); > plane_blocks_per_line /= y_min_scanlines; > - } else if (fb->modifier[0] == DRM_FORMAT_MOD_NONE) { > + } else if (x_tiled) { > + plane_blocks_per_line = > DIV_ROUND_UP(plane_bytes_per_line, 512); > + } else { > plane_blocks_per_line = > DIV_ROUND_UP(plane_bytes_per_line, 512) > + 1; > - } else { > - plane_blocks_per_line = > DIV_ROUND_UP(plane_bytes_per_line, 512); > } > > method1 = skl_wm_method1(plane_pixel_rate, cpp, latency); > @@ -3668,8 +3674,7 @@ static int skl_compute_plane_wm(const struct > drm_i915_private *dev_priv, > > y_tile_minimum = plane_blocks_per_line * y_min_scanlines; > > - if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED || > - fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) { > + if (y_tiled) { > selected_result = max(method2, y_tile_minimum); > } else { > uint32_t linetime_us = 0; > @@ -3689,8 +3694,7 @@ static int skl_compute_plane_wm(const struct > drm_i915_private *dev_priv, > res_lines = DIV_ROUND_UP(selected_result, > plane_blocks_per_line); > > if (level >= 1 && level <= 7) { > - if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED || > - fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) { > + if (y_tiled) { > res_blocks += y_tile_minimum; > res_lines += y_min_scanlines; > } else { ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v4 5/8] drm/i915/skl+: reset y_plane ddb structure also during calculation
Em Qui, 2016-10-13 às 16:28 +0530, Kumar, Mahesh escreveu: > From: Mahesh Kumar > > Current code clears only plane ddb allocation if total ddb allocated > to > pipe is zero. y_plane ddb still contains old value, clear that as > well. > > Signed-off-by: Mahesh Kumar > --- > drivers/gpu/drm/i915/intel_pm.c | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/drivers/gpu/drm/i915/intel_pm.c > b/drivers/gpu/drm/i915/intel_pm.c > index 5b8f715..a668204 100644 > --- a/drivers/gpu/drm/i915/intel_pm.c > +++ b/drivers/gpu/drm/i915/intel_pm.c > @@ -3381,6 +3381,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state > *cstate, > alloc_size = skl_ddb_entry_size(alloc); > if (alloc_size == 0) { > memset(ddb->plane[pipe], 0, sizeof(ddb- > >plane[pipe])); > + memset(ddb->y_plane[pipe], 0, sizeof(ddb- > >y_plane[pipe])); With the latest code we can just remove both memset() calls. > return 0; > } > ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v4 4/8] drm/i915/gen9: WM memory bandwidth related workaround
On Fri, Nov 04, 2016 at 03:09:04PM -0200, Paulo Zanoni wrote: > Em Qui, 2016-10-13 às 16:28 +0530, Kumar, Mahesh escreveu: > > This patch implemnets Workariunds related to display arbitrated > > memory > > bandwidth. These WA are applicabe for all gen-9 based platforms. > > > > Changes since v1: > > - Rebase on top of Paulo's patch series > > Changes since v2: > > - Rebase/rework after addressing Paulo's comments in previous patch > > A lot of this code has changed since then, so this will need a > significant rebase. In the meantime, I added skl_needs_memory_bw_wa() > and we're now applying the WA by default: we just won't apply the WA > when we're pretty sure we don't need to. This helps avoiding underruns > by default. > > See more below. > > > > > > Signed-off-by: "Kumar, Mahesh"> > --- > > drivers/gpu/drm/i915/i915_drv.h | 9 +++ > > drivers/gpu/drm/i915/intel_drv.h | 11 +++ > > drivers/gpu/drm/i915/intel_pm.c | 146 > > +++ > > 3 files changed, 166 insertions(+) > > > > diff --git a/drivers/gpu/drm/i915/i915_drv.h > > b/drivers/gpu/drm/i915/i915_drv.h > > index adbd9aa..c169360 100644 > > --- a/drivers/gpu/drm/i915/i915_drv.h > > +++ b/drivers/gpu/drm/i915/i915_drv.h > > @@ -1092,6 +1092,13 @@ enum intel_sbi_destination { > > SBI_MPHY, > > }; > > > > +/* SKL+ Watermark arbitrated display bandwidth Workarounds */ > > +enum watermark_memory_wa { > > + WATERMARK_WA_NONE, > > + WATERMARK_WA_X_TILED, > > + WATERMARK_WA_Y_TILED, > > +}; > > + > > #define QUIRK_PIPEA_FORCE (1<<0) > > #define QUIRK_LVDS_SSC_DISABLE (1<<1) > > #define QUIRK_INVERT_BRIGHTNESS (1<<2) > > @@ -1644,6 +1651,8 @@ struct skl_ddb_allocation { > > > > struct skl_wm_values { > > unsigned dirty_pipes; > > + /* any WaterMark memory workaround Required */ > > We can remove this comment since it doesn't say anything the variable > name doesn't. 
> > > + enum watermark_memory_wa mem_wa; > > Now that we have a proper variable in the state struct, it probably > makes sense to just kill skl_needs_memory_bw_wa() and read this > variable when we need to. > > > > struct skl_ddb_allocation ddb; > > uint32_t wm_linetime[I915_MAX_PIPES]; > > uint32_t plane[I915_MAX_PIPES][I915_MAX_PLANES][8]; > > diff --git a/drivers/gpu/drm/i915/intel_drv.h > > b/drivers/gpu/drm/i915/intel_drv.h > > index f48e79a..2c1897b 100644 > > --- a/drivers/gpu/drm/i915/intel_drv.h > > +++ b/drivers/gpu/drm/i915/intel_drv.h > > @@ -1813,6 +1813,17 @@ intel_atomic_get_crtc_state(struct > > drm_atomic_state *state, > > return to_intel_crtc_state(crtc_state); > > } > > > > +static inline struct intel_crtc_state * > > +intel_atomic_get_existing_crtc_state(struct drm_atomic_state *state, > > + struct intel_crtc *crtc) > > +{ > > + struct drm_crtc_state *crtc_state; > > + > > + crtc_state = drm_atomic_get_existing_crtc_state(state, > > >base); > > + > > + return to_intel_crtc_state(crtc_state); > > I really don't like the idea of calling to_intel_crtc_state() on a > potentially NULL pointer so the caller of this function will also check > for NULL. Even though it works today, I still think it's unsafe > practice. Please check crtc_state for NULL directly and then return > NULL. I want to make this safe by making it a compile error if offsetof(base) != 0. https://lists.freedesktop.org/archives/intel-gfx/2016-October/108175.html But I think we want to go further than that patch by adding a bit more type safety to things. I did play around with this stuff a bit more, and I have something sitting on a branch, but I didn't quite figure out what I want to do about const vs. non const yet. > > Also, I think this function should be extracted to its own commit, and > we'd probably be able to find some callers in the existing i915 code. I have, on some branch again, _intel_ versions of the for_each_foo_in_state() macros as well. 
I think those are going to allow a lot of ugly casting stuff to disappear. But I think I'll hold off until Maarten's new iterators go in before I try to send those out. -- Ville Syrjälä Intel OTC ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/2] shmem: Support for registration of driver/file owner specific ops
== Series Details == Series: series starting with [1/2] shmem: Support for registration of driver/file owner specific ops URL : https://patchwork.freedesktop.org/series/14845/ State : success == Summary == Series 14845v1 Series without cover letter https://patchwork.freedesktop.org/api/1.0/series/14845/revisions/1/mbox/ fi-bdw-5557u total:241 pass:226 dwarn:0 dfail:0 fail:0 skip:15 fi-bsw-n3050 total:241 pass:201 dwarn:0 dfail:0 fail:0 skip:40 fi-byt-j1900 total:241 pass:213 dwarn:0 dfail:0 fail:0 skip:28 fi-byt-n2820 total:241 pass:209 dwarn:0 dfail:0 fail:0 skip:32 fi-hsw-4770 total:241 pass:221 dwarn:0 dfail:0 fail:0 skip:20 fi-hsw-4770r total:241 pass:221 dwarn:0 dfail:0 fail:0 skip:20 fi-ilk-650 total:241 pass:188 dwarn:0 dfail:0 fail:0 skip:53 fi-ivb-3520m total:241 pass:219 dwarn:0 dfail:0 fail:0 skip:22 fi-ivb-3770 total:241 pass:219 dwarn:0 dfail:0 fail:0 skip:22 fi-kbl-7200u total:241 pass:219 dwarn:0 dfail:0 fail:0 skip:22 fi-skl-6260u total:241 pass:227 dwarn:0 dfail:0 fail:0 skip:14 fi-skl-6700hqtotal:241 pass:220 dwarn:0 dfail:0 fail:0 skip:21 fi-skl-6700k total:241 pass:219 dwarn:1 dfail:0 fail:0 skip:21 fi-skl-6770hqtotal:241 pass:227 dwarn:0 dfail:0 fail:0 skip:14 fi-snb-2520m total:241 pass:209 dwarn:0 dfail:0 fail:0 skip:32 fi-snb-2600 total:241 pass:208 dwarn:0 dfail:0 fail:0 skip:33 00d2fcf7c84de382bd2ceb5eaf908f76900d0791 drm-intel-nightly: 2016y-11m-04d-15h-43m-43s UTC integration manifest 95d9dd7 drm/i915: Make GPU pages movable e67b361 shmem: Support for registration of driver/file owner specific ops == Logs == For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_2907/ ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v4 4/8] drm/i915/gen9: WM memory bandwidth related workaround
Em Qui, 2016-10-13 às 16:28 +0530, Kumar, Mahesh escreveu: > This patch implemnets Workariunds related to display arbitrated > memory > bandwidth. These WA are applicabe for all gen-9 based platforms. > > Changes since v1: > - Rebase on top of Paulo's patch series > Changes since v2: > - Rebase/rework after addressing Paulo's comments in previous patch A lot of this code has changed since then, so this will need a significant rebase. In the meantime, I added skl_needs_memory_bw_wa() and we're now applying the WA by default: we just won't apply the WA when we're pretty sure we don't need to. This helps avoiding underruns by default. See more below. > > Signed-off-by: "Kumar, Mahesh"> --- > drivers/gpu/drm/i915/i915_drv.h | 9 +++ > drivers/gpu/drm/i915/intel_drv.h | 11 +++ > drivers/gpu/drm/i915/intel_pm.c | 146 > +++ > 3 files changed, 166 insertions(+) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h > b/drivers/gpu/drm/i915/i915_drv.h > index adbd9aa..c169360 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1092,6 +1092,13 @@ enum intel_sbi_destination { > SBI_MPHY, > }; > > +/* SKL+ Watermark arbitrated display bandwidth Workarounds */ > +enum watermark_memory_wa { > + WATERMARK_WA_NONE, > + WATERMARK_WA_X_TILED, > + WATERMARK_WA_Y_TILED, > +}; > + > #define QUIRK_PIPEA_FORCE (1<<0) > #define QUIRK_LVDS_SSC_DISABLE (1<<1) > #define QUIRK_INVERT_BRIGHTNESS (1<<2) > @@ -1644,6 +1651,8 @@ struct skl_ddb_allocation { > > struct skl_wm_values { > unsigned dirty_pipes; > + /* any WaterMark memory workaround Required */ We can remove this comment since it doesn't say anything the variable name doesn't. > + enum watermark_memory_wa mem_wa; Now that we have a proper variable in the state struct, it probably makes sense to just kill skl_needs_memory_bw_wa() and read this variable when we need to. 
> struct skl_ddb_allocation ddb; > uint32_t wm_linetime[I915_MAX_PIPES]; > uint32_t plane[I915_MAX_PIPES][I915_MAX_PLANES][8]; > diff --git a/drivers/gpu/drm/i915/intel_drv.h > b/drivers/gpu/drm/i915/intel_drv.h > index f48e79a..2c1897b 100644 > --- a/drivers/gpu/drm/i915/intel_drv.h > +++ b/drivers/gpu/drm/i915/intel_drv.h > @@ -1813,6 +1813,17 @@ intel_atomic_get_crtc_state(struct > drm_atomic_state *state, > return to_intel_crtc_state(crtc_state); > } > > +static inline struct intel_crtc_state * > +intel_atomic_get_existing_crtc_state(struct drm_atomic_state *state, > + struct intel_crtc *crtc) > +{ > + struct drm_crtc_state *crtc_state; > + > + crtc_state = drm_atomic_get_existing_crtc_state(state, > >base); > + > + return to_intel_crtc_state(crtc_state); I really don't like the idea of calling to_intel_crtc_state() on a potentially NULL pointer so the caller of this function will also check for NULL. Even though it works today, I still think it's unsafe practice. Please check crtc_state for NULL directly and then return NULL. Also, I think this function should be extracted to its own commit, and we'd probably be able to find some callers in the existing i915 code. 
> +} > + > static inline struct intel_plane_state * > intel_atomic_get_existing_plane_state(struct drm_atomic_state > *state, > struct intel_plane *plane) > diff --git a/drivers/gpu/drm/i915/intel_pm.c > b/drivers/gpu/drm/i915/intel_pm.c > index 84ec6b1..5b8f715 100644 > --- a/drivers/gpu/drm/i915/intel_pm.c > +++ b/drivers/gpu/drm/i915/intel_pm.c > @@ -3589,6 +3589,8 @@ static int skl_compute_plane_wm(const struct > drm_i915_private *dev_priv, > { > struct drm_plane_state *pstate = _pstate->base; > struct drm_framebuffer *fb = pstate->fb; > + struct intel_atomic_state *intel_state = > + to_intel_atomic_state(cstate->base.state); > uint32_t latency = dev_priv->wm.skl_latency[level]; > uint32_t method1, method2; > uint32_t plane_bytes_per_line, plane_blocks_per_line; > @@ -3598,10 +3600,17 @@ static int skl_compute_plane_wm(const struct > drm_i915_private *dev_priv, > uint32_t width = 0, height = 0; > uint32_t plane_pixel_rate; > uint32_t y_tile_minimum, y_min_scanlines; > + enum watermark_memory_wa mem_wa; > > if (latency == 0 || !cstate->base.active || !intel_pstate- > >base.visible) > return 0; > > + mem_wa = intel_state ? intel_state->wm_results.mem_wa : > WATERMARK_WA_NONE; > + if (mem_wa != WATERMARK_WA_NONE) { > + if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED) > + latency += 15; > + } > + > width = drm_rect_width(_pstate->base.src) >> 16; > height = drm_rect_height(_pstate->base.src) >> 16; > > @@ -3634,6 +3643,9 @@ static int skl_compute_plane_wm(const struct > drm_i915_private *dev_priv, >
Re: [Intel-gfx] linux-next: manual merge of the mali-dp tree with the drm-misc tree
On Sat, Nov 05, 2016 at 03:55:03AM +1100, Stephen Rothwell wrote: > Hi Liviu, > > On Fri, 4 Nov 2016 15:48:02 + Liviu Dudau wrote: > > > > Brian Starkey is a co-maintainer for the Mali DP tree, so his Signed-off-by > > alone should be good. Baoyou's patch is in my tree to stop him repeatedly > > sending me the same patch over and over again :) But yes, I will add my > > Signed-off-by for that one. > > Sorry, but this is not sufficient. Please read section 11 of > Documentation/SubmittingPatches (or > Documentation/process/submitting-patches.rst where it has been moved > recently). If you are in the path of a patch to Linus, you must add a > Signed-off-by line, and as the person who committed those patches to > the tree, you are in the path. Thanks for correcting me. I will add my Signed-off-bys to the relevant patches. Best regards, Liviu > -- > Cheers, > Stephen Rothwell -- | I would like to | | fix the world, | | but they're not | | giving me the | \ source code! / --- ¯\_(ツ)_/¯ ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] linux-next: manual merge of the mali-dp tree with the drm-misc tree
Hi Liviu, On Fri, 4 Nov 2016 15:48:02 + Liviu Dudau wrote: > > Brian Starkey is a co-maintainer for the Mali DP tree, so his Signed-off-by > alone should be good. Baoyou's patch is in my tree to stop him repeatedly > sending me the same patch over and over again :) But yes, I will add my > Signed-off-by for that one. Sorry, but this is not sufficient. Please read section 11 of Documentation/SubmittingPatches (or Documentation/process/submitting-patches.rst where it has been moved recently). If you are in the path of a patch to Linus, you must add a Signed-off-by line, and as the person who committed those patches to the tree, you are in the path. -- Cheers, Stephen Rothwell ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 2/5] drm/i915: More assorted dev_priv cleanups
On Fri, Nov 04, 2016 at 04:03:55PM +, Tvrtko Ursulin wrote: > > On 04/11/2016 15:32, Ville Syrjälä wrote: > > On Fri, Nov 04, 2016 at 02:42:45PM +, Tvrtko Ursulin wrote: > >> From: Tvrtko Ursulin> >> > >> A small selection of macros which can only accept dev_priv from > >> now on and a resulting trickle of fixups. > >> > >> Signed-off-by: Tvrtko Ursulin > >> --- > >> drivers/gpu/drm/i915/i915_drv.h | 27 --- > >> drivers/gpu/drm/i915/i915_gpu_error.c | 2 +- > >> drivers/gpu/drm/i915/i915_irq.c | 6 +++--- > >> drivers/gpu/drm/i915/intel_crt.c | 8 > >> drivers/gpu/drm/i915/intel_display.c | 4 ++-- > >> drivers/gpu/drm/i915/intel_dp.c | 2 +- > >> drivers/gpu/drm/i915/intel_hotplug.c | 2 +- > >> drivers/gpu/drm/i915/intel_psr.c | 2 +- > >> 8 files changed, 25 insertions(+), 28 deletions(-) > >> > >> diff --git a/drivers/gpu/drm/i915/i915_drv.h > >> b/drivers/gpu/drm/i915/i915_drv.h > >> index 45a30f730216..6060e41d25e5 100644 > >> --- a/drivers/gpu/drm/i915/i915_drv.h > >> +++ b/drivers/gpu/drm/i915/i915_drv.h > >> @@ -2901,28 +2901,25 @@ struct drm_i915_cmd_table { > >> #define HAS_128_BYTE_Y_TILING(dev_priv) (!IS_GEN2(dev_priv) && \ > >> !(IS_I915G(dev_priv) || \ > >> IS_I915GM(dev_priv))) > >> -#define SUPPORTS_TV(dev) (INTEL_INFO(dev)->supports_tv) > >> -#define I915_HAS_HOTPLUG(dev) (INTEL_INFO(dev)->has_hotplug) > >> - > >> -#define HAS_FW_BLC(dev_priv) (INTEL_GEN(dev_priv) > 2) > >> -#define HAS_PIPE_CXSR(dev) (INTEL_INFO(dev)->has_pipe_cxsr) > >> -#define HAS_FBC(dev) (INTEL_INFO(dev)->has_fbc) > >> +#define SUPPORTS_TV(dev_priv) ((dev_priv)->info.supports_tv) > >> +#define I915_HAS_HOTPLUG(dev_priv)((dev_priv)->info.has_hotplug) > >> > >> +#define HAS_FW_BLC(dev_priv) (INTEL_GEN(dev_priv) > 2) > >> +#define HAS_PIPE_CXSR(dev_priv) ((dev_priv)->info.has_pipe_cxsr) > >> +#define HAS_FBC(dev_priv) ((dev_priv)->info.has_fbc) > >> #define HAS_IPS(dev_priv) (IS_HSW_ULT(dev_priv) || IS_BROADWELL(dev_priv)) > >> - > >> -#define HAS_DP_MST(dev) 
(INTEL_INFO(dev)->has_dp_mst) > >> - > >> +#define HAS_DP_MST(dev_priv) ((dev_priv)->info.has_dp_mst) > >> #define HAS_DDI(dev_priv) ((dev_priv)->info.has_ddi) > >> -#define HAS_FPGA_DBG_UNCLAIMED(dev) (INTEL_INFO(dev)->has_fpga_dbg) > >> -#define HAS_PSR(dev) (INTEL_INFO(dev)->has_psr) > >> -#define HAS_RC6(dev) (INTEL_INFO(dev)->has_rc6) > >> -#define HAS_RC6p(dev) (INTEL_INFO(dev)->has_rc6p) > >> - > >> -#define HAS_CSR(dev) (INTEL_INFO(dev)->has_csr) > >> +#define HAS_PSR(dev_priv) ((dev_priv)->info.has_psr) > >> +#define HAS_RC6(dev_priv) ((dev_priv)->info.has_rc6) > >> +#define HAS_RC6p(dev_priv)((dev_priv)->info.has_rc6p) > >> +#define HAS_CSR(dev_priv) ((dev_priv)->info.has_csr) > >> > >> #define HAS_RUNTIME_PM(dev_priv) ((dev_priv)->info.has_runtime_pm) > >> #define HAS_64BIT_RELOC(dev_priv) ((dev_priv)->info.has_64bit_reloc) > >> > >> +#define HAS_FPGA_DBG_UNCLAIMED(dev_priv) ((dev_priv)->info.has_fpga_dbg) > > > > What's confusing me is this reordering of these macros. Was there a > > particular reason for doing that? > > Just because of its long name, so I pulled it out and separated so the > alignment is nicer in the blocks above it. The original grouping looked more based on functionality, so made a bit more sense to me. > > > Outside that it all looks pretty reasonable. Could got a bit further > > with passing around dev_priv in some cases, but I guess we can leave > > that to future work. > > Yes, I mention that in the cover letter. > > > One random idea that did pop into my head was this: > > > > static inline const struct ... * > > intel_info(struct drm_i915_private *dev_priv) > > { > > return _priv->info; > > } > > #define HAS_WHATEVER(dev_priv) (intel_info(dev_priv)->whatever) > > > > for some extra type safety. Any thoughts? > > Sounds like a good idea to me. And it would be really easy to do, > localized to i915_drv.h, and then when the last INTEL_INFO(dev) gets > converted we can make it use the inline as well. 
> > Regards, > > Tvrtko -- Ville Syrjälä Intel OTC ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915: Remove the vma from the object list upon close
Currently, the vma is being unlink from the object lookup on destroy. However, we are meant to be decoupling it upon close so that the user cannot access the closed vma whilst it remains active on the GPU. [ 34.074858] kernel BUG at drivers/gpu/drm/i915/i915_gem_gtt.c:3561! [ 34.074875] invalid opcode: [#1] PREEMPT SMP [ 34.074888] Modules linked in: snd_hda_intel i915 x86_pkg_temp_thermal coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel lpc_ich mei_me mei snd_hda_codec_realtek snd_hda_codec_generic snd_hda_codec_hdmi snd_hda_codec snd_hwdep snd_hda_core i2c_designware_platform i2c_designware_core snd_pcm e1000e ptp pps_core sdhci_acpi sdhci mmc_core i2c_hid [last unloaded: i915] [ 34.075010] CPU: 1 PID: 6224 Comm: gem_close_race Tainted: G U 4.9.0-rc3-CI-CI_DRM_1800+ #1 [ 34.075034] Hardware name: /NUC5i7RYB, BIOS RYBDWi35.86A.0355.2016.0224.1501 02/24/2016 [ 34.075057] task: 8802459a8040 task.stack: c9524000 [ 34.075074] RIP: 0010:[] [] i915_gem_obj_lookup_or_create_vma+0x8c/0xc0 [i915] [ 34.075118] RSP: 0018:c9527b68 EFLAGS: 00010202 [ 34.075135] RAX: 8802426c5e40 RBX: RCX: 8802447fc2a8 [ 34.075158] RDX: RSI: 8802447fc2a8 RDI: 880248a4a880 [ 34.075181] RBP: c9527b88 R08: 0008 R09: [ 34.075203] R10: 0001 R11: R12: 880248a4a880 [ 34.075225] R13: 8802447fc2a8 R14: 880243e9afa8 R15: 880248a4a9c8 [ 34.075248] FS: 7f9b43e59740() GS:880256c8() knlGS: [ 34.075273] CS: 0010 DS: ES: CR0: 80050033 [ 34.075292] CR2: 7f9b43419140 CR3: 00024455d000 CR4: 003406e0 [ 34.075314] Stack: [ 34.075323] c9527bd0 880243cb8008 880243e9afa8 [ 34.075353] c9527c08 a03874c7 c9527bb8 880243e9afa8 [ 34.075383] 880243e9afb0 c9527e10 8802447fc2a8 880243cb8040 [ 34.075414] Call Trace: [ 34.075435] [] eb_lookup_vmas.isra.7+0x247/0x330 [i915] [ 34.075468] [] i915_gem_do_execbuffer.isra.15+0x604/0x1a10 [i915] [ 34.075507] [] ? i915_gem_object_get_sg+0x347/0x380 [i915] [ 34.075532] [] ? 
__might_fault+0x3e/0x90 [ 34.075562] [] i915_gem_execbuffer2+0xc0/0x250 [i915] [ 34.075585] [] drm_ioctl+0x1f6/0x480 [ 34.075604] [] ? trace_hardirqs_on_thunk+0x1a/0x1c [ 34.075635] [] ? i915_gem_execbuffer+0x330/0x330 [i915] [ 34.075658] [] do_vfs_ioctl+0x8e/0x690 [ 34.075677] [] ? _raw_spin_unlock_irqrestore+0x3d/0x60 [ 34.075700] [] ? SyS_timer_settime+0x141/0x1e0 [ 34.075721] [] ? trace_hardirqs_on_caller+0x122/0x1b0 [ 34.075742] [] SyS_ioctl+0x3c/0x70 [ 34.075760] [] entry_SYSCALL_64_fastpath+0x1c/0xb1 [ 34.075781] Code: 44 a0 48 c7 c2 9a 7e 43 a0 be e0 0d 00 00 48 c7 c7 a0 45 44 a0 e8 55 b8 ce e0 48 85 db 74 a3 49 83 bd f8 03 00 00 00 74 99 0f 0b <0f> 0b 48 89 da 4c 89 ee 4c 89 e7 e8 04 a9 ff ff 48 89 da 49 89 [ 34.075955] RIP [] i915_gem_obj_lookup_or_create_vma+0x8c/0xc0 [i915] [ 34.075994] RSP Testcase: igt/gem_close_race/basic-threads Fixes: db6c2b4151f2 ("drm/i915: Store the vma in an rbtree...") Signed-off-by: Chris WilsonCc: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem_gtt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 52999e51a946..c5e77e040627 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3403,7 +3403,6 @@ void i915_vma_destroy(struct i915_vma *vma) GEM_BUG_ON(!i915_vma_is_closed(vma)); GEM_BUG_ON(vma->fence); - rb_erase(>obj_node, >obj->vma_tree); list_del(>vm_link); if (!i915_vma_is_ggtt(vma)) i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); @@ -3416,7 +3415,9 @@ void i915_vma_close(struct i915_vma *vma) GEM_BUG_ON(i915_vma_is_closed(vma)); vma->flags |= I915_VMA_CLOSED; - list_del_init(>obj_link); + list_del(>obj_link); + rb_erase(>obj_node, >obj->vma_tree); + if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma)) WARN_ON(i915_vma_unbind(vma)); } -- 2.10.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 2/5] drm/i915: More assorted dev_priv cleanups
On 04/11/2016 15:32, Ville Syrjälä wrote: On Fri, Nov 04, 2016 at 02:42:45PM +, Tvrtko Ursulin wrote: From: Tvrtko UrsulinA small selection of macros which can only accept dev_priv from now on and a resulting trickle of fixups. Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_drv.h | 27 --- drivers/gpu/drm/i915/i915_gpu_error.c | 2 +- drivers/gpu/drm/i915/i915_irq.c | 6 +++--- drivers/gpu/drm/i915/intel_crt.c | 8 drivers/gpu/drm/i915/intel_display.c | 4 ++-- drivers/gpu/drm/i915/intel_dp.c | 2 +- drivers/gpu/drm/i915/intel_hotplug.c | 2 +- drivers/gpu/drm/i915/intel_psr.c | 2 +- 8 files changed, 25 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 45a30f730216..6060e41d25e5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2901,28 +2901,25 @@ struct drm_i915_cmd_table { #define HAS_128_BYTE_Y_TILING(dev_priv) (!IS_GEN2(dev_priv) && \ !(IS_I915G(dev_priv) || \ IS_I915GM(dev_priv))) -#define SUPPORTS_TV(dev) (INTEL_INFO(dev)->supports_tv) -#define I915_HAS_HOTPLUG(dev) (INTEL_INFO(dev)->has_hotplug) - -#define HAS_FW_BLC(dev_priv) (INTEL_GEN(dev_priv) > 2) -#define HAS_PIPE_CXSR(dev) (INTEL_INFO(dev)->has_pipe_cxsr) -#define HAS_FBC(dev) (INTEL_INFO(dev)->has_fbc) +#define SUPPORTS_TV(dev_priv) ((dev_priv)->info.supports_tv) +#define I915_HAS_HOTPLUG(dev_priv) ((dev_priv)->info.has_hotplug) +#define HAS_FW_BLC(dev_priv) (INTEL_GEN(dev_priv) > 2) +#define HAS_PIPE_CXSR(dev_priv) ((dev_priv)->info.has_pipe_cxsr) +#define HAS_FBC(dev_priv) ((dev_priv)->info.has_fbc) #define HAS_IPS(dev_priv) (IS_HSW_ULT(dev_priv) || IS_BROADWELL(dev_priv)) - -#define HAS_DP_MST(dev)(INTEL_INFO(dev)->has_dp_mst) - +#define HAS_DP_MST(dev_priv) ((dev_priv)->info.has_dp_mst) #define HAS_DDI(dev_priv) ((dev_priv)->info.has_ddi) -#define HAS_FPGA_DBG_UNCLAIMED(dev)(INTEL_INFO(dev)->has_fpga_dbg) -#define HAS_PSR(dev) (INTEL_INFO(dev)->has_psr) -#define HAS_RC6(dev) 
(INTEL_INFO(dev)->has_rc6) -#define HAS_RC6p(dev) (INTEL_INFO(dev)->has_rc6p) - -#define HAS_CSR(dev) (INTEL_INFO(dev)->has_csr) +#define HAS_PSR(dev_priv) ((dev_priv)->info.has_psr) +#define HAS_RC6(dev_priv) ((dev_priv)->info.has_rc6) +#define HAS_RC6p(dev_priv) ((dev_priv)->info.has_rc6p) +#define HAS_CSR(dev_priv) ((dev_priv)->info.has_csr) #define HAS_RUNTIME_PM(dev_priv) ((dev_priv)->info.has_runtime_pm) #define HAS_64BIT_RELOC(dev_priv) ((dev_priv)->info.has_64bit_reloc) +#define HAS_FPGA_DBG_UNCLAIMED(dev_priv) ((dev_priv)->info.has_fpga_dbg) What's confusing me is this reordering of these macros. Was there a particular reason for doing that? Just because of its long name, so I pulled it out and separated so the alignment is nicer in the blocks above it. Outside that it all looks pretty reasonable. Could got a bit further with passing around dev_priv in some cases, but I guess we can leave that to future work. Yes, I mention that in the cover letter. One random idea that did pop into my head was this: static inline const struct ... * intel_info(struct drm_i915_private *dev_priv) { return _priv->info; } #define HAS_WHATEVER(dev_priv) (intel_info(dev_priv)->whatever) for some extra type safety. Any thoughts? Sounds like a good idea to me. And it would be really easy to do, localized to i915_drv.h, and then when the last INTEL_INFO(dev) gets converted we can make it use the inline as well. Regards, Tvrtko ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v4 1/2] drm/i915/dp: Enable DP audio stall fix for gen9 platforms
On Wed, 26 Oct 2016, Dhinakaran Pandiyanwrote: > Enabling DP audio stall fix is necessary to play audio over DP HBR2. So, > let's set this bit right before enabling the audio codec. Playing audio > without setting this bit results in pipe FIFO underruns. > > This workaround is applicable only for audio sample rates up to 96kHz. For > frequencies above 96kHz, this is insufficient and cdclk should be increased > to at least 432 MHz, just like BDW. Since, the audio driver does not > support sample rates > 48 kHz, we are safe with this fix for now. Do we still need this patch now that these two have been pushed? b30ce9e0552a drm/i915/dp: BDW cdclk fix for DP audio 9c7540241885 drm/i915/dp: Extend BDW DP audio workaround to GEN9 platforms BR, Jani. > > v2: Inlined the code change within hsw_audio_codec_enable() (Jani) > Fixed the port clock typo > Added TODO comment > Signed-off-by: Dhinakaran Pandiyan > --- > drivers/gpu/drm/i915/i915_reg.h| 5 + > drivers/gpu/drm/i915/intel_audio.c | 30 +- > 2 files changed, 34 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h > index 00efaa1..76dac48 100644 > --- a/drivers/gpu/drm/i915/i915_reg.h > +++ b/drivers/gpu/drm/i915/i915_reg.h > @@ -6236,6 +6236,11 @@ enum { > #define SLICE_ECO_CHICKEN0 _MMIO(0x7308) > #define PIXEL_MASK_CAMMING_DISABLE (1 << 14) > > +#define _CHICKEN_TRANS_A 0x420C0 > +#define _CHICKEN_TRANS_B 0x420C4 > +#define CHICKEN_TRANS(tran) _MMIO_TRANS(tran, _CHICKEN_TRANS_A, > _CHICKEN_TRANS_B) > +#define SPARE_13 (1<<13) > + > /* WaCatErrorRejectionIssue */ > #define GEN7_SQ_CHICKEN_MBCUNIT_CONFIG _MMIO(0x9030) > #define GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB(1<<11) > diff --git a/drivers/gpu/drm/i915/intel_audio.c > b/drivers/gpu/drm/i915/intel_audio.c > index 7093cfb..894f11e 100644 > --- a/drivers/gpu/drm/i915/intel_audio.c > +++ b/drivers/gpu/drm/i915/intel_audio.c > @@ -283,6 +283,8 @@ static void hsw_audio_codec_disable(struct intel_encoder > *encoder) > 
{ > struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); > struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc); > + struct intel_crtc_state *crtc_config = intel_crtc->config; > + enum transcoder cpu_transcoder = crtc_config->cpu_transcoder; > enum pipe pipe = intel_crtc->pipe; > uint32_t tmp; > > @@ -290,13 +292,21 @@ static void hsw_audio_codec_disable(struct > intel_encoder *encoder) > > mutex_lock(_priv->av_mutex); > > + /*Disable DP audio stall fix for HBR2*/ > + if (IS_GEN9(dev_priv) && intel_crtc_has_dp_encoder(crtc_config) && > + crtc_config->port_clock >= 54) { > + tmp = I915_READ(CHICKEN_TRANS(cpu_transcoder)); > + tmp &= ~SPARE_13; > + I915_WRITE(CHICKEN_TRANS(cpu_transcoder), tmp); > + } > + > /* Disable timestamps */ > tmp = I915_READ(HSW_AUD_CFG(pipe)); > tmp &= ~AUD_CONFIG_N_VALUE_INDEX; > tmp |= AUD_CONFIG_N_PROG_ENABLE; > tmp &= ~AUD_CONFIG_UPPER_N_MASK; > tmp &= ~AUD_CONFIG_LOWER_N_MASK; > - if (intel_crtc_has_dp_encoder(intel_crtc->config)) > + if (intel_crtc_has_dp_encoder(crtc_config)) > tmp |= AUD_CONFIG_N_VALUE_INDEX; > I915_WRITE(HSW_AUD_CFG(pipe), tmp); > > @@ -315,6 +325,8 @@ static void hsw_audio_codec_enable(struct drm_connector > *connector, > { > struct drm_i915_private *dev_priv = to_i915(connector->dev); > struct intel_crtc *intel_crtc = to_intel_crtc(intel_encoder->base.crtc); > + struct intel_crtc_state *crtc_config = intel_crtc->config; > + enum transcoder cpu_transcoder = crtc_config->cpu_transcoder; > enum pipe pipe = intel_crtc->pipe; > enum port port = intel_encoder->port; > const uint8_t *eld = connector->eld; > @@ -326,6 +338,22 @@ static void hsw_audio_codec_enable(struct drm_connector > *connector, > > mutex_lock(_priv->av_mutex); > > + /* Enable DP audio stall fix for HBR2 > + * > + * TODO: This workaround is applicable only for audio sample rates up > + * to 96kHz. For frequencies above 96kHz, this is insufficient and > + * cdclk should be increased to at least 432 MHz, just like BDW. 
Since, > + * the audio driver does not support sample rates > 48 kHz, we are safe > + * with this fix for now. > + */ > + > + if (IS_GEN9(dev_priv) && intel_crtc_has_dp_encoder(crtc_config) && > + crtc_config->port_clock >= 54) { > + tmp = I915_READ(CHICKEN_TRANS(cpu_transcoder)); > + tmp |= SPARE_13; > + I915_WRITE(CHICKEN_TRANS(cpu_transcoder), tmp); > + } > + > /* Enable audio presence detect, invalidate ELD */ > tmp = I915_READ(HSW_AUD_PIN_ELD_CP_VLD); > tmp |=
Re: [Intel-gfx] linux-next: manual merge of the mali-dp tree with the drm-misc tree
On Fri, Nov 04, 2016 at 04:38:54PM +1100, Stephen Rothwell wrote: > Hi Liviu, > > On Thu, 3 Nov 2016 17:19:58 + Liviu Dudau wrote: > > > > I have revamped the mali-dp tree and rebased it on the newer > > version of drm-next (which includes the drm-misc change) and pushed the > > updated patch in my tree. > > Thanks for that. However, several of the commits in your tree now have > no Signed-off-by from you as the committer :-( Brian Starkey is a co-maintainer for the Mali DP tree, so his Signed-off-by alone should be good. Baoyou's patch is in my tree to stop him repeatedly sending me the same patch over and over again :) But yes, I will add my Signed-off-by for that one. Many thanks, Liviu > > -- > Cheers, > Stephen Rothwell -- | I would like to | | fix the world, | | but they're not | | giving me the | \ source code! / --- ¯\_(ツ)_/¯ ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] ✗ Fi.CI.BAT: warning for series starting with [v4,1/2] drm/i915/dp: BDW cdclk fix for DP audio (rev2)
On Wed, 02 Nov 2016, Patchworkwrote: > == Series Details == > > Series: series starting with [v4,1/2] drm/i915/dp: BDW cdclk fix for DP audio > (rev2) > URL : https://patchwork.freedesktop.org/series/14688/ > State : warning > > == Summary == > > Series 14688v2 Series without cover letter > https://patchwork.freedesktop.org/api/1.0/series/14688/revisions/2/mbox/ > > Test drv_module_reload_basic: > pass -> DMESG-WARN (fi-skl-6770hq) LSPCON being cranky. Pushed both to drm-intel-next-queued, thanks for the patches. BR, Jani. > > fi-bdw-5557u total:241 pass:226 dwarn:0 dfail:0 fail:0 skip:15 > fi-bsw-n3050 total:241 pass:201 dwarn:0 dfail:0 fail:0 skip:40 > fi-bxt-t5700 total:241 pass:213 dwarn:0 dfail:0 fail:0 skip:28 > fi-byt-j1900 total:241 pass:213 dwarn:0 dfail:0 fail:0 skip:28 > fi-byt-n2820 total:241 pass:209 dwarn:0 dfail:0 fail:0 skip:32 > fi-hsw-4770 total:241 pass:221 dwarn:0 dfail:0 fail:0 skip:20 > fi-hsw-4770r total:241 pass:220 dwarn:0 dfail:0 fail:0 skip:21 > fi-ilk-650 total:241 pass:187 dwarn:0 dfail:0 fail:0 skip:54 > fi-ivb-3520m total:241 pass:218 dwarn:0 dfail:0 fail:0 skip:23 > fi-ivb-3770 total:241 pass:218 dwarn:0 dfail:0 fail:0 skip:23 > fi-kbl-7200u total:241 pass:219 dwarn:0 dfail:0 fail:0 skip:22 > fi-skl-6260u total:241 pass:227 dwarn:0 dfail:0 fail:0 skip:14 > fi-skl-6700hqtotal:241 pass:220 dwarn:0 dfail:0 fail:0 skip:21 > fi-skl-6700k total:241 pass:219 dwarn:1 dfail:0 fail:0 skip:21 > fi-skl-6770hqtotal:241 pass:226 dwarn:1 dfail:0 fail:0 skip:14 > fi-snb-2520m total:241 pass:208 dwarn:0 dfail:0 fail:0 skip:33 > fi-snb-2600 total:241 pass:207 dwarn:0 dfail:0 fail:0 skip:34 > > bf6b989af8b0fde56a352d9005c97b2d8e3bbbe3 drm-intel-nightly: > 2016y-11m-02d-15h-44m-03s UTC integration manifest > 6346fda drm/i915/dp: Extend BDW DP audio workaround to GEN9 platforms > 884d02ce drm/i915/dp: BDW cdclk fix for DP audio > > == Logs == > > For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_2895/ > ___ > Intel-gfx mailing list > 
Intel-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Jani Nikula, Intel Open Source Technology Center ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] ✗ Fi.CI.BAT: failure for dev_priv cleanup continuation
== Series Details == Series: dev_priv cleanup continuation URL : https://patchwork.freedesktop.org/series/14844/ State : failure == Summary == Series 14844v1 dev_priv cleanup continuation https://patchwork.freedesktop.org/api/1.0/series/14844/revisions/1/mbox/ Test kms_busy: Subgroup basic-flip-default-c: pass -> INCOMPLETE (fi-skl-6260u) fi-bdw-5557u total:241 pass:226 dwarn:0 dfail:0 fail:0 skip:15 fi-bsw-n3050 total:241 pass:201 dwarn:0 dfail:0 fail:0 skip:40 fi-byt-j1900 total:241 pass:213 dwarn:0 dfail:0 fail:0 skip:28 fi-byt-n2820 total:241 pass:209 dwarn:0 dfail:0 fail:0 skip:32 fi-hsw-4770 total:241 pass:221 dwarn:0 dfail:0 fail:0 skip:20 fi-hsw-4770r total:241 pass:221 dwarn:0 dfail:0 fail:0 skip:20 fi-ilk-650 total:241 pass:188 dwarn:0 dfail:0 fail:0 skip:53 fi-ivb-3520m total:241 pass:219 dwarn:0 dfail:0 fail:0 skip:22 fi-ivb-3770 total:241 pass:219 dwarn:0 dfail:0 fail:0 skip:22 fi-kbl-7200u total:241 pass:219 dwarn:0 dfail:0 fail:0 skip:22 fi-skl-6260u total:165 pass:159 dwarn:0 dfail:0 fail:0 skip:5 fi-skl-6700hqtotal:241 pass:220 dwarn:0 dfail:0 fail:0 skip:21 fi-skl-6700k total:241 pass:219 dwarn:1 dfail:0 fail:0 skip:21 fi-skl-6770hqtotal:241 pass:227 dwarn:0 dfail:0 fail:0 skip:14 fi-snb-2520m total:241 pass:209 dwarn:0 dfail:0 fail:0 skip:32 fi-snb-2600 total:241 pass:208 dwarn:0 dfail:0 fail:0 skip:33 f302fec3503812bb71a8f71511a0bd4f720d5091 drm-intel-nightly: 2016y-11m-04d-11h-56m-52s UTC integration manifest 01f89f2 drm/i915: Convert i915_drv.c to INTEL_GEN f1ccc0b drm/i915: Pass dev_priv to INTEL_INFO everywhere apart from the gen use ef23459 drm/i915: Further assorted dev_priv cleanups 28a34c7 drm/i915: More assorted dev_priv cleanups 8c5b825 drm/i915: Assorted dev_priv cleanups == Logs == For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_2906/ ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 2/5] drm/i915: More assorted dev_priv cleanups
On Fri, Nov 04, 2016 at 02:42:45PM +, Tvrtko Ursulin wrote: > From: Tvrtko Ursulin> > A small selection of macros which can only accept dev_priv from > now on and a resulting trickle of fixups. > > Signed-off-by: Tvrtko Ursulin > --- > drivers/gpu/drm/i915/i915_drv.h | 27 --- > drivers/gpu/drm/i915/i915_gpu_error.c | 2 +- > drivers/gpu/drm/i915/i915_irq.c | 6 +++--- > drivers/gpu/drm/i915/intel_crt.c | 8 > drivers/gpu/drm/i915/intel_display.c | 4 ++-- > drivers/gpu/drm/i915/intel_dp.c | 2 +- > drivers/gpu/drm/i915/intel_hotplug.c | 2 +- > drivers/gpu/drm/i915/intel_psr.c | 2 +- > 8 files changed, 25 insertions(+), 28 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 45a30f730216..6060e41d25e5 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -2901,28 +2901,25 @@ struct drm_i915_cmd_table { > #define HAS_128_BYTE_Y_TILING(dev_priv) (!IS_GEN2(dev_priv) && \ >!(IS_I915G(dev_priv) || \ >IS_I915GM(dev_priv))) > -#define SUPPORTS_TV(dev) (INTEL_INFO(dev)->supports_tv) > -#define I915_HAS_HOTPLUG(dev) (INTEL_INFO(dev)->has_hotplug) > - > -#define HAS_FW_BLC(dev_priv) (INTEL_GEN(dev_priv) > 2) > -#define HAS_PIPE_CXSR(dev) (INTEL_INFO(dev)->has_pipe_cxsr) > -#define HAS_FBC(dev) (INTEL_INFO(dev)->has_fbc) > +#define SUPPORTS_TV(dev_priv)((dev_priv)->info.supports_tv) > +#define I915_HAS_HOTPLUG(dev_priv) ((dev_priv)->info.has_hotplug) > > +#define HAS_FW_BLC(dev_priv) (INTEL_GEN(dev_priv) > 2) > +#define HAS_PIPE_CXSR(dev_priv) ((dev_priv)->info.has_pipe_cxsr) > +#define HAS_FBC(dev_priv)((dev_priv)->info.has_fbc) > #define HAS_IPS(dev_priv)(IS_HSW_ULT(dev_priv) || IS_BROADWELL(dev_priv)) > - > -#define HAS_DP_MST(dev) (INTEL_INFO(dev)->has_dp_mst) > - > +#define HAS_DP_MST(dev_priv) ((dev_priv)->info.has_dp_mst) > #define HAS_DDI(dev_priv)((dev_priv)->info.has_ddi) > -#define HAS_FPGA_DBG_UNCLAIMED(dev) (INTEL_INFO(dev)->has_fpga_dbg) > -#define HAS_PSR(dev) 
(INTEL_INFO(dev)->has_psr) > -#define HAS_RC6(dev) (INTEL_INFO(dev)->has_rc6) > -#define HAS_RC6p(dev)(INTEL_INFO(dev)->has_rc6p) > - > -#define HAS_CSR(dev) (INTEL_INFO(dev)->has_csr) > +#define HAS_PSR(dev_priv)((dev_priv)->info.has_psr) > +#define HAS_RC6(dev_priv)((dev_priv)->info.has_rc6) > +#define HAS_RC6p(dev_priv) ((dev_priv)->info.has_rc6p) > +#define HAS_CSR(dev_priv)((dev_priv)->info.has_csr) > > #define HAS_RUNTIME_PM(dev_priv) ((dev_priv)->info.has_runtime_pm) > #define HAS_64BIT_RELOC(dev_priv) ((dev_priv)->info.has_64bit_reloc) > > +#define HAS_FPGA_DBG_UNCLAIMED(dev_priv) ((dev_priv)->info.has_fpga_dbg) What's confusing me is this reordering of these macros. Was there a particular reason for doing that? Outside that it all looks pretty reasonable. Could go a bit further with passing around dev_priv in some cases, but I guess we can leave that to future work. One random idea that did pop into my head was this: static inline const struct ... * intel_info(struct drm_i915_private *dev_priv) { return &dev_priv->info; } #define HAS_WHATEVER(dev_priv) (intel_info(dev_priv)->whatever) for some extra type safety. Any thoughts? > + > /* > * For now, anything with a GuC requires uCode loading, and then supports > * command submission once loaded. 
But these are logically independent > diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c > b/drivers/gpu/drm/i915/i915_gpu_error.c > index d430b9441e6b..35b13f178b61 100644 > --- a/drivers/gpu/drm/i915/i915_gpu_error.c > +++ b/drivers/gpu/drm/i915/i915_gpu_error.c > @@ -573,7 +573,7 @@ int i915_error_state_to_str(struct > drm_i915_error_state_buf *m, > pdev->subsystem_device); > err_printf(m, "IOMMU enabled?: %d\n", error->iommu); > > - if (HAS_CSR(dev)) { > + if (HAS_CSR(dev_priv)) { > struct intel_csr *csr = _priv->csr; > > err_printf(m, "DMC loaded: %s\n", > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c > index 6d7505b5c5e7..285ee1e4352a 100644 > --- a/drivers/gpu/drm/i915/i915_irq.c > +++ b/drivers/gpu/drm/i915/i915_irq.c > @@ -3678,7 +3678,7 @@ static void i915_irq_preinstall(struct drm_device * dev) > struct drm_i915_private *dev_priv = to_i915(dev); > int pipe; > > - if (I915_HAS_HOTPLUG(dev)) { > + if (I915_HAS_HOTPLUG(dev_priv)) { > i915_hotplug_interrupt_update(dev_priv, 0x, 0); > I915_WRITE(PORT_HOTPLUG_STAT, I915_READ(PORT_HOTPLUG_STAT)); > } > @@ -3712,7 +3712,7 @@ static int i915_irq_postinstall(struct drm_device *dev) > I915_DISPLAY_PIPE_B_EVENT_INTERRUPT | >
[Intel-gfx] [maintainer-tools PATCH 1/2] dim: add a variable for nightly.conf
We'll change the name at some point, add some indirection, with a generic variable name. Signed-off-by: Jani Nikula--- dim | 26 +++--- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/dim b/dim index 8e95cd82407f..6a23c868856c 100755 --- a/dim +++ b/dim @@ -92,6 +92,9 @@ addr_intel_gfx="intel-gfx@lists.freedesktop.org" addr_dri_devel="dri-de...@lists.freedesktop.org" addr_intel_qa="\"Christophe Prigent\" " +# integration configuration +integration_config=nightly.conf + # # Command line options. # @@ -163,7 +166,7 @@ if [ "$subcommand" != "setup" -a "$subcommand" != "help" -a "$subcommand" != "us # Internal configuration that depends on a sane setup. # - dim_branches=`(source $DIM_PREFIX/drm-intel-rerere/nightly.conf ; echo $nightly_branches) | \ + dim_branches=`(source $DIM_PREFIX/drm-intel-rerere/$integration_config ; echo $nightly_branches) | \ xargs -n 1 echo | grep '^origin' | sed -e 's/^origin\///'` fi @@ -280,14 +283,16 @@ function dim_rebuild_nightly cd $rerere if [[ `git status --porcelain | grep -v "^[ ?][ ?]" | wc -l` -gt 0 ]]; then - warn_or_fail "-nightly configuration file not commited" + warn_or_fail "integration configuration file $integration_config not commited" fi - echo -n "Updating rerere cache and nightly.conf... " + echo -n "Updating rerere cache... " update_rerere_cache >& /dev/null echo "Done." - source $rerere/nightly.conf + echo -n "Reloading $integration_config... " + source $rerere/$integration_config + echo "Done." cd $DIM_PREFIX/$integration_branch if ! git branch --list $integration_branch | grep '\*' >& /dev/null ; then @@ -383,7 +388,6 @@ function dim_rebuild_nightly update_linux_next } - # push branch $1, rebuild nightly. the rest of the arguments are passed to git # push. 
function dim_push_branch @@ -615,9 +619,9 @@ function dim_create_branch git push $DRY_RUN $DIM_DRM_INTEL_REMOTE +$branch --set-upstream cd $DIM_PREFIX/drm-intel-rerere $DRY echo "nightly_branches=\"\$nightly_branches origin/$branch\"" \ ->> nightly.conf - $DRY git add nightly.conf - $DRY git commit --quiet -m "Adding $branch to -nightly" +>> $integration_config + $DRY git add $integration_config + $DRY git commit --quiet -m "Add $branch to $integration_config" } function dim_remove_branch @@ -643,9 +647,9 @@ function dim_remove_branch $DRY git fetch origin --prune cd $DIM_PREFIX/drm-intel-rerere full_branch="origin/$branch" - $DRY sed -e "/${full_branch//\//\\\/}/d" -i nightly.conf - $DRY git add nightly.conf - $DRY git commit --quiet -m "Deleted $branch and removed from -nightly" + $DRY sed -e "/${full_branch//\//\\\/}/d" -i $integration_config + $DRY git add $integration_config + $DRY git commit --quiet -m "Remove $branch from $integration_config" } function dim_cd -- 2.1.4 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [maintainer-tools PATCH 2/2] dim: switch to using remote agnostic integration branch config
NOTE: This change depends on nightly.conf changes that have been committed earlier to the drm-intel-rerere repo. Looking at that first makes this change more sensible. Use two arrays to configure the repos and branches to be merged to the integration branch: drm_tip_repos An associative array that maps repo names to urls. This is mostly a convenience for defining the other array. The repo names are symbolic, and not related to actual git remote names. It's also helpful for implementing dim create-branch and remove-branch. drm_tip_config An array of strings which describes the repos and branches to be used to generate the integration branch. The repos are listed using the symbolic repo names from the drm_tip_repos array. It's also possible to list an override sha, in case there's a need to hold back updating to the tip of the branch for some reason. dim as a whole still remains dependent on specific (and configured) remote names, but this change detaches nightly.conf from the remote names. Signed-off-by: Jani Nikula--- dim | 98 ++--- 1 file changed, 72 insertions(+), 26 deletions(-) diff --git a/dim b/dim index 6a23c868856c..66ea0dd918b3 100755 --- a/dim +++ b/dim @@ -95,6 +95,27 @@ addr_intel_qa="\"Christophe Prigent\" " # integration configuration integration_config=nightly.conf +function read_integration_config +{ + # clear everything first to allow configuration reload + unset drm_tip_repos drm_tip_config + declare -g -A drm_tip_repos + declare -g -a drm_tip_config + + if [ -r $DIM_PREFIX/drm-intel-rerere/$integration_config ]; then + source $DIM_PREFIX/drm-intel-rerere/$integration_config + fi + + dim_branches= + for conf in "${drm_tip_config[@]}"; do + read repo branch override <<< $conf + if [[ "$repo" = "drm-intel" ]]; then + dim_branches="$dim_branches $branch" + fi + done +} +read_integration_config + # # Command line options. 
# @@ -161,14 +182,30 @@ if [ "$subcommand" != "setup" -a "$subcommand" != "help" -a "$subcommand" != "us exit 1 fi done +fi - # - # Internal configuration that depends on a sane setup. - # +# get the remote name for url, depends on current repo +function url_to_remote +{ + local url="$1" - dim_branches=`(source $DIM_PREFIX/drm-intel-rerere/$integration_config ; echo $nightly_branches) | \ - xargs -n 1 echo | grep '^origin' | sed -e 's/^origin\///'` -fi + if [[ -z "$url" ]]; then + echoerr "$0 without url" + exit 1 + fi + + local remote=$(git remote -v | grep -m 1 "$url" | cut -f 1) + + if [[ -z "$remote" ]]; then + echoerr "No git remote for url $url found in $(pwd)" + echoerr "Please set it up using:" + echoerr "$ git remote add $url" + echoerr "with a name of your choice." + exit 1 + fi + + echo $remote +} function dim_uptodate { @@ -291,7 +328,7 @@ function dim_rebuild_nightly echo "Done." echo -n "Reloading $integration_config... " - source $rerere/$integration_config + read_integration_config echo "Done." cd $DIM_PREFIX/$integration_branch @@ -300,7 +337,8 @@ function dim_rebuild_nightly exit 1 fi - for remote in $(echo $nightly_branches | tr " " "\n" | sed 's|/.*$||g' | sort -u); do + for url in "${drm_tip_repos[@]}"; do + local remote=$(url_to_remote $url) echo -n "Fetching $remote... " # git fetch returns 128 if there's nothing to be fetched git fetch $remote >& /dev/null || true @@ -308,22 +346,17 @@ function dim_rebuild_nightly done # merge -fixes - for tree in $nightly_branches; do - local branch=${tree%:*} - local sha1=${tree#*:} - local name=${branch##*/} - - # the : separator is optional - if [[ $sha1 == $tree ]] ; then - sha1= - fi + for conf in "${drm_tip_config[@]}"; do + read repo branch override <<< $conf + local url=${drm_tip_repos[$repo]} + local remote=$(url_to_remote $url) + local sha1=$remote/$branch - echo -n "Merging $branch... " + echo -n "Merging $repo (local remote $remote) $branch... 
" - if [[ -n $sha1 ]] ; then + if [[ -n "$override" ]]; then + sha1=$override echo -n "Using override sha1: $sha1... " - else -
Re: [Intel-gfx] [PATCH 05/12] drm/i915/scheduler: Record all dependencies upon request construction
On Fri, Nov 04, 2016 at 02:44:44PM +, Tvrtko Ursulin wrote: > > On 03/11/2016 11:55, Chris Wilson wrote: > >On Thu, Nov 03, 2016 at 11:03:47AM +, Tvrtko Ursulin wrote: > >> > >>On 02/11/2016 17:50, Chris Wilson wrote: > >>>+struct i915_dependency { > >>>+ struct i915_priotree *signal; > >>>+ struct list_head pre_link, post_link; > >>>+ unsigned long flags; > >>>+#define I915_DEPENDENCY_ALLOC BIT(0) > >>>+}; > >>>+ > >>>+struct i915_priotree { > >>>+ struct list_head pre_list; /* who is before us, we depend upon */ > >>>+ struct list_head post_list; /* who is after us, they depend upon us */ > >>>+}; > >> > >>I need a picture to imagine this data structure. :( > > > >The names suck. > > When you wrote this I assumed you would respin shortly with some > better names? Not yet. I kind of like struct i915_dependency { struct i915_priotree *signaler; struct list_head signaler_link; struct list_head listener_link; }; struct i915_priotree { struct list_head signalers_list; /* before us, we depend on them */ struct list_head listeners_list; /* those after, who depend on us */ }; -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH igt] igt/gem_exec_reloc: Check we write the full 64bit relocation
Recently a patch ran successfully through BAT that broke 64bit relocations on a couple of machines. Oops. So lets add a very fast set of tests to check basic relocation handling. Signed-off-by: Chris Wilson--- tests/gem_exec_reloc.c| 199 ++ tests/intel-ci/fast-feedback.testlist | 3 + 2 files changed, 202 insertions(+) diff --git a/tests/gem_exec_reloc.c b/tests/gem_exec_reloc.c index 5f898da..b541b38 100644 --- a/tests/gem_exec_reloc.c +++ b/tests/gem_exec_reloc.c @@ -28,6 +28,9 @@ IGT_TEST_DESCRIPTION("Basic sanity check of execbuf-ioctl relocations."); #define LOCAL_I915_EXEC_BSD_SHIFT (13) #define LOCAL_I915_EXEC_BSD_MASK (3 << LOCAL_I915_EXEC_BSD_SHIFT) +#define LOCAL_I915_EXEC_NO_RELOC (1<<11) +#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12) + #define ENGINE_MASK (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK) static uint32_t find_last_set(uint64_t x) @@ -320,6 +323,193 @@ static void active(int fd, unsigned engine) gem_close(fd, obj[0].handle); } +static bool has_64bit_reloc(int fd) +{ + return intel_gen(intel_get_drm_devid(fd)) >= 8; +} + +static void basic_cpu(int fd) +{ + struct drm_i915_gem_relocation_entry reloc; + struct drm_i915_gem_exec_object2 obj; + struct drm_i915_gem_execbuffer2 execbuf; + uint32_t bbe = MI_BATCH_BUFFER_END; + uint32_t trash; + uint64_t offset; + char *wc; + + memset(, 0, sizeof(obj)); + + obj.handle = gem_create(fd, 4096); + obj.relocs_ptr = (uintptr_t) + obj.relocation_count = 1; + gem_write(fd, obj.handle, 0, , sizeof(bbe)); + + memset(, 0, sizeof(reloc)); + reloc.offset = 4000; + reloc.target_handle = obj.handle; + reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION; + + memset(, 0, sizeof(execbuf)); + execbuf.buffers_ptr = (uintptr_t) + execbuf.buffer_count = 1; + + wc = gem_mmap__wc(fd, obj.handle, 0, 4096, PROT_WRITE); + offset = -1; + memcpy(wc + 4000, , sizeof(offset)); + + gem_set_domain(fd, obj.handle, + I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); + gem_execbuf(fd, ); + + offset = 0; + memcpy(, wc + 4000, has_64bit_reloc(fd) ? 
8 : 4); + munmap(wc, 4096); + + igt_assert_eq_u64(reloc.presumed_offset, offset); + igt_assert_eq_u64(obj.offset, offset); + + /* Simulate relocation */ + trash = obj.handle; + obj.handle = gem_create(fd, 4096); + gem_write(fd, obj.handle, 0, , sizeof(bbe)); + reloc.target_handle = obj.handle; + + wc = gem_mmap__wc(fd, obj.handle, 0, 4096, PROT_WRITE); + offset = -1; + memcpy(wc + 4000, , sizeof(offset)); + + gem_set_domain(fd, obj.handle, + I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); + gem_execbuf(fd, ); + + offset = 0; + memcpy(, wc + 4000, has_64bit_reloc(fd) ? 8 : 4); + munmap(wc, 4096); + + igt_assert_eq_u64(reloc.presumed_offset, offset); + igt_assert_eq_u64(obj.offset, offset); + + gem_close(fd, obj.handle); + gem_close(fd, trash); +} + +static void basic_gtt(int fd) +{ + struct drm_i915_gem_relocation_entry reloc; + struct drm_i915_gem_exec_object2 obj; + struct drm_i915_gem_execbuffer2 execbuf; + uint32_t bbe = MI_BATCH_BUFFER_END; + uint64_t offset; + char *wc; + + memset(, 0, sizeof(obj)); + + obj.handle = gem_create(fd, 4096); + obj.relocs_ptr = (uintptr_t) + obj.relocation_count = 1; + gem_write(fd, obj.handle, 0, , sizeof(bbe)); + + memset(, 0, sizeof(reloc)); + reloc.offset = 4000; + reloc.target_handle = obj.handle; + reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION; + + memset(, 0, sizeof(execbuf)); + execbuf.buffers_ptr = (uintptr_t) + execbuf.buffer_count = 1; + + wc = gem_mmap__wc(fd, obj.handle, 0, 4096, PROT_WRITE); + offset = -1; + memcpy(wc + 4000, , sizeof(offset)); + + gem_set_domain(fd, obj.handle, + I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); + gem_execbuf(fd, ); + + offset = 0; + memcpy(, wc + 4000, has_64bit_reloc(fd) ? 8 : 4); + + igt_assert_eq_u64(reloc.presumed_offset, offset); + igt_assert_eq_u64(obj.offset, offset); + + offset = -1; + memcpy(wc + 4000, , sizeof(offset)); + + /* Simulate relocation */ + obj.offset += 4096; + reloc.presumed_offset += 4096; + memcpy(wc + 4000, , has_64bit_reloc(fd) ? 
8 : 4); + + gem_execbuf(fd, ); + + offset = 0; + memcpy(, wc + 4000, has_64bit_reloc(fd) ? 8 : 4); + munmap(wc, 4096); + + igt_assert_eq_u64(reloc.presumed_offset, offset); + igt_assert_eq_u64(obj.offset, offset); + + gem_close(fd, obj.handle); +} + +static void basic_noreloc(int fd) +{ + struct drm_i915_gem_relocation_entry
[Intel-gfx] [PATCH 0/5] dev_priv cleanup continuation
From: Tvrtko Ursulin A few small patches towards the goal of getting rid of the __I915__ polymorphism. Series starts with three patches to convert some more IS/HAS macros to accepting dev_priv only, and continues with a patch to make all users of INTEL_INFO pass in dev_priv, apart from the ones which can be replaced with INTEL_GEN. This leaves the disruptive conversion to the latter as the only remaining bit before the __I915__ can be completely eliminated. To start with that, the last patch converts i915_drv.c, going with the idea to do this gradually over time on a file by file basis. When all this is done at some point in the future, we can also tackle the opportunities to change some local function signatures to take dev_priv and so make further cleanups where appropriate. Tvrtko Ursulin (5): drm/i915: Assorted dev_priv cleanups drm/i915: More assorted dev_priv cleanups drm/i915: Further assorted dev_priv cleanups drm/i915: Pass dev_priv to INTEL_INFO everywhere apart from the gen use drm/i915: Convert i915_drv.c to INTEL_GEN drivers/gpu/drm/i915/i915_drv.c| 18 drivers/gpu/drm/i915/i915_drv.h| 70 +++--- drivers/gpu/drm/i915/i915_gem.c| 13 +++--- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 6 +-- drivers/gpu/drm/i915/i915_gem_gtt.c| 2 +- drivers/gpu/drm/i915/i915_gem_stolen.c | 3 +- drivers/gpu/drm/i915/i915_gem_userptr.c| 3 +- drivers/gpu/drm/i915/i915_gpu_error.c | 4 +- drivers/gpu/drm/i915/i915_irq.c| 8 ++-- drivers/gpu/drm/i915/intel_color.c | 31 ++--- drivers/gpu/drm/i915/intel_crt.c | 8 ++-- drivers/gpu/drm/i915/intel_display.c | 32 +++--- drivers/gpu/drm/i915/intel_dp.c| 8 ++-- drivers/gpu/drm/i915/intel_fbdev.c | 10 ++--- drivers/gpu/drm/i915/intel_guc_loader.c| 10 ++--- drivers/gpu/drm/i915/intel_hotplug.c | 2 +- drivers/gpu/drm/i915/intel_pm.c| 7 +-- drivers/gpu/drm/i915/intel_psr.c | 2 +- 18 files changed, 118 insertions(+), 119 deletions(-) -- 2.7.4 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org 
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 5/5] drm/i915: Convert i915_drv.c to INTEL_GEN
From: Tvrtko UrsulinSigned-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_drv.c | 14 +++--- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 35940192e569..096c368bda0b 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -374,12 +374,12 @@ static int intel_alloc_mchbar_resource(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); - int reg = INTEL_INFO(dev)->gen >= 4 ? MCHBAR_I965 : MCHBAR_I915; + int reg = INTEL_GEN(dev_priv) >= 4 ? MCHBAR_I965 : MCHBAR_I915; u32 temp_lo, temp_hi = 0; u64 mchbar_addr; int ret; - if (INTEL_INFO(dev)->gen >= 4) + if (INTEL_GEN(dev_priv) >= 4) pci_read_config_dword(dev_priv->bridge_dev, reg + 4, _hi); pci_read_config_dword(dev_priv->bridge_dev, reg, _lo); mchbar_addr = ((u64)temp_hi << 32) | temp_lo; @@ -406,7 +406,7 @@ intel_alloc_mchbar_resource(struct drm_device *dev) return ret; } - if (INTEL_INFO(dev)->gen >= 4) + if (INTEL_GEN(dev_priv) >= 4) pci_write_config_dword(dev_priv->bridge_dev, reg + 4, upper_32_bits(dev_priv->mch_res.start)); @@ -420,7 +420,7 @@ static void intel_setup_mchbar(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); - int mchbar_reg = INTEL_INFO(dev)->gen >= 4 ? MCHBAR_I965 : MCHBAR_I915; + int mchbar_reg = INTEL_GEN(dev_priv) >= 4 ? MCHBAR_I965 : MCHBAR_I915; u32 temp; bool enabled; @@ -460,7 +460,7 @@ static void intel_teardown_mchbar(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); - int mchbar_reg = INTEL_INFO(dev)->gen >= 4 ? MCHBAR_I965 : MCHBAR_I915; + int mchbar_reg = INTEL_GEN(dev_priv) >= 4 ? MCHBAR_I965 : MCHBAR_I915; if (dev_priv->mchbar_need_disable) { if (IS_I915G(dev_priv) || IS_I915GM(dev_priv)) { @@ -879,7 +879,7 @@ static int i915_mmio_setup(struct drm_device *dev) * the register BAR remains the same size for all the earlier * generations up to Ironlake. 
*/ - if (INTEL_INFO(dev)->gen < 5) + if (INTEL_GEN(dev_priv) < 5) mmio_size = 512 * 1024; else mmio_size = 2 * 1024 * 1024; @@ -1512,7 +1512,7 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation) * Fujitsu FSC S7110 * Acer Aspire 1830T */ - if (!(hibernation && INTEL_INFO(dev_priv)->gen < 6)) + if (!(hibernation && INTEL_GEN(dev_priv) < 6)) pci_set_power_state(pdev, PCI_D3hot); dev_priv->suspended_to_idle = suspend_to_idle(dev_priv); -- 2.7.4 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 1/2] shmem: Support for registration of driver/file owner specific ops
From: Chris Wilson This provides support for the drivers or shmem file owners to register a set of callbacks, which can be invoked from the address space operations methods implemented by shmem. This allows the file owners to hook into the shmem address space operations to do some extra/custom operations in addition to the default ones. The private_data field of address_space struct is used to store the pointer to driver specific ops. Currently only one ops field is defined, which is migratepage, but can be extended on an as-needed basis. The need for driver specific operations arises since some of the operations (like migratepage) may not be handled completely within shmem, so as to be effective, and would need some driver specific handling also. Specifically, i915.ko would like to participate in migratepage(). i915.ko uses shmemfs to provide swappable backing storage for its user objects, but when those objects are in use by the GPU it must pin the entire object until the GPU is idle. As a result, large chunks of memory can be arbitrarily withdrawn from page migration, resulting in premature out-of-memory due to fragmentation. However, if i915.ko can receive the migratepage() request, it can then flush the object from the GPU, remove its pin and thus enable the migration. Since gfx allocations are one of the major consumers of system memory, it's imperative to have such a mechanism to effectively deal with fragmentation. And therefore the need for such a provision for initiating driver specific actions during address space operations. v2: - Drop dev_ prefix from the members of shmem_dev_info structure. (Joonas) - Change the return type of shmem_set_device_op() to void and remove the check for pre-existing data. (Joonas) - Rename shmem_set_device_op() to shmem_set_dev_info() to be consistent with shmem_dev_info structure. 
(Joonas) Cc: Hugh Dickins Cc: linux...@kvack.org Cc: linux-ker...@vger.linux.org Signed-off-by: Sourab Gupta Signed-off-by: Akash Goel Reviewed-by: Chris Wilson --- include/linux/shmem_fs.h | 13 + mm/shmem.c | 17 - 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index ff078e7..454c3ba 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -39,11 +39,24 @@ struct shmem_sb_info { unsigned long shrinklist_len; /* Length of shrinklist */ }; +struct shmem_dev_info { + void *private_data; + int (*migratepage)(struct address_space *mapping, + struct page *newpage, struct page *page, + enum migrate_mode mode, void *dev_priv_data); +}; + static inline struct shmem_inode_info *SHMEM_I(struct inode *inode) { return container_of(inode, struct shmem_inode_info, vfs_inode); } +static inline void shmem_set_dev_info(struct address_space *mapping, + struct shmem_dev_info *info) +{ + mapping->private_data = info; +} + /* * Functions in mm/shmem.c called directly from elsewhere: */ diff --git a/mm/shmem.c b/mm/shmem.c index ad7813d..fce8de3 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1290,6 +1290,21 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) return 0; } +#ifdef CONFIG_MIGRATION +static int shmem_migratepage(struct address_space *mapping, +struct page *newpage, struct page *page, +enum migrate_mode mode) +{ + struct shmem_dev_info *dev_info = mapping->private_data; + + if (dev_info && dev_info->migratepage) + return dev_info->migratepage(mapping, newpage, page, +mode, dev_info->private_data); + + return migrate_page(mapping, newpage, page, mode); +} +#endif + #if defined(CONFIG_NUMA) && defined(CONFIG_TMPFS) static void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol) { @@ -3654,7 +3669,7 @@ static void shmem_destroy_inodecache(void) .write_end = shmem_write_end, #endif #ifdef CONFIG_MIGRATION - .migratepage= migrate_page, + .migratepage= 
shmem_migratepage, #endif .error_remove_page = generic_error_remove_page, }; -- 1.9.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 2/2] drm/i915: Make GPU pages movable
From: Chris Wilson On a long run of more than 2-3 days, physical memory tends to get fragmented severely, which considerably slows down the system. In such a scenario, the shrinker is also unable to help as lack of memory is not the actual problem, since it has been observed that there are enough free pages of 0 order. This also manifests itself when an individual zone in the mm runs out of pages and if we cannot migrate pages between zones, the kernel hits an out-of-memory even though there are free pages (and often all of swap) available. To address the issue of external fragmentation, kernel does a compaction (which involves migration of pages) but its efficacy depends upon how many pages are marked as MOVABLE, as only those pages can be migrated. Currently the backing pages for GPU buffers are allocated from shmemfs with GFP_RECLAIMABLE flag, in units of 4KB pages. In the case of limited swap space, it may not be possible always to reclaim or swap-out pages of all the inactive objects, to make way for free space allowing formation of higher order groups of physically-contiguous pages on compaction. Just marking the GPU pages as MOVABLE will not suffice, as i915.ko has to pin the pages if they are in use by GPU, which will prevent their migration. So the migratepage callback in shmem is also hooked up to get a notification when kernel initiates the page migration. On the notification, i915.ko appropriately unpins the pages. With this we can effectively mark the GPU pages as MOVABLE and hence mitigate the fragmentation problem. 
v2: - Rename the migration routine to gem_shrink_migratepage, move it to the shrinker file, and use the existing constructs (Chris) - To cleanup, add a new helper function to encapsulate all page migration skip conditions (Chris) - Add a new local helper function in shrinker file, for dropping the backing pages, and call the same from gem_shrink() also (Chris) v3: - Fix/invert the check on the return value of unsafe_drop_pages (Chris) v4: - Minor tidy v5: - Fix unsafe usage of unsafe_drop_pages() - Rebase onto vmap-notifier v6: - Remove i915_gem_object_get/put across unsafe_drop_pages() as with struct_mutex protection object can't disappear. (Chris) Testcase: igt/gem_shrink Bugzilla: (e.g.) https://bugs.freedesktop.org/show_bug.cgi?id=90254 Cc: Hugh Dickins Cc: linux...@kvack.org Signed-off-by: Sourab Gupta Signed-off-by: Akash Goel Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/i915_gem.c | 9 ++- drivers/gpu/drm/i915/i915_gem_shrinker.c | 132 +++ 3 files changed, 142 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 4735b417..7f2717b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1357,6 +1357,8 @@ struct intel_l3_parity { }; struct i915_gem_mm { + struct shmem_dev_info shmem_info; + /** Memory allocator for GTT stolen memory */ struct drm_mm stolen; /** Protects the usage of the GTT stolen memory allocator. 
This is diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1f995ce..f0d4ce7 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2164,6 +2164,7 @@ void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj) if (obj->mm.madv == I915_MADV_WILLNEED) mark_page_accessed(page); + set_page_private(page, 0); put_page(page); } obj->mm.dirty = false; @@ -2310,6 +2311,7 @@ static unsigned int swiotlb_max_size(void) sg->length += PAGE_SIZE; } last_pfn = page_to_pfn(page); + set_page_private(page, (unsigned long)obj); /* Check that the i965g/gm workaround works. */ WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x0010UL)); @@ -2334,8 +2336,10 @@ static unsigned int swiotlb_max_size(void) err_pages: sg_mark_end(sg); - for_each_sgt_page(page, sgt_iter, st) + for_each_sgt_page(page, sgt_iter, st) { + set_page_private(page, 0); put_page(page); + } sg_free_table(st); kfree(st); @@ -4185,6 +4189,8 @@ struct drm_i915_gem_object * goto fail; mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; + if (IS_ENABLED(MIGRATION)) + mask |= __GFP_MOVABLE; if (IS_CRESTLINE(dev_priv) || IS_BROADWATER(dev_priv)) { /* 965gm cannot relocate objects above 4GiB. */ mask &= ~__GFP_HIGHMEM; @@ -4193,6
Re: [Intel-gfx] [PATCH 05/12] drm/i915/scheduler: Record all dependencies upon request construction
On 03/11/2016 11:55, Chris Wilson wrote: On Thu, Nov 03, 2016 at 11:03:47AM +, Tvrtko Ursulin wrote: On 02/11/2016 17:50, Chris Wilson wrote: The scheduler needs to know the dependencies of each request for the lifetime of the request, as it may choose to reschedule the requests at any time and must ensure the dependency tree is not broken. This is in addition to using the fence to only allow execution after all dependencies have been completed. One option was to extend the fence to support the bidirectional dependency tracking required by the scheduler. However the mismatch in lifetimes between the submit fence and the request essentially meant that we had to build a completely separate struct (and we could not simply reuse the existing waitqueue in the fence for one half of the dependency tracking). The extra dependency tracking simply did not mesh well with the fence, and keeping it separate both keeps the fence implementation simpler and allows us to extend the dependency tracking into a priority tree (whilst maintaining support for reordering the tree). To avoid the additional allocations and list manipulations, the use of the priotree is disabled when there are no schedulers to use it. 
Signed-off-by: Chris Wilson--- drivers/gpu/drm/i915/i915_gem_request.c | 72 - drivers/gpu/drm/i915/i915_gem_request.h | 23 +++ 2 files changed, 94 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 9c8605c834f9..13090f226203 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -113,6 +113,59 @@ i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) spin_unlock(_priv->mm.lock); } +static int +i915_priotree_add_dependency(struct i915_priotree *pt, +struct i915_priotree *signal, +struct i915_dependency *dep) +{ + unsigned long flags = 0; + + if (!dep) { + dep = kmalloc(sizeof(*dep), GFP_KERNEL); I will mention a dedicated cache again since this could possibly be our hottest allocation path. With a dedicated slab I've seen it grow to 5-7k objects in some benchmarks, with the request slab around 1k at the same time. I'm open to one. We allocate more of these than we do even for fences. I was thinking it could be added later, but if we can the api to always pass in the i915_dependency it will probably work better. + if (!dep) + return -ENOMEM; + + flags |= I915_DEPENDENCY_ALLOC; + } Not sure if it would be any nicer to just set the flags after allocating to I915_DEPENDENCY_ALLOC and add an else path to set it to zero here. I just tend to avoid if {} else {} if I can help, just a personal preference. +struct i915_dependency { + struct i915_priotree *signal; + struct list_head pre_link, post_link; + unsigned long flags; +#define I915_DEPENDENCY_ALLOC BIT(0) +}; + +struct i915_priotree { + struct list_head pre_list; /* who is before us, we depend upon */ + struct list_head post_list; /* who is after us, they depend upon us */ +}; I need a picture to imagine this data structure. :( The names suck. When you wrote this I assumed you would respin shortly with some better names? 
I tried to grasp it one more time since then but keep getting lost. :I Regards, Tvrtko ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 4/5] drm/i915: Pass dev_priv to INTEL_INFO everywhere apart from the gen use
From: Tvrtko UrsulinAfter this patch only conversion of INTEL_INFO(p)->gen to INTEL_GEN(dev_priv) remains before the __I915__ macro can be removed. Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_drv.c | 4 ++-- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +- drivers/gpu/drm/i915/intel_color.c | 31 ++- drivers/gpu/drm/i915/intel_display.c | 28 +--- drivers/gpu/drm/i915/intel_fbdev.c | 10 +- drivers/gpu/drm/i915/intel_pm.c | 7 --- 6 files changed, 39 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 79cea49183b3..35940192e569 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -150,7 +150,7 @@ static void intel_detect_pch(struct drm_device *dev) /* In all current cases, num_pipes is equivalent to the PCH_NOP setting * (which really amounts to a PCH but no South Display). */ - if (INTEL_INFO(dev)->num_pipes == 0) { + if (INTEL_INFO(dev_priv)->num_pipes == 0) { dev_priv->pch_type = PCH_NOP; return; } @@ -607,7 +607,7 @@ static int i915_load_modeset_init(struct drm_device *dev) intel_modeset_gem_init(dev); - if (INTEL_INFO(dev)->num_pipes == 0) + if (INTEL_INFO(dev_priv)->num_pipes == 0) return 0; ret = intel_fbdev_init(dev); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index cad6de65947d..b98f11735c5b 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -714,7 +714,7 @@ static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt, */ static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt) { - ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask; + ppgtt->pd_dirty_rings = INTEL_INFO(to_i915(ppgtt->base.dev))->ring_mask; } /* Removes entries from a single page table, releasing it if it's empty. 
diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index 445108855275..3784940a4e7a 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -345,11 +345,10 @@ static void haswell_load_luts(struct drm_crtc_state *crtc_state) static void broadwell_load_luts(struct drm_crtc_state *state) { struct drm_crtc *crtc = state->crtc; - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(crtc->dev); struct intel_crtc_state *intel_state = to_intel_crtc_state(state); enum pipe pipe = to_intel_crtc(crtc)->pipe; - uint32_t i, lut_size = INTEL_INFO(dev)->color.degamma_lut_size; + uint32_t i, lut_size = INTEL_INFO(dev_priv)->color.degamma_lut_size; if (crtc_state_is_legacy(state)) { haswell_load_luts(state); @@ -428,8 +427,7 @@ static void broadwell_load_luts(struct drm_crtc_state *state) static void cherryview_load_luts(struct drm_crtc_state *state) { struct drm_crtc *crtc = state->crtc; - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(crtc->dev); enum pipe pipe = to_intel_crtc(crtc)->pipe; struct drm_color_lut *lut; uint32_t i, lut_size; @@ -446,7 +444,7 @@ static void cherryview_load_luts(struct drm_crtc_state *state) if (state->degamma_lut) { lut = (struct drm_color_lut *) state->degamma_lut->data; - lut_size = INTEL_INFO(dev)->color.degamma_lut_size; + lut_size = INTEL_INFO(dev_priv)->color.degamma_lut_size; for (i = 0; i < lut_size; i++) { /* Write LUT in U0.14 format. */ word0 = @@ -461,7 +459,7 @@ static void cherryview_load_luts(struct drm_crtc_state *state) if (state->gamma_lut) { lut = (struct drm_color_lut *) state->gamma_lut->data; - lut_size = INTEL_INFO(dev)->color.gamma_lut_size; + lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; for (i = 0; i < lut_size; i++) { /* Write LUT in U0.10 format. 
*/ word0 = @@ -497,12 +495,12 @@ void intel_color_load_luts(struct drm_crtc_state *crtc_state) int intel_color_check(struct drm_crtc *crtc, struct drm_crtc_state *crtc_state) { - struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = to_i915(crtc->dev); size_t gamma_length, degamma_length; - degamma_length = INTEL_INFO(dev)->color.degamma_lut_size * + degamma_length = INTEL_INFO(dev_priv)->color.degamma_lut_size * sizeof(struct drm_color_lut); -
[Intel-gfx] [PATCH 2/5] drm/i915: More assorted dev_priv cleanups
From: Tvrtko UrsulinA small selection of macros which can only accept dev_priv from now on and a resulting trickle of fixups. Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_drv.h | 27 --- drivers/gpu/drm/i915/i915_gpu_error.c | 2 +- drivers/gpu/drm/i915/i915_irq.c | 6 +++--- drivers/gpu/drm/i915/intel_crt.c | 8 drivers/gpu/drm/i915/intel_display.c | 4 ++-- drivers/gpu/drm/i915/intel_dp.c | 2 +- drivers/gpu/drm/i915/intel_hotplug.c | 2 +- drivers/gpu/drm/i915/intel_psr.c | 2 +- 8 files changed, 25 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 45a30f730216..6060e41d25e5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2901,28 +2901,25 @@ struct drm_i915_cmd_table { #define HAS_128_BYTE_Y_TILING(dev_priv) (!IS_GEN2(dev_priv) && \ !(IS_I915G(dev_priv) || \ IS_I915GM(dev_priv))) -#define SUPPORTS_TV(dev) (INTEL_INFO(dev)->supports_tv) -#define I915_HAS_HOTPLUG(dev) (INTEL_INFO(dev)->has_hotplug) - -#define HAS_FW_BLC(dev_priv) (INTEL_GEN(dev_priv) > 2) -#define HAS_PIPE_CXSR(dev) (INTEL_INFO(dev)->has_pipe_cxsr) -#define HAS_FBC(dev) (INTEL_INFO(dev)->has_fbc) +#define SUPPORTS_TV(dev_priv) ((dev_priv)->info.supports_tv) +#define I915_HAS_HOTPLUG(dev_priv) ((dev_priv)->info.has_hotplug) +#define HAS_FW_BLC(dev_priv) (INTEL_GEN(dev_priv) > 2) +#define HAS_PIPE_CXSR(dev_priv) ((dev_priv)->info.has_pipe_cxsr) +#define HAS_FBC(dev_priv) ((dev_priv)->info.has_fbc) #define HAS_IPS(dev_priv) (IS_HSW_ULT(dev_priv) || IS_BROADWELL(dev_priv)) - -#define HAS_DP_MST(dev)(INTEL_INFO(dev)->has_dp_mst) - +#define HAS_DP_MST(dev_priv) ((dev_priv)->info.has_dp_mst) #define HAS_DDI(dev_priv) ((dev_priv)->info.has_ddi) -#define HAS_FPGA_DBG_UNCLAIMED(dev)(INTEL_INFO(dev)->has_fpga_dbg) -#define HAS_PSR(dev) (INTEL_INFO(dev)->has_psr) -#define HAS_RC6(dev) (INTEL_INFO(dev)->has_rc6) -#define HAS_RC6p(dev) (INTEL_INFO(dev)->has_rc6p) - -#define HAS_CSR(dev) 
(INTEL_INFO(dev)->has_csr) +#define HAS_PSR(dev_priv) ((dev_priv)->info.has_psr) +#define HAS_RC6(dev_priv) ((dev_priv)->info.has_rc6) +#define HAS_RC6p(dev_priv) ((dev_priv)->info.has_rc6p) +#define HAS_CSR(dev_priv) ((dev_priv)->info.has_csr) #define HAS_RUNTIME_PM(dev_priv) ((dev_priv)->info.has_runtime_pm) #define HAS_64BIT_RELOC(dev_priv) ((dev_priv)->info.has_64bit_reloc) +#define HAS_FPGA_DBG_UNCLAIMED(dev_priv) ((dev_priv)->info.has_fpga_dbg) + /* * For now, anything with a GuC requires uCode loading, and then supports * command submission once loaded. But these are logically independent diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index d430b9441e6b..35b13f178b61 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -573,7 +573,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, pdev->subsystem_device); err_printf(m, "IOMMU enabled?: %d\n", error->iommu); - if (HAS_CSR(dev)) { + if (HAS_CSR(dev_priv)) { struct intel_csr *csr = _priv->csr; err_printf(m, "DMC loaded: %s\n", diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 6d7505b5c5e7..285ee1e4352a 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3678,7 +3678,7 @@ static void i915_irq_preinstall(struct drm_device * dev) struct drm_i915_private *dev_priv = to_i915(dev); int pipe; - if (I915_HAS_HOTPLUG(dev)) { + if (I915_HAS_HOTPLUG(dev_priv)) { i915_hotplug_interrupt_update(dev_priv, 0x, 0); I915_WRITE(PORT_HOTPLUG_STAT, I915_READ(PORT_HOTPLUG_STAT)); } @@ -3712,7 +3712,7 @@ static int i915_irq_postinstall(struct drm_device *dev) I915_DISPLAY_PIPE_B_EVENT_INTERRUPT | I915_USER_INTERRUPT; - if (I915_HAS_HOTPLUG(dev)) { + if (I915_HAS_HOTPLUG(dev_priv)) { i915_hotplug_interrupt_update(dev_priv, 0x, 0); POSTING_READ(PORT_HOTPLUG_EN); @@ -3880,7 +3880,7 @@ static void i915_irq_uninstall(struct drm_device * dev) struct drm_i915_private 
*dev_priv = to_i915(dev); int pipe; - if (I915_HAS_HOTPLUG(dev)) { + if (I915_HAS_HOTPLUG(dev_priv)) { i915_hotplug_interrupt_update(dev_priv, 0x, 0); I915_WRITE(PORT_HOTPLUG_STAT, I915_READ(PORT_HOTPLUG_STAT)); } diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 30eb95b54dcf..fed61958ffd4 100644 ---
[Intel-gfx] [PATCH 3/5] drm/i915: Further assorted dev_priv cleanups
From: Tvrtko UrsulinA small selection of macros which can only accept dev_priv from now on and a resulting trickle of fixups. Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_drv.h| 12 ++-- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- drivers/gpu/drm/i915/i915_irq.c| 2 +- drivers/gpu/drm/i915/intel_guc_loader.c| 10 +- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6060e41d25e5..f392b0fb9b86 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2700,7 +2700,7 @@ struct drm_i915_cmd_table { #define INTEL_DEVID(dev_priv) ((dev_priv)->info.device_id) #define REVID_FOREVER 0xff -#define INTEL_REVID(p) (__I915__(p)->drm.pdev->revision) +#define INTEL_REVID(dev_priv) ((dev_priv)->drm.pdev->revision) #define GEN_FOREVER (0) /* @@ -2925,13 +2925,13 @@ struct drm_i915_cmd_table { * command submission once loaded. But these are logically independent * properties, so we have separate macros to test them. 
*/ -#define HAS_GUC(dev) (INTEL_INFO(dev)->has_guc) -#define HAS_GUC_UCODE(dev) (HAS_GUC(dev)) -#define HAS_GUC_SCHED(dev) (HAS_GUC(dev)) +#define HAS_GUC(dev_priv) ((dev_priv)->info.has_guc) +#define HAS_GUC_UCODE(dev_priv)(HAS_GUC(dev_priv)) +#define HAS_GUC_SCHED(dev_priv)(HAS_GUC(dev_priv)) -#define HAS_RESOURCE_STREAMER(dev) (INTEL_INFO(dev)->has_resource_streamer) +#define HAS_RESOURCE_STREAMER(dev_priv) ((dev_priv)->info.has_resource_streamer) -#define HAS_POOLED_EU(dev) (INTEL_INFO(dev)->has_pooled_eu) +#define HAS_POOLED_EU(dev_priv)((dev_priv)->info.has_pooled_eu) #define INTEL_PCH_DEVICE_ID_MASK 0xff00 #define INTEL_PCH_IBX_DEVICE_ID_TYPE 0x3b00 diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 9c7d9c88d879..f98921174161 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1616,7 +1616,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, } if (args->flags & I915_EXEC_RESOURCE_STREAMER) { - if (!HAS_RESOURCE_STREAMER(dev)) { + if (!HAS_RESOURCE_STREAMER(dev_priv)) { DRM_DEBUG("RS is only allowed for Haswell, Gen8 and above\n"); return -EINVAL; } diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 285ee1e4352a..cb8a75f6ca16 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -4145,7 +4145,7 @@ void intel_irq_init(struct drm_i915_private *dev_priv) INIT_WORK(_priv->rps.work, gen6_pm_rps_work); INIT_WORK(_priv->l3_parity.error_work, ivybridge_parity_work); - if (HAS_GUC_SCHED(dev)) + if (HAS_GUC_SCHED(dev_priv)) dev_priv->pm_guc_events = GEN9_GUC_TO_HOST_INT_EVENT; /* Let's track the enabled rps events */ diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 1aa85236b788..34d6ad2cf7c1 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -566,7 +566,7 @@ int 
intel_guc_setup(struct drm_device *dev) ret = 0; } - if (err == 0 && !HAS_GUC_UCODE(dev)) + if (err == 0 && !HAS_GUC_UCODE(dev_priv)) ; /* Don't mention the GuC! */ else if (err == 0) DRM_INFO("GuC firmware load skipped\n"); @@ -725,18 +725,18 @@ void intel_guc_init(struct drm_device *dev) struct intel_guc_fw *guc_fw = _priv->guc.guc_fw; const char *fw_path; - if (!HAS_GUC(dev)) { + if (!HAS_GUC(dev_priv)) { i915.enable_guc_loading = 0; i915.enable_guc_submission = 0; } else { /* A negative value means "use platform default" */ if (i915.enable_guc_loading < 0) - i915.enable_guc_loading = HAS_GUC_UCODE(dev); + i915.enable_guc_loading = HAS_GUC_UCODE(dev_priv); if (i915.enable_guc_submission < 0) - i915.enable_guc_submission = HAS_GUC_SCHED(dev); + i915.enable_guc_submission = HAS_GUC_SCHED(dev_priv); } - if (!HAS_GUC_UCODE(dev)) { + if (!HAS_GUC_UCODE(dev_priv)) { fw_path = NULL; } else if (IS_SKYLAKE(dev_priv)) { fw_path = I915_SKL_GUC_UCODE; -- 2.7.4 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 1/5] drm/i915: Assorted dev_priv cleanups
From: Tvrtko UrsulinA small selection of macros which can only accept dev_priv from now on and a resulting trickle of fixups. Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_drv.h| 31 -- drivers/gpu/drm/i915/i915_gem.c| 13 +++-- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 ++-- drivers/gpu/drm/i915/i915_gem_stolen.c | 3 ++- drivers/gpu/drm/i915/i915_gem_userptr.c| 3 ++- drivers/gpu/drm/i915/i915_gpu_error.c | 2 +- drivers/gpu/drm/i915/intel_dp.c| 6 +++--- 7 files changed, 34 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 4735b4177100..45a30f730216 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2851,28 +2851,31 @@ struct drm_i915_cmd_table { #define ALL_ENGINES(~0) #define HAS_ENGINE(dev_priv, id) \ - (!!(INTEL_INFO(dev_priv)->ring_mask & ENGINE_MASK(id))) + (!!((dev_priv)->info.ring_mask & ENGINE_MASK(id))) #define HAS_BSD(dev_priv) HAS_ENGINE(dev_priv, VCS) #define HAS_BSD2(dev_priv) HAS_ENGINE(dev_priv, VCS2) #define HAS_BLT(dev_priv) HAS_ENGINE(dev_priv, BCS) #define HAS_VEBOX(dev_priv)HAS_ENGINE(dev_priv, VECS) -#define HAS_LLC(dev) (INTEL_INFO(dev)->has_llc) -#define HAS_SNOOP(dev) (INTEL_INFO(dev)->has_snoop) -#define HAS_EDRAM(dev) (!!(__I915__(dev)->edram_cap & EDRAM_ENABLED)) +#define HAS_LLC(dev_priv) ((dev_priv)->info.has_llc) +#define HAS_SNOOP(dev_priv)((dev_priv)->info.has_snoop) +#define HAS_EDRAM(dev_priv)(!!((dev_priv)->edram_cap & EDRAM_ENABLED)) #define HAS_WT(dev_priv) ((IS_HASWELL(dev_priv) || \ IS_BROADWELL(dev_priv)) && HAS_EDRAM(dev_priv)) -#define HWS_NEEDS_PHYSICAL(dev)(INTEL_INFO(dev)->hws_needs_physical) -#define HAS_HW_CONTEXTS(dev) (INTEL_INFO(dev)->has_hw_contexts) -#define HAS_LOGICAL_RING_CONTEXTS(dev) (INTEL_INFO(dev)->has_logical_ring_contexts) -#define USES_PPGTT(dev)(i915.enable_ppgtt) -#define USES_FULL_PPGTT(dev) (i915.enable_ppgtt >= 2) -#define USES_FULL_48BIT_PPGTT(dev) (i915.enable_ppgtt == 3) +#define 
HWS_NEEDS_PHYSICAL(dev_priv) ((dev_priv)->info.hws_needs_physical) -#define HAS_OVERLAY(dev) (INTEL_INFO(dev)->has_overlay) -#define OVERLAY_NEEDS_PHYSICAL(dev) (INTEL_INFO(dev)->overlay_needs_physical) +#define HAS_HW_CONTEXTS(dev_priv) ((dev_priv)->info.has_hw_contexts) +#define HAS_LOGICAL_RING_CONTEXTS(dev_priv) \ + ((dev_priv)->info.has_logical_ring_contexts) +#define USES_PPGTT(dev_priv) (i915.enable_ppgtt) +#define USES_FULL_PPGTT(dev_priv) (i915.enable_ppgtt >= 2) +#define USES_FULL_48BIT_PPGTT(dev_priv)(i915.enable_ppgtt == 3) + +#define HAS_OVERLAY(dev_priv) ((dev_priv)->info.has_overlay) +#define OVERLAY_NEEDS_PHYSICAL(dev_priv) \ + ((dev_priv)->info.overlay_needs_physical) /* Early gen2 have a totally busted CS tlb and require pinned batches. */ #define HAS_BROKEN_CS_TLB(dev_priv)(IS_I830(dev_priv) || IS_845G(dev_priv)) @@ -2889,8 +2892,8 @@ struct drm_i915_cmd_table { * legacy irq no. is shared with another device. The kernel then disables that * interrupt source and so prevents the other device from working properly. */ -#define HAS_AUX_IRQ(dev) (INTEL_INFO(dev)->gen >= 5) -#define HAS_GMBUS_IRQ(dev) (INTEL_INFO(dev)->has_gmbus_irq) +#define HAS_AUX_IRQ(dev_priv) ((dev_priv)->info.gen >= 5) +#define HAS_GMBUS_IRQ(dev_priv) ((dev_priv)->info.has_gmbus_irq) /* With the 945 and later, Y tiling got adjusted so that it was 32 128-byte * rows, which changed the alignment requirements and fence programming. 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1f995ced524e..e9808c8ef55b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -48,7 +48,7 @@ static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *o static bool cpu_cache_is_coherent(struct drm_device *dev, enum i915_cache_level level) { - return HAS_LLC(dev) || level != I915_CACHE_NONE; + return HAS_LLC(to_i915(dev)) || level != I915_CACHE_NONE; } static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) @@ -1757,7 +1757,7 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf) goto err_rpm; /* Access to snoopable pages through the GTT is incoherent. */ - if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) { + if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) { ret = -EFAULT; goto err_unlock; } @@ -3414,7 +3414,8 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
Re: [Intel-gfx] [PATCH] drm/i915: Limit Valleyview and earlier to only using mappable scanout
On Fri, 04 Nov 2016, Chris Wilson wrote: > On Fri, Nov 04, 2016 at 12:59:08PM +0000, Tvrtko Ursulin wrote: >> >> On 04/11/2016 11:08, Chris Wilson wrote: >> >Valleyview and Cherryview are definitely limited to only scanning out >> >from the first 256MiB and 512MiB of the Global GTT respectively. Let's >> >presume that this behaviour was inherited from the display block copied >> >from g4x (not Ironlake) and all earlier generations are similarly >> >affected. For simplicity, impose that these platforms must scanout from >> >the mappable region. >> > >> >Reported-by: Luis Botello >> >Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98036 >> >Fixes: 2efb813d5388 ("drm/i915: Fallback to using unmappable memory for >> >scanout") >> >Signed-off-by: Chris Wilson >> >Cc: Akash Goel >> >Cc: Joonas Lahtinen >> >Cc: # v4.9-rc1+ >> >--- >> >This leaves Ironlake -> Haswell with a bit of uncertainty. It is also >> >not clear if the scanout accessible region is similarly limited on all >> >gen8+, and so whether we need to similarly curtail the upper range for >> >their scanouts. >> >--- >> > drivers/gpu/drm/i915/i915_gem.c | 18 -- >> > 1 file changed, 16 insertions(+), 2 deletions(-) >> > >> >diff --git a/drivers/gpu/drm/i915/i915_gem.c >> >b/drivers/gpu/drm/i915/i915_gem.c >> >index 269e2487c104..408875fbec66 100644 >> >--- a/drivers/gpu/drm/i915/i915_gem.c >> >+++ b/drivers/gpu/drm/i915/i915_gem.c >> >@@ -3661,8 +3661,22 @@ i915_gem_object_pin_to_display_plane(struct >> >drm_i915_gem_object *obj, >> >if (view->type == I915_GGTT_VIEW_NORMAL) >> >vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, >> > PIN_MAPPABLE | PIN_NONBLOCK); >> >- if (IS_ERR(vma)) >> >- vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 0); >> >+ if (IS_ERR(vma)) { >> >+ struct drm_i915_private *i915 = to_i915(obj->base.dev); >> >> dev_priv ? >> >> What do we do with i915_params being a global i915? > > Sssh, I'm gradually waging war against dev_priv. 
> Eventually Jani won't be able to complain about i915 being the minority. > > The global modparams is an easy rename. I just liked that i915.foo was the same on both the kernel command line and in code. I kinda still do, but like Chris I'm not too fond of dev_priv either, and i915 seems like a good replacement. Seeing how module parameters multiply like rabbits, with all sorts of sanitization, how the parameters are changed in kernel, and /sys/module/i915/parameters/ not reflecting what the user did, maybe you could come up with something nice for that while at it... BR, Jani. -- Jani Nikula, Intel Open Source Technology Center ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 2/2] drm/i915: Make GPU pages movable
On 11/4/2016 7:07 PM, Chris Wilson wrote: Best if we send these as a new series to unconfuse CI. Okay will send as a new series. On Fri, Nov 04, 2016 at 06:18:26PM +0530, akash.g...@intel.com wrote: +static int do_migrate_page(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + int ret = 0; + + if (!can_migrate_page(obj)) + return -EBUSY; + + /* HW access would be required for a GGTT bound object, for which +* device has to be kept awake. But a deadlock scenario can arise if +* the attempt is made to resume the device, when either a suspend +* or a resume operation is already happening concurrently from some +* other path and that only also triggers compaction. So only unbind +* if the device is currently awake. +*/ + if (!intel_runtime_pm_get_if_in_use(dev_priv)) + return -EBUSY; + + i915_gem_object_get(obj); + if (!unsafe_drop_pages(obj)) + ret = -EBUSY; + i915_gem_object_put(obj); Since the object release changes, we can now do this without the i915_gem_object_get / i915_gem_object_put (as we are guarded by the BKL struct_mutex). Fine will remove object_get/put as with struct_mutex protection object can't disappear across unsafe_drop_pages(). Best regards Akash -Chris ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 2/2] drm/i915: Make GPU pages movable
Best if we send these as a new series to unconfuse CI. On Fri, Nov 04, 2016 at 06:18:26PM +0530, akash.g...@intel.com wrote: > +static int do_migrate_page(struct drm_i915_gem_object *obj) > +{ > + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); > + int ret = 0; > + > + if (!can_migrate_page(obj)) > + return -EBUSY; > + > + /* HW access would be required for a GGTT bound object, for which > + * device has to be kept awake. But a deadlock scenario can arise if > + * the attempt is made to resume the device, when either a suspend > + * or a resume operation is already happening concurrently from some > + * other path and that only also triggers compaction. So only unbind > + * if the device is currently awake. > + */ > + if (!intel_runtime_pm_get_if_in_use(dev_priv)) > + return -EBUSY; > + > + i915_gem_object_get(obj); > + if (!unsafe_drop_pages(obj)) > + ret = -EBUSY; > + i915_gem_object_put(obj); Since the object release changes, we can now do this without the i915_gem_object_get / i915_gem_object_put (as we are guarded by the BKL struct_mutex). -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] ✗ Fi.CI.BAT: failure for series starting with [1/2] shmem: Support for registration of driver/file owner specific ops (rev4)
== Series Details == Series: series starting with [1/2] shmem: Support for registration of driver/file owner specific ops (rev4) URL : https://patchwork.freedesktop.org/series/4780/ State : failure == Summary == drivers/gpu/drm/i915/i915_drv.h: At top level: drivers/gpu/drm/i915/i915_drv.h:58:1: error: expected identifier or ‘(’ before ‘==’ token === ^ In file included from drivers/gpu/drm/i915/intel_guc.h:27:0, from drivers/gpu/drm/i915/i915_drv.h:60, from drivers/gpu/drm/i915/intel_csr.c:25: drivers/gpu/drm/i915/intel_guc_fwif.h:222:1: warning: empty declaration } __packed; ^ In file included from drivers/gpu/drm/i915/intel_csr.c:25:0: drivers/gpu/drm/i915/i915_drv.h:61:1: error: expected identifier or ‘(’ before ‘>>’ token >>> drm/i915: Make pages of GFX allocations movable ^ LD net/key/built-in.o scripts/Makefile.build:290: recipe for target 'drivers/gpu/drm/i915/i915_sysfs.o' failed make[4]: *** [drivers/gpu/drm/i915/i915_sysfs.o] Error 1 LD drivers/acpi/acpica/acpi.o scripts/Makefile.build:290: recipe for target 'drivers/gpu/drm/i915/i915_suspend.o' failed make[4]: *** [drivers/gpu/drm/i915/i915_suspend.o] Error 1 LD drivers/thermal/thermal_sys.o LD drivers/thermal/built-in.o LD drivers/iommu/built-in.o LD net/netlink/built-in.o LD drivers/video/console/built-in.o LD drivers/pci/pcie/pcieportdrv.o scripts/Makefile.build:290: recipe for target 'drivers/gpu/drm/i915/intel_csr.o' failed make[4]: *** [drivers/gpu/drm/i915/intel_csr.o] Error 1 scripts/Makefile.build:475: recipe for target 'drivers/gpu/drm/i915' failed make[3]: *** [drivers/gpu/drm/i915] Error 2 scripts/Makefile.build:475: recipe for target 'drivers/gpu/drm' failed make[2]: *** [drivers/gpu/drm] Error 2 scripts/Makefile.build:475: recipe for target 'drivers/gpu' failed make[1]: *** [drivers/gpu] Error 2 make[1]: *** Waiting for unfinished jobs LD drivers/spi/built-in.o LD kernel/sched/built-in.o LD drivers/video/built-in.o LD drivers/acpi/acpica/built-in.o LD drivers/tty/serial/8250/8250.o LD 
kernel/built-in.o LD drivers/acpi/built-in.o LD [M] drivers/net/ethernet/intel/igbvf/igbvf.o LD lib/raid6/raid6_pq.o LD lib/raid6/built-in.o LD [M] drivers/mmc/core/mmc_core.o LD drivers/mmc/built-in.o LD drivers/pci/pcie/aer/aerdriver.o LD drivers/usb/gadget/libcomposite.o LD net/unix/unix.o LD drivers/pci/pcie/aer/built-in.o LD net/unix/built-in.o LD drivers/pci/pcie/built-in.o LD [M] drivers/net/ethernet/intel/e1000/e1000.o LD [M] sound/pci/hda/snd-hda-codec-generic.o LD sound/pci/built-in.o LD net/packet/built-in.o LD sound/built-in.o LD drivers/usb/core/usbcore.o LD drivers/scsi/sd_mod.o LD drivers/scsi/built-in.o LD drivers/tty/serial/8250/8250_base.o LD drivers/tty/serial/8250/built-in.o LD drivers/usb/core/built-in.o LD drivers/pci/built-in.o LD drivers/tty/serial/built-in.o LD drivers/usb/gadget/udc/udc-core.o LD drivers/usb/gadget/udc/built-in.o LD drivers/usb/gadget/built-in.o LD net/xfrm/built-in.o CC arch/x86/kernel/cpu/capflags.o LD arch/x86/kernel/cpu/built-in.o LD arch/x86/kernel/built-in.o LD arch/x86/built-in.o AR lib/lib.a EXPORTS lib/lib-ksyms.o LD drivers/md/md-mod.o LD drivers/tty/vt/built-in.o LD [M] drivers/net/ethernet/intel/igb/igb.o LD lib/built-in.o LD drivers/md/built-in.o LD drivers/tty/built-in.o LD drivers/usb/host/xhci-hcd.o LD fs/btrfs/btrfs.o LD fs/ext4/ext4.o LD net/ipv6/ipv6.o LD fs/ext4/built-in.o LD fs/btrfs/built-in.o LD net/ipv6/built-in.o LD fs/built-in.o LD drivers/usb/host/built-in.o LD drivers/usb/built-in.o LD net/core/built-in.o LD net/ipv4/built-in.o LD net/built-in.o LD [M] drivers/net/ethernet/intel/e1000e/e1000e.o LD drivers/net/ethernet/built-in.o LD drivers/net/built-in.o Makefile:983: recipe for target 'drivers' failed make: *** [drivers] Error 2 Full logs at /archive/deploy/logs/Patchwork_2905 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v8 02/12] drm/i915: Add i915 perf infrastructure
On Fri, Nov 4, 2016 at 8:59 AM, sourab guptawrote: > On Thu, 2016-10-27 at 19:14 -0700, Robert Bragg wrote: > > Adds base i915 perf infrastructure for Gen performance metrics. > > > > This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64 > > properties to configure a stream of metrics and returns a new fd usable > > with standard VFS system calls including read() to read typed and sized > > records; ioctl() to enable or disable capture and poll() to wait for > > data. > > > > A stream is opened something like: > > > > uint64_t properties[] = { > > /* Single context sampling */ > > DRM_I915_PERF_PROP_CTX_HANDLE,ctx_handle, > > > > /* Include OA reports in samples */ > > DRM_I915_PERF_PROP_SAMPLE_OA, true, > > > > /* OA unit configuration */ > > DRM_I915_PERF_PROP_OA_METRICS_SET,metrics_set_id, > > DRM_I915_PERF_PROP_OA_FORMAT, report_format, > > DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent, > >}; > >struct drm_i915_perf_open_param parm = { > > .flags = I915_PERF_FLAG_FD_CLOEXEC | > >I915_PERF_FLAG_FD_NONBLOCK | > >I915_PERF_FLAG_DISABLED, > > .properties_ptr = (uint64_t)properties, > > .num_properties = sizeof(properties) / 16, > >}; > >int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, ); > > > > Records read all start with a common { type, size } header with > > DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records > > contain an extensible number of fields and it's the > > DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that > > determine what's included in every sample. > > > > No specific streams are supported yet so any attempt to open a stream > > will return an error. 
> > > > v2: > > use i915_gem_context_get() - Chris Wilson > > v3: > > update read() interface to avoid passing state struct - Chris Wilson > > fix some rebase fallout, with i915-perf init/deinit > > v4: > > s/DRM_IORW/DRM_IOW/ - Emil Velikov > > > > Signed-off-by: Robert Bragg > > --- > > drivers/gpu/drm/i915/Makefile| 3 + > > drivers/gpu/drm/i915/i915_drv.c | 4 + > > drivers/gpu/drm/i915/i915_drv.h | 91 > > drivers/gpu/drm/i915/i915_perf.c | 443 ++ > + > > include/uapi/drm/i915_drm.h | 67 ++ > > 5 files changed, 608 insertions(+) > > create mode 100644 drivers/gpu/drm/i915/i915_perf.c > > > > diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/ > Makefile > > index 6123400..8d4e25f 100644 > > --- a/drivers/gpu/drm/i915/Makefile > > +++ b/drivers/gpu/drm/i915/Makefile > > @@ -113,6 +113,9 @@ i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += > i915_gpu_error.o > > # virtual gpu code > > i915-y += i915_vgpu.o > > > > +# perf code > > +i915-y += i915_perf.o > > + > > ifeq ($(CONFIG_DRM_I915_GVT),y) > > i915-y += intel_gvt.o > > include $(src)/gvt/Makefile > > diff --git a/drivers/gpu/drm/i915/i915_drv.c > b/drivers/gpu/drm/i915/i915_drv.c > > index af3559d..685c96e 100644 > > --- a/drivers/gpu/drm/i915/i915_drv.c > > +++ b/drivers/gpu/drm/i915/i915_drv.c > > @@ -836,6 +836,8 @@ static int i915_driver_init_early(struct > drm_i915_private *dev_priv, > > > > intel_detect_preproduction_hw(dev_priv); > > > > + i915_perf_init(dev_priv); > > + > > return 0; > > > > err_workqueues: > > @@ -849,6 +851,7 @@ static int i915_driver_init_early(struct > drm_i915_private *dev_priv, > > */ > > static void i915_driver_cleanup_early(struct drm_i915_private > *dev_priv) > > { > > + i915_perf_fini(dev_priv); > > i915_gem_load_cleanup(_priv->drm); > > i915_workqueues_cleanup(dev_priv); > > } > > @@ -2556,6 +2559,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = > { > > DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, > DRM_RENDER_ALLOW), > > 
DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_GETPARAM, > i915_gem_context_getparam_ioctl, DRM_RENDER_ALLOW), > > DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_SETPARAM, > i915_gem_context_setparam_ioctl, DRM_RENDER_ALLOW), > > + DRM_IOCTL_DEF_DRV(I915_PERF_OPEN, i915_perf_open_ioctl, > DRM_RENDER_ALLOW), > > }; > > > > static struct drm_driver driver = { > > diff --git a/drivers/gpu/drm/i915/i915_drv.h > b/drivers/gpu/drm/i915/i915_drv.h > > index 5a260db..7a65c0b 100644 > > --- a/drivers/gpu/drm/i915/i915_drv.h > > +++ b/drivers/gpu/drm/i915/i915_drv.h > > @@ -1767,6 +1767,84 @@ struct intel_wm_config { > > bool sprites_scaled; > > }; > > > > +struct i915_perf_stream; > > + > > +struct i915_perf_stream_ops { > > + /* Enables the collection of HW samples, either in response to > > + * I915_PERF_IOCTL_ENABLE or implicitly called when stream is > > + * opened without I915_PERF_FLAG_DISABLED. > > + */ > > + void (*enable)(struct i915_perf_stream *stream); > > + > > + /* Disables the collection
Re: [Intel-gfx] [PATCH] drm/i915: Limit Valleyview and earlier to only using mappable scanout
On Fri, Nov 04, 2016 at 12:59:08PM +0000, Tvrtko Ursulin wrote: > > On 04/11/2016 11:08, Chris Wilson wrote: > >Valleyview and Cherryview are definitely limited to only scanning out > >from the first 256MiB and 512MiB of the Global GTT respectively. Let's > >presume that this behaviour was inherited from the display block copied > >from g4x (not Ironlake) and all earlier generations are similarly > >affected. For simplicity, impose that these platforms must scanout from > >the mappable region. > > > >Reported-by: Luis Botello > >Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98036 > >Fixes: 2efb813d5388 ("drm/i915: Fallback to using unmappable memory for > >scanout") > >Signed-off-by: Chris Wilson > >Cc: Akash Goel > >Cc: Joonas Lahtinen > >Cc: # v4.9-rc1+ > >--- > >This leaves Ironlake -> Haswell with a bit of uncertainty. It is also > >not clear if the scanout accessible region is similarly limited on all > >gen8+, and so whether we need to similarly curtail the upper range for > >their scanouts. > >--- > > drivers/gpu/drm/i915/i915_gem.c | 18 -- > > 1 file changed, 16 insertions(+), 2 deletions(-) > > > >diff --git a/drivers/gpu/drm/i915/i915_gem.c > >b/drivers/gpu/drm/i915/i915_gem.c > >index 269e2487c104..408875fbec66 100644 > >--- a/drivers/gpu/drm/i915/i915_gem.c > >+++ b/drivers/gpu/drm/i915/i915_gem.c > >@@ -3661,8 +3661,22 @@ i915_gem_object_pin_to_display_plane(struct > >drm_i915_gem_object *obj, > > if (view->type == I915_GGTT_VIEW_NORMAL) > > vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, > >PIN_MAPPABLE | PIN_NONBLOCK); > >-if (IS_ERR(vma)) > >-vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 0); > >+if (IS_ERR(vma)) { > >+struct drm_i915_private *i915 = to_i915(obj->base.dev); > > dev_priv ? > > What do we do with i915_params being a global i915? Sssh, I'm gradually waging war against dev_priv. Eventually Jani won't be able to complain about i915 being the minority. The global modparams is an easy rename. 
-Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915: Limit Valleyview and earlier to only using mappable scanout
On 04/11/2016 11:08, Chris Wilson wrote: Valleyview and Cherryview are definitely limited to only scanning out from the first 256MiB and 512MiB of the Global GTT respectively. Lets presume that this behaviour was inherited from the display block copied from g4x (not Ironlake) and all earlier generations are similarly affected. For simplicity, impose that these platforms must scanout from the mappable region. Reported-by: Luis BotelloBugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98036 Fixes: 2efb813d5388 ("drm/i915: Fallback to using unmappable memory for scanout") Signed-off-by: Chris Wilson Cc: Akash Goel Cc: Joonas Lahtinen Cc: # v4.9-rc1+ --- This leaves Ironlake -> Haswell with a bit of uncertainity. It is also not clear if the scanout accessible region is similarly limited on all gen8+, and so whether we need to similarly curtain the upper range for their scanouts. --- drivers/gpu/drm/i915/i915_gem.c | 18 -- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 269e2487c104..408875fbec66 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3661,8 +3661,22 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, if (view->type == I915_GGTT_VIEW_NORMAL) vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, PIN_MAPPABLE | PIN_NONBLOCK); - if (IS_ERR(vma)) - vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 0); + if (IS_ERR(vma)) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); dev_priv ? What do we do with i915_params being a global i915? Regards, Tvrtko + unsigned int flags; + + /* Valleyview and Cherryview are definitely limited to scanning +* out the first 256MiB and 512MiB respectively. Lets presume +* this behaviour was inherited from their g4x display engine +* and that all earlier gen are similarly limited. 
+*/ + flags = 0; + if (INTEL_GEN(i915) < 5 || + IS_VALLEYVIEW(i915) || + IS_CHERRYVIEW(i915)) + flags = PIN_MAPPABLE; + vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags); + } if (IS_ERR(vma)) goto err_unpin_display; ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 2/2] drm/i915: Make GPU pages movable
From: Chris Wilson On a long run of more than 2-3 days, physical memory tends to get fragmented severely, which considerably slows down the system. In such a scenario, the shrinker is also unable to help as lack of memory is not the actual problem, since it has been observed that there are enough free pages of 0 order. This also manifests itself when an individual zone in the mm runs out of pages and if we cannot migrate pages between zones, the kernel hits an out-of-memory even though there are free pages (and often all of swap) available. To address the issue of external fragmentation, kernel does a compaction (which involves migration of pages) but its efficacy depends upon how many pages are marked as MOVABLE, as only those pages can be migrated. Currently the backing pages for GPU buffers are allocated from shmemfs with GFP_RECLAIMABLE flag, in units of 4KB pages. In the case of limited swap space, it may not be possible always to reclaim or swap-out pages of all the inactive objects, to make way for free space allowing formation of higher order groups of physically-contiguous pages on compaction. Just marking the GPU pages as MOVABLE will not suffice, as i915.ko has to pin the pages if they are in use by GPU, which will prevent their migration. So the migratepage callback in shmem is also hooked up to get a notification when kernel initiates the page migration. On the notification, i915.ko appropriately unpins the pages. With this we can effectively mark the GPU pages as MOVABLE and hence mitigate the fragmentation problem. 
v2: - Rename the migration routine to gem_shrink_migratepage, move it to the shrinker file, and use the existing constructs (Chris) - To cleanup, add a new helper function to encapsulate all page migration skip conditions (Chris) - Add a new local helper function in shrinker file, for dropping the backing pages, and call the same from gem_shrink() also (Chris) v3: - Fix/invert the check on the return value of unsafe_drop_pages (Chris) v4: - Minor tidy v5: - Fix unsafe usage of unsafe_drop_pages() - Rebase onto vmap-notifier Testcase: igt/gem_shrink Bugzilla: (e.g.) https://bugs.freedesktop.org/show_bug.cgi?id=90254 Cc: Hugh Dickins Cc: linux...@kvack.org Signed-off-by: Sourab Gupta Signed-off-by: Akash Goel Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/i915_gem.c | 9 ++- drivers/gpu/drm/i915/i915_gem_shrinker.c | 134 +++ 3 files changed, 144 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 4735b417..7f2717b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1357,6 +1357,8 @@ struct intel_l3_parity { }; struct i915_gem_mm { + struct shmem_dev_info shmem_info; + /** Memory allocator for GTT stolen memory */ struct drm_mm stolen; /** Protects the usage of the GTT stolen memory allocator. 
This is diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1f995ce..f0d4ce7 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2164,6 +2164,7 @@ void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj) if (obj->mm.madv == I915_MADV_WILLNEED) mark_page_accessed(page); + set_page_private(page, 0); put_page(page); } obj->mm.dirty = false; @@ -2310,6 +2311,7 @@ static unsigned int swiotlb_max_size(void) sg->length += PAGE_SIZE; } last_pfn = page_to_pfn(page); + set_page_private(page, (unsigned long)obj); /* Check that the i965g/gm workaround works. */ WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x0010UL)); @@ -2334,8 +2336,10 @@ static unsigned int swiotlb_max_size(void) err_pages: sg_mark_end(sg); - for_each_sgt_page(page, sgt_iter, st) + for_each_sgt_page(page, sgt_iter, st) { + set_page_private(page, 0); put_page(page); + } sg_free_table(st); kfree(st); @@ -4185,6 +4189,8 @@ struct drm_i915_gem_object * goto fail; mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; + if (IS_ENABLED(MIGRATION)) + mask |= __GFP_MOVABLE; if (IS_CRESTLINE(dev_priv) || IS_BROADWATER(dev_priv)) { /* 965gm cannot relocate objects above 4GiB. */ mask &= ~__GFP_HIGHMEM; @@ -4193,6 +4199,7 @@ struct drm_i915_gem_object * mapping = obj->base.filp->f_mapping; mapping_set_gfp_mask(mapping, mask); +
[Intel-gfx] [PATCH 1/2] shmem: Support for registration of driver/file owner specific ops
From: Chris Wilson This provides support for the drivers or shmem file owners to register a set of callbacks, which can be invoked from the address space operations methods implemented by shmem. This allows the file owners to hook into the shmem address space operations to do some extra/custom operations in addition to the default ones. The private_data field of address_space struct is used to store the pointer to driver specific ops. Currently only one ops field is defined, which is migratepage, but can be extended on an as-needed basis. The need for driver specific operations arises since some of the operations (like migratepage) may not be handled completely within shmem, so as to be effective, and would need some driver specific handling also. Specifically, i915.ko would like to participate in migratepage(). i915.ko uses shmemfs to provide swappable backing storage for its user objects, but when those objects are in use by the GPU it must pin the entire object until the GPU is idle. As a result, large chunks of memory can be arbitrarily withdrawn from page migration, resulting in premature out-of-memory due to fragmentation. However, if i915.ko can receive the migratepage() request, it can then flush the object from the GPU, remove its pin and thus enable the migration. Since gfx allocations are one of the major consumers of system memory, it's imperative to have such a mechanism to effectively deal with fragmentation. And therefore the need for such a provision for initiating driver specific actions during address space operations. v2: - Drop dev_ prefix from the members of shmem_dev_info structure. (Joonas) - Change the return type of shmem_set_device_op() to void and remove the check for pre-existing data. (Joonas) - Rename shmem_set_device_op() to shmem_set_dev_info() to be consistent with shmem_dev_info structure. 
(Joonas) Cc: Hugh Dickins Cc: linux...@kvack.org Cc: linux-ker...@vger.linux.org Signed-off-by: Sourab Gupta Signed-off-by: Akash Goel Reviewed-by: Chris Wilson --- include/linux/shmem_fs.h | 13 + mm/shmem.c | 17 - 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index ff078e7..22796a0 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -39,11 +39,24 @@ struct shmem_sb_info { unsigned long shrinklist_len; /* Length of shrinklist */ }; +struct shmem_dev_info { + void *private_data; + int (*migratepage)(struct address_space *mapping, + struct page *newpage, struct page *page, + enum migrate_mode mode, void *dev_priv_data); +}; + static inline struct shmem_inode_info *SHMEM_I(struct inode *inode) { return container_of(inode, struct shmem_inode_info, vfs_inode); } +static inline void shmem_set_dev_info(struct address_space *mapping, +struct shmem_dev_info *info) +{ + mapping->private_data = info; +} + /* * Functions in mm/shmem.c called directly from elsewhere: */ diff --git a/mm/shmem.c b/mm/shmem.c index ad7813d..bf71ddd 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1290,6 +1290,21 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) return 0; } +#ifdef CONFIG_MIGRATION +static int shmem_migratepage(struct address_space *mapping, +struct page *newpage, struct page *page, +enum migrate_mode mode) +{ + struct shmem_dev_info *dev_info = mapping->private_data; + + if (dev_info && dev_info->migratepage) + return dev_info->migratepage(mapping, newpage, page, + mode, dev_info->private_data); + + return migrate_page(mapping, newpage, page, mode); +} +#endif + #if defined(CONFIG_NUMA) && defined(CONFIG_TMPFS) static void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol) { @@ -3654,7 +3669,7 @@ static void shmem_destroy_inodecache(void) .write_end = shmem_write_end, #endif #ifdef CONFIG_MIGRATION - .migratepage= migrate_page, + .migratepage= 
shmem_migratepage, #endif .error_remove_page = generic_error_remove_page, }; -- 1.9.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v2] drm/i915: Fix pages pin counting around swizzle quirk
On Fri, Nov 04, 2016 at 01:43:34PM +0200, Joonas Lahtinen wrote: > On pe, 2016-11-04 at 10:30 +0000, Chris Wilson wrote: > > @@ -3711,6 +3711,13 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma) > > { > > int ret = 0; > > > > + /* The vma->pages are only valid within the lifespan of the borrowed > > + * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so > > + * must be the vma->pages. A simple rule is that vma->pages must only > > + * be accessed when the obj->mm.pages are pinned. > > + */ > > + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj)); > > + > > if (vma->pages) > > return 0; > > My confusion was vma == obj for the moment, but I think the comment is > still good. The barriers are much more sensible now, too. > > Reviewed-by: Joonas Lahtinen * fingers crossed that's the last we see of this quirk. -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] ✓ Fi.CI.BAT: success for drm/i915: Limit Valleyview and earlier to only using mappable scanout
== Series Details == Series: drm/i915: Limit Valleyview and earlier to only using mappable scanout URL : https://patchwork.freedesktop.org/series/14835/ State : success == Summary == Series 14835v1 drm/i915: Limit Valleyview and earlier to only using mappable scanout https://patchwork.freedesktop.org/api/1.0/series/14835/revisions/1/mbox/ Test kms_force_connector_basic: Subgroup force-load-detect: dmesg-warn -> PASS (fi-snb-2520m) fi-bdw-5557u total:241 pass:226 dwarn:0 dfail:0 fail:0 skip:15 fi-bsw-n3050 total:241 pass:201 dwarn:0 dfail:0 fail:0 skip:40 fi-bxt-t5700 total:241 pass:213 dwarn:0 dfail:0 fail:0 skip:28 fi-byt-j1900 total:241 pass:213 dwarn:0 dfail:0 fail:0 skip:28 fi-byt-n2820 total:241 pass:209 dwarn:0 dfail:0 fail:0 skip:32 fi-hsw-4770 total:241 pass:221 dwarn:0 dfail:0 fail:0 skip:20 fi-hsw-4770r total:241 pass:221 dwarn:0 dfail:0 fail:0 skip:20 fi-ilk-650 total:241 pass:188 dwarn:0 dfail:0 fail:0 skip:53 fi-ivb-3520m total:241 pass:219 dwarn:0 dfail:0 fail:0 skip:22 fi-ivb-3770 total:241 pass:219 dwarn:0 dfail:0 fail:0 skip:22 fi-kbl-7200u total:241 pass:219 dwarn:0 dfail:0 fail:0 skip:22 fi-skl-6260u total:241 pass:227 dwarn:0 dfail:0 fail:0 skip:14 fi-skl-6700hqtotal:241 pass:220 dwarn:0 dfail:0 fail:0 skip:21 fi-skl-6700k total:241 pass:219 dwarn:1 dfail:0 fail:0 skip:21 fi-skl-6770hqtotal:241 pass:227 dwarn:0 dfail:0 fail:0 skip:14 fi-snb-2520m total:241 pass:209 dwarn:0 dfail:0 fail:0 skip:32 fi-snb-2600 total:241 pass:208 dwarn:0 dfail:0 fail:0 skip:33 21f242e536b5077c046df785a8c4c28374941c15 drm-intel-nightly: 2016y-11m-03d-21h-01m-03s UTC integration manifest b9cc4a5 drm/i915: Limit Valleyview and earlier to only using mappable scanout == Logs == For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_2903/ ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v2] drm/i915: Fix pages pin counting around swizzle quirk
On pe, 2016-11-04 at 10:30 +0000, Chris Wilson wrote: > @@ -3711,6 +3711,13 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma) > { > int ret = 0; > > + /* The vma->pages are only valid within the lifespan of the borrowed > + * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so > + * must be the vma->pages. A simple rule is that vma->pages must only > + * be accessed when the obj->mm.pages are pinned. > + */ > + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj)); > + > if (vma->pages) > return 0; My confusion was vma == obj for the moment, but I think the comment is still good. The barriers are much more sensible now, too. Reviewed-by: Joonas Lahtinen Regards, Joonas -- Joonas Lahtinen Open Source Technology Center Intel Corporation ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915: Limit Valleyview and earlier to only using mappable scanout
On Fri, Nov 04, 2016 at 01:29:04PM +0200, Jani Nikula wrote: > On Fri, 04 Nov 2016, Chris Wilsonwrote: > > Valleyview and Cherryview are definitely limited to only scanning out > > from the first 256MiB and 512MiB of the Global GTT respectively. Lets > > presume that this behaviour was inherited from the display block copied > > from g4x (not Ironlake) and all earlier generations are similarly > > affected. For simplicity, impose that these platforms must scanout from > > the mappable region. > > > > Reported-by: Luis Botello > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98036 > > Fixes: 2efb813d5388 ("drm/i915: Fallback to using unmappable memory for > > scanout") > > Signed-off-by: Chris Wilson > > Cc: Akash Goel > > Cc: Joonas Lahtinen > > Cc: # v4.9-rc1+ > > --- > > This leaves Ironlake -> Haswell with a bit of uncertainity. It is also > > not clear if the scanout accessible region is similarly limited on all > > gen8+, and so whether we need to similarly curtain the upper range for > > their scanouts. > > --- > > drivers/gpu/drm/i915/i915_gem.c | 18 -- > > 1 file changed, 16 insertions(+), 2 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/i915_gem.c > > b/drivers/gpu/drm/i915/i915_gem.c > > index 269e2487c104..408875fbec66 100644 > > --- a/drivers/gpu/drm/i915/i915_gem.c > > +++ b/drivers/gpu/drm/i915/i915_gem.c > > @@ -3661,8 +3661,22 @@ i915_gem_object_pin_to_display_plane(struct > > drm_i915_gem_object *obj, > > if (view->type == I915_GGTT_VIEW_NORMAL) > > vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, > >PIN_MAPPABLE | PIN_NONBLOCK); > > - if (IS_ERR(vma)) > > - vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 0); > > + if (IS_ERR(vma)) { > > + struct drm_i915_private *i915 = to_i915(obj->base.dev); > > + unsigned int flags; > > + > > + /* Valleyview and Cherryview are definitely limited to scanning > > +* out the first 256MiB and 512MiB respectively. 
Lets presume > > +* this behaviour was inherited from their g4x display engine > > +* and that all earlier gen are similarly limited. > > +*/ > > + flags = 0; > > + if (INTEL_GEN(i915) < 5 || > > + IS_VALLEYVIEW(i915) || > > + IS_CHERRYVIEW(i915)) > > Since it's related to the display engine, HAS_GMCH_DISPLAY()? Ah, that's the synonym I was thinking of. That describes the split I used here much better. We may need to refine this as more information becomes available (if ever!) -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v2] get-fences-locked
On pe, 2016-11-04 at 10:29 +0000, Chris Wilson wrote: > --- > drivers/dma-buf/reservation.c | 58 > +++ > include/linux/reservation.h | 4 +++ > 2 files changed, 62 insertions(+) Wrong branch. Regards, Joonas -- Joonas Lahtinen Open Source Technology Center Intel Corporation ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915: Limit Valleyview and earlier to only using mappable scanout
On Fri, 04 Nov 2016, Chris Wilsonwrote: > Valleyview and Cherryview are definitely limited to only scanning out > from the first 256MiB and 512MiB of the Global GTT respectively. Lets > presume that this behaviour was inherited from the display block copied > from g4x (not Ironlake) and all earlier generations are similarly > affected. For simplicity, impose that these platforms must scanout from > the mappable region. > > Reported-by: Luis Botello > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98036 > Fixes: 2efb813d5388 ("drm/i915: Fallback to using unmappable memory for > scanout") > Signed-off-by: Chris Wilson > Cc: Akash Goel > Cc: Joonas Lahtinen > Cc: # v4.9-rc1+ > --- > This leaves Ironlake -> Haswell with a bit of uncertainity. It is also > not clear if the scanout accessible region is similarly limited on all > gen8+, and so whether we need to similarly curtain the upper range for > their scanouts. > --- > drivers/gpu/drm/i915/i915_gem.c | 18 -- > 1 file changed, 16 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 269e2487c104..408875fbec66 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -3661,8 +3661,22 @@ i915_gem_object_pin_to_display_plane(struct > drm_i915_gem_object *obj, > if (view->type == I915_GGTT_VIEW_NORMAL) > vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, > PIN_MAPPABLE | PIN_NONBLOCK); > - if (IS_ERR(vma)) > - vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 0); > + if (IS_ERR(vma)) { > + struct drm_i915_private *i915 = to_i915(obj->base.dev); > + unsigned int flags; > + > + /* Valleyview and Cherryview are definitely limited to scanning > + * out the first 256MiB and 512MiB respectively. Lets presume > + * this behaviour was inherited from their g4x display engine > + * and that all earlier gen are similarly limited. 
> + */ > + flags = 0; > + if (INTEL_GEN(i915) < 5 || > + IS_VALLEYVIEW(i915) || > + IS_CHERRYVIEW(i915)) Since it's related to the display engine, HAS_GMCH_DISPLAY()? BR, Jani. > + flags = PIN_MAPPABLE; > + vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags); > + } > if (IS_ERR(vma)) > goto err_unpin_display; -- Jani Nikula, Intel Open Source Technology Center ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] ✓ Fi.CI.BAT: success for drm/i915: Fix pages pin counting around swizzle quirk (rev3)
== Series Details == Series: drm/i915: Fix pages pin counting around swizzle quirk (rev3) URL : https://patchwork.freedesktop.org/series/14720/ State : success == Summary == Series 14720v3 drm/i915: Fix pages pin counting around swizzle quirk https://patchwork.freedesktop.org/api/1.0/series/14720/revisions/3/mbox/ Test kms_force_connector_basic: Subgroup force-load-detect: dmesg-warn -> PASS (fi-snb-2520m) fi-bdw-5557u total:241 pass:226 dwarn:0 dfail:0 fail:0 skip:15 fi-bsw-n3050 total:241 pass:201 dwarn:0 dfail:0 fail:0 skip:40 fi-bxt-t5700 total:241 pass:213 dwarn:0 dfail:0 fail:0 skip:28 fi-byt-j1900 total:241 pass:213 dwarn:0 dfail:0 fail:0 skip:28 fi-byt-n2820 total:241 pass:209 dwarn:0 dfail:0 fail:0 skip:32 fi-hsw-4770 total:241 pass:221 dwarn:0 dfail:0 fail:0 skip:20 fi-hsw-4770r total:241 pass:221 dwarn:0 dfail:0 fail:0 skip:20 fi-ilk-650 total:241 pass:188 dwarn:0 dfail:0 fail:0 skip:53 fi-ivb-3520m total:241 pass:219 dwarn:0 dfail:0 fail:0 skip:22 fi-ivb-3770 total:241 pass:219 dwarn:0 dfail:0 fail:0 skip:22 fi-kbl-7200u total:241 pass:219 dwarn:0 dfail:0 fail:0 skip:22 fi-skl-6260u total:241 pass:227 dwarn:0 dfail:0 fail:0 skip:14 fi-skl-6700hqtotal:241 pass:220 dwarn:0 dfail:0 fail:0 skip:21 fi-skl-6700k total:241 pass:219 dwarn:1 dfail:0 fail:0 skip:21 fi-skl-6770hqtotal:241 pass:227 dwarn:0 dfail:0 fail:0 skip:14 fi-snb-2520m total:241 pass:209 dwarn:0 dfail:0 fail:0 skip:32 fi-snb-2600 total:241 pass:208 dwarn:0 dfail:0 fail:0 skip:33 21f242e536b5077c046df785a8c4c28374941c15 drm-intel-nightly: 2016y-11m-03d-21h-01m-03s UTC integration manifest 25582a0 drm/i915: Fix pages pin counting around swizzle quirk == Logs == For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_2902/ ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915: Limit Valleyview and earlier to only using mappable scanout
Valleyview and Cherryview are definitely limited to only scanning out from the first 256MiB and 512MiB of the Global GTT respectively. Lets presume that this behaviour was inherited from the display block copied from g4x (not Ironlake) and all earlier generations are similarly affected. For simplicity, impose that these platforms must scanout from the mappable region. Reported-by: Luis BotelloBugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98036 Fixes: 2efb813d5388 ("drm/i915: Fallback to using unmappable memory for scanout") Signed-off-by: Chris Wilson Cc: Akash Goel Cc: Joonas Lahtinen Cc: # v4.9-rc1+ --- This leaves Ironlake -> Haswell with a bit of uncertainity. It is also not clear if the scanout accessible region is similarly limited on all gen8+, and so whether we need to similarly curtain the upper range for their scanouts. --- drivers/gpu/drm/i915/i915_gem.c | 18 -- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 269e2487c104..408875fbec66 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3661,8 +3661,22 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, if (view->type == I915_GGTT_VIEW_NORMAL) vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, PIN_MAPPABLE | PIN_NONBLOCK); - if (IS_ERR(vma)) - vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 0); + if (IS_ERR(vma)) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); + unsigned int flags; + + /* Valleyview and Cherryview are definitely limited to scanning +* out the first 256MiB and 512MiB respectively. Lets presume +* this behaviour was inherited from their g4x display engine +* and that all earlier gen are similarly limited. 
+*/ + flags = 0; + if (INTEL_GEN(i915) < 5 || + IS_VALLEYVIEW(i915) || + IS_CHERRYVIEW(i915)) + flags = PIN_MAPPABLE; + vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags); + } if (IS_ERR(vma)) goto err_unpin_display; -- 2.10.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2] drm/i915: Fix pages pin counting around swizzle quirk
commit bc0629a76726 ("drm/i915: Track pages pinned due to swizzling quirk") fixed one problem, but revealed a whole lot more. The root cause of the pin count mismatch for the swizzle quirk (for L-shaped memory on gen3/4) was that we were incrementing the pages_pin_count upon getting the backing pages but then overwriting the pages_pin_count to set it to 1 afterwards. With a little bit of adjustment to satisfy the GEM_BUG_ON sanitychecks, the fix is to replace the explicit atomic_set with an atomic_inc. v2: Consistently use atomics (not mix atomics and helpers) within the lowlevel get_pages routines. This makes the atomic operations much clearer. Fixes: 1233e2db199d ("drm/i915: Move object backing storage manipulation") Signed-off-by: Chris WilsonCc: Joonas Lahtinen Cc: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem.c| 47 +++--- drivers/gpu/drm/i915/i915_gem_gtt.c| 7 + drivers/gpu/drm/i915/i915_gem_tiling.c | 1 + 3 files changed, 34 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a7a9ae2c4bce..269e2487c104 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2376,12 +2376,6 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) if (i915_gem_object_needs_bit17_swizzle(obj)) i915_gem_object_do_bit_17_swizzle(obj, st); - if (i915_gem_object_is_tiled(obj) && - dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { - __i915_gem_object_pin_pages(obj); - obj->mm.quirked = true; - } - return st; err_pages: @@ -2414,12 +2408,21 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, obj->mm.get_page.sg_idx = 0; obj->mm.pages = pages; + + if (i915_gem_object_is_tiled(obj) && + to_i915(obj->base.dev)->quirks & QUIRK_PIN_SWIZZLED_PAGES) { + GEM_BUG_ON(obj->mm.quirked); + __i915_gem_object_pin_pages(obj); + obj->mm.quirked = true; + } } static int i915_gem_object_get_pages(struct drm_i915_gem_object *obj) { struct sg_table *pages; + 
GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); + if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) { DRM_DEBUG("Attempting to obtain a purgeable object\n"); return -EFAULT; @@ -2448,17 +2451,15 @@ int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj) if (err) return err; - if (likely(obj->mm.pages)) { - __i915_gem_object_pin_pages(obj); - goto unlock; - } - - GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); + if (unlikely(!obj->mm.pages)) { + err = i915_gem_object_get_pages(obj); + if (err) + goto unlock; - err = i915_gem_object_get_pages(obj); - if (!err) - atomic_set_release(>mm.pages_pin_count, 1); + smp_mb__before_atomic(); + } + atomic_inc(>mm.pages_pin_count); unlock: mutex_unlock(>mm.lock); return err; @@ -2528,12 +2529,14 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, pinned = true; if (!atomic_inc_not_zero(>mm.pages_pin_count)) { - ret = i915_gem_object_get_pages(obj); - if (ret) - goto err_unlock; + if (unlikely(!obj->mm.pages)) { + ret = i915_gem_object_get_pages(obj); + if (ret) + goto err_unlock; - GEM_BUG_ON(atomic_read(>mm.pages_pin_count)); - atomic_set_release(>mm.pages_pin_count, 1); + smp_mb__before_atomic(); + } + atomic_inc(>mm.pages_pin_count); pinned = false; } GEM_BUG_ON(!obj->mm.pages); @@ -2986,7 +2989,7 @@ int i915_vma_unbind(struct i915_vma *vma) goto destroy; GEM_BUG_ON(obj->bind_count == 0); - GEM_BUG_ON(!obj->mm.pages); + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); if (i915_vma_is_map_and_fenceable(vma)) { /* release the fence reg _after_ flushing */ @@ -3220,6 +3223,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) list_move_tail(>global_link, _priv->mm.bound_list); list_move_tail(>vm_link, >vm->inactive_list); obj->bind_count++; + GEM_BUG_ON(atomic_read(>mm.pages_pin_count) < obj->bind_count); return 0; @@ -4272,6 +4276,7 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, obj->mm.quirked = false; } if (args->madv == I915_MADV_WILLNEED) { +
[Intel-gfx] [PATCH v2] get-fences-locked
--- drivers/dma-buf/reservation.c | 58 +++ include/linux/reservation.h | 4 +++ 2 files changed, 62 insertions(+) diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c index 3c9ab53be2b9..0f254d0d9bec 100644 --- a/drivers/dma-buf/reservation.c +++ b/drivers/dma-buf/reservation.c @@ -133,6 +133,64 @@ void reservation_object_add_excl_fence(struct reservation_object *obj, EXPORT_SYMBOL(reservation_object_add_excl_fence); /** + * reservation_object_get_fences_locked - Get an object's shared and exclusive + * fences + * @obj: the reservation object + * @pfence_excl: the returned exclusive fence (or NULL) + * @pshared_count: the number of shared fences returned + * @pshared: the array of shared fence ptrs returned (array is krealloc'd to + * the required size, and must be freed by caller) + * + * RETURNS + * Zero or -errno + */ +int reservation_object_get_fences_locked(struct reservation_object *obj, + struct dma_fence **pfence_excl, + unsigned *pshared_count, + struct dma_fence ***pshared) +{ + struct dma_fence **shared = NULL; + unsigned int count = 0; + struct radix_tree_iter iter; + void **slot; + + radix_tree_for_each_slot(slot, >shared, , 0) { + struct dma_fence *fence = radix_tree_deref_slot(slot); + + if (dma_fence_is_signaled(fence)) { + radix_tree_delete(>shared, iter.index); + continue; + } + + if ((count & -count) == count) { + struct dma_fence **nshared; + unsigned int sz; + + sz = count ? 
2*count : 1; + nshared = krealloc(shared, + sz * sizeof(*shared), + GFP_TEMPORARY); + if (!nshared) { + while (count--) + dma_fence_put(shared[count]); + kfree(shared); + return -ENOMEM; + } + + shared = nshared; + } + + shared[count++] = dma_fence_get(fence); + } + + *pshared_count = count; + *pshared = shared; + *pfence_excl = dma_fence_get(rcu_dereference(obj->excl)); + return 0; +} +EXPORT_SYMBOL_GPL(reservation_object_get_fences_locked); + +/** * reservation_object_get_fences_rcu - Get an object's shared and exclusive * fences without update side lock held * @obj: the reservation object diff --git a/include/linux/reservation.h b/include/linux/reservation.h index 697ec52427ca..4f39942906e2 100644 --- a/include/linux/reservation.h +++ b/include/linux/reservation.h @@ -161,6 +161,10 @@ void reservation_object_add_shared_fence(struct reservation_object *obj, void reservation_object_add_excl_fence(struct reservation_object *obj, struct dma_fence *fence); +int reservation_object_get_fences_locked(struct reservation_object *obj, + struct dma_fence **pfence_excl, + unsigned *pshared_count, + struct dma_fence ***pshared); int reservation_object_get_fences_rcu(struct reservation_object *obj, struct dma_fence **pfence_excl, unsigned *pshared_count, -- 2.10.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915: Fix pages pin counting around swizzle quirk
On Fri, Nov 04, 2016 at 09:36:31AM +, Chris Wilson wrote: > On Fri, Nov 04, 2016 at 10:50:44AM +0200, Joonas Lahtinen wrote: > > On ke, 2016-11-02 at 09:43 +, Chris Wilson wrote: > > > @@ -2458,17 +2459,16 @@ int __i915_gem_object_get_pages(struct > > > drm_i915_gem_object *obj) > > > if (err) > > > return err; > > > > > > - if (likely(obj->mm.pages)) { > > > - __i915_gem_object_pin_pages(obj); > > > - goto unlock; > > > - } > > > - > > > - GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); > > > + if (unlikely(!obj->mm.pages)) { > > > + GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); > > > + err = i915_gem_object_get_pages(obj); > > > + if (err) > > > + goto unlock; > > > > > > - err = i915_gem_object_get_pages(obj); > > > - if (!err) > > > - atomic_set_release(>mm.pages_pin_count, 1); > > > + smp_mb__before_atomic(); > > > > This is not cool without atomic in sight. Inline wrap as > > __i915_gem_object_pages_mb() or something. > > My first thought was to put in i915_gem_object_get_pages() since it > closes the action of setting up the obj->mm.pages and co. I didn't like > that because the association then with the use of the pages_pin_count as > the mutex was not as apparent. Now that you cannot see the atomic_inc() > at all here, you are left confused! > > Would you rather this just used the raw atomic_inc() here? Actually, I like using atomics better here. It is definitely consistent as we then don't mix the raw atomics and the helpers. -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] ✗ Fi.CI.BAT: warning for drm/i915/dp: Update connector status for DP MST hotplugs (rev2)
> == Series Details == > > Series: drm/i915/dp: Update connector status for DP MST hotplugs (rev2) > URL : https://patchwork.freedesktop.org/series/14821/ > State : warning > > == Summary == > > Series 14821v2 drm/i915/dp: Update connector status for DP MST hotplugs > https://patchwork.freedesktop.org/api/1.0/series/14821/revisions/2/mbox/ > > Test kms_force_connector_basic: > Subgroup force-edid: > pass -> DMESG-WARN (fi-snb-2520m) https://bugs.freedesktop.org/show_bug.cgi?id=74102 ? > Subgroup force-load-detect: > dmesg-warn -> PASS (fi-snb-2520m) > > fi-bdw-5557u total:241 pass:226 dwarn:0 dfail:0 fail:0 skip:15 > fi-bsw-n3050 total:241 pass:201 dwarn:0 dfail:0 fail:0 skip:40 > fi-bxt-t5700 total:241 pass:213 dwarn:0 dfail:0 fail:0 skip:28 > fi-byt-j1900 total:241 pass:213 dwarn:0 dfail:0 fail:0 skip:28 > fi-hsw-4770 total:241 pass:221 dwarn:0 dfail:0 fail:0 skip:20 > fi-hsw-4770r total:241 pass:221 dwarn:0 dfail:0 fail:0 skip:20 > fi-ilk-650 total:241 pass:188 dwarn:0 dfail:0 fail:0 skip:53 > fi-ivb-3520m total:241 pass:219 dwarn:0 dfail:0 fail:0 skip:22 > fi-ivb-3770 total:241 pass:219 dwarn:0 dfail:0 fail:0 skip:22 > fi-kbl-7200u total:241 pass:219 dwarn:0 dfail:0 fail:0 skip:22 > fi-skl-6260u total:241 pass:227 dwarn:0 dfail:0 fail:0 skip:14 > fi-skl-6700hqtotal:241 pass:220 dwarn:0 dfail:0 fail:0 skip:21 > fi-skl-6700k total:241 pass:219 dwarn:1 dfail:0 fail:0 skip:21 > fi-skl-6770hqtotal:241 pass:227 dwarn:0 dfail:0 fail:0 skip:14 > fi-snb-2520m total:241 pass:208 dwarn:1 dfail:0 fail:0 skip:32 > fi-snb-2600 total:241 pass:208 dwarn:0 dfail:0 fail:0 skip:33 > fi-byt-n2820 failed to collect. 
IGT log at Patchwork_2901/fi-byt-n2820/igt.log > > 21f242e536b5077c046df785a8c4c28374941c15 drm-intel-nightly: 2016y-11m- > 03d-21h-01m-03s UTC integration manifest > bc09ce1 drm/i915/dp: Update connector status for DP MST hotplugs > > == Logs == > > For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_2901/ Jani Saarinen Intel Finland Oy - BIC 0357606-4 - Westendinkatu 7, 02160 Espoo ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 08/15] drm/i915: Add support for emitting execbuffer tags through OA counter reports
On Fri, Nov 04, 2016 at 03:00:37PM +0530, sourab.gu...@intel.com wrote: > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index ead97b7f4..15921c7 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -832,6 +832,11 @@ struct drm_i915_gem_execbuffer2 { > #define i915_execbuffer2_get_context_id(eb2) \ > ((eb2).rsvd1 & I915_EXEC_CONTEXT_ID_MASK) > > +/* upper 32 bits of rsvd1 field contain tag */ > +#define I915_EXEC_TAG_MASK (0xffffffff00000000UL) > +#define i915_execbuffer2_get_tag(eb2) \ > + ((eb2).rsvd1 & I915_EXEC_TAG_MASK) Which does not return a u32 -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] ✗ Fi.CI.BAT: warning for drm/i915: Move hangcheck code out from i915_irq.c
Patchwork writes: > == Series Details == > > Series: drm/i915: Move hangcheck code out from i915_irq.c > URL : https://patchwork.freedesktop.org/series/14685/ > State : warning > > == Summary == > > Series 14685v1 drm/i915: Move hangcheck code out from i915_irq.c > https://patchwork.freedesktop.org/api/1.0/series/14685/revisions/1/mbox/ > > Test gem_exec_suspend: > Subgroup basic-s3: > pass -> DMESG-WARN (fi-ilk-650) > Test kms_pipe_crc_basic: > Subgroup bad-nb-words-3: > dmesg-warn -> PASS (fi-ilk-650) > Subgroup bad-source: > dmesg-warn -> PASS (fi-ilk-650) > Subgroup nonblocking-crc-pipe-a-frame-sequence: > dmesg-warn -> PASS (fi-ilk-650) > Subgroup suspend-read-crc-pipe-b: > pass -> DMESG-WARN (fi-ilk-650) https://bugs.freedesktop.org/show_bug.cgi?id=98531 > Subgroup suspend-read-crc-pipe-c: > pass -> DMESG-WARN (fi-skl-6770hq) > https://bugs.freedesktop.org/show_bug.cgi?id=97929 Patch merged a day ago, just forgot to press send. Thanks for the review. -Mika > fi-bdw-5557u total:241 pass:226 dwarn:0 dfail:0 fail:0 skip:15 > fi-bsw-n3050 total:241 pass:201 dwarn:0 dfail:0 fail:0 skip:40 > fi-bxt-t5700 total:241 pass:213 dwarn:0 dfail:0 fail:0 skip:28 > fi-byt-j1900 total:241 pass:213 dwarn:0 dfail:0 fail:0 skip:28 > fi-byt-n2820 total:241 pass:209 dwarn:0 dfail:0 fail:0 skip:32 > fi-hsw-4770 total:241 pass:221 dwarn:0 dfail:0 fail:0 skip:20 > fi-hsw-4770r total:241 pass:220 dwarn:0 dfail:0 fail:0 skip:21 > fi-ilk-650 total:241 pass:183 dwarn:4 dfail:0 fail:0 skip:54 > fi-ivb-3520m total:241 pass:218 dwarn:0 dfail:0 fail:0 skip:23 > fi-ivb-3770 total:241 pass:218 dwarn:0 dfail:0 fail:0 skip:23 > fi-kbl-7200u total:241 pass:219 dwarn:0 dfail:0 fail:0 skip:22 > fi-skl-6260u total:241 pass:227 dwarn:0 dfail:0 fail:0 skip:14 > fi-skl-6700hq total:241 pass:220 dwarn:0 dfail:0 fail:0 skip:21 > fi-skl-6700k total:241 pass:219 dwarn:1 dfail:0 fail:0 skip:21 > fi-skl-6770hq total:241 pass:226 dwarn:1 dfail:0 fail:0 skip:14 > fi-snb-2520m total:241 pass:208 dwarn:0 dfail:0 fail:0 
skip:33 > fi-snb-2600 total:241 pass:207 dwarn:0 dfail:0 fail:0 skip:34 > > c5ad9c11e819eebcad5b9be5aa5e991e89b26965 drm-intel-nightly: > 2016y-11m-01d-16h-36m-25s UTC integration manifest > 3a0612c drm/i915: Move hangcheck code out from i915_irq.c > > == Logs == > > For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_2880/ ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 06/15] drm/i915: Populate ctx ID for periodic OA reports
On Fri, Nov 04, 2016 at 03:00:35PM +0530, sourab.gu...@intel.com wrote: > +static u32 gen8_oa_buffer_get_ctx_id(struct i915_perf_stream *stream, > + const u8 *report) > +{ > + struct drm_i915_private *dev_priv = stream->dev_priv; > + > + /* The ctx ID present in the OA reports have intel_context::global_id > + * present, since this is programmed into the ELSP in execlist mode. > + * In non-execlist mode, fall back to retrieving the ctx ID from the > + * last saved ctx ID from command stream mode. > + */ > + if (i915.enable_execlists) { > + u32 ctx_id = *(u32 *)(report + 12); > + ctx_id &= 0xf; This does not match the hw maximum. Please check to see who is correct. > + return ctx_id; > + } else { > + if (!stream->cs_mode) > + WARN_ONCE(1, > + "CTX ID can't be retrieved if command stream mode not > enabled"); All these WARNs appear to be user triggerable. -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 14/15] drm/i915: Mechanism to forward clock monotonic raw time in perf samples
On Fri, Nov 04, 2016 at 03:00:43PM +0530, sourab.gu...@intel.com wrote: > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c > index 06c7b55..0dc2384 100644 > --- a/drivers/gpu/drm/i915/i915_drv.c > +++ b/drivers/gpu/drm/i915/i915_drv.c > @@ -1088,6 +1088,8 @@ static int i915_driver_init_hw(struct drm_i915_private > *dev_priv) > DRM_DEBUG_DRIVER("can't enable MSI"); > } > > + i915_perf_init_late(dev_priv); > + > return 0; Just a quick one: Create i915_driver_init_late() to capture the new init phase you want to add. -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915: Fix pages pin counting around swizzle quirk
On Fri, Nov 04, 2016 at 10:50:44AM +0200, Joonas Lahtinen wrote: > On ke, 2016-11-02 at 09:43 +0000, Chris Wilson wrote: > > @@ -2458,17 +2459,16 @@ int __i915_gem_object_get_pages(struct > > drm_i915_gem_object *obj) > > if (err) > > return err; > > > > - if (likely(obj->mm.pages)) { > > - __i915_gem_object_pin_pages(obj); > > - goto unlock; > > - } > > - > > - GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); > > + if (unlikely(!obj->mm.pages)) { > > + GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); > > + err = i915_gem_object_get_pages(obj); > > + if (err) > > + goto unlock; > > > > - err = i915_gem_object_get_pages(obj); > > - if (!err) > > - atomic_set_release(&obj->mm.pages_pin_count, 1); > > + smp_mb__before_atomic(); > > This is not cool without atomic in sight. Inline wrap as > __i915_gem_object_pages_mb() or something. My first thought was to put in i915_gem_object_get_pages() since it closes the action of setting up the obj->mm.pages and co. I didn't like that because the association then with the use of the pages_pin_count as the mutex was not as apparent. Now that you cannot see the atomic_inc() at all here, you are left confused! Would you rather this just used the raw atomic_inc() here? > > > @@ -3707,6 +3707,7 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma) > > { > > int ret = 0; > > > > + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj)); > > Rather confusing, simple mind would think as > __i915_gem_object_pin_pages has GEM_BUG_ON(!obj->mm.pages), > the next branch would never be taken? GEM_BUG_ON(vma == obj) ? Sorry not parsing very well this morning. GEM_BUG_ON(!obj->mm.pages) would be a weaker form of the above. The challenge is to express that the vma->pages is only valid for the current lifespan of the obj->mm.pages; should we regenerate that sg_table, we need to regenerate the vma->pages. So I want to say that we must be holding a pages_pin_count to utilize the vma->pages. 
> > if (vma->pages) > > return 0; -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 10/15] drm/i915: Extract raw GPU timestamps from OA reports to forward in perf samples
From: Sourab GuptaThe OA reports contain the least significant 32 bits of the gpu timestamp. This patch enables retrieval of the timestamp field from OA reports, to forward as 64 bit raw gpu timestamps in the perf samples. Signed-off-by: Sourab Gupta --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_perf.c | 47 ++-- drivers/gpu/drm/i915/i915_reg.h | 4 3 files changed, 40 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a05335a..119c82b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2241,6 +2241,7 @@ struct drm_i915_private { u32 ctx_flexeu0_off; u32 n_pending_periodic_samples; u32 pending_periodic_ts; + u64 last_gpu_ts; struct i915_oa_ops ops; const struct i915_oa_format *oa_formats; diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 516fd54..b05c41a 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -903,6 +903,24 @@ static int append_sample(struct i915_perf_stream *stream, return 0; } +static u64 get_gpu_ts_from_oa_report(struct drm_i915_private *dev_priv, + const u8 *report) +{ + u32 sample_ts = *(u32 *)(report + 4); + u32 delta; + + /* +* NB: We have to assume we're updating last_gpu_ts frequently +* enough that it's never possible to see multiple overflows before +* we compare sample_ts to last_gpu_ts. Since this is significantly +* large duration (~6min for 80ns ts base), we can safely assume so. 
+*/ + delta = sample_ts - (u32)dev_priv->perf.oa.last_gpu_ts; + dev_priv->perf.oa.last_gpu_ts += delta; + + return dev_priv->perf.oa.last_gpu_ts; +} + static int append_oa_buffer_sample(struct i915_perf_stream *stream, char __user *buf, size_t count, size_t *offset, const u8 *report) @@ -940,10 +958,9 @@ static int append_oa_buffer_sample(struct i915_perf_stream *stream, if (sample_flags & SAMPLE_TAG) data.tag = dev_priv->perf.last_tag; - /* Derive timestamp from OA report, after scaling with the ts base */ -#warning "FIXME: append_oa_buffer_sample: derive the timestamp from OA report" + /* Derive timestamp from OA report */ if (sample_flags & SAMPLE_TS) - data.ts = 0; + data.ts = get_gpu_ts_from_oa_report(dev_priv, report); if (sample_flags & SAMPLE_OA_REPORT) data.report = report; @@ -1443,6 +1460,7 @@ static int append_one_cs_sample(struct i915_perf_stream *stream, enum intel_engine_id id = stream->engine; struct sample_data data = { 0 }; u32 sample_flags = stream->sample_flags; + u64 gpu_ts = 0; int ret = 0; if (sample_flags & SAMPLE_OA_REPORT) { @@ -1459,6 +1477,9 @@ static int append_one_cs_sample(struct i915_perf_stream *stream, sample_ts, U32_MAX); if (ret) return ret; + + if (sample_flags & SAMPLE_TS) + gpu_ts = get_gpu_ts_from_oa_report(dev_priv, report); } if (sample_flags & SAMPLE_OA_SOURCE_INFO) @@ -1480,20 +1501,16 @@ static int append_one_cs_sample(struct i915_perf_stream *stream, } if (sample_flags & SAMPLE_TS) { - /* For RCS, if OA samples are also being collected, derive the -* timestamp from OA report, after scaling with the TS base. + /* If OA sampling is enabled, derive the ts from OA report. * Else, forward the timestamp collected via command stream. 
*/ -#warning "FIXME: append_one_cs_sample: derive the timestamp from OA report" - if (sample_flags & SAMPLE_OA_REPORT) - data.ts = 0; - else - data.ts = *(u64 *) + if (!(sample_flags & SAMPLE_OA_REPORT)) + gpu_ts = *(u64 *) (dev_priv->perf.command_stream_buf[id].addr + node->ts_offset); + data.ts = gpu_ts; } - return append_sample(stream, buf, count, offset, ); } @@ -2279,9 +2296,15 @@ static void i915_ring_stream_enable(struct i915_perf_stream *stream) { struct drm_i915_private *dev_priv = stream->dev_priv; - if (stream->sample_flags & SAMPLE_OA_REPORT) + if (stream->sample_flags & SAMPLE_OA_REPORT) { dev_priv->perf.oa.ops.oa_enable(dev_priv); + if
[Intel-gfx] [PATCH 11/15] drm/i915: Support opening multiple concurrent perf streams
From: Sourab GuptaThis patch adds support for opening multiple concurrent perf streams for different gpu engines, while having the restriction to open only a single stream open for a particular gpu engine. This enables userspace client to open multiple streams, one per engine, at any time to capture sample data for multiple gpu engines. Signed-off-by: Sourab Gupta --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_perf.c | 69 ++-- 2 files changed, 39 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 119c82b..e912679 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2204,7 +2204,7 @@ struct drm_i915_private { struct hrtimer poll_check_timer; - struct i915_perf_stream *exclusive_stream; + struct i915_perf_stream *ring_stream[I915_NUM_ENGINES]; wait_queue_head_t poll_wq[I915_NUM_ENGINES]; atomic_t pollin[I915_NUM_ENGINES]; diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index b05c41a..8eb80e8 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1086,7 +1086,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * an invalid ID. It could be good to annotate these * reports with a _CTX_SWITCH_AWAY reason later. */ - if (!dev_priv->perf.exclusive_stream->ctx || + if (!stream->ctx || dev_priv->perf.oa.specific_ctx_id == ctx_id || dev_priv->perf.oa.oa_buffer.last_ctx_id == ctx_id) { @@ -1097,7 +1097,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * the switch-away reports with an invalid * context id to be recognisable by userspace. 
*/ - if (dev_priv->perf.exclusive_stream->ctx && + if (stream->ctx && dev_priv->perf.oa.specific_ctx_id != ctx_id) report32[2] = 0x; @@ -1763,7 +1763,7 @@ static void i915_ring_stream_destroy(struct i915_perf_stream *stream) { struct drm_i915_private *dev_priv = stream->dev_priv; - BUG_ON(stream != dev_priv->perf.exclusive_stream); + BUG_ON(stream != dev_priv->perf.ring_stream[stream->engine]); if (stream->using_oa) { dev_priv->perf.oa.ops.disable_metric_set(dev_priv); @@ -1777,7 +1777,7 @@ static void i915_ring_stream_destroy(struct i915_perf_stream *stream) if (stream->cs_mode) free_command_stream_buf(dev_priv, stream->engine); - dev_priv->perf.exclusive_stream = NULL; + dev_priv->perf.ring_stream[stream->engine] = NULL; } static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv) @@ -2220,14 +2220,14 @@ static void gen7_update_oacontrol_locked(struct drm_i915_private *dev_priv) { assert_spin_locked(_priv->perf.hook_lock); - if (dev_priv->perf.exclusive_stream->state != + if (dev_priv->perf.ring_stream[RCS]->state != I915_PERF_STREAM_DISABLED) { unsigned long ctx_id = 0; - if (dev_priv->perf.exclusive_stream->ctx) + if (dev_priv->perf.ring_stream[RCS]->ctx) ctx_id = dev_priv->perf.oa.specific_ctx_id; - if (dev_priv->perf.exclusive_stream->ctx == NULL || ctx_id) { + if (dev_priv->perf.ring_stream[RCS]->ctx == NULL || ctx_id) { bool periodic = dev_priv->perf.oa.periodic; u32 period_exponent = dev_priv->perf.oa.period_exponent; u32 report_format = dev_priv->perf.oa.oa_buffer.format; @@ -2366,15 +2366,6 @@ static int i915_ring_stream_init(struct i915_perf_stream *stream, SAMPLE_TS); int ret; - /* To avoid the complexity of having to accurately filter -* counter reports and marshal to the appropriate client -* we currently only allow exclusive access -*/ - if (dev_priv->perf.exclusive_stream) { - DRM_ERROR("Stream already in use\n"); - return -EBUSY; - } - if ((props->sample_flags & SAMPLE_CTX_ID) && !props->cs_mode) { if (IS_HASWELL(dev_priv)) { DRM_ERROR( @@ 
-2392,6 +2383,12 @@ static int i915_ring_stream_init(struct i915_perf_stream *stream, if (require_oa_unit) { int format_size; + /* Only allow exclusive access per stream */ + if (dev_priv->perf.ring_stream[RCS]) { + DRM_ERROR("Stream:0
[Intel-gfx] [PATCH 12/15] time: Expose current clocksource in use by timekeeping framework
From: Sourab Gupta For the drivers to be able to use the cross timestamp framework, they need the information of current clocksource being used by the kernel timekeeping. This is needed since the callback given by driver into the get_device_system_crosststamp(), in order to synchronously read the device time and system counter value, requires the knowledge of the clocksource being used to read system counter value (as a part of struct system_counterval_t). Signed-off-by: Sourab Gupta --- include/linux/timekeeping.h | 5 + kernel/time/timekeeping.c | 12 2 files changed, 17 insertions(+) diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 816b754..101aaa3 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -336,6 +336,11 @@ extern int get_device_system_crosststamp( struct system_device_crosststamp *xtstamp); /* + * Get current clocksource used by system timekeeping framework + */ +struct clocksource *get_current_clocksource(void); + +/* * Simultaneously snapshot realtime and monotonic raw clocks */ extern void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index e07fb09..bb1e9c0 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1138,6 +1138,18 @@ int get_device_system_crosststamp(int (*get_time_fn) EXPORT_SYMBOL_GPL(get_device_system_crosststamp); /** + * get_current_clocksource - Returns the current clocksource in used by tk_core + * + */ +struct clocksource *get_current_clocksource(void) +{ + struct timekeeper *tk = &tk_core.timekeeper; + + return tk->tkr_mono.clock; +} +EXPORT_SYMBOL_GPL(get_current_clocksource); + +/** * do_gettimeofday - Returns the time of day in a timeval * @tv:pointer to the timeval to be set * -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 14/15] drm/i915: Mechanism to forward clock monotonic raw time in perf samples
From: Sourab Gupta Currently, we have the ability to only forward the GPU timestamps in the samples (which are generated via OA reports or PIPE_CONTROL commands inserted in the ring). This limits the ability to correlate these samples with the system events. If we scale the GPU timestamps according to the timestamp base/frequency info present in bspec, it is observed that the timestamps drift really quickly from the system time. An ability is therefore needed to report timestamps in different clock domains, such as CLOCK_MONOTONIC (or _MONO_RAW), in the perf samples to be of more practical use to the userspace. This ability becomes important when we want to correlate/plot GPU events/samples with other system events on the same timeline (e.g. vblank events, or timestamps when work was submitted to kernel, etc.) The patch here proposes a mechanism to achieve this. The correlation between gpu time and system time is established using the cross timestamp framework. For this purpose, the timestamp clock associated with the command stream, is abstracted as timecounter/cyclecounter, before utilizing cross timestamp framework to retrieve gpu/system time correlated values. Different such gpu/system time values are then used to detect and correct the error in published gpu timestamp clock frequency. The userspace can request CLOCK_MONOTONIC_RAW timestamps in samples by requesting the corresponding property while opening the stream. 
Signed-off-by: Sourab Gupta --- drivers/gpu/drm/i915/i915_drv.c | 2 + drivers/gpu/drm/i915/i915_drv.h | 24 +++- drivers/gpu/drm/i915/i915_perf.c | 273 +++ include/uapi/drm/i915_drm.h | 9 +- 4 files changed, 284 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 06c7b55..0dc2384 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1088,6 +1088,8 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) DRM_DEBUG_DRIVER("can't enable MSI"); } + i915_perf_init_late(dev_priv); + return 0; out_ggtt: diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e912679..557a124 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -42,6 +42,9 @@ #include #include #include +#include +#include +#include #include #include @@ -1843,6 +1846,9 @@ struct i915_perf_stream { /* Whether the OA unit is in use */ bool using_oa; + /* monotonic_raw clk timestamp (in ns) for last sample */ + u64 last_sample_ts; + const struct i915_perf_stream_ops *ops; }; @@ -1889,6 +1895,20 @@ struct i915_perf_cs_data_node { u32 tag; }; +/** + * struct i915_clock_info - decribes i915 timestamp clock + * + */ +struct i915_clock_info { + struct cyclecounter cc; + struct timecounter tc; + struct system_device_crosststamp xtstamp; + ktime_t clk_offset; /* Offset (in ns) between monoraw clk and gpu time */ + u32 timestamp_frequency; + u32 resync_period; /* in msecs */ + struct delayed_work clk_sync_work; +}; + struct drm_i915_private { struct drm_device drm; @@ -2189,6 +2209,8 @@ struct drm_i915_private { struct i915_runtime_pm pm; + struct i915_clock_info ts_clk_info; + struct { bool initialized; @@ -2213,7 +2235,6 @@ struct drm_i915_private { bool periodic; int period_exponent; - int timestamp_frequency; int tail_margin; @@ -3796,6 +3817,7 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, /* i915_perf.c */ extern void 
i915_perf_init(struct drm_i915_private *dev_priv); +extern void i915_perf_init_late(struct drm_i915_private *dev_priv); extern void i915_perf_fini(struct drm_i915_private *dev_priv); extern void i915_perf_register(struct drm_i915_private *dev_priv); extern void i915_perf_unregister(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 8eb80e8..b11e953 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -189,6 +189,7 @@ #include #include +#include #include "i915_drv.h" #include "intel_ringbuffer.h" @@ -228,6 +229,9 @@ #define POLL_FREQUENCY 200 #define POLL_PERIOD (NSEC_PER_SEC / POLL_FREQUENCY) +#define MAX_CLK_SYNC_PERIOD (60*MSEC_PER_SEC) +#define INIT_CLK_SYNC_PERIOD (20) /* in msecs */ + static u32 i915_perf_stream_paranoid = true; /* The maximum exponent the hardware accepts is 63 (essentially it selects one @@ -254,13 +258,24 @@ static u32 i915_perf_stream_paranoid = true; #define TS_ADDR_ALIGN 8 #define I915_PERF_TS_SAMPLE_SIZE 8 +/* Published frequency of GT command stream timestamp
[Intel-gfx] [PATCH 00/15] Framework to collect command stream gpu metrics using i915 perf
From: Sourab Gupta Refloating the series rebased on Robert's latest patchset. Since Robert's patches are being reviewed and this patch series extends his framework to enable multiple concurrent streams to capture command stream based metrics, it would be good to keep this work in perspective. Looking to receive feedback on the series (and possibly r-b's :)) This series adds a framework for collection of gpu performance metrics associated with the command stream of a particular engine. These metrics include OA reports, timestamps, mmio metrics, etc. These metrics are collected around batchbuffer boundaries. This work utilizes the underlying infrastructure introduced in Robert Bragg's patches for collecting periodic OA counter snapshots (based on Haswell): https://patchwork.freedesktop.org/series/14505/ This patch set is based on Gen8+ version of Robert's patches which can be found here: https://github.com/rib/linux/tree/wip/rib/oa-next In the last series floated earlier (https://patchwork.freedesktop.org/series/6154/), based on Chris's suggestion, I had tried experimenting with using the cross timestamp framework for the purpose of retrieving tightly coupled device/system timestamps. In our case, this framework enables us to have correlated pairs of gpu+system time which can be used over a period of time to correct the frequency of timestamp clock, and thus enables us to accurately send system time (_MONO_RAW) as requested to the userspace. The results are generally observed to be quite better with the use of cross timestamps and the frequency delta gradually tapers down to 0 with increasing correction periods. The use of cross timestamp framework though requires us to have a cyclecounter/timecounter abstraction for the timestamp clocksource, and further requires a few changes in the kernel timekeeping/clocksource code. I am looking for feedback on the use of this framework and the changes involved. 
These patches can be found for viewing at https://github.com/sourabgu/linux/tree/oa-19oct Sourab Gupta (15): drm/i915: Add ctx getparam ioctl parameter to retrieve ctx unique id drm/i915: Expose OA sample source to userspace drm/i915: Framework for capturing command stream based OA reports drm/i915: flush periodic samples, in case of no pending CS sample requests drm/i915: Handle the overflow condition for command stream buf drm/i915: Populate ctx ID for periodic OA reports drm/i915: Add support for having pid output with OA report drm/i915: Add support for emitting execbuffer tags through OA counter reports drm/i915: Extend i915 perf framework for collecting timestamps on all gpu engines drm/i915: Extract raw GPU timestamps from OA reports to forward in perf samples drm/i915: Support opening multiple concurrent perf streams time: Expose current clocksource in use by timekeeping framework time: export clocks_calc_mult_shift drm/i915: Mechanism to forward clock monotonic raw time in perf samples drm/i915: Support for capturing MMIO register values drivers/gpu/drm/i915/i915_drv.c|2 + drivers/gpu/drm/i915/i915_drv.h| 112 +- drivers/gpu/drm/i915/i915_gem_context.c|3 + drivers/gpu/drm/i915/i915_gem_execbuffer.c |6 + drivers/gpu/drm/i915/i915_perf.c | 1911 drivers/gpu/drm/i915/i915_reg.h|6 + include/linux/timekeeping.h|5 + include/uapi/drm/i915_drm.h| 79 ++ kernel/time/clocksource.c |1 + kernel/time/timekeeping.c | 12 + 10 files changed, 1910 insertions(+), 227 deletions(-) -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 07/15] drm/i915: Add support for having pid output with OA report
From: Sourab GuptaThis patch introduces flags and adds support for having pid output with the OA reports generated through the RCS commands. When the stream is opened with pid sample type, the pid information is also captured through the command stream samples and forwarded along with the OA reports. Signed-off-by: Sourab Gupta --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_perf.c | 48 +++- include/uapi/drm/i915_drm.h | 7 ++ 3 files changed, 56 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 853cc7db..f250e7b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1872,6 +1872,7 @@ struct i915_perf_cs_data_node { struct drm_i915_gem_request *request; u32 offset; u32 ctx_id; + u32 pid; }; struct drm_i915_private { @@ -2242,6 +2243,7 @@ struct drm_i915_private { } command_stream_buf; u32 last_ctx_id; + u32 last_pid; struct list_head node_list; spinlock_t node_list_lock; } perf; diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 84457f8..894d7a6 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -254,6 +254,7 @@ static u32 i915_perf_stream_paranoid = true; struct oa_sample_data { u32 source; u32 ctx_id; + u32 pid; const u8 *report; }; @@ -309,6 +310,7 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = { #define SAMPLE_OA_REPORT (1<<0) #define SAMPLE_OA_SOURCE_INFO (1<<1) #define SAMPLE_CTX_ID (1<<2) +#define SAMPLE_PID (1<<3) struct perf_open_properties { u32 sample_flags; @@ -484,6 +486,7 @@ static void i915_perf_command_stream_hook_oa(struct drm_i915_gem_request *req) goto out; entry->ctx_id = ctx->hw_id; + entry->pid = current->pid; i915_gem_request_assign(>request, req); addr = dev_priv->perf.command_stream_buf.vma->node.start + @@ -735,6 +738,12 @@ static int append_oa_sample(struct i915_perf_stream *stream, buf += 4; } + if (sample_flags & SAMPLE_PID) 
{ + if (copy_to_user(buf, >pid, 4)) + return -EFAULT; + buf += 4; + } + if (sample_flags & SAMPLE_OA_REPORT) { if (copy_to_user(buf, data->report, report_size)) return -EFAULT; @@ -777,6 +786,9 @@ static int append_oa_buffer_sample(struct i915_perf_stream *stream, data.ctx_id = dev_priv->perf.oa.ops.oa_buffer_get_ctx_id( stream, report); + if (sample_flags & SAMPLE_PID) + data.pid = dev_priv->perf.last_pid; + if (sample_flags & SAMPLE_OA_REPORT) data.report = report; @@ -1293,6 +1305,11 @@ static int append_oa_rcs_sample(struct i915_perf_stream *stream, dev_priv->perf.last_ctx_id = node->ctx_id; } + if (sample_flags & SAMPLE_PID) { + data.pid = node->pid; + dev_priv->perf.last_pid = node->pid; + } + if (sample_flags & SAMPLE_OA_REPORT) data.report = report; @@ -2127,6 +2144,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, struct drm_i915_private *dev_priv = stream->dev_priv; bool require_oa_unit = props->sample_flags & (SAMPLE_OA_REPORT | SAMPLE_OA_SOURCE_INFO); + bool require_cs_mode = props->sample_flags & SAMPLE_PID; bool cs_sample_data = props->sample_flags & SAMPLE_OA_REPORT; int ret; @@ -2268,6 +2286,20 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, if (props->sample_flags & SAMPLE_CTX_ID) { stream->sample_flags |= SAMPLE_CTX_ID; stream->sample_size += 4; + + /* +* NB: it's meaningful to request SAMPLE_CTX_ID with just CS +* mode or periodic OA mode sampling but we don't allow +* SAMPLE_CTX_ID without either mode +*/ + if (!require_oa_unit) + require_cs_mode = true; + } + + if (require_cs_mode && !props->cs_mode) { + DRM_ERROR("PID sampling requires a ring to be specified"); + ret = -EINVAL; + goto cs_error; } if (props->cs_mode) { @@ -2278,7 +2310,13 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, goto cs_error; } - if (!(props->sample_flags & SAMPLE_CTX_ID)) { + /* +* The only time we should
[Intel-gfx] [PATCH 15/15] drm/i915: Support for capturing MMIO register values
From: Sourab GuptaThis patch adds support for capturing MMIO register values through i915 perf interface. The userspace can request upto 8 MMIO register values to be dumped. The addresses of these registers can be passed through the corresponding property 'value' field while opening the stream. The commands to dump the values of these MMIO registers are then inserted into the ring alongwith other commands. Signed-off-by: Sourab Gupta --- drivers/gpu/drm/i915/i915_drv.h | 4 + drivers/gpu/drm/i915/i915_perf.c | 153 ++- include/uapi/drm/i915_drm.h | 14 3 files changed, 168 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 557a124..14cd9cf 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1885,6 +1885,7 @@ struct i915_perf_cs_data_node { u32 start_offset; u32 oa_offset; u32 ts_offset; + u32 mmio_offset; /* buffer size corresponding to this entry */ u32 size; @@ -2230,6 +2231,9 @@ struct drm_i915_private { wait_queue_head_t poll_wq[I915_NUM_ENGINES]; atomic_t pollin[I915_NUM_ENGINES]; + u32 num_mmio; + u32 mmio_list[I915_PERF_MMIO_NUM_MAX]; + struct { u32 specific_ctx_id; diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index b11e953..ed6b31f 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -277,6 +277,7 @@ struct sample_data { u64 gpu_ts; u64 clk_monoraw; const u8 *report; + const u8 *mmio; }; /* for sysctl proc_dointvec_minmax of i915_oa_min_timer_exponent */ @@ -335,6 +336,7 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = { #define SAMPLE_TAG (1<<4) #define SAMPLE_TS (1<<5) #define SAMPLE_CLK_MONO_RAW(1<<6) +#define SAMPLE_MMIO(1<<7) struct perf_open_properties { u32 sample_flags; @@ -567,6 +569,9 @@ static int insert_perf_entry(struct drm_i915_private *dev_priv, sample_ts = true; } + if (sample_flags & SAMPLE_MMIO) + entry_size += 4*dev_priv->perf.num_mmio; + 
spin_lock(_priv->perf.node_list_lock[id]); if (list_empty(_priv->perf.node_list[id])) { offset = 0; @@ -644,6 +649,10 @@ out: entry->ts_offset = ALIGN(entry->ts_offset, TS_ADDR_ALIGN); offset = entry->ts_offset + I915_PERF_TS_SAMPLE_SIZE; } + if (sample_flags & SAMPLE_MMIO) { + entry->mmio_offset = offset; + offset = entry->mmio_offset + 4*dev_priv->perf.num_mmio; + } list_add_tail(>link, _priv->perf.node_list[id]); #ifndef CMD_STREAM_BUF_OVERFLOW_ALLOWED @@ -744,6 +753,47 @@ static int i915_ring_stream_capture_ts(struct drm_i915_gem_request *req, return 0; } +static int i915_ring_stream_capture_mmio(struct drm_i915_gem_request *req, + u32 offset) +{ + struct drm_i915_private *dev_priv = req->i915; + enum intel_engine_id id = req->engine->id; + struct intel_ring *ring = req->ring; + int num_mmio = dev_priv->perf.num_mmio; + u32 mmio_addr, addr = 0; + int ret, i; + + ret = intel_ring_begin(req, 4*num_mmio); + if (ret) + return ret; + + mmio_addr = + dev_priv->perf.command_stream_buf[id].vma->node.start + offset; + + for (i = 0; i < num_mmio; i++) { + uint32_t cmd; + + addr = mmio_addr + 4*i; + + if (INTEL_INFO(dev_priv)->gen >= 8) + cmd = MI_STORE_REGISTER_MEM_GEN8 | + MI_SRM_LRM_GLOBAL_GTT; + else + cmd = MI_STORE_REGISTER_MEM | + MI_SRM_LRM_GLOBAL_GTT; + + intel_ring_emit(ring, cmd); + intel_ring_emit(ring, dev_priv->perf.mmio_list[i]); + intel_ring_emit(ring, addr); + if (INTEL_INFO(dev_priv)->gen >= 8) + intel_ring_emit(ring, 0); + else + intel_ring_emit(ring, MI_NOOP); + } + intel_ring_advance(ring); + return 0; +} + static void i915_ring_stream_cs_hook(struct i915_perf_stream *stream, struct drm_i915_gem_request *req, u32 tag) { @@ -784,6 +834,12 @@ static void i915_ring_stream_cs_hook(struct i915_perf_stream *stream, goto err_unref; } + if (sample_flags & SAMPLE_MMIO) { + ret = i915_ring_stream_capture_mmio(req, + entry->mmio_offset); + if (ret) + goto
[Intel-gfx] [PATCH 05/15] drm/i915: Handle the overflow condition for command stream buf
From: Sourab GuptaAdd a compile time option for detecting the overflow condition of command stream buffer, and not overwriting the old entries in such a case. Also, set a status flag to forward the overflow condition to userspace if overflow is detected. Signed-off-by: Sourab Gupta --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_perf.c | 75 2 files changed, 62 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index dedb7f8..e9cf939 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2235,6 +2235,8 @@ struct drm_i915_private { struct drm_i915_gem_object *obj; struct i915_vma *vma; u8 *addr; +#define I915_PERF_CMD_STREAM_BUF_STATUS_OVERFLOW (1<<0) + u32 status; } command_stream_buf; struct list_head node_list; diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 2ee4711..e10e78f 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -247,6 +247,9 @@ static u32 i915_perf_stream_paranoid = true; #define GEN8_OAREPORT_REASON_GO_TRANSITION (1<<23) #define GEN9_OAREPORT_REASON_CLK_RATIO (1<<24) +/* For determining the behavior on overflow of command stream samples */ +#define CMD_STREAM_BUF_OVERFLOW_ALLOWED + /* Data common to periodic and RCS based samples */ struct oa_sample_data { u32 source; @@ -348,6 +351,7 @@ void i915_perf_command_stream_hook(struct drm_i915_gem_request *request) mutex_unlock(_priv->perf.streams_lock); } +#ifdef CMD_STREAM_BUF_OVERFLOW_ALLOWED /* * Release some perf entries to make space for a new entry data. We dereference * the associated request before deleting the entry. Also, no need to check for @@ -374,25 +378,26 @@ static void release_some_perf_entries(struct drm_i915_private *dev_priv, break; } } +#endif /* - * Insert the perf entry to the end of the list. This function never fails, - * since it always manages to insert the entry. 
If the space is exhausted in - * the buffer, it will remove the oldest entries in order to make space. + * Insert the perf entry to the end of the list. If the overwrite of old entries + * is allowed, the function always manages to insert the entry and returns 0. + * If overwrite is not allowed, on detection of overflow condition, an + * appropriate status flag is set, and function returns -ENOSPC. */ -static void insert_perf_entry(struct drm_i915_private *dev_priv, +static int insert_perf_entry(struct drm_i915_private *dev_priv, struct i915_perf_cs_data_node *entry) { struct i915_perf_cs_data_node *first_entry, *last_entry; int max_offset = dev_priv->perf.command_stream_buf.obj->base.size; u32 entry_size = dev_priv->perf.oa.oa_buffer.format_size; + int ret = 0; spin_lock(_priv->perf.node_list_lock); if (list_empty(_priv->perf.node_list)) { entry->offset = 0; - list_add_tail(>link, _priv->perf.node_list); - spin_unlock(_priv->perf.node_list_lock); - return; + goto out; } first_entry = list_first_entry(_priv->perf.node_list, @@ -410,29 +415,49 @@ static void insert_perf_entry(struct drm_i915_private *dev_priv, */ else if (entry_size < first_entry->offset) entry->offset = 0; - /* Insufficient space. Overwrite existing old entries */ + /* Insufficient space */ else { +#ifdef CMD_STREAM_BUF_OVERFLOW_ALLOWED u32 target_size = entry_size - first_entry->offset; release_some_perf_entries(dev_priv, target_size); entry->offset = 0; +#else + dev_priv->perf.command_stream_buf.status |= + I915_PERF_CMD_STREAM_BUF_STATUS_OVERFLOW; + ret = -ENOSPC; + goto out_unlock; +#endif } } else { /* Sufficient space available? */ if (last_entry->offset + 2*entry_size < first_entry->offset) entry->offset = last_entry->offset + entry_size; - /* Insufficient space. 
Overwrite existing old entries */ + /* Insufficient space */ else { +#ifdef CMD_STREAM_BUF_OVERFLOW_ALLOWED u32 target_size = entry_size - (first_entry->offset - last_entry->offset - entry_size); release_some_perf_entries(dev_priv, target_size);
[Intel-gfx] [PATCH 13/15] time: export clocks_calc_mult_shift
From: Sourab Gupta Exporting clocks_calc_mult_shift is helpful for drivers to calculate the mult/shift values for their clocks, given their frequency. This is particularly useful when such drivers may want to associate timecounter/cyclecounter abstraction for their clock sources, in order to use the cross timestamp infrastructure for syncing device time with system time. Signed-off-by: Sourab Gupta --- kernel/time/clocksource.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 6a5a310..e2de743 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -89,6 +89,7 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 maxsec) *mult = tmp; *shift = sft; } +EXPORT_SYMBOL_GPL(clocks_calc_mult_shift); /*[Clocksource internal variables]- * curr_clocksource: -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 02/15] drm/i915: Expose OA sample source to userspace
From: Sourab Gupta This patch exposes a new sample source field to userspace. This field can be populated to specify the origin of the OA report. For example, for internally triggered reports (non MI_RPC reports), the RPT_ID field has bitfields for specifying the origin such as timer, or render ctx switch, etc. Likewise this field can be used to specify the source as MI_RPC when such support is added. Signed-off-by: Sourab Gupta Signed-off-by: Robert Bragg --- drivers/gpu/drm/i915/i915_perf.c | 55 ++-- include/uapi/drm/i915_drm.h | 16 2 files changed, 63 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index d030cd7..58a1118 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -240,6 +240,13 @@ static u32 i915_perf_stream_paranoid = true; */ #define OA_EXPONENT_MAX 31 +#define GEN8_OAREPORT_REASON_TIMER (1<<19) +#define GEN8_OAREPORT_REASON_TRIGGER1 (1<<20) +#define GEN8_OAREPORT_REASON_TRIGGER2 (1<<21) +#define GEN8_OAREPORT_REASON_CTX_SWITCH (1<<22) +#define GEN8_OAREPORT_REASON_GO_TRANSITION (1<<23) +#define GEN9_OAREPORT_REASON_CLK_RATIO (1<<24) + /* for sysctl proc_dointvec_minmax of i915_oa_min_timer_exponent */ static int zero; static int oa_exponent_max = OA_EXPONENT_MAX; @@ -279,7 +286,8 @@ static struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = { [I915_OA_FORMAT_C4_B8] = { 7, 64 }, }; -#define SAMPLE_OA_REPORT (1<<0) +#define SAMPLE_OA_REPORT (1<<0) +#define SAMPLE_OA_SOURCE_INFO (1<<1) struct perf_open_properties { u32 sample_flags; @@ -385,6 +393,27 @@ static int append_oa_sample(struct i915_perf_stream *stream, return -EFAULT; buf += sizeof(header); + if (sample_flags & SAMPLE_OA_SOURCE_INFO) { + enum drm_i915_perf_oa_event_source source; + + if (INTEL_INFO(dev_priv)->gen >= 8) { + u32 reason = *(u32 *)report; + + if (reason & GEN8_OAREPORT_REASON_CTX_SWITCH) + source = + I915_PERF_OA_EVENT_SOURCE_CONTEXT_SWITCH; + else if (reason & 
GEN8_OAREPORT_REASON_TIMER) + source = I915_PERF_OA_EVENT_SOURCE_PERIODIC; + else + source = I915_PERF_OA_EVENT_SOURCE_UNDEFINED; + } else + source = I915_PERF_OA_EVENT_SOURCE_PERIODIC; + + if (copy_to_user(buf, , 4)) + return -EFAULT; + buf += 4; + } + if (sample_flags & SAMPLE_OA_REPORT) { if (copy_to_user(buf, report, report_size)) return -EFAULT; @@ -1453,11 +1482,6 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, return -EINVAL; } - if (!(props->sample_flags & SAMPLE_OA_REPORT)) { - DRM_ERROR("Only OA report sampling supported\n"); - return -EINVAL; - } - if (!dev_priv->perf.oa.ops.init_oa_buffer) { DRM_ERROR("OA unit not supported\n"); return -ENODEV; @@ -1486,8 +1510,20 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, format_size = dev_priv->perf.oa.oa_formats[props->oa_format].size; - stream->sample_flags |= SAMPLE_OA_REPORT; - stream->sample_size += format_size; + if (props->sample_flags & SAMPLE_OA_REPORT) { + stream->sample_flags |= SAMPLE_OA_REPORT; + stream->sample_size += format_size; + } + + if (props->sample_flags & SAMPLE_OA_SOURCE_INFO) { + if (!(props->sample_flags & SAMPLE_OA_REPORT)) { + DRM_ERROR( + "OA source type can't be sampled without OA report"); + return -EINVAL; + } + stream->sample_flags |= SAMPLE_OA_SOURCE_INFO; + stream->sample_size += 4; + } dev_priv->perf.oa.oa_buffer.format_size = format_size; BUG_ON(dev_priv->perf.oa.oa_buffer.format_size == 0); @@ -2160,6 +2196,9 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, props->oa_periodic = true; props->oa_period_exponent = value; break; + case DRM_I915_PERF_PROP_SAMPLE_OA_SOURCE: + props->sample_flags |= SAMPLE_OA_SOURCE_INFO; + break; case DRM_I915_PERF_PROP_MAX: BUG(); } diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index e95f666..0583812 100644 ---
[Intel-gfx] [PATCH 04/15] drm/i915: flush periodic samples, in case of no pending CS sample requests
From: Sourab GuptaWhen there are no pending CS OA samples, flush the periodic OA samples collected so far. We can safely forward the periodic OA samples in the case we have no pending CS samples, but we can't do so in the case we have pending CS samples, since we don't know what the ordering between pending CS samples and periodic samples will eventually be. If we have no pending CS sample, it won't be possible for future pending CS sample to have timestamps earlier than current periodic timestamp. Signed-off-by: Sourab Gupta --- drivers/gpu/drm/i915/i915_drv.h | 7 +- drivers/gpu/drm/i915/i915_perf.c | 163 +-- 2 files changed, 129 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0561315..dedb7f8 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1856,8 +1856,9 @@ struct i915_oa_ops { char __user *buf, size_t count, size_t *offset, - u32 ts); - bool (*oa_buffer_is_empty)(struct drm_i915_private *dev_priv); + u32 ts, u32 max_records); + int (*oa_buffer_num_samples)(struct drm_i915_private *dev_priv, + u32 *last_ts); }; /* @@ -2221,6 +,8 @@ struct drm_i915_private { u32 gen7_latched_oastatus1; u32 ctx_oactxctrl_off; u32 ctx_flexeu0_off; + u32 n_pending_periodic_samples; + u32 pending_periodic_ts; struct i915_oa_ops ops; const struct i915_oa_format *oa_formats; diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 7bbc757..2ee4711 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -540,13 +540,30 @@ static void i915_oa_rcs_free_requests(struct drm_i915_private *dev_priv) * pointers. A race here could result in a false positive !empty status which * is acceptable. 
*/ -static bool gen8_oa_buffer_is_empty_fop_unlocked(struct drm_i915_private *dev_priv) +static int +gen8_oa_buffer_num_samples_fop_unlocked(struct drm_i915_private *dev_priv, + u32 *last_ts) { int report_size = dev_priv->perf.oa.oa_buffer.format_size; - u32 head = I915_READ(GEN8_OAHEADPTR); - u32 tail = I915_READ(GEN8_OATAILPTR); + u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.addr; + u32 head = I915_READ(GEN8_OAHEADPTR) & GEN8_OAHEADPTR_MASK; + u32 tail = I915_READ(GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK; + u32 mask = (OA_BUFFER_SIZE - 1); + u32 num_samples; + u8 *report; + + head -= dev_priv->perf.oa.oa_buffer.gtt_offset; + tail -= dev_priv->perf.oa.oa_buffer.gtt_offset; + num_samples = OA_TAKEN(tail, head) / report_size; - return OA_TAKEN(tail, head) < report_size; + /* read the timestamp of the last sample */ + if (num_samples) { + head += report_size*(num_samples - 1); + report = oa_buf_base + (head & mask); + *last_ts = *(u32 *)(report + 4); + } + + return num_samples; } /* NB: This is either called via fops or the poll check hrtimer (atomic ctx) @@ -560,16 +577,32 @@ static bool gen8_oa_buffer_is_empty_fop_unlocked(struct drm_i915_private *dev_pr * pointers. A race here could result in a false positive !empty status which * is acceptable. 
*/ -static bool gen7_oa_buffer_is_empty_fop_unlocked(struct drm_i915_private *dev_priv) +static int +gen7_oa_buffer_num_samples_fop_unlocked(struct drm_i915_private *dev_priv, + u32 *last_ts) { int report_size = dev_priv->perf.oa.oa_buffer.format_size; u32 oastatus2 = I915_READ(GEN7_OASTATUS2); u32 oastatus1 = I915_READ(GEN7_OASTATUS1); u32 head = oastatus2 & GEN7_OASTATUS2_HEAD_MASK; u32 tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK; + u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.addr; + u32 mask = (OA_BUFFER_SIZE - 1); + int available_size; + u32 num_samples = 0; + u8 *report; - return OA_TAKEN(tail, head) < - dev_priv->perf.oa.tail_margin + report_size; + head -= dev_priv->perf.oa.oa_buffer.gtt_offset; + tail -= dev_priv->perf.oa.oa_buffer.gtt_offset; + available_size = OA_TAKEN(tail, head) - dev_priv->perf.oa.tail_margin; + if (available_size >= report_size) { + num_samples = available_size / report_size; + head += report_size*(num_samples - 1); + report = oa_buf_base + (head & mask); + *last_ts = *(u32 *)(report + 4); + } + + return num_samples; } /** @@ -698,7 +731,7 @@ static int
[Intel-gfx] [PATCH 01/15] drm/i915: Add ctx getparam ioctl parameter to retrieve ctx unique id
From: Sourab Gupta This patch adds a new ctx getparam ioctl parameter, which can be used to retrieve ctx unique id by userspace. This can be used by userspace to map the i915 perf samples with their particular ctx's, since those would be having ctx unique id's. Otherwise the userspace has no way of maintaining this association, since it has the knowledge of only per-drm file specific ctx handles. Signed-off-by: Sourab Gupta --- drivers/gpu/drm/i915/i915_gem_context.c | 3 +++ include/uapi/drm/i915_drm.h | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index e6616ed..d0efa5e 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -1078,6 +1078,9 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE: args->value = !!(ctx->flags & CONTEXT_NO_ERROR_CAPTURE); break; + case I915_CONTEXT_PARAM_HW_ID: + args->value = ctx->hw_id; + break; default: ret = -EINVAL; break; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index f63a392..e95f666 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1223,6 +1223,7 @@ struct drm_i915_gem_context_param { #define I915_CONTEXT_PARAM_NO_ZEROMAP 0x2 #define I915_CONTEXT_PARAM_GTT_SIZE 0x3 #define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE 0x4 +#define I915_CONTEXT_PARAM_HW_ID 0x5 __u64 value; }; -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 06/15] drm/i915: Populate ctx ID for periodic OA reports
From: Sourab GuptaThis adds support for populating the ctx id for the periodic OA reports when requested through the corresponding property. For Gen8, the OA reports itself have the ctx ID and it is the one programmed into HW while submitting workloads. Thus it's retrieved from reports itself. For Gen7, the OA reports don't have any such field, and we can populate this field with the last seen ctx ID while sending CS reports. Signed-off-by: Sourab Gupta --- drivers/gpu/drm/i915/i915_drv.h | 3 +++ drivers/gpu/drm/i915/i915_perf.c | 52 +--- 2 files changed, 52 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e9cf939..853cc7db 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1859,6 +1859,8 @@ struct i915_oa_ops { u32 ts, u32 max_records); int (*oa_buffer_num_samples)(struct drm_i915_private *dev_priv, u32 *last_ts); + u32 (*oa_buffer_get_ctx_id)(struct i915_perf_stream *stream, + const u8 *report); }; /* @@ -2239,6 +2241,7 @@ struct drm_i915_private { u32 status; } command_stream_buf; + u32 last_ctx_id; struct list_head node_list; spinlock_t node_list_lock; } perf; diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index e10e78f..84457f8 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -637,6 +637,46 @@ gen7_oa_buffer_num_samples_fop_unlocked(struct drm_i915_private *dev_priv, return num_samples; } +static u32 gen7_oa_buffer_get_ctx_id(struct i915_perf_stream *stream, + const u8 *report) +{ + struct drm_i915_private *dev_priv = stream->dev_priv; + + if (!stream->cs_mode) + WARN_ONCE(1, + "CTX ID can't be retrieved if command stream mode not enabled"); + + /* +* OA reports generated in Gen7 don't have the ctx ID information. 
+* Therefore, just rely on the ctx ID information from the last CS +* sample forwarded +*/ + return dev_priv->perf.last_ctx_id; +} + +static u32 gen8_oa_buffer_get_ctx_id(struct i915_perf_stream *stream, + const u8 *report) +{ + struct drm_i915_private *dev_priv = stream->dev_priv; + + /* The ctx ID present in the OA reports have intel_context::global_id +* present, since this is programmed into the ELSP in execlist mode. +* In non-execlist mode, fall back to retrieving the ctx ID from the +* last saved ctx ID from command stream mode. +*/ + if (i915.enable_execlists) { + u32 ctx_id = *(u32 *)(report + 12); + ctx_id &= 0xf; + return ctx_id; + } else { + if (!stream->cs_mode) + WARN_ONCE(1, + "CTX ID can't be retrieved if command stream mode not enabled"); + + return dev_priv->perf.last_ctx_id; + } +} + /** * Appends a status record to a userspace read() buffer. */ @@ -733,9 +773,9 @@ static int append_oa_buffer_sample(struct i915_perf_stream *stream, data.source = source; } -#warning "FIXME: append_oa_buffer_sample: read ctx ID from report and map that to an intel_context::global_id" if (sample_flags & SAMPLE_CTX_ID) - data.ctx_id = 0; + data.ctx_id = dev_priv->perf.oa.ops.oa_buffer_get_ctx_id( + stream, report); if (sample_flags & SAMPLE_OA_REPORT) data.report = report; @@ -1248,8 +1288,10 @@ static int append_oa_rcs_sample(struct i915_perf_stream *stream, if (sample_flags & SAMPLE_OA_SOURCE_INFO) data.source = I915_PERF_OA_EVENT_SOURCE_RCS; - if (sample_flags & SAMPLE_CTX_ID) + if (sample_flags & SAMPLE_CTX_ID) { data.ctx_id = node->ctx_id; + dev_priv->perf.last_ctx_id = node->ctx_id; + } if (sample_flags & SAMPLE_OA_REPORT) data.report = report; @@ -3092,6 +3134,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv) dev_priv->perf.oa.ops.read = gen7_oa_read; dev_priv->perf.oa.ops.oa_buffer_num_samples = gen7_oa_buffer_num_samples_fop_unlocked; + dev_priv->perf.oa.ops.oa_buffer_get_ctx_id = + gen7_oa_buffer_get_ctx_id; dev_priv->perf.oa.timestamp_frequency = 
1250; @@ -3106,6 +3150,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv) dev_priv->perf.oa.ops.read = gen8_oa_read;
[Intel-gfx] [PATCH 09/15] drm/i915: Extend i915 perf framework for collecting timestamps on all gpu engines
From: Sourab Gupta This patch extends the i915 perf framework to handle the perf sample collection for any given gpu engine. Particularly, the support for collecting timestamp sample type is added, which can be requested for any engine. With this, for RCS, timestamps and OA reports can be collected together, and provided to userspace in separate sample fields. For other engines, the capability to collect timestamps is added. The thing to note is that, still only a single stream instance can be opened at any particular time. Though that stream may now be opened for any gpu engine, for collection of timestamp samples. So, this patch doesn't add the support to open multiple concurrent streams, as yet. Though it lays the groundwork for this support to be added subsequently. Part of this groundwork involves having separate command stream buffers, per engine, for holding the samples generated. Likewise for a few other data structures maintaining per-engine state. Signed-off-by: Sourab Gupta --- drivers/gpu/drm/i915/i915_drv.h | 35 ++- drivers/gpu/drm/i915/i915_perf.c | 635 +-- drivers/gpu/drm/i915/i915_reg.h | 2 + include/uapi/drm/i915_drm.h | 7 + 4 files changed, 445 insertions(+), 234 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0f171f8..a05335a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1814,7 +1814,8 @@ struct i915_perf_stream_ops { * Routine to emit the commands in the command streamer associated * with the corresponding gpu engine. 
*/ - void (*command_stream_hook)(struct drm_i915_gem_request *req, u32 tag); + void (*command_stream_hook)(struct i915_perf_stream *stream, + struct drm_i915_gem_request *req, u32 tag); }; enum i915_perf_stream_state { @@ -1839,6 +1840,9 @@ struct i915_perf_stream { /* Whether command stream based data collection is enabled */ bool cs_mode; + /* Whether the OA unit is in use */ + bool using_oa; + const struct i915_perf_stream_ops *ops; }; @@ -1870,7 +1874,16 @@ struct i915_oa_ops { struct i915_perf_cs_data_node { struct list_head link; struct drm_i915_gem_request *request; - u32 offset; + + /* Offsets into the GEM obj holding the data */ + u32 start_offset; + u32 oa_offset; + u32 ts_offset; + + /* buffer size corresponding to this entry */ + u32 size; + + /* Other metadata */ u32 ctx_id; u32 pid; u32 tag; @@ -2189,14 +2202,14 @@ struct drm_i915_private { spinlock_t hook_lock; - struct { - struct i915_perf_stream *exclusive_stream; - u32 specific_ctx_id; + struct hrtimer poll_check_timer; + struct i915_perf_stream *exclusive_stream; + wait_queue_head_t poll_wq[I915_NUM_ENGINES]; + atomic_t pollin[I915_NUM_ENGINES]; - struct hrtimer poll_check_timer; - wait_queue_head_t poll_wq; - atomic_t pollin; + struct { + u32 specific_ctx_id; bool periodic; int period_exponent; @@ -2241,13 +2254,13 @@ struct drm_i915_private { u8 *addr; #define I915_PERF_CMD_STREAM_BUF_STATUS_OVERFLOW (1<<0) u32 status; - } command_stream_buf; + } command_stream_buf[I915_NUM_ENGINES]; u32 last_ctx_id; u32 last_pid; u32 last_tag; - struct list_head node_list; - spinlock_t node_list_lock; + struct list_head node_list[I915_NUM_ENGINES]; + spinlock_t node_list_lock[I915_NUM_ENGINES]; } perf; /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */ diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index ca523b1..516fd54 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -250,12 +250,17 @@ static u32 
i915_perf_stream_paranoid = true; /* For determining the behavior on overflow of command stream samples */ #define CMD_STREAM_BUF_OVERFLOW_ALLOWED -/* Data common to periodic and RCS based samples */ -struct oa_sample_data { +#define OA_ADDR_ALIGN 64 +#define TS_ADDR_ALIGN 8 +#define I915_PERF_TS_SAMPLE_SIZE 8 + +/* Data common to all samples (periodic OA / CS based OA / Timestamps) */ +struct sample_data { u32 source; u32 ctx_id; u32 pid; u32 tag; + u64 ts; const u8 *report; }; @@ -313,6 +318,7 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = { #define SAMPLE_CTX_ID
[Intel-gfx] [PATCH 03/15] drm/i915: Framework for capturing command stream based OA reports
From: Sourab Gupta This patch introduces a framework to enable OA counter reports associated with Render command stream. We can then associate the reports captured through this mechanism with their corresponding context id's. This can be further extended to associate any other metadata information with the corresponding samples (since the association with Render command stream gives us the ability to capture this information while inserting the corresponding capture commands into the command stream). The OA reports generated in this way are associated with a corresponding workload, and thus can be used to delimit the workload (i.e. sample the counters at the workload boundaries), within an ongoing stream of periodic counter snapshots. There may be usecases wherein we need more than periodic OA capture mode which is supported currently. This mode is primarily used for two usecases: - Ability to capture system wide metrics, along with the ability to map the reports back to individual contexts (particularly for HSW). - Ability to inject tags for work, into the reports. This provides visibility into the multiple stages of work within single context. The userspace will be able to distinguish between the periodic and CS based OA reports by the virtue of source_info sample field. The command MI_REPORT_PERF_COUNT can be used to capture snapshots of OA counters, and is inserted at BB boundaries. The data thus captured will be stored in a separate buffer, which will be different from the buffer used otherwise for periodic OA capture mode. The metadata information pertaining to snapshot is maintained in a list, which also has offsets into the gem buffer object per captured snapshot. In order to track whether the gpu has completed processing the node, a field pertaining to corresponding gem request is added, which is tracked for completion of the command. 
Both periodic and RCS based reports are associated with a single stream (corresponding to render engine), and it is expected to have the samples in the sequential order according to their timestamps. Now, since these reports are collected in separate buffers, these are merge sorted at the time of forwarding to userspace during the read call. v2: Aligining with the non-perf interface (custom drm ioctl based). Also, few related patches are squashed together for better readability Signed-off-by: Sourab Gupta Signed-off-by: Robert Bragg --- drivers/gpu/drm/i915/i915_drv.h| 44 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 + drivers/gpu/drm/i915/i915_perf.c | 895 - include/uapi/drm/i915_drm.h| 15 + 4 files changed, 805 insertions(+), 153 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a6ac1c3..0561315 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1809,6 +1809,18 @@ struct i915_perf_stream_ops { * The stream will always be disabled before this is called. */ void (*destroy)(struct i915_perf_stream *stream); + + /* +* Routine to emit the commands in the command streamer associated +* with the corresponding gpu engine. 
+*/ + void (*command_stream_hook)(struct drm_i915_gem_request *req); +}; + +enum i915_perf_stream_state { + I915_PERF_STREAM_DISABLED, + I915_PERF_STREAM_ENABLE_IN_PROGRESS, + I915_PERF_STREAM_ENABLED, }; struct i915_perf_stream { @@ -1816,11 +1828,16 @@ struct i915_perf_stream { struct list_head link; + enum intel_engine_id engine; u32 sample_flags; int sample_size; struct i915_gem_context *ctx; bool enabled; + enum i915_perf_stream_state state; + + /* Whether command stream based data collection is enabled */ + bool cs_mode; const struct i915_perf_stream_ops *ops; }; @@ -1838,10 +1855,22 @@ struct i915_oa_ops { int (*read)(struct i915_perf_stream *stream, char __user *buf, size_t count, - size_t *offset); + size_t *offset, + u32 ts); bool (*oa_buffer_is_empty)(struct drm_i915_private *dev_priv); }; +/* + * List element to hold info about the perf sample data associated + * with a particular GPU command stream. + */ +struct i915_perf_cs_data_node { + struct list_head link; + struct drm_i915_gem_request *request; + u32 offset; + u32 ctx_id; +}; + struct drm_i915_private { struct drm_device drm; @@ -2149,6 +2178,8 @@ struct drm_i915_private { struct ctl_table_header *sysctl_header; struct mutex lock; + + struct mutex streams_lock; struct list_head streams; spinlock_t hook_lock; @@ -2195,6 +2226,16 @@ struct
Re: [Intel-gfx] [PATCH 06/12] drm/i915/scheduler: Execute requests in order of priorities
On Thu, Nov 03, 2016 at 07:47:39PM +, Chris Wilson wrote: > On Thu, Nov 03, 2016 at 04:21:25PM +, Tvrtko Ursulin wrote: > > >+static void update_priorities(struct i915_priotree *pt, int prio) > > >+{ > > >+ struct drm_i915_gem_request *request = > > >+ container_of(pt, struct drm_i915_gem_request, priotree); > > >+ struct intel_engine_cs *engine = request->engine; > > >+ struct i915_dependency *dep; > > >+ > > >+ if (prio <= READ_ONCE(pt->priority)) > > >+ return; > > >+ > > >+ /* Recursively bump all dependent priorities to match the new request */ > > >+ list_for_each_entry(dep, >pre_list, pre_link) > > >+ update_priorities(dep->signal, prio); > > > > John got in trouble from recursion in his scheduler, used for the > > same thing AFAIR. Or was it the priority bumping? Either way, it > > could be imperative to avoid it. Spent some time tuning (but not very well) for very deep pipelines: static struct intel_engine_cs * pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked) { struct intel_engine_cs *engine; engine = container_of(pt, struct drm_i915_gem_request, priotree)->engine; if (engine != locked) { if (locked) spin_unlock_irq(>timeline->lock); spin_lock_irq(>timeline->lock); } return engine; } static void execlists_schedule(struct drm_i915_gem_request *request, int prio) { struct intel_engine_cs *engine = NULL; struct i915_dependency *dep, *p; struct i915_dependency stack; LIST_HEAD(dfs); if (prio <= READ_ONCE(request->priotree.priority)) return; /* Need BKL in order to use the temporary link inside i915_dependency */ lockdep_assert_held(>i915->drm.struct_mutex); stack.signal = >priotree; list_add(_link, ); /* Recursively bump all dependent priorities to match the new request */ list_for_each_entry_safe(dep, p, , dfs_link) { struct i915_priotree *pt = dep->signal; list_for_each_entry(p, >pre_list, pre_link) if (prio > READ_ONCE(p->signal->priority)) list_move_tail(>dfs_link, ); p = list_first_entry(>dfs_link, typeof(*p), dfs_link); if 
(!RB_EMPTY_NODE(&pt->node)) continue; engine = pt_lock_engine(pt, engine); if (prio > pt->priority && RB_EMPTY_NODE(&pt->node)) { pt->priority = prio; list_del_init(&dep->dfs_link); } } /* Fifo and depth-first replacement ensure our deps execute before us */ list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) { struct i915_priotree *pt = dep->signal; INIT_LIST_HEAD(&dep->dfs_link); engine = pt_lock_engine(pt, engine); if (prio <= pt->priority) continue; GEM_BUG_ON(RB_EMPTY_NODE(&pt->node)); pt->priority = prio; rb_erase(&pt->node, &engine->execlist_queue); if (insert_request(pt, &engine->execlist_queue)) engine->execlist_first = &pt->node; } if (engine) spin_unlock_irq(&engine->timeline->lock); /* XXX Do we need to preempt to make room for us and our deps? */ } But as always any linear list scales poorly. It is just fortunate that typically we don't see 10,000s of requests in the pipeline that need PI. -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v8 05/12] drm/i915: don't whitelist oacontrol in cmd parser
On Thu, 2016-10-27 at 19:14 -0700, Robert Bragg wrote: > Being able to program OACONTROL from a non-privileged batch buffer is > not sufficient to be able to configure the OA unit. This was originally > allowed to help enable Mesa to expose OA counters via the > INTEL_performance_query extension, but the current implementation based > on programming OACONTROL via a batch buffer isn't able to report useable > data without a more complete OA unit configuration. Mesa handles the > possibility that writes to OACONTROL may not be allowed and so only > advertises the extension after explicitly testing that a write to > OACONTROL succeeds. Based on this; removing OACONTROL from the whitelist > should be ok for userspace. > > Removing this simplifies adding a new kernel api for configuring the OA > unit without needing to consider the possibility that userspace might > trample on OACONTROL state which we'd like to start managing within > the kernel instead. In particular running any Mesa based GL application > currently results in clearing OACONTROL when initializing which would > disable the capturing of metrics. > > Signed-off-by: Robert Bragg> Reviewed-by: Matthew Auld Seems reasonable. Reviewed-by: Sourab Gupta ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v8 09/12] drm/i915: Add dev.i915.perf_stream_paranoid sysctl option
On Thu, 2016-10-27 at 19:14 -0700, Robert Bragg wrote: > Consistent with the kernel.perf_event_paranoid sysctl option that can > allow non-root users to access system wide cpu metrics, this can > optionally allow non-root users to access system wide OA counter metrics > from Gen graphics hardware. > > Signed-off-by: Robert Bragg> Reviewed-by: Matthew Auld > --- > drivers/gpu/drm/i915/i915_drv.h | 1 + > drivers/gpu/drm/i915/i915_perf.c | 50 > +++- > 2 files changed, 50 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 01438fb..a138f86 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -2171,6 +2171,7 @@ struct drm_i915_private { > bool initialized; > > struct kobject *metrics_kobj; > + struct ctl_table_header *sysctl_header; > > struct mutex lock; > struct list_head streams; > diff --git a/drivers/gpu/drm/i915/i915_perf.c > b/drivers/gpu/drm/i915/i915_perf.c > index 8d07c41..4e42073 100644 > --- a/drivers/gpu/drm/i915/i915_perf.c > +++ b/drivers/gpu/drm/i915/i915_perf.c > @@ -64,6 +64,11 @@ > #define POLL_FREQUENCY 200 > #define POLL_PERIOD (NSEC_PER_SEC / POLL_FREQUENCY) > > +/* for sysctl proc_dointvec_minmax of dev.i915.perf_stream_paranoid */ > +static int zero; > +static int one = 1; > +static u32 i915_perf_stream_paranoid = true; > + > /* The maximum exponent the hardware accepts is 63 (essentially it selects > one > * of the 64bit timestamp bits to trigger reports from) but there's currently > * no known use case for sampling as infrequently as once per 47 thousand > years. > @@ -1207,7 +1212,13 @@ i915_perf_open_ioctl_locked(struct drm_i915_private > *dev_priv, > } > } > > - if (!specific_ctx && !capable(CAP_SYS_ADMIN)) { > + /* Similar to perf's kernel.perf_paranoid_cpu sysctl option > + * we check a dev.i915.perf_stream_paranoid sysctl option > + * to determine if it's ok to access system wide OA counters > + * without CAP_SYS_ADMIN privileges. 
> + */ > + if (!specific_ctx && > + i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) { > DRM_ERROR("Insufficient privileges to open system-wide i915 > perf stream\n"); > ret = -EACCES; > goto err_ctx; > @@ -1454,6 +1465,39 @@ void i915_perf_unregister(struct drm_i915_private > *dev_priv) > dev_priv->perf.metrics_kobj = NULL; > } > > +static struct ctl_table oa_table[] = { > + { > + .procname = "perf_stream_paranoid", > + .data = &i915_perf_stream_paranoid, > + .maxlen = sizeof(i915_perf_stream_paranoid), > + .mode = 0644, > + .proc_handler = proc_dointvec_minmax, > + .extra1 = &zero, > + .extra2 = &one, > + }, > + {} > +}; > + > +static struct ctl_table i915_root[] = { > + { > + .procname = "i915", > + .maxlen = 0, > + .mode = 0555, > + .child = oa_table, > + }, > + {} > +}; > + > +static struct ctl_table dev_root[] = { > + { > + .procname = "dev", > + .maxlen = 0, > + .mode = 0555, > + .child = i915_root, > + }, > + {} > +}; > + > void i915_perf_init(struct drm_i915_private *dev_priv) > { > if (!IS_HASWELL(dev_priv)) > @@ -1484,6 +1528,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv) > dev_priv->perf.oa.n_builtin_sets = > i915_oa_n_builtin_metric_sets_hsw; > > + dev_priv->perf.sysctl_header = register_sysctl_table(dev_root); > + > dev_priv->perf.initialized = true; > } > > @@ -1492,6 +1538,8 @@ void i915_perf_fini(struct drm_i915_private *dev_priv) > if (!dev_priv->perf.initialized) > return; > > + unregister_sysctl_table(dev_priv->perf.sysctl_header); > + > memset(&dev_priv->perf.oa.ops, 0, sizeof(dev_priv->perf.oa.ops)); > dev_priv->perf.initialized = false; > } Looks fine. Reviewed-by: Sourab Gupta ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v8 08/12] drm/i915: advertise available metrics via sysfs
On Thu, 2016-10-27 at 19:14 -0700, Robert Bragg wrote: > Each metric set is given a sysfs entry like: > > /sys/class/drm/card0/metrics/<guid>/id > > This allows userspace to enumerate the specific sets that are available > for the current system. The 'id' file contains an unsigned integer that > can be used to open the associated metric set via > DRM_IOCTL_I915_PERF_OPEN. The <guid> is a globally unique ID for a > specific OA unit register configuration that can be reliably used by > userspace as a key to lookup corresponding counter meta data and > normalization equations. > > The guid registry is currently maintained as part of gputop along with > the XML metric set descriptions and code generation scripts, ref: > > https://github.com/rib/gputop > > gputop-data/guids.xml > > scripts/update-guids.py > > gputop-data/oa-*.xml > > scripts/i915-perf-kernelgen.py > > $ make -C gputop-data -f Makefile.xml SYSFS=1 WHITELIST=RenderBasic > > Signed-off-by: Robert Bragg > Reviewed-by: Matthew Auld Looks good to me. Reviewed-by: Sourab Gupta ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v8 02/12] drm/i915: Add i915 perf infrastructure
On Thu, 2016-10-27 at 19:14 -0700, Robert Bragg wrote: > Adds base i915 perf infrastructure for Gen performance metrics. > > This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64 > properties to configure a stream of metrics and returns a new fd usable > with standard VFS system calls including read() to read typed and sized > records; ioctl() to enable or disable capture and poll() to wait for > data. > > A stream is opened something like: > > uint64_t properties[] = { > /* Single context sampling */ > DRM_I915_PERF_PROP_CTX_HANDLE,ctx_handle, > > /* Include OA reports in samples */ > DRM_I915_PERF_PROP_SAMPLE_OA, true, > > /* OA unit configuration */ > DRM_I915_PERF_PROP_OA_METRICS_SET,metrics_set_id, > DRM_I915_PERF_PROP_OA_FORMAT, report_format, > DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent, >}; >struct drm_i915_perf_open_param parm = { > .flags = I915_PERF_FLAG_FD_CLOEXEC | >I915_PERF_FLAG_FD_NONBLOCK | >I915_PERF_FLAG_DISABLED, > .properties_ptr = (uint64_t)properties, > .num_properties = sizeof(properties) / 16, >}; >int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &parm); > > Records read all start with a common { type, size } header with > DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records > contain an extensible number of fields and it's the > DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that > determine what's included in every sample. > > No specific streams are supported yet so any attempt to open a stream > will return an error. 
> > v2: > use i915_gem_context_get() - Chris Wilson > v3: > update read() interface to avoid passing state struct - Chris Wilson > fix some rebase fallout, with i915-perf init/deinit > v4: > s/DRM_IORW/DRM_IOW/ - Emil Velikov > > Signed-off-by: Robert Bragg> --- > drivers/gpu/drm/i915/Makefile| 3 + > drivers/gpu/drm/i915/i915_drv.c | 4 + > drivers/gpu/drm/i915/i915_drv.h | 91 > drivers/gpu/drm/i915/i915_perf.c | 443 > +++ > include/uapi/drm/i915_drm.h | 67 ++ > 5 files changed, 608 insertions(+) > create mode 100644 drivers/gpu/drm/i915/i915_perf.c > > diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile > index 6123400..8d4e25f 100644 > --- a/drivers/gpu/drm/i915/Makefile > +++ b/drivers/gpu/drm/i915/Makefile > @@ -113,6 +113,9 @@ i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o > # virtual gpu code > i915-y += i915_vgpu.o > > +# perf code > +i915-y += i915_perf.o > + > ifeq ($(CONFIG_DRM_I915_GVT),y) > i915-y += intel_gvt.o > include $(src)/gvt/Makefile > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c > index af3559d..685c96e 100644 > --- a/drivers/gpu/drm/i915/i915_drv.c > +++ b/drivers/gpu/drm/i915/i915_drv.c > @@ -836,6 +836,8 @@ static int i915_driver_init_early(struct drm_i915_private > *dev_priv, > > intel_detect_preproduction_hw(dev_priv); > > + i915_perf_init(dev_priv); > + > return 0; > > err_workqueues: > @@ -849,6 +851,7 @@ static int i915_driver_init_early(struct drm_i915_private > *dev_priv, > */ > static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv) > { > + i915_perf_fini(dev_priv); > i915_gem_load_cleanup(_priv->drm); > i915_workqueues_cleanup(dev_priv); > } > @@ -2556,6 +2559,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = { > DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, > DRM_RENDER_ALLOW), > DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_GETPARAM, > i915_gem_context_getparam_ioctl, DRM_RENDER_ALLOW), > DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_SETPARAM, > 
i915_gem_context_setparam_ioctl, DRM_RENDER_ALLOW), > + DRM_IOCTL_DEF_DRV(I915_PERF_OPEN, i915_perf_open_ioctl, > DRM_RENDER_ALLOW), > }; > > static struct drm_driver driver = { > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 5a260db..7a65c0b 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1767,6 +1767,84 @@ struct intel_wm_config { > bool sprites_scaled; > }; > > +struct i915_perf_stream; > + > +struct i915_perf_stream_ops { > + /* Enables the collection of HW samples, either in response to > + * I915_PERF_IOCTL_ENABLE or implicitly called when stream is > + * opened without I915_PERF_FLAG_DISABLED. > + */ > + void (*enable)(struct i915_perf_stream *stream); > + > + /* Disables the collection of HW samples, either in response to > + * I915_PERF_IOCTL_DISABLE or implicitly called before > + * destroying the stream. > + */ > + void (*disable)(struct i915_perf_stream *stream); > + > + /* Return: true if any i915 perf records are ready to read() > + * for this
Re: [Intel-gfx] [PATCH] drm/i915: Fix pages pin counting around swizzle quirk
On ke, 2016-11-02 at 09:43 +0000, Chris Wilson wrote: > @@ -2458,17 +2459,16 @@ int __i915_gem_object_get_pages(struct > drm_i915_gem_object *obj) > if (err) > return err; > > - if (likely(obj->mm.pages)) { > - __i915_gem_object_pin_pages(obj); > - goto unlock; > - } > - > - GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); > + if (unlikely(!obj->mm.pages)) { > + GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); > + err = i915_gem_object_get_pages(obj); > + if (err) > + goto unlock; > > - err = i915_gem_object_get_pages(obj); > - if (!err) > - atomic_set_release(&obj->mm.pages_pin_count, 1); > + smp_mb__before_atomic(); This is not cool without atomic in sight. Inline wrap as __i915_gem_object_pages_mb() or something. > @@ -3707,6 +3707,7 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma) > { > int ret = 0; > > + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj)); Rather confusing, simple mind would think as __i915_gem_object_pin_pages has GEM_BUG_ON(!obj->mm.pages), the next branch would never be taken? > if (vma->pages) > return 0; > Regards, Joonas -- Joonas Lahtinen Open Source Technology Center Intel Corporation ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v3 1/3] lib: add igt_dummyload
On 11/03/2016 04:38 PM, Ville Syrjälä wrote: > On Thu, Nov 03, 2016 at 11:40:36AM +0200, Abdiel Janulgue wrote: >> A lot of igt testcases need some GPU workload to make sure a race >> window is big enough. Unfortunately having a fixed amount of >> workload leads to spurious test failures or overtly long runtimes >> on some fast/slow platforms. This library contains functionality >> to submit GPU workloads that should consume exactly a specific >> amount of time. >> >> v2 : Add recursive batch feature from Chris >> v3 : Drop auto-tuned stuff. Add bo dependecy to recursive batch >> by adding a dummy reloc to the bo as suggested by Ville. >> >> Cc: Daniel Vetter>> Cc: Ville Syrjälä >> Cc: Chris Wilson >> Signed-off-by: Abdiel Janulgue >> --- >> lib/Makefile.sources | 2 + >> lib/igt.h| 1 + >> lib/igt_dummyload.c | 274 >> +++ >> lib/igt_dummyload.h | 42 >> 4 files changed, 319 insertions(+) >> create mode 100644 lib/igt_dummyload.c >> create mode 100644 lib/igt_dummyload.h >> >> diff --git a/lib/Makefile.sources b/lib/Makefile.sources >> index e8e277b..7fc5ec2 100644 >> --- a/lib/Makefile.sources >> +++ b/lib/Makefile.sources >> @@ -75,6 +75,8 @@ lib_source_list = \ >> igt_draw.h \ >> igt_pm.c\ >> igt_pm.h\ >> +igt_dummyload.c \ >> +igt_dummyload.h \ >> uwildmat/uwildmat.h \ >> uwildmat/uwildmat.c \ >> $(NULL) >> diff --git a/lib/igt.h b/lib/igt.h >> index d751f24..a0028d5 100644 >> --- a/lib/igt.h >> +++ b/lib/igt.h >> @@ -32,6 +32,7 @@ >> #include "igt_core.h" >> #include "igt_debugfs.h" >> #include "igt_draw.h" >> +#include "igt_dummyload.h" >> #include "igt_fb.h" >> #include "igt_gt.h" >> #include "igt_kms.h" >> diff --git a/lib/igt_dummyload.c b/lib/igt_dummyload.c >> new file mode 100644 >> index 000..d37a30b >> --- /dev/null >> +++ b/lib/igt_dummyload.c >> @@ -0,0 +1,274 @@ >> +/* >> + * Copyright © 2016 Intel Corporation >> + * >> + * Permission is hereby granted, free of charge, to any person obtaining a >> + * copy of this software and associated documentation 
files (the >> "Software"), >> + * to deal in the Software without restriction, including without limitation >> + * the rights to use, copy, modify, merge, publish, distribute, sublicense, >> + * and/or sell copies of the Software, and to permit persons to whom the >> + * Software is furnished to do so, subject to the following conditions: >> + * >> + * The above copyright notice and this permission notice (including the next >> + * paragraph) shall be included in all copies or substantial portions of the >> + * Software. >> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS >> OR >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL >> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR >> OTHER >> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING >> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER >> DEALINGS >> + * IN THE SOFTWARE. >> + * >> + */ >> + >> +#include "igt.h" >> +#include "igt_dummyload.h" >> +#include >> +#include >> +#include >> + >> +/** >> + * SECTION:igt_dummyload >> + * @short_description: Library for submitting GPU workloads >> + * @title: Dummyload >> + * @include: igt.h >> + * >> + * A lot of igt testcases need some GPU workload to make sure a race window >> is >> + * big enough. Unfortunately having a fixed amount of workload leads to >> + * spurious test failures or overtly long runtimes on some fast/slow >> platforms. >> + * This library contains functionality to submit GPU workloads that should >> + * consume exactly a specific amount of time. 
>> + */ >> + >> +#define NSEC_PER_SEC 10L >> + >> +#define gettid() syscall(__NR_gettid) >> +#define sigev_notify_thread_id _sigev_un._tid >> + >> +#define LOCAL_I915_EXEC_BSD_SHIFT (13) >> +#define LOCAL_I915_EXEC_BSD_MASK (3 << LOCAL_I915_EXEC_BSD_SHIFT) >> + >> +#define ENGINE_MASK (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK) >> + >> +static void >> +fill_object(struct drm_i915_gem_exec_object2 *obj, uint32_t gem_handle, >> +struct drm_i915_gem_relocation_entry *relocs, uint32_t count) >> +{ >> +memset(obj, 0, sizeof(*obj)); >> +obj->handle = gem_handle; >> +obj->relocation_count = count; >> +obj->relocs_ptr = (uintptr_t)relocs; >> +} >> + >> +static void >> +fill_reloc(struct drm_i915_gem_relocation_entry *reloc, >> + uint32_t gem_handle, uint32_t offset, >> + uint32_t read_domains, uint32_t write_domains) >> +{ >> +reloc->target_handle = gem_handle; >> +